{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9595389048991354, "eval_steps": 500, "global_step": 8500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023054755043227666, "grad_norm": 0.50523491192486, "learning_rate": 0.0, "loss": 1.3478702306747437, "step": 1 }, { "epoch": 0.0004610951008645533, "grad_norm": 0.48124949879069834, "learning_rate": 4.6082949308755755e-09, "loss": 1.3662631511688232, "step": 2 }, { "epoch": 0.00069164265129683, "grad_norm": 0.4973440342857191, "learning_rate": 9.216589861751151e-09, "loss": 1.3309710025787354, "step": 3 }, { "epoch": 0.0009221902017291067, "grad_norm": 0.5353819800819951, "learning_rate": 1.3824884792626728e-08, "loss": 1.4097447395324707, "step": 4 }, { "epoch": 0.0011527377521613833, "grad_norm": 0.5883475979029319, "learning_rate": 1.8433179723502302e-08, "loss": 1.519249439239502, "step": 5 }, { "epoch": 0.00138328530259366, "grad_norm": 0.46695865322112806, "learning_rate": 2.304147465437788e-08, "loss": 1.2603031396865845, "step": 6 }, { "epoch": 0.0016138328530259365, "grad_norm": 0.439129921669028, "learning_rate": 2.7649769585253456e-08, "loss": 1.1772313117980957, "step": 7 }, { "epoch": 0.0018443804034582133, "grad_norm": 0.5319224558631903, "learning_rate": 3.225806451612903e-08, "loss": 1.4954156875610352, "step": 8 }, { "epoch": 0.00207492795389049, "grad_norm": 0.4236962197810375, "learning_rate": 3.6866359447004604e-08, "loss": 1.261225938796997, "step": 9 }, { "epoch": 0.0023054755043227667, "grad_norm": 0.4722435329827196, "learning_rate": 4.1474654377880186e-08, "loss": 1.3137118816375732, "step": 10 }, { "epoch": 0.002536023054755043, "grad_norm": 0.5227481950657756, "learning_rate": 4.608294930875576e-08, "loss": 1.3614041805267334, "step": 11 }, { "epoch": 0.00276657060518732, "grad_norm": 0.501227735935382, "learning_rate": 5.069124423963134e-08, "loss": 1.2697081565856934, "step": 12 }, { "epoch": 0.0029971181556195966, "grad_norm": 0.4604754826614043, "learning_rate": 5.529953917050691e-08, "loss": 1.2227230072021484, "step": 13 }, { "epoch": 0.003227665706051873, "grad_norm": 0.5117274741098783, "learning_rate": 5.990783410138249e-08, "loss": 1.4593628644943237, "step": 14 }, { "epoch": 0.00345821325648415, "grad_norm": 0.47884130963603894, "learning_rate": 6.451612903225806e-08, "loss": 1.3960611820220947, "step": 15 }, { "epoch": 0.0036887608069164266, "grad_norm": 0.4832693331921684, "learning_rate": 6.912442396313364e-08, "loss": 1.432786464691162, "step": 16 }, { "epoch": 0.003919308357348703, "grad_norm": 0.530883463141207, "learning_rate": 7.373271889400921e-08, "loss": 1.44321870803833, "step": 17 }, { "epoch": 0.00414985590778098, "grad_norm": 0.5073509519871556, "learning_rate": 7.834101382488478e-08, "loss": 1.4029710292816162, "step": 18 }, { "epoch": 0.004380403458213256, "grad_norm": 0.47114331013100513, "learning_rate": 8.294930875576037e-08, "loss": 1.2752254009246826, "step": 19 }, { "epoch": 0.004610951008645533, "grad_norm": 0.5134696688961232, "learning_rate": 8.755760368663594e-08, "loss": 1.4107906818389893, "step": 20 }, { "epoch": 0.00484149855907781, "grad_norm": 0.5192976074458424, "learning_rate": 9.216589861751152e-08, "loss": 1.473652720451355, "step": 21 }, { "epoch": 0.005072046109510086, "grad_norm": 0.46860465779836935, "learning_rate": 9.677419354838709e-08, "loss": 1.3515217304229736, "step": 22 }, { "epoch": 0.005302593659942363, "grad_norm": 0.4232064440585856, "learning_rate": 1.0138248847926267e-07, "loss": 1.2591620683670044, "step": 23 }, { "epoch": 0.00553314121037464, "grad_norm": 0.47756486586420427, "learning_rate": 1.0599078341013824e-07, "loss": 1.4121818542480469, "step": 24 }, { "epoch": 0.005763688760806916, "grad_norm": 0.5080537563386874, "learning_rate": 1.1059907834101383e-07, "loss": 1.4280284643173218, "step": 25 }, { "epoch": 0.005994236311239193, "grad_norm": 0.49501252357111303, "learning_rate": 1.152073732718894e-07, "loss": 1.24143385887146, "step": 26 }, { "epoch": 0.00622478386167147, "grad_norm": 0.4986915218495848, "learning_rate": 1.1981566820276498e-07, "loss": 1.4424221515655518, "step": 27 }, { "epoch": 0.006455331412103746, "grad_norm": 0.5271724916728263, "learning_rate": 1.2442396313364054e-07, "loss": 1.4094964265823364, "step": 28 }, { "epoch": 0.006685878962536023, "grad_norm": 0.48753764373588065, "learning_rate": 1.2903225806451611e-07, "loss": 1.301988124847412, "step": 29 }, { "epoch": 0.0069164265129683, "grad_norm": 0.49597522677188116, "learning_rate": 1.336405529953917e-07, "loss": 1.342317819595337, "step": 30 }, { "epoch": 0.007146974063400576, "grad_norm": 0.4662909329278552, "learning_rate": 1.3824884792626728e-07, "loss": 1.374321699142456, "step": 31 }, { "epoch": 0.007377521613832853, "grad_norm": 0.48003195379069746, "learning_rate": 1.4285714285714285e-07, "loss": 1.2922568321228027, "step": 32 }, { "epoch": 0.00760806916426513, "grad_norm": 0.4791954611138749, "learning_rate": 1.4746543778801842e-07, "loss": 1.3206393718719482, "step": 33 }, { "epoch": 0.007838616714697407, "grad_norm": 0.46770282190111545, "learning_rate": 1.52073732718894e-07, "loss": 1.3069640398025513, "step": 34 }, { "epoch": 0.008069164265129682, "grad_norm": 0.49996972274526347, "learning_rate": 1.5668202764976955e-07, "loss": 1.4134069681167603, "step": 35 }, { "epoch": 0.00829971181556196, "grad_norm": 0.5280258817640504, "learning_rate": 1.6129032258064515e-07, "loss": 1.3728907108306885, "step": 36 }, { "epoch": 0.008530259365994237, "grad_norm": 0.4285974688946003, "learning_rate": 1.6589861751152074e-07, "loss": 1.2913165092468262, "step": 37 }, { "epoch": 0.008760806916426512, "grad_norm": 0.42264550533680856, "learning_rate": 1.705069124423963e-07, "loss": 1.2775439023971558, "step": 38 }, { "epoch": 0.00899135446685879, "grad_norm": 0.507820888455636, "learning_rate": 1.7511520737327188e-07, "loss": 1.2585256099700928, "step": 39 }, { "epoch": 0.009221902017291067, "grad_norm": 0.5027390631956755, "learning_rate": 1.7972350230414745e-07, "loss": 1.4137083292007446, "step": 40 }, { "epoch": 0.009452449567723342, "grad_norm": 0.4985154114340913, "learning_rate": 1.8433179723502305e-07, "loss": 1.4829634428024292, "step": 41 }, { "epoch": 0.00968299711815562, "grad_norm": 0.45097397965172786, "learning_rate": 1.889400921658986e-07, "loss": 1.274835467338562, "step": 42 }, { "epoch": 0.009913544668587897, "grad_norm": 0.499092720542228, "learning_rate": 1.9354838709677418e-07, "loss": 1.4171526432037354, "step": 43 }, { "epoch": 0.010144092219020172, "grad_norm": 0.46556868786502403, "learning_rate": 1.9815668202764975e-07, "loss": 1.353653907775879, "step": 44 }, { "epoch": 0.01037463976945245, "grad_norm": 0.5613880908390132, "learning_rate": 2.0276497695852535e-07, "loss": 1.5567824840545654, "step": 45 }, { "epoch": 0.010605187319884727, "grad_norm": 0.5081064697106673, "learning_rate": 2.073732718894009e-07, "loss": 1.3403587341308594, "step": 46 }, { "epoch": 0.010835734870317002, "grad_norm": 0.5101668560208579, "learning_rate": 2.1198156682027649e-07, "loss": 1.4320882558822632, "step": 47 }, { "epoch": 0.01106628242074928, "grad_norm": 0.5330267535714837, "learning_rate": 2.1658986175115208e-07, "loss": 1.4313323497772217, "step": 48 }, { "epoch": 0.011296829971181557, "grad_norm": 0.553336579270586, "learning_rate": 2.2119815668202765e-07, "loss": 1.3737103939056396, "step": 49 }, { "epoch": 0.011527377521613832, "grad_norm": 0.5249313819409881, "learning_rate": 2.2580645161290322e-07, "loss": 1.392624855041504, "step": 50 }, { "epoch": 0.01175792507204611, "grad_norm": 0.501704521925026, "learning_rate": 2.304147465437788e-07, "loss": 1.518836259841919, "step": 51 }, { "epoch": 0.011988472622478387, "grad_norm": 0.494303112571743, "learning_rate": 2.3502304147465438e-07, "loss": 1.353430986404419, "step": 52 }, { "epoch": 0.012219020172910662, "grad_norm": 0.5529237573464274, "learning_rate": 2.3963133640552995e-07, "loss": 1.4298975467681885, "step": 53 }, { "epoch": 0.01244956772334294, "grad_norm": 0.608889837254303, "learning_rate": 2.442396313364055e-07, "loss": 1.5087032318115234, "step": 54 }, { "epoch": 0.012680115273775217, "grad_norm": 0.5072050336667386, "learning_rate": 2.488479262672811e-07, "loss": 1.2958388328552246, "step": 55 }, { "epoch": 0.012910662824207492, "grad_norm": 0.49535361810797784, "learning_rate": 2.534562211981567e-07, "loss": 1.3586616516113281, "step": 56 }, { "epoch": 0.01314121037463977, "grad_norm": 0.5061073729366113, "learning_rate": 2.5806451612903223e-07, "loss": 1.4593045711517334, "step": 57 }, { "epoch": 0.013371757925072046, "grad_norm": 0.5674593981664924, "learning_rate": 2.6267281105990777e-07, "loss": 1.4921320676803589, "step": 58 }, { "epoch": 0.013602305475504322, "grad_norm": 0.46119048814961744, "learning_rate": 2.672811059907834e-07, "loss": 1.2952760457992554, "step": 59 }, { "epoch": 0.0138328530259366, "grad_norm": 0.5474941657423043, "learning_rate": 2.7188940092165896e-07, "loss": 1.4734549522399902, "step": 60 }, { "epoch": 0.014063400576368876, "grad_norm": 0.5926884561781834, "learning_rate": 2.7649769585253456e-07, "loss": 1.4697580337524414, "step": 61 }, { "epoch": 0.014293948126801152, "grad_norm": 0.47617437574101296, "learning_rate": 2.8110599078341015e-07, "loss": 1.3642436265945435, "step": 62 }, { "epoch": 0.01452449567723343, "grad_norm": 0.4266689877497005, "learning_rate": 2.857142857142857e-07, "loss": 1.1863957643508911, "step": 63 }, { "epoch": 0.014755043227665706, "grad_norm": 0.4972126101611472, "learning_rate": 2.903225806451613e-07, "loss": 1.4668104648590088, "step": 64 }, { "epoch": 0.014985590778097982, "grad_norm": 0.4920879694799663, "learning_rate": 2.9493087557603683e-07, "loss": 1.2462284564971924, "step": 65 }, { "epoch": 0.01521613832853026, "grad_norm": 0.5870123543834398, "learning_rate": 2.9953917050691243e-07, "loss": 1.459596872329712, "step": 66 }, { "epoch": 0.015446685878962536, "grad_norm": 0.4875968214693539, "learning_rate": 3.04147465437788e-07, "loss": 1.348015546798706, "step": 67 }, { "epoch": 0.015677233429394814, "grad_norm": 0.5086081285359251, "learning_rate": 3.0875576036866356e-07, "loss": 1.2952334880828857, "step": 68 }, { "epoch": 0.01590778097982709, "grad_norm": 0.5392907343811632, "learning_rate": 3.133640552995391e-07, "loss": 1.4372143745422363, "step": 69 }, { "epoch": 0.016138328530259365, "grad_norm": 0.4524557334907612, "learning_rate": 3.1797235023041476e-07, "loss": 1.2725008726119995, "step": 70 }, { "epoch": 0.016368876080691642, "grad_norm": 0.4952177804507778, "learning_rate": 3.225806451612903e-07, "loss": 1.3648872375488281, "step": 71 }, { "epoch": 0.01659942363112392, "grad_norm": 0.6124636807685551, "learning_rate": 3.271889400921659e-07, "loss": 1.3494001626968384, "step": 72 }, { "epoch": 0.016829971181556196, "grad_norm": 0.4692285797456175, "learning_rate": 3.317972350230415e-07, "loss": 1.1907480955123901, "step": 73 }, { "epoch": 0.017060518731988474, "grad_norm": 0.5228163344563065, "learning_rate": 3.3640552995391703e-07, "loss": 1.4449963569641113, "step": 74 }, { "epoch": 0.01729106628242075, "grad_norm": 0.4749685021463316, "learning_rate": 3.410138248847926e-07, "loss": 1.277104377746582, "step": 75 }, { "epoch": 0.017521613832853025, "grad_norm": 0.5209033114195116, "learning_rate": 3.4562211981566817e-07, "loss": 1.3609862327575684, "step": 76 }, { "epoch": 0.017752161383285302, "grad_norm": 0.5369087777206046, "learning_rate": 3.5023041474654376e-07, "loss": 1.3189136981964111, "step": 77 }, { "epoch": 0.01798270893371758, "grad_norm": 0.5465154788756614, "learning_rate": 3.5483870967741936e-07, "loss": 1.4422173500061035, "step": 78 }, { "epoch": 0.018213256484149856, "grad_norm": 0.5634760024465724, "learning_rate": 3.594470046082949e-07, "loss": 1.44877290725708, "step": 79 }, { "epoch": 0.018443804034582133, "grad_norm": 0.5119851144094427, "learning_rate": 3.6405529953917044e-07, "loss": 1.3519830703735352, "step": 80 }, { "epoch": 0.01867435158501441, "grad_norm": 0.4843675992150354, "learning_rate": 3.686635944700461e-07, "loss": 1.4015991687774658, "step": 81 }, { "epoch": 0.018904899135446684, "grad_norm": 0.5374856510016354, "learning_rate": 3.7327188940092163e-07, "loss": 1.4938485622406006, "step": 82 }, { "epoch": 0.01913544668587896, "grad_norm": 0.5341991720772048, "learning_rate": 3.778801843317972e-07, "loss": 1.5170536041259766, "step": 83 }, { "epoch": 0.01936599423631124, "grad_norm": 0.46764341815409355, "learning_rate": 3.824884792626728e-07, "loss": 1.4166152477264404, "step": 84 }, { "epoch": 0.019596541786743516, "grad_norm": 0.4730806672754701, "learning_rate": 3.8709677419354837e-07, "loss": 1.2796568870544434, "step": 85 }, { "epoch": 0.019827089337175793, "grad_norm": 0.537778002692222, "learning_rate": 3.9170506912442396e-07, "loss": 1.580716848373413, "step": 86 }, { "epoch": 0.02005763688760807, "grad_norm": 0.5915866249189787, "learning_rate": 3.963133640552995e-07, "loss": 1.5387516021728516, "step": 87 }, { "epoch": 0.020288184438040344, "grad_norm": 0.4420233078332847, "learning_rate": 4.009216589861751e-07, "loss": 1.2988545894622803, "step": 88 }, { "epoch": 0.02051873198847262, "grad_norm": 0.5173220217871495, "learning_rate": 4.055299539170507e-07, "loss": 1.3798308372497559, "step": 89 }, { "epoch": 0.0207492795389049, "grad_norm": 0.5168573395560399, "learning_rate": 4.1013824884792624e-07, "loss": 1.432153582572937, "step": 90 }, { "epoch": 0.020979827089337176, "grad_norm": 0.5105535607807559, "learning_rate": 4.147465437788018e-07, "loss": 1.3643224239349365, "step": 91 }, { "epoch": 0.021210374639769453, "grad_norm": 0.5225018509714875, "learning_rate": 4.1935483870967743e-07, "loss": 1.3699355125427246, "step": 92 }, { "epoch": 0.02144092219020173, "grad_norm": 0.6310663899937882, "learning_rate": 4.2396313364055297e-07, "loss": 1.5346300601959229, "step": 93 }, { "epoch": 0.021671469740634004, "grad_norm": 0.5162418549049712, "learning_rate": 4.285714285714285e-07, "loss": 1.378378987312317, "step": 94 }, { "epoch": 0.02190201729106628, "grad_norm": 0.5009778072654538, "learning_rate": 4.3317972350230416e-07, "loss": 1.2565600872039795, "step": 95 }, { "epoch": 0.02213256484149856, "grad_norm": 0.47131967484595777, "learning_rate": 4.377880184331797e-07, "loss": 1.1516107320785522, "step": 96 }, { "epoch": 0.022363112391930836, "grad_norm": 0.5075938685585429, "learning_rate": 4.423963133640553e-07, "loss": 1.3053100109100342, "step": 97 }, { "epoch": 0.022593659942363113, "grad_norm": 0.6224731685867901, "learning_rate": 4.4700460829493084e-07, "loss": 1.465439796447754, "step": 98 }, { "epoch": 0.02282420749279539, "grad_norm": 0.5193994738284144, "learning_rate": 4.5161290322580644e-07, "loss": 1.3480231761932373, "step": 99 }, { "epoch": 0.023054755043227664, "grad_norm": 0.5029697791885893, "learning_rate": 4.5622119815668203e-07, "loss": 1.3548729419708252, "step": 100 }, { "epoch": 0.02328530259365994, "grad_norm": 0.4821335123926122, "learning_rate": 4.608294930875576e-07, "loss": 1.2621939182281494, "step": 101 }, { "epoch": 0.02351585014409222, "grad_norm": 0.5666073310249758, "learning_rate": 4.654377880184331e-07, "loss": 1.4433940649032593, "step": 102 }, { "epoch": 0.023746397694524496, "grad_norm": 0.5090266918300268, "learning_rate": 4.7004608294930877e-07, "loss": 1.3975611925125122, "step": 103 }, { "epoch": 0.023976945244956773, "grad_norm": 0.463497389397919, "learning_rate": 4.746543778801843e-07, "loss": 1.3603153228759766, "step": 104 }, { "epoch": 0.02420749279538905, "grad_norm": 0.5598887073047998, "learning_rate": 4.792626728110599e-07, "loss": 1.4595959186553955, "step": 105 }, { "epoch": 0.024438040345821324, "grad_norm": 0.5718053046324956, "learning_rate": 4.838709677419355e-07, "loss": 1.2416110038757324, "step": 106 }, { "epoch": 0.0246685878962536, "grad_norm": 0.5201886582455413, "learning_rate": 4.88479262672811e-07, "loss": 1.3226549625396729, "step": 107 }, { "epoch": 0.02489913544668588, "grad_norm": 0.5355547943819389, "learning_rate": 4.930875576036866e-07, "loss": 1.381019115447998, "step": 108 }, { "epoch": 0.025129682997118156, "grad_norm": 0.4743830386883031, "learning_rate": 4.976958525345622e-07, "loss": 1.3001632690429688, "step": 109 }, { "epoch": 0.025360230547550433, "grad_norm": 0.5460938028869833, "learning_rate": 5.023041474654378e-07, "loss": 1.3989886045455933, "step": 110 }, { "epoch": 0.02559077809798271, "grad_norm": 0.5107214873439961, "learning_rate": 5.069124423963134e-07, "loss": 1.3286147117614746, "step": 111 }, { "epoch": 0.025821325648414984, "grad_norm": 0.5004549050503952, "learning_rate": 5.11520737327189e-07, "loss": 1.2327868938446045, "step": 112 }, { "epoch": 0.02605187319884726, "grad_norm": 0.47534130961461846, "learning_rate": 5.161290322580645e-07, "loss": 1.214202880859375, "step": 113 }, { "epoch": 0.02628242074927954, "grad_norm": 0.5527744015839821, "learning_rate": 5.2073732718894e-07, "loss": 1.3953044414520264, "step": 114 }, { "epoch": 0.026512968299711816, "grad_norm": 0.7593284220674507, "learning_rate": 5.253456221198155e-07, "loss": 1.4174964427947998, "step": 115 }, { "epoch": 0.026743515850144093, "grad_norm": 0.5505732654457145, "learning_rate": 5.299539170506912e-07, "loss": 1.4795210361480713, "step": 116 }, { "epoch": 0.02697406340057637, "grad_norm": 0.510535557692495, "learning_rate": 5.345622119815668e-07, "loss": 1.3342328071594238, "step": 117 }, { "epoch": 0.027204610951008644, "grad_norm": 0.5358682136549786, "learning_rate": 5.391705069124423e-07, "loss": 1.4108824729919434, "step": 118 }, { "epoch": 0.02743515850144092, "grad_norm": 0.5428329715314709, "learning_rate": 5.437788018433179e-07, "loss": 1.4235775470733643, "step": 119 }, { "epoch": 0.0276657060518732, "grad_norm": 0.5157269485071635, "learning_rate": 5.483870967741935e-07, "loss": 1.320220947265625, "step": 120 }, { "epoch": 0.027896253602305476, "grad_norm": 0.5135491640261608, "learning_rate": 5.529953917050691e-07, "loss": 1.303511381149292, "step": 121 }, { "epoch": 0.028126801152737753, "grad_norm": 0.5471566857353616, "learning_rate": 5.576036866359447e-07, "loss": 1.4310801029205322, "step": 122 }, { "epoch": 0.02835734870317003, "grad_norm": 0.524820965238312, "learning_rate": 5.622119815668203e-07, "loss": 1.4052631855010986, "step": 123 }, { "epoch": 0.028587896253602304, "grad_norm": 0.5156246818207144, "learning_rate": 5.668202764976958e-07, "loss": 1.3878209590911865, "step": 124 }, { "epoch": 0.02881844380403458, "grad_norm": 0.5496340547026753, "learning_rate": 5.714285714285714e-07, "loss": 1.3234784603118896, "step": 125 }, { "epoch": 0.02904899135446686, "grad_norm": 0.5570198679920451, "learning_rate": 5.760368663594469e-07, "loss": 1.4433726072311401, "step": 126 }, { "epoch": 0.029279538904899136, "grad_norm": 0.5672242590978396, "learning_rate": 5.806451612903226e-07, "loss": 1.4903366565704346, "step": 127 }, { "epoch": 0.029510086455331413, "grad_norm": 0.5764053931926064, "learning_rate": 5.852534562211982e-07, "loss": 1.4877443313598633, "step": 128 }, { "epoch": 0.02974063400576369, "grad_norm": 0.6493956597599972, "learning_rate": 5.898617511520737e-07, "loss": 1.5012906789779663, "step": 129 }, { "epoch": 0.029971181556195964, "grad_norm": 0.5636643803778404, "learning_rate": 5.944700460829493e-07, "loss": 1.3269531726837158, "step": 130 }, { "epoch": 0.03020172910662824, "grad_norm": 0.48299121768794717, "learning_rate": 5.990783410138249e-07, "loss": 1.2456672191619873, "step": 131 }, { "epoch": 0.03043227665706052, "grad_norm": 0.5007320148704202, "learning_rate": 6.036866359447004e-07, "loss": 1.2842707633972168, "step": 132 }, { "epoch": 0.030662824207492795, "grad_norm": 0.4783035528969415, "learning_rate": 6.08294930875576e-07, "loss": 1.220112919807434, "step": 133 }, { "epoch": 0.030893371757925073, "grad_norm": 0.5443919458428148, "learning_rate": 6.129032258064516e-07, "loss": 1.347076654434204, "step": 134 }, { "epoch": 0.03112391930835735, "grad_norm": 0.520182785254012, "learning_rate": 6.175115207373271e-07, "loss": 1.34126877784729, "step": 135 }, { "epoch": 0.03135446685878963, "grad_norm": 0.5234361924556283, "learning_rate": 6.221198156682027e-07, "loss": 1.3580594062805176, "step": 136 }, { "epoch": 0.0315850144092219, "grad_norm": 0.5382720022504871, "learning_rate": 6.267281105990782e-07, "loss": 1.3195347785949707, "step": 137 }, { "epoch": 0.03181556195965418, "grad_norm": 0.6644195369547549, "learning_rate": 6.313364055299539e-07, "loss": 1.5621061325073242, "step": 138 }, { "epoch": 0.032046109510086455, "grad_norm": 0.5685106208934547, "learning_rate": 6.359447004608295e-07, "loss": 1.4042680263519287, "step": 139 }, { "epoch": 0.03227665706051873, "grad_norm": 0.5167568131947204, "learning_rate": 6.40552995391705e-07, "loss": 1.2934812307357788, "step": 140 }, { "epoch": 0.03250720461095101, "grad_norm": 0.60837930284751, "learning_rate": 6.451612903225806e-07, "loss": 1.4804668426513672, "step": 141 }, { "epoch": 0.032737752161383284, "grad_norm": 0.6121742149463929, "learning_rate": 6.497695852534562e-07, "loss": 1.5287294387817383, "step": 142 }, { "epoch": 0.032968299711815564, "grad_norm": 0.5672386486164406, "learning_rate": 6.543778801843318e-07, "loss": 1.5354558229446411, "step": 143 }, { "epoch": 0.03319884726224784, "grad_norm": 0.5484275315763268, "learning_rate": 6.589861751152074e-07, "loss": 1.3472375869750977, "step": 144 }, { "epoch": 0.03342939481268011, "grad_norm": 0.5749434129647923, "learning_rate": 6.63594470046083e-07, "loss": 1.403039813041687, "step": 145 }, { "epoch": 0.03365994236311239, "grad_norm": 0.5204858023557942, "learning_rate": 6.682027649769585e-07, "loss": 1.2957086563110352, "step": 146 }, { "epoch": 0.033890489913544666, "grad_norm": 0.5758848399522136, "learning_rate": 6.728110599078341e-07, "loss": 1.3671963214874268, "step": 147 }, { "epoch": 0.03412103746397695, "grad_norm": 0.5307364079465571, "learning_rate": 6.774193548387096e-07, "loss": 1.3373156785964966, "step": 148 }, { "epoch": 0.03435158501440922, "grad_norm": 0.5281640020378694, "learning_rate": 6.820276497695853e-07, "loss": 1.4774576425552368, "step": 149 }, { "epoch": 0.0345821325648415, "grad_norm": 0.5679122579833843, "learning_rate": 6.866359447004608e-07, "loss": 1.4094908237457275, "step": 150 }, { "epoch": 0.034812680115273775, "grad_norm": 0.6310275430866781, "learning_rate": 6.912442396313363e-07, "loss": 1.5288136005401611, "step": 151 }, { "epoch": 0.03504322766570605, "grad_norm": 0.5491763264170931, "learning_rate": 6.958525345622119e-07, "loss": 1.4010430574417114, "step": 152 }, { "epoch": 0.03527377521613833, "grad_norm": 0.5102304190283374, "learning_rate": 7.004608294930875e-07, "loss": 1.228097915649414, "step": 153 }, { "epoch": 0.035504322766570603, "grad_norm": 0.556977580118223, "learning_rate": 7.05069124423963e-07, "loss": 1.3849995136260986, "step": 154 }, { "epoch": 0.035734870317002884, "grad_norm": 0.6038000879986429, "learning_rate": 7.096774193548387e-07, "loss": 1.436859369277954, "step": 155 }, { "epoch": 0.03596541786743516, "grad_norm": 0.5448106660281533, "learning_rate": 7.142857142857143e-07, "loss": 1.3830995559692383, "step": 156 }, { "epoch": 0.03619596541786743, "grad_norm": 0.5178664488372983, "learning_rate": 7.188940092165898e-07, "loss": 1.1554113626480103, "step": 157 }, { "epoch": 0.03642651296829971, "grad_norm": 0.5186614381206474, "learning_rate": 7.235023041474654e-07, "loss": 1.276925802230835, "step": 158 }, { "epoch": 0.036657060518731986, "grad_norm": 0.5296293850131283, "learning_rate": 7.281105990783409e-07, "loss": 1.3374000787734985, "step": 159 }, { "epoch": 0.03688760806916427, "grad_norm": 0.6018629559621754, "learning_rate": 7.327188940092166e-07, "loss": 1.384819746017456, "step": 160 }, { "epoch": 0.03711815561959654, "grad_norm": 0.6124602230831588, "learning_rate": 7.373271889400922e-07, "loss": 1.422861099243164, "step": 161 }, { "epoch": 0.03734870317002882, "grad_norm": 0.6196521585488064, "learning_rate": 7.419354838709677e-07, "loss": 1.4244587421417236, "step": 162 }, { "epoch": 0.037579250720461095, "grad_norm": 0.6199123784871026, "learning_rate": 7.465437788018433e-07, "loss": 1.4938528537750244, "step": 163 }, { "epoch": 0.03780979827089337, "grad_norm": 0.5452147962770174, "learning_rate": 7.511520737327189e-07, "loss": 1.2724919319152832, "step": 164 }, { "epoch": 0.03804034582132565, "grad_norm": 0.5579715996476083, "learning_rate": 7.557603686635944e-07, "loss": 1.4015090465545654, "step": 165 }, { "epoch": 0.03827089337175792, "grad_norm": 0.6142689682562157, "learning_rate": 7.603686635944701e-07, "loss": 1.4200658798217773, "step": 166 }, { "epoch": 0.038501440922190204, "grad_norm": 0.550927379145584, "learning_rate": 7.649769585253457e-07, "loss": 1.2477431297302246, "step": 167 }, { "epoch": 0.03873198847262248, "grad_norm": 0.5996206737221976, "learning_rate": 7.695852534562211e-07, "loss": 1.3901419639587402, "step": 168 }, { "epoch": 0.03896253602305476, "grad_norm": 0.6147375477963635, "learning_rate": 7.741935483870967e-07, "loss": 1.4381290674209595, "step": 169 }, { "epoch": 0.03919308357348703, "grad_norm": 0.6270207649449981, "learning_rate": 7.788018433179722e-07, "loss": 1.5323734283447266, "step": 170 }, { "epoch": 0.039423631123919306, "grad_norm": 0.5273848125246383, "learning_rate": 7.834101382488479e-07, "loss": 1.3429911136627197, "step": 171 }, { "epoch": 0.03965417867435159, "grad_norm": 0.7032717723888388, "learning_rate": 7.880184331797235e-07, "loss": 1.5647220611572266, "step": 172 }, { "epoch": 0.03988472622478386, "grad_norm": 0.5934411017478175, "learning_rate": 7.92626728110599e-07, "loss": 1.1879881620407104, "step": 173 }, { "epoch": 0.04011527377521614, "grad_norm": 0.5726555140125118, "learning_rate": 7.972350230414746e-07, "loss": 1.4102849960327148, "step": 174 }, { "epoch": 0.040345821325648415, "grad_norm": 0.616432174195689, "learning_rate": 8.018433179723502e-07, "loss": 1.4521185159683228, "step": 175 }, { "epoch": 0.04057636887608069, "grad_norm": 0.5588880965327565, "learning_rate": 8.064516129032257e-07, "loss": 1.3322495222091675, "step": 176 }, { "epoch": 0.04080691642651297, "grad_norm": 0.6095347282001032, "learning_rate": 8.110599078341014e-07, "loss": 1.3836069107055664, "step": 177 }, { "epoch": 0.04103746397694524, "grad_norm": 0.6033088969243905, "learning_rate": 8.15668202764977e-07, "loss": 1.3639270067214966, "step": 178 }, { "epoch": 0.041268011527377524, "grad_norm": 0.60951279416829, "learning_rate": 8.202764976958525e-07, "loss": 1.3034193515777588, "step": 179 }, { "epoch": 0.0414985590778098, "grad_norm": 0.5682361207295581, "learning_rate": 8.248847926267281e-07, "loss": 1.346369981765747, "step": 180 }, { "epoch": 0.04172910662824208, "grad_norm": 0.5646969989653233, "learning_rate": 8.294930875576036e-07, "loss": 1.2872177362442017, "step": 181 }, { "epoch": 0.04195965417867435, "grad_norm": 0.5336435896443295, "learning_rate": 8.341013824884793e-07, "loss": 1.3010566234588623, "step": 182 }, { "epoch": 0.042190201729106626, "grad_norm": 0.5733249196968232, "learning_rate": 8.387096774193549e-07, "loss": 1.2692077159881592, "step": 183 }, { "epoch": 0.04242074927953891, "grad_norm": 0.5181695473064807, "learning_rate": 8.433179723502303e-07, "loss": 1.2789500951766968, "step": 184 }, { "epoch": 0.04265129682997118, "grad_norm": 0.671579335481803, "learning_rate": 8.479262672811059e-07, "loss": 1.4537731409072876, "step": 185 }, { "epoch": 0.04288184438040346, "grad_norm": 0.5760279966734834, "learning_rate": 8.525345622119815e-07, "loss": 1.2711801528930664, "step": 186 }, { "epoch": 0.043112391930835735, "grad_norm": 0.5840770470208928, "learning_rate": 8.57142857142857e-07, "loss": 1.326183557510376, "step": 187 }, { "epoch": 0.04334293948126801, "grad_norm": 0.6180137341655191, "learning_rate": 8.617511520737327e-07, "loss": 1.3028078079223633, "step": 188 }, { "epoch": 0.04357348703170029, "grad_norm": 0.5721731746298903, "learning_rate": 8.663594470046083e-07, "loss": 1.3517916202545166, "step": 189 }, { "epoch": 0.04380403458213256, "grad_norm": 0.5729165983462421, "learning_rate": 8.709677419354838e-07, "loss": 1.4067044258117676, "step": 190 }, { "epoch": 0.044034582132564844, "grad_norm": 0.541532744989784, "learning_rate": 8.755760368663594e-07, "loss": 1.2318730354309082, "step": 191 }, { "epoch": 0.04426512968299712, "grad_norm": 0.6053194817049817, "learning_rate": 8.801843317972349e-07, "loss": 1.4224486351013184, "step": 192 }, { "epoch": 0.0444956772334294, "grad_norm": 0.5557887004182813, "learning_rate": 8.847926267281106e-07, "loss": 1.2940380573272705, "step": 193 }, { "epoch": 0.04472622478386167, "grad_norm": 0.5691666959738478, "learning_rate": 8.894009216589862e-07, "loss": 1.2765517234802246, "step": 194 }, { "epoch": 0.044956772334293946, "grad_norm": 0.5991065245698037, "learning_rate": 8.940092165898617e-07, "loss": 1.3684041500091553, "step": 195 }, { "epoch": 0.045187319884726226, "grad_norm": 0.6214872461469259, "learning_rate": 8.986175115207373e-07, "loss": 1.26853346824646, "step": 196 }, { "epoch": 0.0454178674351585, "grad_norm": 0.6109731509326806, "learning_rate": 9.032258064516129e-07, "loss": 1.362388014793396, "step": 197 }, { "epoch": 0.04564841498559078, "grad_norm": 0.7382108636991422, "learning_rate": 9.078341013824884e-07, "loss": 1.5644274950027466, "step": 198 }, { "epoch": 0.045878962536023055, "grad_norm": 0.5729864726362653, "learning_rate": 9.124423963133641e-07, "loss": 1.2623369693756104, "step": 199 }, { "epoch": 0.04610951008645533, "grad_norm": 0.5981899078758937, "learning_rate": 9.170506912442397e-07, "loss": 1.3940534591674805, "step": 200 }, { "epoch": 0.04634005763688761, "grad_norm": 0.5996408828481996, "learning_rate": 9.216589861751152e-07, "loss": 1.3285980224609375, "step": 201 }, { "epoch": 0.04657060518731988, "grad_norm": 0.649848642705014, "learning_rate": 9.262672811059907e-07, "loss": 1.5363751649856567, "step": 202 }, { "epoch": 0.046801152737752164, "grad_norm": 0.6186471428262311, "learning_rate": 9.308755760368662e-07, "loss": 1.3822460174560547, "step": 203 }, { "epoch": 0.04703170028818444, "grad_norm": 0.6073589592236044, "learning_rate": 9.354838709677418e-07, "loss": 1.2392113208770752, "step": 204 }, { "epoch": 0.04726224783861672, "grad_norm": 0.5839583864456539, "learning_rate": 9.400921658986175e-07, "loss": 1.2738463878631592, "step": 205 }, { "epoch": 0.04749279538904899, "grad_norm": 0.5799115119327442, "learning_rate": 9.44700460829493e-07, "loss": 1.234877347946167, "step": 206 }, { "epoch": 0.047723342939481266, "grad_norm": 0.623495775301441, "learning_rate": 9.493087557603686e-07, "loss": 1.3901491165161133, "step": 207 }, { "epoch": 0.047953890489913546, "grad_norm": 0.6822208959755598, "learning_rate": 9.539170506912442e-07, "loss": 1.2906568050384521, "step": 208 }, { "epoch": 0.04818443804034582, "grad_norm": 0.677926582170199, "learning_rate": 9.585253456221198e-07, "loss": 1.3887840509414673, "step": 209 }, { "epoch": 0.0484149855907781, "grad_norm": 0.6565533240560858, "learning_rate": 9.631336405529954e-07, "loss": 1.4642484188079834, "step": 210 }, { "epoch": 0.048645533141210374, "grad_norm": 0.6481627320935237, "learning_rate": 9.67741935483871e-07, "loss": 1.3764479160308838, "step": 211 }, { "epoch": 0.04887608069164265, "grad_norm": 0.631978356438684, "learning_rate": 9.723502304147466e-07, "loss": 1.343896746635437, "step": 212 }, { "epoch": 0.04910662824207493, "grad_norm": 0.6740692889548267, "learning_rate": 9.76958525345622e-07, "loss": 1.4437646865844727, "step": 213 }, { "epoch": 0.0493371757925072, "grad_norm": 0.6720558555641115, "learning_rate": 9.815668202764976e-07, "loss": 1.438147783279419, "step": 214 }, { "epoch": 0.04956772334293948, "grad_norm": 0.684406863070909, "learning_rate": 9.861751152073732e-07, "loss": 1.4664554595947266, "step": 215 }, { "epoch": 0.04979827089337176, "grad_norm": 0.5387789373752218, "learning_rate": 9.907834101382488e-07, "loss": 1.1996713876724243, "step": 216 }, { "epoch": 0.05002881844380404, "grad_norm": 0.7065986142812887, "learning_rate": 9.953917050691244e-07, "loss": 1.4759405851364136, "step": 217 }, { "epoch": 0.05025936599423631, "grad_norm": 0.616083267592582, "learning_rate": 1e-06, "loss": 1.2561213970184326, "step": 218 }, { "epoch": 0.050489913544668585, "grad_norm": 0.5674526859382938, "learning_rate": 1.0046082949308756e-06, "loss": 1.1770460605621338, "step": 219 }, { "epoch": 0.050720461095100866, "grad_norm": 0.6008598963516047, "learning_rate": 1.0092165898617511e-06, "loss": 1.2505006790161133, "step": 220 }, { "epoch": 0.05095100864553314, "grad_norm": 0.610874183665047, "learning_rate": 1.0138248847926267e-06, "loss": 1.3262345790863037, "step": 221 }, { "epoch": 0.05118155619596542, "grad_norm": 0.6545962637044704, "learning_rate": 1.0184331797235021e-06, "loss": 1.275759220123291, "step": 222 }, { "epoch": 0.051412103746397694, "grad_norm": 0.6055134335770284, "learning_rate": 1.023041474654378e-06, "loss": 1.26314377784729, "step": 223 }, { "epoch": 0.05164265129682997, "grad_norm": 0.5652742852996497, "learning_rate": 1.0276497695852535e-06, "loss": 1.2621712684631348, "step": 224 }, { "epoch": 0.05187319884726225, "grad_norm": 0.6542335987225151, "learning_rate": 1.032258064516129e-06, "loss": 1.2935044765472412, "step": 225 }, { "epoch": 0.05210374639769452, "grad_norm": 0.6619850782143697, "learning_rate": 1.0368663594470047e-06, "loss": 1.4024615287780762, "step": 226 }, { "epoch": 0.0523342939481268, "grad_norm": 0.6512011773812943, "learning_rate": 1.04147465437788e-06, "loss": 1.3970675468444824, "step": 227 }, { "epoch": 0.05256484149855908, "grad_norm": 0.7012746956320595, "learning_rate": 1.0460829493087557e-06, "loss": 1.3722915649414062, "step": 228 }, { "epoch": 0.05279538904899136, "grad_norm": 0.8098853264626967, "learning_rate": 1.050691244239631e-06, "loss": 1.5259283781051636, "step": 229 }, { "epoch": 0.05302593659942363, "grad_norm": 0.656727684224188, "learning_rate": 1.0552995391705069e-06, "loss": 1.2052996158599854, "step": 230 }, { "epoch": 0.053256484149855905, "grad_norm": 0.6581276402973916, "learning_rate": 1.0599078341013825e-06, "loss": 1.336460828781128, "step": 231 }, { "epoch": 0.053487031700288186, "grad_norm": 0.7539723540951386, "learning_rate": 1.0645161290322579e-06, "loss": 1.4954627752304077, "step": 232 }, { "epoch": 0.05371757925072046, "grad_norm": 0.7919794318433034, "learning_rate": 1.0691244239631337e-06, "loss": 1.5053772926330566, "step": 233 }, { "epoch": 0.05394812680115274, "grad_norm": 0.7129657400690349, "learning_rate": 1.073732718894009e-06, "loss": 1.3133140802383423, "step": 234 }, { "epoch": 0.054178674351585014, "grad_norm": 0.649949904492445, "learning_rate": 1.0783410138248847e-06, "loss": 1.2675721645355225, "step": 235 }, { "epoch": 0.05440922190201729, "grad_norm": 0.7135120090535518, "learning_rate": 1.0829493087557605e-06, "loss": 1.3571391105651855, "step": 236 }, { "epoch": 0.05463976945244957, "grad_norm": 0.6816473641194337, "learning_rate": 1.0875576036866358e-06, "loss": 1.366161584854126, "step": 237 }, { "epoch": 0.05487031700288184, "grad_norm": 0.7159147406875376, "learning_rate": 1.0921658986175114e-06, "loss": 1.4242830276489258, "step": 238 }, { "epoch": 0.05510086455331412, "grad_norm": 0.6832604726478776, "learning_rate": 1.096774193548387e-06, "loss": 1.3941435813903809, "step": 239 }, { "epoch": 0.0553314121037464, "grad_norm": 0.676053870538488, "learning_rate": 1.1013824884792626e-06, "loss": 1.2358953952789307, "step": 240 }, { "epoch": 0.05556195965417868, "grad_norm": 0.694435060104584, "learning_rate": 1.1059907834101382e-06, "loss": 1.3759924173355103, "step": 241 }, { "epoch": 0.05579250720461095, "grad_norm": 0.6008373532438006, "learning_rate": 1.1105990783410138e-06, "loss": 1.2209219932556152, "step": 242 }, { "epoch": 0.056023054755043225, "grad_norm": 0.7605596488241804, "learning_rate": 1.1152073732718894e-06, "loss": 1.5694777965545654, "step": 243 }, { "epoch": 0.056253602305475506, "grad_norm": 0.683498482658377, "learning_rate": 1.1198156682027648e-06, "loss": 1.4109654426574707, "step": 244 }, { "epoch": 0.05648414985590778, "grad_norm": 0.7081776504622735, "learning_rate": 1.1244239631336406e-06, "loss": 1.3836995363235474, "step": 245 }, { "epoch": 0.05671469740634006, "grad_norm": 0.7009333329190326, "learning_rate": 1.1290322580645162e-06, "loss": 1.3234455585479736, "step": 246 }, { "epoch": 0.056945244956772334, "grad_norm": 0.7399576167451694, "learning_rate": 1.1336405529953916e-06, "loss": 1.3350820541381836, "step": 247 }, { "epoch": 0.05717579250720461, "grad_norm": 0.6607735183019582, "learning_rate": 1.1382488479262674e-06, "loss": 1.3644275665283203, "step": 248 }, { "epoch": 0.05740634005763689, "grad_norm": 0.6299746657112861, "learning_rate": 1.1428571428571428e-06, "loss": 1.2501192092895508, "step": 249 }, { "epoch": 0.05763688760806916, "grad_norm": 0.7553557637327969, "learning_rate": 1.1474654377880184e-06, "loss": 1.352830171585083, "step": 250 }, { "epoch": 0.05786743515850144, "grad_norm": 0.7090024654896367, "learning_rate": 1.1520737327188938e-06, "loss": 1.2814360857009888, "step": 251 }, { "epoch": 0.05809798270893372, "grad_norm": 0.6188362792572963, "learning_rate": 1.1566820276497696e-06, "loss": 1.2255218029022217, "step": 252 }, { "epoch": 0.058328530259366, "grad_norm": 0.6924813663949737, "learning_rate": 1.1612903225806452e-06, "loss": 1.3151466846466064, "step": 253 }, { "epoch": 0.05855907780979827, "grad_norm": 0.7272314638515059, "learning_rate": 1.1658986175115205e-06, "loss": 1.3779305219650269, "step": 254 }, { "epoch": 0.058789625360230545, "grad_norm": 0.6935461974752941, "learning_rate": 1.1705069124423963e-06, "loss": 1.2810460329055786, "step": 255 }, { "epoch": 0.059020172910662826, "grad_norm": 0.717205959707173, "learning_rate": 1.1751152073732717e-06, "loss": 1.4521377086639404, "step": 256 }, { "epoch": 0.0592507204610951, "grad_norm": 0.6682723171222476, "learning_rate": 1.1797235023041473e-06, "loss": 1.350247859954834, "step": 257 }, { "epoch": 0.05948126801152738, "grad_norm": 0.7016795378132386, "learning_rate": 1.1843317972350231e-06, "loss": 1.313316822052002, "step": 258 }, { "epoch": 0.059711815561959654, "grad_norm": 0.7343429085377424, "learning_rate": 1.1889400921658985e-06, "loss": 1.3576340675354004, "step": 259 }, { "epoch": 0.05994236311239193, "grad_norm": 0.7069971351993517, "learning_rate": 1.1935483870967741e-06, "loss": 1.232670545578003, "step": 260 }, { "epoch": 0.06017291066282421, "grad_norm": 0.6720886414289914, "learning_rate": 1.1981566820276497e-06, "loss": 1.2778209447860718, "step": 261 }, { "epoch": 0.06040345821325648, "grad_norm": 0.7391359754625508, "learning_rate": 1.2027649769585253e-06, "loss": 1.3562755584716797, "step": 262 }, { "epoch": 0.06063400576368876, "grad_norm": 0.6745648671854169, "learning_rate": 1.207373271889401e-06, "loss": 1.2798476219177246, "step": 263 }, { "epoch": 0.06086455331412104, "grad_norm": 0.6712121780259053, "learning_rate": 1.2119815668202765e-06, "loss": 1.2518937587738037, "step": 264 }, { "epoch": 0.06109510086455332, "grad_norm": 0.6699598442540771, "learning_rate": 1.216589861751152e-06, "loss": 1.394336462020874, "step": 265 }, { "epoch": 0.06132564841498559, "grad_norm": 0.6776471690188184, "learning_rate": 1.2211981566820275e-06, "loss": 1.214491605758667, "step": 266 }, { "epoch": 0.061556195965417865, "grad_norm": 0.6818615172714886, "learning_rate": 1.2258064516129033e-06, "loss": 1.2698123455047607, "step": 267 }, { "epoch": 0.061786743515850145, "grad_norm": 0.6535435362430803, "learning_rate": 1.2304147465437787e-06, "loss": 1.3011083602905273, "step": 268 }, { "epoch": 0.06201729106628242, "grad_norm": 0.6934735806473995, "learning_rate": 1.2350230414746543e-06, "loss": 1.296421766281128, "step": 269 }, { "epoch": 0.0622478386167147, "grad_norm": 0.79226943048109, "learning_rate": 1.23963133640553e-06, "loss": 1.485987901687622, "step": 270 }, { "epoch": 0.062478386167146974, "grad_norm": 0.7765859624895566, "learning_rate": 1.2442396313364054e-06, "loss": 1.3649810552597046, "step": 271 }, { "epoch": 0.06270893371757925, "grad_norm": 0.6341780163248334, "learning_rate": 1.248847926267281e-06, "loss": 1.2397961616516113, "step": 272 }, { "epoch": 0.06293948126801152, "grad_norm": 0.6525113440013135, "learning_rate": 1.2534562211981564e-06, "loss": 1.2815860509872437, "step": 273 }, { "epoch": 0.0631700288184438, "grad_norm": 0.6709241814765708, "learning_rate": 1.2580645161290322e-06, "loss": 1.228407859802246, "step": 274 }, { "epoch": 0.06340057636887608, "grad_norm": 0.5902550603186971, "learning_rate": 1.2626728110599078e-06, "loss": 1.2208014726638794, "step": 275 }, { "epoch": 0.06363112391930836, "grad_norm": 0.6883266896303725, "learning_rate": 1.2672811059907832e-06, "loss": 1.3022860288619995, "step": 276 }, { "epoch": 0.06386167146974063, "grad_norm": 0.6958190963931714, "learning_rate": 1.271889400921659e-06, "loss": 1.2390055656433105, "step": 277 }, { "epoch": 0.06409221902017291, "grad_norm": 0.7060333360910418, "learning_rate": 1.2764976958525344e-06, "loss": 1.2937133312225342, "step": 278 }, { "epoch": 0.06432276657060519, "grad_norm": 0.72113993110492, "learning_rate": 1.28110599078341e-06, "loss": 1.278928279876709, "step": 279 }, { "epoch": 0.06455331412103746, "grad_norm": 0.7956220599215559, "learning_rate": 1.2857142857142858e-06, "loss": 1.3676493167877197, "step": 280 }, { "epoch": 0.06478386167146974, "grad_norm": 0.6685675905435974, "learning_rate": 1.2903225806451612e-06, "loss": 1.2168056964874268, "step": 281 }, { "epoch": 0.06501440922190202, "grad_norm": 0.7514960943888414, "learning_rate": 1.2949308755760368e-06, "loss": 1.3900643587112427, "step": 282 }, { "epoch": 0.0652449567723343, "grad_norm": 0.6534086686230444, "learning_rate": 1.2995391705069124e-06, "loss": 1.2207615375518799, "step": 283 }, { "epoch": 0.06547550432276657, "grad_norm": 0.6869837785111367, "learning_rate": 1.304147465437788e-06, "loss": 1.2372363805770874, "step": 284 }, { "epoch": 0.06570605187319885, "grad_norm": 0.7276088382668475, "learning_rate": 1.3087557603686636e-06, "loss": 1.15517258644104, "step": 285 }, { "epoch": 0.06593659942363113, "grad_norm": 0.7261960848573564, "learning_rate": 1.3133640552995392e-06, "loss": 1.3100334405899048, "step": 286 }, { "epoch": 0.0661671469740634, "grad_norm": 0.795094119655108, "learning_rate": 1.3179723502304148e-06, "loss": 1.4036345481872559, "step": 287 }, { "epoch": 0.06639769452449568, "grad_norm": 0.7130936562568114, "learning_rate": 1.3225806451612901e-06, "loss": 1.2247200012207031, "step": 288 }, { "epoch": 0.06662824207492796, "grad_norm": 0.6327777226077211, "learning_rate": 1.327188940092166e-06, "loss": 1.1968882083892822, "step": 289 }, { "epoch": 0.06685878962536022, "grad_norm": 0.73231661672907, "learning_rate": 1.3317972350230413e-06, "loss": 1.317826271057129, "step": 290 }, { "epoch": 0.0670893371757925, "grad_norm": 0.7205974467953724, "learning_rate": 1.336405529953917e-06, "loss": 1.2956342697143555, "step": 291 }, { "epoch": 0.06731988472622479, "grad_norm": 0.7444236121791392, "learning_rate": 1.3410138248847927e-06, "loss": 1.2325165271759033, "step": 292 }, { "epoch": 0.06755043227665707, "grad_norm": 0.6273978280781463, "learning_rate": 1.3456221198156681e-06, "loss": 1.2119462490081787, "step": 293 }, { "epoch": 0.06778097982708933, "grad_norm": 0.7992045270603186, "learning_rate": 1.3502304147465437e-06, "loss": 1.3506251573562622, "step": 294 }, { "epoch": 0.06801152737752161, "grad_norm": 0.6560610786503305, "learning_rate": 1.354838709677419e-06, "loss": 1.1504114866256714, "step": 295 }, { "epoch": 0.0682420749279539, "grad_norm": 0.6390191765410149, "learning_rate": 1.359447004608295e-06, "loss": 1.1813435554504395, "step": 296 }, { "epoch": 0.06847262247838616, "grad_norm": 0.6787488194794526, "learning_rate": 1.3640552995391705e-06, "loss": 1.205298900604248, "step": 297 }, { "epoch": 0.06870317002881844, "grad_norm": 0.6488526630183898, "learning_rate": 1.3686635944700459e-06, "loss": 1.152748942375183, "step": 298 }, { "epoch": 0.06893371757925072, "grad_norm": 0.6794904899583581, "learning_rate": 1.3732718894009217e-06, "loss": 1.2536249160766602, "step": 299 }, { "epoch": 0.069164265129683, "grad_norm": 0.6751957999851543, "learning_rate": 1.377880184331797e-06, "loss": 1.2739291191101074, "step": 300 }, { "epoch": 0.06939481268011527, "grad_norm": 0.6991854064813895, "learning_rate": 1.3824884792626727e-06, "loss": 1.2369191646575928, "step": 301 }, { "epoch": 0.06962536023054755, "grad_norm": 0.826219529491011, "learning_rate": 1.3870967741935485e-06, "loss": 1.3230082988739014, "step": 302 }, { "epoch": 0.06985590778097983, "grad_norm": 0.808711382879254, "learning_rate": 1.3917050691244239e-06, "loss": 1.3362655639648438, "step": 303 }, { "epoch": 0.0700864553314121, "grad_norm": 0.6609985845459885, "learning_rate": 1.3963133640552995e-06, "loss": 1.1898441314697266, "step": 304 }, { "epoch": 0.07031700288184438, "grad_norm": 0.7674645714085818, "learning_rate": 1.400921658986175e-06, "loss": 1.3019602298736572, "step": 305 }, { "epoch": 0.07054755043227666, "grad_norm": 0.7540320255609526, "learning_rate": 1.4055299539170507e-06, "loss": 1.3292012214660645, "step": 306 }, { "epoch": 0.07077809798270894, "grad_norm": 0.7296176404767546, "learning_rate": 1.410138248847926e-06, "loss": 1.2561442852020264, "step": 307 }, { "epoch": 0.07100864553314121, "grad_norm": 0.7354489519106788, "learning_rate": 1.4147465437788018e-06, "loss": 1.1946594715118408, "step": 308 }, { "epoch": 0.07123919308357349, "grad_norm": 0.8454555568104161, "learning_rate": 1.4193548387096774e-06, "loss": 1.3130412101745605, "step": 309 }, { "epoch": 0.07146974063400577, "grad_norm": 0.7568231549725508, "learning_rate": 1.4239631336405528e-06, "loss": 1.264148235321045, "step": 310 }, { "epoch": 0.07170028818443804, "grad_norm": 0.684674340294116, "learning_rate": 1.4285714285714286e-06, "loss": 1.133709192276001, "step": 311 }, { "epoch": 0.07193083573487032, "grad_norm": 0.7308987546704907, "learning_rate": 1.433179723502304e-06, "loss": 1.2604464292526245, "step": 312 }, { "epoch": 0.0721613832853026, "grad_norm": 0.7914252137423667, "learning_rate": 1.4377880184331796e-06, "loss": 1.386889100074768, "step": 313 }, { "epoch": 0.07239193083573486, "grad_norm": 0.7000597647399976, "learning_rate": 1.4423963133640554e-06, "loss": 1.22135591506958, "step": 314 }, { "epoch": 0.07262247838616714, "grad_norm": 0.702819138693291, "learning_rate": 1.4470046082949308e-06, "loss": 1.2078099250793457, "step": 315 }, { "epoch": 0.07285302593659942, "grad_norm": 0.8775736552686038, "learning_rate": 1.4516129032258064e-06, "loss": 1.3379974365234375, "step": 316 }, { "epoch": 0.0730835734870317, "grad_norm": 0.7634284151011571, "learning_rate": 1.4562211981566818e-06, "loss": 1.219855785369873, "step": 317 }, { "epoch": 0.07331412103746397, "grad_norm": 0.717221305660253, "learning_rate": 1.4608294930875576e-06, "loss": 1.0662527084350586, "step": 318 }, { "epoch": 0.07354466858789625, "grad_norm": 0.7065622523456622, "learning_rate": 1.4654377880184332e-06, "loss": 1.0985239744186401, "step": 319 }, { "epoch": 0.07377521613832853, "grad_norm": 0.6722074850030375, "learning_rate": 1.4700460829493086e-06, "loss": 1.1001049280166626, "step": 320 }, { "epoch": 0.0740057636887608, "grad_norm": 0.6840726731766956, "learning_rate": 1.4746543778801844e-06, "loss": 1.225736141204834, "step": 321 }, { "epoch": 0.07423631123919308, "grad_norm": 0.6852972399988531, "learning_rate": 1.4792626728110598e-06, "loss": 1.1907551288604736, "step": 322 }, { "epoch": 0.07446685878962536, "grad_norm": 0.7595767032333773, "learning_rate": 1.4838709677419353e-06, "loss": 1.1923848390579224, "step": 323 }, { "epoch": 0.07469740634005764, "grad_norm": 0.6912770856373969, "learning_rate": 1.4884792626728112e-06, "loss": 1.1136579513549805, "step": 324 }, { "epoch": 0.07492795389048991, "grad_norm": 0.6984679703551435, "learning_rate": 1.4930875576036865e-06, "loss": 1.1610283851623535, "step": 325 }, { "epoch": 0.07515850144092219, "grad_norm": 0.6677016485987167, "learning_rate": 1.4976958525345621e-06, "loss": 1.1658828258514404, "step": 326 }, { "epoch": 0.07538904899135447, "grad_norm": 0.7507307887297694, "learning_rate": 1.5023041474654377e-06, "loss": 1.1573631763458252, "step": 327 }, { "epoch": 0.07561959654178674, "grad_norm": 0.8364861854285678, "learning_rate": 1.5069124423963133e-06, "loss": 1.305356740951538, "step": 328 }, { "epoch": 0.07585014409221902, "grad_norm": 0.7387762275193068, "learning_rate": 1.5115207373271887e-06, "loss": 1.1737552881240845, "step": 329 }, { "epoch": 0.0760806916426513, "grad_norm": 0.8767163364621563, "learning_rate": 1.5161290322580645e-06, "loss": 1.3644309043884277, "step": 330 }, { "epoch": 0.07631123919308358, "grad_norm": 0.7488441886874779, "learning_rate": 1.5207373271889401e-06, "loss": 1.1332610845565796, "step": 331 }, { "epoch": 0.07654178674351585, "grad_norm": 0.6720106893242441, "learning_rate": 1.5253456221198155e-06, "loss": 1.1542474031448364, "step": 332 }, { "epoch": 0.07677233429394813, "grad_norm": 0.6844904773167374, "learning_rate": 1.5299539170506913e-06, "loss": 1.2047884464263916, "step": 333 }, { "epoch": 0.07700288184438041, "grad_norm": 0.7116834419031486, "learning_rate": 1.5345622119815667e-06, "loss": 1.185925006866455, "step": 334 }, { "epoch": 0.07723342939481267, "grad_norm": 0.8221074928786821, "learning_rate": 1.5391705069124423e-06, "loss": 1.3261258602142334, "step": 335 }, { "epoch": 0.07746397694524496, "grad_norm": 0.7367381450019147, "learning_rate": 1.543778801843318e-06, "loss": 1.226957082748413, "step": 336 }, { "epoch": 0.07769452449567724, "grad_norm": 0.690883992926701, "learning_rate": 1.5483870967741935e-06, "loss": 1.0979371070861816, "step": 337 }, { "epoch": 0.07792507204610952, "grad_norm": 0.6808836094905616, "learning_rate": 1.552995391705069e-06, "loss": 1.2402095794677734, "step": 338 }, { "epoch": 0.07815561959654178, "grad_norm": 0.7713932969797707, "learning_rate": 1.5576036866359445e-06, "loss": 1.123030185699463, "step": 339 }, { "epoch": 0.07838616714697406, "grad_norm": 0.7901531027112338, "learning_rate": 1.5622119815668203e-06, "loss": 1.1617474555969238, "step": 340 }, { "epoch": 0.07861671469740635, "grad_norm": 0.8006480481311214, "learning_rate": 1.5668202764976959e-06, "loss": 1.1931252479553223, "step": 341 }, { "epoch": 0.07884726224783861, "grad_norm": 0.7207543428315331, "learning_rate": 1.5714285714285712e-06, "loss": 1.164405107498169, "step": 342 }, { "epoch": 0.07907780979827089, "grad_norm": 0.7403414009803999, "learning_rate": 1.576036866359447e-06, "loss": 1.1092296838760376, "step": 343 }, { "epoch": 0.07930835734870317, "grad_norm": 0.8301892918218122, "learning_rate": 1.5806451612903224e-06, "loss": 1.2289469242095947, "step": 344 }, { "epoch": 0.07953890489913544, "grad_norm": 0.7791254530131521, "learning_rate": 1.585253456221198e-06, "loss": 1.3640224933624268, "step": 345 }, { "epoch": 0.07976945244956772, "grad_norm": 0.807839732947003, "learning_rate": 1.5898617511520738e-06, "loss": 1.2496929168701172, "step": 346 }, { "epoch": 0.08, "grad_norm": 0.6959074479343652, "learning_rate": 1.5944700460829492e-06, "loss": 1.0853437185287476, "step": 347 }, { "epoch": 0.08023054755043228, "grad_norm": 0.7268485255917756, "learning_rate": 1.5990783410138248e-06, "loss": 1.2237377166748047, "step": 348 }, { "epoch": 0.08046109510086455, "grad_norm": 0.6646199872578112, "learning_rate": 1.6036866359447004e-06, "loss": 0.9917643666267395, "step": 349 }, { "epoch": 0.08069164265129683, "grad_norm": 0.7942775598883802, "learning_rate": 1.608294930875576e-06, "loss": 1.3162565231323242, "step": 350 }, { "epoch": 0.08092219020172911, "grad_norm": 0.6469982944799066, "learning_rate": 1.6129032258064514e-06, "loss": 0.994131863117218, "step": 351 }, { "epoch": 0.08115273775216138, "grad_norm": 0.7099061902752692, "learning_rate": 1.6175115207373272e-06, "loss": 1.154555082321167, "step": 352 }, { "epoch": 0.08138328530259366, "grad_norm": 0.7439801731909884, "learning_rate": 1.6221198156682028e-06, "loss": 1.0531972646713257, "step": 353 }, { "epoch": 0.08161383285302594, "grad_norm": 0.793781848371416, "learning_rate": 1.6267281105990782e-06, "loss": 1.1934162378311157, "step": 354 }, { "epoch": 0.08184438040345822, "grad_norm": 0.7349998365191204, "learning_rate": 1.631336405529954e-06, "loss": 1.19966721534729, "step": 355 }, { "epoch": 0.08207492795389049, "grad_norm": 0.8221740376040937, "learning_rate": 1.6359447004608294e-06, "loss": 1.0795832872390747, "step": 356 }, { "epoch": 0.08230547550432277, "grad_norm": 0.7298340356609088, "learning_rate": 1.640552995391705e-06, "loss": 1.0402061939239502, "step": 357 }, { "epoch": 0.08253602305475505, "grad_norm": 0.7847539046036707, "learning_rate": 1.6451612903225808e-06, "loss": 1.229203462600708, "step": 358 }, { "epoch": 0.08276657060518731, "grad_norm": 0.7172034000711612, "learning_rate": 1.6497695852534561e-06, "loss": 1.1610770225524902, "step": 359 }, { "epoch": 0.0829971181556196, "grad_norm": 0.7669965747112191, "learning_rate": 1.6543778801843317e-06, "loss": 1.1292459964752197, "step": 360 }, { "epoch": 0.08322766570605188, "grad_norm": 0.7517664120896831, "learning_rate": 1.6589861751152071e-06, "loss": 1.1873208284378052, "step": 361 }, { "epoch": 0.08345821325648416, "grad_norm": 0.733900951458625, "learning_rate": 1.663594470046083e-06, "loss": 1.161617398262024, "step": 362 }, { "epoch": 0.08368876080691642, "grad_norm": 0.7247739442724684, "learning_rate": 1.6682027649769585e-06, "loss": 1.0853413343429565, "step": 363 }, { "epoch": 0.0839193083573487, "grad_norm": 0.8591163320147464, "learning_rate": 1.672811059907834e-06, "loss": 1.1864356994628906, "step": 364 }, { "epoch": 0.08414985590778098, "grad_norm": 0.7336734972950607, "learning_rate": 1.6774193548387097e-06, "loss": 1.1111290454864502, "step": 365 }, { "epoch": 0.08438040345821325, "grad_norm": 0.7814288357485446, "learning_rate": 1.682027649769585e-06, "loss": 1.1710078716278076, "step": 366 }, { "epoch": 0.08461095100864553, "grad_norm": 0.6790802315270096, "learning_rate": 1.6866359447004607e-06, "loss": 1.0132228136062622, "step": 367 }, { "epoch": 0.08484149855907781, "grad_norm": 0.8159760904129824, "learning_rate": 1.6912442396313363e-06, "loss": 1.2294046878814697, "step": 368 }, { "epoch": 0.08507204610951008, "grad_norm": 0.7957648972259336, "learning_rate": 1.6958525345622119e-06, "loss": 1.1442105770111084, "step": 369 }, { "epoch": 0.08530259365994236, "grad_norm": 0.7995238723541568, "learning_rate": 1.7004608294930875e-06, "loss": 1.119593620300293, "step": 370 }, { "epoch": 0.08553314121037464, "grad_norm": 0.7863033103100832, "learning_rate": 1.705069124423963e-06, "loss": 1.145449161529541, "step": 371 }, { "epoch": 0.08576368876080692, "grad_norm": 0.7341046610073212, "learning_rate": 1.7096774193548387e-06, "loss": 1.0639642477035522, "step": 372 }, { "epoch": 0.08599423631123919, "grad_norm": 0.9126792061000337, "learning_rate": 1.714285714285714e-06, "loss": 1.1851778030395508, "step": 373 }, { "epoch": 0.08622478386167147, "grad_norm": 0.7973778810449275, "learning_rate": 1.7188940092165899e-06, "loss": 1.0616769790649414, "step": 374 }, { "epoch": 0.08645533141210375, "grad_norm": 0.7901345366558606, "learning_rate": 1.7235023041474655e-06, "loss": 1.0211896896362305, "step": 375 }, { "epoch": 0.08668587896253602, "grad_norm": 0.8252437141593564, "learning_rate": 1.7281105990783408e-06, "loss": 1.1738722324371338, "step": 376 }, { "epoch": 0.0869164265129683, "grad_norm": 0.7679463056914901, "learning_rate": 1.7327188940092167e-06, "loss": 1.0378369092941284, "step": 377 }, { "epoch": 0.08714697406340058, "grad_norm": 0.8131293786091974, "learning_rate": 1.737327188940092e-06, "loss": 1.0998988151550293, "step": 378 }, { "epoch": 0.08737752161383286, "grad_norm": 0.7733685330823146, "learning_rate": 1.7419354838709676e-06, "loss": 1.023256540298462, "step": 379 }, { "epoch": 0.08760806916426513, "grad_norm": 0.7246433117470548, "learning_rate": 1.7465437788018434e-06, "loss": 1.1030439138412476, "step": 380 }, { "epoch": 0.0878386167146974, "grad_norm": 0.7707540243721439, "learning_rate": 1.7511520737327188e-06, "loss": 1.0966073274612427, "step": 381 }, { "epoch": 0.08806916426512969, "grad_norm": 0.7627867300031437, "learning_rate": 1.7557603686635944e-06, "loss": 1.07340669631958, "step": 382 }, { "epoch": 0.08829971181556195, "grad_norm": 0.8500159940576424, "learning_rate": 1.7603686635944698e-06, "loss": 1.14761221408844, "step": 383 }, { "epoch": 0.08853025936599423, "grad_norm": 0.7457463472293449, "learning_rate": 1.7649769585253456e-06, "loss": 0.9445088505744934, "step": 384 }, { "epoch": 0.08876080691642652, "grad_norm": 0.8663122026588986, "learning_rate": 1.7695852534562212e-06, "loss": 1.1617610454559326, "step": 385 }, { "epoch": 0.0889913544668588, "grad_norm": 0.7082642521683337, "learning_rate": 1.7741935483870966e-06, "loss": 0.951229989528656, "step": 386 }, { "epoch": 0.08922190201729106, "grad_norm": 0.7461309802802878, "learning_rate": 1.7788018433179724e-06, "loss": 1.0025156736373901, "step": 387 }, { "epoch": 0.08945244956772334, "grad_norm": 0.8360911278199987, "learning_rate": 1.7834101382488478e-06, "loss": 1.1546887159347534, "step": 388 }, { "epoch": 0.08968299711815562, "grad_norm": 0.7751782140721731, "learning_rate": 1.7880184331797234e-06, "loss": 0.9596165418624878, "step": 389 }, { "epoch": 0.08991354466858789, "grad_norm": 0.7529645387949501, "learning_rate": 1.792626728110599e-06, "loss": 0.9940363168716431, "step": 390 }, { "epoch": 0.09014409221902017, "grad_norm": 0.804620736198686, "learning_rate": 1.7972350230414746e-06, "loss": 1.0265294313430786, "step": 391 }, { "epoch": 0.09037463976945245, "grad_norm": 0.7957136691031254, "learning_rate": 1.8018433179723502e-06, "loss": 0.9495709538459778, "step": 392 }, { "epoch": 0.09060518731988472, "grad_norm": 0.787168756666669, "learning_rate": 1.8064516129032258e-06, "loss": 0.9847695231437683, "step": 393 }, { "epoch": 0.090835734870317, "grad_norm": 0.7881149009057379, "learning_rate": 1.8110599078341013e-06, "loss": 1.0195221900939941, "step": 394 }, { "epoch": 0.09106628242074928, "grad_norm": 0.7999107012228945, "learning_rate": 1.8156682027649767e-06, "loss": 0.9874474406242371, "step": 395 }, { "epoch": 0.09129682997118156, "grad_norm": 0.927208840166958, "learning_rate": 1.8202764976958525e-06, "loss": 1.1311742067337036, "step": 396 }, { "epoch": 0.09152737752161383, "grad_norm": 0.9095065417039184, "learning_rate": 1.8248847926267281e-06, "loss": 1.1371029615402222, "step": 397 }, { "epoch": 0.09175792507204611, "grad_norm": 0.8481304449628981, "learning_rate": 1.8294930875576035e-06, "loss": 1.0090055465698242, "step": 398 }, { "epoch": 0.09198847262247839, "grad_norm": 0.8420788132547982, "learning_rate": 1.8341013824884793e-06, "loss": 1.07207190990448, "step": 399 }, { "epoch": 0.09221902017291066, "grad_norm": 0.8184534103423728, "learning_rate": 1.8387096774193547e-06, "loss": 0.9810532331466675, "step": 400 }, { "epoch": 0.09244956772334294, "grad_norm": 1.0411936731827351, "learning_rate": 1.8433179723502303e-06, "loss": 1.2664501667022705, "step": 401 }, { "epoch": 0.09268011527377522, "grad_norm": 0.7852023561267767, "learning_rate": 1.8479262672811061e-06, "loss": 1.0095962285995483, "step": 402 }, { "epoch": 0.0929106628242075, "grad_norm": 0.7857131234487584, "learning_rate": 1.8525345622119815e-06, "loss": 1.0042834281921387, "step": 403 }, { "epoch": 0.09314121037463977, "grad_norm": 0.7709647586214176, "learning_rate": 1.857142857142857e-06, "loss": 0.8744128942489624, "step": 404 }, { "epoch": 0.09337175792507205, "grad_norm": 0.8530952338978857, "learning_rate": 1.8617511520737325e-06, "loss": 1.0015833377838135, "step": 405 }, { "epoch": 0.09360230547550433, "grad_norm": 0.7989612398012207, "learning_rate": 1.8663594470046083e-06, "loss": 1.0201606750488281, "step": 406 }, { "epoch": 0.0938328530259366, "grad_norm": 1.043996819106173, "learning_rate": 1.8709677419354837e-06, "loss": 1.1015177965164185, "step": 407 }, { "epoch": 0.09406340057636887, "grad_norm": 0.882516706195983, "learning_rate": 1.8755760368663593e-06, "loss": 1.091389775276184, "step": 408 }, { "epoch": 0.09429394812680116, "grad_norm": 0.7840852185129056, "learning_rate": 1.880184331797235e-06, "loss": 0.8727986216545105, "step": 409 }, { "epoch": 0.09452449567723344, "grad_norm": 0.8398591191752447, "learning_rate": 1.8847926267281104e-06, "loss": 0.9092183113098145, "step": 410 }, { "epoch": 0.0947550432276657, "grad_norm": 0.8436593101983947, "learning_rate": 1.889400921658986e-06, "loss": 0.9508894085884094, "step": 411 }, { "epoch": 0.09498559077809798, "grad_norm": 0.7678545338337761, "learning_rate": 1.8940092165898616e-06, "loss": 0.8827848434448242, "step": 412 }, { "epoch": 0.09521613832853026, "grad_norm": 0.7648299956803372, "learning_rate": 1.8986175115207372e-06, "loss": 1.0385243892669678, "step": 413 }, { "epoch": 0.09544668587896253, "grad_norm": 0.8242600559542741, "learning_rate": 1.9032258064516128e-06, "loss": 0.9325747489929199, "step": 414 }, { "epoch": 0.09567723342939481, "grad_norm": 0.7843703854217207, "learning_rate": 1.9078341013824884e-06, "loss": 0.9631662964820862, "step": 415 }, { "epoch": 0.09590778097982709, "grad_norm": 1.1262060148133348, "learning_rate": 1.912442396313364e-06, "loss": 1.1359961032867432, "step": 416 }, { "epoch": 0.09613832853025936, "grad_norm": 0.8039492437719185, "learning_rate": 1.9170506912442396e-06, "loss": 0.9880660772323608, "step": 417 }, { "epoch": 0.09636887608069164, "grad_norm": 0.8001061947110307, "learning_rate": 1.921658986175115e-06, "loss": 0.8506733179092407, "step": 418 }, { "epoch": 0.09659942363112392, "grad_norm": 0.9791271445171249, "learning_rate": 1.926267281105991e-06, "loss": 1.0341942310333252, "step": 419 }, { "epoch": 0.0968299711815562, "grad_norm": 0.7763561172041712, "learning_rate": 1.930875576036866e-06, "loss": 0.8546561002731323, "step": 420 }, { "epoch": 0.09706051873198847, "grad_norm": 0.8024656403802054, "learning_rate": 1.935483870967742e-06, "loss": 0.9161783456802368, "step": 421 }, { "epoch": 0.09729106628242075, "grad_norm": 0.9113560381281316, "learning_rate": 1.9400921658986174e-06, "loss": 1.015718698501587, "step": 422 }, { "epoch": 0.09752161383285303, "grad_norm": 0.8303491458325148, "learning_rate": 1.944700460829493e-06, "loss": 0.9243098497390747, "step": 423 }, { "epoch": 0.0977521613832853, "grad_norm": 0.8465800362864491, "learning_rate": 1.9493087557603686e-06, "loss": 0.8776401281356812, "step": 424 }, { "epoch": 0.09798270893371758, "grad_norm": 0.7514444317981556, "learning_rate": 1.953917050691244e-06, "loss": 0.8500463962554932, "step": 425 }, { "epoch": 0.09821325648414986, "grad_norm": 0.7938198109985205, "learning_rate": 1.9585253456221198e-06, "loss": 0.8716859817504883, "step": 426 }, { "epoch": 0.09844380403458214, "grad_norm": 0.8017507552829808, "learning_rate": 1.963133640552995e-06, "loss": 0.9234505891799927, "step": 427 }, { "epoch": 0.0986743515850144, "grad_norm": 0.7566678903766497, "learning_rate": 1.967741935483871e-06, "loss": 0.9051532745361328, "step": 428 }, { "epoch": 0.09890489913544669, "grad_norm": 0.886957468759461, "learning_rate": 1.9723502304147463e-06, "loss": 0.8781849145889282, "step": 429 }, { "epoch": 0.09913544668587897, "grad_norm": 0.7774885647358278, "learning_rate": 1.976958525345622e-06, "loss": 0.8522506952285767, "step": 430 }, { "epoch": 0.09936599423631123, "grad_norm": 0.7871502402754743, "learning_rate": 1.9815668202764975e-06, "loss": 0.9448544979095459, "step": 431 }, { "epoch": 0.09959654178674351, "grad_norm": 0.7617033311294167, "learning_rate": 1.9861751152073733e-06, "loss": 0.7997490763664246, "step": 432 }, { "epoch": 0.0998270893371758, "grad_norm": 0.7915288270224765, "learning_rate": 1.9907834101382487e-06, "loss": 0.8592349290847778, "step": 433 }, { "epoch": 0.10005763688760808, "grad_norm": 0.818004653923457, "learning_rate": 1.995391705069124e-06, "loss": 0.9532517194747925, "step": 434 }, { "epoch": 0.10028818443804034, "grad_norm": 0.8041219998303657, "learning_rate": 2e-06, "loss": 0.873796820640564, "step": 435 }, { "epoch": 0.10051873198847262, "grad_norm": 0.9934391344021535, "learning_rate": 1.9999999273552013e-06, "loss": 0.9449926614761353, "step": 436 }, { "epoch": 0.1007492795389049, "grad_norm": 0.9938752545804322, "learning_rate": 1.999999709420816e-06, "loss": 0.988682746887207, "step": 437 }, { "epoch": 0.10097982708933717, "grad_norm": 0.7475711832587217, "learning_rate": 1.9999993461968757e-06, "loss": 0.7442165017127991, "step": 438 }, { "epoch": 0.10121037463976945, "grad_norm": 0.7463599495804439, "learning_rate": 1.9999988376834334e-06, "loss": 0.8737642168998718, "step": 439 }, { "epoch": 0.10144092219020173, "grad_norm": 0.8576731282949522, "learning_rate": 1.9999981838805625e-06, "loss": 0.9158309698104858, "step": 440 }, { "epoch": 0.101671469740634, "grad_norm": 0.8150704084388924, "learning_rate": 1.9999973847883583e-06, "loss": 0.867765486240387, "step": 441 }, { "epoch": 0.10190201729106628, "grad_norm": 0.8553308555715755, "learning_rate": 1.9999964404069368e-06, "loss": 0.8964484930038452, "step": 442 }, { "epoch": 0.10213256484149856, "grad_norm": 0.7705340065317197, "learning_rate": 1.9999953507364356e-06, "loss": 0.8330350518226624, "step": 443 }, { "epoch": 0.10236311239193084, "grad_norm": 0.7066282569211535, "learning_rate": 1.9999941157770124e-06, "loss": 0.7785549163818359, "step": 444 }, { "epoch": 0.10259365994236311, "grad_norm": 0.760446701085777, "learning_rate": 1.999992735528847e-06, "loss": 0.8079872131347656, "step": 445 }, { "epoch": 0.10282420749279539, "grad_norm": 0.7523871144342295, "learning_rate": 1.99999120999214e-06, "loss": 0.8472942113876343, "step": 446 }, { "epoch": 0.10305475504322767, "grad_norm": 0.7264431448160646, "learning_rate": 1.9999895391671126e-06, "loss": 0.8861861228942871, "step": 447 }, { "epoch": 0.10328530259365994, "grad_norm": 0.8217421098006639, "learning_rate": 1.999987723054008e-06, "loss": 0.885787308216095, "step": 448 }, { "epoch": 0.10351585014409222, "grad_norm": 0.7285720204415543, "learning_rate": 1.9999857616530898e-06, "loss": 0.6860470771789551, "step": 449 }, { "epoch": 0.1037463976945245, "grad_norm": 0.8092634702328965, "learning_rate": 1.999983654964643e-06, "loss": 0.8421996235847473, "step": 450 }, { "epoch": 0.10397694524495678, "grad_norm": 0.6918798585679139, "learning_rate": 1.999981402988974e-06, "loss": 0.7358509302139282, "step": 451 }, { "epoch": 0.10420749279538905, "grad_norm": 0.8711876840819741, "learning_rate": 1.99997900572641e-06, "loss": 0.9160239100456238, "step": 452 }, { "epoch": 0.10443804034582133, "grad_norm": 0.793739222544006, "learning_rate": 1.9999764631772986e-06, "loss": 0.8336344361305237, "step": 453 }, { "epoch": 0.1046685878962536, "grad_norm": 0.7780053576863255, "learning_rate": 1.999973775342009e-06, "loss": 0.8190123438835144, "step": 454 }, { "epoch": 0.10489913544668587, "grad_norm": 0.808952000995293, "learning_rate": 1.9999709422209335e-06, "loss": 0.8161033391952515, "step": 455 }, { "epoch": 0.10512968299711815, "grad_norm": 0.8989947005366407, "learning_rate": 1.999967963814482e-06, "loss": 0.9421751499176025, "step": 456 }, { "epoch": 0.10536023054755043, "grad_norm": 0.7968643165098177, "learning_rate": 1.999964840123088e-06, "loss": 0.763748288154602, "step": 457 }, { "epoch": 0.10559077809798272, "grad_norm": 0.7481696350009063, "learning_rate": 1.9999615711472054e-06, "loss": 0.8567416667938232, "step": 458 }, { "epoch": 0.10582132564841498, "grad_norm": 1.0087319259323613, "learning_rate": 1.9999581568873087e-06, "loss": 0.9419023394584656, "step": 459 }, { "epoch": 0.10605187319884726, "grad_norm": 0.8346606599446912, "learning_rate": 1.999954597343894e-06, "loss": 0.9568943381309509, "step": 460 }, { "epoch": 0.10628242074927954, "grad_norm": 0.6775195295071811, "learning_rate": 1.9999508925174788e-06, "loss": 0.7311264276504517, "step": 461 }, { "epoch": 0.10651296829971181, "grad_norm": 0.7526753824189546, "learning_rate": 1.999947042408601e-06, "loss": 0.7850263118743896, "step": 462 }, { "epoch": 0.10674351585014409, "grad_norm": 0.7457484944783759, "learning_rate": 1.9999430470178204e-06, "loss": 0.7745206356048584, "step": 463 }, { "epoch": 0.10697406340057637, "grad_norm": 0.7003739754723187, "learning_rate": 1.9999389063457173e-06, "loss": 0.8353025913238525, "step": 464 }, { "epoch": 0.10720461095100864, "grad_norm": 0.9101417777797033, "learning_rate": 1.999934620392893e-06, "loss": 0.897802472114563, "step": 465 }, { "epoch": 0.10743515850144092, "grad_norm": 0.903369064301118, "learning_rate": 1.999930189159971e-06, "loss": 0.9702463746070862, "step": 466 }, { "epoch": 0.1076657060518732, "grad_norm": 0.7170983912845004, "learning_rate": 1.9999256126475942e-06, "loss": 0.8617191314697266, "step": 467 }, { "epoch": 0.10789625360230548, "grad_norm": 0.7306281027611775, "learning_rate": 1.9999208908564277e-06, "loss": 0.7819014191627502, "step": 468 }, { "epoch": 0.10812680115273775, "grad_norm": 0.827677640050239, "learning_rate": 1.9999160237871578e-06, "loss": 0.8597872257232666, "step": 469 }, { "epoch": 0.10835734870317003, "grad_norm": 0.8728777793339176, "learning_rate": 1.9999110114404922e-06, "loss": 0.7132382392883301, "step": 470 }, { "epoch": 0.10858789625360231, "grad_norm": 0.7146110096996957, "learning_rate": 1.9999058538171577e-06, "loss": 0.8056540489196777, "step": 471 }, { "epoch": 0.10881844380403458, "grad_norm": 0.7959224929560886, "learning_rate": 1.999900550917905e-06, "loss": 0.8378279209136963, "step": 472 }, { "epoch": 0.10904899135446686, "grad_norm": 0.7547724153366271, "learning_rate": 1.9998951027435034e-06, "loss": 0.7748581767082214, "step": 473 }, { "epoch": 0.10927953890489914, "grad_norm": 0.7391603156936171, "learning_rate": 1.9998895092947455e-06, "loss": 0.7897888422012329, "step": 474 }, { "epoch": 0.10951008645533142, "grad_norm": 0.7396833498322501, "learning_rate": 1.999883770572444e-06, "loss": 0.8274221420288086, "step": 475 }, { "epoch": 0.10974063400576368, "grad_norm": 0.7616365712426187, "learning_rate": 1.9998778865774314e-06, "loss": 0.8707382082939148, "step": 476 }, { "epoch": 0.10997118155619597, "grad_norm": 0.7283542403436611, "learning_rate": 1.9998718573105633e-06, "loss": 0.7552956342697144, "step": 477 }, { "epoch": 0.11020172910662825, "grad_norm": 0.6823864451295132, "learning_rate": 1.9998656827727163e-06, "loss": 0.7214533090591431, "step": 478 }, { "epoch": 0.11043227665706051, "grad_norm": 0.763520371282763, "learning_rate": 1.9998593629647873e-06, "loss": 0.8750051856040955, "step": 479 }, { "epoch": 0.1106628242074928, "grad_norm": 0.7178374692315476, "learning_rate": 1.9998528978876937e-06, "loss": 0.7860144376754761, "step": 480 }, { "epoch": 0.11089337175792507, "grad_norm": 0.6859242313007144, "learning_rate": 1.9998462875423753e-06, "loss": 0.757607102394104, "step": 481 }, { "epoch": 0.11112391930835736, "grad_norm": 0.6015793809430244, "learning_rate": 1.9998395319297926e-06, "loss": 0.7959357500076294, "step": 482 }, { "epoch": 0.11135446685878962, "grad_norm": 0.6229506349949882, "learning_rate": 1.9998326310509272e-06, "loss": 0.7823261022567749, "step": 483 }, { "epoch": 0.1115850144092219, "grad_norm": 0.7767261362242464, "learning_rate": 1.999825584906781e-06, "loss": 0.8102509379386902, "step": 484 }, { "epoch": 0.11181556195965418, "grad_norm": 0.7984930890199812, "learning_rate": 1.999818393498379e-06, "loss": 0.8045464754104614, "step": 485 }, { "epoch": 0.11204610951008645, "grad_norm": 0.7465165784036604, "learning_rate": 1.999811056826765e-06, "loss": 0.7793935537338257, "step": 486 }, { "epoch": 0.11227665706051873, "grad_norm": 0.9275795908823249, "learning_rate": 1.999803574893005e-06, "loss": 0.8787537217140198, "step": 487 }, { "epoch": 0.11250720461095101, "grad_norm": 0.6449054496936255, "learning_rate": 1.9997959476981865e-06, "loss": 0.7439980506896973, "step": 488 }, { "epoch": 0.11273775216138328, "grad_norm": 0.7974021521444602, "learning_rate": 1.999788175243418e-06, "loss": 0.8799367547035217, "step": 489 }, { "epoch": 0.11296829971181556, "grad_norm": 0.6347750262437738, "learning_rate": 1.9997802575298277e-06, "loss": 0.8095611929893494, "step": 490 }, { "epoch": 0.11319884726224784, "grad_norm": 0.678068196789053, "learning_rate": 1.9997721945585666e-06, "loss": 0.7679798603057861, "step": 491 }, { "epoch": 0.11342939481268012, "grad_norm": 0.9415597885304294, "learning_rate": 1.999763986330806e-06, "loss": 0.7401316165924072, "step": 492 }, { "epoch": 0.11365994236311239, "grad_norm": 0.6821887571104429, "learning_rate": 1.9997556328477384e-06, "loss": 0.7155672311782837, "step": 493 }, { "epoch": 0.11389048991354467, "grad_norm": 0.7229913098734038, "learning_rate": 1.9997471341105782e-06, "loss": 0.7619851231575012, "step": 494 }, { "epoch": 0.11412103746397695, "grad_norm": 0.8032976234822868, "learning_rate": 1.999738490120559e-06, "loss": 0.8438892960548401, "step": 495 }, { "epoch": 0.11435158501440922, "grad_norm": 0.9076868092366523, "learning_rate": 1.999729700878937e-06, "loss": 0.8797614574432373, "step": 496 }, { "epoch": 0.1145821325648415, "grad_norm": 0.9089650759995708, "learning_rate": 1.99972076638699e-06, "loss": 0.8678663969039917, "step": 497 }, { "epoch": 0.11481268011527378, "grad_norm": 0.8397478898714018, "learning_rate": 1.9997116866460154e-06, "loss": 0.8200712203979492, "step": 498 }, { "epoch": 0.11504322766570606, "grad_norm": 0.8811759253610771, "learning_rate": 1.9997024616573327e-06, "loss": 0.8951148986816406, "step": 499 }, { "epoch": 0.11527377521613832, "grad_norm": 0.7151742231494962, "learning_rate": 1.9996930914222816e-06, "loss": 0.7555750012397766, "step": 500 }, { "epoch": 0.1155043227665706, "grad_norm": 0.8184974509806977, "learning_rate": 1.9996835759422245e-06, "loss": 0.7769593596458435, "step": 501 }, { "epoch": 0.11573487031700289, "grad_norm": 1.0057466201597953, "learning_rate": 1.999673915218543e-06, "loss": 0.8568921685218811, "step": 502 }, { "epoch": 0.11596541786743515, "grad_norm": 0.6854968317254936, "learning_rate": 1.9996641092526405e-06, "loss": 0.8469095826148987, "step": 503 }, { "epoch": 0.11619596541786743, "grad_norm": 0.6147466487176672, "learning_rate": 1.999654158045943e-06, "loss": 0.6636455059051514, "step": 504 }, { "epoch": 0.11642651296829971, "grad_norm": 0.7667665478590456, "learning_rate": 1.9996440615998954e-06, "loss": 0.7256879806518555, "step": 505 }, { "epoch": 0.116657060518732, "grad_norm": 0.848039033522635, "learning_rate": 1.9996338199159648e-06, "loss": 0.755784273147583, "step": 506 }, { "epoch": 0.11688760806916426, "grad_norm": 0.7864448678312735, "learning_rate": 1.9996234329956387e-06, "loss": 0.8655821084976196, "step": 507 }, { "epoch": 0.11711815561959654, "grad_norm": 0.764272109359362, "learning_rate": 1.9996129008404266e-06, "loss": 0.8127482533454895, "step": 508 }, { "epoch": 0.11734870317002882, "grad_norm": 0.6826490621044304, "learning_rate": 1.999602223451859e-06, "loss": 0.7233914136886597, "step": 509 }, { "epoch": 0.11757925072046109, "grad_norm": 0.6440562693914861, "learning_rate": 1.999591400831487e-06, "loss": 0.687708854675293, "step": 510 }, { "epoch": 0.11780979827089337, "grad_norm": 1.0116475587467835, "learning_rate": 1.9995804329808833e-06, "loss": 0.8101118803024292, "step": 511 }, { "epoch": 0.11804034582132565, "grad_norm": 0.7641615867983651, "learning_rate": 1.999569319901641e-06, "loss": 0.8226180672645569, "step": 512 }, { "epoch": 0.11827089337175793, "grad_norm": 0.7622277313388016, "learning_rate": 1.9995580615953745e-06, "loss": 0.7804063558578491, "step": 513 }, { "epoch": 0.1185014409221902, "grad_norm": 0.7501641804711608, "learning_rate": 1.9995466580637203e-06, "loss": 0.8480167388916016, "step": 514 }, { "epoch": 0.11873198847262248, "grad_norm": 0.7373769561784406, "learning_rate": 1.9995351093083342e-06, "loss": 0.7421882152557373, "step": 515 }, { "epoch": 0.11896253602305476, "grad_norm": 0.792331723623861, "learning_rate": 1.999523415330895e-06, "loss": 0.7909554839134216, "step": 516 }, { "epoch": 0.11919308357348703, "grad_norm": 0.5843007809679732, "learning_rate": 1.999511576133101e-06, "loss": 0.7070448398590088, "step": 517 }, { "epoch": 0.11942363112391931, "grad_norm": 0.6862834382234418, "learning_rate": 1.9994995917166733e-06, "loss": 0.7208311557769775, "step": 518 }, { "epoch": 0.11965417867435159, "grad_norm": 0.8009340684054793, "learning_rate": 1.9994874620833524e-06, "loss": 0.7771036624908447, "step": 519 }, { "epoch": 0.11988472622478386, "grad_norm": 0.627959573002497, "learning_rate": 1.999475187234901e-06, "loss": 0.7144759297370911, "step": 520 }, { "epoch": 0.12011527377521614, "grad_norm": 0.6879525250480599, "learning_rate": 1.9994627671731016e-06, "loss": 0.7363512516021729, "step": 521 }, { "epoch": 0.12034582132564842, "grad_norm": 0.7683035087809761, "learning_rate": 1.9994502018997592e-06, "loss": 0.7519102096557617, "step": 522 }, { "epoch": 0.1205763688760807, "grad_norm": 0.7758582460040493, "learning_rate": 1.9994374914167e-06, "loss": 0.7713091373443604, "step": 523 }, { "epoch": 0.12080691642651296, "grad_norm": 0.7495321802737405, "learning_rate": 1.9994246357257704e-06, "loss": 0.8354437351226807, "step": 524 }, { "epoch": 0.12103746397694524, "grad_norm": 0.6278302590975512, "learning_rate": 1.9994116348288378e-06, "loss": 0.6978895664215088, "step": 525 }, { "epoch": 0.12126801152737753, "grad_norm": 0.7128547735064281, "learning_rate": 1.9993984887277913e-06, "loss": 0.7193020582199097, "step": 526 }, { "epoch": 0.12149855907780979, "grad_norm": 0.8734535525152816, "learning_rate": 1.999385197424541e-06, "loss": 0.891850471496582, "step": 527 }, { "epoch": 0.12172910662824207, "grad_norm": 0.661834209816186, "learning_rate": 1.999371760921018e-06, "loss": 0.7829855680465698, "step": 528 }, { "epoch": 0.12195965417867435, "grad_norm": 0.9881520932349097, "learning_rate": 1.999358179219174e-06, "loss": 0.7942383289337158, "step": 529 }, { "epoch": 0.12219020172910663, "grad_norm": 0.8322324544338607, "learning_rate": 1.9993444523209827e-06, "loss": 0.7958294153213501, "step": 530 }, { "epoch": 0.1224207492795389, "grad_norm": 0.5981241873885679, "learning_rate": 1.9993305802284385e-06, "loss": 0.6725438833236694, "step": 531 }, { "epoch": 0.12265129682997118, "grad_norm": 0.7477005083386355, "learning_rate": 1.9993165629435572e-06, "loss": 0.7068517208099365, "step": 532 }, { "epoch": 0.12288184438040346, "grad_norm": 0.6787076143899758, "learning_rate": 1.999302400468375e-06, "loss": 0.7997239828109741, "step": 533 }, { "epoch": 0.12311239193083573, "grad_norm": 0.7084903189288138, "learning_rate": 1.999288092804949e-06, "loss": 0.7294374704360962, "step": 534 }, { "epoch": 0.12334293948126801, "grad_norm": 0.6987144343357352, "learning_rate": 1.999273639955359e-06, "loss": 0.7457236051559448, "step": 535 }, { "epoch": 0.12357348703170029, "grad_norm": 0.6724082603264683, "learning_rate": 1.999259041921704e-06, "loss": 0.7387409210205078, "step": 536 }, { "epoch": 0.12380403458213257, "grad_norm": 0.6898762574950271, "learning_rate": 1.9992442987061055e-06, "loss": 0.7364134192466736, "step": 537 }, { "epoch": 0.12403458213256484, "grad_norm": 0.6521987981633685, "learning_rate": 1.9992294103107053e-06, "loss": 0.7248586416244507, "step": 538 }, { "epoch": 0.12426512968299712, "grad_norm": 0.7533468153268684, "learning_rate": 1.9992143767376665e-06, "loss": 0.7337394952774048, "step": 539 }, { "epoch": 0.1244956772334294, "grad_norm": 0.7417989879116603, "learning_rate": 1.9991991979891738e-06, "loss": 0.7718614935874939, "step": 540 }, { "epoch": 0.12472622478386167, "grad_norm": 0.6895167987027419, "learning_rate": 1.9991838740674315e-06, "loss": 0.7660986185073853, "step": 541 }, { "epoch": 0.12495677233429395, "grad_norm": 0.6217984048756272, "learning_rate": 1.999168404974667e-06, "loss": 0.6900891065597534, "step": 542 }, { "epoch": 0.12518731988472623, "grad_norm": 0.7509984015500013, "learning_rate": 1.999152790713127e-06, "loss": 0.7981909513473511, "step": 543 }, { "epoch": 0.1254178674351585, "grad_norm": 0.7877345810756031, "learning_rate": 1.999137031285081e-06, "loss": 0.7916548848152161, "step": 544 }, { "epoch": 0.1256484149855908, "grad_norm": 0.9082774378814212, "learning_rate": 1.9991211266928177e-06, "loss": 0.844967246055603, "step": 545 }, { "epoch": 0.12587896253602304, "grad_norm": 0.9213658517355716, "learning_rate": 1.9991050769386483e-06, "loss": 0.7453466653823853, "step": 546 }, { "epoch": 0.12610951008645532, "grad_norm": 0.698098221320302, "learning_rate": 1.999088882024905e-06, "loss": 0.8082910776138306, "step": 547 }, { "epoch": 0.1263400576368876, "grad_norm": 0.6335333912387642, "learning_rate": 1.9990725419539407e-06, "loss": 0.6806755065917969, "step": 548 }, { "epoch": 0.12657060518731988, "grad_norm": 0.7003994944861476, "learning_rate": 1.999056056728129e-06, "loss": 0.7786964774131775, "step": 549 }, { "epoch": 0.12680115273775217, "grad_norm": 0.7647676187763052, "learning_rate": 1.9990394263498648e-06, "loss": 0.7257200479507446, "step": 550 }, { "epoch": 0.12703170028818445, "grad_norm": 0.7175599687438361, "learning_rate": 1.9990226508215653e-06, "loss": 0.8026515245437622, "step": 551 }, { "epoch": 0.12726224783861673, "grad_norm": 0.7080335765217586, "learning_rate": 1.999005730145667e-06, "loss": 0.7155156135559082, "step": 552 }, { "epoch": 0.12749279538904898, "grad_norm": 0.6991736303344278, "learning_rate": 1.9989886643246286e-06, "loss": 0.7604411244392395, "step": 553 }, { "epoch": 0.12772334293948126, "grad_norm": 0.8995696774069738, "learning_rate": 1.9989714533609296e-06, "loss": 0.7291309237480164, "step": 554 }, { "epoch": 0.12795389048991354, "grad_norm": 0.9961871820049175, "learning_rate": 1.9989540972570703e-06, "loss": 0.7417641878128052, "step": 555 }, { "epoch": 0.12818443804034582, "grad_norm": 1.0793796541266174, "learning_rate": 1.998936596015573e-06, "loss": 0.866726279258728, "step": 556 }, { "epoch": 0.1284149855907781, "grad_norm": 0.7057517655951833, "learning_rate": 1.9989189496389797e-06, "loss": 0.7615523338317871, "step": 557 }, { "epoch": 0.12864553314121038, "grad_norm": 0.7155888618962294, "learning_rate": 1.9989011581298546e-06, "loss": 0.7050062417984009, "step": 558 }, { "epoch": 0.12887608069164266, "grad_norm": 0.6853891634641955, "learning_rate": 1.9988832214907824e-06, "loss": 0.6849932074546814, "step": 559 }, { "epoch": 0.12910662824207492, "grad_norm": 0.6575409961311479, "learning_rate": 1.9988651397243698e-06, "loss": 0.674652099609375, "step": 560 }, { "epoch": 0.1293371757925072, "grad_norm": 0.754705339146669, "learning_rate": 1.998846912833243e-06, "loss": 0.7126532196998596, "step": 561 }, { "epoch": 0.12956772334293948, "grad_norm": 0.6947105773602015, "learning_rate": 1.9988285408200503e-06, "loss": 0.7211127281188965, "step": 562 }, { "epoch": 0.12979827089337176, "grad_norm": 0.863971231352938, "learning_rate": 1.998810023687462e-06, "loss": 0.8206064701080322, "step": 563 }, { "epoch": 0.13002881844380404, "grad_norm": 0.7850413983525022, "learning_rate": 1.998791361438167e-06, "loss": 0.6803351640701294, "step": 564 }, { "epoch": 0.13025936599423632, "grad_norm": 0.6937411478964312, "learning_rate": 1.9987725540748777e-06, "loss": 0.700650155544281, "step": 565 }, { "epoch": 0.1304899135446686, "grad_norm": 0.9061416401080528, "learning_rate": 1.9987536016003265e-06, "loss": 0.6874483823776245, "step": 566 }, { "epoch": 0.13072046109510085, "grad_norm": 0.8317311817071676, "learning_rate": 1.9987345040172666e-06, "loss": 0.8148888349533081, "step": 567 }, { "epoch": 0.13095100864553313, "grad_norm": 0.6811216751990012, "learning_rate": 1.998715261328473e-06, "loss": 0.6662083864212036, "step": 568 }, { "epoch": 0.13118155619596542, "grad_norm": 0.7320582363601624, "learning_rate": 1.9986958735367413e-06, "loss": 0.7676659822463989, "step": 569 }, { "epoch": 0.1314121037463977, "grad_norm": 0.8307608690655258, "learning_rate": 1.9986763406448883e-06, "loss": 0.7907297015190125, "step": 570 }, { "epoch": 0.13164265129682998, "grad_norm": 0.6988345305832468, "learning_rate": 1.998656662655752e-06, "loss": 0.6941409111022949, "step": 571 }, { "epoch": 0.13187319884726226, "grad_norm": 0.7762530825788114, "learning_rate": 1.9986368395721916e-06, "loss": 0.674893319606781, "step": 572 }, { "epoch": 0.13210374639769454, "grad_norm": 0.655381279738601, "learning_rate": 1.9986168713970866e-06, "loss": 0.6929521560668945, "step": 573 }, { "epoch": 0.1323342939481268, "grad_norm": 0.8094828471006544, "learning_rate": 1.998596758133339e-06, "loss": 0.7927180528640747, "step": 574 }, { "epoch": 0.13256484149855907, "grad_norm": 0.9363455004086035, "learning_rate": 1.9985764997838708e-06, "loss": 0.8200454711914062, "step": 575 }, { "epoch": 0.13279538904899135, "grad_norm": 0.9063897496222706, "learning_rate": 1.9985560963516248e-06, "loss": 0.7749574184417725, "step": 576 }, { "epoch": 0.13302593659942363, "grad_norm": 0.7429668697491804, "learning_rate": 1.998535547839566e-06, "loss": 0.7536444664001465, "step": 577 }, { "epoch": 0.1332564841498559, "grad_norm": 0.6697558499328553, "learning_rate": 1.9985148542506797e-06, "loss": 0.5805482864379883, "step": 578 }, { "epoch": 0.1334870317002882, "grad_norm": 0.7029078961615552, "learning_rate": 1.998494015587972e-06, "loss": 0.8079668283462524, "step": 579 }, { "epoch": 0.13371757925072045, "grad_norm": 0.7012305802566163, "learning_rate": 1.9984730318544713e-06, "loss": 0.6749714016914368, "step": 580 }, { "epoch": 0.13394812680115273, "grad_norm": 0.7581464486907759, "learning_rate": 1.998451903053226e-06, "loss": 0.7245970368385315, "step": 581 }, { "epoch": 0.134178674351585, "grad_norm": 0.9045791499382148, "learning_rate": 1.9984306291873055e-06, "loss": 0.7243727445602417, "step": 582 }, { "epoch": 0.1344092219020173, "grad_norm": 1.2504748978932971, "learning_rate": 1.9984092102598015e-06, "loss": 0.80174720287323, "step": 583 }, { "epoch": 0.13463976945244957, "grad_norm": 0.7662030678381475, "learning_rate": 1.9983876462738255e-06, "loss": 0.7339394688606262, "step": 584 }, { "epoch": 0.13487031700288185, "grad_norm": 0.8389078489910984, "learning_rate": 1.9983659372325103e-06, "loss": 0.785433292388916, "step": 585 }, { "epoch": 0.13510086455331413, "grad_norm": 0.8035269095479699, "learning_rate": 1.9983440831390103e-06, "loss": 0.7554272413253784, "step": 586 }, { "epoch": 0.13533141210374638, "grad_norm": 0.7515708504793569, "learning_rate": 1.9983220839965005e-06, "loss": 0.6374361515045166, "step": 587 }, { "epoch": 0.13556195965417867, "grad_norm": 0.7429526116241276, "learning_rate": 1.9982999398081773e-06, "loss": 0.7672165632247925, "step": 588 }, { "epoch": 0.13579250720461095, "grad_norm": 0.6689968310029749, "learning_rate": 1.998277650577258e-06, "loss": 0.6684931516647339, "step": 589 }, { "epoch": 0.13602305475504323, "grad_norm": 0.7452264804377413, "learning_rate": 1.998255216306981e-06, "loss": 0.7915084362030029, "step": 590 }, { "epoch": 0.1362536023054755, "grad_norm": 1.4052641353457251, "learning_rate": 1.9982326370006055e-06, "loss": 0.7956736087799072, "step": 591 }, { "epoch": 0.1364841498559078, "grad_norm": 0.6473849699527142, "learning_rate": 1.998209912661412e-06, "loss": 0.6230663061141968, "step": 592 }, { "epoch": 0.13671469740634007, "grad_norm": 0.733416699363805, "learning_rate": 1.998187043292703e-06, "loss": 0.6164396405220032, "step": 593 }, { "epoch": 0.13694524495677232, "grad_norm": 1.02852211395565, "learning_rate": 1.9981640288978004e-06, "loss": 0.7923756837844849, "step": 594 }, { "epoch": 0.1371757925072046, "grad_norm": 0.7916283439941715, "learning_rate": 1.9981408694800478e-06, "loss": 0.752497673034668, "step": 595 }, { "epoch": 0.13740634005763688, "grad_norm": 0.8631260691956123, "learning_rate": 1.998117565042811e-06, "loss": 0.7453225255012512, "step": 596 }, { "epoch": 0.13763688760806916, "grad_norm": 0.8745179307249391, "learning_rate": 1.9980941155894743e-06, "loss": 0.7739442586898804, "step": 597 }, { "epoch": 0.13786743515850144, "grad_norm": 0.7824694345670692, "learning_rate": 1.998070521123446e-06, "loss": 0.7126696109771729, "step": 598 }, { "epoch": 0.13809798270893373, "grad_norm": 0.6923320348843225, "learning_rate": 1.998046781648154e-06, "loss": 0.8027236461639404, "step": 599 }, { "epoch": 0.138328530259366, "grad_norm": 0.926925177155617, "learning_rate": 1.9980228971670465e-06, "loss": 0.7869859933853149, "step": 600 }, { "epoch": 0.13855907780979826, "grad_norm": 0.7384657646970024, "learning_rate": 1.9979988676835945e-06, "loss": 0.6863809823989868, "step": 601 }, { "epoch": 0.13878962536023054, "grad_norm": 0.670320439572825, "learning_rate": 1.9979746932012887e-06, "loss": 0.6836833953857422, "step": 602 }, { "epoch": 0.13902017291066282, "grad_norm": 0.8330589554433049, "learning_rate": 1.997950373723642e-06, "loss": 0.6044712662696838, "step": 603 }, { "epoch": 0.1392507204610951, "grad_norm": 0.7807440800239879, "learning_rate": 1.9979259092541876e-06, "loss": 0.704325795173645, "step": 604 }, { "epoch": 0.13948126801152738, "grad_norm": 0.9973562962562118, "learning_rate": 1.9979012997964796e-06, "loss": 0.8456264734268188, "step": 605 }, { "epoch": 0.13971181556195966, "grad_norm": 0.6848983372751881, "learning_rate": 1.997876545354094e-06, "loss": 0.7414695620536804, "step": 606 }, { "epoch": 0.13994236311239194, "grad_norm": 0.951142668255918, "learning_rate": 1.997851645930627e-06, "loss": 0.8303127288818359, "step": 607 }, { "epoch": 0.1401729106628242, "grad_norm": 0.6104498978549777, "learning_rate": 1.997826601529696e-06, "loss": 0.5950440168380737, "step": 608 }, { "epoch": 0.14040345821325648, "grad_norm": 0.7575557670877245, "learning_rate": 1.9978014121549403e-06, "loss": 0.7623804807662964, "step": 609 }, { "epoch": 0.14063400576368876, "grad_norm": 0.7818079152915381, "learning_rate": 1.9977760778100194e-06, "loss": 0.720527172088623, "step": 610 }, { "epoch": 0.14086455331412104, "grad_norm": 0.7351247394083988, "learning_rate": 1.9977505984986135e-06, "loss": 0.7490646839141846, "step": 611 }, { "epoch": 0.14109510086455332, "grad_norm": 0.8740889756756283, "learning_rate": 1.9977249742244253e-06, "loss": 0.7666628956794739, "step": 612 }, { "epoch": 0.1413256484149856, "grad_norm": 0.8037463276401666, "learning_rate": 1.9976992049911777e-06, "loss": 0.6825721263885498, "step": 613 }, { "epoch": 0.14155619596541788, "grad_norm": 0.9576981414718043, "learning_rate": 1.997673290802614e-06, "loss": 0.7550063729286194, "step": 614 }, { "epoch": 0.14178674351585013, "grad_norm": 0.8551111061457136, "learning_rate": 1.9976472316625005e-06, "loss": 0.687241792678833, "step": 615 }, { "epoch": 0.14201729106628241, "grad_norm": 0.8571830506090616, "learning_rate": 1.9976210275746215e-06, "loss": 0.7312265634536743, "step": 616 }, { "epoch": 0.1422478386167147, "grad_norm": 0.767467152870913, "learning_rate": 1.997594678542786e-06, "loss": 0.7246025204658508, "step": 617 }, { "epoch": 0.14247838616714698, "grad_norm": 0.6447556085599853, "learning_rate": 1.9975681845708214e-06, "loss": 0.6550637483596802, "step": 618 }, { "epoch": 0.14270893371757926, "grad_norm": 0.7539334884870844, "learning_rate": 1.997541545662577e-06, "loss": 0.7373122572898865, "step": 619 }, { "epoch": 0.14293948126801154, "grad_norm": 0.8546078077593016, "learning_rate": 1.997514761821923e-06, "loss": 0.8237804770469666, "step": 620 }, { "epoch": 0.14317002881844382, "grad_norm": 0.8108598991328027, "learning_rate": 1.9974878330527517e-06, "loss": 0.7576577663421631, "step": 621 }, { "epoch": 0.14340057636887607, "grad_norm": 0.9132866549115137, "learning_rate": 1.9974607593589747e-06, "loss": 0.7277255058288574, "step": 622 }, { "epoch": 0.14363112391930835, "grad_norm": 0.8415081855318006, "learning_rate": 1.9974335407445253e-06, "loss": 0.6797576546669006, "step": 623 }, { "epoch": 0.14386167146974063, "grad_norm": 0.8337435130468953, "learning_rate": 1.9974061772133587e-06, "loss": 0.7779988050460815, "step": 624 }, { "epoch": 0.1440922190201729, "grad_norm": 0.8116008410687129, "learning_rate": 1.99737866876945e-06, "loss": 0.663394570350647, "step": 625 }, { "epoch": 0.1443227665706052, "grad_norm": 0.6909233474214471, "learning_rate": 1.9973510154167974e-06, "loss": 0.6705121397972107, "step": 626 }, { "epoch": 0.14455331412103747, "grad_norm": 0.6711778054860513, "learning_rate": 1.9973232171594164e-06, "loss": 0.7186808586120605, "step": 627 }, { "epoch": 0.14478386167146973, "grad_norm": 0.7551679221380408, "learning_rate": 1.997295274001347e-06, "loss": 0.6327730417251587, "step": 628 }, { "epoch": 0.145014409221902, "grad_norm": 0.9897011229130392, "learning_rate": 1.9972671859466493e-06, "loss": 0.8166565895080566, "step": 629 }, { "epoch": 0.1452449567723343, "grad_norm": 0.7048630892808823, "learning_rate": 1.997238952999404e-06, "loss": 0.6133385896682739, "step": 630 }, { "epoch": 0.14547550432276657, "grad_norm": 0.7829850836217312, "learning_rate": 1.9972105751637125e-06, "loss": 0.6579192876815796, "step": 631 }, { "epoch": 0.14570605187319885, "grad_norm": 0.9330944891206956, "learning_rate": 1.9971820524436985e-06, "loss": 0.6703581809997559, "step": 632 }, { "epoch": 0.14593659942363113, "grad_norm": 1.0013138431798316, "learning_rate": 1.9971533848435055e-06, "loss": 0.8173651695251465, "step": 633 }, { "epoch": 0.1461671469740634, "grad_norm": 0.8881150759844724, "learning_rate": 1.997124572367299e-06, "loss": 0.7017172574996948, "step": 634 }, { "epoch": 0.14639769452449566, "grad_norm": 0.882826859802223, "learning_rate": 1.997095615019265e-06, "loss": 0.6777410507202148, "step": 635 }, { "epoch": 0.14662824207492794, "grad_norm": 0.8891372191078558, "learning_rate": 1.9970665128036106e-06, "loss": 0.8573586344718933, "step": 636 }, { "epoch": 0.14685878962536023, "grad_norm": 0.7356113695826332, "learning_rate": 1.9970372657245643e-06, "loss": 0.6344877481460571, "step": 637 }, { "epoch": 0.1470893371757925, "grad_norm": 0.9791168675445887, "learning_rate": 1.997007873786375e-06, "loss": 0.6762720942497253, "step": 638 }, { "epoch": 0.1473198847262248, "grad_norm": 0.800842104744362, "learning_rate": 1.996978336993314e-06, "loss": 0.7008575201034546, "step": 639 }, { "epoch": 0.14755043227665707, "grad_norm": 0.8741772887346724, "learning_rate": 1.9969486553496716e-06, "loss": 0.6853412985801697, "step": 640 }, { "epoch": 0.14778097982708935, "grad_norm": 1.0370740005159675, "learning_rate": 1.9969188288597605e-06, "loss": 0.7110375165939331, "step": 641 }, { "epoch": 0.1480115273775216, "grad_norm": 1.0581908050178295, "learning_rate": 1.996888857527914e-06, "loss": 0.6819010972976685, "step": 642 }, { "epoch": 0.14824207492795388, "grad_norm": 0.8772886208417229, "learning_rate": 1.9968587413584873e-06, "loss": 0.7901614308357239, "step": 643 }, { "epoch": 0.14847262247838616, "grad_norm": 0.745175076561396, "learning_rate": 1.9968284803558555e-06, "loss": 0.7313079833984375, "step": 644 }, { "epoch": 0.14870317002881844, "grad_norm": 0.8465608138783899, "learning_rate": 1.9967980745244156e-06, "loss": 0.6434149742126465, "step": 645 }, { "epoch": 0.14893371757925072, "grad_norm": 0.8410482692497646, "learning_rate": 1.996767523868585e-06, "loss": 0.790850818157196, "step": 646 }, { "epoch": 0.149164265129683, "grad_norm": 0.8004008762577605, "learning_rate": 1.9967368283928023e-06, "loss": 0.8198965191841125, "step": 647 }, { "epoch": 0.14939481268011529, "grad_norm": 0.8482890907318102, "learning_rate": 1.9967059881015266e-06, "loss": 0.6279035806655884, "step": 648 }, { "epoch": 0.14962536023054754, "grad_norm": 0.8852994503658375, "learning_rate": 1.99667500299924e-06, "loss": 0.6705282926559448, "step": 649 }, { "epoch": 0.14985590778097982, "grad_norm": 0.978665350202633, "learning_rate": 1.9966438730904435e-06, "loss": 0.6400725841522217, "step": 650 }, { "epoch": 0.1500864553314121, "grad_norm": 0.714749094089356, "learning_rate": 1.9966125983796603e-06, "loss": 0.6101740002632141, "step": 651 }, { "epoch": 0.15031700288184438, "grad_norm": 0.7346583641916052, "learning_rate": 1.996581178871434e-06, "loss": 0.6152533292770386, "step": 652 }, { "epoch": 0.15054755043227666, "grad_norm": 0.8353434405574146, "learning_rate": 1.9965496145703294e-06, "loss": 0.6547197103500366, "step": 653 }, { "epoch": 0.15077809798270894, "grad_norm": 0.9331778145980045, "learning_rate": 1.996517905480933e-06, "loss": 0.6906094551086426, "step": 654 }, { "epoch": 0.15100864553314122, "grad_norm": 0.8380037292342015, "learning_rate": 1.9964860516078514e-06, "loss": 0.6932255625724792, "step": 655 }, { "epoch": 0.15123919308357348, "grad_norm": 0.7361090355434313, "learning_rate": 1.9964540529557124e-06, "loss": 0.7039364576339722, "step": 656 }, { "epoch": 0.15146974063400576, "grad_norm": 0.8241985864072432, "learning_rate": 1.996421909529166e-06, "loss": 0.6963860392570496, "step": 657 }, { "epoch": 0.15170028818443804, "grad_norm": 0.9014935244681216, "learning_rate": 1.9963896213328814e-06, "loss": 0.604318380355835, "step": 658 }, { "epoch": 0.15193083573487032, "grad_norm": 0.8819571142719487, "learning_rate": 1.99635718837155e-06, "loss": 0.6434466242790222, "step": 659 }, { "epoch": 0.1521613832853026, "grad_norm": 0.9264914863210155, "learning_rate": 1.9963246106498843e-06, "loss": 0.7714329957962036, "step": 660 }, { "epoch": 0.15239193083573488, "grad_norm": 0.8106754232828391, "learning_rate": 1.996291888172617e-06, "loss": 0.6435364484786987, "step": 661 }, { "epoch": 0.15262247838616716, "grad_norm": 0.7565008247244859, "learning_rate": 1.9962590209445026e-06, "loss": 0.646420955657959, "step": 662 }, { "epoch": 0.1528530259365994, "grad_norm": 0.7252882219501727, "learning_rate": 1.9962260089703164e-06, "loss": 0.662223219871521, "step": 663 }, { "epoch": 0.1530835734870317, "grad_norm": 0.8659668858040968, "learning_rate": 1.9961928522548544e-06, "loss": 0.7200876474380493, "step": 664 }, { "epoch": 0.15331412103746397, "grad_norm": 0.7566658933179319, "learning_rate": 1.9961595508029344e-06, "loss": 0.6113970875740051, "step": 665 }, { "epoch": 0.15354466858789625, "grad_norm": 0.8245060640492226, "learning_rate": 1.9961261046193946e-06, "loss": 0.649490237236023, "step": 666 }, { "epoch": 0.15377521613832854, "grad_norm": 0.7578040413722006, "learning_rate": 1.996092513709094e-06, "loss": 0.6530452370643616, "step": 667 }, { "epoch": 0.15400576368876082, "grad_norm": 0.7405695664138088, "learning_rate": 1.9960587780769136e-06, "loss": 0.7147825360298157, "step": 668 }, { "epoch": 0.1542363112391931, "grad_norm": 0.8315791587831606, "learning_rate": 1.9960248977277546e-06, "loss": 0.6634937524795532, "step": 669 }, { "epoch": 0.15446685878962535, "grad_norm": 0.6638255750523387, "learning_rate": 1.995990872666539e-06, "loss": 0.5835539102554321, "step": 670 }, { "epoch": 0.15469740634005763, "grad_norm": 0.9977800574356578, "learning_rate": 1.9959567028982106e-06, "loss": 0.6927201151847839, "step": 671 }, { "epoch": 0.1549279538904899, "grad_norm": 0.8488790358421198, "learning_rate": 1.9959223884277344e-06, "loss": 0.6332941651344299, "step": 672 }, { "epoch": 0.1551585014409222, "grad_norm": 0.7345490192229425, "learning_rate": 1.995887929260096e-06, "loss": 0.6358315944671631, "step": 673 }, { "epoch": 0.15538904899135447, "grad_norm": 0.8810198063378296, "learning_rate": 1.9958533254003004e-06, "loss": 0.7304986715316772, "step": 674 }, { "epoch": 0.15561959654178675, "grad_norm": 0.9410490987859614, "learning_rate": 1.995818576853377e-06, "loss": 0.6921132802963257, "step": 675 }, { "epoch": 0.15585014409221903, "grad_norm": 0.8065213934569144, "learning_rate": 1.995783683624373e-06, "loss": 0.5625938177108765, "step": 676 }, { "epoch": 0.1560806916426513, "grad_norm": 0.7157890295181991, "learning_rate": 1.9957486457183593e-06, "loss": 0.6823471188545227, "step": 677 }, { "epoch": 0.15631123919308357, "grad_norm": 0.8329002490021682, "learning_rate": 1.995713463140426e-06, "loss": 0.645065426826477, "step": 678 }, { "epoch": 0.15654178674351585, "grad_norm": 0.8327299093562661, "learning_rate": 1.9956781358956846e-06, "loss": 0.615644633769989, "step": 679 }, { "epoch": 0.15677233429394813, "grad_norm": 0.9574374641380435, "learning_rate": 1.9956426639892674e-06, "loss": 0.6672168374061584, "step": 680 }, { "epoch": 0.1570028818443804, "grad_norm": 0.7820039633645871, "learning_rate": 1.9956070474263293e-06, "loss": 0.6651773452758789, "step": 681 }, { "epoch": 0.1572334293948127, "grad_norm": 0.8533425761585396, "learning_rate": 1.9955712862120443e-06, "loss": 0.692477822303772, "step": 682 }, { "epoch": 0.15746397694524494, "grad_norm": 0.9212488541381597, "learning_rate": 1.995535380351608e-06, "loss": 0.6489748358726501, "step": 683 }, { "epoch": 0.15769452449567722, "grad_norm": 0.8435974522262087, "learning_rate": 1.9954993298502366e-06, "loss": 0.6771219372749329, "step": 684 }, { "epoch": 0.1579250720461095, "grad_norm": 0.877153187521053, "learning_rate": 1.9954631347131692e-06, "loss": 0.665330171585083, "step": 685 }, { "epoch": 0.15815561959654179, "grad_norm": 1.0295162658348551, "learning_rate": 1.995426794945664e-06, "loss": 0.5750322937965393, "step": 686 }, { "epoch": 0.15838616714697407, "grad_norm": 0.7522809296648445, "learning_rate": 1.9953903105530005e-06, "loss": 0.728310227394104, "step": 687 }, { "epoch": 0.15861671469740635, "grad_norm": 0.9608157515262961, "learning_rate": 1.9953536815404794e-06, "loss": 0.6932300329208374, "step": 688 }, { "epoch": 0.15884726224783863, "grad_norm": 0.8943804935883133, "learning_rate": 1.995316907913423e-06, "loss": 0.6161103844642639, "step": 689 }, { "epoch": 0.15907780979827088, "grad_norm": 0.7775043477204376, "learning_rate": 1.9952799896771744e-06, "loss": 0.6205453872680664, "step": 690 }, { "epoch": 0.15930835734870316, "grad_norm": 0.7737451170401874, "learning_rate": 1.9952429268370964e-06, "loss": 0.7487895488739014, "step": 691 }, { "epoch": 0.15953890489913544, "grad_norm": 0.8997226561911318, "learning_rate": 1.995205719398575e-06, "loss": 0.5926559567451477, "step": 692 }, { "epoch": 0.15976945244956772, "grad_norm": 1.053529919661844, "learning_rate": 1.9951683673670152e-06, "loss": 0.7555570602416992, "step": 693 }, { "epoch": 0.16, "grad_norm": 0.8300153409903857, "learning_rate": 1.995130870747844e-06, "loss": 0.6297062635421753, "step": 694 }, { "epoch": 0.16023054755043228, "grad_norm": 1.0222141785273, "learning_rate": 1.99509322954651e-06, "loss": 0.6655765771865845, "step": 695 }, { "epoch": 0.16046109510086456, "grad_norm": 0.7738556572418642, "learning_rate": 1.995055443768481e-06, "loss": 0.5945572853088379, "step": 696 }, { "epoch": 0.16069164265129682, "grad_norm": 0.956272279349006, "learning_rate": 1.9950175134192473e-06, "loss": 0.646022379398346, "step": 697 }, { "epoch": 0.1609221902017291, "grad_norm": 1.0562448292545954, "learning_rate": 1.99497943850432e-06, "loss": 0.6553836464881897, "step": 698 }, { "epoch": 0.16115273775216138, "grad_norm": 0.8311639966120081, "learning_rate": 1.994941219029231e-06, "loss": 0.6118045449256897, "step": 699 }, { "epoch": 0.16138328530259366, "grad_norm": 0.7402593984430477, "learning_rate": 1.994902854999533e-06, "loss": 0.5951248407363892, "step": 700 }, { "epoch": 0.16161383285302594, "grad_norm": 0.7755953239943872, "learning_rate": 1.9948643464208e-06, "loss": 0.7181081771850586, "step": 701 }, { "epoch": 0.16184438040345822, "grad_norm": 0.710887180335347, "learning_rate": 1.9948256932986264e-06, "loss": 0.5914522409439087, "step": 702 }, { "epoch": 0.1620749279538905, "grad_norm": 0.9033570710857827, "learning_rate": 1.994786895638629e-06, "loss": 0.5532323718070984, "step": 703 }, { "epoch": 0.16230547550432275, "grad_norm": 0.7658441548893321, "learning_rate": 1.994747953446444e-06, "loss": 0.6182093620300293, "step": 704 }, { "epoch": 0.16253602305475504, "grad_norm": 0.7455777185611792, "learning_rate": 1.9947088667277295e-06, "loss": 0.6127386093139648, "step": 705 }, { "epoch": 0.16276657060518732, "grad_norm": 0.7619418353404411, "learning_rate": 1.9946696354881644e-06, "loss": 0.7171872854232788, "step": 706 }, { "epoch": 0.1629971181556196, "grad_norm": 0.9028634018107199, "learning_rate": 1.994630259733449e-06, "loss": 0.6722875833511353, "step": 707 }, { "epoch": 0.16322766570605188, "grad_norm": 0.7912271958302307, "learning_rate": 1.994590739469303e-06, "loss": 0.6032121777534485, "step": 708 }, { "epoch": 0.16345821325648416, "grad_norm": 0.7790480227256618, "learning_rate": 1.9945510747014696e-06, "loss": 0.6348932981491089, "step": 709 }, { "epoch": 0.16368876080691644, "grad_norm": 0.8262533224784293, "learning_rate": 1.9945112654357114e-06, "loss": 0.6864136457443237, "step": 710 }, { "epoch": 0.1639193083573487, "grad_norm": 0.8121807702655695, "learning_rate": 1.9944713116778118e-06, "loss": 0.6009939908981323, "step": 711 }, { "epoch": 0.16414985590778097, "grad_norm": 0.9655971342263576, "learning_rate": 1.994431213433576e-06, "loss": 0.7821159362792969, "step": 712 }, { "epoch": 0.16438040345821325, "grad_norm": 0.9555271008092482, "learning_rate": 1.9943909707088293e-06, "loss": 0.6368619799613953, "step": 713 }, { "epoch": 0.16461095100864553, "grad_norm": 0.7923745246469236, "learning_rate": 1.994350583509419e-06, "loss": 0.5924729704856873, "step": 714 }, { "epoch": 0.16484149855907781, "grad_norm": 0.8669831643763101, "learning_rate": 1.9943100518412137e-06, "loss": 0.6996514797210693, "step": 715 }, { "epoch": 0.1650720461095101, "grad_norm": 0.8402039238329811, "learning_rate": 1.994269375710101e-06, "loss": 0.5582215785980225, "step": 716 }, { "epoch": 0.16530259365994238, "grad_norm": 1.2068222368874355, "learning_rate": 1.994228555121991e-06, "loss": 0.8005632162094116, "step": 717 }, { "epoch": 0.16553314121037463, "grad_norm": 0.8659636397344134, "learning_rate": 1.994187590082815e-06, "loss": 0.6704587936401367, "step": 718 }, { "epoch": 0.1657636887608069, "grad_norm": 0.9519332733008289, "learning_rate": 1.9941464805985242e-06, "loss": 0.6965141296386719, "step": 719 }, { "epoch": 0.1659942363112392, "grad_norm": 0.7982296755018282, "learning_rate": 1.994105226675092e-06, "loss": 0.7778419852256775, "step": 720 }, { "epoch": 0.16622478386167147, "grad_norm": 0.8437919125520283, "learning_rate": 1.9940638283185117e-06, "loss": 0.5983673334121704, "step": 721 }, { "epoch": 0.16645533141210375, "grad_norm": 0.8114559147772581, "learning_rate": 1.994022285534798e-06, "loss": 0.7356874346733093, "step": 722 }, { "epoch": 0.16668587896253603, "grad_norm": 1.0241509316608044, "learning_rate": 1.9939805983299867e-06, "loss": 0.5962327718734741, "step": 723 }, { "epoch": 0.1669164265129683, "grad_norm": 1.103575730726766, "learning_rate": 1.9939387667101354e-06, "loss": 0.751507043838501, "step": 724 }, { "epoch": 0.16714697406340057, "grad_norm": 0.8938560577593438, "learning_rate": 1.9938967906813204e-06, "loss": 0.5894922018051147, "step": 725 }, { "epoch": 0.16737752161383285, "grad_norm": 0.7622931465354307, "learning_rate": 1.993854670249641e-06, "loss": 0.6028705835342407, "step": 726 }, { "epoch": 0.16760806916426513, "grad_norm": 0.8601442524669536, "learning_rate": 1.993812405421217e-06, "loss": 0.6514176726341248, "step": 727 }, { "epoch": 0.1678386167146974, "grad_norm": 0.9323980788065775, "learning_rate": 1.993769996202189e-06, "loss": 0.5832291841506958, "step": 728 }, { "epoch": 0.1680691642651297, "grad_norm": 0.834026784773322, "learning_rate": 1.9937274425987188e-06, "loss": 0.6265125274658203, "step": 729 }, { "epoch": 0.16829971181556197, "grad_norm": 0.8839362204216874, "learning_rate": 1.9936847446169883e-06, "loss": 0.6474361419677734, "step": 730 }, { "epoch": 0.16853025936599422, "grad_norm": 0.9244363591024001, "learning_rate": 1.9936419022632015e-06, "loss": 0.6126378774642944, "step": 731 }, { "epoch": 0.1687608069164265, "grad_norm": 0.7248328619497468, "learning_rate": 1.9935989155435832e-06, "loss": 0.5518519878387451, "step": 732 }, { "epoch": 0.16899135446685878, "grad_norm": 1.020008258225897, "learning_rate": 1.9935557844643786e-06, "loss": 0.641282320022583, "step": 733 }, { "epoch": 0.16922190201729106, "grad_norm": 0.8726481364655894, "learning_rate": 1.9935125090318544e-06, "loss": 0.562978208065033, "step": 734 }, { "epoch": 0.16945244956772335, "grad_norm": 0.9315669775898957, "learning_rate": 1.9934690892522977e-06, "loss": 0.6757839918136597, "step": 735 }, { "epoch": 0.16968299711815563, "grad_norm": 0.8770207431066531, "learning_rate": 1.9934255251320173e-06, "loss": 0.6061424612998962, "step": 736 }, { "epoch": 0.1699135446685879, "grad_norm": 0.9969221705962019, "learning_rate": 1.9933818166773425e-06, "loss": 0.665751576423645, "step": 737 }, { "epoch": 0.17014409221902016, "grad_norm": 1.0458611843133037, "learning_rate": 1.9933379638946237e-06, "loss": 0.7042183876037598, "step": 738 }, { "epoch": 0.17037463976945244, "grad_norm": 0.8154471262488174, "learning_rate": 1.993293966790232e-06, "loss": 0.5603055953979492, "step": 739 }, { "epoch": 0.17060518731988472, "grad_norm": 0.7962101585189754, "learning_rate": 1.99324982537056e-06, "loss": 0.6231967210769653, "step": 740 }, { "epoch": 0.170835734870317, "grad_norm": 0.8275501344055423, "learning_rate": 1.9932055396420214e-06, "loss": 0.6461664438247681, "step": 741 }, { "epoch": 0.17106628242074928, "grad_norm": 0.8713861091012736, "learning_rate": 1.9931611096110492e-06, "loss": 0.5177653431892395, "step": 742 }, { "epoch": 0.17129682997118156, "grad_norm": 1.0666109203407566, "learning_rate": 1.9931165352841003e-06, "loss": 0.6173226833343506, "step": 743 }, { "epoch": 0.17152737752161384, "grad_norm": 0.8795669631473715, "learning_rate": 1.9930718166676494e-06, "loss": 0.6352604627609253, "step": 744 }, { "epoch": 0.1717579250720461, "grad_norm": 1.0227022082976704, "learning_rate": 1.9930269537681946e-06, "loss": 0.5541161298751831, "step": 745 }, { "epoch": 0.17198847262247838, "grad_norm": 0.902318564730913, "learning_rate": 1.9929819465922537e-06, "loss": 0.650184154510498, "step": 746 }, { "epoch": 0.17221902017291066, "grad_norm": 0.9978722892445172, "learning_rate": 1.9929367951463654e-06, "loss": 0.771975040435791, "step": 747 }, { "epoch": 0.17244956772334294, "grad_norm": 0.969046207938326, "learning_rate": 1.9928914994370904e-06, "loss": 0.6753678917884827, "step": 748 }, { "epoch": 0.17268011527377522, "grad_norm": 0.8717387512827466, "learning_rate": 1.992846059471009e-06, "loss": 0.6315422058105469, "step": 749 }, { "epoch": 0.1729106628242075, "grad_norm": 0.8010764362566868, "learning_rate": 1.992800475254724e-06, "loss": 0.6182739734649658, "step": 750 }, { "epoch": 0.17314121037463978, "grad_norm": 0.9934700256786312, "learning_rate": 1.9927547467948576e-06, "loss": 0.6191136837005615, "step": 751 }, { "epoch": 0.17337175792507203, "grad_norm": 0.8955478416270761, "learning_rate": 1.9927088740980536e-06, "loss": 0.5914082527160645, "step": 752 }, { "epoch": 0.17360230547550431, "grad_norm": 0.8872553916343393, "learning_rate": 1.9926628571709777e-06, "loss": 0.7570660710334778, "step": 753 }, { "epoch": 0.1738328530259366, "grad_norm": 0.8275631219312759, "learning_rate": 1.992616696020315e-06, "loss": 0.7140552997589111, "step": 754 }, { "epoch": 0.17406340057636888, "grad_norm": 0.8784414210362078, "learning_rate": 1.992570390652772e-06, "loss": 0.6851143836975098, "step": 755 }, { "epoch": 0.17429394812680116, "grad_norm": 0.9265966905529803, "learning_rate": 1.992523941075077e-06, "loss": 0.6568159461021423, "step": 756 }, { "epoch": 0.17452449567723344, "grad_norm": 0.8027115290319551, "learning_rate": 1.9924773472939785e-06, "loss": 0.6214026212692261, "step": 757 }, { "epoch": 0.17475504322766572, "grad_norm": 1.1556501321398456, "learning_rate": 1.992430609316246e-06, "loss": 0.6564410924911499, "step": 758 }, { "epoch": 0.17498559077809797, "grad_norm": 0.9640812513598375, "learning_rate": 1.9923837271486697e-06, "loss": 0.5648280382156372, "step": 759 }, { "epoch": 0.17521613832853025, "grad_norm": 0.8862992700585784, "learning_rate": 1.9923367007980614e-06, "loss": 0.608031153678894, "step": 760 }, { "epoch": 0.17544668587896253, "grad_norm": 1.0031309058296456, "learning_rate": 1.9922895302712537e-06, "loss": 0.6203290224075317, "step": 761 }, { "epoch": 0.1756772334293948, "grad_norm": 0.7779205294449888, "learning_rate": 1.9922422155751003e-06, "loss": 0.5530174374580383, "step": 762 }, { "epoch": 0.1759077809798271, "grad_norm": 0.8910414656783568, "learning_rate": 1.9921947567164745e-06, "loss": 0.6626535654067993, "step": 763 }, { "epoch": 0.17613832853025937, "grad_norm": 0.9849371373448682, "learning_rate": 1.9921471537022723e-06, "loss": 0.6544541120529175, "step": 764 }, { "epoch": 0.17636887608069166, "grad_norm": 1.0175898233717289, "learning_rate": 1.9920994065394098e-06, "loss": 0.7820296287536621, "step": 765 }, { "epoch": 0.1765994236311239, "grad_norm": 0.8308987980464507, "learning_rate": 1.992051515234824e-06, "loss": 0.5658930540084839, "step": 766 }, { "epoch": 0.1768299711815562, "grad_norm": 1.0108649733600985, "learning_rate": 1.9920034797954734e-06, "loss": 0.6288915872573853, "step": 767 }, { "epoch": 0.17706051873198847, "grad_norm": 0.8972174241474462, "learning_rate": 1.9919553002283366e-06, "loss": 0.5960980653762817, "step": 768 }, { "epoch": 0.17729106628242075, "grad_norm": 0.9596016587654722, "learning_rate": 1.9919069765404136e-06, "loss": 0.6264806389808655, "step": 769 }, { "epoch": 0.17752161383285303, "grad_norm": 1.1687849824342589, "learning_rate": 1.991858508738726e-06, "loss": 0.7963491678237915, "step": 770 }, { "epoch": 0.1777521613832853, "grad_norm": 0.88739443362272, "learning_rate": 1.9918098968303147e-06, "loss": 0.5877400040626526, "step": 771 }, { "epoch": 0.1779827089337176, "grad_norm": 0.9846679485059646, "learning_rate": 1.991761140822243e-06, "loss": 0.6420770883560181, "step": 772 }, { "epoch": 0.17821325648414985, "grad_norm": 1.0021199463672827, "learning_rate": 1.991712240721595e-06, "loss": 0.5594414472579956, "step": 773 }, { "epoch": 0.17844380403458213, "grad_norm": 0.7497950059658199, "learning_rate": 1.9916631965354746e-06, "loss": 0.5771572589874268, "step": 774 }, { "epoch": 0.1786743515850144, "grad_norm": 1.100944925371416, "learning_rate": 1.991614008271008e-06, "loss": 0.6717950105667114, "step": 775 }, { "epoch": 0.1789048991354467, "grad_norm": 1.0132945459281757, "learning_rate": 1.9915646759353416e-06, "loss": 0.6760128736495972, "step": 776 }, { "epoch": 0.17913544668587897, "grad_norm": 0.9410353518724599, "learning_rate": 1.9915151995356425e-06, "loss": 0.6075339913368225, "step": 777 }, { "epoch": 0.17936599423631125, "grad_norm": 1.1007106896730625, "learning_rate": 1.9914655790791e-06, "loss": 0.6417431831359863, "step": 778 }, { "epoch": 0.1795965417867435, "grad_norm": 1.0600536954685635, "learning_rate": 1.9914158145729226e-06, "loss": 0.5260112285614014, "step": 779 }, { "epoch": 0.17982708933717578, "grad_norm": 0.8866673612354495, "learning_rate": 1.9913659060243407e-06, "loss": 0.6338676810264587, "step": 780 }, { "epoch": 0.18005763688760806, "grad_norm": 0.8272489485080615, "learning_rate": 1.991315853440606e-06, "loss": 0.5719002485275269, "step": 781 }, { "epoch": 0.18028818443804034, "grad_norm": 1.090350118782068, "learning_rate": 1.99126565682899e-06, "loss": 0.6078590154647827, "step": 782 }, { "epoch": 0.18051873198847262, "grad_norm": 0.9290187091081005, "learning_rate": 1.991215316196786e-06, "loss": 0.659256100654602, "step": 783 }, { "epoch": 0.1807492795389049, "grad_norm": 0.9147661256460412, "learning_rate": 1.991164831551308e-06, "loss": 0.5777862071990967, "step": 784 }, { "epoch": 0.1809798270893372, "grad_norm": 0.9294731076788324, "learning_rate": 1.9911142028998907e-06, "loss": 0.6155215501785278, "step": 785 }, { "epoch": 0.18121037463976944, "grad_norm": 0.9904733686631906, "learning_rate": 1.9910634302498904e-06, "loss": 0.5919966697692871, "step": 786 }, { "epoch": 0.18144092219020172, "grad_norm": 0.8056232981385001, "learning_rate": 1.991012513608683e-06, "loss": 0.6729590892791748, "step": 787 }, { "epoch": 0.181671469740634, "grad_norm": 1.1208667835091055, "learning_rate": 1.990961452983667e-06, "loss": 0.6397472023963928, "step": 788 }, { "epoch": 0.18190201729106628, "grad_norm": 0.9065849098074823, "learning_rate": 1.9909102483822607e-06, "loss": 0.5506640672683716, "step": 789 }, { "epoch": 0.18213256484149856, "grad_norm": 0.9037026872448599, "learning_rate": 1.9908588998119035e-06, "loss": 0.5978207588195801, "step": 790 }, { "epoch": 0.18236311239193084, "grad_norm": 0.7186623128592075, "learning_rate": 1.9908074072800557e-06, "loss": 0.5460508465766907, "step": 791 }, { "epoch": 0.18259365994236312, "grad_norm": 0.7892780958696924, "learning_rate": 1.990755770794199e-06, "loss": 0.6750969290733337, "step": 792 }, { "epoch": 0.18282420749279538, "grad_norm": 1.3348299809463013, "learning_rate": 1.9907039903618352e-06, "loss": 0.6955918073654175, "step": 793 }, { "epoch": 0.18305475504322766, "grad_norm": 0.7742914275515491, "learning_rate": 1.990652065990488e-06, "loss": 0.5774638652801514, "step": 794 }, { "epoch": 0.18328530259365994, "grad_norm": 1.0579806797579008, "learning_rate": 1.990599997687701e-06, "loss": 0.6769977807998657, "step": 795 }, { "epoch": 0.18351585014409222, "grad_norm": 0.8232976375327974, "learning_rate": 1.9905477854610395e-06, "loss": 0.6034688353538513, "step": 796 }, { "epoch": 0.1837463976945245, "grad_norm": 0.9920075065890873, "learning_rate": 1.990495429318089e-06, "loss": 0.6486461162567139, "step": 797 }, { "epoch": 0.18397694524495678, "grad_norm": 0.8827350375987653, "learning_rate": 1.9904429292664565e-06, "loss": 0.6474006175994873, "step": 798 }, { "epoch": 0.18420749279538906, "grad_norm": 1.0129748972927366, "learning_rate": 1.99039028531377e-06, "loss": 0.6424986124038696, "step": 799 }, { "epoch": 0.1844380403458213, "grad_norm": 0.9671373838473223, "learning_rate": 1.990337497467678e-06, "loss": 0.6319411993026733, "step": 800 }, { "epoch": 0.1846685878962536, "grad_norm": 0.8922602557785707, "learning_rate": 1.9902845657358493e-06, "loss": 0.5928018093109131, "step": 801 }, { "epoch": 0.18489913544668587, "grad_norm": 0.9942140949537916, "learning_rate": 1.9902314901259755e-06, "loss": 0.6054724454879761, "step": 802 }, { "epoch": 0.18512968299711816, "grad_norm": 0.9002048262250127, "learning_rate": 1.9901782706457667e-06, "loss": 0.6309449672698975, "step": 803 }, { "epoch": 0.18536023054755044, "grad_norm": 0.8725199458934897, "learning_rate": 1.9901249073029566e-06, "loss": 0.625114917755127, "step": 804 }, { "epoch": 0.18559077809798272, "grad_norm": 1.0830513836098963, "learning_rate": 1.990071400105297e-06, "loss": 0.6478957533836365, "step": 805 }, { "epoch": 0.185821325648415, "grad_norm": 0.9895928941486563, "learning_rate": 1.9900177490605628e-06, "loss": 0.6707916259765625, "step": 806 }, { "epoch": 0.18605187319884725, "grad_norm": 0.8882427566351567, "learning_rate": 1.9899639541765483e-06, "loss": 0.5120225548744202, "step": 807 }, { "epoch": 0.18628242074927953, "grad_norm": 0.9733618335628766, "learning_rate": 1.98991001546107e-06, "loss": 0.7165584564208984, "step": 808 }, { "epoch": 0.1865129682997118, "grad_norm": 0.8929091718348325, "learning_rate": 1.9898559329219636e-06, "loss": 0.4724568724632263, "step": 809 }, { "epoch": 0.1867435158501441, "grad_norm": 0.9623165111634494, "learning_rate": 1.989801706567088e-06, "loss": 0.7015688419342041, "step": 810 }, { "epoch": 0.18697406340057637, "grad_norm": 0.8369835155480084, "learning_rate": 1.989747336404321e-06, "loss": 0.5083395838737488, "step": 811 }, { "epoch": 0.18720461095100865, "grad_norm": 0.9079243051153807, "learning_rate": 1.9896928224415623e-06, "loss": 0.6297205090522766, "step": 812 }, { "epoch": 0.18743515850144093, "grad_norm": 1.1433534088888602, "learning_rate": 1.989638164686732e-06, "loss": 0.5437130928039551, "step": 813 }, { "epoch": 0.1876657060518732, "grad_norm": 0.9912906490963889, "learning_rate": 1.989583363147771e-06, "loss": 0.5922385454177856, "step": 814 }, { "epoch": 0.18789625360230547, "grad_norm": 0.8463175425409384, "learning_rate": 1.989528417832642e-06, "loss": 0.5371031761169434, "step": 815 }, { "epoch": 0.18812680115273775, "grad_norm": 0.8249505406171526, "learning_rate": 1.989473328749328e-06, "loss": 0.5495747327804565, "step": 816 }, { "epoch": 0.18835734870317003, "grad_norm": 0.9329352547357952, "learning_rate": 1.9894180959058323e-06, "loss": 0.6508893370628357, "step": 817 }, { "epoch": 0.1885878962536023, "grad_norm": 1.0113195463897409, "learning_rate": 1.9893627193101804e-06, "loss": 0.49949508905410767, "step": 818 }, { "epoch": 0.1888184438040346, "grad_norm": 1.1670316270661754, "learning_rate": 1.989307198970417e-06, "loss": 0.6115611791610718, "step": 819 }, { "epoch": 0.18904899135446687, "grad_norm": 1.5920704099480587, "learning_rate": 1.9892515348946094e-06, "loss": 0.6748598217964172, "step": 820 }, { "epoch": 0.18927953890489913, "grad_norm": 0.943253279158822, "learning_rate": 1.989195727090845e-06, "loss": 0.6547701358795166, "step": 821 }, { "epoch": 0.1895100864553314, "grad_norm": 0.960954411586292, "learning_rate": 1.9891397755672314e-06, "loss": 0.6120291948318481, "step": 822 }, { "epoch": 0.1897406340057637, "grad_norm": 1.038147761124326, "learning_rate": 1.9890836803318982e-06, "loss": 0.567481517791748, "step": 823 }, { "epoch": 0.18997118155619597, "grad_norm": 0.9777313935838982, "learning_rate": 1.989027441392996e-06, "loss": 0.6567938327789307, "step": 824 }, { "epoch": 0.19020172910662825, "grad_norm": 0.9020948308980783, "learning_rate": 1.988971058758695e-06, "loss": 0.6386862993240356, "step": 825 }, { "epoch": 0.19043227665706053, "grad_norm": 0.996140220083916, "learning_rate": 1.988914532437187e-06, "loss": 0.6657274961471558, "step": 826 }, { "epoch": 0.1906628242074928, "grad_norm": 0.9530652312449698, "learning_rate": 1.988857862436685e-06, "loss": 0.6014574766159058, "step": 827 }, { "epoch": 0.19089337175792506, "grad_norm": 0.9808303514948067, "learning_rate": 1.988801048765423e-06, "loss": 0.6482441425323486, "step": 828 }, { "epoch": 0.19112391930835734, "grad_norm": 0.9723490419441589, "learning_rate": 1.988744091431654e-06, "loss": 0.6074355244636536, "step": 829 }, { "epoch": 0.19135446685878962, "grad_norm": 0.8677748383178393, "learning_rate": 1.9886869904436544e-06, "loss": 0.609101414680481, "step": 830 }, { "epoch": 0.1915850144092219, "grad_norm": 0.8515757003715301, "learning_rate": 1.988629745809721e-06, "loss": 0.5866901874542236, "step": 831 }, { "epoch": 0.19181556195965418, "grad_norm": 1.1523148131986478, "learning_rate": 1.988572357538169e-06, "loss": 0.6352179050445557, "step": 832 }, { "epoch": 0.19204610951008647, "grad_norm": 0.9505829656120092, "learning_rate": 1.988514825637338e-06, "loss": 0.5662895441055298, "step": 833 }, { "epoch": 0.19227665706051872, "grad_norm": 1.0014129874079212, "learning_rate": 1.988457150115586e-06, "loss": 0.6476075649261475, "step": 834 }, { "epoch": 0.192507204610951, "grad_norm": 0.855790701855717, "learning_rate": 1.988399330981293e-06, "loss": 0.5920289158821106, "step": 835 }, { "epoch": 0.19273775216138328, "grad_norm": 1.2467411442207388, "learning_rate": 1.988341368242859e-06, "loss": 0.7046043276786804, "step": 836 }, { "epoch": 0.19296829971181556, "grad_norm": 0.9167265685067049, "learning_rate": 1.9882832619087057e-06, "loss": 0.5909844636917114, "step": 837 }, { "epoch": 0.19319884726224784, "grad_norm": 1.046085100012217, "learning_rate": 1.9882250119872754e-06, "loss": 0.605388879776001, "step": 838 }, { "epoch": 0.19342939481268012, "grad_norm": 1.5157450979253604, "learning_rate": 1.9881666184870314e-06, "loss": 0.6636893153190613, "step": 839 }, { "epoch": 0.1936599423631124, "grad_norm": 0.9152396125482961, "learning_rate": 1.9881080814164574e-06, "loss": 0.5917089581489563, "step": 840 }, { "epoch": 0.19389048991354466, "grad_norm": 0.8672781191194197, "learning_rate": 1.988049400784058e-06, "loss": 0.6431874632835388, "step": 841 }, { "epoch": 0.19412103746397694, "grad_norm": 0.8347880968744955, "learning_rate": 1.9879905765983593e-06, "loss": 0.5309115648269653, "step": 842 }, { "epoch": 0.19435158501440922, "grad_norm": 1.3518239663287992, "learning_rate": 1.9879316088679076e-06, "loss": 0.5478585362434387, "step": 843 }, { "epoch": 0.1945821325648415, "grad_norm": 1.06976855886381, "learning_rate": 1.9878724976012703e-06, "loss": 0.6842525005340576, "step": 844 }, { "epoch": 0.19481268011527378, "grad_norm": 0.870308127392832, "learning_rate": 1.987813242807036e-06, "loss": 0.5301089286804199, "step": 845 }, { "epoch": 0.19504322766570606, "grad_norm": 0.8463702037283144, "learning_rate": 1.987753844493813e-06, "loss": 0.6047420501708984, "step": 846 }, { "epoch": 0.19527377521613834, "grad_norm": 0.9048551432370087, "learning_rate": 1.9876943026702325e-06, "loss": 0.6532707214355469, "step": 847 }, { "epoch": 0.1955043227665706, "grad_norm": 0.9860575648750018, "learning_rate": 1.9876346173449444e-06, "loss": 0.6906484365463257, "step": 848 }, { "epoch": 0.19573487031700287, "grad_norm": 0.842900243439286, "learning_rate": 1.98757478852662e-06, "loss": 0.5873023867607117, "step": 849 }, { "epoch": 0.19596541786743515, "grad_norm": 0.8944983517133327, "learning_rate": 1.9875148162239534e-06, "loss": 0.5956071615219116, "step": 850 }, { "epoch": 0.19619596541786744, "grad_norm": 1.0725227324782516, "learning_rate": 1.9874547004456562e-06, "loss": 0.6224364042282104, "step": 851 }, { "epoch": 0.19642651296829972, "grad_norm": 0.9179836097168517, "learning_rate": 1.9873944412004633e-06, "loss": 0.5743613839149475, "step": 852 }, { "epoch": 0.196657060518732, "grad_norm": 0.9085386217997975, "learning_rate": 1.98733403849713e-06, "loss": 0.6316232085227966, "step": 853 }, { "epoch": 0.19688760806916428, "grad_norm": 1.1614192101370417, "learning_rate": 1.987273492344432e-06, "loss": 0.6850833892822266, "step": 854 }, { "epoch": 0.19711815561959653, "grad_norm": 0.9615141189589504, "learning_rate": 1.9872128027511656e-06, "loss": 0.6533515453338623, "step": 855 }, { "epoch": 0.1973487031700288, "grad_norm": 1.17455236527873, "learning_rate": 1.987151969726149e-06, "loss": 0.617554783821106, "step": 856 }, { "epoch": 0.1975792507204611, "grad_norm": 1.0764121250331788, "learning_rate": 1.98709099327822e-06, "loss": 0.6278855800628662, "step": 857 }, { "epoch": 0.19780979827089337, "grad_norm": 0.9092814301734246, "learning_rate": 1.9870298734162384e-06, "loss": 0.6289564371109009, "step": 858 }, { "epoch": 0.19804034582132565, "grad_norm": 1.065819744433503, "learning_rate": 1.986968610149084e-06, "loss": 0.5363434553146362, "step": 859 }, { "epoch": 0.19827089337175793, "grad_norm": 0.889097528046652, "learning_rate": 1.986907203485658e-06, "loss": 0.5072166919708252, "step": 860 }, { "epoch": 0.19850144092219021, "grad_norm": 0.9690950352141645, "learning_rate": 1.986845653434882e-06, "loss": 0.6337966918945312, "step": 861 }, { "epoch": 0.19873198847262247, "grad_norm": 1.1682445222467872, "learning_rate": 1.9867839600056984e-06, "loss": 0.6706831455230713, "step": 862 }, { "epoch": 0.19896253602305475, "grad_norm": 1.145903275739384, "learning_rate": 1.9867221232070706e-06, "loss": 0.6381477117538452, "step": 863 }, { "epoch": 0.19919308357348703, "grad_norm": 1.0005395841954985, "learning_rate": 1.9866601430479826e-06, "loss": 0.7144027948379517, "step": 864 }, { "epoch": 0.1994236311239193, "grad_norm": 0.8750080759967084, "learning_rate": 1.98659801953744e-06, "loss": 0.5705598592758179, "step": 865 }, { "epoch": 0.1996541786743516, "grad_norm": 0.9156602793809858, "learning_rate": 1.986535752684469e-06, "loss": 0.5879906415939331, "step": 866 }, { "epoch": 0.19988472622478387, "grad_norm": 0.9724116332795385, "learning_rate": 1.9864733424981155e-06, "loss": 0.5378298759460449, "step": 867 }, { "epoch": 0.20011527377521615, "grad_norm": 1.1694103064412866, "learning_rate": 1.986410788987448e-06, "loss": 0.6873736381530762, "step": 868 }, { "epoch": 0.2003458213256484, "grad_norm": 1.2934294621539983, "learning_rate": 1.9863480921615537e-06, "loss": 0.6900503635406494, "step": 869 }, { "epoch": 0.20057636887608069, "grad_norm": 1.0027549189128846, "learning_rate": 1.9862852520295426e-06, "loss": 0.6074845790863037, "step": 870 }, { "epoch": 0.20080691642651297, "grad_norm": 0.7544689789402186, "learning_rate": 1.9862222686005443e-06, "loss": 0.5881202220916748, "step": 871 }, { "epoch": 0.20103746397694525, "grad_norm": 1.0664423288729972, "learning_rate": 1.98615914188371e-06, "loss": 0.56011962890625, "step": 872 }, { "epoch": 0.20126801152737753, "grad_norm": 0.8988899214507405, "learning_rate": 1.986095871888211e-06, "loss": 0.5858177542686462, "step": 873 }, { "epoch": 0.2014985590778098, "grad_norm": 1.0133619404753802, "learning_rate": 1.9860324586232404e-06, "loss": 0.5983797311782837, "step": 874 }, { "epoch": 0.2017291066282421, "grad_norm": 1.1981047171553392, "learning_rate": 1.985968902098011e-06, "loss": 0.614532470703125, "step": 875 }, { "epoch": 0.20195965417867434, "grad_norm": 0.8750332579169862, "learning_rate": 1.9859052023217564e-06, "loss": 0.6202026009559631, "step": 876 }, { "epoch": 0.20219020172910662, "grad_norm": 1.023100856340593, "learning_rate": 1.9858413593037324e-06, "loss": 0.6846225261688232, "step": 877 }, { "epoch": 0.2024207492795389, "grad_norm": 0.9705265756320178, "learning_rate": 1.9857773730532145e-06, "loss": 0.587134838104248, "step": 878 }, { "epoch": 0.20265129682997118, "grad_norm": 0.9699235709827684, "learning_rate": 1.9857132435794986e-06, "loss": 0.585313081741333, "step": 879 }, { "epoch": 0.20288184438040346, "grad_norm": 0.7607859606406122, "learning_rate": 1.985648970891903e-06, "loss": 0.6294115781784058, "step": 880 }, { "epoch": 0.20311239193083575, "grad_norm": 1.5538153967344521, "learning_rate": 1.9855845549997655e-06, "loss": 0.5994957089424133, "step": 881 }, { "epoch": 0.203342939481268, "grad_norm": 0.8200153437294728, "learning_rate": 1.985519995912445e-06, "loss": 0.5432143211364746, "step": 882 }, { "epoch": 0.20357348703170028, "grad_norm": 1.039970116286451, "learning_rate": 1.9854552936393212e-06, "loss": 0.4527829885482788, "step": 883 }, { "epoch": 0.20380403458213256, "grad_norm": 1.0605902620434908, "learning_rate": 1.985390448189795e-06, "loss": 0.635351836681366, "step": 884 }, { "epoch": 0.20403458213256484, "grad_norm": 1.1235404274480423, "learning_rate": 1.9853254595732867e-06, "loss": 0.6122138500213623, "step": 885 }, { "epoch": 0.20426512968299712, "grad_norm": 0.8680670360235221, "learning_rate": 1.98526032779924e-06, "loss": 0.5663925409317017, "step": 886 }, { "epoch": 0.2044956772334294, "grad_norm": 0.9855268643697941, "learning_rate": 1.985195052877117e-06, "loss": 0.622967004776001, "step": 887 }, { "epoch": 0.20472622478386168, "grad_norm": 1.0340743236415533, "learning_rate": 1.9851296348164013e-06, "loss": 0.5478787422180176, "step": 888 }, { "epoch": 0.20495677233429394, "grad_norm": 0.9432090732932807, "learning_rate": 1.985064073626598e-06, "loss": 0.5574431419372559, "step": 889 }, { "epoch": 0.20518731988472622, "grad_norm": 0.954691206105306, "learning_rate": 1.9849983693172324e-06, "loss": 0.49230653047561646, "step": 890 }, { "epoch": 0.2054178674351585, "grad_norm": 1.0132924882146737, "learning_rate": 1.98493252189785e-06, "loss": 0.6339923143386841, "step": 891 }, { "epoch": 0.20564841498559078, "grad_norm": 1.041505282249045, "learning_rate": 1.9848665313780186e-06, "loss": 0.6324957609176636, "step": 892 }, { "epoch": 0.20587896253602306, "grad_norm": 0.9105419954136654, "learning_rate": 1.984800397767325e-06, "loss": 0.5840550661087036, "step": 893 }, { "epoch": 0.20610951008645534, "grad_norm": 1.0158365608395679, "learning_rate": 1.984734121075379e-06, "loss": 0.5304306149482727, "step": 894 }, { "epoch": 0.20634005763688762, "grad_norm": 0.8993681880640182, "learning_rate": 1.9846677013118088e-06, "loss": 0.5756544470787048, "step": 895 }, { "epoch": 0.20657060518731987, "grad_norm": 1.0683503528028089, "learning_rate": 1.9846011384862652e-06, "loss": 0.6194400787353516, "step": 896 }, { "epoch": 0.20680115273775215, "grad_norm": 0.9824376438411858, "learning_rate": 1.9845344326084185e-06, "loss": 0.6125355362892151, "step": 897 }, { "epoch": 0.20703170028818443, "grad_norm": 1.1037085256416217, "learning_rate": 1.9844675836879606e-06, "loss": 0.598976731300354, "step": 898 }, { "epoch": 0.20726224783861671, "grad_norm": 0.9713841503620613, "learning_rate": 1.984400591734604e-06, "loss": 0.578935444355011, "step": 899 }, { "epoch": 0.207492795389049, "grad_norm": 1.0652265800434835, "learning_rate": 1.9843334567580822e-06, "loss": 0.5881237983703613, "step": 900 }, { "epoch": 0.20772334293948128, "grad_norm": 1.2219068277886247, "learning_rate": 1.9842661787681485e-06, "loss": 0.6280317306518555, "step": 901 }, { "epoch": 0.20795389048991356, "grad_norm": 1.1730342254756378, "learning_rate": 1.9841987577745786e-06, "loss": 0.5957608819007874, "step": 902 }, { "epoch": 0.2081844380403458, "grad_norm": 0.8340813337763022, "learning_rate": 1.9841311937871674e-06, "loss": 0.606480598449707, "step": 903 }, { "epoch": 0.2084149855907781, "grad_norm": 1.0407514976116163, "learning_rate": 1.9840634868157314e-06, "loss": 0.6092411279678345, "step": 904 }, { "epoch": 0.20864553314121037, "grad_norm": 1.0247425642925165, "learning_rate": 1.9839956368701076e-06, "loss": 0.6318541765213013, "step": 905 }, { "epoch": 0.20887608069164265, "grad_norm": 1.1532320918613648, "learning_rate": 1.983927643960155e-06, "loss": 0.6250811219215393, "step": 906 }, { "epoch": 0.20910662824207493, "grad_norm": 1.1313706395340732, "learning_rate": 1.9838595080957506e-06, "loss": 0.6782780885696411, "step": 907 }, { "epoch": 0.2093371757925072, "grad_norm": 1.0695473520404728, "learning_rate": 1.9837912292867946e-06, "loss": 0.5548110008239746, "step": 908 }, { "epoch": 0.2095677233429395, "grad_norm": 0.9444146509070543, "learning_rate": 1.983722807543207e-06, "loss": 0.5968413352966309, "step": 909 }, { "epoch": 0.20979827089337175, "grad_norm": 1.2982877947412854, "learning_rate": 1.983654242874929e-06, "loss": 0.6580274105072021, "step": 910 }, { "epoch": 0.21002881844380403, "grad_norm": 1.0176231954876447, "learning_rate": 1.9835855352919224e-06, "loss": 0.6023098230361938, "step": 911 }, { "epoch": 0.2102593659942363, "grad_norm": 0.9242771902070097, "learning_rate": 1.9835166848041694e-06, "loss": 0.5659872889518738, "step": 912 }, { "epoch": 0.2104899135446686, "grad_norm": 0.8294999124008526, "learning_rate": 1.983447691421674e-06, "loss": 0.5198249220848083, "step": 913 }, { "epoch": 0.21072046109510087, "grad_norm": 1.1591542811764048, "learning_rate": 1.983378555154459e-06, "loss": 0.6395134925842285, "step": 914 }, { "epoch": 0.21095100864553315, "grad_norm": 1.0318636409216984, "learning_rate": 1.98330927601257e-06, "loss": 0.6178885698318481, "step": 915 }, { "epoch": 0.21118155619596543, "grad_norm": 0.9143749378081076, "learning_rate": 1.9832398540060722e-06, "loss": 0.6037659645080566, "step": 916 }, { "epoch": 0.21141210374639768, "grad_norm": 0.9096271537150638, "learning_rate": 1.9831702891450527e-06, "loss": 0.5862294435501099, "step": 917 }, { "epoch": 0.21164265129682996, "grad_norm": 1.3364895989669363, "learning_rate": 1.9831005814396173e-06, "loss": 0.5884512662887573, "step": 918 }, { "epoch": 0.21187319884726225, "grad_norm": 0.920898959799489, "learning_rate": 1.9830307308998944e-06, "loss": 0.6083986759185791, "step": 919 }, { "epoch": 0.21210374639769453, "grad_norm": 0.8631208244450347, "learning_rate": 1.982960737536033e-06, "loss": 0.5277038216590881, "step": 920 }, { "epoch": 0.2123342939481268, "grad_norm": 1.0015447276757887, "learning_rate": 1.9828906013582016e-06, "loss": 0.6744534969329834, "step": 921 }, { "epoch": 0.2125648414985591, "grad_norm": 0.9877926387823571, "learning_rate": 1.9828203223765906e-06, "loss": 0.5403028130531311, "step": 922 }, { "epoch": 0.21279538904899137, "grad_norm": 0.8105271073394711, "learning_rate": 1.9827499006014106e-06, "loss": 0.5237953066825867, "step": 923 }, { "epoch": 0.21302593659942362, "grad_norm": 0.9207602163228722, "learning_rate": 1.982679336042894e-06, "loss": 0.5229809284210205, "step": 924 }, { "epoch": 0.2132564841498559, "grad_norm": 1.0055767309241133, "learning_rate": 1.9826086287112924e-06, "loss": 0.5582294464111328, "step": 925 }, { "epoch": 0.21348703170028818, "grad_norm": 0.8758414895973887, "learning_rate": 1.9825377786168785e-06, "loss": 0.5803326964378357, "step": 926 }, { "epoch": 0.21371757925072046, "grad_norm": 1.0609163639206318, "learning_rate": 1.9824667857699468e-06, "loss": 0.6260091662406921, "step": 927 }, { "epoch": 0.21394812680115274, "grad_norm": 1.0231347292786834, "learning_rate": 1.9823956501808114e-06, "loss": 0.6557651162147522, "step": 928 }, { "epoch": 0.21417867435158502, "grad_norm": 0.9080397919378964, "learning_rate": 1.982324371859808e-06, "loss": 0.613300085067749, "step": 929 }, { "epoch": 0.21440922190201728, "grad_norm": 1.0385007304553873, "learning_rate": 1.9822529508172918e-06, "loss": 0.6877464056015015, "step": 930 }, { "epoch": 0.21463976945244956, "grad_norm": 1.2357325420265879, "learning_rate": 1.9821813870636403e-06, "loss": 0.6349912881851196, "step": 931 }, { "epoch": 0.21487031700288184, "grad_norm": 0.8782361615705243, "learning_rate": 1.9821096806092505e-06, "loss": 0.6005406975746155, "step": 932 }, { "epoch": 0.21510086455331412, "grad_norm": 1.0161666615573182, "learning_rate": 1.982037831464541e-06, "loss": 0.6088801622390747, "step": 933 }, { "epoch": 0.2153314121037464, "grad_norm": 0.9111448870031985, "learning_rate": 1.9819658396399504e-06, "loss": 0.5831236243247986, "step": 934 }, { "epoch": 0.21556195965417868, "grad_norm": 1.0726672557651, "learning_rate": 1.9818937051459387e-06, "loss": 0.6151256561279297, "step": 935 }, { "epoch": 0.21579250720461096, "grad_norm": 0.9951140694484449, "learning_rate": 1.9818214279929858e-06, "loss": 0.5083675384521484, "step": 936 }, { "epoch": 0.21602305475504321, "grad_norm": 0.9882190028949231, "learning_rate": 1.9817490081915933e-06, "loss": 0.5814487934112549, "step": 937 }, { "epoch": 0.2162536023054755, "grad_norm": 1.0890112715760354, "learning_rate": 1.9816764457522826e-06, "loss": 0.6241549253463745, "step": 938 }, { "epoch": 0.21648414985590778, "grad_norm": 1.259145105095718, "learning_rate": 1.981603740685597e-06, "loss": 0.7381168603897095, "step": 939 }, { "epoch": 0.21671469740634006, "grad_norm": 0.9539284871563865, "learning_rate": 1.981530893002099e-06, "loss": 0.5332478880882263, "step": 940 }, { "epoch": 0.21694524495677234, "grad_norm": 1.0612148203565177, "learning_rate": 1.981457902712373e-06, "loss": 0.6405541896820068, "step": 941 }, { "epoch": 0.21717579250720462, "grad_norm": 1.2447747157532114, "learning_rate": 1.9813847698270234e-06, "loss": 0.6976902484893799, "step": 942 }, { "epoch": 0.2174063400576369, "grad_norm": 0.8077530273736168, "learning_rate": 1.981311494356676e-06, "loss": 0.5148216485977173, "step": 943 }, { "epoch": 0.21763688760806915, "grad_norm": 0.9533804720685899, "learning_rate": 1.981238076311977e-06, "loss": 0.612360954284668, "step": 944 }, { "epoch": 0.21786743515850143, "grad_norm": 0.9002805860040698, "learning_rate": 1.981164515703593e-06, "loss": 0.5379883050918579, "step": 945 }, { "epoch": 0.2180979827089337, "grad_norm": 1.1634066704773265, "learning_rate": 1.9810908125422117e-06, "loss": 0.5931693911552429, "step": 946 }, { "epoch": 0.218328530259366, "grad_norm": 1.0807462385025328, "learning_rate": 1.9810169668385415e-06, "loss": 0.610332727432251, "step": 947 }, { "epoch": 0.21855907780979827, "grad_norm": 0.9154396888433077, "learning_rate": 1.980942978603311e-06, "loss": 0.6150614619255066, "step": 948 }, { "epoch": 0.21878962536023056, "grad_norm": 0.8210884658105372, "learning_rate": 1.9808688478472707e-06, "loss": 0.5653204917907715, "step": 949 }, { "epoch": 0.21902017291066284, "grad_norm": 1.0191839025794176, "learning_rate": 1.9807945745811906e-06, "loss": 0.5285670161247253, "step": 950 }, { "epoch": 0.2192507204610951, "grad_norm": 1.0087326366586278, "learning_rate": 1.9807201588158617e-06, "loss": 0.5583071708679199, "step": 951 }, { "epoch": 0.21948126801152737, "grad_norm": 0.845970473922194, "learning_rate": 1.9806456005620957e-06, "loss": 0.5426152944564819, "step": 952 }, { "epoch": 0.21971181556195965, "grad_norm": 1.063629105933386, "learning_rate": 1.9805708998307256e-06, "loss": 0.5979200601577759, "step": 953 }, { "epoch": 0.21994236311239193, "grad_norm": 0.9035517877439286, "learning_rate": 1.9804960566326045e-06, "loss": 0.5606704950332642, "step": 954 }, { "epoch": 0.2201729106628242, "grad_norm": 0.9445059191114737, "learning_rate": 1.9804210709786057e-06, "loss": 0.5354186296463013, "step": 955 }, { "epoch": 0.2204034582132565, "grad_norm": 0.9121007186425357, "learning_rate": 1.980345942879625e-06, "loss": 0.5811333060264587, "step": 956 }, { "epoch": 0.22063400576368877, "grad_norm": 0.9064710646915293, "learning_rate": 1.980270672346577e-06, "loss": 0.6390595436096191, "step": 957 }, { "epoch": 0.22086455331412103, "grad_norm": 0.9905980945153653, "learning_rate": 1.9801952593903983e-06, "loss": 0.5870425701141357, "step": 958 }, { "epoch": 0.2210951008645533, "grad_norm": 1.073439369948722, "learning_rate": 1.9801197040220443e-06, "loss": 0.7697482109069824, "step": 959 }, { "epoch": 0.2213256484149856, "grad_norm": 0.9893905657388904, "learning_rate": 1.980044006252494e-06, "loss": 0.4672078490257263, "step": 960 }, { "epoch": 0.22155619596541787, "grad_norm": 1.0315754655846494, "learning_rate": 1.979968166092744e-06, "loss": 0.7151461839675903, "step": 961 }, { "epoch": 0.22178674351585015, "grad_norm": 1.0107132012823437, "learning_rate": 1.9798921835538147e-06, "loss": 0.6638733148574829, "step": 962 }, { "epoch": 0.22201729106628243, "grad_norm": 1.0380957382757268, "learning_rate": 1.979816058646745e-06, "loss": 0.723508358001709, "step": 963 }, { "epoch": 0.2222478386167147, "grad_norm": 0.9188622865566421, "learning_rate": 1.979739791382594e-06, "loss": 0.5891420841217041, "step": 964 }, { "epoch": 0.22247838616714696, "grad_norm": 0.9724575741379544, "learning_rate": 1.979663381772443e-06, "loss": 0.6365354061126709, "step": 965 }, { "epoch": 0.22270893371757924, "grad_norm": 0.9422715444997953, "learning_rate": 1.979586829827395e-06, "loss": 0.5367093682289124, "step": 966 }, { "epoch": 0.22293948126801152, "grad_norm": 1.0633311974899067, "learning_rate": 1.9795101355585702e-06, "loss": 0.5783185958862305, "step": 967 }, { "epoch": 0.2231700288184438, "grad_norm": 1.0645482046561703, "learning_rate": 1.979433298977113e-06, "loss": 0.5637539625167847, "step": 968 }, { "epoch": 0.22340057636887609, "grad_norm": 1.1944531298271532, "learning_rate": 1.979356320094186e-06, "loss": 0.6345614194869995, "step": 969 }, { "epoch": 0.22363112391930837, "grad_norm": 0.8516167233031302, "learning_rate": 1.9792791989209734e-06, "loss": 0.6066634058952332, "step": 970 }, { "epoch": 0.22386167146974065, "grad_norm": 0.8550340640468586, "learning_rate": 1.9792019354686807e-06, "loss": 0.5921822786331177, "step": 971 }, { "epoch": 0.2240922190201729, "grad_norm": 1.0592142868604846, "learning_rate": 1.9791245297485334e-06, "loss": 0.6421139240264893, "step": 972 }, { "epoch": 0.22432276657060518, "grad_norm": 0.9092290184495944, "learning_rate": 1.9790469817717775e-06, "loss": 0.5608785152435303, "step": 973 }, { "epoch": 0.22455331412103746, "grad_norm": 0.9137300340422447, "learning_rate": 1.97896929154968e-06, "loss": 0.5314462184906006, "step": 974 }, { "epoch": 0.22478386167146974, "grad_norm": 1.1560347021267936, "learning_rate": 1.9788914590935284e-06, "loss": 0.6488084197044373, "step": 975 }, { "epoch": 0.22501440922190202, "grad_norm": 1.1192487958571726, "learning_rate": 1.978813484414631e-06, "loss": 0.6000053882598877, "step": 976 }, { "epoch": 0.2252449567723343, "grad_norm": 0.8931287325899492, "learning_rate": 1.9787353675243162e-06, "loss": 0.624097466468811, "step": 977 }, { "epoch": 0.22547550432276656, "grad_norm": 0.8981584252602057, "learning_rate": 1.9786571084339346e-06, "loss": 0.5497676730155945, "step": 978 }, { "epoch": 0.22570605187319884, "grad_norm": 1.032097826259714, "learning_rate": 1.9785787071548558e-06, "loss": 0.6013498902320862, "step": 979 }, { "epoch": 0.22593659942363112, "grad_norm": 1.0473431651513534, "learning_rate": 1.978500163698471e-06, "loss": 0.5227783918380737, "step": 980 }, { "epoch": 0.2261671469740634, "grad_norm": 1.0552898348513462, "learning_rate": 1.9784214780761912e-06, "loss": 0.5406474471092224, "step": 981 }, { "epoch": 0.22639769452449568, "grad_norm": 1.178464281869854, "learning_rate": 1.9783426502994495e-06, "loss": 0.5630630254745483, "step": 982 }, { "epoch": 0.22662824207492796, "grad_norm": 1.0296512675290248, "learning_rate": 1.9782636803796975e-06, "loss": 0.7161320447921753, "step": 983 }, { "epoch": 0.22685878962536024, "grad_norm": 0.8220598435695787, "learning_rate": 1.97818456832841e-06, "loss": 0.6241968870162964, "step": 984 }, { "epoch": 0.2270893371757925, "grad_norm": 1.0289641391925526, "learning_rate": 1.97810531415708e-06, "loss": 0.6150163412094116, "step": 985 }, { "epoch": 0.22731988472622477, "grad_norm": 0.9798195379296696, "learning_rate": 1.9780259178772236e-06, "loss": 0.5523653626441956, "step": 986 }, { "epoch": 0.22755043227665706, "grad_norm": 0.9247757003196834, "learning_rate": 1.977946379500375e-06, "loss": 0.6309713125228882, "step": 987 }, { "epoch": 0.22778097982708934, "grad_norm": 0.8708946148848759, "learning_rate": 1.977866699038091e-06, "loss": 0.572121798992157, "step": 988 }, { "epoch": 0.22801152737752162, "grad_norm": 1.0646066439636737, "learning_rate": 1.9777868765019477e-06, "loss": 0.5242247581481934, "step": 989 }, { "epoch": 0.2282420749279539, "grad_norm": 0.9115141253954517, "learning_rate": 1.9777069119035435e-06, "loss": 0.6013658046722412, "step": 990 }, { "epoch": 0.22847262247838618, "grad_norm": 1.1012219089888924, "learning_rate": 1.977626805254496e-06, "loss": 0.5432295799255371, "step": 991 }, { "epoch": 0.22870317002881843, "grad_norm": 1.2118421806825892, "learning_rate": 1.9775465565664436e-06, "loss": 0.5512800216674805, "step": 992 }, { "epoch": 0.2289337175792507, "grad_norm": 1.2153606816751235, "learning_rate": 1.9774661658510454e-06, "loss": 0.6150898337364197, "step": 993 }, { "epoch": 0.229164265129683, "grad_norm": 0.8320247490731437, "learning_rate": 1.977385633119982e-06, "loss": 0.6104747653007507, "step": 994 }, { "epoch": 0.22939481268011527, "grad_norm": 1.1044827298829611, "learning_rate": 1.9773049583849537e-06, "loss": 0.7229997515678406, "step": 995 }, { "epoch": 0.22962536023054755, "grad_norm": 0.8296089967891425, "learning_rate": 1.9772241416576814e-06, "loss": 0.49942266941070557, "step": 996 }, { "epoch": 0.22985590778097983, "grad_norm": 1.0511867199438154, "learning_rate": 1.9771431829499075e-06, "loss": 0.6764867901802063, "step": 997 }, { "epoch": 0.23008645533141212, "grad_norm": 0.8769497757034282, "learning_rate": 1.9770620822733943e-06, "loss": 0.5971235036849976, "step": 998 }, { "epoch": 0.23031700288184437, "grad_norm": 1.1909353124607884, "learning_rate": 1.9769808396399244e-06, "loss": 0.49988481402397156, "step": 999 }, { "epoch": 0.23054755043227665, "grad_norm": 1.0028814447039645, "learning_rate": 1.976899455061302e-06, "loss": 0.5656229853630066, "step": 1000 }, { "epoch": 0.23077809798270893, "grad_norm": 0.9811524671709572, "learning_rate": 1.9768179285493505e-06, "loss": 0.618227481842041, "step": 1001 }, { "epoch": 0.2310086455331412, "grad_norm": 0.8840369979000756, "learning_rate": 1.9767362601159163e-06, "loss": 0.599855363368988, "step": 1002 }, { "epoch": 0.2312391930835735, "grad_norm": 0.9428020117039089, "learning_rate": 1.9766544497728645e-06, "loss": 0.6400339603424072, "step": 1003 }, { "epoch": 0.23146974063400577, "grad_norm": 0.7764568185943042, "learning_rate": 1.9765724975320806e-06, "loss": 0.5003043413162231, "step": 1004 }, { "epoch": 0.23170028818443805, "grad_norm": 1.0854087202550196, "learning_rate": 1.976490403405472e-06, "loss": 0.6133515238761902, "step": 1005 }, { "epoch": 0.2319308357348703, "grad_norm": 1.2022521939980604, "learning_rate": 1.9764081674049664e-06, "loss": 0.6931927800178528, "step": 1006 }, { "epoch": 0.23216138328530259, "grad_norm": 0.8703497424794447, "learning_rate": 1.976325789542511e-06, "loss": 0.6216111779212952, "step": 1007 }, { "epoch": 0.23239193083573487, "grad_norm": 1.0933161640642175, "learning_rate": 1.976243269830075e-06, "loss": 0.588605523109436, "step": 1008 }, { "epoch": 0.23262247838616715, "grad_norm": 0.9817338350394261, "learning_rate": 1.9761606082796476e-06, "loss": 0.582482635974884, "step": 1009 }, { "epoch": 0.23285302593659943, "grad_norm": 0.8848970658107764, "learning_rate": 1.9760778049032386e-06, "loss": 0.6296440362930298, "step": 1010 }, { "epoch": 0.2330835734870317, "grad_norm": 1.0232733748116352, "learning_rate": 1.9759948597128785e-06, "loss": 0.6383839845657349, "step": 1011 }, { "epoch": 0.233314121037464, "grad_norm": 1.0868243233359214, "learning_rate": 1.975911772720618e-06, "loss": 0.7672706842422485, "step": 1012 }, { "epoch": 0.23354466858789624, "grad_norm": 1.008765447219668, "learning_rate": 1.9758285439385295e-06, "loss": 0.6237273216247559, "step": 1013 }, { "epoch": 0.23377521613832852, "grad_norm": 0.9625532663927989, "learning_rate": 1.975745173378705e-06, "loss": 0.49947991967201233, "step": 1014 }, { "epoch": 0.2340057636887608, "grad_norm": 1.1387405141282927, "learning_rate": 1.975661661053257e-06, "loss": 0.3864251375198364, "step": 1015 }, { "epoch": 0.23423631123919308, "grad_norm": 1.0401714631169554, "learning_rate": 1.9755780069743194e-06, "loss": 0.6911368370056152, "step": 1016 }, { "epoch": 0.23446685878962537, "grad_norm": 0.912344579821443, "learning_rate": 1.9754942111540463e-06, "loss": 0.5999586582183838, "step": 1017 }, { "epoch": 0.23469740634005765, "grad_norm": 0.8660990428846996, "learning_rate": 1.9754102736046118e-06, "loss": 0.5257225632667542, "step": 1018 }, { "epoch": 0.23492795389048993, "grad_norm": 0.8213356313909403, "learning_rate": 1.975326194338212e-06, "loss": 0.5269970893859863, "step": 1019 }, { "epoch": 0.23515850144092218, "grad_norm": 1.197368044414957, "learning_rate": 1.975241973367062e-06, "loss": 0.6317660212516785, "step": 1020 }, { "epoch": 0.23538904899135446, "grad_norm": 0.9754684221461335, "learning_rate": 1.9751576107033985e-06, "loss": 0.4785078763961792, "step": 1021 }, { "epoch": 0.23561959654178674, "grad_norm": 1.1345512879493804, "learning_rate": 1.9750731063594787e-06, "loss": 0.647568941116333, "step": 1022 }, { "epoch": 0.23585014409221902, "grad_norm": 1.0412315528593523, "learning_rate": 1.9749884603475798e-06, "loss": 0.6356335878372192, "step": 1023 }, { "epoch": 0.2360806916426513, "grad_norm": 0.8615851871429264, "learning_rate": 1.9749036726800003e-06, "loss": 0.5727132558822632, "step": 1024 }, { "epoch": 0.23631123919308358, "grad_norm": 1.0321299541613722, "learning_rate": 1.9748187433690587e-06, "loss": 0.5090360641479492, "step": 1025 }, { "epoch": 0.23654178674351586, "grad_norm": 0.884453476481366, "learning_rate": 1.974733672427095e-06, "loss": 0.5971696376800537, "step": 1026 }, { "epoch": 0.23677233429394812, "grad_norm": 0.9510484245916626, "learning_rate": 1.974648459866468e-06, "loss": 0.5483651161193848, "step": 1027 }, { "epoch": 0.2370028818443804, "grad_norm": 1.200960551078261, "learning_rate": 1.9745631056995594e-06, "loss": 0.5528438091278076, "step": 1028 }, { "epoch": 0.23723342939481268, "grad_norm": 1.3305054944312353, "learning_rate": 1.9744776099387695e-06, "loss": 0.6233264207839966, "step": 1029 }, { "epoch": 0.23746397694524496, "grad_norm": 0.946050950875143, "learning_rate": 1.97439197259652e-06, "loss": 0.6005399823188782, "step": 1030 }, { "epoch": 0.23769452449567724, "grad_norm": 0.9374072373092667, "learning_rate": 1.9743061936852537e-06, "loss": 0.5596457719802856, "step": 1031 }, { "epoch": 0.23792507204610952, "grad_norm": 0.9984185508791168, "learning_rate": 1.9742202732174328e-06, "loss": 0.6051905155181885, "step": 1032 }, { "epoch": 0.23815561959654177, "grad_norm": 1.0283665443374947, "learning_rate": 1.974134211205541e-06, "loss": 0.6691153049468994, "step": 1033 }, { "epoch": 0.23838616714697405, "grad_norm": 0.9603102228958054, "learning_rate": 1.9740480076620814e-06, "loss": 0.5938076972961426, "step": 1034 }, { "epoch": 0.23861671469740633, "grad_norm": 0.8705818698994734, "learning_rate": 1.9739616625995796e-06, "loss": 0.525333046913147, "step": 1035 }, { "epoch": 0.23884726224783862, "grad_norm": 0.8540807816466253, "learning_rate": 1.97387517603058e-06, "loss": 0.6196523308753967, "step": 1036 }, { "epoch": 0.2390778097982709, "grad_norm": 0.9798479702089192, "learning_rate": 1.9737885479676484e-06, "loss": 0.626924991607666, "step": 1037 }, { "epoch": 0.23930835734870318, "grad_norm": 0.999826886746383, "learning_rate": 1.973701778423371e-06, "loss": 0.6047407984733582, "step": 1038 }, { "epoch": 0.23953890489913546, "grad_norm": 0.9299360668604228, "learning_rate": 1.9736148674103543e-06, "loss": 0.5994965434074402, "step": 1039 }, { "epoch": 0.2397694524495677, "grad_norm": 1.0820368371686164, "learning_rate": 1.9735278149412257e-06, "loss": 0.4967964291572571, "step": 1040 }, { "epoch": 0.24, "grad_norm": 0.9325838905339232, "learning_rate": 1.973440621028633e-06, "loss": 0.5955591201782227, "step": 1041 }, { "epoch": 0.24023054755043227, "grad_norm": 0.9238934455540466, "learning_rate": 1.9733532856852444e-06, "loss": 0.5286005139350891, "step": 1042 }, { "epoch": 0.24046109510086455, "grad_norm": 0.7955501791954424, "learning_rate": 1.9732658089237494e-06, "loss": 0.5257104635238647, "step": 1043 }, { "epoch": 0.24069164265129683, "grad_norm": 1.0432386643488358, "learning_rate": 1.9731781907568564e-06, "loss": 0.6578767895698547, "step": 1044 }, { "epoch": 0.24092219020172911, "grad_norm": 1.081984606268088, "learning_rate": 1.9730904311972963e-06, "loss": 0.5154295563697815, "step": 1045 }, { "epoch": 0.2411527377521614, "grad_norm": 0.9939154656713256, "learning_rate": 1.97300253025782e-06, "loss": 0.5729039311408997, "step": 1046 }, { "epoch": 0.24138328530259365, "grad_norm": 1.056713721812048, "learning_rate": 1.9729144879511976e-06, "loss": 0.5996200442314148, "step": 1047 }, { "epoch": 0.24161383285302593, "grad_norm": 0.9545204667371253, "learning_rate": 1.9728263042902207e-06, "loss": 0.46382981538772583, "step": 1048 }, { "epoch": 0.2418443804034582, "grad_norm": 0.970105230555475, "learning_rate": 1.9727379792877024e-06, "loss": 0.5767467617988586, "step": 1049 }, { "epoch": 0.2420749279538905, "grad_norm": 1.0533787261450176, "learning_rate": 1.9726495129564747e-06, "loss": 0.5285289287567139, "step": 1050 }, { "epoch": 0.24230547550432277, "grad_norm": 1.1516900956240101, "learning_rate": 1.972560905309391e-06, "loss": 0.5614134073257446, "step": 1051 }, { "epoch": 0.24253602305475505, "grad_norm": 0.9565577766108253, "learning_rate": 1.9724721563593253e-06, "loss": 0.5901243686676025, "step": 1052 }, { "epoch": 0.24276657060518733, "grad_norm": 1.0833385993994633, "learning_rate": 1.9723832661191716e-06, "loss": 0.4962998628616333, "step": 1053 }, { "epoch": 0.24299711815561958, "grad_norm": 0.9970132155639208, "learning_rate": 1.9722942346018446e-06, "loss": 0.42762500047683716, "step": 1054 }, { "epoch": 0.24322766570605187, "grad_norm": 0.9766037381813959, "learning_rate": 1.9722050618202802e-06, "loss": 0.5721521377563477, "step": 1055 }, { "epoch": 0.24345821325648415, "grad_norm": 1.206042721018001, "learning_rate": 1.972115747787434e-06, "loss": 0.6909880042076111, "step": 1056 }, { "epoch": 0.24368876080691643, "grad_norm": 0.9431570922856867, "learning_rate": 1.9720262925162823e-06, "loss": 0.5736039876937866, "step": 1057 }, { "epoch": 0.2439193083573487, "grad_norm": 0.8314919230697462, "learning_rate": 1.971936696019822e-06, "loss": 0.5220572352409363, "step": 1058 }, { "epoch": 0.244149855907781, "grad_norm": 1.4060669997055812, "learning_rate": 1.971846958311071e-06, "loss": 0.6336761116981506, "step": 1059 }, { "epoch": 0.24438040345821327, "grad_norm": 0.8735700995362021, "learning_rate": 1.9717570794030663e-06, "loss": 0.46690547466278076, "step": 1060 }, { "epoch": 0.24461095100864552, "grad_norm": 0.9452291694943687, "learning_rate": 1.971667059308867e-06, "loss": 0.5011268854141235, "step": 1061 }, { "epoch": 0.2448414985590778, "grad_norm": 1.026129576805315, "learning_rate": 1.971576898041552e-06, "loss": 0.6146481037139893, "step": 1062 }, { "epoch": 0.24507204610951008, "grad_norm": 1.1263839223339915, "learning_rate": 1.9714865956142216e-06, "loss": 0.6958246231079102, "step": 1063 }, { "epoch": 0.24530259365994236, "grad_norm": 1.0566004679295709, "learning_rate": 1.9713961520399943e-06, "loss": 0.6039535403251648, "step": 1064 }, { "epoch": 0.24553314121037464, "grad_norm": 1.0074912611483957, "learning_rate": 1.9713055673320116e-06, "loss": 0.5674936771392822, "step": 1065 }, { "epoch": 0.24576368876080693, "grad_norm": 0.9022345989400322, "learning_rate": 1.9712148415034343e-06, "loss": 0.573884129524231, "step": 1066 }, { "epoch": 0.2459942363112392, "grad_norm": 1.0703229553304763, "learning_rate": 1.971123974567444e-06, "loss": 0.5769180059432983, "step": 1067 }, { "epoch": 0.24622478386167146, "grad_norm": 1.0813400566275309, "learning_rate": 1.9710329665372423e-06, "loss": 0.6574220657348633, "step": 1068 }, { "epoch": 0.24645533141210374, "grad_norm": 0.9314089138220044, "learning_rate": 1.970941817426052e-06, "loss": 0.5505692362785339, "step": 1069 }, { "epoch": 0.24668587896253602, "grad_norm": 1.1082972197850245, "learning_rate": 1.970850527247116e-06, "loss": 0.6399080157279968, "step": 1070 }, { "epoch": 0.2469164265129683, "grad_norm": 1.30372883235568, "learning_rate": 1.9707590960136983e-06, "loss": 0.6340930461883545, "step": 1071 }, { "epoch": 0.24714697406340058, "grad_norm": 0.8757882392932358, "learning_rate": 1.9706675237390825e-06, "loss": 0.534781277179718, "step": 1072 }, { "epoch": 0.24737752161383286, "grad_norm": 1.159381670874605, "learning_rate": 1.970575810436573e-06, "loss": 0.5925683975219727, "step": 1073 }, { "epoch": 0.24760806916426514, "grad_norm": 1.0150707063788142, "learning_rate": 1.970483956119495e-06, "loss": 0.6358739137649536, "step": 1074 }, { "epoch": 0.2478386167146974, "grad_norm": 1.1453368323410489, "learning_rate": 1.970391960801194e-06, "loss": 0.6039003133773804, "step": 1075 }, { "epoch": 0.24806916426512968, "grad_norm": 1.1201673932401737, "learning_rate": 1.970299824495036e-06, "loss": 0.5916771292686462, "step": 1076 }, { "epoch": 0.24829971181556196, "grad_norm": 1.0794126617042616, "learning_rate": 1.9702075472144067e-06, "loss": 0.5258580446243286, "step": 1077 }, { "epoch": 0.24853025936599424, "grad_norm": 0.8601905249462747, "learning_rate": 1.9701151289727147e-06, "loss": 0.6265199184417725, "step": 1078 }, { "epoch": 0.24876080691642652, "grad_norm": 0.979702903534406, "learning_rate": 1.9700225697833854e-06, "loss": 0.579441249370575, "step": 1079 }, { "epoch": 0.2489913544668588, "grad_norm": 0.8004324701001138, "learning_rate": 1.969929869659868e-06, "loss": 0.5297039747238159, "step": 1080 }, { "epoch": 0.24922190201729105, "grad_norm": 1.0732237546296053, "learning_rate": 1.9698370286156306e-06, "loss": 0.5611151456832886, "step": 1081 }, { "epoch": 0.24945244956772333, "grad_norm": 0.9095359949477679, "learning_rate": 1.969744046664162e-06, "loss": 0.5669015645980835, "step": 1082 }, { "epoch": 0.24968299711815561, "grad_norm": 0.9098376279113534, "learning_rate": 1.9696509238189715e-06, "loss": 0.5561148524284363, "step": 1083 }, { "epoch": 0.2499135446685879, "grad_norm": 0.8595555128128054, "learning_rate": 1.9695576600935886e-06, "loss": 0.5287264585494995, "step": 1084 }, { "epoch": 0.2501440922190202, "grad_norm": 0.9117185255603872, "learning_rate": 1.9694642555015638e-06, "loss": 0.6170656681060791, "step": 1085 }, { "epoch": 0.25037463976945246, "grad_norm": 1.0935336615242157, "learning_rate": 1.969370710056468e-06, "loss": 0.5874326229095459, "step": 1086 }, { "epoch": 0.25060518731988474, "grad_norm": 1.042070981592915, "learning_rate": 1.9692770237718924e-06, "loss": 0.5682862997055054, "step": 1087 }, { "epoch": 0.250835734870317, "grad_norm": 1.0488760267187054, "learning_rate": 1.969183196661448e-06, "loss": 0.5732690095901489, "step": 1088 }, { "epoch": 0.2510662824207493, "grad_norm": 1.0767943466207117, "learning_rate": 1.9690892287387675e-06, "loss": 0.5902074575424194, "step": 1089 }, { "epoch": 0.2512968299711816, "grad_norm": 2.0373106353221635, "learning_rate": 1.9689951200175033e-06, "loss": 0.6051011085510254, "step": 1090 }, { "epoch": 0.25152737752161386, "grad_norm": 0.9633389270435931, "learning_rate": 1.9689008705113283e-06, "loss": 0.5677424669265747, "step": 1091 }, { "epoch": 0.2517579250720461, "grad_norm": 1.198630695418835, "learning_rate": 1.9688064802339364e-06, "loss": 0.5889866352081299, "step": 1092 }, { "epoch": 0.25198847262247837, "grad_norm": 0.8479895449997884, "learning_rate": 1.968711949199041e-06, "loss": 0.574920654296875, "step": 1093 }, { "epoch": 0.25221902017291065, "grad_norm": 1.1022849092895963, "learning_rate": 1.9686172774203765e-06, "loss": 0.642555296421051, "step": 1094 }, { "epoch": 0.2524495677233429, "grad_norm": 1.1981893904929435, "learning_rate": 1.9685224649116985e-06, "loss": 0.5177907347679138, "step": 1095 }, { "epoch": 0.2526801152737752, "grad_norm": 0.9512396951241048, "learning_rate": 1.968427511686781e-06, "loss": 0.5089443922042847, "step": 1096 }, { "epoch": 0.2529106628242075, "grad_norm": 1.285147110492746, "learning_rate": 1.9683324177594205e-06, "loss": 0.5840367078781128, "step": 1097 }, { "epoch": 0.25314121037463977, "grad_norm": 0.9223951585699367, "learning_rate": 1.9682371831434335e-06, "loss": 0.5504645109176636, "step": 1098 }, { "epoch": 0.25337175792507205, "grad_norm": 1.0341672284132668, "learning_rate": 1.968141807852656e-06, "loss": 0.5837891101837158, "step": 1099 }, { "epoch": 0.25360230547550433, "grad_norm": 1.0197758826620191, "learning_rate": 1.9680462919009453e-06, "loss": 0.5304594039916992, "step": 1100 }, { "epoch": 0.2538328530259366, "grad_norm": 1.1017033952693849, "learning_rate": 1.9679506353021784e-06, "loss": 0.6483093500137329, "step": 1101 }, { "epoch": 0.2540634005763689, "grad_norm": 0.9880067001638037, "learning_rate": 1.967854838070254e-06, "loss": 0.5041504502296448, "step": 1102 }, { "epoch": 0.2542939481268012, "grad_norm": 0.9447138676205955, "learning_rate": 1.9677589002190897e-06, "loss": 0.574493408203125, "step": 1103 }, { "epoch": 0.25452449567723345, "grad_norm": 1.1268609072582123, "learning_rate": 1.9676628217626244e-06, "loss": 0.5675390958786011, "step": 1104 }, { "epoch": 0.2547550432276657, "grad_norm": 0.8346824743228196, "learning_rate": 1.967566602714818e-06, "loss": 0.5008331537246704, "step": 1105 }, { "epoch": 0.25498559077809796, "grad_norm": 1.2638039956043987, "learning_rate": 1.967470243089649e-06, "loss": 0.6198144555091858, "step": 1106 }, { "epoch": 0.25521613832853024, "grad_norm": 1.0477413614815971, "learning_rate": 1.9673737429011192e-06, "loss": 0.6584227085113525, "step": 1107 }, { "epoch": 0.2554466858789625, "grad_norm": 1.1653742789318777, "learning_rate": 1.967277102163247e-06, "loss": 0.6924588680267334, "step": 1108 }, { "epoch": 0.2556772334293948, "grad_norm": 0.9489629504050993, "learning_rate": 1.9671803208900743e-06, "loss": 0.4881629943847656, "step": 1109 }, { "epoch": 0.2559077809798271, "grad_norm": 1.0207283356812054, "learning_rate": 1.967083399095663e-06, "loss": 0.5229992866516113, "step": 1110 }, { "epoch": 0.25613832853025936, "grad_norm": 0.9505227305729165, "learning_rate": 1.9669863367940933e-06, "loss": 0.5473639965057373, "step": 1111 }, { "epoch": 0.25636887608069164, "grad_norm": 1.1682394417720268, "learning_rate": 1.966889133999469e-06, "loss": 0.6385387182235718, "step": 1112 }, { "epoch": 0.2565994236311239, "grad_norm": 0.914948520803806, "learning_rate": 1.966791790725911e-06, "loss": 0.6966750621795654, "step": 1113 }, { "epoch": 0.2568299711815562, "grad_norm": 1.0791431206016944, "learning_rate": 1.966694306987564e-06, "loss": 0.62409508228302, "step": 1114 }, { "epoch": 0.2570605187319885, "grad_norm": 1.1529607219012756, "learning_rate": 1.96659668279859e-06, "loss": 0.6782431602478027, "step": 1115 }, { "epoch": 0.25729106628242077, "grad_norm": 0.9868247966268149, "learning_rate": 1.9664989181731736e-06, "loss": 0.5746676921844482, "step": 1116 }, { "epoch": 0.25752161383285305, "grad_norm": 1.1773953097235699, "learning_rate": 1.9664010131255185e-06, "loss": 0.622604250907898, "step": 1117 }, { "epoch": 0.2577521613832853, "grad_norm": 0.9014524054572233, "learning_rate": 1.9663029676698493e-06, "loss": 0.5258551836013794, "step": 1118 }, { "epoch": 0.25798270893371755, "grad_norm": 0.8949489094709896, "learning_rate": 1.9662047818204113e-06, "loss": 0.49920374155044556, "step": 1119 }, { "epoch": 0.25821325648414983, "grad_norm": 0.9638407580541706, "learning_rate": 1.96610645559147e-06, "loss": 0.5653362274169922, "step": 1120 }, { "epoch": 0.2584438040345821, "grad_norm": 0.8681512571682525, "learning_rate": 1.9660079889973106e-06, "loss": 0.635971188545227, "step": 1121 }, { "epoch": 0.2586743515850144, "grad_norm": 0.9626228874806679, "learning_rate": 1.9659093820522395e-06, "loss": 0.5810589790344238, "step": 1122 }, { "epoch": 0.2589048991354467, "grad_norm": 0.9265775915235015, "learning_rate": 1.9658106347705837e-06, "loss": 0.5082226991653442, "step": 1123 }, { "epoch": 0.25913544668587896, "grad_norm": 1.166109865260175, "learning_rate": 1.9657117471666893e-06, "loss": 0.5961207151412964, "step": 1124 }, { "epoch": 0.25936599423631124, "grad_norm": 0.8593078573487662, "learning_rate": 1.9656127192549247e-06, "loss": 0.5848531723022461, "step": 1125 }, { "epoch": 0.2595965417867435, "grad_norm": 1.0267097223141122, "learning_rate": 1.965513551049677e-06, "loss": 0.5630871057510376, "step": 1126 }, { "epoch": 0.2598270893371758, "grad_norm": 1.0523335755503282, "learning_rate": 1.965414242565354e-06, "loss": 0.5128393769264221, "step": 1127 }, { "epoch": 0.2600576368876081, "grad_norm": 0.9700118998001345, "learning_rate": 1.9653147938163846e-06, "loss": 0.5488131046295166, "step": 1128 }, { "epoch": 0.26028818443804036, "grad_norm": 0.9798403795119491, "learning_rate": 1.9652152048172177e-06, "loss": 0.5557059049606323, "step": 1129 }, { "epoch": 0.26051873198847264, "grad_norm": 0.9637059141265438, "learning_rate": 1.965115475582323e-06, "loss": 0.49585384130477905, "step": 1130 }, { "epoch": 0.2607492795389049, "grad_norm": 0.9836788141885479, "learning_rate": 1.9650156061261887e-06, "loss": 0.5157963037490845, "step": 1131 }, { "epoch": 0.2609798270893372, "grad_norm": 0.9745401863616632, "learning_rate": 1.964915596463326e-06, "loss": 0.487041175365448, "step": 1132 }, { "epoch": 0.2612103746397694, "grad_norm": 0.9355004815538274, "learning_rate": 1.9648154466082655e-06, "loss": 0.5007308721542358, "step": 1133 }, { "epoch": 0.2614409221902017, "grad_norm": 1.044356333568675, "learning_rate": 1.9647151565755567e-06, "loss": 0.5466841459274292, "step": 1134 }, { "epoch": 0.261671469740634, "grad_norm": 0.9908185235722049, "learning_rate": 1.964614726379772e-06, "loss": 0.6433593034744263, "step": 1135 }, { "epoch": 0.26190201729106627, "grad_norm": 1.1188612298182607, "learning_rate": 1.964514156035502e-06, "loss": 0.6677781939506531, "step": 1136 }, { "epoch": 0.26213256484149855, "grad_norm": 1.1970113817017574, "learning_rate": 1.9644134455573584e-06, "loss": 0.601581335067749, "step": 1137 }, { "epoch": 0.26236311239193083, "grad_norm": 1.0197346026129679, "learning_rate": 1.964312594959974e-06, "loss": 0.512954592704773, "step": 1138 }, { "epoch": 0.2625936599423631, "grad_norm": 1.106389545686755, "learning_rate": 1.964211604258001e-06, "loss": 0.5901329517364502, "step": 1139 }, { "epoch": 0.2628242074927954, "grad_norm": 1.3151832775759593, "learning_rate": 1.9641104734661126e-06, "loss": 0.5046425461769104, "step": 1140 }, { "epoch": 0.2630547550432277, "grad_norm": 0.7869009238004686, "learning_rate": 1.9640092025990017e-06, "loss": 0.5134037137031555, "step": 1141 }, { "epoch": 0.26328530259365995, "grad_norm": 1.0902924836300172, "learning_rate": 1.963907791671382e-06, "loss": 0.512336015701294, "step": 1142 }, { "epoch": 0.26351585014409223, "grad_norm": 0.9203546707939008, "learning_rate": 1.9638062406979877e-06, "loss": 0.5255711674690247, "step": 1143 }, { "epoch": 0.2637463976945245, "grad_norm": 1.1168638198696839, "learning_rate": 1.963704549693573e-06, "loss": 0.5508878827095032, "step": 1144 }, { "epoch": 0.2639769452449568, "grad_norm": 1.0728921970422391, "learning_rate": 1.9636027186729122e-06, "loss": 0.5642615556716919, "step": 1145 }, { "epoch": 0.2642074927953891, "grad_norm": 0.9196228228553693, "learning_rate": 1.9635007476508006e-06, "loss": 0.5792304873466492, "step": 1146 }, { "epoch": 0.2644380403458213, "grad_norm": 0.9053914378248806, "learning_rate": 1.9633986366420534e-06, "loss": 0.5633686780929565, "step": 1147 }, { "epoch": 0.2646685878962536, "grad_norm": 0.9794987092956121, "learning_rate": 1.9632963856615063e-06, "loss": 0.5152523517608643, "step": 1148 }, { "epoch": 0.26489913544668586, "grad_norm": 0.9488988181213043, "learning_rate": 1.9631939947240155e-06, "loss": 0.5173834562301636, "step": 1149 }, { "epoch": 0.26512968299711814, "grad_norm": 1.224448518118437, "learning_rate": 1.963091463844457e-06, "loss": 0.6685044169425964, "step": 1150 }, { "epoch": 0.2653602305475504, "grad_norm": 1.3480795736809403, "learning_rate": 1.9629887930377277e-06, "loss": 0.5302361249923706, "step": 1151 }, { "epoch": 0.2655907780979827, "grad_norm": 1.0617602165821738, "learning_rate": 1.9628859823187445e-06, "loss": 0.6829941272735596, "step": 1152 }, { "epoch": 0.265821325648415, "grad_norm": 0.9098011562920064, "learning_rate": 1.962783031702445e-06, "loss": 0.5956755876541138, "step": 1153 }, { "epoch": 0.26605187319884727, "grad_norm": 1.0333072443812423, "learning_rate": 1.9626799412037866e-06, "loss": 0.5287376642227173, "step": 1154 }, { "epoch": 0.26628242074927955, "grad_norm": 0.885618968970558, "learning_rate": 1.962576710837747e-06, "loss": 0.5352818965911865, "step": 1155 }, { "epoch": 0.2665129682997118, "grad_norm": 1.1674068509683497, "learning_rate": 1.962473340619325e-06, "loss": 0.6345375776290894, "step": 1156 }, { "epoch": 0.2667435158501441, "grad_norm": 0.9996146363746836, "learning_rate": 1.962369830563539e-06, "loss": 0.5909037590026855, "step": 1157 }, { "epoch": 0.2669740634005764, "grad_norm": 0.9050400725217519, "learning_rate": 1.962266180685428e-06, "loss": 0.5138572454452515, "step": 1158 }, { "epoch": 0.26720461095100867, "grad_norm": 1.070933486271155, "learning_rate": 1.962162391000051e-06, "loss": 0.47365278005599976, "step": 1159 }, { "epoch": 0.2674351585014409, "grad_norm": 1.0003212071539427, "learning_rate": 1.962058461522488e-06, "loss": 0.5489984750747681, "step": 1160 }, { "epoch": 0.2676657060518732, "grad_norm": 0.8947391835141708, "learning_rate": 1.9619543922678383e-06, "loss": 0.5615831017494202, "step": 1161 }, { "epoch": 0.26789625360230546, "grad_norm": 1.1210395543673535, "learning_rate": 1.9618501832512232e-06, "loss": 0.6183937788009644, "step": 1162 }, { "epoch": 0.26812680115273774, "grad_norm": 0.9636900062000658, "learning_rate": 1.9617458344877815e-06, "loss": 0.5763455033302307, "step": 1163 }, { "epoch": 0.26835734870317, "grad_norm": 1.0756939610160325, "learning_rate": 1.9616413459926755e-06, "loss": 0.42940446734428406, "step": 1164 }, { "epoch": 0.2685878962536023, "grad_norm": 1.0571097576051838, "learning_rate": 1.9615367177810854e-06, "loss": 0.5339791178703308, "step": 1165 }, { "epoch": 0.2688184438040346, "grad_norm": 0.9952506277636632, "learning_rate": 1.961431949868213e-06, "loss": 0.4905571937561035, "step": 1166 }, { "epoch": 0.26904899135446686, "grad_norm": 1.1894286703853092, "learning_rate": 1.9613270422692796e-06, "loss": 0.5400816202163696, "step": 1167 }, { "epoch": 0.26927953890489914, "grad_norm": 1.2337518633584537, "learning_rate": 1.9612219949995276e-06, "loss": 0.55318284034729, "step": 1168 }, { "epoch": 0.2695100864553314, "grad_norm": 0.9693884072277771, "learning_rate": 1.9611168080742193e-06, "loss": 0.5832536816596985, "step": 1169 }, { "epoch": 0.2697406340057637, "grad_norm": 1.1742682617570424, "learning_rate": 1.961011481508637e-06, "loss": 0.6217491626739502, "step": 1170 }, { "epoch": 0.269971181556196, "grad_norm": 0.8391232455044884, "learning_rate": 1.960906015318084e-06, "loss": 0.5328724384307861, "step": 1171 }, { "epoch": 0.27020172910662826, "grad_norm": 1.0363005984112303, "learning_rate": 1.960800409517882e-06, "loss": 0.5914000272750854, "step": 1172 }, { "epoch": 0.27043227665706054, "grad_norm": 0.8623610094735963, "learning_rate": 1.9606946641233765e-06, "loss": 0.5801169872283936, "step": 1173 }, { "epoch": 0.27066282420749277, "grad_norm": 1.076472927325218, "learning_rate": 1.96058877914993e-06, "loss": 0.606931746006012, "step": 1174 }, { "epoch": 0.27089337175792505, "grad_norm": 0.9432338977489813, "learning_rate": 1.960482754612926e-06, "loss": 0.492758572101593, "step": 1175 }, { "epoch": 0.27112391930835733, "grad_norm": 0.8217955088144805, "learning_rate": 1.9603765905277705e-06, "loss": 0.507538914680481, "step": 1176 }, { "epoch": 0.2713544668587896, "grad_norm": 0.9913040193455875, "learning_rate": 1.9602702869098863e-06, "loss": 0.4696667790412903, "step": 1177 }, { "epoch": 0.2715850144092219, "grad_norm": 1.120659252201242, "learning_rate": 1.9601638437747193e-06, "loss": 0.6008190512657166, "step": 1178 }, { "epoch": 0.2718155619596542, "grad_norm": 0.9398480673155941, "learning_rate": 1.960057261137734e-06, "loss": 0.6491943597793579, "step": 1179 }, { "epoch": 0.27204610951008645, "grad_norm": 0.9683862753304229, "learning_rate": 1.9599505390144158e-06, "loss": 0.5670884847640991, "step": 1180 }, { "epoch": 0.27227665706051873, "grad_norm": 0.9863251399458569, "learning_rate": 1.959843677420271e-06, "loss": 0.6009481549263, "step": 1181 }, { "epoch": 0.272507204610951, "grad_norm": 1.0320129585811584, "learning_rate": 1.9597366763708244e-06, "loss": 0.6310709714889526, "step": 1182 }, { "epoch": 0.2727377521613833, "grad_norm": 1.0214391551855733, "learning_rate": 1.9596295358816227e-06, "loss": 0.6486451029777527, "step": 1183 }, { "epoch": 0.2729682997118156, "grad_norm": 1.2065179679905316, "learning_rate": 1.9595222559682323e-06, "loss": 0.5718737840652466, "step": 1184 }, { "epoch": 0.27319884726224786, "grad_norm": 1.0556460449279068, "learning_rate": 1.95941483664624e-06, "loss": 0.5779617428779602, "step": 1185 }, { "epoch": 0.27342939481268014, "grad_norm": 0.9802737305305549, "learning_rate": 1.9593072779312522e-06, "loss": 0.5744156837463379, "step": 1186 }, { "epoch": 0.2736599423631124, "grad_norm": 1.0517062665649357, "learning_rate": 1.959199579838897e-06, "loss": 0.5460283756256104, "step": 1187 }, { "epoch": 0.27389048991354464, "grad_norm": 1.1680872558774986, "learning_rate": 1.9590917423848205e-06, "loss": 0.6543044447898865, "step": 1188 }, { "epoch": 0.2741210374639769, "grad_norm": 1.224778428725764, "learning_rate": 1.9589837655846913e-06, "loss": 0.5198811292648315, "step": 1189 }, { "epoch": 0.2743515850144092, "grad_norm": 1.251778848697816, "learning_rate": 1.9588756494541974e-06, "loss": 0.5084035396575928, "step": 1190 }, { "epoch": 0.2745821325648415, "grad_norm": 1.0710257569853987, "learning_rate": 1.958767394009046e-06, "loss": 0.5904719829559326, "step": 1191 }, { "epoch": 0.27481268011527377, "grad_norm": 0.9829228316564669, "learning_rate": 1.9586589992649663e-06, "loss": 0.5030600428581238, "step": 1192 }, { "epoch": 0.27504322766570605, "grad_norm": 0.9873617968914287, "learning_rate": 1.958550465237707e-06, "loss": 0.5184324383735657, "step": 1193 }, { "epoch": 0.27527377521613833, "grad_norm": 0.9906489399793428, "learning_rate": 1.9584417919430368e-06, "loss": 0.5646244287490845, "step": 1194 }, { "epoch": 0.2755043227665706, "grad_norm": 0.9414859170655192, "learning_rate": 1.9583329793967446e-06, "loss": 0.5001581907272339, "step": 1195 }, { "epoch": 0.2757348703170029, "grad_norm": 1.084929589963752, "learning_rate": 1.95822402761464e-06, "loss": 0.5051171183586121, "step": 1196 }, { "epoch": 0.27596541786743517, "grad_norm": 1.0285988722389665, "learning_rate": 1.9581149366125517e-06, "loss": 0.5530160069465637, "step": 1197 }, { "epoch": 0.27619596541786745, "grad_norm": 1.0520557763319474, "learning_rate": 1.9580057064063305e-06, "loss": 0.614437460899353, "step": 1198 }, { "epoch": 0.27642651296829973, "grad_norm": 1.0793234079478642, "learning_rate": 1.9578963370118463e-06, "loss": 0.5519070625305176, "step": 1199 }, { "epoch": 0.276657060518732, "grad_norm": 1.0375188615074011, "learning_rate": 1.9577868284449894e-06, "loss": 0.5517419576644897, "step": 1200 }, { "epoch": 0.2768876080691643, "grad_norm": 1.1157623296809824, "learning_rate": 1.9576771807216692e-06, "loss": 0.5128840804100037, "step": 1201 }, { "epoch": 0.2771181556195965, "grad_norm": 1.1639234270532368, "learning_rate": 1.9575673938578177e-06, "loss": 0.535677433013916, "step": 1202 }, { "epoch": 0.2773487031700288, "grad_norm": 1.0321913645714726, "learning_rate": 1.957457467869385e-06, "loss": 0.3944365680217743, "step": 1203 }, { "epoch": 0.2775792507204611, "grad_norm": 1.1284561091398966, "learning_rate": 1.957347402772343e-06, "loss": 0.5461306571960449, "step": 1204 }, { "epoch": 0.27780979827089336, "grad_norm": 1.106350357831354, "learning_rate": 1.9572371985826817e-06, "loss": 0.5643556714057922, "step": 1205 }, { "epoch": 0.27804034582132564, "grad_norm": 0.9942891425121166, "learning_rate": 1.957126855316414e-06, "loss": 0.5576694011688232, "step": 1206 }, { "epoch": 0.2782708933717579, "grad_norm": 1.0327594151382022, "learning_rate": 1.9570163729895705e-06, "loss": 0.4932776689529419, "step": 1207 }, { "epoch": 0.2785014409221902, "grad_norm": 1.2617809472764765, "learning_rate": 1.956905751618204e-06, "loss": 0.5831997394561768, "step": 1208 }, { "epoch": 0.2787319884726225, "grad_norm": 1.020903077723172, "learning_rate": 1.9567949912183865e-06, "loss": 0.5535416007041931, "step": 1209 }, { "epoch": 0.27896253602305476, "grad_norm": 0.9795082042300237, "learning_rate": 1.9566840918062096e-06, "loss": 0.5003511905670166, "step": 1210 }, { "epoch": 0.27919308357348704, "grad_norm": 1.1445109073320356, "learning_rate": 1.9565730533977866e-06, "loss": 0.5496214628219604, "step": 1211 }, { "epoch": 0.2794236311239193, "grad_norm": 1.2037504849326626, "learning_rate": 1.95646187600925e-06, "loss": 0.5799363851547241, "step": 1212 }, { "epoch": 0.2796541786743516, "grad_norm": 1.0179191940873225, "learning_rate": 1.9563505596567524e-06, "loss": 0.520091712474823, "step": 1213 }, { "epoch": 0.2798847262247839, "grad_norm": 1.1205541262351417, "learning_rate": 1.9562391043564674e-06, "loss": 0.6219311952590942, "step": 1214 }, { "epoch": 0.2801152737752161, "grad_norm": 1.1950602989006576, "learning_rate": 1.9561275101245882e-06, "loss": 0.5564324259757996, "step": 1215 }, { "epoch": 0.2803458213256484, "grad_norm": 1.0120577427828765, "learning_rate": 1.956015776977328e-06, "loss": 0.5743024349212646, "step": 1216 }, { "epoch": 0.2805763688760807, "grad_norm": 1.017164461130754, "learning_rate": 1.955903904930921e-06, "loss": 0.5233654379844666, "step": 1217 }, { "epoch": 0.28080691642651295, "grad_norm": 0.9067723518704797, "learning_rate": 1.9557918940016204e-06, "loss": 0.4716556668281555, "step": 1218 }, { "epoch": 0.28103746397694523, "grad_norm": 1.2253551599931534, "learning_rate": 1.9556797442057002e-06, "loss": 0.6010521650314331, "step": 1219 }, { "epoch": 0.2812680115273775, "grad_norm": 0.8966324233540468, "learning_rate": 1.9555674555594553e-06, "loss": 0.5651501417160034, "step": 1220 }, { "epoch": 0.2814985590778098, "grad_norm": 0.9432307335638032, "learning_rate": 1.9554550280791994e-06, "loss": 0.5450448393821716, "step": 1221 }, { "epoch": 0.2817291066282421, "grad_norm": 1.0874841889552394, "learning_rate": 1.9553424617812675e-06, "loss": 0.5615352392196655, "step": 1222 }, { "epoch": 0.28195965417867436, "grad_norm": 1.0554961372199343, "learning_rate": 1.9552297566820143e-06, "loss": 0.5538485050201416, "step": 1223 }, { "epoch": 0.28219020172910664, "grad_norm": 0.8618486691105575, "learning_rate": 1.9551169127978145e-06, "loss": 0.5403180122375488, "step": 1224 }, { "epoch": 0.2824207492795389, "grad_norm": 1.0882900408954626, "learning_rate": 1.955003930145063e-06, "loss": 0.6076033115386963, "step": 1225 }, { "epoch": 0.2826512968299712, "grad_norm": 0.9873645747206957, "learning_rate": 1.954890808740175e-06, "loss": 0.6477121114730835, "step": 1226 }, { "epoch": 0.2828818443804035, "grad_norm": 1.5079810049528974, "learning_rate": 1.954777548599586e-06, "loss": 0.601321816444397, "step": 1227 }, { "epoch": 0.28311239193083576, "grad_norm": 0.8957891169447902, "learning_rate": 1.954664149739752e-06, "loss": 0.4857567250728607, "step": 1228 }, { "epoch": 0.283342939481268, "grad_norm": 1.0502205225822576, "learning_rate": 1.954550612177148e-06, "loss": 0.5555423498153687, "step": 1229 }, { "epoch": 0.28357348703170027, "grad_norm": 1.1164653543013627, "learning_rate": 1.95443693592827e-06, "loss": 0.4719756543636322, "step": 1230 }, { "epoch": 0.28380403458213255, "grad_norm": 1.0822345938438416, "learning_rate": 1.9543231210096337e-06, "loss": 0.5177173018455505, "step": 1231 }, { "epoch": 0.28403458213256483, "grad_norm": 1.275308044022549, "learning_rate": 1.954209167437776e-06, "loss": 0.5307407975196838, "step": 1232 }, { "epoch": 0.2842651296829971, "grad_norm": 0.9179700709062559, "learning_rate": 1.9540950752292525e-06, "loss": 0.49399054050445557, "step": 1233 }, { "epoch": 0.2844956772334294, "grad_norm": 1.0252223662898894, "learning_rate": 1.95398084440064e-06, "loss": 0.5200883150100708, "step": 1234 }, { "epoch": 0.28472622478386167, "grad_norm": 1.292162031598749, "learning_rate": 1.953866474968535e-06, "loss": 0.576574444770813, "step": 1235 }, { "epoch": 0.28495677233429395, "grad_norm": 1.0138321768032905, "learning_rate": 1.953751966949554e-06, "loss": 0.5668514966964722, "step": 1236 }, { "epoch": 0.28518731988472623, "grad_norm": 1.3589401298927464, "learning_rate": 1.9536373203603334e-06, "loss": 0.5539048910140991, "step": 1237 }, { "epoch": 0.2854178674351585, "grad_norm": 1.2048300056875207, "learning_rate": 1.953522535217531e-06, "loss": 0.5473074913024902, "step": 1238 }, { "epoch": 0.2856484149855908, "grad_norm": 0.9738586778260675, "learning_rate": 1.953407611537823e-06, "loss": 0.5185353755950928, "step": 1239 }, { "epoch": 0.2858789625360231, "grad_norm": 0.9337692843468727, "learning_rate": 1.953292549337908e-06, "loss": 0.5476157665252686, "step": 1240 }, { "epoch": 0.28610951008645535, "grad_norm": 1.0006717621648324, "learning_rate": 1.9531773486345024e-06, "loss": 0.5680351257324219, "step": 1241 }, { "epoch": 0.28634005763688763, "grad_norm": 1.040109549295159, "learning_rate": 1.9530620094443435e-06, "loss": 0.5800622701644897, "step": 1242 }, { "epoch": 0.28657060518731986, "grad_norm": 1.132839901571783, "learning_rate": 1.952946531784189e-06, "loss": 0.625177800655365, "step": 1243 }, { "epoch": 0.28680115273775214, "grad_norm": 0.9379385742897268, "learning_rate": 1.952830915670817e-06, "loss": 0.5468524098396301, "step": 1244 }, { "epoch": 0.2870317002881844, "grad_norm": 1.3685689236372411, "learning_rate": 1.9527151611210247e-06, "loss": 0.49441972374916077, "step": 1245 }, { "epoch": 0.2872622478386167, "grad_norm": 1.0100756301642373, "learning_rate": 1.9525992681516304e-06, "loss": 0.5579795837402344, "step": 1246 }, { "epoch": 0.287492795389049, "grad_norm": 0.9919306835197232, "learning_rate": 1.9524832367794724e-06, "loss": 0.6528097987174988, "step": 1247 }, { "epoch": 0.28772334293948126, "grad_norm": 1.0917924034643365, "learning_rate": 1.9523670670214086e-06, "loss": 0.6705083847045898, "step": 1248 }, { "epoch": 0.28795389048991354, "grad_norm": 1.2147583724315956, "learning_rate": 1.952250758894317e-06, "loss": 0.5354126691818237, "step": 1249 }, { "epoch": 0.2881844380403458, "grad_norm": 0.8572145708756924, "learning_rate": 1.9521343124150964e-06, "loss": 0.5452643036842346, "step": 1250 }, { "epoch": 0.2884149855907781, "grad_norm": 1.0087690058045542, "learning_rate": 1.952017727600665e-06, "loss": 0.49629518389701843, "step": 1251 }, { "epoch": 0.2886455331412104, "grad_norm": 1.840977052870024, "learning_rate": 1.9519010044679613e-06, "loss": 0.5664533376693726, "step": 1252 }, { "epoch": 0.28887608069164267, "grad_norm": 0.9663129128076696, "learning_rate": 1.9517841430339443e-06, "loss": 0.6245483160018921, "step": 1253 }, { "epoch": 0.28910662824207495, "grad_norm": 1.0476534361120005, "learning_rate": 1.9516671433155924e-06, "loss": 0.5137460827827454, "step": 1254 }, { "epoch": 0.28933717579250723, "grad_norm": 1.2755773097313365, "learning_rate": 1.9515500053299044e-06, "loss": 0.608911395072937, "step": 1255 }, { "epoch": 0.28956772334293945, "grad_norm": 0.9793924921709027, "learning_rate": 1.9514327290939e-06, "loss": 0.6000815629959106, "step": 1256 }, { "epoch": 0.28979827089337173, "grad_norm": 1.0139494480660591, "learning_rate": 1.951315314624617e-06, "loss": 0.4862588047981262, "step": 1257 }, { "epoch": 0.290028818443804, "grad_norm": 1.2298057267979068, "learning_rate": 1.9511977619391155e-06, "loss": 0.6026263236999512, "step": 1258 }, { "epoch": 0.2902593659942363, "grad_norm": 0.9908207251811982, "learning_rate": 1.951080071054474e-06, "loss": 0.5419458150863647, "step": 1259 }, { "epoch": 0.2904899135446686, "grad_norm": 0.9670582939367568, "learning_rate": 1.9509622419877926e-06, "loss": 0.5565283298492432, "step": 1260 }, { "epoch": 0.29072046109510086, "grad_norm": 0.8620916182528232, "learning_rate": 1.9508442747561894e-06, "loss": 0.4857860803604126, "step": 1261 }, { "epoch": 0.29095100864553314, "grad_norm": 1.0547944595229073, "learning_rate": 1.950726169376805e-06, "loss": 0.5074048638343811, "step": 1262 }, { "epoch": 0.2911815561959654, "grad_norm": 0.9913288407112848, "learning_rate": 1.9506079258667983e-06, "loss": 0.5540251731872559, "step": 1263 }, { "epoch": 0.2914121037463977, "grad_norm": 1.026810903562498, "learning_rate": 1.9504895442433487e-06, "loss": 0.6073076725006104, "step": 1264 }, { "epoch": 0.29164265129683, "grad_norm": 0.9981046042175337, "learning_rate": 1.9503710245236564e-06, "loss": 0.5984017848968506, "step": 1265 }, { "epoch": 0.29187319884726226, "grad_norm": 0.9710138044607731, "learning_rate": 1.9502523667249403e-06, "loss": 0.5397658348083496, "step": 1266 }, { "epoch": 0.29210374639769454, "grad_norm": 1.0270946207698817, "learning_rate": 1.950133570864441e-06, "loss": 0.6069176197052002, "step": 1267 }, { "epoch": 0.2923342939481268, "grad_norm": 0.9979443392497551, "learning_rate": 1.950014636959418e-06, "loss": 0.564436137676239, "step": 1268 }, { "epoch": 0.2925648414985591, "grad_norm": 1.0766140252728487, "learning_rate": 1.949895565027151e-06, "loss": 0.5707285404205322, "step": 1269 }, { "epoch": 0.29279538904899133, "grad_norm": 1.0411047503839765, "learning_rate": 1.9497763550849395e-06, "loss": 0.6490185260772705, "step": 1270 }, { "epoch": 0.2930259365994236, "grad_norm": 0.7845119542797886, "learning_rate": 1.949657007150104e-06, "loss": 0.45218831300735474, "step": 1271 }, { "epoch": 0.2932564841498559, "grad_norm": 1.1831171935673066, "learning_rate": 1.949537521239985e-06, "loss": 0.6371254324913025, "step": 1272 }, { "epoch": 0.29348703170028817, "grad_norm": 1.085157200958947, "learning_rate": 1.949417897371942e-06, "loss": 0.5877312421798706, "step": 1273 }, { "epoch": 0.29371757925072045, "grad_norm": 0.9929167587229449, "learning_rate": 1.9492981355633542e-06, "loss": 0.449150413274765, "step": 1274 }, { "epoch": 0.29394812680115273, "grad_norm": 0.9869589296917282, "learning_rate": 1.949178235831624e-06, "loss": 0.5272694826126099, "step": 1275 }, { "epoch": 0.294178674351585, "grad_norm": 1.1193537456970053, "learning_rate": 1.949058198194169e-06, "loss": 0.5642216801643372, "step": 1276 }, { "epoch": 0.2944092219020173, "grad_norm": 1.1033603738976057, "learning_rate": 1.948938022668431e-06, "loss": 0.5657975673675537, "step": 1277 }, { "epoch": 0.2946397694524496, "grad_norm": 0.9277124761937087, "learning_rate": 1.9488177092718705e-06, "loss": 0.4832008183002472, "step": 1278 }, { "epoch": 0.29487031700288185, "grad_norm": 0.9360781230227813, "learning_rate": 1.9486972580219666e-06, "loss": 0.5040748119354248, "step": 1279 }, { "epoch": 0.29510086455331414, "grad_norm": 1.060263936557758, "learning_rate": 1.9485766689362204e-06, "loss": 0.5735876560211182, "step": 1280 }, { "epoch": 0.2953314121037464, "grad_norm": 0.920068097844014, "learning_rate": 1.9484559420321522e-06, "loss": 0.5178484916687012, "step": 1281 }, { "epoch": 0.2955619596541787, "grad_norm": 1.1062841405611552, "learning_rate": 1.948335077327302e-06, "loss": 0.587762713432312, "step": 1282 }, { "epoch": 0.295792507204611, "grad_norm": 0.9847238724786035, "learning_rate": 1.9482140748392304e-06, "loss": 0.525052547454834, "step": 1283 }, { "epoch": 0.2960230547550432, "grad_norm": 1.3177666588340105, "learning_rate": 1.948092934585518e-06, "loss": 0.5834689736366272, "step": 1284 }, { "epoch": 0.2962536023054755, "grad_norm": 0.8901759039642109, "learning_rate": 1.947971656583765e-06, "loss": 0.5162187814712524, "step": 1285 }, { "epoch": 0.29648414985590776, "grad_norm": 0.9676490343075778, "learning_rate": 1.947850240851591e-06, "loss": 0.5092250108718872, "step": 1286 }, { "epoch": 0.29671469740634004, "grad_norm": 0.9800964573508747, "learning_rate": 1.9477286874066385e-06, "loss": 0.46872952580451965, "step": 1287 }, { "epoch": 0.2969452449567723, "grad_norm": 0.9775546666041902, "learning_rate": 1.947606996266566e-06, "loss": 0.5852276682853699, "step": 1288 }, { "epoch": 0.2971757925072046, "grad_norm": 0.9644782015943425, "learning_rate": 1.947485167449055e-06, "loss": 0.5660973787307739, "step": 1289 }, { "epoch": 0.2974063400576369, "grad_norm": 0.8399040326569367, "learning_rate": 1.9473632009718057e-06, "loss": 0.4820208251476288, "step": 1290 }, { "epoch": 0.29763688760806917, "grad_norm": 1.075602040364581, "learning_rate": 1.9472410968525384e-06, "loss": 0.5744599103927612, "step": 1291 }, { "epoch": 0.29786743515850145, "grad_norm": 1.3419660280345462, "learning_rate": 1.947118855108994e-06, "loss": 0.6871058940887451, "step": 1292 }, { "epoch": 0.29809798270893373, "grad_norm": 1.1137065086424975, "learning_rate": 1.946996475758932e-06, "loss": 0.4869844913482666, "step": 1293 }, { "epoch": 0.298328530259366, "grad_norm": 1.029480216558641, "learning_rate": 1.946873958820134e-06, "loss": 0.4967701733112335, "step": 1294 }, { "epoch": 0.2985590778097983, "grad_norm": 1.0144576128566964, "learning_rate": 1.9467513043104e-06, "loss": 0.5952246189117432, "step": 1295 }, { "epoch": 0.29878962536023057, "grad_norm": 1.064633828381666, "learning_rate": 1.94662851224755e-06, "loss": 0.4900238513946533, "step": 1296 }, { "epoch": 0.29902017291066285, "grad_norm": 0.9918014262454831, "learning_rate": 1.946505582649425e-06, "loss": 0.5941853523254395, "step": 1297 }, { "epoch": 0.2992507204610951, "grad_norm": 1.1235707771533257, "learning_rate": 1.9463825155338848e-06, "loss": 0.6051995754241943, "step": 1298 }, { "epoch": 0.29948126801152736, "grad_norm": 1.072282162357322, "learning_rate": 1.94625931091881e-06, "loss": 0.6283519268035889, "step": 1299 }, { "epoch": 0.29971181556195964, "grad_norm": 1.0103002736371975, "learning_rate": 1.9461359688221017e-06, "loss": 0.6248390674591064, "step": 1300 }, { "epoch": 0.2999423631123919, "grad_norm": 1.1117448751600545, "learning_rate": 1.9460124892616794e-06, "loss": 0.5948354005813599, "step": 1301 }, { "epoch": 0.3001729106628242, "grad_norm": 1.0728652421682976, "learning_rate": 1.9458888722554835e-06, "loss": 0.5658224821090698, "step": 1302 }, { "epoch": 0.3004034582132565, "grad_norm": 1.4319897671453239, "learning_rate": 1.9457651178214742e-06, "loss": 0.570247232913971, "step": 1303 }, { "epoch": 0.30063400576368876, "grad_norm": 1.0399032267284913, "learning_rate": 1.945641225977632e-06, "loss": 0.5247939825057983, "step": 1304 }, { "epoch": 0.30086455331412104, "grad_norm": 1.1738678571390115, "learning_rate": 1.9455171967419568e-06, "loss": 0.6583060622215271, "step": 1305 }, { "epoch": 0.3010951008645533, "grad_norm": 1.2083121778111987, "learning_rate": 1.945393030132469e-06, "loss": 0.506061851978302, "step": 1306 }, { "epoch": 0.3013256484149856, "grad_norm": 1.0324264811301358, "learning_rate": 1.9452687261672086e-06, "loss": 0.5356897115707397, "step": 1307 }, { "epoch": 0.3015561959654179, "grad_norm": 1.2305285140166144, "learning_rate": 1.945144284864236e-06, "loss": 0.6293138265609741, "step": 1308 }, { "epoch": 0.30178674351585016, "grad_norm": 1.153317719192606, "learning_rate": 1.9450197062416307e-06, "loss": 0.5654667019844055, "step": 1309 }, { "epoch": 0.30201729106628245, "grad_norm": 1.0118363366308951, "learning_rate": 1.944894990317493e-06, "loss": 0.5266513228416443, "step": 1310 }, { "epoch": 0.30224783861671467, "grad_norm": 1.0768469268187721, "learning_rate": 1.944770137109943e-06, "loss": 0.5196292400360107, "step": 1311 }, { "epoch": 0.30247838616714695, "grad_norm": 0.9943801399014878, "learning_rate": 1.94464514663712e-06, "loss": 0.6095438003540039, "step": 1312 }, { "epoch": 0.30270893371757923, "grad_norm": 0.9779112540640975, "learning_rate": 1.9445200189171844e-06, "loss": 0.5676658153533936, "step": 1313 }, { "epoch": 0.3029394812680115, "grad_norm": 1.148105030397317, "learning_rate": 1.9443947539683152e-06, "loss": 0.6375502347946167, "step": 1314 }, { "epoch": 0.3031700288184438, "grad_norm": 1.367494353612328, "learning_rate": 1.9442693518087132e-06, "loss": 0.550786018371582, "step": 1315 }, { "epoch": 0.3034005763688761, "grad_norm": 1.1457742620403593, "learning_rate": 1.944143812456597e-06, "loss": 0.5615516901016235, "step": 1316 }, { "epoch": 0.30363112391930835, "grad_norm": 1.1750754942303703, "learning_rate": 1.9440181359302067e-06, "loss": 0.566293478012085, "step": 1317 }, { "epoch": 0.30386167146974064, "grad_norm": 1.1631676838517875, "learning_rate": 1.943892322247802e-06, "loss": 0.6293504238128662, "step": 1318 }, { "epoch": 0.3040922190201729, "grad_norm": 1.3799066955799153, "learning_rate": 1.9437663714276614e-06, "loss": 0.5615923404693604, "step": 1319 }, { "epoch": 0.3043227665706052, "grad_norm": 1.0656917099801624, "learning_rate": 1.9436402834880854e-06, "loss": 0.630609929561615, "step": 1320 }, { "epoch": 0.3045533141210375, "grad_norm": 1.178750749580376, "learning_rate": 1.9435140584473923e-06, "loss": 0.6257727742195129, "step": 1321 }, { "epoch": 0.30478386167146976, "grad_norm": 1.0402167057810885, "learning_rate": 1.943387696323922e-06, "loss": 0.5694669485092163, "step": 1322 }, { "epoch": 0.30501440922190204, "grad_norm": 0.9945053454829159, "learning_rate": 1.943261197136033e-06, "loss": 0.606473445892334, "step": 1323 }, { "epoch": 0.3052449567723343, "grad_norm": 1.1845589501222127, "learning_rate": 1.943134560902105e-06, "loss": 0.585598349571228, "step": 1324 }, { "epoch": 0.30547550432276654, "grad_norm": 1.0688662630155763, "learning_rate": 1.943007787640536e-06, "loss": 0.5569879412651062, "step": 1325 }, { "epoch": 0.3057060518731988, "grad_norm": 1.0126453524950452, "learning_rate": 1.942880877369746e-06, "loss": 0.5914568305015564, "step": 1326 }, { "epoch": 0.3059365994236311, "grad_norm": 1.0252549201933292, "learning_rate": 1.9427538301081723e-06, "loss": 0.47556912899017334, "step": 1327 }, { "epoch": 0.3061671469740634, "grad_norm": 1.0841679845773295, "learning_rate": 1.942626645874275e-06, "loss": 0.5298174619674683, "step": 1328 }, { "epoch": 0.30639769452449567, "grad_norm": 1.0392947019090986, "learning_rate": 1.942499324686532e-06, "loss": 0.583850622177124, "step": 1329 }, { "epoch": 0.30662824207492795, "grad_norm": 1.0139999403008324, "learning_rate": 1.9423718665634413e-06, "loss": 0.4839683771133423, "step": 1330 }, { "epoch": 0.30685878962536023, "grad_norm": 1.0298436614312085, "learning_rate": 1.9422442715235223e-06, "loss": 0.5600621104240417, "step": 1331 }, { "epoch": 0.3070893371757925, "grad_norm": 1.0626416339831364, "learning_rate": 1.942116539585312e-06, "loss": 0.5607948303222656, "step": 1332 }, { "epoch": 0.3073198847262248, "grad_norm": 1.0244776150888006, "learning_rate": 1.9419886707673695e-06, "loss": 0.6241478323936462, "step": 1333 }, { "epoch": 0.30755043227665707, "grad_norm": 1.059435056912846, "learning_rate": 1.941860665088272e-06, "loss": 0.5256654620170593, "step": 1334 }, { "epoch": 0.30778097982708935, "grad_norm": 0.9814039676018265, "learning_rate": 1.9417325225666185e-06, "loss": 0.5031943321228027, "step": 1335 }, { "epoch": 0.30801152737752163, "grad_norm": 1.0381509501910948, "learning_rate": 1.9416042432210256e-06, "loss": 0.5167732238769531, "step": 1336 }, { "epoch": 0.3082420749279539, "grad_norm": 1.3652241862581105, "learning_rate": 1.941475827070132e-06, "loss": 0.5503576397895813, "step": 1337 }, { "epoch": 0.3084726224783862, "grad_norm": 1.0886579362905786, "learning_rate": 1.9413472741325947e-06, "loss": 0.4676959812641144, "step": 1338 }, { "epoch": 0.3087031700288184, "grad_norm": 1.1278655674224347, "learning_rate": 1.9412185844270903e-06, "loss": 0.5845292806625366, "step": 1339 }, { "epoch": 0.3089337175792507, "grad_norm": 1.170562835357779, "learning_rate": 1.9410897579723175e-06, "loss": 0.5555682182312012, "step": 1340 }, { "epoch": 0.309164265129683, "grad_norm": 1.0107459327742527, "learning_rate": 1.940960794786993e-06, "loss": 0.6104729175567627, "step": 1341 }, { "epoch": 0.30939481268011526, "grad_norm": 0.9471421356356189, "learning_rate": 1.9408316948898535e-06, "loss": 0.641234278678894, "step": 1342 }, { "epoch": 0.30962536023054754, "grad_norm": 1.0545748452641606, "learning_rate": 1.940702458299656e-06, "loss": 0.56852126121521, "step": 1343 }, { "epoch": 0.3098559077809798, "grad_norm": 1.0184665425128694, "learning_rate": 1.9405730850351766e-06, "loss": 0.4960979223251343, "step": 1344 }, { "epoch": 0.3100864553314121, "grad_norm": 1.0984570627083017, "learning_rate": 1.9404435751152133e-06, "loss": 0.5483115315437317, "step": 1345 }, { "epoch": 0.3103170028818444, "grad_norm": 1.258420824095703, "learning_rate": 1.9403139285585814e-06, "loss": 0.5561012029647827, "step": 1346 }, { "epoch": 0.31054755043227666, "grad_norm": 1.121498999268742, "learning_rate": 1.940184145384118e-06, "loss": 0.549866795539856, "step": 1347 }, { "epoch": 0.31077809798270895, "grad_norm": 1.020975611109825, "learning_rate": 1.9400542256106783e-06, "loss": 0.547815203666687, "step": 1348 }, { "epoch": 0.3110086455331412, "grad_norm": 1.021689053716725, "learning_rate": 1.939924169257139e-06, "loss": 0.5230641961097717, "step": 1349 }, { "epoch": 0.3112391930835735, "grad_norm": 0.9340659983787873, "learning_rate": 1.939793976342396e-06, "loss": 0.5421465635299683, "step": 1350 }, { "epoch": 0.3114697406340058, "grad_norm": 1.1783810620622794, "learning_rate": 1.939663646885364e-06, "loss": 0.5560643672943115, "step": 1351 }, { "epoch": 0.31170028818443807, "grad_norm": 1.295011871686265, "learning_rate": 1.93953318090498e-06, "loss": 0.5311995148658752, "step": 1352 }, { "epoch": 0.3119308357348703, "grad_norm": 1.202368782772509, "learning_rate": 1.9394025784201985e-06, "loss": 0.539401650428772, "step": 1353 }, { "epoch": 0.3121613832853026, "grad_norm": 1.2005427387305667, "learning_rate": 1.9392718394499945e-06, "loss": 0.6014061570167542, "step": 1354 }, { "epoch": 0.31239193083573485, "grad_norm": 0.9270167680245519, "learning_rate": 1.9391409640133634e-06, "loss": 0.6065000295639038, "step": 1355 }, { "epoch": 0.31262247838616714, "grad_norm": 1.2988921211312436, "learning_rate": 1.9390099521293196e-06, "loss": 0.613541305065155, "step": 1356 }, { "epoch": 0.3128530259365994, "grad_norm": 0.8285824717727959, "learning_rate": 1.9388788038168985e-06, "loss": 0.47520384192466736, "step": 1357 }, { "epoch": 0.3130835734870317, "grad_norm": 1.0343980272119644, "learning_rate": 1.9387475190951543e-06, "loss": 0.615745484828949, "step": 1358 }, { "epoch": 0.313314121037464, "grad_norm": 0.9813010157914711, "learning_rate": 1.9386160979831607e-06, "loss": 0.5197638273239136, "step": 1359 }, { "epoch": 0.31354466858789626, "grad_norm": 1.1121294770562518, "learning_rate": 1.9384845405000124e-06, "loss": 0.5387387871742249, "step": 1360 }, { "epoch": 0.31377521613832854, "grad_norm": 0.924622519065769, "learning_rate": 1.9383528466648232e-06, "loss": 0.45428377389907837, "step": 1361 }, { "epoch": 0.3140057636887608, "grad_norm": 0.948104410443073, "learning_rate": 1.938221016496727e-06, "loss": 0.5130504369735718, "step": 1362 }, { "epoch": 0.3142363112391931, "grad_norm": 1.0809781469576445, "learning_rate": 1.9380890500148773e-06, "loss": 0.60721355676651, "step": 1363 }, { "epoch": 0.3144668587896254, "grad_norm": 1.1461080053421355, "learning_rate": 1.9379569472384475e-06, "loss": 0.5277825593948364, "step": 1364 }, { "epoch": 0.31469740634005766, "grad_norm": 1.0995766102280293, "learning_rate": 1.9378247081866303e-06, "loss": 0.5549559593200684, "step": 1365 }, { "epoch": 0.3149279538904899, "grad_norm": 1.0898464003561188, "learning_rate": 1.937692332878639e-06, "loss": 0.5606675148010254, "step": 1366 }, { "epoch": 0.31515850144092217, "grad_norm": 1.2514686660901941, "learning_rate": 1.9375598213337066e-06, "loss": 0.44079354405403137, "step": 1367 }, { "epoch": 0.31538904899135445, "grad_norm": 1.1147898414738042, "learning_rate": 1.9374271735710854e-06, "loss": 0.5627914667129517, "step": 1368 }, { "epoch": 0.31561959654178673, "grad_norm": 1.0827906414110418, "learning_rate": 1.9372943896100475e-06, "loss": 0.6157082319259644, "step": 1369 }, { "epoch": 0.315850144092219, "grad_norm": 1.0107986194096998, "learning_rate": 1.9371614694698853e-06, "loss": 0.5075211524963379, "step": 1370 }, { "epoch": 0.3160806916426513, "grad_norm": 1.1077211347008051, "learning_rate": 1.937028413169911e-06, "loss": 0.5673394799232483, "step": 1371 }, { "epoch": 0.31631123919308357, "grad_norm": 1.2194715035727013, "learning_rate": 1.9368952207294555e-06, "loss": 0.636246919631958, "step": 1372 }, { "epoch": 0.31654178674351585, "grad_norm": 0.9520240215012735, "learning_rate": 1.9367618921678714e-06, "loss": 0.5447783470153809, "step": 1373 }, { "epoch": 0.31677233429394813, "grad_norm": 1.0624943189039389, "learning_rate": 1.9366284275045285e-06, "loss": 0.5455813407897949, "step": 1374 }, { "epoch": 0.3170028818443804, "grad_norm": 1.351516959021647, "learning_rate": 1.936494826758819e-06, "loss": 0.6078809499740601, "step": 1375 }, { "epoch": 0.3172334293948127, "grad_norm": 1.255648645931826, "learning_rate": 1.9363610899501533e-06, "loss": 0.576380729675293, "step": 1376 }, { "epoch": 0.317463976945245, "grad_norm": 1.0694137963246684, "learning_rate": 1.9362272170979625e-06, "loss": 0.5999764204025269, "step": 1377 }, { "epoch": 0.31769452449567726, "grad_norm": 0.9127077886533301, "learning_rate": 1.936093208221696e-06, "loss": 0.5560207366943359, "step": 1378 }, { "epoch": 0.31792507204610954, "grad_norm": 1.0526952264732803, "learning_rate": 1.935959063340824e-06, "loss": 0.56638103723526, "step": 1379 }, { "epoch": 0.31815561959654176, "grad_norm": 1.0802135441151437, "learning_rate": 1.935824782474837e-06, "loss": 0.6130156517028809, "step": 1380 }, { "epoch": 0.31838616714697404, "grad_norm": 1.17418749132308, "learning_rate": 1.9356903656432445e-06, "loss": 0.5616703629493713, "step": 1381 }, { "epoch": 0.3186167146974063, "grad_norm": 1.0343181717058683, "learning_rate": 1.9355558128655757e-06, "loss": 0.6343744993209839, "step": 1382 }, { "epoch": 0.3188472622478386, "grad_norm": 1.095750213947718, "learning_rate": 1.935421124161379e-06, "loss": 0.5805482864379883, "step": 1383 }, { "epoch": 0.3190778097982709, "grad_norm": 1.1026578295358351, "learning_rate": 1.9352862995502244e-06, "loss": 0.46264296770095825, "step": 1384 }, { "epoch": 0.31930835734870316, "grad_norm": 1.0410251136907926, "learning_rate": 1.9351513390517007e-06, "loss": 0.6638646125793457, "step": 1385 }, { "epoch": 0.31953890489913545, "grad_norm": 1.1206503286933907, "learning_rate": 1.9350162426854148e-06, "loss": 0.6349970698356628, "step": 1386 }, { "epoch": 0.3197694524495677, "grad_norm": 1.072301849044211, "learning_rate": 1.934881010470996e-06, "loss": 0.5592948794364929, "step": 1387 }, { "epoch": 0.32, "grad_norm": 1.0458234682938432, "learning_rate": 1.9347456424280914e-06, "loss": 0.5730908513069153, "step": 1388 }, { "epoch": 0.3202305475504323, "grad_norm": 1.2900263608093252, "learning_rate": 1.9346101385763693e-06, "loss": 0.7006485462188721, "step": 1389 }, { "epoch": 0.32046109510086457, "grad_norm": 1.4129067567606644, "learning_rate": 1.934474498935516e-06, "loss": 0.573637068271637, "step": 1390 }, { "epoch": 0.32069164265129685, "grad_norm": 1.1318090296507703, "learning_rate": 1.93433872352524e-06, "loss": 0.4896121025085449, "step": 1391 }, { "epoch": 0.32092219020172913, "grad_norm": 1.2605619597232016, "learning_rate": 1.9342028123652665e-06, "loss": 0.6335302591323853, "step": 1392 }, { "epoch": 0.3211527377521614, "grad_norm": 0.9805948619235417, "learning_rate": 1.934066765475343e-06, "loss": 0.5807539224624634, "step": 1393 }, { "epoch": 0.32138328530259364, "grad_norm": 1.0981000795794762, "learning_rate": 1.9339305828752353e-06, "loss": 0.5706362128257751, "step": 1394 }, { "epoch": 0.3216138328530259, "grad_norm": 1.0353211161605218, "learning_rate": 1.9337942645847293e-06, "loss": 0.5357315540313721, "step": 1395 }, { "epoch": 0.3218443804034582, "grad_norm": 1.100852226093223, "learning_rate": 1.933657810623631e-06, "loss": 0.5349493026733398, "step": 1396 }, { "epoch": 0.3220749279538905, "grad_norm": 1.2109710983573714, "learning_rate": 1.9335212210117657e-06, "loss": 0.5859971642494202, "step": 1397 }, { "epoch": 0.32230547550432276, "grad_norm": 0.9676229309878287, "learning_rate": 1.9333844957689773e-06, "loss": 0.49399334192276, "step": 1398 }, { "epoch": 0.32253602305475504, "grad_norm": 1.0860725860312863, "learning_rate": 1.9332476349151325e-06, "loss": 0.5584002137184143, "step": 1399 }, { "epoch": 0.3227665706051873, "grad_norm": 1.268047844459102, "learning_rate": 1.9331106384701143e-06, "loss": 0.6149849891662598, "step": 1400 }, { "epoch": 0.3229971181556196, "grad_norm": 0.9344661358479289, "learning_rate": 1.932973506453827e-06, "loss": 0.44001221656799316, "step": 1401 }, { "epoch": 0.3232276657060519, "grad_norm": 1.1066396272948926, "learning_rate": 1.932836238886195e-06, "loss": 0.44574856758117676, "step": 1402 }, { "epoch": 0.32345821325648416, "grad_norm": 1.3401453983415261, "learning_rate": 1.9326988357871615e-06, "loss": 0.5376108884811401, "step": 1403 }, { "epoch": 0.32368876080691644, "grad_norm": 0.8767130557662413, "learning_rate": 1.93256129717669e-06, "loss": 0.5934798717498779, "step": 1404 }, { "epoch": 0.3239193083573487, "grad_norm": 1.303119875991707, "learning_rate": 1.932423623074763e-06, "loss": 0.5526829957962036, "step": 1405 }, { "epoch": 0.324149855907781, "grad_norm": 1.1334805404406405, "learning_rate": 1.9322858135013836e-06, "loss": 0.6259517669677734, "step": 1406 }, { "epoch": 0.32438040345821323, "grad_norm": 0.9954594387833945, "learning_rate": 1.932147868476574e-06, "loss": 0.4495973289012909, "step": 1407 }, { "epoch": 0.3246109510086455, "grad_norm": 0.861457749123995, "learning_rate": 1.932009788020376e-06, "loss": 0.5155202746391296, "step": 1408 }, { "epoch": 0.3248414985590778, "grad_norm": 0.9990951421158675, "learning_rate": 1.9318715721528508e-06, "loss": 0.4643939733505249, "step": 1409 }, { "epoch": 0.32507204610951007, "grad_norm": 1.1115294412044667, "learning_rate": 1.931733220894081e-06, "loss": 0.47659850120544434, "step": 1410 }, { "epoch": 0.32530259365994235, "grad_norm": 1.192025751825093, "learning_rate": 1.931594734264166e-06, "loss": 0.618567705154419, "step": 1411 }, { "epoch": 0.32553314121037463, "grad_norm": 1.3463014236604622, "learning_rate": 1.931456112283228e-06, "loss": 0.6162246465682983, "step": 1412 }, { "epoch": 0.3257636887608069, "grad_norm": 1.137080487588782, "learning_rate": 1.9313173549714063e-06, "loss": 0.5859405398368835, "step": 1413 }, { "epoch": 0.3259942363112392, "grad_norm": 1.0299388107109648, "learning_rate": 1.9311784623488614e-06, "loss": 0.45844388008117676, "step": 1414 }, { "epoch": 0.3262247838616715, "grad_norm": 1.1761474671222747, "learning_rate": 1.9310394344357725e-06, "loss": 0.5407997965812683, "step": 1415 }, { "epoch": 0.32645533141210376, "grad_norm": 1.1981225744663582, "learning_rate": 1.9309002712523394e-06, "loss": 0.5546882152557373, "step": 1416 }, { "epoch": 0.32668587896253604, "grad_norm": 0.8759460054604783, "learning_rate": 1.9307609728187807e-06, "loss": 0.4846392869949341, "step": 1417 }, { "epoch": 0.3269164265129683, "grad_norm": 1.000243508175514, "learning_rate": 1.9306215391553353e-06, "loss": 0.5062232613563538, "step": 1418 }, { "epoch": 0.3271469740634006, "grad_norm": 1.116854057805208, "learning_rate": 1.9304819702822615e-06, "loss": 0.5836912393569946, "step": 1419 }, { "epoch": 0.3273775216138329, "grad_norm": 1.094195818303245, "learning_rate": 1.9303422662198366e-06, "loss": 0.5329402089118958, "step": 1420 }, { "epoch": 0.3276080691642651, "grad_norm": 1.1607957633552695, "learning_rate": 1.930202426988359e-06, "loss": 0.6544215679168701, "step": 1421 }, { "epoch": 0.3278386167146974, "grad_norm": 1.100571228439363, "learning_rate": 1.930062452608145e-06, "loss": 0.47018110752105713, "step": 1422 }, { "epoch": 0.32806916426512966, "grad_norm": 1.0524336036947848, "learning_rate": 1.929922343099532e-06, "loss": 0.5112531185150146, "step": 1423 }, { "epoch": 0.32829971181556195, "grad_norm": 1.2000967886351006, "learning_rate": 1.9297820984828768e-06, "loss": 0.6076794266700745, "step": 1424 }, { "epoch": 0.3285302593659942, "grad_norm": 1.2298516207472598, "learning_rate": 1.9296417187785546e-06, "loss": 0.6222262382507324, "step": 1425 }, { "epoch": 0.3287608069164265, "grad_norm": 1.0861185792017791, "learning_rate": 1.929501204006962e-06, "loss": 0.5821695327758789, "step": 1426 }, { "epoch": 0.3289913544668588, "grad_norm": 0.9550005504042167, "learning_rate": 1.929360554188513e-06, "loss": 0.4985813498497009, "step": 1427 }, { "epoch": 0.32922190201729107, "grad_norm": 1.0175745295306269, "learning_rate": 1.929219769343644e-06, "loss": 0.5906369686126709, "step": 1428 }, { "epoch": 0.32945244956772335, "grad_norm": 0.9717883625472988, "learning_rate": 1.929078849492809e-06, "loss": 0.562096357345581, "step": 1429 }, { "epoch": 0.32968299711815563, "grad_norm": 1.1465278498569305, "learning_rate": 1.9289377946564822e-06, "loss": 0.5363502502441406, "step": 1430 }, { "epoch": 0.3299135446685879, "grad_norm": 1.2820536110562617, "learning_rate": 1.9287966048551573e-06, "loss": 0.5975755453109741, "step": 1431 }, { "epoch": 0.3301440922190202, "grad_norm": 1.0778576523614563, "learning_rate": 1.9286552801093476e-06, "loss": 0.5664670467376709, "step": 1432 }, { "epoch": 0.33037463976945247, "grad_norm": 1.0263666085507623, "learning_rate": 1.9285138204395864e-06, "loss": 0.5981261730194092, "step": 1433 }, { "epoch": 0.33060518731988475, "grad_norm": 0.9529552937728545, "learning_rate": 1.928372225866426e-06, "loss": 0.5640159845352173, "step": 1434 }, { "epoch": 0.330835734870317, "grad_norm": 0.9422839165194713, "learning_rate": 1.9282304964104397e-06, "loss": 0.5626721382141113, "step": 1435 }, { "epoch": 0.33106628242074926, "grad_norm": 1.1190074685130234, "learning_rate": 1.928088632092218e-06, "loss": 0.5522770881652832, "step": 1436 }, { "epoch": 0.33129682997118154, "grad_norm": 0.9690434170385617, "learning_rate": 1.9279466329323727e-06, "loss": 0.4949793517589569, "step": 1437 }, { "epoch": 0.3315273775216138, "grad_norm": 1.1199903848507864, "learning_rate": 1.927804498951535e-06, "loss": 0.5845533013343811, "step": 1438 }, { "epoch": 0.3317579250720461, "grad_norm": 1.0405222768252325, "learning_rate": 1.927662230170355e-06, "loss": 0.49574515223503113, "step": 1439 }, { "epoch": 0.3319884726224784, "grad_norm": 0.9203608823490977, "learning_rate": 1.927519826609503e-06, "loss": 0.5285886526107788, "step": 1440 }, { "epoch": 0.33221902017291066, "grad_norm": 1.1230062738043034, "learning_rate": 1.9273772882896698e-06, "loss": 0.474979043006897, "step": 1441 }, { "epoch": 0.33244956772334294, "grad_norm": 1.3266216308471324, "learning_rate": 1.927234615231564e-06, "loss": 0.48791128396987915, "step": 1442 }, { "epoch": 0.3326801152737752, "grad_norm": 1.0106410509782415, "learning_rate": 1.9270918074559135e-06, "loss": 0.4927103519439697, "step": 1443 }, { "epoch": 0.3329106628242075, "grad_norm": 1.0834822645958162, "learning_rate": 1.9269488649834683e-06, "loss": 0.5418181419372559, "step": 1444 }, { "epoch": 0.3331412103746398, "grad_norm": 1.2573247433245271, "learning_rate": 1.926805787834996e-06, "loss": 0.4865915775299072, "step": 1445 }, { "epoch": 0.33337175792507207, "grad_norm": 0.99396933954953, "learning_rate": 1.9266625760312838e-06, "loss": 0.5750860571861267, "step": 1446 }, { "epoch": 0.33360230547550435, "grad_norm": 1.078541464684242, "learning_rate": 1.9265192295931394e-06, "loss": 0.550861120223999, "step": 1447 }, { "epoch": 0.3338328530259366, "grad_norm": 1.007683687550808, "learning_rate": 1.926375748541389e-06, "loss": 0.5320810675621033, "step": 1448 }, { "epoch": 0.33406340057636885, "grad_norm": 1.017101643881844, "learning_rate": 1.9262321328968795e-06, "loss": 0.6060050129890442, "step": 1449 }, { "epoch": 0.33429394812680113, "grad_norm": 1.0363868425077463, "learning_rate": 1.9260883826804767e-06, "loss": 0.6425552368164062, "step": 1450 }, { "epoch": 0.3345244956772334, "grad_norm": 0.9764138596613898, "learning_rate": 1.925944497913065e-06, "loss": 0.49570873379707336, "step": 1451 }, { "epoch": 0.3347550432276657, "grad_norm": 1.2208122621725788, "learning_rate": 1.9258004786155512e-06, "loss": 0.5989271402359009, "step": 1452 }, { "epoch": 0.334985590778098, "grad_norm": 0.9503162789922354, "learning_rate": 1.925656324808858e-06, "loss": 0.5301828384399414, "step": 1453 }, { "epoch": 0.33521613832853026, "grad_norm": 0.8316884167256052, "learning_rate": 1.925512036513931e-06, "loss": 0.5198702216148376, "step": 1454 }, { "epoch": 0.33544668587896254, "grad_norm": 0.9455782785140152, "learning_rate": 1.925367613751732e-06, "loss": 0.4890085458755493, "step": 1455 }, { "epoch": 0.3356772334293948, "grad_norm": 1.0869496416180913, "learning_rate": 1.925223056543246e-06, "loss": 0.613020658493042, "step": 1456 }, { "epoch": 0.3359077809798271, "grad_norm": 1.3127727305608659, "learning_rate": 1.925078364909474e-06, "loss": 0.6544125080108643, "step": 1457 }, { "epoch": 0.3361383285302594, "grad_norm": 1.1781068643375063, "learning_rate": 1.9249335388714397e-06, "loss": 0.48095571994781494, "step": 1458 }, { "epoch": 0.33636887608069166, "grad_norm": 1.0490387987699479, "learning_rate": 1.9247885784501837e-06, "loss": 0.43932127952575684, "step": 1459 }, { "epoch": 0.33659942363112394, "grad_norm": 1.0790643638521016, "learning_rate": 1.9246434836667674e-06, "loss": 0.4519491195678711, "step": 1460 }, { "epoch": 0.3368299711815562, "grad_norm": 1.2051025882543964, "learning_rate": 1.9244982545422724e-06, "loss": 0.5621665120124817, "step": 1461 }, { "epoch": 0.33706051873198845, "grad_norm": 1.3398506945777509, "learning_rate": 1.924352891097798e-06, "loss": 0.4763834476470947, "step": 1462 }, { "epoch": 0.3372910662824207, "grad_norm": 1.242985575101598, "learning_rate": 1.9242073933544644e-06, "loss": 0.6005351543426514, "step": 1463 }, { "epoch": 0.337521613832853, "grad_norm": 1.110700986093752, "learning_rate": 1.9240617613334112e-06, "loss": 0.5566102862358093, "step": 1464 }, { "epoch": 0.3377521613832853, "grad_norm": 1.0242858373073467, "learning_rate": 1.9239159950557966e-06, "loss": 0.473061203956604, "step": 1465 }, { "epoch": 0.33798270893371757, "grad_norm": 1.1506065660394986, "learning_rate": 1.9237700945427993e-06, "loss": 0.5492761135101318, "step": 1466 }, { "epoch": 0.33821325648414985, "grad_norm": 1.2881267649729666, "learning_rate": 1.923624059815617e-06, "loss": 0.5164840221405029, "step": 1467 }, { "epoch": 0.33844380403458213, "grad_norm": 1.170718183576445, "learning_rate": 1.923477890895467e-06, "loss": 0.5348359942436218, "step": 1468 }, { "epoch": 0.3386743515850144, "grad_norm": 1.255138301940698, "learning_rate": 1.9233315878035863e-06, "loss": 0.6063251495361328, "step": 1469 }, { "epoch": 0.3389048991354467, "grad_norm": 1.1740353245373907, "learning_rate": 1.9231851505612315e-06, "loss": 0.5293298363685608, "step": 1470 }, { "epoch": 0.33913544668587897, "grad_norm": 1.2192155228489063, "learning_rate": 1.9230385791896773e-06, "loss": 0.566419243812561, "step": 1471 }, { "epoch": 0.33936599423631125, "grad_norm": 1.215891621061933, "learning_rate": 1.92289187371022e-06, "loss": 0.5496705770492554, "step": 1472 }, { "epoch": 0.33959654178674353, "grad_norm": 1.101213232352079, "learning_rate": 1.9227450341441742e-06, "loss": 0.4794740080833435, "step": 1473 }, { "epoch": 0.3398270893371758, "grad_norm": 1.2201116310406535, "learning_rate": 1.9225980605128744e-06, "loss": 0.5791349411010742, "step": 1474 }, { "epoch": 0.3400576368876081, "grad_norm": 1.4571369283257354, "learning_rate": 1.9224509528376734e-06, "loss": 0.479930579662323, "step": 1475 }, { "epoch": 0.3402881844380403, "grad_norm": 1.0415599481638325, "learning_rate": 1.9223037111399453e-06, "loss": 0.4530009627342224, "step": 1476 }, { "epoch": 0.3405187319884726, "grad_norm": 1.204701640132816, "learning_rate": 1.9221563354410828e-06, "loss": 0.5403045415878296, "step": 1477 }, { "epoch": 0.3407492795389049, "grad_norm": 0.8630253900146837, "learning_rate": 1.9220088257624975e-06, "loss": 0.493504136800766, "step": 1478 }, { "epoch": 0.34097982708933716, "grad_norm": 1.4310766024245367, "learning_rate": 1.921861182125621e-06, "loss": 0.5821102857589722, "step": 1479 }, { "epoch": 0.34121037463976944, "grad_norm": 1.082253088614022, "learning_rate": 1.9217134045519047e-06, "loss": 0.5295383334159851, "step": 1480 }, { "epoch": 0.3414409221902017, "grad_norm": 1.1829405649995548, "learning_rate": 1.9215654930628194e-06, "loss": 0.6152981519699097, "step": 1481 }, { "epoch": 0.341671469740634, "grad_norm": 1.0966776837147827, "learning_rate": 1.9214174476798547e-06, "loss": 0.5630486011505127, "step": 1482 }, { "epoch": 0.3419020172910663, "grad_norm": 1.1063696979108066, "learning_rate": 1.9212692684245203e-06, "loss": 0.5250644087791443, "step": 1483 }, { "epoch": 0.34213256484149857, "grad_norm": 1.1921011792402902, "learning_rate": 1.921120955318345e-06, "loss": 0.5262070894241333, "step": 1484 }, { "epoch": 0.34236311239193085, "grad_norm": 1.0557911041666306, "learning_rate": 1.920972508382877e-06, "loss": 0.4584987163543701, "step": 1485 }, { "epoch": 0.3425936599423631, "grad_norm": 0.8987901766147761, "learning_rate": 1.920823927639684e-06, "loss": 0.5689778923988342, "step": 1486 }, { "epoch": 0.3428242074927954, "grad_norm": 1.5261490950420458, "learning_rate": 1.920675213110354e-06, "loss": 0.5871669054031372, "step": 1487 }, { "epoch": 0.3430547550432277, "grad_norm": 1.1867087256934146, "learning_rate": 1.9205263648164927e-06, "loss": 0.5816771984100342, "step": 1488 }, { "epoch": 0.34328530259365997, "grad_norm": 1.1506734961006173, "learning_rate": 1.9203773827797266e-06, "loss": 0.5304274559020996, "step": 1489 }, { "epoch": 0.3435158501440922, "grad_norm": 1.1468129901849249, "learning_rate": 1.9202282670217014e-06, "loss": 0.5348042249679565, "step": 1490 }, { "epoch": 0.3437463976945245, "grad_norm": 1.159885461181134, "learning_rate": 1.920079017564082e-06, "loss": 0.5374947786331177, "step": 1491 }, { "epoch": 0.34397694524495676, "grad_norm": 1.2907287290922715, "learning_rate": 1.9199296344285527e-06, "loss": 0.6263279914855957, "step": 1492 }, { "epoch": 0.34420749279538904, "grad_norm": 1.0523687721115222, "learning_rate": 1.919780117636817e-06, "loss": 0.5039552450180054, "step": 1493 }, { "epoch": 0.3444380403458213, "grad_norm": 1.0964626478821635, "learning_rate": 1.9196304672105994e-06, "loss": 0.4592825174331665, "step": 1494 }, { "epoch": 0.3446685878962536, "grad_norm": 1.1561933172055967, "learning_rate": 1.919480683171641e-06, "loss": 0.5041275024414062, "step": 1495 }, { "epoch": 0.3448991354466859, "grad_norm": 1.1372473895034165, "learning_rate": 1.9193307655417043e-06, "loss": 0.573014497756958, "step": 1496 }, { "epoch": 0.34512968299711816, "grad_norm": 1.2502610467615811, "learning_rate": 1.9191807143425714e-06, "loss": 0.5881764888763428, "step": 1497 }, { "epoch": 0.34536023054755044, "grad_norm": 1.1627055981370906, "learning_rate": 1.9190305295960425e-06, "loss": 0.523137092590332, "step": 1498 }, { "epoch": 0.3455907780979827, "grad_norm": 1.3731261688393495, "learning_rate": 1.9188802113239383e-06, "loss": 0.5579402446746826, "step": 1499 }, { "epoch": 0.345821325648415, "grad_norm": 1.2642778137685087, "learning_rate": 1.918729759548098e-06, "loss": 0.6252793073654175, "step": 1500 }, { "epoch": 0.3460518731988473, "grad_norm": 1.0221709474046514, "learning_rate": 1.9185791742903813e-06, "loss": 0.5688179731369019, "step": 1501 }, { "epoch": 0.34628242074927956, "grad_norm": 0.9608059209732579, "learning_rate": 1.9184284555726664e-06, "loss": 0.4556620717048645, "step": 1502 }, { "epoch": 0.34651296829971184, "grad_norm": 1.4535889503619228, "learning_rate": 1.9182776034168513e-06, "loss": 0.5266132950782776, "step": 1503 }, { "epoch": 0.34674351585014407, "grad_norm": 1.014116036253219, "learning_rate": 1.9181266178448525e-06, "loss": 0.5345441102981567, "step": 1504 }, { "epoch": 0.34697406340057635, "grad_norm": 1.471506683270131, "learning_rate": 1.9179754988786077e-06, "loss": 0.5279865264892578, "step": 1505 }, { "epoch": 0.34720461095100863, "grad_norm": 0.9951759038068401, "learning_rate": 1.917824246540072e-06, "loss": 0.5321294069290161, "step": 1506 }, { "epoch": 0.3474351585014409, "grad_norm": 1.1405669476940652, "learning_rate": 1.9176728608512216e-06, "loss": 0.6012279987335205, "step": 1507 }, { "epoch": 0.3476657060518732, "grad_norm": 1.280934102974154, "learning_rate": 1.917521341834051e-06, "loss": 0.6327307224273682, "step": 1508 }, { "epoch": 0.34789625360230547, "grad_norm": 1.3541871619240426, "learning_rate": 1.9173696895105738e-06, "loss": 0.4634242057800293, "step": 1509 }, { "epoch": 0.34812680115273775, "grad_norm": 0.9650176834853621, "learning_rate": 1.917217903902824e-06, "loss": 0.6095619201660156, "step": 1510 }, { "epoch": 0.34835734870317003, "grad_norm": 0.8778117797545331, "learning_rate": 1.9170659850328543e-06, "loss": 0.5752026438713074, "step": 1511 }, { "epoch": 0.3485878962536023, "grad_norm": 1.1303848756633323, "learning_rate": 1.9169139329227373e-06, "loss": 0.5497609376907349, "step": 1512 }, { "epoch": 0.3488184438040346, "grad_norm": 0.9721420679355441, "learning_rate": 1.916761747594564e-06, "loss": 0.562045693397522, "step": 1513 }, { "epoch": 0.3490489913544669, "grad_norm": 1.2143743690553912, "learning_rate": 1.916609429070446e-06, "loss": 0.533704400062561, "step": 1514 }, { "epoch": 0.34927953890489916, "grad_norm": 1.023070261432892, "learning_rate": 1.916456977372513e-06, "loss": 0.5165727734565735, "step": 1515 }, { "epoch": 0.34951008645533144, "grad_norm": 1.0822455620220128, "learning_rate": 1.9163043925229154e-06, "loss": 0.5291183590888977, "step": 1516 }, { "epoch": 0.34974063400576366, "grad_norm": 1.1691832350355116, "learning_rate": 1.916151674543821e-06, "loss": 0.5755541324615479, "step": 1517 }, { "epoch": 0.34997118155619594, "grad_norm": 1.096375651985543, "learning_rate": 1.915998823457419e-06, "loss": 0.5173031091690063, "step": 1518 }, { "epoch": 0.3502017291066282, "grad_norm": 1.0266729234069105, "learning_rate": 1.9158458392859175e-06, "loss": 0.5223626494407654, "step": 1519 }, { "epoch": 0.3504322766570605, "grad_norm": 1.3968429150305617, "learning_rate": 1.9156927220515426e-06, "loss": 0.49933186173439026, "step": 1520 }, { "epoch": 0.3506628242074928, "grad_norm": 0.9366627758580057, "learning_rate": 1.91553947177654e-06, "loss": 0.4844704568386078, "step": 1521 }, { "epoch": 0.35089337175792507, "grad_norm": 1.1060039419898107, "learning_rate": 1.9153860884831775e-06, "loss": 0.44557222723960876, "step": 1522 }, { "epoch": 0.35112391930835735, "grad_norm": 1.1817623923973588, "learning_rate": 1.9152325721937388e-06, "loss": 0.5139213800430298, "step": 1523 }, { "epoch": 0.3513544668587896, "grad_norm": 1.1356038591189508, "learning_rate": 1.9150789229305276e-06, "loss": 0.6255537271499634, "step": 1524 }, { "epoch": 0.3515850144092219, "grad_norm": 1.4572939210307672, "learning_rate": 1.914925140715869e-06, "loss": 0.5222468376159668, "step": 1525 }, { "epoch": 0.3518155619596542, "grad_norm": 0.9379777889373196, "learning_rate": 1.914771225572105e-06, "loss": 0.5266926288604736, "step": 1526 }, { "epoch": 0.35204610951008647, "grad_norm": 1.182155209163351, "learning_rate": 1.914617177521598e-06, "loss": 0.5775296688079834, "step": 1527 }, { "epoch": 0.35227665706051875, "grad_norm": 1.544551250397055, "learning_rate": 1.9144629965867296e-06, "loss": 0.6237180233001709, "step": 1528 }, { "epoch": 0.35250720461095103, "grad_norm": 1.0167232716229981, "learning_rate": 1.914308682789901e-06, "loss": 0.40847349166870117, "step": 1529 }, { "epoch": 0.3527377521613833, "grad_norm": 0.9570236522891805, "learning_rate": 1.914154236153532e-06, "loss": 0.5963910818099976, "step": 1530 }, { "epoch": 0.35296829971181554, "grad_norm": 1.0391049584893006, "learning_rate": 1.9139996567000624e-06, "loss": 0.516531765460968, "step": 1531 }, { "epoch": 0.3531988472622478, "grad_norm": 0.9409985228344566, "learning_rate": 1.9138449444519507e-06, "loss": 0.5145821571350098, "step": 1532 }, { "epoch": 0.3534293948126801, "grad_norm": 1.1658422646261672, "learning_rate": 1.9136900994316753e-06, "loss": 0.5966194272041321, "step": 1533 }, { "epoch": 0.3536599423631124, "grad_norm": 1.0504783958956583, "learning_rate": 1.913535121661733e-06, "loss": 0.4880404472351074, "step": 1534 }, { "epoch": 0.35389048991354466, "grad_norm": 1.1586094316232247, "learning_rate": 1.9133800111646414e-06, "loss": 0.5151012539863586, "step": 1535 }, { "epoch": 0.35412103746397694, "grad_norm": 1.1633319411140208, "learning_rate": 1.9132247679629353e-06, "loss": 0.5591508150100708, "step": 1536 }, { "epoch": 0.3543515850144092, "grad_norm": 1.3102428915861866, "learning_rate": 1.9130693920791708e-06, "loss": 0.5961824655532837, "step": 1537 }, { "epoch": 0.3545821325648415, "grad_norm": 1.0721758303169822, "learning_rate": 1.912913883535922e-06, "loss": 0.5701007843017578, "step": 1538 }, { "epoch": 0.3548126801152738, "grad_norm": 1.2232919385309438, "learning_rate": 1.9127582423557827e-06, "loss": 0.5492852926254272, "step": 1539 }, { "epoch": 0.35504322766570606, "grad_norm": 1.0664413609489287, "learning_rate": 1.912602468561366e-06, "loss": 0.5030492544174194, "step": 1540 }, { "epoch": 0.35527377521613834, "grad_norm": 1.0804112062824267, "learning_rate": 1.9124465621753047e-06, "loss": 0.567867636680603, "step": 1541 }, { "epoch": 0.3555043227665706, "grad_norm": 1.1359172862574665, "learning_rate": 1.9122905232202497e-06, "loss": 0.6277697682380676, "step": 1542 }, { "epoch": 0.3557348703170029, "grad_norm": 1.0326808049343164, "learning_rate": 1.912134351718872e-06, "loss": 0.5682080984115601, "step": 1543 }, { "epoch": 0.3559654178674352, "grad_norm": 1.1630983445060303, "learning_rate": 1.9119780476938616e-06, "loss": 0.4987330138683319, "step": 1544 }, { "epoch": 0.3561959654178674, "grad_norm": 1.0409430932448371, "learning_rate": 1.911821611167928e-06, "loss": 0.4908757209777832, "step": 1545 }, { "epoch": 0.3564265129682997, "grad_norm": 1.098875754274837, "learning_rate": 1.9116650421637995e-06, "loss": 0.4820883870124817, "step": 1546 }, { "epoch": 0.35665706051873197, "grad_norm": 1.3119370882450934, "learning_rate": 1.911508340704225e-06, "loss": 0.5155225992202759, "step": 1547 }, { "epoch": 0.35688760806916425, "grad_norm": 0.8397950319284104, "learning_rate": 1.9113515068119705e-06, "loss": 0.4797988533973694, "step": 1548 }, { "epoch": 0.35711815561959653, "grad_norm": 0.8794949811896489, "learning_rate": 1.911194540509822e-06, "loss": 0.4822116196155548, "step": 1549 }, { "epoch": 0.3573487031700288, "grad_norm": 1.0294258069172015, "learning_rate": 1.9110374418205866e-06, "loss": 0.5781491994857788, "step": 1550 }, { "epoch": 0.3575792507204611, "grad_norm": 1.065474237959385, "learning_rate": 1.910880210767088e-06, "loss": 0.5163141489028931, "step": 1551 }, { "epoch": 0.3578097982708934, "grad_norm": 0.9693021918748141, "learning_rate": 1.9107228473721703e-06, "loss": 0.5735442638397217, "step": 1552 }, { "epoch": 0.35804034582132566, "grad_norm": 1.289606289132748, "learning_rate": 1.9105653516586975e-06, "loss": 0.47912898659706116, "step": 1553 }, { "epoch": 0.35827089337175794, "grad_norm": 1.2363840205017804, "learning_rate": 1.9104077236495507e-06, "loss": 0.5857046842575073, "step": 1554 }, { "epoch": 0.3585014409221902, "grad_norm": 0.9205720464341692, "learning_rate": 1.910249963367633e-06, "loss": 0.47841203212738037, "step": 1555 }, { "epoch": 0.3587319884726225, "grad_norm": 1.0020264560044534, "learning_rate": 1.9100920708358644e-06, "loss": 0.5707235336303711, "step": 1556 }, { "epoch": 0.3589625360230548, "grad_norm": 0.9331299501384178, "learning_rate": 1.9099340460771856e-06, "loss": 0.5102289915084839, "step": 1557 }, { "epoch": 0.359193083573487, "grad_norm": 1.2369505926587712, "learning_rate": 1.9097758891145557e-06, "loss": 0.4796826243400574, "step": 1558 }, { "epoch": 0.3594236311239193, "grad_norm": 1.0814348251306543, "learning_rate": 1.9096175999709538e-06, "loss": 0.5577390789985657, "step": 1559 }, { "epoch": 0.35965417867435157, "grad_norm": 1.0239605929741993, "learning_rate": 1.9094591786693767e-06, "loss": 0.4793698191642761, "step": 1560 }, { "epoch": 0.35988472622478385, "grad_norm": 1.0987189800590818, "learning_rate": 1.909300625232842e-06, "loss": 0.5652080178260803, "step": 1561 }, { "epoch": 0.3601152737752161, "grad_norm": 0.9830853981670961, "learning_rate": 1.909141939684385e-06, "loss": 0.5174850225448608, "step": 1562 }, { "epoch": 0.3603458213256484, "grad_norm": 1.5804535303236673, "learning_rate": 1.908983122047063e-06, "loss": 0.5839135050773621, "step": 1563 }, { "epoch": 0.3605763688760807, "grad_norm": 1.0464812980537448, "learning_rate": 1.9088241723439486e-06, "loss": 0.4825834333896637, "step": 1564 }, { "epoch": 0.36080691642651297, "grad_norm": 1.2145113527517095, "learning_rate": 1.9086650905981364e-06, "loss": 0.608122706413269, "step": 1565 }, { "epoch": 0.36103746397694525, "grad_norm": 1.255316392122234, "learning_rate": 1.908505876832739e-06, "loss": 0.6223492622375488, "step": 1566 }, { "epoch": 0.36126801152737753, "grad_norm": 1.0060646589623696, "learning_rate": 1.9083465310708894e-06, "loss": 0.49106669425964355, "step": 1567 }, { "epoch": 0.3614985590778098, "grad_norm": 1.072457075768019, "learning_rate": 1.9081870533357373e-06, "loss": 0.6098700165748596, "step": 1568 }, { "epoch": 0.3617291066282421, "grad_norm": 1.3117902009377698, "learning_rate": 1.9080274436504547e-06, "loss": 0.520710825920105, "step": 1569 }, { "epoch": 0.3619596541786744, "grad_norm": 1.4310182215994716, "learning_rate": 1.90786770203823e-06, "loss": 0.6175330877304077, "step": 1570 }, { "epoch": 0.36219020172910665, "grad_norm": 1.066318396536309, "learning_rate": 1.907707828522273e-06, "loss": 0.5232914686203003, "step": 1571 }, { "epoch": 0.3624207492795389, "grad_norm": 1.0921905909716596, "learning_rate": 1.907547823125811e-06, "loss": 0.45075923204421997, "step": 1572 }, { "epoch": 0.36265129682997116, "grad_norm": 0.9633775515017127, "learning_rate": 1.9073876858720914e-06, "loss": 0.4931294918060303, "step": 1573 }, { "epoch": 0.36288184438040344, "grad_norm": 1.0692876552208703, "learning_rate": 1.9072274167843805e-06, "loss": 0.4915880858898163, "step": 1574 }, { "epoch": 0.3631123919308357, "grad_norm": 1.0666594923147616, "learning_rate": 1.9070670158859634e-06, "loss": 0.5257406234741211, "step": 1575 }, { "epoch": 0.363342939481268, "grad_norm": 0.9711753769566253, "learning_rate": 1.906906483200145e-06, "loss": 0.5337891578674316, "step": 1576 }, { "epoch": 0.3635734870317003, "grad_norm": 1.0365593071400123, "learning_rate": 1.9067458187502491e-06, "loss": 0.5388165712356567, "step": 1577 }, { "epoch": 0.36380403458213256, "grad_norm": 1.056509558135423, "learning_rate": 1.9065850225596183e-06, "loss": 0.34295597672462463, "step": 1578 }, { "epoch": 0.36403458213256484, "grad_norm": 1.1966716237030297, "learning_rate": 1.9064240946516148e-06, "loss": 0.4885653853416443, "step": 1579 }, { "epoch": 0.3642651296829971, "grad_norm": 1.0967168066015518, "learning_rate": 1.9062630350496195e-06, "loss": 0.5201048851013184, "step": 1580 }, { "epoch": 0.3644956772334294, "grad_norm": 1.1549525250242287, "learning_rate": 1.9061018437770332e-06, "loss": 0.509685218334198, "step": 1581 }, { "epoch": 0.3647262247838617, "grad_norm": 1.2103985180509436, "learning_rate": 1.9059405208572747e-06, "loss": 0.5103805661201477, "step": 1582 }, { "epoch": 0.36495677233429397, "grad_norm": 1.1299754007600582, "learning_rate": 1.9057790663137828e-06, "loss": 0.7274478077888489, "step": 1583 }, { "epoch": 0.36518731988472625, "grad_norm": 1.0971187411733057, "learning_rate": 1.9056174801700155e-06, "loss": 0.5056940317153931, "step": 1584 }, { "epoch": 0.3654178674351585, "grad_norm": 1.4885850320982672, "learning_rate": 1.905455762449449e-06, "loss": 0.519898533821106, "step": 1585 }, { "epoch": 0.36564841498559075, "grad_norm": 1.2387908894479935, "learning_rate": 1.9052939131755798e-06, "loss": 0.5524897575378418, "step": 1586 }, { "epoch": 0.36587896253602303, "grad_norm": 1.553847548415249, "learning_rate": 1.9051319323719224e-06, "loss": 0.5931388139724731, "step": 1587 }, { "epoch": 0.3661095100864553, "grad_norm": 1.139599456662856, "learning_rate": 1.904969820062011e-06, "loss": 0.4642411470413208, "step": 1588 }, { "epoch": 0.3663400576368876, "grad_norm": 1.0238584681270078, "learning_rate": 1.9048075762693992e-06, "loss": 0.5271746516227722, "step": 1589 }, { "epoch": 0.3665706051873199, "grad_norm": 1.1002878432026475, "learning_rate": 1.904645201017659e-06, "loss": 0.6071990728378296, "step": 1590 }, { "epoch": 0.36680115273775216, "grad_norm": 1.0574792476681798, "learning_rate": 1.9044826943303819e-06, "loss": 0.5722445249557495, "step": 1591 }, { "epoch": 0.36703170028818444, "grad_norm": 0.9282439309709182, "learning_rate": 1.9043200562311786e-06, "loss": 0.5569512844085693, "step": 1592 }, { "epoch": 0.3672622478386167, "grad_norm": 1.0196945335938161, "learning_rate": 1.9041572867436784e-06, "loss": 0.5678357481956482, "step": 1593 }, { "epoch": 0.367492795389049, "grad_norm": 1.0907444083428672, "learning_rate": 1.90399438589153e-06, "loss": 0.5517602562904358, "step": 1594 }, { "epoch": 0.3677233429394813, "grad_norm": 1.156373448455516, "learning_rate": 1.903831353698402e-06, "loss": 0.5436903238296509, "step": 1595 }, { "epoch": 0.36795389048991356, "grad_norm": 0.9322530068282044, "learning_rate": 1.9036681901879802e-06, "loss": 0.4968247413635254, "step": 1596 }, { "epoch": 0.36818443804034584, "grad_norm": 1.3503537931752878, "learning_rate": 1.9035048953839712e-06, "loss": 0.4991995096206665, "step": 1597 }, { "epoch": 0.3684149855907781, "grad_norm": 1.1693808144768583, "learning_rate": 1.9033414693100999e-06, "loss": 0.5020145177841187, "step": 1598 }, { "epoch": 0.3686455331412104, "grad_norm": 1.179802024734829, "learning_rate": 1.9031779119901104e-06, "loss": 0.45959436893463135, "step": 1599 }, { "epoch": 0.3688760806916426, "grad_norm": 1.2294549218788287, "learning_rate": 1.9030142234477658e-06, "loss": 0.5194531679153442, "step": 1600 }, { "epoch": 0.3691066282420749, "grad_norm": 1.0829366118476111, "learning_rate": 1.9028504037068481e-06, "loss": 0.5488829016685486, "step": 1601 }, { "epoch": 0.3693371757925072, "grad_norm": 1.1508769427557617, "learning_rate": 1.9026864527911593e-06, "loss": 0.49845972657203674, "step": 1602 }, { "epoch": 0.36956772334293947, "grad_norm": 0.9523664823081464, "learning_rate": 1.9025223707245192e-06, "loss": 0.5122306942939758, "step": 1603 }, { "epoch": 0.36979827089337175, "grad_norm": 1.1063344831139217, "learning_rate": 1.9023581575307677e-06, "loss": 0.5586007833480835, "step": 1604 }, { "epoch": 0.37002881844380403, "grad_norm": 1.2087997693756056, "learning_rate": 1.9021938132337625e-06, "loss": 0.5375609993934631, "step": 1605 }, { "epoch": 0.3702593659942363, "grad_norm": 1.121295133902373, "learning_rate": 1.902029337857382e-06, "loss": 0.44368264079093933, "step": 1606 }, { "epoch": 0.3704899135446686, "grad_norm": 1.0604325469477982, "learning_rate": 1.901864731425522e-06, "loss": 0.47421109676361084, "step": 1607 }, { "epoch": 0.3707204610951009, "grad_norm": 1.1261708226384317, "learning_rate": 1.9016999939620986e-06, "loss": 0.46196484565734863, "step": 1608 }, { "epoch": 0.37095100864553315, "grad_norm": 1.1243231065601214, "learning_rate": 1.9015351254910464e-06, "loss": 0.5860691070556641, "step": 1609 }, { "epoch": 0.37118155619596543, "grad_norm": 1.376635822166593, "learning_rate": 1.9013701260363186e-06, "loss": 0.48964136838912964, "step": 1610 }, { "epoch": 0.3714121037463977, "grad_norm": 1.2771258863133614, "learning_rate": 1.9012049956218885e-06, "loss": 0.577031135559082, "step": 1611 }, { "epoch": 0.37164265129683, "grad_norm": 1.750265298033527, "learning_rate": 1.9010397342717477e-06, "loss": 0.5630660653114319, "step": 1612 }, { "epoch": 0.3718731988472622, "grad_norm": 1.452203115340865, "learning_rate": 1.9008743420099064e-06, "loss": 0.6493782997131348, "step": 1613 }, { "epoch": 0.3721037463976945, "grad_norm": 1.2469626311837894, "learning_rate": 1.9007088188603952e-06, "loss": 0.5408718585968018, "step": 1614 }, { "epoch": 0.3723342939481268, "grad_norm": 1.2257479900702195, "learning_rate": 1.9005431648472622e-06, "loss": 0.6069578528404236, "step": 1615 }, { "epoch": 0.37256484149855906, "grad_norm": 1.1160644334466228, "learning_rate": 1.900377379994576e-06, "loss": 0.5539328455924988, "step": 1616 }, { "epoch": 0.37279538904899134, "grad_norm": 1.1810044813760578, "learning_rate": 1.9002114643264227e-06, "loss": 0.48285481333732605, "step": 1617 }, { "epoch": 0.3730259365994236, "grad_norm": 1.1390789738354967, "learning_rate": 1.900045417866908e-06, "loss": 0.5164967775344849, "step": 1618 }, { "epoch": 0.3732564841498559, "grad_norm": 1.3865337554151096, "learning_rate": 1.8998792406401573e-06, "loss": 0.6291834115982056, "step": 1619 }, { "epoch": 0.3734870317002882, "grad_norm": 1.2947366356266334, "learning_rate": 1.8997129326703142e-06, "loss": 0.43779683113098145, "step": 1620 }, { "epoch": 0.37371757925072047, "grad_norm": 1.0769662454824116, "learning_rate": 1.8995464939815417e-06, "loss": 0.5595699548721313, "step": 1621 }, { "epoch": 0.37394812680115275, "grad_norm": 1.0790060447546612, "learning_rate": 1.8993799245980213e-06, "loss": 0.5409479737281799, "step": 1622 }, { "epoch": 0.37417867435158503, "grad_norm": 1.1307121675788347, "learning_rate": 1.8992132245439538e-06, "loss": 0.4442507326602936, "step": 1623 }, { "epoch": 0.3744092219020173, "grad_norm": 1.200366310775744, "learning_rate": 1.8990463938435593e-06, "loss": 0.5552202463150024, "step": 1624 }, { "epoch": 0.3746397694524496, "grad_norm": 1.210477522460972, "learning_rate": 1.8988794325210761e-06, "loss": 0.4405897855758667, "step": 1625 }, { "epoch": 0.37487031700288187, "grad_norm": 1.1718941866133759, "learning_rate": 1.8987123406007626e-06, "loss": 0.48420459032058716, "step": 1626 }, { "epoch": 0.3751008645533141, "grad_norm": 1.0944718615024338, "learning_rate": 1.8985451181068948e-06, "loss": 0.47946181893348694, "step": 1627 }, { "epoch": 0.3753314121037464, "grad_norm": 1.4314462923323383, "learning_rate": 1.8983777650637687e-06, "loss": 0.5447190403938293, "step": 1628 }, { "epoch": 0.37556195965417866, "grad_norm": 1.2308740500999567, "learning_rate": 1.8982102814956994e-06, "loss": 0.5597184300422668, "step": 1629 }, { "epoch": 0.37579250720461094, "grad_norm": 1.0808740812316415, "learning_rate": 1.8980426674270195e-06, "loss": 0.4728265404701233, "step": 1630 }, { "epoch": 0.3760230547550432, "grad_norm": 1.0022470440365883, "learning_rate": 1.8978749228820825e-06, "loss": 0.5532448291778564, "step": 1631 }, { "epoch": 0.3762536023054755, "grad_norm": 1.3647616222651395, "learning_rate": 1.8977070478852596e-06, "loss": 0.5713067650794983, "step": 1632 }, { "epoch": 0.3764841498559078, "grad_norm": 1.047205989185189, "learning_rate": 1.8975390424609414e-06, "loss": 0.5646129846572876, "step": 1633 }, { "epoch": 0.37671469740634006, "grad_norm": 1.0707041675317837, "learning_rate": 1.897370906633537e-06, "loss": 0.5725210309028625, "step": 1634 }, { "epoch": 0.37694524495677234, "grad_norm": 1.2576302459262307, "learning_rate": 1.8972026404274752e-06, "loss": 0.6406511068344116, "step": 1635 }, { "epoch": 0.3771757925072046, "grad_norm": 1.3865349965726392, "learning_rate": 1.8970342438672032e-06, "loss": 0.6073347330093384, "step": 1636 }, { "epoch": 0.3774063400576369, "grad_norm": 0.950712343876015, "learning_rate": 1.8968657169771871e-06, "loss": 0.5833244323730469, "step": 1637 }, { "epoch": 0.3776368876080692, "grad_norm": 1.05768625493054, "learning_rate": 1.8966970597819122e-06, "loss": 0.5105189681053162, "step": 1638 }, { "epoch": 0.37786743515850146, "grad_norm": 1.2405093128151112, "learning_rate": 1.8965282723058827e-06, "loss": 0.6037019491195679, "step": 1639 }, { "epoch": 0.37809798270893374, "grad_norm": 1.2668863969044166, "learning_rate": 1.8963593545736218e-06, "loss": 0.43045759201049805, "step": 1640 }, { "epoch": 0.37832853025936597, "grad_norm": 1.031350209842573, "learning_rate": 1.8961903066096712e-06, "loss": 0.5377180576324463, "step": 1641 }, { "epoch": 0.37855907780979825, "grad_norm": 1.0130795042585947, "learning_rate": 1.8960211284385919e-06, "loss": 0.4094654321670532, "step": 1642 }, { "epoch": 0.37878962536023053, "grad_norm": 1.0057294140777453, "learning_rate": 1.8958518200849638e-06, "loss": 0.4871266484260559, "step": 1643 }, { "epoch": 0.3790201729106628, "grad_norm": 1.0413940989400527, "learning_rate": 1.8956823815733855e-06, "loss": 0.546768069267273, "step": 1644 }, { "epoch": 0.3792507204610951, "grad_norm": 1.489464911824181, "learning_rate": 1.8955128129284747e-06, "loss": 0.5822614431381226, "step": 1645 }, { "epoch": 0.3794812680115274, "grad_norm": 1.2747301835507696, "learning_rate": 1.8953431141748685e-06, "loss": 0.499586284160614, "step": 1646 }, { "epoch": 0.37971181556195965, "grad_norm": 1.0356602712164185, "learning_rate": 1.8951732853372214e-06, "loss": 0.4616992771625519, "step": 1647 }, { "epoch": 0.37994236311239193, "grad_norm": 1.1023032116439935, "learning_rate": 1.8950033264402084e-06, "loss": 0.5688509941101074, "step": 1648 }, { "epoch": 0.3801729106628242, "grad_norm": 1.2135433507747628, "learning_rate": 1.8948332375085226e-06, "loss": 0.5367652177810669, "step": 1649 }, { "epoch": 0.3804034582132565, "grad_norm": 1.1441147670237812, "learning_rate": 1.8946630185668759e-06, "loss": 0.5865902900695801, "step": 1650 }, { "epoch": 0.3806340057636888, "grad_norm": 1.20236181961399, "learning_rate": 1.89449266964e-06, "loss": 0.4999021887779236, "step": 1651 }, { "epoch": 0.38086455331412106, "grad_norm": 1.1459570976309312, "learning_rate": 1.8943221907526443e-06, "loss": 0.5646007657051086, "step": 1652 }, { "epoch": 0.38109510086455334, "grad_norm": 1.1175142171833454, "learning_rate": 1.8941515819295776e-06, "loss": 0.532716691493988, "step": 1653 }, { "epoch": 0.3813256484149856, "grad_norm": 1.2540916265885504, "learning_rate": 1.893980843195588e-06, "loss": 0.5148980021476746, "step": 1654 }, { "epoch": 0.38155619596541784, "grad_norm": 1.0547624939084206, "learning_rate": 1.8938099745754815e-06, "loss": 0.5159789323806763, "step": 1655 }, { "epoch": 0.3817867435158501, "grad_norm": 1.0842073433449368, "learning_rate": 1.8936389760940839e-06, "loss": 0.521435022354126, "step": 1656 }, { "epoch": 0.3820172910662824, "grad_norm": 1.1669048607685022, "learning_rate": 1.8934678477762395e-06, "loss": 0.5362331867218018, "step": 1657 }, { "epoch": 0.3822478386167147, "grad_norm": 0.9857130142976365, "learning_rate": 1.8932965896468113e-06, "loss": 0.5501196980476379, "step": 1658 }, { "epoch": 0.38247838616714697, "grad_norm": 1.3279799890705724, "learning_rate": 1.8931252017306813e-06, "loss": 0.5484409928321838, "step": 1659 }, { "epoch": 0.38270893371757925, "grad_norm": 1.2442187734169075, "learning_rate": 1.8929536840527507e-06, "loss": 0.6500132083892822, "step": 1660 }, { "epoch": 0.38293948126801153, "grad_norm": 1.3286091674792162, "learning_rate": 1.8927820366379388e-06, "loss": 0.513029158115387, "step": 1661 }, { "epoch": 0.3831700288184438, "grad_norm": 1.3694471654734355, "learning_rate": 1.8926102595111843e-06, "loss": 0.6097410917282104, "step": 1662 }, { "epoch": 0.3834005763688761, "grad_norm": 1.2096429751854834, "learning_rate": 1.8924383526974453e-06, "loss": 0.5139362812042236, "step": 1663 }, { "epoch": 0.38363112391930837, "grad_norm": 1.1695686636695835, "learning_rate": 1.892266316221697e-06, "loss": 0.4893265962600708, "step": 1664 }, { "epoch": 0.38386167146974065, "grad_norm": 1.1419708571669807, "learning_rate": 1.8920941501089352e-06, "loss": 0.5500860810279846, "step": 1665 }, { "epoch": 0.38409221902017293, "grad_norm": 1.2040433648252555, "learning_rate": 1.8919218543841736e-06, "loss": 0.46147310733795166, "step": 1666 }, { "epoch": 0.3843227665706052, "grad_norm": 1.2894839572530414, "learning_rate": 1.891749429072445e-06, "loss": 0.4785606265068054, "step": 1667 }, { "epoch": 0.38455331412103744, "grad_norm": 1.1550508785039575, "learning_rate": 1.8915768741988012e-06, "loss": 0.5280581712722778, "step": 1668 }, { "epoch": 0.3847838616714697, "grad_norm": 0.8004967971503981, "learning_rate": 1.8914041897883125e-06, "loss": 0.4316279888153076, "step": 1669 }, { "epoch": 0.385014409221902, "grad_norm": 0.9031817030472468, "learning_rate": 1.8912313758660679e-06, "loss": 0.4610823392868042, "step": 1670 }, { "epoch": 0.3852449567723343, "grad_norm": 1.1057745952132196, "learning_rate": 1.8910584324571758e-06, "loss": 0.5529364943504333, "step": 1671 }, { "epoch": 0.38547550432276656, "grad_norm": 1.1712489054484079, "learning_rate": 1.890885359586763e-06, "loss": 0.5393742322921753, "step": 1672 }, { "epoch": 0.38570605187319884, "grad_norm": 1.1889287828623867, "learning_rate": 1.890712157279975e-06, "loss": 0.5502661466598511, "step": 1673 }, { "epoch": 0.3859365994236311, "grad_norm": 1.1175808690641762, "learning_rate": 1.8905388255619764e-06, "loss": 0.5967349410057068, "step": 1674 }, { "epoch": 0.3861671469740634, "grad_norm": 0.9843385175883147, "learning_rate": 1.8903653644579508e-06, "loss": 0.5181038975715637, "step": 1675 }, { "epoch": 0.3863976945244957, "grad_norm": 1.2399078270539727, "learning_rate": 1.8901917739931e-06, "loss": 0.6069591641426086, "step": 1676 }, { "epoch": 0.38662824207492796, "grad_norm": 1.395380167151623, "learning_rate": 1.8900180541926445e-06, "loss": 0.4811745882034302, "step": 1677 }, { "epoch": 0.38685878962536024, "grad_norm": 1.1868848622007617, "learning_rate": 1.889844205081825e-06, "loss": 0.42992472648620605, "step": 1678 }, { "epoch": 0.3870893371757925, "grad_norm": 1.3846875348981704, "learning_rate": 1.889670226685899e-06, "loss": 0.515068531036377, "step": 1679 }, { "epoch": 0.3873198847262248, "grad_norm": 1.0504931066417105, "learning_rate": 1.889496119030144e-06, "loss": 0.5068717002868652, "step": 1680 }, { "epoch": 0.3875504322766571, "grad_norm": 1.204035195367357, "learning_rate": 1.8893218821398564e-06, "loss": 0.6236181259155273, "step": 1681 }, { "epoch": 0.3877809798270893, "grad_norm": 1.063984922450486, "learning_rate": 1.8891475160403508e-06, "loss": 0.5492556095123291, "step": 1682 }, { "epoch": 0.3880115273775216, "grad_norm": 1.2451795909219046, "learning_rate": 1.8889730207569605e-06, "loss": 0.5750234723091125, "step": 1683 }, { "epoch": 0.3882420749279539, "grad_norm": 1.283210442061239, "learning_rate": 1.8887983963150384e-06, "loss": 0.52640700340271, "step": 1684 }, { "epoch": 0.38847262247838615, "grad_norm": 1.144571674362394, "learning_rate": 1.8886236427399549e-06, "loss": 0.5916281342506409, "step": 1685 }, { "epoch": 0.38870317002881843, "grad_norm": 1.2712652855122726, "learning_rate": 1.8884487600571007e-06, "loss": 0.5805083513259888, "step": 1686 }, { "epoch": 0.3889337175792507, "grad_norm": 1.1516486375209825, "learning_rate": 1.8882737482918838e-06, "loss": 0.47049853205680847, "step": 1687 }, { "epoch": 0.389164265129683, "grad_norm": 1.2991101405673497, "learning_rate": 1.8880986074697318e-06, "loss": 0.5835333466529846, "step": 1688 }, { "epoch": 0.3893948126801153, "grad_norm": 1.1881783272928648, "learning_rate": 1.8879233376160907e-06, "loss": 0.4839910864830017, "step": 1689 }, { "epoch": 0.38962536023054756, "grad_norm": 1.3683999792816708, "learning_rate": 1.8877479387564258e-06, "loss": 0.5125032663345337, "step": 1690 }, { "epoch": 0.38985590778097984, "grad_norm": 0.9692753104780237, "learning_rate": 1.8875724109162203e-06, "loss": 0.595876932144165, "step": 1691 }, { "epoch": 0.3900864553314121, "grad_norm": 1.1077982199461915, "learning_rate": 1.887396754120977e-06, "loss": 0.5293446779251099, "step": 1692 }, { "epoch": 0.3903170028818444, "grad_norm": 0.947630233327163, "learning_rate": 1.8872209683962163e-06, "loss": 0.5455681681632996, "step": 1693 }, { "epoch": 0.3905475504322767, "grad_norm": 1.1346104082787818, "learning_rate": 1.8870450537674787e-06, "loss": 0.596378743648529, "step": 1694 }, { "epoch": 0.39077809798270896, "grad_norm": 1.1455389251374453, "learning_rate": 1.8868690102603226e-06, "loss": 0.49379733204841614, "step": 1695 }, { "epoch": 0.3910086455331412, "grad_norm": 1.2108182415680409, "learning_rate": 1.8866928379003251e-06, "loss": 0.4551328420639038, "step": 1696 }, { "epoch": 0.39123919308357347, "grad_norm": 1.4250898609027278, "learning_rate": 1.8865165367130822e-06, "loss": 0.54606693983078, "step": 1697 }, { "epoch": 0.39146974063400575, "grad_norm": 1.008628556444834, "learning_rate": 1.886340106724209e-06, "loss": 0.5410532355308533, "step": 1698 }, { "epoch": 0.39170028818443803, "grad_norm": 1.1387013906480672, "learning_rate": 1.8861635479593386e-06, "loss": 0.533348560333252, "step": 1699 }, { "epoch": 0.3919308357348703, "grad_norm": 1.2000257753384949, "learning_rate": 1.8859868604441233e-06, "loss": 0.48270243406295776, "step": 1700 }, { "epoch": 0.3921613832853026, "grad_norm": 1.6970909551883837, "learning_rate": 1.8858100442042339e-06, "loss": 0.6110135316848755, "step": 1701 }, { "epoch": 0.39239193083573487, "grad_norm": 1.2287166717299047, "learning_rate": 1.8856330992653603e-06, "loss": 0.4867699146270752, "step": 1702 }, { "epoch": 0.39262247838616715, "grad_norm": 1.0530809153005494, "learning_rate": 1.8854560256532098e-06, "loss": 0.5660721659660339, "step": 1703 }, { "epoch": 0.39285302593659943, "grad_norm": 1.3131541890601917, "learning_rate": 1.8852788233935102e-06, "loss": 0.5702673196792603, "step": 1704 }, { "epoch": 0.3930835734870317, "grad_norm": 1.0760886702069608, "learning_rate": 1.8851014925120071e-06, "loss": 0.6213070154190063, "step": 1705 }, { "epoch": 0.393314121037464, "grad_norm": 1.1645752082269338, "learning_rate": 1.8849240330344647e-06, "loss": 0.5179423689842224, "step": 1706 }, { "epoch": 0.3935446685878963, "grad_norm": 1.1314191100724993, "learning_rate": 1.884746444986666e-06, "loss": 0.5448884963989258, "step": 1707 }, { "epoch": 0.39377521613832855, "grad_norm": 1.7831366963927755, "learning_rate": 1.8845687283944124e-06, "loss": 0.4889717698097229, "step": 1708 }, { "epoch": 0.3940057636887608, "grad_norm": 1.0708754186574336, "learning_rate": 1.8843908832835248e-06, "loss": 0.5224671363830566, "step": 1709 }, { "epoch": 0.39423631123919306, "grad_norm": 1.1316735747750348, "learning_rate": 1.8842129096798418e-06, "loss": 0.4966825246810913, "step": 1710 }, { "epoch": 0.39446685878962534, "grad_norm": 1.1170822014486683, "learning_rate": 1.8840348076092215e-06, "loss": 0.425929993391037, "step": 1711 }, { "epoch": 0.3946974063400576, "grad_norm": 1.1640213717882835, "learning_rate": 1.8838565770975399e-06, "loss": 0.5540965795516968, "step": 1712 }, { "epoch": 0.3949279538904899, "grad_norm": 0.944462132080172, "learning_rate": 1.8836782181706922e-06, "loss": 0.5570763349533081, "step": 1713 }, { "epoch": 0.3951585014409222, "grad_norm": 1.0849748508948478, "learning_rate": 1.8834997308545924e-06, "loss": 0.5277444124221802, "step": 1714 }, { "epoch": 0.39538904899135446, "grad_norm": 1.2193838491486786, "learning_rate": 1.8833211151751724e-06, "loss": 0.5402891635894775, "step": 1715 }, { "epoch": 0.39561959654178674, "grad_norm": 1.0252111569197984, "learning_rate": 1.8831423711583834e-06, "loss": 0.6028883457183838, "step": 1716 }, { "epoch": 0.395850144092219, "grad_norm": 1.2831614100796882, "learning_rate": 1.882963498830195e-06, "loss": 0.5924968719482422, "step": 1717 }, { "epoch": 0.3960806916426513, "grad_norm": 1.0174586090749438, "learning_rate": 1.8827844982165955e-06, "loss": 0.5173168778419495, "step": 1718 }, { "epoch": 0.3963112391930836, "grad_norm": 1.165042756566303, "learning_rate": 1.882605369343592e-06, "loss": 0.47797566652297974, "step": 1719 }, { "epoch": 0.39654178674351587, "grad_norm": 1.131691075857789, "learning_rate": 1.8824261122372095e-06, "loss": 0.5594542026519775, "step": 1720 }, { "epoch": 0.39677233429394815, "grad_norm": 1.2640007167386567, "learning_rate": 1.882246726923493e-06, "loss": 0.4570848345756531, "step": 1721 }, { "epoch": 0.39700288184438043, "grad_norm": 0.971236274675553, "learning_rate": 1.8820672134285048e-06, "loss": 0.48942142724990845, "step": 1722 }, { "epoch": 0.39723342939481265, "grad_norm": 1.0417289219929287, "learning_rate": 1.8818875717783263e-06, "loss": 0.5194408893585205, "step": 1723 }, { "epoch": 0.39746397694524493, "grad_norm": 1.3905898231547866, "learning_rate": 1.8817078019990577e-06, "loss": 0.5944944024085999, "step": 1724 }, { "epoch": 0.3976945244956772, "grad_norm": 1.0798692984180562, "learning_rate": 1.881527904116818e-06, "loss": 0.46111130714416504, "step": 1725 }, { "epoch": 0.3979250720461095, "grad_norm": 1.0231368964738081, "learning_rate": 1.881347878157744e-06, "loss": 0.45540904998779297, "step": 1726 }, { "epoch": 0.3981556195965418, "grad_norm": 1.069281496320294, "learning_rate": 1.8811677241479918e-06, "loss": 0.5093264579772949, "step": 1727 }, { "epoch": 0.39838616714697406, "grad_norm": 1.2516879690636435, "learning_rate": 1.8809874421137358e-06, "loss": 0.5825635194778442, "step": 1728 }, { "epoch": 0.39861671469740634, "grad_norm": 1.1927287526332015, "learning_rate": 1.880807032081169e-06, "loss": 0.49300920963287354, "step": 1729 }, { "epoch": 0.3988472622478386, "grad_norm": 1.0829675696070233, "learning_rate": 1.8806264940765036e-06, "loss": 0.6261301040649414, "step": 1730 }, { "epoch": 0.3990778097982709, "grad_norm": 1.3103095493354275, "learning_rate": 1.8804458281259695e-06, "loss": 0.5493542551994324, "step": 1731 }, { "epoch": 0.3993083573487032, "grad_norm": 1.2025684089327158, "learning_rate": 1.8802650342558158e-06, "loss": 0.40386199951171875, "step": 1732 }, { "epoch": 0.39953890489913546, "grad_norm": 1.0567308440121088, "learning_rate": 1.8800841124923097e-06, "loss": 0.6049227118492126, "step": 1733 }, { "epoch": 0.39976945244956774, "grad_norm": 1.2561339846834512, "learning_rate": 1.8799030628617373e-06, "loss": 0.49980294704437256, "step": 1734 }, { "epoch": 0.4, "grad_norm": 1.3692289290302153, "learning_rate": 1.8797218853904035e-06, "loss": 0.5892654657363892, "step": 1735 }, { "epoch": 0.4002305475504323, "grad_norm": 1.3450627943725142, "learning_rate": 1.8795405801046314e-06, "loss": 0.566817045211792, "step": 1736 }, { "epoch": 0.40046109510086453, "grad_norm": 1.0535742982367924, "learning_rate": 1.8793591470307626e-06, "loss": 0.5093469619750977, "step": 1737 }, { "epoch": 0.4006916426512968, "grad_norm": 1.282782359379558, "learning_rate": 1.8791775861951574e-06, "loss": 0.5166475176811218, "step": 1738 }, { "epoch": 0.4009221902017291, "grad_norm": 1.6381884392348767, "learning_rate": 1.8789958976241946e-06, "loss": 0.4210118055343628, "step": 1739 }, { "epoch": 0.40115273775216137, "grad_norm": 1.1490169896035265, "learning_rate": 1.8788140813442724e-06, "loss": 0.48690280318260193, "step": 1740 }, { "epoch": 0.40138328530259365, "grad_norm": 1.2454394412870222, "learning_rate": 1.878632137381806e-06, "loss": 0.5239908695220947, "step": 1741 }, { "epoch": 0.40161383285302593, "grad_norm": 1.167282839522106, "learning_rate": 1.8784500657632304e-06, "loss": 0.5424253940582275, "step": 1742 }, { "epoch": 0.4018443804034582, "grad_norm": 1.4221599808285748, "learning_rate": 1.8782678665149986e-06, "loss": 0.570236325263977, "step": 1743 }, { "epoch": 0.4020749279538905, "grad_norm": 1.2291084700951356, "learning_rate": 1.8780855396635821e-06, "loss": 0.6240546703338623, "step": 1744 }, { "epoch": 0.4023054755043228, "grad_norm": 1.259784949833717, "learning_rate": 1.8779030852354713e-06, "loss": 0.49374204874038696, "step": 1745 }, { "epoch": 0.40253602305475505, "grad_norm": 1.2162048883726875, "learning_rate": 1.8777205032571749e-06, "loss": 0.5949456691741943, "step": 1746 }, { "epoch": 0.40276657060518734, "grad_norm": 1.2411044263502289, "learning_rate": 1.8775377937552199e-06, "loss": 0.4195283055305481, "step": 1747 }, { "epoch": 0.4029971181556196, "grad_norm": 1.024383998377649, "learning_rate": 1.8773549567561523e-06, "loss": 0.494783878326416, "step": 1748 }, { "epoch": 0.4032276657060519, "grad_norm": 0.9805394591840607, "learning_rate": 1.8771719922865369e-06, "loss": 0.5290952324867249, "step": 1749 }, { "epoch": 0.4034582132564842, "grad_norm": 1.1062918130028372, "learning_rate": 1.8769889003729558e-06, "loss": 0.5689871907234192, "step": 1750 }, { "epoch": 0.4036887608069164, "grad_norm": 0.9139238206795531, "learning_rate": 1.8768056810420104e-06, "loss": 0.48432207107543945, "step": 1751 }, { "epoch": 0.4039193083573487, "grad_norm": 1.0993885645774597, "learning_rate": 1.876622334320321e-06, "loss": 0.56418776512146, "step": 1752 }, { "epoch": 0.40414985590778096, "grad_norm": 0.9735263367246159, "learning_rate": 1.8764388602345257e-06, "loss": 0.5624086856842041, "step": 1753 }, { "epoch": 0.40438040345821324, "grad_norm": 1.4524594835395888, "learning_rate": 1.8762552588112816e-06, "loss": 0.5015785694122314, "step": 1754 }, { "epoch": 0.4046109510086455, "grad_norm": 0.9394639774386047, "learning_rate": 1.8760715300772638e-06, "loss": 0.41496244072914124, "step": 1755 }, { "epoch": 0.4048414985590778, "grad_norm": 1.4027706508763087, "learning_rate": 1.875887674059166e-06, "loss": 0.6052544713020325, "step": 1756 }, { "epoch": 0.4050720461095101, "grad_norm": 1.0389427837449594, "learning_rate": 1.8757036907837009e-06, "loss": 0.5018082857131958, "step": 1757 }, { "epoch": 0.40530259365994237, "grad_norm": 1.2000927575372657, "learning_rate": 1.8755195802775996e-06, "loss": 0.5213236808776855, "step": 1758 }, { "epoch": 0.40553314121037465, "grad_norm": 1.0043625854035514, "learning_rate": 1.875335342567611e-06, "loss": 0.5396578907966614, "step": 1759 }, { "epoch": 0.40576368876080693, "grad_norm": 1.1447196519735834, "learning_rate": 1.8751509776805029e-06, "loss": 0.4654269814491272, "step": 1760 }, { "epoch": 0.4059942363112392, "grad_norm": 1.0888985719941529, "learning_rate": 1.8749664856430618e-06, "loss": 0.5281137228012085, "step": 1761 }, { "epoch": 0.4062247838616715, "grad_norm": 1.088793787907681, "learning_rate": 1.8747818664820927e-06, "loss": 0.6041824817657471, "step": 1762 }, { "epoch": 0.40645533141210377, "grad_norm": 0.9910156594308228, "learning_rate": 1.8745971202244184e-06, "loss": 0.45012253522872925, "step": 1763 }, { "epoch": 0.406685878962536, "grad_norm": 0.9756039486828318, "learning_rate": 1.8744122468968806e-06, "loss": 0.5526795387268066, "step": 1764 }, { "epoch": 0.4069164265129683, "grad_norm": 0.9925167062427037, "learning_rate": 1.8742272465263399e-06, "loss": 0.4435810446739197, "step": 1765 }, { "epoch": 0.40714697406340056, "grad_norm": 1.1848709843080019, "learning_rate": 1.8740421191396746e-06, "loss": 0.4480190873146057, "step": 1766 }, { "epoch": 0.40737752161383284, "grad_norm": 1.0852081990027085, "learning_rate": 1.8738568647637819e-06, "loss": 0.6167557239532471, "step": 1767 }, { "epoch": 0.4076080691642651, "grad_norm": 1.2101115264274098, "learning_rate": 1.873671483425577e-06, "loss": 0.5288156270980835, "step": 1768 }, { "epoch": 0.4078386167146974, "grad_norm": 1.1485331887340928, "learning_rate": 1.8734859751519948e-06, "loss": 0.5483111143112183, "step": 1769 }, { "epoch": 0.4080691642651297, "grad_norm": 1.0072561211275919, "learning_rate": 1.8733003399699868e-06, "loss": 0.5418217778205872, "step": 1770 }, { "epoch": 0.40829971181556196, "grad_norm": 1.123107557275528, "learning_rate": 1.873114577906524e-06, "loss": 0.5031202435493469, "step": 1771 }, { "epoch": 0.40853025936599424, "grad_norm": 1.2830585286564486, "learning_rate": 1.8729286889885959e-06, "loss": 0.5669084787368774, "step": 1772 }, { "epoch": 0.4087608069164265, "grad_norm": 1.320215056578161, "learning_rate": 1.87274267324321e-06, "loss": 0.6212494373321533, "step": 1773 }, { "epoch": 0.4089913544668588, "grad_norm": 0.9044281123557663, "learning_rate": 1.872556530697393e-06, "loss": 0.47481727600097656, "step": 1774 }, { "epoch": 0.4092219020172911, "grad_norm": 1.0800809902848345, "learning_rate": 1.872370261378189e-06, "loss": 0.6407462358474731, "step": 1775 }, { "epoch": 0.40945244956772336, "grad_norm": 1.0727598714512039, "learning_rate": 1.8721838653126613e-06, "loss": 0.5465847849845886, "step": 1776 }, { "epoch": 0.40968299711815565, "grad_norm": 1.5052681429553312, "learning_rate": 1.871997342527891e-06, "loss": 0.5464286804199219, "step": 1777 }, { "epoch": 0.40991354466858787, "grad_norm": 1.1767943613463163, "learning_rate": 1.8718106930509778e-06, "loss": 0.46753352880477905, "step": 1778 }, { "epoch": 0.41014409221902015, "grad_norm": 0.973706053688001, "learning_rate": 1.8716239169090406e-06, "loss": 0.4888344705104828, "step": 1779 }, { "epoch": 0.41037463976945243, "grad_norm": 1.1283990917005553, "learning_rate": 1.8714370141292153e-06, "loss": 0.4504971504211426, "step": 1780 }, { "epoch": 0.4106051873198847, "grad_norm": 0.9474408538787359, "learning_rate": 1.8712499847386574e-06, "loss": 0.403839111328125, "step": 1781 }, { "epoch": 0.410835734870317, "grad_norm": 1.2416666688249398, "learning_rate": 1.8710628287645398e-06, "loss": 0.5075634717941284, "step": 1782 }, { "epoch": 0.4110662824207493, "grad_norm": 1.0201988702930243, "learning_rate": 1.870875546234055e-06, "loss": 0.4988376498222351, "step": 1783 }, { "epoch": 0.41129682997118155, "grad_norm": 1.1287965100043822, "learning_rate": 1.8706881371744128e-06, "loss": 0.5289707183837891, "step": 1784 }, { "epoch": 0.41152737752161384, "grad_norm": 1.1288272138801552, "learning_rate": 1.8705006016128418e-06, "loss": 0.48820483684539795, "step": 1785 }, { "epoch": 0.4117579250720461, "grad_norm": 1.1021300108323646, "learning_rate": 1.870312939576589e-06, "loss": 0.523395836353302, "step": 1786 }, { "epoch": 0.4119884726224784, "grad_norm": 1.0502779868402847, "learning_rate": 1.8701251510929197e-06, "loss": 0.5213379263877869, "step": 1787 }, { "epoch": 0.4122190201729107, "grad_norm": 1.51700777394792, "learning_rate": 1.869937236189118e-06, "loss": 0.5818710923194885, "step": 1788 }, { "epoch": 0.41244956772334296, "grad_norm": 1.217213297698164, "learning_rate": 1.8697491948924854e-06, "loss": 0.599867045879364, "step": 1789 }, { "epoch": 0.41268011527377524, "grad_norm": 1.1893745188668343, "learning_rate": 1.8695610272303426e-06, "loss": 0.562250018119812, "step": 1790 }, { "epoch": 0.4129106628242075, "grad_norm": 1.054818186219627, "learning_rate": 1.8693727332300285e-06, "loss": 0.5593204498291016, "step": 1791 }, { "epoch": 0.41314121037463974, "grad_norm": 1.0866589405542582, "learning_rate": 1.8691843129189e-06, "loss": 0.585111141204834, "step": 1792 }, { "epoch": 0.413371757925072, "grad_norm": 1.1845299989706493, "learning_rate": 1.868995766324333e-06, "loss": 0.582213282585144, "step": 1793 }, { "epoch": 0.4136023054755043, "grad_norm": 1.1273952476109894, "learning_rate": 1.868807093473721e-06, "loss": 0.5196056365966797, "step": 1794 }, { "epoch": 0.4138328530259366, "grad_norm": 1.6255229812391085, "learning_rate": 1.8686182943944764e-06, "loss": 0.5141228437423706, "step": 1795 }, { "epoch": 0.41406340057636887, "grad_norm": 1.0388777012541068, "learning_rate": 1.8684293691140296e-06, "loss": 0.5710434913635254, "step": 1796 }, { "epoch": 0.41429394812680115, "grad_norm": 1.1143205444284763, "learning_rate": 1.8682403176598296e-06, "loss": 0.5534354448318481, "step": 1797 }, { "epoch": 0.41452449567723343, "grad_norm": 1.118968020644909, "learning_rate": 1.8680511400593435e-06, "loss": 0.4919063448905945, "step": 1798 }, { "epoch": 0.4147550432276657, "grad_norm": 1.0919336938400037, "learning_rate": 1.867861836340057e-06, "loss": 0.5988746881484985, "step": 1799 }, { "epoch": 0.414985590778098, "grad_norm": 1.0860449740836524, "learning_rate": 1.867672406529474e-06, "loss": 0.4136649966239929, "step": 1800 }, { "epoch": 0.41521613832853027, "grad_norm": 1.0694509491683928, "learning_rate": 1.8674828506551164e-06, "loss": 0.539253294467926, "step": 1801 }, { "epoch": 0.41544668587896255, "grad_norm": 1.05460638777189, "learning_rate": 1.867293168744525e-06, "loss": 0.5144013166427612, "step": 1802 }, { "epoch": 0.41567723342939483, "grad_norm": 1.4412084541043333, "learning_rate": 1.8671033608252583e-06, "loss": 0.5293606519699097, "step": 1803 }, { "epoch": 0.4159077809798271, "grad_norm": 1.2991891107249438, "learning_rate": 1.8669134269248933e-06, "loss": 0.5156667232513428, "step": 1804 }, { "epoch": 0.4161383285302594, "grad_norm": 1.459589715601901, "learning_rate": 1.8667233670710258e-06, "loss": 0.5277712345123291, "step": 1805 }, { "epoch": 0.4163688760806916, "grad_norm": 1.109858006621566, "learning_rate": 1.8665331812912699e-06, "loss": 0.5984486937522888, "step": 1806 }, { "epoch": 0.4165994236311239, "grad_norm": 1.1513901451974835, "learning_rate": 1.8663428696132567e-06, "loss": 0.626596212387085, "step": 1807 }, { "epoch": 0.4168299711815562, "grad_norm": 1.1512225176078512, "learning_rate": 1.866152432064637e-06, "loss": 0.5107407569885254, "step": 1808 }, { "epoch": 0.41706051873198846, "grad_norm": 1.0987389433084875, "learning_rate": 1.8659618686730794e-06, "loss": 0.5654654502868652, "step": 1809 }, { "epoch": 0.41729106628242074, "grad_norm": 1.0577605574421043, "learning_rate": 1.8657711794662706e-06, "loss": 0.49788808822631836, "step": 1810 }, { "epoch": 0.417521613832853, "grad_norm": 0.9387772619602837, "learning_rate": 1.8655803644719158e-06, "loss": 0.5334138870239258, "step": 1811 }, { "epoch": 0.4177521613832853, "grad_norm": 1.143732906797826, "learning_rate": 1.8653894237177387e-06, "loss": 0.5429027080535889, "step": 1812 }, { "epoch": 0.4179827089337176, "grad_norm": 1.2110297581702978, "learning_rate": 1.8651983572314806e-06, "loss": 0.5475035905838013, "step": 1813 }, { "epoch": 0.41821325648414986, "grad_norm": 0.8994137789901391, "learning_rate": 1.8650071650409021e-06, "loss": 0.5310901403427124, "step": 1814 }, { "epoch": 0.41844380403458215, "grad_norm": 1.1148250587589583, "learning_rate": 1.8648158471737806e-06, "loss": 0.5586632490158081, "step": 1815 }, { "epoch": 0.4186743515850144, "grad_norm": 1.1882298726835794, "learning_rate": 1.8646244036579132e-06, "loss": 0.5620261430740356, "step": 1816 }, { "epoch": 0.4189048991354467, "grad_norm": 1.1768360700686766, "learning_rate": 1.8644328345211141e-06, "loss": 0.46965062618255615, "step": 1817 }, { "epoch": 0.419135446685879, "grad_norm": 1.0420774163319328, "learning_rate": 1.864241139791217e-06, "loss": 0.6193602085113525, "step": 1818 }, { "epoch": 0.4193659942363112, "grad_norm": 1.4471262040360602, "learning_rate": 1.8640493194960726e-06, "loss": 0.555870532989502, "step": 1819 }, { "epoch": 0.4195965417867435, "grad_norm": 1.4906810572373932, "learning_rate": 1.863857373663551e-06, "loss": 0.6341157555580139, "step": 1820 }, { "epoch": 0.4198270893371758, "grad_norm": 1.0990483949708958, "learning_rate": 1.8636653023215392e-06, "loss": 0.5381972789764404, "step": 1821 }, { "epoch": 0.42005763688760805, "grad_norm": 1.0324608703235092, "learning_rate": 1.8634731054979435e-06, "loss": 0.6126211881637573, "step": 1822 }, { "epoch": 0.42028818443804034, "grad_norm": 1.2532481877249375, "learning_rate": 1.8632807832206884e-06, "loss": 0.5185278058052063, "step": 1823 }, { "epoch": 0.4205187319884726, "grad_norm": 1.179777187779905, "learning_rate": 1.8630883355177156e-06, "loss": 0.5594161748886108, "step": 1824 }, { "epoch": 0.4207492795389049, "grad_norm": 1.0743944299141273, "learning_rate": 1.8628957624169863e-06, "loss": 0.46694353222846985, "step": 1825 }, { "epoch": 0.4209798270893372, "grad_norm": 0.9352998625668567, "learning_rate": 1.8627030639464794e-06, "loss": 0.5055704116821289, "step": 1826 }, { "epoch": 0.42121037463976946, "grad_norm": 0.956496667483655, "learning_rate": 1.8625102401341919e-06, "loss": 0.5006394386291504, "step": 1827 }, { "epoch": 0.42144092219020174, "grad_norm": 1.0724067921691902, "learning_rate": 1.8623172910081388e-06, "loss": 0.4366666078567505, "step": 1828 }, { "epoch": 0.421671469740634, "grad_norm": 1.1988055621631712, "learning_rate": 1.8621242165963539e-06, "loss": 0.521892786026001, "step": 1829 }, { "epoch": 0.4219020172910663, "grad_norm": 0.9264285940430287, "learning_rate": 1.8619310169268889e-06, "loss": 0.4993744492530823, "step": 1830 }, { "epoch": 0.4221325648414986, "grad_norm": 1.127882483842638, "learning_rate": 1.8617376920278134e-06, "loss": 0.5524100065231323, "step": 1831 }, { "epoch": 0.42236311239193086, "grad_norm": 1.0452957425401903, "learning_rate": 1.8615442419272158e-06, "loss": 0.5477861166000366, "step": 1832 }, { "epoch": 0.4225936599423631, "grad_norm": 1.1661409117320833, "learning_rate": 1.8613506666532026e-06, "loss": 0.534308910369873, "step": 1833 }, { "epoch": 0.42282420749279537, "grad_norm": 0.937996924949033, "learning_rate": 1.861156966233898e-06, "loss": 0.537063479423523, "step": 1834 }, { "epoch": 0.42305475504322765, "grad_norm": 1.238788764410924, "learning_rate": 1.8609631406974441e-06, "loss": 0.5953266620635986, "step": 1835 }, { "epoch": 0.42328530259365993, "grad_norm": 1.0946370940172376, "learning_rate": 1.8607691900720028e-06, "loss": 0.5244371891021729, "step": 1836 }, { "epoch": 0.4235158501440922, "grad_norm": 0.9951162728945512, "learning_rate": 1.8605751143857525e-06, "loss": 0.5036677122116089, "step": 1837 }, { "epoch": 0.4237463976945245, "grad_norm": 1.2314528252971808, "learning_rate": 1.8603809136668901e-06, "loss": 0.5777844190597534, "step": 1838 }, { "epoch": 0.42397694524495677, "grad_norm": 1.0285277747549548, "learning_rate": 1.8601865879436315e-06, "loss": 0.5430940985679626, "step": 1839 }, { "epoch": 0.42420749279538905, "grad_norm": 0.992815671689248, "learning_rate": 1.8599921372442101e-06, "loss": 0.5749884843826294, "step": 1840 }, { "epoch": 0.42443804034582133, "grad_norm": 1.115697410389442, "learning_rate": 1.8597975615968778e-06, "loss": 0.6325528621673584, "step": 1841 }, { "epoch": 0.4246685878962536, "grad_norm": 1.165073474333435, "learning_rate": 1.8596028610299037e-06, "loss": 0.498636931180954, "step": 1842 }, { "epoch": 0.4248991354466859, "grad_norm": 1.0192254593494965, "learning_rate": 1.8594080355715763e-06, "loss": 0.47933921217918396, "step": 1843 }, { "epoch": 0.4251296829971182, "grad_norm": 1.0742766403042006, "learning_rate": 1.8592130852502015e-06, "loss": 0.560950756072998, "step": 1844 }, { "epoch": 0.42536023054755046, "grad_norm": 1.0761528552561603, "learning_rate": 1.8590180100941035e-06, "loss": 0.4497816562652588, "step": 1845 }, { "epoch": 0.42559077809798274, "grad_norm": 1.043194198187338, "learning_rate": 1.858822810131625e-06, "loss": 0.5248370170593262, "step": 1846 }, { "epoch": 0.42582132564841496, "grad_norm": 1.2610988442234758, "learning_rate": 1.8586274853911263e-06, "loss": 0.5351696014404297, "step": 1847 }, { "epoch": 0.42605187319884724, "grad_norm": 0.9804766807097018, "learning_rate": 1.8584320359009861e-06, "loss": 0.4677377939224243, "step": 1848 }, { "epoch": 0.4262824207492795, "grad_norm": 1.0904414335677286, "learning_rate": 1.8582364616896014e-06, "loss": 0.5866556763648987, "step": 1849 }, { "epoch": 0.4265129682997118, "grad_norm": 1.0169583686468386, "learning_rate": 1.8580407627853864e-06, "loss": 0.5760546922683716, "step": 1850 }, { "epoch": 0.4267435158501441, "grad_norm": 1.2888742381353209, "learning_rate": 1.8578449392167749e-06, "loss": 0.5178868174552917, "step": 1851 }, { "epoch": 0.42697406340057636, "grad_norm": 1.102440281004502, "learning_rate": 1.8576489910122178e-06, "loss": 0.546269416809082, "step": 1852 }, { "epoch": 0.42720461095100865, "grad_norm": 1.18487190612363, "learning_rate": 1.8574529182001838e-06, "loss": 0.5659330487251282, "step": 1853 }, { "epoch": 0.4274351585014409, "grad_norm": 1.2802501916805824, "learning_rate": 1.8572567208091612e-06, "loss": 0.6241079568862915, "step": 1854 }, { "epoch": 0.4276657060518732, "grad_norm": 1.1080199645869717, "learning_rate": 1.8570603988676545e-06, "loss": 0.4504891633987427, "step": 1855 }, { "epoch": 0.4278962536023055, "grad_norm": 1.5835233717071364, "learning_rate": 1.856863952404188e-06, "loss": 0.5238963961601257, "step": 1856 }, { "epoch": 0.42812680115273777, "grad_norm": 0.963983780535358, "learning_rate": 1.8566673814473027e-06, "loss": 0.45989540219306946, "step": 1857 }, { "epoch": 0.42835734870317005, "grad_norm": 1.1207149880393046, "learning_rate": 1.856470686025559e-06, "loss": 0.5468976497650146, "step": 1858 }, { "epoch": 0.42858789625360233, "grad_norm": 1.1600381921463452, "learning_rate": 1.8562738661675342e-06, "loss": 0.5318598747253418, "step": 1859 }, { "epoch": 0.42881844380403455, "grad_norm": 1.2406291189558882, "learning_rate": 1.856076921901824e-06, "loss": 0.5824429988861084, "step": 1860 }, { "epoch": 0.42904899135446684, "grad_norm": 1.352026717175643, "learning_rate": 1.855879853257043e-06, "loss": 0.5112953186035156, "step": 1861 }, { "epoch": 0.4292795389048991, "grad_norm": 1.1707177653661884, "learning_rate": 1.8556826602618228e-06, "loss": 0.4736165404319763, "step": 1862 }, { "epoch": 0.4295100864553314, "grad_norm": 1.0256476637781957, "learning_rate": 1.8554853429448132e-06, "loss": 0.5070540308952332, "step": 1863 }, { "epoch": 0.4297406340057637, "grad_norm": 1.323605946142347, "learning_rate": 1.855287901334683e-06, "loss": 0.5629868507385254, "step": 1864 }, { "epoch": 0.42997118155619596, "grad_norm": 1.2516434837227657, "learning_rate": 1.8550903354601178e-06, "loss": 0.556925892829895, "step": 1865 }, { "epoch": 0.43020172910662824, "grad_norm": 1.197839400328371, "learning_rate": 1.8548926453498228e-06, "loss": 0.5992434024810791, "step": 1866 }, { "epoch": 0.4304322766570605, "grad_norm": 1.0775701233448667, "learning_rate": 1.8546948310325195e-06, "loss": 0.5160760879516602, "step": 1867 }, { "epoch": 0.4306628242074928, "grad_norm": 1.4261590730738651, "learning_rate": 1.8544968925369479e-06, "loss": 0.4804280996322632, "step": 1868 }, { "epoch": 0.4308933717579251, "grad_norm": 1.0367943571108267, "learning_rate": 1.8542988298918675e-06, "loss": 0.546431303024292, "step": 1869 }, { "epoch": 0.43112391930835736, "grad_norm": 1.1416319892947229, "learning_rate": 1.8541006431260542e-06, "loss": 0.504807710647583, "step": 1870 }, { "epoch": 0.43135446685878964, "grad_norm": 0.9126882501045127, "learning_rate": 1.8539023322683023e-06, "loss": 0.5243191719055176, "step": 1871 }, { "epoch": 0.4315850144092219, "grad_norm": 1.0353650163291783, "learning_rate": 1.8537038973474245e-06, "loss": 0.5665335655212402, "step": 1872 }, { "epoch": 0.4318155619596542, "grad_norm": 0.9834001102421943, "learning_rate": 1.8535053383922516e-06, "loss": 0.5381483435630798, "step": 1873 }, { "epoch": 0.43204610951008643, "grad_norm": 1.1487056035004923, "learning_rate": 1.8533066554316317e-06, "loss": 0.6149561405181885, "step": 1874 }, { "epoch": 0.4322766570605187, "grad_norm": 1.2246632391367256, "learning_rate": 1.8531078484944315e-06, "loss": 0.46560800075531006, "step": 1875 }, { "epoch": 0.432507204610951, "grad_norm": 1.1446369126118372, "learning_rate": 1.8529089176095356e-06, "loss": 0.5057603120803833, "step": 1876 }, { "epoch": 0.43273775216138327, "grad_norm": 1.4054085121564255, "learning_rate": 1.8527098628058467e-06, "loss": 0.5998879671096802, "step": 1877 }, { "epoch": 0.43296829971181555, "grad_norm": 1.280386466022756, "learning_rate": 1.852510684112285e-06, "loss": 0.4326424300670624, "step": 1878 }, { "epoch": 0.43319884726224783, "grad_norm": 1.3156502516901807, "learning_rate": 1.8523113815577898e-06, "loss": 0.5775609016418457, "step": 1879 }, { "epoch": 0.4334293948126801, "grad_norm": 1.1983240449158061, "learning_rate": 1.852111955171317e-06, "loss": 0.4824531674385071, "step": 1880 }, { "epoch": 0.4336599423631124, "grad_norm": 1.0538528260927602, "learning_rate": 1.8519124049818415e-06, "loss": 0.5059521198272705, "step": 1881 }, { "epoch": 0.4338904899135447, "grad_norm": 1.3615012333103178, "learning_rate": 1.851712731018356e-06, "loss": 0.605829119682312, "step": 1882 }, { "epoch": 0.43412103746397696, "grad_norm": 1.1443647209997476, "learning_rate": 1.8515129333098707e-06, "loss": 0.5453581809997559, "step": 1883 }, { "epoch": 0.43435158501440924, "grad_norm": 0.9094855258644963, "learning_rate": 1.8513130118854144e-06, "loss": 0.4414307475090027, "step": 1884 }, { "epoch": 0.4345821325648415, "grad_norm": 1.187455945175179, "learning_rate": 1.8511129667740333e-06, "loss": 0.46538764238357544, "step": 1885 }, { "epoch": 0.4348126801152738, "grad_norm": 1.2074916052806453, "learning_rate": 1.8509127980047925e-06, "loss": 0.5243799686431885, "step": 1886 }, { "epoch": 0.4350432276657061, "grad_norm": 1.1750450903566612, "learning_rate": 1.8507125056067736e-06, "loss": 0.49447011947631836, "step": 1887 }, { "epoch": 0.4352737752161383, "grad_norm": 1.2669723260527122, "learning_rate": 1.8505120896090775e-06, "loss": 0.5576674342155457, "step": 1888 }, { "epoch": 0.4355043227665706, "grad_norm": 1.252977351011341, "learning_rate": 1.8503115500408226e-06, "loss": 0.534508466720581, "step": 1889 }, { "epoch": 0.43573487031700286, "grad_norm": 1.1653246216034023, "learning_rate": 1.8501108869311452e-06, "loss": 0.6251751184463501, "step": 1890 }, { "epoch": 0.43596541786743515, "grad_norm": 1.248545901064603, "learning_rate": 1.8499101003091993e-06, "loss": 0.5061008334159851, "step": 1891 }, { "epoch": 0.4361959654178674, "grad_norm": 1.3943513634974303, "learning_rate": 1.8497091902041573e-06, "loss": 0.5893880128860474, "step": 1892 }, { "epoch": 0.4364265129682997, "grad_norm": 1.1272604118781044, "learning_rate": 1.8495081566452093e-06, "loss": 0.4692481458187103, "step": 1893 }, { "epoch": 0.436657060518732, "grad_norm": 1.1428179360440205, "learning_rate": 1.8493069996615633e-06, "loss": 0.5942026376724243, "step": 1894 }, { "epoch": 0.43688760806916427, "grad_norm": 1.006636208596501, "learning_rate": 1.8491057192824456e-06, "loss": 0.45053642988204956, "step": 1895 }, { "epoch": 0.43711815561959655, "grad_norm": 1.2982117381258422, "learning_rate": 1.8489043155371e-06, "loss": 0.5858089327812195, "step": 1896 }, { "epoch": 0.43734870317002883, "grad_norm": 1.4148475744082645, "learning_rate": 1.8487027884547878e-06, "loss": 0.5926138162612915, "step": 1897 }, { "epoch": 0.4375792507204611, "grad_norm": 1.1520460405179238, "learning_rate": 1.8485011380647898e-06, "loss": 0.5945650339126587, "step": 1898 }, { "epoch": 0.4378097982708934, "grad_norm": 1.082365437697179, "learning_rate": 1.8482993643964033e-06, "loss": 0.47609788179397583, "step": 1899 }, { "epoch": 0.43804034582132567, "grad_norm": 1.1243277241033836, "learning_rate": 1.8480974674789435e-06, "loss": 0.53432697057724, "step": 1900 }, { "epoch": 0.43827089337175795, "grad_norm": 1.1561114509211146, "learning_rate": 1.8478954473417448e-06, "loss": 0.5097007155418396, "step": 1901 }, { "epoch": 0.4385014409221902, "grad_norm": 1.2348452315788374, "learning_rate": 1.8476933040141573e-06, "loss": 0.4931800365447998, "step": 1902 }, { "epoch": 0.43873198847262246, "grad_norm": 1.08234776361571, "learning_rate": 1.8474910375255516e-06, "loss": 0.4183500409126282, "step": 1903 }, { "epoch": 0.43896253602305474, "grad_norm": 1.4177312445162358, "learning_rate": 1.8472886479053144e-06, "loss": 0.5541513562202454, "step": 1904 }, { "epoch": 0.439193083573487, "grad_norm": 1.2624803894699181, "learning_rate": 1.8470861351828508e-06, "loss": 0.5068531632423401, "step": 1905 }, { "epoch": 0.4394236311239193, "grad_norm": 1.2393049802147562, "learning_rate": 1.8468834993875837e-06, "loss": 0.5673441886901855, "step": 1906 }, { "epoch": 0.4396541786743516, "grad_norm": 1.243028720976925, "learning_rate": 1.8466807405489543e-06, "loss": 0.5551744699478149, "step": 1907 }, { "epoch": 0.43988472622478386, "grad_norm": 1.2837612370283014, "learning_rate": 1.846477858696421e-06, "loss": 0.5674556493759155, "step": 1908 }, { "epoch": 0.44011527377521614, "grad_norm": 1.1781882177034535, "learning_rate": 1.8462748538594606e-06, "loss": 0.5947737097740173, "step": 1909 }, { "epoch": 0.4403458213256484, "grad_norm": 1.1459887068925942, "learning_rate": 1.8460717260675675e-06, "loss": 0.4657576084136963, "step": 1910 }, { "epoch": 0.4405763688760807, "grad_norm": 1.1740915126815885, "learning_rate": 1.8458684753502541e-06, "loss": 0.5272006988525391, "step": 1911 }, { "epoch": 0.440806916426513, "grad_norm": 0.9441836906401117, "learning_rate": 1.8456651017370507e-06, "loss": 0.43320992588996887, "step": 1912 }, { "epoch": 0.44103746397694527, "grad_norm": 1.303994087100911, "learning_rate": 1.8454616052575051e-06, "loss": 0.5666035413742065, "step": 1913 }, { "epoch": 0.44126801152737755, "grad_norm": 1.207816046586181, "learning_rate": 1.845257985941184e-06, "loss": 0.4790865182876587, "step": 1914 }, { "epoch": 0.44149855907780977, "grad_norm": 1.011869278505435, "learning_rate": 1.8450542438176702e-06, "loss": 0.47281613945961, "step": 1915 }, { "epoch": 0.44172910662824205, "grad_norm": 1.176772034844018, "learning_rate": 1.8448503789165656e-06, "loss": 0.460035115480423, "step": 1916 }, { "epoch": 0.44195965417867433, "grad_norm": 1.422851587462456, "learning_rate": 1.8446463912674898e-06, "loss": 0.5391891002655029, "step": 1917 }, { "epoch": 0.4421902017291066, "grad_norm": 1.0200200858868254, "learning_rate": 1.84444228090008e-06, "loss": 0.5077770352363586, "step": 1918 }, { "epoch": 0.4424207492795389, "grad_norm": 1.1154128808888242, "learning_rate": 1.8442380478439914e-06, "loss": 0.4816160202026367, "step": 1919 }, { "epoch": 0.4426512968299712, "grad_norm": 1.440872793963344, "learning_rate": 1.844033692128897e-06, "loss": 0.5676149725914001, "step": 1920 }, { "epoch": 0.44288184438040346, "grad_norm": 1.2363025568528492, "learning_rate": 1.843829213784487e-06, "loss": 0.5344497561454773, "step": 1921 }, { "epoch": 0.44311239193083574, "grad_norm": 0.9579491754815904, "learning_rate": 1.843624612840471e-06, "loss": 0.48390740156173706, "step": 1922 }, { "epoch": 0.443342939481268, "grad_norm": 0.9939274394683353, "learning_rate": 1.8434198893265744e-06, "loss": 0.47681474685668945, "step": 1923 }, { "epoch": 0.4435734870317003, "grad_norm": 1.3607634076805728, "learning_rate": 1.843215043272542e-06, "loss": 0.5424403548240662, "step": 1924 }, { "epoch": 0.4438040345821326, "grad_norm": 1.132944592162935, "learning_rate": 1.8430100747081357e-06, "loss": 0.5096845030784607, "step": 1925 }, { "epoch": 0.44403458213256486, "grad_norm": 1.2412079673939016, "learning_rate": 1.842804983663135e-06, "loss": 0.4748343229293823, "step": 1926 }, { "epoch": 0.44426512968299714, "grad_norm": 1.1920300276188596, "learning_rate": 1.8425997701673377e-06, "loss": 0.498948335647583, "step": 1927 }, { "epoch": 0.4444956772334294, "grad_norm": 1.2541237667041538, "learning_rate": 1.842394434250559e-06, "loss": 0.5499871969223022, "step": 1928 }, { "epoch": 0.44472622478386165, "grad_norm": 1.0416927811353323, "learning_rate": 1.8421889759426327e-06, "loss": 0.5115629434585571, "step": 1929 }, { "epoch": 0.4449567723342939, "grad_norm": 1.125699371509029, "learning_rate": 1.841983395273409e-06, "loss": 0.6036213636398315, "step": 1930 }, { "epoch": 0.4451873198847262, "grad_norm": 1.0090846446480575, "learning_rate": 1.8417776922727572e-06, "loss": 0.5032718777656555, "step": 1931 }, { "epoch": 0.4454178674351585, "grad_norm": 1.094758440725449, "learning_rate": 1.8415718669705633e-06, "loss": 0.5102940797805786, "step": 1932 }, { "epoch": 0.44564841498559077, "grad_norm": 1.242891540202676, "learning_rate": 1.8413659193967322e-06, "loss": 0.5087441205978394, "step": 1933 }, { "epoch": 0.44587896253602305, "grad_norm": 1.0962110976461552, "learning_rate": 1.841159849581185e-06, "loss": 0.49124574661254883, "step": 1934 }, { "epoch": 0.44610951008645533, "grad_norm": 1.065512972031945, "learning_rate": 1.8409536575538627e-06, "loss": 0.4788215160369873, "step": 1935 }, { "epoch": 0.4463400576368876, "grad_norm": 1.0673448954130573, "learning_rate": 1.8407473433447218e-06, "loss": 0.47166967391967773, "step": 1936 }, { "epoch": 0.4465706051873199, "grad_norm": 1.2748997262930661, "learning_rate": 1.840540906983738e-06, "loss": 0.5045751333236694, "step": 1937 }, { "epoch": 0.44680115273775217, "grad_norm": 1.286853008681351, "learning_rate": 1.8403343485009044e-06, "loss": 0.45810800790786743, "step": 1938 }, { "epoch": 0.44703170028818445, "grad_norm": 1.1970009042005485, "learning_rate": 1.840127667926232e-06, "loss": 0.5283209085464478, "step": 1939 }, { "epoch": 0.44726224783861673, "grad_norm": 1.1262937811137792, "learning_rate": 1.8399208652897492e-06, "loss": 0.5559916496276855, "step": 1940 }, { "epoch": 0.447492795389049, "grad_norm": 1.1722580551533806, "learning_rate": 1.839713940621502e-06, "loss": 0.5017634034156799, "step": 1941 }, { "epoch": 0.4477233429394813, "grad_norm": 1.1582944210519264, "learning_rate": 1.8395068939515545e-06, "loss": 0.4802021384239197, "step": 1942 }, { "epoch": 0.4479538904899135, "grad_norm": 1.3831202159733433, "learning_rate": 1.8392997253099887e-06, "loss": 0.610235333442688, "step": 1943 }, { "epoch": 0.4481844380403458, "grad_norm": 1.1590182995459894, "learning_rate": 1.839092434726904e-06, "loss": 0.5174393057823181, "step": 1944 }, { "epoch": 0.4484149855907781, "grad_norm": 1.3203957347602924, "learning_rate": 1.8388850222324171e-06, "loss": 0.5430256128311157, "step": 1945 }, { "epoch": 0.44864553314121036, "grad_norm": 1.102963606742874, "learning_rate": 1.8386774878566635e-06, "loss": 0.5431778430938721, "step": 1946 }, { "epoch": 0.44887608069164264, "grad_norm": 1.2399915730702564, "learning_rate": 1.8384698316297952e-06, "loss": 0.5374635457992554, "step": 1947 }, { "epoch": 0.4491066282420749, "grad_norm": 1.294777639188434, "learning_rate": 1.8382620535819831e-06, "loss": 0.5724903345108032, "step": 1948 }, { "epoch": 0.4493371757925072, "grad_norm": 1.1486927176754915, "learning_rate": 1.8380541537434148e-06, "loss": 0.6397042274475098, "step": 1949 }, { "epoch": 0.4495677233429395, "grad_norm": 1.2579568239218235, "learning_rate": 1.8378461321442961e-06, "loss": 0.6346575021743774, "step": 1950 }, { "epoch": 0.44979827089337177, "grad_norm": 1.3678754797352473, "learning_rate": 1.83763798881485e-06, "loss": 0.5354228019714355, "step": 1951 }, { "epoch": 0.45002881844380405, "grad_norm": 1.05476245879278, "learning_rate": 1.8374297237853185e-06, "loss": 0.42358189821243286, "step": 1952 }, { "epoch": 0.4502593659942363, "grad_norm": 1.182016543993005, "learning_rate": 1.8372213370859592e-06, "loss": 0.5547488331794739, "step": 1953 }, { "epoch": 0.4504899135446686, "grad_norm": 1.3456900721260578, "learning_rate": 1.8370128287470493e-06, "loss": 0.5722674131393433, "step": 1954 }, { "epoch": 0.4507204610951009, "grad_norm": 1.16212266758737, "learning_rate": 1.8368041987988824e-06, "loss": 0.46524208784103394, "step": 1955 }, { "epoch": 0.4509510086455331, "grad_norm": 1.0688055041729316, "learning_rate": 1.836595447271771e-06, "loss": 0.4921358525753021, "step": 1956 }, { "epoch": 0.4511815561959654, "grad_norm": 1.3682259724330377, "learning_rate": 1.8363865741960436e-06, "loss": 0.4774383008480072, "step": 1957 }, { "epoch": 0.4514121037463977, "grad_norm": 1.3347835036444615, "learning_rate": 1.8361775796020481e-06, "loss": 0.5653456449508667, "step": 1958 }, { "epoch": 0.45164265129682996, "grad_norm": 1.1677588385218338, "learning_rate": 1.8359684635201487e-06, "loss": 0.49100229144096375, "step": 1959 }, { "epoch": 0.45187319884726224, "grad_norm": 1.152014645660814, "learning_rate": 1.8357592259807276e-06, "loss": 0.5214860439300537, "step": 1960 }, { "epoch": 0.4521037463976945, "grad_norm": 1.3874409389020983, "learning_rate": 1.8355498670141859e-06, "loss": 0.5328176021575928, "step": 1961 }, { "epoch": 0.4523342939481268, "grad_norm": 1.4332841300178276, "learning_rate": 1.83534038665094e-06, "loss": 0.5857157111167908, "step": 1962 }, { "epoch": 0.4525648414985591, "grad_norm": 1.2855323158344572, "learning_rate": 1.8351307849214258e-06, "loss": 0.6459437608718872, "step": 1963 }, { "epoch": 0.45279538904899136, "grad_norm": 1.2744824051695016, "learning_rate": 1.8349210618560967e-06, "loss": 0.6034260988235474, "step": 1964 }, { "epoch": 0.45302593659942364, "grad_norm": 1.3214790421577447, "learning_rate": 1.8347112174854224e-06, "loss": 0.49320968985557556, "step": 1965 }, { "epoch": 0.4532564841498559, "grad_norm": 1.1125399152311024, "learning_rate": 1.834501251839892e-06, "loss": 0.4928455352783203, "step": 1966 }, { "epoch": 0.4534870317002882, "grad_norm": 1.092912119431471, "learning_rate": 1.8342911649500104e-06, "loss": 0.4440082013607025, "step": 1967 }, { "epoch": 0.4537175792507205, "grad_norm": 1.129522243569259, "learning_rate": 1.8340809568463016e-06, "loss": 0.4822162389755249, "step": 1968 }, { "epoch": 0.45394812680115276, "grad_norm": 1.2084769396869046, "learning_rate": 1.8338706275593066e-06, "loss": 0.5176507830619812, "step": 1969 }, { "epoch": 0.454178674351585, "grad_norm": 1.0817164261968486, "learning_rate": 1.8336601771195839e-06, "loss": 0.49748751521110535, "step": 1970 }, { "epoch": 0.45440922190201727, "grad_norm": 1.4007065984620797, "learning_rate": 1.83344960555771e-06, "loss": 0.6159261465072632, "step": 1971 }, { "epoch": 0.45463976945244955, "grad_norm": 1.0943694323176245, "learning_rate": 1.8332389129042784e-06, "loss": 0.4831198453903198, "step": 1972 }, { "epoch": 0.45487031700288183, "grad_norm": 1.0823216779103817, "learning_rate": 1.833028099189901e-06, "loss": 0.5536303520202637, "step": 1973 }, { "epoch": 0.4551008645533141, "grad_norm": 1.0841447387467815, "learning_rate": 1.8328171644452067e-06, "loss": 0.5078235268592834, "step": 1974 }, { "epoch": 0.4553314121037464, "grad_norm": 1.2162049122078602, "learning_rate": 1.8326061087008418e-06, "loss": 0.37912893295288086, "step": 1975 }, { "epoch": 0.45556195965417867, "grad_norm": 1.1731934530051857, "learning_rate": 1.8323949319874708e-06, "loss": 0.4849812090396881, "step": 1976 }, { "epoch": 0.45579250720461095, "grad_norm": 1.3841903464729448, "learning_rate": 1.8321836343357752e-06, "loss": 0.4759640097618103, "step": 1977 }, { "epoch": 0.45602305475504323, "grad_norm": 1.1855814813320218, "learning_rate": 1.8319722157764549e-06, "loss": 0.5319406986236572, "step": 1978 }, { "epoch": 0.4562536023054755, "grad_norm": 1.1795216044414802, "learning_rate": 1.8317606763402265e-06, "loss": 0.4678229093551636, "step": 1979 }, { "epoch": 0.4564841498559078, "grad_norm": 1.1514814772775641, "learning_rate": 1.8315490160578243e-06, "loss": 0.5077648162841797, "step": 1980 }, { "epoch": 0.4567146974063401, "grad_norm": 1.123179531902537, "learning_rate": 1.8313372349600002e-06, "loss": 0.5307132005691528, "step": 1981 }, { "epoch": 0.45694524495677236, "grad_norm": 1.2210423439646636, "learning_rate": 1.8311253330775247e-06, "loss": 0.5892186760902405, "step": 1982 }, { "epoch": 0.45717579250720464, "grad_norm": 1.1778654127023787, "learning_rate": 1.830913310441184e-06, "loss": 0.5626486539840698, "step": 1983 }, { "epoch": 0.45740634005763686, "grad_norm": 0.999814167949733, "learning_rate": 1.830701167081783e-06, "loss": 0.4591505825519562, "step": 1984 }, { "epoch": 0.45763688760806914, "grad_norm": 1.4041250876438491, "learning_rate": 1.8304889030301442e-06, "loss": 0.47362181544303894, "step": 1985 }, { "epoch": 0.4578674351585014, "grad_norm": 1.1193514072723911, "learning_rate": 1.8302765183171071e-06, "loss": 0.5645753145217896, "step": 1986 }, { "epoch": 0.4580979827089337, "grad_norm": 1.3651592876994867, "learning_rate": 1.8300640129735294e-06, "loss": 0.47337716817855835, "step": 1987 }, { "epoch": 0.458328530259366, "grad_norm": 1.3024146234369909, "learning_rate": 1.8298513870302852e-06, "loss": 0.6080120801925659, "step": 1988 }, { "epoch": 0.45855907780979827, "grad_norm": 1.126672680817471, "learning_rate": 1.8296386405182673e-06, "loss": 0.5114408135414124, "step": 1989 }, { "epoch": 0.45878962536023055, "grad_norm": 1.4785881214685916, "learning_rate": 1.8294257734683857e-06, "loss": 0.5666244029998779, "step": 1990 }, { "epoch": 0.4590201729106628, "grad_norm": 1.3569918428175534, "learning_rate": 1.8292127859115674e-06, "loss": 0.4955924153327942, "step": 1991 }, { "epoch": 0.4592507204610951, "grad_norm": 1.124675073336242, "learning_rate": 1.8289996778787575e-06, "loss": 0.5668392181396484, "step": 1992 }, { "epoch": 0.4594812680115274, "grad_norm": 1.0789902638202715, "learning_rate": 1.828786449400918e-06, "loss": 0.5244185328483582, "step": 1993 }, { "epoch": 0.45971181556195967, "grad_norm": 1.287548290174774, "learning_rate": 1.8285731005090297e-06, "loss": 0.6035805940628052, "step": 1994 }, { "epoch": 0.45994236311239195, "grad_norm": 1.360909428674116, "learning_rate": 1.828359631234089e-06, "loss": 0.5430639982223511, "step": 1995 }, { "epoch": 0.46017291066282423, "grad_norm": 1.1164961359354388, "learning_rate": 1.8281460416071112e-06, "loss": 0.40246638655662537, "step": 1996 }, { "epoch": 0.4604034582132565, "grad_norm": 1.217124013278456, "learning_rate": 1.8279323316591286e-06, "loss": 0.522178053855896, "step": 1997 }, { "epoch": 0.46063400576368874, "grad_norm": 1.0095360764724484, "learning_rate": 1.8277185014211911e-06, "loss": 0.4550439119338989, "step": 1998 }, { "epoch": 0.460864553314121, "grad_norm": 1.402214335246735, "learning_rate": 1.8275045509243659e-06, "loss": 0.47628867626190186, "step": 1999 }, { "epoch": 0.4610951008645533, "grad_norm": 1.2641661070203307, "learning_rate": 1.8272904801997376e-06, "loss": 0.514962911605835, "step": 2000 }, { "epoch": 0.4613256484149856, "grad_norm": 1.3606266892470449, "learning_rate": 1.8270762892784086e-06, "loss": 0.536049485206604, "step": 2001 }, { "epoch": 0.46155619596541786, "grad_norm": 1.285847350621859, "learning_rate": 1.8268619781914989e-06, "loss": 0.5808422565460205, "step": 2002 }, { "epoch": 0.46178674351585014, "grad_norm": 1.124706413285645, "learning_rate": 1.8266475469701455e-06, "loss": 0.5651894807815552, "step": 2003 }, { "epoch": 0.4620172910662824, "grad_norm": 1.2017589465119973, "learning_rate": 1.826432995645503e-06, "loss": 0.4927813708782196, "step": 2004 }, { "epoch": 0.4622478386167147, "grad_norm": 1.2175911343424217, "learning_rate": 1.8262183242487433e-06, "loss": 0.5447172522544861, "step": 2005 }, { "epoch": 0.462478386167147, "grad_norm": 1.182254404157546, "learning_rate": 1.8260035328110561e-06, "loss": 0.4893835783004761, "step": 2006 }, { "epoch": 0.46270893371757926, "grad_norm": 1.4394250466795038, "learning_rate": 1.8257886213636483e-06, "loss": 0.5068031549453735, "step": 2007 }, { "epoch": 0.46293948126801154, "grad_norm": 1.0045726883537807, "learning_rate": 1.8255735899377442e-06, "loss": 0.45055803656578064, "step": 2008 }, { "epoch": 0.4631700288184438, "grad_norm": 1.1822319586036718, "learning_rate": 1.825358438564586e-06, "loss": 0.48656368255615234, "step": 2009 }, { "epoch": 0.4634005763688761, "grad_norm": 1.058216918397958, "learning_rate": 1.8251431672754328e-06, "loss": 0.4219861626625061, "step": 2010 }, { "epoch": 0.46363112391930833, "grad_norm": 1.1972282872935691, "learning_rate": 1.824927776101561e-06, "loss": 0.5223569869995117, "step": 2011 }, { "epoch": 0.4638616714697406, "grad_norm": 1.3462274105768874, "learning_rate": 1.8247122650742647e-06, "loss": 0.47259521484375, "step": 2012 }, { "epoch": 0.4640922190201729, "grad_norm": 1.1607275951902687, "learning_rate": 1.8244966342248558e-06, "loss": 0.5716425180435181, "step": 2013 }, { "epoch": 0.46432276657060517, "grad_norm": 1.159709514247773, "learning_rate": 1.824280883584663e-06, "loss": 0.5796461701393127, "step": 2014 }, { "epoch": 0.46455331412103745, "grad_norm": 1.2308298420460582, "learning_rate": 1.8240650131850325e-06, "loss": 0.49397438764572144, "step": 2015 }, { "epoch": 0.46478386167146973, "grad_norm": 1.0702319858460478, "learning_rate": 1.8238490230573285e-06, "loss": 0.4773065447807312, "step": 2016 }, { "epoch": 0.465014409221902, "grad_norm": 1.0911071946042072, "learning_rate": 1.8236329132329314e-06, "loss": 0.5000171065330505, "step": 2017 }, { "epoch": 0.4652449567723343, "grad_norm": 1.3472772685703793, "learning_rate": 1.8234166837432403e-06, "loss": 0.6179405450820923, "step": 2018 }, { "epoch": 0.4654755043227666, "grad_norm": 1.1359405392356112, "learning_rate": 1.823200334619671e-06, "loss": 0.5591844320297241, "step": 2019 }, { "epoch": 0.46570605187319886, "grad_norm": 0.966495776089465, "learning_rate": 1.8229838658936564e-06, "loss": 0.5247224569320679, "step": 2020 }, { "epoch": 0.46593659942363114, "grad_norm": 1.1427900374179103, "learning_rate": 1.8227672775966476e-06, "loss": 0.5076649188995361, "step": 2021 }, { "epoch": 0.4661671469740634, "grad_norm": 1.214581270233277, "learning_rate": 1.822550569760112e-06, "loss": 0.5224828124046326, "step": 2022 }, { "epoch": 0.4663976945244957, "grad_norm": 1.466153109015355, "learning_rate": 1.822333742415536e-06, "loss": 0.595023512840271, "step": 2023 }, { "epoch": 0.466628242074928, "grad_norm": 1.0217685477065697, "learning_rate": 1.8221167955944216e-06, "loss": 0.4959990978240967, "step": 2024 }, { "epoch": 0.4668587896253602, "grad_norm": 1.1676437653997107, "learning_rate": 1.8218997293282893e-06, "loss": 0.4608197808265686, "step": 2025 }, { "epoch": 0.4670893371757925, "grad_norm": 0.9821397024600826, "learning_rate": 1.821682543648676e-06, "loss": 0.4783310294151306, "step": 2026 }, { "epoch": 0.46731988472622477, "grad_norm": 1.065215299840776, "learning_rate": 1.821465238587137e-06, "loss": 0.5161324739456177, "step": 2027 }, { "epoch": 0.46755043227665705, "grad_norm": 1.2816207732030394, "learning_rate": 1.8212478141752446e-06, "loss": 0.5252971053123474, "step": 2028 }, { "epoch": 0.4677809798270893, "grad_norm": 1.0910388272274432, "learning_rate": 1.8210302704445878e-06, "loss": 0.5302141904830933, "step": 2029 }, { "epoch": 0.4680115273775216, "grad_norm": 1.0800329607334094, "learning_rate": 1.8208126074267738e-06, "loss": 0.46280181407928467, "step": 2030 }, { "epoch": 0.4682420749279539, "grad_norm": 1.2445319229306726, "learning_rate": 1.8205948251534268e-06, "loss": 0.6222575306892395, "step": 2031 }, { "epoch": 0.46847262247838617, "grad_norm": 1.1593148286418322, "learning_rate": 1.8203769236561884e-06, "loss": 0.49378883838653564, "step": 2032 }, { "epoch": 0.46870317002881845, "grad_norm": 1.0157150881959576, "learning_rate": 1.8201589029667165e-06, "loss": 0.4863582253456116, "step": 2033 }, { "epoch": 0.46893371757925073, "grad_norm": 1.4072028447431575, "learning_rate": 1.8199407631166888e-06, "loss": 0.543857991695404, "step": 2034 }, { "epoch": 0.469164265129683, "grad_norm": 1.0771978734352377, "learning_rate": 1.8197225041377972e-06, "loss": 0.4453166127204895, "step": 2035 }, { "epoch": 0.4693948126801153, "grad_norm": 1.4395287176153397, "learning_rate": 1.8195041260617534e-06, "loss": 0.642902672290802, "step": 2036 }, { "epoch": 0.4696253602305476, "grad_norm": 1.2089411302375417, "learning_rate": 1.8192856289202853e-06, "loss": 0.433509886264801, "step": 2037 }, { "epoch": 0.46985590778097985, "grad_norm": 1.2704279876832443, "learning_rate": 1.8190670127451381e-06, "loss": 0.5348495244979858, "step": 2038 }, { "epoch": 0.4700864553314121, "grad_norm": 1.3239841182062162, "learning_rate": 1.8188482775680745e-06, "loss": 0.5972989797592163, "step": 2039 }, { "epoch": 0.47031700288184436, "grad_norm": 1.1466823333239362, "learning_rate": 1.8186294234208745e-06, "loss": 0.5785202980041504, "step": 2040 }, { "epoch": 0.47054755043227664, "grad_norm": 1.2551617120211864, "learning_rate": 1.8184104503353353e-06, "loss": 0.4959946572780609, "step": 2041 }, { "epoch": 0.4707780979827089, "grad_norm": 1.2660924545498422, "learning_rate": 1.8181913583432715e-06, "loss": 0.4742332696914673, "step": 2042 }, { "epoch": 0.4710086455331412, "grad_norm": 1.086156350597083, "learning_rate": 1.8179721474765146e-06, "loss": 0.5113345980644226, "step": 2043 }, { "epoch": 0.4712391930835735, "grad_norm": 1.2191869726515814, "learning_rate": 1.817752817766914e-06, "loss": 0.43116605281829834, "step": 2044 }, { "epoch": 0.47146974063400576, "grad_norm": 1.4602324212669267, "learning_rate": 1.8175333692463362e-06, "loss": 0.40598607063293457, "step": 2045 }, { "epoch": 0.47170028818443804, "grad_norm": 1.2700037116465859, "learning_rate": 1.817313801946664e-06, "loss": 0.5269483327865601, "step": 2046 }, { "epoch": 0.4719308357348703, "grad_norm": 1.3055905499244451, "learning_rate": 1.817094115899799e-06, "loss": 0.6085952520370483, "step": 2047 }, { "epoch": 0.4721613832853026, "grad_norm": 1.1226437123382058, "learning_rate": 1.816874311137659e-06, "loss": 0.4086014926433563, "step": 2048 }, { "epoch": 0.4723919308357349, "grad_norm": 1.1725786832352496, "learning_rate": 1.816654387692179e-06, "loss": 0.5595110654830933, "step": 2049 }, { "epoch": 0.47262247838616717, "grad_norm": 1.2337988950283274, "learning_rate": 1.8164343455953124e-06, "loss": 0.5148875713348389, "step": 2050 }, { "epoch": 0.47285302593659945, "grad_norm": 1.1613861580132026, "learning_rate": 1.8162141848790284e-06, "loss": 0.47360190749168396, "step": 2051 }, { "epoch": 0.4730835734870317, "grad_norm": 1.3159698189726914, "learning_rate": 1.8159939055753144e-06, "loss": 0.542681097984314, "step": 2052 }, { "epoch": 0.47331412103746395, "grad_norm": 0.9407747552971376, "learning_rate": 1.8157735077161744e-06, "loss": 0.4918665885925293, "step": 2053 }, { "epoch": 0.47354466858789623, "grad_norm": 1.1060379080565603, "learning_rate": 1.81555299133363e-06, "loss": 0.4638371467590332, "step": 2054 }, { "epoch": 0.4737752161383285, "grad_norm": 1.4597203401356291, "learning_rate": 1.81533235645972e-06, "loss": 0.5285000205039978, "step": 2055 }, { "epoch": 0.4740057636887608, "grad_norm": 1.1329280057548656, "learning_rate": 1.8151116031265006e-06, "loss": 0.4810779094696045, "step": 2056 }, { "epoch": 0.4742363112391931, "grad_norm": 1.1882237849544774, "learning_rate": 1.8148907313660441e-06, "loss": 0.5632082223892212, "step": 2057 }, { "epoch": 0.47446685878962536, "grad_norm": 1.2365131196813692, "learning_rate": 1.8146697412104422e-06, "loss": 0.47608134150505066, "step": 2058 }, { "epoch": 0.47469740634005764, "grad_norm": 1.0676720083276605, "learning_rate": 1.8144486326918012e-06, "loss": 0.47561490535736084, "step": 2059 }, { "epoch": 0.4749279538904899, "grad_norm": 1.2561213728895482, "learning_rate": 1.8142274058422467e-06, "loss": 0.5634682774543762, "step": 2060 }, { "epoch": 0.4751585014409222, "grad_norm": 1.0913143929244284, "learning_rate": 1.8140060606939202e-06, "loss": 0.5818713903427124, "step": 2061 }, { "epoch": 0.4753890489913545, "grad_norm": 1.2100494893120814, "learning_rate": 1.8137845972789811e-06, "loss": 0.4999740719795227, "step": 2062 }, { "epoch": 0.47561959654178676, "grad_norm": 1.0912745719982555, "learning_rate": 1.8135630156296058e-06, "loss": 0.5102949142456055, "step": 2063 }, { "epoch": 0.47585014409221904, "grad_norm": 1.2787817700287705, "learning_rate": 1.8133413157779876e-06, "loss": 0.5175629258155823, "step": 2064 }, { "epoch": 0.4760806916426513, "grad_norm": 1.2601037233558323, "learning_rate": 1.8131194977563368e-06, "loss": 0.5314140915870667, "step": 2065 }, { "epoch": 0.47631123919308355, "grad_norm": 1.3763380052693162, "learning_rate": 1.8128975615968823e-06, "loss": 0.5157697796821594, "step": 2066 }, { "epoch": 0.4765417867435158, "grad_norm": 1.2519563843783486, "learning_rate": 1.8126755073318682e-06, "loss": 0.6178181171417236, "step": 2067 }, { "epoch": 0.4767723342939481, "grad_norm": 1.0243913567227492, "learning_rate": 1.8124533349935569e-06, "loss": 0.5079492926597595, "step": 2068 }, { "epoch": 0.4770028818443804, "grad_norm": 1.1632375854196408, "learning_rate": 1.812231044614228e-06, "loss": 0.581571102142334, "step": 2069 }, { "epoch": 0.47723342939481267, "grad_norm": 1.0935257485384182, "learning_rate": 1.8120086362261779e-06, "loss": 0.5577228665351868, "step": 2070 }, { "epoch": 0.47746397694524495, "grad_norm": 1.2309123046111559, "learning_rate": 1.8117861098617197e-06, "loss": 0.5082104206085205, "step": 2071 }, { "epoch": 0.47769452449567723, "grad_norm": 1.1348218977787359, "learning_rate": 1.8115634655531848e-06, "loss": 0.4945356249809265, "step": 2072 }, { "epoch": 0.4779250720461095, "grad_norm": 1.2419525307146027, "learning_rate": 1.811340703332921e-06, "loss": 0.5593979954719543, "step": 2073 }, { "epoch": 0.4781556195965418, "grad_norm": 1.2030111845817482, "learning_rate": 1.8111178232332933e-06, "loss": 0.44920873641967773, "step": 2074 }, { "epoch": 0.4783861671469741, "grad_norm": 0.9934876361350219, "learning_rate": 1.8108948252866837e-06, "loss": 0.5078163743019104, "step": 2075 }, { "epoch": 0.47861671469740635, "grad_norm": 1.3757026294964343, "learning_rate": 1.8106717095254913e-06, "loss": 0.41619086265563965, "step": 2076 }, { "epoch": 0.47884726224783863, "grad_norm": 1.4228159723788971, "learning_rate": 1.8104484759821328e-06, "loss": 0.6011626124382019, "step": 2077 }, { "epoch": 0.4790778097982709, "grad_norm": 1.1103134463956559, "learning_rate": 1.810225124689042e-06, "loss": 0.5150628089904785, "step": 2078 }, { "epoch": 0.4793083573487032, "grad_norm": 1.3396393945640488, "learning_rate": 1.8100016556786688e-06, "loss": 0.5351274609565735, "step": 2079 }, { "epoch": 0.4795389048991354, "grad_norm": 1.2255502966022296, "learning_rate": 1.8097780689834816e-06, "loss": 0.514049232006073, "step": 2080 }, { "epoch": 0.4797694524495677, "grad_norm": 1.2631952502658392, "learning_rate": 1.8095543646359649e-06, "loss": 0.5486019849777222, "step": 2081 }, { "epoch": 0.48, "grad_norm": 1.0912725423085439, "learning_rate": 1.8093305426686203e-06, "loss": 0.5539723634719849, "step": 2082 }, { "epoch": 0.48023054755043226, "grad_norm": 1.347806494618309, "learning_rate": 1.8091066031139675e-06, "loss": 0.6295641660690308, "step": 2083 }, { "epoch": 0.48046109510086454, "grad_norm": 1.4839180788394328, "learning_rate": 1.808882546004542e-06, "loss": 0.5032966136932373, "step": 2084 }, { "epoch": 0.4806916426512968, "grad_norm": 1.300518495948047, "learning_rate": 1.8086583713728974e-06, "loss": 0.5227913856506348, "step": 2085 }, { "epoch": 0.4809221902017291, "grad_norm": 1.3494249283086248, "learning_rate": 1.8084340792516035e-06, "loss": 0.5598339438438416, "step": 2086 }, { "epoch": 0.4811527377521614, "grad_norm": 0.936105706787139, "learning_rate": 1.808209669673248e-06, "loss": 0.5645616054534912, "step": 2087 }, { "epoch": 0.48138328530259367, "grad_norm": 1.2822280997334143, "learning_rate": 1.8079851426704352e-06, "loss": 0.48751646280288696, "step": 2088 }, { "epoch": 0.48161383285302595, "grad_norm": 1.2419035742128983, "learning_rate": 1.8077604982757867e-06, "loss": 0.528518795967102, "step": 2089 }, { "epoch": 0.48184438040345823, "grad_norm": 1.188310462626312, "learning_rate": 1.8075357365219403e-06, "loss": 0.5478106737136841, "step": 2090 }, { "epoch": 0.4820749279538905, "grad_norm": 1.2009761465671023, "learning_rate": 1.8073108574415523e-06, "loss": 0.5034850835800171, "step": 2091 }, { "epoch": 0.4823054755043228, "grad_norm": 1.3064576473905998, "learning_rate": 1.807085861067295e-06, "loss": 0.5288310050964355, "step": 2092 }, { "epoch": 0.48253602305475507, "grad_norm": 1.5549767465705366, "learning_rate": 1.806860747431858e-06, "loss": 0.5817336440086365, "step": 2093 }, { "epoch": 0.4827665706051873, "grad_norm": 1.1287206353806019, "learning_rate": 1.806635516567948e-06, "loss": 0.5058947801589966, "step": 2094 }, { "epoch": 0.4829971181556196, "grad_norm": 1.2962898270991883, "learning_rate": 1.8064101685082886e-06, "loss": 0.4131927788257599, "step": 2095 }, { "epoch": 0.48322766570605186, "grad_norm": 1.3216989005029416, "learning_rate": 1.8061847032856208e-06, "loss": 0.5075147151947021, "step": 2096 }, { "epoch": 0.48345821325648414, "grad_norm": 1.141303867546858, "learning_rate": 1.8059591209327022e-06, "loss": 0.49490487575531006, "step": 2097 }, { "epoch": 0.4836887608069164, "grad_norm": 1.1392879390687263, "learning_rate": 1.8057334214823073e-06, "loss": 0.6112065315246582, "step": 2098 }, { "epoch": 0.4839193083573487, "grad_norm": 1.0180072685628039, "learning_rate": 1.8055076049672282e-06, "loss": 0.4702821373939514, "step": 2099 }, { "epoch": 0.484149855907781, "grad_norm": 1.0776070240866942, "learning_rate": 1.8052816714202736e-06, "loss": 0.4686674475669861, "step": 2100 }, { "epoch": 0.48438040345821326, "grad_norm": 1.0343204551386471, "learning_rate": 1.8050556208742695e-06, "loss": 0.5218414068222046, "step": 2101 }, { "epoch": 0.48461095100864554, "grad_norm": 0.9563633917956249, "learning_rate": 1.8048294533620582e-06, "loss": 0.5427126288414001, "step": 2102 }, { "epoch": 0.4848414985590778, "grad_norm": 1.271635291620792, "learning_rate": 1.8046031689165001e-06, "loss": 0.5796751976013184, "step": 2103 }, { "epoch": 0.4850720461095101, "grad_norm": 1.0928705473494975, "learning_rate": 1.8043767675704718e-06, "loss": 0.5725299119949341, "step": 2104 }, { "epoch": 0.4853025936599424, "grad_norm": 1.0835389410732317, "learning_rate": 1.8041502493568667e-06, "loss": 0.507230281829834, "step": 2105 }, { "epoch": 0.48553314121037466, "grad_norm": 1.3563692986550824, "learning_rate": 1.8039236143085958e-06, "loss": 0.5179410576820374, "step": 2106 }, { "epoch": 0.4857636887608069, "grad_norm": 1.0040019176194879, "learning_rate": 1.8036968624585869e-06, "loss": 0.49801725149154663, "step": 2107 }, { "epoch": 0.48599423631123917, "grad_norm": 1.1386662177255882, "learning_rate": 1.8034699938397843e-06, "loss": 0.4731954038143158, "step": 2108 }, { "epoch": 0.48622478386167145, "grad_norm": 1.2220519213958883, "learning_rate": 1.8032430084851505e-06, "loss": 0.5271996855735779, "step": 2109 }, { "epoch": 0.48645533141210373, "grad_norm": 1.3946338651870764, "learning_rate": 1.803015906427663e-06, "loss": 0.441206693649292, "step": 2110 }, { "epoch": 0.486685878962536, "grad_norm": 1.2215374705306121, "learning_rate": 1.802788687700318e-06, "loss": 0.5455319285392761, "step": 2111 }, { "epoch": 0.4869164265129683, "grad_norm": 1.148243592256876, "learning_rate": 1.802561352336128e-06, "loss": 0.45889076590538025, "step": 2112 }, { "epoch": 0.4871469740634006, "grad_norm": 1.2793636515861055, "learning_rate": 1.8023339003681225e-06, "loss": 0.5482321977615356, "step": 2113 }, { "epoch": 0.48737752161383285, "grad_norm": 1.1670602057135955, "learning_rate": 1.8021063318293474e-06, "loss": 0.48072755336761475, "step": 2114 }, { "epoch": 0.48760806916426513, "grad_norm": 1.1056861144019507, "learning_rate": 1.801878646752867e-06, "loss": 0.4769946336746216, "step": 2115 }, { "epoch": 0.4878386167146974, "grad_norm": 1.3095766673379263, "learning_rate": 1.8016508451717604e-06, "loss": 0.5335594415664673, "step": 2116 }, { "epoch": 0.4880691642651297, "grad_norm": 1.053340143352943, "learning_rate": 1.801422927119126e-06, "loss": 0.5021346211433411, "step": 2117 }, { "epoch": 0.488299711815562, "grad_norm": 1.056050874174104, "learning_rate": 1.801194892628077e-06, "loss": 0.40786126255989075, "step": 2118 }, { "epoch": 0.48853025936599426, "grad_norm": 1.1636672663703702, "learning_rate": 1.8009667417317447e-06, "loss": 0.5735047459602356, "step": 2119 }, { "epoch": 0.48876080691642654, "grad_norm": 1.3770736980628029, "learning_rate": 1.8007384744632772e-06, "loss": 0.5656315088272095, "step": 2120 }, { "epoch": 0.48899135446685876, "grad_norm": 1.2003142020448403, "learning_rate": 1.8005100908558393e-06, "loss": 0.5263736248016357, "step": 2121 }, { "epoch": 0.48922190201729104, "grad_norm": 1.14755344931242, "learning_rate": 1.8002815909426129e-06, "loss": 0.4543651342391968, "step": 2122 }, { "epoch": 0.4894524495677233, "grad_norm": 1.2567132050361414, "learning_rate": 1.800052974756796e-06, "loss": 0.5164092183113098, "step": 2123 }, { "epoch": 0.4896829971181556, "grad_norm": 1.065148302467529, "learning_rate": 1.7998242423316053e-06, "loss": 0.47448939085006714, "step": 2124 }, { "epoch": 0.4899135446685879, "grad_norm": 1.2249325142019851, "learning_rate": 1.7995953937002722e-06, "loss": 0.5526837706565857, "step": 2125 }, { "epoch": 0.49014409221902017, "grad_norm": 1.1191314576308082, "learning_rate": 1.7993664288960466e-06, "loss": 0.5415492057800293, "step": 2126 }, { "epoch": 0.49037463976945245, "grad_norm": 1.5504915305169138, "learning_rate": 1.7991373479521943e-06, "loss": 0.5514999032020569, "step": 2127 }, { "epoch": 0.49060518731988473, "grad_norm": 1.1460513293920873, "learning_rate": 1.7989081509019988e-06, "loss": 0.5257915258407593, "step": 2128 }, { "epoch": 0.490835734870317, "grad_norm": 1.286281965295671, "learning_rate": 1.7986788377787598e-06, "loss": 0.5336320400238037, "step": 2129 }, { "epoch": 0.4910662824207493, "grad_norm": 1.0512868854767012, "learning_rate": 1.798449408615794e-06, "loss": 0.3695172667503357, "step": 2130 }, { "epoch": 0.49129682997118157, "grad_norm": 1.2616560050006966, "learning_rate": 1.7982198634464354e-06, "loss": 0.47775453329086304, "step": 2131 }, { "epoch": 0.49152737752161385, "grad_norm": 0.9945703807662574, "learning_rate": 1.7979902023040342e-06, "loss": 0.516392171382904, "step": 2132 }, { "epoch": 0.49175792507204613, "grad_norm": 1.1798218769026139, "learning_rate": 1.7977604252219583e-06, "loss": 0.525421142578125, "step": 2133 }, { "epoch": 0.4919884726224784, "grad_norm": 1.128891548004505, "learning_rate": 1.7975305322335915e-06, "loss": 0.5425143241882324, "step": 2134 }, { "epoch": 0.49221902017291064, "grad_norm": 0.9471448914040171, "learning_rate": 1.7973005233723345e-06, "loss": 0.5511288642883301, "step": 2135 }, { "epoch": 0.4924495677233429, "grad_norm": 1.4239095788389364, "learning_rate": 1.7970703986716058e-06, "loss": 0.5777868032455444, "step": 2136 }, { "epoch": 0.4926801152737752, "grad_norm": 1.2503673840795373, "learning_rate": 1.79684015816484e-06, "loss": 0.46998846530914307, "step": 2137 }, { "epoch": 0.4929106628242075, "grad_norm": 1.185801465844648, "learning_rate": 1.7966098018854884e-06, "loss": 0.5201677680015564, "step": 2138 }, { "epoch": 0.49314121037463976, "grad_norm": 1.1459376135454182, "learning_rate": 1.7963793298670197e-06, "loss": 0.5589007139205933, "step": 2139 }, { "epoch": 0.49337175792507204, "grad_norm": 1.2547944348154982, "learning_rate": 1.796148742142919e-06, "loss": 0.5880102515220642, "step": 2140 }, { "epoch": 0.4936023054755043, "grad_norm": 1.1932412523736406, "learning_rate": 1.7959180387466884e-06, "loss": 0.5860651731491089, "step": 2141 }, { "epoch": 0.4938328530259366, "grad_norm": 1.2025551212842234, "learning_rate": 1.795687219711846e-06, "loss": 0.42490053176879883, "step": 2142 }, { "epoch": 0.4940634005763689, "grad_norm": 1.503932183071744, "learning_rate": 1.7954562850719283e-06, "loss": 0.5949894189834595, "step": 2143 }, { "epoch": 0.49429394812680116, "grad_norm": 1.1843513634065304, "learning_rate": 1.7952252348604873e-06, "loss": 0.5673636198043823, "step": 2144 }, { "epoch": 0.49452449567723344, "grad_norm": 1.3424804711783924, "learning_rate": 1.7949940691110923e-06, "loss": 0.5709353089332581, "step": 2145 }, { "epoch": 0.4947550432276657, "grad_norm": 1.5096007165005685, "learning_rate": 1.7947627878573292e-06, "loss": 0.5557876825332642, "step": 2146 }, { "epoch": 0.494985590778098, "grad_norm": 1.605658897794125, "learning_rate": 1.7945313911328008e-06, "loss": 0.6168490648269653, "step": 2147 }, { "epoch": 0.4952161383285303, "grad_norm": 1.0371807620079299, "learning_rate": 1.7942998789711266e-06, "loss": 0.5257415771484375, "step": 2148 }, { "epoch": 0.4954466858789625, "grad_norm": 1.3491005725160026, "learning_rate": 1.794068251405943e-06, "loss": 0.5957303047180176, "step": 2149 }, { "epoch": 0.4956772334293948, "grad_norm": 1.3791883865126864, "learning_rate": 1.7938365084709028e-06, "loss": 0.5421463251113892, "step": 2150 }, { "epoch": 0.4959077809798271, "grad_norm": 1.2199946418762686, "learning_rate": 1.793604650199676e-06, "loss": 0.4956265091896057, "step": 2151 }, { "epoch": 0.49613832853025935, "grad_norm": 1.528899494407416, "learning_rate": 1.7933726766259493e-06, "loss": 0.6387878656387329, "step": 2152 }, { "epoch": 0.49636887608069163, "grad_norm": 1.2129061766976703, "learning_rate": 1.793140587783426e-06, "loss": 0.48594605922698975, "step": 2153 }, { "epoch": 0.4965994236311239, "grad_norm": 1.1561693003542137, "learning_rate": 1.7929083837058262e-06, "loss": 0.5035887360572815, "step": 2154 }, { "epoch": 0.4968299711815562, "grad_norm": 1.2461631938928077, "learning_rate": 1.7926760644268868e-06, "loss": 0.44821372628211975, "step": 2155 }, { "epoch": 0.4970605187319885, "grad_norm": 1.3217722479914835, "learning_rate": 1.7924436299803612e-06, "loss": 0.5536797642707825, "step": 2156 }, { "epoch": 0.49729106628242076, "grad_norm": 1.1618226791661102, "learning_rate": 1.79221108040002e-06, "loss": 0.5425612926483154, "step": 2157 }, { "epoch": 0.49752161383285304, "grad_norm": 1.2313751283119363, "learning_rate": 1.7919784157196497e-06, "loss": 0.5238672494888306, "step": 2158 }, { "epoch": 0.4977521613832853, "grad_norm": 1.198679351371627, "learning_rate": 1.7917456359730543e-06, "loss": 0.5356197953224182, "step": 2159 }, { "epoch": 0.4979827089337176, "grad_norm": 1.0280774531466672, "learning_rate": 1.7915127411940545e-06, "loss": 0.4530688226222992, "step": 2160 }, { "epoch": 0.4982132564841499, "grad_norm": 1.3115285925447417, "learning_rate": 1.7912797314164875e-06, "loss": 0.5374774932861328, "step": 2161 }, { "epoch": 0.4984438040345821, "grad_norm": 1.0967027052102745, "learning_rate": 1.7910466066742068e-06, "loss": 0.4482705295085907, "step": 2162 }, { "epoch": 0.4986743515850144, "grad_norm": 1.2292872660786507, "learning_rate": 1.7908133670010837e-06, "loss": 0.4459438920021057, "step": 2163 }, { "epoch": 0.49890489913544667, "grad_norm": 1.139537158289869, "learning_rate": 1.7905800124310044e-06, "loss": 0.49267393350601196, "step": 2164 }, { "epoch": 0.49913544668587895, "grad_norm": 1.355016564375121, "learning_rate": 1.7903465429978742e-06, "loss": 0.5207797884941101, "step": 2165 }, { "epoch": 0.49936599423631123, "grad_norm": 1.3100564580839544, "learning_rate": 1.7901129587356128e-06, "loss": 0.5956555604934692, "step": 2166 }, { "epoch": 0.4995965417867435, "grad_norm": 1.396335552060292, "learning_rate": 1.7898792596781575e-06, "loss": 0.47083795070648193, "step": 2167 }, { "epoch": 0.4998270893371758, "grad_norm": 1.4321432022692109, "learning_rate": 1.7896454458594631e-06, "loss": 0.5580272674560547, "step": 2168 }, { "epoch": 0.5000576368876081, "grad_norm": 1.1568398545920477, "learning_rate": 1.7894115173135e-06, "loss": 0.5935468077659607, "step": 2169 }, { "epoch": 0.5002881844380404, "grad_norm": 1.3058147531506012, "learning_rate": 1.7891774740742553e-06, "loss": 0.48544418811798096, "step": 2170 }, { "epoch": 0.5005187319884726, "grad_norm": 1.0916510321690198, "learning_rate": 1.7889433161757336e-06, "loss": 0.5714924335479736, "step": 2171 }, { "epoch": 0.5007492795389049, "grad_norm": 1.4318986319035987, "learning_rate": 1.7887090436519551e-06, "loss": 0.582866907119751, "step": 2172 }, { "epoch": 0.5009798270893372, "grad_norm": 1.092756168813296, "learning_rate": 1.7884746565369573e-06, "loss": 0.4349023401737213, "step": 2173 }, { "epoch": 0.5012103746397695, "grad_norm": 1.307231397890865, "learning_rate": 1.7882401548647942e-06, "loss": 0.40139514207839966, "step": 2174 }, { "epoch": 0.5014409221902018, "grad_norm": 1.124014712251471, "learning_rate": 1.7880055386695366e-06, "loss": 0.534995436668396, "step": 2175 }, { "epoch": 0.501671469740634, "grad_norm": 1.2597913986300637, "learning_rate": 1.7877708079852716e-06, "loss": 0.5228029489517212, "step": 2176 }, { "epoch": 0.5019020172910663, "grad_norm": 1.2856346391499722, "learning_rate": 1.7875359628461034e-06, "loss": 0.6084067821502686, "step": 2177 }, { "epoch": 0.5021325648414986, "grad_norm": 1.3340996931649014, "learning_rate": 1.787301003286152e-06, "loss": 0.5027675628662109, "step": 2178 }, { "epoch": 0.5023631123919309, "grad_norm": 1.1721828803051668, "learning_rate": 1.7870659293395552e-06, "loss": 0.6257070302963257, "step": 2179 }, { "epoch": 0.5025936599423632, "grad_norm": 1.202911283172134, "learning_rate": 1.7868307410404664e-06, "loss": 0.45355063676834106, "step": 2180 }, { "epoch": 0.5028242074927954, "grad_norm": 1.1770009421306744, "learning_rate": 1.7865954384230567e-06, "loss": 0.5211625099182129, "step": 2181 }, { "epoch": 0.5030547550432277, "grad_norm": 1.170570821757426, "learning_rate": 1.7863600215215117e-06, "loss": 0.45476096868515015, "step": 2182 }, { "epoch": 0.5032853025936599, "grad_norm": 1.1288308091354253, "learning_rate": 1.7861244903700366e-06, "loss": 0.5483888983726501, "step": 2183 }, { "epoch": 0.5035158501440922, "grad_norm": 1.3267959714324822, "learning_rate": 1.7858888450028507e-06, "loss": 0.6125767230987549, "step": 2184 }, { "epoch": 0.5037463976945245, "grad_norm": 1.2869025477599574, "learning_rate": 1.7856530854541912e-06, "loss": 0.5761919021606445, "step": 2185 }, { "epoch": 0.5039769452449567, "grad_norm": 1.280348139691172, "learning_rate": 1.785417211758311e-06, "loss": 0.4902021884918213, "step": 2186 }, { "epoch": 0.504207492795389, "grad_norm": 1.1062677808924173, "learning_rate": 1.7851812239494808e-06, "loss": 0.5162317752838135, "step": 2187 }, { "epoch": 0.5044380403458213, "grad_norm": 1.3349217104827404, "learning_rate": 1.784945122061987e-06, "loss": 0.5260510444641113, "step": 2188 }, { "epoch": 0.5046685878962536, "grad_norm": 1.372484573808915, "learning_rate": 1.7847089061301324e-06, "loss": 0.5578324794769287, "step": 2189 }, { "epoch": 0.5048991354466859, "grad_norm": 1.1183471298010517, "learning_rate": 1.7844725761882366e-06, "loss": 0.5394254922866821, "step": 2190 }, { "epoch": 0.5051296829971181, "grad_norm": 1.5877235098098084, "learning_rate": 1.7842361322706365e-06, "loss": 0.5949487686157227, "step": 2191 }, { "epoch": 0.5053602305475504, "grad_norm": 1.0904188301293072, "learning_rate": 1.7839995744116844e-06, "loss": 0.5092563629150391, "step": 2192 }, { "epoch": 0.5055907780979827, "grad_norm": 1.1725099724792964, "learning_rate": 1.7837629026457503e-06, "loss": 0.4975352883338928, "step": 2193 }, { "epoch": 0.505821325648415, "grad_norm": 1.5695722641221526, "learning_rate": 1.7835261170072196e-06, "loss": 0.4792792499065399, "step": 2194 }, { "epoch": 0.5060518731988473, "grad_norm": 1.1092286708587664, "learning_rate": 1.7832892175304947e-06, "loss": 0.4511220455169678, "step": 2195 }, { "epoch": 0.5062824207492795, "grad_norm": 1.139606306229032, "learning_rate": 1.7830522042499952e-06, "loss": 0.498882532119751, "step": 2196 }, { "epoch": 0.5065129682997118, "grad_norm": 1.3633349831921355, "learning_rate": 1.7828150772001563e-06, "loss": 0.5208792090415955, "step": 2197 }, { "epoch": 0.5067435158501441, "grad_norm": 1.0920136721929539, "learning_rate": 1.78257783641543e-06, "loss": 0.48315608501434326, "step": 2198 }, { "epoch": 0.5069740634005764, "grad_norm": 1.3188291756320296, "learning_rate": 1.7823404819302853e-06, "loss": 0.643965482711792, "step": 2199 }, { "epoch": 0.5072046109510087, "grad_norm": 1.2350867324332677, "learning_rate": 1.782103013779207e-06, "loss": 0.5071272850036621, "step": 2200 }, { "epoch": 0.5074351585014409, "grad_norm": 1.2777572177668994, "learning_rate": 1.7818654319966968e-06, "loss": 0.549786388874054, "step": 2201 }, { "epoch": 0.5076657060518732, "grad_norm": 1.1703456168007746, "learning_rate": 1.781627736617273e-06, "loss": 0.520953893661499, "step": 2202 }, { "epoch": 0.5078962536023055, "grad_norm": 1.2766880262368039, "learning_rate": 1.78138992767547e-06, "loss": 0.5666658878326416, "step": 2203 }, { "epoch": 0.5081268011527378, "grad_norm": 1.0842976431840186, "learning_rate": 1.7811520052058392e-06, "loss": 0.5050726532936096, "step": 2204 }, { "epoch": 0.5083573487031701, "grad_norm": 1.1791951625976107, "learning_rate": 1.7809139692429485e-06, "loss": 0.4998525381088257, "step": 2205 }, { "epoch": 0.5085878962536023, "grad_norm": 1.1200846842686203, "learning_rate": 1.7806758198213814e-06, "loss": 0.4982251226902008, "step": 2206 }, { "epoch": 0.5088184438040346, "grad_norm": 1.1568502456227219, "learning_rate": 1.780437556975739e-06, "loss": 0.5644215941429138, "step": 2207 }, { "epoch": 0.5090489913544669, "grad_norm": 1.1426739752438277, "learning_rate": 1.7801991807406385e-06, "loss": 0.5029370784759521, "step": 2208 }, { "epoch": 0.5092795389048992, "grad_norm": 1.1040340904512378, "learning_rate": 1.779960691150713e-06, "loss": 0.49697983264923096, "step": 2209 }, { "epoch": 0.5095100864553314, "grad_norm": 1.137855829488058, "learning_rate": 1.779722088240613e-06, "loss": 0.5203051567077637, "step": 2210 }, { "epoch": 0.5097406340057636, "grad_norm": 1.2732773389651266, "learning_rate": 1.7794833720450049e-06, "loss": 0.5182983875274658, "step": 2211 }, { "epoch": 0.5099711815561959, "grad_norm": 1.6767963892591715, "learning_rate": 1.7792445425985716e-06, "loss": 0.5266735553741455, "step": 2212 }, { "epoch": 0.5102017291066282, "grad_norm": 1.5377950327647543, "learning_rate": 1.7790055999360126e-06, "loss": 0.6422331929206848, "step": 2213 }, { "epoch": 0.5104322766570605, "grad_norm": 1.2017440986990142, "learning_rate": 1.7787665440920435e-06, "loss": 0.5400121212005615, "step": 2214 }, { "epoch": 0.5106628242074928, "grad_norm": 1.2987992561000663, "learning_rate": 1.778527375101397e-06, "loss": 0.4500657021999359, "step": 2215 }, { "epoch": 0.510893371757925, "grad_norm": 1.2807387577008382, "learning_rate": 1.778288092998822e-06, "loss": 0.5762274265289307, "step": 2216 }, { "epoch": 0.5111239193083573, "grad_norm": 1.2541048946673465, "learning_rate": 1.778048697819083e-06, "loss": 0.5062060952186584, "step": 2217 }, { "epoch": 0.5113544668587896, "grad_norm": 1.158049884571781, "learning_rate": 1.7778091895969627e-06, "loss": 0.5776544809341431, "step": 2218 }, { "epoch": 0.5115850144092219, "grad_norm": 1.2665559359038843, "learning_rate": 1.7775695683672583e-06, "loss": 0.48977309465408325, "step": 2219 }, { "epoch": 0.5118155619596542, "grad_norm": 1.2212119525380398, "learning_rate": 1.7773298341647843e-06, "loss": 0.4855668544769287, "step": 2220 }, { "epoch": 0.5120461095100864, "grad_norm": 1.2324354065266638, "learning_rate": 1.777089987024372e-06, "loss": 0.5176600217819214, "step": 2221 }, { "epoch": 0.5122766570605187, "grad_norm": 1.1833774336501766, "learning_rate": 1.7768500269808687e-06, "loss": 0.4856322109699249, "step": 2222 }, { "epoch": 0.512507204610951, "grad_norm": 1.209139379315303, "learning_rate": 1.7766099540691375e-06, "loss": 0.5031648278236389, "step": 2223 }, { "epoch": 0.5127377521613833, "grad_norm": 1.2514657246641, "learning_rate": 1.7763697683240588e-06, "loss": 0.5545702576637268, "step": 2224 }, { "epoch": 0.5129682997118156, "grad_norm": 1.141568946980341, "learning_rate": 1.7761294697805295e-06, "loss": 0.5827726721763611, "step": 2225 }, { "epoch": 0.5131988472622478, "grad_norm": 1.238809848511236, "learning_rate": 1.7758890584734621e-06, "loss": 0.6185393929481506, "step": 2226 }, { "epoch": 0.5134293948126801, "grad_norm": 1.2043172717203658, "learning_rate": 1.7756485344377859e-06, "loss": 0.4593431353569031, "step": 2227 }, { "epoch": 0.5136599423631124, "grad_norm": 1.1772106526015698, "learning_rate": 1.7754078977084466e-06, "loss": 0.47052207589149475, "step": 2228 }, { "epoch": 0.5138904899135447, "grad_norm": 1.43950522799351, "learning_rate": 1.7751671483204059e-06, "loss": 0.5208712220191956, "step": 2229 }, { "epoch": 0.514121037463977, "grad_norm": 1.3760015774843395, "learning_rate": 1.7749262863086427e-06, "loss": 0.5334997177124023, "step": 2230 }, { "epoch": 0.5143515850144093, "grad_norm": 1.1787674135463977, "learning_rate": 1.7746853117081514e-06, "loss": 0.5811659097671509, "step": 2231 }, { "epoch": 0.5145821325648415, "grad_norm": 1.0770203228857202, "learning_rate": 1.7744442245539433e-06, "loss": 0.4372256398200989, "step": 2232 }, { "epoch": 0.5148126801152738, "grad_norm": 1.156862339294714, "learning_rate": 1.7742030248810456e-06, "loss": 0.5962051153182983, "step": 2233 }, { "epoch": 0.5150432276657061, "grad_norm": 1.323655401087979, "learning_rate": 1.7739617127245023e-06, "loss": 0.5646089911460876, "step": 2234 }, { "epoch": 0.5152737752161384, "grad_norm": 1.1987421954627402, "learning_rate": 1.7737202881193736e-06, "loss": 0.48204538226127625, "step": 2235 }, { "epoch": 0.5155043227665707, "grad_norm": 1.3415367488143433, "learning_rate": 1.773478751100736e-06, "loss": 0.5023562908172607, "step": 2236 }, { "epoch": 0.5157348703170029, "grad_norm": 1.3435806853729588, "learning_rate": 1.773237101703682e-06, "loss": 0.5214341878890991, "step": 2237 }, { "epoch": 0.5159654178674351, "grad_norm": 1.112054409920411, "learning_rate": 1.772995339963321e-06, "loss": 0.5498196482658386, "step": 2238 }, { "epoch": 0.5161959654178674, "grad_norm": 1.38478483246607, "learning_rate": 1.7727534659147785e-06, "loss": 0.6163268685340881, "step": 2239 }, { "epoch": 0.5164265129682997, "grad_norm": 1.0193285176800957, "learning_rate": 1.7725114795931962e-06, "loss": 0.4866952896118164, "step": 2240 }, { "epoch": 0.516657060518732, "grad_norm": 1.0610043743082396, "learning_rate": 1.7722693810337322e-06, "loss": 0.5931833982467651, "step": 2241 }, { "epoch": 0.5168876080691642, "grad_norm": 1.0620090506254725, "learning_rate": 1.7720271702715605e-06, "loss": 0.5204564332962036, "step": 2242 }, { "epoch": 0.5171181556195965, "grad_norm": 1.1822334561053938, "learning_rate": 1.7717848473418726e-06, "loss": 0.5478333234786987, "step": 2243 }, { "epoch": 0.5173487031700288, "grad_norm": 1.0737770188215454, "learning_rate": 1.771542412279875e-06, "loss": 0.47076964378356934, "step": 2244 }, { "epoch": 0.5175792507204611, "grad_norm": 1.389729566368652, "learning_rate": 1.771299865120791e-06, "loss": 0.45618507266044617, "step": 2245 }, { "epoch": 0.5178097982708934, "grad_norm": 1.2313171508645646, "learning_rate": 1.7710572058998604e-06, "loss": 0.5953116416931152, "step": 2246 }, { "epoch": 0.5180403458213256, "grad_norm": 1.237761845718166, "learning_rate": 1.7708144346523391e-06, "loss": 0.5020469427108765, "step": 2247 }, { "epoch": 0.5182708933717579, "grad_norm": 1.2190445885386132, "learning_rate": 1.7705715514134987e-06, "loss": 0.5417760610580444, "step": 2248 }, { "epoch": 0.5185014409221902, "grad_norm": 1.2065438974520686, "learning_rate": 1.7703285562186282e-06, "loss": 0.5705471038818359, "step": 2249 }, { "epoch": 0.5187319884726225, "grad_norm": 1.2710991899081208, "learning_rate": 1.7700854491030319e-06, "loss": 0.5350116491317749, "step": 2250 }, { "epoch": 0.5189625360230548, "grad_norm": 1.250653785454124, "learning_rate": 1.7698422301020311e-06, "loss": 0.5022974610328674, "step": 2251 }, { "epoch": 0.519193083573487, "grad_norm": 1.1860519947436583, "learning_rate": 1.7695988992509624e-06, "loss": 0.6222025156021118, "step": 2252 }, { "epoch": 0.5194236311239193, "grad_norm": 1.3651382210724667, "learning_rate": 1.76935545658518e-06, "loss": 0.5001484751701355, "step": 2253 }, { "epoch": 0.5196541786743516, "grad_norm": 1.485991529924687, "learning_rate": 1.7691119021400532e-06, "loss": 0.5362265706062317, "step": 2254 }, { "epoch": 0.5198847262247839, "grad_norm": 1.0098110160856955, "learning_rate": 1.7688682359509677e-06, "loss": 0.5197827816009521, "step": 2255 }, { "epoch": 0.5201152737752162, "grad_norm": 1.1023930981612833, "learning_rate": 1.7686244580533261e-06, "loss": 0.6146107912063599, "step": 2256 }, { "epoch": 0.5203458213256484, "grad_norm": 1.235820532389256, "learning_rate": 1.7683805684825463e-06, "loss": 0.5412692427635193, "step": 2257 }, { "epoch": 0.5205763688760807, "grad_norm": 1.1120937693778494, "learning_rate": 1.7681365672740633e-06, "loss": 0.5003819465637207, "step": 2258 }, { "epoch": 0.520806916426513, "grad_norm": 1.1234189645725328, "learning_rate": 1.767892454463328e-06, "loss": 0.5292670726776123, "step": 2259 }, { "epoch": 0.5210374639769453, "grad_norm": 1.103744899542332, "learning_rate": 1.7676482300858073e-06, "loss": 0.4764510989189148, "step": 2260 }, { "epoch": 0.5212680115273776, "grad_norm": 1.1164612132421818, "learning_rate": 1.7674038941769845e-06, "loss": 0.5774421691894531, "step": 2261 }, { "epoch": 0.5214985590778098, "grad_norm": 1.229036573526321, "learning_rate": 1.767159446772359e-06, "loss": 0.46915918588638306, "step": 2262 }, { "epoch": 0.5217291066282421, "grad_norm": 1.3167447922417834, "learning_rate": 1.766914887907446e-06, "loss": 0.4545619487762451, "step": 2263 }, { "epoch": 0.5219596541786744, "grad_norm": 1.2121919468262714, "learning_rate": 1.7666702176177784e-06, "loss": 0.5084496736526489, "step": 2264 }, { "epoch": 0.5221902017291066, "grad_norm": 1.3395786479792613, "learning_rate": 1.7664254359389034e-06, "loss": 0.524928092956543, "step": 2265 }, { "epoch": 0.5224207492795389, "grad_norm": 1.4477722066137377, "learning_rate": 1.7661805429063859e-06, "loss": 0.5276768207550049, "step": 2266 }, { "epoch": 0.5226512968299711, "grad_norm": 1.0912538150871292, "learning_rate": 1.7659355385558055e-06, "loss": 0.4555545747280121, "step": 2267 }, { "epoch": 0.5228818443804034, "grad_norm": 1.2237495534956078, "learning_rate": 1.7656904229227597e-06, "loss": 0.47479552030563354, "step": 2268 }, { "epoch": 0.5231123919308357, "grad_norm": 1.252326361415701, "learning_rate": 1.7654451960428602e-06, "loss": 0.520822286605835, "step": 2269 }, { "epoch": 0.523342939481268, "grad_norm": 1.2429338112694408, "learning_rate": 1.7651998579517367e-06, "loss": 0.5228800177574158, "step": 2270 }, { "epoch": 0.5235734870317003, "grad_norm": 1.1076970934273507, "learning_rate": 1.7649544086850341e-06, "loss": 0.4785909652709961, "step": 2271 }, { "epoch": 0.5238040345821325, "grad_norm": 1.3595525407205307, "learning_rate": 1.7647088482784135e-06, "loss": 0.49906525015830994, "step": 2272 }, { "epoch": 0.5240345821325648, "grad_norm": 1.2162675408338783, "learning_rate": 1.7644631767675527e-06, "loss": 0.44544023275375366, "step": 2273 }, { "epoch": 0.5242651296829971, "grad_norm": 1.1742658425403796, "learning_rate": 1.7642173941881444e-06, "loss": 0.4601054787635803, "step": 2274 }, { "epoch": 0.5244956772334294, "grad_norm": 1.5239044281877645, "learning_rate": 1.763971500575899e-06, "loss": 0.45652949810028076, "step": 2275 }, { "epoch": 0.5247262247838617, "grad_norm": 1.1081315795212425, "learning_rate": 1.7637254959665419e-06, "loss": 0.4013681411743164, "step": 2276 }, { "epoch": 0.5249567723342939, "grad_norm": 1.164103173902687, "learning_rate": 1.763479380395815e-06, "loss": 0.5990947484970093, "step": 2277 }, { "epoch": 0.5251873198847262, "grad_norm": 1.1449594131860659, "learning_rate": 1.7632331538994768e-06, "loss": 0.5234252214431763, "step": 2278 }, { "epoch": 0.5254178674351585, "grad_norm": 1.0795592791106945, "learning_rate": 1.7629868165133006e-06, "loss": 0.39725440740585327, "step": 2279 }, { "epoch": 0.5256484149855908, "grad_norm": 1.16263901682717, "learning_rate": 1.7627403682730777e-06, "loss": 0.5516578555107117, "step": 2280 }, { "epoch": 0.5258789625360231, "grad_norm": 1.1085422332556214, "learning_rate": 1.7624938092146135e-06, "loss": 0.526121199131012, "step": 2281 }, { "epoch": 0.5261095100864553, "grad_norm": 1.120052507754705, "learning_rate": 1.762247139373731e-06, "loss": 0.4965516924858093, "step": 2282 }, { "epoch": 0.5263400576368876, "grad_norm": 1.1307486794153496, "learning_rate": 1.7620003587862686e-06, "loss": 0.42001771926879883, "step": 2283 }, { "epoch": 0.5265706051873199, "grad_norm": 1.5691877841787754, "learning_rate": 1.7617534674880812e-06, "loss": 0.5619962215423584, "step": 2284 }, { "epoch": 0.5268011527377522, "grad_norm": 1.2189639043372158, "learning_rate": 1.761506465515039e-06, "loss": 0.40275585651397705, "step": 2285 }, { "epoch": 0.5270317002881845, "grad_norm": 1.247569513691176, "learning_rate": 1.7612593529030298e-06, "loss": 0.4621019959449768, "step": 2286 }, { "epoch": 0.5272622478386167, "grad_norm": 1.1749490638414628, "learning_rate": 1.7610121296879553e-06, "loss": 0.4977012276649475, "step": 2287 }, { "epoch": 0.527492795389049, "grad_norm": 1.1555374356489423, "learning_rate": 1.7607647959057351e-06, "loss": 0.524575412273407, "step": 2288 }, { "epoch": 0.5277233429394813, "grad_norm": 1.4739921891246035, "learning_rate": 1.7605173515923042e-06, "loss": 0.49018362164497375, "step": 2289 }, { "epoch": 0.5279538904899136, "grad_norm": 1.1265063866344376, "learning_rate": 1.7602697967836135e-06, "loss": 0.5055255889892578, "step": 2290 }, { "epoch": 0.5281844380403459, "grad_norm": 1.3537014805587113, "learning_rate": 1.7600221315156305e-06, "loss": 0.48760661482810974, "step": 2291 }, { "epoch": 0.5284149855907782, "grad_norm": 1.1150714705179763, "learning_rate": 1.7597743558243379e-06, "loss": 0.45736271142959595, "step": 2292 }, { "epoch": 0.5286455331412103, "grad_norm": 1.1364237403755952, "learning_rate": 1.7595264697457353e-06, "loss": 0.5347146391868591, "step": 2293 }, { "epoch": 0.5288760806916426, "grad_norm": 0.9862978081391702, "learning_rate": 1.7592784733158376e-06, "loss": 0.464704692363739, "step": 2294 }, { "epoch": 0.5291066282420749, "grad_norm": 1.2267665080220351, "learning_rate": 1.7590303665706766e-06, "loss": 0.5551707744598389, "step": 2295 }, { "epoch": 0.5293371757925072, "grad_norm": 1.1513699485458475, "learning_rate": 1.7587821495462993e-06, "loss": 0.5731069445610046, "step": 2296 }, { "epoch": 0.5295677233429394, "grad_norm": 1.092210739341485, "learning_rate": 1.7585338222787694e-06, "loss": 0.5296573638916016, "step": 2297 }, { "epoch": 0.5297982708933717, "grad_norm": 1.7893810554454528, "learning_rate": 1.7582853848041658e-06, "loss": 0.5088247060775757, "step": 2298 }, { "epoch": 0.530028818443804, "grad_norm": 1.3042429607129178, "learning_rate": 1.7580368371585839e-06, "loss": 0.49944454431533813, "step": 2299 }, { "epoch": 0.5302593659942363, "grad_norm": 1.1556706324591968, "learning_rate": 1.7577881793781355e-06, "loss": 0.527984619140625, "step": 2300 }, { "epoch": 0.5304899135446686, "grad_norm": 1.2825366591563032, "learning_rate": 1.7575394114989475e-06, "loss": 0.5389485955238342, "step": 2301 }, { "epoch": 0.5307204610951008, "grad_norm": 1.1681706665417333, "learning_rate": 1.7572905335571634e-06, "loss": 0.531416654586792, "step": 2302 }, { "epoch": 0.5309510086455331, "grad_norm": 0.9682807419290672, "learning_rate": 1.7570415455889434e-06, "loss": 0.5177662372589111, "step": 2303 }, { "epoch": 0.5311815561959654, "grad_norm": 1.243285510489316, "learning_rate": 1.7567924476304613e-06, "loss": 0.5060547590255737, "step": 2304 }, { "epoch": 0.5314121037463977, "grad_norm": 1.1616547133634478, "learning_rate": 1.75654323971791e-06, "loss": 0.5338248014450073, "step": 2305 }, { "epoch": 0.53164265129683, "grad_norm": 1.515823497153389, "learning_rate": 1.756293921887496e-06, "loss": 0.5937076210975647, "step": 2306 }, { "epoch": 0.5318731988472623, "grad_norm": 1.2866160645526843, "learning_rate": 1.7560444941754424e-06, "loss": 0.52833092212677, "step": 2307 }, { "epoch": 0.5321037463976945, "grad_norm": 1.1769387885340463, "learning_rate": 1.755794956617989e-06, "loss": 0.4994063973426819, "step": 2308 }, { "epoch": 0.5323342939481268, "grad_norm": 1.5436647070263716, "learning_rate": 1.7555453092513908e-06, "loss": 0.5200421214103699, "step": 2309 }, { "epoch": 0.5325648414985591, "grad_norm": 1.1745455381351673, "learning_rate": 1.755295552111919e-06, "loss": 0.5218993425369263, "step": 2310 }, { "epoch": 0.5327953890489914, "grad_norm": 1.1504625770334531, "learning_rate": 1.7550456852358603e-06, "loss": 0.4781727194786072, "step": 2311 }, { "epoch": 0.5330259365994237, "grad_norm": 1.3403464072628806, "learning_rate": 1.7547957086595187e-06, "loss": 0.5551970601081848, "step": 2312 }, { "epoch": 0.5332564841498559, "grad_norm": 1.297723482722601, "learning_rate": 1.7545456224192123e-06, "loss": 0.5485920906066895, "step": 2313 }, { "epoch": 0.5334870317002882, "grad_norm": 1.1296888028982712, "learning_rate": 1.7542954265512764e-06, "loss": 0.4536609649658203, "step": 2314 }, { "epoch": 0.5337175792507205, "grad_norm": 1.0439097273883267, "learning_rate": 1.7540451210920616e-06, "loss": 0.5044888257980347, "step": 2315 }, { "epoch": 0.5339481268011528, "grad_norm": 1.3932004795894137, "learning_rate": 1.753794706077935e-06, "loss": 0.5383297801017761, "step": 2316 }, { "epoch": 0.5341786743515851, "grad_norm": 1.196782948603355, "learning_rate": 1.7535441815452792e-06, "loss": 0.5694228410720825, "step": 2317 }, { "epoch": 0.5344092219020173, "grad_norm": 0.9575339089400295, "learning_rate": 1.753293547530493e-06, "loss": 0.49534568190574646, "step": 2318 }, { "epoch": 0.5346397694524496, "grad_norm": 1.2393768694985452, "learning_rate": 1.7530428040699904e-06, "loss": 0.5388910174369812, "step": 2319 }, { "epoch": 0.5348703170028818, "grad_norm": 1.1881701575449917, "learning_rate": 1.7527919512002023e-06, "loss": 0.4393211603164673, "step": 2320 }, { "epoch": 0.5351008645533141, "grad_norm": 1.3054845336595484, "learning_rate": 1.7525409889575745e-06, "loss": 0.6011124849319458, "step": 2321 }, { "epoch": 0.5353314121037464, "grad_norm": 1.274741444961471, "learning_rate": 1.7522899173785696e-06, "loss": 0.46487540006637573, "step": 2322 }, { "epoch": 0.5355619596541786, "grad_norm": 1.1421177967152112, "learning_rate": 1.752038736499666e-06, "loss": 0.4753795564174652, "step": 2323 }, { "epoch": 0.5357925072046109, "grad_norm": 1.2683556519471963, "learning_rate": 1.7517874463573572e-06, "loss": 0.5985465049743652, "step": 2324 }, { "epoch": 0.5360230547550432, "grad_norm": 1.056507312381586, "learning_rate": 1.751536046988153e-06, "loss": 0.4543229043483734, "step": 2325 }, { "epoch": 0.5362536023054755, "grad_norm": 1.2118204577653935, "learning_rate": 1.7512845384285793e-06, "loss": 0.6149678230285645, "step": 2326 }, { "epoch": 0.5364841498559078, "grad_norm": 1.0689041198632254, "learning_rate": 1.7510329207151775e-06, "loss": 0.5103700160980225, "step": 2327 }, { "epoch": 0.53671469740634, "grad_norm": 1.1235720243428842, "learning_rate": 1.7507811938845052e-06, "loss": 0.4708825945854187, "step": 2328 }, { "epoch": 0.5369452449567723, "grad_norm": 0.975325384779532, "learning_rate": 1.7505293579731357e-06, "loss": 0.5039220452308655, "step": 2329 }, { "epoch": 0.5371757925072046, "grad_norm": 1.1841254181875327, "learning_rate": 1.7502774130176582e-06, "loss": 0.5144309997558594, "step": 2330 }, { "epoch": 0.5374063400576369, "grad_norm": 1.3039622953440149, "learning_rate": 1.7500253590546774e-06, "loss": 0.41529229283332825, "step": 2331 }, { "epoch": 0.5376368876080692, "grad_norm": 1.165707977395041, "learning_rate": 1.7497731961208144e-06, "loss": 0.45477360486984253, "step": 2332 }, { "epoch": 0.5378674351585014, "grad_norm": 1.2847866373142411, "learning_rate": 1.7495209242527057e-06, "loss": 0.5498300790786743, "step": 2333 }, { "epoch": 0.5380979827089337, "grad_norm": 1.0807005264316651, "learning_rate": 1.7492685434870036e-06, "loss": 0.4764532446861267, "step": 2334 }, { "epoch": 0.538328530259366, "grad_norm": 1.2969878344335828, "learning_rate": 1.7490160538603771e-06, "loss": 0.4899054765701294, "step": 2335 }, { "epoch": 0.5385590778097983, "grad_norm": 1.2821827405222348, "learning_rate": 1.7487634554095095e-06, "loss": 0.5095956921577454, "step": 2336 }, { "epoch": 0.5387896253602306, "grad_norm": 1.212075947407855, "learning_rate": 1.748510748171101e-06, "loss": 0.5495618581771851, "step": 2337 }, { "epoch": 0.5390201729106628, "grad_norm": 1.0328701753125462, "learning_rate": 1.7482579321818676e-06, "loss": 0.4582991898059845, "step": 2338 }, { "epoch": 0.5392507204610951, "grad_norm": 1.3477998717170885, "learning_rate": 1.7480050074785405e-06, "loss": 0.47231215238571167, "step": 2339 }, { "epoch": 0.5394812680115274, "grad_norm": 1.3032973137322832, "learning_rate": 1.7477519740978673e-06, "loss": 0.4859996736049652, "step": 2340 }, { "epoch": 0.5397118155619597, "grad_norm": 1.293149430738758, "learning_rate": 1.747498832076611e-06, "loss": 0.5070324540138245, "step": 2341 }, { "epoch": 0.539942363112392, "grad_norm": 1.2383400794741863, "learning_rate": 1.7472455814515501e-06, "loss": 0.5261486768722534, "step": 2342 }, { "epoch": 0.5401729106628242, "grad_norm": 1.3106600806025066, "learning_rate": 1.74699222225948e-06, "loss": 0.5616657733917236, "step": 2343 }, { "epoch": 0.5404034582132565, "grad_norm": 1.154027686559668, "learning_rate": 1.7467387545372104e-06, "loss": 0.5504746437072754, "step": 2344 }, { "epoch": 0.5406340057636888, "grad_norm": 1.1026580407179722, "learning_rate": 1.7464851783215684e-06, "loss": 0.5194835662841797, "step": 2345 }, { "epoch": 0.5408645533141211, "grad_norm": 1.3293115562160591, "learning_rate": 1.7462314936493953e-06, "loss": 0.522796094417572, "step": 2346 }, { "epoch": 0.5410951008645534, "grad_norm": 1.3369471744938577, "learning_rate": 1.7459777005575492e-06, "loss": 0.4633204936981201, "step": 2347 }, { "epoch": 0.5413256484149855, "grad_norm": 1.2486516319766208, "learning_rate": 1.7457237990829033e-06, "loss": 0.4521179795265198, "step": 2348 }, { "epoch": 0.5415561959654178, "grad_norm": 1.6780170315313812, "learning_rate": 1.7454697892623471e-06, "loss": 0.678740918636322, "step": 2349 }, { "epoch": 0.5417867435158501, "grad_norm": 1.2192388332823851, "learning_rate": 1.7452156711327854e-06, "loss": 0.4683062732219696, "step": 2350 }, { "epoch": 0.5420172910662824, "grad_norm": 0.9552718931480573, "learning_rate": 1.7449614447311393e-06, "loss": 0.43280357122421265, "step": 2351 }, { "epoch": 0.5422478386167147, "grad_norm": 1.1384909744897507, "learning_rate": 1.7447071100943446e-06, "loss": 0.4849068522453308, "step": 2352 }, { "epoch": 0.5424783861671469, "grad_norm": 1.2382586073876527, "learning_rate": 1.744452667259354e-06, "loss": 0.5404252409934998, "step": 2353 }, { "epoch": 0.5427089337175792, "grad_norm": 1.14218337171458, "learning_rate": 1.7441981162631353e-06, "loss": 0.41409242153167725, "step": 2354 }, { "epoch": 0.5429394812680115, "grad_norm": 1.2694326083074758, "learning_rate": 1.7439434571426718e-06, "loss": 0.549047589302063, "step": 2355 }, { "epoch": 0.5431700288184438, "grad_norm": 1.7272228580432107, "learning_rate": 1.7436886899349635e-06, "loss": 0.6400755643844604, "step": 2356 }, { "epoch": 0.5434005763688761, "grad_norm": 1.2277192228486493, "learning_rate": 1.7434338146770247e-06, "loss": 0.46230536699295044, "step": 2357 }, { "epoch": 0.5436311239193083, "grad_norm": 1.3530870479148573, "learning_rate": 1.7431788314058862e-06, "loss": 0.4629567861557007, "step": 2358 }, { "epoch": 0.5438616714697406, "grad_norm": 1.0772570900190532, "learning_rate": 1.742923740158595e-06, "loss": 0.4405868649482727, "step": 2359 }, { "epoch": 0.5440922190201729, "grad_norm": 1.2150946309984234, "learning_rate": 1.7426685409722128e-06, "loss": 0.4655589163303375, "step": 2360 }, { "epoch": 0.5443227665706052, "grad_norm": 1.2675240389340952, "learning_rate": 1.7424132338838171e-06, "loss": 0.4461101293563843, "step": 2361 }, { "epoch": 0.5445533141210375, "grad_norm": 1.5854266917259596, "learning_rate": 1.7421578189305022e-06, "loss": 0.4191123843193054, "step": 2362 }, { "epoch": 0.5447838616714697, "grad_norm": 1.6758559799234898, "learning_rate": 1.741902296149376e-06, "loss": 0.5072780251502991, "step": 2363 }, { "epoch": 0.545014409221902, "grad_norm": 1.2601130548302264, "learning_rate": 1.7416466655775648e-06, "loss": 0.5171830654144287, "step": 2364 }, { "epoch": 0.5452449567723343, "grad_norm": 1.2783697742533648, "learning_rate": 1.7413909272522079e-06, "loss": 0.5393522381782532, "step": 2365 }, { "epoch": 0.5454755043227666, "grad_norm": 1.5053510484478103, "learning_rate": 1.7411350812104617e-06, "loss": 0.5618822574615479, "step": 2366 }, { "epoch": 0.5457060518731989, "grad_norm": 1.1848495900457634, "learning_rate": 1.7408791274894986e-06, "loss": 0.5396535992622375, "step": 2367 }, { "epoch": 0.5459365994236312, "grad_norm": 1.5782103314859273, "learning_rate": 1.740623066126505e-06, "loss": 0.6367689967155457, "step": 2368 }, { "epoch": 0.5461671469740634, "grad_norm": 1.0855916632567553, "learning_rate": 1.7403668971586844e-06, "loss": 0.47266045212745667, "step": 2369 }, { "epoch": 0.5463976945244957, "grad_norm": 1.2728774961386682, "learning_rate": 1.7401106206232558e-06, "loss": 0.5436207056045532, "step": 2370 }, { "epoch": 0.546628242074928, "grad_norm": 1.1570761720253744, "learning_rate": 1.7398542365574532e-06, "loss": 0.47154513001441956, "step": 2371 }, { "epoch": 0.5468587896253603, "grad_norm": 1.3437856079692005, "learning_rate": 1.7395977449985264e-06, "loss": 0.5550810098648071, "step": 2372 }, { "epoch": 0.5470893371757926, "grad_norm": 1.4928115947596696, "learning_rate": 1.7393411459837414e-06, "loss": 0.5480188131332397, "step": 2373 }, { "epoch": 0.5473198847262248, "grad_norm": 1.1868713323946698, "learning_rate": 1.7390844395503787e-06, "loss": 0.5999584197998047, "step": 2374 }, { "epoch": 0.547550432276657, "grad_norm": 1.2883633347308001, "learning_rate": 1.7388276257357357e-06, "loss": 0.5834146738052368, "step": 2375 }, { "epoch": 0.5477809798270893, "grad_norm": 1.2072211348002257, "learning_rate": 1.7385707045771248e-06, "loss": 0.5123052597045898, "step": 2376 }, { "epoch": 0.5480115273775216, "grad_norm": 1.2449367994236626, "learning_rate": 1.7383136761118734e-06, "loss": 0.5500702857971191, "step": 2377 }, { "epoch": 0.5482420749279538, "grad_norm": 1.2112346356392183, "learning_rate": 1.7380565403773255e-06, "loss": 0.4849172830581665, "step": 2378 }, { "epoch": 0.5484726224783861, "grad_norm": 1.1806595721818476, "learning_rate": 1.7377992974108402e-06, "loss": 0.5452470779418945, "step": 2379 }, { "epoch": 0.5487031700288184, "grad_norm": 1.093817020961923, "learning_rate": 1.737541947249792e-06, "loss": 0.4986187219619751, "step": 2380 }, { "epoch": 0.5489337175792507, "grad_norm": 1.5132521321219672, "learning_rate": 1.7372844899315715e-06, "loss": 0.5750565528869629, "step": 2381 }, { "epoch": 0.549164265129683, "grad_norm": 1.5433696305301123, "learning_rate": 1.7370269254935843e-06, "loss": 0.45188283920288086, "step": 2382 }, { "epoch": 0.5493948126801153, "grad_norm": 1.3782935189362007, "learning_rate": 1.7367692539732519e-06, "loss": 0.4593166708946228, "step": 2383 }, { "epoch": 0.5496253602305475, "grad_norm": 1.0865107387475765, "learning_rate": 1.7365114754080118e-06, "loss": 0.4712238013744354, "step": 2384 }, { "epoch": 0.5498559077809798, "grad_norm": 1.2693708537220825, "learning_rate": 1.7362535898353156e-06, "loss": 0.5285966396331787, "step": 2385 }, { "epoch": 0.5500864553314121, "grad_norm": 1.0580449420809215, "learning_rate": 1.735995597292632e-06, "loss": 0.5930126905441284, "step": 2386 }, { "epoch": 0.5503170028818444, "grad_norm": 1.3211740236059126, "learning_rate": 1.7357374978174447e-06, "loss": 0.48222798109054565, "step": 2387 }, { "epoch": 0.5505475504322767, "grad_norm": 1.0661809221331433, "learning_rate": 1.7354792914472528e-06, "loss": 0.463559627532959, "step": 2388 }, { "epoch": 0.5507780979827089, "grad_norm": 1.2928471066929086, "learning_rate": 1.7352209782195706e-06, "loss": 0.5738880038261414, "step": 2389 }, { "epoch": 0.5510086455331412, "grad_norm": 1.168260381089932, "learning_rate": 1.7349625581719285e-06, "loss": 0.4638671278953552, "step": 2390 }, { "epoch": 0.5512391930835735, "grad_norm": 1.3236614245482354, "learning_rate": 1.7347040313418729e-06, "loss": 0.5681020617485046, "step": 2391 }, { "epoch": 0.5514697406340058, "grad_norm": 1.1347196291887822, "learning_rate": 1.7344453977669639e-06, "loss": 0.49352413415908813, "step": 2392 }, { "epoch": 0.5517002881844381, "grad_norm": 1.1909845997462791, "learning_rate": 1.7341866574847792e-06, "loss": 0.4587385058403015, "step": 2393 }, { "epoch": 0.5519308357348703, "grad_norm": 1.2519226107096215, "learning_rate": 1.733927810532911e-06, "loss": 0.5855783224105835, "step": 2394 }, { "epoch": 0.5521613832853026, "grad_norm": 1.3059494948390526, "learning_rate": 1.7336688569489663e-06, "loss": 0.5600197315216064, "step": 2395 }, { "epoch": 0.5523919308357349, "grad_norm": 1.1744558919405392, "learning_rate": 1.7334097967705691e-06, "loss": 0.5392236709594727, "step": 2396 }, { "epoch": 0.5526224783861672, "grad_norm": 1.4517486170338612, "learning_rate": 1.7331506300353582e-06, "loss": 0.4800894260406494, "step": 2397 }, { "epoch": 0.5528530259365995, "grad_norm": 1.178372280796007, "learning_rate": 1.7328913567809874e-06, "loss": 0.4818227291107178, "step": 2398 }, { "epoch": 0.5530835734870317, "grad_norm": 1.7548710225897248, "learning_rate": 1.7326319770451263e-06, "loss": 0.513884425163269, "step": 2399 }, { "epoch": 0.553314121037464, "grad_norm": 1.126782436647883, "learning_rate": 1.7323724908654607e-06, "loss": 0.4948759078979492, "step": 2400 }, { "epoch": 0.5535446685878963, "grad_norm": 1.0554552263799144, "learning_rate": 1.7321128982796908e-06, "loss": 0.492409884929657, "step": 2401 }, { "epoch": 0.5537752161383286, "grad_norm": 1.2491555996131936, "learning_rate": 1.7318531993255328e-06, "loss": 0.5249980688095093, "step": 2402 }, { "epoch": 0.5540057636887608, "grad_norm": 1.1270035201738609, "learning_rate": 1.7315933940407184e-06, "loss": 0.5396232604980469, "step": 2403 }, { "epoch": 0.554236311239193, "grad_norm": 0.9723547393116351, "learning_rate": 1.731333482462994e-06, "loss": 0.5420812368392944, "step": 2404 }, { "epoch": 0.5544668587896253, "grad_norm": 1.2855153094106506, "learning_rate": 1.731073464630123e-06, "loss": 0.5918469429016113, "step": 2405 }, { "epoch": 0.5546974063400576, "grad_norm": 1.3464941159444543, "learning_rate": 1.7308133405798825e-06, "loss": 0.5120134353637695, "step": 2406 }, { "epoch": 0.5549279538904899, "grad_norm": 1.0375398976716819, "learning_rate": 1.7305531103500664e-06, "loss": 0.5390212535858154, "step": 2407 }, { "epoch": 0.5551585014409222, "grad_norm": 1.316770240861726, "learning_rate": 1.7302927739784828e-06, "loss": 0.524927020072937, "step": 2408 }, { "epoch": 0.5553890489913544, "grad_norm": 1.265948132616221, "learning_rate": 1.7300323315029563e-06, "loss": 0.5254822969436646, "step": 2409 }, { "epoch": 0.5556195965417867, "grad_norm": 1.2631241917657168, "learning_rate": 1.7297717829613268e-06, "loss": 0.4715406000614166, "step": 2410 }, { "epoch": 0.555850144092219, "grad_norm": 1.1354586776799696, "learning_rate": 1.7295111283914485e-06, "loss": 0.5462017059326172, "step": 2411 }, { "epoch": 0.5560806916426513, "grad_norm": 1.3087280521714544, "learning_rate": 1.7292503678311923e-06, "loss": 0.5210998058319092, "step": 2412 }, { "epoch": 0.5563112391930836, "grad_norm": 1.3244585954422035, "learning_rate": 1.728989501318444e-06, "loss": 0.6012462377548218, "step": 2413 }, { "epoch": 0.5565417867435158, "grad_norm": 1.2235365774596485, "learning_rate": 1.7287285288911045e-06, "loss": 0.5233356952667236, "step": 2414 }, { "epoch": 0.5567723342939481, "grad_norm": 1.179341457042005, "learning_rate": 1.7284674505870907e-06, "loss": 0.5262124538421631, "step": 2415 }, { "epoch": 0.5570028818443804, "grad_norm": 1.200909446705809, "learning_rate": 1.728206266444334e-06, "loss": 0.5770957469940186, "step": 2416 }, { "epoch": 0.5572334293948127, "grad_norm": 1.6431811554350846, "learning_rate": 1.7279449765007827e-06, "loss": 0.4863049387931824, "step": 2417 }, { "epoch": 0.557463976945245, "grad_norm": 1.0434342032855735, "learning_rate": 1.727683580794399e-06, "loss": 0.4981330633163452, "step": 2418 }, { "epoch": 0.5576945244956772, "grad_norm": 1.5074776608800005, "learning_rate": 1.7274220793631606e-06, "loss": 0.4425088167190552, "step": 2419 }, { "epoch": 0.5579250720461095, "grad_norm": 1.1366341564315194, "learning_rate": 1.7271604722450616e-06, "loss": 0.44763046503067017, "step": 2420 }, { "epoch": 0.5581556195965418, "grad_norm": 1.2585978898519974, "learning_rate": 1.7268987594781103e-06, "loss": 0.561431348323822, "step": 2421 }, { "epoch": 0.5583861671469741, "grad_norm": 1.3718566333183382, "learning_rate": 1.7266369411003312e-06, "loss": 0.43858832120895386, "step": 2422 }, { "epoch": 0.5586167146974064, "grad_norm": 1.2234114143834152, "learning_rate": 1.7263750171497633e-06, "loss": 0.46294379234313965, "step": 2423 }, { "epoch": 0.5588472622478386, "grad_norm": 1.2068012948681028, "learning_rate": 1.7261129876644623e-06, "loss": 0.43397650122642517, "step": 2424 }, { "epoch": 0.5590778097982709, "grad_norm": 1.1704200234778626, "learning_rate": 1.7258508526824973e-06, "loss": 0.5309501886367798, "step": 2425 }, { "epoch": 0.5593083573487032, "grad_norm": 1.0071111236432664, "learning_rate": 1.7255886122419544e-06, "loss": 0.505167543888092, "step": 2426 }, { "epoch": 0.5595389048991355, "grad_norm": 1.2356906230131073, "learning_rate": 1.725326266380934e-06, "loss": 0.5945361256599426, "step": 2427 }, { "epoch": 0.5597694524495678, "grad_norm": 1.1822616583201517, "learning_rate": 1.725063815137553e-06, "loss": 0.4658198952674866, "step": 2428 }, { "epoch": 0.56, "grad_norm": 0.9726608754536225, "learning_rate": 1.7248012585499422e-06, "loss": 0.4013107419013977, "step": 2429 }, { "epoch": 0.5602305475504322, "grad_norm": 1.2094159491990593, "learning_rate": 1.7245385966562485e-06, "loss": 0.47318965196609497, "step": 2430 }, { "epoch": 0.5604610951008645, "grad_norm": 1.0448285007897542, "learning_rate": 1.7242758294946338e-06, "loss": 0.5096567273139954, "step": 2431 }, { "epoch": 0.5606916426512968, "grad_norm": 1.3183204430020672, "learning_rate": 1.7240129571032758e-06, "loss": 0.6132520437240601, "step": 2432 }, { "epoch": 0.5609221902017291, "grad_norm": 1.20612372705674, "learning_rate": 1.7237499795203665e-06, "loss": 0.5368523597717285, "step": 2433 }, { "epoch": 0.5611527377521613, "grad_norm": 1.156268037467991, "learning_rate": 1.7234868967841143e-06, "loss": 0.48554062843322754, "step": 2434 }, { "epoch": 0.5613832853025936, "grad_norm": 1.0256198870418538, "learning_rate": 1.7232237089327426e-06, "loss": 0.5702558755874634, "step": 2435 }, { "epoch": 0.5616138328530259, "grad_norm": 1.3174974585828079, "learning_rate": 1.7229604160044893e-06, "loss": 0.5398519039154053, "step": 2436 }, { "epoch": 0.5618443804034582, "grad_norm": 1.1225841092153466, "learning_rate": 1.7226970180376083e-06, "loss": 0.502672553062439, "step": 2437 }, { "epoch": 0.5620749279538905, "grad_norm": 1.3348084140762158, "learning_rate": 1.7224335150703691e-06, "loss": 0.5490902662277222, "step": 2438 }, { "epoch": 0.5623054755043227, "grad_norm": 1.2252956936987631, "learning_rate": 1.722169907141055e-06, "loss": 0.5142146348953247, "step": 2439 }, { "epoch": 0.562536023054755, "grad_norm": 1.1994402774290347, "learning_rate": 1.721906194287966e-06, "loss": 0.4676271677017212, "step": 2440 }, { "epoch": 0.5627665706051873, "grad_norm": 1.2483659723793379, "learning_rate": 1.721642376549417e-06, "loss": 0.4180489182472229, "step": 2441 }, { "epoch": 0.5629971181556196, "grad_norm": 1.2264187858798972, "learning_rate": 1.7213784539637378e-06, "loss": 0.45822733640670776, "step": 2442 }, { "epoch": 0.5632276657060519, "grad_norm": 0.9474625129952845, "learning_rate": 1.7211144265692736e-06, "loss": 0.46978574991226196, "step": 2443 }, { "epoch": 0.5634582132564842, "grad_norm": 1.2092008884546377, "learning_rate": 1.7208502944043846e-06, "loss": 0.5099056363105774, "step": 2444 }, { "epoch": 0.5636887608069164, "grad_norm": 1.4043049353060089, "learning_rate": 1.7205860575074467e-06, "loss": 0.5157277584075928, "step": 2445 }, { "epoch": 0.5639193083573487, "grad_norm": 1.4140179513751376, "learning_rate": 1.7203217159168509e-06, "loss": 0.5684963464736938, "step": 2446 }, { "epoch": 0.564149855907781, "grad_norm": 1.395481014428841, "learning_rate": 1.7200572696710031e-06, "loss": 0.5450068712234497, "step": 2447 }, { "epoch": 0.5643804034582133, "grad_norm": 1.219230832097251, "learning_rate": 1.7197927188083247e-06, "loss": 0.487520694732666, "step": 2448 }, { "epoch": 0.5646109510086456, "grad_norm": 1.0252448838780284, "learning_rate": 1.719528063367252e-06, "loss": 0.473537802696228, "step": 2449 }, { "epoch": 0.5648414985590778, "grad_norm": 1.3599987921866794, "learning_rate": 1.7192633033862366e-06, "loss": 0.5973968505859375, "step": 2450 }, { "epoch": 0.5650720461095101, "grad_norm": 1.192988798166061, "learning_rate": 1.7189984389037463e-06, "loss": 0.494625985622406, "step": 2451 }, { "epoch": 0.5653025936599424, "grad_norm": 1.0679499637949388, "learning_rate": 1.7187334699582616e-06, "loss": 0.5459957122802734, "step": 2452 }, { "epoch": 0.5655331412103747, "grad_norm": 1.1247915420976404, "learning_rate": 1.718468396588281e-06, "loss": 0.492592453956604, "step": 2453 }, { "epoch": 0.565763688760807, "grad_norm": 1.2564768345784407, "learning_rate": 1.7182032188323161e-06, "loss": 0.5006682872772217, "step": 2454 }, { "epoch": 0.5659942363112392, "grad_norm": 1.2050444012646155, "learning_rate": 1.717937936728895e-06, "loss": 0.4829084277153015, "step": 2455 }, { "epoch": 0.5662247838616715, "grad_norm": 1.2953134882724824, "learning_rate": 1.7176725503165606e-06, "loss": 0.5307221412658691, "step": 2456 }, { "epoch": 0.5664553314121038, "grad_norm": 1.4233139606988499, "learning_rate": 1.7174070596338698e-06, "loss": 0.381227046251297, "step": 2457 }, { "epoch": 0.566685878962536, "grad_norm": 1.1760942170869182, "learning_rate": 1.7171414647193966e-06, "loss": 0.4984063506126404, "step": 2458 }, { "epoch": 0.5669164265129683, "grad_norm": 1.1776079567311266, "learning_rate": 1.716875765611729e-06, "loss": 0.4236326813697815, "step": 2459 }, { "epoch": 0.5671469740634005, "grad_norm": 1.1740922365288478, "learning_rate": 1.7166099623494698e-06, "loss": 0.5070061683654785, "step": 2460 }, { "epoch": 0.5673775216138328, "grad_norm": 1.5114956621718905, "learning_rate": 1.7163440549712382e-06, "loss": 0.5020880699157715, "step": 2461 }, { "epoch": 0.5676080691642651, "grad_norm": 1.1096400864282738, "learning_rate": 1.7160780435156674e-06, "loss": 0.5303773283958435, "step": 2462 }, { "epoch": 0.5678386167146974, "grad_norm": 1.494282249895922, "learning_rate": 1.7158119280214058e-06, "loss": 0.47430795431137085, "step": 2463 }, { "epoch": 0.5680691642651297, "grad_norm": 2.011684138207683, "learning_rate": 1.7155457085271174e-06, "loss": 0.49290892481803894, "step": 2464 }, { "epoch": 0.5682997118155619, "grad_norm": 1.185573295335424, "learning_rate": 1.7152793850714817e-06, "loss": 0.5034571290016174, "step": 2465 }, { "epoch": 0.5685302593659942, "grad_norm": 1.3815592270043073, "learning_rate": 1.715012957693192e-06, "loss": 0.4832080602645874, "step": 2466 }, { "epoch": 0.5687608069164265, "grad_norm": 1.2151204568168548, "learning_rate": 1.7147464264309576e-06, "loss": 0.5464382171630859, "step": 2467 }, { "epoch": 0.5689913544668588, "grad_norm": 1.1492964258847325, "learning_rate": 1.714479791323503e-06, "loss": 0.50137859582901, "step": 2468 }, { "epoch": 0.5692219020172911, "grad_norm": 1.157123770105093, "learning_rate": 1.714213052409567e-06, "loss": 0.4540822505950928, "step": 2469 }, { "epoch": 0.5694524495677233, "grad_norm": 1.2490601252140423, "learning_rate": 1.7139462097279046e-06, "loss": 0.4347212016582489, "step": 2470 }, { "epoch": 0.5696829971181556, "grad_norm": 1.3725663062255251, "learning_rate": 1.7136792633172848e-06, "loss": 0.5054244995117188, "step": 2471 }, { "epoch": 0.5699135446685879, "grad_norm": 1.3145868829879277, "learning_rate": 1.7134122132164922e-06, "loss": 0.570202112197876, "step": 2472 }, { "epoch": 0.5701440922190202, "grad_norm": 1.046325975638938, "learning_rate": 1.7131450594643266e-06, "loss": 0.39734238386154175, "step": 2473 }, { "epoch": 0.5703746397694525, "grad_norm": 1.3305714790552408, "learning_rate": 1.712877802099603e-06, "loss": 0.5042159557342529, "step": 2474 }, { "epoch": 0.5706051873198847, "grad_norm": 1.4450527868868759, "learning_rate": 1.71261044116115e-06, "loss": 0.5412349700927734, "step": 2475 }, { "epoch": 0.570835734870317, "grad_norm": 1.2189812093120667, "learning_rate": 1.7123429766878133e-06, "loss": 0.5226187705993652, "step": 2476 }, { "epoch": 0.5710662824207493, "grad_norm": 1.1260169170850247, "learning_rate": 1.7120754087184523e-06, "loss": 0.4852250814437866, "step": 2477 }, { "epoch": 0.5712968299711816, "grad_norm": 1.190113614657056, "learning_rate": 1.7118077372919425e-06, "loss": 0.5083650350570679, "step": 2478 }, { "epoch": 0.5715273775216139, "grad_norm": 1.3460854369133757, "learning_rate": 1.7115399624471728e-06, "loss": 0.5298900604248047, "step": 2479 }, { "epoch": 0.5717579250720461, "grad_norm": 1.359750962222541, "learning_rate": 1.7112720842230485e-06, "loss": 0.5619887113571167, "step": 2480 }, { "epoch": 0.5719884726224784, "grad_norm": 1.923177254565902, "learning_rate": 1.7110041026584898e-06, "loss": 0.5785295367240906, "step": 2481 }, { "epoch": 0.5722190201729107, "grad_norm": 1.1169896209151449, "learning_rate": 1.7107360177924312e-06, "loss": 0.4424097537994385, "step": 2482 }, { "epoch": 0.572449567723343, "grad_norm": 1.0927297402951583, "learning_rate": 1.7104678296638234e-06, "loss": 0.5585045218467712, "step": 2483 }, { "epoch": 0.5726801152737753, "grad_norm": 1.1082744154066473, "learning_rate": 1.7101995383116302e-06, "loss": 0.4811630845069885, "step": 2484 }, { "epoch": 0.5729106628242074, "grad_norm": 1.5640254807037144, "learning_rate": 1.7099311437748322e-06, "loss": 0.5382398366928101, "step": 2485 }, { "epoch": 0.5731412103746397, "grad_norm": 1.256587442517617, "learning_rate": 1.7096626460924246e-06, "loss": 0.5314421653747559, "step": 2486 }, { "epoch": 0.573371757925072, "grad_norm": 1.3291523627771589, "learning_rate": 1.7093940453034167e-06, "loss": 0.5254508256912231, "step": 2487 }, { "epoch": 0.5736023054755043, "grad_norm": 1.1798528892424343, "learning_rate": 1.7091253414468338e-06, "loss": 0.5426524877548218, "step": 2488 }, { "epoch": 0.5738328530259366, "grad_norm": 1.3436385596476037, "learning_rate": 1.7088565345617156e-06, "loss": 0.5764416456222534, "step": 2489 }, { "epoch": 0.5740634005763688, "grad_norm": 1.036753645582628, "learning_rate": 1.7085876246871172e-06, "loss": 0.43790721893310547, "step": 2490 }, { "epoch": 0.5742939481268011, "grad_norm": 1.0552814657492613, "learning_rate": 1.7083186118621083e-06, "loss": 0.5315482020378113, "step": 2491 }, { "epoch": 0.5745244956772334, "grad_norm": 1.5762916246577796, "learning_rate": 1.7080494961257731e-06, "loss": 0.5694386959075928, "step": 2492 }, { "epoch": 0.5747550432276657, "grad_norm": 1.2536007668458176, "learning_rate": 1.7077802775172124e-06, "loss": 0.5026420950889587, "step": 2493 }, { "epoch": 0.574985590778098, "grad_norm": 1.3171524239513317, "learning_rate": 1.70751095607554e-06, "loss": 0.472505658864975, "step": 2494 }, { "epoch": 0.5752161383285302, "grad_norm": 1.228300898794264, "learning_rate": 1.7072415318398856e-06, "loss": 0.4240390658378601, "step": 2495 }, { "epoch": 0.5754466858789625, "grad_norm": 1.2252372715220645, "learning_rate": 1.706972004849394e-06, "loss": 0.46073201298713684, "step": 2496 }, { "epoch": 0.5756772334293948, "grad_norm": 1.386636595307024, "learning_rate": 1.7067023751432247e-06, "loss": 0.5547488927841187, "step": 2497 }, { "epoch": 0.5759077809798271, "grad_norm": 1.2036393936459033, "learning_rate": 1.7064326427605523e-06, "loss": 0.4080501198768616, "step": 2498 }, { "epoch": 0.5761383285302594, "grad_norm": 1.288559046017793, "learning_rate": 1.7061628077405653e-06, "loss": 0.5036013126373291, "step": 2499 }, { "epoch": 0.5763688760806917, "grad_norm": 1.2217187839100583, "learning_rate": 1.7058928701224683e-06, "loss": 0.43331170082092285, "step": 2500 }, { "epoch": 0.5765994236311239, "grad_norm": 1.2639727491075898, "learning_rate": 1.7056228299454808e-06, "loss": 0.5010221004486084, "step": 2501 }, { "epoch": 0.5768299711815562, "grad_norm": 1.3767543189150935, "learning_rate": 1.7053526872488365e-06, "loss": 0.4393835663795471, "step": 2502 }, { "epoch": 0.5770605187319885, "grad_norm": 1.6106797071078789, "learning_rate": 1.7050824420717844e-06, "loss": 0.4924699068069458, "step": 2503 }, { "epoch": 0.5772910662824208, "grad_norm": 1.4001771231988265, "learning_rate": 1.7048120944535883e-06, "loss": 0.5389400720596313, "step": 2504 }, { "epoch": 0.577521613832853, "grad_norm": 0.9828526919896293, "learning_rate": 1.7045416444335267e-06, "loss": 0.4584382176399231, "step": 2505 }, { "epoch": 0.5777521613832853, "grad_norm": 1.2872729332849036, "learning_rate": 1.7042710920508936e-06, "loss": 0.5079721808433533, "step": 2506 }, { "epoch": 0.5779827089337176, "grad_norm": 1.0324776991211704, "learning_rate": 1.7040004373449973e-06, "loss": 0.4421960115432739, "step": 2507 }, { "epoch": 0.5782132564841499, "grad_norm": 1.2349316962791468, "learning_rate": 1.7037296803551607e-06, "loss": 0.4270066022872925, "step": 2508 }, { "epoch": 0.5784438040345822, "grad_norm": 1.1151044129570897, "learning_rate": 1.7034588211207224e-06, "loss": 0.49084147810935974, "step": 2509 }, { "epoch": 0.5786743515850145, "grad_norm": 1.021345503807098, "learning_rate": 1.7031878596810354e-06, "loss": 0.3782140612602234, "step": 2510 }, { "epoch": 0.5789048991354467, "grad_norm": 1.175168927743577, "learning_rate": 1.7029167960754676e-06, "loss": 0.5119669437408447, "step": 2511 }, { "epoch": 0.5791354466858789, "grad_norm": 1.3563408269400135, "learning_rate": 1.7026456303434013e-06, "loss": 0.5158041715621948, "step": 2512 }, { "epoch": 0.5793659942363112, "grad_norm": 1.16044866200114, "learning_rate": 1.7023743625242346e-06, "loss": 0.487191766500473, "step": 2513 }, { "epoch": 0.5795965417867435, "grad_norm": 1.1434101258085176, "learning_rate": 1.7021029926573798e-06, "loss": 0.4553701877593994, "step": 2514 }, { "epoch": 0.5798270893371757, "grad_norm": 1.4681723884009028, "learning_rate": 1.7018315207822639e-06, "loss": 0.6393533945083618, "step": 2515 }, { "epoch": 0.580057636887608, "grad_norm": 1.280445367600244, "learning_rate": 1.701559946938329e-06, "loss": 0.502502977848053, "step": 2516 }, { "epoch": 0.5802881844380403, "grad_norm": 1.2538970141245356, "learning_rate": 1.7012882711650321e-06, "loss": 0.44787830114364624, "step": 2517 }, { "epoch": 0.5805187319884726, "grad_norm": 1.1418537417579828, "learning_rate": 1.7010164935018445e-06, "loss": 0.541442334651947, "step": 2518 }, { "epoch": 0.5807492795389049, "grad_norm": 1.0922323335203685, "learning_rate": 1.7007446139882533e-06, "loss": 0.40795016288757324, "step": 2519 }, { "epoch": 0.5809798270893372, "grad_norm": 1.2342785454648022, "learning_rate": 1.700472632663759e-06, "loss": 0.5261722207069397, "step": 2520 }, { "epoch": 0.5812103746397694, "grad_norm": 1.219906251892028, "learning_rate": 1.7002005495678782e-06, "loss": 0.5007427930831909, "step": 2521 }, { "epoch": 0.5814409221902017, "grad_norm": 1.1929415488360804, "learning_rate": 1.6999283647401416e-06, "loss": 0.4413378834724426, "step": 2522 }, { "epoch": 0.581671469740634, "grad_norm": 1.1216903504380007, "learning_rate": 1.6996560782200949e-06, "loss": 0.545518159866333, "step": 2523 }, { "epoch": 0.5819020172910663, "grad_norm": 1.5308783706292048, "learning_rate": 1.6993836900472984e-06, "loss": 0.5393378734588623, "step": 2524 }, { "epoch": 0.5821325648414986, "grad_norm": 1.198481766250496, "learning_rate": 1.6991112002613272e-06, "loss": 0.5119227170944214, "step": 2525 }, { "epoch": 0.5823631123919308, "grad_norm": 1.6691866786968823, "learning_rate": 1.6988386089017714e-06, "loss": 0.5835440158843994, "step": 2526 }, { "epoch": 0.5825936599423631, "grad_norm": 1.2405582078403503, "learning_rate": 1.6985659160082354e-06, "loss": 0.46577557921409607, "step": 2527 }, { "epoch": 0.5828242074927954, "grad_norm": 1.1157785797661819, "learning_rate": 1.698293121620339e-06, "loss": 0.4569145441055298, "step": 2528 }, { "epoch": 0.5830547550432277, "grad_norm": 1.2049909838697974, "learning_rate": 1.698020225777716e-06, "loss": 0.5309783220291138, "step": 2529 }, { "epoch": 0.58328530259366, "grad_norm": 1.120647300804938, "learning_rate": 1.6977472285200158e-06, "loss": 0.48042014241218567, "step": 2530 }, { "epoch": 0.5835158501440922, "grad_norm": 1.1626136587389744, "learning_rate": 1.697474129886902e-06, "loss": 0.49191930890083313, "step": 2531 }, { "epoch": 0.5837463976945245, "grad_norm": 1.394105681476053, "learning_rate": 1.6972009299180528e-06, "loss": 0.512083113193512, "step": 2532 }, { "epoch": 0.5839769452449568, "grad_norm": 1.4049225353163461, "learning_rate": 1.696927628653161e-06, "loss": 0.3880317211151123, "step": 2533 }, { "epoch": 0.5842074927953891, "grad_norm": 1.6757831983931575, "learning_rate": 1.6966542261319345e-06, "loss": 0.45285481214523315, "step": 2534 }, { "epoch": 0.5844380403458214, "grad_norm": 1.0843645857738136, "learning_rate": 1.6963807223940966e-06, "loss": 0.515269935131073, "step": 2535 }, { "epoch": 0.5846685878962536, "grad_norm": 1.2972770484450729, "learning_rate": 1.696107117479384e-06, "loss": 0.5730916261672974, "step": 2536 }, { "epoch": 0.5848991354466859, "grad_norm": 1.1822238481147822, "learning_rate": 1.6958334114275482e-06, "loss": 0.4333222508430481, "step": 2537 }, { "epoch": 0.5851296829971182, "grad_norm": 1.0623302480533618, "learning_rate": 1.695559604278357e-06, "loss": 0.4325833022594452, "step": 2538 }, { "epoch": 0.5853602305475505, "grad_norm": 1.1199883883456008, "learning_rate": 1.6952856960715907e-06, "loss": 0.41645392775535583, "step": 2539 }, { "epoch": 0.5855907780979827, "grad_norm": 1.257944303936855, "learning_rate": 1.6950116868470458e-06, "loss": 0.5943924188613892, "step": 2540 }, { "epoch": 0.5858213256484149, "grad_norm": 1.4798490098031383, "learning_rate": 1.6947375766445328e-06, "loss": 0.5209153294563293, "step": 2541 }, { "epoch": 0.5860518731988472, "grad_norm": 1.248783629040668, "learning_rate": 1.694463365503877e-06, "loss": 0.5074030160903931, "step": 2542 }, { "epoch": 0.5862824207492795, "grad_norm": 1.5168507350170992, "learning_rate": 1.6941890534649188e-06, "loss": 0.5399416089057922, "step": 2543 }, { "epoch": 0.5865129682997118, "grad_norm": 1.3533022502962022, "learning_rate": 1.6939146405675127e-06, "loss": 0.4862229824066162, "step": 2544 }, { "epoch": 0.5867435158501441, "grad_norm": 1.4917228203054256, "learning_rate": 1.6936401268515278e-06, "loss": 0.5033354759216309, "step": 2545 }, { "epoch": 0.5869740634005763, "grad_norm": 1.2265536167956566, "learning_rate": 1.693365512356848e-06, "loss": 0.3859539031982422, "step": 2546 }, { "epoch": 0.5872046109510086, "grad_norm": 1.2430465412866725, "learning_rate": 1.6930907971233726e-06, "loss": 0.584037184715271, "step": 2547 }, { "epoch": 0.5874351585014409, "grad_norm": 1.1967021755581315, "learning_rate": 1.6928159811910144e-06, "loss": 0.5054824352264404, "step": 2548 }, { "epoch": 0.5876657060518732, "grad_norm": 1.2511899777779398, "learning_rate": 1.692541064599701e-06, "loss": 0.510034441947937, "step": 2549 }, { "epoch": 0.5878962536023055, "grad_norm": 1.4397980152012548, "learning_rate": 1.6922660473893756e-06, "loss": 0.45610690116882324, "step": 2550 }, { "epoch": 0.5881268011527377, "grad_norm": 1.4588684788429476, "learning_rate": 1.691990929599995e-06, "loss": 0.5473066568374634, "step": 2551 }, { "epoch": 0.58835734870317, "grad_norm": 1.5335219528163933, "learning_rate": 1.691715711271531e-06, "loss": 0.5674794316291809, "step": 2552 }, { "epoch": 0.5885878962536023, "grad_norm": 1.2774108469534404, "learning_rate": 1.6914403924439698e-06, "loss": 0.522304892539978, "step": 2553 }, { "epoch": 0.5888184438040346, "grad_norm": 1.2476953304994698, "learning_rate": 1.6911649731573125e-06, "loss": 0.45838260650634766, "step": 2554 }, { "epoch": 0.5890489913544669, "grad_norm": 1.3098546735155092, "learning_rate": 1.6908894534515748e-06, "loss": 0.5382635593414307, "step": 2555 }, { "epoch": 0.5892795389048991, "grad_norm": 1.357876961582571, "learning_rate": 1.6906138333667865e-06, "loss": 0.4829067289829254, "step": 2556 }, { "epoch": 0.5895100864553314, "grad_norm": 1.2356399026717673, "learning_rate": 1.6903381129429924e-06, "loss": 0.49646514654159546, "step": 2557 }, { "epoch": 0.5897406340057637, "grad_norm": 1.2637946757850296, "learning_rate": 1.6900622922202522e-06, "loss": 0.47126126289367676, "step": 2558 }, { "epoch": 0.589971181556196, "grad_norm": 1.7976328265249715, "learning_rate": 1.6897863712386396e-06, "loss": 0.6280478239059448, "step": 2559 }, { "epoch": 0.5902017291066283, "grad_norm": 1.2319066968459778, "learning_rate": 1.6895103500382428e-06, "loss": 0.5028468370437622, "step": 2560 }, { "epoch": 0.5904322766570606, "grad_norm": 1.3749190554066881, "learning_rate": 1.6892342286591648e-06, "loss": 0.49227872490882874, "step": 2561 }, { "epoch": 0.5906628242074928, "grad_norm": 1.2592035477612311, "learning_rate": 1.6889580071415236e-06, "loss": 0.5569860935211182, "step": 2562 }, { "epoch": 0.5908933717579251, "grad_norm": 1.2538699608771755, "learning_rate": 1.6886816855254511e-06, "loss": 0.5619305968284607, "step": 2563 }, { "epoch": 0.5911239193083574, "grad_norm": 1.3015347853061459, "learning_rate": 1.6884052638510938e-06, "loss": 0.5059368014335632, "step": 2564 }, { "epoch": 0.5913544668587897, "grad_norm": 1.5453199631735335, "learning_rate": 1.688128742158613e-06, "loss": 0.546272873878479, "step": 2565 }, { "epoch": 0.591585014409222, "grad_norm": 1.4159852524047818, "learning_rate": 1.6878521204881842e-06, "loss": 0.5926029682159424, "step": 2566 }, { "epoch": 0.5918155619596541, "grad_norm": 1.2170290620681634, "learning_rate": 1.687575398879998e-06, "loss": 0.481456458568573, "step": 2567 }, { "epoch": 0.5920461095100864, "grad_norm": 1.25729683235646, "learning_rate": 1.6872985773742591e-06, "loss": 0.5525637865066528, "step": 2568 }, { "epoch": 0.5922766570605187, "grad_norm": 1.3697297123320187, "learning_rate": 1.6870216560111869e-06, "loss": 0.4845820665359497, "step": 2569 }, { "epoch": 0.592507204610951, "grad_norm": 1.4648168720443564, "learning_rate": 1.6867446348310147e-06, "loss": 0.5175113677978516, "step": 2570 }, { "epoch": 0.5927377521613832, "grad_norm": 1.2238128546340372, "learning_rate": 1.6864675138739917e-06, "loss": 0.4506435990333557, "step": 2571 }, { "epoch": 0.5929682997118155, "grad_norm": 1.0467204345258045, "learning_rate": 1.6861902931803796e-06, "loss": 0.389871209859848, "step": 2572 }, { "epoch": 0.5931988472622478, "grad_norm": 1.4348334874532345, "learning_rate": 1.6859129727904565e-06, "loss": 0.5711140632629395, "step": 2573 }, { "epoch": 0.5934293948126801, "grad_norm": 1.2813025661978332, "learning_rate": 1.6856355527445134e-06, "loss": 0.5305861234664917, "step": 2574 }, { "epoch": 0.5936599423631124, "grad_norm": 1.2339903535392835, "learning_rate": 1.685358033082857e-06, "loss": 0.5364730358123779, "step": 2575 }, { "epoch": 0.5938904899135447, "grad_norm": 1.2845034573576757, "learning_rate": 1.6850804138458087e-06, "loss": 0.5428116917610168, "step": 2576 }, { "epoch": 0.5941210374639769, "grad_norm": 1.323193557767603, "learning_rate": 1.6848026950737028e-06, "loss": 0.4862017035484314, "step": 2577 }, { "epoch": 0.5943515850144092, "grad_norm": 1.5929939838626597, "learning_rate": 1.6845248768068888e-06, "loss": 0.4363023638725281, "step": 2578 }, { "epoch": 0.5945821325648415, "grad_norm": 1.3848419713925337, "learning_rate": 1.6842469590857315e-06, "loss": 0.5355821847915649, "step": 2579 }, { "epoch": 0.5948126801152738, "grad_norm": 1.288102716693538, "learning_rate": 1.683968941950609e-06, "loss": 0.5334150791168213, "step": 2580 }, { "epoch": 0.595043227665706, "grad_norm": 1.4843631469204195, "learning_rate": 1.6836908254419144e-06, "loss": 0.5291295647621155, "step": 2581 }, { "epoch": 0.5952737752161383, "grad_norm": 1.2881595327169846, "learning_rate": 1.6834126096000552e-06, "loss": 0.5341989994049072, "step": 2582 }, { "epoch": 0.5955043227665706, "grad_norm": 1.477342922270466, "learning_rate": 1.6831342944654532e-06, "loss": 0.5400925874710083, "step": 2583 }, { "epoch": 0.5957348703170029, "grad_norm": 1.122996582802429, "learning_rate": 1.6828558800785446e-06, "loss": 0.517853856086731, "step": 2584 }, { "epoch": 0.5959654178674352, "grad_norm": 1.069073216139065, "learning_rate": 1.6825773664797805e-06, "loss": 0.501392662525177, "step": 2585 }, { "epoch": 0.5961959654178675, "grad_norm": 1.1419308725251818, "learning_rate": 1.6822987537096256e-06, "loss": 0.41234803199768066, "step": 2586 }, { "epoch": 0.5964265129682997, "grad_norm": 1.4363918485606544, "learning_rate": 1.6820200418085598e-06, "loss": 0.5413755178451538, "step": 2587 }, { "epoch": 0.596657060518732, "grad_norm": 1.3047825946648908, "learning_rate": 1.6817412308170763e-06, "loss": 0.546847939491272, "step": 2588 }, { "epoch": 0.5968876080691643, "grad_norm": 1.2129787332488573, "learning_rate": 1.6814623207756844e-06, "loss": 0.5571908950805664, "step": 2589 }, { "epoch": 0.5971181556195966, "grad_norm": 1.3660847669739697, "learning_rate": 1.6811833117249063e-06, "loss": 0.6310220956802368, "step": 2590 }, { "epoch": 0.5973487031700289, "grad_norm": 1.232002982807809, "learning_rate": 1.6809042037052792e-06, "loss": 0.5155299305915833, "step": 2591 }, { "epoch": 0.5975792507204611, "grad_norm": 1.3004013438441295, "learning_rate": 1.6806249967573547e-06, "loss": 0.44853711128234863, "step": 2592 }, { "epoch": 0.5978097982708934, "grad_norm": 1.0345081557729547, "learning_rate": 1.6803456909216987e-06, "loss": 0.4670305550098419, "step": 2593 }, { "epoch": 0.5980403458213257, "grad_norm": 1.2039512585696586, "learning_rate": 1.680066286238891e-06, "loss": 0.5101944208145142, "step": 2594 }, { "epoch": 0.5982708933717579, "grad_norm": 1.3955661622281244, "learning_rate": 1.6797867827495267e-06, "loss": 0.4965336322784424, "step": 2595 }, { "epoch": 0.5985014409221902, "grad_norm": 1.2697803535398042, "learning_rate": 1.6795071804942145e-06, "loss": 0.45601886510849, "step": 2596 }, { "epoch": 0.5987319884726224, "grad_norm": 1.1608194807011891, "learning_rate": 1.6792274795135777e-06, "loss": 0.5248251557350159, "step": 2597 }, { "epoch": 0.5989625360230547, "grad_norm": 1.27827520401908, "learning_rate": 1.678947679848254e-06, "loss": 0.4440120458602905, "step": 2598 }, { "epoch": 0.599193083573487, "grad_norm": 1.158625988953876, "learning_rate": 1.6786677815388955e-06, "loss": 0.40963852405548096, "step": 2599 }, { "epoch": 0.5994236311239193, "grad_norm": 1.4419097975345436, "learning_rate": 1.6783877846261683e-06, "loss": 0.4669606685638428, "step": 2600 }, { "epoch": 0.5996541786743516, "grad_norm": 1.242787814192596, "learning_rate": 1.6781076891507531e-06, "loss": 0.5647035837173462, "step": 2601 }, { "epoch": 0.5998847262247838, "grad_norm": 1.3438978653222697, "learning_rate": 1.6778274951533447e-06, "loss": 0.41351717710494995, "step": 2602 }, { "epoch": 0.6001152737752161, "grad_norm": 1.503998962721002, "learning_rate": 1.6775472026746526e-06, "loss": 0.41477349400520325, "step": 2603 }, { "epoch": 0.6003458213256484, "grad_norm": 1.1280590999863394, "learning_rate": 1.6772668117554005e-06, "loss": 0.5451614260673523, "step": 2604 }, { "epoch": 0.6005763688760807, "grad_norm": 1.0711470724108474, "learning_rate": 1.6769863224363263e-06, "loss": 0.4912793040275574, "step": 2605 }, { "epoch": 0.600806916426513, "grad_norm": 1.7686256949774513, "learning_rate": 1.6767057347581818e-06, "loss": 0.544170618057251, "step": 2606 }, { "epoch": 0.6010374639769452, "grad_norm": 1.7609398614368525, "learning_rate": 1.6764250487617335e-06, "loss": 0.5384647846221924, "step": 2607 }, { "epoch": 0.6012680115273775, "grad_norm": 1.2039688741569976, "learning_rate": 1.6761442644877626e-06, "loss": 0.5443817973136902, "step": 2608 }, { "epoch": 0.6014985590778098, "grad_norm": 1.368759520926053, "learning_rate": 1.6758633819770637e-06, "loss": 0.5432279706001282, "step": 2609 }, { "epoch": 0.6017291066282421, "grad_norm": 1.6108602714580338, "learning_rate": 1.6755824012704465e-06, "loss": 0.4550110697746277, "step": 2610 }, { "epoch": 0.6019596541786744, "grad_norm": 1.4123061030133546, "learning_rate": 1.675301322408734e-06, "loss": 0.5927733778953552, "step": 2611 }, { "epoch": 0.6021902017291066, "grad_norm": 1.331775123117798, "learning_rate": 1.6750201454327643e-06, "loss": 0.5468032360076904, "step": 2612 }, { "epoch": 0.6024207492795389, "grad_norm": 1.2756219325658316, "learning_rate": 1.67473887038339e-06, "loss": 0.5089372992515564, "step": 2613 }, { "epoch": 0.6026512968299712, "grad_norm": 1.095233293104996, "learning_rate": 1.6744574973014767e-06, "loss": 0.5835996866226196, "step": 2614 }, { "epoch": 0.6028818443804035, "grad_norm": 1.195604010821045, "learning_rate": 1.6741760262279055e-06, "loss": 0.559473991394043, "step": 2615 }, { "epoch": 0.6031123919308358, "grad_norm": 1.0725638778600672, "learning_rate": 1.6738944572035707e-06, "loss": 0.4891049563884735, "step": 2616 }, { "epoch": 0.603342939481268, "grad_norm": 1.6099327952013749, "learning_rate": 1.6736127902693819e-06, "loss": 0.560591459274292, "step": 2617 }, { "epoch": 0.6035734870317003, "grad_norm": 1.3275205349545465, "learning_rate": 1.6733310254662621e-06, "loss": 0.5701932907104492, "step": 2618 }, { "epoch": 0.6038040345821326, "grad_norm": 1.1973967263171255, "learning_rate": 1.6730491628351486e-06, "loss": 0.4753883481025696, "step": 2619 }, { "epoch": 0.6040345821325649, "grad_norm": 1.347075643962319, "learning_rate": 1.6727672024169936e-06, "loss": 0.47594785690307617, "step": 2620 }, { "epoch": 0.6042651296829972, "grad_norm": 1.6816380957027248, "learning_rate": 1.6724851442527624e-06, "loss": 0.45491674542427063, "step": 2621 }, { "epoch": 0.6044956772334293, "grad_norm": 1.4513506801945901, "learning_rate": 1.6722029883834358e-06, "loss": 0.581158459186554, "step": 2622 }, { "epoch": 0.6047262247838616, "grad_norm": 1.159366601543868, "learning_rate": 1.671920734850008e-06, "loss": 0.41334211826324463, "step": 2623 }, { "epoch": 0.6049567723342939, "grad_norm": 1.5139603662934273, "learning_rate": 1.6716383836934869e-06, "loss": 0.47984325885772705, "step": 2624 }, { "epoch": 0.6051873198847262, "grad_norm": 1.456724922494102, "learning_rate": 1.6713559349548956e-06, "loss": 0.4930099844932556, "step": 2625 }, { "epoch": 0.6054178674351585, "grad_norm": 1.2383087695672668, "learning_rate": 1.6710733886752708e-06, "loss": 0.4817400872707367, "step": 2626 }, { "epoch": 0.6056484149855907, "grad_norm": 1.1506762604804934, "learning_rate": 1.670790744895664e-06, "loss": 0.43708014488220215, "step": 2627 }, { "epoch": 0.605878962536023, "grad_norm": 1.312898538931884, "learning_rate": 1.6705080036571397e-06, "loss": 0.5880838632583618, "step": 2628 }, { "epoch": 0.6061095100864553, "grad_norm": 1.382585469079517, "learning_rate": 1.6702251650007778e-06, "loss": 0.5228145718574524, "step": 2629 }, { "epoch": 0.6063400576368876, "grad_norm": 1.3711772829373339, "learning_rate": 1.6699422289676718e-06, "loss": 0.5998802185058594, "step": 2630 }, { "epoch": 0.6065706051873199, "grad_norm": 1.1506769792285998, "learning_rate": 1.669659195598929e-06, "loss": 0.4910133481025696, "step": 2631 }, { "epoch": 0.6068011527377521, "grad_norm": 1.1374580098234712, "learning_rate": 1.6693760649356714e-06, "loss": 0.5143430233001709, "step": 2632 }, { "epoch": 0.6070317002881844, "grad_norm": 1.3291493600304582, "learning_rate": 1.6690928370190352e-06, "loss": 0.5022460222244263, "step": 2633 }, { "epoch": 0.6072622478386167, "grad_norm": 1.2010474194378777, "learning_rate": 1.66880951189017e-06, "loss": 0.5120739936828613, "step": 2634 }, { "epoch": 0.607492795389049, "grad_norm": 1.1655918439037996, "learning_rate": 1.66852608959024e-06, "loss": 0.5094325542449951, "step": 2635 }, { "epoch": 0.6077233429394813, "grad_norm": 1.6545296058614192, "learning_rate": 1.668242570160424e-06, "loss": 0.5701217651367188, "step": 2636 }, { "epoch": 0.6079538904899136, "grad_norm": 1.6680631360673492, "learning_rate": 1.6679589536419142e-06, "loss": 0.43836015462875366, "step": 2637 }, { "epoch": 0.6081844380403458, "grad_norm": 1.1122806906078924, "learning_rate": 1.667675240075917e-06, "loss": 0.46791714429855347, "step": 2638 }, { "epoch": 0.6084149855907781, "grad_norm": 1.2841557472628726, "learning_rate": 1.6673914295036528e-06, "loss": 0.5161240696907043, "step": 2639 }, { "epoch": 0.6086455331412104, "grad_norm": 1.306348640330571, "learning_rate": 1.667107521966357e-06, "loss": 0.5272632837295532, "step": 2640 }, { "epoch": 0.6088760806916427, "grad_norm": 1.2415691557258128, "learning_rate": 1.666823517505278e-06, "loss": 0.5190865993499756, "step": 2641 }, { "epoch": 0.609106628242075, "grad_norm": 1.3383874728602996, "learning_rate": 1.6665394161616788e-06, "loss": 0.5450509190559387, "step": 2642 }, { "epoch": 0.6093371757925072, "grad_norm": 1.1224851387365165, "learning_rate": 1.6662552179768362e-06, "loss": 0.4620264768600464, "step": 2643 }, { "epoch": 0.6095677233429395, "grad_norm": 1.4790968500843589, "learning_rate": 1.6659709229920412e-06, "loss": 0.5052369236946106, "step": 2644 }, { "epoch": 0.6097982708933718, "grad_norm": 1.3024913053280143, "learning_rate": 1.6656865312485992e-06, "loss": 0.5384722352027893, "step": 2645 }, { "epoch": 0.6100288184438041, "grad_norm": 1.206674709939484, "learning_rate": 1.6654020427878293e-06, "loss": 0.4649192690849304, "step": 2646 }, { "epoch": 0.6102593659942364, "grad_norm": 1.227154571916659, "learning_rate": 1.6651174576510645e-06, "loss": 0.5306943655014038, "step": 2647 }, { "epoch": 0.6104899135446686, "grad_norm": 1.0456204729401848, "learning_rate": 1.664832775879652e-06, "loss": 0.5300636887550354, "step": 2648 }, { "epoch": 0.6107204610951009, "grad_norm": 1.3444050876781224, "learning_rate": 1.6645479975149535e-06, "loss": 0.5449787378311157, "step": 2649 }, { "epoch": 0.6109510086455331, "grad_norm": 1.313946366107906, "learning_rate": 1.664263122598344e-06, "loss": 0.5364447832107544, "step": 2650 }, { "epoch": 0.6111815561959654, "grad_norm": 1.297750723710745, "learning_rate": 1.6639781511712132e-06, "loss": 0.4894382357597351, "step": 2651 }, { "epoch": 0.6114121037463977, "grad_norm": 1.247105878162738, "learning_rate": 1.663693083274964e-06, "loss": 0.6076130867004395, "step": 2652 }, { "epoch": 0.6116426512968299, "grad_norm": 1.2142432606071334, "learning_rate": 1.6634079189510142e-06, "loss": 0.5427982807159424, "step": 2653 }, { "epoch": 0.6118731988472622, "grad_norm": 1.2083266453516546, "learning_rate": 1.6631226582407952e-06, "loss": 0.48615583777427673, "step": 2654 }, { "epoch": 0.6121037463976945, "grad_norm": 1.269675522188132, "learning_rate": 1.662837301185752e-06, "loss": 0.5485595464706421, "step": 2655 }, { "epoch": 0.6123342939481268, "grad_norm": 1.1870072528862068, "learning_rate": 1.6625518478273444e-06, "loss": 0.479083776473999, "step": 2656 }, { "epoch": 0.612564841498559, "grad_norm": 1.1736970458680376, "learning_rate": 1.6622662982070459e-06, "loss": 0.4724195599555969, "step": 2657 }, { "epoch": 0.6127953890489913, "grad_norm": 1.15099040352794, "learning_rate": 1.6619806523663433e-06, "loss": 0.5106989741325378, "step": 2658 }, { "epoch": 0.6130259365994236, "grad_norm": 1.3245439515084314, "learning_rate": 1.6616949103467387e-06, "loss": 0.538973867893219, "step": 2659 }, { "epoch": 0.6132564841498559, "grad_norm": 1.213303693990195, "learning_rate": 1.661409072189747e-06, "loss": 0.49926918745040894, "step": 2660 }, { "epoch": 0.6134870317002882, "grad_norm": 1.0216209944980308, "learning_rate": 1.6611231379368977e-06, "loss": 0.4001106023788452, "step": 2661 }, { "epoch": 0.6137175792507205, "grad_norm": 1.1517339301323273, "learning_rate": 1.660837107629734e-06, "loss": 0.44094690680503845, "step": 2662 }, { "epoch": 0.6139481268011527, "grad_norm": 1.3250501139748343, "learning_rate": 1.6605509813098129e-06, "loss": 0.5321308374404907, "step": 2663 }, { "epoch": 0.614178674351585, "grad_norm": 1.2378894255576012, "learning_rate": 1.6602647590187058e-06, "loss": 0.4906134009361267, "step": 2664 }, { "epoch": 0.6144092219020173, "grad_norm": 1.3263504160433166, "learning_rate": 1.659978440797998e-06, "loss": 0.45977315306663513, "step": 2665 }, { "epoch": 0.6146397694524496, "grad_norm": 1.3165371046851393, "learning_rate": 1.6596920266892881e-06, "loss": 0.5100743770599365, "step": 2666 }, { "epoch": 0.6148703170028819, "grad_norm": 1.2693396142216467, "learning_rate": 1.6594055167341896e-06, "loss": 0.4486650228500366, "step": 2667 }, { "epoch": 0.6151008645533141, "grad_norm": 1.433519074909581, "learning_rate": 1.6591189109743292e-06, "loss": 0.5782293081283569, "step": 2668 }, { "epoch": 0.6153314121037464, "grad_norm": 1.3772589499745378, "learning_rate": 1.6588322094513476e-06, "loss": 0.5620462894439697, "step": 2669 }, { "epoch": 0.6155619596541787, "grad_norm": 1.1994240432869185, "learning_rate": 1.6585454122068997e-06, "loss": 0.5833989381790161, "step": 2670 }, { "epoch": 0.615792507204611, "grad_norm": 1.1206341611976025, "learning_rate": 1.658258519282654e-06, "loss": 0.4710484743118286, "step": 2671 }, { "epoch": 0.6160230547550433, "grad_norm": 1.1981953327193071, "learning_rate": 1.6579715307202932e-06, "loss": 0.492951899766922, "step": 2672 }, { "epoch": 0.6162536023054755, "grad_norm": 1.379813532435923, "learning_rate": 1.6576844465615142e-06, "loss": 0.537446141242981, "step": 2673 }, { "epoch": 0.6164841498559078, "grad_norm": 1.271596422526649, "learning_rate": 1.6573972668480263e-06, "loss": 0.45248350501060486, "step": 2674 }, { "epoch": 0.6167146974063401, "grad_norm": 1.0491664954993434, "learning_rate": 1.6571099916215546e-06, "loss": 0.48271554708480835, "step": 2675 }, { "epoch": 0.6169452449567724, "grad_norm": 1.2893317600930594, "learning_rate": 1.6568226209238367e-06, "loss": 0.5537866353988647, "step": 2676 }, { "epoch": 0.6171757925072046, "grad_norm": 1.2315065049281728, "learning_rate": 1.6565351547966247e-06, "loss": 0.44220322370529175, "step": 2677 }, { "epoch": 0.6174063400576368, "grad_norm": 1.6057841417293577, "learning_rate": 1.6562475932816847e-06, "loss": 0.545140266418457, "step": 2678 }, { "epoch": 0.6176368876080691, "grad_norm": 1.298272810341285, "learning_rate": 1.655959936420796e-06, "loss": 0.499568372964859, "step": 2679 }, { "epoch": 0.6178674351585014, "grad_norm": 1.2211993021087002, "learning_rate": 1.655672184255753e-06, "loss": 0.5183538198471069, "step": 2680 }, { "epoch": 0.6180979827089337, "grad_norm": 1.4169596526310997, "learning_rate": 1.6553843368283618e-06, "loss": 0.6033202409744263, "step": 2681 }, { "epoch": 0.618328530259366, "grad_norm": 1.1460338801726684, "learning_rate": 1.6550963941804444e-06, "loss": 0.45113763213157654, "step": 2682 }, { "epoch": 0.6185590778097982, "grad_norm": 1.1306727750017995, "learning_rate": 1.6548083563538358e-06, "loss": 0.5167637467384338, "step": 2683 }, { "epoch": 0.6187896253602305, "grad_norm": 1.301069241875041, "learning_rate": 1.6545202233903846e-06, "loss": 0.5253550410270691, "step": 2684 }, { "epoch": 0.6190201729106628, "grad_norm": 1.5130397732960972, "learning_rate": 1.6542319953319544e-06, "loss": 0.5394268035888672, "step": 2685 }, { "epoch": 0.6192507204610951, "grad_norm": 1.2309300315341822, "learning_rate": 1.6539436722204206e-06, "loss": 0.4581655263900757, "step": 2686 }, { "epoch": 0.6194812680115274, "grad_norm": 1.3473232843540013, "learning_rate": 1.6536552540976742e-06, "loss": 0.4924882650375366, "step": 2687 }, { "epoch": 0.6197118155619596, "grad_norm": 1.3212128637586775, "learning_rate": 1.653366741005619e-06, "loss": 0.40816426277160645, "step": 2688 }, { "epoch": 0.6199423631123919, "grad_norm": 1.487996666110314, "learning_rate": 1.6530781329861735e-06, "loss": 0.386010080575943, "step": 2689 }, { "epoch": 0.6201729106628242, "grad_norm": 1.2404836484730133, "learning_rate": 1.6527894300812693e-06, "loss": 0.4776495099067688, "step": 2690 }, { "epoch": 0.6204034582132565, "grad_norm": 1.268980085495045, "learning_rate": 1.6525006323328514e-06, "loss": 0.5063761472702026, "step": 2691 }, { "epoch": 0.6206340057636888, "grad_norm": 1.327914489138475, "learning_rate": 1.6522117397828795e-06, "loss": 0.5012977123260498, "step": 2692 }, { "epoch": 0.620864553314121, "grad_norm": 1.5586706262282004, "learning_rate": 1.6519227524733266e-06, "loss": 0.567261815071106, "step": 2693 }, { "epoch": 0.6210951008645533, "grad_norm": 1.080122828705907, "learning_rate": 1.6516336704461796e-06, "loss": 0.4405871629714966, "step": 2694 }, { "epoch": 0.6213256484149856, "grad_norm": 1.2897294207591083, "learning_rate": 1.6513444937434392e-06, "loss": 0.5243908166885376, "step": 2695 }, { "epoch": 0.6215561959654179, "grad_norm": 1.230064101444913, "learning_rate": 1.6510552224071198e-06, "loss": 0.4709666073322296, "step": 2696 }, { "epoch": 0.6217867435158502, "grad_norm": 1.222110695276511, "learning_rate": 1.650765856479249e-06, "loss": 0.48736077547073364, "step": 2697 }, { "epoch": 0.6220172910662825, "grad_norm": 1.3590016738405688, "learning_rate": 1.6504763960018692e-06, "loss": 0.46533918380737305, "step": 2698 }, { "epoch": 0.6222478386167147, "grad_norm": 1.35575800489149, "learning_rate": 1.6501868410170359e-06, "loss": 0.4393872916698456, "step": 2699 }, { "epoch": 0.622478386167147, "grad_norm": 1.3208545237029374, "learning_rate": 1.6498971915668183e-06, "loss": 0.49825766682624817, "step": 2700 }, { "epoch": 0.6227089337175793, "grad_norm": 1.2486030770302066, "learning_rate": 1.6496074476932993e-06, "loss": 0.47903305292129517, "step": 2701 }, { "epoch": 0.6229394812680116, "grad_norm": 1.4622129412897775, "learning_rate": 1.6493176094385764e-06, "loss": 0.6245414018630981, "step": 2702 }, { "epoch": 0.6231700288184439, "grad_norm": 1.1866684251840278, "learning_rate": 1.6490276768447591e-06, "loss": 0.5198970437049866, "step": 2703 }, { "epoch": 0.6234005763688761, "grad_norm": 1.6440881721015699, "learning_rate": 1.6487376499539722e-06, "loss": 0.5924375057220459, "step": 2704 }, { "epoch": 0.6236311239193083, "grad_norm": 1.2404517941438844, "learning_rate": 1.6484475288083534e-06, "loss": 0.4759942889213562, "step": 2705 }, { "epoch": 0.6238616714697406, "grad_norm": 1.2374460666011324, "learning_rate": 1.6481573134500547e-06, "loss": 0.4802717864513397, "step": 2706 }, { "epoch": 0.6240922190201729, "grad_norm": 1.2053044068656478, "learning_rate": 1.6478670039212404e-06, "loss": 0.4586595892906189, "step": 2707 }, { "epoch": 0.6243227665706051, "grad_norm": 1.2429891166800078, "learning_rate": 1.6475766002640904e-06, "loss": 0.5571914911270142, "step": 2708 }, { "epoch": 0.6245533141210374, "grad_norm": 1.4255139010134108, "learning_rate": 1.647286102520797e-06, "loss": 0.5134386420249939, "step": 2709 }, { "epoch": 0.6247838616714697, "grad_norm": 1.2269438614282533, "learning_rate": 1.6469955107335664e-06, "loss": 0.5108852386474609, "step": 2710 }, { "epoch": 0.625014409221902, "grad_norm": 1.272719981801389, "learning_rate": 1.6467048249446187e-06, "loss": 0.4626818895339966, "step": 2711 }, { "epoch": 0.6252449567723343, "grad_norm": 1.2442767706548579, "learning_rate": 1.6464140451961875e-06, "loss": 0.5615794062614441, "step": 2712 }, { "epoch": 0.6254755043227666, "grad_norm": 1.4865776451893986, "learning_rate": 1.6461231715305197e-06, "loss": 0.5956846475601196, "step": 2713 }, { "epoch": 0.6257060518731988, "grad_norm": 1.360476557499295, "learning_rate": 1.6458322039898768e-06, "loss": 0.5330410003662109, "step": 2714 }, { "epoch": 0.6259365994236311, "grad_norm": 1.2714727610034695, "learning_rate": 1.6455411426165334e-06, "loss": 0.4754364490509033, "step": 2715 }, { "epoch": 0.6261671469740634, "grad_norm": 1.194353411400933, "learning_rate": 1.6452499874527771e-06, "loss": 0.4814460277557373, "step": 2716 }, { "epoch": 0.6263976945244957, "grad_norm": 1.1993429832567999, "learning_rate": 1.6449587385409101e-06, "loss": 0.5211490392684937, "step": 2717 }, { "epoch": 0.626628242074928, "grad_norm": 1.685645733076896, "learning_rate": 1.6446673959232478e-06, "loss": 0.43656522035598755, "step": 2718 }, { "epoch": 0.6268587896253602, "grad_norm": 1.3017375720981144, "learning_rate": 1.6443759596421192e-06, "loss": 0.5601837635040283, "step": 2719 }, { "epoch": 0.6270893371757925, "grad_norm": 1.2022937294499874, "learning_rate": 1.644084429739867e-06, "loss": 0.5415230989456177, "step": 2720 }, { "epoch": 0.6273198847262248, "grad_norm": 1.2148604998226076, "learning_rate": 1.6437928062588473e-06, "loss": 0.5256547927856445, "step": 2721 }, { "epoch": 0.6275504322766571, "grad_norm": 1.5289209117578932, "learning_rate": 1.6435010892414303e-06, "loss": 0.4892258644104004, "step": 2722 }, { "epoch": 0.6277809798270894, "grad_norm": 1.4507176159512447, "learning_rate": 1.6432092787299992e-06, "loss": 0.6185523271560669, "step": 2723 }, { "epoch": 0.6280115273775216, "grad_norm": 1.230316705129502, "learning_rate": 1.642917374766951e-06, "loss": 0.4964678883552551, "step": 2724 }, { "epoch": 0.6282420749279539, "grad_norm": 1.6264616977516906, "learning_rate": 1.6426253773946962e-06, "loss": 0.5079313516616821, "step": 2725 }, { "epoch": 0.6284726224783862, "grad_norm": 1.1997980442079967, "learning_rate": 1.6423332866556594e-06, "loss": 0.5262078046798706, "step": 2726 }, { "epoch": 0.6287031700288185, "grad_norm": 1.1471642727061313, "learning_rate": 1.642041102592278e-06, "loss": 0.5222228765487671, "step": 2727 }, { "epoch": 0.6289337175792508, "grad_norm": 1.1511187711328779, "learning_rate": 1.6417488252470038e-06, "loss": 0.4470428228378296, "step": 2728 }, { "epoch": 0.629164265129683, "grad_norm": 1.5218800246743136, "learning_rate": 1.6414564546623007e-06, "loss": 0.5850222110748291, "step": 2729 }, { "epoch": 0.6293948126801153, "grad_norm": 1.3890766989379473, "learning_rate": 1.6411639908806477e-06, "loss": 0.5844837427139282, "step": 2730 }, { "epoch": 0.6296253602305476, "grad_norm": 1.390267605631488, "learning_rate": 1.6408714339445373e-06, "loss": 0.5246438384056091, "step": 2731 }, { "epoch": 0.6298559077809798, "grad_norm": 1.3255906196936882, "learning_rate": 1.640578783896474e-06, "loss": 0.47431260347366333, "step": 2732 }, { "epoch": 0.630086455331412, "grad_norm": 1.1973273797462285, "learning_rate": 1.6402860407789772e-06, "loss": 0.5054109692573547, "step": 2733 }, { "epoch": 0.6303170028818443, "grad_norm": 1.2276344696691943, "learning_rate": 1.6399932046345794e-06, "loss": 0.47106099128723145, "step": 2734 }, { "epoch": 0.6305475504322766, "grad_norm": 1.209538624936261, "learning_rate": 1.6397002755058269e-06, "loss": 0.4557371139526367, "step": 2735 }, { "epoch": 0.6307780979827089, "grad_norm": 1.458954969848585, "learning_rate": 1.6394072534352787e-06, "loss": 0.6111027002334595, "step": 2736 }, { "epoch": 0.6310086455331412, "grad_norm": 1.5969397340893883, "learning_rate": 1.6391141384655085e-06, "loss": 0.5637114644050598, "step": 2737 }, { "epoch": 0.6312391930835735, "grad_norm": 1.087830166618947, "learning_rate": 1.6388209306391024e-06, "loss": 0.43901634216308594, "step": 2738 }, { "epoch": 0.6314697406340057, "grad_norm": 1.2084082927136561, "learning_rate": 1.6385276299986608e-06, "loss": 0.5315161347389221, "step": 2739 }, { "epoch": 0.631700288184438, "grad_norm": 1.0744838804086243, "learning_rate": 1.6382342365867968e-06, "loss": 0.396445095539093, "step": 2740 }, { "epoch": 0.6319308357348703, "grad_norm": 1.253067633447061, "learning_rate": 1.637940750446138e-06, "loss": 0.5279150605201721, "step": 2741 }, { "epoch": 0.6321613832853026, "grad_norm": 1.3477604514012602, "learning_rate": 1.6376471716193241e-06, "loss": 0.5353842377662659, "step": 2742 }, { "epoch": 0.6323919308357349, "grad_norm": 1.39680851665267, "learning_rate": 1.6373535001490095e-06, "loss": 0.5103511214256287, "step": 2743 }, { "epoch": 0.6326224783861671, "grad_norm": 1.4406962358010578, "learning_rate": 1.637059736077862e-06, "loss": 0.5704224109649658, "step": 2744 }, { "epoch": 0.6328530259365994, "grad_norm": 1.2410789821967132, "learning_rate": 1.6367658794485615e-06, "loss": 0.6006341576576233, "step": 2745 }, { "epoch": 0.6330835734870317, "grad_norm": 1.1849794356119856, "learning_rate": 1.6364719303038031e-06, "loss": 0.5593788027763367, "step": 2746 }, { "epoch": 0.633314121037464, "grad_norm": 1.2983683254340128, "learning_rate": 1.6361778886862944e-06, "loss": 0.5924923419952393, "step": 2747 }, { "epoch": 0.6335446685878963, "grad_norm": 1.4250134538892931, "learning_rate": 1.6358837546387565e-06, "loss": 0.4591634273529053, "step": 2748 }, { "epoch": 0.6337752161383285, "grad_norm": 1.4268958179759377, "learning_rate": 1.635589528203924e-06, "loss": 0.5870101451873779, "step": 2749 }, { "epoch": 0.6340057636887608, "grad_norm": 1.2390542829845288, "learning_rate": 1.635295209424545e-06, "loss": 0.5609645843505859, "step": 2750 }, { "epoch": 0.6342363112391931, "grad_norm": 1.2219097215373855, "learning_rate": 1.6350007983433808e-06, "loss": 0.49148842692375183, "step": 2751 }, { "epoch": 0.6344668587896254, "grad_norm": 1.2092576484571942, "learning_rate": 1.6347062950032063e-06, "loss": 0.4845973253250122, "step": 2752 }, { "epoch": 0.6346974063400577, "grad_norm": 1.633007866264204, "learning_rate": 1.63441169944681e-06, "loss": 0.5573195815086365, "step": 2753 }, { "epoch": 0.63492795389049, "grad_norm": 1.1092730661182224, "learning_rate": 1.6341170117169934e-06, "loss": 0.473361998796463, "step": 2754 }, { "epoch": 0.6351585014409222, "grad_norm": 1.3324554132101556, "learning_rate": 1.6338222318565716e-06, "loss": 0.47250160574913025, "step": 2755 }, { "epoch": 0.6353890489913545, "grad_norm": 1.088490649248913, "learning_rate": 1.633527359908373e-06, "loss": 0.5084018707275391, "step": 2756 }, { "epoch": 0.6356195965417868, "grad_norm": 1.5890648743750697, "learning_rate": 1.6332323959152396e-06, "loss": 0.5426309108734131, "step": 2757 }, { "epoch": 0.6358501440922191, "grad_norm": 1.454023530885245, "learning_rate": 1.6329373399200261e-06, "loss": 0.3922150135040283, "step": 2758 }, { "epoch": 0.6360806916426512, "grad_norm": 1.5606393342793812, "learning_rate": 1.6326421919656018e-06, "loss": 0.4642726182937622, "step": 2759 }, { "epoch": 0.6363112391930835, "grad_norm": 1.3512521798508315, "learning_rate": 1.632346952094848e-06, "loss": 0.5146275758743286, "step": 2760 }, { "epoch": 0.6365417867435158, "grad_norm": 1.161835041029154, "learning_rate": 1.6320516203506605e-06, "loss": 0.4769957959651947, "step": 2761 }, { "epoch": 0.6367723342939481, "grad_norm": 1.202865986079401, "learning_rate": 1.6317561967759473e-06, "loss": 0.5173189640045166, "step": 2762 }, { "epoch": 0.6370028818443804, "grad_norm": 1.3233830027378666, "learning_rate": 1.6314606814136311e-06, "loss": 0.48731061816215515, "step": 2763 }, { "epoch": 0.6372334293948126, "grad_norm": 1.3622928770440887, "learning_rate": 1.6311650743066468e-06, "loss": 0.40830880403518677, "step": 2764 }, { "epoch": 0.6374639769452449, "grad_norm": 1.3508081076594225, "learning_rate": 1.630869375497943e-06, "loss": 0.5563752055168152, "step": 2765 }, { "epoch": 0.6376945244956772, "grad_norm": 1.1200426971373956, "learning_rate": 1.6305735850304816e-06, "loss": 0.4951537847518921, "step": 2766 }, { "epoch": 0.6379250720461095, "grad_norm": 1.3503538208414971, "learning_rate": 1.630277702947238e-06, "loss": 0.5482779741287231, "step": 2767 }, { "epoch": 0.6381556195965418, "grad_norm": 1.3487057607161679, "learning_rate": 1.629981729291201e-06, "loss": 0.5211485624313354, "step": 2768 }, { "epoch": 0.638386167146974, "grad_norm": 1.459955596146605, "learning_rate": 1.6296856641053723e-06, "loss": 0.5225323438644409, "step": 2769 }, { "epoch": 0.6386167146974063, "grad_norm": 1.2949707858976387, "learning_rate": 1.629389507432767e-06, "loss": 0.4153757393360138, "step": 2770 }, { "epoch": 0.6388472622478386, "grad_norm": 1.2904066759459198, "learning_rate": 1.6290932593164138e-06, "loss": 0.47255784273147583, "step": 2771 }, { "epoch": 0.6390778097982709, "grad_norm": 1.4584165547508758, "learning_rate": 1.6287969197993542e-06, "loss": 0.5152851939201355, "step": 2772 }, { "epoch": 0.6393083573487032, "grad_norm": 1.451234860527591, "learning_rate": 1.6285004889246436e-06, "loss": 0.48233699798583984, "step": 2773 }, { "epoch": 0.6395389048991355, "grad_norm": 1.3457498152659688, "learning_rate": 1.62820396673535e-06, "loss": 0.4834440350532532, "step": 2774 }, { "epoch": 0.6397694524495677, "grad_norm": 1.3644783916718317, "learning_rate": 1.627907353274555e-06, "loss": 0.5308742523193359, "step": 2775 }, { "epoch": 0.64, "grad_norm": 1.231882052164345, "learning_rate": 1.6276106485853537e-06, "loss": 0.40322256088256836, "step": 2776 }, { "epoch": 0.6402305475504323, "grad_norm": 1.2547531033607235, "learning_rate": 1.6273138527108541e-06, "loss": 0.4684373140335083, "step": 2777 }, { "epoch": 0.6404610951008646, "grad_norm": 1.1482277712335578, "learning_rate": 1.6270169656941772e-06, "loss": 0.46157366037368774, "step": 2778 }, { "epoch": 0.6406916426512969, "grad_norm": 1.2771654472074374, "learning_rate": 1.6267199875784585e-06, "loss": 0.497269868850708, "step": 2779 }, { "epoch": 0.6409221902017291, "grad_norm": 1.2070868658726128, "learning_rate": 1.6264229184068447e-06, "loss": 0.4726135730743408, "step": 2780 }, { "epoch": 0.6411527377521614, "grad_norm": 1.126951114278981, "learning_rate": 1.6261257582224976e-06, "loss": 0.5554429292678833, "step": 2781 }, { "epoch": 0.6413832853025937, "grad_norm": 1.7654211496256054, "learning_rate": 1.6258285070685914e-06, "loss": 0.4444640278816223, "step": 2782 }, { "epoch": 0.641613832853026, "grad_norm": 1.230279574199785, "learning_rate": 1.6255311649883133e-06, "loss": 0.5426352024078369, "step": 2783 }, { "epoch": 0.6418443804034583, "grad_norm": 1.4455139026368424, "learning_rate": 1.6252337320248643e-06, "loss": 0.563956081867218, "step": 2784 }, { "epoch": 0.6420749279538905, "grad_norm": 1.3541204424273805, "learning_rate": 1.6249362082214584e-06, "loss": 0.5468907952308655, "step": 2785 }, { "epoch": 0.6423054755043228, "grad_norm": 1.212988322705407, "learning_rate": 1.6246385936213222e-06, "loss": 0.5481438636779785, "step": 2786 }, { "epoch": 0.642536023054755, "grad_norm": 1.4319478354990522, "learning_rate": 1.6243408882676962e-06, "loss": 0.6007488965988159, "step": 2787 }, { "epoch": 0.6427665706051873, "grad_norm": 1.2664699137320072, "learning_rate": 1.6240430922038345e-06, "loss": 0.4572671055793762, "step": 2788 }, { "epoch": 0.6429971181556196, "grad_norm": 1.4178410912876163, "learning_rate": 1.6237452054730029e-06, "loss": 0.4373534321784973, "step": 2789 }, { "epoch": 0.6432276657060518, "grad_norm": 1.2711378651618674, "learning_rate": 1.6234472281184821e-06, "loss": 0.5494809150695801, "step": 2790 }, { "epoch": 0.6434582132564841, "grad_norm": 1.296311074035635, "learning_rate": 1.6231491601835643e-06, "loss": 0.5352902412414551, "step": 2791 }, { "epoch": 0.6436887608069164, "grad_norm": 1.34469702291001, "learning_rate": 1.622851001711556e-06, "loss": 0.5362370014190674, "step": 2792 }, { "epoch": 0.6439193083573487, "grad_norm": 1.0955286846537269, "learning_rate": 1.6225527527457768e-06, "loss": 0.5180599689483643, "step": 2793 }, { "epoch": 0.644149855907781, "grad_norm": 1.227300788277134, "learning_rate": 1.6222544133295585e-06, "loss": 0.4420490860939026, "step": 2794 }, { "epoch": 0.6443804034582132, "grad_norm": 1.1282456913511916, "learning_rate": 1.6219559835062472e-06, "loss": 0.47733911871910095, "step": 2795 }, { "epoch": 0.6446109510086455, "grad_norm": 1.2972747931781763, "learning_rate": 1.6216574633192019e-06, "loss": 0.5379013419151306, "step": 2796 }, { "epoch": 0.6448414985590778, "grad_norm": 1.488376687235578, "learning_rate": 1.6213588528117941e-06, "loss": 0.48056793212890625, "step": 2797 }, { "epoch": 0.6450720461095101, "grad_norm": 1.314531005053451, "learning_rate": 1.6210601520274088e-06, "loss": 0.6051667332649231, "step": 2798 }, { "epoch": 0.6453025936599424, "grad_norm": 1.3389075396682297, "learning_rate": 1.620761361009444e-06, "loss": 0.5118743777275085, "step": 2799 }, { "epoch": 0.6455331412103746, "grad_norm": 1.1684233871088228, "learning_rate": 1.6204624798013113e-06, "loss": 0.5176658630371094, "step": 2800 }, { "epoch": 0.6457636887608069, "grad_norm": 1.1942250008445177, "learning_rate": 1.6201635084464346e-06, "loss": 0.4896622896194458, "step": 2801 }, { "epoch": 0.6459942363112392, "grad_norm": 1.203968558366863, "learning_rate": 1.619864446988252e-06, "loss": 0.5606796741485596, "step": 2802 }, { "epoch": 0.6462247838616715, "grad_norm": 1.1802584853211424, "learning_rate": 1.6195652954702129e-06, "loss": 0.4668291509151459, "step": 2803 }, { "epoch": 0.6464553314121038, "grad_norm": 1.419071418109651, "learning_rate": 1.619266053935782e-06, "loss": 0.6159840822219849, "step": 2804 }, { "epoch": 0.646685878962536, "grad_norm": 1.3809858964710453, "learning_rate": 1.6189667224284355e-06, "loss": 0.5556408166885376, "step": 2805 }, { "epoch": 0.6469164265129683, "grad_norm": 1.3502640253584333, "learning_rate": 1.6186673009916634e-06, "loss": 0.4046534299850464, "step": 2806 }, { "epoch": 0.6471469740634006, "grad_norm": 1.2457573044005343, "learning_rate": 1.618367789668968e-06, "loss": 0.5278058052062988, "step": 2807 }, { "epoch": 0.6473775216138329, "grad_norm": 1.5332124630047532, "learning_rate": 1.6180681885038656e-06, "loss": 0.4232120215892792, "step": 2808 }, { "epoch": 0.6476080691642652, "grad_norm": 1.0482270723606208, "learning_rate": 1.617768497539885e-06, "loss": 0.4960458278656006, "step": 2809 }, { "epoch": 0.6478386167146974, "grad_norm": 1.4950274929002954, "learning_rate": 1.6174687168205685e-06, "loss": 0.49290311336517334, "step": 2810 }, { "epoch": 0.6480691642651297, "grad_norm": 1.0907019420926827, "learning_rate": 1.6171688463894706e-06, "loss": 0.49928852915763855, "step": 2811 }, { "epoch": 0.648299711815562, "grad_norm": 1.1125971886307398, "learning_rate": 1.6168688862901597e-06, "loss": 0.5087406039237976, "step": 2812 }, { "epoch": 0.6485302593659943, "grad_norm": 1.1525453732948439, "learning_rate": 1.616568836566217e-06, "loss": 0.5653507113456726, "step": 2813 }, { "epoch": 0.6487608069164265, "grad_norm": 1.2308519470610553, "learning_rate": 1.6162686972612361e-06, "loss": 0.5233205556869507, "step": 2814 }, { "epoch": 0.6489913544668587, "grad_norm": 1.173802957785401, "learning_rate": 1.6159684684188242e-06, "loss": 0.4954048991203308, "step": 2815 }, { "epoch": 0.649221902017291, "grad_norm": 1.6344259227585325, "learning_rate": 1.6156681500826022e-06, "loss": 0.5647044777870178, "step": 2816 }, { "epoch": 0.6494524495677233, "grad_norm": 1.3909620583545594, "learning_rate": 1.6153677422962022e-06, "loss": 0.5182117223739624, "step": 2817 }, { "epoch": 0.6496829971181556, "grad_norm": 1.4001152309390272, "learning_rate": 1.615067245103271e-06, "loss": 0.47642621397972107, "step": 2818 }, { "epoch": 0.6499135446685879, "grad_norm": 1.2874703217432304, "learning_rate": 1.6147666585474672e-06, "loss": 0.4365708827972412, "step": 2819 }, { "epoch": 0.6501440922190201, "grad_norm": 1.2119512472522103, "learning_rate": 1.6144659826724635e-06, "loss": 0.46274715662002563, "step": 2820 }, { "epoch": 0.6503746397694524, "grad_norm": 1.1857173604007696, "learning_rate": 1.6141652175219447e-06, "loss": 0.3781696557998657, "step": 2821 }, { "epoch": 0.6506051873198847, "grad_norm": 1.1820429620454833, "learning_rate": 1.613864363139609e-06, "loss": 0.46696609258651733, "step": 2822 }, { "epoch": 0.650835734870317, "grad_norm": 1.3911928241305225, "learning_rate": 1.6135634195691668e-06, "loss": 0.49795544147491455, "step": 2823 }, { "epoch": 0.6510662824207493, "grad_norm": 1.3162254980705577, "learning_rate": 1.6132623868543424e-06, "loss": 0.43032360076904297, "step": 2824 }, { "epoch": 0.6512968299711815, "grad_norm": 1.4132619120351104, "learning_rate": 1.6129612650388734e-06, "loss": 0.4829779267311096, "step": 2825 }, { "epoch": 0.6515273775216138, "grad_norm": 1.4598202738444805, "learning_rate": 1.6126600541665089e-06, "loss": 0.4899333119392395, "step": 2826 }, { "epoch": 0.6517579250720461, "grad_norm": 1.458031941277911, "learning_rate": 1.6123587542810118e-06, "loss": 0.5031615495681763, "step": 2827 }, { "epoch": 0.6519884726224784, "grad_norm": 1.2733901426169307, "learning_rate": 1.6120573654261578e-06, "loss": 0.4357362985610962, "step": 2828 }, { "epoch": 0.6522190201729107, "grad_norm": 1.2220684621989164, "learning_rate": 1.611755887645736e-06, "loss": 0.5158397555351257, "step": 2829 }, { "epoch": 0.652449567723343, "grad_norm": 1.4578040902897245, "learning_rate": 1.6114543209835476e-06, "loss": 0.5768516063690186, "step": 2830 }, { "epoch": 0.6526801152737752, "grad_norm": 1.2943410970497278, "learning_rate": 1.611152665483407e-06, "loss": 0.5171727538108826, "step": 2831 }, { "epoch": 0.6529106628242075, "grad_norm": 1.359610619679167, "learning_rate": 1.6108509211891419e-06, "loss": 0.508929967880249, "step": 2832 }, { "epoch": 0.6531412103746398, "grad_norm": 1.2987632103127786, "learning_rate": 1.6105490881445926e-06, "loss": 0.49480926990509033, "step": 2833 }, { "epoch": 0.6533717579250721, "grad_norm": 1.4007906850413607, "learning_rate": 1.6102471663936125e-06, "loss": 0.5405118465423584, "step": 2834 }, { "epoch": 0.6536023054755044, "grad_norm": 1.4214264035517046, "learning_rate": 1.6099451559800671e-06, "loss": 0.4784564971923828, "step": 2835 }, { "epoch": 0.6538328530259366, "grad_norm": 1.2317082135715491, "learning_rate": 1.6096430569478355e-06, "loss": 0.4536136984825134, "step": 2836 }, { "epoch": 0.6540634005763689, "grad_norm": 1.2316225701677532, "learning_rate": 1.60934086934081e-06, "loss": 0.5488549470901489, "step": 2837 }, { "epoch": 0.6542939481268012, "grad_norm": 1.2426165303182712, "learning_rate": 1.6090385932028948e-06, "loss": 0.4609632194042206, "step": 2838 }, { "epoch": 0.6545244956772335, "grad_norm": 1.3762329881463276, "learning_rate": 1.608736228578008e-06, "loss": 0.523021936416626, "step": 2839 }, { "epoch": 0.6547550432276658, "grad_norm": 1.1802127861575282, "learning_rate": 1.6084337755100794e-06, "loss": 0.47583144903182983, "step": 2840 }, { "epoch": 0.654985590778098, "grad_norm": 1.2693471084083858, "learning_rate": 1.608131234043053e-06, "loss": 0.49806085228919983, "step": 2841 }, { "epoch": 0.6552161383285302, "grad_norm": 1.3784595470681247, "learning_rate": 1.6078286042208843e-06, "loss": 0.46162575483322144, "step": 2842 }, { "epoch": 0.6554466858789625, "grad_norm": 1.4479182061912272, "learning_rate": 1.6075258860875425e-06, "loss": 0.5477081537246704, "step": 2843 }, { "epoch": 0.6556772334293948, "grad_norm": 1.5130797283302535, "learning_rate": 1.6072230796870092e-06, "loss": 0.5085225105285645, "step": 2844 }, { "epoch": 0.655907780979827, "grad_norm": 1.2647298148528885, "learning_rate": 1.6069201850632798e-06, "loss": 0.5255313515663147, "step": 2845 }, { "epoch": 0.6561383285302593, "grad_norm": 1.413414772362052, "learning_rate": 1.6066172022603607e-06, "loss": 0.5655765533447266, "step": 2846 }, { "epoch": 0.6563688760806916, "grad_norm": 1.458278918754854, "learning_rate": 1.606314131322273e-06, "loss": 0.5228176116943359, "step": 2847 }, { "epoch": 0.6565994236311239, "grad_norm": 1.287040721080716, "learning_rate": 1.606010972293049e-06, "loss": 0.46039047837257385, "step": 2848 }, { "epoch": 0.6568299711815562, "grad_norm": 1.1829978641835537, "learning_rate": 1.6057077252167353e-06, "loss": 0.4958561062812805, "step": 2849 }, { "epoch": 0.6570605187319885, "grad_norm": 1.2609401742374116, "learning_rate": 1.60540439013739e-06, "loss": 0.4093541204929352, "step": 2850 }, { "epoch": 0.6572910662824207, "grad_norm": 1.3273683463000963, "learning_rate": 1.605100967099085e-06, "loss": 0.554291844367981, "step": 2851 }, { "epoch": 0.657521613832853, "grad_norm": 1.6129391874110828, "learning_rate": 1.6047974561459037e-06, "loss": 0.5679658055305481, "step": 2852 }, { "epoch": 0.6577521613832853, "grad_norm": 1.2433466350266238, "learning_rate": 1.6044938573219438e-06, "loss": 0.5162447094917297, "step": 2853 }, { "epoch": 0.6579827089337176, "grad_norm": 1.3792495026872826, "learning_rate": 1.6041901706713149e-06, "loss": 0.5094351172447205, "step": 2854 }, { "epoch": 0.6582132564841499, "grad_norm": 1.4358897989440063, "learning_rate": 1.6038863962381397e-06, "loss": 0.5806282758712769, "step": 2855 }, { "epoch": 0.6584438040345821, "grad_norm": 1.1262191223686615, "learning_rate": 1.6035825340665528e-06, "loss": 0.4678384065628052, "step": 2856 }, { "epoch": 0.6586743515850144, "grad_norm": 1.4869165644593196, "learning_rate": 1.6032785842007028e-06, "loss": 0.4143972396850586, "step": 2857 }, { "epoch": 0.6589048991354467, "grad_norm": 1.1375175731769085, "learning_rate": 1.6029745466847505e-06, "loss": 0.42894694209098816, "step": 2858 }, { "epoch": 0.659135446685879, "grad_norm": 1.3380971361263998, "learning_rate": 1.602670421562869e-06, "loss": 0.47713708877563477, "step": 2859 }, { "epoch": 0.6593659942363113, "grad_norm": 1.2246962047118364, "learning_rate": 1.6023662088792445e-06, "loss": 0.5422366261482239, "step": 2860 }, { "epoch": 0.6595965417867435, "grad_norm": 1.1327420643327901, "learning_rate": 1.6020619086780767e-06, "loss": 0.4384247660636902, "step": 2861 }, { "epoch": 0.6598270893371758, "grad_norm": 1.2661736589224049, "learning_rate": 1.6017575210035764e-06, "loss": 0.5371978878974915, "step": 2862 }, { "epoch": 0.6600576368876081, "grad_norm": 1.3390128666305818, "learning_rate": 1.601453045899968e-06, "loss": 0.6491122245788574, "step": 2863 }, { "epoch": 0.6602881844380404, "grad_norm": 1.1591928446006092, "learning_rate": 1.6011484834114893e-06, "loss": 0.4868931770324707, "step": 2864 }, { "epoch": 0.6605187319884727, "grad_norm": 1.3765898164637491, "learning_rate": 1.600843833582389e-06, "loss": 0.41603779792785645, "step": 2865 }, { "epoch": 0.6607492795389049, "grad_norm": 1.1922285846357408, "learning_rate": 1.6005390964569308e-06, "loss": 0.47878536581993103, "step": 2866 }, { "epoch": 0.6609798270893372, "grad_norm": 1.164643681870199, "learning_rate": 1.6002342720793888e-06, "loss": 0.4476701021194458, "step": 2867 }, { "epoch": 0.6612103746397695, "grad_norm": 1.2582254976557383, "learning_rate": 1.599929360494051e-06, "loss": 0.48599839210510254, "step": 2868 }, { "epoch": 0.6614409221902017, "grad_norm": 1.4832886173859434, "learning_rate": 1.5996243617452184e-06, "loss": 0.4305136203765869, "step": 2869 }, { "epoch": 0.661671469740634, "grad_norm": 1.3451524114419875, "learning_rate": 1.5993192758772036e-06, "loss": 0.5310144424438477, "step": 2870 }, { "epoch": 0.6619020172910662, "grad_norm": 1.3551281861732154, "learning_rate": 1.599014102934333e-06, "loss": 0.5530884265899658, "step": 2871 }, { "epoch": 0.6621325648414985, "grad_norm": 1.5490376838312165, "learning_rate": 1.5987088429609442e-06, "loss": 0.5356169939041138, "step": 2872 }, { "epoch": 0.6623631123919308, "grad_norm": 1.1305826916001416, "learning_rate": 1.5984034960013887e-06, "loss": 0.469211220741272, "step": 2873 }, { "epoch": 0.6625936599423631, "grad_norm": 1.2389323700358805, "learning_rate": 1.5980980621000305e-06, "loss": 0.4920649528503418, "step": 2874 }, { "epoch": 0.6628242074927954, "grad_norm": 1.361901936332142, "learning_rate": 1.5977925413012461e-06, "loss": 0.5592747330665588, "step": 2875 }, { "epoch": 0.6630547550432276, "grad_norm": 1.0963319699906886, "learning_rate": 1.5974869336494239e-06, "loss": 0.4942903220653534, "step": 2876 }, { "epoch": 0.6632853025936599, "grad_norm": 1.2490208016607074, "learning_rate": 1.5971812391889654e-06, "loss": 0.5296880006790161, "step": 2877 }, { "epoch": 0.6635158501440922, "grad_norm": 1.4126429356832046, "learning_rate": 1.5968754579642855e-06, "loss": 0.5113412737846375, "step": 2878 }, { "epoch": 0.6637463976945245, "grad_norm": 1.221750609950105, "learning_rate": 1.596569590019811e-06, "loss": 0.5092888474464417, "step": 2879 }, { "epoch": 0.6639769452449568, "grad_norm": 1.1209556874218505, "learning_rate": 1.5962636353999806e-06, "loss": 0.4685785472393036, "step": 2880 }, { "epoch": 0.664207492795389, "grad_norm": 1.1860834020143705, "learning_rate": 1.595957594149247e-06, "loss": 0.5484071969985962, "step": 2881 }, { "epoch": 0.6644380403458213, "grad_norm": 1.3098494571724633, "learning_rate": 1.5956514663120748e-06, "loss": 0.5027823448181152, "step": 2882 }, { "epoch": 0.6646685878962536, "grad_norm": 1.2663519411792166, "learning_rate": 1.5953452519329407e-06, "loss": 0.509772539138794, "step": 2883 }, { "epoch": 0.6648991354466859, "grad_norm": 1.1995137267951683, "learning_rate": 1.5950389510563346e-06, "loss": 0.5210834741592407, "step": 2884 }, { "epoch": 0.6651296829971182, "grad_norm": 1.3471404101864493, "learning_rate": 1.5947325637267594e-06, "loss": 0.5124255418777466, "step": 2885 }, { "epoch": 0.6653602305475504, "grad_norm": 1.3751318102582792, "learning_rate": 1.594426089988729e-06, "loss": 0.5071109533309937, "step": 2886 }, { "epoch": 0.6655907780979827, "grad_norm": 1.2999435524473473, "learning_rate": 1.5941195298867723e-06, "loss": 0.468585342168808, "step": 2887 }, { "epoch": 0.665821325648415, "grad_norm": 1.1643123580379142, "learning_rate": 1.5938128834654276e-06, "loss": 0.47526606917381287, "step": 2888 }, { "epoch": 0.6660518731988473, "grad_norm": 0.9943492283337346, "learning_rate": 1.5935061507692486e-06, "loss": 0.49675557017326355, "step": 2889 }, { "epoch": 0.6662824207492796, "grad_norm": 1.1709047337470004, "learning_rate": 1.5931993318428002e-06, "loss": 0.4787757396697998, "step": 2890 }, { "epoch": 0.6665129682997118, "grad_norm": 1.3981208786435964, "learning_rate": 1.5928924267306594e-06, "loss": 0.5428439974784851, "step": 2891 }, { "epoch": 0.6667435158501441, "grad_norm": 1.3700152135370784, "learning_rate": 1.5925854354774168e-06, "loss": 0.47622811794281006, "step": 2892 }, { "epoch": 0.6669740634005764, "grad_norm": 1.2018111138110872, "learning_rate": 1.592278358127675e-06, "loss": 0.5330455899238586, "step": 2893 }, { "epoch": 0.6672046109510087, "grad_norm": 1.2085315359118407, "learning_rate": 1.5919711947260492e-06, "loss": 0.5267072916030884, "step": 2894 }, { "epoch": 0.667435158501441, "grad_norm": 1.403057971845759, "learning_rate": 1.5916639453171672e-06, "loss": 0.5285246968269348, "step": 2895 }, { "epoch": 0.6676657060518733, "grad_norm": 1.2487314723582965, "learning_rate": 1.5913566099456686e-06, "loss": 0.41800087690353394, "step": 2896 }, { "epoch": 0.6678962536023054, "grad_norm": 1.2515913608592821, "learning_rate": 1.5910491886562062e-06, "loss": 0.5241307616233826, "step": 2897 }, { "epoch": 0.6681268011527377, "grad_norm": 1.446448879574996, "learning_rate": 1.5907416814934456e-06, "loss": 0.4888567328453064, "step": 2898 }, { "epoch": 0.66835734870317, "grad_norm": 1.07101179258961, "learning_rate": 1.590434088502064e-06, "loss": 0.3744029104709625, "step": 2899 }, { "epoch": 0.6685878962536023, "grad_norm": 1.2925871141386034, "learning_rate": 1.5901264097267513e-06, "loss": 0.6257427334785461, "step": 2900 }, { "epoch": 0.6688184438040345, "grad_norm": 1.2413020944843032, "learning_rate": 1.58981864521221e-06, "loss": 0.4519440233707428, "step": 2901 }, { "epoch": 0.6690489913544668, "grad_norm": 1.2299997137786967, "learning_rate": 1.589510795003156e-06, "loss": 0.3806610107421875, "step": 2902 }, { "epoch": 0.6692795389048991, "grad_norm": 1.291805154904489, "learning_rate": 1.5892028591443154e-06, "loss": 0.5493309497833252, "step": 2903 }, { "epoch": 0.6695100864553314, "grad_norm": 1.2160139328707327, "learning_rate": 1.5888948376804291e-06, "loss": 0.5231510400772095, "step": 2904 }, { "epoch": 0.6697406340057637, "grad_norm": 1.653045730376608, "learning_rate": 1.5885867306562488e-06, "loss": 0.47482627630233765, "step": 2905 }, { "epoch": 0.669971181556196, "grad_norm": 1.2801554509870998, "learning_rate": 1.5882785381165396e-06, "loss": 0.5615172386169434, "step": 2906 }, { "epoch": 0.6702017291066282, "grad_norm": 1.4220510811597196, "learning_rate": 1.5879702601060788e-06, "loss": 0.610340416431427, "step": 2907 }, { "epoch": 0.6704322766570605, "grad_norm": 1.3550154221756145, "learning_rate": 1.5876618966696553e-06, "loss": 0.5085784196853638, "step": 2908 }, { "epoch": 0.6706628242074928, "grad_norm": 1.1827008952943212, "learning_rate": 1.5873534478520716e-06, "loss": 0.4580487906932831, "step": 2909 }, { "epoch": 0.6708933717579251, "grad_norm": 1.5359854157211643, "learning_rate": 1.5870449136981425e-06, "loss": 0.5045263767242432, "step": 2910 }, { "epoch": 0.6711239193083574, "grad_norm": 1.284237656199812, "learning_rate": 1.5867362942526938e-06, "loss": 0.5317284464836121, "step": 2911 }, { "epoch": 0.6713544668587896, "grad_norm": 1.4099609003610267, "learning_rate": 1.5864275895605658e-06, "loss": 0.430474191904068, "step": 2912 }, { "epoch": 0.6715850144092219, "grad_norm": 1.092793379229376, "learning_rate": 1.5861187996666088e-06, "loss": 0.4927142262458801, "step": 2913 }, { "epoch": 0.6718155619596542, "grad_norm": 1.475708068624904, "learning_rate": 1.585809924615688e-06, "loss": 0.5057426691055298, "step": 2914 }, { "epoch": 0.6720461095100865, "grad_norm": 1.1048383600570364, "learning_rate": 1.5855009644526792e-06, "loss": 0.37006527185440063, "step": 2915 }, { "epoch": 0.6722766570605188, "grad_norm": 1.226294106349504, "learning_rate": 1.585191919222471e-06, "loss": 0.48783624172210693, "step": 2916 }, { "epoch": 0.672507204610951, "grad_norm": 1.4872973610774505, "learning_rate": 1.5848827889699646e-06, "loss": 0.49577081203460693, "step": 2917 }, { "epoch": 0.6727377521613833, "grad_norm": 1.3646021511618893, "learning_rate": 1.5845735737400732e-06, "loss": 0.5596980452537537, "step": 2918 }, { "epoch": 0.6729682997118156, "grad_norm": 1.5361721176693865, "learning_rate": 1.5842642735777228e-06, "loss": 0.3920667767524719, "step": 2919 }, { "epoch": 0.6731988472622479, "grad_norm": 1.4567898143159288, "learning_rate": 1.5839548885278515e-06, "loss": 0.6292526721954346, "step": 2920 }, { "epoch": 0.6734293948126802, "grad_norm": 1.2680110772754039, "learning_rate": 1.5836454186354098e-06, "loss": 0.4730093479156494, "step": 2921 }, { "epoch": 0.6736599423631124, "grad_norm": 1.2495291848198702, "learning_rate": 1.58333586394536e-06, "loss": 0.48224174976348877, "step": 2922 }, { "epoch": 0.6738904899135447, "grad_norm": 1.325720545623316, "learning_rate": 1.5830262245026778e-06, "loss": 0.5028641223907471, "step": 2923 }, { "epoch": 0.6741210374639769, "grad_norm": 1.3175381512862607, "learning_rate": 1.58271650035235e-06, "loss": 0.5031313300132751, "step": 2924 }, { "epoch": 0.6743515850144092, "grad_norm": 1.5161079858248256, "learning_rate": 1.582406691539377e-06, "loss": 0.584574818611145, "step": 2925 }, { "epoch": 0.6745821325648415, "grad_norm": 1.2461318316468335, "learning_rate": 1.5820967981087701e-06, "loss": 0.46639564633369446, "step": 2926 }, { "epoch": 0.6748126801152737, "grad_norm": 1.4880257215555335, "learning_rate": 1.5817868201055537e-06, "loss": 0.45523303747177124, "step": 2927 }, { "epoch": 0.675043227665706, "grad_norm": 1.3102261377548885, "learning_rate": 1.5814767575747647e-06, "loss": 0.48584091663360596, "step": 2928 }, { "epoch": 0.6752737752161383, "grad_norm": 1.3040040020796448, "learning_rate": 1.5811666105614518e-06, "loss": 0.43126803636550903, "step": 2929 }, { "epoch": 0.6755043227665706, "grad_norm": 1.25431891535012, "learning_rate": 1.5808563791106761e-06, "loss": 0.4430522322654724, "step": 2930 }, { "epoch": 0.6757348703170029, "grad_norm": 1.1852464321668217, "learning_rate": 1.580546063267511e-06, "loss": 0.5478621125221252, "step": 2931 }, { "epoch": 0.6759654178674351, "grad_norm": 1.155223214055886, "learning_rate": 1.5802356630770425e-06, "loss": 0.4092339873313904, "step": 2932 }, { "epoch": 0.6761959654178674, "grad_norm": 1.1811158393108148, "learning_rate": 1.5799251785843682e-06, "loss": 0.4377528727054596, "step": 2933 }, { "epoch": 0.6764265129682997, "grad_norm": 1.3770671734713864, "learning_rate": 1.579614609834598e-06, "loss": 0.5447462797164917, "step": 2934 }, { "epoch": 0.676657060518732, "grad_norm": 1.58389651828821, "learning_rate": 1.5793039568728546e-06, "loss": 0.5153322219848633, "step": 2935 }, { "epoch": 0.6768876080691643, "grad_norm": 1.3626759798711972, "learning_rate": 1.578993219744273e-06, "loss": 0.5152921676635742, "step": 2936 }, { "epoch": 0.6771181556195965, "grad_norm": 1.4530601678970734, "learning_rate": 1.5786823984939995e-06, "loss": 0.451922208070755, "step": 2937 }, { "epoch": 0.6773487031700288, "grad_norm": 1.1810123065006097, "learning_rate": 1.5783714931671933e-06, "loss": 0.4027182459831238, "step": 2938 }, { "epoch": 0.6775792507204611, "grad_norm": 1.2545756749048065, "learning_rate": 1.578060503809026e-06, "loss": 0.5972239971160889, "step": 2939 }, { "epoch": 0.6778097982708934, "grad_norm": 1.3351852794948187, "learning_rate": 1.5777494304646807e-06, "loss": 0.4376659691333771, "step": 2940 }, { "epoch": 0.6780403458213257, "grad_norm": 1.6381068625585637, "learning_rate": 1.5774382731793537e-06, "loss": 0.5066968202590942, "step": 2941 }, { "epoch": 0.6782708933717579, "grad_norm": 1.233959474935461, "learning_rate": 1.5771270319982523e-06, "loss": 0.47554099559783936, "step": 2942 }, { "epoch": 0.6785014409221902, "grad_norm": 1.3592025231702105, "learning_rate": 1.576815706966597e-06, "loss": 0.4573550522327423, "step": 2943 }, { "epoch": 0.6787319884726225, "grad_norm": 1.2899141824371814, "learning_rate": 1.57650429812962e-06, "loss": 0.498090922832489, "step": 2944 }, { "epoch": 0.6789625360230548, "grad_norm": 1.439288051511995, "learning_rate": 1.5761928055325657e-06, "loss": 0.3874198794364929, "step": 2945 }, { "epoch": 0.6791930835734871, "grad_norm": 1.3892308861229652, "learning_rate": 1.5758812292206906e-06, "loss": 0.5814070701599121, "step": 2946 }, { "epoch": 0.6794236311239193, "grad_norm": 1.1114806158765154, "learning_rate": 1.5755695692392642e-06, "loss": 0.4853003919124603, "step": 2947 }, { "epoch": 0.6796541786743516, "grad_norm": 1.4749591273474632, "learning_rate": 1.5752578256335664e-06, "loss": 0.5008170008659363, "step": 2948 }, { "epoch": 0.6798847262247839, "grad_norm": 1.2399062484261723, "learning_rate": 1.5749459984488912e-06, "loss": 0.5139249563217163, "step": 2949 }, { "epoch": 0.6801152737752162, "grad_norm": 1.3293402007434927, "learning_rate": 1.574634087730543e-06, "loss": 0.5634652376174927, "step": 2950 }, { "epoch": 0.6803458213256485, "grad_norm": 1.500163790638001, "learning_rate": 1.57432209352384e-06, "loss": 0.5859615802764893, "step": 2951 }, { "epoch": 0.6805763688760806, "grad_norm": 1.5412120263580897, "learning_rate": 1.5740100158741112e-06, "loss": 0.5175807476043701, "step": 2952 }, { "epoch": 0.6808069164265129, "grad_norm": 1.3090445672867828, "learning_rate": 1.5736978548266988e-06, "loss": 0.4430953860282898, "step": 2953 }, { "epoch": 0.6810374639769452, "grad_norm": 1.2865088532644573, "learning_rate": 1.5733856104269556e-06, "loss": 0.4908146262168884, "step": 2954 }, { "epoch": 0.6812680115273775, "grad_norm": 1.810302965226072, "learning_rate": 1.5730732827202483e-06, "loss": 0.5244361162185669, "step": 2955 }, { "epoch": 0.6814985590778098, "grad_norm": 1.3563775580730366, "learning_rate": 1.5727608717519544e-06, "loss": 0.4997614026069641, "step": 2956 }, { "epoch": 0.681729106628242, "grad_norm": 1.331712876030176, "learning_rate": 1.5724483775674644e-06, "loss": 0.4843396544456482, "step": 2957 }, { "epoch": 0.6819596541786743, "grad_norm": 1.2508340904890844, "learning_rate": 1.5721358002121799e-06, "loss": 0.4678846001625061, "step": 2958 }, { "epoch": 0.6821902017291066, "grad_norm": 1.5103932364725328, "learning_rate": 1.5718231397315158e-06, "loss": 0.626168966293335, "step": 2959 }, { "epoch": 0.6824207492795389, "grad_norm": 1.2990874275627162, "learning_rate": 1.5715103961708977e-06, "loss": 0.43531563878059387, "step": 2960 }, { "epoch": 0.6826512968299712, "grad_norm": 1.2463918257107651, "learning_rate": 1.5711975695757648e-06, "loss": 0.510450005531311, "step": 2961 }, { "epoch": 0.6828818443804034, "grad_norm": 1.3271181664609262, "learning_rate": 1.5708846599915666e-06, "loss": 0.5074174404144287, "step": 2962 }, { "epoch": 0.6831123919308357, "grad_norm": 1.3176146899484875, "learning_rate": 1.5705716674637663e-06, "loss": 0.5755868554115295, "step": 2963 }, { "epoch": 0.683342939481268, "grad_norm": 1.3499337753621126, "learning_rate": 1.5702585920378384e-06, "loss": 0.5092858672142029, "step": 2964 }, { "epoch": 0.6835734870317003, "grad_norm": 1.1765613552448693, "learning_rate": 1.569945433759269e-06, "loss": 0.4780580997467041, "step": 2965 }, { "epoch": 0.6838040345821326, "grad_norm": 1.3312217595628797, "learning_rate": 1.5696321926735577e-06, "loss": 0.46418172121047974, "step": 2966 }, { "epoch": 0.6840345821325649, "grad_norm": 1.4402952016719341, "learning_rate": 1.569318868826214e-06, "loss": 0.49936625361442566, "step": 2967 }, { "epoch": 0.6842651296829971, "grad_norm": 1.4586611460279373, "learning_rate": 1.5690054622627613e-06, "loss": 0.3950899839401245, "step": 2968 }, { "epoch": 0.6844956772334294, "grad_norm": 1.6977188097354168, "learning_rate": 1.5686919730287342e-06, "loss": 0.662026047706604, "step": 2969 }, { "epoch": 0.6847262247838617, "grad_norm": 1.244533695524091, "learning_rate": 1.5683784011696797e-06, "loss": 0.5297297835350037, "step": 2970 }, { "epoch": 0.684956772334294, "grad_norm": 1.320167274856565, "learning_rate": 1.5680647467311555e-06, "loss": 0.5406937599182129, "step": 2971 }, { "epoch": 0.6851873198847263, "grad_norm": 1.2468519195560397, "learning_rate": 1.5677510097587339e-06, "loss": 0.5149558782577515, "step": 2972 }, { "epoch": 0.6854178674351585, "grad_norm": 1.6834143469534373, "learning_rate": 1.5674371902979964e-06, "loss": 0.5197988748550415, "step": 2973 }, { "epoch": 0.6856484149855908, "grad_norm": 1.350490097294227, "learning_rate": 1.567123288394538e-06, "loss": 0.4733935594558716, "step": 2974 }, { "epoch": 0.6858789625360231, "grad_norm": 1.4037154754911856, "learning_rate": 1.5668093040939657e-06, "loss": 0.49029862880706787, "step": 2975 }, { "epoch": 0.6861095100864554, "grad_norm": 1.4125916913384953, "learning_rate": 1.5664952374418977e-06, "loss": 0.509502112865448, "step": 2976 }, { "epoch": 0.6863400576368877, "grad_norm": 1.3378380848439302, "learning_rate": 1.5661810884839647e-06, "loss": 0.5216405391693115, "step": 2977 }, { "epoch": 0.6865706051873199, "grad_norm": 1.7407414962930694, "learning_rate": 1.5658668572658098e-06, "loss": 0.5375246405601501, "step": 2978 }, { "epoch": 0.6868011527377521, "grad_norm": 1.3958219888508585, "learning_rate": 1.5655525438330868e-06, "loss": 0.4444112777709961, "step": 2979 }, { "epoch": 0.6870317002881844, "grad_norm": 1.4595199598065813, "learning_rate": 1.5652381482314627e-06, "loss": 0.5068175792694092, "step": 2980 }, { "epoch": 0.6872622478386167, "grad_norm": 1.1364189534845608, "learning_rate": 1.5649236705066158e-06, "loss": 0.45731109380722046, "step": 2981 }, { "epoch": 0.687492795389049, "grad_norm": 1.4303465111962608, "learning_rate": 1.5646091107042364e-06, "loss": 0.5306158661842346, "step": 2982 }, { "epoch": 0.6877233429394812, "grad_norm": 1.433927269332233, "learning_rate": 1.5642944688700261e-06, "loss": 0.4398651719093323, "step": 2983 }, { "epoch": 0.6879538904899135, "grad_norm": 1.3400708801277281, "learning_rate": 1.5639797450497004e-06, "loss": 0.5990906953811646, "step": 2984 }, { "epoch": 0.6881844380403458, "grad_norm": 1.1626847488175958, "learning_rate": 1.5636649392889845e-06, "loss": 0.4668315052986145, "step": 2985 }, { "epoch": 0.6884149855907781, "grad_norm": 1.269211368058643, "learning_rate": 1.5633500516336168e-06, "loss": 0.4629107713699341, "step": 2986 }, { "epoch": 0.6886455331412104, "grad_norm": 1.3353647625587797, "learning_rate": 1.5630350821293465e-06, "loss": 0.5427249670028687, "step": 2987 }, { "epoch": 0.6888760806916426, "grad_norm": 1.5063565488918145, "learning_rate": 1.5627200308219361e-06, "loss": 0.5910916328430176, "step": 2988 }, { "epoch": 0.6891066282420749, "grad_norm": 1.21637133241614, "learning_rate": 1.5624048977571593e-06, "loss": 0.5487556457519531, "step": 2989 }, { "epoch": 0.6893371757925072, "grad_norm": 1.8277563810893047, "learning_rate": 1.5620896829808014e-06, "loss": 0.5941481590270996, "step": 2990 }, { "epoch": 0.6895677233429395, "grad_norm": 1.2657483365011457, "learning_rate": 1.5617743865386597e-06, "loss": 0.5026910305023193, "step": 2991 }, { "epoch": 0.6897982708933718, "grad_norm": 1.2728272105438154, "learning_rate": 1.5614590084765434e-06, "loss": 0.453794002532959, "step": 2992 }, { "epoch": 0.690028818443804, "grad_norm": 1.5162382136032657, "learning_rate": 1.5611435488402747e-06, "loss": 0.534416675567627, "step": 2993 }, { "epoch": 0.6902593659942363, "grad_norm": 1.3021552826846616, "learning_rate": 1.5608280076756852e-06, "loss": 0.5203788876533508, "step": 2994 }, { "epoch": 0.6904899135446686, "grad_norm": 1.2974339804272135, "learning_rate": 1.5605123850286207e-06, "loss": 0.5063557624816895, "step": 2995 }, { "epoch": 0.6907204610951009, "grad_norm": 1.0854825641740868, "learning_rate": 1.5601966809449372e-06, "loss": 0.4803149700164795, "step": 2996 }, { "epoch": 0.6909510086455332, "grad_norm": 1.4622322271463624, "learning_rate": 1.5598808954705039e-06, "loss": 0.5497993230819702, "step": 2997 }, { "epoch": 0.6911815561959654, "grad_norm": 1.3566519579664447, "learning_rate": 1.5595650286512005e-06, "loss": 0.44242218136787415, "step": 2998 }, { "epoch": 0.6914121037463977, "grad_norm": 1.4880341344096335, "learning_rate": 1.5592490805329197e-06, "loss": 0.4667905867099762, "step": 2999 }, { "epoch": 0.69164265129683, "grad_norm": 1.2275920951045758, "learning_rate": 1.5589330511615653e-06, "loss": 0.5017203092575073, "step": 3000 }, { "epoch": 0.6918731988472623, "grad_norm": 1.1629025443905772, "learning_rate": 1.5586169405830528e-06, "loss": 0.4988882541656494, "step": 3001 }, { "epoch": 0.6921037463976946, "grad_norm": 1.2059313879115356, "learning_rate": 1.5583007488433102e-06, "loss": 0.48794132471084595, "step": 3002 }, { "epoch": 0.6923342939481268, "grad_norm": 1.1972491085590309, "learning_rate": 1.5579844759882766e-06, "loss": 0.48638075590133667, "step": 3003 }, { "epoch": 0.6925648414985591, "grad_norm": 1.523401545887088, "learning_rate": 1.5576681220639035e-06, "loss": 0.5094320178031921, "step": 3004 }, { "epoch": 0.6927953890489914, "grad_norm": 1.2878330835344656, "learning_rate": 1.5573516871161529e-06, "loss": 0.5538557171821594, "step": 3005 }, { "epoch": 0.6930259365994237, "grad_norm": 1.4879801831920034, "learning_rate": 1.5570351711910007e-06, "loss": 0.37026524543762207, "step": 3006 }, { "epoch": 0.6932564841498559, "grad_norm": 1.118855469869377, "learning_rate": 1.5567185743344328e-06, "loss": 0.505375325679779, "step": 3007 }, { "epoch": 0.6934870317002881, "grad_norm": 1.3758356333811415, "learning_rate": 1.5564018965924474e-06, "loss": 0.5471951961517334, "step": 3008 }, { "epoch": 0.6937175792507204, "grad_norm": 1.3946032953876015, "learning_rate": 1.5560851380110547e-06, "loss": 0.5329568386077881, "step": 3009 }, { "epoch": 0.6939481268011527, "grad_norm": 1.2171394692811979, "learning_rate": 1.555768298636276e-06, "loss": 0.4184609651565552, "step": 3010 }, { "epoch": 0.694178674351585, "grad_norm": 1.299950019418537, "learning_rate": 1.5554513785141454e-06, "loss": 0.4469276964664459, "step": 3011 }, { "epoch": 0.6944092219020173, "grad_norm": 1.2624011455255777, "learning_rate": 1.5551343776907073e-06, "loss": 0.46018022298812866, "step": 3012 }, { "epoch": 0.6946397694524495, "grad_norm": 1.4446192207747934, "learning_rate": 1.5548172962120195e-06, "loss": 0.39802467823028564, "step": 3013 }, { "epoch": 0.6948703170028818, "grad_norm": 1.6421441785374291, "learning_rate": 1.5545001341241502e-06, "loss": 0.5130733847618103, "step": 3014 }, { "epoch": 0.6951008645533141, "grad_norm": 1.4874797046908075, "learning_rate": 1.5541828914731797e-06, "loss": 0.4477139711380005, "step": 3015 }, { "epoch": 0.6953314121037464, "grad_norm": 1.5908123652169408, "learning_rate": 1.5538655683051998e-06, "loss": 0.45926767587661743, "step": 3016 }, { "epoch": 0.6955619596541787, "grad_norm": 1.2608445606501002, "learning_rate": 1.553548164666315e-06, "loss": 0.45936119556427, "step": 3017 }, { "epoch": 0.6957925072046109, "grad_norm": 1.254131874779924, "learning_rate": 1.5532306806026401e-06, "loss": 0.42640042304992676, "step": 3018 }, { "epoch": 0.6960230547550432, "grad_norm": 1.359223674128618, "learning_rate": 1.5529131161603024e-06, "loss": 0.545343816280365, "step": 3019 }, { "epoch": 0.6962536023054755, "grad_norm": 1.499789251776708, "learning_rate": 1.5525954713854406e-06, "loss": 0.55390465259552, "step": 3020 }, { "epoch": 0.6964841498559078, "grad_norm": 1.4431952423899748, "learning_rate": 1.5522777463242056e-06, "loss": 0.5054730176925659, "step": 3021 }, { "epoch": 0.6967146974063401, "grad_norm": 1.3421071937183544, "learning_rate": 1.5519599410227593e-06, "loss": 0.5345525741577148, "step": 3022 }, { "epoch": 0.6969452449567723, "grad_norm": 1.3873484237085747, "learning_rate": 1.5516420555272758e-06, "loss": 0.5678459405899048, "step": 3023 }, { "epoch": 0.6971757925072046, "grad_norm": 1.5494993250847158, "learning_rate": 1.5513240898839398e-06, "loss": 0.6118779182434082, "step": 3024 }, { "epoch": 0.6974063400576369, "grad_norm": 1.4303865941013976, "learning_rate": 1.551006044138949e-06, "loss": 0.5693734884262085, "step": 3025 }, { "epoch": 0.6976368876080692, "grad_norm": 1.382814555248119, "learning_rate": 1.5506879183385117e-06, "loss": 0.47115445137023926, "step": 3026 }, { "epoch": 0.6978674351585015, "grad_norm": 1.4819808824760035, "learning_rate": 1.550369712528849e-06, "loss": 0.4977574944496155, "step": 3027 }, { "epoch": 0.6980979827089338, "grad_norm": 1.345366236747585, "learning_rate": 1.550051426756192e-06, "loss": 0.5112085938453674, "step": 3028 }, { "epoch": 0.698328530259366, "grad_norm": 1.1673490314186687, "learning_rate": 1.549733061066785e-06, "loss": 0.5258888602256775, "step": 3029 }, { "epoch": 0.6985590778097983, "grad_norm": 1.3170150520989703, "learning_rate": 1.5494146155068833e-06, "loss": 0.4910382926464081, "step": 3030 }, { "epoch": 0.6987896253602306, "grad_norm": 1.492317262307736, "learning_rate": 1.5490960901227532e-06, "loss": 0.538005530834198, "step": 3031 }, { "epoch": 0.6990201729106629, "grad_norm": 1.2798707818585195, "learning_rate": 1.5487774849606732e-06, "loss": 0.5382951498031616, "step": 3032 }, { "epoch": 0.6992507204610952, "grad_norm": 1.3563181533260928, "learning_rate": 1.5484588000669333e-06, "loss": 0.5431716442108154, "step": 3033 }, { "epoch": 0.6994812680115273, "grad_norm": 1.4730881654634107, "learning_rate": 1.5481400354878358e-06, "loss": 0.5745220184326172, "step": 3034 }, { "epoch": 0.6997118155619596, "grad_norm": 1.5039346648575265, "learning_rate": 1.5478211912696929e-06, "loss": 0.4814023971557617, "step": 3035 }, { "epoch": 0.6999423631123919, "grad_norm": 1.2452001063336693, "learning_rate": 1.5475022674588298e-06, "loss": 0.3987554907798767, "step": 3036 }, { "epoch": 0.7001729106628242, "grad_norm": 1.2273894324003525, "learning_rate": 1.5471832641015828e-06, "loss": 0.4156729578971863, "step": 3037 }, { "epoch": 0.7004034582132564, "grad_norm": 1.1620912406102892, "learning_rate": 1.5468641812442997e-06, "loss": 0.5085793733596802, "step": 3038 }, { "epoch": 0.7006340057636887, "grad_norm": 1.3012231858429548, "learning_rate": 1.5465450189333403e-06, "loss": 0.4714711904525757, "step": 3039 }, { "epoch": 0.700864553314121, "grad_norm": 1.194325392111808, "learning_rate": 1.5462257772150751e-06, "loss": 0.3911649286746979, "step": 3040 }, { "epoch": 0.7010951008645533, "grad_norm": 1.5023223698140733, "learning_rate": 1.5459064561358868e-06, "loss": 0.6202036142349243, "step": 3041 }, { "epoch": 0.7013256484149856, "grad_norm": 1.318488566568559, "learning_rate": 1.5455870557421692e-06, "loss": 0.4943694770336151, "step": 3042 }, { "epoch": 0.7015561959654179, "grad_norm": 1.4976388100892126, "learning_rate": 1.5452675760803281e-06, "loss": 0.5165269374847412, "step": 3043 }, { "epoch": 0.7017867435158501, "grad_norm": 1.2655871125899185, "learning_rate": 1.5449480171967804e-06, "loss": 0.4172588586807251, "step": 3044 }, { "epoch": 0.7020172910662824, "grad_norm": 1.2826628166511769, "learning_rate": 1.5446283791379551e-06, "loss": 0.5359724760055542, "step": 3045 }, { "epoch": 0.7022478386167147, "grad_norm": 1.4599282692486921, "learning_rate": 1.5443086619502917e-06, "loss": 0.4579007625579834, "step": 3046 }, { "epoch": 0.702478386167147, "grad_norm": 1.1736042374177846, "learning_rate": 1.5439888656802423e-06, "loss": 0.5238107442855835, "step": 3047 }, { "epoch": 0.7027089337175793, "grad_norm": 1.3607655039329225, "learning_rate": 1.5436689903742692e-06, "loss": 0.4772608280181885, "step": 3048 }, { "epoch": 0.7029394812680115, "grad_norm": 1.1308906076124552, "learning_rate": 1.5433490360788479e-06, "loss": 0.4986886978149414, "step": 3049 }, { "epoch": 0.7031700288184438, "grad_norm": 1.4921821574000111, "learning_rate": 1.543029002840464e-06, "loss": 0.505290150642395, "step": 3050 }, { "epoch": 0.7034005763688761, "grad_norm": 1.216509889918091, "learning_rate": 1.5427088907056148e-06, "loss": 0.44803524017333984, "step": 3051 }, { "epoch": 0.7036311239193084, "grad_norm": 1.3045688882387803, "learning_rate": 1.5423886997208096e-06, "loss": 0.5130533576011658, "step": 3052 }, { "epoch": 0.7038616714697407, "grad_norm": 1.2848001617196636, "learning_rate": 1.5420684299325686e-06, "loss": 0.5944375991821289, "step": 3053 }, { "epoch": 0.7040922190201729, "grad_norm": 1.4438153026517662, "learning_rate": 1.541748081387424e-06, "loss": 0.5240592956542969, "step": 3054 }, { "epoch": 0.7043227665706052, "grad_norm": 1.4566838697048774, "learning_rate": 1.5414276541319185e-06, "loss": 0.5878169536590576, "step": 3055 }, { "epoch": 0.7045533141210375, "grad_norm": 1.389504644244344, "learning_rate": 1.5411071482126074e-06, "loss": 0.5841420292854309, "step": 3056 }, { "epoch": 0.7047838616714698, "grad_norm": 1.4362185597913253, "learning_rate": 1.5407865636760566e-06, "loss": 0.4527501165866852, "step": 3057 }, { "epoch": 0.7050144092219021, "grad_norm": 1.38132910863877, "learning_rate": 1.5404659005688441e-06, "loss": 0.433325856924057, "step": 3058 }, { "epoch": 0.7052449567723343, "grad_norm": 1.3438668709731927, "learning_rate": 1.5401451589375585e-06, "loss": 0.5717126131057739, "step": 3059 }, { "epoch": 0.7054755043227666, "grad_norm": 1.300530022976824, "learning_rate": 1.5398243388288002e-06, "loss": 0.4420049786567688, "step": 3060 }, { "epoch": 0.7057060518731988, "grad_norm": 1.3883239483781253, "learning_rate": 1.5395034402891809e-06, "loss": 0.42093801498413086, "step": 3061 }, { "epoch": 0.7059365994236311, "grad_norm": 1.317256008379363, "learning_rate": 1.5391824633653245e-06, "loss": 0.4691120386123657, "step": 3062 }, { "epoch": 0.7061671469740634, "grad_norm": 1.571058237037257, "learning_rate": 1.538861408103865e-06, "loss": 0.5704994797706604, "step": 3063 }, { "epoch": 0.7063976945244956, "grad_norm": 1.6209585177680517, "learning_rate": 1.5385402745514485e-06, "loss": 0.5992046594619751, "step": 3064 }, { "epoch": 0.7066282420749279, "grad_norm": 1.6228865067081202, "learning_rate": 1.5382190627547327e-06, "loss": 0.47539931535720825, "step": 3065 }, { "epoch": 0.7068587896253602, "grad_norm": 1.269049568068291, "learning_rate": 1.5378977727603856e-06, "loss": 0.5070189237594604, "step": 3066 }, { "epoch": 0.7070893371757925, "grad_norm": 1.4075154012090485, "learning_rate": 1.537576404615088e-06, "loss": 0.5456782579421997, "step": 3067 }, { "epoch": 0.7073198847262248, "grad_norm": 1.245118043630662, "learning_rate": 1.5372549583655315e-06, "loss": 0.5040643811225891, "step": 3068 }, { "epoch": 0.707550432276657, "grad_norm": 1.2490813291253982, "learning_rate": 1.536933434058418e-06, "loss": 0.5937368869781494, "step": 3069 }, { "epoch": 0.7077809798270893, "grad_norm": 1.3837089052575293, "learning_rate": 1.536611831740462e-06, "loss": 0.5315635800361633, "step": 3070 }, { "epoch": 0.7080115273775216, "grad_norm": 1.489313806253537, "learning_rate": 1.5362901514583896e-06, "loss": 0.4116753339767456, "step": 3071 }, { "epoch": 0.7082420749279539, "grad_norm": 1.4466674401069384, "learning_rate": 1.535968393258937e-06, "loss": 0.5934009552001953, "step": 3072 }, { "epoch": 0.7084726224783862, "grad_norm": 1.625081313125215, "learning_rate": 1.535646557188852e-06, "loss": 0.5851935744285583, "step": 3073 }, { "epoch": 0.7087031700288184, "grad_norm": 1.0978038324799, "learning_rate": 1.5353246432948952e-06, "loss": 0.4650305211544037, "step": 3074 }, { "epoch": 0.7089337175792507, "grad_norm": 1.4075996932378603, "learning_rate": 1.535002651623836e-06, "loss": 0.4810240864753723, "step": 3075 }, { "epoch": 0.709164265129683, "grad_norm": 1.2812004885082364, "learning_rate": 1.534680582222457e-06, "loss": 0.5431346297264099, "step": 3076 }, { "epoch": 0.7093948126801153, "grad_norm": 1.3726019599129855, "learning_rate": 1.534358435137552e-06, "loss": 0.49898943305015564, "step": 3077 }, { "epoch": 0.7096253602305476, "grad_norm": 2.2531586386992926, "learning_rate": 1.534036210415925e-06, "loss": 0.5768055319786072, "step": 3078 }, { "epoch": 0.7098559077809798, "grad_norm": 1.526605019564366, "learning_rate": 1.5337139081043922e-06, "loss": 0.5711077451705933, "step": 3079 }, { "epoch": 0.7100864553314121, "grad_norm": 1.3112089962432258, "learning_rate": 1.5333915282497805e-06, "loss": 0.46261918544769287, "step": 3080 }, { "epoch": 0.7103170028818444, "grad_norm": 1.2588542955672957, "learning_rate": 1.5330690708989286e-06, "loss": 0.4627862572669983, "step": 3081 }, { "epoch": 0.7105475504322767, "grad_norm": 1.3465412006949553, "learning_rate": 1.5327465360986857e-06, "loss": 0.5247993469238281, "step": 3082 }, { "epoch": 0.710778097982709, "grad_norm": 1.2562229226560728, "learning_rate": 1.5324239238959135e-06, "loss": 0.5141404867172241, "step": 3083 }, { "epoch": 0.7110086455331412, "grad_norm": 1.2193016691106844, "learning_rate": 1.5321012343374843e-06, "loss": 0.41849881410598755, "step": 3084 }, { "epoch": 0.7112391930835735, "grad_norm": 1.176720568433765, "learning_rate": 1.5317784674702805e-06, "loss": 0.5312439203262329, "step": 3085 }, { "epoch": 0.7114697406340058, "grad_norm": 1.448994861920568, "learning_rate": 1.5314556233411975e-06, "loss": 0.5194967985153198, "step": 3086 }, { "epoch": 0.7117002881844381, "grad_norm": 1.2531304236512153, "learning_rate": 1.5311327019971411e-06, "loss": 0.5438544750213623, "step": 3087 }, { "epoch": 0.7119308357348704, "grad_norm": 1.283053771121221, "learning_rate": 1.5308097034850284e-06, "loss": 0.5709701776504517, "step": 3088 }, { "epoch": 0.7121613832853025, "grad_norm": 1.1723690120388703, "learning_rate": 1.5304866278517876e-06, "loss": 0.46026527881622314, "step": 3089 }, { "epoch": 0.7123919308357348, "grad_norm": 1.3347805311464678, "learning_rate": 1.5301634751443585e-06, "loss": 0.48327285051345825, "step": 3090 }, { "epoch": 0.7126224783861671, "grad_norm": 1.278497590029932, "learning_rate": 1.5298402454096913e-06, "loss": 0.499522864818573, "step": 3091 }, { "epoch": 0.7128530259365994, "grad_norm": 1.2877514654924413, "learning_rate": 1.5295169386947487e-06, "loss": 0.490348756313324, "step": 3092 }, { "epoch": 0.7130835734870317, "grad_norm": 1.4853489603741208, "learning_rate": 1.5291935550465029e-06, "loss": 0.5159205794334412, "step": 3093 }, { "epoch": 0.7133141210374639, "grad_norm": 1.5618066997282813, "learning_rate": 1.528870094511939e-06, "loss": 0.6093416810035706, "step": 3094 }, { "epoch": 0.7135446685878962, "grad_norm": 1.3896221479742665, "learning_rate": 1.5285465571380518e-06, "loss": 0.5157930254936218, "step": 3095 }, { "epoch": 0.7137752161383285, "grad_norm": 1.594771684664948, "learning_rate": 1.5282229429718484e-06, "loss": 0.5248245596885681, "step": 3096 }, { "epoch": 0.7140057636887608, "grad_norm": 1.147007768121321, "learning_rate": 1.5278992520603465e-06, "loss": 0.5186586976051331, "step": 3097 }, { "epoch": 0.7142363112391931, "grad_norm": 1.280751872248316, "learning_rate": 1.5275754844505743e-06, "loss": 0.47629937529563904, "step": 3098 }, { "epoch": 0.7144668587896253, "grad_norm": 1.200709497044733, "learning_rate": 1.527251640189573e-06, "loss": 0.47172415256500244, "step": 3099 }, { "epoch": 0.7146974063400576, "grad_norm": 1.3100833521000486, "learning_rate": 1.5269277193243935e-06, "loss": 0.42949551343917847, "step": 3100 }, { "epoch": 0.7149279538904899, "grad_norm": 1.1770401760493896, "learning_rate": 1.5266037219020975e-06, "loss": 0.4720640778541565, "step": 3101 }, { "epoch": 0.7151585014409222, "grad_norm": 1.4867492108041584, "learning_rate": 1.5262796479697588e-06, "loss": 0.6004112958908081, "step": 3102 }, { "epoch": 0.7153890489913545, "grad_norm": 1.2325085653784102, "learning_rate": 1.525955497574462e-06, "loss": 0.5297414064407349, "step": 3103 }, { "epoch": 0.7156195965417868, "grad_norm": 1.1170096797212132, "learning_rate": 1.5256312707633033e-06, "loss": 0.4032800495624542, "step": 3104 }, { "epoch": 0.715850144092219, "grad_norm": 1.3276364077152654, "learning_rate": 1.5253069675833884e-06, "loss": 0.5147067904472351, "step": 3105 }, { "epoch": 0.7160806916426513, "grad_norm": 1.24754995211441, "learning_rate": 1.524982588081836e-06, "loss": 0.5029354691505432, "step": 3106 }, { "epoch": 0.7163112391930836, "grad_norm": 1.193419187811573, "learning_rate": 1.524658132305775e-06, "loss": 0.482473760843277, "step": 3107 }, { "epoch": 0.7165417867435159, "grad_norm": 1.5679177312787276, "learning_rate": 1.524333600302345e-06, "loss": 0.5003863573074341, "step": 3108 }, { "epoch": 0.7167723342939482, "grad_norm": 1.241966240767067, "learning_rate": 1.5240089921186976e-06, "loss": 0.5416104197502136, "step": 3109 }, { "epoch": 0.7170028818443804, "grad_norm": 1.3873843990860806, "learning_rate": 1.5236843078019948e-06, "loss": 0.5003090500831604, "step": 3110 }, { "epoch": 0.7172334293948127, "grad_norm": 1.3456709258527728, "learning_rate": 1.52335954739941e-06, "loss": 0.507997989654541, "step": 3111 }, { "epoch": 0.717463976945245, "grad_norm": 1.3569169943499046, "learning_rate": 1.523034710958127e-06, "loss": 0.5409040451049805, "step": 3112 }, { "epoch": 0.7176945244956773, "grad_norm": 1.1752130871727025, "learning_rate": 1.5227097985253421e-06, "loss": 0.46137601137161255, "step": 3113 }, { "epoch": 0.7179250720461096, "grad_norm": 1.4940949255724785, "learning_rate": 1.5223848101482604e-06, "loss": 0.52419114112854, "step": 3114 }, { "epoch": 0.7181556195965418, "grad_norm": 1.2026467071943991, "learning_rate": 1.5220597458741003e-06, "loss": 0.44112980365753174, "step": 3115 }, { "epoch": 0.718386167146974, "grad_norm": 1.1526512249043757, "learning_rate": 1.5217346057500905e-06, "loss": 0.4375038743019104, "step": 3116 }, { "epoch": 0.7186167146974063, "grad_norm": 1.3224156894029113, "learning_rate": 1.5214093898234691e-06, "loss": 0.5393679141998291, "step": 3117 }, { "epoch": 0.7188472622478386, "grad_norm": 1.3081629631325682, "learning_rate": 1.5210840981414878e-06, "loss": 0.5080707669258118, "step": 3118 }, { "epoch": 0.7190778097982709, "grad_norm": 1.3655587043810777, "learning_rate": 1.5207587307514083e-06, "loss": 0.46670597791671753, "step": 3119 }, { "epoch": 0.7193083573487031, "grad_norm": 1.4194299174496814, "learning_rate": 1.5204332877005022e-06, "loss": 0.49168825149536133, "step": 3120 }, { "epoch": 0.7195389048991354, "grad_norm": 1.0310387909664662, "learning_rate": 1.5201077690360533e-06, "loss": 0.4821666479110718, "step": 3121 }, { "epoch": 0.7197694524495677, "grad_norm": 1.2633547986191462, "learning_rate": 1.5197821748053561e-06, "loss": 0.5115829706192017, "step": 3122 }, { "epoch": 0.72, "grad_norm": 1.4998871193086376, "learning_rate": 1.5194565050557162e-06, "loss": 0.5272057056427002, "step": 3123 }, { "epoch": 0.7202305475504323, "grad_norm": 1.346831550802848, "learning_rate": 1.5191307598344495e-06, "loss": 0.6039499640464783, "step": 3124 }, { "epoch": 0.7204610951008645, "grad_norm": 1.4453840852241573, "learning_rate": 1.5188049391888847e-06, "loss": 0.526665985584259, "step": 3125 }, { "epoch": 0.7206916426512968, "grad_norm": 1.3777089134097682, "learning_rate": 1.5184790431663584e-06, "loss": 0.510013997554779, "step": 3126 }, { "epoch": 0.7209221902017291, "grad_norm": 1.3429612403841265, "learning_rate": 1.518153071814221e-06, "loss": 0.4767458438873291, "step": 3127 }, { "epoch": 0.7211527377521614, "grad_norm": 1.4484846708597472, "learning_rate": 1.517827025179833e-06, "loss": 0.5258289575576782, "step": 3128 }, { "epoch": 0.7213832853025937, "grad_norm": 1.3093467599223096, "learning_rate": 1.5175009033105645e-06, "loss": 0.512428343296051, "step": 3129 }, { "epoch": 0.7216138328530259, "grad_norm": 1.3318335614334949, "learning_rate": 1.5171747062537984e-06, "loss": 0.45008307695388794, "step": 3130 }, { "epoch": 0.7218443804034582, "grad_norm": 1.1846718745624494, "learning_rate": 1.5168484340569274e-06, "loss": 0.4721248745918274, "step": 3131 }, { "epoch": 0.7220749279538905, "grad_norm": 1.369284305549072, "learning_rate": 1.5165220867673556e-06, "loss": 0.5113618969917297, "step": 3132 }, { "epoch": 0.7223054755043228, "grad_norm": 1.3733683308454623, "learning_rate": 1.5161956644324981e-06, "loss": 0.5095973014831543, "step": 3133 }, { "epoch": 0.7225360230547551, "grad_norm": 1.4404559032274478, "learning_rate": 1.5158691670997803e-06, "loss": 0.49031388759613037, "step": 3134 }, { "epoch": 0.7227665706051873, "grad_norm": 1.4631484742851009, "learning_rate": 1.515542594816639e-06, "loss": 0.48376762866973877, "step": 3135 }, { "epoch": 0.7229971181556196, "grad_norm": 1.4168472451107348, "learning_rate": 1.5152159476305216e-06, "loss": 0.5298563241958618, "step": 3136 }, { "epoch": 0.7232276657060519, "grad_norm": 1.1340078225918775, "learning_rate": 1.514889225588887e-06, "loss": 0.4832274913787842, "step": 3137 }, { "epoch": 0.7234582132564842, "grad_norm": 1.3157069004056066, "learning_rate": 1.5145624287392043e-06, "loss": 0.5212540626525879, "step": 3138 }, { "epoch": 0.7236887608069165, "grad_norm": 1.6243689909614896, "learning_rate": 1.514235557128953e-06, "loss": 0.506250262260437, "step": 3139 }, { "epoch": 0.7239193083573487, "grad_norm": 1.2552589816192705, "learning_rate": 1.5139086108056254e-06, "loss": 0.4448832869529724, "step": 3140 }, { "epoch": 0.724149855907781, "grad_norm": 1.3005846690358085, "learning_rate": 1.5135815898167223e-06, "loss": 0.46571147441864014, "step": 3141 }, { "epoch": 0.7243804034582133, "grad_norm": 1.148037591887767, "learning_rate": 1.5132544942097572e-06, "loss": 0.4139944911003113, "step": 3142 }, { "epoch": 0.7246109510086456, "grad_norm": 1.242975321866892, "learning_rate": 1.5129273240322529e-06, "loss": 0.46135827898979187, "step": 3143 }, { "epoch": 0.7248414985590778, "grad_norm": 1.514996810707073, "learning_rate": 1.5126000793317448e-06, "loss": 0.4942426085472107, "step": 3144 }, { "epoch": 0.72507204610951, "grad_norm": 1.2884620434427276, "learning_rate": 1.5122727601557774e-06, "loss": 0.48823419213294983, "step": 3145 }, { "epoch": 0.7253025936599423, "grad_norm": 1.2949364451776857, "learning_rate": 1.511945366551907e-06, "loss": 0.48425859212875366, "step": 3146 }, { "epoch": 0.7255331412103746, "grad_norm": 1.4240605097464292, "learning_rate": 1.5116178985677005e-06, "loss": 0.3889104127883911, "step": 3147 }, { "epoch": 0.7257636887608069, "grad_norm": 1.2084808875919353, "learning_rate": 1.5112903562507357e-06, "loss": 0.4651326537132263, "step": 3148 }, { "epoch": 0.7259942363112392, "grad_norm": 1.3875484913968303, "learning_rate": 1.5109627396486011e-06, "loss": 0.5221618413925171, "step": 3149 }, { "epoch": 0.7262247838616714, "grad_norm": 1.2662900393043348, "learning_rate": 1.5106350488088958e-06, "loss": 0.5054634213447571, "step": 3150 }, { "epoch": 0.7264553314121037, "grad_norm": 1.2101168031562617, "learning_rate": 1.5103072837792295e-06, "loss": 0.5082115530967712, "step": 3151 }, { "epoch": 0.726685878962536, "grad_norm": 1.2590734826919803, "learning_rate": 1.5099794446072239e-06, "loss": 0.45884132385253906, "step": 3152 }, { "epoch": 0.7269164265129683, "grad_norm": 1.2411545140923466, "learning_rate": 1.50965153134051e-06, "loss": 0.4660666882991791, "step": 3153 }, { "epoch": 0.7271469740634006, "grad_norm": 1.3868010494860628, "learning_rate": 1.5093235440267308e-06, "loss": 0.5483843684196472, "step": 3154 }, { "epoch": 0.7273775216138328, "grad_norm": 1.3466110391906256, "learning_rate": 1.5089954827135384e-06, "loss": 0.438312828540802, "step": 3155 }, { "epoch": 0.7276080691642651, "grad_norm": 1.3819060249646318, "learning_rate": 1.5086673474485978e-06, "loss": 0.4444723129272461, "step": 3156 }, { "epoch": 0.7278386167146974, "grad_norm": 1.5587131329445694, "learning_rate": 1.5083391382795835e-06, "loss": 0.4306541681289673, "step": 3157 }, { "epoch": 0.7280691642651297, "grad_norm": 1.4276962124392467, "learning_rate": 1.5080108552541799e-06, "loss": 0.5440046787261963, "step": 3158 }, { "epoch": 0.728299711815562, "grad_norm": 1.0995595949722106, "learning_rate": 1.5076824984200837e-06, "loss": 0.46402427554130554, "step": 3159 }, { "epoch": 0.7285302593659942, "grad_norm": 1.2297644719249652, "learning_rate": 1.5073540678250024e-06, "loss": 0.5025292634963989, "step": 3160 }, { "epoch": 0.7287608069164265, "grad_norm": 1.610056083044619, "learning_rate": 1.5070255635166526e-06, "loss": 0.5699792504310608, "step": 3161 }, { "epoch": 0.7289913544668588, "grad_norm": 1.1607368738487698, "learning_rate": 1.506696985542763e-06, "loss": 0.5201475024223328, "step": 3162 }, { "epoch": 0.7292219020172911, "grad_norm": 2.0192527279994508, "learning_rate": 1.5063683339510722e-06, "loss": 0.47518134117126465, "step": 3163 }, { "epoch": 0.7294524495677234, "grad_norm": 1.4627008855208656, "learning_rate": 1.5060396087893303e-06, "loss": 0.45600777864456177, "step": 3164 }, { "epoch": 0.7296829971181557, "grad_norm": 1.3350658804578093, "learning_rate": 1.5057108101052975e-06, "loss": 0.5589861869812012, "step": 3165 }, { "epoch": 0.7299135446685879, "grad_norm": 1.5326020129462947, "learning_rate": 1.5053819379467449e-06, "loss": 0.5672855377197266, "step": 3166 }, { "epoch": 0.7301440922190202, "grad_norm": 1.358392085768019, "learning_rate": 1.5050529923614539e-06, "loss": 0.4816353917121887, "step": 3167 }, { "epoch": 0.7303746397694525, "grad_norm": 1.5426455545970876, "learning_rate": 1.5047239733972172e-06, "loss": 0.5251726508140564, "step": 3168 }, { "epoch": 0.7306051873198848, "grad_norm": 1.3452570725680248, "learning_rate": 1.5043948811018376e-06, "loss": 0.5041342973709106, "step": 3169 }, { "epoch": 0.730835734870317, "grad_norm": 1.27265857523486, "learning_rate": 1.5040657155231294e-06, "loss": 0.47151800990104675, "step": 3170 }, { "epoch": 0.7310662824207492, "grad_norm": 1.3177939212987457, "learning_rate": 1.5037364767089158e-06, "loss": 0.4337390661239624, "step": 3171 }, { "epoch": 0.7312968299711815, "grad_norm": 1.2975145506792556, "learning_rate": 1.5034071647070326e-06, "loss": 0.5004295706748962, "step": 3172 }, { "epoch": 0.7315273775216138, "grad_norm": 1.3770112611667393, "learning_rate": 1.5030777795653253e-06, "loss": 0.5853343605995178, "step": 3173 }, { "epoch": 0.7317579250720461, "grad_norm": 1.4038198026175097, "learning_rate": 1.50274832133165e-06, "loss": 0.5205816626548767, "step": 3174 }, { "epoch": 0.7319884726224783, "grad_norm": 1.4163405803887914, "learning_rate": 1.5024187900538732e-06, "loss": 0.6013909578323364, "step": 3175 }, { "epoch": 0.7322190201729106, "grad_norm": 1.6139845231432477, "learning_rate": 1.5020891857798734e-06, "loss": 0.6791777014732361, "step": 3176 }, { "epoch": 0.7324495677233429, "grad_norm": 1.3435286466605478, "learning_rate": 1.5017595085575377e-06, "loss": 0.5194481611251831, "step": 3177 }, { "epoch": 0.7326801152737752, "grad_norm": 1.485630449161062, "learning_rate": 1.501429758434765e-06, "loss": 0.442119836807251, "step": 3178 }, { "epoch": 0.7329106628242075, "grad_norm": 1.4386371107965295, "learning_rate": 1.501099935459465e-06, "loss": 0.4681103229522705, "step": 3179 }, { "epoch": 0.7331412103746398, "grad_norm": 1.4831338394596723, "learning_rate": 1.500770039679557e-06, "loss": 0.47549009323120117, "step": 3180 }, { "epoch": 0.733371757925072, "grad_norm": 1.210045907042475, "learning_rate": 1.5004400711429716e-06, "loss": 0.5011273622512817, "step": 3181 }, { "epoch": 0.7336023054755043, "grad_norm": 1.3507700304649388, "learning_rate": 1.5001100298976498e-06, "loss": 0.4527067244052887, "step": 3182 }, { "epoch": 0.7338328530259366, "grad_norm": 1.4783974186980648, "learning_rate": 1.4997799159915437e-06, "loss": 0.4995662271976471, "step": 3183 }, { "epoch": 0.7340634005763689, "grad_norm": 1.289236007560327, "learning_rate": 1.499449729472614e-06, "loss": 0.4354374408721924, "step": 3184 }, { "epoch": 0.7342939481268012, "grad_norm": 1.1085815488677964, "learning_rate": 1.4991194703888352e-06, "loss": 0.4196980893611908, "step": 3185 }, { "epoch": 0.7345244956772334, "grad_norm": 1.2454446345841115, "learning_rate": 1.498789138788189e-06, "loss": 0.41767656803131104, "step": 3186 }, { "epoch": 0.7347550432276657, "grad_norm": 1.3175981275733846, "learning_rate": 1.4984587347186701e-06, "loss": 0.5601884722709656, "step": 3187 }, { "epoch": 0.734985590778098, "grad_norm": 1.3717772217606947, "learning_rate": 1.498128258228282e-06, "loss": 0.5722160935401917, "step": 3188 }, { "epoch": 0.7352161383285303, "grad_norm": 1.3550924414269825, "learning_rate": 1.4977977093650402e-06, "loss": 0.49754706025123596, "step": 3189 }, { "epoch": 0.7354466858789626, "grad_norm": 1.2671729909166771, "learning_rate": 1.4974670881769699e-06, "loss": 0.3791263997554779, "step": 3190 }, { "epoch": 0.7356772334293948, "grad_norm": 1.5238810977019115, "learning_rate": 1.4971363947121062e-06, "loss": 0.5790057182312012, "step": 3191 }, { "epoch": 0.7359077809798271, "grad_norm": 1.2068480937901207, "learning_rate": 1.4968056290184962e-06, "loss": 0.4697153568267822, "step": 3192 }, { "epoch": 0.7361383285302594, "grad_norm": 1.3533406878320302, "learning_rate": 1.4964747911441967e-06, "loss": 0.5018360614776611, "step": 3193 }, { "epoch": 0.7363688760806917, "grad_norm": 1.177404560227721, "learning_rate": 1.4961438811372744e-06, "loss": 0.4887595772743225, "step": 3194 }, { "epoch": 0.736599423631124, "grad_norm": 1.3181068958883442, "learning_rate": 1.4958128990458078e-06, "loss": 0.4804183840751648, "step": 3195 }, { "epoch": 0.7368299711815562, "grad_norm": 1.8934118111380485, "learning_rate": 1.4954818449178846e-06, "loss": 0.4760059118270874, "step": 3196 }, { "epoch": 0.7370605187319885, "grad_norm": 1.262489651614568, "learning_rate": 1.4951507188016037e-06, "loss": 0.5415846109390259, "step": 3197 }, { "epoch": 0.7372910662824208, "grad_norm": 1.548909219963287, "learning_rate": 1.4948195207450747e-06, "loss": 0.4729132652282715, "step": 3198 }, { "epoch": 0.737521613832853, "grad_norm": 1.1399718829955798, "learning_rate": 1.4944882507964166e-06, "loss": 0.45299506187438965, "step": 3199 }, { "epoch": 0.7377521613832853, "grad_norm": 1.2986205608956582, "learning_rate": 1.4941569090037592e-06, "loss": 0.4444664716720581, "step": 3200 }, { "epoch": 0.7379827089337175, "grad_norm": 1.2888444051901529, "learning_rate": 1.493825495415244e-06, "loss": 0.5595009922981262, "step": 3201 }, { "epoch": 0.7382132564841498, "grad_norm": 1.2769760823016159, "learning_rate": 1.4934940100790217e-06, "loss": 0.5800896286964417, "step": 3202 }, { "epoch": 0.7384438040345821, "grad_norm": 1.4684684564858026, "learning_rate": 1.4931624530432531e-06, "loss": 0.5237326622009277, "step": 3203 }, { "epoch": 0.7386743515850144, "grad_norm": 1.5052585559748772, "learning_rate": 1.4928308243561106e-06, "loss": 0.49139225482940674, "step": 3204 }, { "epoch": 0.7389048991354467, "grad_norm": 1.1853994118787092, "learning_rate": 1.4924991240657758e-06, "loss": 0.4052927494049072, "step": 3205 }, { "epoch": 0.7391354466858789, "grad_norm": 1.4576894877729643, "learning_rate": 1.4921673522204418e-06, "loss": 0.4857429265975952, "step": 3206 }, { "epoch": 0.7393659942363112, "grad_norm": 1.1777599862408406, "learning_rate": 1.4918355088683114e-06, "loss": 0.464494526386261, "step": 3207 }, { "epoch": 0.7395965417867435, "grad_norm": 1.539493657840995, "learning_rate": 1.4915035940575982e-06, "loss": 0.4662816822528839, "step": 3208 }, { "epoch": 0.7398270893371758, "grad_norm": 1.6279996883671786, "learning_rate": 1.4911716078365255e-06, "loss": 0.6633642911911011, "step": 3209 }, { "epoch": 0.7400576368876081, "grad_norm": 1.2374546972110039, "learning_rate": 1.490839550253328e-06, "loss": 0.47886157035827637, "step": 3210 }, { "epoch": 0.7402881844380403, "grad_norm": 1.6545796769921648, "learning_rate": 1.49050742135625e-06, "loss": 0.568007230758667, "step": 3211 }, { "epoch": 0.7405187319884726, "grad_norm": 1.3532552620344924, "learning_rate": 1.4901752211935461e-06, "loss": 0.5382261872291565, "step": 3212 }, { "epoch": 0.7407492795389049, "grad_norm": 1.2960864654756472, "learning_rate": 1.489842949813482e-06, "loss": 0.42607924342155457, "step": 3213 }, { "epoch": 0.7409798270893372, "grad_norm": 1.1718315561608335, "learning_rate": 1.4895106072643329e-06, "loss": 0.4686487317085266, "step": 3214 }, { "epoch": 0.7412103746397695, "grad_norm": 1.250504933101052, "learning_rate": 1.4891781935943849e-06, "loss": 0.4415118098258972, "step": 3215 }, { "epoch": 0.7414409221902017, "grad_norm": 1.4069825438254808, "learning_rate": 1.4888457088519337e-06, "loss": 0.5895603895187378, "step": 3216 }, { "epoch": 0.741671469740634, "grad_norm": 1.2536937286907117, "learning_rate": 1.488513153085287e-06, "loss": 0.49977678060531616, "step": 3217 }, { "epoch": 0.7419020172910663, "grad_norm": 1.617869919003562, "learning_rate": 1.4881805263427606e-06, "loss": 0.5159600377082825, "step": 3218 }, { "epoch": 0.7421325648414986, "grad_norm": 1.3312699005053992, "learning_rate": 1.4878478286726825e-06, "loss": 0.486503928899765, "step": 3219 }, { "epoch": 0.7423631123919309, "grad_norm": 1.3493254176291696, "learning_rate": 1.48751506012339e-06, "loss": 0.5594410300254822, "step": 3220 }, { "epoch": 0.7425936599423631, "grad_norm": 1.282002454566378, "learning_rate": 1.4871822207432306e-06, "loss": 0.58189857006073, "step": 3221 }, { "epoch": 0.7428242074927954, "grad_norm": 1.4722677481345254, "learning_rate": 1.4868493105805624e-06, "loss": 0.4876249432563782, "step": 3222 }, { "epoch": 0.7430547550432277, "grad_norm": 1.317973913245676, "learning_rate": 1.486516329683754e-06, "loss": 0.4791470170021057, "step": 3223 }, { "epoch": 0.74328530259366, "grad_norm": 1.4498879865814052, "learning_rate": 1.486183278101184e-06, "loss": 0.5361772179603577, "step": 3224 }, { "epoch": 0.7435158501440923, "grad_norm": 1.1522319567135113, "learning_rate": 1.4858501558812416e-06, "loss": 0.43522512912750244, "step": 3225 }, { "epoch": 0.7437463976945244, "grad_norm": 1.340874944457649, "learning_rate": 1.4855169630723256e-06, "loss": 0.480033814907074, "step": 3226 }, { "epoch": 0.7439769452449567, "grad_norm": 1.4405213728495958, "learning_rate": 1.4851836997228457e-06, "loss": 0.49638503789901733, "step": 3227 }, { "epoch": 0.744207492795389, "grad_norm": 1.1863542915109169, "learning_rate": 1.4848503658812215e-06, "loss": 0.46532535552978516, "step": 3228 }, { "epoch": 0.7444380403458213, "grad_norm": 1.4964825329069535, "learning_rate": 1.4845169615958825e-06, "loss": 0.57126384973526, "step": 3229 }, { "epoch": 0.7446685878962536, "grad_norm": 1.3843465256914986, "learning_rate": 1.48418348691527e-06, "loss": 0.44086208939552307, "step": 3230 }, { "epoch": 0.7448991354466858, "grad_norm": 1.3494587128334137, "learning_rate": 1.4838499418878333e-06, "loss": 0.527345597743988, "step": 3231 }, { "epoch": 0.7451296829971181, "grad_norm": 1.1821347534533735, "learning_rate": 1.4835163265620337e-06, "loss": 0.5213931798934937, "step": 3232 }, { "epoch": 0.7453602305475504, "grad_norm": 1.4266449251298718, "learning_rate": 1.4831826409863415e-06, "loss": 0.4754828214645386, "step": 3233 }, { "epoch": 0.7455907780979827, "grad_norm": 1.2356724187797137, "learning_rate": 1.482848885209238e-06, "loss": 0.5262855291366577, "step": 3234 }, { "epoch": 0.745821325648415, "grad_norm": 1.3091823753963572, "learning_rate": 1.4825150592792146e-06, "loss": 0.6030062437057495, "step": 3235 }, { "epoch": 0.7460518731988472, "grad_norm": 1.537234499936022, "learning_rate": 1.4821811632447728e-06, "loss": 0.5706098079681396, "step": 3236 }, { "epoch": 0.7462824207492795, "grad_norm": 1.415148150243323, "learning_rate": 1.4818471971544236e-06, "loss": 0.5095677971839905, "step": 3237 }, { "epoch": 0.7465129682997118, "grad_norm": 1.2198058723620682, "learning_rate": 1.481513161056689e-06, "loss": 0.40414959192276, "step": 3238 }, { "epoch": 0.7467435158501441, "grad_norm": 1.6486746642937935, "learning_rate": 1.481179055000102e-06, "loss": 0.5147525668144226, "step": 3239 }, { "epoch": 0.7469740634005764, "grad_norm": 1.2602084168070753, "learning_rate": 1.4808448790332034e-06, "loss": 0.5350685119628906, "step": 3240 }, { "epoch": 0.7472046109510087, "grad_norm": 1.3799401187569467, "learning_rate": 1.4805106332045457e-06, "loss": 0.5179615020751953, "step": 3241 }, { "epoch": 0.7474351585014409, "grad_norm": 1.3099210508258956, "learning_rate": 1.4801763175626922e-06, "loss": 0.46143245697021484, "step": 3242 }, { "epoch": 0.7476657060518732, "grad_norm": 1.2390835702681366, "learning_rate": 1.4798419321562148e-06, "loss": 0.45439475774765015, "step": 3243 }, { "epoch": 0.7478962536023055, "grad_norm": 1.584653266512537, "learning_rate": 1.4795074770336963e-06, "loss": 0.47151651978492737, "step": 3244 }, { "epoch": 0.7481268011527378, "grad_norm": 1.3115239213335883, "learning_rate": 1.4791729522437298e-06, "loss": 0.5368925333023071, "step": 3245 }, { "epoch": 0.7483573487031701, "grad_norm": 1.3145011041134524, "learning_rate": 1.4788383578349177e-06, "loss": 0.4919322431087494, "step": 3246 }, { "epoch": 0.7485878962536023, "grad_norm": 1.266771108560509, "learning_rate": 1.4785036938558737e-06, "loss": 0.449468731880188, "step": 3247 }, { "epoch": 0.7488184438040346, "grad_norm": 1.3549371512329456, "learning_rate": 1.4781689603552208e-06, "loss": 0.463690847158432, "step": 3248 }, { "epoch": 0.7490489913544669, "grad_norm": 1.135747734564589, "learning_rate": 1.477834157381592e-06, "loss": 0.47871702909469604, "step": 3249 }, { "epoch": 0.7492795389048992, "grad_norm": 1.5722908594204268, "learning_rate": 1.4774992849836312e-06, "loss": 0.4971943497657776, "step": 3250 }, { "epoch": 0.7495100864553315, "grad_norm": 1.2012715930460522, "learning_rate": 1.4771643432099915e-06, "loss": 0.42786940932273865, "step": 3251 }, { "epoch": 0.7497406340057637, "grad_norm": 1.458596022540129, "learning_rate": 1.4768293321093366e-06, "loss": 0.5166041851043701, "step": 3252 }, { "epoch": 0.749971181556196, "grad_norm": 1.4316055802929983, "learning_rate": 1.4764942517303397e-06, "loss": 0.5358223915100098, "step": 3253 }, { "epoch": 0.7502017291066282, "grad_norm": 1.2502899398623475, "learning_rate": 1.4761591021216854e-06, "loss": 0.4700164198875427, "step": 3254 }, { "epoch": 0.7504322766570605, "grad_norm": 1.5378679862419669, "learning_rate": 1.475823883332067e-06, "loss": 0.4827917516231537, "step": 3255 }, { "epoch": 0.7506628242074928, "grad_norm": 1.4586380104389158, "learning_rate": 1.4754885954101881e-06, "loss": 0.48745471239089966, "step": 3256 }, { "epoch": 0.750893371757925, "grad_norm": 1.7262371957910712, "learning_rate": 1.4751532384047626e-06, "loss": 0.4328693747520447, "step": 3257 }, { "epoch": 0.7511239193083573, "grad_norm": 1.4947067603037787, "learning_rate": 1.4748178123645148e-06, "loss": 0.5124896764755249, "step": 3258 }, { "epoch": 0.7513544668587896, "grad_norm": 1.4778565285714071, "learning_rate": 1.474482317338178e-06, "loss": 0.46671369671821594, "step": 3259 }, { "epoch": 0.7515850144092219, "grad_norm": 1.4926182145171014, "learning_rate": 1.4741467533744964e-06, "loss": 0.47405868768692017, "step": 3260 }, { "epoch": 0.7518155619596542, "grad_norm": 1.5314925124371308, "learning_rate": 1.473811120522224e-06, "loss": 0.6070246696472168, "step": 3261 }, { "epoch": 0.7520461095100864, "grad_norm": 1.2253029967074474, "learning_rate": 1.4734754188301248e-06, "loss": 0.4673004150390625, "step": 3262 }, { "epoch": 0.7522766570605187, "grad_norm": 1.2921346331685029, "learning_rate": 1.4731396483469726e-06, "loss": 0.4255906939506531, "step": 3263 }, { "epoch": 0.752507204610951, "grad_norm": 1.789844998921356, "learning_rate": 1.4728038091215518e-06, "loss": 0.6287655830383301, "step": 3264 }, { "epoch": 0.7527377521613833, "grad_norm": 1.5134361950809017, "learning_rate": 1.4724679012026558e-06, "loss": 0.5185075998306274, "step": 3265 }, { "epoch": 0.7529682997118156, "grad_norm": 1.2691589676603439, "learning_rate": 1.472131924639089e-06, "loss": 0.5780787467956543, "step": 3266 }, { "epoch": 0.7531988472622478, "grad_norm": 1.4836762250468871, "learning_rate": 1.4717958794796646e-06, "loss": 0.5578355193138123, "step": 3267 }, { "epoch": 0.7534293948126801, "grad_norm": 1.2160311025308315, "learning_rate": 1.4714597657732073e-06, "loss": 0.46400272846221924, "step": 3268 }, { "epoch": 0.7536599423631124, "grad_norm": 1.5435283070764019, "learning_rate": 1.47112358356855e-06, "loss": 0.4976726770401001, "step": 3269 }, { "epoch": 0.7538904899135447, "grad_norm": 2.007123996849609, "learning_rate": 1.4707873329145372e-06, "loss": 0.5713675022125244, "step": 3270 }, { "epoch": 0.754121037463977, "grad_norm": 1.2543678638742324, "learning_rate": 1.470451013860023e-06, "loss": 0.5416299700737, "step": 3271 }, { "epoch": 0.7543515850144092, "grad_norm": 1.2483433805975221, "learning_rate": 1.47011462645387e-06, "loss": 0.44016146659851074, "step": 3272 }, { "epoch": 0.7545821325648415, "grad_norm": 0.9999842753367084, "learning_rate": 1.4697781707449525e-06, "loss": 0.4472951292991638, "step": 3273 }, { "epoch": 0.7548126801152738, "grad_norm": 1.2756009717160484, "learning_rate": 1.4694416467821533e-06, "loss": 0.42740410566329956, "step": 3274 }, { "epoch": 0.7550432276657061, "grad_norm": 1.5386149050309754, "learning_rate": 1.469105054614367e-06, "loss": 0.5016801953315735, "step": 3275 }, { "epoch": 0.7552737752161384, "grad_norm": 1.1801187234317005, "learning_rate": 1.4687683942904956e-06, "loss": 0.45598453283309937, "step": 3276 }, { "epoch": 0.7555043227665706, "grad_norm": 1.5133516505478535, "learning_rate": 1.4684316658594536e-06, "loss": 0.5792516469955444, "step": 3277 }, { "epoch": 0.7557348703170029, "grad_norm": 1.1812462798913754, "learning_rate": 1.4680948693701632e-06, "loss": 0.4660930633544922, "step": 3278 }, { "epoch": 0.7559654178674352, "grad_norm": 1.2146112468729, "learning_rate": 1.4677580048715576e-06, "loss": 0.47444018721580505, "step": 3279 }, { "epoch": 0.7561959654178675, "grad_norm": 1.5995735050771667, "learning_rate": 1.4674210724125801e-06, "loss": 0.5156435370445251, "step": 3280 }, { "epoch": 0.7564265129682997, "grad_norm": 1.3317370759839628, "learning_rate": 1.4670840720421839e-06, "loss": 0.5381336212158203, "step": 3281 }, { "epoch": 0.7566570605187319, "grad_norm": 1.3039255390329512, "learning_rate": 1.4667470038093298e-06, "loss": 0.4314936399459839, "step": 3282 }, { "epoch": 0.7568876080691642, "grad_norm": 1.3315809296965806, "learning_rate": 1.4664098677629924e-06, "loss": 0.517806887626648, "step": 3283 }, { "epoch": 0.7571181556195965, "grad_norm": 1.17402962693007, "learning_rate": 1.466072663952153e-06, "loss": 0.5067352056503296, "step": 3284 }, { "epoch": 0.7573487031700288, "grad_norm": 1.3482800959852768, "learning_rate": 1.465735392425804e-06, "loss": 0.49119824171066284, "step": 3285 }, { "epoch": 0.7575792507204611, "grad_norm": 1.5805195342469058, "learning_rate": 1.465398053232947e-06, "loss": 0.5447220802307129, "step": 3286 }, { "epoch": 0.7578097982708933, "grad_norm": 1.5136634313996882, "learning_rate": 1.465060646422595e-06, "loss": 0.45386195182800293, "step": 3287 }, { "epoch": 0.7580403458213256, "grad_norm": 1.4455737325065987, "learning_rate": 1.4647231720437684e-06, "loss": 0.5146728754043579, "step": 3288 }, { "epoch": 0.7582708933717579, "grad_norm": 1.208398611343664, "learning_rate": 1.4643856301454998e-06, "loss": 0.48067593574523926, "step": 3289 }, { "epoch": 0.7585014409221902, "grad_norm": 1.3311997339356765, "learning_rate": 1.4640480207768298e-06, "loss": 0.5257793664932251, "step": 3290 }, { "epoch": 0.7587319884726225, "grad_norm": 1.3400035497885063, "learning_rate": 1.4637103439868097e-06, "loss": 0.47576746344566345, "step": 3291 }, { "epoch": 0.7589625360230547, "grad_norm": 1.3460861407557851, "learning_rate": 1.4633725998245007e-06, "loss": 0.5666048526763916, "step": 3292 }, { "epoch": 0.759193083573487, "grad_norm": 1.1564836610678924, "learning_rate": 1.463034788338973e-06, "loss": 0.44034671783447266, "step": 3293 }, { "epoch": 0.7594236311239193, "grad_norm": 1.4202662515562567, "learning_rate": 1.4626969095793075e-06, "loss": 0.4745502471923828, "step": 3294 }, { "epoch": 0.7596541786743516, "grad_norm": 1.4448760394719364, "learning_rate": 1.4623589635945948e-06, "loss": 0.5635093450546265, "step": 3295 }, { "epoch": 0.7598847262247839, "grad_norm": 1.4934304968991277, "learning_rate": 1.4620209504339343e-06, "loss": 0.6082891225814819, "step": 3296 }, { "epoch": 0.7601152737752161, "grad_norm": 1.3232103372571715, "learning_rate": 1.461682870146436e-06, "loss": 0.4839524030685425, "step": 3297 }, { "epoch": 0.7603458213256484, "grad_norm": 1.358400964322484, "learning_rate": 1.461344722781219e-06, "loss": 0.526859700679779, "step": 3298 }, { "epoch": 0.7605763688760807, "grad_norm": 1.2678987797548023, "learning_rate": 1.4610065083874138e-06, "loss": 0.5154972672462463, "step": 3299 }, { "epoch": 0.760806916426513, "grad_norm": 1.3734418069144714, "learning_rate": 1.4606682270141583e-06, "loss": 0.4647676944732666, "step": 3300 }, { "epoch": 0.7610374639769453, "grad_norm": 1.366663754645629, "learning_rate": 1.4603298787106015e-06, "loss": 0.4470120966434479, "step": 3301 }, { "epoch": 0.7612680115273776, "grad_norm": 1.3628610757351864, "learning_rate": 1.4599914635259023e-06, "loss": 0.5059522986412048, "step": 3302 }, { "epoch": 0.7614985590778098, "grad_norm": 1.6696301085697236, "learning_rate": 1.4596529815092284e-06, "loss": 0.6402307748794556, "step": 3303 }, { "epoch": 0.7617291066282421, "grad_norm": 1.1781562854865368, "learning_rate": 1.4593144327097581e-06, "loss": 0.4766733646392822, "step": 3304 }, { "epoch": 0.7619596541786744, "grad_norm": 1.2614674941288848, "learning_rate": 1.4589758171766788e-06, "loss": 0.4931349754333496, "step": 3305 }, { "epoch": 0.7621902017291067, "grad_norm": 1.2984040294603267, "learning_rate": 1.458637134959188e-06, "loss": 0.48644161224365234, "step": 3306 }, { "epoch": 0.762420749279539, "grad_norm": 1.3725039156042402, "learning_rate": 1.4582983861064925e-06, "loss": 0.6103028059005737, "step": 3307 }, { "epoch": 0.7626512968299712, "grad_norm": 1.284768652256614, "learning_rate": 1.4579595706678094e-06, "loss": 0.4911022484302521, "step": 3308 }, { "epoch": 0.7628818443804034, "grad_norm": 1.3556353217542514, "learning_rate": 1.4576206886923648e-06, "loss": 0.49871009588241577, "step": 3309 }, { "epoch": 0.7631123919308357, "grad_norm": 1.3055666196848867, "learning_rate": 1.457281740229394e-06, "loss": 0.48048093914985657, "step": 3310 }, { "epoch": 0.763342939481268, "grad_norm": 1.351104735921863, "learning_rate": 1.4569427253281438e-06, "loss": 0.39041709899902344, "step": 3311 }, { "epoch": 0.7635734870317002, "grad_norm": 1.211363331490999, "learning_rate": 1.456603644037869e-06, "loss": 0.5072616338729858, "step": 3312 }, { "epoch": 0.7638040345821325, "grad_norm": 1.3516359175587098, "learning_rate": 1.4562644964078348e-06, "loss": 0.4459783732891083, "step": 3313 }, { "epoch": 0.7640345821325648, "grad_norm": 1.3522114158420477, "learning_rate": 1.4559252824873153e-06, "loss": 0.46400186419487, "step": 3314 }, { "epoch": 0.7642651296829971, "grad_norm": 1.3451209364608996, "learning_rate": 1.455586002325596e-06, "loss": 0.4863799810409546, "step": 3315 }, { "epoch": 0.7644956772334294, "grad_norm": 1.5370462205813438, "learning_rate": 1.4552466559719692e-06, "loss": 0.4174691438674927, "step": 3316 }, { "epoch": 0.7647262247838617, "grad_norm": 1.2694266587224816, "learning_rate": 1.4549072434757392e-06, "loss": 0.5211185812950134, "step": 3317 }, { "epoch": 0.7649567723342939, "grad_norm": 1.3359981437064172, "learning_rate": 1.4545677648862192e-06, "loss": 0.47883665561676025, "step": 3318 }, { "epoch": 0.7651873198847262, "grad_norm": 1.403465964148792, "learning_rate": 1.4542282202527316e-06, "loss": 0.4836026430130005, "step": 3319 }, { "epoch": 0.7654178674351585, "grad_norm": 1.1848104317381678, "learning_rate": 1.4538886096246091e-06, "loss": 0.4277452528476715, "step": 3320 }, { "epoch": 0.7656484149855908, "grad_norm": 1.6322770231147972, "learning_rate": 1.453548933051193e-06, "loss": 0.4482790231704712, "step": 3321 }, { "epoch": 0.7658789625360231, "grad_norm": 1.4474066354965356, "learning_rate": 1.4532091905818354e-06, "loss": 0.4530835747718811, "step": 3322 }, { "epoch": 0.7661095100864553, "grad_norm": 1.1318959264422865, "learning_rate": 1.4528693822658966e-06, "loss": 0.43995189666748047, "step": 3323 }, { "epoch": 0.7663400576368876, "grad_norm": 1.2709137573762306, "learning_rate": 1.452529508152748e-06, "loss": 0.5199185609817505, "step": 3324 }, { "epoch": 0.7665706051873199, "grad_norm": 1.5146053118213605, "learning_rate": 1.4521895682917695e-06, "loss": 0.46382370591163635, "step": 3325 }, { "epoch": 0.7668011527377522, "grad_norm": 1.3262747578317158, "learning_rate": 1.4518495627323506e-06, "loss": 0.44753193855285645, "step": 3326 }, { "epoch": 0.7670317002881845, "grad_norm": 1.3868444920521885, "learning_rate": 1.4515094915238905e-06, "loss": 0.560943067073822, "step": 3327 }, { "epoch": 0.7672622478386167, "grad_norm": 1.3504010302196803, "learning_rate": 1.4511693547157987e-06, "loss": 0.5463162660598755, "step": 3328 }, { "epoch": 0.767492795389049, "grad_norm": 1.7895593803441687, "learning_rate": 1.4508291523574928e-06, "loss": 0.44977307319641113, "step": 3329 }, { "epoch": 0.7677233429394813, "grad_norm": 1.3352084625834344, "learning_rate": 1.4504888844984014e-06, "loss": 0.4786399304866791, "step": 3330 }, { "epoch": 0.7679538904899136, "grad_norm": 1.2243491120544803, "learning_rate": 1.450148551187961e-06, "loss": 0.5283411145210266, "step": 3331 }, { "epoch": 0.7681844380403459, "grad_norm": 1.4307683990100575, "learning_rate": 1.449808152475619e-06, "loss": 0.5005373954772949, "step": 3332 }, { "epoch": 0.7684149855907781, "grad_norm": 1.4475030416705006, "learning_rate": 1.4494676884108317e-06, "loss": 0.48068857192993164, "step": 3333 }, { "epoch": 0.7686455331412104, "grad_norm": 1.156674368619787, "learning_rate": 1.449127159043065e-06, "loss": 0.4934327006340027, "step": 3334 }, { "epoch": 0.7688760806916427, "grad_norm": 1.28650908922562, "learning_rate": 1.4487865644217942e-06, "loss": 0.49922657012939453, "step": 3335 }, { "epoch": 0.7691066282420749, "grad_norm": 1.3756554611981087, "learning_rate": 1.4484459045965044e-06, "loss": 0.5592349767684937, "step": 3336 }, { "epoch": 0.7693371757925072, "grad_norm": 1.2847582796112, "learning_rate": 1.4481051796166895e-06, "loss": 0.4248694181442261, "step": 3337 }, { "epoch": 0.7695677233429394, "grad_norm": 1.472604097829593, "learning_rate": 1.4477643895318537e-06, "loss": 0.580863356590271, "step": 3338 }, { "epoch": 0.7697982708933717, "grad_norm": 1.3223200851539239, "learning_rate": 1.44742353439151e-06, "loss": 0.48872265219688416, "step": 3339 }, { "epoch": 0.770028818443804, "grad_norm": 1.1094983126631952, "learning_rate": 1.4470826142451812e-06, "loss": 0.4195341467857361, "step": 3340 }, { "epoch": 0.7702593659942363, "grad_norm": 1.475980825700943, "learning_rate": 1.4467416291423998e-06, "loss": 0.5242099761962891, "step": 3341 }, { "epoch": 0.7704899135446686, "grad_norm": 1.3418034436842976, "learning_rate": 1.4464005791327067e-06, "loss": 0.4401935935020447, "step": 3342 }, { "epoch": 0.7707204610951008, "grad_norm": 1.4397973067757597, "learning_rate": 1.4460594642656532e-06, "loss": 0.47286516427993774, "step": 3343 }, { "epoch": 0.7709510086455331, "grad_norm": 1.2137594722189566, "learning_rate": 1.4457182845908e-06, "loss": 0.4484996497631073, "step": 3344 }, { "epoch": 0.7711815561959654, "grad_norm": 1.2540008258880415, "learning_rate": 1.4453770401577168e-06, "loss": 0.45632821321487427, "step": 3345 }, { "epoch": 0.7714121037463977, "grad_norm": 1.1241907177042443, "learning_rate": 1.445035731015983e-06, "loss": 0.4622989892959595, "step": 3346 }, { "epoch": 0.77164265129683, "grad_norm": 1.5455270730171995, "learning_rate": 1.4446943572151867e-06, "loss": 0.48796528577804565, "step": 3347 }, { "epoch": 0.7718731988472622, "grad_norm": 1.3039989310725912, "learning_rate": 1.4443529188049263e-06, "loss": 0.46072205901145935, "step": 3348 }, { "epoch": 0.7721037463976945, "grad_norm": 1.340539994752118, "learning_rate": 1.4440114158348097e-06, "loss": 0.5429719686508179, "step": 3349 }, { "epoch": 0.7723342939481268, "grad_norm": 1.4033984421504853, "learning_rate": 1.4436698483544532e-06, "loss": 0.466327041387558, "step": 3350 }, { "epoch": 0.7725648414985591, "grad_norm": 1.4675566024133893, "learning_rate": 1.443328216413483e-06, "loss": 0.5271450877189636, "step": 3351 }, { "epoch": 0.7727953890489914, "grad_norm": 1.171281390175273, "learning_rate": 1.442986520061535e-06, "loss": 0.4191371500492096, "step": 3352 }, { "epoch": 0.7730259365994236, "grad_norm": 1.334196633801331, "learning_rate": 1.4426447593482538e-06, "loss": 0.47077637910842896, "step": 3353 }, { "epoch": 0.7732564841498559, "grad_norm": 1.3111319856519132, "learning_rate": 1.442302934323294e-06, "loss": 0.5060920715332031, "step": 3354 }, { "epoch": 0.7734870317002882, "grad_norm": 1.2904401653594715, "learning_rate": 1.4419610450363186e-06, "loss": 0.4885629713535309, "step": 3355 }, { "epoch": 0.7737175792507205, "grad_norm": 1.239927093293297, "learning_rate": 1.4416190915370015e-06, "loss": 0.5303410291671753, "step": 3356 }, { "epoch": 0.7739481268011528, "grad_norm": 1.409650109035996, "learning_rate": 1.4412770738750242e-06, "loss": 0.5328885912895203, "step": 3357 }, { "epoch": 0.774178674351585, "grad_norm": 1.2629126051729718, "learning_rate": 1.4409349921000786e-06, "loss": 0.44366949796676636, "step": 3358 }, { "epoch": 0.7744092219020173, "grad_norm": 1.4207531408710683, "learning_rate": 1.4405928462618656e-06, "loss": 0.5273492336273193, "step": 3359 }, { "epoch": 0.7746397694524496, "grad_norm": 1.4234295193689461, "learning_rate": 1.4402506364100955e-06, "loss": 0.5007044076919556, "step": 3360 }, { "epoch": 0.7748703170028819, "grad_norm": 1.4068515591968336, "learning_rate": 1.4399083625944878e-06, "loss": 0.46129006147384644, "step": 3361 }, { "epoch": 0.7751008645533142, "grad_norm": 1.131790729379322, "learning_rate": 1.4395660248647712e-06, "loss": 0.4864243268966675, "step": 3362 }, { "epoch": 0.7753314121037463, "grad_norm": 1.4823929826396494, "learning_rate": 1.439223623270684e-06, "loss": 0.5185636878013611, "step": 3363 }, { "epoch": 0.7755619596541786, "grad_norm": 1.5336447453024817, "learning_rate": 1.438881157861973e-06, "loss": 0.503462553024292, "step": 3364 }, { "epoch": 0.7757925072046109, "grad_norm": 1.1579630453204193, "learning_rate": 1.4385386286883958e-06, "loss": 0.421749472618103, "step": 3365 }, { "epoch": 0.7760230547550432, "grad_norm": 1.663964100537045, "learning_rate": 1.4381960357997183e-06, "loss": 0.5585425496101379, "step": 3366 }, { "epoch": 0.7762536023054755, "grad_norm": 1.4186057360195095, "learning_rate": 1.4378533792457145e-06, "loss": 0.5588064789772034, "step": 3367 }, { "epoch": 0.7764841498559077, "grad_norm": 1.2195290831267696, "learning_rate": 1.4375106590761699e-06, "loss": 0.39466559886932373, "step": 3368 }, { "epoch": 0.77671469740634, "grad_norm": 1.2902755420837042, "learning_rate": 1.437167875340878e-06, "loss": 0.483551949262619, "step": 3369 }, { "epoch": 0.7769452449567723, "grad_norm": 1.5188689936616353, "learning_rate": 1.4368250280896414e-06, "loss": 0.4568926990032196, "step": 3370 }, { "epoch": 0.7771757925072046, "grad_norm": 1.4484780605131717, "learning_rate": 1.4364821173722726e-06, "loss": 0.4849422574043274, "step": 3371 }, { "epoch": 0.7774063400576369, "grad_norm": 1.6018044814378665, "learning_rate": 1.4361391432385926e-06, "loss": 0.48174428939819336, "step": 3372 }, { "epoch": 0.7776368876080691, "grad_norm": 1.5647433832432838, "learning_rate": 1.435796105738432e-06, "loss": 0.5706121921539307, "step": 3373 }, { "epoch": 0.7778674351585014, "grad_norm": 1.326716608540626, "learning_rate": 1.435453004921631e-06, "loss": 0.5058635473251343, "step": 3374 }, { "epoch": 0.7780979827089337, "grad_norm": 1.2089771113874428, "learning_rate": 1.4351098408380382e-06, "loss": 0.4246353209018707, "step": 3375 }, { "epoch": 0.778328530259366, "grad_norm": 1.4154899807455992, "learning_rate": 1.4347666135375118e-06, "loss": 0.4570143520832062, "step": 3376 }, { "epoch": 0.7785590778097983, "grad_norm": 1.3663868339749332, "learning_rate": 1.4344233230699194e-06, "loss": 0.5174911022186279, "step": 3377 }, { "epoch": 0.7787896253602306, "grad_norm": 1.3658429357569173, "learning_rate": 1.4340799694851373e-06, "loss": 0.5119853019714355, "step": 3378 }, { "epoch": 0.7790201729106628, "grad_norm": 1.3179107043166238, "learning_rate": 1.4337365528330516e-06, "loss": 0.4212768077850342, "step": 3379 }, { "epoch": 0.7792507204610951, "grad_norm": 1.5039977694618414, "learning_rate": 1.4333930731635562e-06, "loss": 0.4929216206073761, "step": 3380 }, { "epoch": 0.7794812680115274, "grad_norm": 1.4056076676761982, "learning_rate": 1.433049530526556e-06, "loss": 0.5154866576194763, "step": 3381 }, { "epoch": 0.7797118155619597, "grad_norm": 1.1061064947121604, "learning_rate": 1.4327059249719643e-06, "loss": 0.537919819355011, "step": 3382 }, { "epoch": 0.779942363112392, "grad_norm": 1.3211021723465781, "learning_rate": 1.432362256549703e-06, "loss": 0.5221723318099976, "step": 3383 }, { "epoch": 0.7801729106628242, "grad_norm": 1.2423942665098675, "learning_rate": 1.4320185253097032e-06, "loss": 0.44169020652770996, "step": 3384 }, { "epoch": 0.7804034582132565, "grad_norm": 1.3032004555348564, "learning_rate": 1.4316747313019062e-06, "loss": 0.4524272084236145, "step": 3385 }, { "epoch": 0.7806340057636888, "grad_norm": 1.3773696861621003, "learning_rate": 1.4313308745762614e-06, "loss": 0.49299994111061096, "step": 3386 }, { "epoch": 0.7808645533141211, "grad_norm": 1.4415837849285507, "learning_rate": 1.4309869551827274e-06, "loss": 0.3977648615837097, "step": 3387 }, { "epoch": 0.7810951008645534, "grad_norm": 1.3750364479895638, "learning_rate": 1.4306429731712722e-06, "loss": 0.4714970588684082, "step": 3388 }, { "epoch": 0.7813256484149856, "grad_norm": 1.3484439441991842, "learning_rate": 1.430298928591873e-06, "loss": 0.4930286109447479, "step": 3389 }, { "epoch": 0.7815561959654179, "grad_norm": 1.5313273344124982, "learning_rate": 1.4299548214945158e-06, "loss": 0.47765177488327026, "step": 3390 }, { "epoch": 0.7817867435158501, "grad_norm": 1.530990230685995, "learning_rate": 1.4296106519291958e-06, "loss": 0.5838227272033691, "step": 3391 }, { "epoch": 0.7820172910662824, "grad_norm": 1.5544559796124315, "learning_rate": 1.429266419945917e-06, "loss": 0.5206441879272461, "step": 3392 }, { "epoch": 0.7822478386167147, "grad_norm": 1.4914577305568477, "learning_rate": 1.428922125594693e-06, "loss": 0.4364234209060669, "step": 3393 }, { "epoch": 0.7824783861671469, "grad_norm": 1.3845197160287306, "learning_rate": 1.4285777689255465e-06, "loss": 0.48500919342041016, "step": 3394 }, { "epoch": 0.7827089337175792, "grad_norm": 1.6273547936063282, "learning_rate": 1.4282333499885085e-06, "loss": 0.5136459469795227, "step": 3395 }, { "epoch": 0.7829394812680115, "grad_norm": 1.2794363096503683, "learning_rate": 1.427888868833619e-06, "loss": 0.5106863975524902, "step": 3396 }, { "epoch": 0.7831700288184438, "grad_norm": 1.3619144328823791, "learning_rate": 1.4275443255109284e-06, "loss": 0.44078588485717773, "step": 3397 }, { "epoch": 0.7834005763688761, "grad_norm": 1.2498324876454319, "learning_rate": 1.4271997200704955e-06, "loss": 0.4955654442310333, "step": 3398 }, { "epoch": 0.7836311239193083, "grad_norm": 1.5359504534794015, "learning_rate": 1.4268550525623867e-06, "loss": 0.52781742811203, "step": 3399 }, { "epoch": 0.7838616714697406, "grad_norm": 1.3659296872523792, "learning_rate": 1.4265103230366796e-06, "loss": 0.45101356506347656, "step": 3400 }, { "epoch": 0.7840922190201729, "grad_norm": 1.5248196276431427, "learning_rate": 1.4261655315434593e-06, "loss": 0.5663477182388306, "step": 3401 }, { "epoch": 0.7843227665706052, "grad_norm": 1.3703871781933803, "learning_rate": 1.4258206781328205e-06, "loss": 0.48097681999206543, "step": 3402 }, { "epoch": 0.7845533141210375, "grad_norm": 1.404096619728342, "learning_rate": 1.425475762854867e-06, "loss": 0.5087981820106506, "step": 3403 }, { "epoch": 0.7847838616714697, "grad_norm": 1.46092867428151, "learning_rate": 1.4251307857597111e-06, "loss": 0.4906430244445801, "step": 3404 }, { "epoch": 0.785014409221902, "grad_norm": 1.488433432921487, "learning_rate": 1.4247857468974749e-06, "loss": 0.5091785788536072, "step": 3405 }, { "epoch": 0.7852449567723343, "grad_norm": 1.3564098758024126, "learning_rate": 1.4244406463182884e-06, "loss": 0.4587729573249817, "step": 3406 }, { "epoch": 0.7854755043227666, "grad_norm": 1.4157992822014371, "learning_rate": 1.4240954840722917e-06, "loss": 0.5763654708862305, "step": 3407 }, { "epoch": 0.7857060518731989, "grad_norm": 1.3214244473234844, "learning_rate": 1.4237502602096328e-06, "loss": 0.4503173828125, "step": 3408 }, { "epoch": 0.7859365994236311, "grad_norm": 1.3342645340505106, "learning_rate": 1.423404974780469e-06, "loss": 0.4033720791339874, "step": 3409 }, { "epoch": 0.7861671469740634, "grad_norm": 1.300850205565397, "learning_rate": 1.423059627834967e-06, "loss": 0.4149436950683594, "step": 3410 }, { "epoch": 0.7863976945244957, "grad_norm": 1.344125208287592, "learning_rate": 1.4227142194233024e-06, "loss": 0.48265737295150757, "step": 3411 }, { "epoch": 0.786628242074928, "grad_norm": 1.3980526693501352, "learning_rate": 1.422368749595659e-06, "loss": 0.4554409980773926, "step": 3412 }, { "epoch": 0.7868587896253603, "grad_norm": 1.5087719772601593, "learning_rate": 1.4220232184022297e-06, "loss": 0.4369552731513977, "step": 3413 }, { "epoch": 0.7870893371757925, "grad_norm": 1.5230063521052837, "learning_rate": 1.4216776258932175e-06, "loss": 0.6042525768280029, "step": 3414 }, { "epoch": 0.7873198847262248, "grad_norm": 1.3246312252241845, "learning_rate": 1.4213319721188328e-06, "loss": 0.4805430769920349, "step": 3415 }, { "epoch": 0.7875504322766571, "grad_norm": 1.5729403638570199, "learning_rate": 1.4209862571292954e-06, "loss": 0.4798882007598877, "step": 3416 }, { "epoch": 0.7877809798270894, "grad_norm": 1.4229019082389107, "learning_rate": 1.4206404809748344e-06, "loss": 0.5285375118255615, "step": 3417 }, { "epoch": 0.7880115273775216, "grad_norm": 1.1309356073852486, "learning_rate": 1.4202946437056872e-06, "loss": 0.47220277786254883, "step": 3418 }, { "epoch": 0.7882420749279538, "grad_norm": 1.3218992691271476, "learning_rate": 1.4199487453721006e-06, "loss": 0.4025140404701233, "step": 3419 }, { "epoch": 0.7884726224783861, "grad_norm": 1.6108738068796637, "learning_rate": 1.4196027860243303e-06, "loss": 0.527138352394104, "step": 3420 }, { "epoch": 0.7887031700288184, "grad_norm": 1.3234348965245013, "learning_rate": 1.4192567657126396e-06, "loss": 0.5286415815353394, "step": 3421 }, { "epoch": 0.7889337175792507, "grad_norm": 1.4695018037410315, "learning_rate": 1.4189106844873026e-06, "loss": 0.6222572326660156, "step": 3422 }, { "epoch": 0.789164265129683, "grad_norm": 1.4042703472266378, "learning_rate": 1.418564542398601e-06, "loss": 0.5256388187408447, "step": 3423 }, { "epoch": 0.7893948126801152, "grad_norm": 1.1750007294892328, "learning_rate": 1.4182183394968257e-06, "loss": 0.44092851877212524, "step": 3424 }, { "epoch": 0.7896253602305475, "grad_norm": 1.0680800247317186, "learning_rate": 1.417872075832276e-06, "loss": 0.49720248579978943, "step": 3425 }, { "epoch": 0.7898559077809798, "grad_norm": 1.3216310981310417, "learning_rate": 1.417525751455261e-06, "loss": 0.5339962244033813, "step": 3426 }, { "epoch": 0.7900864553314121, "grad_norm": 1.2572992892349242, "learning_rate": 1.4171793664160978e-06, "loss": 0.45812660455703735, "step": 3427 }, { "epoch": 0.7903170028818444, "grad_norm": 1.4380138533229172, "learning_rate": 1.4168329207651123e-06, "loss": 0.48299676179885864, "step": 3428 }, { "epoch": 0.7905475504322766, "grad_norm": 1.6688451231553607, "learning_rate": 1.4164864145526396e-06, "loss": 0.4708644151687622, "step": 3429 }, { "epoch": 0.7907780979827089, "grad_norm": 1.5321338371589512, "learning_rate": 1.4161398478290237e-06, "loss": 0.40403974056243896, "step": 3430 }, { "epoch": 0.7910086455331412, "grad_norm": 1.4234994908788658, "learning_rate": 1.4157932206446167e-06, "loss": 0.4803101122379303, "step": 3431 }, { "epoch": 0.7912391930835735, "grad_norm": 1.2821081470388878, "learning_rate": 1.4154465330497803e-06, "loss": 0.4613071084022522, "step": 3432 }, { "epoch": 0.7914697406340058, "grad_norm": 1.1011574369493982, "learning_rate": 1.4150997850948842e-06, "loss": 0.4833601117134094, "step": 3433 }, { "epoch": 0.791700288184438, "grad_norm": 1.5818926357989893, "learning_rate": 1.4147529768303077e-06, "loss": 0.5288225412368774, "step": 3434 }, { "epoch": 0.7919308357348703, "grad_norm": 1.394870295518193, "learning_rate": 1.4144061083064384e-06, "loss": 0.49780988693237305, "step": 3435 }, { "epoch": 0.7921613832853026, "grad_norm": 1.4923520865822915, "learning_rate": 1.4140591795736725e-06, "loss": 0.522127091884613, "step": 3436 }, { "epoch": 0.7923919308357349, "grad_norm": 1.3400151393389832, "learning_rate": 1.413712190682415e-06, "loss": 0.47800448536872864, "step": 3437 }, { "epoch": 0.7926224783861672, "grad_norm": 1.4774277715563446, "learning_rate": 1.4133651416830802e-06, "loss": 0.5165538787841797, "step": 3438 }, { "epoch": 0.7928530259365995, "grad_norm": 1.212288656706865, "learning_rate": 1.4130180326260904e-06, "loss": 0.44580405950546265, "step": 3439 }, { "epoch": 0.7930835734870317, "grad_norm": 1.3979714746769198, "learning_rate": 1.4126708635618769e-06, "loss": 0.46524137258529663, "step": 3440 }, { "epoch": 0.793314121037464, "grad_norm": 1.2774955650433963, "learning_rate": 1.4123236345408796e-06, "loss": 0.4641501307487488, "step": 3441 }, { "epoch": 0.7935446685878963, "grad_norm": 1.4649488671253945, "learning_rate": 1.4119763456135482e-06, "loss": 0.4559420943260193, "step": 3442 }, { "epoch": 0.7937752161383286, "grad_norm": 1.3524968198829954, "learning_rate": 1.4116289968303389e-06, "loss": 0.45827484130859375, "step": 3443 }, { "epoch": 0.7940057636887609, "grad_norm": 1.1948682370487143, "learning_rate": 1.4112815882417187e-06, "loss": 0.4432838261127472, "step": 3444 }, { "epoch": 0.7942363112391931, "grad_norm": 1.3739238935791784, "learning_rate": 1.410934119898162e-06, "loss": 0.45351576805114746, "step": 3445 }, { "epoch": 0.7944668587896253, "grad_norm": 1.5157866838440086, "learning_rate": 1.4105865918501526e-06, "loss": 0.5147103071212769, "step": 3446 }, { "epoch": 0.7946974063400576, "grad_norm": 1.2972558887632173, "learning_rate": 1.4102390041481828e-06, "loss": 0.42173266410827637, "step": 3447 }, { "epoch": 0.7949279538904899, "grad_norm": 1.3419874816991175, "learning_rate": 1.4098913568427533e-06, "loss": 0.46593934297561646, "step": 3448 }, { "epoch": 0.7951585014409221, "grad_norm": 1.3509410352673197, "learning_rate": 1.409543649984374e-06, "loss": 0.5319167375564575, "step": 3449 }, { "epoch": 0.7953890489913544, "grad_norm": 1.5617478431314156, "learning_rate": 1.409195883623562e-06, "loss": 0.5881174802780151, "step": 3450 }, { "epoch": 0.7956195965417867, "grad_norm": 1.1523297014628997, "learning_rate": 1.4088480578108453e-06, "loss": 0.40378129482269287, "step": 3451 }, { "epoch": 0.795850144092219, "grad_norm": 1.4412429262971402, "learning_rate": 1.4085001725967592e-06, "loss": 0.4907345473766327, "step": 3452 }, { "epoch": 0.7960806916426513, "grad_norm": 1.4038961218455859, "learning_rate": 1.4081522280318472e-06, "loss": 0.5060145258903503, "step": 3453 }, { "epoch": 0.7963112391930836, "grad_norm": 1.3097671833232498, "learning_rate": 1.4078042241666624e-06, "loss": 0.4996522068977356, "step": 3454 }, { "epoch": 0.7965417867435158, "grad_norm": 1.4059796957773147, "learning_rate": 1.4074561610517664e-06, "loss": 0.44975441694259644, "step": 3455 }, { "epoch": 0.7967723342939481, "grad_norm": 1.3944917831982637, "learning_rate": 1.4071080387377286e-06, "loss": 0.5002501010894775, "step": 3456 }, { "epoch": 0.7970028818443804, "grad_norm": 1.4468660365438362, "learning_rate": 1.4067598572751277e-06, "loss": 0.4908374547958374, "step": 3457 }, { "epoch": 0.7972334293948127, "grad_norm": 1.3993390163985648, "learning_rate": 1.4064116167145515e-06, "loss": 0.5603960752487183, "step": 3458 }, { "epoch": 0.797463976945245, "grad_norm": 1.5644785025139432, "learning_rate": 1.4060633171065949e-06, "loss": 0.5647206902503967, "step": 3459 }, { "epoch": 0.7976945244956772, "grad_norm": 1.2624336646884822, "learning_rate": 1.4057149585018624e-06, "loss": 0.4450913667678833, "step": 3460 }, { "epoch": 0.7979250720461095, "grad_norm": 1.579456116775947, "learning_rate": 1.4053665409509672e-06, "loss": 0.4865730404853821, "step": 3461 }, { "epoch": 0.7981556195965418, "grad_norm": 1.6941134643857723, "learning_rate": 1.4050180645045305e-06, "loss": 0.5277330279350281, "step": 3462 }, { "epoch": 0.7983861671469741, "grad_norm": 1.3510815860850502, "learning_rate": 1.4046695292131823e-06, "loss": 0.4863555431365967, "step": 3463 }, { "epoch": 0.7986167146974064, "grad_norm": 1.5401585546637828, "learning_rate": 1.4043209351275613e-06, "loss": 0.520676851272583, "step": 3464 }, { "epoch": 0.7988472622478386, "grad_norm": 1.5129404253879941, "learning_rate": 1.4039722822983145e-06, "loss": 0.5390855669975281, "step": 3465 }, { "epoch": 0.7990778097982709, "grad_norm": 1.4532008026364132, "learning_rate": 1.4036235707760973e-06, "loss": 0.48805344104766846, "step": 3466 }, { "epoch": 0.7993083573487032, "grad_norm": 1.3257398340732425, "learning_rate": 1.4032748006115741e-06, "loss": 0.5081312656402588, "step": 3467 }, { "epoch": 0.7995389048991355, "grad_norm": 1.1210684086195801, "learning_rate": 1.402925971855418e-06, "loss": 0.41373687982559204, "step": 3468 }, { "epoch": 0.7997694524495678, "grad_norm": 1.5795268987436135, "learning_rate": 1.4025770845583093e-06, "loss": 0.45267200469970703, "step": 3469 }, { "epoch": 0.8, "grad_norm": 1.3610036905604035, "learning_rate": 1.402228138770938e-06, "loss": 0.4800153970718384, "step": 3470 }, { "epoch": 0.8002305475504323, "grad_norm": 1.6530255088882315, "learning_rate": 1.401879134544003e-06, "loss": 0.5338609218597412, "step": 3471 }, { "epoch": 0.8004610951008646, "grad_norm": 1.3481447596394023, "learning_rate": 1.4015300719282102e-06, "loss": 0.4515029788017273, "step": 3472 }, { "epoch": 0.8006916426512968, "grad_norm": 1.548330384939731, "learning_rate": 1.4011809509742746e-06, "loss": 0.5389237403869629, "step": 3473 }, { "epoch": 0.8009221902017291, "grad_norm": 1.6598589936630226, "learning_rate": 1.4008317717329208e-06, "loss": 0.4558069705963135, "step": 3474 }, { "epoch": 0.8011527377521613, "grad_norm": 1.4976877876644026, "learning_rate": 1.4004825342548798e-06, "loss": 0.5577516555786133, "step": 3475 }, { "epoch": 0.8013832853025936, "grad_norm": 1.3729770351500163, "learning_rate": 1.400133238590893e-06, "loss": 0.5946800708770752, "step": 3476 }, { "epoch": 0.8016138328530259, "grad_norm": 1.3598913825351893, "learning_rate": 1.3997838847917093e-06, "loss": 0.5363301038742065, "step": 3477 }, { "epoch": 0.8018443804034582, "grad_norm": 1.631090682220392, "learning_rate": 1.3994344729080856e-06, "loss": 0.5241801738739014, "step": 3478 }, { "epoch": 0.8020749279538905, "grad_norm": 1.557902999736325, "learning_rate": 1.3990850029907883e-06, "loss": 0.5182064175605774, "step": 3479 }, { "epoch": 0.8023054755043227, "grad_norm": 1.3906232947379025, "learning_rate": 1.3987354750905922e-06, "loss": 0.46549123525619507, "step": 3480 }, { "epoch": 0.802536023054755, "grad_norm": 1.3039609736352331, "learning_rate": 1.3983858892582792e-06, "loss": 0.4560173451900482, "step": 3481 }, { "epoch": 0.8027665706051873, "grad_norm": 1.7257423768652569, "learning_rate": 1.3980362455446404e-06, "loss": 0.5379814505577087, "step": 3482 }, { "epoch": 0.8029971181556196, "grad_norm": 1.551642989984616, "learning_rate": 1.3976865440004763e-06, "loss": 0.509077250957489, "step": 3483 }, { "epoch": 0.8032276657060519, "grad_norm": 1.6445081986461623, "learning_rate": 1.3973367846765944e-06, "loss": 0.5225260257720947, "step": 3484 }, { "epoch": 0.8034582132564841, "grad_norm": 1.8771783864644709, "learning_rate": 1.396986967623811e-06, "loss": 0.49268752336502075, "step": 3485 }, { "epoch": 0.8036887608069164, "grad_norm": 1.2992049945109447, "learning_rate": 1.3966370928929508e-06, "loss": 0.5338944792747498, "step": 3486 }, { "epoch": 0.8039193083573487, "grad_norm": 1.4726736877240487, "learning_rate": 1.3962871605348472e-06, "loss": 0.5020278692245483, "step": 3487 }, { "epoch": 0.804149855907781, "grad_norm": 1.451859327899609, "learning_rate": 1.3959371706003418e-06, "loss": 0.5514947175979614, "step": 3488 }, { "epoch": 0.8043804034582133, "grad_norm": 1.4474465154502296, "learning_rate": 1.3955871231402843e-06, "loss": 0.46391648054122925, "step": 3489 }, { "epoch": 0.8046109510086455, "grad_norm": 1.282000029061929, "learning_rate": 1.395237018205533e-06, "loss": 0.4571886956691742, "step": 3490 }, { "epoch": 0.8048414985590778, "grad_norm": 1.587215609657964, "learning_rate": 1.3948868558469546e-06, "loss": 0.524207353591919, "step": 3491 }, { "epoch": 0.8050720461095101, "grad_norm": 1.3146551085590652, "learning_rate": 1.3945366361154238e-06, "loss": 0.4406725764274597, "step": 3492 }, { "epoch": 0.8053025936599424, "grad_norm": 1.3725564429124097, "learning_rate": 1.3941863590618246e-06, "loss": 0.46178239583969116, "step": 3493 }, { "epoch": 0.8055331412103747, "grad_norm": 1.2708692924858904, "learning_rate": 1.3938360247370475e-06, "loss": 0.41395291686058044, "step": 3494 }, { "epoch": 0.805763688760807, "grad_norm": 1.2877185719651698, "learning_rate": 1.3934856331919929e-06, "loss": 0.42799150943756104, "step": 3495 }, { "epoch": 0.8059942363112392, "grad_norm": 1.4566912923520166, "learning_rate": 1.3931351844775697e-06, "loss": 0.5145558714866638, "step": 3496 }, { "epoch": 0.8062247838616715, "grad_norm": 1.4297054776894131, "learning_rate": 1.3927846786446934e-06, "loss": 0.4697001576423645, "step": 3497 }, { "epoch": 0.8064553314121038, "grad_norm": 1.25977839257532, "learning_rate": 1.3924341157442895e-06, "loss": 0.44351255893707275, "step": 3498 }, { "epoch": 0.8066858789625361, "grad_norm": 1.3009582600594158, "learning_rate": 1.392083495827291e-06, "loss": 0.44177383184432983, "step": 3499 }, { "epoch": 0.8069164265129684, "grad_norm": 1.242620176786914, "learning_rate": 1.3917328189446395e-06, "loss": 0.4446752071380615, "step": 3500 }, { "epoch": 0.8071469740634005, "grad_norm": 1.341227072498732, "learning_rate": 1.3913820851472845e-06, "loss": 0.4698784351348877, "step": 3501 }, { "epoch": 0.8073775216138328, "grad_norm": 1.4170578615183846, "learning_rate": 1.3910312944861837e-06, "loss": 0.5006378889083862, "step": 3502 }, { "epoch": 0.8076080691642651, "grad_norm": 1.380723657233392, "learning_rate": 1.3906804470123038e-06, "loss": 0.47502535581588745, "step": 3503 }, { "epoch": 0.8078386167146974, "grad_norm": 1.3489334550760619, "learning_rate": 1.3903295427766192e-06, "loss": 0.6008619070053101, "step": 3504 }, { "epoch": 0.8080691642651296, "grad_norm": 1.4759126954971649, "learning_rate": 1.3899785818301123e-06, "loss": 0.4955158531665802, "step": 3505 }, { "epoch": 0.8082997118155619, "grad_norm": 1.3044047485737327, "learning_rate": 1.3896275642237747e-06, "loss": 0.42474400997161865, "step": 3506 }, { "epoch": 0.8085302593659942, "grad_norm": 1.4323063583263014, "learning_rate": 1.3892764900086048e-06, "loss": 0.5359533429145813, "step": 3507 }, { "epoch": 0.8087608069164265, "grad_norm": 1.3384923488872582, "learning_rate": 1.3889253592356106e-06, "loss": 0.5112940669059753, "step": 3508 }, { "epoch": 0.8089913544668588, "grad_norm": 1.2992899352507392, "learning_rate": 1.388574171955808e-06, "loss": 0.5173017978668213, "step": 3509 }, { "epoch": 0.809221902017291, "grad_norm": 1.2833115975697806, "learning_rate": 1.3882229282202202e-06, "loss": 0.49251043796539307, "step": 3510 }, { "epoch": 0.8094524495677233, "grad_norm": 1.3177101495371608, "learning_rate": 1.3878716280798793e-06, "loss": 0.5374883413314819, "step": 3511 }, { "epoch": 0.8096829971181556, "grad_norm": 1.4600072526939667, "learning_rate": 1.3875202715858262e-06, "loss": 0.4614766240119934, "step": 3512 }, { "epoch": 0.8099135446685879, "grad_norm": 1.481594502687008, "learning_rate": 1.3871688587891087e-06, "loss": 0.5260672569274902, "step": 3513 }, { "epoch": 0.8101440922190202, "grad_norm": 1.4772605570900035, "learning_rate": 1.3868173897407838e-06, "loss": 0.4460594058036804, "step": 3514 }, { "epoch": 0.8103746397694525, "grad_norm": 1.7026122663882848, "learning_rate": 1.3864658644919161e-06, "loss": 0.5009329319000244, "step": 3515 }, { "epoch": 0.8106051873198847, "grad_norm": 1.384991070111951, "learning_rate": 1.3861142830935783e-06, "loss": 0.5075333118438721, "step": 3516 }, { "epoch": 0.810835734870317, "grad_norm": 1.5223425101973258, "learning_rate": 1.385762645596852e-06, "loss": 0.44005393981933594, "step": 3517 }, { "epoch": 0.8110662824207493, "grad_norm": 1.1883449015067757, "learning_rate": 1.3854109520528265e-06, "loss": 0.4705634117126465, "step": 3518 }, { "epoch": 0.8112968299711816, "grad_norm": 1.2999872371396695, "learning_rate": 1.385059202512599e-06, "loss": 0.4802425801753998, "step": 3519 }, { "epoch": 0.8115273775216139, "grad_norm": 1.3224607860470696, "learning_rate": 1.384707397027275e-06, "loss": 0.5397260189056396, "step": 3520 }, { "epoch": 0.8117579250720461, "grad_norm": 1.4021212201308277, "learning_rate": 1.3843555356479681e-06, "loss": 0.4671134948730469, "step": 3521 }, { "epoch": 0.8119884726224784, "grad_norm": 1.354117617094354, "learning_rate": 1.3840036184258005e-06, "loss": 0.46687519550323486, "step": 3522 }, { "epoch": 0.8122190201729107, "grad_norm": 1.449750301342306, "learning_rate": 1.3836516454119016e-06, "loss": 0.5054115056991577, "step": 3523 }, { "epoch": 0.812449567723343, "grad_norm": 1.5277280034784881, "learning_rate": 1.3832996166574098e-06, "loss": 0.40758228302001953, "step": 3524 }, { "epoch": 0.8126801152737753, "grad_norm": 1.2548484991062847, "learning_rate": 1.3829475322134711e-06, "loss": 0.4480733573436737, "step": 3525 }, { "epoch": 0.8129106628242075, "grad_norm": 1.3977222634007296, "learning_rate": 1.3825953921312398e-06, "loss": 0.4944935441017151, "step": 3526 }, { "epoch": 0.8131412103746398, "grad_norm": 1.4647957112314143, "learning_rate": 1.3822431964618776e-06, "loss": 0.45961660146713257, "step": 3527 }, { "epoch": 0.813371757925072, "grad_norm": 1.2821219036004161, "learning_rate": 1.3818909452565558e-06, "loss": 0.4575357437133789, "step": 3528 }, { "epoch": 0.8136023054755043, "grad_norm": 1.3626120540148763, "learning_rate": 1.3815386385664524e-06, "loss": 0.47747135162353516, "step": 3529 }, { "epoch": 0.8138328530259366, "grad_norm": 1.444122143074095, "learning_rate": 1.381186276442754e-06, "loss": 0.5448052883148193, "step": 3530 }, { "epoch": 0.8140634005763688, "grad_norm": 1.201463194482245, "learning_rate": 1.3808338589366545e-06, "loss": 0.4915286898612976, "step": 3531 }, { "epoch": 0.8142939481268011, "grad_norm": 1.4949477151672794, "learning_rate": 1.3804813860993574e-06, "loss": 0.5531010627746582, "step": 3532 }, { "epoch": 0.8145244956772334, "grad_norm": 1.6945509499432712, "learning_rate": 1.380128857982073e-06, "loss": 0.454088032245636, "step": 3533 }, { "epoch": 0.8147550432276657, "grad_norm": 1.5456279149373304, "learning_rate": 1.37977627463602e-06, "loss": 0.5124789476394653, "step": 3534 }, { "epoch": 0.814985590778098, "grad_norm": 1.480001259627385, "learning_rate": 1.3794236361124248e-06, "loss": 0.503969132900238, "step": 3535 }, { "epoch": 0.8152161383285302, "grad_norm": 1.322239173486934, "learning_rate": 1.3790709424625222e-06, "loss": 0.5175889134407043, "step": 3536 }, { "epoch": 0.8154466858789625, "grad_norm": 1.1817953864001418, "learning_rate": 1.3787181937375557e-06, "loss": 0.46064361929893494, "step": 3537 }, { "epoch": 0.8156772334293948, "grad_norm": 1.5516876974260332, "learning_rate": 1.3783653899887747e-06, "loss": 0.5197643041610718, "step": 3538 }, { "epoch": 0.8159077809798271, "grad_norm": 1.2896396858717665, "learning_rate": 1.3780125312674388e-06, "loss": 0.5226879715919495, "step": 3539 }, { "epoch": 0.8161383285302594, "grad_norm": 1.511035190303668, "learning_rate": 1.3776596176248146e-06, "loss": 0.4556129574775696, "step": 3540 }, { "epoch": 0.8163688760806916, "grad_norm": 1.4478108658486855, "learning_rate": 1.3773066491121766e-06, "loss": 0.4984133243560791, "step": 3541 }, { "epoch": 0.8165994236311239, "grad_norm": 1.5419703816463373, "learning_rate": 1.3769536257808074e-06, "loss": 0.48790422081947327, "step": 3542 }, { "epoch": 0.8168299711815562, "grad_norm": 1.2462927406104716, "learning_rate": 1.376600547681998e-06, "loss": 0.44798487424850464, "step": 3543 }, { "epoch": 0.8170605187319885, "grad_norm": 1.343867717269155, "learning_rate": 1.3762474148670467e-06, "loss": 0.49012643098831177, "step": 3544 }, { "epoch": 0.8172910662824208, "grad_norm": 1.6171785281361803, "learning_rate": 1.3758942273872598e-06, "loss": 0.527820885181427, "step": 3545 }, { "epoch": 0.817521613832853, "grad_norm": 1.539967947177677, "learning_rate": 1.3755409852939521e-06, "loss": 0.5369070768356323, "step": 3546 }, { "epoch": 0.8177521613832853, "grad_norm": 1.2728699323899526, "learning_rate": 1.3751876886384462e-06, "loss": 0.4529775381088257, "step": 3547 }, { "epoch": 0.8179827089337176, "grad_norm": 1.45208589614268, "learning_rate": 1.3748343374720717e-06, "loss": 0.49659043550491333, "step": 3548 }, { "epoch": 0.8182132564841499, "grad_norm": 1.4337263996576257, "learning_rate": 1.3744809318461674e-06, "loss": 0.5159590840339661, "step": 3549 }, { "epoch": 0.8184438040345822, "grad_norm": 1.2952875309827698, "learning_rate": 1.3741274718120796e-06, "loss": 0.5124874711036682, "step": 3550 }, { "epoch": 0.8186743515850144, "grad_norm": 1.466684664869956, "learning_rate": 1.3737739574211619e-06, "loss": 0.4404506981372833, "step": 3551 }, { "epoch": 0.8189048991354467, "grad_norm": 1.5174615480057096, "learning_rate": 1.3734203887247763e-06, "loss": 0.3680835962295532, "step": 3552 }, { "epoch": 0.819135446685879, "grad_norm": 1.4287902210203849, "learning_rate": 1.3730667657742934e-06, "loss": 0.46388792991638184, "step": 3553 }, { "epoch": 0.8193659942363113, "grad_norm": 1.1942746308000458, "learning_rate": 1.3727130886210901e-06, "loss": 0.44120755791664124, "step": 3554 }, { "epoch": 0.8195965417867436, "grad_norm": 1.6778310740061562, "learning_rate": 1.3723593573165521e-06, "loss": 0.46938973665237427, "step": 3555 }, { "epoch": 0.8198270893371757, "grad_norm": 1.3549067457449737, "learning_rate": 1.3720055719120734e-06, "loss": 0.4371834993362427, "step": 3556 }, { "epoch": 0.820057636887608, "grad_norm": 1.7435095113050765, "learning_rate": 1.3716517324590545e-06, "loss": 0.5550415515899658, "step": 3557 }, { "epoch": 0.8202881844380403, "grad_norm": 1.7043736119794284, "learning_rate": 1.3712978390089055e-06, "loss": 0.4762400984764099, "step": 3558 }, { "epoch": 0.8205187319884726, "grad_norm": 1.2860874189212628, "learning_rate": 1.370943891613043e-06, "loss": 0.45802271366119385, "step": 3559 }, { "epoch": 0.8207492795389049, "grad_norm": 1.5384333307494868, "learning_rate": 1.3705898903228917e-06, "loss": 0.49840807914733887, "step": 3560 }, { "epoch": 0.8209798270893371, "grad_norm": 1.4905282376893718, "learning_rate": 1.3702358351898844e-06, "loss": 0.4900815486907959, "step": 3561 }, { "epoch": 0.8212103746397694, "grad_norm": 1.2987837199320826, "learning_rate": 1.369881726265462e-06, "loss": 0.5785123109817505, "step": 3562 }, { "epoch": 0.8214409221902017, "grad_norm": 1.3354174661321194, "learning_rate": 1.3695275636010727e-06, "loss": 0.44613367319107056, "step": 3563 }, { "epoch": 0.821671469740634, "grad_norm": 1.4261577457389192, "learning_rate": 1.3691733472481721e-06, "loss": 0.5450624823570251, "step": 3564 }, { "epoch": 0.8219020172910663, "grad_norm": 1.6118170061862536, "learning_rate": 1.368819077258225e-06, "loss": 0.49650490283966064, "step": 3565 }, { "epoch": 0.8221325648414985, "grad_norm": 1.3702523924375916, "learning_rate": 1.3684647536827025e-06, "loss": 0.4716230034828186, "step": 3566 }, { "epoch": 0.8223631123919308, "grad_norm": 1.3226178858378896, "learning_rate": 1.3681103765730843e-06, "loss": 0.5759170651435852, "step": 3567 }, { "epoch": 0.8225936599423631, "grad_norm": 1.1648618477588448, "learning_rate": 1.3677559459808578e-06, "loss": 0.4766387939453125, "step": 3568 }, { "epoch": 0.8228242074927954, "grad_norm": 1.3636946954115148, "learning_rate": 1.3674014619575184e-06, "loss": 0.6306042671203613, "step": 3569 }, { "epoch": 0.8230547550432277, "grad_norm": 1.5291938076513798, "learning_rate": 1.367046924554568e-06, "loss": 0.5182838439941406, "step": 3570 }, { "epoch": 0.82328530259366, "grad_norm": 1.470314419911423, "learning_rate": 1.366692333823518e-06, "loss": 0.49583834409713745, "step": 3571 }, { "epoch": 0.8235158501440922, "grad_norm": 1.300822411685143, "learning_rate": 1.3663376898158867e-06, "loss": 0.48286184668540955, "step": 3572 }, { "epoch": 0.8237463976945245, "grad_norm": 1.4808130648732762, "learning_rate": 1.3659829925831998e-06, "loss": 0.5258426666259766, "step": 3573 }, { "epoch": 0.8239769452449568, "grad_norm": 1.3932683936949655, "learning_rate": 1.3656282421769916e-06, "loss": 0.4287678301334381, "step": 3574 }, { "epoch": 0.8242074927953891, "grad_norm": 1.223580053985394, "learning_rate": 1.3652734386488032e-06, "loss": 0.4457889795303345, "step": 3575 }, { "epoch": 0.8244380403458214, "grad_norm": 1.4128958414592394, "learning_rate": 1.3649185820501839e-06, "loss": 0.4268190860748291, "step": 3576 }, { "epoch": 0.8246685878962536, "grad_norm": 1.3779100654403185, "learning_rate": 1.3645636724326909e-06, "loss": 0.43643975257873535, "step": 3577 }, { "epoch": 0.8248991354466859, "grad_norm": 1.2944281887265883, "learning_rate": 1.3642087098478888e-06, "loss": 0.5144875049591064, "step": 3578 }, { "epoch": 0.8251296829971182, "grad_norm": 1.3487775063149092, "learning_rate": 1.36385369434735e-06, "loss": 0.4662047028541565, "step": 3579 }, { "epoch": 0.8253602305475505, "grad_norm": 1.270291737455824, "learning_rate": 1.3634986259826543e-06, "loss": 0.5866056680679321, "step": 3580 }, { "epoch": 0.8255907780979828, "grad_norm": 1.5499964236276533, "learning_rate": 1.3631435048053896e-06, "loss": 0.5337757468223572, "step": 3581 }, { "epoch": 0.825821325648415, "grad_norm": 1.2164376884898056, "learning_rate": 1.3627883308671518e-06, "loss": 0.501396656036377, "step": 3582 }, { "epoch": 0.8260518731988472, "grad_norm": 1.354943780023473, "learning_rate": 1.3624331042195432e-06, "loss": 0.4726705551147461, "step": 3583 }, { "epoch": 0.8262824207492795, "grad_norm": 1.3367759670185508, "learning_rate": 1.3620778249141748e-06, "loss": 0.52537602186203, "step": 3584 }, { "epoch": 0.8265129682997118, "grad_norm": 1.2520406736372904, "learning_rate": 1.3617224930026652e-06, "loss": 0.43500566482543945, "step": 3585 }, { "epoch": 0.826743515850144, "grad_norm": 1.6605179728034725, "learning_rate": 1.3613671085366406e-06, "loss": 0.4788056015968323, "step": 3586 }, { "epoch": 0.8269740634005763, "grad_norm": 1.6686837912118906, "learning_rate": 1.3610116715677338e-06, "loss": 0.4457281231880188, "step": 3587 }, { "epoch": 0.8272046109510086, "grad_norm": 1.2791720026472448, "learning_rate": 1.360656182147587e-06, "loss": 0.5165676474571228, "step": 3588 }, { "epoch": 0.8274351585014409, "grad_norm": 1.2518329894582794, "learning_rate": 1.3603006403278485e-06, "loss": 0.5236124992370605, "step": 3589 }, { "epoch": 0.8276657060518732, "grad_norm": 1.3949843727870912, "learning_rate": 1.3599450461601754e-06, "loss": 0.5561662912368774, "step": 3590 }, { "epoch": 0.8278962536023055, "grad_norm": 1.461488217809503, "learning_rate": 1.3595893996962313e-06, "loss": 0.520460307598114, "step": 3591 }, { "epoch": 0.8281268011527377, "grad_norm": 1.3837342621409494, "learning_rate": 1.3592337009876884e-06, "loss": 0.4899410903453827, "step": 3592 }, { "epoch": 0.82835734870317, "grad_norm": 1.5007694688548554, "learning_rate": 1.3588779500862253e-06, "loss": 0.45363926887512207, "step": 3593 }, { "epoch": 0.8285878962536023, "grad_norm": 1.4492330929060628, "learning_rate": 1.35852214704353e-06, "loss": 0.46841347217559814, "step": 3594 }, { "epoch": 0.8288184438040346, "grad_norm": 1.2348409794594017, "learning_rate": 1.358166291911296e-06, "loss": 0.490681916475296, "step": 3595 }, { "epoch": 0.8290489913544669, "grad_norm": 1.4667015092814133, "learning_rate": 1.3578103847412257e-06, "loss": 0.49177154898643494, "step": 3596 }, { "epoch": 0.8292795389048991, "grad_norm": 1.1277202452814692, "learning_rate": 1.3574544255850288e-06, "loss": 0.4094833433628082, "step": 3597 }, { "epoch": 0.8295100864553314, "grad_norm": 1.3251048380610777, "learning_rate": 1.3570984144944225e-06, "loss": 0.553383469581604, "step": 3598 }, { "epoch": 0.8297406340057637, "grad_norm": 1.3429509939320259, "learning_rate": 1.3567423515211314e-06, "loss": 0.5091391801834106, "step": 3599 }, { "epoch": 0.829971181556196, "grad_norm": 1.3776124908463028, "learning_rate": 1.3563862367168875e-06, "loss": 0.4854011535644531, "step": 3600 }, { "epoch": 0.8302017291066283, "grad_norm": 1.591671011790601, "learning_rate": 1.3560300701334308e-06, "loss": 0.5501555800437927, "step": 3601 }, { "epoch": 0.8304322766570605, "grad_norm": 1.3392087051404795, "learning_rate": 1.355673851822509e-06, "loss": 0.4151724874973297, "step": 3602 }, { "epoch": 0.8306628242074928, "grad_norm": 1.2702951133114364, "learning_rate": 1.3553175818358761e-06, "loss": 0.408272922039032, "step": 3603 }, { "epoch": 0.8308933717579251, "grad_norm": 1.2667799997100424, "learning_rate": 1.3549612602252953e-06, "loss": 0.4814903736114502, "step": 3604 }, { "epoch": 0.8311239193083574, "grad_norm": 1.2668151257014681, "learning_rate": 1.3546048870425354e-06, "loss": 0.44924643635749817, "step": 3605 }, { "epoch": 0.8313544668587897, "grad_norm": 1.4703658781584852, "learning_rate": 1.3542484623393749e-06, "loss": 0.5310448408126831, "step": 3606 }, { "epoch": 0.831585014409222, "grad_norm": 1.5531269499723805, "learning_rate": 1.3538919861675978e-06, "loss": 0.5050290822982788, "step": 3607 }, { "epoch": 0.8318155619596542, "grad_norm": 1.3231435225244499, "learning_rate": 1.3535354585789965e-06, "loss": 0.4794940948486328, "step": 3608 }, { "epoch": 0.8320461095100865, "grad_norm": 1.3928315758026486, "learning_rate": 1.3531788796253705e-06, "loss": 0.4965074062347412, "step": 3609 }, { "epoch": 0.8322766570605188, "grad_norm": 1.5037573190112965, "learning_rate": 1.352822249358528e-06, "loss": 0.5224358439445496, "step": 3610 }, { "epoch": 0.832507204610951, "grad_norm": 1.277499818911999, "learning_rate": 1.3524655678302826e-06, "loss": 0.4840403199195862, "step": 3611 }, { "epoch": 0.8327377521613832, "grad_norm": 1.4604355145667074, "learning_rate": 1.3521088350924567e-06, "loss": 0.5683179497718811, "step": 3612 }, { "epoch": 0.8329682997118155, "grad_norm": 1.3815402228742937, "learning_rate": 1.3517520511968803e-06, "loss": 0.4887921214103699, "step": 3613 }, { "epoch": 0.8331988472622478, "grad_norm": 1.609496545258807, "learning_rate": 1.3513952161953899e-06, "loss": 0.49039024114608765, "step": 3614 }, { "epoch": 0.8334293948126801, "grad_norm": 1.5270344421851152, "learning_rate": 1.35103833013983e-06, "loss": 0.5262584686279297, "step": 3615 }, { "epoch": 0.8336599423631124, "grad_norm": 1.4463121674933177, "learning_rate": 1.3506813930820527e-06, "loss": 0.5098379850387573, "step": 3616 }, { "epoch": 0.8338904899135446, "grad_norm": 1.5024028998528263, "learning_rate": 1.3503244050739169e-06, "loss": 0.5597623586654663, "step": 3617 }, { "epoch": 0.8341210374639769, "grad_norm": 1.2361771648921898, "learning_rate": 1.3499673661672894e-06, "loss": 0.49627748131752014, "step": 3618 }, { "epoch": 0.8343515850144092, "grad_norm": 1.2808464002536806, "learning_rate": 1.3496102764140443e-06, "loss": 0.4776031970977783, "step": 3619 }, { "epoch": 0.8345821325648415, "grad_norm": 1.518980683456512, "learning_rate": 1.3492531358660633e-06, "loss": 0.554206371307373, "step": 3620 }, { "epoch": 0.8348126801152738, "grad_norm": 1.5508283995044057, "learning_rate": 1.348895944575234e-06, "loss": 0.5198627710342407, "step": 3621 }, { "epoch": 0.835043227665706, "grad_norm": 1.277882860539239, "learning_rate": 1.348538702593454e-06, "loss": 0.39228206872940063, "step": 3622 }, { "epoch": 0.8352737752161383, "grad_norm": 1.4700971295114715, "learning_rate": 1.3481814099726266e-06, "loss": 0.5013151168823242, "step": 3623 }, { "epoch": 0.8355043227665706, "grad_norm": 1.3346013574826283, "learning_rate": 1.347824066764662e-06, "loss": 0.5275527238845825, "step": 3624 }, { "epoch": 0.8357348703170029, "grad_norm": 1.2993289039746385, "learning_rate": 1.3474666730214788e-06, "loss": 0.4131700396537781, "step": 3625 }, { "epoch": 0.8359654178674352, "grad_norm": 1.3417984305917734, "learning_rate": 1.3471092287950027e-06, "loss": 0.4572218656539917, "step": 3626 }, { "epoch": 0.8361959654178674, "grad_norm": 1.5009745298932513, "learning_rate": 1.3467517341371668e-06, "loss": 0.4194955825805664, "step": 3627 }, { "epoch": 0.8364265129682997, "grad_norm": 1.219361308104634, "learning_rate": 1.3463941890999108e-06, "loss": 0.5566304922103882, "step": 3628 }, { "epoch": 0.836657060518732, "grad_norm": 1.3289507453572718, "learning_rate": 1.3460365937351824e-06, "loss": 0.48764199018478394, "step": 3629 }, { "epoch": 0.8368876080691643, "grad_norm": 1.3635097888093082, "learning_rate": 1.3456789480949371e-06, "loss": 0.5235868692398071, "step": 3630 }, { "epoch": 0.8371181556195966, "grad_norm": 1.3481928837782249, "learning_rate": 1.3453212522311365e-06, "loss": 0.39009493589401245, "step": 3631 }, { "epoch": 0.8373487031700289, "grad_norm": 1.4385596765141213, "learning_rate": 1.3449635061957506e-06, "loss": 0.46862345933914185, "step": 3632 }, { "epoch": 0.8375792507204611, "grad_norm": 1.4116689668927676, "learning_rate": 1.3446057100407556e-06, "loss": 0.5485839247703552, "step": 3633 }, { "epoch": 0.8378097982708934, "grad_norm": 1.256291851973583, "learning_rate": 1.3442478638181354e-06, "loss": 0.4831143915653229, "step": 3634 }, { "epoch": 0.8380403458213257, "grad_norm": 1.3851749917377867, "learning_rate": 1.343889967579882e-06, "loss": 0.44708937406539917, "step": 3635 }, { "epoch": 0.838270893371758, "grad_norm": 1.6698743063833459, "learning_rate": 1.343532021377994e-06, "loss": 0.45937833189964294, "step": 3636 }, { "epoch": 0.8385014409221903, "grad_norm": 1.3417409012531487, "learning_rate": 1.3431740252644767e-06, "loss": 0.5108849406242371, "step": 3637 }, { "epoch": 0.8387319884726224, "grad_norm": 1.4510248789771545, "learning_rate": 1.3428159792913435e-06, "loss": 0.5477361679077148, "step": 3638 }, { "epoch": 0.8389625360230547, "grad_norm": 1.2875265535134595, "learning_rate": 1.3424578835106148e-06, "loss": 0.5166784524917603, "step": 3639 }, { "epoch": 0.839193083573487, "grad_norm": 1.452063908950837, "learning_rate": 1.342099737974318e-06, "loss": 0.5114049315452576, "step": 3640 }, { "epoch": 0.8394236311239193, "grad_norm": 1.648045658357982, "learning_rate": 1.3417415427344885e-06, "loss": 0.4201454520225525, "step": 3641 }, { "epoch": 0.8396541786743515, "grad_norm": 1.3260350906579368, "learning_rate": 1.3413832978431676e-06, "loss": 0.4956648349761963, "step": 3642 }, { "epoch": 0.8398847262247838, "grad_norm": 1.3312034034289655, "learning_rate": 1.3410250033524048e-06, "loss": 0.378828227519989, "step": 3643 }, { "epoch": 0.8401152737752161, "grad_norm": 1.4156130470610577, "learning_rate": 1.3406666593142569e-06, "loss": 0.493254691362381, "step": 3644 }, { "epoch": 0.8403458213256484, "grad_norm": 1.4086565383527434, "learning_rate": 1.340308265780787e-06, "loss": 0.48233136534690857, "step": 3645 }, { "epoch": 0.8405763688760807, "grad_norm": 1.3334208525664306, "learning_rate": 1.3399498228040661e-06, "loss": 0.45329928398132324, "step": 3646 }, { "epoch": 0.840806916426513, "grad_norm": 1.510171100735499, "learning_rate": 1.3395913304361728e-06, "loss": 0.47325652837753296, "step": 3647 }, { "epoch": 0.8410374639769452, "grad_norm": 1.4245815844886007, "learning_rate": 1.3392327887291918e-06, "loss": 0.5237877368927002, "step": 3648 }, { "epoch": 0.8412680115273775, "grad_norm": 1.3392602042833703, "learning_rate": 1.3388741977352156e-06, "loss": 0.4137705862522125, "step": 3649 }, { "epoch": 0.8414985590778098, "grad_norm": 1.4945784448020432, "learning_rate": 1.3385155575063434e-06, "loss": 0.5430322885513306, "step": 3650 }, { "epoch": 0.8417291066282421, "grad_norm": 1.7615745807954628, "learning_rate": 1.3381568680946824e-06, "loss": 0.542243480682373, "step": 3651 }, { "epoch": 0.8419596541786744, "grad_norm": 1.5271060313738907, "learning_rate": 1.3377981295523464e-06, "loss": 0.45017683506011963, "step": 3652 }, { "epoch": 0.8421902017291066, "grad_norm": 1.4484659185032926, "learning_rate": 1.3374393419314559e-06, "loss": 0.5193800330162048, "step": 3653 }, { "epoch": 0.8424207492795389, "grad_norm": 1.4240617315786623, "learning_rate": 1.3370805052841393e-06, "loss": 0.39487144351005554, "step": 3654 }, { "epoch": 0.8426512968299712, "grad_norm": 1.7496918494705047, "learning_rate": 1.3367216196625322e-06, "loss": 0.5312062501907349, "step": 3655 }, { "epoch": 0.8428818443804035, "grad_norm": 1.3680421123056206, "learning_rate": 1.3363626851187763e-06, "loss": 0.5203391313552856, "step": 3656 }, { "epoch": 0.8431123919308358, "grad_norm": 1.3194431068725445, "learning_rate": 1.3360037017050215e-06, "loss": 0.5030896663665771, "step": 3657 }, { "epoch": 0.843342939481268, "grad_norm": 1.2711152514275694, "learning_rate": 1.3356446694734242e-06, "loss": 0.4215394854545593, "step": 3658 }, { "epoch": 0.8435734870317003, "grad_norm": 1.3571550082526849, "learning_rate": 1.3352855884761481e-06, "loss": 0.46229058504104614, "step": 3659 }, { "epoch": 0.8438040345821326, "grad_norm": 1.3179757408232118, "learning_rate": 1.334926458765364e-06, "loss": 0.48473960161209106, "step": 3660 }, { "epoch": 0.8440345821325649, "grad_norm": 1.4348121957624616, "learning_rate": 1.3345672803932497e-06, "loss": 0.48085319995880127, "step": 3661 }, { "epoch": 0.8442651296829972, "grad_norm": 1.183681688837606, "learning_rate": 1.3342080534119896e-06, "loss": 0.4717422127723694, "step": 3662 }, { "epoch": 0.8444956772334294, "grad_norm": 1.5024885852724694, "learning_rate": 1.3338487778737762e-06, "loss": 0.4296337962150574, "step": 3663 }, { "epoch": 0.8447262247838617, "grad_norm": 1.2935172742719372, "learning_rate": 1.3334894538308087e-06, "loss": 0.48610907793045044, "step": 3664 }, { "epoch": 0.8449567723342939, "grad_norm": 1.52285756397875, "learning_rate": 1.3331300813352922e-06, "loss": 0.5388171672821045, "step": 3665 }, { "epoch": 0.8451873198847262, "grad_norm": 1.2878926477397548, "learning_rate": 1.3327706604394403e-06, "loss": 0.4291438162326813, "step": 3666 }, { "epoch": 0.8454178674351585, "grad_norm": 1.2885114325801958, "learning_rate": 1.3324111911954736e-06, "loss": 0.5693022012710571, "step": 3667 }, { "epoch": 0.8456484149855907, "grad_norm": 1.9611798926901898, "learning_rate": 1.3320516736556188e-06, "loss": 0.4639110565185547, "step": 3668 }, { "epoch": 0.845878962536023, "grad_norm": 1.3579868594952884, "learning_rate": 1.3316921078721102e-06, "loss": 0.49151986837387085, "step": 3669 }, { "epoch": 0.8461095100864553, "grad_norm": 1.2215058568931207, "learning_rate": 1.3313324938971886e-06, "loss": 0.45903199911117554, "step": 3670 }, { "epoch": 0.8463400576368876, "grad_norm": 1.408166413442317, "learning_rate": 1.3309728317831024e-06, "loss": 0.5564982891082764, "step": 3671 }, { "epoch": 0.8465706051873199, "grad_norm": 1.1499291953976989, "learning_rate": 1.3306131215821067e-06, "loss": 0.4355608820915222, "step": 3672 }, { "epoch": 0.8468011527377521, "grad_norm": 1.3895488327495742, "learning_rate": 1.330253363346464e-06, "loss": 0.5584323406219482, "step": 3673 }, { "epoch": 0.8470317002881844, "grad_norm": 1.377539936505586, "learning_rate": 1.329893557128443e-06, "loss": 0.5399061441421509, "step": 3674 }, { "epoch": 0.8472622478386167, "grad_norm": 1.6120619266325988, "learning_rate": 1.32953370298032e-06, "loss": 0.45847171545028687, "step": 3675 }, { "epoch": 0.847492795389049, "grad_norm": 1.3633635424650061, "learning_rate": 1.329173800954378e-06, "loss": 0.48243457078933716, "step": 3676 }, { "epoch": 0.8477233429394813, "grad_norm": 1.513997205378081, "learning_rate": 1.3288138511029071e-06, "loss": 0.5331195592880249, "step": 3677 }, { "epoch": 0.8479538904899135, "grad_norm": 1.4372930833672997, "learning_rate": 1.3284538534782044e-06, "loss": 0.5083351135253906, "step": 3678 }, { "epoch": 0.8481844380403458, "grad_norm": 1.2133182192138496, "learning_rate": 1.3280938081325732e-06, "loss": 0.47454866766929626, "step": 3679 }, { "epoch": 0.8484149855907781, "grad_norm": 1.234205887620048, "learning_rate": 1.3277337151183252e-06, "loss": 0.42275500297546387, "step": 3680 }, { "epoch": 0.8486455331412104, "grad_norm": 1.3262104260117105, "learning_rate": 1.3273735744877775e-06, "loss": 0.531667947769165, "step": 3681 }, { "epoch": 0.8488760806916427, "grad_norm": 1.2530945065747214, "learning_rate": 1.3270133862932551e-06, "loss": 0.537842333316803, "step": 3682 }, { "epoch": 0.849106628242075, "grad_norm": 1.4271755209888484, "learning_rate": 1.3266531505870896e-06, "loss": 0.47730106115341187, "step": 3683 }, { "epoch": 0.8493371757925072, "grad_norm": 1.3660515052317093, "learning_rate": 1.326292867421619e-06, "loss": 0.41942697763442993, "step": 3684 }, { "epoch": 0.8495677233429395, "grad_norm": 1.4070770160971164, "learning_rate": 1.3259325368491897e-06, "loss": 0.5268753170967102, "step": 3685 }, { "epoch": 0.8497982708933718, "grad_norm": 1.356452328307654, "learning_rate": 1.325572158922153e-06, "loss": 0.43105173110961914, "step": 3686 }, { "epoch": 0.8500288184438041, "grad_norm": 1.2933233661813608, "learning_rate": 1.3252117336928686e-06, "loss": 0.5447876453399658, "step": 3687 }, { "epoch": 0.8502593659942363, "grad_norm": 1.3613250875979575, "learning_rate": 1.3248512612137023e-06, "loss": 0.49573707580566406, "step": 3688 }, { "epoch": 0.8504899135446686, "grad_norm": 1.4776299074252148, "learning_rate": 1.3244907415370273e-06, "loss": 0.46929413080215454, "step": 3689 }, { "epoch": 0.8507204610951009, "grad_norm": 1.4515685025287064, "learning_rate": 1.3241301747152233e-06, "loss": 0.5122408866882324, "step": 3690 }, { "epoch": 0.8509510086455332, "grad_norm": 1.1931030830480251, "learning_rate": 1.3237695608006766e-06, "loss": 0.4353037476539612, "step": 3691 }, { "epoch": 0.8511815561959655, "grad_norm": 1.2542691426037724, "learning_rate": 1.3234088998457807e-06, "loss": 0.45018789172172546, "step": 3692 }, { "epoch": 0.8514121037463976, "grad_norm": 1.3013512335769646, "learning_rate": 1.3230481919029362e-06, "loss": 0.49650731682777405, "step": 3693 }, { "epoch": 0.8516426512968299, "grad_norm": 1.8498390352772125, "learning_rate": 1.3226874370245497e-06, "loss": 0.4956985116004944, "step": 3694 }, { "epoch": 0.8518731988472622, "grad_norm": 1.422696360099423, "learning_rate": 1.3223266352630355e-06, "loss": 0.4598352015018463, "step": 3695 }, { "epoch": 0.8521037463976945, "grad_norm": 1.4805802300223785, "learning_rate": 1.3219657866708147e-06, "loss": 0.43021154403686523, "step": 3696 }, { "epoch": 0.8523342939481268, "grad_norm": 1.0649446070909838, "learning_rate": 1.321604891300314e-06, "loss": 0.4016476273536682, "step": 3697 }, { "epoch": 0.852564841498559, "grad_norm": 1.424130195501248, "learning_rate": 1.3212439492039687e-06, "loss": 0.4343821406364441, "step": 3698 }, { "epoch": 0.8527953890489913, "grad_norm": 1.4128523736821907, "learning_rate": 1.3208829604342189e-06, "loss": 0.5592546463012695, "step": 3699 }, { "epoch": 0.8530259365994236, "grad_norm": 1.3131744964719476, "learning_rate": 1.3205219250435133e-06, "loss": 0.47344446182250977, "step": 3700 }, { "epoch": 0.8532564841498559, "grad_norm": 1.4103812210291373, "learning_rate": 1.3201608430843063e-06, "loss": 0.5225629806518555, "step": 3701 }, { "epoch": 0.8534870317002882, "grad_norm": 1.4128306987353971, "learning_rate": 1.3197997146090593e-06, "loss": 0.4450826644897461, "step": 3702 }, { "epoch": 0.8537175792507204, "grad_norm": 1.348511793859822, "learning_rate": 1.3194385396702406e-06, "loss": 0.5104360580444336, "step": 3703 }, { "epoch": 0.8539481268011527, "grad_norm": 1.2498427531651606, "learning_rate": 1.319077318320325e-06, "loss": 0.46101412177085876, "step": 3704 }, { "epoch": 0.854178674351585, "grad_norm": 1.3605615250308858, "learning_rate": 1.3187160506117947e-06, "loss": 0.47033798694610596, "step": 3705 }, { "epoch": 0.8544092219020173, "grad_norm": 1.4110469176729128, "learning_rate": 1.3183547365971376e-06, "loss": 0.3864026367664337, "step": 3706 }, { "epoch": 0.8546397694524496, "grad_norm": 1.6313869920317292, "learning_rate": 1.3179933763288487e-06, "loss": 0.4850136339664459, "step": 3707 }, { "epoch": 0.8548703170028819, "grad_norm": 1.2823453751359075, "learning_rate": 1.3176319698594307e-06, "loss": 0.5593098998069763, "step": 3708 }, { "epoch": 0.8551008645533141, "grad_norm": 1.3952870507706057, "learning_rate": 1.3172705172413916e-06, "loss": 0.4883347749710083, "step": 3709 }, { "epoch": 0.8553314121037464, "grad_norm": 1.446792702585452, "learning_rate": 1.3169090185272466e-06, "loss": 0.5428842306137085, "step": 3710 }, { "epoch": 0.8555619596541787, "grad_norm": 1.304386174447863, "learning_rate": 1.3165474737695184e-06, "loss": 0.4476752281188965, "step": 3711 }, { "epoch": 0.855792507204611, "grad_norm": 1.1763400531562, "learning_rate": 1.3161858830207349e-06, "loss": 0.47227632999420166, "step": 3712 }, { "epoch": 0.8560230547550433, "grad_norm": 1.7211720189167297, "learning_rate": 1.315824246333432e-06, "loss": 0.4643186330795288, "step": 3713 }, { "epoch": 0.8562536023054755, "grad_norm": 1.2821644108063668, "learning_rate": 1.3154625637601515e-06, "loss": 0.4912930130958557, "step": 3714 }, { "epoch": 0.8564841498559078, "grad_norm": 1.1876618070425506, "learning_rate": 1.3151008353534424e-06, "loss": 0.4728042483329773, "step": 3715 }, { "epoch": 0.8567146974063401, "grad_norm": 1.5209773177568184, "learning_rate": 1.3147390611658592e-06, "loss": 0.5035809874534607, "step": 3716 }, { "epoch": 0.8569452449567724, "grad_norm": 1.4748136352873917, "learning_rate": 1.314377241249965e-06, "loss": 0.5977092981338501, "step": 3717 }, { "epoch": 0.8571757925072047, "grad_norm": 1.4766041364380091, "learning_rate": 1.3140153756583284e-06, "loss": 0.46641305088996887, "step": 3718 }, { "epoch": 0.8574063400576369, "grad_norm": 1.3233412264919238, "learning_rate": 1.313653464443524e-06, "loss": 0.4669331908226013, "step": 3719 }, { "epoch": 0.8576368876080691, "grad_norm": 1.5334191310376337, "learning_rate": 1.3132915076581336e-06, "loss": 0.5101985335350037, "step": 3720 }, { "epoch": 0.8578674351585014, "grad_norm": 1.873618677639978, "learning_rate": 1.3129295053547469e-06, "loss": 0.5713244080543518, "step": 3721 }, { "epoch": 0.8580979827089337, "grad_norm": 1.5421267681706392, "learning_rate": 1.3125674575859585e-06, "loss": 0.5616867542266846, "step": 3722 }, { "epoch": 0.858328530259366, "grad_norm": 1.3186271635095725, "learning_rate": 1.3122053644043698e-06, "loss": 0.39379388093948364, "step": 3723 }, { "epoch": 0.8585590778097982, "grad_norm": 1.4015318528894827, "learning_rate": 1.3118432258625894e-06, "loss": 0.49586233496665955, "step": 3724 }, { "epoch": 0.8587896253602305, "grad_norm": 1.719866212131585, "learning_rate": 1.3114810420132323e-06, "loss": 0.6034448742866516, "step": 3725 }, { "epoch": 0.8590201729106628, "grad_norm": 1.1170956956255402, "learning_rate": 1.3111188129089202e-06, "loss": 0.502906322479248, "step": 3726 }, { "epoch": 0.8592507204610951, "grad_norm": 1.2761243448032986, "learning_rate": 1.310756538602281e-06, "loss": 0.46334415674209595, "step": 3727 }, { "epoch": 0.8594812680115274, "grad_norm": 1.2932841158550064, "learning_rate": 1.3103942191459496e-06, "loss": 0.4570457339286804, "step": 3728 }, { "epoch": 0.8597118155619596, "grad_norm": 1.6385975818385836, "learning_rate": 1.310031854592567e-06, "loss": 0.4651646018028259, "step": 3729 }, { "epoch": 0.8599423631123919, "grad_norm": 1.5487322132450227, "learning_rate": 1.3096694449947812e-06, "loss": 0.5101544260978699, "step": 3730 }, { "epoch": 0.8601729106628242, "grad_norm": 1.4132801397133001, "learning_rate": 1.3093069904052467e-06, "loss": 0.5086382627487183, "step": 3731 }, { "epoch": 0.8604034582132565, "grad_norm": 1.383321089409367, "learning_rate": 1.3089444908766235e-06, "loss": 0.4948277473449707, "step": 3732 }, { "epoch": 0.8606340057636888, "grad_norm": 1.6523780148313876, "learning_rate": 1.3085819464615802e-06, "loss": 0.501600444316864, "step": 3733 }, { "epoch": 0.860864553314121, "grad_norm": 1.2778182602877666, "learning_rate": 1.3082193572127902e-06, "loss": 0.42970049381256104, "step": 3734 }, { "epoch": 0.8610951008645533, "grad_norm": 1.3310363616338574, "learning_rate": 1.3078567231829337e-06, "loss": 0.5060045719146729, "step": 3735 }, { "epoch": 0.8613256484149856, "grad_norm": 1.3440313324926134, "learning_rate": 1.307494044424698e-06, "loss": 0.467104434967041, "step": 3736 }, { "epoch": 0.8615561959654179, "grad_norm": 1.4120038847759988, "learning_rate": 1.3071313209907766e-06, "loss": 0.5574711561203003, "step": 3737 }, { "epoch": 0.8617867435158502, "grad_norm": 1.2363227218724966, "learning_rate": 1.3067685529338693e-06, "loss": 0.46039044857025146, "step": 3738 }, { "epoch": 0.8620172910662824, "grad_norm": 1.1503513795065752, "learning_rate": 1.3064057403066822e-06, "loss": 0.4340815544128418, "step": 3739 }, { "epoch": 0.8622478386167147, "grad_norm": 1.5523166737624763, "learning_rate": 1.3060428831619287e-06, "loss": 0.43785548210144043, "step": 3740 }, { "epoch": 0.862478386167147, "grad_norm": 1.5457298304008735, "learning_rate": 1.305679981552328e-06, "loss": 0.5572985410690308, "step": 3741 }, { "epoch": 0.8627089337175793, "grad_norm": 1.3596059279946906, "learning_rate": 1.3053170355306057e-06, "loss": 0.5185844898223877, "step": 3742 }, { "epoch": 0.8629394812680116, "grad_norm": 1.3503829945064458, "learning_rate": 1.3049540451494942e-06, "loss": 0.46550601720809937, "step": 3743 }, { "epoch": 0.8631700288184438, "grad_norm": 1.4222091653360487, "learning_rate": 1.3045910104617327e-06, "loss": 0.5469560623168945, "step": 3744 }, { "epoch": 0.8634005763688761, "grad_norm": 1.3405137838232928, "learning_rate": 1.3042279315200657e-06, "loss": 0.4938455820083618, "step": 3745 }, { "epoch": 0.8636311239193084, "grad_norm": 1.1926594999161433, "learning_rate": 1.303864808377245e-06, "loss": 0.39218518137931824, "step": 3746 }, { "epoch": 0.8638616714697407, "grad_norm": 1.2206192808907754, "learning_rate": 1.3035016410860291e-06, "loss": 0.5242647528648376, "step": 3747 }, { "epoch": 0.8640922190201729, "grad_norm": 1.964856674282453, "learning_rate": 1.3031384296991817e-06, "loss": 0.5636630058288574, "step": 3748 }, { "epoch": 0.8643227665706051, "grad_norm": 1.2276813313800654, "learning_rate": 1.302775174269474e-06, "loss": 0.423178493976593, "step": 3749 }, { "epoch": 0.8645533141210374, "grad_norm": 1.3659960313257418, "learning_rate": 1.3024118748496832e-06, "loss": 0.5249595046043396, "step": 3750 }, { "epoch": 0.8647838616714697, "grad_norm": 1.5492622232744115, "learning_rate": 1.302048531492593e-06, "loss": 0.46585753560066223, "step": 3751 }, { "epoch": 0.865014409221902, "grad_norm": 1.1870998359636753, "learning_rate": 1.301685144250993e-06, "loss": 0.5256447792053223, "step": 3752 }, { "epoch": 0.8652449567723343, "grad_norm": 1.407334330978888, "learning_rate": 1.3013217131776806e-06, "loss": 0.4594920575618744, "step": 3753 }, { "epoch": 0.8654755043227665, "grad_norm": 1.5476606038444285, "learning_rate": 1.3009582383254572e-06, "loss": 0.47202157974243164, "step": 3754 }, { "epoch": 0.8657060518731988, "grad_norm": 1.7434821206800877, "learning_rate": 1.3005947197471327e-06, "loss": 0.5966329574584961, "step": 3755 }, { "epoch": 0.8659365994236311, "grad_norm": 1.366790612468713, "learning_rate": 1.3002311574955226e-06, "loss": 0.4795987904071808, "step": 3756 }, { "epoch": 0.8661671469740634, "grad_norm": 1.5108288163436319, "learning_rate": 1.2998675516234486e-06, "loss": 0.47260379791259766, "step": 3757 }, { "epoch": 0.8663976945244957, "grad_norm": 1.25511672370241, "learning_rate": 1.2995039021837386e-06, "loss": 0.4049740433692932, "step": 3758 }, { "epoch": 0.866628242074928, "grad_norm": 1.5146922201872883, "learning_rate": 1.2991402092292277e-06, "loss": 0.45385992527008057, "step": 3759 }, { "epoch": 0.8668587896253602, "grad_norm": 1.4053212622212365, "learning_rate": 1.298776472812756e-06, "loss": 0.5093865990638733, "step": 3760 }, { "epoch": 0.8670893371757925, "grad_norm": 1.3453447275976842, "learning_rate": 1.2984126929871705e-06, "loss": 0.5042203664779663, "step": 3761 }, { "epoch": 0.8673198847262248, "grad_norm": 1.388435149050926, "learning_rate": 1.2980488698053257e-06, "loss": 0.5201801061630249, "step": 3762 }, { "epoch": 0.8675504322766571, "grad_norm": 1.6553043139643804, "learning_rate": 1.2976850033200804e-06, "loss": 0.43103480339050293, "step": 3763 }, { "epoch": 0.8677809798270893, "grad_norm": 1.2908585366398646, "learning_rate": 1.297321093584301e-06, "loss": 0.49498647451400757, "step": 3764 }, { "epoch": 0.8680115273775216, "grad_norm": 1.4922160261358184, "learning_rate": 1.2969571406508593e-06, "loss": 0.47413933277130127, "step": 3765 }, { "epoch": 0.8682420749279539, "grad_norm": 1.1674121222476106, "learning_rate": 1.2965931445726346e-06, "loss": 0.4971849322319031, "step": 3766 }, { "epoch": 0.8684726224783862, "grad_norm": 1.3900091493712237, "learning_rate": 1.296229105402511e-06, "loss": 0.5081756114959717, "step": 3767 }, { "epoch": 0.8687031700288185, "grad_norm": 1.5541801599858096, "learning_rate": 1.2958650231933806e-06, "loss": 0.3967844247817993, "step": 3768 }, { "epoch": 0.8689337175792508, "grad_norm": 1.6064453941430812, "learning_rate": 1.2955008979981397e-06, "loss": 0.49939191341400146, "step": 3769 }, { "epoch": 0.869164265129683, "grad_norm": 1.1874512006657068, "learning_rate": 1.2951367298696924e-06, "loss": 0.4890215992927551, "step": 3770 }, { "epoch": 0.8693948126801153, "grad_norm": 1.2271551559366067, "learning_rate": 1.2947725188609486e-06, "loss": 0.41297200322151184, "step": 3771 }, { "epoch": 0.8696253602305476, "grad_norm": 1.353945692705735, "learning_rate": 1.2944082650248245e-06, "loss": 0.4626082181930542, "step": 3772 }, { "epoch": 0.8698559077809799, "grad_norm": 1.6592151979939496, "learning_rate": 1.2940439684142417e-06, "loss": 0.39542341232299805, "step": 3773 }, { "epoch": 0.8700864553314122, "grad_norm": 1.4526755191413092, "learning_rate": 1.2936796290821293e-06, "loss": 0.5241938829421997, "step": 3774 }, { "epoch": 0.8703170028818443, "grad_norm": 1.2509273324342485, "learning_rate": 1.2933152470814222e-06, "loss": 0.4975101947784424, "step": 3775 }, { "epoch": 0.8705475504322766, "grad_norm": 1.3336305279154814, "learning_rate": 1.2929508224650608e-06, "loss": 0.39954787492752075, "step": 3776 }, { "epoch": 0.8707780979827089, "grad_norm": 1.2742484885253946, "learning_rate": 1.292586355285992e-06, "loss": 0.4336436986923218, "step": 3777 }, { "epoch": 0.8710086455331412, "grad_norm": 1.4481272254111919, "learning_rate": 1.2922218455971701e-06, "loss": 0.5100188255310059, "step": 3778 }, { "epoch": 0.8712391930835734, "grad_norm": 1.3266075653142237, "learning_rate": 1.2918572934515537e-06, "loss": 0.4073595106601715, "step": 3779 }, { "epoch": 0.8714697406340057, "grad_norm": 1.4097190424033288, "learning_rate": 1.2914926989021087e-06, "loss": 0.5112053155899048, "step": 3780 }, { "epoch": 0.871700288184438, "grad_norm": 1.3139163731344454, "learning_rate": 1.2911280620018069e-06, "loss": 0.4807807207107544, "step": 3781 }, { "epoch": 0.8719308357348703, "grad_norm": 1.284068633811954, "learning_rate": 1.2907633828036263e-06, "loss": 0.4051937460899353, "step": 3782 }, { "epoch": 0.8721613832853026, "grad_norm": 1.651292594956854, "learning_rate": 1.2903986613605507e-06, "loss": 0.5108781456947327, "step": 3783 }, { "epoch": 0.8723919308357349, "grad_norm": 1.7675344941826705, "learning_rate": 1.2900338977255707e-06, "loss": 0.44521909952163696, "step": 3784 }, { "epoch": 0.8726224783861671, "grad_norm": 1.5809881129891559, "learning_rate": 1.2896690919516825e-06, "loss": 0.6048822999000549, "step": 3785 }, { "epoch": 0.8728530259365994, "grad_norm": 1.270946294003379, "learning_rate": 1.2893042440918887e-06, "loss": 0.4376435875892639, "step": 3786 }, { "epoch": 0.8730835734870317, "grad_norm": 1.4781872894193138, "learning_rate": 1.2889393541991975e-06, "loss": 0.5266781449317932, "step": 3787 }, { "epoch": 0.873314121037464, "grad_norm": 1.3112914328188243, "learning_rate": 1.2885744223266244e-06, "loss": 0.4608879089355469, "step": 3788 }, { "epoch": 0.8735446685878963, "grad_norm": 1.3985767247612233, "learning_rate": 1.2882094485271893e-06, "loss": 0.6094374656677246, "step": 3789 }, { "epoch": 0.8737752161383285, "grad_norm": 1.440566163110992, "learning_rate": 1.2878444328539198e-06, "loss": 0.48071056604385376, "step": 3790 }, { "epoch": 0.8740057636887608, "grad_norm": 1.2344258963818233, "learning_rate": 1.2874793753598486e-06, "loss": 0.5486899614334106, "step": 3791 }, { "epoch": 0.8742363112391931, "grad_norm": 1.3009848139904665, "learning_rate": 1.2871142760980145e-06, "loss": 0.4325149655342102, "step": 3792 }, { "epoch": 0.8744668587896254, "grad_norm": 1.3339799042834972, "learning_rate": 1.2867491351214628e-06, "loss": 0.4406658113002777, "step": 3793 }, { "epoch": 0.8746974063400577, "grad_norm": 1.5722454397734178, "learning_rate": 1.2863839524832453e-06, "loss": 0.4819502830505371, "step": 3794 }, { "epoch": 0.8749279538904899, "grad_norm": 1.4646927994024876, "learning_rate": 1.2860187282364183e-06, "loss": 0.40368038415908813, "step": 3795 }, { "epoch": 0.8751585014409222, "grad_norm": 1.5390691991987933, "learning_rate": 1.2856534624340455e-06, "loss": 0.5182099342346191, "step": 3796 }, { "epoch": 0.8753890489913545, "grad_norm": 1.418305639642291, "learning_rate": 1.2852881551291964e-06, "loss": 0.5145357847213745, "step": 3797 }, { "epoch": 0.8756195965417868, "grad_norm": 1.417053237022279, "learning_rate": 1.2849228063749458e-06, "loss": 0.47494733333587646, "step": 3798 }, { "epoch": 0.8758501440922191, "grad_norm": 1.5460781370684025, "learning_rate": 1.2845574162243757e-06, "loss": 0.44350409507751465, "step": 3799 }, { "epoch": 0.8760806916426513, "grad_norm": 1.333493282792236, "learning_rate": 1.2841919847305732e-06, "loss": 0.48289233446121216, "step": 3800 }, { "epoch": 0.8763112391930836, "grad_norm": 1.508439142440734, "learning_rate": 1.2838265119466316e-06, "loss": 0.5384439826011658, "step": 3801 }, { "epoch": 0.8765417867435159, "grad_norm": 1.710922327336333, "learning_rate": 1.2834609979256503e-06, "loss": 0.5271417498588562, "step": 3802 }, { "epoch": 0.8767723342939481, "grad_norm": 1.4659820833456867, "learning_rate": 1.283095442720735e-06, "loss": 0.4707641005516052, "step": 3803 }, { "epoch": 0.8770028818443804, "grad_norm": 1.614777501143066, "learning_rate": 1.2827298463849969e-06, "loss": 0.49682319164276123, "step": 3804 }, { "epoch": 0.8772334293948126, "grad_norm": 1.7144253113224919, "learning_rate": 1.2823642089715531e-06, "loss": 0.5188574194908142, "step": 3805 }, { "epoch": 0.8774639769452449, "grad_norm": 1.4545861368963406, "learning_rate": 1.281998530533527e-06, "loss": 0.47443845868110657, "step": 3806 }, { "epoch": 0.8776945244956772, "grad_norm": 1.5019289075727473, "learning_rate": 1.2816328111240485e-06, "loss": 0.4482235908508301, "step": 3807 }, { "epoch": 0.8779250720461095, "grad_norm": 1.3960327819894567, "learning_rate": 1.2812670507962519e-06, "loss": 0.46176886558532715, "step": 3808 }, { "epoch": 0.8781556195965418, "grad_norm": 1.6027449038682064, "learning_rate": 1.280901249603279e-06, "loss": 0.6133224964141846, "step": 3809 }, { "epoch": 0.878386167146974, "grad_norm": 1.5464017426728758, "learning_rate": 1.2805354075982764e-06, "loss": 0.5276920795440674, "step": 3810 }, { "epoch": 0.8786167146974063, "grad_norm": 1.3703428637588941, "learning_rate": 1.2801695248343976e-06, "loss": 0.45439988374710083, "step": 3811 }, { "epoch": 0.8788472622478386, "grad_norm": 1.7551101397428777, "learning_rate": 1.2798036013648015e-06, "loss": 0.581672191619873, "step": 3812 }, { "epoch": 0.8790778097982709, "grad_norm": 1.4957389309962374, "learning_rate": 1.279437637242653e-06, "loss": 0.47344762086868286, "step": 3813 }, { "epoch": 0.8793083573487032, "grad_norm": 1.1882502138510669, "learning_rate": 1.2790716325211222e-06, "loss": 0.4005385935306549, "step": 3814 }, { "epoch": 0.8795389048991354, "grad_norm": 1.3488455900541656, "learning_rate": 1.2787055872533865e-06, "loss": 0.4977230429649353, "step": 3815 }, { "epoch": 0.8797694524495677, "grad_norm": 1.26803029784315, "learning_rate": 1.2783395014926286e-06, "loss": 0.4099036455154419, "step": 3816 }, { "epoch": 0.88, "grad_norm": 1.6253478470481617, "learning_rate": 1.2779733752920366e-06, "loss": 0.522419810295105, "step": 3817 }, { "epoch": 0.8802305475504323, "grad_norm": 1.3877455165242094, "learning_rate": 1.2776072087048044e-06, "loss": 0.6116030812263489, "step": 3818 }, { "epoch": 0.8804610951008646, "grad_norm": 1.4523883351381868, "learning_rate": 1.2772410017841331e-06, "loss": 0.4522816836833954, "step": 3819 }, { "epoch": 0.8806916426512968, "grad_norm": 1.3925368174753927, "learning_rate": 1.276874754583228e-06, "loss": 0.5046182870864868, "step": 3820 }, { "epoch": 0.8809221902017291, "grad_norm": 1.5514128300497483, "learning_rate": 1.2765084671553017e-06, "loss": 0.42840707302093506, "step": 3821 }, { "epoch": 0.8811527377521614, "grad_norm": 1.38028870575887, "learning_rate": 1.2761421395535714e-06, "loss": 0.4718896448612213, "step": 3822 }, { "epoch": 0.8813832853025937, "grad_norm": 1.418382492806823, "learning_rate": 1.275775771831261e-06, "loss": 0.42692285776138306, "step": 3823 }, { "epoch": 0.881613832853026, "grad_norm": 1.227035211124785, "learning_rate": 1.2754093640415997e-06, "loss": 0.5146567821502686, "step": 3824 }, { "epoch": 0.8818443804034583, "grad_norm": 1.3655704393095225, "learning_rate": 1.2750429162378226e-06, "loss": 0.519682765007019, "step": 3825 }, { "epoch": 0.8820749279538905, "grad_norm": 1.3597639009077442, "learning_rate": 1.2746764284731713e-06, "loss": 0.4810214042663574, "step": 3826 }, { "epoch": 0.8823054755043228, "grad_norm": 1.673464893922515, "learning_rate": 1.2743099008008922e-06, "loss": 0.4206662178039551, "step": 3827 }, { "epoch": 0.8825360230547551, "grad_norm": 1.5098444961762751, "learning_rate": 1.2739433332742379e-06, "loss": 0.5288581252098083, "step": 3828 }, { "epoch": 0.8827665706051874, "grad_norm": 1.275143856060944, "learning_rate": 1.2735767259464676e-06, "loss": 0.4625706672668457, "step": 3829 }, { "epoch": 0.8829971181556195, "grad_norm": 1.4159230358123176, "learning_rate": 1.2732100788708446e-06, "loss": 0.5021357536315918, "step": 3830 }, { "epoch": 0.8832276657060518, "grad_norm": 1.883786032569818, "learning_rate": 1.2728433921006391e-06, "loss": 0.5525540113449097, "step": 3831 }, { "epoch": 0.8834582132564841, "grad_norm": 1.4405227933488816, "learning_rate": 1.2724766656891276e-06, "loss": 0.5107265710830688, "step": 3832 }, { "epoch": 0.8836887608069164, "grad_norm": 1.186656316362735, "learning_rate": 1.272109899689591e-06, "loss": 0.42621174454689026, "step": 3833 }, { "epoch": 0.8839193083573487, "grad_norm": 1.535378140499116, "learning_rate": 1.2717430941553163e-06, "loss": 0.48517292737960815, "step": 3834 }, { "epoch": 0.884149855907781, "grad_norm": 1.3570442439657298, "learning_rate": 1.2713762491395971e-06, "loss": 0.4864816665649414, "step": 3835 }, { "epoch": 0.8843804034582132, "grad_norm": 1.4310044115767937, "learning_rate": 1.271009364695732e-06, "loss": 0.490509957075119, "step": 3836 }, { "epoch": 0.8846109510086455, "grad_norm": 1.403739279698503, "learning_rate": 1.2706424408770255e-06, "loss": 0.5011172294616699, "step": 3837 }, { "epoch": 0.8848414985590778, "grad_norm": 1.3811542981316953, "learning_rate": 1.2702754777367876e-06, "loss": 0.4880932569503784, "step": 3838 }, { "epoch": 0.8850720461095101, "grad_norm": 1.3425275690337104, "learning_rate": 1.2699084753283344e-06, "loss": 0.46879449486732483, "step": 3839 }, { "epoch": 0.8853025936599423, "grad_norm": 1.2274518723165866, "learning_rate": 1.2695414337049878e-06, "loss": 0.4026890993118286, "step": 3840 }, { "epoch": 0.8855331412103746, "grad_norm": 1.4213648008939022, "learning_rate": 1.2691743529200747e-06, "loss": 0.41836002469062805, "step": 3841 }, { "epoch": 0.8857636887608069, "grad_norm": 1.2462460722293722, "learning_rate": 1.2688072330269281e-06, "loss": 0.45401957631111145, "step": 3842 }, { "epoch": 0.8859942363112392, "grad_norm": 1.5928757723481226, "learning_rate": 1.2684400740788872e-06, "loss": 0.5391957759857178, "step": 3843 }, { "epoch": 0.8862247838616715, "grad_norm": 1.4989767229376667, "learning_rate": 1.268072876129296e-06, "loss": 0.4797601103782654, "step": 3844 }, { "epoch": 0.8864553314121038, "grad_norm": 1.936441003907435, "learning_rate": 1.2677056392315049e-06, "loss": 0.6728458404541016, "step": 3845 }, { "epoch": 0.886685878962536, "grad_norm": 1.5685808545128517, "learning_rate": 1.2673383634388686e-06, "loss": 0.4655725657939911, "step": 3846 }, { "epoch": 0.8869164265129683, "grad_norm": 1.5246140905217858, "learning_rate": 1.2669710488047494e-06, "loss": 0.5772045850753784, "step": 3847 }, { "epoch": 0.8871469740634006, "grad_norm": 1.4036690205681275, "learning_rate": 1.2666036953825146e-06, "loss": 0.46486788988113403, "step": 3848 }, { "epoch": 0.8873775216138329, "grad_norm": 1.4488820168717385, "learning_rate": 1.2662363032255356e-06, "loss": 0.5417192578315735, "step": 3849 }, { "epoch": 0.8876080691642652, "grad_norm": 1.5289545467931411, "learning_rate": 1.2658688723871917e-06, "loss": 0.4081997871398926, "step": 3850 }, { "epoch": 0.8878386167146974, "grad_norm": 1.4923350478825586, "learning_rate": 1.2655014029208665e-06, "loss": 0.48307013511657715, "step": 3851 }, { "epoch": 0.8880691642651297, "grad_norm": 1.3266351841026254, "learning_rate": 1.265133894879949e-06, "loss": 0.4746254086494446, "step": 3852 }, { "epoch": 0.888299711815562, "grad_norm": 1.2468994625849585, "learning_rate": 1.264766348317835e-06, "loss": 0.4846293032169342, "step": 3853 }, { "epoch": 0.8885302593659943, "grad_norm": 1.7122070966503224, "learning_rate": 1.2643987632879247e-06, "loss": 0.520226776599884, "step": 3854 }, { "epoch": 0.8887608069164266, "grad_norm": 1.7159698618881642, "learning_rate": 1.2640311398436248e-06, "loss": 0.5007052421569824, "step": 3855 }, { "epoch": 0.8889913544668588, "grad_norm": 1.3347258185539248, "learning_rate": 1.2636634780383468e-06, "loss": 0.48406559228897095, "step": 3856 }, { "epoch": 0.8892219020172911, "grad_norm": 1.490180533752101, "learning_rate": 1.2632957779255085e-06, "loss": 0.5644323229789734, "step": 3857 }, { "epoch": 0.8894524495677233, "grad_norm": 1.3614741031097277, "learning_rate": 1.2629280395585327e-06, "loss": 0.44816267490386963, "step": 3858 }, { "epoch": 0.8896829971181556, "grad_norm": 1.4856304945777632, "learning_rate": 1.2625602629908473e-06, "loss": 0.36427319049835205, "step": 3859 }, { "epoch": 0.8899135446685879, "grad_norm": 1.370413175613297, "learning_rate": 1.2621924482758876e-06, "loss": 0.45405641198158264, "step": 3860 }, { "epoch": 0.8901440922190201, "grad_norm": 1.294524655309081, "learning_rate": 1.2618245954670928e-06, "loss": 0.5117720365524292, "step": 3861 }, { "epoch": 0.8903746397694524, "grad_norm": 1.4254811243009609, "learning_rate": 1.261456704617908e-06, "loss": 0.45705491304397583, "step": 3862 }, { "epoch": 0.8906051873198847, "grad_norm": 1.470712717016214, "learning_rate": 1.2610887757817832e-06, "loss": 0.5323429703712463, "step": 3863 }, { "epoch": 0.890835734870317, "grad_norm": 1.5650874097714547, "learning_rate": 1.2607208090121762e-06, "loss": 0.4185718297958374, "step": 3864 }, { "epoch": 0.8910662824207493, "grad_norm": 1.4040457955050967, "learning_rate": 1.2603528043625474e-06, "loss": 0.5212376117706299, "step": 3865 }, { "epoch": 0.8912968299711815, "grad_norm": 1.4617247018385677, "learning_rate": 1.2599847618863646e-06, "loss": 0.4622758626937866, "step": 3866 }, { "epoch": 0.8915273775216138, "grad_norm": 1.4316141337585846, "learning_rate": 1.2596166816371003e-06, "loss": 0.5874351859092712, "step": 3867 }, { "epoch": 0.8917579250720461, "grad_norm": 1.362612891341034, "learning_rate": 1.2592485636682332e-06, "loss": 0.44203174114227295, "step": 3868 }, { "epoch": 0.8919884726224784, "grad_norm": 1.4820160895628598, "learning_rate": 1.2588804080332467e-06, "loss": 0.47004449367523193, "step": 3869 }, { "epoch": 0.8922190201729107, "grad_norm": 1.259347293997827, "learning_rate": 1.25851221478563e-06, "loss": 0.45785069465637207, "step": 3870 }, { "epoch": 0.8924495677233429, "grad_norm": 1.5013835331703855, "learning_rate": 1.2581439839788775e-06, "loss": 0.5324156284332275, "step": 3871 }, { "epoch": 0.8926801152737752, "grad_norm": 1.2878744861876057, "learning_rate": 1.2577757156664897e-06, "loss": 0.4123559296131134, "step": 3872 }, { "epoch": 0.8929106628242075, "grad_norm": 1.4431943954654778, "learning_rate": 1.257407409901972e-06, "loss": 0.5121662616729736, "step": 3873 }, { "epoch": 0.8931412103746398, "grad_norm": 1.1985178944416017, "learning_rate": 1.2570390667388353e-06, "loss": 0.432760089635849, "step": 3874 }, { "epoch": 0.8933717579250721, "grad_norm": 1.8769468836868954, "learning_rate": 1.256670686230596e-06, "loss": 0.4611935019493103, "step": 3875 }, { "epoch": 0.8936023054755043, "grad_norm": 1.2284902202201224, "learning_rate": 1.2563022684307765e-06, "loss": 0.39416176080703735, "step": 3876 }, { "epoch": 0.8938328530259366, "grad_norm": 1.6072394074056906, "learning_rate": 1.2559338133929033e-06, "loss": 0.55222088098526, "step": 3877 }, { "epoch": 0.8940634005763689, "grad_norm": 1.4877009433690547, "learning_rate": 1.2555653211705098e-06, "loss": 0.4382261037826538, "step": 3878 }, { "epoch": 0.8942939481268012, "grad_norm": 1.5531857609954407, "learning_rate": 1.2551967918171333e-06, "loss": 0.5172265768051147, "step": 3879 }, { "epoch": 0.8945244956772335, "grad_norm": 1.471596820954927, "learning_rate": 1.254828225386318e-06, "loss": 0.4504626989364624, "step": 3880 }, { "epoch": 0.8947550432276657, "grad_norm": 1.2909793693386424, "learning_rate": 1.2544596219316123e-06, "loss": 0.4999982714653015, "step": 3881 }, { "epoch": 0.894985590778098, "grad_norm": 1.4045585194974466, "learning_rate": 1.2540909815065708e-06, "loss": 0.45146411657333374, "step": 3882 }, { "epoch": 0.8952161383285303, "grad_norm": 1.435519527817537, "learning_rate": 1.2537223041647528e-06, "loss": 0.4375761151313782, "step": 3883 }, { "epoch": 0.8954466858789626, "grad_norm": 1.3610683543202822, "learning_rate": 1.2533535899597233e-06, "loss": 0.46330153942108154, "step": 3884 }, { "epoch": 0.8956772334293948, "grad_norm": 1.3915852786240441, "learning_rate": 1.252984838945053e-06, "loss": 0.5131307244300842, "step": 3885 }, { "epoch": 0.895907780979827, "grad_norm": 1.4118750717373771, "learning_rate": 1.2526160511743177e-06, "loss": 0.47963929176330566, "step": 3886 }, { "epoch": 0.8961383285302593, "grad_norm": 1.4162114734869578, "learning_rate": 1.2522472267010973e-06, "loss": 0.5846255421638489, "step": 3887 }, { "epoch": 0.8963688760806916, "grad_norm": 1.2092365440421156, "learning_rate": 1.2518783655789792e-06, "loss": 0.4484374523162842, "step": 3888 }, { "epoch": 0.8965994236311239, "grad_norm": 1.4172661156409336, "learning_rate": 1.2515094678615553e-06, "loss": 0.4247318506240845, "step": 3889 }, { "epoch": 0.8968299711815562, "grad_norm": 1.3343249187068733, "learning_rate": 1.2511405336024216e-06, "loss": 0.42753252387046814, "step": 3890 }, { "epoch": 0.8970605187319884, "grad_norm": 1.45236289391602, "learning_rate": 1.250771562855181e-06, "loss": 0.5063097476959229, "step": 3891 }, { "epoch": 0.8972910662824207, "grad_norm": 1.369985588812653, "learning_rate": 1.2504025556734411e-06, "loss": 0.45213985443115234, "step": 3892 }, { "epoch": 0.897521613832853, "grad_norm": 1.4094391689341048, "learning_rate": 1.250033512110815e-06, "loss": 0.47396397590637207, "step": 3893 }, { "epoch": 0.8977521613832853, "grad_norm": 1.2781299729891589, "learning_rate": 1.2496644322209202e-06, "loss": 0.4215230941772461, "step": 3894 }, { "epoch": 0.8979827089337176, "grad_norm": 1.542998332559841, "learning_rate": 1.249295316057381e-06, "loss": 0.4587506651878357, "step": 3895 }, { "epoch": 0.8982132564841498, "grad_norm": 1.3617158708985202, "learning_rate": 1.2489261636738255e-06, "loss": 0.5030006766319275, "step": 3896 }, { "epoch": 0.8984438040345821, "grad_norm": 1.3744693750148733, "learning_rate": 1.248556975123888e-06, "loss": 0.4707000255584717, "step": 3897 }, { "epoch": 0.8986743515850144, "grad_norm": 1.3687086395939398, "learning_rate": 1.2481877504612075e-06, "loss": 0.5365906953811646, "step": 3898 }, { "epoch": 0.8989048991354467, "grad_norm": 1.3698885636510183, "learning_rate": 1.2478184897394293e-06, "loss": 0.5381914377212524, "step": 3899 }, { "epoch": 0.899135446685879, "grad_norm": 1.3132088828358914, "learning_rate": 1.2474491930122017e-06, "loss": 0.4934455156326294, "step": 3900 }, { "epoch": 0.8993659942363113, "grad_norm": 1.330529717680055, "learning_rate": 1.2470798603331811e-06, "loss": 0.44426971673965454, "step": 3901 }, { "epoch": 0.8995965417867435, "grad_norm": 1.8380237109293664, "learning_rate": 1.2467104917560272e-06, "loss": 0.5359126329421997, "step": 3902 }, { "epoch": 0.8998270893371758, "grad_norm": 1.5365113795650323, "learning_rate": 1.2463410873344051e-06, "loss": 0.5049535632133484, "step": 3903 }, { "epoch": 0.9000576368876081, "grad_norm": 1.228286736353076, "learning_rate": 1.2459716471219854e-06, "loss": 0.47197240591049194, "step": 3904 }, { "epoch": 0.9002881844380404, "grad_norm": 1.347891577851575, "learning_rate": 1.2456021711724444e-06, "loss": 0.5094764828681946, "step": 3905 }, { "epoch": 0.9005187319884727, "grad_norm": 1.614513274766241, "learning_rate": 1.2452326595394632e-06, "loss": 0.48482170701026917, "step": 3906 }, { "epoch": 0.9007492795389049, "grad_norm": 1.338580704201997, "learning_rate": 1.2448631122767273e-06, "loss": 0.4780023694038391, "step": 3907 }, { "epoch": 0.9009798270893372, "grad_norm": 1.3763662563145533, "learning_rate": 1.2444935294379284e-06, "loss": 0.45455271005630493, "step": 3908 }, { "epoch": 0.9012103746397695, "grad_norm": 1.6202257206479407, "learning_rate": 1.244123911076763e-06, "loss": 0.5535339117050171, "step": 3909 }, { "epoch": 0.9014409221902018, "grad_norm": 1.6798210240312226, "learning_rate": 1.2437542572469332e-06, "loss": 0.49363064765930176, "step": 3910 }, { "epoch": 0.9016714697406341, "grad_norm": 1.247430657266991, "learning_rate": 1.2433845680021455e-06, "loss": 0.4634913206100464, "step": 3911 }, { "epoch": 0.9019020172910662, "grad_norm": 1.6355430895028145, "learning_rate": 1.243014843396112e-06, "loss": 0.5658323764801025, "step": 3912 }, { "epoch": 0.9021325648414985, "grad_norm": 1.483540600317653, "learning_rate": 1.2426450834825497e-06, "loss": 0.3981133699417114, "step": 3913 }, { "epoch": 0.9023631123919308, "grad_norm": 1.3171638139619342, "learning_rate": 1.2422752883151808e-06, "loss": 0.4588771462440491, "step": 3914 }, { "epoch": 0.9025936599423631, "grad_norm": 1.462458369282688, "learning_rate": 1.2419054579477332e-06, "loss": 0.4534187912940979, "step": 3915 }, { "epoch": 0.9028242074927953, "grad_norm": 1.323212121221774, "learning_rate": 1.2415355924339386e-06, "loss": 0.505224347114563, "step": 3916 }, { "epoch": 0.9030547550432276, "grad_norm": 1.631385539397761, "learning_rate": 1.2411656918275353e-06, "loss": 0.5698803663253784, "step": 3917 }, { "epoch": 0.9032853025936599, "grad_norm": 1.344522271913295, "learning_rate": 1.240795756182266e-06, "loss": 0.5096467137336731, "step": 3918 }, { "epoch": 0.9035158501440922, "grad_norm": 1.2092359473357193, "learning_rate": 1.240425785551878e-06, "loss": 0.4160349667072296, "step": 3919 }, { "epoch": 0.9037463976945245, "grad_norm": 1.514137735832078, "learning_rate": 1.2400557799901243e-06, "loss": 0.5298006534576416, "step": 3920 }, { "epoch": 0.9039769452449568, "grad_norm": 1.5005579252676167, "learning_rate": 1.2396857395507635e-06, "loss": 0.5257192850112915, "step": 3921 }, { "epoch": 0.904207492795389, "grad_norm": 1.3593778581503964, "learning_rate": 1.2393156642875577e-06, "loss": 0.4265173673629761, "step": 3922 }, { "epoch": 0.9044380403458213, "grad_norm": 1.5580046287622509, "learning_rate": 1.2389455542542757e-06, "loss": 0.5779361724853516, "step": 3923 }, { "epoch": 0.9046685878962536, "grad_norm": 1.355408621755586, "learning_rate": 1.2385754095046903e-06, "loss": 0.5402140617370605, "step": 3924 }, { "epoch": 0.9048991354466859, "grad_norm": 1.6556822355333962, "learning_rate": 1.2382052300925796e-06, "loss": 0.5139520764350891, "step": 3925 }, { "epoch": 0.9051296829971182, "grad_norm": 1.4744237883021032, "learning_rate": 1.237835016071727e-06, "loss": 0.5268999338150024, "step": 3926 }, { "epoch": 0.9053602305475504, "grad_norm": 1.1940680219551778, "learning_rate": 1.237464767495921e-06, "loss": 0.4186581075191498, "step": 3927 }, { "epoch": 0.9055907780979827, "grad_norm": 1.5404784760334345, "learning_rate": 1.2370944844189542e-06, "loss": 0.42975491285324097, "step": 3928 }, { "epoch": 0.905821325648415, "grad_norm": 1.5264713593301018, "learning_rate": 1.2367241668946256e-06, "loss": 0.5356771945953369, "step": 3929 }, { "epoch": 0.9060518731988473, "grad_norm": 1.4340272166135701, "learning_rate": 1.2363538149767381e-06, "loss": 0.4588644504547119, "step": 3930 }, { "epoch": 0.9062824207492796, "grad_norm": 1.3559415891530742, "learning_rate": 1.2359834287190998e-06, "loss": 0.4316065013408661, "step": 3931 }, { "epoch": 0.9065129682997118, "grad_norm": 1.8926645973507907, "learning_rate": 1.2356130081755241e-06, "loss": 0.5879726409912109, "step": 3932 }, { "epoch": 0.9067435158501441, "grad_norm": 1.3952030980221555, "learning_rate": 1.2352425533998298e-06, "loss": 0.5054802894592285, "step": 3933 }, { "epoch": 0.9069740634005764, "grad_norm": 1.6013693344378155, "learning_rate": 1.2348720644458395e-06, "loss": 0.47121232748031616, "step": 3934 }, { "epoch": 0.9072046109510087, "grad_norm": 1.3838461965950462, "learning_rate": 1.2345015413673815e-06, "loss": 0.45739877223968506, "step": 3935 }, { "epoch": 0.907435158501441, "grad_norm": 1.5155985877456657, "learning_rate": 1.2341309842182888e-06, "loss": 0.5117859840393066, "step": 3936 }, { "epoch": 0.9076657060518732, "grad_norm": 1.5290945554256112, "learning_rate": 1.2337603930524e-06, "loss": 0.5041125416755676, "step": 3937 }, { "epoch": 0.9078962536023055, "grad_norm": 1.3831423035498782, "learning_rate": 1.2333897679235577e-06, "loss": 0.44464683532714844, "step": 3938 }, { "epoch": 0.9081268011527378, "grad_norm": 1.8217468846182516, "learning_rate": 1.23301910888561e-06, "loss": 0.5294856429100037, "step": 3939 }, { "epoch": 0.90835734870317, "grad_norm": 1.6866871720820709, "learning_rate": 1.2326484159924099e-06, "loss": 0.5219826102256775, "step": 3940 }, { "epoch": 0.9085878962536023, "grad_norm": 1.3129059510531136, "learning_rate": 1.232277689297815e-06, "loss": 0.47138598561286926, "step": 3941 }, { "epoch": 0.9088184438040345, "grad_norm": 1.5478253692531152, "learning_rate": 1.2319069288556885e-06, "loss": 0.49205005168914795, "step": 3942 }, { "epoch": 0.9090489913544668, "grad_norm": 1.2297274416962674, "learning_rate": 1.2315361347198975e-06, "loss": 0.49453967809677124, "step": 3943 }, { "epoch": 0.9092795389048991, "grad_norm": 1.668590073547552, "learning_rate": 1.231165306944315e-06, "loss": 0.4912596344947815, "step": 3944 }, { "epoch": 0.9095100864553314, "grad_norm": 1.5738731090807616, "learning_rate": 1.2307944455828175e-06, "loss": 0.5746258497238159, "step": 3945 }, { "epoch": 0.9097406340057637, "grad_norm": 1.2718340719170307, "learning_rate": 1.2304235506892887e-06, "loss": 0.45906275510787964, "step": 3946 }, { "epoch": 0.9099711815561959, "grad_norm": 1.3569754433422505, "learning_rate": 1.2300526223176149e-06, "loss": 0.5177116394042969, "step": 3947 }, { "epoch": 0.9102017291066282, "grad_norm": 1.2901403946054024, "learning_rate": 1.2296816605216883e-06, "loss": 0.45164844393730164, "step": 3948 }, { "epoch": 0.9104322766570605, "grad_norm": 1.5561420901310485, "learning_rate": 1.2293106653554054e-06, "loss": 0.5441509485244751, "step": 3949 }, { "epoch": 0.9106628242074928, "grad_norm": 1.6569407840335213, "learning_rate": 1.2289396368726684e-06, "loss": 0.4755045771598816, "step": 3950 }, { "epoch": 0.9108933717579251, "grad_norm": 1.3697502981572085, "learning_rate": 1.228568575127384e-06, "loss": 0.5330208539962769, "step": 3951 }, { "epoch": 0.9111239193083573, "grad_norm": 1.4523221492491385, "learning_rate": 1.2281974801734632e-06, "loss": 0.5201523303985596, "step": 3952 }, { "epoch": 0.9113544668587896, "grad_norm": 1.498143694444682, "learning_rate": 1.2278263520648224e-06, "loss": 0.5279841423034668, "step": 3953 }, { "epoch": 0.9115850144092219, "grad_norm": 1.5841756767163864, "learning_rate": 1.2274551908553828e-06, "loss": 0.4712948203086853, "step": 3954 }, { "epoch": 0.9118155619596542, "grad_norm": 1.432279815006193, "learning_rate": 1.2270839965990698e-06, "loss": 0.513918399810791, "step": 3955 }, { "epoch": 0.9120461095100865, "grad_norm": 1.2571574370365426, "learning_rate": 1.226712769349815e-06, "loss": 0.3688378930091858, "step": 3956 }, { "epoch": 0.9122766570605187, "grad_norm": 1.46428205883802, "learning_rate": 1.2263415091615528e-06, "loss": 0.5429365634918213, "step": 3957 }, { "epoch": 0.912507204610951, "grad_norm": 1.3697810351726807, "learning_rate": 1.225970216088224e-06, "loss": 0.5164231061935425, "step": 3958 }, { "epoch": 0.9127377521613833, "grad_norm": 1.5119297834458385, "learning_rate": 1.2255988901837737e-06, "loss": 0.48215287923812866, "step": 3959 }, { "epoch": 0.9129682997118156, "grad_norm": 1.3695567208291244, "learning_rate": 1.2252275315021514e-06, "loss": 0.47684353590011597, "step": 3960 }, { "epoch": 0.9131988472622479, "grad_norm": 1.2862730862475011, "learning_rate": 1.2248561400973114e-06, "loss": 0.498749315738678, "step": 3961 }, { "epoch": 0.9134293948126802, "grad_norm": 1.532172345538078, "learning_rate": 1.224484716023214e-06, "loss": 0.6198222637176514, "step": 3962 }, { "epoch": 0.9136599423631124, "grad_norm": 1.2104233560563726, "learning_rate": 1.2241132593338224e-06, "loss": 0.45880454778671265, "step": 3963 }, { "epoch": 0.9138904899135447, "grad_norm": 1.1252676206788053, "learning_rate": 1.2237417700831055e-06, "loss": 0.5064502954483032, "step": 3964 }, { "epoch": 0.914121037463977, "grad_norm": 1.3166703099327564, "learning_rate": 1.223370248325037e-06, "loss": 0.3796413540840149, "step": 3965 }, { "epoch": 0.9143515850144093, "grad_norm": 1.824971760714775, "learning_rate": 1.222998694113595e-06, "loss": 0.520294725894928, "step": 3966 }, { "epoch": 0.9145821325648414, "grad_norm": 1.2643227228990135, "learning_rate": 1.2226271075027627e-06, "loss": 0.4708889424800873, "step": 3967 }, { "epoch": 0.9148126801152737, "grad_norm": 1.4788536918637403, "learning_rate": 1.2222554885465277e-06, "loss": 0.4617408215999603, "step": 3968 }, { "epoch": 0.915043227665706, "grad_norm": 1.6953855273421374, "learning_rate": 1.2218838372988822e-06, "loss": 0.47521114349365234, "step": 3969 }, { "epoch": 0.9152737752161383, "grad_norm": 1.3617165473153408, "learning_rate": 1.2215121538138233e-06, "loss": 0.47553038597106934, "step": 3970 }, { "epoch": 0.9155043227665706, "grad_norm": 1.371230823038315, "learning_rate": 1.2211404381453527e-06, "loss": 0.4502425789833069, "step": 3971 }, { "epoch": 0.9157348703170028, "grad_norm": 1.3152534809096919, "learning_rate": 1.2207686903474774e-06, "loss": 0.4478566646575928, "step": 3972 }, { "epoch": 0.9159654178674351, "grad_norm": 1.2964432112303936, "learning_rate": 1.2203969104742074e-06, "loss": 0.48119837045669556, "step": 3973 }, { "epoch": 0.9161959654178674, "grad_norm": 1.3052580206449855, "learning_rate": 1.2200250985795592e-06, "loss": 0.4442863166332245, "step": 3974 }, { "epoch": 0.9164265129682997, "grad_norm": 1.4544787661984238, "learning_rate": 1.2196532547175534e-06, "loss": 0.4326072037220001, "step": 3975 }, { "epoch": 0.916657060518732, "grad_norm": 1.3112341607173732, "learning_rate": 1.2192813789422146e-06, "loss": 0.4617190361022949, "step": 3976 }, { "epoch": 0.9168876080691643, "grad_norm": 1.4746796735453866, "learning_rate": 1.218909471307572e-06, "loss": 0.5099925994873047, "step": 3977 }, { "epoch": 0.9171181556195965, "grad_norm": 1.3055542454695868, "learning_rate": 1.2185375318676611e-06, "loss": 0.4432462453842163, "step": 3978 }, { "epoch": 0.9173487031700288, "grad_norm": 1.5060839944143631, "learning_rate": 1.21816556067652e-06, "loss": 0.46479105949401855, "step": 3979 }, { "epoch": 0.9175792507204611, "grad_norm": 1.6825755102995348, "learning_rate": 1.2177935577881926e-06, "loss": 0.5027565360069275, "step": 3980 }, { "epoch": 0.9178097982708934, "grad_norm": 1.323342879993209, "learning_rate": 1.217421523256727e-06, "loss": 0.5658999085426331, "step": 3981 }, { "epoch": 0.9180403458213257, "grad_norm": 1.3697743466973002, "learning_rate": 1.2170494571361753e-06, "loss": 0.503716230392456, "step": 3982 }, { "epoch": 0.9182708933717579, "grad_norm": 1.5398117661982387, "learning_rate": 1.216677359480596e-06, "loss": 0.4836317300796509, "step": 3983 }, { "epoch": 0.9185014409221902, "grad_norm": 1.2670647547552656, "learning_rate": 1.2163052303440502e-06, "loss": 0.4926934838294983, "step": 3984 }, { "epoch": 0.9187319884726225, "grad_norm": 1.2680144774170914, "learning_rate": 1.2159330697806047e-06, "loss": 0.4263528287410736, "step": 3985 }, { "epoch": 0.9189625360230548, "grad_norm": 1.5304339584560502, "learning_rate": 1.21556087784433e-06, "loss": 0.465299129486084, "step": 3986 }, { "epoch": 0.9191930835734871, "grad_norm": 1.2798602314469432, "learning_rate": 1.2151886545893028e-06, "loss": 0.4937010407447815, "step": 3987 }, { "epoch": 0.9194236311239193, "grad_norm": 1.3820049835108226, "learning_rate": 1.2148164000696024e-06, "loss": 0.4511493146419525, "step": 3988 }, { "epoch": 0.9196541786743516, "grad_norm": 1.4217188148151207, "learning_rate": 1.2144441143393139e-06, "loss": 0.47422119975090027, "step": 3989 }, { "epoch": 0.9198847262247839, "grad_norm": 1.9912580790846817, "learning_rate": 1.2140717974525263e-06, "loss": 0.48183363676071167, "step": 3990 }, { "epoch": 0.9201152737752162, "grad_norm": 1.2871112603155308, "learning_rate": 1.2136994494633333e-06, "loss": 0.5231306552886963, "step": 3991 }, { "epoch": 0.9203458213256485, "grad_norm": 1.2840416788228404, "learning_rate": 1.2133270704258336e-06, "loss": 0.5468826293945312, "step": 3992 }, { "epoch": 0.9205763688760807, "grad_norm": 1.4782261085318393, "learning_rate": 1.2129546603941297e-06, "loss": 0.5139263868331909, "step": 3993 }, { "epoch": 0.920806916426513, "grad_norm": 1.5250421500545963, "learning_rate": 1.2125822194223287e-06, "loss": 0.44919341802597046, "step": 3994 }, { "epoch": 0.9210374639769452, "grad_norm": 1.4583297611315478, "learning_rate": 1.212209747564543e-06, "loss": 0.423626184463501, "step": 3995 }, { "epoch": 0.9212680115273775, "grad_norm": 1.5952268021154143, "learning_rate": 1.2118372448748885e-06, "loss": 0.6177996397018433, "step": 3996 }, { "epoch": 0.9214985590778098, "grad_norm": 1.2688969514305488, "learning_rate": 1.2114647114074861e-06, "loss": 0.49319177865982056, "step": 3997 }, { "epoch": 0.921729106628242, "grad_norm": 1.4475287935451044, "learning_rate": 1.2110921472164607e-06, "loss": 0.4969290494918823, "step": 3998 }, { "epoch": 0.9219596541786743, "grad_norm": 1.586155388040351, "learning_rate": 1.2107195523559424e-06, "loss": 0.48943108320236206, "step": 3999 }, { "epoch": 0.9221902017291066, "grad_norm": 2.0995701290892352, "learning_rate": 1.2103469268800654e-06, "loss": 0.484131783246994, "step": 4000 }, { "epoch": 0.9224207492795389, "grad_norm": 1.435307334693519, "learning_rate": 1.209974270842968e-06, "loss": 0.507775068283081, "step": 4001 }, { "epoch": 0.9226512968299712, "grad_norm": 1.981545451286363, "learning_rate": 1.209601584298793e-06, "loss": 0.4608537554740906, "step": 4002 }, { "epoch": 0.9228818443804034, "grad_norm": 1.3751735682066653, "learning_rate": 1.2092288673016886e-06, "loss": 0.4954952001571655, "step": 4003 }, { "epoch": 0.9231123919308357, "grad_norm": 1.4081328435502214, "learning_rate": 1.2088561199058066e-06, "loss": 0.5661128163337708, "step": 4004 }, { "epoch": 0.923342939481268, "grad_norm": 1.3545501865482017, "learning_rate": 1.2084833421653024e-06, "loss": 0.5523176193237305, "step": 4005 }, { "epoch": 0.9235734870317003, "grad_norm": 1.6845686246960427, "learning_rate": 1.2081105341343377e-06, "loss": 0.5262937545776367, "step": 4006 }, { "epoch": 0.9238040345821326, "grad_norm": 1.556031475297922, "learning_rate": 1.2077376958670775e-06, "loss": 0.5856887102127075, "step": 4007 }, { "epoch": 0.9240345821325648, "grad_norm": 1.1475269427941932, "learning_rate": 1.207364827417691e-06, "loss": 0.4267149567604065, "step": 4008 }, { "epoch": 0.9242651296829971, "grad_norm": 1.418841731188768, "learning_rate": 1.2069919288403522e-06, "loss": 0.5250238180160522, "step": 4009 }, { "epoch": 0.9244956772334294, "grad_norm": 1.3666576970448538, "learning_rate": 1.2066190001892396e-06, "loss": 0.5198487043380737, "step": 4010 }, { "epoch": 0.9247262247838617, "grad_norm": 1.3830301229983213, "learning_rate": 1.2062460415185354e-06, "loss": 0.47743192315101624, "step": 4011 }, { "epoch": 0.924956772334294, "grad_norm": 1.4075712291395979, "learning_rate": 1.2058730528824273e-06, "loss": 0.4159664213657379, "step": 4012 }, { "epoch": 0.9251873198847262, "grad_norm": 1.4264938029777896, "learning_rate": 1.205500034335106e-06, "loss": 0.5445389151573181, "step": 4013 }, { "epoch": 0.9254178674351585, "grad_norm": 1.3580321349174527, "learning_rate": 1.2051269859307673e-06, "loss": 0.42347821593284607, "step": 4014 }, { "epoch": 0.9256484149855908, "grad_norm": 1.52434699932796, "learning_rate": 1.2047539077236116e-06, "loss": 0.5288738012313843, "step": 4015 }, { "epoch": 0.9258789625360231, "grad_norm": 1.3100008419238212, "learning_rate": 1.2043807997678436e-06, "loss": 0.4394925832748413, "step": 4016 }, { "epoch": 0.9261095100864554, "grad_norm": 1.565361448615934, "learning_rate": 1.2040076621176712e-06, "loss": 0.5634762048721313, "step": 4017 }, { "epoch": 0.9263400576368876, "grad_norm": 1.4510569270002092, "learning_rate": 1.2036344948273074e-06, "loss": 0.4297552704811096, "step": 4018 }, { "epoch": 0.9265706051873199, "grad_norm": 1.466340432854967, "learning_rate": 1.2032612979509701e-06, "loss": 0.48720866441726685, "step": 4019 }, { "epoch": 0.9268011527377522, "grad_norm": 1.6032805373268917, "learning_rate": 1.2028880715428808e-06, "loss": 0.49143970012664795, "step": 4020 }, { "epoch": 0.9270317002881845, "grad_norm": 1.5228540386105625, "learning_rate": 1.2025148156572654e-06, "loss": 0.5053589940071106, "step": 4021 }, { "epoch": 0.9272622478386167, "grad_norm": 1.5279572652998323, "learning_rate": 1.2021415303483537e-06, "loss": 0.5389736890792847, "step": 4022 }, { "epoch": 0.9274927953890489, "grad_norm": 1.4328489921033678, "learning_rate": 1.2017682156703805e-06, "loss": 0.46251988410949707, "step": 4023 }, { "epoch": 0.9277233429394812, "grad_norm": 1.6247276881495054, "learning_rate": 1.2013948716775848e-06, "loss": 0.5429900288581848, "step": 4024 }, { "epoch": 0.9279538904899135, "grad_norm": 1.3971561705543436, "learning_rate": 1.201021498424209e-06, "loss": 0.45329588651657104, "step": 4025 }, { "epoch": 0.9281844380403458, "grad_norm": 1.6206327315291964, "learning_rate": 1.200648095964501e-06, "loss": 0.4960551857948303, "step": 4026 }, { "epoch": 0.9284149855907781, "grad_norm": 1.246015007964809, "learning_rate": 1.2002746643527117e-06, "loss": 0.38909661769866943, "step": 4027 }, { "epoch": 0.9286455331412103, "grad_norm": 1.4656512778264124, "learning_rate": 1.199901203643097e-06, "loss": 0.4518124461174011, "step": 4028 }, { "epoch": 0.9288760806916426, "grad_norm": 1.4345267950370788, "learning_rate": 1.1995277138899173e-06, "loss": 0.4774012267589569, "step": 4029 }, { "epoch": 0.9291066282420749, "grad_norm": 1.8466433066122896, "learning_rate": 1.1991541951474363e-06, "loss": 0.48850271105766296, "step": 4030 }, { "epoch": 0.9293371757925072, "grad_norm": 1.405365935647213, "learning_rate": 1.1987806474699223e-06, "loss": 0.46933990716934204, "step": 4031 }, { "epoch": 0.9295677233429395, "grad_norm": 1.4191229128527267, "learning_rate": 1.1984070709116483e-06, "loss": 0.4658009707927704, "step": 4032 }, { "epoch": 0.9297982708933717, "grad_norm": 1.7378069570906458, "learning_rate": 1.1980334655268907e-06, "loss": 0.4660704731941223, "step": 4033 }, { "epoch": 0.930028818443804, "grad_norm": 1.238361666141835, "learning_rate": 1.197659831369931e-06, "loss": 0.44522547721862793, "step": 4034 }, { "epoch": 0.9302593659942363, "grad_norm": 1.4315651225888224, "learning_rate": 1.1972861684950535e-06, "loss": 0.5145701169967651, "step": 4035 }, { "epoch": 0.9304899135446686, "grad_norm": 1.407197717514797, "learning_rate": 1.1969124769565485e-06, "loss": 0.5040268898010254, "step": 4036 }, { "epoch": 0.9307204610951009, "grad_norm": 1.4531027015491225, "learning_rate": 1.1965387568087086e-06, "loss": 0.46897944808006287, "step": 4037 }, { "epoch": 0.9309510086455332, "grad_norm": 1.761132451430314, "learning_rate": 1.1961650081058325e-06, "loss": 0.6305633783340454, "step": 4038 }, { "epoch": 0.9311815561959654, "grad_norm": 1.4142062841342746, "learning_rate": 1.1957912309022207e-06, "loss": 0.4110141396522522, "step": 4039 }, { "epoch": 0.9314121037463977, "grad_norm": 1.5122107480089741, "learning_rate": 1.1954174252521802e-06, "loss": 0.41255778074264526, "step": 4040 }, { "epoch": 0.93164265129683, "grad_norm": 1.7308884755783311, "learning_rate": 1.195043591210021e-06, "loss": 0.5402833223342896, "step": 4041 }, { "epoch": 0.9318731988472623, "grad_norm": 1.4904148344208392, "learning_rate": 1.1946697288300564e-06, "loss": 0.5163707733154297, "step": 4042 }, { "epoch": 0.9321037463976946, "grad_norm": 1.3642989361766586, "learning_rate": 1.1942958381666055e-06, "loss": 0.5049669742584229, "step": 4043 }, { "epoch": 0.9323342939481268, "grad_norm": 1.512669161605088, "learning_rate": 1.1939219192739907e-06, "loss": 0.554564356803894, "step": 4044 }, { "epoch": 0.9325648414985591, "grad_norm": 1.1648703874817907, "learning_rate": 1.1935479722065382e-06, "loss": 0.46329542994499207, "step": 4045 }, { "epoch": 0.9327953890489914, "grad_norm": 1.380659317735328, "learning_rate": 1.1931739970185787e-06, "loss": 0.49481451511383057, "step": 4046 }, { "epoch": 0.9330259365994237, "grad_norm": 1.4592472514709256, "learning_rate": 1.1927999937644474e-06, "loss": 0.5521741509437561, "step": 4047 }, { "epoch": 0.933256484149856, "grad_norm": 1.6574635084552074, "learning_rate": 1.1924259624984822e-06, "loss": 0.4786396324634552, "step": 4048 }, { "epoch": 0.9334870317002882, "grad_norm": 1.553655262105641, "learning_rate": 1.1920519032750266e-06, "loss": 0.5652132630348206, "step": 4049 }, { "epoch": 0.9337175792507204, "grad_norm": 1.2329472545327154, "learning_rate": 1.1916778161484272e-06, "loss": 0.3993264436721802, "step": 4050 }, { "epoch": 0.9339481268011527, "grad_norm": 1.4293564999356616, "learning_rate": 1.1913037011730354e-06, "loss": 0.504034161567688, "step": 4051 }, { "epoch": 0.934178674351585, "grad_norm": 1.625339530227479, "learning_rate": 1.1909295584032054e-06, "loss": 0.40499967336654663, "step": 4052 }, { "epoch": 0.9344092219020173, "grad_norm": 2.035223953420821, "learning_rate": 1.1905553878932972e-06, "loss": 0.5135485529899597, "step": 4053 }, { "epoch": 0.9346397694524495, "grad_norm": 1.3278296978190696, "learning_rate": 1.1901811896976736e-06, "loss": 0.4639396369457245, "step": 4054 }, { "epoch": 0.9348703170028818, "grad_norm": 1.4183899547937462, "learning_rate": 1.1898069638707011e-06, "loss": 0.4949880838394165, "step": 4055 }, { "epoch": 0.9351008645533141, "grad_norm": 1.3028741958089267, "learning_rate": 1.1894327104667514e-06, "loss": 0.4456254243850708, "step": 4056 }, { "epoch": 0.9353314121037464, "grad_norm": 1.3778790723085623, "learning_rate": 1.1890584295402e-06, "loss": 0.466667115688324, "step": 4057 }, { "epoch": 0.9355619596541787, "grad_norm": 1.2920684649771048, "learning_rate": 1.188684121145425e-06, "loss": 0.4107869863510132, "step": 4058 }, { "epoch": 0.9357925072046109, "grad_norm": 1.4103631270546013, "learning_rate": 1.1883097853368099e-06, "loss": 0.49275922775268555, "step": 4059 }, { "epoch": 0.9360230547550432, "grad_norm": 1.4435221132968696, "learning_rate": 1.1879354221687424e-06, "loss": 0.5320898294448853, "step": 4060 }, { "epoch": 0.9362536023054755, "grad_norm": 1.4843366418758739, "learning_rate": 1.1875610316956127e-06, "loss": 0.48131513595581055, "step": 4061 }, { "epoch": 0.9364841498559078, "grad_norm": 1.2831782058716195, "learning_rate": 1.1871866139718164e-06, "loss": 0.42021268606185913, "step": 4062 }, { "epoch": 0.9367146974063401, "grad_norm": 1.42206888115783, "learning_rate": 1.1868121690517524e-06, "loss": 0.4804028868675232, "step": 4063 }, { "epoch": 0.9369452449567723, "grad_norm": 1.357971019510146, "learning_rate": 1.1864376969898238e-06, "loss": 0.4920700192451477, "step": 4064 }, { "epoch": 0.9371757925072046, "grad_norm": 1.6009723063265457, "learning_rate": 1.1860631978404369e-06, "loss": 0.4702967703342438, "step": 4065 }, { "epoch": 0.9374063400576369, "grad_norm": 1.4828010491515886, "learning_rate": 1.1856886716580032e-06, "loss": 0.41766661405563354, "step": 4066 }, { "epoch": 0.9376368876080692, "grad_norm": 1.5572182245704769, "learning_rate": 1.1853141184969372e-06, "loss": 0.43666309118270874, "step": 4067 }, { "epoch": 0.9378674351585015, "grad_norm": 1.402637303057815, "learning_rate": 1.1849395384116573e-06, "loss": 0.5529673099517822, "step": 4068 }, { "epoch": 0.9380979827089337, "grad_norm": 1.737132310570529, "learning_rate": 1.1845649314565866e-06, "loss": 0.4728221297264099, "step": 4069 }, { "epoch": 0.938328530259366, "grad_norm": 1.464406585741721, "learning_rate": 1.1841902976861516e-06, "loss": 0.4904414117336273, "step": 4070 }, { "epoch": 0.9385590778097983, "grad_norm": 1.3210429529267262, "learning_rate": 1.183815637154782e-06, "loss": 0.4774867594242096, "step": 4071 }, { "epoch": 0.9387896253602306, "grad_norm": 1.3081579109295827, "learning_rate": 1.1834409499169125e-06, "loss": 0.4732491970062256, "step": 4072 }, { "epoch": 0.9390201729106629, "grad_norm": 1.6141272556528279, "learning_rate": 1.1830662360269817e-06, "loss": 0.43487685918807983, "step": 4073 }, { "epoch": 0.9392507204610951, "grad_norm": 1.5910806235991546, "learning_rate": 1.1826914955394307e-06, "loss": 0.42725875973701477, "step": 4074 }, { "epoch": 0.9394812680115274, "grad_norm": 1.6421401366141255, "learning_rate": 1.1823167285087062e-06, "loss": 0.5362523198127747, "step": 4075 }, { "epoch": 0.9397118155619597, "grad_norm": 1.542102971714458, "learning_rate": 1.1819419349892573e-06, "loss": 0.5027948617935181, "step": 4076 }, { "epoch": 0.9399423631123919, "grad_norm": 1.5260318111513538, "learning_rate": 1.1815671150355384e-06, "loss": 0.5027229189872742, "step": 4077 }, { "epoch": 0.9401729106628242, "grad_norm": 1.239313772323881, "learning_rate": 1.1811922687020064e-06, "loss": 0.43377360701560974, "step": 4078 }, { "epoch": 0.9404034582132564, "grad_norm": 1.9123276757663772, "learning_rate": 1.1808173960431224e-06, "loss": 0.5102596282958984, "step": 4079 }, { "epoch": 0.9406340057636887, "grad_norm": 1.4541636061516516, "learning_rate": 1.180442497113352e-06, "loss": 0.4819416105747223, "step": 4080 }, { "epoch": 0.940864553314121, "grad_norm": 2.2469220393286258, "learning_rate": 1.1800675719671637e-06, "loss": 0.5308434963226318, "step": 4081 }, { "epoch": 0.9410951008645533, "grad_norm": 1.2322230699289647, "learning_rate": 1.1796926206590306e-06, "loss": 0.3851321339607239, "step": 4082 }, { "epoch": 0.9413256484149856, "grad_norm": 1.2904563965230043, "learning_rate": 1.1793176432434292e-06, "loss": 0.4828380048274994, "step": 4083 }, { "epoch": 0.9415561959654178, "grad_norm": 1.3580681901731781, "learning_rate": 1.1789426397748393e-06, "loss": 0.43688488006591797, "step": 4084 }, { "epoch": 0.9417867435158501, "grad_norm": 1.2794460643131822, "learning_rate": 1.1785676103077453e-06, "loss": 0.5123027563095093, "step": 4085 }, { "epoch": 0.9420172910662824, "grad_norm": 1.6221515546011513, "learning_rate": 1.1781925548966353e-06, "loss": 0.5363332033157349, "step": 4086 }, { "epoch": 0.9422478386167147, "grad_norm": 1.3527370433567738, "learning_rate": 1.1778174735960007e-06, "loss": 0.4401679039001465, "step": 4087 }, { "epoch": 0.942478386167147, "grad_norm": 1.6235340600095725, "learning_rate": 1.1774423664603369e-06, "loss": 0.48118162155151367, "step": 4088 }, { "epoch": 0.9427089337175792, "grad_norm": 1.644375200451457, "learning_rate": 1.1770672335441435e-06, "loss": 0.4561998248100281, "step": 4089 }, { "epoch": 0.9429394812680115, "grad_norm": 1.3516589509901438, "learning_rate": 1.1766920749019227e-06, "loss": 0.5039771199226379, "step": 4090 }, { "epoch": 0.9431700288184438, "grad_norm": 1.289993272064434, "learning_rate": 1.1763168905881817e-06, "loss": 0.3734637498855591, "step": 4091 }, { "epoch": 0.9434005763688761, "grad_norm": 1.5072018021814846, "learning_rate": 1.1759416806574306e-06, "loss": 0.5121400952339172, "step": 4092 }, { "epoch": 0.9436311239193084, "grad_norm": 1.3533206831206654, "learning_rate": 1.1755664451641835e-06, "loss": 0.47663238644599915, "step": 4093 }, { "epoch": 0.9438616714697406, "grad_norm": 1.3920201979100617, "learning_rate": 1.1751911841629585e-06, "loss": 0.5083819627761841, "step": 4094 }, { "epoch": 0.9440922190201729, "grad_norm": 1.7300221057773006, "learning_rate": 1.1748158977082768e-06, "loss": 0.5221288800239563, "step": 4095 }, { "epoch": 0.9443227665706052, "grad_norm": 1.5179395653322596, "learning_rate": 1.1744405858546635e-06, "loss": 0.5698456168174744, "step": 4096 }, { "epoch": 0.9445533141210375, "grad_norm": 1.8027766062759447, "learning_rate": 1.1740652486566482e-06, "loss": 0.49966350197792053, "step": 4097 }, { "epoch": 0.9447838616714698, "grad_norm": 1.6204478112236895, "learning_rate": 1.1736898861687629e-06, "loss": 0.5273131728172302, "step": 4098 }, { "epoch": 0.945014409221902, "grad_norm": 1.3525958396034103, "learning_rate": 1.1733144984455441e-06, "loss": 0.515166699886322, "step": 4099 }, { "epoch": 0.9452449567723343, "grad_norm": 1.3435489819348247, "learning_rate": 1.172939085541531e-06, "loss": 0.448274165391922, "step": 4100 }, { "epoch": 0.9454755043227666, "grad_norm": 1.3997663854784892, "learning_rate": 1.1725636475112686e-06, "loss": 0.3678357005119324, "step": 4101 }, { "epoch": 0.9457060518731989, "grad_norm": 1.4128979216296833, "learning_rate": 1.1721881844093031e-06, "loss": 0.4662661552429199, "step": 4102 }, { "epoch": 0.9459365994236312, "grad_norm": 1.3930942066092626, "learning_rate": 1.1718126962901855e-06, "loss": 0.49695825576782227, "step": 4103 }, { "epoch": 0.9461671469740635, "grad_norm": 1.5090047516434983, "learning_rate": 1.1714371832084707e-06, "loss": 0.4838793873786926, "step": 4104 }, { "epoch": 0.9463976945244956, "grad_norm": 1.5195967659155012, "learning_rate": 1.1710616452187164e-06, "loss": 0.5055621266365051, "step": 4105 }, { "epoch": 0.9466282420749279, "grad_norm": 1.555064483270701, "learning_rate": 1.1706860823754844e-06, "loss": 0.44428279995918274, "step": 4106 }, { "epoch": 0.9468587896253602, "grad_norm": 1.2604559743030346, "learning_rate": 1.1703104947333406e-06, "loss": 0.4698944687843323, "step": 4107 }, { "epoch": 0.9470893371757925, "grad_norm": 1.705702471223194, "learning_rate": 1.1699348823468533e-06, "loss": 0.43742209672927856, "step": 4108 }, { "epoch": 0.9473198847262247, "grad_norm": 1.3409195488655554, "learning_rate": 1.1695592452705958e-06, "loss": 0.4127580523490906, "step": 4109 }, { "epoch": 0.947550432276657, "grad_norm": 1.2372268437863259, "learning_rate": 1.1691835835591434e-06, "loss": 0.4625944495201111, "step": 4110 }, { "epoch": 0.9477809798270893, "grad_norm": 1.5550413151368645, "learning_rate": 1.1688078972670768e-06, "loss": 0.5464745759963989, "step": 4111 }, { "epoch": 0.9480115273775216, "grad_norm": 1.4902811228226185, "learning_rate": 1.1684321864489783e-06, "loss": 0.5199334025382996, "step": 4112 }, { "epoch": 0.9482420749279539, "grad_norm": 1.6755931463282898, "learning_rate": 1.1680564511594355e-06, "loss": 0.5582367181777954, "step": 4113 }, { "epoch": 0.9484726224783862, "grad_norm": 1.3068527496246771, "learning_rate": 1.1676806914530388e-06, "loss": 0.4847801923751831, "step": 4114 }, { "epoch": 0.9487031700288184, "grad_norm": 1.4834802377017484, "learning_rate": 1.1673049073843818e-06, "loss": 0.3963737487792969, "step": 4115 }, { "epoch": 0.9489337175792507, "grad_norm": 1.3472234175773388, "learning_rate": 1.166929099008062e-06, "loss": 0.4763021171092987, "step": 4116 }, { "epoch": 0.949164265129683, "grad_norm": 1.334596061000354, "learning_rate": 1.1665532663786808e-06, "loss": 0.48453909158706665, "step": 4117 }, { "epoch": 0.9493948126801153, "grad_norm": 1.5556497216649185, "learning_rate": 1.1661774095508427e-06, "loss": 0.4530877470970154, "step": 4118 }, { "epoch": 0.9496253602305476, "grad_norm": 1.6439439863567922, "learning_rate": 1.1658015285791556e-06, "loss": 0.4559391438961029, "step": 4119 }, { "epoch": 0.9498559077809798, "grad_norm": 1.2742689425270366, "learning_rate": 1.1654256235182313e-06, "loss": 0.4056190550327301, "step": 4120 }, { "epoch": 0.9500864553314121, "grad_norm": 1.4031159218637288, "learning_rate": 1.1650496944226846e-06, "loss": 0.4559820294380188, "step": 4121 }, { "epoch": 0.9503170028818444, "grad_norm": 1.7520001627092183, "learning_rate": 1.1646737413471344e-06, "loss": 0.5147289037704468, "step": 4122 }, { "epoch": 0.9505475504322767, "grad_norm": 1.3527745912371363, "learning_rate": 1.1642977643462026e-06, "loss": 0.4429762363433838, "step": 4123 }, { "epoch": 0.950778097982709, "grad_norm": 1.478213249860498, "learning_rate": 1.1639217634745151e-06, "loss": 0.5090504884719849, "step": 4124 }, { "epoch": 0.9510086455331412, "grad_norm": 1.2839815904961693, "learning_rate": 1.1635457387867001e-06, "loss": 0.4712049067020416, "step": 4125 }, { "epoch": 0.9512391930835735, "grad_norm": 1.422837273489116, "learning_rate": 1.163169690337391e-06, "loss": 0.4788722097873688, "step": 4126 }, { "epoch": 0.9514697406340058, "grad_norm": 1.5690514399087891, "learning_rate": 1.1627936181812232e-06, "loss": 0.5272341370582581, "step": 4127 }, { "epoch": 0.9517002881844381, "grad_norm": 1.3989516399360993, "learning_rate": 1.162417522372836e-06, "loss": 0.476615309715271, "step": 4128 }, { "epoch": 0.9519308357348704, "grad_norm": 1.5696451971842293, "learning_rate": 1.1620414029668723e-06, "loss": 0.5830926895141602, "step": 4129 }, { "epoch": 0.9521613832853026, "grad_norm": 1.5100186493547834, "learning_rate": 1.1616652600179791e-06, "loss": 0.4271087646484375, "step": 4130 }, { "epoch": 0.9523919308357349, "grad_norm": 1.6424034375782277, "learning_rate": 1.161289093580805e-06, "loss": 0.450754851102829, "step": 4131 }, { "epoch": 0.9526224783861671, "grad_norm": 1.6045811944232449, "learning_rate": 1.1609129037100032e-06, "loss": 0.49380576610565186, "step": 4132 }, { "epoch": 0.9528530259365994, "grad_norm": 1.4716623019189798, "learning_rate": 1.1605366904602307e-06, "loss": 0.5741837024688721, "step": 4133 }, { "epoch": 0.9530835734870317, "grad_norm": 1.425182799360816, "learning_rate": 1.1601604538861472e-06, "loss": 0.48263800144195557, "step": 4134 }, { "epoch": 0.9533141210374639, "grad_norm": 1.4462099155347636, "learning_rate": 1.1597841940424155e-06, "loss": 0.48200082778930664, "step": 4135 }, { "epoch": 0.9535446685878962, "grad_norm": 1.5671368311646794, "learning_rate": 1.159407910983703e-06, "loss": 0.46894192695617676, "step": 4136 }, { "epoch": 0.9537752161383285, "grad_norm": 1.8281622353907203, "learning_rate": 1.159031604764679e-06, "loss": 0.4476175308227539, "step": 4137 }, { "epoch": 0.9540057636887608, "grad_norm": 1.6057888319214861, "learning_rate": 1.1586552754400174e-06, "loss": 0.48476332426071167, "step": 4138 }, { "epoch": 0.9542363112391931, "grad_norm": 1.6116514948965757, "learning_rate": 1.1582789230643945e-06, "loss": 0.5018280744552612, "step": 4139 }, { "epoch": 0.9544668587896253, "grad_norm": 1.3285285890301965, "learning_rate": 1.1579025476924911e-06, "loss": 0.3942481279373169, "step": 4140 }, { "epoch": 0.9546974063400576, "grad_norm": 1.4850340648020761, "learning_rate": 1.1575261493789897e-06, "loss": 0.4592955708503723, "step": 4141 }, { "epoch": 0.9549279538904899, "grad_norm": 1.4813387420747084, "learning_rate": 1.1571497281785775e-06, "loss": 0.4634397029876709, "step": 4142 }, { "epoch": 0.9551585014409222, "grad_norm": 1.3089084887646203, "learning_rate": 1.1567732841459452e-06, "loss": 0.47805315256118774, "step": 4143 }, { "epoch": 0.9553890489913545, "grad_norm": 1.5017286949965145, "learning_rate": 1.1563968173357849e-06, "loss": 0.38585370779037476, "step": 4144 }, { "epoch": 0.9556195965417867, "grad_norm": 1.6731042018184503, "learning_rate": 1.1560203278027943e-06, "loss": 0.5024282932281494, "step": 4145 }, { "epoch": 0.955850144092219, "grad_norm": 1.435684415467379, "learning_rate": 1.1556438156016731e-06, "loss": 0.5150408744812012, "step": 4146 }, { "epoch": 0.9560806916426513, "grad_norm": 1.3955310313452336, "learning_rate": 1.1552672807871247e-06, "loss": 0.4688153862953186, "step": 4147 }, { "epoch": 0.9563112391930836, "grad_norm": 1.5113914175565248, "learning_rate": 1.1548907234138555e-06, "loss": 0.5097207427024841, "step": 4148 }, { "epoch": 0.9565417867435159, "grad_norm": 1.4226145218267139, "learning_rate": 1.1545141435365755e-06, "loss": 0.5258755087852478, "step": 4149 }, { "epoch": 0.9567723342939481, "grad_norm": 1.5209010048986649, "learning_rate": 1.1541375412099977e-06, "loss": 0.4533793330192566, "step": 4150 }, { "epoch": 0.9570028818443804, "grad_norm": 1.4320238586920968, "learning_rate": 1.1537609164888386e-06, "loss": 0.4984521269798279, "step": 4151 }, { "epoch": 0.9572334293948127, "grad_norm": 1.3166306039112776, "learning_rate": 1.1533842694278182e-06, "loss": 0.46638578176498413, "step": 4152 }, { "epoch": 0.957463976945245, "grad_norm": 1.339478263676104, "learning_rate": 1.153007600081659e-06, "loss": 0.5370229482650757, "step": 4153 }, { "epoch": 0.9576945244956773, "grad_norm": 1.794431535413407, "learning_rate": 1.1526309085050869e-06, "loss": 0.5898431539535522, "step": 4154 }, { "epoch": 0.9579250720461095, "grad_norm": 1.3662613635895884, "learning_rate": 1.1522541947528317e-06, "loss": 0.4435083270072937, "step": 4155 }, { "epoch": 0.9581556195965418, "grad_norm": 1.220784689368442, "learning_rate": 1.1518774588796263e-06, "loss": 0.4848114848136902, "step": 4156 }, { "epoch": 0.9583861671469741, "grad_norm": 1.5830439158524807, "learning_rate": 1.1515007009402056e-06, "loss": 0.4745471477508545, "step": 4157 }, { "epoch": 0.9586167146974064, "grad_norm": 1.403752142570791, "learning_rate": 1.1511239209893093e-06, "loss": 0.5204712152481079, "step": 4158 }, { "epoch": 0.9588472622478387, "grad_norm": 1.4330320976709396, "learning_rate": 1.1507471190816794e-06, "loss": 0.4748867154121399, "step": 4159 }, { "epoch": 0.9590778097982708, "grad_norm": 1.245034738772996, "learning_rate": 1.150370295272061e-06, "loss": 0.48487526178359985, "step": 4160 }, { "epoch": 0.9593083573487031, "grad_norm": 1.4945063772490301, "learning_rate": 1.1499934496152033e-06, "loss": 0.5005877017974854, "step": 4161 }, { "epoch": 0.9595389048991354, "grad_norm": 1.5714269938752097, "learning_rate": 1.1496165821658577e-06, "loss": 0.46235162019729614, "step": 4162 }, { "epoch": 0.9597694524495677, "grad_norm": 1.4165164470664084, "learning_rate": 1.149239692978779e-06, "loss": 0.4043562412261963, "step": 4163 }, { "epoch": 0.96, "grad_norm": 1.3191665324250768, "learning_rate": 1.1488627821087254e-06, "loss": 0.401640385389328, "step": 4164 }, { "epoch": 0.9602305475504322, "grad_norm": 1.561998336702079, "learning_rate": 1.1484858496104585e-06, "loss": 0.4345995783805847, "step": 4165 }, { "epoch": 0.9604610951008645, "grad_norm": 1.589739589742711, "learning_rate": 1.1481088955387416e-06, "loss": 0.4832232892513275, "step": 4166 }, { "epoch": 0.9606916426512968, "grad_norm": 1.2721195951536524, "learning_rate": 1.1477319199483433e-06, "loss": 0.4984737038612366, "step": 4167 }, { "epoch": 0.9609221902017291, "grad_norm": 1.415582783119745, "learning_rate": 1.1473549228940341e-06, "loss": 0.5141474604606628, "step": 4168 }, { "epoch": 0.9611527377521614, "grad_norm": 1.3918933528998594, "learning_rate": 1.1469779044305874e-06, "loss": 0.46393290162086487, "step": 4169 }, { "epoch": 0.9613832853025936, "grad_norm": 1.5054675813872291, "learning_rate": 1.1466008646127798e-06, "loss": 0.44959738850593567, "step": 4170 }, { "epoch": 0.9616138328530259, "grad_norm": 1.6032904767337253, "learning_rate": 1.146223803495392e-06, "loss": 0.48199838399887085, "step": 4171 }, { "epoch": 0.9618443804034582, "grad_norm": 1.48749369000813, "learning_rate": 1.1458467211332066e-06, "loss": 0.40777313709259033, "step": 4172 }, { "epoch": 0.9620749279538905, "grad_norm": 1.5173779911426573, "learning_rate": 1.1454696175810098e-06, "loss": 0.48183488845825195, "step": 4173 }, { "epoch": 0.9623054755043228, "grad_norm": 1.3962963258973355, "learning_rate": 1.145092492893591e-06, "loss": 0.5259125828742981, "step": 4174 }, { "epoch": 0.962536023054755, "grad_norm": 1.4905678005204057, "learning_rate": 1.1447153471257422e-06, "loss": 0.5235443115234375, "step": 4175 }, { "epoch": 0.9627665706051873, "grad_norm": 1.3760213189461934, "learning_rate": 1.1443381803322588e-06, "loss": 0.5166189670562744, "step": 4176 }, { "epoch": 0.9629971181556196, "grad_norm": 1.5648914687391227, "learning_rate": 1.1439609925679398e-06, "loss": 0.4759864807128906, "step": 4177 }, { "epoch": 0.9632276657060519, "grad_norm": 1.4276484295690437, "learning_rate": 1.1435837838875854e-06, "loss": 0.46920865774154663, "step": 4178 }, { "epoch": 0.9634582132564842, "grad_norm": 1.735858922856514, "learning_rate": 1.1432065543460015e-06, "loss": 0.5612783432006836, "step": 4179 }, { "epoch": 0.9636887608069165, "grad_norm": 1.356617595870239, "learning_rate": 1.1428293039979947e-06, "loss": 0.5130957365036011, "step": 4180 }, { "epoch": 0.9639193083573487, "grad_norm": 1.4484495019945183, "learning_rate": 1.1424520328983762e-06, "loss": 0.49154847860336304, "step": 4181 }, { "epoch": 0.964149855907781, "grad_norm": 1.0824944418575104, "learning_rate": 1.1420747411019588e-06, "loss": 0.3869001567363739, "step": 4182 }, { "epoch": 0.9643804034582133, "grad_norm": 1.4337113954478253, "learning_rate": 1.1416974286635597e-06, "loss": 0.4921383559703827, "step": 4183 }, { "epoch": 0.9646109510086456, "grad_norm": 1.2793039473507872, "learning_rate": 1.1413200956379985e-06, "loss": 0.4888332486152649, "step": 4184 }, { "epoch": 0.9648414985590779, "grad_norm": 1.6971333270171032, "learning_rate": 1.1409427420800971e-06, "loss": 0.5268207788467407, "step": 4185 }, { "epoch": 0.9650720461095101, "grad_norm": 1.4734570779566258, "learning_rate": 1.1405653680446815e-06, "loss": 0.5089021921157837, "step": 4186 }, { "epoch": 0.9653025936599423, "grad_norm": 1.443469066686051, "learning_rate": 1.1401879735865805e-06, "loss": 0.4346863627433777, "step": 4187 }, { "epoch": 0.9655331412103746, "grad_norm": 1.5343802406915352, "learning_rate": 1.139810558760625e-06, "loss": 0.5371497869491577, "step": 4188 }, { "epoch": 0.9657636887608069, "grad_norm": 1.4494296397979445, "learning_rate": 1.1394331236216498e-06, "loss": 0.41937965154647827, "step": 4189 }, { "epoch": 0.9659942363112392, "grad_norm": 1.8240668562947975, "learning_rate": 1.1390556682244925e-06, "loss": 0.5104124546051025, "step": 4190 }, { "epoch": 0.9662247838616714, "grad_norm": 1.531665150197191, "learning_rate": 1.1386781926239927e-06, "loss": 0.4532161355018616, "step": 4191 }, { "epoch": 0.9664553314121037, "grad_norm": 1.64841184801151, "learning_rate": 1.1383006968749947e-06, "loss": 0.49004897475242615, "step": 4192 }, { "epoch": 0.966685878962536, "grad_norm": 1.3461756503958848, "learning_rate": 1.1379231810323438e-06, "loss": 0.5654203295707703, "step": 4193 }, { "epoch": 0.9669164265129683, "grad_norm": 1.448708484238038, "learning_rate": 1.1375456451508894e-06, "loss": 0.4646185636520386, "step": 4194 }, { "epoch": 0.9671469740634006, "grad_norm": 1.4243458240080633, "learning_rate": 1.1371680892854838e-06, "loss": 0.41903266310691833, "step": 4195 }, { "epoch": 0.9673775216138328, "grad_norm": 1.6130252006332106, "learning_rate": 1.1367905134909817e-06, "loss": 0.4434237480163574, "step": 4196 }, { "epoch": 0.9676080691642651, "grad_norm": 1.4819674775092757, "learning_rate": 1.1364129178222413e-06, "loss": 0.48367393016815186, "step": 4197 }, { "epoch": 0.9678386167146974, "grad_norm": 1.2984099347552687, "learning_rate": 1.1360353023341225e-06, "loss": 0.4555853009223938, "step": 4198 }, { "epoch": 0.9680691642651297, "grad_norm": 1.4467890861045218, "learning_rate": 1.1356576670814895e-06, "loss": 0.4551926851272583, "step": 4199 }, { "epoch": 0.968299711815562, "grad_norm": 1.3665755006252467, "learning_rate": 1.135280012119209e-06, "loss": 0.4708002805709839, "step": 4200 }, { "epoch": 0.9685302593659942, "grad_norm": 1.5757117044568527, "learning_rate": 1.13490233750215e-06, "loss": 0.4727732837200165, "step": 4201 }, { "epoch": 0.9687608069164265, "grad_norm": 1.4810458497183263, "learning_rate": 1.1345246432851846e-06, "loss": 0.4873931407928467, "step": 4202 }, { "epoch": 0.9689913544668588, "grad_norm": 1.2376264936878605, "learning_rate": 1.1341469295231879e-06, "loss": 0.5011585354804993, "step": 4203 }, { "epoch": 0.9692219020172911, "grad_norm": 1.550445723931056, "learning_rate": 1.1337691962710377e-06, "loss": 0.5259454250335693, "step": 4204 }, { "epoch": 0.9694524495677234, "grad_norm": 1.475427860284367, "learning_rate": 1.1333914435836152e-06, "loss": 0.4858463406562805, "step": 4205 }, { "epoch": 0.9696829971181556, "grad_norm": 1.5183455256295848, "learning_rate": 1.1330136715158034e-06, "loss": 0.3388391137123108, "step": 4206 }, { "epoch": 0.9699135446685879, "grad_norm": 1.6855547609136186, "learning_rate": 1.132635880122489e-06, "loss": 0.47413328289985657, "step": 4207 }, { "epoch": 0.9701440922190202, "grad_norm": 1.7557933633332012, "learning_rate": 1.1322580694585608e-06, "loss": 0.40621912479400635, "step": 4208 }, { "epoch": 0.9703746397694525, "grad_norm": 1.3208613343258073, "learning_rate": 1.1318802395789111e-06, "loss": 0.49066615104675293, "step": 4209 }, { "epoch": 0.9706051873198848, "grad_norm": 1.3654778936529173, "learning_rate": 1.1315023905384346e-06, "loss": 0.3960093557834625, "step": 4210 }, { "epoch": 0.970835734870317, "grad_norm": 1.827857800057562, "learning_rate": 1.1311245223920285e-06, "loss": 0.49717360734939575, "step": 4211 }, { "epoch": 0.9710662824207493, "grad_norm": 1.4503837255791163, "learning_rate": 1.1307466351945934e-06, "loss": 0.47067588567733765, "step": 4212 }, { "epoch": 0.9712968299711816, "grad_norm": 1.5495719646282835, "learning_rate": 1.1303687290010327e-06, "loss": 0.5154353380203247, "step": 4213 }, { "epoch": 0.9715273775216138, "grad_norm": 1.4525324547254197, "learning_rate": 1.1299908038662512e-06, "loss": 0.4328816831111908, "step": 4214 }, { "epoch": 0.9717579250720461, "grad_norm": 1.536944432116303, "learning_rate": 1.1296128598451584e-06, "loss": 0.43496429920196533, "step": 4215 }, { "epoch": 0.9719884726224783, "grad_norm": 2.4125329863559943, "learning_rate": 1.129234896992666e-06, "loss": 0.4734534025192261, "step": 4216 }, { "epoch": 0.9722190201729106, "grad_norm": 1.2552514953990885, "learning_rate": 1.128856915363687e-06, "loss": 0.46757328510284424, "step": 4217 }, { "epoch": 0.9724495677233429, "grad_norm": 1.4054270022856143, "learning_rate": 1.1284789150131387e-06, "loss": 0.45532792806625366, "step": 4218 }, { "epoch": 0.9726801152737752, "grad_norm": 1.668526261128857, "learning_rate": 1.1281008959959403e-06, "loss": 0.45022863149642944, "step": 4219 }, { "epoch": 0.9729106628242075, "grad_norm": 1.6148563350303777, "learning_rate": 1.1277228583670146e-06, "loss": 0.48149728775024414, "step": 4220 }, { "epoch": 0.9731412103746397, "grad_norm": 1.3290471777296209, "learning_rate": 1.1273448021812863e-06, "loss": 0.48621249198913574, "step": 4221 }, { "epoch": 0.973371757925072, "grad_norm": 1.4123073963135984, "learning_rate": 1.126966727493683e-06, "loss": 0.4082604944705963, "step": 4222 }, { "epoch": 0.9736023054755043, "grad_norm": 1.4706751776044729, "learning_rate": 1.1265886343591348e-06, "loss": 0.46301013231277466, "step": 4223 }, { "epoch": 0.9738328530259366, "grad_norm": 1.742370781706499, "learning_rate": 1.1262105228325751e-06, "loss": 0.5629076957702637, "step": 4224 }, { "epoch": 0.9740634005763689, "grad_norm": 1.6392739760147546, "learning_rate": 1.1258323929689395e-06, "loss": 0.4499921202659607, "step": 4225 }, { "epoch": 0.9742939481268011, "grad_norm": 1.5255138727661328, "learning_rate": 1.125454244823166e-06, "loss": 0.4177723228931427, "step": 4226 }, { "epoch": 0.9745244956772334, "grad_norm": 1.6446007209672973, "learning_rate": 1.1250760784501956e-06, "loss": 0.5151868462562561, "step": 4227 }, { "epoch": 0.9747550432276657, "grad_norm": 1.5356213745996614, "learning_rate": 1.1246978939049724e-06, "loss": 0.461150586605072, "step": 4228 }, { "epoch": 0.974985590778098, "grad_norm": 1.3822737934397757, "learning_rate": 1.1243196912424426e-06, "loss": 0.4268735945224762, "step": 4229 }, { "epoch": 0.9752161383285303, "grad_norm": 1.2292644910946908, "learning_rate": 1.1239414705175547e-06, "loss": 0.41963332891464233, "step": 4230 }, { "epoch": 0.9754466858789625, "grad_norm": 1.66032983081408, "learning_rate": 1.1235632317852604e-06, "loss": 0.5295606255531311, "step": 4231 }, { "epoch": 0.9756772334293948, "grad_norm": 1.4509344335905492, "learning_rate": 1.123184975100514e-06, "loss": 0.494422972202301, "step": 4232 }, { "epoch": 0.9759077809798271, "grad_norm": 1.3661883066888274, "learning_rate": 1.1228067005182722e-06, "loss": 0.4872778058052063, "step": 4233 }, { "epoch": 0.9761383285302594, "grad_norm": 1.4615558807186686, "learning_rate": 1.1224284080934944e-06, "loss": 0.5760270357131958, "step": 4234 }, { "epoch": 0.9763688760806917, "grad_norm": 1.4806023359508287, "learning_rate": 1.1220500978811425e-06, "loss": 0.46235722303390503, "step": 4235 }, { "epoch": 0.976599423631124, "grad_norm": 1.9622654190849842, "learning_rate": 1.1216717699361808e-06, "loss": 0.5219517946243286, "step": 4236 }, { "epoch": 0.9768299711815562, "grad_norm": 1.4798295064212674, "learning_rate": 1.121293424313577e-06, "loss": 0.5163636207580566, "step": 4237 }, { "epoch": 0.9770605187319885, "grad_norm": 1.2412310305077237, "learning_rate": 1.1209150610683005e-06, "loss": 0.4972173571586609, "step": 4238 }, { "epoch": 0.9772910662824208, "grad_norm": 1.7411076626589757, "learning_rate": 1.1205366802553228e-06, "loss": 0.5234299898147583, "step": 4239 }, { "epoch": 0.9775216138328531, "grad_norm": 1.540993516834482, "learning_rate": 1.12015828192962e-06, "loss": 0.5402883291244507, "step": 4240 }, { "epoch": 0.9777521613832854, "grad_norm": 1.6444434333723918, "learning_rate": 1.1197798661461687e-06, "loss": 0.5082305073738098, "step": 4241 }, { "epoch": 0.9779827089337175, "grad_norm": 1.7783542476181406, "learning_rate": 1.1194014329599491e-06, "loss": 0.49959874153137207, "step": 4242 }, { "epoch": 0.9782132564841498, "grad_norm": 1.6275513366341, "learning_rate": 1.1190229824259427e-06, "loss": 0.4957766532897949, "step": 4243 }, { "epoch": 0.9784438040345821, "grad_norm": 1.4378821851714094, "learning_rate": 1.118644514599136e-06, "loss": 0.5325085520744324, "step": 4244 }, { "epoch": 0.9786743515850144, "grad_norm": 1.555991168807003, "learning_rate": 1.118266029534515e-06, "loss": 0.4923267364501953, "step": 4245 }, { "epoch": 0.9789048991354466, "grad_norm": 1.5474363935948672, "learning_rate": 1.1178875272870706e-06, "loss": 0.4696193337440491, "step": 4246 }, { "epoch": 0.9791354466858789, "grad_norm": 1.5847121893215383, "learning_rate": 1.1175090079117944e-06, "loss": 0.4032594561576843, "step": 4247 }, { "epoch": 0.9793659942363112, "grad_norm": 1.236816331938737, "learning_rate": 1.1171304714636817e-06, "loss": 0.41643059253692627, "step": 4248 }, { "epoch": 0.9795965417867435, "grad_norm": 1.3517082904179765, "learning_rate": 1.1167519179977305e-06, "loss": 0.44701042771339417, "step": 4249 }, { "epoch": 0.9798270893371758, "grad_norm": 1.6281507254385772, "learning_rate": 1.11637334756894e-06, "loss": 0.5249829888343811, "step": 4250 }, { "epoch": 0.980057636887608, "grad_norm": 1.395017390893199, "learning_rate": 1.1159947602323126e-06, "loss": 0.44194433093070984, "step": 4251 }, { "epoch": 0.9802881844380403, "grad_norm": 1.617259594653265, "learning_rate": 1.115616156042853e-06, "loss": 0.4252261817455292, "step": 4252 }, { "epoch": 0.9805187319884726, "grad_norm": 1.465003293721735, "learning_rate": 1.115237535055569e-06, "loss": 0.4745545983314514, "step": 4253 }, { "epoch": 0.9807492795389049, "grad_norm": 1.3491129015710808, "learning_rate": 1.11485889732547e-06, "loss": 0.4565798044204712, "step": 4254 }, { "epoch": 0.9809798270893372, "grad_norm": 1.4736219803772463, "learning_rate": 1.1144802429075676e-06, "loss": 0.5221571922302246, "step": 4255 }, { "epoch": 0.9812103746397695, "grad_norm": 1.295490167048315, "learning_rate": 1.114101571856877e-06, "loss": 0.4520608186721802, "step": 4256 }, { "epoch": 0.9814409221902017, "grad_norm": 1.422747328343904, "learning_rate": 1.1137228842284154e-06, "loss": 0.5013391375541687, "step": 4257 }, { "epoch": 0.981671469740634, "grad_norm": 1.3110684205211343, "learning_rate": 1.1133441800772015e-06, "loss": 0.4168761968612671, "step": 4258 }, { "epoch": 0.9819020172910663, "grad_norm": 1.3998776606261594, "learning_rate": 1.1129654594582572e-06, "loss": 0.49070900678634644, "step": 4259 }, { "epoch": 0.9821325648414986, "grad_norm": 1.4441181086061512, "learning_rate": 1.112586722426607e-06, "loss": 0.37209975719451904, "step": 4260 }, { "epoch": 0.9823631123919309, "grad_norm": 1.334384584407338, "learning_rate": 1.112207969037277e-06, "loss": 0.4262358248233795, "step": 4261 }, { "epoch": 0.9825936599423631, "grad_norm": 1.3159343622874629, "learning_rate": 1.1118291993452966e-06, "loss": 0.43453359603881836, "step": 4262 }, { "epoch": 0.9828242074927954, "grad_norm": 1.303119108796015, "learning_rate": 1.1114504134056967e-06, "loss": 0.47000735998153687, "step": 4263 }, { "epoch": 0.9830547550432277, "grad_norm": 1.4041476427264843, "learning_rate": 1.1110716112735113e-06, "loss": 0.5550954341888428, "step": 4264 }, { "epoch": 0.98328530259366, "grad_norm": 1.2951063796923508, "learning_rate": 1.1106927930037759e-06, "loss": 0.5161601901054382, "step": 4265 }, { "epoch": 0.9835158501440923, "grad_norm": 1.269823955589903, "learning_rate": 1.1103139586515295e-06, "loss": 0.49598196148872375, "step": 4266 }, { "epoch": 0.9837463976945245, "grad_norm": 1.2479693796740894, "learning_rate": 1.1099351082718127e-06, "loss": 0.4219573736190796, "step": 4267 }, { "epoch": 0.9839769452449568, "grad_norm": 1.5519518921664053, "learning_rate": 1.1095562419196678e-06, "loss": 0.4803532660007477, "step": 4268 }, { "epoch": 0.984207492795389, "grad_norm": 1.5190785508850868, "learning_rate": 1.1091773596501408e-06, "loss": 0.5081897377967834, "step": 4269 }, { "epoch": 0.9844380403458213, "grad_norm": 1.40111882638203, "learning_rate": 1.1087984615182795e-06, "loss": 0.3814433217048645, "step": 4270 }, { "epoch": 0.9846685878962536, "grad_norm": 1.5915243108834793, "learning_rate": 1.108419547579133e-06, "loss": 0.43628042936325073, "step": 4271 }, { "epoch": 0.9848991354466858, "grad_norm": 1.527363813385048, "learning_rate": 1.1080406178877543e-06, "loss": 0.4480942487716675, "step": 4272 }, { "epoch": 0.9851296829971181, "grad_norm": 1.4235030288497876, "learning_rate": 1.1076616724991983e-06, "loss": 0.4736294746398926, "step": 4273 }, { "epoch": 0.9853602305475504, "grad_norm": 1.4964534780647072, "learning_rate": 1.1072827114685206e-06, "loss": 0.4558507800102234, "step": 4274 }, { "epoch": 0.9855907780979827, "grad_norm": 1.3128324817273487, "learning_rate": 1.1069037348507812e-06, "loss": 0.43823373317718506, "step": 4275 }, { "epoch": 0.985821325648415, "grad_norm": 1.56555561288982, "learning_rate": 1.1065247427010414e-06, "loss": 0.5391641855239868, "step": 4276 }, { "epoch": 0.9860518731988472, "grad_norm": 1.4062251041212817, "learning_rate": 1.106145735074364e-06, "loss": 0.5643177628517151, "step": 4277 }, { "epoch": 0.9862824207492795, "grad_norm": 1.530134885597804, "learning_rate": 1.1057667120258163e-06, "loss": 0.49635857343673706, "step": 4278 }, { "epoch": 0.9865129682997118, "grad_norm": 1.6216547009902138, "learning_rate": 1.1053876736104654e-06, "loss": 0.5055712461471558, "step": 4279 }, { "epoch": 0.9867435158501441, "grad_norm": 1.392265005140282, "learning_rate": 1.1050086198833815e-06, "loss": 0.4772811830043793, "step": 4280 }, { "epoch": 0.9869740634005764, "grad_norm": 1.4534901655783465, "learning_rate": 1.1046295508996377e-06, "loss": 0.5212994813919067, "step": 4281 }, { "epoch": 0.9872046109510086, "grad_norm": 1.7569793496789246, "learning_rate": 1.1042504667143089e-06, "loss": 0.5265607237815857, "step": 4282 }, { "epoch": 0.9874351585014409, "grad_norm": 1.5716464684568976, "learning_rate": 1.1038713673824713e-06, "loss": 0.5931172370910645, "step": 4283 }, { "epoch": 0.9876657060518732, "grad_norm": 1.835044010809655, "learning_rate": 1.1034922529592046e-06, "loss": 0.37950432300567627, "step": 4284 }, { "epoch": 0.9878962536023055, "grad_norm": 1.513292412653091, "learning_rate": 1.1031131234995905e-06, "loss": 0.5008571147918701, "step": 4285 }, { "epoch": 0.9881268011527378, "grad_norm": 1.4057938261159666, "learning_rate": 1.102733979058712e-06, "loss": 0.5004929304122925, "step": 4286 }, { "epoch": 0.98835734870317, "grad_norm": 1.7178021958689218, "learning_rate": 1.1023548196916553e-06, "loss": 0.5646581649780273, "step": 4287 }, { "epoch": 0.9885878962536023, "grad_norm": 1.3763606862881304, "learning_rate": 1.1019756454535078e-06, "loss": 0.4696485996246338, "step": 4288 }, { "epoch": 0.9888184438040346, "grad_norm": 1.477189841532629, "learning_rate": 1.1015964563993598e-06, "loss": 0.5698454976081848, "step": 4289 }, { "epoch": 0.9890489913544669, "grad_norm": 1.4341233392476178, "learning_rate": 1.1012172525843037e-06, "loss": 0.44907110929489136, "step": 4290 }, { "epoch": 0.9892795389048992, "grad_norm": 1.3904171575550368, "learning_rate": 1.1008380340634337e-06, "loss": 0.447033554315567, "step": 4291 }, { "epoch": 0.9895100864553315, "grad_norm": 1.289432455784493, "learning_rate": 1.1004588008918466e-06, "loss": 0.4011508822441101, "step": 4292 }, { "epoch": 0.9897406340057637, "grad_norm": 1.333931067199064, "learning_rate": 1.1000795531246406e-06, "loss": 0.4825517237186432, "step": 4293 }, { "epoch": 0.989971181556196, "grad_norm": 1.4931399552561173, "learning_rate": 1.0997002908169163e-06, "loss": 0.5283117294311523, "step": 4294 }, { "epoch": 0.9902017291066283, "grad_norm": 1.5482168678011143, "learning_rate": 1.0993210140237777e-06, "loss": 0.48783427476882935, "step": 4295 }, { "epoch": 0.9904322766570606, "grad_norm": 1.5566253013073328, "learning_rate": 1.0989417228003283e-06, "loss": 0.4624701738357544, "step": 4296 }, { "epoch": 0.9906628242074927, "grad_norm": 1.5344364140491837, "learning_rate": 1.0985624172016759e-06, "loss": 0.3982674181461334, "step": 4297 }, { "epoch": 0.990893371757925, "grad_norm": 1.3341170614191478, "learning_rate": 1.09818309728293e-06, "loss": 0.507615864276886, "step": 4298 }, { "epoch": 0.9911239193083573, "grad_norm": 1.2567976733410893, "learning_rate": 1.0978037630992012e-06, "loss": 0.46235620975494385, "step": 4299 }, { "epoch": 0.9913544668587896, "grad_norm": 1.7589261659786115, "learning_rate": 1.097424414705603e-06, "loss": 0.4740482568740845, "step": 4300 }, { "epoch": 0.9915850144092219, "grad_norm": 1.4084586130190382, "learning_rate": 1.0970450521572508e-06, "loss": 0.45482778549194336, "step": 4301 }, { "epoch": 0.9918155619596541, "grad_norm": 1.406956819388606, "learning_rate": 1.096665675509262e-06, "loss": 0.4501914978027344, "step": 4302 }, { "epoch": 0.9920461095100864, "grad_norm": 1.4314348115046323, "learning_rate": 1.0962862848167564e-06, "loss": 0.3777778446674347, "step": 4303 }, { "epoch": 0.9922766570605187, "grad_norm": 1.6582396145460987, "learning_rate": 1.0959068801348552e-06, "loss": 0.4541711211204529, "step": 4304 }, { "epoch": 0.992507204610951, "grad_norm": 1.6170437681024339, "learning_rate": 1.0955274615186818e-06, "loss": 0.5733718872070312, "step": 4305 }, { "epoch": 0.9927377521613833, "grad_norm": 1.6380431554541108, "learning_rate": 1.0951480290233622e-06, "loss": 0.5367584228515625, "step": 4306 }, { "epoch": 0.9929682997118155, "grad_norm": 1.5021041158954818, "learning_rate": 1.0947685827040238e-06, "loss": 0.458662748336792, "step": 4307 }, { "epoch": 0.9931988472622478, "grad_norm": 1.5550346941158644, "learning_rate": 1.0943891226157961e-06, "loss": 0.457683265209198, "step": 4308 }, { "epoch": 0.9934293948126801, "grad_norm": 1.452724464754786, "learning_rate": 1.094009648813811e-06, "loss": 0.4951286315917969, "step": 4309 }, { "epoch": 0.9936599423631124, "grad_norm": 1.5676198431506971, "learning_rate": 1.0936301613532016e-06, "loss": 0.6215308904647827, "step": 4310 }, { "epoch": 0.9938904899135447, "grad_norm": 1.90910681269644, "learning_rate": 1.0932506602891041e-06, "loss": 0.5981507301330566, "step": 4311 }, { "epoch": 0.994121037463977, "grad_norm": 1.5298286854468381, "learning_rate": 1.0928711456766552e-06, "loss": 0.42787450551986694, "step": 4312 }, { "epoch": 0.9943515850144092, "grad_norm": 1.7160093773370675, "learning_rate": 1.0924916175709952e-06, "loss": 0.4542294442653656, "step": 4313 }, { "epoch": 0.9945821325648415, "grad_norm": 1.3930381925713722, "learning_rate": 1.0921120760272654e-06, "loss": 0.5470898151397705, "step": 4314 }, { "epoch": 0.9948126801152738, "grad_norm": 1.6713314091042928, "learning_rate": 1.0917325211006095e-06, "loss": 0.469581663608551, "step": 4315 }, { "epoch": 0.9950432276657061, "grad_norm": 1.5294920571651511, "learning_rate": 1.091352952846172e-06, "loss": 0.44600075483322144, "step": 4316 }, { "epoch": 0.9952737752161384, "grad_norm": 1.2648510479760464, "learning_rate": 1.0909733713191011e-06, "loss": 0.44750702381134033, "step": 4317 }, { "epoch": 0.9955043227665706, "grad_norm": 1.340203440617395, "learning_rate": 1.0905937765745457e-06, "loss": 0.5289393067359924, "step": 4318 }, { "epoch": 0.9957348703170029, "grad_norm": 1.5607494499506507, "learning_rate": 1.0902141686676569e-06, "loss": 0.5402871966362, "step": 4319 }, { "epoch": 0.9959654178674352, "grad_norm": 1.5050684797855236, "learning_rate": 1.089834547653588e-06, "loss": 0.522109866142273, "step": 4320 }, { "epoch": 0.9961959654178675, "grad_norm": 1.4180672798377163, "learning_rate": 1.0894549135874935e-06, "loss": 0.41185298562049866, "step": 4321 }, { "epoch": 0.9964265129682998, "grad_norm": 1.4566310616338858, "learning_rate": 1.089075266524531e-06, "loss": 0.5705811977386475, "step": 4322 }, { "epoch": 0.996657060518732, "grad_norm": 1.470813920648617, "learning_rate": 1.0886956065198586e-06, "loss": 0.560600996017456, "step": 4323 }, { "epoch": 0.9968876080691642, "grad_norm": 1.8636354943852207, "learning_rate": 1.088315933628638e-06, "loss": 0.4886099100112915, "step": 4324 }, { "epoch": 0.9971181556195965, "grad_norm": 1.465916297308428, "learning_rate": 1.0879362479060301e-06, "loss": 0.5046563148498535, "step": 4325 }, { "epoch": 0.9973487031700288, "grad_norm": 1.8418351566021292, "learning_rate": 1.0875565494072003e-06, "loss": 0.5192925930023193, "step": 4326 }, { "epoch": 0.997579250720461, "grad_norm": 1.7255732387200025, "learning_rate": 1.0871768381873152e-06, "loss": 0.41461387276649475, "step": 4327 }, { "epoch": 0.9978097982708933, "grad_norm": 1.423557709546974, "learning_rate": 1.0867971143015423e-06, "loss": 0.4424000680446625, "step": 4328 }, { "epoch": 0.9980403458213256, "grad_norm": 1.3391499583858686, "learning_rate": 1.086417377805051e-06, "loss": 0.5320795774459839, "step": 4329 }, { "epoch": 0.9982708933717579, "grad_norm": 1.32007573291002, "learning_rate": 1.0860376287530144e-06, "loss": 0.444003701210022, "step": 4330 }, { "epoch": 0.9985014409221902, "grad_norm": 1.4760538613024474, "learning_rate": 1.085657867200605e-06, "loss": 0.5243555307388306, "step": 4331 }, { "epoch": 0.9987319884726225, "grad_norm": 1.4502781068225374, "learning_rate": 1.0852780932029985e-06, "loss": 0.4801725149154663, "step": 4332 }, { "epoch": 0.9989625360230547, "grad_norm": 1.2833884817247785, "learning_rate": 1.0848983068153725e-06, "loss": 0.45034441351890564, "step": 4333 }, { "epoch": 0.999193083573487, "grad_norm": 1.5849689437640124, "learning_rate": 1.0845185080929055e-06, "loss": 0.5011032819747925, "step": 4334 }, { "epoch": 0.9994236311239193, "grad_norm": 1.4546910942796216, "learning_rate": 1.0841386970907784e-06, "loss": 0.5422689914703369, "step": 4335 }, { "epoch": 0.9996541786743516, "grad_norm": 1.4006767595332805, "learning_rate": 1.0837588738641742e-06, "loss": 0.48687103390693665, "step": 4336 }, { "epoch": 0.9998847262247839, "grad_norm": 1.4288954711940178, "learning_rate": 1.083379038468277e-06, "loss": 0.47676515579223633, "step": 4337 }, { "epoch": 1.0, "grad_norm": 3.106354912289367, "learning_rate": 1.0829991909582724e-06, "loss": 0.4786119759082794, "step": 4338 }, { "epoch": 1.0002305475504323, "grad_norm": 1.3871434401118967, "learning_rate": 1.0826193313893492e-06, "loss": 0.4178802967071533, "step": 4339 }, { "epoch": 1.0004610951008646, "grad_norm": 1.5442462018887193, "learning_rate": 1.0822394598166965e-06, "loss": 0.5447506904602051, "step": 4340 }, { "epoch": 1.0006916426512968, "grad_norm": 1.5787095830935702, "learning_rate": 1.0818595762955057e-06, "loss": 0.4838123321533203, "step": 4341 }, { "epoch": 1.0009221902017291, "grad_norm": 1.1408717499721517, "learning_rate": 1.08147968088097e-06, "loss": 0.4060467481613159, "step": 4342 }, { "epoch": 1.0011527377521614, "grad_norm": 1.7147604581934335, "learning_rate": 1.0810997736282846e-06, "loss": 0.5802867412567139, "step": 4343 }, { "epoch": 1.0013832853025937, "grad_norm": 1.4674016544687594, "learning_rate": 1.0807198545926456e-06, "loss": 0.40671414136886597, "step": 4344 }, { "epoch": 1.001613832853026, "grad_norm": 1.4142081560028723, "learning_rate": 1.0803399238292511e-06, "loss": 0.49008500576019287, "step": 4345 }, { "epoch": 1.0018443804034582, "grad_norm": 1.4937256123283484, "learning_rate": 1.0799599813933018e-06, "loss": 0.45358604192733765, "step": 4346 }, { "epoch": 1.0020749279538905, "grad_norm": 1.4709246428352223, "learning_rate": 1.0795800273399991e-06, "loss": 0.44286537170410156, "step": 4347 }, { "epoch": 1.0023054755043228, "grad_norm": 1.4440445642380524, "learning_rate": 1.079200061724546e-06, "loss": 0.41197699308395386, "step": 4348 }, { "epoch": 1.002536023054755, "grad_norm": 1.4020363593613934, "learning_rate": 1.0788200846021483e-06, "loss": 0.5265473127365112, "step": 4349 }, { "epoch": 1.0027665706051874, "grad_norm": 1.626211002371467, "learning_rate": 1.0784400960280115e-06, "loss": 0.5782182216644287, "step": 4350 }, { "epoch": 1.0029971181556196, "grad_norm": 1.5244546461931463, "learning_rate": 1.0780600960573452e-06, "loss": 0.46403801441192627, "step": 4351 }, { "epoch": 1.003227665706052, "grad_norm": 1.2339365132906865, "learning_rate": 1.0776800847453592e-06, "loss": 0.469561368227005, "step": 4352 }, { "epoch": 1.0034582132564842, "grad_norm": 1.2673633475315595, "learning_rate": 1.0773000621472646e-06, "loss": 0.5608446002006531, "step": 4353 }, { "epoch": 1.0036887608069165, "grad_norm": 1.6480925340630606, "learning_rate": 1.0769200283182752e-06, "loss": 0.534286379814148, "step": 4354 }, { "epoch": 1.0039193083573488, "grad_norm": 1.592904048213369, "learning_rate": 1.076539983313606e-06, "loss": 0.4879153370857239, "step": 4355 }, { "epoch": 1.004149855907781, "grad_norm": 1.7289069037550475, "learning_rate": 1.076159927188473e-06, "loss": 0.4208434224128723, "step": 4356 }, { "epoch": 1.0043804034582133, "grad_norm": 1.4619714495069853, "learning_rate": 1.0757798599980954e-06, "loss": 0.5276066064834595, "step": 4357 }, { "epoch": 1.0046109510086456, "grad_norm": 1.476134912638207, "learning_rate": 1.0753997817976923e-06, "loss": 0.44685155153274536, "step": 4358 }, { "epoch": 1.004841498559078, "grad_norm": 1.3336692974635336, "learning_rate": 1.0750196926424852e-06, "loss": 0.4210960865020752, "step": 4359 }, { "epoch": 1.0050720461095102, "grad_norm": 1.487533331972068, "learning_rate": 1.0746395925876972e-06, "loss": 0.5184726715087891, "step": 4360 }, { "epoch": 1.0053025936599425, "grad_norm": 1.4533904323160831, "learning_rate": 1.0742594816885528e-06, "loss": 0.46501922607421875, "step": 4361 }, { "epoch": 1.0055331412103747, "grad_norm": 1.4885879141852645, "learning_rate": 1.0738793600002781e-06, "loss": 0.43683815002441406, "step": 4362 }, { "epoch": 1.005763688760807, "grad_norm": 1.6128856913318383, "learning_rate": 1.0734992275781013e-06, "loss": 0.48467642068862915, "step": 4363 }, { "epoch": 1.0059942363112393, "grad_norm": 1.4816149851760412, "learning_rate": 1.0731190844772511e-06, "loss": 0.4620075225830078, "step": 4364 }, { "epoch": 1.0062247838616716, "grad_norm": 1.8688222432221726, "learning_rate": 1.0727389307529588e-06, "loss": 0.39401185512542725, "step": 4365 }, { "epoch": 1.0064553314121036, "grad_norm": 1.7537121860722333, "learning_rate": 1.072358766460456e-06, "loss": 0.5232348442077637, "step": 4366 }, { "epoch": 1.006685878962536, "grad_norm": 1.473686642896004, "learning_rate": 1.0719785916549778e-06, "loss": 0.5005226731300354, "step": 4367 }, { "epoch": 1.0069164265129682, "grad_norm": 1.3600032880524948, "learning_rate": 1.0715984063917589e-06, "loss": 0.505985677242279, "step": 4368 }, { "epoch": 1.0071469740634005, "grad_norm": 1.5263088445062156, "learning_rate": 1.0712182107260362e-06, "loss": 0.4981151819229126, "step": 4369 }, { "epoch": 1.0073775216138328, "grad_norm": 1.2501548242250369, "learning_rate": 1.070838004713048e-06, "loss": 0.4614086449146271, "step": 4370 }, { "epoch": 1.007608069164265, "grad_norm": 1.439429049840933, "learning_rate": 1.0704577884080352e-06, "loss": 0.4255724549293518, "step": 4371 }, { "epoch": 1.0078386167146973, "grad_norm": 1.6888152805440162, "learning_rate": 1.0700775618662385e-06, "loss": 0.4840260148048401, "step": 4372 }, { "epoch": 1.0080691642651296, "grad_norm": 1.583402549772347, "learning_rate": 1.069697325142901e-06, "loss": 0.5588693618774414, "step": 4373 }, { "epoch": 1.0082997118155619, "grad_norm": 1.5157181906879742, "learning_rate": 1.0693170782932674e-06, "loss": 0.5086601972579956, "step": 4374 }, { "epoch": 1.0085302593659942, "grad_norm": 1.439160179137555, "learning_rate": 1.0689368213725834e-06, "loss": 0.3936936855316162, "step": 4375 }, { "epoch": 1.0087608069164264, "grad_norm": 1.7344419771703223, "learning_rate": 1.0685565544360962e-06, "loss": 0.5114755034446716, "step": 4376 }, { "epoch": 1.0089913544668587, "grad_norm": 1.2434564308153218, "learning_rate": 1.0681762775390548e-06, "loss": 0.524695873260498, "step": 4377 }, { "epoch": 1.009221902017291, "grad_norm": 1.5809148564841888, "learning_rate": 1.0677959907367095e-06, "loss": 0.4670463800430298, "step": 4378 }, { "epoch": 1.0094524495677233, "grad_norm": 1.5730500794833102, "learning_rate": 1.0674156940843123e-06, "loss": 0.5309491157531738, "step": 4379 }, { "epoch": 1.0096829971181556, "grad_norm": 1.4093681895946824, "learning_rate": 1.0670353876371158e-06, "loss": 0.422494113445282, "step": 4380 }, { "epoch": 1.0099135446685878, "grad_norm": 1.4365767688932187, "learning_rate": 1.066655071450375e-06, "loss": 0.5568174123764038, "step": 4381 }, { "epoch": 1.0101440922190201, "grad_norm": 1.4363708269644129, "learning_rate": 1.0662747455793458e-06, "loss": 0.4783478379249573, "step": 4382 }, { "epoch": 1.0103746397694524, "grad_norm": 1.4277915820954656, "learning_rate": 1.0658944100792851e-06, "loss": 0.5188988447189331, "step": 4383 }, { "epoch": 1.0106051873198847, "grad_norm": 1.8516308226296496, "learning_rate": 1.0655140650054524e-06, "loss": 0.44151294231414795, "step": 4384 }, { "epoch": 1.010835734870317, "grad_norm": 1.317137900872795, "learning_rate": 1.0651337104131076e-06, "loss": 0.4886103868484497, "step": 4385 }, { "epoch": 1.0110662824207493, "grad_norm": 1.287712297930063, "learning_rate": 1.064753346357512e-06, "loss": 0.4710986018180847, "step": 4386 }, { "epoch": 1.0112968299711815, "grad_norm": 1.5084820581681782, "learning_rate": 1.064372972893929e-06, "loss": 0.5069217085838318, "step": 4387 }, { "epoch": 1.0115273775216138, "grad_norm": 1.6432564360854036, "learning_rate": 1.0639925900776226e-06, "loss": 0.49354636669158936, "step": 4388 }, { "epoch": 1.011757925072046, "grad_norm": 1.3698200894854542, "learning_rate": 1.0636121979638587e-06, "loss": 0.41078031063079834, "step": 4389 }, { "epoch": 1.0119884726224784, "grad_norm": 1.5910628800230602, "learning_rate": 1.0632317966079043e-06, "loss": 0.5079636573791504, "step": 4390 }, { "epoch": 1.0122190201729107, "grad_norm": 1.501052153907989, "learning_rate": 1.0628513860650272e-06, "loss": 0.4242056906223297, "step": 4391 }, { "epoch": 1.012449567723343, "grad_norm": 1.4571526068933165, "learning_rate": 1.0624709663904976e-06, "loss": 0.4912078380584717, "step": 4392 }, { "epoch": 1.0126801152737752, "grad_norm": 1.432903959990896, "learning_rate": 1.062090537639587e-06, "loss": 0.48734402656555176, "step": 4393 }, { "epoch": 1.0129106628242075, "grad_norm": 1.1916015715246997, "learning_rate": 1.0617100998675668e-06, "loss": 0.35444220900535583, "step": 4394 }, { "epoch": 1.0131412103746398, "grad_norm": 1.4116354536956222, "learning_rate": 1.0613296531297106e-06, "loss": 0.4598267674446106, "step": 4395 }, { "epoch": 1.013371757925072, "grad_norm": 1.493649072074075, "learning_rate": 1.0609491974812944e-06, "loss": 0.5195865631103516, "step": 4396 }, { "epoch": 1.0136023054755043, "grad_norm": 1.766065577527857, "learning_rate": 1.060568732977594e-06, "loss": 0.5357420444488525, "step": 4397 }, { "epoch": 1.0138328530259366, "grad_norm": 1.3453143231303708, "learning_rate": 1.0601882596738864e-06, "loss": 0.4296334981918335, "step": 4398 }, { "epoch": 1.014063400576369, "grad_norm": 1.5493487080133135, "learning_rate": 1.0598077776254508e-06, "loss": 0.46664875745773315, "step": 4399 }, { "epoch": 1.0142939481268012, "grad_norm": 1.304042047949052, "learning_rate": 1.0594272868875676e-06, "loss": 0.4178355932235718, "step": 4400 }, { "epoch": 1.0145244956772335, "grad_norm": 1.2861059048068348, "learning_rate": 1.0590467875155173e-06, "loss": 0.4932866394519806, "step": 4401 }, { "epoch": 1.0147550432276657, "grad_norm": 1.7354787006202048, "learning_rate": 1.0586662795645836e-06, "loss": 0.5387941598892212, "step": 4402 }, { "epoch": 1.014985590778098, "grad_norm": 1.665137984348258, "learning_rate": 1.0582857630900491e-06, "loss": 0.5688868761062622, "step": 4403 }, { "epoch": 1.0152161383285303, "grad_norm": 1.477824761922792, "learning_rate": 1.0579052381472002e-06, "loss": 0.45884644985198975, "step": 4404 }, { "epoch": 1.0154466858789626, "grad_norm": 1.7295708482538712, "learning_rate": 1.0575247047913222e-06, "loss": 0.4536563754081726, "step": 4405 }, { "epoch": 1.0156772334293949, "grad_norm": 1.6222586501020617, "learning_rate": 1.0571441630777034e-06, "loss": 0.39148998260498047, "step": 4406 }, { "epoch": 1.0159077809798271, "grad_norm": 1.640574201613157, "learning_rate": 1.0567636130616318e-06, "loss": 0.5296084880828857, "step": 4407 }, { "epoch": 1.0161383285302594, "grad_norm": 2.1458672337974254, "learning_rate": 1.0563830547983977e-06, "loss": 0.5447714328765869, "step": 4408 }, { "epoch": 1.0163688760806917, "grad_norm": 1.437566589215361, "learning_rate": 1.0560024883432926e-06, "loss": 0.5003344416618347, "step": 4409 }, { "epoch": 1.016599423631124, "grad_norm": 1.4621660414589328, "learning_rate": 1.0556219137516082e-06, "loss": 0.47146621346473694, "step": 4410 }, { "epoch": 1.0168299711815563, "grad_norm": 1.370045824066682, "learning_rate": 1.0552413310786383e-06, "loss": 0.44027960300445557, "step": 4411 }, { "epoch": 1.0170605187319885, "grad_norm": 1.35500440689804, "learning_rate": 1.054860740379678e-06, "loss": 0.3912389576435089, "step": 4412 }, { "epoch": 1.0172910662824208, "grad_norm": 1.4370390292161166, "learning_rate": 1.0544801417100227e-06, "loss": 0.4845673441886902, "step": 4413 }, { "epoch": 1.0175216138328531, "grad_norm": 1.546009165239184, "learning_rate": 1.0540995351249694e-06, "loss": 0.46626967191696167, "step": 4414 }, { "epoch": 1.0177521613832854, "grad_norm": 1.5178833656938138, "learning_rate": 1.0537189206798168e-06, "loss": 0.4380578398704529, "step": 4415 }, { "epoch": 1.0179827089337177, "grad_norm": 1.566646806091141, "learning_rate": 1.0533382984298635e-06, "loss": 0.4218786656856537, "step": 4416 }, { "epoch": 1.01821325648415, "grad_norm": 1.2308252036857967, "learning_rate": 1.0529576684304104e-06, "loss": 0.4456557631492615, "step": 4417 }, { "epoch": 1.0184438040345822, "grad_norm": 1.5800328705633249, "learning_rate": 1.052577030736759e-06, "loss": 0.5125781297683716, "step": 4418 }, { "epoch": 1.0186743515850145, "grad_norm": 1.2883404092674424, "learning_rate": 1.052196385404212e-06, "loss": 0.45318183302879333, "step": 4419 }, { "epoch": 1.0189048991354466, "grad_norm": 1.3112604347930223, "learning_rate": 1.0518157324880732e-06, "loss": 0.41572675108909607, "step": 4420 }, { "epoch": 1.0191354466858789, "grad_norm": 1.6426527091173992, "learning_rate": 1.0514350720436474e-06, "loss": 0.4221467971801758, "step": 4421 }, { "epoch": 1.0193659942363111, "grad_norm": 1.7855699658311184, "learning_rate": 1.0510544041262411e-06, "loss": 0.5241785049438477, "step": 4422 }, { "epoch": 1.0195965417867434, "grad_norm": 1.7878207477876489, "learning_rate": 1.0506737287911603e-06, "loss": 0.5138394832611084, "step": 4423 }, { "epoch": 1.0198270893371757, "grad_norm": 1.3052170365612978, "learning_rate": 1.0502930460937141e-06, "loss": 0.4385989308357239, "step": 4424 }, { "epoch": 1.020057636887608, "grad_norm": 1.8389031959070536, "learning_rate": 1.0499123560892119e-06, "loss": 0.5446112155914307, "step": 4425 }, { "epoch": 1.0202881844380403, "grad_norm": 1.5968459189575308, "learning_rate": 1.0495316588329632e-06, "loss": 0.42088472843170166, "step": 4426 }, { "epoch": 1.0205187319884725, "grad_norm": 1.380239084591332, "learning_rate": 1.04915095438028e-06, "loss": 0.40304017066955566, "step": 4427 }, { "epoch": 1.0207492795389048, "grad_norm": 1.3840006399284464, "learning_rate": 1.0487702427864742e-06, "loss": 0.4344356060028076, "step": 4428 }, { "epoch": 1.020979827089337, "grad_norm": 1.4981806010192171, "learning_rate": 1.0483895241068595e-06, "loss": 0.49237990379333496, "step": 4429 }, { "epoch": 1.0212103746397694, "grad_norm": 1.695916606130073, "learning_rate": 1.0480087983967503e-06, "loss": 0.5327268242835999, "step": 4430 }, { "epoch": 1.0214409221902017, "grad_norm": 1.6144239446081834, "learning_rate": 1.0476280657114621e-06, "loss": 0.5176074504852295, "step": 4431 }, { "epoch": 1.021671469740634, "grad_norm": 1.5447018911477126, "learning_rate": 1.0472473261063116e-06, "loss": 0.4684191942214966, "step": 4432 }, { "epoch": 1.0219020172910662, "grad_norm": 1.3925875102152703, "learning_rate": 1.046866579636616e-06, "loss": 0.38662853837013245, "step": 4433 }, { "epoch": 1.0221325648414985, "grad_norm": 1.6044931484850973, "learning_rate": 1.0464858263576939e-06, "loss": 0.5248677730560303, "step": 4434 }, { "epoch": 1.0223631123919308, "grad_norm": 1.546034740596121, "learning_rate": 1.046105066324865e-06, "loss": 0.4143308401107788, "step": 4435 }, { "epoch": 1.022593659942363, "grad_norm": 1.572368124431397, "learning_rate": 1.0457242995934492e-06, "loss": 0.5042206048965454, "step": 4436 }, { "epoch": 1.0228242074927953, "grad_norm": 1.2279861153562417, "learning_rate": 1.0453435262187686e-06, "loss": 0.47694748640060425, "step": 4437 }, { "epoch": 1.0230547550432276, "grad_norm": 1.7412013285642873, "learning_rate": 1.0449627462561456e-06, "loss": 0.3953937292098999, "step": 4438 }, { "epoch": 1.02328530259366, "grad_norm": 1.3965038538033099, "learning_rate": 1.0445819597609028e-06, "loss": 0.48966118693351746, "step": 4439 }, { "epoch": 1.0235158501440922, "grad_norm": 1.28557182576021, "learning_rate": 1.0442011667883652e-06, "loss": 0.36514124274253845, "step": 4440 }, { "epoch": 1.0237463976945245, "grad_norm": 1.401188513600808, "learning_rate": 1.0438203673938582e-06, "loss": 0.4037533402442932, "step": 4441 }, { "epoch": 1.0239769452449567, "grad_norm": 1.3235111960085384, "learning_rate": 1.0434395616327072e-06, "loss": 0.4208472967147827, "step": 4442 }, { "epoch": 1.024207492795389, "grad_norm": 1.5996053630906766, "learning_rate": 1.04305874956024e-06, "loss": 0.524165153503418, "step": 4443 }, { "epoch": 1.0244380403458213, "grad_norm": 1.2942402749474435, "learning_rate": 1.0426779312317843e-06, "loss": 0.39956557750701904, "step": 4444 }, { "epoch": 1.0246685878962536, "grad_norm": 1.4600178018333099, "learning_rate": 1.0422971067026694e-06, "loss": 0.45270049571990967, "step": 4445 }, { "epoch": 1.0248991354466859, "grad_norm": 1.3235016423492378, "learning_rate": 1.0419162760282247e-06, "loss": 0.4914137125015259, "step": 4446 }, { "epoch": 1.0251296829971182, "grad_norm": 1.49486336775705, "learning_rate": 1.0415354392637813e-06, "loss": 0.5092133283615112, "step": 4447 }, { "epoch": 1.0253602305475504, "grad_norm": 1.3532599291496046, "learning_rate": 1.0411545964646705e-06, "loss": 0.48098695278167725, "step": 4448 }, { "epoch": 1.0255907780979827, "grad_norm": 1.6127809694648108, "learning_rate": 1.040773747686225e-06, "loss": 0.5379494428634644, "step": 4449 }, { "epoch": 1.025821325648415, "grad_norm": 1.6433258148389769, "learning_rate": 1.0403928929837784e-06, "loss": 0.42458122968673706, "step": 4450 }, { "epoch": 1.0260518731988473, "grad_norm": 1.78483029766293, "learning_rate": 1.0400120324126645e-06, "loss": 0.49995291233062744, "step": 4451 }, { "epoch": 1.0262824207492796, "grad_norm": 1.736176405485827, "learning_rate": 1.039631166028218e-06, "loss": 0.500437319278717, "step": 4452 }, { "epoch": 1.0265129682997118, "grad_norm": 1.7033831160422268, "learning_rate": 1.0392502938857762e-06, "loss": 0.5602716207504272, "step": 4453 }, { "epoch": 1.0267435158501441, "grad_norm": 1.4186875936390437, "learning_rate": 1.0388694160406745e-06, "loss": 0.49821144342422485, "step": 4454 }, { "epoch": 1.0269740634005764, "grad_norm": 1.6203380931256655, "learning_rate": 1.0384885325482512e-06, "loss": 0.4660237431526184, "step": 4455 }, { "epoch": 1.0272046109510087, "grad_norm": 1.8296722968299048, "learning_rate": 1.0381076434638443e-06, "loss": 0.564801037311554, "step": 4456 }, { "epoch": 1.027435158501441, "grad_norm": 1.3996510699189642, "learning_rate": 1.0377267488427932e-06, "loss": 0.4113255441188812, "step": 4457 }, { "epoch": 1.0276657060518732, "grad_norm": 1.3659807096425378, "learning_rate": 1.0373458487404382e-06, "loss": 0.4769394099712372, "step": 4458 }, { "epoch": 1.0278962536023055, "grad_norm": 1.3384975798165966, "learning_rate": 1.0369649432121197e-06, "loss": 0.42901676893234253, "step": 4459 }, { "epoch": 1.0281268011527378, "grad_norm": 1.5476233273543298, "learning_rate": 1.0365840323131795e-06, "loss": 0.46431535482406616, "step": 4460 }, { "epoch": 1.02835734870317, "grad_norm": 1.2099397488415262, "learning_rate": 1.03620311609896e-06, "loss": 0.44773775339126587, "step": 4461 }, { "epoch": 1.0285878962536024, "grad_norm": 1.6798930628346755, "learning_rate": 1.0358221946248043e-06, "loss": 0.5518221855163574, "step": 4462 }, { "epoch": 1.0288184438040346, "grad_norm": 1.480004409369121, "learning_rate": 1.0354412679460568e-06, "loss": 0.5178619623184204, "step": 4463 }, { "epoch": 1.029048991354467, "grad_norm": 1.609320522926487, "learning_rate": 1.0350603361180609e-06, "loss": 0.49681615829467773, "step": 4464 }, { "epoch": 1.0292795389048992, "grad_norm": 1.4735231521597927, "learning_rate": 1.0346793991961634e-06, "loss": 0.40041211247444153, "step": 4465 }, { "epoch": 1.0295100864553315, "grad_norm": 1.2768035889523142, "learning_rate": 1.0342984572357102e-06, "loss": 0.4649925231933594, "step": 4466 }, { "epoch": 1.0297406340057638, "grad_norm": 1.7706832584466219, "learning_rate": 1.0339175102920478e-06, "loss": 0.6232483386993408, "step": 4467 }, { "epoch": 1.029971181556196, "grad_norm": 1.4245956852693253, "learning_rate": 1.0335365584205239e-06, "loss": 0.5647813081741333, "step": 4468 }, { "epoch": 1.0302017291066283, "grad_norm": 1.3076641167650391, "learning_rate": 1.0331556016764868e-06, "loss": 0.36746442317962646, "step": 4469 }, { "epoch": 1.0304322766570606, "grad_norm": 1.5775380660887826, "learning_rate": 1.0327746401152858e-06, "loss": 0.4890482723712921, "step": 4470 }, { "epoch": 1.030662824207493, "grad_norm": 1.733394771520275, "learning_rate": 1.0323936737922707e-06, "loss": 0.4886651635169983, "step": 4471 }, { "epoch": 1.0308933717579252, "grad_norm": 1.6224830403306456, "learning_rate": 1.0320127027627917e-06, "loss": 0.4815050959587097, "step": 4472 }, { "epoch": 1.0311239193083575, "grad_norm": 1.6980757353739753, "learning_rate": 1.0316317270821999e-06, "loss": 0.48830682039260864, "step": 4473 }, { "epoch": 1.0313544668587897, "grad_norm": 1.3969476743395202, "learning_rate": 1.0312507468058473e-06, "loss": 0.4594195783138275, "step": 4474 }, { "epoch": 1.031585014409222, "grad_norm": 1.607466777328964, "learning_rate": 1.0308697619890864e-06, "loss": 0.5010625123977661, "step": 4475 }, { "epoch": 1.0318155619596543, "grad_norm": 1.283561854988696, "learning_rate": 1.0304887726872699e-06, "loss": 0.4723336696624756, "step": 4476 }, { "epoch": 1.0320461095100864, "grad_norm": 1.3381329229380114, "learning_rate": 1.0301077789557519e-06, "loss": 0.45434504747390747, "step": 4477 }, { "epoch": 1.0322766570605186, "grad_norm": 1.377250204828909, "learning_rate": 1.029726780849887e-06, "loss": 0.48399144411087036, "step": 4478 }, { "epoch": 1.032507204610951, "grad_norm": 1.3862763857384357, "learning_rate": 1.0293457784250304e-06, "loss": 0.41350722312927246, "step": 4479 }, { "epoch": 1.0327377521613832, "grad_norm": 1.468557287853429, "learning_rate": 1.028964771736537e-06, "loss": 0.4828647971153259, "step": 4480 }, { "epoch": 1.0329682997118155, "grad_norm": 1.3435400829034796, "learning_rate": 1.0285837608397637e-06, "loss": 0.48510146141052246, "step": 4481 }, { "epoch": 1.0331988472622478, "grad_norm": 1.4106226741729768, "learning_rate": 1.0282027457900675e-06, "loss": 0.5286427140235901, "step": 4482 }, { "epoch": 1.03342939481268, "grad_norm": 1.6836246338288663, "learning_rate": 1.0278217266428054e-06, "loss": 0.5355654954910278, "step": 4483 }, { "epoch": 1.0336599423631123, "grad_norm": 1.388122836127959, "learning_rate": 1.0274407034533361e-06, "loss": 0.501393735408783, "step": 4484 }, { "epoch": 1.0338904899135446, "grad_norm": 1.407159922284391, "learning_rate": 1.0270596762770178e-06, "loss": 0.5080795288085938, "step": 4485 }, { "epoch": 1.0341210374639769, "grad_norm": 1.5608351056758434, "learning_rate": 1.0266786451692103e-06, "loss": 0.4568654000759125, "step": 4486 }, { "epoch": 1.0343515850144092, "grad_norm": 1.5451894749796247, "learning_rate": 1.026297610185273e-06, "loss": 0.5166634321212769, "step": 4487 }, { "epoch": 1.0345821325648414, "grad_norm": 1.625027529753705, "learning_rate": 1.0259165713805667e-06, "loss": 0.5145444869995117, "step": 4488 }, { "epoch": 1.0348126801152737, "grad_norm": 1.6809207183251986, "learning_rate": 1.0255355288104518e-06, "loss": 0.532972514629364, "step": 4489 }, { "epoch": 1.035043227665706, "grad_norm": 1.4595457080171315, "learning_rate": 1.0251544825302905e-06, "loss": 0.47362518310546875, "step": 4490 }, { "epoch": 1.0352737752161383, "grad_norm": 1.7731872017548376, "learning_rate": 1.0247734325954448e-06, "loss": 0.41648489236831665, "step": 4491 }, { "epoch": 1.0355043227665706, "grad_norm": 1.3859772973760465, "learning_rate": 1.0243923790612768e-06, "loss": 0.41394931077957153, "step": 4492 }, { "epoch": 1.0357348703170028, "grad_norm": 1.4656217428361447, "learning_rate": 1.0240113219831496e-06, "loss": 0.49118804931640625, "step": 4493 }, { "epoch": 1.0359654178674351, "grad_norm": 1.6694899845625315, "learning_rate": 1.0236302614164275e-06, "loss": 0.5071209669113159, "step": 4494 }, { "epoch": 1.0361959654178674, "grad_norm": 1.5269400513380635, "learning_rate": 1.023249197416474e-06, "loss": 0.43428805470466614, "step": 4495 }, { "epoch": 1.0364265129682997, "grad_norm": 1.599655780691587, "learning_rate": 1.0228681300386541e-06, "loss": 0.4495439827442169, "step": 4496 }, { "epoch": 1.036657060518732, "grad_norm": 1.6914403127897477, "learning_rate": 1.0224870593383326e-06, "loss": 0.5649271011352539, "step": 4497 }, { "epoch": 1.0368876080691642, "grad_norm": 1.7669982984032149, "learning_rate": 1.0221059853708758e-06, "loss": 0.5247937440872192, "step": 4498 }, { "epoch": 1.0371181556195965, "grad_norm": 1.9131453841840644, "learning_rate": 1.0217249081916489e-06, "loss": 0.5018899440765381, "step": 4499 }, { "epoch": 1.0373487031700288, "grad_norm": 1.6895237166192871, "learning_rate": 1.0213438278560188e-06, "loss": 0.3834974765777588, "step": 4500 }, { "epoch": 1.037579250720461, "grad_norm": 1.404878328837829, "learning_rate": 1.0209627444193527e-06, "loss": 0.45946773886680603, "step": 4501 }, { "epoch": 1.0378097982708934, "grad_norm": 1.3092615059102766, "learning_rate": 1.020581657937018e-06, "loss": 0.4812207520008087, "step": 4502 }, { "epoch": 1.0380403458213256, "grad_norm": 1.5145567812643126, "learning_rate": 1.0202005684643821e-06, "loss": 0.5373339653015137, "step": 4503 }, { "epoch": 1.038270893371758, "grad_norm": 1.4172584982767447, "learning_rate": 1.0198194760568144e-06, "loss": 0.5129649639129639, "step": 4504 }, { "epoch": 1.0385014409221902, "grad_norm": 1.2932618611676632, "learning_rate": 1.0194383807696824e-06, "loss": 0.41758543252944946, "step": 4505 }, { "epoch": 1.0387319884726225, "grad_norm": 1.3882475624615442, "learning_rate": 1.0190572826583559e-06, "loss": 0.39521563053131104, "step": 4506 }, { "epoch": 1.0389625360230548, "grad_norm": 1.524715494193292, "learning_rate": 1.018676181778205e-06, "loss": 0.4855652451515198, "step": 4507 }, { "epoch": 1.039193083573487, "grad_norm": 1.423908966609684, "learning_rate": 1.0182950781845988e-06, "loss": 0.4977639317512512, "step": 4508 }, { "epoch": 1.0394236311239193, "grad_norm": 1.5272276294299776, "learning_rate": 1.0179139719329079e-06, "loss": 0.543816089630127, "step": 4509 }, { "epoch": 1.0396541786743516, "grad_norm": 1.585484219875182, "learning_rate": 1.0175328630785035e-06, "loss": 0.4400789141654968, "step": 4510 }, { "epoch": 1.039884726224784, "grad_norm": 1.5540847499410908, "learning_rate": 1.0171517516767564e-06, "loss": 0.5500770211219788, "step": 4511 }, { "epoch": 1.0401152737752162, "grad_norm": 1.4513336491340072, "learning_rate": 1.0167706377830384e-06, "loss": 0.43814778327941895, "step": 4512 }, { "epoch": 1.0403458213256485, "grad_norm": 1.6266107876638078, "learning_rate": 1.016389521452721e-06, "loss": 0.538263738155365, "step": 4513 }, { "epoch": 1.0405763688760807, "grad_norm": 1.31961944446499, "learning_rate": 1.0160084027411766e-06, "loss": 0.4274001717567444, "step": 4514 }, { "epoch": 1.040806916426513, "grad_norm": 1.4814208238012136, "learning_rate": 1.015627281703778e-06, "loss": 0.5197881460189819, "step": 4515 }, { "epoch": 1.0410374639769453, "grad_norm": 1.6946538046452169, "learning_rate": 1.0152461583958979e-06, "loss": 0.43719834089279175, "step": 4516 }, { "epoch": 1.0412680115273776, "grad_norm": 1.4416740952187463, "learning_rate": 1.0148650328729096e-06, "loss": 0.4394899904727936, "step": 4517 }, { "epoch": 1.0414985590778099, "grad_norm": 1.579992854212008, "learning_rate": 1.0144839051901864e-06, "loss": 0.3841872811317444, "step": 4518 }, { "epoch": 1.0417291066282421, "grad_norm": 1.3549804204711746, "learning_rate": 1.0141027754031023e-06, "loss": 0.4959898591041565, "step": 4519 }, { "epoch": 1.0419596541786744, "grad_norm": 1.4775766757395223, "learning_rate": 1.0137216435670324e-06, "loss": 0.4911465346813202, "step": 4520 }, { "epoch": 1.0421902017291067, "grad_norm": 1.4224879080779271, "learning_rate": 1.01334050973735e-06, "loss": 0.5083625316619873, "step": 4521 }, { "epoch": 1.042420749279539, "grad_norm": 1.5446913974368652, "learning_rate": 1.01295937396943e-06, "loss": 0.446804940700531, "step": 4522 }, { "epoch": 1.0426512968299713, "grad_norm": 1.414836456184516, "learning_rate": 1.0125782363186482e-06, "loss": 0.3899458646774292, "step": 4523 }, { "epoch": 1.0428818443804035, "grad_norm": 1.3771886132474889, "learning_rate": 1.0121970968403794e-06, "loss": 0.4964328408241272, "step": 4524 }, { "epoch": 1.0431123919308358, "grad_norm": 1.8083793225311697, "learning_rate": 1.0118159555899993e-06, "loss": 0.4805730879306793, "step": 4525 }, { "epoch": 1.043342939481268, "grad_norm": 1.3549553002461836, "learning_rate": 1.0114348126228837e-06, "loss": 0.4567984938621521, "step": 4526 }, { "epoch": 1.0435734870317004, "grad_norm": 1.809108804929047, "learning_rate": 1.0110536679944087e-06, "loss": 0.5105732679367065, "step": 4527 }, { "epoch": 1.0438040345821327, "grad_norm": 1.6641150664487896, "learning_rate": 1.010672521759951e-06, "loss": 0.49492210149765015, "step": 4528 }, { "epoch": 1.044034582132565, "grad_norm": 1.3216915800378581, "learning_rate": 1.0102913739748869e-06, "loss": 0.3918877840042114, "step": 4529 }, { "epoch": 1.044265129682997, "grad_norm": 1.4336608956901862, "learning_rate": 1.009910224694593e-06, "loss": 0.48184382915496826, "step": 4530 }, { "epoch": 1.0444956772334293, "grad_norm": 1.2096752758028502, "learning_rate": 1.0095290739744465e-06, "loss": 0.47491276264190674, "step": 4531 }, { "epoch": 1.0447262247838616, "grad_norm": 1.6404923277184014, "learning_rate": 1.0091479218698248e-06, "loss": 0.43935298919677734, "step": 4532 }, { "epoch": 1.0449567723342938, "grad_norm": 1.5886667617497374, "learning_rate": 1.0087667684361056e-06, "loss": 0.4855877757072449, "step": 4533 }, { "epoch": 1.0451873198847261, "grad_norm": 1.459325025676877, "learning_rate": 1.0083856137286658e-06, "loss": 0.5075816512107849, "step": 4534 }, { "epoch": 1.0454178674351584, "grad_norm": 1.428685173416776, "learning_rate": 1.0080044578028834e-06, "loss": 0.44345924258232117, "step": 4535 }, { "epoch": 1.0456484149855907, "grad_norm": 1.2632777016131902, "learning_rate": 1.0076233007141368e-06, "loss": 0.4527851641178131, "step": 4536 }, { "epoch": 1.045878962536023, "grad_norm": 1.5014095581737672, "learning_rate": 1.007242142517804e-06, "loss": 0.4629090428352356, "step": 4537 }, { "epoch": 1.0461095100864553, "grad_norm": 1.6090876537997274, "learning_rate": 1.0068609832692627e-06, "loss": 0.4479471445083618, "step": 4538 }, { "epoch": 1.0463400576368875, "grad_norm": 1.453798875944705, "learning_rate": 1.0064798230238925e-06, "loss": 0.5213382244110107, "step": 4539 }, { "epoch": 1.0465706051873198, "grad_norm": 1.7004188694147924, "learning_rate": 1.006098661837071e-06, "loss": 0.4633294939994812, "step": 4540 }, { "epoch": 1.046801152737752, "grad_norm": 1.2759434531953768, "learning_rate": 1.0057174997641777e-06, "loss": 0.44942396879196167, "step": 4541 }, { "epoch": 1.0470317002881844, "grad_norm": 1.2842723268727798, "learning_rate": 1.005336336860591e-06, "loss": 0.4552151560783386, "step": 4542 }, { "epoch": 1.0472622478386167, "grad_norm": 1.483709026132755, "learning_rate": 1.00495517318169e-06, "loss": 0.4890771806240082, "step": 4543 }, { "epoch": 1.047492795389049, "grad_norm": 1.303122700347689, "learning_rate": 1.004574008782854e-06, "loss": 0.47421228885650635, "step": 4544 }, { "epoch": 1.0477233429394812, "grad_norm": 1.6851868929876013, "learning_rate": 1.004192843719462e-06, "loss": 0.6262589693069458, "step": 4545 }, { "epoch": 1.0479538904899135, "grad_norm": 1.4442935171267732, "learning_rate": 1.0038116780468935e-06, "loss": 0.4006143808364868, "step": 4546 }, { "epoch": 1.0481844380403458, "grad_norm": 1.4133817727166142, "learning_rate": 1.003430511820528e-06, "loss": 0.44862663745880127, "step": 4547 }, { "epoch": 1.048414985590778, "grad_norm": 1.5403193933763355, "learning_rate": 1.0030493450957445e-06, "loss": 0.5606727600097656, "step": 4548 }, { "epoch": 1.0486455331412103, "grad_norm": 1.3748512451721946, "learning_rate": 1.0026681779279233e-06, "loss": 0.47314101457595825, "step": 4549 }, { "epoch": 1.0488760806916426, "grad_norm": 1.6094501862176287, "learning_rate": 1.002287010372443e-06, "loss": 0.44440731406211853, "step": 4550 }, { "epoch": 1.049106628242075, "grad_norm": 1.35134755450168, "learning_rate": 1.0019058424846842e-06, "loss": 0.4923070967197418, "step": 4551 }, { "epoch": 1.0493371757925072, "grad_norm": 1.5637255748005057, "learning_rate": 1.0015246743200266e-06, "loss": 0.4886152148246765, "step": 4552 }, { "epoch": 1.0495677233429395, "grad_norm": 1.6856486366703245, "learning_rate": 1.0011435059338494e-06, "loss": 0.5305925607681274, "step": 4553 }, { "epoch": 1.0497982708933717, "grad_norm": 1.579540935604605, "learning_rate": 1.0007623373815323e-06, "loss": 0.42822471261024475, "step": 4554 }, { "epoch": 1.050028818443804, "grad_norm": 1.5491845784693112, "learning_rate": 1.0003811687184562e-06, "loss": 0.5669623017311096, "step": 4555 }, { "epoch": 1.0502593659942363, "grad_norm": 1.5132164274116935, "learning_rate": 1e-06, "loss": 0.45508894324302673, "step": 4556 }, { "epoch": 1.0504899135446686, "grad_norm": 1.3829998425737389, "learning_rate": 9.996188312815435e-07, "loss": 0.4438894987106323, "step": 4557 }, { "epoch": 1.0507204610951009, "grad_norm": 1.7413175640700402, "learning_rate": 9.992376626184676e-07, "loss": 0.5677859783172607, "step": 4558 }, { "epoch": 1.0509510086455331, "grad_norm": 1.8839030912732315, "learning_rate": 9.988564940661508e-07, "loss": 0.5217305421829224, "step": 4559 }, { "epoch": 1.0511815561959654, "grad_norm": 1.2410955600288431, "learning_rate": 9.984753256799737e-07, "loss": 0.3913061320781708, "step": 4560 }, { "epoch": 1.0514121037463977, "grad_norm": 1.5511360318143517, "learning_rate": 9.980941575153157e-07, "loss": 0.48233336210250854, "step": 4561 }, { "epoch": 1.05164265129683, "grad_norm": 1.4454747638053118, "learning_rate": 9.977129896275568e-07, "loss": 0.45223701000213623, "step": 4562 }, { "epoch": 1.0518731988472623, "grad_norm": 1.6341160882250192, "learning_rate": 9.97331822072077e-07, "loss": 0.5448843836784363, "step": 4563 }, { "epoch": 1.0521037463976946, "grad_norm": 1.5670186133404156, "learning_rate": 9.969506549042556e-07, "loss": 0.4266091585159302, "step": 4564 }, { "epoch": 1.0523342939481268, "grad_norm": 1.5920644599913252, "learning_rate": 9.965694881794719e-07, "loss": 0.4513993263244629, "step": 4565 }, { "epoch": 1.0525648414985591, "grad_norm": 1.6763509325376744, "learning_rate": 9.961883219531064e-07, "loss": 0.44799959659576416, "step": 4566 }, { "epoch": 1.0527953890489914, "grad_norm": 2.153193445948826, "learning_rate": 9.95807156280538e-07, "loss": 0.5780029296875, "step": 4567 }, { "epoch": 1.0530259365994237, "grad_norm": 1.5985216986617043, "learning_rate": 9.954259912171462e-07, "loss": 0.4681410789489746, "step": 4568 }, { "epoch": 1.053256484149856, "grad_norm": 1.5646561466762618, "learning_rate": 9.9504482681831e-07, "loss": 0.49490541219711304, "step": 4569 }, { "epoch": 1.0534870317002882, "grad_norm": 1.3876036304903236, "learning_rate": 9.946636631394091e-07, "loss": 0.43849432468414307, "step": 4570 }, { "epoch": 1.0537175792507205, "grad_norm": 1.2547290934910935, "learning_rate": 9.942825002358227e-07, "loss": 0.3409537672996521, "step": 4571 }, { "epoch": 1.0539481268011528, "grad_norm": 1.5892871152279466, "learning_rate": 9.939013381629288e-07, "loss": 0.4381803274154663, "step": 4572 }, { "epoch": 1.054178674351585, "grad_norm": 1.3553936906788953, "learning_rate": 9.935201769761077e-07, "loss": 0.4395095109939575, "step": 4573 }, { "epoch": 1.0544092219020174, "grad_norm": 1.3843041469920634, "learning_rate": 9.931390167307374e-07, "loss": 0.4942593574523926, "step": 4574 }, { "epoch": 1.0546397694524496, "grad_norm": 1.6627568420794305, "learning_rate": 9.927578574821961e-07, "loss": 0.4674314856529236, "step": 4575 }, { "epoch": 1.054870317002882, "grad_norm": 1.5977132892055355, "learning_rate": 9.923766992858633e-07, "loss": 0.4729968011379242, "step": 4576 }, { "epoch": 1.0551008645533142, "grad_norm": 1.7001005285115778, "learning_rate": 9.919955421971168e-07, "loss": 0.5141623616218567, "step": 4577 }, { "epoch": 1.0553314121037465, "grad_norm": 1.503844678970446, "learning_rate": 9.916143862713341e-07, "loss": 0.44645851850509644, "step": 4578 }, { "epoch": 1.0555619596541788, "grad_norm": 1.630335682025123, "learning_rate": 9.912332315638947e-07, "loss": 0.5603121519088745, "step": 4579 }, { "epoch": 1.055792507204611, "grad_norm": 1.3908420909888959, "learning_rate": 9.908520781301751e-07, "loss": 0.4082290232181549, "step": 4580 }, { "epoch": 1.0560230547550433, "grad_norm": 1.369787996525026, "learning_rate": 9.904709260255532e-07, "loss": 0.48363831639289856, "step": 4581 }, { "epoch": 1.0562536023054756, "grad_norm": 1.4093134407140262, "learning_rate": 9.90089775305407e-07, "loss": 0.5562942028045654, "step": 4582 }, { "epoch": 1.0564841498559079, "grad_norm": 1.490643675328337, "learning_rate": 9.897086260251132e-07, "loss": 0.4740902781486511, "step": 4583 }, { "epoch": 1.0567146974063402, "grad_norm": 1.4465043958177053, "learning_rate": 9.893274782400493e-07, "loss": 0.46482330560684204, "step": 4584 }, { "epoch": 1.0569452449567724, "grad_norm": 1.4739791043264163, "learning_rate": 9.889463320055912e-07, "loss": 0.5256012678146362, "step": 4585 }, { "epoch": 1.0571757925072045, "grad_norm": 1.4346601686074334, "learning_rate": 9.885651873771162e-07, "loss": 0.46507787704467773, "step": 4586 }, { "epoch": 1.0574063400576368, "grad_norm": 1.3895966625957024, "learning_rate": 9.88184044410001e-07, "loss": 0.47399261593818665, "step": 4587 }, { "epoch": 1.057636887608069, "grad_norm": 1.3868992540366305, "learning_rate": 9.878029031596208e-07, "loss": 0.4795050024986267, "step": 4588 }, { "epoch": 1.0578674351585013, "grad_norm": 1.3090753005692493, "learning_rate": 9.874217636813517e-07, "loss": 0.4199279844760895, "step": 4589 }, { "epoch": 1.0580979827089336, "grad_norm": 1.663501386491142, "learning_rate": 9.8704062603057e-07, "loss": 0.41600513458251953, "step": 4590 }, { "epoch": 1.058328530259366, "grad_norm": 1.3202559065579687, "learning_rate": 9.866594902626501e-07, "loss": 0.45088812708854675, "step": 4591 }, { "epoch": 1.0585590778097982, "grad_norm": 1.5365099799322879, "learning_rate": 9.86278356432968e-07, "loss": 0.47499629855155945, "step": 4592 }, { "epoch": 1.0587896253602305, "grad_norm": 1.6280612027171966, "learning_rate": 9.858972245968976e-07, "loss": 0.48040494322776794, "step": 4593 }, { "epoch": 1.0590201729106627, "grad_norm": 1.4282716545191367, "learning_rate": 9.855160948098135e-07, "loss": 0.5223349928855896, "step": 4594 }, { "epoch": 1.059250720461095, "grad_norm": 1.5420066669414958, "learning_rate": 9.851349671270907e-07, "loss": 0.46820712089538574, "step": 4595 }, { "epoch": 1.0594812680115273, "grad_norm": 1.5344743511950931, "learning_rate": 9.847538416041025e-07, "loss": 0.4419459402561188, "step": 4596 }, { "epoch": 1.0597118155619596, "grad_norm": 1.7026231172894162, "learning_rate": 9.84372718296222e-07, "loss": 0.4973066449165344, "step": 4597 }, { "epoch": 1.0599423631123919, "grad_norm": 1.6533749765494075, "learning_rate": 9.839915972588233e-07, "loss": 0.5010764002799988, "step": 4598 }, { "epoch": 1.0601729106628242, "grad_norm": 1.3352189477102197, "learning_rate": 9.83610478547279e-07, "loss": 0.4472631514072418, "step": 4599 }, { "epoch": 1.0604034582132564, "grad_norm": 1.4633853965912471, "learning_rate": 9.83229362216962e-07, "loss": 0.5191864371299744, "step": 4600 }, { "epoch": 1.0606340057636887, "grad_norm": 1.442422737166096, "learning_rate": 9.828482483232435e-07, "loss": 0.4557565450668335, "step": 4601 }, { "epoch": 1.060864553314121, "grad_norm": 1.4201969439884339, "learning_rate": 9.824671369214964e-07, "loss": 0.5049344301223755, "step": 4602 }, { "epoch": 1.0610951008645533, "grad_norm": 1.6670814032897683, "learning_rate": 9.820860280670922e-07, "loss": 0.47939494252204895, "step": 4603 }, { "epoch": 1.0613256484149856, "grad_norm": 1.658621194351885, "learning_rate": 9.817049218154012e-07, "loss": 0.429465115070343, "step": 4604 }, { "epoch": 1.0615561959654178, "grad_norm": 1.81916330142038, "learning_rate": 9.813238182217954e-07, "loss": 0.5290513634681702, "step": 4605 }, { "epoch": 1.0617867435158501, "grad_norm": 1.644457909311076, "learning_rate": 9.80942717341644e-07, "loss": 0.4728652536869049, "step": 4606 }, { "epoch": 1.0620172910662824, "grad_norm": 1.5999219561315778, "learning_rate": 9.805616192303176e-07, "loss": 0.498574435710907, "step": 4607 }, { "epoch": 1.0622478386167147, "grad_norm": 1.356976867296984, "learning_rate": 9.80180523943186e-07, "loss": 0.4974418878555298, "step": 4608 }, { "epoch": 1.062478386167147, "grad_norm": 1.5988775450204102, "learning_rate": 9.797994315356178e-07, "loss": 0.5187455415725708, "step": 4609 }, { "epoch": 1.0627089337175792, "grad_norm": 1.4676791173009547, "learning_rate": 9.79418342062982e-07, "loss": 0.4465523958206177, "step": 4610 }, { "epoch": 1.0629394812680115, "grad_norm": 1.4881247830158704, "learning_rate": 9.790372555806472e-07, "loss": 0.38395804166793823, "step": 4611 }, { "epoch": 1.0631700288184438, "grad_norm": 1.2625355877596527, "learning_rate": 9.786561721439813e-07, "loss": 0.4082638621330261, "step": 4612 }, { "epoch": 1.063400576368876, "grad_norm": 1.3885240890249793, "learning_rate": 9.782750918083515e-07, "loss": 0.4943958520889282, "step": 4613 }, { "epoch": 1.0636311239193084, "grad_norm": 1.3863404713614964, "learning_rate": 9.778940146291243e-07, "loss": 0.48641303181648254, "step": 4614 }, { "epoch": 1.0638616714697406, "grad_norm": 1.5048568512606781, "learning_rate": 9.77512940661667e-07, "loss": 0.5512930154800415, "step": 4615 }, { "epoch": 1.064092219020173, "grad_norm": 1.7370636910994324, "learning_rate": 9.77131869961346e-07, "loss": 0.5123332142829895, "step": 4616 }, { "epoch": 1.0643227665706052, "grad_norm": 1.4868846938288016, "learning_rate": 9.76750802583526e-07, "loss": 0.4741990566253662, "step": 4617 }, { "epoch": 1.0645533141210375, "grad_norm": 1.3478064232580702, "learning_rate": 9.763697385835725e-07, "loss": 0.46791690587997437, "step": 4618 }, { "epoch": 1.0647838616714698, "grad_norm": 1.3689702474752652, "learning_rate": 9.759886780168505e-07, "loss": 0.4555216133594513, "step": 4619 }, { "epoch": 1.065014409221902, "grad_norm": 1.6358498544131885, "learning_rate": 9.756076209387233e-07, "loss": 0.5309184193611145, "step": 4620 }, { "epoch": 1.0652449567723343, "grad_norm": 1.3971481165355712, "learning_rate": 9.752265674045553e-07, "loss": 0.47507262229919434, "step": 4621 }, { "epoch": 1.0654755043227666, "grad_norm": 1.5596009652461436, "learning_rate": 9.748455174697094e-07, "loss": 0.5707837343215942, "step": 4622 }, { "epoch": 1.065706051873199, "grad_norm": 1.9435380622940854, "learning_rate": 9.744644711895479e-07, "loss": 0.45958513021469116, "step": 4623 }, { "epoch": 1.0659365994236312, "grad_norm": 1.5800769453439238, "learning_rate": 9.740834286194334e-07, "loss": 0.4757344126701355, "step": 4624 }, { "epoch": 1.0661671469740635, "grad_norm": 1.6339645956094502, "learning_rate": 9.73702389814727e-07, "loss": 0.4484025835990906, "step": 4625 }, { "epoch": 1.0663976945244957, "grad_norm": 1.6753880967774484, "learning_rate": 9.733213548307896e-07, "loss": 0.4026743173599243, "step": 4626 }, { "epoch": 1.066628242074928, "grad_norm": 1.8514450132857911, "learning_rate": 9.72940323722982e-07, "loss": 0.5340179204940796, "step": 4627 }, { "epoch": 1.0668587896253603, "grad_norm": 1.6161086513447296, "learning_rate": 9.72559296546664e-07, "loss": 0.4515274167060852, "step": 4628 }, { "epoch": 1.0670893371757926, "grad_norm": 1.866066478952204, "learning_rate": 9.72178273357195e-07, "loss": 0.49658486247062683, "step": 4629 }, { "epoch": 1.0673198847262249, "grad_norm": 1.6194964007936918, "learning_rate": 9.717972542099326e-07, "loss": 0.4344639182090759, "step": 4630 }, { "epoch": 1.0675504322766571, "grad_norm": 1.6063008031788568, "learning_rate": 9.714162391602363e-07, "loss": 0.5005955100059509, "step": 4631 }, { "epoch": 1.0677809798270894, "grad_norm": 1.7066518802868487, "learning_rate": 9.710352282634632e-07, "loss": 0.6442652940750122, "step": 4632 }, { "epoch": 1.0680115273775217, "grad_norm": 1.3344273253931838, "learning_rate": 9.706542215749697e-07, "loss": 0.4573819637298584, "step": 4633 }, { "epoch": 1.068242074927954, "grad_norm": 1.3322530236364596, "learning_rate": 9.702732191501127e-07, "loss": 0.4596165716648102, "step": 4634 }, { "epoch": 1.0684726224783863, "grad_norm": 1.4873477616834596, "learning_rate": 9.69892221044248e-07, "loss": 0.4277721047401428, "step": 4635 }, { "epoch": 1.0687031700288185, "grad_norm": 1.3152713454926654, "learning_rate": 9.6951122731273e-07, "loss": 0.39352816343307495, "step": 4636 }, { "epoch": 1.0689337175792508, "grad_norm": 1.4248950438905919, "learning_rate": 9.691302380109138e-07, "loss": 0.4209185242652893, "step": 4637 }, { "epoch": 1.069164265129683, "grad_norm": 1.3774255316125943, "learning_rate": 9.687492531941528e-07, "loss": 0.47638988494873047, "step": 4638 }, { "epoch": 1.0693948126801152, "grad_norm": 1.7927969058207647, "learning_rate": 9.683682729178e-07, "loss": 0.5114691257476807, "step": 4639 }, { "epoch": 1.0696253602305474, "grad_norm": 2.207610541748439, "learning_rate": 9.679872972372085e-07, "loss": 0.48080897331237793, "step": 4640 }, { "epoch": 1.0698559077809797, "grad_norm": 1.611428399695435, "learning_rate": 9.676063262077293e-07, "loss": 0.5234401822090149, "step": 4641 }, { "epoch": 1.070086455331412, "grad_norm": 1.3098949045011148, "learning_rate": 9.672253598847139e-07, "loss": 0.3622468113899231, "step": 4642 }, { "epoch": 1.0703170028818443, "grad_norm": 1.5524376763252634, "learning_rate": 9.668443983235131e-07, "loss": 0.45639339089393616, "step": 4643 }, { "epoch": 1.0705475504322766, "grad_norm": 1.2743609898532986, "learning_rate": 9.66463441579476e-07, "loss": 0.48489123582839966, "step": 4644 }, { "epoch": 1.0707780979827088, "grad_norm": 1.5672043285567319, "learning_rate": 9.660824897079525e-07, "loss": 0.5093181133270264, "step": 4645 }, { "epoch": 1.0710086455331411, "grad_norm": 1.4956950420324642, "learning_rate": 9.657015427642897e-07, "loss": 0.4775876998901367, "step": 4646 }, { "epoch": 1.0712391930835734, "grad_norm": 1.7652073053177477, "learning_rate": 9.653206008038362e-07, "loss": 0.47746625542640686, "step": 4647 }, { "epoch": 1.0714697406340057, "grad_norm": 1.2175686688635734, "learning_rate": 9.64939663881939e-07, "loss": 0.48147135972976685, "step": 4648 }, { "epoch": 1.071700288184438, "grad_norm": 1.524354935204934, "learning_rate": 9.645587320539434e-07, "loss": 0.3762315511703491, "step": 4649 }, { "epoch": 1.0719308357348702, "grad_norm": 1.8127361533613366, "learning_rate": 9.641778053751957e-07, "loss": 0.601677417755127, "step": 4650 }, { "epoch": 1.0721613832853025, "grad_norm": 1.390968763863657, "learning_rate": 9.6379688390104e-07, "loss": 0.559817910194397, "step": 4651 }, { "epoch": 1.0723919308357348, "grad_norm": 1.4914114417009505, "learning_rate": 9.634159676868202e-07, "loss": 0.4357626736164093, "step": 4652 }, { "epoch": 1.072622478386167, "grad_norm": 1.5444496888118033, "learning_rate": 9.630350567878804e-07, "loss": 0.4259532690048218, "step": 4653 }, { "epoch": 1.0728530259365994, "grad_norm": 1.3855450396071676, "learning_rate": 9.62654151259562e-07, "loss": 0.43510159850120544, "step": 4654 }, { "epoch": 1.0730835734870317, "grad_norm": 1.3142166864023803, "learning_rate": 9.622732511572065e-07, "loss": 0.47992539405822754, "step": 4655 }, { "epoch": 1.073314121037464, "grad_norm": 1.563639153951261, "learning_rate": 9.618923565361558e-07, "loss": 0.5122553110122681, "step": 4656 }, { "epoch": 1.0735446685878962, "grad_norm": 1.3477880780884035, "learning_rate": 9.615114674517492e-07, "loss": 0.4490904211997986, "step": 4657 }, { "epoch": 1.0737752161383285, "grad_norm": 1.6020560200523055, "learning_rate": 9.611305839593259e-07, "loss": 0.6317769885063171, "step": 4658 }, { "epoch": 1.0740057636887608, "grad_norm": 1.4896746014843723, "learning_rate": 9.60749706114224e-07, "loss": 0.6265285611152649, "step": 4659 }, { "epoch": 1.074236311239193, "grad_norm": 1.501646883657502, "learning_rate": 9.603688339717817e-07, "loss": 0.4698525369167328, "step": 4660 }, { "epoch": 1.0744668587896253, "grad_norm": 1.5908829607086772, "learning_rate": 9.599879675873358e-07, "loss": 0.49619418382644653, "step": 4661 }, { "epoch": 1.0746974063400576, "grad_norm": 1.323354232277254, "learning_rate": 9.596071070162217e-07, "loss": 0.46166038513183594, "step": 4662 }, { "epoch": 1.07492795389049, "grad_norm": 1.5115372830059979, "learning_rate": 9.592262523137747e-07, "loss": 0.44326937198638916, "step": 4663 }, { "epoch": 1.0751585014409222, "grad_norm": 1.4132443514252715, "learning_rate": 9.588454035353296e-07, "loss": 0.49652424454689026, "step": 4664 }, { "epoch": 1.0753890489913545, "grad_norm": 1.4411262259507003, "learning_rate": 9.584645607362186e-07, "loss": 0.42872053384780884, "step": 4665 }, { "epoch": 1.0756195965417867, "grad_norm": 1.3788553083650343, "learning_rate": 9.580837239717754e-07, "loss": 0.4546404480934143, "step": 4666 }, { "epoch": 1.075850144092219, "grad_norm": 1.6310927881369055, "learning_rate": 9.577028932973308e-07, "loss": 0.4827216863632202, "step": 4667 }, { "epoch": 1.0760806916426513, "grad_norm": 1.4044809082874001, "learning_rate": 9.573220687682154e-07, "loss": 0.48063087463378906, "step": 4668 }, { "epoch": 1.0763112391930836, "grad_norm": 2.3285506480260283, "learning_rate": 9.5694125043976e-07, "loss": 0.4597262740135193, "step": 4669 }, { "epoch": 1.0765417867435159, "grad_norm": 1.376042518158115, "learning_rate": 9.565604383672927e-07, "loss": 0.4230908751487732, "step": 4670 }, { "epoch": 1.0767723342939481, "grad_norm": 1.6404181287671118, "learning_rate": 9.561796326061417e-07, "loss": 0.41329729557037354, "step": 4671 }, { "epoch": 1.0770028818443804, "grad_norm": 1.336192639897515, "learning_rate": 9.55798833211635e-07, "loss": 0.36296525597572327, "step": 4672 }, { "epoch": 1.0772334293948127, "grad_norm": 1.4646789281240495, "learning_rate": 9.55418040239097e-07, "loss": 0.4914727807044983, "step": 4673 }, { "epoch": 1.077463976945245, "grad_norm": 1.390877382033457, "learning_rate": 9.550372537438547e-07, "loss": 0.4246331453323364, "step": 4674 }, { "epoch": 1.0776945244956773, "grad_norm": 1.4081852205814585, "learning_rate": 9.546564737812313e-07, "loss": 0.416849821805954, "step": 4675 }, { "epoch": 1.0779250720461095, "grad_norm": 1.4292647003721168, "learning_rate": 9.542757004065505e-07, "loss": 0.4291315972805023, "step": 4676 }, { "epoch": 1.0781556195965418, "grad_norm": 1.6415771810836237, "learning_rate": 9.538949336751353e-07, "loss": 0.403107225894928, "step": 4677 }, { "epoch": 1.078386167146974, "grad_norm": 1.4119962131192818, "learning_rate": 9.535141736423062e-07, "loss": 0.4733341932296753, "step": 4678 }, { "epoch": 1.0786167146974064, "grad_norm": 1.7859752811554186, "learning_rate": 9.531334203633838e-07, "loss": 0.588297963142395, "step": 4679 }, { "epoch": 1.0788472622478387, "grad_norm": 1.8273419442887104, "learning_rate": 9.527526738936885e-07, "loss": 0.544661283493042, "step": 4680 }, { "epoch": 1.079077809798271, "grad_norm": 1.4494540560127676, "learning_rate": 9.523719342885379e-07, "loss": 0.5208151340484619, "step": 4681 }, { "epoch": 1.0793083573487032, "grad_norm": 1.8810490080535522, "learning_rate": 9.5199120160325e-07, "loss": 0.5054324865341187, "step": 4682 }, { "epoch": 1.0795389048991355, "grad_norm": 1.5509547417738327, "learning_rate": 9.516104758931406e-07, "loss": 0.5247195959091187, "step": 4683 }, { "epoch": 1.0797694524495678, "grad_norm": 1.7616045237495106, "learning_rate": 9.512297572135259e-07, "loss": 0.4715186655521393, "step": 4684 }, { "epoch": 1.08, "grad_norm": 1.376815174594021, "learning_rate": 9.508490456197203e-07, "loss": 0.4610804319381714, "step": 4685 }, { "epoch": 1.0802305475504324, "grad_norm": 1.501280260470384, "learning_rate": 9.504683411670368e-07, "loss": 0.39732515811920166, "step": 4686 }, { "epoch": 1.0804610951008646, "grad_norm": 1.3857848874896455, "learning_rate": 9.500876439107881e-07, "loss": 0.508002758026123, "step": 4687 }, { "epoch": 1.080691642651297, "grad_norm": 1.3410811454931406, "learning_rate": 9.497069539062859e-07, "loss": 0.46706700325012207, "step": 4688 }, { "epoch": 1.0809221902017292, "grad_norm": 1.6751530232611822, "learning_rate": 9.493262712088395e-07, "loss": 0.5082959532737732, "step": 4689 }, { "epoch": 1.0811527377521615, "grad_norm": 1.4221868188021594, "learning_rate": 9.489455958737593e-07, "loss": 0.45620518922805786, "step": 4690 }, { "epoch": 1.0813832853025938, "grad_norm": 1.8105569017956895, "learning_rate": 9.485649279563527e-07, "loss": 0.5285652279853821, "step": 4691 }, { "epoch": 1.081613832853026, "grad_norm": 1.1979265453913894, "learning_rate": 9.481842675119267e-07, "loss": 0.3795422315597534, "step": 4692 }, { "epoch": 1.0818443804034583, "grad_norm": 1.662830387193791, "learning_rate": 9.478036145957881e-07, "loss": 0.42700281739234924, "step": 4693 }, { "epoch": 1.0820749279538906, "grad_norm": 1.6070968941325912, "learning_rate": 9.47422969263241e-07, "loss": 0.43620502948760986, "step": 4694 }, { "epoch": 1.0823054755043229, "grad_norm": 1.3925806824632918, "learning_rate": 9.470423315695899e-07, "loss": 0.501217782497406, "step": 4695 }, { "epoch": 1.0825360230547552, "grad_norm": 1.339834807838682, "learning_rate": 9.466617015701366e-07, "loss": 0.4491414427757263, "step": 4696 }, { "epoch": 1.0827665706051872, "grad_norm": 1.4479543684791027, "learning_rate": 9.462810793201833e-07, "loss": 0.4844500720500946, "step": 4697 }, { "epoch": 1.0829971181556195, "grad_norm": 1.9044814000505415, "learning_rate": 9.459004648750307e-07, "loss": 0.47131961584091187, "step": 4698 }, { "epoch": 1.0832276657060518, "grad_norm": 1.603910730442911, "learning_rate": 9.455198582899773e-07, "loss": 0.5120211839675903, "step": 4699 }, { "epoch": 1.083458213256484, "grad_norm": 1.3551244603653407, "learning_rate": 9.451392596203219e-07, "loss": 0.46080902218818665, "step": 4700 }, { "epoch": 1.0836887608069163, "grad_norm": 1.6646455172457901, "learning_rate": 9.447586689213617e-07, "loss": 0.46871304512023926, "step": 4701 }, { "epoch": 1.0839193083573486, "grad_norm": 1.4595951361347437, "learning_rate": 9.443780862483918e-07, "loss": 0.5012568235397339, "step": 4702 }, { "epoch": 1.084149855907781, "grad_norm": 1.4508081611111274, "learning_rate": 9.439975116567077e-07, "loss": 0.4377376437187195, "step": 4703 }, { "epoch": 1.0843804034582132, "grad_norm": 1.3718342044857916, "learning_rate": 9.436169452016024e-07, "loss": 0.44364726543426514, "step": 4704 }, { "epoch": 1.0846109510086455, "grad_norm": 1.619060845934508, "learning_rate": 9.432363869383682e-07, "loss": 0.4641938805580139, "step": 4705 }, { "epoch": 1.0848414985590777, "grad_norm": 1.7336364607563637, "learning_rate": 9.428558369222969e-07, "loss": 0.5737602710723877, "step": 4706 }, { "epoch": 1.08507204610951, "grad_norm": 1.7107358159172448, "learning_rate": 9.424752952086777e-07, "loss": 0.5098488330841064, "step": 4707 }, { "epoch": 1.0853025936599423, "grad_norm": 1.2019465668810627, "learning_rate": 9.420947618527997e-07, "loss": 0.35696643590927124, "step": 4708 }, { "epoch": 1.0855331412103746, "grad_norm": 1.4669534605111085, "learning_rate": 9.417142369099507e-07, "loss": 0.4182433485984802, "step": 4709 }, { "epoch": 1.0857636887608069, "grad_norm": 1.4801438236679534, "learning_rate": 9.413337204354166e-07, "loss": 0.4930199980735779, "step": 4710 }, { "epoch": 1.0859942363112391, "grad_norm": 1.4113635474707797, "learning_rate": 9.409532124844828e-07, "loss": 0.49246686697006226, "step": 4711 }, { "epoch": 1.0862247838616714, "grad_norm": 1.7711418276471607, "learning_rate": 9.405727131124326e-07, "loss": 0.4412927031517029, "step": 4712 }, { "epoch": 1.0864553314121037, "grad_norm": 1.4350333443942689, "learning_rate": 9.401922223745491e-07, "loss": 0.42520982027053833, "step": 4713 }, { "epoch": 1.086685878962536, "grad_norm": 1.6745037730870926, "learning_rate": 9.398117403261138e-07, "loss": 0.5159086585044861, "step": 4714 }, { "epoch": 1.0869164265129683, "grad_norm": 1.320729871652049, "learning_rate": 9.39431267022406e-07, "loss": 0.5348051190376282, "step": 4715 }, { "epoch": 1.0871469740634006, "grad_norm": 1.4807752036978776, "learning_rate": 9.390508025187054e-07, "loss": 0.5200084447860718, "step": 4716 }, { "epoch": 1.0873775216138328, "grad_norm": 1.7351487137422408, "learning_rate": 9.386703468702892e-07, "loss": 0.5356011390686035, "step": 4717 }, { "epoch": 1.0876080691642651, "grad_norm": 1.4606974960715933, "learning_rate": 9.382899001324334e-07, "loss": 0.49807971715927124, "step": 4718 }, { "epoch": 1.0878386167146974, "grad_norm": 1.4204133831852221, "learning_rate": 9.379094623604133e-07, "loss": 0.4936211407184601, "step": 4719 }, { "epoch": 1.0880691642651297, "grad_norm": 1.472033735820111, "learning_rate": 9.375290336095023e-07, "loss": 0.43047142028808594, "step": 4720 }, { "epoch": 1.088299711815562, "grad_norm": 1.6521573876351554, "learning_rate": 9.371486139349727e-07, "loss": 0.5489988327026367, "step": 4721 }, { "epoch": 1.0885302593659942, "grad_norm": 1.4462296906896397, "learning_rate": 9.36768203392096e-07, "loss": 0.3681810796260834, "step": 4722 }, { "epoch": 1.0887608069164265, "grad_norm": 1.6193248142724037, "learning_rate": 9.363878020361415e-07, "loss": 0.4829370677471161, "step": 4723 }, { "epoch": 1.0889913544668588, "grad_norm": 1.4582135448230624, "learning_rate": 9.360074099223772e-07, "loss": 0.51734459400177, "step": 4724 }, { "epoch": 1.089221902017291, "grad_norm": 1.6653806071052673, "learning_rate": 9.35627027106071e-07, "loss": 0.47395747900009155, "step": 4725 }, { "epoch": 1.0894524495677234, "grad_norm": 1.682967664113776, "learning_rate": 9.35246653642488e-07, "loss": 0.474406361579895, "step": 4726 }, { "epoch": 1.0896829971181556, "grad_norm": 1.7582029988340027, "learning_rate": 9.348662895868928e-07, "loss": 0.4683076739311218, "step": 4727 }, { "epoch": 1.089913544668588, "grad_norm": 1.3542305902132215, "learning_rate": 9.344859349945477e-07, "loss": 0.3666248917579651, "step": 4728 }, { "epoch": 1.0901440922190202, "grad_norm": 1.4958744858069852, "learning_rate": 9.341055899207149e-07, "loss": 0.42460423707962036, "step": 4729 }, { "epoch": 1.0903746397694525, "grad_norm": 1.2929965233797245, "learning_rate": 9.337252544206545e-07, "loss": 0.47737449407577515, "step": 4730 }, { "epoch": 1.0906051873198848, "grad_norm": 1.7759362318926004, "learning_rate": 9.33344928549625e-07, "loss": 0.48160216212272644, "step": 4731 }, { "epoch": 1.090835734870317, "grad_norm": 1.8201639638551033, "learning_rate": 9.32964612362884e-07, "loss": 0.4271983504295349, "step": 4732 }, { "epoch": 1.0910662824207493, "grad_norm": 1.5964518411792343, "learning_rate": 9.325843059156878e-07, "loss": 0.49104204773902893, "step": 4733 }, { "epoch": 1.0912968299711816, "grad_norm": 1.8237737626424653, "learning_rate": 9.322040092632901e-07, "loss": 0.4309648275375366, "step": 4734 }, { "epoch": 1.0915273775216139, "grad_norm": 1.6425169957458328, "learning_rate": 9.318237224609452e-07, "loss": 0.4608195424079895, "step": 4735 }, { "epoch": 1.0917579250720462, "grad_norm": 1.4917232104667926, "learning_rate": 9.314434455639039e-07, "loss": 0.47512125968933105, "step": 4736 }, { "epoch": 1.0919884726224784, "grad_norm": 1.3580334365102393, "learning_rate": 9.310631786274166e-07, "loss": 0.43002909421920776, "step": 4737 }, { "epoch": 1.0922190201729107, "grad_norm": 1.4761686095814857, "learning_rate": 9.306829217067326e-07, "loss": 0.4949305057525635, "step": 4738 }, { "epoch": 1.092449567723343, "grad_norm": 1.7661439414574969, "learning_rate": 9.30302674857099e-07, "loss": 0.5113263726234436, "step": 4739 }, { "epoch": 1.0926801152737753, "grad_norm": 1.4223065996038133, "learning_rate": 9.299224381337616e-07, "loss": 0.4186338186264038, "step": 4740 }, { "epoch": 1.0929106628242076, "grad_norm": 1.3736249275403172, "learning_rate": 9.295422115919646e-07, "loss": 0.4391184151172638, "step": 4741 }, { "epoch": 1.0931412103746398, "grad_norm": 1.6001208967535951, "learning_rate": 9.291619952869517e-07, "loss": 0.5554238557815552, "step": 4742 }, { "epoch": 1.0933717579250721, "grad_norm": 1.4372296033393044, "learning_rate": 9.287817892739641e-07, "loss": 0.5122871398925781, "step": 4743 }, { "epoch": 1.0936023054755044, "grad_norm": 1.481435629659421, "learning_rate": 9.284015936082413e-07, "loss": 0.5083051919937134, "step": 4744 }, { "epoch": 1.0938328530259367, "grad_norm": 1.3561535598473955, "learning_rate": 9.28021408345022e-07, "loss": 0.5710517168045044, "step": 4745 }, { "epoch": 1.094063400576369, "grad_norm": 1.1918247091292848, "learning_rate": 9.276412335395438e-07, "loss": 0.35339492559432983, "step": 4746 }, { "epoch": 1.0942939481268013, "grad_norm": 1.5524968954607061, "learning_rate": 9.272610692470412e-07, "loss": 0.44773539900779724, "step": 4747 }, { "epoch": 1.0945244956772335, "grad_norm": 1.5477488686398166, "learning_rate": 9.268809155227489e-07, "loss": 0.47520720958709717, "step": 4748 }, { "epoch": 1.0947550432276656, "grad_norm": 1.4714161216323214, "learning_rate": 9.265007724218987e-07, "loss": 0.44710588455200195, "step": 4749 }, { "epoch": 1.0949855907780979, "grad_norm": 2.110410170202345, "learning_rate": 9.261206399997216e-07, "loss": 0.49495795369148254, "step": 4750 }, { "epoch": 1.0952161383285302, "grad_norm": 1.5374348664848534, "learning_rate": 9.257405183114472e-07, "loss": 0.4427485167980194, "step": 4751 }, { "epoch": 1.0954466858789624, "grad_norm": 1.6718709382696622, "learning_rate": 9.253604074123029e-07, "loss": 0.5643877983093262, "step": 4752 }, { "epoch": 1.0956772334293947, "grad_norm": 1.2647633162037908, "learning_rate": 9.249803073575147e-07, "loss": 0.37769174575805664, "step": 4753 }, { "epoch": 1.095907780979827, "grad_norm": 1.3828941981233933, "learning_rate": 9.246002182023078e-07, "loss": 0.4493235945701599, "step": 4754 }, { "epoch": 1.0961383285302593, "grad_norm": 1.4529605693419783, "learning_rate": 9.242201400019046e-07, "loss": 0.38867485523223877, "step": 4755 }, { "epoch": 1.0963688760806916, "grad_norm": 1.3977757435336773, "learning_rate": 9.23840072811527e-07, "loss": 0.3969729244709015, "step": 4756 }, { "epoch": 1.0965994236311238, "grad_norm": 1.5812573695205239, "learning_rate": 9.234600166863941e-07, "loss": 0.5222212076187134, "step": 4757 }, { "epoch": 1.0968299711815561, "grad_norm": 1.5818434035233087, "learning_rate": 9.230799716817248e-07, "loss": 0.42709431052207947, "step": 4758 }, { "epoch": 1.0970605187319884, "grad_norm": 1.587224691957781, "learning_rate": 9.226999378527356e-07, "loss": 0.4916965365409851, "step": 4759 }, { "epoch": 1.0972910662824207, "grad_norm": 1.3771120349564459, "learning_rate": 9.223199152546409e-07, "loss": 0.4756208062171936, "step": 4760 }, { "epoch": 1.097521613832853, "grad_norm": 1.3764453858571746, "learning_rate": 9.219399039426546e-07, "loss": 0.45373475551605225, "step": 4761 }, { "epoch": 1.0977521613832852, "grad_norm": 1.5172679641639661, "learning_rate": 9.215599039719884e-07, "loss": 0.45572659373283386, "step": 4762 }, { "epoch": 1.0979827089337175, "grad_norm": 1.44738634077843, "learning_rate": 9.211799153978519e-07, "loss": 0.5436868071556091, "step": 4763 }, { "epoch": 1.0982132564841498, "grad_norm": 1.6883255773599282, "learning_rate": 9.20799938275454e-07, "loss": 0.5677649974822998, "step": 4764 }, { "epoch": 1.098443804034582, "grad_norm": 1.4487804074611155, "learning_rate": 9.20419972660001e-07, "loss": 0.44892898201942444, "step": 4765 }, { "epoch": 1.0986743515850144, "grad_norm": 1.4841544259994688, "learning_rate": 9.200400186066979e-07, "loss": 0.4553558826446533, "step": 4766 }, { "epoch": 1.0989048991354466, "grad_norm": 1.7722170802145676, "learning_rate": 9.196600761707487e-07, "loss": 0.4699048697948456, "step": 4767 }, { "epoch": 1.099135446685879, "grad_norm": 1.4749195909576496, "learning_rate": 9.192801454073546e-07, "loss": 0.4807226359844208, "step": 4768 }, { "epoch": 1.0993659942363112, "grad_norm": 1.5160658010929253, "learning_rate": 9.189002263717153e-07, "loss": 0.4812129735946655, "step": 4769 }, { "epoch": 1.0995965417867435, "grad_norm": 1.622674615504792, "learning_rate": 9.185203191190298e-07, "loss": 0.5983732342720032, "step": 4770 }, { "epoch": 1.0998270893371758, "grad_norm": 1.3486018335986438, "learning_rate": 9.181404237044943e-07, "loss": 0.490646094083786, "step": 4771 }, { "epoch": 1.100057636887608, "grad_norm": 1.4440200781704027, "learning_rate": 9.177605401833037e-07, "loss": 0.38754355907440186, "step": 4772 }, { "epoch": 1.1002881844380403, "grad_norm": 1.6409742404681038, "learning_rate": 9.173806686106508e-07, "loss": 0.4629727005958557, "step": 4773 }, { "epoch": 1.1005187319884726, "grad_norm": 1.7575318830561308, "learning_rate": 9.170008090417274e-07, "loss": 0.6194955110549927, "step": 4774 }, { "epoch": 1.100749279538905, "grad_norm": 1.5942308214770384, "learning_rate": 9.166209615317233e-07, "loss": 0.49309587478637695, "step": 4775 }, { "epoch": 1.1009798270893372, "grad_norm": 1.5728528642226745, "learning_rate": 9.162411261358256e-07, "loss": 0.5451973080635071, "step": 4776 }, { "epoch": 1.1012103746397695, "grad_norm": 1.5387601694748296, "learning_rate": 9.158613029092213e-07, "loss": 0.48952755331993103, "step": 4777 }, { "epoch": 1.1014409221902017, "grad_norm": 1.5407814625276188, "learning_rate": 9.154814919070945e-07, "loss": 0.44005918502807617, "step": 4778 }, { "epoch": 1.101671469740634, "grad_norm": 2.0586763099102945, "learning_rate": 9.151016931846274e-07, "loss": 0.590754508972168, "step": 4779 }, { "epoch": 1.1019020172910663, "grad_norm": 1.703751089767445, "learning_rate": 9.147219067970015e-07, "loss": 0.4286467432975769, "step": 4780 }, { "epoch": 1.1021325648414986, "grad_norm": 1.3155675663714783, "learning_rate": 9.143421327993951e-07, "loss": 0.5757718086242676, "step": 4781 }, { "epoch": 1.1023631123919309, "grad_norm": 1.5555439515197538, "learning_rate": 9.139623712469855e-07, "loss": 0.4068324565887451, "step": 4782 }, { "epoch": 1.1025936599423631, "grad_norm": 1.3476061460464042, "learning_rate": 9.13582622194949e-07, "loss": 0.47467997670173645, "step": 4783 }, { "epoch": 1.1028242074927954, "grad_norm": 1.58616176312586, "learning_rate": 9.13202885698458e-07, "loss": 0.4873571991920471, "step": 4784 }, { "epoch": 1.1030547550432277, "grad_norm": 1.6677849857086358, "learning_rate": 9.12823161812685e-07, "loss": 0.6014094352722168, "step": 4785 }, { "epoch": 1.10328530259366, "grad_norm": 1.2865551378829043, "learning_rate": 9.124434505927996e-07, "loss": 0.3454878330230713, "step": 4786 }, { "epoch": 1.1035158501440923, "grad_norm": 1.57020124413607, "learning_rate": 9.120637520939698e-07, "loss": 0.5097141265869141, "step": 4787 }, { "epoch": 1.1037463976945245, "grad_norm": 1.289337143667391, "learning_rate": 9.116840663713624e-07, "loss": 0.48765894770622253, "step": 4788 }, { "epoch": 1.1039769452449568, "grad_norm": 1.2994448528986111, "learning_rate": 9.113043934801412e-07, "loss": 0.4968012571334839, "step": 4789 }, { "epoch": 1.104207492795389, "grad_norm": 1.3673390492212412, "learning_rate": 9.109247334754688e-07, "loss": 0.38764679431915283, "step": 4790 }, { "epoch": 1.1044380403458214, "grad_norm": 1.489042273618506, "learning_rate": 9.105450864125064e-07, "loss": 0.4387054145336151, "step": 4791 }, { "epoch": 1.1046685878962537, "grad_norm": 1.3415444855993421, "learning_rate": 9.101654523464121e-07, "loss": 0.4431966543197632, "step": 4792 }, { "epoch": 1.104899135446686, "grad_norm": 1.5309861879191928, "learning_rate": 9.097858313323434e-07, "loss": 0.5146256685256958, "step": 4793 }, { "epoch": 1.1051296829971182, "grad_norm": 1.3518960528695185, "learning_rate": 9.094062234254543e-07, "loss": 0.4577777683734894, "step": 4794 }, { "epoch": 1.1053602305475505, "grad_norm": 1.7110129497463176, "learning_rate": 9.09026628680899e-07, "loss": 0.48272567987442017, "step": 4795 }, { "epoch": 1.1055907780979828, "grad_norm": 1.785422212510911, "learning_rate": 9.086470471538281e-07, "loss": 0.4567776024341583, "step": 4796 }, { "epoch": 1.105821325648415, "grad_norm": 2.043723899772794, "learning_rate": 9.082674788993907e-07, "loss": 0.5596228837966919, "step": 4797 }, { "epoch": 1.1060518731988473, "grad_norm": 1.6819939360850304, "learning_rate": 9.078879239727344e-07, "loss": 0.5048364400863647, "step": 4798 }, { "epoch": 1.1062824207492796, "grad_norm": 1.700251984933967, "learning_rate": 9.075083824290048e-07, "loss": 0.5421375036239624, "step": 4799 }, { "epoch": 1.106512968299712, "grad_norm": 1.4133431513066128, "learning_rate": 9.071288543233446e-07, "loss": 0.377308189868927, "step": 4800 }, { "epoch": 1.1067435158501442, "grad_norm": 1.605698503728756, "learning_rate": 9.067493397108963e-07, "loss": 0.43329721689224243, "step": 4801 }, { "epoch": 1.1069740634005765, "grad_norm": 1.7362921748159275, "learning_rate": 9.063698386467986e-07, "loss": 0.4355202913284302, "step": 4802 }, { "epoch": 1.1072046109510087, "grad_norm": 1.4551532252708075, "learning_rate": 9.059903511861891e-07, "loss": 0.46485209465026855, "step": 4803 }, { "epoch": 1.107435158501441, "grad_norm": 1.3716674949679923, "learning_rate": 9.056108773842039e-07, "loss": 0.4985603094100952, "step": 4804 }, { "epoch": 1.1076657060518733, "grad_norm": 1.5586874286041357, "learning_rate": 9.052314172959763e-07, "loss": 0.5287209749221802, "step": 4805 }, { "epoch": 1.1078962536023056, "grad_norm": 1.680660088067333, "learning_rate": 9.048519709766375e-07, "loss": 0.5115060806274414, "step": 4806 }, { "epoch": 1.1081268011527377, "grad_norm": 1.4253147157574297, "learning_rate": 9.044725384813181e-07, "loss": 0.5016480088233948, "step": 4807 }, { "epoch": 1.10835734870317, "grad_norm": 1.623786045853983, "learning_rate": 9.040931198651449e-07, "loss": 0.4842113256454468, "step": 4808 }, { "epoch": 1.1085878962536022, "grad_norm": 1.7318094433488047, "learning_rate": 9.037137151832439e-07, "loss": 0.5217285752296448, "step": 4809 }, { "epoch": 1.1088184438040345, "grad_norm": 1.5956576132022964, "learning_rate": 9.033343244907378e-07, "loss": 0.5034878253936768, "step": 4810 }, { "epoch": 1.1090489913544668, "grad_norm": 1.450218297985674, "learning_rate": 9.029549478427492e-07, "loss": 0.5122587084770203, "step": 4811 }, { "epoch": 1.109279538904899, "grad_norm": 1.5471365323068564, "learning_rate": 9.025755852943972e-07, "loss": 0.503132164478302, "step": 4812 }, { "epoch": 1.1095100864553313, "grad_norm": 1.3442228745978584, "learning_rate": 9.021962369007989e-07, "loss": 0.42048484086990356, "step": 4813 }, { "epoch": 1.1097406340057636, "grad_norm": 1.4746279418354657, "learning_rate": 9.018169027170701e-07, "loss": 0.4284360706806183, "step": 4814 }, { "epoch": 1.109971181556196, "grad_norm": 1.5301705140832524, "learning_rate": 9.014375827983241e-07, "loss": 0.5939760208129883, "step": 4815 }, { "epoch": 1.1102017291066282, "grad_norm": 1.4889436613914528, "learning_rate": 9.010582771996716e-07, "loss": 0.4964909553527832, "step": 4816 }, { "epoch": 1.1104322766570605, "grad_norm": 1.5842307031357417, "learning_rate": 9.006789859762227e-07, "loss": 0.4841457009315491, "step": 4817 }, { "epoch": 1.1106628242074927, "grad_norm": 1.8305702695417665, "learning_rate": 9.002997091830836e-07, "loss": 0.5084402561187744, "step": 4818 }, { "epoch": 1.110893371757925, "grad_norm": 1.4377869688891702, "learning_rate": 8.999204468753594e-07, "loss": 0.4945530295372009, "step": 4819 }, { "epoch": 1.1111239193083573, "grad_norm": 1.424196476465804, "learning_rate": 8.995411991081535e-07, "loss": 0.45646482706069946, "step": 4820 }, { "epoch": 1.1113544668587896, "grad_norm": 1.8000869076178088, "learning_rate": 8.991619659365662e-07, "loss": 0.4761883020401001, "step": 4821 }, { "epoch": 1.1115850144092219, "grad_norm": 1.4244112912910287, "learning_rate": 8.987827474156965e-07, "loss": 0.37377166748046875, "step": 4822 }, { "epoch": 1.1118155619596541, "grad_norm": 1.42162278725734, "learning_rate": 8.984035436006402e-07, "loss": 0.46969032287597656, "step": 4823 }, { "epoch": 1.1120461095100864, "grad_norm": 1.4551702613987585, "learning_rate": 8.980243545464923e-07, "loss": 0.4871477484703064, "step": 4824 }, { "epoch": 1.1122766570605187, "grad_norm": 1.3735159399298857, "learning_rate": 8.976451803083452e-07, "loss": 0.5354928970336914, "step": 4825 }, { "epoch": 1.112507204610951, "grad_norm": 1.487364565462764, "learning_rate": 8.972660209412879e-07, "loss": 0.541530966758728, "step": 4826 }, { "epoch": 1.1127377521613833, "grad_norm": 1.3765304830204101, "learning_rate": 8.968868765004095e-07, "loss": 0.5282139778137207, "step": 4827 }, { "epoch": 1.1129682997118155, "grad_norm": 1.5188179873424779, "learning_rate": 8.965077470407955e-07, "loss": 0.43828681111335754, "step": 4828 }, { "epoch": 1.1131988472622478, "grad_norm": 1.540567571058341, "learning_rate": 8.961286326175287e-07, "loss": 0.528573751449585, "step": 4829 }, { "epoch": 1.11342939481268, "grad_norm": 2.012088539323808, "learning_rate": 8.957495332856915e-07, "loss": 0.5125828385353088, "step": 4830 }, { "epoch": 1.1136599423631124, "grad_norm": 1.7582058630443533, "learning_rate": 8.953704491003624e-07, "loss": 0.659877359867096, "step": 4831 }, { "epoch": 1.1138904899135447, "grad_norm": 1.6814587419722573, "learning_rate": 8.949913801166183e-07, "loss": 0.4152098298072815, "step": 4832 }, { "epoch": 1.114121037463977, "grad_norm": 1.4999210133226772, "learning_rate": 8.946123263895349e-07, "loss": 0.4220502972602844, "step": 4833 }, { "epoch": 1.1143515850144092, "grad_norm": 1.9244731687036327, "learning_rate": 8.942332879741838e-07, "loss": 0.5434603691101074, "step": 4834 }, { "epoch": 1.1145821325648415, "grad_norm": 1.4746482954312572, "learning_rate": 8.938542649256355e-07, "loss": 0.5064136981964111, "step": 4835 }, { "epoch": 1.1148126801152738, "grad_norm": 1.5136764860349439, "learning_rate": 8.934752572989588e-07, "loss": 0.4972747266292572, "step": 4836 }, { "epoch": 1.115043227665706, "grad_norm": 1.5111229695700725, "learning_rate": 8.930962651492188e-07, "loss": 0.4222022294998169, "step": 4837 }, { "epoch": 1.1152737752161384, "grad_norm": 1.66913696458909, "learning_rate": 8.927172885314796e-07, "loss": 0.4139519929885864, "step": 4838 }, { "epoch": 1.1155043227665706, "grad_norm": 1.7770405386437609, "learning_rate": 8.923383275008018e-07, "loss": 0.6009548306465149, "step": 4839 }, { "epoch": 1.115734870317003, "grad_norm": 1.4282707283397387, "learning_rate": 8.919593821122455e-07, "loss": 0.5030194520950317, "step": 4840 }, { "epoch": 1.1159654178674352, "grad_norm": 1.3591409790995543, "learning_rate": 8.915804524208669e-07, "loss": 0.5004386901855469, "step": 4841 }, { "epoch": 1.1161959654178675, "grad_norm": 1.7972328125538324, "learning_rate": 8.912015384817206e-07, "loss": 0.39989084005355835, "step": 4842 }, { "epoch": 1.1164265129682998, "grad_norm": 1.6078188458230296, "learning_rate": 8.90822640349859e-07, "loss": 0.46599575877189636, "step": 4843 }, { "epoch": 1.116657060518732, "grad_norm": 1.5910400636453796, "learning_rate": 8.904437580803322e-07, "loss": 0.48841774463653564, "step": 4844 }, { "epoch": 1.1168876080691643, "grad_norm": 1.4794610309861063, "learning_rate": 8.900648917281873e-07, "loss": 0.4746759831905365, "step": 4845 }, { "epoch": 1.1171181556195966, "grad_norm": 2.0991510052575095, "learning_rate": 8.896860413484705e-07, "loss": 0.5819174647331238, "step": 4846 }, { "epoch": 1.1173487031700289, "grad_norm": 1.3802001580093008, "learning_rate": 8.893072069962239e-07, "loss": 0.4410976469516754, "step": 4847 }, { "epoch": 1.1175792507204612, "grad_norm": 1.3344798381915968, "learning_rate": 8.889283887264887e-07, "loss": 0.43819811940193176, "step": 4848 }, { "epoch": 1.1178097982708934, "grad_norm": 1.3486885144083764, "learning_rate": 8.885495865943033e-07, "loss": 0.43866032361984253, "step": 4849 }, { "epoch": 1.1180403458213257, "grad_norm": 1.5267759422148517, "learning_rate": 8.881708006547035e-07, "loss": 0.44860726594924927, "step": 4850 }, { "epoch": 1.118270893371758, "grad_norm": 1.4715352178336798, "learning_rate": 8.877920309627228e-07, "loss": 0.579569935798645, "step": 4851 }, { "epoch": 1.1185014409221903, "grad_norm": 1.6155598521149455, "learning_rate": 8.874132775733931e-07, "loss": 0.41236403584480286, "step": 4852 }, { "epoch": 1.1187319884726226, "grad_norm": 1.549256572240456, "learning_rate": 8.870345405417428e-07, "loss": 0.45414695143699646, "step": 4853 }, { "epoch": 1.1189625360230548, "grad_norm": 1.6050881880917633, "learning_rate": 8.866558199227988e-07, "loss": 0.44578975439071655, "step": 4854 }, { "epoch": 1.1191930835734871, "grad_norm": 1.4571011661113775, "learning_rate": 8.862771157715846e-07, "loss": 0.42466121912002563, "step": 4855 }, { "epoch": 1.1194236311239194, "grad_norm": 1.6079203038736904, "learning_rate": 8.858984281431228e-07, "loss": 0.5185168981552124, "step": 4856 }, { "epoch": 1.1196541786743517, "grad_norm": 1.7110922940542974, "learning_rate": 8.855197570924324e-07, "loss": 0.4561188817024231, "step": 4857 }, { "epoch": 1.1198847262247837, "grad_norm": 1.5555744196993486, "learning_rate": 8.851411026745302e-07, "loss": 0.4159931540489197, "step": 4858 }, { "epoch": 1.120115273775216, "grad_norm": 1.5248463429149375, "learning_rate": 8.847624649444309e-07, "loss": 0.5513845682144165, "step": 4859 }, { "epoch": 1.1203458213256483, "grad_norm": 1.4270693773942695, "learning_rate": 8.84383843957147e-07, "loss": 0.4656720757484436, "step": 4860 }, { "epoch": 1.1205763688760806, "grad_norm": 1.382817028392081, "learning_rate": 8.840052397676873e-07, "loss": 0.5290340781211853, "step": 4861 }, { "epoch": 1.1208069164265129, "grad_norm": 1.727021963566853, "learning_rate": 8.836266524310603e-07, "loss": 0.4851052165031433, "step": 4862 }, { "epoch": 1.1210374639769451, "grad_norm": 1.435439662651569, "learning_rate": 8.832480820022696e-07, "loss": 0.45340481400489807, "step": 4863 }, { "epoch": 1.1212680115273774, "grad_norm": 1.7016168632340074, "learning_rate": 8.828695285363179e-07, "loss": 0.5824764966964722, "step": 4864 }, { "epoch": 1.1214985590778097, "grad_norm": 1.4809494332211275, "learning_rate": 8.824909920882056e-07, "loss": 0.4200345277786255, "step": 4865 }, { "epoch": 1.121729106628242, "grad_norm": 1.7782815491554205, "learning_rate": 8.821124727129297e-07, "loss": 0.4244277775287628, "step": 4866 }, { "epoch": 1.1219596541786743, "grad_norm": 1.3618326629672723, "learning_rate": 8.817339704654852e-07, "loss": 0.43537044525146484, "step": 4867 }, { "epoch": 1.1221902017291066, "grad_norm": 1.8260743774909134, "learning_rate": 8.813554854008641e-07, "loss": 0.42428910732269287, "step": 4868 }, { "epoch": 1.1224207492795388, "grad_norm": 1.5288566423130419, "learning_rate": 8.80977017574057e-07, "loss": 0.47865474224090576, "step": 4869 }, { "epoch": 1.1226512968299711, "grad_norm": 1.5444061070950739, "learning_rate": 8.805985670400513e-07, "loss": 0.4466247856616974, "step": 4870 }, { "epoch": 1.1228818443804034, "grad_norm": 1.8287166421574825, "learning_rate": 8.802201338538312e-07, "loss": 0.3930908441543579, "step": 4871 }, { "epoch": 1.1231123919308357, "grad_norm": 1.8199781856573405, "learning_rate": 8.798417180703799e-07, "loss": 0.4518371522426605, "step": 4872 }, { "epoch": 1.123342939481268, "grad_norm": 1.70058941138662, "learning_rate": 8.79463319744677e-07, "loss": 0.5394526720046997, "step": 4873 }, { "epoch": 1.1235734870317002, "grad_norm": 1.575874203584041, "learning_rate": 8.790849389316997e-07, "loss": 0.4246562719345093, "step": 4874 }, { "epoch": 1.1238040345821325, "grad_norm": 1.9575281438943082, "learning_rate": 8.787065756864232e-07, "loss": 0.5607410669326782, "step": 4875 }, { "epoch": 1.1240345821325648, "grad_norm": 1.3749123465435562, "learning_rate": 8.783282300638192e-07, "loss": 0.38254958391189575, "step": 4876 }, { "epoch": 1.124265129682997, "grad_norm": 1.3613826546915397, "learning_rate": 8.779499021188574e-07, "loss": 0.44105666875839233, "step": 4877 }, { "epoch": 1.1244956772334294, "grad_norm": 1.5631401572974992, "learning_rate": 8.775715919065056e-07, "loss": 0.37160882353782654, "step": 4878 }, { "epoch": 1.1247262247838616, "grad_norm": 1.7025997410821614, "learning_rate": 8.771932994817278e-07, "loss": 0.49676722288131714, "step": 4879 }, { "epoch": 1.124956772334294, "grad_norm": 1.6298726513756532, "learning_rate": 8.768150248994858e-07, "loss": 0.5305773019790649, "step": 4880 }, { "epoch": 1.1251873198847262, "grad_norm": 1.2917934278832566, "learning_rate": 8.764367682147395e-07, "loss": 0.36669573187828064, "step": 4881 }, { "epoch": 1.1254178674351585, "grad_norm": 1.5501344216889468, "learning_rate": 8.760585294824454e-07, "loss": 0.4404510259628296, "step": 4882 }, { "epoch": 1.1256484149855908, "grad_norm": 1.390626461072737, "learning_rate": 8.756803087575578e-07, "loss": 0.4724805951118469, "step": 4883 }, { "epoch": 1.125878962536023, "grad_norm": 1.548592980840675, "learning_rate": 8.753021060950274e-07, "loss": 0.45248547196388245, "step": 4884 }, { "epoch": 1.1261095100864553, "grad_norm": 1.3484839335073744, "learning_rate": 8.749239215498043e-07, "loss": 0.4234843850135803, "step": 4885 }, { "epoch": 1.1263400576368876, "grad_norm": 1.8625422172760093, "learning_rate": 8.745457551768342e-07, "loss": 0.49027156829833984, "step": 4886 }, { "epoch": 1.1265706051873199, "grad_norm": 1.5067827985484183, "learning_rate": 8.741676070310605e-07, "loss": 0.6061224341392517, "step": 4887 }, { "epoch": 1.1268011527377522, "grad_norm": 1.5336136504090925, "learning_rate": 8.737894771674248e-07, "loss": 0.43946483731269836, "step": 4888 }, { "epoch": 1.1270317002881844, "grad_norm": 1.480981892396503, "learning_rate": 8.734113656408651e-07, "loss": 0.5716358423233032, "step": 4889 }, { "epoch": 1.1272622478386167, "grad_norm": 1.7389086828401863, "learning_rate": 8.730332725063168e-07, "loss": 0.5096737146377563, "step": 4890 }, { "epoch": 1.127492795389049, "grad_norm": 1.6664009863971434, "learning_rate": 8.726551978187138e-07, "loss": 0.38695579767227173, "step": 4891 }, { "epoch": 1.1277233429394813, "grad_norm": 1.8381615508922664, "learning_rate": 8.722771416329854e-07, "loss": 0.5407450795173645, "step": 4892 }, { "epoch": 1.1279538904899136, "grad_norm": 1.9914901771925757, "learning_rate": 8.718991040040594e-07, "loss": 0.5152771472930908, "step": 4893 }, { "epoch": 1.1281844380403458, "grad_norm": 1.5958383708352593, "learning_rate": 8.715210849868615e-07, "loss": 0.5410465598106384, "step": 4894 }, { "epoch": 1.1284149855907781, "grad_norm": 1.337702225927413, "learning_rate": 8.711430846363132e-07, "loss": 0.4508117437362671, "step": 4895 }, { "epoch": 1.1286455331412104, "grad_norm": 1.6777628929807271, "learning_rate": 8.70765103007334e-07, "loss": 0.5080430507659912, "step": 4896 }, { "epoch": 1.1288760806916427, "grad_norm": 1.7083293725933877, "learning_rate": 8.703871401548415e-07, "loss": 0.470861554145813, "step": 4897 }, { "epoch": 1.129106628242075, "grad_norm": 1.872662754797883, "learning_rate": 8.700091961337486e-07, "loss": 0.6018689274787903, "step": 4898 }, { "epoch": 1.1293371757925073, "grad_norm": 1.5500246582401473, "learning_rate": 8.696312709989677e-07, "loss": 0.5060360431671143, "step": 4899 }, { "epoch": 1.1295677233429395, "grad_norm": 1.4588678421625432, "learning_rate": 8.692533648054067e-07, "loss": 0.45208120346069336, "step": 4900 }, { "epoch": 1.1297982708933718, "grad_norm": 1.5825168488447021, "learning_rate": 8.688754776079714e-07, "loss": 0.536127507686615, "step": 4901 }, { "epoch": 1.130028818443804, "grad_norm": 1.5789528377651103, "learning_rate": 8.684976094615657e-07, "loss": 0.4760720133781433, "step": 4902 }, { "epoch": 1.1302593659942364, "grad_norm": 1.5428248897097812, "learning_rate": 8.68119760421089e-07, "loss": 0.5124382972717285, "step": 4903 }, { "epoch": 1.1304899135446687, "grad_norm": 1.3522374572244407, "learning_rate": 8.67741930541439e-07, "loss": 0.49435877799987793, "step": 4904 }, { "epoch": 1.130720461095101, "grad_norm": 1.5469811060879746, "learning_rate": 8.673641198775111e-07, "loss": 0.48838311433792114, "step": 4905 }, { "epoch": 1.1309510086455332, "grad_norm": 1.3093772355610735, "learning_rate": 8.669863284841966e-07, "loss": 0.47754883766174316, "step": 4906 }, { "epoch": 1.1311815561959655, "grad_norm": 1.657432473083644, "learning_rate": 8.666085564163851e-07, "loss": 0.49613162875175476, "step": 4907 }, { "epoch": 1.1314121037463978, "grad_norm": 1.4477928132979816, "learning_rate": 8.662308037289622e-07, "loss": 0.4798436462879181, "step": 4908 }, { "epoch": 1.13164265129683, "grad_norm": 1.7066266033391562, "learning_rate": 8.658530704768121e-07, "loss": 0.4057399034500122, "step": 4909 }, { "epoch": 1.1318731988472623, "grad_norm": 1.7346495783036942, "learning_rate": 8.654753567148157e-07, "loss": 0.5270807147026062, "step": 4910 }, { "epoch": 1.1321037463976946, "grad_norm": 1.4512953437479286, "learning_rate": 8.650976624978502e-07, "loss": 0.5122570991516113, "step": 4911 }, { "epoch": 1.132334293948127, "grad_norm": 1.7172370990813912, "learning_rate": 8.647199878807912e-07, "loss": 0.5793176889419556, "step": 4912 }, { "epoch": 1.1325648414985592, "grad_norm": 1.4271719139773258, "learning_rate": 8.643423329185104e-07, "loss": 0.48190778493881226, "step": 4913 }, { "epoch": 1.1327953890489915, "grad_norm": 1.606113566768016, "learning_rate": 8.639646976658774e-07, "loss": 0.4640873074531555, "step": 4914 }, { "epoch": 1.1330259365994237, "grad_norm": 1.6263125990821405, "learning_rate": 8.635870821777591e-07, "loss": 0.5187903642654419, "step": 4915 }, { "epoch": 1.133256484149856, "grad_norm": 1.414474025707106, "learning_rate": 8.632094865090184e-07, "loss": 0.5168712735176086, "step": 4916 }, { "epoch": 1.1334870317002883, "grad_norm": 1.7263194018807568, "learning_rate": 8.628319107145161e-07, "loss": 0.4974696636199951, "step": 4917 }, { "epoch": 1.1337175792507204, "grad_norm": 1.609612326681278, "learning_rate": 8.624543548491105e-07, "loss": 0.5246702432632446, "step": 4918 }, { "epoch": 1.1339481268011526, "grad_norm": 1.2630050489883073, "learning_rate": 8.620768189676564e-07, "loss": 0.4184077978134155, "step": 4919 }, { "epoch": 1.134178674351585, "grad_norm": 1.5960723155174386, "learning_rate": 8.616993031250057e-07, "loss": 0.5239197015762329, "step": 4920 }, { "epoch": 1.1344092219020172, "grad_norm": 1.712138394358373, "learning_rate": 8.613218073760073e-07, "loss": 0.5908505916595459, "step": 4921 }, { "epoch": 1.1346397694524495, "grad_norm": 1.9712676240506681, "learning_rate": 8.609443317755077e-07, "loss": 0.4874504506587982, "step": 4922 }, { "epoch": 1.1348703170028818, "grad_norm": 1.5709977593377218, "learning_rate": 8.605668763783503e-07, "loss": 0.44740432500839233, "step": 4923 }, { "epoch": 1.135100864553314, "grad_norm": 1.9603291324895793, "learning_rate": 8.60189441239375e-07, "loss": 0.3754269480705261, "step": 4924 }, { "epoch": 1.1353314121037463, "grad_norm": 1.505221231169863, "learning_rate": 8.598120264134195e-07, "loss": 0.40734755992889404, "step": 4925 }, { "epoch": 1.1355619596541786, "grad_norm": 1.5628975410629704, "learning_rate": 8.594346319553186e-07, "loss": 0.5404030680656433, "step": 4926 }, { "epoch": 1.135792507204611, "grad_norm": 1.7008702550350545, "learning_rate": 8.590572579199029e-07, "loss": 0.3678287863731384, "step": 4927 }, { "epoch": 1.1360230547550432, "grad_norm": 1.6117451920140595, "learning_rate": 8.586799043620019e-07, "loss": 0.562045693397522, "step": 4928 }, { "epoch": 1.1362536023054755, "grad_norm": 1.298080197408525, "learning_rate": 8.583025713364404e-07, "loss": 0.46270644664764404, "step": 4929 }, { "epoch": 1.1364841498559077, "grad_norm": 1.4129593772006035, "learning_rate": 8.57925258898041e-07, "loss": 0.4325964152812958, "step": 4930 }, { "epoch": 1.13671469740634, "grad_norm": 1.6019125673177061, "learning_rate": 8.57547967101624e-07, "loss": 0.4210170805454254, "step": 4931 }, { "epoch": 1.1369452449567723, "grad_norm": 1.5728507205557822, "learning_rate": 8.571706960020053e-07, "loss": 0.4151025414466858, "step": 4932 }, { "epoch": 1.1371757925072046, "grad_norm": 1.6118551503277867, "learning_rate": 8.567934456539983e-07, "loss": 0.4276087284088135, "step": 4933 }, { "epoch": 1.1374063400576369, "grad_norm": 1.5052919595806051, "learning_rate": 8.564162161124144e-07, "loss": 0.41048938035964966, "step": 4934 }, { "epoch": 1.1376368876080691, "grad_norm": 1.571746912251262, "learning_rate": 8.560390074320605e-07, "loss": 0.5300489068031311, "step": 4935 }, { "epoch": 1.1378674351585014, "grad_norm": 1.5179733531616169, "learning_rate": 8.556618196677413e-07, "loss": 0.5026379823684692, "step": 4936 }, { "epoch": 1.1380979827089337, "grad_norm": 1.5055765080542705, "learning_rate": 8.552846528742579e-07, "loss": 0.3983602821826935, "step": 4937 }, { "epoch": 1.138328530259366, "grad_norm": 1.55955976181173, "learning_rate": 8.549075071064091e-07, "loss": 0.5190225839614868, "step": 4938 }, { "epoch": 1.1385590778097983, "grad_norm": 1.5517589840709185, "learning_rate": 8.545303824189904e-07, "loss": 0.49603796005249023, "step": 4939 }, { "epoch": 1.1387896253602305, "grad_norm": 1.5762221673293377, "learning_rate": 8.541532788667933e-07, "loss": 0.47140175104141235, "step": 4940 }, { "epoch": 1.1390201729106628, "grad_norm": 1.796577364081372, "learning_rate": 8.537761965046079e-07, "loss": 0.5093640685081482, "step": 4941 }, { "epoch": 1.139250720461095, "grad_norm": 1.61771614386877, "learning_rate": 8.533991353872203e-07, "loss": 0.44927600026130676, "step": 4942 }, { "epoch": 1.1394812680115274, "grad_norm": 1.3040634418575354, "learning_rate": 8.530220955694127e-07, "loss": 0.43341124057769775, "step": 4943 }, { "epoch": 1.1397118155619597, "grad_norm": 1.4304151716973417, "learning_rate": 8.526450771059661e-07, "loss": 0.47579699754714966, "step": 4944 }, { "epoch": 1.139942363112392, "grad_norm": 1.7691519185450943, "learning_rate": 8.522680800516566e-07, "loss": 0.42832237482070923, "step": 4945 }, { "epoch": 1.1401729106628242, "grad_norm": 1.7533376996528798, "learning_rate": 8.518911044612582e-07, "loss": 0.4995993375778198, "step": 4946 }, { "epoch": 1.1404034582132565, "grad_norm": 1.6528588102849888, "learning_rate": 8.51514150389542e-07, "loss": 0.4735615849494934, "step": 4947 }, { "epoch": 1.1406340057636888, "grad_norm": 1.414172066097951, "learning_rate": 8.511372178912746e-07, "loss": 0.5336610078811646, "step": 4948 }, { "epoch": 1.140864553314121, "grad_norm": 1.4570893187309275, "learning_rate": 8.507603070212209e-07, "loss": 0.4889930784702301, "step": 4949 }, { "epoch": 1.1410951008645533, "grad_norm": 1.4948561610167277, "learning_rate": 8.503834178341425e-07, "loss": 0.4650326073169708, "step": 4950 }, { "epoch": 1.1413256484149856, "grad_norm": 1.7146295859985705, "learning_rate": 8.500065503847967e-07, "loss": 0.422588586807251, "step": 4951 }, { "epoch": 1.141556195965418, "grad_norm": 1.5443817175032366, "learning_rate": 8.496297047279391e-07, "loss": 0.391678124666214, "step": 4952 }, { "epoch": 1.1417867435158502, "grad_norm": 1.6000480430652493, "learning_rate": 8.492528809183208e-07, "loss": 0.4672621488571167, "step": 4953 }, { "epoch": 1.1420172910662825, "grad_norm": 1.3846533790350994, "learning_rate": 8.488760790106907e-07, "loss": 0.44236963987350464, "step": 4954 }, { "epoch": 1.1422478386167148, "grad_norm": 1.872328058335658, "learning_rate": 8.484992990597946e-07, "loss": 0.4638679325580597, "step": 4955 }, { "epoch": 1.142478386167147, "grad_norm": 1.4933075788494417, "learning_rate": 8.481225411203738e-07, "loss": 0.39137962460517883, "step": 4956 }, { "epoch": 1.1427089337175793, "grad_norm": 1.478246135116108, "learning_rate": 8.477458052471682e-07, "loss": 0.4590994417667389, "step": 4957 }, { "epoch": 1.1429394812680116, "grad_norm": 1.7349053396994705, "learning_rate": 8.473690914949131e-07, "loss": 0.4529002606868744, "step": 4958 }, { "epoch": 1.1431700288184439, "grad_norm": 1.5961832007652752, "learning_rate": 8.46992399918341e-07, "loss": 0.4857093095779419, "step": 4959 }, { "epoch": 1.1434005763688762, "grad_norm": 1.7309392540399018, "learning_rate": 8.466157305721819e-07, "loss": 0.5138260722160339, "step": 4960 }, { "epoch": 1.1436311239193084, "grad_norm": 1.395111083728356, "learning_rate": 8.462390835111612e-07, "loss": 0.42551133036613464, "step": 4961 }, { "epoch": 1.1438616714697407, "grad_norm": 1.2166875183733967, "learning_rate": 8.458624587900021e-07, "loss": 0.4757024049758911, "step": 4962 }, { "epoch": 1.144092219020173, "grad_norm": 1.6010369718376882, "learning_rate": 8.454858564634247e-07, "loss": 0.45634210109710693, "step": 4963 }, { "epoch": 1.1443227665706053, "grad_norm": 1.594728695980143, "learning_rate": 8.451092765861446e-07, "loss": 0.43474477529525757, "step": 4964 }, { "epoch": 1.1445533141210376, "grad_norm": 1.569588067868878, "learning_rate": 8.447327192128756e-07, "loss": 0.3684108555316925, "step": 4965 }, { "epoch": 1.1447838616714696, "grad_norm": 1.544743293825418, "learning_rate": 8.443561843983269e-07, "loss": 0.4448085427284241, "step": 4966 }, { "epoch": 1.145014409221902, "grad_norm": 1.5575083692045397, "learning_rate": 8.439796721972056e-07, "loss": 0.5501620173454285, "step": 4967 }, { "epoch": 1.1452449567723342, "grad_norm": 1.496852309992749, "learning_rate": 8.436031826642151e-07, "loss": 0.46348631381988525, "step": 4968 }, { "epoch": 1.1454755043227665, "grad_norm": 1.4230652325158895, "learning_rate": 8.432267158540549e-07, "loss": 0.4619203209877014, "step": 4969 }, { "epoch": 1.1457060518731987, "grad_norm": 1.5562637049741876, "learning_rate": 8.428502718214222e-07, "loss": 0.376537024974823, "step": 4970 }, { "epoch": 1.145936599423631, "grad_norm": 1.2665150891924384, "learning_rate": 8.424738506210103e-07, "loss": 0.528576135635376, "step": 4971 }, { "epoch": 1.1461671469740633, "grad_norm": 1.4114442104230585, "learning_rate": 8.420974523075089e-07, "loss": 0.41852709650993347, "step": 4972 }, { "epoch": 1.1463976945244956, "grad_norm": 1.4444856174172405, "learning_rate": 8.417210769356053e-07, "loss": 0.4927000403404236, "step": 4973 }, { "epoch": 1.1466282420749279, "grad_norm": 1.5086189669079528, "learning_rate": 8.413447245599827e-07, "loss": 0.43919095396995544, "step": 4974 }, { "epoch": 1.1468587896253601, "grad_norm": 1.5829083232751864, "learning_rate": 8.409683952353208e-07, "loss": 0.5224364995956421, "step": 4975 }, { "epoch": 1.1470893371757924, "grad_norm": 1.2855645239828792, "learning_rate": 8.405920890162972e-07, "loss": 0.47760850191116333, "step": 4976 }, { "epoch": 1.1473198847262247, "grad_norm": 1.7882126310834827, "learning_rate": 8.402158059575845e-07, "loss": 0.4845304489135742, "step": 4977 }, { "epoch": 1.147550432276657, "grad_norm": 1.5850593696336175, "learning_rate": 8.398395461138527e-07, "loss": 0.523693859577179, "step": 4978 }, { "epoch": 1.1477809798270893, "grad_norm": 1.4071229583070886, "learning_rate": 8.394633095397693e-07, "loss": 0.44773513078689575, "step": 4979 }, { "epoch": 1.1480115273775215, "grad_norm": 1.9493074463498687, "learning_rate": 8.390870962899967e-07, "loss": 0.5348447561264038, "step": 4980 }, { "epoch": 1.1482420749279538, "grad_norm": 1.582930450446158, "learning_rate": 8.387109064191954e-07, "loss": 0.4859353303909302, "step": 4981 }, { "epoch": 1.148472622478386, "grad_norm": 1.8753855861674091, "learning_rate": 8.38334739982021e-07, "loss": 0.4306800663471222, "step": 4982 }, { "epoch": 1.1487031700288184, "grad_norm": 1.3609318032284317, "learning_rate": 8.379585970331274e-07, "loss": 0.4003479480743408, "step": 4983 }, { "epoch": 1.1489337175792507, "grad_norm": 2.1221671774243407, "learning_rate": 8.37582477627164e-07, "loss": 0.6015596985816956, "step": 4984 }, { "epoch": 1.149164265129683, "grad_norm": 1.6080254217986074, "learning_rate": 8.372063818187767e-07, "loss": 0.5425978302955627, "step": 4985 }, { "epoch": 1.1493948126801152, "grad_norm": 1.6118240887855368, "learning_rate": 8.368303096626089e-07, "loss": 0.462574303150177, "step": 4986 }, { "epoch": 1.1496253602305475, "grad_norm": 1.5465454371671152, "learning_rate": 8.364542612132999e-07, "loss": 0.4790104627609253, "step": 4987 }, { "epoch": 1.1498559077809798, "grad_norm": 1.6307505419711474, "learning_rate": 8.360782365254849e-07, "loss": 0.4083213210105896, "step": 4988 }, { "epoch": 1.150086455331412, "grad_norm": 1.6480494744817742, "learning_rate": 8.357022356537974e-07, "loss": 0.4527336359024048, "step": 4989 }, { "epoch": 1.1503170028818444, "grad_norm": 1.5469171582750165, "learning_rate": 8.353262586528655e-07, "loss": 0.45044830441474915, "step": 4990 }, { "epoch": 1.1505475504322766, "grad_norm": 1.4972630884076448, "learning_rate": 8.349503055773152e-07, "loss": 0.4949982464313507, "step": 4991 }, { "epoch": 1.150778097982709, "grad_norm": 1.4696355307323234, "learning_rate": 8.345743764817688e-07, "loss": 0.5219828486442566, "step": 4992 }, { "epoch": 1.1510086455331412, "grad_norm": 1.4698594015955475, "learning_rate": 8.341984714208445e-07, "loss": 0.47528931498527527, "step": 4993 }, { "epoch": 1.1512391930835735, "grad_norm": 1.419305533417606, "learning_rate": 8.338225904491572e-07, "loss": 0.3760669231414795, "step": 4994 }, { "epoch": 1.1514697406340058, "grad_norm": 1.7202794759708913, "learning_rate": 8.334467336213192e-07, "loss": 0.47153982520103455, "step": 4995 }, { "epoch": 1.151700288184438, "grad_norm": 1.6502148368463363, "learning_rate": 8.330709009919379e-07, "loss": 0.448361337184906, "step": 4996 }, { "epoch": 1.1519308357348703, "grad_norm": 1.6574762273201675, "learning_rate": 8.326950926156185e-07, "loss": 0.4243425130844116, "step": 4997 }, { "epoch": 1.1521613832853026, "grad_norm": 1.619484058823572, "learning_rate": 8.323193085469613e-07, "loss": 0.46340247988700867, "step": 4998 }, { "epoch": 1.1523919308357349, "grad_norm": 1.5996286621587357, "learning_rate": 8.319435488405644e-07, "loss": 0.4195745587348938, "step": 4999 }, { "epoch": 1.1526224783861672, "grad_norm": 1.5213884038871759, "learning_rate": 8.315678135510218e-07, "loss": 0.5506634712219238, "step": 5000 }, { "epoch": 1.1528530259365994, "grad_norm": 1.3389955091698362, "learning_rate": 8.311921027329231e-07, "loss": 0.43178266286849976, "step": 5001 }, { "epoch": 1.1530835734870317, "grad_norm": 1.483870001679962, "learning_rate": 8.308164164408565e-07, "loss": 0.43827325105667114, "step": 5002 }, { "epoch": 1.153314121037464, "grad_norm": 1.3812119919237116, "learning_rate": 8.304407547294044e-07, "loss": 0.47002434730529785, "step": 5003 }, { "epoch": 1.1535446685878963, "grad_norm": 1.6557647695376148, "learning_rate": 8.300651176531464e-07, "loss": 0.4747048616409302, "step": 5004 }, { "epoch": 1.1537752161383286, "grad_norm": 1.5487662173335255, "learning_rate": 8.296895052666594e-07, "loss": 0.44455668330192566, "step": 5005 }, { "epoch": 1.1540057636887608, "grad_norm": 1.5585235356930989, "learning_rate": 8.293139176245155e-07, "loss": 0.546316385269165, "step": 5006 }, { "epoch": 1.1542363112391931, "grad_norm": 1.675135125637205, "learning_rate": 8.289383547812835e-07, "loss": 0.4774520993232727, "step": 5007 }, { "epoch": 1.1544668587896254, "grad_norm": 1.3977257653778448, "learning_rate": 8.285628167915295e-07, "loss": 0.4609676003456116, "step": 5008 }, { "epoch": 1.1546974063400577, "grad_norm": 1.5997296940439065, "learning_rate": 8.281873037098145e-07, "loss": 0.5442596673965454, "step": 5009 }, { "epoch": 1.15492795389049, "grad_norm": 1.433364845269251, "learning_rate": 8.278118155906973e-07, "loss": 0.43525272607803345, "step": 5010 }, { "epoch": 1.1551585014409222, "grad_norm": 1.8110200812495216, "learning_rate": 8.274363524887314e-07, "loss": 0.47077393531799316, "step": 5011 }, { "epoch": 1.1553890489913545, "grad_norm": 1.666313225347892, "learning_rate": 8.270609144584687e-07, "loss": 0.4974507689476013, "step": 5012 }, { "epoch": 1.1556195965417868, "grad_norm": 1.7757066050265735, "learning_rate": 8.266855015544563e-07, "loss": 0.4794807732105255, "step": 5013 }, { "epoch": 1.155850144092219, "grad_norm": 1.5898294982164218, "learning_rate": 8.26310113831237e-07, "loss": 0.528843879699707, "step": 5014 }, { "epoch": 1.1560806916426514, "grad_norm": 1.521661516174903, "learning_rate": 8.259347513433516e-07, "loss": 0.49961280822753906, "step": 5015 }, { "epoch": 1.1563112391930837, "grad_norm": 1.5573702934409057, "learning_rate": 8.255594141453364e-07, "loss": 0.46634775400161743, "step": 5016 }, { "epoch": 1.156541786743516, "grad_norm": 1.3785535507550584, "learning_rate": 8.251841022917233e-07, "loss": 0.5132392048835754, "step": 5017 }, { "epoch": 1.1567723342939482, "grad_norm": 1.503101964610376, "learning_rate": 8.248088158370419e-07, "loss": 0.46664172410964966, "step": 5018 }, { "epoch": 1.1570028818443805, "grad_norm": 2.223349806199342, "learning_rate": 8.244335548358165e-07, "loss": 0.5046276450157166, "step": 5019 }, { "epoch": 1.1572334293948128, "grad_norm": 1.4765801454732264, "learning_rate": 8.240583193425694e-07, "loss": 0.4508659839630127, "step": 5020 }, { "epoch": 1.157463976945245, "grad_norm": 1.6522378690504231, "learning_rate": 8.236831094118186e-07, "loss": 0.4357062876224518, "step": 5021 }, { "epoch": 1.1576945244956773, "grad_norm": 1.4792784189312642, "learning_rate": 8.233079250980773e-07, "loss": 0.5156815052032471, "step": 5022 }, { "epoch": 1.1579250720461096, "grad_norm": 1.7175978777418233, "learning_rate": 8.229327664558566e-07, "loss": 0.5102704763412476, "step": 5023 }, { "epoch": 1.158155619596542, "grad_norm": 1.4500341849668028, "learning_rate": 8.225576335396631e-07, "loss": 0.5001027584075928, "step": 5024 }, { "epoch": 1.1583861671469742, "grad_norm": 1.615768339974115, "learning_rate": 8.221825264039992e-07, "loss": 0.5214229226112366, "step": 5025 }, { "epoch": 1.1586167146974065, "grad_norm": 1.596713767399932, "learning_rate": 8.218074451033648e-07, "loss": 0.4978953003883362, "step": 5026 }, { "epoch": 1.1588472622478387, "grad_norm": 1.72655777253444, "learning_rate": 8.214323896922548e-07, "loss": 0.40742921829223633, "step": 5027 }, { "epoch": 1.1590778097982708, "grad_norm": 1.4098774572275268, "learning_rate": 8.210573602251607e-07, "loss": 0.500441312789917, "step": 5028 }, { "epoch": 1.159308357348703, "grad_norm": 1.6555011856350113, "learning_rate": 8.206823567565711e-07, "loss": 0.4656379222869873, "step": 5029 }, { "epoch": 1.1595389048991354, "grad_norm": 1.4004525989876406, "learning_rate": 8.203073793409694e-07, "loss": 0.408259779214859, "step": 5030 }, { "epoch": 1.1597694524495676, "grad_norm": 1.5906053969397733, "learning_rate": 8.19932428032836e-07, "loss": 0.4703931212425232, "step": 5031 }, { "epoch": 1.16, "grad_norm": 1.5767341376014499, "learning_rate": 8.195575028866479e-07, "loss": 0.449575275182724, "step": 5032 }, { "epoch": 1.1602305475504322, "grad_norm": 1.6523550817121402, "learning_rate": 8.191826039568776e-07, "loss": 0.46949630975723267, "step": 5033 }, { "epoch": 1.1604610951008645, "grad_norm": 1.4441557842189188, "learning_rate": 8.18807731297994e-07, "loss": 0.5106115937232971, "step": 5034 }, { "epoch": 1.1606916426512968, "grad_norm": 1.4135101448635017, "learning_rate": 8.184328849644616e-07, "loss": 0.4903862178325653, "step": 5035 }, { "epoch": 1.160922190201729, "grad_norm": 1.6994208159537325, "learning_rate": 8.180580650107425e-07, "loss": 0.4848126769065857, "step": 5036 }, { "epoch": 1.1611527377521613, "grad_norm": 1.9267658147173174, "learning_rate": 8.176832714912942e-07, "loss": 0.5161526203155518, "step": 5037 }, { "epoch": 1.1613832853025936, "grad_norm": 1.4045260428359359, "learning_rate": 8.173085044605693e-07, "loss": 0.444364070892334, "step": 5038 }, { "epoch": 1.1616138328530259, "grad_norm": 1.4741868944919188, "learning_rate": 8.169337639730184e-07, "loss": 0.40291786193847656, "step": 5039 }, { "epoch": 1.1618443804034582, "grad_norm": 1.5142762668473808, "learning_rate": 8.165590500830876e-07, "loss": 0.43826034665107727, "step": 5040 }, { "epoch": 1.1620749279538904, "grad_norm": 1.6982487740088041, "learning_rate": 8.161843628452181e-07, "loss": 0.5153712034225464, "step": 5041 }, { "epoch": 1.1623054755043227, "grad_norm": 1.6774861326863326, "learning_rate": 8.158097023138488e-07, "loss": 0.43770891427993774, "step": 5042 }, { "epoch": 1.162536023054755, "grad_norm": 1.3656492090603147, "learning_rate": 8.154350685434135e-07, "loss": 0.4041779041290283, "step": 5043 }, { "epoch": 1.1627665706051873, "grad_norm": 1.6864751806757157, "learning_rate": 8.150604615883425e-07, "loss": 0.5335817337036133, "step": 5044 }, { "epoch": 1.1629971181556196, "grad_norm": 1.8248598871276303, "learning_rate": 8.14685881503063e-07, "loss": 0.5161072015762329, "step": 5045 }, { "epoch": 1.1632276657060518, "grad_norm": 1.5273246721191258, "learning_rate": 8.143113283419968e-07, "loss": 0.39934635162353516, "step": 5046 }, { "epoch": 1.1634582132564841, "grad_norm": 1.5055663366064564, "learning_rate": 8.139368021595633e-07, "loss": 0.4325847029685974, "step": 5047 }, { "epoch": 1.1636887608069164, "grad_norm": 1.5488180260968318, "learning_rate": 8.135623030101763e-07, "loss": 0.4243529438972473, "step": 5048 }, { "epoch": 1.1639193083573487, "grad_norm": 1.5959515230472425, "learning_rate": 8.131878309482475e-07, "loss": 0.5942574739456177, "step": 5049 }, { "epoch": 1.164149855907781, "grad_norm": 1.4099782206145253, "learning_rate": 8.128133860281837e-07, "loss": 0.5493526458740234, "step": 5050 }, { "epoch": 1.1643804034582133, "grad_norm": 1.6745427589200643, "learning_rate": 8.124389683043872e-07, "loss": 0.44348329305648804, "step": 5051 }, { "epoch": 1.1646109510086455, "grad_norm": 1.553703570504434, "learning_rate": 8.120645778312577e-07, "loss": 0.530125617980957, "step": 5052 }, { "epoch": 1.1648414985590778, "grad_norm": 1.471177336732143, "learning_rate": 8.116902146631901e-07, "loss": 0.4052886962890625, "step": 5053 }, { "epoch": 1.16507204610951, "grad_norm": 1.8383146265148649, "learning_rate": 8.113158788545751e-07, "loss": 0.4531574845314026, "step": 5054 }, { "epoch": 1.1653025936599424, "grad_norm": 1.6630001339456053, "learning_rate": 8.109415704598004e-07, "loss": 0.456318199634552, "step": 5055 }, { "epoch": 1.1655331412103747, "grad_norm": 1.4028342384501173, "learning_rate": 8.105672895332485e-07, "loss": 0.49434107542037964, "step": 5056 }, { "epoch": 1.165763688760807, "grad_norm": 1.437792736683449, "learning_rate": 8.101930361292987e-07, "loss": 0.4440796971321106, "step": 5057 }, { "epoch": 1.1659942363112392, "grad_norm": 1.8668287714681662, "learning_rate": 8.098188103023266e-07, "loss": 0.45466774702072144, "step": 5058 }, { "epoch": 1.1662247838616715, "grad_norm": 1.5229028325724632, "learning_rate": 8.094446121067026e-07, "loss": 0.5570865273475647, "step": 5059 }, { "epoch": 1.1664553314121038, "grad_norm": 1.53454366004412, "learning_rate": 8.090704415967942e-07, "loss": 0.39382970333099365, "step": 5060 }, { "epoch": 1.166685878962536, "grad_norm": 1.6339069895946312, "learning_rate": 8.086962988269646e-07, "loss": 0.5676811337471008, "step": 5061 }, { "epoch": 1.1669164265129683, "grad_norm": 1.4415998432790569, "learning_rate": 8.083221838515727e-07, "loss": 0.49144673347473145, "step": 5062 }, { "epoch": 1.1671469740634006, "grad_norm": 1.7643102538106996, "learning_rate": 8.079480967249737e-07, "loss": 0.5352723598480225, "step": 5063 }, { "epoch": 1.167377521613833, "grad_norm": 1.6939389814092696, "learning_rate": 8.075740375015178e-07, "loss": 0.5697407722473145, "step": 5064 }, { "epoch": 1.1676080691642652, "grad_norm": 1.5192410214640266, "learning_rate": 8.072000062355528e-07, "loss": 0.4873645305633545, "step": 5065 }, { "epoch": 1.1678386167146975, "grad_norm": 1.5905728661562026, "learning_rate": 8.068260029814213e-07, "loss": 0.5032225847244263, "step": 5066 }, { "epoch": 1.1680691642651297, "grad_norm": 1.5892996781628121, "learning_rate": 8.064520277934618e-07, "loss": 0.45441383123397827, "step": 5067 }, { "epoch": 1.168299711815562, "grad_norm": 1.5165242772078702, "learning_rate": 8.060780807260094e-07, "loss": 0.5142766833305359, "step": 5068 }, { "epoch": 1.1685302593659943, "grad_norm": 1.3038765575265592, "learning_rate": 8.057041618333946e-07, "loss": 0.4434770345687866, "step": 5069 }, { "epoch": 1.1687608069164266, "grad_norm": 1.985553119140782, "learning_rate": 8.053302711699436e-07, "loss": 0.4094642400741577, "step": 5070 }, { "epoch": 1.1689913544668589, "grad_norm": 1.3275869000209546, "learning_rate": 8.049564087899794e-07, "loss": 0.45409655570983887, "step": 5071 }, { "epoch": 1.1692219020172911, "grad_norm": 1.5867037847106558, "learning_rate": 8.045825747478199e-07, "loss": 0.5118057131767273, "step": 5072 }, { "epoch": 1.1694524495677234, "grad_norm": 1.3524637128330204, "learning_rate": 8.042087690977791e-07, "loss": 0.3896600604057312, "step": 5073 }, { "epoch": 1.1696829971181557, "grad_norm": 1.3790318279559413, "learning_rate": 8.038349918941678e-07, "loss": 0.4440167546272278, "step": 5074 }, { "epoch": 1.169913544668588, "grad_norm": 1.5642943145914474, "learning_rate": 8.034612431912913e-07, "loss": 0.4162600636482239, "step": 5075 }, { "epoch": 1.17014409221902, "grad_norm": 1.5696782051249465, "learning_rate": 8.030875230434516e-07, "loss": 0.5087725520133972, "step": 5076 }, { "epoch": 1.1703746397694523, "grad_norm": 1.4896579171326287, "learning_rate": 8.027138315049465e-07, "loss": 0.4495736360549927, "step": 5077 }, { "epoch": 1.1706051873198846, "grad_norm": 1.282122386534862, "learning_rate": 8.023401686300692e-07, "loss": 0.489845335483551, "step": 5078 }, { "epoch": 1.170835734870317, "grad_norm": 1.51185017043383, "learning_rate": 8.019665344731095e-07, "loss": 0.5112447738647461, "step": 5079 }, { "epoch": 1.1710662824207492, "grad_norm": 1.6932024361341402, "learning_rate": 8.015929290883517e-07, "loss": 0.5937504768371582, "step": 5080 }, { "epoch": 1.1712968299711815, "grad_norm": 1.4969039934386026, "learning_rate": 8.012193525300776e-07, "loss": 0.44051915407180786, "step": 5081 }, { "epoch": 1.1715273775216137, "grad_norm": 1.45813729961155, "learning_rate": 8.008458048525639e-07, "loss": 0.46435099840164185, "step": 5082 }, { "epoch": 1.171757925072046, "grad_norm": 1.6776025006446114, "learning_rate": 8.004722861100827e-07, "loss": 0.6185746788978577, "step": 5083 }, { "epoch": 1.1719884726224783, "grad_norm": 1.7013307081411828, "learning_rate": 8.000987963569028e-07, "loss": 0.4826294481754303, "step": 5084 }, { "epoch": 1.1722190201729106, "grad_norm": 1.4399795258671182, "learning_rate": 7.997253356472884e-07, "loss": 0.4688999056816101, "step": 5085 }, { "epoch": 1.1724495677233429, "grad_norm": 2.065689721826827, "learning_rate": 7.993519040354989e-07, "loss": 0.5494599938392639, "step": 5086 }, { "epoch": 1.1726801152737751, "grad_norm": 1.7126589073325484, "learning_rate": 7.989785015757909e-07, "loss": 0.37470385432243347, "step": 5087 }, { "epoch": 1.1729106628242074, "grad_norm": 1.9404321864616616, "learning_rate": 7.986051283224153e-07, "loss": 0.5580037236213684, "step": 5088 }, { "epoch": 1.1731412103746397, "grad_norm": 1.2841436695629171, "learning_rate": 7.982317843296191e-07, "loss": 0.5286623239517212, "step": 5089 }, { "epoch": 1.173371757925072, "grad_norm": 1.4009308302426564, "learning_rate": 7.978584696516463e-07, "loss": 0.4835943877696991, "step": 5090 }, { "epoch": 1.1736023054755043, "grad_norm": 1.8751415579020991, "learning_rate": 7.974851843427348e-07, "loss": 0.5184438824653625, "step": 5091 }, { "epoch": 1.1738328530259365, "grad_norm": 1.696931221696011, "learning_rate": 7.971119284571194e-07, "loss": 0.39980262517929077, "step": 5092 }, { "epoch": 1.1740634005763688, "grad_norm": 1.3010621045377282, "learning_rate": 7.967387020490297e-07, "loss": 0.4931245446205139, "step": 5093 }, { "epoch": 1.174293948126801, "grad_norm": 1.4718683650776678, "learning_rate": 7.963655051726925e-07, "loss": 0.5418246984481812, "step": 5094 }, { "epoch": 1.1745244956772334, "grad_norm": 1.547908981740155, "learning_rate": 7.959923378823292e-07, "loss": 0.40723925828933716, "step": 5095 }, { "epoch": 1.1747550432276657, "grad_norm": 1.5860273572842907, "learning_rate": 7.956192002321564e-07, "loss": 0.4601886570453644, "step": 5096 }, { "epoch": 1.174985590778098, "grad_norm": 1.3393801052353216, "learning_rate": 7.952460922763881e-07, "loss": 0.439882755279541, "step": 5097 }, { "epoch": 1.1752161383285302, "grad_norm": 1.6695184794084814, "learning_rate": 7.948730140692326e-07, "loss": 0.5210170745849609, "step": 5098 }, { "epoch": 1.1754466858789625, "grad_norm": 1.8621316471839704, "learning_rate": 7.944999656648938e-07, "loss": 0.43414199352264404, "step": 5099 }, { "epoch": 1.1756772334293948, "grad_norm": 2.2151031419179072, "learning_rate": 7.941269471175729e-07, "loss": 0.5412740707397461, "step": 5100 }, { "epoch": 1.175907780979827, "grad_norm": 1.4771921151316811, "learning_rate": 7.937539584814645e-07, "loss": 0.49985402822494507, "step": 5101 }, { "epoch": 1.1761383285302593, "grad_norm": 1.4276177294779857, "learning_rate": 7.933809998107603e-07, "loss": 0.4674869179725647, "step": 5102 }, { "epoch": 1.1763688760806916, "grad_norm": 1.7775337786781618, "learning_rate": 7.930080711596477e-07, "loss": 0.4349539279937744, "step": 5103 }, { "epoch": 1.176599423631124, "grad_norm": 1.554477721089947, "learning_rate": 7.92635172582309e-07, "loss": 0.5000085830688477, "step": 5104 }, { "epoch": 1.1768299711815562, "grad_norm": 1.7477345906618489, "learning_rate": 7.922623041329223e-07, "loss": 0.6095143556594849, "step": 5105 }, { "epoch": 1.1770605187319885, "grad_norm": 1.4438459951254565, "learning_rate": 7.918894658656622e-07, "loss": 0.4526306986808777, "step": 5106 }, { "epoch": 1.1772910662824208, "grad_norm": 1.7380379348373243, "learning_rate": 7.915166578346974e-07, "loss": 0.47447705268859863, "step": 5107 }, { "epoch": 1.177521613832853, "grad_norm": 1.5624193087101916, "learning_rate": 7.911438800941938e-07, "loss": 0.5678610801696777, "step": 5108 }, { "epoch": 1.1777521613832853, "grad_norm": 1.7268534957559538, "learning_rate": 7.907711326983113e-07, "loss": 0.49429306387901306, "step": 5109 }, { "epoch": 1.1779827089337176, "grad_norm": 1.4407413119987242, "learning_rate": 7.903984157012068e-07, "loss": 0.430848628282547, "step": 5110 }, { "epoch": 1.1782132564841499, "grad_norm": 1.7950110825220507, "learning_rate": 7.900257291570324e-07, "loss": 0.4113616645336151, "step": 5111 }, { "epoch": 1.1784438040345822, "grad_norm": 1.5284371816604458, "learning_rate": 7.896530731199346e-07, "loss": 0.49423450231552124, "step": 5112 }, { "epoch": 1.1786743515850144, "grad_norm": 1.3589976827610897, "learning_rate": 7.892804476440574e-07, "loss": 0.42228102684020996, "step": 5113 }, { "epoch": 1.1789048991354467, "grad_norm": 1.4991917175795098, "learning_rate": 7.889078527835393e-07, "loss": 0.5798023343086243, "step": 5114 }, { "epoch": 1.179135446685879, "grad_norm": 1.5093615640917626, "learning_rate": 7.885352885925138e-07, "loss": 0.4872003197669983, "step": 5115 }, { "epoch": 1.1793659942363113, "grad_norm": 1.3378284729255738, "learning_rate": 7.881627551251116e-07, "loss": 0.40125834941864014, "step": 5116 }, { "epoch": 1.1795965417867436, "grad_norm": 1.5996579095623482, "learning_rate": 7.877902524354569e-07, "loss": 0.4458635449409485, "step": 5117 }, { "epoch": 1.1798270893371758, "grad_norm": 1.262725489425069, "learning_rate": 7.87417780577671e-07, "loss": 0.4682433009147644, "step": 5118 }, { "epoch": 1.1800576368876081, "grad_norm": 1.25949021074412, "learning_rate": 7.870453396058704e-07, "loss": 0.3488504886627197, "step": 5119 }, { "epoch": 1.1802881844380404, "grad_norm": 1.5564681991959675, "learning_rate": 7.866729295741666e-07, "loss": 0.508482813835144, "step": 5120 }, { "epoch": 1.1805187319884727, "grad_norm": 1.5995779613986645, "learning_rate": 7.863005505366664e-07, "loss": 0.4301029145717621, "step": 5121 }, { "epoch": 1.180749279538905, "grad_norm": 1.5221914817292215, "learning_rate": 7.859282025474738e-07, "loss": 0.46766793727874756, "step": 5122 }, { "epoch": 1.1809798270893372, "grad_norm": 1.5282755704648547, "learning_rate": 7.855558856606862e-07, "loss": 0.4898037314414978, "step": 5123 }, { "epoch": 1.1812103746397695, "grad_norm": 1.5328082397130638, "learning_rate": 7.851835999303977e-07, "loss": 0.4219735860824585, "step": 5124 }, { "epoch": 1.1814409221902018, "grad_norm": 1.5861961330265528, "learning_rate": 7.848113454106971e-07, "loss": 0.47639018297195435, "step": 5125 }, { "epoch": 1.181671469740634, "grad_norm": 1.8716402338437157, "learning_rate": 7.844391221556696e-07, "loss": 0.5062400102615356, "step": 5126 }, { "epoch": 1.1819020172910664, "grad_norm": 1.6261243350490995, "learning_rate": 7.840669302193957e-07, "loss": 0.4984915256500244, "step": 5127 }, { "epoch": 1.1821325648414986, "grad_norm": 1.6122125447702478, "learning_rate": 7.836947696559497e-07, "loss": 0.4356485605239868, "step": 5128 }, { "epoch": 1.182363112391931, "grad_norm": 1.3005367585942214, "learning_rate": 7.833226405194039e-07, "loss": 0.4968249797821045, "step": 5129 }, { "epoch": 1.1825936599423632, "grad_norm": 1.8474286891846123, "learning_rate": 7.829505428638245e-07, "loss": 0.497678279876709, "step": 5130 }, { "epoch": 1.1828242074927955, "grad_norm": 1.4865858757479278, "learning_rate": 7.825784767432731e-07, "loss": 0.43530794978141785, "step": 5131 }, { "epoch": 1.1830547550432278, "grad_norm": 1.5492043974535954, "learning_rate": 7.822064422118078e-07, "loss": 0.5460381507873535, "step": 5132 }, { "epoch": 1.18328530259366, "grad_norm": 1.7873525324615471, "learning_rate": 7.818344393234799e-07, "loss": 0.400234192609787, "step": 5133 }, { "epoch": 1.1835158501440923, "grad_norm": 1.4067634014586672, "learning_rate": 7.814624681323387e-07, "loss": 0.41525113582611084, "step": 5134 }, { "epoch": 1.1837463976945246, "grad_norm": 1.7141603087851884, "learning_rate": 7.810905286924281e-07, "loss": 0.6016113758087158, "step": 5135 }, { "epoch": 1.183976945244957, "grad_norm": 1.565613245374153, "learning_rate": 7.807186210577856e-07, "loss": 0.44173747301101685, "step": 5136 }, { "epoch": 1.1842074927953892, "grad_norm": 1.5753493289476495, "learning_rate": 7.803467452824469e-07, "loss": 0.4887153208255768, "step": 5137 }, { "epoch": 1.1844380403458212, "grad_norm": 1.4753411189568921, "learning_rate": 7.799749014204409e-07, "loss": 0.35861653089523315, "step": 5138 }, { "epoch": 1.1846685878962535, "grad_norm": 1.51791048669176, "learning_rate": 7.796030895257924e-07, "loss": 0.49498188495635986, "step": 5139 }, { "epoch": 1.1848991354466858, "grad_norm": 1.5790347235405173, "learning_rate": 7.792313096525229e-07, "loss": 0.4640830159187317, "step": 5140 }, { "epoch": 1.185129682997118, "grad_norm": 1.6546050323090251, "learning_rate": 7.788595618546473e-07, "loss": 0.43868836760520935, "step": 5141 }, { "epoch": 1.1853602305475504, "grad_norm": 1.3247618330025193, "learning_rate": 7.784878461861766e-07, "loss": 0.42038998007774353, "step": 5142 }, { "epoch": 1.1855907780979826, "grad_norm": 1.3831120122965148, "learning_rate": 7.78116162701118e-07, "loss": 0.46532997488975525, "step": 5143 }, { "epoch": 1.185821325648415, "grad_norm": 1.8301201761711272, "learning_rate": 7.777445114534724e-07, "loss": 0.4519978165626526, "step": 5144 }, { "epoch": 1.1860518731988472, "grad_norm": 2.0549662658660752, "learning_rate": 7.773728924972374e-07, "loss": 0.4602941870689392, "step": 5145 }, { "epoch": 1.1862824207492795, "grad_norm": 1.8488754653531705, "learning_rate": 7.770013058864048e-07, "loss": 0.49775010347366333, "step": 5146 }, { "epoch": 1.1865129682997118, "grad_norm": 1.303047332232113, "learning_rate": 7.76629751674963e-07, "loss": 0.4270223379135132, "step": 5147 }, { "epoch": 1.186743515850144, "grad_norm": 1.7263287626755313, "learning_rate": 7.762582299168947e-07, "loss": 0.4341789186000824, "step": 5148 }, { "epoch": 1.1869740634005763, "grad_norm": 1.5341090099331076, "learning_rate": 7.758867406661777e-07, "loss": 0.5144226551055908, "step": 5149 }, { "epoch": 1.1872046109510086, "grad_norm": 1.6436159515518085, "learning_rate": 7.75515283976786e-07, "loss": 0.5009859800338745, "step": 5150 }, { "epoch": 1.1874351585014409, "grad_norm": 1.7538603635849173, "learning_rate": 7.751438599026885e-07, "loss": 0.411882221698761, "step": 5151 }, { "epoch": 1.1876657060518732, "grad_norm": 1.5272494525926428, "learning_rate": 7.747724684978488e-07, "loss": 0.5135201215744019, "step": 5152 }, { "epoch": 1.1878962536023054, "grad_norm": 1.6382337953276667, "learning_rate": 7.744011098162265e-07, "loss": 0.5519058108329773, "step": 5153 }, { "epoch": 1.1881268011527377, "grad_norm": 1.6462491753715431, "learning_rate": 7.740297839117761e-07, "loss": 0.49767088890075684, "step": 5154 }, { "epoch": 1.18835734870317, "grad_norm": 1.7563113941156678, "learning_rate": 7.736584908384472e-07, "loss": 0.5366396903991699, "step": 5155 }, { "epoch": 1.1885878962536023, "grad_norm": 1.4986658639467278, "learning_rate": 7.732872306501852e-07, "loss": 0.4962652325630188, "step": 5156 }, { "epoch": 1.1888184438040346, "grad_norm": 1.7508485740809516, "learning_rate": 7.729160034009301e-07, "loss": 0.5071662664413452, "step": 5157 }, { "epoch": 1.1890489913544668, "grad_norm": 1.5980525858667156, "learning_rate": 7.725448091446171e-07, "loss": 0.45525041222572327, "step": 5158 }, { "epoch": 1.1892795389048991, "grad_norm": 1.6066578942153278, "learning_rate": 7.721736479351777e-07, "loss": 0.47270429134368896, "step": 5159 }, { "epoch": 1.1895100864553314, "grad_norm": 1.4730405508861286, "learning_rate": 7.71802519826537e-07, "loss": 0.49913090467453003, "step": 5160 }, { "epoch": 1.1897406340057637, "grad_norm": 1.7049072426900878, "learning_rate": 7.714314248726164e-07, "loss": 0.4268707036972046, "step": 5161 }, { "epoch": 1.189971181556196, "grad_norm": 1.437999496558197, "learning_rate": 7.710603631273316e-07, "loss": 0.3650474548339844, "step": 5162 }, { "epoch": 1.1902017291066282, "grad_norm": 1.6164991497032537, "learning_rate": 7.706893346445947e-07, "loss": 0.4717981219291687, "step": 5163 }, { "epoch": 1.1904322766570605, "grad_norm": 1.4762753138416673, "learning_rate": 7.703183394783122e-07, "loss": 0.5165996551513672, "step": 5164 }, { "epoch": 1.1906628242074928, "grad_norm": 1.5972624214363949, "learning_rate": 7.699473776823851e-07, "loss": 0.4477986693382263, "step": 5165 }, { "epoch": 1.190893371757925, "grad_norm": 1.3871967139667596, "learning_rate": 7.695764493107112e-07, "loss": 0.4584044814109802, "step": 5166 }, { "epoch": 1.1911239193083574, "grad_norm": 1.4925097507009442, "learning_rate": 7.692055544171823e-07, "loss": 0.5276877880096436, "step": 5167 }, { "epoch": 1.1913544668587897, "grad_norm": 1.8352054779159281, "learning_rate": 7.68834693055685e-07, "loss": 0.5204349756240845, "step": 5168 }, { "epoch": 1.191585014409222, "grad_norm": 1.6795305875537256, "learning_rate": 7.684638652801025e-07, "loss": 0.44728660583496094, "step": 5169 }, { "epoch": 1.1918155619596542, "grad_norm": 1.4809310662300776, "learning_rate": 7.680930711443116e-07, "loss": 0.48899370431900024, "step": 5170 }, { "epoch": 1.1920461095100865, "grad_norm": 1.3367974168589116, "learning_rate": 7.677223107021847e-07, "loss": 0.41567301750183105, "step": 5171 }, { "epoch": 1.1922766570605188, "grad_norm": 1.6483731426886425, "learning_rate": 7.673515840075901e-07, "loss": 0.5072032809257507, "step": 5172 }, { "epoch": 1.192507204610951, "grad_norm": 1.5620093494432372, "learning_rate": 7.669808911143901e-07, "loss": 0.4741431176662445, "step": 5173 }, { "epoch": 1.1927377521613833, "grad_norm": 1.5478542834432012, "learning_rate": 7.666102320764421e-07, "loss": 0.4430406987667084, "step": 5174 }, { "epoch": 1.1929682997118156, "grad_norm": 1.4964356352817318, "learning_rate": 7.662396069476002e-07, "loss": 0.40400367975234985, "step": 5175 }, { "epoch": 1.193198847262248, "grad_norm": 1.5885122676102894, "learning_rate": 7.658690157817112e-07, "loss": 0.5351930856704712, "step": 5176 }, { "epoch": 1.1934293948126802, "grad_norm": 1.492376923213462, "learning_rate": 7.65498458632619e-07, "loss": 0.5067006349563599, "step": 5177 }, { "epoch": 1.1936599423631125, "grad_norm": 1.4138890281535976, "learning_rate": 7.651279355541607e-07, "loss": 0.385654091835022, "step": 5178 }, { "epoch": 1.1938904899135447, "grad_norm": 1.857913919053153, "learning_rate": 7.647574466001703e-07, "loss": 0.4534091353416443, "step": 5179 }, { "epoch": 1.194121037463977, "grad_norm": 1.4074916536819597, "learning_rate": 7.643869918244759e-07, "loss": 0.5353249907493591, "step": 5180 }, { "epoch": 1.1943515850144093, "grad_norm": 1.4274240401119769, "learning_rate": 7.640165712809001e-07, "loss": 0.41028958559036255, "step": 5181 }, { "epoch": 1.1945821325648416, "grad_norm": 1.624072204554771, "learning_rate": 7.636461850232622e-07, "loss": 0.4383719563484192, "step": 5182 }, { "epoch": 1.1948126801152739, "grad_norm": 1.5213363057247005, "learning_rate": 7.632758331053746e-07, "loss": 0.469385027885437, "step": 5183 }, { "epoch": 1.1950432276657061, "grad_norm": 1.4242859850099794, "learning_rate": 7.629055155810456e-07, "loss": 0.4319891929626465, "step": 5184 }, { "epoch": 1.1952737752161384, "grad_norm": 1.6816172631406454, "learning_rate": 7.625352325040792e-07, "loss": 0.5012685656547546, "step": 5185 }, { "epoch": 1.1955043227665705, "grad_norm": 1.432377738464319, "learning_rate": 7.621649839282728e-07, "loss": 0.4383701682090759, "step": 5186 }, { "epoch": 1.1957348703170028, "grad_norm": 1.506815615793495, "learning_rate": 7.617947699074202e-07, "loss": 0.5478798151016235, "step": 5187 }, { "epoch": 1.195965417867435, "grad_norm": 1.592514094718691, "learning_rate": 7.614245904953098e-07, "loss": 0.4665898084640503, "step": 5188 }, { "epoch": 1.1961959654178673, "grad_norm": 1.6410040946837772, "learning_rate": 7.610544457457245e-07, "loss": 0.49260783195495605, "step": 5189 }, { "epoch": 1.1964265129682996, "grad_norm": 1.941675199926286, "learning_rate": 7.606843357124425e-07, "loss": 0.4491361379623413, "step": 5190 }, { "epoch": 1.1966570605187319, "grad_norm": 1.3495666028535211, "learning_rate": 7.603142604492366e-07, "loss": 0.5150983929634094, "step": 5191 }, { "epoch": 1.1968876080691642, "grad_norm": 1.4180648868874097, "learning_rate": 7.599442200098756e-07, "loss": 0.4191433787345886, "step": 5192 }, { "epoch": 1.1971181556195964, "grad_norm": 1.6520041909295045, "learning_rate": 7.595742144481222e-07, "loss": 0.47440847754478455, "step": 5193 }, { "epoch": 1.1973487031700287, "grad_norm": 1.4858965402977922, "learning_rate": 7.592042438177341e-07, "loss": 0.4382219612598419, "step": 5194 }, { "epoch": 1.197579250720461, "grad_norm": 1.4785659490868535, "learning_rate": 7.588343081724646e-07, "loss": 0.44138234853744507, "step": 5195 }, { "epoch": 1.1978097982708933, "grad_norm": 1.7981437297331029, "learning_rate": 7.584644075660614e-07, "loss": 0.41076284646987915, "step": 5196 }, { "epoch": 1.1980403458213256, "grad_norm": 1.6500368550264195, "learning_rate": 7.580945420522669e-07, "loss": 0.5953484773635864, "step": 5197 }, { "epoch": 1.1982708933717579, "grad_norm": 1.5393411560043135, "learning_rate": 7.577247116848192e-07, "loss": 0.3844539523124695, "step": 5198 }, { "epoch": 1.1985014409221901, "grad_norm": 1.4919048187646704, "learning_rate": 7.573549165174504e-07, "loss": 0.47392016649246216, "step": 5199 }, { "epoch": 1.1987319884726224, "grad_norm": 1.437882902973616, "learning_rate": 7.569851566038879e-07, "loss": 0.43976885080337524, "step": 5200 }, { "epoch": 1.1989625360230547, "grad_norm": 1.6498454472520225, "learning_rate": 7.566154319978545e-07, "loss": 0.4783346652984619, "step": 5201 }, { "epoch": 1.199193083573487, "grad_norm": 1.424108162037939, "learning_rate": 7.562457427530668e-07, "loss": 0.534496009349823, "step": 5202 }, { "epoch": 1.1994236311239193, "grad_norm": 1.4707784169466331, "learning_rate": 7.558760889232365e-07, "loss": 0.42668965458869934, "step": 5203 }, { "epoch": 1.1996541786743515, "grad_norm": 1.737746094211821, "learning_rate": 7.555064705620717e-07, "loss": 0.45171916484832764, "step": 5204 }, { "epoch": 1.1998847262247838, "grad_norm": 1.4495856306459454, "learning_rate": 7.551368877232728e-07, "loss": 0.41587740182876587, "step": 5205 }, { "epoch": 1.200115273775216, "grad_norm": 2.0458844084652426, "learning_rate": 7.547673404605372e-07, "loss": 0.5804768204689026, "step": 5206 }, { "epoch": 1.2003458213256484, "grad_norm": 1.6550173812518771, "learning_rate": 7.543978288275554e-07, "loss": 0.5011946558952332, "step": 5207 }, { "epoch": 1.2005763688760807, "grad_norm": 1.6293811211630818, "learning_rate": 7.540283528780145e-07, "loss": 0.4823184013366699, "step": 5208 }, { "epoch": 1.200806916426513, "grad_norm": 1.583832485145893, "learning_rate": 7.536589126655952e-07, "loss": 0.4285504221916199, "step": 5209 }, { "epoch": 1.2010374639769452, "grad_norm": 1.5324557734796895, "learning_rate": 7.532895082439728e-07, "loss": 0.5323970317840576, "step": 5210 }, { "epoch": 1.2012680115273775, "grad_norm": 1.5033751815855423, "learning_rate": 7.529201396668188e-07, "loss": 0.4522852301597595, "step": 5211 }, { "epoch": 1.2014985590778098, "grad_norm": 1.6592368965468285, "learning_rate": 7.525508069877981e-07, "loss": 0.37899982929229736, "step": 5212 }, { "epoch": 1.201729106628242, "grad_norm": 1.542899034057244, "learning_rate": 7.521815102605709e-07, "loss": 0.48277533054351807, "step": 5213 }, { "epoch": 1.2019596541786743, "grad_norm": 1.6228734814854653, "learning_rate": 7.518122495387924e-07, "loss": 0.5247419476509094, "step": 5214 }, { "epoch": 1.2021902017291066, "grad_norm": 1.6953521267123268, "learning_rate": 7.514430248761121e-07, "loss": 0.5083534717559814, "step": 5215 }, { "epoch": 1.202420749279539, "grad_norm": 1.4712281663420155, "learning_rate": 7.510738363261743e-07, "loss": 0.4977297782897949, "step": 5216 }, { "epoch": 1.2026512968299712, "grad_norm": 1.8268790082927158, "learning_rate": 7.507046839426193e-07, "loss": 0.49126511812210083, "step": 5217 }, { "epoch": 1.2028818443804035, "grad_norm": 1.390643338354512, "learning_rate": 7.503355677790797e-07, "loss": 0.45457524061203003, "step": 5218 }, { "epoch": 1.2031123919308357, "grad_norm": 1.5594668449448779, "learning_rate": 7.499664878891849e-07, "loss": 0.4343973994255066, "step": 5219 }, { "epoch": 1.203342939481268, "grad_norm": 1.7979682805933468, "learning_rate": 7.495974443265588e-07, "loss": 0.465421199798584, "step": 5220 }, { "epoch": 1.2035734870317003, "grad_norm": 1.643661704169387, "learning_rate": 7.492284371448189e-07, "loss": 0.6031137704849243, "step": 5221 }, { "epoch": 1.2038040345821326, "grad_norm": 1.3428704799411881, "learning_rate": 7.488594663975786e-07, "loss": 0.4751429557800293, "step": 5222 }, { "epoch": 1.2040345821325649, "grad_norm": 1.4175138149901096, "learning_rate": 7.484905321384448e-07, "loss": 0.4252334237098694, "step": 5223 }, { "epoch": 1.2042651296829971, "grad_norm": 1.7243093798167899, "learning_rate": 7.481216344210205e-07, "loss": 0.46463245153427124, "step": 5224 }, { "epoch": 1.2044956772334294, "grad_norm": 1.5792374494579962, "learning_rate": 7.477527732989026e-07, "loss": 0.5066741704940796, "step": 5225 }, { "epoch": 1.2047262247838617, "grad_norm": 1.4978142606595146, "learning_rate": 7.473839488256825e-07, "loss": 0.43169310688972473, "step": 5226 }, { "epoch": 1.204956772334294, "grad_norm": 1.3295563962936168, "learning_rate": 7.470151610549469e-07, "loss": 0.4527069330215454, "step": 5227 }, { "epoch": 1.2051873198847263, "grad_norm": 1.7731392679004423, "learning_rate": 7.466464100402765e-07, "loss": 0.5407136082649231, "step": 5228 }, { "epoch": 1.2054178674351586, "grad_norm": 1.776257403864893, "learning_rate": 7.46277695835247e-07, "loss": 0.499603807926178, "step": 5229 }, { "epoch": 1.2056484149855908, "grad_norm": 1.3999462799385904, "learning_rate": 7.459090184934293e-07, "loss": 0.43973076343536377, "step": 5230 }, { "epoch": 1.2058789625360231, "grad_norm": 1.4512778914107467, "learning_rate": 7.455403780683877e-07, "loss": 0.5759705305099487, "step": 5231 }, { "epoch": 1.2061095100864554, "grad_norm": 1.959975964174113, "learning_rate": 7.451717746136819e-07, "loss": 0.4623042643070221, "step": 5232 }, { "epoch": 1.2063400576368877, "grad_norm": 1.6890420465514966, "learning_rate": 7.448032081828666e-07, "loss": 0.5411099791526794, "step": 5233 }, { "epoch": 1.20657060518732, "grad_norm": 1.4414259450436449, "learning_rate": 7.444346788294904e-07, "loss": 0.4358411133289337, "step": 5234 }, { "epoch": 1.2068011527377522, "grad_norm": 1.5143380411163672, "learning_rate": 7.440661866070967e-07, "loss": 0.4569090008735657, "step": 5235 }, { "epoch": 1.2070317002881845, "grad_norm": 1.8867912221636376, "learning_rate": 7.436977315692234e-07, "loss": 0.4413526654243469, "step": 5236 }, { "epoch": 1.2072622478386168, "grad_norm": 1.59137760270953, "learning_rate": 7.433293137694038e-07, "loss": 0.5472520589828491, "step": 5237 }, { "epoch": 1.207492795389049, "grad_norm": 1.5410351897128989, "learning_rate": 7.429609332611648e-07, "loss": 0.48993226885795593, "step": 5238 }, { "epoch": 1.2077233429394814, "grad_norm": 1.5668405345191256, "learning_rate": 7.42592590098028e-07, "loss": 0.4739914536476135, "step": 5239 }, { "epoch": 1.2079538904899136, "grad_norm": 1.6541576418498671, "learning_rate": 7.422242843335103e-07, "loss": 0.4202721118927002, "step": 5240 }, { "epoch": 1.208184438040346, "grad_norm": 1.5270783995121628, "learning_rate": 7.418560160211227e-07, "loss": 0.40436694025993347, "step": 5241 }, { "epoch": 1.2084149855907782, "grad_norm": 1.4882888138390808, "learning_rate": 7.4148778521437e-07, "loss": 0.5451452136039734, "step": 5242 }, { "epoch": 1.2086455331412105, "grad_norm": 2.0232712244262707, "learning_rate": 7.411195919667536e-07, "loss": 0.47577959299087524, "step": 5243 }, { "epoch": 1.2088760806916428, "grad_norm": 1.6782045659247888, "learning_rate": 7.407514363317668e-07, "loss": 0.5722167491912842, "step": 5244 }, { "epoch": 1.209106628242075, "grad_norm": 1.2764452833125082, "learning_rate": 7.403833183628994e-07, "loss": 0.4649240970611572, "step": 5245 }, { "epoch": 1.2093371757925073, "grad_norm": 1.5732711836020632, "learning_rate": 7.400152381136356e-07, "loss": 0.43235695362091064, "step": 5246 }, { "epoch": 1.2095677233429396, "grad_norm": 1.585508242802622, "learning_rate": 7.396471956374526e-07, "loss": 0.4768486022949219, "step": 5247 }, { "epoch": 1.2097982708933717, "grad_norm": 1.4279497343125311, "learning_rate": 7.392791909878238e-07, "loss": 0.4668810963630676, "step": 5248 }, { "epoch": 1.210028818443804, "grad_norm": 1.4689492151474428, "learning_rate": 7.389112242182167e-07, "loss": 0.5458219051361084, "step": 5249 }, { "epoch": 1.2102593659942362, "grad_norm": 1.4005798560782017, "learning_rate": 7.385432953820923e-07, "loss": 0.5364928245544434, "step": 5250 }, { "epoch": 1.2104899135446685, "grad_norm": 1.6602298965175872, "learning_rate": 7.381754045329074e-07, "loss": 0.38189631700515747, "step": 5251 }, { "epoch": 1.2107204610951008, "grad_norm": 1.4124893480452145, "learning_rate": 7.378075517241125e-07, "loss": 0.4769268333911896, "step": 5252 }, { "epoch": 1.210951008645533, "grad_norm": 1.6246057596513548, "learning_rate": 7.374397370091524e-07, "loss": 0.5034056901931763, "step": 5253 }, { "epoch": 1.2111815561959653, "grad_norm": 1.543411080095952, "learning_rate": 7.370719604414677e-07, "loss": 0.4454866647720337, "step": 5254 }, { "epoch": 1.2114121037463976, "grad_norm": 2.0913685445588013, "learning_rate": 7.367042220744917e-07, "loss": 0.45258912444114685, "step": 5255 }, { "epoch": 1.21164265129683, "grad_norm": 1.4734776431585603, "learning_rate": 7.36336521961653e-07, "loss": 0.38827258348464966, "step": 5256 }, { "epoch": 1.2118731988472622, "grad_norm": 1.3890787791174941, "learning_rate": 7.359688601563751e-07, "loss": 0.4223392605781555, "step": 5257 }, { "epoch": 1.2121037463976945, "grad_norm": 2.0074293946274, "learning_rate": 7.356012367120752e-07, "loss": 0.5090558528900146, "step": 5258 }, { "epoch": 1.2123342939481268, "grad_norm": 1.6974670853065486, "learning_rate": 7.352336516821654e-07, "loss": 0.457112193107605, "step": 5259 }, { "epoch": 1.212564841498559, "grad_norm": 1.7143853713755068, "learning_rate": 7.34866105120051e-07, "loss": 0.4801919162273407, "step": 5260 }, { "epoch": 1.2127953890489913, "grad_norm": 1.9161855479797785, "learning_rate": 7.344985970791337e-07, "loss": 0.4039991497993469, "step": 5261 }, { "epoch": 1.2130259365994236, "grad_norm": 1.6279032064972734, "learning_rate": 7.341311276128086e-07, "loss": 0.4903545677661896, "step": 5262 }, { "epoch": 1.2132564841498559, "grad_norm": 1.3609848879438589, "learning_rate": 7.337636967744642e-07, "loss": 0.42703670263290405, "step": 5263 }, { "epoch": 1.2134870317002882, "grad_norm": 1.3994720029391854, "learning_rate": 7.333963046174856e-07, "loss": 0.44627687335014343, "step": 5264 }, { "epoch": 1.2137175792507204, "grad_norm": 1.3128568673715006, "learning_rate": 7.330289511952505e-07, "loss": 0.4332897663116455, "step": 5265 }, { "epoch": 1.2139481268011527, "grad_norm": 1.637478754152211, "learning_rate": 7.326616365611312e-07, "loss": 0.3755282163619995, "step": 5266 }, { "epoch": 1.214178674351585, "grad_norm": 1.3904375127772393, "learning_rate": 7.322943607684955e-07, "loss": 0.43442171812057495, "step": 5267 }, { "epoch": 1.2144092219020173, "grad_norm": 1.6462684630654334, "learning_rate": 7.319271238707041e-07, "loss": 0.53243088722229, "step": 5268 }, { "epoch": 1.2146397694524496, "grad_norm": 1.6026015390069184, "learning_rate": 7.315599259211126e-07, "loss": 0.47449052333831787, "step": 5269 }, { "epoch": 1.2148703170028818, "grad_norm": 1.3730761035928816, "learning_rate": 7.311927669730718e-07, "loss": 0.4487804174423218, "step": 5270 }, { "epoch": 1.2151008645533141, "grad_norm": 1.4002622501825168, "learning_rate": 7.308256470799254e-07, "loss": 0.44249311089515686, "step": 5271 }, { "epoch": 1.2153314121037464, "grad_norm": 1.3523133908066987, "learning_rate": 7.304585662950124e-07, "loss": 0.5116596817970276, "step": 5272 }, { "epoch": 1.2155619596541787, "grad_norm": 1.5195812729956901, "learning_rate": 7.300915246716654e-07, "loss": 0.4131404161453247, "step": 5273 }, { "epoch": 1.215792507204611, "grad_norm": 1.381246861741987, "learning_rate": 7.297245222632124e-07, "loss": 0.5043983459472656, "step": 5274 }, { "epoch": 1.2160230547550432, "grad_norm": 1.637523206196005, "learning_rate": 7.293575591229748e-07, "loss": 0.5042402744293213, "step": 5275 }, { "epoch": 1.2162536023054755, "grad_norm": 1.5077498186665426, "learning_rate": 7.28990635304268e-07, "loss": 0.5686768293380737, "step": 5276 }, { "epoch": 1.2164841498559078, "grad_norm": 1.7943867829472084, "learning_rate": 7.286237508604029e-07, "loss": 0.48758572340011597, "step": 5277 }, { "epoch": 1.21671469740634, "grad_norm": 1.7282870483340211, "learning_rate": 7.282569058446839e-07, "loss": 0.5371814966201782, "step": 5278 }, { "epoch": 1.2169452449567724, "grad_norm": 1.4246190071563047, "learning_rate": 7.278901003104092e-07, "loss": 0.42574048042297363, "step": 5279 }, { "epoch": 1.2171757925072046, "grad_norm": 1.4665158384559256, "learning_rate": 7.275233343108725e-07, "loss": 0.4266633987426758, "step": 5280 }, { "epoch": 1.217406340057637, "grad_norm": 1.5187689250162444, "learning_rate": 7.271566078993608e-07, "loss": 0.5175603628158569, "step": 5281 }, { "epoch": 1.2176368876080692, "grad_norm": 1.4145262211461822, "learning_rate": 7.267899211291553e-07, "loss": 0.45306429266929626, "step": 5282 }, { "epoch": 1.2178674351585015, "grad_norm": 1.5722710553655959, "learning_rate": 7.264232740535326e-07, "loss": 0.44565948843955994, "step": 5283 }, { "epoch": 1.2180979827089338, "grad_norm": 1.7079312896564505, "learning_rate": 7.260566667257619e-07, "loss": 0.5211349129676819, "step": 5284 }, { "epoch": 1.218328530259366, "grad_norm": 1.5221889542953, "learning_rate": 7.256900991991078e-07, "loss": 0.4979493021965027, "step": 5285 }, { "epoch": 1.2185590778097983, "grad_norm": 1.8057148340658564, "learning_rate": 7.253235715268288e-07, "loss": 0.5332674980163574, "step": 5286 }, { "epoch": 1.2187896253602306, "grad_norm": 1.4882443630436493, "learning_rate": 7.249570837621773e-07, "loss": 0.49410900473594666, "step": 5287 }, { "epoch": 1.219020172910663, "grad_norm": 1.4338054427483777, "learning_rate": 7.245906359584007e-07, "loss": 0.37368130683898926, "step": 5288 }, { "epoch": 1.2192507204610952, "grad_norm": 1.4281840560624028, "learning_rate": 7.242242281687392e-07, "loss": 0.38786208629608154, "step": 5289 }, { "epoch": 1.2194812680115275, "grad_norm": 1.5093115457086763, "learning_rate": 7.238578604464286e-07, "loss": 0.3937080502510071, "step": 5290 }, { "epoch": 1.2197118155619597, "grad_norm": 1.3008901010675145, "learning_rate": 7.234915328446984e-07, "loss": 0.40712523460388184, "step": 5291 }, { "epoch": 1.219942363112392, "grad_norm": 1.513634738152884, "learning_rate": 7.231252454167718e-07, "loss": 0.4975137412548065, "step": 5292 }, { "epoch": 1.2201729106628243, "grad_norm": 1.9471699294165814, "learning_rate": 7.227589982158668e-07, "loss": 0.4989965558052063, "step": 5293 }, { "epoch": 1.2204034582132566, "grad_norm": 1.7515604771469182, "learning_rate": 7.223927912951957e-07, "loss": 0.46941643953323364, "step": 5294 }, { "epoch": 1.2206340057636889, "grad_norm": 1.4552925878868335, "learning_rate": 7.220266247079636e-07, "loss": 0.4827825129032135, "step": 5295 }, { "epoch": 1.220864553314121, "grad_norm": 1.640471954378316, "learning_rate": 7.216604985073715e-07, "loss": 0.5201072692871094, "step": 5296 }, { "epoch": 1.2210951008645532, "grad_norm": 1.5192370436913325, "learning_rate": 7.212944127466134e-07, "loss": 0.4791795015335083, "step": 5297 }, { "epoch": 1.2213256484149855, "grad_norm": 1.571406498612016, "learning_rate": 7.209283674788776e-07, "loss": 0.4800533056259155, "step": 5298 }, { "epoch": 1.2215561959654178, "grad_norm": 1.5414636998167182, "learning_rate": 7.205623627573474e-07, "loss": 0.39302635192871094, "step": 5299 }, { "epoch": 1.22178674351585, "grad_norm": 1.7227103984195933, "learning_rate": 7.201963986351985e-07, "loss": 0.4840422570705414, "step": 5300 }, { "epoch": 1.2220172910662823, "grad_norm": 1.5551647759093306, "learning_rate": 7.198304751656021e-07, "loss": 0.4697931110858917, "step": 5301 }, { "epoch": 1.2222478386167146, "grad_norm": 2.184299348287721, "learning_rate": 7.194645924017235e-07, "loss": 0.5836421847343445, "step": 5302 }, { "epoch": 1.2224783861671469, "grad_norm": 1.5210470262328177, "learning_rate": 7.190987503967211e-07, "loss": 0.4224538803100586, "step": 5303 }, { "epoch": 1.2227089337175792, "grad_norm": 1.5588545608663777, "learning_rate": 7.187329492037483e-07, "loss": 0.4659155011177063, "step": 5304 }, { "epoch": 1.2229394812680114, "grad_norm": 1.5975720556067594, "learning_rate": 7.183671888759515e-07, "loss": 0.5140712857246399, "step": 5305 }, { "epoch": 1.2231700288184437, "grad_norm": 1.5874727824945862, "learning_rate": 7.180014694664727e-07, "loss": 0.5154543519020081, "step": 5306 }, { "epoch": 1.223400576368876, "grad_norm": 1.446540356714536, "learning_rate": 7.17635791028447e-07, "loss": 0.47813570499420166, "step": 5307 }, { "epoch": 1.2236311239193083, "grad_norm": 1.6575633815006634, "learning_rate": 7.172701536150031e-07, "loss": 0.5261724591255188, "step": 5308 }, { "epoch": 1.2238616714697406, "grad_norm": 1.8019702671398061, "learning_rate": 7.169045572792649e-07, "loss": 0.4635971486568451, "step": 5309 }, { "epoch": 1.2240922190201728, "grad_norm": 1.262174656023695, "learning_rate": 7.165390020743497e-07, "loss": 0.46142834424972534, "step": 5310 }, { "epoch": 1.2243227665706051, "grad_norm": 1.4418390684347826, "learning_rate": 7.161734880533683e-07, "loss": 0.47900235652923584, "step": 5311 }, { "epoch": 1.2245533141210374, "grad_norm": 1.473019079046211, "learning_rate": 7.15808015269427e-07, "loss": 0.5513979196548462, "step": 5312 }, { "epoch": 1.2247838616714697, "grad_norm": 2.0803994243808637, "learning_rate": 7.154425837756244e-07, "loss": 0.46302229166030884, "step": 5313 }, { "epoch": 1.225014409221902, "grad_norm": 1.468383069657206, "learning_rate": 7.150771936250539e-07, "loss": 0.5822727680206299, "step": 5314 }, { "epoch": 1.2252449567723342, "grad_norm": 1.2990824770630092, "learning_rate": 7.147118448708039e-07, "loss": 0.37017765641212463, "step": 5315 }, { "epoch": 1.2254755043227665, "grad_norm": 1.285655707049756, "learning_rate": 7.143465375659545e-07, "loss": 0.44237178564071655, "step": 5316 }, { "epoch": 1.2257060518731988, "grad_norm": 1.6766031200731948, "learning_rate": 7.13981271763582e-07, "loss": 0.5030481815338135, "step": 5317 }, { "epoch": 1.225936599423631, "grad_norm": 1.5702404213513188, "learning_rate": 7.136160475167547e-07, "loss": 0.5518827438354492, "step": 5318 }, { "epoch": 1.2261671469740634, "grad_norm": 1.251114193413058, "learning_rate": 7.132508648785369e-07, "loss": 0.5090180039405823, "step": 5319 }, { "epoch": 1.2263976945244957, "grad_norm": 1.7345902549723513, "learning_rate": 7.128857239019857e-07, "loss": 0.495076060295105, "step": 5320 }, { "epoch": 1.226628242074928, "grad_norm": 1.5675333212129947, "learning_rate": 7.125206246401514e-07, "loss": 0.530053973197937, "step": 5321 }, { "epoch": 1.2268587896253602, "grad_norm": 1.2898853274906796, "learning_rate": 7.121555671460802e-07, "loss": 0.46412545442581177, "step": 5322 }, { "epoch": 1.2270893371757925, "grad_norm": 1.5765192599645659, "learning_rate": 7.117905514728107e-07, "loss": 0.443530797958374, "step": 5323 }, { "epoch": 1.2273198847262248, "grad_norm": 1.6711155634725154, "learning_rate": 7.114255776733755e-07, "loss": 0.4647101163864136, "step": 5324 }, { "epoch": 1.227550432276657, "grad_norm": 1.547419086723459, "learning_rate": 7.110606458008023e-07, "loss": 0.45051440596580505, "step": 5325 }, { "epoch": 1.2277809798270893, "grad_norm": 1.6266032223725158, "learning_rate": 7.106957559081115e-07, "loss": 0.5258926153182983, "step": 5326 }, { "epoch": 1.2280115273775216, "grad_norm": 1.6807374525723655, "learning_rate": 7.103309080483173e-07, "loss": 0.39645054936408997, "step": 5327 }, { "epoch": 1.228242074927954, "grad_norm": 1.5875759565882142, "learning_rate": 7.099661022744294e-07, "loss": 0.48216918110847473, "step": 5328 }, { "epoch": 1.2284726224783862, "grad_norm": 1.4750837464060895, "learning_rate": 7.096013386394493e-07, "loss": 0.42962855100631714, "step": 5329 }, { "epoch": 1.2287031700288185, "grad_norm": 1.613399761856971, "learning_rate": 7.092366171963738e-07, "loss": 0.5269042253494263, "step": 5330 }, { "epoch": 1.2289337175792507, "grad_norm": 1.831645544729853, "learning_rate": 7.088719379981932e-07, "loss": 0.47110509872436523, "step": 5331 }, { "epoch": 1.229164265129683, "grad_norm": 1.4339565179319511, "learning_rate": 7.085073010978915e-07, "loss": 0.45334869623184204, "step": 5332 }, { "epoch": 1.2293948126801153, "grad_norm": 1.4131421708681255, "learning_rate": 7.081427065484467e-07, "loss": 0.5029184818267822, "step": 5333 }, { "epoch": 1.2296253602305476, "grad_norm": 1.7892961583851241, "learning_rate": 7.0777815440283e-07, "loss": 0.5292627215385437, "step": 5334 }, { "epoch": 1.2298559077809799, "grad_norm": 1.5276984778361233, "learning_rate": 7.074136447140077e-07, "loss": 0.4436877965927124, "step": 5335 }, { "epoch": 1.2300864553314121, "grad_norm": 1.6707368087722814, "learning_rate": 7.070491775349396e-07, "loss": 0.4915885925292969, "step": 5336 }, { "epoch": 1.2303170028818444, "grad_norm": 1.4733414399918665, "learning_rate": 7.066847529185779e-07, "loss": 0.4028368592262268, "step": 5337 }, { "epoch": 1.2305475504322767, "grad_norm": 1.56972201812435, "learning_rate": 7.063203709178704e-07, "loss": 0.41268208622932434, "step": 5338 }, { "epoch": 1.230778097982709, "grad_norm": 1.494676374710187, "learning_rate": 7.059560315857585e-07, "loss": 0.4848160743713379, "step": 5339 }, { "epoch": 1.2310086455331413, "grad_norm": 1.6665066281678116, "learning_rate": 7.055917349751755e-07, "loss": 0.43197691440582275, "step": 5340 }, { "epoch": 1.2312391930835735, "grad_norm": 1.5221352439773972, "learning_rate": 7.052274811390514e-07, "loss": 0.4859619736671448, "step": 5341 }, { "epoch": 1.2314697406340058, "grad_norm": 1.8033359613530964, "learning_rate": 7.048632701303075e-07, "loss": 0.43828168511390686, "step": 5342 }, { "epoch": 1.231700288184438, "grad_norm": 1.465814284332743, "learning_rate": 7.044991020018601e-07, "loss": 0.4256266951560974, "step": 5343 }, { "epoch": 1.2319308357348704, "grad_norm": 1.6334021582177483, "learning_rate": 7.041349768066196e-07, "loss": 0.5345013737678528, "step": 5344 }, { "epoch": 1.2321613832853027, "grad_norm": 1.5664723740177018, "learning_rate": 7.037708945974887e-07, "loss": 0.46685951948165894, "step": 5345 }, { "epoch": 1.232391930835735, "grad_norm": 1.8414903460242973, "learning_rate": 7.034068554273653e-07, "loss": 0.4725074768066406, "step": 5346 }, { "epoch": 1.2326224783861672, "grad_norm": 1.447512025177934, "learning_rate": 7.030428593491407e-07, "loss": 0.4719870984554291, "step": 5347 }, { "epoch": 1.2328530259365995, "grad_norm": 1.5137035821770004, "learning_rate": 7.026789064156992e-07, "loss": 0.47967803478240967, "step": 5348 }, { "epoch": 1.2330835734870318, "grad_norm": 1.7079828842976972, "learning_rate": 7.023149966799198e-07, "loss": 0.5108177065849304, "step": 5349 }, { "epoch": 1.233314121037464, "grad_norm": 1.8907292823913628, "learning_rate": 7.019511301946743e-07, "loss": 0.5461745262145996, "step": 5350 }, { "epoch": 1.2335446685878964, "grad_norm": 1.6940587927951292, "learning_rate": 7.015873070128292e-07, "loss": 0.5612732172012329, "step": 5351 }, { "epoch": 1.2337752161383286, "grad_norm": 1.477660087960271, "learning_rate": 7.012235271872443e-07, "loss": 0.4839070439338684, "step": 5352 }, { "epoch": 1.234005763688761, "grad_norm": 1.5409923852543361, "learning_rate": 7.008597907707724e-07, "loss": 0.4188167452812195, "step": 5353 }, { "epoch": 1.2342363112391932, "grad_norm": 1.6513108999988988, "learning_rate": 7.004960978162617e-07, "loss": 0.49374920129776, "step": 5354 }, { "epoch": 1.2344668587896255, "grad_norm": 1.5545279058414754, "learning_rate": 7.001324483765515e-07, "loss": 0.5485600829124451, "step": 5355 }, { "epoch": 1.2346974063400578, "grad_norm": 1.5209405886137708, "learning_rate": 6.997688425044772e-07, "loss": 0.48566675186157227, "step": 5356 }, { "epoch": 1.23492795389049, "grad_norm": 1.6039735368693278, "learning_rate": 6.994052802528674e-07, "loss": 0.41277337074279785, "step": 5357 }, { "epoch": 1.235158501440922, "grad_norm": 1.4736114676774628, "learning_rate": 6.990417616745428e-07, "loss": 0.38438326120376587, "step": 5358 }, { "epoch": 1.2353890489913544, "grad_norm": 1.7030181330099163, "learning_rate": 6.986782868223194e-07, "loss": 0.4412330389022827, "step": 5359 }, { "epoch": 1.2356195965417867, "grad_norm": 1.7135416795794967, "learning_rate": 6.983148557490069e-07, "loss": 0.4721234142780304, "step": 5360 }, { "epoch": 1.235850144092219, "grad_norm": 1.2725652745663298, "learning_rate": 6.979514685074069e-07, "loss": 0.37214499711990356, "step": 5361 }, { "epoch": 1.2360806916426512, "grad_norm": 1.6936901145141183, "learning_rate": 6.975881251503168e-07, "loss": 0.5757625699043274, "step": 5362 }, { "epoch": 1.2363112391930835, "grad_norm": 1.5702972159481126, "learning_rate": 6.972248257305261e-07, "loss": 0.47131651639938354, "step": 5363 }, { "epoch": 1.2365417867435158, "grad_norm": 1.4382156565054938, "learning_rate": 6.968615703008181e-07, "loss": 0.4582099914550781, "step": 5364 }, { "epoch": 1.236772334293948, "grad_norm": 1.4379269165858373, "learning_rate": 6.96498358913971e-07, "loss": 0.4932486116886139, "step": 5365 }, { "epoch": 1.2370028818443803, "grad_norm": 1.4366623990202567, "learning_rate": 6.961351916227549e-07, "loss": 0.44992512464523315, "step": 5366 }, { "epoch": 1.2372334293948126, "grad_norm": 1.4889934853931641, "learning_rate": 6.957720684799342e-07, "loss": 0.5043992400169373, "step": 5367 }, { "epoch": 1.237463976945245, "grad_norm": 1.4017599596662496, "learning_rate": 6.954089895382675e-07, "loss": 0.44705575704574585, "step": 5368 }, { "epoch": 1.2376945244956772, "grad_norm": 1.8731873123119112, "learning_rate": 6.950459548505057e-07, "loss": 0.48370909690856934, "step": 5369 }, { "epoch": 1.2379250720461095, "grad_norm": 1.4064067860312528, "learning_rate": 6.946829644693947e-07, "loss": 0.4326665997505188, "step": 5370 }, { "epoch": 1.2381556195965417, "grad_norm": 1.9138397041169513, "learning_rate": 6.943200184476723e-07, "loss": 0.5717728137969971, "step": 5371 }, { "epoch": 1.238386167146974, "grad_norm": 1.6265922150082073, "learning_rate": 6.939571168380715e-07, "loss": 0.4615858793258667, "step": 5372 }, { "epoch": 1.2386167146974063, "grad_norm": 1.6132244275345642, "learning_rate": 6.935942596933181e-07, "loss": 0.5257784724235535, "step": 5373 }, { "epoch": 1.2388472622478386, "grad_norm": 1.4477940177516195, "learning_rate": 6.932314470661309e-07, "loss": 0.5333257913589478, "step": 5374 }, { "epoch": 1.2390778097982709, "grad_norm": 1.4168682949552263, "learning_rate": 6.928686790092234e-07, "loss": 0.4580482244491577, "step": 5375 }, { "epoch": 1.2393083573487031, "grad_norm": 1.583728497558373, "learning_rate": 6.925059555753021e-07, "loss": 0.4775884449481964, "step": 5376 }, { "epoch": 1.2395389048991354, "grad_norm": 1.5687995312552376, "learning_rate": 6.921432768170661e-07, "loss": 0.44678401947021484, "step": 5377 }, { "epoch": 1.2397694524495677, "grad_norm": 1.4949610582315778, "learning_rate": 6.917806427872099e-07, "loss": 0.4390775263309479, "step": 5378 }, { "epoch": 1.24, "grad_norm": 1.5677366458650814, "learning_rate": 6.914180535384198e-07, "loss": 0.4696844518184662, "step": 5379 }, { "epoch": 1.2402305475504323, "grad_norm": 1.6121063432311435, "learning_rate": 6.910555091233761e-07, "loss": 0.560876727104187, "step": 5380 }, { "epoch": 1.2404610951008646, "grad_norm": 1.5109825976981524, "learning_rate": 6.906930095947537e-07, "loss": 0.5124620795249939, "step": 5381 }, { "epoch": 1.2406916426512968, "grad_norm": 1.3305908427691708, "learning_rate": 6.903305550052187e-07, "loss": 0.4296848177909851, "step": 5382 }, { "epoch": 1.2409221902017291, "grad_norm": 1.4071369163278793, "learning_rate": 6.899681454074327e-07, "loss": 0.40255898237228394, "step": 5383 }, { "epoch": 1.2411527377521614, "grad_norm": 1.7565777753092369, "learning_rate": 6.896057808540505e-07, "loss": 0.4180254340171814, "step": 5384 }, { "epoch": 1.2413832853025937, "grad_norm": 1.4270834637818937, "learning_rate": 6.892434613977189e-07, "loss": 0.41249316930770874, "step": 5385 }, { "epoch": 1.241613832853026, "grad_norm": 1.9199945589002845, "learning_rate": 6.8888118709108e-07, "loss": 0.44984108209609985, "step": 5386 }, { "epoch": 1.2418443804034582, "grad_norm": 1.5651965928462388, "learning_rate": 6.885189579867677e-07, "loss": 0.4913838505744934, "step": 5387 }, { "epoch": 1.2420749279538905, "grad_norm": 1.8945057573554496, "learning_rate": 6.881567741374107e-07, "loss": 0.6019924879074097, "step": 5388 }, { "epoch": 1.2423054755043228, "grad_norm": 1.3856621078353595, "learning_rate": 6.877946355956305e-07, "loss": 0.5212692618370056, "step": 5389 }, { "epoch": 1.242536023054755, "grad_norm": 1.5912853207047202, "learning_rate": 6.874325424140417e-07, "loss": 0.49732527136802673, "step": 5390 }, { "epoch": 1.2427665706051874, "grad_norm": 1.6221741050482537, "learning_rate": 6.87070494645253e-07, "loss": 0.44408339262008667, "step": 5391 }, { "epoch": 1.2429971181556196, "grad_norm": 1.584527881503366, "learning_rate": 6.867084923418663e-07, "loss": 0.43635284900665283, "step": 5392 }, { "epoch": 1.243227665706052, "grad_norm": 1.382962686107236, "learning_rate": 6.863465355564761e-07, "loss": 0.48582303524017334, "step": 5393 }, { "epoch": 1.2434582132564842, "grad_norm": 1.574457872856851, "learning_rate": 6.85984624341672e-07, "loss": 0.4211381673812866, "step": 5394 }, { "epoch": 1.2436887608069165, "grad_norm": 1.5127608845686662, "learning_rate": 6.85622758750035e-07, "loss": 0.47154808044433594, "step": 5395 }, { "epoch": 1.2439193083573488, "grad_norm": 1.2959619642815412, "learning_rate": 6.852609388341406e-07, "loss": 0.3881720304489136, "step": 5396 }, { "epoch": 1.244149855907781, "grad_norm": 1.8465773987676242, "learning_rate": 6.84899164646558e-07, "loss": 0.4934437870979309, "step": 5397 }, { "epoch": 1.2443804034582133, "grad_norm": 1.6728854789305907, "learning_rate": 6.845374362398486e-07, "loss": 0.49199211597442627, "step": 5398 }, { "epoch": 1.2446109510086456, "grad_norm": 1.667169706269872, "learning_rate": 6.841757536665683e-07, "loss": 0.5011521577835083, "step": 5399 }, { "epoch": 1.2448414985590779, "grad_norm": 1.5545634113819367, "learning_rate": 6.83814116979265e-07, "loss": 0.4599594473838806, "step": 5400 }, { "epoch": 1.2450720461095102, "grad_norm": 2.335132416331749, "learning_rate": 6.834525262304817e-07, "loss": 0.5006797313690186, "step": 5401 }, { "epoch": 1.2453025936599424, "grad_norm": 1.7500701936339498, "learning_rate": 6.830909814727534e-07, "loss": 0.44420236349105835, "step": 5402 }, { "epoch": 1.2455331412103747, "grad_norm": 1.41432328248269, "learning_rate": 6.827294827586086e-07, "loss": 0.4200541377067566, "step": 5403 }, { "epoch": 1.245763688760807, "grad_norm": 1.4530188886581012, "learning_rate": 6.823680301405693e-07, "loss": 0.4684341251850128, "step": 5404 }, { "epoch": 1.2459942363112393, "grad_norm": 1.5805133579379969, "learning_rate": 6.820066236711514e-07, "loss": 0.5357221364974976, "step": 5405 }, { "epoch": 1.2462247838616713, "grad_norm": 1.4518037927097922, "learning_rate": 6.816452634028626e-07, "loss": 0.4230891168117523, "step": 5406 }, { "epoch": 1.2464553314121036, "grad_norm": 1.5147135801771385, "learning_rate": 6.812839493882056e-07, "loss": 0.4489879608154297, "step": 5407 }, { "epoch": 1.246685878962536, "grad_norm": 1.4193036604440596, "learning_rate": 6.80922681679675e-07, "loss": 0.4748901128768921, "step": 5408 }, { "epoch": 1.2469164265129682, "grad_norm": 1.3796380805346808, "learning_rate": 6.805614603297594e-07, "loss": 0.44227588176727295, "step": 5409 }, { "epoch": 1.2471469740634005, "grad_norm": 1.4650703004037071, "learning_rate": 6.802002853909408e-07, "loss": 0.42565715312957764, "step": 5410 }, { "epoch": 1.2473775216138328, "grad_norm": 1.2687998722413263, "learning_rate": 6.798391569156938e-07, "loss": 0.4466899633407593, "step": 5411 }, { "epoch": 1.247608069164265, "grad_norm": 1.4877290529879426, "learning_rate": 6.794780749564865e-07, "loss": 0.45467138290405273, "step": 5412 }, { "epoch": 1.2478386167146973, "grad_norm": 1.587000793967483, "learning_rate": 6.79117039565781e-07, "loss": 0.4968474507331848, "step": 5413 }, { "epoch": 1.2480691642651296, "grad_norm": 1.7383476070225938, "learning_rate": 6.787560507960315e-07, "loss": 0.4603409171104431, "step": 5414 }, { "epoch": 1.2482997118155619, "grad_norm": 1.812747956168159, "learning_rate": 6.783951086996859e-07, "loss": 0.5013781785964966, "step": 5415 }, { "epoch": 1.2485302593659942, "grad_norm": 1.3508050811528929, "learning_rate": 6.780342133291853e-07, "loss": 0.42432457208633423, "step": 5416 }, { "epoch": 1.2487608069164264, "grad_norm": 1.7613681947256843, "learning_rate": 6.776733647369642e-07, "loss": 0.4237065315246582, "step": 5417 }, { "epoch": 1.2489913544668587, "grad_norm": 1.3175927182751024, "learning_rate": 6.773125629754503e-07, "loss": 0.41611090302467346, "step": 5418 }, { "epoch": 1.249221902017291, "grad_norm": 1.7410881231831246, "learning_rate": 6.769518080970639e-07, "loss": 0.44240689277648926, "step": 5419 }, { "epoch": 1.2494524495677233, "grad_norm": 1.2785667812742771, "learning_rate": 6.765911001542193e-07, "loss": 0.45134061574935913, "step": 5420 }, { "epoch": 1.2496829971181556, "grad_norm": 1.9175453690913649, "learning_rate": 6.762304391993237e-07, "loss": 0.5218105316162109, "step": 5421 }, { "epoch": 1.2499135446685878, "grad_norm": 1.6363309057276445, "learning_rate": 6.758698252847768e-07, "loss": 0.4311027228832245, "step": 5422 }, { "epoch": 1.2501440922190201, "grad_norm": 1.5244203394981957, "learning_rate": 6.755092584629727e-07, "loss": 0.4937070310115814, "step": 5423 }, { "epoch": 1.2503746397694524, "grad_norm": 1.4854060603156747, "learning_rate": 6.751487387862975e-07, "loss": 0.4381704330444336, "step": 5424 }, { "epoch": 1.2506051873198847, "grad_norm": 1.7381307437192484, "learning_rate": 6.747882663071312e-07, "loss": 0.4766447842121124, "step": 5425 }, { "epoch": 1.250835734870317, "grad_norm": 1.5211127182293511, "learning_rate": 6.74427841077847e-07, "loss": 0.41801929473876953, "step": 5426 }, { "epoch": 1.2510662824207492, "grad_norm": 1.5333588514457408, "learning_rate": 6.740674631508105e-07, "loss": 0.42379114031791687, "step": 5427 }, { "epoch": 1.2512968299711815, "grad_norm": 1.6293048691492493, "learning_rate": 6.737071325783806e-07, "loss": 0.4198606014251709, "step": 5428 }, { "epoch": 1.2515273775216138, "grad_norm": 1.6141332879533556, "learning_rate": 6.733468494129105e-07, "loss": 0.4047771692276001, "step": 5429 }, { "epoch": 1.251757925072046, "grad_norm": 1.5300772739078217, "learning_rate": 6.729866137067449e-07, "loss": 0.5023326873779297, "step": 5430 }, { "epoch": 1.2519884726224784, "grad_norm": 1.6198465774895092, "learning_rate": 6.726264255122227e-07, "loss": 0.34151679277420044, "step": 5431 }, { "epoch": 1.2522190201729106, "grad_norm": 1.486645506129829, "learning_rate": 6.722662848816748e-07, "loss": 0.38678500056266785, "step": 5432 }, { "epoch": 1.252449567723343, "grad_norm": 1.7272506827447611, "learning_rate": 6.719061918674267e-07, "loss": 0.49455368518829346, "step": 5433 }, { "epoch": 1.2526801152737752, "grad_norm": 1.4314837349274792, "learning_rate": 6.715461465217959e-07, "loss": 0.42544132471084595, "step": 5434 }, { "epoch": 1.2529106628242075, "grad_norm": 1.7176926108989403, "learning_rate": 6.711861488970927e-07, "loss": 0.4194113612174988, "step": 5435 }, { "epoch": 1.2531412103746398, "grad_norm": 1.4888625044157147, "learning_rate": 6.708261990456219e-07, "loss": 0.4825857877731323, "step": 5436 }, { "epoch": 1.253371757925072, "grad_norm": 1.9183567280059182, "learning_rate": 6.704662970196801e-07, "loss": 0.5231322050094604, "step": 5437 }, { "epoch": 1.2536023054755043, "grad_norm": 1.5966670163534955, "learning_rate": 6.701064428715568e-07, "loss": 0.4393799304962158, "step": 5438 }, { "epoch": 1.2538328530259366, "grad_norm": 1.441480411219716, "learning_rate": 6.69746636653536e-07, "loss": 0.4439913034439087, "step": 5439 }, { "epoch": 1.254063400576369, "grad_norm": 1.3682222278508178, "learning_rate": 6.693868784178933e-07, "loss": 0.44157153367996216, "step": 5440 }, { "epoch": 1.2542939481268012, "grad_norm": 1.5181266642772941, "learning_rate": 6.690271682168976e-07, "loss": 0.4028700292110443, "step": 5441 }, { "epoch": 1.2545244956772335, "grad_norm": 1.4208577251896788, "learning_rate": 6.686675061028115e-07, "loss": 0.44558918476104736, "step": 5442 }, { "epoch": 1.2547550432276657, "grad_norm": 1.6092471535381658, "learning_rate": 6.6830789212789e-07, "loss": 0.44742459058761597, "step": 5443 }, { "epoch": 1.254985590778098, "grad_norm": 1.5896606564493474, "learning_rate": 6.679483263443813e-07, "loss": 0.48470282554626465, "step": 5444 }, { "epoch": 1.2552161383285303, "grad_norm": 1.8312792387216397, "learning_rate": 6.675888088045263e-07, "loss": 0.4570988416671753, "step": 5445 }, { "epoch": 1.2554466858789626, "grad_norm": 1.838942069663871, "learning_rate": 6.672293395605595e-07, "loss": 0.3854732811450958, "step": 5446 }, { "epoch": 1.2556772334293949, "grad_norm": 1.6067053822119532, "learning_rate": 6.66869918664708e-07, "loss": 0.572611927986145, "step": 5447 }, { "epoch": 1.2559077809798271, "grad_norm": 1.765302400736949, "learning_rate": 6.665105461691916e-07, "loss": 0.4152478873729706, "step": 5448 }, { "epoch": 1.2561383285302594, "grad_norm": 1.6331380960048347, "learning_rate": 6.661512221262237e-07, "loss": 0.5350701808929443, "step": 5449 }, { "epoch": 1.2563688760806917, "grad_norm": 1.6537785251879598, "learning_rate": 6.657919465880106e-07, "loss": 0.510172963142395, "step": 5450 }, { "epoch": 1.256599423631124, "grad_norm": 1.3958498055611923, "learning_rate": 6.654327196067504e-07, "loss": 0.4707493782043457, "step": 5451 }, { "epoch": 1.2568299711815563, "grad_norm": 1.4416810291315851, "learning_rate": 6.650735412346361e-07, "loss": 0.4108200967311859, "step": 5452 }, { "epoch": 1.2570605187319885, "grad_norm": 1.460053318153373, "learning_rate": 6.647144115238519e-07, "loss": 0.5222622752189636, "step": 5453 }, { "epoch": 1.2572910662824208, "grad_norm": 1.6015667511775546, "learning_rate": 6.643553305265755e-07, "loss": 0.425457239151001, "step": 5454 }, { "epoch": 1.257521613832853, "grad_norm": 1.8741374124254828, "learning_rate": 6.639962982949785e-07, "loss": 0.49435800313949585, "step": 5455 }, { "epoch": 1.2577521613832854, "grad_norm": 1.6022646309557353, "learning_rate": 6.636373148812237e-07, "loss": 0.5040857791900635, "step": 5456 }, { "epoch": 1.2579827089337177, "grad_norm": 1.476700783342049, "learning_rate": 6.632783803374678e-07, "loss": 0.4675745368003845, "step": 5457 }, { "epoch": 1.25821325648415, "grad_norm": 1.900550198001673, "learning_rate": 6.629194947158606e-07, "loss": 0.5321175456047058, "step": 5458 }, { "epoch": 1.2584438040345822, "grad_norm": 1.3273206827091684, "learning_rate": 6.625606580685442e-07, "loss": 0.4415740370750427, "step": 5459 }, { "epoch": 1.2586743515850145, "grad_norm": 1.5235733720363593, "learning_rate": 6.622018704476539e-07, "loss": 0.48481184244155884, "step": 5460 }, { "epoch": 1.2589048991354468, "grad_norm": 1.74947332771249, "learning_rate": 6.618431319053176e-07, "loss": 0.46835392713546753, "step": 5461 }, { "epoch": 1.259135446685879, "grad_norm": 1.3284706762425806, "learning_rate": 6.614844424936566e-07, "loss": 0.4376718997955322, "step": 5462 }, { "epoch": 1.2593659942363113, "grad_norm": 1.558886627315865, "learning_rate": 6.611258022647847e-07, "loss": 0.4625728130340576, "step": 5463 }, { "epoch": 1.2595965417867436, "grad_norm": 1.6370741370932307, "learning_rate": 6.607672112708081e-07, "loss": 0.44812503457069397, "step": 5464 }, { "epoch": 1.259827089337176, "grad_norm": 1.6367384240246787, "learning_rate": 6.60408669563827e-07, "loss": 0.46147626638412476, "step": 5465 }, { "epoch": 1.2600576368876082, "grad_norm": 1.4309654266560352, "learning_rate": 6.600501771959337e-07, "loss": 0.45850062370300293, "step": 5466 }, { "epoch": 1.2602881844380405, "grad_norm": 1.5248045750601964, "learning_rate": 6.596917342192129e-07, "loss": 0.47113144397735596, "step": 5467 }, { "epoch": 1.2605187319884728, "grad_norm": 1.7668685332089469, "learning_rate": 6.593333406857435e-07, "loss": 0.4846932888031006, "step": 5468 }, { "epoch": 1.260749279538905, "grad_norm": 1.5362515481003414, "learning_rate": 6.589749966475951e-07, "loss": 0.4357692003250122, "step": 5469 }, { "epoch": 1.2609798270893373, "grad_norm": 1.3245508183347572, "learning_rate": 6.586167021568323e-07, "loss": 0.44653600454330444, "step": 5470 }, { "epoch": 1.2612103746397694, "grad_norm": 1.5525959738010278, "learning_rate": 6.582584572655118e-07, "loss": 0.43938639760017395, "step": 5471 }, { "epoch": 1.2614409221902017, "grad_norm": 1.3892882632769337, "learning_rate": 6.579002620256817e-07, "loss": 0.4045637845993042, "step": 5472 }, { "epoch": 1.261671469740634, "grad_norm": 1.6336701340291093, "learning_rate": 6.575421164893849e-07, "loss": 0.483223557472229, "step": 5473 }, { "epoch": 1.2619020172910662, "grad_norm": 1.6770649329919185, "learning_rate": 6.571840207086565e-07, "loss": 0.51056307554245, "step": 5474 }, { "epoch": 1.2621325648414985, "grad_norm": 1.607498257265911, "learning_rate": 6.568259747355233e-07, "loss": 0.5062652826309204, "step": 5475 }, { "epoch": 1.2623631123919308, "grad_norm": 1.82615736140384, "learning_rate": 6.564679786220062e-07, "loss": 0.46831825375556946, "step": 5476 }, { "epoch": 1.262593659942363, "grad_norm": 1.5711366050335192, "learning_rate": 6.56110032420118e-07, "loss": 0.4801711440086365, "step": 5477 }, { "epoch": 1.2628242074927953, "grad_norm": 1.687813586503547, "learning_rate": 6.557521361818643e-07, "loss": 0.41865023970603943, "step": 5478 }, { "epoch": 1.2630547550432276, "grad_norm": 1.4052825771397335, "learning_rate": 6.553942899592446e-07, "loss": 0.4309377074241638, "step": 5479 }, { "epoch": 1.26328530259366, "grad_norm": 1.455269980692782, "learning_rate": 6.550364938042496e-07, "loss": 0.44679516553878784, "step": 5480 }, { "epoch": 1.2635158501440922, "grad_norm": 1.4446890497651368, "learning_rate": 6.546787477688631e-07, "loss": 0.40785765647888184, "step": 5481 }, { "epoch": 1.2637463976945245, "grad_norm": 1.6383514003925148, "learning_rate": 6.543210519050628e-07, "loss": 0.43477606773376465, "step": 5482 }, { "epoch": 1.2639769452449567, "grad_norm": 1.5580217211873189, "learning_rate": 6.539634062648174e-07, "loss": 0.4252205193042755, "step": 5483 }, { "epoch": 1.264207492795389, "grad_norm": 1.5719265387067087, "learning_rate": 6.536058109000895e-07, "loss": 0.5158742666244507, "step": 5484 }, { "epoch": 1.2644380403458213, "grad_norm": 1.7733816595451595, "learning_rate": 6.532482658628333e-07, "loss": 0.5523275136947632, "step": 5485 }, { "epoch": 1.2646685878962536, "grad_norm": 1.9166044512792477, "learning_rate": 6.528907712049971e-07, "loss": 0.45393699407577515, "step": 5486 }, { "epoch": 1.2648991354466859, "grad_norm": 1.4684258251388687, "learning_rate": 6.525333269785213e-07, "loss": 0.3747859001159668, "step": 5487 }, { "epoch": 1.2651296829971181, "grad_norm": 1.5546958152903012, "learning_rate": 6.521759332353381e-07, "loss": 0.5249335169792175, "step": 5488 }, { "epoch": 1.2653602305475504, "grad_norm": 1.3641169125497614, "learning_rate": 6.518185900273736e-07, "loss": 0.4011837840080261, "step": 5489 }, { "epoch": 1.2655907780979827, "grad_norm": 1.7629229180871924, "learning_rate": 6.514612974065459e-07, "loss": 0.47748851776123047, "step": 5490 }, { "epoch": 1.265821325648415, "grad_norm": 2.008372903254068, "learning_rate": 6.511040554247655e-07, "loss": 0.5392353534698486, "step": 5491 }, { "epoch": 1.2660518731988473, "grad_norm": 1.5145152277351324, "learning_rate": 6.507468641339371e-07, "loss": 0.4395901560783386, "step": 5492 }, { "epoch": 1.2662824207492795, "grad_norm": 1.6896616144085306, "learning_rate": 6.503897235859556e-07, "loss": 0.4864005148410797, "step": 5493 }, { "epoch": 1.2665129682997118, "grad_norm": 1.964853072629702, "learning_rate": 6.500326338327104e-07, "loss": 0.4969862103462219, "step": 5494 }, { "epoch": 1.266743515850144, "grad_norm": 1.6281912350437215, "learning_rate": 6.49675594926083e-07, "loss": 0.5065462589263916, "step": 5495 }, { "epoch": 1.2669740634005764, "grad_norm": 1.629865686272151, "learning_rate": 6.493186069179474e-07, "loss": 0.4453302025794983, "step": 5496 }, { "epoch": 1.2672046109510087, "grad_norm": 1.6609285038031356, "learning_rate": 6.489616698601701e-07, "loss": 0.4587283730506897, "step": 5497 }, { "epoch": 1.267435158501441, "grad_norm": 1.5797490262179883, "learning_rate": 6.486047838046102e-07, "loss": 0.5289033055305481, "step": 5498 }, { "epoch": 1.2676657060518732, "grad_norm": 1.6771196950319116, "learning_rate": 6.482479488031199e-07, "loss": 0.46285438537597656, "step": 5499 }, { "epoch": 1.2678962536023055, "grad_norm": 1.5333241787291638, "learning_rate": 6.478911649075434e-07, "loss": 0.44040411710739136, "step": 5500 }, { "epoch": 1.2681268011527378, "grad_norm": 1.337665135823913, "learning_rate": 6.475344321697175e-07, "loss": 0.4678620994091034, "step": 5501 }, { "epoch": 1.26835734870317, "grad_norm": 1.7348492481978741, "learning_rate": 6.471777506414721e-07, "loss": 0.4077882170677185, "step": 5502 }, { "epoch": 1.2685878962536024, "grad_norm": 1.4599912002094135, "learning_rate": 6.468211203746294e-07, "loss": 0.476462721824646, "step": 5503 }, { "epoch": 1.2688184438040346, "grad_norm": 1.955778831188656, "learning_rate": 6.464645414210036e-07, "loss": 0.47313517332077026, "step": 5504 }, { "epoch": 1.269048991354467, "grad_norm": 1.600431181416076, "learning_rate": 6.461080138324025e-07, "loss": 0.46002405881881714, "step": 5505 }, { "epoch": 1.2692795389048992, "grad_norm": 1.471883891355057, "learning_rate": 6.457515376606253e-07, "loss": 0.49490875005722046, "step": 5506 }, { "epoch": 1.2695100864553315, "grad_norm": 1.9073348208275467, "learning_rate": 6.453951129574643e-07, "loss": 0.3652802109718323, "step": 5507 }, { "epoch": 1.2697406340057638, "grad_norm": 1.795903663355042, "learning_rate": 6.450387397747049e-07, "loss": 0.43633294105529785, "step": 5508 }, { "epoch": 1.269971181556196, "grad_norm": 1.581833052782087, "learning_rate": 6.446824181641239e-07, "loss": 0.4316346049308777, "step": 5509 }, { "epoch": 1.2702017291066283, "grad_norm": 1.4893056012571666, "learning_rate": 6.443261481774909e-07, "loss": 0.4697961211204529, "step": 5510 }, { "epoch": 1.2704322766570606, "grad_norm": 1.4744438084114087, "learning_rate": 6.439699298665691e-07, "loss": 0.3447864055633545, "step": 5511 }, { "epoch": 1.2706628242074927, "grad_norm": 1.4930172335593572, "learning_rate": 6.436137632831124e-07, "loss": 0.42046308517456055, "step": 5512 }, { "epoch": 1.270893371757925, "grad_norm": 1.9205238969624363, "learning_rate": 6.43257648478869e-07, "loss": 0.5696559548377991, "step": 5513 }, { "epoch": 1.2711239193083572, "grad_norm": 1.5834811016484576, "learning_rate": 6.429015855055775e-07, "loss": 0.37561696767807007, "step": 5514 }, { "epoch": 1.2713544668587895, "grad_norm": 1.7696273804871432, "learning_rate": 6.425455744149711e-07, "loss": 0.4987408518791199, "step": 5515 }, { "epoch": 1.2715850144092218, "grad_norm": 1.5472001628110608, "learning_rate": 6.421896152587745e-07, "loss": 0.41388893127441406, "step": 5516 }, { "epoch": 1.271815561959654, "grad_norm": 1.3907677350646108, "learning_rate": 6.41833708088704e-07, "loss": 0.531815230846405, "step": 5517 }, { "epoch": 1.2720461095100863, "grad_norm": 2.0563641939125152, "learning_rate": 6.414778529564701e-07, "loss": 0.5133731365203857, "step": 5518 }, { "epoch": 1.2722766570605186, "grad_norm": 1.4492977630588102, "learning_rate": 6.411220499137746e-07, "loss": 0.49691277742385864, "step": 5519 }, { "epoch": 1.272507204610951, "grad_norm": 1.469594469426552, "learning_rate": 6.407662990123117e-07, "loss": 0.45275843143463135, "step": 5520 }, { "epoch": 1.2727377521613832, "grad_norm": 1.3066235759971525, "learning_rate": 6.404106003037688e-07, "loss": 0.43602895736694336, "step": 5521 }, { "epoch": 1.2729682997118155, "grad_norm": 1.4446893971845258, "learning_rate": 6.400549538398248e-07, "loss": 0.4969235956668854, "step": 5522 }, { "epoch": 1.2731988472622477, "grad_norm": 1.5937900788273875, "learning_rate": 6.396993596721512e-07, "loss": 0.5304889678955078, "step": 5523 }, { "epoch": 1.27342939481268, "grad_norm": 1.5346452715505707, "learning_rate": 6.393438178524131e-07, "loss": 0.46131831407546997, "step": 5524 }, { "epoch": 1.2736599423631123, "grad_norm": 1.5540801807722993, "learning_rate": 6.389883284322663e-07, "loss": 0.44982582330703735, "step": 5525 }, { "epoch": 1.2738904899135446, "grad_norm": 1.4566262270008488, "learning_rate": 6.386328914633594e-07, "loss": 0.42927485704421997, "step": 5526 }, { "epoch": 1.2741210374639769, "grad_norm": 1.6365410597421224, "learning_rate": 6.382775069973347e-07, "loss": 0.4596608579158783, "step": 5527 }, { "epoch": 1.2743515850144091, "grad_norm": 1.4090066202201719, "learning_rate": 6.379221750858251e-07, "loss": 0.40814411640167236, "step": 5528 }, { "epoch": 1.2745821325648414, "grad_norm": 1.4832230987219173, "learning_rate": 6.375668957804569e-07, "loss": 0.47916269302368164, "step": 5529 }, { "epoch": 1.2748126801152737, "grad_norm": 1.640737186868239, "learning_rate": 6.372116691328483e-07, "loss": 0.48566362261772156, "step": 5530 }, { "epoch": 1.275043227665706, "grad_norm": 1.4732784262191274, "learning_rate": 6.368564951946103e-07, "loss": 0.4357905089855194, "step": 5531 }, { "epoch": 1.2752737752161383, "grad_norm": 1.3641981295499355, "learning_rate": 6.365013740173459e-07, "loss": 0.4812158942222595, "step": 5532 }, { "epoch": 1.2755043227665706, "grad_norm": 1.5210079549425446, "learning_rate": 6.361463056526501e-07, "loss": 0.4973459839820862, "step": 5533 }, { "epoch": 1.2757348703170028, "grad_norm": 1.6397578725103514, "learning_rate": 6.357912901521114e-07, "loss": 0.44668370485305786, "step": 5534 }, { "epoch": 1.2759654178674351, "grad_norm": 1.5010005866568603, "learning_rate": 6.354363275673093e-07, "loss": 0.40911513566970825, "step": 5535 }, { "epoch": 1.2761959654178674, "grad_norm": 1.5942835145336292, "learning_rate": 6.35081417949816e-07, "loss": 0.4385657012462616, "step": 5536 }, { "epoch": 1.2764265129682997, "grad_norm": 1.3458840351620285, "learning_rate": 6.347265613511969e-07, "loss": 0.4490683376789093, "step": 5537 }, { "epoch": 1.276657060518732, "grad_norm": 1.3989827670727941, "learning_rate": 6.343717578230086e-07, "loss": 0.4358097314834595, "step": 5538 }, { "epoch": 1.2768876080691642, "grad_norm": 1.7207001807997868, "learning_rate": 6.340170074167999e-07, "loss": 0.48730576038360596, "step": 5539 }, { "epoch": 1.2771181556195965, "grad_norm": 1.5892957523550955, "learning_rate": 6.336623101841132e-07, "loss": 0.4328501224517822, "step": 5540 }, { "epoch": 1.2773487031700288, "grad_norm": 1.6472307415258993, "learning_rate": 6.333076661764818e-07, "loss": 0.49830788373947144, "step": 5541 }, { "epoch": 1.277579250720461, "grad_norm": 1.343710132243457, "learning_rate": 6.329530754454321e-07, "loss": 0.46980321407318115, "step": 5542 }, { "epoch": 1.2778097982708934, "grad_norm": 1.7570308864747564, "learning_rate": 6.325985380424816e-07, "loss": 0.47295019030570984, "step": 5543 }, { "epoch": 1.2780403458213256, "grad_norm": 1.655776208512617, "learning_rate": 6.322440540191421e-07, "loss": 0.5206797122955322, "step": 5544 }, { "epoch": 1.278270893371758, "grad_norm": 1.63546950482743, "learning_rate": 6.318896234269158e-07, "loss": 0.46184998750686646, "step": 5545 }, { "epoch": 1.2785014409221902, "grad_norm": 1.7239763029029178, "learning_rate": 6.315352463172973e-07, "loss": 0.49634110927581787, "step": 5546 }, { "epoch": 1.2787319884726225, "grad_norm": 1.4144369647998525, "learning_rate": 6.31180922741775e-07, "loss": 0.35143324732780457, "step": 5547 }, { "epoch": 1.2789625360230548, "grad_norm": 1.4978465409722184, "learning_rate": 6.308266527518279e-07, "loss": 0.4144735634326935, "step": 5548 }, { "epoch": 1.279193083573487, "grad_norm": 1.4183195079291904, "learning_rate": 6.304724363989273e-07, "loss": 0.5448867678642273, "step": 5549 }, { "epoch": 1.2794236311239193, "grad_norm": 1.338439375156975, "learning_rate": 6.301182737345381e-07, "loss": 0.43275925517082214, "step": 5550 }, { "epoch": 1.2796541786743516, "grad_norm": 1.2908687957691545, "learning_rate": 6.297641648101156e-07, "loss": 0.4401479959487915, "step": 5551 }, { "epoch": 1.2798847262247839, "grad_norm": 1.7017435488796109, "learning_rate": 6.294101096771083e-07, "loss": 0.4905737042427063, "step": 5552 }, { "epoch": 1.2801152737752162, "grad_norm": 1.8493974545984773, "learning_rate": 6.290561083869572e-07, "loss": 0.5399729013442993, "step": 5553 }, { "epoch": 1.2803458213256484, "grad_norm": 1.7191281973650443, "learning_rate": 6.287021609910945e-07, "loss": 0.4323650598526001, "step": 5554 }, { "epoch": 1.2805763688760807, "grad_norm": 1.6009091238858228, "learning_rate": 6.283482675409453e-07, "loss": 0.4450085759162903, "step": 5555 }, { "epoch": 1.280806916426513, "grad_norm": 1.6050803328137053, "learning_rate": 6.279944280879268e-07, "loss": 0.48366865515708923, "step": 5556 }, { "epoch": 1.2810374639769453, "grad_norm": 1.6069583853116107, "learning_rate": 6.276406426834479e-07, "loss": 0.43875235319137573, "step": 5557 }, { "epoch": 1.2812680115273776, "grad_norm": 1.461319209325984, "learning_rate": 6.272869113789102e-07, "loss": 0.421744167804718, "step": 5558 }, { "epoch": 1.2814985590778099, "grad_norm": 1.8153973429348083, "learning_rate": 6.269332342257066e-07, "loss": 0.569898247718811, "step": 5559 }, { "epoch": 1.2817291066282421, "grad_norm": 1.449207816940006, "learning_rate": 6.265796112752232e-07, "loss": 0.3934449553489685, "step": 5560 }, { "epoch": 1.2819596541786744, "grad_norm": 2.2386537654098295, "learning_rate": 6.262260425788381e-07, "loss": 0.4563683271408081, "step": 5561 }, { "epoch": 1.2821902017291067, "grad_norm": 1.4202120834205216, "learning_rate": 6.258725281879204e-07, "loss": 0.45703044533729553, "step": 5562 }, { "epoch": 1.282420749279539, "grad_norm": 1.350065136609383, "learning_rate": 6.255190681538324e-07, "loss": 0.4308219254016876, "step": 5563 }, { "epoch": 1.2826512968299713, "grad_norm": 1.4828315701652353, "learning_rate": 6.251656625279283e-07, "loss": 0.43510013818740845, "step": 5564 }, { "epoch": 1.2828818443804035, "grad_norm": 1.6397508409229253, "learning_rate": 6.248123113615538e-07, "loss": 0.4328692555427551, "step": 5565 }, { "epoch": 1.2831123919308358, "grad_norm": 1.5343379599739684, "learning_rate": 6.244590147060479e-07, "loss": 0.4080934524536133, "step": 5566 }, { "epoch": 1.283342939481268, "grad_norm": 1.7350612167191992, "learning_rate": 6.241057726127402e-07, "loss": 0.4813555181026459, "step": 5567 }, { "epoch": 1.2835734870317004, "grad_norm": 1.6567967388315745, "learning_rate": 6.237525851329533e-07, "loss": 0.430223673582077, "step": 5568 }, { "epoch": 1.2838040345821327, "grad_norm": 1.4875391508810385, "learning_rate": 6.233994523180021e-07, "loss": 0.5455408692359924, "step": 5569 }, { "epoch": 1.284034582132565, "grad_norm": 2.1560276048689895, "learning_rate": 6.230463742191925e-07, "loss": 0.5247420072555542, "step": 5570 }, { "epoch": 1.2842651296829972, "grad_norm": 1.8980481153200757, "learning_rate": 6.226933508878232e-07, "loss": 0.5348542332649231, "step": 5571 }, { "epoch": 1.2844956772334295, "grad_norm": 1.6792634796819574, "learning_rate": 6.223403823751854e-07, "loss": 0.4977971315383911, "step": 5572 }, { "epoch": 1.2847262247838618, "grad_norm": 1.6070638173254763, "learning_rate": 6.219874687325611e-07, "loss": 0.40318992733955383, "step": 5573 }, { "epoch": 1.284956772334294, "grad_norm": 1.9663675843987791, "learning_rate": 6.216346100112255e-07, "loss": 0.5009844303131104, "step": 5574 }, { "epoch": 1.2851873198847263, "grad_norm": 1.6937420088731798, "learning_rate": 6.212818062624445e-07, "loss": 0.5087116956710815, "step": 5575 }, { "epoch": 1.2854178674351586, "grad_norm": 1.6817159218995477, "learning_rate": 6.209290575374775e-07, "loss": 0.5260793566703796, "step": 5576 }, { "epoch": 1.285648414985591, "grad_norm": 1.6762340662691828, "learning_rate": 6.205763638875754e-07, "loss": 0.4408110976219177, "step": 5577 }, { "epoch": 1.2858789625360232, "grad_norm": 1.5270773893490903, "learning_rate": 6.202237253639799e-07, "loss": 0.4744076132774353, "step": 5578 }, { "epoch": 1.2861095100864555, "grad_norm": 1.5635420193144807, "learning_rate": 6.198711420179273e-07, "loss": 0.48688220977783203, "step": 5579 }, { "epoch": 1.2863400576368877, "grad_norm": 1.6213517296286213, "learning_rate": 6.195186139006425e-07, "loss": 0.47676223516464233, "step": 5580 }, { "epoch": 1.2865706051873198, "grad_norm": 1.606680809029221, "learning_rate": 6.191661410633452e-07, "loss": 0.5136919021606445, "step": 5581 }, { "epoch": 1.286801152737752, "grad_norm": 1.5245386080744614, "learning_rate": 6.188137235572464e-07, "loss": 0.5181657671928406, "step": 5582 }, { "epoch": 1.2870317002881844, "grad_norm": 1.6707771822267998, "learning_rate": 6.184613614335476e-07, "loss": 0.5066704750061035, "step": 5583 }, { "epoch": 1.2872622478386166, "grad_norm": 1.5163975923186304, "learning_rate": 6.181090547434438e-07, "loss": 0.4127427339553833, "step": 5584 }, { "epoch": 1.287492795389049, "grad_norm": 1.3664322264218698, "learning_rate": 6.177568035381223e-07, "loss": 0.4792090654373169, "step": 5585 }, { "epoch": 1.2877233429394812, "grad_norm": 1.4546030620202877, "learning_rate": 6.174046078687603e-07, "loss": 0.47321128845214844, "step": 5586 }, { "epoch": 1.2879538904899135, "grad_norm": 1.5045985608502348, "learning_rate": 6.17052467786529e-07, "loss": 0.6202956438064575, "step": 5587 }, { "epoch": 1.2881844380403458, "grad_norm": 1.5090966461095818, "learning_rate": 6.167003833425902e-07, "loss": 0.4611511528491974, "step": 5588 }, { "epoch": 1.288414985590778, "grad_norm": 1.5961777946986442, "learning_rate": 6.163483545880981e-07, "loss": 0.37117400765419006, "step": 5589 }, { "epoch": 1.2886455331412103, "grad_norm": 1.5363317852310883, "learning_rate": 6.159963815741996e-07, "loss": 0.40295302867889404, "step": 5590 }, { "epoch": 1.2888760806916426, "grad_norm": 1.2834821490038788, "learning_rate": 6.156444643520319e-07, "loss": 0.4332062602043152, "step": 5591 }, { "epoch": 1.289106628242075, "grad_norm": 1.622352739011229, "learning_rate": 6.152926029727249e-07, "loss": 0.5083199739456177, "step": 5592 }, { "epoch": 1.2893371757925072, "grad_norm": 1.4338205668361415, "learning_rate": 6.14940797487401e-07, "loss": 0.4602397680282593, "step": 5593 }, { "epoch": 1.2895677233429395, "grad_norm": 1.4280283710663018, "learning_rate": 6.145890479471734e-07, "loss": 0.41422247886657715, "step": 5594 }, { "epoch": 1.2897982708933717, "grad_norm": 1.395051644435662, "learning_rate": 6.14237354403148e-07, "loss": 0.4773354232311249, "step": 5595 }, { "epoch": 1.290028818443804, "grad_norm": 1.7507850247217935, "learning_rate": 6.138857169064215e-07, "loss": 0.5527161359786987, "step": 5596 }, { "epoch": 1.2902593659942363, "grad_norm": 1.6330535523345988, "learning_rate": 6.135341355080841e-07, "loss": 0.4463421404361725, "step": 5597 }, { "epoch": 1.2904899135446686, "grad_norm": 1.8408791330637222, "learning_rate": 6.131826102592165e-07, "loss": 0.46494734287261963, "step": 5598 }, { "epoch": 1.2907204610951009, "grad_norm": 1.525583159053329, "learning_rate": 6.128311412108913e-07, "loss": 0.41373857855796814, "step": 5599 }, { "epoch": 1.2909510086455331, "grad_norm": 1.6892159775229958, "learning_rate": 6.124797284141738e-07, "loss": 0.4944826364517212, "step": 5600 }, { "epoch": 1.2911815561959654, "grad_norm": 1.3091958060274738, "learning_rate": 6.121283719201207e-07, "loss": 0.4663166403770447, "step": 5601 }, { "epoch": 1.2914121037463977, "grad_norm": 1.480078368011403, "learning_rate": 6.117770717797798e-07, "loss": 0.5245934724807739, "step": 5602 }, { "epoch": 1.29164265129683, "grad_norm": 1.4141646006395505, "learning_rate": 6.114258280441922e-07, "loss": 0.4217444956302643, "step": 5603 }, { "epoch": 1.2918731988472623, "grad_norm": 1.4191022644328553, "learning_rate": 6.110746407643892e-07, "loss": 0.459358811378479, "step": 5604 }, { "epoch": 1.2921037463976945, "grad_norm": 1.336340183417743, "learning_rate": 6.10723509991395e-07, "loss": 0.417694628238678, "step": 5605 }, { "epoch": 1.2923342939481268, "grad_norm": 1.2397570173595072, "learning_rate": 6.103724357762254e-07, "loss": 0.3651599884033203, "step": 5606 }, { "epoch": 1.292564841498559, "grad_norm": 1.5399308093733746, "learning_rate": 6.100214181698877e-07, "loss": 0.49200180172920227, "step": 5607 }, { "epoch": 1.2927953890489914, "grad_norm": 1.9240361082865032, "learning_rate": 6.096704572233806e-07, "loss": 0.497753381729126, "step": 5608 }, { "epoch": 1.2930259365994237, "grad_norm": 1.4344257549227153, "learning_rate": 6.093195529876962e-07, "loss": 0.5006631016731262, "step": 5609 }, { "epoch": 1.293256484149856, "grad_norm": 1.6754143744802865, "learning_rate": 6.089687055138163e-07, "loss": 0.5016833543777466, "step": 5610 }, { "epoch": 1.2934870317002882, "grad_norm": 1.527136681773818, "learning_rate": 6.086179148527159e-07, "loss": 0.4424899220466614, "step": 5611 }, { "epoch": 1.2937175792507205, "grad_norm": 1.6530124892602107, "learning_rate": 6.082671810553606e-07, "loss": 0.5102081298828125, "step": 5612 }, { "epoch": 1.2939481268011528, "grad_norm": 1.5659948984902143, "learning_rate": 6.079165041727089e-07, "loss": 0.4826313853263855, "step": 5613 }, { "epoch": 1.294178674351585, "grad_norm": 1.395872003247078, "learning_rate": 6.075658842557105e-07, "loss": 0.42120543122291565, "step": 5614 }, { "epoch": 1.2944092219020173, "grad_norm": 1.699388547513595, "learning_rate": 6.072153213553066e-07, "loss": 0.4455034136772156, "step": 5615 }, { "epoch": 1.2946397694524496, "grad_norm": 1.9713259117027755, "learning_rate": 6.068648155224305e-07, "loss": 0.42256850004196167, "step": 5616 }, { "epoch": 1.294870317002882, "grad_norm": 1.9231580761268008, "learning_rate": 6.06514366808007e-07, "loss": 0.5602281093597412, "step": 5617 }, { "epoch": 1.2951008645533142, "grad_norm": 1.4513434908592018, "learning_rate": 6.061639752629526e-07, "loss": 0.43879711627960205, "step": 5618 }, { "epoch": 1.2953314121037465, "grad_norm": 1.4190069954434061, "learning_rate": 6.058136409381757e-07, "loss": 0.3930923640727997, "step": 5619 }, { "epoch": 1.2955619596541788, "grad_norm": 1.839985350345671, "learning_rate": 6.05463363884576e-07, "loss": 0.4514680802822113, "step": 5620 }, { "epoch": 1.295792507204611, "grad_norm": 1.2536955913267351, "learning_rate": 6.051131441530453e-07, "loss": 0.46888765692710876, "step": 5621 }, { "epoch": 1.296023054755043, "grad_norm": 1.7812263281619622, "learning_rate": 6.04762981794467e-07, "loss": 0.4355303645133972, "step": 5622 }, { "epoch": 1.2962536023054754, "grad_norm": 1.6229536165276595, "learning_rate": 6.044128768597157e-07, "loss": 0.4351111054420471, "step": 5623 }, { "epoch": 1.2964841498559077, "grad_norm": 1.6901196215872798, "learning_rate": 6.040628293996583e-07, "loss": 0.4702025055885315, "step": 5624 }, { "epoch": 1.29671469740634, "grad_norm": 1.9691583510108008, "learning_rate": 6.037128394651527e-07, "loss": 0.46238428354263306, "step": 5625 }, { "epoch": 1.2969452449567722, "grad_norm": 1.5645356062188, "learning_rate": 6.033629071070492e-07, "loss": 0.3857001066207886, "step": 5626 }, { "epoch": 1.2971757925072045, "grad_norm": 1.432067471334769, "learning_rate": 6.030130323761893e-07, "loss": 0.49124062061309814, "step": 5627 }, { "epoch": 1.2974063400576368, "grad_norm": 1.3667653448147599, "learning_rate": 6.026632153234057e-07, "loss": 0.44536006450653076, "step": 5628 }, { "epoch": 1.297636887608069, "grad_norm": 1.9079098337606286, "learning_rate": 6.023134559995237e-07, "loss": 0.5029529333114624, "step": 5629 }, { "epoch": 1.2978674351585013, "grad_norm": 2.0487774662218174, "learning_rate": 6.019637544553595e-07, "loss": 0.45321863889694214, "step": 5630 }, { "epoch": 1.2980979827089336, "grad_norm": 1.3767097952219558, "learning_rate": 6.01614110741721e-07, "loss": 0.46636098623275757, "step": 5631 }, { "epoch": 1.298328530259366, "grad_norm": 1.7562160355468237, "learning_rate": 6.012645249094081e-07, "loss": 0.4282764494419098, "step": 5632 }, { "epoch": 1.2985590778097982, "grad_norm": 1.449718049336432, "learning_rate": 6.009149970092115e-07, "loss": 0.42735451459884644, "step": 5633 }, { "epoch": 1.2987896253602305, "grad_norm": 1.3504653089580498, "learning_rate": 6.005655270919141e-07, "loss": 0.3382011950016022, "step": 5634 }, { "epoch": 1.2990201729106627, "grad_norm": 1.5943447106279303, "learning_rate": 6.002161152082908e-07, "loss": 0.43230611085891724, "step": 5635 }, { "epoch": 1.299250720461095, "grad_norm": 1.7064048884456655, "learning_rate": 5.99866761409107e-07, "loss": 0.5036906599998474, "step": 5636 }, { "epoch": 1.2994812680115273, "grad_norm": 1.4451080113763042, "learning_rate": 5.995174657451198e-07, "loss": 0.4540144205093384, "step": 5637 }, { "epoch": 1.2997118155619596, "grad_norm": 1.6244240251604507, "learning_rate": 5.991682282670794e-07, "loss": 0.5304908752441406, "step": 5638 }, { "epoch": 1.2999423631123919, "grad_norm": 1.6756011798471222, "learning_rate": 5.988190490257252e-07, "loss": 0.48925888538360596, "step": 5639 }, { "epoch": 1.3001729106628241, "grad_norm": 1.544278359402648, "learning_rate": 5.984699280717902e-07, "loss": 0.41739997267723083, "step": 5640 }, { "epoch": 1.3004034582132564, "grad_norm": 1.8573165659255007, "learning_rate": 5.98120865455997e-07, "loss": 0.368470698595047, "step": 5641 }, { "epoch": 1.3006340057636887, "grad_norm": 1.4369868317123584, "learning_rate": 5.977718612290618e-07, "loss": 0.47940701246261597, "step": 5642 }, { "epoch": 1.300864553314121, "grad_norm": 1.576956184529271, "learning_rate": 5.974229154416908e-07, "loss": 0.4344274699687958, "step": 5643 }, { "epoch": 1.3010951008645533, "grad_norm": 1.5095504652283631, "learning_rate": 5.97074028144582e-07, "loss": 0.4318895637989044, "step": 5644 }, { "epoch": 1.3013256484149855, "grad_norm": 1.597236816262288, "learning_rate": 5.967251993884257e-07, "loss": 0.483765572309494, "step": 5645 }, { "epoch": 1.3015561959654178, "grad_norm": 1.3118188483806787, "learning_rate": 5.963764292239029e-07, "loss": 0.39786484837532043, "step": 5646 }, { "epoch": 1.30178674351585, "grad_norm": 1.604619731251052, "learning_rate": 5.960277177016854e-07, "loss": 0.5057064294815063, "step": 5647 }, { "epoch": 1.3020172910662824, "grad_norm": 1.611278241917598, "learning_rate": 5.956790648724388e-07, "loss": 0.3967602252960205, "step": 5648 }, { "epoch": 1.3022478386167147, "grad_norm": 1.5141547098519228, "learning_rate": 5.953304707868177e-07, "loss": 0.46510642766952515, "step": 5649 }, { "epoch": 1.302478386167147, "grad_norm": 1.5274485657271228, "learning_rate": 5.949819354954694e-07, "loss": 0.4845235347747803, "step": 5650 }, { "epoch": 1.3027089337175792, "grad_norm": 1.7942214793176274, "learning_rate": 5.946334590490329e-07, "loss": 0.4742242097854614, "step": 5651 }, { "epoch": 1.3029394812680115, "grad_norm": 1.5892392961762456, "learning_rate": 5.942850414981376e-07, "loss": 0.4435562193393707, "step": 5652 }, { "epoch": 1.3031700288184438, "grad_norm": 1.4875348946887388, "learning_rate": 5.93936682893405e-07, "loss": 0.44417089223861694, "step": 5653 }, { "epoch": 1.303400576368876, "grad_norm": 1.5970020700205978, "learning_rate": 5.935883832854485e-07, "loss": 0.4877326488494873, "step": 5654 }, { "epoch": 1.3036311239193084, "grad_norm": 1.3171877697027834, "learning_rate": 5.932401427248721e-07, "loss": 0.3517765402793884, "step": 5655 }, { "epoch": 1.3038616714697406, "grad_norm": 1.3570215231005136, "learning_rate": 5.928919612622716e-07, "loss": 0.4456232786178589, "step": 5656 }, { "epoch": 1.304092219020173, "grad_norm": 1.8836957623192498, "learning_rate": 5.925438389482338e-07, "loss": 0.5343309044837952, "step": 5657 }, { "epoch": 1.3043227665706052, "grad_norm": 1.5719703891271506, "learning_rate": 5.921957758333375e-07, "loss": 0.46931013464927673, "step": 5658 }, { "epoch": 1.3045533141210375, "grad_norm": 1.3499152363640425, "learning_rate": 5.91847771968153e-07, "loss": 0.4783180356025696, "step": 5659 }, { "epoch": 1.3047838616714698, "grad_norm": 1.3505796312286786, "learning_rate": 5.91499827403241e-07, "loss": 0.4500124454498291, "step": 5660 }, { "epoch": 1.305014409221902, "grad_norm": 1.5688722879131622, "learning_rate": 5.911519421891545e-07, "loss": 0.416248619556427, "step": 5661 }, { "epoch": 1.3052449567723343, "grad_norm": 1.464160574072666, "learning_rate": 5.90804116376438e-07, "loss": 0.4546668529510498, "step": 5662 }, { "epoch": 1.3054755043227666, "grad_norm": 1.6212778345510863, "learning_rate": 5.904563500156262e-07, "loss": 0.4955529570579529, "step": 5663 }, { "epoch": 1.3057060518731989, "grad_norm": 1.6100740336658428, "learning_rate": 5.901086431572468e-07, "loss": 0.5418561100959778, "step": 5664 }, { "epoch": 1.3059365994236312, "grad_norm": 1.3667792510170178, "learning_rate": 5.897609958518171e-07, "loss": 0.45207348465919495, "step": 5665 }, { "epoch": 1.3061671469740634, "grad_norm": 1.567610206868078, "learning_rate": 5.894134081498471e-07, "loss": 0.39835965633392334, "step": 5666 }, { "epoch": 1.3063976945244957, "grad_norm": 2.1400479034078232, "learning_rate": 5.89065880101838e-07, "loss": 0.4610930383205414, "step": 5667 }, { "epoch": 1.306628242074928, "grad_norm": 1.480337861014841, "learning_rate": 5.887184117582814e-07, "loss": 0.47697019577026367, "step": 5668 }, { "epoch": 1.3068587896253603, "grad_norm": 1.7998277326040601, "learning_rate": 5.883710031696613e-07, "loss": 0.5246520042419434, "step": 5669 }, { "epoch": 1.3070893371757926, "grad_norm": 1.3822061021496619, "learning_rate": 5.880236543864521e-07, "loss": 0.43360599875450134, "step": 5670 }, { "epoch": 1.3073198847262248, "grad_norm": 1.603823660095097, "learning_rate": 5.876763654591202e-07, "loss": 0.4036989212036133, "step": 5671 }, { "epoch": 1.3075504322766571, "grad_norm": 1.92161850074288, "learning_rate": 5.873291364381234e-07, "loss": 0.5217401385307312, "step": 5672 }, { "epoch": 1.3077809798270894, "grad_norm": 1.8626510168435175, "learning_rate": 5.869819673739097e-07, "loss": 0.4804594814777374, "step": 5673 }, { "epoch": 1.3080115273775217, "grad_norm": 1.5969102074834967, "learning_rate": 5.866348583169199e-07, "loss": 0.4158530533313751, "step": 5674 }, { "epoch": 1.308242074927954, "grad_norm": 1.4467346607257023, "learning_rate": 5.862878093175852e-07, "loss": 0.3767266273498535, "step": 5675 }, { "epoch": 1.3084726224783862, "grad_norm": 1.5914288798121585, "learning_rate": 5.859408204263274e-07, "loss": 0.4574335515499115, "step": 5676 }, { "epoch": 1.3087031700288185, "grad_norm": 1.246598683737431, "learning_rate": 5.855938916935616e-07, "loss": 0.4004058539867401, "step": 5677 }, { "epoch": 1.3089337175792508, "grad_norm": 1.539707925409524, "learning_rate": 5.852470231696922e-07, "loss": 0.5084467530250549, "step": 5678 }, { "epoch": 1.309164265129683, "grad_norm": 1.96730424218735, "learning_rate": 5.849002149051155e-07, "loss": 0.49348288774490356, "step": 5679 }, { "epoch": 1.3093948126801154, "grad_norm": 1.5400304304805938, "learning_rate": 5.845534669502199e-07, "loss": 0.4904360771179199, "step": 5680 }, { "epoch": 1.3096253602305477, "grad_norm": 1.725745441906493, "learning_rate": 5.842067793553833e-07, "loss": 0.39716076850891113, "step": 5681 }, { "epoch": 1.30985590778098, "grad_norm": 1.951959855113649, "learning_rate": 5.838601521709763e-07, "loss": 0.45109057426452637, "step": 5682 }, { "epoch": 1.3100864553314122, "grad_norm": 1.2108615351157967, "learning_rate": 5.835135854473606e-07, "loss": 0.4689937233924866, "step": 5683 }, { "epoch": 1.3103170028818445, "grad_norm": 1.57788584068215, "learning_rate": 5.831670792348878e-07, "loss": 0.46770697832107544, "step": 5684 }, { "epoch": 1.3105475504322768, "grad_norm": 1.7640277377337972, "learning_rate": 5.828206335839025e-07, "loss": 0.44661128520965576, "step": 5685 }, { "epoch": 1.310778097982709, "grad_norm": 1.9000908259737026, "learning_rate": 5.82474248544739e-07, "loss": 0.43092700839042664, "step": 5686 }, { "epoch": 1.3110086455331413, "grad_norm": 1.6902521158602983, "learning_rate": 5.821279241677237e-07, "loss": 0.4759563207626343, "step": 5687 }, { "epoch": 1.3112391930835736, "grad_norm": 1.505811136559332, "learning_rate": 5.817816605031744e-07, "loss": 0.4558718800544739, "step": 5688 }, { "epoch": 1.311469740634006, "grad_norm": 1.4687373535557187, "learning_rate": 5.814354576013991e-07, "loss": 0.41315966844558716, "step": 5689 }, { "epoch": 1.3117002881844382, "grad_norm": 1.7259723493996193, "learning_rate": 5.810893155126972e-07, "loss": 0.48093000054359436, "step": 5690 }, { "epoch": 1.3119308357348702, "grad_norm": 1.4454250122793362, "learning_rate": 5.807432342873604e-07, "loss": 0.44277292490005493, "step": 5691 }, { "epoch": 1.3121613832853025, "grad_norm": 1.7712457329533509, "learning_rate": 5.803972139756699e-07, "loss": 0.4686616063117981, "step": 5692 }, { "epoch": 1.3123919308357348, "grad_norm": 1.8037865500083508, "learning_rate": 5.800512546278992e-07, "loss": 0.5273550748825073, "step": 5693 }, { "epoch": 1.312622478386167, "grad_norm": 1.5751540447686536, "learning_rate": 5.797053562943126e-07, "loss": 0.40962180495262146, "step": 5694 }, { "epoch": 1.3128530259365994, "grad_norm": 1.5552313010741665, "learning_rate": 5.793595190251657e-07, "loss": 0.4359856843948364, "step": 5695 }, { "epoch": 1.3130835734870316, "grad_norm": 1.427706356459816, "learning_rate": 5.790137428707047e-07, "loss": 0.3920098841190338, "step": 5696 }, { "epoch": 1.313314121037464, "grad_norm": 1.6313193317124348, "learning_rate": 5.786680278811673e-07, "loss": 0.44638365507125854, "step": 5697 }, { "epoch": 1.3135446685878962, "grad_norm": 1.7792052140749095, "learning_rate": 5.783223741067822e-07, "loss": 0.49320071935653687, "step": 5698 }, { "epoch": 1.3137752161383285, "grad_norm": 1.6043100517885838, "learning_rate": 5.779767815977701e-07, "loss": 0.5009332299232483, "step": 5699 }, { "epoch": 1.3140057636887608, "grad_norm": 1.9474485717331127, "learning_rate": 5.776312504043408e-07, "loss": 0.399705171585083, "step": 5700 }, { "epoch": 1.314236311239193, "grad_norm": 1.4585410826624658, "learning_rate": 5.77285780576698e-07, "loss": 0.4848003685474396, "step": 5701 }, { "epoch": 1.3144668587896253, "grad_norm": 1.4467862164072396, "learning_rate": 5.76940372165033e-07, "loss": 0.4309002161026001, "step": 5702 }, { "epoch": 1.3146974063400576, "grad_norm": 1.4911760535404153, "learning_rate": 5.765950252195309e-07, "loss": 0.47664520144462585, "step": 5703 }, { "epoch": 1.3149279538904899, "grad_norm": 1.3214665856632308, "learning_rate": 5.762497397903674e-07, "loss": 0.40434837341308594, "step": 5704 }, { "epoch": 1.3151585014409222, "grad_norm": 1.767762796112652, "learning_rate": 5.759045159277083e-07, "loss": 0.46631279587745667, "step": 5705 }, { "epoch": 1.3153890489913544, "grad_norm": 1.4544198218645066, "learning_rate": 5.75559353681711e-07, "loss": 0.37656185030937195, "step": 5706 }, { "epoch": 1.3156195965417867, "grad_norm": 1.2767291509456058, "learning_rate": 5.752142531025252e-07, "loss": 0.404850572347641, "step": 5707 }, { "epoch": 1.315850144092219, "grad_norm": 1.8381735708700344, "learning_rate": 5.748692142402887e-07, "loss": 0.5541446208953857, "step": 5708 }, { "epoch": 1.3160806916426513, "grad_norm": 1.657101287344229, "learning_rate": 5.745242371451331e-07, "loss": 0.5300667881965637, "step": 5709 }, { "epoch": 1.3163112391930836, "grad_norm": 1.5088076439154363, "learning_rate": 5.741793218671794e-07, "loss": 0.41248297691345215, "step": 5710 }, { "epoch": 1.3165417867435159, "grad_norm": 1.485990096285573, "learning_rate": 5.738344684565406e-07, "loss": 0.42053329944610596, "step": 5711 }, { "epoch": 1.3167723342939481, "grad_norm": 1.7569253091641839, "learning_rate": 5.734896769633204e-07, "loss": 0.49210411310195923, "step": 5712 }, { "epoch": 1.3170028818443804, "grad_norm": 1.4735959040405031, "learning_rate": 5.731449474376133e-07, "loss": 0.43861931562423706, "step": 5713 }, { "epoch": 1.3172334293948127, "grad_norm": 1.616710147227633, "learning_rate": 5.728002799295049e-07, "loss": 0.4376800060272217, "step": 5714 }, { "epoch": 1.317463976945245, "grad_norm": 1.6699352009633992, "learning_rate": 5.724556744890714e-07, "loss": 0.5226752161979675, "step": 5715 }, { "epoch": 1.3176945244956773, "grad_norm": 1.527907496447196, "learning_rate": 5.721111311663807e-07, "loss": 0.49144458770751953, "step": 5716 }, { "epoch": 1.3179250720461095, "grad_norm": 1.588011520770062, "learning_rate": 5.717666500114918e-07, "loss": 0.41572028398513794, "step": 5717 }, { "epoch": 1.3181556195965418, "grad_norm": 1.9136714216275055, "learning_rate": 5.714222310744535e-07, "loss": 0.4877137243747711, "step": 5718 }, { "epoch": 1.318386167146974, "grad_norm": 1.3894714709044902, "learning_rate": 5.710778744053069e-07, "loss": 0.5112143754959106, "step": 5719 }, { "epoch": 1.3186167146974064, "grad_norm": 1.4178344137923677, "learning_rate": 5.707335800540832e-07, "loss": 0.3639913499355316, "step": 5720 }, { "epoch": 1.3188472622478387, "grad_norm": 1.339168057765147, "learning_rate": 5.703893480708044e-07, "loss": 0.4214756488800049, "step": 5721 }, { "epoch": 1.319077809798271, "grad_norm": 1.6532547512994948, "learning_rate": 5.700451785054845e-07, "loss": 0.45181459188461304, "step": 5722 }, { "epoch": 1.3193083573487032, "grad_norm": 1.8115910959996735, "learning_rate": 5.69701071408127e-07, "loss": 0.5234519243240356, "step": 5723 }, { "epoch": 1.3195389048991355, "grad_norm": 1.5235287093918135, "learning_rate": 5.693570268287277e-07, "loss": 0.4809603691101074, "step": 5724 }, { "epoch": 1.3197694524495678, "grad_norm": 1.5449169163448038, "learning_rate": 5.690130448172727e-07, "loss": 0.49713581800460815, "step": 5725 }, { "epoch": 1.32, "grad_norm": 1.7653735856275472, "learning_rate": 5.68669125423739e-07, "loss": 0.4501224756240845, "step": 5726 }, { "epoch": 1.3202305475504323, "grad_norm": 1.5979447021388788, "learning_rate": 5.683252686980937e-07, "loss": 0.4798666834831238, "step": 5727 }, { "epoch": 1.3204610951008646, "grad_norm": 1.7043100133103994, "learning_rate": 5.679814746902968e-07, "loss": 0.39191022515296936, "step": 5728 }, { "epoch": 1.320691642651297, "grad_norm": 1.349443879583748, "learning_rate": 5.67637743450297e-07, "loss": 0.4909393787384033, "step": 5729 }, { "epoch": 1.3209221902017292, "grad_norm": 1.5585291332202789, "learning_rate": 5.672940750280357e-07, "loss": 0.5315475463867188, "step": 5730 }, { "epoch": 1.3211527377521615, "grad_norm": 1.8034782223278798, "learning_rate": 5.669504694734436e-07, "loss": 0.5093904733657837, "step": 5731 }, { "epoch": 1.3213832853025935, "grad_norm": 1.617415818890137, "learning_rate": 5.666069268364437e-07, "loss": 0.4729066491127014, "step": 5732 }, { "epoch": 1.3216138328530258, "grad_norm": 1.728742183209921, "learning_rate": 5.662634471669489e-07, "loss": 0.46405333280563354, "step": 5733 }, { "epoch": 1.321844380403458, "grad_norm": 1.41763204916087, "learning_rate": 5.659200305148626e-07, "loss": 0.38553181290626526, "step": 5734 }, { "epoch": 1.3220749279538904, "grad_norm": 1.6908629643110433, "learning_rate": 5.655766769300804e-07, "loss": 0.4223875403404236, "step": 5735 }, { "epoch": 1.3223054755043226, "grad_norm": 1.7695441587075371, "learning_rate": 5.652333864624881e-07, "loss": 0.44964247941970825, "step": 5736 }, { "epoch": 1.322536023054755, "grad_norm": 1.4486441745330252, "learning_rate": 5.648901591619617e-07, "loss": 0.417505145072937, "step": 5737 }, { "epoch": 1.3227665706051872, "grad_norm": 1.4410963247757809, "learning_rate": 5.64546995078369e-07, "loss": 0.419208288192749, "step": 5738 }, { "epoch": 1.3229971181556195, "grad_norm": 1.8506448148738834, "learning_rate": 5.642038942615681e-07, "loss": 0.499774307012558, "step": 5739 }, { "epoch": 1.3232276657060518, "grad_norm": 1.815255555024286, "learning_rate": 5.638608567614076e-07, "loss": 0.5035330057144165, "step": 5740 }, { "epoch": 1.323458213256484, "grad_norm": 1.4388619620353644, "learning_rate": 5.635178826277278e-07, "loss": 0.4658032953739166, "step": 5741 }, { "epoch": 1.3236887608069163, "grad_norm": 1.7274819728556454, "learning_rate": 5.631749719103586e-07, "loss": 0.42654508352279663, "step": 5742 }, { "epoch": 1.3239193083573486, "grad_norm": 1.6352351844005595, "learning_rate": 5.628321246591219e-07, "loss": 0.46338269114494324, "step": 5743 }, { "epoch": 1.324149855907781, "grad_norm": 1.7058135427472088, "learning_rate": 5.6248934092383e-07, "loss": 0.49489402770996094, "step": 5744 }, { "epoch": 1.3243804034582132, "grad_norm": 1.7667721028029346, "learning_rate": 5.621466207542855e-07, "loss": 0.43571901321411133, "step": 5745 }, { "epoch": 1.3246109510086455, "grad_norm": 1.6558339040197791, "learning_rate": 5.618039642002823e-07, "loss": 0.3845377564430237, "step": 5746 }, { "epoch": 1.3248414985590777, "grad_norm": 1.7985128014396068, "learning_rate": 5.61461371311604e-07, "loss": 0.46888309717178345, "step": 5747 }, { "epoch": 1.32507204610951, "grad_norm": 1.4860170610023868, "learning_rate": 5.611188421380266e-07, "loss": 0.47584888339042664, "step": 5748 }, { "epoch": 1.3253025936599423, "grad_norm": 1.462500100058502, "learning_rate": 5.607763767293162e-07, "loss": 0.45152002573013306, "step": 5749 }, { "epoch": 1.3255331412103746, "grad_norm": 1.3836540155127026, "learning_rate": 5.604339751352288e-07, "loss": 0.4863770604133606, "step": 5750 }, { "epoch": 1.3257636887608069, "grad_norm": 1.6099021073453343, "learning_rate": 5.600916374055124e-07, "loss": 0.4634966254234314, "step": 5751 }, { "epoch": 1.3259942363112391, "grad_norm": 1.5103532444548802, "learning_rate": 5.597493635899047e-07, "loss": 0.4578768014907837, "step": 5752 }, { "epoch": 1.3262247838616714, "grad_norm": 1.645303626914304, "learning_rate": 5.594071537381344e-07, "loss": 0.4615476727485657, "step": 5753 }, { "epoch": 1.3264553314121037, "grad_norm": 1.649679181206382, "learning_rate": 5.590650078999215e-07, "loss": 0.4493352174758911, "step": 5754 }, { "epoch": 1.326685878962536, "grad_norm": 1.9417083139706726, "learning_rate": 5.587229261249758e-07, "loss": 0.45934945344924927, "step": 5755 }, { "epoch": 1.3269164265129683, "grad_norm": 1.5433058030935336, "learning_rate": 5.583809084629983e-07, "loss": 0.44645899534225464, "step": 5756 }, { "epoch": 1.3271469740634005, "grad_norm": 1.8230512410234043, "learning_rate": 5.580389549636813e-07, "loss": 0.4972764551639557, "step": 5757 }, { "epoch": 1.3273775216138328, "grad_norm": 1.4829618993989975, "learning_rate": 5.576970656767062e-07, "loss": 0.56006920337677, "step": 5758 }, { "epoch": 1.327608069164265, "grad_norm": 1.4530984592703375, "learning_rate": 5.573552406517465e-07, "loss": 0.35086876153945923, "step": 5759 }, { "epoch": 1.3278386167146974, "grad_norm": 1.5453147440802761, "learning_rate": 5.570134799384652e-07, "loss": 0.5021958351135254, "step": 5760 }, { "epoch": 1.3280691642651297, "grad_norm": 1.3980714186138523, "learning_rate": 5.566717835865168e-07, "loss": 0.4798334836959839, "step": 5761 }, { "epoch": 1.328299711815562, "grad_norm": 1.6793393039174773, "learning_rate": 5.56330151645547e-07, "loss": 0.4362230896949768, "step": 5762 }, { "epoch": 1.3285302593659942, "grad_norm": 1.9287032003864895, "learning_rate": 5.559885841651901e-07, "loss": 0.4839896261692047, "step": 5763 }, { "epoch": 1.3287608069164265, "grad_norm": 1.7469751122977706, "learning_rate": 5.556470811950735e-07, "loss": 0.4296723008155823, "step": 5764 }, { "epoch": 1.3289913544668588, "grad_norm": 1.6438669469084637, "learning_rate": 5.553056427848135e-07, "loss": 0.4957331717014313, "step": 5765 }, { "epoch": 1.329221902017291, "grad_norm": 1.6461439100608464, "learning_rate": 5.549642689840173e-07, "loss": 0.5281147360801697, "step": 5766 }, { "epoch": 1.3294524495677233, "grad_norm": 1.5868314889631472, "learning_rate": 5.546229598422833e-07, "loss": 0.48177218437194824, "step": 5767 }, { "epoch": 1.3296829971181556, "grad_norm": 1.5218912835977805, "learning_rate": 5.542817154091999e-07, "loss": 0.4848208427429199, "step": 5768 }, { "epoch": 1.329913544668588, "grad_norm": 1.4668359429602678, "learning_rate": 5.539405357343464e-07, "loss": 0.5182781219482422, "step": 5769 }, { "epoch": 1.3301440922190202, "grad_norm": 1.4082178913730994, "learning_rate": 5.535994208672932e-07, "loss": 0.4479871094226837, "step": 5770 }, { "epoch": 1.3303746397694525, "grad_norm": 1.6224978164814792, "learning_rate": 5.532583708576005e-07, "loss": 0.43546003103256226, "step": 5771 }, { "epoch": 1.3306051873198848, "grad_norm": 1.735010886055401, "learning_rate": 5.529173857548186e-07, "loss": 0.46656274795532227, "step": 5772 }, { "epoch": 1.330835734870317, "grad_norm": 1.5926561794354557, "learning_rate": 5.525764656084901e-07, "loss": 0.40682974457740784, "step": 5773 }, { "epoch": 1.3310662824207493, "grad_norm": 1.509669363197034, "learning_rate": 5.522356104681463e-07, "loss": 0.4639260470867157, "step": 5774 }, { "epoch": 1.3312968299711816, "grad_norm": 1.6625210006206694, "learning_rate": 5.518948203833106e-07, "loss": 0.41499418020248413, "step": 5775 }, { "epoch": 1.3315273775216139, "grad_norm": 1.7316213010818284, "learning_rate": 5.515540954034956e-07, "loss": 0.48424363136291504, "step": 5776 }, { "epoch": 1.3317579250720462, "grad_norm": 1.7500118650105563, "learning_rate": 5.512134355782059e-07, "loss": 0.481282114982605, "step": 5777 }, { "epoch": 1.3319884726224784, "grad_norm": 1.4970286912195556, "learning_rate": 5.508728409569353e-07, "loss": 0.46897292137145996, "step": 5778 }, { "epoch": 1.3322190201729107, "grad_norm": 1.5395432239994278, "learning_rate": 5.505323115891684e-07, "loss": 0.4447956681251526, "step": 5779 }, { "epoch": 1.332449567723343, "grad_norm": 1.7841646267882065, "learning_rate": 5.50191847524381e-07, "loss": 0.4055880606174469, "step": 5780 }, { "epoch": 1.3326801152737753, "grad_norm": 1.8999810775506178, "learning_rate": 5.498514488120391e-07, "loss": 0.48870790004730225, "step": 5781 }, { "epoch": 1.3329106628242076, "grad_norm": 1.5931778786898716, "learning_rate": 5.495111155015986e-07, "loss": 0.46856385469436646, "step": 5782 }, { "epoch": 1.3331412103746398, "grad_norm": 1.6068645914103106, "learning_rate": 5.49170847642507e-07, "loss": 0.42831993103027344, "step": 5783 }, { "epoch": 1.3333717579250721, "grad_norm": 1.4770491256923433, "learning_rate": 5.488306452842013e-07, "loss": 0.41119974851608276, "step": 5784 }, { "epoch": 1.3336023054755044, "grad_norm": 1.8185317788143798, "learning_rate": 5.484905084761091e-07, "loss": 0.5369082689285278, "step": 5785 }, { "epoch": 1.3338328530259367, "grad_norm": 1.9538048720090067, "learning_rate": 5.481504372676495e-07, "loss": 0.4573896527290344, "step": 5786 }, { "epoch": 1.334063400576369, "grad_norm": 1.6221206727707211, "learning_rate": 5.478104317082305e-07, "loss": 0.538988471031189, "step": 5787 }, { "epoch": 1.3342939481268012, "grad_norm": 1.5995139592730108, "learning_rate": 5.474704918472516e-07, "loss": 0.47058168053627014, "step": 5788 }, { "epoch": 1.3345244956772335, "grad_norm": 1.5580693417811027, "learning_rate": 5.471306177341031e-07, "loss": 0.522472083568573, "step": 5789 }, { "epoch": 1.3347550432276658, "grad_norm": 1.5860681876603904, "learning_rate": 5.467908094181648e-07, "loss": 0.4278186559677124, "step": 5790 }, { "epoch": 1.334985590778098, "grad_norm": 1.5625539938501531, "learning_rate": 5.464510669488073e-07, "loss": 0.4699779450893402, "step": 5791 }, { "epoch": 1.3352161383285304, "grad_norm": 1.7937776074083913, "learning_rate": 5.461113903753911e-07, "loss": 0.4560111165046692, "step": 5792 }, { "epoch": 1.3354466858789626, "grad_norm": 1.837112208791973, "learning_rate": 5.457717797472683e-07, "loss": 0.48460662364959717, "step": 5793 }, { "epoch": 1.335677233429395, "grad_norm": 1.7402459378855901, "learning_rate": 5.454322351137809e-07, "loss": 0.4199531376361847, "step": 5794 }, { "epoch": 1.3359077809798272, "grad_norm": 1.5222181264331647, "learning_rate": 5.450927565242605e-07, "loss": 0.4457356929779053, "step": 5795 }, { "epoch": 1.3361383285302595, "grad_norm": 1.649365506502914, "learning_rate": 5.447533440280309e-07, "loss": 0.3968189060688019, "step": 5796 }, { "epoch": 1.3363688760806918, "grad_norm": 1.4671060690724647, "learning_rate": 5.444139976744044e-07, "loss": 0.41763365268707275, "step": 5797 }, { "epoch": 1.336599423631124, "grad_norm": 1.722950328594955, "learning_rate": 5.440747175126843e-07, "loss": 0.5074071288108826, "step": 5798 }, { "epoch": 1.3368299711815563, "grad_norm": 1.6111552809013234, "learning_rate": 5.437355035921654e-07, "loss": 0.4077880382537842, "step": 5799 }, { "epoch": 1.3370605187319884, "grad_norm": 1.5466812995082164, "learning_rate": 5.433963559621308e-07, "loss": 0.4905518889427185, "step": 5800 }, { "epoch": 1.3372910662824207, "grad_norm": 1.682630265863772, "learning_rate": 5.430572746718558e-07, "loss": 0.4662671685218811, "step": 5801 }, { "epoch": 1.337521613832853, "grad_norm": 1.3132407255636611, "learning_rate": 5.427182597706057e-07, "loss": 0.4523214101791382, "step": 5802 }, { "epoch": 1.3377521613832852, "grad_norm": 1.90695064970206, "learning_rate": 5.423793113076356e-07, "loss": 0.47909995913505554, "step": 5803 }, { "epoch": 1.3379827089337175, "grad_norm": 1.7910549238251845, "learning_rate": 5.420404293321908e-07, "loss": 0.5743957757949829, "step": 5804 }, { "epoch": 1.3382132564841498, "grad_norm": 1.5499659591777002, "learning_rate": 5.417016138935073e-07, "loss": 0.46549567580223083, "step": 5805 }, { "epoch": 1.338443804034582, "grad_norm": 1.3200635620126038, "learning_rate": 5.413628650408117e-07, "loss": 0.3912985324859619, "step": 5806 }, { "epoch": 1.3386743515850144, "grad_norm": 1.3955673682018792, "learning_rate": 5.41024182823321e-07, "loss": 0.4237633943557739, "step": 5807 }, { "epoch": 1.3389048991354466, "grad_norm": 1.6883882388649214, "learning_rate": 5.406855672902417e-07, "loss": 0.4899691641330719, "step": 5808 }, { "epoch": 1.339135446685879, "grad_norm": 1.3966918885304072, "learning_rate": 5.403470184907716e-07, "loss": 0.4774795174598694, "step": 5809 }, { "epoch": 1.3393659942363112, "grad_norm": 1.8954957129895125, "learning_rate": 5.400085364740981e-07, "loss": 0.5134497880935669, "step": 5810 }, { "epoch": 1.3395965417867435, "grad_norm": 1.33408195533013, "learning_rate": 5.396701212893985e-07, "loss": 0.3900358974933624, "step": 5811 }, { "epoch": 1.3398270893371758, "grad_norm": 1.5802049157805196, "learning_rate": 5.39331772985842e-07, "loss": 0.5683782696723938, "step": 5812 }, { "epoch": 1.340057636887608, "grad_norm": 1.4035611271860546, "learning_rate": 5.389934916125865e-07, "loss": 0.4435126781463623, "step": 5813 }, { "epoch": 1.3402881844380403, "grad_norm": 1.5175886691873446, "learning_rate": 5.386552772187806e-07, "loss": 0.42716413736343384, "step": 5814 }, { "epoch": 1.3405187319884726, "grad_norm": 1.6801684258194023, "learning_rate": 5.383171298535645e-07, "loss": 0.4489009976387024, "step": 5815 }, { "epoch": 1.3407492795389049, "grad_norm": 1.7185586590192998, "learning_rate": 5.37979049566066e-07, "loss": 0.4845975637435913, "step": 5816 }, { "epoch": 1.3409798270893372, "grad_norm": 1.7370256253696736, "learning_rate": 5.376410364054052e-07, "loss": 0.5482637882232666, "step": 5817 }, { "epoch": 1.3412103746397694, "grad_norm": 2.613056414919246, "learning_rate": 5.373030904206924e-07, "loss": 0.5108325481414795, "step": 5818 }, { "epoch": 1.3414409221902017, "grad_norm": 1.4978146785702697, "learning_rate": 5.369652116610269e-07, "loss": 0.459455668926239, "step": 5819 }, { "epoch": 1.341671469740634, "grad_norm": 1.509610858326331, "learning_rate": 5.366274001754995e-07, "loss": 0.5242212414741516, "step": 5820 }, { "epoch": 1.3419020172910663, "grad_norm": 1.7716640742532166, "learning_rate": 5.362896560131905e-07, "loss": 0.509830892086029, "step": 5821 }, { "epoch": 1.3421325648414986, "grad_norm": 1.7106393348542666, "learning_rate": 5.359519792231703e-07, "loss": 0.45476478338241577, "step": 5822 }, { "epoch": 1.3423631123919308, "grad_norm": 1.9332971228816167, "learning_rate": 5.356143698545006e-07, "loss": 0.43780767917633057, "step": 5823 }, { "epoch": 1.3425936599423631, "grad_norm": 1.519077249132743, "learning_rate": 5.352768279562314e-07, "loss": 0.4382368326187134, "step": 5824 }, { "epoch": 1.3428242074927954, "grad_norm": 1.694275850243012, "learning_rate": 5.34939353577405e-07, "loss": 0.4616197943687439, "step": 5825 }, { "epoch": 1.3430547550432277, "grad_norm": 1.673271377870483, "learning_rate": 5.346019467670527e-07, "loss": 0.5287420749664307, "step": 5826 }, { "epoch": 1.34328530259366, "grad_norm": 1.6107217520062422, "learning_rate": 5.342646075741964e-07, "loss": 0.4960588216781616, "step": 5827 }, { "epoch": 1.3435158501440922, "grad_norm": 1.787783527320612, "learning_rate": 5.339273360478473e-07, "loss": 0.4494114816188812, "step": 5828 }, { "epoch": 1.3437463976945245, "grad_norm": 1.6044879846817381, "learning_rate": 5.335901322370077e-07, "loss": 0.5143953561782837, "step": 5829 }, { "epoch": 1.3439769452449568, "grad_norm": 1.6617843855030219, "learning_rate": 5.332529961906698e-07, "loss": 0.47664588689804077, "step": 5830 }, { "epoch": 1.344207492795389, "grad_norm": 1.6680735023411237, "learning_rate": 5.329159279578166e-07, "loss": 0.5280349850654602, "step": 5831 }, { "epoch": 1.3444380403458214, "grad_norm": 1.4132124166382474, "learning_rate": 5.325789275874195e-07, "loss": 0.4075919985771179, "step": 5832 }, { "epoch": 1.3446685878962537, "grad_norm": 1.695845807521503, "learning_rate": 5.322419951284422e-07, "loss": 0.48058271408081055, "step": 5833 }, { "epoch": 1.344899135446686, "grad_norm": 1.6607672510224756, "learning_rate": 5.319051306298371e-07, "loss": 0.5294280648231506, "step": 5834 }, { "epoch": 1.3451296829971182, "grad_norm": 1.7474336626992906, "learning_rate": 5.315683341405466e-07, "loss": 0.47455504536628723, "step": 5835 }, { "epoch": 1.3453602305475505, "grad_norm": 1.4619071794552545, "learning_rate": 5.312316057095045e-07, "loss": 0.4823703169822693, "step": 5836 }, { "epoch": 1.3455907780979828, "grad_norm": 1.4777456061736687, "learning_rate": 5.308949453856333e-07, "loss": 0.47531551122665405, "step": 5837 }, { "epoch": 1.345821325648415, "grad_norm": 1.6734267117158743, "learning_rate": 5.305583532178464e-07, "loss": 0.4509006142616272, "step": 5838 }, { "epoch": 1.3460518731988473, "grad_norm": 1.5829632523885313, "learning_rate": 5.302218292550478e-07, "loss": 0.48463982343673706, "step": 5839 }, { "epoch": 1.3462824207492796, "grad_norm": 1.694704250371438, "learning_rate": 5.298853735461303e-07, "loss": 0.4683498442173004, "step": 5840 }, { "epoch": 1.346512968299712, "grad_norm": 1.6056494194260316, "learning_rate": 5.295489861399771e-07, "loss": 0.4532889723777771, "step": 5841 }, { "epoch": 1.346743515850144, "grad_norm": 1.6004406350046356, "learning_rate": 5.292126670854626e-07, "loss": 0.4410448968410492, "step": 5842 }, { "epoch": 1.3469740634005762, "grad_norm": 1.5125184912832104, "learning_rate": 5.288764164314499e-07, "loss": 0.48533615469932556, "step": 5843 }, { "epoch": 1.3472046109510085, "grad_norm": 1.4454172468360864, "learning_rate": 5.28540234226793e-07, "loss": 0.5278619527816772, "step": 5844 }, { "epoch": 1.3474351585014408, "grad_norm": 1.6917564731344552, "learning_rate": 5.282041205203354e-07, "loss": 0.43173807859420776, "step": 5845 }, { "epoch": 1.347665706051873, "grad_norm": 1.311431998190532, "learning_rate": 5.278680753609113e-07, "loss": 0.44198548793792725, "step": 5846 }, { "epoch": 1.3478962536023054, "grad_norm": 1.675999276148586, "learning_rate": 5.275320987973444e-07, "loss": 0.5097990036010742, "step": 5847 }, { "epoch": 1.3481268011527376, "grad_norm": 1.5200547839915433, "learning_rate": 5.271961908784483e-07, "loss": 0.41765618324279785, "step": 5848 }, { "epoch": 1.34835734870317, "grad_norm": 1.509547179156895, "learning_rate": 5.268603516530274e-07, "loss": 0.465061217546463, "step": 5849 }, { "epoch": 1.3485878962536022, "grad_norm": 1.640068419096848, "learning_rate": 5.265245811698751e-07, "loss": 0.4371737837791443, "step": 5850 }, { "epoch": 1.3488184438040345, "grad_norm": 1.6272380446300156, "learning_rate": 5.261888794777757e-07, "loss": 0.4835420846939087, "step": 5851 }, { "epoch": 1.3490489913544668, "grad_norm": 1.6120940633761554, "learning_rate": 5.258532466255037e-07, "loss": 0.434193879365921, "step": 5852 }, { "epoch": 1.349279538904899, "grad_norm": 1.456789174255129, "learning_rate": 5.255176826618223e-07, "loss": 0.48119521141052246, "step": 5853 }, { "epoch": 1.3495100864553313, "grad_norm": 2.0695007608526255, "learning_rate": 5.251821876354853e-07, "loss": 0.5133852958679199, "step": 5854 }, { "epoch": 1.3497406340057636, "grad_norm": 1.5740277395577307, "learning_rate": 5.248467615952374e-07, "loss": 0.502734899520874, "step": 5855 }, { "epoch": 1.3499711815561959, "grad_norm": 1.737984028582727, "learning_rate": 5.245114045898118e-07, "loss": 0.47067946195602417, "step": 5856 }, { "epoch": 1.3502017291066282, "grad_norm": 1.861351064114255, "learning_rate": 5.241761166679331e-07, "loss": 0.5280636548995972, "step": 5857 }, { "epoch": 1.3504322766570604, "grad_norm": 1.9027445398531666, "learning_rate": 5.238408978783143e-07, "loss": 0.43284815549850464, "step": 5858 }, { "epoch": 1.3506628242074927, "grad_norm": 1.6794616508594868, "learning_rate": 5.235057482696601e-07, "loss": 0.4984050989151001, "step": 5859 }, { "epoch": 1.350893371757925, "grad_norm": 1.455600936685728, "learning_rate": 5.231706678906637e-07, "loss": 0.4615752696990967, "step": 5860 }, { "epoch": 1.3511239193083573, "grad_norm": 1.5178879378297239, "learning_rate": 5.228356567900086e-07, "loss": 0.4811222553253174, "step": 5861 }, { "epoch": 1.3513544668587896, "grad_norm": 1.7164878231984242, "learning_rate": 5.225007150163687e-07, "loss": 0.5302398800849915, "step": 5862 }, { "epoch": 1.3515850144092219, "grad_norm": 1.7231443524330496, "learning_rate": 5.22165842618408e-07, "loss": 0.46788594126701355, "step": 5863 }, { "epoch": 1.3518155619596541, "grad_norm": 1.4074482819945295, "learning_rate": 5.218310396447791e-07, "loss": 0.42734235525131226, "step": 5864 }, { "epoch": 1.3520461095100864, "grad_norm": 1.6191419784980083, "learning_rate": 5.214963061441264e-07, "loss": 0.4698154926300049, "step": 5865 }, { "epoch": 1.3522766570605187, "grad_norm": 1.6424454305776117, "learning_rate": 5.211616421650826e-07, "loss": 0.48623502254486084, "step": 5866 }, { "epoch": 1.352507204610951, "grad_norm": 1.8362604622502379, "learning_rate": 5.208270477562704e-07, "loss": 0.5312871336936951, "step": 5867 }, { "epoch": 1.3527377521613833, "grad_norm": 2.04277197466832, "learning_rate": 5.204925229663039e-07, "loss": 0.5257741212844849, "step": 5868 }, { "epoch": 1.3529682997118155, "grad_norm": 1.515431923993323, "learning_rate": 5.201580678437852e-07, "loss": 0.42586231231689453, "step": 5869 }, { "epoch": 1.3531988472622478, "grad_norm": 1.5045820135918335, "learning_rate": 5.198236824373075e-07, "loss": 0.4793773293495178, "step": 5870 }, { "epoch": 1.35342939481268, "grad_norm": 1.6874766609639118, "learning_rate": 5.194893667954541e-07, "loss": 0.4825376570224762, "step": 5871 }, { "epoch": 1.3536599423631124, "grad_norm": 1.5749959416130654, "learning_rate": 5.191551209667968e-07, "loss": 0.5299191474914551, "step": 5872 }, { "epoch": 1.3538904899135447, "grad_norm": 1.9579620248680123, "learning_rate": 5.188209449998984e-07, "loss": 0.45400893688201904, "step": 5873 }, { "epoch": 1.354121037463977, "grad_norm": 1.586588649654464, "learning_rate": 5.184868389433108e-07, "loss": 0.40687328577041626, "step": 5874 }, { "epoch": 1.3543515850144092, "grad_norm": 1.7386772311529148, "learning_rate": 5.181528028455764e-07, "loss": 0.44856715202331543, "step": 5875 }, { "epoch": 1.3545821325648415, "grad_norm": 1.6814656770848047, "learning_rate": 5.178188367552276e-07, "loss": 0.4961288869380951, "step": 5876 }, { "epoch": 1.3548126801152738, "grad_norm": 1.3451366506384344, "learning_rate": 5.174849407207853e-07, "loss": 0.48112860321998596, "step": 5877 }, { "epoch": 1.355043227665706, "grad_norm": 1.6313677309379906, "learning_rate": 5.17151114790762e-07, "loss": 0.5009165406227112, "step": 5878 }, { "epoch": 1.3552737752161383, "grad_norm": 1.6571659753115824, "learning_rate": 5.168173590136588e-07, "loss": 0.4461110234260559, "step": 5879 }, { "epoch": 1.3555043227665706, "grad_norm": 1.9922430252685355, "learning_rate": 5.164836734379666e-07, "loss": 0.46784788370132446, "step": 5880 }, { "epoch": 1.355734870317003, "grad_norm": 1.6236627472570804, "learning_rate": 5.161500581121669e-07, "loss": 0.4470252990722656, "step": 5881 }, { "epoch": 1.3559654178674352, "grad_norm": 1.6544593178546172, "learning_rate": 5.158165130847301e-07, "loss": 0.4843372106552124, "step": 5882 }, { "epoch": 1.3561959654178675, "grad_norm": 1.4568756529342444, "learning_rate": 5.15483038404117e-07, "loss": 0.48338162899017334, "step": 5883 }, { "epoch": 1.3564265129682997, "grad_norm": 1.5639784023031311, "learning_rate": 5.151496341187786e-07, "loss": 0.49267876148223877, "step": 5884 }, { "epoch": 1.356657060518732, "grad_norm": 1.4532699235197888, "learning_rate": 5.148163002771543e-07, "loss": 0.44972002506256104, "step": 5885 }, { "epoch": 1.3568876080691643, "grad_norm": 1.849096817052275, "learning_rate": 5.144830369276747e-07, "loss": 0.47793418169021606, "step": 5886 }, { "epoch": 1.3571181556195966, "grad_norm": 2.019581972182712, "learning_rate": 5.141498441187584e-07, "loss": 0.5081642866134644, "step": 5887 }, { "epoch": 1.3573487031700289, "grad_norm": 1.7936039173954244, "learning_rate": 5.138167218988156e-07, "loss": 0.4508650302886963, "step": 5888 }, { "epoch": 1.3575792507204612, "grad_norm": 1.9903590976587098, "learning_rate": 5.13483670316246e-07, "loss": 0.509946346282959, "step": 5889 }, { "epoch": 1.3578097982708934, "grad_norm": 1.3894335336694332, "learning_rate": 5.131506894194376e-07, "loss": 0.3508078455924988, "step": 5890 }, { "epoch": 1.3580403458213257, "grad_norm": 1.6347599571971514, "learning_rate": 5.128177792567696e-07, "loss": 0.5096418857574463, "step": 5891 }, { "epoch": 1.358270893371758, "grad_norm": 1.2587415692746868, "learning_rate": 5.124849398766103e-07, "loss": 0.4068000018596649, "step": 5892 }, { "epoch": 1.3585014409221903, "grad_norm": 1.6029483959229915, "learning_rate": 5.121521713273173e-07, "loss": 0.4583415687084198, "step": 5893 }, { "epoch": 1.3587319884726226, "grad_norm": 1.9696596179786654, "learning_rate": 5.118194736572395e-07, "loss": 0.48043733835220337, "step": 5894 }, { "epoch": 1.3589625360230548, "grad_norm": 1.623202830766786, "learning_rate": 5.11486846914713e-07, "loss": 0.5002140998840332, "step": 5895 }, { "epoch": 1.3591930835734871, "grad_norm": 1.620019064708946, "learning_rate": 5.111542911480659e-07, "loss": 0.5203151702880859, "step": 5896 }, { "epoch": 1.3594236311239194, "grad_norm": 1.4403729884467253, "learning_rate": 5.108218064056152e-07, "loss": 0.4077376127243042, "step": 5897 }, { "epoch": 1.3596541786743517, "grad_norm": 1.5876907999897627, "learning_rate": 5.104893927356674e-07, "loss": 0.5146734714508057, "step": 5898 }, { "epoch": 1.359884726224784, "grad_norm": 1.7641615656345115, "learning_rate": 5.10157050186518e-07, "loss": 0.5297499299049377, "step": 5899 }, { "epoch": 1.3601152737752162, "grad_norm": 1.586023517840485, "learning_rate": 5.09824778806454e-07, "loss": 0.49775469303131104, "step": 5900 }, { "epoch": 1.3603458213256485, "grad_norm": 1.5383657146391863, "learning_rate": 5.094925786437499e-07, "loss": 0.42014068365097046, "step": 5901 }, { "epoch": 1.3605763688760808, "grad_norm": 1.5811863841429854, "learning_rate": 5.09160449746672e-07, "loss": 0.39976444840431213, "step": 5902 }, { "epoch": 1.360806916426513, "grad_norm": 1.5391009905059254, "learning_rate": 5.088283921634742e-07, "loss": 0.4181244671344757, "step": 5903 }, { "epoch": 1.3610374639769454, "grad_norm": 1.7318442603568622, "learning_rate": 5.084964059424018e-07, "loss": 0.49912410974502563, "step": 5904 }, { "epoch": 1.3612680115273776, "grad_norm": 1.6464926496982077, "learning_rate": 5.081644911316886e-07, "loss": 0.5237860679626465, "step": 5905 }, { "epoch": 1.36149855907781, "grad_norm": 1.9750248134134947, "learning_rate": 5.078326477795583e-07, "loss": 0.4730784296989441, "step": 5906 }, { "epoch": 1.3617291066282422, "grad_norm": 1.5345984420273122, "learning_rate": 5.075008759342241e-07, "loss": 0.5080181360244751, "step": 5907 }, { "epoch": 1.3619596541786745, "grad_norm": 1.6710731790396276, "learning_rate": 5.071691756438897e-07, "loss": 0.4697926640510559, "step": 5908 }, { "epoch": 1.3621902017291068, "grad_norm": 1.490941194326937, "learning_rate": 5.068375469567468e-07, "loss": 0.5204674005508423, "step": 5909 }, { "epoch": 1.3624207492795388, "grad_norm": 1.5304764486604812, "learning_rate": 5.065059899209785e-07, "loss": 0.43228816986083984, "step": 5910 }, { "epoch": 1.362651296829971, "grad_norm": 1.3898360585052687, "learning_rate": 5.06174504584756e-07, "loss": 0.47551077604293823, "step": 5911 }, { "epoch": 1.3628818443804034, "grad_norm": 1.4150986020819334, "learning_rate": 5.058430909962406e-07, "loss": 0.44586285948753357, "step": 5912 }, { "epoch": 1.3631123919308357, "grad_norm": 1.3091754547925671, "learning_rate": 5.055117492035839e-07, "loss": 0.47856760025024414, "step": 5913 }, { "epoch": 1.363342939481268, "grad_norm": 1.9810859445793283, "learning_rate": 5.051804792549254e-07, "loss": 0.6033484935760498, "step": 5914 }, { "epoch": 1.3635734870317002, "grad_norm": 1.642277635899145, "learning_rate": 5.048492811983959e-07, "loss": 0.5101956129074097, "step": 5915 }, { "epoch": 1.3638040345821325, "grad_norm": 2.462967749056604, "learning_rate": 5.045181550821154e-07, "loss": 0.530934751033783, "step": 5916 }, { "epoch": 1.3640345821325648, "grad_norm": 1.558351988913394, "learning_rate": 5.041871009541922e-07, "loss": 0.4618695378303528, "step": 5917 }, { "epoch": 1.364265129682997, "grad_norm": 1.393276685582818, "learning_rate": 5.038561188627257e-07, "loss": 0.4308139383792877, "step": 5918 }, { "epoch": 1.3644956772334293, "grad_norm": 1.4122819038580587, "learning_rate": 5.035252088558034e-07, "loss": 0.4413840174674988, "step": 5919 }, { "epoch": 1.3647262247838616, "grad_norm": 1.762353694417549, "learning_rate": 5.031943709815036e-07, "loss": 0.426006019115448, "step": 5920 }, { "epoch": 1.364956772334294, "grad_norm": 2.0693345821353852, "learning_rate": 5.028636052878938e-07, "loss": 0.5016382932662964, "step": 5921 }, { "epoch": 1.3651873198847262, "grad_norm": 1.5626757094374883, "learning_rate": 5.025329118230302e-07, "loss": 0.4329431653022766, "step": 5922 }, { "epoch": 1.3654178674351585, "grad_norm": 1.4393399562375095, "learning_rate": 5.022022906349598e-07, "loss": 0.4622783064842224, "step": 5923 }, { "epoch": 1.3656484149855908, "grad_norm": 1.8772089679378412, "learning_rate": 5.018717417717181e-07, "loss": 0.45837199687957764, "step": 5924 }, { "epoch": 1.365878962536023, "grad_norm": 1.508840858176121, "learning_rate": 5.0154126528133e-07, "loss": 0.40243199467658997, "step": 5925 }, { "epoch": 1.3661095100864553, "grad_norm": 1.7704033220607236, "learning_rate": 5.012108612118111e-07, "loss": 0.3701140284538269, "step": 5926 }, { "epoch": 1.3663400576368876, "grad_norm": 1.4584895223212495, "learning_rate": 5.008805296111649e-07, "loss": 0.42856937646865845, "step": 5927 }, { "epoch": 1.3665706051873199, "grad_norm": 1.4058422037004188, "learning_rate": 5.005502705273855e-07, "loss": 0.4932258129119873, "step": 5928 }, { "epoch": 1.3668011527377522, "grad_norm": 1.5903214965689192, "learning_rate": 5.00220084008457e-07, "loss": 0.40012168884277344, "step": 5929 }, { "epoch": 1.3670317002881844, "grad_norm": 1.454636698679369, "learning_rate": 4.998899701023503e-07, "loss": 0.4348130524158478, "step": 5930 }, { "epoch": 1.3672622478386167, "grad_norm": 1.531204931434994, "learning_rate": 4.995599288570287e-07, "loss": 0.43674468994140625, "step": 5931 }, { "epoch": 1.367492795389049, "grad_norm": 1.722849550986695, "learning_rate": 4.992299603204432e-07, "loss": 0.5576746463775635, "step": 5932 }, { "epoch": 1.3677233429394813, "grad_norm": 1.4734378875973935, "learning_rate": 4.98900064540535e-07, "loss": 0.4338483214378357, "step": 5933 }, { "epoch": 1.3679538904899136, "grad_norm": 1.5640070595952762, "learning_rate": 4.98570241565235e-07, "loss": 0.5156441330909729, "step": 5934 }, { "epoch": 1.3681844380403458, "grad_norm": 1.4905894229317878, "learning_rate": 4.982404914424626e-07, "loss": 0.4708647131919861, "step": 5935 }, { "epoch": 1.3684149855907781, "grad_norm": 1.6577000540746343, "learning_rate": 4.979108142201267e-07, "loss": 0.4355749487876892, "step": 5936 }, { "epoch": 1.3686455331412104, "grad_norm": 1.7883353786762823, "learning_rate": 4.975812099461268e-07, "loss": 0.4360370635986328, "step": 5937 }, { "epoch": 1.3688760806916427, "grad_norm": 1.9642902580940569, "learning_rate": 4.972516786683501e-07, "loss": 0.5324473977088928, "step": 5938 }, { "epoch": 1.369106628242075, "grad_norm": 1.429248391480111, "learning_rate": 4.969222204346751e-07, "loss": 0.49706023931503296, "step": 5939 }, { "epoch": 1.3693371757925072, "grad_norm": 1.6516845494204488, "learning_rate": 4.965928352929674e-07, "loss": 0.3852601647377014, "step": 5940 }, { "epoch": 1.3695677233429395, "grad_norm": 1.5936594611011352, "learning_rate": 4.962635232910843e-07, "loss": 0.4638129472732544, "step": 5941 }, { "epoch": 1.3697982708933718, "grad_norm": 1.683078925842744, "learning_rate": 4.959342844768711e-07, "loss": 0.4691101908683777, "step": 5942 }, { "epoch": 1.370028818443804, "grad_norm": 1.3490562795558882, "learning_rate": 4.956051188981623e-07, "loss": 0.5083839893341064, "step": 5943 }, { "epoch": 1.3702593659942364, "grad_norm": 1.4625392664965804, "learning_rate": 4.952760266027825e-07, "loss": 0.4441392719745636, "step": 5944 }, { "epoch": 1.3704899135446686, "grad_norm": 1.6907734985715615, "learning_rate": 4.94947007638546e-07, "loss": 0.38773834705352783, "step": 5945 }, { "epoch": 1.370720461095101, "grad_norm": 1.4182609011262102, "learning_rate": 4.946180620532548e-07, "loss": 0.4743019938468933, "step": 5946 }, { "epoch": 1.3709510086455332, "grad_norm": 1.6787322002316782, "learning_rate": 4.942891898947024e-07, "loss": 0.3981400728225708, "step": 5947 }, { "epoch": 1.3711815561959655, "grad_norm": 1.4088695472372947, "learning_rate": 4.939603912106696e-07, "loss": 0.40556612610816956, "step": 5948 }, { "epoch": 1.3714121037463978, "grad_norm": 1.6422838341462083, "learning_rate": 4.936316660489277e-07, "loss": 0.4359711706638336, "step": 5949 }, { "epoch": 1.37164265129683, "grad_norm": 1.4347305213532893, "learning_rate": 4.933030144572372e-07, "loss": 0.44561219215393066, "step": 5950 }, { "epoch": 1.371873198847262, "grad_norm": 1.319594203653507, "learning_rate": 4.929744364833474e-07, "loss": 0.3856406807899475, "step": 5951 }, { "epoch": 1.3721037463976944, "grad_norm": 1.6102685715345482, "learning_rate": 4.926459321749973e-07, "loss": 0.4586164951324463, "step": 5952 }, { "epoch": 1.3723342939481267, "grad_norm": 2.0415905828304117, "learning_rate": 4.92317501579916e-07, "loss": 0.5609460473060608, "step": 5953 }, { "epoch": 1.372564841498559, "grad_norm": 2.256441945151045, "learning_rate": 4.919891447458204e-07, "loss": 0.5234363675117493, "step": 5954 }, { "epoch": 1.3727953890489912, "grad_norm": 1.959793340744298, "learning_rate": 4.916608617204171e-07, "loss": 0.4611673951148987, "step": 5955 }, { "epoch": 1.3730259365994235, "grad_norm": 1.6387175941742442, "learning_rate": 4.913326525514021e-07, "loss": 0.4873179793357849, "step": 5956 }, { "epoch": 1.3732564841498558, "grad_norm": 1.915167462474106, "learning_rate": 4.910045172864613e-07, "loss": 0.5042279362678528, "step": 5957 }, { "epoch": 1.373487031700288, "grad_norm": 1.728746475348662, "learning_rate": 4.906764559732695e-07, "loss": 0.5057715177536011, "step": 5958 }, { "epoch": 1.3737175792507204, "grad_norm": 1.7996746605755354, "learning_rate": 4.903484686594897e-07, "loss": 0.5340696573257446, "step": 5959 }, { "epoch": 1.3739481268011526, "grad_norm": 1.5779353792159843, "learning_rate": 4.90020555392776e-07, "loss": 0.43351268768310547, "step": 5960 }, { "epoch": 1.374178674351585, "grad_norm": 1.573562503159586, "learning_rate": 4.896927162207707e-07, "loss": 0.457750141620636, "step": 5961 }, { "epoch": 1.3744092219020172, "grad_norm": 1.5967761640899065, "learning_rate": 4.893649511911044e-07, "loss": 0.4459339678287506, "step": 5962 }, { "epoch": 1.3746397694524495, "grad_norm": 1.5932726302811062, "learning_rate": 4.890372603513993e-07, "loss": 0.5079913139343262, "step": 5963 }, { "epoch": 1.3748703170028818, "grad_norm": 1.6314201964372212, "learning_rate": 4.887096437492643e-07, "loss": 0.4558470845222473, "step": 5964 }, { "epoch": 1.375100864553314, "grad_norm": 1.555540006249106, "learning_rate": 4.883821014322992e-07, "loss": 0.5119925737380981, "step": 5965 }, { "epoch": 1.3753314121037463, "grad_norm": 1.5582303154225734, "learning_rate": 4.880546334480929e-07, "loss": 0.39001739025115967, "step": 5966 }, { "epoch": 1.3755619596541786, "grad_norm": 1.5191279412240843, "learning_rate": 4.877272398442228e-07, "loss": 0.38213586807250977, "step": 5967 }, { "epoch": 1.3757925072046109, "grad_norm": 1.4691257079314304, "learning_rate": 4.873999206682552e-07, "loss": 0.48196929693222046, "step": 5968 }, { "epoch": 1.3760230547550432, "grad_norm": 1.6328356700934117, "learning_rate": 4.87072675967747e-07, "loss": 0.36515331268310547, "step": 5969 }, { "epoch": 1.3762536023054754, "grad_norm": 1.6247882795619184, "learning_rate": 4.867455057902429e-07, "loss": 0.5114316940307617, "step": 5970 }, { "epoch": 1.3764841498559077, "grad_norm": 1.8444487237090605, "learning_rate": 4.864184101832778e-07, "loss": 0.563550591468811, "step": 5971 }, { "epoch": 1.37671469740634, "grad_norm": 1.7225734496778158, "learning_rate": 4.860913891943746e-07, "loss": 0.5151046514511108, "step": 5972 }, { "epoch": 1.3769452449567723, "grad_norm": 1.529072988346735, "learning_rate": 4.857644428710469e-07, "loss": 0.4265006184577942, "step": 5973 }, { "epoch": 1.3771757925072046, "grad_norm": 1.551487562216016, "learning_rate": 4.854375712607961e-07, "loss": 0.4501451253890991, "step": 5974 }, { "epoch": 1.3774063400576368, "grad_norm": 1.569217036910626, "learning_rate": 4.85110774411113e-07, "loss": 0.4197757840156555, "step": 5975 }, { "epoch": 1.3776368876080691, "grad_norm": 1.5926547930252053, "learning_rate": 4.847840523694784e-07, "loss": 0.4908246695995331, "step": 5976 }, { "epoch": 1.3778674351585014, "grad_norm": 1.526074427590657, "learning_rate": 4.84457405183361e-07, "loss": 0.4114115536212921, "step": 5977 }, { "epoch": 1.3780979827089337, "grad_norm": 1.6071297705826257, "learning_rate": 4.841308329002195e-07, "loss": 0.4672767221927643, "step": 5978 }, { "epoch": 1.378328530259366, "grad_norm": 1.2631644967059048, "learning_rate": 4.838043355675019e-07, "loss": 0.3806523084640503, "step": 5979 }, { "epoch": 1.3785590778097983, "grad_norm": 1.9350101011141525, "learning_rate": 4.834779132326444e-07, "loss": 0.44928061962127686, "step": 5980 }, { "epoch": 1.3787896253602305, "grad_norm": 1.8150559336473449, "learning_rate": 4.831515659430726e-07, "loss": 0.40093138813972473, "step": 5981 }, { "epoch": 1.3790201729106628, "grad_norm": 2.041794025799897, "learning_rate": 4.828252937462018e-07, "loss": 0.4751448631286621, "step": 5982 }, { "epoch": 1.379250720461095, "grad_norm": 1.7047133009163284, "learning_rate": 4.824990966894355e-07, "loss": 0.5027964115142822, "step": 5983 }, { "epoch": 1.3794812680115274, "grad_norm": 1.5876448572494875, "learning_rate": 4.821729748201674e-07, "loss": 0.4667786955833435, "step": 5984 }, { "epoch": 1.3797118155619597, "grad_norm": 1.6975993686252844, "learning_rate": 4.818469281857787e-07, "loss": 0.4899994134902954, "step": 5985 }, { "epoch": 1.379942363112392, "grad_norm": 1.967426478945087, "learning_rate": 4.815209568336415e-07, "loss": 0.5160613059997559, "step": 5986 }, { "epoch": 1.3801729106628242, "grad_norm": 1.4806686181451014, "learning_rate": 4.811950608111158e-07, "loss": 0.5122005939483643, "step": 5987 }, { "epoch": 1.3804034582132565, "grad_norm": 1.5095702034418825, "learning_rate": 4.808692401655503e-07, "loss": 0.4773480296134949, "step": 5988 }, { "epoch": 1.3806340057636888, "grad_norm": 1.4967712307165415, "learning_rate": 4.805434949442837e-07, "loss": 0.4779089093208313, "step": 5989 }, { "epoch": 1.380864553314121, "grad_norm": 1.5842683506783093, "learning_rate": 4.80217825194644e-07, "loss": 0.4837608337402344, "step": 5990 }, { "epoch": 1.3810951008645533, "grad_norm": 1.8521614971327032, "learning_rate": 4.798922309639466e-07, "loss": 0.4466870129108429, "step": 5991 }, { "epoch": 1.3813256484149856, "grad_norm": 1.9601310722449918, "learning_rate": 4.795667122994979e-07, "loss": 0.4643474519252777, "step": 5992 }, { "epoch": 1.381556195965418, "grad_norm": 1.5897841101760393, "learning_rate": 4.79241269248592e-07, "loss": 0.4888812303543091, "step": 5993 }, { "epoch": 1.3817867435158502, "grad_norm": 1.5638951554173341, "learning_rate": 4.789159018585118e-07, "loss": 0.505973219871521, "step": 5994 }, { "epoch": 1.3820172910662825, "grad_norm": 1.3822895824056294, "learning_rate": 4.785906101765309e-07, "loss": 0.44251859188079834, "step": 5995 }, { "epoch": 1.3822478386167147, "grad_norm": 1.960049432168954, "learning_rate": 4.782653942499097e-07, "loss": 0.5573620200157166, "step": 5996 }, { "epoch": 1.382478386167147, "grad_norm": 1.37836624882881, "learning_rate": 4.779402541258993e-07, "loss": 0.37017643451690674, "step": 5997 }, { "epoch": 1.3827089337175793, "grad_norm": 1.970528045777299, "learning_rate": 4.776151898517394e-07, "loss": 0.4964868426322937, "step": 5998 }, { "epoch": 1.3829394812680116, "grad_norm": 1.5277765140499937, "learning_rate": 4.772902014746583e-07, "loss": 0.460393488407135, "step": 5999 }, { "epoch": 1.3831700288184439, "grad_norm": 1.898227204559436, "learning_rate": 4.769652890418732e-07, "loss": 0.497903048992157, "step": 6000 }, { "epoch": 1.3834005763688761, "grad_norm": 1.5051772587978125, "learning_rate": 4.7664045260059015e-07, "loss": 0.4663991928100586, "step": 6001 }, { "epoch": 1.3836311239193084, "grad_norm": 1.630329268480054, "learning_rate": 4.763156921980049e-07, "loss": 0.43742233514785767, "step": 6002 }, { "epoch": 1.3838616714697407, "grad_norm": 1.2809856045675265, "learning_rate": 4.7599100788130233e-07, "loss": 0.4082593023777008, "step": 6003 }, { "epoch": 1.384092219020173, "grad_norm": 1.9460794232048821, "learning_rate": 4.7566639969765465e-07, "loss": 0.5778148174285889, "step": 6004 }, { "epoch": 1.3843227665706053, "grad_norm": 1.520124922186599, "learning_rate": 4.7534186769422504e-07, "loss": 0.4080566167831421, "step": 6005 }, { "epoch": 1.3845533141210375, "grad_norm": 1.5687504912879382, "learning_rate": 4.7501741191816403e-07, "loss": 0.46496686339378357, "step": 6006 }, { "epoch": 1.3847838616714698, "grad_norm": 1.4953220020064624, "learning_rate": 4.746930324166114e-07, "loss": 0.47985541820526123, "step": 6007 }, { "epoch": 1.385014409221902, "grad_norm": 1.5018708897664648, "learning_rate": 4.7436872923669703e-07, "loss": 0.48982805013656616, "step": 6008 }, { "epoch": 1.3852449567723344, "grad_norm": 1.4445730747155596, "learning_rate": 4.740445024255377e-07, "loss": 0.46816062927246094, "step": 6009 }, { "epoch": 1.3854755043227667, "grad_norm": 1.460923651414453, "learning_rate": 4.7372035203024097e-07, "loss": 0.4749149680137634, "step": 6010 }, { "epoch": 1.385706051873199, "grad_norm": 1.323855531957966, "learning_rate": 4.733962780979025e-07, "loss": 0.4393165707588196, "step": 6011 }, { "epoch": 1.3859365994236312, "grad_norm": 1.6288416030334434, "learning_rate": 4.7307228067560677e-07, "loss": 0.3817910850048065, "step": 6012 }, { "epoch": 1.3861671469740635, "grad_norm": 1.4361365253762288, "learning_rate": 4.727483598104267e-07, "loss": 0.3621605336666107, "step": 6013 }, { "epoch": 1.3863976945244958, "grad_norm": 1.581373327873717, "learning_rate": 4.7242451554942555e-07, "loss": 0.39434587955474854, "step": 6014 }, { "epoch": 1.386628242074928, "grad_norm": 1.310743303542111, "learning_rate": 4.7210074793965357e-07, "loss": 0.5066482424736023, "step": 6015 }, { "epoch": 1.3868587896253604, "grad_norm": 1.754271147284867, "learning_rate": 4.7177705702815175e-07, "loss": 0.4697442650794983, "step": 6016 }, { "epoch": 1.3870893371757926, "grad_norm": 1.6770775757631975, "learning_rate": 4.71453442861948e-07, "loss": 0.4051814079284668, "step": 6017 }, { "epoch": 1.387319884726225, "grad_norm": 1.6336553716475837, "learning_rate": 4.711299054880612e-07, "loss": 0.5127224326133728, "step": 6018 }, { "epoch": 1.3875504322766572, "grad_norm": 1.7922826477354121, "learning_rate": 4.708064449534973e-07, "loss": 0.3973507285118103, "step": 6019 }, { "epoch": 1.3877809798270893, "grad_norm": 1.597857166916995, "learning_rate": 4.704830613052515e-07, "loss": 0.505881667137146, "step": 6020 }, { "epoch": 1.3880115273775215, "grad_norm": 1.4925723751693731, "learning_rate": 4.701597545903089e-07, "loss": 0.4754818081855774, "step": 6021 }, { "epoch": 1.3882420749279538, "grad_norm": 1.4074084258772404, "learning_rate": 4.6983652485564163e-07, "loss": 0.4562879204750061, "step": 6022 }, { "epoch": 1.388472622478386, "grad_norm": 1.7249030074653906, "learning_rate": 4.6951337214821216e-07, "loss": 0.5570380091667175, "step": 6023 }, { "epoch": 1.3887031700288184, "grad_norm": 1.9969873123775743, "learning_rate": 4.6919029651497157e-07, "loss": 0.4170283079147339, "step": 6024 }, { "epoch": 1.3889337175792507, "grad_norm": 1.5864056005736995, "learning_rate": 4.68867298002859e-07, "loss": 0.4627062976360321, "step": 6025 }, { "epoch": 1.389164265129683, "grad_norm": 1.6317114770758834, "learning_rate": 4.685443766588023e-07, "loss": 0.47997409105300903, "step": 6026 }, { "epoch": 1.3893948126801152, "grad_norm": 1.5433535584548672, "learning_rate": 4.682215325297195e-07, "loss": 0.4558975100517273, "step": 6027 }, { "epoch": 1.3896253602305475, "grad_norm": 1.500194393702039, "learning_rate": 4.6789876566251573e-07, "loss": 0.47624218463897705, "step": 6028 }, { "epoch": 1.3898559077809798, "grad_norm": 1.4670498948839097, "learning_rate": 4.6757607610408623e-07, "loss": 0.43521273136138916, "step": 6029 }, { "epoch": 1.390086455331412, "grad_norm": 1.4064558734872, "learning_rate": 4.672534639013139e-07, "loss": 0.4876500368118286, "step": 6030 }, { "epoch": 1.3903170028818443, "grad_norm": 1.953687488635846, "learning_rate": 4.6693092910107157e-07, "loss": 0.5351635217666626, "step": 6031 }, { "epoch": 1.3905475504322766, "grad_norm": 1.5870986874022448, "learning_rate": 4.6660847175021976e-07, "loss": 0.4978008270263672, "step": 6032 }, { "epoch": 1.390778097982709, "grad_norm": 1.8145501622265596, "learning_rate": 4.66286091895608e-07, "loss": 0.532027542591095, "step": 6033 }, { "epoch": 1.3910086455331412, "grad_norm": 1.4756865518363598, "learning_rate": 4.659637895840748e-07, "loss": 0.40873438119888306, "step": 6034 }, { "epoch": 1.3912391930835735, "grad_norm": 1.3799051211541253, "learning_rate": 4.65641564862448e-07, "loss": 0.3943024277687073, "step": 6035 }, { "epoch": 1.3914697406340057, "grad_norm": 1.6362745005680124, "learning_rate": 4.6531941777754257e-07, "loss": 0.4594842493534088, "step": 6036 }, { "epoch": 1.391700288184438, "grad_norm": 1.5130847369004203, "learning_rate": 4.649973483761643e-07, "loss": 0.429911732673645, "step": 6037 }, { "epoch": 1.3919308357348703, "grad_norm": 1.5953387971551207, "learning_rate": 4.6467535670510516e-07, "loss": 0.4754660725593567, "step": 6038 }, { "epoch": 1.3921613832853026, "grad_norm": 1.7926411696243407, "learning_rate": 4.6435344281114775e-07, "loss": 0.49685293436050415, "step": 6039 }, { "epoch": 1.3923919308357349, "grad_norm": 1.7779518102975276, "learning_rate": 4.640316067410632e-07, "loss": 0.483062207698822, "step": 6040 }, { "epoch": 1.3926224783861672, "grad_norm": 1.7120940563362925, "learning_rate": 4.6370984854161033e-07, "loss": 0.511294960975647, "step": 6041 }, { "epoch": 1.3928530259365994, "grad_norm": 1.7910142322176623, "learning_rate": 4.633881682595375e-07, "loss": 0.5286852717399597, "step": 6042 }, { "epoch": 1.3930835734870317, "grad_norm": 1.5909865165506805, "learning_rate": 4.630665659415823e-07, "loss": 0.49661144614219666, "step": 6043 }, { "epoch": 1.393314121037464, "grad_norm": 1.6939702924642166, "learning_rate": 4.627450416344687e-07, "loss": 0.48525696992874146, "step": 6044 }, { "epoch": 1.3935446685878963, "grad_norm": 1.9008448770845712, "learning_rate": 4.6242359538491205e-07, "loss": 0.4805188775062561, "step": 6045 }, { "epoch": 1.3937752161383286, "grad_norm": 1.466547085894007, "learning_rate": 4.6210222723961436e-07, "loss": 0.5583043098449707, "step": 6046 }, { "epoch": 1.3940057636887608, "grad_norm": 1.767099349933789, "learning_rate": 4.6178093724526724e-07, "loss": 0.4752922058105469, "step": 6047 }, { "epoch": 1.3942363112391931, "grad_norm": 2.1204506587480236, "learning_rate": 4.614597254485515e-07, "loss": 0.5305292010307312, "step": 6048 }, { "epoch": 1.3944668587896254, "grad_norm": 1.6156047909242823, "learning_rate": 4.611385918961352e-07, "loss": 0.4300711154937744, "step": 6049 }, { "epoch": 1.3946974063400577, "grad_norm": 1.8395044872177642, "learning_rate": 4.6081753663467546e-07, "loss": 0.4568009376525879, "step": 6050 }, { "epoch": 1.39492795389049, "grad_norm": 1.515683664151756, "learning_rate": 4.6049655971081913e-07, "loss": 0.5203668475151062, "step": 6051 }, { "epoch": 1.3951585014409222, "grad_norm": 1.4603584943150332, "learning_rate": 4.601756611711999e-07, "loss": 0.40216517448425293, "step": 6052 }, { "epoch": 1.3953890489913545, "grad_norm": 1.4642830157949756, "learning_rate": 4.5985484106244175e-07, "loss": 0.4311853349208832, "step": 6053 }, { "epoch": 1.3956195965417868, "grad_norm": 1.5517607357762497, "learning_rate": 4.5953409943115584e-07, "loss": 0.5079714059829712, "step": 6054 }, { "epoch": 1.395850144092219, "grad_norm": 1.5088915517004085, "learning_rate": 4.5921343632394327e-07, "loss": 0.4816412925720215, "step": 6055 }, { "epoch": 1.3960806916426514, "grad_norm": 1.8928583094761373, "learning_rate": 4.588928517873928e-07, "loss": 0.4365989863872528, "step": 6056 }, { "epoch": 1.3963112391930836, "grad_norm": 1.7375818461870982, "learning_rate": 4.5857234586808144e-07, "loss": 0.47723880410194397, "step": 6057 }, { "epoch": 1.396541786743516, "grad_norm": 1.6552182086251737, "learning_rate": 4.5825191861257596e-07, "loss": 0.4895835518836975, "step": 6058 }, { "epoch": 1.3967723342939482, "grad_norm": 1.4001733151811946, "learning_rate": 4.5793157006743145e-07, "loss": 0.4960166811943054, "step": 6059 }, { "epoch": 1.3970028818443805, "grad_norm": 1.6684440025816993, "learning_rate": 4.5761130027919025e-07, "loss": 0.4520935118198395, "step": 6060 }, { "epoch": 1.3972334293948125, "grad_norm": 1.481564869888927, "learning_rate": 4.572911092943852e-07, "loss": 0.4755667448043823, "step": 6061 }, { "epoch": 1.3974639769452448, "grad_norm": 1.456572260239621, "learning_rate": 4.5697099715953634e-07, "loss": 0.4399319887161255, "step": 6062 }, { "epoch": 1.397694524495677, "grad_norm": 1.7189931326092531, "learning_rate": 4.566509639211521e-07, "loss": 0.4009808599948883, "step": 6063 }, { "epoch": 1.3979250720461094, "grad_norm": 1.6812792934483527, "learning_rate": 4.563310096257309e-07, "loss": 0.4784051477909088, "step": 6064 }, { "epoch": 1.3981556195965417, "grad_norm": 1.8540756549671957, "learning_rate": 4.560111343197579e-07, "loss": 0.521167516708374, "step": 6065 }, { "epoch": 1.398386167146974, "grad_norm": 1.9181010135961398, "learning_rate": 4.556913380497085e-07, "loss": 0.44407376646995544, "step": 6066 }, { "epoch": 1.3986167146974062, "grad_norm": 1.6432970918022198, "learning_rate": 4.5537162086204495e-07, "loss": 0.4660610556602478, "step": 6067 }, { "epoch": 1.3988472622478385, "grad_norm": 1.8554905581879808, "learning_rate": 4.5505198280321967e-07, "loss": 0.45331743359565735, "step": 6068 }, { "epoch": 1.3990778097982708, "grad_norm": 1.3269229239434595, "learning_rate": 4.5473242391967227e-07, "loss": 0.42603427171707153, "step": 6069 }, { "epoch": 1.399308357348703, "grad_norm": 1.5609089936334382, "learning_rate": 4.5441294425783094e-07, "loss": 0.4841277599334717, "step": 6070 }, { "epoch": 1.3995389048991353, "grad_norm": 1.6494964083060812, "learning_rate": 4.5409354386411326e-07, "loss": 0.5001981258392334, "step": 6071 }, { "epoch": 1.3997694524495676, "grad_norm": 1.6407020689096197, "learning_rate": 4.5377422278492493e-07, "loss": 0.4393565058708191, "step": 6072 }, { "epoch": 1.4, "grad_norm": 2.0348511108564753, "learning_rate": 4.534549810666596e-07, "loss": 0.48033279180526733, "step": 6073 }, { "epoch": 1.4002305475504322, "grad_norm": 1.557372777178074, "learning_rate": 4.5313581875570015e-07, "loss": 0.5052364468574524, "step": 6074 }, { "epoch": 1.4004610951008645, "grad_norm": 1.665250892528103, "learning_rate": 4.528167358984173e-07, "loss": 0.49901437759399414, "step": 6075 }, { "epoch": 1.4006916426512968, "grad_norm": 1.7238877472049492, "learning_rate": 4.524977325411702e-07, "loss": 0.47194209694862366, "step": 6076 }, { "epoch": 1.400922190201729, "grad_norm": 1.6051143361503823, "learning_rate": 4.5217880873030734e-07, "loss": 0.5385118722915649, "step": 6077 }, { "epoch": 1.4011527377521613, "grad_norm": 1.6829333959036268, "learning_rate": 4.5185996451216435e-07, "loss": 0.42518895864486694, "step": 6078 }, { "epoch": 1.4013832853025936, "grad_norm": 1.9857639773328855, "learning_rate": 4.515411999330664e-07, "loss": 0.6034430861473083, "step": 6079 }, { "epoch": 1.4016138328530259, "grad_norm": 2.1725180303091105, "learning_rate": 4.5122251503932684e-07, "loss": 0.46876388788223267, "step": 6080 }, { "epoch": 1.4018443804034582, "grad_norm": 1.4349271940518988, "learning_rate": 4.5090390987724713e-07, "loss": 0.4204791784286499, "step": 6081 }, { "epoch": 1.4020749279538904, "grad_norm": 1.5732232756880702, "learning_rate": 4.505853844931171e-07, "loss": 0.44004327058792114, "step": 6082 }, { "epoch": 1.4023054755043227, "grad_norm": 1.7944396338097273, "learning_rate": 4.502669389332149e-07, "loss": 0.47575461864471436, "step": 6083 }, { "epoch": 1.402536023054755, "grad_norm": 1.7168372511929604, "learning_rate": 4.4994857324380773e-07, "loss": 0.5163394212722778, "step": 6084 }, { "epoch": 1.4027665706051873, "grad_norm": 1.3499104261023827, "learning_rate": 4.496302874711512e-07, "loss": 0.40582704544067383, "step": 6085 }, { "epoch": 1.4029971181556196, "grad_norm": 1.8288150818970852, "learning_rate": 4.49312081661488e-07, "loss": 0.5648316740989685, "step": 6086 }, { "epoch": 1.4032276657060518, "grad_norm": 1.536379330296098, "learning_rate": 4.4899395586105113e-07, "loss": 0.4152177572250366, "step": 6087 }, { "epoch": 1.4034582132564841, "grad_norm": 1.4582400127460118, "learning_rate": 4.4867591011606057e-07, "loss": 0.395770400762558, "step": 6088 }, { "epoch": 1.4036887608069164, "grad_norm": 1.6620585836074195, "learning_rate": 4.4835794447272446e-07, "loss": 0.5302882790565491, "step": 6089 }, { "epoch": 1.4039193083573487, "grad_norm": 1.8122041008088263, "learning_rate": 4.4804005897724084e-07, "loss": 0.5821331739425659, "step": 6090 }, { "epoch": 1.404149855907781, "grad_norm": 1.8462465057477913, "learning_rate": 4.477222536757943e-07, "loss": 0.518589973449707, "step": 6091 }, { "epoch": 1.4043804034582132, "grad_norm": 1.6347459739887822, "learning_rate": 4.47404528614559e-07, "loss": 0.4511559009552002, "step": 6092 }, { "epoch": 1.4046109510086455, "grad_norm": 1.3912951974967376, "learning_rate": 4.470868838396976e-07, "loss": 0.4637323021888733, "step": 6093 }, { "epoch": 1.4048414985590778, "grad_norm": 1.7391893367323972, "learning_rate": 4.467693193973602e-07, "loss": 0.5015200972557068, "step": 6094 }, { "epoch": 1.40507204610951, "grad_norm": 1.6600607094430424, "learning_rate": 4.4645183533368515e-07, "loss": 0.34618085622787476, "step": 6095 }, { "epoch": 1.4053025936599424, "grad_norm": 1.8174784404033468, "learning_rate": 4.4613443169480023e-07, "loss": 0.5129716396331787, "step": 6096 }, { "epoch": 1.4055331412103746, "grad_norm": 1.6451722890507783, "learning_rate": 4.458171085268204e-07, "loss": 0.48730310797691345, "step": 6097 }, { "epoch": 1.405763688760807, "grad_norm": 2.017135112557972, "learning_rate": 4.4549986587584996e-07, "loss": 0.4687865376472473, "step": 6098 }, { "epoch": 1.4059942363112392, "grad_norm": 1.7161898202788912, "learning_rate": 4.451827037879804e-07, "loss": 0.43602675199508667, "step": 6099 }, { "epoch": 1.4062247838616715, "grad_norm": 1.5398135343338488, "learning_rate": 4.448656223092926e-07, "loss": 0.40164947509765625, "step": 6100 }, { "epoch": 1.4064553314121038, "grad_norm": 1.892876938818121, "learning_rate": 4.4454862148585494e-07, "loss": 0.457908570766449, "step": 6101 }, { "epoch": 1.406685878962536, "grad_norm": 1.8597516649322154, "learning_rate": 4.442317013637239e-07, "loss": 0.4982607960700989, "step": 6102 }, { "epoch": 1.4069164265129683, "grad_norm": 1.686924482899156, "learning_rate": 4.439148619889451e-07, "loss": 0.4464913606643677, "step": 6103 }, { "epoch": 1.4071469740634006, "grad_norm": 1.4308914648810174, "learning_rate": 4.435981034075525e-07, "loss": 0.4235959053039551, "step": 6104 }, { "epoch": 1.407377521613833, "grad_norm": 1.6685279003688986, "learning_rate": 4.432814256655669e-07, "loss": 0.48098224401474, "step": 6105 }, { "epoch": 1.4076080691642652, "grad_norm": 1.8164478402131154, "learning_rate": 4.429648288089992e-07, "loss": 0.5259070992469788, "step": 6106 }, { "epoch": 1.4078386167146975, "grad_norm": 2.1051952106576706, "learning_rate": 4.426483128838471e-07, "loss": 0.43548309803009033, "step": 6107 }, { "epoch": 1.4080691642651297, "grad_norm": 2.1581744627863597, "learning_rate": 4.423318779360966e-07, "loss": 0.4666636288166046, "step": 6108 }, { "epoch": 1.408299711815562, "grad_norm": 1.7160181832394292, "learning_rate": 4.4201552401172346e-07, "loss": 0.48951369524002075, "step": 6109 }, { "epoch": 1.4085302593659943, "grad_norm": 2.086438615634531, "learning_rate": 4.416992511566897e-07, "loss": 0.5804085731506348, "step": 6110 }, { "epoch": 1.4087608069164266, "grad_norm": 1.5393560212970487, "learning_rate": 4.413830594169472e-07, "loss": 0.5000404119491577, "step": 6111 }, { "epoch": 1.4089913544668589, "grad_norm": 1.5396501247377221, "learning_rate": 4.410669488384347e-07, "loss": 0.4344940185546875, "step": 6112 }, { "epoch": 1.4092219020172911, "grad_norm": 1.9142144594904624, "learning_rate": 4.407509194670803e-07, "loss": 0.4730883836746216, "step": 6113 }, { "epoch": 1.4094524495677234, "grad_norm": 1.683589524108464, "learning_rate": 4.404349713487996e-07, "loss": 0.4169773459434509, "step": 6114 }, { "epoch": 1.4096829971181557, "grad_norm": 1.4212947387266628, "learning_rate": 4.401191045294962e-07, "loss": 0.38692671060562134, "step": 6115 }, { "epoch": 1.409913544668588, "grad_norm": 1.7880224033139351, "learning_rate": 4.398033190550625e-07, "loss": 0.4843568801879883, "step": 6116 }, { "epoch": 1.4101440922190203, "grad_norm": 1.8161229642449126, "learning_rate": 4.3948761497137945e-07, "loss": 0.3911912441253662, "step": 6117 }, { "epoch": 1.4103746397694525, "grad_norm": 1.3495609018309418, "learning_rate": 4.391719923243146e-07, "loss": 0.4209800958633423, "step": 6118 }, { "epoch": 1.4106051873198848, "grad_norm": 1.484656438743802, "learning_rate": 4.3885645115972536e-07, "loss": 0.5201029777526855, "step": 6119 }, { "epoch": 1.410835734870317, "grad_norm": 1.5580728922190459, "learning_rate": 4.3854099152345636e-07, "loss": 0.46659159660339355, "step": 6120 }, { "epoch": 1.4110662824207494, "grad_norm": 1.573073448587342, "learning_rate": 4.3822561346134025e-07, "loss": 0.3859689235687256, "step": 6121 }, { "epoch": 1.4112968299711817, "grad_norm": 1.3691662874620947, "learning_rate": 4.3791031701919876e-07, "loss": 0.525857150554657, "step": 6122 }, { "epoch": 1.411527377521614, "grad_norm": 1.8712349604235057, "learning_rate": 4.3759510224284056e-07, "loss": 0.4414822459220886, "step": 6123 }, { "epoch": 1.4117579250720462, "grad_norm": 1.6938210047217055, "learning_rate": 4.3727996917806353e-07, "loss": 0.45479732751846313, "step": 6124 }, { "epoch": 1.4119884726224785, "grad_norm": 1.6359082859481933, "learning_rate": 4.3696491787065337e-07, "loss": 0.44235748052597046, "step": 6125 }, { "epoch": 1.4122190201729108, "grad_norm": 1.4596228705783454, "learning_rate": 4.366499483663836e-07, "loss": 0.4811703562736511, "step": 6126 }, { "epoch": 1.412449567723343, "grad_norm": 1.9449964414371903, "learning_rate": 4.363350607110158e-07, "loss": 0.4828331172466278, "step": 6127 }, { "epoch": 1.4126801152737753, "grad_norm": 1.4685922441222679, "learning_rate": 4.360202549502997e-07, "loss": 0.5047401785850525, "step": 6128 }, { "epoch": 1.4129106628242076, "grad_norm": 1.6609285866380643, "learning_rate": 4.3570553112997357e-07, "loss": 0.3716857135295868, "step": 6129 }, { "epoch": 1.4131412103746397, "grad_norm": 1.633832477639241, "learning_rate": 4.353908892957638e-07, "loss": 0.4703335762023926, "step": 6130 }, { "epoch": 1.413371757925072, "grad_norm": 1.7488068800395162, "learning_rate": 4.350763294933841e-07, "loss": 0.463678240776062, "step": 6131 }, { "epoch": 1.4136023054755043, "grad_norm": 1.3006583528006206, "learning_rate": 4.347618517685373e-07, "loss": 0.41314953565597534, "step": 6132 }, { "epoch": 1.4138328530259365, "grad_norm": 1.5221749066271508, "learning_rate": 4.3444745616691325e-07, "loss": 0.46069851517677307, "step": 6133 }, { "epoch": 1.4140634005763688, "grad_norm": 1.4483737045255485, "learning_rate": 4.341331427341902e-07, "loss": 0.3713275194168091, "step": 6134 }, { "epoch": 1.414293948126801, "grad_norm": 2.2040413085700057, "learning_rate": 4.338189115160353e-07, "loss": 0.5063982605934143, "step": 6135 }, { "epoch": 1.4145244956772334, "grad_norm": 1.7310624628289812, "learning_rate": 4.335047625581023e-07, "loss": 0.5195713639259338, "step": 6136 }, { "epoch": 1.4147550432276657, "grad_norm": 2.027672927737903, "learning_rate": 4.331906959060342e-07, "loss": 0.48258209228515625, "step": 6137 }, { "epoch": 1.414985590778098, "grad_norm": 1.5880816797426516, "learning_rate": 4.3287671160546193e-07, "loss": 0.47651320695877075, "step": 6138 }, { "epoch": 1.4152161383285302, "grad_norm": 1.6724019808978872, "learning_rate": 4.325628097020038e-07, "loss": 0.39837729930877686, "step": 6139 }, { "epoch": 1.4154466858789625, "grad_norm": 1.6085191304764304, "learning_rate": 4.322489902412662e-07, "loss": 0.4356096386909485, "step": 6140 }, { "epoch": 1.4156772334293948, "grad_norm": 1.5428745920415274, "learning_rate": 4.3193525326884426e-07, "loss": 0.5047112703323364, "step": 6141 }, { "epoch": 1.415907780979827, "grad_norm": 1.8689144541994767, "learning_rate": 4.316215988303203e-07, "loss": 0.5051916241645813, "step": 6142 }, { "epoch": 1.4161383285302593, "grad_norm": 1.6542346539121724, "learning_rate": 4.313080269712658e-07, "loss": 0.4928128123283386, "step": 6143 }, { "epoch": 1.4163688760806916, "grad_norm": 1.7523545888738172, "learning_rate": 4.309945377372385e-07, "loss": 0.4253290891647339, "step": 6144 }, { "epoch": 1.416599423631124, "grad_norm": 1.5588360330776463, "learning_rate": 4.3068113117378603e-07, "loss": 0.4693288207054138, "step": 6145 }, { "epoch": 1.4168299711815562, "grad_norm": 1.7376987458736854, "learning_rate": 4.3036780732644273e-07, "loss": 0.3377845287322998, "step": 6146 }, { "epoch": 1.4170605187319885, "grad_norm": 1.5961011734430122, "learning_rate": 4.300545662407309e-07, "loss": 0.44026291370391846, "step": 6147 }, { "epoch": 1.4172910662824207, "grad_norm": 1.6678026768298122, "learning_rate": 4.297414079621615e-07, "loss": 0.4033926725387573, "step": 6148 }, { "epoch": 1.417521613832853, "grad_norm": 1.591044742689737, "learning_rate": 4.2942833253623357e-07, "loss": 0.4513046145439148, "step": 6149 }, { "epoch": 1.4177521613832853, "grad_norm": 1.7764371951105735, "learning_rate": 4.2911534000843306e-07, "loss": 0.5358277559280396, "step": 6150 }, { "epoch": 1.4179827089337176, "grad_norm": 1.5239739695025156, "learning_rate": 4.2880243042423524e-07, "loss": 0.47580230236053467, "step": 6151 }, { "epoch": 1.4182132564841499, "grad_norm": 1.557650739221615, "learning_rate": 4.2848960382910225e-07, "loss": 0.4727135896682739, "step": 6152 }, { "epoch": 1.4184438040345821, "grad_norm": 1.71869847258335, "learning_rate": 4.281768602684841e-07, "loss": 0.4576184153556824, "step": 6153 }, { "epoch": 1.4186743515850144, "grad_norm": 1.8118055618452378, "learning_rate": 4.2786419978782006e-07, "loss": 0.4298781156539917, "step": 6154 }, { "epoch": 1.4189048991354467, "grad_norm": 1.3541799773484497, "learning_rate": 4.275516224325355e-07, "loss": 0.42015182971954346, "step": 6155 }, { "epoch": 1.419135446685879, "grad_norm": 1.8522934176845671, "learning_rate": 4.272391282480455e-07, "loss": 0.5046502351760864, "step": 6156 }, { "epoch": 1.4193659942363113, "grad_norm": 1.7317433079809288, "learning_rate": 4.2692671727975193e-07, "loss": 0.46464890241622925, "step": 6157 }, { "epoch": 1.4195965417867435, "grad_norm": 1.9285455657348056, "learning_rate": 4.266143895730444e-07, "loss": 0.456853449344635, "step": 6158 }, { "epoch": 1.4198270893371758, "grad_norm": 1.5730881793585256, "learning_rate": 4.2630214517330167e-07, "loss": 0.5618214011192322, "step": 6159 }, { "epoch": 1.420057636887608, "grad_norm": 1.8422559070497995, "learning_rate": 4.259899841258887e-07, "loss": 0.47811365127563477, "step": 6160 }, { "epoch": 1.4202881844380404, "grad_norm": 1.8468838919820687, "learning_rate": 4.2567790647615974e-07, "loss": 0.40685200691223145, "step": 6161 }, { "epoch": 1.4205187319884727, "grad_norm": 1.9439131357752444, "learning_rate": 4.2536591226945685e-07, "loss": 0.4835454821586609, "step": 6162 }, { "epoch": 1.420749279538905, "grad_norm": 1.5657796902096128, "learning_rate": 4.2505400155110904e-07, "loss": 0.46967822313308716, "step": 6163 }, { "epoch": 1.4209798270893372, "grad_norm": 1.4756380896942416, "learning_rate": 4.247421743664339e-07, "loss": 0.41301921010017395, "step": 6164 }, { "epoch": 1.4212103746397695, "grad_norm": 1.567339455776478, "learning_rate": 4.2443043076073603e-07, "loss": 0.4748044013977051, "step": 6165 }, { "epoch": 1.4214409221902018, "grad_norm": 1.7509442825646346, "learning_rate": 4.24118770779309e-07, "loss": 0.5216407775878906, "step": 6166 }, { "epoch": 1.421671469740634, "grad_norm": 1.3159416230441767, "learning_rate": 4.238071944674343e-07, "loss": 0.4520043730735779, "step": 6167 }, { "epoch": 1.4219020172910664, "grad_norm": 1.5360413927774947, "learning_rate": 4.2349570187037985e-07, "loss": 0.3486665189266205, "step": 6168 }, { "epoch": 1.4221325648414986, "grad_norm": 1.5354184007061404, "learning_rate": 4.2318429303340297e-07, "loss": 0.4190082550048828, "step": 6169 }, { "epoch": 1.422363112391931, "grad_norm": 1.7686291285808324, "learning_rate": 4.228729680017479e-07, "loss": 0.4878532290458679, "step": 6170 }, { "epoch": 1.422593659942363, "grad_norm": 1.5021962460950842, "learning_rate": 4.225617268206464e-07, "loss": 0.4264869689941406, "step": 6171 }, { "epoch": 1.4228242074927953, "grad_norm": 1.5869703714522394, "learning_rate": 4.2225056953531933e-07, "loss": 0.47751015424728394, "step": 6172 }, { "epoch": 1.4230547550432275, "grad_norm": 1.9078138295934728, "learning_rate": 4.21939496190974e-07, "loss": 0.45385488867759705, "step": 6173 }, { "epoch": 1.4232853025936598, "grad_norm": 1.5958951589441288, "learning_rate": 4.216285068328065e-07, "loss": 0.4519824981689453, "step": 6174 }, { "epoch": 1.423515850144092, "grad_norm": 1.5192412548501215, "learning_rate": 4.213176015060006e-07, "loss": 0.49791768193244934, "step": 6175 }, { "epoch": 1.4237463976945244, "grad_norm": 1.527247990921823, "learning_rate": 4.2100678025572724e-07, "loss": 0.44258758425712585, "step": 6176 }, { "epoch": 1.4239769452449567, "grad_norm": 1.6937889737807723, "learning_rate": 4.2069604312714525e-07, "loss": 0.4176792502403259, "step": 6177 }, { "epoch": 1.424207492795389, "grad_norm": 1.8648960890447026, "learning_rate": 4.203853901654021e-07, "loss": 0.4810779392719269, "step": 6178 }, { "epoch": 1.4244380403458212, "grad_norm": 1.6056895640711444, "learning_rate": 4.2007482141563186e-07, "loss": 0.5062845945358276, "step": 6179 }, { "epoch": 1.4246685878962535, "grad_norm": 1.6213901133636355, "learning_rate": 4.1976433692295754e-07, "loss": 0.4448728561401367, "step": 6180 }, { "epoch": 1.4248991354466858, "grad_norm": 1.4307330121028476, "learning_rate": 4.1945393673248873e-07, "loss": 0.47862520813941956, "step": 6181 }, { "epoch": 1.425129682997118, "grad_norm": 1.703952709162038, "learning_rate": 4.1914362088932386e-07, "loss": 0.48513489961624146, "step": 6182 }, { "epoch": 1.4253602305475503, "grad_norm": 2.0858372404714314, "learning_rate": 4.188333894385484e-07, "loss": 0.5556698441505432, "step": 6183 }, { "epoch": 1.4255907780979826, "grad_norm": 1.5697295606732804, "learning_rate": 4.185232424252353e-07, "loss": 0.5382585525512695, "step": 6184 }, { "epoch": 1.425821325648415, "grad_norm": 1.834044011941015, "learning_rate": 4.182131798944462e-07, "loss": 0.5234952569007874, "step": 6185 }, { "epoch": 1.4260518731988472, "grad_norm": 2.054448938324408, "learning_rate": 4.179032018912301e-07, "loss": 0.44383174180984497, "step": 6186 }, { "epoch": 1.4262824207492795, "grad_norm": 1.6371314228420715, "learning_rate": 4.1759330846062303e-07, "loss": 0.47934067249298096, "step": 6187 }, { "epoch": 1.4265129682997117, "grad_norm": 1.5588160900794392, "learning_rate": 4.1728349964764984e-07, "loss": 0.5123411417007446, "step": 6188 }, { "epoch": 1.426743515850144, "grad_norm": 1.4601611877162428, "learning_rate": 4.1697377549732236e-07, "loss": 0.3838074803352356, "step": 6189 }, { "epoch": 1.4269740634005763, "grad_norm": 1.4713344422959787, "learning_rate": 4.166641360546399e-07, "loss": 0.3905826210975647, "step": 6190 }, { "epoch": 1.4272046109510086, "grad_norm": 1.6243224441884077, "learning_rate": 4.1635458136459044e-07, "loss": 0.4994160830974579, "step": 6191 }, { "epoch": 1.4274351585014409, "grad_norm": 1.4096266963123292, "learning_rate": 4.1604511147214836e-07, "loss": 0.43674254417419434, "step": 6192 }, { "epoch": 1.4276657060518732, "grad_norm": 1.5219590055657222, "learning_rate": 4.1573572642227694e-07, "loss": 0.46856212615966797, "step": 6193 }, { "epoch": 1.4278962536023054, "grad_norm": 1.5309187370305974, "learning_rate": 4.1542642625992674e-07, "loss": 0.4608234167098999, "step": 6194 }, { "epoch": 1.4281268011527377, "grad_norm": 1.4332212544399399, "learning_rate": 4.151172110300356e-07, "loss": 0.4033564329147339, "step": 6195 }, { "epoch": 1.42835734870317, "grad_norm": 1.6166843007495713, "learning_rate": 4.1480808077752936e-07, "loss": 0.4321993589401245, "step": 6196 }, { "epoch": 1.4285878962536023, "grad_norm": 1.4833337580993968, "learning_rate": 4.1449903554732104e-07, "loss": 0.40581172704696655, "step": 6197 }, { "epoch": 1.4288184438040346, "grad_norm": 1.8252750599229672, "learning_rate": 4.1419007538431186e-07, "loss": 0.4893447160720825, "step": 6198 }, { "epoch": 1.4290489913544668, "grad_norm": 1.5117375099736445, "learning_rate": 4.138812003333911e-07, "loss": 0.43434032797813416, "step": 6199 }, { "epoch": 1.4292795389048991, "grad_norm": 1.806419884918517, "learning_rate": 4.1357241043943426e-07, "loss": 0.5144214630126953, "step": 6200 }, { "epoch": 1.4295100864553314, "grad_norm": 1.5993245565697396, "learning_rate": 4.132637057473062e-07, "loss": 0.44391340017318726, "step": 6201 }, { "epoch": 1.4297406340057637, "grad_norm": 1.4128063462984315, "learning_rate": 4.1295508630185785e-07, "loss": 0.41327035427093506, "step": 6202 }, { "epoch": 1.429971181556196, "grad_norm": 1.4869234266710627, "learning_rate": 4.126465521479282e-07, "loss": 0.4482381343841553, "step": 6203 }, { "epoch": 1.4302017291066282, "grad_norm": 1.7027878810602206, "learning_rate": 4.123381033303448e-07, "loss": 0.6047927737236023, "step": 6204 }, { "epoch": 1.4304322766570605, "grad_norm": 1.842833087774749, "learning_rate": 4.1202973989392133e-07, "loss": 0.5015072822570801, "step": 6205 }, { "epoch": 1.4306628242074928, "grad_norm": 1.700957541882254, "learning_rate": 4.117214618834601e-07, "loss": 0.38783231377601624, "step": 6206 }, { "epoch": 1.430893371757925, "grad_norm": 1.838628795440048, "learning_rate": 4.1141326934375107e-07, "loss": 0.5287540555000305, "step": 6207 }, { "epoch": 1.4311239193083574, "grad_norm": 1.5517084366812568, "learning_rate": 4.1110516231957103e-07, "loss": 0.475554883480072, "step": 6208 }, { "epoch": 1.4313544668587896, "grad_norm": 1.6639045521148432, "learning_rate": 4.1079714085568486e-07, "loss": 0.5235040187835693, "step": 6209 }, { "epoch": 1.431585014409222, "grad_norm": 1.9325689770095837, "learning_rate": 4.1048920499684427e-07, "loss": 0.45399802923202515, "step": 6210 }, { "epoch": 1.4318155619596542, "grad_norm": 1.3260149665798413, "learning_rate": 4.101813547877897e-07, "loss": 0.4158547520637512, "step": 6211 }, { "epoch": 1.4320461095100865, "grad_norm": 1.558704413544226, "learning_rate": 4.0987359027324886e-07, "loss": 0.41257238388061523, "step": 6212 }, { "epoch": 1.4322766570605188, "grad_norm": 1.891001021238512, "learning_rate": 4.0956591149793607e-07, "loss": 0.5313294529914856, "step": 6213 }, { "epoch": 1.432507204610951, "grad_norm": 1.2595979569069053, "learning_rate": 4.0925831850655444e-07, "loss": 0.4157707095146179, "step": 6214 }, { "epoch": 1.4327377521613833, "grad_norm": 1.2040642494272713, "learning_rate": 4.0895081134379394e-07, "loss": 0.4087299704551697, "step": 6215 }, { "epoch": 1.4329682997118156, "grad_norm": 1.7148771732386008, "learning_rate": 4.0864339005433145e-07, "loss": 0.3862289488315582, "step": 6216 }, { "epoch": 1.4331988472622479, "grad_norm": 1.9798056573423377, "learning_rate": 4.0833605468283317e-07, "loss": 0.523725152015686, "step": 6217 }, { "epoch": 1.4334293948126802, "grad_norm": 1.4407692345614906, "learning_rate": 4.0802880527395067e-07, "loss": 0.49756765365600586, "step": 6218 }, { "epoch": 1.4336599423631124, "grad_norm": 1.367214788762715, "learning_rate": 4.077216418723246e-07, "loss": 0.46542513370513916, "step": 6219 }, { "epoch": 1.4338904899135447, "grad_norm": 1.4834393486713002, "learning_rate": 4.0741456452258307e-07, "loss": 0.477664589881897, "step": 6220 }, { "epoch": 1.434121037463977, "grad_norm": 1.7561511956990303, "learning_rate": 4.0710757326934074e-07, "loss": 0.43805643916130066, "step": 6221 }, { "epoch": 1.4343515850144093, "grad_norm": 1.4656378770889396, "learning_rate": 4.0680066815719995e-07, "loss": 0.42762941122055054, "step": 6222 }, { "epoch": 1.4345821325648416, "grad_norm": 1.3924764249205592, "learning_rate": 4.064938492307515e-07, "loss": 0.39931389689445496, "step": 6223 }, { "epoch": 1.4348126801152739, "grad_norm": 1.7265894992285387, "learning_rate": 4.0618711653457216e-07, "loss": 0.4285869002342224, "step": 6224 }, { "epoch": 1.4350432276657061, "grad_norm": 1.509947149065422, "learning_rate": 4.05880470113228e-07, "loss": 0.36583176255226135, "step": 6225 }, { "epoch": 1.4352737752161384, "grad_norm": 1.8527675270132944, "learning_rate": 4.055739100112705e-07, "loss": 0.6161515712738037, "step": 6226 }, { "epoch": 1.4355043227665707, "grad_norm": 1.952912084401196, "learning_rate": 4.0526743627324064e-07, "loss": 0.5286136269569397, "step": 6227 }, { "epoch": 1.435734870317003, "grad_norm": 1.5581410654741574, "learning_rate": 4.0496104894366547e-07, "loss": 0.5855327844619751, "step": 6228 }, { "epoch": 1.4359654178674353, "grad_norm": 1.4944372815532387, "learning_rate": 4.0465474806705937e-07, "loss": 0.47179684042930603, "step": 6229 }, { "epoch": 1.4361959654178675, "grad_norm": 1.4543699646458046, "learning_rate": 4.043485336879252e-07, "loss": 0.3799450993537903, "step": 6230 }, { "epoch": 1.4364265129682998, "grad_norm": 1.6053005008404755, "learning_rate": 4.040424058507529e-07, "loss": 0.49765193462371826, "step": 6231 }, { "epoch": 1.436657060518732, "grad_norm": 1.729005204741036, "learning_rate": 4.0373636460001917e-07, "loss": 0.4718540906906128, "step": 6232 }, { "epoch": 1.4368876080691644, "grad_norm": 1.6500825373448902, "learning_rate": 4.034304099801891e-07, "loss": 0.566913366317749, "step": 6233 }, { "epoch": 1.4371181556195967, "grad_norm": 1.4341252322387557, "learning_rate": 4.0312454203571455e-07, "loss": 0.3900049924850464, "step": 6234 }, { "epoch": 1.437348703170029, "grad_norm": 1.955668386191478, "learning_rate": 4.028187608110345e-07, "loss": 0.3742540180683136, "step": 6235 }, { "epoch": 1.4375792507204612, "grad_norm": 1.5530650034379911, "learning_rate": 4.025130663505765e-07, "loss": 0.4583272933959961, "step": 6236 }, { "epoch": 1.4378097982708935, "grad_norm": 2.011266106765332, "learning_rate": 4.0220745869875394e-07, "loss": 0.4786511957645416, "step": 6237 }, { "epoch": 1.4380403458213258, "grad_norm": 1.5855947294064638, "learning_rate": 4.0190193789996907e-07, "loss": 0.5637160539627075, "step": 6238 }, { "epoch": 1.438270893371758, "grad_norm": 1.6000199656569591, "learning_rate": 4.015965039986111e-07, "loss": 0.5228704214096069, "step": 6239 }, { "epoch": 1.4385014409221901, "grad_norm": 1.5727211756275186, "learning_rate": 4.012911570390559e-07, "loss": 0.4603196084499359, "step": 6240 }, { "epoch": 1.4387319884726224, "grad_norm": 1.7564085712912887, "learning_rate": 4.0098589706566743e-07, "loss": 0.4373057186603546, "step": 6241 }, { "epoch": 1.4389625360230547, "grad_norm": 1.3059861405345108, "learning_rate": 4.006807241227964e-07, "loss": 0.3848613500595093, "step": 6242 }, { "epoch": 1.439193083573487, "grad_norm": 1.773866478707716, "learning_rate": 4.0037563825478147e-07, "loss": 0.45864033699035645, "step": 6243 }, { "epoch": 1.4394236311239192, "grad_norm": 1.7833304023969065, "learning_rate": 4.0007063950594887e-07, "loss": 0.5278322696685791, "step": 6244 }, { "epoch": 1.4396541786743515, "grad_norm": 1.652601767877407, "learning_rate": 3.9976572792061115e-07, "loss": 0.500469982624054, "step": 6245 }, { "epoch": 1.4398847262247838, "grad_norm": 1.6838341724119685, "learning_rate": 3.994609035430694e-07, "loss": 0.487979531288147, "step": 6246 }, { "epoch": 1.440115273775216, "grad_norm": 1.6309708233356133, "learning_rate": 3.9915616641761096e-07, "loss": 0.5660527944564819, "step": 6247 }, { "epoch": 1.4403458213256484, "grad_norm": 1.6648921828279533, "learning_rate": 3.988515165885108e-07, "loss": 0.41596394777297974, "step": 6248 }, { "epoch": 1.4405763688760806, "grad_norm": 1.8488691582565622, "learning_rate": 3.9854695410003204e-07, "loss": 0.41838061809539795, "step": 6249 }, { "epoch": 1.440806916426513, "grad_norm": 1.8635370267148315, "learning_rate": 3.982424789964237e-07, "loss": 0.4344887137413025, "step": 6250 }, { "epoch": 1.4410374639769452, "grad_norm": 1.596955676535653, "learning_rate": 3.979380913219231e-07, "loss": 0.44062328338623047, "step": 6251 }, { "epoch": 1.4412680115273775, "grad_norm": 1.8121811553129317, "learning_rate": 3.976337911207552e-07, "loss": 0.4305090308189392, "step": 6252 }, { "epoch": 1.4414985590778098, "grad_norm": 1.5838295963307605, "learning_rate": 3.9732957843713113e-07, "loss": 0.502083957195282, "step": 6253 }, { "epoch": 1.441729106628242, "grad_norm": 1.427531595240151, "learning_rate": 3.9702545331524986e-07, "loss": 0.47407883405685425, "step": 6254 }, { "epoch": 1.4419596541786743, "grad_norm": 1.7242438620668297, "learning_rate": 3.967214157992972e-07, "loss": 0.4704144597053528, "step": 6255 }, { "epoch": 1.4421902017291066, "grad_norm": 1.451462667384279, "learning_rate": 3.9641746593344705e-07, "loss": 0.4758627116680145, "step": 6256 }, { "epoch": 1.442420749279539, "grad_norm": 1.5373451481480342, "learning_rate": 3.961136037618605e-07, "loss": 0.4934813976287842, "step": 6257 }, { "epoch": 1.4426512968299712, "grad_norm": 1.2690471851311347, "learning_rate": 3.958098293286849e-07, "loss": 0.4143943190574646, "step": 6258 }, { "epoch": 1.4428818443804035, "grad_norm": 1.654088578407426, "learning_rate": 3.9550614267805613e-07, "loss": 0.5096204876899719, "step": 6259 }, { "epoch": 1.4431123919308357, "grad_norm": 1.8140254863740195, "learning_rate": 3.9520254385409647e-07, "loss": 0.5065678358078003, "step": 6260 }, { "epoch": 1.443342939481268, "grad_norm": 1.8200536333409338, "learning_rate": 3.948990329009152e-07, "loss": 0.5405898094177246, "step": 6261 }, { "epoch": 1.4435734870317003, "grad_norm": 1.414774723719948, "learning_rate": 3.945956098626101e-07, "loss": 0.555136501789093, "step": 6262 }, { "epoch": 1.4438040345821326, "grad_norm": 1.4749429352123176, "learning_rate": 3.9429227478326466e-07, "loss": 0.4158381223678589, "step": 6263 }, { "epoch": 1.4440345821325649, "grad_norm": 1.8063967161180363, "learning_rate": 3.9398902770695065e-07, "loss": 0.5013213157653809, "step": 6264 }, { "epoch": 1.4442651296829971, "grad_norm": 1.6664196354264296, "learning_rate": 3.936858686777269e-07, "loss": 0.4610293209552765, "step": 6265 }, { "epoch": 1.4444956772334294, "grad_norm": 1.5189284441427042, "learning_rate": 3.933827977396392e-07, "loss": 0.4541108310222626, "step": 6266 }, { "epoch": 1.4447262247838617, "grad_norm": 1.7447582201728986, "learning_rate": 3.9307981493672017e-07, "loss": 0.5167892575263977, "step": 6267 }, { "epoch": 1.444956772334294, "grad_norm": 1.7117416076745173, "learning_rate": 3.927769203129907e-07, "loss": 0.49022093415260315, "step": 6268 }, { "epoch": 1.4451873198847263, "grad_norm": 1.551473147802093, "learning_rate": 3.924741139124574e-07, "loss": 0.5062391757965088, "step": 6269 }, { "epoch": 1.4454178674351585, "grad_norm": 1.4758917732598034, "learning_rate": 3.9217139577911586e-07, "loss": 0.38461071252822876, "step": 6270 }, { "epoch": 1.4456484149855908, "grad_norm": 1.6377148611250167, "learning_rate": 3.9186876595694706e-07, "loss": 0.48669880628585815, "step": 6271 }, { "epoch": 1.445878962536023, "grad_norm": 1.560585881225336, "learning_rate": 3.915662244899206e-07, "loss": 0.4502665400505066, "step": 6272 }, { "epoch": 1.4461095100864554, "grad_norm": 1.8121212441561765, "learning_rate": 3.912637714219923e-07, "loss": 0.46156373620033264, "step": 6273 }, { "epoch": 1.4463400576368877, "grad_norm": 1.6312679824277865, "learning_rate": 3.909614067971051e-07, "loss": 0.45263969898223877, "step": 6274 }, { "epoch": 1.44657060518732, "grad_norm": 1.5703236593794554, "learning_rate": 3.906591306591899e-07, "loss": 0.4911407232284546, "step": 6275 }, { "epoch": 1.4468011527377522, "grad_norm": 1.6102563776848093, "learning_rate": 3.903569430521644e-07, "loss": 0.4695231318473816, "step": 6276 }, { "epoch": 1.4470317002881845, "grad_norm": 1.6730607470278018, "learning_rate": 3.9005484401993314e-07, "loss": 0.43057340383529663, "step": 6277 }, { "epoch": 1.4472622478386168, "grad_norm": 1.7564179291630202, "learning_rate": 3.897528336063879e-07, "loss": 0.49983319640159607, "step": 6278 }, { "epoch": 1.447492795389049, "grad_norm": 1.5495814925532476, "learning_rate": 3.8945091185540725e-07, "loss": 0.46399611234664917, "step": 6279 }, { "epoch": 1.4477233429394814, "grad_norm": 1.6622842234199082, "learning_rate": 3.891490788108578e-07, "loss": 0.44408589601516724, "step": 6280 }, { "epoch": 1.4479538904899134, "grad_norm": 1.922048473357172, "learning_rate": 3.888473345165929e-07, "loss": 0.3725231885910034, "step": 6281 }, { "epoch": 1.4481844380403457, "grad_norm": 1.7108395707282047, "learning_rate": 3.885456790164523e-07, "loss": 0.4412611722946167, "step": 6282 }, { "epoch": 1.448414985590778, "grad_norm": 1.5115694649774922, "learning_rate": 3.8824411235426404e-07, "loss": 0.48282700777053833, "step": 6283 }, { "epoch": 1.4486455331412103, "grad_norm": 1.4372123625640567, "learning_rate": 3.8794263457384226e-07, "loss": 0.443182110786438, "step": 6284 }, { "epoch": 1.4488760806916425, "grad_norm": 1.6465099767718465, "learning_rate": 3.8764124571898826e-07, "loss": 0.5350714325904846, "step": 6285 }, { "epoch": 1.4491066282420748, "grad_norm": 1.621326131358088, "learning_rate": 3.8733994583349136e-07, "loss": 0.48168644309043884, "step": 6286 }, { "epoch": 1.449337175792507, "grad_norm": 2.0377729139688197, "learning_rate": 3.870387349611266e-07, "loss": 0.5208690166473389, "step": 6287 }, { "epoch": 1.4495677233429394, "grad_norm": 1.5878874835937575, "learning_rate": 3.867376131456571e-07, "loss": 0.39023950695991516, "step": 6288 }, { "epoch": 1.4497982708933717, "grad_norm": 1.6699418552277543, "learning_rate": 3.864365804308333e-07, "loss": 0.5270309448242188, "step": 6289 }, { "epoch": 1.450028818443804, "grad_norm": 1.7984598642312444, "learning_rate": 3.861356368603914e-07, "loss": 0.46370822191238403, "step": 6290 }, { "epoch": 1.4502593659942362, "grad_norm": 1.629232628741741, "learning_rate": 3.8583478247805554e-07, "loss": 0.4571373462677002, "step": 6291 }, { "epoch": 1.4504899135446685, "grad_norm": 1.8809560181882357, "learning_rate": 3.855340173275365e-07, "loss": 0.5077657103538513, "step": 6292 }, { "epoch": 1.4507204610951008, "grad_norm": 1.4749726509433596, "learning_rate": 3.852333414525326e-07, "loss": 0.47071346640586853, "step": 6293 }, { "epoch": 1.450951008645533, "grad_norm": 1.6663865831998346, "learning_rate": 3.8493275489672914e-07, "loss": 0.5249844789505005, "step": 6294 }, { "epoch": 1.4511815561959653, "grad_norm": 1.6297516532596577, "learning_rate": 3.846322577037977e-07, "loss": 0.4389895796775818, "step": 6295 }, { "epoch": 1.4514121037463976, "grad_norm": 1.9107463185916174, "learning_rate": 3.8433184991739797e-07, "loss": 0.47682899236679077, "step": 6296 }, { "epoch": 1.45164265129683, "grad_norm": 1.583134869267576, "learning_rate": 3.8403153158117585e-07, "loss": 0.5012357831001282, "step": 6297 }, { "epoch": 1.4518731988472622, "grad_norm": 1.7933259402091364, "learning_rate": 3.83731302738764e-07, "loss": 0.4444109797477722, "step": 6298 }, { "epoch": 1.4521037463976945, "grad_norm": 1.6273839601687068, "learning_rate": 3.8343116343378333e-07, "loss": 0.401306688785553, "step": 6299 }, { "epoch": 1.4523342939481267, "grad_norm": 1.5929632009179213, "learning_rate": 3.831311137098402e-07, "loss": 0.44299593567848206, "step": 6300 }, { "epoch": 1.452564841498559, "grad_norm": 1.5423692013544683, "learning_rate": 3.828311536105291e-07, "loss": 0.4148893654346466, "step": 6301 }, { "epoch": 1.4527953890489913, "grad_norm": 1.8002510409281733, "learning_rate": 3.825312831794314e-07, "loss": 0.4563109278678894, "step": 6302 }, { "epoch": 1.4530259365994236, "grad_norm": 1.6337296475781216, "learning_rate": 3.822315024601149e-07, "loss": 0.4540612995624542, "step": 6303 }, { "epoch": 1.4532564841498559, "grad_norm": 1.7199257308385885, "learning_rate": 3.819318114961343e-07, "loss": 0.4511542320251465, "step": 6304 }, { "epoch": 1.4534870317002881, "grad_norm": 1.8322950511376024, "learning_rate": 3.816322103310321e-07, "loss": 0.47727471590042114, "step": 6305 }, { "epoch": 1.4537175792507204, "grad_norm": 1.674326740334363, "learning_rate": 3.8133269900833664e-07, "loss": 0.4711531400680542, "step": 6306 }, { "epoch": 1.4539481268011527, "grad_norm": 1.4365052516515433, "learning_rate": 3.8103327757156454e-07, "loss": 0.4542125463485718, "step": 6307 }, { "epoch": 1.454178674351585, "grad_norm": 1.7375177208970245, "learning_rate": 3.807339460642178e-07, "loss": 0.5262948274612427, "step": 6308 }, { "epoch": 1.4544092219020173, "grad_norm": 1.4962137087496394, "learning_rate": 3.804347045297871e-07, "loss": 0.5131025314331055, "step": 6309 }, { "epoch": 1.4546397694524495, "grad_norm": 1.8610634194791733, "learning_rate": 3.801355530117485e-07, "loss": 0.41658881306648254, "step": 6310 }, { "epoch": 1.4548703170028818, "grad_norm": 1.7539949314322378, "learning_rate": 3.7983649155356533e-07, "loss": 0.4876325726509094, "step": 6311 }, { "epoch": 1.455100864553314, "grad_norm": 1.5534286551953016, "learning_rate": 3.7953752019868865e-07, "loss": 0.4055835008621216, "step": 6312 }, { "epoch": 1.4553314121037464, "grad_norm": 1.4355176124522515, "learning_rate": 3.79238638990556e-07, "loss": 0.45018666982650757, "step": 6313 }, { "epoch": 1.4555619596541787, "grad_norm": 2.33610396723976, "learning_rate": 3.7893984797259113e-07, "loss": 0.5295370817184448, "step": 6314 }, { "epoch": 1.455792507204611, "grad_norm": 1.6012578754804487, "learning_rate": 3.7864114718820594e-07, "loss": 0.44755294919013977, "step": 6315 }, { "epoch": 1.4560230547550432, "grad_norm": 1.4087585915017262, "learning_rate": 3.783425366807982e-07, "loss": 0.3790748119354248, "step": 6316 }, { "epoch": 1.4562536023054755, "grad_norm": 1.4371137527160003, "learning_rate": 3.780440164937525e-07, "loss": 0.44833457469940186, "step": 6317 }, { "epoch": 1.4564841498559078, "grad_norm": 1.5127457638273085, "learning_rate": 3.7774558667044154e-07, "loss": 0.486098051071167, "step": 6318 }, { "epoch": 1.45671469740634, "grad_norm": 1.6312165307012176, "learning_rate": 3.774472472542233e-07, "loss": 0.5650969743728638, "step": 6319 }, { "epoch": 1.4569452449567724, "grad_norm": 1.576011420615042, "learning_rate": 3.771489982884437e-07, "loss": 0.5329450368881226, "step": 6320 }, { "epoch": 1.4571757925072046, "grad_norm": 1.275904497817475, "learning_rate": 3.768508398164356e-07, "loss": 0.4540603756904602, "step": 6321 }, { "epoch": 1.457406340057637, "grad_norm": 1.7203841934704942, "learning_rate": 3.765527718815181e-07, "loss": 0.4849478304386139, "step": 6322 }, { "epoch": 1.4576368876080692, "grad_norm": 1.8368644261234324, "learning_rate": 3.7625479452699714e-07, "loss": 0.5025255680084229, "step": 6323 }, { "epoch": 1.4578674351585015, "grad_norm": 1.5544842869111484, "learning_rate": 3.7595690779616554e-07, "loss": 0.4498249888420105, "step": 6324 }, { "epoch": 1.4580979827089338, "grad_norm": 1.4645542423324267, "learning_rate": 3.7565911173230347e-07, "loss": 0.43486863374710083, "step": 6325 }, { "epoch": 1.458328530259366, "grad_norm": 1.4567650293051104, "learning_rate": 3.7536140637867784e-07, "loss": 0.45068681240081787, "step": 6326 }, { "epoch": 1.4585590778097983, "grad_norm": 1.640676579677221, "learning_rate": 3.750637917785415e-07, "loss": 0.48379603028297424, "step": 6327 }, { "epoch": 1.4587896253602306, "grad_norm": 1.5536049420371754, "learning_rate": 3.7476626797513564e-07, "loss": 0.4718289375305176, "step": 6328 }, { "epoch": 1.4590201729106629, "grad_norm": 1.4598459872980478, "learning_rate": 3.744688350116868e-07, "loss": 0.4820924997329712, "step": 6329 }, { "epoch": 1.4592507204610952, "grad_norm": 1.4067339533366963, "learning_rate": 3.741714929314086e-07, "loss": 0.4419173002243042, "step": 6330 }, { "epoch": 1.4594812680115274, "grad_norm": 1.5713519098289923, "learning_rate": 3.7387424177750237e-07, "loss": 0.49854522943496704, "step": 6331 }, { "epoch": 1.4597118155619597, "grad_norm": 1.5875195079981788, "learning_rate": 3.7357708159315514e-07, "loss": 0.48068612813949585, "step": 6332 }, { "epoch": 1.459942363112392, "grad_norm": 1.7026609781525415, "learning_rate": 3.732800124215414e-07, "loss": 0.511599063873291, "step": 6333 }, { "epoch": 1.4601729106628243, "grad_norm": 1.4797586406170442, "learning_rate": 3.7298303430582245e-07, "loss": 0.5052134990692139, "step": 6334 }, { "epoch": 1.4604034582132566, "grad_norm": 1.7667173800076938, "learning_rate": 3.7268614728914606e-07, "loss": 0.4742302894592285, "step": 6335 }, { "epoch": 1.4606340057636888, "grad_norm": 1.403636658794488, "learning_rate": 3.7238935141464644e-07, "loss": 0.5089839696884155, "step": 6336 }, { "epoch": 1.4608645533141211, "grad_norm": 2.0880344022264437, "learning_rate": 3.720926467254449e-07, "loss": 0.5136945247650146, "step": 6337 }, { "epoch": 1.4610951008645534, "grad_norm": 1.5636116190285216, "learning_rate": 3.7179603326464993e-07, "loss": 0.3947451710700989, "step": 6338 }, { "epoch": 1.4613256484149857, "grad_norm": 1.7992854560678408, "learning_rate": 3.714995110753565e-07, "loss": 0.48703646659851074, "step": 6339 }, { "epoch": 1.461556195965418, "grad_norm": 1.6901577747405614, "learning_rate": 3.712030802006455e-07, "loss": 0.4713754951953888, "step": 6340 }, { "epoch": 1.4617867435158503, "grad_norm": 1.47391484527644, "learning_rate": 3.709067406835862e-07, "loss": 0.4496157169342041, "step": 6341 }, { "epoch": 1.4620172910662825, "grad_norm": 1.7559130114451649, "learning_rate": 3.706104925672331e-07, "loss": 0.5094351768493652, "step": 6342 }, { "epoch": 1.4622478386167148, "grad_norm": 1.274246430476679, "learning_rate": 3.7031433589462766e-07, "loss": 0.46877321600914, "step": 6343 }, { "epoch": 1.462478386167147, "grad_norm": 1.9260152638351333, "learning_rate": 3.700182707087991e-07, "loss": 0.3847515881061554, "step": 6344 }, { "epoch": 1.4627089337175794, "grad_norm": 1.453999915283592, "learning_rate": 3.697222970527618e-07, "loss": 0.5548876523971558, "step": 6345 }, { "epoch": 1.4629394812680117, "grad_norm": 1.4413433555035606, "learning_rate": 3.694264149695182e-07, "loss": 0.47204387187957764, "step": 6346 }, { "epoch": 1.463170028818444, "grad_norm": 1.6869492340215804, "learning_rate": 3.6913062450205714e-07, "loss": 0.47992634773254395, "step": 6347 }, { "epoch": 1.4634005763688762, "grad_norm": 1.5705466032049853, "learning_rate": 3.688349256933534e-07, "loss": 0.5625392198562622, "step": 6348 }, { "epoch": 1.4636311239193083, "grad_norm": 1.4260083123464304, "learning_rate": 3.685393185863689e-07, "loss": 0.3677716851234436, "step": 6349 }, { "epoch": 1.4638616714697406, "grad_norm": 1.5900275170626974, "learning_rate": 3.682438032240527e-07, "loss": 0.3630062937736511, "step": 6350 }, { "epoch": 1.4640922190201728, "grad_norm": 1.4447444002714263, "learning_rate": 3.6794837964933943e-07, "loss": 0.5151525139808655, "step": 6351 }, { "epoch": 1.4643227665706051, "grad_norm": 1.6404693772770675, "learning_rate": 3.6765304790515193e-07, "loss": 0.37844717502593994, "step": 6352 }, { "epoch": 1.4645533141210374, "grad_norm": 1.3380095823905058, "learning_rate": 3.673578080343981e-07, "loss": 0.4164416193962097, "step": 6353 }, { "epoch": 1.4647838616714697, "grad_norm": 1.6805497958260283, "learning_rate": 3.670626600799739e-07, "loss": 0.5269230008125305, "step": 6354 }, { "epoch": 1.465014409221902, "grad_norm": 1.647915234149178, "learning_rate": 3.667676040847607e-07, "loss": 0.4049336612224579, "step": 6355 }, { "epoch": 1.4652449567723342, "grad_norm": 1.6287952578421923, "learning_rate": 3.66472640091627e-07, "loss": 0.4874676465988159, "step": 6356 }, { "epoch": 1.4654755043227665, "grad_norm": 1.359427032107508, "learning_rate": 3.6617776814342826e-07, "loss": 0.44552722573280334, "step": 6357 }, { "epoch": 1.4657060518731988, "grad_norm": 1.6381292926536035, "learning_rate": 3.6588298828300655e-07, "loss": 0.46151018142700195, "step": 6358 }, { "epoch": 1.465936599423631, "grad_norm": 1.759691893404636, "learning_rate": 3.655883005531898e-07, "loss": 0.4587894082069397, "step": 6359 }, { "epoch": 1.4661671469740634, "grad_norm": 1.8298309075681234, "learning_rate": 3.6529370499679367e-07, "loss": 0.5404157638549805, "step": 6360 }, { "epoch": 1.4663976945244956, "grad_norm": 1.3135400416651812, "learning_rate": 3.649992016566195e-07, "loss": 0.42960917949676514, "step": 6361 }, { "epoch": 1.466628242074928, "grad_norm": 1.9123580665558777, "learning_rate": 3.647047905754551e-07, "loss": 0.550566554069519, "step": 6362 }, { "epoch": 1.4668587896253602, "grad_norm": 1.5220633344951369, "learning_rate": 3.644104717960761e-07, "loss": 0.42004531621932983, "step": 6363 }, { "epoch": 1.4670893371757925, "grad_norm": 2.0037836879571413, "learning_rate": 3.641162453612434e-07, "loss": 0.4716450572013855, "step": 6364 }, { "epoch": 1.4673198847262248, "grad_norm": 1.9961081968547811, "learning_rate": 3.6382211131370534e-07, "loss": 0.4558556079864502, "step": 6365 }, { "epoch": 1.467550432276657, "grad_norm": 1.79825755837218, "learning_rate": 3.6352806969619667e-07, "loss": 0.51080721616745, "step": 6366 }, { "epoch": 1.4677809798270893, "grad_norm": 1.867330178377139, "learning_rate": 3.6323412055143843e-07, "loss": 0.35964512825012207, "step": 6367 }, { "epoch": 1.4680115273775216, "grad_norm": 1.7497633832514439, "learning_rate": 3.629402639221384e-07, "loss": 0.4457089900970459, "step": 6368 }, { "epoch": 1.4682420749279539, "grad_norm": 1.652033538972312, "learning_rate": 3.626464998509905e-07, "loss": 0.38707441091537476, "step": 6369 }, { "epoch": 1.4684726224783862, "grad_norm": 1.5451052503855718, "learning_rate": 3.623528283806758e-07, "loss": 0.5018205642700195, "step": 6370 }, { "epoch": 1.4687031700288184, "grad_norm": 1.6247787287129325, "learning_rate": 3.620592495538622e-07, "loss": 0.4383612871170044, "step": 6371 }, { "epoch": 1.4689337175792507, "grad_norm": 1.4673351855920176, "learning_rate": 3.6176576341320297e-07, "loss": 0.46968695521354675, "step": 6372 }, { "epoch": 1.469164265129683, "grad_norm": 1.5714223819434983, "learning_rate": 3.6147237000133925e-07, "loss": 0.45279714465141296, "step": 6373 }, { "epoch": 1.4693948126801153, "grad_norm": 1.926925770479076, "learning_rate": 3.6117906936089757e-07, "loss": 0.4974134862422943, "step": 6374 }, { "epoch": 1.4696253602305476, "grad_norm": 1.89913152070155, "learning_rate": 3.608858615344914e-07, "loss": 0.49392572045326233, "step": 6375 }, { "epoch": 1.4698559077809799, "grad_norm": 1.8956623188876287, "learning_rate": 3.605927465647213e-07, "loss": 0.45273101329803467, "step": 6376 }, { "epoch": 1.4700864553314121, "grad_norm": 1.715042184201244, "learning_rate": 3.602997244941731e-07, "loss": 0.45529431104660034, "step": 6377 }, { "epoch": 1.4703170028818444, "grad_norm": 1.8449740006889035, "learning_rate": 3.600067953654203e-07, "loss": 0.5191174745559692, "step": 6378 }, { "epoch": 1.4705475504322767, "grad_norm": 1.6198609763992475, "learning_rate": 3.5971395922102276e-07, "loss": 0.44115346670150757, "step": 6379 }, { "epoch": 1.470778097982709, "grad_norm": 1.4551263848150924, "learning_rate": 3.5942121610352616e-07, "loss": 0.3980026841163635, "step": 6380 }, { "epoch": 1.4710086455331413, "grad_norm": 1.8677194748312773, "learning_rate": 3.5912856605546303e-07, "loss": 0.4519263505935669, "step": 6381 }, { "epoch": 1.4712391930835735, "grad_norm": 1.9194690118602158, "learning_rate": 3.5883600911935206e-07, "loss": 0.47958600521087646, "step": 6382 }, { "epoch": 1.4714697406340058, "grad_norm": 1.495711078011954, "learning_rate": 3.5854354533769915e-07, "loss": 0.5039705038070679, "step": 6383 }, { "epoch": 1.471700288184438, "grad_norm": 1.6950047090453357, "learning_rate": 3.582511747529965e-07, "loss": 0.4200620651245117, "step": 6384 }, { "epoch": 1.4719308357348704, "grad_norm": 1.746616763124911, "learning_rate": 3.579588974077218e-07, "loss": 0.44767335057258606, "step": 6385 }, { "epoch": 1.4721613832853027, "grad_norm": 1.9564062341779582, "learning_rate": 3.5766671334434053e-07, "loss": 0.4160998463630676, "step": 6386 }, { "epoch": 1.472391930835735, "grad_norm": 2.0148129925088436, "learning_rate": 3.5737462260530384e-07, "loss": 0.4349063038825989, "step": 6387 }, { "epoch": 1.4726224783861672, "grad_norm": 1.379105631547287, "learning_rate": 3.570826252330491e-07, "loss": 0.44622567296028137, "step": 6388 }, { "epoch": 1.4728530259365995, "grad_norm": 1.6792585009694532, "learning_rate": 3.56790721270001e-07, "loss": 0.5509021878242493, "step": 6389 }, { "epoch": 1.4730835734870318, "grad_norm": 1.589744434583523, "learning_rate": 3.5649891075856963e-07, "loss": 0.4545692801475525, "step": 6390 }, { "epoch": 1.4733141210374638, "grad_norm": 1.5193646777338217, "learning_rate": 3.5620719374115237e-07, "loss": 0.45771169662475586, "step": 6391 }, { "epoch": 1.4735446685878961, "grad_norm": 1.5457460822793552, "learning_rate": 3.559155702601333e-07, "loss": 0.40960395336151123, "step": 6392 }, { "epoch": 1.4737752161383284, "grad_norm": 1.5018594999569612, "learning_rate": 3.5562404035788084e-07, "loss": 0.4792563319206238, "step": 6393 }, { "epoch": 1.4740057636887607, "grad_norm": 1.9530047641946722, "learning_rate": 3.5533260407675205e-07, "loss": 0.4988730251789093, "step": 6394 }, { "epoch": 1.474236311239193, "grad_norm": 1.4554877163973188, "learning_rate": 3.5504126145908985e-07, "loss": 0.49683940410614014, "step": 6395 }, { "epoch": 1.4744668587896252, "grad_norm": 2.220410748790677, "learning_rate": 3.547500125472227e-07, "loss": 0.515201985836029, "step": 6396 }, { "epoch": 1.4746974063400575, "grad_norm": 1.6620796307322583, "learning_rate": 3.544588573834666e-07, "loss": 0.5007616877555847, "step": 6397 }, { "epoch": 1.4749279538904898, "grad_norm": 1.5582513723700742, "learning_rate": 3.5416779601012316e-07, "loss": 0.41666027903556824, "step": 6398 }, { "epoch": 1.475158501440922, "grad_norm": 1.5507523525405247, "learning_rate": 3.538768284694801e-07, "loss": 0.4753478169441223, "step": 6399 }, { "epoch": 1.4753890489913544, "grad_norm": 1.5959517593070658, "learning_rate": 3.535859548038128e-07, "loss": 0.39610493183135986, "step": 6400 }, { "epoch": 1.4756195965417866, "grad_norm": 1.7175570820294603, "learning_rate": 3.5329517505538133e-07, "loss": 0.49294179677963257, "step": 6401 }, { "epoch": 1.475850144092219, "grad_norm": 1.4777974691524935, "learning_rate": 3.5300448926643345e-07, "loss": 0.5709241628646851, "step": 6402 }, { "epoch": 1.4760806916426512, "grad_norm": 1.6977497595363569, "learning_rate": 3.52713897479203e-07, "loss": 0.5481114387512207, "step": 6403 }, { "epoch": 1.4763112391930835, "grad_norm": 1.293897371508969, "learning_rate": 3.524233997359097e-07, "loss": 0.39064526557922363, "step": 6404 }, { "epoch": 1.4765417867435158, "grad_norm": 1.7655897829606124, "learning_rate": 3.521329960787598e-07, "loss": 0.43510839343070984, "step": 6405 }, { "epoch": 1.476772334293948, "grad_norm": 1.6197138314599864, "learning_rate": 3.518426865499456e-07, "loss": 0.5212624073028564, "step": 6406 }, { "epoch": 1.4770028818443803, "grad_norm": 1.3980445990441086, "learning_rate": 3.5155247119164646e-07, "loss": 0.44855934381484985, "step": 6407 }, { "epoch": 1.4772334293948126, "grad_norm": 1.6879035741159887, "learning_rate": 3.512623500460279e-07, "loss": 0.45301520824432373, "step": 6408 }, { "epoch": 1.477463976945245, "grad_norm": 1.7659988453507556, "learning_rate": 3.5097232315524074e-07, "loss": 0.44875574111938477, "step": 6409 }, { "epoch": 1.4776945244956772, "grad_norm": 1.6352648357517579, "learning_rate": 3.506823905614238e-07, "loss": 0.4552629590034485, "step": 6410 }, { "epoch": 1.4779250720461095, "grad_norm": 1.634142332572928, "learning_rate": 3.503925523067007e-07, "loss": 0.5640658140182495, "step": 6411 }, { "epoch": 1.4781556195965417, "grad_norm": 1.689760519647161, "learning_rate": 3.501028084331817e-07, "loss": 0.4508011043071747, "step": 6412 }, { "epoch": 1.478386167146974, "grad_norm": 1.6675433414567795, "learning_rate": 3.4981315898296437e-07, "loss": 0.4895268678665161, "step": 6413 }, { "epoch": 1.4786167146974063, "grad_norm": 1.5223819083256382, "learning_rate": 3.495236039981307e-07, "loss": 0.6094552874565125, "step": 6414 }, { "epoch": 1.4788472622478386, "grad_norm": 1.8997558277564615, "learning_rate": 3.492341435207509e-07, "loss": 0.4547635316848755, "step": 6415 }, { "epoch": 1.4790778097982709, "grad_norm": 1.3581768371839973, "learning_rate": 3.489447775928803e-07, "loss": 0.3610043227672577, "step": 6416 }, { "epoch": 1.4793083573487031, "grad_norm": 1.6041394568745782, "learning_rate": 3.4865550625656094e-07, "loss": 0.5122381448745728, "step": 6417 }, { "epoch": 1.4795389048991354, "grad_norm": 1.867753113062549, "learning_rate": 3.483663295538206e-07, "loss": 0.49883753061294556, "step": 6418 }, { "epoch": 1.4797694524495677, "grad_norm": 1.6738054659426496, "learning_rate": 3.4807724752667344e-07, "loss": 0.4434877634048462, "step": 6419 }, { "epoch": 1.48, "grad_norm": 1.6103141345408045, "learning_rate": 3.477882602171205e-07, "loss": 0.477453351020813, "step": 6420 }, { "epoch": 1.4802305475504323, "grad_norm": 1.4146735862110984, "learning_rate": 3.474993676671487e-07, "loss": 0.4113251864910126, "step": 6421 }, { "epoch": 1.4804610951008645, "grad_norm": 1.6403201677984904, "learning_rate": 3.4721056991873063e-07, "loss": 0.48774218559265137, "step": 6422 }, { "epoch": 1.4806916426512968, "grad_norm": 1.7694899270224396, "learning_rate": 3.469218670138264e-07, "loss": 0.49067050218582153, "step": 6423 }, { "epoch": 1.480922190201729, "grad_norm": 1.461965616606682, "learning_rate": 3.46633258994381e-07, "loss": 0.44339796900749207, "step": 6424 }, { "epoch": 1.4811527377521614, "grad_norm": 1.5104802529368333, "learning_rate": 3.4634474590232585e-07, "loss": 0.46774202585220337, "step": 6425 }, { "epoch": 1.4813832853025937, "grad_norm": 1.6010012626937276, "learning_rate": 3.460563277795796e-07, "loss": 0.5128026008605957, "step": 6426 }, { "epoch": 1.481613832853026, "grad_norm": 1.567736868007956, "learning_rate": 3.457680046680458e-07, "loss": 0.402060866355896, "step": 6427 }, { "epoch": 1.4818443804034582, "grad_norm": 1.6986772514907804, "learning_rate": 3.4547977660961504e-07, "loss": 0.4166930317878723, "step": 6428 }, { "epoch": 1.4820749279538905, "grad_norm": 1.6211572453475638, "learning_rate": 3.451916436461643e-07, "loss": 0.428037166595459, "step": 6429 }, { "epoch": 1.4823054755043228, "grad_norm": 1.4970288634545281, "learning_rate": 3.449036058195558e-07, "loss": 0.5032195448875427, "step": 6430 }, { "epoch": 1.482536023054755, "grad_norm": 1.6031976447892884, "learning_rate": 3.4461566317163827e-07, "loss": 0.480010986328125, "step": 6431 }, { "epoch": 1.4827665706051874, "grad_norm": 1.5460841020576666, "learning_rate": 3.4432781574424743e-07, "loss": 0.6013551950454712, "step": 6432 }, { "epoch": 1.4829971181556196, "grad_norm": 2.0092772021219942, "learning_rate": 3.440400635792037e-07, "loss": 0.42359572649002075, "step": 6433 }, { "epoch": 1.483227665706052, "grad_norm": 1.7875564171400882, "learning_rate": 3.437524067183153e-07, "loss": 0.4685453772544861, "step": 6434 }, { "epoch": 1.4834582132564842, "grad_norm": 1.6281902482139496, "learning_rate": 3.4346484520337513e-07, "loss": 0.4501311480998993, "step": 6435 }, { "epoch": 1.4836887608069165, "grad_norm": 1.714416901486118, "learning_rate": 3.431773790761634e-07, "loss": 0.43600693345069885, "step": 6436 }, { "epoch": 1.4839193083573488, "grad_norm": 1.802737382888876, "learning_rate": 3.4289000837844574e-07, "loss": 0.5174646377563477, "step": 6437 }, { "epoch": 1.484149855907781, "grad_norm": 2.0164710202679554, "learning_rate": 3.426027331519737e-07, "loss": 0.587194561958313, "step": 6438 }, { "epoch": 1.4843804034582133, "grad_norm": 1.5580641317732793, "learning_rate": 3.4231555343848585e-07, "loss": 0.4370976388454437, "step": 6439 }, { "epoch": 1.4846109510086456, "grad_norm": 1.4989526897857972, "learning_rate": 3.4202846927970664e-07, "loss": 0.4005950093269348, "step": 6440 }, { "epoch": 1.4848414985590779, "grad_norm": 1.7120206565146892, "learning_rate": 3.4174148071734565e-07, "loss": 0.489225834608078, "step": 6441 }, { "epoch": 1.4850720461095102, "grad_norm": 1.4871889439736439, "learning_rate": 3.4145458779310034e-07, "loss": 0.464316725730896, "step": 6442 }, { "epoch": 1.4853025936599424, "grad_norm": 1.5005161742665332, "learning_rate": 3.411677905486525e-07, "loss": 0.42455434799194336, "step": 6443 }, { "epoch": 1.4855331412103747, "grad_norm": 1.859604235829819, "learning_rate": 3.408810890256708e-07, "loss": 0.4953247308731079, "step": 6444 }, { "epoch": 1.485763688760807, "grad_norm": 1.6362253603742314, "learning_rate": 3.405944832658104e-07, "loss": 0.5813614130020142, "step": 6445 }, { "epoch": 1.4859942363112393, "grad_norm": 1.510210977741005, "learning_rate": 3.403079733107117e-07, "loss": 0.5422607064247131, "step": 6446 }, { "epoch": 1.4862247838616716, "grad_norm": 1.808286563495416, "learning_rate": 3.4002155920200183e-07, "loss": 0.4816162586212158, "step": 6447 }, { "epoch": 1.4864553314121038, "grad_norm": 1.8383473199815739, "learning_rate": 3.39735240981294e-07, "loss": 0.513064444065094, "step": 6448 }, { "epoch": 1.4866858789625361, "grad_norm": 1.784239888983303, "learning_rate": 3.3944901869018714e-07, "loss": 0.45172595977783203, "step": 6449 }, { "epoch": 1.4869164265129684, "grad_norm": 1.6442365366535128, "learning_rate": 3.391628923702664e-07, "loss": 0.458996057510376, "step": 6450 }, { "epoch": 1.4871469740634007, "grad_norm": 1.808440287394247, "learning_rate": 3.388768620631024e-07, "loss": 0.5369571447372437, "step": 6451 }, { "epoch": 1.487377521613833, "grad_norm": 1.488742958974303, "learning_rate": 3.3859092781025276e-07, "loss": 0.40987443923950195, "step": 6452 }, { "epoch": 1.4876080691642652, "grad_norm": 1.3654680024752937, "learning_rate": 3.3830508965326123e-07, "loss": 0.4854167401790619, "step": 6453 }, { "epoch": 1.4878386167146975, "grad_norm": 1.4869168297481818, "learning_rate": 3.3801934763365637e-07, "loss": 0.4233596920967102, "step": 6454 }, { "epoch": 1.4880691642651298, "grad_norm": 1.6653305815248807, "learning_rate": 3.3773370179295415e-07, "loss": 0.4606029987335205, "step": 6455 }, { "epoch": 1.488299711815562, "grad_norm": 1.565802296000991, "learning_rate": 3.3744815217265566e-07, "loss": 0.500397801399231, "step": 6456 }, { "epoch": 1.4885302593659944, "grad_norm": 1.4104137796905991, "learning_rate": 3.371626988142479e-07, "loss": 0.4079688787460327, "step": 6457 }, { "epoch": 1.4887608069164266, "grad_norm": 1.437160720705673, "learning_rate": 3.3687734175920503e-07, "loss": 0.4022506773471832, "step": 6458 }, { "epoch": 1.4889913544668587, "grad_norm": 1.5597051684116123, "learning_rate": 3.365920810489856e-07, "loss": 0.46588951349258423, "step": 6459 }, { "epoch": 1.489221902017291, "grad_norm": 1.5966856514161005, "learning_rate": 3.3630691672503565e-07, "loss": 0.5634331703186035, "step": 6460 }, { "epoch": 1.4894524495677233, "grad_norm": 1.5519766042222847, "learning_rate": 3.360218488287867e-07, "loss": 0.45025673508644104, "step": 6461 }, { "epoch": 1.4896829971181555, "grad_norm": 1.6343139379388767, "learning_rate": 3.357368774016559e-07, "loss": 0.4171956777572632, "step": 6462 }, { "epoch": 1.4899135446685878, "grad_norm": 1.7433785479359918, "learning_rate": 3.354520024850467e-07, "loss": 0.4150547981262207, "step": 6463 }, { "epoch": 1.4901440922190201, "grad_norm": 1.6499845840112966, "learning_rate": 3.351672241203479e-07, "loss": 0.5727693438529968, "step": 6464 }, { "epoch": 1.4903746397694524, "grad_norm": 1.4575635695527691, "learning_rate": 3.3488254234893554e-07, "loss": 0.3606629967689514, "step": 6465 }, { "epoch": 1.4906051873198847, "grad_norm": 1.669334025951129, "learning_rate": 3.345979572121709e-07, "loss": 0.447257936000824, "step": 6466 }, { "epoch": 1.490835734870317, "grad_norm": 1.4454918598699755, "learning_rate": 3.3431346875140067e-07, "loss": 0.4196828603744507, "step": 6467 }, { "epoch": 1.4910662824207492, "grad_norm": 1.761385296418873, "learning_rate": 3.340290770079588e-07, "loss": 0.46776294708251953, "step": 6468 }, { "epoch": 1.4912968299711815, "grad_norm": 1.7102419270453626, "learning_rate": 3.3374478202316403e-07, "loss": 0.463356614112854, "step": 6469 }, { "epoch": 1.4915273775216138, "grad_norm": 1.7784174334662857, "learning_rate": 3.3346058383832123e-07, "loss": 0.49275442957878113, "step": 6470 }, { "epoch": 1.491757925072046, "grad_norm": 1.6776101672774684, "learning_rate": 3.3317648249472205e-07, "loss": 0.5000715851783752, "step": 6471 }, { "epoch": 1.4919884726224784, "grad_norm": 1.7556476404250725, "learning_rate": 3.328924780336428e-07, "loss": 0.6033698320388794, "step": 6472 }, { "epoch": 1.4922190201729106, "grad_norm": 1.4908393069487778, "learning_rate": 3.326085704963467e-07, "loss": 0.521305501461029, "step": 6473 }, { "epoch": 1.492449567723343, "grad_norm": 1.2845886716264758, "learning_rate": 3.3232475992408293e-07, "loss": 0.4261690378189087, "step": 6474 }, { "epoch": 1.4926801152737752, "grad_norm": 1.5797229025924917, "learning_rate": 3.320410463580859e-07, "loss": 0.4929957985877991, "step": 6475 }, { "epoch": 1.4929106628242075, "grad_norm": 1.7503702857264898, "learning_rate": 3.3175742983957577e-07, "loss": 0.5392374992370605, "step": 6476 }, { "epoch": 1.4931412103746398, "grad_norm": 1.6614074077433174, "learning_rate": 3.314739104097599e-07, "loss": 0.45847803354263306, "step": 6477 }, { "epoch": 1.493371757925072, "grad_norm": 1.5197176985824143, "learning_rate": 3.3119048810982996e-07, "loss": 0.317424476146698, "step": 6478 }, { "epoch": 1.4936023054755043, "grad_norm": 1.7221896627907232, "learning_rate": 3.3090716298096497e-07, "loss": 0.4409928321838379, "step": 6479 }, { "epoch": 1.4938328530259366, "grad_norm": 1.7378684668549815, "learning_rate": 3.306239350643284e-07, "loss": 0.4996468424797058, "step": 6480 }, { "epoch": 1.4940634005763689, "grad_norm": 1.4267403901787394, "learning_rate": 3.3034080440107104e-07, "loss": 0.4341059625148773, "step": 6481 }, { "epoch": 1.4942939481268012, "grad_norm": 1.5210821751127084, "learning_rate": 3.3005777103232833e-07, "loss": 0.48193103075027466, "step": 6482 }, { "epoch": 1.4945244956772334, "grad_norm": 1.4847373995641082, "learning_rate": 3.297748349992221e-07, "loss": 0.3965853452682495, "step": 6483 }, { "epoch": 1.4947550432276657, "grad_norm": 1.798682040014271, "learning_rate": 3.2949199634285994e-07, "loss": 0.517971396446228, "step": 6484 }, { "epoch": 1.494985590778098, "grad_norm": 1.4832490379969678, "learning_rate": 3.2920925510433605e-07, "loss": 0.4893750548362732, "step": 6485 }, { "epoch": 1.4952161383285303, "grad_norm": 1.8768690559978958, "learning_rate": 3.289266113247289e-07, "loss": 0.4487413167953491, "step": 6486 }, { "epoch": 1.4954466858789626, "grad_norm": 1.5362967716419886, "learning_rate": 3.2864406504510444e-07, "loss": 0.4499363303184509, "step": 6487 }, { "epoch": 1.4956772334293948, "grad_norm": 1.7573103415083038, "learning_rate": 3.2836161630651327e-07, "loss": 0.47149038314819336, "step": 6488 }, { "epoch": 1.4959077809798271, "grad_norm": 1.866127921997491, "learning_rate": 3.2807926514999206e-07, "loss": 0.4235773980617523, "step": 6489 }, { "epoch": 1.4961383285302594, "grad_norm": 1.841841873618585, "learning_rate": 3.2779701161656414e-07, "loss": 0.47815465927124023, "step": 6490 }, { "epoch": 1.4963688760806917, "grad_norm": 2.0492176414960523, "learning_rate": 3.2751485574723725e-07, "loss": 0.500824511051178, "step": 6491 }, { "epoch": 1.496599423631124, "grad_norm": 1.7353423201350575, "learning_rate": 3.2723279758300614e-07, "loss": 0.4901300370693207, "step": 6492 }, { "epoch": 1.4968299711815563, "grad_norm": 1.6715223899825975, "learning_rate": 3.2695083716485116e-07, "loss": 0.43286561965942383, "step": 6493 }, { "epoch": 1.4970605187319885, "grad_norm": 1.6129181056594784, "learning_rate": 3.26668974533738e-07, "loss": 0.5049563646316528, "step": 6494 }, { "epoch": 1.4972910662824208, "grad_norm": 1.3860068483985046, "learning_rate": 3.2638720973061826e-07, "loss": 0.44142240285873413, "step": 6495 }, { "epoch": 1.497521613832853, "grad_norm": 1.7555180120285485, "learning_rate": 3.261055427964292e-07, "loss": 0.46423906087875366, "step": 6496 }, { "epoch": 1.4977521613832854, "grad_norm": 1.630721059004709, "learning_rate": 3.2582397377209446e-07, "loss": 0.5402355790138245, "step": 6497 }, { "epoch": 1.4979827089337177, "grad_norm": 1.776211928117776, "learning_rate": 3.2554250269852326e-07, "loss": 0.4754972457885742, "step": 6498 }, { "epoch": 1.49821325648415, "grad_norm": 1.7088366443415275, "learning_rate": 3.2526112961660987e-07, "loss": 0.4837331771850586, "step": 6499 }, { "epoch": 1.498443804034582, "grad_norm": 1.4244012985740182, "learning_rate": 3.2497985456723556e-07, "loss": 0.41410496830940247, "step": 6500 }, { "epoch": 1.4986743515850143, "grad_norm": 1.3926696859702201, "learning_rate": 3.246986775912661e-07, "loss": 0.39501869678497314, "step": 6501 }, { "epoch": 1.4989048991354466, "grad_norm": 1.7615254739148636, "learning_rate": 3.2441759872955367e-07, "loss": 0.44316792488098145, "step": 6502 }, { "epoch": 1.4991354466858788, "grad_norm": 1.5504390307833396, "learning_rate": 3.2413661802293633e-07, "loss": 0.4260290861129761, "step": 6503 }, { "epoch": 1.4993659942363111, "grad_norm": 1.4857903718592202, "learning_rate": 3.2385573551223733e-07, "loss": 0.4658172130584717, "step": 6504 }, { "epoch": 1.4995965417867434, "grad_norm": 1.4040986890780063, "learning_rate": 3.235749512382662e-07, "loss": 0.4857284426689148, "step": 6505 }, { "epoch": 1.4998270893371757, "grad_norm": 1.6098416781838087, "learning_rate": 3.232942652418185e-07, "loss": 0.3886150121688843, "step": 6506 }, { "epoch": 1.500057636887608, "grad_norm": 1.5908712909726912, "learning_rate": 3.2301367756367383e-07, "loss": 0.47719478607177734, "step": 6507 }, { "epoch": 1.5002881844380402, "grad_norm": 1.5313150321043758, "learning_rate": 3.227331882445995e-07, "loss": 0.4501890540122986, "step": 6508 }, { "epoch": 1.5005187319884725, "grad_norm": 1.6816112124849405, "learning_rate": 3.224527973253472e-07, "loss": 0.5312929153442383, "step": 6509 }, { "epoch": 1.5007492795389048, "grad_norm": 1.7099512604146767, "learning_rate": 3.22172504846655e-07, "loss": 0.4247457981109619, "step": 6510 }, { "epoch": 1.500979827089337, "grad_norm": 1.5170821792110236, "learning_rate": 3.2189231084924693e-07, "loss": 0.41195109486579895, "step": 6511 }, { "epoch": 1.5012103746397694, "grad_norm": 1.414534154865254, "learning_rate": 3.2161221537383187e-07, "loss": 0.49529117345809937, "step": 6512 }, { "epoch": 1.5014409221902016, "grad_norm": 1.7823472150342257, "learning_rate": 3.213322184611045e-07, "loss": 0.47565385699272156, "step": 6513 }, { "epoch": 1.501671469740634, "grad_norm": 1.59315040296854, "learning_rate": 3.210523201517461e-07, "loss": 0.44504472613334656, "step": 6514 }, { "epoch": 1.5019020172910662, "grad_norm": 1.578129124805014, "learning_rate": 3.2077252048642224e-07, "loss": 0.46689483523368835, "step": 6515 }, { "epoch": 1.5021325648414985, "grad_norm": 1.9438217919470961, "learning_rate": 3.2049281950578554e-07, "loss": 0.4728453755378723, "step": 6516 }, { "epoch": 1.5023631123919308, "grad_norm": 1.5831150547958008, "learning_rate": 3.2021321725047326e-07, "loss": 0.4356672167778015, "step": 6517 }, { "epoch": 1.502593659942363, "grad_norm": 1.6622807686406893, "learning_rate": 3.1993371376110903e-07, "loss": 0.4222509562969208, "step": 6518 }, { "epoch": 1.5028242074927953, "grad_norm": 1.4965022852461283, "learning_rate": 3.1965430907830157e-07, "loss": 0.3961385190486908, "step": 6519 }, { "epoch": 1.5030547550432276, "grad_norm": 1.7015715818454764, "learning_rate": 3.193750032426452e-07, "loss": 0.40793734788894653, "step": 6520 }, { "epoch": 1.5032853025936599, "grad_norm": 1.2428021579416293, "learning_rate": 3.190957962947205e-07, "loss": 0.418218195438385, "step": 6521 }, { "epoch": 1.5035158501440922, "grad_norm": 1.5780813489572465, "learning_rate": 3.188166882750937e-07, "loss": 0.4289787709712982, "step": 6522 }, { "epoch": 1.5037463976945245, "grad_norm": 1.4926692792678622, "learning_rate": 3.185376792243154e-07, "loss": 0.41293439269065857, "step": 6523 }, { "epoch": 1.5039769452449567, "grad_norm": 1.6656656082223846, "learning_rate": 3.182587691829236e-07, "loss": 0.5296646952629089, "step": 6524 }, { "epoch": 1.504207492795389, "grad_norm": 1.6592089492848106, "learning_rate": 3.179799581914406e-07, "loss": 0.40602487325668335, "step": 6525 }, { "epoch": 1.5044380403458213, "grad_norm": 1.9931387488132102, "learning_rate": 3.1770124629037445e-07, "loss": 0.4724326729774475, "step": 6526 }, { "epoch": 1.5046685878962536, "grad_norm": 1.5756364961675935, "learning_rate": 3.174226335202197e-07, "loss": 0.49090898036956787, "step": 6527 }, { "epoch": 1.5048991354466859, "grad_norm": 2.036877642601554, "learning_rate": 3.171441199214553e-07, "loss": 0.4932633638381958, "step": 6528 }, { "epoch": 1.5051296829971181, "grad_norm": 1.564392641724374, "learning_rate": 3.168657055345466e-07, "loss": 0.45044368505477905, "step": 6529 }, { "epoch": 1.5053602305475504, "grad_norm": 1.801725407800878, "learning_rate": 3.165873903999449e-07, "loss": 0.5444917678833008, "step": 6530 }, { "epoch": 1.5055907780979827, "grad_norm": 1.6246016746918435, "learning_rate": 3.163091745580857e-07, "loss": 0.45643138885498047, "step": 6531 }, { "epoch": 1.505821325648415, "grad_norm": 1.5583644902514906, "learning_rate": 3.160310580493913e-07, "loss": 0.45805928111076355, "step": 6532 }, { "epoch": 1.5060518731988473, "grad_norm": 2.1170198831310096, "learning_rate": 3.157530409142687e-07, "loss": 0.4036891460418701, "step": 6533 }, { "epoch": 1.5062824207492795, "grad_norm": 1.7185763373562066, "learning_rate": 3.154751231931111e-07, "loss": 0.5445838570594788, "step": 6534 }, { "epoch": 1.5065129682997118, "grad_norm": 1.9448138702130089, "learning_rate": 3.1519730492629737e-07, "loss": 0.5139729976654053, "step": 6535 }, { "epoch": 1.506743515850144, "grad_norm": 1.7903199300823514, "learning_rate": 3.1491958615419123e-07, "loss": 0.4956102669239044, "step": 6536 }, { "epoch": 1.5069740634005764, "grad_norm": 1.408735724577368, "learning_rate": 3.146419669171426e-07, "loss": 0.4758613705635071, "step": 6537 }, { "epoch": 1.5072046109510087, "grad_norm": 1.4805391629752656, "learning_rate": 3.1436444725548674e-07, "loss": 0.41319960355758667, "step": 6538 }, { "epoch": 1.507435158501441, "grad_norm": 1.454596870187288, "learning_rate": 3.140870272095437e-07, "loss": 0.4641035795211792, "step": 6539 }, { "epoch": 1.5076657060518732, "grad_norm": 1.7193566807860163, "learning_rate": 3.138097068196206e-07, "loss": 0.4940011501312256, "step": 6540 }, { "epoch": 1.5078962536023055, "grad_norm": 1.734016298128087, "learning_rate": 3.135324861260085e-07, "loss": 0.5018976330757141, "step": 6541 }, { "epoch": 1.5081268011527378, "grad_norm": 1.5024512602760236, "learning_rate": 3.132553651689849e-07, "loss": 0.41456544399261475, "step": 6542 }, { "epoch": 1.50835734870317, "grad_norm": 1.3831056672275375, "learning_rate": 3.1297834398881293e-07, "loss": 0.43678340315818787, "step": 6543 }, { "epoch": 1.5085878962536023, "grad_norm": 1.6751460423842672, "learning_rate": 3.1270142262574084e-07, "loss": 0.4336814880371094, "step": 6544 }, { "epoch": 1.5088184438040346, "grad_norm": 1.763114589172267, "learning_rate": 3.124246011200018e-07, "loss": 0.4593331217765808, "step": 6545 }, { "epoch": 1.509048991354467, "grad_norm": 1.5015058915538149, "learning_rate": 3.121478795118158e-07, "loss": 0.4398609399795532, "step": 6546 }, { "epoch": 1.5092795389048992, "grad_norm": 1.3983198539077313, "learning_rate": 3.11871257841387e-07, "loss": 0.40382882952690125, "step": 6547 }, { "epoch": 1.5095100864553315, "grad_norm": 1.6180984558312, "learning_rate": 3.115947361489064e-07, "loss": 0.511704683303833, "step": 6548 }, { "epoch": 1.5097406340057637, "grad_norm": 1.5988312105042757, "learning_rate": 3.113183144745488e-07, "loss": 0.4606817960739136, "step": 6549 }, { "epoch": 1.509971181556196, "grad_norm": 1.792080137203916, "learning_rate": 3.1104199285847645e-07, "loss": 0.43390586972236633, "step": 6550 }, { "epoch": 1.5102017291066283, "grad_norm": 1.6165670343276886, "learning_rate": 3.1076577134083524e-07, "loss": 0.4556368589401245, "step": 6551 }, { "epoch": 1.5104322766570606, "grad_norm": 1.6645688544418311, "learning_rate": 3.104896499617573e-07, "loss": 0.477630078792572, "step": 6552 }, { "epoch": 1.5106628242074929, "grad_norm": 1.2719017711677718, "learning_rate": 3.102136287613606e-07, "loss": 0.41332900524139404, "step": 6553 }, { "epoch": 1.5108933717579252, "grad_norm": 1.348486974941644, "learning_rate": 3.099377077797477e-07, "loss": 0.4499499201774597, "step": 6554 }, { "epoch": 1.5111239193083574, "grad_norm": 1.8893704868945012, "learning_rate": 3.096618870570072e-07, "loss": 0.47415900230407715, "step": 6555 }, { "epoch": 1.5113544668587897, "grad_norm": 1.5506294716293978, "learning_rate": 3.0938616663321346e-07, "loss": 0.4729388952255249, "step": 6556 }, { "epoch": 1.511585014409222, "grad_norm": 1.9901244862666623, "learning_rate": 3.0911054654842547e-07, "loss": 0.49651503562927246, "step": 6557 }, { "epoch": 1.5118155619596543, "grad_norm": 1.6254741291231039, "learning_rate": 3.0883502684268747e-07, "loss": 0.4242505431175232, "step": 6558 }, { "epoch": 1.5120461095100866, "grad_norm": 1.5927069260474938, "learning_rate": 3.085596075560304e-07, "loss": 0.4927797019481659, "step": 6559 }, { "epoch": 1.5122766570605188, "grad_norm": 1.572555154857978, "learning_rate": 3.0828428872846903e-07, "loss": 0.5201178193092346, "step": 6560 }, { "epoch": 1.5125072046109511, "grad_norm": 1.893958450562389, "learning_rate": 3.0800907040000515e-07, "loss": 0.5418535470962524, "step": 6561 }, { "epoch": 1.5127377521613834, "grad_norm": 2.210710686259104, "learning_rate": 3.077339526106243e-07, "loss": 0.5631084442138672, "step": 6562 }, { "epoch": 1.5129682997118157, "grad_norm": 1.4163424994270397, "learning_rate": 3.07458935400299e-07, "loss": 0.3862801790237427, "step": 6563 }, { "epoch": 1.513198847262248, "grad_norm": 1.432435754713911, "learning_rate": 3.071840188089859e-07, "loss": 0.4399529695510864, "step": 6564 }, { "epoch": 1.5134293948126802, "grad_norm": 1.416271606847585, "learning_rate": 3.069092028766275e-07, "loss": 0.42560017108917236, "step": 6565 }, { "epoch": 1.5136599423631125, "grad_norm": 1.3843654986107572, "learning_rate": 3.066344876431518e-07, "loss": 0.5227498412132263, "step": 6566 }, { "epoch": 1.5138904899135448, "grad_norm": 1.7845341383364082, "learning_rate": 3.0635987314847234e-07, "loss": 0.5001060962677002, "step": 6567 }, { "epoch": 1.514121037463977, "grad_norm": 1.4633273931040118, "learning_rate": 3.0608535943248725e-07, "loss": 0.4475817382335663, "step": 6568 }, { "epoch": 1.5143515850144094, "grad_norm": 1.776494937774852, "learning_rate": 3.058109465350811e-07, "loss": 0.41835230588912964, "step": 6569 }, { "epoch": 1.5145821325648416, "grad_norm": 1.5649603798840601, "learning_rate": 3.05536634496123e-07, "loss": 0.43384939432144165, "step": 6570 }, { "epoch": 1.514812680115274, "grad_norm": 1.4466265728431833, "learning_rate": 3.0526242335546714e-07, "loss": 0.49595510959625244, "step": 6571 }, { "epoch": 1.5150432276657062, "grad_norm": 1.4382916193314434, "learning_rate": 3.0498831315295425e-07, "loss": 0.4870753288269043, "step": 6572 }, { "epoch": 1.5152737752161385, "grad_norm": 2.104289673848665, "learning_rate": 3.047143039284091e-07, "loss": 0.5014097094535828, "step": 6573 }, { "epoch": 1.5155043227665708, "grad_norm": 1.5411668776330472, "learning_rate": 3.044403957216427e-07, "loss": 0.45475223660469055, "step": 6574 }, { "epoch": 1.515734870317003, "grad_norm": 2.2103660716265936, "learning_rate": 3.0416658857245135e-07, "loss": 0.4280875027179718, "step": 6575 }, { "epoch": 1.515965417867435, "grad_norm": 1.411762768587303, "learning_rate": 3.038928825206162e-07, "loss": 0.41492798924446106, "step": 6576 }, { "epoch": 1.5161959654178674, "grad_norm": 1.5234346382387383, "learning_rate": 3.0361927760590356e-07, "loss": 0.5283424854278564, "step": 6577 }, { "epoch": 1.5164265129682997, "grad_norm": 1.7769534343214894, "learning_rate": 3.0334577386806535e-07, "loss": 0.5035547614097595, "step": 6578 }, { "epoch": 1.516657060518732, "grad_norm": 1.7167448261757756, "learning_rate": 3.03072371346839e-07, "loss": 0.5148544907569885, "step": 6579 }, { "epoch": 1.5168876080691642, "grad_norm": 1.8335675563589118, "learning_rate": 3.0279907008194747e-07, "loss": 0.48413559794425964, "step": 6580 }, { "epoch": 1.5171181556195965, "grad_norm": 1.8842526874117616, "learning_rate": 3.0252587011309785e-07, "loss": 0.4580768942832947, "step": 6581 }, { "epoch": 1.5173487031700288, "grad_norm": 1.7010607027180409, "learning_rate": 3.0225277147998397e-07, "loss": 0.49141448736190796, "step": 6582 }, { "epoch": 1.517579250720461, "grad_norm": 1.5163321507217336, "learning_rate": 3.0197977422228393e-07, "loss": 0.490544855594635, "step": 6583 }, { "epoch": 1.5178097982708934, "grad_norm": 1.56421821222723, "learning_rate": 3.017068783796609e-07, "loss": 0.4357878565788269, "step": 6584 }, { "epoch": 1.5180403458213256, "grad_norm": 1.6556135569370023, "learning_rate": 3.0143408399176463e-07, "loss": 0.4955572485923767, "step": 6585 }, { "epoch": 1.518270893371758, "grad_norm": 1.7663661601044416, "learning_rate": 3.0116139109822855e-07, "loss": 0.5085045695304871, "step": 6586 }, { "epoch": 1.5185014409221902, "grad_norm": 1.4316549536529295, "learning_rate": 3.008887997386725e-07, "loss": 0.3946433663368225, "step": 6587 }, { "epoch": 1.5187319884726225, "grad_norm": 1.5043787480659017, "learning_rate": 3.006163099527016e-07, "loss": 0.4135388433933258, "step": 6588 }, { "epoch": 1.5189625360230548, "grad_norm": 1.4759287632093028, "learning_rate": 3.003439217799052e-07, "loss": 0.45946577191352844, "step": 6589 }, { "epoch": 1.519193083573487, "grad_norm": 1.9119134231407664, "learning_rate": 3.0007163525985823e-07, "loss": 0.5455194711685181, "step": 6590 }, { "epoch": 1.5194236311239193, "grad_norm": 2.3789785967639165, "learning_rate": 2.9979945043212173e-07, "loss": 0.49064499139785767, "step": 6591 }, { "epoch": 1.5196541786743516, "grad_norm": 1.663692129917964, "learning_rate": 2.9952736733624086e-07, "loss": 0.42849159240722656, "step": 6592 }, { "epoch": 1.5198847262247839, "grad_norm": 1.915483300525053, "learning_rate": 2.9925538601174685e-07, "loss": 0.47390738129615784, "step": 6593 }, { "epoch": 1.5201152737752162, "grad_norm": 1.6913351541435369, "learning_rate": 2.989835064981553e-07, "loss": 0.5051196813583374, "step": 6594 }, { "epoch": 1.5203458213256484, "grad_norm": 1.824205122638696, "learning_rate": 2.98711728834968e-07, "loss": 0.45643603801727295, "step": 6595 }, { "epoch": 1.5205763688760807, "grad_norm": 1.7692485956097788, "learning_rate": 2.984400530616712e-07, "loss": 0.5199561715126038, "step": 6596 }, { "epoch": 1.520806916426513, "grad_norm": 1.5344483393306434, "learning_rate": 2.9816847921773614e-07, "loss": 0.38316380977630615, "step": 6597 }, { "epoch": 1.5210374639769453, "grad_norm": 1.813431679729583, "learning_rate": 2.9789700734262036e-07, "loss": 0.5445187091827393, "step": 6598 }, { "epoch": 1.5212680115273776, "grad_norm": 2.052172217871983, "learning_rate": 2.976256374757653e-07, "loss": 0.4933769702911377, "step": 6599 }, { "epoch": 1.5214985590778098, "grad_norm": 1.8156847296260061, "learning_rate": 2.973543696565984e-07, "loss": 0.4137638807296753, "step": 6600 }, { "epoch": 1.5217291066282421, "grad_norm": 1.622501548220719, "learning_rate": 2.970832039245325e-07, "loss": 0.45649808645248413, "step": 6601 }, { "epoch": 1.5219596541786744, "grad_norm": 1.748320177273405, "learning_rate": 2.968121403189647e-07, "loss": 0.5007699728012085, "step": 6602 }, { "epoch": 1.5221902017291065, "grad_norm": 1.6579513532546735, "learning_rate": 2.9654117887927755e-07, "loss": 0.48721688985824585, "step": 6603 }, { "epoch": 1.5224207492795387, "grad_norm": 1.3583198525190263, "learning_rate": 2.962703196448394e-07, "loss": 0.3983156681060791, "step": 6604 }, { "epoch": 1.522651296829971, "grad_norm": 1.9077712460543044, "learning_rate": 2.959995626550028e-07, "loss": 0.5087774991989136, "step": 6605 }, { "epoch": 1.5228818443804033, "grad_norm": 1.6900731884970928, "learning_rate": 2.957289079491064e-07, "loss": 0.4693753719329834, "step": 6606 }, { "epoch": 1.5231123919308356, "grad_norm": 1.6484608802659817, "learning_rate": 2.954583555664731e-07, "loss": 0.4728010892868042, "step": 6607 }, { "epoch": 1.5233429394812679, "grad_norm": 1.9993958773405, "learning_rate": 2.951879055464118e-07, "loss": 0.48341798782348633, "step": 6608 }, { "epoch": 1.5235734870317001, "grad_norm": 1.777920307748833, "learning_rate": 2.9491755792821584e-07, "loss": 0.49080032110214233, "step": 6609 }, { "epoch": 1.5238040345821324, "grad_norm": 1.736793286100546, "learning_rate": 2.946473127511635e-07, "loss": 0.49306032061576843, "step": 6610 }, { "epoch": 1.5240345821325647, "grad_norm": 1.7339244766073734, "learning_rate": 2.94377170054519e-07, "loss": 0.457378625869751, "step": 6611 }, { "epoch": 1.524265129682997, "grad_norm": 1.75933167721537, "learning_rate": 2.9410712987753163e-07, "loss": 0.5000994801521301, "step": 6612 }, { "epoch": 1.5244956772334293, "grad_norm": 1.2946273184485895, "learning_rate": 2.938371922594347e-07, "loss": 0.4149599075317383, "step": 6613 }, { "epoch": 1.5247262247838616, "grad_norm": 1.8715591693483276, "learning_rate": 2.9356735723944827e-07, "loss": 0.4516978859901428, "step": 6614 }, { "epoch": 1.5249567723342938, "grad_norm": 1.5047320302067637, "learning_rate": 2.932976248567752e-07, "loss": 0.49368464946746826, "step": 6615 }, { "epoch": 1.5251873198847261, "grad_norm": 1.6819963043711963, "learning_rate": 2.9302799515060574e-07, "loss": 0.5174213647842407, "step": 6616 }, { "epoch": 1.5254178674351584, "grad_norm": 1.5888803256634056, "learning_rate": 2.927584681601144e-07, "loss": 0.3940533399581909, "step": 6617 }, { "epoch": 1.5256484149855907, "grad_norm": 1.331505711299331, "learning_rate": 2.9248904392445993e-07, "loss": 0.38714292645454407, "step": 6618 }, { "epoch": 1.525878962536023, "grad_norm": 1.796591789965291, "learning_rate": 2.9221972248278734e-07, "loss": 0.45089441537857056, "step": 6619 }, { "epoch": 1.5261095100864552, "grad_norm": 1.5199676831316118, "learning_rate": 2.9195050387422693e-07, "loss": 0.47767460346221924, "step": 6620 }, { "epoch": 1.5263400576368875, "grad_norm": 2.02038935767357, "learning_rate": 2.9168138813789176e-07, "loss": 0.4977542757987976, "step": 6621 }, { "epoch": 1.5265706051873198, "grad_norm": 1.6269397756794977, "learning_rate": 2.914123753128829e-07, "loss": 0.4222266972064972, "step": 6622 }, { "epoch": 1.526801152737752, "grad_norm": 1.4270360279099836, "learning_rate": 2.9114346543828425e-07, "loss": 0.4695430099964142, "step": 6623 }, { "epoch": 1.5270317002881844, "grad_norm": 1.8374099270801607, "learning_rate": 2.9087465855316595e-07, "loss": 0.5093779563903809, "step": 6624 }, { "epoch": 1.5272622478386166, "grad_norm": 1.644131671999478, "learning_rate": 2.9060595469658324e-07, "loss": 0.4236484169960022, "step": 6625 }, { "epoch": 1.527492795389049, "grad_norm": 1.7674503617278896, "learning_rate": 2.903373539075755e-07, "loss": 0.4549310803413391, "step": 6626 }, { "epoch": 1.5277233429394812, "grad_norm": 1.9004549728971407, "learning_rate": 2.9006885622516765e-07, "loss": 0.5295060873031616, "step": 6627 }, { "epoch": 1.5279538904899135, "grad_norm": 1.6391132617373654, "learning_rate": 2.898004616883699e-07, "loss": 0.43450385332107544, "step": 6628 }, { "epoch": 1.5281844380403458, "grad_norm": 1.3851601688362805, "learning_rate": 2.895321703361767e-07, "loss": 0.4820800721645355, "step": 6629 }, { "epoch": 1.528414985590778, "grad_norm": 1.8242908406641638, "learning_rate": 2.8926398220756874e-07, "loss": 0.4907156825065613, "step": 6630 }, { "epoch": 1.5286455331412103, "grad_norm": 1.5568983091450215, "learning_rate": 2.889958973415101e-07, "loss": 0.39823904633522034, "step": 6631 }, { "epoch": 1.5288760806916426, "grad_norm": 1.613972299703288, "learning_rate": 2.887279157769514e-07, "loss": 0.4582018256187439, "step": 6632 }, { "epoch": 1.5291066282420749, "grad_norm": 1.447957337480398, "learning_rate": 2.8846003755282744e-07, "loss": 0.43874603509902954, "step": 6633 }, { "epoch": 1.5293371757925072, "grad_norm": 1.7886894424220765, "learning_rate": 2.8819226270805775e-07, "loss": 0.38620924949645996, "step": 6634 }, { "epoch": 1.5295677233429394, "grad_norm": 1.403143890487367, "learning_rate": 2.879245912815473e-07, "loss": 0.41149455308914185, "step": 6635 }, { "epoch": 1.5297982708933717, "grad_norm": 1.5521023219473429, "learning_rate": 2.8765702331218667e-07, "loss": 0.4521693289279938, "step": 6636 }, { "epoch": 1.530028818443804, "grad_norm": 1.8134136314855847, "learning_rate": 2.8738955883884983e-07, "loss": 0.5196795463562012, "step": 6637 }, { "epoch": 1.5302593659942363, "grad_norm": 1.8251461959103044, "learning_rate": 2.8712219790039726e-07, "loss": 0.4373961091041565, "step": 6638 }, { "epoch": 1.5304899135446686, "grad_norm": 1.6509894275378938, "learning_rate": 2.868549405356734e-07, "loss": 0.39191538095474243, "step": 6639 }, { "epoch": 1.5307204610951008, "grad_norm": 1.7497602657864888, "learning_rate": 2.865877867835076e-07, "loss": 0.48131316900253296, "step": 6640 }, { "epoch": 1.5309510086455331, "grad_norm": 1.670385630744972, "learning_rate": 2.863207366827153e-07, "loss": 0.4298658072948456, "step": 6641 }, { "epoch": 1.5311815561959654, "grad_norm": 1.3877032472926891, "learning_rate": 2.8605379027209545e-07, "loss": 0.4172072410583496, "step": 6642 }, { "epoch": 1.5314121037463977, "grad_norm": 1.4575459808797373, "learning_rate": 2.8578694759043295e-07, "loss": 0.47554445266723633, "step": 6643 }, { "epoch": 1.53164265129683, "grad_norm": 1.6657810796732697, "learning_rate": 2.8552020867649704e-07, "loss": 0.39061158895492554, "step": 6644 }, { "epoch": 1.5318731988472623, "grad_norm": 2.1350395786045415, "learning_rate": 2.8525357356904243e-07, "loss": 0.5809512138366699, "step": 6645 }, { "epoch": 1.5321037463976945, "grad_norm": 1.6219135412965018, "learning_rate": 2.849870423068083e-07, "loss": 0.45639652013778687, "step": 6646 }, { "epoch": 1.5323342939481268, "grad_norm": 1.8136030445889209, "learning_rate": 2.847206149285184e-07, "loss": 0.49183255434036255, "step": 6647 }, { "epoch": 1.532564841498559, "grad_norm": 1.5868308524868744, "learning_rate": 2.844542914728822e-07, "loss": 0.497137188911438, "step": 6648 }, { "epoch": 1.5327953890489914, "grad_norm": 1.3089081966783787, "learning_rate": 2.8418807197859415e-07, "loss": 0.4407503008842468, "step": 6649 }, { "epoch": 1.5330259365994237, "grad_norm": 1.5201255778420197, "learning_rate": 2.839219564843326e-07, "loss": 0.5553654432296753, "step": 6650 }, { "epoch": 1.533256484149856, "grad_norm": 1.780260170294563, "learning_rate": 2.836559450287618e-07, "loss": 0.5073549151420593, "step": 6651 }, { "epoch": 1.5334870317002882, "grad_norm": 1.5664457648186267, "learning_rate": 2.8339003765053017e-07, "loss": 0.47718626260757446, "step": 6652 }, { "epoch": 1.5337175792507205, "grad_norm": 1.6245823258461864, "learning_rate": 2.831242343882709e-07, "loss": 0.5722469687461853, "step": 6653 }, { "epoch": 1.5339481268011528, "grad_norm": 1.6980020739907509, "learning_rate": 2.8285853528060334e-07, "loss": 0.46500977873802185, "step": 6654 }, { "epoch": 1.534178674351585, "grad_norm": 1.4448204582266613, "learning_rate": 2.8259294036613e-07, "loss": 0.40789636969566345, "step": 6655 }, { "epoch": 1.5344092219020173, "grad_norm": 1.833854839817425, "learning_rate": 2.8232744968343936e-07, "loss": 0.5013411045074463, "step": 6656 }, { "epoch": 1.5346397694524496, "grad_norm": 1.480266495626672, "learning_rate": 2.820620632711048e-07, "loss": 0.5018881559371948, "step": 6657 }, { "epoch": 1.534870317002882, "grad_norm": 1.9887255008924078, "learning_rate": 2.817967811676839e-07, "loss": 0.4205210208892822, "step": 6658 }, { "epoch": 1.5351008645533142, "grad_norm": 1.8583776706428177, "learning_rate": 2.815316034117193e-07, "loss": 0.4614740014076233, "step": 6659 }, { "epoch": 1.5353314121037465, "grad_norm": 1.814790283470267, "learning_rate": 2.812665300417384e-07, "loss": 0.528913676738739, "step": 6660 }, { "epoch": 1.5355619596541787, "grad_norm": 1.8558142199957448, "learning_rate": 2.8100156109625385e-07, "loss": 0.46981993317604065, "step": 6661 }, { "epoch": 1.535792507204611, "grad_norm": 1.6880680317462484, "learning_rate": 2.807366966137632e-07, "loss": 0.4568699598312378, "step": 6662 }, { "epoch": 1.5360230547550433, "grad_norm": 1.499127303889182, "learning_rate": 2.804719366327479e-07, "loss": 0.5659410953521729, "step": 6663 }, { "epoch": 1.5362536023054756, "grad_norm": 2.0231461298701707, "learning_rate": 2.802072811916754e-07, "loss": 0.36596113443374634, "step": 6664 }, { "epoch": 1.5364841498559079, "grad_norm": 1.6883253033702186, "learning_rate": 2.799427303289971e-07, "loss": 0.4764753580093384, "step": 6665 }, { "epoch": 1.5367146974063401, "grad_norm": 1.6695170645245285, "learning_rate": 2.796782840831491e-07, "loss": 0.5203686356544495, "step": 6666 }, { "epoch": 1.5369452449567724, "grad_norm": 1.4951683162021838, "learning_rate": 2.7941394249255336e-07, "loss": 0.42811352014541626, "step": 6667 }, { "epoch": 1.5371757925072047, "grad_norm": 1.5858028897704401, "learning_rate": 2.7914970559561546e-07, "loss": 0.5146535038948059, "step": 6668 }, { "epoch": 1.537406340057637, "grad_norm": 1.919094576999998, "learning_rate": 2.788855734307264e-07, "loss": 0.45644527673721313, "step": 6669 }, { "epoch": 1.5376368876080693, "grad_norm": 1.4614837709110287, "learning_rate": 2.786215460362622e-07, "loss": 0.4498692750930786, "step": 6670 }, { "epoch": 1.5378674351585016, "grad_norm": 1.7804653724848765, "learning_rate": 2.783576234505831e-07, "loss": 0.5310048460960388, "step": 6671 }, { "epoch": 1.5380979827089338, "grad_norm": 1.96879936627813, "learning_rate": 2.780938057120339e-07, "loss": 0.5187146663665771, "step": 6672 }, { "epoch": 1.5383285302593661, "grad_norm": 1.5356727607037908, "learning_rate": 2.778300928589451e-07, "loss": 0.4811710715293884, "step": 6673 }, { "epoch": 1.5385590778097984, "grad_norm": 1.666562643230331, "learning_rate": 2.7756648492963096e-07, "loss": 0.49236875772476196, "step": 6674 }, { "epoch": 1.5387896253602307, "grad_norm": 1.3329310825608907, "learning_rate": 2.7730298196239157e-07, "loss": 0.36981940269470215, "step": 6675 }, { "epoch": 1.539020172910663, "grad_norm": 1.597840023115702, "learning_rate": 2.7703958399551054e-07, "loss": 0.47676587104797363, "step": 6676 }, { "epoch": 1.5392507204610952, "grad_norm": 1.9450827633385033, "learning_rate": 2.767762910672574e-07, "loss": 0.4283655285835266, "step": 6677 }, { "epoch": 1.5394812680115275, "grad_norm": 1.546272242244294, "learning_rate": 2.7651310321588573e-07, "loss": 0.4599767327308655, "step": 6678 }, { "epoch": 1.5397118155619598, "grad_norm": 1.5217395243560299, "learning_rate": 2.7625002047963343e-07, "loss": 0.44924429059028625, "step": 6679 }, { "epoch": 1.539942363112392, "grad_norm": 1.435567998485271, "learning_rate": 2.7598704289672423e-07, "loss": 0.46689385175704956, "step": 6680 }, { "epoch": 1.5401729106628244, "grad_norm": 1.7978574318643632, "learning_rate": 2.7572417050536624e-07, "loss": 0.41301921010017395, "step": 6681 }, { "epoch": 1.5404034582132566, "grad_norm": 1.3658980896134711, "learning_rate": 2.7546140334375145e-07, "loss": 0.4051539897918701, "step": 6682 }, { "epoch": 1.540634005763689, "grad_norm": 1.7841621413207764, "learning_rate": 2.7519874145005784e-07, "loss": 0.5061618089675903, "step": 6683 }, { "epoch": 1.5408645533141212, "grad_norm": 1.8580808389371584, "learning_rate": 2.7493618486244707e-07, "loss": 0.5225817561149597, "step": 6684 }, { "epoch": 1.5410951008645535, "grad_norm": 1.6859040287421543, "learning_rate": 2.746737336190658e-07, "loss": 0.4648950695991516, "step": 6685 }, { "epoch": 1.5413256484149855, "grad_norm": 1.860078359778471, "learning_rate": 2.744113877580457e-07, "loss": 0.43647703528404236, "step": 6686 }, { "epoch": 1.5415561959654178, "grad_norm": 1.4941624249640828, "learning_rate": 2.741491473175027e-07, "loss": 0.521142840385437, "step": 6687 }, { "epoch": 1.54178674351585, "grad_norm": 1.532927468541072, "learning_rate": 2.73887012335538e-07, "loss": 0.4534454941749573, "step": 6688 }, { "epoch": 1.5420172910662824, "grad_norm": 1.4675733231304722, "learning_rate": 2.736249828502364e-07, "loss": 0.46668314933776855, "step": 6689 }, { "epoch": 1.5422478386167147, "grad_norm": 1.5326573954944585, "learning_rate": 2.7336305889966883e-07, "loss": 0.4454139471054077, "step": 6690 }, { "epoch": 1.542478386167147, "grad_norm": 1.7336061149384574, "learning_rate": 2.7310124052188974e-07, "loss": 0.5094617605209351, "step": 6691 }, { "epoch": 1.5427089337175792, "grad_norm": 1.5785750525116626, "learning_rate": 2.7283952775493837e-07, "loss": 0.45893144607543945, "step": 6692 }, { "epoch": 1.5429394812680115, "grad_norm": 1.7402929016413005, "learning_rate": 2.72577920636839e-07, "loss": 0.5504060983657837, "step": 6693 }, { "epoch": 1.5431700288184438, "grad_norm": 1.6147246247988525, "learning_rate": 2.72316419205601e-07, "loss": 0.45066389441490173, "step": 6694 }, { "epoch": 1.543400576368876, "grad_norm": 1.7322862041204292, "learning_rate": 2.7205502349921693e-07, "loss": 0.5064136385917664, "step": 6695 }, { "epoch": 1.5436311239193083, "grad_norm": 1.552736874900683, "learning_rate": 2.717937335556656e-07, "loss": 0.4684542715549469, "step": 6696 }, { "epoch": 1.5438616714697406, "grad_norm": 1.4915294954935223, "learning_rate": 2.715325494129095e-07, "loss": 0.5206316709518433, "step": 6697 }, { "epoch": 1.544092219020173, "grad_norm": 1.4864642883898989, "learning_rate": 2.7127147110889546e-07, "loss": 0.4287317097187042, "step": 6698 }, { "epoch": 1.5443227665706052, "grad_norm": 2.0644796323853956, "learning_rate": 2.710104986815562e-07, "loss": 0.5084264278411865, "step": 6699 }, { "epoch": 1.5445533141210375, "grad_norm": 2.1288340424993213, "learning_rate": 2.7074963216880763e-07, "loss": 0.5043609142303467, "step": 6700 }, { "epoch": 1.5447838616714697, "grad_norm": 1.59845836991449, "learning_rate": 2.7048887160855126e-07, "loss": 0.47983455657958984, "step": 6701 }, { "epoch": 1.545014409221902, "grad_norm": 1.5964312039079533, "learning_rate": 2.7022821703867324e-07, "loss": 0.46194732189178467, "step": 6702 }, { "epoch": 1.5452449567723343, "grad_norm": 1.6469503223160236, "learning_rate": 2.699676684970437e-07, "loss": 0.45597344636917114, "step": 6703 }, { "epoch": 1.5454755043227666, "grad_norm": 1.6935530011692812, "learning_rate": 2.697072260215174e-07, "loss": 0.4783972501754761, "step": 6704 }, { "epoch": 1.5457060518731989, "grad_norm": 1.380249835342775, "learning_rate": 2.694468896499338e-07, "loss": 0.38894015550613403, "step": 6705 }, { "epoch": 1.5459365994236312, "grad_norm": 1.3839758510594902, "learning_rate": 2.691866594201173e-07, "loss": 0.4892553389072418, "step": 6706 }, { "epoch": 1.5461671469740634, "grad_norm": 1.566000483112353, "learning_rate": 2.689265353698771e-07, "loss": 0.5352126359939575, "step": 6707 }, { "epoch": 1.5463976945244957, "grad_norm": 1.5309860118301892, "learning_rate": 2.6866651753700576e-07, "loss": 0.4805898666381836, "step": 6708 }, { "epoch": 1.546628242074928, "grad_norm": 1.5493878656820466, "learning_rate": 2.684066059592818e-07, "loss": 0.45238590240478516, "step": 6709 }, { "epoch": 1.5468587896253603, "grad_norm": 1.8917787793535266, "learning_rate": 2.6814680067446736e-07, "loss": 0.34512025117874146, "step": 6710 }, { "epoch": 1.5470893371757926, "grad_norm": 1.485168346068275, "learning_rate": 2.6788710172030916e-07, "loss": 0.5297055244445801, "step": 6711 }, { "epoch": 1.5473198847262248, "grad_norm": 1.374095554873191, "learning_rate": 2.6762750913453947e-07, "loss": 0.4209028482437134, "step": 6712 }, { "epoch": 1.547550432276657, "grad_norm": 1.7935969299479495, "learning_rate": 2.673680229548736e-07, "loss": 0.4791908264160156, "step": 6713 }, { "epoch": 1.5477809798270892, "grad_norm": 1.90306719497615, "learning_rate": 2.671086432190125e-07, "loss": 0.4767991602420807, "step": 6714 }, { "epoch": 1.5480115273775215, "grad_norm": 1.7090911972846048, "learning_rate": 2.668493699646418e-07, "loss": 0.4787396192550659, "step": 6715 }, { "epoch": 1.5482420749279537, "grad_norm": 1.6003570038579618, "learning_rate": 2.6659020322943084e-07, "loss": 0.42476657032966614, "step": 6716 }, { "epoch": 1.548472622478386, "grad_norm": 1.7618849900264748, "learning_rate": 2.6633114305103357e-07, "loss": 0.5486623048782349, "step": 6717 }, { "epoch": 1.5487031700288183, "grad_norm": 1.7089266434509598, "learning_rate": 2.6607218946708933e-07, "loss": 0.4897763431072235, "step": 6718 }, { "epoch": 1.5489337175792506, "grad_norm": 1.530185410258912, "learning_rate": 2.6581334251522057e-07, "loss": 0.5206410884857178, "step": 6719 }, { "epoch": 1.5491642651296829, "grad_norm": 1.585267194715654, "learning_rate": 2.6555460223303603e-07, "loss": 0.5253853797912598, "step": 6720 }, { "epoch": 1.5493948126801151, "grad_norm": 1.5285645352455675, "learning_rate": 2.652959686581272e-07, "loss": 0.4229533076286316, "step": 6721 }, { "epoch": 1.5496253602305474, "grad_norm": 1.465857605111891, "learning_rate": 2.650374418280714e-07, "loss": 0.38957664370536804, "step": 6722 }, { "epoch": 1.5498559077809797, "grad_norm": 1.7558356792496363, "learning_rate": 2.6477902178042965e-07, "loss": 0.49704110622406006, "step": 6723 }, { "epoch": 1.550086455331412, "grad_norm": 1.5616300844187965, "learning_rate": 2.6452070855274735e-07, "loss": 0.4891868531703949, "step": 6724 }, { "epoch": 1.5503170028818443, "grad_norm": 1.716364048095666, "learning_rate": 2.6426250218255506e-07, "loss": 0.4441138505935669, "step": 6725 }, { "epoch": 1.5505475504322765, "grad_norm": 1.7565337255811777, "learning_rate": 2.6400440270736776e-07, "loss": 0.3671753704547882, "step": 6726 }, { "epoch": 1.5507780979827088, "grad_norm": 1.6232570381426292, "learning_rate": 2.6374641016468413e-07, "loss": 0.46538716554641724, "step": 6727 }, { "epoch": 1.551008645533141, "grad_norm": 1.3516189453331386, "learning_rate": 2.6348852459198855e-07, "loss": 0.45591798424720764, "step": 6728 }, { "epoch": 1.5512391930835734, "grad_norm": 1.5308684864062825, "learning_rate": 2.63230746026748e-07, "loss": 0.438511461019516, "step": 6729 }, { "epoch": 1.5514697406340057, "grad_norm": 1.8201832016490442, "learning_rate": 2.629730745064156e-07, "loss": 0.463054358959198, "step": 6730 }, { "epoch": 1.551700288184438, "grad_norm": 1.5514099053161647, "learning_rate": 2.6271551006842865e-07, "loss": 0.4669750928878784, "step": 6731 }, { "epoch": 1.5519308357348702, "grad_norm": 1.7191439883700805, "learning_rate": 2.6245805275020783e-07, "loss": 0.399213969707489, "step": 6732 }, { "epoch": 1.5521613832853025, "grad_norm": 1.8508399260110915, "learning_rate": 2.622007025891598e-07, "loss": 0.47209489345550537, "step": 6733 }, { "epoch": 1.5523919308357348, "grad_norm": 1.8455257948540806, "learning_rate": 2.619434596226746e-07, "loss": 0.4839526414871216, "step": 6734 }, { "epoch": 1.552622478386167, "grad_norm": 1.6023080770873257, "learning_rate": 2.616863238881266e-07, "loss": 0.47192198038101196, "step": 6735 }, { "epoch": 1.5528530259365994, "grad_norm": 1.8657326968382677, "learning_rate": 2.614292954228754e-07, "loss": 0.389009028673172, "step": 6736 }, { "epoch": 1.5530835734870316, "grad_norm": 1.8766723343050034, "learning_rate": 2.611723742642641e-07, "loss": 0.47060155868530273, "step": 6737 }, { "epoch": 1.553314121037464, "grad_norm": 1.4040566740202909, "learning_rate": 2.6091556044962094e-07, "loss": 0.4835396409034729, "step": 6738 }, { "epoch": 1.5535446685878962, "grad_norm": 1.6935927742553605, "learning_rate": 2.6065885401625867e-07, "loss": 0.5195285677909851, "step": 6739 }, { "epoch": 1.5537752161383285, "grad_norm": 2.006654013641868, "learning_rate": 2.6040225500147363e-07, "loss": 0.48561543226242065, "step": 6740 }, { "epoch": 1.5540057636887608, "grad_norm": 1.6144400066706488, "learning_rate": 2.601457634425471e-07, "loss": 0.4577465355396271, "step": 6741 }, { "epoch": 1.554236311239193, "grad_norm": 1.364151207746382, "learning_rate": 2.5988937937674427e-07, "loss": 0.3737722635269165, "step": 6742 }, { "epoch": 1.5544668587896253, "grad_norm": 1.815041454227608, "learning_rate": 2.5963310284131545e-07, "loss": 0.4451100528240204, "step": 6743 }, { "epoch": 1.5546974063400576, "grad_norm": 1.4908238286019466, "learning_rate": 2.5937693387349513e-07, "loss": 0.3610233664512634, "step": 6744 }, { "epoch": 1.5549279538904899, "grad_norm": 1.5670821566769393, "learning_rate": 2.591208725105015e-07, "loss": 0.43974393606185913, "step": 6745 }, { "epoch": 1.5551585014409222, "grad_norm": 2.0006014338762346, "learning_rate": 2.588649187895382e-07, "loss": 0.5391696095466614, "step": 6746 }, { "epoch": 1.5553890489913544, "grad_norm": 1.8335182104585823, "learning_rate": 2.586090727477923e-07, "loss": 0.49853283166885376, "step": 6747 }, { "epoch": 1.5556195965417867, "grad_norm": 1.8206435100176481, "learning_rate": 2.5835333442243524e-07, "loss": 0.4586958885192871, "step": 6748 }, { "epoch": 1.555850144092219, "grad_norm": 2.0804734095027477, "learning_rate": 2.580977038506239e-07, "loss": 0.4456654489040375, "step": 6749 }, { "epoch": 1.5560806916426513, "grad_norm": 1.730782017536939, "learning_rate": 2.5784218106949795e-07, "loss": 0.39961719512939453, "step": 6750 }, { "epoch": 1.5563112391930836, "grad_norm": 1.8389730814722773, "learning_rate": 2.5758676611618257e-07, "loss": 0.460401713848114, "step": 6751 }, { "epoch": 1.5565417867435158, "grad_norm": 1.726236854490963, "learning_rate": 2.5733145902778733e-07, "loss": 0.4925374388694763, "step": 6752 }, { "epoch": 1.5567723342939481, "grad_norm": 1.5901382869844622, "learning_rate": 2.570762598414051e-07, "loss": 0.42615199089050293, "step": 6753 }, { "epoch": 1.5570028818443804, "grad_norm": 1.6475869590160572, "learning_rate": 2.568211685941136e-07, "loss": 0.47983551025390625, "step": 6754 }, { "epoch": 1.5572334293948127, "grad_norm": 1.4379677339689119, "learning_rate": 2.5656618532297547e-07, "loss": 0.44416266679763794, "step": 6755 }, { "epoch": 1.557463976945245, "grad_norm": 1.8167037891496292, "learning_rate": 2.563113100650366e-07, "loss": 0.47325876355171204, "step": 6756 }, { "epoch": 1.5576945244956772, "grad_norm": 1.4587384999344424, "learning_rate": 2.5605654285732814e-07, "loss": 0.4115426242351532, "step": 6757 }, { "epoch": 1.5579250720461095, "grad_norm": 1.7770718135646975, "learning_rate": 2.558018837368646e-07, "loss": 0.46395576000213623, "step": 6758 }, { "epoch": 1.5581556195965418, "grad_norm": 1.3519334824438696, "learning_rate": 2.5554733274064597e-07, "loss": 0.41246697306632996, "step": 6759 }, { "epoch": 1.558386167146974, "grad_norm": 1.7600589242580773, "learning_rate": 2.5529288990565557e-07, "loss": 0.5139991044998169, "step": 6760 }, { "epoch": 1.5586167146974064, "grad_norm": 1.6093969605391563, "learning_rate": 2.5503855526886084e-07, "loss": 0.4346150755882263, "step": 6761 }, { "epoch": 1.5588472622478386, "grad_norm": 1.6307147869055905, "learning_rate": 2.5478432886721434e-07, "loss": 0.44862866401672363, "step": 6762 }, { "epoch": 1.559077809798271, "grad_norm": 1.422425939609917, "learning_rate": 2.545302107376529e-07, "loss": 0.41274869441986084, "step": 6763 }, { "epoch": 1.5593083573487032, "grad_norm": 1.7395975622996638, "learning_rate": 2.5427620091709645e-07, "loss": 0.5566954612731934, "step": 6764 }, { "epoch": 1.5595389048991355, "grad_norm": 2.028516666606984, "learning_rate": 2.540222994424508e-07, "loss": 0.5888369083404541, "step": 6765 }, { "epoch": 1.5597694524495678, "grad_norm": 1.7585045308287393, "learning_rate": 2.537685063506048e-07, "loss": 0.4688549339771271, "step": 6766 }, { "epoch": 1.56, "grad_norm": 1.9492383318424087, "learning_rate": 2.5351482167843153e-07, "loss": 0.4225703477859497, "step": 6767 }, { "epoch": 1.5602305475504323, "grad_norm": 1.6959394085598527, "learning_rate": 2.5326124546278947e-07, "loss": 0.47270894050598145, "step": 6768 }, { "epoch": 1.5604610951008646, "grad_norm": 1.8877771671569992, "learning_rate": 2.530077777405201e-07, "loss": 0.5117524266242981, "step": 6769 }, { "epoch": 1.560691642651297, "grad_norm": 1.452301579537498, "learning_rate": 2.5275441854844967e-07, "loss": 0.4925321638584137, "step": 6770 }, { "epoch": 1.5609221902017292, "grad_norm": 1.5144358135947686, "learning_rate": 2.5250116792338917e-07, "loss": 0.3935622572898865, "step": 6771 }, { "epoch": 1.5611527377521615, "grad_norm": 1.6339047025884095, "learning_rate": 2.522480259021329e-07, "loss": 0.43235844373703003, "step": 6772 }, { "epoch": 1.5613832853025937, "grad_norm": 1.8401079537023814, "learning_rate": 2.519949925214597e-07, "loss": 0.578273594379425, "step": 6773 }, { "epoch": 1.561613832853026, "grad_norm": 1.7857356240818296, "learning_rate": 2.5174206781813243e-07, "loss": 0.4800739288330078, "step": 6774 }, { "epoch": 1.5618443804034583, "grad_norm": 1.4278838200119695, "learning_rate": 2.514892518288988e-07, "loss": 0.4485281705856323, "step": 6775 }, { "epoch": 1.5620749279538906, "grad_norm": 1.7437958447826178, "learning_rate": 2.5123654459049057e-07, "loss": 0.5072147846221924, "step": 6776 }, { "epoch": 1.5623054755043229, "grad_norm": 1.8975658070712482, "learning_rate": 2.509839461396229e-07, "loss": 0.4516841471195221, "step": 6777 }, { "epoch": 1.5625360230547551, "grad_norm": 1.7740833513311727, "learning_rate": 2.507314565129962e-07, "loss": 0.5076330900192261, "step": 6778 }, { "epoch": 1.5627665706051874, "grad_norm": 1.6281423676668454, "learning_rate": 2.5047907574729443e-07, "loss": 0.5011087656021118, "step": 6779 }, { "epoch": 1.5629971181556197, "grad_norm": 1.4979953019445997, "learning_rate": 2.502268038791856e-07, "loss": 0.3949703574180603, "step": 6780 }, { "epoch": 1.563227665706052, "grad_norm": 1.624938698617509, "learning_rate": 2.499746409453227e-07, "loss": 0.39678525924682617, "step": 6781 }, { "epoch": 1.5634582132564843, "grad_norm": 1.6812991120579335, "learning_rate": 2.4972258698234185e-07, "loss": 0.44274720549583435, "step": 6782 }, { "epoch": 1.5636887608069165, "grad_norm": 1.4348522477076457, "learning_rate": 2.494706420268641e-07, "loss": 0.5082550048828125, "step": 6783 }, { "epoch": 1.5639193083573488, "grad_norm": 1.3908999166270042, "learning_rate": 2.492188061154946e-07, "loss": 0.44391587376594543, "step": 6784 }, { "epoch": 1.564149855907781, "grad_norm": 1.7005169151282067, "learning_rate": 2.4896707928482254e-07, "loss": 0.471325159072876, "step": 6785 }, { "epoch": 1.5643804034582134, "grad_norm": 1.6719246286514318, "learning_rate": 2.48715461571421e-07, "loss": 0.4599985182285309, "step": 6786 }, { "epoch": 1.5646109510086457, "grad_norm": 1.7250479291501297, "learning_rate": 2.4846395301184706e-07, "loss": 0.5035426616668701, "step": 6787 }, { "epoch": 1.564841498559078, "grad_norm": 1.752363888581634, "learning_rate": 2.482125536426427e-07, "loss": 0.4742690920829773, "step": 6788 }, { "epoch": 1.5650720461095102, "grad_norm": 1.387253350450147, "learning_rate": 2.47961263500334e-07, "loss": 0.4548560380935669, "step": 6789 }, { "epoch": 1.5653025936599425, "grad_norm": 1.7361892806040864, "learning_rate": 2.4771008262143003e-07, "loss": 0.42407310009002686, "step": 6790 }, { "epoch": 1.5655331412103748, "grad_norm": 1.7554110781883456, "learning_rate": 2.4745901104242537e-07, "loss": 0.5894111394882202, "step": 6791 }, { "epoch": 1.565763688760807, "grad_norm": 1.5368093751158929, "learning_rate": 2.4720804879979796e-07, "loss": 0.4132724702358246, "step": 6792 }, { "epoch": 1.5659942363112394, "grad_norm": 1.682979728804869, "learning_rate": 2.4695719593000964e-07, "loss": 0.5049244165420532, "step": 6793 }, { "epoch": 1.5662247838616716, "grad_norm": 2.3775113241805315, "learning_rate": 2.4670645246950725e-07, "loss": 0.5342719554901123, "step": 6794 }, { "epoch": 1.566455331412104, "grad_norm": 1.7576636684029612, "learning_rate": 2.4645581845472077e-07, "loss": 0.5695189237594604, "step": 6795 }, { "epoch": 1.566685878962536, "grad_norm": 1.589967543943453, "learning_rate": 2.4620529392206477e-07, "loss": 0.44183048605918884, "step": 6796 }, { "epoch": 1.5669164265129683, "grad_norm": 1.6197990017545385, "learning_rate": 2.4595487890793834e-07, "loss": 0.46745526790618896, "step": 6797 }, { "epoch": 1.5671469740634005, "grad_norm": 1.8670350554799435, "learning_rate": 2.4570457344872386e-07, "loss": 0.4594680666923523, "step": 6798 }, { "epoch": 1.5673775216138328, "grad_norm": 1.9718361980202608, "learning_rate": 2.454543775807877e-07, "loss": 0.48752421140670776, "step": 6799 }, { "epoch": 1.567608069164265, "grad_norm": 1.4241399512515274, "learning_rate": 2.4520429134048146e-07, "loss": 0.4049089252948761, "step": 6800 }, { "epoch": 1.5678386167146974, "grad_norm": 1.593383566073588, "learning_rate": 2.449543147641394e-07, "loss": 0.5329450368881226, "step": 6801 }, { "epoch": 1.5680691642651297, "grad_norm": 1.6931578369576277, "learning_rate": 2.4470444788808106e-07, "loss": 0.48651859164237976, "step": 6802 }, { "epoch": 1.568299711815562, "grad_norm": 1.5393896989762188, "learning_rate": 2.44454690748609e-07, "loss": 0.44224095344543457, "step": 6803 }, { "epoch": 1.5685302593659942, "grad_norm": 1.5599638316816633, "learning_rate": 2.4420504338201096e-07, "loss": 0.4946790337562561, "step": 6804 }, { "epoch": 1.5687608069164265, "grad_norm": 1.4168062301601407, "learning_rate": 2.439555058245577e-07, "loss": 0.41671523451805115, "step": 6805 }, { "epoch": 1.5689913544668588, "grad_norm": 1.560840380576806, "learning_rate": 2.437060781125041e-07, "loss": 0.4457974433898926, "step": 6806 }, { "epoch": 1.569221902017291, "grad_norm": 1.7545615802633734, "learning_rate": 2.4345676028208985e-07, "loss": 0.48806965351104736, "step": 6807 }, { "epoch": 1.5694524495677233, "grad_norm": 1.3837750693537794, "learning_rate": 2.432075523695385e-07, "loss": 0.4820772409439087, "step": 6808 }, { "epoch": 1.5696829971181556, "grad_norm": 1.4951609195898863, "learning_rate": 2.429584544110567e-07, "loss": 0.4809981882572174, "step": 6809 }, { "epoch": 1.569913544668588, "grad_norm": 1.766097670042948, "learning_rate": 2.427094664428364e-07, "loss": 0.45282435417175293, "step": 6810 }, { "epoch": 1.5701440922190202, "grad_norm": 1.8589225856452647, "learning_rate": 2.424605885010527e-07, "loss": 0.5170393586158752, "step": 6811 }, { "epoch": 1.5703746397694525, "grad_norm": 1.4134732880067309, "learning_rate": 2.422118206218646e-07, "loss": 0.388106107711792, "step": 6812 }, { "epoch": 1.5706051873198847, "grad_norm": 1.63584303547554, "learning_rate": 2.419631628414163e-07, "loss": 0.44863706827163696, "step": 6813 }, { "epoch": 1.570835734870317, "grad_norm": 1.4107779406078444, "learning_rate": 2.4171461519583425e-07, "loss": 0.4372791051864624, "step": 6814 }, { "epoch": 1.5710662824207493, "grad_norm": 1.29268864966395, "learning_rate": 2.4146617772123046e-07, "loss": 0.4813999533653259, "step": 6815 }, { "epoch": 1.5712968299711816, "grad_norm": 1.5420414623437797, "learning_rate": 2.4121785045370046e-07, "loss": 0.4367016553878784, "step": 6816 }, { "epoch": 1.5715273775216139, "grad_norm": 1.6434220509981157, "learning_rate": 2.409696334293233e-07, "loss": 0.42757725715637207, "step": 6817 }, { "epoch": 1.5717579250720461, "grad_norm": 1.6577819897791792, "learning_rate": 2.4072152668416236e-07, "loss": 0.4397827386856079, "step": 6818 }, { "epoch": 1.5719884726224784, "grad_norm": 1.5756653213645186, "learning_rate": 2.4047353025426476e-07, "loss": 0.45200103521347046, "step": 6819 }, { "epoch": 1.5722190201729107, "grad_norm": 1.6542457562292414, "learning_rate": 2.4022564417566193e-07, "loss": 0.45443668961524963, "step": 6820 }, { "epoch": 1.572449567723343, "grad_norm": 1.6997348607365212, "learning_rate": 2.3997786848436965e-07, "loss": 0.516747236251831, "step": 6821 }, { "epoch": 1.5726801152737753, "grad_norm": 1.6365599277185443, "learning_rate": 2.3973020321638625e-07, "loss": 0.44745802879333496, "step": 6822 }, { "epoch": 1.5729106628242073, "grad_norm": 2.158839604427667, "learning_rate": 2.3948264840769585e-07, "loss": 0.5438615679740906, "step": 6823 }, { "epoch": 1.5731412103746396, "grad_norm": 1.643481401912203, "learning_rate": 2.39235204094265e-07, "loss": 0.4899371862411499, "step": 6824 }, { "epoch": 1.573371757925072, "grad_norm": 1.55622048106585, "learning_rate": 2.389878703120447e-07, "loss": 0.4395183324813843, "step": 6825 }, { "epoch": 1.5736023054755042, "grad_norm": 1.4934042205985354, "learning_rate": 2.387406470969704e-07, "loss": 0.479698121547699, "step": 6826 }, { "epoch": 1.5738328530259365, "grad_norm": 1.9307758017589527, "learning_rate": 2.384935344849607e-07, "loss": 0.6000853776931763, "step": 6827 }, { "epoch": 1.5740634005763687, "grad_norm": 1.3692449284024986, "learning_rate": 2.382465325119185e-07, "loss": 0.41117769479751587, "step": 6828 }, { "epoch": 1.574293948126801, "grad_norm": 1.9113886128584991, "learning_rate": 2.3799964121373117e-07, "loss": 0.5113134980201721, "step": 6829 }, { "epoch": 1.5745244956772333, "grad_norm": 1.4251842959330887, "learning_rate": 2.3775286062626897e-07, "loss": 0.49158281087875366, "step": 6830 }, { "epoch": 1.5747550432276656, "grad_norm": 1.780662153833107, "learning_rate": 2.375061907853866e-07, "loss": 0.5564873814582825, "step": 6831 }, { "epoch": 1.5749855907780979, "grad_norm": 1.9181105692541727, "learning_rate": 2.3725963172692244e-07, "loss": 0.4885261058807373, "step": 6832 }, { "epoch": 1.5752161383285301, "grad_norm": 1.5805066300147617, "learning_rate": 2.3701318348669908e-07, "loss": 0.4490503668785095, "step": 6833 }, { "epoch": 1.5754466858789624, "grad_norm": 1.4499272850949159, "learning_rate": 2.3676684610052334e-07, "loss": 0.45185232162475586, "step": 6834 }, { "epoch": 1.5756772334293947, "grad_norm": 1.995751623566264, "learning_rate": 2.365206196041848e-07, "loss": 0.5666052103042603, "step": 6835 }, { "epoch": 1.575907780979827, "grad_norm": 1.5643028039542144, "learning_rate": 2.3627450403345816e-07, "loss": 0.45692935585975647, "step": 6836 }, { "epoch": 1.5761383285302593, "grad_norm": 1.682140694304972, "learning_rate": 2.360284994241012e-07, "loss": 0.44901716709136963, "step": 6837 }, { "epoch": 1.5763688760806915, "grad_norm": 1.6290031908081577, "learning_rate": 2.357826058118555e-07, "loss": 0.5051450729370117, "step": 6838 }, { "epoch": 1.5765994236311238, "grad_norm": 1.8396132592321723, "learning_rate": 2.3553682323244762e-07, "loss": 0.43471968173980713, "step": 6839 }, { "epoch": 1.576829971181556, "grad_norm": 1.417260930252744, "learning_rate": 2.352911517215863e-07, "loss": 0.42540234327316284, "step": 6840 }, { "epoch": 1.5770605187319884, "grad_norm": 1.695717710695922, "learning_rate": 2.350455913149657e-07, "loss": 0.5011035799980164, "step": 6841 }, { "epoch": 1.5772910662824207, "grad_norm": 1.3387049067961254, "learning_rate": 2.3480014204826348e-07, "loss": 0.4265633225440979, "step": 6842 }, { "epoch": 1.577521613832853, "grad_norm": 1.8058533825125813, "learning_rate": 2.345548039571399e-07, "loss": 0.4330548346042633, "step": 6843 }, { "epoch": 1.5777521613832852, "grad_norm": 1.5588036812303396, "learning_rate": 2.3430957707724052e-07, "loss": 0.453426718711853, "step": 6844 }, { "epoch": 1.5779827089337175, "grad_norm": 1.5822406339462496, "learning_rate": 2.3406446144419446e-07, "loss": 0.4875522255897522, "step": 6845 }, { "epoch": 1.5782132564841498, "grad_norm": 1.8097072716346074, "learning_rate": 2.3381945709361416e-07, "loss": 0.41219890117645264, "step": 6846 }, { "epoch": 1.578443804034582, "grad_norm": 1.7446551101419503, "learning_rate": 2.3357456406109644e-07, "loss": 0.4699985980987549, "step": 6847 }, { "epoch": 1.5786743515850143, "grad_norm": 1.7989203387061783, "learning_rate": 2.3332978238222178e-07, "loss": 0.4073752164840698, "step": 6848 }, { "epoch": 1.5789048991354466, "grad_norm": 1.7603722349498907, "learning_rate": 2.3308511209255376e-07, "loss": 0.5328919887542725, "step": 6849 }, { "epoch": 1.579135446685879, "grad_norm": 1.9514955832292185, "learning_rate": 2.328405532276413e-07, "loss": 0.538253128528595, "step": 6850 }, { "epoch": 1.5793659942363112, "grad_norm": 1.5632567756625935, "learning_rate": 2.3259610582301558e-07, "loss": 0.44995903968811035, "step": 6851 }, { "epoch": 1.5795965417867435, "grad_norm": 1.7120874755790536, "learning_rate": 2.3235176991419247e-07, "loss": 0.4335440397262573, "step": 6852 }, { "epoch": 1.5798270893371757, "grad_norm": 1.6187630739158092, "learning_rate": 2.321075455366719e-07, "loss": 0.4642670154571533, "step": 6853 }, { "epoch": 1.580057636887608, "grad_norm": 1.8439898983263738, "learning_rate": 2.3186343272593656e-07, "loss": 0.4488638639450073, "step": 6854 }, { "epoch": 1.5802881844380403, "grad_norm": 1.585715016192241, "learning_rate": 2.3161943151745378e-07, "loss": 0.48309770226478577, "step": 6855 }, { "epoch": 1.5805187319884726, "grad_norm": 1.5603192455872934, "learning_rate": 2.313755419466741e-07, "loss": 0.44456416368484497, "step": 6856 }, { "epoch": 1.5807492795389049, "grad_norm": 1.599130287546092, "learning_rate": 2.3113176404903222e-07, "loss": 0.46257686614990234, "step": 6857 }, { "epoch": 1.5809798270893372, "grad_norm": 1.6327518009499344, "learning_rate": 2.308880978599469e-07, "loss": 0.3914128541946411, "step": 6858 }, { "epoch": 1.5812103746397694, "grad_norm": 2.107969712331713, "learning_rate": 2.3064454341481988e-07, "loss": 0.508151113986969, "step": 6859 }, { "epoch": 1.5814409221902017, "grad_norm": 1.5415023234803529, "learning_rate": 2.304011007490374e-07, "loss": 0.43072253465652466, "step": 6860 }, { "epoch": 1.581671469740634, "grad_norm": 1.5469316288580008, "learning_rate": 2.3015776989796909e-07, "loss": 0.432369589805603, "step": 6861 }, { "epoch": 1.5819020172910663, "grad_norm": 1.682956567076036, "learning_rate": 2.299145508969681e-07, "loss": 0.4282001554965973, "step": 6862 }, { "epoch": 1.5821325648414986, "grad_norm": 1.8586742904701485, "learning_rate": 2.2967144378137194e-07, "loss": 0.5486190319061279, "step": 6863 }, { "epoch": 1.5823631123919308, "grad_norm": 1.6360275945951068, "learning_rate": 2.2942844858650122e-07, "loss": 0.3602842092514038, "step": 6864 }, { "epoch": 1.5825936599423631, "grad_norm": 1.8002022611312485, "learning_rate": 2.2918556534766087e-07, "loss": 0.39400649070739746, "step": 6865 }, { "epoch": 1.5828242074927954, "grad_norm": 2.0187438455147397, "learning_rate": 2.289427941001395e-07, "loss": 0.5241566896438599, "step": 6866 }, { "epoch": 1.5830547550432277, "grad_norm": 1.9477429004752067, "learning_rate": 2.2870013487920902e-07, "loss": 0.4443414807319641, "step": 6867 }, { "epoch": 1.58328530259366, "grad_norm": 1.4339092748204783, "learning_rate": 2.2845758772012523e-07, "loss": 0.4845046401023865, "step": 6868 }, { "epoch": 1.5835158501440922, "grad_norm": 1.4451826716980998, "learning_rate": 2.2821515265812753e-07, "loss": 0.4631197452545166, "step": 6869 }, { "epoch": 1.5837463976945245, "grad_norm": 1.8130555774397925, "learning_rate": 2.2797282972843935e-07, "loss": 0.47642138600349426, "step": 6870 }, { "epoch": 1.5839769452449568, "grad_norm": 1.5693735960928867, "learning_rate": 2.2773061896626811e-07, "loss": 0.48786088824272156, "step": 6871 }, { "epoch": 1.584207492795389, "grad_norm": 1.656811061554273, "learning_rate": 2.2748852040680378e-07, "loss": 0.4556247591972351, "step": 6872 }, { "epoch": 1.5844380403458214, "grad_norm": 1.604595617408912, "learning_rate": 2.2724653408522155e-07, "loss": 0.4918019771575928, "step": 6873 }, { "epoch": 1.5846685878962536, "grad_norm": 1.2997559949157804, "learning_rate": 2.2700466003667917e-07, "loss": 0.4443207383155823, "step": 6874 }, { "epoch": 1.584899135446686, "grad_norm": 1.593924905912374, "learning_rate": 2.2676289829631802e-07, "loss": 0.42983290553092957, "step": 6875 }, { "epoch": 1.5851296829971182, "grad_norm": 1.5434495937585317, "learning_rate": 2.2652124889926417e-07, "loss": 0.414949893951416, "step": 6876 }, { "epoch": 1.5853602305475505, "grad_norm": 1.521196276312085, "learning_rate": 2.2627971188062622e-07, "loss": 0.39912670850753784, "step": 6877 }, { "epoch": 1.5855907780979828, "grad_norm": 1.3168327583899544, "learning_rate": 2.2603828727549734e-07, "loss": 0.4181273579597473, "step": 6878 }, { "epoch": 1.585821325648415, "grad_norm": 1.7553292134606597, "learning_rate": 2.2579697511895425e-07, "loss": 0.5589914321899414, "step": 6879 }, { "epoch": 1.5860518731988473, "grad_norm": 1.625959912645244, "learning_rate": 2.2555577544605686e-07, "loss": 0.4346531629562378, "step": 6880 }, { "epoch": 1.5862824207492796, "grad_norm": 1.5868469446711888, "learning_rate": 2.2531468829184852e-07, "loss": 0.4413943290710449, "step": 6881 }, { "epoch": 1.586512968299712, "grad_norm": 1.4600697465130843, "learning_rate": 2.250737136913574e-07, "loss": 0.45540279150009155, "step": 6882 }, { "epoch": 1.5867435158501442, "grad_norm": 1.6137315194829938, "learning_rate": 2.24832851679594e-07, "loss": 0.4250563979148865, "step": 6883 }, { "epoch": 1.5869740634005765, "grad_norm": 1.4798874934990478, "learning_rate": 2.2459210229155356e-07, "loss": 0.44773417711257935, "step": 6884 }, { "epoch": 1.5872046109510087, "grad_norm": 1.4993813947174892, "learning_rate": 2.2435146556221408e-07, "loss": 0.4882596731185913, "step": 6885 }, { "epoch": 1.587435158501441, "grad_norm": 1.5587737674283808, "learning_rate": 2.2411094152653798e-07, "loss": 0.4748955965042114, "step": 6886 }, { "epoch": 1.5876657060518733, "grad_norm": 1.6227574167605214, "learning_rate": 2.2387053021947065e-07, "loss": 0.3629099130630493, "step": 6887 }, { "epoch": 1.5878962536023056, "grad_norm": 1.6993060193268632, "learning_rate": 2.236302316759411e-07, "loss": 0.4068647623062134, "step": 6888 }, { "epoch": 1.5881268011527379, "grad_norm": 1.4233201625460687, "learning_rate": 2.2339004593086252e-07, "loss": 0.44618022441864014, "step": 6889 }, { "epoch": 1.5883573487031701, "grad_norm": 1.8263823863304054, "learning_rate": 2.2314997301913153e-07, "loss": 0.46568962931632996, "step": 6890 }, { "epoch": 1.5885878962536024, "grad_norm": 1.8982767649260424, "learning_rate": 2.2291001297562784e-07, "loss": 0.5379288792610168, "step": 6891 }, { "epoch": 1.5888184438040347, "grad_norm": 1.9640546549928781, "learning_rate": 2.2267016583521558e-07, "loss": 0.4629078805446625, "step": 6892 }, { "epoch": 1.589048991354467, "grad_norm": 1.489161454120763, "learning_rate": 2.2243043163274189e-07, "loss": 0.4108186662197113, "step": 6893 }, { "epoch": 1.5892795389048993, "grad_norm": 1.8632335546890277, "learning_rate": 2.2219081040303734e-07, "loss": 0.4663807153701782, "step": 6894 }, { "epoch": 1.5895100864553315, "grad_norm": 1.8071280855278253, "learning_rate": 2.2195130218091685e-07, "loss": 0.42701542377471924, "step": 6895 }, { "epoch": 1.5897406340057638, "grad_norm": 1.8417473897562753, "learning_rate": 2.2171190700117804e-07, "loss": 0.5210022926330566, "step": 6896 }, { "epoch": 1.589971181556196, "grad_norm": 1.8057710310189852, "learning_rate": 2.2147262489860275e-07, "loss": 0.41991421580314636, "step": 6897 }, { "epoch": 1.5902017291066284, "grad_norm": 1.7840543361838392, "learning_rate": 2.212334559079564e-07, "loss": 0.46946293115615845, "step": 6898 }, { "epoch": 1.5904322766570607, "grad_norm": 1.7012515976823714, "learning_rate": 2.2099440006398772e-07, "loss": 0.35496097803115845, "step": 6899 }, { "epoch": 1.590662824207493, "grad_norm": 1.518063979509112, "learning_rate": 2.2075545740142875e-07, "loss": 0.4571149945259094, "step": 6900 }, { "epoch": 1.5908933717579252, "grad_norm": 1.4946182440732432, "learning_rate": 2.2051662795499525e-07, "loss": 0.4564734399318695, "step": 6901 }, { "epoch": 1.5911239193083575, "grad_norm": 1.4348743515049034, "learning_rate": 2.2027791175938693e-07, "loss": 0.4557456076145172, "step": 6902 }, { "epoch": 1.5913544668587898, "grad_norm": 1.4908849547027518, "learning_rate": 2.2003930884928702e-07, "loss": 0.4800085723400116, "step": 6903 }, { "epoch": 1.591585014409222, "grad_norm": 1.5940599632244288, "learning_rate": 2.1980081925936144e-07, "loss": 0.45482996106147766, "step": 6904 }, { "epoch": 1.5918155619596541, "grad_norm": 1.7435440666868574, "learning_rate": 2.1956244302426097e-07, "loss": 0.478973925113678, "step": 6905 }, { "epoch": 1.5920461095100864, "grad_norm": 1.3952796040058404, "learning_rate": 2.1932418017861863e-07, "loss": 0.4436086416244507, "step": 6906 }, { "epoch": 1.5922766570605187, "grad_norm": 1.7126522646300093, "learning_rate": 2.1908603075705156e-07, "loss": 0.4582422375679016, "step": 6907 }, { "epoch": 1.592507204610951, "grad_norm": 1.630225683771869, "learning_rate": 2.188479947941607e-07, "loss": 0.5626469850540161, "step": 6908 }, { "epoch": 1.5927377521613832, "grad_norm": 1.6154153830226072, "learning_rate": 2.186100723245299e-07, "loss": 0.49945351481437683, "step": 6909 }, { "epoch": 1.5929682997118155, "grad_norm": 1.317835601373723, "learning_rate": 2.1837226338272685e-07, "loss": 0.4216611683368683, "step": 6910 }, { "epoch": 1.5931988472622478, "grad_norm": 1.6644970218843584, "learning_rate": 2.181345680033031e-07, "loss": 0.461073100566864, "step": 6911 }, { "epoch": 1.59342939481268, "grad_norm": 1.656443059857395, "learning_rate": 2.178969862207931e-07, "loss": 0.5296883583068848, "step": 6912 }, { "epoch": 1.5936599423631124, "grad_norm": 1.5254712888429038, "learning_rate": 2.1765951806971484e-07, "loss": 0.4975186586380005, "step": 6913 }, { "epoch": 1.5938904899135447, "grad_norm": 1.6364108447848835, "learning_rate": 2.174221635845699e-07, "loss": 0.4512255787849426, "step": 6914 }, { "epoch": 1.594121037463977, "grad_norm": 1.6579860313320571, "learning_rate": 2.1718492279984358e-07, "loss": 0.5384291410446167, "step": 6915 }, { "epoch": 1.5943515850144092, "grad_norm": 1.6048868780122425, "learning_rate": 2.1694779575000476e-07, "loss": 0.5140515565872192, "step": 6916 }, { "epoch": 1.5945821325648415, "grad_norm": 1.6618051236543094, "learning_rate": 2.1671078246950503e-07, "loss": 0.3933897018432617, "step": 6917 }, { "epoch": 1.5948126801152738, "grad_norm": 1.6798554271750792, "learning_rate": 2.1647388299278046e-07, "loss": 0.45260024070739746, "step": 6918 }, { "epoch": 1.595043227665706, "grad_norm": 1.6622533875894812, "learning_rate": 2.162370973542499e-07, "loss": 0.46932220458984375, "step": 6919 }, { "epoch": 1.5952737752161383, "grad_norm": 1.7467607595951578, "learning_rate": 2.1600042558831545e-07, "loss": 0.46241551637649536, "step": 6920 }, { "epoch": 1.5955043227665706, "grad_norm": 1.8116389882531025, "learning_rate": 2.1576386772936363e-07, "loss": 0.5214533805847168, "step": 6921 }, { "epoch": 1.595734870317003, "grad_norm": 1.8713209887402171, "learning_rate": 2.1552742381176326e-07, "loss": 0.49391108751296997, "step": 6922 }, { "epoch": 1.5959654178674352, "grad_norm": 1.4096867093895666, "learning_rate": 2.1529109386986754e-07, "loss": 0.4527132511138916, "step": 6923 }, { "epoch": 1.5961959654178675, "grad_norm": 1.463280102232623, "learning_rate": 2.1505487793801301e-07, "loss": 0.43627679347991943, "step": 6924 }, { "epoch": 1.5964265129682997, "grad_norm": 1.5759299558725761, "learning_rate": 2.1481877605051913e-07, "loss": 0.5021758079528809, "step": 6925 }, { "epoch": 1.596657060518732, "grad_norm": 1.3836341256873759, "learning_rate": 2.1458278824168874e-07, "loss": 0.4158906936645508, "step": 6926 }, { "epoch": 1.5968876080691643, "grad_norm": 1.5708743524166162, "learning_rate": 2.1434691454580888e-07, "loss": 0.45191070437431335, "step": 6927 }, { "epoch": 1.5971181556195966, "grad_norm": 1.700284121200831, "learning_rate": 2.1411115499714916e-07, "loss": 0.4693247973918915, "step": 6928 }, { "epoch": 1.5973487031700289, "grad_norm": 2.1682335280453526, "learning_rate": 2.1387550962996336e-07, "loss": 0.3784172832965851, "step": 6929 }, { "epoch": 1.5975792507204611, "grad_norm": 1.8489077076485807, "learning_rate": 2.136399784784879e-07, "loss": 0.45491930842399597, "step": 6930 }, { "epoch": 1.5978097982708934, "grad_norm": 1.741517442028974, "learning_rate": 2.1340456157694354e-07, "loss": 0.4505925476551056, "step": 6931 }, { "epoch": 1.5980403458213257, "grad_norm": 1.6067304492037071, "learning_rate": 2.1316925895953364e-07, "loss": 0.45480477809906006, "step": 6932 }, { "epoch": 1.5982708933717578, "grad_norm": 1.7828518645464755, "learning_rate": 2.1293407066044478e-07, "loss": 0.4488682746887207, "step": 6933 }, { "epoch": 1.59850144092219, "grad_norm": 2.009780498310941, "learning_rate": 2.1269899671384785e-07, "loss": 0.4563372731208801, "step": 6934 }, { "epoch": 1.5987319884726223, "grad_norm": 1.8158550451175957, "learning_rate": 2.1246403715389672e-07, "loss": 0.49727576971054077, "step": 6935 }, { "epoch": 1.5989625360230546, "grad_norm": 1.9371356570822131, "learning_rate": 2.1222919201472823e-07, "loss": 0.5009286403656006, "step": 6936 }, { "epoch": 1.5991930835734869, "grad_norm": 1.5623560718411584, "learning_rate": 2.1199446133046338e-07, "loss": 0.3688209056854248, "step": 6937 }, { "epoch": 1.5994236311239192, "grad_norm": 1.6119779247292187, "learning_rate": 2.1175984513520584e-07, "loss": 0.4590919613838196, "step": 6938 }, { "epoch": 1.5996541786743514, "grad_norm": 1.8414452773127332, "learning_rate": 2.1152534346304275e-07, "loss": 0.5258738994598389, "step": 6939 }, { "epoch": 1.5998847262247837, "grad_norm": 1.7277239463285377, "learning_rate": 2.1129095634804505e-07, "loss": 0.4137745797634125, "step": 6940 }, { "epoch": 1.600115273775216, "grad_norm": 1.776208310064266, "learning_rate": 2.1105668382426634e-07, "loss": 0.38268011808395386, "step": 6941 }, { "epoch": 1.6003458213256483, "grad_norm": 1.8866925869596325, "learning_rate": 2.1082252592574423e-07, "loss": 0.4915100932121277, "step": 6942 }, { "epoch": 1.6005763688760806, "grad_norm": 1.643885549144145, "learning_rate": 2.1058848268649986e-07, "loss": 0.44428160786628723, "step": 6943 }, { "epoch": 1.6008069164265128, "grad_norm": 1.947073483480518, "learning_rate": 2.1035455414053682e-07, "loss": 0.46833938360214233, "step": 6944 }, { "epoch": 1.6010374639769451, "grad_norm": 1.6568134566322756, "learning_rate": 2.1012074032184247e-07, "loss": 0.47894763946533203, "step": 6945 }, { "epoch": 1.6012680115273774, "grad_norm": 1.515406639156062, "learning_rate": 2.0988704126438738e-07, "loss": 0.3642117381095886, "step": 6946 }, { "epoch": 1.6014985590778097, "grad_norm": 1.4026104553016838, "learning_rate": 2.0965345700212578e-07, "loss": 0.395542174577713, "step": 6947 }, { "epoch": 1.601729106628242, "grad_norm": 1.8436231094001752, "learning_rate": 2.0941998756899537e-07, "loss": 0.47251084446907043, "step": 6948 }, { "epoch": 1.6019596541786743, "grad_norm": 2.2260940930238693, "learning_rate": 2.0918663299891625e-07, "loss": 0.515550971031189, "step": 6949 }, { "epoch": 1.6021902017291065, "grad_norm": 1.7608353830332475, "learning_rate": 2.0895339332579299e-07, "loss": 0.5145356059074402, "step": 6950 }, { "epoch": 1.6024207492795388, "grad_norm": 1.4657228610920408, "learning_rate": 2.0872026858351255e-07, "loss": 0.4490816593170166, "step": 6951 }, { "epoch": 1.602651296829971, "grad_norm": 1.8585570966425249, "learning_rate": 2.084872588059453e-07, "loss": 0.6396125555038452, "step": 6952 }, { "epoch": 1.6028818443804034, "grad_norm": 1.677774039877492, "learning_rate": 2.0825436402694574e-07, "loss": 0.4160453677177429, "step": 6953 }, { "epoch": 1.6031123919308357, "grad_norm": 1.732479091236752, "learning_rate": 2.0802158428035034e-07, "loss": 0.455702543258667, "step": 6954 }, { "epoch": 1.603342939481268, "grad_norm": 1.6973414598129097, "learning_rate": 2.0778891959998002e-07, "loss": 0.47815048694610596, "step": 6955 }, { "epoch": 1.6035734870317002, "grad_norm": 1.5949839265658996, "learning_rate": 2.0755637001963878e-07, "loss": 0.5771400928497314, "step": 6956 }, { "epoch": 1.6038040345821325, "grad_norm": 1.5561630257878414, "learning_rate": 2.0732393557311323e-07, "loss": 0.4287475347518921, "step": 6957 }, { "epoch": 1.6040345821325648, "grad_norm": 1.6827470461890062, "learning_rate": 2.0709161629417382e-07, "loss": 0.44147899746894836, "step": 6958 }, { "epoch": 1.604265129682997, "grad_norm": 1.5690305134788955, "learning_rate": 2.0685941221657388e-07, "loss": 0.5066714286804199, "step": 6959 }, { "epoch": 1.6044956772334293, "grad_norm": 1.4619488956650715, "learning_rate": 2.0662732337405054e-07, "loss": 0.47425931692123413, "step": 6960 }, { "epoch": 1.6047262247838616, "grad_norm": 2.006019965656153, "learning_rate": 2.063953498003239e-07, "loss": 0.4885402023792267, "step": 6961 }, { "epoch": 1.604956772334294, "grad_norm": 1.6398180184758835, "learning_rate": 2.061634915290974e-07, "loss": 0.4193480908870697, "step": 6962 }, { "epoch": 1.6051873198847262, "grad_norm": 1.9483442057634373, "learning_rate": 2.0593174859405714e-07, "loss": 0.4796232283115387, "step": 6963 }, { "epoch": 1.6054178674351585, "grad_norm": 1.7132013221413995, "learning_rate": 2.0570012102887356e-07, "loss": 0.42399299144744873, "step": 6964 }, { "epoch": 1.6056484149855907, "grad_norm": 1.7163715090870402, "learning_rate": 2.054686088671992e-07, "loss": 0.4394020140171051, "step": 6965 }, { "epoch": 1.605878962536023, "grad_norm": 1.7765040569397281, "learning_rate": 2.0523721214267087e-07, "loss": 0.3917948007583618, "step": 6966 }, { "epoch": 1.6061095100864553, "grad_norm": 1.454882745207512, "learning_rate": 2.050059308889076e-07, "loss": 0.39274662733078003, "step": 6967 }, { "epoch": 1.6063400576368876, "grad_norm": 1.779744919694154, "learning_rate": 2.0477476513951265e-07, "loss": 0.4619693458080292, "step": 6968 }, { "epoch": 1.6065706051873199, "grad_norm": 1.3167764775765771, "learning_rate": 2.0454371492807177e-07, "loss": 0.39066576957702637, "step": 6969 }, { "epoch": 1.6068011527377521, "grad_norm": 1.6112784154759012, "learning_rate": 2.0431278028815392e-07, "loss": 0.5046144723892212, "step": 6970 }, { "epoch": 1.6070317002881844, "grad_norm": 1.6893801716169201, "learning_rate": 2.0408196125331167e-07, "loss": 0.3814374506473541, "step": 6971 }, { "epoch": 1.6072622478386167, "grad_norm": 1.542470838097386, "learning_rate": 2.03851257857081e-07, "loss": 0.49477407336235046, "step": 6972 }, { "epoch": 1.607492795389049, "grad_norm": 1.7326796573740122, "learning_rate": 2.0362067013298e-07, "loss": 0.5003507137298584, "step": 6973 }, { "epoch": 1.6077233429394813, "grad_norm": 1.9600144557835601, "learning_rate": 2.0339019811451152e-07, "loss": 0.3798295259475708, "step": 6974 }, { "epoch": 1.6079538904899136, "grad_norm": 1.5009878438661703, "learning_rate": 2.0315984183516012e-07, "loss": 0.3913137912750244, "step": 6975 }, { "epoch": 1.6081844380403458, "grad_norm": 1.7801044809151643, "learning_rate": 2.029296013283942e-07, "loss": 0.40811246633529663, "step": 6976 }, { "epoch": 1.6084149855907781, "grad_norm": 1.7717326489240075, "learning_rate": 2.0269947662766562e-07, "loss": 0.5058863162994385, "step": 6977 }, { "epoch": 1.6086455331412104, "grad_norm": 1.826456443587998, "learning_rate": 2.024694677664087e-07, "loss": 0.45775991678237915, "step": 6978 }, { "epoch": 1.6088760806916427, "grad_norm": 1.3868260218878463, "learning_rate": 2.0223957477804164e-07, "loss": 0.39464837312698364, "step": 6979 }, { "epoch": 1.609106628242075, "grad_norm": 1.557694777666343, "learning_rate": 2.020097976959656e-07, "loss": 0.44453293085098267, "step": 6980 }, { "epoch": 1.6093371757925072, "grad_norm": 1.5365654039847554, "learning_rate": 2.0178013655356463e-07, "loss": 0.5004311203956604, "step": 6981 }, { "epoch": 1.6095677233429395, "grad_norm": 1.6102093248541456, "learning_rate": 2.0155059138420615e-07, "loss": 0.4887525141239166, "step": 6982 }, { "epoch": 1.6097982708933718, "grad_norm": 1.6742015253238756, "learning_rate": 2.0132116222124028e-07, "loss": 0.4750329256057739, "step": 6983 }, { "epoch": 1.610028818443804, "grad_norm": 1.5520776036181012, "learning_rate": 2.0109184909800115e-07, "loss": 0.5468109846115112, "step": 6984 }, { "epoch": 1.6102593659942364, "grad_norm": 1.8852776003246023, "learning_rate": 2.0086265204780572e-07, "loss": 0.4403616786003113, "step": 6985 }, { "epoch": 1.6104899135446686, "grad_norm": 1.3958899933269038, "learning_rate": 2.006335711039534e-07, "loss": 0.4939368963241577, "step": 6986 }, { "epoch": 1.610720461095101, "grad_norm": 1.3992598613678886, "learning_rate": 2.0040460629972788e-07, "loss": 0.4437975287437439, "step": 6987 }, { "epoch": 1.6109510086455332, "grad_norm": 1.5741668789949548, "learning_rate": 2.0017575766839502e-07, "loss": 0.4779764413833618, "step": 6988 }, { "epoch": 1.6111815561959655, "grad_norm": 1.6090259987078506, "learning_rate": 1.9994702524320383e-07, "loss": 0.49435535073280334, "step": 6989 }, { "epoch": 1.6114121037463978, "grad_norm": 1.682670242630337, "learning_rate": 1.9971840905738735e-07, "loss": 0.41287925839424133, "step": 6990 }, { "epoch": 1.61164265129683, "grad_norm": 1.6608345205857078, "learning_rate": 1.9948990914416065e-07, "loss": 0.43594056367874146, "step": 6991 }, { "epoch": 1.6118731988472623, "grad_norm": 1.4566510618351554, "learning_rate": 1.9926152553672258e-07, "loss": 0.4222400188446045, "step": 6992 }, { "epoch": 1.6121037463976946, "grad_norm": 1.3927222561183994, "learning_rate": 1.9903325826825524e-07, "loss": 0.40775904059410095, "step": 6993 }, { "epoch": 1.6123342939481269, "grad_norm": 1.5765829905078221, "learning_rate": 1.9880510737192312e-07, "loss": 0.4643257260322571, "step": 6994 }, { "epoch": 1.6125648414985592, "grad_norm": 1.8968521533386222, "learning_rate": 1.9857707288087434e-07, "loss": 0.42287328839302063, "step": 6995 }, { "epoch": 1.6127953890489914, "grad_norm": 1.7958779987187703, "learning_rate": 1.9834915482823943e-07, "loss": 0.44881507754325867, "step": 6996 }, { "epoch": 1.6130259365994237, "grad_norm": 1.4789752954442097, "learning_rate": 1.9812135324713307e-07, "loss": 0.5019153952598572, "step": 6997 }, { "epoch": 1.613256484149856, "grad_norm": 1.9782823177827327, "learning_rate": 1.9789366817065244e-07, "loss": 0.46092158555984497, "step": 6998 }, { "epoch": 1.6134870317002883, "grad_norm": 1.6796264243497427, "learning_rate": 1.9766609963187753e-07, "loss": 0.5497767925262451, "step": 6999 }, { "epoch": 1.6137175792507206, "grad_norm": 1.6615467659002465, "learning_rate": 1.9743864766387196e-07, "loss": 0.3865404427051544, "step": 7000 }, { "epoch": 1.6139481268011528, "grad_norm": 1.7045504670159946, "learning_rate": 1.9721131229968213e-07, "loss": 0.4722781181335449, "step": 7001 }, { "epoch": 1.6141786743515851, "grad_norm": 1.496853211993526, "learning_rate": 1.9698409357233702e-07, "loss": 0.46194642782211304, "step": 7002 }, { "epoch": 1.6144092219020174, "grad_norm": 1.5419924356187527, "learning_rate": 1.967569915148498e-07, "loss": 0.42680829763412476, "step": 7003 }, { "epoch": 1.6146397694524497, "grad_norm": 1.5370455081070127, "learning_rate": 1.9653000616021554e-07, "loss": 0.4733467102050781, "step": 7004 }, { "epoch": 1.614870317002882, "grad_norm": 1.5140177132473809, "learning_rate": 1.9630313754141293e-07, "loss": 0.42426949739456177, "step": 7005 }, { "epoch": 1.6151008645533143, "grad_norm": 1.5682665529467597, "learning_rate": 1.9607638569140405e-07, "loss": 0.4114811420440674, "step": 7006 }, { "epoch": 1.6153314121037465, "grad_norm": 1.5390762052298386, "learning_rate": 1.9584975064313337e-07, "loss": 0.44919753074645996, "step": 7007 }, { "epoch": 1.6155619596541788, "grad_norm": 1.4647408998092328, "learning_rate": 1.9562323242952816e-07, "loss": 0.44578665494918823, "step": 7008 }, { "epoch": 1.615792507204611, "grad_norm": 1.6845882444382327, "learning_rate": 1.953968310834998e-07, "loss": 0.4226934611797333, "step": 7009 }, { "epoch": 1.6160230547550434, "grad_norm": 2.2486201594203656, "learning_rate": 1.9517054663794153e-07, "loss": 0.5555834770202637, "step": 7010 }, { "epoch": 1.6162536023054757, "grad_norm": 1.4767118807632889, "learning_rate": 1.9494437912573058e-07, "loss": 0.428075909614563, "step": 7011 }, { "epoch": 1.616484149855908, "grad_norm": 1.5189056600876345, "learning_rate": 1.9471832857972625e-07, "loss": 0.47747939825057983, "step": 7012 }, { "epoch": 1.6167146974063402, "grad_norm": 1.5100831598484277, "learning_rate": 1.9449239503277194e-07, "loss": 0.4276934862136841, "step": 7013 }, { "epoch": 1.6169452449567725, "grad_norm": 1.8983824161544947, "learning_rate": 1.9426657851769302e-07, "loss": 0.44419193267822266, "step": 7014 }, { "epoch": 1.6171757925072046, "grad_norm": 1.6660017186142178, "learning_rate": 1.9404087906729806e-07, "loss": 0.4462706446647644, "step": 7015 }, { "epoch": 1.6174063400576368, "grad_norm": 1.6051052042823168, "learning_rate": 1.9381529671437923e-07, "loss": 0.43562668561935425, "step": 7016 }, { "epoch": 1.6176368876080691, "grad_norm": 1.9934644528425425, "learning_rate": 1.935898314917115e-07, "loss": 0.4618384838104248, "step": 7017 }, { "epoch": 1.6178674351585014, "grad_norm": 1.5305830970877417, "learning_rate": 1.933644834320519e-07, "loss": 0.42981481552124023, "step": 7018 }, { "epoch": 1.6180979827089337, "grad_norm": 1.916475433542249, "learning_rate": 1.93139252568142e-07, "loss": 0.39951127767562866, "step": 7019 }, { "epoch": 1.618328530259366, "grad_norm": 1.4880364379634898, "learning_rate": 1.9291413893270514e-07, "loss": 0.4628783166408539, "step": 7020 }, { "epoch": 1.6185590778097982, "grad_norm": 1.9586483355597437, "learning_rate": 1.926891425584476e-07, "loss": 0.4748151898384094, "step": 7021 }, { "epoch": 1.6187896253602305, "grad_norm": 1.4040170327047168, "learning_rate": 1.9246426347805967e-07, "loss": 0.4238407015800476, "step": 7022 }, { "epoch": 1.6190201729106628, "grad_norm": 1.4544645637680234, "learning_rate": 1.9223950172421332e-07, "loss": 0.4519417881965637, "step": 7023 }, { "epoch": 1.619250720461095, "grad_norm": 1.5479222790274918, "learning_rate": 1.9201485732956445e-07, "loss": 0.4104294776916504, "step": 7024 }, { "epoch": 1.6194812680115274, "grad_norm": 1.6509334362523898, "learning_rate": 1.9179033032675173e-07, "loss": 0.3946937322616577, "step": 7025 }, { "epoch": 1.6197118155619596, "grad_norm": 1.6453980835597737, "learning_rate": 1.915659207483964e-07, "loss": 0.41711992025375366, "step": 7026 }, { "epoch": 1.619942363112392, "grad_norm": 1.5895165297825928, "learning_rate": 1.913416286271028e-07, "loss": 0.4761412739753723, "step": 7027 }, { "epoch": 1.6201729106628242, "grad_norm": 1.4830868217383606, "learning_rate": 1.9111745399545798e-07, "loss": 0.5078233480453491, "step": 7028 }, { "epoch": 1.6204034582132565, "grad_norm": 1.3877209141758606, "learning_rate": 1.9089339688603246e-07, "loss": 0.3472098708152771, "step": 7029 }, { "epoch": 1.6206340057636888, "grad_norm": 1.5886201993830495, "learning_rate": 1.9066945733137974e-07, "loss": 0.3884485363960266, "step": 7030 }, { "epoch": 1.620864553314121, "grad_norm": 1.3892892625927125, "learning_rate": 1.9044563536403524e-07, "loss": 0.4992063641548157, "step": 7031 }, { "epoch": 1.6210951008645533, "grad_norm": 1.4924190063226175, "learning_rate": 1.902219310165185e-07, "loss": 0.3512866795063019, "step": 7032 }, { "epoch": 1.6213256484149856, "grad_norm": 1.5305883772970803, "learning_rate": 1.8999834432133133e-07, "loss": 0.44194120168685913, "step": 7033 }, { "epoch": 1.621556195965418, "grad_norm": 1.6224356039874788, "learning_rate": 1.8977487531095814e-07, "loss": 0.4745762348175049, "step": 7034 }, { "epoch": 1.6217867435158502, "grad_norm": 1.9250806360959296, "learning_rate": 1.8955152401786723e-07, "loss": 0.4458765387535095, "step": 7035 }, { "epoch": 1.6220172910662825, "grad_norm": 1.701108971645492, "learning_rate": 1.893282904745087e-07, "loss": 0.5192512273788452, "step": 7036 }, { "epoch": 1.6222478386167147, "grad_norm": 1.6193165039007456, "learning_rate": 1.8910517471331632e-07, "loss": 0.47647416591644287, "step": 7037 }, { "epoch": 1.622478386167147, "grad_norm": 1.6132356068665432, "learning_rate": 1.888821767667067e-07, "loss": 0.49768751859664917, "step": 7038 }, { "epoch": 1.6227089337175793, "grad_norm": 1.9401291765695219, "learning_rate": 1.8865929666707893e-07, "loss": 0.5038268566131592, "step": 7039 }, { "epoch": 1.6229394812680116, "grad_norm": 1.6903831936251612, "learning_rate": 1.8843653444681519e-07, "loss": 0.47656458616256714, "step": 7040 }, { "epoch": 1.6231700288184439, "grad_norm": 1.4991134295006157, "learning_rate": 1.8821389013828016e-07, "loss": 0.5014642477035522, "step": 7041 }, { "epoch": 1.6234005763688761, "grad_norm": 2.093487981103135, "learning_rate": 1.879913637738221e-07, "loss": 0.4911212921142578, "step": 7042 }, { "epoch": 1.6236311239193082, "grad_norm": 2.113248716786874, "learning_rate": 1.8776895538577185e-07, "loss": 0.4738670289516449, "step": 7043 }, { "epoch": 1.6238616714697405, "grad_norm": 1.6705700650062818, "learning_rate": 1.8754666500644278e-07, "loss": 0.42330676317214966, "step": 7044 }, { "epoch": 1.6240922190201728, "grad_norm": 1.7524142065793642, "learning_rate": 1.8732449266813178e-07, "loss": 0.4284883141517639, "step": 7045 }, { "epoch": 1.624322766570605, "grad_norm": 1.6676122484771183, "learning_rate": 1.8710243840311778e-07, "loss": 0.4407314658164978, "step": 7046 }, { "epoch": 1.6245533141210373, "grad_norm": 1.6726112467885852, "learning_rate": 1.868805022436629e-07, "loss": 0.445793092250824, "step": 7047 }, { "epoch": 1.6247838616714696, "grad_norm": 2.0179165105135484, "learning_rate": 1.866586842220126e-07, "loss": 0.49527254700660706, "step": 7048 }, { "epoch": 1.6250144092219019, "grad_norm": 1.8829724432355108, "learning_rate": 1.8643698437039423e-07, "loss": 0.5024650692939758, "step": 7049 }, { "epoch": 1.6252449567723342, "grad_norm": 1.4839586443824915, "learning_rate": 1.8621540272101864e-07, "loss": 0.4003955125808716, "step": 7050 }, { "epoch": 1.6254755043227664, "grad_norm": 1.4243235634156672, "learning_rate": 1.8599393930607965e-07, "loss": 0.4203549027442932, "step": 7051 }, { "epoch": 1.6257060518731987, "grad_norm": 1.8616676934177332, "learning_rate": 1.8577259415775336e-07, "loss": 0.498489648103714, "step": 7052 }, { "epoch": 1.625936599423631, "grad_norm": 1.849928674121505, "learning_rate": 1.8555136730819865e-07, "loss": 0.42907896637916565, "step": 7053 }, { "epoch": 1.6261671469740633, "grad_norm": 1.5752798727413557, "learning_rate": 1.85330258789558e-07, "loss": 0.4849644601345062, "step": 7054 }, { "epoch": 1.6263976945244956, "grad_norm": 1.4894906930427128, "learning_rate": 1.851092686339556e-07, "loss": 0.37567687034606934, "step": 7055 }, { "epoch": 1.6266282420749278, "grad_norm": 1.6419279864399527, "learning_rate": 1.8488839687349967e-07, "loss": 0.40736931562423706, "step": 7056 }, { "epoch": 1.6268587896253601, "grad_norm": 1.5390004415931011, "learning_rate": 1.8466764354027986e-07, "loss": 0.45521795749664307, "step": 7057 }, { "epoch": 1.6270893371757924, "grad_norm": 2.0439510581772042, "learning_rate": 1.844470086663701e-07, "loss": 0.45577508211135864, "step": 7058 }, { "epoch": 1.6273198847262247, "grad_norm": 1.8442747006182973, "learning_rate": 1.842264922838258e-07, "loss": 0.46271002292633057, "step": 7059 }, { "epoch": 1.627550432276657, "grad_norm": 1.8168769739622397, "learning_rate": 1.8400609442468573e-07, "loss": 0.41168513894081116, "step": 7060 }, { "epoch": 1.6277809798270892, "grad_norm": 1.6704034042742053, "learning_rate": 1.8378581512097146e-07, "loss": 0.48050713539123535, "step": 7061 }, { "epoch": 1.6280115273775215, "grad_norm": 1.7102134601368413, "learning_rate": 1.8356565440468763e-07, "loss": 0.4158909022808075, "step": 7062 }, { "epoch": 1.6282420749279538, "grad_norm": 1.5610634608134886, "learning_rate": 1.8334561230782075e-07, "loss": 0.42451566457748413, "step": 7063 }, { "epoch": 1.628472622478386, "grad_norm": 1.4952643713001559, "learning_rate": 1.8312568886234114e-07, "loss": 0.5046045184135437, "step": 7064 }, { "epoch": 1.6287031700288184, "grad_norm": 1.6727214912336503, "learning_rate": 1.8290588410020113e-07, "loss": 0.5656751990318298, "step": 7065 }, { "epoch": 1.6289337175792507, "grad_norm": 1.756636707422769, "learning_rate": 1.8268619805333597e-07, "loss": 0.5253554582595825, "step": 7066 }, { "epoch": 1.629164265129683, "grad_norm": 1.5333023336983513, "learning_rate": 1.8246663075366408e-07, "loss": 0.46105247735977173, "step": 7067 }, { "epoch": 1.6293948126801152, "grad_norm": 1.5741857770042025, "learning_rate": 1.8224718223308576e-07, "loss": 0.5526989698410034, "step": 7068 }, { "epoch": 1.6296253602305475, "grad_norm": 1.4157346505000856, "learning_rate": 1.8202785252348506e-07, "loss": 0.43069472908973694, "step": 7069 }, { "epoch": 1.6298559077809798, "grad_norm": 1.6082377506047894, "learning_rate": 1.818086416567285e-07, "loss": 0.5010451078414917, "step": 7070 }, { "epoch": 1.630086455331412, "grad_norm": 1.4560397719214118, "learning_rate": 1.8158954966466467e-07, "loss": 0.48797810077667236, "step": 7071 }, { "epoch": 1.6303170028818443, "grad_norm": 1.5172724872127616, "learning_rate": 1.8137057657912568e-07, "loss": 0.3758738934993744, "step": 7072 }, { "epoch": 1.6305475504322766, "grad_norm": 1.490229867825559, "learning_rate": 1.8115172243192556e-07, "loss": 0.4795163869857788, "step": 7073 }, { "epoch": 1.630778097982709, "grad_norm": 1.6959094837276352, "learning_rate": 1.8093298725486184e-07, "loss": 0.3993092179298401, "step": 7074 }, { "epoch": 1.6310086455331412, "grad_norm": 1.802185920731944, "learning_rate": 1.8071437107971476e-07, "loss": 0.5435800552368164, "step": 7075 }, { "epoch": 1.6312391930835735, "grad_norm": 1.7039163337391874, "learning_rate": 1.804958739382464e-07, "loss": 0.4518716335296631, "step": 7076 }, { "epoch": 1.6314697406340057, "grad_norm": 1.6900376783333737, "learning_rate": 1.8027749586220277e-07, "loss": 0.5534895062446594, "step": 7077 }, { "epoch": 1.631700288184438, "grad_norm": 1.8998675234284237, "learning_rate": 1.800592368833115e-07, "loss": 0.5099557638168335, "step": 7078 }, { "epoch": 1.6319308357348703, "grad_norm": 1.35099602311764, "learning_rate": 1.7984109703328322e-07, "loss": 0.43192172050476074, "step": 7079 }, { "epoch": 1.6321613832853026, "grad_norm": 1.6063094501829667, "learning_rate": 1.796230763438119e-07, "loss": 0.46690988540649414, "step": 7080 }, { "epoch": 1.6323919308357349, "grad_norm": 1.7268310015035628, "learning_rate": 1.7940517484657301e-07, "loss": 0.487186074256897, "step": 7081 }, { "epoch": 1.6326224783861671, "grad_norm": 1.6482432744189444, "learning_rate": 1.7918739257322613e-07, "loss": 0.4217287302017212, "step": 7082 }, { "epoch": 1.6328530259365994, "grad_norm": 1.9234836261724548, "learning_rate": 1.7896972955541223e-07, "loss": 0.4879988431930542, "step": 7083 }, { "epoch": 1.6330835734870317, "grad_norm": 1.4694641918643139, "learning_rate": 1.787521858247555e-07, "loss": 0.449859619140625, "step": 7084 }, { "epoch": 1.633314121037464, "grad_norm": 1.5243370275164099, "learning_rate": 1.7853476141286306e-07, "loss": 0.38333576917648315, "step": 7085 }, { "epoch": 1.6335446685878963, "grad_norm": 1.3646889918108736, "learning_rate": 1.78317456351324e-07, "loss": 0.386202871799469, "step": 7086 }, { "epoch": 1.6337752161383285, "grad_norm": 1.9790389429198783, "learning_rate": 1.7810027067171075e-07, "loss": 0.5273287296295166, "step": 7087 }, { "epoch": 1.6340057636887608, "grad_norm": 1.469553115830399, "learning_rate": 1.7788320440557836e-07, "loss": 0.4877879023551941, "step": 7088 }, { "epoch": 1.634236311239193, "grad_norm": 1.3693249465977024, "learning_rate": 1.7766625758446407e-07, "loss": 0.5114452242851257, "step": 7089 }, { "epoch": 1.6344668587896254, "grad_norm": 1.4466728323394555, "learning_rate": 1.774494302398878e-07, "loss": 0.534758985042572, "step": 7090 }, { "epoch": 1.6346974063400577, "grad_norm": 1.6884547026848957, "learning_rate": 1.7723272240335262e-07, "loss": 0.4727644622325897, "step": 7091 }, { "epoch": 1.63492795389049, "grad_norm": 1.3041580702100446, "learning_rate": 1.7701613410634365e-07, "loss": 0.44658514857292175, "step": 7092 }, { "epoch": 1.6351585014409222, "grad_norm": 1.8133669891438844, "learning_rate": 1.767996653803292e-07, "loss": 0.4832932949066162, "step": 7093 }, { "epoch": 1.6353890489913545, "grad_norm": 1.9873008129610308, "learning_rate": 1.7658331625675958e-07, "loss": 0.4222508668899536, "step": 7094 }, { "epoch": 1.6356195965417868, "grad_norm": 1.6637875675152678, "learning_rate": 1.7636708676706856e-07, "loss": 0.5232953429222107, "step": 7095 }, { "epoch": 1.635850144092219, "grad_norm": 2.1560102051327936, "learning_rate": 1.7615097694267177e-07, "loss": 0.4896438419818878, "step": 7096 }, { "epoch": 1.6360806916426514, "grad_norm": 1.521912219118672, "learning_rate": 1.7593498681496743e-07, "loss": 0.4000094532966614, "step": 7097 }, { "epoch": 1.6363112391930836, "grad_norm": 1.6788307180179725, "learning_rate": 1.7571911641533698e-07, "loss": 0.5291081070899963, "step": 7098 }, { "epoch": 1.636541786743516, "grad_norm": 1.7872228150885656, "learning_rate": 1.7550336577514424e-07, "loss": 0.4801519513130188, "step": 7099 }, { "epoch": 1.6367723342939482, "grad_norm": 1.7969227029518855, "learning_rate": 1.7528773492573524e-07, "loss": 0.4453350901603699, "step": 7100 }, { "epoch": 1.6370028818443805, "grad_norm": 1.8067107495609092, "learning_rate": 1.7507222389843923e-07, "loss": 0.5279836058616638, "step": 7101 }, { "epoch": 1.6372334293948128, "grad_norm": 2.2043741342581575, "learning_rate": 1.7485683272456754e-07, "loss": 0.5456463098526001, "step": 7102 }, { "epoch": 1.637463976945245, "grad_norm": 1.5492809486335855, "learning_rate": 1.7464156143541398e-07, "loss": 0.445858895778656, "step": 7103 }, { "epoch": 1.6376945244956773, "grad_norm": 1.768665301644395, "learning_rate": 1.744264100622558e-07, "loss": 0.48954901099205017, "step": 7104 }, { "epoch": 1.6379250720461096, "grad_norm": 1.9762098585433456, "learning_rate": 1.742113786363517e-07, "loss": 0.4365660548210144, "step": 7105 }, { "epoch": 1.6381556195965419, "grad_norm": 1.5120765060907773, "learning_rate": 1.739964671889438e-07, "loss": 0.40327224135398865, "step": 7106 }, { "epoch": 1.6383861671469742, "grad_norm": 1.5579134604152398, "learning_rate": 1.7378167575125668e-07, "loss": 0.4767388701438904, "step": 7107 }, { "epoch": 1.6386167146974064, "grad_norm": 1.8299910010033837, "learning_rate": 1.735670043544971e-07, "loss": 0.5005271434783936, "step": 7108 }, { "epoch": 1.6388472622478387, "grad_norm": 1.888346334221167, "learning_rate": 1.7335245302985458e-07, "loss": 0.5074931979179382, "step": 7109 }, { "epoch": 1.639077809798271, "grad_norm": 2.3323407528830695, "learning_rate": 1.7313802180850102e-07, "loss": 0.3878687024116516, "step": 7110 }, { "epoch": 1.6393083573487033, "grad_norm": 1.7073526439249955, "learning_rate": 1.7292371072159118e-07, "loss": 0.5035123229026794, "step": 7111 }, { "epoch": 1.6395389048991356, "grad_norm": 1.625831303720922, "learning_rate": 1.727095198002625e-07, "loss": 0.47408533096313477, "step": 7112 }, { "epoch": 1.6397694524495678, "grad_norm": 1.5861177391571943, "learning_rate": 1.724954490756342e-07, "loss": 0.45419204235076904, "step": 7113 }, { "epoch": 1.6400000000000001, "grad_norm": 1.5212582464247422, "learning_rate": 1.7228149857880902e-07, "loss": 0.44029003381729126, "step": 7114 }, { "epoch": 1.6402305475504324, "grad_norm": 1.5718073030301392, "learning_rate": 1.7206766834087148e-07, "loss": 0.4374336004257202, "step": 7115 }, { "epoch": 1.6404610951008647, "grad_norm": 1.977156786293177, "learning_rate": 1.7185395839288875e-07, "loss": 0.5088529586791992, "step": 7116 }, { "epoch": 1.640691642651297, "grad_norm": 1.9035660253830302, "learning_rate": 1.7164036876591105e-07, "loss": 0.5353911519050598, "step": 7117 }, { "epoch": 1.6409221902017292, "grad_norm": 1.5646144443377459, "learning_rate": 1.7142689949097033e-07, "loss": 0.49949127435684204, "step": 7118 }, { "epoch": 1.6411527377521615, "grad_norm": 1.416160387365733, "learning_rate": 1.712135505990816e-07, "loss": 0.4392736256122589, "step": 7119 }, { "epoch": 1.6413832853025938, "grad_norm": 1.618778441026721, "learning_rate": 1.7100032212124248e-07, "loss": 0.4985026717185974, "step": 7120 }, { "epoch": 1.641613832853026, "grad_norm": 1.5434378799917168, "learning_rate": 1.7078721408843266e-07, "loss": 0.39500099420547485, "step": 7121 }, { "epoch": 1.6418443804034584, "grad_norm": 1.4981414645710247, "learning_rate": 1.7057422653161424e-07, "loss": 0.3887489438056946, "step": 7122 }, { "epoch": 1.6420749279538907, "grad_norm": 1.5956592528760005, "learning_rate": 1.7036135948173268e-07, "loss": 0.5208394527435303, "step": 7123 }, { "epoch": 1.642305475504323, "grad_norm": 1.6406147446679789, "learning_rate": 1.7014861296971473e-07, "loss": 0.4292425513267517, "step": 7124 }, { "epoch": 1.642536023054755, "grad_norm": 1.8466690932217622, "learning_rate": 1.6993598702647084e-07, "loss": 0.5247593522071838, "step": 7125 }, { "epoch": 1.6427665706051873, "grad_norm": 1.6236257528874523, "learning_rate": 1.6972348168289275e-07, "loss": 0.40911680459976196, "step": 7126 }, { "epoch": 1.6429971181556196, "grad_norm": 1.7593622225771122, "learning_rate": 1.6951109696985576e-07, "loss": 0.5300519466400146, "step": 7127 }, { "epoch": 1.6432276657060518, "grad_norm": 1.8530552916322522, "learning_rate": 1.692988329182171e-07, "loss": 0.4551096558570862, "step": 7128 }, { "epoch": 1.6434582132564841, "grad_norm": 1.5407775659664558, "learning_rate": 1.690866895588161e-07, "loss": 0.4497135877609253, "step": 7129 }, { "epoch": 1.6436887608069164, "grad_norm": 1.4118252540002971, "learning_rate": 1.6887466692247554e-07, "loss": 0.4177432656288147, "step": 7130 }, { "epoch": 1.6439193083573487, "grad_norm": 1.583844642588823, "learning_rate": 1.6866276503999965e-07, "loss": 0.4593951404094696, "step": 7131 }, { "epoch": 1.644149855907781, "grad_norm": 1.6310540123379373, "learning_rate": 1.684509839421757e-07, "loss": 0.38442713022232056, "step": 7132 }, { "epoch": 1.6443804034582132, "grad_norm": 1.6385922234515504, "learning_rate": 1.6823932365977356e-07, "loss": 0.4701668620109558, "step": 7133 }, { "epoch": 1.6446109510086455, "grad_norm": 1.7942474001482556, "learning_rate": 1.6802778422354514e-07, "loss": 0.47364962100982666, "step": 7134 }, { "epoch": 1.6448414985590778, "grad_norm": 1.6308226749238004, "learning_rate": 1.6781636566422463e-07, "loss": 0.5317097902297974, "step": 7135 }, { "epoch": 1.64507204610951, "grad_norm": 1.3748228459577467, "learning_rate": 1.6760506801252926e-07, "loss": 0.47745388746261597, "step": 7136 }, { "epoch": 1.6453025936599424, "grad_norm": 1.6435587909932845, "learning_rate": 1.6739389129915817e-07, "loss": 0.4827711880207062, "step": 7137 }, { "epoch": 1.6455331412103746, "grad_norm": 1.7001993101774864, "learning_rate": 1.671828355547934e-07, "loss": 0.5359855890274048, "step": 7138 }, { "epoch": 1.645763688760807, "grad_norm": 1.7037683748039727, "learning_rate": 1.6697190081009882e-07, "loss": 0.48604434728622437, "step": 7139 }, { "epoch": 1.6459942363112392, "grad_norm": 1.648303118966041, "learning_rate": 1.6676108709572146e-07, "loss": 0.602343738079071, "step": 7140 }, { "epoch": 1.6462247838616715, "grad_norm": 1.5710736352040753, "learning_rate": 1.6655039444229013e-07, "loss": 0.36290526390075684, "step": 7141 }, { "epoch": 1.6464553314121038, "grad_norm": 1.5863608375532527, "learning_rate": 1.6633982288041603e-07, "loss": 0.4096101224422455, "step": 7142 }, { "epoch": 1.646685878962536, "grad_norm": 1.5485446810610417, "learning_rate": 1.6612937244069326e-07, "loss": 0.4343856871128082, "step": 7143 }, { "epoch": 1.6469164265129683, "grad_norm": 1.5635554519919759, "learning_rate": 1.6591904315369833e-07, "loss": 0.44130879640579224, "step": 7144 }, { "epoch": 1.6471469740634006, "grad_norm": 1.429256520832658, "learning_rate": 1.6570883504998945e-07, "loss": 0.45949405431747437, "step": 7145 }, { "epoch": 1.6473775216138329, "grad_norm": 1.6107689761815698, "learning_rate": 1.654987481601081e-07, "loss": 0.39314505457878113, "step": 7146 }, { "epoch": 1.6476080691642652, "grad_norm": 1.6243284006873182, "learning_rate": 1.6528878251457757e-07, "loss": 0.45313894748687744, "step": 7147 }, { "epoch": 1.6478386167146974, "grad_norm": 1.4856311266688829, "learning_rate": 1.6507893814390328e-07, "loss": 0.4263615310192108, "step": 7148 }, { "epoch": 1.6480691642651297, "grad_norm": 1.688675267530104, "learning_rate": 1.6486921507857398e-07, "loss": 0.5187538266181946, "step": 7149 }, { "epoch": 1.648299711815562, "grad_norm": 1.3737191741307955, "learning_rate": 1.6465961334905986e-07, "loss": 0.44234776496887207, "step": 7150 }, { "epoch": 1.6485302593659943, "grad_norm": 1.5480906235848935, "learning_rate": 1.64450132985814e-07, "loss": 0.42623990774154663, "step": 7151 }, { "epoch": 1.6487608069164263, "grad_norm": 1.4835957957904586, "learning_rate": 1.6424077401927206e-07, "loss": 0.4729412794113159, "step": 7152 }, { "epoch": 1.6489913544668586, "grad_norm": 1.4548490747782812, "learning_rate": 1.6403153647985134e-07, "loss": 0.4603039026260376, "step": 7153 }, { "epoch": 1.649221902017291, "grad_norm": 1.456280541182368, "learning_rate": 1.6382242039795213e-07, "loss": 0.5220425724983215, "step": 7154 }, { "epoch": 1.6494524495677232, "grad_norm": 1.784246781666792, "learning_rate": 1.6361342580395632e-07, "loss": 0.4285042881965637, "step": 7155 }, { "epoch": 1.6496829971181555, "grad_norm": 1.9528209037439446, "learning_rate": 1.6340455272822894e-07, "loss": 0.47878625988960266, "step": 7156 }, { "epoch": 1.6499135446685878, "grad_norm": 1.341682698287289, "learning_rate": 1.631958012011173e-07, "loss": 0.44329750537872314, "step": 7157 }, { "epoch": 1.65014409221902, "grad_norm": 1.7415780885053698, "learning_rate": 1.6298717125295057e-07, "loss": 0.4647062420845032, "step": 7158 }, { "epoch": 1.6503746397694523, "grad_norm": 1.5567564662904019, "learning_rate": 1.6277866291404074e-07, "loss": 0.4851604104042053, "step": 7159 }, { "epoch": 1.6506051873198846, "grad_norm": 1.582233265535471, "learning_rate": 1.6257027621468177e-07, "loss": 0.532638669013977, "step": 7160 }, { "epoch": 1.6508357348703169, "grad_norm": 1.5173909016412013, "learning_rate": 1.623620111851498e-07, "loss": 0.4583241641521454, "step": 7161 }, { "epoch": 1.6510662824207492, "grad_norm": 1.514446467644476, "learning_rate": 1.6215386785570405e-07, "loss": 0.3982502222061157, "step": 7162 }, { "epoch": 1.6512968299711814, "grad_norm": 1.7091680314569688, "learning_rate": 1.6194584625658514e-07, "loss": 0.465998113155365, "step": 7163 }, { "epoch": 1.6515273775216137, "grad_norm": 1.642523863514421, "learning_rate": 1.6173794641801675e-07, "loss": 0.49632728099823, "step": 7164 }, { "epoch": 1.651757925072046, "grad_norm": 1.6357073961102824, "learning_rate": 1.615301683702046e-07, "loss": 0.47182852029800415, "step": 7165 }, { "epoch": 1.6519884726224783, "grad_norm": 1.5740312008724333, "learning_rate": 1.6132251214333658e-07, "loss": 0.42910608649253845, "step": 7166 }, { "epoch": 1.6522190201729106, "grad_norm": 1.4210692456044949, "learning_rate": 1.6111497776758276e-07, "loss": 0.42247748374938965, "step": 7167 }, { "epoch": 1.6524495677233428, "grad_norm": 2.016987391877327, "learning_rate": 1.609075652730961e-07, "loss": 0.4508252739906311, "step": 7168 }, { "epoch": 1.6526801152737751, "grad_norm": 1.4449046621735189, "learning_rate": 1.6070027469001114e-07, "loss": 0.4595048129558563, "step": 7169 }, { "epoch": 1.6529106628242074, "grad_norm": 1.5448968518745512, "learning_rate": 1.6049310604844536e-07, "loss": 0.3852691054344177, "step": 7170 }, { "epoch": 1.6531412103746397, "grad_norm": 1.6692560122028328, "learning_rate": 1.6028605937849793e-07, "loss": 0.4829123020172119, "step": 7171 }, { "epoch": 1.653371757925072, "grad_norm": 1.709360623818806, "learning_rate": 1.600791347102508e-07, "loss": 0.5782856941223145, "step": 7172 }, { "epoch": 1.6536023054755042, "grad_norm": 1.696439015666858, "learning_rate": 1.5987233207376794e-07, "loss": 0.44069433212280273, "step": 7173 }, { "epoch": 1.6538328530259365, "grad_norm": 1.7407402903841707, "learning_rate": 1.596656514990954e-07, "loss": 0.41529107093811035, "step": 7174 }, { "epoch": 1.6540634005763688, "grad_norm": 1.619986578008229, "learning_rate": 1.5945909301626205e-07, "loss": 0.4839997887611389, "step": 7175 }, { "epoch": 1.654293948126801, "grad_norm": 1.7287640110596156, "learning_rate": 1.5925265665527821e-07, "loss": 0.4021362066268921, "step": 7176 }, { "epoch": 1.6545244956772334, "grad_norm": 1.4304015495283673, "learning_rate": 1.5904634244613723e-07, "loss": 0.5291178226470947, "step": 7177 }, { "epoch": 1.6547550432276656, "grad_norm": 1.9082896937154643, "learning_rate": 1.5884015041881483e-07, "loss": 0.4622513949871063, "step": 7178 }, { "epoch": 1.654985590778098, "grad_norm": 1.5727457944548462, "learning_rate": 1.58634080603268e-07, "loss": 0.5098867416381836, "step": 7179 }, { "epoch": 1.6552161383285302, "grad_norm": 1.8633855733896474, "learning_rate": 1.5842813302943646e-07, "loss": 0.6110771894454956, "step": 7180 }, { "epoch": 1.6554466858789625, "grad_norm": 1.8109337460557362, "learning_rate": 1.5822230772724288e-07, "loss": 0.5498735308647156, "step": 7181 }, { "epoch": 1.6556772334293948, "grad_norm": 1.6288862141191938, "learning_rate": 1.5801660472659074e-07, "loss": 0.5020328760147095, "step": 7182 }, { "epoch": 1.655907780979827, "grad_norm": 1.5319375398935402, "learning_rate": 1.5781102405736723e-07, "loss": 0.4844881594181061, "step": 7183 }, { "epoch": 1.6561383285302593, "grad_norm": 1.6333215277041324, "learning_rate": 1.5760556574944062e-07, "loss": 0.47296953201293945, "step": 7184 }, { "epoch": 1.6563688760806916, "grad_norm": 1.5894632852363868, "learning_rate": 1.5740022983266232e-07, "loss": 0.5081913471221924, "step": 7185 }, { "epoch": 1.656599423631124, "grad_norm": 1.8719272137228433, "learning_rate": 1.5719501633686517e-07, "loss": 0.3864198923110962, "step": 7186 }, { "epoch": 1.6568299711815562, "grad_norm": 1.621789029663121, "learning_rate": 1.569899252918644e-07, "loss": 0.5106151700019836, "step": 7187 }, { "epoch": 1.6570605187319885, "grad_norm": 1.623996761371585, "learning_rate": 1.5678495672745785e-07, "loss": 0.4601839780807495, "step": 7188 }, { "epoch": 1.6572910662824207, "grad_norm": 1.7015255797101292, "learning_rate": 1.5658011067342546e-07, "loss": 0.41081488132476807, "step": 7189 }, { "epoch": 1.657521613832853, "grad_norm": 1.655929621273635, "learning_rate": 1.563753871595289e-07, "loss": 0.5042529702186584, "step": 7190 }, { "epoch": 1.6577521613832853, "grad_norm": 1.504758223783334, "learning_rate": 1.5617078621551305e-07, "loss": 0.4239219129085541, "step": 7191 }, { "epoch": 1.6579827089337176, "grad_norm": 1.734035504638041, "learning_rate": 1.559663078711032e-07, "loss": 0.40397927165031433, "step": 7192 }, { "epoch": 1.6582132564841499, "grad_norm": 1.3443962161928609, "learning_rate": 1.5576195215600862e-07, "loss": 0.44509077072143555, "step": 7193 }, { "epoch": 1.6584438040345821, "grad_norm": 1.6126140371628406, "learning_rate": 1.555577190999201e-07, "loss": 0.3795713782310486, "step": 7194 }, { "epoch": 1.6586743515850144, "grad_norm": 1.3064119517656203, "learning_rate": 1.5535360873251024e-07, "loss": 0.43911436200141907, "step": 7195 }, { "epoch": 1.6589048991354467, "grad_norm": 1.4863642261752135, "learning_rate": 1.5514962108343432e-07, "loss": 0.485666960477829, "step": 7196 }, { "epoch": 1.659135446685879, "grad_norm": 1.783744090847337, "learning_rate": 1.549457561823302e-07, "loss": 0.3457345962524414, "step": 7197 }, { "epoch": 1.6593659942363113, "grad_norm": 1.6175427546858157, "learning_rate": 1.5474201405881616e-07, "loss": 0.5008036494255066, "step": 7198 }, { "epoch": 1.6595965417867435, "grad_norm": 1.6987150806432352, "learning_rate": 1.5453839474249474e-07, "loss": 0.53286212682724, "step": 7199 }, { "epoch": 1.6598270893371758, "grad_norm": 1.7805581367356456, "learning_rate": 1.5433489826294921e-07, "loss": 0.5207295417785645, "step": 7200 }, { "epoch": 1.660057636887608, "grad_norm": 1.55676080740635, "learning_rate": 1.5413152464974565e-07, "loss": 0.4445546865463257, "step": 7201 }, { "epoch": 1.6602881844380404, "grad_norm": 1.5721680438234895, "learning_rate": 1.5392827393243246e-07, "loss": 0.4071146547794342, "step": 7202 }, { "epoch": 1.6605187319884727, "grad_norm": 1.899976807653372, "learning_rate": 1.5372514614053956e-07, "loss": 0.43851250410079956, "step": 7203 }, { "epoch": 1.660749279538905, "grad_norm": 1.6317372171777702, "learning_rate": 1.53522141303579e-07, "loss": 0.4426385164260864, "step": 7204 }, { "epoch": 1.6609798270893372, "grad_norm": 1.4622497310054086, "learning_rate": 1.5331925945104585e-07, "loss": 0.4058944582939148, "step": 7205 }, { "epoch": 1.6612103746397695, "grad_norm": 1.4412439963389565, "learning_rate": 1.531165006124161e-07, "loss": 0.46442219614982605, "step": 7206 }, { "epoch": 1.6614409221902018, "grad_norm": 1.8998885504324288, "learning_rate": 1.5291386481714917e-07, "loss": 0.44008395075798035, "step": 7207 }, { "epoch": 1.661671469740634, "grad_norm": 1.6165431540422157, "learning_rate": 1.5271135209468545e-07, "loss": 0.4646702706813812, "step": 7208 }, { "epoch": 1.6619020172910663, "grad_norm": 1.6141720205477035, "learning_rate": 1.5250896247444833e-07, "loss": 0.5206056833267212, "step": 7209 }, { "epoch": 1.6621325648414986, "grad_norm": 1.5917839537896612, "learning_rate": 1.5230669598584266e-07, "loss": 0.43179526925086975, "step": 7210 }, { "epoch": 1.662363112391931, "grad_norm": 1.3678375311854771, "learning_rate": 1.521045526582554e-07, "loss": 0.41198521852493286, "step": 7211 }, { "epoch": 1.6625936599423632, "grad_norm": 1.6500020374603228, "learning_rate": 1.5190253252105624e-07, "loss": 0.542406439781189, "step": 7212 }, { "epoch": 1.6628242074927955, "grad_norm": 1.858718160982589, "learning_rate": 1.517006356035967e-07, "loss": 0.46104592084884644, "step": 7213 }, { "epoch": 1.6630547550432278, "grad_norm": 1.519964730065185, "learning_rate": 1.514988619352099e-07, "loss": 0.39294254779815674, "step": 7214 }, { "epoch": 1.66328530259366, "grad_norm": 1.7134471062991041, "learning_rate": 1.512972115452119e-07, "loss": 0.5588621497154236, "step": 7215 }, { "epoch": 1.6635158501440923, "grad_norm": 1.8550173716299083, "learning_rate": 1.510956844629002e-07, "loss": 0.5034887790679932, "step": 7216 }, { "epoch": 1.6637463976945246, "grad_norm": 1.802824758529772, "learning_rate": 1.508942807175544e-07, "loss": 0.503799319267273, "step": 7217 }, { "epoch": 1.6639769452449569, "grad_norm": 1.765440871146946, "learning_rate": 1.5069300033843668e-07, "loss": 0.465304970741272, "step": 7218 }, { "epoch": 1.6642074927953892, "grad_norm": 1.9150024875802376, "learning_rate": 1.5049184335479072e-07, "loss": 0.47217413783073425, "step": 7219 }, { "epoch": 1.6644380403458214, "grad_norm": 1.6818734317510342, "learning_rate": 1.5029080979584275e-07, "loss": 0.4457072615623474, "step": 7220 }, { "epoch": 1.6646685878962537, "grad_norm": 1.5435205402521186, "learning_rate": 1.5008989969080065e-07, "loss": 0.43352675437927246, "step": 7221 }, { "epoch": 1.664899135446686, "grad_norm": 1.400426360312227, "learning_rate": 1.4988911306885487e-07, "loss": 0.4387536644935608, "step": 7222 }, { "epoch": 1.6651296829971183, "grad_norm": 1.7947162267476848, "learning_rate": 1.496884499591774e-07, "loss": 0.39572250843048096, "step": 7223 }, { "epoch": 1.6653602305475506, "grad_norm": 1.4532491521694952, "learning_rate": 1.4948791039092234e-07, "loss": 0.4870808720588684, "step": 7224 }, { "epoch": 1.6655907780979828, "grad_norm": 1.967784988941882, "learning_rate": 1.4928749439322618e-07, "loss": 0.4750326871871948, "step": 7225 }, { "epoch": 1.6658213256484151, "grad_norm": 1.733380464244531, "learning_rate": 1.4908720199520763e-07, "loss": 0.4600903391838074, "step": 7226 }, { "epoch": 1.6660518731988474, "grad_norm": 1.845212682331216, "learning_rate": 1.488870332259664e-07, "loss": 0.5157172679901123, "step": 7227 }, { "epoch": 1.6662824207492797, "grad_norm": 1.527717288320904, "learning_rate": 1.4868698811458558e-07, "loss": 0.5109579563140869, "step": 7228 }, { "epoch": 1.666512968299712, "grad_norm": 1.5715781586375175, "learning_rate": 1.4848706669012933e-07, "loss": 0.5458623766899109, "step": 7229 }, { "epoch": 1.6667435158501442, "grad_norm": 1.559742556790004, "learning_rate": 1.48287268981644e-07, "loss": 0.4763823449611664, "step": 7230 }, { "epoch": 1.6669740634005765, "grad_norm": 1.474661322860322, "learning_rate": 1.480875950181585e-07, "loss": 0.46346691250801086, "step": 7231 }, { "epoch": 1.6672046109510088, "grad_norm": 1.8805501138397551, "learning_rate": 1.4788804482868289e-07, "loss": 0.5442988872528076, "step": 7232 }, { "epoch": 1.667435158501441, "grad_norm": 1.5922849196085607, "learning_rate": 1.4768861844221002e-07, "loss": 0.508071780204773, "step": 7233 }, { "epoch": 1.6676657060518734, "grad_norm": 2.044136469779381, "learning_rate": 1.4748931588771484e-07, "loss": 0.607035756111145, "step": 7234 }, { "epoch": 1.6678962536023054, "grad_norm": 2.56236340259614, "learning_rate": 1.4729013719415352e-07, "loss": 0.6532429456710815, "step": 7235 }, { "epoch": 1.6681268011527377, "grad_norm": 1.659139984651841, "learning_rate": 1.4709108239046465e-07, "loss": 0.49710947275161743, "step": 7236 }, { "epoch": 1.66835734870317, "grad_norm": 2.3059474306371293, "learning_rate": 1.4689215150556856e-07, "loss": 0.5243515968322754, "step": 7237 }, { "epoch": 1.6685878962536023, "grad_norm": 2.0007736669763516, "learning_rate": 1.4669334456836825e-07, "loss": 0.5056744813919067, "step": 7238 }, { "epoch": 1.6688184438040345, "grad_norm": 1.7502562249309883, "learning_rate": 1.4649466160774847e-07, "loss": 0.43398773670196533, "step": 7239 }, { "epoch": 1.6690489913544668, "grad_norm": 1.5465503969894825, "learning_rate": 1.462961026525752e-07, "loss": 0.5139761567115784, "step": 7240 }, { "epoch": 1.669279538904899, "grad_norm": 1.6696867508957232, "learning_rate": 1.4609766773169763e-07, "loss": 0.4375327229499817, "step": 7241 }, { "epoch": 1.6695100864553314, "grad_norm": 1.5816533801320478, "learning_rate": 1.4589935687394593e-07, "loss": 0.4920062720775604, "step": 7242 }, { "epoch": 1.6697406340057637, "grad_norm": 1.7465659879703033, "learning_rate": 1.4570117010813243e-07, "loss": 0.47602343559265137, "step": 7243 }, { "epoch": 1.669971181556196, "grad_norm": 1.6609975329131823, "learning_rate": 1.4550310746305194e-07, "loss": 0.4663471579551697, "step": 7244 }, { "epoch": 1.6702017291066282, "grad_norm": 1.825000722601049, "learning_rate": 1.4530516896748068e-07, "loss": 0.4032224416732788, "step": 7245 }, { "epoch": 1.6704322766570605, "grad_norm": 1.6515038226392735, "learning_rate": 1.4510735465017708e-07, "loss": 0.3822782635688782, "step": 7246 }, { "epoch": 1.6706628242074928, "grad_norm": 1.7342878908697361, "learning_rate": 1.4490966453988185e-07, "loss": 0.4859994649887085, "step": 7247 }, { "epoch": 1.670893371757925, "grad_norm": 1.8492242805033283, "learning_rate": 1.4471209866531708e-07, "loss": 0.5374040007591248, "step": 7248 }, { "epoch": 1.6711239193083574, "grad_norm": 1.6953840635476194, "learning_rate": 1.4451465705518663e-07, "loss": 0.48769307136535645, "step": 7249 }, { "epoch": 1.6713544668587896, "grad_norm": 1.6109119052274135, "learning_rate": 1.443173397381774e-07, "loss": 0.4628336429595947, "step": 7250 }, { "epoch": 1.671585014409222, "grad_norm": 1.6751219673185227, "learning_rate": 1.4412014674295703e-07, "loss": 0.5241574645042419, "step": 7251 }, { "epoch": 1.6718155619596542, "grad_norm": 1.6820438668557176, "learning_rate": 1.4392307809817594e-07, "loss": 0.4850649833679199, "step": 7252 }, { "epoch": 1.6720461095100865, "grad_norm": 1.4736638522112695, "learning_rate": 1.4372613383246579e-07, "loss": 0.4610109329223633, "step": 7253 }, { "epoch": 1.6722766570605188, "grad_norm": 1.8781850963576057, "learning_rate": 1.43529313974441e-07, "loss": 0.422643780708313, "step": 7254 }, { "epoch": 1.672507204610951, "grad_norm": 1.3707324521464075, "learning_rate": 1.4333261855269717e-07, "loss": 0.4568919241428375, "step": 7255 }, { "epoch": 1.6727377521613833, "grad_norm": 1.5655308869863263, "learning_rate": 1.43136047595812e-07, "loss": 0.4420914649963379, "step": 7256 }, { "epoch": 1.6729682997118156, "grad_norm": 1.809187677501354, "learning_rate": 1.4293960113234526e-07, "loss": 0.531182587146759, "step": 7257 }, { "epoch": 1.6731988472622479, "grad_norm": 1.588312906840417, "learning_rate": 1.4274327919083883e-07, "loss": 0.4499055743217468, "step": 7258 }, { "epoch": 1.6734293948126802, "grad_norm": 1.4474121027649385, "learning_rate": 1.425470817998159e-07, "loss": 0.4007442593574524, "step": 7259 }, { "epoch": 1.6736599423631124, "grad_norm": 1.328708907630177, "learning_rate": 1.423510089877823e-07, "loss": 0.38373956084251404, "step": 7260 }, { "epoch": 1.6738904899135447, "grad_norm": 1.565700725727342, "learning_rate": 1.4215506078322513e-07, "loss": 0.5179092884063721, "step": 7261 }, { "epoch": 1.6741210374639768, "grad_norm": 2.1636236089677094, "learning_rate": 1.4195923721461345e-07, "loss": 0.4818217158317566, "step": 7262 }, { "epoch": 1.674351585014409, "grad_norm": 1.405414242668412, "learning_rate": 1.417635383103989e-07, "loss": 0.4114675521850586, "step": 7263 }, { "epoch": 1.6745821325648413, "grad_norm": 1.864769627750043, "learning_rate": 1.4156796409901383e-07, "loss": 0.4796205163002014, "step": 7264 }, { "epoch": 1.6748126801152736, "grad_norm": 1.6488410041287704, "learning_rate": 1.4137251460887366e-07, "loss": 0.5158804655075073, "step": 7265 }, { "epoch": 1.675043227665706, "grad_norm": 1.7213812053974584, "learning_rate": 1.4117718986837491e-07, "loss": 0.5137546062469482, "step": 7266 }, { "epoch": 1.6752737752161382, "grad_norm": 1.4792265893063055, "learning_rate": 1.409819899058965e-07, "loss": 0.48155319690704346, "step": 7267 }, { "epoch": 1.6755043227665705, "grad_norm": 2.1364016007887234, "learning_rate": 1.4078691474979865e-07, "loss": 0.5631832480430603, "step": 7268 }, { "epoch": 1.6757348703170027, "grad_norm": 1.4486975390027959, "learning_rate": 1.405919644284238e-07, "loss": 0.37383341789245605, "step": 7269 }, { "epoch": 1.675965417867435, "grad_norm": 1.5115227541499854, "learning_rate": 1.403971389700962e-07, "loss": 0.4356718361377716, "step": 7270 }, { "epoch": 1.6761959654178673, "grad_norm": 1.7254792983162555, "learning_rate": 1.402024384031223e-07, "loss": 0.5697557926177979, "step": 7271 }, { "epoch": 1.6764265129682996, "grad_norm": 1.8330890314003314, "learning_rate": 1.4000786275578957e-07, "loss": 0.38880759477615356, "step": 7272 }, { "epoch": 1.6766570605187319, "grad_norm": 1.5828613232595852, "learning_rate": 1.398134120563682e-07, "loss": 0.40824171900749207, "step": 7273 }, { "epoch": 1.6768876080691641, "grad_norm": 1.8426743516757296, "learning_rate": 1.396190863331098e-07, "loss": 0.45844566822052, "step": 7274 }, { "epoch": 1.6771181556195964, "grad_norm": 1.7101458464470949, "learning_rate": 1.394248856142476e-07, "loss": 0.4732695519924164, "step": 7275 }, { "epoch": 1.6773487031700287, "grad_norm": 1.5405894803577098, "learning_rate": 1.3923080992799729e-07, "loss": 0.45811837911605835, "step": 7276 }, { "epoch": 1.677579250720461, "grad_norm": 1.4406228369333338, "learning_rate": 1.3903685930255572e-07, "loss": 0.46207255125045776, "step": 7277 }, { "epoch": 1.6778097982708933, "grad_norm": 1.4671120213207756, "learning_rate": 1.3884303376610195e-07, "loss": 0.4856521487236023, "step": 7278 }, { "epoch": 1.6780403458213256, "grad_norm": 1.5200443209372343, "learning_rate": 1.386493333467973e-07, "loss": 0.4625289738178253, "step": 7279 }, { "epoch": 1.6782708933717578, "grad_norm": 1.5447587816062573, "learning_rate": 1.3845575807278398e-07, "loss": 0.4227305054664612, "step": 7280 }, { "epoch": 1.6785014409221901, "grad_norm": 1.6564382894986278, "learning_rate": 1.3826230797218664e-07, "loss": 0.40824317932128906, "step": 7281 }, { "epoch": 1.6787319884726224, "grad_norm": 1.4249612038198227, "learning_rate": 1.380689830731112e-07, "loss": 0.4376741647720337, "step": 7282 }, { "epoch": 1.6789625360230547, "grad_norm": 1.4853225121555564, "learning_rate": 1.3787578340364602e-07, "loss": 0.4159294366836548, "step": 7283 }, { "epoch": 1.679193083573487, "grad_norm": 1.6921340878324538, "learning_rate": 1.3768270899186118e-07, "loss": 0.44097238779067993, "step": 7284 }, { "epoch": 1.6794236311239192, "grad_norm": 1.7542650919820308, "learning_rate": 1.37489759865808e-07, "loss": 0.5287643074989319, "step": 7285 }, { "epoch": 1.6796541786743515, "grad_norm": 1.6980963761970458, "learning_rate": 1.3729693605352054e-07, "loss": 0.5027199983596802, "step": 7286 }, { "epoch": 1.6798847262247838, "grad_norm": 1.4467586071513, "learning_rate": 1.371042375830137e-07, "loss": 0.38447409868240356, "step": 7287 }, { "epoch": 1.680115273775216, "grad_norm": 1.4823170991132026, "learning_rate": 1.369116644822843e-07, "loss": 0.32439717650413513, "step": 7288 }, { "epoch": 1.6803458213256484, "grad_norm": 1.6304048881341835, "learning_rate": 1.3671921677931185e-07, "loss": 0.466668963432312, "step": 7289 }, { "epoch": 1.6805763688760806, "grad_norm": 1.5670479284811365, "learning_rate": 1.3652689450205633e-07, "loss": 0.3803076446056366, "step": 7290 }, { "epoch": 1.680806916426513, "grad_norm": 1.5423068826131208, "learning_rate": 1.3633469767846063e-07, "loss": 0.5098183155059814, "step": 7291 }, { "epoch": 1.6810374639769452, "grad_norm": 1.9666029015445945, "learning_rate": 1.3614262633644903e-07, "loss": 0.4866775870323181, "step": 7292 }, { "epoch": 1.6812680115273775, "grad_norm": 1.5218473916717437, "learning_rate": 1.3595068050392722e-07, "loss": 0.4426755905151367, "step": 7293 }, { "epoch": 1.6814985590778098, "grad_norm": 1.6763772291665047, "learning_rate": 1.3575886020878291e-07, "loss": 0.49981606006622314, "step": 7294 }, { "epoch": 1.681729106628242, "grad_norm": 1.7060577764824782, "learning_rate": 1.355671654788858e-07, "loss": 0.47976410388946533, "step": 7295 }, { "epoch": 1.6819596541786743, "grad_norm": 1.721079690327414, "learning_rate": 1.3537559634208683e-07, "loss": 0.49790793657302856, "step": 7296 }, { "epoch": 1.6821902017291066, "grad_norm": 1.790367918060309, "learning_rate": 1.351841528262194e-07, "loss": 0.48854726552963257, "step": 7297 }, { "epoch": 1.6824207492795389, "grad_norm": 1.6106089496589149, "learning_rate": 1.3499283495909784e-07, "loss": 0.46680933237075806, "step": 7298 }, { "epoch": 1.6826512968299712, "grad_norm": 1.913402875215736, "learning_rate": 1.3480164276851923e-07, "loss": 0.4013046324253082, "step": 7299 }, { "epoch": 1.6828818443804034, "grad_norm": 1.445836567637905, "learning_rate": 1.3461057628226135e-07, "loss": 0.5207708477973938, "step": 7300 }, { "epoch": 1.6831123919308357, "grad_norm": 1.5618988740075492, "learning_rate": 1.34419635528084e-07, "loss": 0.42747941613197327, "step": 7301 }, { "epoch": 1.683342939481268, "grad_norm": 2.258248985065798, "learning_rate": 1.3422882053372918e-07, "loss": 0.46138545870780945, "step": 7302 }, { "epoch": 1.6835734870317003, "grad_norm": 1.607848950586652, "learning_rate": 1.3403813132692054e-07, "loss": 0.5034617185592651, "step": 7303 }, { "epoch": 1.6838040345821326, "grad_norm": 1.7549778043587798, "learning_rate": 1.3384756793536277e-07, "loss": 0.5109648704528809, "step": 7304 }, { "epoch": 1.6840345821325649, "grad_norm": 1.6683673761165936, "learning_rate": 1.3365713038674342e-07, "loss": 0.5048235654830933, "step": 7305 }, { "epoch": 1.6842651296829971, "grad_norm": 1.671440552476867, "learning_rate": 1.3346681870873022e-07, "loss": 0.46366703510284424, "step": 7306 }, { "epoch": 1.6844956772334294, "grad_norm": 1.8814222847361726, "learning_rate": 1.3327663292897385e-07, "loss": 0.5473049283027649, "step": 7307 }, { "epoch": 1.6847262247838617, "grad_norm": 1.5517081701402915, "learning_rate": 1.3308657307510662e-07, "loss": 0.4539650082588196, "step": 7308 }, { "epoch": 1.684956772334294, "grad_norm": 1.6894656634552743, "learning_rate": 1.328966391747418e-07, "loss": 0.43108680844306946, "step": 7309 }, { "epoch": 1.6851873198847263, "grad_norm": 1.9974250241260127, "learning_rate": 1.3270683125547522e-07, "loss": 0.573739767074585, "step": 7310 }, { "epoch": 1.6854178674351585, "grad_norm": 2.037790233442197, "learning_rate": 1.3251714934488368e-07, "loss": 0.405525803565979, "step": 7311 }, { "epoch": 1.6856484149855908, "grad_norm": 1.765339357509686, "learning_rate": 1.3232759347052603e-07, "loss": 0.5189083814620972, "step": 7312 }, { "epoch": 1.685878962536023, "grad_norm": 1.806012516151003, "learning_rate": 1.32138163659943e-07, "loss": 0.47043824195861816, "step": 7313 }, { "epoch": 1.6861095100864554, "grad_norm": 1.5219455074379382, "learning_rate": 1.319488599406563e-07, "loss": 0.5212691426277161, "step": 7314 }, { "epoch": 1.6863400576368877, "grad_norm": 1.9352097714905825, "learning_rate": 1.317596823401702e-07, "loss": 0.44503504037857056, "step": 7315 }, { "epoch": 1.68657060518732, "grad_norm": 1.8653042666003878, "learning_rate": 1.3157063088597033e-07, "loss": 0.486750066280365, "step": 7316 }, { "epoch": 1.6868011527377522, "grad_norm": 1.6466316943983774, "learning_rate": 1.3138170560552365e-07, "loss": 0.436980664730072, "step": 7317 }, { "epoch": 1.6870317002881845, "grad_norm": 1.4906620727560134, "learning_rate": 1.3119290652627912e-07, "loss": 0.4514414072036743, "step": 7318 }, { "epoch": 1.6872622478386168, "grad_norm": 1.594310471174457, "learning_rate": 1.3100423367566704e-07, "loss": 0.48360395431518555, "step": 7319 }, { "epoch": 1.687492795389049, "grad_norm": 1.8385147655691183, "learning_rate": 1.308156870810999e-07, "loss": 0.5205049514770508, "step": 7320 }, { "epoch": 1.6877233429394813, "grad_norm": 1.6713700205301076, "learning_rate": 1.306272667699716e-07, "loss": 0.4396322965621948, "step": 7321 }, { "epoch": 1.6879538904899136, "grad_norm": 1.6747688199386033, "learning_rate": 1.304389727696573e-07, "loss": 0.42613643407821655, "step": 7322 }, { "epoch": 1.688184438040346, "grad_norm": 1.6965469885346547, "learning_rate": 1.3025080510751463e-07, "loss": 0.3865918815135956, "step": 7323 }, { "epoch": 1.6884149855907782, "grad_norm": 1.9903164455925761, "learning_rate": 1.3006276381088222e-07, "loss": 0.5589674711227417, "step": 7324 }, { "epoch": 1.6886455331412105, "grad_norm": 1.6749318244122438, "learning_rate": 1.2987484890708022e-07, "loss": 0.4480137228965759, "step": 7325 }, { "epoch": 1.6888760806916427, "grad_norm": 1.6726911081582678, "learning_rate": 1.2968706042341114e-07, "loss": 0.46543216705322266, "step": 7326 }, { "epoch": 1.689106628242075, "grad_norm": 1.7581803186716298, "learning_rate": 1.2949939838715827e-07, "loss": 0.4383571743965149, "step": 7327 }, { "epoch": 1.6893371757925073, "grad_norm": 1.6177869416016535, "learning_rate": 1.2931186282558715e-07, "loss": 0.47900426387786865, "step": 7328 }, { "epoch": 1.6895677233429396, "grad_norm": 1.821208964293862, "learning_rate": 1.2912445376594504e-07, "loss": 0.5839447975158691, "step": 7329 }, { "epoch": 1.6897982708933719, "grad_norm": 1.3312817497628286, "learning_rate": 1.2893717123546023e-07, "loss": 0.5179777145385742, "step": 7330 }, { "epoch": 1.6900288184438041, "grad_norm": 1.8984725283324497, "learning_rate": 1.2875001526134266e-07, "loss": 0.4351516366004944, "step": 7331 }, { "epoch": 1.6902593659942364, "grad_norm": 1.917939401942737, "learning_rate": 1.2856298587078474e-07, "loss": 0.48052316904067993, "step": 7332 }, { "epoch": 1.6904899135446687, "grad_norm": 2.027779365017344, "learning_rate": 1.2837608309095937e-07, "loss": 0.5767349600791931, "step": 7333 }, { "epoch": 1.690720461095101, "grad_norm": 1.5008580056938157, "learning_rate": 1.2818930694902208e-07, "loss": 0.4722314774990082, "step": 7334 }, { "epoch": 1.6909510086455333, "grad_norm": 1.7280315705596727, "learning_rate": 1.280026574721089e-07, "loss": 0.472305029630661, "step": 7335 }, { "epoch": 1.6911815561959656, "grad_norm": 1.7438045229147328, "learning_rate": 1.2781613468733864e-07, "loss": 0.5404185652732849, "step": 7336 }, { "epoch": 1.6914121037463978, "grad_norm": 1.5394991269331093, "learning_rate": 1.2762973862181092e-07, "loss": 0.4667291045188904, "step": 7337 }, { "epoch": 1.6916426512968301, "grad_norm": 1.538686442787995, "learning_rate": 1.2744346930260685e-07, "loss": 0.4928268492221832, "step": 7338 }, { "epoch": 1.6918731988472624, "grad_norm": 1.7770397518440189, "learning_rate": 1.2725732675678958e-07, "loss": 0.4096994996070862, "step": 7339 }, { "epoch": 1.6921037463976947, "grad_norm": 1.5070597312201002, "learning_rate": 1.270713110114041e-07, "loss": 0.4654881954193115, "step": 7340 }, { "epoch": 1.692334293948127, "grad_norm": 1.6648905898246225, "learning_rate": 1.2688542209347597e-07, "loss": 0.4741584360599518, "step": 7341 }, { "epoch": 1.6925648414985592, "grad_norm": 2.0538633630714576, "learning_rate": 1.2669966003001342e-07, "loss": 0.48024487495422363, "step": 7342 }, { "epoch": 1.6927953890489915, "grad_norm": 1.7078715634541877, "learning_rate": 1.2651402484800545e-07, "loss": 0.40225690603256226, "step": 7343 }, { "epoch": 1.6930259365994238, "grad_norm": 1.6998853863064503, "learning_rate": 1.263285165744228e-07, "loss": 0.4933784008026123, "step": 7344 }, { "epoch": 1.6932564841498559, "grad_norm": 1.9278324438812642, "learning_rate": 1.2614313523621823e-07, "loss": 0.5119373798370361, "step": 7345 }, { "epoch": 1.6934870317002881, "grad_norm": 1.5071956319462745, "learning_rate": 1.2595788086032545e-07, "loss": 0.45921066403388977, "step": 7346 }, { "epoch": 1.6937175792507204, "grad_norm": 1.8213068226512792, "learning_rate": 1.2577275347366e-07, "loss": 0.4157813489437103, "step": 7347 }, { "epoch": 1.6939481268011527, "grad_norm": 1.6844334121601705, "learning_rate": 1.255877531031193e-07, "loss": 0.47223663330078125, "step": 7348 }, { "epoch": 1.694178674351585, "grad_norm": 1.703336195724066, "learning_rate": 1.2540287977558173e-07, "loss": 0.49459707736968994, "step": 7349 }, { "epoch": 1.6944092219020173, "grad_norm": 1.4956091776340097, "learning_rate": 1.2521813351790756e-07, "loss": 0.36979377269744873, "step": 7350 }, { "epoch": 1.6946397694524495, "grad_norm": 1.48101418059435, "learning_rate": 1.2503351435693809e-07, "loss": 0.5258666276931763, "step": 7351 }, { "epoch": 1.6948703170028818, "grad_norm": 1.7171134118237417, "learning_rate": 1.248490223194969e-07, "loss": 0.48548775911331177, "step": 7352 }, { "epoch": 1.695100864553314, "grad_norm": 1.5260650611402817, "learning_rate": 1.2466465743238908e-07, "loss": 0.49529772996902466, "step": 7353 }, { "epoch": 1.6953314121037464, "grad_norm": 1.4265089916669407, "learning_rate": 1.244804197224003e-07, "loss": 0.525967001914978, "step": 7354 }, { "epoch": 1.6955619596541787, "grad_norm": 1.5675417944673544, "learning_rate": 1.2429630921629886e-07, "loss": 0.45880353450775146, "step": 7355 }, { "epoch": 1.695792507204611, "grad_norm": 1.786310864642647, "learning_rate": 1.24112325940834e-07, "loss": 0.4868921637535095, "step": 7356 }, { "epoch": 1.6960230547550432, "grad_norm": 1.9091573764166652, "learning_rate": 1.239284699227363e-07, "loss": 0.48856431245803833, "step": 7357 }, { "epoch": 1.6962536023054755, "grad_norm": 1.6702490923414848, "learning_rate": 1.2374474118871848e-07, "loss": 0.4778832793235779, "step": 7358 }, { "epoch": 1.6964841498559078, "grad_norm": 1.5686777857090144, "learning_rate": 1.235611397654741e-07, "loss": 0.4598827660083771, "step": 7359 }, { "epoch": 1.69671469740634, "grad_norm": 1.6359168914665654, "learning_rate": 1.2337766567967868e-07, "loss": 0.41662126779556274, "step": 7360 }, { "epoch": 1.6969452449567723, "grad_norm": 1.8557843651768584, "learning_rate": 1.2319431895798937e-07, "loss": 0.5126262903213501, "step": 7361 }, { "epoch": 1.6971757925072046, "grad_norm": 1.8495853092328842, "learning_rate": 1.2301109962704425e-07, "loss": 0.5296661257743835, "step": 7362 }, { "epoch": 1.697406340057637, "grad_norm": 1.5429948348957294, "learning_rate": 1.2282800771346326e-07, "loss": 0.45704740285873413, "step": 7363 }, { "epoch": 1.6976368876080692, "grad_norm": 1.9200104385218533, "learning_rate": 1.2264504324384739e-07, "loss": 0.48733824491500854, "step": 7364 }, { "epoch": 1.6978674351585015, "grad_norm": 1.4498218230085718, "learning_rate": 1.2246220624477988e-07, "loss": 0.499523788690567, "step": 7365 }, { "epoch": 1.6980979827089338, "grad_norm": 1.5061276319908403, "learning_rate": 1.222794967428251e-07, "loss": 0.4288235902786255, "step": 7366 }, { "epoch": 1.698328530259366, "grad_norm": 1.610798127644449, "learning_rate": 1.2209691476452854e-07, "loss": 0.49590837955474854, "step": 7367 }, { "epoch": 1.6985590778097983, "grad_norm": 1.5318629161297386, "learning_rate": 1.2191446033641784e-07, "loss": 0.36183077096939087, "step": 7368 }, { "epoch": 1.6987896253602306, "grad_norm": 1.60727685293895, "learning_rate": 1.2173213348500156e-07, "loss": 0.4574984312057495, "step": 7369 }, { "epoch": 1.6990201729106629, "grad_norm": 1.4620696246719465, "learning_rate": 1.215499342367695e-07, "loss": 0.4039604365825653, "step": 7370 }, { "epoch": 1.6992507204610952, "grad_norm": 1.5626947359485344, "learning_rate": 1.2136786261819398e-07, "loss": 0.46439865231513977, "step": 7371 }, { "epoch": 1.6994812680115272, "grad_norm": 1.6367083291338689, "learning_rate": 1.2118591865572757e-07, "loss": 0.3887529969215393, "step": 7372 }, { "epoch": 1.6997118155619595, "grad_norm": 1.679799527354739, "learning_rate": 1.2100410237580506e-07, "loss": 0.4796936511993408, "step": 7373 }, { "epoch": 1.6999423631123918, "grad_norm": 1.7129464656234383, "learning_rate": 1.208224138048426e-07, "loss": 0.4265397787094116, "step": 7374 }, { "epoch": 1.700172910662824, "grad_norm": 1.3055425525744166, "learning_rate": 1.2064085296923764e-07, "loss": 0.38613706827163696, "step": 7375 }, { "epoch": 1.7004034582132563, "grad_norm": 1.5801772819812159, "learning_rate": 1.2045941989536866e-07, "loss": 0.3781717121601105, "step": 7376 }, { "epoch": 1.7006340057636886, "grad_norm": 1.4615581683312544, "learning_rate": 1.2027811460959646e-07, "loss": 0.3941626250743866, "step": 7377 }, { "epoch": 1.700864553314121, "grad_norm": 1.842751075683814, "learning_rate": 1.2009693713826251e-07, "loss": 0.6087595820426941, "step": 7378 }, { "epoch": 1.7010951008645532, "grad_norm": 1.7553383978846564, "learning_rate": 1.1991588750769033e-07, "loss": 0.45024704933166504, "step": 7379 }, { "epoch": 1.7013256484149855, "grad_norm": 1.6137642064096758, "learning_rate": 1.1973496574418418e-07, "loss": 0.4617878794670105, "step": 7380 }, { "epoch": 1.7015561959654177, "grad_norm": 1.9862996703255327, "learning_rate": 1.1955417187403037e-07, "loss": 0.4676027297973633, "step": 7381 }, { "epoch": 1.70178674351585, "grad_norm": 1.871973148067532, "learning_rate": 1.193735059234965e-07, "loss": 0.4123028516769409, "step": 7382 }, { "epoch": 1.7020172910662823, "grad_norm": 1.6050301624607715, "learning_rate": 1.1919296791883082e-07, "loss": 0.46627044677734375, "step": 7383 }, { "epoch": 1.7022478386167146, "grad_norm": 1.7149279499035175, "learning_rate": 1.1901255788626418e-07, "loss": 0.5274061560630798, "step": 7384 }, { "epoch": 1.7024783861671469, "grad_norm": 1.5052434216982107, "learning_rate": 1.1883227585200839e-07, "loss": 0.4972034990787506, "step": 7385 }, { "epoch": 1.7027089337175791, "grad_norm": 1.9494573631811647, "learning_rate": 1.1865212184225604e-07, "loss": 0.4843828082084656, "step": 7386 }, { "epoch": 1.7029394812680114, "grad_norm": 1.505775210941553, "learning_rate": 1.1847209588318208e-07, "loss": 0.42801034450531006, "step": 7387 }, { "epoch": 1.7031700288184437, "grad_norm": 1.5219550860788174, "learning_rate": 1.1829219800094226e-07, "loss": 0.4895517826080322, "step": 7388 }, { "epoch": 1.703400576368876, "grad_norm": 1.6293329855163652, "learning_rate": 1.1811242822167367e-07, "loss": 0.4011702537536621, "step": 7389 }, { "epoch": 1.7036311239193083, "grad_norm": 1.648771005525557, "learning_rate": 1.179327865714953e-07, "loss": 0.43081313371658325, "step": 7390 }, { "epoch": 1.7038616714697405, "grad_norm": 1.6248501330425935, "learning_rate": 1.1775327307650695e-07, "loss": 0.4731036424636841, "step": 7391 }, { "epoch": 1.7040922190201728, "grad_norm": 1.6823492765317505, "learning_rate": 1.1757388776279043e-07, "loss": 0.3956582546234131, "step": 7392 }, { "epoch": 1.704322766570605, "grad_norm": 2.067253157839649, "learning_rate": 1.1739463065640798e-07, "loss": 0.5273596048355103, "step": 7393 }, { "epoch": 1.7045533141210374, "grad_norm": 1.7700345516037628, "learning_rate": 1.1721550178340445e-07, "loss": 0.4681214392185211, "step": 7394 }, { "epoch": 1.7047838616714697, "grad_norm": 1.5670620820320287, "learning_rate": 1.1703650116980513e-07, "loss": 0.5035468339920044, "step": 7395 }, { "epoch": 1.705014409221902, "grad_norm": 1.5253972732776402, "learning_rate": 1.1685762884161654e-07, "loss": 0.3710506558418274, "step": 7396 }, { "epoch": 1.7052449567723342, "grad_norm": 1.5910552354864504, "learning_rate": 1.1667888482482746e-07, "loss": 0.37030795216560364, "step": 7397 }, { "epoch": 1.7054755043227665, "grad_norm": 1.923678020581632, "learning_rate": 1.1650026914540755e-07, "loss": 0.5114949941635132, "step": 7398 }, { "epoch": 1.7057060518731988, "grad_norm": 1.6569237958778402, "learning_rate": 1.1632178182930751e-07, "loss": 0.3987428545951843, "step": 7399 }, { "epoch": 1.705936599423631, "grad_norm": 1.7268416884810995, "learning_rate": 1.1614342290246004e-07, "loss": 0.45176962018013, "step": 7400 }, { "epoch": 1.7061671469740634, "grad_norm": 1.5994820804443497, "learning_rate": 1.1596519239077863e-07, "loss": 0.4312123656272888, "step": 7401 }, { "epoch": 1.7063976945244956, "grad_norm": 1.5751618334265127, "learning_rate": 1.157870903201581e-07, "loss": 0.5310814380645752, "step": 7402 }, { "epoch": 1.706628242074928, "grad_norm": 1.6328659776924948, "learning_rate": 1.1560911671647534e-07, "loss": 0.47525835037231445, "step": 7403 }, { "epoch": 1.7068587896253602, "grad_norm": 1.5436637705693566, "learning_rate": 1.1543127160558752e-07, "loss": 0.5096621513366699, "step": 7404 }, { "epoch": 1.7070893371757925, "grad_norm": 1.389329550146149, "learning_rate": 1.15253555013334e-07, "loss": 0.3848613500595093, "step": 7405 }, { "epoch": 1.7073198847262248, "grad_norm": 1.7666796743032251, "learning_rate": 1.1507596696553523e-07, "loss": 0.46764057874679565, "step": 7406 }, { "epoch": 1.707550432276657, "grad_norm": 1.4488805676543326, "learning_rate": 1.148985074879928e-07, "loss": 0.4664180278778076, "step": 7407 }, { "epoch": 1.7077809798270893, "grad_norm": 1.817722227071109, "learning_rate": 1.1472117660648973e-07, "loss": 0.4912991523742676, "step": 7408 }, { "epoch": 1.7080115273775216, "grad_norm": 1.565352212690196, "learning_rate": 1.145439743467902e-07, "loss": 0.44789934158325195, "step": 7409 }, { "epoch": 1.7082420749279539, "grad_norm": 1.6748377858979937, "learning_rate": 1.1436690073463984e-07, "loss": 0.4465000629425049, "step": 7410 }, { "epoch": 1.7084726224783862, "grad_norm": 1.6189125297433695, "learning_rate": 1.1418995579576607e-07, "loss": 0.4135594069957733, "step": 7411 }, { "epoch": 1.7087031700288184, "grad_norm": 1.6376644340887752, "learning_rate": 1.1401313955587655e-07, "loss": 0.5296405553817749, "step": 7412 }, { "epoch": 1.7089337175792507, "grad_norm": 1.8368866532400925, "learning_rate": 1.1383645204066127e-07, "loss": 0.42997848987579346, "step": 7413 }, { "epoch": 1.709164265129683, "grad_norm": 1.7018740159985664, "learning_rate": 1.1365989327579106e-07, "loss": 0.3954406976699829, "step": 7414 }, { "epoch": 1.7093948126801153, "grad_norm": 1.573589198376906, "learning_rate": 1.134834632869176e-07, "loss": 0.5098167657852173, "step": 7415 }, { "epoch": 1.7096253602305476, "grad_norm": 1.666317247889276, "learning_rate": 1.1330716209967505e-07, "loss": 0.44079747796058655, "step": 7416 }, { "epoch": 1.7098559077809798, "grad_norm": 1.7228180635145338, "learning_rate": 1.1313098973967738e-07, "loss": 0.4745299220085144, "step": 7417 }, { "epoch": 1.7100864553314121, "grad_norm": 1.7819888998245728, "learning_rate": 1.129549462325211e-07, "loss": 0.586134672164917, "step": 7418 }, { "epoch": 1.7103170028818444, "grad_norm": 1.6308460435680996, "learning_rate": 1.1277903160378377e-07, "loss": 0.453177273273468, "step": 7419 }, { "epoch": 1.7105475504322767, "grad_norm": 1.5209333044759716, "learning_rate": 1.1260324587902314e-07, "loss": 0.4852634370326996, "step": 7420 }, { "epoch": 1.710778097982709, "grad_norm": 1.6132453532504059, "learning_rate": 1.1242758908377959e-07, "loss": 0.44662681221961975, "step": 7421 }, { "epoch": 1.7110086455331412, "grad_norm": 1.856186184886188, "learning_rate": 1.1225206124357412e-07, "loss": 0.4371451735496521, "step": 7422 }, { "epoch": 1.7112391930835735, "grad_norm": 1.7655414830639746, "learning_rate": 1.12076662383909e-07, "loss": 0.46445029973983765, "step": 7423 }, { "epoch": 1.7114697406340058, "grad_norm": 1.8715652362026352, "learning_rate": 1.119013925302682e-07, "loss": 0.4788290858268738, "step": 7424 }, { "epoch": 1.711700288184438, "grad_norm": 1.6675143296503283, "learning_rate": 1.1172625170811634e-07, "loss": 0.44457030296325684, "step": 7425 }, { "epoch": 1.7119308357348704, "grad_norm": 1.5832176674715501, "learning_rate": 1.1155123994289927e-07, "loss": 0.4288104176521301, "step": 7426 }, { "epoch": 1.7121613832853027, "grad_norm": 1.7512263233099108, "learning_rate": 1.1137635726004502e-07, "loss": 0.4504792392253876, "step": 7427 }, { "epoch": 1.712391930835735, "grad_norm": 1.7570403694181211, "learning_rate": 1.1120160368496167e-07, "loss": 0.4845864772796631, "step": 7428 }, { "epoch": 1.7126224783861672, "grad_norm": 1.6506356701568592, "learning_rate": 1.1102697924303928e-07, "loss": 0.5140354633331299, "step": 7429 }, { "epoch": 1.7128530259365995, "grad_norm": 1.8835666301746035, "learning_rate": 1.1085248395964919e-07, "loss": 0.5070383548736572, "step": 7430 }, { "epoch": 1.7130835734870318, "grad_norm": 1.670709235469594, "learning_rate": 1.1067811786014358e-07, "loss": 0.5340418219566345, "step": 7431 }, { "epoch": 1.713314121037464, "grad_norm": 1.7797047375233717, "learning_rate": 1.1050388096985596e-07, "loss": 0.4950510561466217, "step": 7432 }, { "epoch": 1.7135446685878963, "grad_norm": 1.5585786144398537, "learning_rate": 1.1032977331410109e-07, "loss": 0.44572609663009644, "step": 7433 }, { "epoch": 1.7137752161383286, "grad_norm": 1.560749431847804, "learning_rate": 1.1015579491817506e-07, "loss": 0.4561808109283447, "step": 7434 }, { "epoch": 1.714005763688761, "grad_norm": 2.192343641588309, "learning_rate": 1.0998194580735531e-07, "loss": 0.5268326997756958, "step": 7435 }, { "epoch": 1.7142363112391932, "grad_norm": 1.63175270270566, "learning_rate": 1.098082260069001e-07, "loss": 0.42211106419563293, "step": 7436 }, { "epoch": 1.7144668587896255, "grad_norm": 2.3567165239179846, "learning_rate": 1.0963463554204922e-07, "loss": 0.451328307390213, "step": 7437 }, { "epoch": 1.7146974063400577, "grad_norm": 1.6768366495694842, "learning_rate": 1.094611744380236e-07, "loss": 0.4603223204612732, "step": 7438 }, { "epoch": 1.71492795389049, "grad_norm": 1.5061721175015017, "learning_rate": 1.09287842720025e-07, "loss": 0.47026073932647705, "step": 7439 }, { "epoch": 1.7151585014409223, "grad_norm": 1.4706168009476588, "learning_rate": 1.0911464041323715e-07, "loss": 0.4415278434753418, "step": 7440 }, { "epoch": 1.7153890489913546, "grad_norm": 1.64598531774651, "learning_rate": 1.0894156754282424e-07, "loss": 0.4910876750946045, "step": 7441 }, { "epoch": 1.7156195965417869, "grad_norm": 1.6127230404554662, "learning_rate": 1.0876862413393195e-07, "loss": 0.43458497524261475, "step": 7442 }, { "epoch": 1.7158501440922191, "grad_norm": 1.495202383352549, "learning_rate": 1.0859581021168762e-07, "loss": 0.3776114881038666, "step": 7443 }, { "epoch": 1.7160806916426514, "grad_norm": 1.7256092990297747, "learning_rate": 1.0842312580119884e-07, "loss": 0.5010780096054077, "step": 7444 }, { "epoch": 1.7163112391930837, "grad_norm": 1.3668277186923021, "learning_rate": 1.0825057092755507e-07, "loss": 0.39507001638412476, "step": 7445 }, { "epoch": 1.716541786743516, "grad_norm": 1.4987676780901875, "learning_rate": 1.080781456158264e-07, "loss": 0.4180053770542145, "step": 7446 }, { "epoch": 1.7167723342939483, "grad_norm": 2.1314587491759442, "learning_rate": 1.0790584989106467e-07, "loss": 0.47408896684646606, "step": 7447 }, { "epoch": 1.7170028818443805, "grad_norm": 1.5715711040468392, "learning_rate": 1.0773368377830294e-07, "loss": 0.4921650290489197, "step": 7448 }, { "epoch": 1.7172334293948128, "grad_norm": 1.7750258778835253, "learning_rate": 1.0756164730255469e-07, "loss": 0.47502100467681885, "step": 7449 }, { "epoch": 1.717463976945245, "grad_norm": 1.7577426271633303, "learning_rate": 1.0738974048881544e-07, "loss": 0.40656572580337524, "step": 7450 }, { "epoch": 1.7176945244956774, "grad_norm": 1.5162151917678242, "learning_rate": 1.0721796336206124e-07, "loss": 0.5114340782165527, "step": 7451 }, { "epoch": 1.7179250720461097, "grad_norm": 1.6900637058341903, "learning_rate": 1.0704631594724933e-07, "loss": 0.36296752095222473, "step": 7452 }, { "epoch": 1.718155619596542, "grad_norm": 1.7155576773786951, "learning_rate": 1.0687479826931878e-07, "loss": 0.4491519331932068, "step": 7453 }, { "epoch": 1.718386167146974, "grad_norm": 1.4226771137370906, "learning_rate": 1.0670341035318875e-07, "loss": 0.4326419234275818, "step": 7454 }, { "epoch": 1.7186167146974063, "grad_norm": 1.6302999867116295, "learning_rate": 1.0653215222376044e-07, "loss": 0.5220270156860352, "step": 7455 }, { "epoch": 1.7188472622478386, "grad_norm": 1.567650077192639, "learning_rate": 1.0636102390591606e-07, "loss": 0.4059186279773712, "step": 7456 }, { "epoch": 1.7190778097982709, "grad_norm": 1.7354307880622482, "learning_rate": 1.061900254245186e-07, "loss": 0.42291224002838135, "step": 7457 }, { "epoch": 1.7193083573487031, "grad_norm": 1.4894312189644163, "learning_rate": 1.0601915680441209e-07, "loss": 0.43607455492019653, "step": 7458 }, { "epoch": 1.7195389048991354, "grad_norm": 1.9632855044272428, "learning_rate": 1.0584841807042234e-07, "loss": 0.5138526558876038, "step": 7459 }, { "epoch": 1.7197694524495677, "grad_norm": 1.7822906481694845, "learning_rate": 1.0567780924735559e-07, "loss": 0.4020421802997589, "step": 7460 }, { "epoch": 1.72, "grad_norm": 1.623501776252126, "learning_rate": 1.0550733036000004e-07, "loss": 0.49493610858917236, "step": 7461 }, { "epoch": 1.7202305475504323, "grad_norm": 1.581571013878747, "learning_rate": 1.0533698143312386e-07, "loss": 0.47467899322509766, "step": 7462 }, { "epoch": 1.7204610951008645, "grad_norm": 1.8688599536477708, "learning_rate": 1.0516676249147749e-07, "loss": 0.5697565078735352, "step": 7463 }, { "epoch": 1.7206916426512968, "grad_norm": 1.3314748131695917, "learning_rate": 1.0499667355979169e-07, "loss": 0.37515026330947876, "step": 7464 }, { "epoch": 1.720922190201729, "grad_norm": 1.558567814194751, "learning_rate": 1.048267146627786e-07, "loss": 0.4420028328895569, "step": 7465 }, { "epoch": 1.7211527377521614, "grad_norm": 1.7343593017294057, "learning_rate": 1.0465688582513155e-07, "loss": 0.4828314185142517, "step": 7466 }, { "epoch": 1.7213832853025937, "grad_norm": 1.6846810381650634, "learning_rate": 1.0448718707152504e-07, "loss": 0.4600035548210144, "step": 7467 }, { "epoch": 1.721613832853026, "grad_norm": 1.807146642346794, "learning_rate": 1.0431761842661435e-07, "loss": 0.41191548109054565, "step": 7468 }, { "epoch": 1.7218443804034582, "grad_norm": 1.9114706919498279, "learning_rate": 1.0414817991503622e-07, "loss": 0.5425341725349426, "step": 7469 }, { "epoch": 1.7220749279538905, "grad_norm": 1.8729482375195492, "learning_rate": 1.0397887156140816e-07, "loss": 0.487109512090683, "step": 7470 }, { "epoch": 1.7223054755043228, "grad_norm": 1.7803299951951388, "learning_rate": 1.0380969339032886e-07, "loss": 0.4489486515522003, "step": 7471 }, { "epoch": 1.722536023054755, "grad_norm": 1.6945025110814675, "learning_rate": 1.036406454263783e-07, "loss": 0.5018674731254578, "step": 7472 }, { "epoch": 1.7227665706051873, "grad_norm": 2.040381850324315, "learning_rate": 1.0347172769411717e-07, "loss": 0.5236009955406189, "step": 7473 }, { "epoch": 1.7229971181556196, "grad_norm": 1.5273022991430243, "learning_rate": 1.0330294021808761e-07, "loss": 0.4675469994544983, "step": 7474 }, { "epoch": 1.723227665706052, "grad_norm": 1.4423948071468964, "learning_rate": 1.0313428302281279e-07, "loss": 0.46353641152381897, "step": 7475 }, { "epoch": 1.7234582132564842, "grad_norm": 1.5910405062096624, "learning_rate": 1.029657561327969e-07, "loss": 0.44482338428497314, "step": 7476 }, { "epoch": 1.7236887608069165, "grad_norm": 1.8731422268451656, "learning_rate": 1.0279735957252489e-07, "loss": 0.46485304832458496, "step": 7477 }, { "epoch": 1.7239193083573487, "grad_norm": 1.6235316800460735, "learning_rate": 1.0262909336646297e-07, "loss": 0.46616819500923157, "step": 7478 }, { "epoch": 1.724149855907781, "grad_norm": 2.1075896653310897, "learning_rate": 1.0246095753905859e-07, "loss": 0.5326619744300842, "step": 7479 }, { "epoch": 1.7243804034582133, "grad_norm": 1.5629362609682114, "learning_rate": 1.0229295211474031e-07, "loss": 0.4326254725456238, "step": 7480 }, { "epoch": 1.7246109510086456, "grad_norm": 1.6926827697188294, "learning_rate": 1.021250771179173e-07, "loss": 0.4316496253013611, "step": 7481 }, { "epoch": 1.7248414985590776, "grad_norm": 1.5119148511884464, "learning_rate": 1.0195733257298034e-07, "loss": 0.4678090810775757, "step": 7482 }, { "epoch": 1.72507204610951, "grad_norm": 2.0805412539757158, "learning_rate": 1.0178971850430085e-07, "loss": 0.5122083425521851, "step": 7483 }, { "epoch": 1.7253025936599422, "grad_norm": 1.8165420465218807, "learning_rate": 1.0162223493623113e-07, "loss": 0.533470869064331, "step": 7484 }, { "epoch": 1.7255331412103745, "grad_norm": 1.5064649628800673, "learning_rate": 1.0145488189310525e-07, "loss": 0.4170408844947815, "step": 7485 }, { "epoch": 1.7257636887608068, "grad_norm": 1.5295516246279552, "learning_rate": 1.0128765939923745e-07, "loss": 0.43090832233428955, "step": 7486 }, { "epoch": 1.725994236311239, "grad_norm": 1.7285765401539372, "learning_rate": 1.0112056747892361e-07, "loss": 0.41703808307647705, "step": 7487 }, { "epoch": 1.7262247838616713, "grad_norm": 1.5954882562837394, "learning_rate": 1.0095360615644066e-07, "loss": 0.43948811292648315, "step": 7488 }, { "epoch": 1.7264553314121036, "grad_norm": 1.6198432317521159, "learning_rate": 1.0078677545604608e-07, "loss": 0.5152919292449951, "step": 7489 }, { "epoch": 1.726685878962536, "grad_norm": 1.618593763623408, "learning_rate": 1.0062007540197881e-07, "loss": 0.44976603984832764, "step": 7490 }, { "epoch": 1.7269164265129682, "grad_norm": 1.4603076911656099, "learning_rate": 1.0045350601845825e-07, "loss": 0.39342233538627625, "step": 7491 }, { "epoch": 1.7271469740634005, "grad_norm": 1.8025946719805128, "learning_rate": 1.0028706732968551e-07, "loss": 0.5002644658088684, "step": 7492 }, { "epoch": 1.7273775216138327, "grad_norm": 1.4920644612153386, "learning_rate": 1.0012075935984254e-07, "loss": 0.4744255840778351, "step": 7493 }, { "epoch": 1.727608069164265, "grad_norm": 1.7250863140843284, "learning_rate": 9.995458213309183e-08, "loss": 0.5364977717399597, "step": 7494 }, { "epoch": 1.7278386167146973, "grad_norm": 1.5826177519538227, "learning_rate": 9.978853567357748e-08, "loss": 0.41442006826400757, "step": 7495 }, { "epoch": 1.7280691642651296, "grad_norm": 1.8900488961036888, "learning_rate": 9.96226200054242e-08, "loss": 0.48307210206985474, "step": 7496 }, { "epoch": 1.7282997118155619, "grad_norm": 1.377997456460788, "learning_rate": 9.945683515273762e-08, "loss": 0.42677950859069824, "step": 7497 }, { "epoch": 1.7285302593659941, "grad_norm": 1.6897651930501534, "learning_rate": 9.929118113960488e-08, "loss": 0.4564162790775299, "step": 7498 }, { "epoch": 1.7287608069164264, "grad_norm": 1.637171531739475, "learning_rate": 9.912565799009342e-08, "loss": 0.5233185887336731, "step": 7499 }, { "epoch": 1.7289913544668587, "grad_norm": 1.613782917482401, "learning_rate": 9.896026572825233e-08, "loss": 0.45117032527923584, "step": 7500 }, { "epoch": 1.729221902017291, "grad_norm": 1.6941256160277793, "learning_rate": 9.879500437811139e-08, "loss": 0.40787798166275024, "step": 7501 }, { "epoch": 1.7294524495677233, "grad_norm": 1.7574036134910922, "learning_rate": 9.862987396368138e-08, "loss": 0.49555718898773193, "step": 7502 }, { "epoch": 1.7296829971181555, "grad_norm": 1.411079526679828, "learning_rate": 9.846487450895357e-08, "loss": 0.4298393726348877, "step": 7503 }, { "epoch": 1.7299135446685878, "grad_norm": 1.5124401855552176, "learning_rate": 9.830000603790134e-08, "loss": 0.444818913936615, "step": 7504 }, { "epoch": 1.73014409221902, "grad_norm": 1.8237459278593453, "learning_rate": 9.813526857447785e-08, "loss": 0.5020506381988525, "step": 7505 }, { "epoch": 1.7303746397694524, "grad_norm": 1.608208641626091, "learning_rate": 9.797066214261806e-08, "loss": 0.48920828104019165, "step": 7506 }, { "epoch": 1.7306051873198847, "grad_norm": 1.7777996714125537, "learning_rate": 9.78061867662372e-08, "loss": 0.47981560230255127, "step": 7507 }, { "epoch": 1.730835734870317, "grad_norm": 1.7932378217487064, "learning_rate": 9.764184246923235e-08, "loss": 0.43281418085098267, "step": 7508 }, { "epoch": 1.7310662824207492, "grad_norm": 1.5295134527039604, "learning_rate": 9.747762927548064e-08, "loss": 0.5252255797386169, "step": 7509 }, { "epoch": 1.7312968299711815, "grad_norm": 1.542548175322286, "learning_rate": 9.731354720884056e-08, "loss": 0.3210105299949646, "step": 7510 }, { "epoch": 1.7315273775216138, "grad_norm": 1.6116337463422301, "learning_rate": 9.714959629315156e-08, "loss": 0.3888552784919739, "step": 7511 }, { "epoch": 1.731757925072046, "grad_norm": 1.946842581393702, "learning_rate": 9.698577655223427e-08, "loss": 0.4745385944843292, "step": 7512 }, { "epoch": 1.7319884726224783, "grad_norm": 1.489733706159224, "learning_rate": 9.682208800988955e-08, "loss": 0.49115753173828125, "step": 7513 }, { "epoch": 1.7322190201729106, "grad_norm": 1.7715639242385375, "learning_rate": 9.665853068990005e-08, "loss": 0.5387924909591675, "step": 7514 }, { "epoch": 1.732449567723343, "grad_norm": 1.525187821805336, "learning_rate": 9.649510461602884e-08, "loss": 0.524321436882019, "step": 7515 }, { "epoch": 1.7326801152737752, "grad_norm": 1.5860436064187413, "learning_rate": 9.633180981201972e-08, "loss": 0.4536818861961365, "step": 7516 }, { "epoch": 1.7329106628242075, "grad_norm": 1.5337023749780843, "learning_rate": 9.616864630159816e-08, "loss": 0.42552828788757324, "step": 7517 }, { "epoch": 1.7331412103746398, "grad_norm": 2.0073070998312628, "learning_rate": 9.600561410846963e-08, "loss": 0.4798361659049988, "step": 7518 }, { "epoch": 1.733371757925072, "grad_norm": 1.5754383687584421, "learning_rate": 9.584271325632143e-08, "loss": 0.46831148862838745, "step": 7519 }, { "epoch": 1.7336023054755043, "grad_norm": 1.4214145640785232, "learning_rate": 9.567994376882138e-08, "loss": 0.45275694131851196, "step": 7520 }, { "epoch": 1.7338328530259366, "grad_norm": 1.5148035503433661, "learning_rate": 9.551730566961802e-08, "loss": 0.4453716278076172, "step": 7521 }, { "epoch": 1.7340634005763689, "grad_norm": 1.721382063454988, "learning_rate": 9.535479898234112e-08, "loss": 0.4417135715484619, "step": 7522 }, { "epoch": 1.7342939481268012, "grad_norm": 1.2708031102992778, "learning_rate": 9.519242373060077e-08, "loss": 0.3882251977920532, "step": 7523 }, { "epoch": 1.7345244956772334, "grad_norm": 1.6303128159977553, "learning_rate": 9.503017993798879e-08, "loss": 0.44697415828704834, "step": 7524 }, { "epoch": 1.7347550432276657, "grad_norm": 1.4940295737615545, "learning_rate": 9.486806762807753e-08, "loss": 0.453482985496521, "step": 7525 }, { "epoch": 1.734985590778098, "grad_norm": 1.3459156592909483, "learning_rate": 9.470608682442005e-08, "loss": 0.4285128712654114, "step": 7526 }, { "epoch": 1.7352161383285303, "grad_norm": 1.569173733211583, "learning_rate": 9.454423755055097e-08, "loss": 0.5039681196212769, "step": 7527 }, { "epoch": 1.7354466858789626, "grad_norm": 1.545842201869689, "learning_rate": 9.438251982998446e-08, "loss": 0.47230884432792664, "step": 7528 }, { "epoch": 1.7356772334293948, "grad_norm": 1.6050836653007727, "learning_rate": 9.422093368621697e-08, "loss": 0.4544826149940491, "step": 7529 }, { "epoch": 1.7359077809798271, "grad_norm": 1.742414138608038, "learning_rate": 9.405947914272528e-08, "loss": 0.5290546417236328, "step": 7530 }, { "epoch": 1.7361383285302594, "grad_norm": 1.7746094817380647, "learning_rate": 9.389815622296682e-08, "loss": 0.4567055106163025, "step": 7531 }, { "epoch": 1.7363688760806917, "grad_norm": 1.4581374638124471, "learning_rate": 9.37369649503802e-08, "loss": 0.4331890642642975, "step": 7532 }, { "epoch": 1.736599423631124, "grad_norm": 1.2487963100679016, "learning_rate": 9.357590534838533e-08, "loss": 0.436681866645813, "step": 7533 }, { "epoch": 1.7368299711815562, "grad_norm": 1.6198245450897364, "learning_rate": 9.341497744038174e-08, "loss": 0.338731586933136, "step": 7534 }, { "epoch": 1.7370605187319885, "grad_norm": 1.508881034011893, "learning_rate": 9.325418124975104e-08, "loss": 0.4191438555717468, "step": 7535 }, { "epoch": 1.7372910662824208, "grad_norm": 1.8780313553460073, "learning_rate": 9.309351679985488e-08, "loss": 0.48121005296707153, "step": 7536 }, { "epoch": 1.737521613832853, "grad_norm": 1.6253985288606565, "learning_rate": 9.293298411403649e-08, "loss": 0.4818571209907532, "step": 7537 }, { "epoch": 1.7377521613832854, "grad_norm": 1.690304719427989, "learning_rate": 9.277258321561953e-08, "loss": 0.5245034694671631, "step": 7538 }, { "epoch": 1.7379827089337176, "grad_norm": 1.6355032908600926, "learning_rate": 9.261231412790871e-08, "loss": 0.34384316205978394, "step": 7539 }, { "epoch": 1.73821325648415, "grad_norm": 1.6274411603144268, "learning_rate": 9.245217687418893e-08, "loss": 0.5642977356910706, "step": 7540 }, { "epoch": 1.7384438040345822, "grad_norm": 1.7735493838829446, "learning_rate": 9.229217147772706e-08, "loss": 0.42372941970825195, "step": 7541 }, { "epoch": 1.7386743515850145, "grad_norm": 1.4194954678009137, "learning_rate": 9.21322979617698e-08, "loss": 0.45389068126678467, "step": 7542 }, { "epoch": 1.7389048991354468, "grad_norm": 1.9956141977502952, "learning_rate": 9.197255634954549e-08, "loss": 0.4558347463607788, "step": 7543 }, { "epoch": 1.739135446685879, "grad_norm": 1.518636010257655, "learning_rate": 9.181294666426242e-08, "loss": 0.39747804403305054, "step": 7544 }, { "epoch": 1.7393659942363113, "grad_norm": 1.4376186670875364, "learning_rate": 9.165346892911086e-08, "loss": 0.4366666376590729, "step": 7545 }, { "epoch": 1.7395965417867436, "grad_norm": 1.6808343438571287, "learning_rate": 9.14941231672608e-08, "loss": 0.46908068656921387, "step": 7546 }, { "epoch": 1.739827089337176, "grad_norm": 1.6446423333356077, "learning_rate": 9.133490940186362e-08, "loss": 0.4479343891143799, "step": 7547 }, { "epoch": 1.7400576368876082, "grad_norm": 1.7651113248837644, "learning_rate": 9.117582765605125e-08, "loss": 0.5398527383804321, "step": 7548 }, { "epoch": 1.7402881844380405, "grad_norm": 1.7495160233878475, "learning_rate": 9.101687795293711e-08, "loss": 0.5177565217018127, "step": 7549 }, { "epoch": 1.7405187319884727, "grad_norm": 1.7544400731280996, "learning_rate": 9.085806031561449e-08, "loss": 0.4867921471595764, "step": 7550 }, { "epoch": 1.740749279538905, "grad_norm": 1.8079319879826028, "learning_rate": 9.069937476715817e-08, "loss": 0.4968165159225464, "step": 7551 }, { "epoch": 1.7409798270893373, "grad_norm": 1.5072359519878373, "learning_rate": 9.054082133062346e-08, "loss": 0.4723459780216217, "step": 7552 }, { "epoch": 1.7412103746397696, "grad_norm": 1.7902281905237034, "learning_rate": 9.03824000290464e-08, "loss": 0.4756123423576355, "step": 7553 }, { "epoch": 1.7414409221902019, "grad_norm": 1.563530493865973, "learning_rate": 9.022411088544412e-08, "loss": 0.4623698592185974, "step": 7554 }, { "epoch": 1.7416714697406341, "grad_norm": 1.5031875380247095, "learning_rate": 9.006595392281424e-08, "loss": 0.5495933890342712, "step": 7555 }, { "epoch": 1.7419020172910664, "grad_norm": 1.6009783877508532, "learning_rate": 8.990792916413526e-08, "loss": 0.48480600118637085, "step": 7556 }, { "epoch": 1.7421325648414987, "grad_norm": 2.3377126287594545, "learning_rate": 8.975003663236702e-08, "loss": 0.4872364401817322, "step": 7557 }, { "epoch": 1.742363112391931, "grad_norm": 1.9626684536569303, "learning_rate": 8.95922763504492e-08, "loss": 0.47713416814804077, "step": 7558 }, { "epoch": 1.7425936599423633, "grad_norm": 1.6011185020405347, "learning_rate": 8.943464834130287e-08, "loss": 0.42151015996932983, "step": 7559 }, { "epoch": 1.7428242074927955, "grad_norm": 1.7001226994436034, "learning_rate": 8.927715262782954e-08, "loss": 0.4855753779411316, "step": 7560 }, { "epoch": 1.7430547550432278, "grad_norm": 1.6165035114904904, "learning_rate": 8.911978923291186e-08, "loss": 0.45306575298309326, "step": 7561 }, { "epoch": 1.74328530259366, "grad_norm": 1.764884197279863, "learning_rate": 8.896255817941334e-08, "loss": 0.4395045042037964, "step": 7562 }, { "epoch": 1.7435158501440924, "grad_norm": 1.9974865160949407, "learning_rate": 8.880545949017748e-08, "loss": 0.3771313428878784, "step": 7563 }, { "epoch": 1.7437463976945244, "grad_norm": 1.5074350350589907, "learning_rate": 8.86484931880297e-08, "loss": 0.46764516830444336, "step": 7564 }, { "epoch": 1.7439769452449567, "grad_norm": 1.828556449583164, "learning_rate": 8.849165929577517e-08, "loss": 0.4414178133010864, "step": 7565 }, { "epoch": 1.744207492795389, "grad_norm": 1.8922732063370227, "learning_rate": 8.833495783620016e-08, "loss": 0.5199191570281982, "step": 7566 }, { "epoch": 1.7444380403458213, "grad_norm": 1.9299795436619636, "learning_rate": 8.817838883207218e-08, "loss": 0.4091680645942688, "step": 7567 }, { "epoch": 1.7446685878962536, "grad_norm": 1.6528169018956758, "learning_rate": 8.802195230613852e-08, "loss": 0.4918748140335083, "step": 7568 }, { "epoch": 1.7448991354466858, "grad_norm": 1.7608829566111894, "learning_rate": 8.786564828112809e-08, "loss": 0.6308727264404297, "step": 7569 }, { "epoch": 1.7451296829971181, "grad_norm": 1.5275596205430586, "learning_rate": 8.770947677975038e-08, "loss": 0.4739742875099182, "step": 7570 }, { "epoch": 1.7453602305475504, "grad_norm": 1.944068702259704, "learning_rate": 8.755343782469538e-08, "loss": 0.4230186343193054, "step": 7571 }, { "epoch": 1.7455907780979827, "grad_norm": 1.4651669474961737, "learning_rate": 8.739753143863382e-08, "loss": 0.4493221640586853, "step": 7572 }, { "epoch": 1.745821325648415, "grad_norm": 1.5840361731430554, "learning_rate": 8.724175764421715e-08, "loss": 0.45003601908683777, "step": 7573 }, { "epoch": 1.7460518731988472, "grad_norm": 1.5857283185800684, "learning_rate": 8.708611646407793e-08, "loss": 0.4641885757446289, "step": 7574 }, { "epoch": 1.7462824207492795, "grad_norm": 1.7333150129203774, "learning_rate": 8.693060792082929e-08, "loss": 0.4831950068473816, "step": 7575 }, { "epoch": 1.7465129682997118, "grad_norm": 1.5101721481830663, "learning_rate": 8.67752320370646e-08, "loss": 0.4084625840187073, "step": 7576 }, { "epoch": 1.746743515850144, "grad_norm": 2.0710521973450464, "learning_rate": 8.661998883535881e-08, "loss": 0.4741554856300354, "step": 7577 }, { "epoch": 1.7469740634005764, "grad_norm": 1.819950018248465, "learning_rate": 8.646487833826698e-08, "loss": 0.4739700257778168, "step": 7578 }, { "epoch": 1.7472046109510087, "grad_norm": 1.937161185285157, "learning_rate": 8.630990056832487e-08, "loss": 0.46832704544067383, "step": 7579 }, { "epoch": 1.747435158501441, "grad_norm": 1.6055622161766872, "learning_rate": 8.615505554804936e-08, "loss": 0.4395570158958435, "step": 7580 }, { "epoch": 1.7476657060518732, "grad_norm": 1.6800369119355125, "learning_rate": 8.600034329993755e-08, "loss": 0.4883463382720947, "step": 7581 }, { "epoch": 1.7478962536023055, "grad_norm": 1.3748001970742678, "learning_rate": 8.58457638464678e-08, "loss": 0.43945634365081787, "step": 7582 }, { "epoch": 1.7481268011527378, "grad_norm": 1.5799846720591912, "learning_rate": 8.569131721009892e-08, "loss": 0.431932270526886, "step": 7583 }, { "epoch": 1.74835734870317, "grad_norm": 1.710985178231973, "learning_rate": 8.55370034132703e-08, "loss": 0.3820973336696625, "step": 7584 }, { "epoch": 1.7485878962536023, "grad_norm": 1.2876621633227396, "learning_rate": 8.5382822478402e-08, "loss": 0.476523756980896, "step": 7585 }, { "epoch": 1.7488184438040346, "grad_norm": 1.8780737473029232, "learning_rate": 8.522877442789511e-08, "loss": 0.4477803707122803, "step": 7586 }, { "epoch": 1.749048991354467, "grad_norm": 1.5728974720788302, "learning_rate": 8.507485928413095e-08, "loss": 0.5614187717437744, "step": 7587 }, { "epoch": 1.7492795389048992, "grad_norm": 1.603867913680985, "learning_rate": 8.492107706947216e-08, "loss": 0.48507487773895264, "step": 7588 }, { "epoch": 1.7495100864553315, "grad_norm": 1.9320347667746667, "learning_rate": 8.476742780626134e-08, "loss": 0.5384523868560791, "step": 7589 }, { "epoch": 1.7497406340057637, "grad_norm": 1.3666371507332613, "learning_rate": 8.46139115168224e-08, "loss": 0.4491095542907715, "step": 7590 }, { "epoch": 1.749971181556196, "grad_norm": 1.788798941175675, "learning_rate": 8.446052822345961e-08, "loss": 0.4440731406211853, "step": 7591 }, { "epoch": 1.750201729106628, "grad_norm": 1.8620853583487789, "learning_rate": 8.43072779484577e-08, "loss": 0.44330766797065735, "step": 7592 }, { "epoch": 1.7504322766570604, "grad_norm": 2.168954666945145, "learning_rate": 8.415416071408255e-08, "loss": 0.5658286809921265, "step": 7593 }, { "epoch": 1.7506628242074926, "grad_norm": 1.6560038514007611, "learning_rate": 8.400117654258065e-08, "loss": 0.529731273651123, "step": 7594 }, { "epoch": 1.750893371757925, "grad_norm": 1.9880717674846065, "learning_rate": 8.38483254561787e-08, "loss": 0.48404788970947266, "step": 7595 }, { "epoch": 1.7511239193083572, "grad_norm": 1.8893587082964727, "learning_rate": 8.36956074770847e-08, "loss": 0.5418789386749268, "step": 7596 }, { "epoch": 1.7513544668587895, "grad_norm": 1.5987650722786961, "learning_rate": 8.354302262748681e-08, "loss": 0.4364627003669739, "step": 7597 }, { "epoch": 1.7515850144092218, "grad_norm": 1.9291958087354615, "learning_rate": 8.339057092955382e-08, "loss": 0.5439783334732056, "step": 7598 }, { "epoch": 1.751815561959654, "grad_norm": 1.324151225959263, "learning_rate": 8.323825240543581e-08, "loss": 0.3421855568885803, "step": 7599 }, { "epoch": 1.7520461095100863, "grad_norm": 1.4472184681007734, "learning_rate": 8.30860670772625e-08, "loss": 0.4138490855693817, "step": 7600 }, { "epoch": 1.7522766570605186, "grad_norm": 1.424984649539542, "learning_rate": 8.293401496714536e-08, "loss": 0.48010265827178955, "step": 7601 }, { "epoch": 1.7525072046109509, "grad_norm": 2.0212458348438536, "learning_rate": 8.27820960971759e-08, "loss": 0.4731842279434204, "step": 7602 }, { "epoch": 1.7527377521613832, "grad_norm": 1.8169709585385185, "learning_rate": 8.263031048942626e-08, "loss": 0.4629393517971039, "step": 7603 }, { "epoch": 1.7529682997118154, "grad_norm": 1.5488079089685691, "learning_rate": 8.247865816594934e-08, "loss": 0.47763800621032715, "step": 7604 }, { "epoch": 1.7531988472622477, "grad_norm": 1.6496235472038807, "learning_rate": 8.232713914877831e-08, "loss": 0.4947577714920044, "step": 7605 }, { "epoch": 1.75342939481268, "grad_norm": 1.683138181441751, "learning_rate": 8.217575345992767e-08, "loss": 0.525865375995636, "step": 7606 }, { "epoch": 1.7536599423631123, "grad_norm": 2.0948110988687034, "learning_rate": 8.202450112139237e-08, "loss": 0.3592625558376312, "step": 7607 }, { "epoch": 1.7538904899135446, "grad_norm": 1.802480905567215, "learning_rate": 8.187338215514727e-08, "loss": 0.4164069890975952, "step": 7608 }, { "epoch": 1.7541210374639769, "grad_norm": 1.875988052607345, "learning_rate": 8.172239658314883e-08, "loss": 0.508970320224762, "step": 7609 }, { "epoch": 1.7543515850144091, "grad_norm": 1.6717467531748178, "learning_rate": 8.15715444273336e-08, "loss": 0.5025101900100708, "step": 7610 }, { "epoch": 1.7545821325648414, "grad_norm": 1.633734503844184, "learning_rate": 8.14208257096185e-08, "loss": 0.40589481592178345, "step": 7611 }, { "epoch": 1.7548126801152737, "grad_norm": 1.5657822184185666, "learning_rate": 8.127024045190179e-08, "loss": 0.4361206889152527, "step": 7612 }, { "epoch": 1.755043227665706, "grad_norm": 1.7797082565488338, "learning_rate": 8.111978867606173e-08, "loss": 0.4599993824958801, "step": 7613 }, { "epoch": 1.7552737752161383, "grad_norm": 1.7072368164248672, "learning_rate": 8.096947040395729e-08, "loss": 0.4837978482246399, "step": 7614 }, { "epoch": 1.7555043227665705, "grad_norm": 1.5967344816784776, "learning_rate": 8.081928565742868e-08, "loss": 0.44848477840423584, "step": 7615 }, { "epoch": 1.7557348703170028, "grad_norm": 1.719065982432159, "learning_rate": 8.066923445829565e-08, "loss": 0.5391553044319153, "step": 7616 }, { "epoch": 1.755965417867435, "grad_norm": 1.8326741541692133, "learning_rate": 8.051931682835933e-08, "loss": 0.5540188550949097, "step": 7617 }, { "epoch": 1.7561959654178674, "grad_norm": 1.6797073632881683, "learning_rate": 8.036953278940095e-08, "loss": 0.432576984167099, "step": 7618 }, { "epoch": 1.7564265129682997, "grad_norm": 1.7495401832350141, "learning_rate": 8.021988236318267e-08, "loss": 0.48879313468933105, "step": 7619 }, { "epoch": 1.756657060518732, "grad_norm": 1.540187656958341, "learning_rate": 8.007036557144742e-08, "loss": 0.3939533531665802, "step": 7620 }, { "epoch": 1.7568876080691642, "grad_norm": 1.588849068766626, "learning_rate": 7.992098243591794e-08, "loss": 0.4698101282119751, "step": 7621 }, { "epoch": 1.7571181556195965, "grad_norm": 1.7235994666899865, "learning_rate": 7.977173297829865e-08, "loss": 0.3880566656589508, "step": 7622 }, { "epoch": 1.7573487031700288, "grad_norm": 1.547739865650052, "learning_rate": 7.962261722027352e-08, "loss": 0.4888259768486023, "step": 7623 }, { "epoch": 1.757579250720461, "grad_norm": 1.7611245522192838, "learning_rate": 7.947363518350746e-08, "loss": 0.5065386295318604, "step": 7624 }, { "epoch": 1.7578097982708933, "grad_norm": 1.6305093185850665, "learning_rate": 7.932478688964628e-08, "loss": 0.4453485310077667, "step": 7625 }, { "epoch": 1.7580403458213256, "grad_norm": 1.695465926028088, "learning_rate": 7.917607236031587e-08, "loss": 0.49680840969085693, "step": 7626 }, { "epoch": 1.758270893371758, "grad_norm": 1.902125357001842, "learning_rate": 7.902749161712297e-08, "loss": 0.46799665689468384, "step": 7627 }, { "epoch": 1.7585014409221902, "grad_norm": 1.4641445248158584, "learning_rate": 7.887904468165507e-08, "loss": 0.4290396273136139, "step": 7628 }, { "epoch": 1.7587319884726225, "grad_norm": 1.494429958046351, "learning_rate": 7.873073157547971e-08, "loss": 0.4317634701728821, "step": 7629 }, { "epoch": 1.7589625360230547, "grad_norm": 1.5596923902995892, "learning_rate": 7.85825523201451e-08, "loss": 0.47573816776275635, "step": 7630 }, { "epoch": 1.759193083573487, "grad_norm": 1.869048567302145, "learning_rate": 7.843450693718046e-08, "loss": 0.5061118602752686, "step": 7631 }, { "epoch": 1.7594236311239193, "grad_norm": 1.90036981437545, "learning_rate": 7.828659544809502e-08, "loss": 0.4778732657432556, "step": 7632 }, { "epoch": 1.7596541786743516, "grad_norm": 1.7460511681021387, "learning_rate": 7.813881787437904e-08, "loss": 0.43174654245376587, "step": 7633 }, { "epoch": 1.7598847262247839, "grad_norm": 2.4436834763569792, "learning_rate": 7.799117423750267e-08, "loss": 0.5490479469299316, "step": 7634 }, { "epoch": 1.7601152737752161, "grad_norm": 1.9505377666705999, "learning_rate": 7.784366455891733e-08, "loss": 0.4566101133823395, "step": 7635 }, { "epoch": 1.7603458213256484, "grad_norm": 1.9604546224667367, "learning_rate": 7.769628886005463e-08, "loss": 0.4798309803009033, "step": 7636 }, { "epoch": 1.7605763688760807, "grad_norm": 1.72870260704983, "learning_rate": 7.754904716232647e-08, "loss": 0.38129231333732605, "step": 7637 }, { "epoch": 1.760806916426513, "grad_norm": 1.6337711899440401, "learning_rate": 7.740193948712559e-08, "loss": 0.5362708568572998, "step": 7638 }, { "epoch": 1.7610374639769453, "grad_norm": 1.6562532161260684, "learning_rate": 7.725496585582547e-08, "loss": 0.4723502993583679, "step": 7639 }, { "epoch": 1.7612680115273776, "grad_norm": 1.9008305274132495, "learning_rate": 7.710812628977958e-08, "loss": 0.484948992729187, "step": 7640 }, { "epoch": 1.7614985590778098, "grad_norm": 1.5406976005020663, "learning_rate": 7.696142081032264e-08, "loss": 0.42966312170028687, "step": 7641 }, { "epoch": 1.7617291066282421, "grad_norm": 1.934961065868341, "learning_rate": 7.68148494387687e-08, "loss": 0.46488505601882935, "step": 7642 }, { "epoch": 1.7619596541786744, "grad_norm": 1.7842781707991795, "learning_rate": 7.666841219641351e-08, "loss": 0.4626274108886719, "step": 7643 }, { "epoch": 1.7621902017291067, "grad_norm": 1.5991637379651926, "learning_rate": 7.652210910453283e-08, "loss": 0.45007890462875366, "step": 7644 }, { "epoch": 1.762420749279539, "grad_norm": 1.8799325604165342, "learning_rate": 7.637594018438288e-08, "loss": 0.48811158537864685, "step": 7645 }, { "epoch": 1.7626512968299712, "grad_norm": 1.4567095096492855, "learning_rate": 7.622990545720054e-08, "loss": 0.4320351481437683, "step": 7646 }, { "epoch": 1.7628818443804035, "grad_norm": 1.7129571899462184, "learning_rate": 7.608400494420353e-08, "loss": 0.43537721037864685, "step": 7647 }, { "epoch": 1.7631123919308358, "grad_norm": 1.8137752166479428, "learning_rate": 7.593823866658889e-08, "loss": 0.39277005195617676, "step": 7648 }, { "epoch": 1.763342939481268, "grad_norm": 1.7043881583043696, "learning_rate": 7.579260664553544e-08, "loss": 0.4029799997806549, "step": 7649 }, { "epoch": 1.7635734870317004, "grad_norm": 1.7369149494853777, "learning_rate": 7.564710890220183e-08, "loss": 0.393459677696228, "step": 7650 }, { "epoch": 1.7638040345821326, "grad_norm": 1.7712546173623327, "learning_rate": 7.550174545772747e-08, "loss": 0.47554415464401245, "step": 7651 }, { "epoch": 1.764034582132565, "grad_norm": 1.2906409481776995, "learning_rate": 7.535651633323226e-08, "loss": 0.3958222270011902, "step": 7652 }, { "epoch": 1.7642651296829972, "grad_norm": 1.5409557041571615, "learning_rate": 7.521142154981641e-08, "loss": 0.4722353219985962, "step": 7653 }, { "epoch": 1.7644956772334295, "grad_norm": 1.6974701199285933, "learning_rate": 7.506646112856041e-08, "loss": 0.4863585829734802, "step": 7654 }, { "epoch": 1.7647262247838618, "grad_norm": 1.7158035391820794, "learning_rate": 7.492163509052585e-08, "loss": 0.47278356552124023, "step": 7655 }, { "epoch": 1.764956772334294, "grad_norm": 1.5747549758580093, "learning_rate": 7.477694345675411e-08, "loss": 0.4372313320636749, "step": 7656 }, { "epoch": 1.7651873198847263, "grad_norm": 1.767687407393855, "learning_rate": 7.463238624826785e-08, "loss": 0.5864512920379639, "step": 7657 }, { "epoch": 1.7654178674351586, "grad_norm": 1.5559070085327171, "learning_rate": 7.448796348606923e-08, "loss": 0.4714244306087494, "step": 7658 }, { "epoch": 1.7656484149855909, "grad_norm": 1.56910530297036, "learning_rate": 7.434367519114182e-08, "loss": 0.4437907040119171, "step": 7659 }, { "epoch": 1.7658789625360232, "grad_norm": 1.7453753195379775, "learning_rate": 7.419952138444896e-08, "loss": 0.4242848753929138, "step": 7660 }, { "epoch": 1.7661095100864554, "grad_norm": 1.5917510491266111, "learning_rate": 7.405550208693456e-08, "loss": 0.3774869441986084, "step": 7661 }, { "epoch": 1.7663400576368877, "grad_norm": 1.7728290606673953, "learning_rate": 7.391161731952356e-08, "loss": 0.37027084827423096, "step": 7662 }, { "epoch": 1.76657060518732, "grad_norm": 1.8043432929961043, "learning_rate": 7.376786710312043e-08, "loss": 0.4489648640155792, "step": 7663 }, { "epoch": 1.7668011527377523, "grad_norm": 1.977706820286657, "learning_rate": 7.362425145861072e-08, "loss": 0.4108201861381531, "step": 7664 }, { "epoch": 1.7670317002881846, "grad_norm": 1.5197214354873567, "learning_rate": 7.348077040686062e-08, "loss": 0.43439608812332153, "step": 7665 }, { "epoch": 1.7672622478386169, "grad_norm": 1.7853031370306771, "learning_rate": 7.333742396871623e-08, "loss": 0.42088496685028076, "step": 7666 }, { "epoch": 1.7674927953890491, "grad_norm": 1.5723255421968871, "learning_rate": 7.319421216500399e-08, "loss": 0.48281657695770264, "step": 7667 }, { "epoch": 1.7677233429394814, "grad_norm": 1.4536217774292859, "learning_rate": 7.305113501653159e-08, "loss": 0.5121546983718872, "step": 7668 }, { "epoch": 1.7679538904899137, "grad_norm": 1.3857702545327046, "learning_rate": 7.290819254408631e-08, "loss": 0.40559446811676025, "step": 7669 }, { "epoch": 1.768184438040346, "grad_norm": 1.6995494609484647, "learning_rate": 7.276538476843641e-08, "loss": 0.5207052826881409, "step": 7670 }, { "epoch": 1.7684149855907783, "grad_norm": 1.4468462521729057, "learning_rate": 7.262271171033007e-08, "loss": 0.5177881121635437, "step": 7671 }, { "epoch": 1.7686455331412105, "grad_norm": 1.4929042033851914, "learning_rate": 7.248017339049662e-08, "loss": 0.34873148798942566, "step": 7672 }, { "epoch": 1.7688760806916428, "grad_norm": 1.4828796543268334, "learning_rate": 7.233776982964513e-08, "loss": 0.5187006592750549, "step": 7673 }, { "epoch": 1.7691066282420749, "grad_norm": 1.7628461302950964, "learning_rate": 7.219550104846528e-08, "loss": 0.4007762670516968, "step": 7674 }, { "epoch": 1.7693371757925072, "grad_norm": 1.7373369616493144, "learning_rate": 7.205336706762732e-08, "loss": 0.34868037700653076, "step": 7675 }, { "epoch": 1.7695677233429394, "grad_norm": 1.5968096719688372, "learning_rate": 7.191136790778207e-08, "loss": 0.4267783761024475, "step": 7676 }, { "epoch": 1.7697982708933717, "grad_norm": 1.662213854570385, "learning_rate": 7.176950358956025e-08, "loss": 0.48516613245010376, "step": 7677 }, { "epoch": 1.770028818443804, "grad_norm": 1.4490843200762542, "learning_rate": 7.162777413357345e-08, "loss": 0.370013564825058, "step": 7678 }, { "epoch": 1.7702593659942363, "grad_norm": 1.5754606178259032, "learning_rate": 7.148617956041347e-08, "loss": 0.4909803569316864, "step": 7679 }, { "epoch": 1.7704899135446686, "grad_norm": 1.6427495490321167, "learning_rate": 7.134471989065227e-08, "loss": 0.45908260345458984, "step": 7680 }, { "epoch": 1.7707204610951008, "grad_norm": 1.5785664303220688, "learning_rate": 7.120339514484285e-08, "loss": 0.4022323191165924, "step": 7681 }, { "epoch": 1.7709510086455331, "grad_norm": 1.5712968291810534, "learning_rate": 7.10622053435178e-08, "loss": 0.5402769446372986, "step": 7682 }, { "epoch": 1.7711815561959654, "grad_norm": 1.5473368342653435, "learning_rate": 7.092115050719083e-08, "loss": 0.4438230097293854, "step": 7683 }, { "epoch": 1.7714121037463977, "grad_norm": 1.573083190057225, "learning_rate": 7.078023065635585e-08, "loss": 0.49692100286483765, "step": 7684 }, { "epoch": 1.77164265129683, "grad_norm": 1.6609187799962484, "learning_rate": 7.063944581148684e-08, "loss": 0.4989246726036072, "step": 7685 }, { "epoch": 1.7718731988472622, "grad_norm": 1.8704844560277196, "learning_rate": 7.049879599303842e-08, "loss": 0.5415306687355042, "step": 7686 }, { "epoch": 1.7721037463976945, "grad_norm": 1.7026704659165601, "learning_rate": 7.035828122144538e-08, "loss": 0.5030748844146729, "step": 7687 }, { "epoch": 1.7723342939481268, "grad_norm": 1.764587481495758, "learning_rate": 7.021790151712326e-08, "loss": 0.5167194604873657, "step": 7688 }, { "epoch": 1.772564841498559, "grad_norm": 1.7278659087314725, "learning_rate": 7.007765690046774e-08, "loss": 0.48385316133499146, "step": 7689 }, { "epoch": 1.7727953890489914, "grad_norm": 1.755610455110011, "learning_rate": 6.993754739185487e-08, "loss": 0.44149184226989746, "step": 7690 }, { "epoch": 1.7730259365994236, "grad_norm": 1.827798361341849, "learning_rate": 6.979757301164113e-08, "loss": 0.5165606737136841, "step": 7691 }, { "epoch": 1.773256484149856, "grad_norm": 1.779706905725424, "learning_rate": 6.965773378016348e-08, "loss": 0.3729487359523773, "step": 7692 }, { "epoch": 1.7734870317002882, "grad_norm": 1.7294467511491705, "learning_rate": 6.951802971773868e-08, "loss": 0.459484338760376, "step": 7693 }, { "epoch": 1.7737175792507205, "grad_norm": 1.7442166010782674, "learning_rate": 6.93784608446647e-08, "loss": 0.41767945885658264, "step": 7694 }, { "epoch": 1.7739481268011528, "grad_norm": 1.5424246362534682, "learning_rate": 6.923902718121921e-08, "loss": 0.5340604782104492, "step": 7695 }, { "epoch": 1.774178674351585, "grad_norm": 1.5182197777494724, "learning_rate": 6.909972874766057e-08, "loss": 0.4299081265926361, "step": 7696 }, { "epoch": 1.7744092219020173, "grad_norm": 1.5921658694779064, "learning_rate": 6.896056556422747e-08, "loss": 0.4131748080253601, "step": 7697 }, { "epoch": 1.7746397694524496, "grad_norm": 1.7608740339569051, "learning_rate": 6.882153765113885e-08, "loss": 0.460279643535614, "step": 7698 }, { "epoch": 1.774870317002882, "grad_norm": 1.8352317153683946, "learning_rate": 6.868264502859366e-08, "loss": 0.5152443051338196, "step": 7699 }, { "epoch": 1.7751008645533142, "grad_norm": 1.6921081006962246, "learning_rate": 6.854388771677211e-08, "loss": 0.49927568435668945, "step": 7700 }, { "epoch": 1.7753314121037462, "grad_norm": 2.152713876850487, "learning_rate": 6.840526573583383e-08, "loss": 0.43129733204841614, "step": 7701 }, { "epoch": 1.7755619596541785, "grad_norm": 1.6631961804324806, "learning_rate": 6.826677910591926e-08, "loss": 0.39078906178474426, "step": 7702 }, { "epoch": 1.7757925072046108, "grad_norm": 1.6788233641075792, "learning_rate": 6.8128427847149e-08, "loss": 0.4480215907096863, "step": 7703 }, { "epoch": 1.776023054755043, "grad_norm": 1.7287551762868483, "learning_rate": 6.79902119796243e-08, "loss": 0.39621448516845703, "step": 7704 }, { "epoch": 1.7762536023054754, "grad_norm": 1.5061600691752293, "learning_rate": 6.785213152342628e-08, "loss": 0.4708957076072693, "step": 7705 }, { "epoch": 1.7764841498559076, "grad_norm": 1.6137574769212935, "learning_rate": 6.771418649861638e-08, "loss": 0.38831156492233276, "step": 7706 }, { "epoch": 1.77671469740634, "grad_norm": 1.4887027346159967, "learning_rate": 6.7576376925237e-08, "loss": 0.4053429365158081, "step": 7707 }, { "epoch": 1.7769452449567722, "grad_norm": 1.575497148186284, "learning_rate": 6.74387028233101e-08, "loss": 0.4687873125076294, "step": 7708 }, { "epoch": 1.7771757925072045, "grad_norm": 1.9413502528314348, "learning_rate": 6.730116421283838e-08, "loss": 0.4794561266899109, "step": 7709 }, { "epoch": 1.7774063400576368, "grad_norm": 1.5506812991717323, "learning_rate": 6.716376111380506e-08, "loss": 0.5206823945045471, "step": 7710 }, { "epoch": 1.777636887608069, "grad_norm": 1.5577917172326046, "learning_rate": 6.702649354617307e-08, "loss": 0.5148698687553406, "step": 7711 }, { "epoch": 1.7778674351585013, "grad_norm": 1.7026300959091676, "learning_rate": 6.688936152988589e-08, "loss": 0.47977179288864136, "step": 7712 }, { "epoch": 1.7780979827089336, "grad_norm": 1.5608303640349446, "learning_rate": 6.67523650848677e-08, "loss": 0.3876994848251343, "step": 7713 }, { "epoch": 1.7783285302593659, "grad_norm": 1.6373362273851646, "learning_rate": 6.661550423102235e-08, "loss": 0.48703986406326294, "step": 7714 }, { "epoch": 1.7785590778097982, "grad_norm": 1.6262507534520982, "learning_rate": 6.647877898823462e-08, "loss": 0.40404099225997925, "step": 7715 }, { "epoch": 1.7787896253602304, "grad_norm": 1.7573860934066001, "learning_rate": 6.634218937636882e-08, "loss": 0.4791918396949768, "step": 7716 }, { "epoch": 1.7790201729106627, "grad_norm": 1.6154685028874851, "learning_rate": 6.620573541527042e-08, "loss": 0.46973907947540283, "step": 7717 }, { "epoch": 1.779250720461095, "grad_norm": 1.9052536891572849, "learning_rate": 6.606941712476466e-08, "loss": 0.5298879146575928, "step": 7718 }, { "epoch": 1.7794812680115273, "grad_norm": 1.4315445647873681, "learning_rate": 6.593323452465693e-08, "loss": 0.45558857917785645, "step": 7719 }, { "epoch": 1.7797118155619596, "grad_norm": 1.74666285264939, "learning_rate": 6.579718763473329e-08, "loss": 0.42475903034210205, "step": 7720 }, { "epoch": 1.7799423631123918, "grad_norm": 1.6085492262804577, "learning_rate": 6.566127647476016e-08, "loss": 0.4914790391921997, "step": 7721 }, { "epoch": 1.7801729106628241, "grad_norm": 1.7428285922364788, "learning_rate": 6.552550106448363e-08, "loss": 0.49469706416130066, "step": 7722 }, { "epoch": 1.7804034582132564, "grad_norm": 1.6195332469418184, "learning_rate": 6.538986142363089e-08, "loss": 0.36061471700668335, "step": 7723 }, { "epoch": 1.7806340057636887, "grad_norm": 1.8328650472042145, "learning_rate": 6.525435757190867e-08, "loss": 0.5629843473434448, "step": 7724 }, { "epoch": 1.780864553314121, "grad_norm": 1.8941469492593315, "learning_rate": 6.511898952900419e-08, "loss": 0.46624869108200073, "step": 7725 }, { "epoch": 1.7810951008645532, "grad_norm": 1.7694804696248736, "learning_rate": 6.498375731458527e-08, "loss": 0.5627322793006897, "step": 7726 }, { "epoch": 1.7813256484149855, "grad_norm": 1.4591931185069376, "learning_rate": 6.484866094829944e-08, "loss": 0.4573343098163605, "step": 7727 }, { "epoch": 1.7815561959654178, "grad_norm": 1.785446481353626, "learning_rate": 6.47137004497751e-08, "loss": 0.44986212253570557, "step": 7728 }, { "epoch": 1.78178674351585, "grad_norm": 2.16333875922491, "learning_rate": 6.457887583862065e-08, "loss": 0.4796055853366852, "step": 7729 }, { "epoch": 1.7820172910662824, "grad_norm": 1.774300664934424, "learning_rate": 6.444418713442445e-08, "loss": 0.447618305683136, "step": 7730 }, { "epoch": 1.7822478386167147, "grad_norm": 1.8485863438016088, "learning_rate": 6.430963435675551e-08, "loss": 0.46969741582870483, "step": 7731 }, { "epoch": 1.782478386167147, "grad_norm": 1.733301132250343, "learning_rate": 6.417521752516275e-08, "loss": 0.43413540720939636, "step": 7732 }, { "epoch": 1.7827089337175792, "grad_norm": 2.138945448309459, "learning_rate": 6.404093665917576e-08, "loss": 0.532086968421936, "step": 7733 }, { "epoch": 1.7829394812680115, "grad_norm": 1.9071183008372716, "learning_rate": 6.390679177830417e-08, "loss": 0.5077864527702332, "step": 7734 }, { "epoch": 1.7831700288184438, "grad_norm": 2.059822081609659, "learning_rate": 6.377278290203757e-08, "loss": 0.5576372146606445, "step": 7735 }, { "epoch": 1.783400576368876, "grad_norm": 1.73331324996214, "learning_rate": 6.363891004984646e-08, "loss": 0.4265804886817932, "step": 7736 }, { "epoch": 1.7836311239193083, "grad_norm": 1.4188934525049064, "learning_rate": 6.350517324118087e-08, "loss": 0.40466463565826416, "step": 7737 }, { "epoch": 1.7838616714697406, "grad_norm": 1.8493725205047176, "learning_rate": 6.337157249547132e-08, "loss": 0.42143672704696655, "step": 7738 }, { "epoch": 1.784092219020173, "grad_norm": 1.6661748528000018, "learning_rate": 6.32381078321289e-08, "loss": 0.49432891607284546, "step": 7739 }, { "epoch": 1.7843227665706052, "grad_norm": 1.7897170262913213, "learning_rate": 6.310477927054436e-08, "loss": 0.4701034426689148, "step": 7740 }, { "epoch": 1.7845533141210375, "grad_norm": 1.8045876586105092, "learning_rate": 6.297158683008896e-08, "loss": 0.4706331491470337, "step": 7741 }, { "epoch": 1.7847838616714697, "grad_norm": 1.6424743309135614, "learning_rate": 6.283853053011456e-08, "loss": 0.5200982689857483, "step": 7742 }, { "epoch": 1.785014409221902, "grad_norm": 1.5460597062029549, "learning_rate": 6.270561038995248e-08, "loss": 0.39499545097351074, "step": 7743 }, { "epoch": 1.7852449567723343, "grad_norm": 1.7159811116379573, "learning_rate": 6.25728264289147e-08, "loss": 0.4668952226638794, "step": 7744 }, { "epoch": 1.7854755043227666, "grad_norm": 1.4261473913577194, "learning_rate": 6.244017866629337e-08, "loss": 0.3721618950366974, "step": 7745 }, { "epoch": 1.7857060518731989, "grad_norm": 1.6135087449550563, "learning_rate": 6.230766712136082e-08, "loss": 0.4165458679199219, "step": 7746 }, { "epoch": 1.7859365994236311, "grad_norm": 1.7713530242570052, "learning_rate": 6.217529181336967e-08, "loss": 0.4876209497451782, "step": 7747 }, { "epoch": 1.7861671469740634, "grad_norm": 2.115685960160452, "learning_rate": 6.204305276155252e-08, "loss": 0.576325535774231, "step": 7748 }, { "epoch": 1.7863976945244957, "grad_norm": 1.679232826772317, "learning_rate": 6.191094998512259e-08, "loss": 0.4436579942703247, "step": 7749 }, { "epoch": 1.786628242074928, "grad_norm": 1.5230671727260017, "learning_rate": 6.177898350327282e-08, "loss": 0.34511566162109375, "step": 7750 }, { "epoch": 1.7868587896253603, "grad_norm": 1.3956973107037376, "learning_rate": 6.164715333517656e-08, "loss": 0.47144967317581177, "step": 7751 }, { "epoch": 1.7870893371757925, "grad_norm": 1.7458104416555835, "learning_rate": 6.15154594999876e-08, "loss": 0.45297929644584656, "step": 7752 }, { "epoch": 1.7873198847262248, "grad_norm": 1.617434355385532, "learning_rate": 6.13839020168393e-08, "loss": 0.38023924827575684, "step": 7753 }, { "epoch": 1.787550432276657, "grad_norm": 1.553165083717958, "learning_rate": 6.125248090484581e-08, "loss": 0.49789756536483765, "step": 7754 }, { "epoch": 1.7877809798270894, "grad_norm": 1.5119135940794262, "learning_rate": 6.112119618310141e-08, "loss": 0.41589903831481934, "step": 7755 }, { "epoch": 1.7880115273775217, "grad_norm": 1.9352630835722213, "learning_rate": 6.099004787068018e-08, "loss": 0.4091898500919342, "step": 7756 }, { "epoch": 1.788242074927954, "grad_norm": 1.5523031339189788, "learning_rate": 6.085903598663655e-08, "loss": 0.42826372385025024, "step": 7757 }, { "epoch": 1.7884726224783862, "grad_norm": 1.621501159790323, "learning_rate": 6.072816055000552e-08, "loss": 0.43129992485046387, "step": 7758 }, { "epoch": 1.7887031700288185, "grad_norm": 1.5341459583525834, "learning_rate": 6.059742157980152e-08, "loss": 0.46008431911468506, "step": 7759 }, { "epoch": 1.7889337175792508, "grad_norm": 1.9309743062622498, "learning_rate": 6.046681909501994e-08, "loss": 0.5652228593826294, "step": 7760 }, { "epoch": 1.789164265129683, "grad_norm": 1.7004047485154516, "learning_rate": 6.03363531146357e-08, "loss": 0.44568371772766113, "step": 7761 }, { "epoch": 1.7893948126801154, "grad_norm": 1.5142959284192654, "learning_rate": 6.020602365760419e-08, "loss": 0.44745492935180664, "step": 7762 }, { "epoch": 1.7896253602305476, "grad_norm": 1.9154302941464691, "learning_rate": 6.007583074286094e-08, "loss": 0.449093759059906, "step": 7763 }, { "epoch": 1.78985590778098, "grad_norm": 1.267128025484339, "learning_rate": 5.994577438932169e-08, "loss": 0.37527692317962646, "step": 7764 }, { "epoch": 1.7900864553314122, "grad_norm": 1.4630537022894423, "learning_rate": 5.981585461588213e-08, "loss": 0.4551668167114258, "step": 7765 }, { "epoch": 1.7903170028818445, "grad_norm": 1.5603801053478943, "learning_rate": 5.968607144141846e-08, "loss": 0.4525032043457031, "step": 7766 }, { "epoch": 1.7905475504322768, "grad_norm": 1.7729818006554123, "learning_rate": 5.955642488478674e-08, "loss": 0.357668399810791, "step": 7767 }, { "epoch": 1.790778097982709, "grad_norm": 1.828688502419706, "learning_rate": 5.9426914964823327e-08, "loss": 0.49838197231292725, "step": 7768 }, { "epoch": 1.7910086455331413, "grad_norm": 1.4374954094595649, "learning_rate": 5.9297541700344286e-08, "loss": 0.43415066599845886, "step": 7769 }, { "epoch": 1.7912391930835736, "grad_norm": 1.4808851812838624, "learning_rate": 5.91683051101467e-08, "loss": 0.44491448998451233, "step": 7770 }, { "epoch": 1.7914697406340059, "grad_norm": 1.5676470000342726, "learning_rate": 5.9039205213007094e-08, "loss": 0.5361511707305908, "step": 7771 }, { "epoch": 1.7917002881844382, "grad_norm": 1.5923433064017045, "learning_rate": 5.891024202768224e-08, "loss": 0.4797734320163727, "step": 7772 }, { "epoch": 1.7919308357348704, "grad_norm": 1.5894190231720695, "learning_rate": 5.878141557290939e-08, "loss": 0.39747869968414307, "step": 7773 }, { "epoch": 1.7921613832853027, "grad_norm": 1.836432899941197, "learning_rate": 5.865272586740566e-08, "loss": 0.5796657800674438, "step": 7774 }, { "epoch": 1.792391930835735, "grad_norm": 1.5280268419302134, "learning_rate": 5.8524172929867995e-08, "loss": 0.4255181849002838, "step": 7775 }, { "epoch": 1.7926224783861673, "grad_norm": 1.6995321372757681, "learning_rate": 5.8395756778974125e-08, "loss": 0.4837808310985565, "step": 7776 }, { "epoch": 1.7928530259365996, "grad_norm": 1.5350170754716532, "learning_rate": 5.826747743338134e-08, "loss": 0.47962331771850586, "step": 7777 }, { "epoch": 1.7930835734870318, "grad_norm": 1.5014268099772772, "learning_rate": 5.813933491172751e-08, "loss": 0.4127134084701538, "step": 7778 }, { "epoch": 1.7933141210374641, "grad_norm": 1.5155643699973835, "learning_rate": 5.801132923263052e-08, "loss": 0.42512404918670654, "step": 7779 }, { "epoch": 1.7935446685878964, "grad_norm": 1.9564743709601773, "learning_rate": 5.7883460414687946e-08, "loss": 0.5483304262161255, "step": 7780 }, { "epoch": 1.7937752161383287, "grad_norm": 1.6888505182787188, "learning_rate": 5.775572847647781e-08, "loss": 0.42901748418807983, "step": 7781 }, { "epoch": 1.794005763688761, "grad_norm": 1.8007028985429232, "learning_rate": 5.762813343655859e-08, "loss": 0.4339037239551544, "step": 7782 }, { "epoch": 1.7942363112391932, "grad_norm": 1.65295831348881, "learning_rate": 5.7500675313468026e-08, "loss": 0.4670749008655548, "step": 7783 }, { "epoch": 1.7944668587896253, "grad_norm": 1.5443675410420685, "learning_rate": 5.737335412572497e-08, "loss": 0.5136678218841553, "step": 7784 }, { "epoch": 1.7946974063400576, "grad_norm": 1.4237421461129909, "learning_rate": 5.72461698918274e-08, "loss": 0.43283748626708984, "step": 7785 }, { "epoch": 1.7949279538904899, "grad_norm": 1.7650305092506278, "learning_rate": 5.71191226302542e-08, "loss": 0.476909875869751, "step": 7786 }, { "epoch": 1.7951585014409221, "grad_norm": 1.672567271487365, "learning_rate": 5.699221235946394e-08, "loss": 0.4585307240486145, "step": 7787 }, { "epoch": 1.7953890489913544, "grad_norm": 1.9458664676542134, "learning_rate": 5.6865439097895096e-08, "loss": 0.5796236991882324, "step": 7788 }, { "epoch": 1.7956195965417867, "grad_norm": 2.0548684321027015, "learning_rate": 5.6738802863966816e-08, "loss": 0.4688211679458618, "step": 7789 }, { "epoch": 1.795850144092219, "grad_norm": 1.7820844049311864, "learning_rate": 5.661230367607805e-08, "loss": 0.48832905292510986, "step": 7790 }, { "epoch": 1.7960806916426513, "grad_norm": 1.6531888312791727, "learning_rate": 5.648594155260744e-08, "loss": 0.49670735001564026, "step": 7791 }, { "epoch": 1.7963112391930836, "grad_norm": 1.6231028236984248, "learning_rate": 5.6359716511914624e-08, "loss": 0.4234163761138916, "step": 7792 }, { "epoch": 1.7965417867435158, "grad_norm": 1.7032647254914282, "learning_rate": 5.6233628572338375e-08, "loss": 0.5399529933929443, "step": 7793 }, { "epoch": 1.7967723342939481, "grad_norm": 1.519848058141304, "learning_rate": 5.610767775219805e-08, "loss": 0.40350812673568726, "step": 7794 }, { "epoch": 1.7970028818443804, "grad_norm": 1.6695509859757407, "learning_rate": 5.598186406979311e-08, "loss": 0.44279199838638306, "step": 7795 }, { "epoch": 1.7972334293948127, "grad_norm": 2.1845146197296907, "learning_rate": 5.585618754340282e-08, "loss": 0.49113231897354126, "step": 7796 }, { "epoch": 1.797463976945245, "grad_norm": 1.6014376932476218, "learning_rate": 5.573064819128681e-08, "loss": 0.47899287939071655, "step": 7797 }, { "epoch": 1.7976945244956772, "grad_norm": 1.962506870462268, "learning_rate": 5.5605246031684485e-08, "loss": 0.4684637784957886, "step": 7798 }, { "epoch": 1.7979250720461095, "grad_norm": 1.704176925336371, "learning_rate": 5.547998108281571e-08, "loss": 0.5158382654190063, "step": 7799 }, { "epoch": 1.7981556195965418, "grad_norm": 1.5361177647232538, "learning_rate": 5.5354853362880036e-08, "loss": 0.41753411293029785, "step": 7800 }, { "epoch": 1.798386167146974, "grad_norm": 1.4773819899817666, "learning_rate": 5.522986289005704e-08, "loss": 0.4594987630844116, "step": 7801 }, { "epoch": 1.7986167146974064, "grad_norm": 1.5236452636861773, "learning_rate": 5.510500968250675e-08, "loss": 0.4700300693511963, "step": 7802 }, { "epoch": 1.7988472622478386, "grad_norm": 1.945100425844493, "learning_rate": 5.49802937583691e-08, "loss": 0.39876431226730347, "step": 7803 }, { "epoch": 1.799077809798271, "grad_norm": 1.6345492691406456, "learning_rate": 5.4855715135763927e-08, "loss": 0.4913487434387207, "step": 7804 }, { "epoch": 1.7993083573487032, "grad_norm": 2.0246385500829414, "learning_rate": 5.473127383279119e-08, "loss": 0.4329494833946228, "step": 7805 }, { "epoch": 1.7995389048991355, "grad_norm": 1.5152375389497672, "learning_rate": 5.460696986753099e-08, "loss": 0.35094910860061646, "step": 7806 }, { "epoch": 1.7997694524495678, "grad_norm": 1.5548589132603017, "learning_rate": 5.448280325804322e-08, "loss": 0.4664410948753357, "step": 7807 }, { "epoch": 1.8, "grad_norm": 1.9514902411487012, "learning_rate": 5.435877402236821e-08, "loss": 0.48508626222610474, "step": 7808 }, { "epoch": 1.8002305475504323, "grad_norm": 1.7554423106257484, "learning_rate": 5.4234882178525896e-08, "loss": 0.5313225388526917, "step": 7809 }, { "epoch": 1.8004610951008646, "grad_norm": 1.3389949821748275, "learning_rate": 5.411112774451665e-08, "loss": 0.4444521963596344, "step": 7810 }, { "epoch": 1.8006916426512967, "grad_norm": 1.635479397086268, "learning_rate": 5.398751073832075e-08, "loss": 0.448794424533844, "step": 7811 }, { "epoch": 1.800922190201729, "grad_norm": 1.5018240504535878, "learning_rate": 5.38640311778984e-08, "loss": 0.3712002635002136, "step": 7812 }, { "epoch": 1.8011527377521612, "grad_norm": 1.9098610685931527, "learning_rate": 5.3740689081189784e-08, "loss": 0.4695093333721161, "step": 7813 }, { "epoch": 1.8013832853025935, "grad_norm": 1.777484518094469, "learning_rate": 5.361748446611525e-08, "loss": 0.4755779206752777, "step": 7814 }, { "epoch": 1.8016138328530258, "grad_norm": 1.5127484913328306, "learning_rate": 5.349441735057514e-08, "loss": 0.4299379289150238, "step": 7815 }, { "epoch": 1.801844380403458, "grad_norm": 1.9413837097604036, "learning_rate": 5.337148775245004e-08, "loss": 0.43760111927986145, "step": 7816 }, { "epoch": 1.8020749279538903, "grad_norm": 1.502605290866374, "learning_rate": 5.324869568960011e-08, "loss": 0.4866371750831604, "step": 7817 }, { "epoch": 1.8023054755043226, "grad_norm": 1.8861304768122535, "learning_rate": 5.312604117986586e-08, "loss": 0.4446576237678528, "step": 7818 }, { "epoch": 1.802536023054755, "grad_norm": 1.6888151118163675, "learning_rate": 5.30035242410678e-08, "loss": 0.4882046580314636, "step": 7819 }, { "epoch": 1.8027665706051872, "grad_norm": 1.6607748455710494, "learning_rate": 5.288114489100615e-08, "loss": 0.5194848775863647, "step": 7820 }, { "epoch": 1.8029971181556195, "grad_norm": 1.5306526873181203, "learning_rate": 5.2758903147461456e-08, "loss": 0.47810953855514526, "step": 7821 }, { "epoch": 1.8032276657060518, "grad_norm": 1.3512000021688482, "learning_rate": 5.2636799028194175e-08, "loss": 0.4498436450958252, "step": 7822 }, { "epoch": 1.803458213256484, "grad_norm": 1.8685173906778947, "learning_rate": 5.251483255094469e-08, "loss": 0.4595226049423218, "step": 7823 }, { "epoch": 1.8036887608069163, "grad_norm": 1.9918192648087005, "learning_rate": 5.2393003733433695e-08, "loss": 0.4187047481536865, "step": 7824 }, { "epoch": 1.8039193083573486, "grad_norm": 1.9097202165834481, "learning_rate": 5.2271312593361593e-08, "loss": 0.46125978231430054, "step": 7825 }, { "epoch": 1.8041498559077809, "grad_norm": 1.6079780482495436, "learning_rate": 5.214975914840847e-08, "loss": 0.5298447608947754, "step": 7826 }, { "epoch": 1.8043804034582132, "grad_norm": 1.8654987830999303, "learning_rate": 5.20283434162353e-08, "loss": 0.36386626958847046, "step": 7827 }, { "epoch": 1.8046109510086454, "grad_norm": 1.8624196207161894, "learning_rate": 5.190706541448209e-08, "loss": 0.46004655957221985, "step": 7828 }, { "epoch": 1.8048414985590777, "grad_norm": 1.528524654700596, "learning_rate": 5.178592516076963e-08, "loss": 0.43738240003585815, "step": 7829 }, { "epoch": 1.80507204610951, "grad_norm": 1.7413237279253044, "learning_rate": 5.166492267269795e-08, "loss": 0.4735296368598938, "step": 7830 }, { "epoch": 1.8053025936599423, "grad_norm": 1.4916939190312641, "learning_rate": 5.154405796784789e-08, "loss": 0.40936362743377686, "step": 7831 }, { "epoch": 1.8055331412103746, "grad_norm": 1.4495523848817589, "learning_rate": 5.142333106377961e-08, "loss": 0.45034337043762207, "step": 7832 }, { "epoch": 1.8057636887608068, "grad_norm": 1.3784789325623554, "learning_rate": 5.13027419780333e-08, "loss": 0.44316160678863525, "step": 7833 }, { "epoch": 1.8059942363112391, "grad_norm": 1.4092068526068369, "learning_rate": 5.118229072812952e-08, "loss": 0.4842323660850525, "step": 7834 }, { "epoch": 1.8062247838616714, "grad_norm": 1.566812551044544, "learning_rate": 5.10619773315687e-08, "loss": 0.4448675513267517, "step": 7835 }, { "epoch": 1.8064553314121037, "grad_norm": 1.6102992842469919, "learning_rate": 5.0941801805830743e-08, "loss": 0.38104236125946045, "step": 7836 }, { "epoch": 1.806685878962536, "grad_norm": 1.5296809805750156, "learning_rate": 5.082176416837636e-08, "loss": 0.44963395595550537, "step": 7837 }, { "epoch": 1.8069164265129682, "grad_norm": 1.5844384454532618, "learning_rate": 5.070186443664548e-08, "loss": 0.5729601383209229, "step": 7838 }, { "epoch": 1.8071469740634005, "grad_norm": 1.4362989763417529, "learning_rate": 5.058210262805818e-08, "loss": 0.41734176874160767, "step": 7839 }, { "epoch": 1.8073775216138328, "grad_norm": 1.693804486024896, "learning_rate": 5.046247876001497e-08, "loss": 0.3615468740463257, "step": 7840 }, { "epoch": 1.807608069164265, "grad_norm": 1.5964757784006058, "learning_rate": 5.034299284989563e-08, "loss": 0.4586338698863983, "step": 7841 }, { "epoch": 1.8078386167146974, "grad_norm": 1.7334876895530935, "learning_rate": 5.022364491506037e-08, "loss": 0.5273293852806091, "step": 7842 }, { "epoch": 1.8080691642651296, "grad_norm": 1.8912136860353268, "learning_rate": 5.0104434972849106e-08, "loss": 0.4835001528263092, "step": 7843 }, { "epoch": 1.808299711815562, "grad_norm": 1.7745284627014155, "learning_rate": 4.99853630405821e-08, "loss": 0.5108824372291565, "step": 7844 }, { "epoch": 1.8085302593659942, "grad_norm": 1.700329152976267, "learning_rate": 4.986642913555894e-08, "loss": 0.3851168751716614, "step": 7845 }, { "epoch": 1.8087608069164265, "grad_norm": 1.7141226792443536, "learning_rate": 4.9747633275059486e-08, "loss": 0.5318799614906311, "step": 7846 }, { "epoch": 1.8089913544668588, "grad_norm": 1.7788983980504363, "learning_rate": 4.962897547634359e-08, "loss": 0.4470546245574951, "step": 7847 }, { "epoch": 1.809221902017291, "grad_norm": 1.799452040929468, "learning_rate": 4.951045575665114e-08, "loss": 0.4645649194717407, "step": 7848 }, { "epoch": 1.8094524495677233, "grad_norm": 1.7833884340536976, "learning_rate": 4.9392074133201675e-08, "loss": 0.5158429741859436, "step": 7849 }, { "epoch": 1.8096829971181556, "grad_norm": 1.6174396436681213, "learning_rate": 4.927383062319501e-08, "loss": 0.45542553067207336, "step": 7850 }, { "epoch": 1.809913544668588, "grad_norm": 1.7293030074031477, "learning_rate": 4.915572524381051e-08, "loss": 0.4432970881462097, "step": 7851 }, { "epoch": 1.8101440922190202, "grad_norm": 1.4973425388688861, "learning_rate": 4.903775801220755e-08, "loss": 0.4452818036079407, "step": 7852 }, { "epoch": 1.8103746397694525, "grad_norm": 1.4630308885956378, "learning_rate": 4.891992894552588e-08, "loss": 0.5507289171218872, "step": 7853 }, { "epoch": 1.8106051873198847, "grad_norm": 1.9247487981178528, "learning_rate": 4.880223806088446e-08, "loss": 0.5079092979431152, "step": 7854 }, { "epoch": 1.810835734870317, "grad_norm": 1.5662190264538383, "learning_rate": 4.8684685375382726e-08, "loss": 0.5003194808959961, "step": 7855 }, { "epoch": 1.8110662824207493, "grad_norm": 1.6396730822390861, "learning_rate": 4.856727090610002e-08, "loss": 0.4839469790458679, "step": 7856 }, { "epoch": 1.8112968299711816, "grad_norm": 1.5156804101091823, "learning_rate": 4.8449994670095254e-08, "loss": 0.4743555784225464, "step": 7857 }, { "epoch": 1.8115273775216139, "grad_norm": 1.8329893402310773, "learning_rate": 4.8332856684407565e-08, "loss": 0.48717260360717773, "step": 7858 }, { "epoch": 1.8117579250720461, "grad_norm": 1.6627403476546345, "learning_rate": 4.821585696605568e-08, "loss": 0.4353930354118347, "step": 7859 }, { "epoch": 1.8119884726224784, "grad_norm": 1.5088427953216483, "learning_rate": 4.809899553203844e-08, "loss": 0.5033609867095947, "step": 7860 }, { "epoch": 1.8122190201729107, "grad_norm": 2.0189049548486864, "learning_rate": 4.798227239933495e-08, "loss": 0.6085183620452881, "step": 7861 }, { "epoch": 1.812449567723343, "grad_norm": 1.5411871293354347, "learning_rate": 4.7865687584903503e-08, "loss": 0.4610944986343384, "step": 7862 }, { "epoch": 1.8126801152737753, "grad_norm": 1.5441686393391751, "learning_rate": 4.7749241105682905e-08, "loss": 0.43830424547195435, "step": 7863 }, { "epoch": 1.8129106628242075, "grad_norm": 1.8213823551119195, "learning_rate": 4.76329329785915e-08, "loss": 0.44822877645492554, "step": 7864 }, { "epoch": 1.8131412103746398, "grad_norm": 1.7073238966182782, "learning_rate": 4.751676322052756e-08, "loss": 0.506583571434021, "step": 7865 }, { "epoch": 1.813371757925072, "grad_norm": 1.7477332396001743, "learning_rate": 4.740073184836946e-08, "loss": 0.3813684582710266, "step": 7866 }, { "epoch": 1.8136023054755044, "grad_norm": 1.9006354309336493, "learning_rate": 4.728483887897527e-08, "loss": 0.45449137687683105, "step": 7867 }, { "epoch": 1.8138328530259367, "grad_norm": 1.7166109802455793, "learning_rate": 4.716908432918309e-08, "loss": 0.42221778631210327, "step": 7868 }, { "epoch": 1.814063400576369, "grad_norm": 1.4612227977115941, "learning_rate": 4.705346821581102e-08, "loss": 0.40090346336364746, "step": 7869 }, { "epoch": 1.8142939481268012, "grad_norm": 1.5809246396763592, "learning_rate": 4.693799055565673e-08, "loss": 0.4458918869495392, "step": 7870 }, { "epoch": 1.8145244956772335, "grad_norm": 1.8347081818496995, "learning_rate": 4.682265136549768e-08, "loss": 0.44281435012817383, "step": 7871 }, { "epoch": 1.8147550432276658, "grad_norm": 1.9080323902144045, "learning_rate": 4.670745066209192e-08, "loss": 0.503116250038147, "step": 7872 }, { "epoch": 1.814985590778098, "grad_norm": 1.450463241085339, "learning_rate": 4.65923884621765e-08, "loss": 0.37526172399520874, "step": 7873 }, { "epoch": 1.8152161383285303, "grad_norm": 1.5131592845477215, "learning_rate": 4.6477464782469054e-08, "loss": 0.3724762201309204, "step": 7874 }, { "epoch": 1.8154466858789626, "grad_norm": 1.4737758886121874, "learning_rate": 4.636267963966656e-08, "loss": 0.41912156343460083, "step": 7875 }, { "epoch": 1.815677233429395, "grad_norm": 1.6217019756861422, "learning_rate": 4.6248033050446336e-08, "loss": 0.3820981979370117, "step": 7876 }, { "epoch": 1.8159077809798272, "grad_norm": 1.72842311014601, "learning_rate": 4.613352503146517e-08, "loss": 0.5527099370956421, "step": 7877 }, { "epoch": 1.8161383285302595, "grad_norm": 1.999533464900462, "learning_rate": 4.601915559935987e-08, "loss": 0.4588435888290405, "step": 7878 }, { "epoch": 1.8163688760806918, "grad_norm": 1.6219483828800259, "learning_rate": 4.5904924770747144e-08, "loss": 0.4699181318283081, "step": 7879 }, { "epoch": 1.816599423631124, "grad_norm": 1.5998365257982348, "learning_rate": 4.5790832562223825e-08, "loss": 0.45713916420936584, "step": 7880 }, { "epoch": 1.8168299711815563, "grad_norm": 1.55324771004424, "learning_rate": 4.5676878990366096e-08, "loss": 0.38195744156837463, "step": 7881 }, { "epoch": 1.8170605187319886, "grad_norm": 1.9872138227633411, "learning_rate": 4.556306407173016e-08, "loss": 0.44771432876586914, "step": 7882 }, { "epoch": 1.8172910662824209, "grad_norm": 1.791967057454681, "learning_rate": 4.5449387822852016e-08, "loss": 0.5057739019393921, "step": 7883 }, { "epoch": 1.8175216138328532, "grad_norm": 1.5014714706473309, "learning_rate": 4.533585026024789e-08, "loss": 0.4792659878730774, "step": 7884 }, { "epoch": 1.8177521613832854, "grad_norm": 1.60058179236347, "learning_rate": 4.52224514004137e-08, "loss": 0.5714812874794006, "step": 7885 }, { "epoch": 1.8179827089337177, "grad_norm": 1.6688246286733348, "learning_rate": 4.510919125982482e-08, "loss": 0.5826704502105713, "step": 7886 }, { "epoch": 1.81821325648415, "grad_norm": 1.6495391511246666, "learning_rate": 4.499606985493709e-08, "loss": 0.37359529733657837, "step": 7887 }, { "epoch": 1.8184438040345823, "grad_norm": 1.5419285481811944, "learning_rate": 4.4883087202185696e-08, "loss": 0.4854467213153839, "step": 7888 }, { "epoch": 1.8186743515850146, "grad_norm": 1.4892991712894692, "learning_rate": 4.477024331798562e-08, "loss": 0.5693310499191284, "step": 7889 }, { "epoch": 1.8189048991354468, "grad_norm": 1.6688923135871851, "learning_rate": 4.46575382187323e-08, "loss": 0.5239206552505493, "step": 7890 }, { "epoch": 1.8191354466858791, "grad_norm": 1.5245551187313666, "learning_rate": 4.4544971920800425e-08, "loss": 0.4846993684768677, "step": 7891 }, { "epoch": 1.8193659942363114, "grad_norm": 1.6316413156349714, "learning_rate": 4.443254444054456e-08, "loss": 0.4502261281013489, "step": 7892 }, { "epoch": 1.8195965417867437, "grad_norm": 2.0167261690755445, "learning_rate": 4.4320255794299655e-08, "loss": 0.43649619817733765, "step": 7893 }, { "epoch": 1.8198270893371757, "grad_norm": 1.483085562494321, "learning_rate": 4.420810599837987e-08, "loss": 0.5035286545753479, "step": 7894 }, { "epoch": 1.820057636887608, "grad_norm": 1.4711203449015122, "learning_rate": 4.4096095069079296e-08, "loss": 0.39399099349975586, "step": 7895 }, { "epoch": 1.8202881844380403, "grad_norm": 1.8796636800361097, "learning_rate": 4.3984223022672015e-08, "loss": 0.4257911443710327, "step": 7896 }, { "epoch": 1.8205187319884726, "grad_norm": 1.6978394419820524, "learning_rate": 4.387248987541181e-08, "loss": 0.43202370405197144, "step": 7897 }, { "epoch": 1.8207492795389049, "grad_norm": 1.6730325432474051, "learning_rate": 4.376089564353258e-08, "loss": 0.475969135761261, "step": 7898 }, { "epoch": 1.8209798270893371, "grad_norm": 1.4401421618443093, "learning_rate": 4.3649440343247466e-08, "loss": 0.41923943161964417, "step": 7899 }, { "epoch": 1.8212103746397694, "grad_norm": 1.5983550240567561, "learning_rate": 4.3538123990750184e-08, "loss": 0.4265141487121582, "step": 7900 }, { "epoch": 1.8214409221902017, "grad_norm": 1.794717875578816, "learning_rate": 4.342694660221358e-08, "loss": 0.5309425592422485, "step": 7901 }, { "epoch": 1.821671469740634, "grad_norm": 1.7266349285203761, "learning_rate": 4.3315908193790384e-08, "loss": 0.4863468110561371, "step": 7902 }, { "epoch": 1.8219020172910663, "grad_norm": 1.5239348980367693, "learning_rate": 4.320500878161382e-08, "loss": 0.477884978055954, "step": 7903 }, { "epoch": 1.8221325648414985, "grad_norm": 1.7240173692545748, "learning_rate": 4.3094248381795874e-08, "loss": 0.5297879576683044, "step": 7904 }, { "epoch": 1.8223631123919308, "grad_norm": 1.7928218302726315, "learning_rate": 4.298362701042924e-08, "loss": 0.45477786660194397, "step": 7905 }, { "epoch": 1.822593659942363, "grad_norm": 1.607998070552905, "learning_rate": 4.287314468358605e-08, "loss": 0.49528950452804565, "step": 7906 }, { "epoch": 1.8228242074927954, "grad_norm": 1.3870439541037285, "learning_rate": 4.276280141731814e-08, "loss": 0.4592822194099426, "step": 7907 }, { "epoch": 1.8230547550432277, "grad_norm": 1.5905605119266304, "learning_rate": 4.265259722765713e-08, "loss": 0.4995965361595154, "step": 7908 }, { "epoch": 1.82328530259366, "grad_norm": 1.598553219734128, "learning_rate": 4.254253213061476e-08, "loss": 0.391417533159256, "step": 7909 }, { "epoch": 1.8235158501440922, "grad_norm": 1.8868519986532408, "learning_rate": 4.243260614218214e-08, "loss": 0.5150176882743835, "step": 7910 }, { "epoch": 1.8237463976945245, "grad_norm": 1.6268404324890209, "learning_rate": 4.232281927833059e-08, "loss": 0.4422363042831421, "step": 7911 }, { "epoch": 1.8239769452449568, "grad_norm": 1.8210697233868607, "learning_rate": 4.2213171555010696e-08, "loss": 0.48169761896133423, "step": 7912 }, { "epoch": 1.824207492795389, "grad_norm": 1.5167646952042861, "learning_rate": 4.210366298815349e-08, "loss": 0.48462286591529846, "step": 7913 }, { "epoch": 1.8244380403458214, "grad_norm": 1.7110004354591246, "learning_rate": 4.1994293593669236e-08, "loss": 0.4958770275115967, "step": 7914 }, { "epoch": 1.8246685878962536, "grad_norm": 1.529936451310175, "learning_rate": 4.188506338744813e-08, "loss": 0.4321090281009674, "step": 7915 }, { "epoch": 1.824899135446686, "grad_norm": 1.6613956831762409, "learning_rate": 4.1775972385360234e-08, "loss": 0.4604012966156006, "step": 7916 }, { "epoch": 1.8251296829971182, "grad_norm": 1.5114138411857048, "learning_rate": 4.166702060325544e-08, "loss": 0.4154652953147888, "step": 7917 }, { "epoch": 1.8253602305475505, "grad_norm": 1.6531876577774711, "learning_rate": 4.1558208056963086e-08, "loss": 0.4332526922225952, "step": 7918 }, { "epoch": 1.8255907780979828, "grad_norm": 1.3857924196517601, "learning_rate": 4.1449534762292735e-08, "loss": 0.3537461757659912, "step": 7919 }, { "epoch": 1.825821325648415, "grad_norm": 1.6404383737371222, "learning_rate": 4.134100073503344e-08, "loss": 0.5045830607414246, "step": 7920 }, { "epoch": 1.826051873198847, "grad_norm": 1.3402611675019687, "learning_rate": 4.12326059909538e-08, "loss": 0.39267051219940186, "step": 7921 }, { "epoch": 1.8262824207492794, "grad_norm": 1.6633828678500635, "learning_rate": 4.112435054580276e-08, "loss": 0.48170942068099976, "step": 7922 }, { "epoch": 1.8265129682997117, "grad_norm": 1.587202742021785, "learning_rate": 4.101623441530855e-08, "loss": 0.4519041180610657, "step": 7923 }, { "epoch": 1.826743515850144, "grad_norm": 1.8456233576104417, "learning_rate": 4.0908257615179467e-08, "loss": 0.5125565528869629, "step": 7924 }, { "epoch": 1.8269740634005762, "grad_norm": 1.700906466331933, "learning_rate": 4.080042016110319e-08, "loss": 0.4696243703365326, "step": 7925 }, { "epoch": 1.8272046109510085, "grad_norm": 1.7066910811269922, "learning_rate": 4.0692722068747745e-08, "loss": 0.48319560289382935, "step": 7926 }, { "epoch": 1.8274351585014408, "grad_norm": 1.6121339862505244, "learning_rate": 4.0585163353760165e-08, "loss": 0.4324444532394409, "step": 7927 }, { "epoch": 1.827665706051873, "grad_norm": 1.7831255886648252, "learning_rate": 4.0477744031767625e-08, "loss": 0.472909152507782, "step": 7928 }, { "epoch": 1.8278962536023053, "grad_norm": 1.718704717525841, "learning_rate": 4.03704641183773e-08, "loss": 0.4177134037017822, "step": 7929 }, { "epoch": 1.8281268011527376, "grad_norm": 1.5718528363636486, "learning_rate": 4.0263323629175724e-08, "loss": 0.46940964460372925, "step": 7930 }, { "epoch": 1.82835734870317, "grad_norm": 1.6808687193803358, "learning_rate": 4.015632257972912e-08, "loss": 0.4983375668525696, "step": 7931 }, { "epoch": 1.8285878962536022, "grad_norm": 1.4863665442165397, "learning_rate": 4.004946098558404e-08, "loss": 0.44752681255340576, "step": 7932 }, { "epoch": 1.8288184438040345, "grad_norm": 1.816894342124537, "learning_rate": 3.9942738862266065e-08, "loss": 0.5251951217651367, "step": 7933 }, { "epoch": 1.8290489913544667, "grad_norm": 1.7568175864500193, "learning_rate": 3.983615622528069e-08, "loss": 0.4417540431022644, "step": 7934 }, { "epoch": 1.829279538904899, "grad_norm": 1.436205708803146, "learning_rate": 3.9729713090113635e-08, "loss": 0.49237650632858276, "step": 7935 }, { "epoch": 1.8295100864553313, "grad_norm": 1.5345793839651518, "learning_rate": 3.962340947222953e-08, "loss": 0.45194119215011597, "step": 7936 }, { "epoch": 1.8297406340057636, "grad_norm": 1.4439924303115503, "learning_rate": 3.9517245387073574e-08, "loss": 0.36154115200042725, "step": 7937 }, { "epoch": 1.8299711815561959, "grad_norm": 1.6402341415525263, "learning_rate": 3.94112208500702e-08, "loss": 0.42474454641342163, "step": 7938 }, { "epoch": 1.8302017291066282, "grad_norm": 1.760242952555505, "learning_rate": 3.9305335876623545e-08, "loss": 0.49459904432296753, "step": 7939 }, { "epoch": 1.8304322766570604, "grad_norm": 1.5170817136048635, "learning_rate": 3.919959048211785e-08, "loss": 0.4881632328033447, "step": 7940 }, { "epoch": 1.8306628242074927, "grad_norm": 1.7241690625217052, "learning_rate": 3.909398468191638e-08, "loss": 0.4779052436351776, "step": 7941 }, { "epoch": 1.830893371757925, "grad_norm": 1.6098002323896041, "learning_rate": 3.898851849136298e-08, "loss": 0.38114166259765625, "step": 7942 }, { "epoch": 1.8311239193083573, "grad_norm": 1.7909906484652018, "learning_rate": 3.8883191925780604e-08, "loss": 0.5009176731109619, "step": 7943 }, { "epoch": 1.8313544668587896, "grad_norm": 1.5346894615338271, "learning_rate": 3.8778005000472125e-08, "loss": 0.3927236795425415, "step": 7944 }, { "epoch": 1.8315850144092218, "grad_norm": 1.793902552920017, "learning_rate": 3.867295773072021e-08, "loss": 0.502021074295044, "step": 7945 }, { "epoch": 1.8318155619596541, "grad_norm": 1.5109925254989232, "learning_rate": 3.85680501317871e-08, "loss": 0.5001766681671143, "step": 7946 }, { "epoch": 1.8320461095100864, "grad_norm": 1.4735968339841883, "learning_rate": 3.8463282218914595e-08, "loss": 0.35029879212379456, "step": 7947 }, { "epoch": 1.8322766570605187, "grad_norm": 1.4571551513662828, "learning_rate": 3.835865400732452e-08, "loss": 0.40344852209091187, "step": 7948 }, { "epoch": 1.832507204610951, "grad_norm": 1.8610379031960593, "learning_rate": 3.8254165512218276e-08, "loss": 0.4865550994873047, "step": 7949 }, { "epoch": 1.8327377521613832, "grad_norm": 1.5911656176036144, "learning_rate": 3.814981674877693e-08, "loss": 0.4122176766395569, "step": 7950 }, { "epoch": 1.8329682997118155, "grad_norm": 1.6337636431412388, "learning_rate": 3.804560773216137e-08, "loss": 0.4108060598373413, "step": 7951 }, { "epoch": 1.8331988472622478, "grad_norm": 1.6927194768268683, "learning_rate": 3.7941538477511914e-08, "loss": 0.44751685857772827, "step": 7952 }, { "epoch": 1.83342939481268, "grad_norm": 1.8176286894290226, "learning_rate": 3.783760899994881e-08, "loss": 0.5625091791152954, "step": 7953 }, { "epoch": 1.8336599423631124, "grad_norm": 1.7256683800022798, "learning_rate": 3.773381931457198e-08, "loss": 0.5466880202293396, "step": 7954 }, { "epoch": 1.8338904899135446, "grad_norm": 1.623766763090723, "learning_rate": 3.7630169436460915e-08, "loss": 0.4447929263114929, "step": 7955 }, { "epoch": 1.834121037463977, "grad_norm": 2.1137124064205834, "learning_rate": 3.7526659380675006e-08, "loss": 0.5581841468811035, "step": 7956 }, { "epoch": 1.8343515850144092, "grad_norm": 1.726869046029688, "learning_rate": 3.74232891622529e-08, "loss": 0.385328471660614, "step": 7957 }, { "epoch": 1.8345821325648415, "grad_norm": 1.599527906679486, "learning_rate": 3.732005879621358e-08, "loss": 0.4313199520111084, "step": 7958 }, { "epoch": 1.8348126801152738, "grad_norm": 1.8941416481134308, "learning_rate": 3.721696829755505e-08, "loss": 0.48205095529556274, "step": 7959 }, { "epoch": 1.835043227665706, "grad_norm": 1.4975360935823894, "learning_rate": 3.7114017681255324e-08, "loss": 0.5020872354507446, "step": 7960 }, { "epoch": 1.8352737752161383, "grad_norm": 1.6682410179064875, "learning_rate": 3.701120696227222e-08, "loss": 0.440343976020813, "step": 7961 }, { "epoch": 1.8355043227665706, "grad_norm": 1.7650179418507541, "learning_rate": 3.690853615554301e-08, "loss": 0.44800078868865967, "step": 7962 }, { "epoch": 1.8357348703170029, "grad_norm": 1.7281281440709166, "learning_rate": 3.680600527598454e-08, "loss": 0.4218701124191284, "step": 7963 }, { "epoch": 1.8359654178674352, "grad_norm": 1.701041640665313, "learning_rate": 3.6703614338493674e-08, "loss": 0.4884364902973175, "step": 7964 }, { "epoch": 1.8361959654178674, "grad_norm": 1.792804134531825, "learning_rate": 3.6601363357946725e-08, "loss": 0.472229540348053, "step": 7965 }, { "epoch": 1.8364265129682997, "grad_norm": 1.4963073935095241, "learning_rate": 3.6499252349199486e-08, "loss": 0.411716490983963, "step": 7966 }, { "epoch": 1.836657060518732, "grad_norm": 2.247546301190474, "learning_rate": 3.639728132708797e-08, "loss": 0.48858320713043213, "step": 7967 }, { "epoch": 1.8368876080691643, "grad_norm": 1.8323771399281195, "learning_rate": 3.629545030642711e-08, "loss": 0.42073512077331543, "step": 7968 }, { "epoch": 1.8371181556195966, "grad_norm": 1.926368162913144, "learning_rate": 3.6193759302012296e-08, "loss": 0.5200133919715881, "step": 7969 }, { "epoch": 1.8373487031700289, "grad_norm": 1.6431215963345491, "learning_rate": 3.609220832861781e-08, "loss": 0.4794218838214874, "step": 7970 }, { "epoch": 1.8375792507204611, "grad_norm": 1.7666660619120225, "learning_rate": 3.599079740099831e-08, "loss": 0.516029953956604, "step": 7971 }, { "epoch": 1.8378097982708934, "grad_norm": 5.5684397461669874, "learning_rate": 3.5889526533887434e-08, "loss": 0.4258018434047699, "step": 7972 }, { "epoch": 1.8380403458213257, "grad_norm": 1.4274910996664532, "learning_rate": 3.5788395741998876e-08, "loss": 0.4117387533187866, "step": 7973 }, { "epoch": 1.838270893371758, "grad_norm": 1.8002661199837686, "learning_rate": 3.5687405040025987e-08, "loss": 0.43812108039855957, "step": 7974 }, { "epoch": 1.8385014409221903, "grad_norm": 1.433184579488626, "learning_rate": 3.558655444264158e-08, "loss": 0.4854302406311035, "step": 7975 }, { "epoch": 1.8387319884726225, "grad_norm": 1.6279391160867416, "learning_rate": 3.5485843964498163e-08, "loss": 0.47127220034599304, "step": 7976 }, { "epoch": 1.8389625360230548, "grad_norm": 1.63494329169352, "learning_rate": 3.538527362022814e-08, "loss": 0.4757349491119385, "step": 7977 }, { "epoch": 1.839193083573487, "grad_norm": 1.309631044571452, "learning_rate": 3.5284843424443155e-08, "loss": 0.427249938249588, "step": 7978 }, { "epoch": 1.8394236311239194, "grad_norm": 1.370934941994731, "learning_rate": 3.518455339173454e-08, "loss": 0.409855455160141, "step": 7979 }, { "epoch": 1.8396541786743517, "grad_norm": 1.5739512241520999, "learning_rate": 3.5084403536673634e-08, "loss": 0.38040632009506226, "step": 7980 }, { "epoch": 1.839884726224784, "grad_norm": 1.91777543224754, "learning_rate": 3.498439387381103e-08, "loss": 0.4681670069694519, "step": 7981 }, { "epoch": 1.8401152737752162, "grad_norm": 1.6476666655223309, "learning_rate": 3.4884524417677086e-08, "loss": 0.5145970582962036, "step": 7982 }, { "epoch": 1.8403458213256485, "grad_norm": 1.7150302446575982, "learning_rate": 3.478479518278199e-08, "loss": 0.5431094169616699, "step": 7983 }, { "epoch": 1.8405763688760808, "grad_norm": 1.6753926575592268, "learning_rate": 3.4685206183615146e-08, "loss": 0.5518392324447632, "step": 7984 }, { "epoch": 1.840806916426513, "grad_norm": 1.4372422949374304, "learning_rate": 3.458575743464598e-08, "loss": 0.5075215101242065, "step": 7985 }, { "epoch": 1.8410374639769453, "grad_norm": 1.507303141864474, "learning_rate": 3.448644895032304e-08, "loss": 0.42477503418922424, "step": 7986 }, { "epoch": 1.8412680115273776, "grad_norm": 1.4856885288037505, "learning_rate": 3.4387280745075134e-08, "loss": 0.507225751876831, "step": 7987 }, { "epoch": 1.84149855907781, "grad_norm": 1.4825041181942642, "learning_rate": 3.428825283331027e-08, "loss": 0.4144738018512726, "step": 7988 }, { "epoch": 1.8417291066282422, "grad_norm": 1.9591826993684927, "learning_rate": 3.418936522941618e-08, "loss": 0.3863438367843628, "step": 7989 }, { "epoch": 1.8419596541786745, "grad_norm": 1.5040121036504583, "learning_rate": 3.409061794776025e-08, "loss": 0.39375755190849304, "step": 7990 }, { "epoch": 1.8421902017291067, "grad_norm": 1.4086042716424876, "learning_rate": 3.3992011002689334e-08, "loss": 0.48356667160987854, "step": 7991 }, { "epoch": 1.842420749279539, "grad_norm": 1.594841231811865, "learning_rate": 3.3893544408529985e-08, "loss": 0.42886489629745483, "step": 7992 }, { "epoch": 1.8426512968299713, "grad_norm": 1.6653919162381416, "learning_rate": 3.3795218179588524e-08, "loss": 0.36877313256263733, "step": 7993 }, { "epoch": 1.8428818443804036, "grad_norm": 1.833016012492649, "learning_rate": 3.369703233015053e-08, "loss": 0.45927101373672485, "step": 7994 }, { "epoch": 1.8431123919308359, "grad_norm": 1.7521984280825604, "learning_rate": 3.3598986874481484e-08, "loss": 0.478916734457016, "step": 7995 }, { "epoch": 1.8433429394812682, "grad_norm": 1.5148570498452396, "learning_rate": 3.350108182682654e-08, "loss": 0.4402740001678467, "step": 7996 }, { "epoch": 1.8435734870317004, "grad_norm": 1.4672604870133275, "learning_rate": 3.3403317201409986e-08, "loss": 0.45362555980682373, "step": 7997 }, { "epoch": 1.8438040345821327, "grad_norm": 1.7127744840337602, "learning_rate": 3.330569301243602e-08, "loss": 0.42510533332824707, "step": 7998 }, { "epoch": 1.844034582132565, "grad_norm": 1.8047053012106982, "learning_rate": 3.320820927408874e-08, "loss": 0.4747004508972168, "step": 7999 }, { "epoch": 1.8442651296829973, "grad_norm": 1.4734662369910507, "learning_rate": 3.3110866000531144e-08, "loss": 0.4616791307926178, "step": 8000 }, { "epoch": 1.8444956772334296, "grad_norm": 1.5205060225146327, "learning_rate": 3.301366320590659e-08, "loss": 0.5137572288513184, "step": 8001 }, { "epoch": 1.8447262247838618, "grad_norm": 1.7717149493752453, "learning_rate": 3.291660090433734e-08, "loss": 0.5519400835037231, "step": 8002 }, { "epoch": 1.844956772334294, "grad_norm": 1.46812017440615, "learning_rate": 3.281967910992556e-08, "loss": 0.46323487162590027, "step": 8003 }, { "epoch": 1.8451873198847262, "grad_norm": 1.662894278457401, "learning_rate": 3.272289783675308e-08, "loss": 0.4647497832775116, "step": 8004 }, { "epoch": 1.8454178674351585, "grad_norm": 1.5023892185449783, "learning_rate": 3.262625709888101e-08, "loss": 0.40287381410598755, "step": 8005 }, { "epoch": 1.8456484149855907, "grad_norm": 1.7424527329087247, "learning_rate": 3.252975691035042e-08, "loss": 0.4404665231704712, "step": 8006 }, { "epoch": 1.845878962536023, "grad_norm": 1.7167182806108425, "learning_rate": 3.2433397285181906e-08, "loss": 0.4510694742202759, "step": 8007 }, { "epoch": 1.8461095100864553, "grad_norm": 1.808603235242201, "learning_rate": 3.233717823737536e-08, "loss": 0.5496842861175537, "step": 8008 }, { "epoch": 1.8463400576368876, "grad_norm": 1.2919273138221294, "learning_rate": 3.2241099780910385e-08, "loss": 0.3442491590976715, "step": 8009 }, { "epoch": 1.8465706051873199, "grad_norm": 1.5771951982307342, "learning_rate": 3.214516192974615e-08, "loss": 0.4181824326515198, "step": 8010 }, { "epoch": 1.8468011527377521, "grad_norm": 2.536239745080693, "learning_rate": 3.204936469782149e-08, "loss": 0.5211422443389893, "step": 8011 }, { "epoch": 1.8470317002881844, "grad_norm": 1.7289079469042739, "learning_rate": 3.195370809905484e-08, "loss": 0.4499666690826416, "step": 8012 }, { "epoch": 1.8472622478386167, "grad_norm": 1.521332043446148, "learning_rate": 3.1858192147343977e-08, "loss": 0.4396360218524933, "step": 8013 }, { "epoch": 1.847492795389049, "grad_norm": 1.5919836717749307, "learning_rate": 3.1762816856566454e-08, "loss": 0.5222504734992981, "step": 8014 }, { "epoch": 1.8477233429394813, "grad_norm": 1.7151085760805032, "learning_rate": 3.16675822405793e-08, "loss": 0.4413851499557495, "step": 8015 }, { "epoch": 1.8479538904899135, "grad_norm": 1.5672185881021385, "learning_rate": 3.1572488313218904e-08, "loss": 0.4500206708908081, "step": 8016 }, { "epoch": 1.8481844380403458, "grad_norm": 1.6869036603734429, "learning_rate": 3.1477535088301755e-08, "loss": 0.526034951210022, "step": 8017 }, { "epoch": 1.848414985590778, "grad_norm": 1.719809726136178, "learning_rate": 3.1382722579623376e-08, "loss": 0.4836745858192444, "step": 8018 }, { "epoch": 1.8486455331412104, "grad_norm": 1.6521486962664456, "learning_rate": 3.128805080095898e-08, "loss": 0.4079389274120331, "step": 8019 }, { "epoch": 1.8488760806916427, "grad_norm": 1.4957674152417437, "learning_rate": 3.1193519766063655e-08, "loss": 0.44330352544784546, "step": 8020 }, { "epoch": 1.849106628242075, "grad_norm": 1.9754043574725335, "learning_rate": 3.109912948867166e-08, "loss": 0.5943433046340942, "step": 8021 }, { "epoch": 1.8493371757925072, "grad_norm": 1.6271444611930428, "learning_rate": 3.100487998249679e-08, "loss": 0.403645396232605, "step": 8022 }, { "epoch": 1.8495677233429395, "grad_norm": 1.62649256942446, "learning_rate": 3.091077126123254e-08, "loss": 0.38898250460624695, "step": 8023 }, { "epoch": 1.8497982708933718, "grad_norm": 1.6220661737220665, "learning_rate": 3.0816803338551966e-08, "loss": 0.4852311611175537, "step": 8024 }, { "epoch": 1.850028818443804, "grad_norm": 1.5505326704151483, "learning_rate": 3.072297622810782e-08, "loss": 0.5085941553115845, "step": 8025 }, { "epoch": 1.8502593659942363, "grad_norm": 1.5598630793553008, "learning_rate": 3.062928994353187e-08, "loss": 0.5087497234344482, "step": 8026 }, { "epoch": 1.8504899135446686, "grad_norm": 1.515152966212454, "learning_rate": 3.053574449843599e-08, "loss": 0.41259822249412537, "step": 8027 }, { "epoch": 1.850720461095101, "grad_norm": 1.6528788980804292, "learning_rate": 3.044233990641143e-08, "loss": 0.5031530857086182, "step": 8028 }, { "epoch": 1.8509510086455332, "grad_norm": 1.558033346939741, "learning_rate": 3.034907618102856e-08, "loss": 0.5004956722259521, "step": 8029 }, { "epoch": 1.8511815561959655, "grad_norm": 1.7892456756117716, "learning_rate": 3.025595333583797e-08, "loss": 0.506257951259613, "step": 8030 }, { "epoch": 1.8514121037463975, "grad_norm": 1.695644279509625, "learning_rate": 3.016297138436918e-08, "loss": 0.3953269124031067, "step": 8031 }, { "epoch": 1.8516426512968298, "grad_norm": 1.4718849636623421, "learning_rate": 3.007013034013173e-08, "loss": 0.4835085868835449, "step": 8032 }, { "epoch": 1.851873198847262, "grad_norm": 1.8663871893102746, "learning_rate": 2.997743021661448e-08, "loss": 0.414350688457489, "step": 8033 }, { "epoch": 1.8521037463976944, "grad_norm": 1.4238378341848068, "learning_rate": 2.988487102728554e-08, "loss": 0.4191391170024872, "step": 8034 }, { "epoch": 1.8523342939481267, "grad_norm": 1.5441051894921214, "learning_rate": 2.9792452785592947e-08, "loss": 0.4623367190361023, "step": 8035 }, { "epoch": 1.852564841498559, "grad_norm": 1.8389565697437233, "learning_rate": 2.9700175504964175e-08, "loss": 0.43516361713409424, "step": 8036 }, { "epoch": 1.8527953890489912, "grad_norm": 1.3914012278068204, "learning_rate": 2.9608039198805944e-08, "loss": 0.38310742378234863, "step": 8037 }, { "epoch": 1.8530259365994235, "grad_norm": 1.8121203481344736, "learning_rate": 2.9516043880504882e-08, "loss": 0.4491914212703705, "step": 8038 }, { "epoch": 1.8532564841498558, "grad_norm": 1.4258495096905672, "learning_rate": 2.9424189563426848e-08, "loss": 0.40703436732292175, "step": 8039 }, { "epoch": 1.853487031700288, "grad_norm": 1.6268645644688815, "learning_rate": 2.9332476260917505e-08, "loss": 0.4627934694290161, "step": 8040 }, { "epoch": 1.8537175792507203, "grad_norm": 1.73857404363576, "learning_rate": 2.9240903986301634e-08, "loss": 0.5413684844970703, "step": 8041 }, { "epoch": 1.8539481268011526, "grad_norm": 1.4418238041833238, "learning_rate": 2.914947275288382e-08, "loss": 0.48317795991897583, "step": 8042 }, { "epoch": 1.854178674351585, "grad_norm": 1.9593036490159952, "learning_rate": 2.9058182573947986e-08, "loss": 0.5212484002113342, "step": 8043 }, { "epoch": 1.8544092219020172, "grad_norm": 1.560534281141119, "learning_rate": 2.896703346275775e-08, "loss": 0.46468842029571533, "step": 8044 }, { "epoch": 1.8546397694524495, "grad_norm": 1.4520449873662198, "learning_rate": 2.8876025432556073e-08, "loss": 0.4069516956806183, "step": 8045 }, { "epoch": 1.8548703170028817, "grad_norm": 1.354675996882102, "learning_rate": 2.8785158496565598e-08, "loss": 0.41940397024154663, "step": 8046 }, { "epoch": 1.855100864553314, "grad_norm": 1.3952687524013894, "learning_rate": 2.869443266798832e-08, "loss": 0.43869268894195557, "step": 8047 }, { "epoch": 1.8553314121037463, "grad_norm": 1.6019003431265055, "learning_rate": 2.8603847960005477e-08, "loss": 0.42834728956222534, "step": 8048 }, { "epoch": 1.8555619596541786, "grad_norm": 2.0473667943403324, "learning_rate": 2.8513404385778428e-08, "loss": 0.5309191942214966, "step": 8049 }, { "epoch": 1.8557925072046109, "grad_norm": 1.6839541365806647, "learning_rate": 2.8423101958447437e-08, "loss": 0.40201905369758606, "step": 8050 }, { "epoch": 1.8560230547550431, "grad_norm": 1.586417513378552, "learning_rate": 2.8332940691132567e-08, "loss": 0.5671436190605164, "step": 8051 }, { "epoch": 1.8562536023054754, "grad_norm": 1.8928597752283605, "learning_rate": 2.824292059693356e-08, "loss": 0.3906604051589966, "step": 8052 }, { "epoch": 1.8564841498559077, "grad_norm": 1.681854638373173, "learning_rate": 2.815304168892918e-08, "loss": 0.4506712555885315, "step": 8053 }, { "epoch": 1.85671469740634, "grad_norm": 1.8408224537520856, "learning_rate": 2.8063303980177866e-08, "loss": 0.42090779542922974, "step": 8054 }, { "epoch": 1.8569452449567723, "grad_norm": 1.4139598318096287, "learning_rate": 2.7973707483717635e-08, "loss": 0.34566083550453186, "step": 8055 }, { "epoch": 1.8571757925072045, "grad_norm": 1.688139206456363, "learning_rate": 2.7884252212565738e-08, "loss": 0.5240504145622253, "step": 8056 }, { "epoch": 1.8574063400576368, "grad_norm": 1.5731797225643818, "learning_rate": 2.779493817971956e-08, "loss": 0.5085941553115845, "step": 8057 }, { "epoch": 1.857636887608069, "grad_norm": 1.5901229313111294, "learning_rate": 2.7705765398155058e-08, "loss": 0.4701150059700012, "step": 8058 }, { "epoch": 1.8578674351585014, "grad_norm": 1.7286913224525837, "learning_rate": 2.7616733880828304e-08, "loss": 0.4169929325580597, "step": 8059 }, { "epoch": 1.8580979827089337, "grad_norm": 1.6076137148912093, "learning_rate": 2.7527843640674618e-08, "loss": 0.4952937066555023, "step": 8060 }, { "epoch": 1.858328530259366, "grad_norm": 1.881595697948473, "learning_rate": 2.7439094690608787e-08, "loss": 0.47481924295425415, "step": 8061 }, { "epoch": 1.8585590778097982, "grad_norm": 1.5277002490466254, "learning_rate": 2.735048704352527e-08, "loss": 0.3752020299434662, "step": 8062 }, { "epoch": 1.8587896253602305, "grad_norm": 1.809986336100686, "learning_rate": 2.726202071229755e-08, "loss": 0.5471283793449402, "step": 8063 }, { "epoch": 1.8590201729106628, "grad_norm": 1.4088159083069367, "learning_rate": 2.7173695709779008e-08, "loss": 0.4557954668998718, "step": 8064 }, { "epoch": 1.859250720461095, "grad_norm": 1.550122560907728, "learning_rate": 2.7085512048802606e-08, "loss": 0.40548449754714966, "step": 8065 }, { "epoch": 1.8594812680115274, "grad_norm": 1.3791933294840308, "learning_rate": 2.699746974218009e-08, "loss": 0.43826359510421753, "step": 8066 }, { "epoch": 1.8597118155619596, "grad_norm": 1.7188043861384448, "learning_rate": 2.6909568802703453e-08, "loss": 0.4630689024925232, "step": 8067 }, { "epoch": 1.859942363112392, "grad_norm": 1.3522114967992507, "learning_rate": 2.6821809243143367e-08, "loss": 0.4804350733757019, "step": 8068 }, { "epoch": 1.8601729106628242, "grad_norm": 1.6824990648749616, "learning_rate": 2.6734191076250744e-08, "loss": 0.5459887981414795, "step": 8069 }, { "epoch": 1.8604034582132565, "grad_norm": 1.4159310296071261, "learning_rate": 2.6646714314755513e-08, "loss": 0.366889625787735, "step": 8070 }, { "epoch": 1.8606340057636888, "grad_norm": 1.9582286801999929, "learning_rate": 2.6559378971366953e-08, "loss": 0.4883540868759155, "step": 8071 }, { "epoch": 1.860864553314121, "grad_norm": 1.6324605985235177, "learning_rate": 2.6472185058774243e-08, "loss": 0.4964878261089325, "step": 8072 }, { "epoch": 1.8610951008645533, "grad_norm": 1.673302060019101, "learning_rate": 2.6385132589645697e-08, "loss": 0.4955672323703766, "step": 8073 }, { "epoch": 1.8613256484149856, "grad_norm": 1.6435310144192468, "learning_rate": 2.6298221576628977e-08, "loss": 0.42056921124458313, "step": 8074 }, { "epoch": 1.8615561959654179, "grad_norm": 1.4381408247631826, "learning_rate": 2.6211452032351534e-08, "loss": 0.47011131048202515, "step": 8075 }, { "epoch": 1.8617867435158502, "grad_norm": 1.509318362247471, "learning_rate": 2.612482396941984e-08, "loss": 0.3521801233291626, "step": 8076 }, { "epoch": 1.8620172910662824, "grad_norm": 1.636071263399774, "learning_rate": 2.6038337400420164e-08, "loss": 0.4199404716491699, "step": 8077 }, { "epoch": 1.8622478386167147, "grad_norm": 1.5150893291359375, "learning_rate": 2.595199233791834e-08, "loss": 0.40499597787857056, "step": 8078 }, { "epoch": 1.862478386167147, "grad_norm": 1.7531061656772358, "learning_rate": 2.586578879445922e-08, "loss": 0.5024272799491882, "step": 8079 }, { "epoch": 1.8627089337175793, "grad_norm": 1.5549183536571185, "learning_rate": 2.5779726782567124e-08, "loss": 0.4589402675628662, "step": 8080 }, { "epoch": 1.8629394812680116, "grad_norm": 1.6355789321212768, "learning_rate": 2.5693806314746157e-08, "loss": 0.4677194356918335, "step": 8081 }, { "epoch": 1.8631700288184438, "grad_norm": 1.6860171847729095, "learning_rate": 2.560802740347956e-08, "loss": 0.4410317540168762, "step": 8082 }, { "epoch": 1.8634005763688761, "grad_norm": 1.8113207487688465, "learning_rate": 2.5522390061230358e-08, "loss": 0.5089725255966187, "step": 8083 }, { "epoch": 1.8636311239193084, "grad_norm": 1.588730434569227, "learning_rate": 2.543689430044038e-08, "loss": 0.47457408905029297, "step": 8084 }, { "epoch": 1.8638616714697407, "grad_norm": 1.4319853393057471, "learning_rate": 2.535154013353169e-08, "loss": 0.49628597497940063, "step": 8085 }, { "epoch": 1.864092219020173, "grad_norm": 1.9014302038219766, "learning_rate": 2.5266327572905144e-08, "loss": 0.49943581223487854, "step": 8086 }, { "epoch": 1.8643227665706052, "grad_norm": 1.7732762838979956, "learning_rate": 2.5181256630941063e-08, "loss": 0.4620710015296936, "step": 8087 }, { "epoch": 1.8645533141210375, "grad_norm": 1.5972635490235074, "learning_rate": 2.5096327319999555e-08, "loss": 0.4752368927001953, "step": 8088 }, { "epoch": 1.8647838616714698, "grad_norm": 1.4886652930685826, "learning_rate": 2.50115396524202e-08, "loss": 0.4133688509464264, "step": 8089 }, { "epoch": 1.865014409221902, "grad_norm": 1.949281605819026, "learning_rate": 2.492689364052125e-08, "loss": 0.4419419765472412, "step": 8090 }, { "epoch": 1.8652449567723344, "grad_norm": 1.7344029726538643, "learning_rate": 2.4842389296601428e-08, "loss": 0.4486713409423828, "step": 8091 }, { "epoch": 1.8654755043227667, "grad_norm": 1.8228844715495627, "learning_rate": 2.4758026632938022e-08, "loss": 0.47762376070022583, "step": 8092 }, { "epoch": 1.865706051873199, "grad_norm": 2.0782542300039517, "learning_rate": 2.4673805661788007e-08, "loss": 0.4357690215110779, "step": 8093 }, { "epoch": 1.8659365994236312, "grad_norm": 1.8276227922492578, "learning_rate": 2.458972639538792e-08, "loss": 0.5147565603256226, "step": 8094 }, { "epoch": 1.8661671469740635, "grad_norm": 1.6692516341274928, "learning_rate": 2.4505788845953668e-08, "loss": 0.39681991934776306, "step": 8095 }, { "epoch": 1.8663976945244958, "grad_norm": 1.4487927889395267, "learning_rate": 2.4421993025680265e-08, "loss": 0.4552622437477112, "step": 8096 }, { "epoch": 1.866628242074928, "grad_norm": 1.7503441015857129, "learning_rate": 2.4338338946742752e-08, "loss": 0.45923811197280884, "step": 8097 }, { "epoch": 1.8668587896253603, "grad_norm": 1.502251183757353, "learning_rate": 2.4254826621294966e-08, "loss": 0.47280222177505493, "step": 8098 }, { "epoch": 1.8670893371757926, "grad_norm": 1.456189671497043, "learning_rate": 2.417145606147042e-08, "loss": 0.4157524108886719, "step": 8099 }, { "epoch": 1.867319884726225, "grad_norm": 1.4379336942099938, "learning_rate": 2.4088227279381757e-08, "loss": 0.42565596103668213, "step": 8100 }, { "epoch": 1.8675504322766572, "grad_norm": 1.6304916370441087, "learning_rate": 2.4005140287121528e-08, "loss": 0.47616803646087646, "step": 8101 }, { "epoch": 1.8677809798270895, "grad_norm": 1.515736347680141, "learning_rate": 2.392219509676152e-08, "loss": 0.4814712107181549, "step": 8102 }, { "epoch": 1.8680115273775217, "grad_norm": 1.441013936213762, "learning_rate": 2.383939172035243e-08, "loss": 0.5077251195907593, "step": 8103 }, { "epoch": 1.868242074927954, "grad_norm": 1.6890380706345745, "learning_rate": 2.3756730169925075e-08, "loss": 0.38274845480918884, "step": 8104 }, { "epoch": 1.8684726224783863, "grad_norm": 2.0943824772733315, "learning_rate": 2.3674210457489074e-08, "loss": 0.4606715440750122, "step": 8105 }, { "epoch": 1.8687031700288186, "grad_norm": 1.6432807697262717, "learning_rate": 2.3591832595033723e-08, "loss": 0.46634575724601746, "step": 8106 }, { "epoch": 1.8689337175792509, "grad_norm": 1.8861732961995932, "learning_rate": 2.3509596594527893e-08, "loss": 0.5166279673576355, "step": 8107 }, { "epoch": 1.8691642651296831, "grad_norm": 1.599107373552059, "learning_rate": 2.3427502467919357e-08, "loss": 0.4556184709072113, "step": 8108 }, { "epoch": 1.8693948126801154, "grad_norm": 1.5633160648919202, "learning_rate": 2.334555022713558e-08, "loss": 0.43875348567962646, "step": 8109 }, { "epoch": 1.8696253602305477, "grad_norm": 1.424121149992357, "learning_rate": 2.326373988408359e-08, "loss": 0.4601413607597351, "step": 8110 }, { "epoch": 1.86985590778098, "grad_norm": 1.769455045601545, "learning_rate": 2.318207145064921e-08, "loss": 0.476366251707077, "step": 8111 }, { "epoch": 1.8700864553314123, "grad_norm": 1.7211682546672895, "learning_rate": 2.3100544938698396e-08, "loss": 0.42146819829940796, "step": 8112 }, { "epoch": 1.8703170028818443, "grad_norm": 1.6952336846407405, "learning_rate": 2.3019160360075784e-08, "loss": 0.5055359601974487, "step": 8113 }, { "epoch": 1.8705475504322766, "grad_norm": 2.2577515216080406, "learning_rate": 2.2937917726605803e-08, "loss": 0.5550209879875183, "step": 8114 }, { "epoch": 1.8707780979827089, "grad_norm": 1.6101290683313494, "learning_rate": 2.2856817050092346e-08, "loss": 0.4699084758758545, "step": 8115 }, { "epoch": 1.8710086455331412, "grad_norm": 1.7215028056651225, "learning_rate": 2.2775858342318323e-08, "loss": 0.5803818702697754, "step": 8116 }, { "epoch": 1.8712391930835734, "grad_norm": 1.7430698785050744, "learning_rate": 2.2695041615046097e-08, "loss": 0.5021014213562012, "step": 8117 }, { "epoch": 1.8714697406340057, "grad_norm": 1.8052568666638755, "learning_rate": 2.261436688001772e-08, "loss": 0.47168630361557007, "step": 8118 }, { "epoch": 1.871700288184438, "grad_norm": 1.4385974626029983, "learning_rate": 2.2533834148954266e-08, "loss": 0.4514986574649811, "step": 8119 }, { "epoch": 1.8719308357348703, "grad_norm": 1.6683524018814926, "learning_rate": 2.2453443433556373e-08, "loss": 0.507361650466919, "step": 8120 }, { "epoch": 1.8721613832853026, "grad_norm": 1.4143259238979617, "learning_rate": 2.237319474550392e-08, "loss": 0.46427232027053833, "step": 8121 }, { "epoch": 1.8723919308357349, "grad_norm": 1.6353248584942321, "learning_rate": 2.229308809645625e-08, "loss": 0.4799825847148895, "step": 8122 }, { "epoch": 1.8726224783861671, "grad_norm": 1.424423920616398, "learning_rate": 2.2213123498051933e-08, "loss": 0.4089062809944153, "step": 8123 }, { "epoch": 1.8728530259365994, "grad_norm": 1.8857434186496964, "learning_rate": 2.213330096190913e-08, "loss": 0.6294845342636108, "step": 8124 }, { "epoch": 1.8730835734870317, "grad_norm": 1.3744302251940603, "learning_rate": 2.2053620499625003e-08, "loss": 0.40907663106918335, "step": 8125 }, { "epoch": 1.873314121037464, "grad_norm": 1.6129897410187974, "learning_rate": 2.1974082122776627e-08, "loss": 0.4643021821975708, "step": 8126 }, { "epoch": 1.8735446685878963, "grad_norm": 1.3724860354028519, "learning_rate": 2.189468584291976e-08, "loss": 0.45625531673431396, "step": 8127 }, { "epoch": 1.8737752161383285, "grad_norm": 1.5215478461136303, "learning_rate": 2.1815431671590168e-08, "loss": 0.3957236409187317, "step": 8128 }, { "epoch": 1.8740057636887608, "grad_norm": 1.7966855487137956, "learning_rate": 2.1736319620302423e-08, "loss": 0.5302785634994507, "step": 8129 }, { "epoch": 1.874236311239193, "grad_norm": 1.60041594224808, "learning_rate": 2.1657349700550774e-08, "loss": 0.5150219202041626, "step": 8130 }, { "epoch": 1.8744668587896254, "grad_norm": 1.5418466180847037, "learning_rate": 2.1578521923808712e-08, "loss": 0.49146413803100586, "step": 8131 }, { "epoch": 1.8746974063400577, "grad_norm": 1.7718176093322833, "learning_rate": 2.1499836301529073e-08, "loss": 0.5348008275032043, "step": 8132 }, { "epoch": 1.87492795389049, "grad_norm": 1.8197773841806884, "learning_rate": 2.1421292845144045e-08, "loss": 0.5781833529472351, "step": 8133 }, { "epoch": 1.8751585014409222, "grad_norm": 1.6712001358143913, "learning_rate": 2.134289156606528e-08, "loss": 0.4303954839706421, "step": 8134 }, { "epoch": 1.8753890489913545, "grad_norm": 1.5346781500505091, "learning_rate": 2.1264632475683665e-08, "loss": 0.3662906289100647, "step": 8135 }, { "epoch": 1.8756195965417868, "grad_norm": 1.8494759301054144, "learning_rate": 2.1186515585369323e-08, "loss": 0.4143645763397217, "step": 8136 }, { "epoch": 1.875850144092219, "grad_norm": 1.6333327402319844, "learning_rate": 2.110854090647185e-08, "loss": 0.41202569007873535, "step": 8137 }, { "epoch": 1.8760806916426513, "grad_norm": 1.8987185928487378, "learning_rate": 2.1030708450320068e-08, "loss": 0.4279648959636688, "step": 8138 }, { "epoch": 1.8763112391930836, "grad_norm": 1.605664384270582, "learning_rate": 2.0953018228222484e-08, "loss": 0.4687902629375458, "step": 8139 }, { "epoch": 1.876541786743516, "grad_norm": 1.7901855377352036, "learning_rate": 2.0875470251466408e-08, "loss": 0.44786232709884644, "step": 8140 }, { "epoch": 1.876772334293948, "grad_norm": 1.5508099943894624, "learning_rate": 2.0798064531319048e-08, "loss": 0.43611055612564087, "step": 8141 }, { "epoch": 1.8770028818443802, "grad_norm": 1.5918102453875247, "learning_rate": 2.0720801079026407e-08, "loss": 0.5804335474967957, "step": 8142 }, { "epoch": 1.8772334293948125, "grad_norm": 1.450097927891864, "learning_rate": 2.064367990581406e-08, "loss": 0.3775164484977722, "step": 8143 }, { "epoch": 1.8774639769452448, "grad_norm": 1.9129361746381695, "learning_rate": 2.0566701022887044e-08, "loss": 0.44318705797195435, "step": 8144 }, { "epoch": 1.877694524495677, "grad_norm": 1.6495228346572506, "learning_rate": 2.0489864441429526e-08, "loss": 0.4423883557319641, "step": 8145 }, { "epoch": 1.8779250720461094, "grad_norm": 2.010247138726775, "learning_rate": 2.0413170172605022e-08, "loss": 0.513456404209137, "step": 8146 }, { "epoch": 1.8781556195965416, "grad_norm": 1.492461163153598, "learning_rate": 2.0336618227556502e-08, "loss": 0.37731099128723145, "step": 8147 }, { "epoch": 1.878386167146974, "grad_norm": 1.85946715978495, "learning_rate": 2.026020861740607e-08, "loss": 0.5168889760971069, "step": 8148 }, { "epoch": 1.8786167146974062, "grad_norm": 1.7236419764627888, "learning_rate": 2.0183941353255407e-08, "loss": 0.4313003122806549, "step": 8149 }, { "epoch": 1.8788472622478385, "grad_norm": 1.6137221955666339, "learning_rate": 2.010781644618509e-08, "loss": 0.4891592562198639, "step": 8150 }, { "epoch": 1.8790778097982708, "grad_norm": 1.5371431093905303, "learning_rate": 2.003183390725549e-08, "loss": 0.4526386260986328, "step": 8151 }, { "epoch": 1.879308357348703, "grad_norm": 1.7453687827090651, "learning_rate": 1.9955993747506005e-08, "loss": 0.41896072030067444, "step": 8152 }, { "epoch": 1.8795389048991353, "grad_norm": 1.3994731816960104, "learning_rate": 1.9880295977955486e-08, "loss": 0.4495571255683899, "step": 8153 }, { "epoch": 1.8797694524495676, "grad_norm": 1.56608977367896, "learning_rate": 1.980474060960191e-08, "loss": 0.45853012800216675, "step": 8154 }, { "epoch": 1.88, "grad_norm": 1.7029949485801334, "learning_rate": 1.9729327653422834e-08, "loss": 0.45541200041770935, "step": 8155 }, { "epoch": 1.8802305475504322, "grad_norm": 1.6854487376632121, "learning_rate": 1.9654057120374824e-08, "loss": 0.48467034101486206, "step": 8156 }, { "epoch": 1.8804610951008645, "grad_norm": 1.435646075094146, "learning_rate": 1.957892902139402e-08, "loss": 0.40592968463897705, "step": 8157 }, { "epoch": 1.8806916426512967, "grad_norm": 1.546237825496461, "learning_rate": 1.9503943367395692e-08, "loss": 0.5229415893554688, "step": 8158 }, { "epoch": 1.880922190201729, "grad_norm": 1.692350015447075, "learning_rate": 1.942910016927446e-08, "loss": 0.4685397744178772, "step": 8159 }, { "epoch": 1.8811527377521613, "grad_norm": 1.7486120551838806, "learning_rate": 1.93543994379044e-08, "loss": 0.48139676451683044, "step": 8160 }, { "epoch": 1.8813832853025936, "grad_norm": 1.4835995524654266, "learning_rate": 1.9279841184138613e-08, "loss": 0.4402969479560852, "step": 8161 }, { "epoch": 1.8816138328530259, "grad_norm": 1.787346384712089, "learning_rate": 1.920542541880954e-08, "loss": 0.4192197024822235, "step": 8162 }, { "epoch": 1.8818443804034581, "grad_norm": 1.5358084843474231, "learning_rate": 1.913115215272931e-08, "loss": 0.38458961248397827, "step": 8163 }, { "epoch": 1.8820749279538904, "grad_norm": 2.0291302973789547, "learning_rate": 1.9057021396688856e-08, "loss": 0.4597528278827667, "step": 8164 }, { "epoch": 1.8823054755043227, "grad_norm": 1.7232445103457965, "learning_rate": 1.898303316145866e-08, "loss": 0.5111892223358154, "step": 8165 }, { "epoch": 1.882536023054755, "grad_norm": 1.7242418431244608, "learning_rate": 1.8909187457788357e-08, "loss": 0.406166672706604, "step": 8166 }, { "epoch": 1.8827665706051873, "grad_norm": 2.128090849602097, "learning_rate": 1.8835484296407134e-08, "loss": 0.5733405947685242, "step": 8167 }, { "epoch": 1.8829971181556195, "grad_norm": 1.8251609057771778, "learning_rate": 1.8761923688023096e-08, "loss": 0.4798845648765564, "step": 8168 }, { "epoch": 1.8832276657060518, "grad_norm": 1.6647247641023675, "learning_rate": 1.8688505643323916e-08, "loss": 0.5291308164596558, "step": 8169 }, { "epoch": 1.883458213256484, "grad_norm": 2.119330206077609, "learning_rate": 1.8615230172976505e-08, "loss": 0.5666407346725464, "step": 8170 }, { "epoch": 1.8836887608069164, "grad_norm": 1.4319363819920743, "learning_rate": 1.8542097287627123e-08, "loss": 0.3907548785209656, "step": 8171 }, { "epoch": 1.8839193083573487, "grad_norm": 1.6951882671978693, "learning_rate": 1.846910699790094e-08, "loss": 0.4055211544036865, "step": 8172 }, { "epoch": 1.884149855907781, "grad_norm": 1.7439765908427145, "learning_rate": 1.8396259314402918e-08, "loss": 0.5032040476799011, "step": 8173 }, { "epoch": 1.8843804034582132, "grad_norm": 1.6673926968240602, "learning_rate": 1.832355424771703e-08, "loss": 0.44346821308135986, "step": 8174 }, { "epoch": 1.8846109510086455, "grad_norm": 1.727405604744471, "learning_rate": 1.82509918084065e-08, "loss": 0.49914759397506714, "step": 8175 }, { "epoch": 1.8848414985590778, "grad_norm": 1.9751386726315723, "learning_rate": 1.8178572007014005e-08, "loss": 0.5221455693244934, "step": 8176 }, { "epoch": 1.88507204610951, "grad_norm": 1.510541508033986, "learning_rate": 1.810629485406112e-08, "loss": 0.42171233892440796, "step": 8177 }, { "epoch": 1.8853025936599423, "grad_norm": 1.6261053990647354, "learning_rate": 1.8034160360049234e-08, "loss": 0.36800506711006165, "step": 8178 }, { "epoch": 1.8855331412103746, "grad_norm": 2.0110866169235972, "learning_rate": 1.7962168535458842e-08, "loss": 0.44831427931785583, "step": 8179 }, { "epoch": 1.885763688760807, "grad_norm": 1.8264839989305564, "learning_rate": 1.7890319390749255e-08, "loss": 0.39368799328804016, "step": 8180 }, { "epoch": 1.8859942363112392, "grad_norm": 1.8410759547301747, "learning_rate": 1.7818612936359666e-08, "loss": 0.5152736902236938, "step": 8181 }, { "epoch": 1.8862247838616715, "grad_norm": 1.7682123665451497, "learning_rate": 1.7747049182708086e-08, "loss": 0.43120649456977844, "step": 8182 }, { "epoch": 1.8864553314121038, "grad_norm": 1.5438355501907768, "learning_rate": 1.767562814019208e-08, "loss": 0.4215066432952881, "step": 8183 }, { "epoch": 1.886685878962536, "grad_norm": 1.4643158724856222, "learning_rate": 1.760434981918846e-08, "loss": 0.44176608324050903, "step": 8184 }, { "epoch": 1.8869164265129683, "grad_norm": 1.5750882708401341, "learning_rate": 1.753321423005305e-08, "loss": 0.46879637241363525, "step": 8185 }, { "epoch": 1.8871469740634006, "grad_norm": 1.4540600241751436, "learning_rate": 1.746222138312137e-08, "loss": 0.42334824800491333, "step": 8186 }, { "epoch": 1.8873775216138329, "grad_norm": 1.8563297655343165, "learning_rate": 1.7391371288707712e-08, "loss": 0.47822874784469604, "step": 8187 }, { "epoch": 1.8876080691642652, "grad_norm": 1.4944491017270831, "learning_rate": 1.7320663957105963e-08, "loss": 0.42995691299438477, "step": 8188 }, { "epoch": 1.8878386167146974, "grad_norm": 1.6297543723171652, "learning_rate": 1.7250099398589125e-08, "loss": 0.44044607877731323, "step": 8189 }, { "epoch": 1.8880691642651297, "grad_norm": 1.6463599352675649, "learning_rate": 1.717967762340944e-08, "loss": 0.40334299206733704, "step": 8190 }, { "epoch": 1.888299711815562, "grad_norm": 1.4705090673445491, "learning_rate": 1.71093986417985e-08, "loss": 0.4697923958301544, "step": 8191 }, { "epoch": 1.8885302593659943, "grad_norm": 1.7306768855177799, "learning_rate": 1.703926246396714e-08, "loss": 0.5038257837295532, "step": 8192 }, { "epoch": 1.8887608069164266, "grad_norm": 2.034955626144784, "learning_rate": 1.6969269100105544e-08, "loss": 0.5519133806228638, "step": 8193 }, { "epoch": 1.8889913544668588, "grad_norm": 1.7878403739464412, "learning_rate": 1.6899418560382796e-08, "loss": 0.43426749110221863, "step": 8194 }, { "epoch": 1.8892219020172911, "grad_norm": 1.609592778887584, "learning_rate": 1.6829710854947553e-08, "loss": 0.5156720280647278, "step": 8195 }, { "epoch": 1.8894524495677234, "grad_norm": 1.9758568999656432, "learning_rate": 1.6760145993927498e-08, "loss": 0.47747790813446045, "step": 8196 }, { "epoch": 1.8896829971181557, "grad_norm": 1.7451557727910063, "learning_rate": 1.6690723987429877e-08, "loss": 0.43909454345703125, "step": 8197 }, { "epoch": 1.889913544668588, "grad_norm": 1.6052109954506801, "learning_rate": 1.6621444845540845e-08, "loss": 0.459448903799057, "step": 8198 }, { "epoch": 1.8901440922190202, "grad_norm": 2.2071080011540185, "learning_rate": 1.6552308578326125e-08, "loss": 0.47015225887298584, "step": 8199 }, { "epoch": 1.8903746397694525, "grad_norm": 1.713037489046331, "learning_rate": 1.648331519583035e-08, "loss": 0.5074043869972229, "step": 8200 }, { "epoch": 1.8906051873198848, "grad_norm": 1.8682646944165644, "learning_rate": 1.641446470807739e-08, "loss": 0.54727703332901, "step": 8201 }, { "epoch": 1.890835734870317, "grad_norm": 1.7945484902703406, "learning_rate": 1.6345757125070802e-08, "loss": 0.4788065552711487, "step": 8202 }, { "epoch": 1.8910662824207494, "grad_norm": 1.5828500242044055, "learning_rate": 1.6277192456792933e-08, "loss": 0.4665883779525757, "step": 8203 }, { "epoch": 1.8912968299711816, "grad_norm": 1.623464134473763, "learning_rate": 1.6208770713205476e-08, "loss": 0.47191154956817627, "step": 8204 }, { "epoch": 1.891527377521614, "grad_norm": 1.7046539879820468, "learning_rate": 1.6140491904249485e-08, "loss": 0.48569393157958984, "step": 8205 }, { "epoch": 1.8917579250720462, "grad_norm": 1.964953961808447, "learning_rate": 1.6072356039845248e-08, "loss": 0.4643250107765198, "step": 8206 }, { "epoch": 1.8919884726224785, "grad_norm": 1.9677334911903335, "learning_rate": 1.6004363129891952e-08, "loss": 0.5228808522224426, "step": 8207 }, { "epoch": 1.8922190201729108, "grad_norm": 1.6695153529028328, "learning_rate": 1.5936513184268473e-08, "loss": 0.6013127565383911, "step": 8208 }, { "epoch": 1.892449567723343, "grad_norm": 1.4893331292755958, "learning_rate": 1.5868806212832485e-08, "loss": 0.435358464717865, "step": 8209 }, { "epoch": 1.8926801152737753, "grad_norm": 1.6150621839423676, "learning_rate": 1.580124222542134e-08, "loss": 0.4283503293991089, "step": 8210 }, { "epoch": 1.8929106628242076, "grad_norm": 1.6969954577088078, "learning_rate": 1.5733821231851297e-08, "loss": 0.48960375785827637, "step": 8211 }, { "epoch": 1.89314121037464, "grad_norm": 2.1552511560352308, "learning_rate": 1.566654324191785e-08, "loss": 0.4763854742050171, "step": 8212 }, { "epoch": 1.8933717579250722, "grad_norm": 1.6589761188821655, "learning_rate": 1.5599408265395964e-08, "loss": 0.4161483645439148, "step": 8213 }, { "epoch": 1.8936023054755045, "grad_norm": 1.4659774004562973, "learning_rate": 1.5532416312039387e-08, "loss": 0.49362367391586304, "step": 8214 }, { "epoch": 1.8938328530259367, "grad_norm": 1.5963069795498221, "learning_rate": 1.5465567391581557e-08, "loss": 0.4685269892215729, "step": 8215 }, { "epoch": 1.894063400576369, "grad_norm": 1.6900259640459938, "learning_rate": 1.539886151373493e-08, "loss": 0.41390180587768555, "step": 8216 }, { "epoch": 1.8942939481268013, "grad_norm": 1.6700784942712323, "learning_rate": 1.5332298688191082e-08, "loss": 0.4230450391769409, "step": 8217 }, { "epoch": 1.8945244956772336, "grad_norm": 1.8268833435784397, "learning_rate": 1.5265878924621056e-08, "loss": 0.48427850008010864, "step": 8218 }, { "epoch": 1.8947550432276659, "grad_norm": 1.4475419458342855, "learning_rate": 1.5199602232674692e-08, "loss": 0.5288915634155273, "step": 8219 }, { "epoch": 1.8949855907780981, "grad_norm": 1.6861602943756362, "learning_rate": 1.5133468621981505e-08, "loss": 0.4539833664894104, "step": 8220 }, { "epoch": 1.8952161383285304, "grad_norm": 2.50047407355109, "learning_rate": 1.5067478102149922e-08, "loss": 0.4823256731033325, "step": 8221 }, { "epoch": 1.8954466858789627, "grad_norm": 1.997599510564075, "learning_rate": 1.5001630682767718e-08, "loss": 0.48464787006378174, "step": 8222 }, { "epoch": 1.8956772334293948, "grad_norm": 1.5490561236499911, "learning_rate": 1.4935926373401907e-08, "loss": 0.49926498532295227, "step": 8223 }, { "epoch": 1.895907780979827, "grad_norm": 1.7495479454026852, "learning_rate": 1.4870365183598632e-08, "loss": 0.4835718870162964, "step": 8224 }, { "epoch": 1.8961383285302593, "grad_norm": 1.7245680541797082, "learning_rate": 1.4804947122883049e-08, "loss": 0.5158127546310425, "step": 8225 }, { "epoch": 1.8963688760806916, "grad_norm": 1.5787923396995764, "learning_rate": 1.473967220076e-08, "loss": 0.45827725529670715, "step": 8226 }, { "epoch": 1.8965994236311239, "grad_norm": 1.6342570379037924, "learning_rate": 1.4674540426713012e-08, "loss": 0.4177684187889099, "step": 8227 }, { "epoch": 1.8968299711815562, "grad_norm": 1.7374508298892117, "learning_rate": 1.4609551810205178e-08, "loss": 0.4626643657684326, "step": 8228 }, { "epoch": 1.8970605187319884, "grad_norm": 1.4056791598602973, "learning_rate": 1.4544706360678616e-08, "loss": 0.424638569355011, "step": 8229 }, { "epoch": 1.8972910662824207, "grad_norm": 1.4685379441322668, "learning_rate": 1.4480004087554898e-08, "loss": 0.4635809659957886, "step": 8230 }, { "epoch": 1.897521613832853, "grad_norm": 1.75143293227431, "learning_rate": 1.4415445000234282e-08, "loss": 0.4738515317440033, "step": 8231 }, { "epoch": 1.8977521613832853, "grad_norm": 1.5264824829484198, "learning_rate": 1.4351029108096713e-08, "loss": 0.4863637089729309, "step": 8232 }, { "epoch": 1.8979827089337176, "grad_norm": 1.5399019801695686, "learning_rate": 1.4286756420501034e-08, "loss": 0.5584152340888977, "step": 8233 }, { "epoch": 1.8982132564841498, "grad_norm": 1.496901447925544, "learning_rate": 1.4222626946785666e-08, "loss": 0.4283461570739746, "step": 8234 }, { "epoch": 1.8984438040345821, "grad_norm": 1.8147365486613847, "learning_rate": 1.4158640696267598e-08, "loss": 0.49863070249557495, "step": 8235 }, { "epoch": 1.8986743515850144, "grad_norm": 1.6662108192886154, "learning_rate": 1.409479767824362e-08, "loss": 0.38217055797576904, "step": 8236 }, { "epoch": 1.8989048991354467, "grad_norm": 1.6967046835869566, "learning_rate": 1.4031097901989308e-08, "loss": 0.4688405394554138, "step": 8237 }, { "epoch": 1.899135446685879, "grad_norm": 1.9034545453492508, "learning_rate": 1.3967541376759706e-08, "loss": 0.5327590703964233, "step": 8238 }, { "epoch": 1.8993659942363113, "grad_norm": 1.8446265826303438, "learning_rate": 1.3904128111788872e-08, "loss": 0.49749255180358887, "step": 8239 }, { "epoch": 1.8995965417867435, "grad_norm": 1.7179092731540313, "learning_rate": 1.3840858116289988e-08, "loss": 0.5315423011779785, "step": 8240 }, { "epoch": 1.8998270893371758, "grad_norm": 1.5737385318700703, "learning_rate": 1.3777731399455594e-08, "loss": 0.4651438593864441, "step": 8241 }, { "epoch": 1.900057636887608, "grad_norm": 1.7144081952703678, "learning_rate": 1.3714747970457352e-08, "loss": 0.323408842086792, "step": 8242 }, { "epoch": 1.9002881844380404, "grad_norm": 1.9581034490172329, "learning_rate": 1.3651907838446275e-08, "loss": 0.5402773022651672, "step": 8243 }, { "epoch": 1.9005187319884727, "grad_norm": 1.670746605340447, "learning_rate": 1.358921101255206e-08, "loss": 0.5026879906654358, "step": 8244 }, { "epoch": 1.900749279538905, "grad_norm": 1.7720812388937348, "learning_rate": 1.3526657501884087e-08, "loss": 0.5281597375869751, "step": 8245 }, { "epoch": 1.9009798270893372, "grad_norm": 1.6385593343590161, "learning_rate": 1.3464247315530642e-08, "loss": 0.412728488445282, "step": 8246 }, { "epoch": 1.9012103746397695, "grad_norm": 1.7237289199424797, "learning_rate": 1.340198046255947e-08, "loss": 0.4562723636627197, "step": 8247 }, { "epoch": 1.9014409221902018, "grad_norm": 1.626201065698003, "learning_rate": 1.3339856952017115e-08, "loss": 0.39775967597961426, "step": 8248 }, { "epoch": 1.901671469740634, "grad_norm": 1.6533133902767987, "learning_rate": 1.3277876792929466e-08, "loss": 0.39823421835899353, "step": 8249 }, { "epoch": 1.9019020172910661, "grad_norm": 1.4228259330908177, "learning_rate": 1.3216039994301765e-08, "loss": 0.3946484923362732, "step": 8250 }, { "epoch": 1.9021325648414984, "grad_norm": 2.068299680905734, "learning_rate": 1.3154346565118046e-08, "loss": 0.46879899501800537, "step": 8251 }, { "epoch": 1.9023631123919307, "grad_norm": 1.6777738143005605, "learning_rate": 1.3092796514341808e-08, "loss": 0.4379505515098572, "step": 8252 }, { "epoch": 1.902593659942363, "grad_norm": 1.5749007782930142, "learning_rate": 1.3031389850915674e-08, "loss": 0.39895427227020264, "step": 8253 }, { "epoch": 1.9028242074927952, "grad_norm": 1.495609087075131, "learning_rate": 1.2970126583761287e-08, "loss": 0.4997497797012329, "step": 8254 }, { "epoch": 1.9030547550432275, "grad_norm": 1.5959222152903147, "learning_rate": 1.2909006721779858e-08, "loss": 0.4764189124107361, "step": 8255 }, { "epoch": 1.9032853025936598, "grad_norm": 1.5708268780308896, "learning_rate": 1.2848030273851062e-08, "loss": 0.42075514793395996, "step": 8256 }, { "epoch": 1.903515850144092, "grad_norm": 2.1937828998511186, "learning_rate": 1.278719724883437e-08, "loss": 0.49138063192367554, "step": 8257 }, { "epoch": 1.9037463976945244, "grad_norm": 1.513683079655307, "learning_rate": 1.2726507655568264e-08, "loss": 0.4185170531272888, "step": 8258 }, { "epoch": 1.9039769452449566, "grad_norm": 1.412269470806139, "learning_rate": 1.2665961502870026e-08, "loss": 0.42299705743789673, "step": 8259 }, { "epoch": 1.904207492795389, "grad_norm": 1.5645581808319793, "learning_rate": 1.2605558799536508e-08, "loss": 0.3970172703266144, "step": 8260 }, { "epoch": 1.9044380403458212, "grad_norm": 1.8688835594223658, "learning_rate": 1.2545299554343803e-08, "loss": 0.48734235763549805, "step": 8261 }, { "epoch": 1.9046685878962535, "grad_norm": 1.6788669457783065, "learning_rate": 1.2485183776046793e-08, "loss": 0.42422592639923096, "step": 8262 }, { "epoch": 1.9048991354466858, "grad_norm": 1.5306993869763585, "learning_rate": 1.2425211473379604e-08, "loss": 0.43414005637168884, "step": 8263 }, { "epoch": 1.905129682997118, "grad_norm": 1.657887750948753, "learning_rate": 1.2365382655055601e-08, "loss": 0.5316790342330933, "step": 8264 }, { "epoch": 1.9053602305475503, "grad_norm": 1.9919373780449294, "learning_rate": 1.2305697329767384e-08, "loss": 0.517704963684082, "step": 8265 }, { "epoch": 1.9055907780979826, "grad_norm": 1.671616617942954, "learning_rate": 1.2246155506186572e-08, "loss": 0.47799554467201233, "step": 8266 }, { "epoch": 1.9058213256484149, "grad_norm": 1.5562617979556934, "learning_rate": 1.2186757192963915e-08, "loss": 0.5152599811553955, "step": 8267 }, { "epoch": 1.9060518731988472, "grad_norm": 1.4688745064382045, "learning_rate": 1.2127502398729505e-08, "loss": 0.46499788761138916, "step": 8268 }, { "epoch": 1.9062824207492794, "grad_norm": 1.8146565107608943, "learning_rate": 1.2068391132092348e-08, "loss": 0.5220280885696411, "step": 8269 }, { "epoch": 1.9065129682997117, "grad_norm": 1.4497058250698829, "learning_rate": 1.2009423401640684e-08, "loss": 0.46435877680778503, "step": 8270 }, { "epoch": 1.906743515850144, "grad_norm": 1.896368333082707, "learning_rate": 1.1950599215941992e-08, "loss": 0.5128264427185059, "step": 8271 }, { "epoch": 1.9069740634005763, "grad_norm": 1.6162622871518073, "learning_rate": 1.189191858354266e-08, "loss": 0.39614224433898926, "step": 8272 }, { "epoch": 1.9072046109510086, "grad_norm": 1.5987359191908617, "learning_rate": 1.1833381512968422e-08, "loss": 0.469489187002182, "step": 8273 }, { "epoch": 1.9074351585014409, "grad_norm": 1.5090874127212548, "learning_rate": 1.1774988012724363e-08, "loss": 0.40567007660865784, "step": 8274 }, { "epoch": 1.9076657060518731, "grad_norm": 1.8342116060768927, "learning_rate": 1.1716738091294143e-08, "loss": 0.47286513447761536, "step": 8275 }, { "epoch": 1.9078962536023054, "grad_norm": 1.8677623698184516, "learning_rate": 1.165863175714099e-08, "loss": 0.4351140856742859, "step": 8276 }, { "epoch": 1.9081268011527377, "grad_norm": 2.1204578995330823, "learning_rate": 1.1600669018707043e-08, "loss": 0.5400139093399048, "step": 8277 }, { "epoch": 1.90835734870317, "grad_norm": 1.9666099328710733, "learning_rate": 1.1542849884413897e-08, "loss": 0.4617918133735657, "step": 8278 }, { "epoch": 1.9085878962536023, "grad_norm": 1.7973383029247338, "learning_rate": 1.1485174362661942e-08, "loss": 0.4001161754131317, "step": 8279 }, { "epoch": 1.9088184438040345, "grad_norm": 1.7749869657942918, "learning_rate": 1.14276424618307e-08, "loss": 0.4725416302680969, "step": 8280 }, { "epoch": 1.9090489913544668, "grad_norm": 1.4489800751611797, "learning_rate": 1.137025419027926e-08, "loss": 0.43786871433258057, "step": 8281 }, { "epoch": 1.909279538904899, "grad_norm": 1.6944845320829343, "learning_rate": 1.1313009556345288e-08, "loss": 0.4960116147994995, "step": 8282 }, { "epoch": 1.9095100864553314, "grad_norm": 1.485040665380673, "learning_rate": 1.1255908568345906e-08, "loss": 0.46718698740005493, "step": 8283 }, { "epoch": 1.9097406340057637, "grad_norm": 1.601866993353985, "learning_rate": 1.119895123457737e-08, "loss": 0.49856632947921753, "step": 8284 }, { "epoch": 1.909971181556196, "grad_norm": 1.736808763671631, "learning_rate": 1.1142137563314835e-08, "loss": 0.44509345293045044, "step": 8285 }, { "epoch": 1.9102017291066282, "grad_norm": 1.5155711234857085, "learning_rate": 1.1085467562812812e-08, "loss": 0.4501849412918091, "step": 8286 }, { "epoch": 1.9104322766570605, "grad_norm": 1.864296209064819, "learning_rate": 1.1028941241305046e-08, "loss": 0.4580952823162079, "step": 8287 }, { "epoch": 1.9106628242074928, "grad_norm": 1.820881586406844, "learning_rate": 1.0972558607003968e-08, "loss": 0.43742048740386963, "step": 8288 }, { "epoch": 1.910893371757925, "grad_norm": 1.6142791937625995, "learning_rate": 1.091631966810147e-08, "loss": 0.41808924078941345, "step": 8289 }, { "epoch": 1.9111239193083573, "grad_norm": 1.6208739211222591, "learning_rate": 1.0860224432768462e-08, "loss": 0.46481162309646606, "step": 8290 }, { "epoch": 1.9113544668587896, "grad_norm": 2.011637000699936, "learning_rate": 1.0804272909155087e-08, "loss": 0.4603671431541443, "step": 8291 }, { "epoch": 1.911585014409222, "grad_norm": 1.8494404636328983, "learning_rate": 1.0748465105390403e-08, "loss": 0.49557918310165405, "step": 8292 }, { "epoch": 1.9118155619596542, "grad_norm": 2.002537998599403, "learning_rate": 1.0692801029582809e-08, "loss": 0.4407039284706116, "step": 8293 }, { "epoch": 1.9120461095100865, "grad_norm": 1.7586281642289996, "learning_rate": 1.0637280689819617e-08, "loss": 0.5207707285881042, "step": 8294 }, { "epoch": 1.9122766570605187, "grad_norm": 1.5038089673632116, "learning_rate": 1.0581904094167483e-08, "loss": 0.5129815340042114, "step": 8295 }, { "epoch": 1.912507204610951, "grad_norm": 1.947553585968147, "learning_rate": 1.0526671250671858e-08, "loss": 0.39395007491111755, "step": 8296 }, { "epoch": 1.9127377521613833, "grad_norm": 1.6292888672697963, "learning_rate": 1.0471582167357662e-08, "loss": 0.5052094459533691, "step": 8297 }, { "epoch": 1.9129682997118156, "grad_norm": 1.8639782484748226, "learning_rate": 1.0416636852228822e-08, "loss": 0.5339310169219971, "step": 8298 }, { "epoch": 1.9131988472622479, "grad_norm": 1.6724915320609384, "learning_rate": 1.0361835313268064e-08, "loss": 0.3916591703891754, "step": 8299 }, { "epoch": 1.9134293948126802, "grad_norm": 1.6417155774632985, "learning_rate": 1.0307177558437684e-08, "loss": 0.43442589044570923, "step": 8300 }, { "epoch": 1.9136599423631124, "grad_norm": 1.765418684368188, "learning_rate": 1.0252663595678889e-08, "loss": 0.48208218812942505, "step": 8301 }, { "epoch": 1.9138904899135447, "grad_norm": 1.5550213363618528, "learning_rate": 1.0198293432911898e-08, "loss": 0.4363470673561096, "step": 8302 }, { "epoch": 1.914121037463977, "grad_norm": 1.468689516141858, "learning_rate": 1.0144067078036167e-08, "loss": 0.43322134017944336, "step": 8303 }, { "epoch": 1.9143515850144093, "grad_norm": 1.559621305722424, "learning_rate": 1.0089984538930173e-08, "loss": 0.432983934879303, "step": 8304 }, { "epoch": 1.9145821325648416, "grad_norm": 1.8386361285939403, "learning_rate": 1.0036045823451634e-08, "loss": 0.4156948924064636, "step": 8305 }, { "epoch": 1.9148126801152738, "grad_norm": 1.6606703244215435, "learning_rate": 9.982250939437275e-09, "loss": 0.4155515432357788, "step": 8306 }, { "epoch": 1.9150432276657061, "grad_norm": 1.7059318829970815, "learning_rate": 9.928599894702961e-09, "loss": 0.47653689980506897, "step": 8307 }, { "epoch": 1.9152737752161384, "grad_norm": 1.5705288067964687, "learning_rate": 9.875092697043563e-09, "loss": 0.45228058099746704, "step": 8308 }, { "epoch": 1.9155043227665707, "grad_norm": 1.5823697184468708, "learning_rate": 9.821729354232977e-09, "loss": 0.4896412789821625, "step": 8309 }, { "epoch": 1.915734870317003, "grad_norm": 1.7271525589204022, "learning_rate": 9.768509874024556e-09, "loss": 0.41047337651252747, "step": 8310 }, { "epoch": 1.9159654178674352, "grad_norm": 1.6723293195859799, "learning_rate": 9.715434264150557e-09, "loss": 0.5180307626724243, "step": 8311 }, { "epoch": 1.9161959654178675, "grad_norm": 1.7461325154516316, "learning_rate": 9.662502532322147e-09, "loss": 0.46440133452415466, "step": 8312 }, { "epoch": 1.9164265129682998, "grad_norm": 1.6166489895049083, "learning_rate": 9.609714686229952e-09, "loss": 0.4066951274871826, "step": 8313 }, { "epoch": 1.916657060518732, "grad_norm": 1.7089538604188352, "learning_rate": 9.557070733543393e-09, "loss": 0.4911927282810211, "step": 8314 }, { "epoch": 1.9168876080691644, "grad_norm": 1.7069173470957535, "learning_rate": 9.504570681910907e-09, "loss": 0.5309121608734131, "step": 8315 }, { "epoch": 1.9171181556195966, "grad_norm": 1.802146087409815, "learning_rate": 9.452214538960501e-09, "loss": 0.4592200517654419, "step": 8316 }, { "epoch": 1.917348703170029, "grad_norm": 2.2439207686561886, "learning_rate": 9.400002312298871e-09, "loss": 0.44735923409461975, "step": 8317 }, { "epoch": 1.9175792507204612, "grad_norm": 1.868917565152502, "learning_rate": 9.347934009511837e-09, "loss": 0.5140376091003418, "step": 8318 }, { "epoch": 1.9178097982708935, "grad_norm": 1.4954089291718025, "learning_rate": 9.29600963816446e-09, "loss": 0.42626407742500305, "step": 8319 }, { "epoch": 1.9180403458213258, "grad_norm": 1.5252659338318748, "learning_rate": 9.244229205800813e-09, "loss": 0.4173426628112793, "step": 8320 }, { "epoch": 1.918270893371758, "grad_norm": 1.6423957945500776, "learning_rate": 9.192592719943992e-09, "loss": 0.41199928522109985, "step": 8321 }, { "epoch": 1.9185014409221903, "grad_norm": 1.621590778725344, "learning_rate": 9.14110018809644e-09, "loss": 0.43618956208229065, "step": 8322 }, { "epoch": 1.9187319884726226, "grad_norm": 1.7165545250310659, "learning_rate": 9.089751617739172e-09, "loss": 0.48510247468948364, "step": 8323 }, { "epoch": 1.9189625360230549, "grad_norm": 2.19025289523308, "learning_rate": 9.038547016332776e-09, "loss": 0.46992772817611694, "step": 8324 }, { "epoch": 1.9191930835734872, "grad_norm": 1.5553353507324814, "learning_rate": 8.987486391316745e-09, "loss": 0.4461814761161804, "step": 8325 }, { "epoch": 1.9194236311239194, "grad_norm": 1.6004360220081093, "learning_rate": 8.936569750109701e-09, "loss": 0.4462101459503174, "step": 8326 }, { "epoch": 1.9196541786743517, "grad_norm": 1.9035892190993247, "learning_rate": 8.885797100109283e-09, "loss": 0.5376943945884705, "step": 8327 }, { "epoch": 1.919884726224784, "grad_norm": 1.4622106701043842, "learning_rate": 8.835168448692032e-09, "loss": 0.4294065237045288, "step": 8328 }, { "epoch": 1.9201152737752163, "grad_norm": 1.5772985070440868, "learning_rate": 8.784683803214066e-09, "loss": 0.4223681688308716, "step": 8329 }, { "epoch": 1.9203458213256486, "grad_norm": 1.673718906169722, "learning_rate": 8.73434317100996e-09, "loss": 0.5321441888809204, "step": 8330 }, { "epoch": 1.9205763688760809, "grad_norm": 1.47290212215442, "learning_rate": 8.684146559393979e-09, "loss": 0.4908212423324585, "step": 8331 }, { "epoch": 1.9208069164265131, "grad_norm": 1.5252367208444346, "learning_rate": 8.634093975659062e-09, "loss": 0.48342543840408325, "step": 8332 }, { "epoch": 1.9210374639769452, "grad_norm": 1.584338687628807, "learning_rate": 8.584185427077285e-09, "loss": 0.5173824429512024, "step": 8333 }, { "epoch": 1.9212680115273775, "grad_norm": 1.3721837054005588, "learning_rate": 8.534420920899844e-09, "loss": 0.42781007289886475, "step": 8334 }, { "epoch": 1.9214985590778098, "grad_norm": 1.7522004684189494, "learning_rate": 8.484800464357067e-09, "loss": 0.48095422983169556, "step": 8335 }, { "epoch": 1.921729106628242, "grad_norm": 1.486479647979893, "learning_rate": 8.4353240646583e-09, "loss": 0.4701269865036011, "step": 8336 }, { "epoch": 1.9219596541786743, "grad_norm": 1.5874139106024778, "learning_rate": 8.385991728991903e-09, "loss": 0.48512130975723267, "step": 8337 }, { "epoch": 1.9221902017291066, "grad_norm": 1.6650791676703436, "learning_rate": 8.336803464525255e-09, "loss": 0.456530898809433, "step": 8338 }, { "epoch": 1.9224207492795389, "grad_norm": 1.6568880053058186, "learning_rate": 8.287759278405082e-09, "loss": 0.48397719860076904, "step": 8339 }, { "epoch": 1.9226512968299712, "grad_norm": 1.6643186904423333, "learning_rate": 8.238859177756907e-09, "loss": 0.5594744086265564, "step": 8340 }, { "epoch": 1.9228818443804034, "grad_norm": 1.771066054203885, "learning_rate": 8.190103169685269e-09, "loss": 0.4647720754146576, "step": 8341 }, { "epoch": 1.9231123919308357, "grad_norm": 1.712221014293666, "learning_rate": 8.141491261274169e-09, "loss": 0.5003525018692017, "step": 8342 }, { "epoch": 1.923342939481268, "grad_norm": 1.4924276842796294, "learning_rate": 8.09302345958629e-09, "loss": 0.5174271464347839, "step": 8343 }, { "epoch": 1.9235734870317003, "grad_norm": 2.0224825185920197, "learning_rate": 8.044699771663554e-09, "loss": 0.4582705795764923, "step": 8344 }, { "epoch": 1.9238040345821326, "grad_norm": 1.630234830459202, "learning_rate": 7.99652020452679e-09, "loss": 0.4875626564025879, "step": 8345 }, { "epoch": 1.9240345821325648, "grad_norm": 1.607848466927164, "learning_rate": 7.948484765175956e-09, "loss": 0.45983967185020447, "step": 8346 }, { "epoch": 1.9242651296829971, "grad_norm": 1.873627093178645, "learning_rate": 7.900593460590133e-09, "loss": 0.46369433403015137, "step": 8347 }, { "epoch": 1.9244956772334294, "grad_norm": 1.840943880232746, "learning_rate": 7.852846297727644e-09, "loss": 0.4666575491428375, "step": 8348 }, { "epoch": 1.9247262247838617, "grad_norm": 1.7631099459348156, "learning_rate": 7.805243283525387e-09, "loss": 0.39856386184692383, "step": 8349 }, { "epoch": 1.924956772334294, "grad_norm": 1.5924965126506507, "learning_rate": 7.757784424899716e-09, "loss": 0.3968764543533325, "step": 8350 }, { "epoch": 1.9251873198847262, "grad_norm": 1.5539586874837867, "learning_rate": 7.710469728745895e-09, "loss": 0.42590945959091187, "step": 8351 }, { "epoch": 1.9254178674351585, "grad_norm": 1.8558131754812899, "learning_rate": 7.6632992019382e-09, "loss": 0.4320804476737976, "step": 8352 }, { "epoch": 1.9256484149855908, "grad_norm": 1.373435252153639, "learning_rate": 7.616272851330151e-09, "loss": 0.42604368925094604, "step": 8353 }, { "epoch": 1.925878962536023, "grad_norm": 1.5219509394941837, "learning_rate": 7.569390683753951e-09, "loss": 0.46070748567581177, "step": 8354 }, { "epoch": 1.9261095100864554, "grad_norm": 1.9609201609900513, "learning_rate": 7.52265270602126e-09, "loss": 0.5836024284362793, "step": 8355 }, { "epoch": 1.9263400576368876, "grad_norm": 1.8423197030839495, "learning_rate": 7.476058924922645e-09, "loss": 0.4969649910926819, "step": 8356 }, { "epoch": 1.92657060518732, "grad_norm": 1.597412550113596, "learning_rate": 7.429609347227694e-09, "loss": 0.5294152498245239, "step": 8357 }, { "epoch": 1.9268011527377522, "grad_norm": 1.8991873203876128, "learning_rate": 7.383303979684896e-09, "loss": 0.47974759340286255, "step": 8358 }, { "epoch": 1.9270317002881845, "grad_norm": 1.7244962769819776, "learning_rate": 7.337142829022202e-09, "loss": 0.5180144309997559, "step": 8359 }, { "epoch": 1.9272622478386165, "grad_norm": 1.8763071699681506, "learning_rate": 7.291125901946027e-09, "loss": 0.4036504924297333, "step": 8360 }, { "epoch": 1.9274927953890488, "grad_norm": 1.74498653425893, "learning_rate": 7.2452532051423546e-09, "loss": 0.4952111840248108, "step": 8361 }, { "epoch": 1.927723342939481, "grad_norm": 1.9182000125605638, "learning_rate": 7.199524745275965e-09, "loss": 0.43847912549972534, "step": 8362 }, { "epoch": 1.9279538904899134, "grad_norm": 1.480848731477337, "learning_rate": 7.153940528990765e-09, "loss": 0.42722171545028687, "step": 8363 }, { "epoch": 1.9281844380403457, "grad_norm": 1.496577307338104, "learning_rate": 7.10850056290968e-09, "loss": 0.44882699847221375, "step": 8364 }, { "epoch": 1.928414985590778, "grad_norm": 1.8191818061281437, "learning_rate": 7.0632048536345415e-09, "loss": 0.5153505206108093, "step": 8365 }, { "epoch": 1.9286455331412102, "grad_norm": 1.6967369186549641, "learning_rate": 7.018053407746416e-09, "loss": 0.5489900708198547, "step": 8366 }, { "epoch": 1.9288760806916425, "grad_norm": 1.6353777624372818, "learning_rate": 6.97304623180539e-09, "loss": 0.4193941354751587, "step": 8367 }, { "epoch": 1.9291066282420748, "grad_norm": 1.623882816413383, "learning_rate": 6.928183332350346e-09, "loss": 0.4031536281108856, "step": 8368 }, { "epoch": 1.929337175792507, "grad_norm": 1.594539749841058, "learning_rate": 6.883464715899734e-09, "loss": 0.4182126522064209, "step": 8369 }, { "epoch": 1.9295677233429394, "grad_norm": 1.428635807002028, "learning_rate": 6.838890388950469e-09, "loss": 0.4714782238006592, "step": 8370 }, { "epoch": 1.9297982708933716, "grad_norm": 1.8277761014318794, "learning_rate": 6.7944603579787044e-09, "loss": 0.47345131635665894, "step": 8371 }, { "epoch": 1.930028818443804, "grad_norm": 1.6599936080596263, "learning_rate": 6.750174629439831e-09, "loss": 0.5616276264190674, "step": 8372 }, { "epoch": 1.9302593659942362, "grad_norm": 1.7317139547020615, "learning_rate": 6.706033209767925e-09, "loss": 0.544796347618103, "step": 8373 }, { "epoch": 1.9304899135446685, "grad_norm": 1.5033990065109937, "learning_rate": 6.662036105376412e-09, "loss": 0.5631225109100342, "step": 8374 }, { "epoch": 1.9307204610951008, "grad_norm": 1.6075623646085158, "learning_rate": 6.6181833226575116e-09, "loss": 0.39414408802986145, "step": 8375 }, { "epoch": 1.930951008645533, "grad_norm": 1.671437220717172, "learning_rate": 6.574474867982793e-09, "loss": 0.4659278392791748, "step": 8376 }, { "epoch": 1.9311815561959653, "grad_norm": 1.4579597767228383, "learning_rate": 6.5309107477022895e-09, "loss": 0.423857718706131, "step": 8377 }, { "epoch": 1.9314121037463976, "grad_norm": 1.5793968032935768, "learning_rate": 6.4874909681457145e-09, "loss": 0.47983941435813904, "step": 8378 }, { "epoch": 1.9316426512968299, "grad_norm": 1.6183823258999779, "learning_rate": 6.444215535621245e-09, "loss": 0.36726903915405273, "step": 8379 }, { "epoch": 1.9318731988472622, "grad_norm": 1.718540594241041, "learning_rate": 6.401084456416628e-09, "loss": 0.5433714985847473, "step": 8380 }, { "epoch": 1.9321037463976944, "grad_norm": 1.5160106606022126, "learning_rate": 6.358097736798295e-09, "loss": 0.4377497434616089, "step": 8381 }, { "epoch": 1.9323342939481267, "grad_norm": 1.778623844156025, "learning_rate": 6.3152553830115864e-09, "loss": 0.5426797270774841, "step": 8382 }, { "epoch": 1.932564841498559, "grad_norm": 1.4848182166015895, "learning_rate": 6.2725574012812975e-09, "loss": 0.3502352237701416, "step": 8383 }, { "epoch": 1.9327953890489913, "grad_norm": 1.747637938738868, "learning_rate": 6.23000379781069e-09, "loss": 0.4589088559150696, "step": 8384 }, { "epoch": 1.9330259365994236, "grad_norm": 1.6442032368895256, "learning_rate": 6.187594578782707e-09, "loss": 0.5089824795722961, "step": 8385 }, { "epoch": 1.9332564841498558, "grad_norm": 1.6208238067165452, "learning_rate": 6.145329750358752e-09, "loss": 0.3649140000343323, "step": 8386 }, { "epoch": 1.9334870317002881, "grad_norm": 1.4952107950779492, "learning_rate": 6.103209318679469e-09, "loss": 0.3923742175102234, "step": 8387 }, { "epoch": 1.9337175792507204, "grad_norm": 1.7006901623122679, "learning_rate": 6.061233289864632e-09, "loss": 0.4399063289165497, "step": 8388 }, { "epoch": 1.9339481268011527, "grad_norm": 1.4843303104270151, "learning_rate": 6.0194016700129134e-09, "loss": 0.4042898416519165, "step": 8389 }, { "epoch": 1.934178674351585, "grad_norm": 1.5429865198164772, "learning_rate": 5.9777144652018994e-09, "loss": 0.4660540521144867, "step": 8390 }, { "epoch": 1.9344092219020173, "grad_norm": 1.6674342548655874, "learning_rate": 5.9361716814883e-09, "loss": 0.5535542964935303, "step": 8391 }, { "epoch": 1.9346397694524495, "grad_norm": 1.703428306689839, "learning_rate": 5.894773324907953e-09, "loss": 0.4237617254257202, "step": 8392 }, { "epoch": 1.9348703170028818, "grad_norm": 1.8224589830441662, "learning_rate": 5.853519401475604e-09, "loss": 0.5574471950531006, "step": 8393 }, { "epoch": 1.935100864553314, "grad_norm": 2.049918989905908, "learning_rate": 5.812409917185012e-09, "loss": 0.48153114318847656, "step": 8394 }, { "epoch": 1.9353314121037464, "grad_norm": 1.6430356369587549, "learning_rate": 5.771444878008846e-09, "loss": 0.523391604423523, "step": 8395 }, { "epoch": 1.9355619596541787, "grad_norm": 1.5802726481699332, "learning_rate": 5.730624289899122e-09, "loss": 0.4961997866630554, "step": 8396 }, { "epoch": 1.935792507204611, "grad_norm": 1.5755062335679042, "learning_rate": 5.6899481587863174e-09, "loss": 0.4530646800994873, "step": 8397 }, { "epoch": 1.9360230547550432, "grad_norm": 1.799741209476833, "learning_rate": 5.649416490580594e-09, "loss": 0.4895196855068207, "step": 8398 }, { "epoch": 1.9362536023054755, "grad_norm": 1.3496435901328925, "learning_rate": 5.609029291170575e-09, "loss": 0.4179130792617798, "step": 8399 }, { "epoch": 1.9364841498559078, "grad_norm": 1.5676225979121055, "learning_rate": 5.568786566424122e-09, "loss": 0.5279220342636108, "step": 8400 }, { "epoch": 1.93671469740634, "grad_norm": 1.7613704417043121, "learning_rate": 5.528688322188224e-09, "loss": 0.5009425282478333, "step": 8401 }, { "epoch": 1.9369452449567723, "grad_norm": 1.964452384404303, "learning_rate": 5.488734564288555e-09, "loss": 0.4320024847984314, "step": 8402 }, { "epoch": 1.9371757925072046, "grad_norm": 2.2196156524988764, "learning_rate": 5.448925298530027e-09, "loss": 0.6183265447616577, "step": 8403 }, { "epoch": 1.937406340057637, "grad_norm": 1.6120974046128558, "learning_rate": 5.40926053069668e-09, "loss": 0.5422444343566895, "step": 8404 }, { "epoch": 1.9376368876080692, "grad_norm": 1.7196795497330961, "learning_rate": 5.369740266551126e-09, "loss": 0.4645472466945648, "step": 8405 }, { "epoch": 1.9378674351585015, "grad_norm": 1.4249047377418558, "learning_rate": 5.330364511835439e-09, "loss": 0.3970335125923157, "step": 8406 }, { "epoch": 1.9380979827089337, "grad_norm": 1.8953829064382313, "learning_rate": 5.291133272270376e-09, "loss": 0.5468182563781738, "step": 8407 }, { "epoch": 1.938328530259366, "grad_norm": 1.865719573844204, "learning_rate": 5.252046553556044e-09, "loss": 0.5717053413391113, "step": 8408 }, { "epoch": 1.9385590778097983, "grad_norm": 1.4715205331924197, "learning_rate": 5.213104361371012e-09, "loss": 0.5246316194534302, "step": 8409 }, { "epoch": 1.9387896253602306, "grad_norm": 1.7486117905905436, "learning_rate": 5.174306701373421e-09, "loss": 0.4892142415046692, "step": 8410 }, { "epoch": 1.9390201729106629, "grad_norm": 1.7044045562562955, "learning_rate": 5.135653579200094e-09, "loss": 0.46718069911003113, "step": 8411 }, { "epoch": 1.9392507204610951, "grad_norm": 1.678083637284962, "learning_rate": 5.097145000466985e-09, "loss": 0.48289304971694946, "step": 8412 }, { "epoch": 1.9394812680115274, "grad_norm": 1.589487272584262, "learning_rate": 5.05878097076895e-09, "loss": 0.4352980852127075, "step": 8413 }, { "epoch": 1.9397118155619597, "grad_norm": 1.5324614271383616, "learning_rate": 5.020561495679865e-09, "loss": 0.4404779076576233, "step": 8414 }, { "epoch": 1.939942363112392, "grad_norm": 1.5122574500468116, "learning_rate": 4.9824865807526205e-09, "loss": 0.5069785714149475, "step": 8415 }, { "epoch": 1.9401729106628243, "grad_norm": 1.7183195331173957, "learning_rate": 4.944556231519015e-09, "loss": 0.5377410650253296, "step": 8416 }, { "epoch": 1.9404034582132565, "grad_norm": 1.6585204896545156, "learning_rate": 4.9067704534901944e-09, "loss": 0.44542694091796875, "step": 8417 }, { "epoch": 1.9406340057636888, "grad_norm": 1.8607811753782286, "learning_rate": 4.869129252155768e-09, "loss": 0.3760349750518799, "step": 8418 }, { "epoch": 1.940864553314121, "grad_norm": 1.8090313920756074, "learning_rate": 4.831632632984695e-09, "loss": 0.4890024960041046, "step": 8419 }, { "epoch": 1.9410951008645534, "grad_norm": 1.5849870242815058, "learning_rate": 4.794280601424949e-09, "loss": 0.4114872217178345, "step": 8420 }, { "epoch": 1.9413256484149857, "grad_norm": 1.650126186884988, "learning_rate": 4.757073162903302e-09, "loss": 0.4893375039100647, "step": 8421 }, { "epoch": 1.941556195965418, "grad_norm": 1.6807297976206002, "learning_rate": 4.7200103228255405e-09, "loss": 0.4684467911720276, "step": 8422 }, { "epoch": 1.9417867435158502, "grad_norm": 1.67464763167606, "learning_rate": 4.68309208657669e-09, "loss": 0.44905227422714233, "step": 8423 }, { "epoch": 1.9420172910662825, "grad_norm": 1.7633620797202647, "learning_rate": 4.646318459520349e-09, "loss": 0.41259336471557617, "step": 8424 }, { "epoch": 1.9422478386167148, "grad_norm": 1.5507547402556556, "learning_rate": 4.6096894469996876e-09, "loss": 0.43815112113952637, "step": 8425 }, { "epoch": 1.942478386167147, "grad_norm": 1.5778990502154555, "learning_rate": 4.573205054336115e-09, "loss": 0.45738598704338074, "step": 8426 }, { "epoch": 1.9427089337175794, "grad_norm": 1.5588092260341486, "learning_rate": 4.536865286830727e-09, "loss": 0.3894132971763611, "step": 8427 }, { "epoch": 1.9429394812680116, "grad_norm": 1.623138474885205, "learning_rate": 4.5006701497631864e-09, "loss": 0.5066829919815063, "step": 8428 }, { "epoch": 1.943170028818444, "grad_norm": 2.1057192715369077, "learning_rate": 4.464619648392287e-09, "loss": 0.5245810747146606, "step": 8429 }, { "epoch": 1.9434005763688762, "grad_norm": 1.450252578239379, "learning_rate": 4.42871378795584e-09, "loss": 0.4634879231452942, "step": 8430 }, { "epoch": 1.9436311239193085, "grad_norm": 1.6490696686605655, "learning_rate": 4.3929525736705605e-09, "loss": 0.5357192158699036, "step": 8431 }, { "epoch": 1.9438616714697408, "grad_norm": 1.6299846504660511, "learning_rate": 4.357336010732071e-09, "loss": 0.4591226577758789, "step": 8432 }, { "epoch": 1.944092219020173, "grad_norm": 1.5898360627427908, "learning_rate": 4.321864104315343e-09, "loss": 0.42836296558380127, "step": 8433 }, { "epoch": 1.9443227665706053, "grad_norm": 1.6213619512908282, "learning_rate": 4.286536859573919e-09, "loss": 0.5093647241592407, "step": 8434 }, { "epoch": 1.9445533141210376, "grad_norm": 1.9084911394042954, "learning_rate": 4.251354281640473e-09, "loss": 0.40465253591537476, "step": 8435 }, { "epoch": 1.9447838616714699, "grad_norm": 1.7729160223942424, "learning_rate": 4.2163163756265825e-09, "loss": 0.46636664867401123, "step": 8436 }, { "epoch": 1.9450144092219022, "grad_norm": 1.9152181663086612, "learning_rate": 4.1814231466230645e-09, "loss": 0.49786052107810974, "step": 8437 }, { "epoch": 1.9452449567723344, "grad_norm": 1.6016583543312068, "learning_rate": 4.146674599699418e-09, "loss": 0.48284637928009033, "step": 8438 }, { "epoch": 1.9454755043227667, "grad_norm": 1.3525514753444163, "learning_rate": 4.112070739904272e-09, "loss": 0.4034563899040222, "step": 8439 }, { "epoch": 1.945706051873199, "grad_norm": 1.8628888292595736, "learning_rate": 4.077611572265382e-09, "loss": 0.5521979928016663, "step": 8440 }, { "epoch": 1.9459365994236313, "grad_norm": 1.6868967423968648, "learning_rate": 4.043297101789078e-09, "loss": 0.4537632465362549, "step": 8441 }, { "epoch": 1.9461671469740636, "grad_norm": 1.6850998569290199, "learning_rate": 4.009127333460926e-09, "loss": 0.5159620046615601, "step": 8442 }, { "epoch": 1.9463976945244956, "grad_norm": 1.4228816698157278, "learning_rate": 3.975102272245512e-09, "loss": 0.46216505765914917, "step": 8443 }, { "epoch": 1.946628242074928, "grad_norm": 1.929330228609917, "learning_rate": 3.941221923086324e-09, "loss": 0.5094718933105469, "step": 8444 }, { "epoch": 1.9468587896253602, "grad_norm": 1.733746574616843, "learning_rate": 3.907486290905759e-09, "loss": 0.4824807941913605, "step": 8445 }, { "epoch": 1.9470893371757925, "grad_norm": 1.4644111723489823, "learning_rate": 3.873895380605341e-09, "loss": 0.4324292838573456, "step": 8446 }, { "epoch": 1.9473198847262247, "grad_norm": 1.8148293632262469, "learning_rate": 3.8404491970653874e-09, "loss": 0.5109740495681763, "step": 8447 }, { "epoch": 1.947550432276657, "grad_norm": 1.915278078833533, "learning_rate": 3.8071477451453445e-09, "loss": 0.47580888867378235, "step": 8448 }, { "epoch": 1.9477809798270893, "grad_norm": 1.582715092663918, "learning_rate": 3.773991029683565e-09, "loss": 0.4678313136100769, "step": 8449 }, { "epoch": 1.9480115273775216, "grad_norm": 1.6330856951875405, "learning_rate": 3.740979055497306e-09, "loss": 0.46340861916542053, "step": 8450 }, { "epoch": 1.9482420749279539, "grad_norm": 1.387966844064718, "learning_rate": 3.7081118273829536e-09, "loss": 0.43767350912094116, "step": 8451 }, { "epoch": 1.9484726224783862, "grad_norm": 1.7100462700673766, "learning_rate": 3.6753893501156873e-09, "loss": 0.5428752303123474, "step": 8452 }, { "epoch": 1.9487031700288184, "grad_norm": 1.47058628039416, "learning_rate": 3.6428116284498157e-09, "loss": 0.380368709564209, "step": 8453 }, { "epoch": 1.9489337175792507, "grad_norm": 1.6255373517486364, "learning_rate": 3.610378667118552e-09, "loss": 0.4518704414367676, "step": 8454 }, { "epoch": 1.949164265129683, "grad_norm": 1.9772184397093173, "learning_rate": 3.5780904708340167e-09, "loss": 0.5296592712402344, "step": 8455 }, { "epoch": 1.9493948126801153, "grad_norm": 1.6560836592749282, "learning_rate": 3.545947044287345e-09, "loss": 0.4429657459259033, "step": 8456 }, { "epoch": 1.9496253602305476, "grad_norm": 1.5997951231441618, "learning_rate": 3.5139483921486913e-09, "loss": 0.4942644238471985, "step": 8457 }, { "epoch": 1.9498559077809798, "grad_norm": 1.514213668293002, "learning_rate": 3.4820945190671138e-09, "loss": 0.4621245861053467, "step": 8458 }, { "epoch": 1.9500864553314121, "grad_norm": 1.663238270461513, "learning_rate": 3.450385429670577e-09, "loss": 0.3954099416732788, "step": 8459 }, { "epoch": 1.9503170028818444, "grad_norm": 1.6486452582394135, "learning_rate": 3.418821128566174e-09, "loss": 0.48606687784194946, "step": 8460 }, { "epoch": 1.9505475504322767, "grad_norm": 1.5621828863712628, "learning_rate": 3.3874016203397916e-09, "loss": 0.4746624827384949, "step": 8461 }, { "epoch": 1.950778097982709, "grad_norm": 1.7626425316421102, "learning_rate": 3.356126909556445e-09, "loss": 0.473477840423584, "step": 8462 }, { "epoch": 1.9510086455331412, "grad_norm": 1.718222636076735, "learning_rate": 3.3249970007599435e-09, "loss": 0.47697845101356506, "step": 8463 }, { "epoch": 1.9512391930835735, "grad_norm": 1.6677094487521866, "learning_rate": 3.294011898473115e-09, "loss": 0.4818275570869446, "step": 8464 }, { "epoch": 1.9514697406340058, "grad_norm": 1.7732869220620682, "learning_rate": 3.263171607197912e-09, "loss": 0.4384726285934448, "step": 8465 }, { "epoch": 1.951700288184438, "grad_norm": 1.6730968086224116, "learning_rate": 3.232476131415085e-09, "loss": 0.5690720081329346, "step": 8466 }, { "epoch": 1.9519308357348704, "grad_norm": 1.5356559279376012, "learning_rate": 3.2019254755841774e-09, "loss": 0.4018305540084839, "step": 8467 }, { "epoch": 1.9521613832853026, "grad_norm": 1.3752378081542544, "learning_rate": 3.171519644144083e-09, "loss": 0.4394480586051941, "step": 8468 }, { "epoch": 1.952391930835735, "grad_norm": 1.784880778617597, "learning_rate": 3.1412586415123787e-09, "loss": 0.4497807025909424, "step": 8469 }, { "epoch": 1.952622478386167, "grad_norm": 1.77748620875406, "learning_rate": 3.1111424720856595e-09, "loss": 0.4412338137626648, "step": 8470 }, { "epoch": 1.9528530259365993, "grad_norm": 1.6059566008206951, "learning_rate": 3.081171140239536e-09, "loss": 0.44863519072532654, "step": 8471 }, { "epoch": 1.9530835734870315, "grad_norm": 1.6456475980995775, "learning_rate": 3.0513446503285245e-09, "loss": 0.5177716016769409, "step": 8472 }, { "epoch": 1.9533141210374638, "grad_norm": 1.6665867061870976, "learning_rate": 3.021663006686048e-09, "loss": 0.5143194198608398, "step": 8473 }, { "epoch": 1.953544668587896, "grad_norm": 1.9552097979581144, "learning_rate": 2.9921262136246574e-09, "loss": 0.5202943086624146, "step": 8474 }, { "epoch": 1.9537752161383284, "grad_norm": 1.5949249311044142, "learning_rate": 2.9627342754355853e-09, "loss": 0.3946771025657654, "step": 8475 }, { "epoch": 1.9540057636887607, "grad_norm": 1.3562551955817035, "learning_rate": 2.933487196389195e-09, "loss": 0.3503817021846771, "step": 8476 }, { "epoch": 1.954236311239193, "grad_norm": 1.7470917367547707, "learning_rate": 2.9043849807349753e-09, "loss": 0.41341015696525574, "step": 8477 }, { "epoch": 1.9544668587896252, "grad_norm": 1.704213335494292, "learning_rate": 2.8754276327009886e-09, "loss": 0.4515586495399475, "step": 8478 }, { "epoch": 1.9546974063400575, "grad_norm": 2.058172843593399, "learning_rate": 2.8466151564944253e-09, "loss": 0.49089083075523376, "step": 8479 }, { "epoch": 1.9549279538904898, "grad_norm": 1.4448060876815116, "learning_rate": 2.817947556301492e-09, "loss": 0.4171237349510193, "step": 8480 }, { "epoch": 1.955158501440922, "grad_norm": 1.6605698756015046, "learning_rate": 2.789424836287413e-09, "loss": 0.4951333999633789, "step": 8481 }, { "epoch": 1.9553890489913544, "grad_norm": 1.567040182627561, "learning_rate": 2.761047000595984e-09, "loss": 0.37939924001693726, "step": 8482 }, { "epoch": 1.9556195965417866, "grad_norm": 1.964544419289842, "learning_rate": 2.732814053350463e-09, "loss": 0.433444619178772, "step": 8483 }, { "epoch": 1.955850144092219, "grad_norm": 1.7955734026982275, "learning_rate": 2.7047259986526795e-09, "loss": 0.5602415800094604, "step": 8484 }, { "epoch": 1.9560806916426512, "grad_norm": 1.6268724855988212, "learning_rate": 2.67678284058348e-09, "loss": 0.46474429965019226, "step": 8485 }, { "epoch": 1.9563112391930835, "grad_norm": 1.6444413974801657, "learning_rate": 2.648984583202951e-09, "loss": 0.4493546783924103, "step": 8486 }, { "epoch": 1.9565417867435158, "grad_norm": 1.6869718104061497, "learning_rate": 2.6213312305495283e-09, "loss": 0.4560815691947937, "step": 8487 }, { "epoch": 1.956772334293948, "grad_norm": 1.6786523975409646, "learning_rate": 2.593822786641331e-09, "loss": 0.4888133406639099, "step": 8488 }, { "epoch": 1.9570028818443803, "grad_norm": 1.6374287407121897, "learning_rate": 2.5664592554747176e-09, "loss": 0.4761776328086853, "step": 8489 }, { "epoch": 1.9572334293948126, "grad_norm": 1.5386878575559964, "learning_rate": 2.5392406410256196e-09, "loss": 0.48560792207717896, "step": 8490 }, { "epoch": 1.9574639769452449, "grad_norm": 1.770586958344679, "learning_rate": 2.5121669472484287e-09, "loss": 0.5601654052734375, "step": 8491 }, { "epoch": 1.9576945244956772, "grad_norm": 1.4713351138364472, "learning_rate": 2.485238178076665e-09, "loss": 0.3691544532775879, "step": 8492 }, { "epoch": 1.9579250720461094, "grad_norm": 1.4167042718550695, "learning_rate": 2.458454337422866e-09, "loss": 0.39126554131507874, "step": 8493 }, { "epoch": 1.9581556195965417, "grad_norm": 1.8048695509109922, "learning_rate": 2.431815429178474e-09, "loss": 0.5396989583969116, "step": 8494 }, { "epoch": 1.958386167146974, "grad_norm": 1.3843806172146762, "learning_rate": 2.405321457213727e-09, "loss": 0.4269161820411682, "step": 8495 }, { "epoch": 1.9586167146974063, "grad_norm": 1.6940755991478917, "learning_rate": 2.3789724253781006e-09, "loss": 0.5214752554893494, "step": 8496 }, { "epoch": 1.9588472622478386, "grad_norm": 1.713092802772806, "learning_rate": 2.352768337499755e-09, "loss": 0.4246710538864136, "step": 8497 }, { "epoch": 1.9590778097982708, "grad_norm": 1.5890787020160493, "learning_rate": 2.3267091973857568e-09, "loss": 0.4201776683330536, "step": 8498 }, { "epoch": 1.9593083573487031, "grad_norm": 1.8593976573086313, "learning_rate": 2.3007950088222984e-09, "loss": 0.4610064625740051, "step": 8499 }, { "epoch": 1.9595389048991354, "grad_norm": 1.6805721571635974, "learning_rate": 2.2750257755745907e-09, "loss": 0.5150582194328308, "step": 8500 } ], "logging_steps": 1, "max_steps": 8676, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2964802977792000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }