diff --git "a/checkpoint-8676/trainer_state.json" "b/checkpoint-8676/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-8676/trainer_state.json" @@ -0,0 +1,60766 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 8676, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00023054755043227666, + "grad_norm": 0.50523491192486, + "learning_rate": 0.0, + "loss": 1.3478702306747437, + "step": 1 + }, + { + "epoch": 0.0004610951008645533, + "grad_norm": 0.48124949879069834, + "learning_rate": 4.6082949308755755e-09, + "loss": 1.3662631511688232, + "step": 2 + }, + { + "epoch": 0.00069164265129683, + "grad_norm": 0.4973440342857191, + "learning_rate": 9.216589861751151e-09, + "loss": 1.3309710025787354, + "step": 3 + }, + { + "epoch": 0.0009221902017291067, + "grad_norm": 0.5353819800819951, + "learning_rate": 1.3824884792626728e-08, + "loss": 1.4097447395324707, + "step": 4 + }, + { + "epoch": 0.0011527377521613833, + "grad_norm": 0.5883475979029319, + "learning_rate": 1.8433179723502302e-08, + "loss": 1.519249439239502, + "step": 5 + }, + { + "epoch": 0.00138328530259366, + "grad_norm": 0.46695865322112806, + "learning_rate": 2.304147465437788e-08, + "loss": 1.2603031396865845, + "step": 6 + }, + { + "epoch": 0.0016138328530259365, + "grad_norm": 0.439129921669028, + "learning_rate": 2.7649769585253456e-08, + "loss": 1.1772313117980957, + "step": 7 + }, + { + "epoch": 0.0018443804034582133, + "grad_norm": 0.5319224558631903, + "learning_rate": 3.225806451612903e-08, + "loss": 1.4954156875610352, + "step": 8 + }, + { + "epoch": 0.00207492795389049, + "grad_norm": 0.4236962197810375, + "learning_rate": 3.6866359447004604e-08, + "loss": 1.261225938796997, + "step": 9 + }, + { + "epoch": 0.0023054755043227667, + "grad_norm": 0.4722435329827196, + "learning_rate": 4.1474654377880186e-08, + "loss": 1.3137118816375732, + "step": 10 + }, + { + "epoch": 0.002536023054755043, + "grad_norm": 0.5227481950657756, + "learning_rate": 4.608294930875576e-08, + "loss": 1.3614041805267334, + "step": 11 + }, + { + "epoch": 0.00276657060518732, + "grad_norm": 0.501227735935382, + "learning_rate": 5.069124423963134e-08, + "loss": 1.2697081565856934, + "step": 12 + }, + { + "epoch": 0.0029971181556195966, + "grad_norm": 0.4604754826614043, + "learning_rate": 5.529953917050691e-08, + "loss": 1.2227230072021484, + "step": 13 + }, + { + "epoch": 0.003227665706051873, + "grad_norm": 0.5117274741098783, + "learning_rate": 5.990783410138249e-08, + "loss": 1.4593628644943237, + "step": 14 + }, + { + "epoch": 0.00345821325648415, + "grad_norm": 0.47884130963603894, + "learning_rate": 6.451612903225806e-08, + "loss": 1.3960611820220947, + "step": 15 + }, + { + "epoch": 0.0036887608069164266, + "grad_norm": 0.4832693331921684, + "learning_rate": 6.912442396313364e-08, + "loss": 1.432786464691162, + "step": 16 + }, + { + "epoch": 0.003919308357348703, + "grad_norm": 0.530883463141207, + "learning_rate": 7.373271889400921e-08, + "loss": 1.44321870803833, + "step": 17 + }, + { + "epoch": 0.00414985590778098, + "grad_norm": 0.5073509519871556, + "learning_rate": 7.834101382488478e-08, + "loss": 1.4029710292816162, + "step": 18 + }, + { + "epoch": 0.004380403458213256, + "grad_norm": 0.47114331013100513, + "learning_rate": 8.294930875576037e-08, + "loss": 1.2752254009246826, + "step": 19 + }, + { + "epoch": 0.004610951008645533, + "grad_norm": 0.5134696688961232, + "learning_rate": 8.755760368663594e-08, + "loss": 1.4107906818389893, + "step": 20 + }, + { + "epoch": 0.00484149855907781, + "grad_norm": 0.5192976074458424, + "learning_rate": 9.216589861751152e-08, + "loss": 1.473652720451355, + "step": 21 + }, + { + "epoch": 0.005072046109510086, + "grad_norm": 0.46860465779836935, + "learning_rate": 9.677419354838709e-08, + "loss": 1.3515217304229736, + "step": 22 + }, + { + "epoch": 0.005302593659942363, + "grad_norm": 0.4232064440585856, + "learning_rate": 1.0138248847926267e-07, + "loss": 1.2591620683670044, + "step": 23 + }, + { + "epoch": 0.00553314121037464, + "grad_norm": 0.47756486586420427, + "learning_rate": 1.0599078341013824e-07, + "loss": 1.4121818542480469, + "step": 24 + }, + { + "epoch": 0.005763688760806916, + "grad_norm": 0.5080537563386874, + "learning_rate": 1.1059907834101383e-07, + "loss": 1.4280284643173218, + "step": 25 + }, + { + "epoch": 0.005994236311239193, + "grad_norm": 0.49501252357111303, + "learning_rate": 1.152073732718894e-07, + "loss": 1.24143385887146, + "step": 26 + }, + { + "epoch": 0.00622478386167147, + "grad_norm": 0.4986915218495848, + "learning_rate": 1.1981566820276498e-07, + "loss": 1.4424221515655518, + "step": 27 + }, + { + "epoch": 0.006455331412103746, + "grad_norm": 0.5271724916728263, + "learning_rate": 1.2442396313364054e-07, + "loss": 1.4094964265823364, + "step": 28 + }, + { + "epoch": 0.006685878962536023, + "grad_norm": 0.48753764373588065, + "learning_rate": 1.2903225806451611e-07, + "loss": 1.301988124847412, + "step": 29 + }, + { + "epoch": 0.0069164265129683, + "grad_norm": 0.49597522677188116, + "learning_rate": 1.336405529953917e-07, + "loss": 1.342317819595337, + "step": 30 + }, + { + "epoch": 0.007146974063400576, + "grad_norm": 0.4662909329278552, + "learning_rate": 1.3824884792626728e-07, + "loss": 1.374321699142456, + "step": 31 + }, + { + "epoch": 0.007377521613832853, + "grad_norm": 0.48003195379069746, + "learning_rate": 1.4285714285714285e-07, + "loss": 1.2922568321228027, + "step": 32 + }, + { + "epoch": 0.00760806916426513, + "grad_norm": 0.4791954611138749, + "learning_rate": 1.4746543778801842e-07, + "loss": 1.3206393718719482, + "step": 33 + }, + { + "epoch": 0.007838616714697407, + "grad_norm": 0.46770282190111545, + "learning_rate": 1.52073732718894e-07, + "loss": 1.3069640398025513, + "step": 34 + }, + { + "epoch": 0.008069164265129682, + "grad_norm": 0.49996972274526347, + "learning_rate": 1.5668202764976955e-07, + "loss": 1.4134069681167603, + "step": 35 + }, + { + "epoch": 0.00829971181556196, + "grad_norm": 0.5280258817640504, + "learning_rate": 1.6129032258064515e-07, + "loss": 1.3728907108306885, + "step": 36 + }, + { + "epoch": 0.008530259365994237, + "grad_norm": 0.4285974688946003, + "learning_rate": 1.6589861751152074e-07, + "loss": 1.2913165092468262, + "step": 37 + }, + { + "epoch": 0.008760806916426512, + "grad_norm": 0.42264550533680856, + "learning_rate": 1.705069124423963e-07, + "loss": 1.2775439023971558, + "step": 38 + }, + { + "epoch": 0.00899135446685879, + "grad_norm": 0.507820888455636, + "learning_rate": 1.7511520737327188e-07, + "loss": 1.2585256099700928, + "step": 39 + }, + { + "epoch": 0.009221902017291067, + "grad_norm": 0.5027390631956755, + "learning_rate": 1.7972350230414745e-07, + "loss": 1.4137083292007446, + "step": 40 + }, + { + "epoch": 0.009452449567723342, + "grad_norm": 0.4985154114340913, + "learning_rate": 1.8433179723502305e-07, + "loss": 1.4829634428024292, + "step": 41 + }, + { + "epoch": 0.00968299711815562, + "grad_norm": 0.45097397965172786, + "learning_rate": 1.889400921658986e-07, + "loss": 1.274835467338562, + "step": 42 + }, + { + "epoch": 0.009913544668587897, + "grad_norm": 0.499092720542228, + "learning_rate": 1.9354838709677418e-07, + "loss": 1.4171526432037354, + "step": 43 + }, + { + "epoch": 0.010144092219020172, + "grad_norm": 0.46556868786502403, + "learning_rate": 1.9815668202764975e-07, + "loss": 1.353653907775879, + "step": 44 + }, + { + "epoch": 0.01037463976945245, + "grad_norm": 0.5613880908390132, + "learning_rate": 2.0276497695852535e-07, + "loss": 1.5567824840545654, + "step": 45 + }, + { + "epoch": 0.010605187319884727, + "grad_norm": 0.5081064697106673, + "learning_rate": 2.073732718894009e-07, + "loss": 1.3403587341308594, + "step": 46 + }, + { + "epoch": 0.010835734870317002, + "grad_norm": 0.5101668560208579, + "learning_rate": 2.1198156682027649e-07, + "loss": 1.4320882558822632, + "step": 47 + }, + { + "epoch": 0.01106628242074928, + "grad_norm": 0.5330267535714837, + "learning_rate": 2.1658986175115208e-07, + "loss": 1.4313323497772217, + "step": 48 + }, + { + "epoch": 0.011296829971181557, + "grad_norm": 0.553336579270586, + "learning_rate": 2.2119815668202765e-07, + "loss": 1.3737103939056396, + "step": 49 + }, + { + "epoch": 0.011527377521613832, + "grad_norm": 0.5249313819409881, + "learning_rate": 2.2580645161290322e-07, + "loss": 1.392624855041504, + "step": 50 + }, + { + "epoch": 0.01175792507204611, + "grad_norm": 0.501704521925026, + "learning_rate": 2.304147465437788e-07, + "loss": 1.518836259841919, + "step": 51 + }, + { + "epoch": 0.011988472622478387, + "grad_norm": 0.494303112571743, + "learning_rate": 2.3502304147465438e-07, + "loss": 1.353430986404419, + "step": 52 + }, + { + "epoch": 0.012219020172910662, + "grad_norm": 0.5529237573464274, + "learning_rate": 2.3963133640552995e-07, + "loss": 1.4298975467681885, + "step": 53 + }, + { + "epoch": 0.01244956772334294, + "grad_norm": 0.608889837254303, + "learning_rate": 2.442396313364055e-07, + "loss": 1.5087032318115234, + "step": 54 + }, + { + "epoch": 0.012680115273775217, + "grad_norm": 0.5072050336667386, + "learning_rate": 2.488479262672811e-07, + "loss": 1.2958388328552246, + "step": 55 + }, + { + "epoch": 0.012910662824207492, + "grad_norm": 0.49535361810797784, + "learning_rate": 2.534562211981567e-07, + "loss": 1.3586616516113281, + "step": 56 + }, + { + "epoch": 0.01314121037463977, + "grad_norm": 0.5061073729366113, + "learning_rate": 2.5806451612903223e-07, + "loss": 1.4593045711517334, + "step": 57 + }, + { + "epoch": 0.013371757925072046, + "grad_norm": 0.5674593981664924, + "learning_rate": 2.6267281105990777e-07, + "loss": 1.4921320676803589, + "step": 58 + }, + { + "epoch": 0.013602305475504322, + "grad_norm": 0.46119048814961744, + "learning_rate": 2.672811059907834e-07, + "loss": 1.2952760457992554, + "step": 59 + }, + { + "epoch": 0.0138328530259366, + "grad_norm": 0.5474941657423043, + "learning_rate": 2.7188940092165896e-07, + "loss": 1.4734549522399902, + "step": 60 + }, + { + "epoch": 0.014063400576368876, + "grad_norm": 0.5926884561781834, + "learning_rate": 2.7649769585253456e-07, + "loss": 1.4697580337524414, + "step": 61 + }, + { + "epoch": 0.014293948126801152, + "grad_norm": 0.47617437574101296, + "learning_rate": 2.8110599078341015e-07, + "loss": 1.3642436265945435, + "step": 62 + }, + { + "epoch": 0.01452449567723343, + "grad_norm": 0.4266689877497005, + "learning_rate": 2.857142857142857e-07, + "loss": 1.1863957643508911, + "step": 63 + }, + { + "epoch": 0.014755043227665706, + "grad_norm": 0.4972126101611472, + "learning_rate": 2.903225806451613e-07, + "loss": 1.4668104648590088, + "step": 64 + }, + { + "epoch": 0.014985590778097982, + "grad_norm": 0.4920879694799663, + "learning_rate": 2.9493087557603683e-07, + "loss": 1.2462284564971924, + "step": 65 + }, + { + "epoch": 0.01521613832853026, + "grad_norm": 0.5870123543834398, + "learning_rate": 2.9953917050691243e-07, + "loss": 1.459596872329712, + "step": 66 + }, + { + "epoch": 0.015446685878962536, + "grad_norm": 0.4875968214693539, + "learning_rate": 3.04147465437788e-07, + "loss": 1.348015546798706, + "step": 67 + }, + { + "epoch": 0.015677233429394814, + "grad_norm": 0.5086081285359251, + "learning_rate": 3.0875576036866356e-07, + "loss": 1.2952334880828857, + "step": 68 + }, + { + "epoch": 0.01590778097982709, + "grad_norm": 0.5392907343811632, + "learning_rate": 3.133640552995391e-07, + "loss": 1.4372143745422363, + "step": 69 + }, + { + "epoch": 0.016138328530259365, + "grad_norm": 0.4524557334907612, + "learning_rate": 3.1797235023041476e-07, + "loss": 1.2725008726119995, + "step": 70 + }, + { + "epoch": 0.016368876080691642, + "grad_norm": 0.4952177804507778, + "learning_rate": 3.225806451612903e-07, + "loss": 1.3648872375488281, + "step": 71 + }, + { + "epoch": 0.01659942363112392, + "grad_norm": 0.6124636807685551, + "learning_rate": 3.271889400921659e-07, + "loss": 1.3494001626968384, + "step": 72 + }, + { + "epoch": 0.016829971181556196, + "grad_norm": 0.4692285797456175, + "learning_rate": 3.317972350230415e-07, + "loss": 1.1907480955123901, + "step": 73 + }, + { + "epoch": 0.017060518731988474, + "grad_norm": 0.5228163344563065, + "learning_rate": 3.3640552995391703e-07, + "loss": 1.4449963569641113, + "step": 74 + }, + { + "epoch": 0.01729106628242075, + "grad_norm": 0.4749685021463316, + "learning_rate": 3.410138248847926e-07, + "loss": 1.277104377746582, + "step": 75 + }, + { + "epoch": 0.017521613832853025, + "grad_norm": 0.5209033114195116, + "learning_rate": 3.4562211981566817e-07, + "loss": 1.3609862327575684, + "step": 76 + }, + { + "epoch": 0.017752161383285302, + "grad_norm": 0.5369087777206046, + "learning_rate": 3.5023041474654376e-07, + "loss": 1.3189136981964111, + "step": 77 + }, + { + "epoch": 0.01798270893371758, + "grad_norm": 0.5465154788756614, + "learning_rate": 3.5483870967741936e-07, + "loss": 1.4422173500061035, + "step": 78 + }, + { + "epoch": 0.018213256484149856, + "grad_norm": 0.5634760024465724, + "learning_rate": 3.594470046082949e-07, + "loss": 1.44877290725708, + "step": 79 + }, + { + "epoch": 0.018443804034582133, + "grad_norm": 0.5119851144094427, + "learning_rate": 3.6405529953917044e-07, + "loss": 1.3519830703735352, + "step": 80 + }, + { + "epoch": 0.01867435158501441, + "grad_norm": 0.4843675992150354, + "learning_rate": 3.686635944700461e-07, + "loss": 1.4015991687774658, + "step": 81 + }, + { + "epoch": 0.018904899135446684, + "grad_norm": 0.5374856510016354, + "learning_rate": 3.7327188940092163e-07, + "loss": 1.4938485622406006, + "step": 82 + }, + { + "epoch": 0.01913544668587896, + "grad_norm": 0.5341991720772048, + "learning_rate": 3.778801843317972e-07, + "loss": 1.5170536041259766, + "step": 83 + }, + { + "epoch": 0.01936599423631124, + "grad_norm": 0.46764341815409355, + "learning_rate": 3.824884792626728e-07, + "loss": 1.4166152477264404, + "step": 84 + }, + { + "epoch": 0.019596541786743516, + "grad_norm": 0.4730806672754701, + "learning_rate": 3.8709677419354837e-07, + "loss": 1.2796568870544434, + "step": 85 + }, + { + "epoch": 0.019827089337175793, + "grad_norm": 0.537778002692222, + "learning_rate": 3.9170506912442396e-07, + "loss": 1.580716848373413, + "step": 86 + }, + { + "epoch": 0.02005763688760807, + "grad_norm": 0.5915866249189787, + "learning_rate": 3.963133640552995e-07, + "loss": 1.5387516021728516, + "step": 87 + }, + { + "epoch": 0.020288184438040344, + "grad_norm": 0.4420233078332847, + "learning_rate": 4.009216589861751e-07, + "loss": 1.2988545894622803, + "step": 88 + }, + { + "epoch": 0.02051873198847262, + "grad_norm": 0.5173220217871495, + "learning_rate": 4.055299539170507e-07, + "loss": 1.3798308372497559, + "step": 89 + }, + { + "epoch": 0.0207492795389049, + "grad_norm": 0.5168573395560399, + "learning_rate": 4.1013824884792624e-07, + "loss": 1.432153582572937, + "step": 90 + }, + { + "epoch": 0.020979827089337176, + "grad_norm": 0.5105535607807559, + "learning_rate": 4.147465437788018e-07, + "loss": 1.3643224239349365, + "step": 91 + }, + { + "epoch": 0.021210374639769453, + "grad_norm": 0.5225018509714875, + "learning_rate": 4.1935483870967743e-07, + "loss": 1.3699355125427246, + "step": 92 + }, + { + "epoch": 0.02144092219020173, + "grad_norm": 0.6310663899937882, + "learning_rate": 4.2396313364055297e-07, + "loss": 1.5346300601959229, + "step": 93 + }, + { + "epoch": 0.021671469740634004, + "grad_norm": 0.5162418549049712, + "learning_rate": 4.285714285714285e-07, + "loss": 1.378378987312317, + "step": 94 + }, + { + "epoch": 0.02190201729106628, + "grad_norm": 0.5009778072654538, + "learning_rate": 4.3317972350230416e-07, + "loss": 1.2565600872039795, + "step": 95 + }, + { + "epoch": 0.02213256484149856, + "grad_norm": 0.47131967484595777, + "learning_rate": 4.377880184331797e-07, + "loss": 1.1516107320785522, + "step": 96 + }, + { + "epoch": 0.022363112391930836, + "grad_norm": 0.5075938685585429, + "learning_rate": 4.423963133640553e-07, + "loss": 1.3053100109100342, + "step": 97 + }, + { + "epoch": 0.022593659942363113, + "grad_norm": 0.6224731685867901, + "learning_rate": 4.4700460829493084e-07, + "loss": 1.465439796447754, + "step": 98 + }, + { + "epoch": 0.02282420749279539, + "grad_norm": 0.5193994738284144, + "learning_rate": 4.5161290322580644e-07, + "loss": 1.3480231761932373, + "step": 99 + }, + { + "epoch": 0.023054755043227664, + "grad_norm": 0.5029697791885893, + "learning_rate": 4.5622119815668203e-07, + "loss": 1.3548729419708252, + "step": 100 + }, + { + "epoch": 0.02328530259365994, + "grad_norm": 0.4821335123926122, + "learning_rate": 4.608294930875576e-07, + "loss": 1.2621939182281494, + "step": 101 + }, + { + "epoch": 0.02351585014409222, + "grad_norm": 0.5666073310249758, + "learning_rate": 4.654377880184331e-07, + "loss": 1.4433940649032593, + "step": 102 + }, + { + "epoch": 0.023746397694524496, + "grad_norm": 0.5090266918300268, + "learning_rate": 4.7004608294930877e-07, + "loss": 1.3975611925125122, + "step": 103 + }, + { + "epoch": 0.023976945244956773, + "grad_norm": 0.463497389397919, + "learning_rate": 4.746543778801843e-07, + "loss": 1.3603153228759766, + "step": 104 + }, + { + "epoch": 0.02420749279538905, + "grad_norm": 0.5598887073047998, + "learning_rate": 4.792626728110599e-07, + "loss": 1.4595959186553955, + "step": 105 + }, + { + "epoch": 0.024438040345821324, + "grad_norm": 0.5718053046324956, + "learning_rate": 4.838709677419355e-07, + "loss": 1.2416110038757324, + "step": 106 + }, + { + "epoch": 0.0246685878962536, + "grad_norm": 0.5201886582455413, + "learning_rate": 4.88479262672811e-07, + "loss": 1.3226549625396729, + "step": 107 + }, + { + "epoch": 0.02489913544668588, + "grad_norm": 0.5355547943819389, + "learning_rate": 4.930875576036866e-07, + "loss": 1.381019115447998, + "step": 108 + }, + { + "epoch": 0.025129682997118156, + "grad_norm": 0.4743830386883031, + "learning_rate": 4.976958525345622e-07, + "loss": 1.3001632690429688, + "step": 109 + }, + { + "epoch": 0.025360230547550433, + "grad_norm": 0.5460938028869833, + "learning_rate": 5.023041474654378e-07, + "loss": 1.3989886045455933, + "step": 110 + }, + { + "epoch": 0.02559077809798271, + "grad_norm": 0.5107214873439961, + "learning_rate": 5.069124423963134e-07, + "loss": 1.3286147117614746, + "step": 111 + }, + { + "epoch": 0.025821325648414984, + "grad_norm": 0.5004549050503952, + "learning_rate": 5.11520737327189e-07, + "loss": 1.2327868938446045, + "step": 112 + }, + { + "epoch": 0.02605187319884726, + "grad_norm": 0.47534130961461846, + "learning_rate": 5.161290322580645e-07, + "loss": 1.214202880859375, + "step": 113 + }, + { + "epoch": 0.02628242074927954, + "grad_norm": 0.5527744015839821, + "learning_rate": 5.2073732718894e-07, + "loss": 1.3953044414520264, + "step": 114 + }, + { + "epoch": 0.026512968299711816, + "grad_norm": 0.7593284220674507, + "learning_rate": 5.253456221198155e-07, + "loss": 1.4174964427947998, + "step": 115 + }, + { + "epoch": 0.026743515850144093, + "grad_norm": 0.5505732654457145, + "learning_rate": 5.299539170506912e-07, + "loss": 1.4795210361480713, + "step": 116 + }, + { + "epoch": 0.02697406340057637, + "grad_norm": 0.510535557692495, + "learning_rate": 5.345622119815668e-07, + "loss": 1.3342328071594238, + "step": 117 + }, + { + "epoch": 0.027204610951008644, + "grad_norm": 0.5358682136549786, + "learning_rate": 5.391705069124423e-07, + "loss": 1.4108824729919434, + "step": 118 + }, + { + "epoch": 0.02743515850144092, + "grad_norm": 0.5428329715314709, + "learning_rate": 5.437788018433179e-07, + "loss": 1.4235775470733643, + "step": 119 + }, + { + "epoch": 0.0276657060518732, + "grad_norm": 0.5157269485071635, + "learning_rate": 5.483870967741935e-07, + "loss": 1.320220947265625, + "step": 120 + }, + { + "epoch": 0.027896253602305476, + "grad_norm": 0.5135491640261608, + "learning_rate": 5.529953917050691e-07, + "loss": 1.303511381149292, + "step": 121 + }, + { + "epoch": 0.028126801152737753, + "grad_norm": 0.5471566857353616, + "learning_rate": 5.576036866359447e-07, + "loss": 1.4310801029205322, + "step": 122 + }, + { + "epoch": 0.02835734870317003, + "grad_norm": 0.524820965238312, + "learning_rate": 5.622119815668203e-07, + "loss": 1.4052631855010986, + "step": 123 + }, + { + "epoch": 0.028587896253602304, + "grad_norm": 0.5156246818207144, + "learning_rate": 5.668202764976958e-07, + "loss": 1.3878209590911865, + "step": 124 + }, + { + "epoch": 0.02881844380403458, + "grad_norm": 0.5496340547026753, + "learning_rate": 5.714285714285714e-07, + "loss": 1.3234784603118896, + "step": 125 + }, + { + "epoch": 0.02904899135446686, + "grad_norm": 0.5570198679920451, + "learning_rate": 5.760368663594469e-07, + "loss": 1.4433726072311401, + "step": 126 + }, + { + "epoch": 0.029279538904899136, + "grad_norm": 0.5672242590978396, + "learning_rate": 5.806451612903226e-07, + "loss": 1.4903366565704346, + "step": 127 + }, + { + "epoch": 0.029510086455331413, + "grad_norm": 0.5764053931926064, + "learning_rate": 5.852534562211982e-07, + "loss": 1.4877443313598633, + "step": 128 + }, + { + "epoch": 0.02974063400576369, + "grad_norm": 0.6493956597599972, + "learning_rate": 5.898617511520737e-07, + "loss": 1.5012906789779663, + "step": 129 + }, + { + "epoch": 0.029971181556195964, + "grad_norm": 0.5636643803778404, + "learning_rate": 5.944700460829493e-07, + "loss": 1.3269531726837158, + "step": 130 + }, + { + "epoch": 0.03020172910662824, + "grad_norm": 0.48299121768794717, + "learning_rate": 5.990783410138249e-07, + "loss": 1.2456672191619873, + "step": 131 + }, + { + "epoch": 0.03043227665706052, + "grad_norm": 0.5007320148704202, + "learning_rate": 6.036866359447004e-07, + "loss": 1.2842707633972168, + "step": 132 + }, + { + "epoch": 0.030662824207492795, + "grad_norm": 0.4783035528969415, + "learning_rate": 6.08294930875576e-07, + "loss": 1.220112919807434, + "step": 133 + }, + { + "epoch": 0.030893371757925073, + "grad_norm": 0.5443919458428148, + "learning_rate": 6.129032258064516e-07, + "loss": 1.347076654434204, + "step": 134 + }, + { + "epoch": 0.03112391930835735, + "grad_norm": 0.520182785254012, + "learning_rate": 6.175115207373271e-07, + "loss": 1.34126877784729, + "step": 135 + }, + { + "epoch": 0.03135446685878963, + "grad_norm": 0.5234361924556283, + "learning_rate": 6.221198156682027e-07, + "loss": 1.3580594062805176, + "step": 136 + }, + { + "epoch": 0.0315850144092219, + "grad_norm": 0.5382720022504871, + "learning_rate": 6.267281105990782e-07, + "loss": 1.3195347785949707, + "step": 137 + }, + { + "epoch": 0.03181556195965418, + "grad_norm": 0.6644195369547549, + "learning_rate": 6.313364055299539e-07, + "loss": 1.5621061325073242, + "step": 138 + }, + { + "epoch": 0.032046109510086455, + "grad_norm": 0.5685106208934547, + "learning_rate": 6.359447004608295e-07, + "loss": 1.4042680263519287, + "step": 139 + }, + { + "epoch": 0.03227665706051873, + "grad_norm": 0.5167568131947204, + "learning_rate": 6.40552995391705e-07, + "loss": 1.2934812307357788, + "step": 140 + }, + { + "epoch": 0.03250720461095101, + "grad_norm": 0.60837930284751, + "learning_rate": 6.451612903225806e-07, + "loss": 1.4804668426513672, + "step": 141 + }, + { + "epoch": 0.032737752161383284, + "grad_norm": 0.6121742149463929, + "learning_rate": 6.497695852534562e-07, + "loss": 1.5287294387817383, + "step": 142 + }, + { + "epoch": 0.032968299711815564, + "grad_norm": 0.5672386486164406, + "learning_rate": 6.543778801843318e-07, + "loss": 1.5354558229446411, + "step": 143 + }, + { + "epoch": 0.03319884726224784, + "grad_norm": 0.5484275315763268, + "learning_rate": 6.589861751152074e-07, + "loss": 1.3472375869750977, + "step": 144 + }, + { + "epoch": 0.03342939481268011, + "grad_norm": 0.5749434129647923, + "learning_rate": 6.63594470046083e-07, + "loss": 1.403039813041687, + "step": 145 + }, + { + "epoch": 0.03365994236311239, + "grad_norm": 0.5204858023557942, + "learning_rate": 6.682027649769585e-07, + "loss": 1.2957086563110352, + "step": 146 + }, + { + "epoch": 0.033890489913544666, + "grad_norm": 0.5758848399522136, + "learning_rate": 6.728110599078341e-07, + "loss": 1.3671963214874268, + "step": 147 + }, + { + "epoch": 0.03412103746397695, + "grad_norm": 0.5307364079465571, + "learning_rate": 6.774193548387096e-07, + "loss": 1.3373156785964966, + "step": 148 + }, + { + "epoch": 0.03435158501440922, + "grad_norm": 0.5281640020378694, + "learning_rate": 6.820276497695853e-07, + "loss": 1.4774576425552368, + "step": 149 + }, + { + "epoch": 0.0345821325648415, + "grad_norm": 0.5679122579833843, + "learning_rate": 6.866359447004608e-07, + "loss": 1.4094908237457275, + "step": 150 + }, + { + "epoch": 0.034812680115273775, + "grad_norm": 0.6310275430866781, + "learning_rate": 6.912442396313363e-07, + "loss": 1.5288136005401611, + "step": 151 + }, + { + "epoch": 0.03504322766570605, + "grad_norm": 0.5491763264170931, + "learning_rate": 6.958525345622119e-07, + "loss": 1.4010430574417114, + "step": 152 + }, + { + "epoch": 0.03527377521613833, + "grad_norm": 0.5102304190283374, + "learning_rate": 7.004608294930875e-07, + "loss": 1.228097915649414, + "step": 153 + }, + { + "epoch": 0.035504322766570603, + "grad_norm": 0.556977580118223, + "learning_rate": 7.05069124423963e-07, + "loss": 1.3849995136260986, + "step": 154 + }, + { + "epoch": 0.035734870317002884, + "grad_norm": 0.6038000879986429, + "learning_rate": 7.096774193548387e-07, + "loss": 1.436859369277954, + "step": 155 + }, + { + "epoch": 0.03596541786743516, + "grad_norm": 0.5448106660281533, + "learning_rate": 7.142857142857143e-07, + "loss": 1.3830995559692383, + "step": 156 + }, + { + "epoch": 0.03619596541786743, + "grad_norm": 0.5178664488372983, + "learning_rate": 7.188940092165898e-07, + "loss": 1.1554113626480103, + "step": 157 + }, + { + "epoch": 0.03642651296829971, + "grad_norm": 0.5186614381206474, + "learning_rate": 7.235023041474654e-07, + "loss": 1.276925802230835, + "step": 158 + }, + { + "epoch": 0.036657060518731986, + "grad_norm": 0.5296293850131283, + "learning_rate": 7.281105990783409e-07, + "loss": 1.3374000787734985, + "step": 159 + }, + { + "epoch": 0.03688760806916427, + "grad_norm": 0.6018629559621754, + "learning_rate": 7.327188940092166e-07, + "loss": 1.384819746017456, + "step": 160 + }, + { + "epoch": 0.03711815561959654, + "grad_norm": 0.6124602230831588, + "learning_rate": 7.373271889400922e-07, + "loss": 1.422861099243164, + "step": 161 + }, + { + "epoch": 0.03734870317002882, + "grad_norm": 0.6196521585488064, + "learning_rate": 7.419354838709677e-07, + "loss": 1.4244587421417236, + "step": 162 + }, + { + "epoch": 0.037579250720461095, + "grad_norm": 0.6199123784871026, + "learning_rate": 7.465437788018433e-07, + "loss": 1.4938528537750244, + "step": 163 + }, + { + "epoch": 0.03780979827089337, + "grad_norm": 0.5452147962770174, + "learning_rate": 7.511520737327189e-07, + "loss": 1.2724919319152832, + "step": 164 + }, + { + "epoch": 0.03804034582132565, + "grad_norm": 0.5579715996476083, + "learning_rate": 7.557603686635944e-07, + "loss": 1.4015090465545654, + "step": 165 + }, + { + "epoch": 0.03827089337175792, + "grad_norm": 0.6142689682562157, + "learning_rate": 7.603686635944701e-07, + "loss": 1.4200658798217773, + "step": 166 + }, + { + "epoch": 0.038501440922190204, + "grad_norm": 0.550927379145584, + "learning_rate": 7.649769585253457e-07, + "loss": 1.2477431297302246, + "step": 167 + }, + { + "epoch": 0.03873198847262248, + "grad_norm": 0.5996206737221976, + "learning_rate": 7.695852534562211e-07, + "loss": 1.3901419639587402, + "step": 168 + }, + { + "epoch": 0.03896253602305476, + "grad_norm": 0.6147375477963635, + "learning_rate": 7.741935483870967e-07, + "loss": 1.4381290674209595, + "step": 169 + }, + { + "epoch": 0.03919308357348703, + "grad_norm": 0.6270207649449981, + "learning_rate": 7.788018433179722e-07, + "loss": 1.5323734283447266, + "step": 170 + }, + { + "epoch": 0.039423631123919306, + "grad_norm": 0.5273848125246383, + "learning_rate": 7.834101382488479e-07, + "loss": 1.3429911136627197, + "step": 171 + }, + { + "epoch": 0.03965417867435159, + "grad_norm": 0.7032717723888388, + "learning_rate": 7.880184331797235e-07, + "loss": 1.5647220611572266, + "step": 172 + }, + { + "epoch": 0.03988472622478386, + "grad_norm": 0.5934411017478175, + "learning_rate": 7.92626728110599e-07, + "loss": 1.1879881620407104, + "step": 173 + }, + { + "epoch": 0.04011527377521614, + "grad_norm": 0.5726555140125118, + "learning_rate": 7.972350230414746e-07, + "loss": 1.4102849960327148, + "step": 174 + }, + { + "epoch": 0.040345821325648415, + "grad_norm": 0.616432174195689, + "learning_rate": 8.018433179723502e-07, + "loss": 1.4521185159683228, + "step": 175 + }, + { + "epoch": 0.04057636887608069, + "grad_norm": 0.5588880965327565, + "learning_rate": 8.064516129032257e-07, + "loss": 1.3322495222091675, + "step": 176 + }, + { + "epoch": 0.04080691642651297, + "grad_norm": 0.6095347282001032, + "learning_rate": 8.110599078341014e-07, + "loss": 1.3836069107055664, + "step": 177 + }, + { + "epoch": 0.04103746397694524, + "grad_norm": 0.6033088969243905, + "learning_rate": 8.15668202764977e-07, + "loss": 1.3639270067214966, + "step": 178 + }, + { + "epoch": 0.041268011527377524, + "grad_norm": 0.60951279416829, + "learning_rate": 8.202764976958525e-07, + "loss": 1.3034193515777588, + "step": 179 + }, + { + "epoch": 0.0414985590778098, + "grad_norm": 0.5682361207295581, + "learning_rate": 8.248847926267281e-07, + "loss": 1.346369981765747, + "step": 180 + }, + { + "epoch": 0.04172910662824208, + "grad_norm": 0.5646969989653233, + "learning_rate": 8.294930875576036e-07, + "loss": 1.2872177362442017, + "step": 181 + }, + { + "epoch": 0.04195965417867435, + "grad_norm": 0.5336435896443295, + "learning_rate": 8.341013824884793e-07, + "loss": 1.3010566234588623, + "step": 182 + }, + { + "epoch": 0.042190201729106626, + "grad_norm": 0.5733249196968232, + "learning_rate": 8.387096774193549e-07, + "loss": 1.2692077159881592, + "step": 183 + }, + { + "epoch": 0.04242074927953891, + "grad_norm": 0.5181695473064807, + "learning_rate": 8.433179723502303e-07, + "loss": 1.2789500951766968, + "step": 184 + }, + { + "epoch": 0.04265129682997118, + "grad_norm": 0.671579335481803, + "learning_rate": 8.479262672811059e-07, + "loss": 1.4537731409072876, + "step": 185 + }, + { + "epoch": 0.04288184438040346, + "grad_norm": 0.5760279966734834, + "learning_rate": 8.525345622119815e-07, + "loss": 1.2711801528930664, + "step": 186 + }, + { + "epoch": 0.043112391930835735, + "grad_norm": 0.5840770470208928, + "learning_rate": 8.57142857142857e-07, + "loss": 1.326183557510376, + "step": 187 + }, + { + "epoch": 0.04334293948126801, + "grad_norm": 0.6180137341655191, + "learning_rate": 8.617511520737327e-07, + "loss": 1.3028078079223633, + "step": 188 + }, + { + "epoch": 0.04357348703170029, + "grad_norm": 0.5721731746298903, + "learning_rate": 8.663594470046083e-07, + "loss": 1.3517916202545166, + "step": 189 + }, + { + "epoch": 0.04380403458213256, + "grad_norm": 0.5729165983462421, + "learning_rate": 8.709677419354838e-07, + "loss": 1.4067044258117676, + "step": 190 + }, + { + "epoch": 0.044034582132564844, + "grad_norm": 0.541532744989784, + "learning_rate": 8.755760368663594e-07, + "loss": 1.2318730354309082, + "step": 191 + }, + { + "epoch": 0.04426512968299712, + "grad_norm": 0.6053194817049817, + "learning_rate": 8.801843317972349e-07, + "loss": 1.4224486351013184, + "step": 192 + }, + { + "epoch": 0.0444956772334294, + "grad_norm": 0.5557887004182813, + "learning_rate": 8.847926267281106e-07, + "loss": 1.2940380573272705, + "step": 193 + }, + { + "epoch": 0.04472622478386167, + "grad_norm": 0.5691666959738478, + "learning_rate": 8.894009216589862e-07, + "loss": 1.2765517234802246, + "step": 194 + }, + { + "epoch": 0.044956772334293946, + "grad_norm": 0.5991065245698037, + "learning_rate": 8.940092165898617e-07, + "loss": 1.3684041500091553, + "step": 195 + }, + { + "epoch": 0.045187319884726226, + "grad_norm": 0.6214872461469259, + "learning_rate": 8.986175115207373e-07, + "loss": 1.26853346824646, + "step": 196 + }, + { + "epoch": 0.0454178674351585, + "grad_norm": 0.6109731509326806, + "learning_rate": 9.032258064516129e-07, + "loss": 1.362388014793396, + "step": 197 + }, + { + "epoch": 0.04564841498559078, + "grad_norm": 0.7382108636991422, + "learning_rate": 9.078341013824884e-07, + "loss": 1.5644274950027466, + "step": 198 + }, + { + "epoch": 0.045878962536023055, + "grad_norm": 0.5729864726362653, + "learning_rate": 9.124423963133641e-07, + "loss": 1.2623369693756104, + "step": 199 + }, + { + "epoch": 0.04610951008645533, + "grad_norm": 0.5981899078758937, + "learning_rate": 9.170506912442397e-07, + "loss": 1.3940534591674805, + "step": 200 + }, + { + "epoch": 0.04634005763688761, + "grad_norm": 0.5996408828481996, + "learning_rate": 9.216589861751152e-07, + "loss": 1.3285980224609375, + "step": 201 + }, + { + "epoch": 0.04657060518731988, + "grad_norm": 0.649848642705014, + "learning_rate": 9.262672811059907e-07, + "loss": 1.5363751649856567, + "step": 202 + }, + { + "epoch": 0.046801152737752164, + "grad_norm": 0.6186471428262311, + "learning_rate": 9.308755760368662e-07, + "loss": 1.3822460174560547, + "step": 203 + }, + { + "epoch": 0.04703170028818444, + "grad_norm": 0.6073589592236044, + "learning_rate": 9.354838709677418e-07, + "loss": 1.2392113208770752, + "step": 204 + }, + { + "epoch": 0.04726224783861672, + "grad_norm": 0.5839583864456539, + "learning_rate": 9.400921658986175e-07, + "loss": 1.2738463878631592, + "step": 205 + }, + { + "epoch": 0.04749279538904899, + "grad_norm": 0.5799115119327442, + "learning_rate": 9.44700460829493e-07, + "loss": 1.234877347946167, + "step": 206 + }, + { + "epoch": 0.047723342939481266, + "grad_norm": 0.623495775301441, + "learning_rate": 9.493087557603686e-07, + "loss": 1.3901491165161133, + "step": 207 + }, + { + "epoch": 0.047953890489913546, + "grad_norm": 0.6822208959755598, + "learning_rate": 9.539170506912442e-07, + "loss": 1.2906568050384521, + "step": 208 + }, + { + "epoch": 0.04818443804034582, + "grad_norm": 0.677926582170199, + "learning_rate": 9.585253456221198e-07, + "loss": 1.3887840509414673, + "step": 209 + }, + { + "epoch": 0.0484149855907781, + "grad_norm": 0.6565533240560858, + "learning_rate": 9.631336405529954e-07, + "loss": 1.4642484188079834, + "step": 210 + }, + { + "epoch": 0.048645533141210374, + "grad_norm": 0.6481627320935237, + "learning_rate": 9.67741935483871e-07, + "loss": 1.3764479160308838, + "step": 211 + }, + { + "epoch": 0.04887608069164265, + "grad_norm": 0.631978356438684, + "learning_rate": 9.723502304147466e-07, + "loss": 1.343896746635437, + "step": 212 + }, + { + "epoch": 0.04910662824207493, + "grad_norm": 0.6740692889548267, + "learning_rate": 9.76958525345622e-07, + "loss": 1.4437646865844727, + "step": 213 + }, + { + "epoch": 0.0493371757925072, + "grad_norm": 0.6720558555641115, + "learning_rate": 9.815668202764976e-07, + "loss": 1.438147783279419, + "step": 214 + }, + { + "epoch": 0.04956772334293948, + "grad_norm": 0.684406863070909, + "learning_rate": 9.861751152073732e-07, + "loss": 1.4664554595947266, + "step": 215 + }, + { + "epoch": 0.04979827089337176, + "grad_norm": 0.5387789373752218, + "learning_rate": 9.907834101382488e-07, + "loss": 1.1996713876724243, + "step": 216 + }, + { + "epoch": 0.05002881844380404, + "grad_norm": 0.7065986142812887, + "learning_rate": 9.953917050691244e-07, + "loss": 1.4759405851364136, + "step": 217 + }, + { + "epoch": 0.05025936599423631, + "grad_norm": 0.616083267592582, + "learning_rate": 1e-06, + "loss": 1.2561213970184326, + "step": 218 + }, + { + "epoch": 0.050489913544668585, + "grad_norm": 0.5674526859382938, + "learning_rate": 1.0046082949308756e-06, + "loss": 1.1770460605621338, + "step": 219 + }, + { + "epoch": 0.050720461095100866, + "grad_norm": 0.6008598963516047, + "learning_rate": 1.0092165898617511e-06, + "loss": 1.2505006790161133, + "step": 220 + }, + { + "epoch": 0.05095100864553314, + "grad_norm": 0.610874183665047, + "learning_rate": 1.0138248847926267e-06, + "loss": 1.3262345790863037, + "step": 221 + }, + { + "epoch": 0.05118155619596542, + "grad_norm": 0.6545962637044704, + "learning_rate": 1.0184331797235021e-06, + "loss": 1.275759220123291, + "step": 222 + }, + { + "epoch": 0.051412103746397694, + "grad_norm": 0.6055134335770284, + "learning_rate": 1.023041474654378e-06, + "loss": 1.26314377784729, + "step": 223 + }, + { + "epoch": 0.05164265129682997, + "grad_norm": 0.5652742852996497, + "learning_rate": 1.0276497695852535e-06, + "loss": 1.2621712684631348, + "step": 224 + }, + { + "epoch": 0.05187319884726225, + "grad_norm": 0.6542335987225151, + "learning_rate": 1.032258064516129e-06, + "loss": 1.2935044765472412, + "step": 225 + }, + { + "epoch": 0.05210374639769452, + "grad_norm": 0.6619850782143697, + "learning_rate": 1.0368663594470047e-06, + "loss": 1.4024615287780762, + "step": 226 + }, + { + "epoch": 0.0523342939481268, + "grad_norm": 0.6512011773812943, + "learning_rate": 1.04147465437788e-06, + "loss": 1.3970675468444824, + "step": 227 + }, + { + "epoch": 0.05256484149855908, + "grad_norm": 0.7012746956320595, + "learning_rate": 1.0460829493087557e-06, + "loss": 1.3722915649414062, + "step": 228 + }, + { + "epoch": 0.05279538904899136, + "grad_norm": 0.8098853264626967, + "learning_rate": 1.050691244239631e-06, + "loss": 1.5259283781051636, + "step": 229 + }, + { + "epoch": 0.05302593659942363, + "grad_norm": 0.656727684224188, + "learning_rate": 1.0552995391705069e-06, + "loss": 1.2052996158599854, + "step": 230 + }, + { + "epoch": 0.053256484149855905, + "grad_norm": 0.6581276402973916, + "learning_rate": 1.0599078341013825e-06, + "loss": 1.336460828781128, + "step": 231 + }, + { + "epoch": 0.053487031700288186, + "grad_norm": 0.7539723540951386, + "learning_rate": 1.0645161290322579e-06, + "loss": 1.4954627752304077, + "step": 232 + }, + { + "epoch": 0.05371757925072046, + "grad_norm": 0.7919794318433034, + "learning_rate": 1.0691244239631337e-06, + "loss": 1.5053772926330566, + "step": 233 + }, + { + "epoch": 0.05394812680115274, + "grad_norm": 0.7129657400690349, + "learning_rate": 1.073732718894009e-06, + "loss": 1.3133140802383423, + "step": 234 + }, + { + "epoch": 0.054178674351585014, + "grad_norm": 0.649949904492445, + "learning_rate": 1.0783410138248847e-06, + "loss": 1.2675721645355225, + "step": 235 + }, + { + "epoch": 0.05440922190201729, + "grad_norm": 0.7135120090535518, + "learning_rate": 1.0829493087557605e-06, + "loss": 1.3571391105651855, + "step": 236 + }, + { + "epoch": 0.05463976945244957, + "grad_norm": 0.6816473641194337, + "learning_rate": 1.0875576036866358e-06, + "loss": 1.366161584854126, + "step": 237 + }, + { + "epoch": 0.05487031700288184, + "grad_norm": 0.7159147406875376, + "learning_rate": 1.0921658986175114e-06, + "loss": 1.4242830276489258, + "step": 238 + }, + { + "epoch": 0.05510086455331412, + "grad_norm": 0.6832604726478776, + "learning_rate": 1.096774193548387e-06, + "loss": 1.3941435813903809, + "step": 239 + }, + { + "epoch": 0.0553314121037464, + "grad_norm": 0.676053870538488, + "learning_rate": 1.1013824884792626e-06, + "loss": 1.2358953952789307, + "step": 240 + }, + { + "epoch": 0.05556195965417868, + "grad_norm": 0.694435060104584, + "learning_rate": 1.1059907834101382e-06, + "loss": 1.3759924173355103, + "step": 241 + }, + { + "epoch": 0.05579250720461095, + "grad_norm": 0.6008373532438006, + "learning_rate": 1.1105990783410138e-06, + "loss": 1.2209219932556152, + "step": 242 + }, + { + "epoch": 0.056023054755043225, + "grad_norm": 0.7605596488241804, + "learning_rate": 1.1152073732718894e-06, + "loss": 1.5694777965545654, + "step": 243 + }, + { + "epoch": 0.056253602305475506, + "grad_norm": 0.683498482658377, + "learning_rate": 1.1198156682027648e-06, + "loss": 1.4109654426574707, + "step": 244 + }, + { + "epoch": 0.05648414985590778, + "grad_norm": 0.7081776504622735, + "learning_rate": 1.1244239631336406e-06, + "loss": 1.3836995363235474, + "step": 245 + }, + { + "epoch": 0.05671469740634006, + "grad_norm": 0.7009333329190326, + "learning_rate": 1.1290322580645162e-06, + "loss": 1.3234455585479736, + "step": 246 + }, + { + "epoch": 0.056945244956772334, + "grad_norm": 0.7399576167451694, + "learning_rate": 1.1336405529953916e-06, + "loss": 1.3350820541381836, + "step": 247 + }, + { + "epoch": 0.05717579250720461, + "grad_norm": 0.6607735183019582, + "learning_rate": 1.1382488479262674e-06, + "loss": 1.3644275665283203, + "step": 248 + }, + { + "epoch": 0.05740634005763689, + "grad_norm": 0.6299746657112861, + "learning_rate": 1.1428571428571428e-06, + "loss": 1.2501192092895508, + "step": 249 + }, + { + "epoch": 0.05763688760806916, + "grad_norm": 0.7553557637327969, + "learning_rate": 1.1474654377880184e-06, + "loss": 1.352830171585083, + "step": 250 + }, + { + "epoch": 0.05786743515850144, + "grad_norm": 0.7090024654896367, + "learning_rate": 1.1520737327188938e-06, + "loss": 1.2814360857009888, + "step": 251 + }, + { + "epoch": 0.05809798270893372, + "grad_norm": 0.6188362792572963, + "learning_rate": 1.1566820276497696e-06, + "loss": 1.2255218029022217, + "step": 252 + }, + { + "epoch": 0.058328530259366, + "grad_norm": 0.6924813663949737, + "learning_rate": 1.1612903225806452e-06, + "loss": 1.3151466846466064, + "step": 253 + }, + { + "epoch": 0.05855907780979827, + "grad_norm": 0.7272314638515059, + "learning_rate": 1.1658986175115205e-06, + "loss": 1.3779305219650269, + "step": 254 + }, + { + "epoch": 0.058789625360230545, + "grad_norm": 0.6935461974752941, + "learning_rate": 1.1705069124423963e-06, + "loss": 1.2810460329055786, + "step": 255 + }, + { + "epoch": 0.059020172910662826, + "grad_norm": 0.717205959707173, + "learning_rate": 1.1751152073732717e-06, + "loss": 1.4521377086639404, + "step": 256 + }, + { + "epoch": 0.0592507204610951, + "grad_norm": 0.6682723171222476, + "learning_rate": 1.1797235023041473e-06, + "loss": 1.350247859954834, + "step": 257 + }, + { + "epoch": 0.05948126801152738, + "grad_norm": 0.7016795378132386, + "learning_rate": 1.1843317972350231e-06, + "loss": 1.313316822052002, + "step": 258 + }, + { + "epoch": 0.059711815561959654, + "grad_norm": 0.7343429085377424, + "learning_rate": 1.1889400921658985e-06, + "loss": 1.3576340675354004, + "step": 259 + }, + { + "epoch": 0.05994236311239193, + "grad_norm": 0.7069971351993517, + "learning_rate": 1.1935483870967741e-06, + "loss": 1.232670545578003, + "step": 260 + }, + { + "epoch": 0.06017291066282421, + "grad_norm": 0.6720886414289914, + "learning_rate": 1.1981566820276497e-06, + "loss": 1.2778209447860718, + "step": 261 + }, + { + "epoch": 0.06040345821325648, + "grad_norm": 0.7391359754625508, + "learning_rate": 1.2027649769585253e-06, + "loss": 1.3562755584716797, + "step": 262 + }, + { + "epoch": 0.06063400576368876, + "grad_norm": 0.6745648671854169, + "learning_rate": 1.207373271889401e-06, + "loss": 1.2798476219177246, + "step": 263 + }, + { + "epoch": 0.06086455331412104, + "grad_norm": 0.6712121780259053, + "learning_rate": 1.2119815668202765e-06, + "loss": 1.2518937587738037, + "step": 264 + }, + { + "epoch": 0.06109510086455332, + "grad_norm": 0.6699598442540771, + "learning_rate": 1.216589861751152e-06, + "loss": 1.394336462020874, + "step": 265 + }, + { + "epoch": 0.06132564841498559, + "grad_norm": 0.6776471690188184, + "learning_rate": 1.2211981566820275e-06, + "loss": 1.214491605758667, + "step": 266 + }, + { + "epoch": 0.061556195965417865, + "grad_norm": 0.6818615172714886, + "learning_rate": 1.2258064516129033e-06, + "loss": 1.2698123455047607, + "step": 267 + }, + { + "epoch": 0.061786743515850145, + "grad_norm": 0.6535435362430803, + "learning_rate": 1.2304147465437787e-06, + "loss": 1.3011083602905273, + "step": 268 + }, + { + "epoch": 0.06201729106628242, + "grad_norm": 0.6934735806473995, + "learning_rate": 1.2350230414746543e-06, + "loss": 1.296421766281128, + "step": 269 + }, + { + "epoch": 0.0622478386167147, + "grad_norm": 0.79226943048109, + "learning_rate": 1.23963133640553e-06, + "loss": 1.485987901687622, + "step": 270 + }, + { + "epoch": 0.062478386167146974, + "grad_norm": 0.7765859624895566, + "learning_rate": 1.2442396313364054e-06, + "loss": 1.3649810552597046, + "step": 271 + }, + { + "epoch": 0.06270893371757925, + "grad_norm": 0.6341780163248334, + "learning_rate": 1.248847926267281e-06, + "loss": 1.2397961616516113, + "step": 272 + }, + { + "epoch": 0.06293948126801152, + "grad_norm": 0.6525113440013135, + "learning_rate": 1.2534562211981564e-06, + "loss": 1.2815860509872437, + "step": 273 + }, + { + "epoch": 0.0631700288184438, + "grad_norm": 0.6709241814765708, + "learning_rate": 1.2580645161290322e-06, + "loss": 1.228407859802246, + "step": 274 + }, + { + "epoch": 0.06340057636887608, + "grad_norm": 0.5902550603186971, + "learning_rate": 1.2626728110599078e-06, + "loss": 1.2208014726638794, + "step": 275 + }, + { + "epoch": 0.06363112391930836, + "grad_norm": 0.6883266896303725, + "learning_rate": 1.2672811059907832e-06, + "loss": 1.3022860288619995, + "step": 276 + }, + { + "epoch": 0.06386167146974063, + "grad_norm": 0.6958190963931714, + "learning_rate": 1.271889400921659e-06, + "loss": 1.2390055656433105, + "step": 277 + }, + { + "epoch": 0.06409221902017291, + "grad_norm": 0.7060333360910418, + "learning_rate": 1.2764976958525344e-06, + "loss": 1.2937133312225342, + "step": 278 + }, + { + "epoch": 0.06432276657060519, + "grad_norm": 0.72113993110492, + "learning_rate": 1.28110599078341e-06, + "loss": 1.278928279876709, + "step": 279 + }, + { + "epoch": 0.06455331412103746, + "grad_norm": 0.7956220599215559, + "learning_rate": 1.2857142857142858e-06, + "loss": 1.3676493167877197, + "step": 280 + }, + { + "epoch": 0.06478386167146974, + "grad_norm": 0.6685675905435974, + "learning_rate": 1.2903225806451612e-06, + "loss": 1.2168056964874268, + "step": 281 + }, + { + "epoch": 0.06501440922190202, + "grad_norm": 0.7514960943888414, + "learning_rate": 1.2949308755760368e-06, + "loss": 1.3900643587112427, + "step": 282 + }, + { + "epoch": 0.0652449567723343, + "grad_norm": 0.6534086686230444, + "learning_rate": 1.2995391705069124e-06, + "loss": 1.2207615375518799, + "step": 283 + }, + { + "epoch": 0.06547550432276657, + "grad_norm": 0.6869837785111367, + "learning_rate": 1.304147465437788e-06, + "loss": 1.2372363805770874, + "step": 284 + }, + { + "epoch": 0.06570605187319885, + "grad_norm": 0.7276088382668475, + "learning_rate": 1.3087557603686636e-06, + "loss": 1.15517258644104, + "step": 285 + }, + { + "epoch": 0.06593659942363113, + "grad_norm": 0.7261960848573564, + "learning_rate": 1.3133640552995392e-06, + "loss": 1.3100334405899048, + "step": 286 + }, + { + "epoch": 0.0661671469740634, + "grad_norm": 0.795094119655108, + "learning_rate": 1.3179723502304148e-06, + "loss": 1.4036345481872559, + "step": 287 + }, + { + "epoch": 0.06639769452449568, + "grad_norm": 0.7130936562568114, + "learning_rate": 1.3225806451612901e-06, + "loss": 1.2247200012207031, + "step": 288 + }, + { + "epoch": 0.06662824207492796, + "grad_norm": 0.6327777226077211, + "learning_rate": 1.327188940092166e-06, + "loss": 1.1968882083892822, + "step": 289 + }, + { + "epoch": 0.06685878962536022, + "grad_norm": 0.73231661672907, + "learning_rate": 1.3317972350230413e-06, + "loss": 1.317826271057129, + "step": 290 + }, + { + "epoch": 0.0670893371757925, + "grad_norm": 0.7205974467953724, + "learning_rate": 1.336405529953917e-06, + "loss": 1.2956342697143555, + "step": 291 + }, + { + "epoch": 0.06731988472622479, + "grad_norm": 0.7444236121791392, + "learning_rate": 1.3410138248847927e-06, + "loss": 1.2325165271759033, + "step": 292 + }, + { + "epoch": 0.06755043227665707, + "grad_norm": 0.6273978280781463, + "learning_rate": 1.3456221198156681e-06, + "loss": 1.2119462490081787, + "step": 293 + }, + { + "epoch": 0.06778097982708933, + "grad_norm": 0.7992045270603186, + "learning_rate": 1.3502304147465437e-06, + "loss": 1.3506251573562622, + "step": 294 + }, + { + "epoch": 0.06801152737752161, + "grad_norm": 0.6560610786503305, + "learning_rate": 1.354838709677419e-06, + "loss": 1.1504114866256714, + "step": 295 + }, + { + "epoch": 0.0682420749279539, + "grad_norm": 0.6390191765410149, + "learning_rate": 1.359447004608295e-06, + "loss": 1.1813435554504395, + "step": 296 + }, + { + "epoch": 0.06847262247838616, + "grad_norm": 0.6787488194794526, + "learning_rate": 1.3640552995391705e-06, + "loss": 1.205298900604248, + "step": 297 + }, + { + "epoch": 0.06870317002881844, + "grad_norm": 0.6488526630183898, + "learning_rate": 1.3686635944700459e-06, + "loss": 1.152748942375183, + "step": 298 + }, + { + "epoch": 0.06893371757925072, + "grad_norm": 0.6794904899583581, + "learning_rate": 1.3732718894009217e-06, + "loss": 1.2536249160766602, + "step": 299 + }, + { + "epoch": 0.069164265129683, + "grad_norm": 0.6751957999851543, + "learning_rate": 1.377880184331797e-06, + "loss": 1.2739291191101074, + "step": 300 + }, + { + "epoch": 0.06939481268011527, + "grad_norm": 0.6991854064813895, + "learning_rate": 1.3824884792626727e-06, + "loss": 1.2369191646575928, + "step": 301 + }, + { + "epoch": 0.06962536023054755, + "grad_norm": 0.826219529491011, + "learning_rate": 1.3870967741935485e-06, + "loss": 1.3230082988739014, + "step": 302 + }, + { + "epoch": 0.06985590778097983, + "grad_norm": 0.808711382879254, + "learning_rate": 1.3917050691244239e-06, + "loss": 1.3362655639648438, + "step": 303 + }, + { + "epoch": 0.0700864553314121, + "grad_norm": 0.6609985845459885, + "learning_rate": 1.3963133640552995e-06, + "loss": 1.1898441314697266, + "step": 304 + }, + { + "epoch": 0.07031700288184438, + "grad_norm": 0.7674645714085818, + "learning_rate": 1.400921658986175e-06, + "loss": 1.3019602298736572, + "step": 305 + }, + { + "epoch": 0.07054755043227666, + "grad_norm": 0.7540320255609526, + "learning_rate": 1.4055299539170507e-06, + "loss": 1.3292012214660645, + "step": 306 + }, + { + "epoch": 0.07077809798270894, + "grad_norm": 0.7296176404767546, + "learning_rate": 1.410138248847926e-06, + "loss": 1.2561442852020264, + "step": 307 + }, + { + "epoch": 0.07100864553314121, + "grad_norm": 0.7354489519106788, + "learning_rate": 1.4147465437788018e-06, + "loss": 1.1946594715118408, + "step": 308 + }, + { + "epoch": 0.07123919308357349, + "grad_norm": 0.8454555568104161, + "learning_rate": 1.4193548387096774e-06, + "loss": 1.3130412101745605, + "step": 309 + }, + { + "epoch": 0.07146974063400577, + "grad_norm": 0.7568231549725508, + "learning_rate": 1.4239631336405528e-06, + "loss": 1.264148235321045, + "step": 310 + }, + { + "epoch": 0.07170028818443804, + "grad_norm": 0.684674340294116, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.133709192276001, + "step": 311 + }, + { + "epoch": 0.07193083573487032, + "grad_norm": 0.7308987546704907, + "learning_rate": 1.433179723502304e-06, + "loss": 1.2604464292526245, + "step": 312 + }, + { + "epoch": 0.0721613832853026, + "grad_norm": 0.7914252137423667, + "learning_rate": 1.4377880184331796e-06, + "loss": 1.386889100074768, + "step": 313 + }, + { + "epoch": 0.07239193083573486, + "grad_norm": 0.7000597647399976, + "learning_rate": 1.4423963133640554e-06, + "loss": 1.22135591506958, + "step": 314 + }, + { + "epoch": 0.07262247838616714, + "grad_norm": 0.702819138693291, + "learning_rate": 1.4470046082949308e-06, + "loss": 1.2078099250793457, + "step": 315 + }, + { + "epoch": 0.07285302593659942, + "grad_norm": 0.8775736552686038, + "learning_rate": 1.4516129032258064e-06, + "loss": 1.3379974365234375, + "step": 316 + }, + { + "epoch": 0.0730835734870317, + "grad_norm": 0.7634284151011571, + "learning_rate": 1.4562211981566818e-06, + "loss": 1.219855785369873, + "step": 317 + }, + { + "epoch": 0.07331412103746397, + "grad_norm": 0.717221305660253, + "learning_rate": 1.4608294930875576e-06, + "loss": 1.0662527084350586, + "step": 318 + }, + { + "epoch": 0.07354466858789625, + "grad_norm": 0.7065622523456622, + "learning_rate": 1.4654377880184332e-06, + "loss": 1.0985239744186401, + "step": 319 + }, + { + "epoch": 0.07377521613832853, + "grad_norm": 0.6722074850030375, + "learning_rate": 1.4700460829493086e-06, + "loss": 1.1001049280166626, + "step": 320 + }, + { + "epoch": 0.0740057636887608, + "grad_norm": 0.6840726731766956, + "learning_rate": 1.4746543778801844e-06, + "loss": 1.225736141204834, + "step": 321 + }, + { + "epoch": 0.07423631123919308, + "grad_norm": 0.6852972399988531, + "learning_rate": 1.4792626728110598e-06, + "loss": 1.1907551288604736, + "step": 322 + }, + { + "epoch": 0.07446685878962536, + "grad_norm": 0.7595767032333773, + "learning_rate": 1.4838709677419353e-06, + "loss": 1.1923848390579224, + "step": 323 + }, + { + "epoch": 0.07469740634005764, + "grad_norm": 0.6912770856373969, + "learning_rate": 1.4884792626728112e-06, + "loss": 1.1136579513549805, + "step": 324 + }, + { + "epoch": 0.07492795389048991, + "grad_norm": 0.6984679703551435, + "learning_rate": 1.4930875576036865e-06, + "loss": 1.1610283851623535, + "step": 325 + }, + { + "epoch": 0.07515850144092219, + "grad_norm": 0.6677016485987167, + "learning_rate": 1.4976958525345621e-06, + "loss": 1.1658828258514404, + "step": 326 + }, + { + "epoch": 0.07538904899135447, + "grad_norm": 0.7507307887297694, + "learning_rate": 1.5023041474654377e-06, + "loss": 1.1573631763458252, + "step": 327 + }, + { + "epoch": 0.07561959654178674, + "grad_norm": 0.8364861854285678, + "learning_rate": 1.5069124423963133e-06, + "loss": 1.305356740951538, + "step": 328 + }, + { + "epoch": 0.07585014409221902, + "grad_norm": 0.7387762275193068, + "learning_rate": 1.5115207373271887e-06, + "loss": 1.1737552881240845, + "step": 329 + }, + { + "epoch": 0.0760806916426513, + "grad_norm": 0.8767163364621563, + "learning_rate": 1.5161290322580645e-06, + "loss": 1.3644309043884277, + "step": 330 + }, + { + "epoch": 0.07631123919308358, + "grad_norm": 0.7488441886874779, + "learning_rate": 1.5207373271889401e-06, + "loss": 1.1332610845565796, + "step": 331 + }, + { + "epoch": 0.07654178674351585, + "grad_norm": 0.6720106893242441, + "learning_rate": 1.5253456221198155e-06, + "loss": 1.1542474031448364, + "step": 332 + }, + { + "epoch": 0.07677233429394813, + "grad_norm": 0.6844904773167374, + "learning_rate": 1.5299539170506913e-06, + "loss": 1.2047884464263916, + "step": 333 + }, + { + "epoch": 0.07700288184438041, + "grad_norm": 0.7116834419031486, + "learning_rate": 1.5345622119815667e-06, + "loss": 1.185925006866455, + "step": 334 + }, + { + "epoch": 0.07723342939481267, + "grad_norm": 0.8221074928786821, + "learning_rate": 1.5391705069124423e-06, + "loss": 1.3261258602142334, + "step": 335 + }, + { + "epoch": 0.07746397694524496, + "grad_norm": 0.7367381450019147, + "learning_rate": 1.543778801843318e-06, + "loss": 1.226957082748413, + "step": 336 + }, + { + "epoch": 0.07769452449567724, + "grad_norm": 0.690883992926701, + "learning_rate": 1.5483870967741935e-06, + "loss": 1.0979371070861816, + "step": 337 + }, + { + "epoch": 0.07792507204610952, + "grad_norm": 0.6808836094905616, + "learning_rate": 1.552995391705069e-06, + "loss": 1.2402095794677734, + "step": 338 + }, + { + "epoch": 0.07815561959654178, + "grad_norm": 0.7713932969797707, + "learning_rate": 1.5576036866359445e-06, + "loss": 1.123030185699463, + "step": 339 + }, + { + "epoch": 0.07838616714697406, + "grad_norm": 0.7901531027112338, + "learning_rate": 1.5622119815668203e-06, + "loss": 1.1617474555969238, + "step": 340 + }, + { + "epoch": 0.07861671469740635, + "grad_norm": 0.8006480481311214, + "learning_rate": 1.5668202764976959e-06, + "loss": 1.1931252479553223, + "step": 341 + }, + { + "epoch": 0.07884726224783861, + "grad_norm": 0.7207543428315331, + "learning_rate": 1.5714285714285712e-06, + "loss": 1.164405107498169, + "step": 342 + }, + { + "epoch": 0.07907780979827089, + "grad_norm": 0.7403414009803999, + "learning_rate": 1.576036866359447e-06, + "loss": 1.1092296838760376, + "step": 343 + }, + { + "epoch": 0.07930835734870317, + "grad_norm": 0.8301892918218122, + "learning_rate": 1.5806451612903224e-06, + "loss": 1.2289469242095947, + "step": 344 + }, + { + "epoch": 0.07953890489913544, + "grad_norm": 0.7791254530131521, + "learning_rate": 1.585253456221198e-06, + "loss": 1.3640224933624268, + "step": 345 + }, + { + "epoch": 0.07976945244956772, + "grad_norm": 0.807839732947003, + "learning_rate": 1.5898617511520738e-06, + "loss": 1.2496929168701172, + "step": 346 + }, + { + "epoch": 0.08, + "grad_norm": 0.6959074479343652, + "learning_rate": 1.5944700460829492e-06, + "loss": 1.0853437185287476, + "step": 347 + }, + { + "epoch": 0.08023054755043228, + "grad_norm": 0.7268485255917756, + "learning_rate": 1.5990783410138248e-06, + "loss": 1.2237377166748047, + "step": 348 + }, + { + "epoch": 0.08046109510086455, + "grad_norm": 0.6646199872578112, + "learning_rate": 1.6036866359447004e-06, + "loss": 0.9917643666267395, + "step": 349 + }, + { + "epoch": 0.08069164265129683, + "grad_norm": 0.7942775598883802, + "learning_rate": 1.608294930875576e-06, + "loss": 1.3162565231323242, + "step": 350 + }, + { + "epoch": 0.08092219020172911, + "grad_norm": 0.6469982944799066, + "learning_rate": 1.6129032258064514e-06, + "loss": 0.994131863117218, + "step": 351 + }, + { + "epoch": 0.08115273775216138, + "grad_norm": 0.7099061902752692, + "learning_rate": 1.6175115207373272e-06, + "loss": 1.154555082321167, + "step": 352 + }, + { + "epoch": 0.08138328530259366, + "grad_norm": 0.7439801731909884, + "learning_rate": 1.6221198156682028e-06, + "loss": 1.0531972646713257, + "step": 353 + }, + { + "epoch": 0.08161383285302594, + "grad_norm": 0.793781848371416, + "learning_rate": 1.6267281105990782e-06, + "loss": 1.1934162378311157, + "step": 354 + }, + { + "epoch": 0.08184438040345822, + "grad_norm": 0.7349998365191204, + "learning_rate": 1.631336405529954e-06, + "loss": 1.19966721534729, + "step": 355 + }, + { + "epoch": 0.08207492795389049, + "grad_norm": 0.8221740376040937, + "learning_rate": 1.6359447004608294e-06, + "loss": 1.0795832872390747, + "step": 356 + }, + { + "epoch": 0.08230547550432277, + "grad_norm": 0.7298340356609088, + "learning_rate": 1.640552995391705e-06, + "loss": 1.0402061939239502, + "step": 357 + }, + { + "epoch": 0.08253602305475505, + "grad_norm": 0.7847539046036707, + "learning_rate": 1.6451612903225808e-06, + "loss": 1.229203462600708, + "step": 358 + }, + { + "epoch": 0.08276657060518731, + "grad_norm": 0.7172034000711612, + "learning_rate": 1.6497695852534561e-06, + "loss": 1.1610770225524902, + "step": 359 + }, + { + "epoch": 0.0829971181556196, + "grad_norm": 0.7669965747112191, + "learning_rate": 1.6543778801843317e-06, + "loss": 1.1292459964752197, + "step": 360 + }, + { + "epoch": 0.08322766570605188, + "grad_norm": 0.7517664120896831, + "learning_rate": 1.6589861751152071e-06, + "loss": 1.1873208284378052, + "step": 361 + }, + { + "epoch": 0.08345821325648416, + "grad_norm": 0.733900951458625, + "learning_rate": 1.663594470046083e-06, + "loss": 1.161617398262024, + "step": 362 + }, + { + "epoch": 0.08368876080691642, + "grad_norm": 0.7247739442724684, + "learning_rate": 1.6682027649769585e-06, + "loss": 1.0853413343429565, + "step": 363 + }, + { + "epoch": 0.0839193083573487, + "grad_norm": 0.8591163320147464, + "learning_rate": 1.672811059907834e-06, + "loss": 1.1864356994628906, + "step": 364 + }, + { + "epoch": 0.08414985590778098, + "grad_norm": 0.7336734972950607, + "learning_rate": 1.6774193548387097e-06, + "loss": 1.1111290454864502, + "step": 365 + }, + { + "epoch": 0.08438040345821325, + "grad_norm": 0.7814288357485446, + "learning_rate": 1.682027649769585e-06, + "loss": 1.1710078716278076, + "step": 366 + }, + { + "epoch": 0.08461095100864553, + "grad_norm": 0.6790802315270096, + "learning_rate": 1.6866359447004607e-06, + "loss": 1.0132228136062622, + "step": 367 + }, + { + "epoch": 0.08484149855907781, + "grad_norm": 0.8159760904129824, + "learning_rate": 1.6912442396313363e-06, + "loss": 1.2294046878814697, + "step": 368 + }, + { + "epoch": 0.08507204610951008, + "grad_norm": 0.7957648972259336, + "learning_rate": 1.6958525345622119e-06, + "loss": 1.1442105770111084, + "step": 369 + }, + { + "epoch": 0.08530259365994236, + "grad_norm": 0.7995238723541568, + "learning_rate": 1.7004608294930875e-06, + "loss": 1.119593620300293, + "step": 370 + }, + { + "epoch": 0.08553314121037464, + "grad_norm": 0.7863033103100832, + "learning_rate": 1.705069124423963e-06, + "loss": 1.145449161529541, + "step": 371 + }, + { + "epoch": 0.08576368876080692, + "grad_norm": 0.7341046610073212, + "learning_rate": 1.7096774193548387e-06, + "loss": 1.0639642477035522, + "step": 372 + }, + { + "epoch": 0.08599423631123919, + "grad_norm": 0.9126792061000337, + "learning_rate": 1.714285714285714e-06, + "loss": 1.1851778030395508, + "step": 373 + }, + { + "epoch": 0.08622478386167147, + "grad_norm": 0.7973778810449275, + "learning_rate": 1.7188940092165899e-06, + "loss": 1.0616769790649414, + "step": 374 + }, + { + "epoch": 0.08645533141210375, + "grad_norm": 0.7901345366558606, + "learning_rate": 1.7235023041474655e-06, + "loss": 1.0211896896362305, + "step": 375 + }, + { + "epoch": 0.08668587896253602, + "grad_norm": 0.8252437141593564, + "learning_rate": 1.7281105990783408e-06, + "loss": 1.1738722324371338, + "step": 376 + }, + { + "epoch": 0.0869164265129683, + "grad_norm": 0.7679463056914901, + "learning_rate": 1.7327188940092167e-06, + "loss": 1.0378369092941284, + "step": 377 + }, + { + "epoch": 0.08714697406340058, + "grad_norm": 0.8131293786091974, + "learning_rate": 1.737327188940092e-06, + "loss": 1.0998988151550293, + "step": 378 + }, + { + "epoch": 0.08737752161383286, + "grad_norm": 0.7733685330823146, + "learning_rate": 1.7419354838709676e-06, + "loss": 1.023256540298462, + "step": 379 + }, + { + "epoch": 0.08760806916426513, + "grad_norm": 0.7246433117470548, + "learning_rate": 1.7465437788018434e-06, + "loss": 1.1030439138412476, + "step": 380 + }, + { + "epoch": 0.0878386167146974, + "grad_norm": 0.7707540243721439, + "learning_rate": 1.7511520737327188e-06, + "loss": 1.0966073274612427, + "step": 381 + }, + { + "epoch": 0.08806916426512969, + "grad_norm": 0.7627867300031437, + "learning_rate": 1.7557603686635944e-06, + "loss": 1.07340669631958, + "step": 382 + }, + { + "epoch": 0.08829971181556195, + "grad_norm": 0.8500159940576424, + "learning_rate": 1.7603686635944698e-06, + "loss": 1.14761221408844, + "step": 383 + }, + { + "epoch": 0.08853025936599423, + "grad_norm": 0.7457463472293449, + "learning_rate": 1.7649769585253456e-06, + "loss": 0.9445088505744934, + "step": 384 + }, + { + "epoch": 0.08876080691642652, + "grad_norm": 0.8663122026588986, + "learning_rate": 1.7695852534562212e-06, + "loss": 1.1617610454559326, + "step": 385 + }, + { + "epoch": 0.0889913544668588, + "grad_norm": 0.7082642521683337, + "learning_rate": 1.7741935483870966e-06, + "loss": 0.951229989528656, + "step": 386 + }, + { + "epoch": 0.08922190201729106, + "grad_norm": 0.7461309802802878, + "learning_rate": 1.7788018433179724e-06, + "loss": 1.0025156736373901, + "step": 387 + }, + { + "epoch": 0.08945244956772334, + "grad_norm": 0.8360911278199987, + "learning_rate": 1.7834101382488478e-06, + "loss": 1.1546887159347534, + "step": 388 + }, + { + "epoch": 0.08968299711815562, + "grad_norm": 0.7751782140721731, + "learning_rate": 1.7880184331797234e-06, + "loss": 0.9596165418624878, + "step": 389 + }, + { + "epoch": 0.08991354466858789, + "grad_norm": 0.7529645387949501, + "learning_rate": 1.792626728110599e-06, + "loss": 0.9940363168716431, + "step": 390 + }, + { + "epoch": 0.09014409221902017, + "grad_norm": 0.804620736198686, + "learning_rate": 1.7972350230414746e-06, + "loss": 1.0265294313430786, + "step": 391 + }, + { + "epoch": 0.09037463976945245, + "grad_norm": 0.7957136691031254, + "learning_rate": 1.8018433179723502e-06, + "loss": 0.9495709538459778, + "step": 392 + }, + { + "epoch": 0.09060518731988472, + "grad_norm": 0.787168756666669, + "learning_rate": 1.8064516129032258e-06, + "loss": 0.9847695231437683, + "step": 393 + }, + { + "epoch": 0.090835734870317, + "grad_norm": 0.7881149009057379, + "learning_rate": 1.8110599078341013e-06, + "loss": 1.0195221900939941, + "step": 394 + }, + { + "epoch": 0.09106628242074928, + "grad_norm": 0.7999107012228945, + "learning_rate": 1.8156682027649767e-06, + "loss": 0.9874474406242371, + "step": 395 + }, + { + "epoch": 0.09129682997118156, + "grad_norm": 0.927208840166958, + "learning_rate": 1.8202764976958525e-06, + "loss": 1.1311742067337036, + "step": 396 + }, + { + "epoch": 0.09152737752161383, + "grad_norm": 0.9095065417039184, + "learning_rate": 1.8248847926267281e-06, + "loss": 1.1371029615402222, + "step": 397 + }, + { + "epoch": 0.09175792507204611, + "grad_norm": 0.8481304449628981, + "learning_rate": 1.8294930875576035e-06, + "loss": 1.0090055465698242, + "step": 398 + }, + { + "epoch": 0.09198847262247839, + "grad_norm": 0.8420788132547982, + "learning_rate": 1.8341013824884793e-06, + "loss": 1.07207190990448, + "step": 399 + }, + { + "epoch": 0.09221902017291066, + "grad_norm": 0.8184534103423728, + "learning_rate": 1.8387096774193547e-06, + "loss": 0.9810532331466675, + "step": 400 + }, + { + "epoch": 0.09244956772334294, + "grad_norm": 1.0411936731827351, + "learning_rate": 1.8433179723502303e-06, + "loss": 1.2664501667022705, + "step": 401 + }, + { + "epoch": 0.09268011527377522, + "grad_norm": 0.7852023561267767, + "learning_rate": 1.8479262672811061e-06, + "loss": 1.0095962285995483, + "step": 402 + }, + { + "epoch": 0.0929106628242075, + "grad_norm": 0.7857131234487584, + "learning_rate": 1.8525345622119815e-06, + "loss": 1.0042834281921387, + "step": 403 + }, + { + "epoch": 0.09314121037463977, + "grad_norm": 0.7709647586214176, + "learning_rate": 1.857142857142857e-06, + "loss": 0.8744128942489624, + "step": 404 + }, + { + "epoch": 0.09337175792507205, + "grad_norm": 0.8530952338978857, + "learning_rate": 1.8617511520737325e-06, + "loss": 1.0015833377838135, + "step": 405 + }, + { + "epoch": 0.09360230547550433, + "grad_norm": 0.7989612398012207, + "learning_rate": 1.8663594470046083e-06, + "loss": 1.0201606750488281, + "step": 406 + }, + { + "epoch": 0.0938328530259366, + "grad_norm": 1.043996819106173, + "learning_rate": 1.8709677419354837e-06, + "loss": 1.1015177965164185, + "step": 407 + }, + { + "epoch": 0.09406340057636887, + "grad_norm": 0.882516706195983, + "learning_rate": 1.8755760368663593e-06, + "loss": 1.091389775276184, + "step": 408 + }, + { + "epoch": 0.09429394812680116, + "grad_norm": 0.7840852185129056, + "learning_rate": 1.880184331797235e-06, + "loss": 0.8727986216545105, + "step": 409 + }, + { + "epoch": 0.09452449567723344, + "grad_norm": 0.8398591191752447, + "learning_rate": 1.8847926267281104e-06, + "loss": 0.9092183113098145, + "step": 410 + }, + { + "epoch": 0.0947550432276657, + "grad_norm": 0.8436593101983947, + "learning_rate": 1.889400921658986e-06, + "loss": 0.9508894085884094, + "step": 411 + }, + { + "epoch": 0.09498559077809798, + "grad_norm": 0.7678545338337761, + "learning_rate": 1.8940092165898616e-06, + "loss": 0.8827848434448242, + "step": 412 + }, + { + "epoch": 0.09521613832853026, + "grad_norm": 0.7648299956803372, + "learning_rate": 1.8986175115207372e-06, + "loss": 1.0385243892669678, + "step": 413 + }, + { + "epoch": 0.09544668587896253, + "grad_norm": 0.8242600559542741, + "learning_rate": 1.9032258064516128e-06, + "loss": 0.9325747489929199, + "step": 414 + }, + { + "epoch": 0.09567723342939481, + "grad_norm": 0.7843703854217207, + "learning_rate": 1.9078341013824884e-06, + "loss": 0.9631662964820862, + "step": 415 + }, + { + "epoch": 0.09590778097982709, + "grad_norm": 1.1262060148133348, + "learning_rate": 1.912442396313364e-06, + "loss": 1.1359961032867432, + "step": 416 + }, + { + "epoch": 0.09613832853025936, + "grad_norm": 0.8039492437719185, + "learning_rate": 1.9170506912442396e-06, + "loss": 0.9880660772323608, + "step": 417 + }, + { + "epoch": 0.09636887608069164, + "grad_norm": 0.8001061947110307, + "learning_rate": 1.921658986175115e-06, + "loss": 0.8506733179092407, + "step": 418 + }, + { + "epoch": 0.09659942363112392, + "grad_norm": 0.9791271445171249, + "learning_rate": 1.926267281105991e-06, + "loss": 1.0341942310333252, + "step": 419 + }, + { + "epoch": 0.0968299711815562, + "grad_norm": 0.7763561172041712, + "learning_rate": 1.930875576036866e-06, + "loss": 0.8546561002731323, + "step": 420 + }, + { + "epoch": 0.09706051873198847, + "grad_norm": 0.8024656403802054, + "learning_rate": 1.935483870967742e-06, + "loss": 0.9161783456802368, + "step": 421 + }, + { + "epoch": 0.09729106628242075, + "grad_norm": 0.9113560381281316, + "learning_rate": 1.9400921658986174e-06, + "loss": 1.015718698501587, + "step": 422 + }, + { + "epoch": 0.09752161383285303, + "grad_norm": 0.8303491458325148, + "learning_rate": 1.944700460829493e-06, + "loss": 0.9243098497390747, + "step": 423 + }, + { + "epoch": 0.0977521613832853, + "grad_norm": 0.8465800362864491, + "learning_rate": 1.9493087557603686e-06, + "loss": 0.8776401281356812, + "step": 424 + }, + { + "epoch": 0.09798270893371758, + "grad_norm": 0.7514444317981556, + "learning_rate": 1.953917050691244e-06, + "loss": 0.8500463962554932, + "step": 425 + }, + { + "epoch": 0.09821325648414986, + "grad_norm": 0.7938198109985205, + "learning_rate": 1.9585253456221198e-06, + "loss": 0.8716859817504883, + "step": 426 + }, + { + "epoch": 0.09844380403458214, + "grad_norm": 0.8017507552829808, + "learning_rate": 1.963133640552995e-06, + "loss": 0.9234505891799927, + "step": 427 + }, + { + "epoch": 0.0986743515850144, + "grad_norm": 0.7566678903766497, + "learning_rate": 1.967741935483871e-06, + "loss": 0.9051532745361328, + "step": 428 + }, + { + "epoch": 0.09890489913544669, + "grad_norm": 0.886957468759461, + "learning_rate": 1.9723502304147463e-06, + "loss": 0.8781849145889282, + "step": 429 + }, + { + "epoch": 0.09913544668587897, + "grad_norm": 0.7774885647358278, + "learning_rate": 1.976958525345622e-06, + "loss": 0.8522506952285767, + "step": 430 + }, + { + "epoch": 0.09936599423631123, + "grad_norm": 0.7871502402754743, + "learning_rate": 1.9815668202764975e-06, + "loss": 0.9448544979095459, + "step": 431 + }, + { + "epoch": 0.09959654178674351, + "grad_norm": 0.7617033311294167, + "learning_rate": 1.9861751152073733e-06, + "loss": 0.7997490763664246, + "step": 432 + }, + { + "epoch": 0.0998270893371758, + "grad_norm": 0.7915288270224765, + "learning_rate": 1.9907834101382487e-06, + "loss": 0.8592349290847778, + "step": 433 + }, + { + "epoch": 0.10005763688760808, + "grad_norm": 0.818004653923457, + "learning_rate": 1.995391705069124e-06, + "loss": 0.9532517194747925, + "step": 434 + }, + { + "epoch": 0.10028818443804034, + "grad_norm": 0.8041219998303657, + "learning_rate": 2e-06, + "loss": 0.873796820640564, + "step": 435 + }, + { + "epoch": 0.10051873198847262, + "grad_norm": 0.9934391344021535, + "learning_rate": 1.9999999273552013e-06, + "loss": 0.9449926614761353, + "step": 436 + }, + { + "epoch": 0.1007492795389049, + "grad_norm": 0.9938752545804322, + "learning_rate": 1.999999709420816e-06, + "loss": 0.988682746887207, + "step": 437 + }, + { + "epoch": 0.10097982708933717, + "grad_norm": 0.7475711832587217, + "learning_rate": 1.9999993461968757e-06, + "loss": 0.7442165017127991, + "step": 438 + }, + { + "epoch": 0.10121037463976945, + "grad_norm": 0.7463599495804439, + "learning_rate": 1.9999988376834334e-06, + "loss": 0.8737642168998718, + "step": 439 + }, + { + "epoch": 0.10144092219020173, + "grad_norm": 0.8576731282949522, + "learning_rate": 1.9999981838805625e-06, + "loss": 0.9158309698104858, + "step": 440 + }, + { + "epoch": 0.101671469740634, + "grad_norm": 0.8150704084388924, + "learning_rate": 1.9999973847883583e-06, + "loss": 0.867765486240387, + "step": 441 + }, + { + "epoch": 0.10190201729106628, + "grad_norm": 0.8553308555715755, + "learning_rate": 1.9999964404069368e-06, + "loss": 0.8964484930038452, + "step": 442 + }, + { + "epoch": 0.10213256484149856, + "grad_norm": 0.7705340065317197, + "learning_rate": 1.9999953507364356e-06, + "loss": 0.8330350518226624, + "step": 443 + }, + { + "epoch": 0.10236311239193084, + "grad_norm": 0.7066282569211535, + "learning_rate": 1.9999941157770124e-06, + "loss": 0.7785549163818359, + "step": 444 + }, + { + "epoch": 0.10259365994236311, + "grad_norm": 0.760446701085777, + "learning_rate": 1.999992735528847e-06, + "loss": 0.8079872131347656, + "step": 445 + }, + { + "epoch": 0.10282420749279539, + "grad_norm": 0.7523871144342295, + "learning_rate": 1.99999120999214e-06, + "loss": 0.8472942113876343, + "step": 446 + }, + { + "epoch": 0.10305475504322767, + "grad_norm": 0.7264431448160646, + "learning_rate": 1.9999895391671126e-06, + "loss": 0.8861861228942871, + "step": 447 + }, + { + "epoch": 0.10328530259365994, + "grad_norm": 0.8217421098006639, + "learning_rate": 1.999987723054008e-06, + "loss": 0.885787308216095, + "step": 448 + }, + { + "epoch": 0.10351585014409222, + "grad_norm": 0.7285720204415543, + "learning_rate": 1.9999857616530898e-06, + "loss": 0.6860470771789551, + "step": 449 + }, + { + "epoch": 0.1037463976945245, + "grad_norm": 0.8092634702328965, + "learning_rate": 1.999983654964643e-06, + "loss": 0.8421996235847473, + "step": 450 + }, + { + "epoch": 0.10397694524495678, + "grad_norm": 0.6918798585679139, + "learning_rate": 1.999981402988974e-06, + "loss": 0.7358509302139282, + "step": 451 + }, + { + "epoch": 0.10420749279538905, + "grad_norm": 0.8711876840819741, + "learning_rate": 1.99997900572641e-06, + "loss": 0.9160239100456238, + "step": 452 + }, + { + "epoch": 0.10443804034582133, + "grad_norm": 0.793739222544006, + "learning_rate": 1.9999764631772986e-06, + "loss": 0.8336344361305237, + "step": 453 + }, + { + "epoch": 0.1046685878962536, + "grad_norm": 0.7780053576863255, + "learning_rate": 1.999973775342009e-06, + "loss": 0.8190123438835144, + "step": 454 + }, + { + "epoch": 0.10489913544668587, + "grad_norm": 0.808952000995293, + "learning_rate": 1.9999709422209335e-06, + "loss": 0.8161033391952515, + "step": 455 + }, + { + "epoch": 0.10512968299711815, + "grad_norm": 0.8989947005366407, + "learning_rate": 1.999967963814482e-06, + "loss": 0.9421751499176025, + "step": 456 + }, + { + "epoch": 0.10536023054755043, + "grad_norm": 0.7968643165098177, + "learning_rate": 1.999964840123088e-06, + "loss": 0.763748288154602, + "step": 457 + }, + { + "epoch": 0.10559077809798272, + "grad_norm": 0.7481696350009063, + "learning_rate": 1.9999615711472054e-06, + "loss": 0.8567416667938232, + "step": 458 + }, + { + "epoch": 0.10582132564841498, + "grad_norm": 1.0087319259323613, + "learning_rate": 1.9999581568873087e-06, + "loss": 0.9419023394584656, + "step": 459 + }, + { + "epoch": 0.10605187319884726, + "grad_norm": 0.8346606599446912, + "learning_rate": 1.999954597343894e-06, + "loss": 0.9568943381309509, + "step": 460 + }, + { + "epoch": 0.10628242074927954, + "grad_norm": 0.6775195295071811, + "learning_rate": 1.9999508925174788e-06, + "loss": 0.7311264276504517, + "step": 461 + }, + { + "epoch": 0.10651296829971181, + "grad_norm": 0.7526753824189546, + "learning_rate": 1.999947042408601e-06, + "loss": 0.7850263118743896, + "step": 462 + }, + { + "epoch": 0.10674351585014409, + "grad_norm": 0.7457484944783759, + "learning_rate": 1.9999430470178204e-06, + "loss": 0.7745206356048584, + "step": 463 + }, + { + "epoch": 0.10697406340057637, + "grad_norm": 0.7003739754723187, + "learning_rate": 1.9999389063457173e-06, + "loss": 0.8353025913238525, + "step": 464 + }, + { + "epoch": 0.10720461095100864, + "grad_norm": 0.9101417777797033, + "learning_rate": 1.999934620392893e-06, + "loss": 0.897802472114563, + "step": 465 + }, + { + "epoch": 0.10743515850144092, + "grad_norm": 0.903369064301118, + "learning_rate": 1.999930189159971e-06, + "loss": 0.9702463746070862, + "step": 466 + }, + { + "epoch": 0.1076657060518732, + "grad_norm": 0.7170983912845004, + "learning_rate": 1.9999256126475942e-06, + "loss": 0.8617191314697266, + "step": 467 + }, + { + "epoch": 0.10789625360230548, + "grad_norm": 0.7306281027611775, + "learning_rate": 1.9999208908564277e-06, + "loss": 0.7819014191627502, + "step": 468 + }, + { + "epoch": 0.10812680115273775, + "grad_norm": 0.827677640050239, + "learning_rate": 1.9999160237871578e-06, + "loss": 0.8597872257232666, + "step": 469 + }, + { + "epoch": 0.10835734870317003, + "grad_norm": 0.8728777793339176, + "learning_rate": 1.9999110114404922e-06, + "loss": 0.7132382392883301, + "step": 470 + }, + { + "epoch": 0.10858789625360231, + "grad_norm": 0.7146110096996957, + "learning_rate": 1.9999058538171577e-06, + "loss": 0.8056540489196777, + "step": 471 + }, + { + "epoch": 0.10881844380403458, + "grad_norm": 0.7959224929560886, + "learning_rate": 1.999900550917905e-06, + "loss": 0.8378279209136963, + "step": 472 + }, + { + "epoch": 0.10904899135446686, + "grad_norm": 0.7547724153366271, + "learning_rate": 1.9998951027435034e-06, + "loss": 0.7748581767082214, + "step": 473 + }, + { + "epoch": 0.10927953890489914, + "grad_norm": 0.7391603156936171, + "learning_rate": 1.9998895092947455e-06, + "loss": 0.7897888422012329, + "step": 474 + }, + { + "epoch": 0.10951008645533142, + "grad_norm": 0.7396833498322501, + "learning_rate": 1.999883770572444e-06, + "loss": 0.8274221420288086, + "step": 475 + }, + { + "epoch": 0.10974063400576368, + "grad_norm": 0.7616365712426187, + "learning_rate": 1.9998778865774314e-06, + "loss": 0.8707382082939148, + "step": 476 + }, + { + "epoch": 0.10997118155619597, + "grad_norm": 0.7283542403436611, + "learning_rate": 1.9998718573105633e-06, + "loss": 0.7552956342697144, + "step": 477 + }, + { + "epoch": 0.11020172910662825, + "grad_norm": 0.6823864451295132, + "learning_rate": 1.9998656827727163e-06, + "loss": 0.7214533090591431, + "step": 478 + }, + { + "epoch": 0.11043227665706051, + "grad_norm": 0.763520371282763, + "learning_rate": 1.9998593629647873e-06, + "loss": 0.8750051856040955, + "step": 479 + }, + { + "epoch": 0.1106628242074928, + "grad_norm": 0.7178374692315476, + "learning_rate": 1.9998528978876937e-06, + "loss": 0.7860144376754761, + "step": 480 + }, + { + "epoch": 0.11089337175792507, + "grad_norm": 0.6859242313007144, + "learning_rate": 1.9998462875423753e-06, + "loss": 0.757607102394104, + "step": 481 + }, + { + "epoch": 0.11112391930835736, + "grad_norm": 0.6015793809430244, + "learning_rate": 1.9998395319297926e-06, + "loss": 0.7959357500076294, + "step": 482 + }, + { + "epoch": 0.11135446685878962, + "grad_norm": 0.6229506349949882, + "learning_rate": 1.9998326310509272e-06, + "loss": 0.7823261022567749, + "step": 483 + }, + { + "epoch": 0.1115850144092219, + "grad_norm": 0.7767261362242464, + "learning_rate": 1.999825584906781e-06, + "loss": 0.8102509379386902, + "step": 484 + }, + { + "epoch": 0.11181556195965418, + "grad_norm": 0.7984930890199812, + "learning_rate": 1.999818393498379e-06, + "loss": 0.8045464754104614, + "step": 485 + }, + { + "epoch": 0.11204610951008645, + "grad_norm": 0.7465165784036604, + "learning_rate": 1.999811056826765e-06, + "loss": 0.7793935537338257, + "step": 486 + }, + { + "epoch": 0.11227665706051873, + "grad_norm": 0.9275795908823249, + "learning_rate": 1.999803574893005e-06, + "loss": 0.8787537217140198, + "step": 487 + }, + { + "epoch": 0.11250720461095101, + "grad_norm": 0.6449054496936255, + "learning_rate": 1.9997959476981865e-06, + "loss": 0.7439980506896973, + "step": 488 + }, + { + "epoch": 0.11273775216138328, + "grad_norm": 0.7974021521444602, + "learning_rate": 1.999788175243418e-06, + "loss": 0.8799367547035217, + "step": 489 + }, + { + "epoch": 0.11296829971181556, + "grad_norm": 0.6347750262437738, + "learning_rate": 1.9997802575298277e-06, + "loss": 0.8095611929893494, + "step": 490 + }, + { + "epoch": 0.11319884726224784, + "grad_norm": 0.678068196789053, + "learning_rate": 1.9997721945585666e-06, + "loss": 0.7679798603057861, + "step": 491 + }, + { + "epoch": 0.11342939481268012, + "grad_norm": 0.9415597885304294, + "learning_rate": 1.999763986330806e-06, + "loss": 0.7401316165924072, + "step": 492 + }, + { + "epoch": 0.11365994236311239, + "grad_norm": 0.6821887571104429, + "learning_rate": 1.9997556328477384e-06, + "loss": 0.7155672311782837, + "step": 493 + }, + { + "epoch": 0.11389048991354467, + "grad_norm": 0.7229913098734038, + "learning_rate": 1.9997471341105782e-06, + "loss": 0.7619851231575012, + "step": 494 + }, + { + "epoch": 0.11412103746397695, + "grad_norm": 0.8032976234822868, + "learning_rate": 1.999738490120559e-06, + "loss": 0.8438892960548401, + "step": 495 + }, + { + "epoch": 0.11435158501440922, + "grad_norm": 0.9076868092366523, + "learning_rate": 1.999729700878937e-06, + "loss": 0.8797614574432373, + "step": 496 + }, + { + "epoch": 0.1145821325648415, + "grad_norm": 0.9089650759995708, + "learning_rate": 1.99972076638699e-06, + "loss": 0.8678663969039917, + "step": 497 + }, + { + "epoch": 0.11481268011527378, + "grad_norm": 0.8397478898714018, + "learning_rate": 1.9997116866460154e-06, + "loss": 0.8200712203979492, + "step": 498 + }, + { + "epoch": 0.11504322766570606, + "grad_norm": 0.8811759253610771, + "learning_rate": 1.9997024616573327e-06, + "loss": 0.8951148986816406, + "step": 499 + }, + { + "epoch": 0.11527377521613832, + "grad_norm": 0.7151742231494962, + "learning_rate": 1.9996930914222816e-06, + "loss": 0.7555750012397766, + "step": 500 + }, + { + "epoch": 0.1155043227665706, + "grad_norm": 0.8184974509806977, + "learning_rate": 1.9996835759422245e-06, + "loss": 0.7769593596458435, + "step": 501 + }, + { + "epoch": 0.11573487031700289, + "grad_norm": 1.0057466201597953, + "learning_rate": 1.999673915218543e-06, + "loss": 0.8568921685218811, + "step": 502 + }, + { + "epoch": 0.11596541786743515, + "grad_norm": 0.6854968317254936, + "learning_rate": 1.9996641092526405e-06, + "loss": 0.8469095826148987, + "step": 503 + }, + { + "epoch": 0.11619596541786743, + "grad_norm": 0.6147466487176672, + "learning_rate": 1.999654158045943e-06, + "loss": 0.6636455059051514, + "step": 504 + }, + { + "epoch": 0.11642651296829971, + "grad_norm": 0.7667665478590456, + "learning_rate": 1.9996440615998954e-06, + "loss": 0.7256879806518555, + "step": 505 + }, + { + "epoch": 0.116657060518732, + "grad_norm": 0.848039033522635, + "learning_rate": 1.9996338199159648e-06, + "loss": 0.755784273147583, + "step": 506 + }, + { + "epoch": 0.11688760806916426, + "grad_norm": 0.7864448678312735, + "learning_rate": 1.9996234329956387e-06, + "loss": 0.8655821084976196, + "step": 507 + }, + { + "epoch": 0.11711815561959654, + "grad_norm": 0.764272109359362, + "learning_rate": 1.9996129008404266e-06, + "loss": 0.8127482533454895, + "step": 508 + }, + { + "epoch": 0.11734870317002882, + "grad_norm": 0.6826490621044304, + "learning_rate": 1.999602223451859e-06, + "loss": 0.7233914136886597, + "step": 509 + }, + { + "epoch": 0.11757925072046109, + "grad_norm": 0.6440562693914861, + "learning_rate": 1.999591400831487e-06, + "loss": 0.687708854675293, + "step": 510 + }, + { + "epoch": 0.11780979827089337, + "grad_norm": 1.0116475587467835, + "learning_rate": 1.9995804329808833e-06, + "loss": 0.8101118803024292, + "step": 511 + }, + { + "epoch": 0.11804034582132565, + "grad_norm": 0.7641615867983651, + "learning_rate": 1.999569319901641e-06, + "loss": 0.8226180672645569, + "step": 512 + }, + { + "epoch": 0.11827089337175793, + "grad_norm": 0.7622277313388016, + "learning_rate": 1.9995580615953745e-06, + "loss": 0.7804063558578491, + "step": 513 + }, + { + "epoch": 0.1185014409221902, + "grad_norm": 0.7501641804711608, + "learning_rate": 1.9995466580637203e-06, + "loss": 0.8480167388916016, + "step": 514 + }, + { + "epoch": 0.11873198847262248, + "grad_norm": 0.7373769561784406, + "learning_rate": 1.9995351093083342e-06, + "loss": 0.7421882152557373, + "step": 515 + }, + { + "epoch": 0.11896253602305476, + "grad_norm": 0.792331723623861, + "learning_rate": 1.999523415330895e-06, + "loss": 0.7909554839134216, + "step": 516 + }, + { + "epoch": 0.11919308357348703, + "grad_norm": 0.5843007809679732, + "learning_rate": 1.999511576133101e-06, + "loss": 0.7070448398590088, + "step": 517 + }, + { + "epoch": 0.11942363112391931, + "grad_norm": 0.6862834382234418, + "learning_rate": 1.9994995917166733e-06, + "loss": 0.7208311557769775, + "step": 518 + }, + { + "epoch": 0.11965417867435159, + "grad_norm": 0.8009340684054793, + "learning_rate": 1.9994874620833524e-06, + "loss": 0.7771036624908447, + "step": 519 + }, + { + "epoch": 0.11988472622478386, + "grad_norm": 0.627959573002497, + "learning_rate": 1.999475187234901e-06, + "loss": 0.7144759297370911, + "step": 520 + }, + { + "epoch": 0.12011527377521614, + "grad_norm": 0.6879525250480599, + "learning_rate": 1.9994627671731016e-06, + "loss": 0.7363512516021729, + "step": 521 + }, + { + "epoch": 0.12034582132564842, + "grad_norm": 0.7683035087809761, + "learning_rate": 1.9994502018997592e-06, + "loss": 0.7519102096557617, + "step": 522 + }, + { + "epoch": 0.1205763688760807, + "grad_norm": 0.7758582460040493, + "learning_rate": 1.9994374914167e-06, + "loss": 0.7713091373443604, + "step": 523 + }, + { + "epoch": 0.12080691642651296, + "grad_norm": 0.7495321802737405, + "learning_rate": 1.9994246357257704e-06, + "loss": 0.8354437351226807, + "step": 524 + }, + { + "epoch": 0.12103746397694524, + "grad_norm": 0.6278302590975512, + "learning_rate": 1.9994116348288378e-06, + "loss": 0.6978895664215088, + "step": 525 + }, + { + "epoch": 0.12126801152737753, + "grad_norm": 0.7128547735064281, + "learning_rate": 1.9993984887277913e-06, + "loss": 0.7193020582199097, + "step": 526 + }, + { + "epoch": 0.12149855907780979, + "grad_norm": 0.8734535525152816, + "learning_rate": 1.999385197424541e-06, + "loss": 0.891850471496582, + "step": 527 + }, + { + "epoch": 0.12172910662824207, + "grad_norm": 0.661834209816186, + "learning_rate": 1.999371760921018e-06, + "loss": 0.7829855680465698, + "step": 528 + }, + { + "epoch": 0.12195965417867435, + "grad_norm": 0.9881520932349097, + "learning_rate": 1.999358179219174e-06, + "loss": 0.7942383289337158, + "step": 529 + }, + { + "epoch": 0.12219020172910663, + "grad_norm": 0.8322324544338607, + "learning_rate": 1.9993444523209827e-06, + "loss": 0.7958294153213501, + "step": 530 + }, + { + "epoch": 0.1224207492795389, + "grad_norm": 0.5981241873885679, + "learning_rate": 1.9993305802284385e-06, + "loss": 0.6725438833236694, + "step": 531 + }, + { + "epoch": 0.12265129682997118, + "grad_norm": 0.7477005083386355, + "learning_rate": 1.9993165629435572e-06, + "loss": 0.7068517208099365, + "step": 532 + }, + { + "epoch": 0.12288184438040346, + "grad_norm": 0.6787076143899758, + "learning_rate": 1.999302400468375e-06, + "loss": 0.7997239828109741, + "step": 533 + }, + { + "epoch": 0.12311239193083573, + "grad_norm": 0.7084903189288138, + "learning_rate": 1.999288092804949e-06, + "loss": 0.7294374704360962, + "step": 534 + }, + { + "epoch": 0.12334293948126801, + "grad_norm": 0.6987144343357352, + "learning_rate": 1.999273639955359e-06, + "loss": 0.7457236051559448, + "step": 535 + }, + { + "epoch": 0.12357348703170029, + "grad_norm": 0.6724082603264683, + "learning_rate": 1.999259041921704e-06, + "loss": 0.7387409210205078, + "step": 536 + }, + { + "epoch": 0.12380403458213257, + "grad_norm": 0.6898762574950271, + "learning_rate": 1.9992442987061055e-06, + "loss": 0.7364134192466736, + "step": 537 + }, + { + "epoch": 0.12403458213256484, + "grad_norm": 0.6521987981633685, + "learning_rate": 1.9992294103107053e-06, + "loss": 0.7248586416244507, + "step": 538 + }, + { + "epoch": 0.12426512968299712, + "grad_norm": 0.7533468153268684, + "learning_rate": 1.9992143767376665e-06, + "loss": 0.7337394952774048, + "step": 539 + }, + { + "epoch": 0.1244956772334294, + "grad_norm": 0.7417989879116603, + "learning_rate": 1.9991991979891738e-06, + "loss": 0.7718614935874939, + "step": 540 + }, + { + "epoch": 0.12472622478386167, + "grad_norm": 0.6895167987027419, + "learning_rate": 1.9991838740674315e-06, + "loss": 0.7660986185073853, + "step": 541 + }, + { + "epoch": 0.12495677233429395, + "grad_norm": 0.6217984048756272, + "learning_rate": 1.999168404974667e-06, + "loss": 0.6900891065597534, + "step": 542 + }, + { + "epoch": 0.12518731988472623, + "grad_norm": 0.7509984015500013, + "learning_rate": 1.999152790713127e-06, + "loss": 0.7981909513473511, + "step": 543 + }, + { + "epoch": 0.1254178674351585, + "grad_norm": 0.7877345810756031, + "learning_rate": 1.999137031285081e-06, + "loss": 0.7916548848152161, + "step": 544 + }, + { + "epoch": 0.1256484149855908, + "grad_norm": 0.9082774378814212, + "learning_rate": 1.9991211266928177e-06, + "loss": 0.844967246055603, + "step": 545 + }, + { + "epoch": 0.12587896253602304, + "grad_norm": 0.9213658517355716, + "learning_rate": 1.9991050769386483e-06, + "loss": 0.7453466653823853, + "step": 546 + }, + { + "epoch": 0.12610951008645532, + "grad_norm": 0.698098221320302, + "learning_rate": 1.999088882024905e-06, + "loss": 0.8082910776138306, + "step": 547 + }, + { + "epoch": 0.1263400576368876, + "grad_norm": 0.6335333912387642, + "learning_rate": 1.9990725419539407e-06, + "loss": 0.6806755065917969, + "step": 548 + }, + { + "epoch": 0.12657060518731988, + "grad_norm": 0.7003994944861476, + "learning_rate": 1.999056056728129e-06, + "loss": 0.7786964774131775, + "step": 549 + }, + { + "epoch": 0.12680115273775217, + "grad_norm": 0.7647676187763052, + "learning_rate": 1.9990394263498648e-06, + "loss": 0.7257200479507446, + "step": 550 + }, + { + "epoch": 0.12703170028818445, + "grad_norm": 0.7175599687438361, + "learning_rate": 1.9990226508215653e-06, + "loss": 0.8026515245437622, + "step": 551 + }, + { + "epoch": 0.12726224783861673, + "grad_norm": 0.7080335765217586, + "learning_rate": 1.999005730145667e-06, + "loss": 0.7155156135559082, + "step": 552 + }, + { + "epoch": 0.12749279538904898, + "grad_norm": 0.6991736303344278, + "learning_rate": 1.9989886643246286e-06, + "loss": 0.7604411244392395, + "step": 553 + }, + { + "epoch": 0.12772334293948126, + "grad_norm": 0.8995696774069738, + "learning_rate": 1.9989714533609296e-06, + "loss": 0.7291309237480164, + "step": 554 + }, + { + "epoch": 0.12795389048991354, + "grad_norm": 0.9961871820049175, + "learning_rate": 1.9989540972570703e-06, + "loss": 0.7417641878128052, + "step": 555 + }, + { + "epoch": 0.12818443804034582, + "grad_norm": 1.0793796541266174, + "learning_rate": 1.998936596015573e-06, + "loss": 0.866726279258728, + "step": 556 + }, + { + "epoch": 0.1284149855907781, + "grad_norm": 0.7057517655951833, + "learning_rate": 1.9989189496389797e-06, + "loss": 0.7615523338317871, + "step": 557 + }, + { + "epoch": 0.12864553314121038, + "grad_norm": 0.7155888618962294, + "learning_rate": 1.9989011581298546e-06, + "loss": 0.7050062417984009, + "step": 558 + }, + { + "epoch": 0.12887608069164266, + "grad_norm": 0.6853891634641955, + "learning_rate": 1.9988832214907824e-06, + "loss": 0.6849932074546814, + "step": 559 + }, + { + "epoch": 0.12910662824207492, + "grad_norm": 0.6575409961311479, + "learning_rate": 1.9988651397243698e-06, + "loss": 0.674652099609375, + "step": 560 + }, + { + "epoch": 0.1293371757925072, + "grad_norm": 0.754705339146669, + "learning_rate": 1.998846912833243e-06, + "loss": 0.7126532196998596, + "step": 561 + }, + { + "epoch": 0.12956772334293948, + "grad_norm": 0.6947105773602015, + "learning_rate": 1.9988285408200503e-06, + "loss": 0.7211127281188965, + "step": 562 + }, + { + "epoch": 0.12979827089337176, + "grad_norm": 0.863971231352938, + "learning_rate": 1.998810023687462e-06, + "loss": 0.8206064701080322, + "step": 563 + }, + { + "epoch": 0.13002881844380404, + "grad_norm": 0.7850413983525022, + "learning_rate": 1.998791361438167e-06, + "loss": 0.6803351640701294, + "step": 564 + }, + { + "epoch": 0.13025936599423632, + "grad_norm": 0.6937411478964312, + "learning_rate": 1.9987725540748777e-06, + "loss": 0.700650155544281, + "step": 565 + }, + { + "epoch": 0.1304899135446686, + "grad_norm": 0.9061416401080528, + "learning_rate": 1.9987536016003265e-06, + "loss": 0.6874483823776245, + "step": 566 + }, + { + "epoch": 0.13072046109510085, + "grad_norm": 0.8317311817071676, + "learning_rate": 1.9987345040172666e-06, + "loss": 0.8148888349533081, + "step": 567 + }, + { + "epoch": 0.13095100864553313, + "grad_norm": 0.6811216751990012, + "learning_rate": 1.998715261328473e-06, + "loss": 0.6662083864212036, + "step": 568 + }, + { + "epoch": 0.13118155619596542, + "grad_norm": 0.7320582363601624, + "learning_rate": 1.9986958735367413e-06, + "loss": 0.7676659822463989, + "step": 569 + }, + { + "epoch": 0.1314121037463977, + "grad_norm": 0.8307608690655258, + "learning_rate": 1.9986763406448883e-06, + "loss": 0.7907297015190125, + "step": 570 + }, + { + "epoch": 0.13164265129682998, + "grad_norm": 0.6988345305832468, + "learning_rate": 1.998656662655752e-06, + "loss": 0.6941409111022949, + "step": 571 + }, + { + "epoch": 0.13187319884726226, + "grad_norm": 0.7762530825788114, + "learning_rate": 1.9986368395721916e-06, + "loss": 0.674893319606781, + "step": 572 + }, + { + "epoch": 0.13210374639769454, + "grad_norm": 0.655381279738601, + "learning_rate": 1.9986168713970866e-06, + "loss": 0.6929521560668945, + "step": 573 + }, + { + "epoch": 0.1323342939481268, + "grad_norm": 0.8094828471006544, + "learning_rate": 1.998596758133339e-06, + "loss": 0.7927180528640747, + "step": 574 + }, + { + "epoch": 0.13256484149855907, + "grad_norm": 0.9363455004086035, + "learning_rate": 1.9985764997838708e-06, + "loss": 0.8200454711914062, + "step": 575 + }, + { + "epoch": 0.13279538904899135, + "grad_norm": 0.9063897496222706, + "learning_rate": 1.9985560963516248e-06, + "loss": 0.7749574184417725, + "step": 576 + }, + { + "epoch": 0.13302593659942363, + "grad_norm": 0.7429668697491804, + "learning_rate": 1.998535547839566e-06, + "loss": 0.7536444664001465, + "step": 577 + }, + { + "epoch": 0.1332564841498559, + "grad_norm": 0.6697558499328553, + "learning_rate": 1.9985148542506797e-06, + "loss": 0.5805482864379883, + "step": 578 + }, + { + "epoch": 0.1334870317002882, + "grad_norm": 0.7029078961615552, + "learning_rate": 1.998494015587972e-06, + "loss": 0.8079668283462524, + "step": 579 + }, + { + "epoch": 0.13371757925072045, + "grad_norm": 0.7012305802566163, + "learning_rate": 1.9984730318544713e-06, + "loss": 0.6749714016914368, + "step": 580 + }, + { + "epoch": 0.13394812680115273, + "grad_norm": 0.7581464486907759, + "learning_rate": 1.998451903053226e-06, + "loss": 0.7245970368385315, + "step": 581 + }, + { + "epoch": 0.134178674351585, + "grad_norm": 0.9045791499382148, + "learning_rate": 1.9984306291873055e-06, + "loss": 0.7243727445602417, + "step": 582 + }, + { + "epoch": 0.1344092219020173, + "grad_norm": 1.2504748978932971, + "learning_rate": 1.9984092102598015e-06, + "loss": 0.80174720287323, + "step": 583 + }, + { + "epoch": 0.13463976945244957, + "grad_norm": 0.7662030678381475, + "learning_rate": 1.9983876462738255e-06, + "loss": 0.7339394688606262, + "step": 584 + }, + { + "epoch": 0.13487031700288185, + "grad_norm": 0.8389078489910984, + "learning_rate": 1.9983659372325103e-06, + "loss": 0.785433292388916, + "step": 585 + }, + { + "epoch": 0.13510086455331413, + "grad_norm": 0.8035269095479699, + "learning_rate": 1.9983440831390103e-06, + "loss": 0.7554272413253784, + "step": 586 + }, + { + "epoch": 0.13533141210374638, + "grad_norm": 0.7515708504793569, + "learning_rate": 1.9983220839965005e-06, + "loss": 0.6374361515045166, + "step": 587 + }, + { + "epoch": 0.13556195965417867, + "grad_norm": 0.7429526116241276, + "learning_rate": 1.9982999398081773e-06, + "loss": 0.7672165632247925, + "step": 588 + }, + { + "epoch": 0.13579250720461095, + "grad_norm": 0.6689968310029749, + "learning_rate": 1.998277650577258e-06, + "loss": 0.6684931516647339, + "step": 589 + }, + { + "epoch": 0.13602305475504323, + "grad_norm": 0.7452264804377413, + "learning_rate": 1.998255216306981e-06, + "loss": 0.7915084362030029, + "step": 590 + }, + { + "epoch": 0.1362536023054755, + "grad_norm": 1.4052641353457251, + "learning_rate": 1.9982326370006055e-06, + "loss": 0.7956736087799072, + "step": 591 + }, + { + "epoch": 0.1364841498559078, + "grad_norm": 0.6473849699527142, + "learning_rate": 1.998209912661412e-06, + "loss": 0.6230663061141968, + "step": 592 + }, + { + "epoch": 0.13671469740634007, + "grad_norm": 0.733416699363805, + "learning_rate": 1.998187043292703e-06, + "loss": 0.6164396405220032, + "step": 593 + }, + { + "epoch": 0.13694524495677232, + "grad_norm": 1.02852211395565, + "learning_rate": 1.9981640288978004e-06, + "loss": 0.7923756837844849, + "step": 594 + }, + { + "epoch": 0.1371757925072046, + "grad_norm": 0.7916283439941715, + "learning_rate": 1.9981408694800478e-06, + "loss": 0.752497673034668, + "step": 595 + }, + { + "epoch": 0.13740634005763688, + "grad_norm": 0.8631260691956123, + "learning_rate": 1.998117565042811e-06, + "loss": 0.7453225255012512, + "step": 596 + }, + { + "epoch": 0.13763688760806916, + "grad_norm": 0.8745179307249391, + "learning_rate": 1.9980941155894743e-06, + "loss": 0.7739442586898804, + "step": 597 + }, + { + "epoch": 0.13786743515850144, + "grad_norm": 0.7824694345670692, + "learning_rate": 1.998070521123446e-06, + "loss": 0.7126696109771729, + "step": 598 + }, + { + "epoch": 0.13809798270893373, + "grad_norm": 0.6923320348843225, + "learning_rate": 1.998046781648154e-06, + "loss": 0.8027236461639404, + "step": 599 + }, + { + "epoch": 0.138328530259366, + "grad_norm": 0.926925177155617, + "learning_rate": 1.9980228971670465e-06, + "loss": 0.7869859933853149, + "step": 600 + }, + { + "epoch": 0.13855907780979826, + "grad_norm": 0.7384657646970024, + "learning_rate": 1.9979988676835945e-06, + "loss": 0.6863809823989868, + "step": 601 + }, + { + "epoch": 0.13878962536023054, + "grad_norm": 0.670320439572825, + "learning_rate": 1.9979746932012887e-06, + "loss": 0.6836833953857422, + "step": 602 + }, + { + "epoch": 0.13902017291066282, + "grad_norm": 0.8330589554433049, + "learning_rate": 1.997950373723642e-06, + "loss": 0.6044712662696838, + "step": 603 + }, + { + "epoch": 0.1392507204610951, + "grad_norm": 0.7807440800239879, + "learning_rate": 1.9979259092541876e-06, + "loss": 0.704325795173645, + "step": 604 + }, + { + "epoch": 0.13948126801152738, + "grad_norm": 0.9973562962562118, + "learning_rate": 1.9979012997964796e-06, + "loss": 0.8456264734268188, + "step": 605 + }, + { + "epoch": 0.13971181556195966, + "grad_norm": 0.6848983372751881, + "learning_rate": 1.997876545354094e-06, + "loss": 0.7414695620536804, + "step": 606 + }, + { + "epoch": 0.13994236311239194, + "grad_norm": 0.951142668255918, + "learning_rate": 1.997851645930627e-06, + "loss": 0.8303127288818359, + "step": 607 + }, + { + "epoch": 0.1401729106628242, + "grad_norm": 0.6104498978549777, + "learning_rate": 1.997826601529696e-06, + "loss": 0.5950440168380737, + "step": 608 + }, + { + "epoch": 0.14040345821325648, + "grad_norm": 0.7575557670877245, + "learning_rate": 1.9978014121549403e-06, + "loss": 0.7623804807662964, + "step": 609 + }, + { + "epoch": 0.14063400576368876, + "grad_norm": 0.7818079152915381, + "learning_rate": 1.9977760778100194e-06, + "loss": 0.720527172088623, + "step": 610 + }, + { + "epoch": 0.14086455331412104, + "grad_norm": 0.7351247394083988, + "learning_rate": 1.9977505984986135e-06, + "loss": 0.7490646839141846, + "step": 611 + }, + { + "epoch": 0.14109510086455332, + "grad_norm": 0.8740889756756283, + "learning_rate": 1.9977249742244253e-06, + "loss": 0.7666628956794739, + "step": 612 + }, + { + "epoch": 0.1413256484149856, + "grad_norm": 0.8037463276401666, + "learning_rate": 1.9976992049911777e-06, + "loss": 0.6825721263885498, + "step": 613 + }, + { + "epoch": 0.14155619596541788, + "grad_norm": 0.9576981414718043, + "learning_rate": 1.997673290802614e-06, + "loss": 0.7550063729286194, + "step": 614 + }, + { + "epoch": 0.14178674351585013, + "grad_norm": 0.8551111061457136, + "learning_rate": 1.9976472316625005e-06, + "loss": 0.687241792678833, + "step": 615 + }, + { + "epoch": 0.14201729106628241, + "grad_norm": 0.8571830506090616, + "learning_rate": 1.9976210275746215e-06, + "loss": 0.7312265634536743, + "step": 616 + }, + { + "epoch": 0.1422478386167147, + "grad_norm": 0.767467152870913, + "learning_rate": 1.997594678542786e-06, + "loss": 0.7246025204658508, + "step": 617 + }, + { + "epoch": 0.14247838616714698, + "grad_norm": 0.6447556085599853, + "learning_rate": 1.9975681845708214e-06, + "loss": 0.6550637483596802, + "step": 618 + }, + { + "epoch": 0.14270893371757926, + "grad_norm": 0.7539334884870844, + "learning_rate": 1.997541545662577e-06, + "loss": 0.7373122572898865, + "step": 619 + }, + { + "epoch": 0.14293948126801154, + "grad_norm": 0.8546078077593016, + "learning_rate": 1.997514761821923e-06, + "loss": 0.8237804770469666, + "step": 620 + }, + { + "epoch": 0.14317002881844382, + "grad_norm": 0.8108598991328027, + "learning_rate": 1.9974878330527517e-06, + "loss": 0.7576577663421631, + "step": 621 + }, + { + "epoch": 0.14340057636887607, + "grad_norm": 0.9132866549115137, + "learning_rate": 1.9974607593589747e-06, + "loss": 0.7277255058288574, + "step": 622 + }, + { + "epoch": 0.14363112391930835, + "grad_norm": 0.8415081855318006, + "learning_rate": 1.9974335407445253e-06, + "loss": 0.6797576546669006, + "step": 623 + }, + { + "epoch": 0.14386167146974063, + "grad_norm": 0.8337435130468953, + "learning_rate": 1.9974061772133587e-06, + "loss": 0.7779988050460815, + "step": 624 + }, + { + "epoch": 0.1440922190201729, + "grad_norm": 0.8116008410687129, + "learning_rate": 1.99737866876945e-06, + "loss": 0.663394570350647, + "step": 625 + }, + { + "epoch": 0.1443227665706052, + "grad_norm": 0.6909233474214471, + "learning_rate": 1.9973510154167974e-06, + "loss": 0.6705121397972107, + "step": 626 + }, + { + "epoch": 0.14455331412103747, + "grad_norm": 0.6711778054860513, + "learning_rate": 1.9973232171594164e-06, + "loss": 0.7186808586120605, + "step": 627 + }, + { + "epoch": 0.14478386167146973, + "grad_norm": 0.7551679221380408, + "learning_rate": 1.997295274001347e-06, + "loss": 0.6327730417251587, + "step": 628 + }, + { + "epoch": 0.145014409221902, + "grad_norm": 0.9897011229130392, + "learning_rate": 1.9972671859466493e-06, + "loss": 0.8166565895080566, + "step": 629 + }, + { + "epoch": 0.1452449567723343, + "grad_norm": 0.7048630892808823, + "learning_rate": 1.997238952999404e-06, + "loss": 0.6133385896682739, + "step": 630 + }, + { + "epoch": 0.14547550432276657, + "grad_norm": 0.7829850836217312, + "learning_rate": 1.9972105751637125e-06, + "loss": 0.6579192876815796, + "step": 631 + }, + { + "epoch": 0.14570605187319885, + "grad_norm": 0.9330944891206956, + "learning_rate": 1.9971820524436985e-06, + "loss": 0.6703581809997559, + "step": 632 + }, + { + "epoch": 0.14593659942363113, + "grad_norm": 1.0013138431798316, + "learning_rate": 1.9971533848435055e-06, + "loss": 0.8173651695251465, + "step": 633 + }, + { + "epoch": 0.1461671469740634, + "grad_norm": 0.8881150759844724, + "learning_rate": 1.997124572367299e-06, + "loss": 0.7017172574996948, + "step": 634 + }, + { + "epoch": 0.14639769452449566, + "grad_norm": 0.882826859802223, + "learning_rate": 1.997095615019265e-06, + "loss": 0.6777410507202148, + "step": 635 + }, + { + "epoch": 0.14662824207492794, + "grad_norm": 0.8891372191078558, + "learning_rate": 1.9970665128036106e-06, + "loss": 0.8573586344718933, + "step": 636 + }, + { + "epoch": 0.14685878962536023, + "grad_norm": 0.7356113695826332, + "learning_rate": 1.9970372657245643e-06, + "loss": 0.6344877481460571, + "step": 637 + }, + { + "epoch": 0.1470893371757925, + "grad_norm": 0.9791168675445887, + "learning_rate": 1.997007873786375e-06, + "loss": 0.6762720942497253, + "step": 638 + }, + { + "epoch": 0.1473198847262248, + "grad_norm": 0.800842104744362, + "learning_rate": 1.996978336993314e-06, + "loss": 0.7008575201034546, + "step": 639 + }, + { + "epoch": 0.14755043227665707, + "grad_norm": 0.8741772887346724, + "learning_rate": 1.9969486553496716e-06, + "loss": 0.6853412985801697, + "step": 640 + }, + { + "epoch": 0.14778097982708935, + "grad_norm": 1.0370740005159675, + "learning_rate": 1.9969188288597605e-06, + "loss": 0.7110375165939331, + "step": 641 + }, + { + "epoch": 0.1480115273775216, + "grad_norm": 1.0581908050178295, + "learning_rate": 1.996888857527914e-06, + "loss": 0.6819010972976685, + "step": 642 + }, + { + "epoch": 0.14824207492795388, + "grad_norm": 0.8772886208417229, + "learning_rate": 1.9968587413584873e-06, + "loss": 0.7901614308357239, + "step": 643 + }, + { + "epoch": 0.14847262247838616, + "grad_norm": 0.745175076561396, + "learning_rate": 1.9968284803558555e-06, + "loss": 0.7313079833984375, + "step": 644 + }, + { + "epoch": 0.14870317002881844, + "grad_norm": 0.8465608138783899, + "learning_rate": 1.9967980745244156e-06, + "loss": 0.6434149742126465, + "step": 645 + }, + { + "epoch": 0.14893371757925072, + "grad_norm": 0.8410482692497646, + "learning_rate": 1.996767523868585e-06, + "loss": 0.790850818157196, + "step": 646 + }, + { + "epoch": 0.149164265129683, + "grad_norm": 0.8004008762577605, + "learning_rate": 1.9967368283928023e-06, + "loss": 0.8198965191841125, + "step": 647 + }, + { + "epoch": 0.14939481268011529, + "grad_norm": 0.8482890907318102, + "learning_rate": 1.9967059881015266e-06, + "loss": 0.6279035806655884, + "step": 648 + }, + { + "epoch": 0.14962536023054754, + "grad_norm": 0.8852994503658375, + "learning_rate": 1.99667500299924e-06, + "loss": 0.6705282926559448, + "step": 649 + }, + { + "epoch": 0.14985590778097982, + "grad_norm": 0.978665350202633, + "learning_rate": 1.9966438730904435e-06, + "loss": 0.6400725841522217, + "step": 650 + }, + { + "epoch": 0.1500864553314121, + "grad_norm": 0.714749094089356, + "learning_rate": 1.9966125983796603e-06, + "loss": 0.6101740002632141, + "step": 651 + }, + { + "epoch": 0.15031700288184438, + "grad_norm": 0.7346583641916052, + "learning_rate": 1.996581178871434e-06, + "loss": 0.6152533292770386, + "step": 652 + }, + { + "epoch": 0.15054755043227666, + "grad_norm": 0.8353434405574146, + "learning_rate": 1.9965496145703294e-06, + "loss": 0.6547197103500366, + "step": 653 + }, + { + "epoch": 0.15077809798270894, + "grad_norm": 0.9331778145980045, + "learning_rate": 1.996517905480933e-06, + "loss": 0.6906094551086426, + "step": 654 + }, + { + "epoch": 0.15100864553314122, + "grad_norm": 0.8380037292342015, + "learning_rate": 1.9964860516078514e-06, + "loss": 0.6932255625724792, + "step": 655 + }, + { + "epoch": 0.15123919308357348, + "grad_norm": 0.7361090355434313, + "learning_rate": 1.9964540529557124e-06, + "loss": 0.7039364576339722, + "step": 656 + }, + { + "epoch": 0.15146974063400576, + "grad_norm": 0.8241985864072432, + "learning_rate": 1.996421909529166e-06, + "loss": 0.6963860392570496, + "step": 657 + }, + { + "epoch": 0.15170028818443804, + "grad_norm": 0.9014935244681216, + "learning_rate": 1.9963896213328814e-06, + "loss": 0.604318380355835, + "step": 658 + }, + { + "epoch": 0.15193083573487032, + "grad_norm": 0.8819571142719487, + "learning_rate": 1.99635718837155e-06, + "loss": 0.6434466242790222, + "step": 659 + }, + { + "epoch": 0.1521613832853026, + "grad_norm": 0.9264914863210155, + "learning_rate": 1.9963246106498843e-06, + "loss": 0.7714329957962036, + "step": 660 + }, + { + "epoch": 0.15239193083573488, + "grad_norm": 0.8106754232828391, + "learning_rate": 1.996291888172617e-06, + "loss": 0.6435364484786987, + "step": 661 + }, + { + "epoch": 0.15262247838616716, + "grad_norm": 0.7565008247244859, + "learning_rate": 1.9962590209445026e-06, + "loss": 0.646420955657959, + "step": 662 + }, + { + "epoch": 0.1528530259365994, + "grad_norm": 0.7252882219501727, + "learning_rate": 1.9962260089703164e-06, + "loss": 0.662223219871521, + "step": 663 + }, + { + "epoch": 0.1530835734870317, + "grad_norm": 0.8659668858040968, + "learning_rate": 1.9961928522548544e-06, + "loss": 0.7200876474380493, + "step": 664 + }, + { + "epoch": 0.15331412103746397, + "grad_norm": 0.7566658933179319, + "learning_rate": 1.9961595508029344e-06, + "loss": 0.6113970875740051, + "step": 665 + }, + { + "epoch": 0.15354466858789625, + "grad_norm": 0.8245060640492226, + "learning_rate": 1.9961261046193946e-06, + "loss": 0.649490237236023, + "step": 666 + }, + { + "epoch": 0.15377521613832854, + "grad_norm": 0.7578040413722006, + "learning_rate": 1.996092513709094e-06, + "loss": 0.6530452370643616, + "step": 667 + }, + { + "epoch": 0.15400576368876082, + "grad_norm": 0.7405695664138088, + "learning_rate": 1.9960587780769136e-06, + "loss": 0.7147825360298157, + "step": 668 + }, + { + "epoch": 0.1542363112391931, + "grad_norm": 0.8315791587831606, + "learning_rate": 1.9960248977277546e-06, + "loss": 0.6634937524795532, + "step": 669 + }, + { + "epoch": 0.15446685878962535, + "grad_norm": 0.6638255750523387, + "learning_rate": 1.995990872666539e-06, + "loss": 0.5835539102554321, + "step": 670 + }, + { + "epoch": 0.15469740634005763, + "grad_norm": 0.9977800574356578, + "learning_rate": 1.9959567028982106e-06, + "loss": 0.6927201151847839, + "step": 671 + }, + { + "epoch": 0.1549279538904899, + "grad_norm": 0.8488790358421198, + "learning_rate": 1.9959223884277344e-06, + "loss": 0.6332941651344299, + "step": 672 + }, + { + "epoch": 0.1551585014409222, + "grad_norm": 0.7345490192229425, + "learning_rate": 1.995887929260096e-06, + "loss": 0.6358315944671631, + "step": 673 + }, + { + "epoch": 0.15538904899135447, + "grad_norm": 0.8810198063378296, + "learning_rate": 1.9958533254003004e-06, + "loss": 0.7304986715316772, + "step": 674 + }, + { + "epoch": 0.15561959654178675, + "grad_norm": 0.9410490987859614, + "learning_rate": 1.995818576853377e-06, + "loss": 0.6921132802963257, + "step": 675 + }, + { + "epoch": 0.15585014409221903, + "grad_norm": 0.8065213934569144, + "learning_rate": 1.995783683624373e-06, + "loss": 0.5625938177108765, + "step": 676 + }, + { + "epoch": 0.1560806916426513, + "grad_norm": 0.7157890295181991, + "learning_rate": 1.9957486457183593e-06, + "loss": 0.6823471188545227, + "step": 677 + }, + { + "epoch": 0.15631123919308357, + "grad_norm": 0.8329002490021682, + "learning_rate": 1.995713463140426e-06, + "loss": 0.645065426826477, + "step": 678 + }, + { + "epoch": 0.15654178674351585, + "grad_norm": 0.8327299093562661, + "learning_rate": 1.9956781358956846e-06, + "loss": 0.615644633769989, + "step": 679 + }, + { + "epoch": 0.15677233429394813, + "grad_norm": 0.9574374641380435, + "learning_rate": 1.9956426639892674e-06, + "loss": 0.6672168374061584, + "step": 680 + }, + { + "epoch": 0.1570028818443804, + "grad_norm": 0.7820039633645871, + "learning_rate": 1.9956070474263293e-06, + "loss": 0.6651773452758789, + "step": 681 + }, + { + "epoch": 0.1572334293948127, + "grad_norm": 0.8533425761585396, + "learning_rate": 1.9955712862120443e-06, + "loss": 0.692477822303772, + "step": 682 + }, + { + "epoch": 0.15746397694524494, + "grad_norm": 0.9212488541381597, + "learning_rate": 1.995535380351608e-06, + "loss": 0.6489748358726501, + "step": 683 + }, + { + "epoch": 0.15769452449567722, + "grad_norm": 0.8435974522262087, + "learning_rate": 1.9954993298502366e-06, + "loss": 0.6771219372749329, + "step": 684 + }, + { + "epoch": 0.1579250720461095, + "grad_norm": 0.877153187521053, + "learning_rate": 1.9954631347131692e-06, + "loss": 0.665330171585083, + "step": 685 + }, + { + "epoch": 0.15815561959654179, + "grad_norm": 1.0295162658348551, + "learning_rate": 1.995426794945664e-06, + "loss": 0.5750322937965393, + "step": 686 + }, + { + "epoch": 0.15838616714697407, + "grad_norm": 0.7522809296648445, + "learning_rate": 1.9953903105530005e-06, + "loss": 0.728310227394104, + "step": 687 + }, + { + "epoch": 0.15861671469740635, + "grad_norm": 0.9608157515262961, + "learning_rate": 1.9953536815404794e-06, + "loss": 0.6932300329208374, + "step": 688 + }, + { + "epoch": 0.15884726224783863, + "grad_norm": 0.8943804935883133, + "learning_rate": 1.995316907913423e-06, + "loss": 0.6161103844642639, + "step": 689 + }, + { + "epoch": 0.15907780979827088, + "grad_norm": 0.7775043477204376, + "learning_rate": 1.9952799896771744e-06, + "loss": 0.6205453872680664, + "step": 690 + }, + { + "epoch": 0.15930835734870316, + "grad_norm": 0.7737451170401874, + "learning_rate": 1.9952429268370964e-06, + "loss": 0.7487895488739014, + "step": 691 + }, + { + "epoch": 0.15953890489913544, + "grad_norm": 0.8997226561911318, + "learning_rate": 1.995205719398575e-06, + "loss": 0.5926559567451477, + "step": 692 + }, + { + "epoch": 0.15976945244956772, + "grad_norm": 1.053529919661844, + "learning_rate": 1.9951683673670152e-06, + "loss": 0.7555570602416992, + "step": 693 + }, + { + "epoch": 0.16, + "grad_norm": 0.8300153409903857, + "learning_rate": 1.995130870747844e-06, + "loss": 0.6297062635421753, + "step": 694 + }, + { + "epoch": 0.16023054755043228, + "grad_norm": 1.0222141785273, + "learning_rate": 1.99509322954651e-06, + "loss": 0.6655765771865845, + "step": 695 + }, + { + "epoch": 0.16046109510086456, + "grad_norm": 0.7738556572418642, + "learning_rate": 1.995055443768481e-06, + "loss": 0.5945572853088379, + "step": 696 + }, + { + "epoch": 0.16069164265129682, + "grad_norm": 0.956272279349006, + "learning_rate": 1.9950175134192473e-06, + "loss": 0.646022379398346, + "step": 697 + }, + { + "epoch": 0.1609221902017291, + "grad_norm": 1.0562448292545954, + "learning_rate": 1.99497943850432e-06, + "loss": 0.6553836464881897, + "step": 698 + }, + { + "epoch": 0.16115273775216138, + "grad_norm": 0.8311639966120081, + "learning_rate": 1.994941219029231e-06, + "loss": 0.6118045449256897, + "step": 699 + }, + { + "epoch": 0.16138328530259366, + "grad_norm": 0.7402593984430477, + "learning_rate": 1.994902854999533e-06, + "loss": 0.5951248407363892, + "step": 700 + }, + { + "epoch": 0.16161383285302594, + "grad_norm": 0.7755953239943872, + "learning_rate": 1.9948643464208e-06, + "loss": 0.7181081771850586, + "step": 701 + }, + { + "epoch": 0.16184438040345822, + "grad_norm": 0.710887180335347, + "learning_rate": 1.9948256932986264e-06, + "loss": 0.5914522409439087, + "step": 702 + }, + { + "epoch": 0.1620749279538905, + "grad_norm": 0.9033570710857827, + "learning_rate": 1.994786895638629e-06, + "loss": 0.5532323718070984, + "step": 703 + }, + { + "epoch": 0.16230547550432275, + "grad_norm": 0.7658441548893321, + "learning_rate": 1.994747953446444e-06, + "loss": 0.6182093620300293, + "step": 704 + }, + { + "epoch": 0.16253602305475504, + "grad_norm": 0.7455777185611792, + "learning_rate": 1.9947088667277295e-06, + "loss": 0.6127386093139648, + "step": 705 + }, + { + "epoch": 0.16276657060518732, + "grad_norm": 0.7619418353404411, + "learning_rate": 1.9946696354881644e-06, + "loss": 0.7171872854232788, + "step": 706 + }, + { + "epoch": 0.1629971181556196, + "grad_norm": 0.9028634018107199, + "learning_rate": 1.994630259733449e-06, + "loss": 0.6722875833511353, + "step": 707 + }, + { + "epoch": 0.16322766570605188, + "grad_norm": 0.7912271958302307, + "learning_rate": 1.994590739469303e-06, + "loss": 0.6032121777534485, + "step": 708 + }, + { + "epoch": 0.16345821325648416, + "grad_norm": 0.7790480227256618, + "learning_rate": 1.9945510747014696e-06, + "loss": 0.6348932981491089, + "step": 709 + }, + { + "epoch": 0.16368876080691644, + "grad_norm": 0.8262533224784293, + "learning_rate": 1.9945112654357114e-06, + "loss": 0.6864136457443237, + "step": 710 + }, + { + "epoch": 0.1639193083573487, + "grad_norm": 0.8121807702655695, + "learning_rate": 1.9944713116778118e-06, + "loss": 0.6009939908981323, + "step": 711 + }, + { + "epoch": 0.16414985590778097, + "grad_norm": 0.9655971342263576, + "learning_rate": 1.994431213433576e-06, + "loss": 0.7821159362792969, + "step": 712 + }, + { + "epoch": 0.16438040345821325, + "grad_norm": 0.9555271008092482, + "learning_rate": 1.9943909707088293e-06, + "loss": 0.6368619799613953, + "step": 713 + }, + { + "epoch": 0.16461095100864553, + "grad_norm": 0.7923745246469236, + "learning_rate": 1.994350583509419e-06, + "loss": 0.5924729704856873, + "step": 714 + }, + { + "epoch": 0.16484149855907781, + "grad_norm": 0.8669831643763101, + "learning_rate": 1.9943100518412137e-06, + "loss": 0.6996514797210693, + "step": 715 + }, + { + "epoch": 0.1650720461095101, + "grad_norm": 0.8402039238329811, + "learning_rate": 1.994269375710101e-06, + "loss": 0.5582215785980225, + "step": 716 + }, + { + "epoch": 0.16530259365994238, + "grad_norm": 1.2068222368874355, + "learning_rate": 1.994228555121991e-06, + "loss": 0.8005632162094116, + "step": 717 + }, + { + "epoch": 0.16553314121037463, + "grad_norm": 0.8659636397344134, + "learning_rate": 1.994187590082815e-06, + "loss": 0.6704587936401367, + "step": 718 + }, + { + "epoch": 0.1657636887608069, + "grad_norm": 0.9519332733008289, + "learning_rate": 1.9941464805985242e-06, + "loss": 0.6965141296386719, + "step": 719 + }, + { + "epoch": 0.1659942363112392, + "grad_norm": 0.7982296755018282, + "learning_rate": 1.994105226675092e-06, + "loss": 0.7778419852256775, + "step": 720 + }, + { + "epoch": 0.16622478386167147, + "grad_norm": 0.8437919125520283, + "learning_rate": 1.9940638283185117e-06, + "loss": 0.5983673334121704, + "step": 721 + }, + { + "epoch": 0.16645533141210375, + "grad_norm": 0.8114559147772581, + "learning_rate": 1.994022285534798e-06, + "loss": 0.7356874346733093, + "step": 722 + }, + { + "epoch": 0.16668587896253603, + "grad_norm": 1.0241509316608044, + "learning_rate": 1.9939805983299867e-06, + "loss": 0.5962327718734741, + "step": 723 + }, + { + "epoch": 0.1669164265129683, + "grad_norm": 1.103575730726766, + "learning_rate": 1.9939387667101354e-06, + "loss": 0.751507043838501, + "step": 724 + }, + { + "epoch": 0.16714697406340057, + "grad_norm": 0.8938560577593438, + "learning_rate": 1.9938967906813204e-06, + "loss": 0.5894922018051147, + "step": 725 + }, + { + "epoch": 0.16737752161383285, + "grad_norm": 0.7622931465354307, + "learning_rate": 1.993854670249641e-06, + "loss": 0.6028705835342407, + "step": 726 + }, + { + "epoch": 0.16760806916426513, + "grad_norm": 0.8601442524669536, + "learning_rate": 1.993812405421217e-06, + "loss": 0.6514176726341248, + "step": 727 + }, + { + "epoch": 0.1678386167146974, + "grad_norm": 0.9323980788065775, + "learning_rate": 1.993769996202189e-06, + "loss": 0.5832291841506958, + "step": 728 + }, + { + "epoch": 0.1680691642651297, + "grad_norm": 0.834026784773322, + "learning_rate": 1.9937274425987188e-06, + "loss": 0.6265125274658203, + "step": 729 + }, + { + "epoch": 0.16829971181556197, + "grad_norm": 0.8839362204216874, + "learning_rate": 1.9936847446169883e-06, + "loss": 0.6474361419677734, + "step": 730 + }, + { + "epoch": 0.16853025936599422, + "grad_norm": 0.9244363591024001, + "learning_rate": 1.9936419022632015e-06, + "loss": 0.6126378774642944, + "step": 731 + }, + { + "epoch": 0.1687608069164265, + "grad_norm": 0.7248328619497468, + "learning_rate": 1.9935989155435832e-06, + "loss": 0.5518519878387451, + "step": 732 + }, + { + "epoch": 0.16899135446685878, + "grad_norm": 1.020008258225897, + "learning_rate": 1.9935557844643786e-06, + "loss": 0.641282320022583, + "step": 733 + }, + { + "epoch": 0.16922190201729106, + "grad_norm": 0.8726481364655894, + "learning_rate": 1.9935125090318544e-06, + "loss": 0.562978208065033, + "step": 734 + }, + { + "epoch": 0.16945244956772335, + "grad_norm": 0.9315669775898957, + "learning_rate": 1.9934690892522977e-06, + "loss": 0.6757839918136597, + "step": 735 + }, + { + "epoch": 0.16968299711815563, + "grad_norm": 0.8770207431066531, + "learning_rate": 1.9934255251320173e-06, + "loss": 0.6061424612998962, + "step": 736 + }, + { + "epoch": 0.1699135446685879, + "grad_norm": 0.9969221705962019, + "learning_rate": 1.9933818166773425e-06, + "loss": 0.665751576423645, + "step": 737 + }, + { + "epoch": 0.17014409221902016, + "grad_norm": 1.0458611843133037, + "learning_rate": 1.9933379638946237e-06, + "loss": 0.7042183876037598, + "step": 738 + }, + { + "epoch": 0.17037463976945244, + "grad_norm": 0.8154471262488174, + "learning_rate": 1.993293966790232e-06, + "loss": 0.5603055953979492, + "step": 739 + }, + { + "epoch": 0.17060518731988472, + "grad_norm": 0.7962101585189754, + "learning_rate": 1.99324982537056e-06, + "loss": 0.6231967210769653, + "step": 740 + }, + { + "epoch": 0.170835734870317, + "grad_norm": 0.8275501344055423, + "learning_rate": 1.9932055396420214e-06, + "loss": 0.6461664438247681, + "step": 741 + }, + { + "epoch": 0.17106628242074928, + "grad_norm": 0.8713861091012736, + "learning_rate": 1.9931611096110492e-06, + "loss": 0.5177653431892395, + "step": 742 + }, + { + "epoch": 0.17129682997118156, + "grad_norm": 1.0666109203407566, + "learning_rate": 1.9931165352841003e-06, + "loss": 0.6173226833343506, + "step": 743 + }, + { + "epoch": 0.17152737752161384, + "grad_norm": 0.8795669631473715, + "learning_rate": 1.9930718166676494e-06, + "loss": 0.6352604627609253, + "step": 744 + }, + { + "epoch": 0.1717579250720461, + "grad_norm": 1.0227022082976704, + "learning_rate": 1.9930269537681946e-06, + "loss": 0.5541161298751831, + "step": 745 + }, + { + "epoch": 0.17198847262247838, + "grad_norm": 0.902318564730913, + "learning_rate": 1.9929819465922537e-06, + "loss": 0.650184154510498, + "step": 746 + }, + { + "epoch": 0.17221902017291066, + "grad_norm": 0.9978722892445172, + "learning_rate": 1.9929367951463654e-06, + "loss": 0.771975040435791, + "step": 747 + }, + { + "epoch": 0.17244956772334294, + "grad_norm": 0.969046207938326, + "learning_rate": 1.9928914994370904e-06, + "loss": 0.6753678917884827, + "step": 748 + }, + { + "epoch": 0.17268011527377522, + "grad_norm": 0.8717387512827466, + "learning_rate": 1.992846059471009e-06, + "loss": 0.6315422058105469, + "step": 749 + }, + { + "epoch": 0.1729106628242075, + "grad_norm": 0.8010764362566868, + "learning_rate": 1.992800475254724e-06, + "loss": 0.6182739734649658, + "step": 750 + }, + { + "epoch": 0.17314121037463978, + "grad_norm": 0.9934700256786312, + "learning_rate": 1.9927547467948576e-06, + "loss": 0.6191136837005615, + "step": 751 + }, + { + "epoch": 0.17337175792507203, + "grad_norm": 0.8955478416270761, + "learning_rate": 1.9927088740980536e-06, + "loss": 0.5914082527160645, + "step": 752 + }, + { + "epoch": 0.17360230547550431, + "grad_norm": 0.8872553916343393, + "learning_rate": 1.9926628571709777e-06, + "loss": 0.7570660710334778, + "step": 753 + }, + { + "epoch": 0.1738328530259366, + "grad_norm": 0.8275631219312759, + "learning_rate": 1.992616696020315e-06, + "loss": 0.7140552997589111, + "step": 754 + }, + { + "epoch": 0.17406340057636888, + "grad_norm": 0.8784414210362078, + "learning_rate": 1.992570390652772e-06, + "loss": 0.6851143836975098, + "step": 755 + }, + { + "epoch": 0.17429394812680116, + "grad_norm": 0.9265966905529803, + "learning_rate": 1.992523941075077e-06, + "loss": 0.6568159461021423, + "step": 756 + }, + { + "epoch": 0.17452449567723344, + "grad_norm": 0.8027115290319551, + "learning_rate": 1.9924773472939785e-06, + "loss": 0.6214026212692261, + "step": 757 + }, + { + "epoch": 0.17475504322766572, + "grad_norm": 1.1556501321398456, + "learning_rate": 1.992430609316246e-06, + "loss": 0.6564410924911499, + "step": 758 + }, + { + "epoch": 0.17498559077809797, + "grad_norm": 0.9640812513598375, + "learning_rate": 1.9923837271486697e-06, + "loss": 0.5648280382156372, + "step": 759 + }, + { + "epoch": 0.17521613832853025, + "grad_norm": 0.8862992700585784, + "learning_rate": 1.9923367007980614e-06, + "loss": 0.608031153678894, + "step": 760 + }, + { + "epoch": 0.17544668587896253, + "grad_norm": 1.0031309058296456, + "learning_rate": 1.9922895302712537e-06, + "loss": 0.6203290224075317, + "step": 761 + }, + { + "epoch": 0.1756772334293948, + "grad_norm": 0.7779205294449888, + "learning_rate": 1.9922422155751003e-06, + "loss": 0.5530174374580383, + "step": 762 + }, + { + "epoch": 0.1759077809798271, + "grad_norm": 0.8910414656783568, + "learning_rate": 1.9921947567164745e-06, + "loss": 0.6626535654067993, + "step": 763 + }, + { + "epoch": 0.17613832853025937, + "grad_norm": 0.9849371373448682, + "learning_rate": 1.9921471537022723e-06, + "loss": 0.6544541120529175, + "step": 764 + }, + { + "epoch": 0.17636887608069166, + "grad_norm": 1.0175898233717289, + "learning_rate": 1.9920994065394098e-06, + "loss": 0.7820296287536621, + "step": 765 + }, + { + "epoch": 0.1765994236311239, + "grad_norm": 0.8308987980464507, + "learning_rate": 1.992051515234824e-06, + "loss": 0.5658930540084839, + "step": 766 + }, + { + "epoch": 0.1768299711815562, + "grad_norm": 1.0108649733600985, + "learning_rate": 1.9920034797954734e-06, + "loss": 0.6288915872573853, + "step": 767 + }, + { + "epoch": 0.17706051873198847, + "grad_norm": 0.8972174241474462, + "learning_rate": 1.9919553002283366e-06, + "loss": 0.5960980653762817, + "step": 768 + }, + { + "epoch": 0.17729106628242075, + "grad_norm": 0.9596016587654722, + "learning_rate": 1.9919069765404136e-06, + "loss": 0.6264806389808655, + "step": 769 + }, + { + "epoch": 0.17752161383285303, + "grad_norm": 1.1687849824342589, + "learning_rate": 1.991858508738726e-06, + "loss": 0.7963491678237915, + "step": 770 + }, + { + "epoch": 0.1777521613832853, + "grad_norm": 0.88739443362272, + "learning_rate": 1.9918098968303147e-06, + "loss": 0.5877400040626526, + "step": 771 + }, + { + "epoch": 0.1779827089337176, + "grad_norm": 0.9846679485059646, + "learning_rate": 1.991761140822243e-06, + "loss": 0.6420770883560181, + "step": 772 + }, + { + "epoch": 0.17821325648414985, + "grad_norm": 1.0021199463672827, + "learning_rate": 1.991712240721595e-06, + "loss": 0.5594414472579956, + "step": 773 + }, + { + "epoch": 0.17844380403458213, + "grad_norm": 0.7497950059658199, + "learning_rate": 1.9916631965354746e-06, + "loss": 0.5771572589874268, + "step": 774 + }, + { + "epoch": 0.1786743515850144, + "grad_norm": 1.100944925371416, + "learning_rate": 1.991614008271008e-06, + "loss": 0.6717950105667114, + "step": 775 + }, + { + "epoch": 0.1789048991354467, + "grad_norm": 1.0132945459281757, + "learning_rate": 1.9915646759353416e-06, + "loss": 0.6760128736495972, + "step": 776 + }, + { + "epoch": 0.17913544668587897, + "grad_norm": 0.9410353518724599, + "learning_rate": 1.9915151995356425e-06, + "loss": 0.6075339913368225, + "step": 777 + }, + { + "epoch": 0.17936599423631125, + "grad_norm": 1.1007106896730625, + "learning_rate": 1.9914655790791e-06, + "loss": 0.6417431831359863, + "step": 778 + }, + { + "epoch": 0.1795965417867435, + "grad_norm": 1.0600536954685635, + "learning_rate": 1.9914158145729226e-06, + "loss": 0.5260112285614014, + "step": 779 + }, + { + "epoch": 0.17982708933717578, + "grad_norm": 0.8866673612354495, + "learning_rate": 1.9913659060243407e-06, + "loss": 0.6338676810264587, + "step": 780 + }, + { + "epoch": 0.18005763688760806, + "grad_norm": 0.8272489485080615, + "learning_rate": 1.991315853440606e-06, + "loss": 0.5719002485275269, + "step": 781 + }, + { + "epoch": 0.18028818443804034, + "grad_norm": 1.090350118782068, + "learning_rate": 1.99126565682899e-06, + "loss": 0.6078590154647827, + "step": 782 + }, + { + "epoch": 0.18051873198847262, + "grad_norm": 0.9290187091081005, + "learning_rate": 1.991215316196786e-06, + "loss": 0.659256100654602, + "step": 783 + }, + { + "epoch": 0.1807492795389049, + "grad_norm": 0.9147661256460412, + "learning_rate": 1.991164831551308e-06, + "loss": 0.5777862071990967, + "step": 784 + }, + { + "epoch": 0.1809798270893372, + "grad_norm": 0.9294731076788324, + "learning_rate": 1.9911142028998907e-06, + "loss": 0.6155215501785278, + "step": 785 + }, + { + "epoch": 0.18121037463976944, + "grad_norm": 0.9904733686631906, + "learning_rate": 1.9910634302498904e-06, + "loss": 0.5919966697692871, + "step": 786 + }, + { + "epoch": 0.18144092219020172, + "grad_norm": 0.8056232981385001, + "learning_rate": 1.991012513608683e-06, + "loss": 0.6729590892791748, + "step": 787 + }, + { + "epoch": 0.181671469740634, + "grad_norm": 1.1208667835091055, + "learning_rate": 1.990961452983667e-06, + "loss": 0.6397472023963928, + "step": 788 + }, + { + "epoch": 0.18190201729106628, + "grad_norm": 0.9065849098074823, + "learning_rate": 1.9909102483822607e-06, + "loss": 0.5506640672683716, + "step": 789 + }, + { + "epoch": 0.18213256484149856, + "grad_norm": 0.9037026872448599, + "learning_rate": 1.9908588998119035e-06, + "loss": 0.5978207588195801, + "step": 790 + }, + { + "epoch": 0.18236311239193084, + "grad_norm": 0.7186623128592075, + "learning_rate": 1.9908074072800557e-06, + "loss": 0.5460508465766907, + "step": 791 + }, + { + "epoch": 0.18259365994236312, + "grad_norm": 0.7892780958696924, + "learning_rate": 1.990755770794199e-06, + "loss": 0.6750969290733337, + "step": 792 + }, + { + "epoch": 0.18282420749279538, + "grad_norm": 1.3348299809463013, + "learning_rate": 1.9907039903618352e-06, + "loss": 0.6955918073654175, + "step": 793 + }, + { + "epoch": 0.18305475504322766, + "grad_norm": 0.7742914275515491, + "learning_rate": 1.990652065990488e-06, + "loss": 0.5774638652801514, + "step": 794 + }, + { + "epoch": 0.18328530259365994, + "grad_norm": 1.0579806797579008, + "learning_rate": 1.990599997687701e-06, + "loss": 0.6769977807998657, + "step": 795 + }, + { + "epoch": 0.18351585014409222, + "grad_norm": 0.8232976375327974, + "learning_rate": 1.9905477854610395e-06, + "loss": 0.6034688353538513, + "step": 796 + }, + { + "epoch": 0.1837463976945245, + "grad_norm": 0.9920075065890873, + "learning_rate": 1.990495429318089e-06, + "loss": 0.6486461162567139, + "step": 797 + }, + { + "epoch": 0.18397694524495678, + "grad_norm": 0.8827350375987653, + "learning_rate": 1.9904429292664565e-06, + "loss": 0.6474006175994873, + "step": 798 + }, + { + "epoch": 0.18420749279538906, + "grad_norm": 1.0129748972927366, + "learning_rate": 1.99039028531377e-06, + "loss": 0.6424986124038696, + "step": 799 + }, + { + "epoch": 0.1844380403458213, + "grad_norm": 0.9671373838473223, + "learning_rate": 1.990337497467678e-06, + "loss": 0.6319411993026733, + "step": 800 + }, + { + "epoch": 0.1846685878962536, + "grad_norm": 0.8922602557785707, + "learning_rate": 1.9902845657358493e-06, + "loss": 0.5928018093109131, + "step": 801 + }, + { + "epoch": 0.18489913544668587, + "grad_norm": 0.9942140949537916, + "learning_rate": 1.9902314901259755e-06, + "loss": 0.6054724454879761, + "step": 802 + }, + { + "epoch": 0.18512968299711816, + "grad_norm": 0.9002048262250127, + "learning_rate": 1.9901782706457667e-06, + "loss": 0.6309449672698975, + "step": 803 + }, + { + "epoch": 0.18536023054755044, + "grad_norm": 0.8725199458934897, + "learning_rate": 1.9901249073029566e-06, + "loss": 0.625114917755127, + "step": 804 + }, + { + "epoch": 0.18559077809798272, + "grad_norm": 1.0830513836098963, + "learning_rate": 1.990071400105297e-06, + "loss": 0.6478957533836365, + "step": 805 + }, + { + "epoch": 0.185821325648415, + "grad_norm": 0.9895928941486563, + "learning_rate": 1.9900177490605628e-06, + "loss": 0.6707916259765625, + "step": 806 + }, + { + "epoch": 0.18605187319884725, + "grad_norm": 0.8882427566351567, + "learning_rate": 1.9899639541765483e-06, + "loss": 0.5120225548744202, + "step": 807 + }, + { + "epoch": 0.18628242074927953, + "grad_norm": 0.9733618335628766, + "learning_rate": 1.98991001546107e-06, + "loss": 0.7165584564208984, + "step": 808 + }, + { + "epoch": 0.1865129682997118, + "grad_norm": 0.8929091718348325, + "learning_rate": 1.9898559329219636e-06, + "loss": 0.4724568724632263, + "step": 809 + }, + { + "epoch": 0.1867435158501441, + "grad_norm": 0.9623165111634494, + "learning_rate": 1.989801706567088e-06, + "loss": 0.7015688419342041, + "step": 810 + }, + { + "epoch": 0.18697406340057637, + "grad_norm": 0.8369835155480084, + "learning_rate": 1.989747336404321e-06, + "loss": 0.5083395838737488, + "step": 811 + }, + { + "epoch": 0.18720461095100865, + "grad_norm": 0.9079243051153807, + "learning_rate": 1.9896928224415623e-06, + "loss": 0.6297205090522766, + "step": 812 + }, + { + "epoch": 0.18743515850144093, + "grad_norm": 1.1433534088888602, + "learning_rate": 1.989638164686732e-06, + "loss": 0.5437130928039551, + "step": 813 + }, + { + "epoch": 0.1876657060518732, + "grad_norm": 0.9912906490963889, + "learning_rate": 1.989583363147771e-06, + "loss": 0.5922385454177856, + "step": 814 + }, + { + "epoch": 0.18789625360230547, + "grad_norm": 0.8463175425409384, + "learning_rate": 1.989528417832642e-06, + "loss": 0.5371031761169434, + "step": 815 + }, + { + "epoch": 0.18812680115273775, + "grad_norm": 0.8249505406171526, + "learning_rate": 1.989473328749328e-06, + "loss": 0.5495747327804565, + "step": 816 + }, + { + "epoch": 0.18835734870317003, + "grad_norm": 0.9329352547357952, + "learning_rate": 1.9894180959058323e-06, + "loss": 0.6508893370628357, + "step": 817 + }, + { + "epoch": 0.1885878962536023, + "grad_norm": 1.0113195463897409, + "learning_rate": 1.9893627193101804e-06, + "loss": 0.49949508905410767, + "step": 818 + }, + { + "epoch": 0.1888184438040346, + "grad_norm": 1.1670316270661754, + "learning_rate": 1.989307198970417e-06, + "loss": 0.6115611791610718, + "step": 819 + }, + { + "epoch": 0.18904899135446687, + "grad_norm": 1.5920704099480587, + "learning_rate": 1.9892515348946094e-06, + "loss": 0.6748598217964172, + "step": 820 + }, + { + "epoch": 0.18927953890489913, + "grad_norm": 0.943253279158822, + "learning_rate": 1.989195727090845e-06, + "loss": 0.6547701358795166, + "step": 821 + }, + { + "epoch": 0.1895100864553314, + "grad_norm": 0.960954411586292, + "learning_rate": 1.9891397755672314e-06, + "loss": 0.6120291948318481, + "step": 822 + }, + { + "epoch": 0.1897406340057637, + "grad_norm": 1.038147761124326, + "learning_rate": 1.9890836803318982e-06, + "loss": 0.567481517791748, + "step": 823 + }, + { + "epoch": 0.18997118155619597, + "grad_norm": 0.9777313935838982, + "learning_rate": 1.989027441392996e-06, + "loss": 0.6567938327789307, + "step": 824 + }, + { + "epoch": 0.19020172910662825, + "grad_norm": 0.9020948308980783, + "learning_rate": 1.988971058758695e-06, + "loss": 0.6386862993240356, + "step": 825 + }, + { + "epoch": 0.19043227665706053, + "grad_norm": 0.996140220083916, + "learning_rate": 1.988914532437187e-06, + "loss": 0.6657274961471558, + "step": 826 + }, + { + "epoch": 0.1906628242074928, + "grad_norm": 0.9530652312449698, + "learning_rate": 1.988857862436685e-06, + "loss": 0.6014574766159058, + "step": 827 + }, + { + "epoch": 0.19089337175792506, + "grad_norm": 0.9808303514948067, + "learning_rate": 1.988801048765423e-06, + "loss": 0.6482441425323486, + "step": 828 + }, + { + "epoch": 0.19112391930835734, + "grad_norm": 0.9723490419441589, + "learning_rate": 1.988744091431654e-06, + "loss": 0.6074355244636536, + "step": 829 + }, + { + "epoch": 0.19135446685878962, + "grad_norm": 0.8677748383178393, + "learning_rate": 1.9886869904436544e-06, + "loss": 0.609101414680481, + "step": 830 + }, + { + "epoch": 0.1915850144092219, + "grad_norm": 0.8515757003715301, + "learning_rate": 1.988629745809721e-06, + "loss": 0.5866901874542236, + "step": 831 + }, + { + "epoch": 0.19181556195965418, + "grad_norm": 1.1523148131986478, + "learning_rate": 1.988572357538169e-06, + "loss": 0.6352179050445557, + "step": 832 + }, + { + "epoch": 0.19204610951008647, + "grad_norm": 0.9505829656120092, + "learning_rate": 1.988514825637338e-06, + "loss": 0.5662895441055298, + "step": 833 + }, + { + "epoch": 0.19227665706051872, + "grad_norm": 1.0014129874079212, + "learning_rate": 1.988457150115586e-06, + "loss": 0.6476075649261475, + "step": 834 + }, + { + "epoch": 0.192507204610951, + "grad_norm": 0.855790701855717, + "learning_rate": 1.988399330981293e-06, + "loss": 0.5920289158821106, + "step": 835 + }, + { + "epoch": 0.19273775216138328, + "grad_norm": 1.2467411442207388, + "learning_rate": 1.988341368242859e-06, + "loss": 0.7046043276786804, + "step": 836 + }, + { + "epoch": 0.19296829971181556, + "grad_norm": 0.9167265685067049, + "learning_rate": 1.9882832619087057e-06, + "loss": 0.5909844636917114, + "step": 837 + }, + { + "epoch": 0.19319884726224784, + "grad_norm": 1.046085100012217, + "learning_rate": 1.9882250119872754e-06, + "loss": 0.605388879776001, + "step": 838 + }, + { + "epoch": 0.19342939481268012, + "grad_norm": 1.5157450979253604, + "learning_rate": 1.9881666184870314e-06, + "loss": 0.6636893153190613, + "step": 839 + }, + { + "epoch": 0.1936599423631124, + "grad_norm": 0.9152396125482961, + "learning_rate": 1.9881080814164574e-06, + "loss": 0.5917089581489563, + "step": 840 + }, + { + "epoch": 0.19389048991354466, + "grad_norm": 0.8672781191194197, + "learning_rate": 1.988049400784058e-06, + "loss": 0.6431874632835388, + "step": 841 + }, + { + "epoch": 0.19412103746397694, + "grad_norm": 0.8347880968744955, + "learning_rate": 1.9879905765983593e-06, + "loss": 0.5309115648269653, + "step": 842 + }, + { + "epoch": 0.19435158501440922, + "grad_norm": 1.3518239663287992, + "learning_rate": 1.9879316088679076e-06, + "loss": 0.5478585362434387, + "step": 843 + }, + { + "epoch": 0.1945821325648415, + "grad_norm": 1.06976855886381, + "learning_rate": 1.9878724976012703e-06, + "loss": 0.6842525005340576, + "step": 844 + }, + { + "epoch": 0.19481268011527378, + "grad_norm": 0.870308127392832, + "learning_rate": 1.987813242807036e-06, + "loss": 0.5301089286804199, + "step": 845 + }, + { + "epoch": 0.19504322766570606, + "grad_norm": 0.8463702037283144, + "learning_rate": 1.987753844493813e-06, + "loss": 0.6047420501708984, + "step": 846 + }, + { + "epoch": 0.19527377521613834, + "grad_norm": 0.9048551432370087, + "learning_rate": 1.9876943026702325e-06, + "loss": 0.6532707214355469, + "step": 847 + }, + { + "epoch": 0.1955043227665706, + "grad_norm": 0.9860575648750018, + "learning_rate": 1.9876346173449444e-06, + "loss": 0.6906484365463257, + "step": 848 + }, + { + "epoch": 0.19573487031700287, + "grad_norm": 0.842900243439286, + "learning_rate": 1.98757478852662e-06, + "loss": 0.5873023867607117, + "step": 849 + }, + { + "epoch": 0.19596541786743515, + "grad_norm": 0.8944983517133327, + "learning_rate": 1.9875148162239534e-06, + "loss": 0.5956071615219116, + "step": 850 + }, + { + "epoch": 0.19619596541786744, + "grad_norm": 1.0725227324782516, + "learning_rate": 1.9874547004456562e-06, + "loss": 0.6224364042282104, + "step": 851 + }, + { + "epoch": 0.19642651296829972, + "grad_norm": 0.9179836097168517, + "learning_rate": 1.9873944412004633e-06, + "loss": 0.5743613839149475, + "step": 852 + }, + { + "epoch": 0.196657060518732, + "grad_norm": 0.9085386217997975, + "learning_rate": 1.98733403849713e-06, + "loss": 0.6316232085227966, + "step": 853 + }, + { + "epoch": 0.19688760806916428, + "grad_norm": 1.1614192101370417, + "learning_rate": 1.987273492344432e-06, + "loss": 0.6850833892822266, + "step": 854 + }, + { + "epoch": 0.19711815561959653, + "grad_norm": 0.9615141189589504, + "learning_rate": 1.9872128027511656e-06, + "loss": 0.6533515453338623, + "step": 855 + }, + { + "epoch": 0.1973487031700288, + "grad_norm": 1.17455236527873, + "learning_rate": 1.987151969726149e-06, + "loss": 0.617554783821106, + "step": 856 + }, + { + "epoch": 0.1975792507204611, + "grad_norm": 1.0764121250331788, + "learning_rate": 1.98709099327822e-06, + "loss": 0.6278855800628662, + "step": 857 + }, + { + "epoch": 0.19780979827089337, + "grad_norm": 0.9092814301734246, + "learning_rate": 1.9870298734162384e-06, + "loss": 0.6289564371109009, + "step": 858 + }, + { + "epoch": 0.19804034582132565, + "grad_norm": 1.065819744433503, + "learning_rate": 1.986968610149084e-06, + "loss": 0.5363434553146362, + "step": 859 + }, + { + "epoch": 0.19827089337175793, + "grad_norm": 0.889097528046652, + "learning_rate": 1.986907203485658e-06, + "loss": 0.5072166919708252, + "step": 860 + }, + { + "epoch": 0.19850144092219021, + "grad_norm": 0.9690950352141645, + "learning_rate": 1.986845653434882e-06, + "loss": 0.6337966918945312, + "step": 861 + }, + { + "epoch": 0.19873198847262247, + "grad_norm": 1.1682445222467872, + "learning_rate": 1.9867839600056984e-06, + "loss": 0.6706831455230713, + "step": 862 + }, + { + "epoch": 0.19896253602305475, + "grad_norm": 1.145903275739384, + "learning_rate": 1.9867221232070706e-06, + "loss": 0.6381477117538452, + "step": 863 + }, + { + "epoch": 0.19919308357348703, + "grad_norm": 1.0005395841954985, + "learning_rate": 1.9866601430479826e-06, + "loss": 0.7144027948379517, + "step": 864 + }, + { + "epoch": 0.1994236311239193, + "grad_norm": 0.8750080759967084, + "learning_rate": 1.98659801953744e-06, + "loss": 0.5705598592758179, + "step": 865 + }, + { + "epoch": 0.1996541786743516, + "grad_norm": 0.9156602793809858, + "learning_rate": 1.986535752684469e-06, + "loss": 0.5879906415939331, + "step": 866 + }, + { + "epoch": 0.19988472622478387, + "grad_norm": 0.9724116332795385, + "learning_rate": 1.9864733424981155e-06, + "loss": 0.5378298759460449, + "step": 867 + }, + { + "epoch": 0.20011527377521615, + "grad_norm": 1.1694103064412866, + "learning_rate": 1.986410788987448e-06, + "loss": 0.6873736381530762, + "step": 868 + }, + { + "epoch": 0.2003458213256484, + "grad_norm": 1.2934294621539983, + "learning_rate": 1.9863480921615537e-06, + "loss": 0.6900503635406494, + "step": 869 + }, + { + "epoch": 0.20057636887608069, + "grad_norm": 1.0027549189128846, + "learning_rate": 1.9862852520295426e-06, + "loss": 0.6074845790863037, + "step": 870 + }, + { + "epoch": 0.20080691642651297, + "grad_norm": 0.7544689789402186, + "learning_rate": 1.9862222686005443e-06, + "loss": 0.5881202220916748, + "step": 871 + }, + { + "epoch": 0.20103746397694525, + "grad_norm": 1.0664423288729972, + "learning_rate": 1.98615914188371e-06, + "loss": 0.56011962890625, + "step": 872 + }, + { + "epoch": 0.20126801152737753, + "grad_norm": 0.8988899214507405, + "learning_rate": 1.986095871888211e-06, + "loss": 0.5858177542686462, + "step": 873 + }, + { + "epoch": 0.2014985590778098, + "grad_norm": 1.0133619404753802, + "learning_rate": 1.9860324586232404e-06, + "loss": 0.5983797311782837, + "step": 874 + }, + { + "epoch": 0.2017291066282421, + "grad_norm": 1.1981047171553392, + "learning_rate": 1.985968902098011e-06, + "loss": 0.614532470703125, + "step": 875 + }, + { + "epoch": 0.20195965417867434, + "grad_norm": 0.8750332579169862, + "learning_rate": 1.9859052023217564e-06, + "loss": 0.6202026009559631, + "step": 876 + }, + { + "epoch": 0.20219020172910662, + "grad_norm": 1.023100856340593, + "learning_rate": 1.9858413593037324e-06, + "loss": 0.6846225261688232, + "step": 877 + }, + { + "epoch": 0.2024207492795389, + "grad_norm": 0.9705265756320178, + "learning_rate": 1.9857773730532145e-06, + "loss": 0.587134838104248, + "step": 878 + }, + { + "epoch": 0.20265129682997118, + "grad_norm": 0.9699235709827684, + "learning_rate": 1.9857132435794986e-06, + "loss": 0.585313081741333, + "step": 879 + }, + { + "epoch": 0.20288184438040346, + "grad_norm": 0.7607859606406122, + "learning_rate": 1.985648970891903e-06, + "loss": 0.6294115781784058, + "step": 880 + }, + { + "epoch": 0.20311239193083575, + "grad_norm": 1.5538153967344521, + "learning_rate": 1.9855845549997655e-06, + "loss": 0.5994957089424133, + "step": 881 + }, + { + "epoch": 0.203342939481268, + "grad_norm": 0.8200153437294728, + "learning_rate": 1.985519995912445e-06, + "loss": 0.5432143211364746, + "step": 882 + }, + { + "epoch": 0.20357348703170028, + "grad_norm": 1.039970116286451, + "learning_rate": 1.9854552936393212e-06, + "loss": 0.4527829885482788, + "step": 883 + }, + { + "epoch": 0.20380403458213256, + "grad_norm": 1.0605902620434908, + "learning_rate": 1.985390448189795e-06, + "loss": 0.635351836681366, + "step": 884 + }, + { + "epoch": 0.20403458213256484, + "grad_norm": 1.1235404274480423, + "learning_rate": 1.9853254595732867e-06, + "loss": 0.6122138500213623, + "step": 885 + }, + { + "epoch": 0.20426512968299712, + "grad_norm": 0.8680670360235221, + "learning_rate": 1.98526032779924e-06, + "loss": 0.5663925409317017, + "step": 886 + }, + { + "epoch": 0.2044956772334294, + "grad_norm": 0.9855268643697941, + "learning_rate": 1.985195052877117e-06, + "loss": 0.622967004776001, + "step": 887 + }, + { + "epoch": 0.20472622478386168, + "grad_norm": 1.0340743236415533, + "learning_rate": 1.9851296348164013e-06, + "loss": 0.5478787422180176, + "step": 888 + }, + { + "epoch": 0.20495677233429394, + "grad_norm": 0.9432090732932807, + "learning_rate": 1.985064073626598e-06, + "loss": 0.5574431419372559, + "step": 889 + }, + { + "epoch": 0.20518731988472622, + "grad_norm": 0.954691206105306, + "learning_rate": 1.9849983693172324e-06, + "loss": 0.49230653047561646, + "step": 890 + }, + { + "epoch": 0.2054178674351585, + "grad_norm": 1.0132924882146737, + "learning_rate": 1.98493252189785e-06, + "loss": 0.6339923143386841, + "step": 891 + }, + { + "epoch": 0.20564841498559078, + "grad_norm": 1.041505282249045, + "learning_rate": 1.9848665313780186e-06, + "loss": 0.6324957609176636, + "step": 892 + }, + { + "epoch": 0.20587896253602306, + "grad_norm": 0.9105419954136654, + "learning_rate": 1.984800397767325e-06, + "loss": 0.5840550661087036, + "step": 893 + }, + { + "epoch": 0.20610951008645534, + "grad_norm": 1.0158365608395679, + "learning_rate": 1.984734121075379e-06, + "loss": 0.5304306149482727, + "step": 894 + }, + { + "epoch": 0.20634005763688762, + "grad_norm": 0.8993681880640182, + "learning_rate": 1.9846677013118088e-06, + "loss": 0.5756544470787048, + "step": 895 + }, + { + "epoch": 0.20657060518731987, + "grad_norm": 1.0683503528028089, + "learning_rate": 1.9846011384862652e-06, + "loss": 0.6194400787353516, + "step": 896 + }, + { + "epoch": 0.20680115273775215, + "grad_norm": 0.9824376438411858, + "learning_rate": 1.9845344326084185e-06, + "loss": 0.6125355362892151, + "step": 897 + }, + { + "epoch": 0.20703170028818443, + "grad_norm": 1.1037085256416217, + "learning_rate": 1.9844675836879606e-06, + "loss": 0.598976731300354, + "step": 898 + }, + { + "epoch": 0.20726224783861671, + "grad_norm": 0.9713841503620613, + "learning_rate": 1.984400591734604e-06, + "loss": 0.578935444355011, + "step": 899 + }, + { + "epoch": 0.207492795389049, + "grad_norm": 1.0652265800434835, + "learning_rate": 1.9843334567580822e-06, + "loss": 0.5881237983703613, + "step": 900 + }, + { + "epoch": 0.20772334293948128, + "grad_norm": 1.2219068277886247, + "learning_rate": 1.9842661787681485e-06, + "loss": 0.6280317306518555, + "step": 901 + }, + { + "epoch": 0.20795389048991356, + "grad_norm": 1.1730342254756378, + "learning_rate": 1.9841987577745786e-06, + "loss": 0.5957608819007874, + "step": 902 + }, + { + "epoch": 0.2081844380403458, + "grad_norm": 0.8340813337763022, + "learning_rate": 1.9841311937871674e-06, + "loss": 0.606480598449707, + "step": 903 + }, + { + "epoch": 0.2084149855907781, + "grad_norm": 1.0407514976116163, + "learning_rate": 1.9840634868157314e-06, + "loss": 0.6092411279678345, + "step": 904 + }, + { + "epoch": 0.20864553314121037, + "grad_norm": 1.0247425642925165, + "learning_rate": 1.9839956368701076e-06, + "loss": 0.6318541765213013, + "step": 905 + }, + { + "epoch": 0.20887608069164265, + "grad_norm": 1.1532320918613648, + "learning_rate": 1.983927643960155e-06, + "loss": 0.6250811219215393, + "step": 906 + }, + { + "epoch": 0.20910662824207493, + "grad_norm": 1.1313706395340732, + "learning_rate": 1.9838595080957506e-06, + "loss": 0.6782780885696411, + "step": 907 + }, + { + "epoch": 0.2093371757925072, + "grad_norm": 1.0695473520404728, + "learning_rate": 1.9837912292867946e-06, + "loss": 0.5548110008239746, + "step": 908 + }, + { + "epoch": 0.2095677233429395, + "grad_norm": 0.9444146509070543, + "learning_rate": 1.983722807543207e-06, + "loss": 0.5968413352966309, + "step": 909 + }, + { + "epoch": 0.20979827089337175, + "grad_norm": 1.2982877947412854, + "learning_rate": 1.983654242874929e-06, + "loss": 0.6580274105072021, + "step": 910 + }, + { + "epoch": 0.21002881844380403, + "grad_norm": 1.0176231954876447, + "learning_rate": 1.9835855352919224e-06, + "loss": 0.6023098230361938, + "step": 911 + }, + { + "epoch": 0.2102593659942363, + "grad_norm": 0.9242771902070097, + "learning_rate": 1.9835166848041694e-06, + "loss": 0.5659872889518738, + "step": 912 + }, + { + "epoch": 0.2104899135446686, + "grad_norm": 0.8294999124008526, + "learning_rate": 1.983447691421674e-06, + "loss": 0.5198249220848083, + "step": 913 + }, + { + "epoch": 0.21072046109510087, + "grad_norm": 1.1591542811764048, + "learning_rate": 1.983378555154459e-06, + "loss": 0.6395134925842285, + "step": 914 + }, + { + "epoch": 0.21095100864553315, + "grad_norm": 1.0318636409216984, + "learning_rate": 1.98330927601257e-06, + "loss": 0.6178885698318481, + "step": 915 + }, + { + "epoch": 0.21118155619596543, + "grad_norm": 0.9143749378081076, + "learning_rate": 1.9832398540060722e-06, + "loss": 0.6037659645080566, + "step": 916 + }, + { + "epoch": 0.21141210374639768, + "grad_norm": 0.9096271537150638, + "learning_rate": 1.9831702891450527e-06, + "loss": 0.5862294435501099, + "step": 917 + }, + { + "epoch": 0.21164265129682996, + "grad_norm": 1.3364895989669363, + "learning_rate": 1.9831005814396173e-06, + "loss": 0.5884512662887573, + "step": 918 + }, + { + "epoch": 0.21187319884726225, + "grad_norm": 0.920898959799489, + "learning_rate": 1.9830307308998944e-06, + "loss": 0.6083986759185791, + "step": 919 + }, + { + "epoch": 0.21210374639769453, + "grad_norm": 0.8631208244450347, + "learning_rate": 1.982960737536033e-06, + "loss": 0.5277038216590881, + "step": 920 + }, + { + "epoch": 0.2123342939481268, + "grad_norm": 1.0015447276757887, + "learning_rate": 1.9828906013582016e-06, + "loss": 0.6744534969329834, + "step": 921 + }, + { + "epoch": 0.2125648414985591, + "grad_norm": 0.9877926387823571, + "learning_rate": 1.9828203223765906e-06, + "loss": 0.5403028130531311, + "step": 922 + }, + { + "epoch": 0.21279538904899137, + "grad_norm": 0.8105271073394711, + "learning_rate": 1.9827499006014106e-06, + "loss": 0.5237953066825867, + "step": 923 + }, + { + "epoch": 0.21302593659942362, + "grad_norm": 0.9207602163228722, + "learning_rate": 1.982679336042894e-06, + "loss": 0.5229809284210205, + "step": 924 + }, + { + "epoch": 0.2132564841498559, + "grad_norm": 1.0055767309241133, + "learning_rate": 1.9826086287112924e-06, + "loss": 0.5582294464111328, + "step": 925 + }, + { + "epoch": 0.21348703170028818, + "grad_norm": 0.8758414895973887, + "learning_rate": 1.9825377786168785e-06, + "loss": 0.5803326964378357, + "step": 926 + }, + { + "epoch": 0.21371757925072046, + "grad_norm": 1.0609163639206318, + "learning_rate": 1.9824667857699468e-06, + "loss": 0.6260091662406921, + "step": 927 + }, + { + "epoch": 0.21394812680115274, + "grad_norm": 1.0231347292786834, + "learning_rate": 1.9823956501808114e-06, + "loss": 0.6557651162147522, + "step": 928 + }, + { + "epoch": 0.21417867435158502, + "grad_norm": 0.9080397919378964, + "learning_rate": 1.982324371859808e-06, + "loss": 0.613300085067749, + "step": 929 + }, + { + "epoch": 0.21440922190201728, + "grad_norm": 1.0385007304553873, + "learning_rate": 1.9822529508172918e-06, + "loss": 0.6877464056015015, + "step": 930 + }, + { + "epoch": 0.21463976945244956, + "grad_norm": 1.2357325420265879, + "learning_rate": 1.9821813870636403e-06, + "loss": 0.6349912881851196, + "step": 931 + }, + { + "epoch": 0.21487031700288184, + "grad_norm": 0.8782361615705243, + "learning_rate": 1.9821096806092505e-06, + "loss": 0.6005406975746155, + "step": 932 + }, + { + "epoch": 0.21510086455331412, + "grad_norm": 1.0161666615573182, + "learning_rate": 1.982037831464541e-06, + "loss": 0.6088801622390747, + "step": 933 + }, + { + "epoch": 0.2153314121037464, + "grad_norm": 0.9111448870031985, + "learning_rate": 1.9819658396399504e-06, + "loss": 0.5831236243247986, + "step": 934 + }, + { + "epoch": 0.21556195965417868, + "grad_norm": 1.0726672557651, + "learning_rate": 1.9818937051459387e-06, + "loss": 0.6151256561279297, + "step": 935 + }, + { + "epoch": 0.21579250720461096, + "grad_norm": 0.9951140694484449, + "learning_rate": 1.9818214279929858e-06, + "loss": 0.5083675384521484, + "step": 936 + }, + { + "epoch": 0.21602305475504321, + "grad_norm": 0.9882190028949231, + "learning_rate": 1.9817490081915933e-06, + "loss": 0.5814487934112549, + "step": 937 + }, + { + "epoch": 0.2162536023054755, + "grad_norm": 1.0890112715760354, + "learning_rate": 1.9816764457522826e-06, + "loss": 0.6241549253463745, + "step": 938 + }, + { + "epoch": 0.21648414985590778, + "grad_norm": 1.259145105095718, + "learning_rate": 1.981603740685597e-06, + "loss": 0.7381168603897095, + "step": 939 + }, + { + "epoch": 0.21671469740634006, + "grad_norm": 0.9539284871563865, + "learning_rate": 1.981530893002099e-06, + "loss": 0.5332478880882263, + "step": 940 + }, + { + "epoch": 0.21694524495677234, + "grad_norm": 1.0612148203565177, + "learning_rate": 1.981457902712373e-06, + "loss": 0.6405541896820068, + "step": 941 + }, + { + "epoch": 0.21717579250720462, + "grad_norm": 1.2447747157532114, + "learning_rate": 1.9813847698270234e-06, + "loss": 0.6976902484893799, + "step": 942 + }, + { + "epoch": 0.2174063400576369, + "grad_norm": 0.8077530273736168, + "learning_rate": 1.981311494356676e-06, + "loss": 0.5148216485977173, + "step": 943 + }, + { + "epoch": 0.21763688760806915, + "grad_norm": 0.9533804720685899, + "learning_rate": 1.981238076311977e-06, + "loss": 0.612360954284668, + "step": 944 + }, + { + "epoch": 0.21786743515850143, + "grad_norm": 0.9002805860040698, + "learning_rate": 1.981164515703593e-06, + "loss": 0.5379883050918579, + "step": 945 + }, + { + "epoch": 0.2180979827089337, + "grad_norm": 1.1634066704773265, + "learning_rate": 1.9810908125422117e-06, + "loss": 0.5931693911552429, + "step": 946 + }, + { + "epoch": 0.218328530259366, + "grad_norm": 1.0807462385025328, + "learning_rate": 1.9810169668385415e-06, + "loss": 0.610332727432251, + "step": 947 + }, + { + "epoch": 0.21855907780979827, + "grad_norm": 0.9154396888433077, + "learning_rate": 1.980942978603311e-06, + "loss": 0.6150614619255066, + "step": 948 + }, + { + "epoch": 0.21878962536023056, + "grad_norm": 0.8210884658105372, + "learning_rate": 1.9808688478472707e-06, + "loss": 0.5653204917907715, + "step": 949 + }, + { + "epoch": 0.21902017291066284, + "grad_norm": 1.0191839025794176, + "learning_rate": 1.9807945745811906e-06, + "loss": 0.5285670161247253, + "step": 950 + }, + { + "epoch": 0.2192507204610951, + "grad_norm": 1.0087326366586278, + "learning_rate": 1.9807201588158617e-06, + "loss": 0.5583071708679199, + "step": 951 + }, + { + "epoch": 0.21948126801152737, + "grad_norm": 0.845970473922194, + "learning_rate": 1.9806456005620957e-06, + "loss": 0.5426152944564819, + "step": 952 + }, + { + "epoch": 0.21971181556195965, + "grad_norm": 1.063629105933386, + "learning_rate": 1.9805708998307256e-06, + "loss": 0.5979200601577759, + "step": 953 + }, + { + "epoch": 0.21994236311239193, + "grad_norm": 0.9035517877439286, + "learning_rate": 1.9804960566326045e-06, + "loss": 0.5606704950332642, + "step": 954 + }, + { + "epoch": 0.2201729106628242, + "grad_norm": 0.9445059191114737, + "learning_rate": 1.9804210709786057e-06, + "loss": 0.5354186296463013, + "step": 955 + }, + { + "epoch": 0.2204034582132565, + "grad_norm": 0.9121007186425357, + "learning_rate": 1.980345942879625e-06, + "loss": 0.5811333060264587, + "step": 956 + }, + { + "epoch": 0.22063400576368877, + "grad_norm": 0.9064710646915293, + "learning_rate": 1.980270672346577e-06, + "loss": 0.6390595436096191, + "step": 957 + }, + { + "epoch": 0.22086455331412103, + "grad_norm": 0.9905980945153653, + "learning_rate": 1.9801952593903983e-06, + "loss": 0.5870425701141357, + "step": 958 + }, + { + "epoch": 0.2210951008645533, + "grad_norm": 1.073439369948722, + "learning_rate": 1.9801197040220443e-06, + "loss": 0.7697482109069824, + "step": 959 + }, + { + "epoch": 0.2213256484149856, + "grad_norm": 0.9893905657388904, + "learning_rate": 1.980044006252494e-06, + "loss": 0.4672078490257263, + "step": 960 + }, + { + "epoch": 0.22155619596541787, + "grad_norm": 1.0315754655846494, + "learning_rate": 1.979968166092744e-06, + "loss": 0.7151461839675903, + "step": 961 + }, + { + "epoch": 0.22178674351585015, + "grad_norm": 1.0107132012823437, + "learning_rate": 1.9798921835538147e-06, + "loss": 0.6638733148574829, + "step": 962 + }, + { + "epoch": 0.22201729106628243, + "grad_norm": 1.0380957382757268, + "learning_rate": 1.979816058646745e-06, + "loss": 0.723508358001709, + "step": 963 + }, + { + "epoch": 0.2222478386167147, + "grad_norm": 0.9188622865566421, + "learning_rate": 1.979739791382594e-06, + "loss": 0.5891420841217041, + "step": 964 + }, + { + "epoch": 0.22247838616714696, + "grad_norm": 0.9724575741379544, + "learning_rate": 1.979663381772443e-06, + "loss": 0.6365354061126709, + "step": 965 + }, + { + "epoch": 0.22270893371757924, + "grad_norm": 0.9422715444997953, + "learning_rate": 1.979586829827395e-06, + "loss": 0.5367093682289124, + "step": 966 + }, + { + "epoch": 0.22293948126801152, + "grad_norm": 1.0633311974899067, + "learning_rate": 1.9795101355585702e-06, + "loss": 0.5783185958862305, + "step": 967 + }, + { + "epoch": 0.2231700288184438, + "grad_norm": 1.0645482046561703, + "learning_rate": 1.979433298977113e-06, + "loss": 0.5637539625167847, + "step": 968 + }, + { + "epoch": 0.22340057636887609, + "grad_norm": 1.1944531298271532, + "learning_rate": 1.979356320094186e-06, + "loss": 0.6345614194869995, + "step": 969 + }, + { + "epoch": 0.22363112391930837, + "grad_norm": 0.8516167233031302, + "learning_rate": 1.9792791989209734e-06, + "loss": 0.6066634058952332, + "step": 970 + }, + { + "epoch": 0.22386167146974065, + "grad_norm": 0.8550340640468586, + "learning_rate": 1.9792019354686807e-06, + "loss": 0.5921822786331177, + "step": 971 + }, + { + "epoch": 0.2240922190201729, + "grad_norm": 1.0592142868604846, + "learning_rate": 1.9791245297485334e-06, + "loss": 0.6421139240264893, + "step": 972 + }, + { + "epoch": 0.22432276657060518, + "grad_norm": 0.9092290184495944, + "learning_rate": 1.9790469817717775e-06, + "loss": 0.5608785152435303, + "step": 973 + }, + { + "epoch": 0.22455331412103746, + "grad_norm": 0.9137300340422447, + "learning_rate": 1.97896929154968e-06, + "loss": 0.5314462184906006, + "step": 974 + }, + { + "epoch": 0.22478386167146974, + "grad_norm": 1.1560347021267936, + "learning_rate": 1.9788914590935284e-06, + "loss": 0.6488084197044373, + "step": 975 + }, + { + "epoch": 0.22501440922190202, + "grad_norm": 1.1192487958571726, + "learning_rate": 1.978813484414631e-06, + "loss": 0.6000053882598877, + "step": 976 + }, + { + "epoch": 0.2252449567723343, + "grad_norm": 0.8931287325899492, + "learning_rate": 1.9787353675243162e-06, + "loss": 0.624097466468811, + "step": 977 + }, + { + "epoch": 0.22547550432276656, + "grad_norm": 0.8981584252602057, + "learning_rate": 1.9786571084339346e-06, + "loss": 0.5497676730155945, + "step": 978 + }, + { + "epoch": 0.22570605187319884, + "grad_norm": 1.032097826259714, + "learning_rate": 1.9785787071548558e-06, + "loss": 0.6013498902320862, + "step": 979 + }, + { + "epoch": 0.22593659942363112, + "grad_norm": 1.0473431651513534, + "learning_rate": 1.978500163698471e-06, + "loss": 0.5227783918380737, + "step": 980 + }, + { + "epoch": 0.2261671469740634, + "grad_norm": 1.0552898348513462, + "learning_rate": 1.9784214780761912e-06, + "loss": 0.5406474471092224, + "step": 981 + }, + { + "epoch": 0.22639769452449568, + "grad_norm": 1.178464281869854, + "learning_rate": 1.9783426502994495e-06, + "loss": 0.5630630254745483, + "step": 982 + }, + { + "epoch": 0.22662824207492796, + "grad_norm": 1.0296512675290248, + "learning_rate": 1.9782636803796975e-06, + "loss": 0.7161320447921753, + "step": 983 + }, + { + "epoch": 0.22685878962536024, + "grad_norm": 0.8220598435695787, + "learning_rate": 1.97818456832841e-06, + "loss": 0.6241968870162964, + "step": 984 + }, + { + "epoch": 0.2270893371757925, + "grad_norm": 1.0289641391925526, + "learning_rate": 1.97810531415708e-06, + "loss": 0.6150163412094116, + "step": 985 + }, + { + "epoch": 0.22731988472622477, + "grad_norm": 0.9798195379296696, + "learning_rate": 1.9780259178772236e-06, + "loss": 0.5523653626441956, + "step": 986 + }, + { + "epoch": 0.22755043227665706, + "grad_norm": 0.9247757003196834, + "learning_rate": 1.977946379500375e-06, + "loss": 0.6309713125228882, + "step": 987 + }, + { + "epoch": 0.22778097982708934, + "grad_norm": 0.8708946148848759, + "learning_rate": 1.977866699038091e-06, + "loss": 0.572121798992157, + "step": 988 + }, + { + "epoch": 0.22801152737752162, + "grad_norm": 1.0646066439636737, + "learning_rate": 1.9777868765019477e-06, + "loss": 0.5242247581481934, + "step": 989 + }, + { + "epoch": 0.2282420749279539, + "grad_norm": 0.9115141253954517, + "learning_rate": 1.9777069119035435e-06, + "loss": 0.6013658046722412, + "step": 990 + }, + { + "epoch": 0.22847262247838618, + "grad_norm": 1.1012219089888924, + "learning_rate": 1.977626805254496e-06, + "loss": 0.5432295799255371, + "step": 991 + }, + { + "epoch": 0.22870317002881843, + "grad_norm": 1.2118421806825892, + "learning_rate": 1.9775465565664436e-06, + "loss": 0.5512800216674805, + "step": 992 + }, + { + "epoch": 0.2289337175792507, + "grad_norm": 1.2153606816751235, + "learning_rate": 1.9774661658510454e-06, + "loss": 0.6150898337364197, + "step": 993 + }, + { + "epoch": 0.229164265129683, + "grad_norm": 0.8320247490731437, + "learning_rate": 1.977385633119982e-06, + "loss": 0.6104747653007507, + "step": 994 + }, + { + "epoch": 0.22939481268011527, + "grad_norm": 1.1044827298829611, + "learning_rate": 1.9773049583849537e-06, + "loss": 0.7229997515678406, + "step": 995 + }, + { + "epoch": 0.22962536023054755, + "grad_norm": 0.8296089967891425, + "learning_rate": 1.9772241416576814e-06, + "loss": 0.49942266941070557, + "step": 996 + }, + { + "epoch": 0.22985590778097983, + "grad_norm": 1.0511867199438154, + "learning_rate": 1.9771431829499075e-06, + "loss": 0.6764867901802063, + "step": 997 + }, + { + "epoch": 0.23008645533141212, + "grad_norm": 0.8769497757034282, + "learning_rate": 1.9770620822733943e-06, + "loss": 0.5971235036849976, + "step": 998 + }, + { + "epoch": 0.23031700288184437, + "grad_norm": 1.1909353124607884, + "learning_rate": 1.9769808396399244e-06, + "loss": 0.49988481402397156, + "step": 999 + }, + { + "epoch": 0.23054755043227665, + "grad_norm": 1.0028814447039645, + "learning_rate": 1.976899455061302e-06, + "loss": 0.5656229853630066, + "step": 1000 + }, + { + "epoch": 0.23077809798270893, + "grad_norm": 0.9811524671709572, + "learning_rate": 1.9768179285493505e-06, + "loss": 0.618227481842041, + "step": 1001 + }, + { + "epoch": 0.2310086455331412, + "grad_norm": 0.8840369979000756, + "learning_rate": 1.9767362601159163e-06, + "loss": 0.599855363368988, + "step": 1002 + }, + { + "epoch": 0.2312391930835735, + "grad_norm": 0.9428020117039089, + "learning_rate": 1.9766544497728645e-06, + "loss": 0.6400339603424072, + "step": 1003 + }, + { + "epoch": 0.23146974063400577, + "grad_norm": 0.7764568185943042, + "learning_rate": 1.9765724975320806e-06, + "loss": 0.5003043413162231, + "step": 1004 + }, + { + "epoch": 0.23170028818443805, + "grad_norm": 1.0854087202550196, + "learning_rate": 1.976490403405472e-06, + "loss": 0.6133515238761902, + "step": 1005 + }, + { + "epoch": 0.2319308357348703, + "grad_norm": 1.2022521939980604, + "learning_rate": 1.9764081674049664e-06, + "loss": 0.6931927800178528, + "step": 1006 + }, + { + "epoch": 0.23216138328530259, + "grad_norm": 0.8703497424794447, + "learning_rate": 1.976325789542511e-06, + "loss": 0.6216111779212952, + "step": 1007 + }, + { + "epoch": 0.23239193083573487, + "grad_norm": 1.0933161640642175, + "learning_rate": 1.976243269830075e-06, + "loss": 0.588605523109436, + "step": 1008 + }, + { + "epoch": 0.23262247838616715, + "grad_norm": 0.9817338350394261, + "learning_rate": 1.9761606082796476e-06, + "loss": 0.582482635974884, + "step": 1009 + }, + { + "epoch": 0.23285302593659943, + "grad_norm": 0.8848970658107764, + "learning_rate": 1.9760778049032386e-06, + "loss": 0.6296440362930298, + "step": 1010 + }, + { + "epoch": 0.2330835734870317, + "grad_norm": 1.0232733748116352, + "learning_rate": 1.9759948597128785e-06, + "loss": 0.6383839845657349, + "step": 1011 + }, + { + "epoch": 0.233314121037464, + "grad_norm": 1.0868243233359214, + "learning_rate": 1.975911772720618e-06, + "loss": 0.7672706842422485, + "step": 1012 + }, + { + "epoch": 0.23354466858789624, + "grad_norm": 1.008765447219668, + "learning_rate": 1.9758285439385295e-06, + "loss": 0.6237273216247559, + "step": 1013 + }, + { + "epoch": 0.23377521613832852, + "grad_norm": 0.9625532663927989, + "learning_rate": 1.975745173378705e-06, + "loss": 0.49947991967201233, + "step": 1014 + }, + { + "epoch": 0.2340057636887608, + "grad_norm": 1.1387405141282927, + "learning_rate": 1.975661661053257e-06, + "loss": 0.3864251375198364, + "step": 1015 + }, + { + "epoch": 0.23423631123919308, + "grad_norm": 1.0401714631169554, + "learning_rate": 1.9755780069743194e-06, + "loss": 0.6911368370056152, + "step": 1016 + }, + { + "epoch": 0.23446685878962537, + "grad_norm": 0.912344579821443, + "learning_rate": 1.9754942111540463e-06, + "loss": 0.5999586582183838, + "step": 1017 + }, + { + "epoch": 0.23469740634005765, + "grad_norm": 0.8660990428846996, + "learning_rate": 1.9754102736046118e-06, + "loss": 0.5257225632667542, + "step": 1018 + }, + { + "epoch": 0.23492795389048993, + "grad_norm": 0.8213356313909403, + "learning_rate": 1.975326194338212e-06, + "loss": 0.5269970893859863, + "step": 1019 + }, + { + "epoch": 0.23515850144092218, + "grad_norm": 1.197368044414957, + "learning_rate": 1.975241973367062e-06, + "loss": 0.6317660212516785, + "step": 1020 + }, + { + "epoch": 0.23538904899135446, + "grad_norm": 0.9754684221461335, + "learning_rate": 1.9751576107033985e-06, + "loss": 0.4785078763961792, + "step": 1021 + }, + { + "epoch": 0.23561959654178674, + "grad_norm": 1.1345512879493804, + "learning_rate": 1.9750731063594787e-06, + "loss": 0.647568941116333, + "step": 1022 + }, + { + "epoch": 0.23585014409221902, + "grad_norm": 1.0412315528593523, + "learning_rate": 1.9749884603475798e-06, + "loss": 0.6356335878372192, + "step": 1023 + }, + { + "epoch": 0.2360806916426513, + "grad_norm": 0.8615851871429264, + "learning_rate": 1.9749036726800003e-06, + "loss": 0.5727132558822632, + "step": 1024 + }, + { + "epoch": 0.23631123919308358, + "grad_norm": 1.0321299541613722, + "learning_rate": 1.9748187433690587e-06, + "loss": 0.5090360641479492, + "step": 1025 + }, + { + "epoch": 0.23654178674351586, + "grad_norm": 0.884453476481366, + "learning_rate": 1.974733672427095e-06, + "loss": 0.5971696376800537, + "step": 1026 + }, + { + "epoch": 0.23677233429394812, + "grad_norm": 0.9510484245916626, + "learning_rate": 1.974648459866468e-06, + "loss": 0.5483651161193848, + "step": 1027 + }, + { + "epoch": 0.2370028818443804, + "grad_norm": 1.200960551078261, + "learning_rate": 1.9745631056995594e-06, + "loss": 0.5528438091278076, + "step": 1028 + }, + { + "epoch": 0.23723342939481268, + "grad_norm": 1.3305054944312353, + "learning_rate": 1.9744776099387695e-06, + "loss": 0.6233264207839966, + "step": 1029 + }, + { + "epoch": 0.23746397694524496, + "grad_norm": 0.946050950875143, + "learning_rate": 1.97439197259652e-06, + "loss": 0.6005399823188782, + "step": 1030 + }, + { + "epoch": 0.23769452449567724, + "grad_norm": 0.9374072373092667, + "learning_rate": 1.9743061936852537e-06, + "loss": 0.5596457719802856, + "step": 1031 + }, + { + "epoch": 0.23792507204610952, + "grad_norm": 0.9984185508791168, + "learning_rate": 1.9742202732174328e-06, + "loss": 0.6051905155181885, + "step": 1032 + }, + { + "epoch": 0.23815561959654177, + "grad_norm": 1.0283665443374947, + "learning_rate": 1.974134211205541e-06, + "loss": 0.6691153049468994, + "step": 1033 + }, + { + "epoch": 0.23838616714697405, + "grad_norm": 0.9603102228958054, + "learning_rate": 1.9740480076620814e-06, + "loss": 0.5938076972961426, + "step": 1034 + }, + { + "epoch": 0.23861671469740633, + "grad_norm": 0.8705818698994734, + "learning_rate": 1.9739616625995796e-06, + "loss": 0.525333046913147, + "step": 1035 + }, + { + "epoch": 0.23884726224783862, + "grad_norm": 0.8540807816466253, + "learning_rate": 1.97387517603058e-06, + "loss": 0.6196523308753967, + "step": 1036 + }, + { + "epoch": 0.2390778097982709, + "grad_norm": 0.9798479702089192, + "learning_rate": 1.9737885479676484e-06, + "loss": 0.626924991607666, + "step": 1037 + }, + { + "epoch": 0.23930835734870318, + "grad_norm": 0.999826886746383, + "learning_rate": 1.973701778423371e-06, + "loss": 0.6047407984733582, + "step": 1038 + }, + { + "epoch": 0.23953890489913546, + "grad_norm": 0.9299360668604228, + "learning_rate": 1.9736148674103543e-06, + "loss": 0.5994965434074402, + "step": 1039 + }, + { + "epoch": 0.2397694524495677, + "grad_norm": 1.0820368371686164, + "learning_rate": 1.9735278149412257e-06, + "loss": 0.4967964291572571, + "step": 1040 + }, + { + "epoch": 0.24, + "grad_norm": 0.9325838905339232, + "learning_rate": 1.973440621028633e-06, + "loss": 0.5955591201782227, + "step": 1041 + }, + { + "epoch": 0.24023054755043227, + "grad_norm": 0.9238934455540466, + "learning_rate": 1.9733532856852444e-06, + "loss": 0.5286005139350891, + "step": 1042 + }, + { + "epoch": 0.24046109510086455, + "grad_norm": 0.7955501791954424, + "learning_rate": 1.9732658089237494e-06, + "loss": 0.5257104635238647, + "step": 1043 + }, + { + "epoch": 0.24069164265129683, + "grad_norm": 1.0432386643488358, + "learning_rate": 1.9731781907568564e-06, + "loss": 0.6578767895698547, + "step": 1044 + }, + { + "epoch": 0.24092219020172911, + "grad_norm": 1.081984606268088, + "learning_rate": 1.9730904311972963e-06, + "loss": 0.5154295563697815, + "step": 1045 + }, + { + "epoch": 0.2411527377521614, + "grad_norm": 0.9939154656713256, + "learning_rate": 1.97300253025782e-06, + "loss": 0.5729039311408997, + "step": 1046 + }, + { + "epoch": 0.24138328530259365, + "grad_norm": 1.056713721812048, + "learning_rate": 1.9729144879511976e-06, + "loss": 0.5996200442314148, + "step": 1047 + }, + { + "epoch": 0.24161383285302593, + "grad_norm": 0.9545204667371253, + "learning_rate": 1.9728263042902207e-06, + "loss": 0.46382981538772583, + "step": 1048 + }, + { + "epoch": 0.2418443804034582, + "grad_norm": 0.970105230555475, + "learning_rate": 1.9727379792877024e-06, + "loss": 0.5767467617988586, + "step": 1049 + }, + { + "epoch": 0.2420749279538905, + "grad_norm": 1.0533787261450176, + "learning_rate": 1.9726495129564747e-06, + "loss": 0.5285289287567139, + "step": 1050 + }, + { + "epoch": 0.24230547550432277, + "grad_norm": 1.1516900956240101, + "learning_rate": 1.972560905309391e-06, + "loss": 0.5614134073257446, + "step": 1051 + }, + { + "epoch": 0.24253602305475505, + "grad_norm": 0.9565577766108253, + "learning_rate": 1.9724721563593253e-06, + "loss": 0.5901243686676025, + "step": 1052 + }, + { + "epoch": 0.24276657060518733, + "grad_norm": 1.0833385993994633, + "learning_rate": 1.9723832661191716e-06, + "loss": 0.4962998628616333, + "step": 1053 + }, + { + "epoch": 0.24299711815561958, + "grad_norm": 0.9970132155639208, + "learning_rate": 1.9722942346018446e-06, + "loss": 0.42762500047683716, + "step": 1054 + }, + { + "epoch": 0.24322766570605187, + "grad_norm": 0.9766037381813959, + "learning_rate": 1.9722050618202802e-06, + "loss": 0.5721521377563477, + "step": 1055 + }, + { + "epoch": 0.24345821325648415, + "grad_norm": 1.206042721018001, + "learning_rate": 1.972115747787434e-06, + "loss": 0.6909880042076111, + "step": 1056 + }, + { + "epoch": 0.24368876080691643, + "grad_norm": 0.9431570922856867, + "learning_rate": 1.9720262925162823e-06, + "loss": 0.5736039876937866, + "step": 1057 + }, + { + "epoch": 0.2439193083573487, + "grad_norm": 0.8314919230697462, + "learning_rate": 1.971936696019822e-06, + "loss": 0.5220572352409363, + "step": 1058 + }, + { + "epoch": 0.244149855907781, + "grad_norm": 1.4060669997055812, + "learning_rate": 1.971846958311071e-06, + "loss": 0.6336761116981506, + "step": 1059 + }, + { + "epoch": 0.24438040345821327, + "grad_norm": 0.8735700995362021, + "learning_rate": 1.9717570794030663e-06, + "loss": 0.46690547466278076, + "step": 1060 + }, + { + "epoch": 0.24461095100864552, + "grad_norm": 0.9452291694943687, + "learning_rate": 1.971667059308867e-06, + "loss": 0.5011268854141235, + "step": 1061 + }, + { + "epoch": 0.2448414985590778, + "grad_norm": 1.026129576805315, + "learning_rate": 1.971576898041552e-06, + "loss": 0.6146481037139893, + "step": 1062 + }, + { + "epoch": 0.24507204610951008, + "grad_norm": 1.1263839223339915, + "learning_rate": 1.9714865956142216e-06, + "loss": 0.6958246231079102, + "step": 1063 + }, + { + "epoch": 0.24530259365994236, + "grad_norm": 1.0566004679295709, + "learning_rate": 1.9713961520399943e-06, + "loss": 0.6039535403251648, + "step": 1064 + }, + { + "epoch": 0.24553314121037464, + "grad_norm": 1.0074912611483957, + "learning_rate": 1.9713055673320116e-06, + "loss": 0.5674936771392822, + "step": 1065 + }, + { + "epoch": 0.24576368876080693, + "grad_norm": 0.9022345989400322, + "learning_rate": 1.9712148415034343e-06, + "loss": 0.573884129524231, + "step": 1066 + }, + { + "epoch": 0.2459942363112392, + "grad_norm": 1.0703229553304763, + "learning_rate": 1.971123974567444e-06, + "loss": 0.5769180059432983, + "step": 1067 + }, + { + "epoch": 0.24622478386167146, + "grad_norm": 1.0813400566275309, + "learning_rate": 1.9710329665372423e-06, + "loss": 0.6574220657348633, + "step": 1068 + }, + { + "epoch": 0.24645533141210374, + "grad_norm": 0.9314089138220044, + "learning_rate": 1.970941817426052e-06, + "loss": 0.5505692362785339, + "step": 1069 + }, + { + "epoch": 0.24668587896253602, + "grad_norm": 1.1082972197850245, + "learning_rate": 1.970850527247116e-06, + "loss": 0.6399080157279968, + "step": 1070 + }, + { + "epoch": 0.2469164265129683, + "grad_norm": 1.30372883235568, + "learning_rate": 1.9707590960136983e-06, + "loss": 0.6340930461883545, + "step": 1071 + }, + { + "epoch": 0.24714697406340058, + "grad_norm": 0.8757882392932358, + "learning_rate": 1.9706675237390825e-06, + "loss": 0.534781277179718, + "step": 1072 + }, + { + "epoch": 0.24737752161383286, + "grad_norm": 1.159381670874605, + "learning_rate": 1.970575810436573e-06, + "loss": 0.5925683975219727, + "step": 1073 + }, + { + "epoch": 0.24760806916426514, + "grad_norm": 1.0150707063788142, + "learning_rate": 1.970483956119495e-06, + "loss": 0.6358739137649536, + "step": 1074 + }, + { + "epoch": 0.2478386167146974, + "grad_norm": 1.1453368323410489, + "learning_rate": 1.970391960801194e-06, + "loss": 0.6039003133773804, + "step": 1075 + }, + { + "epoch": 0.24806916426512968, + "grad_norm": 1.1201673932401737, + "learning_rate": 1.970299824495036e-06, + "loss": 0.5916771292686462, + "step": 1076 + }, + { + "epoch": 0.24829971181556196, + "grad_norm": 1.0794126617042616, + "learning_rate": 1.9702075472144067e-06, + "loss": 0.5258580446243286, + "step": 1077 + }, + { + "epoch": 0.24853025936599424, + "grad_norm": 0.8601905249462747, + "learning_rate": 1.9701151289727147e-06, + "loss": 0.6265199184417725, + "step": 1078 + }, + { + "epoch": 0.24876080691642652, + "grad_norm": 0.979702903534406, + "learning_rate": 1.9700225697833854e-06, + "loss": 0.579441249370575, + "step": 1079 + }, + { + "epoch": 0.2489913544668588, + "grad_norm": 0.8004324701001138, + "learning_rate": 1.969929869659868e-06, + "loss": 0.5297039747238159, + "step": 1080 + }, + { + "epoch": 0.24922190201729105, + "grad_norm": 1.0732237546296053, + "learning_rate": 1.9698370286156306e-06, + "loss": 0.5611151456832886, + "step": 1081 + }, + { + "epoch": 0.24945244956772333, + "grad_norm": 0.9095359949477679, + "learning_rate": 1.969744046664162e-06, + "loss": 0.5669015645980835, + "step": 1082 + }, + { + "epoch": 0.24968299711815561, + "grad_norm": 0.9098376279113534, + "learning_rate": 1.9696509238189715e-06, + "loss": 0.5561148524284363, + "step": 1083 + }, + { + "epoch": 0.2499135446685879, + "grad_norm": 0.8595555128128054, + "learning_rate": 1.9695576600935886e-06, + "loss": 0.5287264585494995, + "step": 1084 + }, + { + "epoch": 0.2501440922190202, + "grad_norm": 0.9117185255603872, + "learning_rate": 1.9694642555015638e-06, + "loss": 0.6170656681060791, + "step": 1085 + }, + { + "epoch": 0.25037463976945246, + "grad_norm": 1.0935336615242157, + "learning_rate": 1.969370710056468e-06, + "loss": 0.5874326229095459, + "step": 1086 + }, + { + "epoch": 0.25060518731988474, + "grad_norm": 1.042070981592915, + "learning_rate": 1.9692770237718924e-06, + "loss": 0.5682862997055054, + "step": 1087 + }, + { + "epoch": 0.250835734870317, + "grad_norm": 1.0488760267187054, + "learning_rate": 1.969183196661448e-06, + "loss": 0.5732690095901489, + "step": 1088 + }, + { + "epoch": 0.2510662824207493, + "grad_norm": 1.0767943466207117, + "learning_rate": 1.9690892287387675e-06, + "loss": 0.5902074575424194, + "step": 1089 + }, + { + "epoch": 0.2512968299711816, + "grad_norm": 2.0373106353221635, + "learning_rate": 1.9689951200175033e-06, + "loss": 0.6051011085510254, + "step": 1090 + }, + { + "epoch": 0.25152737752161386, + "grad_norm": 0.9633389270435931, + "learning_rate": 1.9689008705113283e-06, + "loss": 0.5677424669265747, + "step": 1091 + }, + { + "epoch": 0.2517579250720461, + "grad_norm": 1.198630695418835, + "learning_rate": 1.9688064802339364e-06, + "loss": 0.5889866352081299, + "step": 1092 + }, + { + "epoch": 0.25198847262247837, + "grad_norm": 0.8479895449997884, + "learning_rate": 1.968711949199041e-06, + "loss": 0.574920654296875, + "step": 1093 + }, + { + "epoch": 0.25221902017291065, + "grad_norm": 1.1022849092895963, + "learning_rate": 1.9686172774203765e-06, + "loss": 0.642555296421051, + "step": 1094 + }, + { + "epoch": 0.2524495677233429, + "grad_norm": 1.1981893904929435, + "learning_rate": 1.9685224649116985e-06, + "loss": 0.5177907347679138, + "step": 1095 + }, + { + "epoch": 0.2526801152737752, + "grad_norm": 0.9512396951241048, + "learning_rate": 1.968427511686781e-06, + "loss": 0.5089443922042847, + "step": 1096 + }, + { + "epoch": 0.2529106628242075, + "grad_norm": 1.285147110492746, + "learning_rate": 1.9683324177594205e-06, + "loss": 0.5840367078781128, + "step": 1097 + }, + { + "epoch": 0.25314121037463977, + "grad_norm": 0.9223951585699367, + "learning_rate": 1.9682371831434335e-06, + "loss": 0.5504645109176636, + "step": 1098 + }, + { + "epoch": 0.25337175792507205, + "grad_norm": 1.0341672284132668, + "learning_rate": 1.968141807852656e-06, + "loss": 0.5837891101837158, + "step": 1099 + }, + { + "epoch": 0.25360230547550433, + "grad_norm": 1.0197758826620191, + "learning_rate": 1.9680462919009453e-06, + "loss": 0.5304594039916992, + "step": 1100 + }, + { + "epoch": 0.2538328530259366, + "grad_norm": 1.1017033952693849, + "learning_rate": 1.9679506353021784e-06, + "loss": 0.6483093500137329, + "step": 1101 + }, + { + "epoch": 0.2540634005763689, + "grad_norm": 0.9880067001638037, + "learning_rate": 1.967854838070254e-06, + "loss": 0.5041504502296448, + "step": 1102 + }, + { + "epoch": 0.2542939481268012, + "grad_norm": 0.9447138676205955, + "learning_rate": 1.9677589002190897e-06, + "loss": 0.574493408203125, + "step": 1103 + }, + { + "epoch": 0.25452449567723345, + "grad_norm": 1.1268609072582123, + "learning_rate": 1.9676628217626244e-06, + "loss": 0.5675390958786011, + "step": 1104 + }, + { + "epoch": 0.2547550432276657, + "grad_norm": 0.8346824743228196, + "learning_rate": 1.967566602714818e-06, + "loss": 0.5008331537246704, + "step": 1105 + }, + { + "epoch": 0.25498559077809796, + "grad_norm": 1.2638039956043987, + "learning_rate": 1.967470243089649e-06, + "loss": 0.6198144555091858, + "step": 1106 + }, + { + "epoch": 0.25521613832853024, + "grad_norm": 1.0477413614815971, + "learning_rate": 1.9673737429011192e-06, + "loss": 0.6584227085113525, + "step": 1107 + }, + { + "epoch": 0.2554466858789625, + "grad_norm": 1.1653742789318777, + "learning_rate": 1.967277102163247e-06, + "loss": 0.6924588680267334, + "step": 1108 + }, + { + "epoch": 0.2556772334293948, + "grad_norm": 0.9489629504050993, + "learning_rate": 1.9671803208900743e-06, + "loss": 0.4881629943847656, + "step": 1109 + }, + { + "epoch": 0.2559077809798271, + "grad_norm": 1.0207283356812054, + "learning_rate": 1.967083399095663e-06, + "loss": 0.5229992866516113, + "step": 1110 + }, + { + "epoch": 0.25613832853025936, + "grad_norm": 0.9505227305729165, + "learning_rate": 1.9669863367940933e-06, + "loss": 0.5473639965057373, + "step": 1111 + }, + { + "epoch": 0.25636887608069164, + "grad_norm": 1.1682394417720268, + "learning_rate": 1.966889133999469e-06, + "loss": 0.6385387182235718, + "step": 1112 + }, + { + "epoch": 0.2565994236311239, + "grad_norm": 0.914948520803806, + "learning_rate": 1.966791790725911e-06, + "loss": 0.6966750621795654, + "step": 1113 + }, + { + "epoch": 0.2568299711815562, + "grad_norm": 1.0791431206016944, + "learning_rate": 1.966694306987564e-06, + "loss": 0.62409508228302, + "step": 1114 + }, + { + "epoch": 0.2570605187319885, + "grad_norm": 1.1529607219012756, + "learning_rate": 1.96659668279859e-06, + "loss": 0.6782431602478027, + "step": 1115 + }, + { + "epoch": 0.25729106628242077, + "grad_norm": 0.9868247966268149, + "learning_rate": 1.9664989181731736e-06, + "loss": 0.5746676921844482, + "step": 1116 + }, + { + "epoch": 0.25752161383285305, + "grad_norm": 1.1773953097235699, + "learning_rate": 1.9664010131255185e-06, + "loss": 0.622604250907898, + "step": 1117 + }, + { + "epoch": 0.2577521613832853, + "grad_norm": 0.9014524054572233, + "learning_rate": 1.9663029676698493e-06, + "loss": 0.5258551836013794, + "step": 1118 + }, + { + "epoch": 0.25798270893371755, + "grad_norm": 0.8949489094709896, + "learning_rate": 1.9662047818204113e-06, + "loss": 0.49920374155044556, + "step": 1119 + }, + { + "epoch": 0.25821325648414983, + "grad_norm": 0.9638407580541706, + "learning_rate": 1.96610645559147e-06, + "loss": 0.5653362274169922, + "step": 1120 + }, + { + "epoch": 0.2584438040345821, + "grad_norm": 0.8681512571682525, + "learning_rate": 1.9660079889973106e-06, + "loss": 0.635971188545227, + "step": 1121 + }, + { + "epoch": 0.2586743515850144, + "grad_norm": 0.9626228874806679, + "learning_rate": 1.9659093820522395e-06, + "loss": 0.5810589790344238, + "step": 1122 + }, + { + "epoch": 0.2589048991354467, + "grad_norm": 0.9265775915235015, + "learning_rate": 1.9658106347705837e-06, + "loss": 0.5082226991653442, + "step": 1123 + }, + { + "epoch": 0.25913544668587896, + "grad_norm": 1.166109865260175, + "learning_rate": 1.9657117471666893e-06, + "loss": 0.5961207151412964, + "step": 1124 + }, + { + "epoch": 0.25936599423631124, + "grad_norm": 0.8593078573487662, + "learning_rate": 1.9656127192549247e-06, + "loss": 0.5848531723022461, + "step": 1125 + }, + { + "epoch": 0.2595965417867435, + "grad_norm": 1.0267097223141122, + "learning_rate": 1.965513551049677e-06, + "loss": 0.5630871057510376, + "step": 1126 + }, + { + "epoch": 0.2598270893371758, + "grad_norm": 1.0523335755503282, + "learning_rate": 1.965414242565354e-06, + "loss": 0.5128393769264221, + "step": 1127 + }, + { + "epoch": 0.2600576368876081, + "grad_norm": 0.9700118998001345, + "learning_rate": 1.9653147938163846e-06, + "loss": 0.5488131046295166, + "step": 1128 + }, + { + "epoch": 0.26028818443804036, + "grad_norm": 0.9798403795119491, + "learning_rate": 1.9652152048172177e-06, + "loss": 0.5557059049606323, + "step": 1129 + }, + { + "epoch": 0.26051873198847264, + "grad_norm": 0.9637059141265438, + "learning_rate": 1.965115475582323e-06, + "loss": 0.49585384130477905, + "step": 1130 + }, + { + "epoch": 0.2607492795389049, + "grad_norm": 0.9836788141885479, + "learning_rate": 1.9650156061261887e-06, + "loss": 0.5157963037490845, + "step": 1131 + }, + { + "epoch": 0.2609798270893372, + "grad_norm": 0.9745401863616632, + "learning_rate": 1.964915596463326e-06, + "loss": 0.487041175365448, + "step": 1132 + }, + { + "epoch": 0.2612103746397694, + "grad_norm": 0.9355004815538274, + "learning_rate": 1.9648154466082655e-06, + "loss": 0.5007308721542358, + "step": 1133 + }, + { + "epoch": 0.2614409221902017, + "grad_norm": 1.044356333568675, + "learning_rate": 1.9647151565755567e-06, + "loss": 0.5466841459274292, + "step": 1134 + }, + { + "epoch": 0.261671469740634, + "grad_norm": 0.9908185235722049, + "learning_rate": 1.964614726379772e-06, + "loss": 0.6433593034744263, + "step": 1135 + }, + { + "epoch": 0.26190201729106627, + "grad_norm": 1.1188612298182607, + "learning_rate": 1.964514156035502e-06, + "loss": 0.6677781939506531, + "step": 1136 + }, + { + "epoch": 0.26213256484149855, + "grad_norm": 1.1970113817017574, + "learning_rate": 1.9644134455573584e-06, + "loss": 0.601581335067749, + "step": 1137 + }, + { + "epoch": 0.26236311239193083, + "grad_norm": 1.0197346026129679, + "learning_rate": 1.964312594959974e-06, + "loss": 0.512954592704773, + "step": 1138 + }, + { + "epoch": 0.2625936599423631, + "grad_norm": 1.106389545686755, + "learning_rate": 1.964211604258001e-06, + "loss": 0.5901329517364502, + "step": 1139 + }, + { + "epoch": 0.2628242074927954, + "grad_norm": 1.3151832775759593, + "learning_rate": 1.9641104734661126e-06, + "loss": 0.5046425461769104, + "step": 1140 + }, + { + "epoch": 0.2630547550432277, + "grad_norm": 0.7869009238004686, + "learning_rate": 1.9640092025990017e-06, + "loss": 0.5134037137031555, + "step": 1141 + }, + { + "epoch": 0.26328530259365995, + "grad_norm": 1.0902924836300172, + "learning_rate": 1.963907791671382e-06, + "loss": 0.512336015701294, + "step": 1142 + }, + { + "epoch": 0.26351585014409223, + "grad_norm": 0.9203546707939008, + "learning_rate": 1.9638062406979877e-06, + "loss": 0.5255711674690247, + "step": 1143 + }, + { + "epoch": 0.2637463976945245, + "grad_norm": 1.1168638198696839, + "learning_rate": 1.963704549693573e-06, + "loss": 0.5508878827095032, + "step": 1144 + }, + { + "epoch": 0.2639769452449568, + "grad_norm": 1.0728921970422391, + "learning_rate": 1.9636027186729122e-06, + "loss": 0.5642615556716919, + "step": 1145 + }, + { + "epoch": 0.2642074927953891, + "grad_norm": 0.9196228228553693, + "learning_rate": 1.9635007476508006e-06, + "loss": 0.5792304873466492, + "step": 1146 + }, + { + "epoch": 0.2644380403458213, + "grad_norm": 0.9053914378248806, + "learning_rate": 1.9633986366420534e-06, + "loss": 0.5633686780929565, + "step": 1147 + }, + { + "epoch": 0.2646685878962536, + "grad_norm": 0.9794987092956121, + "learning_rate": 1.9632963856615063e-06, + "loss": 0.5152523517608643, + "step": 1148 + }, + { + "epoch": 0.26489913544668586, + "grad_norm": 0.9488988181213043, + "learning_rate": 1.9631939947240155e-06, + "loss": 0.5173834562301636, + "step": 1149 + }, + { + "epoch": 0.26512968299711814, + "grad_norm": 1.224448518118437, + "learning_rate": 1.963091463844457e-06, + "loss": 0.6685044169425964, + "step": 1150 + }, + { + "epoch": 0.2653602305475504, + "grad_norm": 1.3480795736809403, + "learning_rate": 1.9629887930377277e-06, + "loss": 0.5302361249923706, + "step": 1151 + }, + { + "epoch": 0.2655907780979827, + "grad_norm": 1.0617602165821738, + "learning_rate": 1.9628859823187445e-06, + "loss": 0.6829941272735596, + "step": 1152 + }, + { + "epoch": 0.265821325648415, + "grad_norm": 0.9098011562920064, + "learning_rate": 1.962783031702445e-06, + "loss": 0.5956755876541138, + "step": 1153 + }, + { + "epoch": 0.26605187319884727, + "grad_norm": 1.0333072443812423, + "learning_rate": 1.9626799412037866e-06, + "loss": 0.5287376642227173, + "step": 1154 + }, + { + "epoch": 0.26628242074927955, + "grad_norm": 0.885618968970558, + "learning_rate": 1.962576710837747e-06, + "loss": 0.5352818965911865, + "step": 1155 + }, + { + "epoch": 0.2665129682997118, + "grad_norm": 1.1674068509683497, + "learning_rate": 1.962473340619325e-06, + "loss": 0.6345375776290894, + "step": 1156 + }, + { + "epoch": 0.2667435158501441, + "grad_norm": 0.9996146363746836, + "learning_rate": 1.962369830563539e-06, + "loss": 0.5909037590026855, + "step": 1157 + }, + { + "epoch": 0.2669740634005764, + "grad_norm": 0.9050400725217519, + "learning_rate": 1.962266180685428e-06, + "loss": 0.5138572454452515, + "step": 1158 + }, + { + "epoch": 0.26720461095100867, + "grad_norm": 1.070933486271155, + "learning_rate": 1.962162391000051e-06, + "loss": 0.47365278005599976, + "step": 1159 + }, + { + "epoch": 0.2674351585014409, + "grad_norm": 1.0003212071539427, + "learning_rate": 1.962058461522488e-06, + "loss": 0.5489984750747681, + "step": 1160 + }, + { + "epoch": 0.2676657060518732, + "grad_norm": 0.8947391835141708, + "learning_rate": 1.9619543922678383e-06, + "loss": 0.5615831017494202, + "step": 1161 + }, + { + "epoch": 0.26789625360230546, + "grad_norm": 1.1210395543673535, + "learning_rate": 1.9618501832512232e-06, + "loss": 0.6183937788009644, + "step": 1162 + }, + { + "epoch": 0.26812680115273774, + "grad_norm": 0.9636900062000658, + "learning_rate": 1.9617458344877815e-06, + "loss": 0.5763455033302307, + "step": 1163 + }, + { + "epoch": 0.26835734870317, + "grad_norm": 1.0756939610160325, + "learning_rate": 1.9616413459926755e-06, + "loss": 0.42940446734428406, + "step": 1164 + }, + { + "epoch": 0.2685878962536023, + "grad_norm": 1.0571097576051838, + "learning_rate": 1.9615367177810854e-06, + "loss": 0.5339791178703308, + "step": 1165 + }, + { + "epoch": 0.2688184438040346, + "grad_norm": 0.9952506277636632, + "learning_rate": 1.961431949868213e-06, + "loss": 0.4905571937561035, + "step": 1166 + }, + { + "epoch": 0.26904899135446686, + "grad_norm": 1.1894286703853092, + "learning_rate": 1.9613270422692796e-06, + "loss": 0.5400816202163696, + "step": 1167 + }, + { + "epoch": 0.26927953890489914, + "grad_norm": 1.2337518633584537, + "learning_rate": 1.9612219949995276e-06, + "loss": 0.55318284034729, + "step": 1168 + }, + { + "epoch": 0.2695100864553314, + "grad_norm": 0.9693884072277771, + "learning_rate": 1.9611168080742193e-06, + "loss": 0.5832536816596985, + "step": 1169 + }, + { + "epoch": 0.2697406340057637, + "grad_norm": 1.1742682617570424, + "learning_rate": 1.961011481508637e-06, + "loss": 0.6217491626739502, + "step": 1170 + }, + { + "epoch": 0.269971181556196, + "grad_norm": 0.8391232455044884, + "learning_rate": 1.960906015318084e-06, + "loss": 0.5328724384307861, + "step": 1171 + }, + { + "epoch": 0.27020172910662826, + "grad_norm": 1.0363005984112303, + "learning_rate": 1.960800409517882e-06, + "loss": 0.5914000272750854, + "step": 1172 + }, + { + "epoch": 0.27043227665706054, + "grad_norm": 0.8623610094735963, + "learning_rate": 1.9606946641233765e-06, + "loss": 0.5801169872283936, + "step": 1173 + }, + { + "epoch": 0.27066282420749277, + "grad_norm": 1.076472927325218, + "learning_rate": 1.96058877914993e-06, + "loss": 0.606931746006012, + "step": 1174 + }, + { + "epoch": 0.27089337175792505, + "grad_norm": 0.9432338977489813, + "learning_rate": 1.960482754612926e-06, + "loss": 0.492758572101593, + "step": 1175 + }, + { + "epoch": 0.27112391930835733, + "grad_norm": 0.8217955088144805, + "learning_rate": 1.9603765905277705e-06, + "loss": 0.507538914680481, + "step": 1176 + }, + { + "epoch": 0.2713544668587896, + "grad_norm": 0.9913040193455875, + "learning_rate": 1.9602702869098863e-06, + "loss": 0.4696667790412903, + "step": 1177 + }, + { + "epoch": 0.2715850144092219, + "grad_norm": 1.120659252201242, + "learning_rate": 1.9601638437747193e-06, + "loss": 0.6008190512657166, + "step": 1178 + }, + { + "epoch": 0.2718155619596542, + "grad_norm": 0.9398480673155941, + "learning_rate": 1.960057261137734e-06, + "loss": 0.6491943597793579, + "step": 1179 + }, + { + "epoch": 0.27204610951008645, + "grad_norm": 0.9683862753304229, + "learning_rate": 1.9599505390144158e-06, + "loss": 0.5670884847640991, + "step": 1180 + }, + { + "epoch": 0.27227665706051873, + "grad_norm": 0.9863251399458569, + "learning_rate": 1.959843677420271e-06, + "loss": 0.6009481549263, + "step": 1181 + }, + { + "epoch": 0.272507204610951, + "grad_norm": 1.0320129585811584, + "learning_rate": 1.9597366763708244e-06, + "loss": 0.6310709714889526, + "step": 1182 + }, + { + "epoch": 0.2727377521613833, + "grad_norm": 1.0214391551855733, + "learning_rate": 1.9596295358816227e-06, + "loss": 0.6486451029777527, + "step": 1183 + }, + { + "epoch": 0.2729682997118156, + "grad_norm": 1.2065179679905316, + "learning_rate": 1.9595222559682323e-06, + "loss": 0.5718737840652466, + "step": 1184 + }, + { + "epoch": 0.27319884726224786, + "grad_norm": 1.0556460449279068, + "learning_rate": 1.95941483664624e-06, + "loss": 0.5779617428779602, + "step": 1185 + }, + { + "epoch": 0.27342939481268014, + "grad_norm": 0.9802737305305549, + "learning_rate": 1.9593072779312522e-06, + "loss": 0.5744156837463379, + "step": 1186 + }, + { + "epoch": 0.2736599423631124, + "grad_norm": 1.0517062665649357, + "learning_rate": 1.959199579838897e-06, + "loss": 0.5460283756256104, + "step": 1187 + }, + { + "epoch": 0.27389048991354464, + "grad_norm": 1.1680872558774986, + "learning_rate": 1.9590917423848205e-06, + "loss": 0.6543044447898865, + "step": 1188 + }, + { + "epoch": 0.2741210374639769, + "grad_norm": 1.224778428725764, + "learning_rate": 1.9589837655846913e-06, + "loss": 0.5198811292648315, + "step": 1189 + }, + { + "epoch": 0.2743515850144092, + "grad_norm": 1.251778848697816, + "learning_rate": 1.9588756494541974e-06, + "loss": 0.5084035396575928, + "step": 1190 + }, + { + "epoch": 0.2745821325648415, + "grad_norm": 1.0710257569853987, + "learning_rate": 1.958767394009046e-06, + "loss": 0.5904719829559326, + "step": 1191 + }, + { + "epoch": 0.27481268011527377, + "grad_norm": 0.9829228316564669, + "learning_rate": 1.9586589992649663e-06, + "loss": 0.5030600428581238, + "step": 1192 + }, + { + "epoch": 0.27504322766570605, + "grad_norm": 0.9873617968914287, + "learning_rate": 1.958550465237707e-06, + "loss": 0.5184324383735657, + "step": 1193 + }, + { + "epoch": 0.27527377521613833, + "grad_norm": 0.9906489399793428, + "learning_rate": 1.9584417919430368e-06, + "loss": 0.5646244287490845, + "step": 1194 + }, + { + "epoch": 0.2755043227665706, + "grad_norm": 0.9414859170655192, + "learning_rate": 1.9583329793967446e-06, + "loss": 0.5001581907272339, + "step": 1195 + }, + { + "epoch": 0.2757348703170029, + "grad_norm": 1.084929589963752, + "learning_rate": 1.95822402761464e-06, + "loss": 0.5051171183586121, + "step": 1196 + }, + { + "epoch": 0.27596541786743517, + "grad_norm": 1.0285988722389665, + "learning_rate": 1.9581149366125517e-06, + "loss": 0.5530160069465637, + "step": 1197 + }, + { + "epoch": 0.27619596541786745, + "grad_norm": 1.0520557763319474, + "learning_rate": 1.9580057064063305e-06, + "loss": 0.614437460899353, + "step": 1198 + }, + { + "epoch": 0.27642651296829973, + "grad_norm": 1.0793234079478642, + "learning_rate": 1.9578963370118463e-06, + "loss": 0.5519070625305176, + "step": 1199 + }, + { + "epoch": 0.276657060518732, + "grad_norm": 1.0375188615074011, + "learning_rate": 1.9577868284449894e-06, + "loss": 0.5517419576644897, + "step": 1200 + }, + { + "epoch": 0.2768876080691643, + "grad_norm": 1.1157623296809824, + "learning_rate": 1.9576771807216692e-06, + "loss": 0.5128840804100037, + "step": 1201 + }, + { + "epoch": 0.2771181556195965, + "grad_norm": 1.1639234270532368, + "learning_rate": 1.9575673938578177e-06, + "loss": 0.535677433013916, + "step": 1202 + }, + { + "epoch": 0.2773487031700288, + "grad_norm": 1.0321913645714726, + "learning_rate": 1.957457467869385e-06, + "loss": 0.3944365680217743, + "step": 1203 + }, + { + "epoch": 0.2775792507204611, + "grad_norm": 1.1284561091398966, + "learning_rate": 1.957347402772343e-06, + "loss": 0.5461306571960449, + "step": 1204 + }, + { + "epoch": 0.27780979827089336, + "grad_norm": 1.106350357831354, + "learning_rate": 1.9572371985826817e-06, + "loss": 0.5643556714057922, + "step": 1205 + }, + { + "epoch": 0.27804034582132564, + "grad_norm": 0.9942891425121166, + "learning_rate": 1.957126855316414e-06, + "loss": 0.5576694011688232, + "step": 1206 + }, + { + "epoch": 0.2782708933717579, + "grad_norm": 1.0327594151382022, + "learning_rate": 1.9570163729895705e-06, + "loss": 0.4932776689529419, + "step": 1207 + }, + { + "epoch": 0.2785014409221902, + "grad_norm": 1.2617809472764765, + "learning_rate": 1.956905751618204e-06, + "loss": 0.5831997394561768, + "step": 1208 + }, + { + "epoch": 0.2787319884726225, + "grad_norm": 1.020903077723172, + "learning_rate": 1.9567949912183865e-06, + "loss": 0.5535416007041931, + "step": 1209 + }, + { + "epoch": 0.27896253602305476, + "grad_norm": 0.9795082042300237, + "learning_rate": 1.9566840918062096e-06, + "loss": 0.5003511905670166, + "step": 1210 + }, + { + "epoch": 0.27919308357348704, + "grad_norm": 1.1445109073320356, + "learning_rate": 1.9565730533977866e-06, + "loss": 0.5496214628219604, + "step": 1211 + }, + { + "epoch": 0.2794236311239193, + "grad_norm": 1.2037504849326626, + "learning_rate": 1.95646187600925e-06, + "loss": 0.5799363851547241, + "step": 1212 + }, + { + "epoch": 0.2796541786743516, + "grad_norm": 1.0179191940873225, + "learning_rate": 1.9563505596567524e-06, + "loss": 0.520091712474823, + "step": 1213 + }, + { + "epoch": 0.2798847262247839, + "grad_norm": 1.1205541262351417, + "learning_rate": 1.9562391043564674e-06, + "loss": 0.6219311952590942, + "step": 1214 + }, + { + "epoch": 0.2801152737752161, + "grad_norm": 1.1950602989006576, + "learning_rate": 1.9561275101245882e-06, + "loss": 0.5564324259757996, + "step": 1215 + }, + { + "epoch": 0.2803458213256484, + "grad_norm": 1.0120577427828765, + "learning_rate": 1.956015776977328e-06, + "loss": 0.5743024349212646, + "step": 1216 + }, + { + "epoch": 0.2805763688760807, + "grad_norm": 1.017164461130754, + "learning_rate": 1.955903904930921e-06, + "loss": 0.5233654379844666, + "step": 1217 + }, + { + "epoch": 0.28080691642651295, + "grad_norm": 0.9067723518704797, + "learning_rate": 1.9557918940016204e-06, + "loss": 0.4716556668281555, + "step": 1218 + }, + { + "epoch": 0.28103746397694523, + "grad_norm": 1.2253551599931534, + "learning_rate": 1.9556797442057002e-06, + "loss": 0.6010521650314331, + "step": 1219 + }, + { + "epoch": 0.2812680115273775, + "grad_norm": 0.8966324233540468, + "learning_rate": 1.9555674555594553e-06, + "loss": 0.5651501417160034, + "step": 1220 + }, + { + "epoch": 0.2814985590778098, + "grad_norm": 0.9432307335638032, + "learning_rate": 1.9554550280791994e-06, + "loss": 0.5450448393821716, + "step": 1221 + }, + { + "epoch": 0.2817291066282421, + "grad_norm": 1.0874841889552394, + "learning_rate": 1.9553424617812675e-06, + "loss": 0.5615352392196655, + "step": 1222 + }, + { + "epoch": 0.28195965417867436, + "grad_norm": 1.0554961372199343, + "learning_rate": 1.9552297566820143e-06, + "loss": 0.5538485050201416, + "step": 1223 + }, + { + "epoch": 0.28219020172910664, + "grad_norm": 0.8618486691105575, + "learning_rate": 1.9551169127978145e-06, + "loss": 0.5403180122375488, + "step": 1224 + }, + { + "epoch": 0.2824207492795389, + "grad_norm": 1.0882900408954626, + "learning_rate": 1.955003930145063e-06, + "loss": 0.6076033115386963, + "step": 1225 + }, + { + "epoch": 0.2826512968299712, + "grad_norm": 0.9873645747206957, + "learning_rate": 1.954890808740175e-06, + "loss": 0.6477121114730835, + "step": 1226 + }, + { + "epoch": 0.2828818443804035, + "grad_norm": 1.5079810049528974, + "learning_rate": 1.954777548599586e-06, + "loss": 0.601321816444397, + "step": 1227 + }, + { + "epoch": 0.28311239193083576, + "grad_norm": 0.8957891169447902, + "learning_rate": 1.954664149739752e-06, + "loss": 0.4857567250728607, + "step": 1228 + }, + { + "epoch": 0.283342939481268, + "grad_norm": 1.0502205225822576, + "learning_rate": 1.954550612177148e-06, + "loss": 0.5555423498153687, + "step": 1229 + }, + { + "epoch": 0.28357348703170027, + "grad_norm": 1.1164653543013627, + "learning_rate": 1.95443693592827e-06, + "loss": 0.4719756543636322, + "step": 1230 + }, + { + "epoch": 0.28380403458213255, + "grad_norm": 1.0822345938438416, + "learning_rate": 1.9543231210096337e-06, + "loss": 0.5177173018455505, + "step": 1231 + }, + { + "epoch": 0.28403458213256483, + "grad_norm": 1.275308044022549, + "learning_rate": 1.954209167437776e-06, + "loss": 0.5307407975196838, + "step": 1232 + }, + { + "epoch": 0.2842651296829971, + "grad_norm": 0.9179700709062559, + "learning_rate": 1.9540950752292525e-06, + "loss": 0.49399054050445557, + "step": 1233 + }, + { + "epoch": 0.2844956772334294, + "grad_norm": 1.0252223662898894, + "learning_rate": 1.95398084440064e-06, + "loss": 0.5200883150100708, + "step": 1234 + }, + { + "epoch": 0.28472622478386167, + "grad_norm": 1.292162031598749, + "learning_rate": 1.953866474968535e-06, + "loss": 0.576574444770813, + "step": 1235 + }, + { + "epoch": 0.28495677233429395, + "grad_norm": 1.0138321768032905, + "learning_rate": 1.953751966949554e-06, + "loss": 0.5668514966964722, + "step": 1236 + }, + { + "epoch": 0.28518731988472623, + "grad_norm": 1.3589401298927464, + "learning_rate": 1.9536373203603334e-06, + "loss": 0.5539048910140991, + "step": 1237 + }, + { + "epoch": 0.2854178674351585, + "grad_norm": 1.2048300056875207, + "learning_rate": 1.953522535217531e-06, + "loss": 0.5473074913024902, + "step": 1238 + }, + { + "epoch": 0.2856484149855908, + "grad_norm": 0.9738586778260675, + "learning_rate": 1.953407611537823e-06, + "loss": 0.5185353755950928, + "step": 1239 + }, + { + "epoch": 0.2858789625360231, + "grad_norm": 0.9337692843468727, + "learning_rate": 1.953292549337908e-06, + "loss": 0.5476157665252686, + "step": 1240 + }, + { + "epoch": 0.28610951008645535, + "grad_norm": 1.0006717621648324, + "learning_rate": 1.9531773486345024e-06, + "loss": 0.5680351257324219, + "step": 1241 + }, + { + "epoch": 0.28634005763688763, + "grad_norm": 1.040109549295159, + "learning_rate": 1.9530620094443435e-06, + "loss": 0.5800622701644897, + "step": 1242 + }, + { + "epoch": 0.28657060518731986, + "grad_norm": 1.132839901571783, + "learning_rate": 1.952946531784189e-06, + "loss": 0.625177800655365, + "step": 1243 + }, + { + "epoch": 0.28680115273775214, + "grad_norm": 0.9379385742897268, + "learning_rate": 1.952830915670817e-06, + "loss": 0.5468524098396301, + "step": 1244 + }, + { + "epoch": 0.2870317002881844, + "grad_norm": 1.3685689236372411, + "learning_rate": 1.9527151611210247e-06, + "loss": 0.49441972374916077, + "step": 1245 + }, + { + "epoch": 0.2872622478386167, + "grad_norm": 1.0100756301642373, + "learning_rate": 1.9525992681516304e-06, + "loss": 0.5579795837402344, + "step": 1246 + }, + { + "epoch": 0.287492795389049, + "grad_norm": 0.9919306835197232, + "learning_rate": 1.9524832367794724e-06, + "loss": 0.6528097987174988, + "step": 1247 + }, + { + "epoch": 0.28772334293948126, + "grad_norm": 1.0917924034643365, + "learning_rate": 1.9523670670214086e-06, + "loss": 0.6705083847045898, + "step": 1248 + }, + { + "epoch": 0.28795389048991354, + "grad_norm": 1.2147583724315956, + "learning_rate": 1.952250758894317e-06, + "loss": 0.5354126691818237, + "step": 1249 + }, + { + "epoch": 0.2881844380403458, + "grad_norm": 0.8572145708756924, + "learning_rate": 1.9521343124150964e-06, + "loss": 0.5452643036842346, + "step": 1250 + }, + { + "epoch": 0.2884149855907781, + "grad_norm": 1.0087690058045542, + "learning_rate": 1.952017727600665e-06, + "loss": 0.49629518389701843, + "step": 1251 + }, + { + "epoch": 0.2886455331412104, + "grad_norm": 1.840977052870024, + "learning_rate": 1.9519010044679613e-06, + "loss": 0.5664533376693726, + "step": 1252 + }, + { + "epoch": 0.28887608069164267, + "grad_norm": 0.9663129128076696, + "learning_rate": 1.9517841430339443e-06, + "loss": 0.6245483160018921, + "step": 1253 + }, + { + "epoch": 0.28910662824207495, + "grad_norm": 1.0476534361120005, + "learning_rate": 1.9516671433155924e-06, + "loss": 0.5137460827827454, + "step": 1254 + }, + { + "epoch": 0.28933717579250723, + "grad_norm": 1.2755773097313365, + "learning_rate": 1.9515500053299044e-06, + "loss": 0.608911395072937, + "step": 1255 + }, + { + "epoch": 0.28956772334293945, + "grad_norm": 0.9793924921709027, + "learning_rate": 1.9514327290939e-06, + "loss": 0.6000815629959106, + "step": 1256 + }, + { + "epoch": 0.28979827089337173, + "grad_norm": 1.0139494480660591, + "learning_rate": 1.951315314624617e-06, + "loss": 0.4862588047981262, + "step": 1257 + }, + { + "epoch": 0.290028818443804, + "grad_norm": 1.2298057267979068, + "learning_rate": 1.9511977619391155e-06, + "loss": 0.6026263236999512, + "step": 1258 + }, + { + "epoch": 0.2902593659942363, + "grad_norm": 0.9908207251811982, + "learning_rate": 1.951080071054474e-06, + "loss": 0.5419458150863647, + "step": 1259 + }, + { + "epoch": 0.2904899135446686, + "grad_norm": 0.9670582939367568, + "learning_rate": 1.9509622419877926e-06, + "loss": 0.5565283298492432, + "step": 1260 + }, + { + "epoch": 0.29072046109510086, + "grad_norm": 0.8620916182528232, + "learning_rate": 1.9508442747561894e-06, + "loss": 0.4857860803604126, + "step": 1261 + }, + { + "epoch": 0.29095100864553314, + "grad_norm": 1.0547944595229073, + "learning_rate": 1.950726169376805e-06, + "loss": 0.5074048638343811, + "step": 1262 + }, + { + "epoch": 0.2911815561959654, + "grad_norm": 0.9913288407112848, + "learning_rate": 1.9506079258667983e-06, + "loss": 0.5540251731872559, + "step": 1263 + }, + { + "epoch": 0.2914121037463977, + "grad_norm": 1.026810903562498, + "learning_rate": 1.9504895442433487e-06, + "loss": 0.6073076725006104, + "step": 1264 + }, + { + "epoch": 0.29164265129683, + "grad_norm": 0.9981046042175337, + "learning_rate": 1.9503710245236564e-06, + "loss": 0.5984017848968506, + "step": 1265 + }, + { + "epoch": 0.29187319884726226, + "grad_norm": 0.9710138044607731, + "learning_rate": 1.9502523667249403e-06, + "loss": 0.5397658348083496, + "step": 1266 + }, + { + "epoch": 0.29210374639769454, + "grad_norm": 1.0270946207698817, + "learning_rate": 1.950133570864441e-06, + "loss": 0.6069176197052002, + "step": 1267 + }, + { + "epoch": 0.2923342939481268, + "grad_norm": 0.9979443392497551, + "learning_rate": 1.950014636959418e-06, + "loss": 0.564436137676239, + "step": 1268 + }, + { + "epoch": 0.2925648414985591, + "grad_norm": 1.0766140252728487, + "learning_rate": 1.949895565027151e-06, + "loss": 0.5707285404205322, + "step": 1269 + }, + { + "epoch": 0.29279538904899133, + "grad_norm": 1.0411047503839765, + "learning_rate": 1.9497763550849395e-06, + "loss": 0.6490185260772705, + "step": 1270 + }, + { + "epoch": 0.2930259365994236, + "grad_norm": 0.7845119542797886, + "learning_rate": 1.949657007150104e-06, + "loss": 0.45218831300735474, + "step": 1271 + }, + { + "epoch": 0.2932564841498559, + "grad_norm": 1.1831171935673066, + "learning_rate": 1.949537521239985e-06, + "loss": 0.6371254324913025, + "step": 1272 + }, + { + "epoch": 0.29348703170028817, + "grad_norm": 1.085157200958947, + "learning_rate": 1.949417897371942e-06, + "loss": 0.5877312421798706, + "step": 1273 + }, + { + "epoch": 0.29371757925072045, + "grad_norm": 0.9929167587229449, + "learning_rate": 1.9492981355633542e-06, + "loss": 0.449150413274765, + "step": 1274 + }, + { + "epoch": 0.29394812680115273, + "grad_norm": 0.9869589296917282, + "learning_rate": 1.949178235831624e-06, + "loss": 0.5272694826126099, + "step": 1275 + }, + { + "epoch": 0.294178674351585, + "grad_norm": 1.1193537456970053, + "learning_rate": 1.949058198194169e-06, + "loss": 0.5642216801643372, + "step": 1276 + }, + { + "epoch": 0.2944092219020173, + "grad_norm": 1.1033603738976057, + "learning_rate": 1.948938022668431e-06, + "loss": 0.5657975673675537, + "step": 1277 + }, + { + "epoch": 0.2946397694524496, + "grad_norm": 0.9277124761937087, + "learning_rate": 1.9488177092718705e-06, + "loss": 0.4832008183002472, + "step": 1278 + }, + { + "epoch": 0.29487031700288185, + "grad_norm": 0.9360781230227813, + "learning_rate": 1.9486972580219666e-06, + "loss": 0.5040748119354248, + "step": 1279 + }, + { + "epoch": 0.29510086455331414, + "grad_norm": 1.060263936557758, + "learning_rate": 1.9485766689362204e-06, + "loss": 0.5735876560211182, + "step": 1280 + }, + { + "epoch": 0.2953314121037464, + "grad_norm": 0.920068097844014, + "learning_rate": 1.9484559420321522e-06, + "loss": 0.5178484916687012, + "step": 1281 + }, + { + "epoch": 0.2955619596541787, + "grad_norm": 1.1062841405611552, + "learning_rate": 1.948335077327302e-06, + "loss": 0.587762713432312, + "step": 1282 + }, + { + "epoch": 0.295792507204611, + "grad_norm": 0.9847238724786035, + "learning_rate": 1.9482140748392304e-06, + "loss": 0.525052547454834, + "step": 1283 + }, + { + "epoch": 0.2960230547550432, + "grad_norm": 1.3177666588340105, + "learning_rate": 1.948092934585518e-06, + "loss": 0.5834689736366272, + "step": 1284 + }, + { + "epoch": 0.2962536023054755, + "grad_norm": 0.8901759039642109, + "learning_rate": 1.947971656583765e-06, + "loss": 0.5162187814712524, + "step": 1285 + }, + { + "epoch": 0.29648414985590776, + "grad_norm": 0.9676490343075778, + "learning_rate": 1.947850240851591e-06, + "loss": 0.5092250108718872, + "step": 1286 + }, + { + "epoch": 0.29671469740634004, + "grad_norm": 0.9800964573508747, + "learning_rate": 1.9477286874066385e-06, + "loss": 0.46872952580451965, + "step": 1287 + }, + { + "epoch": 0.2969452449567723, + "grad_norm": 0.9775546666041902, + "learning_rate": 1.947606996266566e-06, + "loss": 0.5852276682853699, + "step": 1288 + }, + { + "epoch": 0.2971757925072046, + "grad_norm": 0.9644782015943425, + "learning_rate": 1.947485167449055e-06, + "loss": 0.5660973787307739, + "step": 1289 + }, + { + "epoch": 0.2974063400576369, + "grad_norm": 0.8399040326569367, + "learning_rate": 1.9473632009718057e-06, + "loss": 0.4820208251476288, + "step": 1290 + }, + { + "epoch": 0.29763688760806917, + "grad_norm": 1.075602040364581, + "learning_rate": 1.9472410968525384e-06, + "loss": 0.5744599103927612, + "step": 1291 + }, + { + "epoch": 0.29786743515850145, + "grad_norm": 1.3419660280345462, + "learning_rate": 1.947118855108994e-06, + "loss": 0.6871058940887451, + "step": 1292 + }, + { + "epoch": 0.29809798270893373, + "grad_norm": 1.1137065086424975, + "learning_rate": 1.946996475758932e-06, + "loss": 0.4869844913482666, + "step": 1293 + }, + { + "epoch": 0.298328530259366, + "grad_norm": 1.029480216558641, + "learning_rate": 1.946873958820134e-06, + "loss": 0.4967701733112335, + "step": 1294 + }, + { + "epoch": 0.2985590778097983, + "grad_norm": 1.0144576128566964, + "learning_rate": 1.9467513043104e-06, + "loss": 0.5952246189117432, + "step": 1295 + }, + { + "epoch": 0.29878962536023057, + "grad_norm": 1.064633828381666, + "learning_rate": 1.94662851224755e-06, + "loss": 0.4900238513946533, + "step": 1296 + }, + { + "epoch": 0.29902017291066285, + "grad_norm": 0.9918014262454831, + "learning_rate": 1.946505582649425e-06, + "loss": 0.5941853523254395, + "step": 1297 + }, + { + "epoch": 0.2992507204610951, + "grad_norm": 1.1235707771533257, + "learning_rate": 1.9463825155338848e-06, + "loss": 0.6051995754241943, + "step": 1298 + }, + { + "epoch": 0.29948126801152736, + "grad_norm": 1.072282162357322, + "learning_rate": 1.94625931091881e-06, + "loss": 0.6283519268035889, + "step": 1299 + }, + { + "epoch": 0.29971181556195964, + "grad_norm": 1.0103002736371975, + "learning_rate": 1.9461359688221017e-06, + "loss": 0.6248390674591064, + "step": 1300 + }, + { + "epoch": 0.2999423631123919, + "grad_norm": 1.1117448751600545, + "learning_rate": 1.9460124892616794e-06, + "loss": 0.5948354005813599, + "step": 1301 + }, + { + "epoch": 0.3001729106628242, + "grad_norm": 1.0728652421682976, + "learning_rate": 1.9458888722554835e-06, + "loss": 0.5658224821090698, + "step": 1302 + }, + { + "epoch": 0.3004034582132565, + "grad_norm": 1.4319897671453239, + "learning_rate": 1.9457651178214742e-06, + "loss": 0.570247232913971, + "step": 1303 + }, + { + "epoch": 0.30063400576368876, + "grad_norm": 1.0399032267284913, + "learning_rate": 1.945641225977632e-06, + "loss": 0.5247939825057983, + "step": 1304 + }, + { + "epoch": 0.30086455331412104, + "grad_norm": 1.1738678571390115, + "learning_rate": 1.9455171967419568e-06, + "loss": 0.6583060622215271, + "step": 1305 + }, + { + "epoch": 0.3010951008645533, + "grad_norm": 1.2083121778111987, + "learning_rate": 1.945393030132469e-06, + "loss": 0.506061851978302, + "step": 1306 + }, + { + "epoch": 0.3013256484149856, + "grad_norm": 1.0324264811301358, + "learning_rate": 1.9452687261672086e-06, + "loss": 0.5356897115707397, + "step": 1307 + }, + { + "epoch": 0.3015561959654179, + "grad_norm": 1.2305285140166144, + "learning_rate": 1.945144284864236e-06, + "loss": 0.6293138265609741, + "step": 1308 + }, + { + "epoch": 0.30178674351585016, + "grad_norm": 1.153317719192606, + "learning_rate": 1.9450197062416307e-06, + "loss": 0.5654667019844055, + "step": 1309 + }, + { + "epoch": 0.30201729106628245, + "grad_norm": 1.0118363366308951, + "learning_rate": 1.944894990317493e-06, + "loss": 0.5266513228416443, + "step": 1310 + }, + { + "epoch": 0.30224783861671467, + "grad_norm": 1.0768469268187721, + "learning_rate": 1.944770137109943e-06, + "loss": 0.5196292400360107, + "step": 1311 + }, + { + "epoch": 0.30247838616714695, + "grad_norm": 0.9943801399014878, + "learning_rate": 1.94464514663712e-06, + "loss": 0.6095438003540039, + "step": 1312 + }, + { + "epoch": 0.30270893371757923, + "grad_norm": 0.9779112540640975, + "learning_rate": 1.9445200189171844e-06, + "loss": 0.5676658153533936, + "step": 1313 + }, + { + "epoch": 0.3029394812680115, + "grad_norm": 1.148105030397317, + "learning_rate": 1.9443947539683152e-06, + "loss": 0.6375502347946167, + "step": 1314 + }, + { + "epoch": 0.3031700288184438, + "grad_norm": 1.367494353612328, + "learning_rate": 1.9442693518087132e-06, + "loss": 0.550786018371582, + "step": 1315 + }, + { + "epoch": 0.3034005763688761, + "grad_norm": 1.1457742620403593, + "learning_rate": 1.944143812456597e-06, + "loss": 0.5615516901016235, + "step": 1316 + }, + { + "epoch": 0.30363112391930835, + "grad_norm": 1.1750754942303703, + "learning_rate": 1.9440181359302067e-06, + "loss": 0.566293478012085, + "step": 1317 + }, + { + "epoch": 0.30386167146974064, + "grad_norm": 1.1631676838517875, + "learning_rate": 1.943892322247802e-06, + "loss": 0.6293504238128662, + "step": 1318 + }, + { + "epoch": 0.3040922190201729, + "grad_norm": 1.3799066955799153, + "learning_rate": 1.9437663714276614e-06, + "loss": 0.5615923404693604, + "step": 1319 + }, + { + "epoch": 0.3043227665706052, + "grad_norm": 1.0656917099801624, + "learning_rate": 1.9436402834880854e-06, + "loss": 0.630609929561615, + "step": 1320 + }, + { + "epoch": 0.3045533141210375, + "grad_norm": 1.178750749580376, + "learning_rate": 1.9435140584473923e-06, + "loss": 0.6257727742195129, + "step": 1321 + }, + { + "epoch": 0.30478386167146976, + "grad_norm": 1.0402167057810885, + "learning_rate": 1.943387696323922e-06, + "loss": 0.5694669485092163, + "step": 1322 + }, + { + "epoch": 0.30501440922190204, + "grad_norm": 0.9945053454829159, + "learning_rate": 1.943261197136033e-06, + "loss": 0.606473445892334, + "step": 1323 + }, + { + "epoch": 0.3052449567723343, + "grad_norm": 1.1845589501222127, + "learning_rate": 1.943134560902105e-06, + "loss": 0.585598349571228, + "step": 1324 + }, + { + "epoch": 0.30547550432276654, + "grad_norm": 1.0688662630155763, + "learning_rate": 1.943007787640536e-06, + "loss": 0.5569879412651062, + "step": 1325 + }, + { + "epoch": 0.3057060518731988, + "grad_norm": 1.0126453524950452, + "learning_rate": 1.942880877369746e-06, + "loss": 0.5914568305015564, + "step": 1326 + }, + { + "epoch": 0.3059365994236311, + "grad_norm": 1.0252549201933292, + "learning_rate": 1.9427538301081723e-06, + "loss": 0.47556912899017334, + "step": 1327 + }, + { + "epoch": 0.3061671469740634, + "grad_norm": 1.0841679845773295, + "learning_rate": 1.942626645874275e-06, + "loss": 0.5298174619674683, + "step": 1328 + }, + { + "epoch": 0.30639769452449567, + "grad_norm": 1.0392947019090986, + "learning_rate": 1.942499324686532e-06, + "loss": 0.583850622177124, + "step": 1329 + }, + { + "epoch": 0.30662824207492795, + "grad_norm": 1.0139999403008324, + "learning_rate": 1.9423718665634413e-06, + "loss": 0.4839683771133423, + "step": 1330 + }, + { + "epoch": 0.30685878962536023, + "grad_norm": 1.0298436614312085, + "learning_rate": 1.9422442715235223e-06, + "loss": 0.5600621104240417, + "step": 1331 + }, + { + "epoch": 0.3070893371757925, + "grad_norm": 1.0626416339831364, + "learning_rate": 1.942116539585312e-06, + "loss": 0.5607948303222656, + "step": 1332 + }, + { + "epoch": 0.3073198847262248, + "grad_norm": 1.0244776150888006, + "learning_rate": 1.9419886707673695e-06, + "loss": 0.6241478323936462, + "step": 1333 + }, + { + "epoch": 0.30755043227665707, + "grad_norm": 1.059435056912846, + "learning_rate": 1.941860665088272e-06, + "loss": 0.5256654620170593, + "step": 1334 + }, + { + "epoch": 0.30778097982708935, + "grad_norm": 0.9814039676018265, + "learning_rate": 1.9417325225666185e-06, + "loss": 0.5031943321228027, + "step": 1335 + }, + { + "epoch": 0.30801152737752163, + "grad_norm": 1.0381509501910948, + "learning_rate": 1.9416042432210256e-06, + "loss": 0.5167732238769531, + "step": 1336 + }, + { + "epoch": 0.3082420749279539, + "grad_norm": 1.3652241862581105, + "learning_rate": 1.941475827070132e-06, + "loss": 0.5503576397895813, + "step": 1337 + }, + { + "epoch": 0.3084726224783862, + "grad_norm": 1.0886579362905786, + "learning_rate": 1.9413472741325947e-06, + "loss": 0.4676959812641144, + "step": 1338 + }, + { + "epoch": 0.3087031700288184, + "grad_norm": 1.1278655674224347, + "learning_rate": 1.9412185844270903e-06, + "loss": 0.5845292806625366, + "step": 1339 + }, + { + "epoch": 0.3089337175792507, + "grad_norm": 1.170562835357779, + "learning_rate": 1.9410897579723175e-06, + "loss": 0.5555682182312012, + "step": 1340 + }, + { + "epoch": 0.309164265129683, + "grad_norm": 1.0107459327742527, + "learning_rate": 1.940960794786993e-06, + "loss": 0.6104729175567627, + "step": 1341 + }, + { + "epoch": 0.30939481268011526, + "grad_norm": 0.9471421356356189, + "learning_rate": 1.9408316948898535e-06, + "loss": 0.641234278678894, + "step": 1342 + }, + { + "epoch": 0.30962536023054754, + "grad_norm": 1.0545748452641606, + "learning_rate": 1.940702458299656e-06, + "loss": 0.56852126121521, + "step": 1343 + }, + { + "epoch": 0.3098559077809798, + "grad_norm": 1.0184665425128694, + "learning_rate": 1.9405730850351766e-06, + "loss": 0.4960979223251343, + "step": 1344 + }, + { + "epoch": 0.3100864553314121, + "grad_norm": 1.0984570627083017, + "learning_rate": 1.9404435751152133e-06, + "loss": 0.5483115315437317, + "step": 1345 + }, + { + "epoch": 0.3103170028818444, + "grad_norm": 1.258420824095703, + "learning_rate": 1.9403139285585814e-06, + "loss": 0.5561012029647827, + "step": 1346 + }, + { + "epoch": 0.31054755043227666, + "grad_norm": 1.121498999268742, + "learning_rate": 1.940184145384118e-06, + "loss": 0.549866795539856, + "step": 1347 + }, + { + "epoch": 0.31077809798270895, + "grad_norm": 1.020975611109825, + "learning_rate": 1.9400542256106783e-06, + "loss": 0.547815203666687, + "step": 1348 + }, + { + "epoch": 0.3110086455331412, + "grad_norm": 1.021689053716725, + "learning_rate": 1.939924169257139e-06, + "loss": 0.5230641961097717, + "step": 1349 + }, + { + "epoch": 0.3112391930835735, + "grad_norm": 0.9340659983787873, + "learning_rate": 1.939793976342396e-06, + "loss": 0.5421465635299683, + "step": 1350 + }, + { + "epoch": 0.3114697406340058, + "grad_norm": 1.1783810620622794, + "learning_rate": 1.939663646885364e-06, + "loss": 0.5560643672943115, + "step": 1351 + }, + { + "epoch": 0.31170028818443807, + "grad_norm": 1.295011871686265, + "learning_rate": 1.93953318090498e-06, + "loss": 0.5311995148658752, + "step": 1352 + }, + { + "epoch": 0.3119308357348703, + "grad_norm": 1.202368782772509, + "learning_rate": 1.9394025784201985e-06, + "loss": 0.539401650428772, + "step": 1353 + }, + { + "epoch": 0.3121613832853026, + "grad_norm": 1.2005427387305667, + "learning_rate": 1.9392718394499945e-06, + "loss": 0.6014061570167542, + "step": 1354 + }, + { + "epoch": 0.31239193083573485, + "grad_norm": 0.9270167680245519, + "learning_rate": 1.9391409640133634e-06, + "loss": 0.6065000295639038, + "step": 1355 + }, + { + "epoch": 0.31262247838616714, + "grad_norm": 1.2988921211312436, + "learning_rate": 1.9390099521293196e-06, + "loss": 0.613541305065155, + "step": 1356 + }, + { + "epoch": 0.3128530259365994, + "grad_norm": 0.8285824717727959, + "learning_rate": 1.9388788038168985e-06, + "loss": 0.47520384192466736, + "step": 1357 + }, + { + "epoch": 0.3130835734870317, + "grad_norm": 1.0343980272119644, + "learning_rate": 1.9387475190951543e-06, + "loss": 0.615745484828949, + "step": 1358 + }, + { + "epoch": 0.313314121037464, + "grad_norm": 0.9813010157914711, + "learning_rate": 1.9386160979831607e-06, + "loss": 0.5197638273239136, + "step": 1359 + }, + { + "epoch": 0.31354466858789626, + "grad_norm": 1.1121294770562518, + "learning_rate": 1.9384845405000124e-06, + "loss": 0.5387387871742249, + "step": 1360 + }, + { + "epoch": 0.31377521613832854, + "grad_norm": 0.924622519065769, + "learning_rate": 1.9383528466648232e-06, + "loss": 0.45428377389907837, + "step": 1361 + }, + { + "epoch": 0.3140057636887608, + "grad_norm": 0.948104410443073, + "learning_rate": 1.938221016496727e-06, + "loss": 0.5130504369735718, + "step": 1362 + }, + { + "epoch": 0.3142363112391931, + "grad_norm": 1.0809781469576445, + "learning_rate": 1.9380890500148773e-06, + "loss": 0.60721355676651, + "step": 1363 + }, + { + "epoch": 0.3144668587896254, + "grad_norm": 1.1461080053421355, + "learning_rate": 1.9379569472384475e-06, + "loss": 0.5277825593948364, + "step": 1364 + }, + { + "epoch": 0.31469740634005766, + "grad_norm": 1.0995766102280293, + "learning_rate": 1.9378247081866303e-06, + "loss": 0.5549559593200684, + "step": 1365 + }, + { + "epoch": 0.3149279538904899, + "grad_norm": 1.0898464003561188, + "learning_rate": 1.937692332878639e-06, + "loss": 0.5606675148010254, + "step": 1366 + }, + { + "epoch": 0.31515850144092217, + "grad_norm": 1.2514686660901941, + "learning_rate": 1.9375598213337066e-06, + "loss": 0.44079354405403137, + "step": 1367 + }, + { + "epoch": 0.31538904899135445, + "grad_norm": 1.1147898414738042, + "learning_rate": 1.9374271735710854e-06, + "loss": 0.5627914667129517, + "step": 1368 + }, + { + "epoch": 0.31561959654178673, + "grad_norm": 1.0827906414110418, + "learning_rate": 1.9372943896100475e-06, + "loss": 0.6157082319259644, + "step": 1369 + }, + { + "epoch": 0.315850144092219, + "grad_norm": 1.0107986194096998, + "learning_rate": 1.9371614694698853e-06, + "loss": 0.5075211524963379, + "step": 1370 + }, + { + "epoch": 0.3160806916426513, + "grad_norm": 1.1077211347008051, + "learning_rate": 1.937028413169911e-06, + "loss": 0.5673394799232483, + "step": 1371 + }, + { + "epoch": 0.31631123919308357, + "grad_norm": 1.2194715035727013, + "learning_rate": 1.9368952207294555e-06, + "loss": 0.636246919631958, + "step": 1372 + }, + { + "epoch": 0.31654178674351585, + "grad_norm": 0.9520240215012735, + "learning_rate": 1.9367618921678714e-06, + "loss": 0.5447783470153809, + "step": 1373 + }, + { + "epoch": 0.31677233429394813, + "grad_norm": 1.0624943189039389, + "learning_rate": 1.9366284275045285e-06, + "loss": 0.5455813407897949, + "step": 1374 + }, + { + "epoch": 0.3170028818443804, + "grad_norm": 1.351516959021647, + "learning_rate": 1.936494826758819e-06, + "loss": 0.6078809499740601, + "step": 1375 + }, + { + "epoch": 0.3172334293948127, + "grad_norm": 1.255648645931826, + "learning_rate": 1.9363610899501533e-06, + "loss": 0.576380729675293, + "step": 1376 + }, + { + "epoch": 0.317463976945245, + "grad_norm": 1.0694137963246684, + "learning_rate": 1.9362272170979625e-06, + "loss": 0.5999764204025269, + "step": 1377 + }, + { + "epoch": 0.31769452449567726, + "grad_norm": 0.9127077886533301, + "learning_rate": 1.936093208221696e-06, + "loss": 0.5560207366943359, + "step": 1378 + }, + { + "epoch": 0.31792507204610954, + "grad_norm": 1.0526952264732803, + "learning_rate": 1.935959063340824e-06, + "loss": 0.56638103723526, + "step": 1379 + }, + { + "epoch": 0.31815561959654176, + "grad_norm": 1.0802135441151437, + "learning_rate": 1.935824782474837e-06, + "loss": 0.6130156517028809, + "step": 1380 + }, + { + "epoch": 0.31838616714697404, + "grad_norm": 1.17418749132308, + "learning_rate": 1.9356903656432445e-06, + "loss": 0.5616703629493713, + "step": 1381 + }, + { + "epoch": 0.3186167146974063, + "grad_norm": 1.0343181717058683, + "learning_rate": 1.9355558128655757e-06, + "loss": 0.6343744993209839, + "step": 1382 + }, + { + "epoch": 0.3188472622478386, + "grad_norm": 1.095750213947718, + "learning_rate": 1.935421124161379e-06, + "loss": 0.5805482864379883, + "step": 1383 + }, + { + "epoch": 0.3190778097982709, + "grad_norm": 1.1026578295358351, + "learning_rate": 1.9352862995502244e-06, + "loss": 0.46264296770095825, + "step": 1384 + }, + { + "epoch": 0.31930835734870316, + "grad_norm": 1.0410251136907926, + "learning_rate": 1.9351513390517007e-06, + "loss": 0.6638646125793457, + "step": 1385 + }, + { + "epoch": 0.31953890489913545, + "grad_norm": 1.1206503286933907, + "learning_rate": 1.9350162426854148e-06, + "loss": 0.6349970698356628, + "step": 1386 + }, + { + "epoch": 0.3197694524495677, + "grad_norm": 1.072301849044211, + "learning_rate": 1.934881010470996e-06, + "loss": 0.5592948794364929, + "step": 1387 + }, + { + "epoch": 0.32, + "grad_norm": 1.0458234682938432, + "learning_rate": 1.9347456424280914e-06, + "loss": 0.5730908513069153, + "step": 1388 + }, + { + "epoch": 0.3202305475504323, + "grad_norm": 1.2900263608093252, + "learning_rate": 1.9346101385763693e-06, + "loss": 0.7006485462188721, + "step": 1389 + }, + { + "epoch": 0.32046109510086457, + "grad_norm": 1.4129067567606644, + "learning_rate": 1.934474498935516e-06, + "loss": 0.573637068271637, + "step": 1390 + }, + { + "epoch": 0.32069164265129685, + "grad_norm": 1.1318090296507703, + "learning_rate": 1.93433872352524e-06, + "loss": 0.4896121025085449, + "step": 1391 + }, + { + "epoch": 0.32092219020172913, + "grad_norm": 1.2605619597232016, + "learning_rate": 1.9342028123652665e-06, + "loss": 0.6335302591323853, + "step": 1392 + }, + { + "epoch": 0.3211527377521614, + "grad_norm": 0.9805948619235417, + "learning_rate": 1.934066765475343e-06, + "loss": 0.5807539224624634, + "step": 1393 + }, + { + "epoch": 0.32138328530259364, + "grad_norm": 1.0981000795794762, + "learning_rate": 1.9339305828752353e-06, + "loss": 0.5706362128257751, + "step": 1394 + }, + { + "epoch": 0.3216138328530259, + "grad_norm": 1.0353211161605218, + "learning_rate": 1.9337942645847293e-06, + "loss": 0.5357315540313721, + "step": 1395 + }, + { + "epoch": 0.3218443804034582, + "grad_norm": 1.100852226093223, + "learning_rate": 1.933657810623631e-06, + "loss": 0.5349493026733398, + "step": 1396 + }, + { + "epoch": 0.3220749279538905, + "grad_norm": 1.2109710983573714, + "learning_rate": 1.9335212210117657e-06, + "loss": 0.5859971642494202, + "step": 1397 + }, + { + "epoch": 0.32230547550432276, + "grad_norm": 0.9676229309878287, + "learning_rate": 1.9333844957689773e-06, + "loss": 0.49399334192276, + "step": 1398 + }, + { + "epoch": 0.32253602305475504, + "grad_norm": 1.0860725860312863, + "learning_rate": 1.9332476349151325e-06, + "loss": 0.5584002137184143, + "step": 1399 + }, + { + "epoch": 0.3227665706051873, + "grad_norm": 1.268047844459102, + "learning_rate": 1.9331106384701143e-06, + "loss": 0.6149849891662598, + "step": 1400 + }, + { + "epoch": 0.3229971181556196, + "grad_norm": 0.9344661358479289, + "learning_rate": 1.932973506453827e-06, + "loss": 0.44001221656799316, + "step": 1401 + }, + { + "epoch": 0.3232276657060519, + "grad_norm": 1.1066396272948926, + "learning_rate": 1.932836238886195e-06, + "loss": 0.44574856758117676, + "step": 1402 + }, + { + "epoch": 0.32345821325648416, + "grad_norm": 1.3401453983415261, + "learning_rate": 1.9326988357871615e-06, + "loss": 0.5376108884811401, + "step": 1403 + }, + { + "epoch": 0.32368876080691644, + "grad_norm": 0.8767130557662413, + "learning_rate": 1.93256129717669e-06, + "loss": 0.5934798717498779, + "step": 1404 + }, + { + "epoch": 0.3239193083573487, + "grad_norm": 1.303119875991707, + "learning_rate": 1.932423623074763e-06, + "loss": 0.5526829957962036, + "step": 1405 + }, + { + "epoch": 0.324149855907781, + "grad_norm": 1.1334805404406405, + "learning_rate": 1.9322858135013836e-06, + "loss": 0.6259517669677734, + "step": 1406 + }, + { + "epoch": 0.32438040345821323, + "grad_norm": 0.9954594387833945, + "learning_rate": 1.932147868476574e-06, + "loss": 0.4495973289012909, + "step": 1407 + }, + { + "epoch": 0.3246109510086455, + "grad_norm": 0.861457749123995, + "learning_rate": 1.932009788020376e-06, + "loss": 0.5155202746391296, + "step": 1408 + }, + { + "epoch": 0.3248414985590778, + "grad_norm": 0.9990951421158675, + "learning_rate": 1.9318715721528508e-06, + "loss": 0.4643939733505249, + "step": 1409 + }, + { + "epoch": 0.32507204610951007, + "grad_norm": 1.1115294412044667, + "learning_rate": 1.931733220894081e-06, + "loss": 0.47659850120544434, + "step": 1410 + }, + { + "epoch": 0.32530259365994235, + "grad_norm": 1.192025751825093, + "learning_rate": 1.931594734264166e-06, + "loss": 0.618567705154419, + "step": 1411 + }, + { + "epoch": 0.32553314121037463, + "grad_norm": 1.3463014236604622, + "learning_rate": 1.931456112283228e-06, + "loss": 0.6162246465682983, + "step": 1412 + }, + { + "epoch": 0.3257636887608069, + "grad_norm": 1.137080487588782, + "learning_rate": 1.9313173549714063e-06, + "loss": 0.5859405398368835, + "step": 1413 + }, + { + "epoch": 0.3259942363112392, + "grad_norm": 1.0299388107109648, + "learning_rate": 1.9311784623488614e-06, + "loss": 0.45844388008117676, + "step": 1414 + }, + { + "epoch": 0.3262247838616715, + "grad_norm": 1.1761474671222747, + "learning_rate": 1.9310394344357725e-06, + "loss": 0.5407997965812683, + "step": 1415 + }, + { + "epoch": 0.32645533141210376, + "grad_norm": 1.1981225744663582, + "learning_rate": 1.9309002712523394e-06, + "loss": 0.5546882152557373, + "step": 1416 + }, + { + "epoch": 0.32668587896253604, + "grad_norm": 0.8759460054604783, + "learning_rate": 1.9307609728187807e-06, + "loss": 0.4846392869949341, + "step": 1417 + }, + { + "epoch": 0.3269164265129683, + "grad_norm": 1.000243508175514, + "learning_rate": 1.9306215391553353e-06, + "loss": 0.5062232613563538, + "step": 1418 + }, + { + "epoch": 0.3271469740634006, + "grad_norm": 1.116854057805208, + "learning_rate": 1.9304819702822615e-06, + "loss": 0.5836912393569946, + "step": 1419 + }, + { + "epoch": 0.3273775216138329, + "grad_norm": 1.094195818303245, + "learning_rate": 1.9303422662198366e-06, + "loss": 0.5329402089118958, + "step": 1420 + }, + { + "epoch": 0.3276080691642651, + "grad_norm": 1.1607957633552695, + "learning_rate": 1.930202426988359e-06, + "loss": 0.6544215679168701, + "step": 1421 + }, + { + "epoch": 0.3278386167146974, + "grad_norm": 1.100571228439363, + "learning_rate": 1.930062452608145e-06, + "loss": 0.47018110752105713, + "step": 1422 + }, + { + "epoch": 0.32806916426512966, + "grad_norm": 1.0524336036947848, + "learning_rate": 1.929922343099532e-06, + "loss": 0.5112531185150146, + "step": 1423 + }, + { + "epoch": 0.32829971181556195, + "grad_norm": 1.2000967886351006, + "learning_rate": 1.9297820984828768e-06, + "loss": 0.6076794266700745, + "step": 1424 + }, + { + "epoch": 0.3285302593659942, + "grad_norm": 1.2298516207472598, + "learning_rate": 1.9296417187785546e-06, + "loss": 0.6222262382507324, + "step": 1425 + }, + { + "epoch": 0.3287608069164265, + "grad_norm": 1.0861185792017791, + "learning_rate": 1.929501204006962e-06, + "loss": 0.5821695327758789, + "step": 1426 + }, + { + "epoch": 0.3289913544668588, + "grad_norm": 0.9550005504042167, + "learning_rate": 1.929360554188513e-06, + "loss": 0.4985813498497009, + "step": 1427 + }, + { + "epoch": 0.32922190201729107, + "grad_norm": 1.0175745295306269, + "learning_rate": 1.929219769343644e-06, + "loss": 0.5906369686126709, + "step": 1428 + }, + { + "epoch": 0.32945244956772335, + "grad_norm": 0.9717883625472988, + "learning_rate": 1.929078849492809e-06, + "loss": 0.562096357345581, + "step": 1429 + }, + { + "epoch": 0.32968299711815563, + "grad_norm": 1.1465278498569305, + "learning_rate": 1.9289377946564822e-06, + "loss": 0.5363502502441406, + "step": 1430 + }, + { + "epoch": 0.3299135446685879, + "grad_norm": 1.2820536110562617, + "learning_rate": 1.9287966048551573e-06, + "loss": 0.5975755453109741, + "step": 1431 + }, + { + "epoch": 0.3301440922190202, + "grad_norm": 1.0778576523614563, + "learning_rate": 1.9286552801093476e-06, + "loss": 0.5664670467376709, + "step": 1432 + }, + { + "epoch": 0.33037463976945247, + "grad_norm": 1.0263666085507623, + "learning_rate": 1.9285138204395864e-06, + "loss": 0.5981261730194092, + "step": 1433 + }, + { + "epoch": 0.33060518731988475, + "grad_norm": 0.9529552937728545, + "learning_rate": 1.928372225866426e-06, + "loss": 0.5640159845352173, + "step": 1434 + }, + { + "epoch": 0.330835734870317, + "grad_norm": 0.9422839165194713, + "learning_rate": 1.9282304964104397e-06, + "loss": 0.5626721382141113, + "step": 1435 + }, + { + "epoch": 0.33106628242074926, + "grad_norm": 1.1190074685130234, + "learning_rate": 1.928088632092218e-06, + "loss": 0.5522770881652832, + "step": 1436 + }, + { + "epoch": 0.33129682997118154, + "grad_norm": 0.9690434170385617, + "learning_rate": 1.9279466329323727e-06, + "loss": 0.4949793517589569, + "step": 1437 + }, + { + "epoch": 0.3315273775216138, + "grad_norm": 1.1199903848507864, + "learning_rate": 1.927804498951535e-06, + "loss": 0.5845533013343811, + "step": 1438 + }, + { + "epoch": 0.3317579250720461, + "grad_norm": 1.0405222768252325, + "learning_rate": 1.927662230170355e-06, + "loss": 0.49574515223503113, + "step": 1439 + }, + { + "epoch": 0.3319884726224784, + "grad_norm": 0.9203608823490977, + "learning_rate": 1.927519826609503e-06, + "loss": 0.5285886526107788, + "step": 1440 + }, + { + "epoch": 0.33221902017291066, + "grad_norm": 1.1230062738043034, + "learning_rate": 1.9273772882896698e-06, + "loss": 0.474979043006897, + "step": 1441 + }, + { + "epoch": 0.33244956772334294, + "grad_norm": 1.3266216308471324, + "learning_rate": 1.927234615231564e-06, + "loss": 0.48791128396987915, + "step": 1442 + }, + { + "epoch": 0.3326801152737752, + "grad_norm": 1.0106410509782415, + "learning_rate": 1.9270918074559135e-06, + "loss": 0.4927103519439697, + "step": 1443 + }, + { + "epoch": 0.3329106628242075, + "grad_norm": 1.0834822645958162, + "learning_rate": 1.9269488649834683e-06, + "loss": 0.5418181419372559, + "step": 1444 + }, + { + "epoch": 0.3331412103746398, + "grad_norm": 1.2573247433245271, + "learning_rate": 1.926805787834996e-06, + "loss": 0.4865915775299072, + "step": 1445 + }, + { + "epoch": 0.33337175792507207, + "grad_norm": 0.99396933954953, + "learning_rate": 1.9266625760312838e-06, + "loss": 0.5750860571861267, + "step": 1446 + }, + { + "epoch": 0.33360230547550435, + "grad_norm": 1.078541464684242, + "learning_rate": 1.9265192295931394e-06, + "loss": 0.550861120223999, + "step": 1447 + }, + { + "epoch": 0.3338328530259366, + "grad_norm": 1.007683687550808, + "learning_rate": 1.926375748541389e-06, + "loss": 0.5320810675621033, + "step": 1448 + }, + { + "epoch": 0.33406340057636885, + "grad_norm": 1.017101643881844, + "learning_rate": 1.9262321328968795e-06, + "loss": 0.6060050129890442, + "step": 1449 + }, + { + "epoch": 0.33429394812680113, + "grad_norm": 1.0363868425077463, + "learning_rate": 1.9260883826804767e-06, + "loss": 0.6425552368164062, + "step": 1450 + }, + { + "epoch": 0.3345244956772334, + "grad_norm": 0.9764138596613898, + "learning_rate": 1.925944497913065e-06, + "loss": 0.49570873379707336, + "step": 1451 + }, + { + "epoch": 0.3347550432276657, + "grad_norm": 1.2208122621725788, + "learning_rate": 1.9258004786155512e-06, + "loss": 0.5989271402359009, + "step": 1452 + }, + { + "epoch": 0.334985590778098, + "grad_norm": 0.9503162789922354, + "learning_rate": 1.925656324808858e-06, + "loss": 0.5301828384399414, + "step": 1453 + }, + { + "epoch": 0.33521613832853026, + "grad_norm": 0.8316884167256052, + "learning_rate": 1.925512036513931e-06, + "loss": 0.5198702216148376, + "step": 1454 + }, + { + "epoch": 0.33544668587896254, + "grad_norm": 0.9455782785140152, + "learning_rate": 1.925367613751732e-06, + "loss": 0.4890085458755493, + "step": 1455 + }, + { + "epoch": 0.3356772334293948, + "grad_norm": 1.0869496416180913, + "learning_rate": 1.925223056543246e-06, + "loss": 0.613020658493042, + "step": 1456 + }, + { + "epoch": 0.3359077809798271, + "grad_norm": 1.3127727305608659, + "learning_rate": 1.925078364909474e-06, + "loss": 0.6544125080108643, + "step": 1457 + }, + { + "epoch": 0.3361383285302594, + "grad_norm": 1.1781068643375063, + "learning_rate": 1.9249335388714397e-06, + "loss": 0.48095571994781494, + "step": 1458 + }, + { + "epoch": 0.33636887608069166, + "grad_norm": 1.0490387987699479, + "learning_rate": 1.9247885784501837e-06, + "loss": 0.43932127952575684, + "step": 1459 + }, + { + "epoch": 0.33659942363112394, + "grad_norm": 1.0790643638521016, + "learning_rate": 1.9246434836667674e-06, + "loss": 0.4519491195678711, + "step": 1460 + }, + { + "epoch": 0.3368299711815562, + "grad_norm": 1.2051025882543964, + "learning_rate": 1.9244982545422724e-06, + "loss": 0.5621665120124817, + "step": 1461 + }, + { + "epoch": 0.33706051873198845, + "grad_norm": 1.3398506945777509, + "learning_rate": 1.924352891097798e-06, + "loss": 0.4763834476470947, + "step": 1462 + }, + { + "epoch": 0.3372910662824207, + "grad_norm": 1.242985575101598, + "learning_rate": 1.9242073933544644e-06, + "loss": 0.6005351543426514, + "step": 1463 + }, + { + "epoch": 0.337521613832853, + "grad_norm": 1.110700986093752, + "learning_rate": 1.9240617613334112e-06, + "loss": 0.5566102862358093, + "step": 1464 + }, + { + "epoch": 0.3377521613832853, + "grad_norm": 1.0242858373073467, + "learning_rate": 1.9239159950557966e-06, + "loss": 0.473061203956604, + "step": 1465 + }, + { + "epoch": 0.33798270893371757, + "grad_norm": 1.1506065660394986, + "learning_rate": 1.9237700945427993e-06, + "loss": 0.5492761135101318, + "step": 1466 + }, + { + "epoch": 0.33821325648414985, + "grad_norm": 1.2881267649729666, + "learning_rate": 1.923624059815617e-06, + "loss": 0.5164840221405029, + "step": 1467 + }, + { + "epoch": 0.33844380403458213, + "grad_norm": 1.170718183576445, + "learning_rate": 1.923477890895467e-06, + "loss": 0.5348359942436218, + "step": 1468 + }, + { + "epoch": 0.3386743515850144, + "grad_norm": 1.255138301940698, + "learning_rate": 1.9233315878035863e-06, + "loss": 0.6063251495361328, + "step": 1469 + }, + { + "epoch": 0.3389048991354467, + "grad_norm": 1.1740353245373907, + "learning_rate": 1.9231851505612315e-06, + "loss": 0.5293298363685608, + "step": 1470 + }, + { + "epoch": 0.33913544668587897, + "grad_norm": 1.2192155228489063, + "learning_rate": 1.9230385791896773e-06, + "loss": 0.566419243812561, + "step": 1471 + }, + { + "epoch": 0.33936599423631125, + "grad_norm": 1.215891621061933, + "learning_rate": 1.92289187371022e-06, + "loss": 0.5496705770492554, + "step": 1472 + }, + { + "epoch": 0.33959654178674353, + "grad_norm": 1.101213232352079, + "learning_rate": 1.9227450341441742e-06, + "loss": 0.4794740080833435, + "step": 1473 + }, + { + "epoch": 0.3398270893371758, + "grad_norm": 1.2201116310406535, + "learning_rate": 1.9225980605128744e-06, + "loss": 0.5791349411010742, + "step": 1474 + }, + { + "epoch": 0.3400576368876081, + "grad_norm": 1.4571369283257354, + "learning_rate": 1.9224509528376734e-06, + "loss": 0.479930579662323, + "step": 1475 + }, + { + "epoch": 0.3402881844380403, + "grad_norm": 1.0415599481638325, + "learning_rate": 1.9223037111399453e-06, + "loss": 0.4530009627342224, + "step": 1476 + }, + { + "epoch": 0.3405187319884726, + "grad_norm": 1.204701640132816, + "learning_rate": 1.9221563354410828e-06, + "loss": 0.5403045415878296, + "step": 1477 + }, + { + "epoch": 0.3407492795389049, + "grad_norm": 0.8630253900146837, + "learning_rate": 1.9220088257624975e-06, + "loss": 0.493504136800766, + "step": 1478 + }, + { + "epoch": 0.34097982708933716, + "grad_norm": 1.4310766024245367, + "learning_rate": 1.921861182125621e-06, + "loss": 0.5821102857589722, + "step": 1479 + }, + { + "epoch": 0.34121037463976944, + "grad_norm": 1.082253088614022, + "learning_rate": 1.9217134045519047e-06, + "loss": 0.5295383334159851, + "step": 1480 + }, + { + "epoch": 0.3414409221902017, + "grad_norm": 1.1829405649995548, + "learning_rate": 1.9215654930628194e-06, + "loss": 0.6152981519699097, + "step": 1481 + }, + { + "epoch": 0.341671469740634, + "grad_norm": 1.0966776837147827, + "learning_rate": 1.9214174476798547e-06, + "loss": 0.5630486011505127, + "step": 1482 + }, + { + "epoch": 0.3419020172910663, + "grad_norm": 1.1063696979108066, + "learning_rate": 1.9212692684245203e-06, + "loss": 0.5250644087791443, + "step": 1483 + }, + { + "epoch": 0.34213256484149857, + "grad_norm": 1.1921011792402902, + "learning_rate": 1.921120955318345e-06, + "loss": 0.5262070894241333, + "step": 1484 + }, + { + "epoch": 0.34236311239193085, + "grad_norm": 1.0557911041666306, + "learning_rate": 1.920972508382877e-06, + "loss": 0.4584987163543701, + "step": 1485 + }, + { + "epoch": 0.3425936599423631, + "grad_norm": 0.8987901766147761, + "learning_rate": 1.920823927639684e-06, + "loss": 0.5689778923988342, + "step": 1486 + }, + { + "epoch": 0.3428242074927954, + "grad_norm": 1.5261490950420458, + "learning_rate": 1.920675213110354e-06, + "loss": 0.5871669054031372, + "step": 1487 + }, + { + "epoch": 0.3430547550432277, + "grad_norm": 1.1867087256934146, + "learning_rate": 1.9205263648164927e-06, + "loss": 0.5816771984100342, + "step": 1488 + }, + { + "epoch": 0.34328530259365997, + "grad_norm": 1.1506734961006173, + "learning_rate": 1.9203773827797266e-06, + "loss": 0.5304274559020996, + "step": 1489 + }, + { + "epoch": 0.3435158501440922, + "grad_norm": 1.1468129901849249, + "learning_rate": 1.9202282670217014e-06, + "loss": 0.5348042249679565, + "step": 1490 + }, + { + "epoch": 0.3437463976945245, + "grad_norm": 1.159885461181134, + "learning_rate": 1.920079017564082e-06, + "loss": 0.5374947786331177, + "step": 1491 + }, + { + "epoch": 0.34397694524495676, + "grad_norm": 1.2907287290922715, + "learning_rate": 1.9199296344285527e-06, + "loss": 0.6263279914855957, + "step": 1492 + }, + { + "epoch": 0.34420749279538904, + "grad_norm": 1.0523687721115222, + "learning_rate": 1.919780117636817e-06, + "loss": 0.5039552450180054, + "step": 1493 + }, + { + "epoch": 0.3444380403458213, + "grad_norm": 1.0964626478821635, + "learning_rate": 1.9196304672105994e-06, + "loss": 0.4592825174331665, + "step": 1494 + }, + { + "epoch": 0.3446685878962536, + "grad_norm": 1.1561933172055967, + "learning_rate": 1.919480683171641e-06, + "loss": 0.5041275024414062, + "step": 1495 + }, + { + "epoch": 0.3448991354466859, + "grad_norm": 1.1372473895034165, + "learning_rate": 1.9193307655417043e-06, + "loss": 0.573014497756958, + "step": 1496 + }, + { + "epoch": 0.34512968299711816, + "grad_norm": 1.2502610467615811, + "learning_rate": 1.9191807143425714e-06, + "loss": 0.5881764888763428, + "step": 1497 + }, + { + "epoch": 0.34536023054755044, + "grad_norm": 1.1627055981370906, + "learning_rate": 1.9190305295960425e-06, + "loss": 0.523137092590332, + "step": 1498 + }, + { + "epoch": 0.3455907780979827, + "grad_norm": 1.3731261688393495, + "learning_rate": 1.9188802113239383e-06, + "loss": 0.5579402446746826, + "step": 1499 + }, + { + "epoch": 0.345821325648415, + "grad_norm": 1.2642778137685087, + "learning_rate": 1.918729759548098e-06, + "loss": 0.6252793073654175, + "step": 1500 + }, + { + "epoch": 0.3460518731988473, + "grad_norm": 1.0221709474046514, + "learning_rate": 1.9185791742903813e-06, + "loss": 0.5688179731369019, + "step": 1501 + }, + { + "epoch": 0.34628242074927956, + "grad_norm": 0.9608059209732579, + "learning_rate": 1.9184284555726664e-06, + "loss": 0.4556620717048645, + "step": 1502 + }, + { + "epoch": 0.34651296829971184, + "grad_norm": 1.4535889503619228, + "learning_rate": 1.9182776034168513e-06, + "loss": 0.5266132950782776, + "step": 1503 + }, + { + "epoch": 0.34674351585014407, + "grad_norm": 1.014116036253219, + "learning_rate": 1.9181266178448525e-06, + "loss": 0.5345441102981567, + "step": 1504 + }, + { + "epoch": 0.34697406340057635, + "grad_norm": 1.471506683270131, + "learning_rate": 1.9179754988786077e-06, + "loss": 0.5279865264892578, + "step": 1505 + }, + { + "epoch": 0.34720461095100863, + "grad_norm": 0.9951759038068401, + "learning_rate": 1.917824246540072e-06, + "loss": 0.5321294069290161, + "step": 1506 + }, + { + "epoch": 0.3474351585014409, + "grad_norm": 1.1405669476940652, + "learning_rate": 1.9176728608512216e-06, + "loss": 0.6012279987335205, + "step": 1507 + }, + { + "epoch": 0.3476657060518732, + "grad_norm": 1.280934102974154, + "learning_rate": 1.917521341834051e-06, + "loss": 0.6327307224273682, + "step": 1508 + }, + { + "epoch": 0.34789625360230547, + "grad_norm": 1.3541871619240426, + "learning_rate": 1.9173696895105738e-06, + "loss": 0.4634242057800293, + "step": 1509 + }, + { + "epoch": 0.34812680115273775, + "grad_norm": 0.9650176834853621, + "learning_rate": 1.917217903902824e-06, + "loss": 0.6095619201660156, + "step": 1510 + }, + { + "epoch": 0.34835734870317003, + "grad_norm": 0.8778117797545331, + "learning_rate": 1.9170659850328543e-06, + "loss": 0.5752026438713074, + "step": 1511 + }, + { + "epoch": 0.3485878962536023, + "grad_norm": 1.1303848756633323, + "learning_rate": 1.9169139329227373e-06, + "loss": 0.5497609376907349, + "step": 1512 + }, + { + "epoch": 0.3488184438040346, + "grad_norm": 0.9721420679355441, + "learning_rate": 1.916761747594564e-06, + "loss": 0.562045693397522, + "step": 1513 + }, + { + "epoch": 0.3490489913544669, + "grad_norm": 1.2143743690553912, + "learning_rate": 1.916609429070446e-06, + "loss": 0.533704400062561, + "step": 1514 + }, + { + "epoch": 0.34927953890489916, + "grad_norm": 1.023070261432892, + "learning_rate": 1.916456977372513e-06, + "loss": 0.5165727734565735, + "step": 1515 + }, + { + "epoch": 0.34951008645533144, + "grad_norm": 1.0822455620220128, + "learning_rate": 1.9163043925229154e-06, + "loss": 0.5291183590888977, + "step": 1516 + }, + { + "epoch": 0.34974063400576366, + "grad_norm": 1.1691832350355116, + "learning_rate": 1.916151674543821e-06, + "loss": 0.5755541324615479, + "step": 1517 + }, + { + "epoch": 0.34997118155619594, + "grad_norm": 1.096375651985543, + "learning_rate": 1.915998823457419e-06, + "loss": 0.5173031091690063, + "step": 1518 + }, + { + "epoch": 0.3502017291066282, + "grad_norm": 1.0266729234069105, + "learning_rate": 1.9158458392859175e-06, + "loss": 0.5223626494407654, + "step": 1519 + }, + { + "epoch": 0.3504322766570605, + "grad_norm": 1.3968429150305617, + "learning_rate": 1.9156927220515426e-06, + "loss": 0.49933186173439026, + "step": 1520 + }, + { + "epoch": 0.3506628242074928, + "grad_norm": 0.9366627758580057, + "learning_rate": 1.91553947177654e-06, + "loss": 0.4844704568386078, + "step": 1521 + }, + { + "epoch": 0.35089337175792507, + "grad_norm": 1.1060039419898107, + "learning_rate": 1.9153860884831775e-06, + "loss": 0.44557222723960876, + "step": 1522 + }, + { + "epoch": 0.35112391930835735, + "grad_norm": 1.1817623923973588, + "learning_rate": 1.9152325721937388e-06, + "loss": 0.5139213800430298, + "step": 1523 + }, + { + "epoch": 0.3513544668587896, + "grad_norm": 1.1356038591189508, + "learning_rate": 1.9150789229305276e-06, + "loss": 0.6255537271499634, + "step": 1524 + }, + { + "epoch": 0.3515850144092219, + "grad_norm": 1.4572939210307672, + "learning_rate": 1.914925140715869e-06, + "loss": 0.5222468376159668, + "step": 1525 + }, + { + "epoch": 0.3518155619596542, + "grad_norm": 0.9379777889373196, + "learning_rate": 1.914771225572105e-06, + "loss": 0.5266926288604736, + "step": 1526 + }, + { + "epoch": 0.35204610951008647, + "grad_norm": 1.182155209163351, + "learning_rate": 1.914617177521598e-06, + "loss": 0.5775296688079834, + "step": 1527 + }, + { + "epoch": 0.35227665706051875, + "grad_norm": 1.544551250397055, + "learning_rate": 1.9144629965867296e-06, + "loss": 0.6237180233001709, + "step": 1528 + }, + { + "epoch": 0.35250720461095103, + "grad_norm": 1.0167232716229981, + "learning_rate": 1.914308682789901e-06, + "loss": 0.40847349166870117, + "step": 1529 + }, + { + "epoch": 0.3527377521613833, + "grad_norm": 0.9570236522891805, + "learning_rate": 1.914154236153532e-06, + "loss": 0.5963910818099976, + "step": 1530 + }, + { + "epoch": 0.35296829971181554, + "grad_norm": 1.0391049584893006, + "learning_rate": 1.9139996567000624e-06, + "loss": 0.516531765460968, + "step": 1531 + }, + { + "epoch": 0.3531988472622478, + "grad_norm": 0.9409985228344566, + "learning_rate": 1.9138449444519507e-06, + "loss": 0.5145821571350098, + "step": 1532 + }, + { + "epoch": 0.3534293948126801, + "grad_norm": 1.1658422646261672, + "learning_rate": 1.9136900994316753e-06, + "loss": 0.5966194272041321, + "step": 1533 + }, + { + "epoch": 0.3536599423631124, + "grad_norm": 1.0504783958956583, + "learning_rate": 1.913535121661733e-06, + "loss": 0.4880404472351074, + "step": 1534 + }, + { + "epoch": 0.35389048991354466, + "grad_norm": 1.1586094316232247, + "learning_rate": 1.9133800111646414e-06, + "loss": 0.5151012539863586, + "step": 1535 + }, + { + "epoch": 0.35412103746397694, + "grad_norm": 1.1633319411140208, + "learning_rate": 1.9132247679629353e-06, + "loss": 0.5591508150100708, + "step": 1536 + }, + { + "epoch": 0.3543515850144092, + "grad_norm": 1.3102428915861866, + "learning_rate": 1.9130693920791708e-06, + "loss": 0.5961824655532837, + "step": 1537 + }, + { + "epoch": 0.3545821325648415, + "grad_norm": 1.0721758303169822, + "learning_rate": 1.912913883535922e-06, + "loss": 0.5701007843017578, + "step": 1538 + }, + { + "epoch": 0.3548126801152738, + "grad_norm": 1.2232919385309438, + "learning_rate": 1.9127582423557827e-06, + "loss": 0.5492852926254272, + "step": 1539 + }, + { + "epoch": 0.35504322766570606, + "grad_norm": 1.0664413609489287, + "learning_rate": 1.912602468561366e-06, + "loss": 0.5030492544174194, + "step": 1540 + }, + { + "epoch": 0.35527377521613834, + "grad_norm": 1.0804112062824267, + "learning_rate": 1.9124465621753047e-06, + "loss": 0.567867636680603, + "step": 1541 + }, + { + "epoch": 0.3555043227665706, + "grad_norm": 1.1359172862574665, + "learning_rate": 1.9122905232202497e-06, + "loss": 0.6277697682380676, + "step": 1542 + }, + { + "epoch": 0.3557348703170029, + "grad_norm": 1.0326808049343164, + "learning_rate": 1.912134351718872e-06, + "loss": 0.5682080984115601, + "step": 1543 + }, + { + "epoch": 0.3559654178674352, + "grad_norm": 1.1630983445060303, + "learning_rate": 1.9119780476938616e-06, + "loss": 0.4987330138683319, + "step": 1544 + }, + { + "epoch": 0.3561959654178674, + "grad_norm": 1.0409430932448371, + "learning_rate": 1.911821611167928e-06, + "loss": 0.4908757209777832, + "step": 1545 + }, + { + "epoch": 0.3564265129682997, + "grad_norm": 1.098875754274837, + "learning_rate": 1.9116650421637995e-06, + "loss": 0.4820883870124817, + "step": 1546 + }, + { + "epoch": 0.35665706051873197, + "grad_norm": 1.3119370882450934, + "learning_rate": 1.911508340704225e-06, + "loss": 0.5155225992202759, + "step": 1547 + }, + { + "epoch": 0.35688760806916425, + "grad_norm": 0.8397950319284104, + "learning_rate": 1.9113515068119705e-06, + "loss": 0.4797988533973694, + "step": 1548 + }, + { + "epoch": 0.35711815561959653, + "grad_norm": 0.8794949811896489, + "learning_rate": 1.911194540509822e-06, + "loss": 0.4822116196155548, + "step": 1549 + }, + { + "epoch": 0.3573487031700288, + "grad_norm": 1.0294258069172015, + "learning_rate": 1.9110374418205866e-06, + "loss": 0.5781491994857788, + "step": 1550 + }, + { + "epoch": 0.3575792507204611, + "grad_norm": 1.065474237959385, + "learning_rate": 1.910880210767088e-06, + "loss": 0.5163141489028931, + "step": 1551 + }, + { + "epoch": 0.3578097982708934, + "grad_norm": 0.9693021918748141, + "learning_rate": 1.9107228473721703e-06, + "loss": 0.5735442638397217, + "step": 1552 + }, + { + "epoch": 0.35804034582132566, + "grad_norm": 1.289606289132748, + "learning_rate": 1.9105653516586975e-06, + "loss": 0.47912898659706116, + "step": 1553 + }, + { + "epoch": 0.35827089337175794, + "grad_norm": 1.2363840205017804, + "learning_rate": 1.9104077236495507e-06, + "loss": 0.5857046842575073, + "step": 1554 + }, + { + "epoch": 0.3585014409221902, + "grad_norm": 0.9205720464341692, + "learning_rate": 1.910249963367633e-06, + "loss": 0.47841203212738037, + "step": 1555 + }, + { + "epoch": 0.3587319884726225, + "grad_norm": 1.0020264560044534, + "learning_rate": 1.9100920708358644e-06, + "loss": 0.5707235336303711, + "step": 1556 + }, + { + "epoch": 0.3589625360230548, + "grad_norm": 0.9331299501384178, + "learning_rate": 1.9099340460771856e-06, + "loss": 0.5102289915084839, + "step": 1557 + }, + { + "epoch": 0.359193083573487, + "grad_norm": 1.2369505926587712, + "learning_rate": 1.9097758891145557e-06, + "loss": 0.4796826243400574, + "step": 1558 + }, + { + "epoch": 0.3594236311239193, + "grad_norm": 1.0814348251306543, + "learning_rate": 1.9096175999709538e-06, + "loss": 0.5577390789985657, + "step": 1559 + }, + { + "epoch": 0.35965417867435157, + "grad_norm": 1.0239605929741993, + "learning_rate": 1.9094591786693767e-06, + "loss": 0.4793698191642761, + "step": 1560 + }, + { + "epoch": 0.35988472622478385, + "grad_norm": 1.0987189800590818, + "learning_rate": 1.909300625232842e-06, + "loss": 0.5652080178260803, + "step": 1561 + }, + { + "epoch": 0.3601152737752161, + "grad_norm": 0.9830853981670961, + "learning_rate": 1.909141939684385e-06, + "loss": 0.5174850225448608, + "step": 1562 + }, + { + "epoch": 0.3603458213256484, + "grad_norm": 1.5804535303236673, + "learning_rate": 1.908983122047063e-06, + "loss": 0.5839135050773621, + "step": 1563 + }, + { + "epoch": 0.3605763688760807, + "grad_norm": 1.0464812980537448, + "learning_rate": 1.9088241723439486e-06, + "loss": 0.4825834333896637, + "step": 1564 + }, + { + "epoch": 0.36080691642651297, + "grad_norm": 1.2145113527517095, + "learning_rate": 1.9086650905981364e-06, + "loss": 0.608122706413269, + "step": 1565 + }, + { + "epoch": 0.36103746397694525, + "grad_norm": 1.255316392122234, + "learning_rate": 1.908505876832739e-06, + "loss": 0.6223492622375488, + "step": 1566 + }, + { + "epoch": 0.36126801152737753, + "grad_norm": 1.0060646589623696, + "learning_rate": 1.9083465310708894e-06, + "loss": 0.49106669425964355, + "step": 1567 + }, + { + "epoch": 0.3614985590778098, + "grad_norm": 1.072457075768019, + "learning_rate": 1.9081870533357373e-06, + "loss": 0.6098700165748596, + "step": 1568 + }, + { + "epoch": 0.3617291066282421, + "grad_norm": 1.3117902009377698, + "learning_rate": 1.9080274436504547e-06, + "loss": 0.520710825920105, + "step": 1569 + }, + { + "epoch": 0.3619596541786744, + "grad_norm": 1.4310182215994716, + "learning_rate": 1.90786770203823e-06, + "loss": 0.6175330877304077, + "step": 1570 + }, + { + "epoch": 0.36219020172910665, + "grad_norm": 1.066318396536309, + "learning_rate": 1.907707828522273e-06, + "loss": 0.5232914686203003, + "step": 1571 + }, + { + "epoch": 0.3624207492795389, + "grad_norm": 1.0921905909716596, + "learning_rate": 1.907547823125811e-06, + "loss": 0.45075923204421997, + "step": 1572 + }, + { + "epoch": 0.36265129682997116, + "grad_norm": 0.9633775515017127, + "learning_rate": 1.9073876858720914e-06, + "loss": 0.4931294918060303, + "step": 1573 + }, + { + "epoch": 0.36288184438040344, + "grad_norm": 1.0692876552208703, + "learning_rate": 1.9072274167843805e-06, + "loss": 0.4915880858898163, + "step": 1574 + }, + { + "epoch": 0.3631123919308357, + "grad_norm": 1.0666594923147616, + "learning_rate": 1.9070670158859634e-06, + "loss": 0.5257406234741211, + "step": 1575 + }, + { + "epoch": 0.363342939481268, + "grad_norm": 0.9711753769566253, + "learning_rate": 1.906906483200145e-06, + "loss": 0.5337891578674316, + "step": 1576 + }, + { + "epoch": 0.3635734870317003, + "grad_norm": 1.0365593071400123, + "learning_rate": 1.9067458187502491e-06, + "loss": 0.5388165712356567, + "step": 1577 + }, + { + "epoch": 0.36380403458213256, + "grad_norm": 1.056509558135423, + "learning_rate": 1.9065850225596183e-06, + "loss": 0.34295597672462463, + "step": 1578 + }, + { + "epoch": 0.36403458213256484, + "grad_norm": 1.1966716237030297, + "learning_rate": 1.9064240946516148e-06, + "loss": 0.4885653853416443, + "step": 1579 + }, + { + "epoch": 0.3642651296829971, + "grad_norm": 1.0967168066015518, + "learning_rate": 1.9062630350496195e-06, + "loss": 0.5201048851013184, + "step": 1580 + }, + { + "epoch": 0.3644956772334294, + "grad_norm": 1.1549525250242287, + "learning_rate": 1.9061018437770332e-06, + "loss": 0.509685218334198, + "step": 1581 + }, + { + "epoch": 0.3647262247838617, + "grad_norm": 1.2103985180509436, + "learning_rate": 1.9059405208572747e-06, + "loss": 0.5103805661201477, + "step": 1582 + }, + { + "epoch": 0.36495677233429397, + "grad_norm": 1.1299754007600582, + "learning_rate": 1.9057790663137828e-06, + "loss": 0.7274478077888489, + "step": 1583 + }, + { + "epoch": 0.36518731988472625, + "grad_norm": 1.0971187411733057, + "learning_rate": 1.9056174801700155e-06, + "loss": 0.5056940317153931, + "step": 1584 + }, + { + "epoch": 0.3654178674351585, + "grad_norm": 1.4885850320982672, + "learning_rate": 1.905455762449449e-06, + "loss": 0.519898533821106, + "step": 1585 + }, + { + "epoch": 0.36564841498559075, + "grad_norm": 1.2387908894479935, + "learning_rate": 1.9052939131755798e-06, + "loss": 0.5524897575378418, + "step": 1586 + }, + { + "epoch": 0.36587896253602303, + "grad_norm": 1.553847548415249, + "learning_rate": 1.9051319323719224e-06, + "loss": 0.5931388139724731, + "step": 1587 + }, + { + "epoch": 0.3661095100864553, + "grad_norm": 1.139599456662856, + "learning_rate": 1.904969820062011e-06, + "loss": 0.4642411470413208, + "step": 1588 + }, + { + "epoch": 0.3663400576368876, + "grad_norm": 1.0238584681270078, + "learning_rate": 1.9048075762693992e-06, + "loss": 0.5271746516227722, + "step": 1589 + }, + { + "epoch": 0.3665706051873199, + "grad_norm": 1.1002878432026475, + "learning_rate": 1.904645201017659e-06, + "loss": 0.6071990728378296, + "step": 1590 + }, + { + "epoch": 0.36680115273775216, + "grad_norm": 1.0574792476681798, + "learning_rate": 1.9044826943303819e-06, + "loss": 0.5722445249557495, + "step": 1591 + }, + { + "epoch": 0.36703170028818444, + "grad_norm": 0.9282439309709182, + "learning_rate": 1.9043200562311786e-06, + "loss": 0.5569512844085693, + "step": 1592 + }, + { + "epoch": 0.3672622478386167, + "grad_norm": 1.0196945335938161, + "learning_rate": 1.9041572867436784e-06, + "loss": 0.5678357481956482, + "step": 1593 + }, + { + "epoch": 0.367492795389049, + "grad_norm": 1.0907444083428672, + "learning_rate": 1.90399438589153e-06, + "loss": 0.5517602562904358, + "step": 1594 + }, + { + "epoch": 0.3677233429394813, + "grad_norm": 1.156373448455516, + "learning_rate": 1.903831353698402e-06, + "loss": 0.5436903238296509, + "step": 1595 + }, + { + "epoch": 0.36795389048991356, + "grad_norm": 0.9322530068282044, + "learning_rate": 1.9036681901879802e-06, + "loss": 0.4968247413635254, + "step": 1596 + }, + { + "epoch": 0.36818443804034584, + "grad_norm": 1.3503537931752878, + "learning_rate": 1.9035048953839712e-06, + "loss": 0.4991995096206665, + "step": 1597 + }, + { + "epoch": 0.3684149855907781, + "grad_norm": 1.1693808144768583, + "learning_rate": 1.9033414693100999e-06, + "loss": 0.5020145177841187, + "step": 1598 + }, + { + "epoch": 0.3686455331412104, + "grad_norm": 1.179802024734829, + "learning_rate": 1.9031779119901104e-06, + "loss": 0.45959436893463135, + "step": 1599 + }, + { + "epoch": 0.3688760806916426, + "grad_norm": 1.2294549218788287, + "learning_rate": 1.9030142234477658e-06, + "loss": 0.5194531679153442, + "step": 1600 + }, + { + "epoch": 0.3691066282420749, + "grad_norm": 1.0829366118476111, + "learning_rate": 1.9028504037068481e-06, + "loss": 0.5488829016685486, + "step": 1601 + }, + { + "epoch": 0.3693371757925072, + "grad_norm": 1.1508769427557617, + "learning_rate": 1.9026864527911593e-06, + "loss": 0.49845972657203674, + "step": 1602 + }, + { + "epoch": 0.36956772334293947, + "grad_norm": 0.9523664823081464, + "learning_rate": 1.9025223707245192e-06, + "loss": 0.5122306942939758, + "step": 1603 + }, + { + "epoch": 0.36979827089337175, + "grad_norm": 1.1063344831139217, + "learning_rate": 1.9023581575307677e-06, + "loss": 0.5586007833480835, + "step": 1604 + }, + { + "epoch": 0.37002881844380403, + "grad_norm": 1.2087997693756056, + "learning_rate": 1.9021938132337625e-06, + "loss": 0.5375609993934631, + "step": 1605 + }, + { + "epoch": 0.3702593659942363, + "grad_norm": 1.121295133902373, + "learning_rate": 1.902029337857382e-06, + "loss": 0.44368264079093933, + "step": 1606 + }, + { + "epoch": 0.3704899135446686, + "grad_norm": 1.0604325469477982, + "learning_rate": 1.901864731425522e-06, + "loss": 0.47421109676361084, + "step": 1607 + }, + { + "epoch": 0.3707204610951009, + "grad_norm": 1.1261708226384317, + "learning_rate": 1.9016999939620986e-06, + "loss": 0.46196484565734863, + "step": 1608 + }, + { + "epoch": 0.37095100864553315, + "grad_norm": 1.1243231065601214, + "learning_rate": 1.9015351254910464e-06, + "loss": 0.5860691070556641, + "step": 1609 + }, + { + "epoch": 0.37118155619596543, + "grad_norm": 1.376635822166593, + "learning_rate": 1.9013701260363186e-06, + "loss": 0.48964136838912964, + "step": 1610 + }, + { + "epoch": 0.3714121037463977, + "grad_norm": 1.2771258863133614, + "learning_rate": 1.9012049956218885e-06, + "loss": 0.577031135559082, + "step": 1611 + }, + { + "epoch": 0.37164265129683, + "grad_norm": 1.750265298033527, + "learning_rate": 1.9010397342717477e-06, + "loss": 0.5630660653114319, + "step": 1612 + }, + { + "epoch": 0.3718731988472622, + "grad_norm": 1.452203115340865, + "learning_rate": 1.9008743420099064e-06, + "loss": 0.6493782997131348, + "step": 1613 + }, + { + "epoch": 0.3721037463976945, + "grad_norm": 1.2469626311837894, + "learning_rate": 1.9007088188603952e-06, + "loss": 0.5408718585968018, + "step": 1614 + }, + { + "epoch": 0.3723342939481268, + "grad_norm": 1.2257479900702195, + "learning_rate": 1.9005431648472622e-06, + "loss": 0.6069578528404236, + "step": 1615 + }, + { + "epoch": 0.37256484149855906, + "grad_norm": 1.1160644334466228, + "learning_rate": 1.900377379994576e-06, + "loss": 0.5539328455924988, + "step": 1616 + }, + { + "epoch": 0.37279538904899134, + "grad_norm": 1.1810044813760578, + "learning_rate": 1.9002114643264227e-06, + "loss": 0.48285481333732605, + "step": 1617 + }, + { + "epoch": 0.3730259365994236, + "grad_norm": 1.1390789738354967, + "learning_rate": 1.900045417866908e-06, + "loss": 0.5164967775344849, + "step": 1618 + }, + { + "epoch": 0.3732564841498559, + "grad_norm": 1.3865337554151096, + "learning_rate": 1.8998792406401573e-06, + "loss": 0.6291834115982056, + "step": 1619 + }, + { + "epoch": 0.3734870317002882, + "grad_norm": 1.2947366356266334, + "learning_rate": 1.8997129326703142e-06, + "loss": 0.43779683113098145, + "step": 1620 + }, + { + "epoch": 0.37371757925072047, + "grad_norm": 1.0769662454824116, + "learning_rate": 1.8995464939815417e-06, + "loss": 0.5595699548721313, + "step": 1621 + }, + { + "epoch": 0.37394812680115275, + "grad_norm": 1.0790060447546612, + "learning_rate": 1.8993799245980213e-06, + "loss": 0.5409479737281799, + "step": 1622 + }, + { + "epoch": 0.37417867435158503, + "grad_norm": 1.1307121675788347, + "learning_rate": 1.8992132245439538e-06, + "loss": 0.4442507326602936, + "step": 1623 + }, + { + "epoch": 0.3744092219020173, + "grad_norm": 1.200366310775744, + "learning_rate": 1.8990463938435593e-06, + "loss": 0.5552202463150024, + "step": 1624 + }, + { + "epoch": 0.3746397694524496, + "grad_norm": 1.210477522460972, + "learning_rate": 1.8988794325210761e-06, + "loss": 0.4405897855758667, + "step": 1625 + }, + { + "epoch": 0.37487031700288187, + "grad_norm": 1.1718941866133759, + "learning_rate": 1.8987123406007626e-06, + "loss": 0.48420459032058716, + "step": 1626 + }, + { + "epoch": 0.3751008645533141, + "grad_norm": 1.0944718615024338, + "learning_rate": 1.8985451181068948e-06, + "loss": 0.47946181893348694, + "step": 1627 + }, + { + "epoch": 0.3753314121037464, + "grad_norm": 1.4314462923323383, + "learning_rate": 1.8983777650637687e-06, + "loss": 0.5447190403938293, + "step": 1628 + }, + { + "epoch": 0.37556195965417866, + "grad_norm": 1.2308740500999567, + "learning_rate": 1.8982102814956994e-06, + "loss": 0.5597184300422668, + "step": 1629 + }, + { + "epoch": 0.37579250720461094, + "grad_norm": 1.0808740812316415, + "learning_rate": 1.8980426674270195e-06, + "loss": 0.4728265404701233, + "step": 1630 + }, + { + "epoch": 0.3760230547550432, + "grad_norm": 1.0022470440365883, + "learning_rate": 1.8978749228820825e-06, + "loss": 0.5532448291778564, + "step": 1631 + }, + { + "epoch": 0.3762536023054755, + "grad_norm": 1.3647616222651395, + "learning_rate": 1.8977070478852596e-06, + "loss": 0.5713067650794983, + "step": 1632 + }, + { + "epoch": 0.3764841498559078, + "grad_norm": 1.047205989185189, + "learning_rate": 1.8975390424609414e-06, + "loss": 0.5646129846572876, + "step": 1633 + }, + { + "epoch": 0.37671469740634006, + "grad_norm": 1.0707041675317837, + "learning_rate": 1.897370906633537e-06, + "loss": 0.5725210309028625, + "step": 1634 + }, + { + "epoch": 0.37694524495677234, + "grad_norm": 1.2576302459262307, + "learning_rate": 1.8972026404274752e-06, + "loss": 0.6406511068344116, + "step": 1635 + }, + { + "epoch": 0.3771757925072046, + "grad_norm": 1.3865349965726392, + "learning_rate": 1.8970342438672032e-06, + "loss": 0.6073347330093384, + "step": 1636 + }, + { + "epoch": 0.3774063400576369, + "grad_norm": 0.950712343876015, + "learning_rate": 1.8968657169771871e-06, + "loss": 0.5833244323730469, + "step": 1637 + }, + { + "epoch": 0.3776368876080692, + "grad_norm": 1.05768625493054, + "learning_rate": 1.8966970597819122e-06, + "loss": 0.5105189681053162, + "step": 1638 + }, + { + "epoch": 0.37786743515850146, + "grad_norm": 1.2405093128151112, + "learning_rate": 1.8965282723058827e-06, + "loss": 0.6037019491195679, + "step": 1639 + }, + { + "epoch": 0.37809798270893374, + "grad_norm": 1.2668863969044166, + "learning_rate": 1.8963593545736218e-06, + "loss": 0.43045759201049805, + "step": 1640 + }, + { + "epoch": 0.37832853025936597, + "grad_norm": 1.031350209842573, + "learning_rate": 1.8961903066096712e-06, + "loss": 0.5377180576324463, + "step": 1641 + }, + { + "epoch": 0.37855907780979825, + "grad_norm": 1.0130795042585947, + "learning_rate": 1.8960211284385919e-06, + "loss": 0.4094654321670532, + "step": 1642 + }, + { + "epoch": 0.37878962536023053, + "grad_norm": 1.0057294140777453, + "learning_rate": 1.8958518200849638e-06, + "loss": 0.4871266484260559, + "step": 1643 + }, + { + "epoch": 0.3790201729106628, + "grad_norm": 1.0413940989400527, + "learning_rate": 1.8956823815733855e-06, + "loss": 0.546768069267273, + "step": 1644 + }, + { + "epoch": 0.3792507204610951, + "grad_norm": 1.489464911824181, + "learning_rate": 1.8955128129284747e-06, + "loss": 0.5822614431381226, + "step": 1645 + }, + { + "epoch": 0.3794812680115274, + "grad_norm": 1.2747301835507696, + "learning_rate": 1.8953431141748685e-06, + "loss": 0.499586284160614, + "step": 1646 + }, + { + "epoch": 0.37971181556195965, + "grad_norm": 1.0356602712164185, + "learning_rate": 1.8951732853372214e-06, + "loss": 0.4616992771625519, + "step": 1647 + }, + { + "epoch": 0.37994236311239193, + "grad_norm": 1.1023032116439935, + "learning_rate": 1.8950033264402084e-06, + "loss": 0.5688509941101074, + "step": 1648 + }, + { + "epoch": 0.3801729106628242, + "grad_norm": 1.2135433507747628, + "learning_rate": 1.8948332375085226e-06, + "loss": 0.5367652177810669, + "step": 1649 + }, + { + "epoch": 0.3804034582132565, + "grad_norm": 1.1441147670237812, + "learning_rate": 1.8946630185668759e-06, + "loss": 0.5865902900695801, + "step": 1650 + }, + { + "epoch": 0.3806340057636888, + "grad_norm": 1.20236181961399, + "learning_rate": 1.89449266964e-06, + "loss": 0.4999021887779236, + "step": 1651 + }, + { + "epoch": 0.38086455331412106, + "grad_norm": 1.1459570976309312, + "learning_rate": 1.8943221907526443e-06, + "loss": 0.5646007657051086, + "step": 1652 + }, + { + "epoch": 0.38109510086455334, + "grad_norm": 1.1175142171833454, + "learning_rate": 1.8941515819295776e-06, + "loss": 0.532716691493988, + "step": 1653 + }, + { + "epoch": 0.3813256484149856, + "grad_norm": 1.2540916265885504, + "learning_rate": 1.893980843195588e-06, + "loss": 0.5148980021476746, + "step": 1654 + }, + { + "epoch": 0.38155619596541784, + "grad_norm": 1.0547624939084206, + "learning_rate": 1.8938099745754815e-06, + "loss": 0.5159789323806763, + "step": 1655 + }, + { + "epoch": 0.3817867435158501, + "grad_norm": 1.0842073433449368, + "learning_rate": 1.8936389760940839e-06, + "loss": 0.521435022354126, + "step": 1656 + }, + { + "epoch": 0.3820172910662824, + "grad_norm": 1.1669048607685022, + "learning_rate": 1.8934678477762395e-06, + "loss": 0.5362331867218018, + "step": 1657 + }, + { + "epoch": 0.3822478386167147, + "grad_norm": 0.9857130142976365, + "learning_rate": 1.8932965896468113e-06, + "loss": 0.5501196980476379, + "step": 1658 + }, + { + "epoch": 0.38247838616714697, + "grad_norm": 1.3279799890705724, + "learning_rate": 1.8931252017306813e-06, + "loss": 0.5484409928321838, + "step": 1659 + }, + { + "epoch": 0.38270893371757925, + "grad_norm": 1.2442187734169075, + "learning_rate": 1.8929536840527507e-06, + "loss": 0.6500132083892822, + "step": 1660 + }, + { + "epoch": 0.38293948126801153, + "grad_norm": 1.3286091674792162, + "learning_rate": 1.8927820366379388e-06, + "loss": 0.513029158115387, + "step": 1661 + }, + { + "epoch": 0.3831700288184438, + "grad_norm": 1.3694471654734355, + "learning_rate": 1.8926102595111843e-06, + "loss": 0.6097410917282104, + "step": 1662 + }, + { + "epoch": 0.3834005763688761, + "grad_norm": 1.2096429751854834, + "learning_rate": 1.8924383526974453e-06, + "loss": 0.5139362812042236, + "step": 1663 + }, + { + "epoch": 0.38363112391930837, + "grad_norm": 1.1695686636695835, + "learning_rate": 1.892266316221697e-06, + "loss": 0.4893265962600708, + "step": 1664 + }, + { + "epoch": 0.38386167146974065, + "grad_norm": 1.1419708571669807, + "learning_rate": 1.8920941501089352e-06, + "loss": 0.5500860810279846, + "step": 1665 + }, + { + "epoch": 0.38409221902017293, + "grad_norm": 1.2040433648252555, + "learning_rate": 1.8919218543841736e-06, + "loss": 0.46147310733795166, + "step": 1666 + }, + { + "epoch": 0.3843227665706052, + "grad_norm": 1.2894839572530414, + "learning_rate": 1.891749429072445e-06, + "loss": 0.4785606265068054, + "step": 1667 + }, + { + "epoch": 0.38455331412103744, + "grad_norm": 1.1550508785039575, + "learning_rate": 1.8915768741988012e-06, + "loss": 0.5280581712722778, + "step": 1668 + }, + { + "epoch": 0.3847838616714697, + "grad_norm": 0.8004967971503981, + "learning_rate": 1.8914041897883125e-06, + "loss": 0.4316279888153076, + "step": 1669 + }, + { + "epoch": 0.385014409221902, + "grad_norm": 0.9031817030472468, + "learning_rate": 1.8912313758660679e-06, + "loss": 0.4610823392868042, + "step": 1670 + }, + { + "epoch": 0.3852449567723343, + "grad_norm": 1.1057745952132196, + "learning_rate": 1.8910584324571758e-06, + "loss": 0.5529364943504333, + "step": 1671 + }, + { + "epoch": 0.38547550432276656, + "grad_norm": 1.1712489054484079, + "learning_rate": 1.890885359586763e-06, + "loss": 0.5393742322921753, + "step": 1672 + }, + { + "epoch": 0.38570605187319884, + "grad_norm": 1.1889287828623867, + "learning_rate": 1.890712157279975e-06, + "loss": 0.5502661466598511, + "step": 1673 + }, + { + "epoch": 0.3859365994236311, + "grad_norm": 1.1175808690641762, + "learning_rate": 1.8905388255619764e-06, + "loss": 0.5967349410057068, + "step": 1674 + }, + { + "epoch": 0.3861671469740634, + "grad_norm": 0.9843385175883147, + "learning_rate": 1.8903653644579508e-06, + "loss": 0.5181038975715637, + "step": 1675 + }, + { + "epoch": 0.3863976945244957, + "grad_norm": 1.2399078270539727, + "learning_rate": 1.8901917739931e-06, + "loss": 0.6069591641426086, + "step": 1676 + }, + { + "epoch": 0.38662824207492796, + "grad_norm": 1.395380167151623, + "learning_rate": 1.8900180541926445e-06, + "loss": 0.4811745882034302, + "step": 1677 + }, + { + "epoch": 0.38685878962536024, + "grad_norm": 1.1868848622007617, + "learning_rate": 1.889844205081825e-06, + "loss": 0.42992472648620605, + "step": 1678 + }, + { + "epoch": 0.3870893371757925, + "grad_norm": 1.3846875348981704, + "learning_rate": 1.889670226685899e-06, + "loss": 0.515068531036377, + "step": 1679 + }, + { + "epoch": 0.3873198847262248, + "grad_norm": 1.0504931066417105, + "learning_rate": 1.889496119030144e-06, + "loss": 0.5068717002868652, + "step": 1680 + }, + { + "epoch": 0.3875504322766571, + "grad_norm": 1.204035195367357, + "learning_rate": 1.8893218821398564e-06, + "loss": 0.6236181259155273, + "step": 1681 + }, + { + "epoch": 0.3877809798270893, + "grad_norm": 1.063984922450486, + "learning_rate": 1.8891475160403508e-06, + "loss": 0.5492556095123291, + "step": 1682 + }, + { + "epoch": 0.3880115273775216, + "grad_norm": 1.2451795909219046, + "learning_rate": 1.8889730207569605e-06, + "loss": 0.5750234723091125, + "step": 1683 + }, + { + "epoch": 0.3882420749279539, + "grad_norm": 1.283210442061239, + "learning_rate": 1.8887983963150384e-06, + "loss": 0.52640700340271, + "step": 1684 + }, + { + "epoch": 0.38847262247838615, + "grad_norm": 1.144571674362394, + "learning_rate": 1.8886236427399549e-06, + "loss": 0.5916281342506409, + "step": 1685 + }, + { + "epoch": 0.38870317002881843, + "grad_norm": 1.2712652855122726, + "learning_rate": 1.8884487600571007e-06, + "loss": 0.5805083513259888, + "step": 1686 + }, + { + "epoch": 0.3889337175792507, + "grad_norm": 1.1516486375209825, + "learning_rate": 1.8882737482918838e-06, + "loss": 0.47049853205680847, + "step": 1687 + }, + { + "epoch": 0.389164265129683, + "grad_norm": 1.2991101405673497, + "learning_rate": 1.8880986074697318e-06, + "loss": 0.5835333466529846, + "step": 1688 + }, + { + "epoch": 0.3893948126801153, + "grad_norm": 1.1881783272928648, + "learning_rate": 1.8879233376160907e-06, + "loss": 0.4839910864830017, + "step": 1689 + }, + { + "epoch": 0.38962536023054756, + "grad_norm": 1.3683999792816708, + "learning_rate": 1.8877479387564258e-06, + "loss": 0.5125032663345337, + "step": 1690 + }, + { + "epoch": 0.38985590778097984, + "grad_norm": 0.9692753104780237, + "learning_rate": 1.8875724109162203e-06, + "loss": 0.595876932144165, + "step": 1691 + }, + { + "epoch": 0.3900864553314121, + "grad_norm": 1.1077982199461915, + "learning_rate": 1.887396754120977e-06, + "loss": 0.5293446779251099, + "step": 1692 + }, + { + "epoch": 0.3903170028818444, + "grad_norm": 0.947630233327163, + "learning_rate": 1.8872209683962163e-06, + "loss": 0.5455681681632996, + "step": 1693 + }, + { + "epoch": 0.3905475504322767, + "grad_norm": 1.1346104082787818, + "learning_rate": 1.8870450537674787e-06, + "loss": 0.596378743648529, + "step": 1694 + }, + { + "epoch": 0.39077809798270896, + "grad_norm": 1.1455389251374453, + "learning_rate": 1.8868690102603226e-06, + "loss": 0.49379733204841614, + "step": 1695 + }, + { + "epoch": 0.3910086455331412, + "grad_norm": 1.2108182415680409, + "learning_rate": 1.8866928379003251e-06, + "loss": 0.4551328420639038, + "step": 1696 + }, + { + "epoch": 0.39123919308357347, + "grad_norm": 1.4250898609027278, + "learning_rate": 1.8865165367130822e-06, + "loss": 0.54606693983078, + "step": 1697 + }, + { + "epoch": 0.39146974063400575, + "grad_norm": 1.008628556444834, + "learning_rate": 1.886340106724209e-06, + "loss": 0.5410532355308533, + "step": 1698 + }, + { + "epoch": 0.39170028818443803, + "grad_norm": 1.1387013906480672, + "learning_rate": 1.8861635479593386e-06, + "loss": 0.533348560333252, + "step": 1699 + }, + { + "epoch": 0.3919308357348703, + "grad_norm": 1.2000257753384949, + "learning_rate": 1.8859868604441233e-06, + "loss": 0.48270243406295776, + "step": 1700 + }, + { + "epoch": 0.3921613832853026, + "grad_norm": 1.6970909551883837, + "learning_rate": 1.8858100442042339e-06, + "loss": 0.6110135316848755, + "step": 1701 + }, + { + "epoch": 0.39239193083573487, + "grad_norm": 1.2287166717299047, + "learning_rate": 1.8856330992653603e-06, + "loss": 0.4867699146270752, + "step": 1702 + }, + { + "epoch": 0.39262247838616715, + "grad_norm": 1.0530809153005494, + "learning_rate": 1.8854560256532098e-06, + "loss": 0.5660721659660339, + "step": 1703 + }, + { + "epoch": 0.39285302593659943, + "grad_norm": 1.3131541890601917, + "learning_rate": 1.8852788233935102e-06, + "loss": 0.5702673196792603, + "step": 1704 + }, + { + "epoch": 0.3930835734870317, + "grad_norm": 1.0760886702069608, + "learning_rate": 1.8851014925120071e-06, + "loss": 0.6213070154190063, + "step": 1705 + }, + { + "epoch": 0.393314121037464, + "grad_norm": 1.1645752082269338, + "learning_rate": 1.8849240330344647e-06, + "loss": 0.5179423689842224, + "step": 1706 + }, + { + "epoch": 0.3935446685878963, + "grad_norm": 1.1314191100724993, + "learning_rate": 1.884746444986666e-06, + "loss": 0.5448884963989258, + "step": 1707 + }, + { + "epoch": 0.39377521613832855, + "grad_norm": 1.7831366963927755, + "learning_rate": 1.8845687283944124e-06, + "loss": 0.4889717698097229, + "step": 1708 + }, + { + "epoch": 0.3940057636887608, + "grad_norm": 1.0708754186574336, + "learning_rate": 1.8843908832835248e-06, + "loss": 0.5224671363830566, + "step": 1709 + }, + { + "epoch": 0.39423631123919306, + "grad_norm": 1.1316735747750348, + "learning_rate": 1.8842129096798418e-06, + "loss": 0.4966825246810913, + "step": 1710 + }, + { + "epoch": 0.39446685878962534, + "grad_norm": 1.1170822014486683, + "learning_rate": 1.8840348076092215e-06, + "loss": 0.425929993391037, + "step": 1711 + }, + { + "epoch": 0.3946974063400576, + "grad_norm": 1.1640213717882835, + "learning_rate": 1.8838565770975399e-06, + "loss": 0.5540965795516968, + "step": 1712 + }, + { + "epoch": 0.3949279538904899, + "grad_norm": 0.944462132080172, + "learning_rate": 1.8836782181706922e-06, + "loss": 0.5570763349533081, + "step": 1713 + }, + { + "epoch": 0.3951585014409222, + "grad_norm": 1.0849748508948478, + "learning_rate": 1.8834997308545924e-06, + "loss": 0.5277444124221802, + "step": 1714 + }, + { + "epoch": 0.39538904899135446, + "grad_norm": 1.2193838491486786, + "learning_rate": 1.8833211151751724e-06, + "loss": 0.5402891635894775, + "step": 1715 + }, + { + "epoch": 0.39561959654178674, + "grad_norm": 1.0252111569197984, + "learning_rate": 1.8831423711583834e-06, + "loss": 0.6028883457183838, + "step": 1716 + }, + { + "epoch": 0.395850144092219, + "grad_norm": 1.2831614100796882, + "learning_rate": 1.882963498830195e-06, + "loss": 0.5924968719482422, + "step": 1717 + }, + { + "epoch": 0.3960806916426513, + "grad_norm": 1.0174586090749438, + "learning_rate": 1.8827844982165955e-06, + "loss": 0.5173168778419495, + "step": 1718 + }, + { + "epoch": 0.3963112391930836, + "grad_norm": 1.165042756566303, + "learning_rate": 1.882605369343592e-06, + "loss": 0.47797566652297974, + "step": 1719 + }, + { + "epoch": 0.39654178674351587, + "grad_norm": 1.131691075857789, + "learning_rate": 1.8824261122372095e-06, + "loss": 0.5594542026519775, + "step": 1720 + }, + { + "epoch": 0.39677233429394815, + "grad_norm": 1.2640007167386567, + "learning_rate": 1.882246726923493e-06, + "loss": 0.4570848345756531, + "step": 1721 + }, + { + "epoch": 0.39700288184438043, + "grad_norm": 0.971236274675553, + "learning_rate": 1.8820672134285048e-06, + "loss": 0.48942142724990845, + "step": 1722 + }, + { + "epoch": 0.39723342939481265, + "grad_norm": 1.0417289219929287, + "learning_rate": 1.8818875717783263e-06, + "loss": 0.5194408893585205, + "step": 1723 + }, + { + "epoch": 0.39746397694524493, + "grad_norm": 1.3905898231547866, + "learning_rate": 1.8817078019990577e-06, + "loss": 0.5944944024085999, + "step": 1724 + }, + { + "epoch": 0.3976945244956772, + "grad_norm": 1.0798692984180562, + "learning_rate": 1.881527904116818e-06, + "loss": 0.46111130714416504, + "step": 1725 + }, + { + "epoch": 0.3979250720461095, + "grad_norm": 1.0231368964738081, + "learning_rate": 1.881347878157744e-06, + "loss": 0.45540904998779297, + "step": 1726 + }, + { + "epoch": 0.3981556195965418, + "grad_norm": 1.069281496320294, + "learning_rate": 1.8811677241479918e-06, + "loss": 0.5093264579772949, + "step": 1727 + }, + { + "epoch": 0.39838616714697406, + "grad_norm": 1.2516879690636435, + "learning_rate": 1.8809874421137358e-06, + "loss": 0.5825635194778442, + "step": 1728 + }, + { + "epoch": 0.39861671469740634, + "grad_norm": 1.1927287526332015, + "learning_rate": 1.880807032081169e-06, + "loss": 0.49300920963287354, + "step": 1729 + }, + { + "epoch": 0.3988472622478386, + "grad_norm": 1.0829675696070233, + "learning_rate": 1.8806264940765036e-06, + "loss": 0.6261301040649414, + "step": 1730 + }, + { + "epoch": 0.3990778097982709, + "grad_norm": 1.3103095493354275, + "learning_rate": 1.8804458281259695e-06, + "loss": 0.5493542551994324, + "step": 1731 + }, + { + "epoch": 0.3993083573487032, + "grad_norm": 1.2025684089327158, + "learning_rate": 1.8802650342558158e-06, + "loss": 0.40386199951171875, + "step": 1732 + }, + { + "epoch": 0.39953890489913546, + "grad_norm": 1.0567308440121088, + "learning_rate": 1.8800841124923097e-06, + "loss": 0.6049227118492126, + "step": 1733 + }, + { + "epoch": 0.39976945244956774, + "grad_norm": 1.2561339846834512, + "learning_rate": 1.8799030628617373e-06, + "loss": 0.49980294704437256, + "step": 1734 + }, + { + "epoch": 0.4, + "grad_norm": 1.3692289290302153, + "learning_rate": 1.8797218853904035e-06, + "loss": 0.5892654657363892, + "step": 1735 + }, + { + "epoch": 0.4002305475504323, + "grad_norm": 1.3450627943725142, + "learning_rate": 1.8795405801046314e-06, + "loss": 0.566817045211792, + "step": 1736 + }, + { + "epoch": 0.40046109510086453, + "grad_norm": 1.0535742982367924, + "learning_rate": 1.8793591470307626e-06, + "loss": 0.5093469619750977, + "step": 1737 + }, + { + "epoch": 0.4006916426512968, + "grad_norm": 1.282782359379558, + "learning_rate": 1.8791775861951574e-06, + "loss": 0.5166475176811218, + "step": 1738 + }, + { + "epoch": 0.4009221902017291, + "grad_norm": 1.6381884392348767, + "learning_rate": 1.8789958976241946e-06, + "loss": 0.4210118055343628, + "step": 1739 + }, + { + "epoch": 0.40115273775216137, + "grad_norm": 1.1490169896035265, + "learning_rate": 1.8788140813442724e-06, + "loss": 0.48690280318260193, + "step": 1740 + }, + { + "epoch": 0.40138328530259365, + "grad_norm": 1.2454394412870222, + "learning_rate": 1.878632137381806e-06, + "loss": 0.5239908695220947, + "step": 1741 + }, + { + "epoch": 0.40161383285302593, + "grad_norm": 1.167282839522106, + "learning_rate": 1.8784500657632304e-06, + "loss": 0.5424253940582275, + "step": 1742 + }, + { + "epoch": 0.4018443804034582, + "grad_norm": 1.4221599808285748, + "learning_rate": 1.8782678665149986e-06, + "loss": 0.570236325263977, + "step": 1743 + }, + { + "epoch": 0.4020749279538905, + "grad_norm": 1.2291084700951356, + "learning_rate": 1.8780855396635821e-06, + "loss": 0.6240546703338623, + "step": 1744 + }, + { + "epoch": 0.4023054755043228, + "grad_norm": 1.259784949833717, + "learning_rate": 1.8779030852354713e-06, + "loss": 0.49374204874038696, + "step": 1745 + }, + { + "epoch": 0.40253602305475505, + "grad_norm": 1.2162048883726875, + "learning_rate": 1.8777205032571749e-06, + "loss": 0.5949456691741943, + "step": 1746 + }, + { + "epoch": 0.40276657060518734, + "grad_norm": 1.2411044263502289, + "learning_rate": 1.8775377937552199e-06, + "loss": 0.4195283055305481, + "step": 1747 + }, + { + "epoch": 0.4029971181556196, + "grad_norm": 1.024383998377649, + "learning_rate": 1.8773549567561523e-06, + "loss": 0.494783878326416, + "step": 1748 + }, + { + "epoch": 0.4032276657060519, + "grad_norm": 0.9805394591840607, + "learning_rate": 1.8771719922865369e-06, + "loss": 0.5290952324867249, + "step": 1749 + }, + { + "epoch": 0.4034582132564842, + "grad_norm": 1.1062918130028372, + "learning_rate": 1.8769889003729558e-06, + "loss": 0.5689871907234192, + "step": 1750 + }, + { + "epoch": 0.4036887608069164, + "grad_norm": 0.9139238206795531, + "learning_rate": 1.8768056810420104e-06, + "loss": 0.48432207107543945, + "step": 1751 + }, + { + "epoch": 0.4039193083573487, + "grad_norm": 1.0993885645774597, + "learning_rate": 1.876622334320321e-06, + "loss": 0.56418776512146, + "step": 1752 + }, + { + "epoch": 0.40414985590778096, + "grad_norm": 0.9735263367246159, + "learning_rate": 1.8764388602345257e-06, + "loss": 0.5624086856842041, + "step": 1753 + }, + { + "epoch": 0.40438040345821324, + "grad_norm": 1.4524594835395888, + "learning_rate": 1.8762552588112816e-06, + "loss": 0.5015785694122314, + "step": 1754 + }, + { + "epoch": 0.4046109510086455, + "grad_norm": 0.9394639774386047, + "learning_rate": 1.8760715300772638e-06, + "loss": 0.41496244072914124, + "step": 1755 + }, + { + "epoch": 0.4048414985590778, + "grad_norm": 1.4027706508763087, + "learning_rate": 1.875887674059166e-06, + "loss": 0.6052544713020325, + "step": 1756 + }, + { + "epoch": 0.4050720461095101, + "grad_norm": 1.0389427837449594, + "learning_rate": 1.8757036907837009e-06, + "loss": 0.5018082857131958, + "step": 1757 + }, + { + "epoch": 0.40530259365994237, + "grad_norm": 1.2000927575372657, + "learning_rate": 1.8755195802775996e-06, + "loss": 0.5213236808776855, + "step": 1758 + }, + { + "epoch": 0.40553314121037465, + "grad_norm": 1.0043625854035514, + "learning_rate": 1.875335342567611e-06, + "loss": 0.5396578907966614, + "step": 1759 + }, + { + "epoch": 0.40576368876080693, + "grad_norm": 1.1447196519735834, + "learning_rate": 1.8751509776805029e-06, + "loss": 0.4654269814491272, + "step": 1760 + }, + { + "epoch": 0.4059942363112392, + "grad_norm": 1.0888985719941529, + "learning_rate": 1.8749664856430618e-06, + "loss": 0.5281137228012085, + "step": 1761 + }, + { + "epoch": 0.4062247838616715, + "grad_norm": 1.088793787907681, + "learning_rate": 1.8747818664820927e-06, + "loss": 0.6041824817657471, + "step": 1762 + }, + { + "epoch": 0.40645533141210377, + "grad_norm": 0.9910156594308228, + "learning_rate": 1.8745971202244184e-06, + "loss": 0.45012253522872925, + "step": 1763 + }, + { + "epoch": 0.406685878962536, + "grad_norm": 0.9756039486828318, + "learning_rate": 1.8744122468968806e-06, + "loss": 0.5526795387268066, + "step": 1764 + }, + { + "epoch": 0.4069164265129683, + "grad_norm": 0.9925167062427037, + "learning_rate": 1.8742272465263399e-06, + "loss": 0.4435810446739197, + "step": 1765 + }, + { + "epoch": 0.40714697406340056, + "grad_norm": 1.1848709843080019, + "learning_rate": 1.8740421191396746e-06, + "loss": 0.4480190873146057, + "step": 1766 + }, + { + "epoch": 0.40737752161383284, + "grad_norm": 1.0852081990027085, + "learning_rate": 1.8738568647637819e-06, + "loss": 0.6167557239532471, + "step": 1767 + }, + { + "epoch": 0.4076080691642651, + "grad_norm": 1.2101115264274098, + "learning_rate": 1.873671483425577e-06, + "loss": 0.5288156270980835, + "step": 1768 + }, + { + "epoch": 0.4078386167146974, + "grad_norm": 1.1485331887340928, + "learning_rate": 1.8734859751519948e-06, + "loss": 0.5483111143112183, + "step": 1769 + }, + { + "epoch": 0.4080691642651297, + "grad_norm": 1.0072561211275919, + "learning_rate": 1.8733003399699868e-06, + "loss": 0.5418217778205872, + "step": 1770 + }, + { + "epoch": 0.40829971181556196, + "grad_norm": 1.123107557275528, + "learning_rate": 1.873114577906524e-06, + "loss": 0.5031202435493469, + "step": 1771 + }, + { + "epoch": 0.40853025936599424, + "grad_norm": 1.2830585286564486, + "learning_rate": 1.8729286889885959e-06, + "loss": 0.5669084787368774, + "step": 1772 + }, + { + "epoch": 0.4087608069164265, + "grad_norm": 1.320215056578161, + "learning_rate": 1.87274267324321e-06, + "loss": 0.6212494373321533, + "step": 1773 + }, + { + "epoch": 0.4089913544668588, + "grad_norm": 0.9044281123557663, + "learning_rate": 1.872556530697393e-06, + "loss": 0.47481727600097656, + "step": 1774 + }, + { + "epoch": 0.4092219020172911, + "grad_norm": 1.0800809902848345, + "learning_rate": 1.872370261378189e-06, + "loss": 0.6407462358474731, + "step": 1775 + }, + { + "epoch": 0.40945244956772336, + "grad_norm": 1.0727598714512039, + "learning_rate": 1.8721838653126613e-06, + "loss": 0.5465847849845886, + "step": 1776 + }, + { + "epoch": 0.40968299711815565, + "grad_norm": 1.5052681429553312, + "learning_rate": 1.871997342527891e-06, + "loss": 0.5464286804199219, + "step": 1777 + }, + { + "epoch": 0.40991354466858787, + "grad_norm": 1.1767943613463163, + "learning_rate": 1.8718106930509778e-06, + "loss": 0.46753352880477905, + "step": 1778 + }, + { + "epoch": 0.41014409221902015, + "grad_norm": 0.973706053688001, + "learning_rate": 1.8716239169090406e-06, + "loss": 0.4888344705104828, + "step": 1779 + }, + { + "epoch": 0.41037463976945243, + "grad_norm": 1.1283990917005553, + "learning_rate": 1.8714370141292153e-06, + "loss": 0.4504971504211426, + "step": 1780 + }, + { + "epoch": 0.4106051873198847, + "grad_norm": 0.9474408538787359, + "learning_rate": 1.8712499847386574e-06, + "loss": 0.403839111328125, + "step": 1781 + }, + { + "epoch": 0.410835734870317, + "grad_norm": 1.2416666688249398, + "learning_rate": 1.8710628287645398e-06, + "loss": 0.5075634717941284, + "step": 1782 + }, + { + "epoch": 0.4110662824207493, + "grad_norm": 1.0201988702930243, + "learning_rate": 1.870875546234055e-06, + "loss": 0.4988376498222351, + "step": 1783 + }, + { + "epoch": 0.41129682997118155, + "grad_norm": 1.1287965100043822, + "learning_rate": 1.8706881371744128e-06, + "loss": 0.5289707183837891, + "step": 1784 + }, + { + "epoch": 0.41152737752161384, + "grad_norm": 1.1288272138801552, + "learning_rate": 1.8705006016128418e-06, + "loss": 0.48820483684539795, + "step": 1785 + }, + { + "epoch": 0.4117579250720461, + "grad_norm": 1.1021300108323646, + "learning_rate": 1.870312939576589e-06, + "loss": 0.523395836353302, + "step": 1786 + }, + { + "epoch": 0.4119884726224784, + "grad_norm": 1.0502779868402847, + "learning_rate": 1.8701251510929197e-06, + "loss": 0.5213379263877869, + "step": 1787 + }, + { + "epoch": 0.4122190201729107, + "grad_norm": 1.51700777394792, + "learning_rate": 1.869937236189118e-06, + "loss": 0.5818710923194885, + "step": 1788 + }, + { + "epoch": 0.41244956772334296, + "grad_norm": 1.217213297698164, + "learning_rate": 1.8697491948924854e-06, + "loss": 0.599867045879364, + "step": 1789 + }, + { + "epoch": 0.41268011527377524, + "grad_norm": 1.1893745188668343, + "learning_rate": 1.8695610272303426e-06, + "loss": 0.562250018119812, + "step": 1790 + }, + { + "epoch": 0.4129106628242075, + "grad_norm": 1.054818186219627, + "learning_rate": 1.8693727332300285e-06, + "loss": 0.5593204498291016, + "step": 1791 + }, + { + "epoch": 0.41314121037463974, + "grad_norm": 1.0866589405542582, + "learning_rate": 1.8691843129189e-06, + "loss": 0.585111141204834, + "step": 1792 + }, + { + "epoch": 0.413371757925072, + "grad_norm": 1.1845299989706493, + "learning_rate": 1.868995766324333e-06, + "loss": 0.582213282585144, + "step": 1793 + }, + { + "epoch": 0.4136023054755043, + "grad_norm": 1.1273952476109894, + "learning_rate": 1.868807093473721e-06, + "loss": 0.5196056365966797, + "step": 1794 + }, + { + "epoch": 0.4138328530259366, + "grad_norm": 1.6255229812391085, + "learning_rate": 1.8686182943944764e-06, + "loss": 0.5141228437423706, + "step": 1795 + }, + { + "epoch": 0.41406340057636887, + "grad_norm": 1.0388777012541068, + "learning_rate": 1.8684293691140296e-06, + "loss": 0.5710434913635254, + "step": 1796 + }, + { + "epoch": 0.41429394812680115, + "grad_norm": 1.1143205444284763, + "learning_rate": 1.8682403176598296e-06, + "loss": 0.5534354448318481, + "step": 1797 + }, + { + "epoch": 0.41452449567723343, + "grad_norm": 1.118968020644909, + "learning_rate": 1.8680511400593435e-06, + "loss": 0.4919063448905945, + "step": 1798 + }, + { + "epoch": 0.4147550432276657, + "grad_norm": 1.0919336938400037, + "learning_rate": 1.867861836340057e-06, + "loss": 0.5988746881484985, + "step": 1799 + }, + { + "epoch": 0.414985590778098, + "grad_norm": 1.0860449740836524, + "learning_rate": 1.867672406529474e-06, + "loss": 0.4136649966239929, + "step": 1800 + }, + { + "epoch": 0.41521613832853027, + "grad_norm": 1.0694509491683928, + "learning_rate": 1.8674828506551164e-06, + "loss": 0.539253294467926, + "step": 1801 + }, + { + "epoch": 0.41544668587896255, + "grad_norm": 1.05460638777189, + "learning_rate": 1.867293168744525e-06, + "loss": 0.5144013166427612, + "step": 1802 + }, + { + "epoch": 0.41567723342939483, + "grad_norm": 1.4412084541043333, + "learning_rate": 1.8671033608252583e-06, + "loss": 0.5293606519699097, + "step": 1803 + }, + { + "epoch": 0.4159077809798271, + "grad_norm": 1.2991891107249438, + "learning_rate": 1.8669134269248933e-06, + "loss": 0.5156667232513428, + "step": 1804 + }, + { + "epoch": 0.4161383285302594, + "grad_norm": 1.459589715601901, + "learning_rate": 1.8667233670710258e-06, + "loss": 0.5277712345123291, + "step": 1805 + }, + { + "epoch": 0.4163688760806916, + "grad_norm": 1.109858006621566, + "learning_rate": 1.8665331812912699e-06, + "loss": 0.5984486937522888, + "step": 1806 + }, + { + "epoch": 0.4165994236311239, + "grad_norm": 1.1513901451974835, + "learning_rate": 1.8663428696132567e-06, + "loss": 0.626596212387085, + "step": 1807 + }, + { + "epoch": 0.4168299711815562, + "grad_norm": 1.1512225176078512, + "learning_rate": 1.866152432064637e-06, + "loss": 0.5107407569885254, + "step": 1808 + }, + { + "epoch": 0.41706051873198846, + "grad_norm": 1.0987389433084875, + "learning_rate": 1.8659618686730794e-06, + "loss": 0.5654654502868652, + "step": 1809 + }, + { + "epoch": 0.41729106628242074, + "grad_norm": 1.0577605574421043, + "learning_rate": 1.8657711794662706e-06, + "loss": 0.49788808822631836, + "step": 1810 + }, + { + "epoch": 0.417521613832853, + "grad_norm": 0.9387772619602837, + "learning_rate": 1.8655803644719158e-06, + "loss": 0.5334138870239258, + "step": 1811 + }, + { + "epoch": 0.4177521613832853, + "grad_norm": 1.143732906797826, + "learning_rate": 1.8653894237177387e-06, + "loss": 0.5429027080535889, + "step": 1812 + }, + { + "epoch": 0.4179827089337176, + "grad_norm": 1.2110297581702978, + "learning_rate": 1.8651983572314806e-06, + "loss": 0.5475035905838013, + "step": 1813 + }, + { + "epoch": 0.41821325648414986, + "grad_norm": 0.8994137789901391, + "learning_rate": 1.8650071650409021e-06, + "loss": 0.5310901403427124, + "step": 1814 + }, + { + "epoch": 0.41844380403458215, + "grad_norm": 1.1148250587589583, + "learning_rate": 1.8648158471737806e-06, + "loss": 0.5586632490158081, + "step": 1815 + }, + { + "epoch": 0.4186743515850144, + "grad_norm": 1.1882298726835794, + "learning_rate": 1.8646244036579132e-06, + "loss": 0.5620261430740356, + "step": 1816 + }, + { + "epoch": 0.4189048991354467, + "grad_norm": 1.1768360700686766, + "learning_rate": 1.8644328345211141e-06, + "loss": 0.46965062618255615, + "step": 1817 + }, + { + "epoch": 0.419135446685879, + "grad_norm": 1.0420774163319328, + "learning_rate": 1.864241139791217e-06, + "loss": 0.6193602085113525, + "step": 1818 + }, + { + "epoch": 0.4193659942363112, + "grad_norm": 1.4471262040360602, + "learning_rate": 1.8640493194960726e-06, + "loss": 0.555870532989502, + "step": 1819 + }, + { + "epoch": 0.4195965417867435, + "grad_norm": 1.4906810572373932, + "learning_rate": 1.863857373663551e-06, + "loss": 0.6341157555580139, + "step": 1820 + }, + { + "epoch": 0.4198270893371758, + "grad_norm": 1.0990483949708958, + "learning_rate": 1.8636653023215392e-06, + "loss": 0.5381972789764404, + "step": 1821 + }, + { + "epoch": 0.42005763688760805, + "grad_norm": 1.0324608703235092, + "learning_rate": 1.8634731054979435e-06, + "loss": 0.6126211881637573, + "step": 1822 + }, + { + "epoch": 0.42028818443804034, + "grad_norm": 1.2532481877249375, + "learning_rate": 1.8632807832206884e-06, + "loss": 0.5185278058052063, + "step": 1823 + }, + { + "epoch": 0.4205187319884726, + "grad_norm": 1.179777187779905, + "learning_rate": 1.8630883355177156e-06, + "loss": 0.5594161748886108, + "step": 1824 + }, + { + "epoch": 0.4207492795389049, + "grad_norm": 1.0743944299141273, + "learning_rate": 1.8628957624169863e-06, + "loss": 0.46694353222846985, + "step": 1825 + }, + { + "epoch": 0.4209798270893372, + "grad_norm": 0.9352998625668567, + "learning_rate": 1.8627030639464794e-06, + "loss": 0.5055704116821289, + "step": 1826 + }, + { + "epoch": 0.42121037463976946, + "grad_norm": 0.956496667483655, + "learning_rate": 1.8625102401341919e-06, + "loss": 0.5006394386291504, + "step": 1827 + }, + { + "epoch": 0.42144092219020174, + "grad_norm": 1.0724067921691902, + "learning_rate": 1.8623172910081388e-06, + "loss": 0.4366666078567505, + "step": 1828 + }, + { + "epoch": 0.421671469740634, + "grad_norm": 1.1988055621631712, + "learning_rate": 1.8621242165963539e-06, + "loss": 0.521892786026001, + "step": 1829 + }, + { + "epoch": 0.4219020172910663, + "grad_norm": 0.9264285940430287, + "learning_rate": 1.8619310169268889e-06, + "loss": 0.4993744492530823, + "step": 1830 + }, + { + "epoch": 0.4221325648414986, + "grad_norm": 1.127882483842638, + "learning_rate": 1.8617376920278134e-06, + "loss": 0.5524100065231323, + "step": 1831 + }, + { + "epoch": 0.42236311239193086, + "grad_norm": 1.0452957425401903, + "learning_rate": 1.8615442419272158e-06, + "loss": 0.5477861166000366, + "step": 1832 + }, + { + "epoch": 0.4225936599423631, + "grad_norm": 1.1661409117320833, + "learning_rate": 1.8613506666532026e-06, + "loss": 0.534308910369873, + "step": 1833 + }, + { + "epoch": 0.42282420749279537, + "grad_norm": 0.937996924949033, + "learning_rate": 1.861156966233898e-06, + "loss": 0.537063479423523, + "step": 1834 + }, + { + "epoch": 0.42305475504322765, + "grad_norm": 1.238788764410924, + "learning_rate": 1.8609631406974441e-06, + "loss": 0.5953266620635986, + "step": 1835 + }, + { + "epoch": 0.42328530259365993, + "grad_norm": 1.0946370940172376, + "learning_rate": 1.8607691900720028e-06, + "loss": 0.5244371891021729, + "step": 1836 + }, + { + "epoch": 0.4235158501440922, + "grad_norm": 0.9951162728945512, + "learning_rate": 1.8605751143857525e-06, + "loss": 0.5036677122116089, + "step": 1837 + }, + { + "epoch": 0.4237463976945245, + "grad_norm": 1.2314528252971808, + "learning_rate": 1.8603809136668901e-06, + "loss": 0.5777844190597534, + "step": 1838 + }, + { + "epoch": 0.42397694524495677, + "grad_norm": 1.0285277747549548, + "learning_rate": 1.8601865879436315e-06, + "loss": 0.5430940985679626, + "step": 1839 + }, + { + "epoch": 0.42420749279538905, + "grad_norm": 0.992815671689248, + "learning_rate": 1.8599921372442101e-06, + "loss": 0.5749884843826294, + "step": 1840 + }, + { + "epoch": 0.42443804034582133, + "grad_norm": 1.115697410389442, + "learning_rate": 1.8597975615968778e-06, + "loss": 0.6325528621673584, + "step": 1841 + }, + { + "epoch": 0.4246685878962536, + "grad_norm": 1.165073474333435, + "learning_rate": 1.8596028610299037e-06, + "loss": 0.498636931180954, + "step": 1842 + }, + { + "epoch": 0.4248991354466859, + "grad_norm": 1.0192254593494965, + "learning_rate": 1.8594080355715763e-06, + "loss": 0.47933921217918396, + "step": 1843 + }, + { + "epoch": 0.4251296829971182, + "grad_norm": 1.0742766403042006, + "learning_rate": 1.8592130852502015e-06, + "loss": 0.560950756072998, + "step": 1844 + }, + { + "epoch": 0.42536023054755046, + "grad_norm": 1.0761528552561603, + "learning_rate": 1.8590180100941035e-06, + "loss": 0.4497816562652588, + "step": 1845 + }, + { + "epoch": 0.42559077809798274, + "grad_norm": 1.043194198187338, + "learning_rate": 1.858822810131625e-06, + "loss": 0.5248370170593262, + "step": 1846 + }, + { + "epoch": 0.42582132564841496, + "grad_norm": 1.2610988442234758, + "learning_rate": 1.8586274853911263e-06, + "loss": 0.5351696014404297, + "step": 1847 + }, + { + "epoch": 0.42605187319884724, + "grad_norm": 0.9804766807097018, + "learning_rate": 1.8584320359009861e-06, + "loss": 0.4677377939224243, + "step": 1848 + }, + { + "epoch": 0.4262824207492795, + "grad_norm": 1.0904414335677286, + "learning_rate": 1.8582364616896014e-06, + "loss": 0.5866556763648987, + "step": 1849 + }, + { + "epoch": 0.4265129682997118, + "grad_norm": 1.0169583686468386, + "learning_rate": 1.8580407627853864e-06, + "loss": 0.5760546922683716, + "step": 1850 + }, + { + "epoch": 0.4267435158501441, + "grad_norm": 1.2888742381353209, + "learning_rate": 1.8578449392167749e-06, + "loss": 0.5178868174552917, + "step": 1851 + }, + { + "epoch": 0.42697406340057636, + "grad_norm": 1.102440281004502, + "learning_rate": 1.8576489910122178e-06, + "loss": 0.546269416809082, + "step": 1852 + }, + { + "epoch": 0.42720461095100865, + "grad_norm": 1.18487190612363, + "learning_rate": 1.8574529182001838e-06, + "loss": 0.5659330487251282, + "step": 1853 + }, + { + "epoch": 0.4274351585014409, + "grad_norm": 1.2802501916805824, + "learning_rate": 1.8572567208091612e-06, + "loss": 0.6241079568862915, + "step": 1854 + }, + { + "epoch": 0.4276657060518732, + "grad_norm": 1.1080199645869717, + "learning_rate": 1.8570603988676545e-06, + "loss": 0.4504891633987427, + "step": 1855 + }, + { + "epoch": 0.4278962536023055, + "grad_norm": 1.5835233717071364, + "learning_rate": 1.856863952404188e-06, + "loss": 0.5238963961601257, + "step": 1856 + }, + { + "epoch": 0.42812680115273777, + "grad_norm": 0.963983780535358, + "learning_rate": 1.8566673814473027e-06, + "loss": 0.45989540219306946, + "step": 1857 + }, + { + "epoch": 0.42835734870317005, + "grad_norm": 1.1207149880393046, + "learning_rate": 1.856470686025559e-06, + "loss": 0.5468976497650146, + "step": 1858 + }, + { + "epoch": 0.42858789625360233, + "grad_norm": 1.1600381921463452, + "learning_rate": 1.8562738661675342e-06, + "loss": 0.5318598747253418, + "step": 1859 + }, + { + "epoch": 0.42881844380403455, + "grad_norm": 1.2406291189558882, + "learning_rate": 1.856076921901824e-06, + "loss": 0.5824429988861084, + "step": 1860 + }, + { + "epoch": 0.42904899135446684, + "grad_norm": 1.352026717175643, + "learning_rate": 1.855879853257043e-06, + "loss": 0.5112953186035156, + "step": 1861 + }, + { + "epoch": 0.4292795389048991, + "grad_norm": 1.1707177653661884, + "learning_rate": 1.8556826602618228e-06, + "loss": 0.4736165404319763, + "step": 1862 + }, + { + "epoch": 0.4295100864553314, + "grad_norm": 1.0256476637781957, + "learning_rate": 1.8554853429448132e-06, + "loss": 0.5070540308952332, + "step": 1863 + }, + { + "epoch": 0.4297406340057637, + "grad_norm": 1.323605946142347, + "learning_rate": 1.855287901334683e-06, + "loss": 0.5629868507385254, + "step": 1864 + }, + { + "epoch": 0.42997118155619596, + "grad_norm": 1.2516434837227657, + "learning_rate": 1.8550903354601178e-06, + "loss": 0.556925892829895, + "step": 1865 + }, + { + "epoch": 0.43020172910662824, + "grad_norm": 1.197839400328371, + "learning_rate": 1.8548926453498228e-06, + "loss": 0.5992434024810791, + "step": 1866 + }, + { + "epoch": 0.4304322766570605, + "grad_norm": 1.0775701233448667, + "learning_rate": 1.8546948310325195e-06, + "loss": 0.5160760879516602, + "step": 1867 + }, + { + "epoch": 0.4306628242074928, + "grad_norm": 1.4261590730738651, + "learning_rate": 1.8544968925369479e-06, + "loss": 0.4804280996322632, + "step": 1868 + }, + { + "epoch": 0.4308933717579251, + "grad_norm": 1.0367943571108267, + "learning_rate": 1.8542988298918675e-06, + "loss": 0.546431303024292, + "step": 1869 + }, + { + "epoch": 0.43112391930835736, + "grad_norm": 1.1416319892947229, + "learning_rate": 1.8541006431260542e-06, + "loss": 0.504807710647583, + "step": 1870 + }, + { + "epoch": 0.43135446685878964, + "grad_norm": 0.9126882501045127, + "learning_rate": 1.8539023322683023e-06, + "loss": 0.5243191719055176, + "step": 1871 + }, + { + "epoch": 0.4315850144092219, + "grad_norm": 1.0353650163291783, + "learning_rate": 1.8537038973474245e-06, + "loss": 0.5665335655212402, + "step": 1872 + }, + { + "epoch": 0.4318155619596542, + "grad_norm": 0.9834001102421943, + "learning_rate": 1.8535053383922516e-06, + "loss": 0.5381483435630798, + "step": 1873 + }, + { + "epoch": 0.43204610951008643, + "grad_norm": 1.1487056035004923, + "learning_rate": 1.8533066554316317e-06, + "loss": 0.6149561405181885, + "step": 1874 + }, + { + "epoch": 0.4322766570605187, + "grad_norm": 1.2246632391367256, + "learning_rate": 1.8531078484944315e-06, + "loss": 0.46560800075531006, + "step": 1875 + }, + { + "epoch": 0.432507204610951, + "grad_norm": 1.1446369126118372, + "learning_rate": 1.8529089176095356e-06, + "loss": 0.5057603120803833, + "step": 1876 + }, + { + "epoch": 0.43273775216138327, + "grad_norm": 1.4054085121564255, + "learning_rate": 1.8527098628058467e-06, + "loss": 0.5998879671096802, + "step": 1877 + }, + { + "epoch": 0.43296829971181555, + "grad_norm": 1.280386466022756, + "learning_rate": 1.852510684112285e-06, + "loss": 0.4326424300670624, + "step": 1878 + }, + { + "epoch": 0.43319884726224783, + "grad_norm": 1.3156502516901807, + "learning_rate": 1.8523113815577898e-06, + "loss": 0.5775609016418457, + "step": 1879 + }, + { + "epoch": 0.4334293948126801, + "grad_norm": 1.1983240449158061, + "learning_rate": 1.852111955171317e-06, + "loss": 0.4824531674385071, + "step": 1880 + }, + { + "epoch": 0.4336599423631124, + "grad_norm": 1.0538528260927602, + "learning_rate": 1.8519124049818415e-06, + "loss": 0.5059521198272705, + "step": 1881 + }, + { + "epoch": 0.4338904899135447, + "grad_norm": 1.3615012333103178, + "learning_rate": 1.851712731018356e-06, + "loss": 0.605829119682312, + "step": 1882 + }, + { + "epoch": 0.43412103746397696, + "grad_norm": 1.1443647209997476, + "learning_rate": 1.8515129333098707e-06, + "loss": 0.5453581809997559, + "step": 1883 + }, + { + "epoch": 0.43435158501440924, + "grad_norm": 0.9094855258644963, + "learning_rate": 1.8513130118854144e-06, + "loss": 0.4414307475090027, + "step": 1884 + }, + { + "epoch": 0.4345821325648415, + "grad_norm": 1.187455945175179, + "learning_rate": 1.8511129667740333e-06, + "loss": 0.46538764238357544, + "step": 1885 + }, + { + "epoch": 0.4348126801152738, + "grad_norm": 1.2074916052806453, + "learning_rate": 1.8509127980047925e-06, + "loss": 0.5243799686431885, + "step": 1886 + }, + { + "epoch": 0.4350432276657061, + "grad_norm": 1.1750450903566612, + "learning_rate": 1.8507125056067736e-06, + "loss": 0.49447011947631836, + "step": 1887 + }, + { + "epoch": 0.4352737752161383, + "grad_norm": 1.2669723260527122, + "learning_rate": 1.8505120896090775e-06, + "loss": 0.5576674342155457, + "step": 1888 + }, + { + "epoch": 0.4355043227665706, + "grad_norm": 1.252977351011341, + "learning_rate": 1.8503115500408226e-06, + "loss": 0.534508466720581, + "step": 1889 + }, + { + "epoch": 0.43573487031700286, + "grad_norm": 1.1653246216034023, + "learning_rate": 1.8501108869311452e-06, + "loss": 0.6251751184463501, + "step": 1890 + }, + { + "epoch": 0.43596541786743515, + "grad_norm": 1.248545901064603, + "learning_rate": 1.8499101003091993e-06, + "loss": 0.5061008334159851, + "step": 1891 + }, + { + "epoch": 0.4361959654178674, + "grad_norm": 1.3943513634974303, + "learning_rate": 1.8497091902041573e-06, + "loss": 0.5893880128860474, + "step": 1892 + }, + { + "epoch": 0.4364265129682997, + "grad_norm": 1.1272604118781044, + "learning_rate": 1.8495081566452093e-06, + "loss": 0.4692481458187103, + "step": 1893 + }, + { + "epoch": 0.436657060518732, + "grad_norm": 1.1428179360440205, + "learning_rate": 1.8493069996615633e-06, + "loss": 0.5942026376724243, + "step": 1894 + }, + { + "epoch": 0.43688760806916427, + "grad_norm": 1.006636208596501, + "learning_rate": 1.8491057192824456e-06, + "loss": 0.45053642988204956, + "step": 1895 + }, + { + "epoch": 0.43711815561959655, + "grad_norm": 1.2982117381258422, + "learning_rate": 1.8489043155371e-06, + "loss": 0.5858089327812195, + "step": 1896 + }, + { + "epoch": 0.43734870317002883, + "grad_norm": 1.4148475744082645, + "learning_rate": 1.8487027884547878e-06, + "loss": 0.5926138162612915, + "step": 1897 + }, + { + "epoch": 0.4375792507204611, + "grad_norm": 1.1520460405179238, + "learning_rate": 1.8485011380647898e-06, + "loss": 0.5945650339126587, + "step": 1898 + }, + { + "epoch": 0.4378097982708934, + "grad_norm": 1.082365437697179, + "learning_rate": 1.8482993643964033e-06, + "loss": 0.47609788179397583, + "step": 1899 + }, + { + "epoch": 0.43804034582132567, + "grad_norm": 1.1243277241033836, + "learning_rate": 1.8480974674789435e-06, + "loss": 0.53432697057724, + "step": 1900 + }, + { + "epoch": 0.43827089337175795, + "grad_norm": 1.1561114509211146, + "learning_rate": 1.8478954473417448e-06, + "loss": 0.5097007155418396, + "step": 1901 + }, + { + "epoch": 0.4385014409221902, + "grad_norm": 1.2348452315788374, + "learning_rate": 1.8476933040141573e-06, + "loss": 0.4931800365447998, + "step": 1902 + }, + { + "epoch": 0.43873198847262246, + "grad_norm": 1.08234776361571, + "learning_rate": 1.8474910375255516e-06, + "loss": 0.4183500409126282, + "step": 1903 + }, + { + "epoch": 0.43896253602305474, + "grad_norm": 1.4177312445162358, + "learning_rate": 1.8472886479053144e-06, + "loss": 0.5541513562202454, + "step": 1904 + }, + { + "epoch": 0.439193083573487, + "grad_norm": 1.2624803894699181, + "learning_rate": 1.8470861351828508e-06, + "loss": 0.5068531632423401, + "step": 1905 + }, + { + "epoch": 0.4394236311239193, + "grad_norm": 1.2393049802147562, + "learning_rate": 1.8468834993875837e-06, + "loss": 0.5673441886901855, + "step": 1906 + }, + { + "epoch": 0.4396541786743516, + "grad_norm": 1.243028720976925, + "learning_rate": 1.8466807405489543e-06, + "loss": 0.5551744699478149, + "step": 1907 + }, + { + "epoch": 0.43988472622478386, + "grad_norm": 1.2837612370283014, + "learning_rate": 1.846477858696421e-06, + "loss": 0.5674556493759155, + "step": 1908 + }, + { + "epoch": 0.44011527377521614, + "grad_norm": 1.1781882177034535, + "learning_rate": 1.8462748538594606e-06, + "loss": 0.5947737097740173, + "step": 1909 + }, + { + "epoch": 0.4403458213256484, + "grad_norm": 1.1459887068925942, + "learning_rate": 1.8460717260675675e-06, + "loss": 0.4657576084136963, + "step": 1910 + }, + { + "epoch": 0.4405763688760807, + "grad_norm": 1.1740915126815885, + "learning_rate": 1.8458684753502541e-06, + "loss": 0.5272006988525391, + "step": 1911 + }, + { + "epoch": 0.440806916426513, + "grad_norm": 0.9441836906401117, + "learning_rate": 1.8456651017370507e-06, + "loss": 0.43320992588996887, + "step": 1912 + }, + { + "epoch": 0.44103746397694527, + "grad_norm": 1.303994087100911, + "learning_rate": 1.8454616052575051e-06, + "loss": 0.5666035413742065, + "step": 1913 + }, + { + "epoch": 0.44126801152737755, + "grad_norm": 1.207816046586181, + "learning_rate": 1.845257985941184e-06, + "loss": 0.4790865182876587, + "step": 1914 + }, + { + "epoch": 0.44149855907780977, + "grad_norm": 1.011869278505435, + "learning_rate": 1.8450542438176702e-06, + "loss": 0.47281613945961, + "step": 1915 + }, + { + "epoch": 0.44172910662824205, + "grad_norm": 1.176772034844018, + "learning_rate": 1.8448503789165656e-06, + "loss": 0.460035115480423, + "step": 1916 + }, + { + "epoch": 0.44195965417867433, + "grad_norm": 1.422851587462456, + "learning_rate": 1.8446463912674898e-06, + "loss": 0.5391891002655029, + "step": 1917 + }, + { + "epoch": 0.4421902017291066, + "grad_norm": 1.0200200858868254, + "learning_rate": 1.84444228090008e-06, + "loss": 0.5077770352363586, + "step": 1918 + }, + { + "epoch": 0.4424207492795389, + "grad_norm": 1.1154128808888242, + "learning_rate": 1.8442380478439914e-06, + "loss": 0.4816160202026367, + "step": 1919 + }, + { + "epoch": 0.4426512968299712, + "grad_norm": 1.440872793963344, + "learning_rate": 1.844033692128897e-06, + "loss": 0.5676149725914001, + "step": 1920 + }, + { + "epoch": 0.44288184438040346, + "grad_norm": 1.2363025568528492, + "learning_rate": 1.843829213784487e-06, + "loss": 0.5344497561454773, + "step": 1921 + }, + { + "epoch": 0.44311239193083574, + "grad_norm": 0.9579491754815904, + "learning_rate": 1.843624612840471e-06, + "loss": 0.48390740156173706, + "step": 1922 + }, + { + "epoch": 0.443342939481268, + "grad_norm": 0.9939274394683353, + "learning_rate": 1.8434198893265744e-06, + "loss": 0.47681474685668945, + "step": 1923 + }, + { + "epoch": 0.4435734870317003, + "grad_norm": 1.3607634076805728, + "learning_rate": 1.843215043272542e-06, + "loss": 0.5424403548240662, + "step": 1924 + }, + { + "epoch": 0.4438040345821326, + "grad_norm": 1.132944592162935, + "learning_rate": 1.8430100747081357e-06, + "loss": 0.5096845030784607, + "step": 1925 + }, + { + "epoch": 0.44403458213256486, + "grad_norm": 1.2412079673939016, + "learning_rate": 1.842804983663135e-06, + "loss": 0.4748343229293823, + "step": 1926 + }, + { + "epoch": 0.44426512968299714, + "grad_norm": 1.1920300276188596, + "learning_rate": 1.8425997701673377e-06, + "loss": 0.498948335647583, + "step": 1927 + }, + { + "epoch": 0.4444956772334294, + "grad_norm": 1.2541237667041538, + "learning_rate": 1.842394434250559e-06, + "loss": 0.5499871969223022, + "step": 1928 + }, + { + "epoch": 0.44472622478386165, + "grad_norm": 1.0416927811353323, + "learning_rate": 1.8421889759426327e-06, + "loss": 0.5115629434585571, + "step": 1929 + }, + { + "epoch": 0.4449567723342939, + "grad_norm": 1.125699371509029, + "learning_rate": 1.841983395273409e-06, + "loss": 0.6036213636398315, + "step": 1930 + }, + { + "epoch": 0.4451873198847262, + "grad_norm": 1.0090846446480575, + "learning_rate": 1.8417776922727572e-06, + "loss": 0.5032718777656555, + "step": 1931 + }, + { + "epoch": 0.4454178674351585, + "grad_norm": 1.094758440725449, + "learning_rate": 1.8415718669705633e-06, + "loss": 0.5102940797805786, + "step": 1932 + }, + { + "epoch": 0.44564841498559077, + "grad_norm": 1.242891540202676, + "learning_rate": 1.8413659193967322e-06, + "loss": 0.5087441205978394, + "step": 1933 + }, + { + "epoch": 0.44587896253602305, + "grad_norm": 1.0962110976461552, + "learning_rate": 1.841159849581185e-06, + "loss": 0.49124574661254883, + "step": 1934 + }, + { + "epoch": 0.44610951008645533, + "grad_norm": 1.065512972031945, + "learning_rate": 1.8409536575538627e-06, + "loss": 0.4788215160369873, + "step": 1935 + }, + { + "epoch": 0.4463400576368876, + "grad_norm": 1.0673448954130573, + "learning_rate": 1.8407473433447218e-06, + "loss": 0.47166967391967773, + "step": 1936 + }, + { + "epoch": 0.4465706051873199, + "grad_norm": 1.2748997262930661, + "learning_rate": 1.840540906983738e-06, + "loss": 0.5045751333236694, + "step": 1937 + }, + { + "epoch": 0.44680115273775217, + "grad_norm": 1.286853008681351, + "learning_rate": 1.8403343485009044e-06, + "loss": 0.45810800790786743, + "step": 1938 + }, + { + "epoch": 0.44703170028818445, + "grad_norm": 1.1970009042005485, + "learning_rate": 1.840127667926232e-06, + "loss": 0.5283209085464478, + "step": 1939 + }, + { + "epoch": 0.44726224783861673, + "grad_norm": 1.1262937811137792, + "learning_rate": 1.8399208652897492e-06, + "loss": 0.5559916496276855, + "step": 1940 + }, + { + "epoch": 0.447492795389049, + "grad_norm": 1.1722580551533806, + "learning_rate": 1.839713940621502e-06, + "loss": 0.5017634034156799, + "step": 1941 + }, + { + "epoch": 0.4477233429394813, + "grad_norm": 1.1582944210519264, + "learning_rate": 1.8395068939515545e-06, + "loss": 0.4802021384239197, + "step": 1942 + }, + { + "epoch": 0.4479538904899135, + "grad_norm": 1.3831202159733433, + "learning_rate": 1.8392997253099887e-06, + "loss": 0.610235333442688, + "step": 1943 + }, + { + "epoch": 0.4481844380403458, + "grad_norm": 1.1590182995459894, + "learning_rate": 1.839092434726904e-06, + "loss": 0.5174393057823181, + "step": 1944 + }, + { + "epoch": 0.4484149855907781, + "grad_norm": 1.3203957347602924, + "learning_rate": 1.8388850222324171e-06, + "loss": 0.5430256128311157, + "step": 1945 + }, + { + "epoch": 0.44864553314121036, + "grad_norm": 1.102963606742874, + "learning_rate": 1.8386774878566635e-06, + "loss": 0.5431778430938721, + "step": 1946 + }, + { + "epoch": 0.44887608069164264, + "grad_norm": 1.2399915730702564, + "learning_rate": 1.8384698316297952e-06, + "loss": 0.5374635457992554, + "step": 1947 + }, + { + "epoch": 0.4491066282420749, + "grad_norm": 1.294777639188434, + "learning_rate": 1.8382620535819831e-06, + "loss": 0.5724903345108032, + "step": 1948 + }, + { + "epoch": 0.4493371757925072, + "grad_norm": 1.1486927176754915, + "learning_rate": 1.8380541537434148e-06, + "loss": 0.6397042274475098, + "step": 1949 + }, + { + "epoch": 0.4495677233429395, + "grad_norm": 1.2579568239218235, + "learning_rate": 1.8378461321442961e-06, + "loss": 0.6346575021743774, + "step": 1950 + }, + { + "epoch": 0.44979827089337177, + "grad_norm": 1.3678754797352473, + "learning_rate": 1.83763798881485e-06, + "loss": 0.5354228019714355, + "step": 1951 + }, + { + "epoch": 0.45002881844380405, + "grad_norm": 1.05476245879278, + "learning_rate": 1.8374297237853185e-06, + "loss": 0.42358189821243286, + "step": 1952 + }, + { + "epoch": 0.4502593659942363, + "grad_norm": 1.182016543993005, + "learning_rate": 1.8372213370859592e-06, + "loss": 0.5547488331794739, + "step": 1953 + }, + { + "epoch": 0.4504899135446686, + "grad_norm": 1.3456900721260578, + "learning_rate": 1.8370128287470493e-06, + "loss": 0.5722674131393433, + "step": 1954 + }, + { + "epoch": 0.4507204610951009, + "grad_norm": 1.16212266758737, + "learning_rate": 1.8368041987988824e-06, + "loss": 0.46524208784103394, + "step": 1955 + }, + { + "epoch": 0.4509510086455331, + "grad_norm": 1.0688055041729316, + "learning_rate": 1.836595447271771e-06, + "loss": 0.4921358525753021, + "step": 1956 + }, + { + "epoch": 0.4511815561959654, + "grad_norm": 1.3682259724330377, + "learning_rate": 1.8363865741960436e-06, + "loss": 0.4774383008480072, + "step": 1957 + }, + { + "epoch": 0.4514121037463977, + "grad_norm": 1.3347835036444615, + "learning_rate": 1.8361775796020481e-06, + "loss": 0.5653456449508667, + "step": 1958 + }, + { + "epoch": 0.45164265129682996, + "grad_norm": 1.1677588385218338, + "learning_rate": 1.8359684635201487e-06, + "loss": 0.49100229144096375, + "step": 1959 + }, + { + "epoch": 0.45187319884726224, + "grad_norm": 1.152014645660814, + "learning_rate": 1.8357592259807276e-06, + "loss": 0.5214860439300537, + "step": 1960 + }, + { + "epoch": 0.4521037463976945, + "grad_norm": 1.3874409389020983, + "learning_rate": 1.8355498670141859e-06, + "loss": 0.5328176021575928, + "step": 1961 + }, + { + "epoch": 0.4523342939481268, + "grad_norm": 1.4332841300178276, + "learning_rate": 1.83534038665094e-06, + "loss": 0.5857157111167908, + "step": 1962 + }, + { + "epoch": 0.4525648414985591, + "grad_norm": 1.2855323158344572, + "learning_rate": 1.8351307849214258e-06, + "loss": 0.6459437608718872, + "step": 1963 + }, + { + "epoch": 0.45279538904899136, + "grad_norm": 1.2744824051695016, + "learning_rate": 1.8349210618560967e-06, + "loss": 0.6034260988235474, + "step": 1964 + }, + { + "epoch": 0.45302593659942364, + "grad_norm": 1.3214790421577447, + "learning_rate": 1.8347112174854224e-06, + "loss": 0.49320968985557556, + "step": 1965 + }, + { + "epoch": 0.4532564841498559, + "grad_norm": 1.1125399152311024, + "learning_rate": 1.834501251839892e-06, + "loss": 0.4928455352783203, + "step": 1966 + }, + { + "epoch": 0.4534870317002882, + "grad_norm": 1.092912119431471, + "learning_rate": 1.8342911649500104e-06, + "loss": 0.4440082013607025, + "step": 1967 + }, + { + "epoch": 0.4537175792507205, + "grad_norm": 1.129522243569259, + "learning_rate": 1.8340809568463016e-06, + "loss": 0.4822162389755249, + "step": 1968 + }, + { + "epoch": 0.45394812680115276, + "grad_norm": 1.2084769396869046, + "learning_rate": 1.8338706275593066e-06, + "loss": 0.5176507830619812, + "step": 1969 + }, + { + "epoch": 0.454178674351585, + "grad_norm": 1.0817164261968486, + "learning_rate": 1.8336601771195839e-06, + "loss": 0.49748751521110535, + "step": 1970 + }, + { + "epoch": 0.45440922190201727, + "grad_norm": 1.4007065984620797, + "learning_rate": 1.83344960555771e-06, + "loss": 0.6159261465072632, + "step": 1971 + }, + { + "epoch": 0.45463976945244955, + "grad_norm": 1.0943694323176245, + "learning_rate": 1.8332389129042784e-06, + "loss": 0.4831198453903198, + "step": 1972 + }, + { + "epoch": 0.45487031700288183, + "grad_norm": 1.0823216779103817, + "learning_rate": 1.833028099189901e-06, + "loss": 0.5536303520202637, + "step": 1973 + }, + { + "epoch": 0.4551008645533141, + "grad_norm": 1.0841447387467815, + "learning_rate": 1.8328171644452067e-06, + "loss": 0.5078235268592834, + "step": 1974 + }, + { + "epoch": 0.4553314121037464, + "grad_norm": 1.2162049122078602, + "learning_rate": 1.8326061087008418e-06, + "loss": 0.37912893295288086, + "step": 1975 + }, + { + "epoch": 0.45556195965417867, + "grad_norm": 1.1731934530051857, + "learning_rate": 1.8323949319874708e-06, + "loss": 0.4849812090396881, + "step": 1976 + }, + { + "epoch": 0.45579250720461095, + "grad_norm": 1.3841903464729448, + "learning_rate": 1.8321836343357752e-06, + "loss": 0.4759640097618103, + "step": 1977 + }, + { + "epoch": 0.45602305475504323, + "grad_norm": 1.1855814813320218, + "learning_rate": 1.8319722157764549e-06, + "loss": 0.5319406986236572, + "step": 1978 + }, + { + "epoch": 0.4562536023054755, + "grad_norm": 1.1795216044414802, + "learning_rate": 1.8317606763402265e-06, + "loss": 0.4678229093551636, + "step": 1979 + }, + { + "epoch": 0.4564841498559078, + "grad_norm": 1.1514814772775641, + "learning_rate": 1.8315490160578243e-06, + "loss": 0.5077648162841797, + "step": 1980 + }, + { + "epoch": 0.4567146974063401, + "grad_norm": 1.123179531902537, + "learning_rate": 1.8313372349600002e-06, + "loss": 0.5307132005691528, + "step": 1981 + }, + { + "epoch": 0.45694524495677236, + "grad_norm": 1.2210423439646636, + "learning_rate": 1.8311253330775247e-06, + "loss": 0.5892186760902405, + "step": 1982 + }, + { + "epoch": 0.45717579250720464, + "grad_norm": 1.1778654127023787, + "learning_rate": 1.830913310441184e-06, + "loss": 0.5626486539840698, + "step": 1983 + }, + { + "epoch": 0.45740634005763686, + "grad_norm": 0.999814167949733, + "learning_rate": 1.830701167081783e-06, + "loss": 0.4591505825519562, + "step": 1984 + }, + { + "epoch": 0.45763688760806914, + "grad_norm": 1.4041250876438491, + "learning_rate": 1.8304889030301442e-06, + "loss": 0.47362181544303894, + "step": 1985 + }, + { + "epoch": 0.4578674351585014, + "grad_norm": 1.1193514072723911, + "learning_rate": 1.8302765183171071e-06, + "loss": 0.5645753145217896, + "step": 1986 + }, + { + "epoch": 0.4580979827089337, + "grad_norm": 1.3651592876994867, + "learning_rate": 1.8300640129735294e-06, + "loss": 0.47337716817855835, + "step": 1987 + }, + { + "epoch": 0.458328530259366, + "grad_norm": 1.3024146234369909, + "learning_rate": 1.8298513870302852e-06, + "loss": 0.6080120801925659, + "step": 1988 + }, + { + "epoch": 0.45855907780979827, + "grad_norm": 1.126672680817471, + "learning_rate": 1.8296386405182673e-06, + "loss": 0.5114408135414124, + "step": 1989 + }, + { + "epoch": 0.45878962536023055, + "grad_norm": 1.4785881214685916, + "learning_rate": 1.8294257734683857e-06, + "loss": 0.5666244029998779, + "step": 1990 + }, + { + "epoch": 0.4590201729106628, + "grad_norm": 1.3569918428175534, + "learning_rate": 1.8292127859115674e-06, + "loss": 0.4955924153327942, + "step": 1991 + }, + { + "epoch": 0.4592507204610951, + "grad_norm": 1.124675073336242, + "learning_rate": 1.8289996778787575e-06, + "loss": 0.5668392181396484, + "step": 1992 + }, + { + "epoch": 0.4594812680115274, + "grad_norm": 1.0789902638202715, + "learning_rate": 1.828786449400918e-06, + "loss": 0.5244185328483582, + "step": 1993 + }, + { + "epoch": 0.45971181556195967, + "grad_norm": 1.287548290174774, + "learning_rate": 1.8285731005090297e-06, + "loss": 0.6035805940628052, + "step": 1994 + }, + { + "epoch": 0.45994236311239195, + "grad_norm": 1.360909428674116, + "learning_rate": 1.828359631234089e-06, + "loss": 0.5430639982223511, + "step": 1995 + }, + { + "epoch": 0.46017291066282423, + "grad_norm": 1.1164961359354388, + "learning_rate": 1.8281460416071112e-06, + "loss": 0.40246638655662537, + "step": 1996 + }, + { + "epoch": 0.4604034582132565, + "grad_norm": 1.217124013278456, + "learning_rate": 1.8279323316591286e-06, + "loss": 0.522178053855896, + "step": 1997 + }, + { + "epoch": 0.46063400576368874, + "grad_norm": 1.0095360764724484, + "learning_rate": 1.8277185014211911e-06, + "loss": 0.4550439119338989, + "step": 1998 + }, + { + "epoch": 0.460864553314121, + "grad_norm": 1.402214335246735, + "learning_rate": 1.8275045509243659e-06, + "loss": 0.47628867626190186, + "step": 1999 + }, + { + "epoch": 0.4610951008645533, + "grad_norm": 1.2641661070203307, + "learning_rate": 1.8272904801997376e-06, + "loss": 0.514962911605835, + "step": 2000 + }, + { + "epoch": 0.4613256484149856, + "grad_norm": 1.3606266892470449, + "learning_rate": 1.8270762892784086e-06, + "loss": 0.536049485206604, + "step": 2001 + }, + { + "epoch": 0.46155619596541786, + "grad_norm": 1.285847350621859, + "learning_rate": 1.8268619781914989e-06, + "loss": 0.5808422565460205, + "step": 2002 + }, + { + "epoch": 0.46178674351585014, + "grad_norm": 1.124706413285645, + "learning_rate": 1.8266475469701455e-06, + "loss": 0.5651894807815552, + "step": 2003 + }, + { + "epoch": 0.4620172910662824, + "grad_norm": 1.2017589465119973, + "learning_rate": 1.826432995645503e-06, + "loss": 0.4927813708782196, + "step": 2004 + }, + { + "epoch": 0.4622478386167147, + "grad_norm": 1.2175911343424217, + "learning_rate": 1.8262183242487433e-06, + "loss": 0.5447172522544861, + "step": 2005 + }, + { + "epoch": 0.462478386167147, + "grad_norm": 1.182254404157546, + "learning_rate": 1.8260035328110561e-06, + "loss": 0.4893835783004761, + "step": 2006 + }, + { + "epoch": 0.46270893371757926, + "grad_norm": 1.4394250466795038, + "learning_rate": 1.8257886213636483e-06, + "loss": 0.5068031549453735, + "step": 2007 + }, + { + "epoch": 0.46293948126801154, + "grad_norm": 1.0045726883537807, + "learning_rate": 1.8255735899377442e-06, + "loss": 0.45055803656578064, + "step": 2008 + }, + { + "epoch": 0.4631700288184438, + "grad_norm": 1.1822319586036718, + "learning_rate": 1.825358438564586e-06, + "loss": 0.48656368255615234, + "step": 2009 + }, + { + "epoch": 0.4634005763688761, + "grad_norm": 1.058216918397958, + "learning_rate": 1.8251431672754328e-06, + "loss": 0.4219861626625061, + "step": 2010 + }, + { + "epoch": 0.46363112391930833, + "grad_norm": 1.1972282872935691, + "learning_rate": 1.824927776101561e-06, + "loss": 0.5223569869995117, + "step": 2011 + }, + { + "epoch": 0.4638616714697406, + "grad_norm": 1.3462274105768874, + "learning_rate": 1.8247122650742647e-06, + "loss": 0.47259521484375, + "step": 2012 + }, + { + "epoch": 0.4640922190201729, + "grad_norm": 1.1607275951902687, + "learning_rate": 1.8244966342248558e-06, + "loss": 0.5716425180435181, + "step": 2013 + }, + { + "epoch": 0.46432276657060517, + "grad_norm": 1.159709514247773, + "learning_rate": 1.824280883584663e-06, + "loss": 0.5796461701393127, + "step": 2014 + }, + { + "epoch": 0.46455331412103745, + "grad_norm": 1.2308298420460582, + "learning_rate": 1.8240650131850325e-06, + "loss": 0.49397438764572144, + "step": 2015 + }, + { + "epoch": 0.46478386167146973, + "grad_norm": 1.0702319858460478, + "learning_rate": 1.8238490230573285e-06, + "loss": 0.4773065447807312, + "step": 2016 + }, + { + "epoch": 0.465014409221902, + "grad_norm": 1.0911071946042072, + "learning_rate": 1.8236329132329314e-06, + "loss": 0.5000171065330505, + "step": 2017 + }, + { + "epoch": 0.4652449567723343, + "grad_norm": 1.3472772685703793, + "learning_rate": 1.8234166837432403e-06, + "loss": 0.6179405450820923, + "step": 2018 + }, + { + "epoch": 0.4654755043227666, + "grad_norm": 1.1359405392356112, + "learning_rate": 1.823200334619671e-06, + "loss": 0.5591844320297241, + "step": 2019 + }, + { + "epoch": 0.46570605187319886, + "grad_norm": 0.966495776089465, + "learning_rate": 1.8229838658936564e-06, + "loss": 0.5247224569320679, + "step": 2020 + }, + { + "epoch": 0.46593659942363114, + "grad_norm": 1.1427900374179103, + "learning_rate": 1.8227672775966476e-06, + "loss": 0.5076649188995361, + "step": 2021 + }, + { + "epoch": 0.4661671469740634, + "grad_norm": 1.214581270233277, + "learning_rate": 1.822550569760112e-06, + "loss": 0.5224828124046326, + "step": 2022 + }, + { + "epoch": 0.4663976945244957, + "grad_norm": 1.466153109015355, + "learning_rate": 1.822333742415536e-06, + "loss": 0.595023512840271, + "step": 2023 + }, + { + "epoch": 0.466628242074928, + "grad_norm": 1.0217685477065697, + "learning_rate": 1.8221167955944216e-06, + "loss": 0.4959990978240967, + "step": 2024 + }, + { + "epoch": 0.4668587896253602, + "grad_norm": 1.1676437653997107, + "learning_rate": 1.8218997293282893e-06, + "loss": 0.4608197808265686, + "step": 2025 + }, + { + "epoch": 0.4670893371757925, + "grad_norm": 0.9821397024600826, + "learning_rate": 1.821682543648676e-06, + "loss": 0.4783310294151306, + "step": 2026 + }, + { + "epoch": 0.46731988472622477, + "grad_norm": 1.065215299840776, + "learning_rate": 1.821465238587137e-06, + "loss": 0.5161324739456177, + "step": 2027 + }, + { + "epoch": 0.46755043227665705, + "grad_norm": 1.2816207732030394, + "learning_rate": 1.8212478141752446e-06, + "loss": 0.5252971053123474, + "step": 2028 + }, + { + "epoch": 0.4677809798270893, + "grad_norm": 1.0910388272274432, + "learning_rate": 1.8210302704445878e-06, + "loss": 0.5302141904830933, + "step": 2029 + }, + { + "epoch": 0.4680115273775216, + "grad_norm": 1.0800329607334094, + "learning_rate": 1.8208126074267738e-06, + "loss": 0.46280181407928467, + "step": 2030 + }, + { + "epoch": 0.4682420749279539, + "grad_norm": 1.2445319229306726, + "learning_rate": 1.8205948251534268e-06, + "loss": 0.6222575306892395, + "step": 2031 + }, + { + "epoch": 0.46847262247838617, + "grad_norm": 1.1593148286418322, + "learning_rate": 1.8203769236561884e-06, + "loss": 0.49378883838653564, + "step": 2032 + }, + { + "epoch": 0.46870317002881845, + "grad_norm": 1.0157150881959576, + "learning_rate": 1.8201589029667165e-06, + "loss": 0.4863582253456116, + "step": 2033 + }, + { + "epoch": 0.46893371757925073, + "grad_norm": 1.4072028447431575, + "learning_rate": 1.8199407631166888e-06, + "loss": 0.543857991695404, + "step": 2034 + }, + { + "epoch": 0.469164265129683, + "grad_norm": 1.0771978734352377, + "learning_rate": 1.8197225041377972e-06, + "loss": 0.4453166127204895, + "step": 2035 + }, + { + "epoch": 0.4693948126801153, + "grad_norm": 1.4395287176153397, + "learning_rate": 1.8195041260617534e-06, + "loss": 0.642902672290802, + "step": 2036 + }, + { + "epoch": 0.4696253602305476, + "grad_norm": 1.2089411302375417, + "learning_rate": 1.8192856289202853e-06, + "loss": 0.433509886264801, + "step": 2037 + }, + { + "epoch": 0.46985590778097985, + "grad_norm": 1.2704279876832443, + "learning_rate": 1.8190670127451381e-06, + "loss": 0.5348495244979858, + "step": 2038 + }, + { + "epoch": 0.4700864553314121, + "grad_norm": 1.3239841182062162, + "learning_rate": 1.8188482775680745e-06, + "loss": 0.5972989797592163, + "step": 2039 + }, + { + "epoch": 0.47031700288184436, + "grad_norm": 1.1466823333239362, + "learning_rate": 1.8186294234208745e-06, + "loss": 0.5785202980041504, + "step": 2040 + }, + { + "epoch": 0.47054755043227664, + "grad_norm": 1.2551617120211864, + "learning_rate": 1.8184104503353353e-06, + "loss": 0.4959946572780609, + "step": 2041 + }, + { + "epoch": 0.4707780979827089, + "grad_norm": 1.2660924545498422, + "learning_rate": 1.8181913583432715e-06, + "loss": 0.4742332696914673, + "step": 2042 + }, + { + "epoch": 0.4710086455331412, + "grad_norm": 1.086156350597083, + "learning_rate": 1.8179721474765146e-06, + "loss": 0.5113345980644226, + "step": 2043 + }, + { + "epoch": 0.4712391930835735, + "grad_norm": 1.2191869726515814, + "learning_rate": 1.817752817766914e-06, + "loss": 0.43116605281829834, + "step": 2044 + }, + { + "epoch": 0.47146974063400576, + "grad_norm": 1.4602324212669267, + "learning_rate": 1.8175333692463362e-06, + "loss": 0.40598607063293457, + "step": 2045 + }, + { + "epoch": 0.47170028818443804, + "grad_norm": 1.2700037116465859, + "learning_rate": 1.817313801946664e-06, + "loss": 0.5269483327865601, + "step": 2046 + }, + { + "epoch": 0.4719308357348703, + "grad_norm": 1.3055905499244451, + "learning_rate": 1.817094115899799e-06, + "loss": 0.6085952520370483, + "step": 2047 + }, + { + "epoch": 0.4721613832853026, + "grad_norm": 1.1226437123382058, + "learning_rate": 1.816874311137659e-06, + "loss": 0.4086014926433563, + "step": 2048 + }, + { + "epoch": 0.4723919308357349, + "grad_norm": 1.1725786832352496, + "learning_rate": 1.816654387692179e-06, + "loss": 0.5595110654830933, + "step": 2049 + }, + { + "epoch": 0.47262247838616717, + "grad_norm": 1.2337988950283274, + "learning_rate": 1.8164343455953124e-06, + "loss": 0.5148875713348389, + "step": 2050 + }, + { + "epoch": 0.47285302593659945, + "grad_norm": 1.1613861580132026, + "learning_rate": 1.8162141848790284e-06, + "loss": 0.47360190749168396, + "step": 2051 + }, + { + "epoch": 0.4730835734870317, + "grad_norm": 1.3159698189726914, + "learning_rate": 1.8159939055753144e-06, + "loss": 0.542681097984314, + "step": 2052 + }, + { + "epoch": 0.47331412103746395, + "grad_norm": 0.9407747552971376, + "learning_rate": 1.8157735077161744e-06, + "loss": 0.4918665885925293, + "step": 2053 + }, + { + "epoch": 0.47354466858789623, + "grad_norm": 1.1060379080565603, + "learning_rate": 1.81555299133363e-06, + "loss": 0.4638371467590332, + "step": 2054 + }, + { + "epoch": 0.4737752161383285, + "grad_norm": 1.4597203401356291, + "learning_rate": 1.81533235645972e-06, + "loss": 0.5285000205039978, + "step": 2055 + }, + { + "epoch": 0.4740057636887608, + "grad_norm": 1.1329280057548656, + "learning_rate": 1.8151116031265006e-06, + "loss": 0.4810779094696045, + "step": 2056 + }, + { + "epoch": 0.4742363112391931, + "grad_norm": 1.1882237849544774, + "learning_rate": 1.8148907313660441e-06, + "loss": 0.5632082223892212, + "step": 2057 + }, + { + "epoch": 0.47446685878962536, + "grad_norm": 1.2365131196813692, + "learning_rate": 1.8146697412104422e-06, + "loss": 0.47608134150505066, + "step": 2058 + }, + { + "epoch": 0.47469740634005764, + "grad_norm": 1.0676720083276605, + "learning_rate": 1.8144486326918012e-06, + "loss": 0.47561490535736084, + "step": 2059 + }, + { + "epoch": 0.4749279538904899, + "grad_norm": 1.2561213728895482, + "learning_rate": 1.8142274058422467e-06, + "loss": 0.5634682774543762, + "step": 2060 + }, + { + "epoch": 0.4751585014409222, + "grad_norm": 1.0913143929244284, + "learning_rate": 1.8140060606939202e-06, + "loss": 0.5818713903427124, + "step": 2061 + }, + { + "epoch": 0.4753890489913545, + "grad_norm": 1.2100494893120814, + "learning_rate": 1.8137845972789811e-06, + "loss": 0.4999740719795227, + "step": 2062 + }, + { + "epoch": 0.47561959654178676, + "grad_norm": 1.0912745719982555, + "learning_rate": 1.8135630156296058e-06, + "loss": 0.5102949142456055, + "step": 2063 + }, + { + "epoch": 0.47585014409221904, + "grad_norm": 1.2787817700287705, + "learning_rate": 1.8133413157779876e-06, + "loss": 0.5175629258155823, + "step": 2064 + }, + { + "epoch": 0.4760806916426513, + "grad_norm": 1.2601037233558323, + "learning_rate": 1.8131194977563368e-06, + "loss": 0.5314140915870667, + "step": 2065 + }, + { + "epoch": 0.47631123919308355, + "grad_norm": 1.3763380052693162, + "learning_rate": 1.8128975615968823e-06, + "loss": 0.5157697796821594, + "step": 2066 + }, + { + "epoch": 0.4765417867435158, + "grad_norm": 1.2519563843783486, + "learning_rate": 1.8126755073318682e-06, + "loss": 0.6178181171417236, + "step": 2067 + }, + { + "epoch": 0.4767723342939481, + "grad_norm": 1.0243913567227492, + "learning_rate": 1.8124533349935569e-06, + "loss": 0.5079492926597595, + "step": 2068 + }, + { + "epoch": 0.4770028818443804, + "grad_norm": 1.1632375854196408, + "learning_rate": 1.812231044614228e-06, + "loss": 0.581571102142334, + "step": 2069 + }, + { + "epoch": 0.47723342939481267, + "grad_norm": 1.0935257485384182, + "learning_rate": 1.8120086362261779e-06, + "loss": 0.5577228665351868, + "step": 2070 + }, + { + "epoch": 0.47746397694524495, + "grad_norm": 1.2309123046111559, + "learning_rate": 1.8117861098617197e-06, + "loss": 0.5082104206085205, + "step": 2071 + }, + { + "epoch": 0.47769452449567723, + "grad_norm": 1.1348218977787359, + "learning_rate": 1.8115634655531848e-06, + "loss": 0.4945356249809265, + "step": 2072 + }, + { + "epoch": 0.4779250720461095, + "grad_norm": 1.2419525307146027, + "learning_rate": 1.811340703332921e-06, + "loss": 0.5593979954719543, + "step": 2073 + }, + { + "epoch": 0.4781556195965418, + "grad_norm": 1.2030111845817482, + "learning_rate": 1.8111178232332933e-06, + "loss": 0.44920873641967773, + "step": 2074 + }, + { + "epoch": 0.4783861671469741, + "grad_norm": 0.9934876361350219, + "learning_rate": 1.8108948252866837e-06, + "loss": 0.5078163743019104, + "step": 2075 + }, + { + "epoch": 0.47861671469740635, + "grad_norm": 1.3757026294964343, + "learning_rate": 1.8106717095254913e-06, + "loss": 0.41619086265563965, + "step": 2076 + }, + { + "epoch": 0.47884726224783863, + "grad_norm": 1.4228159723788971, + "learning_rate": 1.8104484759821328e-06, + "loss": 0.6011626124382019, + "step": 2077 + }, + { + "epoch": 0.4790778097982709, + "grad_norm": 1.1103134463956559, + "learning_rate": 1.810225124689042e-06, + "loss": 0.5150628089904785, + "step": 2078 + }, + { + "epoch": 0.4793083573487032, + "grad_norm": 1.3396393945640488, + "learning_rate": 1.8100016556786688e-06, + "loss": 0.5351274609565735, + "step": 2079 + }, + { + "epoch": 0.4795389048991354, + "grad_norm": 1.2255502966022296, + "learning_rate": 1.8097780689834816e-06, + "loss": 0.514049232006073, + "step": 2080 + }, + { + "epoch": 0.4797694524495677, + "grad_norm": 1.2631952502658392, + "learning_rate": 1.8095543646359649e-06, + "loss": 0.5486019849777222, + "step": 2081 + }, + { + "epoch": 0.48, + "grad_norm": 1.0912725423085439, + "learning_rate": 1.8093305426686203e-06, + "loss": 0.5539723634719849, + "step": 2082 + }, + { + "epoch": 0.48023054755043226, + "grad_norm": 1.347806494618309, + "learning_rate": 1.8091066031139675e-06, + "loss": 0.6295641660690308, + "step": 2083 + }, + { + "epoch": 0.48046109510086454, + "grad_norm": 1.4839180788394328, + "learning_rate": 1.808882546004542e-06, + "loss": 0.5032966136932373, + "step": 2084 + }, + { + "epoch": 0.4806916426512968, + "grad_norm": 1.300518495948047, + "learning_rate": 1.8086583713728974e-06, + "loss": 0.5227913856506348, + "step": 2085 + }, + { + "epoch": 0.4809221902017291, + "grad_norm": 1.3494249283086248, + "learning_rate": 1.8084340792516035e-06, + "loss": 0.5598339438438416, + "step": 2086 + }, + { + "epoch": 0.4811527377521614, + "grad_norm": 0.936105706787139, + "learning_rate": 1.808209669673248e-06, + "loss": 0.5645616054534912, + "step": 2087 + }, + { + "epoch": 0.48138328530259367, + "grad_norm": 1.2822280997334143, + "learning_rate": 1.8079851426704352e-06, + "loss": 0.48751646280288696, + "step": 2088 + }, + { + "epoch": 0.48161383285302595, + "grad_norm": 1.2419035742128983, + "learning_rate": 1.8077604982757867e-06, + "loss": 0.528518795967102, + "step": 2089 + }, + { + "epoch": 0.48184438040345823, + "grad_norm": 1.188310462626312, + "learning_rate": 1.8075357365219403e-06, + "loss": 0.5478106737136841, + "step": 2090 + }, + { + "epoch": 0.4820749279538905, + "grad_norm": 1.2009761465671023, + "learning_rate": 1.8073108574415523e-06, + "loss": 0.5034850835800171, + "step": 2091 + }, + { + "epoch": 0.4823054755043228, + "grad_norm": 1.3064576473905998, + "learning_rate": 1.807085861067295e-06, + "loss": 0.5288310050964355, + "step": 2092 + }, + { + "epoch": 0.48253602305475507, + "grad_norm": 1.5549767465705366, + "learning_rate": 1.806860747431858e-06, + "loss": 0.5817336440086365, + "step": 2093 + }, + { + "epoch": 0.4827665706051873, + "grad_norm": 1.1287206353806019, + "learning_rate": 1.806635516567948e-06, + "loss": 0.5058947801589966, + "step": 2094 + }, + { + "epoch": 0.4829971181556196, + "grad_norm": 1.2962898270991883, + "learning_rate": 1.8064101685082886e-06, + "loss": 0.4131927788257599, + "step": 2095 + }, + { + "epoch": 0.48322766570605186, + "grad_norm": 1.3216989005029416, + "learning_rate": 1.8061847032856208e-06, + "loss": 0.5075147151947021, + "step": 2096 + }, + { + "epoch": 0.48345821325648414, + "grad_norm": 1.141303867546858, + "learning_rate": 1.8059591209327022e-06, + "loss": 0.49490487575531006, + "step": 2097 + }, + { + "epoch": 0.4836887608069164, + "grad_norm": 1.1392879390687263, + "learning_rate": 1.8057334214823073e-06, + "loss": 0.6112065315246582, + "step": 2098 + }, + { + "epoch": 0.4839193083573487, + "grad_norm": 1.0180072685628039, + "learning_rate": 1.8055076049672282e-06, + "loss": 0.4702821373939514, + "step": 2099 + }, + { + "epoch": 0.484149855907781, + "grad_norm": 1.0776070240866942, + "learning_rate": 1.8052816714202736e-06, + "loss": 0.4686674475669861, + "step": 2100 + }, + { + "epoch": 0.48438040345821326, + "grad_norm": 1.0343204551386471, + "learning_rate": 1.8050556208742695e-06, + "loss": 0.5218414068222046, + "step": 2101 + }, + { + "epoch": 0.48461095100864554, + "grad_norm": 0.9563633917956249, + "learning_rate": 1.8048294533620582e-06, + "loss": 0.5427126288414001, + "step": 2102 + }, + { + "epoch": 0.4848414985590778, + "grad_norm": 1.271635291620792, + "learning_rate": 1.8046031689165001e-06, + "loss": 0.5796751976013184, + "step": 2103 + }, + { + "epoch": 0.4850720461095101, + "grad_norm": 1.0928705473494975, + "learning_rate": 1.8043767675704718e-06, + "loss": 0.5725299119949341, + "step": 2104 + }, + { + "epoch": 0.4853025936599424, + "grad_norm": 1.0835389410732317, + "learning_rate": 1.8041502493568667e-06, + "loss": 0.507230281829834, + "step": 2105 + }, + { + "epoch": 0.48553314121037466, + "grad_norm": 1.3563692986550824, + "learning_rate": 1.8039236143085958e-06, + "loss": 0.5179410576820374, + "step": 2106 + }, + { + "epoch": 0.4857636887608069, + "grad_norm": 1.0040019176194879, + "learning_rate": 1.8036968624585869e-06, + "loss": 0.49801725149154663, + "step": 2107 + }, + { + "epoch": 0.48599423631123917, + "grad_norm": 1.1386662177255882, + "learning_rate": 1.8034699938397843e-06, + "loss": 0.4731954038143158, + "step": 2108 + }, + { + "epoch": 0.48622478386167145, + "grad_norm": 1.2220519213958883, + "learning_rate": 1.8032430084851505e-06, + "loss": 0.5271996855735779, + "step": 2109 + }, + { + "epoch": 0.48645533141210373, + "grad_norm": 1.3946338651870764, + "learning_rate": 1.803015906427663e-06, + "loss": 0.441206693649292, + "step": 2110 + }, + { + "epoch": 0.486685878962536, + "grad_norm": 1.2215374705306121, + "learning_rate": 1.802788687700318e-06, + "loss": 0.5455319285392761, + "step": 2111 + }, + { + "epoch": 0.4869164265129683, + "grad_norm": 1.148243592256876, + "learning_rate": 1.802561352336128e-06, + "loss": 0.45889076590538025, + "step": 2112 + }, + { + "epoch": 0.4871469740634006, + "grad_norm": 1.2793636515861055, + "learning_rate": 1.8023339003681225e-06, + "loss": 0.5482321977615356, + "step": 2113 + }, + { + "epoch": 0.48737752161383285, + "grad_norm": 1.1670602057135955, + "learning_rate": 1.8021063318293474e-06, + "loss": 0.48072755336761475, + "step": 2114 + }, + { + "epoch": 0.48760806916426513, + "grad_norm": 1.1056861144019507, + "learning_rate": 1.801878646752867e-06, + "loss": 0.4769946336746216, + "step": 2115 + }, + { + "epoch": 0.4878386167146974, + "grad_norm": 1.3095766673379263, + "learning_rate": 1.8016508451717604e-06, + "loss": 0.5335594415664673, + "step": 2116 + }, + { + "epoch": 0.4880691642651297, + "grad_norm": 1.053340143352943, + "learning_rate": 1.801422927119126e-06, + "loss": 0.5021346211433411, + "step": 2117 + }, + { + "epoch": 0.488299711815562, + "grad_norm": 1.056050874174104, + "learning_rate": 1.801194892628077e-06, + "loss": 0.40786126255989075, + "step": 2118 + }, + { + "epoch": 0.48853025936599426, + "grad_norm": 1.1636672663703702, + "learning_rate": 1.8009667417317447e-06, + "loss": 0.5735047459602356, + "step": 2119 + }, + { + "epoch": 0.48876080691642654, + "grad_norm": 1.3770736980628029, + "learning_rate": 1.8007384744632772e-06, + "loss": 0.5656315088272095, + "step": 2120 + }, + { + "epoch": 0.48899135446685876, + "grad_norm": 1.2003142020448403, + "learning_rate": 1.8005100908558393e-06, + "loss": 0.5263736248016357, + "step": 2121 + }, + { + "epoch": 0.48922190201729104, + "grad_norm": 1.14755344931242, + "learning_rate": 1.8002815909426129e-06, + "loss": 0.4543651342391968, + "step": 2122 + }, + { + "epoch": 0.4894524495677233, + "grad_norm": 1.2567132050361414, + "learning_rate": 1.800052974756796e-06, + "loss": 0.5164092183113098, + "step": 2123 + }, + { + "epoch": 0.4896829971181556, + "grad_norm": 1.065148302467529, + "learning_rate": 1.7998242423316053e-06, + "loss": 0.47448939085006714, + "step": 2124 + }, + { + "epoch": 0.4899135446685879, + "grad_norm": 1.2249325142019851, + "learning_rate": 1.7995953937002722e-06, + "loss": 0.5526837706565857, + "step": 2125 + }, + { + "epoch": 0.49014409221902017, + "grad_norm": 1.1191314576308082, + "learning_rate": 1.7993664288960466e-06, + "loss": 0.5415492057800293, + "step": 2126 + }, + { + "epoch": 0.49037463976945245, + "grad_norm": 1.5504915305169138, + "learning_rate": 1.7991373479521943e-06, + "loss": 0.5514999032020569, + "step": 2127 + }, + { + "epoch": 0.49060518731988473, + "grad_norm": 1.1460513293920873, + "learning_rate": 1.7989081509019988e-06, + "loss": 0.5257915258407593, + "step": 2128 + }, + { + "epoch": 0.490835734870317, + "grad_norm": 1.286281965295671, + "learning_rate": 1.7986788377787598e-06, + "loss": 0.5336320400238037, + "step": 2129 + }, + { + "epoch": 0.4910662824207493, + "grad_norm": 1.0512868854767012, + "learning_rate": 1.798449408615794e-06, + "loss": 0.3695172667503357, + "step": 2130 + }, + { + "epoch": 0.49129682997118157, + "grad_norm": 1.2616560050006966, + "learning_rate": 1.7982198634464354e-06, + "loss": 0.47775453329086304, + "step": 2131 + }, + { + "epoch": 0.49152737752161385, + "grad_norm": 0.9945703807662574, + "learning_rate": 1.7979902023040342e-06, + "loss": 0.516392171382904, + "step": 2132 + }, + { + "epoch": 0.49175792507204613, + "grad_norm": 1.1798218769026139, + "learning_rate": 1.7977604252219583e-06, + "loss": 0.525421142578125, + "step": 2133 + }, + { + "epoch": 0.4919884726224784, + "grad_norm": 1.128891548004505, + "learning_rate": 1.7975305322335915e-06, + "loss": 0.5425143241882324, + "step": 2134 + }, + { + "epoch": 0.49221902017291064, + "grad_norm": 0.9471448914040171, + "learning_rate": 1.7973005233723345e-06, + "loss": 0.5511288642883301, + "step": 2135 + }, + { + "epoch": 0.4924495677233429, + "grad_norm": 1.4239095788389364, + "learning_rate": 1.7970703986716058e-06, + "loss": 0.5777868032455444, + "step": 2136 + }, + { + "epoch": 0.4926801152737752, + "grad_norm": 1.2503673840795373, + "learning_rate": 1.79684015816484e-06, + "loss": 0.46998846530914307, + "step": 2137 + }, + { + "epoch": 0.4929106628242075, + "grad_norm": 1.185801465844648, + "learning_rate": 1.7966098018854884e-06, + "loss": 0.5201677680015564, + "step": 2138 + }, + { + "epoch": 0.49314121037463976, + "grad_norm": 1.1459376135454182, + "learning_rate": 1.7963793298670197e-06, + "loss": 0.5589007139205933, + "step": 2139 + }, + { + "epoch": 0.49337175792507204, + "grad_norm": 1.2547944348154982, + "learning_rate": 1.796148742142919e-06, + "loss": 0.5880102515220642, + "step": 2140 + }, + { + "epoch": 0.4936023054755043, + "grad_norm": 1.1932412523736406, + "learning_rate": 1.7959180387466884e-06, + "loss": 0.5860651731491089, + "step": 2141 + }, + { + "epoch": 0.4938328530259366, + "grad_norm": 1.2025551212842234, + "learning_rate": 1.795687219711846e-06, + "loss": 0.42490053176879883, + "step": 2142 + }, + { + "epoch": 0.4940634005763689, + "grad_norm": 1.503932183071744, + "learning_rate": 1.7954562850719283e-06, + "loss": 0.5949894189834595, + "step": 2143 + }, + { + "epoch": 0.49429394812680116, + "grad_norm": 1.1843513634065304, + "learning_rate": 1.7952252348604873e-06, + "loss": 0.5673636198043823, + "step": 2144 + }, + { + "epoch": 0.49452449567723344, + "grad_norm": 1.3424804711783924, + "learning_rate": 1.7949940691110923e-06, + "loss": 0.5709353089332581, + "step": 2145 + }, + { + "epoch": 0.4947550432276657, + "grad_norm": 1.5096007165005685, + "learning_rate": 1.7947627878573292e-06, + "loss": 0.5557876825332642, + "step": 2146 + }, + { + "epoch": 0.494985590778098, + "grad_norm": 1.605658897794125, + "learning_rate": 1.7945313911328008e-06, + "loss": 0.6168490648269653, + "step": 2147 + }, + { + "epoch": 0.4952161383285303, + "grad_norm": 1.0371807620079299, + "learning_rate": 1.7942998789711266e-06, + "loss": 0.5257415771484375, + "step": 2148 + }, + { + "epoch": 0.4954466858789625, + "grad_norm": 1.3491005725160026, + "learning_rate": 1.794068251405943e-06, + "loss": 0.5957303047180176, + "step": 2149 + }, + { + "epoch": 0.4956772334293948, + "grad_norm": 1.3791883865126864, + "learning_rate": 1.7938365084709028e-06, + "loss": 0.5421463251113892, + "step": 2150 + }, + { + "epoch": 0.4959077809798271, + "grad_norm": 1.2199946418762686, + "learning_rate": 1.793604650199676e-06, + "loss": 0.4956265091896057, + "step": 2151 + }, + { + "epoch": 0.49613832853025935, + "grad_norm": 1.528899494407416, + "learning_rate": 1.7933726766259493e-06, + "loss": 0.6387878656387329, + "step": 2152 + }, + { + "epoch": 0.49636887608069163, + "grad_norm": 1.2129061766976703, + "learning_rate": 1.793140587783426e-06, + "loss": 0.48594605922698975, + "step": 2153 + }, + { + "epoch": 0.4965994236311239, + "grad_norm": 1.1561693003542137, + "learning_rate": 1.7929083837058262e-06, + "loss": 0.5035887360572815, + "step": 2154 + }, + { + "epoch": 0.4968299711815562, + "grad_norm": 1.2461631938928077, + "learning_rate": 1.7926760644268868e-06, + "loss": 0.44821372628211975, + "step": 2155 + }, + { + "epoch": 0.4970605187319885, + "grad_norm": 1.3217722479914835, + "learning_rate": 1.7924436299803612e-06, + "loss": 0.5536797642707825, + "step": 2156 + }, + { + "epoch": 0.49729106628242076, + "grad_norm": 1.1618226791661102, + "learning_rate": 1.79221108040002e-06, + "loss": 0.5425612926483154, + "step": 2157 + }, + { + "epoch": 0.49752161383285304, + "grad_norm": 1.2313751283119363, + "learning_rate": 1.7919784157196497e-06, + "loss": 0.5238672494888306, + "step": 2158 + }, + { + "epoch": 0.4977521613832853, + "grad_norm": 1.198679351371627, + "learning_rate": 1.7917456359730543e-06, + "loss": 0.5356197953224182, + "step": 2159 + }, + { + "epoch": 0.4979827089337176, + "grad_norm": 1.0280774531466672, + "learning_rate": 1.7915127411940545e-06, + "loss": 0.4530688226222992, + "step": 2160 + }, + { + "epoch": 0.4982132564841499, + "grad_norm": 1.3115285925447417, + "learning_rate": 1.7912797314164875e-06, + "loss": 0.5374774932861328, + "step": 2161 + }, + { + "epoch": 0.4984438040345821, + "grad_norm": 1.0967027052102745, + "learning_rate": 1.7910466066742068e-06, + "loss": 0.4482705295085907, + "step": 2162 + }, + { + "epoch": 0.4986743515850144, + "grad_norm": 1.2292872660786507, + "learning_rate": 1.7908133670010837e-06, + "loss": 0.4459438920021057, + "step": 2163 + }, + { + "epoch": 0.49890489913544667, + "grad_norm": 1.139537158289869, + "learning_rate": 1.7905800124310044e-06, + "loss": 0.49267393350601196, + "step": 2164 + }, + { + "epoch": 0.49913544668587895, + "grad_norm": 1.355016564375121, + "learning_rate": 1.7903465429978742e-06, + "loss": 0.5207797884941101, + "step": 2165 + }, + { + "epoch": 0.49936599423631123, + "grad_norm": 1.3100564580839544, + "learning_rate": 1.7901129587356128e-06, + "loss": 0.5956555604934692, + "step": 2166 + }, + { + "epoch": 0.4995965417867435, + "grad_norm": 1.396335552060292, + "learning_rate": 1.7898792596781575e-06, + "loss": 0.47083795070648193, + "step": 2167 + }, + { + "epoch": 0.4998270893371758, + "grad_norm": 1.4321432022692109, + "learning_rate": 1.7896454458594631e-06, + "loss": 0.5580272674560547, + "step": 2168 + }, + { + "epoch": 0.5000576368876081, + "grad_norm": 1.1568398545920477, + "learning_rate": 1.7894115173135e-06, + "loss": 0.5935468077659607, + "step": 2169 + }, + { + "epoch": 0.5002881844380404, + "grad_norm": 1.3058147531506012, + "learning_rate": 1.7891774740742553e-06, + "loss": 0.48544418811798096, + "step": 2170 + }, + { + "epoch": 0.5005187319884726, + "grad_norm": 1.0916510321690198, + "learning_rate": 1.7889433161757336e-06, + "loss": 0.5714924335479736, + "step": 2171 + }, + { + "epoch": 0.5007492795389049, + "grad_norm": 1.4318986319035987, + "learning_rate": 1.7887090436519551e-06, + "loss": 0.582866907119751, + "step": 2172 + }, + { + "epoch": 0.5009798270893372, + "grad_norm": 1.092756168813296, + "learning_rate": 1.7884746565369573e-06, + "loss": 0.4349023401737213, + "step": 2173 + }, + { + "epoch": 0.5012103746397695, + "grad_norm": 1.307231397890865, + "learning_rate": 1.7882401548647942e-06, + "loss": 0.40139514207839966, + "step": 2174 + }, + { + "epoch": 0.5014409221902018, + "grad_norm": 1.124014712251471, + "learning_rate": 1.7880055386695366e-06, + "loss": 0.534995436668396, + "step": 2175 + }, + { + "epoch": 0.501671469740634, + "grad_norm": 1.2597913986300637, + "learning_rate": 1.7877708079852716e-06, + "loss": 0.5228029489517212, + "step": 2176 + }, + { + "epoch": 0.5019020172910663, + "grad_norm": 1.2856346391499722, + "learning_rate": 1.7875359628461034e-06, + "loss": 0.6084067821502686, + "step": 2177 + }, + { + "epoch": 0.5021325648414986, + "grad_norm": 1.3340996931649014, + "learning_rate": 1.787301003286152e-06, + "loss": 0.5027675628662109, + "step": 2178 + }, + { + "epoch": 0.5023631123919309, + "grad_norm": 1.1721828803051668, + "learning_rate": 1.7870659293395552e-06, + "loss": 0.6257070302963257, + "step": 2179 + }, + { + "epoch": 0.5025936599423632, + "grad_norm": 1.202911283172134, + "learning_rate": 1.7868307410404664e-06, + "loss": 0.45355063676834106, + "step": 2180 + }, + { + "epoch": 0.5028242074927954, + "grad_norm": 1.1770009421306744, + "learning_rate": 1.7865954384230567e-06, + "loss": 0.5211625099182129, + "step": 2181 + }, + { + "epoch": 0.5030547550432277, + "grad_norm": 1.170570821757426, + "learning_rate": 1.7863600215215117e-06, + "loss": 0.45476096868515015, + "step": 2182 + }, + { + "epoch": 0.5032853025936599, + "grad_norm": 1.1288308091354253, + "learning_rate": 1.7861244903700366e-06, + "loss": 0.5483888983726501, + "step": 2183 + }, + { + "epoch": 0.5035158501440922, + "grad_norm": 1.3267959714324822, + "learning_rate": 1.7858888450028507e-06, + "loss": 0.6125767230987549, + "step": 2184 + }, + { + "epoch": 0.5037463976945245, + "grad_norm": 1.2869025477599574, + "learning_rate": 1.7856530854541912e-06, + "loss": 0.5761919021606445, + "step": 2185 + }, + { + "epoch": 0.5039769452449567, + "grad_norm": 1.280348139691172, + "learning_rate": 1.785417211758311e-06, + "loss": 0.4902021884918213, + "step": 2186 + }, + { + "epoch": 0.504207492795389, + "grad_norm": 1.1062677808924173, + "learning_rate": 1.7851812239494808e-06, + "loss": 0.5162317752838135, + "step": 2187 + }, + { + "epoch": 0.5044380403458213, + "grad_norm": 1.3349217104827404, + "learning_rate": 1.784945122061987e-06, + "loss": 0.5260510444641113, + "step": 2188 + }, + { + "epoch": 0.5046685878962536, + "grad_norm": 1.372484573808915, + "learning_rate": 1.7847089061301324e-06, + "loss": 0.5578324794769287, + "step": 2189 + }, + { + "epoch": 0.5048991354466859, + "grad_norm": 1.1183471298010517, + "learning_rate": 1.7844725761882366e-06, + "loss": 0.5394254922866821, + "step": 2190 + }, + { + "epoch": 0.5051296829971181, + "grad_norm": 1.5877235098098084, + "learning_rate": 1.7842361322706365e-06, + "loss": 0.5949487686157227, + "step": 2191 + }, + { + "epoch": 0.5053602305475504, + "grad_norm": 1.0904188301293072, + "learning_rate": 1.7839995744116844e-06, + "loss": 0.5092563629150391, + "step": 2192 + }, + { + "epoch": 0.5055907780979827, + "grad_norm": 1.1725099724792964, + "learning_rate": 1.7837629026457503e-06, + "loss": 0.4975352883338928, + "step": 2193 + }, + { + "epoch": 0.505821325648415, + "grad_norm": 1.5695722641221526, + "learning_rate": 1.7835261170072196e-06, + "loss": 0.4792792499065399, + "step": 2194 + }, + { + "epoch": 0.5060518731988473, + "grad_norm": 1.1092286708587664, + "learning_rate": 1.7832892175304947e-06, + "loss": 0.4511220455169678, + "step": 2195 + }, + { + "epoch": 0.5062824207492795, + "grad_norm": 1.139606306229032, + "learning_rate": 1.7830522042499952e-06, + "loss": 0.498882532119751, + "step": 2196 + }, + { + "epoch": 0.5065129682997118, + "grad_norm": 1.3633349831921355, + "learning_rate": 1.7828150772001563e-06, + "loss": 0.5208792090415955, + "step": 2197 + }, + { + "epoch": 0.5067435158501441, + "grad_norm": 1.0920136721929539, + "learning_rate": 1.78257783641543e-06, + "loss": 0.48315608501434326, + "step": 2198 + }, + { + "epoch": 0.5069740634005764, + "grad_norm": 1.3188291756320296, + "learning_rate": 1.7823404819302853e-06, + "loss": 0.643965482711792, + "step": 2199 + }, + { + "epoch": 0.5072046109510087, + "grad_norm": 1.2350867324332677, + "learning_rate": 1.782103013779207e-06, + "loss": 0.5071272850036621, + "step": 2200 + }, + { + "epoch": 0.5074351585014409, + "grad_norm": 1.2777572177668994, + "learning_rate": 1.7818654319966968e-06, + "loss": 0.549786388874054, + "step": 2201 + }, + { + "epoch": 0.5076657060518732, + "grad_norm": 1.1703456168007746, + "learning_rate": 1.781627736617273e-06, + "loss": 0.520953893661499, + "step": 2202 + }, + { + "epoch": 0.5078962536023055, + "grad_norm": 1.2766880262368039, + "learning_rate": 1.78138992767547e-06, + "loss": 0.5666658878326416, + "step": 2203 + }, + { + "epoch": 0.5081268011527378, + "grad_norm": 1.0842976431840186, + "learning_rate": 1.7811520052058392e-06, + "loss": 0.5050726532936096, + "step": 2204 + }, + { + "epoch": 0.5083573487031701, + "grad_norm": 1.1791951625976107, + "learning_rate": 1.7809139692429485e-06, + "loss": 0.4998525381088257, + "step": 2205 + }, + { + "epoch": 0.5085878962536023, + "grad_norm": 1.1200846842686203, + "learning_rate": 1.7806758198213814e-06, + "loss": 0.4982251226902008, + "step": 2206 + }, + { + "epoch": 0.5088184438040346, + "grad_norm": 1.1568502456227219, + "learning_rate": 1.780437556975739e-06, + "loss": 0.5644215941429138, + "step": 2207 + }, + { + "epoch": 0.5090489913544669, + "grad_norm": 1.1426739752438277, + "learning_rate": 1.7801991807406385e-06, + "loss": 0.5029370784759521, + "step": 2208 + }, + { + "epoch": 0.5092795389048992, + "grad_norm": 1.1040340904512378, + "learning_rate": 1.779960691150713e-06, + "loss": 0.49697983264923096, + "step": 2209 + }, + { + "epoch": 0.5095100864553314, + "grad_norm": 1.137855829488058, + "learning_rate": 1.779722088240613e-06, + "loss": 0.5203051567077637, + "step": 2210 + }, + { + "epoch": 0.5097406340057636, + "grad_norm": 1.2732773389651266, + "learning_rate": 1.7794833720450049e-06, + "loss": 0.5182983875274658, + "step": 2211 + }, + { + "epoch": 0.5099711815561959, + "grad_norm": 1.6767963892591715, + "learning_rate": 1.7792445425985716e-06, + "loss": 0.5266735553741455, + "step": 2212 + }, + { + "epoch": 0.5102017291066282, + "grad_norm": 1.5377950327647543, + "learning_rate": 1.7790055999360126e-06, + "loss": 0.6422331929206848, + "step": 2213 + }, + { + "epoch": 0.5104322766570605, + "grad_norm": 1.2017440986990142, + "learning_rate": 1.7787665440920435e-06, + "loss": 0.5400121212005615, + "step": 2214 + }, + { + "epoch": 0.5106628242074928, + "grad_norm": 1.2987992561000663, + "learning_rate": 1.778527375101397e-06, + "loss": 0.4500657021999359, + "step": 2215 + }, + { + "epoch": 0.510893371757925, + "grad_norm": 1.2807387577008382, + "learning_rate": 1.778288092998822e-06, + "loss": 0.5762274265289307, + "step": 2216 + }, + { + "epoch": 0.5111239193083573, + "grad_norm": 1.2541048946673465, + "learning_rate": 1.778048697819083e-06, + "loss": 0.5062060952186584, + "step": 2217 + }, + { + "epoch": 0.5113544668587896, + "grad_norm": 1.158049884571781, + "learning_rate": 1.7778091895969627e-06, + "loss": 0.5776544809341431, + "step": 2218 + }, + { + "epoch": 0.5115850144092219, + "grad_norm": 1.2665559359038843, + "learning_rate": 1.7775695683672583e-06, + "loss": 0.48977309465408325, + "step": 2219 + }, + { + "epoch": 0.5118155619596542, + "grad_norm": 1.2212119525380398, + "learning_rate": 1.7773298341647843e-06, + "loss": 0.4855668544769287, + "step": 2220 + }, + { + "epoch": 0.5120461095100864, + "grad_norm": 1.2324354065266638, + "learning_rate": 1.777089987024372e-06, + "loss": 0.5176600217819214, + "step": 2221 + }, + { + "epoch": 0.5122766570605187, + "grad_norm": 1.1833774336501766, + "learning_rate": 1.7768500269808687e-06, + "loss": 0.4856322109699249, + "step": 2222 + }, + { + "epoch": 0.512507204610951, + "grad_norm": 1.209139379315303, + "learning_rate": 1.7766099540691375e-06, + "loss": 0.5031648278236389, + "step": 2223 + }, + { + "epoch": 0.5127377521613833, + "grad_norm": 1.2514657246641, + "learning_rate": 1.7763697683240588e-06, + "loss": 0.5545702576637268, + "step": 2224 + }, + { + "epoch": 0.5129682997118156, + "grad_norm": 1.141568946980341, + "learning_rate": 1.7761294697805295e-06, + "loss": 0.5827726721763611, + "step": 2225 + }, + { + "epoch": 0.5131988472622478, + "grad_norm": 1.238809848511236, + "learning_rate": 1.7758890584734621e-06, + "loss": 0.6185393929481506, + "step": 2226 + }, + { + "epoch": 0.5134293948126801, + "grad_norm": 1.2043172717203658, + "learning_rate": 1.7756485344377859e-06, + "loss": 0.4593431353569031, + "step": 2227 + }, + { + "epoch": 0.5136599423631124, + "grad_norm": 1.1772106526015698, + "learning_rate": 1.7754078977084466e-06, + "loss": 0.47052207589149475, + "step": 2228 + }, + { + "epoch": 0.5138904899135447, + "grad_norm": 1.43950522799351, + "learning_rate": 1.7751671483204059e-06, + "loss": 0.5208712220191956, + "step": 2229 + }, + { + "epoch": 0.514121037463977, + "grad_norm": 1.3760015774843395, + "learning_rate": 1.7749262863086427e-06, + "loss": 0.5334997177124023, + "step": 2230 + }, + { + "epoch": 0.5143515850144093, + "grad_norm": 1.1787674135463977, + "learning_rate": 1.7746853117081514e-06, + "loss": 0.5811659097671509, + "step": 2231 + }, + { + "epoch": 0.5145821325648415, + "grad_norm": 1.0770203228857202, + "learning_rate": 1.7744442245539433e-06, + "loss": 0.4372256398200989, + "step": 2232 + }, + { + "epoch": 0.5148126801152738, + "grad_norm": 1.156862339294714, + "learning_rate": 1.7742030248810456e-06, + "loss": 0.5962051153182983, + "step": 2233 + }, + { + "epoch": 0.5150432276657061, + "grad_norm": 1.323655401087979, + "learning_rate": 1.7739617127245023e-06, + "loss": 0.5646089911460876, + "step": 2234 + }, + { + "epoch": 0.5152737752161384, + "grad_norm": 1.1987421954627402, + "learning_rate": 1.7737202881193736e-06, + "loss": 0.48204538226127625, + "step": 2235 + }, + { + "epoch": 0.5155043227665707, + "grad_norm": 1.3415367488143433, + "learning_rate": 1.773478751100736e-06, + "loss": 0.5023562908172607, + "step": 2236 + }, + { + "epoch": 0.5157348703170029, + "grad_norm": 1.3435806853729588, + "learning_rate": 1.773237101703682e-06, + "loss": 0.5214341878890991, + "step": 2237 + }, + { + "epoch": 0.5159654178674351, + "grad_norm": 1.112054409920411, + "learning_rate": 1.772995339963321e-06, + "loss": 0.5498196482658386, + "step": 2238 + }, + { + "epoch": 0.5161959654178674, + "grad_norm": 1.38478483246607, + "learning_rate": 1.7727534659147785e-06, + "loss": 0.6163268685340881, + "step": 2239 + }, + { + "epoch": 0.5164265129682997, + "grad_norm": 1.0193285176800957, + "learning_rate": 1.7725114795931962e-06, + "loss": 0.4866952896118164, + "step": 2240 + }, + { + "epoch": 0.516657060518732, + "grad_norm": 1.0610043743082396, + "learning_rate": 1.7722693810337322e-06, + "loss": 0.5931833982467651, + "step": 2241 + }, + { + "epoch": 0.5168876080691642, + "grad_norm": 1.0620090506254725, + "learning_rate": 1.7720271702715605e-06, + "loss": 0.5204564332962036, + "step": 2242 + }, + { + "epoch": 0.5171181556195965, + "grad_norm": 1.1822334561053938, + "learning_rate": 1.7717848473418726e-06, + "loss": 0.5478333234786987, + "step": 2243 + }, + { + "epoch": 0.5173487031700288, + "grad_norm": 1.0737770188215454, + "learning_rate": 1.771542412279875e-06, + "loss": 0.47076964378356934, + "step": 2244 + }, + { + "epoch": 0.5175792507204611, + "grad_norm": 1.389729566368652, + "learning_rate": 1.771299865120791e-06, + "loss": 0.45618507266044617, + "step": 2245 + }, + { + "epoch": 0.5178097982708934, + "grad_norm": 1.2313171508645646, + "learning_rate": 1.7710572058998604e-06, + "loss": 0.5953116416931152, + "step": 2246 + }, + { + "epoch": 0.5180403458213256, + "grad_norm": 1.237761845718166, + "learning_rate": 1.7708144346523391e-06, + "loss": 0.5020469427108765, + "step": 2247 + }, + { + "epoch": 0.5182708933717579, + "grad_norm": 1.2190445885386132, + "learning_rate": 1.7705715514134987e-06, + "loss": 0.5417760610580444, + "step": 2248 + }, + { + "epoch": 0.5185014409221902, + "grad_norm": 1.2065438974520686, + "learning_rate": 1.7703285562186282e-06, + "loss": 0.5705471038818359, + "step": 2249 + }, + { + "epoch": 0.5187319884726225, + "grad_norm": 1.2710991899081208, + "learning_rate": 1.7700854491030319e-06, + "loss": 0.5350116491317749, + "step": 2250 + }, + { + "epoch": 0.5189625360230548, + "grad_norm": 1.250653785454124, + "learning_rate": 1.7698422301020311e-06, + "loss": 0.5022974610328674, + "step": 2251 + }, + { + "epoch": 0.519193083573487, + "grad_norm": 1.1860519947436583, + "learning_rate": 1.7695988992509624e-06, + "loss": 0.6222025156021118, + "step": 2252 + }, + { + "epoch": 0.5194236311239193, + "grad_norm": 1.3651382210724667, + "learning_rate": 1.76935545658518e-06, + "loss": 0.5001484751701355, + "step": 2253 + }, + { + "epoch": 0.5196541786743516, + "grad_norm": 1.485991529924687, + "learning_rate": 1.7691119021400532e-06, + "loss": 0.5362265706062317, + "step": 2254 + }, + { + "epoch": 0.5198847262247839, + "grad_norm": 1.0098110160856955, + "learning_rate": 1.7688682359509677e-06, + "loss": 0.5197827816009521, + "step": 2255 + }, + { + "epoch": 0.5201152737752162, + "grad_norm": 1.1023930981612833, + "learning_rate": 1.7686244580533261e-06, + "loss": 0.6146107912063599, + "step": 2256 + }, + { + "epoch": 0.5203458213256484, + "grad_norm": 1.235820532389256, + "learning_rate": 1.7683805684825463e-06, + "loss": 0.5412692427635193, + "step": 2257 + }, + { + "epoch": 0.5205763688760807, + "grad_norm": 1.1120937693778494, + "learning_rate": 1.7681365672740633e-06, + "loss": 0.5003819465637207, + "step": 2258 + }, + { + "epoch": 0.520806916426513, + "grad_norm": 1.1234189645725328, + "learning_rate": 1.767892454463328e-06, + "loss": 0.5292670726776123, + "step": 2259 + }, + { + "epoch": 0.5210374639769453, + "grad_norm": 1.103744899542332, + "learning_rate": 1.7676482300858073e-06, + "loss": 0.4764510989189148, + "step": 2260 + }, + { + "epoch": 0.5212680115273776, + "grad_norm": 1.1164612132421818, + "learning_rate": 1.7674038941769845e-06, + "loss": 0.5774421691894531, + "step": 2261 + }, + { + "epoch": 0.5214985590778098, + "grad_norm": 1.229036573526321, + "learning_rate": 1.767159446772359e-06, + "loss": 0.46915918588638306, + "step": 2262 + }, + { + "epoch": 0.5217291066282421, + "grad_norm": 1.3167447922417834, + "learning_rate": 1.766914887907446e-06, + "loss": 0.4545619487762451, + "step": 2263 + }, + { + "epoch": 0.5219596541786744, + "grad_norm": 1.2121919468262714, + "learning_rate": 1.7666702176177784e-06, + "loss": 0.5084496736526489, + "step": 2264 + }, + { + "epoch": 0.5221902017291066, + "grad_norm": 1.3395786479792613, + "learning_rate": 1.7664254359389034e-06, + "loss": 0.524928092956543, + "step": 2265 + }, + { + "epoch": 0.5224207492795389, + "grad_norm": 1.4477722066137377, + "learning_rate": 1.7661805429063859e-06, + "loss": 0.5276768207550049, + "step": 2266 + }, + { + "epoch": 0.5226512968299711, + "grad_norm": 1.0912538150871292, + "learning_rate": 1.7659355385558055e-06, + "loss": 0.4555545747280121, + "step": 2267 + }, + { + "epoch": 0.5228818443804034, + "grad_norm": 1.2237495534956078, + "learning_rate": 1.7656904229227597e-06, + "loss": 0.47479552030563354, + "step": 2268 + }, + { + "epoch": 0.5231123919308357, + "grad_norm": 1.252326361415701, + "learning_rate": 1.7654451960428602e-06, + "loss": 0.520822286605835, + "step": 2269 + }, + { + "epoch": 0.523342939481268, + "grad_norm": 1.2429338112694408, + "learning_rate": 1.7651998579517367e-06, + "loss": 0.5228800177574158, + "step": 2270 + }, + { + "epoch": 0.5235734870317003, + "grad_norm": 1.1076970934273507, + "learning_rate": 1.7649544086850341e-06, + "loss": 0.4785909652709961, + "step": 2271 + }, + { + "epoch": 0.5238040345821325, + "grad_norm": 1.3595525407205307, + "learning_rate": 1.7647088482784135e-06, + "loss": 0.49906525015830994, + "step": 2272 + }, + { + "epoch": 0.5240345821325648, + "grad_norm": 1.2162675408338783, + "learning_rate": 1.7644631767675527e-06, + "loss": 0.44544023275375366, + "step": 2273 + }, + { + "epoch": 0.5242651296829971, + "grad_norm": 1.1742658425403796, + "learning_rate": 1.7642173941881444e-06, + "loss": 0.4601054787635803, + "step": 2274 + }, + { + "epoch": 0.5244956772334294, + "grad_norm": 1.5239044281877645, + "learning_rate": 1.763971500575899e-06, + "loss": 0.45652949810028076, + "step": 2275 + }, + { + "epoch": 0.5247262247838617, + "grad_norm": 1.1081315795212425, + "learning_rate": 1.7637254959665419e-06, + "loss": 0.4013681411743164, + "step": 2276 + }, + { + "epoch": 0.5249567723342939, + "grad_norm": 1.164103173902687, + "learning_rate": 1.763479380395815e-06, + "loss": 0.5990947484970093, + "step": 2277 + }, + { + "epoch": 0.5251873198847262, + "grad_norm": 1.1449594131860659, + "learning_rate": 1.7632331538994768e-06, + "loss": 0.5234252214431763, + "step": 2278 + }, + { + "epoch": 0.5254178674351585, + "grad_norm": 1.0795592791106945, + "learning_rate": 1.7629868165133006e-06, + "loss": 0.39725440740585327, + "step": 2279 + }, + { + "epoch": 0.5256484149855908, + "grad_norm": 1.16263901682717, + "learning_rate": 1.7627403682730777e-06, + "loss": 0.5516578555107117, + "step": 2280 + }, + { + "epoch": 0.5258789625360231, + "grad_norm": 1.1085422332556214, + "learning_rate": 1.7624938092146135e-06, + "loss": 0.526121199131012, + "step": 2281 + }, + { + "epoch": 0.5261095100864553, + "grad_norm": 1.120052507754705, + "learning_rate": 1.762247139373731e-06, + "loss": 0.4965516924858093, + "step": 2282 + }, + { + "epoch": 0.5263400576368876, + "grad_norm": 1.1307486794153496, + "learning_rate": 1.7620003587862686e-06, + "loss": 0.42001771926879883, + "step": 2283 + }, + { + "epoch": 0.5265706051873199, + "grad_norm": 1.5691877841787754, + "learning_rate": 1.7617534674880812e-06, + "loss": 0.5619962215423584, + "step": 2284 + }, + { + "epoch": 0.5268011527377522, + "grad_norm": 1.2189639043372158, + "learning_rate": 1.761506465515039e-06, + "loss": 0.40275585651397705, + "step": 2285 + }, + { + "epoch": 0.5270317002881845, + "grad_norm": 1.247569513691176, + "learning_rate": 1.7612593529030298e-06, + "loss": 0.4621019959449768, + "step": 2286 + }, + { + "epoch": 0.5272622478386167, + "grad_norm": 1.1749490638414628, + "learning_rate": 1.7610121296879553e-06, + "loss": 0.4977012276649475, + "step": 2287 + }, + { + "epoch": 0.527492795389049, + "grad_norm": 1.1555374356489423, + "learning_rate": 1.7607647959057351e-06, + "loss": 0.524575412273407, + "step": 2288 + }, + { + "epoch": 0.5277233429394813, + "grad_norm": 1.4739921891246035, + "learning_rate": 1.7605173515923042e-06, + "loss": 0.49018362164497375, + "step": 2289 + }, + { + "epoch": 0.5279538904899136, + "grad_norm": 1.1265063866344376, + "learning_rate": 1.7602697967836135e-06, + "loss": 0.5055255889892578, + "step": 2290 + }, + { + "epoch": 0.5281844380403459, + "grad_norm": 1.3537014805587113, + "learning_rate": 1.7600221315156305e-06, + "loss": 0.48760661482810974, + "step": 2291 + }, + { + "epoch": 0.5284149855907782, + "grad_norm": 1.1150714705179763, + "learning_rate": 1.7597743558243379e-06, + "loss": 0.45736271142959595, + "step": 2292 + }, + { + "epoch": 0.5286455331412103, + "grad_norm": 1.1364237403755952, + "learning_rate": 1.7595264697457353e-06, + "loss": 0.5347146391868591, + "step": 2293 + }, + { + "epoch": 0.5288760806916426, + "grad_norm": 0.9862978081391702, + "learning_rate": 1.7592784733158376e-06, + "loss": 0.464704692363739, + "step": 2294 + }, + { + "epoch": 0.5291066282420749, + "grad_norm": 1.2267665080220351, + "learning_rate": 1.7590303665706766e-06, + "loss": 0.5551707744598389, + "step": 2295 + }, + { + "epoch": 0.5293371757925072, + "grad_norm": 1.1513699485458475, + "learning_rate": 1.7587821495462993e-06, + "loss": 0.5731069445610046, + "step": 2296 + }, + { + "epoch": 0.5295677233429394, + "grad_norm": 1.092210739341485, + "learning_rate": 1.7585338222787694e-06, + "loss": 0.5296573638916016, + "step": 2297 + }, + { + "epoch": 0.5297982708933717, + "grad_norm": 1.7893810554454528, + "learning_rate": 1.7582853848041658e-06, + "loss": 0.5088247060775757, + "step": 2298 + }, + { + "epoch": 0.530028818443804, + "grad_norm": 1.3042429607129178, + "learning_rate": 1.7580368371585839e-06, + "loss": 0.49944454431533813, + "step": 2299 + }, + { + "epoch": 0.5302593659942363, + "grad_norm": 1.1556706324591968, + "learning_rate": 1.7577881793781355e-06, + "loss": 0.527984619140625, + "step": 2300 + }, + { + "epoch": 0.5304899135446686, + "grad_norm": 1.2825366591563032, + "learning_rate": 1.7575394114989475e-06, + "loss": 0.5389485955238342, + "step": 2301 + }, + { + "epoch": 0.5307204610951008, + "grad_norm": 1.1681706665417333, + "learning_rate": 1.7572905335571634e-06, + "loss": 0.531416654586792, + "step": 2302 + }, + { + "epoch": 0.5309510086455331, + "grad_norm": 0.9682807419290672, + "learning_rate": 1.7570415455889434e-06, + "loss": 0.5177662372589111, + "step": 2303 + }, + { + "epoch": 0.5311815561959654, + "grad_norm": 1.243285510489316, + "learning_rate": 1.7567924476304613e-06, + "loss": 0.5060547590255737, + "step": 2304 + }, + { + "epoch": 0.5314121037463977, + "grad_norm": 1.1616547133634478, + "learning_rate": 1.75654323971791e-06, + "loss": 0.5338248014450073, + "step": 2305 + }, + { + "epoch": 0.53164265129683, + "grad_norm": 1.515823497153389, + "learning_rate": 1.756293921887496e-06, + "loss": 0.5937076210975647, + "step": 2306 + }, + { + "epoch": 0.5318731988472623, + "grad_norm": 1.2866160645526843, + "learning_rate": 1.7560444941754424e-06, + "loss": 0.52833092212677, + "step": 2307 + }, + { + "epoch": 0.5321037463976945, + "grad_norm": 1.1769387885340463, + "learning_rate": 1.755794956617989e-06, + "loss": 0.4994063973426819, + "step": 2308 + }, + { + "epoch": 0.5323342939481268, + "grad_norm": 1.5436647070263716, + "learning_rate": 1.7555453092513908e-06, + "loss": 0.5200421214103699, + "step": 2309 + }, + { + "epoch": 0.5325648414985591, + "grad_norm": 1.1745455381351673, + "learning_rate": 1.755295552111919e-06, + "loss": 0.5218993425369263, + "step": 2310 + }, + { + "epoch": 0.5327953890489914, + "grad_norm": 1.1504625770334531, + "learning_rate": 1.7550456852358603e-06, + "loss": 0.4781727194786072, + "step": 2311 + }, + { + "epoch": 0.5330259365994237, + "grad_norm": 1.3403464072628806, + "learning_rate": 1.7547957086595187e-06, + "loss": 0.5551970601081848, + "step": 2312 + }, + { + "epoch": 0.5332564841498559, + "grad_norm": 1.297723482722601, + "learning_rate": 1.7545456224192123e-06, + "loss": 0.5485920906066895, + "step": 2313 + }, + { + "epoch": 0.5334870317002882, + "grad_norm": 1.1296888028982712, + "learning_rate": 1.7542954265512764e-06, + "loss": 0.4536609649658203, + "step": 2314 + }, + { + "epoch": 0.5337175792507205, + "grad_norm": 1.0439097273883267, + "learning_rate": 1.7540451210920616e-06, + "loss": 0.5044888257980347, + "step": 2315 + }, + { + "epoch": 0.5339481268011528, + "grad_norm": 1.3932004795894137, + "learning_rate": 1.753794706077935e-06, + "loss": 0.5383297801017761, + "step": 2316 + }, + { + "epoch": 0.5341786743515851, + "grad_norm": 1.196782948603355, + "learning_rate": 1.7535441815452792e-06, + "loss": 0.5694228410720825, + "step": 2317 + }, + { + "epoch": 0.5344092219020173, + "grad_norm": 0.9575339089400295, + "learning_rate": 1.753293547530493e-06, + "loss": 0.49534568190574646, + "step": 2318 + }, + { + "epoch": 0.5346397694524496, + "grad_norm": 1.2393768694985452, + "learning_rate": 1.7530428040699904e-06, + "loss": 0.5388910174369812, + "step": 2319 + }, + { + "epoch": 0.5348703170028818, + "grad_norm": 1.1881701575449917, + "learning_rate": 1.7527919512002023e-06, + "loss": 0.4393211603164673, + "step": 2320 + }, + { + "epoch": 0.5351008645533141, + "grad_norm": 1.3054845336595484, + "learning_rate": 1.7525409889575745e-06, + "loss": 0.6011124849319458, + "step": 2321 + }, + { + "epoch": 0.5353314121037464, + "grad_norm": 1.274741444961471, + "learning_rate": 1.7522899173785696e-06, + "loss": 0.46487540006637573, + "step": 2322 + }, + { + "epoch": 0.5355619596541786, + "grad_norm": 1.1421177967152112, + "learning_rate": 1.752038736499666e-06, + "loss": 0.4753795564174652, + "step": 2323 + }, + { + "epoch": 0.5357925072046109, + "grad_norm": 1.2683556519471963, + "learning_rate": 1.7517874463573572e-06, + "loss": 0.5985465049743652, + "step": 2324 + }, + { + "epoch": 0.5360230547550432, + "grad_norm": 1.056507312381586, + "learning_rate": 1.751536046988153e-06, + "loss": 0.4543229043483734, + "step": 2325 + }, + { + "epoch": 0.5362536023054755, + "grad_norm": 1.2118204577653935, + "learning_rate": 1.7512845384285793e-06, + "loss": 0.6149678230285645, + "step": 2326 + }, + { + "epoch": 0.5364841498559078, + "grad_norm": 1.0689041198632254, + "learning_rate": 1.7510329207151775e-06, + "loss": 0.5103700160980225, + "step": 2327 + }, + { + "epoch": 0.53671469740634, + "grad_norm": 1.1235720243428842, + "learning_rate": 1.7507811938845052e-06, + "loss": 0.4708825945854187, + "step": 2328 + }, + { + "epoch": 0.5369452449567723, + "grad_norm": 0.975325384779532, + "learning_rate": 1.7505293579731357e-06, + "loss": 0.5039220452308655, + "step": 2329 + }, + { + "epoch": 0.5371757925072046, + "grad_norm": 1.1841254181875327, + "learning_rate": 1.7502774130176582e-06, + "loss": 0.5144309997558594, + "step": 2330 + }, + { + "epoch": 0.5374063400576369, + "grad_norm": 1.3039622953440149, + "learning_rate": 1.7500253590546774e-06, + "loss": 0.41529229283332825, + "step": 2331 + }, + { + "epoch": 0.5376368876080692, + "grad_norm": 1.165707977395041, + "learning_rate": 1.7497731961208144e-06, + "loss": 0.45477360486984253, + "step": 2332 + }, + { + "epoch": 0.5378674351585014, + "grad_norm": 1.2847866373142411, + "learning_rate": 1.7495209242527057e-06, + "loss": 0.5498300790786743, + "step": 2333 + }, + { + "epoch": 0.5380979827089337, + "grad_norm": 1.0807005264316651, + "learning_rate": 1.7492685434870036e-06, + "loss": 0.4764532446861267, + "step": 2334 + }, + { + "epoch": 0.538328530259366, + "grad_norm": 1.2969878344335828, + "learning_rate": 1.7490160538603771e-06, + "loss": 0.4899054765701294, + "step": 2335 + }, + { + "epoch": 0.5385590778097983, + "grad_norm": 1.2821827405222348, + "learning_rate": 1.7487634554095095e-06, + "loss": 0.5095956921577454, + "step": 2336 + }, + { + "epoch": 0.5387896253602306, + "grad_norm": 1.212075947407855, + "learning_rate": 1.748510748171101e-06, + "loss": 0.5495618581771851, + "step": 2337 + }, + { + "epoch": 0.5390201729106628, + "grad_norm": 1.0328701753125462, + "learning_rate": 1.7482579321818676e-06, + "loss": 0.4582991898059845, + "step": 2338 + }, + { + "epoch": 0.5392507204610951, + "grad_norm": 1.3477998717170885, + "learning_rate": 1.7480050074785405e-06, + "loss": 0.47231215238571167, + "step": 2339 + }, + { + "epoch": 0.5394812680115274, + "grad_norm": 1.3032973137322832, + "learning_rate": 1.7477519740978673e-06, + "loss": 0.4859996736049652, + "step": 2340 + }, + { + "epoch": 0.5397118155619597, + "grad_norm": 1.293149430738758, + "learning_rate": 1.747498832076611e-06, + "loss": 0.5070324540138245, + "step": 2341 + }, + { + "epoch": 0.539942363112392, + "grad_norm": 1.2383400794741863, + "learning_rate": 1.7472455814515501e-06, + "loss": 0.5261486768722534, + "step": 2342 + }, + { + "epoch": 0.5401729106628242, + "grad_norm": 1.3106600806025066, + "learning_rate": 1.74699222225948e-06, + "loss": 0.5616657733917236, + "step": 2343 + }, + { + "epoch": 0.5404034582132565, + "grad_norm": 1.154027686559668, + "learning_rate": 1.7467387545372104e-06, + "loss": 0.5504746437072754, + "step": 2344 + }, + { + "epoch": 0.5406340057636888, + "grad_norm": 1.1026580407179722, + "learning_rate": 1.7464851783215684e-06, + "loss": 0.5194835662841797, + "step": 2345 + }, + { + "epoch": 0.5408645533141211, + "grad_norm": 1.3293115562160591, + "learning_rate": 1.7462314936493953e-06, + "loss": 0.522796094417572, + "step": 2346 + }, + { + "epoch": 0.5410951008645534, + "grad_norm": 1.3369471744938577, + "learning_rate": 1.7459777005575492e-06, + "loss": 0.4633204936981201, + "step": 2347 + }, + { + "epoch": 0.5413256484149855, + "grad_norm": 1.2486516319766208, + "learning_rate": 1.7457237990829033e-06, + "loss": 0.4521179795265198, + "step": 2348 + }, + { + "epoch": 0.5415561959654178, + "grad_norm": 1.6780170315313812, + "learning_rate": 1.7454697892623471e-06, + "loss": 0.678740918636322, + "step": 2349 + }, + { + "epoch": 0.5417867435158501, + "grad_norm": 1.2192388332823851, + "learning_rate": 1.7452156711327854e-06, + "loss": 0.4683062732219696, + "step": 2350 + }, + { + "epoch": 0.5420172910662824, + "grad_norm": 0.9552718931480573, + "learning_rate": 1.7449614447311393e-06, + "loss": 0.43280357122421265, + "step": 2351 + }, + { + "epoch": 0.5422478386167147, + "grad_norm": 1.1384909744897507, + "learning_rate": 1.7447071100943446e-06, + "loss": 0.4849068522453308, + "step": 2352 + }, + { + "epoch": 0.5424783861671469, + "grad_norm": 1.2382586073876527, + "learning_rate": 1.744452667259354e-06, + "loss": 0.5404252409934998, + "step": 2353 + }, + { + "epoch": 0.5427089337175792, + "grad_norm": 1.14218337171458, + "learning_rate": 1.7441981162631353e-06, + "loss": 0.41409242153167725, + "step": 2354 + }, + { + "epoch": 0.5429394812680115, + "grad_norm": 1.2694326083074758, + "learning_rate": 1.7439434571426718e-06, + "loss": 0.549047589302063, + "step": 2355 + }, + { + "epoch": 0.5431700288184438, + "grad_norm": 1.7272228580432107, + "learning_rate": 1.7436886899349635e-06, + "loss": 0.6400755643844604, + "step": 2356 + }, + { + "epoch": 0.5434005763688761, + "grad_norm": 1.2277192228486493, + "learning_rate": 1.7434338146770247e-06, + "loss": 0.46230536699295044, + "step": 2357 + }, + { + "epoch": 0.5436311239193083, + "grad_norm": 1.3530870479148573, + "learning_rate": 1.7431788314058862e-06, + "loss": 0.4629567861557007, + "step": 2358 + }, + { + "epoch": 0.5438616714697406, + "grad_norm": 1.0772570900190532, + "learning_rate": 1.742923740158595e-06, + "loss": 0.4405868649482727, + "step": 2359 + }, + { + "epoch": 0.5440922190201729, + "grad_norm": 1.2150946309984234, + "learning_rate": 1.7426685409722128e-06, + "loss": 0.4655589163303375, + "step": 2360 + }, + { + "epoch": 0.5443227665706052, + "grad_norm": 1.2675240389340952, + "learning_rate": 1.7424132338838171e-06, + "loss": 0.4461101293563843, + "step": 2361 + }, + { + "epoch": 0.5445533141210375, + "grad_norm": 1.5854266917259596, + "learning_rate": 1.7421578189305022e-06, + "loss": 0.4191123843193054, + "step": 2362 + }, + { + "epoch": 0.5447838616714697, + "grad_norm": 1.6758559799234898, + "learning_rate": 1.741902296149376e-06, + "loss": 0.5072780251502991, + "step": 2363 + }, + { + "epoch": 0.545014409221902, + "grad_norm": 1.2601130548302264, + "learning_rate": 1.7416466655775648e-06, + "loss": 0.5171830654144287, + "step": 2364 + }, + { + "epoch": 0.5452449567723343, + "grad_norm": 1.2783697742533648, + "learning_rate": 1.7413909272522079e-06, + "loss": 0.5393522381782532, + "step": 2365 + }, + { + "epoch": 0.5454755043227666, + "grad_norm": 1.5053510484478103, + "learning_rate": 1.7411350812104617e-06, + "loss": 0.5618822574615479, + "step": 2366 + }, + { + "epoch": 0.5457060518731989, + "grad_norm": 1.1848495900457634, + "learning_rate": 1.7408791274894986e-06, + "loss": 0.5396535992622375, + "step": 2367 + }, + { + "epoch": 0.5459365994236312, + "grad_norm": 1.5782103314859273, + "learning_rate": 1.740623066126505e-06, + "loss": 0.6367689967155457, + "step": 2368 + }, + { + "epoch": 0.5461671469740634, + "grad_norm": 1.0855916632567553, + "learning_rate": 1.7403668971586844e-06, + "loss": 0.47266045212745667, + "step": 2369 + }, + { + "epoch": 0.5463976945244957, + "grad_norm": 1.2728774961386682, + "learning_rate": 1.7401106206232558e-06, + "loss": 0.5436207056045532, + "step": 2370 + }, + { + "epoch": 0.546628242074928, + "grad_norm": 1.1570761720253744, + "learning_rate": 1.7398542365574532e-06, + "loss": 0.47154513001441956, + "step": 2371 + }, + { + "epoch": 0.5468587896253603, + "grad_norm": 1.3437856079692005, + "learning_rate": 1.7395977449985264e-06, + "loss": 0.5550810098648071, + "step": 2372 + }, + { + "epoch": 0.5470893371757926, + "grad_norm": 1.4928115947596696, + "learning_rate": 1.7393411459837414e-06, + "loss": 0.5480188131332397, + "step": 2373 + }, + { + "epoch": 0.5473198847262248, + "grad_norm": 1.1868713323946698, + "learning_rate": 1.7390844395503787e-06, + "loss": 0.5999584197998047, + "step": 2374 + }, + { + "epoch": 0.547550432276657, + "grad_norm": 1.2883633347308001, + "learning_rate": 1.7388276257357357e-06, + "loss": 0.5834146738052368, + "step": 2375 + }, + { + "epoch": 0.5477809798270893, + "grad_norm": 1.2072211348002257, + "learning_rate": 1.7385707045771248e-06, + "loss": 0.5123052597045898, + "step": 2376 + }, + { + "epoch": 0.5480115273775216, + "grad_norm": 1.2449367994236626, + "learning_rate": 1.7383136761118734e-06, + "loss": 0.5500702857971191, + "step": 2377 + }, + { + "epoch": 0.5482420749279538, + "grad_norm": 1.2112346356392183, + "learning_rate": 1.7380565403773255e-06, + "loss": 0.4849172830581665, + "step": 2378 + }, + { + "epoch": 0.5484726224783861, + "grad_norm": 1.1806595721818476, + "learning_rate": 1.7377992974108402e-06, + "loss": 0.5452470779418945, + "step": 2379 + }, + { + "epoch": 0.5487031700288184, + "grad_norm": 1.093817020961923, + "learning_rate": 1.737541947249792e-06, + "loss": 0.4986187219619751, + "step": 2380 + }, + { + "epoch": 0.5489337175792507, + "grad_norm": 1.5132521321219672, + "learning_rate": 1.7372844899315715e-06, + "loss": 0.5750565528869629, + "step": 2381 + }, + { + "epoch": 0.549164265129683, + "grad_norm": 1.5433696305301123, + "learning_rate": 1.7370269254935843e-06, + "loss": 0.45188283920288086, + "step": 2382 + }, + { + "epoch": 0.5493948126801153, + "grad_norm": 1.3782935189362007, + "learning_rate": 1.7367692539732519e-06, + "loss": 0.4593166708946228, + "step": 2383 + }, + { + "epoch": 0.5496253602305475, + "grad_norm": 1.0865107387475765, + "learning_rate": 1.7365114754080118e-06, + "loss": 0.4712238013744354, + "step": 2384 + }, + { + "epoch": 0.5498559077809798, + "grad_norm": 1.2693708537220825, + "learning_rate": 1.7362535898353156e-06, + "loss": 0.5285966396331787, + "step": 2385 + }, + { + "epoch": 0.5500864553314121, + "grad_norm": 1.0580449420809215, + "learning_rate": 1.735995597292632e-06, + "loss": 0.5930126905441284, + "step": 2386 + }, + { + "epoch": 0.5503170028818444, + "grad_norm": 1.3211740236059126, + "learning_rate": 1.7357374978174447e-06, + "loss": 0.48222798109054565, + "step": 2387 + }, + { + "epoch": 0.5505475504322767, + "grad_norm": 1.0661809221331433, + "learning_rate": 1.7354792914472528e-06, + "loss": 0.463559627532959, + "step": 2388 + }, + { + "epoch": 0.5507780979827089, + "grad_norm": 1.2928471066929086, + "learning_rate": 1.7352209782195706e-06, + "loss": 0.5738880038261414, + "step": 2389 + }, + { + "epoch": 0.5510086455331412, + "grad_norm": 1.168260381089932, + "learning_rate": 1.7349625581719285e-06, + "loss": 0.4638671278953552, + "step": 2390 + }, + { + "epoch": 0.5512391930835735, + "grad_norm": 1.3236614245482354, + "learning_rate": 1.7347040313418729e-06, + "loss": 0.5681020617485046, + "step": 2391 + }, + { + "epoch": 0.5514697406340058, + "grad_norm": 1.1347196291887822, + "learning_rate": 1.7344453977669639e-06, + "loss": 0.49352413415908813, + "step": 2392 + }, + { + "epoch": 0.5517002881844381, + "grad_norm": 1.1909845997462791, + "learning_rate": 1.7341866574847792e-06, + "loss": 0.4587385058403015, + "step": 2393 + }, + { + "epoch": 0.5519308357348703, + "grad_norm": 1.2519226107096215, + "learning_rate": 1.733927810532911e-06, + "loss": 0.5855783224105835, + "step": 2394 + }, + { + "epoch": 0.5521613832853026, + "grad_norm": 1.3059494948390526, + "learning_rate": 1.7336688569489663e-06, + "loss": 0.5600197315216064, + "step": 2395 + }, + { + "epoch": 0.5523919308357349, + "grad_norm": 1.1744558919405392, + "learning_rate": 1.7334097967705691e-06, + "loss": 0.5392236709594727, + "step": 2396 + }, + { + "epoch": 0.5526224783861672, + "grad_norm": 1.4517486170338612, + "learning_rate": 1.7331506300353582e-06, + "loss": 0.4800894260406494, + "step": 2397 + }, + { + "epoch": 0.5528530259365995, + "grad_norm": 1.178372280796007, + "learning_rate": 1.7328913567809874e-06, + "loss": 0.4818227291107178, + "step": 2398 + }, + { + "epoch": 0.5530835734870317, + "grad_norm": 1.7548710225897248, + "learning_rate": 1.7326319770451263e-06, + "loss": 0.513884425163269, + "step": 2399 + }, + { + "epoch": 0.553314121037464, + "grad_norm": 1.126782436647883, + "learning_rate": 1.7323724908654607e-06, + "loss": 0.4948759078979492, + "step": 2400 + }, + { + "epoch": 0.5535446685878963, + "grad_norm": 1.0554552263799144, + "learning_rate": 1.7321128982796908e-06, + "loss": 0.492409884929657, + "step": 2401 + }, + { + "epoch": 0.5537752161383286, + "grad_norm": 1.2491555996131936, + "learning_rate": 1.7318531993255328e-06, + "loss": 0.5249980688095093, + "step": 2402 + }, + { + "epoch": 0.5540057636887608, + "grad_norm": 1.1270035201738609, + "learning_rate": 1.7315933940407184e-06, + "loss": 0.5396232604980469, + "step": 2403 + }, + { + "epoch": 0.554236311239193, + "grad_norm": 0.9723547393116351, + "learning_rate": 1.731333482462994e-06, + "loss": 0.5420812368392944, + "step": 2404 + }, + { + "epoch": 0.5544668587896253, + "grad_norm": 1.2855153094106506, + "learning_rate": 1.731073464630123e-06, + "loss": 0.5918469429016113, + "step": 2405 + }, + { + "epoch": 0.5546974063400576, + "grad_norm": 1.3464941159444543, + "learning_rate": 1.7308133405798825e-06, + "loss": 0.5120134353637695, + "step": 2406 + }, + { + "epoch": 0.5549279538904899, + "grad_norm": 1.0375398976716819, + "learning_rate": 1.7305531103500664e-06, + "loss": 0.5390212535858154, + "step": 2407 + }, + { + "epoch": 0.5551585014409222, + "grad_norm": 1.316770240861726, + "learning_rate": 1.7302927739784828e-06, + "loss": 0.524927020072937, + "step": 2408 + }, + { + "epoch": 0.5553890489913544, + "grad_norm": 1.265948132616221, + "learning_rate": 1.7300323315029563e-06, + "loss": 0.5254822969436646, + "step": 2409 + }, + { + "epoch": 0.5556195965417867, + "grad_norm": 1.2631241917657168, + "learning_rate": 1.7297717829613268e-06, + "loss": 0.4715406000614166, + "step": 2410 + }, + { + "epoch": 0.555850144092219, + "grad_norm": 1.1354586776799696, + "learning_rate": 1.7295111283914485e-06, + "loss": 0.5462017059326172, + "step": 2411 + }, + { + "epoch": 0.5560806916426513, + "grad_norm": 1.3087280521714544, + "learning_rate": 1.7292503678311923e-06, + "loss": 0.5210998058319092, + "step": 2412 + }, + { + "epoch": 0.5563112391930836, + "grad_norm": 1.3244585954422035, + "learning_rate": 1.728989501318444e-06, + "loss": 0.6012462377548218, + "step": 2413 + }, + { + "epoch": 0.5565417867435158, + "grad_norm": 1.2235365774596485, + "learning_rate": 1.7287285288911045e-06, + "loss": 0.5233356952667236, + "step": 2414 + }, + { + "epoch": 0.5567723342939481, + "grad_norm": 1.179341457042005, + "learning_rate": 1.7284674505870907e-06, + "loss": 0.5262124538421631, + "step": 2415 + }, + { + "epoch": 0.5570028818443804, + "grad_norm": 1.200909446705809, + "learning_rate": 1.728206266444334e-06, + "loss": 0.5770957469940186, + "step": 2416 + }, + { + "epoch": 0.5572334293948127, + "grad_norm": 1.6431811554350846, + "learning_rate": 1.7279449765007827e-06, + "loss": 0.4863049387931824, + "step": 2417 + }, + { + "epoch": 0.557463976945245, + "grad_norm": 1.0434342032855735, + "learning_rate": 1.727683580794399e-06, + "loss": 0.4981330633163452, + "step": 2418 + }, + { + "epoch": 0.5576945244956772, + "grad_norm": 1.5074776608800005, + "learning_rate": 1.7274220793631606e-06, + "loss": 0.4425088167190552, + "step": 2419 + }, + { + "epoch": 0.5579250720461095, + "grad_norm": 1.1366341564315194, + "learning_rate": 1.7271604722450616e-06, + "loss": 0.44763046503067017, + "step": 2420 + }, + { + "epoch": 0.5581556195965418, + "grad_norm": 1.2585978898519974, + "learning_rate": 1.7268987594781103e-06, + "loss": 0.561431348323822, + "step": 2421 + }, + { + "epoch": 0.5583861671469741, + "grad_norm": 1.3718566333183382, + "learning_rate": 1.7266369411003312e-06, + "loss": 0.43858832120895386, + "step": 2422 + }, + { + "epoch": 0.5586167146974064, + "grad_norm": 1.2234114143834152, + "learning_rate": 1.7263750171497633e-06, + "loss": 0.46294379234313965, + "step": 2423 + }, + { + "epoch": 0.5588472622478386, + "grad_norm": 1.2068012948681028, + "learning_rate": 1.7261129876644623e-06, + "loss": 0.43397650122642517, + "step": 2424 + }, + { + "epoch": 0.5590778097982709, + "grad_norm": 1.1704200234778626, + "learning_rate": 1.7258508526824973e-06, + "loss": 0.5309501886367798, + "step": 2425 + }, + { + "epoch": 0.5593083573487032, + "grad_norm": 1.0071111236432664, + "learning_rate": 1.7255886122419544e-06, + "loss": 0.505167543888092, + "step": 2426 + }, + { + "epoch": 0.5595389048991355, + "grad_norm": 1.2356906230131073, + "learning_rate": 1.725326266380934e-06, + "loss": 0.5945361256599426, + "step": 2427 + }, + { + "epoch": 0.5597694524495678, + "grad_norm": 1.1822616583201517, + "learning_rate": 1.725063815137553e-06, + "loss": 0.4658198952674866, + "step": 2428 + }, + { + "epoch": 0.56, + "grad_norm": 0.9726608754536225, + "learning_rate": 1.7248012585499422e-06, + "loss": 0.4013107419013977, + "step": 2429 + }, + { + "epoch": 0.5602305475504322, + "grad_norm": 1.2094159491990593, + "learning_rate": 1.7245385966562485e-06, + "loss": 0.47318965196609497, + "step": 2430 + }, + { + "epoch": 0.5604610951008645, + "grad_norm": 1.0448285007897542, + "learning_rate": 1.7242758294946338e-06, + "loss": 0.5096567273139954, + "step": 2431 + }, + { + "epoch": 0.5606916426512968, + "grad_norm": 1.3183204430020672, + "learning_rate": 1.7240129571032758e-06, + "loss": 0.6132520437240601, + "step": 2432 + }, + { + "epoch": 0.5609221902017291, + "grad_norm": 1.20612372705674, + "learning_rate": 1.7237499795203665e-06, + "loss": 0.5368523597717285, + "step": 2433 + }, + { + "epoch": 0.5611527377521613, + "grad_norm": 1.156268037467991, + "learning_rate": 1.7234868967841143e-06, + "loss": 0.48554062843322754, + "step": 2434 + }, + { + "epoch": 0.5613832853025936, + "grad_norm": 1.0256198870418538, + "learning_rate": 1.7232237089327426e-06, + "loss": 0.5702558755874634, + "step": 2435 + }, + { + "epoch": 0.5616138328530259, + "grad_norm": 1.3174974585828079, + "learning_rate": 1.7229604160044893e-06, + "loss": 0.5398519039154053, + "step": 2436 + }, + { + "epoch": 0.5618443804034582, + "grad_norm": 1.1225841092153466, + "learning_rate": 1.7226970180376083e-06, + "loss": 0.502672553062439, + "step": 2437 + }, + { + "epoch": 0.5620749279538905, + "grad_norm": 1.3348084140762158, + "learning_rate": 1.7224335150703691e-06, + "loss": 0.5490902662277222, + "step": 2438 + }, + { + "epoch": 0.5623054755043227, + "grad_norm": 1.2252956936987631, + "learning_rate": 1.722169907141055e-06, + "loss": 0.5142146348953247, + "step": 2439 + }, + { + "epoch": 0.562536023054755, + "grad_norm": 1.1994402774290347, + "learning_rate": 1.721906194287966e-06, + "loss": 0.4676271677017212, + "step": 2440 + }, + { + "epoch": 0.5627665706051873, + "grad_norm": 1.2483659723793379, + "learning_rate": 1.721642376549417e-06, + "loss": 0.4180489182472229, + "step": 2441 + }, + { + "epoch": 0.5629971181556196, + "grad_norm": 1.2264187858798972, + "learning_rate": 1.7213784539637378e-06, + "loss": 0.45822733640670776, + "step": 2442 + }, + { + "epoch": 0.5632276657060519, + "grad_norm": 0.9474625129952845, + "learning_rate": 1.7211144265692736e-06, + "loss": 0.46978574991226196, + "step": 2443 + }, + { + "epoch": 0.5634582132564842, + "grad_norm": 1.2092008884546377, + "learning_rate": 1.7208502944043846e-06, + "loss": 0.5099056363105774, + "step": 2444 + }, + { + "epoch": 0.5636887608069164, + "grad_norm": 1.4043049353060089, + "learning_rate": 1.7205860575074467e-06, + "loss": 0.5157277584075928, + "step": 2445 + }, + { + "epoch": 0.5639193083573487, + "grad_norm": 1.4140179513751376, + "learning_rate": 1.7203217159168509e-06, + "loss": 0.5684963464736938, + "step": 2446 + }, + { + "epoch": 0.564149855907781, + "grad_norm": 1.395481014428841, + "learning_rate": 1.7200572696710031e-06, + "loss": 0.5450068712234497, + "step": 2447 + }, + { + "epoch": 0.5643804034582133, + "grad_norm": 1.219230832097251, + "learning_rate": 1.7197927188083247e-06, + "loss": 0.487520694732666, + "step": 2448 + }, + { + "epoch": 0.5646109510086456, + "grad_norm": 1.0252448838780284, + "learning_rate": 1.719528063367252e-06, + "loss": 0.473537802696228, + "step": 2449 + }, + { + "epoch": 0.5648414985590778, + "grad_norm": 1.3599987921866794, + "learning_rate": 1.7192633033862366e-06, + "loss": 0.5973968505859375, + "step": 2450 + }, + { + "epoch": 0.5650720461095101, + "grad_norm": 1.192988798166061, + "learning_rate": 1.7189984389037463e-06, + "loss": 0.494625985622406, + "step": 2451 + }, + { + "epoch": 0.5653025936599424, + "grad_norm": 1.0679499637949388, + "learning_rate": 1.7187334699582616e-06, + "loss": 0.5459957122802734, + "step": 2452 + }, + { + "epoch": 0.5655331412103747, + "grad_norm": 1.1247915420976404, + "learning_rate": 1.718468396588281e-06, + "loss": 0.492592453956604, + "step": 2453 + }, + { + "epoch": 0.565763688760807, + "grad_norm": 1.2564768345784407, + "learning_rate": 1.7182032188323161e-06, + "loss": 0.5006682872772217, + "step": 2454 + }, + { + "epoch": 0.5659942363112392, + "grad_norm": 1.2050444012646155, + "learning_rate": 1.717937936728895e-06, + "loss": 0.4829084277153015, + "step": 2455 + }, + { + "epoch": 0.5662247838616715, + "grad_norm": 1.2953134882724824, + "learning_rate": 1.7176725503165606e-06, + "loss": 0.5307221412658691, + "step": 2456 + }, + { + "epoch": 0.5664553314121038, + "grad_norm": 1.4233139606988499, + "learning_rate": 1.7174070596338698e-06, + "loss": 0.381227046251297, + "step": 2457 + }, + { + "epoch": 0.566685878962536, + "grad_norm": 1.1760942170869182, + "learning_rate": 1.7171414647193966e-06, + "loss": 0.4984063506126404, + "step": 2458 + }, + { + "epoch": 0.5669164265129683, + "grad_norm": 1.1776079567311266, + "learning_rate": 1.716875765611729e-06, + "loss": 0.4236326813697815, + "step": 2459 + }, + { + "epoch": 0.5671469740634005, + "grad_norm": 1.1740922365288478, + "learning_rate": 1.7166099623494698e-06, + "loss": 0.5070061683654785, + "step": 2460 + }, + { + "epoch": 0.5673775216138328, + "grad_norm": 1.5114956621718905, + "learning_rate": 1.7163440549712382e-06, + "loss": 0.5020880699157715, + "step": 2461 + }, + { + "epoch": 0.5676080691642651, + "grad_norm": 1.1096400864282738, + "learning_rate": 1.7160780435156674e-06, + "loss": 0.5303773283958435, + "step": 2462 + }, + { + "epoch": 0.5678386167146974, + "grad_norm": 1.494282249895922, + "learning_rate": 1.7158119280214058e-06, + "loss": 0.47430795431137085, + "step": 2463 + }, + { + "epoch": 0.5680691642651297, + "grad_norm": 2.011684138207683, + "learning_rate": 1.7155457085271174e-06, + "loss": 0.49290892481803894, + "step": 2464 + }, + { + "epoch": 0.5682997118155619, + "grad_norm": 1.185573295335424, + "learning_rate": 1.7152793850714817e-06, + "loss": 0.5034571290016174, + "step": 2465 + }, + { + "epoch": 0.5685302593659942, + "grad_norm": 1.3815592270043073, + "learning_rate": 1.715012957693192e-06, + "loss": 0.4832080602645874, + "step": 2466 + }, + { + "epoch": 0.5687608069164265, + "grad_norm": 1.2151204568168548, + "learning_rate": 1.7147464264309576e-06, + "loss": 0.5464382171630859, + "step": 2467 + }, + { + "epoch": 0.5689913544668588, + "grad_norm": 1.1492964258847325, + "learning_rate": 1.714479791323503e-06, + "loss": 0.50137859582901, + "step": 2468 + }, + { + "epoch": 0.5692219020172911, + "grad_norm": 1.157123770105093, + "learning_rate": 1.714213052409567e-06, + "loss": 0.4540822505950928, + "step": 2469 + }, + { + "epoch": 0.5694524495677233, + "grad_norm": 1.2490601252140423, + "learning_rate": 1.7139462097279046e-06, + "loss": 0.4347212016582489, + "step": 2470 + }, + { + "epoch": 0.5696829971181556, + "grad_norm": 1.3725663062255251, + "learning_rate": 1.7136792633172848e-06, + "loss": 0.5054244995117188, + "step": 2471 + }, + { + "epoch": 0.5699135446685879, + "grad_norm": 1.3145868829879277, + "learning_rate": 1.7134122132164922e-06, + "loss": 0.570202112197876, + "step": 2472 + }, + { + "epoch": 0.5701440922190202, + "grad_norm": 1.046325975638938, + "learning_rate": 1.7131450594643266e-06, + "loss": 0.39734238386154175, + "step": 2473 + }, + { + "epoch": 0.5703746397694525, + "grad_norm": 1.3305714790552408, + "learning_rate": 1.712877802099603e-06, + "loss": 0.5042159557342529, + "step": 2474 + }, + { + "epoch": 0.5706051873198847, + "grad_norm": 1.4450527868868759, + "learning_rate": 1.71261044116115e-06, + "loss": 0.5412349700927734, + "step": 2475 + }, + { + "epoch": 0.570835734870317, + "grad_norm": 1.2189812093120667, + "learning_rate": 1.7123429766878133e-06, + "loss": 0.5226187705993652, + "step": 2476 + }, + { + "epoch": 0.5710662824207493, + "grad_norm": 1.1260169170850247, + "learning_rate": 1.7120754087184523e-06, + "loss": 0.4852250814437866, + "step": 2477 + }, + { + "epoch": 0.5712968299711816, + "grad_norm": 1.190113614657056, + "learning_rate": 1.7118077372919425e-06, + "loss": 0.5083650350570679, + "step": 2478 + }, + { + "epoch": 0.5715273775216139, + "grad_norm": 1.3460854369133757, + "learning_rate": 1.7115399624471728e-06, + "loss": 0.5298900604248047, + "step": 2479 + }, + { + "epoch": 0.5717579250720461, + "grad_norm": 1.359750962222541, + "learning_rate": 1.7112720842230485e-06, + "loss": 0.5619887113571167, + "step": 2480 + }, + { + "epoch": 0.5719884726224784, + "grad_norm": 1.923177254565902, + "learning_rate": 1.7110041026584898e-06, + "loss": 0.5785295367240906, + "step": 2481 + }, + { + "epoch": 0.5722190201729107, + "grad_norm": 1.1169896209151449, + "learning_rate": 1.7107360177924312e-06, + "loss": 0.4424097537994385, + "step": 2482 + }, + { + "epoch": 0.572449567723343, + "grad_norm": 1.0927297402951583, + "learning_rate": 1.7104678296638234e-06, + "loss": 0.5585045218467712, + "step": 2483 + }, + { + "epoch": 0.5726801152737753, + "grad_norm": 1.1082744154066473, + "learning_rate": 1.7101995383116302e-06, + "loss": 0.4811630845069885, + "step": 2484 + }, + { + "epoch": 0.5729106628242074, + "grad_norm": 1.5640254807037144, + "learning_rate": 1.7099311437748322e-06, + "loss": 0.5382398366928101, + "step": 2485 + }, + { + "epoch": 0.5731412103746397, + "grad_norm": 1.256587442517617, + "learning_rate": 1.7096626460924246e-06, + "loss": 0.5314421653747559, + "step": 2486 + }, + { + "epoch": 0.573371757925072, + "grad_norm": 1.3291523627771589, + "learning_rate": 1.7093940453034167e-06, + "loss": 0.5254508256912231, + "step": 2487 + }, + { + "epoch": 0.5736023054755043, + "grad_norm": 1.1798528892424343, + "learning_rate": 1.7091253414468338e-06, + "loss": 0.5426524877548218, + "step": 2488 + }, + { + "epoch": 0.5738328530259366, + "grad_norm": 1.3436385596476037, + "learning_rate": 1.7088565345617156e-06, + "loss": 0.5764416456222534, + "step": 2489 + }, + { + "epoch": 0.5740634005763688, + "grad_norm": 1.036753645582628, + "learning_rate": 1.7085876246871172e-06, + "loss": 0.43790721893310547, + "step": 2490 + }, + { + "epoch": 0.5742939481268011, + "grad_norm": 1.0552814657492613, + "learning_rate": 1.7083186118621083e-06, + "loss": 0.5315482020378113, + "step": 2491 + }, + { + "epoch": 0.5745244956772334, + "grad_norm": 1.5762916246577796, + "learning_rate": 1.7080494961257731e-06, + "loss": 0.5694386959075928, + "step": 2492 + }, + { + "epoch": 0.5747550432276657, + "grad_norm": 1.2536007668458176, + "learning_rate": 1.7077802775172124e-06, + "loss": 0.5026420950889587, + "step": 2493 + }, + { + "epoch": 0.574985590778098, + "grad_norm": 1.3171524239513317, + "learning_rate": 1.70751095607554e-06, + "loss": 0.472505658864975, + "step": 2494 + }, + { + "epoch": 0.5752161383285302, + "grad_norm": 1.228300898794264, + "learning_rate": 1.7072415318398856e-06, + "loss": 0.4240390658378601, + "step": 2495 + }, + { + "epoch": 0.5754466858789625, + "grad_norm": 1.2252372715220645, + "learning_rate": 1.706972004849394e-06, + "loss": 0.46073201298713684, + "step": 2496 + }, + { + "epoch": 0.5756772334293948, + "grad_norm": 1.386636595307024, + "learning_rate": 1.7067023751432247e-06, + "loss": 0.5547488927841187, + "step": 2497 + }, + { + "epoch": 0.5759077809798271, + "grad_norm": 1.2036393936459033, + "learning_rate": 1.7064326427605523e-06, + "loss": 0.4080501198768616, + "step": 2498 + }, + { + "epoch": 0.5761383285302594, + "grad_norm": 1.288559046017793, + "learning_rate": 1.7061628077405653e-06, + "loss": 0.5036013126373291, + "step": 2499 + }, + { + "epoch": 0.5763688760806917, + "grad_norm": 1.2217187839100583, + "learning_rate": 1.7058928701224683e-06, + "loss": 0.43331170082092285, + "step": 2500 + }, + { + "epoch": 0.5765994236311239, + "grad_norm": 1.2639727491075898, + "learning_rate": 1.7056228299454808e-06, + "loss": 0.5010221004486084, + "step": 2501 + }, + { + "epoch": 0.5768299711815562, + "grad_norm": 1.3767543189150935, + "learning_rate": 1.7053526872488365e-06, + "loss": 0.4393835663795471, + "step": 2502 + }, + { + "epoch": 0.5770605187319885, + "grad_norm": 1.6106797071078789, + "learning_rate": 1.7050824420717844e-06, + "loss": 0.4924699068069458, + "step": 2503 + }, + { + "epoch": 0.5772910662824208, + "grad_norm": 1.4001771231988265, + "learning_rate": 1.7048120944535883e-06, + "loss": 0.5389400720596313, + "step": 2504 + }, + { + "epoch": 0.577521613832853, + "grad_norm": 0.9828526919896293, + "learning_rate": 1.7045416444335267e-06, + "loss": 0.4584382176399231, + "step": 2505 + }, + { + "epoch": 0.5777521613832853, + "grad_norm": 1.2872729332849036, + "learning_rate": 1.7042710920508936e-06, + "loss": 0.5079721808433533, + "step": 2506 + }, + { + "epoch": 0.5779827089337176, + "grad_norm": 1.0324776991211704, + "learning_rate": 1.7040004373449973e-06, + "loss": 0.4421960115432739, + "step": 2507 + }, + { + "epoch": 0.5782132564841499, + "grad_norm": 1.2349316962791468, + "learning_rate": 1.7037296803551607e-06, + "loss": 0.4270066022872925, + "step": 2508 + }, + { + "epoch": 0.5784438040345822, + "grad_norm": 1.1151044129570897, + "learning_rate": 1.7034588211207224e-06, + "loss": 0.49084147810935974, + "step": 2509 + }, + { + "epoch": 0.5786743515850145, + "grad_norm": 1.021345503807098, + "learning_rate": 1.7031878596810354e-06, + "loss": 0.3782140612602234, + "step": 2510 + }, + { + "epoch": 0.5789048991354467, + "grad_norm": 1.175168927743577, + "learning_rate": 1.7029167960754676e-06, + "loss": 0.5119669437408447, + "step": 2511 + }, + { + "epoch": 0.5791354466858789, + "grad_norm": 1.3563408269400135, + "learning_rate": 1.7026456303434013e-06, + "loss": 0.5158041715621948, + "step": 2512 + }, + { + "epoch": 0.5793659942363112, + "grad_norm": 1.16044866200114, + "learning_rate": 1.7023743625242346e-06, + "loss": 0.487191766500473, + "step": 2513 + }, + { + "epoch": 0.5795965417867435, + "grad_norm": 1.1434101258085176, + "learning_rate": 1.7021029926573798e-06, + "loss": 0.4553701877593994, + "step": 2514 + }, + { + "epoch": 0.5798270893371757, + "grad_norm": 1.4681723884009028, + "learning_rate": 1.7018315207822639e-06, + "loss": 0.6393533945083618, + "step": 2515 + }, + { + "epoch": 0.580057636887608, + "grad_norm": 1.280445367600244, + "learning_rate": 1.701559946938329e-06, + "loss": 0.502502977848053, + "step": 2516 + }, + { + "epoch": 0.5802881844380403, + "grad_norm": 1.2538970141245356, + "learning_rate": 1.7012882711650321e-06, + "loss": 0.44787830114364624, + "step": 2517 + }, + { + "epoch": 0.5805187319884726, + "grad_norm": 1.1418537417579828, + "learning_rate": 1.7010164935018445e-06, + "loss": 0.541442334651947, + "step": 2518 + }, + { + "epoch": 0.5807492795389049, + "grad_norm": 1.0922323335203685, + "learning_rate": 1.7007446139882533e-06, + "loss": 0.40795016288757324, + "step": 2519 + }, + { + "epoch": 0.5809798270893372, + "grad_norm": 1.2342785454648022, + "learning_rate": 1.700472632663759e-06, + "loss": 0.5261722207069397, + "step": 2520 + }, + { + "epoch": 0.5812103746397694, + "grad_norm": 1.219906251892028, + "learning_rate": 1.7002005495678782e-06, + "loss": 0.5007427930831909, + "step": 2521 + }, + { + "epoch": 0.5814409221902017, + "grad_norm": 1.1929415488360804, + "learning_rate": 1.6999283647401416e-06, + "loss": 0.4413378834724426, + "step": 2522 + }, + { + "epoch": 0.581671469740634, + "grad_norm": 1.1216903504380007, + "learning_rate": 1.6996560782200949e-06, + "loss": 0.545518159866333, + "step": 2523 + }, + { + "epoch": 0.5819020172910663, + "grad_norm": 1.5308783706292048, + "learning_rate": 1.6993836900472984e-06, + "loss": 0.5393378734588623, + "step": 2524 + }, + { + "epoch": 0.5821325648414986, + "grad_norm": 1.198481766250496, + "learning_rate": 1.6991112002613272e-06, + "loss": 0.5119227170944214, + "step": 2525 + }, + { + "epoch": 0.5823631123919308, + "grad_norm": 1.6691866786968823, + "learning_rate": 1.6988386089017714e-06, + "loss": 0.5835440158843994, + "step": 2526 + }, + { + "epoch": 0.5825936599423631, + "grad_norm": 1.2405582078403503, + "learning_rate": 1.6985659160082354e-06, + "loss": 0.46577557921409607, + "step": 2527 + }, + { + "epoch": 0.5828242074927954, + "grad_norm": 1.1157785797661819, + "learning_rate": 1.698293121620339e-06, + "loss": 0.4569145441055298, + "step": 2528 + }, + { + "epoch": 0.5830547550432277, + "grad_norm": 1.2049909838697974, + "learning_rate": 1.698020225777716e-06, + "loss": 0.5309783220291138, + "step": 2529 + }, + { + "epoch": 0.58328530259366, + "grad_norm": 1.120647300804938, + "learning_rate": 1.6977472285200158e-06, + "loss": 0.48042014241218567, + "step": 2530 + }, + { + "epoch": 0.5835158501440922, + "grad_norm": 1.1626136587389744, + "learning_rate": 1.697474129886902e-06, + "loss": 0.49191930890083313, + "step": 2531 + }, + { + "epoch": 0.5837463976945245, + "grad_norm": 1.394105681476053, + "learning_rate": 1.6972009299180528e-06, + "loss": 0.512083113193512, + "step": 2532 + }, + { + "epoch": 0.5839769452449568, + "grad_norm": 1.4049225353163461, + "learning_rate": 1.696927628653161e-06, + "loss": 0.3880317211151123, + "step": 2533 + }, + { + "epoch": 0.5842074927953891, + "grad_norm": 1.6757831983931575, + "learning_rate": 1.6966542261319345e-06, + "loss": 0.45285481214523315, + "step": 2534 + }, + { + "epoch": 0.5844380403458214, + "grad_norm": 1.0843645857738136, + "learning_rate": 1.6963807223940966e-06, + "loss": 0.515269935131073, + "step": 2535 + }, + { + "epoch": 0.5846685878962536, + "grad_norm": 1.2972770484450729, + "learning_rate": 1.696107117479384e-06, + "loss": 0.5730916261672974, + "step": 2536 + }, + { + "epoch": 0.5848991354466859, + "grad_norm": 1.1822238481147822, + "learning_rate": 1.6958334114275482e-06, + "loss": 0.4333222508430481, + "step": 2537 + }, + { + "epoch": 0.5851296829971182, + "grad_norm": 1.0623302480533618, + "learning_rate": 1.695559604278357e-06, + "loss": 0.4325833022594452, + "step": 2538 + }, + { + "epoch": 0.5853602305475505, + "grad_norm": 1.1199883883456008, + "learning_rate": 1.6952856960715907e-06, + "loss": 0.41645392775535583, + "step": 2539 + }, + { + "epoch": 0.5855907780979827, + "grad_norm": 1.257944303936855, + "learning_rate": 1.6950116868470458e-06, + "loss": 0.5943924188613892, + "step": 2540 + }, + { + "epoch": 0.5858213256484149, + "grad_norm": 1.4798490098031383, + "learning_rate": 1.6947375766445328e-06, + "loss": 0.5209153294563293, + "step": 2541 + }, + { + "epoch": 0.5860518731988472, + "grad_norm": 1.248783629040668, + "learning_rate": 1.694463365503877e-06, + "loss": 0.5074030160903931, + "step": 2542 + }, + { + "epoch": 0.5862824207492795, + "grad_norm": 1.5168507350170992, + "learning_rate": 1.6941890534649188e-06, + "loss": 0.5399416089057922, + "step": 2543 + }, + { + "epoch": 0.5865129682997118, + "grad_norm": 1.3533022502962022, + "learning_rate": 1.6939146405675127e-06, + "loss": 0.4862229824066162, + "step": 2544 + }, + { + "epoch": 0.5867435158501441, + "grad_norm": 1.4917228203054256, + "learning_rate": 1.6936401268515278e-06, + "loss": 0.5033354759216309, + "step": 2545 + }, + { + "epoch": 0.5869740634005763, + "grad_norm": 1.2265536167956566, + "learning_rate": 1.693365512356848e-06, + "loss": 0.3859539031982422, + "step": 2546 + }, + { + "epoch": 0.5872046109510086, + "grad_norm": 1.2430465412866725, + "learning_rate": 1.6930907971233726e-06, + "loss": 0.584037184715271, + "step": 2547 + }, + { + "epoch": 0.5874351585014409, + "grad_norm": 1.1967021755581315, + "learning_rate": 1.6928159811910144e-06, + "loss": 0.5054824352264404, + "step": 2548 + }, + { + "epoch": 0.5876657060518732, + "grad_norm": 1.2511899777779398, + "learning_rate": 1.692541064599701e-06, + "loss": 0.510034441947937, + "step": 2549 + }, + { + "epoch": 0.5878962536023055, + "grad_norm": 1.4397980152012548, + "learning_rate": 1.6922660473893756e-06, + "loss": 0.45610690116882324, + "step": 2550 + }, + { + "epoch": 0.5881268011527377, + "grad_norm": 1.4588684788429476, + "learning_rate": 1.691990929599995e-06, + "loss": 0.5473066568374634, + "step": 2551 + }, + { + "epoch": 0.58835734870317, + "grad_norm": 1.5335219528163933, + "learning_rate": 1.691715711271531e-06, + "loss": 0.5674794316291809, + "step": 2552 + }, + { + "epoch": 0.5885878962536023, + "grad_norm": 1.2774108469534404, + "learning_rate": 1.6914403924439698e-06, + "loss": 0.522304892539978, + "step": 2553 + }, + { + "epoch": 0.5888184438040346, + "grad_norm": 1.2476953304994698, + "learning_rate": 1.6911649731573125e-06, + "loss": 0.45838260650634766, + "step": 2554 + }, + { + "epoch": 0.5890489913544669, + "grad_norm": 1.3098546735155092, + "learning_rate": 1.6908894534515748e-06, + "loss": 0.5382635593414307, + "step": 2555 + }, + { + "epoch": 0.5892795389048991, + "grad_norm": 1.357876961582571, + "learning_rate": 1.6906138333667865e-06, + "loss": 0.4829067289829254, + "step": 2556 + }, + { + "epoch": 0.5895100864553314, + "grad_norm": 1.2356399026717673, + "learning_rate": 1.6903381129429924e-06, + "loss": 0.49646514654159546, + "step": 2557 + }, + { + "epoch": 0.5897406340057637, + "grad_norm": 1.2637946757850296, + "learning_rate": 1.6900622922202522e-06, + "loss": 0.47126126289367676, + "step": 2558 + }, + { + "epoch": 0.589971181556196, + "grad_norm": 1.7976328265249715, + "learning_rate": 1.6897863712386396e-06, + "loss": 0.6280478239059448, + "step": 2559 + }, + { + "epoch": 0.5902017291066283, + "grad_norm": 1.2319066968459778, + "learning_rate": 1.6895103500382428e-06, + "loss": 0.5028468370437622, + "step": 2560 + }, + { + "epoch": 0.5904322766570606, + "grad_norm": 1.3749190554066881, + "learning_rate": 1.6892342286591648e-06, + "loss": 0.49227872490882874, + "step": 2561 + }, + { + "epoch": 0.5906628242074928, + "grad_norm": 1.2592035477612311, + "learning_rate": 1.6889580071415236e-06, + "loss": 0.5569860935211182, + "step": 2562 + }, + { + "epoch": 0.5908933717579251, + "grad_norm": 1.2538699608771755, + "learning_rate": 1.6886816855254511e-06, + "loss": 0.5619305968284607, + "step": 2563 + }, + { + "epoch": 0.5911239193083574, + "grad_norm": 1.3015347853061459, + "learning_rate": 1.6884052638510938e-06, + "loss": 0.5059368014335632, + "step": 2564 + }, + { + "epoch": 0.5913544668587897, + "grad_norm": 1.5453199631735335, + "learning_rate": 1.688128742158613e-06, + "loss": 0.546272873878479, + "step": 2565 + }, + { + "epoch": 0.591585014409222, + "grad_norm": 1.4159852524047818, + "learning_rate": 1.6878521204881842e-06, + "loss": 0.5926029682159424, + "step": 2566 + }, + { + "epoch": 0.5918155619596541, + "grad_norm": 1.2170290620681634, + "learning_rate": 1.687575398879998e-06, + "loss": 0.481456458568573, + "step": 2567 + }, + { + "epoch": 0.5920461095100864, + "grad_norm": 1.25729683235646, + "learning_rate": 1.6872985773742591e-06, + "loss": 0.5525637865066528, + "step": 2568 + }, + { + "epoch": 0.5922766570605187, + "grad_norm": 1.3697297123320187, + "learning_rate": 1.6870216560111869e-06, + "loss": 0.4845820665359497, + "step": 2569 + }, + { + "epoch": 0.592507204610951, + "grad_norm": 1.4648168720443564, + "learning_rate": 1.6867446348310147e-06, + "loss": 0.5175113677978516, + "step": 2570 + }, + { + "epoch": 0.5927377521613832, + "grad_norm": 1.2238128546340372, + "learning_rate": 1.6864675138739917e-06, + "loss": 0.4506435990333557, + "step": 2571 + }, + { + "epoch": 0.5929682997118155, + "grad_norm": 1.0467204345258045, + "learning_rate": 1.6861902931803796e-06, + "loss": 0.389871209859848, + "step": 2572 + }, + { + "epoch": 0.5931988472622478, + "grad_norm": 1.4348334874532345, + "learning_rate": 1.6859129727904565e-06, + "loss": 0.5711140632629395, + "step": 2573 + }, + { + "epoch": 0.5934293948126801, + "grad_norm": 1.2813025661978332, + "learning_rate": 1.6856355527445134e-06, + "loss": 0.5305861234664917, + "step": 2574 + }, + { + "epoch": 0.5936599423631124, + "grad_norm": 1.2339903535392835, + "learning_rate": 1.685358033082857e-06, + "loss": 0.5364730358123779, + "step": 2575 + }, + { + "epoch": 0.5938904899135447, + "grad_norm": 1.2845034573576757, + "learning_rate": 1.6850804138458087e-06, + "loss": 0.5428116917610168, + "step": 2576 + }, + { + "epoch": 0.5941210374639769, + "grad_norm": 1.323193557767603, + "learning_rate": 1.6848026950737028e-06, + "loss": 0.4862017035484314, + "step": 2577 + }, + { + "epoch": 0.5943515850144092, + "grad_norm": 1.5929939838626597, + "learning_rate": 1.6845248768068888e-06, + "loss": 0.4363023638725281, + "step": 2578 + }, + { + "epoch": 0.5945821325648415, + "grad_norm": 1.3848419713925337, + "learning_rate": 1.6842469590857315e-06, + "loss": 0.5355821847915649, + "step": 2579 + }, + { + "epoch": 0.5948126801152738, + "grad_norm": 1.288102716693538, + "learning_rate": 1.683968941950609e-06, + "loss": 0.5334150791168213, + "step": 2580 + }, + { + "epoch": 0.595043227665706, + "grad_norm": 1.4843631469204195, + "learning_rate": 1.6836908254419144e-06, + "loss": 0.5291295647621155, + "step": 2581 + }, + { + "epoch": 0.5952737752161383, + "grad_norm": 1.2881595327169846, + "learning_rate": 1.6834126096000552e-06, + "loss": 0.5341989994049072, + "step": 2582 + }, + { + "epoch": 0.5955043227665706, + "grad_norm": 1.477342922270466, + "learning_rate": 1.6831342944654532e-06, + "loss": 0.5400925874710083, + "step": 2583 + }, + { + "epoch": 0.5957348703170029, + "grad_norm": 1.122996582802429, + "learning_rate": 1.6828558800785446e-06, + "loss": 0.517853856086731, + "step": 2584 + }, + { + "epoch": 0.5959654178674352, + "grad_norm": 1.069073216139065, + "learning_rate": 1.6825773664797805e-06, + "loss": 0.501392662525177, + "step": 2585 + }, + { + "epoch": 0.5961959654178675, + "grad_norm": 1.1419308725251818, + "learning_rate": 1.6822987537096256e-06, + "loss": 0.41234803199768066, + "step": 2586 + }, + { + "epoch": 0.5964265129682997, + "grad_norm": 1.4363918485606544, + "learning_rate": 1.6820200418085598e-06, + "loss": 0.5413755178451538, + "step": 2587 + }, + { + "epoch": 0.596657060518732, + "grad_norm": 1.3047825946648908, + "learning_rate": 1.6817412308170763e-06, + "loss": 0.546847939491272, + "step": 2588 + }, + { + "epoch": 0.5968876080691643, + "grad_norm": 1.2129787332488573, + "learning_rate": 1.6814623207756844e-06, + "loss": 0.5571908950805664, + "step": 2589 + }, + { + "epoch": 0.5971181556195966, + "grad_norm": 1.3660847669739697, + "learning_rate": 1.6811833117249063e-06, + "loss": 0.6310220956802368, + "step": 2590 + }, + { + "epoch": 0.5973487031700289, + "grad_norm": 1.232002982807809, + "learning_rate": 1.6809042037052792e-06, + "loss": 0.5155299305915833, + "step": 2591 + }, + { + "epoch": 0.5975792507204611, + "grad_norm": 1.3004013438441295, + "learning_rate": 1.6806249967573547e-06, + "loss": 0.44853711128234863, + "step": 2592 + }, + { + "epoch": 0.5978097982708934, + "grad_norm": 1.0345081557729547, + "learning_rate": 1.6803456909216987e-06, + "loss": 0.4670305550098419, + "step": 2593 + }, + { + "epoch": 0.5980403458213257, + "grad_norm": 1.2039512585696586, + "learning_rate": 1.680066286238891e-06, + "loss": 0.5101944208145142, + "step": 2594 + }, + { + "epoch": 0.5982708933717579, + "grad_norm": 1.3955661622281244, + "learning_rate": 1.6797867827495267e-06, + "loss": 0.4965336322784424, + "step": 2595 + }, + { + "epoch": 0.5985014409221902, + "grad_norm": 1.2697803535398042, + "learning_rate": 1.6795071804942145e-06, + "loss": 0.45601886510849, + "step": 2596 + }, + { + "epoch": 0.5987319884726224, + "grad_norm": 1.1608194807011891, + "learning_rate": 1.6792274795135777e-06, + "loss": 0.5248251557350159, + "step": 2597 + }, + { + "epoch": 0.5989625360230547, + "grad_norm": 1.27827520401908, + "learning_rate": 1.678947679848254e-06, + "loss": 0.4440120458602905, + "step": 2598 + }, + { + "epoch": 0.599193083573487, + "grad_norm": 1.158625988953876, + "learning_rate": 1.6786677815388955e-06, + "loss": 0.40963852405548096, + "step": 2599 + }, + { + "epoch": 0.5994236311239193, + "grad_norm": 1.4419097975345436, + "learning_rate": 1.6783877846261683e-06, + "loss": 0.4669606685638428, + "step": 2600 + }, + { + "epoch": 0.5996541786743516, + "grad_norm": 1.242787814192596, + "learning_rate": 1.6781076891507531e-06, + "loss": 0.5647035837173462, + "step": 2601 + }, + { + "epoch": 0.5998847262247838, + "grad_norm": 1.3438978653222697, + "learning_rate": 1.6778274951533447e-06, + "loss": 0.41351717710494995, + "step": 2602 + }, + { + "epoch": 0.6001152737752161, + "grad_norm": 1.503998962721002, + "learning_rate": 1.6775472026746526e-06, + "loss": 0.41477349400520325, + "step": 2603 + }, + { + "epoch": 0.6003458213256484, + "grad_norm": 1.1280590999863394, + "learning_rate": 1.6772668117554005e-06, + "loss": 0.5451614260673523, + "step": 2604 + }, + { + "epoch": 0.6005763688760807, + "grad_norm": 1.0711470724108474, + "learning_rate": 1.6769863224363263e-06, + "loss": 0.4912793040275574, + "step": 2605 + }, + { + "epoch": 0.600806916426513, + "grad_norm": 1.7686256949774513, + "learning_rate": 1.6767057347581818e-06, + "loss": 0.544170618057251, + "step": 2606 + }, + { + "epoch": 0.6010374639769452, + "grad_norm": 1.7609398614368525, + "learning_rate": 1.6764250487617335e-06, + "loss": 0.5384647846221924, + "step": 2607 + }, + { + "epoch": 0.6012680115273775, + "grad_norm": 1.2039688741569976, + "learning_rate": 1.6761442644877626e-06, + "loss": 0.5443817973136902, + "step": 2608 + }, + { + "epoch": 0.6014985590778098, + "grad_norm": 1.368759520926053, + "learning_rate": 1.6758633819770637e-06, + "loss": 0.5432279706001282, + "step": 2609 + }, + { + "epoch": 0.6017291066282421, + "grad_norm": 1.6108602714580338, + "learning_rate": 1.6755824012704465e-06, + "loss": 0.4550110697746277, + "step": 2610 + }, + { + "epoch": 0.6019596541786744, + "grad_norm": 1.4123061030133546, + "learning_rate": 1.675301322408734e-06, + "loss": 0.5927733778953552, + "step": 2611 + }, + { + "epoch": 0.6021902017291066, + "grad_norm": 1.331775123117798, + "learning_rate": 1.6750201454327643e-06, + "loss": 0.5468032360076904, + "step": 2612 + }, + { + "epoch": 0.6024207492795389, + "grad_norm": 1.2756219325658316, + "learning_rate": 1.67473887038339e-06, + "loss": 0.5089372992515564, + "step": 2613 + }, + { + "epoch": 0.6026512968299712, + "grad_norm": 1.095233293104996, + "learning_rate": 1.6744574973014767e-06, + "loss": 0.5835996866226196, + "step": 2614 + }, + { + "epoch": 0.6028818443804035, + "grad_norm": 1.195604010821045, + "learning_rate": 1.6741760262279055e-06, + "loss": 0.559473991394043, + "step": 2615 + }, + { + "epoch": 0.6031123919308358, + "grad_norm": 1.0725638778600672, + "learning_rate": 1.6738944572035707e-06, + "loss": 0.4891049563884735, + "step": 2616 + }, + { + "epoch": 0.603342939481268, + "grad_norm": 1.6099327952013749, + "learning_rate": 1.6736127902693819e-06, + "loss": 0.560591459274292, + "step": 2617 + }, + { + "epoch": 0.6035734870317003, + "grad_norm": 1.3275205349545465, + "learning_rate": 1.6733310254662621e-06, + "loss": 0.5701932907104492, + "step": 2618 + }, + { + "epoch": 0.6038040345821326, + "grad_norm": 1.1973967263171255, + "learning_rate": 1.6730491628351486e-06, + "loss": 0.4753883481025696, + "step": 2619 + }, + { + "epoch": 0.6040345821325649, + "grad_norm": 1.347075643962319, + "learning_rate": 1.6727672024169936e-06, + "loss": 0.47594785690307617, + "step": 2620 + }, + { + "epoch": 0.6042651296829972, + "grad_norm": 1.6816380957027248, + "learning_rate": 1.6724851442527624e-06, + "loss": 0.45491674542427063, + "step": 2621 + }, + { + "epoch": 0.6044956772334293, + "grad_norm": 1.4513506801945901, + "learning_rate": 1.6722029883834358e-06, + "loss": 0.581158459186554, + "step": 2622 + }, + { + "epoch": 0.6047262247838616, + "grad_norm": 1.159366601543868, + "learning_rate": 1.671920734850008e-06, + "loss": 0.41334211826324463, + "step": 2623 + }, + { + "epoch": 0.6049567723342939, + "grad_norm": 1.5139603662934273, + "learning_rate": 1.6716383836934869e-06, + "loss": 0.47984325885772705, + "step": 2624 + }, + { + "epoch": 0.6051873198847262, + "grad_norm": 1.456724922494102, + "learning_rate": 1.6713559349548956e-06, + "loss": 0.4930099844932556, + "step": 2625 + }, + { + "epoch": 0.6054178674351585, + "grad_norm": 1.2383087695672668, + "learning_rate": 1.6710733886752708e-06, + "loss": 0.4817400872707367, + "step": 2626 + }, + { + "epoch": 0.6056484149855907, + "grad_norm": 1.1506762604804934, + "learning_rate": 1.670790744895664e-06, + "loss": 0.43708014488220215, + "step": 2627 + }, + { + "epoch": 0.605878962536023, + "grad_norm": 1.312898538931884, + "learning_rate": 1.6705080036571397e-06, + "loss": 0.5880838632583618, + "step": 2628 + }, + { + "epoch": 0.6061095100864553, + "grad_norm": 1.382585469079517, + "learning_rate": 1.6702251650007778e-06, + "loss": 0.5228145718574524, + "step": 2629 + }, + { + "epoch": 0.6063400576368876, + "grad_norm": 1.3711772829373339, + "learning_rate": 1.6699422289676718e-06, + "loss": 0.5998802185058594, + "step": 2630 + }, + { + "epoch": 0.6065706051873199, + "grad_norm": 1.1506769792285998, + "learning_rate": 1.669659195598929e-06, + "loss": 0.4910133481025696, + "step": 2631 + }, + { + "epoch": 0.6068011527377521, + "grad_norm": 1.1374580098234712, + "learning_rate": 1.6693760649356714e-06, + "loss": 0.5143430233001709, + "step": 2632 + }, + { + "epoch": 0.6070317002881844, + "grad_norm": 1.3291493600304582, + "learning_rate": 1.6690928370190352e-06, + "loss": 0.5022460222244263, + "step": 2633 + }, + { + "epoch": 0.6072622478386167, + "grad_norm": 1.2010474194378777, + "learning_rate": 1.66880951189017e-06, + "loss": 0.5120739936828613, + "step": 2634 + }, + { + "epoch": 0.607492795389049, + "grad_norm": 1.1655918439037996, + "learning_rate": 1.66852608959024e-06, + "loss": 0.5094325542449951, + "step": 2635 + }, + { + "epoch": 0.6077233429394813, + "grad_norm": 1.6545296058614192, + "learning_rate": 1.668242570160424e-06, + "loss": 0.5701217651367188, + "step": 2636 + }, + { + "epoch": 0.6079538904899136, + "grad_norm": 1.6680631360673492, + "learning_rate": 1.6679589536419142e-06, + "loss": 0.43836015462875366, + "step": 2637 + }, + { + "epoch": 0.6081844380403458, + "grad_norm": 1.1122806906078924, + "learning_rate": 1.667675240075917e-06, + "loss": 0.46791714429855347, + "step": 2638 + }, + { + "epoch": 0.6084149855907781, + "grad_norm": 1.2841557472628726, + "learning_rate": 1.6673914295036528e-06, + "loss": 0.5161240696907043, + "step": 2639 + }, + { + "epoch": 0.6086455331412104, + "grad_norm": 1.306348640330571, + "learning_rate": 1.667107521966357e-06, + "loss": 0.5272632837295532, + "step": 2640 + }, + { + "epoch": 0.6088760806916427, + "grad_norm": 1.2415691557258128, + "learning_rate": 1.666823517505278e-06, + "loss": 0.5190865993499756, + "step": 2641 + }, + { + "epoch": 0.609106628242075, + "grad_norm": 1.3383874728602996, + "learning_rate": 1.6665394161616788e-06, + "loss": 0.5450509190559387, + "step": 2642 + }, + { + "epoch": 0.6093371757925072, + "grad_norm": 1.1224851387365165, + "learning_rate": 1.6662552179768362e-06, + "loss": 0.4620264768600464, + "step": 2643 + }, + { + "epoch": 0.6095677233429395, + "grad_norm": 1.4790968500843589, + "learning_rate": 1.6659709229920412e-06, + "loss": 0.5052369236946106, + "step": 2644 + }, + { + "epoch": 0.6097982708933718, + "grad_norm": 1.3024913053280143, + "learning_rate": 1.6656865312485992e-06, + "loss": 0.5384722352027893, + "step": 2645 + }, + { + "epoch": 0.6100288184438041, + "grad_norm": 1.206674709939484, + "learning_rate": 1.6654020427878293e-06, + "loss": 0.4649192690849304, + "step": 2646 + }, + { + "epoch": 0.6102593659942364, + "grad_norm": 1.227154571916659, + "learning_rate": 1.6651174576510645e-06, + "loss": 0.5306943655014038, + "step": 2647 + }, + { + "epoch": 0.6104899135446686, + "grad_norm": 1.0456204729401848, + "learning_rate": 1.664832775879652e-06, + "loss": 0.5300636887550354, + "step": 2648 + }, + { + "epoch": 0.6107204610951009, + "grad_norm": 1.3444050876781224, + "learning_rate": 1.6645479975149535e-06, + "loss": 0.5449787378311157, + "step": 2649 + }, + { + "epoch": 0.6109510086455331, + "grad_norm": 1.313946366107906, + "learning_rate": 1.664263122598344e-06, + "loss": 0.5364447832107544, + "step": 2650 + }, + { + "epoch": 0.6111815561959654, + "grad_norm": 1.297750723710745, + "learning_rate": 1.6639781511712132e-06, + "loss": 0.4894382357597351, + "step": 2651 + }, + { + "epoch": 0.6114121037463977, + "grad_norm": 1.247105878162738, + "learning_rate": 1.663693083274964e-06, + "loss": 0.6076130867004395, + "step": 2652 + }, + { + "epoch": 0.6116426512968299, + "grad_norm": 1.2142432606071334, + "learning_rate": 1.6634079189510142e-06, + "loss": 0.5427982807159424, + "step": 2653 + }, + { + "epoch": 0.6118731988472622, + "grad_norm": 1.2083266453516546, + "learning_rate": 1.6631226582407952e-06, + "loss": 0.48615583777427673, + "step": 2654 + }, + { + "epoch": 0.6121037463976945, + "grad_norm": 1.269675522188132, + "learning_rate": 1.662837301185752e-06, + "loss": 0.5485595464706421, + "step": 2655 + }, + { + "epoch": 0.6123342939481268, + "grad_norm": 1.1870072528862068, + "learning_rate": 1.6625518478273444e-06, + "loss": 0.479083776473999, + "step": 2656 + }, + { + "epoch": 0.612564841498559, + "grad_norm": 1.1736970458680376, + "learning_rate": 1.6622662982070459e-06, + "loss": 0.4724195599555969, + "step": 2657 + }, + { + "epoch": 0.6127953890489913, + "grad_norm": 1.15099040352794, + "learning_rate": 1.6619806523663433e-06, + "loss": 0.5106989741325378, + "step": 2658 + }, + { + "epoch": 0.6130259365994236, + "grad_norm": 1.3245439515084314, + "learning_rate": 1.6616949103467387e-06, + "loss": 0.538973867893219, + "step": 2659 + }, + { + "epoch": 0.6132564841498559, + "grad_norm": 1.213303693990195, + "learning_rate": 1.661409072189747e-06, + "loss": 0.49926918745040894, + "step": 2660 + }, + { + "epoch": 0.6134870317002882, + "grad_norm": 1.0216209944980308, + "learning_rate": 1.6611231379368977e-06, + "loss": 0.4001106023788452, + "step": 2661 + }, + { + "epoch": 0.6137175792507205, + "grad_norm": 1.1517339301323273, + "learning_rate": 1.660837107629734e-06, + "loss": 0.44094690680503845, + "step": 2662 + }, + { + "epoch": 0.6139481268011527, + "grad_norm": 1.3250501139748343, + "learning_rate": 1.6605509813098129e-06, + "loss": 0.5321308374404907, + "step": 2663 + }, + { + "epoch": 0.614178674351585, + "grad_norm": 1.2378894255576012, + "learning_rate": 1.6602647590187058e-06, + "loss": 0.4906134009361267, + "step": 2664 + }, + { + "epoch": 0.6144092219020173, + "grad_norm": 1.3263504160433166, + "learning_rate": 1.659978440797998e-06, + "loss": 0.45977315306663513, + "step": 2665 + }, + { + "epoch": 0.6146397694524496, + "grad_norm": 1.3165371046851393, + "learning_rate": 1.6596920266892881e-06, + "loss": 0.5100743770599365, + "step": 2666 + }, + { + "epoch": 0.6148703170028819, + "grad_norm": 1.2693396142216467, + "learning_rate": 1.6594055167341896e-06, + "loss": 0.4486650228500366, + "step": 2667 + }, + { + "epoch": 0.6151008645533141, + "grad_norm": 1.433519074909581, + "learning_rate": 1.6591189109743292e-06, + "loss": 0.5782293081283569, + "step": 2668 + }, + { + "epoch": 0.6153314121037464, + "grad_norm": 1.3772589499745378, + "learning_rate": 1.6588322094513476e-06, + "loss": 0.5620462894439697, + "step": 2669 + }, + { + "epoch": 0.6155619596541787, + "grad_norm": 1.1994240432869185, + "learning_rate": 1.6585454122068997e-06, + "loss": 0.5833989381790161, + "step": 2670 + }, + { + "epoch": 0.615792507204611, + "grad_norm": 1.1206341611976025, + "learning_rate": 1.658258519282654e-06, + "loss": 0.4710484743118286, + "step": 2671 + }, + { + "epoch": 0.6160230547550433, + "grad_norm": 1.1981953327193071, + "learning_rate": 1.6579715307202932e-06, + "loss": 0.492951899766922, + "step": 2672 + }, + { + "epoch": 0.6162536023054755, + "grad_norm": 1.379813532435923, + "learning_rate": 1.6576844465615142e-06, + "loss": 0.537446141242981, + "step": 2673 + }, + { + "epoch": 0.6164841498559078, + "grad_norm": 1.271596422526649, + "learning_rate": 1.6573972668480263e-06, + "loss": 0.45248350501060486, + "step": 2674 + }, + { + "epoch": 0.6167146974063401, + "grad_norm": 1.0491664954993434, + "learning_rate": 1.6571099916215546e-06, + "loss": 0.48271554708480835, + "step": 2675 + }, + { + "epoch": 0.6169452449567724, + "grad_norm": 1.2893317600930594, + "learning_rate": 1.6568226209238367e-06, + "loss": 0.5537866353988647, + "step": 2676 + }, + { + "epoch": 0.6171757925072046, + "grad_norm": 1.2315065049281728, + "learning_rate": 1.6565351547966247e-06, + "loss": 0.44220322370529175, + "step": 2677 + }, + { + "epoch": 0.6174063400576368, + "grad_norm": 1.6057841417293577, + "learning_rate": 1.6562475932816847e-06, + "loss": 0.545140266418457, + "step": 2678 + }, + { + "epoch": 0.6176368876080691, + "grad_norm": 1.298272810341285, + "learning_rate": 1.655959936420796e-06, + "loss": 0.499568372964859, + "step": 2679 + }, + { + "epoch": 0.6178674351585014, + "grad_norm": 1.2211993021087002, + "learning_rate": 1.655672184255753e-06, + "loss": 0.5183538198471069, + "step": 2680 + }, + { + "epoch": 0.6180979827089337, + "grad_norm": 1.4169596526310997, + "learning_rate": 1.6553843368283618e-06, + "loss": 0.6033202409744263, + "step": 2681 + }, + { + "epoch": 0.618328530259366, + "grad_norm": 1.1460338801726684, + "learning_rate": 1.6550963941804444e-06, + "loss": 0.45113763213157654, + "step": 2682 + }, + { + "epoch": 0.6185590778097982, + "grad_norm": 1.1306727750017995, + "learning_rate": 1.6548083563538358e-06, + "loss": 0.5167637467384338, + "step": 2683 + }, + { + "epoch": 0.6187896253602305, + "grad_norm": 1.301069241875041, + "learning_rate": 1.6545202233903846e-06, + "loss": 0.5253550410270691, + "step": 2684 + }, + { + "epoch": 0.6190201729106628, + "grad_norm": 1.5130397732960972, + "learning_rate": 1.6542319953319544e-06, + "loss": 0.5394268035888672, + "step": 2685 + }, + { + "epoch": 0.6192507204610951, + "grad_norm": 1.2309300315341822, + "learning_rate": 1.6539436722204206e-06, + "loss": 0.4581655263900757, + "step": 2686 + }, + { + "epoch": 0.6194812680115274, + "grad_norm": 1.3473232843540013, + "learning_rate": 1.6536552540976742e-06, + "loss": 0.4924882650375366, + "step": 2687 + }, + { + "epoch": 0.6197118155619596, + "grad_norm": 1.3212128637586775, + "learning_rate": 1.653366741005619e-06, + "loss": 0.40816426277160645, + "step": 2688 + }, + { + "epoch": 0.6199423631123919, + "grad_norm": 1.487996666110314, + "learning_rate": 1.6530781329861735e-06, + "loss": 0.386010080575943, + "step": 2689 + }, + { + "epoch": 0.6201729106628242, + "grad_norm": 1.2404836484730133, + "learning_rate": 1.6527894300812693e-06, + "loss": 0.4776495099067688, + "step": 2690 + }, + { + "epoch": 0.6204034582132565, + "grad_norm": 1.268980085495045, + "learning_rate": 1.6525006323328514e-06, + "loss": 0.5063761472702026, + "step": 2691 + }, + { + "epoch": 0.6206340057636888, + "grad_norm": 1.327914489138475, + "learning_rate": 1.6522117397828795e-06, + "loss": 0.5012977123260498, + "step": 2692 + }, + { + "epoch": 0.620864553314121, + "grad_norm": 1.5586706262282004, + "learning_rate": 1.6519227524733266e-06, + "loss": 0.567261815071106, + "step": 2693 + }, + { + "epoch": 0.6210951008645533, + "grad_norm": 1.080122828705907, + "learning_rate": 1.6516336704461796e-06, + "loss": 0.4405871629714966, + "step": 2694 + }, + { + "epoch": 0.6213256484149856, + "grad_norm": 1.2897294207591083, + "learning_rate": 1.6513444937434392e-06, + "loss": 0.5243908166885376, + "step": 2695 + }, + { + "epoch": 0.6215561959654179, + "grad_norm": 1.230064101444913, + "learning_rate": 1.6510552224071198e-06, + "loss": 0.4709666073322296, + "step": 2696 + }, + { + "epoch": 0.6217867435158502, + "grad_norm": 1.222110695276511, + "learning_rate": 1.650765856479249e-06, + "loss": 0.48736077547073364, + "step": 2697 + }, + { + "epoch": 0.6220172910662825, + "grad_norm": 1.3590016738405688, + "learning_rate": 1.6504763960018692e-06, + "loss": 0.46533918380737305, + "step": 2698 + }, + { + "epoch": 0.6222478386167147, + "grad_norm": 1.35575800489149, + "learning_rate": 1.6501868410170359e-06, + "loss": 0.4393872916698456, + "step": 2699 + }, + { + "epoch": 0.622478386167147, + "grad_norm": 1.3208545237029374, + "learning_rate": 1.6498971915668183e-06, + "loss": 0.49825766682624817, + "step": 2700 + }, + { + "epoch": 0.6227089337175793, + "grad_norm": 1.2486030770302066, + "learning_rate": 1.6496074476932993e-06, + "loss": 0.47903305292129517, + "step": 2701 + }, + { + "epoch": 0.6229394812680116, + "grad_norm": 1.4622129412897775, + "learning_rate": 1.6493176094385764e-06, + "loss": 0.6245414018630981, + "step": 2702 + }, + { + "epoch": 0.6231700288184439, + "grad_norm": 1.1866684251840278, + "learning_rate": 1.6490276768447591e-06, + "loss": 0.5198970437049866, + "step": 2703 + }, + { + "epoch": 0.6234005763688761, + "grad_norm": 1.6440881721015699, + "learning_rate": 1.6487376499539722e-06, + "loss": 0.5924375057220459, + "step": 2704 + }, + { + "epoch": 0.6236311239193083, + "grad_norm": 1.2404517941438844, + "learning_rate": 1.6484475288083534e-06, + "loss": 0.4759942889213562, + "step": 2705 + }, + { + "epoch": 0.6238616714697406, + "grad_norm": 1.2374460666011324, + "learning_rate": 1.6481573134500547e-06, + "loss": 0.4802717864513397, + "step": 2706 + }, + { + "epoch": 0.6240922190201729, + "grad_norm": 1.2053044068656478, + "learning_rate": 1.6478670039212404e-06, + "loss": 0.4586595892906189, + "step": 2707 + }, + { + "epoch": 0.6243227665706051, + "grad_norm": 1.2429891166800078, + "learning_rate": 1.6475766002640904e-06, + "loss": 0.5571914911270142, + "step": 2708 + }, + { + "epoch": 0.6245533141210374, + "grad_norm": 1.4255139010134108, + "learning_rate": 1.647286102520797e-06, + "loss": 0.5134386420249939, + "step": 2709 + }, + { + "epoch": 0.6247838616714697, + "grad_norm": 1.2269438614282533, + "learning_rate": 1.6469955107335664e-06, + "loss": 0.5108852386474609, + "step": 2710 + }, + { + "epoch": 0.625014409221902, + "grad_norm": 1.272719981801389, + "learning_rate": 1.6467048249446187e-06, + "loss": 0.4626818895339966, + "step": 2711 + }, + { + "epoch": 0.6252449567723343, + "grad_norm": 1.2442767706548579, + "learning_rate": 1.6464140451961875e-06, + "loss": 0.5615794062614441, + "step": 2712 + }, + { + "epoch": 0.6254755043227666, + "grad_norm": 1.4865776451893986, + "learning_rate": 1.6461231715305197e-06, + "loss": 0.5956846475601196, + "step": 2713 + }, + { + "epoch": 0.6257060518731988, + "grad_norm": 1.360476557499295, + "learning_rate": 1.6458322039898768e-06, + "loss": 0.5330410003662109, + "step": 2714 + }, + { + "epoch": 0.6259365994236311, + "grad_norm": 1.2714727610034695, + "learning_rate": 1.6455411426165334e-06, + "loss": 0.4754364490509033, + "step": 2715 + }, + { + "epoch": 0.6261671469740634, + "grad_norm": 1.194353411400933, + "learning_rate": 1.6452499874527771e-06, + "loss": 0.4814460277557373, + "step": 2716 + }, + { + "epoch": 0.6263976945244957, + "grad_norm": 1.1993429832567999, + "learning_rate": 1.6449587385409101e-06, + "loss": 0.5211490392684937, + "step": 2717 + }, + { + "epoch": 0.626628242074928, + "grad_norm": 1.685645733076896, + "learning_rate": 1.6446673959232478e-06, + "loss": 0.43656522035598755, + "step": 2718 + }, + { + "epoch": 0.6268587896253602, + "grad_norm": 1.3017375720981144, + "learning_rate": 1.6443759596421192e-06, + "loss": 0.5601837635040283, + "step": 2719 + }, + { + "epoch": 0.6270893371757925, + "grad_norm": 1.2022937294499874, + "learning_rate": 1.644084429739867e-06, + "loss": 0.5415230989456177, + "step": 2720 + }, + { + "epoch": 0.6273198847262248, + "grad_norm": 1.2148604998226076, + "learning_rate": 1.6437928062588473e-06, + "loss": 0.5256547927856445, + "step": 2721 + }, + { + "epoch": 0.6275504322766571, + "grad_norm": 1.5289209117578932, + "learning_rate": 1.6435010892414303e-06, + "loss": 0.4892258644104004, + "step": 2722 + }, + { + "epoch": 0.6277809798270894, + "grad_norm": 1.4507176159512447, + "learning_rate": 1.6432092787299992e-06, + "loss": 0.6185523271560669, + "step": 2723 + }, + { + "epoch": 0.6280115273775216, + "grad_norm": 1.230316705129502, + "learning_rate": 1.642917374766951e-06, + "loss": 0.4964678883552551, + "step": 2724 + }, + { + "epoch": 0.6282420749279539, + "grad_norm": 1.6264616977516906, + "learning_rate": 1.6426253773946962e-06, + "loss": 0.5079313516616821, + "step": 2725 + }, + { + "epoch": 0.6284726224783862, + "grad_norm": 1.1997980442079967, + "learning_rate": 1.6423332866556594e-06, + "loss": 0.5262078046798706, + "step": 2726 + }, + { + "epoch": 0.6287031700288185, + "grad_norm": 1.1471642727061313, + "learning_rate": 1.642041102592278e-06, + "loss": 0.5222228765487671, + "step": 2727 + }, + { + "epoch": 0.6289337175792508, + "grad_norm": 1.1511187711328779, + "learning_rate": 1.6417488252470038e-06, + "loss": 0.4470428228378296, + "step": 2728 + }, + { + "epoch": 0.629164265129683, + "grad_norm": 1.5218800246743136, + "learning_rate": 1.6414564546623007e-06, + "loss": 0.5850222110748291, + "step": 2729 + }, + { + "epoch": 0.6293948126801153, + "grad_norm": 1.3890766989379473, + "learning_rate": 1.6411639908806477e-06, + "loss": 0.5844837427139282, + "step": 2730 + }, + { + "epoch": 0.6296253602305476, + "grad_norm": 1.390267605631488, + "learning_rate": 1.6408714339445373e-06, + "loss": 0.5246438384056091, + "step": 2731 + }, + { + "epoch": 0.6298559077809798, + "grad_norm": 1.3255906196936882, + "learning_rate": 1.640578783896474e-06, + "loss": 0.47431260347366333, + "step": 2732 + }, + { + "epoch": 0.630086455331412, + "grad_norm": 1.1973273797462285, + "learning_rate": 1.6402860407789772e-06, + "loss": 0.5054109692573547, + "step": 2733 + }, + { + "epoch": 0.6303170028818443, + "grad_norm": 1.2276344696691943, + "learning_rate": 1.6399932046345794e-06, + "loss": 0.47106099128723145, + "step": 2734 + }, + { + "epoch": 0.6305475504322766, + "grad_norm": 1.209538624936261, + "learning_rate": 1.6397002755058269e-06, + "loss": 0.4557371139526367, + "step": 2735 + }, + { + "epoch": 0.6307780979827089, + "grad_norm": 1.458954969848585, + "learning_rate": 1.6394072534352787e-06, + "loss": 0.6111027002334595, + "step": 2736 + }, + { + "epoch": 0.6310086455331412, + "grad_norm": 1.5969397340893883, + "learning_rate": 1.6391141384655085e-06, + "loss": 0.5637114644050598, + "step": 2737 + }, + { + "epoch": 0.6312391930835735, + "grad_norm": 1.087830166618947, + "learning_rate": 1.6388209306391024e-06, + "loss": 0.43901634216308594, + "step": 2738 + }, + { + "epoch": 0.6314697406340057, + "grad_norm": 1.2084082927136561, + "learning_rate": 1.6385276299986608e-06, + "loss": 0.5315161347389221, + "step": 2739 + }, + { + "epoch": 0.631700288184438, + "grad_norm": 1.0744838804086243, + "learning_rate": 1.6382342365867968e-06, + "loss": 0.396445095539093, + "step": 2740 + }, + { + "epoch": 0.6319308357348703, + "grad_norm": 1.253067633447061, + "learning_rate": 1.637940750446138e-06, + "loss": 0.5279150605201721, + "step": 2741 + }, + { + "epoch": 0.6321613832853026, + "grad_norm": 1.3477604514012602, + "learning_rate": 1.6376471716193241e-06, + "loss": 0.5353842377662659, + "step": 2742 + }, + { + "epoch": 0.6323919308357349, + "grad_norm": 1.39680851665267, + "learning_rate": 1.6373535001490095e-06, + "loss": 0.5103511214256287, + "step": 2743 + }, + { + "epoch": 0.6326224783861671, + "grad_norm": 1.4406962358010578, + "learning_rate": 1.637059736077862e-06, + "loss": 0.5704224109649658, + "step": 2744 + }, + { + "epoch": 0.6328530259365994, + "grad_norm": 1.2410789821967132, + "learning_rate": 1.6367658794485615e-06, + "loss": 0.6006341576576233, + "step": 2745 + }, + { + "epoch": 0.6330835734870317, + "grad_norm": 1.1849794356119856, + "learning_rate": 1.6364719303038031e-06, + "loss": 0.5593788027763367, + "step": 2746 + }, + { + "epoch": 0.633314121037464, + "grad_norm": 1.2983683254340128, + "learning_rate": 1.6361778886862944e-06, + "loss": 0.5924923419952393, + "step": 2747 + }, + { + "epoch": 0.6335446685878963, + "grad_norm": 1.4250134538892931, + "learning_rate": 1.6358837546387565e-06, + "loss": 0.4591634273529053, + "step": 2748 + }, + { + "epoch": 0.6337752161383285, + "grad_norm": 1.4268958179759377, + "learning_rate": 1.635589528203924e-06, + "loss": 0.5870101451873779, + "step": 2749 + }, + { + "epoch": 0.6340057636887608, + "grad_norm": 1.2390542829845288, + "learning_rate": 1.635295209424545e-06, + "loss": 0.5609645843505859, + "step": 2750 + }, + { + "epoch": 0.6342363112391931, + "grad_norm": 1.2219097215373855, + "learning_rate": 1.6350007983433808e-06, + "loss": 0.49148842692375183, + "step": 2751 + }, + { + "epoch": 0.6344668587896254, + "grad_norm": 1.2092576484571942, + "learning_rate": 1.6347062950032063e-06, + "loss": 0.4845973253250122, + "step": 2752 + }, + { + "epoch": 0.6346974063400577, + "grad_norm": 1.633007866264204, + "learning_rate": 1.63441169944681e-06, + "loss": 0.5573195815086365, + "step": 2753 + }, + { + "epoch": 0.63492795389049, + "grad_norm": 1.1092730661182224, + "learning_rate": 1.6341170117169934e-06, + "loss": 0.473361998796463, + "step": 2754 + }, + { + "epoch": 0.6351585014409222, + "grad_norm": 1.3324554132101556, + "learning_rate": 1.6338222318565716e-06, + "loss": 0.47250160574913025, + "step": 2755 + }, + { + "epoch": 0.6353890489913545, + "grad_norm": 1.088490649248913, + "learning_rate": 1.633527359908373e-06, + "loss": 0.5084018707275391, + "step": 2756 + }, + { + "epoch": 0.6356195965417868, + "grad_norm": 1.5890648743750697, + "learning_rate": 1.6332323959152396e-06, + "loss": 0.5426309108734131, + "step": 2757 + }, + { + "epoch": 0.6358501440922191, + "grad_norm": 1.454023530885245, + "learning_rate": 1.6329373399200261e-06, + "loss": 0.3922150135040283, + "step": 2758 + }, + { + "epoch": 0.6360806916426512, + "grad_norm": 1.5606393342793812, + "learning_rate": 1.6326421919656018e-06, + "loss": 0.4642726182937622, + "step": 2759 + }, + { + "epoch": 0.6363112391930835, + "grad_norm": 1.3512521798508315, + "learning_rate": 1.632346952094848e-06, + "loss": 0.5146275758743286, + "step": 2760 + }, + { + "epoch": 0.6365417867435158, + "grad_norm": 1.161835041029154, + "learning_rate": 1.6320516203506605e-06, + "loss": 0.4769957959651947, + "step": 2761 + }, + { + "epoch": 0.6367723342939481, + "grad_norm": 1.202865986079401, + "learning_rate": 1.6317561967759473e-06, + "loss": 0.5173189640045166, + "step": 2762 + }, + { + "epoch": 0.6370028818443804, + "grad_norm": 1.3233830027378666, + "learning_rate": 1.6314606814136311e-06, + "loss": 0.48731061816215515, + "step": 2763 + }, + { + "epoch": 0.6372334293948126, + "grad_norm": 1.3622928770440887, + "learning_rate": 1.6311650743066468e-06, + "loss": 0.40830880403518677, + "step": 2764 + }, + { + "epoch": 0.6374639769452449, + "grad_norm": 1.3508081076594225, + "learning_rate": 1.630869375497943e-06, + "loss": 0.5563752055168152, + "step": 2765 + }, + { + "epoch": 0.6376945244956772, + "grad_norm": 1.1200426971373956, + "learning_rate": 1.6305735850304816e-06, + "loss": 0.4951537847518921, + "step": 2766 + }, + { + "epoch": 0.6379250720461095, + "grad_norm": 1.3503538208414971, + "learning_rate": 1.630277702947238e-06, + "loss": 0.5482779741287231, + "step": 2767 + }, + { + "epoch": 0.6381556195965418, + "grad_norm": 1.3487057607161679, + "learning_rate": 1.629981729291201e-06, + "loss": 0.5211485624313354, + "step": 2768 + }, + { + "epoch": 0.638386167146974, + "grad_norm": 1.459955596146605, + "learning_rate": 1.6296856641053723e-06, + "loss": 0.5225323438644409, + "step": 2769 + }, + { + "epoch": 0.6386167146974063, + "grad_norm": 1.2949707858976387, + "learning_rate": 1.629389507432767e-06, + "loss": 0.4153757393360138, + "step": 2770 + }, + { + "epoch": 0.6388472622478386, + "grad_norm": 1.2904066759459198, + "learning_rate": 1.6290932593164138e-06, + "loss": 0.47255784273147583, + "step": 2771 + }, + { + "epoch": 0.6390778097982709, + "grad_norm": 1.4584165547508758, + "learning_rate": 1.6287969197993542e-06, + "loss": 0.5152851939201355, + "step": 2772 + }, + { + "epoch": 0.6393083573487032, + "grad_norm": 1.451234860527591, + "learning_rate": 1.6285004889246436e-06, + "loss": 0.48233699798583984, + "step": 2773 + }, + { + "epoch": 0.6395389048991355, + "grad_norm": 1.3457498152659688, + "learning_rate": 1.62820396673535e-06, + "loss": 0.4834440350532532, + "step": 2774 + }, + { + "epoch": 0.6397694524495677, + "grad_norm": 1.3644783916718317, + "learning_rate": 1.627907353274555e-06, + "loss": 0.5308742523193359, + "step": 2775 + }, + { + "epoch": 0.64, + "grad_norm": 1.231882052164345, + "learning_rate": 1.6276106485853537e-06, + "loss": 0.40322256088256836, + "step": 2776 + }, + { + "epoch": 0.6402305475504323, + "grad_norm": 1.2547531033607235, + "learning_rate": 1.6273138527108541e-06, + "loss": 0.4684373140335083, + "step": 2777 + }, + { + "epoch": 0.6404610951008646, + "grad_norm": 1.1482277712335578, + "learning_rate": 1.6270169656941772e-06, + "loss": 0.46157366037368774, + "step": 2778 + }, + { + "epoch": 0.6406916426512969, + "grad_norm": 1.2771654472074374, + "learning_rate": 1.6267199875784585e-06, + "loss": 0.497269868850708, + "step": 2779 + }, + { + "epoch": 0.6409221902017291, + "grad_norm": 1.2070868658726128, + "learning_rate": 1.6264229184068447e-06, + "loss": 0.4726135730743408, + "step": 2780 + }, + { + "epoch": 0.6411527377521614, + "grad_norm": 1.126951114278981, + "learning_rate": 1.6261257582224976e-06, + "loss": 0.5554429292678833, + "step": 2781 + }, + { + "epoch": 0.6413832853025937, + "grad_norm": 1.7654211496256054, + "learning_rate": 1.6258285070685914e-06, + "loss": 0.4444640278816223, + "step": 2782 + }, + { + "epoch": 0.641613832853026, + "grad_norm": 1.230279574199785, + "learning_rate": 1.6255311649883133e-06, + "loss": 0.5426352024078369, + "step": 2783 + }, + { + "epoch": 0.6418443804034583, + "grad_norm": 1.4455139026368424, + "learning_rate": 1.6252337320248643e-06, + "loss": 0.563956081867218, + "step": 2784 + }, + { + "epoch": 0.6420749279538905, + "grad_norm": 1.3541204424273805, + "learning_rate": 1.6249362082214584e-06, + "loss": 0.5468907952308655, + "step": 2785 + }, + { + "epoch": 0.6423054755043228, + "grad_norm": 1.212988322705407, + "learning_rate": 1.6246385936213222e-06, + "loss": 0.5481438636779785, + "step": 2786 + }, + { + "epoch": 0.642536023054755, + "grad_norm": 1.4319478354990522, + "learning_rate": 1.6243408882676962e-06, + "loss": 0.6007488965988159, + "step": 2787 + }, + { + "epoch": 0.6427665706051873, + "grad_norm": 1.2664699137320072, + "learning_rate": 1.6240430922038345e-06, + "loss": 0.4572671055793762, + "step": 2788 + }, + { + "epoch": 0.6429971181556196, + "grad_norm": 1.4178410912876163, + "learning_rate": 1.6237452054730029e-06, + "loss": 0.4373534321784973, + "step": 2789 + }, + { + "epoch": 0.6432276657060518, + "grad_norm": 1.2711378651618674, + "learning_rate": 1.6234472281184821e-06, + "loss": 0.5494809150695801, + "step": 2790 + }, + { + "epoch": 0.6434582132564841, + "grad_norm": 1.296311074035635, + "learning_rate": 1.6231491601835643e-06, + "loss": 0.5352902412414551, + "step": 2791 + }, + { + "epoch": 0.6436887608069164, + "grad_norm": 1.34469702291001, + "learning_rate": 1.622851001711556e-06, + "loss": 0.5362370014190674, + "step": 2792 + }, + { + "epoch": 0.6439193083573487, + "grad_norm": 1.0955286846537269, + "learning_rate": 1.6225527527457768e-06, + "loss": 0.5180599689483643, + "step": 2793 + }, + { + "epoch": 0.644149855907781, + "grad_norm": 1.227300788277134, + "learning_rate": 1.6222544133295585e-06, + "loss": 0.4420490860939026, + "step": 2794 + }, + { + "epoch": 0.6443804034582132, + "grad_norm": 1.1282456913511916, + "learning_rate": 1.6219559835062472e-06, + "loss": 0.47733911871910095, + "step": 2795 + }, + { + "epoch": 0.6446109510086455, + "grad_norm": 1.2972747931781763, + "learning_rate": 1.6216574633192019e-06, + "loss": 0.5379013419151306, + "step": 2796 + }, + { + "epoch": 0.6448414985590778, + "grad_norm": 1.488376687235578, + "learning_rate": 1.6213588528117941e-06, + "loss": 0.48056793212890625, + "step": 2797 + }, + { + "epoch": 0.6450720461095101, + "grad_norm": 1.314531005053451, + "learning_rate": 1.6210601520274088e-06, + "loss": 0.6051667332649231, + "step": 2798 + }, + { + "epoch": 0.6453025936599424, + "grad_norm": 1.3389075396682297, + "learning_rate": 1.620761361009444e-06, + "loss": 0.5118743777275085, + "step": 2799 + }, + { + "epoch": 0.6455331412103746, + "grad_norm": 1.1684233871088228, + "learning_rate": 1.6204624798013113e-06, + "loss": 0.5176658630371094, + "step": 2800 + }, + { + "epoch": 0.6457636887608069, + "grad_norm": 1.1942250008445177, + "learning_rate": 1.6201635084464346e-06, + "loss": 0.4896622896194458, + "step": 2801 + }, + { + "epoch": 0.6459942363112392, + "grad_norm": 1.203968558366863, + "learning_rate": 1.619864446988252e-06, + "loss": 0.5606796741485596, + "step": 2802 + }, + { + "epoch": 0.6462247838616715, + "grad_norm": 1.1802584853211424, + "learning_rate": 1.6195652954702129e-06, + "loss": 0.4668291509151459, + "step": 2803 + }, + { + "epoch": 0.6464553314121038, + "grad_norm": 1.419071418109651, + "learning_rate": 1.619266053935782e-06, + "loss": 0.6159840822219849, + "step": 2804 + }, + { + "epoch": 0.646685878962536, + "grad_norm": 1.3809858964710453, + "learning_rate": 1.6189667224284355e-06, + "loss": 0.5556408166885376, + "step": 2805 + }, + { + "epoch": 0.6469164265129683, + "grad_norm": 1.3502640253584333, + "learning_rate": 1.6186673009916634e-06, + "loss": 0.4046534299850464, + "step": 2806 + }, + { + "epoch": 0.6471469740634006, + "grad_norm": 1.2457573044005343, + "learning_rate": 1.618367789668968e-06, + "loss": 0.5278058052062988, + "step": 2807 + }, + { + "epoch": 0.6473775216138329, + "grad_norm": 1.5332124630047532, + "learning_rate": 1.6180681885038656e-06, + "loss": 0.4232120215892792, + "step": 2808 + }, + { + "epoch": 0.6476080691642652, + "grad_norm": 1.0482270723606208, + "learning_rate": 1.617768497539885e-06, + "loss": 0.4960458278656006, + "step": 2809 + }, + { + "epoch": 0.6478386167146974, + "grad_norm": 1.4950274929002954, + "learning_rate": 1.6174687168205685e-06, + "loss": 0.49290311336517334, + "step": 2810 + }, + { + "epoch": 0.6480691642651297, + "grad_norm": 1.0907019420926827, + "learning_rate": 1.6171688463894706e-06, + "loss": 0.49928852915763855, + "step": 2811 + }, + { + "epoch": 0.648299711815562, + "grad_norm": 1.1125971886307398, + "learning_rate": 1.6168688862901597e-06, + "loss": 0.5087406039237976, + "step": 2812 + }, + { + "epoch": 0.6485302593659943, + "grad_norm": 1.1525453732948439, + "learning_rate": 1.616568836566217e-06, + "loss": 0.5653507113456726, + "step": 2813 + }, + { + "epoch": 0.6487608069164265, + "grad_norm": 1.2308519470610553, + "learning_rate": 1.6162686972612361e-06, + "loss": 0.5233205556869507, + "step": 2814 + }, + { + "epoch": 0.6489913544668587, + "grad_norm": 1.173802957785401, + "learning_rate": 1.6159684684188242e-06, + "loss": 0.4954048991203308, + "step": 2815 + }, + { + "epoch": 0.649221902017291, + "grad_norm": 1.6344259227585325, + "learning_rate": 1.6156681500826022e-06, + "loss": 0.5647044777870178, + "step": 2816 + }, + { + "epoch": 0.6494524495677233, + "grad_norm": 1.3909620583545594, + "learning_rate": 1.6153677422962022e-06, + "loss": 0.5182117223739624, + "step": 2817 + }, + { + "epoch": 0.6496829971181556, + "grad_norm": 1.4001152309390272, + "learning_rate": 1.615067245103271e-06, + "loss": 0.47642621397972107, + "step": 2818 + }, + { + "epoch": 0.6499135446685879, + "grad_norm": 1.2874703217432304, + "learning_rate": 1.6147666585474672e-06, + "loss": 0.4365708827972412, + "step": 2819 + }, + { + "epoch": 0.6501440922190201, + "grad_norm": 1.2119512472522103, + "learning_rate": 1.6144659826724635e-06, + "loss": 0.46274715662002563, + "step": 2820 + }, + { + "epoch": 0.6503746397694524, + "grad_norm": 1.1857173604007696, + "learning_rate": 1.6141652175219447e-06, + "loss": 0.3781696557998657, + "step": 2821 + }, + { + "epoch": 0.6506051873198847, + "grad_norm": 1.1820429620454833, + "learning_rate": 1.613864363139609e-06, + "loss": 0.46696609258651733, + "step": 2822 + }, + { + "epoch": 0.650835734870317, + "grad_norm": 1.3911928241305225, + "learning_rate": 1.6135634195691668e-06, + "loss": 0.49795544147491455, + "step": 2823 + }, + { + "epoch": 0.6510662824207493, + "grad_norm": 1.3162254980705577, + "learning_rate": 1.6132623868543424e-06, + "loss": 0.43032360076904297, + "step": 2824 + }, + { + "epoch": 0.6512968299711815, + "grad_norm": 1.4132619120351104, + "learning_rate": 1.6129612650388734e-06, + "loss": 0.4829779267311096, + "step": 2825 + }, + { + "epoch": 0.6515273775216138, + "grad_norm": 1.4598202738444805, + "learning_rate": 1.6126600541665089e-06, + "loss": 0.4899333119392395, + "step": 2826 + }, + { + "epoch": 0.6517579250720461, + "grad_norm": 1.458031941277911, + "learning_rate": 1.6123587542810118e-06, + "loss": 0.5031615495681763, + "step": 2827 + }, + { + "epoch": 0.6519884726224784, + "grad_norm": 1.2733901426169307, + "learning_rate": 1.6120573654261578e-06, + "loss": 0.4357362985610962, + "step": 2828 + }, + { + "epoch": 0.6522190201729107, + "grad_norm": 1.2220684621989164, + "learning_rate": 1.611755887645736e-06, + "loss": 0.5158397555351257, + "step": 2829 + }, + { + "epoch": 0.652449567723343, + "grad_norm": 1.4578040902897245, + "learning_rate": 1.6114543209835476e-06, + "loss": 0.5768516063690186, + "step": 2830 + }, + { + "epoch": 0.6526801152737752, + "grad_norm": 1.2943410970497278, + "learning_rate": 1.611152665483407e-06, + "loss": 0.5171727538108826, + "step": 2831 + }, + { + "epoch": 0.6529106628242075, + "grad_norm": 1.359610619679167, + "learning_rate": 1.6108509211891419e-06, + "loss": 0.508929967880249, + "step": 2832 + }, + { + "epoch": 0.6531412103746398, + "grad_norm": 1.2987632103127786, + "learning_rate": 1.6105490881445926e-06, + "loss": 0.49480926990509033, + "step": 2833 + }, + { + "epoch": 0.6533717579250721, + "grad_norm": 1.4007906850413607, + "learning_rate": 1.6102471663936125e-06, + "loss": 0.5405118465423584, + "step": 2834 + }, + { + "epoch": 0.6536023054755044, + "grad_norm": 1.4214264035517046, + "learning_rate": 1.6099451559800671e-06, + "loss": 0.4784564971923828, + "step": 2835 + }, + { + "epoch": 0.6538328530259366, + "grad_norm": 1.2317082135715491, + "learning_rate": 1.6096430569478355e-06, + "loss": 0.4536136984825134, + "step": 2836 + }, + { + "epoch": 0.6540634005763689, + "grad_norm": 1.2316225701677532, + "learning_rate": 1.60934086934081e-06, + "loss": 0.5488549470901489, + "step": 2837 + }, + { + "epoch": 0.6542939481268012, + "grad_norm": 1.2426165303182712, + "learning_rate": 1.6090385932028948e-06, + "loss": 0.4609632194042206, + "step": 2838 + }, + { + "epoch": 0.6545244956772335, + "grad_norm": 1.3762329881463276, + "learning_rate": 1.608736228578008e-06, + "loss": 0.523021936416626, + "step": 2839 + }, + { + "epoch": 0.6547550432276658, + "grad_norm": 1.1802127861575282, + "learning_rate": 1.6084337755100794e-06, + "loss": 0.47583144903182983, + "step": 2840 + }, + { + "epoch": 0.654985590778098, + "grad_norm": 1.2693471084083858, + "learning_rate": 1.608131234043053e-06, + "loss": 0.49806085228919983, + "step": 2841 + }, + { + "epoch": 0.6552161383285302, + "grad_norm": 1.3784595470681247, + "learning_rate": 1.6078286042208843e-06, + "loss": 0.46162575483322144, + "step": 2842 + }, + { + "epoch": 0.6554466858789625, + "grad_norm": 1.4479182061912272, + "learning_rate": 1.6075258860875425e-06, + "loss": 0.5477081537246704, + "step": 2843 + }, + { + "epoch": 0.6556772334293948, + "grad_norm": 1.5130797283302535, + "learning_rate": 1.6072230796870092e-06, + "loss": 0.5085225105285645, + "step": 2844 + }, + { + "epoch": 0.655907780979827, + "grad_norm": 1.2647298148528885, + "learning_rate": 1.6069201850632798e-06, + "loss": 0.5255313515663147, + "step": 2845 + }, + { + "epoch": 0.6561383285302593, + "grad_norm": 1.413414772362052, + "learning_rate": 1.6066172022603607e-06, + "loss": 0.5655765533447266, + "step": 2846 + }, + { + "epoch": 0.6563688760806916, + "grad_norm": 1.458278918754854, + "learning_rate": 1.606314131322273e-06, + "loss": 0.5228176116943359, + "step": 2847 + }, + { + "epoch": 0.6565994236311239, + "grad_norm": 1.287040721080716, + "learning_rate": 1.606010972293049e-06, + "loss": 0.46039047837257385, + "step": 2848 + }, + { + "epoch": 0.6568299711815562, + "grad_norm": 1.1829978641835537, + "learning_rate": 1.6057077252167353e-06, + "loss": 0.4958561062812805, + "step": 2849 + }, + { + "epoch": 0.6570605187319885, + "grad_norm": 1.2609401742374116, + "learning_rate": 1.60540439013739e-06, + "loss": 0.4093541204929352, + "step": 2850 + }, + { + "epoch": 0.6572910662824207, + "grad_norm": 1.3273683463000963, + "learning_rate": 1.605100967099085e-06, + "loss": 0.554291844367981, + "step": 2851 + }, + { + "epoch": 0.657521613832853, + "grad_norm": 1.6129391874110828, + "learning_rate": 1.6047974561459037e-06, + "loss": 0.5679658055305481, + "step": 2852 + }, + { + "epoch": 0.6577521613832853, + "grad_norm": 1.2433466350266238, + "learning_rate": 1.6044938573219438e-06, + "loss": 0.5162447094917297, + "step": 2853 + }, + { + "epoch": 0.6579827089337176, + "grad_norm": 1.3792495026872826, + "learning_rate": 1.6041901706713149e-06, + "loss": 0.5094351172447205, + "step": 2854 + }, + { + "epoch": 0.6582132564841499, + "grad_norm": 1.4358897989440063, + "learning_rate": 1.6038863962381397e-06, + "loss": 0.5806282758712769, + "step": 2855 + }, + { + "epoch": 0.6584438040345821, + "grad_norm": 1.1262191223686615, + "learning_rate": 1.6035825340665528e-06, + "loss": 0.4678384065628052, + "step": 2856 + }, + { + "epoch": 0.6586743515850144, + "grad_norm": 1.4869165644593196, + "learning_rate": 1.6032785842007028e-06, + "loss": 0.4143972396850586, + "step": 2857 + }, + { + "epoch": 0.6589048991354467, + "grad_norm": 1.1375175731769085, + "learning_rate": 1.6029745466847505e-06, + "loss": 0.42894694209098816, + "step": 2858 + }, + { + "epoch": 0.659135446685879, + "grad_norm": 1.3380971361263998, + "learning_rate": 1.602670421562869e-06, + "loss": 0.47713708877563477, + "step": 2859 + }, + { + "epoch": 0.6593659942363113, + "grad_norm": 1.2246962047118364, + "learning_rate": 1.6023662088792445e-06, + "loss": 0.5422366261482239, + "step": 2860 + }, + { + "epoch": 0.6595965417867435, + "grad_norm": 1.1327420643327901, + "learning_rate": 1.6020619086780767e-06, + "loss": 0.4384247660636902, + "step": 2861 + }, + { + "epoch": 0.6598270893371758, + "grad_norm": 1.2661736589224049, + "learning_rate": 1.6017575210035764e-06, + "loss": 0.5371978878974915, + "step": 2862 + }, + { + "epoch": 0.6600576368876081, + "grad_norm": 1.3390128666305818, + "learning_rate": 1.601453045899968e-06, + "loss": 0.6491122245788574, + "step": 2863 + }, + { + "epoch": 0.6602881844380404, + "grad_norm": 1.1591928446006092, + "learning_rate": 1.6011484834114893e-06, + "loss": 0.4868931770324707, + "step": 2864 + }, + { + "epoch": 0.6605187319884727, + "grad_norm": 1.3765898164637491, + "learning_rate": 1.600843833582389e-06, + "loss": 0.41603779792785645, + "step": 2865 + }, + { + "epoch": 0.6607492795389049, + "grad_norm": 1.1922285846357408, + "learning_rate": 1.6005390964569308e-06, + "loss": 0.47878536581993103, + "step": 2866 + }, + { + "epoch": 0.6609798270893372, + "grad_norm": 1.164643681870199, + "learning_rate": 1.6002342720793888e-06, + "loss": 0.4476701021194458, + "step": 2867 + }, + { + "epoch": 0.6612103746397695, + "grad_norm": 1.2582254976557383, + "learning_rate": 1.599929360494051e-06, + "loss": 0.48599839210510254, + "step": 2868 + }, + { + "epoch": 0.6614409221902017, + "grad_norm": 1.4832886173859434, + "learning_rate": 1.5996243617452184e-06, + "loss": 0.4305136203765869, + "step": 2869 + }, + { + "epoch": 0.661671469740634, + "grad_norm": 1.3451524114419875, + "learning_rate": 1.5993192758772036e-06, + "loss": 0.5310144424438477, + "step": 2870 + }, + { + "epoch": 0.6619020172910662, + "grad_norm": 1.3551281861732154, + "learning_rate": 1.599014102934333e-06, + "loss": 0.5530884265899658, + "step": 2871 + }, + { + "epoch": 0.6621325648414985, + "grad_norm": 1.5490376838312165, + "learning_rate": 1.5987088429609442e-06, + "loss": 0.5356169939041138, + "step": 2872 + }, + { + "epoch": 0.6623631123919308, + "grad_norm": 1.1305826916001416, + "learning_rate": 1.5984034960013887e-06, + "loss": 0.469211220741272, + "step": 2873 + }, + { + "epoch": 0.6625936599423631, + "grad_norm": 1.2389323700358805, + "learning_rate": 1.5980980621000305e-06, + "loss": 0.4920649528503418, + "step": 2874 + }, + { + "epoch": 0.6628242074927954, + "grad_norm": 1.361901936332142, + "learning_rate": 1.5977925413012461e-06, + "loss": 0.5592747330665588, + "step": 2875 + }, + { + "epoch": 0.6630547550432276, + "grad_norm": 1.0963319699906886, + "learning_rate": 1.5974869336494239e-06, + "loss": 0.4942903220653534, + "step": 2876 + }, + { + "epoch": 0.6632853025936599, + "grad_norm": 1.2490208016607074, + "learning_rate": 1.5971812391889654e-06, + "loss": 0.5296880006790161, + "step": 2877 + }, + { + "epoch": 0.6635158501440922, + "grad_norm": 1.4126429356832046, + "learning_rate": 1.5968754579642855e-06, + "loss": 0.5113412737846375, + "step": 2878 + }, + { + "epoch": 0.6637463976945245, + "grad_norm": 1.221750609950105, + "learning_rate": 1.596569590019811e-06, + "loss": 0.5092888474464417, + "step": 2879 + }, + { + "epoch": 0.6639769452449568, + "grad_norm": 1.1209556874218505, + "learning_rate": 1.5962636353999806e-06, + "loss": 0.4685785472393036, + "step": 2880 + }, + { + "epoch": 0.664207492795389, + "grad_norm": 1.1860834020143705, + "learning_rate": 1.595957594149247e-06, + "loss": 0.5484071969985962, + "step": 2881 + }, + { + "epoch": 0.6644380403458213, + "grad_norm": 1.3098494571724633, + "learning_rate": 1.5956514663120748e-06, + "loss": 0.5027823448181152, + "step": 2882 + }, + { + "epoch": 0.6646685878962536, + "grad_norm": 1.2663519411792166, + "learning_rate": 1.5953452519329407e-06, + "loss": 0.509772539138794, + "step": 2883 + }, + { + "epoch": 0.6648991354466859, + "grad_norm": 1.1995137267951683, + "learning_rate": 1.5950389510563346e-06, + "loss": 0.5210834741592407, + "step": 2884 + }, + { + "epoch": 0.6651296829971182, + "grad_norm": 1.3471404101864493, + "learning_rate": 1.5947325637267594e-06, + "loss": 0.5124255418777466, + "step": 2885 + }, + { + "epoch": 0.6653602305475504, + "grad_norm": 1.3751318102582792, + "learning_rate": 1.594426089988729e-06, + "loss": 0.5071109533309937, + "step": 2886 + }, + { + "epoch": 0.6655907780979827, + "grad_norm": 1.2999435524473473, + "learning_rate": 1.5941195298867723e-06, + "loss": 0.468585342168808, + "step": 2887 + }, + { + "epoch": 0.665821325648415, + "grad_norm": 1.1643123580379142, + "learning_rate": 1.5938128834654276e-06, + "loss": 0.47526606917381287, + "step": 2888 + }, + { + "epoch": 0.6660518731988473, + "grad_norm": 0.9943492283337346, + "learning_rate": 1.5935061507692486e-06, + "loss": 0.49675557017326355, + "step": 2889 + }, + { + "epoch": 0.6662824207492796, + "grad_norm": 1.1709047337470004, + "learning_rate": 1.5931993318428002e-06, + "loss": 0.4787757396697998, + "step": 2890 + }, + { + "epoch": 0.6665129682997118, + "grad_norm": 1.3981208786435964, + "learning_rate": 1.5928924267306594e-06, + "loss": 0.5428439974784851, + "step": 2891 + }, + { + "epoch": 0.6667435158501441, + "grad_norm": 1.3700152135370784, + "learning_rate": 1.5925854354774168e-06, + "loss": 0.47622811794281006, + "step": 2892 + }, + { + "epoch": 0.6669740634005764, + "grad_norm": 1.2018111138110872, + "learning_rate": 1.592278358127675e-06, + "loss": 0.5330455899238586, + "step": 2893 + }, + { + "epoch": 0.6672046109510087, + "grad_norm": 1.2085315359118407, + "learning_rate": 1.5919711947260492e-06, + "loss": 0.5267072916030884, + "step": 2894 + }, + { + "epoch": 0.667435158501441, + "grad_norm": 1.403057971845759, + "learning_rate": 1.5916639453171672e-06, + "loss": 0.5285246968269348, + "step": 2895 + }, + { + "epoch": 0.6676657060518733, + "grad_norm": 1.2487314723582965, + "learning_rate": 1.5913566099456686e-06, + "loss": 0.41800087690353394, + "step": 2896 + }, + { + "epoch": 0.6678962536023054, + "grad_norm": 1.2515913608592821, + "learning_rate": 1.5910491886562062e-06, + "loss": 0.5241307616233826, + "step": 2897 + }, + { + "epoch": 0.6681268011527377, + "grad_norm": 1.446448879574996, + "learning_rate": 1.5907416814934456e-06, + "loss": 0.4888567328453064, + "step": 2898 + }, + { + "epoch": 0.66835734870317, + "grad_norm": 1.07101179258961, + "learning_rate": 1.590434088502064e-06, + "loss": 0.3744029104709625, + "step": 2899 + }, + { + "epoch": 0.6685878962536023, + "grad_norm": 1.2925871141386034, + "learning_rate": 1.5901264097267513e-06, + "loss": 0.6257427334785461, + "step": 2900 + }, + { + "epoch": 0.6688184438040345, + "grad_norm": 1.2413020944843032, + "learning_rate": 1.58981864521221e-06, + "loss": 0.4519440233707428, + "step": 2901 + }, + { + "epoch": 0.6690489913544668, + "grad_norm": 1.2299997137786967, + "learning_rate": 1.589510795003156e-06, + "loss": 0.3806610107421875, + "step": 2902 + }, + { + "epoch": 0.6692795389048991, + "grad_norm": 1.291805154904489, + "learning_rate": 1.5892028591443154e-06, + "loss": 0.5493309497833252, + "step": 2903 + }, + { + "epoch": 0.6695100864553314, + "grad_norm": 1.2160139328707327, + "learning_rate": 1.5888948376804291e-06, + "loss": 0.5231510400772095, + "step": 2904 + }, + { + "epoch": 0.6697406340057637, + "grad_norm": 1.653045730376608, + "learning_rate": 1.5885867306562488e-06, + "loss": 0.47482627630233765, + "step": 2905 + }, + { + "epoch": 0.669971181556196, + "grad_norm": 1.2801554509870998, + "learning_rate": 1.5882785381165396e-06, + "loss": 0.5615172386169434, + "step": 2906 + }, + { + "epoch": 0.6702017291066282, + "grad_norm": 1.4220510811597196, + "learning_rate": 1.5879702601060788e-06, + "loss": 0.610340416431427, + "step": 2907 + }, + { + "epoch": 0.6704322766570605, + "grad_norm": 1.3550154221756145, + "learning_rate": 1.5876618966696553e-06, + "loss": 0.5085784196853638, + "step": 2908 + }, + { + "epoch": 0.6706628242074928, + "grad_norm": 1.1827008952943212, + "learning_rate": 1.5873534478520716e-06, + "loss": 0.4580487906932831, + "step": 2909 + }, + { + "epoch": 0.6708933717579251, + "grad_norm": 1.5359854157211643, + "learning_rate": 1.5870449136981425e-06, + "loss": 0.5045263767242432, + "step": 2910 + }, + { + "epoch": 0.6711239193083574, + "grad_norm": 1.284237656199812, + "learning_rate": 1.5867362942526938e-06, + "loss": 0.5317284464836121, + "step": 2911 + }, + { + "epoch": 0.6713544668587896, + "grad_norm": 1.4099609003610267, + "learning_rate": 1.5864275895605658e-06, + "loss": 0.430474191904068, + "step": 2912 + }, + { + "epoch": 0.6715850144092219, + "grad_norm": 1.092793379229376, + "learning_rate": 1.5861187996666088e-06, + "loss": 0.4927142262458801, + "step": 2913 + }, + { + "epoch": 0.6718155619596542, + "grad_norm": 1.475708068624904, + "learning_rate": 1.585809924615688e-06, + "loss": 0.5057426691055298, + "step": 2914 + }, + { + "epoch": 0.6720461095100865, + "grad_norm": 1.1048383600570364, + "learning_rate": 1.5855009644526792e-06, + "loss": 0.37006527185440063, + "step": 2915 + }, + { + "epoch": 0.6722766570605188, + "grad_norm": 1.226294106349504, + "learning_rate": 1.585191919222471e-06, + "loss": 0.48783624172210693, + "step": 2916 + }, + { + "epoch": 0.672507204610951, + "grad_norm": 1.4872973610774505, + "learning_rate": 1.5848827889699646e-06, + "loss": 0.49577081203460693, + "step": 2917 + }, + { + "epoch": 0.6727377521613833, + "grad_norm": 1.3646021511618893, + "learning_rate": 1.5845735737400732e-06, + "loss": 0.5596980452537537, + "step": 2918 + }, + { + "epoch": 0.6729682997118156, + "grad_norm": 1.5361721176693865, + "learning_rate": 1.5842642735777228e-06, + "loss": 0.3920667767524719, + "step": 2919 + }, + { + "epoch": 0.6731988472622479, + "grad_norm": 1.4567898143159288, + "learning_rate": 1.5839548885278515e-06, + "loss": 0.6292526721954346, + "step": 2920 + }, + { + "epoch": 0.6734293948126802, + "grad_norm": 1.2680110772754039, + "learning_rate": 1.5836454186354098e-06, + "loss": 0.4730093479156494, + "step": 2921 + }, + { + "epoch": 0.6736599423631124, + "grad_norm": 1.2495291848198702, + "learning_rate": 1.58333586394536e-06, + "loss": 0.48224174976348877, + "step": 2922 + }, + { + "epoch": 0.6738904899135447, + "grad_norm": 1.325720545623316, + "learning_rate": 1.5830262245026778e-06, + "loss": 0.5028641223907471, + "step": 2923 + }, + { + "epoch": 0.6741210374639769, + "grad_norm": 1.3175381512862607, + "learning_rate": 1.58271650035235e-06, + "loss": 0.5031313300132751, + "step": 2924 + }, + { + "epoch": 0.6743515850144092, + "grad_norm": 1.5161079858248256, + "learning_rate": 1.582406691539377e-06, + "loss": 0.584574818611145, + "step": 2925 + }, + { + "epoch": 0.6745821325648415, + "grad_norm": 1.2461318316468335, + "learning_rate": 1.5820967981087701e-06, + "loss": 0.46639564633369446, + "step": 2926 + }, + { + "epoch": 0.6748126801152737, + "grad_norm": 1.4880257215555335, + "learning_rate": 1.5817868201055537e-06, + "loss": 0.45523303747177124, + "step": 2927 + }, + { + "epoch": 0.675043227665706, + "grad_norm": 1.3102261377548885, + "learning_rate": 1.5814767575747647e-06, + "loss": 0.48584091663360596, + "step": 2928 + }, + { + "epoch": 0.6752737752161383, + "grad_norm": 1.3040040020796448, + "learning_rate": 1.5811666105614518e-06, + "loss": 0.43126803636550903, + "step": 2929 + }, + { + "epoch": 0.6755043227665706, + "grad_norm": 1.25431891535012, + "learning_rate": 1.5808563791106761e-06, + "loss": 0.4430522322654724, + "step": 2930 + }, + { + "epoch": 0.6757348703170029, + "grad_norm": 1.1852464321668217, + "learning_rate": 1.580546063267511e-06, + "loss": 0.5478621125221252, + "step": 2931 + }, + { + "epoch": 0.6759654178674351, + "grad_norm": 1.155223214055886, + "learning_rate": 1.5802356630770425e-06, + "loss": 0.4092339873313904, + "step": 2932 + }, + { + "epoch": 0.6761959654178674, + "grad_norm": 1.1811158393108148, + "learning_rate": 1.5799251785843682e-06, + "loss": 0.4377528727054596, + "step": 2933 + }, + { + "epoch": 0.6764265129682997, + "grad_norm": 1.3770671734713864, + "learning_rate": 1.579614609834598e-06, + "loss": 0.5447462797164917, + "step": 2934 + }, + { + "epoch": 0.676657060518732, + "grad_norm": 1.58389651828821, + "learning_rate": 1.5793039568728546e-06, + "loss": 0.5153322219848633, + "step": 2935 + }, + { + "epoch": 0.6768876080691643, + "grad_norm": 1.3626759798711972, + "learning_rate": 1.578993219744273e-06, + "loss": 0.5152921676635742, + "step": 2936 + }, + { + "epoch": 0.6771181556195965, + "grad_norm": 1.4530601678970734, + "learning_rate": 1.5786823984939995e-06, + "loss": 0.451922208070755, + "step": 2937 + }, + { + "epoch": 0.6773487031700288, + "grad_norm": 1.1810123065006097, + "learning_rate": 1.5783714931671933e-06, + "loss": 0.4027182459831238, + "step": 2938 + }, + { + "epoch": 0.6775792507204611, + "grad_norm": 1.2545756749048065, + "learning_rate": 1.578060503809026e-06, + "loss": 0.5972239971160889, + "step": 2939 + }, + { + "epoch": 0.6778097982708934, + "grad_norm": 1.3351852794948187, + "learning_rate": 1.5777494304646807e-06, + "loss": 0.4376659691333771, + "step": 2940 + }, + { + "epoch": 0.6780403458213257, + "grad_norm": 1.6381068625585637, + "learning_rate": 1.5774382731793537e-06, + "loss": 0.5066968202590942, + "step": 2941 + }, + { + "epoch": 0.6782708933717579, + "grad_norm": 1.233959474935461, + "learning_rate": 1.5771270319982523e-06, + "loss": 0.47554099559783936, + "step": 2942 + }, + { + "epoch": 0.6785014409221902, + "grad_norm": 1.3592025231702105, + "learning_rate": 1.576815706966597e-06, + "loss": 0.4573550522327423, + "step": 2943 + }, + { + "epoch": 0.6787319884726225, + "grad_norm": 1.2899141824371814, + "learning_rate": 1.57650429812962e-06, + "loss": 0.498090922832489, + "step": 2944 + }, + { + "epoch": 0.6789625360230548, + "grad_norm": 1.439288051511995, + "learning_rate": 1.5761928055325657e-06, + "loss": 0.3874198794364929, + "step": 2945 + }, + { + "epoch": 0.6791930835734871, + "grad_norm": 1.3892308861229652, + "learning_rate": 1.5758812292206906e-06, + "loss": 0.5814070701599121, + "step": 2946 + }, + { + "epoch": 0.6794236311239193, + "grad_norm": 1.1114806158765154, + "learning_rate": 1.5755695692392642e-06, + "loss": 0.4853003919124603, + "step": 2947 + }, + { + "epoch": 0.6796541786743516, + "grad_norm": 1.4749591273474632, + "learning_rate": 1.5752578256335664e-06, + "loss": 0.5008170008659363, + "step": 2948 + }, + { + "epoch": 0.6798847262247839, + "grad_norm": 1.2399062484261723, + "learning_rate": 1.5749459984488912e-06, + "loss": 0.5139249563217163, + "step": 2949 + }, + { + "epoch": 0.6801152737752162, + "grad_norm": 1.3293402007434927, + "learning_rate": 1.574634087730543e-06, + "loss": 0.5634652376174927, + "step": 2950 + }, + { + "epoch": 0.6803458213256485, + "grad_norm": 1.500163790638001, + "learning_rate": 1.57432209352384e-06, + "loss": 0.5859615802764893, + "step": 2951 + }, + { + "epoch": 0.6805763688760806, + "grad_norm": 1.5412120263580897, + "learning_rate": 1.5740100158741112e-06, + "loss": 0.5175807476043701, + "step": 2952 + }, + { + "epoch": 0.6808069164265129, + "grad_norm": 1.3090445672867828, + "learning_rate": 1.5736978548266988e-06, + "loss": 0.4430953860282898, + "step": 2953 + }, + { + "epoch": 0.6810374639769452, + "grad_norm": 1.2865088532644573, + "learning_rate": 1.5733856104269556e-06, + "loss": 0.4908146262168884, + "step": 2954 + }, + { + "epoch": 0.6812680115273775, + "grad_norm": 1.810302965226072, + "learning_rate": 1.5730732827202483e-06, + "loss": 0.5244361162185669, + "step": 2955 + }, + { + "epoch": 0.6814985590778098, + "grad_norm": 1.3563775580730366, + "learning_rate": 1.5727608717519544e-06, + "loss": 0.4997614026069641, + "step": 2956 + }, + { + "epoch": 0.681729106628242, + "grad_norm": 1.331712876030176, + "learning_rate": 1.5724483775674644e-06, + "loss": 0.4843396544456482, + "step": 2957 + }, + { + "epoch": 0.6819596541786743, + "grad_norm": 1.2508340904890844, + "learning_rate": 1.5721358002121799e-06, + "loss": 0.4678846001625061, + "step": 2958 + }, + { + "epoch": 0.6821902017291066, + "grad_norm": 1.5103932364725328, + "learning_rate": 1.5718231397315158e-06, + "loss": 0.626168966293335, + "step": 2959 + }, + { + "epoch": 0.6824207492795389, + "grad_norm": 1.2990874275627162, + "learning_rate": 1.5715103961708977e-06, + "loss": 0.43531563878059387, + "step": 2960 + }, + { + "epoch": 0.6826512968299712, + "grad_norm": 1.2463918257107651, + "learning_rate": 1.5711975695757648e-06, + "loss": 0.510450005531311, + "step": 2961 + }, + { + "epoch": 0.6828818443804034, + "grad_norm": 1.3271181664609262, + "learning_rate": 1.5708846599915666e-06, + "loss": 0.5074174404144287, + "step": 2962 + }, + { + "epoch": 0.6831123919308357, + "grad_norm": 1.3176146899484875, + "learning_rate": 1.5705716674637663e-06, + "loss": 0.5755868554115295, + "step": 2963 + }, + { + "epoch": 0.683342939481268, + "grad_norm": 1.3499337753621126, + "learning_rate": 1.5702585920378384e-06, + "loss": 0.5092858672142029, + "step": 2964 + }, + { + "epoch": 0.6835734870317003, + "grad_norm": 1.1765613552448693, + "learning_rate": 1.569945433759269e-06, + "loss": 0.4780580997467041, + "step": 2965 + }, + { + "epoch": 0.6838040345821326, + "grad_norm": 1.3312217595628797, + "learning_rate": 1.5696321926735577e-06, + "loss": 0.46418172121047974, + "step": 2966 + }, + { + "epoch": 0.6840345821325649, + "grad_norm": 1.4402952016719341, + "learning_rate": 1.569318868826214e-06, + "loss": 0.49936625361442566, + "step": 2967 + }, + { + "epoch": 0.6842651296829971, + "grad_norm": 1.4586611460279373, + "learning_rate": 1.5690054622627613e-06, + "loss": 0.3950899839401245, + "step": 2968 + }, + { + "epoch": 0.6844956772334294, + "grad_norm": 1.6977188097354168, + "learning_rate": 1.5686919730287342e-06, + "loss": 0.662026047706604, + "step": 2969 + }, + { + "epoch": 0.6847262247838617, + "grad_norm": 1.244533695524091, + "learning_rate": 1.5683784011696797e-06, + "loss": 0.5297297835350037, + "step": 2970 + }, + { + "epoch": 0.684956772334294, + "grad_norm": 1.320167274856565, + "learning_rate": 1.5680647467311555e-06, + "loss": 0.5406937599182129, + "step": 2971 + }, + { + "epoch": 0.6851873198847263, + "grad_norm": 1.2468519195560397, + "learning_rate": 1.5677510097587339e-06, + "loss": 0.5149558782577515, + "step": 2972 + }, + { + "epoch": 0.6854178674351585, + "grad_norm": 1.6834143469534373, + "learning_rate": 1.5674371902979964e-06, + "loss": 0.5197988748550415, + "step": 2973 + }, + { + "epoch": 0.6856484149855908, + "grad_norm": 1.350490097294227, + "learning_rate": 1.567123288394538e-06, + "loss": 0.4733935594558716, + "step": 2974 + }, + { + "epoch": 0.6858789625360231, + "grad_norm": 1.4037154754911856, + "learning_rate": 1.5668093040939657e-06, + "loss": 0.49029862880706787, + "step": 2975 + }, + { + "epoch": 0.6861095100864554, + "grad_norm": 1.4125916913384953, + "learning_rate": 1.5664952374418977e-06, + "loss": 0.509502112865448, + "step": 2976 + }, + { + "epoch": 0.6863400576368877, + "grad_norm": 1.3378380848439302, + "learning_rate": 1.5661810884839647e-06, + "loss": 0.5216405391693115, + "step": 2977 + }, + { + "epoch": 0.6865706051873199, + "grad_norm": 1.7407414962930694, + "learning_rate": 1.5658668572658098e-06, + "loss": 0.5375246405601501, + "step": 2978 + }, + { + "epoch": 0.6868011527377521, + "grad_norm": 1.3958219888508585, + "learning_rate": 1.5655525438330868e-06, + "loss": 0.4444112777709961, + "step": 2979 + }, + { + "epoch": 0.6870317002881844, + "grad_norm": 1.4595199598065813, + "learning_rate": 1.5652381482314627e-06, + "loss": 0.5068175792694092, + "step": 2980 + }, + { + "epoch": 0.6872622478386167, + "grad_norm": 1.1364189534845608, + "learning_rate": 1.5649236705066158e-06, + "loss": 0.45731109380722046, + "step": 2981 + }, + { + "epoch": 0.687492795389049, + "grad_norm": 1.4303465111962608, + "learning_rate": 1.5646091107042364e-06, + "loss": 0.5306158661842346, + "step": 2982 + }, + { + "epoch": 0.6877233429394812, + "grad_norm": 1.433927269332233, + "learning_rate": 1.5642944688700261e-06, + "loss": 0.4398651719093323, + "step": 2983 + }, + { + "epoch": 0.6879538904899135, + "grad_norm": 1.3400708801277281, + "learning_rate": 1.5639797450497004e-06, + "loss": 0.5990906953811646, + "step": 2984 + }, + { + "epoch": 0.6881844380403458, + "grad_norm": 1.1626847488175958, + "learning_rate": 1.5636649392889845e-06, + "loss": 0.4668315052986145, + "step": 2985 + }, + { + "epoch": 0.6884149855907781, + "grad_norm": 1.269211368058643, + "learning_rate": 1.5633500516336168e-06, + "loss": 0.4629107713699341, + "step": 2986 + }, + { + "epoch": 0.6886455331412104, + "grad_norm": 1.3353647625587797, + "learning_rate": 1.5630350821293465e-06, + "loss": 0.5427249670028687, + "step": 2987 + }, + { + "epoch": 0.6888760806916426, + "grad_norm": 1.5063565488918145, + "learning_rate": 1.5627200308219361e-06, + "loss": 0.5910916328430176, + "step": 2988 + }, + { + "epoch": 0.6891066282420749, + "grad_norm": 1.21637133241614, + "learning_rate": 1.5624048977571593e-06, + "loss": 0.5487556457519531, + "step": 2989 + }, + { + "epoch": 0.6893371757925072, + "grad_norm": 1.8277563810893047, + "learning_rate": 1.5620896829808014e-06, + "loss": 0.5941481590270996, + "step": 2990 + }, + { + "epoch": 0.6895677233429395, + "grad_norm": 1.2657483365011457, + "learning_rate": 1.5617743865386597e-06, + "loss": 0.5026910305023193, + "step": 2991 + }, + { + "epoch": 0.6897982708933718, + "grad_norm": 1.2728272105438154, + "learning_rate": 1.5614590084765434e-06, + "loss": 0.453794002532959, + "step": 2992 + }, + { + "epoch": 0.690028818443804, + "grad_norm": 1.5162382136032657, + "learning_rate": 1.5611435488402747e-06, + "loss": 0.534416675567627, + "step": 2993 + }, + { + "epoch": 0.6902593659942363, + "grad_norm": 1.3021552826846616, + "learning_rate": 1.5608280076756852e-06, + "loss": 0.5203788876533508, + "step": 2994 + }, + { + "epoch": 0.6904899135446686, + "grad_norm": 1.2974339804272135, + "learning_rate": 1.5605123850286207e-06, + "loss": 0.5063557624816895, + "step": 2995 + }, + { + "epoch": 0.6907204610951009, + "grad_norm": 1.0854825641740868, + "learning_rate": 1.5601966809449372e-06, + "loss": 0.4803149700164795, + "step": 2996 + }, + { + "epoch": 0.6909510086455332, + "grad_norm": 1.4622322271463624, + "learning_rate": 1.5598808954705039e-06, + "loss": 0.5497993230819702, + "step": 2997 + }, + { + "epoch": 0.6911815561959654, + "grad_norm": 1.3566519579664447, + "learning_rate": 1.5595650286512005e-06, + "loss": 0.44242218136787415, + "step": 2998 + }, + { + "epoch": 0.6914121037463977, + "grad_norm": 1.4880341344096335, + "learning_rate": 1.5592490805329197e-06, + "loss": 0.4667905867099762, + "step": 2999 + }, + { + "epoch": 0.69164265129683, + "grad_norm": 1.2275920951045758, + "learning_rate": 1.5589330511615653e-06, + "loss": 0.5017203092575073, + "step": 3000 + }, + { + "epoch": 0.6918731988472623, + "grad_norm": 1.1629025443905772, + "learning_rate": 1.5586169405830528e-06, + "loss": 0.4988882541656494, + "step": 3001 + }, + { + "epoch": 0.6921037463976946, + "grad_norm": 1.2059313879115356, + "learning_rate": 1.5583007488433102e-06, + "loss": 0.48794132471084595, + "step": 3002 + }, + { + "epoch": 0.6923342939481268, + "grad_norm": 1.1972491085590309, + "learning_rate": 1.5579844759882766e-06, + "loss": 0.48638075590133667, + "step": 3003 + }, + { + "epoch": 0.6925648414985591, + "grad_norm": 1.523401545887088, + "learning_rate": 1.5576681220639035e-06, + "loss": 0.5094320178031921, + "step": 3004 + }, + { + "epoch": 0.6927953890489914, + "grad_norm": 1.2878330835344656, + "learning_rate": 1.5573516871161529e-06, + "loss": 0.5538557171821594, + "step": 3005 + }, + { + "epoch": 0.6930259365994237, + "grad_norm": 1.4879801831920034, + "learning_rate": 1.5570351711910007e-06, + "loss": 0.37026524543762207, + "step": 3006 + }, + { + "epoch": 0.6932564841498559, + "grad_norm": 1.118855469869377, + "learning_rate": 1.5567185743344328e-06, + "loss": 0.505375325679779, + "step": 3007 + }, + { + "epoch": 0.6934870317002881, + "grad_norm": 1.3758356333811415, + "learning_rate": 1.5564018965924474e-06, + "loss": 0.5471951961517334, + "step": 3008 + }, + { + "epoch": 0.6937175792507204, + "grad_norm": 1.3946032953876015, + "learning_rate": 1.5560851380110547e-06, + "loss": 0.5329568386077881, + "step": 3009 + }, + { + "epoch": 0.6939481268011527, + "grad_norm": 1.2171394692811979, + "learning_rate": 1.555768298636276e-06, + "loss": 0.4184609651565552, + "step": 3010 + }, + { + "epoch": 0.694178674351585, + "grad_norm": 1.299950019418537, + "learning_rate": 1.5554513785141454e-06, + "loss": 0.4469276964664459, + "step": 3011 + }, + { + "epoch": 0.6944092219020173, + "grad_norm": 1.2624011455255777, + "learning_rate": 1.5551343776907073e-06, + "loss": 0.46018022298812866, + "step": 3012 + }, + { + "epoch": 0.6946397694524495, + "grad_norm": 1.4446192207747934, + "learning_rate": 1.5548172962120195e-06, + "loss": 0.39802467823028564, + "step": 3013 + }, + { + "epoch": 0.6948703170028818, + "grad_norm": 1.6421441785374291, + "learning_rate": 1.5545001341241502e-06, + "loss": 0.5130733847618103, + "step": 3014 + }, + { + "epoch": 0.6951008645533141, + "grad_norm": 1.4874797046908075, + "learning_rate": 1.5541828914731797e-06, + "loss": 0.4477139711380005, + "step": 3015 + }, + { + "epoch": 0.6953314121037464, + "grad_norm": 1.5908123652169408, + "learning_rate": 1.5538655683051998e-06, + "loss": 0.45926767587661743, + "step": 3016 + }, + { + "epoch": 0.6955619596541787, + "grad_norm": 1.2608445606501002, + "learning_rate": 1.553548164666315e-06, + "loss": 0.45936119556427, + "step": 3017 + }, + { + "epoch": 0.6957925072046109, + "grad_norm": 1.254131874779924, + "learning_rate": 1.5532306806026401e-06, + "loss": 0.42640042304992676, + "step": 3018 + }, + { + "epoch": 0.6960230547550432, + "grad_norm": 1.359223674128618, + "learning_rate": 1.5529131161603024e-06, + "loss": 0.545343816280365, + "step": 3019 + }, + { + "epoch": 0.6962536023054755, + "grad_norm": 1.499789251776708, + "learning_rate": 1.5525954713854406e-06, + "loss": 0.55390465259552, + "step": 3020 + }, + { + "epoch": 0.6964841498559078, + "grad_norm": 1.4431952423899748, + "learning_rate": 1.5522777463242056e-06, + "loss": 0.5054730176925659, + "step": 3021 + }, + { + "epoch": 0.6967146974063401, + "grad_norm": 1.3421071937183544, + "learning_rate": 1.5519599410227593e-06, + "loss": 0.5345525741577148, + "step": 3022 + }, + { + "epoch": 0.6969452449567723, + "grad_norm": 1.3873484237085747, + "learning_rate": 1.5516420555272758e-06, + "loss": 0.5678459405899048, + "step": 3023 + }, + { + "epoch": 0.6971757925072046, + "grad_norm": 1.5494993250847158, + "learning_rate": 1.5513240898839398e-06, + "loss": 0.6118779182434082, + "step": 3024 + }, + { + "epoch": 0.6974063400576369, + "grad_norm": 1.4303865941013976, + "learning_rate": 1.551006044138949e-06, + "loss": 0.5693734884262085, + "step": 3025 + }, + { + "epoch": 0.6976368876080692, + "grad_norm": 1.382814555248119, + "learning_rate": 1.5506879183385117e-06, + "loss": 0.47115445137023926, + "step": 3026 + }, + { + "epoch": 0.6978674351585015, + "grad_norm": 1.4819808824760035, + "learning_rate": 1.550369712528849e-06, + "loss": 0.4977574944496155, + "step": 3027 + }, + { + "epoch": 0.6980979827089338, + "grad_norm": 1.345366236747585, + "learning_rate": 1.550051426756192e-06, + "loss": 0.5112085938453674, + "step": 3028 + }, + { + "epoch": 0.698328530259366, + "grad_norm": 1.1673490314186687, + "learning_rate": 1.549733061066785e-06, + "loss": 0.5258888602256775, + "step": 3029 + }, + { + "epoch": 0.6985590778097983, + "grad_norm": 1.3170150520989703, + "learning_rate": 1.5494146155068833e-06, + "loss": 0.4910382926464081, + "step": 3030 + }, + { + "epoch": 0.6987896253602306, + "grad_norm": 1.492317262307736, + "learning_rate": 1.5490960901227532e-06, + "loss": 0.538005530834198, + "step": 3031 + }, + { + "epoch": 0.6990201729106629, + "grad_norm": 1.2798707818585195, + "learning_rate": 1.5487774849606732e-06, + "loss": 0.5382951498031616, + "step": 3032 + }, + { + "epoch": 0.6992507204610952, + "grad_norm": 1.3563181533260928, + "learning_rate": 1.5484588000669333e-06, + "loss": 0.5431716442108154, + "step": 3033 + }, + { + "epoch": 0.6994812680115273, + "grad_norm": 1.4730881654634107, + "learning_rate": 1.5481400354878358e-06, + "loss": 0.5745220184326172, + "step": 3034 + }, + { + "epoch": 0.6997118155619596, + "grad_norm": 1.5039346648575265, + "learning_rate": 1.5478211912696929e-06, + "loss": 0.4814023971557617, + "step": 3035 + }, + { + "epoch": 0.6999423631123919, + "grad_norm": 1.2452001063336693, + "learning_rate": 1.5475022674588298e-06, + "loss": 0.3987554907798767, + "step": 3036 + }, + { + "epoch": 0.7001729106628242, + "grad_norm": 1.2273894324003525, + "learning_rate": 1.5471832641015828e-06, + "loss": 0.4156729578971863, + "step": 3037 + }, + { + "epoch": 0.7004034582132564, + "grad_norm": 1.1620912406102892, + "learning_rate": 1.5468641812442997e-06, + "loss": 0.5085793733596802, + "step": 3038 + }, + { + "epoch": 0.7006340057636887, + "grad_norm": 1.3012231858429548, + "learning_rate": 1.5465450189333403e-06, + "loss": 0.4714711904525757, + "step": 3039 + }, + { + "epoch": 0.700864553314121, + "grad_norm": 1.194325392111808, + "learning_rate": 1.5462257772150751e-06, + "loss": 0.3911649286746979, + "step": 3040 + }, + { + "epoch": 0.7010951008645533, + "grad_norm": 1.5023223698140733, + "learning_rate": 1.5459064561358868e-06, + "loss": 0.6202036142349243, + "step": 3041 + }, + { + "epoch": 0.7013256484149856, + "grad_norm": 1.318488566568559, + "learning_rate": 1.5455870557421692e-06, + "loss": 0.4943694770336151, + "step": 3042 + }, + { + "epoch": 0.7015561959654179, + "grad_norm": 1.4976388100892126, + "learning_rate": 1.5452675760803281e-06, + "loss": 0.5165269374847412, + "step": 3043 + }, + { + "epoch": 0.7017867435158501, + "grad_norm": 1.2655871125899185, + "learning_rate": 1.5449480171967804e-06, + "loss": 0.4172588586807251, + "step": 3044 + }, + { + "epoch": 0.7020172910662824, + "grad_norm": 1.2826628166511769, + "learning_rate": 1.5446283791379551e-06, + "loss": 0.5359724760055542, + "step": 3045 + }, + { + "epoch": 0.7022478386167147, + "grad_norm": 1.4599282692486921, + "learning_rate": 1.5443086619502917e-06, + "loss": 0.4579007625579834, + "step": 3046 + }, + { + "epoch": 0.702478386167147, + "grad_norm": 1.1736042374177846, + "learning_rate": 1.5439888656802423e-06, + "loss": 0.5238107442855835, + "step": 3047 + }, + { + "epoch": 0.7027089337175793, + "grad_norm": 1.3607655039329225, + "learning_rate": 1.5436689903742692e-06, + "loss": 0.4772608280181885, + "step": 3048 + }, + { + "epoch": 0.7029394812680115, + "grad_norm": 1.1308906076124552, + "learning_rate": 1.5433490360788479e-06, + "loss": 0.4986886978149414, + "step": 3049 + }, + { + "epoch": 0.7031700288184438, + "grad_norm": 1.4921821574000111, + "learning_rate": 1.543029002840464e-06, + "loss": 0.505290150642395, + "step": 3050 + }, + { + "epoch": 0.7034005763688761, + "grad_norm": 1.216509889918091, + "learning_rate": 1.5427088907056148e-06, + "loss": 0.44803524017333984, + "step": 3051 + }, + { + "epoch": 0.7036311239193084, + "grad_norm": 1.3045688882387803, + "learning_rate": 1.5423886997208096e-06, + "loss": 0.5130533576011658, + "step": 3052 + }, + { + "epoch": 0.7038616714697407, + "grad_norm": 1.2848001617196636, + "learning_rate": 1.5420684299325686e-06, + "loss": 0.5944375991821289, + "step": 3053 + }, + { + "epoch": 0.7040922190201729, + "grad_norm": 1.4438153026517662, + "learning_rate": 1.541748081387424e-06, + "loss": 0.5240592956542969, + "step": 3054 + }, + { + "epoch": 0.7043227665706052, + "grad_norm": 1.4566838697048774, + "learning_rate": 1.5414276541319185e-06, + "loss": 0.5878169536590576, + "step": 3055 + }, + { + "epoch": 0.7045533141210375, + "grad_norm": 1.389504644244344, + "learning_rate": 1.5411071482126074e-06, + "loss": 0.5841420292854309, + "step": 3056 + }, + { + "epoch": 0.7047838616714698, + "grad_norm": 1.4362185597913253, + "learning_rate": 1.5407865636760566e-06, + "loss": 0.4527501165866852, + "step": 3057 + }, + { + "epoch": 0.7050144092219021, + "grad_norm": 1.38132910863877, + "learning_rate": 1.5404659005688441e-06, + "loss": 0.433325856924057, + "step": 3058 + }, + { + "epoch": 0.7052449567723343, + "grad_norm": 1.3438668709731927, + "learning_rate": 1.5401451589375585e-06, + "loss": 0.5717126131057739, + "step": 3059 + }, + { + "epoch": 0.7054755043227666, + "grad_norm": 1.300530022976824, + "learning_rate": 1.5398243388288002e-06, + "loss": 0.4420049786567688, + "step": 3060 + }, + { + "epoch": 0.7057060518731988, + "grad_norm": 1.3883239483781253, + "learning_rate": 1.5395034402891809e-06, + "loss": 0.42093801498413086, + "step": 3061 + }, + { + "epoch": 0.7059365994236311, + "grad_norm": 1.317256008379363, + "learning_rate": 1.5391824633653245e-06, + "loss": 0.4691120386123657, + "step": 3062 + }, + { + "epoch": 0.7061671469740634, + "grad_norm": 1.571058237037257, + "learning_rate": 1.538861408103865e-06, + "loss": 0.5704994797706604, + "step": 3063 + }, + { + "epoch": 0.7063976945244956, + "grad_norm": 1.6209585177680517, + "learning_rate": 1.5385402745514485e-06, + "loss": 0.5992046594619751, + "step": 3064 + }, + { + "epoch": 0.7066282420749279, + "grad_norm": 1.6228865067081202, + "learning_rate": 1.5382190627547327e-06, + "loss": 0.47539931535720825, + "step": 3065 + }, + { + "epoch": 0.7068587896253602, + "grad_norm": 1.269049568068291, + "learning_rate": 1.5378977727603856e-06, + "loss": 0.5070189237594604, + "step": 3066 + }, + { + "epoch": 0.7070893371757925, + "grad_norm": 1.4075154012090485, + "learning_rate": 1.537576404615088e-06, + "loss": 0.5456782579421997, + "step": 3067 + }, + { + "epoch": 0.7073198847262248, + "grad_norm": 1.245118043630662, + "learning_rate": 1.5372549583655315e-06, + "loss": 0.5040643811225891, + "step": 3068 + }, + { + "epoch": 0.707550432276657, + "grad_norm": 1.2490813291253982, + "learning_rate": 1.536933434058418e-06, + "loss": 0.5937368869781494, + "step": 3069 + }, + { + "epoch": 0.7077809798270893, + "grad_norm": 1.3837089052575293, + "learning_rate": 1.536611831740462e-06, + "loss": 0.5315635800361633, + "step": 3070 + }, + { + "epoch": 0.7080115273775216, + "grad_norm": 1.489313806253537, + "learning_rate": 1.5362901514583896e-06, + "loss": 0.4116753339767456, + "step": 3071 + }, + { + "epoch": 0.7082420749279539, + "grad_norm": 1.4466674401069384, + "learning_rate": 1.535968393258937e-06, + "loss": 0.5934009552001953, + "step": 3072 + }, + { + "epoch": 0.7084726224783862, + "grad_norm": 1.625081313125215, + "learning_rate": 1.535646557188852e-06, + "loss": 0.5851935744285583, + "step": 3073 + }, + { + "epoch": 0.7087031700288184, + "grad_norm": 1.0978038324799, + "learning_rate": 1.5353246432948952e-06, + "loss": 0.4650305211544037, + "step": 3074 + }, + { + "epoch": 0.7089337175792507, + "grad_norm": 1.4075996932378603, + "learning_rate": 1.535002651623836e-06, + "loss": 0.4810240864753723, + "step": 3075 + }, + { + "epoch": 0.709164265129683, + "grad_norm": 1.2812004885082364, + "learning_rate": 1.534680582222457e-06, + "loss": 0.5431346297264099, + "step": 3076 + }, + { + "epoch": 0.7093948126801153, + "grad_norm": 1.3726019599129855, + "learning_rate": 1.534358435137552e-06, + "loss": 0.49898943305015564, + "step": 3077 + }, + { + "epoch": 0.7096253602305476, + "grad_norm": 2.2531586386992926, + "learning_rate": 1.534036210415925e-06, + "loss": 0.5768055319786072, + "step": 3078 + }, + { + "epoch": 0.7098559077809798, + "grad_norm": 1.526605019564366, + "learning_rate": 1.5337139081043922e-06, + "loss": 0.5711077451705933, + "step": 3079 + }, + { + "epoch": 0.7100864553314121, + "grad_norm": 1.3112089962432258, + "learning_rate": 1.5333915282497805e-06, + "loss": 0.46261918544769287, + "step": 3080 + }, + { + "epoch": 0.7103170028818444, + "grad_norm": 1.2588542955672957, + "learning_rate": 1.5330690708989286e-06, + "loss": 0.4627862572669983, + "step": 3081 + }, + { + "epoch": 0.7105475504322767, + "grad_norm": 1.3465412006949553, + "learning_rate": 1.5327465360986857e-06, + "loss": 0.5247993469238281, + "step": 3082 + }, + { + "epoch": 0.710778097982709, + "grad_norm": 1.2562229226560728, + "learning_rate": 1.5324239238959135e-06, + "loss": 0.5141404867172241, + "step": 3083 + }, + { + "epoch": 0.7110086455331412, + "grad_norm": 1.2193016691106844, + "learning_rate": 1.5321012343374843e-06, + "loss": 0.41849881410598755, + "step": 3084 + }, + { + "epoch": 0.7112391930835735, + "grad_norm": 1.176720568433765, + "learning_rate": 1.5317784674702805e-06, + "loss": 0.5312439203262329, + "step": 3085 + }, + { + "epoch": 0.7114697406340058, + "grad_norm": 1.448994861920568, + "learning_rate": 1.5314556233411975e-06, + "loss": 0.5194967985153198, + "step": 3086 + }, + { + "epoch": 0.7117002881844381, + "grad_norm": 1.2531304236512153, + "learning_rate": 1.5311327019971411e-06, + "loss": 0.5438544750213623, + "step": 3087 + }, + { + "epoch": 0.7119308357348704, + "grad_norm": 1.283053771121221, + "learning_rate": 1.5308097034850284e-06, + "loss": 0.5709701776504517, + "step": 3088 + }, + { + "epoch": 0.7121613832853025, + "grad_norm": 1.1723690120388703, + "learning_rate": 1.5304866278517876e-06, + "loss": 0.46026527881622314, + "step": 3089 + }, + { + "epoch": 0.7123919308357348, + "grad_norm": 1.3347805311464678, + "learning_rate": 1.5301634751443585e-06, + "loss": 0.48327285051345825, + "step": 3090 + }, + { + "epoch": 0.7126224783861671, + "grad_norm": 1.278497590029932, + "learning_rate": 1.5298402454096913e-06, + "loss": 0.499522864818573, + "step": 3091 + }, + { + "epoch": 0.7128530259365994, + "grad_norm": 1.2877514654924413, + "learning_rate": 1.5295169386947487e-06, + "loss": 0.490348756313324, + "step": 3092 + }, + { + "epoch": 0.7130835734870317, + "grad_norm": 1.4853489603741208, + "learning_rate": 1.5291935550465029e-06, + "loss": 0.5159205794334412, + "step": 3093 + }, + { + "epoch": 0.7133141210374639, + "grad_norm": 1.5618066997282813, + "learning_rate": 1.528870094511939e-06, + "loss": 0.6093416810035706, + "step": 3094 + }, + { + "epoch": 0.7135446685878962, + "grad_norm": 1.3896221479742665, + "learning_rate": 1.5285465571380518e-06, + "loss": 0.5157930254936218, + "step": 3095 + }, + { + "epoch": 0.7137752161383285, + "grad_norm": 1.594771684664948, + "learning_rate": 1.5282229429718484e-06, + "loss": 0.5248245596885681, + "step": 3096 + }, + { + "epoch": 0.7140057636887608, + "grad_norm": 1.147007768121321, + "learning_rate": 1.5278992520603465e-06, + "loss": 0.5186586976051331, + "step": 3097 + }, + { + "epoch": 0.7142363112391931, + "grad_norm": 1.280751872248316, + "learning_rate": 1.5275754844505743e-06, + "loss": 0.47629937529563904, + "step": 3098 + }, + { + "epoch": 0.7144668587896253, + "grad_norm": 1.200709497044733, + "learning_rate": 1.527251640189573e-06, + "loss": 0.47172415256500244, + "step": 3099 + }, + { + "epoch": 0.7146974063400576, + "grad_norm": 1.3100833521000486, + "learning_rate": 1.5269277193243935e-06, + "loss": 0.42949551343917847, + "step": 3100 + }, + { + "epoch": 0.7149279538904899, + "grad_norm": 1.1770401760493896, + "learning_rate": 1.5266037219020975e-06, + "loss": 0.4720640778541565, + "step": 3101 + }, + { + "epoch": 0.7151585014409222, + "grad_norm": 1.4867492108041584, + "learning_rate": 1.5262796479697588e-06, + "loss": 0.6004112958908081, + "step": 3102 + }, + { + "epoch": 0.7153890489913545, + "grad_norm": 1.2325085653784102, + "learning_rate": 1.525955497574462e-06, + "loss": 0.5297414064407349, + "step": 3103 + }, + { + "epoch": 0.7156195965417868, + "grad_norm": 1.1170096797212132, + "learning_rate": 1.5256312707633033e-06, + "loss": 0.4032800495624542, + "step": 3104 + }, + { + "epoch": 0.715850144092219, + "grad_norm": 1.3276364077152654, + "learning_rate": 1.5253069675833884e-06, + "loss": 0.5147067904472351, + "step": 3105 + }, + { + "epoch": 0.7160806916426513, + "grad_norm": 1.24754995211441, + "learning_rate": 1.524982588081836e-06, + "loss": 0.5029354691505432, + "step": 3106 + }, + { + "epoch": 0.7163112391930836, + "grad_norm": 1.193419187811573, + "learning_rate": 1.524658132305775e-06, + "loss": 0.482473760843277, + "step": 3107 + }, + { + "epoch": 0.7165417867435159, + "grad_norm": 1.5679177312787276, + "learning_rate": 1.524333600302345e-06, + "loss": 0.5003863573074341, + "step": 3108 + }, + { + "epoch": 0.7167723342939482, + "grad_norm": 1.241966240767067, + "learning_rate": 1.5240089921186976e-06, + "loss": 0.5416104197502136, + "step": 3109 + }, + { + "epoch": 0.7170028818443804, + "grad_norm": 1.3873843990860806, + "learning_rate": 1.5236843078019948e-06, + "loss": 0.5003090500831604, + "step": 3110 + }, + { + "epoch": 0.7172334293948127, + "grad_norm": 1.3456709258527728, + "learning_rate": 1.52335954739941e-06, + "loss": 0.507997989654541, + "step": 3111 + }, + { + "epoch": 0.717463976945245, + "grad_norm": 1.3569169943499046, + "learning_rate": 1.523034710958127e-06, + "loss": 0.5409040451049805, + "step": 3112 + }, + { + "epoch": 0.7176945244956773, + "grad_norm": 1.1752130871727025, + "learning_rate": 1.5227097985253421e-06, + "loss": 0.46137601137161255, + "step": 3113 + }, + { + "epoch": 0.7179250720461096, + "grad_norm": 1.4940949255724785, + "learning_rate": 1.5223848101482604e-06, + "loss": 0.52419114112854, + "step": 3114 + }, + { + "epoch": 0.7181556195965418, + "grad_norm": 1.2026467071943991, + "learning_rate": 1.5220597458741003e-06, + "loss": 0.44112980365753174, + "step": 3115 + }, + { + "epoch": 0.718386167146974, + "grad_norm": 1.1526512249043757, + "learning_rate": 1.5217346057500905e-06, + "loss": 0.4375038743019104, + "step": 3116 + }, + { + "epoch": 0.7186167146974063, + "grad_norm": 1.3224156894029113, + "learning_rate": 1.5214093898234691e-06, + "loss": 0.5393679141998291, + "step": 3117 + }, + { + "epoch": 0.7188472622478386, + "grad_norm": 1.3081629631325682, + "learning_rate": 1.5210840981414878e-06, + "loss": 0.5080707669258118, + "step": 3118 + }, + { + "epoch": 0.7190778097982709, + "grad_norm": 1.3655587043810777, + "learning_rate": 1.5207587307514083e-06, + "loss": 0.46670597791671753, + "step": 3119 + }, + { + "epoch": 0.7193083573487031, + "grad_norm": 1.4194299174496814, + "learning_rate": 1.5204332877005022e-06, + "loss": 0.49168825149536133, + "step": 3120 + }, + { + "epoch": 0.7195389048991354, + "grad_norm": 1.0310387909664662, + "learning_rate": 1.5201077690360533e-06, + "loss": 0.4821666479110718, + "step": 3121 + }, + { + "epoch": 0.7197694524495677, + "grad_norm": 1.2633547986191462, + "learning_rate": 1.5197821748053561e-06, + "loss": 0.5115829706192017, + "step": 3122 + }, + { + "epoch": 0.72, + "grad_norm": 1.4998871193086376, + "learning_rate": 1.5194565050557162e-06, + "loss": 0.5272057056427002, + "step": 3123 + }, + { + "epoch": 0.7202305475504323, + "grad_norm": 1.346831550802848, + "learning_rate": 1.5191307598344495e-06, + "loss": 0.6039499640464783, + "step": 3124 + }, + { + "epoch": 0.7204610951008645, + "grad_norm": 1.4453840852241573, + "learning_rate": 1.5188049391888847e-06, + "loss": 0.526665985584259, + "step": 3125 + }, + { + "epoch": 0.7206916426512968, + "grad_norm": 1.3777089134097682, + "learning_rate": 1.5184790431663584e-06, + "loss": 0.510013997554779, + "step": 3126 + }, + { + "epoch": 0.7209221902017291, + "grad_norm": 1.3429612403841265, + "learning_rate": 1.518153071814221e-06, + "loss": 0.4767458438873291, + "step": 3127 + }, + { + "epoch": 0.7211527377521614, + "grad_norm": 1.4484846708597472, + "learning_rate": 1.517827025179833e-06, + "loss": 0.5258289575576782, + "step": 3128 + }, + { + "epoch": 0.7213832853025937, + "grad_norm": 1.3093467599223096, + "learning_rate": 1.5175009033105645e-06, + "loss": 0.512428343296051, + "step": 3129 + }, + { + "epoch": 0.7216138328530259, + "grad_norm": 1.3318335614334949, + "learning_rate": 1.5171747062537984e-06, + "loss": 0.45008307695388794, + "step": 3130 + }, + { + "epoch": 0.7218443804034582, + "grad_norm": 1.1846718745624494, + "learning_rate": 1.5168484340569274e-06, + "loss": 0.4721248745918274, + "step": 3131 + }, + { + "epoch": 0.7220749279538905, + "grad_norm": 1.369284305549072, + "learning_rate": 1.5165220867673556e-06, + "loss": 0.5113618969917297, + "step": 3132 + }, + { + "epoch": 0.7223054755043228, + "grad_norm": 1.3733683308454623, + "learning_rate": 1.5161956644324981e-06, + "loss": 0.5095973014831543, + "step": 3133 + }, + { + "epoch": 0.7225360230547551, + "grad_norm": 1.4404559032274478, + "learning_rate": 1.5158691670997803e-06, + "loss": 0.49031388759613037, + "step": 3134 + }, + { + "epoch": 0.7227665706051873, + "grad_norm": 1.4631484742851009, + "learning_rate": 1.515542594816639e-06, + "loss": 0.48376762866973877, + "step": 3135 + }, + { + "epoch": 0.7229971181556196, + "grad_norm": 1.4168472451107348, + "learning_rate": 1.5152159476305216e-06, + "loss": 0.5298563241958618, + "step": 3136 + }, + { + "epoch": 0.7232276657060519, + "grad_norm": 1.1340078225918775, + "learning_rate": 1.514889225588887e-06, + "loss": 0.4832274913787842, + "step": 3137 + }, + { + "epoch": 0.7234582132564842, + "grad_norm": 1.3157069004056066, + "learning_rate": 1.5145624287392043e-06, + "loss": 0.5212540626525879, + "step": 3138 + }, + { + "epoch": 0.7236887608069165, + "grad_norm": 1.6243689909614896, + "learning_rate": 1.514235557128953e-06, + "loss": 0.506250262260437, + "step": 3139 + }, + { + "epoch": 0.7239193083573487, + "grad_norm": 1.2552589816192705, + "learning_rate": 1.5139086108056254e-06, + "loss": 0.4448832869529724, + "step": 3140 + }, + { + "epoch": 0.724149855907781, + "grad_norm": 1.3005846690358085, + "learning_rate": 1.5135815898167223e-06, + "loss": 0.46571147441864014, + "step": 3141 + }, + { + "epoch": 0.7243804034582133, + "grad_norm": 1.148037591887767, + "learning_rate": 1.5132544942097572e-06, + "loss": 0.4139944911003113, + "step": 3142 + }, + { + "epoch": 0.7246109510086456, + "grad_norm": 1.242975321866892, + "learning_rate": 1.5129273240322529e-06, + "loss": 0.46135827898979187, + "step": 3143 + }, + { + "epoch": 0.7248414985590778, + "grad_norm": 1.514996810707073, + "learning_rate": 1.5126000793317448e-06, + "loss": 0.4942426085472107, + "step": 3144 + }, + { + "epoch": 0.72507204610951, + "grad_norm": 1.2884620434427276, + "learning_rate": 1.5122727601557774e-06, + "loss": 0.48823419213294983, + "step": 3145 + }, + { + "epoch": 0.7253025936599423, + "grad_norm": 1.2949364451776857, + "learning_rate": 1.511945366551907e-06, + "loss": 0.48425859212875366, + "step": 3146 + }, + { + "epoch": 0.7255331412103746, + "grad_norm": 1.4240605097464292, + "learning_rate": 1.5116178985677005e-06, + "loss": 0.3889104127883911, + "step": 3147 + }, + { + "epoch": 0.7257636887608069, + "grad_norm": 1.2084808875919353, + "learning_rate": 1.5112903562507357e-06, + "loss": 0.4651326537132263, + "step": 3148 + }, + { + "epoch": 0.7259942363112392, + "grad_norm": 1.3875484913968303, + "learning_rate": 1.5109627396486011e-06, + "loss": 0.5221618413925171, + "step": 3149 + }, + { + "epoch": 0.7262247838616714, + "grad_norm": 1.2662900393043348, + "learning_rate": 1.5106350488088958e-06, + "loss": 0.5054634213447571, + "step": 3150 + }, + { + "epoch": 0.7264553314121037, + "grad_norm": 1.2101168031562617, + "learning_rate": 1.5103072837792295e-06, + "loss": 0.5082115530967712, + "step": 3151 + }, + { + "epoch": 0.726685878962536, + "grad_norm": 1.2590734826919803, + "learning_rate": 1.5099794446072239e-06, + "loss": 0.45884132385253906, + "step": 3152 + }, + { + "epoch": 0.7269164265129683, + "grad_norm": 1.2411545140923466, + "learning_rate": 1.50965153134051e-06, + "loss": 0.4660666882991791, + "step": 3153 + }, + { + "epoch": 0.7271469740634006, + "grad_norm": 1.3868010494860628, + "learning_rate": 1.5093235440267308e-06, + "loss": 0.5483843684196472, + "step": 3154 + }, + { + "epoch": 0.7273775216138328, + "grad_norm": 1.3466110391906256, + "learning_rate": 1.5089954827135384e-06, + "loss": 0.438312828540802, + "step": 3155 + }, + { + "epoch": 0.7276080691642651, + "grad_norm": 1.3819060249646318, + "learning_rate": 1.5086673474485978e-06, + "loss": 0.4444723129272461, + "step": 3156 + }, + { + "epoch": 0.7278386167146974, + "grad_norm": 1.5587131329445694, + "learning_rate": 1.5083391382795835e-06, + "loss": 0.4306541681289673, + "step": 3157 + }, + { + "epoch": 0.7280691642651297, + "grad_norm": 1.4276962124392467, + "learning_rate": 1.5080108552541799e-06, + "loss": 0.5440046787261963, + "step": 3158 + }, + { + "epoch": 0.728299711815562, + "grad_norm": 1.0995595949722106, + "learning_rate": 1.5076824984200837e-06, + "loss": 0.46402427554130554, + "step": 3159 + }, + { + "epoch": 0.7285302593659942, + "grad_norm": 1.2297644719249652, + "learning_rate": 1.5073540678250024e-06, + "loss": 0.5025292634963989, + "step": 3160 + }, + { + "epoch": 0.7287608069164265, + "grad_norm": 1.610056083044619, + "learning_rate": 1.5070255635166526e-06, + "loss": 0.5699792504310608, + "step": 3161 + }, + { + "epoch": 0.7289913544668588, + "grad_norm": 1.1607368738487698, + "learning_rate": 1.506696985542763e-06, + "loss": 0.5201475024223328, + "step": 3162 + }, + { + "epoch": 0.7292219020172911, + "grad_norm": 2.0192527279994508, + "learning_rate": 1.5063683339510722e-06, + "loss": 0.47518134117126465, + "step": 3163 + }, + { + "epoch": 0.7294524495677234, + "grad_norm": 1.4627008855208656, + "learning_rate": 1.5060396087893303e-06, + "loss": 0.45600777864456177, + "step": 3164 + }, + { + "epoch": 0.7296829971181557, + "grad_norm": 1.3350658804578093, + "learning_rate": 1.5057108101052975e-06, + "loss": 0.5589861869812012, + "step": 3165 + }, + { + "epoch": 0.7299135446685879, + "grad_norm": 1.5326020129462947, + "learning_rate": 1.5053819379467449e-06, + "loss": 0.5672855377197266, + "step": 3166 + }, + { + "epoch": 0.7301440922190202, + "grad_norm": 1.358392085768019, + "learning_rate": 1.5050529923614539e-06, + "loss": 0.4816353917121887, + "step": 3167 + }, + { + "epoch": 0.7303746397694525, + "grad_norm": 1.5426455545970876, + "learning_rate": 1.5047239733972172e-06, + "loss": 0.5251726508140564, + "step": 3168 + }, + { + "epoch": 0.7306051873198848, + "grad_norm": 1.3452570725680248, + "learning_rate": 1.5043948811018376e-06, + "loss": 0.5041342973709106, + "step": 3169 + }, + { + "epoch": 0.730835734870317, + "grad_norm": 1.27265857523486, + "learning_rate": 1.5040657155231294e-06, + "loss": 0.47151800990104675, + "step": 3170 + }, + { + "epoch": 0.7310662824207492, + "grad_norm": 1.3177939212987457, + "learning_rate": 1.5037364767089158e-06, + "loss": 0.4337390661239624, + "step": 3171 + }, + { + "epoch": 0.7312968299711815, + "grad_norm": 1.2975145506792556, + "learning_rate": 1.5034071647070326e-06, + "loss": 0.5004295706748962, + "step": 3172 + }, + { + "epoch": 0.7315273775216138, + "grad_norm": 1.3770112611667393, + "learning_rate": 1.5030777795653253e-06, + "loss": 0.5853343605995178, + "step": 3173 + }, + { + "epoch": 0.7317579250720461, + "grad_norm": 1.4038198026175097, + "learning_rate": 1.50274832133165e-06, + "loss": 0.5205816626548767, + "step": 3174 + }, + { + "epoch": 0.7319884726224783, + "grad_norm": 1.4163405803887914, + "learning_rate": 1.5024187900538732e-06, + "loss": 0.6013909578323364, + "step": 3175 + }, + { + "epoch": 0.7322190201729106, + "grad_norm": 1.6139845231432477, + "learning_rate": 1.5020891857798734e-06, + "loss": 0.6791777014732361, + "step": 3176 + }, + { + "epoch": 0.7324495677233429, + "grad_norm": 1.3435286466605478, + "learning_rate": 1.5017595085575377e-06, + "loss": 0.5194481611251831, + "step": 3177 + }, + { + "epoch": 0.7326801152737752, + "grad_norm": 1.485630449161062, + "learning_rate": 1.501429758434765e-06, + "loss": 0.442119836807251, + "step": 3178 + }, + { + "epoch": 0.7329106628242075, + "grad_norm": 1.4386371107965295, + "learning_rate": 1.501099935459465e-06, + "loss": 0.4681103229522705, + "step": 3179 + }, + { + "epoch": 0.7331412103746398, + "grad_norm": 1.4831338394596723, + "learning_rate": 1.500770039679557e-06, + "loss": 0.47549009323120117, + "step": 3180 + }, + { + "epoch": 0.733371757925072, + "grad_norm": 1.210045907042475, + "learning_rate": 1.5004400711429716e-06, + "loss": 0.5011273622512817, + "step": 3181 + }, + { + "epoch": 0.7336023054755043, + "grad_norm": 1.3507700304649388, + "learning_rate": 1.5001100298976498e-06, + "loss": 0.4527067244052887, + "step": 3182 + }, + { + "epoch": 0.7338328530259366, + "grad_norm": 1.4783974186980648, + "learning_rate": 1.4997799159915437e-06, + "loss": 0.4995662271976471, + "step": 3183 + }, + { + "epoch": 0.7340634005763689, + "grad_norm": 1.289236007560327, + "learning_rate": 1.499449729472614e-06, + "loss": 0.4354374408721924, + "step": 3184 + }, + { + "epoch": 0.7342939481268012, + "grad_norm": 1.1085815488677964, + "learning_rate": 1.4991194703888352e-06, + "loss": 0.4196980893611908, + "step": 3185 + }, + { + "epoch": 0.7345244956772334, + "grad_norm": 1.2454446345841115, + "learning_rate": 1.498789138788189e-06, + "loss": 0.41767656803131104, + "step": 3186 + }, + { + "epoch": 0.7347550432276657, + "grad_norm": 1.3175981275733846, + "learning_rate": 1.4984587347186701e-06, + "loss": 0.5601884722709656, + "step": 3187 + }, + { + "epoch": 0.734985590778098, + "grad_norm": 1.3717772217606947, + "learning_rate": 1.498128258228282e-06, + "loss": 0.5722160935401917, + "step": 3188 + }, + { + "epoch": 0.7352161383285303, + "grad_norm": 1.3550924414269825, + "learning_rate": 1.4977977093650402e-06, + "loss": 0.49754706025123596, + "step": 3189 + }, + { + "epoch": 0.7354466858789626, + "grad_norm": 1.2671729909166771, + "learning_rate": 1.4974670881769699e-06, + "loss": 0.3791263997554779, + "step": 3190 + }, + { + "epoch": 0.7356772334293948, + "grad_norm": 1.5238810977019115, + "learning_rate": 1.4971363947121062e-06, + "loss": 0.5790057182312012, + "step": 3191 + }, + { + "epoch": 0.7359077809798271, + "grad_norm": 1.2068480937901207, + "learning_rate": 1.4968056290184962e-06, + "loss": 0.4697153568267822, + "step": 3192 + }, + { + "epoch": 0.7361383285302594, + "grad_norm": 1.3533406878320302, + "learning_rate": 1.4964747911441967e-06, + "loss": 0.5018360614776611, + "step": 3193 + }, + { + "epoch": 0.7363688760806917, + "grad_norm": 1.177404560227721, + "learning_rate": 1.4961438811372744e-06, + "loss": 0.4887595772743225, + "step": 3194 + }, + { + "epoch": 0.736599423631124, + "grad_norm": 1.3181068958883442, + "learning_rate": 1.4958128990458078e-06, + "loss": 0.4804183840751648, + "step": 3195 + }, + { + "epoch": 0.7368299711815562, + "grad_norm": 1.8934118111380485, + "learning_rate": 1.4954818449178846e-06, + "loss": 0.4760059118270874, + "step": 3196 + }, + { + "epoch": 0.7370605187319885, + "grad_norm": 1.262489651614568, + "learning_rate": 1.4951507188016037e-06, + "loss": 0.5415846109390259, + "step": 3197 + }, + { + "epoch": 0.7372910662824208, + "grad_norm": 1.548909219963287, + "learning_rate": 1.4948195207450747e-06, + "loss": 0.4729132652282715, + "step": 3198 + }, + { + "epoch": 0.737521613832853, + "grad_norm": 1.1399718829955798, + "learning_rate": 1.4944882507964166e-06, + "loss": 0.45299506187438965, + "step": 3199 + }, + { + "epoch": 0.7377521613832853, + "grad_norm": 1.2986205608956582, + "learning_rate": 1.4941569090037592e-06, + "loss": 0.4444664716720581, + "step": 3200 + }, + { + "epoch": 0.7379827089337175, + "grad_norm": 1.2888444051901529, + "learning_rate": 1.493825495415244e-06, + "loss": 0.5595009922981262, + "step": 3201 + }, + { + "epoch": 0.7382132564841498, + "grad_norm": 1.2769760823016159, + "learning_rate": 1.4934940100790217e-06, + "loss": 0.5800896286964417, + "step": 3202 + }, + { + "epoch": 0.7384438040345821, + "grad_norm": 1.4684684564858026, + "learning_rate": 1.4931624530432531e-06, + "loss": 0.5237326622009277, + "step": 3203 + }, + { + "epoch": 0.7386743515850144, + "grad_norm": 1.5052585559748772, + "learning_rate": 1.4928308243561106e-06, + "loss": 0.49139225482940674, + "step": 3204 + }, + { + "epoch": 0.7389048991354467, + "grad_norm": 1.1853994118787092, + "learning_rate": 1.4924991240657758e-06, + "loss": 0.4052927494049072, + "step": 3205 + }, + { + "epoch": 0.7391354466858789, + "grad_norm": 1.4576894877729643, + "learning_rate": 1.4921673522204418e-06, + "loss": 0.4857429265975952, + "step": 3206 + }, + { + "epoch": 0.7393659942363112, + "grad_norm": 1.1777599862408406, + "learning_rate": 1.4918355088683114e-06, + "loss": 0.464494526386261, + "step": 3207 + }, + { + "epoch": 0.7395965417867435, + "grad_norm": 1.539493657840995, + "learning_rate": 1.4915035940575982e-06, + "loss": 0.4662816822528839, + "step": 3208 + }, + { + "epoch": 0.7398270893371758, + "grad_norm": 1.6279996883671786, + "learning_rate": 1.4911716078365255e-06, + "loss": 0.6633642911911011, + "step": 3209 + }, + { + "epoch": 0.7400576368876081, + "grad_norm": 1.2374546972110039, + "learning_rate": 1.490839550253328e-06, + "loss": 0.47886157035827637, + "step": 3210 + }, + { + "epoch": 0.7402881844380403, + "grad_norm": 1.6545796769921648, + "learning_rate": 1.49050742135625e-06, + "loss": 0.568007230758667, + "step": 3211 + }, + { + "epoch": 0.7405187319884726, + "grad_norm": 1.3532552620344924, + "learning_rate": 1.4901752211935461e-06, + "loss": 0.5382261872291565, + "step": 3212 + }, + { + "epoch": 0.7407492795389049, + "grad_norm": 1.2960864654756472, + "learning_rate": 1.489842949813482e-06, + "loss": 0.42607924342155457, + "step": 3213 + }, + { + "epoch": 0.7409798270893372, + "grad_norm": 1.1718315561608335, + "learning_rate": 1.4895106072643329e-06, + "loss": 0.4686487317085266, + "step": 3214 + }, + { + "epoch": 0.7412103746397695, + "grad_norm": 1.250504933101052, + "learning_rate": 1.4891781935943849e-06, + "loss": 0.4415118098258972, + "step": 3215 + }, + { + "epoch": 0.7414409221902017, + "grad_norm": 1.4069825438254808, + "learning_rate": 1.4888457088519337e-06, + "loss": 0.5895603895187378, + "step": 3216 + }, + { + "epoch": 0.741671469740634, + "grad_norm": 1.2536937286907117, + "learning_rate": 1.488513153085287e-06, + "loss": 0.49977678060531616, + "step": 3217 + }, + { + "epoch": 0.7419020172910663, + "grad_norm": 1.617869919003562, + "learning_rate": 1.4881805263427606e-06, + "loss": 0.5159600377082825, + "step": 3218 + }, + { + "epoch": 0.7421325648414986, + "grad_norm": 1.3312699005053992, + "learning_rate": 1.4878478286726825e-06, + "loss": 0.486503928899765, + "step": 3219 + }, + { + "epoch": 0.7423631123919309, + "grad_norm": 1.3493254176291696, + "learning_rate": 1.48751506012339e-06, + "loss": 0.5594410300254822, + "step": 3220 + }, + { + "epoch": 0.7425936599423631, + "grad_norm": 1.282002454566378, + "learning_rate": 1.4871822207432306e-06, + "loss": 0.58189857006073, + "step": 3221 + }, + { + "epoch": 0.7428242074927954, + "grad_norm": 1.4722677481345254, + "learning_rate": 1.4868493105805624e-06, + "loss": 0.4876249432563782, + "step": 3222 + }, + { + "epoch": 0.7430547550432277, + "grad_norm": 1.317973913245676, + "learning_rate": 1.486516329683754e-06, + "loss": 0.4791470170021057, + "step": 3223 + }, + { + "epoch": 0.74328530259366, + "grad_norm": 1.4498879865814052, + "learning_rate": 1.486183278101184e-06, + "loss": 0.5361772179603577, + "step": 3224 + }, + { + "epoch": 0.7435158501440923, + "grad_norm": 1.1522319567135113, + "learning_rate": 1.4858501558812416e-06, + "loss": 0.43522512912750244, + "step": 3225 + }, + { + "epoch": 0.7437463976945244, + "grad_norm": 1.340874944457649, + "learning_rate": 1.4855169630723256e-06, + "loss": 0.480033814907074, + "step": 3226 + }, + { + "epoch": 0.7439769452449567, + "grad_norm": 1.4405213728495958, + "learning_rate": 1.4851836997228457e-06, + "loss": 0.49638503789901733, + "step": 3227 + }, + { + "epoch": 0.744207492795389, + "grad_norm": 1.1863542915109169, + "learning_rate": 1.4848503658812215e-06, + "loss": 0.46532535552978516, + "step": 3228 + }, + { + "epoch": 0.7444380403458213, + "grad_norm": 1.4964825329069535, + "learning_rate": 1.4845169615958825e-06, + "loss": 0.57126384973526, + "step": 3229 + }, + { + "epoch": 0.7446685878962536, + "grad_norm": 1.3843465256914986, + "learning_rate": 1.48418348691527e-06, + "loss": 0.44086208939552307, + "step": 3230 + }, + { + "epoch": 0.7448991354466858, + "grad_norm": 1.3494587128334137, + "learning_rate": 1.4838499418878333e-06, + "loss": 0.527345597743988, + "step": 3231 + }, + { + "epoch": 0.7451296829971181, + "grad_norm": 1.1821347534533735, + "learning_rate": 1.4835163265620337e-06, + "loss": 0.5213931798934937, + "step": 3232 + }, + { + "epoch": 0.7453602305475504, + "grad_norm": 1.4266449251298718, + "learning_rate": 1.4831826409863415e-06, + "loss": 0.4754828214645386, + "step": 3233 + }, + { + "epoch": 0.7455907780979827, + "grad_norm": 1.2356724187797137, + "learning_rate": 1.482848885209238e-06, + "loss": 0.5262855291366577, + "step": 3234 + }, + { + "epoch": 0.745821325648415, + "grad_norm": 1.3091823753963572, + "learning_rate": 1.4825150592792146e-06, + "loss": 0.6030062437057495, + "step": 3235 + }, + { + "epoch": 0.7460518731988472, + "grad_norm": 1.537234499936022, + "learning_rate": 1.4821811632447728e-06, + "loss": 0.5706098079681396, + "step": 3236 + }, + { + "epoch": 0.7462824207492795, + "grad_norm": 1.415148150243323, + "learning_rate": 1.4818471971544236e-06, + "loss": 0.5095677971839905, + "step": 3237 + }, + { + "epoch": 0.7465129682997118, + "grad_norm": 1.2198058723620682, + "learning_rate": 1.481513161056689e-06, + "loss": 0.40414959192276, + "step": 3238 + }, + { + "epoch": 0.7467435158501441, + "grad_norm": 1.6486746642937935, + "learning_rate": 1.481179055000102e-06, + "loss": 0.5147525668144226, + "step": 3239 + }, + { + "epoch": 0.7469740634005764, + "grad_norm": 1.2602084168070753, + "learning_rate": 1.4808448790332034e-06, + "loss": 0.5350685119628906, + "step": 3240 + }, + { + "epoch": 0.7472046109510087, + "grad_norm": 1.3799401187569467, + "learning_rate": 1.4805106332045457e-06, + "loss": 0.5179615020751953, + "step": 3241 + }, + { + "epoch": 0.7474351585014409, + "grad_norm": 1.3099210508258956, + "learning_rate": 1.4801763175626922e-06, + "loss": 0.46143245697021484, + "step": 3242 + }, + { + "epoch": 0.7476657060518732, + "grad_norm": 1.2390835702681366, + "learning_rate": 1.4798419321562148e-06, + "loss": 0.45439475774765015, + "step": 3243 + }, + { + "epoch": 0.7478962536023055, + "grad_norm": 1.584653266512537, + "learning_rate": 1.4795074770336963e-06, + "loss": 0.47151651978492737, + "step": 3244 + }, + { + "epoch": 0.7481268011527378, + "grad_norm": 1.3115239213335883, + "learning_rate": 1.4791729522437298e-06, + "loss": 0.5368925333023071, + "step": 3245 + }, + { + "epoch": 0.7483573487031701, + "grad_norm": 1.3145011041134524, + "learning_rate": 1.4788383578349177e-06, + "loss": 0.4919322431087494, + "step": 3246 + }, + { + "epoch": 0.7485878962536023, + "grad_norm": 1.266771108560509, + "learning_rate": 1.4785036938558737e-06, + "loss": 0.449468731880188, + "step": 3247 + }, + { + "epoch": 0.7488184438040346, + "grad_norm": 1.3549371512329456, + "learning_rate": 1.4781689603552208e-06, + "loss": 0.463690847158432, + "step": 3248 + }, + { + "epoch": 0.7490489913544669, + "grad_norm": 1.135747734564589, + "learning_rate": 1.477834157381592e-06, + "loss": 0.47871702909469604, + "step": 3249 + }, + { + "epoch": 0.7492795389048992, + "grad_norm": 1.5722908594204268, + "learning_rate": 1.4774992849836312e-06, + "loss": 0.4971943497657776, + "step": 3250 + }, + { + "epoch": 0.7495100864553315, + "grad_norm": 1.2012715930460522, + "learning_rate": 1.4771643432099915e-06, + "loss": 0.42786940932273865, + "step": 3251 + }, + { + "epoch": 0.7497406340057637, + "grad_norm": 1.458596022540129, + "learning_rate": 1.4768293321093366e-06, + "loss": 0.5166041851043701, + "step": 3252 + }, + { + "epoch": 0.749971181556196, + "grad_norm": 1.4316055802929983, + "learning_rate": 1.4764942517303397e-06, + "loss": 0.5358223915100098, + "step": 3253 + }, + { + "epoch": 0.7502017291066282, + "grad_norm": 1.2502899398623475, + "learning_rate": 1.4761591021216854e-06, + "loss": 0.4700164198875427, + "step": 3254 + }, + { + "epoch": 0.7504322766570605, + "grad_norm": 1.5378679862419669, + "learning_rate": 1.475823883332067e-06, + "loss": 0.4827917516231537, + "step": 3255 + }, + { + "epoch": 0.7506628242074928, + "grad_norm": 1.4586380104389158, + "learning_rate": 1.4754885954101881e-06, + "loss": 0.48745471239089966, + "step": 3256 + }, + { + "epoch": 0.750893371757925, + "grad_norm": 1.7262371957910712, + "learning_rate": 1.4751532384047626e-06, + "loss": 0.4328693747520447, + "step": 3257 + }, + { + "epoch": 0.7511239193083573, + "grad_norm": 1.4947067603037787, + "learning_rate": 1.4748178123645148e-06, + "loss": 0.5124896764755249, + "step": 3258 + }, + { + "epoch": 0.7513544668587896, + "grad_norm": 1.4778565285714071, + "learning_rate": 1.474482317338178e-06, + "loss": 0.46671369671821594, + "step": 3259 + }, + { + "epoch": 0.7515850144092219, + "grad_norm": 1.4926182145171014, + "learning_rate": 1.4741467533744964e-06, + "loss": 0.47405868768692017, + "step": 3260 + }, + { + "epoch": 0.7518155619596542, + "grad_norm": 1.5314925124371308, + "learning_rate": 1.473811120522224e-06, + "loss": 0.6070246696472168, + "step": 3261 + }, + { + "epoch": 0.7520461095100864, + "grad_norm": 1.2253029967074474, + "learning_rate": 1.4734754188301248e-06, + "loss": 0.4673004150390625, + "step": 3262 + }, + { + "epoch": 0.7522766570605187, + "grad_norm": 1.2921346331685029, + "learning_rate": 1.4731396483469726e-06, + "loss": 0.4255906939506531, + "step": 3263 + }, + { + "epoch": 0.752507204610951, + "grad_norm": 1.789844998921356, + "learning_rate": 1.4728038091215518e-06, + "loss": 0.6287655830383301, + "step": 3264 + }, + { + "epoch": 0.7527377521613833, + "grad_norm": 1.5134361950809017, + "learning_rate": 1.4724679012026558e-06, + "loss": 0.5185075998306274, + "step": 3265 + }, + { + "epoch": 0.7529682997118156, + "grad_norm": 1.2691589676603439, + "learning_rate": 1.472131924639089e-06, + "loss": 0.5780787467956543, + "step": 3266 + }, + { + "epoch": 0.7531988472622478, + "grad_norm": 1.4836762250468871, + "learning_rate": 1.4717958794796646e-06, + "loss": 0.5578355193138123, + "step": 3267 + }, + { + "epoch": 0.7534293948126801, + "grad_norm": 1.2160311025308315, + "learning_rate": 1.4714597657732073e-06, + "loss": 0.46400272846221924, + "step": 3268 + }, + { + "epoch": 0.7536599423631124, + "grad_norm": 1.5435283070764019, + "learning_rate": 1.47112358356855e-06, + "loss": 0.4976726770401001, + "step": 3269 + }, + { + "epoch": 0.7538904899135447, + "grad_norm": 2.007123996849609, + "learning_rate": 1.4707873329145372e-06, + "loss": 0.5713675022125244, + "step": 3270 + }, + { + "epoch": 0.754121037463977, + "grad_norm": 1.2543678638742324, + "learning_rate": 1.470451013860023e-06, + "loss": 0.5416299700737, + "step": 3271 + }, + { + "epoch": 0.7543515850144092, + "grad_norm": 1.2483433805975221, + "learning_rate": 1.47011462645387e-06, + "loss": 0.44016146659851074, + "step": 3272 + }, + { + "epoch": 0.7545821325648415, + "grad_norm": 0.9999842753367084, + "learning_rate": 1.4697781707449525e-06, + "loss": 0.4472951292991638, + "step": 3273 + }, + { + "epoch": 0.7548126801152738, + "grad_norm": 1.2756009717160484, + "learning_rate": 1.4694416467821533e-06, + "loss": 0.42740410566329956, + "step": 3274 + }, + { + "epoch": 0.7550432276657061, + "grad_norm": 1.5386149050309754, + "learning_rate": 1.469105054614367e-06, + "loss": 0.5016801953315735, + "step": 3275 + }, + { + "epoch": 0.7552737752161384, + "grad_norm": 1.1801187234317005, + "learning_rate": 1.4687683942904956e-06, + "loss": 0.45598453283309937, + "step": 3276 + }, + { + "epoch": 0.7555043227665706, + "grad_norm": 1.5133516505478535, + "learning_rate": 1.4684316658594536e-06, + "loss": 0.5792516469955444, + "step": 3277 + }, + { + "epoch": 0.7557348703170029, + "grad_norm": 1.1812462798913754, + "learning_rate": 1.4680948693701632e-06, + "loss": 0.4660930633544922, + "step": 3278 + }, + { + "epoch": 0.7559654178674352, + "grad_norm": 1.2146112468729, + "learning_rate": 1.4677580048715576e-06, + "loss": 0.47444018721580505, + "step": 3279 + }, + { + "epoch": 0.7561959654178675, + "grad_norm": 1.5995735050771667, + "learning_rate": 1.4674210724125801e-06, + "loss": 0.5156435370445251, + "step": 3280 + }, + { + "epoch": 0.7564265129682997, + "grad_norm": 1.3317370759839628, + "learning_rate": 1.4670840720421839e-06, + "loss": 0.5381336212158203, + "step": 3281 + }, + { + "epoch": 0.7566570605187319, + "grad_norm": 1.3039255390329512, + "learning_rate": 1.4667470038093298e-06, + "loss": 0.4314936399459839, + "step": 3282 + }, + { + "epoch": 0.7568876080691642, + "grad_norm": 1.3315809296965806, + "learning_rate": 1.4664098677629924e-06, + "loss": 0.517806887626648, + "step": 3283 + }, + { + "epoch": 0.7571181556195965, + "grad_norm": 1.17402962693007, + "learning_rate": 1.466072663952153e-06, + "loss": 0.5067352056503296, + "step": 3284 + }, + { + "epoch": 0.7573487031700288, + "grad_norm": 1.3482800959852768, + "learning_rate": 1.465735392425804e-06, + "loss": 0.49119824171066284, + "step": 3285 + }, + { + "epoch": 0.7575792507204611, + "grad_norm": 1.5805195342469058, + "learning_rate": 1.465398053232947e-06, + "loss": 0.5447220802307129, + "step": 3286 + }, + { + "epoch": 0.7578097982708933, + "grad_norm": 1.5136634313996882, + "learning_rate": 1.465060646422595e-06, + "loss": 0.45386195182800293, + "step": 3287 + }, + { + "epoch": 0.7580403458213256, + "grad_norm": 1.4455737325065987, + "learning_rate": 1.4647231720437684e-06, + "loss": 0.5146728754043579, + "step": 3288 + }, + { + "epoch": 0.7582708933717579, + "grad_norm": 1.208398611343664, + "learning_rate": 1.4643856301454998e-06, + "loss": 0.48067593574523926, + "step": 3289 + }, + { + "epoch": 0.7585014409221902, + "grad_norm": 1.3311997339356765, + "learning_rate": 1.4640480207768298e-06, + "loss": 0.5257793664932251, + "step": 3290 + }, + { + "epoch": 0.7587319884726225, + "grad_norm": 1.3400035497885063, + "learning_rate": 1.4637103439868097e-06, + "loss": 0.47576746344566345, + "step": 3291 + }, + { + "epoch": 0.7589625360230547, + "grad_norm": 1.3460861407557851, + "learning_rate": 1.4633725998245007e-06, + "loss": 0.5666048526763916, + "step": 3292 + }, + { + "epoch": 0.759193083573487, + "grad_norm": 1.1564836610678924, + "learning_rate": 1.463034788338973e-06, + "loss": 0.44034671783447266, + "step": 3293 + }, + { + "epoch": 0.7594236311239193, + "grad_norm": 1.4202662515562567, + "learning_rate": 1.4626969095793075e-06, + "loss": 0.4745502471923828, + "step": 3294 + }, + { + "epoch": 0.7596541786743516, + "grad_norm": 1.4448760394719364, + "learning_rate": 1.4623589635945948e-06, + "loss": 0.5635093450546265, + "step": 3295 + }, + { + "epoch": 0.7598847262247839, + "grad_norm": 1.4934304968991277, + "learning_rate": 1.4620209504339343e-06, + "loss": 0.6082891225814819, + "step": 3296 + }, + { + "epoch": 0.7601152737752161, + "grad_norm": 1.3232103372571715, + "learning_rate": 1.461682870146436e-06, + "loss": 0.4839524030685425, + "step": 3297 + }, + { + "epoch": 0.7603458213256484, + "grad_norm": 1.358400964322484, + "learning_rate": 1.461344722781219e-06, + "loss": 0.526859700679779, + "step": 3298 + }, + { + "epoch": 0.7605763688760807, + "grad_norm": 1.2678987797548023, + "learning_rate": 1.4610065083874138e-06, + "loss": 0.5154972672462463, + "step": 3299 + }, + { + "epoch": 0.760806916426513, + "grad_norm": 1.3734418069144714, + "learning_rate": 1.4606682270141583e-06, + "loss": 0.4647676944732666, + "step": 3300 + }, + { + "epoch": 0.7610374639769453, + "grad_norm": 1.366663754645629, + "learning_rate": 1.4603298787106015e-06, + "loss": 0.4470120966434479, + "step": 3301 + }, + { + "epoch": 0.7612680115273776, + "grad_norm": 1.3628610757351864, + "learning_rate": 1.4599914635259023e-06, + "loss": 0.5059522986412048, + "step": 3302 + }, + { + "epoch": 0.7614985590778098, + "grad_norm": 1.6696301085697236, + "learning_rate": 1.4596529815092284e-06, + "loss": 0.6402307748794556, + "step": 3303 + }, + { + "epoch": 0.7617291066282421, + "grad_norm": 1.1781562854865368, + "learning_rate": 1.4593144327097581e-06, + "loss": 0.4766733646392822, + "step": 3304 + }, + { + "epoch": 0.7619596541786744, + "grad_norm": 1.2614674941288848, + "learning_rate": 1.4589758171766788e-06, + "loss": 0.4931349754333496, + "step": 3305 + }, + { + "epoch": 0.7621902017291067, + "grad_norm": 1.2984040294603267, + "learning_rate": 1.458637134959188e-06, + "loss": 0.48644161224365234, + "step": 3306 + }, + { + "epoch": 0.762420749279539, + "grad_norm": 1.3725039156042402, + "learning_rate": 1.4582983861064925e-06, + "loss": 0.6103028059005737, + "step": 3307 + }, + { + "epoch": 0.7626512968299712, + "grad_norm": 1.284768652256614, + "learning_rate": 1.4579595706678094e-06, + "loss": 0.4911022484302521, + "step": 3308 + }, + { + "epoch": 0.7628818443804034, + "grad_norm": 1.3556353217542514, + "learning_rate": 1.4576206886923648e-06, + "loss": 0.49871009588241577, + "step": 3309 + }, + { + "epoch": 0.7631123919308357, + "grad_norm": 1.3055666196848867, + "learning_rate": 1.457281740229394e-06, + "loss": 0.48048093914985657, + "step": 3310 + }, + { + "epoch": 0.763342939481268, + "grad_norm": 1.351104735921863, + "learning_rate": 1.4569427253281438e-06, + "loss": 0.39041709899902344, + "step": 3311 + }, + { + "epoch": 0.7635734870317002, + "grad_norm": 1.211363331490999, + "learning_rate": 1.456603644037869e-06, + "loss": 0.5072616338729858, + "step": 3312 + }, + { + "epoch": 0.7638040345821325, + "grad_norm": 1.3516359175587098, + "learning_rate": 1.4562644964078348e-06, + "loss": 0.4459783732891083, + "step": 3313 + }, + { + "epoch": 0.7640345821325648, + "grad_norm": 1.3522114158420477, + "learning_rate": 1.4559252824873153e-06, + "loss": 0.46400186419487, + "step": 3314 + }, + { + "epoch": 0.7642651296829971, + "grad_norm": 1.3451209364608996, + "learning_rate": 1.455586002325596e-06, + "loss": 0.4863799810409546, + "step": 3315 + }, + { + "epoch": 0.7644956772334294, + "grad_norm": 1.5370462205813438, + "learning_rate": 1.4552466559719692e-06, + "loss": 0.4174691438674927, + "step": 3316 + }, + { + "epoch": 0.7647262247838617, + "grad_norm": 1.2694266587224816, + "learning_rate": 1.4549072434757392e-06, + "loss": 0.5211185812950134, + "step": 3317 + }, + { + "epoch": 0.7649567723342939, + "grad_norm": 1.3359981437064172, + "learning_rate": 1.4545677648862192e-06, + "loss": 0.47883665561676025, + "step": 3318 + }, + { + "epoch": 0.7651873198847262, + "grad_norm": 1.403465964148792, + "learning_rate": 1.4542282202527316e-06, + "loss": 0.4836026430130005, + "step": 3319 + }, + { + "epoch": 0.7654178674351585, + "grad_norm": 1.1848104317381678, + "learning_rate": 1.4538886096246091e-06, + "loss": 0.4277452528476715, + "step": 3320 + }, + { + "epoch": 0.7656484149855908, + "grad_norm": 1.6322770231147972, + "learning_rate": 1.453548933051193e-06, + "loss": 0.4482790231704712, + "step": 3321 + }, + { + "epoch": 0.7658789625360231, + "grad_norm": 1.4474066354965356, + "learning_rate": 1.4532091905818354e-06, + "loss": 0.4530835747718811, + "step": 3322 + }, + { + "epoch": 0.7661095100864553, + "grad_norm": 1.1318959264422865, + "learning_rate": 1.4528693822658966e-06, + "loss": 0.43995189666748047, + "step": 3323 + }, + { + "epoch": 0.7663400576368876, + "grad_norm": 1.2709137573762306, + "learning_rate": 1.452529508152748e-06, + "loss": 0.5199185609817505, + "step": 3324 + }, + { + "epoch": 0.7665706051873199, + "grad_norm": 1.5146053118213605, + "learning_rate": 1.4521895682917695e-06, + "loss": 0.46382370591163635, + "step": 3325 + }, + { + "epoch": 0.7668011527377522, + "grad_norm": 1.3262747578317158, + "learning_rate": 1.4518495627323506e-06, + "loss": 0.44753193855285645, + "step": 3326 + }, + { + "epoch": 0.7670317002881845, + "grad_norm": 1.3868444920521885, + "learning_rate": 1.4515094915238905e-06, + "loss": 0.560943067073822, + "step": 3327 + }, + { + "epoch": 0.7672622478386167, + "grad_norm": 1.3504010302196803, + "learning_rate": 1.4511693547157987e-06, + "loss": 0.5463162660598755, + "step": 3328 + }, + { + "epoch": 0.767492795389049, + "grad_norm": 1.7895593803441687, + "learning_rate": 1.4508291523574928e-06, + "loss": 0.44977307319641113, + "step": 3329 + }, + { + "epoch": 0.7677233429394813, + "grad_norm": 1.3352084625834344, + "learning_rate": 1.4504888844984014e-06, + "loss": 0.4786399304866791, + "step": 3330 + }, + { + "epoch": 0.7679538904899136, + "grad_norm": 1.2243491120544803, + "learning_rate": 1.450148551187961e-06, + "loss": 0.5283411145210266, + "step": 3331 + }, + { + "epoch": 0.7681844380403459, + "grad_norm": 1.4307683990100575, + "learning_rate": 1.449808152475619e-06, + "loss": 0.5005373954772949, + "step": 3332 + }, + { + "epoch": 0.7684149855907781, + "grad_norm": 1.4475030416705006, + "learning_rate": 1.4494676884108317e-06, + "loss": 0.48068857192993164, + "step": 3333 + }, + { + "epoch": 0.7686455331412104, + "grad_norm": 1.156674368619787, + "learning_rate": 1.449127159043065e-06, + "loss": 0.4934327006340027, + "step": 3334 + }, + { + "epoch": 0.7688760806916427, + "grad_norm": 1.28650908922562, + "learning_rate": 1.4487865644217942e-06, + "loss": 0.49922657012939453, + "step": 3335 + }, + { + "epoch": 0.7691066282420749, + "grad_norm": 1.3756554611981087, + "learning_rate": 1.4484459045965044e-06, + "loss": 0.5592349767684937, + "step": 3336 + }, + { + "epoch": 0.7693371757925072, + "grad_norm": 1.2847582796112, + "learning_rate": 1.4481051796166895e-06, + "loss": 0.4248694181442261, + "step": 3337 + }, + { + "epoch": 0.7695677233429394, + "grad_norm": 1.472604097829593, + "learning_rate": 1.4477643895318537e-06, + "loss": 0.580863356590271, + "step": 3338 + }, + { + "epoch": 0.7697982708933717, + "grad_norm": 1.3223200851539239, + "learning_rate": 1.44742353439151e-06, + "loss": 0.48872265219688416, + "step": 3339 + }, + { + "epoch": 0.770028818443804, + "grad_norm": 1.1094983126631952, + "learning_rate": 1.4470826142451812e-06, + "loss": 0.4195341467857361, + "step": 3340 + }, + { + "epoch": 0.7702593659942363, + "grad_norm": 1.475980825700943, + "learning_rate": 1.4467416291423998e-06, + "loss": 0.5242099761962891, + "step": 3341 + }, + { + "epoch": 0.7704899135446686, + "grad_norm": 1.3418034436842976, + "learning_rate": 1.4464005791327067e-06, + "loss": 0.4401935935020447, + "step": 3342 + }, + { + "epoch": 0.7707204610951008, + "grad_norm": 1.4397973067757597, + "learning_rate": 1.4460594642656532e-06, + "loss": 0.47286516427993774, + "step": 3343 + }, + { + "epoch": 0.7709510086455331, + "grad_norm": 1.2137594722189566, + "learning_rate": 1.4457182845908e-06, + "loss": 0.4484996497631073, + "step": 3344 + }, + { + "epoch": 0.7711815561959654, + "grad_norm": 1.2540008258880415, + "learning_rate": 1.4453770401577168e-06, + "loss": 0.45632821321487427, + "step": 3345 + }, + { + "epoch": 0.7714121037463977, + "grad_norm": 1.1241907177042443, + "learning_rate": 1.445035731015983e-06, + "loss": 0.4622989892959595, + "step": 3346 + }, + { + "epoch": 0.77164265129683, + "grad_norm": 1.5455270730171995, + "learning_rate": 1.4446943572151867e-06, + "loss": 0.48796528577804565, + "step": 3347 + }, + { + "epoch": 0.7718731988472622, + "grad_norm": 1.3039989310725912, + "learning_rate": 1.4443529188049263e-06, + "loss": 0.46072205901145935, + "step": 3348 + }, + { + "epoch": 0.7721037463976945, + "grad_norm": 1.340539994752118, + "learning_rate": 1.4440114158348097e-06, + "loss": 0.5429719686508179, + "step": 3349 + }, + { + "epoch": 0.7723342939481268, + "grad_norm": 1.4033984421504853, + "learning_rate": 1.4436698483544532e-06, + "loss": 0.466327041387558, + "step": 3350 + }, + { + "epoch": 0.7725648414985591, + "grad_norm": 1.4675566024133893, + "learning_rate": 1.443328216413483e-06, + "loss": 0.5271450877189636, + "step": 3351 + }, + { + "epoch": 0.7727953890489914, + "grad_norm": 1.171281390175273, + "learning_rate": 1.442986520061535e-06, + "loss": 0.4191371500492096, + "step": 3352 + }, + { + "epoch": 0.7730259365994236, + "grad_norm": 1.334196633801331, + "learning_rate": 1.4426447593482538e-06, + "loss": 0.47077637910842896, + "step": 3353 + }, + { + "epoch": 0.7732564841498559, + "grad_norm": 1.3111319856519132, + "learning_rate": 1.442302934323294e-06, + "loss": 0.5060920715332031, + "step": 3354 + }, + { + "epoch": 0.7734870317002882, + "grad_norm": 1.2904401653594715, + "learning_rate": 1.4419610450363186e-06, + "loss": 0.4885629713535309, + "step": 3355 + }, + { + "epoch": 0.7737175792507205, + "grad_norm": 1.239927093293297, + "learning_rate": 1.4416190915370015e-06, + "loss": 0.5303410291671753, + "step": 3356 + }, + { + "epoch": 0.7739481268011528, + "grad_norm": 1.409650109035996, + "learning_rate": 1.4412770738750242e-06, + "loss": 0.5328885912895203, + "step": 3357 + }, + { + "epoch": 0.774178674351585, + "grad_norm": 1.2629126051729718, + "learning_rate": 1.4409349921000786e-06, + "loss": 0.44366949796676636, + "step": 3358 + }, + { + "epoch": 0.7744092219020173, + "grad_norm": 1.4207531408710683, + "learning_rate": 1.4405928462618656e-06, + "loss": 0.5273492336273193, + "step": 3359 + }, + { + "epoch": 0.7746397694524496, + "grad_norm": 1.4234295193689461, + "learning_rate": 1.4402506364100955e-06, + "loss": 0.5007044076919556, + "step": 3360 + }, + { + "epoch": 0.7748703170028819, + "grad_norm": 1.4068515591968336, + "learning_rate": 1.4399083625944878e-06, + "loss": 0.46129006147384644, + "step": 3361 + }, + { + "epoch": 0.7751008645533142, + "grad_norm": 1.131790729379322, + "learning_rate": 1.4395660248647712e-06, + "loss": 0.4864243268966675, + "step": 3362 + }, + { + "epoch": 0.7753314121037463, + "grad_norm": 1.4823929826396494, + "learning_rate": 1.439223623270684e-06, + "loss": 0.5185636878013611, + "step": 3363 + }, + { + "epoch": 0.7755619596541786, + "grad_norm": 1.5336447453024817, + "learning_rate": 1.438881157861973e-06, + "loss": 0.503462553024292, + "step": 3364 + }, + { + "epoch": 0.7757925072046109, + "grad_norm": 1.1579630453204193, + "learning_rate": 1.4385386286883958e-06, + "loss": 0.421749472618103, + "step": 3365 + }, + { + "epoch": 0.7760230547550432, + "grad_norm": 1.663964100537045, + "learning_rate": 1.4381960357997183e-06, + "loss": 0.5585425496101379, + "step": 3366 + }, + { + "epoch": 0.7762536023054755, + "grad_norm": 1.4186057360195095, + "learning_rate": 1.4378533792457145e-06, + "loss": 0.5588064789772034, + "step": 3367 + }, + { + "epoch": 0.7764841498559077, + "grad_norm": 1.2195290831267696, + "learning_rate": 1.4375106590761699e-06, + "loss": 0.39466559886932373, + "step": 3368 + }, + { + "epoch": 0.77671469740634, + "grad_norm": 1.2902755420837042, + "learning_rate": 1.437167875340878e-06, + "loss": 0.483551949262619, + "step": 3369 + }, + { + "epoch": 0.7769452449567723, + "grad_norm": 1.5188689936616353, + "learning_rate": 1.4368250280896414e-06, + "loss": 0.4568926990032196, + "step": 3370 + }, + { + "epoch": 0.7771757925072046, + "grad_norm": 1.4484780605131717, + "learning_rate": 1.4364821173722726e-06, + "loss": 0.4849422574043274, + "step": 3371 + }, + { + "epoch": 0.7774063400576369, + "grad_norm": 1.6018044814378665, + "learning_rate": 1.4361391432385926e-06, + "loss": 0.48174428939819336, + "step": 3372 + }, + { + "epoch": 0.7776368876080691, + "grad_norm": 1.5647433832432838, + "learning_rate": 1.435796105738432e-06, + "loss": 0.5706121921539307, + "step": 3373 + }, + { + "epoch": 0.7778674351585014, + "grad_norm": 1.326716608540626, + "learning_rate": 1.435453004921631e-06, + "loss": 0.5058635473251343, + "step": 3374 + }, + { + "epoch": 0.7780979827089337, + "grad_norm": 1.2089771113874428, + "learning_rate": 1.4351098408380382e-06, + "loss": 0.4246353209018707, + "step": 3375 + }, + { + "epoch": 0.778328530259366, + "grad_norm": 1.4154899807455992, + "learning_rate": 1.4347666135375118e-06, + "loss": 0.4570143520832062, + "step": 3376 + }, + { + "epoch": 0.7785590778097983, + "grad_norm": 1.3663868339749332, + "learning_rate": 1.4344233230699194e-06, + "loss": 0.5174911022186279, + "step": 3377 + }, + { + "epoch": 0.7787896253602306, + "grad_norm": 1.3658429357569173, + "learning_rate": 1.4340799694851373e-06, + "loss": 0.5119853019714355, + "step": 3378 + }, + { + "epoch": 0.7790201729106628, + "grad_norm": 1.3179107043166238, + "learning_rate": 1.4337365528330516e-06, + "loss": 0.4212768077850342, + "step": 3379 + }, + { + "epoch": 0.7792507204610951, + "grad_norm": 1.5039977694618414, + "learning_rate": 1.4333930731635562e-06, + "loss": 0.4929216206073761, + "step": 3380 + }, + { + "epoch": 0.7794812680115274, + "grad_norm": 1.4056076676761982, + "learning_rate": 1.433049530526556e-06, + "loss": 0.5154866576194763, + "step": 3381 + }, + { + "epoch": 0.7797118155619597, + "grad_norm": 1.1061064947121604, + "learning_rate": 1.4327059249719643e-06, + "loss": 0.537919819355011, + "step": 3382 + }, + { + "epoch": 0.779942363112392, + "grad_norm": 1.3211021723465781, + "learning_rate": 1.432362256549703e-06, + "loss": 0.5221723318099976, + "step": 3383 + }, + { + "epoch": 0.7801729106628242, + "grad_norm": 1.2423942665098675, + "learning_rate": 1.4320185253097032e-06, + "loss": 0.44169020652770996, + "step": 3384 + }, + { + "epoch": 0.7804034582132565, + "grad_norm": 1.3032004555348564, + "learning_rate": 1.4316747313019062e-06, + "loss": 0.4524272084236145, + "step": 3385 + }, + { + "epoch": 0.7806340057636888, + "grad_norm": 1.3773696861621003, + "learning_rate": 1.4313308745762614e-06, + "loss": 0.49299994111061096, + "step": 3386 + }, + { + "epoch": 0.7808645533141211, + "grad_norm": 1.4415837849285507, + "learning_rate": 1.4309869551827274e-06, + "loss": 0.3977648615837097, + "step": 3387 + }, + { + "epoch": 0.7810951008645534, + "grad_norm": 1.3750364479895638, + "learning_rate": 1.4306429731712722e-06, + "loss": 0.4714970588684082, + "step": 3388 + }, + { + "epoch": 0.7813256484149856, + "grad_norm": 1.3484439441991842, + "learning_rate": 1.430298928591873e-06, + "loss": 0.4930286109447479, + "step": 3389 + }, + { + "epoch": 0.7815561959654179, + "grad_norm": 1.5313273344124982, + "learning_rate": 1.4299548214945158e-06, + "loss": 0.47765177488327026, + "step": 3390 + }, + { + "epoch": 0.7817867435158501, + "grad_norm": 1.530990230685995, + "learning_rate": 1.4296106519291958e-06, + "loss": 0.5838227272033691, + "step": 3391 + }, + { + "epoch": 0.7820172910662824, + "grad_norm": 1.5544559796124315, + "learning_rate": 1.429266419945917e-06, + "loss": 0.5206441879272461, + "step": 3392 + }, + { + "epoch": 0.7822478386167147, + "grad_norm": 1.4914577305568477, + "learning_rate": 1.428922125594693e-06, + "loss": 0.4364234209060669, + "step": 3393 + }, + { + "epoch": 0.7824783861671469, + "grad_norm": 1.3845197160287306, + "learning_rate": 1.4285777689255465e-06, + "loss": 0.48500919342041016, + "step": 3394 + }, + { + "epoch": 0.7827089337175792, + "grad_norm": 1.6273547936063282, + "learning_rate": 1.4282333499885085e-06, + "loss": 0.5136459469795227, + "step": 3395 + }, + { + "epoch": 0.7829394812680115, + "grad_norm": 1.2794363096503683, + "learning_rate": 1.427888868833619e-06, + "loss": 0.5106863975524902, + "step": 3396 + }, + { + "epoch": 0.7831700288184438, + "grad_norm": 1.3619144328823791, + "learning_rate": 1.4275443255109284e-06, + "loss": 0.44078588485717773, + "step": 3397 + }, + { + "epoch": 0.7834005763688761, + "grad_norm": 1.2498324876454319, + "learning_rate": 1.4271997200704955e-06, + "loss": 0.4955654442310333, + "step": 3398 + }, + { + "epoch": 0.7836311239193083, + "grad_norm": 1.5359504534794015, + "learning_rate": 1.4268550525623867e-06, + "loss": 0.52781742811203, + "step": 3399 + }, + { + "epoch": 0.7838616714697406, + "grad_norm": 1.3659296872523792, + "learning_rate": 1.4265103230366796e-06, + "loss": 0.45101356506347656, + "step": 3400 + }, + { + "epoch": 0.7840922190201729, + "grad_norm": 1.5248196276431427, + "learning_rate": 1.4261655315434593e-06, + "loss": 0.5663477182388306, + "step": 3401 + }, + { + "epoch": 0.7843227665706052, + "grad_norm": 1.3703871781933803, + "learning_rate": 1.4258206781328205e-06, + "loss": 0.48097681999206543, + "step": 3402 + }, + { + "epoch": 0.7845533141210375, + "grad_norm": 1.404096619728342, + "learning_rate": 1.425475762854867e-06, + "loss": 0.5087981820106506, + "step": 3403 + }, + { + "epoch": 0.7847838616714697, + "grad_norm": 1.46092867428151, + "learning_rate": 1.4251307857597111e-06, + "loss": 0.4906430244445801, + "step": 3404 + }, + { + "epoch": 0.785014409221902, + "grad_norm": 1.488433432921487, + "learning_rate": 1.4247857468974749e-06, + "loss": 0.5091785788536072, + "step": 3405 + }, + { + "epoch": 0.7852449567723343, + "grad_norm": 1.3564098758024126, + "learning_rate": 1.4244406463182884e-06, + "loss": 0.4587729573249817, + "step": 3406 + }, + { + "epoch": 0.7854755043227666, + "grad_norm": 1.4157992822014371, + "learning_rate": 1.4240954840722917e-06, + "loss": 0.5763654708862305, + "step": 3407 + }, + { + "epoch": 0.7857060518731989, + "grad_norm": 1.3214244473234844, + "learning_rate": 1.4237502602096328e-06, + "loss": 0.4503173828125, + "step": 3408 + }, + { + "epoch": 0.7859365994236311, + "grad_norm": 1.3342645340505106, + "learning_rate": 1.423404974780469e-06, + "loss": 0.4033720791339874, + "step": 3409 + }, + { + "epoch": 0.7861671469740634, + "grad_norm": 1.300850205565397, + "learning_rate": 1.423059627834967e-06, + "loss": 0.4149436950683594, + "step": 3410 + }, + { + "epoch": 0.7863976945244957, + "grad_norm": 1.344125208287592, + "learning_rate": 1.4227142194233024e-06, + "loss": 0.48265737295150757, + "step": 3411 + }, + { + "epoch": 0.786628242074928, + "grad_norm": 1.3980526693501352, + "learning_rate": 1.422368749595659e-06, + "loss": 0.4554409980773926, + "step": 3412 + }, + { + "epoch": 0.7868587896253603, + "grad_norm": 1.5087719772601593, + "learning_rate": 1.4220232184022297e-06, + "loss": 0.4369552731513977, + "step": 3413 + }, + { + "epoch": 0.7870893371757925, + "grad_norm": 1.5230063521052837, + "learning_rate": 1.4216776258932175e-06, + "loss": 0.6042525768280029, + "step": 3414 + }, + { + "epoch": 0.7873198847262248, + "grad_norm": 1.3246312252241845, + "learning_rate": 1.4213319721188328e-06, + "loss": 0.4805430769920349, + "step": 3415 + }, + { + "epoch": 0.7875504322766571, + "grad_norm": 1.5729403638570199, + "learning_rate": 1.4209862571292954e-06, + "loss": 0.4798882007598877, + "step": 3416 + }, + { + "epoch": 0.7877809798270894, + "grad_norm": 1.4229019082389107, + "learning_rate": 1.4206404809748344e-06, + "loss": 0.5285375118255615, + "step": 3417 + }, + { + "epoch": 0.7880115273775216, + "grad_norm": 1.1309356073852486, + "learning_rate": 1.4202946437056872e-06, + "loss": 0.47220277786254883, + "step": 3418 + }, + { + "epoch": 0.7882420749279538, + "grad_norm": 1.3218992691271476, + "learning_rate": 1.4199487453721006e-06, + "loss": 0.4025140404701233, + "step": 3419 + }, + { + "epoch": 0.7884726224783861, + "grad_norm": 1.6108738068796637, + "learning_rate": 1.4196027860243303e-06, + "loss": 0.527138352394104, + "step": 3420 + }, + { + "epoch": 0.7887031700288184, + "grad_norm": 1.3234348965245013, + "learning_rate": 1.4192567657126396e-06, + "loss": 0.5286415815353394, + "step": 3421 + }, + { + "epoch": 0.7889337175792507, + "grad_norm": 1.4695018037410315, + "learning_rate": 1.4189106844873026e-06, + "loss": 0.6222572326660156, + "step": 3422 + }, + { + "epoch": 0.789164265129683, + "grad_norm": 1.4042703472266378, + "learning_rate": 1.418564542398601e-06, + "loss": 0.5256388187408447, + "step": 3423 + }, + { + "epoch": 0.7893948126801152, + "grad_norm": 1.1750007294892328, + "learning_rate": 1.4182183394968257e-06, + "loss": 0.44092851877212524, + "step": 3424 + }, + { + "epoch": 0.7896253602305475, + "grad_norm": 1.0680800247317186, + "learning_rate": 1.417872075832276e-06, + "loss": 0.49720248579978943, + "step": 3425 + }, + { + "epoch": 0.7898559077809798, + "grad_norm": 1.3216310981310417, + "learning_rate": 1.417525751455261e-06, + "loss": 0.5339962244033813, + "step": 3426 + }, + { + "epoch": 0.7900864553314121, + "grad_norm": 1.2572992892349242, + "learning_rate": 1.4171793664160978e-06, + "loss": 0.45812660455703735, + "step": 3427 + }, + { + "epoch": 0.7903170028818444, + "grad_norm": 1.4380138533229172, + "learning_rate": 1.4168329207651123e-06, + "loss": 0.48299676179885864, + "step": 3428 + }, + { + "epoch": 0.7905475504322766, + "grad_norm": 1.6688451231553607, + "learning_rate": 1.4164864145526396e-06, + "loss": 0.4708644151687622, + "step": 3429 + }, + { + "epoch": 0.7907780979827089, + "grad_norm": 1.5321338371589512, + "learning_rate": 1.4161398478290237e-06, + "loss": 0.40403974056243896, + "step": 3430 + }, + { + "epoch": 0.7910086455331412, + "grad_norm": 1.4234994908788658, + "learning_rate": 1.4157932206446167e-06, + "loss": 0.4803101122379303, + "step": 3431 + }, + { + "epoch": 0.7912391930835735, + "grad_norm": 1.2821081470388878, + "learning_rate": 1.4154465330497803e-06, + "loss": 0.4613071084022522, + "step": 3432 + }, + { + "epoch": 0.7914697406340058, + "grad_norm": 1.1011574369493982, + "learning_rate": 1.4150997850948842e-06, + "loss": 0.4833601117134094, + "step": 3433 + }, + { + "epoch": 0.791700288184438, + "grad_norm": 1.5818926357989893, + "learning_rate": 1.4147529768303077e-06, + "loss": 0.5288225412368774, + "step": 3434 + }, + { + "epoch": 0.7919308357348703, + "grad_norm": 1.394870295518193, + "learning_rate": 1.4144061083064384e-06, + "loss": 0.49780988693237305, + "step": 3435 + }, + { + "epoch": 0.7921613832853026, + "grad_norm": 1.4923520865822915, + "learning_rate": 1.4140591795736725e-06, + "loss": 0.522127091884613, + "step": 3436 + }, + { + "epoch": 0.7923919308357349, + "grad_norm": 1.3400151393389832, + "learning_rate": 1.413712190682415e-06, + "loss": 0.47800448536872864, + "step": 3437 + }, + { + "epoch": 0.7926224783861672, + "grad_norm": 1.4774277715563446, + "learning_rate": 1.4133651416830802e-06, + "loss": 0.5165538787841797, + "step": 3438 + }, + { + "epoch": 0.7928530259365995, + "grad_norm": 1.212288656706865, + "learning_rate": 1.4130180326260904e-06, + "loss": 0.44580405950546265, + "step": 3439 + }, + { + "epoch": 0.7930835734870317, + "grad_norm": 1.3979714746769198, + "learning_rate": 1.4126708635618769e-06, + "loss": 0.46524137258529663, + "step": 3440 + }, + { + "epoch": 0.793314121037464, + "grad_norm": 1.2774955650433963, + "learning_rate": 1.4123236345408796e-06, + "loss": 0.4641501307487488, + "step": 3441 + }, + { + "epoch": 0.7935446685878963, + "grad_norm": 1.4649488671253945, + "learning_rate": 1.4119763456135482e-06, + "loss": 0.4559420943260193, + "step": 3442 + }, + { + "epoch": 0.7937752161383286, + "grad_norm": 1.3524968198829954, + "learning_rate": 1.4116289968303389e-06, + "loss": 0.45827484130859375, + "step": 3443 + }, + { + "epoch": 0.7940057636887609, + "grad_norm": 1.1948682370487143, + "learning_rate": 1.4112815882417187e-06, + "loss": 0.4432838261127472, + "step": 3444 + }, + { + "epoch": 0.7942363112391931, + "grad_norm": 1.3739238935791784, + "learning_rate": 1.410934119898162e-06, + "loss": 0.45351576805114746, + "step": 3445 + }, + { + "epoch": 0.7944668587896253, + "grad_norm": 1.5157866838440086, + "learning_rate": 1.4105865918501526e-06, + "loss": 0.5147103071212769, + "step": 3446 + }, + { + "epoch": 0.7946974063400576, + "grad_norm": 1.2972558887632173, + "learning_rate": 1.4102390041481828e-06, + "loss": 0.42173266410827637, + "step": 3447 + }, + { + "epoch": 0.7949279538904899, + "grad_norm": 1.3419874816991175, + "learning_rate": 1.4098913568427533e-06, + "loss": 0.46593934297561646, + "step": 3448 + }, + { + "epoch": 0.7951585014409221, + "grad_norm": 1.3509410352673197, + "learning_rate": 1.409543649984374e-06, + "loss": 0.5319167375564575, + "step": 3449 + }, + { + "epoch": 0.7953890489913544, + "grad_norm": 1.5617478431314156, + "learning_rate": 1.409195883623562e-06, + "loss": 0.5881174802780151, + "step": 3450 + }, + { + "epoch": 0.7956195965417867, + "grad_norm": 1.1523297014628997, + "learning_rate": 1.4088480578108453e-06, + "loss": 0.40378129482269287, + "step": 3451 + }, + { + "epoch": 0.795850144092219, + "grad_norm": 1.4412429262971402, + "learning_rate": 1.4085001725967592e-06, + "loss": 0.4907345473766327, + "step": 3452 + }, + { + "epoch": 0.7960806916426513, + "grad_norm": 1.4038961218455859, + "learning_rate": 1.4081522280318472e-06, + "loss": 0.5060145258903503, + "step": 3453 + }, + { + "epoch": 0.7963112391930836, + "grad_norm": 1.3097671833232498, + "learning_rate": 1.4078042241666624e-06, + "loss": 0.4996522068977356, + "step": 3454 + }, + { + "epoch": 0.7965417867435158, + "grad_norm": 1.4059796957773147, + "learning_rate": 1.4074561610517664e-06, + "loss": 0.44975441694259644, + "step": 3455 + }, + { + "epoch": 0.7967723342939481, + "grad_norm": 1.3944917831982637, + "learning_rate": 1.4071080387377286e-06, + "loss": 0.5002501010894775, + "step": 3456 + }, + { + "epoch": 0.7970028818443804, + "grad_norm": 1.4468660365438362, + "learning_rate": 1.4067598572751277e-06, + "loss": 0.4908374547958374, + "step": 3457 + }, + { + "epoch": 0.7972334293948127, + "grad_norm": 1.3993390163985648, + "learning_rate": 1.4064116167145515e-06, + "loss": 0.5603960752487183, + "step": 3458 + }, + { + "epoch": 0.797463976945245, + "grad_norm": 1.5644785025139432, + "learning_rate": 1.4060633171065949e-06, + "loss": 0.5647206902503967, + "step": 3459 + }, + { + "epoch": 0.7976945244956772, + "grad_norm": 1.2624336646884822, + "learning_rate": 1.4057149585018624e-06, + "loss": 0.4450913667678833, + "step": 3460 + }, + { + "epoch": 0.7979250720461095, + "grad_norm": 1.579456116775947, + "learning_rate": 1.4053665409509672e-06, + "loss": 0.4865730404853821, + "step": 3461 + }, + { + "epoch": 0.7981556195965418, + "grad_norm": 1.6941134643857723, + "learning_rate": 1.4050180645045305e-06, + "loss": 0.5277330279350281, + "step": 3462 + }, + { + "epoch": 0.7983861671469741, + "grad_norm": 1.3510815860850502, + "learning_rate": 1.4046695292131823e-06, + "loss": 0.4863555431365967, + "step": 3463 + }, + { + "epoch": 0.7986167146974064, + "grad_norm": 1.5401585546637828, + "learning_rate": 1.4043209351275613e-06, + "loss": 0.520676851272583, + "step": 3464 + }, + { + "epoch": 0.7988472622478386, + "grad_norm": 1.5129404253879941, + "learning_rate": 1.4039722822983145e-06, + "loss": 0.5390855669975281, + "step": 3465 + }, + { + "epoch": 0.7990778097982709, + "grad_norm": 1.4532008026364132, + "learning_rate": 1.4036235707760973e-06, + "loss": 0.48805344104766846, + "step": 3466 + }, + { + "epoch": 0.7993083573487032, + "grad_norm": 1.3257398340732425, + "learning_rate": 1.4032748006115741e-06, + "loss": 0.5081312656402588, + "step": 3467 + }, + { + "epoch": 0.7995389048991355, + "grad_norm": 1.1210684086195801, + "learning_rate": 1.402925971855418e-06, + "loss": 0.41373687982559204, + "step": 3468 + }, + { + "epoch": 0.7997694524495678, + "grad_norm": 1.5795268987436135, + "learning_rate": 1.4025770845583093e-06, + "loss": 0.45267200469970703, + "step": 3469 + }, + { + "epoch": 0.8, + "grad_norm": 1.3610036905604035, + "learning_rate": 1.402228138770938e-06, + "loss": 0.4800153970718384, + "step": 3470 + }, + { + "epoch": 0.8002305475504323, + "grad_norm": 1.6530255088882315, + "learning_rate": 1.401879134544003e-06, + "loss": 0.5338609218597412, + "step": 3471 + }, + { + "epoch": 0.8004610951008646, + "grad_norm": 1.3481447596394023, + "learning_rate": 1.4015300719282102e-06, + "loss": 0.4515029788017273, + "step": 3472 + }, + { + "epoch": 0.8006916426512968, + "grad_norm": 1.548330384939731, + "learning_rate": 1.4011809509742746e-06, + "loss": 0.5389237403869629, + "step": 3473 + }, + { + "epoch": 0.8009221902017291, + "grad_norm": 1.6598589936630226, + "learning_rate": 1.4008317717329208e-06, + "loss": 0.4558069705963135, + "step": 3474 + }, + { + "epoch": 0.8011527377521613, + "grad_norm": 1.4976877876644026, + "learning_rate": 1.4004825342548798e-06, + "loss": 0.5577516555786133, + "step": 3475 + }, + { + "epoch": 0.8013832853025936, + "grad_norm": 1.3729770351500163, + "learning_rate": 1.400133238590893e-06, + "loss": 0.5946800708770752, + "step": 3476 + }, + { + "epoch": 0.8016138328530259, + "grad_norm": 1.3598913825351893, + "learning_rate": 1.3997838847917093e-06, + "loss": 0.5363301038742065, + "step": 3477 + }, + { + "epoch": 0.8018443804034582, + "grad_norm": 1.631090682220392, + "learning_rate": 1.3994344729080856e-06, + "loss": 0.5241801738739014, + "step": 3478 + }, + { + "epoch": 0.8020749279538905, + "grad_norm": 1.557902999736325, + "learning_rate": 1.3990850029907883e-06, + "loss": 0.5182064175605774, + "step": 3479 + }, + { + "epoch": 0.8023054755043227, + "grad_norm": 1.3906232947379025, + "learning_rate": 1.3987354750905922e-06, + "loss": 0.46549123525619507, + "step": 3480 + }, + { + "epoch": 0.802536023054755, + "grad_norm": 1.3039609736352331, + "learning_rate": 1.3983858892582792e-06, + "loss": 0.4560173451900482, + "step": 3481 + }, + { + "epoch": 0.8027665706051873, + "grad_norm": 1.7257423768652569, + "learning_rate": 1.3980362455446404e-06, + "loss": 0.5379814505577087, + "step": 3482 + }, + { + "epoch": 0.8029971181556196, + "grad_norm": 1.551642989984616, + "learning_rate": 1.3976865440004763e-06, + "loss": 0.509077250957489, + "step": 3483 + }, + { + "epoch": 0.8032276657060519, + "grad_norm": 1.6445081986461623, + "learning_rate": 1.3973367846765944e-06, + "loss": 0.5225260257720947, + "step": 3484 + }, + { + "epoch": 0.8034582132564841, + "grad_norm": 1.8771783864644709, + "learning_rate": 1.396986967623811e-06, + "loss": 0.49268752336502075, + "step": 3485 + }, + { + "epoch": 0.8036887608069164, + "grad_norm": 1.2992049945109447, + "learning_rate": 1.3966370928929508e-06, + "loss": 0.5338944792747498, + "step": 3486 + }, + { + "epoch": 0.8039193083573487, + "grad_norm": 1.4726736877240487, + "learning_rate": 1.3962871605348472e-06, + "loss": 0.5020278692245483, + "step": 3487 + }, + { + "epoch": 0.804149855907781, + "grad_norm": 1.451859327899609, + "learning_rate": 1.3959371706003418e-06, + "loss": 0.5514947175979614, + "step": 3488 + }, + { + "epoch": 0.8043804034582133, + "grad_norm": 1.4474465154502296, + "learning_rate": 1.3955871231402843e-06, + "loss": 0.46391648054122925, + "step": 3489 + }, + { + "epoch": 0.8046109510086455, + "grad_norm": 1.282000029061929, + "learning_rate": 1.395237018205533e-06, + "loss": 0.4571886956691742, + "step": 3490 + }, + { + "epoch": 0.8048414985590778, + "grad_norm": 1.587215609657964, + "learning_rate": 1.3948868558469546e-06, + "loss": 0.524207353591919, + "step": 3491 + }, + { + "epoch": 0.8050720461095101, + "grad_norm": 1.3146551085590652, + "learning_rate": 1.3945366361154238e-06, + "loss": 0.4406725764274597, + "step": 3492 + }, + { + "epoch": 0.8053025936599424, + "grad_norm": 1.3725564429124097, + "learning_rate": 1.3941863590618246e-06, + "loss": 0.46178239583969116, + "step": 3493 + }, + { + "epoch": 0.8055331412103747, + "grad_norm": 1.2708692924858904, + "learning_rate": 1.3938360247370475e-06, + "loss": 0.41395291686058044, + "step": 3494 + }, + { + "epoch": 0.805763688760807, + "grad_norm": 1.2877185719651698, + "learning_rate": 1.3934856331919929e-06, + "loss": 0.42799150943756104, + "step": 3495 + }, + { + "epoch": 0.8059942363112392, + "grad_norm": 1.4566912923520166, + "learning_rate": 1.3931351844775697e-06, + "loss": 0.5145558714866638, + "step": 3496 + }, + { + "epoch": 0.8062247838616715, + "grad_norm": 1.4297054776894131, + "learning_rate": 1.3927846786446934e-06, + "loss": 0.4697001576423645, + "step": 3497 + }, + { + "epoch": 0.8064553314121038, + "grad_norm": 1.25977839257532, + "learning_rate": 1.3924341157442895e-06, + "loss": 0.44351255893707275, + "step": 3498 + }, + { + "epoch": 0.8066858789625361, + "grad_norm": 1.3009582600594158, + "learning_rate": 1.392083495827291e-06, + "loss": 0.44177383184432983, + "step": 3499 + }, + { + "epoch": 0.8069164265129684, + "grad_norm": 1.242620176786914, + "learning_rate": 1.3917328189446395e-06, + "loss": 0.4446752071380615, + "step": 3500 + }, + { + "epoch": 0.8071469740634005, + "grad_norm": 1.341227072498732, + "learning_rate": 1.3913820851472845e-06, + "loss": 0.4698784351348877, + "step": 3501 + }, + { + "epoch": 0.8073775216138328, + "grad_norm": 1.4170578615183846, + "learning_rate": 1.3910312944861837e-06, + "loss": 0.5006378889083862, + "step": 3502 + }, + { + "epoch": 0.8076080691642651, + "grad_norm": 1.380723657233392, + "learning_rate": 1.3906804470123038e-06, + "loss": 0.47502535581588745, + "step": 3503 + }, + { + "epoch": 0.8078386167146974, + "grad_norm": 1.3489334550760619, + "learning_rate": 1.3903295427766192e-06, + "loss": 0.6008619070053101, + "step": 3504 + }, + { + "epoch": 0.8080691642651296, + "grad_norm": 1.4759126954971649, + "learning_rate": 1.3899785818301123e-06, + "loss": 0.4955158531665802, + "step": 3505 + }, + { + "epoch": 0.8082997118155619, + "grad_norm": 1.3044047485737327, + "learning_rate": 1.3896275642237747e-06, + "loss": 0.42474400997161865, + "step": 3506 + }, + { + "epoch": 0.8085302593659942, + "grad_norm": 1.4323063583263014, + "learning_rate": 1.3892764900086048e-06, + "loss": 0.5359533429145813, + "step": 3507 + }, + { + "epoch": 0.8087608069164265, + "grad_norm": 1.3384923488872582, + "learning_rate": 1.3889253592356106e-06, + "loss": 0.5112940669059753, + "step": 3508 + }, + { + "epoch": 0.8089913544668588, + "grad_norm": 1.2992899352507392, + "learning_rate": 1.388574171955808e-06, + "loss": 0.5173017978668213, + "step": 3509 + }, + { + "epoch": 0.809221902017291, + "grad_norm": 1.2833115975697806, + "learning_rate": 1.3882229282202202e-06, + "loss": 0.49251043796539307, + "step": 3510 + }, + { + "epoch": 0.8094524495677233, + "grad_norm": 1.3177101495371608, + "learning_rate": 1.3878716280798793e-06, + "loss": 0.5374883413314819, + "step": 3511 + }, + { + "epoch": 0.8096829971181556, + "grad_norm": 1.4600072526939667, + "learning_rate": 1.3875202715858262e-06, + "loss": 0.4614766240119934, + "step": 3512 + }, + { + "epoch": 0.8099135446685879, + "grad_norm": 1.481594502687008, + "learning_rate": 1.3871688587891087e-06, + "loss": 0.5260672569274902, + "step": 3513 + }, + { + "epoch": 0.8101440922190202, + "grad_norm": 1.4772605570900035, + "learning_rate": 1.3868173897407838e-06, + "loss": 0.4460594058036804, + "step": 3514 + }, + { + "epoch": 0.8103746397694525, + "grad_norm": 1.7026122663882848, + "learning_rate": 1.3864658644919161e-06, + "loss": 0.5009329319000244, + "step": 3515 + }, + { + "epoch": 0.8106051873198847, + "grad_norm": 1.384991070111951, + "learning_rate": 1.3861142830935783e-06, + "loss": 0.5075333118438721, + "step": 3516 + }, + { + "epoch": 0.810835734870317, + "grad_norm": 1.5223425101973258, + "learning_rate": 1.385762645596852e-06, + "loss": 0.44005393981933594, + "step": 3517 + }, + { + "epoch": 0.8110662824207493, + "grad_norm": 1.1883449015067757, + "learning_rate": 1.3854109520528265e-06, + "loss": 0.4705634117126465, + "step": 3518 + }, + { + "epoch": 0.8112968299711816, + "grad_norm": 1.2999872371396695, + "learning_rate": 1.385059202512599e-06, + "loss": 0.4802425801753998, + "step": 3519 + }, + { + "epoch": 0.8115273775216139, + "grad_norm": 1.3224607860470696, + "learning_rate": 1.384707397027275e-06, + "loss": 0.5397260189056396, + "step": 3520 + }, + { + "epoch": 0.8117579250720461, + "grad_norm": 1.4021212201308277, + "learning_rate": 1.3843555356479681e-06, + "loss": 0.4671134948730469, + "step": 3521 + }, + { + "epoch": 0.8119884726224784, + "grad_norm": 1.354117617094354, + "learning_rate": 1.3840036184258005e-06, + "loss": 0.46687519550323486, + "step": 3522 + }, + { + "epoch": 0.8122190201729107, + "grad_norm": 1.449750301342306, + "learning_rate": 1.3836516454119016e-06, + "loss": 0.5054115056991577, + "step": 3523 + }, + { + "epoch": 0.812449567723343, + "grad_norm": 1.5277280034784881, + "learning_rate": 1.3832996166574098e-06, + "loss": 0.40758228302001953, + "step": 3524 + }, + { + "epoch": 0.8126801152737753, + "grad_norm": 1.2548484991062847, + "learning_rate": 1.3829475322134711e-06, + "loss": 0.4480733573436737, + "step": 3525 + }, + { + "epoch": 0.8129106628242075, + "grad_norm": 1.3977222634007296, + "learning_rate": 1.3825953921312398e-06, + "loss": 0.4944935441017151, + "step": 3526 + }, + { + "epoch": 0.8131412103746398, + "grad_norm": 1.4647957112314143, + "learning_rate": 1.3822431964618776e-06, + "loss": 0.45961660146713257, + "step": 3527 + }, + { + "epoch": 0.813371757925072, + "grad_norm": 1.2821219036004161, + "learning_rate": 1.3818909452565558e-06, + "loss": 0.4575357437133789, + "step": 3528 + }, + { + "epoch": 0.8136023054755043, + "grad_norm": 1.3626120540148763, + "learning_rate": 1.3815386385664524e-06, + "loss": 0.47747135162353516, + "step": 3529 + }, + { + "epoch": 0.8138328530259366, + "grad_norm": 1.444122143074095, + "learning_rate": 1.381186276442754e-06, + "loss": 0.5448052883148193, + "step": 3530 + }, + { + "epoch": 0.8140634005763688, + "grad_norm": 1.201463194482245, + "learning_rate": 1.3808338589366545e-06, + "loss": 0.4915286898612976, + "step": 3531 + }, + { + "epoch": 0.8142939481268011, + "grad_norm": 1.4949477151672794, + "learning_rate": 1.3804813860993574e-06, + "loss": 0.5531010627746582, + "step": 3532 + }, + { + "epoch": 0.8145244956772334, + "grad_norm": 1.6945509499432712, + "learning_rate": 1.380128857982073e-06, + "loss": 0.454088032245636, + "step": 3533 + }, + { + "epoch": 0.8147550432276657, + "grad_norm": 1.5456279149373304, + "learning_rate": 1.37977627463602e-06, + "loss": 0.5124789476394653, + "step": 3534 + }, + { + "epoch": 0.814985590778098, + "grad_norm": 1.480001259627385, + "learning_rate": 1.3794236361124248e-06, + "loss": 0.503969132900238, + "step": 3535 + }, + { + "epoch": 0.8152161383285302, + "grad_norm": 1.322239173486934, + "learning_rate": 1.3790709424625222e-06, + "loss": 0.5175889134407043, + "step": 3536 + }, + { + "epoch": 0.8154466858789625, + "grad_norm": 1.1817953864001418, + "learning_rate": 1.3787181937375557e-06, + "loss": 0.46064361929893494, + "step": 3537 + }, + { + "epoch": 0.8156772334293948, + "grad_norm": 1.5516876974260332, + "learning_rate": 1.3783653899887747e-06, + "loss": 0.5197643041610718, + "step": 3538 + }, + { + "epoch": 0.8159077809798271, + "grad_norm": 1.2896396858717665, + "learning_rate": 1.3780125312674388e-06, + "loss": 0.5226879715919495, + "step": 3539 + }, + { + "epoch": 0.8161383285302594, + "grad_norm": 1.511035190303668, + "learning_rate": 1.3776596176248146e-06, + "loss": 0.4556129574775696, + "step": 3540 + }, + { + "epoch": 0.8163688760806916, + "grad_norm": 1.4478108658486855, + "learning_rate": 1.3773066491121766e-06, + "loss": 0.4984133243560791, + "step": 3541 + }, + { + "epoch": 0.8165994236311239, + "grad_norm": 1.5419703816463373, + "learning_rate": 1.3769536257808074e-06, + "loss": 0.48790422081947327, + "step": 3542 + }, + { + "epoch": 0.8168299711815562, + "grad_norm": 1.2462927406104716, + "learning_rate": 1.376600547681998e-06, + "loss": 0.44798487424850464, + "step": 3543 + }, + { + "epoch": 0.8170605187319885, + "grad_norm": 1.343867717269155, + "learning_rate": 1.3762474148670467e-06, + "loss": 0.49012643098831177, + "step": 3544 + }, + { + "epoch": 0.8172910662824208, + "grad_norm": 1.6171785281361803, + "learning_rate": 1.3758942273872598e-06, + "loss": 0.527820885181427, + "step": 3545 + }, + { + "epoch": 0.817521613832853, + "grad_norm": 1.539967947177677, + "learning_rate": 1.3755409852939521e-06, + "loss": 0.5369070768356323, + "step": 3546 + }, + { + "epoch": 0.8177521613832853, + "grad_norm": 1.2728699323899526, + "learning_rate": 1.3751876886384462e-06, + "loss": 0.4529775381088257, + "step": 3547 + }, + { + "epoch": 0.8179827089337176, + "grad_norm": 1.45208589614268, + "learning_rate": 1.3748343374720717e-06, + "loss": 0.49659043550491333, + "step": 3548 + }, + { + "epoch": 0.8182132564841499, + "grad_norm": 1.4337263996576257, + "learning_rate": 1.3744809318461674e-06, + "loss": 0.5159590840339661, + "step": 3549 + }, + { + "epoch": 0.8184438040345822, + "grad_norm": 1.2952875309827698, + "learning_rate": 1.3741274718120796e-06, + "loss": 0.5124874711036682, + "step": 3550 + }, + { + "epoch": 0.8186743515850144, + "grad_norm": 1.466684664869956, + "learning_rate": 1.3737739574211619e-06, + "loss": 0.4404506981372833, + "step": 3551 + }, + { + "epoch": 0.8189048991354467, + "grad_norm": 1.5174615480057096, + "learning_rate": 1.3734203887247763e-06, + "loss": 0.3680835962295532, + "step": 3552 + }, + { + "epoch": 0.819135446685879, + "grad_norm": 1.4287902210203849, + "learning_rate": 1.3730667657742934e-06, + "loss": 0.46388792991638184, + "step": 3553 + }, + { + "epoch": 0.8193659942363113, + "grad_norm": 1.1942746308000458, + "learning_rate": 1.3727130886210901e-06, + "loss": 0.44120755791664124, + "step": 3554 + }, + { + "epoch": 0.8195965417867436, + "grad_norm": 1.6778310740061562, + "learning_rate": 1.3723593573165521e-06, + "loss": 0.46938973665237427, + "step": 3555 + }, + { + "epoch": 0.8198270893371757, + "grad_norm": 1.3549067457449737, + "learning_rate": 1.3720055719120734e-06, + "loss": 0.4371834993362427, + "step": 3556 + }, + { + "epoch": 0.820057636887608, + "grad_norm": 1.7435095113050765, + "learning_rate": 1.3716517324590545e-06, + "loss": 0.5550415515899658, + "step": 3557 + }, + { + "epoch": 0.8202881844380403, + "grad_norm": 1.7043736119794284, + "learning_rate": 1.3712978390089055e-06, + "loss": 0.4762400984764099, + "step": 3558 + }, + { + "epoch": 0.8205187319884726, + "grad_norm": 1.2860874189212628, + "learning_rate": 1.370943891613043e-06, + "loss": 0.45802271366119385, + "step": 3559 + }, + { + "epoch": 0.8207492795389049, + "grad_norm": 1.5384333307494868, + "learning_rate": 1.3705898903228917e-06, + "loss": 0.49840807914733887, + "step": 3560 + }, + { + "epoch": 0.8209798270893371, + "grad_norm": 1.4905282376893718, + "learning_rate": 1.3702358351898844e-06, + "loss": 0.4900815486907959, + "step": 3561 + }, + { + "epoch": 0.8212103746397694, + "grad_norm": 1.2987837199320826, + "learning_rate": 1.369881726265462e-06, + "loss": 0.5785123109817505, + "step": 3562 + }, + { + "epoch": 0.8214409221902017, + "grad_norm": 1.3354174661321194, + "learning_rate": 1.3695275636010727e-06, + "loss": 0.44613367319107056, + "step": 3563 + }, + { + "epoch": 0.821671469740634, + "grad_norm": 1.4261577457389192, + "learning_rate": 1.3691733472481721e-06, + "loss": 0.5450624823570251, + "step": 3564 + }, + { + "epoch": 0.8219020172910663, + "grad_norm": 1.6118170061862536, + "learning_rate": 1.368819077258225e-06, + "loss": 0.49650490283966064, + "step": 3565 + }, + { + "epoch": 0.8221325648414985, + "grad_norm": 1.3702523924375916, + "learning_rate": 1.3684647536827025e-06, + "loss": 0.4716230034828186, + "step": 3566 + }, + { + "epoch": 0.8223631123919308, + "grad_norm": 1.3226178858378896, + "learning_rate": 1.3681103765730843e-06, + "loss": 0.5759170651435852, + "step": 3567 + }, + { + "epoch": 0.8225936599423631, + "grad_norm": 1.1648618477588448, + "learning_rate": 1.3677559459808578e-06, + "loss": 0.4766387939453125, + "step": 3568 + }, + { + "epoch": 0.8228242074927954, + "grad_norm": 1.3636946954115148, + "learning_rate": 1.3674014619575184e-06, + "loss": 0.6306042671203613, + "step": 3569 + }, + { + "epoch": 0.8230547550432277, + "grad_norm": 1.5291938076513798, + "learning_rate": 1.367046924554568e-06, + "loss": 0.5182838439941406, + "step": 3570 + }, + { + "epoch": 0.82328530259366, + "grad_norm": 1.470314419911423, + "learning_rate": 1.366692333823518e-06, + "loss": 0.49583834409713745, + "step": 3571 + }, + { + "epoch": 0.8235158501440922, + "grad_norm": 1.300822411685143, + "learning_rate": 1.3663376898158867e-06, + "loss": 0.48286184668540955, + "step": 3572 + }, + { + "epoch": 0.8237463976945245, + "grad_norm": 1.4808130648732762, + "learning_rate": 1.3659829925831998e-06, + "loss": 0.5258426666259766, + "step": 3573 + }, + { + "epoch": 0.8239769452449568, + "grad_norm": 1.3932683936949655, + "learning_rate": 1.3656282421769916e-06, + "loss": 0.4287678301334381, + "step": 3574 + }, + { + "epoch": 0.8242074927953891, + "grad_norm": 1.223580053985394, + "learning_rate": 1.3652734386488032e-06, + "loss": 0.4457889795303345, + "step": 3575 + }, + { + "epoch": 0.8244380403458214, + "grad_norm": 1.4128958414592394, + "learning_rate": 1.3649185820501839e-06, + "loss": 0.4268190860748291, + "step": 3576 + }, + { + "epoch": 0.8246685878962536, + "grad_norm": 1.3779100654403185, + "learning_rate": 1.3645636724326909e-06, + "loss": 0.43643975257873535, + "step": 3577 + }, + { + "epoch": 0.8248991354466859, + "grad_norm": 1.2944281887265883, + "learning_rate": 1.3642087098478888e-06, + "loss": 0.5144875049591064, + "step": 3578 + }, + { + "epoch": 0.8251296829971182, + "grad_norm": 1.3487775063149092, + "learning_rate": 1.36385369434735e-06, + "loss": 0.4662047028541565, + "step": 3579 + }, + { + "epoch": 0.8253602305475505, + "grad_norm": 1.270291737455824, + "learning_rate": 1.3634986259826543e-06, + "loss": 0.5866056680679321, + "step": 3580 + }, + { + "epoch": 0.8255907780979828, + "grad_norm": 1.5499964236276533, + "learning_rate": 1.3631435048053896e-06, + "loss": 0.5337757468223572, + "step": 3581 + }, + { + "epoch": 0.825821325648415, + "grad_norm": 1.2164376884898056, + "learning_rate": 1.3627883308671518e-06, + "loss": 0.501396656036377, + "step": 3582 + }, + { + "epoch": 0.8260518731988472, + "grad_norm": 1.354943780023473, + "learning_rate": 1.3624331042195432e-06, + "loss": 0.4726705551147461, + "step": 3583 + }, + { + "epoch": 0.8262824207492795, + "grad_norm": 1.3367759670185508, + "learning_rate": 1.3620778249141748e-06, + "loss": 0.52537602186203, + "step": 3584 + }, + { + "epoch": 0.8265129682997118, + "grad_norm": 1.2520406736372904, + "learning_rate": 1.3617224930026652e-06, + "loss": 0.43500566482543945, + "step": 3585 + }, + { + "epoch": 0.826743515850144, + "grad_norm": 1.6605179728034725, + "learning_rate": 1.3613671085366406e-06, + "loss": 0.4788056015968323, + "step": 3586 + }, + { + "epoch": 0.8269740634005763, + "grad_norm": 1.6686837912118906, + "learning_rate": 1.3610116715677338e-06, + "loss": 0.4457281231880188, + "step": 3587 + }, + { + "epoch": 0.8272046109510086, + "grad_norm": 1.2791720026472448, + "learning_rate": 1.360656182147587e-06, + "loss": 0.5165676474571228, + "step": 3588 + }, + { + "epoch": 0.8274351585014409, + "grad_norm": 1.2518329894582794, + "learning_rate": 1.3603006403278485e-06, + "loss": 0.5236124992370605, + "step": 3589 + }, + { + "epoch": 0.8276657060518732, + "grad_norm": 1.3949843727870912, + "learning_rate": 1.3599450461601754e-06, + "loss": 0.5561662912368774, + "step": 3590 + }, + { + "epoch": 0.8278962536023055, + "grad_norm": 1.461488217809503, + "learning_rate": 1.3595893996962313e-06, + "loss": 0.520460307598114, + "step": 3591 + }, + { + "epoch": 0.8281268011527377, + "grad_norm": 1.3837342621409494, + "learning_rate": 1.3592337009876884e-06, + "loss": 0.4899410903453827, + "step": 3592 + }, + { + "epoch": 0.82835734870317, + "grad_norm": 1.5007694688548554, + "learning_rate": 1.3588779500862253e-06, + "loss": 0.45363926887512207, + "step": 3593 + }, + { + "epoch": 0.8285878962536023, + "grad_norm": 1.4492330929060628, + "learning_rate": 1.35852214704353e-06, + "loss": 0.46841347217559814, + "step": 3594 + }, + { + "epoch": 0.8288184438040346, + "grad_norm": 1.2348409794594017, + "learning_rate": 1.358166291911296e-06, + "loss": 0.490681916475296, + "step": 3595 + }, + { + "epoch": 0.8290489913544669, + "grad_norm": 1.4667015092814133, + "learning_rate": 1.3578103847412257e-06, + "loss": 0.49177154898643494, + "step": 3596 + }, + { + "epoch": 0.8292795389048991, + "grad_norm": 1.1277202452814692, + "learning_rate": 1.3574544255850288e-06, + "loss": 0.4094833433628082, + "step": 3597 + }, + { + "epoch": 0.8295100864553314, + "grad_norm": 1.3251048380610777, + "learning_rate": 1.3570984144944225e-06, + "loss": 0.553383469581604, + "step": 3598 + }, + { + "epoch": 0.8297406340057637, + "grad_norm": 1.3429509939320259, + "learning_rate": 1.3567423515211314e-06, + "loss": 0.5091391801834106, + "step": 3599 + }, + { + "epoch": 0.829971181556196, + "grad_norm": 1.3776124908463028, + "learning_rate": 1.3563862367168875e-06, + "loss": 0.4854011535644531, + "step": 3600 + }, + { + "epoch": 0.8302017291066283, + "grad_norm": 1.591671011790601, + "learning_rate": 1.3560300701334308e-06, + "loss": 0.5501555800437927, + "step": 3601 + }, + { + "epoch": 0.8304322766570605, + "grad_norm": 1.3392087051404795, + "learning_rate": 1.355673851822509e-06, + "loss": 0.4151724874973297, + "step": 3602 + }, + { + "epoch": 0.8306628242074928, + "grad_norm": 1.2702951133114364, + "learning_rate": 1.3553175818358761e-06, + "loss": 0.408272922039032, + "step": 3603 + }, + { + "epoch": 0.8308933717579251, + "grad_norm": 1.2667799997100424, + "learning_rate": 1.3549612602252953e-06, + "loss": 0.4814903736114502, + "step": 3604 + }, + { + "epoch": 0.8311239193083574, + "grad_norm": 1.2668151257014681, + "learning_rate": 1.3546048870425354e-06, + "loss": 0.44924643635749817, + "step": 3605 + }, + { + "epoch": 0.8313544668587897, + "grad_norm": 1.4703658781584852, + "learning_rate": 1.3542484623393749e-06, + "loss": 0.5310448408126831, + "step": 3606 + }, + { + "epoch": 0.831585014409222, + "grad_norm": 1.5531269499723805, + "learning_rate": 1.3538919861675978e-06, + "loss": 0.5050290822982788, + "step": 3607 + }, + { + "epoch": 0.8318155619596542, + "grad_norm": 1.3231435225244499, + "learning_rate": 1.3535354585789965e-06, + "loss": 0.4794940948486328, + "step": 3608 + }, + { + "epoch": 0.8320461095100865, + "grad_norm": 1.3928315758026486, + "learning_rate": 1.3531788796253705e-06, + "loss": 0.4965074062347412, + "step": 3609 + }, + { + "epoch": 0.8322766570605188, + "grad_norm": 1.5037573190112965, + "learning_rate": 1.352822249358528e-06, + "loss": 0.5224358439445496, + "step": 3610 + }, + { + "epoch": 0.832507204610951, + "grad_norm": 1.277499818911999, + "learning_rate": 1.3524655678302826e-06, + "loss": 0.4840403199195862, + "step": 3611 + }, + { + "epoch": 0.8327377521613832, + "grad_norm": 1.4604355145667074, + "learning_rate": 1.3521088350924567e-06, + "loss": 0.5683179497718811, + "step": 3612 + }, + { + "epoch": 0.8329682997118155, + "grad_norm": 1.3815402228742937, + "learning_rate": 1.3517520511968803e-06, + "loss": 0.4887921214103699, + "step": 3613 + }, + { + "epoch": 0.8331988472622478, + "grad_norm": 1.609496545258807, + "learning_rate": 1.3513952161953899e-06, + "loss": 0.49039024114608765, + "step": 3614 + }, + { + "epoch": 0.8334293948126801, + "grad_norm": 1.5270344421851152, + "learning_rate": 1.35103833013983e-06, + "loss": 0.5262584686279297, + "step": 3615 + }, + { + "epoch": 0.8336599423631124, + "grad_norm": 1.4463121674933177, + "learning_rate": 1.3506813930820527e-06, + "loss": 0.5098379850387573, + "step": 3616 + }, + { + "epoch": 0.8338904899135446, + "grad_norm": 1.5024028998528263, + "learning_rate": 1.3503244050739169e-06, + "loss": 0.5597623586654663, + "step": 3617 + }, + { + "epoch": 0.8341210374639769, + "grad_norm": 1.2361771648921898, + "learning_rate": 1.3499673661672894e-06, + "loss": 0.49627748131752014, + "step": 3618 + }, + { + "epoch": 0.8343515850144092, + "grad_norm": 1.2808464002536806, + "learning_rate": 1.3496102764140443e-06, + "loss": 0.4776031970977783, + "step": 3619 + }, + { + "epoch": 0.8345821325648415, + "grad_norm": 1.518980683456512, + "learning_rate": 1.3492531358660633e-06, + "loss": 0.554206371307373, + "step": 3620 + }, + { + "epoch": 0.8348126801152738, + "grad_norm": 1.5508283995044057, + "learning_rate": 1.348895944575234e-06, + "loss": 0.5198627710342407, + "step": 3621 + }, + { + "epoch": 0.835043227665706, + "grad_norm": 1.277882860539239, + "learning_rate": 1.348538702593454e-06, + "loss": 0.39228206872940063, + "step": 3622 + }, + { + "epoch": 0.8352737752161383, + "grad_norm": 1.4700971295114715, + "learning_rate": 1.3481814099726266e-06, + "loss": 0.5013151168823242, + "step": 3623 + }, + { + "epoch": 0.8355043227665706, + "grad_norm": 1.3346013574826283, + "learning_rate": 1.347824066764662e-06, + "loss": 0.5275527238845825, + "step": 3624 + }, + { + "epoch": 0.8357348703170029, + "grad_norm": 1.2993289039746385, + "learning_rate": 1.3474666730214788e-06, + "loss": 0.4131700396537781, + "step": 3625 + }, + { + "epoch": 0.8359654178674352, + "grad_norm": 1.3417984305917734, + "learning_rate": 1.3471092287950027e-06, + "loss": 0.4572218656539917, + "step": 3626 + }, + { + "epoch": 0.8361959654178674, + "grad_norm": 1.5009745298932513, + "learning_rate": 1.3467517341371668e-06, + "loss": 0.4194955825805664, + "step": 3627 + }, + { + "epoch": 0.8364265129682997, + "grad_norm": 1.219361308104634, + "learning_rate": 1.3463941890999108e-06, + "loss": 0.5566304922103882, + "step": 3628 + }, + { + "epoch": 0.836657060518732, + "grad_norm": 1.3289507453572718, + "learning_rate": 1.3460365937351824e-06, + "loss": 0.48764199018478394, + "step": 3629 + }, + { + "epoch": 0.8368876080691643, + "grad_norm": 1.3635097888093082, + "learning_rate": 1.3456789480949371e-06, + "loss": 0.5235868692398071, + "step": 3630 + }, + { + "epoch": 0.8371181556195966, + "grad_norm": 1.3481928837782249, + "learning_rate": 1.3453212522311365e-06, + "loss": 0.39009493589401245, + "step": 3631 + }, + { + "epoch": 0.8373487031700289, + "grad_norm": 1.4385596765141213, + "learning_rate": 1.3449635061957506e-06, + "loss": 0.46862345933914185, + "step": 3632 + }, + { + "epoch": 0.8375792507204611, + "grad_norm": 1.4116689668927676, + "learning_rate": 1.3446057100407556e-06, + "loss": 0.5485839247703552, + "step": 3633 + }, + { + "epoch": 0.8378097982708934, + "grad_norm": 1.256291851973583, + "learning_rate": 1.3442478638181354e-06, + "loss": 0.4831143915653229, + "step": 3634 + }, + { + "epoch": 0.8380403458213257, + "grad_norm": 1.3851749917377867, + "learning_rate": 1.343889967579882e-06, + "loss": 0.44708937406539917, + "step": 3635 + }, + { + "epoch": 0.838270893371758, + "grad_norm": 1.6698743063833459, + "learning_rate": 1.343532021377994e-06, + "loss": 0.45937833189964294, + "step": 3636 + }, + { + "epoch": 0.8385014409221903, + "grad_norm": 1.3417409012531487, + "learning_rate": 1.3431740252644767e-06, + "loss": 0.5108849406242371, + "step": 3637 + }, + { + "epoch": 0.8387319884726224, + "grad_norm": 1.4510248789771545, + "learning_rate": 1.3428159792913435e-06, + "loss": 0.5477361679077148, + "step": 3638 + }, + { + "epoch": 0.8389625360230547, + "grad_norm": 1.2875265535134595, + "learning_rate": 1.3424578835106148e-06, + "loss": 0.5166784524917603, + "step": 3639 + }, + { + "epoch": 0.839193083573487, + "grad_norm": 1.452063908950837, + "learning_rate": 1.342099737974318e-06, + "loss": 0.5114049315452576, + "step": 3640 + }, + { + "epoch": 0.8394236311239193, + "grad_norm": 1.648045658357982, + "learning_rate": 1.3417415427344885e-06, + "loss": 0.4201454520225525, + "step": 3641 + }, + { + "epoch": 0.8396541786743515, + "grad_norm": 1.3260350906579368, + "learning_rate": 1.3413832978431676e-06, + "loss": 0.4956648349761963, + "step": 3642 + }, + { + "epoch": 0.8398847262247838, + "grad_norm": 1.3312034034289655, + "learning_rate": 1.3410250033524048e-06, + "loss": 0.378828227519989, + "step": 3643 + }, + { + "epoch": 0.8401152737752161, + "grad_norm": 1.4156130470610577, + "learning_rate": 1.3406666593142569e-06, + "loss": 0.493254691362381, + "step": 3644 + }, + { + "epoch": 0.8403458213256484, + "grad_norm": 1.4086565383527434, + "learning_rate": 1.340308265780787e-06, + "loss": 0.48233136534690857, + "step": 3645 + }, + { + "epoch": 0.8405763688760807, + "grad_norm": 1.3334208525664306, + "learning_rate": 1.3399498228040661e-06, + "loss": 0.45329928398132324, + "step": 3646 + }, + { + "epoch": 0.840806916426513, + "grad_norm": 1.510171100735499, + "learning_rate": 1.3395913304361728e-06, + "loss": 0.47325652837753296, + "step": 3647 + }, + { + "epoch": 0.8410374639769452, + "grad_norm": 1.4245815844886007, + "learning_rate": 1.3392327887291918e-06, + "loss": 0.5237877368927002, + "step": 3648 + }, + { + "epoch": 0.8412680115273775, + "grad_norm": 1.3392602042833703, + "learning_rate": 1.3388741977352156e-06, + "loss": 0.4137705862522125, + "step": 3649 + }, + { + "epoch": 0.8414985590778098, + "grad_norm": 1.4945784448020432, + "learning_rate": 1.3385155575063434e-06, + "loss": 0.5430322885513306, + "step": 3650 + }, + { + "epoch": 0.8417291066282421, + "grad_norm": 1.7615745807954628, + "learning_rate": 1.3381568680946824e-06, + "loss": 0.542243480682373, + "step": 3651 + }, + { + "epoch": 0.8419596541786744, + "grad_norm": 1.5271060313738907, + "learning_rate": 1.3377981295523464e-06, + "loss": 0.45017683506011963, + "step": 3652 + }, + { + "epoch": 0.8421902017291066, + "grad_norm": 1.4484659185032926, + "learning_rate": 1.3374393419314559e-06, + "loss": 0.5193800330162048, + "step": 3653 + }, + { + "epoch": 0.8424207492795389, + "grad_norm": 1.4240617315786623, + "learning_rate": 1.3370805052841393e-06, + "loss": 0.39487144351005554, + "step": 3654 + }, + { + "epoch": 0.8426512968299712, + "grad_norm": 1.7496918494705047, + "learning_rate": 1.3367216196625322e-06, + "loss": 0.5312062501907349, + "step": 3655 + }, + { + "epoch": 0.8428818443804035, + "grad_norm": 1.3680421123056206, + "learning_rate": 1.3363626851187763e-06, + "loss": 0.5203391313552856, + "step": 3656 + }, + { + "epoch": 0.8431123919308358, + "grad_norm": 1.3194431068725445, + "learning_rate": 1.3360037017050215e-06, + "loss": 0.5030896663665771, + "step": 3657 + }, + { + "epoch": 0.843342939481268, + "grad_norm": 1.2711152514275694, + "learning_rate": 1.3356446694734242e-06, + "loss": 0.4215394854545593, + "step": 3658 + }, + { + "epoch": 0.8435734870317003, + "grad_norm": 1.3571550082526849, + "learning_rate": 1.3352855884761481e-06, + "loss": 0.46229058504104614, + "step": 3659 + }, + { + "epoch": 0.8438040345821326, + "grad_norm": 1.3179757408232118, + "learning_rate": 1.334926458765364e-06, + "loss": 0.48473960161209106, + "step": 3660 + }, + { + "epoch": 0.8440345821325649, + "grad_norm": 1.4348121957624616, + "learning_rate": 1.3345672803932497e-06, + "loss": 0.48085319995880127, + "step": 3661 + }, + { + "epoch": 0.8442651296829972, + "grad_norm": 1.183681688837606, + "learning_rate": 1.3342080534119896e-06, + "loss": 0.4717422127723694, + "step": 3662 + }, + { + "epoch": 0.8444956772334294, + "grad_norm": 1.5024885852724694, + "learning_rate": 1.3338487778737762e-06, + "loss": 0.4296337962150574, + "step": 3663 + }, + { + "epoch": 0.8447262247838617, + "grad_norm": 1.2935172742719372, + "learning_rate": 1.3334894538308087e-06, + "loss": 0.48610907793045044, + "step": 3664 + }, + { + "epoch": 0.8449567723342939, + "grad_norm": 1.52285756397875, + "learning_rate": 1.3331300813352922e-06, + "loss": 0.5388171672821045, + "step": 3665 + }, + { + "epoch": 0.8451873198847262, + "grad_norm": 1.2878926477397548, + "learning_rate": 1.3327706604394403e-06, + "loss": 0.4291438162326813, + "step": 3666 + }, + { + "epoch": 0.8454178674351585, + "grad_norm": 1.2885114325801958, + "learning_rate": 1.3324111911954736e-06, + "loss": 0.5693022012710571, + "step": 3667 + }, + { + "epoch": 0.8456484149855907, + "grad_norm": 1.9611798926901898, + "learning_rate": 1.3320516736556188e-06, + "loss": 0.4639110565185547, + "step": 3668 + }, + { + "epoch": 0.845878962536023, + "grad_norm": 1.3579868594952884, + "learning_rate": 1.3316921078721102e-06, + "loss": 0.49151986837387085, + "step": 3669 + }, + { + "epoch": 0.8461095100864553, + "grad_norm": 1.2215058568931207, + "learning_rate": 1.3313324938971886e-06, + "loss": 0.45903199911117554, + "step": 3670 + }, + { + "epoch": 0.8463400576368876, + "grad_norm": 1.408166413442317, + "learning_rate": 1.3309728317831024e-06, + "loss": 0.5564982891082764, + "step": 3671 + }, + { + "epoch": 0.8465706051873199, + "grad_norm": 1.1499291953976989, + "learning_rate": 1.3306131215821067e-06, + "loss": 0.4355608820915222, + "step": 3672 + }, + { + "epoch": 0.8468011527377521, + "grad_norm": 1.3895488327495742, + "learning_rate": 1.330253363346464e-06, + "loss": 0.5584323406219482, + "step": 3673 + }, + { + "epoch": 0.8470317002881844, + "grad_norm": 1.377539936505586, + "learning_rate": 1.329893557128443e-06, + "loss": 0.5399061441421509, + "step": 3674 + }, + { + "epoch": 0.8472622478386167, + "grad_norm": 1.6120619266325988, + "learning_rate": 1.32953370298032e-06, + "loss": 0.45847171545028687, + "step": 3675 + }, + { + "epoch": 0.847492795389049, + "grad_norm": 1.3633635424650061, + "learning_rate": 1.329173800954378e-06, + "loss": 0.48243457078933716, + "step": 3676 + }, + { + "epoch": 0.8477233429394813, + "grad_norm": 1.513997205378081, + "learning_rate": 1.3288138511029071e-06, + "loss": 0.5331195592880249, + "step": 3677 + }, + { + "epoch": 0.8479538904899135, + "grad_norm": 1.4372930833672997, + "learning_rate": 1.3284538534782044e-06, + "loss": 0.5083351135253906, + "step": 3678 + }, + { + "epoch": 0.8481844380403458, + "grad_norm": 1.2133182192138496, + "learning_rate": 1.3280938081325732e-06, + "loss": 0.47454866766929626, + "step": 3679 + }, + { + "epoch": 0.8484149855907781, + "grad_norm": 1.234205887620048, + "learning_rate": 1.3277337151183252e-06, + "loss": 0.42275500297546387, + "step": 3680 + }, + { + "epoch": 0.8486455331412104, + "grad_norm": 1.3262104260117105, + "learning_rate": 1.3273735744877775e-06, + "loss": 0.531667947769165, + "step": 3681 + }, + { + "epoch": 0.8488760806916427, + "grad_norm": 1.2530945065747214, + "learning_rate": 1.3270133862932551e-06, + "loss": 0.537842333316803, + "step": 3682 + }, + { + "epoch": 0.849106628242075, + "grad_norm": 1.4271755209888484, + "learning_rate": 1.3266531505870896e-06, + "loss": 0.47730106115341187, + "step": 3683 + }, + { + "epoch": 0.8493371757925072, + "grad_norm": 1.3660515052317093, + "learning_rate": 1.326292867421619e-06, + "loss": 0.41942697763442993, + "step": 3684 + }, + { + "epoch": 0.8495677233429395, + "grad_norm": 1.4070770160971164, + "learning_rate": 1.3259325368491897e-06, + "loss": 0.5268753170967102, + "step": 3685 + }, + { + "epoch": 0.8497982708933718, + "grad_norm": 1.356452328307654, + "learning_rate": 1.325572158922153e-06, + "loss": 0.43105173110961914, + "step": 3686 + }, + { + "epoch": 0.8500288184438041, + "grad_norm": 1.2933233661813608, + "learning_rate": 1.3252117336928686e-06, + "loss": 0.5447876453399658, + "step": 3687 + }, + { + "epoch": 0.8502593659942363, + "grad_norm": 1.3613250875979575, + "learning_rate": 1.3248512612137023e-06, + "loss": 0.49573707580566406, + "step": 3688 + }, + { + "epoch": 0.8504899135446686, + "grad_norm": 1.4776299074252148, + "learning_rate": 1.3244907415370273e-06, + "loss": 0.46929413080215454, + "step": 3689 + }, + { + "epoch": 0.8507204610951009, + "grad_norm": 1.4515685025287064, + "learning_rate": 1.3241301747152233e-06, + "loss": 0.5122408866882324, + "step": 3690 + }, + { + "epoch": 0.8509510086455332, + "grad_norm": 1.1931030830480251, + "learning_rate": 1.3237695608006766e-06, + "loss": 0.4353037476539612, + "step": 3691 + }, + { + "epoch": 0.8511815561959655, + "grad_norm": 1.2542691426037724, + "learning_rate": 1.3234088998457807e-06, + "loss": 0.45018789172172546, + "step": 3692 + }, + { + "epoch": 0.8514121037463976, + "grad_norm": 1.3013512335769646, + "learning_rate": 1.3230481919029362e-06, + "loss": 0.49650731682777405, + "step": 3693 + }, + { + "epoch": 0.8516426512968299, + "grad_norm": 1.8498390352772125, + "learning_rate": 1.3226874370245497e-06, + "loss": 0.4956985116004944, + "step": 3694 + }, + { + "epoch": 0.8518731988472622, + "grad_norm": 1.422696360099423, + "learning_rate": 1.3223266352630355e-06, + "loss": 0.4598352015018463, + "step": 3695 + }, + { + "epoch": 0.8521037463976945, + "grad_norm": 1.4805802300223785, + "learning_rate": 1.3219657866708147e-06, + "loss": 0.43021154403686523, + "step": 3696 + }, + { + "epoch": 0.8523342939481268, + "grad_norm": 1.0649446070909838, + "learning_rate": 1.321604891300314e-06, + "loss": 0.4016476273536682, + "step": 3697 + }, + { + "epoch": 0.852564841498559, + "grad_norm": 1.424130195501248, + "learning_rate": 1.3212439492039687e-06, + "loss": 0.4343821406364441, + "step": 3698 + }, + { + "epoch": 0.8527953890489913, + "grad_norm": 1.4128523736821907, + "learning_rate": 1.3208829604342189e-06, + "loss": 0.5592546463012695, + "step": 3699 + }, + { + "epoch": 0.8530259365994236, + "grad_norm": 1.3131744964719476, + "learning_rate": 1.3205219250435133e-06, + "loss": 0.47344446182250977, + "step": 3700 + }, + { + "epoch": 0.8532564841498559, + "grad_norm": 1.4103812210291373, + "learning_rate": 1.3201608430843063e-06, + "loss": 0.5225629806518555, + "step": 3701 + }, + { + "epoch": 0.8534870317002882, + "grad_norm": 1.4128306987353971, + "learning_rate": 1.3197997146090593e-06, + "loss": 0.4450826644897461, + "step": 3702 + }, + { + "epoch": 0.8537175792507204, + "grad_norm": 1.348511793859822, + "learning_rate": 1.3194385396702406e-06, + "loss": 0.5104360580444336, + "step": 3703 + }, + { + "epoch": 0.8539481268011527, + "grad_norm": 1.2498427531651606, + "learning_rate": 1.319077318320325e-06, + "loss": 0.46101412177085876, + "step": 3704 + }, + { + "epoch": 0.854178674351585, + "grad_norm": 1.3605615250308858, + "learning_rate": 1.3187160506117947e-06, + "loss": 0.47033798694610596, + "step": 3705 + }, + { + "epoch": 0.8544092219020173, + "grad_norm": 1.4110469176729128, + "learning_rate": 1.3183547365971376e-06, + "loss": 0.3864026367664337, + "step": 3706 + }, + { + "epoch": 0.8546397694524496, + "grad_norm": 1.6313869920317292, + "learning_rate": 1.3179933763288487e-06, + "loss": 0.4850136339664459, + "step": 3707 + }, + { + "epoch": 0.8548703170028819, + "grad_norm": 1.2823453751359075, + "learning_rate": 1.3176319698594307e-06, + "loss": 0.5593098998069763, + "step": 3708 + }, + { + "epoch": 0.8551008645533141, + "grad_norm": 1.3952870507706057, + "learning_rate": 1.3172705172413916e-06, + "loss": 0.4883347749710083, + "step": 3709 + }, + { + "epoch": 0.8553314121037464, + "grad_norm": 1.446792702585452, + "learning_rate": 1.3169090185272466e-06, + "loss": 0.5428842306137085, + "step": 3710 + }, + { + "epoch": 0.8555619596541787, + "grad_norm": 1.304386174447863, + "learning_rate": 1.3165474737695184e-06, + "loss": 0.4476752281188965, + "step": 3711 + }, + { + "epoch": 0.855792507204611, + "grad_norm": 1.1763400531562, + "learning_rate": 1.3161858830207349e-06, + "loss": 0.47227632999420166, + "step": 3712 + }, + { + "epoch": 0.8560230547550433, + "grad_norm": 1.7211720189167297, + "learning_rate": 1.315824246333432e-06, + "loss": 0.4643186330795288, + "step": 3713 + }, + { + "epoch": 0.8562536023054755, + "grad_norm": 1.2821644108063668, + "learning_rate": 1.3154625637601515e-06, + "loss": 0.4912930130958557, + "step": 3714 + }, + { + "epoch": 0.8564841498559078, + "grad_norm": 1.1876618070425506, + "learning_rate": 1.3151008353534424e-06, + "loss": 0.4728042483329773, + "step": 3715 + }, + { + "epoch": 0.8567146974063401, + "grad_norm": 1.5209773177568184, + "learning_rate": 1.3147390611658592e-06, + "loss": 0.5035809874534607, + "step": 3716 + }, + { + "epoch": 0.8569452449567724, + "grad_norm": 1.4748136352873917, + "learning_rate": 1.314377241249965e-06, + "loss": 0.5977092981338501, + "step": 3717 + }, + { + "epoch": 0.8571757925072047, + "grad_norm": 1.4766041364380091, + "learning_rate": 1.3140153756583284e-06, + "loss": 0.46641305088996887, + "step": 3718 + }, + { + "epoch": 0.8574063400576369, + "grad_norm": 1.3233412264919238, + "learning_rate": 1.313653464443524e-06, + "loss": 0.4669331908226013, + "step": 3719 + }, + { + "epoch": 0.8576368876080691, + "grad_norm": 1.5334191310376337, + "learning_rate": 1.3132915076581336e-06, + "loss": 0.5101985335350037, + "step": 3720 + }, + { + "epoch": 0.8578674351585014, + "grad_norm": 1.873618677639978, + "learning_rate": 1.3129295053547469e-06, + "loss": 0.5713244080543518, + "step": 3721 + }, + { + "epoch": 0.8580979827089337, + "grad_norm": 1.5421267681706392, + "learning_rate": 1.3125674575859585e-06, + "loss": 0.5616867542266846, + "step": 3722 + }, + { + "epoch": 0.858328530259366, + "grad_norm": 1.3186271635095725, + "learning_rate": 1.3122053644043698e-06, + "loss": 0.39379388093948364, + "step": 3723 + }, + { + "epoch": 0.8585590778097982, + "grad_norm": 1.4015318528894827, + "learning_rate": 1.3118432258625894e-06, + "loss": 0.49586233496665955, + "step": 3724 + }, + { + "epoch": 0.8587896253602305, + "grad_norm": 1.719866212131585, + "learning_rate": 1.3114810420132323e-06, + "loss": 0.6034448742866516, + "step": 3725 + }, + { + "epoch": 0.8590201729106628, + "grad_norm": 1.1170956956255402, + "learning_rate": 1.3111188129089202e-06, + "loss": 0.502906322479248, + "step": 3726 + }, + { + "epoch": 0.8592507204610951, + "grad_norm": 1.2761243448032986, + "learning_rate": 1.310756538602281e-06, + "loss": 0.46334415674209595, + "step": 3727 + }, + { + "epoch": 0.8594812680115274, + "grad_norm": 1.2932841158550064, + "learning_rate": 1.3103942191459496e-06, + "loss": 0.4570457339286804, + "step": 3728 + }, + { + "epoch": 0.8597118155619596, + "grad_norm": 1.6385975818385836, + "learning_rate": 1.310031854592567e-06, + "loss": 0.4651646018028259, + "step": 3729 + }, + { + "epoch": 0.8599423631123919, + "grad_norm": 1.5487322132450227, + "learning_rate": 1.3096694449947812e-06, + "loss": 0.5101544260978699, + "step": 3730 + }, + { + "epoch": 0.8601729106628242, + "grad_norm": 1.4132801397133001, + "learning_rate": 1.3093069904052467e-06, + "loss": 0.5086382627487183, + "step": 3731 + }, + { + "epoch": 0.8604034582132565, + "grad_norm": 1.383321089409367, + "learning_rate": 1.3089444908766235e-06, + "loss": 0.4948277473449707, + "step": 3732 + }, + { + "epoch": 0.8606340057636888, + "grad_norm": 1.6523780148313876, + "learning_rate": 1.3085819464615802e-06, + "loss": 0.501600444316864, + "step": 3733 + }, + { + "epoch": 0.860864553314121, + "grad_norm": 1.2778182602877666, + "learning_rate": 1.3082193572127902e-06, + "loss": 0.42970049381256104, + "step": 3734 + }, + { + "epoch": 0.8610951008645533, + "grad_norm": 1.3310363616338574, + "learning_rate": 1.3078567231829337e-06, + "loss": 0.5060045719146729, + "step": 3735 + }, + { + "epoch": 0.8613256484149856, + "grad_norm": 1.3440313324926134, + "learning_rate": 1.307494044424698e-06, + "loss": 0.467104434967041, + "step": 3736 + }, + { + "epoch": 0.8615561959654179, + "grad_norm": 1.4120038847759988, + "learning_rate": 1.3071313209907766e-06, + "loss": 0.5574711561203003, + "step": 3737 + }, + { + "epoch": 0.8617867435158502, + "grad_norm": 1.2363227218724966, + "learning_rate": 1.3067685529338693e-06, + "loss": 0.46039044857025146, + "step": 3738 + }, + { + "epoch": 0.8620172910662824, + "grad_norm": 1.1503513795065752, + "learning_rate": 1.3064057403066822e-06, + "loss": 0.4340815544128418, + "step": 3739 + }, + { + "epoch": 0.8622478386167147, + "grad_norm": 1.5523166737624763, + "learning_rate": 1.3060428831619287e-06, + "loss": 0.43785548210144043, + "step": 3740 + }, + { + "epoch": 0.862478386167147, + "grad_norm": 1.5457298304008735, + "learning_rate": 1.305679981552328e-06, + "loss": 0.5572985410690308, + "step": 3741 + }, + { + "epoch": 0.8627089337175793, + "grad_norm": 1.3596059279946906, + "learning_rate": 1.3053170355306057e-06, + "loss": 0.5185844898223877, + "step": 3742 + }, + { + "epoch": 0.8629394812680116, + "grad_norm": 1.3503829945064458, + "learning_rate": 1.3049540451494942e-06, + "loss": 0.46550601720809937, + "step": 3743 + }, + { + "epoch": 0.8631700288184438, + "grad_norm": 1.4222091653360487, + "learning_rate": 1.3045910104617327e-06, + "loss": 0.5469560623168945, + "step": 3744 + }, + { + "epoch": 0.8634005763688761, + "grad_norm": 1.3405137838232928, + "learning_rate": 1.3042279315200657e-06, + "loss": 0.4938455820083618, + "step": 3745 + }, + { + "epoch": 0.8636311239193084, + "grad_norm": 1.1926594999161433, + "learning_rate": 1.303864808377245e-06, + "loss": 0.39218518137931824, + "step": 3746 + }, + { + "epoch": 0.8638616714697407, + "grad_norm": 1.2206192808907754, + "learning_rate": 1.3035016410860291e-06, + "loss": 0.5242647528648376, + "step": 3747 + }, + { + "epoch": 0.8640922190201729, + "grad_norm": 1.964856674282453, + "learning_rate": 1.3031384296991817e-06, + "loss": 0.5636630058288574, + "step": 3748 + }, + { + "epoch": 0.8643227665706051, + "grad_norm": 1.2276813313800654, + "learning_rate": 1.302775174269474e-06, + "loss": 0.423178493976593, + "step": 3749 + }, + { + "epoch": 0.8645533141210374, + "grad_norm": 1.3659960313257418, + "learning_rate": 1.3024118748496832e-06, + "loss": 0.5249595046043396, + "step": 3750 + }, + { + "epoch": 0.8647838616714697, + "grad_norm": 1.5492622232744115, + "learning_rate": 1.302048531492593e-06, + "loss": 0.46585753560066223, + "step": 3751 + }, + { + "epoch": 0.865014409221902, + "grad_norm": 1.1870998359636753, + "learning_rate": 1.301685144250993e-06, + "loss": 0.5256447792053223, + "step": 3752 + }, + { + "epoch": 0.8652449567723343, + "grad_norm": 1.407334330978888, + "learning_rate": 1.3013217131776806e-06, + "loss": 0.4594920575618744, + "step": 3753 + }, + { + "epoch": 0.8654755043227665, + "grad_norm": 1.5476606038444285, + "learning_rate": 1.3009582383254572e-06, + "loss": 0.47202157974243164, + "step": 3754 + }, + { + "epoch": 0.8657060518731988, + "grad_norm": 1.7434821206800877, + "learning_rate": 1.3005947197471327e-06, + "loss": 0.5966329574584961, + "step": 3755 + }, + { + "epoch": 0.8659365994236311, + "grad_norm": 1.366790612468713, + "learning_rate": 1.3002311574955226e-06, + "loss": 0.4795987904071808, + "step": 3756 + }, + { + "epoch": 0.8661671469740634, + "grad_norm": 1.5108288163436319, + "learning_rate": 1.2998675516234486e-06, + "loss": 0.47260379791259766, + "step": 3757 + }, + { + "epoch": 0.8663976945244957, + "grad_norm": 1.25511672370241, + "learning_rate": 1.2995039021837386e-06, + "loss": 0.4049740433692932, + "step": 3758 + }, + { + "epoch": 0.866628242074928, + "grad_norm": 1.5146922201872883, + "learning_rate": 1.2991402092292277e-06, + "loss": 0.45385992527008057, + "step": 3759 + }, + { + "epoch": 0.8668587896253602, + "grad_norm": 1.4053212622212365, + "learning_rate": 1.298776472812756e-06, + "loss": 0.5093865990638733, + "step": 3760 + }, + { + "epoch": 0.8670893371757925, + "grad_norm": 1.3453447275976842, + "learning_rate": 1.2984126929871705e-06, + "loss": 0.5042203664779663, + "step": 3761 + }, + { + "epoch": 0.8673198847262248, + "grad_norm": 1.388435149050926, + "learning_rate": 1.2980488698053257e-06, + "loss": 0.5201801061630249, + "step": 3762 + }, + { + "epoch": 0.8675504322766571, + "grad_norm": 1.6553043139643804, + "learning_rate": 1.2976850033200804e-06, + "loss": 0.43103480339050293, + "step": 3763 + }, + { + "epoch": 0.8677809798270893, + "grad_norm": 1.2908585366398646, + "learning_rate": 1.297321093584301e-06, + "loss": 0.49498647451400757, + "step": 3764 + }, + { + "epoch": 0.8680115273775216, + "grad_norm": 1.4922160261358184, + "learning_rate": 1.2969571406508593e-06, + "loss": 0.47413933277130127, + "step": 3765 + }, + { + "epoch": 0.8682420749279539, + "grad_norm": 1.1674121222476106, + "learning_rate": 1.2965931445726346e-06, + "loss": 0.4971849322319031, + "step": 3766 + }, + { + "epoch": 0.8684726224783862, + "grad_norm": 1.3900091493712237, + "learning_rate": 1.296229105402511e-06, + "loss": 0.5081756114959717, + "step": 3767 + }, + { + "epoch": 0.8687031700288185, + "grad_norm": 1.5541801599858096, + "learning_rate": 1.2958650231933806e-06, + "loss": 0.3967844247817993, + "step": 3768 + }, + { + "epoch": 0.8689337175792508, + "grad_norm": 1.6064453941430812, + "learning_rate": 1.2955008979981397e-06, + "loss": 0.49939191341400146, + "step": 3769 + }, + { + "epoch": 0.869164265129683, + "grad_norm": 1.1874512006657068, + "learning_rate": 1.2951367298696924e-06, + "loss": 0.4890215992927551, + "step": 3770 + }, + { + "epoch": 0.8693948126801153, + "grad_norm": 1.2271551559366067, + "learning_rate": 1.2947725188609486e-06, + "loss": 0.41297200322151184, + "step": 3771 + }, + { + "epoch": 0.8696253602305476, + "grad_norm": 1.353945692705735, + "learning_rate": 1.2944082650248245e-06, + "loss": 0.4626082181930542, + "step": 3772 + }, + { + "epoch": 0.8698559077809799, + "grad_norm": 1.6592151979939496, + "learning_rate": 1.2940439684142417e-06, + "loss": 0.39542341232299805, + "step": 3773 + }, + { + "epoch": 0.8700864553314122, + "grad_norm": 1.4526755191413092, + "learning_rate": 1.2936796290821293e-06, + "loss": 0.5241938829421997, + "step": 3774 + }, + { + "epoch": 0.8703170028818443, + "grad_norm": 1.2509273324342485, + "learning_rate": 1.2933152470814222e-06, + "loss": 0.4975101947784424, + "step": 3775 + }, + { + "epoch": 0.8705475504322766, + "grad_norm": 1.3336305279154814, + "learning_rate": 1.2929508224650608e-06, + "loss": 0.39954787492752075, + "step": 3776 + }, + { + "epoch": 0.8707780979827089, + "grad_norm": 1.2742484885253946, + "learning_rate": 1.292586355285992e-06, + "loss": 0.4336436986923218, + "step": 3777 + }, + { + "epoch": 0.8710086455331412, + "grad_norm": 1.4481272254111919, + "learning_rate": 1.2922218455971701e-06, + "loss": 0.5100188255310059, + "step": 3778 + }, + { + "epoch": 0.8712391930835734, + "grad_norm": 1.3266075653142237, + "learning_rate": 1.2918572934515537e-06, + "loss": 0.4073595106601715, + "step": 3779 + }, + { + "epoch": 0.8714697406340057, + "grad_norm": 1.4097190424033288, + "learning_rate": 1.2914926989021087e-06, + "loss": 0.5112053155899048, + "step": 3780 + }, + { + "epoch": 0.871700288184438, + "grad_norm": 1.3139163731344454, + "learning_rate": 1.2911280620018069e-06, + "loss": 0.4807807207107544, + "step": 3781 + }, + { + "epoch": 0.8719308357348703, + "grad_norm": 1.284068633811954, + "learning_rate": 1.2907633828036263e-06, + "loss": 0.4051937460899353, + "step": 3782 + }, + { + "epoch": 0.8721613832853026, + "grad_norm": 1.651292594956854, + "learning_rate": 1.2903986613605507e-06, + "loss": 0.5108781456947327, + "step": 3783 + }, + { + "epoch": 0.8723919308357349, + "grad_norm": 1.7675344941826705, + "learning_rate": 1.2900338977255707e-06, + "loss": 0.44521909952163696, + "step": 3784 + }, + { + "epoch": 0.8726224783861671, + "grad_norm": 1.5809881129891559, + "learning_rate": 1.2896690919516825e-06, + "loss": 0.6048822999000549, + "step": 3785 + }, + { + "epoch": 0.8728530259365994, + "grad_norm": 1.270946294003379, + "learning_rate": 1.2893042440918887e-06, + "loss": 0.4376435875892639, + "step": 3786 + }, + { + "epoch": 0.8730835734870317, + "grad_norm": 1.4781872894193138, + "learning_rate": 1.2889393541991975e-06, + "loss": 0.5266781449317932, + "step": 3787 + }, + { + "epoch": 0.873314121037464, + "grad_norm": 1.3112914328188243, + "learning_rate": 1.2885744223266244e-06, + "loss": 0.4608879089355469, + "step": 3788 + }, + { + "epoch": 0.8735446685878963, + "grad_norm": 1.3985767247612233, + "learning_rate": 1.2882094485271893e-06, + "loss": 0.6094374656677246, + "step": 3789 + }, + { + "epoch": 0.8737752161383285, + "grad_norm": 1.440566163110992, + "learning_rate": 1.2878444328539198e-06, + "loss": 0.48071056604385376, + "step": 3790 + }, + { + "epoch": 0.8740057636887608, + "grad_norm": 1.2344258963818233, + "learning_rate": 1.2874793753598486e-06, + "loss": 0.5486899614334106, + "step": 3791 + }, + { + "epoch": 0.8742363112391931, + "grad_norm": 1.3009848139904665, + "learning_rate": 1.2871142760980145e-06, + "loss": 0.4325149655342102, + "step": 3792 + }, + { + "epoch": 0.8744668587896254, + "grad_norm": 1.3339799042834972, + "learning_rate": 1.2867491351214628e-06, + "loss": 0.4406658113002777, + "step": 3793 + }, + { + "epoch": 0.8746974063400577, + "grad_norm": 1.5722454397734178, + "learning_rate": 1.2863839524832453e-06, + "loss": 0.4819502830505371, + "step": 3794 + }, + { + "epoch": 0.8749279538904899, + "grad_norm": 1.4646927994024876, + "learning_rate": 1.2860187282364183e-06, + "loss": 0.40368038415908813, + "step": 3795 + }, + { + "epoch": 0.8751585014409222, + "grad_norm": 1.5390691991987933, + "learning_rate": 1.2856534624340455e-06, + "loss": 0.5182099342346191, + "step": 3796 + }, + { + "epoch": 0.8753890489913545, + "grad_norm": 1.418305639642291, + "learning_rate": 1.2852881551291964e-06, + "loss": 0.5145357847213745, + "step": 3797 + }, + { + "epoch": 0.8756195965417868, + "grad_norm": 1.417053237022279, + "learning_rate": 1.2849228063749458e-06, + "loss": 0.47494733333587646, + "step": 3798 + }, + { + "epoch": 0.8758501440922191, + "grad_norm": 1.5460781370684025, + "learning_rate": 1.2845574162243757e-06, + "loss": 0.44350409507751465, + "step": 3799 + }, + { + "epoch": 0.8760806916426513, + "grad_norm": 1.333493282792236, + "learning_rate": 1.2841919847305732e-06, + "loss": 0.48289233446121216, + "step": 3800 + }, + { + "epoch": 0.8763112391930836, + "grad_norm": 1.508439142440734, + "learning_rate": 1.2838265119466316e-06, + "loss": 0.5384439826011658, + "step": 3801 + }, + { + "epoch": 0.8765417867435159, + "grad_norm": 1.710922327336333, + "learning_rate": 1.2834609979256503e-06, + "loss": 0.5271417498588562, + "step": 3802 + }, + { + "epoch": 0.8767723342939481, + "grad_norm": 1.4659820833456867, + "learning_rate": 1.283095442720735e-06, + "loss": 0.4707641005516052, + "step": 3803 + }, + { + "epoch": 0.8770028818443804, + "grad_norm": 1.614777501143066, + "learning_rate": 1.2827298463849969e-06, + "loss": 0.49682319164276123, + "step": 3804 + }, + { + "epoch": 0.8772334293948126, + "grad_norm": 1.7144253113224919, + "learning_rate": 1.2823642089715531e-06, + "loss": 0.5188574194908142, + "step": 3805 + }, + { + "epoch": 0.8774639769452449, + "grad_norm": 1.4545861368963406, + "learning_rate": 1.281998530533527e-06, + "loss": 0.47443845868110657, + "step": 3806 + }, + { + "epoch": 0.8776945244956772, + "grad_norm": 1.5019289075727473, + "learning_rate": 1.2816328111240485e-06, + "loss": 0.4482235908508301, + "step": 3807 + }, + { + "epoch": 0.8779250720461095, + "grad_norm": 1.3960327819894567, + "learning_rate": 1.2812670507962519e-06, + "loss": 0.46176886558532715, + "step": 3808 + }, + { + "epoch": 0.8781556195965418, + "grad_norm": 1.6027449038682064, + "learning_rate": 1.280901249603279e-06, + "loss": 0.6133224964141846, + "step": 3809 + }, + { + "epoch": 0.878386167146974, + "grad_norm": 1.5464017426728758, + "learning_rate": 1.2805354075982764e-06, + "loss": 0.5276920795440674, + "step": 3810 + }, + { + "epoch": 0.8786167146974063, + "grad_norm": 1.3703428637588941, + "learning_rate": 1.2801695248343976e-06, + "loss": 0.45439988374710083, + "step": 3811 + }, + { + "epoch": 0.8788472622478386, + "grad_norm": 1.7551101397428777, + "learning_rate": 1.2798036013648015e-06, + "loss": 0.581672191619873, + "step": 3812 + }, + { + "epoch": 0.8790778097982709, + "grad_norm": 1.4957389309962374, + "learning_rate": 1.279437637242653e-06, + "loss": 0.47344762086868286, + "step": 3813 + }, + { + "epoch": 0.8793083573487032, + "grad_norm": 1.1882502138510669, + "learning_rate": 1.2790716325211222e-06, + "loss": 0.4005385935306549, + "step": 3814 + }, + { + "epoch": 0.8795389048991354, + "grad_norm": 1.3488455900541656, + "learning_rate": 1.2787055872533865e-06, + "loss": 0.4977230429649353, + "step": 3815 + }, + { + "epoch": 0.8797694524495677, + "grad_norm": 1.26803029784315, + "learning_rate": 1.2783395014926286e-06, + "loss": 0.4099036455154419, + "step": 3816 + }, + { + "epoch": 0.88, + "grad_norm": 1.6253478470481617, + "learning_rate": 1.2779733752920366e-06, + "loss": 0.522419810295105, + "step": 3817 + }, + { + "epoch": 0.8802305475504323, + "grad_norm": 1.3877455165242094, + "learning_rate": 1.2776072087048044e-06, + "loss": 0.6116030812263489, + "step": 3818 + }, + { + "epoch": 0.8804610951008646, + "grad_norm": 1.4523883351381868, + "learning_rate": 1.2772410017841331e-06, + "loss": 0.4522816836833954, + "step": 3819 + }, + { + "epoch": 0.8806916426512968, + "grad_norm": 1.3925368174753927, + "learning_rate": 1.276874754583228e-06, + "loss": 0.5046182870864868, + "step": 3820 + }, + { + "epoch": 0.8809221902017291, + "grad_norm": 1.5514128300497483, + "learning_rate": 1.2765084671553017e-06, + "loss": 0.42840707302093506, + "step": 3821 + }, + { + "epoch": 0.8811527377521614, + "grad_norm": 1.38028870575887, + "learning_rate": 1.2761421395535714e-06, + "loss": 0.4718896448612213, + "step": 3822 + }, + { + "epoch": 0.8813832853025937, + "grad_norm": 1.418382492806823, + "learning_rate": 1.275775771831261e-06, + "loss": 0.42692285776138306, + "step": 3823 + }, + { + "epoch": 0.881613832853026, + "grad_norm": 1.227035211124785, + "learning_rate": 1.2754093640415997e-06, + "loss": 0.5146567821502686, + "step": 3824 + }, + { + "epoch": 0.8818443804034583, + "grad_norm": 1.3655704393095225, + "learning_rate": 1.2750429162378226e-06, + "loss": 0.519682765007019, + "step": 3825 + }, + { + "epoch": 0.8820749279538905, + "grad_norm": 1.3597639009077442, + "learning_rate": 1.2746764284731713e-06, + "loss": 0.4810214042663574, + "step": 3826 + }, + { + "epoch": 0.8823054755043228, + "grad_norm": 1.673464893922515, + "learning_rate": 1.2743099008008922e-06, + "loss": 0.4206662178039551, + "step": 3827 + }, + { + "epoch": 0.8825360230547551, + "grad_norm": 1.5098444961762751, + "learning_rate": 1.2739433332742379e-06, + "loss": 0.5288581252098083, + "step": 3828 + }, + { + "epoch": 0.8827665706051874, + "grad_norm": 1.275143856060944, + "learning_rate": 1.2735767259464676e-06, + "loss": 0.4625706672668457, + "step": 3829 + }, + { + "epoch": 0.8829971181556195, + "grad_norm": 1.4159230358123176, + "learning_rate": 1.2732100788708446e-06, + "loss": 0.5021357536315918, + "step": 3830 + }, + { + "epoch": 0.8832276657060518, + "grad_norm": 1.883786032569818, + "learning_rate": 1.2728433921006391e-06, + "loss": 0.5525540113449097, + "step": 3831 + }, + { + "epoch": 0.8834582132564841, + "grad_norm": 1.4405227933488816, + "learning_rate": 1.2724766656891276e-06, + "loss": 0.5107265710830688, + "step": 3832 + }, + { + "epoch": 0.8836887608069164, + "grad_norm": 1.186656316362735, + "learning_rate": 1.272109899689591e-06, + "loss": 0.42621174454689026, + "step": 3833 + }, + { + "epoch": 0.8839193083573487, + "grad_norm": 1.535378140499116, + "learning_rate": 1.2717430941553163e-06, + "loss": 0.48517292737960815, + "step": 3834 + }, + { + "epoch": 0.884149855907781, + "grad_norm": 1.3570442439657298, + "learning_rate": 1.2713762491395971e-06, + "loss": 0.4864816665649414, + "step": 3835 + }, + { + "epoch": 0.8843804034582132, + "grad_norm": 1.4310044115767937, + "learning_rate": 1.271009364695732e-06, + "loss": 0.490509957075119, + "step": 3836 + }, + { + "epoch": 0.8846109510086455, + "grad_norm": 1.403739279698503, + "learning_rate": 1.2706424408770255e-06, + "loss": 0.5011172294616699, + "step": 3837 + }, + { + "epoch": 0.8848414985590778, + "grad_norm": 1.3811542981316953, + "learning_rate": 1.2702754777367876e-06, + "loss": 0.4880932569503784, + "step": 3838 + }, + { + "epoch": 0.8850720461095101, + "grad_norm": 1.3425275690337104, + "learning_rate": 1.2699084753283344e-06, + "loss": 0.46879449486732483, + "step": 3839 + }, + { + "epoch": 0.8853025936599423, + "grad_norm": 1.2274518723165866, + "learning_rate": 1.2695414337049878e-06, + "loss": 0.4026890993118286, + "step": 3840 + }, + { + "epoch": 0.8855331412103746, + "grad_norm": 1.4213648008939022, + "learning_rate": 1.2691743529200747e-06, + "loss": 0.41836002469062805, + "step": 3841 + }, + { + "epoch": 0.8857636887608069, + "grad_norm": 1.2462460722293722, + "learning_rate": 1.2688072330269281e-06, + "loss": 0.45401957631111145, + "step": 3842 + }, + { + "epoch": 0.8859942363112392, + "grad_norm": 1.5928757723481226, + "learning_rate": 1.2684400740788872e-06, + "loss": 0.5391957759857178, + "step": 3843 + }, + { + "epoch": 0.8862247838616715, + "grad_norm": 1.4989767229376667, + "learning_rate": 1.268072876129296e-06, + "loss": 0.4797601103782654, + "step": 3844 + }, + { + "epoch": 0.8864553314121038, + "grad_norm": 1.936441003907435, + "learning_rate": 1.2677056392315049e-06, + "loss": 0.6728458404541016, + "step": 3845 + }, + { + "epoch": 0.886685878962536, + "grad_norm": 1.5685808545128517, + "learning_rate": 1.2673383634388686e-06, + "loss": 0.4655725657939911, + "step": 3846 + }, + { + "epoch": 0.8869164265129683, + "grad_norm": 1.5246140905217858, + "learning_rate": 1.2669710488047494e-06, + "loss": 0.5772045850753784, + "step": 3847 + }, + { + "epoch": 0.8871469740634006, + "grad_norm": 1.4036690205681275, + "learning_rate": 1.2666036953825146e-06, + "loss": 0.46486788988113403, + "step": 3848 + }, + { + "epoch": 0.8873775216138329, + "grad_norm": 1.4488820168717385, + "learning_rate": 1.2662363032255356e-06, + "loss": 0.5417192578315735, + "step": 3849 + }, + { + "epoch": 0.8876080691642652, + "grad_norm": 1.5289545467931411, + "learning_rate": 1.2658688723871917e-06, + "loss": 0.4081997871398926, + "step": 3850 + }, + { + "epoch": 0.8878386167146974, + "grad_norm": 1.4923350478825586, + "learning_rate": 1.2655014029208665e-06, + "loss": 0.48307013511657715, + "step": 3851 + }, + { + "epoch": 0.8880691642651297, + "grad_norm": 1.3266351841026254, + "learning_rate": 1.265133894879949e-06, + "loss": 0.4746254086494446, + "step": 3852 + }, + { + "epoch": 0.888299711815562, + "grad_norm": 1.2468994625849585, + "learning_rate": 1.264766348317835e-06, + "loss": 0.4846293032169342, + "step": 3853 + }, + { + "epoch": 0.8885302593659943, + "grad_norm": 1.7122070966503224, + "learning_rate": 1.2643987632879247e-06, + "loss": 0.520226776599884, + "step": 3854 + }, + { + "epoch": 0.8887608069164266, + "grad_norm": 1.7159698618881642, + "learning_rate": 1.2640311398436248e-06, + "loss": 0.5007052421569824, + "step": 3855 + }, + { + "epoch": 0.8889913544668588, + "grad_norm": 1.3347258185539248, + "learning_rate": 1.2636634780383468e-06, + "loss": 0.48406559228897095, + "step": 3856 + }, + { + "epoch": 0.8892219020172911, + "grad_norm": 1.490180533752101, + "learning_rate": 1.2632957779255085e-06, + "loss": 0.5644323229789734, + "step": 3857 + }, + { + "epoch": 0.8894524495677233, + "grad_norm": 1.3614741031097277, + "learning_rate": 1.2629280395585327e-06, + "loss": 0.44816267490386963, + "step": 3858 + }, + { + "epoch": 0.8896829971181556, + "grad_norm": 1.4856304945777632, + "learning_rate": 1.2625602629908473e-06, + "loss": 0.36427319049835205, + "step": 3859 + }, + { + "epoch": 0.8899135446685879, + "grad_norm": 1.370413175613297, + "learning_rate": 1.2621924482758876e-06, + "loss": 0.45405641198158264, + "step": 3860 + }, + { + "epoch": 0.8901440922190201, + "grad_norm": 1.294524655309081, + "learning_rate": 1.2618245954670928e-06, + "loss": 0.5117720365524292, + "step": 3861 + }, + { + "epoch": 0.8903746397694524, + "grad_norm": 1.4254811243009609, + "learning_rate": 1.261456704617908e-06, + "loss": 0.45705491304397583, + "step": 3862 + }, + { + "epoch": 0.8906051873198847, + "grad_norm": 1.470712717016214, + "learning_rate": 1.2610887757817832e-06, + "loss": 0.5323429703712463, + "step": 3863 + }, + { + "epoch": 0.890835734870317, + "grad_norm": 1.5650874097714547, + "learning_rate": 1.2607208090121762e-06, + "loss": 0.4185718297958374, + "step": 3864 + }, + { + "epoch": 0.8910662824207493, + "grad_norm": 1.4040457955050967, + "learning_rate": 1.2603528043625474e-06, + "loss": 0.5212376117706299, + "step": 3865 + }, + { + "epoch": 0.8912968299711815, + "grad_norm": 1.4617247018385677, + "learning_rate": 1.2599847618863646e-06, + "loss": 0.4622758626937866, + "step": 3866 + }, + { + "epoch": 0.8915273775216138, + "grad_norm": 1.4316141337585846, + "learning_rate": 1.2596166816371003e-06, + "loss": 0.5874351859092712, + "step": 3867 + }, + { + "epoch": 0.8917579250720461, + "grad_norm": 1.362612891341034, + "learning_rate": 1.2592485636682332e-06, + "loss": 0.44203174114227295, + "step": 3868 + }, + { + "epoch": 0.8919884726224784, + "grad_norm": 1.4820160895628598, + "learning_rate": 1.2588804080332467e-06, + "loss": 0.47004449367523193, + "step": 3869 + }, + { + "epoch": 0.8922190201729107, + "grad_norm": 1.259347293997827, + "learning_rate": 1.25851221478563e-06, + "loss": 0.45785069465637207, + "step": 3870 + }, + { + "epoch": 0.8924495677233429, + "grad_norm": 1.5013835331703855, + "learning_rate": 1.2581439839788775e-06, + "loss": 0.5324156284332275, + "step": 3871 + }, + { + "epoch": 0.8926801152737752, + "grad_norm": 1.2878744861876057, + "learning_rate": 1.2577757156664897e-06, + "loss": 0.4123559296131134, + "step": 3872 + }, + { + "epoch": 0.8929106628242075, + "grad_norm": 1.4431943954654778, + "learning_rate": 1.257407409901972e-06, + "loss": 0.5121662616729736, + "step": 3873 + }, + { + "epoch": 0.8931412103746398, + "grad_norm": 1.1985178944416017, + "learning_rate": 1.2570390667388353e-06, + "loss": 0.432760089635849, + "step": 3874 + }, + { + "epoch": 0.8933717579250721, + "grad_norm": 1.8769468836868954, + "learning_rate": 1.256670686230596e-06, + "loss": 0.4611935019493103, + "step": 3875 + }, + { + "epoch": 0.8936023054755043, + "grad_norm": 1.2284902202201224, + "learning_rate": 1.2563022684307765e-06, + "loss": 0.39416176080703735, + "step": 3876 + }, + { + "epoch": 0.8938328530259366, + "grad_norm": 1.6072394074056906, + "learning_rate": 1.2559338133929033e-06, + "loss": 0.55222088098526, + "step": 3877 + }, + { + "epoch": 0.8940634005763689, + "grad_norm": 1.4877009433690547, + "learning_rate": 1.2555653211705098e-06, + "loss": 0.4382261037826538, + "step": 3878 + }, + { + "epoch": 0.8942939481268012, + "grad_norm": 1.5531857609954407, + "learning_rate": 1.2551967918171333e-06, + "loss": 0.5172265768051147, + "step": 3879 + }, + { + "epoch": 0.8945244956772335, + "grad_norm": 1.471596820954927, + "learning_rate": 1.254828225386318e-06, + "loss": 0.4504626989364624, + "step": 3880 + }, + { + "epoch": 0.8947550432276657, + "grad_norm": 1.2909793693386424, + "learning_rate": 1.2544596219316123e-06, + "loss": 0.4999982714653015, + "step": 3881 + }, + { + "epoch": 0.894985590778098, + "grad_norm": 1.4045585194974466, + "learning_rate": 1.2540909815065708e-06, + "loss": 0.45146411657333374, + "step": 3882 + }, + { + "epoch": 0.8952161383285303, + "grad_norm": 1.435519527817537, + "learning_rate": 1.2537223041647528e-06, + "loss": 0.4375761151313782, + "step": 3883 + }, + { + "epoch": 0.8954466858789626, + "grad_norm": 1.3610683543202822, + "learning_rate": 1.2533535899597233e-06, + "loss": 0.46330153942108154, + "step": 3884 + }, + { + "epoch": 0.8956772334293948, + "grad_norm": 1.3915852786240441, + "learning_rate": 1.252984838945053e-06, + "loss": 0.5131307244300842, + "step": 3885 + }, + { + "epoch": 0.895907780979827, + "grad_norm": 1.4118750717373771, + "learning_rate": 1.2526160511743177e-06, + "loss": 0.47963929176330566, + "step": 3886 + }, + { + "epoch": 0.8961383285302593, + "grad_norm": 1.4162114734869578, + "learning_rate": 1.2522472267010973e-06, + "loss": 0.5846255421638489, + "step": 3887 + }, + { + "epoch": 0.8963688760806916, + "grad_norm": 1.2092365440421156, + "learning_rate": 1.2518783655789792e-06, + "loss": 0.4484374523162842, + "step": 3888 + }, + { + "epoch": 0.8965994236311239, + "grad_norm": 1.4172661156409336, + "learning_rate": 1.2515094678615553e-06, + "loss": 0.4247318506240845, + "step": 3889 + }, + { + "epoch": 0.8968299711815562, + "grad_norm": 1.3343249187068733, + "learning_rate": 1.2511405336024216e-06, + "loss": 0.42753252387046814, + "step": 3890 + }, + { + "epoch": 0.8970605187319884, + "grad_norm": 1.45236289391602, + "learning_rate": 1.250771562855181e-06, + "loss": 0.5063097476959229, + "step": 3891 + }, + { + "epoch": 0.8972910662824207, + "grad_norm": 1.369985588812653, + "learning_rate": 1.2504025556734411e-06, + "loss": 0.45213985443115234, + "step": 3892 + }, + { + "epoch": 0.897521613832853, + "grad_norm": 1.4094391689341048, + "learning_rate": 1.250033512110815e-06, + "loss": 0.47396397590637207, + "step": 3893 + }, + { + "epoch": 0.8977521613832853, + "grad_norm": 1.2781299729891589, + "learning_rate": 1.2496644322209202e-06, + "loss": 0.4215230941772461, + "step": 3894 + }, + { + "epoch": 0.8979827089337176, + "grad_norm": 1.542998332559841, + "learning_rate": 1.249295316057381e-06, + "loss": 0.4587506651878357, + "step": 3895 + }, + { + "epoch": 0.8982132564841498, + "grad_norm": 1.3617158708985202, + "learning_rate": 1.2489261636738255e-06, + "loss": 0.5030006766319275, + "step": 3896 + }, + { + "epoch": 0.8984438040345821, + "grad_norm": 1.3744693750148733, + "learning_rate": 1.248556975123888e-06, + "loss": 0.4707000255584717, + "step": 3897 + }, + { + "epoch": 0.8986743515850144, + "grad_norm": 1.3687086395939398, + "learning_rate": 1.2481877504612075e-06, + "loss": 0.5365906953811646, + "step": 3898 + }, + { + "epoch": 0.8989048991354467, + "grad_norm": 1.3698885636510183, + "learning_rate": 1.2478184897394293e-06, + "loss": 0.5381914377212524, + "step": 3899 + }, + { + "epoch": 0.899135446685879, + "grad_norm": 1.3132088828358914, + "learning_rate": 1.2474491930122017e-06, + "loss": 0.4934455156326294, + "step": 3900 + }, + { + "epoch": 0.8993659942363113, + "grad_norm": 1.330529717680055, + "learning_rate": 1.2470798603331811e-06, + "loss": 0.44426971673965454, + "step": 3901 + }, + { + "epoch": 0.8995965417867435, + "grad_norm": 1.8380237109293664, + "learning_rate": 1.2467104917560272e-06, + "loss": 0.5359126329421997, + "step": 3902 + }, + { + "epoch": 0.8998270893371758, + "grad_norm": 1.5365113795650323, + "learning_rate": 1.2463410873344051e-06, + "loss": 0.5049535632133484, + "step": 3903 + }, + { + "epoch": 0.9000576368876081, + "grad_norm": 1.228286736353076, + "learning_rate": 1.2459716471219854e-06, + "loss": 0.47197240591049194, + "step": 3904 + }, + { + "epoch": 0.9002881844380404, + "grad_norm": 1.347891577851575, + "learning_rate": 1.2456021711724444e-06, + "loss": 0.5094764828681946, + "step": 3905 + }, + { + "epoch": 0.9005187319884727, + "grad_norm": 1.614513274766241, + "learning_rate": 1.2452326595394632e-06, + "loss": 0.48482170701026917, + "step": 3906 + }, + { + "epoch": 0.9007492795389049, + "grad_norm": 1.338580704201997, + "learning_rate": 1.2448631122767273e-06, + "loss": 0.4780023694038391, + "step": 3907 + }, + { + "epoch": 0.9009798270893372, + "grad_norm": 1.3763662563145533, + "learning_rate": 1.2444935294379284e-06, + "loss": 0.45455271005630493, + "step": 3908 + }, + { + "epoch": 0.9012103746397695, + "grad_norm": 1.6202257206479407, + "learning_rate": 1.244123911076763e-06, + "loss": 0.5535339117050171, + "step": 3909 + }, + { + "epoch": 0.9014409221902018, + "grad_norm": 1.6798210240312226, + "learning_rate": 1.2437542572469332e-06, + "loss": 0.49363064765930176, + "step": 3910 + }, + { + "epoch": 0.9016714697406341, + "grad_norm": 1.247430657266991, + "learning_rate": 1.2433845680021455e-06, + "loss": 0.4634913206100464, + "step": 3911 + }, + { + "epoch": 0.9019020172910662, + "grad_norm": 1.6355430895028145, + "learning_rate": 1.243014843396112e-06, + "loss": 0.5658323764801025, + "step": 3912 + }, + { + "epoch": 0.9021325648414985, + "grad_norm": 1.483540600317653, + "learning_rate": 1.2426450834825497e-06, + "loss": 0.3981133699417114, + "step": 3913 + }, + { + "epoch": 0.9023631123919308, + "grad_norm": 1.3171638139619342, + "learning_rate": 1.2422752883151808e-06, + "loss": 0.4588771462440491, + "step": 3914 + }, + { + "epoch": 0.9025936599423631, + "grad_norm": 1.462458369282688, + "learning_rate": 1.2419054579477332e-06, + "loss": 0.4534187912940979, + "step": 3915 + }, + { + "epoch": 0.9028242074927953, + "grad_norm": 1.323212121221774, + "learning_rate": 1.2415355924339386e-06, + "loss": 0.505224347114563, + "step": 3916 + }, + { + "epoch": 0.9030547550432276, + "grad_norm": 1.631385539397761, + "learning_rate": 1.2411656918275353e-06, + "loss": 0.5698803663253784, + "step": 3917 + }, + { + "epoch": 0.9032853025936599, + "grad_norm": 1.344522271913295, + "learning_rate": 1.240795756182266e-06, + "loss": 0.5096467137336731, + "step": 3918 + }, + { + "epoch": 0.9035158501440922, + "grad_norm": 1.2092359473357193, + "learning_rate": 1.240425785551878e-06, + "loss": 0.4160349667072296, + "step": 3919 + }, + { + "epoch": 0.9037463976945245, + "grad_norm": 1.514137735832078, + "learning_rate": 1.2400557799901243e-06, + "loss": 0.5298006534576416, + "step": 3920 + }, + { + "epoch": 0.9039769452449568, + "grad_norm": 1.5005579252676167, + "learning_rate": 1.2396857395507635e-06, + "loss": 0.5257192850112915, + "step": 3921 + }, + { + "epoch": 0.904207492795389, + "grad_norm": 1.3593778581503964, + "learning_rate": 1.2393156642875577e-06, + "loss": 0.4265173673629761, + "step": 3922 + }, + { + "epoch": 0.9044380403458213, + "grad_norm": 1.5580046287622509, + "learning_rate": 1.2389455542542757e-06, + "loss": 0.5779361724853516, + "step": 3923 + }, + { + "epoch": 0.9046685878962536, + "grad_norm": 1.355408621755586, + "learning_rate": 1.2385754095046903e-06, + "loss": 0.5402140617370605, + "step": 3924 + }, + { + "epoch": 0.9048991354466859, + "grad_norm": 1.6556822355333962, + "learning_rate": 1.2382052300925796e-06, + "loss": 0.5139520764350891, + "step": 3925 + }, + { + "epoch": 0.9051296829971182, + "grad_norm": 1.4744237883021032, + "learning_rate": 1.237835016071727e-06, + "loss": 0.5268999338150024, + "step": 3926 + }, + { + "epoch": 0.9053602305475504, + "grad_norm": 1.1940680219551778, + "learning_rate": 1.237464767495921e-06, + "loss": 0.4186581075191498, + "step": 3927 + }, + { + "epoch": 0.9055907780979827, + "grad_norm": 1.5404784760334345, + "learning_rate": 1.2370944844189542e-06, + "loss": 0.42975491285324097, + "step": 3928 + }, + { + "epoch": 0.905821325648415, + "grad_norm": 1.5264713593301018, + "learning_rate": 1.2367241668946256e-06, + "loss": 0.5356771945953369, + "step": 3929 + }, + { + "epoch": 0.9060518731988473, + "grad_norm": 1.4340272166135701, + "learning_rate": 1.2363538149767381e-06, + "loss": 0.4588644504547119, + "step": 3930 + }, + { + "epoch": 0.9062824207492796, + "grad_norm": 1.3559415891530742, + "learning_rate": 1.2359834287190998e-06, + "loss": 0.4316065013408661, + "step": 3931 + }, + { + "epoch": 0.9065129682997118, + "grad_norm": 1.8926645973507907, + "learning_rate": 1.2356130081755241e-06, + "loss": 0.5879726409912109, + "step": 3932 + }, + { + "epoch": 0.9067435158501441, + "grad_norm": 1.3952030980221555, + "learning_rate": 1.2352425533998298e-06, + "loss": 0.5054802894592285, + "step": 3933 + }, + { + "epoch": 0.9069740634005764, + "grad_norm": 1.6013693344378155, + "learning_rate": 1.2348720644458395e-06, + "loss": 0.47121232748031616, + "step": 3934 + }, + { + "epoch": 0.9072046109510087, + "grad_norm": 1.3838461965950462, + "learning_rate": 1.2345015413673815e-06, + "loss": 0.45739877223968506, + "step": 3935 + }, + { + "epoch": 0.907435158501441, + "grad_norm": 1.5155985877456657, + "learning_rate": 1.2341309842182888e-06, + "loss": 0.5117859840393066, + "step": 3936 + }, + { + "epoch": 0.9076657060518732, + "grad_norm": 1.5290945554256112, + "learning_rate": 1.2337603930524e-06, + "loss": 0.5041125416755676, + "step": 3937 + }, + { + "epoch": 0.9078962536023055, + "grad_norm": 1.3831423035498782, + "learning_rate": 1.2333897679235577e-06, + "loss": 0.44464683532714844, + "step": 3938 + }, + { + "epoch": 0.9081268011527378, + "grad_norm": 1.8217468846182516, + "learning_rate": 1.23301910888561e-06, + "loss": 0.5294856429100037, + "step": 3939 + }, + { + "epoch": 0.90835734870317, + "grad_norm": 1.6866871720820709, + "learning_rate": 1.2326484159924099e-06, + "loss": 0.5219826102256775, + "step": 3940 + }, + { + "epoch": 0.9085878962536023, + "grad_norm": 1.3129059510531136, + "learning_rate": 1.232277689297815e-06, + "loss": 0.47138598561286926, + "step": 3941 + }, + { + "epoch": 0.9088184438040345, + "grad_norm": 1.5478253692531152, + "learning_rate": 1.2319069288556885e-06, + "loss": 0.49205005168914795, + "step": 3942 + }, + { + "epoch": 0.9090489913544668, + "grad_norm": 1.2297274416962674, + "learning_rate": 1.2315361347198975e-06, + "loss": 0.49453967809677124, + "step": 3943 + }, + { + "epoch": 0.9092795389048991, + "grad_norm": 1.668590073547552, + "learning_rate": 1.231165306944315e-06, + "loss": 0.4912596344947815, + "step": 3944 + }, + { + "epoch": 0.9095100864553314, + "grad_norm": 1.5738731090807616, + "learning_rate": 1.2307944455828175e-06, + "loss": 0.5746258497238159, + "step": 3945 + }, + { + "epoch": 0.9097406340057637, + "grad_norm": 1.2718340719170307, + "learning_rate": 1.2304235506892887e-06, + "loss": 0.45906275510787964, + "step": 3946 + }, + { + "epoch": 0.9099711815561959, + "grad_norm": 1.3569754433422505, + "learning_rate": 1.2300526223176149e-06, + "loss": 0.5177116394042969, + "step": 3947 + }, + { + "epoch": 0.9102017291066282, + "grad_norm": 1.2901403946054024, + "learning_rate": 1.2296816605216883e-06, + "loss": 0.45164844393730164, + "step": 3948 + }, + { + "epoch": 0.9104322766570605, + "grad_norm": 1.5561420901310485, + "learning_rate": 1.2293106653554054e-06, + "loss": 0.5441509485244751, + "step": 3949 + }, + { + "epoch": 0.9106628242074928, + "grad_norm": 1.6569407840335213, + "learning_rate": 1.2289396368726684e-06, + "loss": 0.4755045771598816, + "step": 3950 + }, + { + "epoch": 0.9108933717579251, + "grad_norm": 1.3697502981572085, + "learning_rate": 1.228568575127384e-06, + "loss": 0.5330208539962769, + "step": 3951 + }, + { + "epoch": 0.9111239193083573, + "grad_norm": 1.4523221492491385, + "learning_rate": 1.2281974801734632e-06, + "loss": 0.5201523303985596, + "step": 3952 + }, + { + "epoch": 0.9113544668587896, + "grad_norm": 1.498143694444682, + "learning_rate": 1.2278263520648224e-06, + "loss": 0.5279841423034668, + "step": 3953 + }, + { + "epoch": 0.9115850144092219, + "grad_norm": 1.5841756767163864, + "learning_rate": 1.2274551908553828e-06, + "loss": 0.4712948203086853, + "step": 3954 + }, + { + "epoch": 0.9118155619596542, + "grad_norm": 1.432279815006193, + "learning_rate": 1.2270839965990698e-06, + "loss": 0.513918399810791, + "step": 3955 + }, + { + "epoch": 0.9120461095100865, + "grad_norm": 1.2571574370365426, + "learning_rate": 1.226712769349815e-06, + "loss": 0.3688378930091858, + "step": 3956 + }, + { + "epoch": 0.9122766570605187, + "grad_norm": 1.46428205883802, + "learning_rate": 1.2263415091615528e-06, + "loss": 0.5429365634918213, + "step": 3957 + }, + { + "epoch": 0.912507204610951, + "grad_norm": 1.3697810351726807, + "learning_rate": 1.225970216088224e-06, + "loss": 0.5164231061935425, + "step": 3958 + }, + { + "epoch": 0.9127377521613833, + "grad_norm": 1.5119297834458385, + "learning_rate": 1.2255988901837737e-06, + "loss": 0.48215287923812866, + "step": 3959 + }, + { + "epoch": 0.9129682997118156, + "grad_norm": 1.3695567208291244, + "learning_rate": 1.2252275315021514e-06, + "loss": 0.47684353590011597, + "step": 3960 + }, + { + "epoch": 0.9131988472622479, + "grad_norm": 1.2862730862475011, + "learning_rate": 1.2248561400973114e-06, + "loss": 0.498749315738678, + "step": 3961 + }, + { + "epoch": 0.9134293948126802, + "grad_norm": 1.532172345538078, + "learning_rate": 1.224484716023214e-06, + "loss": 0.6198222637176514, + "step": 3962 + }, + { + "epoch": 0.9136599423631124, + "grad_norm": 1.2104233560563726, + "learning_rate": 1.2241132593338224e-06, + "loss": 0.45880454778671265, + "step": 3963 + }, + { + "epoch": 0.9138904899135447, + "grad_norm": 1.1252676206788053, + "learning_rate": 1.2237417700831055e-06, + "loss": 0.5064502954483032, + "step": 3964 + }, + { + "epoch": 0.914121037463977, + "grad_norm": 1.3166703099327564, + "learning_rate": 1.223370248325037e-06, + "loss": 0.3796413540840149, + "step": 3965 + }, + { + "epoch": 0.9143515850144093, + "grad_norm": 1.824971760714775, + "learning_rate": 1.222998694113595e-06, + "loss": 0.520294725894928, + "step": 3966 + }, + { + "epoch": 0.9145821325648414, + "grad_norm": 1.2643227228990135, + "learning_rate": 1.2226271075027627e-06, + "loss": 0.4708889424800873, + "step": 3967 + }, + { + "epoch": 0.9148126801152737, + "grad_norm": 1.4788536918637403, + "learning_rate": 1.2222554885465277e-06, + "loss": 0.4617408215999603, + "step": 3968 + }, + { + "epoch": 0.915043227665706, + "grad_norm": 1.6953855273421374, + "learning_rate": 1.2218838372988822e-06, + "loss": 0.47521114349365234, + "step": 3969 + }, + { + "epoch": 0.9152737752161383, + "grad_norm": 1.3617165473153408, + "learning_rate": 1.2215121538138233e-06, + "loss": 0.47553038597106934, + "step": 3970 + }, + { + "epoch": 0.9155043227665706, + "grad_norm": 1.371230823038315, + "learning_rate": 1.2211404381453527e-06, + "loss": 0.4502425789833069, + "step": 3971 + }, + { + "epoch": 0.9157348703170028, + "grad_norm": 1.3152534809096919, + "learning_rate": 1.2207686903474774e-06, + "loss": 0.4478566646575928, + "step": 3972 + }, + { + "epoch": 0.9159654178674351, + "grad_norm": 1.2964432112303936, + "learning_rate": 1.2203969104742074e-06, + "loss": 0.48119837045669556, + "step": 3973 + }, + { + "epoch": 0.9161959654178674, + "grad_norm": 1.3052580206449855, + "learning_rate": 1.2200250985795592e-06, + "loss": 0.4442863166332245, + "step": 3974 + }, + { + "epoch": 0.9164265129682997, + "grad_norm": 1.4544787661984238, + "learning_rate": 1.2196532547175534e-06, + "loss": 0.4326072037220001, + "step": 3975 + }, + { + "epoch": 0.916657060518732, + "grad_norm": 1.3112341607173732, + "learning_rate": 1.2192813789422146e-06, + "loss": 0.4617190361022949, + "step": 3976 + }, + { + "epoch": 0.9168876080691643, + "grad_norm": 1.4746796735453866, + "learning_rate": 1.218909471307572e-06, + "loss": 0.5099925994873047, + "step": 3977 + }, + { + "epoch": 0.9171181556195965, + "grad_norm": 1.3055542454695868, + "learning_rate": 1.2185375318676611e-06, + "loss": 0.4432462453842163, + "step": 3978 + }, + { + "epoch": 0.9173487031700288, + "grad_norm": 1.5060839944143631, + "learning_rate": 1.21816556067652e-06, + "loss": 0.46479105949401855, + "step": 3979 + }, + { + "epoch": 0.9175792507204611, + "grad_norm": 1.6825755102995348, + "learning_rate": 1.2177935577881926e-06, + "loss": 0.5027565360069275, + "step": 3980 + }, + { + "epoch": 0.9178097982708934, + "grad_norm": 1.323342879993209, + "learning_rate": 1.217421523256727e-06, + "loss": 0.5658999085426331, + "step": 3981 + }, + { + "epoch": 0.9180403458213257, + "grad_norm": 1.3697743466973002, + "learning_rate": 1.2170494571361753e-06, + "loss": 0.503716230392456, + "step": 3982 + }, + { + "epoch": 0.9182708933717579, + "grad_norm": 1.5398117661982387, + "learning_rate": 1.216677359480596e-06, + "loss": 0.4836317300796509, + "step": 3983 + }, + { + "epoch": 0.9185014409221902, + "grad_norm": 1.2670647547552656, + "learning_rate": 1.2163052303440502e-06, + "loss": 0.4926934838294983, + "step": 3984 + }, + { + "epoch": 0.9187319884726225, + "grad_norm": 1.2680144774170914, + "learning_rate": 1.2159330697806047e-06, + "loss": 0.4263528287410736, + "step": 3985 + }, + { + "epoch": 0.9189625360230548, + "grad_norm": 1.5304339584560502, + "learning_rate": 1.21556087784433e-06, + "loss": 0.465299129486084, + "step": 3986 + }, + { + "epoch": 0.9191930835734871, + "grad_norm": 1.2798602314469432, + "learning_rate": 1.2151886545893028e-06, + "loss": 0.4937010407447815, + "step": 3987 + }, + { + "epoch": 0.9194236311239193, + "grad_norm": 1.3820049835108226, + "learning_rate": 1.2148164000696024e-06, + "loss": 0.4511493146419525, + "step": 3988 + }, + { + "epoch": 0.9196541786743516, + "grad_norm": 1.4217188148151207, + "learning_rate": 1.2144441143393139e-06, + "loss": 0.47422119975090027, + "step": 3989 + }, + { + "epoch": 0.9198847262247839, + "grad_norm": 1.9912580790846817, + "learning_rate": 1.2140717974525263e-06, + "loss": 0.48183363676071167, + "step": 3990 + }, + { + "epoch": 0.9201152737752162, + "grad_norm": 1.2871112603155308, + "learning_rate": 1.2136994494633333e-06, + "loss": 0.5231306552886963, + "step": 3991 + }, + { + "epoch": 0.9203458213256485, + "grad_norm": 1.2840416788228404, + "learning_rate": 1.2133270704258336e-06, + "loss": 0.5468826293945312, + "step": 3992 + }, + { + "epoch": 0.9205763688760807, + "grad_norm": 1.4782261085318393, + "learning_rate": 1.2129546603941297e-06, + "loss": 0.5139263868331909, + "step": 3993 + }, + { + "epoch": 0.920806916426513, + "grad_norm": 1.5250421500545963, + "learning_rate": 1.2125822194223287e-06, + "loss": 0.44919341802597046, + "step": 3994 + }, + { + "epoch": 0.9210374639769452, + "grad_norm": 1.4583297611315478, + "learning_rate": 1.212209747564543e-06, + "loss": 0.423626184463501, + "step": 3995 + }, + { + "epoch": 0.9212680115273775, + "grad_norm": 1.5952268021154143, + "learning_rate": 1.2118372448748885e-06, + "loss": 0.6177996397018433, + "step": 3996 + }, + { + "epoch": 0.9214985590778098, + "grad_norm": 1.2688969514305488, + "learning_rate": 1.2114647114074861e-06, + "loss": 0.49319177865982056, + "step": 3997 + }, + { + "epoch": 0.921729106628242, + "grad_norm": 1.4475287935451044, + "learning_rate": 1.2110921472164607e-06, + "loss": 0.4969290494918823, + "step": 3998 + }, + { + "epoch": 0.9219596541786743, + "grad_norm": 1.586155388040351, + "learning_rate": 1.2107195523559424e-06, + "loss": 0.48943108320236206, + "step": 3999 + }, + { + "epoch": 0.9221902017291066, + "grad_norm": 2.0995701290892352, + "learning_rate": 1.2103469268800654e-06, + "loss": 0.484131783246994, + "step": 4000 + }, + { + "epoch": 0.9224207492795389, + "grad_norm": 1.435307334693519, + "learning_rate": 1.209974270842968e-06, + "loss": 0.507775068283081, + "step": 4001 + }, + { + "epoch": 0.9226512968299712, + "grad_norm": 1.981545451286363, + "learning_rate": 1.209601584298793e-06, + "loss": 0.4608537554740906, + "step": 4002 + }, + { + "epoch": 0.9228818443804034, + "grad_norm": 1.3751735682066653, + "learning_rate": 1.2092288673016886e-06, + "loss": 0.4954952001571655, + "step": 4003 + }, + { + "epoch": 0.9231123919308357, + "grad_norm": 1.4081328435502214, + "learning_rate": 1.2088561199058066e-06, + "loss": 0.5661128163337708, + "step": 4004 + }, + { + "epoch": 0.923342939481268, + "grad_norm": 1.3545501865482017, + "learning_rate": 1.2084833421653024e-06, + "loss": 0.5523176193237305, + "step": 4005 + }, + { + "epoch": 0.9235734870317003, + "grad_norm": 1.6845686246960427, + "learning_rate": 1.2081105341343377e-06, + "loss": 0.5262937545776367, + "step": 4006 + }, + { + "epoch": 0.9238040345821326, + "grad_norm": 1.556031475297922, + "learning_rate": 1.2077376958670775e-06, + "loss": 0.5856887102127075, + "step": 4007 + }, + { + "epoch": 0.9240345821325648, + "grad_norm": 1.1475269427941932, + "learning_rate": 1.207364827417691e-06, + "loss": 0.4267149567604065, + "step": 4008 + }, + { + "epoch": 0.9242651296829971, + "grad_norm": 1.418841731188768, + "learning_rate": 1.2069919288403522e-06, + "loss": 0.5250238180160522, + "step": 4009 + }, + { + "epoch": 0.9244956772334294, + "grad_norm": 1.3666576970448538, + "learning_rate": 1.2066190001892396e-06, + "loss": 0.5198487043380737, + "step": 4010 + }, + { + "epoch": 0.9247262247838617, + "grad_norm": 1.3830301229983213, + "learning_rate": 1.2062460415185354e-06, + "loss": 0.47743192315101624, + "step": 4011 + }, + { + "epoch": 0.924956772334294, + "grad_norm": 1.4075712291395979, + "learning_rate": 1.2058730528824273e-06, + "loss": 0.4159664213657379, + "step": 4012 + }, + { + "epoch": 0.9251873198847262, + "grad_norm": 1.4264938029777896, + "learning_rate": 1.205500034335106e-06, + "loss": 0.5445389151573181, + "step": 4013 + }, + { + "epoch": 0.9254178674351585, + "grad_norm": 1.3580321349174527, + "learning_rate": 1.2051269859307673e-06, + "loss": 0.42347821593284607, + "step": 4014 + }, + { + "epoch": 0.9256484149855908, + "grad_norm": 1.52434699932796, + "learning_rate": 1.2047539077236116e-06, + "loss": 0.5288738012313843, + "step": 4015 + }, + { + "epoch": 0.9258789625360231, + "grad_norm": 1.3100008419238212, + "learning_rate": 1.2043807997678436e-06, + "loss": 0.4394925832748413, + "step": 4016 + }, + { + "epoch": 0.9261095100864554, + "grad_norm": 1.565361448615934, + "learning_rate": 1.2040076621176712e-06, + "loss": 0.5634762048721313, + "step": 4017 + }, + { + "epoch": 0.9263400576368876, + "grad_norm": 1.4510569270002092, + "learning_rate": 1.2036344948273074e-06, + "loss": 0.4297552704811096, + "step": 4018 + }, + { + "epoch": 0.9265706051873199, + "grad_norm": 1.466340432854967, + "learning_rate": 1.2032612979509701e-06, + "loss": 0.48720866441726685, + "step": 4019 + }, + { + "epoch": 0.9268011527377522, + "grad_norm": 1.6032805373268917, + "learning_rate": 1.2028880715428808e-06, + "loss": 0.49143970012664795, + "step": 4020 + }, + { + "epoch": 0.9270317002881845, + "grad_norm": 1.5228540386105625, + "learning_rate": 1.2025148156572654e-06, + "loss": 0.5053589940071106, + "step": 4021 + }, + { + "epoch": 0.9272622478386167, + "grad_norm": 1.5279572652998323, + "learning_rate": 1.2021415303483537e-06, + "loss": 0.5389736890792847, + "step": 4022 + }, + { + "epoch": 0.9274927953890489, + "grad_norm": 1.4328489921033678, + "learning_rate": 1.2017682156703805e-06, + "loss": 0.46251988410949707, + "step": 4023 + }, + { + "epoch": 0.9277233429394812, + "grad_norm": 1.6247276881495054, + "learning_rate": 1.2013948716775848e-06, + "loss": 0.5429900288581848, + "step": 4024 + }, + { + "epoch": 0.9279538904899135, + "grad_norm": 1.3971561705543436, + "learning_rate": 1.201021498424209e-06, + "loss": 0.45329588651657104, + "step": 4025 + }, + { + "epoch": 0.9281844380403458, + "grad_norm": 1.6206327315291964, + "learning_rate": 1.200648095964501e-06, + "loss": 0.4960551857948303, + "step": 4026 + }, + { + "epoch": 0.9284149855907781, + "grad_norm": 1.246015007964809, + "learning_rate": 1.2002746643527117e-06, + "loss": 0.38909661769866943, + "step": 4027 + }, + { + "epoch": 0.9286455331412103, + "grad_norm": 1.4656512778264124, + "learning_rate": 1.199901203643097e-06, + "loss": 0.4518124461174011, + "step": 4028 + }, + { + "epoch": 0.9288760806916426, + "grad_norm": 1.4345267950370788, + "learning_rate": 1.1995277138899173e-06, + "loss": 0.4774012267589569, + "step": 4029 + }, + { + "epoch": 0.9291066282420749, + "grad_norm": 1.8466433066122896, + "learning_rate": 1.1991541951474363e-06, + "loss": 0.48850271105766296, + "step": 4030 + }, + { + "epoch": 0.9293371757925072, + "grad_norm": 1.405365935647213, + "learning_rate": 1.1987806474699223e-06, + "loss": 0.46933990716934204, + "step": 4031 + }, + { + "epoch": 0.9295677233429395, + "grad_norm": 1.4191229128527267, + "learning_rate": 1.1984070709116483e-06, + "loss": 0.4658009707927704, + "step": 4032 + }, + { + "epoch": 0.9297982708933717, + "grad_norm": 1.7378069570906458, + "learning_rate": 1.1980334655268907e-06, + "loss": 0.4660704731941223, + "step": 4033 + }, + { + "epoch": 0.930028818443804, + "grad_norm": 1.238361666141835, + "learning_rate": 1.197659831369931e-06, + "loss": 0.44522547721862793, + "step": 4034 + }, + { + "epoch": 0.9302593659942363, + "grad_norm": 1.4315651225888224, + "learning_rate": 1.1972861684950535e-06, + "loss": 0.5145701169967651, + "step": 4035 + }, + { + "epoch": 0.9304899135446686, + "grad_norm": 1.407197717514797, + "learning_rate": 1.1969124769565485e-06, + "loss": 0.5040268898010254, + "step": 4036 + }, + { + "epoch": 0.9307204610951009, + "grad_norm": 1.4531027015491225, + "learning_rate": 1.1965387568087086e-06, + "loss": 0.46897944808006287, + "step": 4037 + }, + { + "epoch": 0.9309510086455332, + "grad_norm": 1.761132451430314, + "learning_rate": 1.1961650081058325e-06, + "loss": 0.6305633783340454, + "step": 4038 + }, + { + "epoch": 0.9311815561959654, + "grad_norm": 1.4142062841342746, + "learning_rate": 1.1957912309022207e-06, + "loss": 0.4110141396522522, + "step": 4039 + }, + { + "epoch": 0.9314121037463977, + "grad_norm": 1.5122107480089741, + "learning_rate": 1.1954174252521802e-06, + "loss": 0.41255778074264526, + "step": 4040 + }, + { + "epoch": 0.93164265129683, + "grad_norm": 1.7308884755783311, + "learning_rate": 1.195043591210021e-06, + "loss": 0.5402833223342896, + "step": 4041 + }, + { + "epoch": 0.9318731988472623, + "grad_norm": 1.4904148344208392, + "learning_rate": 1.1946697288300564e-06, + "loss": 0.5163707733154297, + "step": 4042 + }, + { + "epoch": 0.9321037463976946, + "grad_norm": 1.3642989361766586, + "learning_rate": 1.1942958381666055e-06, + "loss": 0.5049669742584229, + "step": 4043 + }, + { + "epoch": 0.9323342939481268, + "grad_norm": 1.512669161605088, + "learning_rate": 1.1939219192739907e-06, + "loss": 0.554564356803894, + "step": 4044 + }, + { + "epoch": 0.9325648414985591, + "grad_norm": 1.1648703874817907, + "learning_rate": 1.1935479722065382e-06, + "loss": 0.46329542994499207, + "step": 4045 + }, + { + "epoch": 0.9327953890489914, + "grad_norm": 1.380659317735328, + "learning_rate": 1.1931739970185787e-06, + "loss": 0.49481451511383057, + "step": 4046 + }, + { + "epoch": 0.9330259365994237, + "grad_norm": 1.4592472514709256, + "learning_rate": 1.1927999937644474e-06, + "loss": 0.5521741509437561, + "step": 4047 + }, + { + "epoch": 0.933256484149856, + "grad_norm": 1.6574635084552074, + "learning_rate": 1.1924259624984822e-06, + "loss": 0.4786396324634552, + "step": 4048 + }, + { + "epoch": 0.9334870317002882, + "grad_norm": 1.553655262105641, + "learning_rate": 1.1920519032750266e-06, + "loss": 0.5652132630348206, + "step": 4049 + }, + { + "epoch": 0.9337175792507204, + "grad_norm": 1.2329472545327154, + "learning_rate": 1.1916778161484272e-06, + "loss": 0.3993264436721802, + "step": 4050 + }, + { + "epoch": 0.9339481268011527, + "grad_norm": 1.4293564999356616, + "learning_rate": 1.1913037011730354e-06, + "loss": 0.504034161567688, + "step": 4051 + }, + { + "epoch": 0.934178674351585, + "grad_norm": 1.625339530227479, + "learning_rate": 1.1909295584032054e-06, + "loss": 0.40499967336654663, + "step": 4052 + }, + { + "epoch": 0.9344092219020173, + "grad_norm": 2.035223953420821, + "learning_rate": 1.1905553878932972e-06, + "loss": 0.5135485529899597, + "step": 4053 + }, + { + "epoch": 0.9346397694524495, + "grad_norm": 1.3278296978190696, + "learning_rate": 1.1901811896976736e-06, + "loss": 0.4639396369457245, + "step": 4054 + }, + { + "epoch": 0.9348703170028818, + "grad_norm": 1.4183899547937462, + "learning_rate": 1.1898069638707011e-06, + "loss": 0.4949880838394165, + "step": 4055 + }, + { + "epoch": 0.9351008645533141, + "grad_norm": 1.3028741958089267, + "learning_rate": 1.1894327104667514e-06, + "loss": 0.4456254243850708, + "step": 4056 + }, + { + "epoch": 0.9353314121037464, + "grad_norm": 1.3778790723085623, + "learning_rate": 1.1890584295402e-06, + "loss": 0.466667115688324, + "step": 4057 + }, + { + "epoch": 0.9355619596541787, + "grad_norm": 1.2920684649771048, + "learning_rate": 1.188684121145425e-06, + "loss": 0.4107869863510132, + "step": 4058 + }, + { + "epoch": 0.9357925072046109, + "grad_norm": 1.4103631270546013, + "learning_rate": 1.1883097853368099e-06, + "loss": 0.49275922775268555, + "step": 4059 + }, + { + "epoch": 0.9360230547550432, + "grad_norm": 1.4435221132968696, + "learning_rate": 1.1879354221687424e-06, + "loss": 0.5320898294448853, + "step": 4060 + }, + { + "epoch": 0.9362536023054755, + "grad_norm": 1.4843366418758739, + "learning_rate": 1.1875610316956127e-06, + "loss": 0.48131513595581055, + "step": 4061 + }, + { + "epoch": 0.9364841498559078, + "grad_norm": 1.2831782058716195, + "learning_rate": 1.1871866139718164e-06, + "loss": 0.42021268606185913, + "step": 4062 + }, + { + "epoch": 0.9367146974063401, + "grad_norm": 1.42206888115783, + "learning_rate": 1.1868121690517524e-06, + "loss": 0.4804028868675232, + "step": 4063 + }, + { + "epoch": 0.9369452449567723, + "grad_norm": 1.357971019510146, + "learning_rate": 1.1864376969898238e-06, + "loss": 0.4920700192451477, + "step": 4064 + }, + { + "epoch": 0.9371757925072046, + "grad_norm": 1.6009723063265457, + "learning_rate": 1.1860631978404369e-06, + "loss": 0.4702967703342438, + "step": 4065 + }, + { + "epoch": 0.9374063400576369, + "grad_norm": 1.4828010491515886, + "learning_rate": 1.1856886716580032e-06, + "loss": 0.41766661405563354, + "step": 4066 + }, + { + "epoch": 0.9376368876080692, + "grad_norm": 1.5572182245704769, + "learning_rate": 1.1853141184969372e-06, + "loss": 0.43666309118270874, + "step": 4067 + }, + { + "epoch": 0.9378674351585015, + "grad_norm": 1.402637303057815, + "learning_rate": 1.1849395384116573e-06, + "loss": 0.5529673099517822, + "step": 4068 + }, + { + "epoch": 0.9380979827089337, + "grad_norm": 1.737132310570529, + "learning_rate": 1.1845649314565866e-06, + "loss": 0.4728221297264099, + "step": 4069 + }, + { + "epoch": 0.938328530259366, + "grad_norm": 1.464406585741721, + "learning_rate": 1.1841902976861516e-06, + "loss": 0.4904414117336273, + "step": 4070 + }, + { + "epoch": 0.9385590778097983, + "grad_norm": 1.3210429529267262, + "learning_rate": 1.183815637154782e-06, + "loss": 0.4774867594242096, + "step": 4071 + }, + { + "epoch": 0.9387896253602306, + "grad_norm": 1.3081579109295827, + "learning_rate": 1.1834409499169125e-06, + "loss": 0.4732491970062256, + "step": 4072 + }, + { + "epoch": 0.9390201729106629, + "grad_norm": 1.6141272556528279, + "learning_rate": 1.1830662360269817e-06, + "loss": 0.43487685918807983, + "step": 4073 + }, + { + "epoch": 0.9392507204610951, + "grad_norm": 1.5910806235991546, + "learning_rate": 1.1826914955394307e-06, + "loss": 0.42725875973701477, + "step": 4074 + }, + { + "epoch": 0.9394812680115274, + "grad_norm": 1.6421401366141255, + "learning_rate": 1.1823167285087062e-06, + "loss": 0.5362523198127747, + "step": 4075 + }, + { + "epoch": 0.9397118155619597, + "grad_norm": 1.542102971714458, + "learning_rate": 1.1819419349892573e-06, + "loss": 0.5027948617935181, + "step": 4076 + }, + { + "epoch": 0.9399423631123919, + "grad_norm": 1.5260318111513538, + "learning_rate": 1.1815671150355384e-06, + "loss": 0.5027229189872742, + "step": 4077 + }, + { + "epoch": 0.9401729106628242, + "grad_norm": 1.239313772323881, + "learning_rate": 1.1811922687020064e-06, + "loss": 0.43377360701560974, + "step": 4078 + }, + { + "epoch": 0.9404034582132564, + "grad_norm": 1.9123276757663772, + "learning_rate": 1.1808173960431224e-06, + "loss": 0.5102596282958984, + "step": 4079 + }, + { + "epoch": 0.9406340057636887, + "grad_norm": 1.4541636061516516, + "learning_rate": 1.180442497113352e-06, + "loss": 0.4819416105747223, + "step": 4080 + }, + { + "epoch": 0.940864553314121, + "grad_norm": 2.2469220393286258, + "learning_rate": 1.1800675719671637e-06, + "loss": 0.5308434963226318, + "step": 4081 + }, + { + "epoch": 0.9410951008645533, + "grad_norm": 1.2322230699289647, + "learning_rate": 1.1796926206590306e-06, + "loss": 0.3851321339607239, + "step": 4082 + }, + { + "epoch": 0.9413256484149856, + "grad_norm": 1.2904563965230043, + "learning_rate": 1.1793176432434292e-06, + "loss": 0.4828380048274994, + "step": 4083 + }, + { + "epoch": 0.9415561959654178, + "grad_norm": 1.3580681901731781, + "learning_rate": 1.1789426397748393e-06, + "loss": 0.43688488006591797, + "step": 4084 + }, + { + "epoch": 0.9417867435158501, + "grad_norm": 1.2794460643131822, + "learning_rate": 1.1785676103077453e-06, + "loss": 0.5123027563095093, + "step": 4085 + }, + { + "epoch": 0.9420172910662824, + "grad_norm": 1.6221515546011513, + "learning_rate": 1.1781925548966353e-06, + "loss": 0.5363332033157349, + "step": 4086 + }, + { + "epoch": 0.9422478386167147, + "grad_norm": 1.3527370433567738, + "learning_rate": 1.1778174735960007e-06, + "loss": 0.4401679039001465, + "step": 4087 + }, + { + "epoch": 0.942478386167147, + "grad_norm": 1.6235340600095725, + "learning_rate": 1.1774423664603369e-06, + "loss": 0.48118162155151367, + "step": 4088 + }, + { + "epoch": 0.9427089337175792, + "grad_norm": 1.644375200451457, + "learning_rate": 1.1770672335441435e-06, + "loss": 0.4561998248100281, + "step": 4089 + }, + { + "epoch": 0.9429394812680115, + "grad_norm": 1.3516589509901438, + "learning_rate": 1.1766920749019227e-06, + "loss": 0.5039771199226379, + "step": 4090 + }, + { + "epoch": 0.9431700288184438, + "grad_norm": 1.289993272064434, + "learning_rate": 1.1763168905881817e-06, + "loss": 0.3734637498855591, + "step": 4091 + }, + { + "epoch": 0.9434005763688761, + "grad_norm": 1.5072018021814846, + "learning_rate": 1.1759416806574306e-06, + "loss": 0.5121400952339172, + "step": 4092 + }, + { + "epoch": 0.9436311239193084, + "grad_norm": 1.3533206831206654, + "learning_rate": 1.1755664451641835e-06, + "loss": 0.47663238644599915, + "step": 4093 + }, + { + "epoch": 0.9438616714697406, + "grad_norm": 1.3920201979100617, + "learning_rate": 1.1751911841629585e-06, + "loss": 0.5083819627761841, + "step": 4094 + }, + { + "epoch": 0.9440922190201729, + "grad_norm": 1.7300221057773006, + "learning_rate": 1.1748158977082768e-06, + "loss": 0.5221288800239563, + "step": 4095 + }, + { + "epoch": 0.9443227665706052, + "grad_norm": 1.5179395653322596, + "learning_rate": 1.1744405858546635e-06, + "loss": 0.5698456168174744, + "step": 4096 + }, + { + "epoch": 0.9445533141210375, + "grad_norm": 1.8027766062759447, + "learning_rate": 1.1740652486566482e-06, + "loss": 0.49966350197792053, + "step": 4097 + }, + { + "epoch": 0.9447838616714698, + "grad_norm": 1.6204478112236895, + "learning_rate": 1.1736898861687629e-06, + "loss": 0.5273131728172302, + "step": 4098 + }, + { + "epoch": 0.945014409221902, + "grad_norm": 1.3525958396034103, + "learning_rate": 1.1733144984455441e-06, + "loss": 0.515166699886322, + "step": 4099 + }, + { + "epoch": 0.9452449567723343, + "grad_norm": 1.3435489819348247, + "learning_rate": 1.172939085541531e-06, + "loss": 0.448274165391922, + "step": 4100 + }, + { + "epoch": 0.9454755043227666, + "grad_norm": 1.3997663854784892, + "learning_rate": 1.1725636475112686e-06, + "loss": 0.3678357005119324, + "step": 4101 + }, + { + "epoch": 0.9457060518731989, + "grad_norm": 1.4128979216296833, + "learning_rate": 1.1721881844093031e-06, + "loss": 0.4662661552429199, + "step": 4102 + }, + { + "epoch": 0.9459365994236312, + "grad_norm": 1.3930942066092626, + "learning_rate": 1.1718126962901855e-06, + "loss": 0.49695825576782227, + "step": 4103 + }, + { + "epoch": 0.9461671469740635, + "grad_norm": 1.5090047516434983, + "learning_rate": 1.1714371832084707e-06, + "loss": 0.4838793873786926, + "step": 4104 + }, + { + "epoch": 0.9463976945244956, + "grad_norm": 1.5195967659155012, + "learning_rate": 1.1710616452187164e-06, + "loss": 0.5055621266365051, + "step": 4105 + }, + { + "epoch": 0.9466282420749279, + "grad_norm": 1.555064483270701, + "learning_rate": 1.1706860823754844e-06, + "loss": 0.44428279995918274, + "step": 4106 + }, + { + "epoch": 0.9468587896253602, + "grad_norm": 1.2604559743030346, + "learning_rate": 1.1703104947333406e-06, + "loss": 0.4698944687843323, + "step": 4107 + }, + { + "epoch": 0.9470893371757925, + "grad_norm": 1.705702471223194, + "learning_rate": 1.1699348823468533e-06, + "loss": 0.43742209672927856, + "step": 4108 + }, + { + "epoch": 0.9473198847262247, + "grad_norm": 1.3409195488655554, + "learning_rate": 1.1695592452705958e-06, + "loss": 0.4127580523490906, + "step": 4109 + }, + { + "epoch": 0.947550432276657, + "grad_norm": 1.2372268437863259, + "learning_rate": 1.1691835835591434e-06, + "loss": 0.4625944495201111, + "step": 4110 + }, + { + "epoch": 0.9477809798270893, + "grad_norm": 1.5550413151368645, + "learning_rate": 1.1688078972670768e-06, + "loss": 0.5464745759963989, + "step": 4111 + }, + { + "epoch": 0.9480115273775216, + "grad_norm": 1.4902811228226185, + "learning_rate": 1.1684321864489783e-06, + "loss": 0.5199334025382996, + "step": 4112 + }, + { + "epoch": 0.9482420749279539, + "grad_norm": 1.6755931463282898, + "learning_rate": 1.1680564511594355e-06, + "loss": 0.5582367181777954, + "step": 4113 + }, + { + "epoch": 0.9484726224783862, + "grad_norm": 1.3068527496246771, + "learning_rate": 1.1676806914530388e-06, + "loss": 0.4847801923751831, + "step": 4114 + }, + { + "epoch": 0.9487031700288184, + "grad_norm": 1.4834802377017484, + "learning_rate": 1.1673049073843818e-06, + "loss": 0.3963737487792969, + "step": 4115 + }, + { + "epoch": 0.9489337175792507, + "grad_norm": 1.3472234175773388, + "learning_rate": 1.166929099008062e-06, + "loss": 0.4763021171092987, + "step": 4116 + }, + { + "epoch": 0.949164265129683, + "grad_norm": 1.334596061000354, + "learning_rate": 1.1665532663786808e-06, + "loss": 0.48453909158706665, + "step": 4117 + }, + { + "epoch": 0.9493948126801153, + "grad_norm": 1.5556497216649185, + "learning_rate": 1.1661774095508427e-06, + "loss": 0.4530877470970154, + "step": 4118 + }, + { + "epoch": 0.9496253602305476, + "grad_norm": 1.6439439863567922, + "learning_rate": 1.1658015285791556e-06, + "loss": 0.4559391438961029, + "step": 4119 + }, + { + "epoch": 0.9498559077809798, + "grad_norm": 1.2742689425270366, + "learning_rate": 1.1654256235182313e-06, + "loss": 0.4056190550327301, + "step": 4120 + }, + { + "epoch": 0.9500864553314121, + "grad_norm": 1.4031159218637288, + "learning_rate": 1.1650496944226846e-06, + "loss": 0.4559820294380188, + "step": 4121 + }, + { + "epoch": 0.9503170028818444, + "grad_norm": 1.7520001627092183, + "learning_rate": 1.1646737413471344e-06, + "loss": 0.5147289037704468, + "step": 4122 + }, + { + "epoch": 0.9505475504322767, + "grad_norm": 1.3527745912371363, + "learning_rate": 1.1642977643462026e-06, + "loss": 0.4429762363433838, + "step": 4123 + }, + { + "epoch": 0.950778097982709, + "grad_norm": 1.478213249860498, + "learning_rate": 1.1639217634745151e-06, + "loss": 0.5090504884719849, + "step": 4124 + }, + { + "epoch": 0.9510086455331412, + "grad_norm": 1.2839815904961693, + "learning_rate": 1.1635457387867001e-06, + "loss": 0.4712049067020416, + "step": 4125 + }, + { + "epoch": 0.9512391930835735, + "grad_norm": 1.422837273489116, + "learning_rate": 1.163169690337391e-06, + "loss": 0.4788722097873688, + "step": 4126 + }, + { + "epoch": 0.9514697406340058, + "grad_norm": 1.5690514399087891, + "learning_rate": 1.1627936181812232e-06, + "loss": 0.5272341370582581, + "step": 4127 + }, + { + "epoch": 0.9517002881844381, + "grad_norm": 1.3989516399360993, + "learning_rate": 1.162417522372836e-06, + "loss": 0.476615309715271, + "step": 4128 + }, + { + "epoch": 0.9519308357348704, + "grad_norm": 1.5696451971842293, + "learning_rate": 1.1620414029668723e-06, + "loss": 0.5830926895141602, + "step": 4129 + }, + { + "epoch": 0.9521613832853026, + "grad_norm": 1.5100186493547834, + "learning_rate": 1.1616652600179791e-06, + "loss": 0.4271087646484375, + "step": 4130 + }, + { + "epoch": 0.9523919308357349, + "grad_norm": 1.6424034375782277, + "learning_rate": 1.161289093580805e-06, + "loss": 0.450754851102829, + "step": 4131 + }, + { + "epoch": 0.9526224783861671, + "grad_norm": 1.6045811944232449, + "learning_rate": 1.1609129037100032e-06, + "loss": 0.49380576610565186, + "step": 4132 + }, + { + "epoch": 0.9528530259365994, + "grad_norm": 1.4716623019189798, + "learning_rate": 1.1605366904602307e-06, + "loss": 0.5741837024688721, + "step": 4133 + }, + { + "epoch": 0.9530835734870317, + "grad_norm": 1.425182799360816, + "learning_rate": 1.1601604538861472e-06, + "loss": 0.48263800144195557, + "step": 4134 + }, + { + "epoch": 0.9533141210374639, + "grad_norm": 1.4462099155347636, + "learning_rate": 1.1597841940424155e-06, + "loss": 0.48200082778930664, + "step": 4135 + }, + { + "epoch": 0.9535446685878962, + "grad_norm": 1.5671368311646794, + "learning_rate": 1.159407910983703e-06, + "loss": 0.46894192695617676, + "step": 4136 + }, + { + "epoch": 0.9537752161383285, + "grad_norm": 1.8281622353907203, + "learning_rate": 1.159031604764679e-06, + "loss": 0.4476175308227539, + "step": 4137 + }, + { + "epoch": 0.9540057636887608, + "grad_norm": 1.6057888319214861, + "learning_rate": 1.1586552754400174e-06, + "loss": 0.48476332426071167, + "step": 4138 + }, + { + "epoch": 0.9542363112391931, + "grad_norm": 1.6116514948965757, + "learning_rate": 1.1582789230643945e-06, + "loss": 0.5018280744552612, + "step": 4139 + }, + { + "epoch": 0.9544668587896253, + "grad_norm": 1.3285285890301965, + "learning_rate": 1.1579025476924911e-06, + "loss": 0.3942481279373169, + "step": 4140 + }, + { + "epoch": 0.9546974063400576, + "grad_norm": 1.4850340648020761, + "learning_rate": 1.1575261493789897e-06, + "loss": 0.4592955708503723, + "step": 4141 + }, + { + "epoch": 0.9549279538904899, + "grad_norm": 1.4813387420747084, + "learning_rate": 1.1571497281785775e-06, + "loss": 0.4634397029876709, + "step": 4142 + }, + { + "epoch": 0.9551585014409222, + "grad_norm": 1.3089084887646203, + "learning_rate": 1.1567732841459452e-06, + "loss": 0.47805315256118774, + "step": 4143 + }, + { + "epoch": 0.9553890489913545, + "grad_norm": 1.5017286949965145, + "learning_rate": 1.1563968173357849e-06, + "loss": 0.38585370779037476, + "step": 4144 + }, + { + "epoch": 0.9556195965417867, + "grad_norm": 1.6731042018184503, + "learning_rate": 1.1560203278027943e-06, + "loss": 0.5024282932281494, + "step": 4145 + }, + { + "epoch": 0.955850144092219, + "grad_norm": 1.435684415467379, + "learning_rate": 1.1556438156016731e-06, + "loss": 0.5150408744812012, + "step": 4146 + }, + { + "epoch": 0.9560806916426513, + "grad_norm": 1.3955310313452336, + "learning_rate": 1.1552672807871247e-06, + "loss": 0.4688153862953186, + "step": 4147 + }, + { + "epoch": 0.9563112391930836, + "grad_norm": 1.5113914175565248, + "learning_rate": 1.1548907234138555e-06, + "loss": 0.5097207427024841, + "step": 4148 + }, + { + "epoch": 0.9565417867435159, + "grad_norm": 1.4226145218267139, + "learning_rate": 1.1545141435365755e-06, + "loss": 0.5258755087852478, + "step": 4149 + }, + { + "epoch": 0.9567723342939481, + "grad_norm": 1.5209010048986649, + "learning_rate": 1.1541375412099977e-06, + "loss": 0.4533793330192566, + "step": 4150 + }, + { + "epoch": 0.9570028818443804, + "grad_norm": 1.4320238586920968, + "learning_rate": 1.1537609164888386e-06, + "loss": 0.4984521269798279, + "step": 4151 + }, + { + "epoch": 0.9572334293948127, + "grad_norm": 1.3166306039112776, + "learning_rate": 1.1533842694278182e-06, + "loss": 0.46638578176498413, + "step": 4152 + }, + { + "epoch": 0.957463976945245, + "grad_norm": 1.339478263676104, + "learning_rate": 1.153007600081659e-06, + "loss": 0.5370229482650757, + "step": 4153 + }, + { + "epoch": 0.9576945244956773, + "grad_norm": 1.794431535413407, + "learning_rate": 1.1526309085050869e-06, + "loss": 0.5898431539535522, + "step": 4154 + }, + { + "epoch": 0.9579250720461095, + "grad_norm": 1.3662613635895884, + "learning_rate": 1.1522541947528317e-06, + "loss": 0.4435083270072937, + "step": 4155 + }, + { + "epoch": 0.9581556195965418, + "grad_norm": 1.220784689368442, + "learning_rate": 1.1518774588796263e-06, + "loss": 0.4848114848136902, + "step": 4156 + }, + { + "epoch": 0.9583861671469741, + "grad_norm": 1.5830439158524807, + "learning_rate": 1.1515007009402056e-06, + "loss": 0.4745471477508545, + "step": 4157 + }, + { + "epoch": 0.9586167146974064, + "grad_norm": 1.403752142570791, + "learning_rate": 1.1511239209893093e-06, + "loss": 0.5204712152481079, + "step": 4158 + }, + { + "epoch": 0.9588472622478387, + "grad_norm": 1.4330320976709396, + "learning_rate": 1.1507471190816794e-06, + "loss": 0.4748867154121399, + "step": 4159 + }, + { + "epoch": 0.9590778097982708, + "grad_norm": 1.245034738772996, + "learning_rate": 1.150370295272061e-06, + "loss": 0.48487526178359985, + "step": 4160 + }, + { + "epoch": 0.9593083573487031, + "grad_norm": 1.4945063772490301, + "learning_rate": 1.1499934496152033e-06, + "loss": 0.5005877017974854, + "step": 4161 + }, + { + "epoch": 0.9595389048991354, + "grad_norm": 1.5714269938752097, + "learning_rate": 1.1496165821658577e-06, + "loss": 0.46235162019729614, + "step": 4162 + }, + { + "epoch": 0.9597694524495677, + "grad_norm": 1.4165164470664084, + "learning_rate": 1.149239692978779e-06, + "loss": 0.4043562412261963, + "step": 4163 + }, + { + "epoch": 0.96, + "grad_norm": 1.3191665324250768, + "learning_rate": 1.1488627821087254e-06, + "loss": 0.401640385389328, + "step": 4164 + }, + { + "epoch": 0.9602305475504322, + "grad_norm": 1.561998336702079, + "learning_rate": 1.1484858496104585e-06, + "loss": 0.4345995783805847, + "step": 4165 + }, + { + "epoch": 0.9604610951008645, + "grad_norm": 1.589739589742711, + "learning_rate": 1.1481088955387416e-06, + "loss": 0.4832232892513275, + "step": 4166 + }, + { + "epoch": 0.9606916426512968, + "grad_norm": 1.2721195951536524, + "learning_rate": 1.1477319199483433e-06, + "loss": 0.4984737038612366, + "step": 4167 + }, + { + "epoch": 0.9609221902017291, + "grad_norm": 1.415582783119745, + "learning_rate": 1.1473549228940341e-06, + "loss": 0.5141474604606628, + "step": 4168 + }, + { + "epoch": 0.9611527377521614, + "grad_norm": 1.3918933528998594, + "learning_rate": 1.1469779044305874e-06, + "loss": 0.46393290162086487, + "step": 4169 + }, + { + "epoch": 0.9613832853025936, + "grad_norm": 1.5054675813872291, + "learning_rate": 1.1466008646127798e-06, + "loss": 0.44959738850593567, + "step": 4170 + }, + { + "epoch": 0.9616138328530259, + "grad_norm": 1.6032904767337253, + "learning_rate": 1.146223803495392e-06, + "loss": 0.48199838399887085, + "step": 4171 + }, + { + "epoch": 0.9618443804034582, + "grad_norm": 1.48749369000813, + "learning_rate": 1.1458467211332066e-06, + "loss": 0.40777313709259033, + "step": 4172 + }, + { + "epoch": 0.9620749279538905, + "grad_norm": 1.5173779911426573, + "learning_rate": 1.1454696175810098e-06, + "loss": 0.48183488845825195, + "step": 4173 + }, + { + "epoch": 0.9623054755043228, + "grad_norm": 1.3962963258973355, + "learning_rate": 1.145092492893591e-06, + "loss": 0.5259125828742981, + "step": 4174 + }, + { + "epoch": 0.962536023054755, + "grad_norm": 1.4905678005204057, + "learning_rate": 1.1447153471257422e-06, + "loss": 0.5235443115234375, + "step": 4175 + }, + { + "epoch": 0.9627665706051873, + "grad_norm": 1.3760213189461934, + "learning_rate": 1.1443381803322588e-06, + "loss": 0.5166189670562744, + "step": 4176 + }, + { + "epoch": 0.9629971181556196, + "grad_norm": 1.5648914687391227, + "learning_rate": 1.1439609925679398e-06, + "loss": 0.4759864807128906, + "step": 4177 + }, + { + "epoch": 0.9632276657060519, + "grad_norm": 1.4276484295690437, + "learning_rate": 1.1435837838875854e-06, + "loss": 0.46920865774154663, + "step": 4178 + }, + { + "epoch": 0.9634582132564842, + "grad_norm": 1.735858922856514, + "learning_rate": 1.1432065543460015e-06, + "loss": 0.5612783432006836, + "step": 4179 + }, + { + "epoch": 0.9636887608069165, + "grad_norm": 1.356617595870239, + "learning_rate": 1.1428293039979947e-06, + "loss": 0.5130957365036011, + "step": 4180 + }, + { + "epoch": 0.9639193083573487, + "grad_norm": 1.4484495019945183, + "learning_rate": 1.1424520328983762e-06, + "loss": 0.49154847860336304, + "step": 4181 + }, + { + "epoch": 0.964149855907781, + "grad_norm": 1.0824944418575104, + "learning_rate": 1.1420747411019588e-06, + "loss": 0.3869001567363739, + "step": 4182 + }, + { + "epoch": 0.9643804034582133, + "grad_norm": 1.4337113954478253, + "learning_rate": 1.1416974286635597e-06, + "loss": 0.4921383559703827, + "step": 4183 + }, + { + "epoch": 0.9646109510086456, + "grad_norm": 1.2793039473507872, + "learning_rate": 1.1413200956379985e-06, + "loss": 0.4888332486152649, + "step": 4184 + }, + { + "epoch": 0.9648414985590779, + "grad_norm": 1.6971333270171032, + "learning_rate": 1.1409427420800971e-06, + "loss": 0.5268207788467407, + "step": 4185 + }, + { + "epoch": 0.9650720461095101, + "grad_norm": 1.4734570779566258, + "learning_rate": 1.1405653680446815e-06, + "loss": 0.5089021921157837, + "step": 4186 + }, + { + "epoch": 0.9653025936599423, + "grad_norm": 1.443469066686051, + "learning_rate": 1.1401879735865805e-06, + "loss": 0.4346863627433777, + "step": 4187 + }, + { + "epoch": 0.9655331412103746, + "grad_norm": 1.5343802406915352, + "learning_rate": 1.139810558760625e-06, + "loss": 0.5371497869491577, + "step": 4188 + }, + { + "epoch": 0.9657636887608069, + "grad_norm": 1.4494296397979445, + "learning_rate": 1.1394331236216498e-06, + "loss": 0.41937965154647827, + "step": 4189 + }, + { + "epoch": 0.9659942363112392, + "grad_norm": 1.8240668562947975, + "learning_rate": 1.1390556682244925e-06, + "loss": 0.5104124546051025, + "step": 4190 + }, + { + "epoch": 0.9662247838616714, + "grad_norm": 1.531665150197191, + "learning_rate": 1.1386781926239927e-06, + "loss": 0.4532161355018616, + "step": 4191 + }, + { + "epoch": 0.9664553314121037, + "grad_norm": 1.64841184801151, + "learning_rate": 1.1383006968749947e-06, + "loss": 0.49004897475242615, + "step": 4192 + }, + { + "epoch": 0.966685878962536, + "grad_norm": 1.3461756503958848, + "learning_rate": 1.1379231810323438e-06, + "loss": 0.5654203295707703, + "step": 4193 + }, + { + "epoch": 0.9669164265129683, + "grad_norm": 1.448708484238038, + "learning_rate": 1.1375456451508894e-06, + "loss": 0.4646185636520386, + "step": 4194 + }, + { + "epoch": 0.9671469740634006, + "grad_norm": 1.4243458240080633, + "learning_rate": 1.1371680892854838e-06, + "loss": 0.41903266310691833, + "step": 4195 + }, + { + "epoch": 0.9673775216138328, + "grad_norm": 1.6130252006332106, + "learning_rate": 1.1367905134909817e-06, + "loss": 0.4434237480163574, + "step": 4196 + }, + { + "epoch": 0.9676080691642651, + "grad_norm": 1.4819674775092757, + "learning_rate": 1.1364129178222413e-06, + "loss": 0.48367393016815186, + "step": 4197 + }, + { + "epoch": 0.9678386167146974, + "grad_norm": 1.2984099347552687, + "learning_rate": 1.1360353023341225e-06, + "loss": 0.4555853009223938, + "step": 4198 + }, + { + "epoch": 0.9680691642651297, + "grad_norm": 1.4467890861045218, + "learning_rate": 1.1356576670814895e-06, + "loss": 0.4551926851272583, + "step": 4199 + }, + { + "epoch": 0.968299711815562, + "grad_norm": 1.3665755006252467, + "learning_rate": 1.135280012119209e-06, + "loss": 0.4708002805709839, + "step": 4200 + }, + { + "epoch": 0.9685302593659942, + "grad_norm": 1.5757117044568527, + "learning_rate": 1.13490233750215e-06, + "loss": 0.4727732837200165, + "step": 4201 + }, + { + "epoch": 0.9687608069164265, + "grad_norm": 1.4810458497183263, + "learning_rate": 1.1345246432851846e-06, + "loss": 0.4873931407928467, + "step": 4202 + }, + { + "epoch": 0.9689913544668588, + "grad_norm": 1.2376264936878605, + "learning_rate": 1.1341469295231879e-06, + "loss": 0.5011585354804993, + "step": 4203 + }, + { + "epoch": 0.9692219020172911, + "grad_norm": 1.550445723931056, + "learning_rate": 1.1337691962710377e-06, + "loss": 0.5259454250335693, + "step": 4204 + }, + { + "epoch": 0.9694524495677234, + "grad_norm": 1.475427860284367, + "learning_rate": 1.1333914435836152e-06, + "loss": 0.4858463406562805, + "step": 4205 + }, + { + "epoch": 0.9696829971181556, + "grad_norm": 1.5183455256295848, + "learning_rate": 1.1330136715158034e-06, + "loss": 0.3388391137123108, + "step": 4206 + }, + { + "epoch": 0.9699135446685879, + "grad_norm": 1.6855547609136186, + "learning_rate": 1.132635880122489e-06, + "loss": 0.47413328289985657, + "step": 4207 + }, + { + "epoch": 0.9701440922190202, + "grad_norm": 1.7557933633332012, + "learning_rate": 1.1322580694585608e-06, + "loss": 0.40621912479400635, + "step": 4208 + }, + { + "epoch": 0.9703746397694525, + "grad_norm": 1.3208613343258073, + "learning_rate": 1.1318802395789111e-06, + "loss": 0.49066615104675293, + "step": 4209 + }, + { + "epoch": 0.9706051873198848, + "grad_norm": 1.3654778936529173, + "learning_rate": 1.1315023905384346e-06, + "loss": 0.3960093557834625, + "step": 4210 + }, + { + "epoch": 0.970835734870317, + "grad_norm": 1.827857800057562, + "learning_rate": 1.1311245223920285e-06, + "loss": 0.49717360734939575, + "step": 4211 + }, + { + "epoch": 0.9710662824207493, + "grad_norm": 1.4503837255791163, + "learning_rate": 1.1307466351945934e-06, + "loss": 0.47067588567733765, + "step": 4212 + }, + { + "epoch": 0.9712968299711816, + "grad_norm": 1.5495719646282835, + "learning_rate": 1.1303687290010327e-06, + "loss": 0.5154353380203247, + "step": 4213 + }, + { + "epoch": 0.9715273775216138, + "grad_norm": 1.4525324547254197, + "learning_rate": 1.1299908038662512e-06, + "loss": 0.4328816831111908, + "step": 4214 + }, + { + "epoch": 0.9717579250720461, + "grad_norm": 1.536944432116303, + "learning_rate": 1.1296128598451584e-06, + "loss": 0.43496429920196533, + "step": 4215 + }, + { + "epoch": 0.9719884726224783, + "grad_norm": 2.4125329863559943, + "learning_rate": 1.129234896992666e-06, + "loss": 0.4734534025192261, + "step": 4216 + }, + { + "epoch": 0.9722190201729106, + "grad_norm": 1.2552514953990885, + "learning_rate": 1.128856915363687e-06, + "loss": 0.46757328510284424, + "step": 4217 + }, + { + "epoch": 0.9724495677233429, + "grad_norm": 1.4054270022856143, + "learning_rate": 1.1284789150131387e-06, + "loss": 0.45532792806625366, + "step": 4218 + }, + { + "epoch": 0.9726801152737752, + "grad_norm": 1.668526261128857, + "learning_rate": 1.1281008959959403e-06, + "loss": 0.45022863149642944, + "step": 4219 + }, + { + "epoch": 0.9729106628242075, + "grad_norm": 1.6148563350303777, + "learning_rate": 1.1277228583670146e-06, + "loss": 0.48149728775024414, + "step": 4220 + }, + { + "epoch": 0.9731412103746397, + "grad_norm": 1.3290471777296209, + "learning_rate": 1.1273448021812863e-06, + "loss": 0.48621249198913574, + "step": 4221 + }, + { + "epoch": 0.973371757925072, + "grad_norm": 1.4123073963135984, + "learning_rate": 1.126966727493683e-06, + "loss": 0.4082604944705963, + "step": 4222 + }, + { + "epoch": 0.9736023054755043, + "grad_norm": 1.4706751776044729, + "learning_rate": 1.1265886343591348e-06, + "loss": 0.46301013231277466, + "step": 4223 + }, + { + "epoch": 0.9738328530259366, + "grad_norm": 1.742370781706499, + "learning_rate": 1.1262105228325751e-06, + "loss": 0.5629076957702637, + "step": 4224 + }, + { + "epoch": 0.9740634005763689, + "grad_norm": 1.6392739760147546, + "learning_rate": 1.1258323929689395e-06, + "loss": 0.4499921202659607, + "step": 4225 + }, + { + "epoch": 0.9742939481268011, + "grad_norm": 1.5255138727661328, + "learning_rate": 1.125454244823166e-06, + "loss": 0.4177723228931427, + "step": 4226 + }, + { + "epoch": 0.9745244956772334, + "grad_norm": 1.6446007209672973, + "learning_rate": 1.1250760784501956e-06, + "loss": 0.5151868462562561, + "step": 4227 + }, + { + "epoch": 0.9747550432276657, + "grad_norm": 1.5356213745996614, + "learning_rate": 1.1246978939049724e-06, + "loss": 0.461150586605072, + "step": 4228 + }, + { + "epoch": 0.974985590778098, + "grad_norm": 1.3822737934397757, + "learning_rate": 1.1243196912424426e-06, + "loss": 0.4268735945224762, + "step": 4229 + }, + { + "epoch": 0.9752161383285303, + "grad_norm": 1.2292644910946908, + "learning_rate": 1.1239414705175547e-06, + "loss": 0.41963332891464233, + "step": 4230 + }, + { + "epoch": 0.9754466858789625, + "grad_norm": 1.66032983081408, + "learning_rate": 1.1235632317852604e-06, + "loss": 0.5295606255531311, + "step": 4231 + }, + { + "epoch": 0.9756772334293948, + "grad_norm": 1.4509344335905492, + "learning_rate": 1.123184975100514e-06, + "loss": 0.494422972202301, + "step": 4232 + }, + { + "epoch": 0.9759077809798271, + "grad_norm": 1.3661883066888274, + "learning_rate": 1.1228067005182722e-06, + "loss": 0.4872778058052063, + "step": 4233 + }, + { + "epoch": 0.9761383285302594, + "grad_norm": 1.4615558807186686, + "learning_rate": 1.1224284080934944e-06, + "loss": 0.5760270357131958, + "step": 4234 + }, + { + "epoch": 0.9763688760806917, + "grad_norm": 1.4806023359508287, + "learning_rate": 1.1220500978811425e-06, + "loss": 0.46235722303390503, + "step": 4235 + }, + { + "epoch": 0.976599423631124, + "grad_norm": 1.9622654190849842, + "learning_rate": 1.1216717699361808e-06, + "loss": 0.5219517946243286, + "step": 4236 + }, + { + "epoch": 0.9768299711815562, + "grad_norm": 1.4798295064212674, + "learning_rate": 1.121293424313577e-06, + "loss": 0.5163636207580566, + "step": 4237 + }, + { + "epoch": 0.9770605187319885, + "grad_norm": 1.2412310305077237, + "learning_rate": 1.1209150610683005e-06, + "loss": 0.4972173571586609, + "step": 4238 + }, + { + "epoch": 0.9772910662824208, + "grad_norm": 1.7411076626589757, + "learning_rate": 1.1205366802553228e-06, + "loss": 0.5234299898147583, + "step": 4239 + }, + { + "epoch": 0.9775216138328531, + "grad_norm": 1.540993516834482, + "learning_rate": 1.12015828192962e-06, + "loss": 0.5402883291244507, + "step": 4240 + }, + { + "epoch": 0.9777521613832854, + "grad_norm": 1.6444434333723918, + "learning_rate": 1.1197798661461687e-06, + "loss": 0.5082305073738098, + "step": 4241 + }, + { + "epoch": 0.9779827089337175, + "grad_norm": 1.7783542476181406, + "learning_rate": 1.1194014329599491e-06, + "loss": 0.49959874153137207, + "step": 4242 + }, + { + "epoch": 0.9782132564841498, + "grad_norm": 1.6275513366341, + "learning_rate": 1.1190229824259427e-06, + "loss": 0.4957766532897949, + "step": 4243 + }, + { + "epoch": 0.9784438040345821, + "grad_norm": 1.4378821851714094, + "learning_rate": 1.118644514599136e-06, + "loss": 0.5325085520744324, + "step": 4244 + }, + { + "epoch": 0.9786743515850144, + "grad_norm": 1.555991168807003, + "learning_rate": 1.118266029534515e-06, + "loss": 0.4923267364501953, + "step": 4245 + }, + { + "epoch": 0.9789048991354466, + "grad_norm": 1.5474363935948672, + "learning_rate": 1.1178875272870706e-06, + "loss": 0.4696193337440491, + "step": 4246 + }, + { + "epoch": 0.9791354466858789, + "grad_norm": 1.5847121893215383, + "learning_rate": 1.1175090079117944e-06, + "loss": 0.4032594561576843, + "step": 4247 + }, + { + "epoch": 0.9793659942363112, + "grad_norm": 1.236816331938737, + "learning_rate": 1.1171304714636817e-06, + "loss": 0.41643059253692627, + "step": 4248 + }, + { + "epoch": 0.9795965417867435, + "grad_norm": 1.3517082904179765, + "learning_rate": 1.1167519179977305e-06, + "loss": 0.44701042771339417, + "step": 4249 + }, + { + "epoch": 0.9798270893371758, + "grad_norm": 1.6281507254385772, + "learning_rate": 1.11637334756894e-06, + "loss": 0.5249829888343811, + "step": 4250 + }, + { + "epoch": 0.980057636887608, + "grad_norm": 1.395017390893199, + "learning_rate": 1.1159947602323126e-06, + "loss": 0.44194433093070984, + "step": 4251 + }, + { + "epoch": 0.9802881844380403, + "grad_norm": 1.617259594653265, + "learning_rate": 1.115616156042853e-06, + "loss": 0.4252261817455292, + "step": 4252 + }, + { + "epoch": 0.9805187319884726, + "grad_norm": 1.465003293721735, + "learning_rate": 1.115237535055569e-06, + "loss": 0.4745545983314514, + "step": 4253 + }, + { + "epoch": 0.9807492795389049, + "grad_norm": 1.3491129015710808, + "learning_rate": 1.11485889732547e-06, + "loss": 0.4565798044204712, + "step": 4254 + }, + { + "epoch": 0.9809798270893372, + "grad_norm": 1.4736219803772463, + "learning_rate": 1.1144802429075676e-06, + "loss": 0.5221571922302246, + "step": 4255 + }, + { + "epoch": 0.9812103746397695, + "grad_norm": 1.295490167048315, + "learning_rate": 1.114101571856877e-06, + "loss": 0.4520608186721802, + "step": 4256 + }, + { + "epoch": 0.9814409221902017, + "grad_norm": 1.422747328343904, + "learning_rate": 1.1137228842284154e-06, + "loss": 0.5013391375541687, + "step": 4257 + }, + { + "epoch": 0.981671469740634, + "grad_norm": 1.3110684205211343, + "learning_rate": 1.1133441800772015e-06, + "loss": 0.4168761968612671, + "step": 4258 + }, + { + "epoch": 0.9819020172910663, + "grad_norm": 1.3998776606261594, + "learning_rate": 1.1129654594582572e-06, + "loss": 0.49070900678634644, + "step": 4259 + }, + { + "epoch": 0.9821325648414986, + "grad_norm": 1.4441181086061512, + "learning_rate": 1.112586722426607e-06, + "loss": 0.37209975719451904, + "step": 4260 + }, + { + "epoch": 0.9823631123919309, + "grad_norm": 1.334384584407338, + "learning_rate": 1.112207969037277e-06, + "loss": 0.4262358248233795, + "step": 4261 + }, + { + "epoch": 0.9825936599423631, + "grad_norm": 1.3159343622874629, + "learning_rate": 1.1118291993452966e-06, + "loss": 0.43453359603881836, + "step": 4262 + }, + { + "epoch": 0.9828242074927954, + "grad_norm": 1.303119108796015, + "learning_rate": 1.1114504134056967e-06, + "loss": 0.47000735998153687, + "step": 4263 + }, + { + "epoch": 0.9830547550432277, + "grad_norm": 1.4041476427264843, + "learning_rate": 1.1110716112735113e-06, + "loss": 0.5550954341888428, + "step": 4264 + }, + { + "epoch": 0.98328530259366, + "grad_norm": 1.2951063796923508, + "learning_rate": 1.1106927930037759e-06, + "loss": 0.5161601901054382, + "step": 4265 + }, + { + "epoch": 0.9835158501440923, + "grad_norm": 1.269823955589903, + "learning_rate": 1.1103139586515295e-06, + "loss": 0.49598196148872375, + "step": 4266 + }, + { + "epoch": 0.9837463976945245, + "grad_norm": 1.2479693796740894, + "learning_rate": 1.1099351082718127e-06, + "loss": 0.4219573736190796, + "step": 4267 + }, + { + "epoch": 0.9839769452449568, + "grad_norm": 1.5519518921664053, + "learning_rate": 1.1095562419196678e-06, + "loss": 0.4803532660007477, + "step": 4268 + }, + { + "epoch": 0.984207492795389, + "grad_norm": 1.5190785508850868, + "learning_rate": 1.1091773596501408e-06, + "loss": 0.5081897377967834, + "step": 4269 + }, + { + "epoch": 0.9844380403458213, + "grad_norm": 1.40111882638203, + "learning_rate": 1.1087984615182795e-06, + "loss": 0.3814433217048645, + "step": 4270 + }, + { + "epoch": 0.9846685878962536, + "grad_norm": 1.5915243108834793, + "learning_rate": 1.108419547579133e-06, + "loss": 0.43628042936325073, + "step": 4271 + }, + { + "epoch": 0.9848991354466858, + "grad_norm": 1.527363813385048, + "learning_rate": 1.1080406178877543e-06, + "loss": 0.4480942487716675, + "step": 4272 + }, + { + "epoch": 0.9851296829971181, + "grad_norm": 1.4235030288497876, + "learning_rate": 1.1076616724991983e-06, + "loss": 0.4736294746398926, + "step": 4273 + }, + { + "epoch": 0.9853602305475504, + "grad_norm": 1.4964534780647072, + "learning_rate": 1.1072827114685206e-06, + "loss": 0.4558507800102234, + "step": 4274 + }, + { + "epoch": 0.9855907780979827, + "grad_norm": 1.3128324817273487, + "learning_rate": 1.1069037348507812e-06, + "loss": 0.43823373317718506, + "step": 4275 + }, + { + "epoch": 0.985821325648415, + "grad_norm": 1.56555561288982, + "learning_rate": 1.1065247427010414e-06, + "loss": 0.5391641855239868, + "step": 4276 + }, + { + "epoch": 0.9860518731988472, + "grad_norm": 1.4062251041212817, + "learning_rate": 1.106145735074364e-06, + "loss": 0.5643177628517151, + "step": 4277 + }, + { + "epoch": 0.9862824207492795, + "grad_norm": 1.530134885597804, + "learning_rate": 1.1057667120258163e-06, + "loss": 0.49635857343673706, + "step": 4278 + }, + { + "epoch": 0.9865129682997118, + "grad_norm": 1.6216547009902138, + "learning_rate": 1.1053876736104654e-06, + "loss": 0.5055712461471558, + "step": 4279 + }, + { + "epoch": 0.9867435158501441, + "grad_norm": 1.392265005140282, + "learning_rate": 1.1050086198833815e-06, + "loss": 0.4772811830043793, + "step": 4280 + }, + { + "epoch": 0.9869740634005764, + "grad_norm": 1.4534901655783465, + "learning_rate": 1.1046295508996377e-06, + "loss": 0.5212994813919067, + "step": 4281 + }, + { + "epoch": 0.9872046109510086, + "grad_norm": 1.7569793496789246, + "learning_rate": 1.1042504667143089e-06, + "loss": 0.5265607237815857, + "step": 4282 + }, + { + "epoch": 0.9874351585014409, + "grad_norm": 1.5716464684568976, + "learning_rate": 1.1038713673824713e-06, + "loss": 0.5931172370910645, + "step": 4283 + }, + { + "epoch": 0.9876657060518732, + "grad_norm": 1.835044010809655, + "learning_rate": 1.1034922529592046e-06, + "loss": 0.37950432300567627, + "step": 4284 + }, + { + "epoch": 0.9878962536023055, + "grad_norm": 1.513292412653091, + "learning_rate": 1.1031131234995905e-06, + "loss": 0.5008571147918701, + "step": 4285 + }, + { + "epoch": 0.9881268011527378, + "grad_norm": 1.4057938261159666, + "learning_rate": 1.102733979058712e-06, + "loss": 0.5004929304122925, + "step": 4286 + }, + { + "epoch": 0.98835734870317, + "grad_norm": 1.7178021958689218, + "learning_rate": 1.1023548196916553e-06, + "loss": 0.5646581649780273, + "step": 4287 + }, + { + "epoch": 0.9885878962536023, + "grad_norm": 1.3763606862881304, + "learning_rate": 1.1019756454535078e-06, + "loss": 0.4696485996246338, + "step": 4288 + }, + { + "epoch": 0.9888184438040346, + "grad_norm": 1.477189841532629, + "learning_rate": 1.1015964563993598e-06, + "loss": 0.5698454976081848, + "step": 4289 + }, + { + "epoch": 0.9890489913544669, + "grad_norm": 1.4341233392476178, + "learning_rate": 1.1012172525843037e-06, + "loss": 0.44907110929489136, + "step": 4290 + }, + { + "epoch": 0.9892795389048992, + "grad_norm": 1.3904171575550368, + "learning_rate": 1.1008380340634337e-06, + "loss": 0.447033554315567, + "step": 4291 + }, + { + "epoch": 0.9895100864553315, + "grad_norm": 1.289432455784493, + "learning_rate": 1.1004588008918466e-06, + "loss": 0.4011508822441101, + "step": 4292 + }, + { + "epoch": 0.9897406340057637, + "grad_norm": 1.333931067199064, + "learning_rate": 1.1000795531246406e-06, + "loss": 0.4825517237186432, + "step": 4293 + }, + { + "epoch": 0.989971181556196, + "grad_norm": 1.4931399552561173, + "learning_rate": 1.0997002908169163e-06, + "loss": 0.5283117294311523, + "step": 4294 + }, + { + "epoch": 0.9902017291066283, + "grad_norm": 1.5482168678011143, + "learning_rate": 1.0993210140237777e-06, + "loss": 0.48783427476882935, + "step": 4295 + }, + { + "epoch": 0.9904322766570606, + "grad_norm": 1.5566253013073328, + "learning_rate": 1.0989417228003283e-06, + "loss": 0.4624701738357544, + "step": 4296 + }, + { + "epoch": 0.9906628242074927, + "grad_norm": 1.5344364140491837, + "learning_rate": 1.0985624172016759e-06, + "loss": 0.3982674181461334, + "step": 4297 + }, + { + "epoch": 0.990893371757925, + "grad_norm": 1.3341170614191478, + "learning_rate": 1.09818309728293e-06, + "loss": 0.507615864276886, + "step": 4298 + }, + { + "epoch": 0.9911239193083573, + "grad_norm": 1.2567976733410893, + "learning_rate": 1.0978037630992012e-06, + "loss": 0.46235620975494385, + "step": 4299 + }, + { + "epoch": 0.9913544668587896, + "grad_norm": 1.7589261659786115, + "learning_rate": 1.097424414705603e-06, + "loss": 0.4740482568740845, + "step": 4300 + }, + { + "epoch": 0.9915850144092219, + "grad_norm": 1.4084586130190382, + "learning_rate": 1.0970450521572508e-06, + "loss": 0.45482778549194336, + "step": 4301 + }, + { + "epoch": 0.9918155619596541, + "grad_norm": 1.406956819388606, + "learning_rate": 1.096665675509262e-06, + "loss": 0.4501914978027344, + "step": 4302 + }, + { + "epoch": 0.9920461095100864, + "grad_norm": 1.4314348115046323, + "learning_rate": 1.0962862848167564e-06, + "loss": 0.3777778446674347, + "step": 4303 + }, + { + "epoch": 0.9922766570605187, + "grad_norm": 1.6582396145460987, + "learning_rate": 1.0959068801348552e-06, + "loss": 0.4541711211204529, + "step": 4304 + }, + { + "epoch": 0.992507204610951, + "grad_norm": 1.6170437681024339, + "learning_rate": 1.0955274615186818e-06, + "loss": 0.5733718872070312, + "step": 4305 + }, + { + "epoch": 0.9927377521613833, + "grad_norm": 1.6380431554541108, + "learning_rate": 1.0951480290233622e-06, + "loss": 0.5367584228515625, + "step": 4306 + }, + { + "epoch": 0.9929682997118155, + "grad_norm": 1.5021041158954818, + "learning_rate": 1.0947685827040238e-06, + "loss": 0.458662748336792, + "step": 4307 + }, + { + "epoch": 0.9931988472622478, + "grad_norm": 1.5550346941158644, + "learning_rate": 1.0943891226157961e-06, + "loss": 0.457683265209198, + "step": 4308 + }, + { + "epoch": 0.9934293948126801, + "grad_norm": 1.452724464754786, + "learning_rate": 1.094009648813811e-06, + "loss": 0.4951286315917969, + "step": 4309 + }, + { + "epoch": 0.9936599423631124, + "grad_norm": 1.5676198431506971, + "learning_rate": 1.0936301613532016e-06, + "loss": 0.6215308904647827, + "step": 4310 + }, + { + "epoch": 0.9938904899135447, + "grad_norm": 1.90910681269644, + "learning_rate": 1.0932506602891041e-06, + "loss": 0.5981507301330566, + "step": 4311 + }, + { + "epoch": 0.994121037463977, + "grad_norm": 1.5298286854468381, + "learning_rate": 1.0928711456766552e-06, + "loss": 0.42787450551986694, + "step": 4312 + }, + { + "epoch": 0.9943515850144092, + "grad_norm": 1.7160093773370675, + "learning_rate": 1.0924916175709952e-06, + "loss": 0.4542294442653656, + "step": 4313 + }, + { + "epoch": 0.9945821325648415, + "grad_norm": 1.3930381925713722, + "learning_rate": 1.0921120760272654e-06, + "loss": 0.5470898151397705, + "step": 4314 + }, + { + "epoch": 0.9948126801152738, + "grad_norm": 1.6713314091042928, + "learning_rate": 1.0917325211006095e-06, + "loss": 0.469581663608551, + "step": 4315 + }, + { + "epoch": 0.9950432276657061, + "grad_norm": 1.5294920571651511, + "learning_rate": 1.091352952846172e-06, + "loss": 0.44600075483322144, + "step": 4316 + }, + { + "epoch": 0.9952737752161384, + "grad_norm": 1.2648510479760464, + "learning_rate": 1.0909733713191011e-06, + "loss": 0.44750702381134033, + "step": 4317 + }, + { + "epoch": 0.9955043227665706, + "grad_norm": 1.340203440617395, + "learning_rate": 1.0905937765745457e-06, + "loss": 0.5289393067359924, + "step": 4318 + }, + { + "epoch": 0.9957348703170029, + "grad_norm": 1.5607494499506507, + "learning_rate": 1.0902141686676569e-06, + "loss": 0.5402871966362, + "step": 4319 + }, + { + "epoch": 0.9959654178674352, + "grad_norm": 1.5050684797855236, + "learning_rate": 1.089834547653588e-06, + "loss": 0.522109866142273, + "step": 4320 + }, + { + "epoch": 0.9961959654178675, + "grad_norm": 1.4180672798377163, + "learning_rate": 1.0894549135874935e-06, + "loss": 0.41185298562049866, + "step": 4321 + }, + { + "epoch": 0.9964265129682998, + "grad_norm": 1.4566310616338858, + "learning_rate": 1.089075266524531e-06, + "loss": 0.5705811977386475, + "step": 4322 + }, + { + "epoch": 0.996657060518732, + "grad_norm": 1.470813920648617, + "learning_rate": 1.0886956065198586e-06, + "loss": 0.560600996017456, + "step": 4323 + }, + { + "epoch": 0.9968876080691642, + "grad_norm": 1.8636354943852207, + "learning_rate": 1.088315933628638e-06, + "loss": 0.4886099100112915, + "step": 4324 + }, + { + "epoch": 0.9971181556195965, + "grad_norm": 1.465916297308428, + "learning_rate": 1.0879362479060301e-06, + "loss": 0.5046563148498535, + "step": 4325 + }, + { + "epoch": 0.9973487031700288, + "grad_norm": 1.8418351566021292, + "learning_rate": 1.0875565494072003e-06, + "loss": 0.5192925930023193, + "step": 4326 + }, + { + "epoch": 0.997579250720461, + "grad_norm": 1.7255732387200025, + "learning_rate": 1.0871768381873152e-06, + "loss": 0.41461387276649475, + "step": 4327 + }, + { + "epoch": 0.9978097982708933, + "grad_norm": 1.423557709546974, + "learning_rate": 1.0867971143015423e-06, + "loss": 0.4424000680446625, + "step": 4328 + }, + { + "epoch": 0.9980403458213256, + "grad_norm": 1.3391499583858686, + "learning_rate": 1.086417377805051e-06, + "loss": 0.5320795774459839, + "step": 4329 + }, + { + "epoch": 0.9982708933717579, + "grad_norm": 1.32007573291002, + "learning_rate": 1.0860376287530144e-06, + "loss": 0.444003701210022, + "step": 4330 + }, + { + "epoch": 0.9985014409221902, + "grad_norm": 1.4760538613024474, + "learning_rate": 1.085657867200605e-06, + "loss": 0.5243555307388306, + "step": 4331 + }, + { + "epoch": 0.9987319884726225, + "grad_norm": 1.4502781068225374, + "learning_rate": 1.0852780932029985e-06, + "loss": 0.4801725149154663, + "step": 4332 + }, + { + "epoch": 0.9989625360230547, + "grad_norm": 1.2833884817247785, + "learning_rate": 1.0848983068153725e-06, + "loss": 0.45034441351890564, + "step": 4333 + }, + { + "epoch": 0.999193083573487, + "grad_norm": 1.5849689437640124, + "learning_rate": 1.0845185080929055e-06, + "loss": 0.5011032819747925, + "step": 4334 + }, + { + "epoch": 0.9994236311239193, + "grad_norm": 1.4546910942796216, + "learning_rate": 1.0841386970907784e-06, + "loss": 0.5422689914703369, + "step": 4335 + }, + { + "epoch": 0.9996541786743516, + "grad_norm": 1.4006767595332805, + "learning_rate": 1.0837588738641742e-06, + "loss": 0.48687103390693665, + "step": 4336 + }, + { + "epoch": 0.9998847262247839, + "grad_norm": 1.4288954711940178, + "learning_rate": 1.083379038468277e-06, + "loss": 0.47676515579223633, + "step": 4337 + }, + { + "epoch": 1.0, + "grad_norm": 3.106354912289367, + "learning_rate": 1.0829991909582724e-06, + "loss": 0.4786119759082794, + "step": 4338 + }, + { + "epoch": 1.0002305475504323, + "grad_norm": 1.3871434401118967, + "learning_rate": 1.0826193313893492e-06, + "loss": 0.4178802967071533, + "step": 4339 + }, + { + "epoch": 1.0004610951008646, + "grad_norm": 1.5442462018887193, + "learning_rate": 1.0822394598166965e-06, + "loss": 0.5447506904602051, + "step": 4340 + }, + { + "epoch": 1.0006916426512968, + "grad_norm": 1.5787095830935702, + "learning_rate": 1.0818595762955057e-06, + "loss": 0.4838123321533203, + "step": 4341 + }, + { + "epoch": 1.0009221902017291, + "grad_norm": 1.1408717499721517, + "learning_rate": 1.08147968088097e-06, + "loss": 0.4060467481613159, + "step": 4342 + }, + { + "epoch": 1.0011527377521614, + "grad_norm": 1.7147604581934335, + "learning_rate": 1.0810997736282846e-06, + "loss": 0.5802867412567139, + "step": 4343 + }, + { + "epoch": 1.0013832853025937, + "grad_norm": 1.4674016544687594, + "learning_rate": 1.0807198545926456e-06, + "loss": 0.40671414136886597, + "step": 4344 + }, + { + "epoch": 1.001613832853026, + "grad_norm": 1.4142081560028723, + "learning_rate": 1.0803399238292511e-06, + "loss": 0.49008500576019287, + "step": 4345 + }, + { + "epoch": 1.0018443804034582, + "grad_norm": 1.4937256123283484, + "learning_rate": 1.0799599813933018e-06, + "loss": 0.45358604192733765, + "step": 4346 + }, + { + "epoch": 1.0020749279538905, + "grad_norm": 1.4709246428352223, + "learning_rate": 1.0795800273399991e-06, + "loss": 0.44286537170410156, + "step": 4347 + }, + { + "epoch": 1.0023054755043228, + "grad_norm": 1.4440445642380524, + "learning_rate": 1.079200061724546e-06, + "loss": 0.41197699308395386, + "step": 4348 + }, + { + "epoch": 1.002536023054755, + "grad_norm": 1.4020363593613934, + "learning_rate": 1.0788200846021483e-06, + "loss": 0.5265473127365112, + "step": 4349 + }, + { + "epoch": 1.0027665706051874, + "grad_norm": 1.626211002371467, + "learning_rate": 1.0784400960280115e-06, + "loss": 0.5782182216644287, + "step": 4350 + }, + { + "epoch": 1.0029971181556196, + "grad_norm": 1.5244546461931463, + "learning_rate": 1.0780600960573452e-06, + "loss": 0.46403801441192627, + "step": 4351 + }, + { + "epoch": 1.003227665706052, + "grad_norm": 1.2339365132906865, + "learning_rate": 1.0776800847453592e-06, + "loss": 0.469561368227005, + "step": 4352 + }, + { + "epoch": 1.0034582132564842, + "grad_norm": 1.2673633475315595, + "learning_rate": 1.0773000621472646e-06, + "loss": 0.5608446002006531, + "step": 4353 + }, + { + "epoch": 1.0036887608069165, + "grad_norm": 1.6480925340630606, + "learning_rate": 1.0769200283182752e-06, + "loss": 0.534286379814148, + "step": 4354 + }, + { + "epoch": 1.0039193083573488, + "grad_norm": 1.592904048213369, + "learning_rate": 1.076539983313606e-06, + "loss": 0.4879153370857239, + "step": 4355 + }, + { + "epoch": 1.004149855907781, + "grad_norm": 1.7289069037550475, + "learning_rate": 1.076159927188473e-06, + "loss": 0.4208434224128723, + "step": 4356 + }, + { + "epoch": 1.0043804034582133, + "grad_norm": 1.4619714495069853, + "learning_rate": 1.0757798599980954e-06, + "loss": 0.5276066064834595, + "step": 4357 + }, + { + "epoch": 1.0046109510086456, + "grad_norm": 1.476134912638207, + "learning_rate": 1.0753997817976923e-06, + "loss": 0.44685155153274536, + "step": 4358 + }, + { + "epoch": 1.004841498559078, + "grad_norm": 1.3336692974635336, + "learning_rate": 1.0750196926424852e-06, + "loss": 0.4210960865020752, + "step": 4359 + }, + { + "epoch": 1.0050720461095102, + "grad_norm": 1.487533331972068, + "learning_rate": 1.0746395925876972e-06, + "loss": 0.5184726715087891, + "step": 4360 + }, + { + "epoch": 1.0053025936599425, + "grad_norm": 1.4533904323160831, + "learning_rate": 1.0742594816885528e-06, + "loss": 0.46501922607421875, + "step": 4361 + }, + { + "epoch": 1.0055331412103747, + "grad_norm": 1.4885879141852645, + "learning_rate": 1.0738793600002781e-06, + "loss": 0.43683815002441406, + "step": 4362 + }, + { + "epoch": 1.005763688760807, + "grad_norm": 1.6128856913318383, + "learning_rate": 1.0734992275781013e-06, + "loss": 0.48467642068862915, + "step": 4363 + }, + { + "epoch": 1.0059942363112393, + "grad_norm": 1.4816149851760412, + "learning_rate": 1.0731190844772511e-06, + "loss": 0.4620075225830078, + "step": 4364 + }, + { + "epoch": 1.0062247838616716, + "grad_norm": 1.8688222432221726, + "learning_rate": 1.0727389307529588e-06, + "loss": 0.39401185512542725, + "step": 4365 + }, + { + "epoch": 1.0064553314121036, + "grad_norm": 1.7537121860722333, + "learning_rate": 1.072358766460456e-06, + "loss": 0.5232348442077637, + "step": 4366 + }, + { + "epoch": 1.006685878962536, + "grad_norm": 1.473686642896004, + "learning_rate": 1.0719785916549778e-06, + "loss": 0.5005226731300354, + "step": 4367 + }, + { + "epoch": 1.0069164265129682, + "grad_norm": 1.3600032880524948, + "learning_rate": 1.0715984063917589e-06, + "loss": 0.505985677242279, + "step": 4368 + }, + { + "epoch": 1.0071469740634005, + "grad_norm": 1.5263088445062156, + "learning_rate": 1.0712182107260362e-06, + "loss": 0.4981151819229126, + "step": 4369 + }, + { + "epoch": 1.0073775216138328, + "grad_norm": 1.2501548242250369, + "learning_rate": 1.070838004713048e-06, + "loss": 0.4614086449146271, + "step": 4370 + }, + { + "epoch": 1.007608069164265, + "grad_norm": 1.439429049840933, + "learning_rate": 1.0704577884080352e-06, + "loss": 0.4255724549293518, + "step": 4371 + }, + { + "epoch": 1.0078386167146973, + "grad_norm": 1.6888152805440162, + "learning_rate": 1.0700775618662385e-06, + "loss": 0.4840260148048401, + "step": 4372 + }, + { + "epoch": 1.0080691642651296, + "grad_norm": 1.583402549772347, + "learning_rate": 1.069697325142901e-06, + "loss": 0.5588693618774414, + "step": 4373 + }, + { + "epoch": 1.0082997118155619, + "grad_norm": 1.5157181906879742, + "learning_rate": 1.0693170782932674e-06, + "loss": 0.5086601972579956, + "step": 4374 + }, + { + "epoch": 1.0085302593659942, + "grad_norm": 1.439160179137555, + "learning_rate": 1.0689368213725834e-06, + "loss": 0.3936936855316162, + "step": 4375 + }, + { + "epoch": 1.0087608069164264, + "grad_norm": 1.7344419771703223, + "learning_rate": 1.0685565544360962e-06, + "loss": 0.5114755034446716, + "step": 4376 + }, + { + "epoch": 1.0089913544668587, + "grad_norm": 1.2434564308153218, + "learning_rate": 1.0681762775390548e-06, + "loss": 0.524695873260498, + "step": 4377 + }, + { + "epoch": 1.009221902017291, + "grad_norm": 1.5809148564841888, + "learning_rate": 1.0677959907367095e-06, + "loss": 0.4670463800430298, + "step": 4378 + }, + { + "epoch": 1.0094524495677233, + "grad_norm": 1.5730500794833102, + "learning_rate": 1.0674156940843123e-06, + "loss": 0.5309491157531738, + "step": 4379 + }, + { + "epoch": 1.0096829971181556, + "grad_norm": 1.4093681895946824, + "learning_rate": 1.0670353876371158e-06, + "loss": 0.422494113445282, + "step": 4380 + }, + { + "epoch": 1.0099135446685878, + "grad_norm": 1.4365767688932187, + "learning_rate": 1.066655071450375e-06, + "loss": 0.5568174123764038, + "step": 4381 + }, + { + "epoch": 1.0101440922190201, + "grad_norm": 1.4363708269644129, + "learning_rate": 1.0662747455793458e-06, + "loss": 0.4783478379249573, + "step": 4382 + }, + { + "epoch": 1.0103746397694524, + "grad_norm": 1.4277915820954656, + "learning_rate": 1.0658944100792851e-06, + "loss": 0.5188988447189331, + "step": 4383 + }, + { + "epoch": 1.0106051873198847, + "grad_norm": 1.8516308226296496, + "learning_rate": 1.0655140650054524e-06, + "loss": 0.44151294231414795, + "step": 4384 + }, + { + "epoch": 1.010835734870317, + "grad_norm": 1.317137900872795, + "learning_rate": 1.0651337104131076e-06, + "loss": 0.4886103868484497, + "step": 4385 + }, + { + "epoch": 1.0110662824207493, + "grad_norm": 1.287712297930063, + "learning_rate": 1.064753346357512e-06, + "loss": 0.4710986018180847, + "step": 4386 + }, + { + "epoch": 1.0112968299711815, + "grad_norm": 1.5084820581681782, + "learning_rate": 1.064372972893929e-06, + "loss": 0.5069217085838318, + "step": 4387 + }, + { + "epoch": 1.0115273775216138, + "grad_norm": 1.6432564360854036, + "learning_rate": 1.0639925900776226e-06, + "loss": 0.49354636669158936, + "step": 4388 + }, + { + "epoch": 1.011757925072046, + "grad_norm": 1.3698200894854542, + "learning_rate": 1.0636121979638587e-06, + "loss": 0.41078031063079834, + "step": 4389 + }, + { + "epoch": 1.0119884726224784, + "grad_norm": 1.5910628800230602, + "learning_rate": 1.0632317966079043e-06, + "loss": 0.5079636573791504, + "step": 4390 + }, + { + "epoch": 1.0122190201729107, + "grad_norm": 1.501052153907989, + "learning_rate": 1.0628513860650272e-06, + "loss": 0.4242056906223297, + "step": 4391 + }, + { + "epoch": 1.012449567723343, + "grad_norm": 1.4571526068933165, + "learning_rate": 1.0624709663904976e-06, + "loss": 0.4912078380584717, + "step": 4392 + }, + { + "epoch": 1.0126801152737752, + "grad_norm": 1.432903959990896, + "learning_rate": 1.062090537639587e-06, + "loss": 0.48734402656555176, + "step": 4393 + }, + { + "epoch": 1.0129106628242075, + "grad_norm": 1.1916015715246997, + "learning_rate": 1.0617100998675668e-06, + "loss": 0.35444220900535583, + "step": 4394 + }, + { + "epoch": 1.0131412103746398, + "grad_norm": 1.4116354536956222, + "learning_rate": 1.0613296531297106e-06, + "loss": 0.4598267674446106, + "step": 4395 + }, + { + "epoch": 1.013371757925072, + "grad_norm": 1.493649072074075, + "learning_rate": 1.0609491974812944e-06, + "loss": 0.5195865631103516, + "step": 4396 + }, + { + "epoch": 1.0136023054755043, + "grad_norm": 1.766065577527857, + "learning_rate": 1.060568732977594e-06, + "loss": 0.5357420444488525, + "step": 4397 + }, + { + "epoch": 1.0138328530259366, + "grad_norm": 1.3453143231303708, + "learning_rate": 1.0601882596738864e-06, + "loss": 0.4296334981918335, + "step": 4398 + }, + { + "epoch": 1.014063400576369, + "grad_norm": 1.5493487080133135, + "learning_rate": 1.0598077776254508e-06, + "loss": 0.46664875745773315, + "step": 4399 + }, + { + "epoch": 1.0142939481268012, + "grad_norm": 1.304042047949052, + "learning_rate": 1.0594272868875676e-06, + "loss": 0.4178355932235718, + "step": 4400 + }, + { + "epoch": 1.0145244956772335, + "grad_norm": 1.2861059048068348, + "learning_rate": 1.0590467875155173e-06, + "loss": 0.4932866394519806, + "step": 4401 + }, + { + "epoch": 1.0147550432276657, + "grad_norm": 1.7354787006202048, + "learning_rate": 1.0586662795645836e-06, + "loss": 0.5387941598892212, + "step": 4402 + }, + { + "epoch": 1.014985590778098, + "grad_norm": 1.665137984348258, + "learning_rate": 1.0582857630900491e-06, + "loss": 0.5688868761062622, + "step": 4403 + }, + { + "epoch": 1.0152161383285303, + "grad_norm": 1.477824761922792, + "learning_rate": 1.0579052381472002e-06, + "loss": 0.45884644985198975, + "step": 4404 + }, + { + "epoch": 1.0154466858789626, + "grad_norm": 1.7295708482538712, + "learning_rate": 1.0575247047913222e-06, + "loss": 0.4536563754081726, + "step": 4405 + }, + { + "epoch": 1.0156772334293949, + "grad_norm": 1.6222586501020617, + "learning_rate": 1.0571441630777034e-06, + "loss": 0.39148998260498047, + "step": 4406 + }, + { + "epoch": 1.0159077809798271, + "grad_norm": 1.640574201613157, + "learning_rate": 1.0567636130616318e-06, + "loss": 0.5296084880828857, + "step": 4407 + }, + { + "epoch": 1.0161383285302594, + "grad_norm": 2.1458672337974254, + "learning_rate": 1.0563830547983977e-06, + "loss": 0.5447714328765869, + "step": 4408 + }, + { + "epoch": 1.0163688760806917, + "grad_norm": 1.437566589215361, + "learning_rate": 1.0560024883432926e-06, + "loss": 0.5003344416618347, + "step": 4409 + }, + { + "epoch": 1.016599423631124, + "grad_norm": 1.4621660414589328, + "learning_rate": 1.0556219137516082e-06, + "loss": 0.47146621346473694, + "step": 4410 + }, + { + "epoch": 1.0168299711815563, + "grad_norm": 1.370045824066682, + "learning_rate": 1.0552413310786383e-06, + "loss": 0.44027960300445557, + "step": 4411 + }, + { + "epoch": 1.0170605187319885, + "grad_norm": 1.35500440689804, + "learning_rate": 1.054860740379678e-06, + "loss": 0.3912389576435089, + "step": 4412 + }, + { + "epoch": 1.0172910662824208, + "grad_norm": 1.4370390292161166, + "learning_rate": 1.0544801417100227e-06, + "loss": 0.4845673441886902, + "step": 4413 + }, + { + "epoch": 1.0175216138328531, + "grad_norm": 1.546009165239184, + "learning_rate": 1.0540995351249694e-06, + "loss": 0.46626967191696167, + "step": 4414 + }, + { + "epoch": 1.0177521613832854, + "grad_norm": 1.5178833656938138, + "learning_rate": 1.0537189206798168e-06, + "loss": 0.4380578398704529, + "step": 4415 + }, + { + "epoch": 1.0179827089337177, + "grad_norm": 1.566646806091141, + "learning_rate": 1.0533382984298635e-06, + "loss": 0.4218786656856537, + "step": 4416 + }, + { + "epoch": 1.01821325648415, + "grad_norm": 1.2308252036857967, + "learning_rate": 1.0529576684304104e-06, + "loss": 0.4456557631492615, + "step": 4417 + }, + { + "epoch": 1.0184438040345822, + "grad_norm": 1.5800328705633249, + "learning_rate": 1.052577030736759e-06, + "loss": 0.5125781297683716, + "step": 4418 + }, + { + "epoch": 1.0186743515850145, + "grad_norm": 1.2883404092674424, + "learning_rate": 1.052196385404212e-06, + "loss": 0.45318183302879333, + "step": 4419 + }, + { + "epoch": 1.0189048991354466, + "grad_norm": 1.3112604347930223, + "learning_rate": 1.0518157324880732e-06, + "loss": 0.41572675108909607, + "step": 4420 + }, + { + "epoch": 1.0191354466858789, + "grad_norm": 1.6426527091173992, + "learning_rate": 1.0514350720436474e-06, + "loss": 0.4221467971801758, + "step": 4421 + }, + { + "epoch": 1.0193659942363111, + "grad_norm": 1.7855699658311184, + "learning_rate": 1.0510544041262411e-06, + "loss": 0.5241785049438477, + "step": 4422 + }, + { + "epoch": 1.0195965417867434, + "grad_norm": 1.7878207477876489, + "learning_rate": 1.0506737287911603e-06, + "loss": 0.5138394832611084, + "step": 4423 + }, + { + "epoch": 1.0198270893371757, + "grad_norm": 1.3052170365612978, + "learning_rate": 1.0502930460937141e-06, + "loss": 0.4385989308357239, + "step": 4424 + }, + { + "epoch": 1.020057636887608, + "grad_norm": 1.8389031959070536, + "learning_rate": 1.0499123560892119e-06, + "loss": 0.5446112155914307, + "step": 4425 + }, + { + "epoch": 1.0202881844380403, + "grad_norm": 1.5968459189575308, + "learning_rate": 1.0495316588329632e-06, + "loss": 0.42088472843170166, + "step": 4426 + }, + { + "epoch": 1.0205187319884725, + "grad_norm": 1.380239084591332, + "learning_rate": 1.04915095438028e-06, + "loss": 0.40304017066955566, + "step": 4427 + }, + { + "epoch": 1.0207492795389048, + "grad_norm": 1.3840006399284464, + "learning_rate": 1.0487702427864742e-06, + "loss": 0.4344356060028076, + "step": 4428 + }, + { + "epoch": 1.020979827089337, + "grad_norm": 1.4981806010192171, + "learning_rate": 1.0483895241068595e-06, + "loss": 0.49237990379333496, + "step": 4429 + }, + { + "epoch": 1.0212103746397694, + "grad_norm": 1.695916606130073, + "learning_rate": 1.0480087983967503e-06, + "loss": 0.5327268242835999, + "step": 4430 + }, + { + "epoch": 1.0214409221902017, + "grad_norm": 1.6144239446081834, + "learning_rate": 1.0476280657114621e-06, + "loss": 0.5176074504852295, + "step": 4431 + }, + { + "epoch": 1.021671469740634, + "grad_norm": 1.5447018911477126, + "learning_rate": 1.0472473261063116e-06, + "loss": 0.4684191942214966, + "step": 4432 + }, + { + "epoch": 1.0219020172910662, + "grad_norm": 1.3925875102152703, + "learning_rate": 1.046866579636616e-06, + "loss": 0.38662853837013245, + "step": 4433 + }, + { + "epoch": 1.0221325648414985, + "grad_norm": 1.6044931484850973, + "learning_rate": 1.0464858263576939e-06, + "loss": 0.5248677730560303, + "step": 4434 + }, + { + "epoch": 1.0223631123919308, + "grad_norm": 1.546034740596121, + "learning_rate": 1.046105066324865e-06, + "loss": 0.4143308401107788, + "step": 4435 + }, + { + "epoch": 1.022593659942363, + "grad_norm": 1.572368124431397, + "learning_rate": 1.0457242995934492e-06, + "loss": 0.5042206048965454, + "step": 4436 + }, + { + "epoch": 1.0228242074927953, + "grad_norm": 1.2279861153562417, + "learning_rate": 1.0453435262187686e-06, + "loss": 0.47694748640060425, + "step": 4437 + }, + { + "epoch": 1.0230547550432276, + "grad_norm": 1.7412013285642873, + "learning_rate": 1.0449627462561456e-06, + "loss": 0.3953937292098999, + "step": 4438 + }, + { + "epoch": 1.02328530259366, + "grad_norm": 1.3965038538033099, + "learning_rate": 1.0445819597609028e-06, + "loss": 0.48966118693351746, + "step": 4439 + }, + { + "epoch": 1.0235158501440922, + "grad_norm": 1.28557182576021, + "learning_rate": 1.0442011667883652e-06, + "loss": 0.36514124274253845, + "step": 4440 + }, + { + "epoch": 1.0237463976945245, + "grad_norm": 1.401188513600808, + "learning_rate": 1.0438203673938582e-06, + "loss": 0.4037533402442932, + "step": 4441 + }, + { + "epoch": 1.0239769452449567, + "grad_norm": 1.3235111960085384, + "learning_rate": 1.0434395616327072e-06, + "loss": 0.4208472967147827, + "step": 4442 + }, + { + "epoch": 1.024207492795389, + "grad_norm": 1.5996053630906766, + "learning_rate": 1.04305874956024e-06, + "loss": 0.524165153503418, + "step": 4443 + }, + { + "epoch": 1.0244380403458213, + "grad_norm": 1.2942402749474435, + "learning_rate": 1.0426779312317843e-06, + "loss": 0.39956557750701904, + "step": 4444 + }, + { + "epoch": 1.0246685878962536, + "grad_norm": 1.4600178018333099, + "learning_rate": 1.0422971067026694e-06, + "loss": 0.45270049571990967, + "step": 4445 + }, + { + "epoch": 1.0248991354466859, + "grad_norm": 1.3235016423492378, + "learning_rate": 1.0419162760282247e-06, + "loss": 0.4914137125015259, + "step": 4446 + }, + { + "epoch": 1.0251296829971182, + "grad_norm": 1.49486336775705, + "learning_rate": 1.0415354392637813e-06, + "loss": 0.5092133283615112, + "step": 4447 + }, + { + "epoch": 1.0253602305475504, + "grad_norm": 1.3532599291496046, + "learning_rate": 1.0411545964646705e-06, + "loss": 0.48098695278167725, + "step": 4448 + }, + { + "epoch": 1.0255907780979827, + "grad_norm": 1.6127809694648108, + "learning_rate": 1.040773747686225e-06, + "loss": 0.5379494428634644, + "step": 4449 + }, + { + "epoch": 1.025821325648415, + "grad_norm": 1.6433258148389769, + "learning_rate": 1.0403928929837784e-06, + "loss": 0.42458122968673706, + "step": 4450 + }, + { + "epoch": 1.0260518731988473, + "grad_norm": 1.78483029766293, + "learning_rate": 1.0400120324126645e-06, + "loss": 0.49995291233062744, + "step": 4451 + }, + { + "epoch": 1.0262824207492796, + "grad_norm": 1.736176405485827, + "learning_rate": 1.039631166028218e-06, + "loss": 0.500437319278717, + "step": 4452 + }, + { + "epoch": 1.0265129682997118, + "grad_norm": 1.7033831160422268, + "learning_rate": 1.0392502938857762e-06, + "loss": 0.5602716207504272, + "step": 4453 + }, + { + "epoch": 1.0267435158501441, + "grad_norm": 1.4186875936390437, + "learning_rate": 1.0388694160406745e-06, + "loss": 0.49821144342422485, + "step": 4454 + }, + { + "epoch": 1.0269740634005764, + "grad_norm": 1.6203380931256655, + "learning_rate": 1.0384885325482512e-06, + "loss": 0.4660237431526184, + "step": 4455 + }, + { + "epoch": 1.0272046109510087, + "grad_norm": 1.8296722968299048, + "learning_rate": 1.0381076434638443e-06, + "loss": 0.564801037311554, + "step": 4456 + }, + { + "epoch": 1.027435158501441, + "grad_norm": 1.3996510699189642, + "learning_rate": 1.0377267488427932e-06, + "loss": 0.4113255441188812, + "step": 4457 + }, + { + "epoch": 1.0276657060518732, + "grad_norm": 1.3659807096425378, + "learning_rate": 1.0373458487404382e-06, + "loss": 0.4769394099712372, + "step": 4458 + }, + { + "epoch": 1.0278962536023055, + "grad_norm": 1.3384975798165966, + "learning_rate": 1.0369649432121197e-06, + "loss": 0.42901676893234253, + "step": 4459 + }, + { + "epoch": 1.0281268011527378, + "grad_norm": 1.5476233273543298, + "learning_rate": 1.0365840323131795e-06, + "loss": 0.46431535482406616, + "step": 4460 + }, + { + "epoch": 1.02835734870317, + "grad_norm": 1.2099397488415262, + "learning_rate": 1.03620311609896e-06, + "loss": 0.44773775339126587, + "step": 4461 + }, + { + "epoch": 1.0285878962536024, + "grad_norm": 1.6798930628346755, + "learning_rate": 1.0358221946248043e-06, + "loss": 0.5518221855163574, + "step": 4462 + }, + { + "epoch": 1.0288184438040346, + "grad_norm": 1.480004409369121, + "learning_rate": 1.0354412679460568e-06, + "loss": 0.5178619623184204, + "step": 4463 + }, + { + "epoch": 1.029048991354467, + "grad_norm": 1.609320522926487, + "learning_rate": 1.0350603361180609e-06, + "loss": 0.49681615829467773, + "step": 4464 + }, + { + "epoch": 1.0292795389048992, + "grad_norm": 1.4735231521597927, + "learning_rate": 1.0346793991961634e-06, + "loss": 0.40041211247444153, + "step": 4465 + }, + { + "epoch": 1.0295100864553315, + "grad_norm": 1.2768035889523142, + "learning_rate": 1.0342984572357102e-06, + "loss": 0.4649925231933594, + "step": 4466 + }, + { + "epoch": 1.0297406340057638, + "grad_norm": 1.7706832584466219, + "learning_rate": 1.0339175102920478e-06, + "loss": 0.6232483386993408, + "step": 4467 + }, + { + "epoch": 1.029971181556196, + "grad_norm": 1.4245956852693253, + "learning_rate": 1.0335365584205239e-06, + "loss": 0.5647813081741333, + "step": 4468 + }, + { + "epoch": 1.0302017291066283, + "grad_norm": 1.3076641167650391, + "learning_rate": 1.0331556016764868e-06, + "loss": 0.36746442317962646, + "step": 4469 + }, + { + "epoch": 1.0304322766570606, + "grad_norm": 1.5775380660887826, + "learning_rate": 1.0327746401152858e-06, + "loss": 0.4890482723712921, + "step": 4470 + }, + { + "epoch": 1.030662824207493, + "grad_norm": 1.733394771520275, + "learning_rate": 1.0323936737922707e-06, + "loss": 0.4886651635169983, + "step": 4471 + }, + { + "epoch": 1.0308933717579252, + "grad_norm": 1.6224830403306456, + "learning_rate": 1.0320127027627917e-06, + "loss": 0.4815050959587097, + "step": 4472 + }, + { + "epoch": 1.0311239193083575, + "grad_norm": 1.6980757353739753, + "learning_rate": 1.0316317270821999e-06, + "loss": 0.48830682039260864, + "step": 4473 + }, + { + "epoch": 1.0313544668587897, + "grad_norm": 1.3969476743395202, + "learning_rate": 1.0312507468058473e-06, + "loss": 0.4594195783138275, + "step": 4474 + }, + { + "epoch": 1.031585014409222, + "grad_norm": 1.607466777328964, + "learning_rate": 1.0308697619890864e-06, + "loss": 0.5010625123977661, + "step": 4475 + }, + { + "epoch": 1.0318155619596543, + "grad_norm": 1.283561854988696, + "learning_rate": 1.0304887726872699e-06, + "loss": 0.4723336696624756, + "step": 4476 + }, + { + "epoch": 1.0320461095100864, + "grad_norm": 1.3381329229380114, + "learning_rate": 1.0301077789557519e-06, + "loss": 0.45434504747390747, + "step": 4477 + }, + { + "epoch": 1.0322766570605186, + "grad_norm": 1.377250204828909, + "learning_rate": 1.029726780849887e-06, + "loss": 0.48399144411087036, + "step": 4478 + }, + { + "epoch": 1.032507204610951, + "grad_norm": 1.3862763857384357, + "learning_rate": 1.0293457784250304e-06, + "loss": 0.41350722312927246, + "step": 4479 + }, + { + "epoch": 1.0327377521613832, + "grad_norm": 1.468557287853429, + "learning_rate": 1.028964771736537e-06, + "loss": 0.4828647971153259, + "step": 4480 + }, + { + "epoch": 1.0329682997118155, + "grad_norm": 1.3435400829034796, + "learning_rate": 1.0285837608397637e-06, + "loss": 0.48510146141052246, + "step": 4481 + }, + { + "epoch": 1.0331988472622478, + "grad_norm": 1.4106226741729768, + "learning_rate": 1.0282027457900675e-06, + "loss": 0.5286427140235901, + "step": 4482 + }, + { + "epoch": 1.03342939481268, + "grad_norm": 1.6836246338288663, + "learning_rate": 1.0278217266428054e-06, + "loss": 0.5355654954910278, + "step": 4483 + }, + { + "epoch": 1.0336599423631123, + "grad_norm": 1.388122836127959, + "learning_rate": 1.0274407034533361e-06, + "loss": 0.501393735408783, + "step": 4484 + }, + { + "epoch": 1.0338904899135446, + "grad_norm": 1.407159922284391, + "learning_rate": 1.0270596762770178e-06, + "loss": 0.5080795288085938, + "step": 4485 + }, + { + "epoch": 1.0341210374639769, + "grad_norm": 1.5608351056758434, + "learning_rate": 1.0266786451692103e-06, + "loss": 0.4568654000759125, + "step": 4486 + }, + { + "epoch": 1.0343515850144092, + "grad_norm": 1.5451894749796247, + "learning_rate": 1.026297610185273e-06, + "loss": 0.5166634321212769, + "step": 4487 + }, + { + "epoch": 1.0345821325648414, + "grad_norm": 1.625027529753705, + "learning_rate": 1.0259165713805667e-06, + "loss": 0.5145444869995117, + "step": 4488 + }, + { + "epoch": 1.0348126801152737, + "grad_norm": 1.6809207183251986, + "learning_rate": 1.0255355288104518e-06, + "loss": 0.532972514629364, + "step": 4489 + }, + { + "epoch": 1.035043227665706, + "grad_norm": 1.4595457080171315, + "learning_rate": 1.0251544825302905e-06, + "loss": 0.47362518310546875, + "step": 4490 + }, + { + "epoch": 1.0352737752161383, + "grad_norm": 1.7731872017548376, + "learning_rate": 1.0247734325954448e-06, + "loss": 0.41648489236831665, + "step": 4491 + }, + { + "epoch": 1.0355043227665706, + "grad_norm": 1.3859772973760465, + "learning_rate": 1.0243923790612768e-06, + "loss": 0.41394931077957153, + "step": 4492 + }, + { + "epoch": 1.0357348703170028, + "grad_norm": 1.4656217428361447, + "learning_rate": 1.0240113219831496e-06, + "loss": 0.49118804931640625, + "step": 4493 + }, + { + "epoch": 1.0359654178674351, + "grad_norm": 1.6694899845625315, + "learning_rate": 1.0236302614164275e-06, + "loss": 0.5071209669113159, + "step": 4494 + }, + { + "epoch": 1.0361959654178674, + "grad_norm": 1.5269400513380635, + "learning_rate": 1.023249197416474e-06, + "loss": 0.43428805470466614, + "step": 4495 + }, + { + "epoch": 1.0364265129682997, + "grad_norm": 1.599655780691587, + "learning_rate": 1.0228681300386541e-06, + "loss": 0.4495439827442169, + "step": 4496 + }, + { + "epoch": 1.036657060518732, + "grad_norm": 1.6914403127897477, + "learning_rate": 1.0224870593383326e-06, + "loss": 0.5649271011352539, + "step": 4497 + }, + { + "epoch": 1.0368876080691642, + "grad_norm": 1.7669982984032149, + "learning_rate": 1.0221059853708758e-06, + "loss": 0.5247937440872192, + "step": 4498 + }, + { + "epoch": 1.0371181556195965, + "grad_norm": 1.9131453841840644, + "learning_rate": 1.0217249081916489e-06, + "loss": 0.5018899440765381, + "step": 4499 + }, + { + "epoch": 1.0373487031700288, + "grad_norm": 1.6895237166192871, + "learning_rate": 1.0213438278560188e-06, + "loss": 0.3834974765777588, + "step": 4500 + }, + { + "epoch": 1.037579250720461, + "grad_norm": 1.404878328837829, + "learning_rate": 1.0209627444193527e-06, + "loss": 0.45946773886680603, + "step": 4501 + }, + { + "epoch": 1.0378097982708934, + "grad_norm": 1.3092615059102766, + "learning_rate": 1.020581657937018e-06, + "loss": 0.4812207520008087, + "step": 4502 + }, + { + "epoch": 1.0380403458213256, + "grad_norm": 1.5145567812643126, + "learning_rate": 1.0202005684643821e-06, + "loss": 0.5373339653015137, + "step": 4503 + }, + { + "epoch": 1.038270893371758, + "grad_norm": 1.4172584982767447, + "learning_rate": 1.0198194760568144e-06, + "loss": 0.5129649639129639, + "step": 4504 + }, + { + "epoch": 1.0385014409221902, + "grad_norm": 1.2932618611676632, + "learning_rate": 1.0194383807696824e-06, + "loss": 0.41758543252944946, + "step": 4505 + }, + { + "epoch": 1.0387319884726225, + "grad_norm": 1.3882475624615442, + "learning_rate": 1.0190572826583559e-06, + "loss": 0.39521563053131104, + "step": 4506 + }, + { + "epoch": 1.0389625360230548, + "grad_norm": 1.524715494193292, + "learning_rate": 1.018676181778205e-06, + "loss": 0.4855652451515198, + "step": 4507 + }, + { + "epoch": 1.039193083573487, + "grad_norm": 1.423908966609684, + "learning_rate": 1.0182950781845988e-06, + "loss": 0.4977639317512512, + "step": 4508 + }, + { + "epoch": 1.0394236311239193, + "grad_norm": 1.5272276294299776, + "learning_rate": 1.0179139719329079e-06, + "loss": 0.543816089630127, + "step": 4509 + }, + { + "epoch": 1.0396541786743516, + "grad_norm": 1.585484219875182, + "learning_rate": 1.0175328630785035e-06, + "loss": 0.4400789141654968, + "step": 4510 + }, + { + "epoch": 1.039884726224784, + "grad_norm": 1.5540847499410908, + "learning_rate": 1.0171517516767564e-06, + "loss": 0.5500770211219788, + "step": 4511 + }, + { + "epoch": 1.0401152737752162, + "grad_norm": 1.4513336491340072, + "learning_rate": 1.0167706377830384e-06, + "loss": 0.43814778327941895, + "step": 4512 + }, + { + "epoch": 1.0403458213256485, + "grad_norm": 1.6266107876638078, + "learning_rate": 1.016389521452721e-06, + "loss": 0.538263738155365, + "step": 4513 + }, + { + "epoch": 1.0405763688760807, + "grad_norm": 1.31961944446499, + "learning_rate": 1.0160084027411766e-06, + "loss": 0.4274001717567444, + "step": 4514 + }, + { + "epoch": 1.040806916426513, + "grad_norm": 1.4814208238012136, + "learning_rate": 1.015627281703778e-06, + "loss": 0.5197881460189819, + "step": 4515 + }, + { + "epoch": 1.0410374639769453, + "grad_norm": 1.6946538046452169, + "learning_rate": 1.0152461583958979e-06, + "loss": 0.43719834089279175, + "step": 4516 + }, + { + "epoch": 1.0412680115273776, + "grad_norm": 1.4416740952187463, + "learning_rate": 1.0148650328729096e-06, + "loss": 0.4394899904727936, + "step": 4517 + }, + { + "epoch": 1.0414985590778099, + "grad_norm": 1.579992854212008, + "learning_rate": 1.0144839051901864e-06, + "loss": 0.3841872811317444, + "step": 4518 + }, + { + "epoch": 1.0417291066282421, + "grad_norm": 1.3549804204711746, + "learning_rate": 1.0141027754031023e-06, + "loss": 0.4959898591041565, + "step": 4519 + }, + { + "epoch": 1.0419596541786744, + "grad_norm": 1.4775766757395223, + "learning_rate": 1.0137216435670324e-06, + "loss": 0.4911465346813202, + "step": 4520 + }, + { + "epoch": 1.0421902017291067, + "grad_norm": 1.4224879080779271, + "learning_rate": 1.01334050973735e-06, + "loss": 0.5083625316619873, + "step": 4521 + }, + { + "epoch": 1.042420749279539, + "grad_norm": 1.5446913974368652, + "learning_rate": 1.01295937396943e-06, + "loss": 0.446804940700531, + "step": 4522 + }, + { + "epoch": 1.0426512968299713, + "grad_norm": 1.414836456184516, + "learning_rate": 1.0125782363186482e-06, + "loss": 0.3899458646774292, + "step": 4523 + }, + { + "epoch": 1.0428818443804035, + "grad_norm": 1.3771886132474889, + "learning_rate": 1.0121970968403794e-06, + "loss": 0.4964328408241272, + "step": 4524 + }, + { + "epoch": 1.0431123919308358, + "grad_norm": 1.8083793225311697, + "learning_rate": 1.0118159555899993e-06, + "loss": 0.4805730879306793, + "step": 4525 + }, + { + "epoch": 1.043342939481268, + "grad_norm": 1.3549553002461836, + "learning_rate": 1.0114348126228837e-06, + "loss": 0.4567984938621521, + "step": 4526 + }, + { + "epoch": 1.0435734870317004, + "grad_norm": 1.809108804929047, + "learning_rate": 1.0110536679944087e-06, + "loss": 0.5105732679367065, + "step": 4527 + }, + { + "epoch": 1.0438040345821327, + "grad_norm": 1.6641150664487896, + "learning_rate": 1.010672521759951e-06, + "loss": 0.49492210149765015, + "step": 4528 + }, + { + "epoch": 1.044034582132565, + "grad_norm": 1.3216915800378581, + "learning_rate": 1.0102913739748869e-06, + "loss": 0.3918877840042114, + "step": 4529 + }, + { + "epoch": 1.044265129682997, + "grad_norm": 1.4336608956901862, + "learning_rate": 1.009910224694593e-06, + "loss": 0.48184382915496826, + "step": 4530 + }, + { + "epoch": 1.0444956772334293, + "grad_norm": 1.2096752758028502, + "learning_rate": 1.0095290739744465e-06, + "loss": 0.47491276264190674, + "step": 4531 + }, + { + "epoch": 1.0447262247838616, + "grad_norm": 1.6404923277184014, + "learning_rate": 1.0091479218698248e-06, + "loss": 0.43935298919677734, + "step": 4532 + }, + { + "epoch": 1.0449567723342938, + "grad_norm": 1.5886667617497374, + "learning_rate": 1.0087667684361056e-06, + "loss": 0.4855877757072449, + "step": 4533 + }, + { + "epoch": 1.0451873198847261, + "grad_norm": 1.459325025676877, + "learning_rate": 1.0083856137286658e-06, + "loss": 0.5075816512107849, + "step": 4534 + }, + { + "epoch": 1.0454178674351584, + "grad_norm": 1.428685173416776, + "learning_rate": 1.0080044578028834e-06, + "loss": 0.44345924258232117, + "step": 4535 + }, + { + "epoch": 1.0456484149855907, + "grad_norm": 1.2632777016131902, + "learning_rate": 1.0076233007141368e-06, + "loss": 0.4527851641178131, + "step": 4536 + }, + { + "epoch": 1.045878962536023, + "grad_norm": 1.5014095581737672, + "learning_rate": 1.007242142517804e-06, + "loss": 0.4629090428352356, + "step": 4537 + }, + { + "epoch": 1.0461095100864553, + "grad_norm": 1.6090876537997274, + "learning_rate": 1.0068609832692627e-06, + "loss": 0.4479471445083618, + "step": 4538 + }, + { + "epoch": 1.0463400576368875, + "grad_norm": 1.453798875944705, + "learning_rate": 1.0064798230238925e-06, + "loss": 0.5213382244110107, + "step": 4539 + }, + { + "epoch": 1.0465706051873198, + "grad_norm": 1.7004188694147924, + "learning_rate": 1.006098661837071e-06, + "loss": 0.4633294939994812, + "step": 4540 + }, + { + "epoch": 1.046801152737752, + "grad_norm": 1.2759434531953768, + "learning_rate": 1.0057174997641777e-06, + "loss": 0.44942396879196167, + "step": 4541 + }, + { + "epoch": 1.0470317002881844, + "grad_norm": 1.2842723268727798, + "learning_rate": 1.005336336860591e-06, + "loss": 0.4552151560783386, + "step": 4542 + }, + { + "epoch": 1.0472622478386167, + "grad_norm": 1.483709026132755, + "learning_rate": 1.00495517318169e-06, + "loss": 0.4890771806240082, + "step": 4543 + }, + { + "epoch": 1.047492795389049, + "grad_norm": 1.303122700347689, + "learning_rate": 1.004574008782854e-06, + "loss": 0.47421228885650635, + "step": 4544 + }, + { + "epoch": 1.0477233429394812, + "grad_norm": 1.6851868929876013, + "learning_rate": 1.004192843719462e-06, + "loss": 0.6262589693069458, + "step": 4545 + }, + { + "epoch": 1.0479538904899135, + "grad_norm": 1.4442935171267732, + "learning_rate": 1.0038116780468935e-06, + "loss": 0.4006143808364868, + "step": 4546 + }, + { + "epoch": 1.0481844380403458, + "grad_norm": 1.4133817727166142, + "learning_rate": 1.003430511820528e-06, + "loss": 0.44862663745880127, + "step": 4547 + }, + { + "epoch": 1.048414985590778, + "grad_norm": 1.5403193933763355, + "learning_rate": 1.0030493450957445e-06, + "loss": 0.5606727600097656, + "step": 4548 + }, + { + "epoch": 1.0486455331412103, + "grad_norm": 1.3748512451721946, + "learning_rate": 1.0026681779279233e-06, + "loss": 0.47314101457595825, + "step": 4549 + }, + { + "epoch": 1.0488760806916426, + "grad_norm": 1.6094501862176287, + "learning_rate": 1.002287010372443e-06, + "loss": 0.44440731406211853, + "step": 4550 + }, + { + "epoch": 1.049106628242075, + "grad_norm": 1.35134755450168, + "learning_rate": 1.0019058424846842e-06, + "loss": 0.4923070967197418, + "step": 4551 + }, + { + "epoch": 1.0493371757925072, + "grad_norm": 1.5637255748005057, + "learning_rate": 1.0015246743200266e-06, + "loss": 0.4886152148246765, + "step": 4552 + }, + { + "epoch": 1.0495677233429395, + "grad_norm": 1.6856486366703245, + "learning_rate": 1.0011435059338494e-06, + "loss": 0.5305925607681274, + "step": 4553 + }, + { + "epoch": 1.0497982708933717, + "grad_norm": 1.579540935604605, + "learning_rate": 1.0007623373815323e-06, + "loss": 0.42822471261024475, + "step": 4554 + }, + { + "epoch": 1.050028818443804, + "grad_norm": 1.5491845784693112, + "learning_rate": 1.0003811687184562e-06, + "loss": 0.5669623017311096, + "step": 4555 + }, + { + "epoch": 1.0502593659942363, + "grad_norm": 1.5132164274116935, + "learning_rate": 1e-06, + "loss": 0.45508894324302673, + "step": 4556 + }, + { + "epoch": 1.0504899135446686, + "grad_norm": 1.3829998425737389, + "learning_rate": 9.996188312815435e-07, + "loss": 0.4438894987106323, + "step": 4557 + }, + { + "epoch": 1.0507204610951009, + "grad_norm": 1.7413175640700402, + "learning_rate": 9.992376626184676e-07, + "loss": 0.5677859783172607, + "step": 4558 + }, + { + "epoch": 1.0509510086455331, + "grad_norm": 1.8839030912732315, + "learning_rate": 9.988564940661508e-07, + "loss": 0.5217305421829224, + "step": 4559 + }, + { + "epoch": 1.0511815561959654, + "grad_norm": 1.2410955600288431, + "learning_rate": 9.984753256799737e-07, + "loss": 0.3913061320781708, + "step": 4560 + }, + { + "epoch": 1.0514121037463977, + "grad_norm": 1.5511360318143517, + "learning_rate": 9.980941575153157e-07, + "loss": 0.48233336210250854, + "step": 4561 + }, + { + "epoch": 1.05164265129683, + "grad_norm": 1.4454747638053118, + "learning_rate": 9.977129896275568e-07, + "loss": 0.45223701000213623, + "step": 4562 + }, + { + "epoch": 1.0518731988472623, + "grad_norm": 1.6341160882250192, + "learning_rate": 9.97331822072077e-07, + "loss": 0.5448843836784363, + "step": 4563 + }, + { + "epoch": 1.0521037463976946, + "grad_norm": 1.5670186133404156, + "learning_rate": 9.969506549042556e-07, + "loss": 0.4266091585159302, + "step": 4564 + }, + { + "epoch": 1.0523342939481268, + "grad_norm": 1.5920644599913252, + "learning_rate": 9.965694881794719e-07, + "loss": 0.4513993263244629, + "step": 4565 + }, + { + "epoch": 1.0525648414985591, + "grad_norm": 1.6763509325376744, + "learning_rate": 9.961883219531064e-07, + "loss": 0.44799959659576416, + "step": 4566 + }, + { + "epoch": 1.0527953890489914, + "grad_norm": 2.153193445948826, + "learning_rate": 9.95807156280538e-07, + "loss": 0.5780029296875, + "step": 4567 + }, + { + "epoch": 1.0530259365994237, + "grad_norm": 1.5985216986617043, + "learning_rate": 9.954259912171462e-07, + "loss": 0.4681410789489746, + "step": 4568 + }, + { + "epoch": 1.053256484149856, + "grad_norm": 1.5646561466762618, + "learning_rate": 9.9504482681831e-07, + "loss": 0.49490541219711304, + "step": 4569 + }, + { + "epoch": 1.0534870317002882, + "grad_norm": 1.3876036304903236, + "learning_rate": 9.946636631394091e-07, + "loss": 0.43849432468414307, + "step": 4570 + }, + { + "epoch": 1.0537175792507205, + "grad_norm": 1.2547290934910935, + "learning_rate": 9.942825002358227e-07, + "loss": 0.3409537672996521, + "step": 4571 + }, + { + "epoch": 1.0539481268011528, + "grad_norm": 1.5892871152279466, + "learning_rate": 9.939013381629288e-07, + "loss": 0.4381803274154663, + "step": 4572 + }, + { + "epoch": 1.054178674351585, + "grad_norm": 1.3553936906788953, + "learning_rate": 9.935201769761077e-07, + "loss": 0.4395095109939575, + "step": 4573 + }, + { + "epoch": 1.0544092219020174, + "grad_norm": 1.3843041469920634, + "learning_rate": 9.931390167307374e-07, + "loss": 0.4942593574523926, + "step": 4574 + }, + { + "epoch": 1.0546397694524496, + "grad_norm": 1.6627568420794305, + "learning_rate": 9.927578574821961e-07, + "loss": 0.4674314856529236, + "step": 4575 + }, + { + "epoch": 1.054870317002882, + "grad_norm": 1.5977132892055355, + "learning_rate": 9.923766992858633e-07, + "loss": 0.4729968011379242, + "step": 4576 + }, + { + "epoch": 1.0551008645533142, + "grad_norm": 1.7001005285115778, + "learning_rate": 9.919955421971168e-07, + "loss": 0.5141623616218567, + "step": 4577 + }, + { + "epoch": 1.0553314121037465, + "grad_norm": 1.503844678970446, + "learning_rate": 9.916143862713341e-07, + "loss": 0.44645851850509644, + "step": 4578 + }, + { + "epoch": 1.0555619596541788, + "grad_norm": 1.630335682025123, + "learning_rate": 9.912332315638947e-07, + "loss": 0.5603121519088745, + "step": 4579 + }, + { + "epoch": 1.055792507204611, + "grad_norm": 1.3908420909888959, + "learning_rate": 9.908520781301751e-07, + "loss": 0.4082290232181549, + "step": 4580 + }, + { + "epoch": 1.0560230547550433, + "grad_norm": 1.369787996525026, + "learning_rate": 9.904709260255532e-07, + "loss": 0.48363831639289856, + "step": 4581 + }, + { + "epoch": 1.0562536023054756, + "grad_norm": 1.4093134407140262, + "learning_rate": 9.90089775305407e-07, + "loss": 0.5562942028045654, + "step": 4582 + }, + { + "epoch": 1.0564841498559079, + "grad_norm": 1.490643675328337, + "learning_rate": 9.897086260251132e-07, + "loss": 0.4740902781486511, + "step": 4583 + }, + { + "epoch": 1.0567146974063402, + "grad_norm": 1.4465043958177053, + "learning_rate": 9.893274782400493e-07, + "loss": 0.46482330560684204, + "step": 4584 + }, + { + "epoch": 1.0569452449567724, + "grad_norm": 1.4739791043264163, + "learning_rate": 9.889463320055912e-07, + "loss": 0.5256012678146362, + "step": 4585 + }, + { + "epoch": 1.0571757925072045, + "grad_norm": 1.4346601686074334, + "learning_rate": 9.885651873771162e-07, + "loss": 0.46507787704467773, + "step": 4586 + }, + { + "epoch": 1.0574063400576368, + "grad_norm": 1.3895966625957024, + "learning_rate": 9.88184044410001e-07, + "loss": 0.47399261593818665, + "step": 4587 + }, + { + "epoch": 1.057636887608069, + "grad_norm": 1.3868992540366305, + "learning_rate": 9.878029031596208e-07, + "loss": 0.4795050024986267, + "step": 4588 + }, + { + "epoch": 1.0578674351585013, + "grad_norm": 1.3090753005692493, + "learning_rate": 9.874217636813517e-07, + "loss": 0.4199279844760895, + "step": 4589 + }, + { + "epoch": 1.0580979827089336, + "grad_norm": 1.663501386491142, + "learning_rate": 9.8704062603057e-07, + "loss": 0.41600513458251953, + "step": 4590 + }, + { + "epoch": 1.058328530259366, + "grad_norm": 1.3202559065579687, + "learning_rate": 9.866594902626501e-07, + "loss": 0.45088812708854675, + "step": 4591 + }, + { + "epoch": 1.0585590778097982, + "grad_norm": 1.5365099799322879, + "learning_rate": 9.86278356432968e-07, + "loss": 0.47499629855155945, + "step": 4592 + }, + { + "epoch": 1.0587896253602305, + "grad_norm": 1.6280612027171966, + "learning_rate": 9.858972245968976e-07, + "loss": 0.48040494322776794, + "step": 4593 + }, + { + "epoch": 1.0590201729106627, + "grad_norm": 1.4282716545191367, + "learning_rate": 9.855160948098135e-07, + "loss": 0.5223349928855896, + "step": 4594 + }, + { + "epoch": 1.059250720461095, + "grad_norm": 1.5420066669414958, + "learning_rate": 9.851349671270907e-07, + "loss": 0.46820712089538574, + "step": 4595 + }, + { + "epoch": 1.0594812680115273, + "grad_norm": 1.5344743511950931, + "learning_rate": 9.847538416041025e-07, + "loss": 0.4419459402561188, + "step": 4596 + }, + { + "epoch": 1.0597118155619596, + "grad_norm": 1.7026231172894162, + "learning_rate": 9.84372718296222e-07, + "loss": 0.4973066449165344, + "step": 4597 + }, + { + "epoch": 1.0599423631123919, + "grad_norm": 1.6533749765494075, + "learning_rate": 9.839915972588233e-07, + "loss": 0.5010764002799988, + "step": 4598 + }, + { + "epoch": 1.0601729106628242, + "grad_norm": 1.3352189477102197, + "learning_rate": 9.83610478547279e-07, + "loss": 0.4472631514072418, + "step": 4599 + }, + { + "epoch": 1.0604034582132564, + "grad_norm": 1.4633853965912471, + "learning_rate": 9.83229362216962e-07, + "loss": 0.5191864371299744, + "step": 4600 + }, + { + "epoch": 1.0606340057636887, + "grad_norm": 1.442422737166096, + "learning_rate": 9.828482483232435e-07, + "loss": 0.4557565450668335, + "step": 4601 + }, + { + "epoch": 1.060864553314121, + "grad_norm": 1.4201969439884339, + "learning_rate": 9.824671369214964e-07, + "loss": 0.5049344301223755, + "step": 4602 + }, + { + "epoch": 1.0610951008645533, + "grad_norm": 1.6670814032897683, + "learning_rate": 9.820860280670922e-07, + "loss": 0.47939494252204895, + "step": 4603 + }, + { + "epoch": 1.0613256484149856, + "grad_norm": 1.658621194351885, + "learning_rate": 9.817049218154012e-07, + "loss": 0.429465115070343, + "step": 4604 + }, + { + "epoch": 1.0615561959654178, + "grad_norm": 1.81916330142038, + "learning_rate": 9.813238182217954e-07, + "loss": 0.5290513634681702, + "step": 4605 + }, + { + "epoch": 1.0617867435158501, + "grad_norm": 1.644457909311076, + "learning_rate": 9.80942717341644e-07, + "loss": 0.4728652536869049, + "step": 4606 + }, + { + "epoch": 1.0620172910662824, + "grad_norm": 1.5999219561315778, + "learning_rate": 9.805616192303176e-07, + "loss": 0.498574435710907, + "step": 4607 + }, + { + "epoch": 1.0622478386167147, + "grad_norm": 1.356976867296984, + "learning_rate": 9.80180523943186e-07, + "loss": 0.4974418878555298, + "step": 4608 + }, + { + "epoch": 1.062478386167147, + "grad_norm": 1.5988775450204102, + "learning_rate": 9.797994315356178e-07, + "loss": 0.5187455415725708, + "step": 4609 + }, + { + "epoch": 1.0627089337175792, + "grad_norm": 1.4676791173009547, + "learning_rate": 9.79418342062982e-07, + "loss": 0.4465523958206177, + "step": 4610 + }, + { + "epoch": 1.0629394812680115, + "grad_norm": 1.4881247830158704, + "learning_rate": 9.790372555806472e-07, + "loss": 0.38395804166793823, + "step": 4611 + }, + { + "epoch": 1.0631700288184438, + "grad_norm": 1.2625355877596527, + "learning_rate": 9.786561721439813e-07, + "loss": 0.4082638621330261, + "step": 4612 + }, + { + "epoch": 1.063400576368876, + "grad_norm": 1.3885240890249793, + "learning_rate": 9.782750918083515e-07, + "loss": 0.4943958520889282, + "step": 4613 + }, + { + "epoch": 1.0636311239193084, + "grad_norm": 1.3863404713614964, + "learning_rate": 9.778940146291243e-07, + "loss": 0.48641303181648254, + "step": 4614 + }, + { + "epoch": 1.0638616714697406, + "grad_norm": 1.5048568512606781, + "learning_rate": 9.77512940661667e-07, + "loss": 0.5512930154800415, + "step": 4615 + }, + { + "epoch": 1.064092219020173, + "grad_norm": 1.7370636910994324, + "learning_rate": 9.77131869961346e-07, + "loss": 0.5123332142829895, + "step": 4616 + }, + { + "epoch": 1.0643227665706052, + "grad_norm": 1.4868846938288016, + "learning_rate": 9.76750802583526e-07, + "loss": 0.4741990566253662, + "step": 4617 + }, + { + "epoch": 1.0645533141210375, + "grad_norm": 1.3478064232580702, + "learning_rate": 9.763697385835725e-07, + "loss": 0.46791690587997437, + "step": 4618 + }, + { + "epoch": 1.0647838616714698, + "grad_norm": 1.3689702474752652, + "learning_rate": 9.759886780168505e-07, + "loss": 0.4555216133594513, + "step": 4619 + }, + { + "epoch": 1.065014409221902, + "grad_norm": 1.6358498544131885, + "learning_rate": 9.756076209387233e-07, + "loss": 0.5309184193611145, + "step": 4620 + }, + { + "epoch": 1.0652449567723343, + "grad_norm": 1.3971481165355712, + "learning_rate": 9.752265674045553e-07, + "loss": 0.47507262229919434, + "step": 4621 + }, + { + "epoch": 1.0654755043227666, + "grad_norm": 1.5596009652461436, + "learning_rate": 9.748455174697094e-07, + "loss": 0.5707837343215942, + "step": 4622 + }, + { + "epoch": 1.065706051873199, + "grad_norm": 1.9435380622940854, + "learning_rate": 9.744644711895479e-07, + "loss": 0.45958513021469116, + "step": 4623 + }, + { + "epoch": 1.0659365994236312, + "grad_norm": 1.5800769453439238, + "learning_rate": 9.740834286194334e-07, + "loss": 0.4757344126701355, + "step": 4624 + }, + { + "epoch": 1.0661671469740635, + "grad_norm": 1.6339645956094502, + "learning_rate": 9.73702389814727e-07, + "loss": 0.4484025835990906, + "step": 4625 + }, + { + "epoch": 1.0663976945244957, + "grad_norm": 1.6753880967774484, + "learning_rate": 9.733213548307896e-07, + "loss": 0.4026743173599243, + "step": 4626 + }, + { + "epoch": 1.066628242074928, + "grad_norm": 1.8514450132857911, + "learning_rate": 9.72940323722982e-07, + "loss": 0.5340179204940796, + "step": 4627 + }, + { + "epoch": 1.0668587896253603, + "grad_norm": 1.6161086513447296, + "learning_rate": 9.72559296546664e-07, + "loss": 0.4515274167060852, + "step": 4628 + }, + { + "epoch": 1.0670893371757926, + "grad_norm": 1.866066478952204, + "learning_rate": 9.72178273357195e-07, + "loss": 0.49658486247062683, + "step": 4629 + }, + { + "epoch": 1.0673198847262249, + "grad_norm": 1.6194964007936918, + "learning_rate": 9.717972542099326e-07, + "loss": 0.4344639182090759, + "step": 4630 + }, + { + "epoch": 1.0675504322766571, + "grad_norm": 1.6063008031788568, + "learning_rate": 9.714162391602363e-07, + "loss": 0.5005955100059509, + "step": 4631 + }, + { + "epoch": 1.0677809798270894, + "grad_norm": 1.7066518802868487, + "learning_rate": 9.710352282634632e-07, + "loss": 0.6442652940750122, + "step": 4632 + }, + { + "epoch": 1.0680115273775217, + "grad_norm": 1.3344273253931838, + "learning_rate": 9.706542215749697e-07, + "loss": 0.4573819637298584, + "step": 4633 + }, + { + "epoch": 1.068242074927954, + "grad_norm": 1.3322530236364596, + "learning_rate": 9.702732191501127e-07, + "loss": 0.4596165716648102, + "step": 4634 + }, + { + "epoch": 1.0684726224783863, + "grad_norm": 1.4873477616834596, + "learning_rate": 9.69892221044248e-07, + "loss": 0.4277721047401428, + "step": 4635 + }, + { + "epoch": 1.0687031700288185, + "grad_norm": 1.3152713454926654, + "learning_rate": 9.6951122731273e-07, + "loss": 0.39352816343307495, + "step": 4636 + }, + { + "epoch": 1.0689337175792508, + "grad_norm": 1.4248950438905919, + "learning_rate": 9.691302380109138e-07, + "loss": 0.4209185242652893, + "step": 4637 + }, + { + "epoch": 1.069164265129683, + "grad_norm": 1.3774255316125943, + "learning_rate": 9.687492531941528e-07, + "loss": 0.47638988494873047, + "step": 4638 + }, + { + "epoch": 1.0693948126801152, + "grad_norm": 1.7927969058207647, + "learning_rate": 9.683682729178e-07, + "loss": 0.5114691257476807, + "step": 4639 + }, + { + "epoch": 1.0696253602305474, + "grad_norm": 2.207610541748439, + "learning_rate": 9.679872972372085e-07, + "loss": 0.48080897331237793, + "step": 4640 + }, + { + "epoch": 1.0698559077809797, + "grad_norm": 1.611428399695435, + "learning_rate": 9.676063262077293e-07, + "loss": 0.5234401822090149, + "step": 4641 + }, + { + "epoch": 1.070086455331412, + "grad_norm": 1.3098949045011148, + "learning_rate": 9.672253598847139e-07, + "loss": 0.3622468113899231, + "step": 4642 + }, + { + "epoch": 1.0703170028818443, + "grad_norm": 1.5524376763252634, + "learning_rate": 9.668443983235131e-07, + "loss": 0.45639339089393616, + "step": 4643 + }, + { + "epoch": 1.0705475504322766, + "grad_norm": 1.2743609898532986, + "learning_rate": 9.66463441579476e-07, + "loss": 0.48489123582839966, + "step": 4644 + }, + { + "epoch": 1.0707780979827088, + "grad_norm": 1.5672043285567319, + "learning_rate": 9.660824897079525e-07, + "loss": 0.5093181133270264, + "step": 4645 + }, + { + "epoch": 1.0710086455331411, + "grad_norm": 1.4956950420324642, + "learning_rate": 9.657015427642897e-07, + "loss": 0.4775876998901367, + "step": 4646 + }, + { + "epoch": 1.0712391930835734, + "grad_norm": 1.7652073053177477, + "learning_rate": 9.653206008038362e-07, + "loss": 0.47746625542640686, + "step": 4647 + }, + { + "epoch": 1.0714697406340057, + "grad_norm": 1.2175686688635734, + "learning_rate": 9.64939663881939e-07, + "loss": 0.48147135972976685, + "step": 4648 + }, + { + "epoch": 1.071700288184438, + "grad_norm": 1.524354935204934, + "learning_rate": 9.645587320539434e-07, + "loss": 0.3762315511703491, + "step": 4649 + }, + { + "epoch": 1.0719308357348702, + "grad_norm": 1.8127361533613366, + "learning_rate": 9.641778053751957e-07, + "loss": 0.601677417755127, + "step": 4650 + }, + { + "epoch": 1.0721613832853025, + "grad_norm": 1.390968763863657, + "learning_rate": 9.6379688390104e-07, + "loss": 0.559817910194397, + "step": 4651 + }, + { + "epoch": 1.0723919308357348, + "grad_norm": 1.4914114417009505, + "learning_rate": 9.634159676868202e-07, + "loss": 0.4357626736164093, + "step": 4652 + }, + { + "epoch": 1.072622478386167, + "grad_norm": 1.5444496888118033, + "learning_rate": 9.630350567878804e-07, + "loss": 0.4259532690048218, + "step": 4653 + }, + { + "epoch": 1.0728530259365994, + "grad_norm": 1.3855450396071676, + "learning_rate": 9.62654151259562e-07, + "loss": 0.43510159850120544, + "step": 4654 + }, + { + "epoch": 1.0730835734870317, + "grad_norm": 1.3142166864023803, + "learning_rate": 9.622732511572065e-07, + "loss": 0.47992539405822754, + "step": 4655 + }, + { + "epoch": 1.073314121037464, + "grad_norm": 1.563639153951261, + "learning_rate": 9.618923565361558e-07, + "loss": 0.5122553110122681, + "step": 4656 + }, + { + "epoch": 1.0735446685878962, + "grad_norm": 1.3477880780884035, + "learning_rate": 9.615114674517492e-07, + "loss": 0.4490904211997986, + "step": 4657 + }, + { + "epoch": 1.0737752161383285, + "grad_norm": 1.6020560200523055, + "learning_rate": 9.611305839593259e-07, + "loss": 0.6317769885063171, + "step": 4658 + }, + { + "epoch": 1.0740057636887608, + "grad_norm": 1.4896746014843723, + "learning_rate": 9.60749706114224e-07, + "loss": 0.6265285611152649, + "step": 4659 + }, + { + "epoch": 1.074236311239193, + "grad_norm": 1.501646883657502, + "learning_rate": 9.603688339717817e-07, + "loss": 0.4698525369167328, + "step": 4660 + }, + { + "epoch": 1.0744668587896253, + "grad_norm": 1.5908829607086772, + "learning_rate": 9.599879675873358e-07, + "loss": 0.49619418382644653, + "step": 4661 + }, + { + "epoch": 1.0746974063400576, + "grad_norm": 1.323354232277254, + "learning_rate": 9.596071070162217e-07, + "loss": 0.46166038513183594, + "step": 4662 + }, + { + "epoch": 1.07492795389049, + "grad_norm": 1.5115372830059979, + "learning_rate": 9.592262523137747e-07, + "loss": 0.44326937198638916, + "step": 4663 + }, + { + "epoch": 1.0751585014409222, + "grad_norm": 1.4132443514252715, + "learning_rate": 9.588454035353296e-07, + "loss": 0.49652424454689026, + "step": 4664 + }, + { + "epoch": 1.0753890489913545, + "grad_norm": 1.4411262259507003, + "learning_rate": 9.584645607362186e-07, + "loss": 0.42872053384780884, + "step": 4665 + }, + { + "epoch": 1.0756195965417867, + "grad_norm": 1.3788553083650343, + "learning_rate": 9.580837239717754e-07, + "loss": 0.4546404480934143, + "step": 4666 + }, + { + "epoch": 1.075850144092219, + "grad_norm": 1.6310927881369055, + "learning_rate": 9.577028932973308e-07, + "loss": 0.4827216863632202, + "step": 4667 + }, + { + "epoch": 1.0760806916426513, + "grad_norm": 1.4044809082874001, + "learning_rate": 9.573220687682154e-07, + "loss": 0.48063087463378906, + "step": 4668 + }, + { + "epoch": 1.0763112391930836, + "grad_norm": 2.3285506480260283, + "learning_rate": 9.5694125043976e-07, + "loss": 0.4597262740135193, + "step": 4669 + }, + { + "epoch": 1.0765417867435159, + "grad_norm": 1.376042518158115, + "learning_rate": 9.565604383672927e-07, + "loss": 0.4230908751487732, + "step": 4670 + }, + { + "epoch": 1.0767723342939481, + "grad_norm": 1.6404181287671118, + "learning_rate": 9.561796326061417e-07, + "loss": 0.41329729557037354, + "step": 4671 + }, + { + "epoch": 1.0770028818443804, + "grad_norm": 1.336192639897515, + "learning_rate": 9.55798833211635e-07, + "loss": 0.36296525597572327, + "step": 4672 + }, + { + "epoch": 1.0772334293948127, + "grad_norm": 1.4646789281240495, + "learning_rate": 9.55418040239097e-07, + "loss": 0.4914727807044983, + "step": 4673 + }, + { + "epoch": 1.077463976945245, + "grad_norm": 1.390877382033457, + "learning_rate": 9.550372537438547e-07, + "loss": 0.4246331453323364, + "step": 4674 + }, + { + "epoch": 1.0776945244956773, + "grad_norm": 1.4081852205814585, + "learning_rate": 9.546564737812313e-07, + "loss": 0.416849821805954, + "step": 4675 + }, + { + "epoch": 1.0779250720461095, + "grad_norm": 1.4292647003721168, + "learning_rate": 9.542757004065505e-07, + "loss": 0.4291315972805023, + "step": 4676 + }, + { + "epoch": 1.0781556195965418, + "grad_norm": 1.6415771810836237, + "learning_rate": 9.538949336751353e-07, + "loss": 0.403107225894928, + "step": 4677 + }, + { + "epoch": 1.078386167146974, + "grad_norm": 1.4119962131192818, + "learning_rate": 9.535141736423062e-07, + "loss": 0.4733341932296753, + "step": 4678 + }, + { + "epoch": 1.0786167146974064, + "grad_norm": 1.7859752811554186, + "learning_rate": 9.531334203633838e-07, + "loss": 0.588297963142395, + "step": 4679 + }, + { + "epoch": 1.0788472622478387, + "grad_norm": 1.8273419442887104, + "learning_rate": 9.527526738936885e-07, + "loss": 0.544661283493042, + "step": 4680 + }, + { + "epoch": 1.079077809798271, + "grad_norm": 1.4494540560127676, + "learning_rate": 9.523719342885379e-07, + "loss": 0.5208151340484619, + "step": 4681 + }, + { + "epoch": 1.0793083573487032, + "grad_norm": 1.8810490080535522, + "learning_rate": 9.5199120160325e-07, + "loss": 0.5054324865341187, + "step": 4682 + }, + { + "epoch": 1.0795389048991355, + "grad_norm": 1.5509547417738327, + "learning_rate": 9.516104758931406e-07, + "loss": 0.5247195959091187, + "step": 4683 + }, + { + "epoch": 1.0797694524495678, + "grad_norm": 1.7616045237495106, + "learning_rate": 9.512297572135259e-07, + "loss": 0.4715186655521393, + "step": 4684 + }, + { + "epoch": 1.08, + "grad_norm": 1.376815174594021, + "learning_rate": 9.508490456197203e-07, + "loss": 0.4610804319381714, + "step": 4685 + }, + { + "epoch": 1.0802305475504324, + "grad_norm": 1.501280260470384, + "learning_rate": 9.504683411670368e-07, + "loss": 0.39732515811920166, + "step": 4686 + }, + { + "epoch": 1.0804610951008646, + "grad_norm": 1.3857848874896455, + "learning_rate": 9.500876439107881e-07, + "loss": 0.508002758026123, + "step": 4687 + }, + { + "epoch": 1.080691642651297, + "grad_norm": 1.3410811454931406, + "learning_rate": 9.497069539062859e-07, + "loss": 0.46706700325012207, + "step": 4688 + }, + { + "epoch": 1.0809221902017292, + "grad_norm": 1.6751530232611822, + "learning_rate": 9.493262712088395e-07, + "loss": 0.5082959532737732, + "step": 4689 + }, + { + "epoch": 1.0811527377521615, + "grad_norm": 1.4221868188021594, + "learning_rate": 9.489455958737593e-07, + "loss": 0.45620518922805786, + "step": 4690 + }, + { + "epoch": 1.0813832853025938, + "grad_norm": 1.8105569017956895, + "learning_rate": 9.485649279563527e-07, + "loss": 0.5285652279853821, + "step": 4691 + }, + { + "epoch": 1.081613832853026, + "grad_norm": 1.1979265453913894, + "learning_rate": 9.481842675119267e-07, + "loss": 0.3795422315597534, + "step": 4692 + }, + { + "epoch": 1.0818443804034583, + "grad_norm": 1.662830387193791, + "learning_rate": 9.478036145957881e-07, + "loss": 0.42700281739234924, + "step": 4693 + }, + { + "epoch": 1.0820749279538906, + "grad_norm": 1.6070968941325912, + "learning_rate": 9.47422969263241e-07, + "loss": 0.43620502948760986, + "step": 4694 + }, + { + "epoch": 1.0823054755043229, + "grad_norm": 1.3925806824632918, + "learning_rate": 9.470423315695899e-07, + "loss": 0.501217782497406, + "step": 4695 + }, + { + "epoch": 1.0825360230547552, + "grad_norm": 1.339834807838682, + "learning_rate": 9.466617015701366e-07, + "loss": 0.4491414427757263, + "step": 4696 + }, + { + "epoch": 1.0827665706051872, + "grad_norm": 1.4479543684791027, + "learning_rate": 9.462810793201833e-07, + "loss": 0.4844500720500946, + "step": 4697 + }, + { + "epoch": 1.0829971181556195, + "grad_norm": 1.9044814000505415, + "learning_rate": 9.459004648750307e-07, + "loss": 0.47131961584091187, + "step": 4698 + }, + { + "epoch": 1.0832276657060518, + "grad_norm": 1.603910730442911, + "learning_rate": 9.455198582899773e-07, + "loss": 0.5120211839675903, + "step": 4699 + }, + { + "epoch": 1.083458213256484, + "grad_norm": 1.3551244603653407, + "learning_rate": 9.451392596203219e-07, + "loss": 0.46080902218818665, + "step": 4700 + }, + { + "epoch": 1.0836887608069163, + "grad_norm": 1.6646455172457901, + "learning_rate": 9.447586689213617e-07, + "loss": 0.46871304512023926, + "step": 4701 + }, + { + "epoch": 1.0839193083573486, + "grad_norm": 1.4595951361347437, + "learning_rate": 9.443780862483918e-07, + "loss": 0.5012568235397339, + "step": 4702 + }, + { + "epoch": 1.084149855907781, + "grad_norm": 1.4508081611111274, + "learning_rate": 9.439975116567077e-07, + "loss": 0.4377376437187195, + "step": 4703 + }, + { + "epoch": 1.0843804034582132, + "grad_norm": 1.3718342044857916, + "learning_rate": 9.436169452016024e-07, + "loss": 0.44364726543426514, + "step": 4704 + }, + { + "epoch": 1.0846109510086455, + "grad_norm": 1.619060845934508, + "learning_rate": 9.432363869383682e-07, + "loss": 0.4641938805580139, + "step": 4705 + }, + { + "epoch": 1.0848414985590777, + "grad_norm": 1.7336364607563637, + "learning_rate": 9.428558369222969e-07, + "loss": 0.5737602710723877, + "step": 4706 + }, + { + "epoch": 1.08507204610951, + "grad_norm": 1.7107358159172448, + "learning_rate": 9.424752952086777e-07, + "loss": 0.5098488330841064, + "step": 4707 + }, + { + "epoch": 1.0853025936599423, + "grad_norm": 1.2019465668810627, + "learning_rate": 9.420947618527997e-07, + "loss": 0.35696643590927124, + "step": 4708 + }, + { + "epoch": 1.0855331412103746, + "grad_norm": 1.4669534605111085, + "learning_rate": 9.417142369099507e-07, + "loss": 0.4182433485984802, + "step": 4709 + }, + { + "epoch": 1.0857636887608069, + "grad_norm": 1.4801438236679534, + "learning_rate": 9.413337204354166e-07, + "loss": 0.4930199980735779, + "step": 4710 + }, + { + "epoch": 1.0859942363112391, + "grad_norm": 1.4113635474707797, + "learning_rate": 9.409532124844828e-07, + "loss": 0.49246686697006226, + "step": 4711 + }, + { + "epoch": 1.0862247838616714, + "grad_norm": 1.7711418276471607, + "learning_rate": 9.405727131124326e-07, + "loss": 0.4412927031517029, + "step": 4712 + }, + { + "epoch": 1.0864553314121037, + "grad_norm": 1.4350333443942689, + "learning_rate": 9.401922223745491e-07, + "loss": 0.42520982027053833, + "step": 4713 + }, + { + "epoch": 1.086685878962536, + "grad_norm": 1.6745037730870926, + "learning_rate": 9.398117403261138e-07, + "loss": 0.5159086585044861, + "step": 4714 + }, + { + "epoch": 1.0869164265129683, + "grad_norm": 1.320729871652049, + "learning_rate": 9.39431267022406e-07, + "loss": 0.5348051190376282, + "step": 4715 + }, + { + "epoch": 1.0871469740634006, + "grad_norm": 1.4807752036978776, + "learning_rate": 9.390508025187054e-07, + "loss": 0.5200084447860718, + "step": 4716 + }, + { + "epoch": 1.0873775216138328, + "grad_norm": 1.7351487137422408, + "learning_rate": 9.386703468702892e-07, + "loss": 0.5356011390686035, + "step": 4717 + }, + { + "epoch": 1.0876080691642651, + "grad_norm": 1.4606974960715933, + "learning_rate": 9.382899001324334e-07, + "loss": 0.49807971715927124, + "step": 4718 + }, + { + "epoch": 1.0878386167146974, + "grad_norm": 1.4204133831852221, + "learning_rate": 9.379094623604133e-07, + "loss": 0.4936211407184601, + "step": 4719 + }, + { + "epoch": 1.0880691642651297, + "grad_norm": 1.472033735820111, + "learning_rate": 9.375290336095023e-07, + "loss": 0.43047142028808594, + "step": 4720 + }, + { + "epoch": 1.088299711815562, + "grad_norm": 1.6521573876351554, + "learning_rate": 9.371486139349727e-07, + "loss": 0.5489988327026367, + "step": 4721 + }, + { + "epoch": 1.0885302593659942, + "grad_norm": 1.4462296906896397, + "learning_rate": 9.36768203392096e-07, + "loss": 0.3681810796260834, + "step": 4722 + }, + { + "epoch": 1.0887608069164265, + "grad_norm": 1.6193248142724037, + "learning_rate": 9.363878020361415e-07, + "loss": 0.4829370677471161, + "step": 4723 + }, + { + "epoch": 1.0889913544668588, + "grad_norm": 1.4582135448230624, + "learning_rate": 9.360074099223772e-07, + "loss": 0.51734459400177, + "step": 4724 + }, + { + "epoch": 1.089221902017291, + "grad_norm": 1.6653806071052673, + "learning_rate": 9.35627027106071e-07, + "loss": 0.47395747900009155, + "step": 4725 + }, + { + "epoch": 1.0894524495677234, + "grad_norm": 1.682967664113776, + "learning_rate": 9.35246653642488e-07, + "loss": 0.474406361579895, + "step": 4726 + }, + { + "epoch": 1.0896829971181556, + "grad_norm": 1.7582029988340027, + "learning_rate": 9.348662895868928e-07, + "loss": 0.4683076739311218, + "step": 4727 + }, + { + "epoch": 1.089913544668588, + "grad_norm": 1.3542305902132215, + "learning_rate": 9.344859349945477e-07, + "loss": 0.3666248917579651, + "step": 4728 + }, + { + "epoch": 1.0901440922190202, + "grad_norm": 1.4958744858069852, + "learning_rate": 9.341055899207149e-07, + "loss": 0.42460423707962036, + "step": 4729 + }, + { + "epoch": 1.0903746397694525, + "grad_norm": 1.2929965233797245, + "learning_rate": 9.337252544206545e-07, + "loss": 0.47737449407577515, + "step": 4730 + }, + { + "epoch": 1.0906051873198848, + "grad_norm": 1.7759362318926004, + "learning_rate": 9.33344928549625e-07, + "loss": 0.48160216212272644, + "step": 4731 + }, + { + "epoch": 1.090835734870317, + "grad_norm": 1.8201639638551033, + "learning_rate": 9.32964612362884e-07, + "loss": 0.4271983504295349, + "step": 4732 + }, + { + "epoch": 1.0910662824207493, + "grad_norm": 1.5964518411792343, + "learning_rate": 9.325843059156878e-07, + "loss": 0.49104204773902893, + "step": 4733 + }, + { + "epoch": 1.0912968299711816, + "grad_norm": 1.8237737626424653, + "learning_rate": 9.322040092632901e-07, + "loss": 0.4309648275375366, + "step": 4734 + }, + { + "epoch": 1.0915273775216139, + "grad_norm": 1.6425169957458328, + "learning_rate": 9.318237224609452e-07, + "loss": 0.4608195424079895, + "step": 4735 + }, + { + "epoch": 1.0917579250720462, + "grad_norm": 1.4917232104667926, + "learning_rate": 9.314434455639039e-07, + "loss": 0.47512125968933105, + "step": 4736 + }, + { + "epoch": 1.0919884726224784, + "grad_norm": 1.3580334365102393, + "learning_rate": 9.310631786274166e-07, + "loss": 0.43002909421920776, + "step": 4737 + }, + { + "epoch": 1.0922190201729107, + "grad_norm": 1.4761686095814857, + "learning_rate": 9.306829217067326e-07, + "loss": 0.4949305057525635, + "step": 4738 + }, + { + "epoch": 1.092449567723343, + "grad_norm": 1.7661439414574969, + "learning_rate": 9.30302674857099e-07, + "loss": 0.5113263726234436, + "step": 4739 + }, + { + "epoch": 1.0926801152737753, + "grad_norm": 1.4223065996038133, + "learning_rate": 9.299224381337616e-07, + "loss": 0.4186338186264038, + "step": 4740 + }, + { + "epoch": 1.0929106628242076, + "grad_norm": 1.3736249275403172, + "learning_rate": 9.295422115919646e-07, + "loss": 0.4391184151172638, + "step": 4741 + }, + { + "epoch": 1.0931412103746398, + "grad_norm": 1.6001208967535951, + "learning_rate": 9.291619952869517e-07, + "loss": 0.5554238557815552, + "step": 4742 + }, + { + "epoch": 1.0933717579250721, + "grad_norm": 1.4372296033393044, + "learning_rate": 9.287817892739641e-07, + "loss": 0.5122871398925781, + "step": 4743 + }, + { + "epoch": 1.0936023054755044, + "grad_norm": 1.481435629659421, + "learning_rate": 9.284015936082413e-07, + "loss": 0.5083051919937134, + "step": 4744 + }, + { + "epoch": 1.0938328530259367, + "grad_norm": 1.3561535598473955, + "learning_rate": 9.28021408345022e-07, + "loss": 0.5710517168045044, + "step": 4745 + }, + { + "epoch": 1.094063400576369, + "grad_norm": 1.1918247091292848, + "learning_rate": 9.276412335395438e-07, + "loss": 0.35339492559432983, + "step": 4746 + }, + { + "epoch": 1.0942939481268013, + "grad_norm": 1.5524968954607061, + "learning_rate": 9.272610692470412e-07, + "loss": 0.44773539900779724, + "step": 4747 + }, + { + "epoch": 1.0945244956772335, + "grad_norm": 1.5477488686398166, + "learning_rate": 9.268809155227489e-07, + "loss": 0.47520720958709717, + "step": 4748 + }, + { + "epoch": 1.0947550432276656, + "grad_norm": 1.4714161216323214, + "learning_rate": 9.265007724218987e-07, + "loss": 0.44710588455200195, + "step": 4749 + }, + { + "epoch": 1.0949855907780979, + "grad_norm": 2.110410170202345, + "learning_rate": 9.261206399997216e-07, + "loss": 0.49495795369148254, + "step": 4750 + }, + { + "epoch": 1.0952161383285302, + "grad_norm": 1.5374348664848534, + "learning_rate": 9.257405183114472e-07, + "loss": 0.4427485167980194, + "step": 4751 + }, + { + "epoch": 1.0954466858789624, + "grad_norm": 1.6718709382696622, + "learning_rate": 9.253604074123029e-07, + "loss": 0.5643877983093262, + "step": 4752 + }, + { + "epoch": 1.0956772334293947, + "grad_norm": 1.2647633162037908, + "learning_rate": 9.249803073575147e-07, + "loss": 0.37769174575805664, + "step": 4753 + }, + { + "epoch": 1.095907780979827, + "grad_norm": 1.3828941981233933, + "learning_rate": 9.246002182023078e-07, + "loss": 0.4493235945701599, + "step": 4754 + }, + { + "epoch": 1.0961383285302593, + "grad_norm": 1.4529605693419783, + "learning_rate": 9.242201400019046e-07, + "loss": 0.38867485523223877, + "step": 4755 + }, + { + "epoch": 1.0963688760806916, + "grad_norm": 1.3977757435336773, + "learning_rate": 9.23840072811527e-07, + "loss": 0.3969729244709015, + "step": 4756 + }, + { + "epoch": 1.0965994236311238, + "grad_norm": 1.5812573695205239, + "learning_rate": 9.234600166863941e-07, + "loss": 0.5222212076187134, + "step": 4757 + }, + { + "epoch": 1.0968299711815561, + "grad_norm": 1.5818434035233087, + "learning_rate": 9.230799716817248e-07, + "loss": 0.42709431052207947, + "step": 4758 + }, + { + "epoch": 1.0970605187319884, + "grad_norm": 1.587224691957781, + "learning_rate": 9.226999378527356e-07, + "loss": 0.4916965365409851, + "step": 4759 + }, + { + "epoch": 1.0972910662824207, + "grad_norm": 1.3771120349564459, + "learning_rate": 9.223199152546409e-07, + "loss": 0.4756208062171936, + "step": 4760 + }, + { + "epoch": 1.097521613832853, + "grad_norm": 1.3764453858571746, + "learning_rate": 9.219399039426546e-07, + "loss": 0.45373475551605225, + "step": 4761 + }, + { + "epoch": 1.0977521613832852, + "grad_norm": 1.5172679641639661, + "learning_rate": 9.215599039719884e-07, + "loss": 0.45572659373283386, + "step": 4762 + }, + { + "epoch": 1.0979827089337175, + "grad_norm": 1.44738634077843, + "learning_rate": 9.211799153978519e-07, + "loss": 0.5436868071556091, + "step": 4763 + }, + { + "epoch": 1.0982132564841498, + "grad_norm": 1.6883255773599282, + "learning_rate": 9.20799938275454e-07, + "loss": 0.5677649974822998, + "step": 4764 + }, + { + "epoch": 1.098443804034582, + "grad_norm": 1.4487804074611155, + "learning_rate": 9.20419972660001e-07, + "loss": 0.44892898201942444, + "step": 4765 + }, + { + "epoch": 1.0986743515850144, + "grad_norm": 1.4841544259994688, + "learning_rate": 9.200400186066979e-07, + "loss": 0.4553558826446533, + "step": 4766 + }, + { + "epoch": 1.0989048991354466, + "grad_norm": 1.7722170802145676, + "learning_rate": 9.196600761707487e-07, + "loss": 0.4699048697948456, + "step": 4767 + }, + { + "epoch": 1.099135446685879, + "grad_norm": 1.4749195909576496, + "learning_rate": 9.192801454073546e-07, + "loss": 0.4807226359844208, + "step": 4768 + }, + { + "epoch": 1.0993659942363112, + "grad_norm": 1.5160658010929253, + "learning_rate": 9.189002263717153e-07, + "loss": 0.4812129735946655, + "step": 4769 + }, + { + "epoch": 1.0995965417867435, + "grad_norm": 1.622674615504792, + "learning_rate": 9.185203191190298e-07, + "loss": 0.5983732342720032, + "step": 4770 + }, + { + "epoch": 1.0998270893371758, + "grad_norm": 1.3486018335986438, + "learning_rate": 9.181404237044943e-07, + "loss": 0.490646094083786, + "step": 4771 + }, + { + "epoch": 1.100057636887608, + "grad_norm": 1.4440200781704027, + "learning_rate": 9.177605401833037e-07, + "loss": 0.38754355907440186, + "step": 4772 + }, + { + "epoch": 1.1002881844380403, + "grad_norm": 1.6409742404681038, + "learning_rate": 9.173806686106508e-07, + "loss": 0.4629727005958557, + "step": 4773 + }, + { + "epoch": 1.1005187319884726, + "grad_norm": 1.7575318830561308, + "learning_rate": 9.170008090417274e-07, + "loss": 0.6194955110549927, + "step": 4774 + }, + { + "epoch": 1.100749279538905, + "grad_norm": 1.5942308214770384, + "learning_rate": 9.166209615317233e-07, + "loss": 0.49309587478637695, + "step": 4775 + }, + { + "epoch": 1.1009798270893372, + "grad_norm": 1.5728528642226745, + "learning_rate": 9.162411261358256e-07, + "loss": 0.5451973080635071, + "step": 4776 + }, + { + "epoch": 1.1012103746397695, + "grad_norm": 1.5387601694748296, + "learning_rate": 9.158613029092213e-07, + "loss": 0.48952755331993103, + "step": 4777 + }, + { + "epoch": 1.1014409221902017, + "grad_norm": 1.5407814625276188, + "learning_rate": 9.154814919070945e-07, + "loss": 0.44005918502807617, + "step": 4778 + }, + { + "epoch": 1.101671469740634, + "grad_norm": 2.0586763099102945, + "learning_rate": 9.151016931846274e-07, + "loss": 0.590754508972168, + "step": 4779 + }, + { + "epoch": 1.1019020172910663, + "grad_norm": 1.703751089767445, + "learning_rate": 9.147219067970015e-07, + "loss": 0.4286467432975769, + "step": 4780 + }, + { + "epoch": 1.1021325648414986, + "grad_norm": 1.3155675663714783, + "learning_rate": 9.143421327993951e-07, + "loss": 0.5757718086242676, + "step": 4781 + }, + { + "epoch": 1.1023631123919309, + "grad_norm": 1.5555439515197538, + "learning_rate": 9.139623712469855e-07, + "loss": 0.4068324565887451, + "step": 4782 + }, + { + "epoch": 1.1025936599423631, + "grad_norm": 1.3476061460464042, + "learning_rate": 9.13582622194949e-07, + "loss": 0.47467997670173645, + "step": 4783 + }, + { + "epoch": 1.1028242074927954, + "grad_norm": 1.58616176312586, + "learning_rate": 9.13202885698458e-07, + "loss": 0.4873571991920471, + "step": 4784 + }, + { + "epoch": 1.1030547550432277, + "grad_norm": 1.6677849857086358, + "learning_rate": 9.12823161812685e-07, + "loss": 0.6014094352722168, + "step": 4785 + }, + { + "epoch": 1.10328530259366, + "grad_norm": 1.2865551378829043, + "learning_rate": 9.124434505927996e-07, + "loss": 0.3454878330230713, + "step": 4786 + }, + { + "epoch": 1.1035158501440923, + "grad_norm": 1.57020124413607, + "learning_rate": 9.120637520939698e-07, + "loss": 0.5097141265869141, + "step": 4787 + }, + { + "epoch": 1.1037463976945245, + "grad_norm": 1.289337143667391, + "learning_rate": 9.116840663713624e-07, + "loss": 0.48765894770622253, + "step": 4788 + }, + { + "epoch": 1.1039769452449568, + "grad_norm": 1.2994448528986111, + "learning_rate": 9.113043934801412e-07, + "loss": 0.4968012571334839, + "step": 4789 + }, + { + "epoch": 1.104207492795389, + "grad_norm": 1.3673390492212412, + "learning_rate": 9.109247334754688e-07, + "loss": 0.38764679431915283, + "step": 4790 + }, + { + "epoch": 1.1044380403458214, + "grad_norm": 1.489042273618506, + "learning_rate": 9.105450864125064e-07, + "loss": 0.4387054145336151, + "step": 4791 + }, + { + "epoch": 1.1046685878962537, + "grad_norm": 1.3415444855993421, + "learning_rate": 9.101654523464121e-07, + "loss": 0.4431966543197632, + "step": 4792 + }, + { + "epoch": 1.104899135446686, + "grad_norm": 1.5309861879191928, + "learning_rate": 9.097858313323434e-07, + "loss": 0.5146256685256958, + "step": 4793 + }, + { + "epoch": 1.1051296829971182, + "grad_norm": 1.3518960528695185, + "learning_rate": 9.094062234254543e-07, + "loss": 0.4577777683734894, + "step": 4794 + }, + { + "epoch": 1.1053602305475505, + "grad_norm": 1.7110129497463176, + "learning_rate": 9.09026628680899e-07, + "loss": 0.48272567987442017, + "step": 4795 + }, + { + "epoch": 1.1055907780979828, + "grad_norm": 1.785422212510911, + "learning_rate": 9.086470471538281e-07, + "loss": 0.4567776024341583, + "step": 4796 + }, + { + "epoch": 1.105821325648415, + "grad_norm": 2.043723899772794, + "learning_rate": 9.082674788993907e-07, + "loss": 0.5596228837966919, + "step": 4797 + }, + { + "epoch": 1.1060518731988473, + "grad_norm": 1.6819939360850304, + "learning_rate": 9.078879239727344e-07, + "loss": 0.5048364400863647, + "step": 4798 + }, + { + "epoch": 1.1062824207492796, + "grad_norm": 1.700251984933967, + "learning_rate": 9.075083824290048e-07, + "loss": 0.5421375036239624, + "step": 4799 + }, + { + "epoch": 1.106512968299712, + "grad_norm": 1.4133431513066128, + "learning_rate": 9.071288543233446e-07, + "loss": 0.377308189868927, + "step": 4800 + }, + { + "epoch": 1.1067435158501442, + "grad_norm": 1.605698503728756, + "learning_rate": 9.067493397108963e-07, + "loss": 0.43329721689224243, + "step": 4801 + }, + { + "epoch": 1.1069740634005765, + "grad_norm": 1.7362921748159275, + "learning_rate": 9.063698386467986e-07, + "loss": 0.4355202913284302, + "step": 4802 + }, + { + "epoch": 1.1072046109510087, + "grad_norm": 1.4551532252708075, + "learning_rate": 9.059903511861891e-07, + "loss": 0.46485209465026855, + "step": 4803 + }, + { + "epoch": 1.107435158501441, + "grad_norm": 1.3716674949679923, + "learning_rate": 9.056108773842039e-07, + "loss": 0.4985603094100952, + "step": 4804 + }, + { + "epoch": 1.1076657060518733, + "grad_norm": 1.5586874286041357, + "learning_rate": 9.052314172959763e-07, + "loss": 0.5287209749221802, + "step": 4805 + }, + { + "epoch": 1.1078962536023056, + "grad_norm": 1.680660088067333, + "learning_rate": 9.048519709766375e-07, + "loss": 0.5115060806274414, + "step": 4806 + }, + { + "epoch": 1.1081268011527377, + "grad_norm": 1.4253147157574297, + "learning_rate": 9.044725384813181e-07, + "loss": 0.5016480088233948, + "step": 4807 + }, + { + "epoch": 1.10835734870317, + "grad_norm": 1.623786045853983, + "learning_rate": 9.040931198651449e-07, + "loss": 0.4842113256454468, + "step": 4808 + }, + { + "epoch": 1.1085878962536022, + "grad_norm": 1.7318094433488047, + "learning_rate": 9.037137151832439e-07, + "loss": 0.5217285752296448, + "step": 4809 + }, + { + "epoch": 1.1088184438040345, + "grad_norm": 1.5956576132022964, + "learning_rate": 9.033343244907378e-07, + "loss": 0.5034878253936768, + "step": 4810 + }, + { + "epoch": 1.1090489913544668, + "grad_norm": 1.450218297985674, + "learning_rate": 9.029549478427492e-07, + "loss": 0.5122587084770203, + "step": 4811 + }, + { + "epoch": 1.109279538904899, + "grad_norm": 1.5471365323068564, + "learning_rate": 9.025755852943972e-07, + "loss": 0.503132164478302, + "step": 4812 + }, + { + "epoch": 1.1095100864553313, + "grad_norm": 1.3442228745978584, + "learning_rate": 9.021962369007989e-07, + "loss": 0.42048484086990356, + "step": 4813 + }, + { + "epoch": 1.1097406340057636, + "grad_norm": 1.4746279418354657, + "learning_rate": 9.018169027170701e-07, + "loss": 0.4284360706806183, + "step": 4814 + }, + { + "epoch": 1.109971181556196, + "grad_norm": 1.5301705140832524, + "learning_rate": 9.014375827983241e-07, + "loss": 0.5939760208129883, + "step": 4815 + }, + { + "epoch": 1.1102017291066282, + "grad_norm": 1.4889436613914528, + "learning_rate": 9.010582771996716e-07, + "loss": 0.4964909553527832, + "step": 4816 + }, + { + "epoch": 1.1104322766570605, + "grad_norm": 1.5842307031357417, + "learning_rate": 9.006789859762227e-07, + "loss": 0.4841457009315491, + "step": 4817 + }, + { + "epoch": 1.1106628242074927, + "grad_norm": 1.8305702695417665, + "learning_rate": 9.002997091830836e-07, + "loss": 0.5084402561187744, + "step": 4818 + }, + { + "epoch": 1.110893371757925, + "grad_norm": 1.4377869688891702, + "learning_rate": 8.999204468753594e-07, + "loss": 0.4945530295372009, + "step": 4819 + }, + { + "epoch": 1.1111239193083573, + "grad_norm": 1.424196476465804, + "learning_rate": 8.995411991081535e-07, + "loss": 0.45646482706069946, + "step": 4820 + }, + { + "epoch": 1.1113544668587896, + "grad_norm": 1.8000869076178088, + "learning_rate": 8.991619659365662e-07, + "loss": 0.4761883020401001, + "step": 4821 + }, + { + "epoch": 1.1115850144092219, + "grad_norm": 1.4244112912910287, + "learning_rate": 8.987827474156965e-07, + "loss": 0.37377166748046875, + "step": 4822 + }, + { + "epoch": 1.1118155619596541, + "grad_norm": 1.42162278725734, + "learning_rate": 8.984035436006402e-07, + "loss": 0.46969032287597656, + "step": 4823 + }, + { + "epoch": 1.1120461095100864, + "grad_norm": 1.4551702613987585, + "learning_rate": 8.980243545464923e-07, + "loss": 0.4871477484703064, + "step": 4824 + }, + { + "epoch": 1.1122766570605187, + "grad_norm": 1.3735159399298857, + "learning_rate": 8.976451803083452e-07, + "loss": 0.5354928970336914, + "step": 4825 + }, + { + "epoch": 1.112507204610951, + "grad_norm": 1.487364565462764, + "learning_rate": 8.972660209412879e-07, + "loss": 0.541530966758728, + "step": 4826 + }, + { + "epoch": 1.1127377521613833, + "grad_norm": 1.3765304830204101, + "learning_rate": 8.968868765004095e-07, + "loss": 0.5282139778137207, + "step": 4827 + }, + { + "epoch": 1.1129682997118155, + "grad_norm": 1.5188179873424779, + "learning_rate": 8.965077470407955e-07, + "loss": 0.43828681111335754, + "step": 4828 + }, + { + "epoch": 1.1131988472622478, + "grad_norm": 1.540567571058341, + "learning_rate": 8.961286326175287e-07, + "loss": 0.528573751449585, + "step": 4829 + }, + { + "epoch": 1.11342939481268, + "grad_norm": 2.012088539323808, + "learning_rate": 8.957495332856915e-07, + "loss": 0.5125828385353088, + "step": 4830 + }, + { + "epoch": 1.1136599423631124, + "grad_norm": 1.7582058630443533, + "learning_rate": 8.953704491003624e-07, + "loss": 0.659877359867096, + "step": 4831 + }, + { + "epoch": 1.1138904899135447, + "grad_norm": 1.6814587419722573, + "learning_rate": 8.949913801166183e-07, + "loss": 0.4152098298072815, + "step": 4832 + }, + { + "epoch": 1.114121037463977, + "grad_norm": 1.4999210133226772, + "learning_rate": 8.946123263895349e-07, + "loss": 0.4220502972602844, + "step": 4833 + }, + { + "epoch": 1.1143515850144092, + "grad_norm": 1.9244731687036327, + "learning_rate": 8.942332879741838e-07, + "loss": 0.5434603691101074, + "step": 4834 + }, + { + "epoch": 1.1145821325648415, + "grad_norm": 1.4746482954312572, + "learning_rate": 8.938542649256355e-07, + "loss": 0.5064136981964111, + "step": 4835 + }, + { + "epoch": 1.1148126801152738, + "grad_norm": 1.5136764860349439, + "learning_rate": 8.934752572989588e-07, + "loss": 0.4972747266292572, + "step": 4836 + }, + { + "epoch": 1.115043227665706, + "grad_norm": 1.5111229695700725, + "learning_rate": 8.930962651492188e-07, + "loss": 0.4222022294998169, + "step": 4837 + }, + { + "epoch": 1.1152737752161384, + "grad_norm": 1.66913696458909, + "learning_rate": 8.927172885314796e-07, + "loss": 0.4139519929885864, + "step": 4838 + }, + { + "epoch": 1.1155043227665706, + "grad_norm": 1.7770405386437609, + "learning_rate": 8.923383275008018e-07, + "loss": 0.6009548306465149, + "step": 4839 + }, + { + "epoch": 1.115734870317003, + "grad_norm": 1.4282707283397387, + "learning_rate": 8.919593821122455e-07, + "loss": 0.5030194520950317, + "step": 4840 + }, + { + "epoch": 1.1159654178674352, + "grad_norm": 1.3591409790995543, + "learning_rate": 8.915804524208669e-07, + "loss": 0.5004386901855469, + "step": 4841 + }, + { + "epoch": 1.1161959654178675, + "grad_norm": 1.7972328125538324, + "learning_rate": 8.912015384817206e-07, + "loss": 0.39989084005355835, + "step": 4842 + }, + { + "epoch": 1.1164265129682998, + "grad_norm": 1.6078188458230296, + "learning_rate": 8.90822640349859e-07, + "loss": 0.46599575877189636, + "step": 4843 + }, + { + "epoch": 1.116657060518732, + "grad_norm": 1.5910400636453796, + "learning_rate": 8.904437580803322e-07, + "loss": 0.48841774463653564, + "step": 4844 + }, + { + "epoch": 1.1168876080691643, + "grad_norm": 1.4794610309861063, + "learning_rate": 8.900648917281873e-07, + "loss": 0.4746759831905365, + "step": 4845 + }, + { + "epoch": 1.1171181556195966, + "grad_norm": 2.0991510052575095, + "learning_rate": 8.896860413484705e-07, + "loss": 0.5819174647331238, + "step": 4846 + }, + { + "epoch": 1.1173487031700289, + "grad_norm": 1.3802001580093008, + "learning_rate": 8.893072069962239e-07, + "loss": 0.4410976469516754, + "step": 4847 + }, + { + "epoch": 1.1175792507204612, + "grad_norm": 1.3344798381915968, + "learning_rate": 8.889283887264887e-07, + "loss": 0.43819811940193176, + "step": 4848 + }, + { + "epoch": 1.1178097982708934, + "grad_norm": 1.3486885144083764, + "learning_rate": 8.885495865943033e-07, + "loss": 0.43866032361984253, + "step": 4849 + }, + { + "epoch": 1.1180403458213257, + "grad_norm": 1.5267759422148517, + "learning_rate": 8.881708006547035e-07, + "loss": 0.44860726594924927, + "step": 4850 + }, + { + "epoch": 1.118270893371758, + "grad_norm": 1.4715352178336798, + "learning_rate": 8.877920309627228e-07, + "loss": 0.579569935798645, + "step": 4851 + }, + { + "epoch": 1.1185014409221903, + "grad_norm": 1.6155598521149455, + "learning_rate": 8.874132775733931e-07, + "loss": 0.41236403584480286, + "step": 4852 + }, + { + "epoch": 1.1187319884726226, + "grad_norm": 1.549256572240456, + "learning_rate": 8.870345405417428e-07, + "loss": 0.45414695143699646, + "step": 4853 + }, + { + "epoch": 1.1189625360230548, + "grad_norm": 1.6050881880917633, + "learning_rate": 8.866558199227988e-07, + "loss": 0.44578975439071655, + "step": 4854 + }, + { + "epoch": 1.1191930835734871, + "grad_norm": 1.4571011661113775, + "learning_rate": 8.862771157715846e-07, + "loss": 0.42466121912002563, + "step": 4855 + }, + { + "epoch": 1.1194236311239194, + "grad_norm": 1.6079203038736904, + "learning_rate": 8.858984281431228e-07, + "loss": 0.5185168981552124, + "step": 4856 + }, + { + "epoch": 1.1196541786743517, + "grad_norm": 1.7110922940542974, + "learning_rate": 8.855197570924324e-07, + "loss": 0.4561188817024231, + "step": 4857 + }, + { + "epoch": 1.1198847262247837, + "grad_norm": 1.5555744196993486, + "learning_rate": 8.851411026745302e-07, + "loss": 0.4159931540489197, + "step": 4858 + }, + { + "epoch": 1.120115273775216, + "grad_norm": 1.5248463429149375, + "learning_rate": 8.847624649444309e-07, + "loss": 0.5513845682144165, + "step": 4859 + }, + { + "epoch": 1.1203458213256483, + "grad_norm": 1.4270693773942695, + "learning_rate": 8.84383843957147e-07, + "loss": 0.4656720757484436, + "step": 4860 + }, + { + "epoch": 1.1205763688760806, + "grad_norm": 1.382817028392081, + "learning_rate": 8.840052397676873e-07, + "loss": 0.5290340781211853, + "step": 4861 + }, + { + "epoch": 1.1208069164265129, + "grad_norm": 1.727021963566853, + "learning_rate": 8.836266524310603e-07, + "loss": 0.4851052165031433, + "step": 4862 + }, + { + "epoch": 1.1210374639769451, + "grad_norm": 1.435439662651569, + "learning_rate": 8.832480820022696e-07, + "loss": 0.45340481400489807, + "step": 4863 + }, + { + "epoch": 1.1212680115273774, + "grad_norm": 1.7016168632340074, + "learning_rate": 8.828695285363179e-07, + "loss": 0.5824764966964722, + "step": 4864 + }, + { + "epoch": 1.1214985590778097, + "grad_norm": 1.4809494332211275, + "learning_rate": 8.824909920882056e-07, + "loss": 0.4200345277786255, + "step": 4865 + }, + { + "epoch": 1.121729106628242, + "grad_norm": 1.7782815491554205, + "learning_rate": 8.821124727129297e-07, + "loss": 0.4244277775287628, + "step": 4866 + }, + { + "epoch": 1.1219596541786743, + "grad_norm": 1.3618326629672723, + "learning_rate": 8.817339704654852e-07, + "loss": 0.43537044525146484, + "step": 4867 + }, + { + "epoch": 1.1221902017291066, + "grad_norm": 1.8260743774909134, + "learning_rate": 8.813554854008641e-07, + "loss": 0.42428910732269287, + "step": 4868 + }, + { + "epoch": 1.1224207492795388, + "grad_norm": 1.5288566423130419, + "learning_rate": 8.80977017574057e-07, + "loss": 0.47865474224090576, + "step": 4869 + }, + { + "epoch": 1.1226512968299711, + "grad_norm": 1.5444061070950739, + "learning_rate": 8.805985670400513e-07, + "loss": 0.4466247856616974, + "step": 4870 + }, + { + "epoch": 1.1228818443804034, + "grad_norm": 1.8287166421574825, + "learning_rate": 8.802201338538312e-07, + "loss": 0.3930908441543579, + "step": 4871 + }, + { + "epoch": 1.1231123919308357, + "grad_norm": 1.8199781856573405, + "learning_rate": 8.798417180703799e-07, + "loss": 0.4518371522426605, + "step": 4872 + }, + { + "epoch": 1.123342939481268, + "grad_norm": 1.70058941138662, + "learning_rate": 8.79463319744677e-07, + "loss": 0.5394526720046997, + "step": 4873 + }, + { + "epoch": 1.1235734870317002, + "grad_norm": 1.575874203584041, + "learning_rate": 8.790849389316997e-07, + "loss": 0.4246562719345093, + "step": 4874 + }, + { + "epoch": 1.1238040345821325, + "grad_norm": 1.9575281438943082, + "learning_rate": 8.787065756864232e-07, + "loss": 0.5607410669326782, + "step": 4875 + }, + { + "epoch": 1.1240345821325648, + "grad_norm": 1.3749123465435562, + "learning_rate": 8.783282300638192e-07, + "loss": 0.38254958391189575, + "step": 4876 + }, + { + "epoch": 1.124265129682997, + "grad_norm": 1.3613826546915397, + "learning_rate": 8.779499021188574e-07, + "loss": 0.44105666875839233, + "step": 4877 + }, + { + "epoch": 1.1244956772334294, + "grad_norm": 1.5631401572974992, + "learning_rate": 8.775715919065056e-07, + "loss": 0.37160882353782654, + "step": 4878 + }, + { + "epoch": 1.1247262247838616, + "grad_norm": 1.7025997410821614, + "learning_rate": 8.771932994817278e-07, + "loss": 0.49676722288131714, + "step": 4879 + }, + { + "epoch": 1.124956772334294, + "grad_norm": 1.6298726513756532, + "learning_rate": 8.768150248994858e-07, + "loss": 0.5305773019790649, + "step": 4880 + }, + { + "epoch": 1.1251873198847262, + "grad_norm": 1.2917934278832566, + "learning_rate": 8.764367682147395e-07, + "loss": 0.36669573187828064, + "step": 4881 + }, + { + "epoch": 1.1254178674351585, + "grad_norm": 1.5501344216889468, + "learning_rate": 8.760585294824454e-07, + "loss": 0.4404510259628296, + "step": 4882 + }, + { + "epoch": 1.1256484149855908, + "grad_norm": 1.390626461072737, + "learning_rate": 8.756803087575578e-07, + "loss": 0.4724805951118469, + "step": 4883 + }, + { + "epoch": 1.125878962536023, + "grad_norm": 1.548592980840675, + "learning_rate": 8.753021060950274e-07, + "loss": 0.45248547196388245, + "step": 4884 + }, + { + "epoch": 1.1261095100864553, + "grad_norm": 1.3484839335073744, + "learning_rate": 8.749239215498043e-07, + "loss": 0.4234843850135803, + "step": 4885 + }, + { + "epoch": 1.1263400576368876, + "grad_norm": 1.8625422172760093, + "learning_rate": 8.745457551768342e-07, + "loss": 0.49027156829833984, + "step": 4886 + }, + { + "epoch": 1.1265706051873199, + "grad_norm": 1.5067827985484183, + "learning_rate": 8.741676070310605e-07, + "loss": 0.6061224341392517, + "step": 4887 + }, + { + "epoch": 1.1268011527377522, + "grad_norm": 1.5336136504090925, + "learning_rate": 8.737894771674248e-07, + "loss": 0.43946483731269836, + "step": 4888 + }, + { + "epoch": 1.1270317002881844, + "grad_norm": 1.480981892396503, + "learning_rate": 8.734113656408651e-07, + "loss": 0.5716358423233032, + "step": 4889 + }, + { + "epoch": 1.1272622478386167, + "grad_norm": 1.7389086828401863, + "learning_rate": 8.730332725063168e-07, + "loss": 0.5096737146377563, + "step": 4890 + }, + { + "epoch": 1.127492795389049, + "grad_norm": 1.6664009863971434, + "learning_rate": 8.726551978187138e-07, + "loss": 0.38695579767227173, + "step": 4891 + }, + { + "epoch": 1.1277233429394813, + "grad_norm": 1.8381615508922664, + "learning_rate": 8.722771416329854e-07, + "loss": 0.5407450795173645, + "step": 4892 + }, + { + "epoch": 1.1279538904899136, + "grad_norm": 1.9914901771925757, + "learning_rate": 8.718991040040594e-07, + "loss": 0.5152771472930908, + "step": 4893 + }, + { + "epoch": 1.1281844380403458, + "grad_norm": 1.5958383708352593, + "learning_rate": 8.715210849868615e-07, + "loss": 0.5410465598106384, + "step": 4894 + }, + { + "epoch": 1.1284149855907781, + "grad_norm": 1.337702225927413, + "learning_rate": 8.711430846363132e-07, + "loss": 0.4508117437362671, + "step": 4895 + }, + { + "epoch": 1.1286455331412104, + "grad_norm": 1.6777628929807271, + "learning_rate": 8.70765103007334e-07, + "loss": 0.5080430507659912, + "step": 4896 + }, + { + "epoch": 1.1288760806916427, + "grad_norm": 1.7083293725933877, + "learning_rate": 8.703871401548415e-07, + "loss": 0.470861554145813, + "step": 4897 + }, + { + "epoch": 1.129106628242075, + "grad_norm": 1.872662754797883, + "learning_rate": 8.700091961337486e-07, + "loss": 0.6018689274787903, + "step": 4898 + }, + { + "epoch": 1.1293371757925073, + "grad_norm": 1.5500246582401473, + "learning_rate": 8.696312709989677e-07, + "loss": 0.5060360431671143, + "step": 4899 + }, + { + "epoch": 1.1295677233429395, + "grad_norm": 1.4588678421625432, + "learning_rate": 8.692533648054067e-07, + "loss": 0.45208120346069336, + "step": 4900 + }, + { + "epoch": 1.1297982708933718, + "grad_norm": 1.5825168488447021, + "learning_rate": 8.688754776079714e-07, + "loss": 0.536127507686615, + "step": 4901 + }, + { + "epoch": 1.130028818443804, + "grad_norm": 1.5789528377651103, + "learning_rate": 8.684976094615657e-07, + "loss": 0.4760720133781433, + "step": 4902 + }, + { + "epoch": 1.1302593659942364, + "grad_norm": 1.5428248897097812, + "learning_rate": 8.68119760421089e-07, + "loss": 0.5124382972717285, + "step": 4903 + }, + { + "epoch": 1.1304899135446687, + "grad_norm": 1.3522374572244407, + "learning_rate": 8.67741930541439e-07, + "loss": 0.49435877799987793, + "step": 4904 + }, + { + "epoch": 1.130720461095101, + "grad_norm": 1.5469811060879746, + "learning_rate": 8.673641198775111e-07, + "loss": 0.48838311433792114, + "step": 4905 + }, + { + "epoch": 1.1309510086455332, + "grad_norm": 1.3093772355610735, + "learning_rate": 8.669863284841966e-07, + "loss": 0.47754883766174316, + "step": 4906 + }, + { + "epoch": 1.1311815561959655, + "grad_norm": 1.657432473083644, + "learning_rate": 8.666085564163851e-07, + "loss": 0.49613162875175476, + "step": 4907 + }, + { + "epoch": 1.1314121037463978, + "grad_norm": 1.4477928132979816, + "learning_rate": 8.662308037289622e-07, + "loss": 0.4798436462879181, + "step": 4908 + }, + { + "epoch": 1.13164265129683, + "grad_norm": 1.7066266033391562, + "learning_rate": 8.658530704768121e-07, + "loss": 0.4057399034500122, + "step": 4909 + }, + { + "epoch": 1.1318731988472623, + "grad_norm": 1.7346495783036942, + "learning_rate": 8.654753567148157e-07, + "loss": 0.5270807147026062, + "step": 4910 + }, + { + "epoch": 1.1321037463976946, + "grad_norm": 1.4512953437479286, + "learning_rate": 8.650976624978502e-07, + "loss": 0.5122570991516113, + "step": 4911 + }, + { + "epoch": 1.132334293948127, + "grad_norm": 1.7172370990813912, + "learning_rate": 8.647199878807912e-07, + "loss": 0.5793176889419556, + "step": 4912 + }, + { + "epoch": 1.1325648414985592, + "grad_norm": 1.4271719139773258, + "learning_rate": 8.643423329185104e-07, + "loss": 0.48190778493881226, + "step": 4913 + }, + { + "epoch": 1.1327953890489915, + "grad_norm": 1.606113566768016, + "learning_rate": 8.639646976658774e-07, + "loss": 0.4640873074531555, + "step": 4914 + }, + { + "epoch": 1.1330259365994237, + "grad_norm": 1.6263125990821405, + "learning_rate": 8.635870821777591e-07, + "loss": 0.5187903642654419, + "step": 4915 + }, + { + "epoch": 1.133256484149856, + "grad_norm": 1.414474025707106, + "learning_rate": 8.632094865090184e-07, + "loss": 0.5168712735176086, + "step": 4916 + }, + { + "epoch": 1.1334870317002883, + "grad_norm": 1.7263194018807568, + "learning_rate": 8.628319107145161e-07, + "loss": 0.4974696636199951, + "step": 4917 + }, + { + "epoch": 1.1337175792507204, + "grad_norm": 1.609612326681278, + "learning_rate": 8.624543548491105e-07, + "loss": 0.5246702432632446, + "step": 4918 + }, + { + "epoch": 1.1339481268011526, + "grad_norm": 1.2630050489883073, + "learning_rate": 8.620768189676564e-07, + "loss": 0.4184077978134155, + "step": 4919 + }, + { + "epoch": 1.134178674351585, + "grad_norm": 1.5960723155174386, + "learning_rate": 8.616993031250057e-07, + "loss": 0.5239197015762329, + "step": 4920 + }, + { + "epoch": 1.1344092219020172, + "grad_norm": 1.712138394358373, + "learning_rate": 8.613218073760073e-07, + "loss": 0.5908505916595459, + "step": 4921 + }, + { + "epoch": 1.1346397694524495, + "grad_norm": 1.9712676240506681, + "learning_rate": 8.609443317755077e-07, + "loss": 0.4874504506587982, + "step": 4922 + }, + { + "epoch": 1.1348703170028818, + "grad_norm": 1.5709977593377218, + "learning_rate": 8.605668763783503e-07, + "loss": 0.44740432500839233, + "step": 4923 + }, + { + "epoch": 1.135100864553314, + "grad_norm": 1.9603291324895793, + "learning_rate": 8.60189441239375e-07, + "loss": 0.3754269480705261, + "step": 4924 + }, + { + "epoch": 1.1353314121037463, + "grad_norm": 1.505221231169863, + "learning_rate": 8.598120264134195e-07, + "loss": 0.40734755992889404, + "step": 4925 + }, + { + "epoch": 1.1355619596541786, + "grad_norm": 1.5628975410629704, + "learning_rate": 8.594346319553186e-07, + "loss": 0.5404030680656433, + "step": 4926 + }, + { + "epoch": 1.135792507204611, + "grad_norm": 1.7008702550350545, + "learning_rate": 8.590572579199029e-07, + "loss": 0.3678287863731384, + "step": 4927 + }, + { + "epoch": 1.1360230547550432, + "grad_norm": 1.6117451920140595, + "learning_rate": 8.586799043620019e-07, + "loss": 0.562045693397522, + "step": 4928 + }, + { + "epoch": 1.1362536023054755, + "grad_norm": 1.298080197408525, + "learning_rate": 8.583025713364404e-07, + "loss": 0.46270644664764404, + "step": 4929 + }, + { + "epoch": 1.1364841498559077, + "grad_norm": 1.4129593772006035, + "learning_rate": 8.57925258898041e-07, + "loss": 0.4325964152812958, + "step": 4930 + }, + { + "epoch": 1.13671469740634, + "grad_norm": 1.6019125673177061, + "learning_rate": 8.57547967101624e-07, + "loss": 0.4210170805454254, + "step": 4931 + }, + { + "epoch": 1.1369452449567723, + "grad_norm": 1.5728507205557822, + "learning_rate": 8.571706960020053e-07, + "loss": 0.4151025414466858, + "step": 4932 + }, + { + "epoch": 1.1371757925072046, + "grad_norm": 1.6118551503277867, + "learning_rate": 8.567934456539983e-07, + "loss": 0.4276087284088135, + "step": 4933 + }, + { + "epoch": 1.1374063400576369, + "grad_norm": 1.5052919595806051, + "learning_rate": 8.564162161124144e-07, + "loss": 0.41048938035964966, + "step": 4934 + }, + { + "epoch": 1.1376368876080691, + "grad_norm": 1.571746912251262, + "learning_rate": 8.560390074320605e-07, + "loss": 0.5300489068031311, + "step": 4935 + }, + { + "epoch": 1.1378674351585014, + "grad_norm": 1.5179733531616169, + "learning_rate": 8.556618196677413e-07, + "loss": 0.5026379823684692, + "step": 4936 + }, + { + "epoch": 1.1380979827089337, + "grad_norm": 1.5055765080542705, + "learning_rate": 8.552846528742579e-07, + "loss": 0.3983602821826935, + "step": 4937 + }, + { + "epoch": 1.138328530259366, + "grad_norm": 1.55955976181173, + "learning_rate": 8.549075071064091e-07, + "loss": 0.5190225839614868, + "step": 4938 + }, + { + "epoch": 1.1385590778097983, + "grad_norm": 1.5517589840709185, + "learning_rate": 8.545303824189904e-07, + "loss": 0.49603796005249023, + "step": 4939 + }, + { + "epoch": 1.1387896253602305, + "grad_norm": 1.5762221673293377, + "learning_rate": 8.541532788667933e-07, + "loss": 0.47140175104141235, + "step": 4940 + }, + { + "epoch": 1.1390201729106628, + "grad_norm": 1.796577364081372, + "learning_rate": 8.537761965046079e-07, + "loss": 0.5093640685081482, + "step": 4941 + }, + { + "epoch": 1.139250720461095, + "grad_norm": 1.61771614386877, + "learning_rate": 8.533991353872203e-07, + "loss": 0.44927600026130676, + "step": 4942 + }, + { + "epoch": 1.1394812680115274, + "grad_norm": 1.3040634418575354, + "learning_rate": 8.530220955694127e-07, + "loss": 0.43341124057769775, + "step": 4943 + }, + { + "epoch": 1.1397118155619597, + "grad_norm": 1.4304151716973417, + "learning_rate": 8.526450771059661e-07, + "loss": 0.47579699754714966, + "step": 4944 + }, + { + "epoch": 1.139942363112392, + "grad_norm": 1.7691519185450943, + "learning_rate": 8.522680800516566e-07, + "loss": 0.42832237482070923, + "step": 4945 + }, + { + "epoch": 1.1401729106628242, + "grad_norm": 1.7533376996528798, + "learning_rate": 8.518911044612582e-07, + "loss": 0.4995993375778198, + "step": 4946 + }, + { + "epoch": 1.1404034582132565, + "grad_norm": 1.6528588102849888, + "learning_rate": 8.51514150389542e-07, + "loss": 0.4735615849494934, + "step": 4947 + }, + { + "epoch": 1.1406340057636888, + "grad_norm": 1.414172066097951, + "learning_rate": 8.511372178912746e-07, + "loss": 0.5336610078811646, + "step": 4948 + }, + { + "epoch": 1.140864553314121, + "grad_norm": 1.4570893187309275, + "learning_rate": 8.507603070212209e-07, + "loss": 0.4889930784702301, + "step": 4949 + }, + { + "epoch": 1.1410951008645533, + "grad_norm": 1.4948561610167277, + "learning_rate": 8.503834178341425e-07, + "loss": 0.4650326073169708, + "step": 4950 + }, + { + "epoch": 1.1413256484149856, + "grad_norm": 1.7146295859985705, + "learning_rate": 8.500065503847967e-07, + "loss": 0.422588586807251, + "step": 4951 + }, + { + "epoch": 1.141556195965418, + "grad_norm": 1.5443817175032366, + "learning_rate": 8.496297047279391e-07, + "loss": 0.391678124666214, + "step": 4952 + }, + { + "epoch": 1.1417867435158502, + "grad_norm": 1.6000480430652493, + "learning_rate": 8.492528809183208e-07, + "loss": 0.4672621488571167, + "step": 4953 + }, + { + "epoch": 1.1420172910662825, + "grad_norm": 1.3846533790350994, + "learning_rate": 8.488760790106907e-07, + "loss": 0.44236963987350464, + "step": 4954 + }, + { + "epoch": 1.1422478386167148, + "grad_norm": 1.872328058335658, + "learning_rate": 8.484992990597946e-07, + "loss": 0.4638679325580597, + "step": 4955 + }, + { + "epoch": 1.142478386167147, + "grad_norm": 1.4933075788494417, + "learning_rate": 8.481225411203738e-07, + "loss": 0.39137962460517883, + "step": 4956 + }, + { + "epoch": 1.1427089337175793, + "grad_norm": 1.478246135116108, + "learning_rate": 8.477458052471682e-07, + "loss": 0.4590994417667389, + "step": 4957 + }, + { + "epoch": 1.1429394812680116, + "grad_norm": 1.7349053396994705, + "learning_rate": 8.473690914949131e-07, + "loss": 0.4529002606868744, + "step": 4958 + }, + { + "epoch": 1.1431700288184439, + "grad_norm": 1.5961832007652752, + "learning_rate": 8.46992399918341e-07, + "loss": 0.4857093095779419, + "step": 4959 + }, + { + "epoch": 1.1434005763688762, + "grad_norm": 1.7309392540399018, + "learning_rate": 8.466157305721819e-07, + "loss": 0.5138260722160339, + "step": 4960 + }, + { + "epoch": 1.1436311239193084, + "grad_norm": 1.395111083728356, + "learning_rate": 8.462390835111612e-07, + "loss": 0.42551133036613464, + "step": 4961 + }, + { + "epoch": 1.1438616714697407, + "grad_norm": 1.2166875183733967, + "learning_rate": 8.458624587900021e-07, + "loss": 0.4757024049758911, + "step": 4962 + }, + { + "epoch": 1.144092219020173, + "grad_norm": 1.6010369718376882, + "learning_rate": 8.454858564634247e-07, + "loss": 0.45634210109710693, + "step": 4963 + }, + { + "epoch": 1.1443227665706053, + "grad_norm": 1.594728695980143, + "learning_rate": 8.451092765861446e-07, + "loss": 0.43474477529525757, + "step": 4964 + }, + { + "epoch": 1.1445533141210376, + "grad_norm": 1.569588067868878, + "learning_rate": 8.447327192128756e-07, + "loss": 0.3684108555316925, + "step": 4965 + }, + { + "epoch": 1.1447838616714696, + "grad_norm": 1.544743293825418, + "learning_rate": 8.443561843983269e-07, + "loss": 0.4448085427284241, + "step": 4966 + }, + { + "epoch": 1.145014409221902, + "grad_norm": 1.5575083692045397, + "learning_rate": 8.439796721972056e-07, + "loss": 0.5501620173454285, + "step": 4967 + }, + { + "epoch": 1.1452449567723342, + "grad_norm": 1.496852309992749, + "learning_rate": 8.436031826642151e-07, + "loss": 0.46348631381988525, + "step": 4968 + }, + { + "epoch": 1.1454755043227665, + "grad_norm": 1.4230652325158895, + "learning_rate": 8.432267158540549e-07, + "loss": 0.4619203209877014, + "step": 4969 + }, + { + "epoch": 1.1457060518731987, + "grad_norm": 1.5562637049741876, + "learning_rate": 8.428502718214222e-07, + "loss": 0.376537024974823, + "step": 4970 + }, + { + "epoch": 1.145936599423631, + "grad_norm": 1.2665150891924384, + "learning_rate": 8.424738506210103e-07, + "loss": 0.528576135635376, + "step": 4971 + }, + { + "epoch": 1.1461671469740633, + "grad_norm": 1.4114442104230585, + "learning_rate": 8.420974523075089e-07, + "loss": 0.41852709650993347, + "step": 4972 + }, + { + "epoch": 1.1463976945244956, + "grad_norm": 1.4444856174172405, + "learning_rate": 8.417210769356053e-07, + "loss": 0.4927000403404236, + "step": 4973 + }, + { + "epoch": 1.1466282420749279, + "grad_norm": 1.5086189669079528, + "learning_rate": 8.413447245599827e-07, + "loss": 0.43919095396995544, + "step": 4974 + }, + { + "epoch": 1.1468587896253601, + "grad_norm": 1.5829083232751864, + "learning_rate": 8.409683952353208e-07, + "loss": 0.5224364995956421, + "step": 4975 + }, + { + "epoch": 1.1470893371757924, + "grad_norm": 1.2855645239828792, + "learning_rate": 8.405920890162972e-07, + "loss": 0.47760850191116333, + "step": 4976 + }, + { + "epoch": 1.1473198847262247, + "grad_norm": 1.7882126310834827, + "learning_rate": 8.402158059575845e-07, + "loss": 0.4845304489135742, + "step": 4977 + }, + { + "epoch": 1.147550432276657, + "grad_norm": 1.5850593696336175, + "learning_rate": 8.398395461138527e-07, + "loss": 0.523693859577179, + "step": 4978 + }, + { + "epoch": 1.1477809798270893, + "grad_norm": 1.4071229583070886, + "learning_rate": 8.394633095397693e-07, + "loss": 0.44773513078689575, + "step": 4979 + }, + { + "epoch": 1.1480115273775215, + "grad_norm": 1.9493074463498687, + "learning_rate": 8.390870962899967e-07, + "loss": 0.5348447561264038, + "step": 4980 + }, + { + "epoch": 1.1482420749279538, + "grad_norm": 1.582930450446158, + "learning_rate": 8.387109064191954e-07, + "loss": 0.4859353303909302, + "step": 4981 + }, + { + "epoch": 1.148472622478386, + "grad_norm": 1.8753855861674091, + "learning_rate": 8.38334739982021e-07, + "loss": 0.4306800663471222, + "step": 4982 + }, + { + "epoch": 1.1487031700288184, + "grad_norm": 1.3609318032284317, + "learning_rate": 8.379585970331274e-07, + "loss": 0.4003479480743408, + "step": 4983 + }, + { + "epoch": 1.1489337175792507, + "grad_norm": 2.1221671774243407, + "learning_rate": 8.37582477627164e-07, + "loss": 0.6015596985816956, + "step": 4984 + }, + { + "epoch": 1.149164265129683, + "grad_norm": 1.6080254217986074, + "learning_rate": 8.372063818187767e-07, + "loss": 0.5425978302955627, + "step": 4985 + }, + { + "epoch": 1.1493948126801152, + "grad_norm": 1.6118240887855368, + "learning_rate": 8.368303096626089e-07, + "loss": 0.462574303150177, + "step": 4986 + }, + { + "epoch": 1.1496253602305475, + "grad_norm": 1.5465454371671152, + "learning_rate": 8.364542612132999e-07, + "loss": 0.4790104627609253, + "step": 4987 + }, + { + "epoch": 1.1498559077809798, + "grad_norm": 1.6307505419711474, + "learning_rate": 8.360782365254849e-07, + "loss": 0.4083213210105896, + "step": 4988 + }, + { + "epoch": 1.150086455331412, + "grad_norm": 1.6480494744817742, + "learning_rate": 8.357022356537974e-07, + "loss": 0.4527336359024048, + "step": 4989 + }, + { + "epoch": 1.1503170028818444, + "grad_norm": 1.5469171582750165, + "learning_rate": 8.353262586528655e-07, + "loss": 0.45044830441474915, + "step": 4990 + }, + { + "epoch": 1.1505475504322766, + "grad_norm": 1.4972630884076448, + "learning_rate": 8.349503055773152e-07, + "loss": 0.4949982464313507, + "step": 4991 + }, + { + "epoch": 1.150778097982709, + "grad_norm": 1.4696355307323234, + "learning_rate": 8.345743764817688e-07, + "loss": 0.5219828486442566, + "step": 4992 + }, + { + "epoch": 1.1510086455331412, + "grad_norm": 1.4698594015955475, + "learning_rate": 8.341984714208445e-07, + "loss": 0.47528931498527527, + "step": 4993 + }, + { + "epoch": 1.1512391930835735, + "grad_norm": 1.419305533417606, + "learning_rate": 8.338225904491572e-07, + "loss": 0.3760669231414795, + "step": 4994 + }, + { + "epoch": 1.1514697406340058, + "grad_norm": 1.7202794759708913, + "learning_rate": 8.334467336213192e-07, + "loss": 0.47153982520103455, + "step": 4995 + }, + { + "epoch": 1.151700288184438, + "grad_norm": 1.6502148368463363, + "learning_rate": 8.330709009919379e-07, + "loss": 0.448361337184906, + "step": 4996 + }, + { + "epoch": 1.1519308357348703, + "grad_norm": 1.6574762273201675, + "learning_rate": 8.326950926156185e-07, + "loss": 0.4243425130844116, + "step": 4997 + }, + { + "epoch": 1.1521613832853026, + "grad_norm": 1.619484058823572, + "learning_rate": 8.323193085469613e-07, + "loss": 0.46340247988700867, + "step": 4998 + }, + { + "epoch": 1.1523919308357349, + "grad_norm": 1.5996286621587357, + "learning_rate": 8.319435488405644e-07, + "loss": 0.4195745587348938, + "step": 4999 + }, + { + "epoch": 1.1526224783861672, + "grad_norm": 1.5213884038871759, + "learning_rate": 8.315678135510218e-07, + "loss": 0.5506634712219238, + "step": 5000 + }, + { + "epoch": 1.1528530259365994, + "grad_norm": 1.3389955091698362, + "learning_rate": 8.311921027329231e-07, + "loss": 0.43178266286849976, + "step": 5001 + }, + { + "epoch": 1.1530835734870317, + "grad_norm": 1.483870001679962, + "learning_rate": 8.308164164408565e-07, + "loss": 0.43827325105667114, + "step": 5002 + }, + { + "epoch": 1.153314121037464, + "grad_norm": 1.3812119919237116, + "learning_rate": 8.304407547294044e-07, + "loss": 0.47002434730529785, + "step": 5003 + }, + { + "epoch": 1.1535446685878963, + "grad_norm": 1.6557647695376148, + "learning_rate": 8.300651176531464e-07, + "loss": 0.4747048616409302, + "step": 5004 + }, + { + "epoch": 1.1537752161383286, + "grad_norm": 1.5487662173335255, + "learning_rate": 8.296895052666594e-07, + "loss": 0.44455668330192566, + "step": 5005 + }, + { + "epoch": 1.1540057636887608, + "grad_norm": 1.5585235356930989, + "learning_rate": 8.293139176245155e-07, + "loss": 0.546316385269165, + "step": 5006 + }, + { + "epoch": 1.1542363112391931, + "grad_norm": 1.675135125637205, + "learning_rate": 8.289383547812835e-07, + "loss": 0.4774520993232727, + "step": 5007 + }, + { + "epoch": 1.1544668587896254, + "grad_norm": 1.3977257653778448, + "learning_rate": 8.285628167915295e-07, + "loss": 0.4609676003456116, + "step": 5008 + }, + { + "epoch": 1.1546974063400577, + "grad_norm": 1.5997296940439065, + "learning_rate": 8.281873037098145e-07, + "loss": 0.5442596673965454, + "step": 5009 + }, + { + "epoch": 1.15492795389049, + "grad_norm": 1.433364845269251, + "learning_rate": 8.278118155906973e-07, + "loss": 0.43525272607803345, + "step": 5010 + }, + { + "epoch": 1.1551585014409222, + "grad_norm": 1.8110200812495216, + "learning_rate": 8.274363524887314e-07, + "loss": 0.47077393531799316, + "step": 5011 + }, + { + "epoch": 1.1553890489913545, + "grad_norm": 1.666313225347892, + "learning_rate": 8.270609144584687e-07, + "loss": 0.4974507689476013, + "step": 5012 + }, + { + "epoch": 1.1556195965417868, + "grad_norm": 1.7757066050265735, + "learning_rate": 8.266855015544563e-07, + "loss": 0.4794807732105255, + "step": 5013 + }, + { + "epoch": 1.155850144092219, + "grad_norm": 1.5898294982164218, + "learning_rate": 8.26310113831237e-07, + "loss": 0.528843879699707, + "step": 5014 + }, + { + "epoch": 1.1560806916426514, + "grad_norm": 1.521661516174903, + "learning_rate": 8.259347513433516e-07, + "loss": 0.49961280822753906, + "step": 5015 + }, + { + "epoch": 1.1563112391930837, + "grad_norm": 1.5573702934409057, + "learning_rate": 8.255594141453364e-07, + "loss": 0.46634775400161743, + "step": 5016 + }, + { + "epoch": 1.156541786743516, + "grad_norm": 1.3785535507550584, + "learning_rate": 8.251841022917233e-07, + "loss": 0.5132392048835754, + "step": 5017 + }, + { + "epoch": 1.1567723342939482, + "grad_norm": 1.503101964610376, + "learning_rate": 8.248088158370419e-07, + "loss": 0.46664172410964966, + "step": 5018 + }, + { + "epoch": 1.1570028818443805, + "grad_norm": 2.223349806199342, + "learning_rate": 8.244335548358165e-07, + "loss": 0.5046276450157166, + "step": 5019 + }, + { + "epoch": 1.1572334293948128, + "grad_norm": 1.4765801454732264, + "learning_rate": 8.240583193425694e-07, + "loss": 0.4508659839630127, + "step": 5020 + }, + { + "epoch": 1.157463976945245, + "grad_norm": 1.6522378690504231, + "learning_rate": 8.236831094118186e-07, + "loss": 0.4357062876224518, + "step": 5021 + }, + { + "epoch": 1.1576945244956773, + "grad_norm": 1.4792784189312642, + "learning_rate": 8.233079250980773e-07, + "loss": 0.5156815052032471, + "step": 5022 + }, + { + "epoch": 1.1579250720461096, + "grad_norm": 1.7175978777418233, + "learning_rate": 8.229327664558566e-07, + "loss": 0.5102704763412476, + "step": 5023 + }, + { + "epoch": 1.158155619596542, + "grad_norm": 1.4500341849668028, + "learning_rate": 8.225576335396631e-07, + "loss": 0.5001027584075928, + "step": 5024 + }, + { + "epoch": 1.1583861671469742, + "grad_norm": 1.615768339974115, + "learning_rate": 8.221825264039992e-07, + "loss": 0.5214229226112366, + "step": 5025 + }, + { + "epoch": 1.1586167146974065, + "grad_norm": 1.596713767399932, + "learning_rate": 8.218074451033648e-07, + "loss": 0.4978953003883362, + "step": 5026 + }, + { + "epoch": 1.1588472622478387, + "grad_norm": 1.72655777253444, + "learning_rate": 8.214323896922548e-07, + "loss": 0.40742921829223633, + "step": 5027 + }, + { + "epoch": 1.1590778097982708, + "grad_norm": 1.4098774572275268, + "learning_rate": 8.210573602251607e-07, + "loss": 0.500441312789917, + "step": 5028 + }, + { + "epoch": 1.159308357348703, + "grad_norm": 1.6555011856350113, + "learning_rate": 8.206823567565711e-07, + "loss": 0.4656379222869873, + "step": 5029 + }, + { + "epoch": 1.1595389048991354, + "grad_norm": 1.4004525989876406, + "learning_rate": 8.203073793409694e-07, + "loss": 0.408259779214859, + "step": 5030 + }, + { + "epoch": 1.1597694524495676, + "grad_norm": 1.5906053969397733, + "learning_rate": 8.19932428032836e-07, + "loss": 0.4703931212425232, + "step": 5031 + }, + { + "epoch": 1.16, + "grad_norm": 1.5767341376014499, + "learning_rate": 8.195575028866479e-07, + "loss": 0.449575275182724, + "step": 5032 + }, + { + "epoch": 1.1602305475504322, + "grad_norm": 1.6523550817121402, + "learning_rate": 8.191826039568776e-07, + "loss": 0.46949630975723267, + "step": 5033 + }, + { + "epoch": 1.1604610951008645, + "grad_norm": 1.4441557842189188, + "learning_rate": 8.18807731297994e-07, + "loss": 0.5106115937232971, + "step": 5034 + }, + { + "epoch": 1.1606916426512968, + "grad_norm": 1.4135101448635017, + "learning_rate": 8.184328849644616e-07, + "loss": 0.4903862178325653, + "step": 5035 + }, + { + "epoch": 1.160922190201729, + "grad_norm": 1.6994208159537325, + "learning_rate": 8.180580650107425e-07, + "loss": 0.4848126769065857, + "step": 5036 + }, + { + "epoch": 1.1611527377521613, + "grad_norm": 1.9267658147173174, + "learning_rate": 8.176832714912942e-07, + "loss": 0.5161526203155518, + "step": 5037 + }, + { + "epoch": 1.1613832853025936, + "grad_norm": 1.4045260428359359, + "learning_rate": 8.173085044605693e-07, + "loss": 0.444364070892334, + "step": 5038 + }, + { + "epoch": 1.1616138328530259, + "grad_norm": 1.4741868944919188, + "learning_rate": 8.169337639730184e-07, + "loss": 0.40291786193847656, + "step": 5039 + }, + { + "epoch": 1.1618443804034582, + "grad_norm": 1.5142762668473808, + "learning_rate": 8.165590500830876e-07, + "loss": 0.43826034665107727, + "step": 5040 + }, + { + "epoch": 1.1620749279538904, + "grad_norm": 1.6982487740088041, + "learning_rate": 8.161843628452181e-07, + "loss": 0.5153712034225464, + "step": 5041 + }, + { + "epoch": 1.1623054755043227, + "grad_norm": 1.6774861326863326, + "learning_rate": 8.158097023138488e-07, + "loss": 0.43770891427993774, + "step": 5042 + }, + { + "epoch": 1.162536023054755, + "grad_norm": 1.3656492090603147, + "learning_rate": 8.154350685434135e-07, + "loss": 0.4041779041290283, + "step": 5043 + }, + { + "epoch": 1.1627665706051873, + "grad_norm": 1.6864751806757157, + "learning_rate": 8.150604615883425e-07, + "loss": 0.5335817337036133, + "step": 5044 + }, + { + "epoch": 1.1629971181556196, + "grad_norm": 1.8248598871276303, + "learning_rate": 8.14685881503063e-07, + "loss": 0.5161072015762329, + "step": 5045 + }, + { + "epoch": 1.1632276657060518, + "grad_norm": 1.5273246721191258, + "learning_rate": 8.143113283419968e-07, + "loss": 0.39934635162353516, + "step": 5046 + }, + { + "epoch": 1.1634582132564841, + "grad_norm": 1.5055663366064564, + "learning_rate": 8.139368021595633e-07, + "loss": 0.4325847029685974, + "step": 5047 + }, + { + "epoch": 1.1636887608069164, + "grad_norm": 1.5488180260968318, + "learning_rate": 8.135623030101763e-07, + "loss": 0.4243529438972473, + "step": 5048 + }, + { + "epoch": 1.1639193083573487, + "grad_norm": 1.5959515230472425, + "learning_rate": 8.131878309482475e-07, + "loss": 0.5942574739456177, + "step": 5049 + }, + { + "epoch": 1.164149855907781, + "grad_norm": 1.4099782206145253, + "learning_rate": 8.128133860281837e-07, + "loss": 0.5493526458740234, + "step": 5050 + }, + { + "epoch": 1.1643804034582133, + "grad_norm": 1.6745427589200643, + "learning_rate": 8.124389683043872e-07, + "loss": 0.44348329305648804, + "step": 5051 + }, + { + "epoch": 1.1646109510086455, + "grad_norm": 1.553703570504434, + "learning_rate": 8.120645778312577e-07, + "loss": 0.530125617980957, + "step": 5052 + }, + { + "epoch": 1.1648414985590778, + "grad_norm": 1.471177336732143, + "learning_rate": 8.116902146631901e-07, + "loss": 0.4052886962890625, + "step": 5053 + }, + { + "epoch": 1.16507204610951, + "grad_norm": 1.8383146265148649, + "learning_rate": 8.113158788545751e-07, + "loss": 0.4531574845314026, + "step": 5054 + }, + { + "epoch": 1.1653025936599424, + "grad_norm": 1.6630001339456053, + "learning_rate": 8.109415704598004e-07, + "loss": 0.456318199634552, + "step": 5055 + }, + { + "epoch": 1.1655331412103747, + "grad_norm": 1.4028342384501173, + "learning_rate": 8.105672895332485e-07, + "loss": 0.49434107542037964, + "step": 5056 + }, + { + "epoch": 1.165763688760807, + "grad_norm": 1.437792736683449, + "learning_rate": 8.101930361292987e-07, + "loss": 0.4440796971321106, + "step": 5057 + }, + { + "epoch": 1.1659942363112392, + "grad_norm": 1.8668287714681662, + "learning_rate": 8.098188103023266e-07, + "loss": 0.45466774702072144, + "step": 5058 + }, + { + "epoch": 1.1662247838616715, + "grad_norm": 1.5229028325724632, + "learning_rate": 8.094446121067026e-07, + "loss": 0.5570865273475647, + "step": 5059 + }, + { + "epoch": 1.1664553314121038, + "grad_norm": 1.53454366004412, + "learning_rate": 8.090704415967942e-07, + "loss": 0.39382970333099365, + "step": 5060 + }, + { + "epoch": 1.166685878962536, + "grad_norm": 1.6339069895946312, + "learning_rate": 8.086962988269646e-07, + "loss": 0.5676811337471008, + "step": 5061 + }, + { + "epoch": 1.1669164265129683, + "grad_norm": 1.4415998432790569, + "learning_rate": 8.083221838515727e-07, + "loss": 0.49144673347473145, + "step": 5062 + }, + { + "epoch": 1.1671469740634006, + "grad_norm": 1.7643102538106996, + "learning_rate": 8.079480967249737e-07, + "loss": 0.5352723598480225, + "step": 5063 + }, + { + "epoch": 1.167377521613833, + "grad_norm": 1.6939389814092696, + "learning_rate": 8.075740375015178e-07, + "loss": 0.5697407722473145, + "step": 5064 + }, + { + "epoch": 1.1676080691642652, + "grad_norm": 1.5192410214640266, + "learning_rate": 8.072000062355528e-07, + "loss": 0.4873645305633545, + "step": 5065 + }, + { + "epoch": 1.1678386167146975, + "grad_norm": 1.5905728661562026, + "learning_rate": 8.068260029814213e-07, + "loss": 0.5032225847244263, + "step": 5066 + }, + { + "epoch": 1.1680691642651297, + "grad_norm": 1.5892996781628121, + "learning_rate": 8.064520277934618e-07, + "loss": 0.45441383123397827, + "step": 5067 + }, + { + "epoch": 1.168299711815562, + "grad_norm": 1.5165242772078702, + "learning_rate": 8.060780807260094e-07, + "loss": 0.5142766833305359, + "step": 5068 + }, + { + "epoch": 1.1685302593659943, + "grad_norm": 1.3038765575265592, + "learning_rate": 8.057041618333946e-07, + "loss": 0.4434770345687866, + "step": 5069 + }, + { + "epoch": 1.1687608069164266, + "grad_norm": 1.985553119140782, + "learning_rate": 8.053302711699436e-07, + "loss": 0.4094642400741577, + "step": 5070 + }, + { + "epoch": 1.1689913544668589, + "grad_norm": 1.3275869000209546, + "learning_rate": 8.049564087899794e-07, + "loss": 0.45409655570983887, + "step": 5071 + }, + { + "epoch": 1.1692219020172911, + "grad_norm": 1.5867037847106558, + "learning_rate": 8.045825747478199e-07, + "loss": 0.5118057131767273, + "step": 5072 + }, + { + "epoch": 1.1694524495677234, + "grad_norm": 1.3524637128330204, + "learning_rate": 8.042087690977791e-07, + "loss": 0.3896600604057312, + "step": 5073 + }, + { + "epoch": 1.1696829971181557, + "grad_norm": 1.3790318279559413, + "learning_rate": 8.038349918941678e-07, + "loss": 0.4440167546272278, + "step": 5074 + }, + { + "epoch": 1.169913544668588, + "grad_norm": 1.5642943145914474, + "learning_rate": 8.034612431912913e-07, + "loss": 0.4162600636482239, + "step": 5075 + }, + { + "epoch": 1.17014409221902, + "grad_norm": 1.5696782051249465, + "learning_rate": 8.030875230434516e-07, + "loss": 0.5087725520133972, + "step": 5076 + }, + { + "epoch": 1.1703746397694523, + "grad_norm": 1.4896579171326287, + "learning_rate": 8.027138315049465e-07, + "loss": 0.4495736360549927, + "step": 5077 + }, + { + "epoch": 1.1706051873198846, + "grad_norm": 1.282122386534862, + "learning_rate": 8.023401686300692e-07, + "loss": 0.489845335483551, + "step": 5078 + }, + { + "epoch": 1.170835734870317, + "grad_norm": 1.51185017043383, + "learning_rate": 8.019665344731095e-07, + "loss": 0.5112447738647461, + "step": 5079 + }, + { + "epoch": 1.1710662824207492, + "grad_norm": 1.6932024361341402, + "learning_rate": 8.015929290883517e-07, + "loss": 0.5937504768371582, + "step": 5080 + }, + { + "epoch": 1.1712968299711815, + "grad_norm": 1.4969039934386026, + "learning_rate": 8.012193525300776e-07, + "loss": 0.44051915407180786, + "step": 5081 + }, + { + "epoch": 1.1715273775216137, + "grad_norm": 1.45813729961155, + "learning_rate": 8.008458048525639e-07, + "loss": 0.46435099840164185, + "step": 5082 + }, + { + "epoch": 1.171757925072046, + "grad_norm": 1.6776025006446114, + "learning_rate": 8.004722861100827e-07, + "loss": 0.6185746788978577, + "step": 5083 + }, + { + "epoch": 1.1719884726224783, + "grad_norm": 1.7013307081411828, + "learning_rate": 8.000987963569028e-07, + "loss": 0.4826294481754303, + "step": 5084 + }, + { + "epoch": 1.1722190201729106, + "grad_norm": 1.4399795258671182, + "learning_rate": 7.997253356472884e-07, + "loss": 0.4688999056816101, + "step": 5085 + }, + { + "epoch": 1.1724495677233429, + "grad_norm": 2.065689721826827, + "learning_rate": 7.993519040354989e-07, + "loss": 0.5494599938392639, + "step": 5086 + }, + { + "epoch": 1.1726801152737751, + "grad_norm": 1.7126589073325484, + "learning_rate": 7.989785015757909e-07, + "loss": 0.37470385432243347, + "step": 5087 + }, + { + "epoch": 1.1729106628242074, + "grad_norm": 1.9404321864616616, + "learning_rate": 7.986051283224153e-07, + "loss": 0.5580037236213684, + "step": 5088 + }, + { + "epoch": 1.1731412103746397, + "grad_norm": 1.2841436695629171, + "learning_rate": 7.982317843296191e-07, + "loss": 0.5286623239517212, + "step": 5089 + }, + { + "epoch": 1.173371757925072, + "grad_norm": 1.4009308302426564, + "learning_rate": 7.978584696516463e-07, + "loss": 0.4835943877696991, + "step": 5090 + }, + { + "epoch": 1.1736023054755043, + "grad_norm": 1.8751415579020991, + "learning_rate": 7.974851843427348e-07, + "loss": 0.5184438824653625, + "step": 5091 + }, + { + "epoch": 1.1738328530259365, + "grad_norm": 1.696931221696011, + "learning_rate": 7.971119284571194e-07, + "loss": 0.39980262517929077, + "step": 5092 + }, + { + "epoch": 1.1740634005763688, + "grad_norm": 1.3010621045377282, + "learning_rate": 7.967387020490297e-07, + "loss": 0.4931245446205139, + "step": 5093 + }, + { + "epoch": 1.174293948126801, + "grad_norm": 1.4718683650776678, + "learning_rate": 7.963655051726925e-07, + "loss": 0.5418246984481812, + "step": 5094 + }, + { + "epoch": 1.1745244956772334, + "grad_norm": 1.547908981740155, + "learning_rate": 7.959923378823292e-07, + "loss": 0.40723925828933716, + "step": 5095 + }, + { + "epoch": 1.1747550432276657, + "grad_norm": 1.5860273572842907, + "learning_rate": 7.956192002321564e-07, + "loss": 0.4601886570453644, + "step": 5096 + }, + { + "epoch": 1.174985590778098, + "grad_norm": 1.3393801052353216, + "learning_rate": 7.952460922763881e-07, + "loss": 0.439882755279541, + "step": 5097 + }, + { + "epoch": 1.1752161383285302, + "grad_norm": 1.6695184794084814, + "learning_rate": 7.948730140692326e-07, + "loss": 0.5210170745849609, + "step": 5098 + }, + { + "epoch": 1.1754466858789625, + "grad_norm": 1.8621316471839704, + "learning_rate": 7.944999656648938e-07, + "loss": 0.43414199352264404, + "step": 5099 + }, + { + "epoch": 1.1756772334293948, + "grad_norm": 2.2151031419179072, + "learning_rate": 7.941269471175729e-07, + "loss": 0.5412740707397461, + "step": 5100 + }, + { + "epoch": 1.175907780979827, + "grad_norm": 1.4771921151316811, + "learning_rate": 7.937539584814645e-07, + "loss": 0.49985402822494507, + "step": 5101 + }, + { + "epoch": 1.1761383285302593, + "grad_norm": 1.4276177294779857, + "learning_rate": 7.933809998107603e-07, + "loss": 0.4674869179725647, + "step": 5102 + }, + { + "epoch": 1.1763688760806916, + "grad_norm": 1.7775337786781618, + "learning_rate": 7.930080711596477e-07, + "loss": 0.4349539279937744, + "step": 5103 + }, + { + "epoch": 1.176599423631124, + "grad_norm": 1.554477721089947, + "learning_rate": 7.92635172582309e-07, + "loss": 0.5000085830688477, + "step": 5104 + }, + { + "epoch": 1.1768299711815562, + "grad_norm": 1.7477345906618489, + "learning_rate": 7.922623041329223e-07, + "loss": 0.6095143556594849, + "step": 5105 + }, + { + "epoch": 1.1770605187319885, + "grad_norm": 1.4438459951254565, + "learning_rate": 7.918894658656622e-07, + "loss": 0.4526306986808777, + "step": 5106 + }, + { + "epoch": 1.1772910662824208, + "grad_norm": 1.7380379348373243, + "learning_rate": 7.915166578346974e-07, + "loss": 0.47447705268859863, + "step": 5107 + }, + { + "epoch": 1.177521613832853, + "grad_norm": 1.5624193087101916, + "learning_rate": 7.911438800941938e-07, + "loss": 0.5678610801696777, + "step": 5108 + }, + { + "epoch": 1.1777521613832853, + "grad_norm": 1.7268534957559538, + "learning_rate": 7.907711326983113e-07, + "loss": 0.49429306387901306, + "step": 5109 + }, + { + "epoch": 1.1779827089337176, + "grad_norm": 1.4407413119987242, + "learning_rate": 7.903984157012068e-07, + "loss": 0.430848628282547, + "step": 5110 + }, + { + "epoch": 1.1782132564841499, + "grad_norm": 1.7950110825220507, + "learning_rate": 7.900257291570324e-07, + "loss": 0.4113616645336151, + "step": 5111 + }, + { + "epoch": 1.1784438040345822, + "grad_norm": 1.5284371816604458, + "learning_rate": 7.896530731199346e-07, + "loss": 0.49423450231552124, + "step": 5112 + }, + { + "epoch": 1.1786743515850144, + "grad_norm": 1.3589976827610897, + "learning_rate": 7.892804476440574e-07, + "loss": 0.42228102684020996, + "step": 5113 + }, + { + "epoch": 1.1789048991354467, + "grad_norm": 1.4991917175795098, + "learning_rate": 7.889078527835393e-07, + "loss": 0.5798023343086243, + "step": 5114 + }, + { + "epoch": 1.179135446685879, + "grad_norm": 1.5093615640917626, + "learning_rate": 7.885352885925138e-07, + "loss": 0.4872003197669983, + "step": 5115 + }, + { + "epoch": 1.1793659942363113, + "grad_norm": 1.3378284729255738, + "learning_rate": 7.881627551251116e-07, + "loss": 0.40125834941864014, + "step": 5116 + }, + { + "epoch": 1.1795965417867436, + "grad_norm": 1.5996579095623482, + "learning_rate": 7.877902524354569e-07, + "loss": 0.4458635449409485, + "step": 5117 + }, + { + "epoch": 1.1798270893371758, + "grad_norm": 1.262725489425069, + "learning_rate": 7.87417780577671e-07, + "loss": 0.4682433009147644, + "step": 5118 + }, + { + "epoch": 1.1800576368876081, + "grad_norm": 1.25949021074412, + "learning_rate": 7.870453396058704e-07, + "loss": 0.3488504886627197, + "step": 5119 + }, + { + "epoch": 1.1802881844380404, + "grad_norm": 1.5564681991959675, + "learning_rate": 7.866729295741666e-07, + "loss": 0.508482813835144, + "step": 5120 + }, + { + "epoch": 1.1805187319884727, + "grad_norm": 1.5995779613986645, + "learning_rate": 7.863005505366664e-07, + "loss": 0.4301029145717621, + "step": 5121 + }, + { + "epoch": 1.180749279538905, + "grad_norm": 1.5221914817292215, + "learning_rate": 7.859282025474738e-07, + "loss": 0.46766793727874756, + "step": 5122 + }, + { + "epoch": 1.1809798270893372, + "grad_norm": 1.5282755704648547, + "learning_rate": 7.855558856606862e-07, + "loss": 0.4898037314414978, + "step": 5123 + }, + { + "epoch": 1.1812103746397695, + "grad_norm": 1.5328082397130638, + "learning_rate": 7.851835999303977e-07, + "loss": 0.4219735860824585, + "step": 5124 + }, + { + "epoch": 1.1814409221902018, + "grad_norm": 1.5861961330265528, + "learning_rate": 7.848113454106971e-07, + "loss": 0.47639018297195435, + "step": 5125 + }, + { + "epoch": 1.181671469740634, + "grad_norm": 1.8716402338437157, + "learning_rate": 7.844391221556696e-07, + "loss": 0.5062400102615356, + "step": 5126 + }, + { + "epoch": 1.1819020172910664, + "grad_norm": 1.6261243350490995, + "learning_rate": 7.840669302193957e-07, + "loss": 0.4984915256500244, + "step": 5127 + }, + { + "epoch": 1.1821325648414986, + "grad_norm": 1.6122125447702478, + "learning_rate": 7.836947696559497e-07, + "loss": 0.4356485605239868, + "step": 5128 + }, + { + "epoch": 1.182363112391931, + "grad_norm": 1.3005367585942214, + "learning_rate": 7.833226405194039e-07, + "loss": 0.4968249797821045, + "step": 5129 + }, + { + "epoch": 1.1825936599423632, + "grad_norm": 1.8474286891846123, + "learning_rate": 7.829505428638245e-07, + "loss": 0.497678279876709, + "step": 5130 + }, + { + "epoch": 1.1828242074927955, + "grad_norm": 1.4865858757479278, + "learning_rate": 7.825784767432731e-07, + "loss": 0.43530794978141785, + "step": 5131 + }, + { + "epoch": 1.1830547550432278, + "grad_norm": 1.5492043974535954, + "learning_rate": 7.822064422118078e-07, + "loss": 0.5460381507873535, + "step": 5132 + }, + { + "epoch": 1.18328530259366, + "grad_norm": 1.7873525324615471, + "learning_rate": 7.818344393234799e-07, + "loss": 0.400234192609787, + "step": 5133 + }, + { + "epoch": 1.1835158501440923, + "grad_norm": 1.4067634014586672, + "learning_rate": 7.814624681323387e-07, + "loss": 0.41525113582611084, + "step": 5134 + }, + { + "epoch": 1.1837463976945246, + "grad_norm": 1.7141603087851884, + "learning_rate": 7.810905286924281e-07, + "loss": 0.6016113758087158, + "step": 5135 + }, + { + "epoch": 1.183976945244957, + "grad_norm": 1.565613245374153, + "learning_rate": 7.807186210577856e-07, + "loss": 0.44173747301101685, + "step": 5136 + }, + { + "epoch": 1.1842074927953892, + "grad_norm": 1.5753493289476495, + "learning_rate": 7.803467452824469e-07, + "loss": 0.4887153208255768, + "step": 5137 + }, + { + "epoch": 1.1844380403458212, + "grad_norm": 1.4753411189568921, + "learning_rate": 7.799749014204409e-07, + "loss": 0.35861653089523315, + "step": 5138 + }, + { + "epoch": 1.1846685878962535, + "grad_norm": 1.51791048669176, + "learning_rate": 7.796030895257924e-07, + "loss": 0.49498188495635986, + "step": 5139 + }, + { + "epoch": 1.1848991354466858, + "grad_norm": 1.5790347235405173, + "learning_rate": 7.792313096525229e-07, + "loss": 0.4640830159187317, + "step": 5140 + }, + { + "epoch": 1.185129682997118, + "grad_norm": 1.6546050323090251, + "learning_rate": 7.788595618546473e-07, + "loss": 0.43868836760520935, + "step": 5141 + }, + { + "epoch": 1.1853602305475504, + "grad_norm": 1.3247618330025193, + "learning_rate": 7.784878461861766e-07, + "loss": 0.42038998007774353, + "step": 5142 + }, + { + "epoch": 1.1855907780979826, + "grad_norm": 1.3831120122965148, + "learning_rate": 7.78116162701118e-07, + "loss": 0.46532997488975525, + "step": 5143 + }, + { + "epoch": 1.185821325648415, + "grad_norm": 1.8301201761711272, + "learning_rate": 7.777445114534724e-07, + "loss": 0.4519978165626526, + "step": 5144 + }, + { + "epoch": 1.1860518731988472, + "grad_norm": 2.0549662658660752, + "learning_rate": 7.773728924972374e-07, + "loss": 0.4602941870689392, + "step": 5145 + }, + { + "epoch": 1.1862824207492795, + "grad_norm": 1.8488754653531705, + "learning_rate": 7.770013058864048e-07, + "loss": 0.49775010347366333, + "step": 5146 + }, + { + "epoch": 1.1865129682997118, + "grad_norm": 1.303047332232113, + "learning_rate": 7.76629751674963e-07, + "loss": 0.4270223379135132, + "step": 5147 + }, + { + "epoch": 1.186743515850144, + "grad_norm": 1.7263287626755313, + "learning_rate": 7.762582299168947e-07, + "loss": 0.4341789186000824, + "step": 5148 + }, + { + "epoch": 1.1869740634005763, + "grad_norm": 1.5341090099331076, + "learning_rate": 7.758867406661777e-07, + "loss": 0.5144226551055908, + "step": 5149 + }, + { + "epoch": 1.1872046109510086, + "grad_norm": 1.6436159515518085, + "learning_rate": 7.75515283976786e-07, + "loss": 0.5009859800338745, + "step": 5150 + }, + { + "epoch": 1.1874351585014409, + "grad_norm": 1.7538603635849173, + "learning_rate": 7.751438599026885e-07, + "loss": 0.411882221698761, + "step": 5151 + }, + { + "epoch": 1.1876657060518732, + "grad_norm": 1.5272494525926428, + "learning_rate": 7.747724684978488e-07, + "loss": 0.5135201215744019, + "step": 5152 + }, + { + "epoch": 1.1878962536023054, + "grad_norm": 1.6382337953276667, + "learning_rate": 7.744011098162265e-07, + "loss": 0.5519058108329773, + "step": 5153 + }, + { + "epoch": 1.1881268011527377, + "grad_norm": 1.6462491753715431, + "learning_rate": 7.740297839117761e-07, + "loss": 0.49767088890075684, + "step": 5154 + }, + { + "epoch": 1.18835734870317, + "grad_norm": 1.7563113941156678, + "learning_rate": 7.736584908384472e-07, + "loss": 0.5366396903991699, + "step": 5155 + }, + { + "epoch": 1.1885878962536023, + "grad_norm": 1.4986658639467278, + "learning_rate": 7.732872306501852e-07, + "loss": 0.4962652325630188, + "step": 5156 + }, + { + "epoch": 1.1888184438040346, + "grad_norm": 1.7508485740809516, + "learning_rate": 7.729160034009301e-07, + "loss": 0.5071662664413452, + "step": 5157 + }, + { + "epoch": 1.1890489913544668, + "grad_norm": 1.5980525858667156, + "learning_rate": 7.725448091446171e-07, + "loss": 0.45525041222572327, + "step": 5158 + }, + { + "epoch": 1.1892795389048991, + "grad_norm": 1.6066578942153278, + "learning_rate": 7.721736479351777e-07, + "loss": 0.47270429134368896, + "step": 5159 + }, + { + "epoch": 1.1895100864553314, + "grad_norm": 1.4730405508861286, + "learning_rate": 7.71802519826537e-07, + "loss": 0.49913090467453003, + "step": 5160 + }, + { + "epoch": 1.1897406340057637, + "grad_norm": 1.7049072426900878, + "learning_rate": 7.714314248726164e-07, + "loss": 0.4268707036972046, + "step": 5161 + }, + { + "epoch": 1.189971181556196, + "grad_norm": 1.437999496558197, + "learning_rate": 7.710603631273316e-07, + "loss": 0.3650474548339844, + "step": 5162 + }, + { + "epoch": 1.1902017291066282, + "grad_norm": 1.6164991497032537, + "learning_rate": 7.706893346445947e-07, + "loss": 0.4717981219291687, + "step": 5163 + }, + { + "epoch": 1.1904322766570605, + "grad_norm": 1.4762753138416673, + "learning_rate": 7.703183394783122e-07, + "loss": 0.5165996551513672, + "step": 5164 + }, + { + "epoch": 1.1906628242074928, + "grad_norm": 1.5972624214363949, + "learning_rate": 7.699473776823851e-07, + "loss": 0.4477986693382263, + "step": 5165 + }, + { + "epoch": 1.190893371757925, + "grad_norm": 1.3871967139667596, + "learning_rate": 7.695764493107112e-07, + "loss": 0.4584044814109802, + "step": 5166 + }, + { + "epoch": 1.1911239193083574, + "grad_norm": 1.4925097507009442, + "learning_rate": 7.692055544171823e-07, + "loss": 0.5276877880096436, + "step": 5167 + }, + { + "epoch": 1.1913544668587897, + "grad_norm": 1.8352054779159281, + "learning_rate": 7.68834693055685e-07, + "loss": 0.5204349756240845, + "step": 5168 + }, + { + "epoch": 1.191585014409222, + "grad_norm": 1.6795305875537256, + "learning_rate": 7.684638652801025e-07, + "loss": 0.44728660583496094, + "step": 5169 + }, + { + "epoch": 1.1918155619596542, + "grad_norm": 1.4809310662300776, + "learning_rate": 7.680930711443116e-07, + "loss": 0.48899370431900024, + "step": 5170 + }, + { + "epoch": 1.1920461095100865, + "grad_norm": 1.3367974168589116, + "learning_rate": 7.677223107021847e-07, + "loss": 0.41567301750183105, + "step": 5171 + }, + { + "epoch": 1.1922766570605188, + "grad_norm": 1.6483731426886425, + "learning_rate": 7.673515840075901e-07, + "loss": 0.5072032809257507, + "step": 5172 + }, + { + "epoch": 1.192507204610951, + "grad_norm": 1.5620093494432372, + "learning_rate": 7.669808911143901e-07, + "loss": 0.4741431176662445, + "step": 5173 + }, + { + "epoch": 1.1927377521613833, + "grad_norm": 1.5478542834432012, + "learning_rate": 7.666102320764421e-07, + "loss": 0.4430406987667084, + "step": 5174 + }, + { + "epoch": 1.1929682997118156, + "grad_norm": 1.4964356352817318, + "learning_rate": 7.662396069476002e-07, + "loss": 0.40400367975234985, + "step": 5175 + }, + { + "epoch": 1.193198847262248, + "grad_norm": 1.5885122676102894, + "learning_rate": 7.658690157817112e-07, + "loss": 0.5351930856704712, + "step": 5176 + }, + { + "epoch": 1.1934293948126802, + "grad_norm": 1.492376923213462, + "learning_rate": 7.65498458632619e-07, + "loss": 0.5067006349563599, + "step": 5177 + }, + { + "epoch": 1.1936599423631125, + "grad_norm": 1.4138890281535976, + "learning_rate": 7.651279355541607e-07, + "loss": 0.385654091835022, + "step": 5178 + }, + { + "epoch": 1.1938904899135447, + "grad_norm": 1.857913919053153, + "learning_rate": 7.647574466001703e-07, + "loss": 0.4534091353416443, + "step": 5179 + }, + { + "epoch": 1.194121037463977, + "grad_norm": 1.4074916536819597, + "learning_rate": 7.643869918244759e-07, + "loss": 0.5353249907493591, + "step": 5180 + }, + { + "epoch": 1.1943515850144093, + "grad_norm": 1.4274240401119769, + "learning_rate": 7.640165712809001e-07, + "loss": 0.41028958559036255, + "step": 5181 + }, + { + "epoch": 1.1945821325648416, + "grad_norm": 1.624072204554771, + "learning_rate": 7.636461850232622e-07, + "loss": 0.4383719563484192, + "step": 5182 + }, + { + "epoch": 1.1948126801152739, + "grad_norm": 1.5213363057247005, + "learning_rate": 7.632758331053746e-07, + "loss": 0.469385027885437, + "step": 5183 + }, + { + "epoch": 1.1950432276657061, + "grad_norm": 1.4242859850099794, + "learning_rate": 7.629055155810456e-07, + "loss": 0.4319891929626465, + "step": 5184 + }, + { + "epoch": 1.1952737752161384, + "grad_norm": 1.6816172631406454, + "learning_rate": 7.625352325040792e-07, + "loss": 0.5012685656547546, + "step": 5185 + }, + { + "epoch": 1.1955043227665705, + "grad_norm": 1.432377738464319, + "learning_rate": 7.621649839282728e-07, + "loss": 0.4383701682090759, + "step": 5186 + }, + { + "epoch": 1.1957348703170028, + "grad_norm": 1.506815615793495, + "learning_rate": 7.617947699074202e-07, + "loss": 0.5478798151016235, + "step": 5187 + }, + { + "epoch": 1.195965417867435, + "grad_norm": 1.592514094718691, + "learning_rate": 7.614245904953098e-07, + "loss": 0.4665898084640503, + "step": 5188 + }, + { + "epoch": 1.1961959654178673, + "grad_norm": 1.6410040946837772, + "learning_rate": 7.610544457457245e-07, + "loss": 0.49260783195495605, + "step": 5189 + }, + { + "epoch": 1.1964265129682996, + "grad_norm": 1.941675199926286, + "learning_rate": 7.606843357124425e-07, + "loss": 0.4491361379623413, + "step": 5190 + }, + { + "epoch": 1.1966570605187319, + "grad_norm": 1.3495666028535211, + "learning_rate": 7.603142604492366e-07, + "loss": 0.5150983929634094, + "step": 5191 + }, + { + "epoch": 1.1968876080691642, + "grad_norm": 1.4180648868874097, + "learning_rate": 7.599442200098756e-07, + "loss": 0.4191433787345886, + "step": 5192 + }, + { + "epoch": 1.1971181556195964, + "grad_norm": 1.6520041909295045, + "learning_rate": 7.595742144481222e-07, + "loss": 0.47440847754478455, + "step": 5193 + }, + { + "epoch": 1.1973487031700287, + "grad_norm": 1.4858965402977922, + "learning_rate": 7.592042438177341e-07, + "loss": 0.4382219612598419, + "step": 5194 + }, + { + "epoch": 1.197579250720461, + "grad_norm": 1.4785659490868535, + "learning_rate": 7.588343081724646e-07, + "loss": 0.44138234853744507, + "step": 5195 + }, + { + "epoch": 1.1978097982708933, + "grad_norm": 1.7981437297331029, + "learning_rate": 7.584644075660614e-07, + "loss": 0.41076284646987915, + "step": 5196 + }, + { + "epoch": 1.1980403458213256, + "grad_norm": 1.6500368550264195, + "learning_rate": 7.580945420522669e-07, + "loss": 0.5953484773635864, + "step": 5197 + }, + { + "epoch": 1.1982708933717579, + "grad_norm": 1.5393411560043135, + "learning_rate": 7.577247116848192e-07, + "loss": 0.3844539523124695, + "step": 5198 + }, + { + "epoch": 1.1985014409221901, + "grad_norm": 1.4919048187646704, + "learning_rate": 7.573549165174504e-07, + "loss": 0.47392016649246216, + "step": 5199 + }, + { + "epoch": 1.1987319884726224, + "grad_norm": 1.437882902973616, + "learning_rate": 7.569851566038879e-07, + "loss": 0.43976885080337524, + "step": 5200 + }, + { + "epoch": 1.1989625360230547, + "grad_norm": 1.6498454472520225, + "learning_rate": 7.566154319978545e-07, + "loss": 0.4783346652984619, + "step": 5201 + }, + { + "epoch": 1.199193083573487, + "grad_norm": 1.424108162037939, + "learning_rate": 7.562457427530668e-07, + "loss": 0.534496009349823, + "step": 5202 + }, + { + "epoch": 1.1994236311239193, + "grad_norm": 1.4707784169466331, + "learning_rate": 7.558760889232365e-07, + "loss": 0.42668965458869934, + "step": 5203 + }, + { + "epoch": 1.1996541786743515, + "grad_norm": 1.737746094211821, + "learning_rate": 7.555064705620717e-07, + "loss": 0.45171916484832764, + "step": 5204 + }, + { + "epoch": 1.1998847262247838, + "grad_norm": 1.4495856306459454, + "learning_rate": 7.551368877232728e-07, + "loss": 0.41587740182876587, + "step": 5205 + }, + { + "epoch": 1.200115273775216, + "grad_norm": 2.0458844084652426, + "learning_rate": 7.547673404605372e-07, + "loss": 0.5804768204689026, + "step": 5206 + }, + { + "epoch": 1.2003458213256484, + "grad_norm": 1.6550173812518771, + "learning_rate": 7.543978288275554e-07, + "loss": 0.5011946558952332, + "step": 5207 + }, + { + "epoch": 1.2005763688760807, + "grad_norm": 1.6293811211630818, + "learning_rate": 7.540283528780145e-07, + "loss": 0.4823184013366699, + "step": 5208 + }, + { + "epoch": 1.200806916426513, + "grad_norm": 1.583832485145893, + "learning_rate": 7.536589126655952e-07, + "loss": 0.4285504221916199, + "step": 5209 + }, + { + "epoch": 1.2010374639769452, + "grad_norm": 1.5324557734796895, + "learning_rate": 7.532895082439728e-07, + "loss": 0.5323970317840576, + "step": 5210 + }, + { + "epoch": 1.2012680115273775, + "grad_norm": 1.5033751815855423, + "learning_rate": 7.529201396668188e-07, + "loss": 0.4522852301597595, + "step": 5211 + }, + { + "epoch": 1.2014985590778098, + "grad_norm": 1.6592368965468285, + "learning_rate": 7.525508069877981e-07, + "loss": 0.37899982929229736, + "step": 5212 + }, + { + "epoch": 1.201729106628242, + "grad_norm": 1.542899034057244, + "learning_rate": 7.521815102605709e-07, + "loss": 0.48277533054351807, + "step": 5213 + }, + { + "epoch": 1.2019596541786743, + "grad_norm": 1.6228734814854653, + "learning_rate": 7.518122495387924e-07, + "loss": 0.5247419476509094, + "step": 5214 + }, + { + "epoch": 1.2021902017291066, + "grad_norm": 1.6953521267123268, + "learning_rate": 7.514430248761121e-07, + "loss": 0.5083534717559814, + "step": 5215 + }, + { + "epoch": 1.202420749279539, + "grad_norm": 1.4712281663420155, + "learning_rate": 7.510738363261743e-07, + "loss": 0.4977297782897949, + "step": 5216 + }, + { + "epoch": 1.2026512968299712, + "grad_norm": 1.8268790082927158, + "learning_rate": 7.507046839426193e-07, + "loss": 0.49126511812210083, + "step": 5217 + }, + { + "epoch": 1.2028818443804035, + "grad_norm": 1.390643338354512, + "learning_rate": 7.503355677790797e-07, + "loss": 0.45457524061203003, + "step": 5218 + }, + { + "epoch": 1.2031123919308357, + "grad_norm": 1.5594668449448779, + "learning_rate": 7.499664878891849e-07, + "loss": 0.4343973994255066, + "step": 5219 + }, + { + "epoch": 1.203342939481268, + "grad_norm": 1.7979682805933468, + "learning_rate": 7.495974443265588e-07, + "loss": 0.465421199798584, + "step": 5220 + }, + { + "epoch": 1.2035734870317003, + "grad_norm": 1.643661704169387, + "learning_rate": 7.492284371448189e-07, + "loss": 0.6031137704849243, + "step": 5221 + }, + { + "epoch": 1.2038040345821326, + "grad_norm": 1.3428704799411881, + "learning_rate": 7.488594663975786e-07, + "loss": 0.4751429557800293, + "step": 5222 + }, + { + "epoch": 1.2040345821325649, + "grad_norm": 1.4175138149901096, + "learning_rate": 7.484905321384448e-07, + "loss": 0.4252334237098694, + "step": 5223 + }, + { + "epoch": 1.2042651296829971, + "grad_norm": 1.7243093798167899, + "learning_rate": 7.481216344210205e-07, + "loss": 0.46463245153427124, + "step": 5224 + }, + { + "epoch": 1.2044956772334294, + "grad_norm": 1.5792374494579962, + "learning_rate": 7.477527732989026e-07, + "loss": 0.5066741704940796, + "step": 5225 + }, + { + "epoch": 1.2047262247838617, + "grad_norm": 1.4978142606595146, + "learning_rate": 7.473839488256825e-07, + "loss": 0.43169310688972473, + "step": 5226 + }, + { + "epoch": 1.204956772334294, + "grad_norm": 1.3295563962936168, + "learning_rate": 7.470151610549469e-07, + "loss": 0.4527069330215454, + "step": 5227 + }, + { + "epoch": 1.2051873198847263, + "grad_norm": 1.7731392679004423, + "learning_rate": 7.466464100402765e-07, + "loss": 0.5407136082649231, + "step": 5228 + }, + { + "epoch": 1.2054178674351586, + "grad_norm": 1.776257403864893, + "learning_rate": 7.46277695835247e-07, + "loss": 0.499603807926178, + "step": 5229 + }, + { + "epoch": 1.2056484149855908, + "grad_norm": 1.3999462799385904, + "learning_rate": 7.459090184934293e-07, + "loss": 0.43973076343536377, + "step": 5230 + }, + { + "epoch": 1.2058789625360231, + "grad_norm": 1.4512778914107467, + "learning_rate": 7.455403780683877e-07, + "loss": 0.5759705305099487, + "step": 5231 + }, + { + "epoch": 1.2061095100864554, + "grad_norm": 1.959975964174113, + "learning_rate": 7.451717746136819e-07, + "loss": 0.4623042643070221, + "step": 5232 + }, + { + "epoch": 1.2063400576368877, + "grad_norm": 1.6890420465514966, + "learning_rate": 7.448032081828666e-07, + "loss": 0.5411099791526794, + "step": 5233 + }, + { + "epoch": 1.20657060518732, + "grad_norm": 1.4414259450436449, + "learning_rate": 7.444346788294904e-07, + "loss": 0.4358411133289337, + "step": 5234 + }, + { + "epoch": 1.2068011527377522, + "grad_norm": 1.5143380411163672, + "learning_rate": 7.440661866070967e-07, + "loss": 0.4569090008735657, + "step": 5235 + }, + { + "epoch": 1.2070317002881845, + "grad_norm": 1.8867912221636376, + "learning_rate": 7.436977315692234e-07, + "loss": 0.4413526654243469, + "step": 5236 + }, + { + "epoch": 1.2072622478386168, + "grad_norm": 1.59137760270953, + "learning_rate": 7.433293137694038e-07, + "loss": 0.5472520589828491, + "step": 5237 + }, + { + "epoch": 1.207492795389049, + "grad_norm": 1.5410351897128989, + "learning_rate": 7.429609332611648e-07, + "loss": 0.48993226885795593, + "step": 5238 + }, + { + "epoch": 1.2077233429394814, + "grad_norm": 1.5668405345191256, + "learning_rate": 7.42592590098028e-07, + "loss": 0.4739914536476135, + "step": 5239 + }, + { + "epoch": 1.2079538904899136, + "grad_norm": 1.6541576418498671, + "learning_rate": 7.422242843335103e-07, + "loss": 0.4202721118927002, + "step": 5240 + }, + { + "epoch": 1.208184438040346, + "grad_norm": 1.5270783995121628, + "learning_rate": 7.418560160211227e-07, + "loss": 0.40436694025993347, + "step": 5241 + }, + { + "epoch": 1.2084149855907782, + "grad_norm": 1.4882888138390808, + "learning_rate": 7.4148778521437e-07, + "loss": 0.5451452136039734, + "step": 5242 + }, + { + "epoch": 1.2086455331412105, + "grad_norm": 2.0232712244262707, + "learning_rate": 7.411195919667536e-07, + "loss": 0.47577959299087524, + "step": 5243 + }, + { + "epoch": 1.2088760806916428, + "grad_norm": 1.6782045659247888, + "learning_rate": 7.407514363317668e-07, + "loss": 0.5722167491912842, + "step": 5244 + }, + { + "epoch": 1.209106628242075, + "grad_norm": 1.2764452833125082, + "learning_rate": 7.403833183628994e-07, + "loss": 0.4649240970611572, + "step": 5245 + }, + { + "epoch": 1.2093371757925073, + "grad_norm": 1.5732711836020632, + "learning_rate": 7.400152381136356e-07, + "loss": 0.43235695362091064, + "step": 5246 + }, + { + "epoch": 1.2095677233429396, + "grad_norm": 1.585508242802622, + "learning_rate": 7.396471956374526e-07, + "loss": 0.4768486022949219, + "step": 5247 + }, + { + "epoch": 1.2097982708933717, + "grad_norm": 1.4279497343125311, + "learning_rate": 7.392791909878238e-07, + "loss": 0.4668810963630676, + "step": 5248 + }, + { + "epoch": 1.210028818443804, + "grad_norm": 1.4689492151474428, + "learning_rate": 7.389112242182167e-07, + "loss": 0.5458219051361084, + "step": 5249 + }, + { + "epoch": 1.2102593659942362, + "grad_norm": 1.4005798560782017, + "learning_rate": 7.385432953820923e-07, + "loss": 0.5364928245544434, + "step": 5250 + }, + { + "epoch": 1.2104899135446685, + "grad_norm": 1.6602298965175872, + "learning_rate": 7.381754045329074e-07, + "loss": 0.38189631700515747, + "step": 5251 + }, + { + "epoch": 1.2107204610951008, + "grad_norm": 1.4124893480452145, + "learning_rate": 7.378075517241125e-07, + "loss": 0.4769268333911896, + "step": 5252 + }, + { + "epoch": 1.210951008645533, + "grad_norm": 1.6246057596513548, + "learning_rate": 7.374397370091524e-07, + "loss": 0.5034056901931763, + "step": 5253 + }, + { + "epoch": 1.2111815561959653, + "grad_norm": 1.543411080095952, + "learning_rate": 7.370719604414677e-07, + "loss": 0.4454866647720337, + "step": 5254 + }, + { + "epoch": 1.2114121037463976, + "grad_norm": 2.0913685445588013, + "learning_rate": 7.367042220744917e-07, + "loss": 0.45258912444114685, + "step": 5255 + }, + { + "epoch": 1.21164265129683, + "grad_norm": 1.4734776431585603, + "learning_rate": 7.36336521961653e-07, + "loss": 0.38827258348464966, + "step": 5256 + }, + { + "epoch": 1.2118731988472622, + "grad_norm": 1.3890787791174941, + "learning_rate": 7.359688601563751e-07, + "loss": 0.4223392605781555, + "step": 5257 + }, + { + "epoch": 1.2121037463976945, + "grad_norm": 2.0074293946274, + "learning_rate": 7.356012367120752e-07, + "loss": 0.5090558528900146, + "step": 5258 + }, + { + "epoch": 1.2123342939481268, + "grad_norm": 1.6974670853065486, + "learning_rate": 7.352336516821654e-07, + "loss": 0.457112193107605, + "step": 5259 + }, + { + "epoch": 1.212564841498559, + "grad_norm": 1.7143853713755068, + "learning_rate": 7.34866105120051e-07, + "loss": 0.4801919162273407, + "step": 5260 + }, + { + "epoch": 1.2127953890489913, + "grad_norm": 1.9161855479797785, + "learning_rate": 7.344985970791337e-07, + "loss": 0.4039991497993469, + "step": 5261 + }, + { + "epoch": 1.2130259365994236, + "grad_norm": 1.6279032064972734, + "learning_rate": 7.341311276128086e-07, + "loss": 0.4903545677661896, + "step": 5262 + }, + { + "epoch": 1.2132564841498559, + "grad_norm": 1.3609848879438589, + "learning_rate": 7.337636967744642e-07, + "loss": 0.42703670263290405, + "step": 5263 + }, + { + "epoch": 1.2134870317002882, + "grad_norm": 1.3994720029391854, + "learning_rate": 7.333963046174856e-07, + "loss": 0.44627687335014343, + "step": 5264 + }, + { + "epoch": 1.2137175792507204, + "grad_norm": 1.3128568673715006, + "learning_rate": 7.330289511952505e-07, + "loss": 0.4332897663116455, + "step": 5265 + }, + { + "epoch": 1.2139481268011527, + "grad_norm": 1.637478754152211, + "learning_rate": 7.326616365611312e-07, + "loss": 0.3755282163619995, + "step": 5266 + }, + { + "epoch": 1.214178674351585, + "grad_norm": 1.3904375127772393, + "learning_rate": 7.322943607684955e-07, + "loss": 0.43442171812057495, + "step": 5267 + }, + { + "epoch": 1.2144092219020173, + "grad_norm": 1.6462684630654334, + "learning_rate": 7.319271238707041e-07, + "loss": 0.53243088722229, + "step": 5268 + }, + { + "epoch": 1.2146397694524496, + "grad_norm": 1.6026015390069184, + "learning_rate": 7.315599259211126e-07, + "loss": 0.47449052333831787, + "step": 5269 + }, + { + "epoch": 1.2148703170028818, + "grad_norm": 1.3730761035928816, + "learning_rate": 7.311927669730718e-07, + "loss": 0.4487804174423218, + "step": 5270 + }, + { + "epoch": 1.2151008645533141, + "grad_norm": 1.4002622501825168, + "learning_rate": 7.308256470799254e-07, + "loss": 0.44249311089515686, + "step": 5271 + }, + { + "epoch": 1.2153314121037464, + "grad_norm": 1.3523133908066987, + "learning_rate": 7.304585662950124e-07, + "loss": 0.5116596817970276, + "step": 5272 + }, + { + "epoch": 1.2155619596541787, + "grad_norm": 1.5195812729956901, + "learning_rate": 7.300915246716654e-07, + "loss": 0.4131404161453247, + "step": 5273 + }, + { + "epoch": 1.215792507204611, + "grad_norm": 1.381246861741987, + "learning_rate": 7.297245222632124e-07, + "loss": 0.5043983459472656, + "step": 5274 + }, + { + "epoch": 1.2160230547550432, + "grad_norm": 1.637523206196005, + "learning_rate": 7.293575591229748e-07, + "loss": 0.5042402744293213, + "step": 5275 + }, + { + "epoch": 1.2162536023054755, + "grad_norm": 1.5077498186665426, + "learning_rate": 7.28990635304268e-07, + "loss": 0.5686768293380737, + "step": 5276 + }, + { + "epoch": 1.2164841498559078, + "grad_norm": 1.7943867829472084, + "learning_rate": 7.286237508604029e-07, + "loss": 0.48758572340011597, + "step": 5277 + }, + { + "epoch": 1.21671469740634, + "grad_norm": 1.7282870483340211, + "learning_rate": 7.282569058446839e-07, + "loss": 0.5371814966201782, + "step": 5278 + }, + { + "epoch": 1.2169452449567724, + "grad_norm": 1.4246190071563047, + "learning_rate": 7.278901003104092e-07, + "loss": 0.42574048042297363, + "step": 5279 + }, + { + "epoch": 1.2171757925072046, + "grad_norm": 1.4665158384559256, + "learning_rate": 7.275233343108725e-07, + "loss": 0.4266633987426758, + "step": 5280 + }, + { + "epoch": 1.217406340057637, + "grad_norm": 1.5187689250162444, + "learning_rate": 7.271566078993608e-07, + "loss": 0.5175603628158569, + "step": 5281 + }, + { + "epoch": 1.2176368876080692, + "grad_norm": 1.4145262211461822, + "learning_rate": 7.267899211291553e-07, + "loss": 0.45306429266929626, + "step": 5282 + }, + { + "epoch": 1.2178674351585015, + "grad_norm": 1.5722710553655959, + "learning_rate": 7.264232740535326e-07, + "loss": 0.44565948843955994, + "step": 5283 + }, + { + "epoch": 1.2180979827089338, + "grad_norm": 1.7079312896564505, + "learning_rate": 7.260566667257619e-07, + "loss": 0.5211349129676819, + "step": 5284 + }, + { + "epoch": 1.218328530259366, + "grad_norm": 1.5221889542953, + "learning_rate": 7.256900991991078e-07, + "loss": 0.4979493021965027, + "step": 5285 + }, + { + "epoch": 1.2185590778097983, + "grad_norm": 1.8057148340658564, + "learning_rate": 7.253235715268288e-07, + "loss": 0.5332674980163574, + "step": 5286 + }, + { + "epoch": 1.2187896253602306, + "grad_norm": 1.4882443630436493, + "learning_rate": 7.249570837621773e-07, + "loss": 0.49410900473594666, + "step": 5287 + }, + { + "epoch": 1.219020172910663, + "grad_norm": 1.4338054427483777, + "learning_rate": 7.245906359584007e-07, + "loss": 0.37368130683898926, + "step": 5288 + }, + { + "epoch": 1.2192507204610952, + "grad_norm": 1.4281840560624028, + "learning_rate": 7.242242281687392e-07, + "loss": 0.38786208629608154, + "step": 5289 + }, + { + "epoch": 1.2194812680115275, + "grad_norm": 1.5093115457086763, + "learning_rate": 7.238578604464286e-07, + "loss": 0.3937080502510071, + "step": 5290 + }, + { + "epoch": 1.2197118155619597, + "grad_norm": 1.3008901010675145, + "learning_rate": 7.234915328446984e-07, + "loss": 0.40712523460388184, + "step": 5291 + }, + { + "epoch": 1.219942363112392, + "grad_norm": 1.513634738152884, + "learning_rate": 7.231252454167718e-07, + "loss": 0.4975137412548065, + "step": 5292 + }, + { + "epoch": 1.2201729106628243, + "grad_norm": 1.9471699294165814, + "learning_rate": 7.227589982158668e-07, + "loss": 0.4989965558052063, + "step": 5293 + }, + { + "epoch": 1.2204034582132566, + "grad_norm": 1.7515604771469182, + "learning_rate": 7.223927912951957e-07, + "loss": 0.46941643953323364, + "step": 5294 + }, + { + "epoch": 1.2206340057636889, + "grad_norm": 1.4552925878868335, + "learning_rate": 7.220266247079636e-07, + "loss": 0.4827825129032135, + "step": 5295 + }, + { + "epoch": 1.220864553314121, + "grad_norm": 1.640471954378316, + "learning_rate": 7.216604985073715e-07, + "loss": 0.5201072692871094, + "step": 5296 + }, + { + "epoch": 1.2210951008645532, + "grad_norm": 1.5192370436913325, + "learning_rate": 7.212944127466134e-07, + "loss": 0.4791795015335083, + "step": 5297 + }, + { + "epoch": 1.2213256484149855, + "grad_norm": 1.571406498612016, + "learning_rate": 7.209283674788776e-07, + "loss": 0.4800533056259155, + "step": 5298 + }, + { + "epoch": 1.2215561959654178, + "grad_norm": 1.5414636998167182, + "learning_rate": 7.205623627573474e-07, + "loss": 0.39302635192871094, + "step": 5299 + }, + { + "epoch": 1.22178674351585, + "grad_norm": 1.7227103984195933, + "learning_rate": 7.201963986351985e-07, + "loss": 0.4840422570705414, + "step": 5300 + }, + { + "epoch": 1.2220172910662823, + "grad_norm": 1.5551647759093306, + "learning_rate": 7.198304751656021e-07, + "loss": 0.4697931110858917, + "step": 5301 + }, + { + "epoch": 1.2222478386167146, + "grad_norm": 2.184299348287721, + "learning_rate": 7.194645924017235e-07, + "loss": 0.5836421847343445, + "step": 5302 + }, + { + "epoch": 1.2224783861671469, + "grad_norm": 1.5210470262328177, + "learning_rate": 7.190987503967211e-07, + "loss": 0.4224538803100586, + "step": 5303 + }, + { + "epoch": 1.2227089337175792, + "grad_norm": 1.5588545608663777, + "learning_rate": 7.187329492037483e-07, + "loss": 0.4659155011177063, + "step": 5304 + }, + { + "epoch": 1.2229394812680114, + "grad_norm": 1.5975720556067594, + "learning_rate": 7.183671888759515e-07, + "loss": 0.5140712857246399, + "step": 5305 + }, + { + "epoch": 1.2231700288184437, + "grad_norm": 1.5874727824945862, + "learning_rate": 7.180014694664727e-07, + "loss": 0.5154543519020081, + "step": 5306 + }, + { + "epoch": 1.223400576368876, + "grad_norm": 1.446540356714536, + "learning_rate": 7.17635791028447e-07, + "loss": 0.47813570499420166, + "step": 5307 + }, + { + "epoch": 1.2236311239193083, + "grad_norm": 1.6575633815006634, + "learning_rate": 7.172701536150031e-07, + "loss": 0.5261724591255188, + "step": 5308 + }, + { + "epoch": 1.2238616714697406, + "grad_norm": 1.8019702671398061, + "learning_rate": 7.169045572792649e-07, + "loss": 0.4635971486568451, + "step": 5309 + }, + { + "epoch": 1.2240922190201728, + "grad_norm": 1.262174656023695, + "learning_rate": 7.165390020743497e-07, + "loss": 0.46142834424972534, + "step": 5310 + }, + { + "epoch": 1.2243227665706051, + "grad_norm": 1.4418390684347826, + "learning_rate": 7.161734880533683e-07, + "loss": 0.47900235652923584, + "step": 5311 + }, + { + "epoch": 1.2245533141210374, + "grad_norm": 1.473019079046211, + "learning_rate": 7.15808015269427e-07, + "loss": 0.5513979196548462, + "step": 5312 + }, + { + "epoch": 1.2247838616714697, + "grad_norm": 2.0803994243808637, + "learning_rate": 7.154425837756244e-07, + "loss": 0.46302229166030884, + "step": 5313 + }, + { + "epoch": 1.225014409221902, + "grad_norm": 1.468383069657206, + "learning_rate": 7.150771936250539e-07, + "loss": 0.5822727680206299, + "step": 5314 + }, + { + "epoch": 1.2252449567723342, + "grad_norm": 1.2990824770630092, + "learning_rate": 7.147118448708039e-07, + "loss": 0.37017765641212463, + "step": 5315 + }, + { + "epoch": 1.2254755043227665, + "grad_norm": 1.285655707049756, + "learning_rate": 7.143465375659545e-07, + "loss": 0.44237178564071655, + "step": 5316 + }, + { + "epoch": 1.2257060518731988, + "grad_norm": 1.6766031200731948, + "learning_rate": 7.13981271763582e-07, + "loss": 0.5030481815338135, + "step": 5317 + }, + { + "epoch": 1.225936599423631, + "grad_norm": 1.5702404213513188, + "learning_rate": 7.136160475167547e-07, + "loss": 0.5518827438354492, + "step": 5318 + }, + { + "epoch": 1.2261671469740634, + "grad_norm": 1.251114193413058, + "learning_rate": 7.132508648785369e-07, + "loss": 0.5090180039405823, + "step": 5319 + }, + { + "epoch": 1.2263976945244957, + "grad_norm": 1.7345902549723513, + "learning_rate": 7.128857239019857e-07, + "loss": 0.495076060295105, + "step": 5320 + }, + { + "epoch": 1.226628242074928, + "grad_norm": 1.5675333212129947, + "learning_rate": 7.125206246401514e-07, + "loss": 0.530053973197937, + "step": 5321 + }, + { + "epoch": 1.2268587896253602, + "grad_norm": 1.2898853274906796, + "learning_rate": 7.121555671460802e-07, + "loss": 0.46412545442581177, + "step": 5322 + }, + { + "epoch": 1.2270893371757925, + "grad_norm": 1.5765192599645659, + "learning_rate": 7.117905514728107e-07, + "loss": 0.443530797958374, + "step": 5323 + }, + { + "epoch": 1.2273198847262248, + "grad_norm": 1.6711155634725154, + "learning_rate": 7.114255776733755e-07, + "loss": 0.4647101163864136, + "step": 5324 + }, + { + "epoch": 1.227550432276657, + "grad_norm": 1.547419086723459, + "learning_rate": 7.110606458008023e-07, + "loss": 0.45051440596580505, + "step": 5325 + }, + { + "epoch": 1.2277809798270893, + "grad_norm": 1.6266032223725158, + "learning_rate": 7.106957559081115e-07, + "loss": 0.5258926153182983, + "step": 5326 + }, + { + "epoch": 1.2280115273775216, + "grad_norm": 1.6807374525723655, + "learning_rate": 7.103309080483173e-07, + "loss": 0.39645054936408997, + "step": 5327 + }, + { + "epoch": 1.228242074927954, + "grad_norm": 1.5875759565882142, + "learning_rate": 7.099661022744294e-07, + "loss": 0.48216918110847473, + "step": 5328 + }, + { + "epoch": 1.2284726224783862, + "grad_norm": 1.4750837464060895, + "learning_rate": 7.096013386394493e-07, + "loss": 0.42962855100631714, + "step": 5329 + }, + { + "epoch": 1.2287031700288185, + "grad_norm": 1.613399761856971, + "learning_rate": 7.092366171963738e-07, + "loss": 0.5269042253494263, + "step": 5330 + }, + { + "epoch": 1.2289337175792507, + "grad_norm": 1.831645544729853, + "learning_rate": 7.088719379981932e-07, + "loss": 0.47110509872436523, + "step": 5331 + }, + { + "epoch": 1.229164265129683, + "grad_norm": 1.4339565179319511, + "learning_rate": 7.085073010978915e-07, + "loss": 0.45334869623184204, + "step": 5332 + }, + { + "epoch": 1.2293948126801153, + "grad_norm": 1.4131421708681255, + "learning_rate": 7.081427065484467e-07, + "loss": 0.5029184818267822, + "step": 5333 + }, + { + "epoch": 1.2296253602305476, + "grad_norm": 1.7892961583851241, + "learning_rate": 7.0777815440283e-07, + "loss": 0.5292627215385437, + "step": 5334 + }, + { + "epoch": 1.2298559077809799, + "grad_norm": 1.5276984778361233, + "learning_rate": 7.074136447140077e-07, + "loss": 0.4436877965927124, + "step": 5335 + }, + { + "epoch": 1.2300864553314121, + "grad_norm": 1.6707368087722814, + "learning_rate": 7.070491775349396e-07, + "loss": 0.4915885925292969, + "step": 5336 + }, + { + "epoch": 1.2303170028818444, + "grad_norm": 1.4733414399918665, + "learning_rate": 7.066847529185779e-07, + "loss": 0.4028368592262268, + "step": 5337 + }, + { + "epoch": 1.2305475504322767, + "grad_norm": 1.56972201812435, + "learning_rate": 7.063203709178704e-07, + "loss": 0.41268208622932434, + "step": 5338 + }, + { + "epoch": 1.230778097982709, + "grad_norm": 1.494676374710187, + "learning_rate": 7.059560315857585e-07, + "loss": 0.4848160743713379, + "step": 5339 + }, + { + "epoch": 1.2310086455331413, + "grad_norm": 1.6665066281678116, + "learning_rate": 7.055917349751755e-07, + "loss": 0.43197691440582275, + "step": 5340 + }, + { + "epoch": 1.2312391930835735, + "grad_norm": 1.5221352439773972, + "learning_rate": 7.052274811390514e-07, + "loss": 0.4859619736671448, + "step": 5341 + }, + { + "epoch": 1.2314697406340058, + "grad_norm": 1.8033359613530964, + "learning_rate": 7.048632701303075e-07, + "loss": 0.43828168511390686, + "step": 5342 + }, + { + "epoch": 1.231700288184438, + "grad_norm": 1.465814284332743, + "learning_rate": 7.044991020018601e-07, + "loss": 0.4256266951560974, + "step": 5343 + }, + { + "epoch": 1.2319308357348704, + "grad_norm": 1.6334021582177483, + "learning_rate": 7.041349768066196e-07, + "loss": 0.5345013737678528, + "step": 5344 + }, + { + "epoch": 1.2321613832853027, + "grad_norm": 1.5664723740177018, + "learning_rate": 7.037708945974887e-07, + "loss": 0.46685951948165894, + "step": 5345 + }, + { + "epoch": 1.232391930835735, + "grad_norm": 1.8414903460242973, + "learning_rate": 7.034068554273653e-07, + "loss": 0.4725074768066406, + "step": 5346 + }, + { + "epoch": 1.2326224783861672, + "grad_norm": 1.447512025177934, + "learning_rate": 7.030428593491407e-07, + "loss": 0.4719870984554291, + "step": 5347 + }, + { + "epoch": 1.2328530259365995, + "grad_norm": 1.5137035821770004, + "learning_rate": 7.026789064156992e-07, + "loss": 0.47967803478240967, + "step": 5348 + }, + { + "epoch": 1.2330835734870318, + "grad_norm": 1.7079828842976972, + "learning_rate": 7.023149966799198e-07, + "loss": 0.5108177065849304, + "step": 5349 + }, + { + "epoch": 1.233314121037464, + "grad_norm": 1.8907292823913628, + "learning_rate": 7.019511301946743e-07, + "loss": 0.5461745262145996, + "step": 5350 + }, + { + "epoch": 1.2335446685878964, + "grad_norm": 1.6940587927951292, + "learning_rate": 7.015873070128292e-07, + "loss": 0.5612732172012329, + "step": 5351 + }, + { + "epoch": 1.2337752161383286, + "grad_norm": 1.477660087960271, + "learning_rate": 7.012235271872443e-07, + "loss": 0.4839070439338684, + "step": 5352 + }, + { + "epoch": 1.234005763688761, + "grad_norm": 1.5409923852543361, + "learning_rate": 7.008597907707724e-07, + "loss": 0.4188167452812195, + "step": 5353 + }, + { + "epoch": 1.2342363112391932, + "grad_norm": 1.6513108999988988, + "learning_rate": 7.004960978162617e-07, + "loss": 0.49374920129776, + "step": 5354 + }, + { + "epoch": 1.2344668587896255, + "grad_norm": 1.5545279058414754, + "learning_rate": 7.001324483765515e-07, + "loss": 0.5485600829124451, + "step": 5355 + }, + { + "epoch": 1.2346974063400578, + "grad_norm": 1.5209405886137708, + "learning_rate": 6.997688425044772e-07, + "loss": 0.48566675186157227, + "step": 5356 + }, + { + "epoch": 1.23492795389049, + "grad_norm": 1.6039735368693278, + "learning_rate": 6.994052802528674e-07, + "loss": 0.41277337074279785, + "step": 5357 + }, + { + "epoch": 1.235158501440922, + "grad_norm": 1.4736114676774628, + "learning_rate": 6.990417616745428e-07, + "loss": 0.38438326120376587, + "step": 5358 + }, + { + "epoch": 1.2353890489913544, + "grad_norm": 1.7030181330099163, + "learning_rate": 6.986782868223194e-07, + "loss": 0.4412330389022827, + "step": 5359 + }, + { + "epoch": 1.2356195965417867, + "grad_norm": 1.7135416795794967, + "learning_rate": 6.983148557490069e-07, + "loss": 0.4721234142780304, + "step": 5360 + }, + { + "epoch": 1.235850144092219, + "grad_norm": 1.2725652745663298, + "learning_rate": 6.979514685074069e-07, + "loss": 0.37214499711990356, + "step": 5361 + }, + { + "epoch": 1.2360806916426512, + "grad_norm": 1.6936901145141183, + "learning_rate": 6.975881251503168e-07, + "loss": 0.5757625699043274, + "step": 5362 + }, + { + "epoch": 1.2363112391930835, + "grad_norm": 1.5702972159481126, + "learning_rate": 6.972248257305261e-07, + "loss": 0.47131651639938354, + "step": 5363 + }, + { + "epoch": 1.2365417867435158, + "grad_norm": 1.4382156565054938, + "learning_rate": 6.968615703008181e-07, + "loss": 0.4582099914550781, + "step": 5364 + }, + { + "epoch": 1.236772334293948, + "grad_norm": 1.4379269165858373, + "learning_rate": 6.96498358913971e-07, + "loss": 0.4932486116886139, + "step": 5365 + }, + { + "epoch": 1.2370028818443803, + "grad_norm": 1.4366623990202567, + "learning_rate": 6.961351916227549e-07, + "loss": 0.44992512464523315, + "step": 5366 + }, + { + "epoch": 1.2372334293948126, + "grad_norm": 1.4889934853931641, + "learning_rate": 6.957720684799342e-07, + "loss": 0.5043992400169373, + "step": 5367 + }, + { + "epoch": 1.237463976945245, + "grad_norm": 1.4017599596662496, + "learning_rate": 6.954089895382675e-07, + "loss": 0.44705575704574585, + "step": 5368 + }, + { + "epoch": 1.2376945244956772, + "grad_norm": 1.8731873123119112, + "learning_rate": 6.950459548505057e-07, + "loss": 0.48370909690856934, + "step": 5369 + }, + { + "epoch": 1.2379250720461095, + "grad_norm": 1.4064067860312528, + "learning_rate": 6.946829644693947e-07, + "loss": 0.4326665997505188, + "step": 5370 + }, + { + "epoch": 1.2381556195965417, + "grad_norm": 1.9138397041169513, + "learning_rate": 6.943200184476723e-07, + "loss": 0.5717728137969971, + "step": 5371 + }, + { + "epoch": 1.238386167146974, + "grad_norm": 1.6265922150082073, + "learning_rate": 6.939571168380715e-07, + "loss": 0.4615858793258667, + "step": 5372 + }, + { + "epoch": 1.2386167146974063, + "grad_norm": 1.6132244275345642, + "learning_rate": 6.935942596933181e-07, + "loss": 0.5257784724235535, + "step": 5373 + }, + { + "epoch": 1.2388472622478386, + "grad_norm": 1.4477940177516195, + "learning_rate": 6.932314470661309e-07, + "loss": 0.5333257913589478, + "step": 5374 + }, + { + "epoch": 1.2390778097982709, + "grad_norm": 1.4168682949552263, + "learning_rate": 6.928686790092234e-07, + "loss": 0.4580482244491577, + "step": 5375 + }, + { + "epoch": 1.2393083573487031, + "grad_norm": 1.583728497558373, + "learning_rate": 6.925059555753021e-07, + "loss": 0.4775884449481964, + "step": 5376 + }, + { + "epoch": 1.2395389048991354, + "grad_norm": 1.5687995312552376, + "learning_rate": 6.921432768170661e-07, + "loss": 0.44678401947021484, + "step": 5377 + }, + { + "epoch": 1.2397694524495677, + "grad_norm": 1.4949610582315778, + "learning_rate": 6.917806427872099e-07, + "loss": 0.4390775263309479, + "step": 5378 + }, + { + "epoch": 1.24, + "grad_norm": 1.5677366458650814, + "learning_rate": 6.914180535384198e-07, + "loss": 0.4696844518184662, + "step": 5379 + }, + { + "epoch": 1.2402305475504323, + "grad_norm": 1.6121063432311435, + "learning_rate": 6.910555091233761e-07, + "loss": 0.560876727104187, + "step": 5380 + }, + { + "epoch": 1.2404610951008646, + "grad_norm": 1.5109825976981524, + "learning_rate": 6.906930095947537e-07, + "loss": 0.5124620795249939, + "step": 5381 + }, + { + "epoch": 1.2406916426512968, + "grad_norm": 1.3305908427691708, + "learning_rate": 6.903305550052187e-07, + "loss": 0.4296848177909851, + "step": 5382 + }, + { + "epoch": 1.2409221902017291, + "grad_norm": 1.4071369163278793, + "learning_rate": 6.899681454074327e-07, + "loss": 0.40255898237228394, + "step": 5383 + }, + { + "epoch": 1.2411527377521614, + "grad_norm": 1.7565777753092369, + "learning_rate": 6.896057808540505e-07, + "loss": 0.4180254340171814, + "step": 5384 + }, + { + "epoch": 1.2413832853025937, + "grad_norm": 1.4270834637818937, + "learning_rate": 6.892434613977189e-07, + "loss": 0.41249316930770874, + "step": 5385 + }, + { + "epoch": 1.241613832853026, + "grad_norm": 1.9199945589002845, + "learning_rate": 6.8888118709108e-07, + "loss": 0.44984108209609985, + "step": 5386 + }, + { + "epoch": 1.2418443804034582, + "grad_norm": 1.5651965928462388, + "learning_rate": 6.885189579867677e-07, + "loss": 0.4913838505744934, + "step": 5387 + }, + { + "epoch": 1.2420749279538905, + "grad_norm": 1.8945057573554496, + "learning_rate": 6.881567741374107e-07, + "loss": 0.6019924879074097, + "step": 5388 + }, + { + "epoch": 1.2423054755043228, + "grad_norm": 1.3856621078353595, + "learning_rate": 6.877946355956305e-07, + "loss": 0.5212692618370056, + "step": 5389 + }, + { + "epoch": 1.242536023054755, + "grad_norm": 1.5912853207047202, + "learning_rate": 6.874325424140417e-07, + "loss": 0.49732527136802673, + "step": 5390 + }, + { + "epoch": 1.2427665706051874, + "grad_norm": 1.6221741050482537, + "learning_rate": 6.87070494645253e-07, + "loss": 0.44408339262008667, + "step": 5391 + }, + { + "epoch": 1.2429971181556196, + "grad_norm": 1.584527881503366, + "learning_rate": 6.867084923418663e-07, + "loss": 0.43635284900665283, + "step": 5392 + }, + { + "epoch": 1.243227665706052, + "grad_norm": 1.382962686107236, + "learning_rate": 6.863465355564761e-07, + "loss": 0.48582303524017334, + "step": 5393 + }, + { + "epoch": 1.2434582132564842, + "grad_norm": 1.574457872856851, + "learning_rate": 6.85984624341672e-07, + "loss": 0.4211381673812866, + "step": 5394 + }, + { + "epoch": 1.2436887608069165, + "grad_norm": 1.5127608845686662, + "learning_rate": 6.85622758750035e-07, + "loss": 0.47154808044433594, + "step": 5395 + }, + { + "epoch": 1.2439193083573488, + "grad_norm": 1.2959619642815412, + "learning_rate": 6.852609388341406e-07, + "loss": 0.3881720304489136, + "step": 5396 + }, + { + "epoch": 1.244149855907781, + "grad_norm": 1.8465773987676242, + "learning_rate": 6.84899164646558e-07, + "loss": 0.4934437870979309, + "step": 5397 + }, + { + "epoch": 1.2443804034582133, + "grad_norm": 1.6728854789305907, + "learning_rate": 6.845374362398486e-07, + "loss": 0.49199211597442627, + "step": 5398 + }, + { + "epoch": 1.2446109510086456, + "grad_norm": 1.667169706269872, + "learning_rate": 6.841757536665683e-07, + "loss": 0.5011521577835083, + "step": 5399 + }, + { + "epoch": 1.2448414985590779, + "grad_norm": 1.5545634113819367, + "learning_rate": 6.83814116979265e-07, + "loss": 0.4599594473838806, + "step": 5400 + }, + { + "epoch": 1.2450720461095102, + "grad_norm": 2.335132416331749, + "learning_rate": 6.834525262304817e-07, + "loss": 0.5006797313690186, + "step": 5401 + }, + { + "epoch": 1.2453025936599424, + "grad_norm": 1.7500701936339498, + "learning_rate": 6.830909814727534e-07, + "loss": 0.44420236349105835, + "step": 5402 + }, + { + "epoch": 1.2455331412103747, + "grad_norm": 1.41432328248269, + "learning_rate": 6.827294827586086e-07, + "loss": 0.4200541377067566, + "step": 5403 + }, + { + "epoch": 1.245763688760807, + "grad_norm": 1.4530188886581012, + "learning_rate": 6.823680301405693e-07, + "loss": 0.4684341251850128, + "step": 5404 + }, + { + "epoch": 1.2459942363112393, + "grad_norm": 1.5805133579379969, + "learning_rate": 6.820066236711514e-07, + "loss": 0.5357221364974976, + "step": 5405 + }, + { + "epoch": 1.2462247838616713, + "grad_norm": 1.4518037927097922, + "learning_rate": 6.816452634028626e-07, + "loss": 0.4230891168117523, + "step": 5406 + }, + { + "epoch": 1.2464553314121036, + "grad_norm": 1.5147135801771385, + "learning_rate": 6.812839493882056e-07, + "loss": 0.4489879608154297, + "step": 5407 + }, + { + "epoch": 1.246685878962536, + "grad_norm": 1.4193036604440596, + "learning_rate": 6.80922681679675e-07, + "loss": 0.4748901128768921, + "step": 5408 + }, + { + "epoch": 1.2469164265129682, + "grad_norm": 1.3796380805346808, + "learning_rate": 6.805614603297594e-07, + "loss": 0.44227588176727295, + "step": 5409 + }, + { + "epoch": 1.2471469740634005, + "grad_norm": 1.4650703004037071, + "learning_rate": 6.802002853909408e-07, + "loss": 0.42565715312957764, + "step": 5410 + }, + { + "epoch": 1.2473775216138328, + "grad_norm": 1.2687998722413263, + "learning_rate": 6.798391569156938e-07, + "loss": 0.4466899633407593, + "step": 5411 + }, + { + "epoch": 1.247608069164265, + "grad_norm": 1.4877290529879426, + "learning_rate": 6.794780749564865e-07, + "loss": 0.45467138290405273, + "step": 5412 + }, + { + "epoch": 1.2478386167146973, + "grad_norm": 1.587000793967483, + "learning_rate": 6.79117039565781e-07, + "loss": 0.4968474507331848, + "step": 5413 + }, + { + "epoch": 1.2480691642651296, + "grad_norm": 1.7383476070225938, + "learning_rate": 6.787560507960315e-07, + "loss": 0.4603409171104431, + "step": 5414 + }, + { + "epoch": 1.2482997118155619, + "grad_norm": 1.812747956168159, + "learning_rate": 6.783951086996859e-07, + "loss": 0.5013781785964966, + "step": 5415 + }, + { + "epoch": 1.2485302593659942, + "grad_norm": 1.3508050811528929, + "learning_rate": 6.780342133291853e-07, + "loss": 0.42432457208633423, + "step": 5416 + }, + { + "epoch": 1.2487608069164264, + "grad_norm": 1.7613681947256843, + "learning_rate": 6.776733647369642e-07, + "loss": 0.4237065315246582, + "step": 5417 + }, + { + "epoch": 1.2489913544668587, + "grad_norm": 1.3175927182751024, + "learning_rate": 6.773125629754503e-07, + "loss": 0.41611090302467346, + "step": 5418 + }, + { + "epoch": 1.249221902017291, + "grad_norm": 1.7410881231831246, + "learning_rate": 6.769518080970639e-07, + "loss": 0.44240689277648926, + "step": 5419 + }, + { + "epoch": 1.2494524495677233, + "grad_norm": 1.2785667812742771, + "learning_rate": 6.765911001542193e-07, + "loss": 0.45134061574935913, + "step": 5420 + }, + { + "epoch": 1.2496829971181556, + "grad_norm": 1.9175453690913649, + "learning_rate": 6.762304391993237e-07, + "loss": 0.5218105316162109, + "step": 5421 + }, + { + "epoch": 1.2499135446685878, + "grad_norm": 1.6363309057276445, + "learning_rate": 6.758698252847768e-07, + "loss": 0.4311027228832245, + "step": 5422 + }, + { + "epoch": 1.2501440922190201, + "grad_norm": 1.5244203394981957, + "learning_rate": 6.755092584629727e-07, + "loss": 0.4937070310115814, + "step": 5423 + }, + { + "epoch": 1.2503746397694524, + "grad_norm": 1.4854060603156747, + "learning_rate": 6.751487387862975e-07, + "loss": 0.4381704330444336, + "step": 5424 + }, + { + "epoch": 1.2506051873198847, + "grad_norm": 1.7381307437192484, + "learning_rate": 6.747882663071312e-07, + "loss": 0.4766447842121124, + "step": 5425 + }, + { + "epoch": 1.250835734870317, + "grad_norm": 1.5211127182293511, + "learning_rate": 6.74427841077847e-07, + "loss": 0.41801929473876953, + "step": 5426 + }, + { + "epoch": 1.2510662824207492, + "grad_norm": 1.5333588514457408, + "learning_rate": 6.740674631508105e-07, + "loss": 0.42379114031791687, + "step": 5427 + }, + { + "epoch": 1.2512968299711815, + "grad_norm": 1.6293048691492493, + "learning_rate": 6.737071325783806e-07, + "loss": 0.4198606014251709, + "step": 5428 + }, + { + "epoch": 1.2515273775216138, + "grad_norm": 1.6141332879533556, + "learning_rate": 6.733468494129105e-07, + "loss": 0.4047771692276001, + "step": 5429 + }, + { + "epoch": 1.251757925072046, + "grad_norm": 1.5300772739078217, + "learning_rate": 6.729866137067449e-07, + "loss": 0.5023326873779297, + "step": 5430 + }, + { + "epoch": 1.2519884726224784, + "grad_norm": 1.6198465774895092, + "learning_rate": 6.726264255122227e-07, + "loss": 0.34151679277420044, + "step": 5431 + }, + { + "epoch": 1.2522190201729106, + "grad_norm": 1.486645506129829, + "learning_rate": 6.722662848816748e-07, + "loss": 0.38678500056266785, + "step": 5432 + }, + { + "epoch": 1.252449567723343, + "grad_norm": 1.7272506827447611, + "learning_rate": 6.719061918674267e-07, + "loss": 0.49455368518829346, + "step": 5433 + }, + { + "epoch": 1.2526801152737752, + "grad_norm": 1.4314837349274792, + "learning_rate": 6.715461465217959e-07, + "loss": 0.42544132471084595, + "step": 5434 + }, + { + "epoch": 1.2529106628242075, + "grad_norm": 1.7176926108989403, + "learning_rate": 6.711861488970927e-07, + "loss": 0.4194113612174988, + "step": 5435 + }, + { + "epoch": 1.2531412103746398, + "grad_norm": 1.4888625044157147, + "learning_rate": 6.708261990456219e-07, + "loss": 0.4825857877731323, + "step": 5436 + }, + { + "epoch": 1.253371757925072, + "grad_norm": 1.9183567280059182, + "learning_rate": 6.704662970196801e-07, + "loss": 0.5231322050094604, + "step": 5437 + }, + { + "epoch": 1.2536023054755043, + "grad_norm": 1.5966670163534955, + "learning_rate": 6.701064428715568e-07, + "loss": 0.4393799304962158, + "step": 5438 + }, + { + "epoch": 1.2538328530259366, + "grad_norm": 1.441480411219716, + "learning_rate": 6.69746636653536e-07, + "loss": 0.4439913034439087, + "step": 5439 + }, + { + "epoch": 1.254063400576369, + "grad_norm": 1.3682222278508178, + "learning_rate": 6.693868784178933e-07, + "loss": 0.44157153367996216, + "step": 5440 + }, + { + "epoch": 1.2542939481268012, + "grad_norm": 1.5181266642772941, + "learning_rate": 6.690271682168976e-07, + "loss": 0.4028700292110443, + "step": 5441 + }, + { + "epoch": 1.2545244956772335, + "grad_norm": 1.4208577251896788, + "learning_rate": 6.686675061028115e-07, + "loss": 0.44558918476104736, + "step": 5442 + }, + { + "epoch": 1.2547550432276657, + "grad_norm": 1.6092471535381658, + "learning_rate": 6.6830789212789e-07, + "loss": 0.44742459058761597, + "step": 5443 + }, + { + "epoch": 1.254985590778098, + "grad_norm": 1.5896606564493474, + "learning_rate": 6.679483263443813e-07, + "loss": 0.48470282554626465, + "step": 5444 + }, + { + "epoch": 1.2552161383285303, + "grad_norm": 1.8312792387216397, + "learning_rate": 6.675888088045263e-07, + "loss": 0.4570988416671753, + "step": 5445 + }, + { + "epoch": 1.2554466858789626, + "grad_norm": 1.838942069663871, + "learning_rate": 6.672293395605595e-07, + "loss": 0.3854732811450958, + "step": 5446 + }, + { + "epoch": 1.2556772334293949, + "grad_norm": 1.6067053822119532, + "learning_rate": 6.66869918664708e-07, + "loss": 0.572611927986145, + "step": 5447 + }, + { + "epoch": 1.2559077809798271, + "grad_norm": 1.765302400736949, + "learning_rate": 6.665105461691916e-07, + "loss": 0.4152478873729706, + "step": 5448 + }, + { + "epoch": 1.2561383285302594, + "grad_norm": 1.6331380960048347, + "learning_rate": 6.661512221262237e-07, + "loss": 0.5350701808929443, + "step": 5449 + }, + { + "epoch": 1.2563688760806917, + "grad_norm": 1.6537785251879598, + "learning_rate": 6.657919465880106e-07, + "loss": 0.510172963142395, + "step": 5450 + }, + { + "epoch": 1.256599423631124, + "grad_norm": 1.3958498055611923, + "learning_rate": 6.654327196067504e-07, + "loss": 0.4707493782043457, + "step": 5451 + }, + { + "epoch": 1.2568299711815563, + "grad_norm": 1.4416810291315851, + "learning_rate": 6.650735412346361e-07, + "loss": 0.4108200967311859, + "step": 5452 + }, + { + "epoch": 1.2570605187319885, + "grad_norm": 1.460053318153373, + "learning_rate": 6.647144115238519e-07, + "loss": 0.5222622752189636, + "step": 5453 + }, + { + "epoch": 1.2572910662824208, + "grad_norm": 1.6015667511775546, + "learning_rate": 6.643553305265755e-07, + "loss": 0.425457239151001, + "step": 5454 + }, + { + "epoch": 1.257521613832853, + "grad_norm": 1.8741374124254828, + "learning_rate": 6.639962982949785e-07, + "loss": 0.49435800313949585, + "step": 5455 + }, + { + "epoch": 1.2577521613832854, + "grad_norm": 1.6022646309557353, + "learning_rate": 6.636373148812237e-07, + "loss": 0.5040857791900635, + "step": 5456 + }, + { + "epoch": 1.2579827089337177, + "grad_norm": 1.476700783342049, + "learning_rate": 6.632783803374678e-07, + "loss": 0.4675745368003845, + "step": 5457 + }, + { + "epoch": 1.25821325648415, + "grad_norm": 1.900550198001673, + "learning_rate": 6.629194947158606e-07, + "loss": 0.5321175456047058, + "step": 5458 + }, + { + "epoch": 1.2584438040345822, + "grad_norm": 1.3273206827091684, + "learning_rate": 6.625606580685442e-07, + "loss": 0.4415740370750427, + "step": 5459 + }, + { + "epoch": 1.2586743515850145, + "grad_norm": 1.5235733720363593, + "learning_rate": 6.622018704476539e-07, + "loss": 0.48481184244155884, + "step": 5460 + }, + { + "epoch": 1.2589048991354468, + "grad_norm": 1.74947332771249, + "learning_rate": 6.618431319053176e-07, + "loss": 0.46835392713546753, + "step": 5461 + }, + { + "epoch": 1.259135446685879, + "grad_norm": 1.3284706762425806, + "learning_rate": 6.614844424936566e-07, + "loss": 0.4376718997955322, + "step": 5462 + }, + { + "epoch": 1.2593659942363113, + "grad_norm": 1.558886627315865, + "learning_rate": 6.611258022647847e-07, + "loss": 0.4625728130340576, + "step": 5463 + }, + { + "epoch": 1.2595965417867436, + "grad_norm": 1.6370741370932307, + "learning_rate": 6.607672112708081e-07, + "loss": 0.44812503457069397, + "step": 5464 + }, + { + "epoch": 1.259827089337176, + "grad_norm": 1.6367384240246787, + "learning_rate": 6.60408669563827e-07, + "loss": 0.46147626638412476, + "step": 5465 + }, + { + "epoch": 1.2600576368876082, + "grad_norm": 1.4309654266560352, + "learning_rate": 6.600501771959337e-07, + "loss": 0.45850062370300293, + "step": 5466 + }, + { + "epoch": 1.2602881844380405, + "grad_norm": 1.5248045750601964, + "learning_rate": 6.596917342192129e-07, + "loss": 0.47113144397735596, + "step": 5467 + }, + { + "epoch": 1.2605187319884728, + "grad_norm": 1.7668685332089469, + "learning_rate": 6.593333406857435e-07, + "loss": 0.4846932888031006, + "step": 5468 + }, + { + "epoch": 1.260749279538905, + "grad_norm": 1.5362515481003414, + "learning_rate": 6.589749966475951e-07, + "loss": 0.4357692003250122, + "step": 5469 + }, + { + "epoch": 1.2609798270893373, + "grad_norm": 1.3245508183347572, + "learning_rate": 6.586167021568323e-07, + "loss": 0.44653600454330444, + "step": 5470 + }, + { + "epoch": 1.2612103746397694, + "grad_norm": 1.5525959738010278, + "learning_rate": 6.582584572655118e-07, + "loss": 0.43938639760017395, + "step": 5471 + }, + { + "epoch": 1.2614409221902017, + "grad_norm": 1.3892882632769337, + "learning_rate": 6.579002620256817e-07, + "loss": 0.4045637845993042, + "step": 5472 + }, + { + "epoch": 1.261671469740634, + "grad_norm": 1.6336701340291093, + "learning_rate": 6.575421164893849e-07, + "loss": 0.483223557472229, + "step": 5473 + }, + { + "epoch": 1.2619020172910662, + "grad_norm": 1.6770649329919185, + "learning_rate": 6.571840207086565e-07, + "loss": 0.51056307554245, + "step": 5474 + }, + { + "epoch": 1.2621325648414985, + "grad_norm": 1.607498257265911, + "learning_rate": 6.568259747355233e-07, + "loss": 0.5062652826309204, + "step": 5475 + }, + { + "epoch": 1.2623631123919308, + "grad_norm": 1.82615736140384, + "learning_rate": 6.564679786220062e-07, + "loss": 0.46831825375556946, + "step": 5476 + }, + { + "epoch": 1.262593659942363, + "grad_norm": 1.5711366050335192, + "learning_rate": 6.56110032420118e-07, + "loss": 0.4801711440086365, + "step": 5477 + }, + { + "epoch": 1.2628242074927953, + "grad_norm": 1.687813586503547, + "learning_rate": 6.557521361818643e-07, + "loss": 0.41865023970603943, + "step": 5478 + }, + { + "epoch": 1.2630547550432276, + "grad_norm": 1.4052825771397335, + "learning_rate": 6.553942899592446e-07, + "loss": 0.4309377074241638, + "step": 5479 + }, + { + "epoch": 1.26328530259366, + "grad_norm": 1.455269980692782, + "learning_rate": 6.550364938042496e-07, + "loss": 0.44679516553878784, + "step": 5480 + }, + { + "epoch": 1.2635158501440922, + "grad_norm": 1.4446890497651368, + "learning_rate": 6.546787477688631e-07, + "loss": 0.40785765647888184, + "step": 5481 + }, + { + "epoch": 1.2637463976945245, + "grad_norm": 1.6383514003925148, + "learning_rate": 6.543210519050628e-07, + "loss": 0.43477606773376465, + "step": 5482 + }, + { + "epoch": 1.2639769452449567, + "grad_norm": 1.5580217211873189, + "learning_rate": 6.539634062648174e-07, + "loss": 0.4252205193042755, + "step": 5483 + }, + { + "epoch": 1.264207492795389, + "grad_norm": 1.5719265387067087, + "learning_rate": 6.536058109000895e-07, + "loss": 0.5158742666244507, + "step": 5484 + }, + { + "epoch": 1.2644380403458213, + "grad_norm": 1.7733816595451595, + "learning_rate": 6.532482658628333e-07, + "loss": 0.5523275136947632, + "step": 5485 + }, + { + "epoch": 1.2646685878962536, + "grad_norm": 1.9166044512792477, + "learning_rate": 6.528907712049971e-07, + "loss": 0.45393699407577515, + "step": 5486 + }, + { + "epoch": 1.2648991354466859, + "grad_norm": 1.4684258251388687, + "learning_rate": 6.525333269785213e-07, + "loss": 0.3747859001159668, + "step": 5487 + }, + { + "epoch": 1.2651296829971181, + "grad_norm": 1.5546958152903012, + "learning_rate": 6.521759332353381e-07, + "loss": 0.5249335169792175, + "step": 5488 + }, + { + "epoch": 1.2653602305475504, + "grad_norm": 1.3641169125497614, + "learning_rate": 6.518185900273736e-07, + "loss": 0.4011837840080261, + "step": 5489 + }, + { + "epoch": 1.2655907780979827, + "grad_norm": 1.7629229180871924, + "learning_rate": 6.514612974065459e-07, + "loss": 0.47748851776123047, + "step": 5490 + }, + { + "epoch": 1.265821325648415, + "grad_norm": 2.008372903254068, + "learning_rate": 6.511040554247655e-07, + "loss": 0.5392353534698486, + "step": 5491 + }, + { + "epoch": 1.2660518731988473, + "grad_norm": 1.5145152277351324, + "learning_rate": 6.507468641339371e-07, + "loss": 0.4395901560783386, + "step": 5492 + }, + { + "epoch": 1.2662824207492795, + "grad_norm": 1.6896616144085306, + "learning_rate": 6.503897235859556e-07, + "loss": 0.4864005148410797, + "step": 5493 + }, + { + "epoch": 1.2665129682997118, + "grad_norm": 1.964853072629702, + "learning_rate": 6.500326338327104e-07, + "loss": 0.4969862103462219, + "step": 5494 + }, + { + "epoch": 1.266743515850144, + "grad_norm": 1.6281912350437215, + "learning_rate": 6.49675594926083e-07, + "loss": 0.5065462589263916, + "step": 5495 + }, + { + "epoch": 1.2669740634005764, + "grad_norm": 1.629865686272151, + "learning_rate": 6.493186069179474e-07, + "loss": 0.4453302025794983, + "step": 5496 + }, + { + "epoch": 1.2672046109510087, + "grad_norm": 1.6609285038031356, + "learning_rate": 6.489616698601701e-07, + "loss": 0.4587283730506897, + "step": 5497 + }, + { + "epoch": 1.267435158501441, + "grad_norm": 1.5797490262179883, + "learning_rate": 6.486047838046102e-07, + "loss": 0.5289033055305481, + "step": 5498 + }, + { + "epoch": 1.2676657060518732, + "grad_norm": 1.6771196950319116, + "learning_rate": 6.482479488031199e-07, + "loss": 0.46285438537597656, + "step": 5499 + }, + { + "epoch": 1.2678962536023055, + "grad_norm": 1.5333241787291638, + "learning_rate": 6.478911649075434e-07, + "loss": 0.44040411710739136, + "step": 5500 + }, + { + "epoch": 1.2681268011527378, + "grad_norm": 1.337665135823913, + "learning_rate": 6.475344321697175e-07, + "loss": 0.4678620994091034, + "step": 5501 + }, + { + "epoch": 1.26835734870317, + "grad_norm": 1.7348492481978741, + "learning_rate": 6.471777506414721e-07, + "loss": 0.4077882170677185, + "step": 5502 + }, + { + "epoch": 1.2685878962536024, + "grad_norm": 1.4599912002094135, + "learning_rate": 6.468211203746294e-07, + "loss": 0.476462721824646, + "step": 5503 + }, + { + "epoch": 1.2688184438040346, + "grad_norm": 1.955778831188656, + "learning_rate": 6.464645414210036e-07, + "loss": 0.47313517332077026, + "step": 5504 + }, + { + "epoch": 1.269048991354467, + "grad_norm": 1.600431181416076, + "learning_rate": 6.461080138324025e-07, + "loss": 0.46002405881881714, + "step": 5505 + }, + { + "epoch": 1.2692795389048992, + "grad_norm": 1.471883891355057, + "learning_rate": 6.457515376606253e-07, + "loss": 0.49490875005722046, + "step": 5506 + }, + { + "epoch": 1.2695100864553315, + "grad_norm": 1.9073348208275467, + "learning_rate": 6.453951129574643e-07, + "loss": 0.3652802109718323, + "step": 5507 + }, + { + "epoch": 1.2697406340057638, + "grad_norm": 1.795903663355042, + "learning_rate": 6.450387397747049e-07, + "loss": 0.43633294105529785, + "step": 5508 + }, + { + "epoch": 1.269971181556196, + "grad_norm": 1.581833052782087, + "learning_rate": 6.446824181641239e-07, + "loss": 0.4316346049308777, + "step": 5509 + }, + { + "epoch": 1.2702017291066283, + "grad_norm": 1.4893056012571666, + "learning_rate": 6.443261481774909e-07, + "loss": 0.4697961211204529, + "step": 5510 + }, + { + "epoch": 1.2704322766570606, + "grad_norm": 1.4744438084114087, + "learning_rate": 6.439699298665691e-07, + "loss": 0.3447864055633545, + "step": 5511 + }, + { + "epoch": 1.2706628242074927, + "grad_norm": 1.4930172335593572, + "learning_rate": 6.436137632831124e-07, + "loss": 0.42046308517456055, + "step": 5512 + }, + { + "epoch": 1.270893371757925, + "grad_norm": 1.9205238969624363, + "learning_rate": 6.43257648478869e-07, + "loss": 0.5696559548377991, + "step": 5513 + }, + { + "epoch": 1.2711239193083572, + "grad_norm": 1.5834811016484576, + "learning_rate": 6.429015855055775e-07, + "loss": 0.37561696767807007, + "step": 5514 + }, + { + "epoch": 1.2713544668587895, + "grad_norm": 1.7696273804871432, + "learning_rate": 6.425455744149711e-07, + "loss": 0.4987408518791199, + "step": 5515 + }, + { + "epoch": 1.2715850144092218, + "grad_norm": 1.5472001628110608, + "learning_rate": 6.421896152587745e-07, + "loss": 0.41388893127441406, + "step": 5516 + }, + { + "epoch": 1.271815561959654, + "grad_norm": 1.3907677350646108, + "learning_rate": 6.41833708088704e-07, + "loss": 0.531815230846405, + "step": 5517 + }, + { + "epoch": 1.2720461095100863, + "grad_norm": 2.0563641939125152, + "learning_rate": 6.414778529564701e-07, + "loss": 0.5133731365203857, + "step": 5518 + }, + { + "epoch": 1.2722766570605186, + "grad_norm": 1.4492977630588102, + "learning_rate": 6.411220499137746e-07, + "loss": 0.49691277742385864, + "step": 5519 + }, + { + "epoch": 1.272507204610951, + "grad_norm": 1.469594469426552, + "learning_rate": 6.407662990123117e-07, + "loss": 0.45275843143463135, + "step": 5520 + }, + { + "epoch": 1.2727377521613832, + "grad_norm": 1.3066235759971525, + "learning_rate": 6.404106003037688e-07, + "loss": 0.43602895736694336, + "step": 5521 + }, + { + "epoch": 1.2729682997118155, + "grad_norm": 1.4446893971845258, + "learning_rate": 6.400549538398248e-07, + "loss": 0.4969235956668854, + "step": 5522 + }, + { + "epoch": 1.2731988472622477, + "grad_norm": 1.5937900788273875, + "learning_rate": 6.396993596721512e-07, + "loss": 0.5304889678955078, + "step": 5523 + }, + { + "epoch": 1.27342939481268, + "grad_norm": 1.5346452715505707, + "learning_rate": 6.393438178524131e-07, + "loss": 0.46131831407546997, + "step": 5524 + }, + { + "epoch": 1.2736599423631123, + "grad_norm": 1.5540801807722993, + "learning_rate": 6.389883284322663e-07, + "loss": 0.44982582330703735, + "step": 5525 + }, + { + "epoch": 1.2738904899135446, + "grad_norm": 1.4566262270008488, + "learning_rate": 6.386328914633594e-07, + "loss": 0.42927485704421997, + "step": 5526 + }, + { + "epoch": 1.2741210374639769, + "grad_norm": 1.6365410597421224, + "learning_rate": 6.382775069973347e-07, + "loss": 0.4596608579158783, + "step": 5527 + }, + { + "epoch": 1.2743515850144091, + "grad_norm": 1.4090066202201719, + "learning_rate": 6.379221750858251e-07, + "loss": 0.40814411640167236, + "step": 5528 + }, + { + "epoch": 1.2745821325648414, + "grad_norm": 1.4832230987219173, + "learning_rate": 6.375668957804569e-07, + "loss": 0.47916269302368164, + "step": 5529 + }, + { + "epoch": 1.2748126801152737, + "grad_norm": 1.640737186868239, + "learning_rate": 6.372116691328483e-07, + "loss": 0.48566362261772156, + "step": 5530 + }, + { + "epoch": 1.275043227665706, + "grad_norm": 1.4732784262191274, + "learning_rate": 6.368564951946103e-07, + "loss": 0.4357905089855194, + "step": 5531 + }, + { + "epoch": 1.2752737752161383, + "grad_norm": 1.3641981295499355, + "learning_rate": 6.365013740173459e-07, + "loss": 0.4812158942222595, + "step": 5532 + }, + { + "epoch": 1.2755043227665706, + "grad_norm": 1.5210079549425446, + "learning_rate": 6.361463056526501e-07, + "loss": 0.4973459839820862, + "step": 5533 + }, + { + "epoch": 1.2757348703170028, + "grad_norm": 1.6397578725103514, + "learning_rate": 6.357912901521114e-07, + "loss": 0.44668370485305786, + "step": 5534 + }, + { + "epoch": 1.2759654178674351, + "grad_norm": 1.5010005866568603, + "learning_rate": 6.354363275673093e-07, + "loss": 0.40911513566970825, + "step": 5535 + }, + { + "epoch": 1.2761959654178674, + "grad_norm": 1.5942835145336292, + "learning_rate": 6.35081417949816e-07, + "loss": 0.4385657012462616, + "step": 5536 + }, + { + "epoch": 1.2764265129682997, + "grad_norm": 1.3458840351620285, + "learning_rate": 6.347265613511969e-07, + "loss": 0.4490683376789093, + "step": 5537 + }, + { + "epoch": 1.276657060518732, + "grad_norm": 1.3989827670727941, + "learning_rate": 6.343717578230086e-07, + "loss": 0.4358097314834595, + "step": 5538 + }, + { + "epoch": 1.2768876080691642, + "grad_norm": 1.7207001807997868, + "learning_rate": 6.340170074167999e-07, + "loss": 0.48730576038360596, + "step": 5539 + }, + { + "epoch": 1.2771181556195965, + "grad_norm": 1.5892957523550955, + "learning_rate": 6.336623101841132e-07, + "loss": 0.4328501224517822, + "step": 5540 + }, + { + "epoch": 1.2773487031700288, + "grad_norm": 1.6472307415258993, + "learning_rate": 6.333076661764818e-07, + "loss": 0.49830788373947144, + "step": 5541 + }, + { + "epoch": 1.277579250720461, + "grad_norm": 1.343710132243457, + "learning_rate": 6.329530754454321e-07, + "loss": 0.46980321407318115, + "step": 5542 + }, + { + "epoch": 1.2778097982708934, + "grad_norm": 1.7570308864747564, + "learning_rate": 6.325985380424816e-07, + "loss": 0.47295019030570984, + "step": 5543 + }, + { + "epoch": 1.2780403458213256, + "grad_norm": 1.655776208512617, + "learning_rate": 6.322440540191421e-07, + "loss": 0.5206797122955322, + "step": 5544 + }, + { + "epoch": 1.278270893371758, + "grad_norm": 1.63546950482743, + "learning_rate": 6.318896234269158e-07, + "loss": 0.46184998750686646, + "step": 5545 + }, + { + "epoch": 1.2785014409221902, + "grad_norm": 1.7239763029029178, + "learning_rate": 6.315352463172973e-07, + "loss": 0.49634110927581787, + "step": 5546 + }, + { + "epoch": 1.2787319884726225, + "grad_norm": 1.4144369647998525, + "learning_rate": 6.31180922741775e-07, + "loss": 0.35143324732780457, + "step": 5547 + }, + { + "epoch": 1.2789625360230548, + "grad_norm": 1.4978465409722184, + "learning_rate": 6.308266527518279e-07, + "loss": 0.4144735634326935, + "step": 5548 + }, + { + "epoch": 1.279193083573487, + "grad_norm": 1.4183195079291904, + "learning_rate": 6.304724363989273e-07, + "loss": 0.5448867678642273, + "step": 5549 + }, + { + "epoch": 1.2794236311239193, + "grad_norm": 1.338439375156975, + "learning_rate": 6.301182737345381e-07, + "loss": 0.43275925517082214, + "step": 5550 + }, + { + "epoch": 1.2796541786743516, + "grad_norm": 1.2908687957691545, + "learning_rate": 6.297641648101156e-07, + "loss": 0.4401479959487915, + "step": 5551 + }, + { + "epoch": 1.2798847262247839, + "grad_norm": 1.7017435488796109, + "learning_rate": 6.294101096771083e-07, + "loss": 0.4905737042427063, + "step": 5552 + }, + { + "epoch": 1.2801152737752162, + "grad_norm": 1.8493974545984773, + "learning_rate": 6.290561083869572e-07, + "loss": 0.5399729013442993, + "step": 5553 + }, + { + "epoch": 1.2803458213256484, + "grad_norm": 1.7191281973650443, + "learning_rate": 6.287021609910945e-07, + "loss": 0.4323650598526001, + "step": 5554 + }, + { + "epoch": 1.2805763688760807, + "grad_norm": 1.6009091238858228, + "learning_rate": 6.283482675409453e-07, + "loss": 0.4450085759162903, + "step": 5555 + }, + { + "epoch": 1.280806916426513, + "grad_norm": 1.6050803328137053, + "learning_rate": 6.279944280879268e-07, + "loss": 0.48366865515708923, + "step": 5556 + }, + { + "epoch": 1.2810374639769453, + "grad_norm": 1.6069583853116107, + "learning_rate": 6.276406426834479e-07, + "loss": 0.43875235319137573, + "step": 5557 + }, + { + "epoch": 1.2812680115273776, + "grad_norm": 1.461319209325984, + "learning_rate": 6.272869113789102e-07, + "loss": 0.421744167804718, + "step": 5558 + }, + { + "epoch": 1.2814985590778099, + "grad_norm": 1.8153973429348083, + "learning_rate": 6.269332342257066e-07, + "loss": 0.569898247718811, + "step": 5559 + }, + { + "epoch": 1.2817291066282421, + "grad_norm": 1.449207816940006, + "learning_rate": 6.265796112752232e-07, + "loss": 0.3934449553489685, + "step": 5560 + }, + { + "epoch": 1.2819596541786744, + "grad_norm": 2.2386537654098295, + "learning_rate": 6.262260425788381e-07, + "loss": 0.4563683271408081, + "step": 5561 + }, + { + "epoch": 1.2821902017291067, + "grad_norm": 1.4202120834205216, + "learning_rate": 6.258725281879204e-07, + "loss": 0.45703044533729553, + "step": 5562 + }, + { + "epoch": 1.282420749279539, + "grad_norm": 1.350065136609383, + "learning_rate": 6.255190681538324e-07, + "loss": 0.4308219254016876, + "step": 5563 + }, + { + "epoch": 1.2826512968299713, + "grad_norm": 1.4828315701652353, + "learning_rate": 6.251656625279283e-07, + "loss": 0.43510013818740845, + "step": 5564 + }, + { + "epoch": 1.2828818443804035, + "grad_norm": 1.6397508409229253, + "learning_rate": 6.248123113615538e-07, + "loss": 0.4328692555427551, + "step": 5565 + }, + { + "epoch": 1.2831123919308358, + "grad_norm": 1.5343379599739684, + "learning_rate": 6.244590147060479e-07, + "loss": 0.4080934524536133, + "step": 5566 + }, + { + "epoch": 1.283342939481268, + "grad_norm": 1.7350612167191992, + "learning_rate": 6.241057726127402e-07, + "loss": 0.4813555181026459, + "step": 5567 + }, + { + "epoch": 1.2835734870317004, + "grad_norm": 1.6567967388315745, + "learning_rate": 6.237525851329533e-07, + "loss": 0.430223673582077, + "step": 5568 + }, + { + "epoch": 1.2838040345821327, + "grad_norm": 1.4875391508810385, + "learning_rate": 6.233994523180021e-07, + "loss": 0.5455408692359924, + "step": 5569 + }, + { + "epoch": 1.284034582132565, + "grad_norm": 2.1560276048689895, + "learning_rate": 6.230463742191925e-07, + "loss": 0.5247420072555542, + "step": 5570 + }, + { + "epoch": 1.2842651296829972, + "grad_norm": 1.8980481153200757, + "learning_rate": 6.226933508878232e-07, + "loss": 0.5348542332649231, + "step": 5571 + }, + { + "epoch": 1.2844956772334295, + "grad_norm": 1.6792634796819574, + "learning_rate": 6.223403823751854e-07, + "loss": 0.4977971315383911, + "step": 5572 + }, + { + "epoch": 1.2847262247838618, + "grad_norm": 1.6070638173254763, + "learning_rate": 6.219874687325611e-07, + "loss": 0.40318992733955383, + "step": 5573 + }, + { + "epoch": 1.284956772334294, + "grad_norm": 1.9663675843987791, + "learning_rate": 6.216346100112255e-07, + "loss": 0.5009844303131104, + "step": 5574 + }, + { + "epoch": 1.2851873198847263, + "grad_norm": 1.6937420088731798, + "learning_rate": 6.212818062624445e-07, + "loss": 0.5087116956710815, + "step": 5575 + }, + { + "epoch": 1.2854178674351586, + "grad_norm": 1.6817159218995477, + "learning_rate": 6.209290575374775e-07, + "loss": 0.5260793566703796, + "step": 5576 + }, + { + "epoch": 1.285648414985591, + "grad_norm": 1.6762340662691828, + "learning_rate": 6.205763638875754e-07, + "loss": 0.4408110976219177, + "step": 5577 + }, + { + "epoch": 1.2858789625360232, + "grad_norm": 1.5270773893490903, + "learning_rate": 6.202237253639799e-07, + "loss": 0.4744076132774353, + "step": 5578 + }, + { + "epoch": 1.2861095100864555, + "grad_norm": 1.5635420193144807, + "learning_rate": 6.198711420179273e-07, + "loss": 0.48688220977783203, + "step": 5579 + }, + { + "epoch": 1.2863400576368877, + "grad_norm": 1.6213517296286213, + "learning_rate": 6.195186139006425e-07, + "loss": 0.47676223516464233, + "step": 5580 + }, + { + "epoch": 1.2865706051873198, + "grad_norm": 1.606680809029221, + "learning_rate": 6.191661410633452e-07, + "loss": 0.5136919021606445, + "step": 5581 + }, + { + "epoch": 1.286801152737752, + "grad_norm": 1.5245386080744614, + "learning_rate": 6.188137235572464e-07, + "loss": 0.5181657671928406, + "step": 5582 + }, + { + "epoch": 1.2870317002881844, + "grad_norm": 1.6707771822267998, + "learning_rate": 6.184613614335476e-07, + "loss": 0.5066704750061035, + "step": 5583 + }, + { + "epoch": 1.2872622478386166, + "grad_norm": 1.5163975923186304, + "learning_rate": 6.181090547434438e-07, + "loss": 0.4127427339553833, + "step": 5584 + }, + { + "epoch": 1.287492795389049, + "grad_norm": 1.3664322264218698, + "learning_rate": 6.177568035381223e-07, + "loss": 0.4792090654373169, + "step": 5585 + }, + { + "epoch": 1.2877233429394812, + "grad_norm": 1.4546030620202877, + "learning_rate": 6.174046078687603e-07, + "loss": 0.47321128845214844, + "step": 5586 + }, + { + "epoch": 1.2879538904899135, + "grad_norm": 1.5045985608502348, + "learning_rate": 6.17052467786529e-07, + "loss": 0.6202956438064575, + "step": 5587 + }, + { + "epoch": 1.2881844380403458, + "grad_norm": 1.5090966461095818, + "learning_rate": 6.167003833425902e-07, + "loss": 0.4611511528491974, + "step": 5588 + }, + { + "epoch": 1.288414985590778, + "grad_norm": 1.5961777946986442, + "learning_rate": 6.163483545880981e-07, + "loss": 0.37117400765419006, + "step": 5589 + }, + { + "epoch": 1.2886455331412103, + "grad_norm": 1.5363317852310883, + "learning_rate": 6.159963815741996e-07, + "loss": 0.40295302867889404, + "step": 5590 + }, + { + "epoch": 1.2888760806916426, + "grad_norm": 1.2834821490038788, + "learning_rate": 6.156444643520319e-07, + "loss": 0.4332062602043152, + "step": 5591 + }, + { + "epoch": 1.289106628242075, + "grad_norm": 1.622352739011229, + "learning_rate": 6.152926029727249e-07, + "loss": 0.5083199739456177, + "step": 5592 + }, + { + "epoch": 1.2893371757925072, + "grad_norm": 1.4338205668361415, + "learning_rate": 6.14940797487401e-07, + "loss": 0.4602397680282593, + "step": 5593 + }, + { + "epoch": 1.2895677233429395, + "grad_norm": 1.4280283710663018, + "learning_rate": 6.145890479471734e-07, + "loss": 0.41422247886657715, + "step": 5594 + }, + { + "epoch": 1.2897982708933717, + "grad_norm": 1.395051644435662, + "learning_rate": 6.14237354403148e-07, + "loss": 0.4773354232311249, + "step": 5595 + }, + { + "epoch": 1.290028818443804, + "grad_norm": 1.7507850247217935, + "learning_rate": 6.138857169064215e-07, + "loss": 0.5527161359786987, + "step": 5596 + }, + { + "epoch": 1.2902593659942363, + "grad_norm": 1.6330535523345988, + "learning_rate": 6.135341355080841e-07, + "loss": 0.4463421404361725, + "step": 5597 + }, + { + "epoch": 1.2904899135446686, + "grad_norm": 1.8408791330637222, + "learning_rate": 6.131826102592165e-07, + "loss": 0.46494734287261963, + "step": 5598 + }, + { + "epoch": 1.2907204610951009, + "grad_norm": 1.525583159053329, + "learning_rate": 6.128311412108913e-07, + "loss": 0.41373857855796814, + "step": 5599 + }, + { + "epoch": 1.2909510086455331, + "grad_norm": 1.6892159775229958, + "learning_rate": 6.124797284141738e-07, + "loss": 0.4944826364517212, + "step": 5600 + }, + { + "epoch": 1.2911815561959654, + "grad_norm": 1.3091958060274738, + "learning_rate": 6.121283719201207e-07, + "loss": 0.4663166403770447, + "step": 5601 + }, + { + "epoch": 1.2914121037463977, + "grad_norm": 1.480078368011403, + "learning_rate": 6.117770717797798e-07, + "loss": 0.5245934724807739, + "step": 5602 + }, + { + "epoch": 1.29164265129683, + "grad_norm": 1.4141646006395505, + "learning_rate": 6.114258280441922e-07, + "loss": 0.4217444956302643, + "step": 5603 + }, + { + "epoch": 1.2918731988472623, + "grad_norm": 1.4191022644328553, + "learning_rate": 6.110746407643892e-07, + "loss": 0.459358811378479, + "step": 5604 + }, + { + "epoch": 1.2921037463976945, + "grad_norm": 1.336340183417743, + "learning_rate": 6.10723509991395e-07, + "loss": 0.417694628238678, + "step": 5605 + }, + { + "epoch": 1.2923342939481268, + "grad_norm": 1.2397570173595072, + "learning_rate": 6.103724357762254e-07, + "loss": 0.3651599884033203, + "step": 5606 + }, + { + "epoch": 1.292564841498559, + "grad_norm": 1.5399308093733746, + "learning_rate": 6.100214181698877e-07, + "loss": 0.49200180172920227, + "step": 5607 + }, + { + "epoch": 1.2927953890489914, + "grad_norm": 1.9240361082865032, + "learning_rate": 6.096704572233806e-07, + "loss": 0.497753381729126, + "step": 5608 + }, + { + "epoch": 1.2930259365994237, + "grad_norm": 1.4344257549227153, + "learning_rate": 6.093195529876962e-07, + "loss": 0.5006631016731262, + "step": 5609 + }, + { + "epoch": 1.293256484149856, + "grad_norm": 1.6754143744802865, + "learning_rate": 6.089687055138163e-07, + "loss": 0.5016833543777466, + "step": 5610 + }, + { + "epoch": 1.2934870317002882, + "grad_norm": 1.527136681773818, + "learning_rate": 6.086179148527159e-07, + "loss": 0.4424899220466614, + "step": 5611 + }, + { + "epoch": 1.2937175792507205, + "grad_norm": 1.6530124892602107, + "learning_rate": 6.082671810553606e-07, + "loss": 0.5102081298828125, + "step": 5612 + }, + { + "epoch": 1.2939481268011528, + "grad_norm": 1.5659948984902143, + "learning_rate": 6.079165041727089e-07, + "loss": 0.4826313853263855, + "step": 5613 + }, + { + "epoch": 1.294178674351585, + "grad_norm": 1.395872003247078, + "learning_rate": 6.075658842557105e-07, + "loss": 0.42120543122291565, + "step": 5614 + }, + { + "epoch": 1.2944092219020173, + "grad_norm": 1.699388547513595, + "learning_rate": 6.072153213553066e-07, + "loss": 0.4455034136772156, + "step": 5615 + }, + { + "epoch": 1.2946397694524496, + "grad_norm": 1.9713259117027755, + "learning_rate": 6.068648155224305e-07, + "loss": 0.42256850004196167, + "step": 5616 + }, + { + "epoch": 1.294870317002882, + "grad_norm": 1.9231580761268008, + "learning_rate": 6.06514366808007e-07, + "loss": 0.5602281093597412, + "step": 5617 + }, + { + "epoch": 1.2951008645533142, + "grad_norm": 1.4513434908592018, + "learning_rate": 6.061639752629526e-07, + "loss": 0.43879711627960205, + "step": 5618 + }, + { + "epoch": 1.2953314121037465, + "grad_norm": 1.4190069954434061, + "learning_rate": 6.058136409381757e-07, + "loss": 0.3930923640727997, + "step": 5619 + }, + { + "epoch": 1.2955619596541788, + "grad_norm": 1.839985350345671, + "learning_rate": 6.05463363884576e-07, + "loss": 0.4514680802822113, + "step": 5620 + }, + { + "epoch": 1.295792507204611, + "grad_norm": 1.2536955913267351, + "learning_rate": 6.051131441530453e-07, + "loss": 0.46888765692710876, + "step": 5621 + }, + { + "epoch": 1.296023054755043, + "grad_norm": 1.7812263281619622, + "learning_rate": 6.04762981794467e-07, + "loss": 0.4355303645133972, + "step": 5622 + }, + { + "epoch": 1.2962536023054754, + "grad_norm": 1.6229536165276595, + "learning_rate": 6.044128768597157e-07, + "loss": 0.4351111054420471, + "step": 5623 + }, + { + "epoch": 1.2964841498559077, + "grad_norm": 1.6901196215872798, + "learning_rate": 6.040628293996583e-07, + "loss": 0.4702025055885315, + "step": 5624 + }, + { + "epoch": 1.29671469740634, + "grad_norm": 1.9691583510108008, + "learning_rate": 6.037128394651527e-07, + "loss": 0.46238428354263306, + "step": 5625 + }, + { + "epoch": 1.2969452449567722, + "grad_norm": 1.5645356062188, + "learning_rate": 6.033629071070492e-07, + "loss": 0.3857001066207886, + "step": 5626 + }, + { + "epoch": 1.2971757925072045, + "grad_norm": 1.432067471334769, + "learning_rate": 6.030130323761893e-07, + "loss": 0.49124062061309814, + "step": 5627 + }, + { + "epoch": 1.2974063400576368, + "grad_norm": 1.3667653448147599, + "learning_rate": 6.026632153234057e-07, + "loss": 0.44536006450653076, + "step": 5628 + }, + { + "epoch": 1.297636887608069, + "grad_norm": 1.9079098337606286, + "learning_rate": 6.023134559995237e-07, + "loss": 0.5029529333114624, + "step": 5629 + }, + { + "epoch": 1.2978674351585013, + "grad_norm": 2.0487774662218174, + "learning_rate": 6.019637544553595e-07, + "loss": 0.45321863889694214, + "step": 5630 + }, + { + "epoch": 1.2980979827089336, + "grad_norm": 1.3767097952219558, + "learning_rate": 6.01614110741721e-07, + "loss": 0.46636098623275757, + "step": 5631 + }, + { + "epoch": 1.298328530259366, + "grad_norm": 1.7562160355468237, + "learning_rate": 6.012645249094081e-07, + "loss": 0.4282764494419098, + "step": 5632 + }, + { + "epoch": 1.2985590778097982, + "grad_norm": 1.449718049336432, + "learning_rate": 6.009149970092115e-07, + "loss": 0.42735451459884644, + "step": 5633 + }, + { + "epoch": 1.2987896253602305, + "grad_norm": 1.3504653089580498, + "learning_rate": 6.005655270919141e-07, + "loss": 0.3382011950016022, + "step": 5634 + }, + { + "epoch": 1.2990201729106627, + "grad_norm": 1.5943447106279303, + "learning_rate": 6.002161152082908e-07, + "loss": 0.43230611085891724, + "step": 5635 + }, + { + "epoch": 1.299250720461095, + "grad_norm": 1.7064048884456655, + "learning_rate": 5.99866761409107e-07, + "loss": 0.5036906599998474, + "step": 5636 + }, + { + "epoch": 1.2994812680115273, + "grad_norm": 1.4451080113763042, + "learning_rate": 5.995174657451198e-07, + "loss": 0.4540144205093384, + "step": 5637 + }, + { + "epoch": 1.2997118155619596, + "grad_norm": 1.6244240251604507, + "learning_rate": 5.991682282670794e-07, + "loss": 0.5304908752441406, + "step": 5638 + }, + { + "epoch": 1.2999423631123919, + "grad_norm": 1.6756011798471222, + "learning_rate": 5.988190490257252e-07, + "loss": 0.48925888538360596, + "step": 5639 + }, + { + "epoch": 1.3001729106628241, + "grad_norm": 1.544278359402648, + "learning_rate": 5.984699280717902e-07, + "loss": 0.41739997267723083, + "step": 5640 + }, + { + "epoch": 1.3004034582132564, + "grad_norm": 1.8573165659255007, + "learning_rate": 5.98120865455997e-07, + "loss": 0.368470698595047, + "step": 5641 + }, + { + "epoch": 1.3006340057636887, + "grad_norm": 1.4369868317123584, + "learning_rate": 5.977718612290618e-07, + "loss": 0.47940701246261597, + "step": 5642 + }, + { + "epoch": 1.300864553314121, + "grad_norm": 1.576956184529271, + "learning_rate": 5.974229154416908e-07, + "loss": 0.4344274699687958, + "step": 5643 + }, + { + "epoch": 1.3010951008645533, + "grad_norm": 1.5095504652283631, + "learning_rate": 5.97074028144582e-07, + "loss": 0.4318895637989044, + "step": 5644 + }, + { + "epoch": 1.3013256484149855, + "grad_norm": 1.597236816262288, + "learning_rate": 5.967251993884257e-07, + "loss": 0.483765572309494, + "step": 5645 + }, + { + "epoch": 1.3015561959654178, + "grad_norm": 1.3118188483806787, + "learning_rate": 5.963764292239029e-07, + "loss": 0.39786484837532043, + "step": 5646 + }, + { + "epoch": 1.30178674351585, + "grad_norm": 1.604619731251052, + "learning_rate": 5.960277177016854e-07, + "loss": 0.5057064294815063, + "step": 5647 + }, + { + "epoch": 1.3020172910662824, + "grad_norm": 1.611278241917598, + "learning_rate": 5.956790648724388e-07, + "loss": 0.3967602252960205, + "step": 5648 + }, + { + "epoch": 1.3022478386167147, + "grad_norm": 1.5141547098519228, + "learning_rate": 5.953304707868177e-07, + "loss": 0.46510642766952515, + "step": 5649 + }, + { + "epoch": 1.302478386167147, + "grad_norm": 1.5274485657271228, + "learning_rate": 5.949819354954694e-07, + "loss": 0.4845235347747803, + "step": 5650 + }, + { + "epoch": 1.3027089337175792, + "grad_norm": 1.7942214793176274, + "learning_rate": 5.946334590490329e-07, + "loss": 0.4742242097854614, + "step": 5651 + }, + { + "epoch": 1.3029394812680115, + "grad_norm": 1.5892392961762456, + "learning_rate": 5.942850414981376e-07, + "loss": 0.4435562193393707, + "step": 5652 + }, + { + "epoch": 1.3031700288184438, + "grad_norm": 1.4875348946887388, + "learning_rate": 5.93936682893405e-07, + "loss": 0.44417089223861694, + "step": 5653 + }, + { + "epoch": 1.303400576368876, + "grad_norm": 1.5970020700205978, + "learning_rate": 5.935883832854485e-07, + "loss": 0.4877326488494873, + "step": 5654 + }, + { + "epoch": 1.3036311239193084, + "grad_norm": 1.3171877697027834, + "learning_rate": 5.932401427248721e-07, + "loss": 0.3517765402793884, + "step": 5655 + }, + { + "epoch": 1.3038616714697406, + "grad_norm": 1.3570215231005136, + "learning_rate": 5.928919612622716e-07, + "loss": 0.4456232786178589, + "step": 5656 + }, + { + "epoch": 1.304092219020173, + "grad_norm": 1.8836957623192498, + "learning_rate": 5.925438389482338e-07, + "loss": 0.5343309044837952, + "step": 5657 + }, + { + "epoch": 1.3043227665706052, + "grad_norm": 1.5719703891271506, + "learning_rate": 5.921957758333375e-07, + "loss": 0.46931013464927673, + "step": 5658 + }, + { + "epoch": 1.3045533141210375, + "grad_norm": 1.3499152363640425, + "learning_rate": 5.91847771968153e-07, + "loss": 0.4783180356025696, + "step": 5659 + }, + { + "epoch": 1.3047838616714698, + "grad_norm": 1.3505796312286786, + "learning_rate": 5.91499827403241e-07, + "loss": 0.4500124454498291, + "step": 5660 + }, + { + "epoch": 1.305014409221902, + "grad_norm": 1.5688722879131622, + "learning_rate": 5.911519421891545e-07, + "loss": 0.416248619556427, + "step": 5661 + }, + { + "epoch": 1.3052449567723343, + "grad_norm": 1.464160574072666, + "learning_rate": 5.90804116376438e-07, + "loss": 0.4546668529510498, + "step": 5662 + }, + { + "epoch": 1.3054755043227666, + "grad_norm": 1.6212778345510863, + "learning_rate": 5.904563500156262e-07, + "loss": 0.4955529570579529, + "step": 5663 + }, + { + "epoch": 1.3057060518731989, + "grad_norm": 1.6100740336658428, + "learning_rate": 5.901086431572468e-07, + "loss": 0.5418561100959778, + "step": 5664 + }, + { + "epoch": 1.3059365994236312, + "grad_norm": 1.3667792510170178, + "learning_rate": 5.897609958518171e-07, + "loss": 0.45207348465919495, + "step": 5665 + }, + { + "epoch": 1.3061671469740634, + "grad_norm": 1.567610206868078, + "learning_rate": 5.894134081498471e-07, + "loss": 0.39835965633392334, + "step": 5666 + }, + { + "epoch": 1.3063976945244957, + "grad_norm": 2.1400479034078232, + "learning_rate": 5.89065880101838e-07, + "loss": 0.4610930383205414, + "step": 5667 + }, + { + "epoch": 1.306628242074928, + "grad_norm": 1.480337861014841, + "learning_rate": 5.887184117582814e-07, + "loss": 0.47697019577026367, + "step": 5668 + }, + { + "epoch": 1.3068587896253603, + "grad_norm": 1.7998277326040601, + "learning_rate": 5.883710031696613e-07, + "loss": 0.5246520042419434, + "step": 5669 + }, + { + "epoch": 1.3070893371757926, + "grad_norm": 1.3822061021496619, + "learning_rate": 5.880236543864521e-07, + "loss": 0.43360599875450134, + "step": 5670 + }, + { + "epoch": 1.3073198847262248, + "grad_norm": 1.603823660095097, + "learning_rate": 5.876763654591202e-07, + "loss": 0.4036989212036133, + "step": 5671 + }, + { + "epoch": 1.3075504322766571, + "grad_norm": 1.92161850074288, + "learning_rate": 5.873291364381234e-07, + "loss": 0.5217401385307312, + "step": 5672 + }, + { + "epoch": 1.3077809798270894, + "grad_norm": 1.8626510168435175, + "learning_rate": 5.869819673739097e-07, + "loss": 0.4804594814777374, + "step": 5673 + }, + { + "epoch": 1.3080115273775217, + "grad_norm": 1.5969102074834967, + "learning_rate": 5.866348583169199e-07, + "loss": 0.4158530533313751, + "step": 5674 + }, + { + "epoch": 1.308242074927954, + "grad_norm": 1.4467346607257023, + "learning_rate": 5.862878093175852e-07, + "loss": 0.3767266273498535, + "step": 5675 + }, + { + "epoch": 1.3084726224783862, + "grad_norm": 1.5914288798121585, + "learning_rate": 5.859408204263274e-07, + "loss": 0.4574335515499115, + "step": 5676 + }, + { + "epoch": 1.3087031700288185, + "grad_norm": 1.246598683737431, + "learning_rate": 5.855938916935616e-07, + "loss": 0.4004058539867401, + "step": 5677 + }, + { + "epoch": 1.3089337175792508, + "grad_norm": 1.539707925409524, + "learning_rate": 5.852470231696922e-07, + "loss": 0.5084467530250549, + "step": 5678 + }, + { + "epoch": 1.309164265129683, + "grad_norm": 1.96730424218735, + "learning_rate": 5.849002149051155e-07, + "loss": 0.49348288774490356, + "step": 5679 + }, + { + "epoch": 1.3093948126801154, + "grad_norm": 1.5400304304805938, + "learning_rate": 5.845534669502199e-07, + "loss": 0.4904360771179199, + "step": 5680 + }, + { + "epoch": 1.3096253602305477, + "grad_norm": 1.725745441906493, + "learning_rate": 5.842067793553833e-07, + "loss": 0.39716076850891113, + "step": 5681 + }, + { + "epoch": 1.30985590778098, + "grad_norm": 1.951959855113649, + "learning_rate": 5.838601521709763e-07, + "loss": 0.45109057426452637, + "step": 5682 + }, + { + "epoch": 1.3100864553314122, + "grad_norm": 1.2108615351157967, + "learning_rate": 5.835135854473606e-07, + "loss": 0.4689937233924866, + "step": 5683 + }, + { + "epoch": 1.3103170028818445, + "grad_norm": 1.57788584068215, + "learning_rate": 5.831670792348878e-07, + "loss": 0.46770697832107544, + "step": 5684 + }, + { + "epoch": 1.3105475504322768, + "grad_norm": 1.7640277377337972, + "learning_rate": 5.828206335839025e-07, + "loss": 0.44661128520965576, + "step": 5685 + }, + { + "epoch": 1.310778097982709, + "grad_norm": 1.9000908259737026, + "learning_rate": 5.82474248544739e-07, + "loss": 0.43092700839042664, + "step": 5686 + }, + { + "epoch": 1.3110086455331413, + "grad_norm": 1.6902521158602983, + "learning_rate": 5.821279241677237e-07, + "loss": 0.4759563207626343, + "step": 5687 + }, + { + "epoch": 1.3112391930835736, + "grad_norm": 1.505811136559332, + "learning_rate": 5.817816605031744e-07, + "loss": 0.4558718800544739, + "step": 5688 + }, + { + "epoch": 1.311469740634006, + "grad_norm": 1.4687373535557187, + "learning_rate": 5.814354576013991e-07, + "loss": 0.41315966844558716, + "step": 5689 + }, + { + "epoch": 1.3117002881844382, + "grad_norm": 1.7259723493996193, + "learning_rate": 5.810893155126972e-07, + "loss": 0.48093000054359436, + "step": 5690 + }, + { + "epoch": 1.3119308357348702, + "grad_norm": 1.4454250122793362, + "learning_rate": 5.807432342873604e-07, + "loss": 0.44277292490005493, + "step": 5691 + }, + { + "epoch": 1.3121613832853025, + "grad_norm": 1.7712457329533509, + "learning_rate": 5.803972139756699e-07, + "loss": 0.4686616063117981, + "step": 5692 + }, + { + "epoch": 1.3123919308357348, + "grad_norm": 1.8037865500083508, + "learning_rate": 5.800512546278992e-07, + "loss": 0.5273550748825073, + "step": 5693 + }, + { + "epoch": 1.312622478386167, + "grad_norm": 1.5751540447686536, + "learning_rate": 5.797053562943126e-07, + "loss": 0.40962180495262146, + "step": 5694 + }, + { + "epoch": 1.3128530259365994, + "grad_norm": 1.5552313010741665, + "learning_rate": 5.793595190251657e-07, + "loss": 0.4359856843948364, + "step": 5695 + }, + { + "epoch": 1.3130835734870316, + "grad_norm": 1.427706356459816, + "learning_rate": 5.790137428707047e-07, + "loss": 0.3920098841190338, + "step": 5696 + }, + { + "epoch": 1.313314121037464, + "grad_norm": 1.6313193317124348, + "learning_rate": 5.786680278811673e-07, + "loss": 0.44638365507125854, + "step": 5697 + }, + { + "epoch": 1.3135446685878962, + "grad_norm": 1.7792052140749095, + "learning_rate": 5.783223741067822e-07, + "loss": 0.49320071935653687, + "step": 5698 + }, + { + "epoch": 1.3137752161383285, + "grad_norm": 1.6043100517885838, + "learning_rate": 5.779767815977701e-07, + "loss": 0.5009332299232483, + "step": 5699 + }, + { + "epoch": 1.3140057636887608, + "grad_norm": 1.9474485717331127, + "learning_rate": 5.776312504043408e-07, + "loss": 0.399705171585083, + "step": 5700 + }, + { + "epoch": 1.314236311239193, + "grad_norm": 1.4585410826624658, + "learning_rate": 5.77285780576698e-07, + "loss": 0.4848003685474396, + "step": 5701 + }, + { + "epoch": 1.3144668587896253, + "grad_norm": 1.4467862164072396, + "learning_rate": 5.76940372165033e-07, + "loss": 0.4309002161026001, + "step": 5702 + }, + { + "epoch": 1.3146974063400576, + "grad_norm": 1.4911760535404153, + "learning_rate": 5.765950252195309e-07, + "loss": 0.47664520144462585, + "step": 5703 + }, + { + "epoch": 1.3149279538904899, + "grad_norm": 1.3214665856632308, + "learning_rate": 5.762497397903674e-07, + "loss": 0.40434837341308594, + "step": 5704 + }, + { + "epoch": 1.3151585014409222, + "grad_norm": 1.767762796112652, + "learning_rate": 5.759045159277083e-07, + "loss": 0.46631279587745667, + "step": 5705 + }, + { + "epoch": 1.3153890489913544, + "grad_norm": 1.4544198218645066, + "learning_rate": 5.75559353681711e-07, + "loss": 0.37656185030937195, + "step": 5706 + }, + { + "epoch": 1.3156195965417867, + "grad_norm": 1.2767291509456058, + "learning_rate": 5.752142531025252e-07, + "loss": 0.404850572347641, + "step": 5707 + }, + { + "epoch": 1.315850144092219, + "grad_norm": 1.8381735708700344, + "learning_rate": 5.748692142402887e-07, + "loss": 0.5541446208953857, + "step": 5708 + }, + { + "epoch": 1.3160806916426513, + "grad_norm": 1.657101287344229, + "learning_rate": 5.745242371451331e-07, + "loss": 0.5300667881965637, + "step": 5709 + }, + { + "epoch": 1.3163112391930836, + "grad_norm": 1.5088076439154363, + "learning_rate": 5.741793218671794e-07, + "loss": 0.41248297691345215, + "step": 5710 + }, + { + "epoch": 1.3165417867435159, + "grad_norm": 1.485990096285573, + "learning_rate": 5.738344684565406e-07, + "loss": 0.42053329944610596, + "step": 5711 + }, + { + "epoch": 1.3167723342939481, + "grad_norm": 1.7569253091641839, + "learning_rate": 5.734896769633204e-07, + "loss": 0.49210411310195923, + "step": 5712 + }, + { + "epoch": 1.3170028818443804, + "grad_norm": 1.4735959040405031, + "learning_rate": 5.731449474376133e-07, + "loss": 0.43861931562423706, + "step": 5713 + }, + { + "epoch": 1.3172334293948127, + "grad_norm": 1.616710147227633, + "learning_rate": 5.728002799295049e-07, + "loss": 0.4376800060272217, + "step": 5714 + }, + { + "epoch": 1.317463976945245, + "grad_norm": 1.6699352009633992, + "learning_rate": 5.724556744890714e-07, + "loss": 0.5226752161979675, + "step": 5715 + }, + { + "epoch": 1.3176945244956773, + "grad_norm": 1.527907496447196, + "learning_rate": 5.721111311663807e-07, + "loss": 0.49144458770751953, + "step": 5716 + }, + { + "epoch": 1.3179250720461095, + "grad_norm": 1.588011520770062, + "learning_rate": 5.717666500114918e-07, + "loss": 0.41572028398513794, + "step": 5717 + }, + { + "epoch": 1.3181556195965418, + "grad_norm": 1.9136714216275055, + "learning_rate": 5.714222310744535e-07, + "loss": 0.4877137243747711, + "step": 5718 + }, + { + "epoch": 1.318386167146974, + "grad_norm": 1.3894714709044902, + "learning_rate": 5.710778744053069e-07, + "loss": 0.5112143754959106, + "step": 5719 + }, + { + "epoch": 1.3186167146974064, + "grad_norm": 1.4178344137923677, + "learning_rate": 5.707335800540832e-07, + "loss": 0.3639913499355316, + "step": 5720 + }, + { + "epoch": 1.3188472622478387, + "grad_norm": 1.339168057765147, + "learning_rate": 5.703893480708044e-07, + "loss": 0.4214756488800049, + "step": 5721 + }, + { + "epoch": 1.319077809798271, + "grad_norm": 1.6532547512994948, + "learning_rate": 5.700451785054845e-07, + "loss": 0.45181459188461304, + "step": 5722 + }, + { + "epoch": 1.3193083573487032, + "grad_norm": 1.8115910959996735, + "learning_rate": 5.69701071408127e-07, + "loss": 0.5234519243240356, + "step": 5723 + }, + { + "epoch": 1.3195389048991355, + "grad_norm": 1.5235287093918135, + "learning_rate": 5.693570268287277e-07, + "loss": 0.4809603691101074, + "step": 5724 + }, + { + "epoch": 1.3197694524495678, + "grad_norm": 1.5449169163448038, + "learning_rate": 5.690130448172727e-07, + "loss": 0.49713581800460815, + "step": 5725 + }, + { + "epoch": 1.32, + "grad_norm": 1.7653735856275472, + "learning_rate": 5.68669125423739e-07, + "loss": 0.4501224756240845, + "step": 5726 + }, + { + "epoch": 1.3202305475504323, + "grad_norm": 1.5979447021388788, + "learning_rate": 5.683252686980937e-07, + "loss": 0.4798666834831238, + "step": 5727 + }, + { + "epoch": 1.3204610951008646, + "grad_norm": 1.7043100133103994, + "learning_rate": 5.679814746902968e-07, + "loss": 0.39191022515296936, + "step": 5728 + }, + { + "epoch": 1.320691642651297, + "grad_norm": 1.349443879583748, + "learning_rate": 5.67637743450297e-07, + "loss": 0.4909393787384033, + "step": 5729 + }, + { + "epoch": 1.3209221902017292, + "grad_norm": 1.5585291332202789, + "learning_rate": 5.672940750280357e-07, + "loss": 0.5315475463867188, + "step": 5730 + }, + { + "epoch": 1.3211527377521615, + "grad_norm": 1.8034782223278798, + "learning_rate": 5.669504694734436e-07, + "loss": 0.5093904733657837, + "step": 5731 + }, + { + "epoch": 1.3213832853025935, + "grad_norm": 1.617415818890137, + "learning_rate": 5.666069268364437e-07, + "loss": 0.4729066491127014, + "step": 5732 + }, + { + "epoch": 1.3216138328530258, + "grad_norm": 1.728742183209921, + "learning_rate": 5.662634471669489e-07, + "loss": 0.46405333280563354, + "step": 5733 + }, + { + "epoch": 1.321844380403458, + "grad_norm": 1.41763204916087, + "learning_rate": 5.659200305148626e-07, + "loss": 0.38553181290626526, + "step": 5734 + }, + { + "epoch": 1.3220749279538904, + "grad_norm": 1.6908629643110433, + "learning_rate": 5.655766769300804e-07, + "loss": 0.4223875403404236, + "step": 5735 + }, + { + "epoch": 1.3223054755043226, + "grad_norm": 1.7695441587075371, + "learning_rate": 5.652333864624881e-07, + "loss": 0.44964247941970825, + "step": 5736 + }, + { + "epoch": 1.322536023054755, + "grad_norm": 1.4486441745330252, + "learning_rate": 5.648901591619617e-07, + "loss": 0.417505145072937, + "step": 5737 + }, + { + "epoch": 1.3227665706051872, + "grad_norm": 1.4410963247757809, + "learning_rate": 5.64546995078369e-07, + "loss": 0.419208288192749, + "step": 5738 + }, + { + "epoch": 1.3229971181556195, + "grad_norm": 1.8506448148738834, + "learning_rate": 5.642038942615681e-07, + "loss": 0.499774307012558, + "step": 5739 + }, + { + "epoch": 1.3232276657060518, + "grad_norm": 1.815255555024286, + "learning_rate": 5.638608567614076e-07, + "loss": 0.5035330057144165, + "step": 5740 + }, + { + "epoch": 1.323458213256484, + "grad_norm": 1.4388619620353644, + "learning_rate": 5.635178826277278e-07, + "loss": 0.4658032953739166, + "step": 5741 + }, + { + "epoch": 1.3236887608069163, + "grad_norm": 1.7274819728556454, + "learning_rate": 5.631749719103586e-07, + "loss": 0.42654508352279663, + "step": 5742 + }, + { + "epoch": 1.3239193083573486, + "grad_norm": 1.6352351844005595, + "learning_rate": 5.628321246591219e-07, + "loss": 0.46338269114494324, + "step": 5743 + }, + { + "epoch": 1.324149855907781, + "grad_norm": 1.7058135427472088, + "learning_rate": 5.6248934092383e-07, + "loss": 0.49489402770996094, + "step": 5744 + }, + { + "epoch": 1.3243804034582132, + "grad_norm": 1.7667721028029346, + "learning_rate": 5.621466207542855e-07, + "loss": 0.43571901321411133, + "step": 5745 + }, + { + "epoch": 1.3246109510086455, + "grad_norm": 1.6558339040197791, + "learning_rate": 5.618039642002823e-07, + "loss": 0.3845377564430237, + "step": 5746 + }, + { + "epoch": 1.3248414985590777, + "grad_norm": 1.7985128014396068, + "learning_rate": 5.61461371311604e-07, + "loss": 0.46888309717178345, + "step": 5747 + }, + { + "epoch": 1.32507204610951, + "grad_norm": 1.4860170610023868, + "learning_rate": 5.611188421380266e-07, + "loss": 0.47584888339042664, + "step": 5748 + }, + { + "epoch": 1.3253025936599423, + "grad_norm": 1.462500100058502, + "learning_rate": 5.607763767293162e-07, + "loss": 0.45152002573013306, + "step": 5749 + }, + { + "epoch": 1.3255331412103746, + "grad_norm": 1.3836540155127026, + "learning_rate": 5.604339751352288e-07, + "loss": 0.4863770604133606, + "step": 5750 + }, + { + "epoch": 1.3257636887608069, + "grad_norm": 1.6099021073453343, + "learning_rate": 5.600916374055124e-07, + "loss": 0.4634966254234314, + "step": 5751 + }, + { + "epoch": 1.3259942363112391, + "grad_norm": 1.5103532444548802, + "learning_rate": 5.597493635899047e-07, + "loss": 0.4578768014907837, + "step": 5752 + }, + { + "epoch": 1.3262247838616714, + "grad_norm": 1.645303626914304, + "learning_rate": 5.594071537381344e-07, + "loss": 0.4615476727485657, + "step": 5753 + }, + { + "epoch": 1.3264553314121037, + "grad_norm": 1.649679181206382, + "learning_rate": 5.590650078999215e-07, + "loss": 0.4493352174758911, + "step": 5754 + }, + { + "epoch": 1.326685878962536, + "grad_norm": 1.9417083139706726, + "learning_rate": 5.587229261249758e-07, + "loss": 0.45934945344924927, + "step": 5755 + }, + { + "epoch": 1.3269164265129683, + "grad_norm": 1.5433058030935336, + "learning_rate": 5.583809084629983e-07, + "loss": 0.44645899534225464, + "step": 5756 + }, + { + "epoch": 1.3271469740634005, + "grad_norm": 1.8230512410234043, + "learning_rate": 5.580389549636813e-07, + "loss": 0.4972764551639557, + "step": 5757 + }, + { + "epoch": 1.3273775216138328, + "grad_norm": 1.4829618993989975, + "learning_rate": 5.576970656767062e-07, + "loss": 0.56006920337677, + "step": 5758 + }, + { + "epoch": 1.327608069164265, + "grad_norm": 1.4530984592703375, + "learning_rate": 5.573552406517465e-07, + "loss": 0.35086876153945923, + "step": 5759 + }, + { + "epoch": 1.3278386167146974, + "grad_norm": 1.5453147440802761, + "learning_rate": 5.570134799384652e-07, + "loss": 0.5021958351135254, + "step": 5760 + }, + { + "epoch": 1.3280691642651297, + "grad_norm": 1.3980714186138523, + "learning_rate": 5.566717835865168e-07, + "loss": 0.4798334836959839, + "step": 5761 + }, + { + "epoch": 1.328299711815562, + "grad_norm": 1.6793393039174773, + "learning_rate": 5.56330151645547e-07, + "loss": 0.4362230896949768, + "step": 5762 + }, + { + "epoch": 1.3285302593659942, + "grad_norm": 1.9287032003864895, + "learning_rate": 5.559885841651901e-07, + "loss": 0.4839896261692047, + "step": 5763 + }, + { + "epoch": 1.3287608069164265, + "grad_norm": 1.7469751122977706, + "learning_rate": 5.556470811950735e-07, + "loss": 0.4296723008155823, + "step": 5764 + }, + { + "epoch": 1.3289913544668588, + "grad_norm": 1.6438669469084637, + "learning_rate": 5.553056427848135e-07, + "loss": 0.4957331717014313, + "step": 5765 + }, + { + "epoch": 1.329221902017291, + "grad_norm": 1.6461439100608464, + "learning_rate": 5.549642689840173e-07, + "loss": 0.5281147360801697, + "step": 5766 + }, + { + "epoch": 1.3294524495677233, + "grad_norm": 1.5868314889631472, + "learning_rate": 5.546229598422833e-07, + "loss": 0.48177218437194824, + "step": 5767 + }, + { + "epoch": 1.3296829971181556, + "grad_norm": 1.5218912835977805, + "learning_rate": 5.542817154091999e-07, + "loss": 0.4848208427429199, + "step": 5768 + }, + { + "epoch": 1.329913544668588, + "grad_norm": 1.4668359429602678, + "learning_rate": 5.539405357343464e-07, + "loss": 0.5182781219482422, + "step": 5769 + }, + { + "epoch": 1.3301440922190202, + "grad_norm": 1.4082178913730994, + "learning_rate": 5.535994208672932e-07, + "loss": 0.4479871094226837, + "step": 5770 + }, + { + "epoch": 1.3303746397694525, + "grad_norm": 1.6224978164814792, + "learning_rate": 5.532583708576005e-07, + "loss": 0.43546003103256226, + "step": 5771 + }, + { + "epoch": 1.3306051873198848, + "grad_norm": 1.735010886055401, + "learning_rate": 5.529173857548186e-07, + "loss": 0.46656274795532227, + "step": 5772 + }, + { + "epoch": 1.330835734870317, + "grad_norm": 1.5926561794354557, + "learning_rate": 5.525764656084901e-07, + "loss": 0.40682974457740784, + "step": 5773 + }, + { + "epoch": 1.3310662824207493, + "grad_norm": 1.509669363197034, + "learning_rate": 5.522356104681463e-07, + "loss": 0.4639260470867157, + "step": 5774 + }, + { + "epoch": 1.3312968299711816, + "grad_norm": 1.6625210006206694, + "learning_rate": 5.518948203833106e-07, + "loss": 0.41499418020248413, + "step": 5775 + }, + { + "epoch": 1.3315273775216139, + "grad_norm": 1.7316213010818284, + "learning_rate": 5.515540954034956e-07, + "loss": 0.48424363136291504, + "step": 5776 + }, + { + "epoch": 1.3317579250720462, + "grad_norm": 1.7500118650105563, + "learning_rate": 5.512134355782059e-07, + "loss": 0.481282114982605, + "step": 5777 + }, + { + "epoch": 1.3319884726224784, + "grad_norm": 1.4970286912195556, + "learning_rate": 5.508728409569353e-07, + "loss": 0.46897292137145996, + "step": 5778 + }, + { + "epoch": 1.3322190201729107, + "grad_norm": 1.5395432239994278, + "learning_rate": 5.505323115891684e-07, + "loss": 0.4447956681251526, + "step": 5779 + }, + { + "epoch": 1.332449567723343, + "grad_norm": 1.7841646267882065, + "learning_rate": 5.50191847524381e-07, + "loss": 0.4055880606174469, + "step": 5780 + }, + { + "epoch": 1.3326801152737753, + "grad_norm": 1.8999810775506178, + "learning_rate": 5.498514488120391e-07, + "loss": 0.48870790004730225, + "step": 5781 + }, + { + "epoch": 1.3329106628242076, + "grad_norm": 1.5931778786898716, + "learning_rate": 5.495111155015986e-07, + "loss": 0.46856385469436646, + "step": 5782 + }, + { + "epoch": 1.3331412103746398, + "grad_norm": 1.6068645914103106, + "learning_rate": 5.49170847642507e-07, + "loss": 0.42831993103027344, + "step": 5783 + }, + { + "epoch": 1.3333717579250721, + "grad_norm": 1.4770491256923433, + "learning_rate": 5.488306452842013e-07, + "loss": 0.41119974851608276, + "step": 5784 + }, + { + "epoch": 1.3336023054755044, + "grad_norm": 1.8185317788143798, + "learning_rate": 5.484905084761091e-07, + "loss": 0.5369082689285278, + "step": 5785 + }, + { + "epoch": 1.3338328530259367, + "grad_norm": 1.9538048720090067, + "learning_rate": 5.481504372676495e-07, + "loss": 0.4573896527290344, + "step": 5786 + }, + { + "epoch": 1.334063400576369, + "grad_norm": 1.6221206727707211, + "learning_rate": 5.478104317082305e-07, + "loss": 0.538988471031189, + "step": 5787 + }, + { + "epoch": 1.3342939481268012, + "grad_norm": 1.5995139592730108, + "learning_rate": 5.474704918472516e-07, + "loss": 0.47058168053627014, + "step": 5788 + }, + { + "epoch": 1.3345244956772335, + "grad_norm": 1.5580693417811027, + "learning_rate": 5.471306177341031e-07, + "loss": 0.522472083568573, + "step": 5789 + }, + { + "epoch": 1.3347550432276658, + "grad_norm": 1.5860681876603904, + "learning_rate": 5.467908094181648e-07, + "loss": 0.4278186559677124, + "step": 5790 + }, + { + "epoch": 1.334985590778098, + "grad_norm": 1.5625539938501531, + "learning_rate": 5.464510669488073e-07, + "loss": 0.4699779450893402, + "step": 5791 + }, + { + "epoch": 1.3352161383285304, + "grad_norm": 1.7937776074083913, + "learning_rate": 5.461113903753911e-07, + "loss": 0.4560111165046692, + "step": 5792 + }, + { + "epoch": 1.3354466858789626, + "grad_norm": 1.837112208791973, + "learning_rate": 5.457717797472683e-07, + "loss": 0.48460662364959717, + "step": 5793 + }, + { + "epoch": 1.335677233429395, + "grad_norm": 1.7402459378855901, + "learning_rate": 5.454322351137809e-07, + "loss": 0.4199531376361847, + "step": 5794 + }, + { + "epoch": 1.3359077809798272, + "grad_norm": 1.5222181264331647, + "learning_rate": 5.450927565242605e-07, + "loss": 0.4457356929779053, + "step": 5795 + }, + { + "epoch": 1.3361383285302595, + "grad_norm": 1.649365506502914, + "learning_rate": 5.447533440280309e-07, + "loss": 0.3968189060688019, + "step": 5796 + }, + { + "epoch": 1.3363688760806918, + "grad_norm": 1.4671060690724647, + "learning_rate": 5.444139976744044e-07, + "loss": 0.41763365268707275, + "step": 5797 + }, + { + "epoch": 1.336599423631124, + "grad_norm": 1.722950328594955, + "learning_rate": 5.440747175126843e-07, + "loss": 0.5074071288108826, + "step": 5798 + }, + { + "epoch": 1.3368299711815563, + "grad_norm": 1.6111552809013234, + "learning_rate": 5.437355035921654e-07, + "loss": 0.4077880382537842, + "step": 5799 + }, + { + "epoch": 1.3370605187319884, + "grad_norm": 1.5466812995082164, + "learning_rate": 5.433963559621308e-07, + "loss": 0.4905518889427185, + "step": 5800 + }, + { + "epoch": 1.3372910662824207, + "grad_norm": 1.682630265863772, + "learning_rate": 5.430572746718558e-07, + "loss": 0.4662671685218811, + "step": 5801 + }, + { + "epoch": 1.337521613832853, + "grad_norm": 1.3132407255636611, + "learning_rate": 5.427182597706057e-07, + "loss": 0.4523214101791382, + "step": 5802 + }, + { + "epoch": 1.3377521613832852, + "grad_norm": 1.90695064970206, + "learning_rate": 5.423793113076356e-07, + "loss": 0.47909995913505554, + "step": 5803 + }, + { + "epoch": 1.3379827089337175, + "grad_norm": 1.7910549238251845, + "learning_rate": 5.420404293321908e-07, + "loss": 0.5743957757949829, + "step": 5804 + }, + { + "epoch": 1.3382132564841498, + "grad_norm": 1.5499659591777002, + "learning_rate": 5.417016138935073e-07, + "loss": 0.46549567580223083, + "step": 5805 + }, + { + "epoch": 1.338443804034582, + "grad_norm": 1.3200635620126038, + "learning_rate": 5.413628650408117e-07, + "loss": 0.3912985324859619, + "step": 5806 + }, + { + "epoch": 1.3386743515850144, + "grad_norm": 1.3955673682018792, + "learning_rate": 5.41024182823321e-07, + "loss": 0.4237633943557739, + "step": 5807 + }, + { + "epoch": 1.3389048991354466, + "grad_norm": 1.6883882388649214, + "learning_rate": 5.406855672902417e-07, + "loss": 0.4899691641330719, + "step": 5808 + }, + { + "epoch": 1.339135446685879, + "grad_norm": 1.3966918885304072, + "learning_rate": 5.403470184907716e-07, + "loss": 0.4774795174598694, + "step": 5809 + }, + { + "epoch": 1.3393659942363112, + "grad_norm": 1.8954957129895125, + "learning_rate": 5.400085364740981e-07, + "loss": 0.5134497880935669, + "step": 5810 + }, + { + "epoch": 1.3395965417867435, + "grad_norm": 1.33408195533013, + "learning_rate": 5.396701212893985e-07, + "loss": 0.3900358974933624, + "step": 5811 + }, + { + "epoch": 1.3398270893371758, + "grad_norm": 1.5802049157805196, + "learning_rate": 5.39331772985842e-07, + "loss": 0.5683782696723938, + "step": 5812 + }, + { + "epoch": 1.340057636887608, + "grad_norm": 1.4035611271860546, + "learning_rate": 5.389934916125865e-07, + "loss": 0.4435126781463623, + "step": 5813 + }, + { + "epoch": 1.3402881844380403, + "grad_norm": 1.5175886691873446, + "learning_rate": 5.386552772187806e-07, + "loss": 0.42716413736343384, + "step": 5814 + }, + { + "epoch": 1.3405187319884726, + "grad_norm": 1.6801684258194023, + "learning_rate": 5.383171298535645e-07, + "loss": 0.4489009976387024, + "step": 5815 + }, + { + "epoch": 1.3407492795389049, + "grad_norm": 1.7185586590192998, + "learning_rate": 5.37979049566066e-07, + "loss": 0.4845975637435913, + "step": 5816 + }, + { + "epoch": 1.3409798270893372, + "grad_norm": 1.7370256253696736, + "learning_rate": 5.376410364054052e-07, + "loss": 0.5482637882232666, + "step": 5817 + }, + { + "epoch": 1.3412103746397694, + "grad_norm": 2.613056414919246, + "learning_rate": 5.373030904206924e-07, + "loss": 0.5108325481414795, + "step": 5818 + }, + { + "epoch": 1.3414409221902017, + "grad_norm": 1.4978146785702697, + "learning_rate": 5.369652116610269e-07, + "loss": 0.459455668926239, + "step": 5819 + }, + { + "epoch": 1.341671469740634, + "grad_norm": 1.509610858326331, + "learning_rate": 5.366274001754995e-07, + "loss": 0.5242212414741516, + "step": 5820 + }, + { + "epoch": 1.3419020172910663, + "grad_norm": 1.7716640742532166, + "learning_rate": 5.362896560131905e-07, + "loss": 0.509830892086029, + "step": 5821 + }, + { + "epoch": 1.3421325648414986, + "grad_norm": 1.7106393348542666, + "learning_rate": 5.359519792231703e-07, + "loss": 0.45476478338241577, + "step": 5822 + }, + { + "epoch": 1.3423631123919308, + "grad_norm": 1.9332971228816167, + "learning_rate": 5.356143698545006e-07, + "loss": 0.43780767917633057, + "step": 5823 + }, + { + "epoch": 1.3425936599423631, + "grad_norm": 1.519077249132743, + "learning_rate": 5.352768279562314e-07, + "loss": 0.4382368326187134, + "step": 5824 + }, + { + "epoch": 1.3428242074927954, + "grad_norm": 1.694275850243012, + "learning_rate": 5.34939353577405e-07, + "loss": 0.4616197943687439, + "step": 5825 + }, + { + "epoch": 1.3430547550432277, + "grad_norm": 1.673271377870483, + "learning_rate": 5.346019467670527e-07, + "loss": 0.5287420749664307, + "step": 5826 + }, + { + "epoch": 1.34328530259366, + "grad_norm": 1.6107217520062422, + "learning_rate": 5.342646075741964e-07, + "loss": 0.4960588216781616, + "step": 5827 + }, + { + "epoch": 1.3435158501440922, + "grad_norm": 1.787783527320612, + "learning_rate": 5.339273360478473e-07, + "loss": 0.4494114816188812, + "step": 5828 + }, + { + "epoch": 1.3437463976945245, + "grad_norm": 1.6044879846817381, + "learning_rate": 5.335901322370077e-07, + "loss": 0.5143953561782837, + "step": 5829 + }, + { + "epoch": 1.3439769452449568, + "grad_norm": 1.6617843855030219, + "learning_rate": 5.332529961906698e-07, + "loss": 0.47664588689804077, + "step": 5830 + }, + { + "epoch": 1.344207492795389, + "grad_norm": 1.6680735023411237, + "learning_rate": 5.329159279578166e-07, + "loss": 0.5280349850654602, + "step": 5831 + }, + { + "epoch": 1.3444380403458214, + "grad_norm": 1.4132124166382474, + "learning_rate": 5.325789275874195e-07, + "loss": 0.4075919985771179, + "step": 5832 + }, + { + "epoch": 1.3446685878962537, + "grad_norm": 1.695845807521503, + "learning_rate": 5.322419951284422e-07, + "loss": 0.48058271408081055, + "step": 5833 + }, + { + "epoch": 1.344899135446686, + "grad_norm": 1.6607672510224756, + "learning_rate": 5.319051306298371e-07, + "loss": 0.5294280648231506, + "step": 5834 + }, + { + "epoch": 1.3451296829971182, + "grad_norm": 1.7474336626992906, + "learning_rate": 5.315683341405466e-07, + "loss": 0.47455504536628723, + "step": 5835 + }, + { + "epoch": 1.3453602305475505, + "grad_norm": 1.4619071794552545, + "learning_rate": 5.312316057095045e-07, + "loss": 0.4823703169822693, + "step": 5836 + }, + { + "epoch": 1.3455907780979828, + "grad_norm": 1.4777456061736687, + "learning_rate": 5.308949453856333e-07, + "loss": 0.47531551122665405, + "step": 5837 + }, + { + "epoch": 1.345821325648415, + "grad_norm": 1.6734267117158743, + "learning_rate": 5.305583532178464e-07, + "loss": 0.4509006142616272, + "step": 5838 + }, + { + "epoch": 1.3460518731988473, + "grad_norm": 1.5829632523885313, + "learning_rate": 5.302218292550478e-07, + "loss": 0.48463982343673706, + "step": 5839 + }, + { + "epoch": 1.3462824207492796, + "grad_norm": 1.694704250371438, + "learning_rate": 5.298853735461303e-07, + "loss": 0.4683498442173004, + "step": 5840 + }, + { + "epoch": 1.346512968299712, + "grad_norm": 1.6056494194260316, + "learning_rate": 5.295489861399771e-07, + "loss": 0.4532889723777771, + "step": 5841 + }, + { + "epoch": 1.346743515850144, + "grad_norm": 1.6004406350046356, + "learning_rate": 5.292126670854626e-07, + "loss": 0.4410448968410492, + "step": 5842 + }, + { + "epoch": 1.3469740634005762, + "grad_norm": 1.5125184912832104, + "learning_rate": 5.288764164314499e-07, + "loss": 0.48533615469932556, + "step": 5843 + }, + { + "epoch": 1.3472046109510085, + "grad_norm": 1.4454172468360864, + "learning_rate": 5.28540234226793e-07, + "loss": 0.5278619527816772, + "step": 5844 + }, + { + "epoch": 1.3474351585014408, + "grad_norm": 1.6917564731344552, + "learning_rate": 5.282041205203354e-07, + "loss": 0.43173807859420776, + "step": 5845 + }, + { + "epoch": 1.347665706051873, + "grad_norm": 1.311431998190532, + "learning_rate": 5.278680753609113e-07, + "loss": 0.44198548793792725, + "step": 5846 + }, + { + "epoch": 1.3478962536023054, + "grad_norm": 1.675999276148586, + "learning_rate": 5.275320987973444e-07, + "loss": 0.5097990036010742, + "step": 5847 + }, + { + "epoch": 1.3481268011527376, + "grad_norm": 1.5200547839915433, + "learning_rate": 5.271961908784483e-07, + "loss": 0.41765618324279785, + "step": 5848 + }, + { + "epoch": 1.34835734870317, + "grad_norm": 1.509547179156895, + "learning_rate": 5.268603516530274e-07, + "loss": 0.465061217546463, + "step": 5849 + }, + { + "epoch": 1.3485878962536022, + "grad_norm": 1.640068419096848, + "learning_rate": 5.265245811698751e-07, + "loss": 0.4371737837791443, + "step": 5850 + }, + { + "epoch": 1.3488184438040345, + "grad_norm": 1.6272380446300156, + "learning_rate": 5.261888794777757e-07, + "loss": 0.4835420846939087, + "step": 5851 + }, + { + "epoch": 1.3490489913544668, + "grad_norm": 1.6120940633761554, + "learning_rate": 5.258532466255037e-07, + "loss": 0.434193879365921, + "step": 5852 + }, + { + "epoch": 1.349279538904899, + "grad_norm": 1.456789174255129, + "learning_rate": 5.255176826618223e-07, + "loss": 0.48119521141052246, + "step": 5853 + }, + { + "epoch": 1.3495100864553313, + "grad_norm": 2.0695007608526255, + "learning_rate": 5.251821876354853e-07, + "loss": 0.5133852958679199, + "step": 5854 + }, + { + "epoch": 1.3497406340057636, + "grad_norm": 1.5740277395577307, + "learning_rate": 5.248467615952374e-07, + "loss": 0.502734899520874, + "step": 5855 + }, + { + "epoch": 1.3499711815561959, + "grad_norm": 1.737984028582727, + "learning_rate": 5.245114045898118e-07, + "loss": 0.47067946195602417, + "step": 5856 + }, + { + "epoch": 1.3502017291066282, + "grad_norm": 1.861351064114255, + "learning_rate": 5.241761166679331e-07, + "loss": 0.5280636548995972, + "step": 5857 + }, + { + "epoch": 1.3504322766570604, + "grad_norm": 1.9027445398531666, + "learning_rate": 5.238408978783143e-07, + "loss": 0.43284815549850464, + "step": 5858 + }, + { + "epoch": 1.3506628242074927, + "grad_norm": 1.6794616508594868, + "learning_rate": 5.235057482696601e-07, + "loss": 0.4984050989151001, + "step": 5859 + }, + { + "epoch": 1.350893371757925, + "grad_norm": 1.455600936685728, + "learning_rate": 5.231706678906637e-07, + "loss": 0.4615752696990967, + "step": 5860 + }, + { + "epoch": 1.3511239193083573, + "grad_norm": 1.5178879378297239, + "learning_rate": 5.228356567900086e-07, + "loss": 0.4811222553253174, + "step": 5861 + }, + { + "epoch": 1.3513544668587896, + "grad_norm": 1.7164878231984242, + "learning_rate": 5.225007150163687e-07, + "loss": 0.5302398800849915, + "step": 5862 + }, + { + "epoch": 1.3515850144092219, + "grad_norm": 1.7231443524330496, + "learning_rate": 5.22165842618408e-07, + "loss": 0.46788594126701355, + "step": 5863 + }, + { + "epoch": 1.3518155619596541, + "grad_norm": 1.4074482819945295, + "learning_rate": 5.218310396447791e-07, + "loss": 0.42734235525131226, + "step": 5864 + }, + { + "epoch": 1.3520461095100864, + "grad_norm": 1.6191419784980083, + "learning_rate": 5.214963061441264e-07, + "loss": 0.4698154926300049, + "step": 5865 + }, + { + "epoch": 1.3522766570605187, + "grad_norm": 1.6424454305776117, + "learning_rate": 5.211616421650826e-07, + "loss": 0.48623502254486084, + "step": 5866 + }, + { + "epoch": 1.352507204610951, + "grad_norm": 1.8362604622502379, + "learning_rate": 5.208270477562704e-07, + "loss": 0.5312871336936951, + "step": 5867 + }, + { + "epoch": 1.3527377521613833, + "grad_norm": 2.04277197466832, + "learning_rate": 5.204925229663039e-07, + "loss": 0.5257741212844849, + "step": 5868 + }, + { + "epoch": 1.3529682997118155, + "grad_norm": 1.515431923993323, + "learning_rate": 5.201580678437852e-07, + "loss": 0.42586231231689453, + "step": 5869 + }, + { + "epoch": 1.3531988472622478, + "grad_norm": 1.5045820135918335, + "learning_rate": 5.198236824373075e-07, + "loss": 0.4793773293495178, + "step": 5870 + }, + { + "epoch": 1.35342939481268, + "grad_norm": 1.6874766609639118, + "learning_rate": 5.194893667954541e-07, + "loss": 0.4825376570224762, + "step": 5871 + }, + { + "epoch": 1.3536599423631124, + "grad_norm": 1.5749959416130654, + "learning_rate": 5.191551209667968e-07, + "loss": 0.5299191474914551, + "step": 5872 + }, + { + "epoch": 1.3538904899135447, + "grad_norm": 1.9579620248680123, + "learning_rate": 5.188209449998984e-07, + "loss": 0.45400893688201904, + "step": 5873 + }, + { + "epoch": 1.354121037463977, + "grad_norm": 1.586588649654464, + "learning_rate": 5.184868389433108e-07, + "loss": 0.40687328577041626, + "step": 5874 + }, + { + "epoch": 1.3543515850144092, + "grad_norm": 1.7386772311529148, + "learning_rate": 5.181528028455764e-07, + "loss": 0.44856715202331543, + "step": 5875 + }, + { + "epoch": 1.3545821325648415, + "grad_norm": 1.6814656770848047, + "learning_rate": 5.178188367552276e-07, + "loss": 0.4961288869380951, + "step": 5876 + }, + { + "epoch": 1.3548126801152738, + "grad_norm": 1.3451366506384344, + "learning_rate": 5.174849407207853e-07, + "loss": 0.48112860321998596, + "step": 5877 + }, + { + "epoch": 1.355043227665706, + "grad_norm": 1.6313677309379906, + "learning_rate": 5.17151114790762e-07, + "loss": 0.5009165406227112, + "step": 5878 + }, + { + "epoch": 1.3552737752161383, + "grad_norm": 1.6571659753115824, + "learning_rate": 5.168173590136588e-07, + "loss": 0.4461110234260559, + "step": 5879 + }, + { + "epoch": 1.3555043227665706, + "grad_norm": 1.9922430252685355, + "learning_rate": 5.164836734379666e-07, + "loss": 0.46784788370132446, + "step": 5880 + }, + { + "epoch": 1.355734870317003, + "grad_norm": 1.6236627472570804, + "learning_rate": 5.161500581121669e-07, + "loss": 0.4470252990722656, + "step": 5881 + }, + { + "epoch": 1.3559654178674352, + "grad_norm": 1.6544593178546172, + "learning_rate": 5.158165130847301e-07, + "loss": 0.4843372106552124, + "step": 5882 + }, + { + "epoch": 1.3561959654178675, + "grad_norm": 1.4568756529342444, + "learning_rate": 5.15483038404117e-07, + "loss": 0.48338162899017334, + "step": 5883 + }, + { + "epoch": 1.3564265129682997, + "grad_norm": 1.5639784023031311, + "learning_rate": 5.151496341187786e-07, + "loss": 0.49267876148223877, + "step": 5884 + }, + { + "epoch": 1.356657060518732, + "grad_norm": 1.4532699235197888, + "learning_rate": 5.148163002771543e-07, + "loss": 0.44972002506256104, + "step": 5885 + }, + { + "epoch": 1.3568876080691643, + "grad_norm": 1.849096817052275, + "learning_rate": 5.144830369276747e-07, + "loss": 0.47793418169021606, + "step": 5886 + }, + { + "epoch": 1.3571181556195966, + "grad_norm": 2.019581972182712, + "learning_rate": 5.141498441187584e-07, + "loss": 0.5081642866134644, + "step": 5887 + }, + { + "epoch": 1.3573487031700289, + "grad_norm": 1.7936039173954244, + "learning_rate": 5.138167218988156e-07, + "loss": 0.4508650302886963, + "step": 5888 + }, + { + "epoch": 1.3575792507204612, + "grad_norm": 1.9903590976587098, + "learning_rate": 5.13483670316246e-07, + "loss": 0.509946346282959, + "step": 5889 + }, + { + "epoch": 1.3578097982708934, + "grad_norm": 1.3894335336694332, + "learning_rate": 5.131506894194376e-07, + "loss": 0.3508078455924988, + "step": 5890 + }, + { + "epoch": 1.3580403458213257, + "grad_norm": 1.6347599571971514, + "learning_rate": 5.128177792567696e-07, + "loss": 0.5096418857574463, + "step": 5891 + }, + { + "epoch": 1.358270893371758, + "grad_norm": 1.2587415692746868, + "learning_rate": 5.124849398766103e-07, + "loss": 0.4068000018596649, + "step": 5892 + }, + { + "epoch": 1.3585014409221903, + "grad_norm": 1.6029483959229915, + "learning_rate": 5.121521713273173e-07, + "loss": 0.4583415687084198, + "step": 5893 + }, + { + "epoch": 1.3587319884726226, + "grad_norm": 1.9696596179786654, + "learning_rate": 5.118194736572395e-07, + "loss": 0.48043733835220337, + "step": 5894 + }, + { + "epoch": 1.3589625360230548, + "grad_norm": 1.623202830766786, + "learning_rate": 5.11486846914713e-07, + "loss": 0.5002140998840332, + "step": 5895 + }, + { + "epoch": 1.3591930835734871, + "grad_norm": 1.620019064708946, + "learning_rate": 5.111542911480659e-07, + "loss": 0.5203151702880859, + "step": 5896 + }, + { + "epoch": 1.3594236311239194, + "grad_norm": 1.4403729884467253, + "learning_rate": 5.108218064056152e-07, + "loss": 0.4077376127243042, + "step": 5897 + }, + { + "epoch": 1.3596541786743517, + "grad_norm": 1.5876907999897627, + "learning_rate": 5.104893927356674e-07, + "loss": 0.5146734714508057, + "step": 5898 + }, + { + "epoch": 1.359884726224784, + "grad_norm": 1.7641615656345115, + "learning_rate": 5.10157050186518e-07, + "loss": 0.5297499299049377, + "step": 5899 + }, + { + "epoch": 1.3601152737752162, + "grad_norm": 1.586023517840485, + "learning_rate": 5.09824778806454e-07, + "loss": 0.49775469303131104, + "step": 5900 + }, + { + "epoch": 1.3603458213256485, + "grad_norm": 1.5383657146391863, + "learning_rate": 5.094925786437499e-07, + "loss": 0.42014068365097046, + "step": 5901 + }, + { + "epoch": 1.3605763688760808, + "grad_norm": 1.5811863841429854, + "learning_rate": 5.09160449746672e-07, + "loss": 0.39976444840431213, + "step": 5902 + }, + { + "epoch": 1.360806916426513, + "grad_norm": 1.5391009905059254, + "learning_rate": 5.088283921634742e-07, + "loss": 0.4181244671344757, + "step": 5903 + }, + { + "epoch": 1.3610374639769454, + "grad_norm": 1.7318442603568622, + "learning_rate": 5.084964059424018e-07, + "loss": 0.49912410974502563, + "step": 5904 + }, + { + "epoch": 1.3612680115273776, + "grad_norm": 1.6464926496982077, + "learning_rate": 5.081644911316886e-07, + "loss": 0.5237860679626465, + "step": 5905 + }, + { + "epoch": 1.36149855907781, + "grad_norm": 1.9750248134134947, + "learning_rate": 5.078326477795583e-07, + "loss": 0.4730784296989441, + "step": 5906 + }, + { + "epoch": 1.3617291066282422, + "grad_norm": 1.5345984420273122, + "learning_rate": 5.075008759342241e-07, + "loss": 0.5080181360244751, + "step": 5907 + }, + { + "epoch": 1.3619596541786745, + "grad_norm": 1.6710731790396276, + "learning_rate": 5.071691756438897e-07, + "loss": 0.4697926640510559, + "step": 5908 + }, + { + "epoch": 1.3621902017291068, + "grad_norm": 1.490941194326937, + "learning_rate": 5.068375469567468e-07, + "loss": 0.5204674005508423, + "step": 5909 + }, + { + "epoch": 1.3624207492795388, + "grad_norm": 1.5304764486604812, + "learning_rate": 5.065059899209785e-07, + "loss": 0.43228816986083984, + "step": 5910 + }, + { + "epoch": 1.362651296829971, + "grad_norm": 1.3898360585052687, + "learning_rate": 5.06174504584756e-07, + "loss": 0.47551077604293823, + "step": 5911 + }, + { + "epoch": 1.3628818443804034, + "grad_norm": 1.4150986020819334, + "learning_rate": 5.058430909962406e-07, + "loss": 0.44586285948753357, + "step": 5912 + }, + { + "epoch": 1.3631123919308357, + "grad_norm": 1.3091754547925671, + "learning_rate": 5.055117492035839e-07, + "loss": 0.47856760025024414, + "step": 5913 + }, + { + "epoch": 1.363342939481268, + "grad_norm": 1.9810859445793283, + "learning_rate": 5.051804792549254e-07, + "loss": 0.6033484935760498, + "step": 5914 + }, + { + "epoch": 1.3635734870317002, + "grad_norm": 1.642277635899145, + "learning_rate": 5.048492811983959e-07, + "loss": 0.5101956129074097, + "step": 5915 + }, + { + "epoch": 1.3638040345821325, + "grad_norm": 2.462967749056604, + "learning_rate": 5.045181550821154e-07, + "loss": 0.530934751033783, + "step": 5916 + }, + { + "epoch": 1.3640345821325648, + "grad_norm": 1.558351988913394, + "learning_rate": 5.041871009541922e-07, + "loss": 0.4618695378303528, + "step": 5917 + }, + { + "epoch": 1.364265129682997, + "grad_norm": 1.393276685582818, + "learning_rate": 5.038561188627257e-07, + "loss": 0.4308139383792877, + "step": 5918 + }, + { + "epoch": 1.3644956772334293, + "grad_norm": 1.4122819038580587, + "learning_rate": 5.035252088558034e-07, + "loss": 0.4413840174674988, + "step": 5919 + }, + { + "epoch": 1.3647262247838616, + "grad_norm": 1.762353694417549, + "learning_rate": 5.031943709815036e-07, + "loss": 0.426006019115448, + "step": 5920 + }, + { + "epoch": 1.364956772334294, + "grad_norm": 2.0693345821353852, + "learning_rate": 5.028636052878938e-07, + "loss": 0.5016382932662964, + "step": 5921 + }, + { + "epoch": 1.3651873198847262, + "grad_norm": 1.5626757094374883, + "learning_rate": 5.025329118230302e-07, + "loss": 0.4329431653022766, + "step": 5922 + }, + { + "epoch": 1.3654178674351585, + "grad_norm": 1.4393399562375095, + "learning_rate": 5.022022906349598e-07, + "loss": 0.4622783064842224, + "step": 5923 + }, + { + "epoch": 1.3656484149855908, + "grad_norm": 1.8772089679378412, + "learning_rate": 5.018717417717181e-07, + "loss": 0.45837199687957764, + "step": 5924 + }, + { + "epoch": 1.365878962536023, + "grad_norm": 1.508840858176121, + "learning_rate": 5.0154126528133e-07, + "loss": 0.40243199467658997, + "step": 5925 + }, + { + "epoch": 1.3661095100864553, + "grad_norm": 1.7704033220607236, + "learning_rate": 5.012108612118111e-07, + "loss": 0.3701140284538269, + "step": 5926 + }, + { + "epoch": 1.3663400576368876, + "grad_norm": 1.4584895223212495, + "learning_rate": 5.008805296111649e-07, + "loss": 0.42856937646865845, + "step": 5927 + }, + { + "epoch": 1.3665706051873199, + "grad_norm": 1.4058422037004188, + "learning_rate": 5.005502705273855e-07, + "loss": 0.4932258129119873, + "step": 5928 + }, + { + "epoch": 1.3668011527377522, + "grad_norm": 1.5903214965689192, + "learning_rate": 5.00220084008457e-07, + "loss": 0.40012168884277344, + "step": 5929 + }, + { + "epoch": 1.3670317002881844, + "grad_norm": 1.454636698679369, + "learning_rate": 4.998899701023503e-07, + "loss": 0.4348130524158478, + "step": 5930 + }, + { + "epoch": 1.3672622478386167, + "grad_norm": 1.531204931434994, + "learning_rate": 4.995599288570287e-07, + "loss": 0.43674468994140625, + "step": 5931 + }, + { + "epoch": 1.367492795389049, + "grad_norm": 1.722849550986695, + "learning_rate": 4.992299603204432e-07, + "loss": 0.5576746463775635, + "step": 5932 + }, + { + "epoch": 1.3677233429394813, + "grad_norm": 1.4734378875973935, + "learning_rate": 4.98900064540535e-07, + "loss": 0.4338483214378357, + "step": 5933 + }, + { + "epoch": 1.3679538904899136, + "grad_norm": 1.5640070595952762, + "learning_rate": 4.98570241565235e-07, + "loss": 0.5156441330909729, + "step": 5934 + }, + { + "epoch": 1.3681844380403458, + "grad_norm": 1.4905894229317878, + "learning_rate": 4.982404914424626e-07, + "loss": 0.4708647131919861, + "step": 5935 + }, + { + "epoch": 1.3684149855907781, + "grad_norm": 1.6577000540746343, + "learning_rate": 4.979108142201267e-07, + "loss": 0.4355749487876892, + "step": 5936 + }, + { + "epoch": 1.3686455331412104, + "grad_norm": 1.7883353786762823, + "learning_rate": 4.975812099461268e-07, + "loss": 0.4360370635986328, + "step": 5937 + }, + { + "epoch": 1.3688760806916427, + "grad_norm": 1.9642902580940569, + "learning_rate": 4.972516786683501e-07, + "loss": 0.5324473977088928, + "step": 5938 + }, + { + "epoch": 1.369106628242075, + "grad_norm": 1.429248391480111, + "learning_rate": 4.969222204346751e-07, + "loss": 0.49706023931503296, + "step": 5939 + }, + { + "epoch": 1.3693371757925072, + "grad_norm": 1.6516845494204488, + "learning_rate": 4.965928352929674e-07, + "loss": 0.3852601647377014, + "step": 5940 + }, + { + "epoch": 1.3695677233429395, + "grad_norm": 1.5936594611011352, + "learning_rate": 4.962635232910843e-07, + "loss": 0.4638129472732544, + "step": 5941 + }, + { + "epoch": 1.3697982708933718, + "grad_norm": 1.683078925842744, + "learning_rate": 4.959342844768711e-07, + "loss": 0.4691101908683777, + "step": 5942 + }, + { + "epoch": 1.370028818443804, + "grad_norm": 1.3490562795558882, + "learning_rate": 4.956051188981623e-07, + "loss": 0.5083839893341064, + "step": 5943 + }, + { + "epoch": 1.3702593659942364, + "grad_norm": 1.4625392664965804, + "learning_rate": 4.952760266027825e-07, + "loss": 0.4441392719745636, + "step": 5944 + }, + { + "epoch": 1.3704899135446686, + "grad_norm": 1.6907734985715615, + "learning_rate": 4.94947007638546e-07, + "loss": 0.38773834705352783, + "step": 5945 + }, + { + "epoch": 1.370720461095101, + "grad_norm": 1.4182609011262102, + "learning_rate": 4.946180620532548e-07, + "loss": 0.4743019938468933, + "step": 5946 + }, + { + "epoch": 1.3709510086455332, + "grad_norm": 1.6787322002316782, + "learning_rate": 4.942891898947024e-07, + "loss": 0.3981400728225708, + "step": 5947 + }, + { + "epoch": 1.3711815561959655, + "grad_norm": 1.4088695472372947, + "learning_rate": 4.939603912106696e-07, + "loss": 0.40556612610816956, + "step": 5948 + }, + { + "epoch": 1.3714121037463978, + "grad_norm": 1.6422838341462083, + "learning_rate": 4.936316660489277e-07, + "loss": 0.4359711706638336, + "step": 5949 + }, + { + "epoch": 1.37164265129683, + "grad_norm": 1.4347305213532893, + "learning_rate": 4.933030144572372e-07, + "loss": 0.44561219215393066, + "step": 5950 + }, + { + "epoch": 1.371873198847262, + "grad_norm": 1.319594203653507, + "learning_rate": 4.929744364833474e-07, + "loss": 0.3856406807899475, + "step": 5951 + }, + { + "epoch": 1.3721037463976944, + "grad_norm": 1.6102685715345482, + "learning_rate": 4.926459321749973e-07, + "loss": 0.4586164951324463, + "step": 5952 + }, + { + "epoch": 1.3723342939481267, + "grad_norm": 2.0415905828304117, + "learning_rate": 4.92317501579916e-07, + "loss": 0.5609460473060608, + "step": 5953 + }, + { + "epoch": 1.372564841498559, + "grad_norm": 2.256441945151045, + "learning_rate": 4.919891447458204e-07, + "loss": 0.5234363675117493, + "step": 5954 + }, + { + "epoch": 1.3727953890489912, + "grad_norm": 1.959793340744298, + "learning_rate": 4.916608617204171e-07, + "loss": 0.4611673951148987, + "step": 5955 + }, + { + "epoch": 1.3730259365994235, + "grad_norm": 1.6387175941742442, + "learning_rate": 4.913326525514021e-07, + "loss": 0.4873179793357849, + "step": 5956 + }, + { + "epoch": 1.3732564841498558, + "grad_norm": 1.915167462474106, + "learning_rate": 4.910045172864613e-07, + "loss": 0.5042279362678528, + "step": 5957 + }, + { + "epoch": 1.373487031700288, + "grad_norm": 1.728746475348662, + "learning_rate": 4.906764559732695e-07, + "loss": 0.5057715177536011, + "step": 5958 + }, + { + "epoch": 1.3737175792507204, + "grad_norm": 1.7996746605755354, + "learning_rate": 4.903484686594897e-07, + "loss": 0.5340696573257446, + "step": 5959 + }, + { + "epoch": 1.3739481268011526, + "grad_norm": 1.5779353792159843, + "learning_rate": 4.90020555392776e-07, + "loss": 0.43351268768310547, + "step": 5960 + }, + { + "epoch": 1.374178674351585, + "grad_norm": 1.573562503159586, + "learning_rate": 4.896927162207707e-07, + "loss": 0.457750141620636, + "step": 5961 + }, + { + "epoch": 1.3744092219020172, + "grad_norm": 1.5967761640899065, + "learning_rate": 4.893649511911044e-07, + "loss": 0.4459339678287506, + "step": 5962 + }, + { + "epoch": 1.3746397694524495, + "grad_norm": 1.5932726302811062, + "learning_rate": 4.890372603513993e-07, + "loss": 0.5079913139343262, + "step": 5963 + }, + { + "epoch": 1.3748703170028818, + "grad_norm": 1.6314201964372212, + "learning_rate": 4.887096437492643e-07, + "loss": 0.4558470845222473, + "step": 5964 + }, + { + "epoch": 1.375100864553314, + "grad_norm": 1.555540006249106, + "learning_rate": 4.883821014322992e-07, + "loss": 0.5119925737380981, + "step": 5965 + }, + { + "epoch": 1.3753314121037463, + "grad_norm": 1.5582303154225734, + "learning_rate": 4.880546334480929e-07, + "loss": 0.39001739025115967, + "step": 5966 + }, + { + "epoch": 1.3755619596541786, + "grad_norm": 1.5191279412240843, + "learning_rate": 4.877272398442228e-07, + "loss": 0.38213586807250977, + "step": 5967 + }, + { + "epoch": 1.3757925072046109, + "grad_norm": 1.4691257079314304, + "learning_rate": 4.873999206682552e-07, + "loss": 0.48196929693222046, + "step": 5968 + }, + { + "epoch": 1.3760230547550432, + "grad_norm": 1.6328356700934117, + "learning_rate": 4.87072675967747e-07, + "loss": 0.36515331268310547, + "step": 5969 + }, + { + "epoch": 1.3762536023054754, + "grad_norm": 1.6247882795619184, + "learning_rate": 4.867455057902429e-07, + "loss": 0.5114316940307617, + "step": 5970 + }, + { + "epoch": 1.3764841498559077, + "grad_norm": 1.8444487237090605, + "learning_rate": 4.864184101832778e-07, + "loss": 0.563550591468811, + "step": 5971 + }, + { + "epoch": 1.37671469740634, + "grad_norm": 1.7225734496778158, + "learning_rate": 4.860913891943746e-07, + "loss": 0.5151046514511108, + "step": 5972 + }, + { + "epoch": 1.3769452449567723, + "grad_norm": 1.529072988346735, + "learning_rate": 4.857644428710469e-07, + "loss": 0.4265006184577942, + "step": 5973 + }, + { + "epoch": 1.3771757925072046, + "grad_norm": 1.551487562216016, + "learning_rate": 4.854375712607961e-07, + "loss": 0.4501451253890991, + "step": 5974 + }, + { + "epoch": 1.3774063400576368, + "grad_norm": 1.569217036910626, + "learning_rate": 4.85110774411113e-07, + "loss": 0.4197757840156555, + "step": 5975 + }, + { + "epoch": 1.3776368876080691, + "grad_norm": 1.5926547930252053, + "learning_rate": 4.847840523694784e-07, + "loss": 0.4908246695995331, + "step": 5976 + }, + { + "epoch": 1.3778674351585014, + "grad_norm": 1.526074427590657, + "learning_rate": 4.84457405183361e-07, + "loss": 0.4114115536212921, + "step": 5977 + }, + { + "epoch": 1.3780979827089337, + "grad_norm": 1.6071297705826257, + "learning_rate": 4.841308329002195e-07, + "loss": 0.4672767221927643, + "step": 5978 + }, + { + "epoch": 1.378328530259366, + "grad_norm": 1.2631644967059048, + "learning_rate": 4.838043355675019e-07, + "loss": 0.3806523084640503, + "step": 5979 + }, + { + "epoch": 1.3785590778097983, + "grad_norm": 1.9350101011141525, + "learning_rate": 4.834779132326444e-07, + "loss": 0.44928061962127686, + "step": 5980 + }, + { + "epoch": 1.3787896253602305, + "grad_norm": 1.8150559336473449, + "learning_rate": 4.831515659430726e-07, + "loss": 0.40093138813972473, + "step": 5981 + }, + { + "epoch": 1.3790201729106628, + "grad_norm": 2.041794025799897, + "learning_rate": 4.828252937462018e-07, + "loss": 0.4751448631286621, + "step": 5982 + }, + { + "epoch": 1.379250720461095, + "grad_norm": 1.7047133009163284, + "learning_rate": 4.824990966894355e-07, + "loss": 0.5027964115142822, + "step": 5983 + }, + { + "epoch": 1.3794812680115274, + "grad_norm": 1.5876448572494875, + "learning_rate": 4.821729748201674e-07, + "loss": 0.4667786955833435, + "step": 5984 + }, + { + "epoch": 1.3797118155619597, + "grad_norm": 1.6975993686252844, + "learning_rate": 4.818469281857787e-07, + "loss": 0.4899994134902954, + "step": 5985 + }, + { + "epoch": 1.379942363112392, + "grad_norm": 1.967426478945087, + "learning_rate": 4.815209568336415e-07, + "loss": 0.5160613059997559, + "step": 5986 + }, + { + "epoch": 1.3801729106628242, + "grad_norm": 1.4806686181451014, + "learning_rate": 4.811950608111158e-07, + "loss": 0.5122005939483643, + "step": 5987 + }, + { + "epoch": 1.3804034582132565, + "grad_norm": 1.5095702034418825, + "learning_rate": 4.808692401655503e-07, + "loss": 0.4773480296134949, + "step": 5988 + }, + { + "epoch": 1.3806340057636888, + "grad_norm": 1.4967712307165415, + "learning_rate": 4.805434949442837e-07, + "loss": 0.4779089093208313, + "step": 5989 + }, + { + "epoch": 1.380864553314121, + "grad_norm": 1.5842683506783093, + "learning_rate": 4.80217825194644e-07, + "loss": 0.4837608337402344, + "step": 5990 + }, + { + "epoch": 1.3810951008645533, + "grad_norm": 1.8521614971327032, + "learning_rate": 4.798922309639466e-07, + "loss": 0.4466870129108429, + "step": 5991 + }, + { + "epoch": 1.3813256484149856, + "grad_norm": 1.9601310722449918, + "learning_rate": 4.795667122994979e-07, + "loss": 0.4643474519252777, + "step": 5992 + }, + { + "epoch": 1.381556195965418, + "grad_norm": 1.5897841101760393, + "learning_rate": 4.79241269248592e-07, + "loss": 0.4888812303543091, + "step": 5993 + }, + { + "epoch": 1.3817867435158502, + "grad_norm": 1.5638951554173341, + "learning_rate": 4.789159018585118e-07, + "loss": 0.505973219871521, + "step": 5994 + }, + { + "epoch": 1.3820172910662825, + "grad_norm": 1.3822895824056294, + "learning_rate": 4.785906101765309e-07, + "loss": 0.44251859188079834, + "step": 5995 + }, + { + "epoch": 1.3822478386167147, + "grad_norm": 1.960049432168954, + "learning_rate": 4.782653942499097e-07, + "loss": 0.5573620200157166, + "step": 5996 + }, + { + "epoch": 1.382478386167147, + "grad_norm": 1.37836624882881, + "learning_rate": 4.779402541258993e-07, + "loss": 0.37017643451690674, + "step": 5997 + }, + { + "epoch": 1.3827089337175793, + "grad_norm": 1.970528045777299, + "learning_rate": 4.776151898517394e-07, + "loss": 0.4964868426322937, + "step": 5998 + }, + { + "epoch": 1.3829394812680116, + "grad_norm": 1.5277765140499937, + "learning_rate": 4.772902014746583e-07, + "loss": 0.460393488407135, + "step": 5999 + }, + { + "epoch": 1.3831700288184439, + "grad_norm": 1.898227204559436, + "learning_rate": 4.769652890418732e-07, + "loss": 0.497903048992157, + "step": 6000 + }, + { + "epoch": 1.3834005763688761, + "grad_norm": 1.5051772587978125, + "learning_rate": 4.7664045260059015e-07, + "loss": 0.4663991928100586, + "step": 6001 + }, + { + "epoch": 1.3836311239193084, + "grad_norm": 1.630329268480054, + "learning_rate": 4.763156921980049e-07, + "loss": 0.43742233514785767, + "step": 6002 + }, + { + "epoch": 1.3838616714697407, + "grad_norm": 1.2809856045675265, + "learning_rate": 4.7599100788130233e-07, + "loss": 0.4082593023777008, + "step": 6003 + }, + { + "epoch": 1.384092219020173, + "grad_norm": 1.9460794232048821, + "learning_rate": 4.7566639969765465e-07, + "loss": 0.5778148174285889, + "step": 6004 + }, + { + "epoch": 1.3843227665706053, + "grad_norm": 1.520124922186599, + "learning_rate": 4.7534186769422504e-07, + "loss": 0.4080566167831421, + "step": 6005 + }, + { + "epoch": 1.3845533141210375, + "grad_norm": 1.5687504912879382, + "learning_rate": 4.7501741191816403e-07, + "loss": 0.46496686339378357, + "step": 6006 + }, + { + "epoch": 1.3847838616714698, + "grad_norm": 1.4953220020064624, + "learning_rate": 4.746930324166114e-07, + "loss": 0.47985541820526123, + "step": 6007 + }, + { + "epoch": 1.385014409221902, + "grad_norm": 1.5018708897664648, + "learning_rate": 4.7436872923669703e-07, + "loss": 0.48982805013656616, + "step": 6008 + }, + { + "epoch": 1.3852449567723344, + "grad_norm": 1.4445730747155596, + "learning_rate": 4.740445024255377e-07, + "loss": 0.46816062927246094, + "step": 6009 + }, + { + "epoch": 1.3854755043227667, + "grad_norm": 1.460923651414453, + "learning_rate": 4.7372035203024097e-07, + "loss": 0.4749149680137634, + "step": 6010 + }, + { + "epoch": 1.385706051873199, + "grad_norm": 1.323855531957966, + "learning_rate": 4.733962780979025e-07, + "loss": 0.4393165707588196, + "step": 6011 + }, + { + "epoch": 1.3859365994236312, + "grad_norm": 1.6288416030334434, + "learning_rate": 4.7307228067560677e-07, + "loss": 0.3817910850048065, + "step": 6012 + }, + { + "epoch": 1.3861671469740635, + "grad_norm": 1.4361365253762288, + "learning_rate": 4.727483598104267e-07, + "loss": 0.3621605336666107, + "step": 6013 + }, + { + "epoch": 1.3863976945244958, + "grad_norm": 1.581373327873717, + "learning_rate": 4.7242451554942555e-07, + "loss": 0.39434587955474854, + "step": 6014 + }, + { + "epoch": 1.386628242074928, + "grad_norm": 1.310743303542111, + "learning_rate": 4.7210074793965357e-07, + "loss": 0.5066482424736023, + "step": 6015 + }, + { + "epoch": 1.3868587896253604, + "grad_norm": 1.754271147284867, + "learning_rate": 4.7177705702815175e-07, + "loss": 0.4697442650794983, + "step": 6016 + }, + { + "epoch": 1.3870893371757926, + "grad_norm": 1.6770775757631975, + "learning_rate": 4.71453442861948e-07, + "loss": 0.4051814079284668, + "step": 6017 + }, + { + "epoch": 1.387319884726225, + "grad_norm": 1.6336553716475837, + "learning_rate": 4.711299054880612e-07, + "loss": 0.5127224326133728, + "step": 6018 + }, + { + "epoch": 1.3875504322766572, + "grad_norm": 1.7922826477354121, + "learning_rate": 4.708064449534973e-07, + "loss": 0.3973507285118103, + "step": 6019 + }, + { + "epoch": 1.3877809798270893, + "grad_norm": 1.597857166916995, + "learning_rate": 4.704830613052515e-07, + "loss": 0.505881667137146, + "step": 6020 + }, + { + "epoch": 1.3880115273775215, + "grad_norm": 1.4925723751693731, + "learning_rate": 4.701597545903089e-07, + "loss": 0.4754818081855774, + "step": 6021 + }, + { + "epoch": 1.3882420749279538, + "grad_norm": 1.4074084258772404, + "learning_rate": 4.6983652485564163e-07, + "loss": 0.4562879204750061, + "step": 6022 + }, + { + "epoch": 1.388472622478386, + "grad_norm": 1.7249030074653906, + "learning_rate": 4.6951337214821216e-07, + "loss": 0.5570380091667175, + "step": 6023 + }, + { + "epoch": 1.3887031700288184, + "grad_norm": 1.9969873123775743, + "learning_rate": 4.6919029651497157e-07, + "loss": 0.4170283079147339, + "step": 6024 + }, + { + "epoch": 1.3889337175792507, + "grad_norm": 1.5864056005736995, + "learning_rate": 4.68867298002859e-07, + "loss": 0.4627062976360321, + "step": 6025 + }, + { + "epoch": 1.389164265129683, + "grad_norm": 1.6317114770758834, + "learning_rate": 4.685443766588023e-07, + "loss": 0.47997409105300903, + "step": 6026 + }, + { + "epoch": 1.3893948126801152, + "grad_norm": 1.5433535584548672, + "learning_rate": 4.682215325297195e-07, + "loss": 0.4558975100517273, + "step": 6027 + }, + { + "epoch": 1.3896253602305475, + "grad_norm": 1.500194393702039, + "learning_rate": 4.6789876566251573e-07, + "loss": 0.47624218463897705, + "step": 6028 + }, + { + "epoch": 1.3898559077809798, + "grad_norm": 1.4670498948839097, + "learning_rate": 4.6757607610408623e-07, + "loss": 0.43521273136138916, + "step": 6029 + }, + { + "epoch": 1.390086455331412, + "grad_norm": 1.4064558734872, + "learning_rate": 4.672534639013139e-07, + "loss": 0.4876500368118286, + "step": 6030 + }, + { + "epoch": 1.3903170028818443, + "grad_norm": 1.953687488635846, + "learning_rate": 4.6693092910107157e-07, + "loss": 0.5351635217666626, + "step": 6031 + }, + { + "epoch": 1.3905475504322766, + "grad_norm": 1.5870986874022448, + "learning_rate": 4.6660847175021976e-07, + "loss": 0.4978008270263672, + "step": 6032 + }, + { + "epoch": 1.390778097982709, + "grad_norm": 1.8145501622265596, + "learning_rate": 4.66286091895608e-07, + "loss": 0.532027542591095, + "step": 6033 + }, + { + "epoch": 1.3910086455331412, + "grad_norm": 1.4756865518363598, + "learning_rate": 4.659637895840748e-07, + "loss": 0.40873438119888306, + "step": 6034 + }, + { + "epoch": 1.3912391930835735, + "grad_norm": 1.3799051211541253, + "learning_rate": 4.65641564862448e-07, + "loss": 0.3943024277687073, + "step": 6035 + }, + { + "epoch": 1.3914697406340057, + "grad_norm": 1.6362745005680124, + "learning_rate": 4.6531941777754257e-07, + "loss": 0.4594842493534088, + "step": 6036 + }, + { + "epoch": 1.391700288184438, + "grad_norm": 1.5130847369004203, + "learning_rate": 4.649973483761643e-07, + "loss": 0.429911732673645, + "step": 6037 + }, + { + "epoch": 1.3919308357348703, + "grad_norm": 1.5953387971551207, + "learning_rate": 4.6467535670510516e-07, + "loss": 0.4754660725593567, + "step": 6038 + }, + { + "epoch": 1.3921613832853026, + "grad_norm": 1.7926411696243407, + "learning_rate": 4.6435344281114775e-07, + "loss": 0.49685293436050415, + "step": 6039 + }, + { + "epoch": 1.3923919308357349, + "grad_norm": 1.7779518102975276, + "learning_rate": 4.640316067410632e-07, + "loss": 0.483062207698822, + "step": 6040 + }, + { + "epoch": 1.3926224783861672, + "grad_norm": 1.7120940563362925, + "learning_rate": 4.6370984854161033e-07, + "loss": 0.511294960975647, + "step": 6041 + }, + { + "epoch": 1.3928530259365994, + "grad_norm": 1.7910142322176623, + "learning_rate": 4.633881682595375e-07, + "loss": 0.5286852717399597, + "step": 6042 + }, + { + "epoch": 1.3930835734870317, + "grad_norm": 1.5909865165506805, + "learning_rate": 4.630665659415823e-07, + "loss": 0.49661144614219666, + "step": 6043 + }, + { + "epoch": 1.393314121037464, + "grad_norm": 1.6939702924642166, + "learning_rate": 4.627450416344687e-07, + "loss": 0.48525696992874146, + "step": 6044 + }, + { + "epoch": 1.3935446685878963, + "grad_norm": 1.9008448770845712, + "learning_rate": 4.6242359538491205e-07, + "loss": 0.4805188775062561, + "step": 6045 + }, + { + "epoch": 1.3937752161383286, + "grad_norm": 1.466547085894007, + "learning_rate": 4.6210222723961436e-07, + "loss": 0.5583043098449707, + "step": 6046 + }, + { + "epoch": 1.3940057636887608, + "grad_norm": 1.767099349933789, + "learning_rate": 4.6178093724526724e-07, + "loss": 0.4752922058105469, + "step": 6047 + }, + { + "epoch": 1.3942363112391931, + "grad_norm": 2.1204506587480236, + "learning_rate": 4.614597254485515e-07, + "loss": 0.5305292010307312, + "step": 6048 + }, + { + "epoch": 1.3944668587896254, + "grad_norm": 1.6156047909242823, + "learning_rate": 4.611385918961352e-07, + "loss": 0.4300711154937744, + "step": 6049 + }, + { + "epoch": 1.3946974063400577, + "grad_norm": 1.8395044872177642, + "learning_rate": 4.6081753663467546e-07, + "loss": 0.4568009376525879, + "step": 6050 + }, + { + "epoch": 1.39492795389049, + "grad_norm": 1.515683664151756, + "learning_rate": 4.6049655971081913e-07, + "loss": 0.5203668475151062, + "step": 6051 + }, + { + "epoch": 1.3951585014409222, + "grad_norm": 1.4603584943150332, + "learning_rate": 4.601756611711999e-07, + "loss": 0.40216517448425293, + "step": 6052 + }, + { + "epoch": 1.3953890489913545, + "grad_norm": 1.4642830157949756, + "learning_rate": 4.5985484106244175e-07, + "loss": 0.4311853349208832, + "step": 6053 + }, + { + "epoch": 1.3956195965417868, + "grad_norm": 1.5517607357762497, + "learning_rate": 4.5953409943115584e-07, + "loss": 0.5079714059829712, + "step": 6054 + }, + { + "epoch": 1.395850144092219, + "grad_norm": 1.5088915517004085, + "learning_rate": 4.5921343632394327e-07, + "loss": 0.4816412925720215, + "step": 6055 + }, + { + "epoch": 1.3960806916426514, + "grad_norm": 1.8928583094761373, + "learning_rate": 4.588928517873928e-07, + "loss": 0.4365989863872528, + "step": 6056 + }, + { + "epoch": 1.3963112391930836, + "grad_norm": 1.7375818461870982, + "learning_rate": 4.5857234586808144e-07, + "loss": 0.47723880410194397, + "step": 6057 + }, + { + "epoch": 1.396541786743516, + "grad_norm": 1.6552182086251737, + "learning_rate": 4.5825191861257596e-07, + "loss": 0.4895835518836975, + "step": 6058 + }, + { + "epoch": 1.3967723342939482, + "grad_norm": 1.4001733151811946, + "learning_rate": 4.5793157006743145e-07, + "loss": 0.4960166811943054, + "step": 6059 + }, + { + "epoch": 1.3970028818443805, + "grad_norm": 1.6684440025816993, + "learning_rate": 4.5761130027919025e-07, + "loss": 0.4520935118198395, + "step": 6060 + }, + { + "epoch": 1.3972334293948125, + "grad_norm": 1.481564869888927, + "learning_rate": 4.572911092943852e-07, + "loss": 0.4755667448043823, + "step": 6061 + }, + { + "epoch": 1.3974639769452448, + "grad_norm": 1.456572260239621, + "learning_rate": 4.5697099715953634e-07, + "loss": 0.4399319887161255, + "step": 6062 + }, + { + "epoch": 1.397694524495677, + "grad_norm": 1.7189931326092531, + "learning_rate": 4.566509639211521e-07, + "loss": 0.4009808599948883, + "step": 6063 + }, + { + "epoch": 1.3979250720461094, + "grad_norm": 1.6812792934483527, + "learning_rate": 4.563310096257309e-07, + "loss": 0.4784051477909088, + "step": 6064 + }, + { + "epoch": 1.3981556195965417, + "grad_norm": 1.8540756549671957, + "learning_rate": 4.560111343197579e-07, + "loss": 0.521167516708374, + "step": 6065 + }, + { + "epoch": 1.398386167146974, + "grad_norm": 1.9181010135961398, + "learning_rate": 4.556913380497085e-07, + "loss": 0.44407376646995544, + "step": 6066 + }, + { + "epoch": 1.3986167146974062, + "grad_norm": 1.6432970918022198, + "learning_rate": 4.5537162086204495e-07, + "loss": 0.4660610556602478, + "step": 6067 + }, + { + "epoch": 1.3988472622478385, + "grad_norm": 1.8554905581879808, + "learning_rate": 4.5505198280321967e-07, + "loss": 0.45331743359565735, + "step": 6068 + }, + { + "epoch": 1.3990778097982708, + "grad_norm": 1.3269229239434595, + "learning_rate": 4.5473242391967227e-07, + "loss": 0.42603427171707153, + "step": 6069 + }, + { + "epoch": 1.399308357348703, + "grad_norm": 1.5609089936334382, + "learning_rate": 4.5441294425783094e-07, + "loss": 0.4841277599334717, + "step": 6070 + }, + { + "epoch": 1.3995389048991353, + "grad_norm": 1.6494964083060812, + "learning_rate": 4.5409354386411326e-07, + "loss": 0.5001981258392334, + "step": 6071 + }, + { + "epoch": 1.3997694524495676, + "grad_norm": 1.6407020689096197, + "learning_rate": 4.5377422278492493e-07, + "loss": 0.4393565058708191, + "step": 6072 + }, + { + "epoch": 1.4, + "grad_norm": 2.0348511108564753, + "learning_rate": 4.534549810666596e-07, + "loss": 0.48033279180526733, + "step": 6073 + }, + { + "epoch": 1.4002305475504322, + "grad_norm": 1.557372777178074, + "learning_rate": 4.5313581875570015e-07, + "loss": 0.5052364468574524, + "step": 6074 + }, + { + "epoch": 1.4004610951008645, + "grad_norm": 1.665250892528103, + "learning_rate": 4.528167358984173e-07, + "loss": 0.49901437759399414, + "step": 6075 + }, + { + "epoch": 1.4006916426512968, + "grad_norm": 1.7238877472049492, + "learning_rate": 4.524977325411702e-07, + "loss": 0.47194209694862366, + "step": 6076 + }, + { + "epoch": 1.400922190201729, + "grad_norm": 1.6051143361503823, + "learning_rate": 4.5217880873030734e-07, + "loss": 0.5385118722915649, + "step": 6077 + }, + { + "epoch": 1.4011527377521613, + "grad_norm": 1.6829333959036268, + "learning_rate": 4.5185996451216435e-07, + "loss": 0.42518895864486694, + "step": 6078 + }, + { + "epoch": 1.4013832853025936, + "grad_norm": 1.9857639773328855, + "learning_rate": 4.515411999330664e-07, + "loss": 0.6034430861473083, + "step": 6079 + }, + { + "epoch": 1.4016138328530259, + "grad_norm": 2.1725180303091105, + "learning_rate": 4.5122251503932684e-07, + "loss": 0.46876388788223267, + "step": 6080 + }, + { + "epoch": 1.4018443804034582, + "grad_norm": 1.4349271940518988, + "learning_rate": 4.5090390987724713e-07, + "loss": 0.4204791784286499, + "step": 6081 + }, + { + "epoch": 1.4020749279538904, + "grad_norm": 1.5732232756880702, + "learning_rate": 4.505853844931171e-07, + "loss": 0.44004327058792114, + "step": 6082 + }, + { + "epoch": 1.4023054755043227, + "grad_norm": 1.7944396338097273, + "learning_rate": 4.502669389332149e-07, + "loss": 0.47575461864471436, + "step": 6083 + }, + { + "epoch": 1.402536023054755, + "grad_norm": 1.7168372511929604, + "learning_rate": 4.4994857324380773e-07, + "loss": 0.5163394212722778, + "step": 6084 + }, + { + "epoch": 1.4027665706051873, + "grad_norm": 1.3499104261023827, + "learning_rate": 4.496302874711512e-07, + "loss": 0.40582704544067383, + "step": 6085 + }, + { + "epoch": 1.4029971181556196, + "grad_norm": 1.8288150818970852, + "learning_rate": 4.49312081661488e-07, + "loss": 0.5648316740989685, + "step": 6086 + }, + { + "epoch": 1.4032276657060518, + "grad_norm": 1.536379330296098, + "learning_rate": 4.4899395586105113e-07, + "loss": 0.4152177572250366, + "step": 6087 + }, + { + "epoch": 1.4034582132564841, + "grad_norm": 1.4582400127460118, + "learning_rate": 4.4867591011606057e-07, + "loss": 0.395770400762558, + "step": 6088 + }, + { + "epoch": 1.4036887608069164, + "grad_norm": 1.6620585836074195, + "learning_rate": 4.4835794447272446e-07, + "loss": 0.5302882790565491, + "step": 6089 + }, + { + "epoch": 1.4039193083573487, + "grad_norm": 1.8122041008088263, + "learning_rate": 4.4804005897724084e-07, + "loss": 0.5821331739425659, + "step": 6090 + }, + { + "epoch": 1.404149855907781, + "grad_norm": 1.8462465057477913, + "learning_rate": 4.477222536757943e-07, + "loss": 0.518589973449707, + "step": 6091 + }, + { + "epoch": 1.4043804034582132, + "grad_norm": 1.6347459739887822, + "learning_rate": 4.47404528614559e-07, + "loss": 0.4511559009552002, + "step": 6092 + }, + { + "epoch": 1.4046109510086455, + "grad_norm": 1.3912951974967376, + "learning_rate": 4.470868838396976e-07, + "loss": 0.4637323021888733, + "step": 6093 + }, + { + "epoch": 1.4048414985590778, + "grad_norm": 1.7391893367323972, + "learning_rate": 4.467693193973602e-07, + "loss": 0.5015200972557068, + "step": 6094 + }, + { + "epoch": 1.40507204610951, + "grad_norm": 1.6600607094430424, + "learning_rate": 4.4645183533368515e-07, + "loss": 0.34618085622787476, + "step": 6095 + }, + { + "epoch": 1.4053025936599424, + "grad_norm": 1.8174784404033468, + "learning_rate": 4.4613443169480023e-07, + "loss": 0.5129716396331787, + "step": 6096 + }, + { + "epoch": 1.4055331412103746, + "grad_norm": 1.6451722890507783, + "learning_rate": 4.458171085268204e-07, + "loss": 0.48730310797691345, + "step": 6097 + }, + { + "epoch": 1.405763688760807, + "grad_norm": 2.017135112557972, + "learning_rate": 4.4549986587584996e-07, + "loss": 0.4687865376472473, + "step": 6098 + }, + { + "epoch": 1.4059942363112392, + "grad_norm": 1.7161898202788912, + "learning_rate": 4.451827037879804e-07, + "loss": 0.43602675199508667, + "step": 6099 + }, + { + "epoch": 1.4062247838616715, + "grad_norm": 1.5398135343338488, + "learning_rate": 4.448656223092926e-07, + "loss": 0.40164947509765625, + "step": 6100 + }, + { + "epoch": 1.4064553314121038, + "grad_norm": 1.892876938818121, + "learning_rate": 4.4454862148585494e-07, + "loss": 0.457908570766449, + "step": 6101 + }, + { + "epoch": 1.406685878962536, + "grad_norm": 1.8597516649322154, + "learning_rate": 4.442317013637239e-07, + "loss": 0.4982607960700989, + "step": 6102 + }, + { + "epoch": 1.4069164265129683, + "grad_norm": 1.686924482899156, + "learning_rate": 4.439148619889451e-07, + "loss": 0.4464913606643677, + "step": 6103 + }, + { + "epoch": 1.4071469740634006, + "grad_norm": 1.4308914648810174, + "learning_rate": 4.435981034075525e-07, + "loss": 0.4235959053039551, + "step": 6104 + }, + { + "epoch": 1.407377521613833, + "grad_norm": 1.6685279003688986, + "learning_rate": 4.432814256655669e-07, + "loss": 0.48098224401474, + "step": 6105 + }, + { + "epoch": 1.4076080691642652, + "grad_norm": 1.8164478402131154, + "learning_rate": 4.429648288089992e-07, + "loss": 0.5259070992469788, + "step": 6106 + }, + { + "epoch": 1.4078386167146975, + "grad_norm": 2.1051952106576706, + "learning_rate": 4.426483128838471e-07, + "loss": 0.43548309803009033, + "step": 6107 + }, + { + "epoch": 1.4080691642651297, + "grad_norm": 2.1581744627863597, + "learning_rate": 4.423318779360966e-07, + "loss": 0.4666636288166046, + "step": 6108 + }, + { + "epoch": 1.408299711815562, + "grad_norm": 1.7160181832394292, + "learning_rate": 4.4201552401172346e-07, + "loss": 0.48951369524002075, + "step": 6109 + }, + { + "epoch": 1.4085302593659943, + "grad_norm": 2.086438615634531, + "learning_rate": 4.416992511566897e-07, + "loss": 0.5804085731506348, + "step": 6110 + }, + { + "epoch": 1.4087608069164266, + "grad_norm": 1.5393560212970487, + "learning_rate": 4.413830594169472e-07, + "loss": 0.5000404119491577, + "step": 6111 + }, + { + "epoch": 1.4089913544668589, + "grad_norm": 1.5396501247377221, + "learning_rate": 4.410669488384347e-07, + "loss": 0.4344940185546875, + "step": 6112 + }, + { + "epoch": 1.4092219020172911, + "grad_norm": 1.9142144594904624, + "learning_rate": 4.407509194670803e-07, + "loss": 0.4730883836746216, + "step": 6113 + }, + { + "epoch": 1.4094524495677234, + "grad_norm": 1.683589524108464, + "learning_rate": 4.404349713487996e-07, + "loss": 0.4169773459434509, + "step": 6114 + }, + { + "epoch": 1.4096829971181557, + "grad_norm": 1.4212947387266628, + "learning_rate": 4.401191045294962e-07, + "loss": 0.38692671060562134, + "step": 6115 + }, + { + "epoch": 1.409913544668588, + "grad_norm": 1.7880224033139351, + "learning_rate": 4.398033190550625e-07, + "loss": 0.4843568801879883, + "step": 6116 + }, + { + "epoch": 1.4101440922190203, + "grad_norm": 1.8161229642449126, + "learning_rate": 4.3948761497137945e-07, + "loss": 0.3911912441253662, + "step": 6117 + }, + { + "epoch": 1.4103746397694525, + "grad_norm": 1.3495609018309418, + "learning_rate": 4.391719923243146e-07, + "loss": 0.4209800958633423, + "step": 6118 + }, + { + "epoch": 1.4106051873198848, + "grad_norm": 1.484656438743802, + "learning_rate": 4.3885645115972536e-07, + "loss": 0.5201029777526855, + "step": 6119 + }, + { + "epoch": 1.410835734870317, + "grad_norm": 1.5580728922190459, + "learning_rate": 4.3854099152345636e-07, + "loss": 0.46659159660339355, + "step": 6120 + }, + { + "epoch": 1.4110662824207494, + "grad_norm": 1.573073448587342, + "learning_rate": 4.3822561346134025e-07, + "loss": 0.3859689235687256, + "step": 6121 + }, + { + "epoch": 1.4112968299711817, + "grad_norm": 1.3691662874620947, + "learning_rate": 4.3791031701919876e-07, + "loss": 0.525857150554657, + "step": 6122 + }, + { + "epoch": 1.411527377521614, + "grad_norm": 1.8712349604235057, + "learning_rate": 4.3759510224284056e-07, + "loss": 0.4414822459220886, + "step": 6123 + }, + { + "epoch": 1.4117579250720462, + "grad_norm": 1.6938210047217055, + "learning_rate": 4.3727996917806353e-07, + "loss": 0.45479732751846313, + "step": 6124 + }, + { + "epoch": 1.4119884726224785, + "grad_norm": 1.6359082859481933, + "learning_rate": 4.3696491787065337e-07, + "loss": 0.44235748052597046, + "step": 6125 + }, + { + "epoch": 1.4122190201729108, + "grad_norm": 1.4596228705783454, + "learning_rate": 4.366499483663836e-07, + "loss": 0.4811703562736511, + "step": 6126 + }, + { + "epoch": 1.412449567723343, + "grad_norm": 1.9449964414371903, + "learning_rate": 4.363350607110158e-07, + "loss": 0.4828331172466278, + "step": 6127 + }, + { + "epoch": 1.4126801152737753, + "grad_norm": 1.4685922441222679, + "learning_rate": 4.360202549502997e-07, + "loss": 0.5047401785850525, + "step": 6128 + }, + { + "epoch": 1.4129106628242076, + "grad_norm": 1.6609285866380643, + "learning_rate": 4.3570553112997357e-07, + "loss": 0.3716857135295868, + "step": 6129 + }, + { + "epoch": 1.4131412103746397, + "grad_norm": 1.633832477639241, + "learning_rate": 4.353908892957638e-07, + "loss": 0.4703335762023926, + "step": 6130 + }, + { + "epoch": 1.413371757925072, + "grad_norm": 1.7488068800395162, + "learning_rate": 4.350763294933841e-07, + "loss": 0.463678240776062, + "step": 6131 + }, + { + "epoch": 1.4136023054755043, + "grad_norm": 1.3006583528006206, + "learning_rate": 4.347618517685373e-07, + "loss": 0.41314953565597534, + "step": 6132 + }, + { + "epoch": 1.4138328530259365, + "grad_norm": 1.5221749066271508, + "learning_rate": 4.3444745616691325e-07, + "loss": 0.46069851517677307, + "step": 6133 + }, + { + "epoch": 1.4140634005763688, + "grad_norm": 1.4483737045255485, + "learning_rate": 4.341331427341902e-07, + "loss": 0.3713275194168091, + "step": 6134 + }, + { + "epoch": 1.414293948126801, + "grad_norm": 2.2040413085700057, + "learning_rate": 4.338189115160353e-07, + "loss": 0.5063982605934143, + "step": 6135 + }, + { + "epoch": 1.4145244956772334, + "grad_norm": 1.7310624628289812, + "learning_rate": 4.335047625581023e-07, + "loss": 0.5195713639259338, + "step": 6136 + }, + { + "epoch": 1.4147550432276657, + "grad_norm": 2.027672927737903, + "learning_rate": 4.331906959060342e-07, + "loss": 0.48258209228515625, + "step": 6137 + }, + { + "epoch": 1.414985590778098, + "grad_norm": 1.5880816797426516, + "learning_rate": 4.3287671160546193e-07, + "loss": 0.47651320695877075, + "step": 6138 + }, + { + "epoch": 1.4152161383285302, + "grad_norm": 1.6724019808978872, + "learning_rate": 4.325628097020038e-07, + "loss": 0.39837729930877686, + "step": 6139 + }, + { + "epoch": 1.4154466858789625, + "grad_norm": 1.6085191304764304, + "learning_rate": 4.322489902412662e-07, + "loss": 0.4356096386909485, + "step": 6140 + }, + { + "epoch": 1.4156772334293948, + "grad_norm": 1.5428745920415274, + "learning_rate": 4.3193525326884426e-07, + "loss": 0.5047112703323364, + "step": 6141 + }, + { + "epoch": 1.415907780979827, + "grad_norm": 1.8689144541994767, + "learning_rate": 4.316215988303203e-07, + "loss": 0.5051916241645813, + "step": 6142 + }, + { + "epoch": 1.4161383285302593, + "grad_norm": 1.6542346539121724, + "learning_rate": 4.313080269712658e-07, + "loss": 0.4928128123283386, + "step": 6143 + }, + { + "epoch": 1.4163688760806916, + "grad_norm": 1.7523545888738172, + "learning_rate": 4.309945377372385e-07, + "loss": 0.4253290891647339, + "step": 6144 + }, + { + "epoch": 1.416599423631124, + "grad_norm": 1.5588360330776463, + "learning_rate": 4.3068113117378603e-07, + "loss": 0.4693288207054138, + "step": 6145 + }, + { + "epoch": 1.4168299711815562, + "grad_norm": 1.7376987458736854, + "learning_rate": 4.3036780732644273e-07, + "loss": 0.3377845287322998, + "step": 6146 + }, + { + "epoch": 1.4170605187319885, + "grad_norm": 1.5961011734430122, + "learning_rate": 4.300545662407309e-07, + "loss": 0.44026291370391846, + "step": 6147 + }, + { + "epoch": 1.4172910662824207, + "grad_norm": 1.6678026768298122, + "learning_rate": 4.297414079621615e-07, + "loss": 0.4033926725387573, + "step": 6148 + }, + { + "epoch": 1.417521613832853, + "grad_norm": 1.591044742689737, + "learning_rate": 4.2942833253623357e-07, + "loss": 0.4513046145439148, + "step": 6149 + }, + { + "epoch": 1.4177521613832853, + "grad_norm": 1.7764371951105735, + "learning_rate": 4.2911534000843306e-07, + "loss": 0.5358277559280396, + "step": 6150 + }, + { + "epoch": 1.4179827089337176, + "grad_norm": 1.5239739695025156, + "learning_rate": 4.2880243042423524e-07, + "loss": 0.47580230236053467, + "step": 6151 + }, + { + "epoch": 1.4182132564841499, + "grad_norm": 1.557650739221615, + "learning_rate": 4.2848960382910225e-07, + "loss": 0.4727135896682739, + "step": 6152 + }, + { + "epoch": 1.4184438040345821, + "grad_norm": 1.71869847258335, + "learning_rate": 4.281768602684841e-07, + "loss": 0.4576184153556824, + "step": 6153 + }, + { + "epoch": 1.4186743515850144, + "grad_norm": 1.8118055618452378, + "learning_rate": 4.2786419978782006e-07, + "loss": 0.4298781156539917, + "step": 6154 + }, + { + "epoch": 1.4189048991354467, + "grad_norm": 1.3541799773484497, + "learning_rate": 4.275516224325355e-07, + "loss": 0.42015182971954346, + "step": 6155 + }, + { + "epoch": 1.419135446685879, + "grad_norm": 1.8522934176845671, + "learning_rate": 4.272391282480455e-07, + "loss": 0.5046502351760864, + "step": 6156 + }, + { + "epoch": 1.4193659942363113, + "grad_norm": 1.7317433079809288, + "learning_rate": 4.2692671727975193e-07, + "loss": 0.46464890241622925, + "step": 6157 + }, + { + "epoch": 1.4195965417867435, + "grad_norm": 1.9285455657348056, + "learning_rate": 4.266143895730444e-07, + "loss": 0.456853449344635, + "step": 6158 + }, + { + "epoch": 1.4198270893371758, + "grad_norm": 1.5730881793585256, + "learning_rate": 4.2630214517330167e-07, + "loss": 0.5618214011192322, + "step": 6159 + }, + { + "epoch": 1.420057636887608, + "grad_norm": 1.8422559070497995, + "learning_rate": 4.259899841258887e-07, + "loss": 0.47811365127563477, + "step": 6160 + }, + { + "epoch": 1.4202881844380404, + "grad_norm": 1.8468838919820687, + "learning_rate": 4.2567790647615974e-07, + "loss": 0.40685200691223145, + "step": 6161 + }, + { + "epoch": 1.4205187319884727, + "grad_norm": 1.9439131357752444, + "learning_rate": 4.2536591226945685e-07, + "loss": 0.4835454821586609, + "step": 6162 + }, + { + "epoch": 1.420749279538905, + "grad_norm": 1.5657796902096128, + "learning_rate": 4.2505400155110904e-07, + "loss": 0.46967822313308716, + "step": 6163 + }, + { + "epoch": 1.4209798270893372, + "grad_norm": 1.4756380896942416, + "learning_rate": 4.247421743664339e-07, + "loss": 0.41301921010017395, + "step": 6164 + }, + { + "epoch": 1.4212103746397695, + "grad_norm": 1.567339455776478, + "learning_rate": 4.2443043076073603e-07, + "loss": 0.4748044013977051, + "step": 6165 + }, + { + "epoch": 1.4214409221902018, + "grad_norm": 1.7509442825646346, + "learning_rate": 4.24118770779309e-07, + "loss": 0.5216407775878906, + "step": 6166 + }, + { + "epoch": 1.421671469740634, + "grad_norm": 1.3159416230441767, + "learning_rate": 4.238071944674343e-07, + "loss": 0.4520043730735779, + "step": 6167 + }, + { + "epoch": 1.4219020172910664, + "grad_norm": 1.5360413927774947, + "learning_rate": 4.2349570187037985e-07, + "loss": 0.3486665189266205, + "step": 6168 + }, + { + "epoch": 1.4221325648414986, + "grad_norm": 1.5354184007061404, + "learning_rate": 4.2318429303340297e-07, + "loss": 0.4190082550048828, + "step": 6169 + }, + { + "epoch": 1.422363112391931, + "grad_norm": 1.7686291285808324, + "learning_rate": 4.228729680017479e-07, + "loss": 0.4878532290458679, + "step": 6170 + }, + { + "epoch": 1.422593659942363, + "grad_norm": 1.5021962460950842, + "learning_rate": 4.225617268206464e-07, + "loss": 0.4264869689941406, + "step": 6171 + }, + { + "epoch": 1.4228242074927953, + "grad_norm": 1.5869703714522394, + "learning_rate": 4.2225056953531933e-07, + "loss": 0.47751015424728394, + "step": 6172 + }, + { + "epoch": 1.4230547550432275, + "grad_norm": 1.9078138295934728, + "learning_rate": 4.21939496190974e-07, + "loss": 0.45385488867759705, + "step": 6173 + }, + { + "epoch": 1.4232853025936598, + "grad_norm": 1.5958951589441288, + "learning_rate": 4.216285068328065e-07, + "loss": 0.4519824981689453, + "step": 6174 + }, + { + "epoch": 1.423515850144092, + "grad_norm": 1.5192412548501215, + "learning_rate": 4.213176015060006e-07, + "loss": 0.49791768193244934, + "step": 6175 + }, + { + "epoch": 1.4237463976945244, + "grad_norm": 1.527247990921823, + "learning_rate": 4.2100678025572724e-07, + "loss": 0.44258758425712585, + "step": 6176 + }, + { + "epoch": 1.4239769452449567, + "grad_norm": 1.6937889737807723, + "learning_rate": 4.2069604312714525e-07, + "loss": 0.4176792502403259, + "step": 6177 + }, + { + "epoch": 1.424207492795389, + "grad_norm": 1.8648960890447026, + "learning_rate": 4.203853901654021e-07, + "loss": 0.4810779392719269, + "step": 6178 + }, + { + "epoch": 1.4244380403458212, + "grad_norm": 1.6056895640711444, + "learning_rate": 4.2007482141563186e-07, + "loss": 0.5062845945358276, + "step": 6179 + }, + { + "epoch": 1.4246685878962535, + "grad_norm": 1.6213901133636355, + "learning_rate": 4.1976433692295754e-07, + "loss": 0.4448728561401367, + "step": 6180 + }, + { + "epoch": 1.4248991354466858, + "grad_norm": 1.4307330121028476, + "learning_rate": 4.1945393673248873e-07, + "loss": 0.47862520813941956, + "step": 6181 + }, + { + "epoch": 1.425129682997118, + "grad_norm": 1.703952709162038, + "learning_rate": 4.1914362088932386e-07, + "loss": 0.48513489961624146, + "step": 6182 + }, + { + "epoch": 1.4253602305475503, + "grad_norm": 2.0858372404714314, + "learning_rate": 4.188333894385484e-07, + "loss": 0.5556698441505432, + "step": 6183 + }, + { + "epoch": 1.4255907780979826, + "grad_norm": 1.5697295606732804, + "learning_rate": 4.185232424252353e-07, + "loss": 0.5382585525512695, + "step": 6184 + }, + { + "epoch": 1.425821325648415, + "grad_norm": 1.834044011941015, + "learning_rate": 4.182131798944462e-07, + "loss": 0.5234952569007874, + "step": 6185 + }, + { + "epoch": 1.4260518731988472, + "grad_norm": 2.054448938324408, + "learning_rate": 4.179032018912301e-07, + "loss": 0.44383174180984497, + "step": 6186 + }, + { + "epoch": 1.4262824207492795, + "grad_norm": 1.6371314228420715, + "learning_rate": 4.1759330846062303e-07, + "loss": 0.47934067249298096, + "step": 6187 + }, + { + "epoch": 1.4265129682997117, + "grad_norm": 1.5588160900794392, + "learning_rate": 4.1728349964764984e-07, + "loss": 0.5123411417007446, + "step": 6188 + }, + { + "epoch": 1.426743515850144, + "grad_norm": 1.4601611877162428, + "learning_rate": 4.1697377549732236e-07, + "loss": 0.3838074803352356, + "step": 6189 + }, + { + "epoch": 1.4269740634005763, + "grad_norm": 1.4713344422959787, + "learning_rate": 4.166641360546399e-07, + "loss": 0.3905826210975647, + "step": 6190 + }, + { + "epoch": 1.4272046109510086, + "grad_norm": 1.6243224441884077, + "learning_rate": 4.1635458136459044e-07, + "loss": 0.4994160830974579, + "step": 6191 + }, + { + "epoch": 1.4274351585014409, + "grad_norm": 1.4096266963123292, + "learning_rate": 4.1604511147214836e-07, + "loss": 0.43674254417419434, + "step": 6192 + }, + { + "epoch": 1.4276657060518732, + "grad_norm": 1.5219590055657222, + "learning_rate": 4.1573572642227694e-07, + "loss": 0.46856212615966797, + "step": 6193 + }, + { + "epoch": 1.4278962536023054, + "grad_norm": 1.5309187370305974, + "learning_rate": 4.1542642625992674e-07, + "loss": 0.4608234167098999, + "step": 6194 + }, + { + "epoch": 1.4281268011527377, + "grad_norm": 1.4332212544399399, + "learning_rate": 4.151172110300356e-07, + "loss": 0.4033564329147339, + "step": 6195 + }, + { + "epoch": 1.42835734870317, + "grad_norm": 1.6166843007495713, + "learning_rate": 4.1480808077752936e-07, + "loss": 0.4321993589401245, + "step": 6196 + }, + { + "epoch": 1.4285878962536023, + "grad_norm": 1.4833337580993968, + "learning_rate": 4.1449903554732104e-07, + "loss": 0.40581172704696655, + "step": 6197 + }, + { + "epoch": 1.4288184438040346, + "grad_norm": 1.8252750599229672, + "learning_rate": 4.1419007538431186e-07, + "loss": 0.4893447160720825, + "step": 6198 + }, + { + "epoch": 1.4290489913544668, + "grad_norm": 1.5117375099736445, + "learning_rate": 4.138812003333911e-07, + "loss": 0.43434032797813416, + "step": 6199 + }, + { + "epoch": 1.4292795389048991, + "grad_norm": 1.806419884918517, + "learning_rate": 4.1357241043943426e-07, + "loss": 0.5144214630126953, + "step": 6200 + }, + { + "epoch": 1.4295100864553314, + "grad_norm": 1.5993245565697396, + "learning_rate": 4.132637057473062e-07, + "loss": 0.44391340017318726, + "step": 6201 + }, + { + "epoch": 1.4297406340057637, + "grad_norm": 1.4128063462984315, + "learning_rate": 4.1295508630185785e-07, + "loss": 0.41327035427093506, + "step": 6202 + }, + { + "epoch": 1.429971181556196, + "grad_norm": 1.4869234266710627, + "learning_rate": 4.126465521479282e-07, + "loss": 0.4482381343841553, + "step": 6203 + }, + { + "epoch": 1.4302017291066282, + "grad_norm": 1.7027878810602206, + "learning_rate": 4.123381033303448e-07, + "loss": 0.6047927737236023, + "step": 6204 + }, + { + "epoch": 1.4304322766570605, + "grad_norm": 1.842833087774749, + "learning_rate": 4.1202973989392133e-07, + "loss": 0.5015072822570801, + "step": 6205 + }, + { + "epoch": 1.4306628242074928, + "grad_norm": 1.700957541882254, + "learning_rate": 4.117214618834601e-07, + "loss": 0.38783231377601624, + "step": 6206 + }, + { + "epoch": 1.430893371757925, + "grad_norm": 1.838628795440048, + "learning_rate": 4.1141326934375107e-07, + "loss": 0.5287540555000305, + "step": 6207 + }, + { + "epoch": 1.4311239193083574, + "grad_norm": 1.5517084366812568, + "learning_rate": 4.1110516231957103e-07, + "loss": 0.475554883480072, + "step": 6208 + }, + { + "epoch": 1.4313544668587896, + "grad_norm": 1.6639045521148432, + "learning_rate": 4.1079714085568486e-07, + "loss": 0.5235040187835693, + "step": 6209 + }, + { + "epoch": 1.431585014409222, + "grad_norm": 1.9325689770095837, + "learning_rate": 4.1048920499684427e-07, + "loss": 0.45399802923202515, + "step": 6210 + }, + { + "epoch": 1.4318155619596542, + "grad_norm": 1.3260149665798413, + "learning_rate": 4.101813547877897e-07, + "loss": 0.4158547520637512, + "step": 6211 + }, + { + "epoch": 1.4320461095100865, + "grad_norm": 1.558704413544226, + "learning_rate": 4.0987359027324886e-07, + "loss": 0.41257238388061523, + "step": 6212 + }, + { + "epoch": 1.4322766570605188, + "grad_norm": 1.891001021238512, + "learning_rate": 4.0956591149793607e-07, + "loss": 0.5313294529914856, + "step": 6213 + }, + { + "epoch": 1.432507204610951, + "grad_norm": 1.2595979569069053, + "learning_rate": 4.0925831850655444e-07, + "loss": 0.4157707095146179, + "step": 6214 + }, + { + "epoch": 1.4327377521613833, + "grad_norm": 1.2040642494272713, + "learning_rate": 4.0895081134379394e-07, + "loss": 0.4087299704551697, + "step": 6215 + }, + { + "epoch": 1.4329682997118156, + "grad_norm": 1.7148771732386008, + "learning_rate": 4.0864339005433145e-07, + "loss": 0.3862289488315582, + "step": 6216 + }, + { + "epoch": 1.4331988472622479, + "grad_norm": 1.9798056573423377, + "learning_rate": 4.0833605468283317e-07, + "loss": 0.523725152015686, + "step": 6217 + }, + { + "epoch": 1.4334293948126802, + "grad_norm": 1.4407692345614906, + "learning_rate": 4.0802880527395067e-07, + "loss": 0.49756765365600586, + "step": 6218 + }, + { + "epoch": 1.4336599423631124, + "grad_norm": 1.367214788762715, + "learning_rate": 4.077216418723246e-07, + "loss": 0.46542513370513916, + "step": 6219 + }, + { + "epoch": 1.4338904899135447, + "grad_norm": 1.4834393486713002, + "learning_rate": 4.0741456452258307e-07, + "loss": 0.477664589881897, + "step": 6220 + }, + { + "epoch": 1.434121037463977, + "grad_norm": 1.7561511956990303, + "learning_rate": 4.0710757326934074e-07, + "loss": 0.43805643916130066, + "step": 6221 + }, + { + "epoch": 1.4343515850144093, + "grad_norm": 1.4656378770889396, + "learning_rate": 4.0680066815719995e-07, + "loss": 0.42762941122055054, + "step": 6222 + }, + { + "epoch": 1.4345821325648416, + "grad_norm": 1.3924764249205592, + "learning_rate": 4.064938492307515e-07, + "loss": 0.39931389689445496, + "step": 6223 + }, + { + "epoch": 1.4348126801152739, + "grad_norm": 1.7265894992285387, + "learning_rate": 4.0618711653457216e-07, + "loss": 0.4285869002342224, + "step": 6224 + }, + { + "epoch": 1.4350432276657061, + "grad_norm": 1.509947149065422, + "learning_rate": 4.05880470113228e-07, + "loss": 0.36583176255226135, + "step": 6225 + }, + { + "epoch": 1.4352737752161384, + "grad_norm": 1.8527675270132944, + "learning_rate": 4.055739100112705e-07, + "loss": 0.6161515712738037, + "step": 6226 + }, + { + "epoch": 1.4355043227665707, + "grad_norm": 1.952912084401196, + "learning_rate": 4.0526743627324064e-07, + "loss": 0.5286136269569397, + "step": 6227 + }, + { + "epoch": 1.435734870317003, + "grad_norm": 1.5581410654741574, + "learning_rate": 4.0496104894366547e-07, + "loss": 0.5855327844619751, + "step": 6228 + }, + { + "epoch": 1.4359654178674353, + "grad_norm": 1.4944372815532387, + "learning_rate": 4.0465474806705937e-07, + "loss": 0.47179684042930603, + "step": 6229 + }, + { + "epoch": 1.4361959654178675, + "grad_norm": 1.4543699646458046, + "learning_rate": 4.043485336879252e-07, + "loss": 0.3799450993537903, + "step": 6230 + }, + { + "epoch": 1.4364265129682998, + "grad_norm": 1.6053005008404755, + "learning_rate": 4.040424058507529e-07, + "loss": 0.49765193462371826, + "step": 6231 + }, + { + "epoch": 1.436657060518732, + "grad_norm": 1.729005204741036, + "learning_rate": 4.0373636460001917e-07, + "loss": 0.4718540906906128, + "step": 6232 + }, + { + "epoch": 1.4368876080691644, + "grad_norm": 1.6500825373448902, + "learning_rate": 4.034304099801891e-07, + "loss": 0.566913366317749, + "step": 6233 + }, + { + "epoch": 1.4371181556195967, + "grad_norm": 1.4341252322387557, + "learning_rate": 4.0312454203571455e-07, + "loss": 0.3900049924850464, + "step": 6234 + }, + { + "epoch": 1.437348703170029, + "grad_norm": 1.955668386191478, + "learning_rate": 4.028187608110345e-07, + "loss": 0.3742540180683136, + "step": 6235 + }, + { + "epoch": 1.4375792507204612, + "grad_norm": 1.5530650034379911, + "learning_rate": 4.025130663505765e-07, + "loss": 0.4583272933959961, + "step": 6236 + }, + { + "epoch": 1.4378097982708935, + "grad_norm": 2.011266106765332, + "learning_rate": 4.0220745869875394e-07, + "loss": 0.4786511957645416, + "step": 6237 + }, + { + "epoch": 1.4380403458213258, + "grad_norm": 1.5855947294064638, + "learning_rate": 4.0190193789996907e-07, + "loss": 0.5637160539627075, + "step": 6238 + }, + { + "epoch": 1.438270893371758, + "grad_norm": 1.6000199656569591, + "learning_rate": 4.015965039986111e-07, + "loss": 0.5228704214096069, + "step": 6239 + }, + { + "epoch": 1.4385014409221901, + "grad_norm": 1.5727211756275186, + "learning_rate": 4.012911570390559e-07, + "loss": 0.4603196084499359, + "step": 6240 + }, + { + "epoch": 1.4387319884726224, + "grad_norm": 1.7564085712912887, + "learning_rate": 4.0098589706566743e-07, + "loss": 0.4373057186603546, + "step": 6241 + }, + { + "epoch": 1.4389625360230547, + "grad_norm": 1.3059861405345108, + "learning_rate": 4.006807241227964e-07, + "loss": 0.3848613500595093, + "step": 6242 + }, + { + "epoch": 1.439193083573487, + "grad_norm": 1.773866478707716, + "learning_rate": 4.0037563825478147e-07, + "loss": 0.45864033699035645, + "step": 6243 + }, + { + "epoch": 1.4394236311239192, + "grad_norm": 1.7833304023969065, + "learning_rate": 4.0007063950594887e-07, + "loss": 0.5278322696685791, + "step": 6244 + }, + { + "epoch": 1.4396541786743515, + "grad_norm": 1.652601767877407, + "learning_rate": 3.9976572792061115e-07, + "loss": 0.500469982624054, + "step": 6245 + }, + { + "epoch": 1.4398847262247838, + "grad_norm": 1.6838341724119685, + "learning_rate": 3.994609035430694e-07, + "loss": 0.487979531288147, + "step": 6246 + }, + { + "epoch": 1.440115273775216, + "grad_norm": 1.6309708233356133, + "learning_rate": 3.9915616641761096e-07, + "loss": 0.5660527944564819, + "step": 6247 + }, + { + "epoch": 1.4403458213256484, + "grad_norm": 1.6648921828279533, + "learning_rate": 3.988515165885108e-07, + "loss": 0.41596394777297974, + "step": 6248 + }, + { + "epoch": 1.4405763688760806, + "grad_norm": 1.8488691582565622, + "learning_rate": 3.9854695410003204e-07, + "loss": 0.41838061809539795, + "step": 6249 + }, + { + "epoch": 1.440806916426513, + "grad_norm": 1.8635370267148315, + "learning_rate": 3.982424789964237e-07, + "loss": 0.4344887137413025, + "step": 6250 + }, + { + "epoch": 1.4410374639769452, + "grad_norm": 1.596955676535653, + "learning_rate": 3.979380913219231e-07, + "loss": 0.44062328338623047, + "step": 6251 + }, + { + "epoch": 1.4412680115273775, + "grad_norm": 1.8121811553129317, + "learning_rate": 3.976337911207552e-07, + "loss": 0.4305090308189392, + "step": 6252 + }, + { + "epoch": 1.4414985590778098, + "grad_norm": 1.5838295963307605, + "learning_rate": 3.9732957843713113e-07, + "loss": 0.502083957195282, + "step": 6253 + }, + { + "epoch": 1.441729106628242, + "grad_norm": 1.427531595240151, + "learning_rate": 3.9702545331524986e-07, + "loss": 0.47407883405685425, + "step": 6254 + }, + { + "epoch": 1.4419596541786743, + "grad_norm": 1.7242438620668297, + "learning_rate": 3.967214157992972e-07, + "loss": 0.4704144597053528, + "step": 6255 + }, + { + "epoch": 1.4421902017291066, + "grad_norm": 1.451462667384279, + "learning_rate": 3.9641746593344705e-07, + "loss": 0.4758627116680145, + "step": 6256 + }, + { + "epoch": 1.442420749279539, + "grad_norm": 1.5373451481480342, + "learning_rate": 3.961136037618605e-07, + "loss": 0.4934813976287842, + "step": 6257 + }, + { + "epoch": 1.4426512968299712, + "grad_norm": 1.2690471851311347, + "learning_rate": 3.958098293286849e-07, + "loss": 0.4143943190574646, + "step": 6258 + }, + { + "epoch": 1.4428818443804035, + "grad_norm": 1.654088578407426, + "learning_rate": 3.9550614267805613e-07, + "loss": 0.5096204876899719, + "step": 6259 + }, + { + "epoch": 1.4431123919308357, + "grad_norm": 1.8140254863740195, + "learning_rate": 3.9520254385409647e-07, + "loss": 0.5065678358078003, + "step": 6260 + }, + { + "epoch": 1.443342939481268, + "grad_norm": 1.8200536333409338, + "learning_rate": 3.948990329009152e-07, + "loss": 0.5405898094177246, + "step": 6261 + }, + { + "epoch": 1.4435734870317003, + "grad_norm": 1.414774723719948, + "learning_rate": 3.945956098626101e-07, + "loss": 0.555136501789093, + "step": 6262 + }, + { + "epoch": 1.4438040345821326, + "grad_norm": 1.4749429352123176, + "learning_rate": 3.9429227478326466e-07, + "loss": 0.4158381223678589, + "step": 6263 + }, + { + "epoch": 1.4440345821325649, + "grad_norm": 1.8063967161180363, + "learning_rate": 3.9398902770695065e-07, + "loss": 0.5013213157653809, + "step": 6264 + }, + { + "epoch": 1.4442651296829971, + "grad_norm": 1.6664196354264296, + "learning_rate": 3.936858686777269e-07, + "loss": 0.4610293209552765, + "step": 6265 + }, + { + "epoch": 1.4444956772334294, + "grad_norm": 1.5189284441427042, + "learning_rate": 3.933827977396392e-07, + "loss": 0.4541108310222626, + "step": 6266 + }, + { + "epoch": 1.4447262247838617, + "grad_norm": 1.7447582201728986, + "learning_rate": 3.9307981493672017e-07, + "loss": 0.5167892575263977, + "step": 6267 + }, + { + "epoch": 1.444956772334294, + "grad_norm": 1.7117416076745173, + "learning_rate": 3.927769203129907e-07, + "loss": 0.49022093415260315, + "step": 6268 + }, + { + "epoch": 1.4451873198847263, + "grad_norm": 1.551473147802093, + "learning_rate": 3.924741139124574e-07, + "loss": 0.5062391757965088, + "step": 6269 + }, + { + "epoch": 1.4454178674351585, + "grad_norm": 1.4758917732598034, + "learning_rate": 3.9217139577911586e-07, + "loss": 0.38461071252822876, + "step": 6270 + }, + { + "epoch": 1.4456484149855908, + "grad_norm": 1.6377148611250167, + "learning_rate": 3.9186876595694706e-07, + "loss": 0.48669880628585815, + "step": 6271 + }, + { + "epoch": 1.445878962536023, + "grad_norm": 1.560585881225336, + "learning_rate": 3.915662244899206e-07, + "loss": 0.4502665400505066, + "step": 6272 + }, + { + "epoch": 1.4461095100864554, + "grad_norm": 1.8121212441561765, + "learning_rate": 3.912637714219923e-07, + "loss": 0.46156373620033264, + "step": 6273 + }, + { + "epoch": 1.4463400576368877, + "grad_norm": 1.6312679824277865, + "learning_rate": 3.909614067971051e-07, + "loss": 0.45263969898223877, + "step": 6274 + }, + { + "epoch": 1.44657060518732, + "grad_norm": 1.5703236593794554, + "learning_rate": 3.906591306591899e-07, + "loss": 0.4911407232284546, + "step": 6275 + }, + { + "epoch": 1.4468011527377522, + "grad_norm": 1.6102563776848093, + "learning_rate": 3.903569430521644e-07, + "loss": 0.4695231318473816, + "step": 6276 + }, + { + "epoch": 1.4470317002881845, + "grad_norm": 1.6730607470278018, + "learning_rate": 3.9005484401993314e-07, + "loss": 0.43057340383529663, + "step": 6277 + }, + { + "epoch": 1.4472622478386168, + "grad_norm": 1.7564179291630202, + "learning_rate": 3.897528336063879e-07, + "loss": 0.49983319640159607, + "step": 6278 + }, + { + "epoch": 1.447492795389049, + "grad_norm": 1.5495814925532476, + "learning_rate": 3.8945091185540725e-07, + "loss": 0.46399611234664917, + "step": 6279 + }, + { + "epoch": 1.4477233429394814, + "grad_norm": 1.6622842234199082, + "learning_rate": 3.891490788108578e-07, + "loss": 0.44408589601516724, + "step": 6280 + }, + { + "epoch": 1.4479538904899134, + "grad_norm": 1.922048473357172, + "learning_rate": 3.888473345165929e-07, + "loss": 0.3725231885910034, + "step": 6281 + }, + { + "epoch": 1.4481844380403457, + "grad_norm": 1.7108395707282047, + "learning_rate": 3.885456790164523e-07, + "loss": 0.4412611722946167, + "step": 6282 + }, + { + "epoch": 1.448414985590778, + "grad_norm": 1.5115694649774922, + "learning_rate": 3.8824411235426404e-07, + "loss": 0.48282700777053833, + "step": 6283 + }, + { + "epoch": 1.4486455331412103, + "grad_norm": 1.4372123625640567, + "learning_rate": 3.8794263457384226e-07, + "loss": 0.443182110786438, + "step": 6284 + }, + { + "epoch": 1.4488760806916425, + "grad_norm": 1.6465099767718465, + "learning_rate": 3.8764124571898826e-07, + "loss": 0.5350714325904846, + "step": 6285 + }, + { + "epoch": 1.4491066282420748, + "grad_norm": 1.621326131358088, + "learning_rate": 3.8733994583349136e-07, + "loss": 0.48168644309043884, + "step": 6286 + }, + { + "epoch": 1.449337175792507, + "grad_norm": 2.0377729139688197, + "learning_rate": 3.870387349611266e-07, + "loss": 0.5208690166473389, + "step": 6287 + }, + { + "epoch": 1.4495677233429394, + "grad_norm": 1.5878874835937575, + "learning_rate": 3.867376131456571e-07, + "loss": 0.39023950695991516, + "step": 6288 + }, + { + "epoch": 1.4497982708933717, + "grad_norm": 1.6699418552277543, + "learning_rate": 3.864365804308333e-07, + "loss": 0.5270309448242188, + "step": 6289 + }, + { + "epoch": 1.450028818443804, + "grad_norm": 1.7984598642312444, + "learning_rate": 3.861356368603914e-07, + "loss": 0.46370822191238403, + "step": 6290 + }, + { + "epoch": 1.4502593659942362, + "grad_norm": 1.629232628741741, + "learning_rate": 3.8583478247805554e-07, + "loss": 0.4571373462677002, + "step": 6291 + }, + { + "epoch": 1.4504899135446685, + "grad_norm": 1.8809560181882357, + "learning_rate": 3.855340173275365e-07, + "loss": 0.5077657103538513, + "step": 6292 + }, + { + "epoch": 1.4507204610951008, + "grad_norm": 1.4749726509433596, + "learning_rate": 3.852333414525326e-07, + "loss": 0.47071346640586853, + "step": 6293 + }, + { + "epoch": 1.450951008645533, + "grad_norm": 1.6663865831998346, + "learning_rate": 3.8493275489672914e-07, + "loss": 0.5249844789505005, + "step": 6294 + }, + { + "epoch": 1.4511815561959653, + "grad_norm": 1.6297516532596577, + "learning_rate": 3.846322577037977e-07, + "loss": 0.4389895796775818, + "step": 6295 + }, + { + "epoch": 1.4514121037463976, + "grad_norm": 1.9107463185916174, + "learning_rate": 3.8433184991739797e-07, + "loss": 0.47682899236679077, + "step": 6296 + }, + { + "epoch": 1.45164265129683, + "grad_norm": 1.583134869267576, + "learning_rate": 3.8403153158117585e-07, + "loss": 0.5012357831001282, + "step": 6297 + }, + { + "epoch": 1.4518731988472622, + "grad_norm": 1.7933259402091364, + "learning_rate": 3.83731302738764e-07, + "loss": 0.4444109797477722, + "step": 6298 + }, + { + "epoch": 1.4521037463976945, + "grad_norm": 1.6273839601687068, + "learning_rate": 3.8343116343378333e-07, + "loss": 0.401306688785553, + "step": 6299 + }, + { + "epoch": 1.4523342939481267, + "grad_norm": 1.5929632009179213, + "learning_rate": 3.831311137098402e-07, + "loss": 0.44299593567848206, + "step": 6300 + }, + { + "epoch": 1.452564841498559, + "grad_norm": 1.5423692013544683, + "learning_rate": 3.828311536105291e-07, + "loss": 0.4148893654346466, + "step": 6301 + }, + { + "epoch": 1.4527953890489913, + "grad_norm": 1.8002510409281733, + "learning_rate": 3.825312831794314e-07, + "loss": 0.4563109278678894, + "step": 6302 + }, + { + "epoch": 1.4530259365994236, + "grad_norm": 1.6337296475781216, + "learning_rate": 3.822315024601149e-07, + "loss": 0.4540612995624542, + "step": 6303 + }, + { + "epoch": 1.4532564841498559, + "grad_norm": 1.7199257308385885, + "learning_rate": 3.819318114961343e-07, + "loss": 0.4511542320251465, + "step": 6304 + }, + { + "epoch": 1.4534870317002881, + "grad_norm": 1.8322950511376024, + "learning_rate": 3.816322103310321e-07, + "loss": 0.47727471590042114, + "step": 6305 + }, + { + "epoch": 1.4537175792507204, + "grad_norm": 1.674326740334363, + "learning_rate": 3.8133269900833664e-07, + "loss": 0.4711531400680542, + "step": 6306 + }, + { + "epoch": 1.4539481268011527, + "grad_norm": 1.4365052516515433, + "learning_rate": 3.8103327757156454e-07, + "loss": 0.4542125463485718, + "step": 6307 + }, + { + "epoch": 1.454178674351585, + "grad_norm": 1.7375177208970245, + "learning_rate": 3.807339460642178e-07, + "loss": 0.5262948274612427, + "step": 6308 + }, + { + "epoch": 1.4544092219020173, + "grad_norm": 1.4962137087496394, + "learning_rate": 3.804347045297871e-07, + "loss": 0.5131025314331055, + "step": 6309 + }, + { + "epoch": 1.4546397694524495, + "grad_norm": 1.8610634194791733, + "learning_rate": 3.801355530117485e-07, + "loss": 0.41658881306648254, + "step": 6310 + }, + { + "epoch": 1.4548703170028818, + "grad_norm": 1.7539949314322378, + "learning_rate": 3.7983649155356533e-07, + "loss": 0.4876325726509094, + "step": 6311 + }, + { + "epoch": 1.455100864553314, + "grad_norm": 1.5534286551953016, + "learning_rate": 3.7953752019868865e-07, + "loss": 0.4055835008621216, + "step": 6312 + }, + { + "epoch": 1.4553314121037464, + "grad_norm": 1.4355176124522515, + "learning_rate": 3.79238638990556e-07, + "loss": 0.45018666982650757, + "step": 6313 + }, + { + "epoch": 1.4555619596541787, + "grad_norm": 2.33610396723976, + "learning_rate": 3.7893984797259113e-07, + "loss": 0.5295370817184448, + "step": 6314 + }, + { + "epoch": 1.455792507204611, + "grad_norm": 1.6012578754804487, + "learning_rate": 3.7864114718820594e-07, + "loss": 0.44755294919013977, + "step": 6315 + }, + { + "epoch": 1.4560230547550432, + "grad_norm": 1.4087585915017262, + "learning_rate": 3.783425366807982e-07, + "loss": 0.3790748119354248, + "step": 6316 + }, + { + "epoch": 1.4562536023054755, + "grad_norm": 1.4371137527160003, + "learning_rate": 3.780440164937525e-07, + "loss": 0.44833457469940186, + "step": 6317 + }, + { + "epoch": 1.4564841498559078, + "grad_norm": 1.5127457638273085, + "learning_rate": 3.7774558667044154e-07, + "loss": 0.486098051071167, + "step": 6318 + }, + { + "epoch": 1.45671469740634, + "grad_norm": 1.6312165307012176, + "learning_rate": 3.774472472542233e-07, + "loss": 0.5650969743728638, + "step": 6319 + }, + { + "epoch": 1.4569452449567724, + "grad_norm": 1.576011420615042, + "learning_rate": 3.771489982884437e-07, + "loss": 0.5329450368881226, + "step": 6320 + }, + { + "epoch": 1.4571757925072046, + "grad_norm": 1.275904497817475, + "learning_rate": 3.768508398164356e-07, + "loss": 0.4540603756904602, + "step": 6321 + }, + { + "epoch": 1.457406340057637, + "grad_norm": 1.7203841934704942, + "learning_rate": 3.765527718815181e-07, + "loss": 0.4849478304386139, + "step": 6322 + }, + { + "epoch": 1.4576368876080692, + "grad_norm": 1.8368644261234324, + "learning_rate": 3.7625479452699714e-07, + "loss": 0.5025255680084229, + "step": 6323 + }, + { + "epoch": 1.4578674351585015, + "grad_norm": 1.5544842869111484, + "learning_rate": 3.7595690779616554e-07, + "loss": 0.4498249888420105, + "step": 6324 + }, + { + "epoch": 1.4580979827089338, + "grad_norm": 1.4645542423324267, + "learning_rate": 3.7565911173230347e-07, + "loss": 0.43486863374710083, + "step": 6325 + }, + { + "epoch": 1.458328530259366, + "grad_norm": 1.4567650293051104, + "learning_rate": 3.7536140637867784e-07, + "loss": 0.45068681240081787, + "step": 6326 + }, + { + "epoch": 1.4585590778097983, + "grad_norm": 1.640676579677221, + "learning_rate": 3.750637917785415e-07, + "loss": 0.48379603028297424, + "step": 6327 + }, + { + "epoch": 1.4587896253602306, + "grad_norm": 1.5536049420371754, + "learning_rate": 3.7476626797513564e-07, + "loss": 0.4718289375305176, + "step": 6328 + }, + { + "epoch": 1.4590201729106629, + "grad_norm": 1.4598459872980478, + "learning_rate": 3.744688350116868e-07, + "loss": 0.4820924997329712, + "step": 6329 + }, + { + "epoch": 1.4592507204610952, + "grad_norm": 1.4067339533366963, + "learning_rate": 3.741714929314086e-07, + "loss": 0.4419173002243042, + "step": 6330 + }, + { + "epoch": 1.4594812680115274, + "grad_norm": 1.5713519098289923, + "learning_rate": 3.7387424177750237e-07, + "loss": 0.49854522943496704, + "step": 6331 + }, + { + "epoch": 1.4597118155619597, + "grad_norm": 1.5875195079981788, + "learning_rate": 3.7357708159315514e-07, + "loss": 0.48068612813949585, + "step": 6332 + }, + { + "epoch": 1.459942363112392, + "grad_norm": 1.7026609781525415, + "learning_rate": 3.732800124215414e-07, + "loss": 0.511599063873291, + "step": 6333 + }, + { + "epoch": 1.4601729106628243, + "grad_norm": 1.4797586406170442, + "learning_rate": 3.7298303430582245e-07, + "loss": 0.5052134990692139, + "step": 6334 + }, + { + "epoch": 1.4604034582132566, + "grad_norm": 1.7667173800076938, + "learning_rate": 3.7268614728914606e-07, + "loss": 0.4742302894592285, + "step": 6335 + }, + { + "epoch": 1.4606340057636888, + "grad_norm": 1.403636658794488, + "learning_rate": 3.7238935141464644e-07, + "loss": 0.5089839696884155, + "step": 6336 + }, + { + "epoch": 1.4608645533141211, + "grad_norm": 2.0880344022264437, + "learning_rate": 3.720926467254449e-07, + "loss": 0.5136945247650146, + "step": 6337 + }, + { + "epoch": 1.4610951008645534, + "grad_norm": 1.5636116190285216, + "learning_rate": 3.7179603326464993e-07, + "loss": 0.3947451710700989, + "step": 6338 + }, + { + "epoch": 1.4613256484149857, + "grad_norm": 1.7992854560678408, + "learning_rate": 3.714995110753565e-07, + "loss": 0.48703646659851074, + "step": 6339 + }, + { + "epoch": 1.461556195965418, + "grad_norm": 1.6901577747405614, + "learning_rate": 3.712030802006455e-07, + "loss": 0.4713754951953888, + "step": 6340 + }, + { + "epoch": 1.4617867435158503, + "grad_norm": 1.47391484527644, + "learning_rate": 3.709067406835862e-07, + "loss": 0.4496157169342041, + "step": 6341 + }, + { + "epoch": 1.4620172910662825, + "grad_norm": 1.7559130114451649, + "learning_rate": 3.706104925672331e-07, + "loss": 0.5094351768493652, + "step": 6342 + }, + { + "epoch": 1.4622478386167148, + "grad_norm": 1.274246430476679, + "learning_rate": 3.7031433589462766e-07, + "loss": 0.46877321600914, + "step": 6343 + }, + { + "epoch": 1.462478386167147, + "grad_norm": 1.9260152638351333, + "learning_rate": 3.700182707087991e-07, + "loss": 0.3847515881061554, + "step": 6344 + }, + { + "epoch": 1.4627089337175794, + "grad_norm": 1.453999915283592, + "learning_rate": 3.697222970527618e-07, + "loss": 0.5548876523971558, + "step": 6345 + }, + { + "epoch": 1.4629394812680117, + "grad_norm": 1.4413433555035606, + "learning_rate": 3.694264149695182e-07, + "loss": 0.47204387187957764, + "step": 6346 + }, + { + "epoch": 1.463170028818444, + "grad_norm": 1.6869492340215804, + "learning_rate": 3.6913062450205714e-07, + "loss": 0.47992634773254395, + "step": 6347 + }, + { + "epoch": 1.4634005763688762, + "grad_norm": 1.5705466032049853, + "learning_rate": 3.688349256933534e-07, + "loss": 0.5625392198562622, + "step": 6348 + }, + { + "epoch": 1.4636311239193083, + "grad_norm": 1.4260083123464304, + "learning_rate": 3.685393185863689e-07, + "loss": 0.3677716851234436, + "step": 6349 + }, + { + "epoch": 1.4638616714697406, + "grad_norm": 1.5900275170626974, + "learning_rate": 3.682438032240527e-07, + "loss": 0.3630062937736511, + "step": 6350 + }, + { + "epoch": 1.4640922190201728, + "grad_norm": 1.4447444002714263, + "learning_rate": 3.6794837964933943e-07, + "loss": 0.5151525139808655, + "step": 6351 + }, + { + "epoch": 1.4643227665706051, + "grad_norm": 1.6404693772770675, + "learning_rate": 3.6765304790515193e-07, + "loss": 0.37844717502593994, + "step": 6352 + }, + { + "epoch": 1.4645533141210374, + "grad_norm": 1.3380095823905058, + "learning_rate": 3.673578080343981e-07, + "loss": 0.4164416193962097, + "step": 6353 + }, + { + "epoch": 1.4647838616714697, + "grad_norm": 1.6805497958260283, + "learning_rate": 3.670626600799739e-07, + "loss": 0.5269230008125305, + "step": 6354 + }, + { + "epoch": 1.465014409221902, + "grad_norm": 1.647915234149178, + "learning_rate": 3.667676040847607e-07, + "loss": 0.4049336612224579, + "step": 6355 + }, + { + "epoch": 1.4652449567723342, + "grad_norm": 1.6287952578421923, + "learning_rate": 3.66472640091627e-07, + "loss": 0.4874676465988159, + "step": 6356 + }, + { + "epoch": 1.4654755043227665, + "grad_norm": 1.359427032107508, + "learning_rate": 3.6617776814342826e-07, + "loss": 0.44552722573280334, + "step": 6357 + }, + { + "epoch": 1.4657060518731988, + "grad_norm": 1.6381292926536035, + "learning_rate": 3.6588298828300655e-07, + "loss": 0.46151018142700195, + "step": 6358 + }, + { + "epoch": 1.465936599423631, + "grad_norm": 1.759691893404636, + "learning_rate": 3.655883005531898e-07, + "loss": 0.4587894082069397, + "step": 6359 + }, + { + "epoch": 1.4661671469740634, + "grad_norm": 1.8298309075681234, + "learning_rate": 3.6529370499679367e-07, + "loss": 0.5404157638549805, + "step": 6360 + }, + { + "epoch": 1.4663976945244956, + "grad_norm": 1.3135400416651812, + "learning_rate": 3.649992016566195e-07, + "loss": 0.42960917949676514, + "step": 6361 + }, + { + "epoch": 1.466628242074928, + "grad_norm": 1.9123580665558777, + "learning_rate": 3.647047905754551e-07, + "loss": 0.550566554069519, + "step": 6362 + }, + { + "epoch": 1.4668587896253602, + "grad_norm": 1.5220633344951369, + "learning_rate": 3.644104717960761e-07, + "loss": 0.42004531621932983, + "step": 6363 + }, + { + "epoch": 1.4670893371757925, + "grad_norm": 2.0037836879571413, + "learning_rate": 3.641162453612434e-07, + "loss": 0.4716450572013855, + "step": 6364 + }, + { + "epoch": 1.4673198847262248, + "grad_norm": 1.9961081968547811, + "learning_rate": 3.6382211131370534e-07, + "loss": 0.4558556079864502, + "step": 6365 + }, + { + "epoch": 1.467550432276657, + "grad_norm": 1.79825755837218, + "learning_rate": 3.6352806969619667e-07, + "loss": 0.51080721616745, + "step": 6366 + }, + { + "epoch": 1.4677809798270893, + "grad_norm": 1.867330178377139, + "learning_rate": 3.6323412055143843e-07, + "loss": 0.35964512825012207, + "step": 6367 + }, + { + "epoch": 1.4680115273775216, + "grad_norm": 1.7497633832514439, + "learning_rate": 3.629402639221384e-07, + "loss": 0.4457089900970459, + "step": 6368 + }, + { + "epoch": 1.4682420749279539, + "grad_norm": 1.652033538972312, + "learning_rate": 3.626464998509905e-07, + "loss": 0.38707441091537476, + "step": 6369 + }, + { + "epoch": 1.4684726224783862, + "grad_norm": 1.5451052503855718, + "learning_rate": 3.623528283806758e-07, + "loss": 0.5018205642700195, + "step": 6370 + }, + { + "epoch": 1.4687031700288184, + "grad_norm": 1.6247787287129325, + "learning_rate": 3.620592495538622e-07, + "loss": 0.4383612871170044, + "step": 6371 + }, + { + "epoch": 1.4689337175792507, + "grad_norm": 1.4673351855920176, + "learning_rate": 3.6176576341320297e-07, + "loss": 0.46968695521354675, + "step": 6372 + }, + { + "epoch": 1.469164265129683, + "grad_norm": 1.5714223819434983, + "learning_rate": 3.6147237000133925e-07, + "loss": 0.45279714465141296, + "step": 6373 + }, + { + "epoch": 1.4693948126801153, + "grad_norm": 1.926925770479076, + "learning_rate": 3.6117906936089757e-07, + "loss": 0.4974134862422943, + "step": 6374 + }, + { + "epoch": 1.4696253602305476, + "grad_norm": 1.89913152070155, + "learning_rate": 3.608858615344914e-07, + "loss": 0.49392572045326233, + "step": 6375 + }, + { + "epoch": 1.4698559077809799, + "grad_norm": 1.8956623188876287, + "learning_rate": 3.605927465647213e-07, + "loss": 0.45273101329803467, + "step": 6376 + }, + { + "epoch": 1.4700864553314121, + "grad_norm": 1.715042184201244, + "learning_rate": 3.602997244941731e-07, + "loss": 0.45529431104660034, + "step": 6377 + }, + { + "epoch": 1.4703170028818444, + "grad_norm": 1.8449740006889035, + "learning_rate": 3.600067953654203e-07, + "loss": 0.5191174745559692, + "step": 6378 + }, + { + "epoch": 1.4705475504322767, + "grad_norm": 1.6198609763992475, + "learning_rate": 3.5971395922102276e-07, + "loss": 0.44115346670150757, + "step": 6379 + }, + { + "epoch": 1.470778097982709, + "grad_norm": 1.4551263848150924, + "learning_rate": 3.5942121610352616e-07, + "loss": 0.3980026841163635, + "step": 6380 + }, + { + "epoch": 1.4710086455331413, + "grad_norm": 1.8677194748312773, + "learning_rate": 3.5912856605546303e-07, + "loss": 0.4519263505935669, + "step": 6381 + }, + { + "epoch": 1.4712391930835735, + "grad_norm": 1.9194690118602158, + "learning_rate": 3.5883600911935206e-07, + "loss": 0.47958600521087646, + "step": 6382 + }, + { + "epoch": 1.4714697406340058, + "grad_norm": 1.495711078011954, + "learning_rate": 3.5854354533769915e-07, + "loss": 0.5039705038070679, + "step": 6383 + }, + { + "epoch": 1.471700288184438, + "grad_norm": 1.6950047090453357, + "learning_rate": 3.582511747529965e-07, + "loss": 0.4200620651245117, + "step": 6384 + }, + { + "epoch": 1.4719308357348704, + "grad_norm": 1.746616763124911, + "learning_rate": 3.579588974077218e-07, + "loss": 0.44767335057258606, + "step": 6385 + }, + { + "epoch": 1.4721613832853027, + "grad_norm": 1.9564062341779582, + "learning_rate": 3.5766671334434053e-07, + "loss": 0.4160998463630676, + "step": 6386 + }, + { + "epoch": 1.472391930835735, + "grad_norm": 2.0148129925088436, + "learning_rate": 3.5737462260530384e-07, + "loss": 0.4349063038825989, + "step": 6387 + }, + { + "epoch": 1.4726224783861672, + "grad_norm": 1.379105631547287, + "learning_rate": 3.570826252330491e-07, + "loss": 0.44622567296028137, + "step": 6388 + }, + { + "epoch": 1.4728530259365995, + "grad_norm": 1.6792585009694532, + "learning_rate": 3.56790721270001e-07, + "loss": 0.5509021878242493, + "step": 6389 + }, + { + "epoch": 1.4730835734870318, + "grad_norm": 1.589744434583523, + "learning_rate": 3.5649891075856963e-07, + "loss": 0.4545692801475525, + "step": 6390 + }, + { + "epoch": 1.4733141210374638, + "grad_norm": 1.5193646777338217, + "learning_rate": 3.5620719374115237e-07, + "loss": 0.45771169662475586, + "step": 6391 + }, + { + "epoch": 1.4735446685878961, + "grad_norm": 1.5457460822793552, + "learning_rate": 3.559155702601333e-07, + "loss": 0.40960395336151123, + "step": 6392 + }, + { + "epoch": 1.4737752161383284, + "grad_norm": 1.5018594999569612, + "learning_rate": 3.5562404035788084e-07, + "loss": 0.4792563319206238, + "step": 6393 + }, + { + "epoch": 1.4740057636887607, + "grad_norm": 1.9530047641946722, + "learning_rate": 3.5533260407675205e-07, + "loss": 0.4988730251789093, + "step": 6394 + }, + { + "epoch": 1.474236311239193, + "grad_norm": 1.4554877163973188, + "learning_rate": 3.5504126145908985e-07, + "loss": 0.49683940410614014, + "step": 6395 + }, + { + "epoch": 1.4744668587896252, + "grad_norm": 2.220410748790677, + "learning_rate": 3.547500125472227e-07, + "loss": 0.515201985836029, + "step": 6396 + }, + { + "epoch": 1.4746974063400575, + "grad_norm": 1.6620796307322583, + "learning_rate": 3.544588573834666e-07, + "loss": 0.5007616877555847, + "step": 6397 + }, + { + "epoch": 1.4749279538904898, + "grad_norm": 1.5582513723700742, + "learning_rate": 3.5416779601012316e-07, + "loss": 0.41666027903556824, + "step": 6398 + }, + { + "epoch": 1.475158501440922, + "grad_norm": 1.5507523525405247, + "learning_rate": 3.538768284694801e-07, + "loss": 0.4753478169441223, + "step": 6399 + }, + { + "epoch": 1.4753890489913544, + "grad_norm": 1.5959517593070658, + "learning_rate": 3.535859548038128e-07, + "loss": 0.39610493183135986, + "step": 6400 + }, + { + "epoch": 1.4756195965417866, + "grad_norm": 1.7175570820294603, + "learning_rate": 3.5329517505538133e-07, + "loss": 0.49294179677963257, + "step": 6401 + }, + { + "epoch": 1.475850144092219, + "grad_norm": 1.4777974691524935, + "learning_rate": 3.5300448926643345e-07, + "loss": 0.5709241628646851, + "step": 6402 + }, + { + "epoch": 1.4760806916426512, + "grad_norm": 1.6977497595363569, + "learning_rate": 3.52713897479203e-07, + "loss": 0.5481114387512207, + "step": 6403 + }, + { + "epoch": 1.4763112391930835, + "grad_norm": 1.293897371508969, + "learning_rate": 3.524233997359097e-07, + "loss": 0.39064526557922363, + "step": 6404 + }, + { + "epoch": 1.4765417867435158, + "grad_norm": 1.7655897829606124, + "learning_rate": 3.521329960787598e-07, + "loss": 0.43510839343070984, + "step": 6405 + }, + { + "epoch": 1.476772334293948, + "grad_norm": 1.6197138314599864, + "learning_rate": 3.518426865499456e-07, + "loss": 0.5212624073028564, + "step": 6406 + }, + { + "epoch": 1.4770028818443803, + "grad_norm": 1.3980445990441086, + "learning_rate": 3.5155247119164646e-07, + "loss": 0.44855934381484985, + "step": 6407 + }, + { + "epoch": 1.4772334293948126, + "grad_norm": 1.6879035741159887, + "learning_rate": 3.512623500460279e-07, + "loss": 0.45301520824432373, + "step": 6408 + }, + { + "epoch": 1.477463976945245, + "grad_norm": 1.7659988453507556, + "learning_rate": 3.5097232315524074e-07, + "loss": 0.44875574111938477, + "step": 6409 + }, + { + "epoch": 1.4776945244956772, + "grad_norm": 1.6352648357517579, + "learning_rate": 3.506823905614238e-07, + "loss": 0.4552629590034485, + "step": 6410 + }, + { + "epoch": 1.4779250720461095, + "grad_norm": 1.634142332572928, + "learning_rate": 3.503925523067007e-07, + "loss": 0.5640658140182495, + "step": 6411 + }, + { + "epoch": 1.4781556195965417, + "grad_norm": 1.689760519647161, + "learning_rate": 3.501028084331817e-07, + "loss": 0.4508011043071747, + "step": 6412 + }, + { + "epoch": 1.478386167146974, + "grad_norm": 1.6675433414567795, + "learning_rate": 3.4981315898296437e-07, + "loss": 0.4895268678665161, + "step": 6413 + }, + { + "epoch": 1.4786167146974063, + "grad_norm": 1.5223819083256382, + "learning_rate": 3.495236039981307e-07, + "loss": 0.6094552874565125, + "step": 6414 + }, + { + "epoch": 1.4788472622478386, + "grad_norm": 1.8997558277564615, + "learning_rate": 3.492341435207509e-07, + "loss": 0.4547635316848755, + "step": 6415 + }, + { + "epoch": 1.4790778097982709, + "grad_norm": 1.3581768371839973, + "learning_rate": 3.489447775928803e-07, + "loss": 0.3610043227672577, + "step": 6416 + }, + { + "epoch": 1.4793083573487031, + "grad_norm": 1.6041394568745782, + "learning_rate": 3.4865550625656094e-07, + "loss": 0.5122381448745728, + "step": 6417 + }, + { + "epoch": 1.4795389048991354, + "grad_norm": 1.867753113062549, + "learning_rate": 3.483663295538206e-07, + "loss": 0.49883753061294556, + "step": 6418 + }, + { + "epoch": 1.4797694524495677, + "grad_norm": 1.6738054659426496, + "learning_rate": 3.4807724752667344e-07, + "loss": 0.4434877634048462, + "step": 6419 + }, + { + "epoch": 1.48, + "grad_norm": 1.6103141345408045, + "learning_rate": 3.477882602171205e-07, + "loss": 0.477453351020813, + "step": 6420 + }, + { + "epoch": 1.4802305475504323, + "grad_norm": 1.4146735862110984, + "learning_rate": 3.474993676671487e-07, + "loss": 0.4113251864910126, + "step": 6421 + }, + { + "epoch": 1.4804610951008645, + "grad_norm": 1.6403201677984904, + "learning_rate": 3.4721056991873063e-07, + "loss": 0.48774218559265137, + "step": 6422 + }, + { + "epoch": 1.4806916426512968, + "grad_norm": 1.7694899270224396, + "learning_rate": 3.469218670138264e-07, + "loss": 0.49067050218582153, + "step": 6423 + }, + { + "epoch": 1.480922190201729, + "grad_norm": 1.461965616606682, + "learning_rate": 3.46633258994381e-07, + "loss": 0.44339796900749207, + "step": 6424 + }, + { + "epoch": 1.4811527377521614, + "grad_norm": 1.5104802529368333, + "learning_rate": 3.4634474590232585e-07, + "loss": 0.46774202585220337, + "step": 6425 + }, + { + "epoch": 1.4813832853025937, + "grad_norm": 1.6010012626937276, + "learning_rate": 3.460563277795796e-07, + "loss": 0.5128026008605957, + "step": 6426 + }, + { + "epoch": 1.481613832853026, + "grad_norm": 1.567736868007956, + "learning_rate": 3.457680046680458e-07, + "loss": 0.402060866355896, + "step": 6427 + }, + { + "epoch": 1.4818443804034582, + "grad_norm": 1.6986772514907804, + "learning_rate": 3.4547977660961504e-07, + "loss": 0.4166930317878723, + "step": 6428 + }, + { + "epoch": 1.4820749279538905, + "grad_norm": 1.6211572453475638, + "learning_rate": 3.451916436461643e-07, + "loss": 0.428037166595459, + "step": 6429 + }, + { + "epoch": 1.4823054755043228, + "grad_norm": 1.4970288634545281, + "learning_rate": 3.449036058195558e-07, + "loss": 0.5032195448875427, + "step": 6430 + }, + { + "epoch": 1.482536023054755, + "grad_norm": 1.6031976447892884, + "learning_rate": 3.4461566317163827e-07, + "loss": 0.480010986328125, + "step": 6431 + }, + { + "epoch": 1.4827665706051874, + "grad_norm": 1.5460841020576666, + "learning_rate": 3.4432781574424743e-07, + "loss": 0.6013551950454712, + "step": 6432 + }, + { + "epoch": 1.4829971181556196, + "grad_norm": 2.0092772021219942, + "learning_rate": 3.440400635792037e-07, + "loss": 0.42359572649002075, + "step": 6433 + }, + { + "epoch": 1.483227665706052, + "grad_norm": 1.7875564171400882, + "learning_rate": 3.437524067183153e-07, + "loss": 0.4685453772544861, + "step": 6434 + }, + { + "epoch": 1.4834582132564842, + "grad_norm": 1.6281902482139496, + "learning_rate": 3.4346484520337513e-07, + "loss": 0.4501311480998993, + "step": 6435 + }, + { + "epoch": 1.4836887608069165, + "grad_norm": 1.714416901486118, + "learning_rate": 3.431773790761634e-07, + "loss": 0.43600693345069885, + "step": 6436 + }, + { + "epoch": 1.4839193083573488, + "grad_norm": 1.802737382888876, + "learning_rate": 3.4289000837844574e-07, + "loss": 0.5174646377563477, + "step": 6437 + }, + { + "epoch": 1.484149855907781, + "grad_norm": 2.0164710202679554, + "learning_rate": 3.426027331519737e-07, + "loss": 0.587194561958313, + "step": 6438 + }, + { + "epoch": 1.4843804034582133, + "grad_norm": 1.5580641317732793, + "learning_rate": 3.4231555343848585e-07, + "loss": 0.4370976388454437, + "step": 6439 + }, + { + "epoch": 1.4846109510086456, + "grad_norm": 1.4989526897857972, + "learning_rate": 3.4202846927970664e-07, + "loss": 0.4005950093269348, + "step": 6440 + }, + { + "epoch": 1.4848414985590779, + "grad_norm": 1.7120206565146892, + "learning_rate": 3.4174148071734565e-07, + "loss": 0.489225834608078, + "step": 6441 + }, + { + "epoch": 1.4850720461095102, + "grad_norm": 1.4871889439736439, + "learning_rate": 3.4145458779310034e-07, + "loss": 0.464316725730896, + "step": 6442 + }, + { + "epoch": 1.4853025936599424, + "grad_norm": 1.5005161742665332, + "learning_rate": 3.411677905486525e-07, + "loss": 0.42455434799194336, + "step": 6443 + }, + { + "epoch": 1.4855331412103747, + "grad_norm": 1.859604235829819, + "learning_rate": 3.408810890256708e-07, + "loss": 0.4953247308731079, + "step": 6444 + }, + { + "epoch": 1.485763688760807, + "grad_norm": 1.6362253603742314, + "learning_rate": 3.405944832658104e-07, + "loss": 0.5813614130020142, + "step": 6445 + }, + { + "epoch": 1.4859942363112393, + "grad_norm": 1.510210977741005, + "learning_rate": 3.403079733107117e-07, + "loss": 0.5422607064247131, + "step": 6446 + }, + { + "epoch": 1.4862247838616716, + "grad_norm": 1.808286563495416, + "learning_rate": 3.4002155920200183e-07, + "loss": 0.4816162586212158, + "step": 6447 + }, + { + "epoch": 1.4864553314121038, + "grad_norm": 1.8383473199815739, + "learning_rate": 3.39735240981294e-07, + "loss": 0.513064444065094, + "step": 6448 + }, + { + "epoch": 1.4866858789625361, + "grad_norm": 1.784239888983303, + "learning_rate": 3.3944901869018714e-07, + "loss": 0.45172595977783203, + "step": 6449 + }, + { + "epoch": 1.4869164265129684, + "grad_norm": 1.6442365366535128, + "learning_rate": 3.391628923702664e-07, + "loss": 0.458996057510376, + "step": 6450 + }, + { + "epoch": 1.4871469740634007, + "grad_norm": 1.808440287394247, + "learning_rate": 3.388768620631024e-07, + "loss": 0.5369571447372437, + "step": 6451 + }, + { + "epoch": 1.487377521613833, + "grad_norm": 1.488742958974303, + "learning_rate": 3.3859092781025276e-07, + "loss": 0.40987443923950195, + "step": 6452 + }, + { + "epoch": 1.4876080691642652, + "grad_norm": 1.3654680024752937, + "learning_rate": 3.3830508965326123e-07, + "loss": 0.4854167401790619, + "step": 6453 + }, + { + "epoch": 1.4878386167146975, + "grad_norm": 1.4869168297481818, + "learning_rate": 3.3801934763365637e-07, + "loss": 0.4233596920967102, + "step": 6454 + }, + { + "epoch": 1.4880691642651298, + "grad_norm": 1.6653305815248807, + "learning_rate": 3.3773370179295415e-07, + "loss": 0.4606029987335205, + "step": 6455 + }, + { + "epoch": 1.488299711815562, + "grad_norm": 1.565802296000991, + "learning_rate": 3.3744815217265566e-07, + "loss": 0.500397801399231, + "step": 6456 + }, + { + "epoch": 1.4885302593659944, + "grad_norm": 1.4104137796905991, + "learning_rate": 3.371626988142479e-07, + "loss": 0.4079688787460327, + "step": 6457 + }, + { + "epoch": 1.4887608069164266, + "grad_norm": 1.437160720705673, + "learning_rate": 3.3687734175920503e-07, + "loss": 0.4022506773471832, + "step": 6458 + }, + { + "epoch": 1.4889913544668587, + "grad_norm": 1.5597051684116123, + "learning_rate": 3.365920810489856e-07, + "loss": 0.46588951349258423, + "step": 6459 + }, + { + "epoch": 1.489221902017291, + "grad_norm": 1.5966856514161005, + "learning_rate": 3.3630691672503565e-07, + "loss": 0.5634331703186035, + "step": 6460 + }, + { + "epoch": 1.4894524495677233, + "grad_norm": 1.5519766042222847, + "learning_rate": 3.360218488287867e-07, + "loss": 0.45025673508644104, + "step": 6461 + }, + { + "epoch": 1.4896829971181555, + "grad_norm": 1.6343139379388767, + "learning_rate": 3.357368774016559e-07, + "loss": 0.4171956777572632, + "step": 6462 + }, + { + "epoch": 1.4899135446685878, + "grad_norm": 1.7433785479359918, + "learning_rate": 3.354520024850467e-07, + "loss": 0.4150547981262207, + "step": 6463 + }, + { + "epoch": 1.4901440922190201, + "grad_norm": 1.6499845840112966, + "learning_rate": 3.351672241203479e-07, + "loss": 0.5727693438529968, + "step": 6464 + }, + { + "epoch": 1.4903746397694524, + "grad_norm": 1.4575635695527691, + "learning_rate": 3.3488254234893554e-07, + "loss": 0.3606629967689514, + "step": 6465 + }, + { + "epoch": 1.4906051873198847, + "grad_norm": 1.669334025951129, + "learning_rate": 3.345979572121709e-07, + "loss": 0.447257936000824, + "step": 6466 + }, + { + "epoch": 1.490835734870317, + "grad_norm": 1.4454918598699755, + "learning_rate": 3.3431346875140067e-07, + "loss": 0.4196828603744507, + "step": 6467 + }, + { + "epoch": 1.4910662824207492, + "grad_norm": 1.761385296418873, + "learning_rate": 3.340290770079588e-07, + "loss": 0.46776294708251953, + "step": 6468 + }, + { + "epoch": 1.4912968299711815, + "grad_norm": 1.7102419270453626, + "learning_rate": 3.3374478202316403e-07, + "loss": 0.463356614112854, + "step": 6469 + }, + { + "epoch": 1.4915273775216138, + "grad_norm": 1.7784174334662857, + "learning_rate": 3.3346058383832123e-07, + "loss": 0.49275442957878113, + "step": 6470 + }, + { + "epoch": 1.491757925072046, + "grad_norm": 1.6776101672774684, + "learning_rate": 3.3317648249472205e-07, + "loss": 0.5000715851783752, + "step": 6471 + }, + { + "epoch": 1.4919884726224784, + "grad_norm": 1.7556476404250725, + "learning_rate": 3.328924780336428e-07, + "loss": 0.6033698320388794, + "step": 6472 + }, + { + "epoch": 1.4922190201729106, + "grad_norm": 1.4908393069487778, + "learning_rate": 3.326085704963467e-07, + "loss": 0.521305501461029, + "step": 6473 + }, + { + "epoch": 1.492449567723343, + "grad_norm": 1.2845886716264758, + "learning_rate": 3.3232475992408293e-07, + "loss": 0.4261690378189087, + "step": 6474 + }, + { + "epoch": 1.4926801152737752, + "grad_norm": 1.5797229025924917, + "learning_rate": 3.320410463580859e-07, + "loss": 0.4929957985877991, + "step": 6475 + }, + { + "epoch": 1.4929106628242075, + "grad_norm": 1.7503702857264898, + "learning_rate": 3.3175742983957577e-07, + "loss": 0.5392374992370605, + "step": 6476 + }, + { + "epoch": 1.4931412103746398, + "grad_norm": 1.6614074077433174, + "learning_rate": 3.314739104097599e-07, + "loss": 0.45847803354263306, + "step": 6477 + }, + { + "epoch": 1.493371757925072, + "grad_norm": 1.5197176985824143, + "learning_rate": 3.3119048810982996e-07, + "loss": 0.317424476146698, + "step": 6478 + }, + { + "epoch": 1.4936023054755043, + "grad_norm": 1.7221896627907232, + "learning_rate": 3.3090716298096497e-07, + "loss": 0.4409928321838379, + "step": 6479 + }, + { + "epoch": 1.4938328530259366, + "grad_norm": 1.7378684668549815, + "learning_rate": 3.306239350643284e-07, + "loss": 0.4996468424797058, + "step": 6480 + }, + { + "epoch": 1.4940634005763689, + "grad_norm": 1.4267403901787394, + "learning_rate": 3.3034080440107104e-07, + "loss": 0.4341059625148773, + "step": 6481 + }, + { + "epoch": 1.4942939481268012, + "grad_norm": 1.5210821751127084, + "learning_rate": 3.3005777103232833e-07, + "loss": 0.48193103075027466, + "step": 6482 + }, + { + "epoch": 1.4945244956772334, + "grad_norm": 1.4847373995641082, + "learning_rate": 3.297748349992221e-07, + "loss": 0.3965853452682495, + "step": 6483 + }, + { + "epoch": 1.4947550432276657, + "grad_norm": 1.798682040014271, + "learning_rate": 3.2949199634285994e-07, + "loss": 0.517971396446228, + "step": 6484 + }, + { + "epoch": 1.494985590778098, + "grad_norm": 1.4832490379969678, + "learning_rate": 3.2920925510433605e-07, + "loss": 0.4893750548362732, + "step": 6485 + }, + { + "epoch": 1.4952161383285303, + "grad_norm": 1.8768690559978958, + "learning_rate": 3.289266113247289e-07, + "loss": 0.4487413167953491, + "step": 6486 + }, + { + "epoch": 1.4954466858789626, + "grad_norm": 1.5362967716419886, + "learning_rate": 3.2864406504510444e-07, + "loss": 0.4499363303184509, + "step": 6487 + }, + { + "epoch": 1.4956772334293948, + "grad_norm": 1.7573103415083038, + "learning_rate": 3.2836161630651327e-07, + "loss": 0.47149038314819336, + "step": 6488 + }, + { + "epoch": 1.4959077809798271, + "grad_norm": 1.866127921997491, + "learning_rate": 3.2807926514999206e-07, + "loss": 0.4235773980617523, + "step": 6489 + }, + { + "epoch": 1.4961383285302594, + "grad_norm": 1.841841873618585, + "learning_rate": 3.2779701161656414e-07, + "loss": 0.47815465927124023, + "step": 6490 + }, + { + "epoch": 1.4963688760806917, + "grad_norm": 2.0492176414960523, + "learning_rate": 3.2751485574723725e-07, + "loss": 0.500824511051178, + "step": 6491 + }, + { + "epoch": 1.496599423631124, + "grad_norm": 1.7353423201350575, + "learning_rate": 3.2723279758300614e-07, + "loss": 0.4901300370693207, + "step": 6492 + }, + { + "epoch": 1.4968299711815563, + "grad_norm": 1.6715223899825975, + "learning_rate": 3.2695083716485116e-07, + "loss": 0.43286561965942383, + "step": 6493 + }, + { + "epoch": 1.4970605187319885, + "grad_norm": 1.6129181056594784, + "learning_rate": 3.26668974533738e-07, + "loss": 0.5049563646316528, + "step": 6494 + }, + { + "epoch": 1.4972910662824208, + "grad_norm": 1.3860068483985046, + "learning_rate": 3.2638720973061826e-07, + "loss": 0.44142240285873413, + "step": 6495 + }, + { + "epoch": 1.497521613832853, + "grad_norm": 1.7555180120285485, + "learning_rate": 3.261055427964292e-07, + "loss": 0.46423906087875366, + "step": 6496 + }, + { + "epoch": 1.4977521613832854, + "grad_norm": 1.630721059004709, + "learning_rate": 3.2582397377209446e-07, + "loss": 0.5402355790138245, + "step": 6497 + }, + { + "epoch": 1.4979827089337177, + "grad_norm": 1.776211928117776, + "learning_rate": 3.2554250269852326e-07, + "loss": 0.4754972457885742, + "step": 6498 + }, + { + "epoch": 1.49821325648415, + "grad_norm": 1.7088366443415275, + "learning_rate": 3.2526112961660987e-07, + "loss": 0.4837331771850586, + "step": 6499 + }, + { + "epoch": 1.498443804034582, + "grad_norm": 1.4244012985740182, + "learning_rate": 3.2497985456723556e-07, + "loss": 0.41410496830940247, + "step": 6500 + }, + { + "epoch": 1.4986743515850143, + "grad_norm": 1.3926696859702201, + "learning_rate": 3.246986775912661e-07, + "loss": 0.39501869678497314, + "step": 6501 + }, + { + "epoch": 1.4989048991354466, + "grad_norm": 1.7615254739148636, + "learning_rate": 3.2441759872955367e-07, + "loss": 0.44316792488098145, + "step": 6502 + }, + { + "epoch": 1.4991354466858788, + "grad_norm": 1.5504390307833396, + "learning_rate": 3.2413661802293633e-07, + "loss": 0.4260290861129761, + "step": 6503 + }, + { + "epoch": 1.4993659942363111, + "grad_norm": 1.4857903718592202, + "learning_rate": 3.2385573551223733e-07, + "loss": 0.4658172130584717, + "step": 6504 + }, + { + "epoch": 1.4995965417867434, + "grad_norm": 1.4040986890780063, + "learning_rate": 3.235749512382662e-07, + "loss": 0.4857284426689148, + "step": 6505 + }, + { + "epoch": 1.4998270893371757, + "grad_norm": 1.6098416781838087, + "learning_rate": 3.232942652418185e-07, + "loss": 0.3886150121688843, + "step": 6506 + }, + { + "epoch": 1.500057636887608, + "grad_norm": 1.5908712909726912, + "learning_rate": 3.2301367756367383e-07, + "loss": 0.47719478607177734, + "step": 6507 + }, + { + "epoch": 1.5002881844380402, + "grad_norm": 1.5313150321043758, + "learning_rate": 3.227331882445995e-07, + "loss": 0.4501890540122986, + "step": 6508 + }, + { + "epoch": 1.5005187319884725, + "grad_norm": 1.6816112124849405, + "learning_rate": 3.224527973253472e-07, + "loss": 0.5312929153442383, + "step": 6509 + }, + { + "epoch": 1.5007492795389048, + "grad_norm": 1.7099512604146767, + "learning_rate": 3.22172504846655e-07, + "loss": 0.4247457981109619, + "step": 6510 + }, + { + "epoch": 1.500979827089337, + "grad_norm": 1.5170821792110236, + "learning_rate": 3.2189231084924693e-07, + "loss": 0.41195109486579895, + "step": 6511 + }, + { + "epoch": 1.5012103746397694, + "grad_norm": 1.414534154865254, + "learning_rate": 3.2161221537383187e-07, + "loss": 0.49529117345809937, + "step": 6512 + }, + { + "epoch": 1.5014409221902016, + "grad_norm": 1.7823472150342257, + "learning_rate": 3.213322184611045e-07, + "loss": 0.47565385699272156, + "step": 6513 + }, + { + "epoch": 1.501671469740634, + "grad_norm": 1.59315040296854, + "learning_rate": 3.210523201517461e-07, + "loss": 0.44504472613334656, + "step": 6514 + }, + { + "epoch": 1.5019020172910662, + "grad_norm": 1.578129124805014, + "learning_rate": 3.2077252048642224e-07, + "loss": 0.46689483523368835, + "step": 6515 + }, + { + "epoch": 1.5021325648414985, + "grad_norm": 1.9438217919470961, + "learning_rate": 3.2049281950578554e-07, + "loss": 0.4728453755378723, + "step": 6516 + }, + { + "epoch": 1.5023631123919308, + "grad_norm": 1.5831150547958008, + "learning_rate": 3.2021321725047326e-07, + "loss": 0.4356672167778015, + "step": 6517 + }, + { + "epoch": 1.502593659942363, + "grad_norm": 1.6622807686406893, + "learning_rate": 3.1993371376110903e-07, + "loss": 0.4222509562969208, + "step": 6518 + }, + { + "epoch": 1.5028242074927953, + "grad_norm": 1.4965022852461283, + "learning_rate": 3.1965430907830157e-07, + "loss": 0.3961385190486908, + "step": 6519 + }, + { + "epoch": 1.5030547550432276, + "grad_norm": 1.7015715818454764, + "learning_rate": 3.193750032426452e-07, + "loss": 0.40793734788894653, + "step": 6520 + }, + { + "epoch": 1.5032853025936599, + "grad_norm": 1.2428021579416293, + "learning_rate": 3.190957962947205e-07, + "loss": 0.418218195438385, + "step": 6521 + }, + { + "epoch": 1.5035158501440922, + "grad_norm": 1.5780813489572465, + "learning_rate": 3.188166882750937e-07, + "loss": 0.4289787709712982, + "step": 6522 + }, + { + "epoch": 1.5037463976945245, + "grad_norm": 1.4926692792678622, + "learning_rate": 3.185376792243154e-07, + "loss": 0.41293439269065857, + "step": 6523 + }, + { + "epoch": 1.5039769452449567, + "grad_norm": 1.6656656082223846, + "learning_rate": 3.182587691829236e-07, + "loss": 0.5296646952629089, + "step": 6524 + }, + { + "epoch": 1.504207492795389, + "grad_norm": 1.6592089492848106, + "learning_rate": 3.179799581914406e-07, + "loss": 0.40602487325668335, + "step": 6525 + }, + { + "epoch": 1.5044380403458213, + "grad_norm": 1.9931387488132102, + "learning_rate": 3.1770124629037445e-07, + "loss": 0.4724326729774475, + "step": 6526 + }, + { + "epoch": 1.5046685878962536, + "grad_norm": 1.5756364961675935, + "learning_rate": 3.174226335202197e-07, + "loss": 0.49090898036956787, + "step": 6527 + }, + { + "epoch": 1.5048991354466859, + "grad_norm": 2.036877642601554, + "learning_rate": 3.171441199214553e-07, + "loss": 0.4932633638381958, + "step": 6528 + }, + { + "epoch": 1.5051296829971181, + "grad_norm": 1.564392641724374, + "learning_rate": 3.168657055345466e-07, + "loss": 0.45044368505477905, + "step": 6529 + }, + { + "epoch": 1.5053602305475504, + "grad_norm": 1.801725407800878, + "learning_rate": 3.165873903999449e-07, + "loss": 0.5444917678833008, + "step": 6530 + }, + { + "epoch": 1.5055907780979827, + "grad_norm": 1.6246016746918435, + "learning_rate": 3.163091745580857e-07, + "loss": 0.45643138885498047, + "step": 6531 + }, + { + "epoch": 1.505821325648415, + "grad_norm": 1.5583644902514906, + "learning_rate": 3.160310580493913e-07, + "loss": 0.45805928111076355, + "step": 6532 + }, + { + "epoch": 1.5060518731988473, + "grad_norm": 2.1170198831310096, + "learning_rate": 3.157530409142687e-07, + "loss": 0.4036891460418701, + "step": 6533 + }, + { + "epoch": 1.5062824207492795, + "grad_norm": 1.7185763373562066, + "learning_rate": 3.154751231931111e-07, + "loss": 0.5445838570594788, + "step": 6534 + }, + { + "epoch": 1.5065129682997118, + "grad_norm": 1.9448138702130089, + "learning_rate": 3.1519730492629737e-07, + "loss": 0.5139729976654053, + "step": 6535 + }, + { + "epoch": 1.506743515850144, + "grad_norm": 1.7903199300823514, + "learning_rate": 3.1491958615419123e-07, + "loss": 0.4956102669239044, + "step": 6536 + }, + { + "epoch": 1.5069740634005764, + "grad_norm": 1.408735724577368, + "learning_rate": 3.146419669171426e-07, + "loss": 0.4758613705635071, + "step": 6537 + }, + { + "epoch": 1.5072046109510087, + "grad_norm": 1.4805391629752656, + "learning_rate": 3.1436444725548674e-07, + "loss": 0.41319960355758667, + "step": 6538 + }, + { + "epoch": 1.507435158501441, + "grad_norm": 1.454596870187288, + "learning_rate": 3.140870272095437e-07, + "loss": 0.4641035795211792, + "step": 6539 + }, + { + "epoch": 1.5076657060518732, + "grad_norm": 1.7193566807860163, + "learning_rate": 3.138097068196206e-07, + "loss": 0.4940011501312256, + "step": 6540 + }, + { + "epoch": 1.5078962536023055, + "grad_norm": 1.734016298128087, + "learning_rate": 3.135324861260085e-07, + "loss": 0.5018976330757141, + "step": 6541 + }, + { + "epoch": 1.5081268011527378, + "grad_norm": 1.5024512602760236, + "learning_rate": 3.132553651689849e-07, + "loss": 0.41456544399261475, + "step": 6542 + }, + { + "epoch": 1.50835734870317, + "grad_norm": 1.3831056672275375, + "learning_rate": 3.1297834398881293e-07, + "loss": 0.43678340315818787, + "step": 6543 + }, + { + "epoch": 1.5085878962536023, + "grad_norm": 1.6751460423842672, + "learning_rate": 3.1270142262574084e-07, + "loss": 0.4336814880371094, + "step": 6544 + }, + { + "epoch": 1.5088184438040346, + "grad_norm": 1.763114589172267, + "learning_rate": 3.124246011200018e-07, + "loss": 0.4593331217765808, + "step": 6545 + }, + { + "epoch": 1.509048991354467, + "grad_norm": 1.5015058915538149, + "learning_rate": 3.121478795118158e-07, + "loss": 0.4398609399795532, + "step": 6546 + }, + { + "epoch": 1.5092795389048992, + "grad_norm": 1.3983198539077313, + "learning_rate": 3.11871257841387e-07, + "loss": 0.40382882952690125, + "step": 6547 + }, + { + "epoch": 1.5095100864553315, + "grad_norm": 1.6180984558312, + "learning_rate": 3.115947361489064e-07, + "loss": 0.511704683303833, + "step": 6548 + }, + { + "epoch": 1.5097406340057637, + "grad_norm": 1.5988312105042757, + "learning_rate": 3.113183144745488e-07, + "loss": 0.4606817960739136, + "step": 6549 + }, + { + "epoch": 1.509971181556196, + "grad_norm": 1.792080137203916, + "learning_rate": 3.1104199285847645e-07, + "loss": 0.43390586972236633, + "step": 6550 + }, + { + "epoch": 1.5102017291066283, + "grad_norm": 1.6165670343276886, + "learning_rate": 3.1076577134083524e-07, + "loss": 0.4556368589401245, + "step": 6551 + }, + { + "epoch": 1.5104322766570606, + "grad_norm": 1.6645688544418311, + "learning_rate": 3.104896499617573e-07, + "loss": 0.477630078792572, + "step": 6552 + }, + { + "epoch": 1.5106628242074929, + "grad_norm": 1.2719017711677718, + "learning_rate": 3.102136287613606e-07, + "loss": 0.41332900524139404, + "step": 6553 + }, + { + "epoch": 1.5108933717579252, + "grad_norm": 1.348486974941644, + "learning_rate": 3.099377077797477e-07, + "loss": 0.4499499201774597, + "step": 6554 + }, + { + "epoch": 1.5111239193083574, + "grad_norm": 1.8893704868945012, + "learning_rate": 3.096618870570072e-07, + "loss": 0.47415900230407715, + "step": 6555 + }, + { + "epoch": 1.5113544668587897, + "grad_norm": 1.5506294716293978, + "learning_rate": 3.0938616663321346e-07, + "loss": 0.4729388952255249, + "step": 6556 + }, + { + "epoch": 1.511585014409222, + "grad_norm": 1.9901244862666623, + "learning_rate": 3.0911054654842547e-07, + "loss": 0.49651503562927246, + "step": 6557 + }, + { + "epoch": 1.5118155619596543, + "grad_norm": 1.6254741291231039, + "learning_rate": 3.0883502684268747e-07, + "loss": 0.4242505431175232, + "step": 6558 + }, + { + "epoch": 1.5120461095100866, + "grad_norm": 1.5927069260474938, + "learning_rate": 3.085596075560304e-07, + "loss": 0.4927797019481659, + "step": 6559 + }, + { + "epoch": 1.5122766570605188, + "grad_norm": 1.572555154857978, + "learning_rate": 3.0828428872846903e-07, + "loss": 0.5201178193092346, + "step": 6560 + }, + { + "epoch": 1.5125072046109511, + "grad_norm": 1.893958450562389, + "learning_rate": 3.0800907040000515e-07, + "loss": 0.5418535470962524, + "step": 6561 + }, + { + "epoch": 1.5127377521613834, + "grad_norm": 2.210710686259104, + "learning_rate": 3.077339526106243e-07, + "loss": 0.5631084442138672, + "step": 6562 + }, + { + "epoch": 1.5129682997118157, + "grad_norm": 1.4163424994270397, + "learning_rate": 3.07458935400299e-07, + "loss": 0.3862801790237427, + "step": 6563 + }, + { + "epoch": 1.513198847262248, + "grad_norm": 1.432435754713911, + "learning_rate": 3.071840188089859e-07, + "loss": 0.4399529695510864, + "step": 6564 + }, + { + "epoch": 1.5134293948126802, + "grad_norm": 1.416271606847585, + "learning_rate": 3.069092028766275e-07, + "loss": 0.42560017108917236, + "step": 6565 + }, + { + "epoch": 1.5136599423631125, + "grad_norm": 1.3843654986107572, + "learning_rate": 3.066344876431518e-07, + "loss": 0.5227498412132263, + "step": 6566 + }, + { + "epoch": 1.5138904899135448, + "grad_norm": 1.7845341383364082, + "learning_rate": 3.0635987314847234e-07, + "loss": 0.5001060962677002, + "step": 6567 + }, + { + "epoch": 1.514121037463977, + "grad_norm": 1.4633273931040118, + "learning_rate": 3.0608535943248725e-07, + "loss": 0.4475817382335663, + "step": 6568 + }, + { + "epoch": 1.5143515850144094, + "grad_norm": 1.776494937774852, + "learning_rate": 3.058109465350811e-07, + "loss": 0.41835230588912964, + "step": 6569 + }, + { + "epoch": 1.5145821325648416, + "grad_norm": 1.5649603798840601, + "learning_rate": 3.05536634496123e-07, + "loss": 0.43384939432144165, + "step": 6570 + }, + { + "epoch": 1.514812680115274, + "grad_norm": 1.4466265728431833, + "learning_rate": 3.0526242335546714e-07, + "loss": 0.49595510959625244, + "step": 6571 + }, + { + "epoch": 1.5150432276657062, + "grad_norm": 1.4382916193314434, + "learning_rate": 3.0498831315295425e-07, + "loss": 0.4870753288269043, + "step": 6572 + }, + { + "epoch": 1.5152737752161385, + "grad_norm": 2.104289673848665, + "learning_rate": 3.047143039284091e-07, + "loss": 0.5014097094535828, + "step": 6573 + }, + { + "epoch": 1.5155043227665708, + "grad_norm": 1.5411668776330472, + "learning_rate": 3.044403957216427e-07, + "loss": 0.45475223660469055, + "step": 6574 + }, + { + "epoch": 1.515734870317003, + "grad_norm": 2.2103660716265936, + "learning_rate": 3.0416658857245135e-07, + "loss": 0.4280875027179718, + "step": 6575 + }, + { + "epoch": 1.515965417867435, + "grad_norm": 1.411762768587303, + "learning_rate": 3.038928825206162e-07, + "loss": 0.41492798924446106, + "step": 6576 + }, + { + "epoch": 1.5161959654178674, + "grad_norm": 1.5234346382387383, + "learning_rate": 3.0361927760590356e-07, + "loss": 0.5283424854278564, + "step": 6577 + }, + { + "epoch": 1.5164265129682997, + "grad_norm": 1.7769534343214894, + "learning_rate": 3.0334577386806535e-07, + "loss": 0.5035547614097595, + "step": 6578 + }, + { + "epoch": 1.516657060518732, + "grad_norm": 1.7167448261757756, + "learning_rate": 3.03072371346839e-07, + "loss": 0.5148544907569885, + "step": 6579 + }, + { + "epoch": 1.5168876080691642, + "grad_norm": 1.8335675563589118, + "learning_rate": 3.0279907008194747e-07, + "loss": 0.48413559794425964, + "step": 6580 + }, + { + "epoch": 1.5171181556195965, + "grad_norm": 1.8842526874117616, + "learning_rate": 3.0252587011309785e-07, + "loss": 0.4580768942832947, + "step": 6581 + }, + { + "epoch": 1.5173487031700288, + "grad_norm": 1.7010607027180409, + "learning_rate": 3.0225277147998397e-07, + "loss": 0.49141448736190796, + "step": 6582 + }, + { + "epoch": 1.517579250720461, + "grad_norm": 1.5163321507217336, + "learning_rate": 3.0197977422228393e-07, + "loss": 0.490544855594635, + "step": 6583 + }, + { + "epoch": 1.5178097982708934, + "grad_norm": 1.56421821222723, + "learning_rate": 3.017068783796609e-07, + "loss": 0.4357878565788269, + "step": 6584 + }, + { + "epoch": 1.5180403458213256, + "grad_norm": 1.6556135569370023, + "learning_rate": 3.0143408399176463e-07, + "loss": 0.4955572485923767, + "step": 6585 + }, + { + "epoch": 1.518270893371758, + "grad_norm": 1.7663661601044416, + "learning_rate": 3.0116139109822855e-07, + "loss": 0.5085045695304871, + "step": 6586 + }, + { + "epoch": 1.5185014409221902, + "grad_norm": 1.4316549536529295, + "learning_rate": 3.008887997386725e-07, + "loss": 0.3946433663368225, + "step": 6587 + }, + { + "epoch": 1.5187319884726225, + "grad_norm": 1.5043787480659017, + "learning_rate": 3.006163099527016e-07, + "loss": 0.4135388433933258, + "step": 6588 + }, + { + "epoch": 1.5189625360230548, + "grad_norm": 1.4759287632093028, + "learning_rate": 3.003439217799052e-07, + "loss": 0.45946577191352844, + "step": 6589 + }, + { + "epoch": 1.519193083573487, + "grad_norm": 1.9119134231407664, + "learning_rate": 3.0007163525985823e-07, + "loss": 0.5455194711685181, + "step": 6590 + }, + { + "epoch": 1.5194236311239193, + "grad_norm": 2.3789785967639165, + "learning_rate": 2.9979945043212173e-07, + "loss": 0.49064499139785767, + "step": 6591 + }, + { + "epoch": 1.5196541786743516, + "grad_norm": 1.663692129917964, + "learning_rate": 2.9952736733624086e-07, + "loss": 0.42849159240722656, + "step": 6592 + }, + { + "epoch": 1.5198847262247839, + "grad_norm": 1.915483300525053, + "learning_rate": 2.9925538601174685e-07, + "loss": 0.47390738129615784, + "step": 6593 + }, + { + "epoch": 1.5201152737752162, + "grad_norm": 1.6913351541435369, + "learning_rate": 2.989835064981553e-07, + "loss": 0.5051196813583374, + "step": 6594 + }, + { + "epoch": 1.5203458213256484, + "grad_norm": 1.824205122638696, + "learning_rate": 2.98711728834968e-07, + "loss": 0.45643603801727295, + "step": 6595 + }, + { + "epoch": 1.5205763688760807, + "grad_norm": 1.7692485956097788, + "learning_rate": 2.984400530616712e-07, + "loss": 0.5199561715126038, + "step": 6596 + }, + { + "epoch": 1.520806916426513, + "grad_norm": 1.5344483393306434, + "learning_rate": 2.9816847921773614e-07, + "loss": 0.38316380977630615, + "step": 6597 + }, + { + "epoch": 1.5210374639769453, + "grad_norm": 1.813431679729583, + "learning_rate": 2.9789700734262036e-07, + "loss": 0.5445187091827393, + "step": 6598 + }, + { + "epoch": 1.5212680115273776, + "grad_norm": 2.052172217871983, + "learning_rate": 2.976256374757653e-07, + "loss": 0.4933769702911377, + "step": 6599 + }, + { + "epoch": 1.5214985590778098, + "grad_norm": 1.8156847296260061, + "learning_rate": 2.973543696565984e-07, + "loss": 0.4137638807296753, + "step": 6600 + }, + { + "epoch": 1.5217291066282421, + "grad_norm": 1.622501548220719, + "learning_rate": 2.970832039245325e-07, + "loss": 0.45649808645248413, + "step": 6601 + }, + { + "epoch": 1.5219596541786744, + "grad_norm": 1.748320177273405, + "learning_rate": 2.968121403189647e-07, + "loss": 0.5007699728012085, + "step": 6602 + }, + { + "epoch": 1.5221902017291065, + "grad_norm": 1.6579513532546735, + "learning_rate": 2.9654117887927755e-07, + "loss": 0.48721688985824585, + "step": 6603 + }, + { + "epoch": 1.5224207492795387, + "grad_norm": 1.3583198525190263, + "learning_rate": 2.962703196448394e-07, + "loss": 0.3983156681060791, + "step": 6604 + }, + { + "epoch": 1.522651296829971, + "grad_norm": 1.9077712460543044, + "learning_rate": 2.959995626550028e-07, + "loss": 0.5087774991989136, + "step": 6605 + }, + { + "epoch": 1.5228818443804033, + "grad_norm": 1.6900731884970928, + "learning_rate": 2.957289079491064e-07, + "loss": 0.4693753719329834, + "step": 6606 + }, + { + "epoch": 1.5231123919308356, + "grad_norm": 1.6484608802659817, + "learning_rate": 2.954583555664731e-07, + "loss": 0.4728010892868042, + "step": 6607 + }, + { + "epoch": 1.5233429394812679, + "grad_norm": 1.9993958773405, + "learning_rate": 2.951879055464118e-07, + "loss": 0.48341798782348633, + "step": 6608 + }, + { + "epoch": 1.5235734870317001, + "grad_norm": 1.777920307748833, + "learning_rate": 2.9491755792821584e-07, + "loss": 0.49080032110214233, + "step": 6609 + }, + { + "epoch": 1.5238040345821324, + "grad_norm": 1.736793286100546, + "learning_rate": 2.946473127511635e-07, + "loss": 0.49306032061576843, + "step": 6610 + }, + { + "epoch": 1.5240345821325647, + "grad_norm": 1.7339244766073734, + "learning_rate": 2.94377170054519e-07, + "loss": 0.457378625869751, + "step": 6611 + }, + { + "epoch": 1.524265129682997, + "grad_norm": 1.75933167721537, + "learning_rate": 2.9410712987753163e-07, + "loss": 0.5000994801521301, + "step": 6612 + }, + { + "epoch": 1.5244956772334293, + "grad_norm": 1.2946273184485895, + "learning_rate": 2.938371922594347e-07, + "loss": 0.4149599075317383, + "step": 6613 + }, + { + "epoch": 1.5247262247838616, + "grad_norm": 1.8715591693483276, + "learning_rate": 2.9356735723944827e-07, + "loss": 0.4516978859901428, + "step": 6614 + }, + { + "epoch": 1.5249567723342938, + "grad_norm": 1.5047320302067637, + "learning_rate": 2.932976248567752e-07, + "loss": 0.49368464946746826, + "step": 6615 + }, + { + "epoch": 1.5251873198847261, + "grad_norm": 1.6819963043711963, + "learning_rate": 2.9302799515060574e-07, + "loss": 0.5174213647842407, + "step": 6616 + }, + { + "epoch": 1.5254178674351584, + "grad_norm": 1.5888803256634056, + "learning_rate": 2.927584681601144e-07, + "loss": 0.3940533399581909, + "step": 6617 + }, + { + "epoch": 1.5256484149855907, + "grad_norm": 1.331505711299331, + "learning_rate": 2.9248904392445993e-07, + "loss": 0.38714292645454407, + "step": 6618 + }, + { + "epoch": 1.525878962536023, + "grad_norm": 1.796591789965291, + "learning_rate": 2.9221972248278734e-07, + "loss": 0.45089441537857056, + "step": 6619 + }, + { + "epoch": 1.5261095100864552, + "grad_norm": 1.5199676831316118, + "learning_rate": 2.9195050387422693e-07, + "loss": 0.47767460346221924, + "step": 6620 + }, + { + "epoch": 1.5263400576368875, + "grad_norm": 2.02038935767357, + "learning_rate": 2.9168138813789176e-07, + "loss": 0.4977542757987976, + "step": 6621 + }, + { + "epoch": 1.5265706051873198, + "grad_norm": 1.6269397756794977, + "learning_rate": 2.914123753128829e-07, + "loss": 0.4222266972064972, + "step": 6622 + }, + { + "epoch": 1.526801152737752, + "grad_norm": 1.4270360279099836, + "learning_rate": 2.9114346543828425e-07, + "loss": 0.4695430099964142, + "step": 6623 + }, + { + "epoch": 1.5270317002881844, + "grad_norm": 1.8374099270801607, + "learning_rate": 2.9087465855316595e-07, + "loss": 0.5093779563903809, + "step": 6624 + }, + { + "epoch": 1.5272622478386166, + "grad_norm": 1.644131671999478, + "learning_rate": 2.9060595469658324e-07, + "loss": 0.4236484169960022, + "step": 6625 + }, + { + "epoch": 1.527492795389049, + "grad_norm": 1.7674503617278896, + "learning_rate": 2.903373539075755e-07, + "loss": 0.4549310803413391, + "step": 6626 + }, + { + "epoch": 1.5277233429394812, + "grad_norm": 1.9004549728971407, + "learning_rate": 2.9006885622516765e-07, + "loss": 0.5295060873031616, + "step": 6627 + }, + { + "epoch": 1.5279538904899135, + "grad_norm": 1.6391132617373654, + "learning_rate": 2.898004616883699e-07, + "loss": 0.43450385332107544, + "step": 6628 + }, + { + "epoch": 1.5281844380403458, + "grad_norm": 1.3851601688362805, + "learning_rate": 2.895321703361767e-07, + "loss": 0.4820800721645355, + "step": 6629 + }, + { + "epoch": 1.528414985590778, + "grad_norm": 1.8242908406641638, + "learning_rate": 2.8926398220756874e-07, + "loss": 0.4907156825065613, + "step": 6630 + }, + { + "epoch": 1.5286455331412103, + "grad_norm": 1.5568983091450215, + "learning_rate": 2.889958973415101e-07, + "loss": 0.39823904633522034, + "step": 6631 + }, + { + "epoch": 1.5288760806916426, + "grad_norm": 1.613972299703288, + "learning_rate": 2.887279157769514e-07, + "loss": 0.4582018256187439, + "step": 6632 + }, + { + "epoch": 1.5291066282420749, + "grad_norm": 1.447957337480398, + "learning_rate": 2.8846003755282744e-07, + "loss": 0.43874603509902954, + "step": 6633 + }, + { + "epoch": 1.5293371757925072, + "grad_norm": 1.7886894424220765, + "learning_rate": 2.8819226270805775e-07, + "loss": 0.38620924949645996, + "step": 6634 + }, + { + "epoch": 1.5295677233429394, + "grad_norm": 1.403143890487367, + "learning_rate": 2.879245912815473e-07, + "loss": 0.41149455308914185, + "step": 6635 + }, + { + "epoch": 1.5297982708933717, + "grad_norm": 1.5521023219473429, + "learning_rate": 2.8765702331218667e-07, + "loss": 0.4521693289279938, + "step": 6636 + }, + { + "epoch": 1.530028818443804, + "grad_norm": 1.8134136314855847, + "learning_rate": 2.8738955883884983e-07, + "loss": 0.5196795463562012, + "step": 6637 + }, + { + "epoch": 1.5302593659942363, + "grad_norm": 1.8251461959103044, + "learning_rate": 2.8712219790039726e-07, + "loss": 0.4373961091041565, + "step": 6638 + }, + { + "epoch": 1.5304899135446686, + "grad_norm": 1.6509894275378938, + "learning_rate": 2.868549405356734e-07, + "loss": 0.39191538095474243, + "step": 6639 + }, + { + "epoch": 1.5307204610951008, + "grad_norm": 1.7497602657864888, + "learning_rate": 2.865877867835076e-07, + "loss": 0.48131316900253296, + "step": 6640 + }, + { + "epoch": 1.5309510086455331, + "grad_norm": 1.670385630744972, + "learning_rate": 2.863207366827153e-07, + "loss": 0.4298658072948456, + "step": 6641 + }, + { + "epoch": 1.5311815561959654, + "grad_norm": 1.3877032472926891, + "learning_rate": 2.8605379027209545e-07, + "loss": 0.4172072410583496, + "step": 6642 + }, + { + "epoch": 1.5314121037463977, + "grad_norm": 1.4575459808797373, + "learning_rate": 2.8578694759043295e-07, + "loss": 0.47554445266723633, + "step": 6643 + }, + { + "epoch": 1.53164265129683, + "grad_norm": 1.6657810796732697, + "learning_rate": 2.8552020867649704e-07, + "loss": 0.39061158895492554, + "step": 6644 + }, + { + "epoch": 1.5318731988472623, + "grad_norm": 2.1350395786045415, + "learning_rate": 2.8525357356904243e-07, + "loss": 0.5809512138366699, + "step": 6645 + }, + { + "epoch": 1.5321037463976945, + "grad_norm": 1.6219135412965018, + "learning_rate": 2.849870423068083e-07, + "loss": 0.45639652013778687, + "step": 6646 + }, + { + "epoch": 1.5323342939481268, + "grad_norm": 1.8136030445889209, + "learning_rate": 2.847206149285184e-07, + "loss": 0.49183255434036255, + "step": 6647 + }, + { + "epoch": 1.532564841498559, + "grad_norm": 1.5868308524868744, + "learning_rate": 2.844542914728822e-07, + "loss": 0.497137188911438, + "step": 6648 + }, + { + "epoch": 1.5327953890489914, + "grad_norm": 1.3089081966783787, + "learning_rate": 2.8418807197859415e-07, + "loss": 0.4407503008842468, + "step": 6649 + }, + { + "epoch": 1.5330259365994237, + "grad_norm": 1.5201255778420197, + "learning_rate": 2.839219564843326e-07, + "loss": 0.5553654432296753, + "step": 6650 + }, + { + "epoch": 1.533256484149856, + "grad_norm": 1.780260170294563, + "learning_rate": 2.836559450287618e-07, + "loss": 0.5073549151420593, + "step": 6651 + }, + { + "epoch": 1.5334870317002882, + "grad_norm": 1.5664457648186267, + "learning_rate": 2.8339003765053017e-07, + "loss": 0.47718626260757446, + "step": 6652 + }, + { + "epoch": 1.5337175792507205, + "grad_norm": 1.6245823258461864, + "learning_rate": 2.831242343882709e-07, + "loss": 0.5722469687461853, + "step": 6653 + }, + { + "epoch": 1.5339481268011528, + "grad_norm": 1.6980020739907509, + "learning_rate": 2.8285853528060334e-07, + "loss": 0.46500977873802185, + "step": 6654 + }, + { + "epoch": 1.534178674351585, + "grad_norm": 1.4448204582266613, + "learning_rate": 2.8259294036613e-07, + "loss": 0.40789636969566345, + "step": 6655 + }, + { + "epoch": 1.5344092219020173, + "grad_norm": 1.833854839817425, + "learning_rate": 2.8232744968343936e-07, + "loss": 0.5013411045074463, + "step": 6656 + }, + { + "epoch": 1.5346397694524496, + "grad_norm": 1.480266495626672, + "learning_rate": 2.820620632711048e-07, + "loss": 0.5018881559371948, + "step": 6657 + }, + { + "epoch": 1.534870317002882, + "grad_norm": 1.9887255008924078, + "learning_rate": 2.817967811676839e-07, + "loss": 0.4205210208892822, + "step": 6658 + }, + { + "epoch": 1.5351008645533142, + "grad_norm": 1.8583776706428177, + "learning_rate": 2.815316034117193e-07, + "loss": 0.4614740014076233, + "step": 6659 + }, + { + "epoch": 1.5353314121037465, + "grad_norm": 1.814790283470267, + "learning_rate": 2.812665300417384e-07, + "loss": 0.528913676738739, + "step": 6660 + }, + { + "epoch": 1.5355619596541787, + "grad_norm": 1.8558142199957448, + "learning_rate": 2.8100156109625385e-07, + "loss": 0.46981993317604065, + "step": 6661 + }, + { + "epoch": 1.535792507204611, + "grad_norm": 1.6880680317462484, + "learning_rate": 2.807366966137632e-07, + "loss": 0.4568699598312378, + "step": 6662 + }, + { + "epoch": 1.5360230547550433, + "grad_norm": 1.499127303889182, + "learning_rate": 2.804719366327479e-07, + "loss": 0.5659410953521729, + "step": 6663 + }, + { + "epoch": 1.5362536023054756, + "grad_norm": 2.0231461298701707, + "learning_rate": 2.802072811916754e-07, + "loss": 0.36596113443374634, + "step": 6664 + }, + { + "epoch": 1.5364841498559079, + "grad_norm": 1.6883253033702186, + "learning_rate": 2.799427303289971e-07, + "loss": 0.4764753580093384, + "step": 6665 + }, + { + "epoch": 1.5367146974063401, + "grad_norm": 1.6695170645245285, + "learning_rate": 2.796782840831491e-07, + "loss": 0.5203686356544495, + "step": 6666 + }, + { + "epoch": 1.5369452449567724, + "grad_norm": 1.4951683162021838, + "learning_rate": 2.7941394249255336e-07, + "loss": 0.42811352014541626, + "step": 6667 + }, + { + "epoch": 1.5371757925072047, + "grad_norm": 1.5858028897704401, + "learning_rate": 2.7914970559561546e-07, + "loss": 0.5146535038948059, + "step": 6668 + }, + { + "epoch": 1.537406340057637, + "grad_norm": 1.919094576999998, + "learning_rate": 2.788855734307264e-07, + "loss": 0.45644527673721313, + "step": 6669 + }, + { + "epoch": 1.5376368876080693, + "grad_norm": 1.4614837709110287, + "learning_rate": 2.786215460362622e-07, + "loss": 0.4498692750930786, + "step": 6670 + }, + { + "epoch": 1.5378674351585016, + "grad_norm": 1.7804653724848765, + "learning_rate": 2.783576234505831e-07, + "loss": 0.5310048460960388, + "step": 6671 + }, + { + "epoch": 1.5380979827089338, + "grad_norm": 1.96879936627813, + "learning_rate": 2.780938057120339e-07, + "loss": 0.5187146663665771, + "step": 6672 + }, + { + "epoch": 1.5383285302593661, + "grad_norm": 1.5356727607037908, + "learning_rate": 2.778300928589451e-07, + "loss": 0.4811710715293884, + "step": 6673 + }, + { + "epoch": 1.5385590778097984, + "grad_norm": 1.666562643230331, + "learning_rate": 2.7756648492963096e-07, + "loss": 0.49236875772476196, + "step": 6674 + }, + { + "epoch": 1.5387896253602307, + "grad_norm": 1.3329310825608907, + "learning_rate": 2.7730298196239157e-07, + "loss": 0.36981940269470215, + "step": 6675 + }, + { + "epoch": 1.539020172910663, + "grad_norm": 1.597840023115702, + "learning_rate": 2.7703958399551054e-07, + "loss": 0.47676587104797363, + "step": 6676 + }, + { + "epoch": 1.5392507204610952, + "grad_norm": 1.9450827633385033, + "learning_rate": 2.767762910672574e-07, + "loss": 0.4283655285835266, + "step": 6677 + }, + { + "epoch": 1.5394812680115275, + "grad_norm": 1.546272242244294, + "learning_rate": 2.7651310321588573e-07, + "loss": 0.4599767327308655, + "step": 6678 + }, + { + "epoch": 1.5397118155619598, + "grad_norm": 1.5217395243560299, + "learning_rate": 2.7625002047963343e-07, + "loss": 0.44924429059028625, + "step": 6679 + }, + { + "epoch": 1.539942363112392, + "grad_norm": 1.435567998485271, + "learning_rate": 2.7598704289672423e-07, + "loss": 0.46689385175704956, + "step": 6680 + }, + { + "epoch": 1.5401729106628244, + "grad_norm": 1.7978574318643632, + "learning_rate": 2.7572417050536624e-07, + "loss": 0.41301921010017395, + "step": 6681 + }, + { + "epoch": 1.5404034582132566, + "grad_norm": 1.3658980896134711, + "learning_rate": 2.7546140334375145e-07, + "loss": 0.4051539897918701, + "step": 6682 + }, + { + "epoch": 1.540634005763689, + "grad_norm": 1.7841621413207764, + "learning_rate": 2.7519874145005784e-07, + "loss": 0.5061618089675903, + "step": 6683 + }, + { + "epoch": 1.5408645533141212, + "grad_norm": 1.8580808389371584, + "learning_rate": 2.7493618486244707e-07, + "loss": 0.5225817561149597, + "step": 6684 + }, + { + "epoch": 1.5410951008645535, + "grad_norm": 1.6859040287421543, + "learning_rate": 2.746737336190658e-07, + "loss": 0.4648950695991516, + "step": 6685 + }, + { + "epoch": 1.5413256484149855, + "grad_norm": 1.860078359778471, + "learning_rate": 2.744113877580457e-07, + "loss": 0.43647703528404236, + "step": 6686 + }, + { + "epoch": 1.5415561959654178, + "grad_norm": 1.4941624249640828, + "learning_rate": 2.741491473175027e-07, + "loss": 0.521142840385437, + "step": 6687 + }, + { + "epoch": 1.54178674351585, + "grad_norm": 1.532927468541072, + "learning_rate": 2.73887012335538e-07, + "loss": 0.4534454941749573, + "step": 6688 + }, + { + "epoch": 1.5420172910662824, + "grad_norm": 1.4675733231304722, + "learning_rate": 2.736249828502364e-07, + "loss": 0.46668314933776855, + "step": 6689 + }, + { + "epoch": 1.5422478386167147, + "grad_norm": 1.5326573954944585, + "learning_rate": 2.7336305889966883e-07, + "loss": 0.4454139471054077, + "step": 6690 + }, + { + "epoch": 1.542478386167147, + "grad_norm": 1.7336061149384574, + "learning_rate": 2.7310124052188974e-07, + "loss": 0.5094617605209351, + "step": 6691 + }, + { + "epoch": 1.5427089337175792, + "grad_norm": 1.5785750525116626, + "learning_rate": 2.7283952775493837e-07, + "loss": 0.45893144607543945, + "step": 6692 + }, + { + "epoch": 1.5429394812680115, + "grad_norm": 1.7402929016413005, + "learning_rate": 2.72577920636839e-07, + "loss": 0.5504060983657837, + "step": 6693 + }, + { + "epoch": 1.5431700288184438, + "grad_norm": 1.6147246247988525, + "learning_rate": 2.72316419205601e-07, + "loss": 0.45066389441490173, + "step": 6694 + }, + { + "epoch": 1.543400576368876, + "grad_norm": 1.7322862041204292, + "learning_rate": 2.7205502349921693e-07, + "loss": 0.5064136385917664, + "step": 6695 + }, + { + "epoch": 1.5436311239193083, + "grad_norm": 1.552736874900683, + "learning_rate": 2.717937335556656e-07, + "loss": 0.4684542715549469, + "step": 6696 + }, + { + "epoch": 1.5438616714697406, + "grad_norm": 1.4915294954935223, + "learning_rate": 2.715325494129095e-07, + "loss": 0.5206316709518433, + "step": 6697 + }, + { + "epoch": 1.544092219020173, + "grad_norm": 1.4864642883898989, + "learning_rate": 2.7127147110889546e-07, + "loss": 0.4287317097187042, + "step": 6698 + }, + { + "epoch": 1.5443227665706052, + "grad_norm": 2.0644796323853956, + "learning_rate": 2.710104986815562e-07, + "loss": 0.5084264278411865, + "step": 6699 + }, + { + "epoch": 1.5445533141210375, + "grad_norm": 2.1288340424993213, + "learning_rate": 2.7074963216880763e-07, + "loss": 0.5043609142303467, + "step": 6700 + }, + { + "epoch": 1.5447838616714697, + "grad_norm": 1.59845836991449, + "learning_rate": 2.7048887160855126e-07, + "loss": 0.47983455657958984, + "step": 6701 + }, + { + "epoch": 1.545014409221902, + "grad_norm": 1.5964312039079533, + "learning_rate": 2.7022821703867324e-07, + "loss": 0.46194732189178467, + "step": 6702 + }, + { + "epoch": 1.5452449567723343, + "grad_norm": 1.6469503223160236, + "learning_rate": 2.699676684970437e-07, + "loss": 0.45597344636917114, + "step": 6703 + }, + { + "epoch": 1.5454755043227666, + "grad_norm": 1.6935530011692812, + "learning_rate": 2.697072260215174e-07, + "loss": 0.4783972501754761, + "step": 6704 + }, + { + "epoch": 1.5457060518731989, + "grad_norm": 1.380249835342775, + "learning_rate": 2.694468896499338e-07, + "loss": 0.38894015550613403, + "step": 6705 + }, + { + "epoch": 1.5459365994236312, + "grad_norm": 1.3839758510594902, + "learning_rate": 2.691866594201173e-07, + "loss": 0.4892553389072418, + "step": 6706 + }, + { + "epoch": 1.5461671469740634, + "grad_norm": 1.566000483112353, + "learning_rate": 2.689265353698771e-07, + "loss": 0.5352126359939575, + "step": 6707 + }, + { + "epoch": 1.5463976945244957, + "grad_norm": 1.5309860118301892, + "learning_rate": 2.6866651753700576e-07, + "loss": 0.4805898666381836, + "step": 6708 + }, + { + "epoch": 1.546628242074928, + "grad_norm": 1.5493878656820466, + "learning_rate": 2.684066059592818e-07, + "loss": 0.45238590240478516, + "step": 6709 + }, + { + "epoch": 1.5468587896253603, + "grad_norm": 1.8917787793535266, + "learning_rate": 2.6814680067446736e-07, + "loss": 0.34512025117874146, + "step": 6710 + }, + { + "epoch": 1.5470893371757926, + "grad_norm": 1.485168346068275, + "learning_rate": 2.6788710172030916e-07, + "loss": 0.5297055244445801, + "step": 6711 + }, + { + "epoch": 1.5473198847262248, + "grad_norm": 1.374095554873191, + "learning_rate": 2.6762750913453947e-07, + "loss": 0.4209028482437134, + "step": 6712 + }, + { + "epoch": 1.547550432276657, + "grad_norm": 1.7935969299479495, + "learning_rate": 2.673680229548736e-07, + "loss": 0.4791908264160156, + "step": 6713 + }, + { + "epoch": 1.5477809798270892, + "grad_norm": 1.90306719497615, + "learning_rate": 2.671086432190125e-07, + "loss": 0.4767991602420807, + "step": 6714 + }, + { + "epoch": 1.5480115273775215, + "grad_norm": 1.7090911972846048, + "learning_rate": 2.668493699646418e-07, + "loss": 0.4787396192550659, + "step": 6715 + }, + { + "epoch": 1.5482420749279537, + "grad_norm": 1.6003570038579618, + "learning_rate": 2.6659020322943084e-07, + "loss": 0.42476657032966614, + "step": 6716 + }, + { + "epoch": 1.548472622478386, + "grad_norm": 1.7618849900264748, + "learning_rate": 2.6633114305103357e-07, + "loss": 0.5486623048782349, + "step": 6717 + }, + { + "epoch": 1.5487031700288183, + "grad_norm": 1.7089266434509598, + "learning_rate": 2.6607218946708933e-07, + "loss": 0.4897763431072235, + "step": 6718 + }, + { + "epoch": 1.5489337175792506, + "grad_norm": 1.530185410258912, + "learning_rate": 2.6581334251522057e-07, + "loss": 0.5206410884857178, + "step": 6719 + }, + { + "epoch": 1.5491642651296829, + "grad_norm": 1.585267194715654, + "learning_rate": 2.6555460223303603e-07, + "loss": 0.5253853797912598, + "step": 6720 + }, + { + "epoch": 1.5493948126801151, + "grad_norm": 1.5285645352455675, + "learning_rate": 2.652959686581272e-07, + "loss": 0.4229533076286316, + "step": 6721 + }, + { + "epoch": 1.5496253602305474, + "grad_norm": 1.465857605111891, + "learning_rate": 2.650374418280714e-07, + "loss": 0.38957664370536804, + "step": 6722 + }, + { + "epoch": 1.5498559077809797, + "grad_norm": 1.7558356792496363, + "learning_rate": 2.6477902178042965e-07, + "loss": 0.49704110622406006, + "step": 6723 + }, + { + "epoch": 1.550086455331412, + "grad_norm": 1.5616300844187965, + "learning_rate": 2.6452070855274735e-07, + "loss": 0.4891868531703949, + "step": 6724 + }, + { + "epoch": 1.5503170028818443, + "grad_norm": 1.716364048095666, + "learning_rate": 2.6426250218255506e-07, + "loss": 0.4441138505935669, + "step": 6725 + }, + { + "epoch": 1.5505475504322765, + "grad_norm": 1.7565337255811777, + "learning_rate": 2.6400440270736776e-07, + "loss": 0.3671753704547882, + "step": 6726 + }, + { + "epoch": 1.5507780979827088, + "grad_norm": 1.6232570381426292, + "learning_rate": 2.6374641016468413e-07, + "loss": 0.46538716554641724, + "step": 6727 + }, + { + "epoch": 1.551008645533141, + "grad_norm": 1.3516189453331386, + "learning_rate": 2.6348852459198855e-07, + "loss": 0.45591798424720764, + "step": 6728 + }, + { + "epoch": 1.5512391930835734, + "grad_norm": 1.5308684864062825, + "learning_rate": 2.63230746026748e-07, + "loss": 0.438511461019516, + "step": 6729 + }, + { + "epoch": 1.5514697406340057, + "grad_norm": 1.8201832016490442, + "learning_rate": 2.629730745064156e-07, + "loss": 0.463054358959198, + "step": 6730 + }, + { + "epoch": 1.551700288184438, + "grad_norm": 1.5514099053161647, + "learning_rate": 2.6271551006842865e-07, + "loss": 0.4669750928878784, + "step": 6731 + }, + { + "epoch": 1.5519308357348702, + "grad_norm": 1.7191439883700805, + "learning_rate": 2.6245805275020783e-07, + "loss": 0.399213969707489, + "step": 6732 + }, + { + "epoch": 1.5521613832853025, + "grad_norm": 1.8508399260110915, + "learning_rate": 2.622007025891598e-07, + "loss": 0.47209489345550537, + "step": 6733 + }, + { + "epoch": 1.5523919308357348, + "grad_norm": 1.8455257948540806, + "learning_rate": 2.619434596226746e-07, + "loss": 0.4839526414871216, + "step": 6734 + }, + { + "epoch": 1.552622478386167, + "grad_norm": 1.6023080770873257, + "learning_rate": 2.616863238881266e-07, + "loss": 0.47192198038101196, + "step": 6735 + }, + { + "epoch": 1.5528530259365994, + "grad_norm": 1.8657326968382677, + "learning_rate": 2.614292954228754e-07, + "loss": 0.389009028673172, + "step": 6736 + }, + { + "epoch": 1.5530835734870316, + "grad_norm": 1.8766723343050034, + "learning_rate": 2.611723742642641e-07, + "loss": 0.47060155868530273, + "step": 6737 + }, + { + "epoch": 1.553314121037464, + "grad_norm": 1.4040566740202909, + "learning_rate": 2.6091556044962094e-07, + "loss": 0.4835396409034729, + "step": 6738 + }, + { + "epoch": 1.5535446685878962, + "grad_norm": 1.6935927742553605, + "learning_rate": 2.6065885401625867e-07, + "loss": 0.5195285677909851, + "step": 6739 + }, + { + "epoch": 1.5537752161383285, + "grad_norm": 2.006654013641868, + "learning_rate": 2.6040225500147363e-07, + "loss": 0.48561543226242065, + "step": 6740 + }, + { + "epoch": 1.5540057636887608, + "grad_norm": 1.6144400066706488, + "learning_rate": 2.601457634425471e-07, + "loss": 0.4577465355396271, + "step": 6741 + }, + { + "epoch": 1.554236311239193, + "grad_norm": 1.364151207746382, + "learning_rate": 2.5988937937674427e-07, + "loss": 0.3737722635269165, + "step": 6742 + }, + { + "epoch": 1.5544668587896253, + "grad_norm": 1.815041454227608, + "learning_rate": 2.5963310284131545e-07, + "loss": 0.4451100528240204, + "step": 6743 + }, + { + "epoch": 1.5546974063400576, + "grad_norm": 1.4908238286019466, + "learning_rate": 2.5937693387349513e-07, + "loss": 0.3610233664512634, + "step": 6744 + }, + { + "epoch": 1.5549279538904899, + "grad_norm": 1.5670821566769393, + "learning_rate": 2.591208725105015e-07, + "loss": 0.43974393606185913, + "step": 6745 + }, + { + "epoch": 1.5551585014409222, + "grad_norm": 2.0006014338762346, + "learning_rate": 2.588649187895382e-07, + "loss": 0.5391696095466614, + "step": 6746 + }, + { + "epoch": 1.5553890489913544, + "grad_norm": 1.8335182104585823, + "learning_rate": 2.586090727477923e-07, + "loss": 0.49853283166885376, + "step": 6747 + }, + { + "epoch": 1.5556195965417867, + "grad_norm": 1.8206435100176481, + "learning_rate": 2.5835333442243524e-07, + "loss": 0.4586958885192871, + "step": 6748 + }, + { + "epoch": 1.555850144092219, + "grad_norm": 2.0804734095027477, + "learning_rate": 2.580977038506239e-07, + "loss": 0.4456654489040375, + "step": 6749 + }, + { + "epoch": 1.5560806916426513, + "grad_norm": 1.730782017536939, + "learning_rate": 2.5784218106949795e-07, + "loss": 0.39961719512939453, + "step": 6750 + }, + { + "epoch": 1.5563112391930836, + "grad_norm": 1.8389730814722773, + "learning_rate": 2.5758676611618257e-07, + "loss": 0.460401713848114, + "step": 6751 + }, + { + "epoch": 1.5565417867435158, + "grad_norm": 1.726236854490963, + "learning_rate": 2.5733145902778733e-07, + "loss": 0.4925374388694763, + "step": 6752 + }, + { + "epoch": 1.5567723342939481, + "grad_norm": 1.5901382869844622, + "learning_rate": 2.570762598414051e-07, + "loss": 0.42615199089050293, + "step": 6753 + }, + { + "epoch": 1.5570028818443804, + "grad_norm": 1.6475869590160572, + "learning_rate": 2.568211685941136e-07, + "loss": 0.47983551025390625, + "step": 6754 + }, + { + "epoch": 1.5572334293948127, + "grad_norm": 1.4379677339689119, + "learning_rate": 2.5656618532297547e-07, + "loss": 0.44416266679763794, + "step": 6755 + }, + { + "epoch": 1.557463976945245, + "grad_norm": 1.8167037891496292, + "learning_rate": 2.563113100650366e-07, + "loss": 0.47325876355171204, + "step": 6756 + }, + { + "epoch": 1.5576945244956772, + "grad_norm": 1.4587384999344424, + "learning_rate": 2.5605654285732814e-07, + "loss": 0.4115426242351532, + "step": 6757 + }, + { + "epoch": 1.5579250720461095, + "grad_norm": 1.7770718135646975, + "learning_rate": 2.558018837368646e-07, + "loss": 0.46395576000213623, + "step": 6758 + }, + { + "epoch": 1.5581556195965418, + "grad_norm": 1.3519334824438696, + "learning_rate": 2.5554733274064597e-07, + "loss": 0.41246697306632996, + "step": 6759 + }, + { + "epoch": 1.558386167146974, + "grad_norm": 1.7600589242580773, + "learning_rate": 2.5529288990565557e-07, + "loss": 0.5139991044998169, + "step": 6760 + }, + { + "epoch": 1.5586167146974064, + "grad_norm": 1.6093969605391563, + "learning_rate": 2.5503855526886084e-07, + "loss": 0.4346150755882263, + "step": 6761 + }, + { + "epoch": 1.5588472622478386, + "grad_norm": 1.6307147869055905, + "learning_rate": 2.5478432886721434e-07, + "loss": 0.44862866401672363, + "step": 6762 + }, + { + "epoch": 1.559077809798271, + "grad_norm": 1.422425939609917, + "learning_rate": 2.545302107376529e-07, + "loss": 0.41274869441986084, + "step": 6763 + }, + { + "epoch": 1.5593083573487032, + "grad_norm": 1.7395975622996638, + "learning_rate": 2.5427620091709645e-07, + "loss": 0.5566954612731934, + "step": 6764 + }, + { + "epoch": 1.5595389048991355, + "grad_norm": 2.028516666606984, + "learning_rate": 2.540222994424508e-07, + "loss": 0.5888369083404541, + "step": 6765 + }, + { + "epoch": 1.5597694524495678, + "grad_norm": 1.7585045308287393, + "learning_rate": 2.537685063506048e-07, + "loss": 0.4688549339771271, + "step": 6766 + }, + { + "epoch": 1.56, + "grad_norm": 1.9492383318424087, + "learning_rate": 2.5351482167843153e-07, + "loss": 0.4225703477859497, + "step": 6767 + }, + { + "epoch": 1.5602305475504323, + "grad_norm": 1.6959394085598527, + "learning_rate": 2.5326124546278947e-07, + "loss": 0.47270894050598145, + "step": 6768 + }, + { + "epoch": 1.5604610951008646, + "grad_norm": 1.8877771671569992, + "learning_rate": 2.530077777405201e-07, + "loss": 0.5117524266242981, + "step": 6769 + }, + { + "epoch": 1.560691642651297, + "grad_norm": 1.452301579537498, + "learning_rate": 2.5275441854844967e-07, + "loss": 0.4925321638584137, + "step": 6770 + }, + { + "epoch": 1.5609221902017292, + "grad_norm": 1.5144358135947686, + "learning_rate": 2.5250116792338917e-07, + "loss": 0.3935622572898865, + "step": 6771 + }, + { + "epoch": 1.5611527377521615, + "grad_norm": 1.6339047025884095, + "learning_rate": 2.522480259021329e-07, + "loss": 0.43235844373703003, + "step": 6772 + }, + { + "epoch": 1.5613832853025937, + "grad_norm": 1.8401079537023814, + "learning_rate": 2.519949925214597e-07, + "loss": 0.578273594379425, + "step": 6773 + }, + { + "epoch": 1.561613832853026, + "grad_norm": 1.7857356240818296, + "learning_rate": 2.5174206781813243e-07, + "loss": 0.4800739288330078, + "step": 6774 + }, + { + "epoch": 1.5618443804034583, + "grad_norm": 1.4278838200119695, + "learning_rate": 2.514892518288988e-07, + "loss": 0.4485281705856323, + "step": 6775 + }, + { + "epoch": 1.5620749279538906, + "grad_norm": 1.7437958447826178, + "learning_rate": 2.5123654459049057e-07, + "loss": 0.5072147846221924, + "step": 6776 + }, + { + "epoch": 1.5623054755043229, + "grad_norm": 1.8975658070712482, + "learning_rate": 2.509839461396229e-07, + "loss": 0.4516841471195221, + "step": 6777 + }, + { + "epoch": 1.5625360230547551, + "grad_norm": 1.7740833513311727, + "learning_rate": 2.507314565129962e-07, + "loss": 0.5076330900192261, + "step": 6778 + }, + { + "epoch": 1.5627665706051874, + "grad_norm": 1.6281423676668454, + "learning_rate": 2.5047907574729443e-07, + "loss": 0.5011087656021118, + "step": 6779 + }, + { + "epoch": 1.5629971181556197, + "grad_norm": 1.4979953019445997, + "learning_rate": 2.502268038791856e-07, + "loss": 0.3949703574180603, + "step": 6780 + }, + { + "epoch": 1.563227665706052, + "grad_norm": 1.624938698617509, + "learning_rate": 2.499746409453227e-07, + "loss": 0.39678525924682617, + "step": 6781 + }, + { + "epoch": 1.5634582132564843, + "grad_norm": 1.6812991120579335, + "learning_rate": 2.4972258698234185e-07, + "loss": 0.44274720549583435, + "step": 6782 + }, + { + "epoch": 1.5636887608069165, + "grad_norm": 1.4348522477076457, + "learning_rate": 2.494706420268641e-07, + "loss": 0.5082550048828125, + "step": 6783 + }, + { + "epoch": 1.5639193083573488, + "grad_norm": 1.3908999166270042, + "learning_rate": 2.492188061154946e-07, + "loss": 0.44391587376594543, + "step": 6784 + }, + { + "epoch": 1.564149855907781, + "grad_norm": 1.7005169151282067, + "learning_rate": 2.4896707928482254e-07, + "loss": 0.471325159072876, + "step": 6785 + }, + { + "epoch": 1.5643804034582134, + "grad_norm": 1.6719246286514318, + "learning_rate": 2.48715461571421e-07, + "loss": 0.4599985182285309, + "step": 6786 + }, + { + "epoch": 1.5646109510086457, + "grad_norm": 1.7250479291501297, + "learning_rate": 2.4846395301184706e-07, + "loss": 0.5035426616668701, + "step": 6787 + }, + { + "epoch": 1.564841498559078, + "grad_norm": 1.752363888581634, + "learning_rate": 2.482125536426427e-07, + "loss": 0.4742690920829773, + "step": 6788 + }, + { + "epoch": 1.5650720461095102, + "grad_norm": 1.387253350450147, + "learning_rate": 2.47961263500334e-07, + "loss": 0.4548560380935669, + "step": 6789 + }, + { + "epoch": 1.5653025936599425, + "grad_norm": 1.7361892806040864, + "learning_rate": 2.4771008262143003e-07, + "loss": 0.42407310009002686, + "step": 6790 + }, + { + "epoch": 1.5655331412103748, + "grad_norm": 1.7554110781883456, + "learning_rate": 2.4745901104242537e-07, + "loss": 0.5894111394882202, + "step": 6791 + }, + { + "epoch": 1.565763688760807, + "grad_norm": 1.5368093751158929, + "learning_rate": 2.4720804879979796e-07, + "loss": 0.4132724702358246, + "step": 6792 + }, + { + "epoch": 1.5659942363112394, + "grad_norm": 1.682979728804869, + "learning_rate": 2.4695719593000964e-07, + "loss": 0.5049244165420532, + "step": 6793 + }, + { + "epoch": 1.5662247838616716, + "grad_norm": 2.3775113241805315, + "learning_rate": 2.4670645246950725e-07, + "loss": 0.5342719554901123, + "step": 6794 + }, + { + "epoch": 1.566455331412104, + "grad_norm": 1.7576636684029612, + "learning_rate": 2.4645581845472077e-07, + "loss": 0.5695189237594604, + "step": 6795 + }, + { + "epoch": 1.566685878962536, + "grad_norm": 1.589967543943453, + "learning_rate": 2.4620529392206477e-07, + "loss": 0.44183048605918884, + "step": 6796 + }, + { + "epoch": 1.5669164265129683, + "grad_norm": 1.6197990017545385, + "learning_rate": 2.4595487890793834e-07, + "loss": 0.46745526790618896, + "step": 6797 + }, + { + "epoch": 1.5671469740634005, + "grad_norm": 1.8670350554799435, + "learning_rate": 2.4570457344872386e-07, + "loss": 0.4594680666923523, + "step": 6798 + }, + { + "epoch": 1.5673775216138328, + "grad_norm": 1.9718361980202608, + "learning_rate": 2.454543775807877e-07, + "loss": 0.48752421140670776, + "step": 6799 + }, + { + "epoch": 1.567608069164265, + "grad_norm": 1.4241399512515274, + "learning_rate": 2.4520429134048146e-07, + "loss": 0.4049089252948761, + "step": 6800 + }, + { + "epoch": 1.5678386167146974, + "grad_norm": 1.593383566073588, + "learning_rate": 2.449543147641394e-07, + "loss": 0.5329450368881226, + "step": 6801 + }, + { + "epoch": 1.5680691642651297, + "grad_norm": 1.6931578369576277, + "learning_rate": 2.4470444788808106e-07, + "loss": 0.48651859164237976, + "step": 6802 + }, + { + "epoch": 1.568299711815562, + "grad_norm": 1.5393896989762188, + "learning_rate": 2.44454690748609e-07, + "loss": 0.44224095344543457, + "step": 6803 + }, + { + "epoch": 1.5685302593659942, + "grad_norm": 1.5599638316816633, + "learning_rate": 2.4420504338201096e-07, + "loss": 0.4946790337562561, + "step": 6804 + }, + { + "epoch": 1.5687608069164265, + "grad_norm": 1.4168062301601407, + "learning_rate": 2.439555058245577e-07, + "loss": 0.41671523451805115, + "step": 6805 + }, + { + "epoch": 1.5689913544668588, + "grad_norm": 1.560840380576806, + "learning_rate": 2.437060781125041e-07, + "loss": 0.4457974433898926, + "step": 6806 + }, + { + "epoch": 1.569221902017291, + "grad_norm": 1.7545615802633734, + "learning_rate": 2.4345676028208985e-07, + "loss": 0.48806965351104736, + "step": 6807 + }, + { + "epoch": 1.5694524495677233, + "grad_norm": 1.3837750693537794, + "learning_rate": 2.432075523695385e-07, + "loss": 0.4820772409439087, + "step": 6808 + }, + { + "epoch": 1.5696829971181556, + "grad_norm": 1.4951609195898863, + "learning_rate": 2.429584544110567e-07, + "loss": 0.4809981882572174, + "step": 6809 + }, + { + "epoch": 1.569913544668588, + "grad_norm": 1.766097670042948, + "learning_rate": 2.427094664428364e-07, + "loss": 0.45282435417175293, + "step": 6810 + }, + { + "epoch": 1.5701440922190202, + "grad_norm": 1.8589225856452647, + "learning_rate": 2.424605885010527e-07, + "loss": 0.5170393586158752, + "step": 6811 + }, + { + "epoch": 1.5703746397694525, + "grad_norm": 1.4134732880067309, + "learning_rate": 2.422118206218646e-07, + "loss": 0.388106107711792, + "step": 6812 + }, + { + "epoch": 1.5706051873198847, + "grad_norm": 1.63584303547554, + "learning_rate": 2.419631628414163e-07, + "loss": 0.44863706827163696, + "step": 6813 + }, + { + "epoch": 1.570835734870317, + "grad_norm": 1.4107779406078444, + "learning_rate": 2.4171461519583425e-07, + "loss": 0.4372791051864624, + "step": 6814 + }, + { + "epoch": 1.5710662824207493, + "grad_norm": 1.29268864966395, + "learning_rate": 2.4146617772123046e-07, + "loss": 0.4813999533653259, + "step": 6815 + }, + { + "epoch": 1.5712968299711816, + "grad_norm": 1.5420414623437797, + "learning_rate": 2.4121785045370046e-07, + "loss": 0.4367016553878784, + "step": 6816 + }, + { + "epoch": 1.5715273775216139, + "grad_norm": 1.6434220509981157, + "learning_rate": 2.409696334293233e-07, + "loss": 0.42757725715637207, + "step": 6817 + }, + { + "epoch": 1.5717579250720461, + "grad_norm": 1.6577819897791792, + "learning_rate": 2.4072152668416236e-07, + "loss": 0.4397827386856079, + "step": 6818 + }, + { + "epoch": 1.5719884726224784, + "grad_norm": 1.5756653213645186, + "learning_rate": 2.4047353025426476e-07, + "loss": 0.45200103521347046, + "step": 6819 + }, + { + "epoch": 1.5722190201729107, + "grad_norm": 1.6542457562292414, + "learning_rate": 2.4022564417566193e-07, + "loss": 0.45443668961524963, + "step": 6820 + }, + { + "epoch": 1.572449567723343, + "grad_norm": 1.6997348607365212, + "learning_rate": 2.3997786848436965e-07, + "loss": 0.516747236251831, + "step": 6821 + }, + { + "epoch": 1.5726801152737753, + "grad_norm": 1.6365599277185443, + "learning_rate": 2.3973020321638625e-07, + "loss": 0.44745802879333496, + "step": 6822 + }, + { + "epoch": 1.5729106628242073, + "grad_norm": 2.158839604427667, + "learning_rate": 2.3948264840769585e-07, + "loss": 0.5438615679740906, + "step": 6823 + }, + { + "epoch": 1.5731412103746396, + "grad_norm": 1.643481401912203, + "learning_rate": 2.39235204094265e-07, + "loss": 0.4899371862411499, + "step": 6824 + }, + { + "epoch": 1.573371757925072, + "grad_norm": 1.55622048106585, + "learning_rate": 2.389878703120447e-07, + "loss": 0.4395183324813843, + "step": 6825 + }, + { + "epoch": 1.5736023054755042, + "grad_norm": 1.4934042205985354, + "learning_rate": 2.387406470969704e-07, + "loss": 0.479698121547699, + "step": 6826 + }, + { + "epoch": 1.5738328530259365, + "grad_norm": 1.9307758017589527, + "learning_rate": 2.384935344849607e-07, + "loss": 0.6000853776931763, + "step": 6827 + }, + { + "epoch": 1.5740634005763687, + "grad_norm": 1.3692449284024986, + "learning_rate": 2.382465325119185e-07, + "loss": 0.41117769479751587, + "step": 6828 + }, + { + "epoch": 1.574293948126801, + "grad_norm": 1.9113886128584991, + "learning_rate": 2.3799964121373117e-07, + "loss": 0.5113134980201721, + "step": 6829 + }, + { + "epoch": 1.5745244956772333, + "grad_norm": 1.4251842959330887, + "learning_rate": 2.3775286062626897e-07, + "loss": 0.49158281087875366, + "step": 6830 + }, + { + "epoch": 1.5747550432276656, + "grad_norm": 1.780662153833107, + "learning_rate": 2.375061907853866e-07, + "loss": 0.5564873814582825, + "step": 6831 + }, + { + "epoch": 1.5749855907780979, + "grad_norm": 1.9181105692541727, + "learning_rate": 2.3725963172692244e-07, + "loss": 0.4885261058807373, + "step": 6832 + }, + { + "epoch": 1.5752161383285301, + "grad_norm": 1.5805066300147617, + "learning_rate": 2.3701318348669908e-07, + "loss": 0.4490503668785095, + "step": 6833 + }, + { + "epoch": 1.5754466858789624, + "grad_norm": 1.4499272850949159, + "learning_rate": 2.3676684610052334e-07, + "loss": 0.45185232162475586, + "step": 6834 + }, + { + "epoch": 1.5756772334293947, + "grad_norm": 1.995751623566264, + "learning_rate": 2.365206196041848e-07, + "loss": 0.5666052103042603, + "step": 6835 + }, + { + "epoch": 1.575907780979827, + "grad_norm": 1.5643028039542144, + "learning_rate": 2.3627450403345816e-07, + "loss": 0.45692935585975647, + "step": 6836 + }, + { + "epoch": 1.5761383285302593, + "grad_norm": 1.682140694304972, + "learning_rate": 2.360284994241012e-07, + "loss": 0.44901716709136963, + "step": 6837 + }, + { + "epoch": 1.5763688760806915, + "grad_norm": 1.6290031908081577, + "learning_rate": 2.357826058118555e-07, + "loss": 0.5051450729370117, + "step": 6838 + }, + { + "epoch": 1.5765994236311238, + "grad_norm": 1.8396132592321723, + "learning_rate": 2.3553682323244762e-07, + "loss": 0.43471968173980713, + "step": 6839 + }, + { + "epoch": 1.576829971181556, + "grad_norm": 1.417260930252744, + "learning_rate": 2.352911517215863e-07, + "loss": 0.42540234327316284, + "step": 6840 + }, + { + "epoch": 1.5770605187319884, + "grad_norm": 1.695717710695922, + "learning_rate": 2.350455913149657e-07, + "loss": 0.5011035799980164, + "step": 6841 + }, + { + "epoch": 1.5772910662824207, + "grad_norm": 1.3387049067961254, + "learning_rate": 2.3480014204826348e-07, + "loss": 0.4265633225440979, + "step": 6842 + }, + { + "epoch": 1.577521613832853, + "grad_norm": 1.8058533825125813, + "learning_rate": 2.345548039571399e-07, + "loss": 0.4330548346042633, + "step": 6843 + }, + { + "epoch": 1.5777521613832852, + "grad_norm": 1.5588036812303396, + "learning_rate": 2.3430957707724052e-07, + "loss": 0.453426718711853, + "step": 6844 + }, + { + "epoch": 1.5779827089337175, + "grad_norm": 1.5822406339462496, + "learning_rate": 2.3406446144419446e-07, + "loss": 0.4875522255897522, + "step": 6845 + }, + { + "epoch": 1.5782132564841498, + "grad_norm": 1.8097072716346074, + "learning_rate": 2.3381945709361416e-07, + "loss": 0.41219890117645264, + "step": 6846 + }, + { + "epoch": 1.578443804034582, + "grad_norm": 1.7446551101419503, + "learning_rate": 2.3357456406109644e-07, + "loss": 0.4699985980987549, + "step": 6847 + }, + { + "epoch": 1.5786743515850143, + "grad_norm": 1.7989203387061783, + "learning_rate": 2.3332978238222178e-07, + "loss": 0.4073752164840698, + "step": 6848 + }, + { + "epoch": 1.5789048991354466, + "grad_norm": 1.7603722349498907, + "learning_rate": 2.3308511209255376e-07, + "loss": 0.5328919887542725, + "step": 6849 + }, + { + "epoch": 1.579135446685879, + "grad_norm": 1.9514955832292185, + "learning_rate": 2.328405532276413e-07, + "loss": 0.538253128528595, + "step": 6850 + }, + { + "epoch": 1.5793659942363112, + "grad_norm": 1.5632567756625935, + "learning_rate": 2.3259610582301558e-07, + "loss": 0.44995903968811035, + "step": 6851 + }, + { + "epoch": 1.5795965417867435, + "grad_norm": 1.7120874755790536, + "learning_rate": 2.3235176991419247e-07, + "loss": 0.4335440397262573, + "step": 6852 + }, + { + "epoch": 1.5798270893371757, + "grad_norm": 1.6187630739158092, + "learning_rate": 2.321075455366719e-07, + "loss": 0.4642670154571533, + "step": 6853 + }, + { + "epoch": 1.580057636887608, + "grad_norm": 1.8439898983263738, + "learning_rate": 2.3186343272593656e-07, + "loss": 0.4488638639450073, + "step": 6854 + }, + { + "epoch": 1.5802881844380403, + "grad_norm": 1.585715016192241, + "learning_rate": 2.3161943151745378e-07, + "loss": 0.48309770226478577, + "step": 6855 + }, + { + "epoch": 1.5805187319884726, + "grad_norm": 1.5603192455872934, + "learning_rate": 2.313755419466741e-07, + "loss": 0.44456416368484497, + "step": 6856 + }, + { + "epoch": 1.5807492795389049, + "grad_norm": 1.599130287546092, + "learning_rate": 2.3113176404903222e-07, + "loss": 0.46257686614990234, + "step": 6857 + }, + { + "epoch": 1.5809798270893372, + "grad_norm": 1.6327518009499344, + "learning_rate": 2.308880978599469e-07, + "loss": 0.3914128541946411, + "step": 6858 + }, + { + "epoch": 1.5812103746397694, + "grad_norm": 2.107969712331713, + "learning_rate": 2.3064454341481988e-07, + "loss": 0.508151113986969, + "step": 6859 + }, + { + "epoch": 1.5814409221902017, + "grad_norm": 1.5415023234803529, + "learning_rate": 2.304011007490374e-07, + "loss": 0.43072253465652466, + "step": 6860 + }, + { + "epoch": 1.581671469740634, + "grad_norm": 1.5469316288580008, + "learning_rate": 2.3015776989796909e-07, + "loss": 0.432369589805603, + "step": 6861 + }, + { + "epoch": 1.5819020172910663, + "grad_norm": 1.682956567076036, + "learning_rate": 2.299145508969681e-07, + "loss": 0.4282001554965973, + "step": 6862 + }, + { + "epoch": 1.5821325648414986, + "grad_norm": 1.8586742904701485, + "learning_rate": 2.2967144378137194e-07, + "loss": 0.5486190319061279, + "step": 6863 + }, + { + "epoch": 1.5823631123919308, + "grad_norm": 1.6360275945951068, + "learning_rate": 2.2942844858650122e-07, + "loss": 0.3602842092514038, + "step": 6864 + }, + { + "epoch": 1.5825936599423631, + "grad_norm": 1.8002022611312485, + "learning_rate": 2.2918556534766087e-07, + "loss": 0.39400649070739746, + "step": 6865 + }, + { + "epoch": 1.5828242074927954, + "grad_norm": 2.0187438455147397, + "learning_rate": 2.289427941001395e-07, + "loss": 0.5241566896438599, + "step": 6866 + }, + { + "epoch": 1.5830547550432277, + "grad_norm": 1.9477429004752067, + "learning_rate": 2.2870013487920902e-07, + "loss": 0.4443414807319641, + "step": 6867 + }, + { + "epoch": 1.58328530259366, + "grad_norm": 1.4339092748204783, + "learning_rate": 2.2845758772012523e-07, + "loss": 0.4845046401023865, + "step": 6868 + }, + { + "epoch": 1.5835158501440922, + "grad_norm": 1.4451826716980998, + "learning_rate": 2.2821515265812753e-07, + "loss": 0.4631197452545166, + "step": 6869 + }, + { + "epoch": 1.5837463976945245, + "grad_norm": 1.8130555774397925, + "learning_rate": 2.2797282972843935e-07, + "loss": 0.47642138600349426, + "step": 6870 + }, + { + "epoch": 1.5839769452449568, + "grad_norm": 1.5693735960928867, + "learning_rate": 2.2773061896626811e-07, + "loss": 0.48786088824272156, + "step": 6871 + }, + { + "epoch": 1.584207492795389, + "grad_norm": 1.656811061554273, + "learning_rate": 2.2748852040680378e-07, + "loss": 0.4556247591972351, + "step": 6872 + }, + { + "epoch": 1.5844380403458214, + "grad_norm": 1.604595617408912, + "learning_rate": 2.2724653408522155e-07, + "loss": 0.4918019771575928, + "step": 6873 + }, + { + "epoch": 1.5846685878962536, + "grad_norm": 1.2997559949157804, + "learning_rate": 2.2700466003667917e-07, + "loss": 0.4443207383155823, + "step": 6874 + }, + { + "epoch": 1.584899135446686, + "grad_norm": 1.593924905912374, + "learning_rate": 2.2676289829631802e-07, + "loss": 0.42983290553092957, + "step": 6875 + }, + { + "epoch": 1.5851296829971182, + "grad_norm": 1.5434495937585317, + "learning_rate": 2.2652124889926417e-07, + "loss": 0.414949893951416, + "step": 6876 + }, + { + "epoch": 1.5853602305475505, + "grad_norm": 1.521196276312085, + "learning_rate": 2.2627971188062622e-07, + "loss": 0.39912670850753784, + "step": 6877 + }, + { + "epoch": 1.5855907780979828, + "grad_norm": 1.3168327583899544, + "learning_rate": 2.2603828727549734e-07, + "loss": 0.4181273579597473, + "step": 6878 + }, + { + "epoch": 1.585821325648415, + "grad_norm": 1.7553292134606597, + "learning_rate": 2.2579697511895425e-07, + "loss": 0.5589914321899414, + "step": 6879 + }, + { + "epoch": 1.5860518731988473, + "grad_norm": 1.625959912645244, + "learning_rate": 2.2555577544605686e-07, + "loss": 0.4346531629562378, + "step": 6880 + }, + { + "epoch": 1.5862824207492796, + "grad_norm": 1.5868469446711888, + "learning_rate": 2.2531468829184852e-07, + "loss": 0.4413943290710449, + "step": 6881 + }, + { + "epoch": 1.586512968299712, + "grad_norm": 1.4600697465130843, + "learning_rate": 2.250737136913574e-07, + "loss": 0.45540279150009155, + "step": 6882 + }, + { + "epoch": 1.5867435158501442, + "grad_norm": 1.6137315194829938, + "learning_rate": 2.24832851679594e-07, + "loss": 0.4250563979148865, + "step": 6883 + }, + { + "epoch": 1.5869740634005765, + "grad_norm": 1.4798874934990478, + "learning_rate": 2.2459210229155356e-07, + "loss": 0.44773417711257935, + "step": 6884 + }, + { + "epoch": 1.5872046109510087, + "grad_norm": 1.4993813947174892, + "learning_rate": 2.2435146556221408e-07, + "loss": 0.4882596731185913, + "step": 6885 + }, + { + "epoch": 1.587435158501441, + "grad_norm": 1.5587737674283808, + "learning_rate": 2.2411094152653798e-07, + "loss": 0.4748955965042114, + "step": 6886 + }, + { + "epoch": 1.5876657060518733, + "grad_norm": 1.6227574167605214, + "learning_rate": 2.2387053021947065e-07, + "loss": 0.3629099130630493, + "step": 6887 + }, + { + "epoch": 1.5878962536023056, + "grad_norm": 1.6993060193268632, + "learning_rate": 2.236302316759411e-07, + "loss": 0.4068647623062134, + "step": 6888 + }, + { + "epoch": 1.5881268011527379, + "grad_norm": 1.4233201625460687, + "learning_rate": 2.2339004593086252e-07, + "loss": 0.44618022441864014, + "step": 6889 + }, + { + "epoch": 1.5883573487031701, + "grad_norm": 1.8263823863304054, + "learning_rate": 2.2314997301913153e-07, + "loss": 0.46568962931632996, + "step": 6890 + }, + { + "epoch": 1.5885878962536024, + "grad_norm": 1.8982767649260424, + "learning_rate": 2.2291001297562784e-07, + "loss": 0.5379288792610168, + "step": 6891 + }, + { + "epoch": 1.5888184438040347, + "grad_norm": 1.9640546549928781, + "learning_rate": 2.2267016583521558e-07, + "loss": 0.4629078805446625, + "step": 6892 + }, + { + "epoch": 1.589048991354467, + "grad_norm": 1.489161454120763, + "learning_rate": 2.2243043163274189e-07, + "loss": 0.4108186662197113, + "step": 6893 + }, + { + "epoch": 1.5892795389048993, + "grad_norm": 1.8632335546890277, + "learning_rate": 2.2219081040303734e-07, + "loss": 0.4663807153701782, + "step": 6894 + }, + { + "epoch": 1.5895100864553315, + "grad_norm": 1.8071280855278253, + "learning_rate": 2.2195130218091685e-07, + "loss": 0.42701542377471924, + "step": 6895 + }, + { + "epoch": 1.5897406340057638, + "grad_norm": 1.8417473897562753, + "learning_rate": 2.2171190700117804e-07, + "loss": 0.5210022926330566, + "step": 6896 + }, + { + "epoch": 1.589971181556196, + "grad_norm": 1.8057710310189852, + "learning_rate": 2.2147262489860275e-07, + "loss": 0.41991421580314636, + "step": 6897 + }, + { + "epoch": 1.5902017291066284, + "grad_norm": 1.7840543361838392, + "learning_rate": 2.212334559079564e-07, + "loss": 0.46946293115615845, + "step": 6898 + }, + { + "epoch": 1.5904322766570607, + "grad_norm": 1.7012515976823714, + "learning_rate": 2.2099440006398772e-07, + "loss": 0.35496097803115845, + "step": 6899 + }, + { + "epoch": 1.590662824207493, + "grad_norm": 1.518063979509112, + "learning_rate": 2.2075545740142875e-07, + "loss": 0.4571149945259094, + "step": 6900 + }, + { + "epoch": 1.5908933717579252, + "grad_norm": 1.4946182440732432, + "learning_rate": 2.2051662795499525e-07, + "loss": 0.4564734399318695, + "step": 6901 + }, + { + "epoch": 1.5911239193083575, + "grad_norm": 1.4348743515049034, + "learning_rate": 2.2027791175938693e-07, + "loss": 0.4557456076145172, + "step": 6902 + }, + { + "epoch": 1.5913544668587898, + "grad_norm": 1.4908849547027518, + "learning_rate": 2.2003930884928702e-07, + "loss": 0.4800085723400116, + "step": 6903 + }, + { + "epoch": 1.591585014409222, + "grad_norm": 1.5940599632244288, + "learning_rate": 2.1980081925936144e-07, + "loss": 0.45482996106147766, + "step": 6904 + }, + { + "epoch": 1.5918155619596541, + "grad_norm": 1.7435440666868574, + "learning_rate": 2.1956244302426097e-07, + "loss": 0.478973925113678, + "step": 6905 + }, + { + "epoch": 1.5920461095100864, + "grad_norm": 1.3952796040058404, + "learning_rate": 2.1932418017861863e-07, + "loss": 0.4436086416244507, + "step": 6906 + }, + { + "epoch": 1.5922766570605187, + "grad_norm": 1.7126522646300093, + "learning_rate": 2.1908603075705156e-07, + "loss": 0.4582422375679016, + "step": 6907 + }, + { + "epoch": 1.592507204610951, + "grad_norm": 1.630225683771869, + "learning_rate": 2.188479947941607e-07, + "loss": 0.5626469850540161, + "step": 6908 + }, + { + "epoch": 1.5927377521613832, + "grad_norm": 1.6154153830226072, + "learning_rate": 2.186100723245299e-07, + "loss": 0.49945351481437683, + "step": 6909 + }, + { + "epoch": 1.5929682997118155, + "grad_norm": 1.317835601373723, + "learning_rate": 2.1837226338272685e-07, + "loss": 0.4216611683368683, + "step": 6910 + }, + { + "epoch": 1.5931988472622478, + "grad_norm": 1.6644970218843584, + "learning_rate": 2.181345680033031e-07, + "loss": 0.461073100566864, + "step": 6911 + }, + { + "epoch": 1.59342939481268, + "grad_norm": 1.656443059857395, + "learning_rate": 2.178969862207931e-07, + "loss": 0.5296883583068848, + "step": 6912 + }, + { + "epoch": 1.5936599423631124, + "grad_norm": 1.5254712888429038, + "learning_rate": 2.1765951806971484e-07, + "loss": 0.4975186586380005, + "step": 6913 + }, + { + "epoch": 1.5938904899135447, + "grad_norm": 1.6364108447848835, + "learning_rate": 2.174221635845699e-07, + "loss": 0.4512255787849426, + "step": 6914 + }, + { + "epoch": 1.594121037463977, + "grad_norm": 1.6579860313320571, + "learning_rate": 2.1718492279984358e-07, + "loss": 0.5384291410446167, + "step": 6915 + }, + { + "epoch": 1.5943515850144092, + "grad_norm": 1.6048868780122425, + "learning_rate": 2.1694779575000476e-07, + "loss": 0.5140515565872192, + "step": 6916 + }, + { + "epoch": 1.5945821325648415, + "grad_norm": 1.6618051236543094, + "learning_rate": 2.1671078246950503e-07, + "loss": 0.3933897018432617, + "step": 6917 + }, + { + "epoch": 1.5948126801152738, + "grad_norm": 1.6798554271750792, + "learning_rate": 2.1647388299278046e-07, + "loss": 0.45260024070739746, + "step": 6918 + }, + { + "epoch": 1.595043227665706, + "grad_norm": 1.6622533875894812, + "learning_rate": 2.162370973542499e-07, + "loss": 0.46932220458984375, + "step": 6919 + }, + { + "epoch": 1.5952737752161383, + "grad_norm": 1.7467607595951578, + "learning_rate": 2.1600042558831545e-07, + "loss": 0.46241551637649536, + "step": 6920 + }, + { + "epoch": 1.5955043227665706, + "grad_norm": 1.8116389882531025, + "learning_rate": 2.1576386772936363e-07, + "loss": 0.5214533805847168, + "step": 6921 + }, + { + "epoch": 1.595734870317003, + "grad_norm": 1.8713209887402171, + "learning_rate": 2.1552742381176326e-07, + "loss": 0.49391108751296997, + "step": 6922 + }, + { + "epoch": 1.5959654178674352, + "grad_norm": 1.4096867093895666, + "learning_rate": 2.1529109386986754e-07, + "loss": 0.4527132511138916, + "step": 6923 + }, + { + "epoch": 1.5961959654178675, + "grad_norm": 1.463280102232623, + "learning_rate": 2.1505487793801301e-07, + "loss": 0.43627679347991943, + "step": 6924 + }, + { + "epoch": 1.5964265129682997, + "grad_norm": 1.5759299558725761, + "learning_rate": 2.1481877605051913e-07, + "loss": 0.5021758079528809, + "step": 6925 + }, + { + "epoch": 1.596657060518732, + "grad_norm": 1.3836341256873759, + "learning_rate": 2.1458278824168874e-07, + "loss": 0.4158906936645508, + "step": 6926 + }, + { + "epoch": 1.5968876080691643, + "grad_norm": 1.5708743524166162, + "learning_rate": 2.1434691454580888e-07, + "loss": 0.45191070437431335, + "step": 6927 + }, + { + "epoch": 1.5971181556195966, + "grad_norm": 1.700284121200831, + "learning_rate": 2.1411115499714916e-07, + "loss": 0.4693247973918915, + "step": 6928 + }, + { + "epoch": 1.5973487031700289, + "grad_norm": 2.1682335280453526, + "learning_rate": 2.1387550962996336e-07, + "loss": 0.3784172832965851, + "step": 6929 + }, + { + "epoch": 1.5975792507204611, + "grad_norm": 1.8489077076485807, + "learning_rate": 2.136399784784879e-07, + "loss": 0.45491930842399597, + "step": 6930 + }, + { + "epoch": 1.5978097982708934, + "grad_norm": 1.741517442028974, + "learning_rate": 2.1340456157694354e-07, + "loss": 0.4505925476551056, + "step": 6931 + }, + { + "epoch": 1.5980403458213257, + "grad_norm": 1.6067304492037071, + "learning_rate": 2.1316925895953364e-07, + "loss": 0.45480477809906006, + "step": 6932 + }, + { + "epoch": 1.5982708933717578, + "grad_norm": 1.7828518645464755, + "learning_rate": 2.1293407066044478e-07, + "loss": 0.4488682746887207, + "step": 6933 + }, + { + "epoch": 1.59850144092219, + "grad_norm": 2.009780498310941, + "learning_rate": 2.1269899671384785e-07, + "loss": 0.4563372731208801, + "step": 6934 + }, + { + "epoch": 1.5987319884726223, + "grad_norm": 1.8158550451175957, + "learning_rate": 2.1246403715389672e-07, + "loss": 0.49727576971054077, + "step": 6935 + }, + { + "epoch": 1.5989625360230546, + "grad_norm": 1.9371356570822131, + "learning_rate": 2.1222919201472823e-07, + "loss": 0.5009286403656006, + "step": 6936 + }, + { + "epoch": 1.5991930835734869, + "grad_norm": 1.5623560718411584, + "learning_rate": 2.1199446133046338e-07, + "loss": 0.3688209056854248, + "step": 6937 + }, + { + "epoch": 1.5994236311239192, + "grad_norm": 1.6119779247292187, + "learning_rate": 2.1175984513520584e-07, + "loss": 0.4590919613838196, + "step": 6938 + }, + { + "epoch": 1.5996541786743514, + "grad_norm": 1.8414452773127332, + "learning_rate": 2.1152534346304275e-07, + "loss": 0.5258738994598389, + "step": 6939 + }, + { + "epoch": 1.5998847262247837, + "grad_norm": 1.7277239463285377, + "learning_rate": 2.1129095634804505e-07, + "loss": 0.4137745797634125, + "step": 6940 + }, + { + "epoch": 1.600115273775216, + "grad_norm": 1.776208310064266, + "learning_rate": 2.1105668382426634e-07, + "loss": 0.38268011808395386, + "step": 6941 + }, + { + "epoch": 1.6003458213256483, + "grad_norm": 1.8866925869596325, + "learning_rate": 2.1082252592574423e-07, + "loss": 0.4915100932121277, + "step": 6942 + }, + { + "epoch": 1.6005763688760806, + "grad_norm": 1.643885549144145, + "learning_rate": 2.1058848268649986e-07, + "loss": 0.44428160786628723, + "step": 6943 + }, + { + "epoch": 1.6008069164265128, + "grad_norm": 1.947073483480518, + "learning_rate": 2.1035455414053682e-07, + "loss": 0.46833938360214233, + "step": 6944 + }, + { + "epoch": 1.6010374639769451, + "grad_norm": 1.6568134566322756, + "learning_rate": 2.1012074032184247e-07, + "loss": 0.47894763946533203, + "step": 6945 + }, + { + "epoch": 1.6012680115273774, + "grad_norm": 1.515406639156062, + "learning_rate": 2.0988704126438738e-07, + "loss": 0.3642117381095886, + "step": 6946 + }, + { + "epoch": 1.6014985590778097, + "grad_norm": 1.4026104553016838, + "learning_rate": 2.0965345700212578e-07, + "loss": 0.395542174577713, + "step": 6947 + }, + { + "epoch": 1.601729106628242, + "grad_norm": 1.8436231094001752, + "learning_rate": 2.0941998756899537e-07, + "loss": 0.47251084446907043, + "step": 6948 + }, + { + "epoch": 1.6019596541786743, + "grad_norm": 2.2260940930238693, + "learning_rate": 2.0918663299891625e-07, + "loss": 0.515550971031189, + "step": 6949 + }, + { + "epoch": 1.6021902017291065, + "grad_norm": 1.7608353830332475, + "learning_rate": 2.0895339332579299e-07, + "loss": 0.5145356059074402, + "step": 6950 + }, + { + "epoch": 1.6024207492795388, + "grad_norm": 1.4657228610920408, + "learning_rate": 2.0872026858351255e-07, + "loss": 0.4490816593170166, + "step": 6951 + }, + { + "epoch": 1.602651296829971, + "grad_norm": 1.8585570966425249, + "learning_rate": 2.084872588059453e-07, + "loss": 0.6396125555038452, + "step": 6952 + }, + { + "epoch": 1.6028818443804034, + "grad_norm": 1.677774039877492, + "learning_rate": 2.0825436402694574e-07, + "loss": 0.4160453677177429, + "step": 6953 + }, + { + "epoch": 1.6031123919308357, + "grad_norm": 1.732479091236752, + "learning_rate": 2.0802158428035034e-07, + "loss": 0.455702543258667, + "step": 6954 + }, + { + "epoch": 1.603342939481268, + "grad_norm": 1.6973414598129097, + "learning_rate": 2.0778891959998002e-07, + "loss": 0.47815048694610596, + "step": 6955 + }, + { + "epoch": 1.6035734870317002, + "grad_norm": 1.5949839265658996, + "learning_rate": 2.0755637001963878e-07, + "loss": 0.5771400928497314, + "step": 6956 + }, + { + "epoch": 1.6038040345821325, + "grad_norm": 1.5561630257878414, + "learning_rate": 2.0732393557311323e-07, + "loss": 0.4287475347518921, + "step": 6957 + }, + { + "epoch": 1.6040345821325648, + "grad_norm": 1.6827470461890062, + "learning_rate": 2.0709161629417382e-07, + "loss": 0.44147899746894836, + "step": 6958 + }, + { + "epoch": 1.604265129682997, + "grad_norm": 1.5690305134788955, + "learning_rate": 2.0685941221657388e-07, + "loss": 0.5066714286804199, + "step": 6959 + }, + { + "epoch": 1.6044956772334293, + "grad_norm": 1.4619488956650715, + "learning_rate": 2.0662732337405054e-07, + "loss": 0.47425931692123413, + "step": 6960 + }, + { + "epoch": 1.6047262247838616, + "grad_norm": 2.006019965656153, + "learning_rate": 2.063953498003239e-07, + "loss": 0.4885402023792267, + "step": 6961 + }, + { + "epoch": 1.604956772334294, + "grad_norm": 1.6398180184758835, + "learning_rate": 2.061634915290974e-07, + "loss": 0.4193480908870697, + "step": 6962 + }, + { + "epoch": 1.6051873198847262, + "grad_norm": 1.9483442057634373, + "learning_rate": 2.0593174859405714e-07, + "loss": 0.4796232283115387, + "step": 6963 + }, + { + "epoch": 1.6054178674351585, + "grad_norm": 1.7132013221413995, + "learning_rate": 2.0570012102887356e-07, + "loss": 0.42399299144744873, + "step": 6964 + }, + { + "epoch": 1.6056484149855907, + "grad_norm": 1.7163715090870402, + "learning_rate": 2.054686088671992e-07, + "loss": 0.4394020140171051, + "step": 6965 + }, + { + "epoch": 1.605878962536023, + "grad_norm": 1.7765040569397281, + "learning_rate": 2.0523721214267087e-07, + "loss": 0.3917948007583618, + "step": 6966 + }, + { + "epoch": 1.6061095100864553, + "grad_norm": 1.454882745207512, + "learning_rate": 2.050059308889076e-07, + "loss": 0.39274662733078003, + "step": 6967 + }, + { + "epoch": 1.6063400576368876, + "grad_norm": 1.779744919694154, + "learning_rate": 2.0477476513951265e-07, + "loss": 0.4619693458080292, + "step": 6968 + }, + { + "epoch": 1.6065706051873199, + "grad_norm": 1.3167764775765771, + "learning_rate": 2.0454371492807177e-07, + "loss": 0.39066576957702637, + "step": 6969 + }, + { + "epoch": 1.6068011527377521, + "grad_norm": 1.6112784154759012, + "learning_rate": 2.0431278028815392e-07, + "loss": 0.5046144723892212, + "step": 6970 + }, + { + "epoch": 1.6070317002881844, + "grad_norm": 1.6893801716169201, + "learning_rate": 2.0408196125331167e-07, + "loss": 0.3814374506473541, + "step": 6971 + }, + { + "epoch": 1.6072622478386167, + "grad_norm": 1.542470838097386, + "learning_rate": 2.03851257857081e-07, + "loss": 0.49477407336235046, + "step": 6972 + }, + { + "epoch": 1.607492795389049, + "grad_norm": 1.7326796573740122, + "learning_rate": 2.0362067013298e-07, + "loss": 0.5003507137298584, + "step": 6973 + }, + { + "epoch": 1.6077233429394813, + "grad_norm": 1.9600144557835601, + "learning_rate": 2.0339019811451152e-07, + "loss": 0.3798295259475708, + "step": 6974 + }, + { + "epoch": 1.6079538904899136, + "grad_norm": 1.5009878438661703, + "learning_rate": 2.0315984183516012e-07, + "loss": 0.3913137912750244, + "step": 6975 + }, + { + "epoch": 1.6081844380403458, + "grad_norm": 1.7801044809151643, + "learning_rate": 2.029296013283942e-07, + "loss": 0.40811246633529663, + "step": 6976 + }, + { + "epoch": 1.6084149855907781, + "grad_norm": 1.7717326489240075, + "learning_rate": 2.0269947662766562e-07, + "loss": 0.5058863162994385, + "step": 6977 + }, + { + "epoch": 1.6086455331412104, + "grad_norm": 1.826456443587998, + "learning_rate": 2.024694677664087e-07, + "loss": 0.45775991678237915, + "step": 6978 + }, + { + "epoch": 1.6088760806916427, + "grad_norm": 1.3868260218878463, + "learning_rate": 2.0223957477804164e-07, + "loss": 0.39464837312698364, + "step": 6979 + }, + { + "epoch": 1.609106628242075, + "grad_norm": 1.557694777666343, + "learning_rate": 2.020097976959656e-07, + "loss": 0.44453293085098267, + "step": 6980 + }, + { + "epoch": 1.6093371757925072, + "grad_norm": 1.5365654039847554, + "learning_rate": 2.0178013655356463e-07, + "loss": 0.5004311203956604, + "step": 6981 + }, + { + "epoch": 1.6095677233429395, + "grad_norm": 1.6102093248541456, + "learning_rate": 2.0155059138420615e-07, + "loss": 0.4887525141239166, + "step": 6982 + }, + { + "epoch": 1.6097982708933718, + "grad_norm": 1.6742015253238756, + "learning_rate": 2.0132116222124028e-07, + "loss": 0.4750329256057739, + "step": 6983 + }, + { + "epoch": 1.610028818443804, + "grad_norm": 1.5520776036181012, + "learning_rate": 2.0109184909800115e-07, + "loss": 0.5468109846115112, + "step": 6984 + }, + { + "epoch": 1.6102593659942364, + "grad_norm": 1.8852776003246023, + "learning_rate": 2.0086265204780572e-07, + "loss": 0.4403616786003113, + "step": 6985 + }, + { + "epoch": 1.6104899135446686, + "grad_norm": 1.3958899933269038, + "learning_rate": 2.006335711039534e-07, + "loss": 0.4939368963241577, + "step": 6986 + }, + { + "epoch": 1.610720461095101, + "grad_norm": 1.3992598613678886, + "learning_rate": 2.0040460629972788e-07, + "loss": 0.4437975287437439, + "step": 6987 + }, + { + "epoch": 1.6109510086455332, + "grad_norm": 1.5741668789949548, + "learning_rate": 2.0017575766839502e-07, + "loss": 0.4779764413833618, + "step": 6988 + }, + { + "epoch": 1.6111815561959655, + "grad_norm": 1.6090259987078506, + "learning_rate": 1.9994702524320383e-07, + "loss": 0.49435535073280334, + "step": 6989 + }, + { + "epoch": 1.6114121037463978, + "grad_norm": 1.682670242630337, + "learning_rate": 1.9971840905738735e-07, + "loss": 0.41287925839424133, + "step": 6990 + }, + { + "epoch": 1.61164265129683, + "grad_norm": 1.6608345205857078, + "learning_rate": 1.9948990914416065e-07, + "loss": 0.43594056367874146, + "step": 6991 + }, + { + "epoch": 1.6118731988472623, + "grad_norm": 1.4566510618351554, + "learning_rate": 1.9926152553672258e-07, + "loss": 0.4222400188446045, + "step": 6992 + }, + { + "epoch": 1.6121037463976946, + "grad_norm": 1.3927222561183994, + "learning_rate": 1.9903325826825524e-07, + "loss": 0.40775904059410095, + "step": 6993 + }, + { + "epoch": 1.6123342939481269, + "grad_norm": 1.5765829905078221, + "learning_rate": 1.9880510737192312e-07, + "loss": 0.4643257260322571, + "step": 6994 + }, + { + "epoch": 1.6125648414985592, + "grad_norm": 1.8968521533386222, + "learning_rate": 1.9857707288087434e-07, + "loss": 0.42287328839302063, + "step": 6995 + }, + { + "epoch": 1.6127953890489914, + "grad_norm": 1.7958779987187703, + "learning_rate": 1.9834915482823943e-07, + "loss": 0.44881507754325867, + "step": 6996 + }, + { + "epoch": 1.6130259365994237, + "grad_norm": 1.4789752954442097, + "learning_rate": 1.9812135324713307e-07, + "loss": 0.5019153952598572, + "step": 6997 + }, + { + "epoch": 1.613256484149856, + "grad_norm": 1.9782823177827327, + "learning_rate": 1.9789366817065244e-07, + "loss": 0.46092158555984497, + "step": 6998 + }, + { + "epoch": 1.6134870317002883, + "grad_norm": 1.6796264243497427, + "learning_rate": 1.9766609963187753e-07, + "loss": 0.5497767925262451, + "step": 6999 + }, + { + "epoch": 1.6137175792507206, + "grad_norm": 1.6615467659002465, + "learning_rate": 1.9743864766387196e-07, + "loss": 0.3865404427051544, + "step": 7000 + }, + { + "epoch": 1.6139481268011528, + "grad_norm": 1.7045504670159946, + "learning_rate": 1.9721131229968213e-07, + "loss": 0.4722781181335449, + "step": 7001 + }, + { + "epoch": 1.6141786743515851, + "grad_norm": 1.496853211993526, + "learning_rate": 1.9698409357233702e-07, + "loss": 0.46194642782211304, + "step": 7002 + }, + { + "epoch": 1.6144092219020174, + "grad_norm": 1.5419924356187527, + "learning_rate": 1.967569915148498e-07, + "loss": 0.42680829763412476, + "step": 7003 + }, + { + "epoch": 1.6146397694524497, + "grad_norm": 1.5370455081070127, + "learning_rate": 1.9653000616021554e-07, + "loss": 0.4733467102050781, + "step": 7004 + }, + { + "epoch": 1.614870317002882, + "grad_norm": 1.5140177132473809, + "learning_rate": 1.9630313754141293e-07, + "loss": 0.42426949739456177, + "step": 7005 + }, + { + "epoch": 1.6151008645533143, + "grad_norm": 1.5682665529467597, + "learning_rate": 1.9607638569140405e-07, + "loss": 0.4114811420440674, + "step": 7006 + }, + { + "epoch": 1.6153314121037465, + "grad_norm": 1.5390762052298386, + "learning_rate": 1.9584975064313337e-07, + "loss": 0.44919753074645996, + "step": 7007 + }, + { + "epoch": 1.6155619596541788, + "grad_norm": 1.4647408998092328, + "learning_rate": 1.9562323242952816e-07, + "loss": 0.44578665494918823, + "step": 7008 + }, + { + "epoch": 1.615792507204611, + "grad_norm": 1.6845882444382327, + "learning_rate": 1.953968310834998e-07, + "loss": 0.4226934611797333, + "step": 7009 + }, + { + "epoch": 1.6160230547550434, + "grad_norm": 2.2486201594203656, + "learning_rate": 1.9517054663794153e-07, + "loss": 0.5555834770202637, + "step": 7010 + }, + { + "epoch": 1.6162536023054757, + "grad_norm": 1.4767118807632889, + "learning_rate": 1.9494437912573058e-07, + "loss": 0.428075909614563, + "step": 7011 + }, + { + "epoch": 1.616484149855908, + "grad_norm": 1.5189056600876345, + "learning_rate": 1.9471832857972625e-07, + "loss": 0.47747939825057983, + "step": 7012 + }, + { + "epoch": 1.6167146974063402, + "grad_norm": 1.5100831598484277, + "learning_rate": 1.9449239503277194e-07, + "loss": 0.4276934862136841, + "step": 7013 + }, + { + "epoch": 1.6169452449567725, + "grad_norm": 1.8983824161544947, + "learning_rate": 1.9426657851769302e-07, + "loss": 0.44419193267822266, + "step": 7014 + }, + { + "epoch": 1.6171757925072046, + "grad_norm": 1.6660017186142178, + "learning_rate": 1.9404087906729806e-07, + "loss": 0.4462706446647644, + "step": 7015 + }, + { + "epoch": 1.6174063400576368, + "grad_norm": 1.6051052042823168, + "learning_rate": 1.9381529671437923e-07, + "loss": 0.43562668561935425, + "step": 7016 + }, + { + "epoch": 1.6176368876080691, + "grad_norm": 1.9934644528425425, + "learning_rate": 1.935898314917115e-07, + "loss": 0.4618384838104248, + "step": 7017 + }, + { + "epoch": 1.6178674351585014, + "grad_norm": 1.5305830970877417, + "learning_rate": 1.933644834320519e-07, + "loss": 0.42981481552124023, + "step": 7018 + }, + { + "epoch": 1.6180979827089337, + "grad_norm": 1.916475433542249, + "learning_rate": 1.93139252568142e-07, + "loss": 0.39951127767562866, + "step": 7019 + }, + { + "epoch": 1.618328530259366, + "grad_norm": 1.4880364379634898, + "learning_rate": 1.9291413893270514e-07, + "loss": 0.4628783166408539, + "step": 7020 + }, + { + "epoch": 1.6185590778097982, + "grad_norm": 1.9586483355597437, + "learning_rate": 1.926891425584476e-07, + "loss": 0.4748151898384094, + "step": 7021 + }, + { + "epoch": 1.6187896253602305, + "grad_norm": 1.4040170327047168, + "learning_rate": 1.9246426347805967e-07, + "loss": 0.4238407015800476, + "step": 7022 + }, + { + "epoch": 1.6190201729106628, + "grad_norm": 1.4544645637680234, + "learning_rate": 1.9223950172421332e-07, + "loss": 0.4519417881965637, + "step": 7023 + }, + { + "epoch": 1.619250720461095, + "grad_norm": 1.5479222790274918, + "learning_rate": 1.9201485732956445e-07, + "loss": 0.4104294776916504, + "step": 7024 + }, + { + "epoch": 1.6194812680115274, + "grad_norm": 1.6509334362523898, + "learning_rate": 1.9179033032675173e-07, + "loss": 0.3946937322616577, + "step": 7025 + }, + { + "epoch": 1.6197118155619596, + "grad_norm": 1.6453980835597737, + "learning_rate": 1.915659207483964e-07, + "loss": 0.41711992025375366, + "step": 7026 + }, + { + "epoch": 1.619942363112392, + "grad_norm": 1.5895165297825928, + "learning_rate": 1.913416286271028e-07, + "loss": 0.4761412739753723, + "step": 7027 + }, + { + "epoch": 1.6201729106628242, + "grad_norm": 1.4830868217383606, + "learning_rate": 1.9111745399545798e-07, + "loss": 0.5078233480453491, + "step": 7028 + }, + { + "epoch": 1.6204034582132565, + "grad_norm": 1.3877209141758606, + "learning_rate": 1.9089339688603246e-07, + "loss": 0.3472098708152771, + "step": 7029 + }, + { + "epoch": 1.6206340057636888, + "grad_norm": 1.5886201993830495, + "learning_rate": 1.9066945733137974e-07, + "loss": 0.3884485363960266, + "step": 7030 + }, + { + "epoch": 1.620864553314121, + "grad_norm": 1.3892892625927125, + "learning_rate": 1.9044563536403524e-07, + "loss": 0.4992063641548157, + "step": 7031 + }, + { + "epoch": 1.6210951008645533, + "grad_norm": 1.4924190063226175, + "learning_rate": 1.902219310165185e-07, + "loss": 0.3512866795063019, + "step": 7032 + }, + { + "epoch": 1.6213256484149856, + "grad_norm": 1.5305883772970803, + "learning_rate": 1.8999834432133133e-07, + "loss": 0.44194120168685913, + "step": 7033 + }, + { + "epoch": 1.621556195965418, + "grad_norm": 1.6224356039874788, + "learning_rate": 1.8977487531095814e-07, + "loss": 0.4745762348175049, + "step": 7034 + }, + { + "epoch": 1.6217867435158502, + "grad_norm": 1.9250806360959296, + "learning_rate": 1.8955152401786723e-07, + "loss": 0.4458765387535095, + "step": 7035 + }, + { + "epoch": 1.6220172910662825, + "grad_norm": 1.701108971645492, + "learning_rate": 1.893282904745087e-07, + "loss": 0.5192512273788452, + "step": 7036 + }, + { + "epoch": 1.6222478386167147, + "grad_norm": 1.6193165039007456, + "learning_rate": 1.8910517471331632e-07, + "loss": 0.47647416591644287, + "step": 7037 + }, + { + "epoch": 1.622478386167147, + "grad_norm": 1.6132356068665432, + "learning_rate": 1.888821767667067e-07, + "loss": 0.49768751859664917, + "step": 7038 + }, + { + "epoch": 1.6227089337175793, + "grad_norm": 1.9401291765695219, + "learning_rate": 1.8865929666707893e-07, + "loss": 0.5038268566131592, + "step": 7039 + }, + { + "epoch": 1.6229394812680116, + "grad_norm": 1.6903831936251612, + "learning_rate": 1.8843653444681519e-07, + "loss": 0.47656458616256714, + "step": 7040 + }, + { + "epoch": 1.6231700288184439, + "grad_norm": 1.4991134295006157, + "learning_rate": 1.8821389013828016e-07, + "loss": 0.5014642477035522, + "step": 7041 + }, + { + "epoch": 1.6234005763688761, + "grad_norm": 2.093487981103135, + "learning_rate": 1.879913637738221e-07, + "loss": 0.4911212921142578, + "step": 7042 + }, + { + "epoch": 1.6236311239193082, + "grad_norm": 2.113248716786874, + "learning_rate": 1.8776895538577185e-07, + "loss": 0.4738670289516449, + "step": 7043 + }, + { + "epoch": 1.6238616714697405, + "grad_norm": 1.6705700650062818, + "learning_rate": 1.8754666500644278e-07, + "loss": 0.42330676317214966, + "step": 7044 + }, + { + "epoch": 1.6240922190201728, + "grad_norm": 1.7524142065793642, + "learning_rate": 1.8732449266813178e-07, + "loss": 0.4284883141517639, + "step": 7045 + }, + { + "epoch": 1.624322766570605, + "grad_norm": 1.6676122484771183, + "learning_rate": 1.8710243840311778e-07, + "loss": 0.4407314658164978, + "step": 7046 + }, + { + "epoch": 1.6245533141210373, + "grad_norm": 1.6726112467885852, + "learning_rate": 1.868805022436629e-07, + "loss": 0.445793092250824, + "step": 7047 + }, + { + "epoch": 1.6247838616714696, + "grad_norm": 2.0179165105135484, + "learning_rate": 1.866586842220126e-07, + "loss": 0.49527254700660706, + "step": 7048 + }, + { + "epoch": 1.6250144092219019, + "grad_norm": 1.8829724432355108, + "learning_rate": 1.8643698437039423e-07, + "loss": 0.5024650692939758, + "step": 7049 + }, + { + "epoch": 1.6252449567723342, + "grad_norm": 1.4839586443824915, + "learning_rate": 1.8621540272101864e-07, + "loss": 0.4003955125808716, + "step": 7050 + }, + { + "epoch": 1.6254755043227664, + "grad_norm": 1.4243235634156672, + "learning_rate": 1.8599393930607965e-07, + "loss": 0.4203549027442932, + "step": 7051 + }, + { + "epoch": 1.6257060518731987, + "grad_norm": 1.8616676934177332, + "learning_rate": 1.8577259415775336e-07, + "loss": 0.498489648103714, + "step": 7052 + }, + { + "epoch": 1.625936599423631, + "grad_norm": 1.849928674121505, + "learning_rate": 1.8555136730819865e-07, + "loss": 0.42907896637916565, + "step": 7053 + }, + { + "epoch": 1.6261671469740633, + "grad_norm": 1.5752798727413557, + "learning_rate": 1.85330258789558e-07, + "loss": 0.4849644601345062, + "step": 7054 + }, + { + "epoch": 1.6263976945244956, + "grad_norm": 1.4894906930427128, + "learning_rate": 1.851092686339556e-07, + "loss": 0.37567687034606934, + "step": 7055 + }, + { + "epoch": 1.6266282420749278, + "grad_norm": 1.6419279864399527, + "learning_rate": 1.8488839687349967e-07, + "loss": 0.40736931562423706, + "step": 7056 + }, + { + "epoch": 1.6268587896253601, + "grad_norm": 1.5390004415931011, + "learning_rate": 1.8466764354027986e-07, + "loss": 0.45521795749664307, + "step": 7057 + }, + { + "epoch": 1.6270893371757924, + "grad_norm": 2.0439510581772042, + "learning_rate": 1.844470086663701e-07, + "loss": 0.45577508211135864, + "step": 7058 + }, + { + "epoch": 1.6273198847262247, + "grad_norm": 1.8442747006182973, + "learning_rate": 1.842264922838258e-07, + "loss": 0.46271002292633057, + "step": 7059 + }, + { + "epoch": 1.627550432276657, + "grad_norm": 1.8168769739622397, + "learning_rate": 1.8400609442468573e-07, + "loss": 0.41168513894081116, + "step": 7060 + }, + { + "epoch": 1.6277809798270892, + "grad_norm": 1.6704034042742053, + "learning_rate": 1.8378581512097146e-07, + "loss": 0.48050713539123535, + "step": 7061 + }, + { + "epoch": 1.6280115273775215, + "grad_norm": 1.7102134601368413, + "learning_rate": 1.8356565440468763e-07, + "loss": 0.4158909022808075, + "step": 7062 + }, + { + "epoch": 1.6282420749279538, + "grad_norm": 1.5610634608134886, + "learning_rate": 1.8334561230782075e-07, + "loss": 0.42451566457748413, + "step": 7063 + }, + { + "epoch": 1.628472622478386, + "grad_norm": 1.4952643713001559, + "learning_rate": 1.8312568886234114e-07, + "loss": 0.5046045184135437, + "step": 7064 + }, + { + "epoch": 1.6287031700288184, + "grad_norm": 1.6727214912336503, + "learning_rate": 1.8290588410020113e-07, + "loss": 0.5656751990318298, + "step": 7065 + }, + { + "epoch": 1.6289337175792507, + "grad_norm": 1.756636707422769, + "learning_rate": 1.8268619805333597e-07, + "loss": 0.5253554582595825, + "step": 7066 + }, + { + "epoch": 1.629164265129683, + "grad_norm": 1.5333023336983513, + "learning_rate": 1.8246663075366408e-07, + "loss": 0.46105247735977173, + "step": 7067 + }, + { + "epoch": 1.6293948126801152, + "grad_norm": 1.5741857770042025, + "learning_rate": 1.8224718223308576e-07, + "loss": 0.5526989698410034, + "step": 7068 + }, + { + "epoch": 1.6296253602305475, + "grad_norm": 1.4157346505000856, + "learning_rate": 1.8202785252348506e-07, + "loss": 0.43069472908973694, + "step": 7069 + }, + { + "epoch": 1.6298559077809798, + "grad_norm": 1.6082377506047894, + "learning_rate": 1.818086416567285e-07, + "loss": 0.5010451078414917, + "step": 7070 + }, + { + "epoch": 1.630086455331412, + "grad_norm": 1.4560397719214118, + "learning_rate": 1.8158954966466467e-07, + "loss": 0.48797810077667236, + "step": 7071 + }, + { + "epoch": 1.6303170028818443, + "grad_norm": 1.5172724872127616, + "learning_rate": 1.8137057657912568e-07, + "loss": 0.3758738934993744, + "step": 7072 + }, + { + "epoch": 1.6305475504322766, + "grad_norm": 1.490229867825559, + "learning_rate": 1.8115172243192556e-07, + "loss": 0.4795163869857788, + "step": 7073 + }, + { + "epoch": 1.630778097982709, + "grad_norm": 1.6959094837276352, + "learning_rate": 1.8093298725486184e-07, + "loss": 0.3993092179298401, + "step": 7074 + }, + { + "epoch": 1.6310086455331412, + "grad_norm": 1.802185920731944, + "learning_rate": 1.8071437107971476e-07, + "loss": 0.5435800552368164, + "step": 7075 + }, + { + "epoch": 1.6312391930835735, + "grad_norm": 1.7039163337391874, + "learning_rate": 1.804958739382464e-07, + "loss": 0.4518716335296631, + "step": 7076 + }, + { + "epoch": 1.6314697406340057, + "grad_norm": 1.6900376783333737, + "learning_rate": 1.8027749586220277e-07, + "loss": 0.5534895062446594, + "step": 7077 + }, + { + "epoch": 1.631700288184438, + "grad_norm": 1.8998675234284237, + "learning_rate": 1.800592368833115e-07, + "loss": 0.5099557638168335, + "step": 7078 + }, + { + "epoch": 1.6319308357348703, + "grad_norm": 1.35099602311764, + "learning_rate": 1.7984109703328322e-07, + "loss": 0.43192172050476074, + "step": 7079 + }, + { + "epoch": 1.6321613832853026, + "grad_norm": 1.6063094501829667, + "learning_rate": 1.796230763438119e-07, + "loss": 0.46690988540649414, + "step": 7080 + }, + { + "epoch": 1.6323919308357349, + "grad_norm": 1.7268310015035628, + "learning_rate": 1.7940517484657301e-07, + "loss": 0.487186074256897, + "step": 7081 + }, + { + "epoch": 1.6326224783861671, + "grad_norm": 1.6482432744189444, + "learning_rate": 1.7918739257322613e-07, + "loss": 0.4217287302017212, + "step": 7082 + }, + { + "epoch": 1.6328530259365994, + "grad_norm": 1.9234836261724548, + "learning_rate": 1.7896972955541223e-07, + "loss": 0.4879988431930542, + "step": 7083 + }, + { + "epoch": 1.6330835734870317, + "grad_norm": 1.4694641918643139, + "learning_rate": 1.787521858247555e-07, + "loss": 0.449859619140625, + "step": 7084 + }, + { + "epoch": 1.633314121037464, + "grad_norm": 1.5243370275164099, + "learning_rate": 1.7853476141286306e-07, + "loss": 0.38333576917648315, + "step": 7085 + }, + { + "epoch": 1.6335446685878963, + "grad_norm": 1.3646889918108736, + "learning_rate": 1.78317456351324e-07, + "loss": 0.386202871799469, + "step": 7086 + }, + { + "epoch": 1.6337752161383285, + "grad_norm": 1.9790389429198783, + "learning_rate": 1.7810027067171075e-07, + "loss": 0.5273287296295166, + "step": 7087 + }, + { + "epoch": 1.6340057636887608, + "grad_norm": 1.469553115830399, + "learning_rate": 1.7788320440557836e-07, + "loss": 0.4877879023551941, + "step": 7088 + }, + { + "epoch": 1.634236311239193, + "grad_norm": 1.3693249465977024, + "learning_rate": 1.7766625758446407e-07, + "loss": 0.5114452242851257, + "step": 7089 + }, + { + "epoch": 1.6344668587896254, + "grad_norm": 1.4466728323394555, + "learning_rate": 1.774494302398878e-07, + "loss": 0.534758985042572, + "step": 7090 + }, + { + "epoch": 1.6346974063400577, + "grad_norm": 1.6884547026848957, + "learning_rate": 1.7723272240335262e-07, + "loss": 0.4727644622325897, + "step": 7091 + }, + { + "epoch": 1.63492795389049, + "grad_norm": 1.3041580702100446, + "learning_rate": 1.7701613410634365e-07, + "loss": 0.44658514857292175, + "step": 7092 + }, + { + "epoch": 1.6351585014409222, + "grad_norm": 1.8133669891438844, + "learning_rate": 1.767996653803292e-07, + "loss": 0.4832932949066162, + "step": 7093 + }, + { + "epoch": 1.6353890489913545, + "grad_norm": 1.9873008129610308, + "learning_rate": 1.7658331625675958e-07, + "loss": 0.4222508668899536, + "step": 7094 + }, + { + "epoch": 1.6356195965417868, + "grad_norm": 1.6637875675152678, + "learning_rate": 1.7636708676706856e-07, + "loss": 0.5232953429222107, + "step": 7095 + }, + { + "epoch": 1.635850144092219, + "grad_norm": 2.1560102051327936, + "learning_rate": 1.7615097694267177e-07, + "loss": 0.4896438419818878, + "step": 7096 + }, + { + "epoch": 1.6360806916426514, + "grad_norm": 1.521912219118672, + "learning_rate": 1.7593498681496743e-07, + "loss": 0.4000094532966614, + "step": 7097 + }, + { + "epoch": 1.6363112391930836, + "grad_norm": 1.6788307180179725, + "learning_rate": 1.7571911641533698e-07, + "loss": 0.5291081070899963, + "step": 7098 + }, + { + "epoch": 1.636541786743516, + "grad_norm": 1.7872228150885656, + "learning_rate": 1.7550336577514424e-07, + "loss": 0.4801519513130188, + "step": 7099 + }, + { + "epoch": 1.6367723342939482, + "grad_norm": 1.7969227029518855, + "learning_rate": 1.7528773492573524e-07, + "loss": 0.4453350901603699, + "step": 7100 + }, + { + "epoch": 1.6370028818443805, + "grad_norm": 1.8067107495609092, + "learning_rate": 1.7507222389843923e-07, + "loss": 0.5279836058616638, + "step": 7101 + }, + { + "epoch": 1.6372334293948128, + "grad_norm": 2.2043741342581575, + "learning_rate": 1.7485683272456754e-07, + "loss": 0.5456463098526001, + "step": 7102 + }, + { + "epoch": 1.637463976945245, + "grad_norm": 1.5492809486335855, + "learning_rate": 1.7464156143541398e-07, + "loss": 0.445858895778656, + "step": 7103 + }, + { + "epoch": 1.6376945244956773, + "grad_norm": 1.768665301644395, + "learning_rate": 1.744264100622558e-07, + "loss": 0.48954901099205017, + "step": 7104 + }, + { + "epoch": 1.6379250720461096, + "grad_norm": 1.9762098585433456, + "learning_rate": 1.742113786363517e-07, + "loss": 0.4365660548210144, + "step": 7105 + }, + { + "epoch": 1.6381556195965419, + "grad_norm": 1.5120765060907773, + "learning_rate": 1.739964671889438e-07, + "loss": 0.40327224135398865, + "step": 7106 + }, + { + "epoch": 1.6383861671469742, + "grad_norm": 1.5579134604152398, + "learning_rate": 1.7378167575125668e-07, + "loss": 0.4767388701438904, + "step": 7107 + }, + { + "epoch": 1.6386167146974064, + "grad_norm": 1.8299910010033837, + "learning_rate": 1.735670043544971e-07, + "loss": 0.5005271434783936, + "step": 7108 + }, + { + "epoch": 1.6388472622478387, + "grad_norm": 1.888346334221167, + "learning_rate": 1.7335245302985458e-07, + "loss": 0.5074931979179382, + "step": 7109 + }, + { + "epoch": 1.639077809798271, + "grad_norm": 2.3323407528830695, + "learning_rate": 1.7313802180850102e-07, + "loss": 0.3878687024116516, + "step": 7110 + }, + { + "epoch": 1.6393083573487033, + "grad_norm": 1.7073526439249955, + "learning_rate": 1.7292371072159118e-07, + "loss": 0.5035123229026794, + "step": 7111 + }, + { + "epoch": 1.6395389048991356, + "grad_norm": 1.625831303720922, + "learning_rate": 1.727095198002625e-07, + "loss": 0.47408533096313477, + "step": 7112 + }, + { + "epoch": 1.6397694524495678, + "grad_norm": 1.5861177391571943, + "learning_rate": 1.724954490756342e-07, + "loss": 0.45419204235076904, + "step": 7113 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 1.5212582464247422, + "learning_rate": 1.7228149857880902e-07, + "loss": 0.44029003381729126, + "step": 7114 + }, + { + "epoch": 1.6402305475504324, + "grad_norm": 1.5718073030301392, + "learning_rate": 1.7206766834087148e-07, + "loss": 0.4374336004257202, + "step": 7115 + }, + { + "epoch": 1.6404610951008647, + "grad_norm": 1.977156786293177, + "learning_rate": 1.7185395839288875e-07, + "loss": 0.5088529586791992, + "step": 7116 + }, + { + "epoch": 1.640691642651297, + "grad_norm": 1.9035660253830302, + "learning_rate": 1.7164036876591105e-07, + "loss": 0.5353911519050598, + "step": 7117 + }, + { + "epoch": 1.6409221902017292, + "grad_norm": 1.5646144443377459, + "learning_rate": 1.7142689949097033e-07, + "loss": 0.49949127435684204, + "step": 7118 + }, + { + "epoch": 1.6411527377521615, + "grad_norm": 1.416160387365733, + "learning_rate": 1.712135505990816e-07, + "loss": 0.4392736256122589, + "step": 7119 + }, + { + "epoch": 1.6413832853025938, + "grad_norm": 1.618778441026721, + "learning_rate": 1.7100032212124248e-07, + "loss": 0.4985026717185974, + "step": 7120 + }, + { + "epoch": 1.641613832853026, + "grad_norm": 1.5434378799917168, + "learning_rate": 1.7078721408843266e-07, + "loss": 0.39500099420547485, + "step": 7121 + }, + { + "epoch": 1.6418443804034584, + "grad_norm": 1.4981414645710247, + "learning_rate": 1.7057422653161424e-07, + "loss": 0.3887489438056946, + "step": 7122 + }, + { + "epoch": 1.6420749279538907, + "grad_norm": 1.5956592528760005, + "learning_rate": 1.7036135948173268e-07, + "loss": 0.5208394527435303, + "step": 7123 + }, + { + "epoch": 1.642305475504323, + "grad_norm": 1.6406147446679789, + "learning_rate": 1.7014861296971473e-07, + "loss": 0.4292425513267517, + "step": 7124 + }, + { + "epoch": 1.642536023054755, + "grad_norm": 1.8466690932217622, + "learning_rate": 1.6993598702647084e-07, + "loss": 0.5247593522071838, + "step": 7125 + }, + { + "epoch": 1.6427665706051873, + "grad_norm": 1.6236257528874523, + "learning_rate": 1.6972348168289275e-07, + "loss": 0.40911680459976196, + "step": 7126 + }, + { + "epoch": 1.6429971181556196, + "grad_norm": 1.7593622225771122, + "learning_rate": 1.6951109696985576e-07, + "loss": 0.5300519466400146, + "step": 7127 + }, + { + "epoch": 1.6432276657060518, + "grad_norm": 1.8530552916322522, + "learning_rate": 1.692988329182171e-07, + "loss": 0.4551096558570862, + "step": 7128 + }, + { + "epoch": 1.6434582132564841, + "grad_norm": 1.5407775659664558, + "learning_rate": 1.690866895588161e-07, + "loss": 0.4497135877609253, + "step": 7129 + }, + { + "epoch": 1.6436887608069164, + "grad_norm": 1.4118252540002971, + "learning_rate": 1.6887466692247554e-07, + "loss": 0.4177432656288147, + "step": 7130 + }, + { + "epoch": 1.6439193083573487, + "grad_norm": 1.583844642588823, + "learning_rate": 1.6866276503999965e-07, + "loss": 0.4593951404094696, + "step": 7131 + }, + { + "epoch": 1.644149855907781, + "grad_norm": 1.6310540123379373, + "learning_rate": 1.684509839421757e-07, + "loss": 0.38442713022232056, + "step": 7132 + }, + { + "epoch": 1.6443804034582132, + "grad_norm": 1.6385922234515504, + "learning_rate": 1.6823932365977356e-07, + "loss": 0.4701668620109558, + "step": 7133 + }, + { + "epoch": 1.6446109510086455, + "grad_norm": 1.7942474001482556, + "learning_rate": 1.6802778422354514e-07, + "loss": 0.47364962100982666, + "step": 7134 + }, + { + "epoch": 1.6448414985590778, + "grad_norm": 1.6308226749238004, + "learning_rate": 1.6781636566422463e-07, + "loss": 0.5317097902297974, + "step": 7135 + }, + { + "epoch": 1.64507204610951, + "grad_norm": 1.3748228459577467, + "learning_rate": 1.6760506801252926e-07, + "loss": 0.47745388746261597, + "step": 7136 + }, + { + "epoch": 1.6453025936599424, + "grad_norm": 1.6435587909932845, + "learning_rate": 1.6739389129915817e-07, + "loss": 0.4827711880207062, + "step": 7137 + }, + { + "epoch": 1.6455331412103746, + "grad_norm": 1.7001993101774864, + "learning_rate": 1.671828355547934e-07, + "loss": 0.5359855890274048, + "step": 7138 + }, + { + "epoch": 1.645763688760807, + "grad_norm": 1.7037683748039727, + "learning_rate": 1.6697190081009882e-07, + "loss": 0.48604434728622437, + "step": 7139 + }, + { + "epoch": 1.6459942363112392, + "grad_norm": 1.648303118966041, + "learning_rate": 1.6676108709572146e-07, + "loss": 0.602343738079071, + "step": 7140 + }, + { + "epoch": 1.6462247838616715, + "grad_norm": 1.5710736352040753, + "learning_rate": 1.6655039444229013e-07, + "loss": 0.36290526390075684, + "step": 7141 + }, + { + "epoch": 1.6464553314121038, + "grad_norm": 1.5863608375532527, + "learning_rate": 1.6633982288041603e-07, + "loss": 0.4096101224422455, + "step": 7142 + }, + { + "epoch": 1.646685878962536, + "grad_norm": 1.5485446810610417, + "learning_rate": 1.6612937244069326e-07, + "loss": 0.4343856871128082, + "step": 7143 + }, + { + "epoch": 1.6469164265129683, + "grad_norm": 1.5635554519919759, + "learning_rate": 1.6591904315369833e-07, + "loss": 0.44130879640579224, + "step": 7144 + }, + { + "epoch": 1.6471469740634006, + "grad_norm": 1.429256520832658, + "learning_rate": 1.6570883504998945e-07, + "loss": 0.45949405431747437, + "step": 7145 + }, + { + "epoch": 1.6473775216138329, + "grad_norm": 1.6107689761815698, + "learning_rate": 1.654987481601081e-07, + "loss": 0.39314505457878113, + "step": 7146 + }, + { + "epoch": 1.6476080691642652, + "grad_norm": 1.6243284006873182, + "learning_rate": 1.6528878251457757e-07, + "loss": 0.45313894748687744, + "step": 7147 + }, + { + "epoch": 1.6478386167146974, + "grad_norm": 1.4856311266688829, + "learning_rate": 1.6507893814390328e-07, + "loss": 0.4263615310192108, + "step": 7148 + }, + { + "epoch": 1.6480691642651297, + "grad_norm": 1.688675267530104, + "learning_rate": 1.6486921507857398e-07, + "loss": 0.5187538266181946, + "step": 7149 + }, + { + "epoch": 1.648299711815562, + "grad_norm": 1.3737191741307955, + "learning_rate": 1.6465961334905986e-07, + "loss": 0.44234776496887207, + "step": 7150 + }, + { + "epoch": 1.6485302593659943, + "grad_norm": 1.5480906235848935, + "learning_rate": 1.64450132985814e-07, + "loss": 0.42623990774154663, + "step": 7151 + }, + { + "epoch": 1.6487608069164263, + "grad_norm": 1.4835957957904586, + "learning_rate": 1.6424077401927206e-07, + "loss": 0.4729412794113159, + "step": 7152 + }, + { + "epoch": 1.6489913544668586, + "grad_norm": 1.4548490747782812, + "learning_rate": 1.6403153647985134e-07, + "loss": 0.4603039026260376, + "step": 7153 + }, + { + "epoch": 1.649221902017291, + "grad_norm": 1.456280541182368, + "learning_rate": 1.6382242039795213e-07, + "loss": 0.5220425724983215, + "step": 7154 + }, + { + "epoch": 1.6494524495677232, + "grad_norm": 1.784246781666792, + "learning_rate": 1.6361342580395632e-07, + "loss": 0.4285042881965637, + "step": 7155 + }, + { + "epoch": 1.6496829971181555, + "grad_norm": 1.9528209037439446, + "learning_rate": 1.6340455272822894e-07, + "loss": 0.47878625988960266, + "step": 7156 + }, + { + "epoch": 1.6499135446685878, + "grad_norm": 1.341682698287289, + "learning_rate": 1.631958012011173e-07, + "loss": 0.44329750537872314, + "step": 7157 + }, + { + "epoch": 1.65014409221902, + "grad_norm": 1.7415780885053698, + "learning_rate": 1.6298717125295057e-07, + "loss": 0.4647062420845032, + "step": 7158 + }, + { + "epoch": 1.6503746397694523, + "grad_norm": 1.5567564662904019, + "learning_rate": 1.6277866291404074e-07, + "loss": 0.4851604104042053, + "step": 7159 + }, + { + "epoch": 1.6506051873198846, + "grad_norm": 1.582233265535471, + "learning_rate": 1.6257027621468177e-07, + "loss": 0.532638669013977, + "step": 7160 + }, + { + "epoch": 1.6508357348703169, + "grad_norm": 1.5173909016412013, + "learning_rate": 1.623620111851498e-07, + "loss": 0.4583241641521454, + "step": 7161 + }, + { + "epoch": 1.6510662824207492, + "grad_norm": 1.514446467644476, + "learning_rate": 1.6215386785570405e-07, + "loss": 0.3982502222061157, + "step": 7162 + }, + { + "epoch": 1.6512968299711814, + "grad_norm": 1.7091680314569688, + "learning_rate": 1.6194584625658514e-07, + "loss": 0.465998113155365, + "step": 7163 + }, + { + "epoch": 1.6515273775216137, + "grad_norm": 1.642523863514421, + "learning_rate": 1.6173794641801675e-07, + "loss": 0.49632728099823, + "step": 7164 + }, + { + "epoch": 1.651757925072046, + "grad_norm": 1.6357073961102824, + "learning_rate": 1.615301683702046e-07, + "loss": 0.47182852029800415, + "step": 7165 + }, + { + "epoch": 1.6519884726224783, + "grad_norm": 1.5740312008724333, + "learning_rate": 1.6132251214333658e-07, + "loss": 0.42910608649253845, + "step": 7166 + }, + { + "epoch": 1.6522190201729106, + "grad_norm": 1.4210692456044949, + "learning_rate": 1.6111497776758276e-07, + "loss": 0.42247748374938965, + "step": 7167 + }, + { + "epoch": 1.6524495677233428, + "grad_norm": 2.016987391877327, + "learning_rate": 1.609075652730961e-07, + "loss": 0.4508252739906311, + "step": 7168 + }, + { + "epoch": 1.6526801152737751, + "grad_norm": 1.4449046621735189, + "learning_rate": 1.6070027469001114e-07, + "loss": 0.4595048129558563, + "step": 7169 + }, + { + "epoch": 1.6529106628242074, + "grad_norm": 1.5448968518745512, + "learning_rate": 1.6049310604844536e-07, + "loss": 0.3852691054344177, + "step": 7170 + }, + { + "epoch": 1.6531412103746397, + "grad_norm": 1.6692560122028328, + "learning_rate": 1.6028605937849793e-07, + "loss": 0.4829123020172119, + "step": 7171 + }, + { + "epoch": 1.653371757925072, + "grad_norm": 1.709360623818806, + "learning_rate": 1.600791347102508e-07, + "loss": 0.5782856941223145, + "step": 7172 + }, + { + "epoch": 1.6536023054755042, + "grad_norm": 1.696439015666858, + "learning_rate": 1.5987233207376794e-07, + "loss": 0.44069433212280273, + "step": 7173 + }, + { + "epoch": 1.6538328530259365, + "grad_norm": 1.7407402903841707, + "learning_rate": 1.596656514990954e-07, + "loss": 0.41529107093811035, + "step": 7174 + }, + { + "epoch": 1.6540634005763688, + "grad_norm": 1.619986578008229, + "learning_rate": 1.5945909301626205e-07, + "loss": 0.4839997887611389, + "step": 7175 + }, + { + "epoch": 1.654293948126801, + "grad_norm": 1.7287640110596156, + "learning_rate": 1.5925265665527821e-07, + "loss": 0.4021362066268921, + "step": 7176 + }, + { + "epoch": 1.6545244956772334, + "grad_norm": 1.4304015495283673, + "learning_rate": 1.5904634244613723e-07, + "loss": 0.5291178226470947, + "step": 7177 + }, + { + "epoch": 1.6547550432276656, + "grad_norm": 1.9082896937154643, + "learning_rate": 1.5884015041881483e-07, + "loss": 0.4622513949871063, + "step": 7178 + }, + { + "epoch": 1.654985590778098, + "grad_norm": 1.5727457944548462, + "learning_rate": 1.58634080603268e-07, + "loss": 0.5098867416381836, + "step": 7179 + }, + { + "epoch": 1.6552161383285302, + "grad_norm": 1.8633855733896474, + "learning_rate": 1.5842813302943646e-07, + "loss": 0.6110771894454956, + "step": 7180 + }, + { + "epoch": 1.6554466858789625, + "grad_norm": 1.8109337460557362, + "learning_rate": 1.5822230772724288e-07, + "loss": 0.5498735308647156, + "step": 7181 + }, + { + "epoch": 1.6556772334293948, + "grad_norm": 1.6288862141191938, + "learning_rate": 1.5801660472659074e-07, + "loss": 0.5020328760147095, + "step": 7182 + }, + { + "epoch": 1.655907780979827, + "grad_norm": 1.5319375398935402, + "learning_rate": 1.5781102405736723e-07, + "loss": 0.4844881594181061, + "step": 7183 + }, + { + "epoch": 1.6561383285302593, + "grad_norm": 1.6333215277041324, + "learning_rate": 1.5760556574944062e-07, + "loss": 0.47296953201293945, + "step": 7184 + }, + { + "epoch": 1.6563688760806916, + "grad_norm": 1.5894632852363868, + "learning_rate": 1.5740022983266232e-07, + "loss": 0.5081913471221924, + "step": 7185 + }, + { + "epoch": 1.656599423631124, + "grad_norm": 1.8719272137228433, + "learning_rate": 1.5719501633686517e-07, + "loss": 0.3864198923110962, + "step": 7186 + }, + { + "epoch": 1.6568299711815562, + "grad_norm": 1.621789029663121, + "learning_rate": 1.569899252918644e-07, + "loss": 0.5106151700019836, + "step": 7187 + }, + { + "epoch": 1.6570605187319885, + "grad_norm": 1.623996761371585, + "learning_rate": 1.5678495672745785e-07, + "loss": 0.4601839780807495, + "step": 7188 + }, + { + "epoch": 1.6572910662824207, + "grad_norm": 1.7015255797101292, + "learning_rate": 1.5658011067342546e-07, + "loss": 0.41081488132476807, + "step": 7189 + }, + { + "epoch": 1.657521613832853, + "grad_norm": 1.655929621273635, + "learning_rate": 1.563753871595289e-07, + "loss": 0.5042529702186584, + "step": 7190 + }, + { + "epoch": 1.6577521613832853, + "grad_norm": 1.504758223783334, + "learning_rate": 1.5617078621551305e-07, + "loss": 0.4239219129085541, + "step": 7191 + }, + { + "epoch": 1.6579827089337176, + "grad_norm": 1.734035504638041, + "learning_rate": 1.559663078711032e-07, + "loss": 0.40397927165031433, + "step": 7192 + }, + { + "epoch": 1.6582132564841499, + "grad_norm": 1.3443962161928609, + "learning_rate": 1.5576195215600862e-07, + "loss": 0.44509077072143555, + "step": 7193 + }, + { + "epoch": 1.6584438040345821, + "grad_norm": 1.6126140371628406, + "learning_rate": 1.555577190999201e-07, + "loss": 0.3795713782310486, + "step": 7194 + }, + { + "epoch": 1.6586743515850144, + "grad_norm": 1.3064119517656203, + "learning_rate": 1.5535360873251024e-07, + "loss": 0.43911436200141907, + "step": 7195 + }, + { + "epoch": 1.6589048991354467, + "grad_norm": 1.4863642261752135, + "learning_rate": 1.5514962108343432e-07, + "loss": 0.485666960477829, + "step": 7196 + }, + { + "epoch": 1.659135446685879, + "grad_norm": 1.783744090847337, + "learning_rate": 1.549457561823302e-07, + "loss": 0.3457345962524414, + "step": 7197 + }, + { + "epoch": 1.6593659942363113, + "grad_norm": 1.6175427546858157, + "learning_rate": 1.5474201405881616e-07, + "loss": 0.5008036494255066, + "step": 7198 + }, + { + "epoch": 1.6595965417867435, + "grad_norm": 1.6987150806432352, + "learning_rate": 1.5453839474249474e-07, + "loss": 0.53286212682724, + "step": 7199 + }, + { + "epoch": 1.6598270893371758, + "grad_norm": 1.7805581367356456, + "learning_rate": 1.5433489826294921e-07, + "loss": 0.5207295417785645, + "step": 7200 + }, + { + "epoch": 1.660057636887608, + "grad_norm": 1.55676080740635, + "learning_rate": 1.5413152464974565e-07, + "loss": 0.4445546865463257, + "step": 7201 + }, + { + "epoch": 1.6602881844380404, + "grad_norm": 1.5721680438234895, + "learning_rate": 1.5392827393243246e-07, + "loss": 0.4071146547794342, + "step": 7202 + }, + { + "epoch": 1.6605187319884727, + "grad_norm": 1.899976807653372, + "learning_rate": 1.5372514614053956e-07, + "loss": 0.43851250410079956, + "step": 7203 + }, + { + "epoch": 1.660749279538905, + "grad_norm": 1.6317372171777702, + "learning_rate": 1.53522141303579e-07, + "loss": 0.4426385164260864, + "step": 7204 + }, + { + "epoch": 1.6609798270893372, + "grad_norm": 1.4622497310054086, + "learning_rate": 1.5331925945104585e-07, + "loss": 0.4058944582939148, + "step": 7205 + }, + { + "epoch": 1.6612103746397695, + "grad_norm": 1.4412439963389565, + "learning_rate": 1.531165006124161e-07, + "loss": 0.46442219614982605, + "step": 7206 + }, + { + "epoch": 1.6614409221902018, + "grad_norm": 1.8998885504324288, + "learning_rate": 1.5291386481714917e-07, + "loss": 0.44008395075798035, + "step": 7207 + }, + { + "epoch": 1.661671469740634, + "grad_norm": 1.6165431540422157, + "learning_rate": 1.5271135209468545e-07, + "loss": 0.4646702706813812, + "step": 7208 + }, + { + "epoch": 1.6619020172910663, + "grad_norm": 1.6141720205477035, + "learning_rate": 1.5250896247444833e-07, + "loss": 0.5206056833267212, + "step": 7209 + }, + { + "epoch": 1.6621325648414986, + "grad_norm": 1.5917839537896612, + "learning_rate": 1.5230669598584266e-07, + "loss": 0.43179526925086975, + "step": 7210 + }, + { + "epoch": 1.662363112391931, + "grad_norm": 1.3678375311854771, + "learning_rate": 1.521045526582554e-07, + "loss": 0.41198521852493286, + "step": 7211 + }, + { + "epoch": 1.6625936599423632, + "grad_norm": 1.6500020374603228, + "learning_rate": 1.5190253252105624e-07, + "loss": 0.542406439781189, + "step": 7212 + }, + { + "epoch": 1.6628242074927955, + "grad_norm": 1.858718160982589, + "learning_rate": 1.517006356035967e-07, + "loss": 0.46104592084884644, + "step": 7213 + }, + { + "epoch": 1.6630547550432278, + "grad_norm": 1.519964730065185, + "learning_rate": 1.514988619352099e-07, + "loss": 0.39294254779815674, + "step": 7214 + }, + { + "epoch": 1.66328530259366, + "grad_norm": 1.7134471062991041, + "learning_rate": 1.512972115452119e-07, + "loss": 0.5588621497154236, + "step": 7215 + }, + { + "epoch": 1.6635158501440923, + "grad_norm": 1.8550173716299083, + "learning_rate": 1.510956844629002e-07, + "loss": 0.5034887790679932, + "step": 7216 + }, + { + "epoch": 1.6637463976945246, + "grad_norm": 1.802824758529772, + "learning_rate": 1.508942807175544e-07, + "loss": 0.503799319267273, + "step": 7217 + }, + { + "epoch": 1.6639769452449569, + "grad_norm": 1.765440871146946, + "learning_rate": 1.5069300033843668e-07, + "loss": 0.465304970741272, + "step": 7218 + }, + { + "epoch": 1.6642074927953892, + "grad_norm": 1.9150024875802376, + "learning_rate": 1.5049184335479072e-07, + "loss": 0.47217413783073425, + "step": 7219 + }, + { + "epoch": 1.6644380403458214, + "grad_norm": 1.6818734317510342, + "learning_rate": 1.5029080979584275e-07, + "loss": 0.4457072615623474, + "step": 7220 + }, + { + "epoch": 1.6646685878962537, + "grad_norm": 1.5435205402521186, + "learning_rate": 1.5008989969080065e-07, + "loss": 0.43352675437927246, + "step": 7221 + }, + { + "epoch": 1.664899135446686, + "grad_norm": 1.400426360312227, + "learning_rate": 1.4988911306885487e-07, + "loss": 0.4387536644935608, + "step": 7222 + }, + { + "epoch": 1.6651296829971183, + "grad_norm": 1.7947162267476848, + "learning_rate": 1.496884499591774e-07, + "loss": 0.39572250843048096, + "step": 7223 + }, + { + "epoch": 1.6653602305475506, + "grad_norm": 1.4532491521694952, + "learning_rate": 1.4948791039092234e-07, + "loss": 0.4870808720588684, + "step": 7224 + }, + { + "epoch": 1.6655907780979828, + "grad_norm": 1.967784988941882, + "learning_rate": 1.4928749439322618e-07, + "loss": 0.4750326871871948, + "step": 7225 + }, + { + "epoch": 1.6658213256484151, + "grad_norm": 1.733380464244531, + "learning_rate": 1.4908720199520763e-07, + "loss": 0.4600903391838074, + "step": 7226 + }, + { + "epoch": 1.6660518731988474, + "grad_norm": 1.845212682331216, + "learning_rate": 1.488870332259664e-07, + "loss": 0.5157172679901123, + "step": 7227 + }, + { + "epoch": 1.6662824207492797, + "grad_norm": 1.527717288320904, + "learning_rate": 1.4868698811458558e-07, + "loss": 0.5109579563140869, + "step": 7228 + }, + { + "epoch": 1.666512968299712, + "grad_norm": 1.5715781586375175, + "learning_rate": 1.4848706669012933e-07, + "loss": 0.5458623766899109, + "step": 7229 + }, + { + "epoch": 1.6667435158501442, + "grad_norm": 1.559742556790004, + "learning_rate": 1.48287268981644e-07, + "loss": 0.4763823449611664, + "step": 7230 + }, + { + "epoch": 1.6669740634005765, + "grad_norm": 1.474661322860322, + "learning_rate": 1.480875950181585e-07, + "loss": 0.46346691250801086, + "step": 7231 + }, + { + "epoch": 1.6672046109510088, + "grad_norm": 1.8805501138397551, + "learning_rate": 1.4788804482868289e-07, + "loss": 0.5442988872528076, + "step": 7232 + }, + { + "epoch": 1.667435158501441, + "grad_norm": 1.5922849196085607, + "learning_rate": 1.4768861844221002e-07, + "loss": 0.508071780204773, + "step": 7233 + }, + { + "epoch": 1.6676657060518734, + "grad_norm": 2.044136469779381, + "learning_rate": 1.4748931588771484e-07, + "loss": 0.607035756111145, + "step": 7234 + }, + { + "epoch": 1.6678962536023054, + "grad_norm": 2.56236340259614, + "learning_rate": 1.4729013719415352e-07, + "loss": 0.6532429456710815, + "step": 7235 + }, + { + "epoch": 1.6681268011527377, + "grad_norm": 1.659139984651841, + "learning_rate": 1.4709108239046465e-07, + "loss": 0.49710947275161743, + "step": 7236 + }, + { + "epoch": 1.66835734870317, + "grad_norm": 2.3059474306371293, + "learning_rate": 1.4689215150556856e-07, + "loss": 0.5243515968322754, + "step": 7237 + }, + { + "epoch": 1.6685878962536023, + "grad_norm": 2.0007736669763516, + "learning_rate": 1.4669334456836825e-07, + "loss": 0.5056744813919067, + "step": 7238 + }, + { + "epoch": 1.6688184438040345, + "grad_norm": 1.7502562249309883, + "learning_rate": 1.4649466160774847e-07, + "loss": 0.43398773670196533, + "step": 7239 + }, + { + "epoch": 1.6690489913544668, + "grad_norm": 1.5465503969894825, + "learning_rate": 1.462961026525752e-07, + "loss": 0.5139761567115784, + "step": 7240 + }, + { + "epoch": 1.669279538904899, + "grad_norm": 1.6696867508957232, + "learning_rate": 1.4609766773169763e-07, + "loss": 0.4375327229499817, + "step": 7241 + }, + { + "epoch": 1.6695100864553314, + "grad_norm": 1.5816533801320478, + "learning_rate": 1.4589935687394593e-07, + "loss": 0.4920062720775604, + "step": 7242 + }, + { + "epoch": 1.6697406340057637, + "grad_norm": 1.7465659879703033, + "learning_rate": 1.4570117010813243e-07, + "loss": 0.47602343559265137, + "step": 7243 + }, + { + "epoch": 1.669971181556196, + "grad_norm": 1.6609975329131823, + "learning_rate": 1.4550310746305194e-07, + "loss": 0.4663471579551697, + "step": 7244 + }, + { + "epoch": 1.6702017291066282, + "grad_norm": 1.825000722601049, + "learning_rate": 1.4530516896748068e-07, + "loss": 0.4032224416732788, + "step": 7245 + }, + { + "epoch": 1.6704322766570605, + "grad_norm": 1.6515038226392735, + "learning_rate": 1.4510735465017708e-07, + "loss": 0.3822782635688782, + "step": 7246 + }, + { + "epoch": 1.6706628242074928, + "grad_norm": 1.7342878908697361, + "learning_rate": 1.4490966453988185e-07, + "loss": 0.4859994649887085, + "step": 7247 + }, + { + "epoch": 1.670893371757925, + "grad_norm": 1.8492242805033283, + "learning_rate": 1.4471209866531708e-07, + "loss": 0.5374040007591248, + "step": 7248 + }, + { + "epoch": 1.6711239193083574, + "grad_norm": 1.6953840635476194, + "learning_rate": 1.4451465705518663e-07, + "loss": 0.48769307136535645, + "step": 7249 + }, + { + "epoch": 1.6713544668587896, + "grad_norm": 1.6109119052274135, + "learning_rate": 1.443173397381774e-07, + "loss": 0.4628336429595947, + "step": 7250 + }, + { + "epoch": 1.671585014409222, + "grad_norm": 1.6751219673185227, + "learning_rate": 1.4412014674295703e-07, + "loss": 0.5241574645042419, + "step": 7251 + }, + { + "epoch": 1.6718155619596542, + "grad_norm": 1.6820438668557176, + "learning_rate": 1.4392307809817594e-07, + "loss": 0.4850649833679199, + "step": 7252 + }, + { + "epoch": 1.6720461095100865, + "grad_norm": 1.4736638522112695, + "learning_rate": 1.4372613383246579e-07, + "loss": 0.4610109329223633, + "step": 7253 + }, + { + "epoch": 1.6722766570605188, + "grad_norm": 1.8781850963576057, + "learning_rate": 1.43529313974441e-07, + "loss": 0.422643780708313, + "step": 7254 + }, + { + "epoch": 1.672507204610951, + "grad_norm": 1.3707324521464075, + "learning_rate": 1.4333261855269717e-07, + "loss": 0.4568919241428375, + "step": 7255 + }, + { + "epoch": 1.6727377521613833, + "grad_norm": 1.5655308869863263, + "learning_rate": 1.43136047595812e-07, + "loss": 0.4420914649963379, + "step": 7256 + }, + { + "epoch": 1.6729682997118156, + "grad_norm": 1.809187677501354, + "learning_rate": 1.4293960113234526e-07, + "loss": 0.531182587146759, + "step": 7257 + }, + { + "epoch": 1.6731988472622479, + "grad_norm": 1.588312906840417, + "learning_rate": 1.4274327919083883e-07, + "loss": 0.4499055743217468, + "step": 7258 + }, + { + "epoch": 1.6734293948126802, + "grad_norm": 1.4474121027649385, + "learning_rate": 1.425470817998159e-07, + "loss": 0.4007442593574524, + "step": 7259 + }, + { + "epoch": 1.6736599423631124, + "grad_norm": 1.328708907630177, + "learning_rate": 1.423510089877823e-07, + "loss": 0.38373956084251404, + "step": 7260 + }, + { + "epoch": 1.6738904899135447, + "grad_norm": 1.565700725727342, + "learning_rate": 1.4215506078322513e-07, + "loss": 0.5179092884063721, + "step": 7261 + }, + { + "epoch": 1.6741210374639768, + "grad_norm": 2.1636236089677094, + "learning_rate": 1.4195923721461345e-07, + "loss": 0.4818217158317566, + "step": 7262 + }, + { + "epoch": 1.674351585014409, + "grad_norm": 1.405414242668412, + "learning_rate": 1.417635383103989e-07, + "loss": 0.4114675521850586, + "step": 7263 + }, + { + "epoch": 1.6745821325648413, + "grad_norm": 1.864769627750043, + "learning_rate": 1.4156796409901383e-07, + "loss": 0.4796205163002014, + "step": 7264 + }, + { + "epoch": 1.6748126801152736, + "grad_norm": 1.6488410041287704, + "learning_rate": 1.4137251460887366e-07, + "loss": 0.5158804655075073, + "step": 7265 + }, + { + "epoch": 1.675043227665706, + "grad_norm": 1.7213812053974584, + "learning_rate": 1.4117718986837491e-07, + "loss": 0.5137546062469482, + "step": 7266 + }, + { + "epoch": 1.6752737752161382, + "grad_norm": 1.4792265893063055, + "learning_rate": 1.409819899058965e-07, + "loss": 0.48155319690704346, + "step": 7267 + }, + { + "epoch": 1.6755043227665705, + "grad_norm": 2.1364016007887234, + "learning_rate": 1.4078691474979865e-07, + "loss": 0.5631832480430603, + "step": 7268 + }, + { + "epoch": 1.6757348703170027, + "grad_norm": 1.4486975390027959, + "learning_rate": 1.405919644284238e-07, + "loss": 0.37383341789245605, + "step": 7269 + }, + { + "epoch": 1.675965417867435, + "grad_norm": 1.5115227541499854, + "learning_rate": 1.403971389700962e-07, + "loss": 0.4356718361377716, + "step": 7270 + }, + { + "epoch": 1.6761959654178673, + "grad_norm": 1.7254792983162555, + "learning_rate": 1.402024384031223e-07, + "loss": 0.5697557926177979, + "step": 7271 + }, + { + "epoch": 1.6764265129682996, + "grad_norm": 1.8330890314003314, + "learning_rate": 1.4000786275578957e-07, + "loss": 0.38880759477615356, + "step": 7272 + }, + { + "epoch": 1.6766570605187319, + "grad_norm": 1.5828613232595852, + "learning_rate": 1.398134120563682e-07, + "loss": 0.40824171900749207, + "step": 7273 + }, + { + "epoch": 1.6768876080691641, + "grad_norm": 1.8426743516757296, + "learning_rate": 1.396190863331098e-07, + "loss": 0.45844566822052, + "step": 7274 + }, + { + "epoch": 1.6771181556195964, + "grad_norm": 1.7101458464470949, + "learning_rate": 1.394248856142476e-07, + "loss": 0.4732695519924164, + "step": 7275 + }, + { + "epoch": 1.6773487031700287, + "grad_norm": 1.5405894803577098, + "learning_rate": 1.3923080992799729e-07, + "loss": 0.45811837911605835, + "step": 7276 + }, + { + "epoch": 1.677579250720461, + "grad_norm": 1.4406228369333338, + "learning_rate": 1.3903685930255572e-07, + "loss": 0.46207255125045776, + "step": 7277 + }, + { + "epoch": 1.6778097982708933, + "grad_norm": 1.4671120213207756, + "learning_rate": 1.3884303376610195e-07, + "loss": 0.4856521487236023, + "step": 7278 + }, + { + "epoch": 1.6780403458213256, + "grad_norm": 1.5200443209372343, + "learning_rate": 1.386493333467973e-07, + "loss": 0.4625289738178253, + "step": 7279 + }, + { + "epoch": 1.6782708933717578, + "grad_norm": 1.5447587816062573, + "learning_rate": 1.3845575807278398e-07, + "loss": 0.4227305054664612, + "step": 7280 + }, + { + "epoch": 1.6785014409221901, + "grad_norm": 1.6564382894986278, + "learning_rate": 1.3826230797218664e-07, + "loss": 0.40824317932128906, + "step": 7281 + }, + { + "epoch": 1.6787319884726224, + "grad_norm": 1.4249612038198227, + "learning_rate": 1.380689830731112e-07, + "loss": 0.4376741647720337, + "step": 7282 + }, + { + "epoch": 1.6789625360230547, + "grad_norm": 1.4853225121555564, + "learning_rate": 1.3787578340364602e-07, + "loss": 0.4159294366836548, + "step": 7283 + }, + { + "epoch": 1.679193083573487, + "grad_norm": 1.6921340878324538, + "learning_rate": 1.3768270899186118e-07, + "loss": 0.44097238779067993, + "step": 7284 + }, + { + "epoch": 1.6794236311239192, + "grad_norm": 1.7542650919820308, + "learning_rate": 1.37489759865808e-07, + "loss": 0.5287643074989319, + "step": 7285 + }, + { + "epoch": 1.6796541786743515, + "grad_norm": 1.6980963761970458, + "learning_rate": 1.3729693605352054e-07, + "loss": 0.5027199983596802, + "step": 7286 + }, + { + "epoch": 1.6798847262247838, + "grad_norm": 1.4467586071513, + "learning_rate": 1.371042375830137e-07, + "loss": 0.38447409868240356, + "step": 7287 + }, + { + "epoch": 1.680115273775216, + "grad_norm": 1.4823170991132026, + "learning_rate": 1.369116644822843e-07, + "loss": 0.32439717650413513, + "step": 7288 + }, + { + "epoch": 1.6803458213256484, + "grad_norm": 1.6304048881341835, + "learning_rate": 1.3671921677931185e-07, + "loss": 0.466668963432312, + "step": 7289 + }, + { + "epoch": 1.6805763688760806, + "grad_norm": 1.5670479284811365, + "learning_rate": 1.3652689450205633e-07, + "loss": 0.3803076446056366, + "step": 7290 + }, + { + "epoch": 1.680806916426513, + "grad_norm": 1.5423068826131208, + "learning_rate": 1.3633469767846063e-07, + "loss": 0.5098183155059814, + "step": 7291 + }, + { + "epoch": 1.6810374639769452, + "grad_norm": 1.9666029015445945, + "learning_rate": 1.3614262633644903e-07, + "loss": 0.4866775870323181, + "step": 7292 + }, + { + "epoch": 1.6812680115273775, + "grad_norm": 1.5218473916717437, + "learning_rate": 1.3595068050392722e-07, + "loss": 0.4426755905151367, + "step": 7293 + }, + { + "epoch": 1.6814985590778098, + "grad_norm": 1.6763772291665047, + "learning_rate": 1.3575886020878291e-07, + "loss": 0.49981606006622314, + "step": 7294 + }, + { + "epoch": 1.681729106628242, + "grad_norm": 1.7060577764824782, + "learning_rate": 1.355671654788858e-07, + "loss": 0.47976410388946533, + "step": 7295 + }, + { + "epoch": 1.6819596541786743, + "grad_norm": 1.721079690327414, + "learning_rate": 1.3537559634208683e-07, + "loss": 0.49790793657302856, + "step": 7296 + }, + { + "epoch": 1.6821902017291066, + "grad_norm": 1.790367918060309, + "learning_rate": 1.351841528262194e-07, + "loss": 0.48854726552963257, + "step": 7297 + }, + { + "epoch": 1.6824207492795389, + "grad_norm": 1.6106089496589149, + "learning_rate": 1.3499283495909784e-07, + "loss": 0.46680933237075806, + "step": 7298 + }, + { + "epoch": 1.6826512968299712, + "grad_norm": 1.913402875215736, + "learning_rate": 1.3480164276851923e-07, + "loss": 0.4013046324253082, + "step": 7299 + }, + { + "epoch": 1.6828818443804034, + "grad_norm": 1.445836567637905, + "learning_rate": 1.3461057628226135e-07, + "loss": 0.5207708477973938, + "step": 7300 + }, + { + "epoch": 1.6831123919308357, + "grad_norm": 1.5618988740075492, + "learning_rate": 1.34419635528084e-07, + "loss": 0.42747941613197327, + "step": 7301 + }, + { + "epoch": 1.683342939481268, + "grad_norm": 2.258248985065798, + "learning_rate": 1.3422882053372918e-07, + "loss": 0.46138545870780945, + "step": 7302 + }, + { + "epoch": 1.6835734870317003, + "grad_norm": 1.607848950586652, + "learning_rate": 1.3403813132692054e-07, + "loss": 0.5034617185592651, + "step": 7303 + }, + { + "epoch": 1.6838040345821326, + "grad_norm": 1.7549778043587798, + "learning_rate": 1.3384756793536277e-07, + "loss": 0.5109648704528809, + "step": 7304 + }, + { + "epoch": 1.6840345821325649, + "grad_norm": 1.6683673761165936, + "learning_rate": 1.3365713038674342e-07, + "loss": 0.5048235654830933, + "step": 7305 + }, + { + "epoch": 1.6842651296829971, + "grad_norm": 1.671440552476867, + "learning_rate": 1.3346681870873022e-07, + "loss": 0.46366703510284424, + "step": 7306 + }, + { + "epoch": 1.6844956772334294, + "grad_norm": 1.8814222847361726, + "learning_rate": 1.3327663292897385e-07, + "loss": 0.5473049283027649, + "step": 7307 + }, + { + "epoch": 1.6847262247838617, + "grad_norm": 1.5517081701402915, + "learning_rate": 1.3308657307510662e-07, + "loss": 0.4539650082588196, + "step": 7308 + }, + { + "epoch": 1.684956772334294, + "grad_norm": 1.6894656634552743, + "learning_rate": 1.328966391747418e-07, + "loss": 0.43108680844306946, + "step": 7309 + }, + { + "epoch": 1.6851873198847263, + "grad_norm": 1.9974250241260127, + "learning_rate": 1.3270683125547522e-07, + "loss": 0.573739767074585, + "step": 7310 + }, + { + "epoch": 1.6854178674351585, + "grad_norm": 2.037790233442197, + "learning_rate": 1.3251714934488368e-07, + "loss": 0.405525803565979, + "step": 7311 + }, + { + "epoch": 1.6856484149855908, + "grad_norm": 1.765339357509686, + "learning_rate": 1.3232759347052603e-07, + "loss": 0.5189083814620972, + "step": 7312 + }, + { + "epoch": 1.685878962536023, + "grad_norm": 1.806012516151003, + "learning_rate": 1.32138163659943e-07, + "loss": 0.47043824195861816, + "step": 7313 + }, + { + "epoch": 1.6861095100864554, + "grad_norm": 1.5219455074379382, + "learning_rate": 1.319488599406563e-07, + "loss": 0.5212691426277161, + "step": 7314 + }, + { + "epoch": 1.6863400576368877, + "grad_norm": 1.9352097714905825, + "learning_rate": 1.317596823401702e-07, + "loss": 0.44503504037857056, + "step": 7315 + }, + { + "epoch": 1.68657060518732, + "grad_norm": 1.8653042666003878, + "learning_rate": 1.3157063088597033e-07, + "loss": 0.486750066280365, + "step": 7316 + }, + { + "epoch": 1.6868011527377522, + "grad_norm": 1.6466316943983774, + "learning_rate": 1.3138170560552365e-07, + "loss": 0.436980664730072, + "step": 7317 + }, + { + "epoch": 1.6870317002881845, + "grad_norm": 1.4906620727560134, + "learning_rate": 1.3119290652627912e-07, + "loss": 0.4514414072036743, + "step": 7318 + }, + { + "epoch": 1.6872622478386168, + "grad_norm": 1.594310471174457, + "learning_rate": 1.3100423367566704e-07, + "loss": 0.48360395431518555, + "step": 7319 + }, + { + "epoch": 1.687492795389049, + "grad_norm": 1.8385147655691183, + "learning_rate": 1.308156870810999e-07, + "loss": 0.5205049514770508, + "step": 7320 + }, + { + "epoch": 1.6877233429394813, + "grad_norm": 1.6713700205301076, + "learning_rate": 1.306272667699716e-07, + "loss": 0.4396322965621948, + "step": 7321 + }, + { + "epoch": 1.6879538904899136, + "grad_norm": 1.6747688199386033, + "learning_rate": 1.304389727696573e-07, + "loss": 0.42613643407821655, + "step": 7322 + }, + { + "epoch": 1.688184438040346, + "grad_norm": 1.6965469885346547, + "learning_rate": 1.3025080510751463e-07, + "loss": 0.3865918815135956, + "step": 7323 + }, + { + "epoch": 1.6884149855907782, + "grad_norm": 1.9903164455925761, + "learning_rate": 1.3006276381088222e-07, + "loss": 0.5589674711227417, + "step": 7324 + }, + { + "epoch": 1.6886455331412105, + "grad_norm": 1.6749318244122438, + "learning_rate": 1.2987484890708022e-07, + "loss": 0.4480137228965759, + "step": 7325 + }, + { + "epoch": 1.6888760806916427, + "grad_norm": 1.6726911081582678, + "learning_rate": 1.2968706042341114e-07, + "loss": 0.46543216705322266, + "step": 7326 + }, + { + "epoch": 1.689106628242075, + "grad_norm": 1.7581803186716298, + "learning_rate": 1.2949939838715827e-07, + "loss": 0.4383571743965149, + "step": 7327 + }, + { + "epoch": 1.6893371757925073, + "grad_norm": 1.6177869416016535, + "learning_rate": 1.2931186282558715e-07, + "loss": 0.47900426387786865, + "step": 7328 + }, + { + "epoch": 1.6895677233429396, + "grad_norm": 1.821208964293862, + "learning_rate": 1.2912445376594504e-07, + "loss": 0.5839447975158691, + "step": 7329 + }, + { + "epoch": 1.6897982708933719, + "grad_norm": 1.3312817497628286, + "learning_rate": 1.2893717123546023e-07, + "loss": 0.5179777145385742, + "step": 7330 + }, + { + "epoch": 1.6900288184438041, + "grad_norm": 1.8984725283324497, + "learning_rate": 1.2875001526134266e-07, + "loss": 0.4351516366004944, + "step": 7331 + }, + { + "epoch": 1.6902593659942364, + "grad_norm": 1.917939401942737, + "learning_rate": 1.2856298587078474e-07, + "loss": 0.48052316904067993, + "step": 7332 + }, + { + "epoch": 1.6904899135446687, + "grad_norm": 2.027779365017344, + "learning_rate": 1.2837608309095937e-07, + "loss": 0.5767349600791931, + "step": 7333 + }, + { + "epoch": 1.690720461095101, + "grad_norm": 1.5008580056938157, + "learning_rate": 1.2818930694902208e-07, + "loss": 0.4722314774990082, + "step": 7334 + }, + { + "epoch": 1.6909510086455333, + "grad_norm": 1.7280315705596727, + "learning_rate": 1.280026574721089e-07, + "loss": 0.472305029630661, + "step": 7335 + }, + { + "epoch": 1.6911815561959656, + "grad_norm": 1.7438045229147328, + "learning_rate": 1.2781613468733864e-07, + "loss": 0.5404185652732849, + "step": 7336 + }, + { + "epoch": 1.6914121037463978, + "grad_norm": 1.5394991269331093, + "learning_rate": 1.2762973862181092e-07, + "loss": 0.4667291045188904, + "step": 7337 + }, + { + "epoch": 1.6916426512968301, + "grad_norm": 1.538686442787995, + "learning_rate": 1.2744346930260685e-07, + "loss": 0.4928268492221832, + "step": 7338 + }, + { + "epoch": 1.6918731988472624, + "grad_norm": 1.7770397518440189, + "learning_rate": 1.2725732675678958e-07, + "loss": 0.4096994996070862, + "step": 7339 + }, + { + "epoch": 1.6921037463976947, + "grad_norm": 1.5070597312201002, + "learning_rate": 1.270713110114041e-07, + "loss": 0.4654881954193115, + "step": 7340 + }, + { + "epoch": 1.692334293948127, + "grad_norm": 1.6648905898246225, + "learning_rate": 1.2688542209347597e-07, + "loss": 0.4741584360599518, + "step": 7341 + }, + { + "epoch": 1.6925648414985592, + "grad_norm": 2.0538633630714576, + "learning_rate": 1.2669966003001342e-07, + "loss": 0.48024487495422363, + "step": 7342 + }, + { + "epoch": 1.6927953890489915, + "grad_norm": 1.7078715634541877, + "learning_rate": 1.2651402484800545e-07, + "loss": 0.40225690603256226, + "step": 7343 + }, + { + "epoch": 1.6930259365994238, + "grad_norm": 1.6998853863064503, + "learning_rate": 1.263285165744228e-07, + "loss": 0.4933784008026123, + "step": 7344 + }, + { + "epoch": 1.6932564841498559, + "grad_norm": 1.9278324438812642, + "learning_rate": 1.2614313523621823e-07, + "loss": 0.5119373798370361, + "step": 7345 + }, + { + "epoch": 1.6934870317002881, + "grad_norm": 1.5071956319462745, + "learning_rate": 1.2595788086032545e-07, + "loss": 0.45921066403388977, + "step": 7346 + }, + { + "epoch": 1.6937175792507204, + "grad_norm": 1.8213068226512792, + "learning_rate": 1.2577275347366e-07, + "loss": 0.4157813489437103, + "step": 7347 + }, + { + "epoch": 1.6939481268011527, + "grad_norm": 1.6844334121601705, + "learning_rate": 1.255877531031193e-07, + "loss": 0.47223663330078125, + "step": 7348 + }, + { + "epoch": 1.694178674351585, + "grad_norm": 1.703336195724066, + "learning_rate": 1.2540287977558173e-07, + "loss": 0.49459707736968994, + "step": 7349 + }, + { + "epoch": 1.6944092219020173, + "grad_norm": 1.4956091776340097, + "learning_rate": 1.2521813351790756e-07, + "loss": 0.36979377269744873, + "step": 7350 + }, + { + "epoch": 1.6946397694524495, + "grad_norm": 1.48101418059435, + "learning_rate": 1.2503351435693809e-07, + "loss": 0.5258666276931763, + "step": 7351 + }, + { + "epoch": 1.6948703170028818, + "grad_norm": 1.7171134118237417, + "learning_rate": 1.248490223194969e-07, + "loss": 0.48548775911331177, + "step": 7352 + }, + { + "epoch": 1.695100864553314, + "grad_norm": 1.5260650611402817, + "learning_rate": 1.2466465743238908e-07, + "loss": 0.49529772996902466, + "step": 7353 + }, + { + "epoch": 1.6953314121037464, + "grad_norm": 1.4265089916669407, + "learning_rate": 1.244804197224003e-07, + "loss": 0.525967001914978, + "step": 7354 + }, + { + "epoch": 1.6955619596541787, + "grad_norm": 1.5675417944673544, + "learning_rate": 1.2429630921629886e-07, + "loss": 0.45880353450775146, + "step": 7355 + }, + { + "epoch": 1.695792507204611, + "grad_norm": 1.786310864642647, + "learning_rate": 1.24112325940834e-07, + "loss": 0.4868921637535095, + "step": 7356 + }, + { + "epoch": 1.6960230547550432, + "grad_norm": 1.9091573764166652, + "learning_rate": 1.239284699227363e-07, + "loss": 0.48856431245803833, + "step": 7357 + }, + { + "epoch": 1.6962536023054755, + "grad_norm": 1.6702490923414848, + "learning_rate": 1.2374474118871848e-07, + "loss": 0.4778832793235779, + "step": 7358 + }, + { + "epoch": 1.6964841498559078, + "grad_norm": 1.5686777857090144, + "learning_rate": 1.235611397654741e-07, + "loss": 0.4598827660083771, + "step": 7359 + }, + { + "epoch": 1.69671469740634, + "grad_norm": 1.6359168914665654, + "learning_rate": 1.2337766567967868e-07, + "loss": 0.41662126779556274, + "step": 7360 + }, + { + "epoch": 1.6969452449567723, + "grad_norm": 1.8557843651768584, + "learning_rate": 1.2319431895798937e-07, + "loss": 0.5126262903213501, + "step": 7361 + }, + { + "epoch": 1.6971757925072046, + "grad_norm": 1.8495853092328842, + "learning_rate": 1.2301109962704425e-07, + "loss": 0.5296661257743835, + "step": 7362 + }, + { + "epoch": 1.697406340057637, + "grad_norm": 1.5429948348957294, + "learning_rate": 1.2282800771346326e-07, + "loss": 0.45704740285873413, + "step": 7363 + }, + { + "epoch": 1.6976368876080692, + "grad_norm": 1.9200104385218533, + "learning_rate": 1.2264504324384739e-07, + "loss": 0.48733824491500854, + "step": 7364 + }, + { + "epoch": 1.6978674351585015, + "grad_norm": 1.4498218230085718, + "learning_rate": 1.2246220624477988e-07, + "loss": 0.499523788690567, + "step": 7365 + }, + { + "epoch": 1.6980979827089338, + "grad_norm": 1.5061276319908403, + "learning_rate": 1.222794967428251e-07, + "loss": 0.4288235902786255, + "step": 7366 + }, + { + "epoch": 1.698328530259366, + "grad_norm": 1.610798127644449, + "learning_rate": 1.2209691476452854e-07, + "loss": 0.49590837955474854, + "step": 7367 + }, + { + "epoch": 1.6985590778097983, + "grad_norm": 1.5318629161297386, + "learning_rate": 1.2191446033641784e-07, + "loss": 0.36183077096939087, + "step": 7368 + }, + { + "epoch": 1.6987896253602306, + "grad_norm": 1.60727685293895, + "learning_rate": 1.2173213348500156e-07, + "loss": 0.4574984312057495, + "step": 7369 + }, + { + "epoch": 1.6990201729106629, + "grad_norm": 1.4620696246719465, + "learning_rate": 1.215499342367695e-07, + "loss": 0.4039604365825653, + "step": 7370 + }, + { + "epoch": 1.6992507204610952, + "grad_norm": 1.5626947359485344, + "learning_rate": 1.2136786261819398e-07, + "loss": 0.46439865231513977, + "step": 7371 + }, + { + "epoch": 1.6994812680115272, + "grad_norm": 1.6367083291338689, + "learning_rate": 1.2118591865572757e-07, + "loss": 0.3887529969215393, + "step": 7372 + }, + { + "epoch": 1.6997118155619595, + "grad_norm": 1.679799527354739, + "learning_rate": 1.2100410237580506e-07, + "loss": 0.4796936511993408, + "step": 7373 + }, + { + "epoch": 1.6999423631123918, + "grad_norm": 1.7129464656234383, + "learning_rate": 1.208224138048426e-07, + "loss": 0.4265397787094116, + "step": 7374 + }, + { + "epoch": 1.700172910662824, + "grad_norm": 1.3055425525744166, + "learning_rate": 1.2064085296923764e-07, + "loss": 0.38613706827163696, + "step": 7375 + }, + { + "epoch": 1.7004034582132563, + "grad_norm": 1.5801772819812159, + "learning_rate": 1.2045941989536866e-07, + "loss": 0.3781717121601105, + "step": 7376 + }, + { + "epoch": 1.7006340057636886, + "grad_norm": 1.4615581683312544, + "learning_rate": 1.2027811460959646e-07, + "loss": 0.3941626250743866, + "step": 7377 + }, + { + "epoch": 1.700864553314121, + "grad_norm": 1.842751075683814, + "learning_rate": 1.2009693713826251e-07, + "loss": 0.6087595820426941, + "step": 7378 + }, + { + "epoch": 1.7010951008645532, + "grad_norm": 1.7553383978846564, + "learning_rate": 1.1991588750769033e-07, + "loss": 0.45024704933166504, + "step": 7379 + }, + { + "epoch": 1.7013256484149855, + "grad_norm": 1.6137642064096758, + "learning_rate": 1.1973496574418418e-07, + "loss": 0.4617878794670105, + "step": 7380 + }, + { + "epoch": 1.7015561959654177, + "grad_norm": 1.9862996703255327, + "learning_rate": 1.1955417187403037e-07, + "loss": 0.4676027297973633, + "step": 7381 + }, + { + "epoch": 1.70178674351585, + "grad_norm": 1.871973148067532, + "learning_rate": 1.193735059234965e-07, + "loss": 0.4123028516769409, + "step": 7382 + }, + { + "epoch": 1.7020172910662823, + "grad_norm": 1.6050301624607715, + "learning_rate": 1.1919296791883082e-07, + "loss": 0.46627044677734375, + "step": 7383 + }, + { + "epoch": 1.7022478386167146, + "grad_norm": 1.7149279499035175, + "learning_rate": 1.1901255788626418e-07, + "loss": 0.5274061560630798, + "step": 7384 + }, + { + "epoch": 1.7024783861671469, + "grad_norm": 1.5052434216982107, + "learning_rate": 1.1883227585200839e-07, + "loss": 0.4972034990787506, + "step": 7385 + }, + { + "epoch": 1.7027089337175791, + "grad_norm": 1.9494573631811647, + "learning_rate": 1.1865212184225604e-07, + "loss": 0.4843828082084656, + "step": 7386 + }, + { + "epoch": 1.7029394812680114, + "grad_norm": 1.505775210941553, + "learning_rate": 1.1847209588318208e-07, + "loss": 0.42801034450531006, + "step": 7387 + }, + { + "epoch": 1.7031700288184437, + "grad_norm": 1.5219550860788174, + "learning_rate": 1.1829219800094226e-07, + "loss": 0.4895517826080322, + "step": 7388 + }, + { + "epoch": 1.703400576368876, + "grad_norm": 1.6293329855163652, + "learning_rate": 1.1811242822167367e-07, + "loss": 0.4011702537536621, + "step": 7389 + }, + { + "epoch": 1.7036311239193083, + "grad_norm": 1.648771005525557, + "learning_rate": 1.179327865714953e-07, + "loss": 0.43081313371658325, + "step": 7390 + }, + { + "epoch": 1.7038616714697405, + "grad_norm": 1.6248501330425935, + "learning_rate": 1.1775327307650695e-07, + "loss": 0.4731036424636841, + "step": 7391 + }, + { + "epoch": 1.7040922190201728, + "grad_norm": 1.6823492765317505, + "learning_rate": 1.1757388776279043e-07, + "loss": 0.3956582546234131, + "step": 7392 + }, + { + "epoch": 1.704322766570605, + "grad_norm": 2.067253157839649, + "learning_rate": 1.1739463065640798e-07, + "loss": 0.5273596048355103, + "step": 7393 + }, + { + "epoch": 1.7045533141210374, + "grad_norm": 1.7700345516037628, + "learning_rate": 1.1721550178340445e-07, + "loss": 0.4681214392185211, + "step": 7394 + }, + { + "epoch": 1.7047838616714697, + "grad_norm": 1.5670620820320287, + "learning_rate": 1.1703650116980513e-07, + "loss": 0.5035468339920044, + "step": 7395 + }, + { + "epoch": 1.705014409221902, + "grad_norm": 1.5253972732776402, + "learning_rate": 1.1685762884161654e-07, + "loss": 0.3710506558418274, + "step": 7396 + }, + { + "epoch": 1.7052449567723342, + "grad_norm": 1.5910552354864504, + "learning_rate": 1.1667888482482746e-07, + "loss": 0.37030795216560364, + "step": 7397 + }, + { + "epoch": 1.7054755043227665, + "grad_norm": 1.923678020581632, + "learning_rate": 1.1650026914540755e-07, + "loss": 0.5114949941635132, + "step": 7398 + }, + { + "epoch": 1.7057060518731988, + "grad_norm": 1.6569237958778402, + "learning_rate": 1.1632178182930751e-07, + "loss": 0.3987428545951843, + "step": 7399 + }, + { + "epoch": 1.705936599423631, + "grad_norm": 1.7268416884810995, + "learning_rate": 1.1614342290246004e-07, + "loss": 0.45176962018013, + "step": 7400 + }, + { + "epoch": 1.7061671469740634, + "grad_norm": 1.5994820804443497, + "learning_rate": 1.1596519239077863e-07, + "loss": 0.4312123656272888, + "step": 7401 + }, + { + "epoch": 1.7063976945244956, + "grad_norm": 1.5751618334265127, + "learning_rate": 1.157870903201581e-07, + "loss": 0.5310814380645752, + "step": 7402 + }, + { + "epoch": 1.706628242074928, + "grad_norm": 1.6328659776924948, + "learning_rate": 1.1560911671647534e-07, + "loss": 0.47525835037231445, + "step": 7403 + }, + { + "epoch": 1.7068587896253602, + "grad_norm": 1.5436637705693566, + "learning_rate": 1.1543127160558752e-07, + "loss": 0.5096621513366699, + "step": 7404 + }, + { + "epoch": 1.7070893371757925, + "grad_norm": 1.389329550146149, + "learning_rate": 1.15253555013334e-07, + "loss": 0.3848613500595093, + "step": 7405 + }, + { + "epoch": 1.7073198847262248, + "grad_norm": 1.7666796743032251, + "learning_rate": 1.1507596696553523e-07, + "loss": 0.46764057874679565, + "step": 7406 + }, + { + "epoch": 1.707550432276657, + "grad_norm": 1.4488805676543326, + "learning_rate": 1.148985074879928e-07, + "loss": 0.4664180278778076, + "step": 7407 + }, + { + "epoch": 1.7077809798270893, + "grad_norm": 1.817722227071109, + "learning_rate": 1.1472117660648973e-07, + "loss": 0.4912991523742676, + "step": 7408 + }, + { + "epoch": 1.7080115273775216, + "grad_norm": 1.565352212690196, + "learning_rate": 1.145439743467902e-07, + "loss": 0.44789934158325195, + "step": 7409 + }, + { + "epoch": 1.7082420749279539, + "grad_norm": 1.6748377858979937, + "learning_rate": 1.1436690073463984e-07, + "loss": 0.4465000629425049, + "step": 7410 + }, + { + "epoch": 1.7084726224783862, + "grad_norm": 1.6189125297433695, + "learning_rate": 1.1418995579576607e-07, + "loss": 0.4135594069957733, + "step": 7411 + }, + { + "epoch": 1.7087031700288184, + "grad_norm": 1.6376644340887752, + "learning_rate": 1.1401313955587655e-07, + "loss": 0.5296405553817749, + "step": 7412 + }, + { + "epoch": 1.7089337175792507, + "grad_norm": 1.8368866532400925, + "learning_rate": 1.1383645204066127e-07, + "loss": 0.42997848987579346, + "step": 7413 + }, + { + "epoch": 1.709164265129683, + "grad_norm": 1.7018740159985664, + "learning_rate": 1.1365989327579106e-07, + "loss": 0.3954406976699829, + "step": 7414 + }, + { + "epoch": 1.7093948126801153, + "grad_norm": 1.573589198376906, + "learning_rate": 1.134834632869176e-07, + "loss": 0.5098167657852173, + "step": 7415 + }, + { + "epoch": 1.7096253602305476, + "grad_norm": 1.666317247889276, + "learning_rate": 1.1330716209967505e-07, + "loss": 0.44079747796058655, + "step": 7416 + }, + { + "epoch": 1.7098559077809798, + "grad_norm": 1.7228180635145338, + "learning_rate": 1.1313098973967738e-07, + "loss": 0.4745299220085144, + "step": 7417 + }, + { + "epoch": 1.7100864553314121, + "grad_norm": 1.7819888998245728, + "learning_rate": 1.129549462325211e-07, + "loss": 0.586134672164917, + "step": 7418 + }, + { + "epoch": 1.7103170028818444, + "grad_norm": 1.6308460435680996, + "learning_rate": 1.1277903160378377e-07, + "loss": 0.453177273273468, + "step": 7419 + }, + { + "epoch": 1.7105475504322767, + "grad_norm": 1.5209333044759716, + "learning_rate": 1.1260324587902314e-07, + "loss": 0.4852634370326996, + "step": 7420 + }, + { + "epoch": 1.710778097982709, + "grad_norm": 1.6132453532504059, + "learning_rate": 1.1242758908377959e-07, + "loss": 0.44662681221961975, + "step": 7421 + }, + { + "epoch": 1.7110086455331412, + "grad_norm": 1.856186184886188, + "learning_rate": 1.1225206124357412e-07, + "loss": 0.4371451735496521, + "step": 7422 + }, + { + "epoch": 1.7112391930835735, + "grad_norm": 1.7655414830639746, + "learning_rate": 1.12076662383909e-07, + "loss": 0.46445029973983765, + "step": 7423 + }, + { + "epoch": 1.7114697406340058, + "grad_norm": 1.8715652362026352, + "learning_rate": 1.119013925302682e-07, + "loss": 0.4788290858268738, + "step": 7424 + }, + { + "epoch": 1.711700288184438, + "grad_norm": 1.6675143296503283, + "learning_rate": 1.1172625170811634e-07, + "loss": 0.44457030296325684, + "step": 7425 + }, + { + "epoch": 1.7119308357348704, + "grad_norm": 1.5832176674715501, + "learning_rate": 1.1155123994289927e-07, + "loss": 0.4288104176521301, + "step": 7426 + }, + { + "epoch": 1.7121613832853027, + "grad_norm": 1.7512263233099108, + "learning_rate": 1.1137635726004502e-07, + "loss": 0.4504792392253876, + "step": 7427 + }, + { + "epoch": 1.712391930835735, + "grad_norm": 1.7570403694181211, + "learning_rate": 1.1120160368496167e-07, + "loss": 0.4845864772796631, + "step": 7428 + }, + { + "epoch": 1.7126224783861672, + "grad_norm": 1.6506356701568592, + "learning_rate": 1.1102697924303928e-07, + "loss": 0.5140354633331299, + "step": 7429 + }, + { + "epoch": 1.7128530259365995, + "grad_norm": 1.8835666301746035, + "learning_rate": 1.1085248395964919e-07, + "loss": 0.5070383548736572, + "step": 7430 + }, + { + "epoch": 1.7130835734870318, + "grad_norm": 1.670709235469594, + "learning_rate": 1.1067811786014358e-07, + "loss": 0.5340418219566345, + "step": 7431 + }, + { + "epoch": 1.713314121037464, + "grad_norm": 1.7797047375233717, + "learning_rate": 1.1050388096985596e-07, + "loss": 0.4950510561466217, + "step": 7432 + }, + { + "epoch": 1.7135446685878963, + "grad_norm": 1.5585786144398537, + "learning_rate": 1.1032977331410109e-07, + "loss": 0.44572609663009644, + "step": 7433 + }, + { + "epoch": 1.7137752161383286, + "grad_norm": 1.560749431847804, + "learning_rate": 1.1015579491817506e-07, + "loss": 0.4561808109283447, + "step": 7434 + }, + { + "epoch": 1.714005763688761, + "grad_norm": 2.192343641588309, + "learning_rate": 1.0998194580735531e-07, + "loss": 0.5268326997756958, + "step": 7435 + }, + { + "epoch": 1.7142363112391932, + "grad_norm": 1.63175270270566, + "learning_rate": 1.098082260069001e-07, + "loss": 0.42211106419563293, + "step": 7436 + }, + { + "epoch": 1.7144668587896255, + "grad_norm": 2.3567165239179846, + "learning_rate": 1.0963463554204922e-07, + "loss": 0.451328307390213, + "step": 7437 + }, + { + "epoch": 1.7146974063400577, + "grad_norm": 1.6768366495694842, + "learning_rate": 1.094611744380236e-07, + "loss": 0.4603223204612732, + "step": 7438 + }, + { + "epoch": 1.71492795389049, + "grad_norm": 1.5061721175015017, + "learning_rate": 1.09287842720025e-07, + "loss": 0.47026073932647705, + "step": 7439 + }, + { + "epoch": 1.7151585014409223, + "grad_norm": 1.4706168009476588, + "learning_rate": 1.0911464041323715e-07, + "loss": 0.4415278434753418, + "step": 7440 + }, + { + "epoch": 1.7153890489913546, + "grad_norm": 1.64598531774651, + "learning_rate": 1.0894156754282424e-07, + "loss": 0.4910876750946045, + "step": 7441 + }, + { + "epoch": 1.7156195965417869, + "grad_norm": 1.6127230404554662, + "learning_rate": 1.0876862413393195e-07, + "loss": 0.43458497524261475, + "step": 7442 + }, + { + "epoch": 1.7158501440922191, + "grad_norm": 1.495202383352549, + "learning_rate": 1.0859581021168762e-07, + "loss": 0.3776114881038666, + "step": 7443 + }, + { + "epoch": 1.7160806916426514, + "grad_norm": 1.7256092990297747, + "learning_rate": 1.0842312580119884e-07, + "loss": 0.5010780096054077, + "step": 7444 + }, + { + "epoch": 1.7163112391930837, + "grad_norm": 1.3668277186923021, + "learning_rate": 1.0825057092755507e-07, + "loss": 0.39507001638412476, + "step": 7445 + }, + { + "epoch": 1.716541786743516, + "grad_norm": 1.4987676780901875, + "learning_rate": 1.080781456158264e-07, + "loss": 0.4180053770542145, + "step": 7446 + }, + { + "epoch": 1.7167723342939483, + "grad_norm": 2.1314587491759442, + "learning_rate": 1.0790584989106467e-07, + "loss": 0.47408896684646606, + "step": 7447 + }, + { + "epoch": 1.7170028818443805, + "grad_norm": 1.5715711040468392, + "learning_rate": 1.0773368377830294e-07, + "loss": 0.4921650290489197, + "step": 7448 + }, + { + "epoch": 1.7172334293948128, + "grad_norm": 1.7750258778835253, + "learning_rate": 1.0756164730255469e-07, + "loss": 0.47502100467681885, + "step": 7449 + }, + { + "epoch": 1.717463976945245, + "grad_norm": 1.7577426271633303, + "learning_rate": 1.0738974048881544e-07, + "loss": 0.40656572580337524, + "step": 7450 + }, + { + "epoch": 1.7176945244956774, + "grad_norm": 1.5162151917678242, + "learning_rate": 1.0721796336206124e-07, + "loss": 0.5114340782165527, + "step": 7451 + }, + { + "epoch": 1.7179250720461097, + "grad_norm": 1.6900637058341903, + "learning_rate": 1.0704631594724933e-07, + "loss": 0.36296752095222473, + "step": 7452 + }, + { + "epoch": 1.718155619596542, + "grad_norm": 1.7155576773786951, + "learning_rate": 1.0687479826931878e-07, + "loss": 0.4491519331932068, + "step": 7453 + }, + { + "epoch": 1.718386167146974, + "grad_norm": 1.4226771137370906, + "learning_rate": 1.0670341035318875e-07, + "loss": 0.4326419234275818, + "step": 7454 + }, + { + "epoch": 1.7186167146974063, + "grad_norm": 1.6302999867116295, + "learning_rate": 1.0653215222376044e-07, + "loss": 0.5220270156860352, + "step": 7455 + }, + { + "epoch": 1.7188472622478386, + "grad_norm": 1.567650077192639, + "learning_rate": 1.0636102390591606e-07, + "loss": 0.4059186279773712, + "step": 7456 + }, + { + "epoch": 1.7190778097982709, + "grad_norm": 1.7354307880622482, + "learning_rate": 1.061900254245186e-07, + "loss": 0.42291224002838135, + "step": 7457 + }, + { + "epoch": 1.7193083573487031, + "grad_norm": 1.4894312189644163, + "learning_rate": 1.0601915680441209e-07, + "loss": 0.43607455492019653, + "step": 7458 + }, + { + "epoch": 1.7195389048991354, + "grad_norm": 1.9632855044272428, + "learning_rate": 1.0584841807042234e-07, + "loss": 0.5138526558876038, + "step": 7459 + }, + { + "epoch": 1.7197694524495677, + "grad_norm": 1.7822906481694845, + "learning_rate": 1.0567780924735559e-07, + "loss": 0.4020421802997589, + "step": 7460 + }, + { + "epoch": 1.72, + "grad_norm": 1.623501776252126, + "learning_rate": 1.0550733036000004e-07, + "loss": 0.49493610858917236, + "step": 7461 + }, + { + "epoch": 1.7202305475504323, + "grad_norm": 1.581571013878747, + "learning_rate": 1.0533698143312386e-07, + "loss": 0.47467899322509766, + "step": 7462 + }, + { + "epoch": 1.7204610951008645, + "grad_norm": 1.8688599536477708, + "learning_rate": 1.0516676249147749e-07, + "loss": 0.5697565078735352, + "step": 7463 + }, + { + "epoch": 1.7206916426512968, + "grad_norm": 1.3314748131695917, + "learning_rate": 1.0499667355979169e-07, + "loss": 0.37515026330947876, + "step": 7464 + }, + { + "epoch": 1.720922190201729, + "grad_norm": 1.558567814194751, + "learning_rate": 1.048267146627786e-07, + "loss": 0.4420028328895569, + "step": 7465 + }, + { + "epoch": 1.7211527377521614, + "grad_norm": 1.7343593017294057, + "learning_rate": 1.0465688582513155e-07, + "loss": 0.4828314185142517, + "step": 7466 + }, + { + "epoch": 1.7213832853025937, + "grad_norm": 1.6846810381650634, + "learning_rate": 1.0448718707152504e-07, + "loss": 0.4600035548210144, + "step": 7467 + }, + { + "epoch": 1.721613832853026, + "grad_norm": 1.807146642346794, + "learning_rate": 1.0431761842661435e-07, + "loss": 0.41191548109054565, + "step": 7468 + }, + { + "epoch": 1.7218443804034582, + "grad_norm": 1.9114706919498279, + "learning_rate": 1.0414817991503622e-07, + "loss": 0.5425341725349426, + "step": 7469 + }, + { + "epoch": 1.7220749279538905, + "grad_norm": 1.8729482375195492, + "learning_rate": 1.0397887156140816e-07, + "loss": 0.487109512090683, + "step": 7470 + }, + { + "epoch": 1.7223054755043228, + "grad_norm": 1.7803299951951388, + "learning_rate": 1.0380969339032886e-07, + "loss": 0.4489486515522003, + "step": 7471 + }, + { + "epoch": 1.722536023054755, + "grad_norm": 1.6945025110814675, + "learning_rate": 1.036406454263783e-07, + "loss": 0.5018674731254578, + "step": 7472 + }, + { + "epoch": 1.7227665706051873, + "grad_norm": 2.040381850324315, + "learning_rate": 1.0347172769411717e-07, + "loss": 0.5236009955406189, + "step": 7473 + }, + { + "epoch": 1.7229971181556196, + "grad_norm": 1.5273022991430243, + "learning_rate": 1.0330294021808761e-07, + "loss": 0.4675469994544983, + "step": 7474 + }, + { + "epoch": 1.723227665706052, + "grad_norm": 1.4423948071468964, + "learning_rate": 1.0313428302281279e-07, + "loss": 0.46353641152381897, + "step": 7475 + }, + { + "epoch": 1.7234582132564842, + "grad_norm": 1.5910405062096624, + "learning_rate": 1.029657561327969e-07, + "loss": 0.44482338428497314, + "step": 7476 + }, + { + "epoch": 1.7236887608069165, + "grad_norm": 1.8731422268451656, + "learning_rate": 1.0279735957252489e-07, + "loss": 0.46485304832458496, + "step": 7477 + }, + { + "epoch": 1.7239193083573487, + "grad_norm": 1.6235316800460735, + "learning_rate": 1.0262909336646297e-07, + "loss": 0.46616819500923157, + "step": 7478 + }, + { + "epoch": 1.724149855907781, + "grad_norm": 2.1075896653310897, + "learning_rate": 1.0246095753905859e-07, + "loss": 0.5326619744300842, + "step": 7479 + }, + { + "epoch": 1.7243804034582133, + "grad_norm": 1.5629362609682114, + "learning_rate": 1.0229295211474031e-07, + "loss": 0.4326254725456238, + "step": 7480 + }, + { + "epoch": 1.7246109510086456, + "grad_norm": 1.6926827697188294, + "learning_rate": 1.021250771179173e-07, + "loss": 0.4316496253013611, + "step": 7481 + }, + { + "epoch": 1.7248414985590776, + "grad_norm": 1.5119148511884464, + "learning_rate": 1.0195733257298034e-07, + "loss": 0.4678090810775757, + "step": 7482 + }, + { + "epoch": 1.72507204610951, + "grad_norm": 2.0805412539757158, + "learning_rate": 1.0178971850430085e-07, + "loss": 0.5122083425521851, + "step": 7483 + }, + { + "epoch": 1.7253025936599422, + "grad_norm": 1.8165420465218807, + "learning_rate": 1.0162223493623113e-07, + "loss": 0.533470869064331, + "step": 7484 + }, + { + "epoch": 1.7255331412103745, + "grad_norm": 1.5064649628800673, + "learning_rate": 1.0145488189310525e-07, + "loss": 0.4170408844947815, + "step": 7485 + }, + { + "epoch": 1.7257636887608068, + "grad_norm": 1.5295516246279552, + "learning_rate": 1.0128765939923745e-07, + "loss": 0.43090832233428955, + "step": 7486 + }, + { + "epoch": 1.725994236311239, + "grad_norm": 1.7285765401539372, + "learning_rate": 1.0112056747892361e-07, + "loss": 0.41703808307647705, + "step": 7487 + }, + { + "epoch": 1.7262247838616713, + "grad_norm": 1.5954882562837394, + "learning_rate": 1.0095360615644066e-07, + "loss": 0.43948811292648315, + "step": 7488 + }, + { + "epoch": 1.7264553314121036, + "grad_norm": 1.6198432317521159, + "learning_rate": 1.0078677545604608e-07, + "loss": 0.5152919292449951, + "step": 7489 + }, + { + "epoch": 1.726685878962536, + "grad_norm": 1.618593763623408, + "learning_rate": 1.0062007540197881e-07, + "loss": 0.44976603984832764, + "step": 7490 + }, + { + "epoch": 1.7269164265129682, + "grad_norm": 1.4603076911656099, + "learning_rate": 1.0045350601845825e-07, + "loss": 0.39342233538627625, + "step": 7491 + }, + { + "epoch": 1.7271469740634005, + "grad_norm": 1.8025946719805128, + "learning_rate": 1.0028706732968551e-07, + "loss": 0.5002644658088684, + "step": 7492 + }, + { + "epoch": 1.7273775216138327, + "grad_norm": 1.4920644612153386, + "learning_rate": 1.0012075935984254e-07, + "loss": 0.4744255840778351, + "step": 7493 + }, + { + "epoch": 1.727608069164265, + "grad_norm": 1.7250863140843284, + "learning_rate": 9.995458213309183e-08, + "loss": 0.5364977717399597, + "step": 7494 + }, + { + "epoch": 1.7278386167146973, + "grad_norm": 1.5826177519538227, + "learning_rate": 9.978853567357748e-08, + "loss": 0.41442006826400757, + "step": 7495 + }, + { + "epoch": 1.7280691642651296, + "grad_norm": 1.8900488961036888, + "learning_rate": 9.96226200054242e-08, + "loss": 0.48307210206985474, + "step": 7496 + }, + { + "epoch": 1.7282997118155619, + "grad_norm": 1.377997456460788, + "learning_rate": 9.945683515273762e-08, + "loss": 0.42677950859069824, + "step": 7497 + }, + { + "epoch": 1.7285302593659941, + "grad_norm": 1.6897651930501534, + "learning_rate": 9.929118113960488e-08, + "loss": 0.4564162790775299, + "step": 7498 + }, + { + "epoch": 1.7287608069164264, + "grad_norm": 1.637171531739475, + "learning_rate": 9.912565799009342e-08, + "loss": 0.5233185887336731, + "step": 7499 + }, + { + "epoch": 1.7289913544668587, + "grad_norm": 1.613782917482401, + "learning_rate": 9.896026572825233e-08, + "loss": 0.45117032527923584, + "step": 7500 + }, + { + "epoch": 1.729221902017291, + "grad_norm": 1.6941256160277793, + "learning_rate": 9.879500437811139e-08, + "loss": 0.40787798166275024, + "step": 7501 + }, + { + "epoch": 1.7294524495677233, + "grad_norm": 1.7574036134910922, + "learning_rate": 9.862987396368138e-08, + "loss": 0.49555718898773193, + "step": 7502 + }, + { + "epoch": 1.7296829971181555, + "grad_norm": 1.411079526679828, + "learning_rate": 9.846487450895357e-08, + "loss": 0.4298393726348877, + "step": 7503 + }, + { + "epoch": 1.7299135446685878, + "grad_norm": 1.5124401855552176, + "learning_rate": 9.830000603790134e-08, + "loss": 0.444818913936615, + "step": 7504 + }, + { + "epoch": 1.73014409221902, + "grad_norm": 1.8237459278593453, + "learning_rate": 9.813526857447785e-08, + "loss": 0.5020506381988525, + "step": 7505 + }, + { + "epoch": 1.7303746397694524, + "grad_norm": 1.608208641626091, + "learning_rate": 9.797066214261806e-08, + "loss": 0.48920828104019165, + "step": 7506 + }, + { + "epoch": 1.7306051873198847, + "grad_norm": 1.7777996714125537, + "learning_rate": 9.78061867662372e-08, + "loss": 0.47981560230255127, + "step": 7507 + }, + { + "epoch": 1.730835734870317, + "grad_norm": 1.7932378217487064, + "learning_rate": 9.764184246923235e-08, + "loss": 0.43281418085098267, + "step": 7508 + }, + { + "epoch": 1.7310662824207492, + "grad_norm": 1.5295134527039604, + "learning_rate": 9.747762927548064e-08, + "loss": 0.5252255797386169, + "step": 7509 + }, + { + "epoch": 1.7312968299711815, + "grad_norm": 1.542548175322286, + "learning_rate": 9.731354720884056e-08, + "loss": 0.3210105299949646, + "step": 7510 + }, + { + "epoch": 1.7315273775216138, + "grad_norm": 1.6116337463422301, + "learning_rate": 9.714959629315156e-08, + "loss": 0.3888552784919739, + "step": 7511 + }, + { + "epoch": 1.731757925072046, + "grad_norm": 1.946842581393702, + "learning_rate": 9.698577655223427e-08, + "loss": 0.4745385944843292, + "step": 7512 + }, + { + "epoch": 1.7319884726224783, + "grad_norm": 1.489733706159224, + "learning_rate": 9.682208800988955e-08, + "loss": 0.49115753173828125, + "step": 7513 + }, + { + "epoch": 1.7322190201729106, + "grad_norm": 1.7715639242385375, + "learning_rate": 9.665853068990005e-08, + "loss": 0.5387924909591675, + "step": 7514 + }, + { + "epoch": 1.732449567723343, + "grad_norm": 1.525187821805336, + "learning_rate": 9.649510461602884e-08, + "loss": 0.524321436882019, + "step": 7515 + }, + { + "epoch": 1.7326801152737752, + "grad_norm": 1.5860436064187413, + "learning_rate": 9.633180981201972e-08, + "loss": 0.4536818861961365, + "step": 7516 + }, + { + "epoch": 1.7329106628242075, + "grad_norm": 1.5337023749780843, + "learning_rate": 9.616864630159816e-08, + "loss": 0.42552828788757324, + "step": 7517 + }, + { + "epoch": 1.7331412103746398, + "grad_norm": 2.0073070998312628, + "learning_rate": 9.600561410846963e-08, + "loss": 0.4798361659049988, + "step": 7518 + }, + { + "epoch": 1.733371757925072, + "grad_norm": 1.5754383687584421, + "learning_rate": 9.584271325632143e-08, + "loss": 0.46831148862838745, + "step": 7519 + }, + { + "epoch": 1.7336023054755043, + "grad_norm": 1.4214145640785232, + "learning_rate": 9.567994376882138e-08, + "loss": 0.45275694131851196, + "step": 7520 + }, + { + "epoch": 1.7338328530259366, + "grad_norm": 1.5148035503433661, + "learning_rate": 9.551730566961802e-08, + "loss": 0.4453716278076172, + "step": 7521 + }, + { + "epoch": 1.7340634005763689, + "grad_norm": 1.721382063454988, + "learning_rate": 9.535479898234112e-08, + "loss": 0.4417135715484619, + "step": 7522 + }, + { + "epoch": 1.7342939481268012, + "grad_norm": 1.2708031102992778, + "learning_rate": 9.519242373060077e-08, + "loss": 0.3882251977920532, + "step": 7523 + }, + { + "epoch": 1.7345244956772334, + "grad_norm": 1.6303128159977553, + "learning_rate": 9.503017993798879e-08, + "loss": 0.44697415828704834, + "step": 7524 + }, + { + "epoch": 1.7347550432276657, + "grad_norm": 1.4940295737615545, + "learning_rate": 9.486806762807753e-08, + "loss": 0.453482985496521, + "step": 7525 + }, + { + "epoch": 1.734985590778098, + "grad_norm": 1.3459156592909483, + "learning_rate": 9.470608682442005e-08, + "loss": 0.4285128712654114, + "step": 7526 + }, + { + "epoch": 1.7352161383285303, + "grad_norm": 1.569173733211583, + "learning_rate": 9.454423755055097e-08, + "loss": 0.5039681196212769, + "step": 7527 + }, + { + "epoch": 1.7354466858789626, + "grad_norm": 1.545842201869689, + "learning_rate": 9.438251982998446e-08, + "loss": 0.47230884432792664, + "step": 7528 + }, + { + "epoch": 1.7356772334293948, + "grad_norm": 1.6050836653007727, + "learning_rate": 9.422093368621697e-08, + "loss": 0.4544826149940491, + "step": 7529 + }, + { + "epoch": 1.7359077809798271, + "grad_norm": 1.742414138608038, + "learning_rate": 9.405947914272528e-08, + "loss": 0.5290546417236328, + "step": 7530 + }, + { + "epoch": 1.7361383285302594, + "grad_norm": 1.7746094817380647, + "learning_rate": 9.389815622296682e-08, + "loss": 0.4567055106163025, + "step": 7531 + }, + { + "epoch": 1.7363688760806917, + "grad_norm": 1.4581374638124471, + "learning_rate": 9.37369649503802e-08, + "loss": 0.4331890642642975, + "step": 7532 + }, + { + "epoch": 1.736599423631124, + "grad_norm": 1.2487963100679016, + "learning_rate": 9.357590534838533e-08, + "loss": 0.436681866645813, + "step": 7533 + }, + { + "epoch": 1.7368299711815562, + "grad_norm": 1.6198245450897364, + "learning_rate": 9.341497744038174e-08, + "loss": 0.338731586933136, + "step": 7534 + }, + { + "epoch": 1.7370605187319885, + "grad_norm": 1.508881034011893, + "learning_rate": 9.325418124975104e-08, + "loss": 0.4191438555717468, + "step": 7535 + }, + { + "epoch": 1.7372910662824208, + "grad_norm": 1.8780313553460073, + "learning_rate": 9.309351679985488e-08, + "loss": 0.48121005296707153, + "step": 7536 + }, + { + "epoch": 1.737521613832853, + "grad_norm": 1.6253985288606565, + "learning_rate": 9.293298411403649e-08, + "loss": 0.4818571209907532, + "step": 7537 + }, + { + "epoch": 1.7377521613832854, + "grad_norm": 1.690304719427989, + "learning_rate": 9.277258321561953e-08, + "loss": 0.5245034694671631, + "step": 7538 + }, + { + "epoch": 1.7379827089337176, + "grad_norm": 1.6355032908600926, + "learning_rate": 9.261231412790871e-08, + "loss": 0.34384316205978394, + "step": 7539 + }, + { + "epoch": 1.73821325648415, + "grad_norm": 1.6274411603144268, + "learning_rate": 9.245217687418893e-08, + "loss": 0.5642977356910706, + "step": 7540 + }, + { + "epoch": 1.7384438040345822, + "grad_norm": 1.7735493838829446, + "learning_rate": 9.229217147772706e-08, + "loss": 0.42372941970825195, + "step": 7541 + }, + { + "epoch": 1.7386743515850145, + "grad_norm": 1.4194954678009137, + "learning_rate": 9.21322979617698e-08, + "loss": 0.45389068126678467, + "step": 7542 + }, + { + "epoch": 1.7389048991354468, + "grad_norm": 1.9956141977502952, + "learning_rate": 9.197255634954549e-08, + "loss": 0.4558347463607788, + "step": 7543 + }, + { + "epoch": 1.739135446685879, + "grad_norm": 1.518636010257655, + "learning_rate": 9.181294666426242e-08, + "loss": 0.39747804403305054, + "step": 7544 + }, + { + "epoch": 1.7393659942363113, + "grad_norm": 1.4376186670875364, + "learning_rate": 9.165346892911086e-08, + "loss": 0.4366666376590729, + "step": 7545 + }, + { + "epoch": 1.7395965417867436, + "grad_norm": 1.6808343438571287, + "learning_rate": 9.14941231672608e-08, + "loss": 0.46908068656921387, + "step": 7546 + }, + { + "epoch": 1.739827089337176, + "grad_norm": 1.6446423333356077, + "learning_rate": 9.133490940186362e-08, + "loss": 0.4479343891143799, + "step": 7547 + }, + { + "epoch": 1.7400576368876082, + "grad_norm": 1.7651113248837644, + "learning_rate": 9.117582765605125e-08, + "loss": 0.5398527383804321, + "step": 7548 + }, + { + "epoch": 1.7402881844380405, + "grad_norm": 1.7495160233878475, + "learning_rate": 9.101687795293711e-08, + "loss": 0.5177565217018127, + "step": 7549 + }, + { + "epoch": 1.7405187319884727, + "grad_norm": 1.7544400731280996, + "learning_rate": 9.085806031561449e-08, + "loss": 0.4867921471595764, + "step": 7550 + }, + { + "epoch": 1.740749279538905, + "grad_norm": 1.8079319879826028, + "learning_rate": 9.069937476715817e-08, + "loss": 0.4968165159225464, + "step": 7551 + }, + { + "epoch": 1.7409798270893373, + "grad_norm": 1.5072359519878373, + "learning_rate": 9.054082133062346e-08, + "loss": 0.4723459780216217, + "step": 7552 + }, + { + "epoch": 1.7412103746397696, + "grad_norm": 1.7902281905237034, + "learning_rate": 9.03824000290464e-08, + "loss": 0.4756123423576355, + "step": 7553 + }, + { + "epoch": 1.7414409221902019, + "grad_norm": 1.563530493865973, + "learning_rate": 9.022411088544412e-08, + "loss": 0.4623698592185974, + "step": 7554 + }, + { + "epoch": 1.7416714697406341, + "grad_norm": 1.5031875380247095, + "learning_rate": 9.006595392281424e-08, + "loss": 0.5495933890342712, + "step": 7555 + }, + { + "epoch": 1.7419020172910664, + "grad_norm": 1.6009783877508532, + "learning_rate": 8.990792916413526e-08, + "loss": 0.48480600118637085, + "step": 7556 + }, + { + "epoch": 1.7421325648414987, + "grad_norm": 2.3377126287594545, + "learning_rate": 8.975003663236702e-08, + "loss": 0.4872364401817322, + "step": 7557 + }, + { + "epoch": 1.742363112391931, + "grad_norm": 1.9626684536569303, + "learning_rate": 8.95922763504492e-08, + "loss": 0.47713416814804077, + "step": 7558 + }, + { + "epoch": 1.7425936599423633, + "grad_norm": 1.6011185020405347, + "learning_rate": 8.943464834130287e-08, + "loss": 0.42151015996932983, + "step": 7559 + }, + { + "epoch": 1.7428242074927955, + "grad_norm": 1.7001226994436034, + "learning_rate": 8.927715262782954e-08, + "loss": 0.4855753779411316, + "step": 7560 + }, + { + "epoch": 1.7430547550432278, + "grad_norm": 1.6165035114904904, + "learning_rate": 8.911978923291186e-08, + "loss": 0.45306575298309326, + "step": 7561 + }, + { + "epoch": 1.74328530259366, + "grad_norm": 1.764884197279863, + "learning_rate": 8.896255817941334e-08, + "loss": 0.4395045042037964, + "step": 7562 + }, + { + "epoch": 1.7435158501440924, + "grad_norm": 1.9974865160949407, + "learning_rate": 8.880545949017748e-08, + "loss": 0.3771313428878784, + "step": 7563 + }, + { + "epoch": 1.7437463976945244, + "grad_norm": 1.5074350350589907, + "learning_rate": 8.86484931880297e-08, + "loss": 0.46764516830444336, + "step": 7564 + }, + { + "epoch": 1.7439769452449567, + "grad_norm": 1.828556449583164, + "learning_rate": 8.849165929577517e-08, + "loss": 0.4414178133010864, + "step": 7565 + }, + { + "epoch": 1.744207492795389, + "grad_norm": 1.8922732063370227, + "learning_rate": 8.833495783620016e-08, + "loss": 0.5199191570281982, + "step": 7566 + }, + { + "epoch": 1.7444380403458213, + "grad_norm": 1.9299795436619636, + "learning_rate": 8.817838883207218e-08, + "loss": 0.4091680645942688, + "step": 7567 + }, + { + "epoch": 1.7446685878962536, + "grad_norm": 1.6528169018956758, + "learning_rate": 8.802195230613852e-08, + "loss": 0.4918748140335083, + "step": 7568 + }, + { + "epoch": 1.7448991354466858, + "grad_norm": 1.7608829566111894, + "learning_rate": 8.786564828112809e-08, + "loss": 0.6308727264404297, + "step": 7569 + }, + { + "epoch": 1.7451296829971181, + "grad_norm": 1.5275596205430586, + "learning_rate": 8.770947677975038e-08, + "loss": 0.4739742875099182, + "step": 7570 + }, + { + "epoch": 1.7453602305475504, + "grad_norm": 1.944068702259704, + "learning_rate": 8.755343782469538e-08, + "loss": 0.4230186343193054, + "step": 7571 + }, + { + "epoch": 1.7455907780979827, + "grad_norm": 1.4651669474961737, + "learning_rate": 8.739753143863382e-08, + "loss": 0.4493221640586853, + "step": 7572 + }, + { + "epoch": 1.745821325648415, + "grad_norm": 1.5840361731430554, + "learning_rate": 8.724175764421715e-08, + "loss": 0.45003601908683777, + "step": 7573 + }, + { + "epoch": 1.7460518731988472, + "grad_norm": 1.5857283185800684, + "learning_rate": 8.708611646407793e-08, + "loss": 0.4641885757446289, + "step": 7574 + }, + { + "epoch": 1.7462824207492795, + "grad_norm": 1.7333150129203774, + "learning_rate": 8.693060792082929e-08, + "loss": 0.4831950068473816, + "step": 7575 + }, + { + "epoch": 1.7465129682997118, + "grad_norm": 1.5101721481830663, + "learning_rate": 8.67752320370646e-08, + "loss": 0.4084625840187073, + "step": 7576 + }, + { + "epoch": 1.746743515850144, + "grad_norm": 2.0710521973450464, + "learning_rate": 8.661998883535881e-08, + "loss": 0.4741554856300354, + "step": 7577 + }, + { + "epoch": 1.7469740634005764, + "grad_norm": 1.819950018248465, + "learning_rate": 8.646487833826698e-08, + "loss": 0.4739700257778168, + "step": 7578 + }, + { + "epoch": 1.7472046109510087, + "grad_norm": 1.937161185285157, + "learning_rate": 8.630990056832487e-08, + "loss": 0.46832704544067383, + "step": 7579 + }, + { + "epoch": 1.747435158501441, + "grad_norm": 1.6055622161766872, + "learning_rate": 8.615505554804936e-08, + "loss": 0.4395570158958435, + "step": 7580 + }, + { + "epoch": 1.7476657060518732, + "grad_norm": 1.6800369119355125, + "learning_rate": 8.600034329993755e-08, + "loss": 0.4883463382720947, + "step": 7581 + }, + { + "epoch": 1.7478962536023055, + "grad_norm": 1.3748001970742678, + "learning_rate": 8.58457638464678e-08, + "loss": 0.43945634365081787, + "step": 7582 + }, + { + "epoch": 1.7481268011527378, + "grad_norm": 1.5799846720591912, + "learning_rate": 8.569131721009892e-08, + "loss": 0.431932270526886, + "step": 7583 + }, + { + "epoch": 1.74835734870317, + "grad_norm": 1.710985178231973, + "learning_rate": 8.55370034132703e-08, + "loss": 0.3820973336696625, + "step": 7584 + }, + { + "epoch": 1.7485878962536023, + "grad_norm": 1.2876621633227396, + "learning_rate": 8.5382822478402e-08, + "loss": 0.476523756980896, + "step": 7585 + }, + { + "epoch": 1.7488184438040346, + "grad_norm": 1.8780737473029232, + "learning_rate": 8.522877442789511e-08, + "loss": 0.4477803707122803, + "step": 7586 + }, + { + "epoch": 1.749048991354467, + "grad_norm": 1.5728974720788302, + "learning_rate": 8.507485928413095e-08, + "loss": 0.5614187717437744, + "step": 7587 + }, + { + "epoch": 1.7492795389048992, + "grad_norm": 1.603867913680985, + "learning_rate": 8.492107706947216e-08, + "loss": 0.48507487773895264, + "step": 7588 + }, + { + "epoch": 1.7495100864553315, + "grad_norm": 1.9320347667746667, + "learning_rate": 8.476742780626134e-08, + "loss": 0.5384523868560791, + "step": 7589 + }, + { + "epoch": 1.7497406340057637, + "grad_norm": 1.3666371507332613, + "learning_rate": 8.46139115168224e-08, + "loss": 0.4491095542907715, + "step": 7590 + }, + { + "epoch": 1.749971181556196, + "grad_norm": 1.788798941175675, + "learning_rate": 8.446052822345961e-08, + "loss": 0.4440731406211853, + "step": 7591 + }, + { + "epoch": 1.750201729106628, + "grad_norm": 1.8620853583487789, + "learning_rate": 8.43072779484577e-08, + "loss": 0.44330766797065735, + "step": 7592 + }, + { + "epoch": 1.7504322766570604, + "grad_norm": 2.168954666945145, + "learning_rate": 8.415416071408255e-08, + "loss": 0.5658286809921265, + "step": 7593 + }, + { + "epoch": 1.7506628242074926, + "grad_norm": 1.6560038514007611, + "learning_rate": 8.400117654258065e-08, + "loss": 0.529731273651123, + "step": 7594 + }, + { + "epoch": 1.750893371757925, + "grad_norm": 1.9880717674846065, + "learning_rate": 8.38483254561787e-08, + "loss": 0.48404788970947266, + "step": 7595 + }, + { + "epoch": 1.7511239193083572, + "grad_norm": 1.8893587082964727, + "learning_rate": 8.36956074770847e-08, + "loss": 0.5418789386749268, + "step": 7596 + }, + { + "epoch": 1.7513544668587895, + "grad_norm": 1.5987650722786961, + "learning_rate": 8.354302262748681e-08, + "loss": 0.4364627003669739, + "step": 7597 + }, + { + "epoch": 1.7515850144092218, + "grad_norm": 1.9291958087354615, + "learning_rate": 8.339057092955382e-08, + "loss": 0.5439783334732056, + "step": 7598 + }, + { + "epoch": 1.751815561959654, + "grad_norm": 1.324151225959263, + "learning_rate": 8.323825240543581e-08, + "loss": 0.3421855568885803, + "step": 7599 + }, + { + "epoch": 1.7520461095100863, + "grad_norm": 1.4472184681007734, + "learning_rate": 8.30860670772625e-08, + "loss": 0.4138490855693817, + "step": 7600 + }, + { + "epoch": 1.7522766570605186, + "grad_norm": 1.424984649539542, + "learning_rate": 8.293401496714536e-08, + "loss": 0.48010265827178955, + "step": 7601 + }, + { + "epoch": 1.7525072046109509, + "grad_norm": 2.0212458348438536, + "learning_rate": 8.27820960971759e-08, + "loss": 0.4731842279434204, + "step": 7602 + }, + { + "epoch": 1.7527377521613832, + "grad_norm": 1.8169709585385185, + "learning_rate": 8.263031048942626e-08, + "loss": 0.4629393517971039, + "step": 7603 + }, + { + "epoch": 1.7529682997118154, + "grad_norm": 1.5488079089685691, + "learning_rate": 8.247865816594934e-08, + "loss": 0.47763800621032715, + "step": 7604 + }, + { + "epoch": 1.7531988472622477, + "grad_norm": 1.6496235472038807, + "learning_rate": 8.232713914877831e-08, + "loss": 0.4947577714920044, + "step": 7605 + }, + { + "epoch": 1.75342939481268, + "grad_norm": 1.683138181441751, + "learning_rate": 8.217575345992767e-08, + "loss": 0.525865375995636, + "step": 7606 + }, + { + "epoch": 1.7536599423631123, + "grad_norm": 2.0948110988687034, + "learning_rate": 8.202450112139237e-08, + "loss": 0.3592625558376312, + "step": 7607 + }, + { + "epoch": 1.7538904899135446, + "grad_norm": 1.802480905567215, + "learning_rate": 8.187338215514727e-08, + "loss": 0.4164069890975952, + "step": 7608 + }, + { + "epoch": 1.7541210374639769, + "grad_norm": 1.875988052607345, + "learning_rate": 8.172239658314883e-08, + "loss": 0.508970320224762, + "step": 7609 + }, + { + "epoch": 1.7543515850144091, + "grad_norm": 1.6717467531748178, + "learning_rate": 8.15715444273336e-08, + "loss": 0.5025101900100708, + "step": 7610 + }, + { + "epoch": 1.7545821325648414, + "grad_norm": 1.633734503844184, + "learning_rate": 8.14208257096185e-08, + "loss": 0.40589481592178345, + "step": 7611 + }, + { + "epoch": 1.7548126801152737, + "grad_norm": 1.5657822184185666, + "learning_rate": 8.127024045190179e-08, + "loss": 0.4361206889152527, + "step": 7612 + }, + { + "epoch": 1.755043227665706, + "grad_norm": 1.7797082565488338, + "learning_rate": 8.111978867606173e-08, + "loss": 0.4599993824958801, + "step": 7613 + }, + { + "epoch": 1.7552737752161383, + "grad_norm": 1.7072368164248672, + "learning_rate": 8.096947040395729e-08, + "loss": 0.4837978482246399, + "step": 7614 + }, + { + "epoch": 1.7555043227665705, + "grad_norm": 1.5967344816784776, + "learning_rate": 8.081928565742868e-08, + "loss": 0.44848477840423584, + "step": 7615 + }, + { + "epoch": 1.7557348703170028, + "grad_norm": 1.719065982432159, + "learning_rate": 8.066923445829565e-08, + "loss": 0.5391553044319153, + "step": 7616 + }, + { + "epoch": 1.755965417867435, + "grad_norm": 1.8326741541692133, + "learning_rate": 8.051931682835933e-08, + "loss": 0.5540188550949097, + "step": 7617 + }, + { + "epoch": 1.7561959654178674, + "grad_norm": 1.6797073632881683, + "learning_rate": 8.036953278940095e-08, + "loss": 0.432576984167099, + "step": 7618 + }, + { + "epoch": 1.7564265129682997, + "grad_norm": 1.7495401832350141, + "learning_rate": 8.021988236318267e-08, + "loss": 0.48879313468933105, + "step": 7619 + }, + { + "epoch": 1.756657060518732, + "grad_norm": 1.540187656958341, + "learning_rate": 8.007036557144742e-08, + "loss": 0.3939533531665802, + "step": 7620 + }, + { + "epoch": 1.7568876080691642, + "grad_norm": 1.588849068766626, + "learning_rate": 7.992098243591794e-08, + "loss": 0.4698101282119751, + "step": 7621 + }, + { + "epoch": 1.7571181556195965, + "grad_norm": 1.7235994666899865, + "learning_rate": 7.977173297829865e-08, + "loss": 0.3880566656589508, + "step": 7622 + }, + { + "epoch": 1.7573487031700288, + "grad_norm": 1.547739865650052, + "learning_rate": 7.962261722027352e-08, + "loss": 0.4888259768486023, + "step": 7623 + }, + { + "epoch": 1.757579250720461, + "grad_norm": 1.7611245522192838, + "learning_rate": 7.947363518350746e-08, + "loss": 0.5065386295318604, + "step": 7624 + }, + { + "epoch": 1.7578097982708933, + "grad_norm": 1.6305093185850665, + "learning_rate": 7.932478688964628e-08, + "loss": 0.4453485310077667, + "step": 7625 + }, + { + "epoch": 1.7580403458213256, + "grad_norm": 1.695465926028088, + "learning_rate": 7.917607236031587e-08, + "loss": 0.49680840969085693, + "step": 7626 + }, + { + "epoch": 1.758270893371758, + "grad_norm": 1.902125357001842, + "learning_rate": 7.902749161712297e-08, + "loss": 0.46799665689468384, + "step": 7627 + }, + { + "epoch": 1.7585014409221902, + "grad_norm": 1.4641445248158584, + "learning_rate": 7.887904468165507e-08, + "loss": 0.4290396273136139, + "step": 7628 + }, + { + "epoch": 1.7587319884726225, + "grad_norm": 1.494429958046351, + "learning_rate": 7.873073157547971e-08, + "loss": 0.4317634701728821, + "step": 7629 + }, + { + "epoch": 1.7589625360230547, + "grad_norm": 1.5596923902995892, + "learning_rate": 7.85825523201451e-08, + "loss": 0.47573816776275635, + "step": 7630 + }, + { + "epoch": 1.759193083573487, + "grad_norm": 1.869048567302145, + "learning_rate": 7.843450693718046e-08, + "loss": 0.5061118602752686, + "step": 7631 + }, + { + "epoch": 1.7594236311239193, + "grad_norm": 1.90036981437545, + "learning_rate": 7.828659544809502e-08, + "loss": 0.4778732657432556, + "step": 7632 + }, + { + "epoch": 1.7596541786743516, + "grad_norm": 1.7460511681021387, + "learning_rate": 7.813881787437904e-08, + "loss": 0.43174654245376587, + "step": 7633 + }, + { + "epoch": 1.7598847262247839, + "grad_norm": 2.4436834763569792, + "learning_rate": 7.799117423750267e-08, + "loss": 0.5490479469299316, + "step": 7634 + }, + { + "epoch": 1.7601152737752161, + "grad_norm": 1.9505377666705999, + "learning_rate": 7.784366455891733e-08, + "loss": 0.4566101133823395, + "step": 7635 + }, + { + "epoch": 1.7603458213256484, + "grad_norm": 1.9604546224667367, + "learning_rate": 7.769628886005463e-08, + "loss": 0.4798309803009033, + "step": 7636 + }, + { + "epoch": 1.7605763688760807, + "grad_norm": 1.72870260704983, + "learning_rate": 7.754904716232647e-08, + "loss": 0.38129231333732605, + "step": 7637 + }, + { + "epoch": 1.760806916426513, + "grad_norm": 1.6337711899440401, + "learning_rate": 7.740193948712559e-08, + "loss": 0.5362708568572998, + "step": 7638 + }, + { + "epoch": 1.7610374639769453, + "grad_norm": 1.6562532161260684, + "learning_rate": 7.725496585582547e-08, + "loss": 0.4723502993583679, + "step": 7639 + }, + { + "epoch": 1.7612680115273776, + "grad_norm": 1.9008305274132495, + "learning_rate": 7.710812628977958e-08, + "loss": 0.484948992729187, + "step": 7640 + }, + { + "epoch": 1.7614985590778098, + "grad_norm": 1.5406976005020663, + "learning_rate": 7.696142081032264e-08, + "loss": 0.42966312170028687, + "step": 7641 + }, + { + "epoch": 1.7617291066282421, + "grad_norm": 1.934961065868341, + "learning_rate": 7.68148494387687e-08, + "loss": 0.46488505601882935, + "step": 7642 + }, + { + "epoch": 1.7619596541786744, + "grad_norm": 1.7842781707991795, + "learning_rate": 7.666841219641351e-08, + "loss": 0.4626274108886719, + "step": 7643 + }, + { + "epoch": 1.7621902017291067, + "grad_norm": 1.5991637379651926, + "learning_rate": 7.652210910453283e-08, + "loss": 0.45007890462875366, + "step": 7644 + }, + { + "epoch": 1.762420749279539, + "grad_norm": 1.8799325604165342, + "learning_rate": 7.637594018438288e-08, + "loss": 0.48811158537864685, + "step": 7645 + }, + { + "epoch": 1.7626512968299712, + "grad_norm": 1.4567095096492855, + "learning_rate": 7.622990545720054e-08, + "loss": 0.4320351481437683, + "step": 7646 + }, + { + "epoch": 1.7628818443804035, + "grad_norm": 1.7129571899462184, + "learning_rate": 7.608400494420353e-08, + "loss": 0.43537721037864685, + "step": 7647 + }, + { + "epoch": 1.7631123919308358, + "grad_norm": 1.8137752166479428, + "learning_rate": 7.593823866658889e-08, + "loss": 0.39277005195617676, + "step": 7648 + }, + { + "epoch": 1.763342939481268, + "grad_norm": 1.7043881583043696, + "learning_rate": 7.579260664553544e-08, + "loss": 0.4029799997806549, + "step": 7649 + }, + { + "epoch": 1.7635734870317004, + "grad_norm": 1.7369149494853777, + "learning_rate": 7.564710890220183e-08, + "loss": 0.393459677696228, + "step": 7650 + }, + { + "epoch": 1.7638040345821326, + "grad_norm": 1.7712546173623327, + "learning_rate": 7.550174545772747e-08, + "loss": 0.47554415464401245, + "step": 7651 + }, + { + "epoch": 1.764034582132565, + "grad_norm": 1.2906409481776995, + "learning_rate": 7.535651633323226e-08, + "loss": 0.3958222270011902, + "step": 7652 + }, + { + "epoch": 1.7642651296829972, + "grad_norm": 1.5409557041571615, + "learning_rate": 7.521142154981641e-08, + "loss": 0.4722353219985962, + "step": 7653 + }, + { + "epoch": 1.7644956772334295, + "grad_norm": 1.6974701199285933, + "learning_rate": 7.506646112856041e-08, + "loss": 0.4863585829734802, + "step": 7654 + }, + { + "epoch": 1.7647262247838618, + "grad_norm": 1.7158035391820794, + "learning_rate": 7.492163509052585e-08, + "loss": 0.47278356552124023, + "step": 7655 + }, + { + "epoch": 1.764956772334294, + "grad_norm": 1.5747549758580093, + "learning_rate": 7.477694345675411e-08, + "loss": 0.4372313320636749, + "step": 7656 + }, + { + "epoch": 1.7651873198847263, + "grad_norm": 1.767687407393855, + "learning_rate": 7.463238624826785e-08, + "loss": 0.5864512920379639, + "step": 7657 + }, + { + "epoch": 1.7654178674351586, + "grad_norm": 1.5559070085327171, + "learning_rate": 7.448796348606923e-08, + "loss": 0.4714244306087494, + "step": 7658 + }, + { + "epoch": 1.7656484149855909, + "grad_norm": 1.56910530297036, + "learning_rate": 7.434367519114182e-08, + "loss": 0.4437907040119171, + "step": 7659 + }, + { + "epoch": 1.7658789625360232, + "grad_norm": 1.7453753195379775, + "learning_rate": 7.419952138444896e-08, + "loss": 0.4242848753929138, + "step": 7660 + }, + { + "epoch": 1.7661095100864554, + "grad_norm": 1.5917510491266111, + "learning_rate": 7.405550208693456e-08, + "loss": 0.3774869441986084, + "step": 7661 + }, + { + "epoch": 1.7663400576368877, + "grad_norm": 1.7728290606673953, + "learning_rate": 7.391161731952356e-08, + "loss": 0.37027084827423096, + "step": 7662 + }, + { + "epoch": 1.76657060518732, + "grad_norm": 1.8043432929961043, + "learning_rate": 7.376786710312043e-08, + "loss": 0.4489648640155792, + "step": 7663 + }, + { + "epoch": 1.7668011527377523, + "grad_norm": 1.977706820286657, + "learning_rate": 7.362425145861072e-08, + "loss": 0.4108201861381531, + "step": 7664 + }, + { + "epoch": 1.7670317002881846, + "grad_norm": 1.5197214354873567, + "learning_rate": 7.348077040686062e-08, + "loss": 0.43439608812332153, + "step": 7665 + }, + { + "epoch": 1.7672622478386169, + "grad_norm": 1.7853031370306771, + "learning_rate": 7.333742396871623e-08, + "loss": 0.42088496685028076, + "step": 7666 + }, + { + "epoch": 1.7674927953890491, + "grad_norm": 1.5723255421968871, + "learning_rate": 7.319421216500399e-08, + "loss": 0.48281657695770264, + "step": 7667 + }, + { + "epoch": 1.7677233429394814, + "grad_norm": 1.4536217774292859, + "learning_rate": 7.305113501653159e-08, + "loss": 0.5121546983718872, + "step": 7668 + }, + { + "epoch": 1.7679538904899137, + "grad_norm": 1.3857702545327046, + "learning_rate": 7.290819254408631e-08, + "loss": 0.40559446811676025, + "step": 7669 + }, + { + "epoch": 1.768184438040346, + "grad_norm": 1.6995494609484647, + "learning_rate": 7.276538476843641e-08, + "loss": 0.5207052826881409, + "step": 7670 + }, + { + "epoch": 1.7684149855907783, + "grad_norm": 1.4468462521729057, + "learning_rate": 7.262271171033007e-08, + "loss": 0.5177881121635437, + "step": 7671 + }, + { + "epoch": 1.7686455331412105, + "grad_norm": 1.4929042033851914, + "learning_rate": 7.248017339049662e-08, + "loss": 0.34873148798942566, + "step": 7672 + }, + { + "epoch": 1.7688760806916428, + "grad_norm": 1.4828796543268334, + "learning_rate": 7.233776982964513e-08, + "loss": 0.5187006592750549, + "step": 7673 + }, + { + "epoch": 1.7691066282420749, + "grad_norm": 1.7628461302950964, + "learning_rate": 7.219550104846528e-08, + "loss": 0.4007762670516968, + "step": 7674 + }, + { + "epoch": 1.7693371757925072, + "grad_norm": 1.7373369616493144, + "learning_rate": 7.205336706762732e-08, + "loss": 0.34868037700653076, + "step": 7675 + }, + { + "epoch": 1.7695677233429394, + "grad_norm": 1.5968096719688372, + "learning_rate": 7.191136790778207e-08, + "loss": 0.4267783761024475, + "step": 7676 + }, + { + "epoch": 1.7697982708933717, + "grad_norm": 1.662213854570385, + "learning_rate": 7.176950358956025e-08, + "loss": 0.48516613245010376, + "step": 7677 + }, + { + "epoch": 1.770028818443804, + "grad_norm": 1.4490843200762542, + "learning_rate": 7.162777413357345e-08, + "loss": 0.370013564825058, + "step": 7678 + }, + { + "epoch": 1.7702593659942363, + "grad_norm": 1.5754606178259032, + "learning_rate": 7.148617956041347e-08, + "loss": 0.4909803569316864, + "step": 7679 + }, + { + "epoch": 1.7704899135446686, + "grad_norm": 1.6427495490321167, + "learning_rate": 7.134471989065227e-08, + "loss": 0.45908260345458984, + "step": 7680 + }, + { + "epoch": 1.7707204610951008, + "grad_norm": 1.5785664303220688, + "learning_rate": 7.120339514484285e-08, + "loss": 0.4022323191165924, + "step": 7681 + }, + { + "epoch": 1.7709510086455331, + "grad_norm": 1.5712968291810534, + "learning_rate": 7.10622053435178e-08, + "loss": 0.5402769446372986, + "step": 7682 + }, + { + "epoch": 1.7711815561959654, + "grad_norm": 1.5473368342653435, + "learning_rate": 7.092115050719083e-08, + "loss": 0.4438230097293854, + "step": 7683 + }, + { + "epoch": 1.7714121037463977, + "grad_norm": 1.573083190057225, + "learning_rate": 7.078023065635585e-08, + "loss": 0.49692100286483765, + "step": 7684 + }, + { + "epoch": 1.77164265129683, + "grad_norm": 1.6609187799962484, + "learning_rate": 7.063944581148684e-08, + "loss": 0.4989246726036072, + "step": 7685 + }, + { + "epoch": 1.7718731988472622, + "grad_norm": 1.8704844560277196, + "learning_rate": 7.049879599303842e-08, + "loss": 0.5415306687355042, + "step": 7686 + }, + { + "epoch": 1.7721037463976945, + "grad_norm": 1.7026704659165601, + "learning_rate": 7.035828122144538e-08, + "loss": 0.5030748844146729, + "step": 7687 + }, + { + "epoch": 1.7723342939481268, + "grad_norm": 1.764587481495758, + "learning_rate": 7.021790151712326e-08, + "loss": 0.5167194604873657, + "step": 7688 + }, + { + "epoch": 1.772564841498559, + "grad_norm": 1.7278659087314725, + "learning_rate": 7.007765690046774e-08, + "loss": 0.48385316133499146, + "step": 7689 + }, + { + "epoch": 1.7727953890489914, + "grad_norm": 1.755610455110011, + "learning_rate": 6.993754739185487e-08, + "loss": 0.44149184226989746, + "step": 7690 + }, + { + "epoch": 1.7730259365994236, + "grad_norm": 1.827798361341849, + "learning_rate": 6.979757301164113e-08, + "loss": 0.5165606737136841, + "step": 7691 + }, + { + "epoch": 1.773256484149856, + "grad_norm": 1.779706905725424, + "learning_rate": 6.965773378016348e-08, + "loss": 0.3729487359523773, + "step": 7692 + }, + { + "epoch": 1.7734870317002882, + "grad_norm": 1.7294467511491705, + "learning_rate": 6.951802971773868e-08, + "loss": 0.459484338760376, + "step": 7693 + }, + { + "epoch": 1.7737175792507205, + "grad_norm": 1.7442166010782674, + "learning_rate": 6.93784608446647e-08, + "loss": 0.41767945885658264, + "step": 7694 + }, + { + "epoch": 1.7739481268011528, + "grad_norm": 1.5424246362534682, + "learning_rate": 6.923902718121921e-08, + "loss": 0.5340604782104492, + "step": 7695 + }, + { + "epoch": 1.774178674351585, + "grad_norm": 1.5182197777494724, + "learning_rate": 6.909972874766057e-08, + "loss": 0.4299081265926361, + "step": 7696 + }, + { + "epoch": 1.7744092219020173, + "grad_norm": 1.5921658694779064, + "learning_rate": 6.896056556422747e-08, + "loss": 0.4131748080253601, + "step": 7697 + }, + { + "epoch": 1.7746397694524496, + "grad_norm": 1.7608740339569051, + "learning_rate": 6.882153765113885e-08, + "loss": 0.460279643535614, + "step": 7698 + }, + { + "epoch": 1.774870317002882, + "grad_norm": 1.8352317153683946, + "learning_rate": 6.868264502859366e-08, + "loss": 0.5152443051338196, + "step": 7699 + }, + { + "epoch": 1.7751008645533142, + "grad_norm": 1.6921081006962246, + "learning_rate": 6.854388771677211e-08, + "loss": 0.49927568435668945, + "step": 7700 + }, + { + "epoch": 1.7753314121037462, + "grad_norm": 2.152713876850487, + "learning_rate": 6.840526573583383e-08, + "loss": 0.43129733204841614, + "step": 7701 + }, + { + "epoch": 1.7755619596541785, + "grad_norm": 1.6631961804324806, + "learning_rate": 6.826677910591926e-08, + "loss": 0.39078906178474426, + "step": 7702 + }, + { + "epoch": 1.7757925072046108, + "grad_norm": 1.6788233641075792, + "learning_rate": 6.8128427847149e-08, + "loss": 0.4480215907096863, + "step": 7703 + }, + { + "epoch": 1.776023054755043, + "grad_norm": 1.7287551762868483, + "learning_rate": 6.79902119796243e-08, + "loss": 0.39621448516845703, + "step": 7704 + }, + { + "epoch": 1.7762536023054754, + "grad_norm": 1.5061600691752293, + "learning_rate": 6.785213152342628e-08, + "loss": 0.4708957076072693, + "step": 7705 + }, + { + "epoch": 1.7764841498559076, + "grad_norm": 1.6137574769212935, + "learning_rate": 6.771418649861638e-08, + "loss": 0.38831156492233276, + "step": 7706 + }, + { + "epoch": 1.77671469740634, + "grad_norm": 1.4887027346159967, + "learning_rate": 6.7576376925237e-08, + "loss": 0.4053429365158081, + "step": 7707 + }, + { + "epoch": 1.7769452449567722, + "grad_norm": 1.575497148186284, + "learning_rate": 6.74387028233101e-08, + "loss": 0.4687873125076294, + "step": 7708 + }, + { + "epoch": 1.7771757925072045, + "grad_norm": 1.9413502528314348, + "learning_rate": 6.730116421283838e-08, + "loss": 0.4794561266899109, + "step": 7709 + }, + { + "epoch": 1.7774063400576368, + "grad_norm": 1.5506812991717323, + "learning_rate": 6.716376111380506e-08, + "loss": 0.5206823945045471, + "step": 7710 + }, + { + "epoch": 1.777636887608069, + "grad_norm": 1.5577917172326046, + "learning_rate": 6.702649354617307e-08, + "loss": 0.5148698687553406, + "step": 7711 + }, + { + "epoch": 1.7778674351585013, + "grad_norm": 1.7026300959091676, + "learning_rate": 6.688936152988589e-08, + "loss": 0.47977179288864136, + "step": 7712 + }, + { + "epoch": 1.7780979827089336, + "grad_norm": 1.5608303640349446, + "learning_rate": 6.67523650848677e-08, + "loss": 0.3876994848251343, + "step": 7713 + }, + { + "epoch": 1.7783285302593659, + "grad_norm": 1.6373362273851646, + "learning_rate": 6.661550423102235e-08, + "loss": 0.48703986406326294, + "step": 7714 + }, + { + "epoch": 1.7785590778097982, + "grad_norm": 1.6262507534520982, + "learning_rate": 6.647877898823462e-08, + "loss": 0.40404099225997925, + "step": 7715 + }, + { + "epoch": 1.7787896253602304, + "grad_norm": 1.7573860934066001, + "learning_rate": 6.634218937636882e-08, + "loss": 0.4791918396949768, + "step": 7716 + }, + { + "epoch": 1.7790201729106627, + "grad_norm": 1.6154685028874851, + "learning_rate": 6.620573541527042e-08, + "loss": 0.46973907947540283, + "step": 7717 + }, + { + "epoch": 1.779250720461095, + "grad_norm": 1.9052536891572849, + "learning_rate": 6.606941712476466e-08, + "loss": 0.5298879146575928, + "step": 7718 + }, + { + "epoch": 1.7794812680115273, + "grad_norm": 1.4315445647873681, + "learning_rate": 6.593323452465693e-08, + "loss": 0.45558857917785645, + "step": 7719 + }, + { + "epoch": 1.7797118155619596, + "grad_norm": 1.74666285264939, + "learning_rate": 6.579718763473329e-08, + "loss": 0.42475903034210205, + "step": 7720 + }, + { + "epoch": 1.7799423631123918, + "grad_norm": 1.6085492262804577, + "learning_rate": 6.566127647476016e-08, + "loss": 0.4914790391921997, + "step": 7721 + }, + { + "epoch": 1.7801729106628241, + "grad_norm": 1.7428285922364788, + "learning_rate": 6.552550106448363e-08, + "loss": 0.49469706416130066, + "step": 7722 + }, + { + "epoch": 1.7804034582132564, + "grad_norm": 1.6195332469418184, + "learning_rate": 6.538986142363089e-08, + "loss": 0.36061471700668335, + "step": 7723 + }, + { + "epoch": 1.7806340057636887, + "grad_norm": 1.8328650472042145, + "learning_rate": 6.525435757190867e-08, + "loss": 0.5629843473434448, + "step": 7724 + }, + { + "epoch": 1.780864553314121, + "grad_norm": 1.8941469492593315, + "learning_rate": 6.511898952900419e-08, + "loss": 0.46624869108200073, + "step": 7725 + }, + { + "epoch": 1.7810951008645532, + "grad_norm": 1.7694804696248736, + "learning_rate": 6.498375731458527e-08, + "loss": 0.5627322793006897, + "step": 7726 + }, + { + "epoch": 1.7813256484149855, + "grad_norm": 1.4591931185069376, + "learning_rate": 6.484866094829944e-08, + "loss": 0.4573343098163605, + "step": 7727 + }, + { + "epoch": 1.7815561959654178, + "grad_norm": 1.785446481353626, + "learning_rate": 6.47137004497751e-08, + "loss": 0.44986212253570557, + "step": 7728 + }, + { + "epoch": 1.78178674351585, + "grad_norm": 2.16333875922491, + "learning_rate": 6.457887583862065e-08, + "loss": 0.4796055853366852, + "step": 7729 + }, + { + "epoch": 1.7820172910662824, + "grad_norm": 1.774300664934424, + "learning_rate": 6.444418713442445e-08, + "loss": 0.447618305683136, + "step": 7730 + }, + { + "epoch": 1.7822478386167147, + "grad_norm": 1.8485863438016088, + "learning_rate": 6.430963435675551e-08, + "loss": 0.46969741582870483, + "step": 7731 + }, + { + "epoch": 1.782478386167147, + "grad_norm": 1.733301132250343, + "learning_rate": 6.417521752516275e-08, + "loss": 0.43413540720939636, + "step": 7732 + }, + { + "epoch": 1.7827089337175792, + "grad_norm": 2.138945448309459, + "learning_rate": 6.404093665917576e-08, + "loss": 0.532086968421936, + "step": 7733 + }, + { + "epoch": 1.7829394812680115, + "grad_norm": 1.9071183008372716, + "learning_rate": 6.390679177830417e-08, + "loss": 0.5077864527702332, + "step": 7734 + }, + { + "epoch": 1.7831700288184438, + "grad_norm": 2.059822081609659, + "learning_rate": 6.377278290203757e-08, + "loss": 0.5576372146606445, + "step": 7735 + }, + { + "epoch": 1.783400576368876, + "grad_norm": 1.73331324996214, + "learning_rate": 6.363891004984646e-08, + "loss": 0.4265804886817932, + "step": 7736 + }, + { + "epoch": 1.7836311239193083, + "grad_norm": 1.4188934525049064, + "learning_rate": 6.350517324118087e-08, + "loss": 0.40466463565826416, + "step": 7737 + }, + { + "epoch": 1.7838616714697406, + "grad_norm": 1.8493725205047176, + "learning_rate": 6.337157249547132e-08, + "loss": 0.42143672704696655, + "step": 7738 + }, + { + "epoch": 1.784092219020173, + "grad_norm": 1.6661748528000018, + "learning_rate": 6.32381078321289e-08, + "loss": 0.49432891607284546, + "step": 7739 + }, + { + "epoch": 1.7843227665706052, + "grad_norm": 1.7897170262913213, + "learning_rate": 6.310477927054436e-08, + "loss": 0.4701034426689148, + "step": 7740 + }, + { + "epoch": 1.7845533141210375, + "grad_norm": 1.8045876586105092, + "learning_rate": 6.297158683008896e-08, + "loss": 0.4706331491470337, + "step": 7741 + }, + { + "epoch": 1.7847838616714697, + "grad_norm": 1.6424743309135614, + "learning_rate": 6.283853053011456e-08, + "loss": 0.5200982689857483, + "step": 7742 + }, + { + "epoch": 1.785014409221902, + "grad_norm": 1.5460597062029549, + "learning_rate": 6.270561038995248e-08, + "loss": 0.39499545097351074, + "step": 7743 + }, + { + "epoch": 1.7852449567723343, + "grad_norm": 1.7159811116379573, + "learning_rate": 6.25728264289147e-08, + "loss": 0.4668952226638794, + "step": 7744 + }, + { + "epoch": 1.7854755043227666, + "grad_norm": 1.4261473913577194, + "learning_rate": 6.244017866629337e-08, + "loss": 0.3721618950366974, + "step": 7745 + }, + { + "epoch": 1.7857060518731989, + "grad_norm": 1.6135087449550563, + "learning_rate": 6.230766712136082e-08, + "loss": 0.4165458679199219, + "step": 7746 + }, + { + "epoch": 1.7859365994236311, + "grad_norm": 1.7713530242570052, + "learning_rate": 6.217529181336967e-08, + "loss": 0.4876209497451782, + "step": 7747 + }, + { + "epoch": 1.7861671469740634, + "grad_norm": 2.115685960160452, + "learning_rate": 6.204305276155252e-08, + "loss": 0.576325535774231, + "step": 7748 + }, + { + "epoch": 1.7863976945244957, + "grad_norm": 1.679232826772317, + "learning_rate": 6.191094998512259e-08, + "loss": 0.4436579942703247, + "step": 7749 + }, + { + "epoch": 1.786628242074928, + "grad_norm": 1.5230671727260017, + "learning_rate": 6.177898350327282e-08, + "loss": 0.34511566162109375, + "step": 7750 + }, + { + "epoch": 1.7868587896253603, + "grad_norm": 1.3956973107037376, + "learning_rate": 6.164715333517656e-08, + "loss": 0.47144967317581177, + "step": 7751 + }, + { + "epoch": 1.7870893371757925, + "grad_norm": 1.7458104416555835, + "learning_rate": 6.15154594999876e-08, + "loss": 0.45297929644584656, + "step": 7752 + }, + { + "epoch": 1.7873198847262248, + "grad_norm": 1.617434355385532, + "learning_rate": 6.13839020168393e-08, + "loss": 0.38023924827575684, + "step": 7753 + }, + { + "epoch": 1.787550432276657, + "grad_norm": 1.553165083717958, + "learning_rate": 6.125248090484581e-08, + "loss": 0.49789756536483765, + "step": 7754 + }, + { + "epoch": 1.7877809798270894, + "grad_norm": 1.5119135940794262, + "learning_rate": 6.112119618310141e-08, + "loss": 0.41589903831481934, + "step": 7755 + }, + { + "epoch": 1.7880115273775217, + "grad_norm": 1.9352630835722213, + "learning_rate": 6.099004787068018e-08, + "loss": 0.4091898500919342, + "step": 7756 + }, + { + "epoch": 1.788242074927954, + "grad_norm": 1.5523031339189788, + "learning_rate": 6.085903598663655e-08, + "loss": 0.42826372385025024, + "step": 7757 + }, + { + "epoch": 1.7884726224783862, + "grad_norm": 1.621501159790323, + "learning_rate": 6.072816055000552e-08, + "loss": 0.43129992485046387, + "step": 7758 + }, + { + "epoch": 1.7887031700288185, + "grad_norm": 1.5341459583525834, + "learning_rate": 6.059742157980152e-08, + "loss": 0.46008431911468506, + "step": 7759 + }, + { + "epoch": 1.7889337175792508, + "grad_norm": 1.9309743062622498, + "learning_rate": 6.046681909501994e-08, + "loss": 0.5652228593826294, + "step": 7760 + }, + { + "epoch": 1.789164265129683, + "grad_norm": 1.7004047485154516, + "learning_rate": 6.03363531146357e-08, + "loss": 0.44568371772766113, + "step": 7761 + }, + { + "epoch": 1.7893948126801154, + "grad_norm": 1.5142959284192654, + "learning_rate": 6.020602365760419e-08, + "loss": 0.44745492935180664, + "step": 7762 + }, + { + "epoch": 1.7896253602305476, + "grad_norm": 1.9154302941464691, + "learning_rate": 6.007583074286094e-08, + "loss": 0.449093759059906, + "step": 7763 + }, + { + "epoch": 1.78985590778098, + "grad_norm": 1.267128025484339, + "learning_rate": 5.994577438932169e-08, + "loss": 0.37527692317962646, + "step": 7764 + }, + { + "epoch": 1.7900864553314122, + "grad_norm": 1.4630537022894423, + "learning_rate": 5.981585461588213e-08, + "loss": 0.4551668167114258, + "step": 7765 + }, + { + "epoch": 1.7903170028818445, + "grad_norm": 1.5603801053478943, + "learning_rate": 5.968607144141846e-08, + "loss": 0.4525032043457031, + "step": 7766 + }, + { + "epoch": 1.7905475504322768, + "grad_norm": 1.7729818006554123, + "learning_rate": 5.955642488478674e-08, + "loss": 0.357668399810791, + "step": 7767 + }, + { + "epoch": 1.790778097982709, + "grad_norm": 1.828688502419706, + "learning_rate": 5.9426914964823327e-08, + "loss": 0.49838197231292725, + "step": 7768 + }, + { + "epoch": 1.7910086455331413, + "grad_norm": 1.4374954094595649, + "learning_rate": 5.9297541700344286e-08, + "loss": 0.43415066599845886, + "step": 7769 + }, + { + "epoch": 1.7912391930835736, + "grad_norm": 1.4808851812838624, + "learning_rate": 5.91683051101467e-08, + "loss": 0.44491448998451233, + "step": 7770 + }, + { + "epoch": 1.7914697406340059, + "grad_norm": 1.5676470000342726, + "learning_rate": 5.9039205213007094e-08, + "loss": 0.5361511707305908, + "step": 7771 + }, + { + "epoch": 1.7917002881844382, + "grad_norm": 1.5923433064017045, + "learning_rate": 5.891024202768224e-08, + "loss": 0.4797734320163727, + "step": 7772 + }, + { + "epoch": 1.7919308357348704, + "grad_norm": 1.5894190231720695, + "learning_rate": 5.878141557290939e-08, + "loss": 0.39747869968414307, + "step": 7773 + }, + { + "epoch": 1.7921613832853027, + "grad_norm": 1.836432899941197, + "learning_rate": 5.865272586740566e-08, + "loss": 0.5796657800674438, + "step": 7774 + }, + { + "epoch": 1.792391930835735, + "grad_norm": 1.5280268419302134, + "learning_rate": 5.8524172929867995e-08, + "loss": 0.4255181849002838, + "step": 7775 + }, + { + "epoch": 1.7926224783861673, + "grad_norm": 1.6995321372757681, + "learning_rate": 5.8395756778974125e-08, + "loss": 0.4837808310985565, + "step": 7776 + }, + { + "epoch": 1.7928530259365996, + "grad_norm": 1.5350170754716532, + "learning_rate": 5.826747743338134e-08, + "loss": 0.47962331771850586, + "step": 7777 + }, + { + "epoch": 1.7930835734870318, + "grad_norm": 1.5014268099772772, + "learning_rate": 5.813933491172751e-08, + "loss": 0.4127134084701538, + "step": 7778 + }, + { + "epoch": 1.7933141210374641, + "grad_norm": 1.5155643699973835, + "learning_rate": 5.801132923263052e-08, + "loss": 0.42512404918670654, + "step": 7779 + }, + { + "epoch": 1.7935446685878964, + "grad_norm": 1.9564743709601773, + "learning_rate": 5.7883460414687946e-08, + "loss": 0.5483304262161255, + "step": 7780 + }, + { + "epoch": 1.7937752161383287, + "grad_norm": 1.6888505182787188, + "learning_rate": 5.775572847647781e-08, + "loss": 0.42901748418807983, + "step": 7781 + }, + { + "epoch": 1.794005763688761, + "grad_norm": 1.8007028985429232, + "learning_rate": 5.762813343655859e-08, + "loss": 0.4339037239551544, + "step": 7782 + }, + { + "epoch": 1.7942363112391932, + "grad_norm": 1.65295831348881, + "learning_rate": 5.7500675313468026e-08, + "loss": 0.4670749008655548, + "step": 7783 + }, + { + "epoch": 1.7944668587896253, + "grad_norm": 1.5443675410420685, + "learning_rate": 5.737335412572497e-08, + "loss": 0.5136678218841553, + "step": 7784 + }, + { + "epoch": 1.7946974063400576, + "grad_norm": 1.4237421461129909, + "learning_rate": 5.72461698918274e-08, + "loss": 0.43283748626708984, + "step": 7785 + }, + { + "epoch": 1.7949279538904899, + "grad_norm": 1.7650305092506278, + "learning_rate": 5.71191226302542e-08, + "loss": 0.476909875869751, + "step": 7786 + }, + { + "epoch": 1.7951585014409221, + "grad_norm": 1.672567271487365, + "learning_rate": 5.699221235946394e-08, + "loss": 0.4585307240486145, + "step": 7787 + }, + { + "epoch": 1.7953890489913544, + "grad_norm": 1.9458664676542134, + "learning_rate": 5.6865439097895096e-08, + "loss": 0.5796236991882324, + "step": 7788 + }, + { + "epoch": 1.7956195965417867, + "grad_norm": 2.0548684321027015, + "learning_rate": 5.6738802863966816e-08, + "loss": 0.4688211679458618, + "step": 7789 + }, + { + "epoch": 1.795850144092219, + "grad_norm": 1.7820844049311864, + "learning_rate": 5.661230367607805e-08, + "loss": 0.48832905292510986, + "step": 7790 + }, + { + "epoch": 1.7960806916426513, + "grad_norm": 1.6531888312791727, + "learning_rate": 5.648594155260744e-08, + "loss": 0.49670735001564026, + "step": 7791 + }, + { + "epoch": 1.7963112391930836, + "grad_norm": 1.6231028236984248, + "learning_rate": 5.6359716511914624e-08, + "loss": 0.4234163761138916, + "step": 7792 + }, + { + "epoch": 1.7965417867435158, + "grad_norm": 1.7032647254914282, + "learning_rate": 5.6233628572338375e-08, + "loss": 0.5399529933929443, + "step": 7793 + }, + { + "epoch": 1.7967723342939481, + "grad_norm": 1.519848058141304, + "learning_rate": 5.610767775219805e-08, + "loss": 0.40350812673568726, + "step": 7794 + }, + { + "epoch": 1.7970028818443804, + "grad_norm": 1.6695509859757407, + "learning_rate": 5.598186406979311e-08, + "loss": 0.44279199838638306, + "step": 7795 + }, + { + "epoch": 1.7972334293948127, + "grad_norm": 2.1845146197296907, + "learning_rate": 5.585618754340282e-08, + "loss": 0.49113231897354126, + "step": 7796 + }, + { + "epoch": 1.797463976945245, + "grad_norm": 1.6014376932476218, + "learning_rate": 5.573064819128681e-08, + "loss": 0.47899287939071655, + "step": 7797 + }, + { + "epoch": 1.7976945244956772, + "grad_norm": 1.962506870462268, + "learning_rate": 5.5605246031684485e-08, + "loss": 0.4684637784957886, + "step": 7798 + }, + { + "epoch": 1.7979250720461095, + "grad_norm": 1.704176925336371, + "learning_rate": 5.547998108281571e-08, + "loss": 0.5158382654190063, + "step": 7799 + }, + { + "epoch": 1.7981556195965418, + "grad_norm": 1.5361177647232538, + "learning_rate": 5.5354853362880036e-08, + "loss": 0.41753411293029785, + "step": 7800 + }, + { + "epoch": 1.798386167146974, + "grad_norm": 1.4773819899817666, + "learning_rate": 5.522986289005704e-08, + "loss": 0.4594987630844116, + "step": 7801 + }, + { + "epoch": 1.7986167146974064, + "grad_norm": 1.5236452636861773, + "learning_rate": 5.510500968250675e-08, + "loss": 0.4700300693511963, + "step": 7802 + }, + { + "epoch": 1.7988472622478386, + "grad_norm": 1.945100425844493, + "learning_rate": 5.49802937583691e-08, + "loss": 0.39876431226730347, + "step": 7803 + }, + { + "epoch": 1.799077809798271, + "grad_norm": 1.6345492691406456, + "learning_rate": 5.4855715135763927e-08, + "loss": 0.4913487434387207, + "step": 7804 + }, + { + "epoch": 1.7993083573487032, + "grad_norm": 2.0246385500829414, + "learning_rate": 5.473127383279119e-08, + "loss": 0.4329494833946228, + "step": 7805 + }, + { + "epoch": 1.7995389048991355, + "grad_norm": 1.5152375389497672, + "learning_rate": 5.460696986753099e-08, + "loss": 0.35094910860061646, + "step": 7806 + }, + { + "epoch": 1.7997694524495678, + "grad_norm": 1.5548589132603017, + "learning_rate": 5.448280325804322e-08, + "loss": 0.4664410948753357, + "step": 7807 + }, + { + "epoch": 1.8, + "grad_norm": 1.9514902411487012, + "learning_rate": 5.435877402236821e-08, + "loss": 0.48508626222610474, + "step": 7808 + }, + { + "epoch": 1.8002305475504323, + "grad_norm": 1.7554423106257484, + "learning_rate": 5.4234882178525896e-08, + "loss": 0.5313225388526917, + "step": 7809 + }, + { + "epoch": 1.8004610951008646, + "grad_norm": 1.3389949821748275, + "learning_rate": 5.411112774451665e-08, + "loss": 0.4444521963596344, + "step": 7810 + }, + { + "epoch": 1.8006916426512967, + "grad_norm": 1.635479397086268, + "learning_rate": 5.398751073832075e-08, + "loss": 0.448794424533844, + "step": 7811 + }, + { + "epoch": 1.800922190201729, + "grad_norm": 1.5018240504535878, + "learning_rate": 5.38640311778984e-08, + "loss": 0.3712002635002136, + "step": 7812 + }, + { + "epoch": 1.8011527377521612, + "grad_norm": 1.9098610685931527, + "learning_rate": 5.3740689081189784e-08, + "loss": 0.4695093333721161, + "step": 7813 + }, + { + "epoch": 1.8013832853025935, + "grad_norm": 1.777484518094469, + "learning_rate": 5.361748446611525e-08, + "loss": 0.4755779206752777, + "step": 7814 + }, + { + "epoch": 1.8016138328530258, + "grad_norm": 1.5127484913328306, + "learning_rate": 5.349441735057514e-08, + "loss": 0.4299379289150238, + "step": 7815 + }, + { + "epoch": 1.801844380403458, + "grad_norm": 1.9413837097604036, + "learning_rate": 5.337148775245004e-08, + "loss": 0.43760111927986145, + "step": 7816 + }, + { + "epoch": 1.8020749279538903, + "grad_norm": 1.502605290866374, + "learning_rate": 5.324869568960011e-08, + "loss": 0.4866371750831604, + "step": 7817 + }, + { + "epoch": 1.8023054755043226, + "grad_norm": 1.8861304768122535, + "learning_rate": 5.312604117986586e-08, + "loss": 0.4446576237678528, + "step": 7818 + }, + { + "epoch": 1.802536023054755, + "grad_norm": 1.6888151118163675, + "learning_rate": 5.30035242410678e-08, + "loss": 0.4882046580314636, + "step": 7819 + }, + { + "epoch": 1.8027665706051872, + "grad_norm": 1.6607748455710494, + "learning_rate": 5.288114489100615e-08, + "loss": 0.5194848775863647, + "step": 7820 + }, + { + "epoch": 1.8029971181556195, + "grad_norm": 1.5306526873181203, + "learning_rate": 5.2758903147461456e-08, + "loss": 0.47810953855514526, + "step": 7821 + }, + { + "epoch": 1.8032276657060518, + "grad_norm": 1.3512000021688482, + "learning_rate": 5.2636799028194175e-08, + "loss": 0.4498436450958252, + "step": 7822 + }, + { + "epoch": 1.803458213256484, + "grad_norm": 1.8685173906778947, + "learning_rate": 5.251483255094469e-08, + "loss": 0.4595226049423218, + "step": 7823 + }, + { + "epoch": 1.8036887608069163, + "grad_norm": 1.9918192648087005, + "learning_rate": 5.2393003733433695e-08, + "loss": 0.4187047481536865, + "step": 7824 + }, + { + "epoch": 1.8039193083573486, + "grad_norm": 1.9097202165834481, + "learning_rate": 5.2271312593361593e-08, + "loss": 0.46125978231430054, + "step": 7825 + }, + { + "epoch": 1.8041498559077809, + "grad_norm": 1.6079780482495436, + "learning_rate": 5.214975914840847e-08, + "loss": 0.5298447608947754, + "step": 7826 + }, + { + "epoch": 1.8043804034582132, + "grad_norm": 1.8654987830999303, + "learning_rate": 5.20283434162353e-08, + "loss": 0.36386626958847046, + "step": 7827 + }, + { + "epoch": 1.8046109510086454, + "grad_norm": 1.8624196207161894, + "learning_rate": 5.190706541448209e-08, + "loss": 0.46004655957221985, + "step": 7828 + }, + { + "epoch": 1.8048414985590777, + "grad_norm": 1.528524654700596, + "learning_rate": 5.178592516076963e-08, + "loss": 0.43738240003585815, + "step": 7829 + }, + { + "epoch": 1.80507204610951, + "grad_norm": 1.7413237279253044, + "learning_rate": 5.166492267269795e-08, + "loss": 0.4735296368598938, + "step": 7830 + }, + { + "epoch": 1.8053025936599423, + "grad_norm": 1.4916939190312641, + "learning_rate": 5.154405796784789e-08, + "loss": 0.40936362743377686, + "step": 7831 + }, + { + "epoch": 1.8055331412103746, + "grad_norm": 1.4495523848817589, + "learning_rate": 5.142333106377961e-08, + "loss": 0.45034337043762207, + "step": 7832 + }, + { + "epoch": 1.8057636887608068, + "grad_norm": 1.3784789325623554, + "learning_rate": 5.13027419780333e-08, + "loss": 0.44316160678863525, + "step": 7833 + }, + { + "epoch": 1.8059942363112391, + "grad_norm": 1.4092068526068369, + "learning_rate": 5.118229072812952e-08, + "loss": 0.4842323660850525, + "step": 7834 + }, + { + "epoch": 1.8062247838616714, + "grad_norm": 1.566812551044544, + "learning_rate": 5.10619773315687e-08, + "loss": 0.4448675513267517, + "step": 7835 + }, + { + "epoch": 1.8064553314121037, + "grad_norm": 1.6102992842469919, + "learning_rate": 5.0941801805830743e-08, + "loss": 0.38104236125946045, + "step": 7836 + }, + { + "epoch": 1.806685878962536, + "grad_norm": 1.5296809805750156, + "learning_rate": 5.082176416837636e-08, + "loss": 0.44963395595550537, + "step": 7837 + }, + { + "epoch": 1.8069164265129682, + "grad_norm": 1.5844384454532618, + "learning_rate": 5.070186443664548e-08, + "loss": 0.5729601383209229, + "step": 7838 + }, + { + "epoch": 1.8071469740634005, + "grad_norm": 1.4362989763417529, + "learning_rate": 5.058210262805818e-08, + "loss": 0.41734176874160767, + "step": 7839 + }, + { + "epoch": 1.8073775216138328, + "grad_norm": 1.693804486024896, + "learning_rate": 5.046247876001497e-08, + "loss": 0.3615468740463257, + "step": 7840 + }, + { + "epoch": 1.807608069164265, + "grad_norm": 1.5964757784006058, + "learning_rate": 5.034299284989563e-08, + "loss": 0.4586338698863983, + "step": 7841 + }, + { + "epoch": 1.8078386167146974, + "grad_norm": 1.7334876895530935, + "learning_rate": 5.022364491506037e-08, + "loss": 0.5273293852806091, + "step": 7842 + }, + { + "epoch": 1.8080691642651296, + "grad_norm": 1.8912136860353268, + "learning_rate": 5.0104434972849106e-08, + "loss": 0.4835001528263092, + "step": 7843 + }, + { + "epoch": 1.808299711815562, + "grad_norm": 1.7745284627014155, + "learning_rate": 4.99853630405821e-08, + "loss": 0.5108824372291565, + "step": 7844 + }, + { + "epoch": 1.8085302593659942, + "grad_norm": 1.700329152976267, + "learning_rate": 4.986642913555894e-08, + "loss": 0.3851168751716614, + "step": 7845 + }, + { + "epoch": 1.8087608069164265, + "grad_norm": 1.7141226792443536, + "learning_rate": 4.9747633275059486e-08, + "loss": 0.5318799614906311, + "step": 7846 + }, + { + "epoch": 1.8089913544668588, + "grad_norm": 1.7788983980504363, + "learning_rate": 4.962897547634359e-08, + "loss": 0.4470546245574951, + "step": 7847 + }, + { + "epoch": 1.809221902017291, + "grad_norm": 1.799452040929468, + "learning_rate": 4.951045575665114e-08, + "loss": 0.4645649194717407, + "step": 7848 + }, + { + "epoch": 1.8094524495677233, + "grad_norm": 1.7833884340536976, + "learning_rate": 4.9392074133201675e-08, + "loss": 0.5158429741859436, + "step": 7849 + }, + { + "epoch": 1.8096829971181556, + "grad_norm": 1.6174396436681213, + "learning_rate": 4.927383062319501e-08, + "loss": 0.45542553067207336, + "step": 7850 + }, + { + "epoch": 1.809913544668588, + "grad_norm": 1.7293030074031477, + "learning_rate": 4.915572524381051e-08, + "loss": 0.4432970881462097, + "step": 7851 + }, + { + "epoch": 1.8101440922190202, + "grad_norm": 1.4973425388688861, + "learning_rate": 4.903775801220755e-08, + "loss": 0.4452818036079407, + "step": 7852 + }, + { + "epoch": 1.8103746397694525, + "grad_norm": 1.4630308885956378, + "learning_rate": 4.891992894552588e-08, + "loss": 0.5507289171218872, + "step": 7853 + }, + { + "epoch": 1.8106051873198847, + "grad_norm": 1.9247487981178528, + "learning_rate": 4.880223806088446e-08, + "loss": 0.5079092979431152, + "step": 7854 + }, + { + "epoch": 1.810835734870317, + "grad_norm": 1.5662190264538383, + "learning_rate": 4.8684685375382726e-08, + "loss": 0.5003194808959961, + "step": 7855 + }, + { + "epoch": 1.8110662824207493, + "grad_norm": 1.6396730822390861, + "learning_rate": 4.856727090610002e-08, + "loss": 0.4839469790458679, + "step": 7856 + }, + { + "epoch": 1.8112968299711816, + "grad_norm": 1.5156804101091823, + "learning_rate": 4.8449994670095254e-08, + "loss": 0.4743555784225464, + "step": 7857 + }, + { + "epoch": 1.8115273775216139, + "grad_norm": 1.8329893402310773, + "learning_rate": 4.8332856684407565e-08, + "loss": 0.48717260360717773, + "step": 7858 + }, + { + "epoch": 1.8117579250720461, + "grad_norm": 1.6627403476546345, + "learning_rate": 4.821585696605568e-08, + "loss": 0.4353930354118347, + "step": 7859 + }, + { + "epoch": 1.8119884726224784, + "grad_norm": 1.5088427953216483, + "learning_rate": 4.809899553203844e-08, + "loss": 0.5033609867095947, + "step": 7860 + }, + { + "epoch": 1.8122190201729107, + "grad_norm": 2.0189049548486864, + "learning_rate": 4.798227239933495e-08, + "loss": 0.6085183620452881, + "step": 7861 + }, + { + "epoch": 1.812449567723343, + "grad_norm": 1.5411871293354347, + "learning_rate": 4.7865687584903503e-08, + "loss": 0.4610944986343384, + "step": 7862 + }, + { + "epoch": 1.8126801152737753, + "grad_norm": 1.5441686393391751, + "learning_rate": 4.7749241105682905e-08, + "loss": 0.43830424547195435, + "step": 7863 + }, + { + "epoch": 1.8129106628242075, + "grad_norm": 1.8213823551119195, + "learning_rate": 4.76329329785915e-08, + "loss": 0.44822877645492554, + "step": 7864 + }, + { + "epoch": 1.8131412103746398, + "grad_norm": 1.7073238966182782, + "learning_rate": 4.751676322052756e-08, + "loss": 0.506583571434021, + "step": 7865 + }, + { + "epoch": 1.813371757925072, + "grad_norm": 1.7477332396001743, + "learning_rate": 4.740073184836946e-08, + "loss": 0.3813684582710266, + "step": 7866 + }, + { + "epoch": 1.8136023054755044, + "grad_norm": 1.9006354309336493, + "learning_rate": 4.728483887897527e-08, + "loss": 0.45449137687683105, + "step": 7867 + }, + { + "epoch": 1.8138328530259367, + "grad_norm": 1.7166109802455793, + "learning_rate": 4.716908432918309e-08, + "loss": 0.42221778631210327, + "step": 7868 + }, + { + "epoch": 1.814063400576369, + "grad_norm": 1.4612227977115941, + "learning_rate": 4.705346821581102e-08, + "loss": 0.40090346336364746, + "step": 7869 + }, + { + "epoch": 1.8142939481268012, + "grad_norm": 1.5809246396763592, + "learning_rate": 4.693799055565673e-08, + "loss": 0.4458918869495392, + "step": 7870 + }, + { + "epoch": 1.8145244956772335, + "grad_norm": 1.8347081818496995, + "learning_rate": 4.682265136549768e-08, + "loss": 0.44281435012817383, + "step": 7871 + }, + { + "epoch": 1.8147550432276658, + "grad_norm": 1.9080323902144045, + "learning_rate": 4.670745066209192e-08, + "loss": 0.503116250038147, + "step": 7872 + }, + { + "epoch": 1.814985590778098, + "grad_norm": 1.450463241085339, + "learning_rate": 4.65923884621765e-08, + "loss": 0.37526172399520874, + "step": 7873 + }, + { + "epoch": 1.8152161383285303, + "grad_norm": 1.5131592845477215, + "learning_rate": 4.6477464782469054e-08, + "loss": 0.3724762201309204, + "step": 7874 + }, + { + "epoch": 1.8154466858789626, + "grad_norm": 1.4737758886121874, + "learning_rate": 4.636267963966656e-08, + "loss": 0.41912156343460083, + "step": 7875 + }, + { + "epoch": 1.815677233429395, + "grad_norm": 1.6217019756861422, + "learning_rate": 4.6248033050446336e-08, + "loss": 0.3820981979370117, + "step": 7876 + }, + { + "epoch": 1.8159077809798272, + "grad_norm": 1.72842311014601, + "learning_rate": 4.613352503146517e-08, + "loss": 0.5527099370956421, + "step": 7877 + }, + { + "epoch": 1.8161383285302595, + "grad_norm": 1.999533464900462, + "learning_rate": 4.601915559935987e-08, + "loss": 0.4588435888290405, + "step": 7878 + }, + { + "epoch": 1.8163688760806918, + "grad_norm": 1.6219483828800259, + "learning_rate": 4.5904924770747144e-08, + "loss": 0.4699181318283081, + "step": 7879 + }, + { + "epoch": 1.816599423631124, + "grad_norm": 1.5998365257982348, + "learning_rate": 4.5790832562223825e-08, + "loss": 0.45713916420936584, + "step": 7880 + }, + { + "epoch": 1.8168299711815563, + "grad_norm": 1.55324771004424, + "learning_rate": 4.5676878990366096e-08, + "loss": 0.38195744156837463, + "step": 7881 + }, + { + "epoch": 1.8170605187319886, + "grad_norm": 1.9872138227633411, + "learning_rate": 4.556306407173016e-08, + "loss": 0.44771432876586914, + "step": 7882 + }, + { + "epoch": 1.8172910662824209, + "grad_norm": 1.791967057454681, + "learning_rate": 4.5449387822852016e-08, + "loss": 0.5057739019393921, + "step": 7883 + }, + { + "epoch": 1.8175216138328532, + "grad_norm": 1.5014714706473309, + "learning_rate": 4.533585026024789e-08, + "loss": 0.4792659878730774, + "step": 7884 + }, + { + "epoch": 1.8177521613832854, + "grad_norm": 1.60058179236347, + "learning_rate": 4.52224514004137e-08, + "loss": 0.5714812874794006, + "step": 7885 + }, + { + "epoch": 1.8179827089337177, + "grad_norm": 1.6688246286733348, + "learning_rate": 4.510919125982482e-08, + "loss": 0.5826704502105713, + "step": 7886 + }, + { + "epoch": 1.81821325648415, + "grad_norm": 1.6495391511246666, + "learning_rate": 4.499606985493709e-08, + "loss": 0.37359529733657837, + "step": 7887 + }, + { + "epoch": 1.8184438040345823, + "grad_norm": 1.5419285481811944, + "learning_rate": 4.4883087202185696e-08, + "loss": 0.4854467213153839, + "step": 7888 + }, + { + "epoch": 1.8186743515850146, + "grad_norm": 1.4892991712894692, + "learning_rate": 4.477024331798562e-08, + "loss": 0.5693310499191284, + "step": 7889 + }, + { + "epoch": 1.8189048991354468, + "grad_norm": 1.6688923135871851, + "learning_rate": 4.46575382187323e-08, + "loss": 0.5239206552505493, + "step": 7890 + }, + { + "epoch": 1.8191354466858791, + "grad_norm": 1.5245551187313666, + "learning_rate": 4.4544971920800425e-08, + "loss": 0.4846993684768677, + "step": 7891 + }, + { + "epoch": 1.8193659942363114, + "grad_norm": 1.6316413156349714, + "learning_rate": 4.443254444054456e-08, + "loss": 0.4502261281013489, + "step": 7892 + }, + { + "epoch": 1.8195965417867437, + "grad_norm": 2.0167261690755445, + "learning_rate": 4.4320255794299655e-08, + "loss": 0.43649619817733765, + "step": 7893 + }, + { + "epoch": 1.8198270893371757, + "grad_norm": 1.483085562494321, + "learning_rate": 4.420810599837987e-08, + "loss": 0.5035286545753479, + "step": 7894 + }, + { + "epoch": 1.820057636887608, + "grad_norm": 1.4711203449015122, + "learning_rate": 4.4096095069079296e-08, + "loss": 0.39399099349975586, + "step": 7895 + }, + { + "epoch": 1.8202881844380403, + "grad_norm": 1.8796636800361097, + "learning_rate": 4.3984223022672015e-08, + "loss": 0.4257911443710327, + "step": 7896 + }, + { + "epoch": 1.8205187319884726, + "grad_norm": 1.6978394419820524, + "learning_rate": 4.387248987541181e-08, + "loss": 0.43202370405197144, + "step": 7897 + }, + { + "epoch": 1.8207492795389049, + "grad_norm": 1.6730325432474051, + "learning_rate": 4.376089564353258e-08, + "loss": 0.475969135761261, + "step": 7898 + }, + { + "epoch": 1.8209798270893371, + "grad_norm": 1.4401421618443093, + "learning_rate": 4.3649440343247466e-08, + "loss": 0.41923943161964417, + "step": 7899 + }, + { + "epoch": 1.8212103746397694, + "grad_norm": 1.5983550240567561, + "learning_rate": 4.3538123990750184e-08, + "loss": 0.4265141487121582, + "step": 7900 + }, + { + "epoch": 1.8214409221902017, + "grad_norm": 1.794717875578816, + "learning_rate": 4.342694660221358e-08, + "loss": 0.5309425592422485, + "step": 7901 + }, + { + "epoch": 1.821671469740634, + "grad_norm": 1.7266349285203761, + "learning_rate": 4.3315908193790384e-08, + "loss": 0.4863468110561371, + "step": 7902 + }, + { + "epoch": 1.8219020172910663, + "grad_norm": 1.5239348980367693, + "learning_rate": 4.320500878161382e-08, + "loss": 0.477884978055954, + "step": 7903 + }, + { + "epoch": 1.8221325648414985, + "grad_norm": 1.7240173692545748, + "learning_rate": 4.3094248381795874e-08, + "loss": 0.5297879576683044, + "step": 7904 + }, + { + "epoch": 1.8223631123919308, + "grad_norm": 1.7928218302726315, + "learning_rate": 4.298362701042924e-08, + "loss": 0.45477786660194397, + "step": 7905 + }, + { + "epoch": 1.822593659942363, + "grad_norm": 1.607998070552905, + "learning_rate": 4.287314468358605e-08, + "loss": 0.49528950452804565, + "step": 7906 + }, + { + "epoch": 1.8228242074927954, + "grad_norm": 1.3870439541037285, + "learning_rate": 4.276280141731814e-08, + "loss": 0.4592822194099426, + "step": 7907 + }, + { + "epoch": 1.8230547550432277, + "grad_norm": 1.5905605119266304, + "learning_rate": 4.265259722765713e-08, + "loss": 0.4995965361595154, + "step": 7908 + }, + { + "epoch": 1.82328530259366, + "grad_norm": 1.598553219734128, + "learning_rate": 4.254253213061476e-08, + "loss": 0.391417533159256, + "step": 7909 + }, + { + "epoch": 1.8235158501440922, + "grad_norm": 1.8868519986532408, + "learning_rate": 4.243260614218214e-08, + "loss": 0.5150176882743835, + "step": 7910 + }, + { + "epoch": 1.8237463976945245, + "grad_norm": 1.6268404324890209, + "learning_rate": 4.232281927833059e-08, + "loss": 0.4422363042831421, + "step": 7911 + }, + { + "epoch": 1.8239769452449568, + "grad_norm": 1.8210697233868607, + "learning_rate": 4.2213171555010696e-08, + "loss": 0.48169761896133423, + "step": 7912 + }, + { + "epoch": 1.824207492795389, + "grad_norm": 1.5167646952042861, + "learning_rate": 4.210366298815349e-08, + "loss": 0.48462286591529846, + "step": 7913 + }, + { + "epoch": 1.8244380403458214, + "grad_norm": 1.7110004354591246, + "learning_rate": 4.1994293593669236e-08, + "loss": 0.4958770275115967, + "step": 7914 + }, + { + "epoch": 1.8246685878962536, + "grad_norm": 1.529936451310175, + "learning_rate": 4.188506338744813e-08, + "loss": 0.4321090281009674, + "step": 7915 + }, + { + "epoch": 1.824899135446686, + "grad_norm": 1.6613956831762409, + "learning_rate": 4.1775972385360234e-08, + "loss": 0.4604012966156006, + "step": 7916 + }, + { + "epoch": 1.8251296829971182, + "grad_norm": 1.5114138411857048, + "learning_rate": 4.166702060325544e-08, + "loss": 0.4154652953147888, + "step": 7917 + }, + { + "epoch": 1.8253602305475505, + "grad_norm": 1.6531876577774711, + "learning_rate": 4.1558208056963086e-08, + "loss": 0.4332526922225952, + "step": 7918 + }, + { + "epoch": 1.8255907780979828, + "grad_norm": 1.3857924196517601, + "learning_rate": 4.1449534762292735e-08, + "loss": 0.3537461757659912, + "step": 7919 + }, + { + "epoch": 1.825821325648415, + "grad_norm": 1.6404383737371222, + "learning_rate": 4.134100073503344e-08, + "loss": 0.5045830607414246, + "step": 7920 + }, + { + "epoch": 1.826051873198847, + "grad_norm": 1.3402611675019687, + "learning_rate": 4.12326059909538e-08, + "loss": 0.39267051219940186, + "step": 7921 + }, + { + "epoch": 1.8262824207492794, + "grad_norm": 1.6633828678500635, + "learning_rate": 4.112435054580276e-08, + "loss": 0.48170942068099976, + "step": 7922 + }, + { + "epoch": 1.8265129682997117, + "grad_norm": 1.587202742021785, + "learning_rate": 4.101623441530855e-08, + "loss": 0.4519041180610657, + "step": 7923 + }, + { + "epoch": 1.826743515850144, + "grad_norm": 1.8456233576104417, + "learning_rate": 4.0908257615179467e-08, + "loss": 0.5125565528869629, + "step": 7924 + }, + { + "epoch": 1.8269740634005762, + "grad_norm": 1.700906466331933, + "learning_rate": 4.080042016110319e-08, + "loss": 0.4696243703365326, + "step": 7925 + }, + { + "epoch": 1.8272046109510085, + "grad_norm": 1.7066910811269922, + "learning_rate": 4.0692722068747745e-08, + "loss": 0.48319560289382935, + "step": 7926 + }, + { + "epoch": 1.8274351585014408, + "grad_norm": 1.6121339862505244, + "learning_rate": 4.0585163353760165e-08, + "loss": 0.4324444532394409, + "step": 7927 + }, + { + "epoch": 1.827665706051873, + "grad_norm": 1.7831255886648252, + "learning_rate": 4.0477744031767625e-08, + "loss": 0.472909152507782, + "step": 7928 + }, + { + "epoch": 1.8278962536023053, + "grad_norm": 1.718704717525841, + "learning_rate": 4.03704641183773e-08, + "loss": 0.4177134037017822, + "step": 7929 + }, + { + "epoch": 1.8281268011527376, + "grad_norm": 1.5718528363636486, + "learning_rate": 4.0263323629175724e-08, + "loss": 0.46940964460372925, + "step": 7930 + }, + { + "epoch": 1.82835734870317, + "grad_norm": 1.6808687193803358, + "learning_rate": 4.015632257972912e-08, + "loss": 0.4983375668525696, + "step": 7931 + }, + { + "epoch": 1.8285878962536022, + "grad_norm": 1.4863665442165397, + "learning_rate": 4.004946098558404e-08, + "loss": 0.44752681255340576, + "step": 7932 + }, + { + "epoch": 1.8288184438040345, + "grad_norm": 1.816894342124537, + "learning_rate": 3.9942738862266065e-08, + "loss": 0.5251951217651367, + "step": 7933 + }, + { + "epoch": 1.8290489913544667, + "grad_norm": 1.7568175864500193, + "learning_rate": 3.983615622528069e-08, + "loss": 0.4417540431022644, + "step": 7934 + }, + { + "epoch": 1.829279538904899, + "grad_norm": 1.436205708803146, + "learning_rate": 3.9729713090113635e-08, + "loss": 0.49237650632858276, + "step": 7935 + }, + { + "epoch": 1.8295100864553313, + "grad_norm": 1.5345793839651518, + "learning_rate": 3.962340947222953e-08, + "loss": 0.45194119215011597, + "step": 7936 + }, + { + "epoch": 1.8297406340057636, + "grad_norm": 1.4439924303115503, + "learning_rate": 3.9517245387073574e-08, + "loss": 0.36154115200042725, + "step": 7937 + }, + { + "epoch": 1.8299711815561959, + "grad_norm": 1.6402341415525263, + "learning_rate": 3.94112208500702e-08, + "loss": 0.42474454641342163, + "step": 7938 + }, + { + "epoch": 1.8302017291066282, + "grad_norm": 1.760242952555505, + "learning_rate": 3.9305335876623545e-08, + "loss": 0.49459904432296753, + "step": 7939 + }, + { + "epoch": 1.8304322766570604, + "grad_norm": 1.5170817136048635, + "learning_rate": 3.919959048211785e-08, + "loss": 0.4881632328033447, + "step": 7940 + }, + { + "epoch": 1.8306628242074927, + "grad_norm": 1.7241690625217052, + "learning_rate": 3.909398468191638e-08, + "loss": 0.4779052436351776, + "step": 7941 + }, + { + "epoch": 1.830893371757925, + "grad_norm": 1.6098002323896041, + "learning_rate": 3.898851849136298e-08, + "loss": 0.38114166259765625, + "step": 7942 + }, + { + "epoch": 1.8311239193083573, + "grad_norm": 1.7909906484652018, + "learning_rate": 3.8883191925780604e-08, + "loss": 0.5009176731109619, + "step": 7943 + }, + { + "epoch": 1.8313544668587896, + "grad_norm": 1.5346894615338271, + "learning_rate": 3.8778005000472125e-08, + "loss": 0.3927236795425415, + "step": 7944 + }, + { + "epoch": 1.8315850144092218, + "grad_norm": 1.793902552920017, + "learning_rate": 3.867295773072021e-08, + "loss": 0.502021074295044, + "step": 7945 + }, + { + "epoch": 1.8318155619596541, + "grad_norm": 1.5109925254989232, + "learning_rate": 3.85680501317871e-08, + "loss": 0.5001766681671143, + "step": 7946 + }, + { + "epoch": 1.8320461095100864, + "grad_norm": 1.4735968339841883, + "learning_rate": 3.8463282218914595e-08, + "loss": 0.35029879212379456, + "step": 7947 + }, + { + "epoch": 1.8322766570605187, + "grad_norm": 1.4571551513662828, + "learning_rate": 3.835865400732452e-08, + "loss": 0.40344852209091187, + "step": 7948 + }, + { + "epoch": 1.832507204610951, + "grad_norm": 1.8610379031960593, + "learning_rate": 3.8254165512218276e-08, + "loss": 0.4865550994873047, + "step": 7949 + }, + { + "epoch": 1.8327377521613832, + "grad_norm": 1.5911656176036144, + "learning_rate": 3.814981674877693e-08, + "loss": 0.4122176766395569, + "step": 7950 + }, + { + "epoch": 1.8329682997118155, + "grad_norm": 1.6337636431412388, + "learning_rate": 3.804560773216137e-08, + "loss": 0.4108060598373413, + "step": 7951 + }, + { + "epoch": 1.8331988472622478, + "grad_norm": 1.6927194768268683, + "learning_rate": 3.7941538477511914e-08, + "loss": 0.44751685857772827, + "step": 7952 + }, + { + "epoch": 1.83342939481268, + "grad_norm": 1.8176286894290226, + "learning_rate": 3.783760899994881e-08, + "loss": 0.5625091791152954, + "step": 7953 + }, + { + "epoch": 1.8336599423631124, + "grad_norm": 1.7256683800022798, + "learning_rate": 3.773381931457198e-08, + "loss": 0.5466880202293396, + "step": 7954 + }, + { + "epoch": 1.8338904899135446, + "grad_norm": 1.623766763090723, + "learning_rate": 3.7630169436460915e-08, + "loss": 0.4447929263114929, + "step": 7955 + }, + { + "epoch": 1.834121037463977, + "grad_norm": 2.1137124064205834, + "learning_rate": 3.7526659380675006e-08, + "loss": 0.5581841468811035, + "step": 7956 + }, + { + "epoch": 1.8343515850144092, + "grad_norm": 1.726869046029688, + "learning_rate": 3.74232891622529e-08, + "loss": 0.385328471660614, + "step": 7957 + }, + { + "epoch": 1.8345821325648415, + "grad_norm": 1.599527906679486, + "learning_rate": 3.732005879621358e-08, + "loss": 0.4313199520111084, + "step": 7958 + }, + { + "epoch": 1.8348126801152738, + "grad_norm": 1.8941416481134308, + "learning_rate": 3.721696829755505e-08, + "loss": 0.48205095529556274, + "step": 7959 + }, + { + "epoch": 1.835043227665706, + "grad_norm": 1.4975360935823894, + "learning_rate": 3.7114017681255324e-08, + "loss": 0.5020872354507446, + "step": 7960 + }, + { + "epoch": 1.8352737752161383, + "grad_norm": 1.6682410179064875, + "learning_rate": 3.701120696227222e-08, + "loss": 0.440343976020813, + "step": 7961 + }, + { + "epoch": 1.8355043227665706, + "grad_norm": 1.7650179418507541, + "learning_rate": 3.690853615554301e-08, + "loss": 0.44800078868865967, + "step": 7962 + }, + { + "epoch": 1.8357348703170029, + "grad_norm": 1.7281281440709166, + "learning_rate": 3.680600527598454e-08, + "loss": 0.4218701124191284, + "step": 7963 + }, + { + "epoch": 1.8359654178674352, + "grad_norm": 1.701041640665313, + "learning_rate": 3.6703614338493674e-08, + "loss": 0.4884364902973175, + "step": 7964 + }, + { + "epoch": 1.8361959654178674, + "grad_norm": 1.792804134531825, + "learning_rate": 3.6601363357946725e-08, + "loss": 0.472229540348053, + "step": 7965 + }, + { + "epoch": 1.8364265129682997, + "grad_norm": 1.4963073935095241, + "learning_rate": 3.6499252349199486e-08, + "loss": 0.411716490983963, + "step": 7966 + }, + { + "epoch": 1.836657060518732, + "grad_norm": 2.247546301190474, + "learning_rate": 3.639728132708797e-08, + "loss": 0.48858320713043213, + "step": 7967 + }, + { + "epoch": 1.8368876080691643, + "grad_norm": 1.8323771399281195, + "learning_rate": 3.629545030642711e-08, + "loss": 0.42073512077331543, + "step": 7968 + }, + { + "epoch": 1.8371181556195966, + "grad_norm": 1.926368162913144, + "learning_rate": 3.6193759302012296e-08, + "loss": 0.5200133919715881, + "step": 7969 + }, + { + "epoch": 1.8373487031700289, + "grad_norm": 1.6431215963345491, + "learning_rate": 3.609220832861781e-08, + "loss": 0.4794218838214874, + "step": 7970 + }, + { + "epoch": 1.8375792507204611, + "grad_norm": 1.7666660619120225, + "learning_rate": 3.599079740099831e-08, + "loss": 0.516029953956604, + "step": 7971 + }, + { + "epoch": 1.8378097982708934, + "grad_norm": 5.5684397461669874, + "learning_rate": 3.5889526533887434e-08, + "loss": 0.4258018434047699, + "step": 7972 + }, + { + "epoch": 1.8380403458213257, + "grad_norm": 1.4274910996664532, + "learning_rate": 3.5788395741998876e-08, + "loss": 0.4117387533187866, + "step": 7973 + }, + { + "epoch": 1.838270893371758, + "grad_norm": 1.8002661199837686, + "learning_rate": 3.5687405040025987e-08, + "loss": 0.43812108039855957, + "step": 7974 + }, + { + "epoch": 1.8385014409221903, + "grad_norm": 1.433184579488626, + "learning_rate": 3.558655444264158e-08, + "loss": 0.4854302406311035, + "step": 7975 + }, + { + "epoch": 1.8387319884726225, + "grad_norm": 1.6279391160867416, + "learning_rate": 3.5485843964498163e-08, + "loss": 0.47127220034599304, + "step": 7976 + }, + { + "epoch": 1.8389625360230548, + "grad_norm": 1.63494329169352, + "learning_rate": 3.538527362022814e-08, + "loss": 0.4757349491119385, + "step": 7977 + }, + { + "epoch": 1.839193083573487, + "grad_norm": 1.309631044571452, + "learning_rate": 3.5284843424443155e-08, + "loss": 0.427249938249588, + "step": 7978 + }, + { + "epoch": 1.8394236311239194, + "grad_norm": 1.370934941994731, + "learning_rate": 3.518455339173454e-08, + "loss": 0.409855455160141, + "step": 7979 + }, + { + "epoch": 1.8396541786743517, + "grad_norm": 1.5739512241520999, + "learning_rate": 3.5084403536673634e-08, + "loss": 0.38040632009506226, + "step": 7980 + }, + { + "epoch": 1.839884726224784, + "grad_norm": 1.91777543224754, + "learning_rate": 3.498439387381103e-08, + "loss": 0.4681670069694519, + "step": 7981 + }, + { + "epoch": 1.8401152737752162, + "grad_norm": 1.6476666655223309, + "learning_rate": 3.4884524417677086e-08, + "loss": 0.5145970582962036, + "step": 7982 + }, + { + "epoch": 1.8403458213256485, + "grad_norm": 1.7150302446575982, + "learning_rate": 3.478479518278199e-08, + "loss": 0.5431094169616699, + "step": 7983 + }, + { + "epoch": 1.8405763688760808, + "grad_norm": 1.6753926575592268, + "learning_rate": 3.4685206183615146e-08, + "loss": 0.5518392324447632, + "step": 7984 + }, + { + "epoch": 1.840806916426513, + "grad_norm": 1.4372422949374304, + "learning_rate": 3.458575743464598e-08, + "loss": 0.5075215101242065, + "step": 7985 + }, + { + "epoch": 1.8410374639769453, + "grad_norm": 1.507303141864474, + "learning_rate": 3.448644895032304e-08, + "loss": 0.42477503418922424, + "step": 7986 + }, + { + "epoch": 1.8412680115273776, + "grad_norm": 1.4856885288037505, + "learning_rate": 3.4387280745075134e-08, + "loss": 0.507225751876831, + "step": 7987 + }, + { + "epoch": 1.84149855907781, + "grad_norm": 1.4825041181942642, + "learning_rate": 3.428825283331027e-08, + "loss": 0.4144738018512726, + "step": 7988 + }, + { + "epoch": 1.8417291066282422, + "grad_norm": 1.9591826993684927, + "learning_rate": 3.418936522941618e-08, + "loss": 0.3863438367843628, + "step": 7989 + }, + { + "epoch": 1.8419596541786745, + "grad_norm": 1.5040121036504583, + "learning_rate": 3.409061794776025e-08, + "loss": 0.39375755190849304, + "step": 7990 + }, + { + "epoch": 1.8421902017291067, + "grad_norm": 1.4086042716424876, + "learning_rate": 3.3992011002689334e-08, + "loss": 0.48356667160987854, + "step": 7991 + }, + { + "epoch": 1.842420749279539, + "grad_norm": 1.594841231811865, + "learning_rate": 3.3893544408529985e-08, + "loss": 0.42886489629745483, + "step": 7992 + }, + { + "epoch": 1.8426512968299713, + "grad_norm": 1.6653919162381416, + "learning_rate": 3.3795218179588524e-08, + "loss": 0.36877313256263733, + "step": 7993 + }, + { + "epoch": 1.8428818443804036, + "grad_norm": 1.833016012492649, + "learning_rate": 3.369703233015053e-08, + "loss": 0.45927101373672485, + "step": 7994 + }, + { + "epoch": 1.8431123919308359, + "grad_norm": 1.7521984280825604, + "learning_rate": 3.3598986874481484e-08, + "loss": 0.478916734457016, + "step": 7995 + }, + { + "epoch": 1.8433429394812682, + "grad_norm": 1.5148570498452396, + "learning_rate": 3.350108182682654e-08, + "loss": 0.4402740001678467, + "step": 7996 + }, + { + "epoch": 1.8435734870317004, + "grad_norm": 1.4672604870133275, + "learning_rate": 3.3403317201409986e-08, + "loss": 0.45362555980682373, + "step": 7997 + }, + { + "epoch": 1.8438040345821327, + "grad_norm": 1.7127744840337602, + "learning_rate": 3.330569301243602e-08, + "loss": 0.42510533332824707, + "step": 7998 + }, + { + "epoch": 1.844034582132565, + "grad_norm": 1.8047053012106982, + "learning_rate": 3.320820927408874e-08, + "loss": 0.4747004508972168, + "step": 7999 + }, + { + "epoch": 1.8442651296829973, + "grad_norm": 1.4734662369910507, + "learning_rate": 3.3110866000531144e-08, + "loss": 0.4616791307926178, + "step": 8000 + }, + { + "epoch": 1.8444956772334296, + "grad_norm": 1.5205060225146327, + "learning_rate": 3.301366320590659e-08, + "loss": 0.5137572288513184, + "step": 8001 + }, + { + "epoch": 1.8447262247838618, + "grad_norm": 1.7717149493752453, + "learning_rate": 3.291660090433734e-08, + "loss": 0.5519400835037231, + "step": 8002 + }, + { + "epoch": 1.844956772334294, + "grad_norm": 1.46812017440615, + "learning_rate": 3.281967910992556e-08, + "loss": 0.46323487162590027, + "step": 8003 + }, + { + "epoch": 1.8451873198847262, + "grad_norm": 1.662894278457401, + "learning_rate": 3.272289783675308e-08, + "loss": 0.4647497832775116, + "step": 8004 + }, + { + "epoch": 1.8454178674351585, + "grad_norm": 1.5023892185449783, + "learning_rate": 3.262625709888101e-08, + "loss": 0.40287381410598755, + "step": 8005 + }, + { + "epoch": 1.8456484149855907, + "grad_norm": 1.7424527329087247, + "learning_rate": 3.252975691035042e-08, + "loss": 0.4404665231704712, + "step": 8006 + }, + { + "epoch": 1.845878962536023, + "grad_norm": 1.7167182806108425, + "learning_rate": 3.2433397285181906e-08, + "loss": 0.4510694742202759, + "step": 8007 + }, + { + "epoch": 1.8461095100864553, + "grad_norm": 1.808603235242201, + "learning_rate": 3.233717823737536e-08, + "loss": 0.5496842861175537, + "step": 8008 + }, + { + "epoch": 1.8463400576368876, + "grad_norm": 1.2919273138221294, + "learning_rate": 3.2241099780910385e-08, + "loss": 0.3442491590976715, + "step": 8009 + }, + { + "epoch": 1.8465706051873199, + "grad_norm": 1.5771951982307342, + "learning_rate": 3.214516192974615e-08, + "loss": 0.4181824326515198, + "step": 8010 + }, + { + "epoch": 1.8468011527377521, + "grad_norm": 2.536239745080693, + "learning_rate": 3.204936469782149e-08, + "loss": 0.5211422443389893, + "step": 8011 + }, + { + "epoch": 1.8470317002881844, + "grad_norm": 1.7289079469042739, + "learning_rate": 3.195370809905484e-08, + "loss": 0.4499666690826416, + "step": 8012 + }, + { + "epoch": 1.8472622478386167, + "grad_norm": 1.521332043446148, + "learning_rate": 3.1858192147343977e-08, + "loss": 0.4396360218524933, + "step": 8013 + }, + { + "epoch": 1.847492795389049, + "grad_norm": 1.5919836717749307, + "learning_rate": 3.1762816856566454e-08, + "loss": 0.5222504734992981, + "step": 8014 + }, + { + "epoch": 1.8477233429394813, + "grad_norm": 1.7151085760805032, + "learning_rate": 3.16675822405793e-08, + "loss": 0.4413851499557495, + "step": 8015 + }, + { + "epoch": 1.8479538904899135, + "grad_norm": 1.5672185881021385, + "learning_rate": 3.1572488313218904e-08, + "loss": 0.4500206708908081, + "step": 8016 + }, + { + "epoch": 1.8481844380403458, + "grad_norm": 1.6869036603734429, + "learning_rate": 3.1477535088301755e-08, + "loss": 0.526034951210022, + "step": 8017 + }, + { + "epoch": 1.848414985590778, + "grad_norm": 1.719809726136178, + "learning_rate": 3.1382722579623376e-08, + "loss": 0.4836745858192444, + "step": 8018 + }, + { + "epoch": 1.8486455331412104, + "grad_norm": 1.6521486962664456, + "learning_rate": 3.128805080095898e-08, + "loss": 0.4079389274120331, + "step": 8019 + }, + { + "epoch": 1.8488760806916427, + "grad_norm": 1.4957674152417437, + "learning_rate": 3.1193519766063655e-08, + "loss": 0.44330352544784546, + "step": 8020 + }, + { + "epoch": 1.849106628242075, + "grad_norm": 1.9754043574725335, + "learning_rate": 3.109912948867166e-08, + "loss": 0.5943433046340942, + "step": 8021 + }, + { + "epoch": 1.8493371757925072, + "grad_norm": 1.6271444611930428, + "learning_rate": 3.100487998249679e-08, + "loss": 0.403645396232605, + "step": 8022 + }, + { + "epoch": 1.8495677233429395, + "grad_norm": 1.62649256942446, + "learning_rate": 3.091077126123254e-08, + "loss": 0.38898250460624695, + "step": 8023 + }, + { + "epoch": 1.8497982708933718, + "grad_norm": 1.6220661737220665, + "learning_rate": 3.0816803338551966e-08, + "loss": 0.4852311611175537, + "step": 8024 + }, + { + "epoch": 1.850028818443804, + "grad_norm": 1.5505326704151483, + "learning_rate": 3.072297622810782e-08, + "loss": 0.5085941553115845, + "step": 8025 + }, + { + "epoch": 1.8502593659942363, + "grad_norm": 1.5598630793553008, + "learning_rate": 3.062928994353187e-08, + "loss": 0.5087497234344482, + "step": 8026 + }, + { + "epoch": 1.8504899135446686, + "grad_norm": 1.515152966212454, + "learning_rate": 3.053574449843599e-08, + "loss": 0.41259822249412537, + "step": 8027 + }, + { + "epoch": 1.850720461095101, + "grad_norm": 1.6528788980804292, + "learning_rate": 3.044233990641143e-08, + "loss": 0.5031530857086182, + "step": 8028 + }, + { + "epoch": 1.8509510086455332, + "grad_norm": 1.558033346939741, + "learning_rate": 3.034907618102856e-08, + "loss": 0.5004956722259521, + "step": 8029 + }, + { + "epoch": 1.8511815561959655, + "grad_norm": 1.7892456756117716, + "learning_rate": 3.025595333583797e-08, + "loss": 0.506257951259613, + "step": 8030 + }, + { + "epoch": 1.8514121037463975, + "grad_norm": 1.695644279509625, + "learning_rate": 3.016297138436918e-08, + "loss": 0.3953269124031067, + "step": 8031 + }, + { + "epoch": 1.8516426512968298, + "grad_norm": 1.4718849636623421, + "learning_rate": 3.007013034013173e-08, + "loss": 0.4835085868835449, + "step": 8032 + }, + { + "epoch": 1.851873198847262, + "grad_norm": 1.8663871893102746, + "learning_rate": 2.997743021661448e-08, + "loss": 0.414350688457489, + "step": 8033 + }, + { + "epoch": 1.8521037463976944, + "grad_norm": 1.4238378341848068, + "learning_rate": 2.988487102728554e-08, + "loss": 0.4191391170024872, + "step": 8034 + }, + { + "epoch": 1.8523342939481267, + "grad_norm": 1.5441051894921214, + "learning_rate": 2.9792452785592947e-08, + "loss": 0.4623367190361023, + "step": 8035 + }, + { + "epoch": 1.852564841498559, + "grad_norm": 1.8389565697437233, + "learning_rate": 2.9700175504964175e-08, + "loss": 0.43516361713409424, + "step": 8036 + }, + { + "epoch": 1.8527953890489912, + "grad_norm": 1.3914012278068204, + "learning_rate": 2.9608039198805944e-08, + "loss": 0.38310742378234863, + "step": 8037 + }, + { + "epoch": 1.8530259365994235, + "grad_norm": 1.8121203481344736, + "learning_rate": 2.9516043880504882e-08, + "loss": 0.4491914212703705, + "step": 8038 + }, + { + "epoch": 1.8532564841498558, + "grad_norm": 1.4258495096905672, + "learning_rate": 2.9424189563426848e-08, + "loss": 0.40703436732292175, + "step": 8039 + }, + { + "epoch": 1.853487031700288, + "grad_norm": 1.6268645644688815, + "learning_rate": 2.9332476260917505e-08, + "loss": 0.4627934694290161, + "step": 8040 + }, + { + "epoch": 1.8537175792507203, + "grad_norm": 1.73857404363576, + "learning_rate": 2.9240903986301634e-08, + "loss": 0.5413684844970703, + "step": 8041 + }, + { + "epoch": 1.8539481268011526, + "grad_norm": 1.4418238041833238, + "learning_rate": 2.914947275288382e-08, + "loss": 0.48317795991897583, + "step": 8042 + }, + { + "epoch": 1.854178674351585, + "grad_norm": 1.9593036490159952, + "learning_rate": 2.9058182573947986e-08, + "loss": 0.5212484002113342, + "step": 8043 + }, + { + "epoch": 1.8544092219020172, + "grad_norm": 1.560534281141119, + "learning_rate": 2.896703346275775e-08, + "loss": 0.46468842029571533, + "step": 8044 + }, + { + "epoch": 1.8546397694524495, + "grad_norm": 1.4520449873662198, + "learning_rate": 2.8876025432556073e-08, + "loss": 0.4069516956806183, + "step": 8045 + }, + { + "epoch": 1.8548703170028817, + "grad_norm": 1.354675996882102, + "learning_rate": 2.8785158496565598e-08, + "loss": 0.41940397024154663, + "step": 8046 + }, + { + "epoch": 1.855100864553314, + "grad_norm": 1.3952687524013894, + "learning_rate": 2.869443266798832e-08, + "loss": 0.43869268894195557, + "step": 8047 + }, + { + "epoch": 1.8553314121037463, + "grad_norm": 1.6019003431265055, + "learning_rate": 2.8603847960005477e-08, + "loss": 0.42834728956222534, + "step": 8048 + }, + { + "epoch": 1.8555619596541786, + "grad_norm": 2.0473667943403324, + "learning_rate": 2.8513404385778428e-08, + "loss": 0.5309191942214966, + "step": 8049 + }, + { + "epoch": 1.8557925072046109, + "grad_norm": 1.6839541365806647, + "learning_rate": 2.8423101958447437e-08, + "loss": 0.40201905369758606, + "step": 8050 + }, + { + "epoch": 1.8560230547550431, + "grad_norm": 1.586417513378552, + "learning_rate": 2.8332940691132567e-08, + "loss": 0.5671436190605164, + "step": 8051 + }, + { + "epoch": 1.8562536023054754, + "grad_norm": 1.8928597752283605, + "learning_rate": 2.824292059693356e-08, + "loss": 0.3906604051589966, + "step": 8052 + }, + { + "epoch": 1.8564841498559077, + "grad_norm": 1.681854638373173, + "learning_rate": 2.815304168892918e-08, + "loss": 0.4506712555885315, + "step": 8053 + }, + { + "epoch": 1.85671469740634, + "grad_norm": 1.8408224537520856, + "learning_rate": 2.8063303980177866e-08, + "loss": 0.42090779542922974, + "step": 8054 + }, + { + "epoch": 1.8569452449567723, + "grad_norm": 1.4139598318096287, + "learning_rate": 2.7973707483717635e-08, + "loss": 0.34566083550453186, + "step": 8055 + }, + { + "epoch": 1.8571757925072045, + "grad_norm": 1.688139206456363, + "learning_rate": 2.7884252212565738e-08, + "loss": 0.5240504145622253, + "step": 8056 + }, + { + "epoch": 1.8574063400576368, + "grad_norm": 1.5731797225643818, + "learning_rate": 2.779493817971956e-08, + "loss": 0.5085941553115845, + "step": 8057 + }, + { + "epoch": 1.857636887608069, + "grad_norm": 1.5901229313111294, + "learning_rate": 2.7705765398155058e-08, + "loss": 0.4701150059700012, + "step": 8058 + }, + { + "epoch": 1.8578674351585014, + "grad_norm": 1.7286913224525837, + "learning_rate": 2.7616733880828304e-08, + "loss": 0.4169929325580597, + "step": 8059 + }, + { + "epoch": 1.8580979827089337, + "grad_norm": 1.6076137148912093, + "learning_rate": 2.7527843640674618e-08, + "loss": 0.4952937066555023, + "step": 8060 + }, + { + "epoch": 1.858328530259366, + "grad_norm": 1.881595697948473, + "learning_rate": 2.7439094690608787e-08, + "loss": 0.47481924295425415, + "step": 8061 + }, + { + "epoch": 1.8585590778097982, + "grad_norm": 1.5277002490466254, + "learning_rate": 2.735048704352527e-08, + "loss": 0.3752020299434662, + "step": 8062 + }, + { + "epoch": 1.8587896253602305, + "grad_norm": 1.809986336100686, + "learning_rate": 2.726202071229755e-08, + "loss": 0.5471283793449402, + "step": 8063 + }, + { + "epoch": 1.8590201729106628, + "grad_norm": 1.4088159083069367, + "learning_rate": 2.7173695709779008e-08, + "loss": 0.4557954668998718, + "step": 8064 + }, + { + "epoch": 1.859250720461095, + "grad_norm": 1.550122560907728, + "learning_rate": 2.7085512048802606e-08, + "loss": 0.40548449754714966, + "step": 8065 + }, + { + "epoch": 1.8594812680115274, + "grad_norm": 1.3791933294840308, + "learning_rate": 2.699746974218009e-08, + "loss": 0.43826359510421753, + "step": 8066 + }, + { + "epoch": 1.8597118155619596, + "grad_norm": 1.7188043861384448, + "learning_rate": 2.6909568802703453e-08, + "loss": 0.4630689024925232, + "step": 8067 + }, + { + "epoch": 1.859942363112392, + "grad_norm": 1.3522114967992507, + "learning_rate": 2.6821809243143367e-08, + "loss": 0.4804350733757019, + "step": 8068 + }, + { + "epoch": 1.8601729106628242, + "grad_norm": 1.6824990648749616, + "learning_rate": 2.6734191076250744e-08, + "loss": 0.5459887981414795, + "step": 8069 + }, + { + "epoch": 1.8604034582132565, + "grad_norm": 1.4159310296071261, + "learning_rate": 2.6646714314755513e-08, + "loss": 0.366889625787735, + "step": 8070 + }, + { + "epoch": 1.8606340057636888, + "grad_norm": 1.9582286801999929, + "learning_rate": 2.6559378971366953e-08, + "loss": 0.4883540868759155, + "step": 8071 + }, + { + "epoch": 1.860864553314121, + "grad_norm": 1.6324605985235177, + "learning_rate": 2.6472185058774243e-08, + "loss": 0.4964878261089325, + "step": 8072 + }, + { + "epoch": 1.8610951008645533, + "grad_norm": 1.673302060019101, + "learning_rate": 2.6385132589645697e-08, + "loss": 0.4955672323703766, + "step": 8073 + }, + { + "epoch": 1.8613256484149856, + "grad_norm": 1.6435310144192468, + "learning_rate": 2.6298221576628977e-08, + "loss": 0.42056921124458313, + "step": 8074 + }, + { + "epoch": 1.8615561959654179, + "grad_norm": 1.4381408247631826, + "learning_rate": 2.6211452032351534e-08, + "loss": 0.47011131048202515, + "step": 8075 + }, + { + "epoch": 1.8617867435158502, + "grad_norm": 1.509318362247471, + "learning_rate": 2.612482396941984e-08, + "loss": 0.3521801233291626, + "step": 8076 + }, + { + "epoch": 1.8620172910662824, + "grad_norm": 1.636071263399774, + "learning_rate": 2.6038337400420164e-08, + "loss": 0.4199404716491699, + "step": 8077 + }, + { + "epoch": 1.8622478386167147, + "grad_norm": 1.5150893291359375, + "learning_rate": 2.595199233791834e-08, + "loss": 0.40499597787857056, + "step": 8078 + }, + { + "epoch": 1.862478386167147, + "grad_norm": 1.7531061656772358, + "learning_rate": 2.586578879445922e-08, + "loss": 0.5024272799491882, + "step": 8079 + }, + { + "epoch": 1.8627089337175793, + "grad_norm": 1.5549183536571185, + "learning_rate": 2.5779726782567124e-08, + "loss": 0.4589402675628662, + "step": 8080 + }, + { + "epoch": 1.8629394812680116, + "grad_norm": 1.6355789321212768, + "learning_rate": 2.5693806314746157e-08, + "loss": 0.4677194356918335, + "step": 8081 + }, + { + "epoch": 1.8631700288184438, + "grad_norm": 1.6860171847729095, + "learning_rate": 2.560802740347956e-08, + "loss": 0.4410317540168762, + "step": 8082 + }, + { + "epoch": 1.8634005763688761, + "grad_norm": 1.8113207487688465, + "learning_rate": 2.5522390061230358e-08, + "loss": 0.5089725255966187, + "step": 8083 + }, + { + "epoch": 1.8636311239193084, + "grad_norm": 1.588730434569227, + "learning_rate": 2.543689430044038e-08, + "loss": 0.47457408905029297, + "step": 8084 + }, + { + "epoch": 1.8638616714697407, + "grad_norm": 1.4319853393057471, + "learning_rate": 2.535154013353169e-08, + "loss": 0.49628597497940063, + "step": 8085 + }, + { + "epoch": 1.864092219020173, + "grad_norm": 1.9014302038219766, + "learning_rate": 2.5266327572905144e-08, + "loss": 0.49943581223487854, + "step": 8086 + }, + { + "epoch": 1.8643227665706052, + "grad_norm": 1.7732762838979956, + "learning_rate": 2.5181256630941063e-08, + "loss": 0.4620710015296936, + "step": 8087 + }, + { + "epoch": 1.8645533141210375, + "grad_norm": 1.5972635490235074, + "learning_rate": 2.5096327319999555e-08, + "loss": 0.4752368927001953, + "step": 8088 + }, + { + "epoch": 1.8647838616714698, + "grad_norm": 1.4886652930685826, + "learning_rate": 2.50115396524202e-08, + "loss": 0.4133688509464264, + "step": 8089 + }, + { + "epoch": 1.865014409221902, + "grad_norm": 1.949281605819026, + "learning_rate": 2.492689364052125e-08, + "loss": 0.4419419765472412, + "step": 8090 + }, + { + "epoch": 1.8652449567723344, + "grad_norm": 1.7344029726538643, + "learning_rate": 2.4842389296601428e-08, + "loss": 0.4486713409423828, + "step": 8091 + }, + { + "epoch": 1.8654755043227667, + "grad_norm": 1.8228844715495627, + "learning_rate": 2.4758026632938022e-08, + "loss": 0.47762376070022583, + "step": 8092 + }, + { + "epoch": 1.865706051873199, + "grad_norm": 2.0782542300039517, + "learning_rate": 2.4673805661788007e-08, + "loss": 0.4357690215110779, + "step": 8093 + }, + { + "epoch": 1.8659365994236312, + "grad_norm": 1.8276227922492578, + "learning_rate": 2.458972639538792e-08, + "loss": 0.5147565603256226, + "step": 8094 + }, + { + "epoch": 1.8661671469740635, + "grad_norm": 1.6692516341274928, + "learning_rate": 2.4505788845953668e-08, + "loss": 0.39681991934776306, + "step": 8095 + }, + { + "epoch": 1.8663976945244958, + "grad_norm": 1.4487927889395267, + "learning_rate": 2.4421993025680265e-08, + "loss": 0.4552622437477112, + "step": 8096 + }, + { + "epoch": 1.866628242074928, + "grad_norm": 1.7503441015857129, + "learning_rate": 2.4338338946742752e-08, + "loss": 0.45923811197280884, + "step": 8097 + }, + { + "epoch": 1.8668587896253603, + "grad_norm": 1.502251183757353, + "learning_rate": 2.4254826621294966e-08, + "loss": 0.47280222177505493, + "step": 8098 + }, + { + "epoch": 1.8670893371757926, + "grad_norm": 1.456189671497043, + "learning_rate": 2.417145606147042e-08, + "loss": 0.4157524108886719, + "step": 8099 + }, + { + "epoch": 1.867319884726225, + "grad_norm": 1.4379336942099938, + "learning_rate": 2.4088227279381757e-08, + "loss": 0.42565596103668213, + "step": 8100 + }, + { + "epoch": 1.8675504322766572, + "grad_norm": 1.6304916370441087, + "learning_rate": 2.4005140287121528e-08, + "loss": 0.47616803646087646, + "step": 8101 + }, + { + "epoch": 1.8677809798270895, + "grad_norm": 1.515736347680141, + "learning_rate": 2.392219509676152e-08, + "loss": 0.4814712107181549, + "step": 8102 + }, + { + "epoch": 1.8680115273775217, + "grad_norm": 1.441013936213762, + "learning_rate": 2.383939172035243e-08, + "loss": 0.5077251195907593, + "step": 8103 + }, + { + "epoch": 1.868242074927954, + "grad_norm": 1.6890380706345745, + "learning_rate": 2.3756730169925075e-08, + "loss": 0.38274845480918884, + "step": 8104 + }, + { + "epoch": 1.8684726224783863, + "grad_norm": 2.0943824772733315, + "learning_rate": 2.3674210457489074e-08, + "loss": 0.4606715440750122, + "step": 8105 + }, + { + "epoch": 1.8687031700288186, + "grad_norm": 1.6432807697262717, + "learning_rate": 2.3591832595033723e-08, + "loss": 0.46634575724601746, + "step": 8106 + }, + { + "epoch": 1.8689337175792509, + "grad_norm": 1.8861732961995932, + "learning_rate": 2.3509596594527893e-08, + "loss": 0.5166279673576355, + "step": 8107 + }, + { + "epoch": 1.8691642651296831, + "grad_norm": 1.599107373552059, + "learning_rate": 2.3427502467919357e-08, + "loss": 0.4556184709072113, + "step": 8108 + }, + { + "epoch": 1.8693948126801154, + "grad_norm": 1.5633160648919202, + "learning_rate": 2.334555022713558e-08, + "loss": 0.43875348567962646, + "step": 8109 + }, + { + "epoch": 1.8696253602305477, + "grad_norm": 1.424121149992357, + "learning_rate": 2.326373988408359e-08, + "loss": 0.4601413607597351, + "step": 8110 + }, + { + "epoch": 1.86985590778098, + "grad_norm": 1.769455045601545, + "learning_rate": 2.318207145064921e-08, + "loss": 0.476366251707077, + "step": 8111 + }, + { + "epoch": 1.8700864553314123, + "grad_norm": 1.7211682546672895, + "learning_rate": 2.3100544938698396e-08, + "loss": 0.42146819829940796, + "step": 8112 + }, + { + "epoch": 1.8703170028818443, + "grad_norm": 1.6952336846407405, + "learning_rate": 2.3019160360075784e-08, + "loss": 0.5055359601974487, + "step": 8113 + }, + { + "epoch": 1.8705475504322766, + "grad_norm": 2.2577515216080406, + "learning_rate": 2.2937917726605803e-08, + "loss": 0.5550209879875183, + "step": 8114 + }, + { + "epoch": 1.8707780979827089, + "grad_norm": 1.6101290683313494, + "learning_rate": 2.2856817050092346e-08, + "loss": 0.4699084758758545, + "step": 8115 + }, + { + "epoch": 1.8710086455331412, + "grad_norm": 1.7215028056651225, + "learning_rate": 2.2775858342318323e-08, + "loss": 0.5803818702697754, + "step": 8116 + }, + { + "epoch": 1.8712391930835734, + "grad_norm": 1.7430698785050744, + "learning_rate": 2.2695041615046097e-08, + "loss": 0.5021014213562012, + "step": 8117 + }, + { + "epoch": 1.8714697406340057, + "grad_norm": 1.8052568666638755, + "learning_rate": 2.261436688001772e-08, + "loss": 0.47168630361557007, + "step": 8118 + }, + { + "epoch": 1.871700288184438, + "grad_norm": 1.4385974626029983, + "learning_rate": 2.2533834148954266e-08, + "loss": 0.4514986574649811, + "step": 8119 + }, + { + "epoch": 1.8719308357348703, + "grad_norm": 1.6683524018814926, + "learning_rate": 2.2453443433556373e-08, + "loss": 0.507361650466919, + "step": 8120 + }, + { + "epoch": 1.8721613832853026, + "grad_norm": 1.4143259238979617, + "learning_rate": 2.237319474550392e-08, + "loss": 0.46427232027053833, + "step": 8121 + }, + { + "epoch": 1.8723919308357349, + "grad_norm": 1.6353248584942321, + "learning_rate": 2.229308809645625e-08, + "loss": 0.4799825847148895, + "step": 8122 + }, + { + "epoch": 1.8726224783861671, + "grad_norm": 1.424423920616398, + "learning_rate": 2.2213123498051933e-08, + "loss": 0.4089062809944153, + "step": 8123 + }, + { + "epoch": 1.8728530259365994, + "grad_norm": 1.8857434186496964, + "learning_rate": 2.213330096190913e-08, + "loss": 0.6294845342636108, + "step": 8124 + }, + { + "epoch": 1.8730835734870317, + "grad_norm": 1.3744302251940603, + "learning_rate": 2.2053620499625003e-08, + "loss": 0.40907663106918335, + "step": 8125 + }, + { + "epoch": 1.873314121037464, + "grad_norm": 1.6129897410187974, + "learning_rate": 2.1974082122776627e-08, + "loss": 0.4643021821975708, + "step": 8126 + }, + { + "epoch": 1.8735446685878963, + "grad_norm": 1.3724860354028519, + "learning_rate": 2.189468584291976e-08, + "loss": 0.45625531673431396, + "step": 8127 + }, + { + "epoch": 1.8737752161383285, + "grad_norm": 1.5215478461136303, + "learning_rate": 2.1815431671590168e-08, + "loss": 0.3957236409187317, + "step": 8128 + }, + { + "epoch": 1.8740057636887608, + "grad_norm": 1.7966855487137956, + "learning_rate": 2.1736319620302423e-08, + "loss": 0.5302785634994507, + "step": 8129 + }, + { + "epoch": 1.874236311239193, + "grad_norm": 1.60041594224808, + "learning_rate": 2.1657349700550774e-08, + "loss": 0.5150219202041626, + "step": 8130 + }, + { + "epoch": 1.8744668587896254, + "grad_norm": 1.5418466180847037, + "learning_rate": 2.1578521923808712e-08, + "loss": 0.49146413803100586, + "step": 8131 + }, + { + "epoch": 1.8746974063400577, + "grad_norm": 1.7718176093322833, + "learning_rate": 2.1499836301529073e-08, + "loss": 0.5348008275032043, + "step": 8132 + }, + { + "epoch": 1.87492795389049, + "grad_norm": 1.8197773841806884, + "learning_rate": 2.1421292845144045e-08, + "loss": 0.5781833529472351, + "step": 8133 + }, + { + "epoch": 1.8751585014409222, + "grad_norm": 1.6712001358143913, + "learning_rate": 2.134289156606528e-08, + "loss": 0.4303954839706421, + "step": 8134 + }, + { + "epoch": 1.8753890489913545, + "grad_norm": 1.5346781500505091, + "learning_rate": 2.1264632475683665e-08, + "loss": 0.3662906289100647, + "step": 8135 + }, + { + "epoch": 1.8756195965417868, + "grad_norm": 1.8494759301054144, + "learning_rate": 2.1186515585369323e-08, + "loss": 0.4143645763397217, + "step": 8136 + }, + { + "epoch": 1.875850144092219, + "grad_norm": 1.6333327402319844, + "learning_rate": 2.110854090647185e-08, + "loss": 0.41202569007873535, + "step": 8137 + }, + { + "epoch": 1.8760806916426513, + "grad_norm": 1.8987185928487378, + "learning_rate": 2.1030708450320068e-08, + "loss": 0.4279648959636688, + "step": 8138 + }, + { + "epoch": 1.8763112391930836, + "grad_norm": 1.605664384270582, + "learning_rate": 2.0953018228222484e-08, + "loss": 0.4687902629375458, + "step": 8139 + }, + { + "epoch": 1.876541786743516, + "grad_norm": 1.7901855377352036, + "learning_rate": 2.0875470251466408e-08, + "loss": 0.44786232709884644, + "step": 8140 + }, + { + "epoch": 1.876772334293948, + "grad_norm": 1.5508099943894624, + "learning_rate": 2.0798064531319048e-08, + "loss": 0.43611055612564087, + "step": 8141 + }, + { + "epoch": 1.8770028818443802, + "grad_norm": 1.5918102453875247, + "learning_rate": 2.0720801079026407e-08, + "loss": 0.5804335474967957, + "step": 8142 + }, + { + "epoch": 1.8772334293948125, + "grad_norm": 1.450097927891864, + "learning_rate": 2.064367990581406e-08, + "loss": 0.3775164484977722, + "step": 8143 + }, + { + "epoch": 1.8774639769452448, + "grad_norm": 1.9129361746381695, + "learning_rate": 2.0566701022887044e-08, + "loss": 0.44318705797195435, + "step": 8144 + }, + { + "epoch": 1.877694524495677, + "grad_norm": 1.6495228346572506, + "learning_rate": 2.0489864441429526e-08, + "loss": 0.4423883557319641, + "step": 8145 + }, + { + "epoch": 1.8779250720461094, + "grad_norm": 2.010247138726775, + "learning_rate": 2.0413170172605022e-08, + "loss": 0.513456404209137, + "step": 8146 + }, + { + "epoch": 1.8781556195965416, + "grad_norm": 1.492461163153598, + "learning_rate": 2.0336618227556502e-08, + "loss": 0.37731099128723145, + "step": 8147 + }, + { + "epoch": 1.878386167146974, + "grad_norm": 1.85946715978495, + "learning_rate": 2.026020861740607e-08, + "loss": 0.5168889760971069, + "step": 8148 + }, + { + "epoch": 1.8786167146974062, + "grad_norm": 1.7236419764627888, + "learning_rate": 2.0183941353255407e-08, + "loss": 0.4313003122806549, + "step": 8149 + }, + { + "epoch": 1.8788472622478385, + "grad_norm": 1.6137221955666339, + "learning_rate": 2.010781644618509e-08, + "loss": 0.4891592562198639, + "step": 8150 + }, + { + "epoch": 1.8790778097982708, + "grad_norm": 1.5371431093905303, + "learning_rate": 2.003183390725549e-08, + "loss": 0.4526386260986328, + "step": 8151 + }, + { + "epoch": 1.879308357348703, + "grad_norm": 1.7453687827090651, + "learning_rate": 1.9955993747506005e-08, + "loss": 0.41896072030067444, + "step": 8152 + }, + { + "epoch": 1.8795389048991353, + "grad_norm": 1.3994731816960104, + "learning_rate": 1.9880295977955486e-08, + "loss": 0.4495571255683899, + "step": 8153 + }, + { + "epoch": 1.8797694524495676, + "grad_norm": 1.56608977367896, + "learning_rate": 1.980474060960191e-08, + "loss": 0.45853012800216675, + "step": 8154 + }, + { + "epoch": 1.88, + "grad_norm": 1.7029949485801334, + "learning_rate": 1.9729327653422834e-08, + "loss": 0.45541200041770935, + "step": 8155 + }, + { + "epoch": 1.8802305475504322, + "grad_norm": 1.6854487376632121, + "learning_rate": 1.9654057120374824e-08, + "loss": 0.48467034101486206, + "step": 8156 + }, + { + "epoch": 1.8804610951008645, + "grad_norm": 1.435646075094146, + "learning_rate": 1.957892902139402e-08, + "loss": 0.40592968463897705, + "step": 8157 + }, + { + "epoch": 1.8806916426512967, + "grad_norm": 1.546237825496461, + "learning_rate": 1.9503943367395692e-08, + "loss": 0.5229415893554688, + "step": 8158 + }, + { + "epoch": 1.880922190201729, + "grad_norm": 1.692350015447075, + "learning_rate": 1.942910016927446e-08, + "loss": 0.4685397744178772, + "step": 8159 + }, + { + "epoch": 1.8811527377521613, + "grad_norm": 1.7486120551838806, + "learning_rate": 1.93543994379044e-08, + "loss": 0.48139676451683044, + "step": 8160 + }, + { + "epoch": 1.8813832853025936, + "grad_norm": 1.4835995524654266, + "learning_rate": 1.9279841184138613e-08, + "loss": 0.4402969479560852, + "step": 8161 + }, + { + "epoch": 1.8816138328530259, + "grad_norm": 1.787346384712089, + "learning_rate": 1.920542541880954e-08, + "loss": 0.4192197024822235, + "step": 8162 + }, + { + "epoch": 1.8818443804034581, + "grad_norm": 1.5358084843474231, + "learning_rate": 1.913115215272931e-08, + "loss": 0.38458961248397827, + "step": 8163 + }, + { + "epoch": 1.8820749279538904, + "grad_norm": 2.0291302973789547, + "learning_rate": 1.9057021396688856e-08, + "loss": 0.4597528278827667, + "step": 8164 + }, + { + "epoch": 1.8823054755043227, + "grad_norm": 1.7232445103457965, + "learning_rate": 1.898303316145866e-08, + "loss": 0.5111892223358154, + "step": 8165 + }, + { + "epoch": 1.882536023054755, + "grad_norm": 1.7242418431244608, + "learning_rate": 1.8909187457788357e-08, + "loss": 0.406166672706604, + "step": 8166 + }, + { + "epoch": 1.8827665706051873, + "grad_norm": 2.128090849602097, + "learning_rate": 1.8835484296407134e-08, + "loss": 0.5733405947685242, + "step": 8167 + }, + { + "epoch": 1.8829971181556195, + "grad_norm": 1.8251609057771778, + "learning_rate": 1.8761923688023096e-08, + "loss": 0.4798845648765564, + "step": 8168 + }, + { + "epoch": 1.8832276657060518, + "grad_norm": 1.6647247641023675, + "learning_rate": 1.8688505643323916e-08, + "loss": 0.5291308164596558, + "step": 8169 + }, + { + "epoch": 1.883458213256484, + "grad_norm": 2.119330206077609, + "learning_rate": 1.8615230172976505e-08, + "loss": 0.5666407346725464, + "step": 8170 + }, + { + "epoch": 1.8836887608069164, + "grad_norm": 1.4319363819920743, + "learning_rate": 1.8542097287627123e-08, + "loss": 0.3907548785209656, + "step": 8171 + }, + { + "epoch": 1.8839193083573487, + "grad_norm": 1.6951882671978693, + "learning_rate": 1.846910699790094e-08, + "loss": 0.4055211544036865, + "step": 8172 + }, + { + "epoch": 1.884149855907781, + "grad_norm": 1.7439765908427145, + "learning_rate": 1.8396259314402918e-08, + "loss": 0.5032040476799011, + "step": 8173 + }, + { + "epoch": 1.8843804034582132, + "grad_norm": 1.6673926968240602, + "learning_rate": 1.832355424771703e-08, + "loss": 0.44346821308135986, + "step": 8174 + }, + { + "epoch": 1.8846109510086455, + "grad_norm": 1.727405604744471, + "learning_rate": 1.82509918084065e-08, + "loss": 0.49914759397506714, + "step": 8175 + }, + { + "epoch": 1.8848414985590778, + "grad_norm": 1.9751386726315723, + "learning_rate": 1.8178572007014005e-08, + "loss": 0.5221455693244934, + "step": 8176 + }, + { + "epoch": 1.88507204610951, + "grad_norm": 1.510541508033986, + "learning_rate": 1.810629485406112e-08, + "loss": 0.42171233892440796, + "step": 8177 + }, + { + "epoch": 1.8853025936599423, + "grad_norm": 1.6261053990647354, + "learning_rate": 1.8034160360049234e-08, + "loss": 0.36800506711006165, + "step": 8178 + }, + { + "epoch": 1.8855331412103746, + "grad_norm": 2.0110866169235972, + "learning_rate": 1.7962168535458842e-08, + "loss": 0.44831427931785583, + "step": 8179 + }, + { + "epoch": 1.885763688760807, + "grad_norm": 1.8264839989305564, + "learning_rate": 1.7890319390749255e-08, + "loss": 0.39368799328804016, + "step": 8180 + }, + { + "epoch": 1.8859942363112392, + "grad_norm": 1.8410759547301747, + "learning_rate": 1.7818612936359666e-08, + "loss": 0.5152736902236938, + "step": 8181 + }, + { + "epoch": 1.8862247838616715, + "grad_norm": 1.7682123665451497, + "learning_rate": 1.7747049182708086e-08, + "loss": 0.43120649456977844, + "step": 8182 + }, + { + "epoch": 1.8864553314121038, + "grad_norm": 1.5438355501907768, + "learning_rate": 1.767562814019208e-08, + "loss": 0.4215066432952881, + "step": 8183 + }, + { + "epoch": 1.886685878962536, + "grad_norm": 1.4643158724856222, + "learning_rate": 1.760434981918846e-08, + "loss": 0.44176608324050903, + "step": 8184 + }, + { + "epoch": 1.8869164265129683, + "grad_norm": 1.5750882708401341, + "learning_rate": 1.753321423005305e-08, + "loss": 0.46879637241363525, + "step": 8185 + }, + { + "epoch": 1.8871469740634006, + "grad_norm": 1.4540600241751436, + "learning_rate": 1.746222138312137e-08, + "loss": 0.42334824800491333, + "step": 8186 + }, + { + "epoch": 1.8873775216138329, + "grad_norm": 1.8563297655343165, + "learning_rate": 1.7391371288707712e-08, + "loss": 0.47822874784469604, + "step": 8187 + }, + { + "epoch": 1.8876080691642652, + "grad_norm": 1.4944491017270831, + "learning_rate": 1.7320663957105963e-08, + "loss": 0.42995691299438477, + "step": 8188 + }, + { + "epoch": 1.8878386167146974, + "grad_norm": 1.6297543723171652, + "learning_rate": 1.7250099398589125e-08, + "loss": 0.44044607877731323, + "step": 8189 + }, + { + "epoch": 1.8880691642651297, + "grad_norm": 1.6463599352675649, + "learning_rate": 1.717967762340944e-08, + "loss": 0.40334299206733704, + "step": 8190 + }, + { + "epoch": 1.888299711815562, + "grad_norm": 1.4705090673445491, + "learning_rate": 1.71093986417985e-08, + "loss": 0.4697923958301544, + "step": 8191 + }, + { + "epoch": 1.8885302593659943, + "grad_norm": 1.7306768855177799, + "learning_rate": 1.703926246396714e-08, + "loss": 0.5038257837295532, + "step": 8192 + }, + { + "epoch": 1.8887608069164266, + "grad_norm": 2.034955626144784, + "learning_rate": 1.6969269100105544e-08, + "loss": 0.5519133806228638, + "step": 8193 + }, + { + "epoch": 1.8889913544668588, + "grad_norm": 1.7878403739464412, + "learning_rate": 1.6899418560382796e-08, + "loss": 0.43426749110221863, + "step": 8194 + }, + { + "epoch": 1.8892219020172911, + "grad_norm": 1.609592778887584, + "learning_rate": 1.6829710854947553e-08, + "loss": 0.5156720280647278, + "step": 8195 + }, + { + "epoch": 1.8894524495677234, + "grad_norm": 1.9758568999656432, + "learning_rate": 1.6760145993927498e-08, + "loss": 0.47747790813446045, + "step": 8196 + }, + { + "epoch": 1.8896829971181557, + "grad_norm": 1.7451557727910063, + "learning_rate": 1.6690723987429877e-08, + "loss": 0.43909454345703125, + "step": 8197 + }, + { + "epoch": 1.889913544668588, + "grad_norm": 1.6052109954506801, + "learning_rate": 1.6621444845540845e-08, + "loss": 0.459448903799057, + "step": 8198 + }, + { + "epoch": 1.8901440922190202, + "grad_norm": 2.2071080011540185, + "learning_rate": 1.6552308578326125e-08, + "loss": 0.47015225887298584, + "step": 8199 + }, + { + "epoch": 1.8903746397694525, + "grad_norm": 1.713037489046331, + "learning_rate": 1.648331519583035e-08, + "loss": 0.5074043869972229, + "step": 8200 + }, + { + "epoch": 1.8906051873198848, + "grad_norm": 1.8682646944165644, + "learning_rate": 1.641446470807739e-08, + "loss": 0.54727703332901, + "step": 8201 + }, + { + "epoch": 1.890835734870317, + "grad_norm": 1.7945484902703406, + "learning_rate": 1.6345757125070802e-08, + "loss": 0.4788065552711487, + "step": 8202 + }, + { + "epoch": 1.8910662824207494, + "grad_norm": 1.5828500242044055, + "learning_rate": 1.6277192456792933e-08, + "loss": 0.4665883779525757, + "step": 8203 + }, + { + "epoch": 1.8912968299711816, + "grad_norm": 1.623464134473763, + "learning_rate": 1.6208770713205476e-08, + "loss": 0.47191154956817627, + "step": 8204 + }, + { + "epoch": 1.891527377521614, + "grad_norm": 1.7046539879820468, + "learning_rate": 1.6140491904249485e-08, + "loss": 0.48569393157958984, + "step": 8205 + }, + { + "epoch": 1.8917579250720462, + "grad_norm": 1.964953961808447, + "learning_rate": 1.6072356039845248e-08, + "loss": 0.4643250107765198, + "step": 8206 + }, + { + "epoch": 1.8919884726224785, + "grad_norm": 1.9677334911903335, + "learning_rate": 1.6004363129891952e-08, + "loss": 0.5228808522224426, + "step": 8207 + }, + { + "epoch": 1.8922190201729108, + "grad_norm": 1.6695153529028328, + "learning_rate": 1.5936513184268473e-08, + "loss": 0.6013127565383911, + "step": 8208 + }, + { + "epoch": 1.892449567723343, + "grad_norm": 1.4893331292755958, + "learning_rate": 1.5868806212832485e-08, + "loss": 0.435358464717865, + "step": 8209 + }, + { + "epoch": 1.8926801152737753, + "grad_norm": 1.6150621839423676, + "learning_rate": 1.580124222542134e-08, + "loss": 0.4283503293991089, + "step": 8210 + }, + { + "epoch": 1.8929106628242076, + "grad_norm": 1.6969954577088078, + "learning_rate": 1.5733821231851297e-08, + "loss": 0.48960375785827637, + "step": 8211 + }, + { + "epoch": 1.89314121037464, + "grad_norm": 2.1552511560352308, + "learning_rate": 1.566654324191785e-08, + "loss": 0.4763854742050171, + "step": 8212 + }, + { + "epoch": 1.8933717579250722, + "grad_norm": 1.6589761188821655, + "learning_rate": 1.5599408265395964e-08, + "loss": 0.4161483645439148, + "step": 8213 + }, + { + "epoch": 1.8936023054755045, + "grad_norm": 1.4659774004562973, + "learning_rate": 1.5532416312039387e-08, + "loss": 0.49362367391586304, + "step": 8214 + }, + { + "epoch": 1.8938328530259367, + "grad_norm": 1.5963069795498221, + "learning_rate": 1.5465567391581557e-08, + "loss": 0.4685269892215729, + "step": 8215 + }, + { + "epoch": 1.894063400576369, + "grad_norm": 1.6900259640459938, + "learning_rate": 1.539886151373493e-08, + "loss": 0.41390180587768555, + "step": 8216 + }, + { + "epoch": 1.8942939481268013, + "grad_norm": 1.6700784942712323, + "learning_rate": 1.5332298688191082e-08, + "loss": 0.4230450391769409, + "step": 8217 + }, + { + "epoch": 1.8945244956772336, + "grad_norm": 1.8268833435784397, + "learning_rate": 1.5265878924621056e-08, + "loss": 0.48427850008010864, + "step": 8218 + }, + { + "epoch": 1.8947550432276659, + "grad_norm": 1.4475419458342855, + "learning_rate": 1.5199602232674692e-08, + "loss": 0.5288915634155273, + "step": 8219 + }, + { + "epoch": 1.8949855907780981, + "grad_norm": 1.6861602943756362, + "learning_rate": 1.5133468621981505e-08, + "loss": 0.4539833664894104, + "step": 8220 + }, + { + "epoch": 1.8952161383285304, + "grad_norm": 2.50047407355109, + "learning_rate": 1.5067478102149922e-08, + "loss": 0.4823256731033325, + "step": 8221 + }, + { + "epoch": 1.8954466858789627, + "grad_norm": 1.997599510564075, + "learning_rate": 1.5001630682767718e-08, + "loss": 0.48464787006378174, + "step": 8222 + }, + { + "epoch": 1.8956772334293948, + "grad_norm": 1.5490561236499911, + "learning_rate": 1.4935926373401907e-08, + "loss": 0.49926498532295227, + "step": 8223 + }, + { + "epoch": 1.895907780979827, + "grad_norm": 1.7495479454026852, + "learning_rate": 1.4870365183598632e-08, + "loss": 0.4835718870162964, + "step": 8224 + }, + { + "epoch": 1.8961383285302593, + "grad_norm": 1.7245680541797082, + "learning_rate": 1.4804947122883049e-08, + "loss": 0.5158127546310425, + "step": 8225 + }, + { + "epoch": 1.8963688760806916, + "grad_norm": 1.5787923396995764, + "learning_rate": 1.473967220076e-08, + "loss": 0.45827725529670715, + "step": 8226 + }, + { + "epoch": 1.8965994236311239, + "grad_norm": 1.6342570379037924, + "learning_rate": 1.4674540426713012e-08, + "loss": 0.4177684187889099, + "step": 8227 + }, + { + "epoch": 1.8968299711815562, + "grad_norm": 1.7374508298892117, + "learning_rate": 1.4609551810205178e-08, + "loss": 0.4626643657684326, + "step": 8228 + }, + { + "epoch": 1.8970605187319884, + "grad_norm": 1.4056791598602973, + "learning_rate": 1.4544706360678616e-08, + "loss": 0.424638569355011, + "step": 8229 + }, + { + "epoch": 1.8972910662824207, + "grad_norm": 1.4685379441322668, + "learning_rate": 1.4480004087554898e-08, + "loss": 0.4635809659957886, + "step": 8230 + }, + { + "epoch": 1.897521613832853, + "grad_norm": 1.75143293227431, + "learning_rate": 1.4415445000234282e-08, + "loss": 0.4738515317440033, + "step": 8231 + }, + { + "epoch": 1.8977521613832853, + "grad_norm": 1.5264824829484198, + "learning_rate": 1.4351029108096713e-08, + "loss": 0.4863637089729309, + "step": 8232 + }, + { + "epoch": 1.8979827089337176, + "grad_norm": 1.5399019801695686, + "learning_rate": 1.4286756420501034e-08, + "loss": 0.5584152340888977, + "step": 8233 + }, + { + "epoch": 1.8982132564841498, + "grad_norm": 1.496901447925544, + "learning_rate": 1.4222626946785666e-08, + "loss": 0.4283461570739746, + "step": 8234 + }, + { + "epoch": 1.8984438040345821, + "grad_norm": 1.8147365486613847, + "learning_rate": 1.4158640696267598e-08, + "loss": 0.49863070249557495, + "step": 8235 + }, + { + "epoch": 1.8986743515850144, + "grad_norm": 1.6662108192886154, + "learning_rate": 1.409479767824362e-08, + "loss": 0.38217055797576904, + "step": 8236 + }, + { + "epoch": 1.8989048991354467, + "grad_norm": 1.6967046835869566, + "learning_rate": 1.4031097901989308e-08, + "loss": 0.4688405394554138, + "step": 8237 + }, + { + "epoch": 1.899135446685879, + "grad_norm": 1.9034545453492508, + "learning_rate": 1.3967541376759706e-08, + "loss": 0.5327590703964233, + "step": 8238 + }, + { + "epoch": 1.8993659942363113, + "grad_norm": 1.8446265826303438, + "learning_rate": 1.3904128111788872e-08, + "loss": 0.49749255180358887, + "step": 8239 + }, + { + "epoch": 1.8995965417867435, + "grad_norm": 1.7179092731540313, + "learning_rate": 1.3840858116289988e-08, + "loss": 0.5315423011779785, + "step": 8240 + }, + { + "epoch": 1.8998270893371758, + "grad_norm": 1.5737385318700703, + "learning_rate": 1.3777731399455594e-08, + "loss": 0.4651438593864441, + "step": 8241 + }, + { + "epoch": 1.900057636887608, + "grad_norm": 1.7144081952703678, + "learning_rate": 1.3714747970457352e-08, + "loss": 0.323408842086792, + "step": 8242 + }, + { + "epoch": 1.9002881844380404, + "grad_norm": 1.9581034490172329, + "learning_rate": 1.3651907838446275e-08, + "loss": 0.5402773022651672, + "step": 8243 + }, + { + "epoch": 1.9005187319884727, + "grad_norm": 1.670746605340447, + "learning_rate": 1.358921101255206e-08, + "loss": 0.5026879906654358, + "step": 8244 + }, + { + "epoch": 1.900749279538905, + "grad_norm": 1.7720812388937348, + "learning_rate": 1.3526657501884087e-08, + "loss": 0.5281597375869751, + "step": 8245 + }, + { + "epoch": 1.9009798270893372, + "grad_norm": 1.6385593343590161, + "learning_rate": 1.3464247315530642e-08, + "loss": 0.412728488445282, + "step": 8246 + }, + { + "epoch": 1.9012103746397695, + "grad_norm": 1.7237289199424797, + "learning_rate": 1.340198046255947e-08, + "loss": 0.4562723636627197, + "step": 8247 + }, + { + "epoch": 1.9014409221902018, + "grad_norm": 1.626201065698003, + "learning_rate": 1.3339856952017115e-08, + "loss": 0.39775967597961426, + "step": 8248 + }, + { + "epoch": 1.901671469740634, + "grad_norm": 1.6533133902767987, + "learning_rate": 1.3277876792929466e-08, + "loss": 0.39823421835899353, + "step": 8249 + }, + { + "epoch": 1.9019020172910661, + "grad_norm": 1.4228259330908177, + "learning_rate": 1.3216039994301765e-08, + "loss": 0.3946484923362732, + "step": 8250 + }, + { + "epoch": 1.9021325648414984, + "grad_norm": 2.068299680905734, + "learning_rate": 1.3154346565118046e-08, + "loss": 0.46879899501800537, + "step": 8251 + }, + { + "epoch": 1.9023631123919307, + "grad_norm": 1.6777738143005605, + "learning_rate": 1.3092796514341808e-08, + "loss": 0.4379505515098572, + "step": 8252 + }, + { + "epoch": 1.902593659942363, + "grad_norm": 1.5749007782930142, + "learning_rate": 1.3031389850915674e-08, + "loss": 0.39895427227020264, + "step": 8253 + }, + { + "epoch": 1.9028242074927952, + "grad_norm": 1.495609087075131, + "learning_rate": 1.2970126583761287e-08, + "loss": 0.4997497797012329, + "step": 8254 + }, + { + "epoch": 1.9030547550432275, + "grad_norm": 1.5959222152903147, + "learning_rate": 1.2909006721779858e-08, + "loss": 0.4764189124107361, + "step": 8255 + }, + { + "epoch": 1.9032853025936598, + "grad_norm": 1.5708268780308896, + "learning_rate": 1.2848030273851062e-08, + "loss": 0.42075514793395996, + "step": 8256 + }, + { + "epoch": 1.903515850144092, + "grad_norm": 2.1937828998511186, + "learning_rate": 1.278719724883437e-08, + "loss": 0.49138063192367554, + "step": 8257 + }, + { + "epoch": 1.9037463976945244, + "grad_norm": 1.513683079655307, + "learning_rate": 1.2726507655568264e-08, + "loss": 0.4185170531272888, + "step": 8258 + }, + { + "epoch": 1.9039769452449566, + "grad_norm": 1.412269470806139, + "learning_rate": 1.2665961502870026e-08, + "loss": 0.42299705743789673, + "step": 8259 + }, + { + "epoch": 1.904207492795389, + "grad_norm": 1.5645581808319793, + "learning_rate": 1.2605558799536508e-08, + "loss": 0.3970172703266144, + "step": 8260 + }, + { + "epoch": 1.9044380403458212, + "grad_norm": 1.8688835594223658, + "learning_rate": 1.2545299554343803e-08, + "loss": 0.48734235763549805, + "step": 8261 + }, + { + "epoch": 1.9046685878962535, + "grad_norm": 1.6788669457783065, + "learning_rate": 1.2485183776046793e-08, + "loss": 0.42422592639923096, + "step": 8262 + }, + { + "epoch": 1.9048991354466858, + "grad_norm": 1.5306993869763585, + "learning_rate": 1.2425211473379604e-08, + "loss": 0.43414005637168884, + "step": 8263 + }, + { + "epoch": 1.905129682997118, + "grad_norm": 1.657887750948753, + "learning_rate": 1.2365382655055601e-08, + "loss": 0.5316790342330933, + "step": 8264 + }, + { + "epoch": 1.9053602305475503, + "grad_norm": 1.9919373780449294, + "learning_rate": 1.2305697329767384e-08, + "loss": 0.517704963684082, + "step": 8265 + }, + { + "epoch": 1.9055907780979826, + "grad_norm": 1.671616617942954, + "learning_rate": 1.2246155506186572e-08, + "loss": 0.47799554467201233, + "step": 8266 + }, + { + "epoch": 1.9058213256484149, + "grad_norm": 1.5562617979556934, + "learning_rate": 1.2186757192963915e-08, + "loss": 0.5152599811553955, + "step": 8267 + }, + { + "epoch": 1.9060518731988472, + "grad_norm": 1.4688745064382045, + "learning_rate": 1.2127502398729505e-08, + "loss": 0.46499788761138916, + "step": 8268 + }, + { + "epoch": 1.9062824207492794, + "grad_norm": 1.8146565107608943, + "learning_rate": 1.2068391132092348e-08, + "loss": 0.5220280885696411, + "step": 8269 + }, + { + "epoch": 1.9065129682997117, + "grad_norm": 1.4497058250698829, + "learning_rate": 1.2009423401640684e-08, + "loss": 0.46435877680778503, + "step": 8270 + }, + { + "epoch": 1.906743515850144, + "grad_norm": 1.896368333082707, + "learning_rate": 1.1950599215941992e-08, + "loss": 0.5128264427185059, + "step": 8271 + }, + { + "epoch": 1.9069740634005763, + "grad_norm": 1.6162622871518073, + "learning_rate": 1.189191858354266e-08, + "loss": 0.39614224433898926, + "step": 8272 + }, + { + "epoch": 1.9072046109510086, + "grad_norm": 1.5987359191908617, + "learning_rate": 1.1833381512968422e-08, + "loss": 0.469489187002182, + "step": 8273 + }, + { + "epoch": 1.9074351585014409, + "grad_norm": 1.5090874127212548, + "learning_rate": 1.1774988012724363e-08, + "loss": 0.40567007660865784, + "step": 8274 + }, + { + "epoch": 1.9076657060518731, + "grad_norm": 1.8342116060768927, + "learning_rate": 1.1716738091294143e-08, + "loss": 0.47286513447761536, + "step": 8275 + }, + { + "epoch": 1.9078962536023054, + "grad_norm": 1.8677623698184516, + "learning_rate": 1.165863175714099e-08, + "loss": 0.4351140856742859, + "step": 8276 + }, + { + "epoch": 1.9081268011527377, + "grad_norm": 2.1204578995330823, + "learning_rate": 1.1600669018707043e-08, + "loss": 0.5400139093399048, + "step": 8277 + }, + { + "epoch": 1.90835734870317, + "grad_norm": 1.9666099328710733, + "learning_rate": 1.1542849884413897e-08, + "loss": 0.4617918133735657, + "step": 8278 + }, + { + "epoch": 1.9085878962536023, + "grad_norm": 1.7973383029247338, + "learning_rate": 1.1485174362661942e-08, + "loss": 0.4001161754131317, + "step": 8279 + }, + { + "epoch": 1.9088184438040345, + "grad_norm": 1.7749869657942918, + "learning_rate": 1.14276424618307e-08, + "loss": 0.4725416302680969, + "step": 8280 + }, + { + "epoch": 1.9090489913544668, + "grad_norm": 1.4489800751611797, + "learning_rate": 1.137025419027926e-08, + "loss": 0.43786871433258057, + "step": 8281 + }, + { + "epoch": 1.909279538904899, + "grad_norm": 1.6944845320829343, + "learning_rate": 1.1313009556345288e-08, + "loss": 0.4960116147994995, + "step": 8282 + }, + { + "epoch": 1.9095100864553314, + "grad_norm": 1.485040665380673, + "learning_rate": 1.1255908568345906e-08, + "loss": 0.46718698740005493, + "step": 8283 + }, + { + "epoch": 1.9097406340057637, + "grad_norm": 1.601866993353985, + "learning_rate": 1.119895123457737e-08, + "loss": 0.49856632947921753, + "step": 8284 + }, + { + "epoch": 1.909971181556196, + "grad_norm": 1.736808763671631, + "learning_rate": 1.1142137563314835e-08, + "loss": 0.44509345293045044, + "step": 8285 + }, + { + "epoch": 1.9102017291066282, + "grad_norm": 1.5155711234857085, + "learning_rate": 1.1085467562812812e-08, + "loss": 0.4501849412918091, + "step": 8286 + }, + { + "epoch": 1.9104322766570605, + "grad_norm": 1.864296209064819, + "learning_rate": 1.1028941241305046e-08, + "loss": 0.4580952823162079, + "step": 8287 + }, + { + "epoch": 1.9106628242074928, + "grad_norm": 1.820881586406844, + "learning_rate": 1.0972558607003968e-08, + "loss": 0.43742048740386963, + "step": 8288 + }, + { + "epoch": 1.910893371757925, + "grad_norm": 1.6142791937625995, + "learning_rate": 1.091631966810147e-08, + "loss": 0.41808924078941345, + "step": 8289 + }, + { + "epoch": 1.9111239193083573, + "grad_norm": 1.6208739211222591, + "learning_rate": 1.0860224432768462e-08, + "loss": 0.46481162309646606, + "step": 8290 + }, + { + "epoch": 1.9113544668587896, + "grad_norm": 2.011637000699936, + "learning_rate": 1.0804272909155087e-08, + "loss": 0.4603671431541443, + "step": 8291 + }, + { + "epoch": 1.911585014409222, + "grad_norm": 1.8494404636328983, + "learning_rate": 1.0748465105390403e-08, + "loss": 0.49557918310165405, + "step": 8292 + }, + { + "epoch": 1.9118155619596542, + "grad_norm": 2.002537998599403, + "learning_rate": 1.0692801029582809e-08, + "loss": 0.4407039284706116, + "step": 8293 + }, + { + "epoch": 1.9120461095100865, + "grad_norm": 1.7586281642289996, + "learning_rate": 1.0637280689819617e-08, + "loss": 0.5207707285881042, + "step": 8294 + }, + { + "epoch": 1.9122766570605187, + "grad_norm": 1.5038089673632116, + "learning_rate": 1.0581904094167483e-08, + "loss": 0.5129815340042114, + "step": 8295 + }, + { + "epoch": 1.912507204610951, + "grad_norm": 1.947553585968147, + "learning_rate": 1.0526671250671858e-08, + "loss": 0.39395007491111755, + "step": 8296 + }, + { + "epoch": 1.9127377521613833, + "grad_norm": 1.6292888672697963, + "learning_rate": 1.0471582167357662e-08, + "loss": 0.5052094459533691, + "step": 8297 + }, + { + "epoch": 1.9129682997118156, + "grad_norm": 1.8639782484748226, + "learning_rate": 1.0416636852228822e-08, + "loss": 0.5339310169219971, + "step": 8298 + }, + { + "epoch": 1.9131988472622479, + "grad_norm": 1.6724915320609384, + "learning_rate": 1.0361835313268064e-08, + "loss": 0.3916591703891754, + "step": 8299 + }, + { + "epoch": 1.9134293948126802, + "grad_norm": 1.6417155774632985, + "learning_rate": 1.0307177558437684e-08, + "loss": 0.43442589044570923, + "step": 8300 + }, + { + "epoch": 1.9136599423631124, + "grad_norm": 1.765418684368188, + "learning_rate": 1.0252663595678889e-08, + "loss": 0.48208218812942505, + "step": 8301 + }, + { + "epoch": 1.9138904899135447, + "grad_norm": 1.5550213363618528, + "learning_rate": 1.0198293432911898e-08, + "loss": 0.4363470673561096, + "step": 8302 + }, + { + "epoch": 1.914121037463977, + "grad_norm": 1.468689516141858, + "learning_rate": 1.0144067078036167e-08, + "loss": 0.43322134017944336, + "step": 8303 + }, + { + "epoch": 1.9143515850144093, + "grad_norm": 1.559621305722424, + "learning_rate": 1.0089984538930173e-08, + "loss": 0.432983934879303, + "step": 8304 + }, + { + "epoch": 1.9145821325648416, + "grad_norm": 1.8386361285939403, + "learning_rate": 1.0036045823451634e-08, + "loss": 0.4156948924064636, + "step": 8305 + }, + { + "epoch": 1.9148126801152738, + "grad_norm": 1.6606703244215435, + "learning_rate": 9.982250939437275e-09, + "loss": 0.4155515432357788, + "step": 8306 + }, + { + "epoch": 1.9150432276657061, + "grad_norm": 1.7059318829970815, + "learning_rate": 9.928599894702961e-09, + "loss": 0.47653689980506897, + "step": 8307 + }, + { + "epoch": 1.9152737752161384, + "grad_norm": 1.5705288067964687, + "learning_rate": 9.875092697043563e-09, + "loss": 0.45228058099746704, + "step": 8308 + }, + { + "epoch": 1.9155043227665707, + "grad_norm": 1.5823697184468708, + "learning_rate": 9.821729354232977e-09, + "loss": 0.4896412789821625, + "step": 8309 + }, + { + "epoch": 1.915734870317003, + "grad_norm": 1.7271525589204022, + "learning_rate": 9.768509874024556e-09, + "loss": 0.41047337651252747, + "step": 8310 + }, + { + "epoch": 1.9159654178674352, + "grad_norm": 1.6723293195859799, + "learning_rate": 9.715434264150557e-09, + "loss": 0.5180307626724243, + "step": 8311 + }, + { + "epoch": 1.9161959654178675, + "grad_norm": 1.7461325154516316, + "learning_rate": 9.662502532322147e-09, + "loss": 0.46440133452415466, + "step": 8312 + }, + { + "epoch": 1.9164265129682998, + "grad_norm": 1.6166489895049083, + "learning_rate": 9.609714686229952e-09, + "loss": 0.4066951274871826, + "step": 8313 + }, + { + "epoch": 1.916657060518732, + "grad_norm": 1.7089538604188352, + "learning_rate": 9.557070733543393e-09, + "loss": 0.4911927282810211, + "step": 8314 + }, + { + "epoch": 1.9168876080691644, + "grad_norm": 1.7069173470957535, + "learning_rate": 9.504570681910907e-09, + "loss": 0.5309121608734131, + "step": 8315 + }, + { + "epoch": 1.9171181556195966, + "grad_norm": 1.802146087409815, + "learning_rate": 9.452214538960501e-09, + "loss": 0.4592200517654419, + "step": 8316 + }, + { + "epoch": 1.917348703170029, + "grad_norm": 2.2439207686561886, + "learning_rate": 9.400002312298871e-09, + "loss": 0.44735923409461975, + "step": 8317 + }, + { + "epoch": 1.9175792507204612, + "grad_norm": 1.868917565152502, + "learning_rate": 9.347934009511837e-09, + "loss": 0.5140376091003418, + "step": 8318 + }, + { + "epoch": 1.9178097982708935, + "grad_norm": 1.4954089291718025, + "learning_rate": 9.29600963816446e-09, + "loss": 0.42626407742500305, + "step": 8319 + }, + { + "epoch": 1.9180403458213258, + "grad_norm": 1.5252659338318748, + "learning_rate": 9.244229205800813e-09, + "loss": 0.4173426628112793, + "step": 8320 + }, + { + "epoch": 1.918270893371758, + "grad_norm": 1.6423957945500776, + "learning_rate": 9.192592719943992e-09, + "loss": 0.41199928522109985, + "step": 8321 + }, + { + "epoch": 1.9185014409221903, + "grad_norm": 1.621590778725344, + "learning_rate": 9.14110018809644e-09, + "loss": 0.43618956208229065, + "step": 8322 + }, + { + "epoch": 1.9187319884726226, + "grad_norm": 1.7165545250310659, + "learning_rate": 9.089751617739172e-09, + "loss": 0.48510247468948364, + "step": 8323 + }, + { + "epoch": 1.9189625360230549, + "grad_norm": 2.19025289523308, + "learning_rate": 9.038547016332776e-09, + "loss": 0.46992772817611694, + "step": 8324 + }, + { + "epoch": 1.9191930835734872, + "grad_norm": 1.5553353507324814, + "learning_rate": 8.987486391316745e-09, + "loss": 0.4461814761161804, + "step": 8325 + }, + { + "epoch": 1.9194236311239194, + "grad_norm": 1.6004360220081093, + "learning_rate": 8.936569750109701e-09, + "loss": 0.4462101459503174, + "step": 8326 + }, + { + "epoch": 1.9196541786743517, + "grad_norm": 1.9035892190993247, + "learning_rate": 8.885797100109283e-09, + "loss": 0.5376943945884705, + "step": 8327 + }, + { + "epoch": 1.919884726224784, + "grad_norm": 1.4622106701043842, + "learning_rate": 8.835168448692032e-09, + "loss": 0.4294065237045288, + "step": 8328 + }, + { + "epoch": 1.9201152737752163, + "grad_norm": 1.5772985070440868, + "learning_rate": 8.784683803214066e-09, + "loss": 0.4223681688308716, + "step": 8329 + }, + { + "epoch": 1.9203458213256486, + "grad_norm": 1.673718906169722, + "learning_rate": 8.73434317100996e-09, + "loss": 0.5321441888809204, + "step": 8330 + }, + { + "epoch": 1.9205763688760809, + "grad_norm": 1.47290212215442, + "learning_rate": 8.684146559393979e-09, + "loss": 0.4908212423324585, + "step": 8331 + }, + { + "epoch": 1.9208069164265131, + "grad_norm": 1.5252367208444346, + "learning_rate": 8.634093975659062e-09, + "loss": 0.48342543840408325, + "step": 8332 + }, + { + "epoch": 1.9210374639769452, + "grad_norm": 1.584338687628807, + "learning_rate": 8.584185427077285e-09, + "loss": 0.5173824429512024, + "step": 8333 + }, + { + "epoch": 1.9212680115273775, + "grad_norm": 1.3721837054005588, + "learning_rate": 8.534420920899844e-09, + "loss": 0.42781007289886475, + "step": 8334 + }, + { + "epoch": 1.9214985590778098, + "grad_norm": 1.7522004684189494, + "learning_rate": 8.484800464357067e-09, + "loss": 0.48095422983169556, + "step": 8335 + }, + { + "epoch": 1.921729106628242, + "grad_norm": 1.486479647979893, + "learning_rate": 8.4353240646583e-09, + "loss": 0.4701269865036011, + "step": 8336 + }, + { + "epoch": 1.9219596541786743, + "grad_norm": 1.5874139106024778, + "learning_rate": 8.385991728991903e-09, + "loss": 0.48512130975723267, + "step": 8337 + }, + { + "epoch": 1.9221902017291066, + "grad_norm": 1.6650791676703436, + "learning_rate": 8.336803464525255e-09, + "loss": 0.456530898809433, + "step": 8338 + }, + { + "epoch": 1.9224207492795389, + "grad_norm": 1.6568880053058186, + "learning_rate": 8.287759278405082e-09, + "loss": 0.48397719860076904, + "step": 8339 + }, + { + "epoch": 1.9226512968299712, + "grad_norm": 1.6643186904423333, + "learning_rate": 8.238859177756907e-09, + "loss": 0.5594744086265564, + "step": 8340 + }, + { + "epoch": 1.9228818443804034, + "grad_norm": 1.771066054203885, + "learning_rate": 8.190103169685269e-09, + "loss": 0.4647720754146576, + "step": 8341 + }, + { + "epoch": 1.9231123919308357, + "grad_norm": 1.712221014293666, + "learning_rate": 8.141491261274169e-09, + "loss": 0.5003525018692017, + "step": 8342 + }, + { + "epoch": 1.923342939481268, + "grad_norm": 1.4924276842796294, + "learning_rate": 8.09302345958629e-09, + "loss": 0.5174271464347839, + "step": 8343 + }, + { + "epoch": 1.9235734870317003, + "grad_norm": 2.0224825185920197, + "learning_rate": 8.044699771663554e-09, + "loss": 0.4582705795764923, + "step": 8344 + }, + { + "epoch": 1.9238040345821326, + "grad_norm": 1.630234830459202, + "learning_rate": 7.99652020452679e-09, + "loss": 0.4875626564025879, + "step": 8345 + }, + { + "epoch": 1.9240345821325648, + "grad_norm": 1.607848466927164, + "learning_rate": 7.948484765175956e-09, + "loss": 0.45983967185020447, + "step": 8346 + }, + { + "epoch": 1.9242651296829971, + "grad_norm": 1.873627093178645, + "learning_rate": 7.900593460590133e-09, + "loss": 0.46369433403015137, + "step": 8347 + }, + { + "epoch": 1.9244956772334294, + "grad_norm": 1.840943880232746, + "learning_rate": 7.852846297727644e-09, + "loss": 0.4666575491428375, + "step": 8348 + }, + { + "epoch": 1.9247262247838617, + "grad_norm": 1.7631099459348156, + "learning_rate": 7.805243283525387e-09, + "loss": 0.39856386184692383, + "step": 8349 + }, + { + "epoch": 1.924956772334294, + "grad_norm": 1.5924965126506507, + "learning_rate": 7.757784424899716e-09, + "loss": 0.3968764543533325, + "step": 8350 + }, + { + "epoch": 1.9251873198847262, + "grad_norm": 1.5539586874837867, + "learning_rate": 7.710469728745895e-09, + "loss": 0.42590945959091187, + "step": 8351 + }, + { + "epoch": 1.9254178674351585, + "grad_norm": 1.8558131754812899, + "learning_rate": 7.6632992019382e-09, + "loss": 0.4320804476737976, + "step": 8352 + }, + { + "epoch": 1.9256484149855908, + "grad_norm": 1.373435252153639, + "learning_rate": 7.616272851330151e-09, + "loss": 0.42604368925094604, + "step": 8353 + }, + { + "epoch": 1.925878962536023, + "grad_norm": 1.5219509394941837, + "learning_rate": 7.569390683753951e-09, + "loss": 0.46070748567581177, + "step": 8354 + }, + { + "epoch": 1.9261095100864554, + "grad_norm": 1.9609201609900513, + "learning_rate": 7.52265270602126e-09, + "loss": 0.5836024284362793, + "step": 8355 + }, + { + "epoch": 1.9263400576368876, + "grad_norm": 1.8423197030839495, + "learning_rate": 7.476058924922645e-09, + "loss": 0.4969649910926819, + "step": 8356 + }, + { + "epoch": 1.92657060518732, + "grad_norm": 1.597412550113596, + "learning_rate": 7.429609347227694e-09, + "loss": 0.5294152498245239, + "step": 8357 + }, + { + "epoch": 1.9268011527377522, + "grad_norm": 1.8991873203876128, + "learning_rate": 7.383303979684896e-09, + "loss": 0.47974759340286255, + "step": 8358 + }, + { + "epoch": 1.9270317002881845, + "grad_norm": 1.7244962769819776, + "learning_rate": 7.337142829022202e-09, + "loss": 0.5180144309997559, + "step": 8359 + }, + { + "epoch": 1.9272622478386165, + "grad_norm": 1.8763071699681506, + "learning_rate": 7.291125901946027e-09, + "loss": 0.4036504924297333, + "step": 8360 + }, + { + "epoch": 1.9274927953890488, + "grad_norm": 1.74498653425893, + "learning_rate": 7.2452532051423546e-09, + "loss": 0.4952111840248108, + "step": 8361 + }, + { + "epoch": 1.927723342939481, + "grad_norm": 1.9182000125605638, + "learning_rate": 7.199524745275965e-09, + "loss": 0.43847912549972534, + "step": 8362 + }, + { + "epoch": 1.9279538904899134, + "grad_norm": 1.480848731477337, + "learning_rate": 7.153940528990765e-09, + "loss": 0.42722171545028687, + "step": 8363 + }, + { + "epoch": 1.9281844380403457, + "grad_norm": 1.496577307338104, + "learning_rate": 7.10850056290968e-09, + "loss": 0.44882699847221375, + "step": 8364 + }, + { + "epoch": 1.928414985590778, + "grad_norm": 1.8191818061281437, + "learning_rate": 7.0632048536345415e-09, + "loss": 0.5153505206108093, + "step": 8365 + }, + { + "epoch": 1.9286455331412102, + "grad_norm": 1.6967369186549641, + "learning_rate": 7.018053407746416e-09, + "loss": 0.5489900708198547, + "step": 8366 + }, + { + "epoch": 1.9288760806916425, + "grad_norm": 1.6353777624372818, + "learning_rate": 6.97304623180539e-09, + "loss": 0.4193941354751587, + "step": 8367 + }, + { + "epoch": 1.9291066282420748, + "grad_norm": 1.623882816413383, + "learning_rate": 6.928183332350346e-09, + "loss": 0.4031536281108856, + "step": 8368 + }, + { + "epoch": 1.929337175792507, + "grad_norm": 1.594539749841058, + "learning_rate": 6.883464715899734e-09, + "loss": 0.4182126522064209, + "step": 8369 + }, + { + "epoch": 1.9295677233429394, + "grad_norm": 1.428635807002028, + "learning_rate": 6.838890388950469e-09, + "loss": 0.4714782238006592, + "step": 8370 + }, + { + "epoch": 1.9297982708933716, + "grad_norm": 1.8277761014318794, + "learning_rate": 6.7944603579787044e-09, + "loss": 0.47345131635665894, + "step": 8371 + }, + { + "epoch": 1.930028818443804, + "grad_norm": 1.6599936080596263, + "learning_rate": 6.750174629439831e-09, + "loss": 0.5616276264190674, + "step": 8372 + }, + { + "epoch": 1.9302593659942362, + "grad_norm": 1.7317139547020615, + "learning_rate": 6.706033209767925e-09, + "loss": 0.544796347618103, + "step": 8373 + }, + { + "epoch": 1.9304899135446685, + "grad_norm": 1.5033990065109937, + "learning_rate": 6.662036105376412e-09, + "loss": 0.5631225109100342, + "step": 8374 + }, + { + "epoch": 1.9307204610951008, + "grad_norm": 1.6075623646085158, + "learning_rate": 6.6181833226575116e-09, + "loss": 0.39414408802986145, + "step": 8375 + }, + { + "epoch": 1.930951008645533, + "grad_norm": 1.671437220717172, + "learning_rate": 6.574474867982793e-09, + "loss": 0.4659278392791748, + "step": 8376 + }, + { + "epoch": 1.9311815561959653, + "grad_norm": 1.4579597767228383, + "learning_rate": 6.5309107477022895e-09, + "loss": 0.423857718706131, + "step": 8377 + }, + { + "epoch": 1.9314121037463976, + "grad_norm": 1.5793968032935768, + "learning_rate": 6.4874909681457145e-09, + "loss": 0.47983941435813904, + "step": 8378 + }, + { + "epoch": 1.9316426512968299, + "grad_norm": 1.6183823258999779, + "learning_rate": 6.444215535621245e-09, + "loss": 0.36726903915405273, + "step": 8379 + }, + { + "epoch": 1.9318731988472622, + "grad_norm": 1.718540594241041, + "learning_rate": 6.401084456416628e-09, + "loss": 0.5433714985847473, + "step": 8380 + }, + { + "epoch": 1.9321037463976944, + "grad_norm": 1.5160106606022126, + "learning_rate": 6.358097736798295e-09, + "loss": 0.4377497434616089, + "step": 8381 + }, + { + "epoch": 1.9323342939481267, + "grad_norm": 1.778623844156025, + "learning_rate": 6.3152553830115864e-09, + "loss": 0.5426797270774841, + "step": 8382 + }, + { + "epoch": 1.932564841498559, + "grad_norm": 1.4848182166015895, + "learning_rate": 6.2725574012812975e-09, + "loss": 0.3502352237701416, + "step": 8383 + }, + { + "epoch": 1.9327953890489913, + "grad_norm": 1.747637938738868, + "learning_rate": 6.23000379781069e-09, + "loss": 0.4589088559150696, + "step": 8384 + }, + { + "epoch": 1.9330259365994236, + "grad_norm": 1.6442032368895256, + "learning_rate": 6.187594578782707e-09, + "loss": 0.5089824795722961, + "step": 8385 + }, + { + "epoch": 1.9332564841498558, + "grad_norm": 1.6208238067165452, + "learning_rate": 6.145329750358752e-09, + "loss": 0.3649140000343323, + "step": 8386 + }, + { + "epoch": 1.9334870317002881, + "grad_norm": 1.4952107950779492, + "learning_rate": 6.103209318679469e-09, + "loss": 0.3923742175102234, + "step": 8387 + }, + { + "epoch": 1.9337175792507204, + "grad_norm": 1.7006901623122679, + "learning_rate": 6.061233289864632e-09, + "loss": 0.4399063289165497, + "step": 8388 + }, + { + "epoch": 1.9339481268011527, + "grad_norm": 1.4843303104270151, + "learning_rate": 6.0194016700129134e-09, + "loss": 0.4042898416519165, + "step": 8389 + }, + { + "epoch": 1.934178674351585, + "grad_norm": 1.5429865198164772, + "learning_rate": 5.9777144652018994e-09, + "loss": 0.4660540521144867, + "step": 8390 + }, + { + "epoch": 1.9344092219020173, + "grad_norm": 1.6674342548655874, + "learning_rate": 5.9361716814883e-09, + "loss": 0.5535542964935303, + "step": 8391 + }, + { + "epoch": 1.9346397694524495, + "grad_norm": 1.703428306689839, + "learning_rate": 5.894773324907953e-09, + "loss": 0.4237617254257202, + "step": 8392 + }, + { + "epoch": 1.9348703170028818, + "grad_norm": 1.8224589830441662, + "learning_rate": 5.853519401475604e-09, + "loss": 0.5574471950531006, + "step": 8393 + }, + { + "epoch": 1.935100864553314, + "grad_norm": 2.049918989905908, + "learning_rate": 5.812409917185012e-09, + "loss": 0.48153114318847656, + "step": 8394 + }, + { + "epoch": 1.9353314121037464, + "grad_norm": 1.6430356369587549, + "learning_rate": 5.771444878008846e-09, + "loss": 0.523391604423523, + "step": 8395 + }, + { + "epoch": 1.9355619596541787, + "grad_norm": 1.5802726481699332, + "learning_rate": 5.730624289899122e-09, + "loss": 0.4961997866630554, + "step": 8396 + }, + { + "epoch": 1.935792507204611, + "grad_norm": 1.5755062335679042, + "learning_rate": 5.6899481587863174e-09, + "loss": 0.4530646800994873, + "step": 8397 + }, + { + "epoch": 1.9360230547550432, + "grad_norm": 1.799741209476833, + "learning_rate": 5.649416490580594e-09, + "loss": 0.4895196855068207, + "step": 8398 + }, + { + "epoch": 1.9362536023054755, + "grad_norm": 1.3496435901328925, + "learning_rate": 5.609029291170575e-09, + "loss": 0.4179130792617798, + "step": 8399 + }, + { + "epoch": 1.9364841498559078, + "grad_norm": 1.5676225979121055, + "learning_rate": 5.568786566424122e-09, + "loss": 0.5279220342636108, + "step": 8400 + }, + { + "epoch": 1.93671469740634, + "grad_norm": 1.7613704417043121, + "learning_rate": 5.528688322188224e-09, + "loss": 0.5009425282478333, + "step": 8401 + }, + { + "epoch": 1.9369452449567723, + "grad_norm": 1.964452384404303, + "learning_rate": 5.488734564288555e-09, + "loss": 0.4320024847984314, + "step": 8402 + }, + { + "epoch": 1.9371757925072046, + "grad_norm": 2.2196156524988764, + "learning_rate": 5.448925298530027e-09, + "loss": 0.6183265447616577, + "step": 8403 + }, + { + "epoch": 1.937406340057637, + "grad_norm": 1.6120974046128558, + "learning_rate": 5.40926053069668e-09, + "loss": 0.5422444343566895, + "step": 8404 + }, + { + "epoch": 1.9376368876080692, + "grad_norm": 1.7196795497330961, + "learning_rate": 5.369740266551126e-09, + "loss": 0.4645472466945648, + "step": 8405 + }, + { + "epoch": 1.9378674351585015, + "grad_norm": 1.4249047377418558, + "learning_rate": 5.330364511835439e-09, + "loss": 0.3970335125923157, + "step": 8406 + }, + { + "epoch": 1.9380979827089337, + "grad_norm": 1.8953829064382313, + "learning_rate": 5.291133272270376e-09, + "loss": 0.5468182563781738, + "step": 8407 + }, + { + "epoch": 1.938328530259366, + "grad_norm": 1.865719573844204, + "learning_rate": 5.252046553556044e-09, + "loss": 0.5717053413391113, + "step": 8408 + }, + { + "epoch": 1.9385590778097983, + "grad_norm": 1.4715205331924197, + "learning_rate": 5.213104361371012e-09, + "loss": 0.5246316194534302, + "step": 8409 + }, + { + "epoch": 1.9387896253602306, + "grad_norm": 1.7486117905905436, + "learning_rate": 5.174306701373421e-09, + "loss": 0.4892142415046692, + "step": 8410 + }, + { + "epoch": 1.9390201729106629, + "grad_norm": 1.7044045562562955, + "learning_rate": 5.135653579200094e-09, + "loss": 0.46718069911003113, + "step": 8411 + }, + { + "epoch": 1.9392507204610951, + "grad_norm": 1.678083637284962, + "learning_rate": 5.097145000466985e-09, + "loss": 0.48289304971694946, + "step": 8412 + }, + { + "epoch": 1.9394812680115274, + "grad_norm": 1.589487272584262, + "learning_rate": 5.05878097076895e-09, + "loss": 0.4352980852127075, + "step": 8413 + }, + { + "epoch": 1.9397118155619597, + "grad_norm": 1.5324614271383616, + "learning_rate": 5.020561495679865e-09, + "loss": 0.4404779076576233, + "step": 8414 + }, + { + "epoch": 1.939942363112392, + "grad_norm": 1.5122574500468116, + "learning_rate": 4.9824865807526205e-09, + "loss": 0.5069785714149475, + "step": 8415 + }, + { + "epoch": 1.9401729106628243, + "grad_norm": 1.7183195331173957, + "learning_rate": 4.944556231519015e-09, + "loss": 0.5377410650253296, + "step": 8416 + }, + { + "epoch": 1.9404034582132565, + "grad_norm": 1.6585204896545156, + "learning_rate": 4.9067704534901944e-09, + "loss": 0.44542694091796875, + "step": 8417 + }, + { + "epoch": 1.9406340057636888, + "grad_norm": 1.8607811753782286, + "learning_rate": 4.869129252155768e-09, + "loss": 0.3760349750518799, + "step": 8418 + }, + { + "epoch": 1.940864553314121, + "grad_norm": 1.8090313920756074, + "learning_rate": 4.831632632984695e-09, + "loss": 0.4890024960041046, + "step": 8419 + }, + { + "epoch": 1.9410951008645534, + "grad_norm": 1.5849870242815058, + "learning_rate": 4.794280601424949e-09, + "loss": 0.4114872217178345, + "step": 8420 + }, + { + "epoch": 1.9413256484149857, + "grad_norm": 1.650126186884988, + "learning_rate": 4.757073162903302e-09, + "loss": 0.4893375039100647, + "step": 8421 + }, + { + "epoch": 1.941556195965418, + "grad_norm": 1.6807297976206002, + "learning_rate": 4.7200103228255405e-09, + "loss": 0.4684467911720276, + "step": 8422 + }, + { + "epoch": 1.9417867435158502, + "grad_norm": 1.67464763167606, + "learning_rate": 4.68309208657669e-09, + "loss": 0.44905227422714233, + "step": 8423 + }, + { + "epoch": 1.9420172910662825, + "grad_norm": 1.7633620797202647, + "learning_rate": 4.646318459520349e-09, + "loss": 0.41259336471557617, + "step": 8424 + }, + { + "epoch": 1.9422478386167148, + "grad_norm": 1.5507547402556556, + "learning_rate": 4.6096894469996876e-09, + "loss": 0.43815112113952637, + "step": 8425 + }, + { + "epoch": 1.942478386167147, + "grad_norm": 1.5778990502154555, + "learning_rate": 4.573205054336115e-09, + "loss": 0.45738598704338074, + "step": 8426 + }, + { + "epoch": 1.9427089337175794, + "grad_norm": 1.5588092260341486, + "learning_rate": 4.536865286830727e-09, + "loss": 0.3894132971763611, + "step": 8427 + }, + { + "epoch": 1.9429394812680116, + "grad_norm": 1.623138474885205, + "learning_rate": 4.5006701497631864e-09, + "loss": 0.5066829919815063, + "step": 8428 + }, + { + "epoch": 1.943170028818444, + "grad_norm": 2.1057192715369077, + "learning_rate": 4.464619648392287e-09, + "loss": 0.5245810747146606, + "step": 8429 + }, + { + "epoch": 1.9434005763688762, + "grad_norm": 1.450252578239379, + "learning_rate": 4.42871378795584e-09, + "loss": 0.4634879231452942, + "step": 8430 + }, + { + "epoch": 1.9436311239193085, + "grad_norm": 1.6490696686605655, + "learning_rate": 4.3929525736705605e-09, + "loss": 0.5357192158699036, + "step": 8431 + }, + { + "epoch": 1.9438616714697408, + "grad_norm": 1.6299846504660511, + "learning_rate": 4.357336010732071e-09, + "loss": 0.4591226577758789, + "step": 8432 + }, + { + "epoch": 1.944092219020173, + "grad_norm": 1.5898360627427908, + "learning_rate": 4.321864104315343e-09, + "loss": 0.42836296558380127, + "step": 8433 + }, + { + "epoch": 1.9443227665706053, + "grad_norm": 1.6213619512908282, + "learning_rate": 4.286536859573919e-09, + "loss": 0.5093647241592407, + "step": 8434 + }, + { + "epoch": 1.9445533141210376, + "grad_norm": 1.9084911394042954, + "learning_rate": 4.251354281640473e-09, + "loss": 0.40465253591537476, + "step": 8435 + }, + { + "epoch": 1.9447838616714699, + "grad_norm": 1.7729160223942424, + "learning_rate": 4.2163163756265825e-09, + "loss": 0.46636664867401123, + "step": 8436 + }, + { + "epoch": 1.9450144092219022, + "grad_norm": 1.9152181663086612, + "learning_rate": 4.1814231466230645e-09, + "loss": 0.49786052107810974, + "step": 8437 + }, + { + "epoch": 1.9452449567723344, + "grad_norm": 1.6016583543312068, + "learning_rate": 4.146674599699418e-09, + "loss": 0.48284637928009033, + "step": 8438 + }, + { + "epoch": 1.9454755043227667, + "grad_norm": 1.3525514753444163, + "learning_rate": 4.112070739904272e-09, + "loss": 0.4034563899040222, + "step": 8439 + }, + { + "epoch": 1.945706051873199, + "grad_norm": 1.8628888292595736, + "learning_rate": 4.077611572265382e-09, + "loss": 0.5521979928016663, + "step": 8440 + }, + { + "epoch": 1.9459365994236313, + "grad_norm": 1.6868967423968648, + "learning_rate": 4.043297101789078e-09, + "loss": 0.4537632465362549, + "step": 8441 + }, + { + "epoch": 1.9461671469740636, + "grad_norm": 1.6850998569290199, + "learning_rate": 4.009127333460926e-09, + "loss": 0.5159620046615601, + "step": 8442 + }, + { + "epoch": 1.9463976945244956, + "grad_norm": 1.4228816698157278, + "learning_rate": 3.975102272245512e-09, + "loss": 0.46216505765914917, + "step": 8443 + }, + { + "epoch": 1.946628242074928, + "grad_norm": 1.929330228609917, + "learning_rate": 3.941221923086324e-09, + "loss": 0.5094718933105469, + "step": 8444 + }, + { + "epoch": 1.9468587896253602, + "grad_norm": 1.733746574616843, + "learning_rate": 3.907486290905759e-09, + "loss": 0.4824807941913605, + "step": 8445 + }, + { + "epoch": 1.9470893371757925, + "grad_norm": 1.4644111723489823, + "learning_rate": 3.873895380605341e-09, + "loss": 0.4324292838573456, + "step": 8446 + }, + { + "epoch": 1.9473198847262247, + "grad_norm": 1.8148293632262469, + "learning_rate": 3.8404491970653874e-09, + "loss": 0.5109740495681763, + "step": 8447 + }, + { + "epoch": 1.947550432276657, + "grad_norm": 1.915278078833533, + "learning_rate": 3.8071477451453445e-09, + "loss": 0.47580888867378235, + "step": 8448 + }, + { + "epoch": 1.9477809798270893, + "grad_norm": 1.582715092663918, + "learning_rate": 3.773991029683565e-09, + "loss": 0.4678313136100769, + "step": 8449 + }, + { + "epoch": 1.9480115273775216, + "grad_norm": 1.6330856951875405, + "learning_rate": 3.740979055497306e-09, + "loss": 0.46340861916542053, + "step": 8450 + }, + { + "epoch": 1.9482420749279539, + "grad_norm": 1.387966844064718, + "learning_rate": 3.7081118273829536e-09, + "loss": 0.43767350912094116, + "step": 8451 + }, + { + "epoch": 1.9484726224783862, + "grad_norm": 1.7100462700673766, + "learning_rate": 3.6753893501156873e-09, + "loss": 0.5428752303123474, + "step": 8452 + }, + { + "epoch": 1.9487031700288184, + "grad_norm": 1.47058628039416, + "learning_rate": 3.6428116284498157e-09, + "loss": 0.380368709564209, + "step": 8453 + }, + { + "epoch": 1.9489337175792507, + "grad_norm": 1.6255373517486364, + "learning_rate": 3.610378667118552e-09, + "loss": 0.4518704414367676, + "step": 8454 + }, + { + "epoch": 1.949164265129683, + "grad_norm": 1.9772184397093173, + "learning_rate": 3.5780904708340167e-09, + "loss": 0.5296592712402344, + "step": 8455 + }, + { + "epoch": 1.9493948126801153, + "grad_norm": 1.6560836592749282, + "learning_rate": 3.545947044287345e-09, + "loss": 0.4429657459259033, + "step": 8456 + }, + { + "epoch": 1.9496253602305476, + "grad_norm": 1.5997951231441618, + "learning_rate": 3.5139483921486913e-09, + "loss": 0.4942644238471985, + "step": 8457 + }, + { + "epoch": 1.9498559077809798, + "grad_norm": 1.514213668293002, + "learning_rate": 3.4820945190671138e-09, + "loss": 0.4621245861053467, + "step": 8458 + }, + { + "epoch": 1.9500864553314121, + "grad_norm": 1.663238270461513, + "learning_rate": 3.450385429670577e-09, + "loss": 0.3954099416732788, + "step": 8459 + }, + { + "epoch": 1.9503170028818444, + "grad_norm": 1.6486452582394135, + "learning_rate": 3.418821128566174e-09, + "loss": 0.48606687784194946, + "step": 8460 + }, + { + "epoch": 1.9505475504322767, + "grad_norm": 1.5621828863712628, + "learning_rate": 3.3874016203397916e-09, + "loss": 0.4746624827384949, + "step": 8461 + }, + { + "epoch": 1.950778097982709, + "grad_norm": 1.7626425316421102, + "learning_rate": 3.356126909556445e-09, + "loss": 0.473477840423584, + "step": 8462 + }, + { + "epoch": 1.9510086455331412, + "grad_norm": 1.718222636076735, + "learning_rate": 3.3249970007599435e-09, + "loss": 0.47697845101356506, + "step": 8463 + }, + { + "epoch": 1.9512391930835735, + "grad_norm": 1.6677094487521866, + "learning_rate": 3.294011898473115e-09, + "loss": 0.4818275570869446, + "step": 8464 + }, + { + "epoch": 1.9514697406340058, + "grad_norm": 1.7732869220620682, + "learning_rate": 3.263171607197912e-09, + "loss": 0.4384726285934448, + "step": 8465 + }, + { + "epoch": 1.951700288184438, + "grad_norm": 1.6730968086224116, + "learning_rate": 3.232476131415085e-09, + "loss": 0.5690720081329346, + "step": 8466 + }, + { + "epoch": 1.9519308357348704, + "grad_norm": 1.5356559279376012, + "learning_rate": 3.2019254755841774e-09, + "loss": 0.4018305540084839, + "step": 8467 + }, + { + "epoch": 1.9521613832853026, + "grad_norm": 1.3752378081542544, + "learning_rate": 3.171519644144083e-09, + "loss": 0.4394480586051941, + "step": 8468 + }, + { + "epoch": 1.952391930835735, + "grad_norm": 1.784880778617597, + "learning_rate": 3.1412586415123787e-09, + "loss": 0.4497807025909424, + "step": 8469 + }, + { + "epoch": 1.952622478386167, + "grad_norm": 1.77748620875406, + "learning_rate": 3.1111424720856595e-09, + "loss": 0.4412338137626648, + "step": 8470 + }, + { + "epoch": 1.9528530259365993, + "grad_norm": 1.6059566008206951, + "learning_rate": 3.081171140239536e-09, + "loss": 0.44863519072532654, + "step": 8471 + }, + { + "epoch": 1.9530835734870315, + "grad_norm": 1.6456475980995775, + "learning_rate": 3.0513446503285245e-09, + "loss": 0.5177716016769409, + "step": 8472 + }, + { + "epoch": 1.9533141210374638, + "grad_norm": 1.6665867061870976, + "learning_rate": 3.021663006686048e-09, + "loss": 0.5143194198608398, + "step": 8473 + }, + { + "epoch": 1.953544668587896, + "grad_norm": 1.9552097979581144, + "learning_rate": 2.9921262136246574e-09, + "loss": 0.5202943086624146, + "step": 8474 + }, + { + "epoch": 1.9537752161383284, + "grad_norm": 1.5949249311044142, + "learning_rate": 2.9627342754355853e-09, + "loss": 0.3946771025657654, + "step": 8475 + }, + { + "epoch": 1.9540057636887607, + "grad_norm": 1.3562551955817035, + "learning_rate": 2.933487196389195e-09, + "loss": 0.3503817021846771, + "step": 8476 + }, + { + "epoch": 1.954236311239193, + "grad_norm": 1.7470917367547707, + "learning_rate": 2.9043849807349753e-09, + "loss": 0.41341015696525574, + "step": 8477 + }, + { + "epoch": 1.9544668587896252, + "grad_norm": 1.704213335494292, + "learning_rate": 2.8754276327009886e-09, + "loss": 0.4515586495399475, + "step": 8478 + }, + { + "epoch": 1.9546974063400575, + "grad_norm": 2.058172843593399, + "learning_rate": 2.8466151564944253e-09, + "loss": 0.49089083075523376, + "step": 8479 + }, + { + "epoch": 1.9549279538904898, + "grad_norm": 1.4448060876815116, + "learning_rate": 2.817947556301492e-09, + "loss": 0.4171237349510193, + "step": 8480 + }, + { + "epoch": 1.955158501440922, + "grad_norm": 1.6605698756015046, + "learning_rate": 2.789424836287413e-09, + "loss": 0.4951333999633789, + "step": 8481 + }, + { + "epoch": 1.9553890489913544, + "grad_norm": 1.567040182627561, + "learning_rate": 2.761047000595984e-09, + "loss": 0.37939924001693726, + "step": 8482 + }, + { + "epoch": 1.9556195965417866, + "grad_norm": 1.964544419289842, + "learning_rate": 2.732814053350463e-09, + "loss": 0.433444619178772, + "step": 8483 + }, + { + "epoch": 1.955850144092219, + "grad_norm": 1.7955734026982275, + "learning_rate": 2.7047259986526795e-09, + "loss": 0.5602415800094604, + "step": 8484 + }, + { + "epoch": 1.9560806916426512, + "grad_norm": 1.6268724855988212, + "learning_rate": 2.67678284058348e-09, + "loss": 0.46474429965019226, + "step": 8485 + }, + { + "epoch": 1.9563112391930835, + "grad_norm": 1.6444413974801657, + "learning_rate": 2.648984583202951e-09, + "loss": 0.4493546783924103, + "step": 8486 + }, + { + "epoch": 1.9565417867435158, + "grad_norm": 1.6869718104061497, + "learning_rate": 2.6213312305495283e-09, + "loss": 0.4560815691947937, + "step": 8487 + }, + { + "epoch": 1.956772334293948, + "grad_norm": 1.6786523975409646, + "learning_rate": 2.593822786641331e-09, + "loss": 0.4888133406639099, + "step": 8488 + }, + { + "epoch": 1.9570028818443803, + "grad_norm": 1.6374287407121897, + "learning_rate": 2.5664592554747176e-09, + "loss": 0.4761776328086853, + "step": 8489 + }, + { + "epoch": 1.9572334293948126, + "grad_norm": 1.5386878575559964, + "learning_rate": 2.5392406410256196e-09, + "loss": 0.48560792207717896, + "step": 8490 + }, + { + "epoch": 1.9574639769452449, + "grad_norm": 1.770586958344679, + "learning_rate": 2.5121669472484287e-09, + "loss": 0.5601654052734375, + "step": 8491 + }, + { + "epoch": 1.9576945244956772, + "grad_norm": 1.4713351138364472, + "learning_rate": 2.485238178076665e-09, + "loss": 0.3691544532775879, + "step": 8492 + }, + { + "epoch": 1.9579250720461094, + "grad_norm": 1.4167042718550695, + "learning_rate": 2.458454337422866e-09, + "loss": 0.39126554131507874, + "step": 8493 + }, + { + "epoch": 1.9581556195965417, + "grad_norm": 1.8048695509109922, + "learning_rate": 2.431815429178474e-09, + "loss": 0.5396989583969116, + "step": 8494 + }, + { + "epoch": 1.958386167146974, + "grad_norm": 1.3843806172146762, + "learning_rate": 2.405321457213727e-09, + "loss": 0.4269161820411682, + "step": 8495 + }, + { + "epoch": 1.9586167146974063, + "grad_norm": 1.6940755991478917, + "learning_rate": 2.3789724253781006e-09, + "loss": 0.5214752554893494, + "step": 8496 + }, + { + "epoch": 1.9588472622478386, + "grad_norm": 1.713092802772806, + "learning_rate": 2.352768337499755e-09, + "loss": 0.4246710538864136, + "step": 8497 + }, + { + "epoch": 1.9590778097982708, + "grad_norm": 1.5890787020160493, + "learning_rate": 2.3267091973857568e-09, + "loss": 0.4201776683330536, + "step": 8498 + }, + { + "epoch": 1.9593083573487031, + "grad_norm": 1.8593976573086313, + "learning_rate": 2.3007950088222984e-09, + "loss": 0.4610064625740051, + "step": 8499 + }, + { + "epoch": 1.9595389048991354, + "grad_norm": 1.6805721571635974, + "learning_rate": 2.2750257755745907e-09, + "loss": 0.5150582194328308, + "step": 8500 + }, + { + "epoch": 1.9597694524495677, + "grad_norm": 1.7727019985255488, + "learning_rate": 2.2494015013864165e-09, + "loss": 0.5353474617004395, + "step": 8501 + }, + { + "epoch": 1.96, + "grad_norm": 1.9131020027009458, + "learning_rate": 2.223922189980798e-09, + "loss": 0.42907923460006714, + "step": 8502 + }, + { + "epoch": 1.9602305475504322, + "grad_norm": 1.650820515603492, + "learning_rate": 2.198587845059774e-09, + "loss": 0.49550437927246094, + "step": 8503 + }, + { + "epoch": 1.9604610951008645, + "grad_norm": 1.4377584361594662, + "learning_rate": 2.1733984703038445e-09, + "loss": 0.5000699758529663, + "step": 8504 + }, + { + "epoch": 1.9606916426512968, + "grad_norm": 1.9114962774622124, + "learning_rate": 2.1483540693729707e-09, + "loss": 0.4995121359825134, + "step": 8505 + }, + { + "epoch": 1.960922190201729, + "grad_norm": 1.4677591065160696, + "learning_rate": 2.123454645905909e-09, + "loss": 0.4930616617202759, + "step": 8506 + }, + { + "epoch": 1.9611527377521614, + "grad_norm": 1.6432570722843007, + "learning_rate": 2.0987002035200984e-09, + "loss": 0.4664524793624878, + "step": 8507 + }, + { + "epoch": 1.9613832853025936, + "grad_norm": 1.4132919055054123, + "learning_rate": 2.0740907458122183e-09, + "loss": 0.41985607147216797, + "step": 8508 + }, + { + "epoch": 1.961613832853026, + "grad_norm": 1.904445466996887, + "learning_rate": 2.049626276357741e-09, + "loss": 0.5060914158821106, + "step": 8509 + }, + { + "epoch": 1.9618443804034582, + "grad_norm": 1.6899990006384302, + "learning_rate": 2.0253067987110464e-09, + "loss": 0.45631885528564453, + "step": 8510 + }, + { + "epoch": 1.9620749279538905, + "grad_norm": 1.905928349702902, + "learning_rate": 2.0011323164055293e-09, + "loss": 0.4670146107673645, + "step": 8511 + }, + { + "epoch": 1.9623054755043228, + "grad_norm": 1.6556002585759428, + "learning_rate": 1.977102832953603e-09, + "loss": 0.4538724422454834, + "step": 8512 + }, + { + "epoch": 1.962536023054755, + "grad_norm": 2.4006972020163673, + "learning_rate": 1.9532183518463642e-09, + "loss": 0.44456255435943604, + "step": 8513 + }, + { + "epoch": 1.9627665706051873, + "grad_norm": 2.296949192556993, + "learning_rate": 1.929478876554036e-09, + "loss": 0.4598735570907593, + "step": 8514 + }, + { + "epoch": 1.9629971181556196, + "grad_norm": 1.7193445432012833, + "learning_rate": 1.9058844105256378e-09, + "loss": 0.5378584861755371, + "step": 8515 + }, + { + "epoch": 1.963227665706052, + "grad_norm": 1.8034999535100136, + "learning_rate": 1.8824349571893162e-09, + "loss": 0.4685138463973999, + "step": 8516 + }, + { + "epoch": 1.9634582132564842, + "grad_norm": 1.7193406810459768, + "learning_rate": 1.8591305199520124e-09, + "loss": 0.46510517597198486, + "step": 8517 + }, + { + "epoch": 1.9636887608069165, + "grad_norm": 1.7164761741465555, + "learning_rate": 1.835971102199574e-09, + "loss": 0.4738510251045227, + "step": 8518 + }, + { + "epoch": 1.9639193083573487, + "grad_norm": 1.4222932644352415, + "learning_rate": 1.8129567072968643e-09, + "loss": 0.4861803650856018, + "step": 8519 + }, + { + "epoch": 1.964149855907781, + "grad_norm": 1.7465089780006517, + "learning_rate": 1.7900873385875425e-09, + "loss": 0.36860841512680054, + "step": 8520 + }, + { + "epoch": 1.9643804034582133, + "grad_norm": 1.7111061301798263, + "learning_rate": 1.7673629993943951e-09, + "loss": 0.4288443326950073, + "step": 8521 + }, + { + "epoch": 1.9646109510086456, + "grad_norm": 1.5913629461582546, + "learning_rate": 1.744783693019003e-09, + "loss": 0.4562298655509949, + "step": 8522 + }, + { + "epoch": 1.9648414985590779, + "grad_norm": 1.6561698600767365, + "learning_rate": 1.7223494227419644e-09, + "loss": 0.5843374729156494, + "step": 8523 + }, + { + "epoch": 1.9650720461095101, + "grad_norm": 1.8587886095096156, + "learning_rate": 1.700060191822561e-09, + "loss": 0.5312603712081909, + "step": 8524 + }, + { + "epoch": 1.9653025936599424, + "grad_norm": 1.6611948128135299, + "learning_rate": 1.6779160034994245e-09, + "loss": 0.43121063709259033, + "step": 8525 + }, + { + "epoch": 1.9655331412103747, + "grad_norm": 1.5328134921803458, + "learning_rate": 1.6559168609896479e-09, + "loss": 0.423523485660553, + "step": 8526 + }, + { + "epoch": 1.965763688760807, + "grad_norm": 1.5325484608464404, + "learning_rate": 1.6340627674895635e-09, + "loss": 0.5113149881362915, + "step": 8527 + }, + { + "epoch": 1.9659942363112393, + "grad_norm": 1.7674615595341292, + "learning_rate": 1.6123537261745202e-09, + "loss": 0.4596378207206726, + "step": 8528 + }, + { + "epoch": 1.9662247838616715, + "grad_norm": 1.9649669225067132, + "learning_rate": 1.5907897401983283e-09, + "loss": 0.48513877391815186, + "step": 8529 + }, + { + "epoch": 1.9664553314121038, + "grad_norm": 1.5045937749141793, + "learning_rate": 1.5693708126941486e-09, + "loss": 0.4482196867465973, + "step": 8530 + }, + { + "epoch": 1.966685878962536, + "grad_norm": 1.9112638119340308, + "learning_rate": 1.5480969467739358e-09, + "loss": 0.47810351848602295, + "step": 8531 + }, + { + "epoch": 1.9669164265129684, + "grad_norm": 1.505981902090098, + "learning_rate": 1.526968145528551e-09, + "loss": 0.4758110046386719, + "step": 8532 + }, + { + "epoch": 1.9671469740634007, + "grad_norm": 1.4899371928312963, + "learning_rate": 1.505984412027872e-09, + "loss": 0.3773839473724365, + "step": 8533 + }, + { + "epoch": 1.967377521613833, + "grad_norm": 1.6548436534645594, + "learning_rate": 1.48514574932046e-09, + "loss": 0.44250258803367615, + "step": 8534 + }, + { + "epoch": 1.9676080691642652, + "grad_norm": 1.8192588687794267, + "learning_rate": 1.4644521604340042e-09, + "loss": 0.46573948860168457, + "step": 8535 + }, + { + "epoch": 1.9678386167146975, + "grad_norm": 1.6851509837317844, + "learning_rate": 1.4439036483751e-09, + "loss": 0.4824560284614563, + "step": 8536 + }, + { + "epoch": 1.9680691642651298, + "grad_norm": 1.5256238286750707, + "learning_rate": 1.4235002161292475e-09, + "loss": 0.458002507686615, + "step": 8537 + }, + { + "epoch": 1.968299711815562, + "grad_norm": 1.4386333998841516, + "learning_rate": 1.4032418666608537e-09, + "loss": 0.4253692626953125, + "step": 8538 + }, + { + "epoch": 1.9685302593659944, + "grad_norm": 1.7093317856476664, + "learning_rate": 1.3831286029131195e-09, + "loss": 0.5733859539031982, + "step": 8539 + }, + { + "epoch": 1.9687608069164266, + "grad_norm": 1.7122915194726511, + "learning_rate": 1.3631604278084852e-09, + "loss": 0.43337786197662354, + "step": 8540 + }, + { + "epoch": 1.968991354466859, + "grad_norm": 1.7084219388276647, + "learning_rate": 1.3433373442479635e-09, + "loss": 0.5053178071975708, + "step": 8541 + }, + { + "epoch": 1.9692219020172912, + "grad_norm": 1.7830872237681115, + "learning_rate": 1.3236593551118057e-09, + "loss": 0.4684600234031677, + "step": 8542 + }, + { + "epoch": 1.9694524495677235, + "grad_norm": 1.4832731604691933, + "learning_rate": 1.3041264632588367e-09, + "loss": 0.5142512917518616, + "step": 8543 + }, + { + "epoch": 1.9696829971181558, + "grad_norm": 1.5260640752119692, + "learning_rate": 1.284738671527119e-09, + "loss": 0.4978599548339844, + "step": 8544 + }, + { + "epoch": 1.969913544668588, + "grad_norm": 1.874462156323344, + "learning_rate": 1.2654959827334e-09, + "loss": 0.4477071464061737, + "step": 8545 + }, + { + "epoch": 1.9701440922190203, + "grad_norm": 1.7535577322058908, + "learning_rate": 1.2463983996735539e-09, + "loss": 0.5351927280426025, + "step": 8546 + }, + { + "epoch": 1.9703746397694526, + "grad_norm": 1.8647918037435045, + "learning_rate": 1.2274459251220282e-09, + "loss": 0.47270357608795166, + "step": 8547 + }, + { + "epoch": 1.9706051873198849, + "grad_norm": 1.7262652137863324, + "learning_rate": 1.208638561832731e-09, + "loss": 0.4262394905090332, + "step": 8548 + }, + { + "epoch": 1.9708357348703172, + "grad_norm": 1.4191712678909525, + "learning_rate": 1.1899763125380324e-09, + "loss": 0.37146294116973877, + "step": 8549 + }, + { + "epoch": 1.9710662824207494, + "grad_norm": 1.6675066868349637, + "learning_rate": 1.171459179949319e-09, + "loss": 0.44513702392578125, + "step": 8550 + }, + { + "epoch": 1.9712968299711817, + "grad_norm": 1.4372028970167274, + "learning_rate": 1.1530871667569942e-09, + "loss": 0.37944507598876953, + "step": 8551 + }, + { + "epoch": 1.9715273775216138, + "grad_norm": 1.4771963152936205, + "learning_rate": 1.134860275630256e-09, + "loss": 0.47857093811035156, + "step": 8552 + }, + { + "epoch": 1.971757925072046, + "grad_norm": 1.571680901692873, + "learning_rate": 1.1167785092174308e-09, + "loss": 0.4138413965702057, + "step": 8553 + }, + { + "epoch": 1.9719884726224783, + "grad_norm": 1.6122847153434285, + "learning_rate": 1.0988418701454173e-09, + "loss": 0.4699985682964325, + "step": 8554 + }, + { + "epoch": 1.9722190201729106, + "grad_norm": 1.5958383172737982, + "learning_rate": 1.0810503610203526e-09, + "loss": 0.4969210624694824, + "step": 8555 + }, + { + "epoch": 1.972449567723343, + "grad_norm": 1.6916926087464847, + "learning_rate": 1.063403984427169e-09, + "loss": 0.43024778366088867, + "step": 8556 + }, + { + "epoch": 1.9726801152737752, + "grad_norm": 1.4587952703382436, + "learning_rate": 1.0459027429295942e-09, + "loss": 0.46639060974121094, + "step": 8557 + }, + { + "epoch": 1.9729106628242075, + "grad_norm": 1.6008064147099506, + "learning_rate": 1.0285466390704823e-09, + "loss": 0.5065155625343323, + "step": 8558 + }, + { + "epoch": 1.9731412103746397, + "grad_norm": 1.7928190139614848, + "learning_rate": 1.011335675371372e-09, + "loss": 0.5083951354026794, + "step": 8559 + }, + { + "epoch": 1.973371757925072, + "grad_norm": 1.8178029070087696, + "learning_rate": 9.942698543330408e-10, + "loss": 0.42413848638534546, + "step": 8560 + }, + { + "epoch": 1.9736023054755043, + "grad_norm": 1.7623362797842286, + "learning_rate": 9.773491784347276e-10, + "loss": 0.49263373017311096, + "step": 8561 + }, + { + "epoch": 1.9738328530259366, + "grad_norm": 1.5088601311568544, + "learning_rate": 9.605736501350214e-10, + "loss": 0.5056403279304504, + "step": 8562 + }, + { + "epoch": 1.9740634005763689, + "grad_norm": 1.3757762309730726, + "learning_rate": 9.439432718711949e-10, + "loss": 0.38567453622817993, + "step": 8563 + }, + { + "epoch": 1.9742939481268011, + "grad_norm": 1.607385096449106, + "learning_rate": 9.274580460593151e-10, + "loss": 0.4299001395702362, + "step": 8564 + }, + { + "epoch": 1.9745244956772334, + "grad_norm": 1.8304487641958882, + "learning_rate": 9.111179750946884e-10, + "loss": 0.40686798095703125, + "step": 8565 + }, + { + "epoch": 1.9747550432276657, + "grad_norm": 1.544468124931417, + "learning_rate": 8.949230613514159e-10, + "loss": 0.46604692935943604, + "step": 8566 + }, + { + "epoch": 1.974985590778098, + "grad_norm": 1.4920348808355277, + "learning_rate": 8.788733071821708e-10, + "loss": 0.4761780798435211, + "step": 8567 + }, + { + "epoch": 1.9752161383285303, + "grad_norm": 1.7050549298025448, + "learning_rate": 8.629687149190878e-10, + "loss": 0.46284541487693787, + "step": 8568 + }, + { + "epoch": 1.9754466858789625, + "grad_norm": 1.4180271535096691, + "learning_rate": 8.472092868728741e-10, + "loss": 0.40577346086502075, + "step": 8569 + }, + { + "epoch": 1.9756772334293948, + "grad_norm": 1.4754290558138181, + "learning_rate": 8.315950253330317e-10, + "loss": 0.41758865118026733, + "step": 8570 + }, + { + "epoch": 1.975907780979827, + "grad_norm": 1.570725289162605, + "learning_rate": 8.161259325684123e-10, + "loss": 0.4157485067844391, + "step": 8571 + }, + { + "epoch": 1.9761383285302594, + "grad_norm": 1.665740660596312, + "learning_rate": 8.008020108263292e-10, + "loss": 0.5008315443992615, + "step": 8572 + }, + { + "epoch": 1.9763688760806917, + "grad_norm": 1.6456396704959368, + "learning_rate": 7.856232623332238e-10, + "loss": 0.4801330864429474, + "step": 8573 + }, + { + "epoch": 1.976599423631124, + "grad_norm": 1.776295117264788, + "learning_rate": 7.70589689294554e-10, + "loss": 0.4999025762081146, + "step": 8574 + }, + { + "epoch": 1.9768299711815562, + "grad_norm": 9.074929873540922, + "learning_rate": 7.557012938943508e-10, + "loss": 0.484047532081604, + "step": 8575 + }, + { + "epoch": 1.9770605187319885, + "grad_norm": 1.6103903391555485, + "learning_rate": 7.409580782957725e-10, + "loss": 0.4282698631286621, + "step": 8576 + }, + { + "epoch": 1.9772910662824208, + "grad_norm": 1.750902728026367, + "learning_rate": 7.263600446409946e-10, + "loss": 0.4923356771469116, + "step": 8577 + }, + { + "epoch": 1.977521613832853, + "grad_norm": 1.6083242786765162, + "learning_rate": 7.119071950507649e-10, + "loss": 0.4218749701976776, + "step": 8578 + }, + { + "epoch": 1.9777521613832854, + "grad_norm": 1.8060957887763938, + "learning_rate": 6.975995316250705e-10, + "loss": 0.4842839241027832, + "step": 8579 + }, + { + "epoch": 1.9779827089337174, + "grad_norm": 1.6027374422619212, + "learning_rate": 6.834370564426928e-10, + "loss": 0.4656603932380676, + "step": 8580 + }, + { + "epoch": 1.9782132564841497, + "grad_norm": 2.1305794902642012, + "learning_rate": 6.694197715612082e-10, + "loss": 0.4844403862953186, + "step": 8581 + }, + { + "epoch": 1.978443804034582, + "grad_norm": 1.5897213596359092, + "learning_rate": 6.555476790170988e-10, + "loss": 0.4485793709754944, + "step": 8582 + }, + { + "epoch": 1.9786743515850143, + "grad_norm": 1.6781887879321788, + "learning_rate": 6.418207808259746e-10, + "loss": 0.43551105260849, + "step": 8583 + }, + { + "epoch": 1.9789048991354465, + "grad_norm": 1.6124895924649505, + "learning_rate": 6.2823907898224e-10, + "loss": 0.5033091902732849, + "step": 8584 + }, + { + "epoch": 1.9791354466858788, + "grad_norm": 2.074419599659983, + "learning_rate": 6.148025754590946e-10, + "loss": 0.5115556120872498, + "step": 8585 + }, + { + "epoch": 1.979365994236311, + "grad_norm": 1.4135481748357088, + "learning_rate": 6.015112722087545e-10, + "loss": 0.43797826766967773, + "step": 8586 + }, + { + "epoch": 1.9795965417867434, + "grad_norm": 1.846574308775448, + "learning_rate": 5.883651711622306e-10, + "loss": 0.5435835123062134, + "step": 8587 + }, + { + "epoch": 1.9798270893371757, + "grad_norm": 1.5580114920033599, + "learning_rate": 5.753642742296616e-10, + "loss": 0.4738013744354248, + "step": 8588 + }, + { + "epoch": 1.980057636887608, + "grad_norm": 1.6585753953233522, + "learning_rate": 5.62508583299759e-10, + "loss": 0.4558696150779724, + "step": 8589 + }, + { + "epoch": 1.9802881844380402, + "grad_norm": 1.7432531783107619, + "learning_rate": 5.497981002404728e-10, + "loss": 0.42724609375, + "step": 8590 + }, + { + "epoch": 1.9805187319884725, + "grad_norm": 1.9941803693564724, + "learning_rate": 5.372328268984372e-10, + "loss": 0.5050727128982544, + "step": 8591 + }, + { + "epoch": 1.9807492795389048, + "grad_norm": 1.513644928299055, + "learning_rate": 5.248127650991919e-10, + "loss": 0.47262313961982727, + "step": 8592 + }, + { + "epoch": 1.980979827089337, + "grad_norm": 2.012555372158305, + "learning_rate": 5.125379166474042e-10, + "loss": 0.48336970806121826, + "step": 8593 + }, + { + "epoch": 1.9812103746397693, + "grad_norm": 2.154806409478182, + "learning_rate": 5.004082833264256e-10, + "loss": 0.4705643653869629, + "step": 8594 + }, + { + "epoch": 1.9814409221902016, + "grad_norm": 1.875214683556783, + "learning_rate": 4.88423866898513e-10, + "loss": 0.4459192156791687, + "step": 8595 + }, + { + "epoch": 1.981671469740634, + "grad_norm": 1.4453078097407226, + "learning_rate": 4.765846691048292e-10, + "loss": 0.3831733763217926, + "step": 8596 + }, + { + "epoch": 1.9819020172910662, + "grad_norm": 1.920632928170187, + "learning_rate": 4.648906916655537e-10, + "loss": 0.5818041563034058, + "step": 8597 + }, + { + "epoch": 1.9821325648414985, + "grad_norm": 1.9427780014267613, + "learning_rate": 4.533419362797719e-10, + "loss": 0.4342089891433716, + "step": 8598 + }, + { + "epoch": 1.9823631123919307, + "grad_norm": 1.422370508458207, + "learning_rate": 4.419384046253638e-10, + "loss": 0.37377679347991943, + "step": 8599 + }, + { + "epoch": 1.982593659942363, + "grad_norm": 1.432187503069185, + "learning_rate": 4.306800983590042e-10, + "loss": 0.45839375257492065, + "step": 8600 + }, + { + "epoch": 1.9828242074927953, + "grad_norm": 1.895331297890617, + "learning_rate": 4.1956701911660677e-10, + "loss": 0.3925288915634155, + "step": 8601 + }, + { + "epoch": 1.9830547550432276, + "grad_norm": 1.6305983213480166, + "learning_rate": 4.0859916851265775e-10, + "loss": 0.5161505937576294, + "step": 8602 + }, + { + "epoch": 1.9832853025936599, + "grad_norm": 2.3010002033435515, + "learning_rate": 3.977765481406603e-10, + "loss": 0.42376708984375, + "step": 8603 + }, + { + "epoch": 1.9835158501440922, + "grad_norm": 1.4014683599849886, + "learning_rate": 3.8709915957313434e-10, + "loss": 0.3881904184818268, + "step": 8604 + }, + { + "epoch": 1.9837463976945244, + "grad_norm": 2.1224035959751872, + "learning_rate": 3.7656700436139446e-10, + "loss": 0.4746381640434265, + "step": 8605 + }, + { + "epoch": 1.9839769452449567, + "grad_norm": 1.647611076747471, + "learning_rate": 3.66180084035439e-10, + "loss": 0.449998140335083, + "step": 8606 + }, + { + "epoch": 1.984207492795389, + "grad_norm": 1.5362594317031057, + "learning_rate": 3.559384001046162e-10, + "loss": 0.43570417165756226, + "step": 8607 + }, + { + "epoch": 1.9844380403458213, + "grad_norm": 1.49086791729506, + "learning_rate": 3.458419540568469e-10, + "loss": 0.3809811472892761, + "step": 8608 + }, + { + "epoch": 1.9846685878962536, + "grad_norm": 1.608522281632451, + "learning_rate": 3.358907473590689e-10, + "loss": 0.49793338775634766, + "step": 8609 + }, + { + "epoch": 1.9848991354466858, + "grad_norm": 1.5981316079336074, + "learning_rate": 3.2608478145701446e-10, + "loss": 0.4516673684120178, + "step": 8610 + }, + { + "epoch": 1.9851296829971181, + "grad_norm": 1.6477885143930069, + "learning_rate": 3.1642405777554395e-10, + "loss": 0.44456255435943604, + "step": 8611 + }, + { + "epoch": 1.9853602305475504, + "grad_norm": 1.5147278214718478, + "learning_rate": 3.069085777180902e-10, + "loss": 0.48226436972618103, + "step": 8612 + }, + { + "epoch": 1.9855907780979827, + "grad_norm": 1.4407295799998574, + "learning_rate": 2.97538342667214e-10, + "loss": 0.36119428277015686, + "step": 8613 + }, + { + "epoch": 1.985821325648415, + "grad_norm": 1.80452348449383, + "learning_rate": 2.883133539842708e-10, + "loss": 0.4766218066215515, + "step": 8614 + }, + { + "epoch": 1.9860518731988472, + "grad_norm": 1.559658000515671, + "learning_rate": 2.792336130096329e-10, + "loss": 0.43342578411102295, + "step": 8615 + }, + { + "epoch": 1.9862824207492795, + "grad_norm": 1.9767603351027456, + "learning_rate": 2.702991210624672e-10, + "loss": 0.4490775763988495, + "step": 8616 + }, + { + "epoch": 1.9865129682997118, + "grad_norm": 1.7490605051188948, + "learning_rate": 2.615098794409576e-10, + "loss": 0.44993698596954346, + "step": 8617 + }, + { + "epoch": 1.986743515850144, + "grad_norm": 1.5702137688944042, + "learning_rate": 2.528658894218605e-10, + "loss": 0.42283037304878235, + "step": 8618 + }, + { + "epoch": 1.9869740634005764, + "grad_norm": 2.0052863426807157, + "learning_rate": 2.4436715226128223e-10, + "loss": 0.4462372660636902, + "step": 8619 + }, + { + "epoch": 1.9872046109510086, + "grad_norm": 1.3592854899659668, + "learning_rate": 2.3601366919379085e-10, + "loss": 0.4206271767616272, + "step": 8620 + }, + { + "epoch": 1.987435158501441, + "grad_norm": 1.4951319541257433, + "learning_rate": 2.2780544143330415e-10, + "loss": 0.36352843046188354, + "step": 8621 + }, + { + "epoch": 1.9876657060518732, + "grad_norm": 1.5047600149450613, + "learning_rate": 2.197424701722017e-10, + "loss": 0.5465779304504395, + "step": 8622 + }, + { + "epoch": 1.9878962536023055, + "grad_norm": 1.875200476718417, + "learning_rate": 2.1182475658199084e-10, + "loss": 0.4362960457801819, + "step": 8623 + }, + { + "epoch": 1.9881268011527378, + "grad_norm": 1.9250864038056952, + "learning_rate": 2.0405230181308464e-10, + "loss": 0.44332411885261536, + "step": 8624 + }, + { + "epoch": 1.98835734870317, + "grad_norm": 1.6816673707390386, + "learning_rate": 1.9642510699469094e-10, + "loss": 0.42330360412597656, + "step": 8625 + }, + { + "epoch": 1.9885878962536023, + "grad_norm": 1.6481663953899248, + "learning_rate": 1.8894317323492336e-10, + "loss": 0.40553006529808044, + "step": 8626 + }, + { + "epoch": 1.9888184438040346, + "grad_norm": 1.5770933619871585, + "learning_rate": 1.816065016209123e-10, + "loss": 0.4907650947570801, + "step": 8627 + }, + { + "epoch": 1.9890489913544669, + "grad_norm": 1.5812895865355108, + "learning_rate": 1.7441509321869384e-10, + "loss": 0.44923338294029236, + "step": 8628 + }, + { + "epoch": 1.9892795389048992, + "grad_norm": 2.0552616974565145, + "learning_rate": 1.673689490728769e-10, + "loss": 0.56303870677948, + "step": 8629 + }, + { + "epoch": 1.9895100864553315, + "grad_norm": 1.5996704371111576, + "learning_rate": 1.6046807020730912e-10, + "loss": 0.4329832196235657, + "step": 8630 + }, + { + "epoch": 1.9897406340057637, + "grad_norm": 1.686914212065089, + "learning_rate": 1.5371245762463292e-10, + "loss": 0.4422938823699951, + "step": 8631 + }, + { + "epoch": 1.989971181556196, + "grad_norm": 1.6071279103767742, + "learning_rate": 1.4710211230628543e-10, + "loss": 0.4526577889919281, + "step": 8632 + }, + { + "epoch": 1.9902017291066283, + "grad_norm": 1.7288964076018811, + "learning_rate": 1.4063703521272063e-10, + "loss": 0.4272541403770447, + "step": 8633 + }, + { + "epoch": 1.9904322766570606, + "grad_norm": 1.461321801390116, + "learning_rate": 1.343172272834092e-10, + "loss": 0.5006154775619507, + "step": 8634 + }, + { + "epoch": 1.9906628242074929, + "grad_norm": 1.8963149256285567, + "learning_rate": 1.281426894362836e-10, + "loss": 0.4116207957267761, + "step": 8635 + }, + { + "epoch": 1.9908933717579251, + "grad_norm": 1.5740070518221372, + "learning_rate": 1.22113422568626e-10, + "loss": 0.4534362554550171, + "step": 8636 + }, + { + "epoch": 1.9911239193083574, + "grad_norm": 1.6160514514770858, + "learning_rate": 1.1622942755629139e-10, + "loss": 0.3912625312805176, + "step": 8637 + }, + { + "epoch": 1.9913544668587897, + "grad_norm": 1.3729454306493092, + "learning_rate": 1.1049070525426252e-10, + "loss": 0.4240685999393463, + "step": 8638 + }, + { + "epoch": 1.991585014409222, + "grad_norm": 1.6710128219591365, + "learning_rate": 1.0489725649631687e-10, + "loss": 0.4178678095340729, + "step": 8639 + }, + { + "epoch": 1.9918155619596543, + "grad_norm": 1.7123543216360648, + "learning_rate": 9.944908209513769e-11, + "loss": 0.48780155181884766, + "step": 8640 + }, + { + "epoch": 1.9920461095100865, + "grad_norm": 1.658023119876229, + "learning_rate": 9.414618284220299e-11, + "loss": 0.4556965231895447, + "step": 8641 + }, + { + "epoch": 1.9922766570605188, + "grad_norm": 1.7782616665984705, + "learning_rate": 8.898855950800754e-11, + "loss": 0.5117144584655762, + "step": 8642 + }, + { + "epoch": 1.992507204610951, + "grad_norm": 1.8221716524523575, + "learning_rate": 8.397621284195189e-11, + "loss": 0.46460968255996704, + "step": 8643 + }, + { + "epoch": 1.9927377521613834, + "grad_norm": 1.68635723508226, + "learning_rate": 7.910914357223131e-11, + "loss": 0.45853862166404724, + "step": 8644 + }, + { + "epoch": 1.9929682997118157, + "grad_norm": 1.5020447755054196, + "learning_rate": 7.438735240594684e-11, + "loss": 0.41449788212776184, + "step": 8645 + }, + { + "epoch": 1.993198847262248, + "grad_norm": 1.896332617219016, + "learning_rate": 6.981084002910532e-11, + "loss": 0.48925185203552246, + "step": 8646 + }, + { + "epoch": 1.9934293948126802, + "grad_norm": 1.5105640269915757, + "learning_rate": 6.53796071067303e-11, + "loss": 0.43202799558639526, + "step": 8647 + }, + { + "epoch": 1.9936599423631125, + "grad_norm": 1.5063306441530113, + "learning_rate": 6.109365428264013e-11, + "loss": 0.43396270275115967, + "step": 8648 + }, + { + "epoch": 1.9938904899135448, + "grad_norm": 1.5334796324468363, + "learning_rate": 5.695298217944788e-11, + "loss": 0.44604524970054626, + "step": 8649 + }, + { + "epoch": 1.994121037463977, + "grad_norm": 1.5502891688232796, + "learning_rate": 5.295759139878342e-11, + "loss": 0.411149799823761, + "step": 8650 + }, + { + "epoch": 1.9943515850144093, + "grad_norm": 1.590838117330855, + "learning_rate": 4.910748252107133e-11, + "loss": 0.38791534304618835, + "step": 8651 + }, + { + "epoch": 1.9945821325648416, + "grad_norm": 1.642413682053721, + "learning_rate": 4.5402656105864024e-11, + "loss": 0.4820106625556946, + "step": 8652 + }, + { + "epoch": 1.994812680115274, + "grad_norm": 1.543914106365223, + "learning_rate": 4.18431126912866e-11, + "loss": 0.408313512802124, + "step": 8653 + }, + { + "epoch": 1.9950432276657062, + "grad_norm": 1.5678544580393214, + "learning_rate": 3.8428852794480935e-11, + "loss": 0.4617573022842407, + "step": 8654 + }, + { + "epoch": 1.9952737752161385, + "grad_norm": 1.636428470799379, + "learning_rate": 3.5159876911716737e-11, + "loss": 0.508655309677124, + "step": 8655 + }, + { + "epoch": 1.9955043227665707, + "grad_norm": 1.4677587127880338, + "learning_rate": 3.203618551761433e-11, + "loss": 0.45601966977119446, + "step": 8656 + }, + { + "epoch": 1.995734870317003, + "grad_norm": 1.8390476738420678, + "learning_rate": 2.9057779066365972e-11, + "loss": 0.4519381523132324, + "step": 8657 + }, + { + "epoch": 1.9959654178674353, + "grad_norm": 1.5346345441319986, + "learning_rate": 2.622465799040352e-11, + "loss": 0.48502281308174133, + "step": 8658 + }, + { + "epoch": 1.9961959654178676, + "grad_norm": 1.9727012907550732, + "learning_rate": 2.3536822701508695e-11, + "loss": 0.5099426507949829, + "step": 8659 + }, + { + "epoch": 1.9964265129682999, + "grad_norm": 1.825075486461616, + "learning_rate": 2.099427359025796e-11, + "loss": 0.493893563747406, + "step": 8660 + }, + { + "epoch": 1.9966570605187322, + "grad_norm": 1.7578437209431468, + "learning_rate": 1.859701102591149e-11, + "loss": 0.43132543563842773, + "step": 8661 + }, + { + "epoch": 1.9968876080691642, + "grad_norm": 1.5322532834446179, + "learning_rate": 1.634503535674625e-11, + "loss": 0.3871678113937378, + "step": 8662 + }, + { + "epoch": 1.9971181556195965, + "grad_norm": 1.5983076161224297, + "learning_rate": 1.4238346910167009e-11, + "loss": 0.35364365577697754, + "step": 8663 + }, + { + "epoch": 1.9973487031700288, + "grad_norm": 1.6351729263694506, + "learning_rate": 1.2276945991929189e-11, + "loss": 0.3983590006828308, + "step": 8664 + }, + { + "epoch": 1.997579250720461, + "grad_norm": 1.9232763939009716, + "learning_rate": 1.0460832887360105e-11, + "loss": 0.5645977258682251, + "step": 8665 + }, + { + "epoch": 1.9978097982708933, + "grad_norm": 1.598471688595569, + "learning_rate": 8.790007860026705e-12, + "loss": 0.49037492275238037, + "step": 8666 + }, + { + "epoch": 1.9980403458213256, + "grad_norm": 1.4231796101274559, + "learning_rate": 7.264471152845786e-12, + "loss": 0.43441101908683777, + "step": 8667 + }, + { + "epoch": 1.998270893371758, + "grad_norm": 1.485143249087042, + "learning_rate": 5.884222987417864e-12, + "loss": 0.5325566530227661, + "step": 8668 + }, + { + "epoch": 1.9985014409221902, + "grad_norm": 1.766101207356354, + "learning_rate": 4.649263564249217e-12, + "loss": 0.5419152975082397, + "step": 8669 + }, + { + "epoch": 1.9987319884726225, + "grad_norm": 1.4716315906020312, + "learning_rate": 3.559593062862909e-12, + "loss": 0.43845266103744507, + "step": 8670 + }, + { + "epoch": 1.9989625360230547, + "grad_norm": 1.806573459584031, + "learning_rate": 2.6152116414657198e-12, + "loss": 0.36655327677726746, + "step": 8671 + }, + { + "epoch": 1.999193083573487, + "grad_norm": 1.558732737347915, + "learning_rate": 1.8161194373922383e-12, + "loss": 0.4533482789993286, + "step": 8672 + }, + { + "epoch": 1.9994236311239193, + "grad_norm": 1.8969538976627203, + "learning_rate": 1.1623165665497481e-12, + "loss": 0.5300408005714417, + "step": 8673 + }, + { + "epoch": 1.9996541786743516, + "grad_norm": 1.755489261574437, + "learning_rate": 6.538031240843622e-13, + "loss": 0.48557695746421814, + "step": 8674 + }, + { + "epoch": 1.9998847262247839, + "grad_norm": 1.6246828645550628, + "learning_rate": 2.9057918382591196e-13, + "loss": 0.42293277382850647, + "step": 8675 + }, + { + "epoch": 2.0, + "grad_norm": 2.2404218937215554, + "learning_rate": 7.264479862101324e-14, + "loss": 0.2570451498031616, + "step": 8676 + } + ], + "logging_steps": 1, + "max_steps": 8676, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3025729836269568.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}