{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 8680, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002304147465437788, "grad_norm": 0.3584135221139379, "learning_rate": 0.0, "loss": 1.1575632095336914, "step": 1 }, { "epoch": 0.0004608294930875576, "grad_norm": 0.3035367055626511, "learning_rate": 4.6082949308755755e-09, "loss": 0.9973502159118652, "step": 2 }, { "epoch": 0.0006912442396313364, "grad_norm": 0.39685233086299543, "learning_rate": 9.216589861751151e-09, "loss": 1.0778999328613281, "step": 3 }, { "epoch": 0.0009216589861751152, "grad_norm": 0.4029042979509503, "learning_rate": 1.3824884792626728e-08, "loss": 1.1912263631820679, "step": 4 }, { "epoch": 0.001152073732718894, "grad_norm": 0.3943812894307851, "learning_rate": 1.8433179723502302e-08, "loss": 1.136031150817871, "step": 5 }, { "epoch": 0.0013824884792626728, "grad_norm": 0.472718552613566, "learning_rate": 2.304147465437788e-08, "loss": 1.1647956371307373, "step": 6 }, { "epoch": 0.0016129032258064516, "grad_norm": 0.4378363913681294, "learning_rate": 2.7649769585253456e-08, "loss": 1.144924283027649, "step": 7 }, { "epoch": 0.0018433179723502304, "grad_norm": 0.412264706125121, "learning_rate": 3.225806451612903e-08, "loss": 1.1821019649505615, "step": 8 }, { "epoch": 0.0020737327188940094, "grad_norm": 0.35864626774735575, "learning_rate": 3.6866359447004604e-08, "loss": 1.0586045980453491, "step": 9 }, { "epoch": 0.002304147465437788, "grad_norm": 0.497058147699291, "learning_rate": 4.1474654377880186e-08, "loss": 1.2029818296432495, "step": 10 }, { "epoch": 0.002534562211981567, "grad_norm": 0.465265464928516, "learning_rate": 4.608294930875576e-08, "loss": 1.1411634683609009, "step": 11 }, { "epoch": 0.0027649769585253456, "grad_norm": 0.4356529753705429, "learning_rate": 5.069124423963134e-08, "loss": 1.2719087600708008, "step": 12 }, { "epoch": 0.0029953917050691246, "grad_norm": 0.4469831586732583, "learning_rate": 5.529953917050691e-08, "loss": 1.1132495403289795, "step": 13 }, { "epoch": 0.0032258064516129032, "grad_norm": 0.3918942421249174, "learning_rate": 5.990783410138249e-08, "loss": 1.1900808811187744, "step": 14 }, { "epoch": 0.0034562211981566822, "grad_norm": 0.33446734054876004, "learning_rate": 6.451612903225806e-08, "loss": 1.2273608446121216, "step": 15 }, { "epoch": 0.003686635944700461, "grad_norm": 0.4610551419026991, "learning_rate": 6.912442396313364e-08, "loss": 1.2130601406097412, "step": 16 }, { "epoch": 0.00391705069124424, "grad_norm": 0.4765520188128542, "learning_rate": 7.373271889400921e-08, "loss": 1.0534124374389648, "step": 17 }, { "epoch": 0.004147465437788019, "grad_norm": 0.4247458361448018, "learning_rate": 7.834101382488478e-08, "loss": 1.1796221733093262, "step": 18 }, { "epoch": 0.004377880184331797, "grad_norm": 0.42651087679972033, "learning_rate": 8.294930875576037e-08, "loss": 1.118175745010376, "step": 19 }, { "epoch": 0.004608294930875576, "grad_norm": 0.37538111415149067, "learning_rate": 8.755760368663594e-08, "loss": 1.140963077545166, "step": 20 }, { "epoch": 0.004838709677419355, "grad_norm": 0.39373769301837386, "learning_rate": 9.216589861751152e-08, "loss": 1.107339859008789, "step": 21 }, { "epoch": 0.005069124423963134, "grad_norm": 0.5053900590341595, "learning_rate": 9.677419354838709e-08, "loss": 1.171803593635559, "step": 22 }, { "epoch": 0.005299539170506912, "grad_norm": 0.32897537004851696, "learning_rate": 1.0138248847926267e-07, "loss": 0.9935251474380493, "step": 23 }, { "epoch": 0.005529953917050691, "grad_norm": 0.4355535799950001, "learning_rate": 1.0599078341013824e-07, "loss": 1.0870952606201172, "step": 24 }, { "epoch": 0.00576036866359447, "grad_norm": 0.5215895570336967, "learning_rate": 1.1059907834101383e-07, "loss": 1.1520278453826904, "step": 25 }, { "epoch": 0.005990783410138249, "grad_norm": 0.4878994851998504, "learning_rate": 1.152073732718894e-07, "loss": 1.3603750467300415, "step": 26 }, { "epoch": 0.006221198156682027, "grad_norm": 0.3985371704289713, "learning_rate": 1.1981566820276498e-07, "loss": 1.230550765991211, "step": 27 }, { "epoch": 0.0064516129032258064, "grad_norm": 0.4105556408349015, "learning_rate": 1.2442396313364054e-07, "loss": 1.267604112625122, "step": 28 }, { "epoch": 0.0066820276497695855, "grad_norm": 0.3604672745500653, "learning_rate": 1.2903225806451611e-07, "loss": 1.344348669052124, "step": 29 }, { "epoch": 0.0069124423963133645, "grad_norm": 0.42234881975895605, "learning_rate": 1.336405529953917e-07, "loss": 1.2794291973114014, "step": 30 }, { "epoch": 0.007142857142857143, "grad_norm": 0.39749887698930225, "learning_rate": 1.3824884792626728e-07, "loss": 1.2841103076934814, "step": 31 }, { "epoch": 0.007373271889400922, "grad_norm": 0.34204310388035036, "learning_rate": 1.4285714285714285e-07, "loss": 1.1505224704742432, "step": 32 }, { "epoch": 0.007603686635944701, "grad_norm": 0.36676388907062357, "learning_rate": 1.4746543778801842e-07, "loss": 0.9800833463668823, "step": 33 }, { "epoch": 0.00783410138248848, "grad_norm": 0.4216809539302965, "learning_rate": 1.52073732718894e-07, "loss": 1.3712589740753174, "step": 34 }, { "epoch": 0.008064516129032258, "grad_norm": 0.46644559931224167, "learning_rate": 1.5668202764976955e-07, "loss": 1.2274689674377441, "step": 35 }, { "epoch": 0.008294930875576038, "grad_norm": 0.41359150478695417, "learning_rate": 1.6129032258064515e-07, "loss": 1.0673755407333374, "step": 36 }, { "epoch": 0.008525345622119816, "grad_norm": 0.534062363030203, "learning_rate": 1.6589861751152074e-07, "loss": 1.242164134979248, "step": 37 }, { "epoch": 0.008755760368663594, "grad_norm": 0.48756247774131056, "learning_rate": 1.705069124423963e-07, "loss": 1.190554141998291, "step": 38 }, { "epoch": 0.008986175115207374, "grad_norm": 0.35848282094721656, "learning_rate": 1.7511520737327188e-07, "loss": 1.3119773864746094, "step": 39 }, { "epoch": 0.009216589861751152, "grad_norm": 0.4466769921356875, "learning_rate": 1.7972350230414745e-07, "loss": 1.2532517910003662, "step": 40 }, { "epoch": 0.00944700460829493, "grad_norm": 0.4271763580587928, "learning_rate": 1.8433179723502305e-07, "loss": 1.307154655456543, "step": 41 }, { "epoch": 0.00967741935483871, "grad_norm": 0.432221455567464, "learning_rate": 1.889400921658986e-07, "loss": 1.1899281740188599, "step": 42 }, { "epoch": 0.009907834101382488, "grad_norm": 0.48501644393966153, "learning_rate": 1.9354838709677418e-07, "loss": 1.1928249597549438, "step": 43 }, { "epoch": 0.010138248847926268, "grad_norm": 0.35170632131851265, "learning_rate": 1.9815668202764975e-07, "loss": 1.1663157939910889, "step": 44 }, { "epoch": 0.010368663594470046, "grad_norm": 0.43449129429745276, "learning_rate": 2.0276497695852535e-07, "loss": 1.1806118488311768, "step": 45 }, { "epoch": 0.010599078341013824, "grad_norm": 0.39933118678172597, "learning_rate": 2.073732718894009e-07, "loss": 1.1704952716827393, "step": 46 }, { "epoch": 0.010829493087557604, "grad_norm": 0.46071358975984034, "learning_rate": 2.1198156682027649e-07, "loss": 1.2124149799346924, "step": 47 }, { "epoch": 0.011059907834101382, "grad_norm": 0.325920139351066, "learning_rate": 2.1658986175115208e-07, "loss": 1.041813850402832, "step": 48 }, { "epoch": 0.01129032258064516, "grad_norm": 0.4189805583015969, "learning_rate": 2.2119815668202765e-07, "loss": 1.255402684211731, "step": 49 }, { "epoch": 0.01152073732718894, "grad_norm": 0.369986826532368, "learning_rate": 2.2580645161290322e-07, "loss": 1.1115221977233887, "step": 50 }, { "epoch": 0.011751152073732719, "grad_norm": 0.501835295036206, "learning_rate": 2.304147465437788e-07, "loss": 1.4048426151275635, "step": 51 }, { "epoch": 0.011981566820276499, "grad_norm": 0.38759638044019523, "learning_rate": 2.3502304147465438e-07, "loss": 1.1690936088562012, "step": 52 }, { "epoch": 0.012211981566820277, "grad_norm": 0.43771993971927803, "learning_rate": 2.3963133640552995e-07, "loss": 1.164888620376587, "step": 53 }, { "epoch": 0.012442396313364055, "grad_norm": 0.5047093250847474, "learning_rate": 2.442396313364055e-07, "loss": 1.004424810409546, "step": 54 }, { "epoch": 0.012672811059907835, "grad_norm": 0.371768250028493, "learning_rate": 2.488479262672811e-07, "loss": 0.8810856342315674, "step": 55 }, { "epoch": 0.012903225806451613, "grad_norm": 0.41437582347111235, "learning_rate": 2.534562211981567e-07, "loss": 1.300262451171875, "step": 56 }, { "epoch": 0.013133640552995391, "grad_norm": 0.44923919860912964, "learning_rate": 2.5806451612903223e-07, "loss": 1.3624285459518433, "step": 57 }, { "epoch": 0.013364055299539171, "grad_norm": 0.37916325568511644, "learning_rate": 2.6267281105990777e-07, "loss": 1.2133375406265259, "step": 58 }, { "epoch": 0.013594470046082949, "grad_norm": 0.3665676434937369, "learning_rate": 2.672811059907834e-07, "loss": 1.2203283309936523, "step": 59 }, { "epoch": 0.013824884792626729, "grad_norm": 0.4314731168039537, "learning_rate": 2.7188940092165896e-07, "loss": 1.291412353515625, "step": 60 }, { "epoch": 0.014055299539170507, "grad_norm": 0.46787898249820037, "learning_rate": 2.7649769585253456e-07, "loss": 1.1596577167510986, "step": 61 }, { "epoch": 0.014285714285714285, "grad_norm": 0.34850075759056304, "learning_rate": 2.8110599078341015e-07, "loss": 0.9789823889732361, "step": 62 }, { "epoch": 0.014516129032258065, "grad_norm": 0.46810420323672, "learning_rate": 2.857142857142857e-07, "loss": 1.220383882522583, "step": 63 }, { "epoch": 0.014746543778801843, "grad_norm": 0.36577992953429955, "learning_rate": 2.903225806451613e-07, "loss": 1.0961871147155762, "step": 64 }, { "epoch": 0.014976958525345621, "grad_norm": 0.4155727286496237, "learning_rate": 2.9493087557603683e-07, "loss": 1.2281936407089233, "step": 65 }, { "epoch": 0.015207373271889401, "grad_norm": 0.48770399467414544, "learning_rate": 2.9953917050691243e-07, "loss": 1.279728889465332, "step": 66 }, { "epoch": 0.01543778801843318, "grad_norm": 0.3697109399388579, "learning_rate": 3.04147465437788e-07, "loss": 1.0932798385620117, "step": 67 }, { "epoch": 0.01566820276497696, "grad_norm": 0.4768828309013543, "learning_rate": 3.0875576036866356e-07, "loss": 1.1612955331802368, "step": 68 }, { "epoch": 0.015898617511520736, "grad_norm": 0.335260500319883, "learning_rate": 3.133640552995391e-07, "loss": 1.193152666091919, "step": 69 }, { "epoch": 0.016129032258064516, "grad_norm": 0.3754577001974335, "learning_rate": 3.1797235023041476e-07, "loss": 1.3303695917129517, "step": 70 }, { "epoch": 0.016359447004608296, "grad_norm": 0.5384978005623245, "learning_rate": 3.225806451612903e-07, "loss": 1.3735731840133667, "step": 71 }, { "epoch": 0.016589861751152075, "grad_norm": 0.44147085813841874, "learning_rate": 3.271889400921659e-07, "loss": 1.162925124168396, "step": 72 }, { "epoch": 0.016820276497695852, "grad_norm": 0.46260262466297236, "learning_rate": 3.317972350230415e-07, "loss": 1.3879203796386719, "step": 73 }, { "epoch": 0.017050691244239632, "grad_norm": 0.33864035083037825, "learning_rate": 3.3640552995391703e-07, "loss": 1.2721638679504395, "step": 74 }, { "epoch": 0.01728110599078341, "grad_norm": 0.5797449954735189, "learning_rate": 3.410138248847926e-07, "loss": 1.3997783660888672, "step": 75 }, { "epoch": 0.017511520737327188, "grad_norm": 0.3824734589731608, "learning_rate": 3.4562211981566817e-07, "loss": 1.1099059581756592, "step": 76 }, { "epoch": 0.017741935483870968, "grad_norm": 0.6286343528066216, "learning_rate": 3.5023041474654376e-07, "loss": 1.341759204864502, "step": 77 }, { "epoch": 0.017972350230414748, "grad_norm": 0.41058458963409694, "learning_rate": 3.5483870967741936e-07, "loss": 1.343479871749878, "step": 78 }, { "epoch": 0.018202764976958524, "grad_norm": 0.41653629518149576, "learning_rate": 3.594470046082949e-07, "loss": 1.2225772142410278, "step": 79 }, { "epoch": 0.018433179723502304, "grad_norm": 0.37871730557010347, "learning_rate": 3.6405529953917044e-07, "loss": 1.1934573650360107, "step": 80 }, { "epoch": 0.018663594470046084, "grad_norm": 0.36930989407616927, "learning_rate": 3.686635944700461e-07, "loss": 1.099440336227417, "step": 81 }, { "epoch": 0.01889400921658986, "grad_norm": 0.4445938548359885, "learning_rate": 3.7327188940092163e-07, "loss": 1.0864269733428955, "step": 82 }, { "epoch": 0.01912442396313364, "grad_norm": 0.4183127094774659, "learning_rate": 3.778801843317972e-07, "loss": 1.0706703662872314, "step": 83 }, { "epoch": 0.01935483870967742, "grad_norm": 0.3377183372891763, "learning_rate": 3.824884792626728e-07, "loss": 1.1675662994384766, "step": 84 }, { "epoch": 0.019585253456221197, "grad_norm": 0.4219766455348787, "learning_rate": 3.8709677419354837e-07, "loss": 1.3294553756713867, "step": 85 }, { "epoch": 0.019815668202764977, "grad_norm": 0.39357768126078463, "learning_rate": 3.9170506912442396e-07, "loss": 1.050878882408142, "step": 86 }, { "epoch": 0.020046082949308756, "grad_norm": 0.5263429396452582, "learning_rate": 3.963133640552995e-07, "loss": 1.3243739604949951, "step": 87 }, { "epoch": 0.020276497695852536, "grad_norm": 0.4373425676890139, "learning_rate": 4.009216589861751e-07, "loss": 1.1350429058074951, "step": 88 }, { "epoch": 0.020506912442396313, "grad_norm": 0.39555461421299365, "learning_rate": 4.055299539170507e-07, "loss": 1.24526047706604, "step": 89 }, { "epoch": 0.020737327188940093, "grad_norm": 0.5372699223271491, "learning_rate": 4.1013824884792624e-07, "loss": 1.3459908962249756, "step": 90 }, { "epoch": 0.020967741935483872, "grad_norm": 0.45711998906450413, "learning_rate": 4.147465437788018e-07, "loss": 1.2129223346710205, "step": 91 }, { "epoch": 0.02119815668202765, "grad_norm": 0.396171288478396, "learning_rate": 4.1935483870967743e-07, "loss": 1.0522969961166382, "step": 92 }, { "epoch": 0.02142857142857143, "grad_norm": 0.4102245507283394, "learning_rate": 4.2396313364055297e-07, "loss": 1.3128937482833862, "step": 93 }, { "epoch": 0.02165898617511521, "grad_norm": 0.4498995421630644, "learning_rate": 4.285714285714285e-07, "loss": 1.3582855463027954, "step": 94 }, { "epoch": 0.021889400921658985, "grad_norm": 0.401280081593378, "learning_rate": 4.3317972350230416e-07, "loss": 1.3959028720855713, "step": 95 }, { "epoch": 0.022119815668202765, "grad_norm": 0.34811166324547105, "learning_rate": 4.377880184331797e-07, "loss": 1.149501085281372, "step": 96 }, { "epoch": 0.022350230414746545, "grad_norm": 0.48133121679013907, "learning_rate": 4.423963133640553e-07, "loss": 1.024135708808899, "step": 97 }, { "epoch": 0.02258064516129032, "grad_norm": 0.42298775317954185, "learning_rate": 4.4700460829493084e-07, "loss": 0.9255483150482178, "step": 98 }, { "epoch": 0.0228110599078341, "grad_norm": 0.4429779269301727, "learning_rate": 4.5161290322580644e-07, "loss": 1.1694722175598145, "step": 99 }, { "epoch": 0.02304147465437788, "grad_norm": 0.5257102588195529, "learning_rate": 4.5622119815668203e-07, "loss": 1.1588457822799683, "step": 100 }, { "epoch": 0.023271889400921657, "grad_norm": 0.37478821324150746, "learning_rate": 4.608294930875576e-07, "loss": 1.172672986984253, "step": 101 }, { "epoch": 0.023502304147465437, "grad_norm": 0.5416446977134604, "learning_rate": 4.654377880184331e-07, "loss": 1.092405915260315, "step": 102 }, { "epoch": 0.023732718894009217, "grad_norm": 0.40304171727239163, "learning_rate": 4.7004608294930877e-07, "loss": 1.11540687084198, "step": 103 }, { "epoch": 0.023963133640552997, "grad_norm": 0.46185115643683655, "learning_rate": 4.746543778801843e-07, "loss": 1.1380189657211304, "step": 104 }, { "epoch": 0.024193548387096774, "grad_norm": 0.4705857339336588, "learning_rate": 4.792626728110599e-07, "loss": 1.1031086444854736, "step": 105 }, { "epoch": 0.024423963133640553, "grad_norm": 0.38094574356569405, "learning_rate": 4.838709677419355e-07, "loss": 1.1988024711608887, "step": 106 }, { "epoch": 0.024654377880184333, "grad_norm": 0.48794686062473364, "learning_rate": 4.88479262672811e-07, "loss": 1.0814614295959473, "step": 107 }, { "epoch": 0.02488479262672811, "grad_norm": 0.41304010922593737, "learning_rate": 4.930875576036866e-07, "loss": 1.0541695356369019, "step": 108 }, { "epoch": 0.02511520737327189, "grad_norm": 0.4262047073398665, "learning_rate": 4.976958525345622e-07, "loss": 1.2281692028045654, "step": 109 }, { "epoch": 0.02534562211981567, "grad_norm": 0.4617413170072456, "learning_rate": 5.023041474654378e-07, "loss": 1.2542369365692139, "step": 110 }, { "epoch": 0.025576036866359446, "grad_norm": 0.46571699511286535, "learning_rate": 5.069124423963134e-07, "loss": 1.36039137840271, "step": 111 }, { "epoch": 0.025806451612903226, "grad_norm": 0.3893860976585314, "learning_rate": 5.11520737327189e-07, "loss": 1.1092976331710815, "step": 112 }, { "epoch": 0.026036866359447006, "grad_norm": 0.4636216593448083, "learning_rate": 5.161290322580645e-07, "loss": 1.0634076595306396, "step": 113 }, { "epoch": 0.026267281105990782, "grad_norm": 0.3440530135190564, "learning_rate": 5.2073732718894e-07, "loss": 1.0024809837341309, "step": 114 }, { "epoch": 0.026497695852534562, "grad_norm": 0.4346835070660911, "learning_rate": 5.253456221198155e-07, "loss": 1.1691724061965942, "step": 115 }, { "epoch": 0.026728110599078342, "grad_norm": 0.46992230717269323, "learning_rate": 5.299539170506912e-07, "loss": 1.2053219079971313, "step": 116 }, { "epoch": 0.02695852534562212, "grad_norm": 0.3668719861525143, "learning_rate": 5.345622119815668e-07, "loss": 1.119420051574707, "step": 117 }, { "epoch": 0.027188940092165898, "grad_norm": 0.44063509410116297, "learning_rate": 5.391705069124423e-07, "loss": 1.1640167236328125, "step": 118 }, { "epoch": 0.027419354838709678, "grad_norm": 0.41158620514350025, "learning_rate": 5.437788018433179e-07, "loss": 1.180116057395935, "step": 119 }, { "epoch": 0.027649769585253458, "grad_norm": 0.4684655855415561, "learning_rate": 5.483870967741935e-07, "loss": 1.0726159811019897, "step": 120 }, { "epoch": 0.027880184331797234, "grad_norm": 0.44443528947779826, "learning_rate": 5.529953917050691e-07, "loss": 1.03219473361969, "step": 121 }, { "epoch": 0.028110599078341014, "grad_norm": 0.4615930748718386, "learning_rate": 5.576036866359447e-07, "loss": 1.1545735597610474, "step": 122 }, { "epoch": 0.028341013824884794, "grad_norm": 0.4154044637047318, "learning_rate": 5.622119815668203e-07, "loss": 1.2409746646881104, "step": 123 }, { "epoch": 0.02857142857142857, "grad_norm": 0.48642203067509454, "learning_rate": 5.668202764976958e-07, "loss": 1.2717409133911133, "step": 124 }, { "epoch": 0.02880184331797235, "grad_norm": 0.5633308049530943, "learning_rate": 5.714285714285714e-07, "loss": 1.523846983909607, "step": 125 }, { "epoch": 0.02903225806451613, "grad_norm": 0.47068700261388136, "learning_rate": 5.760368663594469e-07, "loss": 1.3386890888214111, "step": 126 }, { "epoch": 0.029262672811059907, "grad_norm": 0.5199142981609907, "learning_rate": 5.806451612903226e-07, "loss": 1.3080404996871948, "step": 127 }, { "epoch": 0.029493087557603687, "grad_norm": 0.530224330517059, "learning_rate": 5.852534562211982e-07, "loss": 1.3194537162780762, "step": 128 }, { "epoch": 0.029723502304147466, "grad_norm": 0.49119251759787413, "learning_rate": 5.898617511520737e-07, "loss": 1.0546228885650635, "step": 129 }, { "epoch": 0.029953917050691243, "grad_norm": 0.44238233872112126, "learning_rate": 5.944700460829493e-07, "loss": 1.3160395622253418, "step": 130 }, { "epoch": 0.030184331797235023, "grad_norm": 0.5551864793339897, "learning_rate": 5.990783410138249e-07, "loss": 1.3497555255889893, "step": 131 }, { "epoch": 0.030414746543778803, "grad_norm": 0.41383181378393813, "learning_rate": 6.036866359447004e-07, "loss": 1.0863350629806519, "step": 132 }, { "epoch": 0.03064516129032258, "grad_norm": 0.4913368059485873, "learning_rate": 6.08294930875576e-07, "loss": 1.1640913486480713, "step": 133 }, { "epoch": 0.03087557603686636, "grad_norm": 0.4309615007654084, "learning_rate": 6.129032258064516e-07, "loss": 1.398510217666626, "step": 134 }, { "epoch": 0.03110599078341014, "grad_norm": 0.46249423735581563, "learning_rate": 6.175115207373271e-07, "loss": 1.3015594482421875, "step": 135 }, { "epoch": 0.03133640552995392, "grad_norm": 0.5511951371835903, "learning_rate": 6.221198156682027e-07, "loss": 1.2786016464233398, "step": 136 }, { "epoch": 0.031566820276497695, "grad_norm": 0.35056112177409643, "learning_rate": 6.267281105990782e-07, "loss": 1.0863161087036133, "step": 137 }, { "epoch": 0.03179723502304147, "grad_norm": 0.49469780540978775, "learning_rate": 6.313364055299539e-07, "loss": 1.1590030193328857, "step": 138 }, { "epoch": 0.032027649769585255, "grad_norm": 0.4498097850802204, "learning_rate": 6.359447004608295e-07, "loss": 1.2473185062408447, "step": 139 }, { "epoch": 0.03225806451612903, "grad_norm": 0.46996183926649465, "learning_rate": 6.40552995391705e-07, "loss": 1.1982496976852417, "step": 140 }, { "epoch": 0.03248847926267281, "grad_norm": 0.39627654459475076, "learning_rate": 6.451612903225806e-07, "loss": 1.078690528869629, "step": 141 }, { "epoch": 0.03271889400921659, "grad_norm": 0.4831308537053794, "learning_rate": 6.497695852534562e-07, "loss": 1.1540311574935913, "step": 142 }, { "epoch": 0.03294930875576037, "grad_norm": 0.4510531995801552, "learning_rate": 6.543778801843318e-07, "loss": 1.319035530090332, "step": 143 }, { "epoch": 0.03317972350230415, "grad_norm": 0.46683155201608206, "learning_rate": 6.589861751152074e-07, "loss": 1.199448585510254, "step": 144 }, { "epoch": 0.03341013824884793, "grad_norm": 0.526397133846452, "learning_rate": 6.63594470046083e-07, "loss": 1.212646484375, "step": 145 }, { "epoch": 0.033640552995391704, "grad_norm": 0.6339080221663279, "learning_rate": 6.682027649769585e-07, "loss": 1.2833064794540405, "step": 146 }, { "epoch": 0.03387096774193549, "grad_norm": 0.6111094782416204, "learning_rate": 6.728110599078341e-07, "loss": 1.2852118015289307, "step": 147 }, { "epoch": 0.034101382488479264, "grad_norm": 0.36790627555446376, "learning_rate": 6.774193548387096e-07, "loss": 1.0287699699401855, "step": 148 }, { "epoch": 0.03433179723502304, "grad_norm": 0.4705970251054534, "learning_rate": 6.820276497695853e-07, "loss": 1.2580914497375488, "step": 149 }, { "epoch": 0.03456221198156682, "grad_norm": 0.4446865658925291, "learning_rate": 6.866359447004608e-07, "loss": 1.0557801723480225, "step": 150 }, { "epoch": 0.0347926267281106, "grad_norm": 0.4962737867323335, "learning_rate": 6.912442396313363e-07, "loss": 1.1820557117462158, "step": 151 }, { "epoch": 0.035023041474654376, "grad_norm": 0.4496579463689646, "learning_rate": 6.958525345622119e-07, "loss": 1.2777981758117676, "step": 152 }, { "epoch": 0.03525345622119816, "grad_norm": 0.4664315599937052, "learning_rate": 7.004608294930875e-07, "loss": 1.1465356349945068, "step": 153 }, { "epoch": 0.035483870967741936, "grad_norm": 0.5245233624695497, "learning_rate": 7.05069124423963e-07, "loss": 1.3553744554519653, "step": 154 }, { "epoch": 0.03571428571428571, "grad_norm": 0.5474513239817841, "learning_rate": 7.096774193548387e-07, "loss": 1.176223874092102, "step": 155 }, { "epoch": 0.035944700460829496, "grad_norm": 0.4022708922904972, "learning_rate": 7.142857142857143e-07, "loss": 1.1771761178970337, "step": 156 }, { "epoch": 0.03617511520737327, "grad_norm": 0.5000685120319052, "learning_rate": 7.188940092165898e-07, "loss": 1.1598860025405884, "step": 157 }, { "epoch": 0.03640552995391705, "grad_norm": 0.4955460688514832, "learning_rate": 7.235023041474654e-07, "loss": 1.0689195394515991, "step": 158 }, { "epoch": 0.03663594470046083, "grad_norm": 0.5324202700222229, "learning_rate": 7.281105990783409e-07, "loss": 1.1444990634918213, "step": 159 }, { "epoch": 0.03686635944700461, "grad_norm": 0.441885052912425, "learning_rate": 7.327188940092166e-07, "loss": 1.2261321544647217, "step": 160 }, { "epoch": 0.037096774193548385, "grad_norm": 0.47946473640002796, "learning_rate": 7.373271889400922e-07, "loss": 0.9325876235961914, "step": 161 }, { "epoch": 0.03732718894009217, "grad_norm": 0.46688477365444836, "learning_rate": 7.419354838709677e-07, "loss": 1.071167230606079, "step": 162 }, { "epoch": 0.037557603686635944, "grad_norm": 0.5188018198616766, "learning_rate": 7.465437788018433e-07, "loss": 1.1856298446655273, "step": 163 }, { "epoch": 0.03778801843317972, "grad_norm": 0.5279511073474723, "learning_rate": 7.511520737327189e-07, "loss": 1.13883376121521, "step": 164 }, { "epoch": 0.038018433179723504, "grad_norm": 0.4671725091927055, "learning_rate": 7.557603686635944e-07, "loss": 1.2896685600280762, "step": 165 }, { "epoch": 0.03824884792626728, "grad_norm": 0.6286776240106037, "learning_rate": 7.603686635944701e-07, "loss": 1.3122754096984863, "step": 166 }, { "epoch": 0.03847926267281106, "grad_norm": 0.5120060171404104, "learning_rate": 7.649769585253457e-07, "loss": 1.165675163269043, "step": 167 }, { "epoch": 0.03870967741935484, "grad_norm": 0.5132036652169082, "learning_rate": 7.695852534562211e-07, "loss": 1.1348214149475098, "step": 168 }, { "epoch": 0.03894009216589862, "grad_norm": 0.5816469452243797, "learning_rate": 7.741935483870967e-07, "loss": 1.287818431854248, "step": 169 }, { "epoch": 0.03917050691244239, "grad_norm": 0.4886112893618036, "learning_rate": 7.788018433179722e-07, "loss": 1.0723031759262085, "step": 170 }, { "epoch": 0.03940092165898618, "grad_norm": 0.5572220637370465, "learning_rate": 7.834101382488479e-07, "loss": 1.29054594039917, "step": 171 }, { "epoch": 0.03963133640552995, "grad_norm": 0.4996602061858042, "learning_rate": 7.880184331797235e-07, "loss": 1.201147198677063, "step": 172 }, { "epoch": 0.03986175115207373, "grad_norm": 0.47488604971715725, "learning_rate": 7.92626728110599e-07, "loss": 1.2529574632644653, "step": 173 }, { "epoch": 0.04009216589861751, "grad_norm": 0.5420947446150967, "learning_rate": 7.972350230414746e-07, "loss": 1.3255105018615723, "step": 174 }, { "epoch": 0.04032258064516129, "grad_norm": 0.5367164884336, "learning_rate": 8.018433179723502e-07, "loss": 1.3167433738708496, "step": 175 }, { "epoch": 0.04055299539170507, "grad_norm": 0.5124027812324866, "learning_rate": 8.064516129032257e-07, "loss": 1.4780502319335938, "step": 176 }, { "epoch": 0.04078341013824885, "grad_norm": 0.49049200777499574, "learning_rate": 8.110599078341014e-07, "loss": 1.3096996545791626, "step": 177 }, { "epoch": 0.041013824884792625, "grad_norm": 0.5684690759624818, "learning_rate": 8.15668202764977e-07, "loss": 1.3124895095825195, "step": 178 }, { "epoch": 0.04124423963133641, "grad_norm": 0.5746940747619091, "learning_rate": 8.202764976958525e-07, "loss": 1.2589681148529053, "step": 179 }, { "epoch": 0.041474654377880185, "grad_norm": 0.5351550863930432, "learning_rate": 8.248847926267281e-07, "loss": 1.0576659440994263, "step": 180 }, { "epoch": 0.04170506912442396, "grad_norm": 0.5804930108989373, "learning_rate": 8.294930875576036e-07, "loss": 1.2647404670715332, "step": 181 }, { "epoch": 0.041935483870967745, "grad_norm": 0.5527713530674592, "learning_rate": 8.341013824884793e-07, "loss": 1.072542428970337, "step": 182 }, { "epoch": 0.04216589861751152, "grad_norm": 0.636913740412271, "learning_rate": 8.387096774193549e-07, "loss": 1.2417643070220947, "step": 183 }, { "epoch": 0.0423963133640553, "grad_norm": 0.4636179655744076, "learning_rate": 8.433179723502303e-07, "loss": 1.2490241527557373, "step": 184 }, { "epoch": 0.04262672811059908, "grad_norm": 0.5714553493227277, "learning_rate": 8.479262672811059e-07, "loss": 1.1169328689575195, "step": 185 }, { "epoch": 0.04285714285714286, "grad_norm": 0.5893436962226742, "learning_rate": 8.525345622119815e-07, "loss": 1.1799774169921875, "step": 186 }, { "epoch": 0.043087557603686634, "grad_norm": 0.4840759402042485, "learning_rate": 8.57142857142857e-07, "loss": 0.9655753374099731, "step": 187 }, { "epoch": 0.04331797235023042, "grad_norm": 0.5473512318665162, "learning_rate": 8.617511520737327e-07, "loss": 1.2863562107086182, "step": 188 }, { "epoch": 0.043548387096774194, "grad_norm": 0.5971573505450626, "learning_rate": 8.663594470046083e-07, "loss": 1.056877613067627, "step": 189 }, { "epoch": 0.04377880184331797, "grad_norm": 0.5903656134268881, "learning_rate": 8.709677419354838e-07, "loss": 1.2128019332885742, "step": 190 }, { "epoch": 0.044009216589861753, "grad_norm": 0.5042165136835149, "learning_rate": 8.755760368663594e-07, "loss": 1.1397441625595093, "step": 191 }, { "epoch": 0.04423963133640553, "grad_norm": 0.5007324461761941, "learning_rate": 8.801843317972349e-07, "loss": 1.062232255935669, "step": 192 }, { "epoch": 0.044470046082949306, "grad_norm": 0.5077694656116347, "learning_rate": 8.847926267281106e-07, "loss": 1.0102736949920654, "step": 193 }, { "epoch": 0.04470046082949309, "grad_norm": 0.5039275409209952, "learning_rate": 8.894009216589862e-07, "loss": 1.155517339706421, "step": 194 }, { "epoch": 0.044930875576036866, "grad_norm": 0.4568536555143312, "learning_rate": 8.940092165898617e-07, "loss": 1.042372703552246, "step": 195 }, { "epoch": 0.04516129032258064, "grad_norm": 0.6118356615587064, "learning_rate": 8.986175115207373e-07, "loss": 1.1158320903778076, "step": 196 }, { "epoch": 0.045391705069124426, "grad_norm": 0.6547758969058546, "learning_rate": 9.032258064516129e-07, "loss": 1.4693050384521484, "step": 197 }, { "epoch": 0.0456221198156682, "grad_norm": 0.5189200191294998, "learning_rate": 9.078341013824884e-07, "loss": 1.0990574359893799, "step": 198 }, { "epoch": 0.04585253456221198, "grad_norm": 0.5123720508165549, "learning_rate": 9.124423963133641e-07, "loss": 1.0259861946105957, "step": 199 }, { "epoch": 0.04608294930875576, "grad_norm": 0.4638504791285932, "learning_rate": 9.170506912442397e-07, "loss": 1.2708477973937988, "step": 200 }, { "epoch": 0.04631336405529954, "grad_norm": 0.426472351706666, "learning_rate": 9.216589861751152e-07, "loss": 1.052978754043579, "step": 201 }, { "epoch": 0.046543778801843315, "grad_norm": 0.5548008737632977, "learning_rate": 9.262672811059907e-07, "loss": 1.3405938148498535, "step": 202 }, { "epoch": 0.0467741935483871, "grad_norm": 0.4311530218247671, "learning_rate": 9.308755760368662e-07, "loss": 0.9464558362960815, "step": 203 }, { "epoch": 0.047004608294930875, "grad_norm": 0.6377195135282403, "learning_rate": 9.354838709677418e-07, "loss": 1.3019077777862549, "step": 204 }, { "epoch": 0.04723502304147465, "grad_norm": 0.6029329005096047, "learning_rate": 9.400921658986175e-07, "loss": 1.146841049194336, "step": 205 }, { "epoch": 0.047465437788018434, "grad_norm": 0.6136536598800337, "learning_rate": 9.44700460829493e-07, "loss": 1.106084942817688, "step": 206 }, { "epoch": 0.04769585253456221, "grad_norm": 0.6661299934206126, "learning_rate": 9.493087557603686e-07, "loss": 1.2930629253387451, "step": 207 }, { "epoch": 0.047926267281105994, "grad_norm": 0.5555271013101563, "learning_rate": 9.539170506912442e-07, "loss": 1.1637842655181885, "step": 208 }, { "epoch": 0.04815668202764977, "grad_norm": 0.444081897230925, "learning_rate": 9.585253456221198e-07, "loss": 1.1753308773040771, "step": 209 }, { "epoch": 0.04838709677419355, "grad_norm": 0.5362299776231612, "learning_rate": 9.631336405529954e-07, "loss": 1.2304046154022217, "step": 210 }, { "epoch": 0.04861751152073733, "grad_norm": 0.6898819231347578, "learning_rate": 9.67741935483871e-07, "loss": 1.4326789379119873, "step": 211 }, { "epoch": 0.04884792626728111, "grad_norm": 0.614044501232848, "learning_rate": 9.723502304147466e-07, "loss": 1.0759861469268799, "step": 212 }, { "epoch": 0.04907834101382488, "grad_norm": 0.5971609176488232, "learning_rate": 9.76958525345622e-07, "loss": 1.1514811515808105, "step": 213 }, { "epoch": 0.04930875576036867, "grad_norm": 0.49252816443356506, "learning_rate": 9.815668202764976e-07, "loss": 1.1618578433990479, "step": 214 }, { "epoch": 0.04953917050691244, "grad_norm": 0.5677669382006955, "learning_rate": 9.861751152073732e-07, "loss": 1.0321345329284668, "step": 215 }, { "epoch": 0.04976958525345622, "grad_norm": 0.4551655972629908, "learning_rate": 9.907834101382488e-07, "loss": 1.0391438007354736, "step": 216 }, { "epoch": 0.05, "grad_norm": 0.6188957189455181, "learning_rate": 9.953917050691244e-07, "loss": 1.080418586730957, "step": 217 }, { "epoch": 0.05023041474654378, "grad_norm": 0.6531841586974683, "learning_rate": 1e-06, "loss": 1.2095223665237427, "step": 218 }, { "epoch": 0.050460829493087556, "grad_norm": 0.5036313537560552, "learning_rate": 1.0046082949308756e-06, "loss": 1.1144485473632812, "step": 219 }, { "epoch": 0.05069124423963134, "grad_norm": 0.6466646674884302, "learning_rate": 1.0092165898617511e-06, "loss": 1.2560818195343018, "step": 220 }, { "epoch": 0.050921658986175115, "grad_norm": 0.586777516357483, "learning_rate": 1.0138248847926267e-06, "loss": 1.1043426990509033, "step": 221 }, { "epoch": 0.05115207373271889, "grad_norm": 0.41448570454396455, "learning_rate": 1.0184331797235021e-06, "loss": 1.0725831985473633, "step": 222 }, { "epoch": 0.051382488479262675, "grad_norm": 0.5713867853647446, "learning_rate": 1.023041474654378e-06, "loss": 0.9764004349708557, "step": 223 }, { "epoch": 0.05161290322580645, "grad_norm": 0.6662412690615445, "learning_rate": 1.0276497695852535e-06, "loss": 1.2172776460647583, "step": 224 }, { "epoch": 0.05184331797235023, "grad_norm": 0.610800258000843, "learning_rate": 1.032258064516129e-06, "loss": 1.1065070629119873, "step": 225 }, { "epoch": 0.05207373271889401, "grad_norm": 0.5057724484519791, "learning_rate": 1.0368663594470047e-06, "loss": 1.0840628147125244, "step": 226 }, { "epoch": 0.05230414746543779, "grad_norm": 0.5250793281243177, "learning_rate": 1.04147465437788e-06, "loss": 1.109276294708252, "step": 227 }, { "epoch": 0.052534562211981564, "grad_norm": 0.7348582040933043, "learning_rate": 1.0460829493087557e-06, "loss": 1.186352252960205, "step": 228 }, { "epoch": 0.05276497695852535, "grad_norm": 0.48569306871313883, "learning_rate": 1.050691244239631e-06, "loss": 1.1605256795883179, "step": 229 }, { "epoch": 0.052995391705069124, "grad_norm": 0.6312799860168967, "learning_rate": 1.0552995391705069e-06, "loss": 1.0269646644592285, "step": 230 }, { "epoch": 0.0532258064516129, "grad_norm": 0.6446173917231129, "learning_rate": 1.0599078341013825e-06, "loss": 0.9595874547958374, "step": 231 }, { "epoch": 0.053456221198156684, "grad_norm": 0.6010998567907583, "learning_rate": 1.0645161290322579e-06, "loss": 1.1606154441833496, "step": 232 }, { "epoch": 0.05368663594470046, "grad_norm": 0.6379425251609956, "learning_rate": 1.0691244239631337e-06, "loss": 0.9920428991317749, "step": 233 }, { "epoch": 0.05391705069124424, "grad_norm": 0.6346840342097714, "learning_rate": 1.073732718894009e-06, "loss": 1.2124650478363037, "step": 234 }, { "epoch": 0.05414746543778802, "grad_norm": 0.5761223431136224, "learning_rate": 1.0783410138248847e-06, "loss": 1.2237420082092285, "step": 235 }, { "epoch": 0.054377880184331796, "grad_norm": 0.5178799666370111, "learning_rate": 1.0829493087557605e-06, "loss": 1.1484715938568115, "step": 236 }, { "epoch": 0.05460829493087557, "grad_norm": 0.5910590598999479, "learning_rate": 1.0875576036866358e-06, "loss": 1.2143291234970093, "step": 237 }, { "epoch": 0.054838709677419356, "grad_norm": 0.568116947952991, "learning_rate": 1.0921658986175114e-06, "loss": 1.1995420455932617, "step": 238 }, { "epoch": 0.05506912442396313, "grad_norm": 0.6128333972066793, "learning_rate": 1.096774193548387e-06, "loss": 1.2577292919158936, "step": 239 }, { "epoch": 0.055299539170506916, "grad_norm": 0.6177738975799152, "learning_rate": 1.1013824884792626e-06, "loss": 1.2170629501342773, "step": 240 }, { "epoch": 0.05552995391705069, "grad_norm": 0.3580107479174479, "learning_rate": 1.1059907834101382e-06, "loss": 0.8318669199943542, "step": 241 }, { "epoch": 0.05576036866359447, "grad_norm": 0.4976235536822315, "learning_rate": 1.1105990783410138e-06, "loss": 1.0760166645050049, "step": 242 }, { "epoch": 0.05599078341013825, "grad_norm": 0.7197455436310494, "learning_rate": 1.1152073732718894e-06, "loss": 1.2437031269073486, "step": 243 }, { "epoch": 0.05622119815668203, "grad_norm": 0.5957655407019126, "learning_rate": 1.1198156682027648e-06, "loss": 1.1680852174758911, "step": 244 }, { "epoch": 0.056451612903225805, "grad_norm": 0.6708075502500678, "learning_rate": 1.1244239631336406e-06, "loss": 1.051478385925293, "step": 245 }, { "epoch": 0.05668202764976959, "grad_norm": 0.547285271256248, "learning_rate": 1.1290322580645162e-06, "loss": 1.1433100700378418, "step": 246 }, { "epoch": 0.056912442396313365, "grad_norm": 0.6428413238154085, "learning_rate": 1.1336405529953916e-06, "loss": 0.9521546363830566, "step": 247 }, { "epoch": 0.05714285714285714, "grad_norm": 0.6790518899839243, "learning_rate": 1.1382488479262674e-06, "loss": 1.226189136505127, "step": 248 }, { "epoch": 0.057373271889400924, "grad_norm": 0.7178538920010674, "learning_rate": 1.1428571428571428e-06, "loss": 1.108027696609497, "step": 249 }, { "epoch": 0.0576036866359447, "grad_norm": 0.4608432366288286, "learning_rate": 1.1474654377880184e-06, "loss": 1.042288064956665, "step": 250 }, { "epoch": 0.05783410138248848, "grad_norm": 0.8171244559521852, "learning_rate": 1.1520737327188938e-06, "loss": 1.193603754043579, "step": 251 }, { "epoch": 0.05806451612903226, "grad_norm": 0.6766522772283506, "learning_rate": 1.1566820276497696e-06, "loss": 1.193584680557251, "step": 252 }, { "epoch": 0.05829493087557604, "grad_norm": 0.5714710938556213, "learning_rate": 1.1612903225806452e-06, "loss": 1.2318934202194214, "step": 253 }, { "epoch": 0.05852534562211981, "grad_norm": 0.6443899979691422, "learning_rate": 1.1658986175115205e-06, "loss": 1.1626521348953247, "step": 254 }, { "epoch": 0.0587557603686636, "grad_norm": 0.6336855527034527, "learning_rate": 1.1705069124423963e-06, "loss": 1.2402286529541016, "step": 255 }, { "epoch": 0.05898617511520737, "grad_norm": 0.599628545600123, "learning_rate": 1.1751152073732717e-06, "loss": 1.190323829650879, "step": 256 }, { "epoch": 0.05921658986175115, "grad_norm": 0.655955321737197, "learning_rate": 1.1797235023041473e-06, "loss": 1.121636986732483, "step": 257 }, { "epoch": 0.05944700460829493, "grad_norm": 0.5349922437861245, "learning_rate": 1.1843317972350231e-06, "loss": 1.099304437637329, "step": 258 }, { "epoch": 0.05967741935483871, "grad_norm": 0.5611568770807159, "learning_rate": 1.1889400921658985e-06, "loss": 1.1730690002441406, "step": 259 }, { "epoch": 0.059907834101382486, "grad_norm": 0.5874751551203973, "learning_rate": 1.1935483870967741e-06, "loss": 1.1450574398040771, "step": 260 }, { "epoch": 0.06013824884792627, "grad_norm": 0.6634311667010621, "learning_rate": 1.1981566820276497e-06, "loss": 1.1435421705245972, "step": 261 }, { "epoch": 0.060368663594470046, "grad_norm": 0.6113712565981082, "learning_rate": 1.2027649769585253e-06, "loss": 1.2153000831604004, "step": 262 }, { "epoch": 0.06059907834101382, "grad_norm": 0.4715675476477507, "learning_rate": 1.207373271889401e-06, "loss": 1.0380406379699707, "step": 263 }, { "epoch": 0.060829493087557605, "grad_norm": 0.5396758253019809, "learning_rate": 1.2119815668202765e-06, "loss": 1.1639207601547241, "step": 264 }, { "epoch": 0.06105990783410138, "grad_norm": 0.7193765184254299, "learning_rate": 1.216589861751152e-06, "loss": 1.1862819194793701, "step": 265 }, { "epoch": 0.06129032258064516, "grad_norm": 0.5621136552568688, "learning_rate": 1.2211981566820275e-06, "loss": 1.2122020721435547, "step": 266 }, { "epoch": 0.06152073732718894, "grad_norm": 0.506518590231947, "learning_rate": 1.2258064516129033e-06, "loss": 1.1201646327972412, "step": 267 }, { "epoch": 0.06175115207373272, "grad_norm": 0.6015371724768855, "learning_rate": 1.2304147465437787e-06, "loss": 0.9520926475524902, "step": 268 }, { "epoch": 0.061981566820276494, "grad_norm": 0.6815507447701216, "learning_rate": 1.2350230414746543e-06, "loss": 1.0426976680755615, "step": 269 }, { "epoch": 0.06221198156682028, "grad_norm": 0.5129880337213574, "learning_rate": 1.23963133640553e-06, "loss": 0.934493899345398, "step": 270 }, { "epoch": 0.062442396313364054, "grad_norm": 0.5416312735509534, "learning_rate": 1.2442396313364054e-06, "loss": 1.23980712890625, "step": 271 }, { "epoch": 0.06267281105990784, "grad_norm": 0.5947336924258313, "learning_rate": 1.248847926267281e-06, "loss": 1.094742774963379, "step": 272 }, { "epoch": 0.06290322580645161, "grad_norm": 0.5496219212827214, "learning_rate": 1.2534562211981564e-06, "loss": 1.0271551609039307, "step": 273 }, { "epoch": 0.06313364055299539, "grad_norm": 0.43924704821878574, "learning_rate": 1.2580645161290322e-06, "loss": 1.159210205078125, "step": 274 }, { "epoch": 0.06336405529953917, "grad_norm": 0.6336734571964621, "learning_rate": 1.2626728110599078e-06, "loss": 1.127510666847229, "step": 275 }, { "epoch": 0.06359447004608294, "grad_norm": 0.564136508309977, "learning_rate": 1.2672811059907832e-06, "loss": 1.1371517181396484, "step": 276 }, { "epoch": 0.06382488479262673, "grad_norm": 0.5092569849346139, "learning_rate": 1.271889400921659e-06, "loss": 1.0296730995178223, "step": 277 }, { "epoch": 0.06405529953917051, "grad_norm": 0.47819096787751125, "learning_rate": 1.2764976958525344e-06, "loss": 1.036975383758545, "step": 278 }, { "epoch": 0.06428571428571428, "grad_norm": 0.5933788958917384, "learning_rate": 1.28110599078341e-06, "loss": 1.2120393514633179, "step": 279 }, { "epoch": 0.06451612903225806, "grad_norm": 0.5094532117085869, "learning_rate": 1.2857142857142858e-06, "loss": 1.0084068775177002, "step": 280 }, { "epoch": 0.06474654377880185, "grad_norm": 0.5556672645421422, "learning_rate": 1.2903225806451612e-06, "loss": 1.2005786895751953, "step": 281 }, { "epoch": 0.06497695852534562, "grad_norm": 0.5273275990471241, "learning_rate": 1.2949308755760368e-06, "loss": 1.1506783962249756, "step": 282 }, { "epoch": 0.0652073732718894, "grad_norm": 0.6565311834699108, "learning_rate": 1.2995391705069124e-06, "loss": 1.1219947338104248, "step": 283 }, { "epoch": 0.06543778801843318, "grad_norm": 0.5392805741788703, "learning_rate": 1.304147465437788e-06, "loss": 1.2041170597076416, "step": 284 }, { "epoch": 0.06566820276497695, "grad_norm": 0.4958618059812673, "learning_rate": 1.3087557603686636e-06, "loss": 1.0903037786483765, "step": 285 }, { "epoch": 0.06589861751152074, "grad_norm": 0.5739593792710319, "learning_rate": 1.3133640552995392e-06, "loss": 1.2140064239501953, "step": 286 }, { "epoch": 0.06612903225806452, "grad_norm": 0.6611408054194472, "learning_rate": 1.3179723502304148e-06, "loss": 1.3026092052459717, "step": 287 }, { "epoch": 0.0663594470046083, "grad_norm": 0.5994162091601994, "learning_rate": 1.3225806451612901e-06, "loss": 1.0937910079956055, "step": 288 }, { "epoch": 0.06658986175115207, "grad_norm": 0.5087892316212932, "learning_rate": 1.327188940092166e-06, "loss": 1.1768109798431396, "step": 289 }, { "epoch": 0.06682027649769585, "grad_norm": 0.6601843016778813, "learning_rate": 1.3317972350230413e-06, "loss": 1.0796440839767456, "step": 290 }, { "epoch": 0.06705069124423964, "grad_norm": 0.5059222364831474, "learning_rate": 1.336405529953917e-06, "loss": 0.9972932934761047, "step": 291 }, { "epoch": 0.06728110599078341, "grad_norm": 0.5571474335328804, "learning_rate": 1.3410138248847927e-06, "loss": 0.9860717058181763, "step": 292 }, { "epoch": 0.06751152073732719, "grad_norm": 0.5418320654969337, "learning_rate": 1.3456221198156681e-06, "loss": 1.045119047164917, "step": 293 }, { "epoch": 0.06774193548387097, "grad_norm": 0.5469511174229076, "learning_rate": 1.3502304147465437e-06, "loss": 1.2740920782089233, "step": 294 }, { "epoch": 0.06797235023041474, "grad_norm": 0.5280888059979016, "learning_rate": 1.354838709677419e-06, "loss": 1.0860114097595215, "step": 295 }, { "epoch": 0.06820276497695853, "grad_norm": 0.6361673375880608, "learning_rate": 1.359447004608295e-06, "loss": 1.111539602279663, "step": 296 }, { "epoch": 0.06843317972350231, "grad_norm": 0.6640553054344481, "learning_rate": 1.3640552995391705e-06, "loss": 1.1628870964050293, "step": 297 }, { "epoch": 0.06866359447004608, "grad_norm": 0.5665129055040568, "learning_rate": 1.3686635944700459e-06, "loss": 1.042768955230713, "step": 298 }, { "epoch": 0.06889400921658986, "grad_norm": 0.43340931133190164, "learning_rate": 1.3732718894009217e-06, "loss": 0.9970331192016602, "step": 299 }, { "epoch": 0.06912442396313365, "grad_norm": 0.5645710736996077, "learning_rate": 1.377880184331797e-06, "loss": 1.1270179748535156, "step": 300 }, { "epoch": 0.06935483870967742, "grad_norm": 0.5065704773498506, "learning_rate": 1.3824884792626727e-06, "loss": 0.9505646824836731, "step": 301 }, { "epoch": 0.0695852534562212, "grad_norm": 0.5178052985950043, "learning_rate": 1.3870967741935485e-06, "loss": 1.0997588634490967, "step": 302 }, { "epoch": 0.06981566820276498, "grad_norm": 0.46976885146719827, "learning_rate": 1.3917050691244239e-06, "loss": 1.1512106657028198, "step": 303 }, { "epoch": 0.07004608294930875, "grad_norm": 0.5368431131511487, "learning_rate": 1.3963133640552995e-06, "loss": 1.1340759992599487, "step": 304 }, { "epoch": 0.07027649769585254, "grad_norm": 0.6153911846871725, "learning_rate": 1.400921658986175e-06, "loss": 1.187511682510376, "step": 305 }, { "epoch": 0.07050691244239632, "grad_norm": 0.511555535336468, "learning_rate": 1.4055299539170507e-06, "loss": 1.0711122751235962, "step": 306 }, { "epoch": 0.07073732718894009, "grad_norm": 0.48287298633713555, "learning_rate": 1.410138248847926e-06, "loss": 0.9636896848678589, "step": 307 }, { "epoch": 0.07096774193548387, "grad_norm": 0.5910127759130634, "learning_rate": 1.4147465437788018e-06, "loss": 1.0506833791732788, "step": 308 }, { "epoch": 0.07119815668202766, "grad_norm": 0.46621570534633416, "learning_rate": 1.4193548387096774e-06, "loss": 1.1076349020004272, "step": 309 }, { "epoch": 0.07142857142857142, "grad_norm": 0.5023143786431462, "learning_rate": 1.4239631336405528e-06, "loss": 1.0878944396972656, "step": 310 }, { "epoch": 0.07165898617511521, "grad_norm": 0.5894127846415432, "learning_rate": 1.4285714285714286e-06, "loss": 1.0808600187301636, "step": 311 }, { "epoch": 0.07188940092165899, "grad_norm": 0.6608655757057322, "learning_rate": 1.433179723502304e-06, "loss": 1.2117588520050049, "step": 312 }, { "epoch": 0.07211981566820276, "grad_norm": 0.49227698344069387, "learning_rate": 1.4377880184331796e-06, "loss": 1.0899101495742798, "step": 313 }, { "epoch": 0.07235023041474654, "grad_norm": 0.4429228185732454, "learning_rate": 1.4423963133640554e-06, "loss": 0.9540426135063171, "step": 314 }, { "epoch": 0.07258064516129033, "grad_norm": 0.6262415135725191, "learning_rate": 1.4470046082949308e-06, "loss": 1.1170068979263306, "step": 315 }, { "epoch": 0.0728110599078341, "grad_norm": 0.5398534028349639, "learning_rate": 1.4516129032258064e-06, "loss": 1.2030160427093506, "step": 316 }, { "epoch": 0.07304147465437788, "grad_norm": 0.5750696017486863, "learning_rate": 1.4562211981566818e-06, "loss": 1.1599903106689453, "step": 317 }, { "epoch": 0.07327188940092166, "grad_norm": 0.4940370201046436, "learning_rate": 1.4608294930875576e-06, "loss": 1.0777950286865234, "step": 318 }, { "epoch": 0.07350230414746543, "grad_norm": 0.5527232601625117, "learning_rate": 1.4654377880184332e-06, "loss": 1.1250553131103516, "step": 319 }, { "epoch": 0.07373271889400922, "grad_norm": 0.4905671836592669, "learning_rate": 1.4700460829493086e-06, "loss": 1.10176420211792, "step": 320 }, { "epoch": 0.073963133640553, "grad_norm": 0.5457078879226115, "learning_rate": 1.4746543778801844e-06, "loss": 1.111799716949463, "step": 321 }, { "epoch": 0.07419354838709677, "grad_norm": 0.4195800331952007, "learning_rate": 1.4792626728110598e-06, "loss": 1.1555054187774658, "step": 322 }, { "epoch": 0.07442396313364055, "grad_norm": 0.46236670595596, "learning_rate": 1.4838709677419353e-06, "loss": 1.0977535247802734, "step": 323 }, { "epoch": 0.07465437788018434, "grad_norm": 0.5097860724223924, "learning_rate": 1.4884792626728112e-06, "loss": 0.9058012962341309, "step": 324 }, { "epoch": 0.0748847926267281, "grad_norm": 0.5077577953430894, "learning_rate": 1.4930875576036865e-06, "loss": 1.1147960424423218, "step": 325 }, { "epoch": 0.07511520737327189, "grad_norm": 0.44169448790763116, "learning_rate": 1.4976958525345621e-06, "loss": 1.1315648555755615, "step": 326 }, { "epoch": 0.07534562211981567, "grad_norm": 0.5088086763700569, "learning_rate": 1.5023041474654377e-06, "loss": 0.9134868383407593, "step": 327 }, { "epoch": 0.07557603686635944, "grad_norm": 0.44118138965972864, "learning_rate": 1.5069124423963133e-06, "loss": 1.017493724822998, "step": 328 }, { "epoch": 0.07580645161290323, "grad_norm": 0.5038134502792564, "learning_rate": 1.5115207373271887e-06, "loss": 1.220658540725708, "step": 329 }, { "epoch": 0.07603686635944701, "grad_norm": 0.49196264739665124, "learning_rate": 1.5161290322580645e-06, "loss": 1.2254307270050049, "step": 330 }, { "epoch": 0.07626728110599078, "grad_norm": 0.6442066774537618, "learning_rate": 1.5207373271889401e-06, "loss": 1.2653989791870117, "step": 331 }, { "epoch": 0.07649769585253456, "grad_norm": 0.5214989033274106, "learning_rate": 1.5253456221198155e-06, "loss": 1.199981451034546, "step": 332 }, { "epoch": 0.07672811059907834, "grad_norm": 0.5987857165424706, "learning_rate": 1.5299539170506913e-06, "loss": 1.1141018867492676, "step": 333 }, { "epoch": 0.07695852534562211, "grad_norm": 0.5942646354683767, "learning_rate": 1.5345622119815667e-06, "loss": 1.2139991521835327, "step": 334 }, { "epoch": 0.0771889400921659, "grad_norm": 0.46506211352562865, "learning_rate": 1.5391705069124423e-06, "loss": 1.0647475719451904, "step": 335 }, { "epoch": 0.07741935483870968, "grad_norm": 0.41334086285294086, "learning_rate": 1.543778801843318e-06, "loss": 0.9740357398986816, "step": 336 }, { "epoch": 0.07764976958525345, "grad_norm": 0.3730662683323707, "learning_rate": 1.5483870967741935e-06, "loss": 0.877153754234314, "step": 337 }, { "epoch": 0.07788018433179723, "grad_norm": 0.5608914234538745, "learning_rate": 1.552995391705069e-06, "loss": 1.2472789287567139, "step": 338 }, { "epoch": 0.07811059907834102, "grad_norm": 0.49369711494641005, "learning_rate": 1.5576036866359445e-06, "loss": 1.1873078346252441, "step": 339 }, { "epoch": 0.07834101382488479, "grad_norm": 0.47054639516827856, "learning_rate": 1.5622119815668203e-06, "loss": 1.0728449821472168, "step": 340 }, { "epoch": 0.07857142857142857, "grad_norm": 0.5084311239727589, "learning_rate": 1.5668202764976959e-06, "loss": 0.9974904656410217, "step": 341 }, { "epoch": 0.07880184331797235, "grad_norm": 0.5100945633220172, "learning_rate": 1.5714285714285712e-06, "loss": 1.0591039657592773, "step": 342 }, { "epoch": 0.07903225806451612, "grad_norm": 0.5927330256525677, "learning_rate": 1.576036866359447e-06, "loss": 1.04117751121521, "step": 343 }, { "epoch": 0.0792626728110599, "grad_norm": 0.40984725482311923, "learning_rate": 1.5806451612903224e-06, "loss": 0.934100866317749, "step": 344 }, { "epoch": 0.07949308755760369, "grad_norm": 0.4545690285130126, "learning_rate": 1.585253456221198e-06, "loss": 1.0333890914916992, "step": 345 }, { "epoch": 0.07972350230414746, "grad_norm": 0.4848318013907446, "learning_rate": 1.5898617511520738e-06, "loss": 1.1762741804122925, "step": 346 }, { "epoch": 0.07995391705069124, "grad_norm": 0.4082821303075448, "learning_rate": 1.5944700460829492e-06, "loss": 1.081842303276062, "step": 347 }, { "epoch": 0.08018433179723503, "grad_norm": 0.48343962912190763, "learning_rate": 1.5990783410138248e-06, "loss": 1.140712022781372, "step": 348 }, { "epoch": 0.0804147465437788, "grad_norm": 0.3519464028715392, "learning_rate": 1.6036866359447004e-06, "loss": 1.0155198574066162, "step": 349 }, { "epoch": 0.08064516129032258, "grad_norm": 0.4719922940268299, "learning_rate": 1.608294930875576e-06, "loss": 1.0673280954360962, "step": 350 }, { "epoch": 0.08087557603686636, "grad_norm": 0.44336917730159625, "learning_rate": 1.6129032258064514e-06, "loss": 1.1061692237854004, "step": 351 }, { "epoch": 0.08110599078341015, "grad_norm": 0.6227306591455409, "learning_rate": 1.6175115207373272e-06, "loss": 1.0120354890823364, "step": 352 }, { "epoch": 0.08133640552995391, "grad_norm": 0.5343939607764295, "learning_rate": 1.6221198156682028e-06, "loss": 1.1260986328125, "step": 353 }, { "epoch": 0.0815668202764977, "grad_norm": 0.514597043189326, "learning_rate": 1.6267281105990782e-06, "loss": 1.0376214981079102, "step": 354 }, { "epoch": 0.08179723502304148, "grad_norm": 0.41314458702115897, "learning_rate": 1.631336405529954e-06, "loss": 1.0802130699157715, "step": 355 }, { "epoch": 0.08202764976958525, "grad_norm": 0.5113844239661658, "learning_rate": 1.6359447004608294e-06, "loss": 1.217378854751587, "step": 356 }, { "epoch": 0.08225806451612903, "grad_norm": 0.3681604891354872, "learning_rate": 1.640552995391705e-06, "loss": 0.9107617139816284, "step": 357 }, { "epoch": 0.08248847926267282, "grad_norm": 0.4567828094638987, "learning_rate": 1.6451612903225808e-06, "loss": 1.089385986328125, "step": 358 }, { "epoch": 0.08271889400921659, "grad_norm": 0.42382031863730735, "learning_rate": 1.6497695852534561e-06, "loss": 1.1420392990112305, "step": 359 }, { "epoch": 0.08294930875576037, "grad_norm": 0.4385300551654332, "learning_rate": 1.6543778801843317e-06, "loss": 0.9308648705482483, "step": 360 }, { "epoch": 0.08317972350230415, "grad_norm": 0.4691608891369802, "learning_rate": 1.6589861751152071e-06, "loss": 0.9463413953781128, "step": 361 }, { "epoch": 0.08341013824884792, "grad_norm": 0.4312953553305326, "learning_rate": 1.663594470046083e-06, "loss": 1.0364834070205688, "step": 362 }, { "epoch": 0.0836405529953917, "grad_norm": 0.4601141894995307, "learning_rate": 1.6682027649769585e-06, "loss": 0.9992797374725342, "step": 363 }, { "epoch": 0.08387096774193549, "grad_norm": 0.4110829230093509, "learning_rate": 1.672811059907834e-06, "loss": 0.9862687587738037, "step": 364 }, { "epoch": 0.08410138248847926, "grad_norm": 0.538237007116734, "learning_rate": 1.6774193548387097e-06, "loss": 1.0882744789123535, "step": 365 }, { "epoch": 0.08433179723502304, "grad_norm": 0.38129891099780466, "learning_rate": 1.682027649769585e-06, "loss": 0.9217149615287781, "step": 366 }, { "epoch": 0.08456221198156683, "grad_norm": 0.47566005804946043, "learning_rate": 1.6866359447004607e-06, "loss": 1.0384632349014282, "step": 367 }, { "epoch": 0.0847926267281106, "grad_norm": 0.41334228678230484, "learning_rate": 1.6912442396313363e-06, "loss": 0.8760565519332886, "step": 368 }, { "epoch": 0.08502304147465438, "grad_norm": 0.38194490761666694, "learning_rate": 1.6958525345622119e-06, "loss": 0.9868614077568054, "step": 369 }, { "epoch": 0.08525345622119816, "grad_norm": 0.41853356164893474, "learning_rate": 1.7004608294930875e-06, "loss": 1.0386936664581299, "step": 370 }, { "epoch": 0.08548387096774193, "grad_norm": 0.4969082634490474, "learning_rate": 1.705069124423963e-06, "loss": 1.2201364040374756, "step": 371 }, { "epoch": 0.08571428571428572, "grad_norm": 0.45684500070085005, "learning_rate": 1.7096774193548387e-06, "loss": 0.9892920255661011, "step": 372 }, { "epoch": 0.0859447004608295, "grad_norm": 0.3411435628885881, "learning_rate": 1.714285714285714e-06, "loss": 0.9379667639732361, "step": 373 }, { "epoch": 0.08617511520737327, "grad_norm": 0.4493279942699278, "learning_rate": 1.7188940092165899e-06, "loss": 1.0150624513626099, "step": 374 }, { "epoch": 0.08640552995391705, "grad_norm": 0.3873317793310882, "learning_rate": 1.7235023041474655e-06, "loss": 0.8724589943885803, "step": 375 }, { "epoch": 0.08663594470046083, "grad_norm": 0.451020649692039, "learning_rate": 1.7281105990783408e-06, "loss": 1.005715012550354, "step": 376 }, { "epoch": 0.0868663594470046, "grad_norm": 0.40515583321904614, "learning_rate": 1.7327188940092167e-06, "loss": 1.0238345861434937, "step": 377 }, { "epoch": 0.08709677419354839, "grad_norm": 0.5713304603163627, "learning_rate": 1.737327188940092e-06, "loss": 1.061020851135254, "step": 378 }, { "epoch": 0.08732718894009217, "grad_norm": 0.31543151666242697, "learning_rate": 1.7419354838709676e-06, "loss": 0.8607133626937866, "step": 379 }, { "epoch": 0.08755760368663594, "grad_norm": 0.396586403800106, "learning_rate": 1.7465437788018434e-06, "loss": 0.9070740938186646, "step": 380 }, { "epoch": 0.08778801843317972, "grad_norm": 0.4114853593210784, "learning_rate": 1.7511520737327188e-06, "loss": 0.993092954158783, "step": 381 }, { "epoch": 0.08801843317972351, "grad_norm": 0.5030976624666732, "learning_rate": 1.7557603686635944e-06, "loss": 1.1119567155838013, "step": 382 }, { "epoch": 0.08824884792626728, "grad_norm": 0.3947649464982104, "learning_rate": 1.7603686635944698e-06, "loss": 1.030786395072937, "step": 383 }, { "epoch": 0.08847926267281106, "grad_norm": 0.413233744996873, "learning_rate": 1.7649769585253456e-06, "loss": 1.0578559637069702, "step": 384 }, { "epoch": 0.08870967741935484, "grad_norm": 0.5116874225270758, "learning_rate": 1.7695852534562212e-06, "loss": 1.1282391548156738, "step": 385 }, { "epoch": 0.08894009216589861, "grad_norm": 0.36883704269137796, "learning_rate": 1.7741935483870966e-06, "loss": 0.7838784456253052, "step": 386 }, { "epoch": 0.0891705069124424, "grad_norm": 0.4028833159886203, "learning_rate": 1.7788018433179724e-06, "loss": 0.9244300127029419, "step": 387 }, { "epoch": 0.08940092165898618, "grad_norm": 0.37786066556159736, "learning_rate": 1.7834101382488478e-06, "loss": 0.916866660118103, "step": 388 }, { "epoch": 0.08963133640552995, "grad_norm": 0.3466207390337416, "learning_rate": 1.7880184331797234e-06, "loss": 0.9918155670166016, "step": 389 }, { "epoch": 0.08986175115207373, "grad_norm": 0.49146787648511026, "learning_rate": 1.792626728110599e-06, "loss": 0.9879001379013062, "step": 390 }, { "epoch": 0.09009216589861752, "grad_norm": 0.4467876721911936, "learning_rate": 1.7972350230414746e-06, "loss": 1.0252082347869873, "step": 391 }, { "epoch": 0.09032258064516129, "grad_norm": 0.4519090202816701, "learning_rate": 1.8018433179723502e-06, "loss": 1.0376901626586914, "step": 392 }, { "epoch": 0.09055299539170507, "grad_norm": 0.4158305964101772, "learning_rate": 1.8064516129032258e-06, "loss": 1.0237072706222534, "step": 393 }, { "epoch": 0.09078341013824885, "grad_norm": 0.3903660894092682, "learning_rate": 1.8110599078341013e-06, "loss": 1.004181146621704, "step": 394 }, { "epoch": 0.09101382488479262, "grad_norm": 0.4844697096481946, "learning_rate": 1.8156682027649767e-06, "loss": 1.1162958145141602, "step": 395 }, { "epoch": 0.0912442396313364, "grad_norm": 0.43484007077470926, "learning_rate": 1.8202764976958525e-06, "loss": 0.9634548425674438, "step": 396 }, { "epoch": 0.09147465437788019, "grad_norm": 0.34256483920586434, "learning_rate": 1.8248847926267281e-06, "loss": 0.9306463599205017, "step": 397 }, { "epoch": 0.09170506912442396, "grad_norm": 0.4291772843094685, "learning_rate": 1.8294930875576035e-06, "loss": 1.0243630409240723, "step": 398 }, { "epoch": 0.09193548387096774, "grad_norm": 0.37150575195192614, "learning_rate": 1.8341013824884793e-06, "loss": 0.9261370897293091, "step": 399 }, { "epoch": 0.09216589861751152, "grad_norm": 0.41574639263883495, "learning_rate": 1.8387096774193547e-06, "loss": 0.9929264783859253, "step": 400 }, { "epoch": 0.0923963133640553, "grad_norm": 0.4086620199652483, "learning_rate": 1.8433179723502303e-06, "loss": 1.0245590209960938, "step": 401 }, { "epoch": 0.09262672811059908, "grad_norm": 0.4485366734014856, "learning_rate": 1.8479262672811061e-06, "loss": 0.9801148176193237, "step": 402 }, { "epoch": 0.09285714285714286, "grad_norm": 0.48045286204627596, "learning_rate": 1.8525345622119815e-06, "loss": 1.181383728981018, "step": 403 }, { "epoch": 0.09308755760368663, "grad_norm": 0.41845043157279344, "learning_rate": 1.857142857142857e-06, "loss": 0.9493411779403687, "step": 404 }, { "epoch": 0.09331797235023041, "grad_norm": 0.4897744794150158, "learning_rate": 1.8617511520737325e-06, "loss": 1.1096491813659668, "step": 405 }, { "epoch": 0.0935483870967742, "grad_norm": 0.4480175053230346, "learning_rate": 1.8663594470046083e-06, "loss": 1.1019275188446045, "step": 406 }, { "epoch": 0.09377880184331797, "grad_norm": 0.3732577959232657, "learning_rate": 1.8709677419354837e-06, "loss": 0.973988950252533, "step": 407 }, { "epoch": 0.09400921658986175, "grad_norm": 0.4400203989690802, "learning_rate": 1.8755760368663593e-06, "loss": 1.1670622825622559, "step": 408 }, { "epoch": 0.09423963133640553, "grad_norm": 0.3329146322312322, "learning_rate": 1.880184331797235e-06, "loss": 0.8550488948822021, "step": 409 }, { "epoch": 0.0944700460829493, "grad_norm": 0.4080056832475701, "learning_rate": 1.8847926267281104e-06, "loss": 1.0501651763916016, "step": 410 }, { "epoch": 0.09470046082949309, "grad_norm": 0.4667020783139675, "learning_rate": 1.889400921658986e-06, "loss": 1.1323202848434448, "step": 411 }, { "epoch": 0.09493087557603687, "grad_norm": 0.4438011539128225, "learning_rate": 1.8940092165898616e-06, "loss": 1.168154001235962, "step": 412 }, { "epoch": 0.09516129032258064, "grad_norm": 0.5043395094497101, "learning_rate": 1.8986175115207372e-06, "loss": 1.0667431354522705, "step": 413 }, { "epoch": 0.09539170506912442, "grad_norm": 0.42921175733784445, "learning_rate": 1.9032258064516128e-06, "loss": 1.1447162628173828, "step": 414 }, { "epoch": 0.0956221198156682, "grad_norm": 0.42501454608228506, "learning_rate": 1.9078341013824884e-06, "loss": 0.9403433799743652, "step": 415 }, { "epoch": 0.09585253456221199, "grad_norm": 0.4016688989337606, "learning_rate": 1.912442396313364e-06, "loss": 0.9837527275085449, "step": 416 }, { "epoch": 0.09608294930875576, "grad_norm": 0.422068085350648, "learning_rate": 1.9170506912442396e-06, "loss": 1.071333408355713, "step": 417 }, { "epoch": 0.09631336405529954, "grad_norm": 0.5124388054628781, "learning_rate": 1.921658986175115e-06, "loss": 1.0156168937683105, "step": 418 }, { "epoch": 0.09654377880184332, "grad_norm": 0.4338501331744671, "learning_rate": 1.926267281105991e-06, "loss": 0.9705266952514648, "step": 419 }, { "epoch": 0.0967741935483871, "grad_norm": 0.407144156286867, "learning_rate": 1.930875576036866e-06, "loss": 1.0570204257965088, "step": 420 }, { "epoch": 0.09700460829493088, "grad_norm": 0.43729360857600713, "learning_rate": 1.935483870967742e-06, "loss": 1.141861915588379, "step": 421 }, { "epoch": 0.09723502304147466, "grad_norm": 0.4507835554387818, "learning_rate": 1.9400921658986174e-06, "loss": 0.9849745631217957, "step": 422 }, { "epoch": 0.09746543778801843, "grad_norm": 0.4932195036683519, "learning_rate": 1.944700460829493e-06, "loss": 1.0279912948608398, "step": 423 }, { "epoch": 0.09769585253456221, "grad_norm": 0.4014365475110759, "learning_rate": 1.9493087557603686e-06, "loss": 1.0707788467407227, "step": 424 }, { "epoch": 0.097926267281106, "grad_norm": 0.37856248369077095, "learning_rate": 1.953917050691244e-06, "loss": 0.9391129016876221, "step": 425 }, { "epoch": 0.09815668202764977, "grad_norm": 0.3604046417791118, "learning_rate": 1.9585253456221198e-06, "loss": 0.9792884588241577, "step": 426 }, { "epoch": 0.09838709677419355, "grad_norm": 0.42091691400517506, "learning_rate": 1.963133640552995e-06, "loss": 1.0111792087554932, "step": 427 }, { "epoch": 0.09861751152073733, "grad_norm": 0.2951881364083913, "learning_rate": 1.967741935483871e-06, "loss": 1.0020272731781006, "step": 428 }, { "epoch": 0.0988479262672811, "grad_norm": 0.42473763380817414, "learning_rate": 1.9723502304147463e-06, "loss": 1.1002991199493408, "step": 429 }, { "epoch": 0.09907834101382489, "grad_norm": 0.3977328364337887, "learning_rate": 1.976958525345622e-06, "loss": 0.9656131267547607, "step": 430 }, { "epoch": 0.09930875576036867, "grad_norm": 0.4163794190517341, "learning_rate": 1.9815668202764975e-06, "loss": 1.1845166683197021, "step": 431 }, { "epoch": 0.09953917050691244, "grad_norm": 0.4102761511182145, "learning_rate": 1.9861751152073733e-06, "loss": 0.8743879795074463, "step": 432 }, { "epoch": 0.09976958525345622, "grad_norm": 0.48299006340600875, "learning_rate": 1.9907834101382487e-06, "loss": 1.0800082683563232, "step": 433 }, { "epoch": 0.1, "grad_norm": 0.39412754669182365, "learning_rate": 1.995391705069124e-06, "loss": 1.0410808324813843, "step": 434 }, { "epoch": 0.10023041474654378, "grad_norm": 0.4817128357084655, "learning_rate": 2e-06, "loss": 1.0214624404907227, "step": 435 }, { "epoch": 0.10046082949308756, "grad_norm": 0.4738161753055533, "learning_rate": 1.9999999274256618e-06, "loss": 1.0304028987884521, "step": 436 }, { "epoch": 0.10069124423963134, "grad_norm": 0.3946923205513698, "learning_rate": 1.9999997097026583e-06, "loss": 1.0457626581192017, "step": 437 }, { "epoch": 0.10092165898617511, "grad_norm": 0.43567215904100204, "learning_rate": 1.9999993468310205e-06, "loss": 0.9837691187858582, "step": 438 }, { "epoch": 0.1011520737327189, "grad_norm": 0.5216317957588074, "learning_rate": 1.9999988388108013e-06, "loss": 1.0819612741470337, "step": 439 }, { "epoch": 0.10138248847926268, "grad_norm": 0.31182314858852395, "learning_rate": 1.9999981856420743e-06, "loss": 1.0417449474334717, "step": 440 }, { "epoch": 0.10161290322580645, "grad_norm": 0.5477105048499294, "learning_rate": 1.999997387324935e-06, "loss": 1.0501068830490112, "step": 441 }, { "epoch": 0.10184331797235023, "grad_norm": 0.4106183150059033, "learning_rate": 1.999996443859498e-06, "loss": 1.0635120868682861, "step": 442 }, { "epoch": 0.10207373271889401, "grad_norm": 0.4873224989082174, "learning_rate": 1.999995355245902e-06, "loss": 0.9732234477996826, "step": 443 }, { "epoch": 0.10230414746543778, "grad_norm": 0.3718846857755592, "learning_rate": 1.9999941214843034e-06, "loss": 0.9493811130523682, "step": 444 }, { "epoch": 0.10253456221198157, "grad_norm": 0.5595191439491263, "learning_rate": 1.9999927425748817e-06, "loss": 1.1455141305923462, "step": 445 }, { "epoch": 0.10276497695852535, "grad_norm": 0.4237177518607636, "learning_rate": 1.9999912185178374e-06, "loss": 0.9341592788696289, "step": 446 }, { "epoch": 0.10299539170506912, "grad_norm": 0.3913224265375377, "learning_rate": 1.9999895493133916e-06, "loss": 0.9535608291625977, "step": 447 }, { "epoch": 0.1032258064516129, "grad_norm": 0.4687207319213409, "learning_rate": 1.999987734961787e-06, "loss": 1.1977221965789795, "step": 448 }, { "epoch": 0.10345622119815669, "grad_norm": 0.45995634872516833, "learning_rate": 1.999985775463286e-06, "loss": 1.1658375263214111, "step": 449 }, { "epoch": 0.10368663594470046, "grad_norm": 0.47830181543951694, "learning_rate": 1.9999836708181734e-06, "loss": 1.1171612739562988, "step": 450 }, { "epoch": 0.10391705069124424, "grad_norm": 0.3823354001067843, "learning_rate": 1.999981421026755e-06, "loss": 1.0864373445510864, "step": 451 }, { "epoch": 0.10414746543778802, "grad_norm": 0.43518989690984766, "learning_rate": 1.999979026089357e-06, "loss": 1.1211299896240234, "step": 452 }, { "epoch": 0.10437788018433179, "grad_norm": 0.45163820634554874, "learning_rate": 1.9999764860063277e-06, "loss": 1.071751594543457, "step": 453 }, { "epoch": 0.10460829493087558, "grad_norm": 0.3749468590501543, "learning_rate": 1.9999738007780347e-06, "loss": 1.0377576351165771, "step": 454 }, { "epoch": 0.10483870967741936, "grad_norm": 0.42625340690366553, "learning_rate": 1.9999709704048685e-06, "loss": 0.9658410549163818, "step": 455 }, { "epoch": 0.10506912442396313, "grad_norm": 0.4022888050751363, "learning_rate": 1.9999679948872395e-06, "loss": 0.9070194959640503, "step": 456 }, { "epoch": 0.10529953917050691, "grad_norm": 0.5570523464378584, "learning_rate": 1.9999648742255803e-06, "loss": 1.2197664976119995, "step": 457 }, { "epoch": 0.1055299539170507, "grad_norm": 0.3961372853294897, "learning_rate": 1.9999616084203426e-06, "loss": 0.9032889604568481, "step": 458 }, { "epoch": 0.10576036866359446, "grad_norm": 0.39060467678942784, "learning_rate": 1.9999581974720017e-06, "loss": 0.9458762407302856, "step": 459 }, { "epoch": 0.10599078341013825, "grad_norm": 0.5068153216782157, "learning_rate": 1.9999546413810526e-06, "loss": 1.0024757385253906, "step": 460 }, { "epoch": 0.10622119815668203, "grad_norm": 0.38148764403186025, "learning_rate": 1.9999509401480108e-06, "loss": 0.9499050378799438, "step": 461 }, { "epoch": 0.1064516129032258, "grad_norm": 0.4354491299812492, "learning_rate": 1.9999470937734132e-06, "loss": 1.0764188766479492, "step": 462 }, { "epoch": 0.10668202764976958, "grad_norm": 0.42800401210878014, "learning_rate": 1.9999431022578194e-06, "loss": 0.9858300089836121, "step": 463 }, { "epoch": 0.10691244239631337, "grad_norm": 0.41132718920336847, "learning_rate": 1.999938965601808e-06, "loss": 0.8965580463409424, "step": 464 }, { "epoch": 0.10714285714285714, "grad_norm": 0.39699129711694964, "learning_rate": 1.9999346838059788e-06, "loss": 0.8860410451889038, "step": 465 }, { "epoch": 0.10737327188940092, "grad_norm": 0.48300723462768347, "learning_rate": 1.9999302568709546e-06, "loss": 1.0621274709701538, "step": 466 }, { "epoch": 0.1076036866359447, "grad_norm": 0.45149909069714367, "learning_rate": 1.9999256847973774e-06, "loss": 0.8894643783569336, "step": 467 }, { "epoch": 0.10783410138248847, "grad_norm": 0.3529913357119793, "learning_rate": 1.999920967585911e-06, "loss": 0.98856520652771, "step": 468 }, { "epoch": 0.10806451612903226, "grad_norm": 0.3260735960256147, "learning_rate": 1.999916105237239e-06, "loss": 0.7885239124298096, "step": 469 }, { "epoch": 0.10829493087557604, "grad_norm": 0.4477697599226733, "learning_rate": 1.9999110977520687e-06, "loss": 1.0274477005004883, "step": 470 }, { "epoch": 0.10852534562211981, "grad_norm": 0.3938409891368368, "learning_rate": 1.999905945131126e-06, "loss": 0.8672109842300415, "step": 471 }, { "epoch": 0.10875576036866359, "grad_norm": 0.37173415889586336, "learning_rate": 1.9999006473751594e-06, "loss": 0.852576732635498, "step": 472 }, { "epoch": 0.10898617511520738, "grad_norm": 0.3670138423827908, "learning_rate": 1.9998952044849375e-06, "loss": 0.9553557634353638, "step": 473 }, { "epoch": 0.10921658986175115, "grad_norm": 0.4402707979796638, "learning_rate": 1.99988961646125e-06, "loss": 1.1375620365142822, "step": 474 }, { "epoch": 0.10944700460829493, "grad_norm": 0.4045716386517098, "learning_rate": 1.9998838833049083e-06, "loss": 0.9653681516647339, "step": 475 }, { "epoch": 0.10967741935483871, "grad_norm": 0.3653559897200667, "learning_rate": 1.999878005016745e-06, "loss": 1.1139185428619385, "step": 476 }, { "epoch": 0.10990783410138248, "grad_norm": 0.37459420946595523, "learning_rate": 1.9998719815976127e-06, "loss": 0.8375418186187744, "step": 477 }, { "epoch": 0.11013824884792627, "grad_norm": 0.33053822521695836, "learning_rate": 1.999865813048386e-06, "loss": 1.0005979537963867, "step": 478 }, { "epoch": 0.11036866359447005, "grad_norm": 0.39083306344420843, "learning_rate": 1.99985949936996e-06, "loss": 0.8499772548675537, "step": 479 }, { "epoch": 0.11059907834101383, "grad_norm": 0.3575835338316839, "learning_rate": 1.999853040563252e-06, "loss": 0.9805284738540649, "step": 480 }, { "epoch": 0.1108294930875576, "grad_norm": 0.43340835059987204, "learning_rate": 1.9998464366291983e-06, "loss": 0.9462177753448486, "step": 481 }, { "epoch": 0.11105990783410138, "grad_norm": 0.44706726559657484, "learning_rate": 1.999839687568758e-06, "loss": 1.1023187637329102, "step": 482 }, { "epoch": 0.11129032258064517, "grad_norm": 0.3754824087757579, "learning_rate": 1.9998327933829103e-06, "loss": 0.9361279010772705, "step": 483 }, { "epoch": 0.11152073732718894, "grad_norm": 0.38419186899738067, "learning_rate": 1.9998257540726567e-06, "loss": 0.9811379909515381, "step": 484 }, { "epoch": 0.11175115207373272, "grad_norm": 0.4030421476721474, "learning_rate": 1.9998185696390184e-06, "loss": 1.0246069431304932, "step": 485 }, { "epoch": 0.1119815668202765, "grad_norm": 0.4555360249805513, "learning_rate": 1.9998112400830385e-06, "loss": 1.0614899396896362, "step": 486 }, { "epoch": 0.11221198156682027, "grad_norm": 0.4347652169333907, "learning_rate": 1.9998037654057803e-06, "loss": 1.02305269241333, "step": 487 }, { "epoch": 0.11244239631336406, "grad_norm": 0.43672158413630835, "learning_rate": 1.999796145608329e-06, "loss": 1.044907808303833, "step": 488 }, { "epoch": 0.11267281105990784, "grad_norm": 0.4917956866782855, "learning_rate": 1.999788380691791e-06, "loss": 0.9669852256774902, "step": 489 }, { "epoch": 0.11290322580645161, "grad_norm": 0.3857920087478492, "learning_rate": 1.9997804706572933e-06, "loss": 1.0235236883163452, "step": 490 }, { "epoch": 0.1131336405529954, "grad_norm": 0.4541175977583441, "learning_rate": 1.9997724155059835e-06, "loss": 0.8982692360877991, "step": 491 }, { "epoch": 0.11336405529953918, "grad_norm": 0.481910238333043, "learning_rate": 1.9997642152390312e-06, "loss": 0.8390282988548279, "step": 492 }, { "epoch": 0.11359447004608295, "grad_norm": 0.39882686276748835, "learning_rate": 1.9997558698576266e-06, "loss": 0.8938695192337036, "step": 493 }, { "epoch": 0.11382488479262673, "grad_norm": 0.5064684870077569, "learning_rate": 1.9997473793629813e-06, "loss": 0.9747422933578491, "step": 494 }, { "epoch": 0.11405529953917051, "grad_norm": 0.443509358045386, "learning_rate": 1.999738743756327e-06, "loss": 1.050918698310852, "step": 495 }, { "epoch": 0.11428571428571428, "grad_norm": 0.5368423996158629, "learning_rate": 1.9997299630389174e-06, "loss": 0.9169312715530396, "step": 496 }, { "epoch": 0.11451612903225807, "grad_norm": 0.452695866401899, "learning_rate": 1.9997210372120272e-06, "loss": 1.0258065462112427, "step": 497 }, { "epoch": 0.11474654377880185, "grad_norm": 0.3831239007423439, "learning_rate": 1.9997119662769523e-06, "loss": 1.066356897354126, "step": 498 }, { "epoch": 0.11497695852534562, "grad_norm": 0.4319474855040805, "learning_rate": 1.9997027502350086e-06, "loss": 1.0336101055145264, "step": 499 }, { "epoch": 0.1152073732718894, "grad_norm": 0.36856882435983085, "learning_rate": 1.9996933890875342e-06, "loss": 1.0434989929199219, "step": 500 }, { "epoch": 0.11543778801843319, "grad_norm": 0.4366750071509639, "learning_rate": 1.9996838828358876e-06, "loss": 1.0081424713134766, "step": 501 }, { "epoch": 0.11566820276497695, "grad_norm": 0.4424253641379215, "learning_rate": 1.999674231481449e-06, "loss": 1.0998575687408447, "step": 502 }, { "epoch": 0.11589861751152074, "grad_norm": 0.43915567985422416, "learning_rate": 1.9996644350256193e-06, "loss": 1.0325868129730225, "step": 503 }, { "epoch": 0.11612903225806452, "grad_norm": 0.39758687932867864, "learning_rate": 1.99965449346982e-06, "loss": 1.0520741939544678, "step": 504 }, { "epoch": 0.11635944700460829, "grad_norm": 0.4373332869451062, "learning_rate": 1.9996444068154943e-06, "loss": 0.9355484247207642, "step": 505 }, { "epoch": 0.11658986175115207, "grad_norm": 0.478944942365821, "learning_rate": 1.9996341750641067e-06, "loss": 1.2088062763214111, "step": 506 }, { "epoch": 0.11682027649769586, "grad_norm": 0.45703939880277317, "learning_rate": 1.9996237982171416e-06, "loss": 1.007477045059204, "step": 507 }, { "epoch": 0.11705069124423963, "grad_norm": 0.516029780444843, "learning_rate": 1.9996132762761054e-06, "loss": 0.9528911113739014, "step": 508 }, { "epoch": 0.11728110599078341, "grad_norm": 0.44144049831872473, "learning_rate": 1.9996026092425258e-06, "loss": 1.0906065702438354, "step": 509 }, { "epoch": 0.1175115207373272, "grad_norm": 0.45635386377861326, "learning_rate": 1.9995917971179507e-06, "loss": 1.1328812837600708, "step": 510 }, { "epoch": 0.11774193548387096, "grad_norm": 0.5010986511700435, "learning_rate": 1.9995808399039493e-06, "loss": 1.1367099285125732, "step": 511 }, { "epoch": 0.11797235023041475, "grad_norm": 0.5738525299064665, "learning_rate": 1.999569737602112e-06, "loss": 1.22605562210083, "step": 512 }, { "epoch": 0.11820276497695853, "grad_norm": 0.40700112362856533, "learning_rate": 1.9995584902140514e-06, "loss": 0.8814148306846619, "step": 513 }, { "epoch": 0.1184331797235023, "grad_norm": 0.4018062947026822, "learning_rate": 1.9995470977413988e-06, "loss": 0.916766881942749, "step": 514 }, { "epoch": 0.11866359447004608, "grad_norm": 0.3907370494982875, "learning_rate": 1.999535560185808e-06, "loss": 0.8088599443435669, "step": 515 }, { "epoch": 0.11889400921658987, "grad_norm": 0.5585215819507526, "learning_rate": 1.9995238775489538e-06, "loss": 1.0029397010803223, "step": 516 }, { "epoch": 0.11912442396313364, "grad_norm": 0.47103060321263474, "learning_rate": 1.9995120498325322e-06, "loss": 1.157515287399292, "step": 517 }, { "epoch": 0.11935483870967742, "grad_norm": 0.43934234876750516, "learning_rate": 1.99950007703826e-06, "loss": 0.989453911781311, "step": 518 }, { "epoch": 0.1195852534562212, "grad_norm": 0.501533126043576, "learning_rate": 1.999487959167874e-06, "loss": 0.9791898727416992, "step": 519 }, { "epoch": 0.11981566820276497, "grad_norm": 0.3947583681206324, "learning_rate": 1.9994756962231343e-06, "loss": 0.9994203448295593, "step": 520 }, { "epoch": 0.12004608294930876, "grad_norm": 0.4064680989752179, "learning_rate": 1.999463288205821e-06, "loss": 0.9096299409866333, "step": 521 }, { "epoch": 0.12027649769585254, "grad_norm": 0.5675118509929592, "learning_rate": 1.999450735117734e-06, "loss": 0.9956046342849731, "step": 522 }, { "epoch": 0.12050691244239631, "grad_norm": 0.40854646192247485, "learning_rate": 1.9994380369606956e-06, "loss": 1.0336079597473145, "step": 523 }, { "epoch": 0.12073732718894009, "grad_norm": 0.4028964743045085, "learning_rate": 1.99942519373655e-06, "loss": 0.8828116655349731, "step": 524 }, { "epoch": 0.12096774193548387, "grad_norm": 0.4113573248244064, "learning_rate": 1.9994122054471597e-06, "loss": 0.8733093738555908, "step": 525 }, { "epoch": 0.12119815668202764, "grad_norm": 0.4633889976755098, "learning_rate": 1.9993990720944114e-06, "loss": 1.0312494039535522, "step": 526 }, { "epoch": 0.12142857142857143, "grad_norm": 0.39342421435973574, "learning_rate": 1.9993857936802105e-06, "loss": 0.9229701161384583, "step": 527 }, { "epoch": 0.12165898617511521, "grad_norm": 0.4629141668744642, "learning_rate": 1.9993723702064853e-06, "loss": 0.8980100154876709, "step": 528 }, { "epoch": 0.12188940092165898, "grad_norm": 0.42208035145091816, "learning_rate": 1.999358801675183e-06, "loss": 0.939933180809021, "step": 529 }, { "epoch": 0.12211981566820276, "grad_norm": 0.3966309171286601, "learning_rate": 1.9993450880882733e-06, "loss": 1.0014444589614868, "step": 530 }, { "epoch": 0.12235023041474655, "grad_norm": 0.4166874579150977, "learning_rate": 1.9993312294477477e-06, "loss": 0.9995889663696289, "step": 531 }, { "epoch": 0.12258064516129032, "grad_norm": 0.37598019229960666, "learning_rate": 1.9993172257556167e-06, "loss": 1.0010197162628174, "step": 532 }, { "epoch": 0.1228110599078341, "grad_norm": 0.3629842057209114, "learning_rate": 1.9993030770139135e-06, "loss": 0.972966194152832, "step": 533 }, { "epoch": 0.12304147465437788, "grad_norm": 0.4160633061352588, "learning_rate": 1.9992887832246917e-06, "loss": 0.8033444881439209, "step": 534 }, { "epoch": 0.12327188940092165, "grad_norm": 0.3895553967201257, "learning_rate": 1.9992743443900254e-06, "loss": 0.7532742619514465, "step": 535 }, { "epoch": 0.12350230414746544, "grad_norm": 0.46964696388446997, "learning_rate": 1.9992597605120113e-06, "loss": 1.058760643005371, "step": 536 }, { "epoch": 0.12373271889400922, "grad_norm": 0.37591416731208094, "learning_rate": 1.9992450315927658e-06, "loss": 0.8559634685516357, "step": 537 }, { "epoch": 0.12396313364055299, "grad_norm": 0.4216079229956694, "learning_rate": 1.9992301576344267e-06, "loss": 1.053638219833374, "step": 538 }, { "epoch": 0.12419354838709677, "grad_norm": 0.5423293655738015, "learning_rate": 1.9992151386391528e-06, "loss": 0.8841970562934875, "step": 539 }, { "epoch": 0.12442396313364056, "grad_norm": 0.5667972752402203, "learning_rate": 1.9991999746091247e-06, "loss": 0.9355173110961914, "step": 540 }, { "epoch": 0.12465437788018432, "grad_norm": 0.43323548094659586, "learning_rate": 1.999184665546543e-06, "loss": 0.9978284239768982, "step": 541 }, { "epoch": 0.12488479262672811, "grad_norm": 0.4166718713190779, "learning_rate": 1.99916921145363e-06, "loss": 0.8855264782905579, "step": 542 }, { "epoch": 0.1251152073732719, "grad_norm": 0.5314416958418489, "learning_rate": 1.9991536123326283e-06, "loss": 0.885519802570343, "step": 543 }, { "epoch": 0.12534562211981568, "grad_norm": 0.4381118612604031, "learning_rate": 1.9991378681858024e-06, "loss": 0.9772528409957886, "step": 544 }, { "epoch": 0.12557603686635946, "grad_norm": 0.46876887659201405, "learning_rate": 1.999121979015438e-06, "loss": 0.8817745447158813, "step": 545 }, { "epoch": 0.12580645161290321, "grad_norm": 0.36530562318650095, "learning_rate": 1.9991059448238404e-06, "loss": 0.9374080896377563, "step": 546 }, { "epoch": 0.126036866359447, "grad_norm": 0.3669313811039727, "learning_rate": 1.9990897656133383e-06, "loss": 0.9174116253852844, "step": 547 }, { "epoch": 0.12626728110599078, "grad_norm": 0.401361126928626, "learning_rate": 1.999073441386279e-06, "loss": 0.9514039158821106, "step": 548 }, { "epoch": 0.12649769585253456, "grad_norm": 0.4665811721686224, "learning_rate": 1.999056972145032e-06, "loss": 1.10535728931427, "step": 549 }, { "epoch": 0.12672811059907835, "grad_norm": 0.4609610092344924, "learning_rate": 1.999040357891989e-06, "loss": 1.0641597509384155, "step": 550 }, { "epoch": 0.12695852534562213, "grad_norm": 0.39409304359090785, "learning_rate": 1.99902359862956e-06, "loss": 0.9596017599105835, "step": 551 }, { "epoch": 0.1271889400921659, "grad_norm": 0.4899166130843387, "learning_rate": 1.9990066943601777e-06, "loss": 1.083927869796753, "step": 552 }, { "epoch": 0.12741935483870967, "grad_norm": 0.42007806110658624, "learning_rate": 1.998989645086297e-06, "loss": 0.9146738052368164, "step": 553 }, { "epoch": 0.12764976958525345, "grad_norm": 0.41224202627344914, "learning_rate": 1.998972450810391e-06, "loss": 0.9038050770759583, "step": 554 }, { "epoch": 0.12788018433179724, "grad_norm": 0.45759233489952406, "learning_rate": 1.9989551115349574e-06, "loss": 0.973220705986023, "step": 555 }, { "epoch": 0.12811059907834102, "grad_norm": 0.424280511041039, "learning_rate": 1.998937627262511e-06, "loss": 0.8804281949996948, "step": 556 }, { "epoch": 0.1283410138248848, "grad_norm": 0.47603807991909786, "learning_rate": 1.9989199979955903e-06, "loss": 1.100919485092163, "step": 557 }, { "epoch": 0.12857142857142856, "grad_norm": 0.5871199693144976, "learning_rate": 1.998902223736755e-06, "loss": 1.1152353286743164, "step": 558 }, { "epoch": 0.12880184331797234, "grad_norm": 0.4236469989661471, "learning_rate": 1.9988843044885837e-06, "loss": 1.0721793174743652, "step": 559 }, { "epoch": 0.12903225806451613, "grad_norm": 0.4234271408586371, "learning_rate": 1.9988662402536783e-06, "loss": 0.9035133123397827, "step": 560 }, { "epoch": 0.1292626728110599, "grad_norm": 0.4210053632602843, "learning_rate": 1.9988480310346603e-06, "loss": 1.0053937435150146, "step": 561 }, { "epoch": 0.1294930875576037, "grad_norm": 0.5230478085674195, "learning_rate": 1.9988296768341728e-06, "loss": 0.8536228537559509, "step": 562 }, { "epoch": 0.12972350230414748, "grad_norm": 0.41493118398063783, "learning_rate": 1.9988111776548797e-06, "loss": 0.9673396348953247, "step": 563 }, { "epoch": 0.12995391705069123, "grad_norm": 0.36295429679681995, "learning_rate": 1.998792533499467e-06, "loss": 0.9402456879615784, "step": 564 }, { "epoch": 0.13018433179723501, "grad_norm": 0.3983153697524455, "learning_rate": 1.99877374437064e-06, "loss": 0.8900678157806396, "step": 565 }, { "epoch": 0.1304147465437788, "grad_norm": 0.47587146443270817, "learning_rate": 1.9987548102711264e-06, "loss": 0.9112892150878906, "step": 566 }, { "epoch": 0.13064516129032258, "grad_norm": 0.3969666466780631, "learning_rate": 1.9987357312036743e-06, "loss": 0.763452410697937, "step": 567 }, { "epoch": 0.13087557603686636, "grad_norm": 0.44573355289133143, "learning_rate": 1.9987165071710527e-06, "loss": 1.0410873889923096, "step": 568 }, { "epoch": 0.13110599078341015, "grad_norm": 0.389372329442145, "learning_rate": 1.9986971381760524e-06, "loss": 1.029583215713501, "step": 569 }, { "epoch": 0.1313364055299539, "grad_norm": 0.46351745512727555, "learning_rate": 1.9986776242214845e-06, "loss": 0.994928777217865, "step": 570 }, { "epoch": 0.1315668202764977, "grad_norm": 0.49139035828687805, "learning_rate": 1.9986579653101817e-06, "loss": 1.001985788345337, "step": 571 }, { "epoch": 0.13179723502304147, "grad_norm": 0.45831221525956994, "learning_rate": 1.998638161444997e-06, "loss": 0.9813050031661987, "step": 572 }, { "epoch": 0.13202764976958525, "grad_norm": 0.45157644768988, "learning_rate": 1.9986182126288053e-06, "loss": 0.8678451180458069, "step": 573 }, { "epoch": 0.13225806451612904, "grad_norm": 0.42446769343835317, "learning_rate": 1.998598118864502e-06, "loss": 1.0393729209899902, "step": 574 }, { "epoch": 0.13248847926267282, "grad_norm": 0.39102315770569207, "learning_rate": 1.998577880155004e-06, "loss": 0.9631935358047485, "step": 575 }, { "epoch": 0.1327188940092166, "grad_norm": 0.37655183711017187, "learning_rate": 1.9985574965032483e-06, "loss": 0.8480437994003296, "step": 576 }, { "epoch": 0.13294930875576036, "grad_norm": 0.432103661547375, "learning_rate": 1.998536967912194e-06, "loss": 1.0450071096420288, "step": 577 }, { "epoch": 0.13317972350230414, "grad_norm": 0.5144084645376303, "learning_rate": 1.9985162943848207e-06, "loss": 0.9374763369560242, "step": 578 }, { "epoch": 0.13341013824884793, "grad_norm": 0.45444537143479036, "learning_rate": 1.9984954759241293e-06, "loss": 0.9405182600021362, "step": 579 }, { "epoch": 0.1336405529953917, "grad_norm": 0.42824704739155545, "learning_rate": 1.998474512533141e-06, "loss": 0.8406375646591187, "step": 580 }, { "epoch": 0.1338709677419355, "grad_norm": 0.48847321743709643, "learning_rate": 1.9984534042148994e-06, "loss": 0.9323312044143677, "step": 581 }, { "epoch": 0.13410138248847928, "grad_norm": 0.43641992007971325, "learning_rate": 1.998432150972468e-06, "loss": 1.0704214572906494, "step": 582 }, { "epoch": 0.13433179723502303, "grad_norm": 0.38681502715760335, "learning_rate": 1.9984107528089315e-06, "loss": 0.8636025190353394, "step": 583 }, { "epoch": 0.13456221198156681, "grad_norm": 0.4361205380771583, "learning_rate": 1.998389209727396e-06, "loss": 0.9616303443908691, "step": 584 }, { "epoch": 0.1347926267281106, "grad_norm": 0.4406937724242653, "learning_rate": 1.998367521730988e-06, "loss": 1.0457193851470947, "step": 585 }, { "epoch": 0.13502304147465438, "grad_norm": 0.4060450620979949, "learning_rate": 1.9983456888228566e-06, "loss": 1.0460572242736816, "step": 586 }, { "epoch": 0.13525345622119817, "grad_norm": 0.3771944294411782, "learning_rate": 1.9983237110061695e-06, "loss": 0.9693883657455444, "step": 587 }, { "epoch": 0.13548387096774195, "grad_norm": 0.4355709352067949, "learning_rate": 1.9983015882841175e-06, "loss": 0.8823472857475281, "step": 588 }, { "epoch": 0.1357142857142857, "grad_norm": 0.5606637533068962, "learning_rate": 1.998279320659912e-06, "loss": 1.1602983474731445, "step": 589 }, { "epoch": 0.1359447004608295, "grad_norm": 0.40130101265364443, "learning_rate": 1.9982569081367843e-06, "loss": 0.8191353678703308, "step": 590 }, { "epoch": 0.13617511520737327, "grad_norm": 0.40863931644700857, "learning_rate": 1.9982343507179876e-06, "loss": 1.141557216644287, "step": 591 }, { "epoch": 0.13640552995391705, "grad_norm": 0.4712969186607289, "learning_rate": 1.998211648406797e-06, "loss": 0.9688570499420166, "step": 592 }, { "epoch": 0.13663594470046084, "grad_norm": 0.4543844570436241, "learning_rate": 1.9981888012065068e-06, "loss": 1.0218561887741089, "step": 593 }, { "epoch": 0.13686635944700462, "grad_norm": 0.5219271265911207, "learning_rate": 1.9981658091204334e-06, "loss": 0.9531952142715454, "step": 594 }, { "epoch": 0.13709677419354838, "grad_norm": 0.5314208269690397, "learning_rate": 1.9981426721519143e-06, "loss": 1.1421492099761963, "step": 595 }, { "epoch": 0.13732718894009216, "grad_norm": 0.3970137466851754, "learning_rate": 1.9981193903043074e-06, "loss": 0.8173041343688965, "step": 596 }, { "epoch": 0.13755760368663594, "grad_norm": 0.43200053855682263, "learning_rate": 1.998095963580993e-06, "loss": 0.8842465877532959, "step": 597 }, { "epoch": 0.13778801843317973, "grad_norm": 0.6492506358781442, "learning_rate": 1.9980723919853703e-06, "loss": 0.8547788858413696, "step": 598 }, { "epoch": 0.1380184331797235, "grad_norm": 0.5287255050220753, "learning_rate": 1.998048675520861e-06, "loss": 1.0085712671279907, "step": 599 }, { "epoch": 0.1382488479262673, "grad_norm": 0.5226769291219134, "learning_rate": 1.9980248141909083e-06, "loss": 0.9276378750801086, "step": 600 }, { "epoch": 0.13847926267281105, "grad_norm": 0.44292446989501455, "learning_rate": 1.998000807998975e-06, "loss": 0.9236693382263184, "step": 601 }, { "epoch": 0.13870967741935483, "grad_norm": 0.43514287150953085, "learning_rate": 1.9979766569485454e-06, "loss": 1.0353924036026, "step": 602 }, { "epoch": 0.13894009216589862, "grad_norm": 0.3831256791535214, "learning_rate": 1.9979523610431246e-06, "loss": 0.8456567525863647, "step": 603 }, { "epoch": 0.1391705069124424, "grad_norm": 0.46736174894260846, "learning_rate": 1.997927920286241e-06, "loss": 0.997468888759613, "step": 604 }, { "epoch": 0.13940092165898618, "grad_norm": 0.38558572890255066, "learning_rate": 1.9979033346814397e-06, "loss": 0.8962260484695435, "step": 605 }, { "epoch": 0.13963133640552997, "grad_norm": 0.4829548009529998, "learning_rate": 1.997878604232291e-06, "loss": 0.8586266040802002, "step": 606 }, { "epoch": 0.13986175115207372, "grad_norm": 0.4776734917637134, "learning_rate": 1.9978537289423837e-06, "loss": 0.9639670848846436, "step": 607 }, { "epoch": 0.1400921658986175, "grad_norm": 0.4115822234384495, "learning_rate": 1.9978287088153286e-06, "loss": 1.005727767944336, "step": 608 }, { "epoch": 0.1403225806451613, "grad_norm": 0.44858527541471366, "learning_rate": 1.9978035438547575e-06, "loss": 1.148871898651123, "step": 609 }, { "epoch": 0.14055299539170507, "grad_norm": 0.4357664217922314, "learning_rate": 1.9977782340643226e-06, "loss": 1.0459539890289307, "step": 610 }, { "epoch": 0.14078341013824885, "grad_norm": 0.43229915305128663, "learning_rate": 1.9977527794476985e-06, "loss": 0.92689448595047, "step": 611 }, { "epoch": 0.14101382488479264, "grad_norm": 0.5514025110097415, "learning_rate": 1.997727180008579e-06, "loss": 0.9735790491104126, "step": 612 }, { "epoch": 0.1412442396313364, "grad_norm": 0.5114055458545007, "learning_rate": 1.99770143575068e-06, "loss": 0.8882870674133301, "step": 613 }, { "epoch": 0.14147465437788018, "grad_norm": 0.47604166837238787, "learning_rate": 1.9976755466777386e-06, "loss": 0.9229795932769775, "step": 614 }, { "epoch": 0.14170506912442396, "grad_norm": 0.39391923738635765, "learning_rate": 1.997649512793512e-06, "loss": 0.9097769260406494, "step": 615 }, { "epoch": 0.14193548387096774, "grad_norm": 0.429877903042447, "learning_rate": 1.9976233341017798e-06, "loss": 0.7751711010932922, "step": 616 }, { "epoch": 0.14216589861751153, "grad_norm": 0.4585028421290768, "learning_rate": 1.9975970106063414e-06, "loss": 0.9071080684661865, "step": 617 }, { "epoch": 0.1423963133640553, "grad_norm": 0.372835081071011, "learning_rate": 1.997570542311017e-06, "loss": 0.8444115519523621, "step": 618 }, { "epoch": 0.14262672811059907, "grad_norm": 0.46125503087530084, "learning_rate": 1.9975439292196496e-06, "loss": 0.9159516096115112, "step": 619 }, { "epoch": 0.14285714285714285, "grad_norm": 0.45879242474243875, "learning_rate": 1.997517171336101e-06, "loss": 0.9697242975234985, "step": 620 }, { "epoch": 0.14308755760368663, "grad_norm": 0.4605305033840643, "learning_rate": 1.9974902686642557e-06, "loss": 0.9894170761108398, "step": 621 }, { "epoch": 0.14331797235023042, "grad_norm": 0.48517122287493847, "learning_rate": 1.9974632212080184e-06, "loss": 1.0364127159118652, "step": 622 }, { "epoch": 0.1435483870967742, "grad_norm": 0.39169164845291754, "learning_rate": 1.997436028971315e-06, "loss": 0.8980219960212708, "step": 623 }, { "epoch": 0.14377880184331798, "grad_norm": 0.4857070397144096, "learning_rate": 1.9974086919580925e-06, "loss": 1.0293703079223633, "step": 624 }, { "epoch": 0.14400921658986174, "grad_norm": 0.46693238253454916, "learning_rate": 1.9973812101723186e-06, "loss": 1.006148099899292, "step": 625 }, { "epoch": 0.14423963133640552, "grad_norm": 0.5525790188158035, "learning_rate": 1.9973535836179825e-06, "loss": 0.9489799737930298, "step": 626 }, { "epoch": 0.1444700460829493, "grad_norm": 0.3704152285915921, "learning_rate": 1.997325812299094e-06, "loss": 0.7601498961448669, "step": 627 }, { "epoch": 0.1447004608294931, "grad_norm": 0.6225002321802279, "learning_rate": 1.9972978962196843e-06, "loss": 0.8345643281936646, "step": 628 }, { "epoch": 0.14493087557603687, "grad_norm": 0.48694459235316484, "learning_rate": 1.9972698353838053e-06, "loss": 0.8705894947052002, "step": 629 }, { "epoch": 0.14516129032258066, "grad_norm": 0.42033173985472694, "learning_rate": 1.9972416297955294e-06, "loss": 0.9515185356140137, "step": 630 }, { "epoch": 0.1453917050691244, "grad_norm": 0.38157222553103914, "learning_rate": 1.9972132794589514e-06, "loss": 0.7616517543792725, "step": 631 }, { "epoch": 0.1456221198156682, "grad_norm": 0.47593248323635307, "learning_rate": 1.9971847843781862e-06, "loss": 0.8870444297790527, "step": 632 }, { "epoch": 0.14585253456221198, "grad_norm": 0.45987330163099194, "learning_rate": 1.9971561445573696e-06, "loss": 0.8709393739700317, "step": 633 }, { "epoch": 0.14608294930875576, "grad_norm": 0.35616612587319196, "learning_rate": 1.997127360000658e-06, "loss": 0.865444540977478, "step": 634 }, { "epoch": 0.14631336405529954, "grad_norm": 0.4431578416665891, "learning_rate": 1.997098430712231e-06, "loss": 0.9560728073120117, "step": 635 }, { "epoch": 0.14654377880184333, "grad_norm": 0.4107966614124612, "learning_rate": 1.9970693566962866e-06, "loss": 0.7579058408737183, "step": 636 }, { "epoch": 0.14677419354838708, "grad_norm": 0.4609569973718347, "learning_rate": 1.997040137957045e-06, "loss": 1.0709021091461182, "step": 637 }, { "epoch": 0.14700460829493087, "grad_norm": 0.5029329480654331, "learning_rate": 1.9970107744987474e-06, "loss": 0.9911563396453857, "step": 638 }, { "epoch": 0.14723502304147465, "grad_norm": 0.45338591583748106, "learning_rate": 1.996981266325655e-06, "loss": 0.9673472046852112, "step": 639 }, { "epoch": 0.14746543778801843, "grad_norm": 0.3918341582647882, "learning_rate": 1.9969516134420523e-06, "loss": 0.7728441953659058, "step": 640 }, { "epoch": 0.14769585253456222, "grad_norm": 0.532382418423259, "learning_rate": 1.9969218158522426e-06, "loss": 1.0198101997375488, "step": 641 }, { "epoch": 0.147926267281106, "grad_norm": 0.45259693038053805, "learning_rate": 1.996891873560551e-06, "loss": 0.9710760116577148, "step": 642 }, { "epoch": 0.14815668202764978, "grad_norm": 0.41281494255735757, "learning_rate": 1.9968617865713237e-06, "loss": 0.9956847429275513, "step": 643 }, { "epoch": 0.14838709677419354, "grad_norm": 0.40081796016292187, "learning_rate": 1.996831554888928e-06, "loss": 1.0974771976470947, "step": 644 }, { "epoch": 0.14861751152073732, "grad_norm": 0.5353172634899142, "learning_rate": 1.9968011785177513e-06, "loss": 0.914455771446228, "step": 645 }, { "epoch": 0.1488479262672811, "grad_norm": 0.5511418094652546, "learning_rate": 1.9967706574622033e-06, "loss": 1.1308314800262451, "step": 646 }, { "epoch": 0.1490783410138249, "grad_norm": 0.5114786055194052, "learning_rate": 1.9967399917267142e-06, "loss": 0.981814444065094, "step": 647 }, { "epoch": 0.14930875576036867, "grad_norm": 0.431645238473459, "learning_rate": 1.9967091813157345e-06, "loss": 0.874076247215271, "step": 648 }, { "epoch": 0.14953917050691246, "grad_norm": 0.39621973386547166, "learning_rate": 1.9966782262337365e-06, "loss": 0.8496171832084656, "step": 649 }, { "epoch": 0.1497695852534562, "grad_norm": 0.49468581823361646, "learning_rate": 1.9966471264852136e-06, "loss": 0.9395674467086792, "step": 650 }, { "epoch": 0.15, "grad_norm": 0.4120224768195847, "learning_rate": 1.99661588207468e-06, "loss": 0.8363018035888672, "step": 651 }, { "epoch": 0.15023041474654378, "grad_norm": 0.4552124844336583, "learning_rate": 1.9965844930066696e-06, "loss": 1.0035831928253174, "step": 652 }, { "epoch": 0.15046082949308756, "grad_norm": 0.3910663219458386, "learning_rate": 1.99655295928574e-06, "loss": 1.0316795110702515, "step": 653 }, { "epoch": 0.15069124423963134, "grad_norm": 0.4287067909796643, "learning_rate": 1.9965212809164676e-06, "loss": 0.9545150995254517, "step": 654 }, { "epoch": 0.15092165898617513, "grad_norm": 0.4577708396372056, "learning_rate": 1.99648945790345e-06, "loss": 0.993801474571228, "step": 655 }, { "epoch": 0.15115207373271888, "grad_norm": 0.4032410507172632, "learning_rate": 1.9964574902513075e-06, "loss": 0.8666588664054871, "step": 656 }, { "epoch": 0.15138248847926267, "grad_norm": 0.48179220104835324, "learning_rate": 1.9964253779646787e-06, "loss": 0.9507651925086975, "step": 657 }, { "epoch": 0.15161290322580645, "grad_norm": 0.4899406622119438, "learning_rate": 1.996393121048226e-06, "loss": 0.8700851202011108, "step": 658 }, { "epoch": 0.15184331797235023, "grad_norm": 0.40256613113119405, "learning_rate": 1.9963607195066307e-06, "loss": 0.9966975450515747, "step": 659 }, { "epoch": 0.15207373271889402, "grad_norm": 0.44964674216674483, "learning_rate": 1.9963281733445957e-06, "loss": 0.9552028179168701, "step": 660 }, { "epoch": 0.1523041474654378, "grad_norm": 0.47921018457871023, "learning_rate": 1.9962954825668456e-06, "loss": 1.0182740688323975, "step": 661 }, { "epoch": 0.15253456221198156, "grad_norm": 0.5096203598929419, "learning_rate": 1.996262647178125e-06, "loss": 1.0001778602600098, "step": 662 }, { "epoch": 0.15276497695852534, "grad_norm": 0.44730944505165277, "learning_rate": 1.9962296671832e-06, "loss": 0.9902865886688232, "step": 663 }, { "epoch": 0.15299539170506912, "grad_norm": 0.44977913840647327, "learning_rate": 1.9961965425868575e-06, "loss": 0.9272845983505249, "step": 664 }, { "epoch": 0.1532258064516129, "grad_norm": 0.5011405916103636, "learning_rate": 1.996163273393906e-06, "loss": 0.9705777168273926, "step": 665 }, { "epoch": 0.1534562211981567, "grad_norm": 0.5035568947424544, "learning_rate": 1.9961298596091736e-06, "loss": 0.9472209215164185, "step": 666 }, { "epoch": 0.15368663594470047, "grad_norm": 0.3982786140702462, "learning_rate": 1.9960963012375113e-06, "loss": 0.9734043478965759, "step": 667 }, { "epoch": 0.15391705069124423, "grad_norm": 0.516464356110248, "learning_rate": 1.9960625982837894e-06, "loss": 0.8765468001365662, "step": 668 }, { "epoch": 0.154147465437788, "grad_norm": 0.6158206412964224, "learning_rate": 1.9960287507529e-06, "loss": 1.0224063396453857, "step": 669 }, { "epoch": 0.1543778801843318, "grad_norm": 0.4417623345727513, "learning_rate": 1.995994758649756e-06, "loss": 0.983299970626831, "step": 670 }, { "epoch": 0.15460829493087558, "grad_norm": 0.4680475004359934, "learning_rate": 1.9959606219792914e-06, "loss": 1.0524147748947144, "step": 671 }, { "epoch": 0.15483870967741936, "grad_norm": 0.45758073401288785, "learning_rate": 1.9959263407464606e-06, "loss": 1.1012977361679077, "step": 672 }, { "epoch": 0.15506912442396314, "grad_norm": 0.6298296463565526, "learning_rate": 1.99589191495624e-06, "loss": 0.8494049310684204, "step": 673 }, { "epoch": 0.1552995391705069, "grad_norm": 0.4795860182347848, "learning_rate": 1.9958573446136263e-06, "loss": 0.8677045106887817, "step": 674 }, { "epoch": 0.15552995391705068, "grad_norm": 0.5514260857685808, "learning_rate": 1.995822629723638e-06, "loss": 1.1034941673278809, "step": 675 }, { "epoch": 0.15576036866359447, "grad_norm": 0.3959041890885462, "learning_rate": 1.9957877702913128e-06, "loss": 0.8428820371627808, "step": 676 }, { "epoch": 0.15599078341013825, "grad_norm": 0.5435721054179383, "learning_rate": 1.9957527663217107e-06, "loss": 0.8584408760070801, "step": 677 }, { "epoch": 0.15622119815668203, "grad_norm": 0.47044010204436115, "learning_rate": 1.995717617819913e-06, "loss": 0.8089514970779419, "step": 678 }, { "epoch": 0.15645161290322582, "grad_norm": 0.48360065475233177, "learning_rate": 1.9956823247910217e-06, "loss": 0.8459775447845459, "step": 679 }, { "epoch": 0.15668202764976957, "grad_norm": 0.441023419118176, "learning_rate": 1.9956468872401583e-06, "loss": 1.0583066940307617, "step": 680 }, { "epoch": 0.15691244239631336, "grad_norm": 0.4427871322496545, "learning_rate": 1.995611305172468e-06, "loss": 0.9396135807037354, "step": 681 }, { "epoch": 0.15714285714285714, "grad_norm": 0.4888169944824013, "learning_rate": 1.995575578593114e-06, "loss": 1.0143593549728394, "step": 682 }, { "epoch": 0.15737327188940092, "grad_norm": 0.44801312951365924, "learning_rate": 1.9955397075072833e-06, "loss": 0.8822500109672546, "step": 683 }, { "epoch": 0.1576036866359447, "grad_norm": 0.4936771776275296, "learning_rate": 1.995503691920182e-06, "loss": 0.8841962218284607, "step": 684 }, { "epoch": 0.1578341013824885, "grad_norm": 0.4240877666200064, "learning_rate": 1.9954675318370374e-06, "loss": 0.8537080883979797, "step": 685 }, { "epoch": 0.15806451612903225, "grad_norm": 0.5056113314098377, "learning_rate": 1.9954312272630985e-06, "loss": 1.0292394161224365, "step": 686 }, { "epoch": 0.15829493087557603, "grad_norm": 0.5106923922410934, "learning_rate": 1.995394778203635e-06, "loss": 0.8741706013679504, "step": 687 }, { "epoch": 0.1585253456221198, "grad_norm": 0.47911475912836377, "learning_rate": 1.995358184663937e-06, "loss": 0.9429572820663452, "step": 688 }, { "epoch": 0.1587557603686636, "grad_norm": 0.5562334593954328, "learning_rate": 1.995321446649316e-06, "loss": 0.9522494077682495, "step": 689 }, { "epoch": 0.15898617511520738, "grad_norm": 0.5394048021515351, "learning_rate": 1.9952845641651046e-06, "loss": 0.9743782877922058, "step": 690 }, { "epoch": 0.15921658986175116, "grad_norm": 0.4663620909245047, "learning_rate": 1.995247537216657e-06, "loss": 0.926364541053772, "step": 691 }, { "epoch": 0.15944700460829492, "grad_norm": 0.4595450639525932, "learning_rate": 1.995210365809346e-06, "loss": 0.8355565071105957, "step": 692 }, { "epoch": 0.1596774193548387, "grad_norm": 0.44548737988500176, "learning_rate": 1.9951730499485684e-06, "loss": 0.9200692772865295, "step": 693 }, { "epoch": 0.15990783410138248, "grad_norm": 0.36513232613054547, "learning_rate": 1.99513558963974e-06, "loss": 0.7571361064910889, "step": 694 }, { "epoch": 0.16013824884792627, "grad_norm": 0.48187866859107054, "learning_rate": 1.995097984888298e-06, "loss": 0.935307502746582, "step": 695 }, { "epoch": 0.16036866359447005, "grad_norm": 0.5833897193983939, "learning_rate": 1.995060235699701e-06, "loss": 1.1118557453155518, "step": 696 }, { "epoch": 0.16059907834101383, "grad_norm": 0.3866866326578979, "learning_rate": 1.995022342079428e-06, "loss": 0.8024749755859375, "step": 697 }, { "epoch": 0.1608294930875576, "grad_norm": 0.44217187311148026, "learning_rate": 1.994984304032979e-06, "loss": 0.9018943309783936, "step": 698 }, { "epoch": 0.16105990783410137, "grad_norm": 0.4729402911259197, "learning_rate": 1.9949461215658757e-06, "loss": 0.8571128249168396, "step": 699 }, { "epoch": 0.16129032258064516, "grad_norm": 0.4822593475964477, "learning_rate": 1.99490779468366e-06, "loss": 0.9707971215248108, "step": 700 }, { "epoch": 0.16152073732718894, "grad_norm": 0.4341551988253619, "learning_rate": 1.994869323391895e-06, "loss": 0.8157618045806885, "step": 701 }, { "epoch": 0.16175115207373272, "grad_norm": 0.4620050649733586, "learning_rate": 1.994830707696165e-06, "loss": 0.9009906053543091, "step": 702 }, { "epoch": 0.1619815668202765, "grad_norm": 0.5270647594020066, "learning_rate": 1.9947919476020745e-06, "loss": 1.0093860626220703, "step": 703 }, { "epoch": 0.1622119815668203, "grad_norm": 0.4233068308539462, "learning_rate": 1.9947530431152494e-06, "loss": 1.018160343170166, "step": 704 }, { "epoch": 0.16244239631336405, "grad_norm": 0.5753809013533212, "learning_rate": 1.9947139942413378e-06, "loss": 0.9755370616912842, "step": 705 }, { "epoch": 0.16267281105990783, "grad_norm": 0.490686071812002, "learning_rate": 1.994674800986006e-06, "loss": 0.9406822919845581, "step": 706 }, { "epoch": 0.1629032258064516, "grad_norm": 0.4856505350445516, "learning_rate": 1.994635463354944e-06, "loss": 0.9128296971321106, "step": 707 }, { "epoch": 0.1631336405529954, "grad_norm": 0.42889971607025285, "learning_rate": 1.994595981353861e-06, "loss": 0.929735541343689, "step": 708 }, { "epoch": 0.16336405529953918, "grad_norm": 0.5176054911036664, "learning_rate": 1.994556354988488e-06, "loss": 0.9021023511886597, "step": 709 }, { "epoch": 0.16359447004608296, "grad_norm": 0.46567553841056064, "learning_rate": 1.994516584264577e-06, "loss": 0.9187623262405396, "step": 710 }, { "epoch": 0.16382488479262672, "grad_norm": 0.4564071002670219, "learning_rate": 1.9944766691879e-06, "loss": 0.8283985257148743, "step": 711 }, { "epoch": 0.1640552995391705, "grad_norm": 0.5448909609220928, "learning_rate": 1.994436609764251e-06, "loss": 1.0592901706695557, "step": 712 }, { "epoch": 0.16428571428571428, "grad_norm": 0.5512946720093808, "learning_rate": 1.9943964059994446e-06, "loss": 0.98726487159729, "step": 713 }, { "epoch": 0.16451612903225807, "grad_norm": 0.5060774432164115, "learning_rate": 1.9943560578993165e-06, "loss": 0.8761749267578125, "step": 714 }, { "epoch": 0.16474654377880185, "grad_norm": 0.4759569802502017, "learning_rate": 1.9943155654697227e-06, "loss": 0.878170371055603, "step": 715 }, { "epoch": 0.16497695852534563, "grad_norm": 0.5212205127966931, "learning_rate": 1.9942749287165414e-06, "loss": 0.9444767236709595, "step": 716 }, { "epoch": 0.1652073732718894, "grad_norm": 0.436107073640643, "learning_rate": 1.9942341476456697e-06, "loss": 0.8270057439804077, "step": 717 }, { "epoch": 0.16543778801843317, "grad_norm": 0.36828111446023454, "learning_rate": 1.9941932222630284e-06, "loss": 0.825955867767334, "step": 718 }, { "epoch": 0.16566820276497696, "grad_norm": 0.4748059596727922, "learning_rate": 1.9941521525745564e-06, "loss": 0.9384286403656006, "step": 719 }, { "epoch": 0.16589861751152074, "grad_norm": 0.5968010950850139, "learning_rate": 1.994110938586216e-06, "loss": 0.9627010226249695, "step": 720 }, { "epoch": 0.16612903225806452, "grad_norm": 0.40665371786149496, "learning_rate": 1.9940695803039886e-06, "loss": 0.8436836004257202, "step": 721 }, { "epoch": 0.1663594470046083, "grad_norm": 0.48219849106464674, "learning_rate": 1.994028077733878e-06, "loss": 1.0689928531646729, "step": 722 }, { "epoch": 0.16658986175115206, "grad_norm": 0.4600242469407339, "learning_rate": 1.993986430881907e-06, "loss": 0.911309003829956, "step": 723 }, { "epoch": 0.16682027649769585, "grad_norm": 0.5404195969690949, "learning_rate": 1.993944639754122e-06, "loss": 0.9897152185440063, "step": 724 }, { "epoch": 0.16705069124423963, "grad_norm": 0.48212503869308937, "learning_rate": 1.9939027043565883e-06, "loss": 1.0230367183685303, "step": 725 }, { "epoch": 0.1672811059907834, "grad_norm": 0.4398728967426152, "learning_rate": 1.993860624695393e-06, "loss": 0.8067069053649902, "step": 726 }, { "epoch": 0.1675115207373272, "grad_norm": 0.5835576425821721, "learning_rate": 1.9938184007766434e-06, "loss": 0.9784343242645264, "step": 727 }, { "epoch": 0.16774193548387098, "grad_norm": 0.5139557651921927, "learning_rate": 1.9937760326064686e-06, "loss": 0.8617877960205078, "step": 728 }, { "epoch": 0.16797235023041474, "grad_norm": 0.5276605551773887, "learning_rate": 1.9937335201910183e-06, "loss": 1.0390141010284424, "step": 729 }, { "epoch": 0.16820276497695852, "grad_norm": 0.5007165894606777, "learning_rate": 1.9936908635364633e-06, "loss": 1.0478965044021606, "step": 730 }, { "epoch": 0.1684331797235023, "grad_norm": 0.46789644745982956, "learning_rate": 1.9936480626489944e-06, "loss": 0.8396252393722534, "step": 731 }, { "epoch": 0.16866359447004609, "grad_norm": 0.4366381763655398, "learning_rate": 1.9936051175348256e-06, "loss": 0.8690099120140076, "step": 732 }, { "epoch": 0.16889400921658987, "grad_norm": 0.44373038767323764, "learning_rate": 1.993562028200189e-06, "loss": 0.944722056388855, "step": 733 }, { "epoch": 0.16912442396313365, "grad_norm": 0.4480067961897654, "learning_rate": 1.9935187946513385e-06, "loss": 0.7134733200073242, "step": 734 }, { "epoch": 0.1693548387096774, "grad_norm": 0.44081731431481436, "learning_rate": 1.993475416894551e-06, "loss": 0.8102486729621887, "step": 735 }, { "epoch": 0.1695852534562212, "grad_norm": 0.5621249368486638, "learning_rate": 1.9934318949361215e-06, "loss": 0.924787163734436, "step": 736 }, { "epoch": 0.16981566820276497, "grad_norm": 0.4621168425652111, "learning_rate": 1.993388228782368e-06, "loss": 0.9595087766647339, "step": 737 }, { "epoch": 0.17004608294930876, "grad_norm": 0.4164356485660062, "learning_rate": 1.993344418439628e-06, "loss": 0.9949792623519897, "step": 738 }, { "epoch": 0.17027649769585254, "grad_norm": 0.6359964400004778, "learning_rate": 1.9933004639142604e-06, "loss": 1.0905860662460327, "step": 739 }, { "epoch": 0.17050691244239632, "grad_norm": 0.39800173884382345, "learning_rate": 1.9932563652126455e-06, "loss": 0.9638324975967407, "step": 740 }, { "epoch": 0.17073732718894008, "grad_norm": 0.4909114039853375, "learning_rate": 1.9932121223411844e-06, "loss": 0.9434946179389954, "step": 741 }, { "epoch": 0.17096774193548386, "grad_norm": 0.49072837958490606, "learning_rate": 1.9931677353062983e-06, "loss": 0.9050095081329346, "step": 742 }, { "epoch": 0.17119815668202765, "grad_norm": 0.509303736181324, "learning_rate": 1.9931232041144303e-06, "loss": 1.0698316097259521, "step": 743 }, { "epoch": 0.17142857142857143, "grad_norm": 0.393391743712663, "learning_rate": 1.993078528772044e-06, "loss": 0.7938296794891357, "step": 744 }, { "epoch": 0.1716589861751152, "grad_norm": 0.46597408496400117, "learning_rate": 1.993033709285624e-06, "loss": 0.8485043048858643, "step": 745 }, { "epoch": 0.171889400921659, "grad_norm": 0.4736797887475262, "learning_rate": 1.9929887456616754e-06, "loss": 0.8605694770812988, "step": 746 }, { "epoch": 0.17211981566820275, "grad_norm": 0.40523028160004354, "learning_rate": 1.9929436379067253e-06, "loss": 0.7101563215255737, "step": 747 }, { "epoch": 0.17235023041474654, "grad_norm": 0.4519555914654837, "learning_rate": 1.9928983860273205e-06, "loss": 1.093912959098816, "step": 748 }, { "epoch": 0.17258064516129032, "grad_norm": 0.4930830686705908, "learning_rate": 1.9928529900300294e-06, "loss": 0.8099753856658936, "step": 749 }, { "epoch": 0.1728110599078341, "grad_norm": 0.3752662958180716, "learning_rate": 1.992807449921441e-06, "loss": 0.7816359400749207, "step": 750 }, { "epoch": 0.17304147465437789, "grad_norm": 0.5180432792159949, "learning_rate": 1.9927617657081656e-06, "loss": 0.8887455463409424, "step": 751 }, { "epoch": 0.17327188940092167, "grad_norm": 0.6260862232080928, "learning_rate": 1.992715937396834e-06, "loss": 1.0926017761230469, "step": 752 }, { "epoch": 0.17350230414746542, "grad_norm": 0.5546410088380269, "learning_rate": 1.9926699649940985e-06, "loss": 0.7657707929611206, "step": 753 }, { "epoch": 0.1737327188940092, "grad_norm": 0.5766197712214459, "learning_rate": 1.992623848506632e-06, "loss": 0.9350340366363525, "step": 754 }, { "epoch": 0.173963133640553, "grad_norm": 0.5011774306610247, "learning_rate": 1.9925775879411276e-06, "loss": 0.883575439453125, "step": 755 }, { "epoch": 0.17419354838709677, "grad_norm": 0.3678933943457833, "learning_rate": 1.9925311833043e-06, "loss": 0.814304769039154, "step": 756 }, { "epoch": 0.17442396313364056, "grad_norm": 0.5857143887476359, "learning_rate": 1.992484634602886e-06, "loss": 0.9263690710067749, "step": 757 }, { "epoch": 0.17465437788018434, "grad_norm": 0.49862680540203774, "learning_rate": 1.9924379418436402e-06, "loss": 1.0321627855300903, "step": 758 }, { "epoch": 0.1748847926267281, "grad_norm": 0.5062063825952041, "learning_rate": 1.9923911050333413e-06, "loss": 0.969459056854248, "step": 759 }, { "epoch": 0.17511520737327188, "grad_norm": 0.4554436665394103, "learning_rate": 1.9923441241787874e-06, "loss": 0.9926396012306213, "step": 760 }, { "epoch": 0.17534562211981566, "grad_norm": 0.43315077691547155, "learning_rate": 1.9922969992867975e-06, "loss": 0.776180624961853, "step": 761 }, { "epoch": 0.17557603686635945, "grad_norm": 0.5350913373105377, "learning_rate": 1.992249730364212e-06, "loss": 0.9413800239562988, "step": 762 }, { "epoch": 0.17580645161290323, "grad_norm": 0.48045178893419493, "learning_rate": 1.9922023174178913e-06, "loss": 0.8365576267242432, "step": 763 }, { "epoch": 0.17603686635944701, "grad_norm": 0.47752363664412967, "learning_rate": 1.992154760454718e-06, "loss": 1.023102879524231, "step": 764 }, { "epoch": 0.17626728110599077, "grad_norm": 0.6035875388891613, "learning_rate": 1.9921070594815944e-06, "loss": 1.079930067062378, "step": 765 }, { "epoch": 0.17649769585253455, "grad_norm": 0.44885698296531085, "learning_rate": 1.9920592145054445e-06, "loss": 0.8974392414093018, "step": 766 }, { "epoch": 0.17672811059907834, "grad_norm": 0.5363940338283703, "learning_rate": 1.9920112255332133e-06, "loss": 0.9509298205375671, "step": 767 }, { "epoch": 0.17695852534562212, "grad_norm": 0.3960858930926947, "learning_rate": 1.991963092571866e-06, "loss": 0.938835620880127, "step": 768 }, { "epoch": 0.1771889400921659, "grad_norm": 0.3409332869225393, "learning_rate": 1.9919148156283888e-06, "loss": 0.7918044328689575, "step": 769 }, { "epoch": 0.1774193548387097, "grad_norm": 0.46985590284048473, "learning_rate": 1.9918663947097893e-06, "loss": 0.8235958814620972, "step": 770 }, { "epoch": 0.17764976958525347, "grad_norm": 0.4734643903674827, "learning_rate": 1.9918178298230953e-06, "loss": 0.9079158902168274, "step": 771 }, { "epoch": 0.17788018433179723, "grad_norm": 0.5764167010482935, "learning_rate": 1.9917691209753563e-06, "loss": 0.8548607230186462, "step": 772 }, { "epoch": 0.178110599078341, "grad_norm": 0.47446352682333093, "learning_rate": 1.9917202681736428e-06, "loss": 0.8327757120132446, "step": 773 }, { "epoch": 0.1783410138248848, "grad_norm": 0.5415533792438672, "learning_rate": 1.991671271425045e-06, "loss": 1.0511503219604492, "step": 774 }, { "epoch": 0.17857142857142858, "grad_norm": 0.4310425860855909, "learning_rate": 1.991622130736675e-06, "loss": 0.9168857932090759, "step": 775 }, { "epoch": 0.17880184331797236, "grad_norm": 0.44391822434593214, "learning_rate": 1.9915728461156654e-06, "loss": 0.8740782737731934, "step": 776 }, { "epoch": 0.17903225806451614, "grad_norm": 0.5841506637592749, "learning_rate": 1.99152341756917e-06, "loss": 0.9706588983535767, "step": 777 }, { "epoch": 0.1792626728110599, "grad_norm": 0.5492923015057676, "learning_rate": 1.9914738451043627e-06, "loss": 1.144281268119812, "step": 778 }, { "epoch": 0.17949308755760368, "grad_norm": 0.4170516305027483, "learning_rate": 1.9914241287284403e-06, "loss": 0.973777174949646, "step": 779 }, { "epoch": 0.17972350230414746, "grad_norm": 0.4502683719091688, "learning_rate": 1.991374268448617e-06, "loss": 0.9002145528793335, "step": 780 }, { "epoch": 0.17995391705069125, "grad_norm": 0.5526460425242373, "learning_rate": 1.9913242642721316e-06, "loss": 0.9234670400619507, "step": 781 }, { "epoch": 0.18018433179723503, "grad_norm": 0.4959743401985291, "learning_rate": 1.9912741162062415e-06, "loss": 0.9552402496337891, "step": 782 }, { "epoch": 0.18041474654377881, "grad_norm": 0.5510111451188886, "learning_rate": 1.9912238242582257e-06, "loss": 1.0485708713531494, "step": 783 }, { "epoch": 0.18064516129032257, "grad_norm": 0.5447745918227888, "learning_rate": 1.991173388435384e-06, "loss": 0.9852809906005859, "step": 784 }, { "epoch": 0.18087557603686635, "grad_norm": 0.4726322734582533, "learning_rate": 1.991122808745037e-06, "loss": 0.7824808359146118, "step": 785 }, { "epoch": 0.18110599078341014, "grad_norm": 0.6534462420793078, "learning_rate": 1.9910720851945268e-06, "loss": 1.0380492210388184, "step": 786 }, { "epoch": 0.18133640552995392, "grad_norm": 0.48532232647089923, "learning_rate": 1.991021217791215e-06, "loss": 0.9808282256126404, "step": 787 }, { "epoch": 0.1815668202764977, "grad_norm": 0.4791928008108061, "learning_rate": 1.9909702065424854e-06, "loss": 0.8636116981506348, "step": 788 }, { "epoch": 0.1817972350230415, "grad_norm": 0.45783287516468024, "learning_rate": 1.9909190514557427e-06, "loss": 0.8179407715797424, "step": 789 }, { "epoch": 0.18202764976958524, "grad_norm": 0.4760021295113364, "learning_rate": 1.990867752538411e-06, "loss": 0.9424594044685364, "step": 790 }, { "epoch": 0.18225806451612903, "grad_norm": 0.5558557995369799, "learning_rate": 1.9908163097979366e-06, "loss": 0.9429298043251038, "step": 791 }, { "epoch": 0.1824884792626728, "grad_norm": 0.5944732273868478, "learning_rate": 1.990764723241787e-06, "loss": 0.9671716690063477, "step": 792 }, { "epoch": 0.1827188940092166, "grad_norm": 0.6041148299127167, "learning_rate": 1.9907129928774494e-06, "loss": 1.0063345432281494, "step": 793 }, { "epoch": 0.18294930875576038, "grad_norm": 0.4817475331580677, "learning_rate": 1.990661118712432e-06, "loss": 0.9932061433792114, "step": 794 }, { "epoch": 0.18317972350230416, "grad_norm": 0.4648544131499562, "learning_rate": 1.990609100754265e-06, "loss": 0.859153151512146, "step": 795 }, { "epoch": 0.18341013824884791, "grad_norm": 0.4738825500961963, "learning_rate": 1.9905569390104984e-06, "loss": 0.9328111410140991, "step": 796 }, { "epoch": 0.1836405529953917, "grad_norm": 0.542624486663781, "learning_rate": 1.9905046334887033e-06, "loss": 0.9970628619194031, "step": 797 }, { "epoch": 0.18387096774193548, "grad_norm": 0.41971271798029636, "learning_rate": 1.990452184196472e-06, "loss": 1.0347282886505127, "step": 798 }, { "epoch": 0.18410138248847926, "grad_norm": 0.4270967132251902, "learning_rate": 1.990399591141417e-06, "loss": 0.9167106747627258, "step": 799 }, { "epoch": 0.18433179723502305, "grad_norm": 0.5046236893106074, "learning_rate": 1.990346854331173e-06, "loss": 0.8895610570907593, "step": 800 }, { "epoch": 0.18456221198156683, "grad_norm": 0.5237845429219861, "learning_rate": 1.990293973773394e-06, "loss": 0.8525041341781616, "step": 801 }, { "epoch": 0.1847926267281106, "grad_norm": 0.4894836264572075, "learning_rate": 1.9902409494757553e-06, "loss": 0.8184069395065308, "step": 802 }, { "epoch": 0.18502304147465437, "grad_norm": 0.430895578738413, "learning_rate": 1.9901877814459544e-06, "loss": 0.8342509269714355, "step": 803 }, { "epoch": 0.18525345622119815, "grad_norm": 0.49779999067704434, "learning_rate": 1.9901344696917072e-06, "loss": 0.9254395365715027, "step": 804 }, { "epoch": 0.18548387096774194, "grad_norm": 0.5124892914660328, "learning_rate": 1.990081014220753e-06, "loss": 0.9537396430969238, "step": 805 }, { "epoch": 0.18571428571428572, "grad_norm": 0.47100696643896606, "learning_rate": 1.99002741504085e-06, "loss": 0.871498167514801, "step": 806 }, { "epoch": 0.1859447004608295, "grad_norm": 0.43363760401100476, "learning_rate": 1.9899736721597786e-06, "loss": 0.879954993724823, "step": 807 }, { "epoch": 0.18617511520737326, "grad_norm": 0.5651525829110051, "learning_rate": 1.9899197855853386e-06, "loss": 0.9238240718841553, "step": 808 }, { "epoch": 0.18640552995391704, "grad_norm": 0.43185548411741037, "learning_rate": 1.9898657553253527e-06, "loss": 0.7939119935035706, "step": 809 }, { "epoch": 0.18663594470046083, "grad_norm": 0.42423118388289394, "learning_rate": 1.989811581387663e-06, "loss": 0.8536086082458496, "step": 810 }, { "epoch": 0.1868663594470046, "grad_norm": 0.7488569193689159, "learning_rate": 1.9897572637801322e-06, "loss": 0.8272225856781006, "step": 811 }, { "epoch": 0.1870967741935484, "grad_norm": 0.5639808995976617, "learning_rate": 1.989702802510645e-06, "loss": 0.9187904596328735, "step": 812 }, { "epoch": 0.18732718894009218, "grad_norm": 0.5096509814307604, "learning_rate": 1.989648197587106e-06, "loss": 0.905516505241394, "step": 813 }, { "epoch": 0.18755760368663593, "grad_norm": 0.46349746061643887, "learning_rate": 1.9895934490174415e-06, "loss": 0.7548567056655884, "step": 814 }, { "epoch": 0.18778801843317972, "grad_norm": 0.5916446556749395, "learning_rate": 1.9895385568095978e-06, "loss": 0.8242576122283936, "step": 815 }, { "epoch": 0.1880184331797235, "grad_norm": 0.47871736963615374, "learning_rate": 1.9894835209715427e-06, "loss": 0.9861007928848267, "step": 816 }, { "epoch": 0.18824884792626728, "grad_norm": 0.5325996448618295, "learning_rate": 1.989428341511264e-06, "loss": 0.9705426096916199, "step": 817 }, { "epoch": 0.18847926267281107, "grad_norm": 0.5222036147665577, "learning_rate": 1.9893730184367722e-06, "loss": 0.9773565530776978, "step": 818 }, { "epoch": 0.18870967741935485, "grad_norm": 0.42837248272258044, "learning_rate": 1.989317551756096e-06, "loss": 0.7929856777191162, "step": 819 }, { "epoch": 0.1889400921658986, "grad_norm": 0.48925051722314383, "learning_rate": 1.9892619414772866e-06, "loss": 0.9749126434326172, "step": 820 }, { "epoch": 0.1891705069124424, "grad_norm": 0.49968815355517815, "learning_rate": 1.9892061876084166e-06, "loss": 0.9945374727249146, "step": 821 }, { "epoch": 0.18940092165898617, "grad_norm": 0.3942389156154952, "learning_rate": 1.9891502901575776e-06, "loss": 0.8016892075538635, "step": 822 }, { "epoch": 0.18963133640552995, "grad_norm": 0.5604199160430772, "learning_rate": 1.9890942491328837e-06, "loss": 0.9389557838439941, "step": 823 }, { "epoch": 0.18986175115207374, "grad_norm": 0.38179956879765936, "learning_rate": 1.9890380645424686e-06, "loss": 0.724082887172699, "step": 824 }, { "epoch": 0.19009216589861752, "grad_norm": 0.5409880819899738, "learning_rate": 1.988981736394488e-06, "loss": 0.8877915143966675, "step": 825 }, { "epoch": 0.19032258064516128, "grad_norm": 0.6992705135248997, "learning_rate": 1.9889252646971177e-06, "loss": 1.207446813583374, "step": 826 }, { "epoch": 0.19055299539170506, "grad_norm": 0.5040994233955279, "learning_rate": 1.9888686494585542e-06, "loss": 0.9155057668685913, "step": 827 }, { "epoch": 0.19078341013824884, "grad_norm": 0.5532998867192596, "learning_rate": 1.9888118906870154e-06, "loss": 1.005772352218628, "step": 828 }, { "epoch": 0.19101382488479263, "grad_norm": 0.42790166152469256, "learning_rate": 1.9887549883907394e-06, "loss": 0.9060605764389038, "step": 829 }, { "epoch": 0.1912442396313364, "grad_norm": 0.5177028577691919, "learning_rate": 1.988697942577986e-06, "loss": 0.7652161717414856, "step": 830 }, { "epoch": 0.1914746543778802, "grad_norm": 0.5981838434161031, "learning_rate": 1.9886407532570354e-06, "loss": 1.0191380977630615, "step": 831 }, { "epoch": 0.19170506912442398, "grad_norm": 0.4987711114148914, "learning_rate": 1.9885834204361876e-06, "loss": 0.9497933387756348, "step": 832 }, { "epoch": 0.19193548387096773, "grad_norm": 0.462035144334916, "learning_rate": 1.9885259441237657e-06, "loss": 0.7728058099746704, "step": 833 }, { "epoch": 0.19216589861751152, "grad_norm": 0.517810203206895, "learning_rate": 1.9884683243281113e-06, "loss": 0.8961999416351318, "step": 834 }, { "epoch": 0.1923963133640553, "grad_norm": 0.49386963761649333, "learning_rate": 1.9884105610575885e-06, "loss": 0.9218904972076416, "step": 835 }, { "epoch": 0.19262672811059908, "grad_norm": 0.49785428541631027, "learning_rate": 1.9883526543205807e-06, "loss": 0.8411329984664917, "step": 836 }, { "epoch": 0.19285714285714287, "grad_norm": 0.42947794662366, "learning_rate": 1.988294604125494e-06, "loss": 0.9536285400390625, "step": 837 }, { "epoch": 0.19308755760368665, "grad_norm": 0.589338261376726, "learning_rate": 1.9882364104807535e-06, "loss": 0.9404321908950806, "step": 838 }, { "epoch": 0.1933179723502304, "grad_norm": 0.6889982860652113, "learning_rate": 1.9881780733948066e-06, "loss": 1.2520880699157715, "step": 839 }, { "epoch": 0.1935483870967742, "grad_norm": 0.5071547317768794, "learning_rate": 1.9881195928761205e-06, "loss": 0.8961449861526489, "step": 840 }, { "epoch": 0.19377880184331797, "grad_norm": 0.5612915327251169, "learning_rate": 1.9880609689331833e-06, "loss": 0.8844394683837891, "step": 841 }, { "epoch": 0.19400921658986175, "grad_norm": 0.6383643268501873, "learning_rate": 1.9880022015745044e-06, "loss": 1.1305835247039795, "step": 842 }, { "epoch": 0.19423963133640554, "grad_norm": 0.5396685716999928, "learning_rate": 1.9879432908086143e-06, "loss": 0.9980956315994263, "step": 843 }, { "epoch": 0.19447004608294932, "grad_norm": 0.46511386172638836, "learning_rate": 1.987884236644063e-06, "loss": 0.7613730430603027, "step": 844 }, { "epoch": 0.19470046082949308, "grad_norm": 0.6010725617242704, "learning_rate": 1.987825039089423e-06, "loss": 0.9742579460144043, "step": 845 }, { "epoch": 0.19493087557603686, "grad_norm": 0.4022001131058661, "learning_rate": 1.9877656981532864e-06, "loss": 0.7118766903877258, "step": 846 }, { "epoch": 0.19516129032258064, "grad_norm": 0.48902949112989696, "learning_rate": 1.9877062138442657e-06, "loss": 0.8657095432281494, "step": 847 }, { "epoch": 0.19539170506912443, "grad_norm": 0.42720754806325495, "learning_rate": 1.987646586170996e-06, "loss": 0.8543902039527893, "step": 848 }, { "epoch": 0.1956221198156682, "grad_norm": 0.4842820004763047, "learning_rate": 1.9875868151421317e-06, "loss": 0.8896970748901367, "step": 849 }, { "epoch": 0.195852534562212, "grad_norm": 0.5225855938017534, "learning_rate": 1.9875269007663486e-06, "loss": 0.8662775754928589, "step": 850 }, { "epoch": 0.19608294930875575, "grad_norm": 0.48460338230512107, "learning_rate": 1.9874668430523434e-06, "loss": 0.8241516351699829, "step": 851 }, { "epoch": 0.19631336405529953, "grad_norm": 0.5278134062893883, "learning_rate": 1.987406642008833e-06, "loss": 0.973886251449585, "step": 852 }, { "epoch": 0.19654377880184332, "grad_norm": 0.48464213201098744, "learning_rate": 1.9873462976445554e-06, "loss": 0.8133533000946045, "step": 853 }, { "epoch": 0.1967741935483871, "grad_norm": 0.6657370368562822, "learning_rate": 1.9872858099682697e-06, "loss": 1.120869755744934, "step": 854 }, { "epoch": 0.19700460829493088, "grad_norm": 0.47886128108046017, "learning_rate": 1.9872251789887562e-06, "loss": 0.9376444816589355, "step": 855 }, { "epoch": 0.19723502304147467, "grad_norm": 0.4627008078705538, "learning_rate": 1.9871644047148148e-06, "loss": 0.8763699531555176, "step": 856 }, { "epoch": 0.19746543778801842, "grad_norm": 0.5436736732062664, "learning_rate": 1.9871034871552667e-06, "loss": 0.7993260622024536, "step": 857 }, { "epoch": 0.1976958525345622, "grad_norm": 0.5225344117964711, "learning_rate": 1.9870424263189542e-06, "loss": 1.0312654972076416, "step": 858 }, { "epoch": 0.197926267281106, "grad_norm": 0.6040828842975151, "learning_rate": 1.98698122221474e-06, "loss": 1.0784629583358765, "step": 859 }, { "epoch": 0.19815668202764977, "grad_norm": 0.5681257026488339, "learning_rate": 1.9869198748515085e-06, "loss": 1.136039137840271, "step": 860 }, { "epoch": 0.19838709677419356, "grad_norm": 0.5123381612546825, "learning_rate": 1.986858384238163e-06, "loss": 0.834873378276825, "step": 861 }, { "epoch": 0.19861751152073734, "grad_norm": 0.5505167057841309, "learning_rate": 1.98679675038363e-06, "loss": 0.9705442190170288, "step": 862 }, { "epoch": 0.1988479262672811, "grad_norm": 0.6567761197272963, "learning_rate": 1.9867349732968547e-06, "loss": 0.9343886375427246, "step": 863 }, { "epoch": 0.19907834101382488, "grad_norm": 0.49387008808397015, "learning_rate": 1.986673052986805e-06, "loss": 0.9140456914901733, "step": 864 }, { "epoch": 0.19930875576036866, "grad_norm": 0.5850607327811402, "learning_rate": 1.986610989462467e-06, "loss": 0.9121139049530029, "step": 865 }, { "epoch": 0.19953917050691244, "grad_norm": 0.4775789448856378, "learning_rate": 1.9865487827328505e-06, "loss": 0.7333672642707825, "step": 866 }, { "epoch": 0.19976958525345623, "grad_norm": 0.5039450613377916, "learning_rate": 1.986486432806984e-06, "loss": 0.8405989408493042, "step": 867 }, { "epoch": 0.2, "grad_norm": 0.47371690470710304, "learning_rate": 1.9864239396939176e-06, "loss": 0.8693375587463379, "step": 868 }, { "epoch": 0.20023041474654377, "grad_norm": 0.5727654616233698, "learning_rate": 1.9863613034027223e-06, "loss": 1.0137104988098145, "step": 869 }, { "epoch": 0.20046082949308755, "grad_norm": 0.5382771457657299, "learning_rate": 1.9862985239424895e-06, "loss": 1.0283832550048828, "step": 870 }, { "epoch": 0.20069124423963133, "grad_norm": 0.6200501422886965, "learning_rate": 1.9862356013223316e-06, "loss": 1.117444634437561, "step": 871 }, { "epoch": 0.20092165898617512, "grad_norm": 0.6309070895129882, "learning_rate": 1.986172535551382e-06, "loss": 0.8861427307128906, "step": 872 }, { "epoch": 0.2011520737327189, "grad_norm": 0.5017852774763055, "learning_rate": 1.9861093266387946e-06, "loss": 1.0273747444152832, "step": 873 }, { "epoch": 0.20138248847926268, "grad_norm": 0.5141875246573869, "learning_rate": 1.9860459745937437e-06, "loss": 0.918023943901062, "step": 874 }, { "epoch": 0.20161290322580644, "grad_norm": 0.5278755996885149, "learning_rate": 1.9859824794254246e-06, "loss": 0.8983356952667236, "step": 875 }, { "epoch": 0.20184331797235022, "grad_norm": 0.5803540160351622, "learning_rate": 1.985918841143054e-06, "loss": 1.0180974006652832, "step": 876 }, { "epoch": 0.202073732718894, "grad_norm": 0.48253787858386377, "learning_rate": 1.985855059755869e-06, "loss": 0.9656573534011841, "step": 877 }, { "epoch": 0.2023041474654378, "grad_norm": 0.5015537059540116, "learning_rate": 1.9857911352731273e-06, "loss": 0.8522181510925293, "step": 878 }, { "epoch": 0.20253456221198157, "grad_norm": 0.4883752495192941, "learning_rate": 1.985727067704107e-06, "loss": 0.9180892705917358, "step": 879 }, { "epoch": 0.20276497695852536, "grad_norm": 0.5817140345419661, "learning_rate": 1.985662857058108e-06, "loss": 0.9979432821273804, "step": 880 }, { "epoch": 0.2029953917050691, "grad_norm": 0.5608420179715049, "learning_rate": 1.98559850334445e-06, "loss": 0.8916480541229248, "step": 881 }, { "epoch": 0.2032258064516129, "grad_norm": 0.41973060059994494, "learning_rate": 1.9855340065724738e-06, "loss": 0.8755770921707153, "step": 882 }, { "epoch": 0.20345622119815668, "grad_norm": 0.5596516763963291, "learning_rate": 1.9854693667515418e-06, "loss": 1.0200350284576416, "step": 883 }, { "epoch": 0.20368663594470046, "grad_norm": 0.5199867730002389, "learning_rate": 1.9854045838910353e-06, "loss": 0.928024172782898, "step": 884 }, { "epoch": 0.20391705069124424, "grad_norm": 0.5756725941645391, "learning_rate": 1.9853396580003582e-06, "loss": 0.8617212176322937, "step": 885 }, { "epoch": 0.20414746543778803, "grad_norm": 0.5415263717139983, "learning_rate": 1.985274589088934e-06, "loss": 0.9383209943771362, "step": 886 }, { "epoch": 0.20437788018433178, "grad_norm": 0.48094986017269503, "learning_rate": 1.985209377166208e-06, "loss": 0.7217687368392944, "step": 887 }, { "epoch": 0.20460829493087557, "grad_norm": 0.612593081169746, "learning_rate": 1.9851440222416446e-06, "loss": 1.0717028379440308, "step": 888 }, { "epoch": 0.20483870967741935, "grad_norm": 0.6063882651782059, "learning_rate": 1.9850785243247303e-06, "loss": 1.0137064456939697, "step": 889 }, { "epoch": 0.20506912442396313, "grad_norm": 0.5244411173844509, "learning_rate": 1.985012883424973e-06, "loss": 0.8569058179855347, "step": 890 }, { "epoch": 0.20529953917050692, "grad_norm": 0.6524290996376207, "learning_rate": 1.9849470995518993e-06, "loss": 0.9398901462554932, "step": 891 }, { "epoch": 0.2055299539170507, "grad_norm": 0.3752296846015947, "learning_rate": 1.9848811727150577e-06, "loss": 0.731800377368927, "step": 892 }, { "epoch": 0.20576036866359446, "grad_norm": 0.5142990565199794, "learning_rate": 1.984815102924018e-06, "loss": 0.8543055653572083, "step": 893 }, { "epoch": 0.20599078341013824, "grad_norm": 0.5278314343821748, "learning_rate": 1.98474889018837e-06, "loss": 0.9112114906311035, "step": 894 }, { "epoch": 0.20622119815668202, "grad_norm": 0.50708997202126, "learning_rate": 1.984682534517724e-06, "loss": 0.8272690773010254, "step": 895 }, { "epoch": 0.2064516129032258, "grad_norm": 0.5912295968473946, "learning_rate": 1.984616035921712e-06, "loss": 0.9680918455123901, "step": 896 }, { "epoch": 0.2066820276497696, "grad_norm": 0.6089139321115737, "learning_rate": 1.984549394409985e-06, "loss": 0.815123438835144, "step": 897 }, { "epoch": 0.20691244239631337, "grad_norm": 0.4952276433479721, "learning_rate": 1.984482609992218e-06, "loss": 0.8035521507263184, "step": 898 }, { "epoch": 0.20714285714285716, "grad_norm": 0.548354244530079, "learning_rate": 1.9844156826781027e-06, "loss": 0.9000132083892822, "step": 899 }, { "epoch": 0.2073732718894009, "grad_norm": 0.6652515011666116, "learning_rate": 1.9843486124773543e-06, "loss": 1.06328547000885, "step": 900 }, { "epoch": 0.2076036866359447, "grad_norm": 0.4596762245312169, "learning_rate": 1.9842813993997083e-06, "loss": 0.9028425216674805, "step": 901 }, { "epoch": 0.20783410138248848, "grad_norm": 0.5779573613376965, "learning_rate": 1.9842140434549196e-06, "loss": 0.7786350250244141, "step": 902 }, { "epoch": 0.20806451612903226, "grad_norm": 0.5102795361356062, "learning_rate": 1.9841465446527656e-06, "loss": 0.8041539788246155, "step": 903 }, { "epoch": 0.20829493087557605, "grad_norm": 0.4348300351835264, "learning_rate": 1.9840789030030434e-06, "loss": 0.8380184173583984, "step": 904 }, { "epoch": 0.20852534562211983, "grad_norm": 0.7151525379978475, "learning_rate": 1.984011118515572e-06, "loss": 0.8191432952880859, "step": 905 }, { "epoch": 0.20875576036866358, "grad_norm": 0.5006646807997585, "learning_rate": 1.9839431912001885e-06, "loss": 0.8236384391784668, "step": 906 }, { "epoch": 0.20898617511520737, "grad_norm": 0.4959155947407375, "learning_rate": 1.9838751210667534e-06, "loss": 0.8218076825141907, "step": 907 }, { "epoch": 0.20921658986175115, "grad_norm": 0.5127899266702147, "learning_rate": 1.983806908125147e-06, "loss": 0.9140353202819824, "step": 908 }, { "epoch": 0.20944700460829493, "grad_norm": 0.5063732794644019, "learning_rate": 1.9837385523852706e-06, "loss": 0.9179826974868774, "step": 909 }, { "epoch": 0.20967741935483872, "grad_norm": 0.5385574519868781, "learning_rate": 1.9836700538570456e-06, "loss": 0.8888909816741943, "step": 910 }, { "epoch": 0.2099078341013825, "grad_norm": 0.5208969379705799, "learning_rate": 1.9836014125504143e-06, "loss": 0.8951253890991211, "step": 911 }, { "epoch": 0.21013824884792626, "grad_norm": 0.6093988535410455, "learning_rate": 1.98353262847534e-06, "loss": 1.084958553314209, "step": 912 }, { "epoch": 0.21036866359447004, "grad_norm": 0.5061127496745415, "learning_rate": 1.983463701641807e-06, "loss": 0.8590713739395142, "step": 913 }, { "epoch": 0.21059907834101382, "grad_norm": 0.6396228440899432, "learning_rate": 1.9833946320598195e-06, "loss": 1.0393706560134888, "step": 914 }, { "epoch": 0.2108294930875576, "grad_norm": 0.49567487165870866, "learning_rate": 1.983325419739403e-06, "loss": 0.9403085708618164, "step": 915 }, { "epoch": 0.2110599078341014, "grad_norm": 0.49912224081019996, "learning_rate": 1.9832560646906038e-06, "loss": 0.8431342244148254, "step": 916 }, { "epoch": 0.21129032258064517, "grad_norm": 0.5558843704958377, "learning_rate": 1.9831865669234884e-06, "loss": 0.9024044871330261, "step": 917 }, { "epoch": 0.21152073732718893, "grad_norm": 0.44775113902692637, "learning_rate": 1.9831169264481443e-06, "loss": 0.747347354888916, "step": 918 }, { "epoch": 0.2117511520737327, "grad_norm": 0.46715914917156914, "learning_rate": 1.9830471432746796e-06, "loss": 0.8266197443008423, "step": 919 }, { "epoch": 0.2119815668202765, "grad_norm": 0.5566270603086758, "learning_rate": 1.9829772174132235e-06, "loss": 0.8633416295051575, "step": 920 }, { "epoch": 0.21221198156682028, "grad_norm": 0.5228096908540074, "learning_rate": 1.9829071488739256e-06, "loss": 1.0290095806121826, "step": 921 }, { "epoch": 0.21244239631336406, "grad_norm": 0.667274912811163, "learning_rate": 1.9828369376669566e-06, "loss": 0.8193448781967163, "step": 922 }, { "epoch": 0.21267281105990785, "grad_norm": 0.5677549533509479, "learning_rate": 1.982766583802507e-06, "loss": 0.8828415870666504, "step": 923 }, { "epoch": 0.2129032258064516, "grad_norm": 0.597806988660978, "learning_rate": 1.9826960872907885e-06, "loss": 0.8806191682815552, "step": 924 }, { "epoch": 0.21313364055299538, "grad_norm": 0.40902701240404726, "learning_rate": 1.982625448142034e-06, "loss": 0.8441533446311951, "step": 925 }, { "epoch": 0.21336405529953917, "grad_norm": 0.5142754504345473, "learning_rate": 1.9825546663664963e-06, "loss": 0.9084080457687378, "step": 926 }, { "epoch": 0.21359447004608295, "grad_norm": 0.7318607240255686, "learning_rate": 1.98248374197445e-06, "loss": 0.9005601406097412, "step": 927 }, { "epoch": 0.21382488479262673, "grad_norm": 0.48930991442842664, "learning_rate": 1.9824126749761893e-06, "loss": 1.0415414571762085, "step": 928 }, { "epoch": 0.21405529953917052, "grad_norm": 0.4380456409582823, "learning_rate": 1.982341465382029e-06, "loss": 0.8130594491958618, "step": 929 }, { "epoch": 0.21428571428571427, "grad_norm": 0.4623167832467728, "learning_rate": 1.9822701132023053e-06, "loss": 0.9178205728530884, "step": 930 }, { "epoch": 0.21451612903225806, "grad_norm": 0.5894382821211327, "learning_rate": 1.9821986184473754e-06, "loss": 0.9927947521209717, "step": 931 }, { "epoch": 0.21474654377880184, "grad_norm": 0.5621440238225328, "learning_rate": 1.982126981127616e-06, "loss": 0.9172670841217041, "step": 932 }, { "epoch": 0.21497695852534562, "grad_norm": 0.5805773191302366, "learning_rate": 1.9820552012534255e-06, "loss": 0.9513058066368103, "step": 933 }, { "epoch": 0.2152073732718894, "grad_norm": 0.6596090379041671, "learning_rate": 1.9819832788352227e-06, "loss": 1.014827013015747, "step": 934 }, { "epoch": 0.2154377880184332, "grad_norm": 0.5483468550441934, "learning_rate": 1.9819112138834473e-06, "loss": 1.0225746631622314, "step": 935 }, { "epoch": 0.21566820276497695, "grad_norm": 0.46659867801168237, "learning_rate": 1.9818390064085584e-06, "loss": 0.8804227113723755, "step": 936 }, { "epoch": 0.21589861751152073, "grad_norm": 0.42738644934381204, "learning_rate": 1.9817666564210376e-06, "loss": 0.7215760350227356, "step": 937 }, { "epoch": 0.2161290322580645, "grad_norm": 0.6620668522422565, "learning_rate": 1.981694163931387e-06, "loss": 0.9978986978530884, "step": 938 }, { "epoch": 0.2163594470046083, "grad_norm": 0.5846107454293807, "learning_rate": 1.981621528950128e-06, "loss": 0.8646233081817627, "step": 939 }, { "epoch": 0.21658986175115208, "grad_norm": 0.44150430663795637, "learning_rate": 1.981548751487803e-06, "loss": 0.9619132876396179, "step": 940 }, { "epoch": 0.21682027649769586, "grad_norm": 0.543839377462045, "learning_rate": 1.981475831554976e-06, "loss": 0.9209504127502441, "step": 941 }, { "epoch": 0.21705069124423962, "grad_norm": 0.563351483363654, "learning_rate": 1.9814027691622318e-06, "loss": 0.7629299163818359, "step": 942 }, { "epoch": 0.2172811059907834, "grad_norm": 0.4885334834965844, "learning_rate": 1.9813295643201747e-06, "loss": 0.8702583312988281, "step": 943 }, { "epoch": 0.21751152073732719, "grad_norm": 0.5579102568918498, "learning_rate": 1.9812562170394305e-06, "loss": 0.9571657180786133, "step": 944 }, { "epoch": 0.21774193548387097, "grad_norm": 0.43227127189367615, "learning_rate": 1.9811827273306456e-06, "loss": 0.7271617650985718, "step": 945 }, { "epoch": 0.21797235023041475, "grad_norm": 0.46137899963900864, "learning_rate": 1.9811090952044865e-06, "loss": 0.8189597725868225, "step": 946 }, { "epoch": 0.21820276497695854, "grad_norm": 0.49142212284435566, "learning_rate": 1.981035320671641e-06, "loss": 0.7933987379074097, "step": 947 }, { "epoch": 0.2184331797235023, "grad_norm": 0.48207328184354004, "learning_rate": 1.9809614037428174e-06, "loss": 0.9687645435333252, "step": 948 }, { "epoch": 0.21866359447004607, "grad_norm": 0.5647695490676888, "learning_rate": 1.980887344428745e-06, "loss": 0.8293745517730713, "step": 949 }, { "epoch": 0.21889400921658986, "grad_norm": 0.6489579503887147, "learning_rate": 1.9808131427401727e-06, "loss": 1.0447471141815186, "step": 950 }, { "epoch": 0.21912442396313364, "grad_norm": 0.48010625791746325, "learning_rate": 1.9807387986878715e-06, "loss": 0.8916672468185425, "step": 951 }, { "epoch": 0.21935483870967742, "grad_norm": 0.5436399520986829, "learning_rate": 1.980664312282632e-06, "loss": 0.8380981683731079, "step": 952 }, { "epoch": 0.2195852534562212, "grad_norm": 0.4634469099281989, "learning_rate": 1.9805896835352656e-06, "loss": 0.887790322303772, "step": 953 }, { "epoch": 0.21981566820276496, "grad_norm": 0.5184548533508342, "learning_rate": 1.9805149124566048e-06, "loss": 0.8353140950202942, "step": 954 }, { "epoch": 0.22004608294930875, "grad_norm": 0.7177333773715296, "learning_rate": 1.9804399990575026e-06, "loss": 1.0337531566619873, "step": 955 }, { "epoch": 0.22027649769585253, "grad_norm": 0.4262367777660272, "learning_rate": 1.9803649433488324e-06, "loss": 0.8845529556274414, "step": 956 }, { "epoch": 0.2205069124423963, "grad_norm": 0.4271901286679727, "learning_rate": 1.9802897453414884e-06, "loss": 0.7408445477485657, "step": 957 }, { "epoch": 0.2207373271889401, "grad_norm": 0.5478873632644168, "learning_rate": 1.980214405046386e-06, "loss": 0.873178243637085, "step": 958 }, { "epoch": 0.22096774193548388, "grad_norm": 0.556535747180833, "learning_rate": 1.98013892247446e-06, "loss": 1.0207639932632446, "step": 959 }, { "epoch": 0.22119815668202766, "grad_norm": 0.5890989419509002, "learning_rate": 1.980063297636667e-06, "loss": 0.8626997470855713, "step": 960 }, { "epoch": 0.22142857142857142, "grad_norm": 0.5912616927968722, "learning_rate": 1.9799875305439836e-06, "loss": 0.8961347341537476, "step": 961 }, { "epoch": 0.2216589861751152, "grad_norm": 0.495639914718092, "learning_rate": 1.9799116212074075e-06, "loss": 0.8115944862365723, "step": 962 }, { "epoch": 0.22188940092165899, "grad_norm": 0.5281413221179645, "learning_rate": 1.979835569637957e-06, "loss": 0.8274029493331909, "step": 963 }, { "epoch": 0.22211981566820277, "grad_norm": 0.5782364794204825, "learning_rate": 1.9797593758466706e-06, "loss": 1.020345687866211, "step": 964 }, { "epoch": 0.22235023041474655, "grad_norm": 0.586333023609623, "learning_rate": 1.979683039844608e-06, "loss": 0.8164723515510559, "step": 965 }, { "epoch": 0.22258064516129034, "grad_norm": 0.48956655235723145, "learning_rate": 1.979606561642849e-06, "loss": 0.832849383354187, "step": 966 }, { "epoch": 0.2228110599078341, "grad_norm": 0.5810232623043905, "learning_rate": 1.9795299412524945e-06, "loss": 0.9765876531600952, "step": 967 }, { "epoch": 0.22304147465437787, "grad_norm": 0.5610292572060406, "learning_rate": 1.9794531786846657e-06, "loss": 0.9280411005020142, "step": 968 }, { "epoch": 0.22327188940092166, "grad_norm": 0.6528516733941818, "learning_rate": 1.9793762739505042e-06, "loss": 1.122058629989624, "step": 969 }, { "epoch": 0.22350230414746544, "grad_norm": 0.4582570301724996, "learning_rate": 1.9792992270611737e-06, "loss": 0.824627161026001, "step": 970 }, { "epoch": 0.22373271889400922, "grad_norm": 0.750391550156154, "learning_rate": 1.9792220380278565e-06, "loss": 1.0583840608596802, "step": 971 }, { "epoch": 0.223963133640553, "grad_norm": 0.5277817422831291, "learning_rate": 1.979144706861757e-06, "loss": 1.053803563117981, "step": 972 }, { "epoch": 0.22419354838709676, "grad_norm": 0.5197675200798639, "learning_rate": 1.9790672335740993e-06, "loss": 0.8572183847427368, "step": 973 }, { "epoch": 0.22442396313364055, "grad_norm": 0.5956201422774761, "learning_rate": 1.978989618176129e-06, "loss": 0.7955416440963745, "step": 974 }, { "epoch": 0.22465437788018433, "grad_norm": 0.6931203377433601, "learning_rate": 1.9789118606791113e-06, "loss": 0.9455063343048096, "step": 975 }, { "epoch": 0.2248847926267281, "grad_norm": 0.5553738972507489, "learning_rate": 1.978833961094333e-06, "loss": 0.788895845413208, "step": 976 }, { "epoch": 0.2251152073732719, "grad_norm": 0.4854852275390097, "learning_rate": 1.9787559194331014e-06, "loss": 0.8344719409942627, "step": 977 }, { "epoch": 0.22534562211981568, "grad_norm": 0.5098723288351352, "learning_rate": 1.9786777357067436e-06, "loss": 0.85140061378479, "step": 978 }, { "epoch": 0.22557603686635944, "grad_norm": 0.43945689098482754, "learning_rate": 1.978599409926608e-06, "loss": 0.8511399030685425, "step": 979 }, { "epoch": 0.22580645161290322, "grad_norm": 0.4893125980217, "learning_rate": 1.9785209421040636e-06, "loss": 0.9243351221084595, "step": 980 }, { "epoch": 0.226036866359447, "grad_norm": 0.5349074342918002, "learning_rate": 1.9784423322504996e-06, "loss": 0.9043580293655396, "step": 981 }, { "epoch": 0.2262672811059908, "grad_norm": 0.654146848198394, "learning_rate": 1.978363580377327e-06, "loss": 0.854049563407898, "step": 982 }, { "epoch": 0.22649769585253457, "grad_norm": 0.43507484708504635, "learning_rate": 1.9782846864959754e-06, "loss": 0.7785296440124512, "step": 983 }, { "epoch": 0.22672811059907835, "grad_norm": 0.5830354059161934, "learning_rate": 1.9782056506178965e-06, "loss": 0.8464720845222473, "step": 984 }, { "epoch": 0.2269585253456221, "grad_norm": 0.5249975809892665, "learning_rate": 1.9781264727545624e-06, "loss": 0.8519179821014404, "step": 985 }, { "epoch": 0.2271889400921659, "grad_norm": 0.6176158235785483, "learning_rate": 1.978047152917466e-06, "loss": 0.956415057182312, "step": 986 }, { "epoch": 0.22741935483870968, "grad_norm": 0.5046722242039021, "learning_rate": 1.97796769111812e-06, "loss": 1.028620719909668, "step": 987 }, { "epoch": 0.22764976958525346, "grad_norm": 0.4889451789926323, "learning_rate": 1.9778880873680585e-06, "loss": 0.8707184195518494, "step": 988 }, { "epoch": 0.22788018433179724, "grad_norm": 0.5212071576326044, "learning_rate": 1.9778083416788355e-06, "loss": 0.9842795729637146, "step": 989 }, { "epoch": 0.22811059907834103, "grad_norm": 0.5963522406410062, "learning_rate": 1.977728454062026e-06, "loss": 0.8827522993087769, "step": 990 }, { "epoch": 0.22834101382488478, "grad_norm": 0.5285989804764033, "learning_rate": 1.9776484245292256e-06, "loss": 0.8608568906784058, "step": 991 }, { "epoch": 0.22857142857142856, "grad_norm": 0.7428648265675979, "learning_rate": 1.977568253092051e-06, "loss": 0.8512595891952515, "step": 992 }, { "epoch": 0.22880184331797235, "grad_norm": 0.520235896024025, "learning_rate": 1.9774879397621383e-06, "loss": 0.7335344552993774, "step": 993 }, { "epoch": 0.22903225806451613, "grad_norm": 0.6711607827981731, "learning_rate": 1.9774074845511457e-06, "loss": 1.0301114320755005, "step": 994 }, { "epoch": 0.22926267281105991, "grad_norm": 0.515409965463074, "learning_rate": 1.97732688747075e-06, "loss": 0.9011565446853638, "step": 995 }, { "epoch": 0.2294930875576037, "grad_norm": 0.5657170632178228, "learning_rate": 1.9772461485326507e-06, "loss": 0.8644282221794128, "step": 996 }, { "epoch": 0.22972350230414745, "grad_norm": 0.49795498598042737, "learning_rate": 1.9771652677485664e-06, "loss": 0.8107467889785767, "step": 997 }, { "epoch": 0.22995391705069124, "grad_norm": 0.5832229133316258, "learning_rate": 1.9770842451302373e-06, "loss": 1.0090508460998535, "step": 998 }, { "epoch": 0.23018433179723502, "grad_norm": 0.4910768822506593, "learning_rate": 1.977003080689424e-06, "loss": 0.8153292536735535, "step": 999 }, { "epoch": 0.2304147465437788, "grad_norm": 0.6502643477323704, "learning_rate": 1.976921774437906e-06, "loss": 0.8446916341781616, "step": 1000 }, { "epoch": 0.2306451612903226, "grad_norm": 0.5179047651030808, "learning_rate": 1.9768403263874865e-06, "loss": 0.759350597858429, "step": 1001 }, { "epoch": 0.23087557603686637, "grad_norm": 0.5414654559095757, "learning_rate": 1.9767587365499862e-06, "loss": 0.9181695580482483, "step": 1002 }, { "epoch": 0.23110599078341013, "grad_norm": 0.4755050115257823, "learning_rate": 1.976677004937249e-06, "loss": 0.8450978994369507, "step": 1003 }, { "epoch": 0.2313364055299539, "grad_norm": 0.5616575268963485, "learning_rate": 1.9765951315611365e-06, "loss": 0.775252640247345, "step": 1004 }, { "epoch": 0.2315668202764977, "grad_norm": 0.5248180263396327, "learning_rate": 1.976513116433534e-06, "loss": 0.8682440519332886, "step": 1005 }, { "epoch": 0.23179723502304148, "grad_norm": 0.6093284414229693, "learning_rate": 1.9764309595663457e-06, "loss": 1.0701451301574707, "step": 1006 }, { "epoch": 0.23202764976958526, "grad_norm": 0.5747684398408948, "learning_rate": 1.976348660971496e-06, "loss": 0.9381946921348572, "step": 1007 }, { "epoch": 0.23225806451612904, "grad_norm": 0.5225356801303237, "learning_rate": 1.976266220660931e-06, "loss": 0.7836539149284363, "step": 1008 }, { "epoch": 0.2324884792626728, "grad_norm": 0.5379097818020191, "learning_rate": 1.9761836386466156e-06, "loss": 0.9271948337554932, "step": 1009 }, { "epoch": 0.23271889400921658, "grad_norm": 0.514797473753123, "learning_rate": 1.976100914940538e-06, "loss": 0.8268035650253296, "step": 1010 }, { "epoch": 0.23294930875576036, "grad_norm": 0.5105764513310544, "learning_rate": 1.976018049554705e-06, "loss": 0.8266786336898804, "step": 1011 }, { "epoch": 0.23317972350230415, "grad_norm": 0.6250953922330988, "learning_rate": 1.9759350425011435e-06, "loss": 0.9437457323074341, "step": 1012 }, { "epoch": 0.23341013824884793, "grad_norm": 0.5629533372281755, "learning_rate": 1.9758518937919033e-06, "loss": 0.9078803062438965, "step": 1013 }, { "epoch": 0.23364055299539171, "grad_norm": 0.5994095472581402, "learning_rate": 1.975768603439052e-06, "loss": 0.9873687624931335, "step": 1014 }, { "epoch": 0.23387096774193547, "grad_norm": 0.5010269853722422, "learning_rate": 1.97568517145468e-06, "loss": 0.9450196027755737, "step": 1015 }, { "epoch": 0.23410138248847925, "grad_norm": 0.5173338079683222, "learning_rate": 1.975601597850897e-06, "loss": 0.8804495334625244, "step": 1016 }, { "epoch": 0.23433179723502304, "grad_norm": 0.5286639294307074, "learning_rate": 1.9755178826398333e-06, "loss": 0.9646104574203491, "step": 1017 }, { "epoch": 0.23456221198156682, "grad_norm": 0.5917923655178416, "learning_rate": 1.9754340258336403e-06, "loss": 0.9829385280609131, "step": 1018 }, { "epoch": 0.2347926267281106, "grad_norm": 0.5022802882731887, "learning_rate": 1.97535002744449e-06, "loss": 0.8433707356452942, "step": 1019 }, { "epoch": 0.2350230414746544, "grad_norm": 0.5984717862988072, "learning_rate": 1.9752658874845744e-06, "loss": 0.9892767071723938, "step": 1020 }, { "epoch": 0.23525345622119814, "grad_norm": 0.5038568694461213, "learning_rate": 1.9751816059661065e-06, "loss": 0.8367536664009094, "step": 1021 }, { "epoch": 0.23548387096774193, "grad_norm": 0.6009503951092086, "learning_rate": 1.9750971829013194e-06, "loss": 0.8947298526763916, "step": 1022 }, { "epoch": 0.2357142857142857, "grad_norm": 0.4955473883987944, "learning_rate": 1.975012618302467e-06, "loss": 0.9218910336494446, "step": 1023 }, { "epoch": 0.2359447004608295, "grad_norm": 0.46527028147066757, "learning_rate": 1.9749279121818236e-06, "loss": 0.8744943141937256, "step": 1024 }, { "epoch": 0.23617511520737328, "grad_norm": 0.5457797851350515, "learning_rate": 1.9748430645516845e-06, "loss": 0.9023007154464722, "step": 1025 }, { "epoch": 0.23640552995391706, "grad_norm": 0.5361296427556177, "learning_rate": 1.974758075424365e-06, "loss": 0.8475106954574585, "step": 1026 }, { "epoch": 0.23663594470046084, "grad_norm": 0.5535275060374267, "learning_rate": 1.9746729448122013e-06, "loss": 0.8594635725021362, "step": 1027 }, { "epoch": 0.2368663594470046, "grad_norm": 0.6574105474773485, "learning_rate": 1.97458767272755e-06, "loss": 0.9601756930351257, "step": 1028 }, { "epoch": 0.23709677419354838, "grad_norm": 0.5454698959338334, "learning_rate": 1.9745022591827886e-06, "loss": 0.9281105399131775, "step": 1029 }, { "epoch": 0.23732718894009217, "grad_norm": 0.4631930883062957, "learning_rate": 1.9744167041903136e-06, "loss": 0.8240020275115967, "step": 1030 }, { "epoch": 0.23755760368663595, "grad_norm": 0.5116113956014486, "learning_rate": 1.9743310077625446e-06, "loss": 0.807030200958252, "step": 1031 }, { "epoch": 0.23778801843317973, "grad_norm": 0.5399356518827937, "learning_rate": 1.9742451699119194e-06, "loss": 0.8044267892837524, "step": 1032 }, { "epoch": 0.23801843317972352, "grad_norm": 0.5022311335968053, "learning_rate": 1.9741591906508975e-06, "loss": 0.9198760390281677, "step": 1033 }, { "epoch": 0.23824884792626727, "grad_norm": 0.6382005412114766, "learning_rate": 1.974073069991959e-06, "loss": 0.7951973676681519, "step": 1034 }, { "epoch": 0.23847926267281105, "grad_norm": 0.5488288386867366, "learning_rate": 1.9739868079476035e-06, "loss": 0.8366928100585938, "step": 1035 }, { "epoch": 0.23870967741935484, "grad_norm": 0.5327938531465227, "learning_rate": 1.9739004045303524e-06, "loss": 0.9644484519958496, "step": 1036 }, { "epoch": 0.23894009216589862, "grad_norm": 0.47502000880743445, "learning_rate": 1.9738138597527464e-06, "loss": 0.8332105875015259, "step": 1037 }, { "epoch": 0.2391705069124424, "grad_norm": 0.4812648524584188, "learning_rate": 1.9737271736273482e-06, "loss": 0.8923197388648987, "step": 1038 }, { "epoch": 0.2394009216589862, "grad_norm": 0.48693803999160823, "learning_rate": 1.97364034616674e-06, "loss": 0.861129879951477, "step": 1039 }, { "epoch": 0.23963133640552994, "grad_norm": 0.49858003070315154, "learning_rate": 1.973553377383524e-06, "loss": 0.8042281270027161, "step": 1040 }, { "epoch": 0.23986175115207373, "grad_norm": 0.603264823916037, "learning_rate": 1.9734662672903247e-06, "loss": 1.0315792560577393, "step": 1041 }, { "epoch": 0.2400921658986175, "grad_norm": 0.524902457294173, "learning_rate": 1.973379015899785e-06, "loss": 0.8165839910507202, "step": 1042 }, { "epoch": 0.2403225806451613, "grad_norm": 0.5868579839473654, "learning_rate": 1.97329162322457e-06, "loss": 1.0002663135528564, "step": 1043 }, { "epoch": 0.24055299539170508, "grad_norm": 0.579630177733921, "learning_rate": 1.9732040892773642e-06, "loss": 0.9340938925743103, "step": 1044 }, { "epoch": 0.24078341013824886, "grad_norm": 0.40394518210500746, "learning_rate": 1.973116414070873e-06, "loss": 0.7457709312438965, "step": 1045 }, { "epoch": 0.24101382488479262, "grad_norm": 0.5468265646556031, "learning_rate": 1.9730285976178227e-06, "loss": 0.846583366394043, "step": 1046 }, { "epoch": 0.2412442396313364, "grad_norm": 0.597351972991794, "learning_rate": 1.9729406399309594e-06, "loss": 0.9701514840126038, "step": 1047 }, { "epoch": 0.24147465437788018, "grad_norm": 0.430042606733588, "learning_rate": 1.9728525410230506e-06, "loss": 0.7943054437637329, "step": 1048 }, { "epoch": 0.24170506912442397, "grad_norm": 0.690774172762037, "learning_rate": 1.972764300906883e-06, "loss": 0.8885551691055298, "step": 1049 }, { "epoch": 0.24193548387096775, "grad_norm": 0.522936671850185, "learning_rate": 1.9726759195952653e-06, "loss": 0.8258899450302124, "step": 1050 }, { "epoch": 0.24216589861751153, "grad_norm": 0.586622666679495, "learning_rate": 1.9725873971010255e-06, "loss": 1.0085303783416748, "step": 1051 }, { "epoch": 0.2423963133640553, "grad_norm": 0.49596210148454095, "learning_rate": 1.9724987334370124e-06, "loss": 0.814777135848999, "step": 1052 }, { "epoch": 0.24262672811059907, "grad_norm": 0.5592433145931486, "learning_rate": 1.9724099286160953e-06, "loss": 0.8328995704650879, "step": 1053 }, { "epoch": 0.24285714285714285, "grad_norm": 0.5857793622474846, "learning_rate": 1.9723209826511645e-06, "loss": 0.8699138164520264, "step": 1054 }, { "epoch": 0.24308755760368664, "grad_norm": 0.5678867062742812, "learning_rate": 1.9722318955551303e-06, "loss": 0.8298562169075012, "step": 1055 }, { "epoch": 0.24331797235023042, "grad_norm": 0.5976489688453608, "learning_rate": 1.9721426673409236e-06, "loss": 0.9470195770263672, "step": 1056 }, { "epoch": 0.2435483870967742, "grad_norm": 0.48875505327809854, "learning_rate": 1.9720532980214955e-06, "loss": 0.7733730673789978, "step": 1057 }, { "epoch": 0.24377880184331796, "grad_norm": 0.46823524678841166, "learning_rate": 1.9719637876098184e-06, "loss": 0.7761770486831665, "step": 1058 }, { "epoch": 0.24400921658986174, "grad_norm": 0.445725356281168, "learning_rate": 1.971874136118884e-06, "loss": 0.9270585775375366, "step": 1059 }, { "epoch": 0.24423963133640553, "grad_norm": 0.42406381632115403, "learning_rate": 1.971784343561705e-06, "loss": 0.906977653503418, "step": 1060 }, { "epoch": 0.2444700460829493, "grad_norm": 0.6412884076264423, "learning_rate": 1.971694409951316e-06, "loss": 0.9668625593185425, "step": 1061 }, { "epoch": 0.2447004608294931, "grad_norm": 0.49415949875048953, "learning_rate": 1.971604335300769e-06, "loss": 0.8215349316596985, "step": 1062 }, { "epoch": 0.24493087557603688, "grad_norm": 0.5322070043492434, "learning_rate": 1.971514119623139e-06, "loss": 0.8351551294326782, "step": 1063 }, { "epoch": 0.24516129032258063, "grad_norm": 0.47999809865085763, "learning_rate": 1.9714237629315206e-06, "loss": 0.8778517246246338, "step": 1064 }, { "epoch": 0.24539170506912442, "grad_norm": 0.5396014898113735, "learning_rate": 1.9713332652390293e-06, "loss": 0.9415761232376099, "step": 1065 }, { "epoch": 0.2456221198156682, "grad_norm": 0.5420605598116663, "learning_rate": 1.9712426265588e-06, "loss": 0.9040292501449585, "step": 1066 }, { "epoch": 0.24585253456221198, "grad_norm": 0.6005715295467339, "learning_rate": 1.9711518469039894e-06, "loss": 0.8886675834655762, "step": 1067 }, { "epoch": 0.24608294930875577, "grad_norm": 0.6273079636247865, "learning_rate": 1.971060926287774e-06, "loss": 0.8439750671386719, "step": 1068 }, { "epoch": 0.24631336405529955, "grad_norm": 0.5872743245126388, "learning_rate": 1.9709698647233507e-06, "loss": 0.8698763251304626, "step": 1069 }, { "epoch": 0.2465437788018433, "grad_norm": 0.5858508124188764, "learning_rate": 1.970878662223937e-06, "loss": 0.7866508364677429, "step": 1070 }, { "epoch": 0.2467741935483871, "grad_norm": 0.46529709331014274, "learning_rate": 1.97078731880277e-06, "loss": 0.8652541637420654, "step": 1071 }, { "epoch": 0.24700460829493087, "grad_norm": 0.4617144249036463, "learning_rate": 1.97069583447311e-06, "loss": 0.8614386320114136, "step": 1072 }, { "epoch": 0.24723502304147466, "grad_norm": 0.5647954006429063, "learning_rate": 1.970604209248234e-06, "loss": 0.9367830753326416, "step": 1073 }, { "epoch": 0.24746543778801844, "grad_norm": 0.5744177103855904, "learning_rate": 1.9705124431414417e-06, "loss": 0.8851934671401978, "step": 1074 }, { "epoch": 0.24769585253456222, "grad_norm": 0.49563724633359013, "learning_rate": 1.9704205361660534e-06, "loss": 0.9619653224945068, "step": 1075 }, { "epoch": 0.24792626728110598, "grad_norm": 0.5649060756387019, "learning_rate": 1.9703284883354094e-06, "loss": 0.8826392889022827, "step": 1076 }, { "epoch": 0.24815668202764976, "grad_norm": 0.6563751938003036, "learning_rate": 1.970236299662869e-06, "loss": 0.9075444340705872, "step": 1077 }, { "epoch": 0.24838709677419354, "grad_norm": 0.5796370649143662, "learning_rate": 1.9701439701618147e-06, "loss": 1.048058032989502, "step": 1078 }, { "epoch": 0.24861751152073733, "grad_norm": 0.5313768074192232, "learning_rate": 1.970051499845647e-06, "loss": 0.8460798263549805, "step": 1079 }, { "epoch": 0.2488479262672811, "grad_norm": 0.7193266180122563, "learning_rate": 1.9699588887277886e-06, "loss": 0.9410982131958008, "step": 1080 }, { "epoch": 0.2490783410138249, "grad_norm": 0.5102129399153178, "learning_rate": 1.9698661368216816e-06, "loss": 0.8247401714324951, "step": 1081 }, { "epoch": 0.24930875576036865, "grad_norm": 0.5269386839997043, "learning_rate": 1.969773244140789e-06, "loss": 0.8543484210968018, "step": 1082 }, { "epoch": 0.24953917050691243, "grad_norm": 0.6681776129080308, "learning_rate": 1.9696802106985933e-06, "loss": 0.9339861273765564, "step": 1083 }, { "epoch": 0.24976958525345622, "grad_norm": 0.6394378735221973, "learning_rate": 1.969587036508599e-06, "loss": 0.8268687725067139, "step": 1084 }, { "epoch": 0.25, "grad_norm": 0.5565533707237263, "learning_rate": 1.96949372158433e-06, "loss": 0.9990735054016113, "step": 1085 }, { "epoch": 0.2502304147465438, "grad_norm": 0.5875792221187977, "learning_rate": 1.9694002659393305e-06, "loss": 0.871169924736023, "step": 1086 }, { "epoch": 0.25046082949308757, "grad_norm": 0.5066699305192991, "learning_rate": 1.9693066695871657e-06, "loss": 0.9275476932525635, "step": 1087 }, { "epoch": 0.25069124423963135, "grad_norm": 0.5987932412868929, "learning_rate": 1.969212932541421e-06, "loss": 0.802006721496582, "step": 1088 }, { "epoch": 0.25092165898617513, "grad_norm": 0.6594060142183631, "learning_rate": 1.9691190548157023e-06, "loss": 1.158774495124817, "step": 1089 }, { "epoch": 0.2511520737327189, "grad_norm": 0.5926971423347241, "learning_rate": 1.969025036423636e-06, "loss": 0.8979278802871704, "step": 1090 }, { "epoch": 0.2513824884792627, "grad_norm": 0.48149308442816224, "learning_rate": 1.968930877378868e-06, "loss": 0.9486579895019531, "step": 1091 }, { "epoch": 0.25161290322580643, "grad_norm": 0.5203236583717573, "learning_rate": 1.968836577695066e-06, "loss": 0.8661590814590454, "step": 1092 }, { "epoch": 0.2518433179723502, "grad_norm": 0.5636787742284843, "learning_rate": 1.9687421373859173e-06, "loss": 0.9224900007247925, "step": 1093 }, { "epoch": 0.252073732718894, "grad_norm": 0.6117977186323622, "learning_rate": 1.96864755646513e-06, "loss": 0.9563734531402588, "step": 1094 }, { "epoch": 0.2523041474654378, "grad_norm": 0.535175631127211, "learning_rate": 1.968552834946432e-06, "loss": 0.7457284927368164, "step": 1095 }, { "epoch": 0.25253456221198156, "grad_norm": 0.5387959310508903, "learning_rate": 1.9684579728435727e-06, "loss": 0.8763077259063721, "step": 1096 }, { "epoch": 0.25276497695852534, "grad_norm": 0.5765732282352442, "learning_rate": 1.9683629701703203e-06, "loss": 0.8476013541221619, "step": 1097 }, { "epoch": 0.25299539170506913, "grad_norm": 0.6265041816963897, "learning_rate": 1.9682678269404647e-06, "loss": 0.9706464409828186, "step": 1098 }, { "epoch": 0.2532258064516129, "grad_norm": 0.5592313042434921, "learning_rate": 1.968172543167816e-06, "loss": 0.9898370504379272, "step": 1099 }, { "epoch": 0.2534562211981567, "grad_norm": 0.5273265970472166, "learning_rate": 1.9680771188662043e-06, "loss": 0.9073352813720703, "step": 1100 }, { "epoch": 0.2536866359447005, "grad_norm": 0.5101975110861352, "learning_rate": 1.9679815540494805e-06, "loss": 0.698054850101471, "step": 1101 }, { "epoch": 0.25391705069124426, "grad_norm": 0.5334723333803978, "learning_rate": 1.967885848731515e-06, "loss": 0.8755865097045898, "step": 1102 }, { "epoch": 0.25414746543778804, "grad_norm": 0.7353231676630018, "learning_rate": 1.9677900029262004e-06, "loss": 0.8884447813034058, "step": 1103 }, { "epoch": 0.2543778801843318, "grad_norm": 0.48855032311862734, "learning_rate": 1.967694016647448e-06, "loss": 0.738738477230072, "step": 1104 }, { "epoch": 0.25460829493087556, "grad_norm": 0.5363150933196312, "learning_rate": 1.96759788990919e-06, "loss": 0.8024383783340454, "step": 1105 }, { "epoch": 0.25483870967741934, "grad_norm": 0.703802110686274, "learning_rate": 1.967501622725379e-06, "loss": 0.8780910968780518, "step": 1106 }, { "epoch": 0.2550691244239631, "grad_norm": 0.47799328608287317, "learning_rate": 1.967405215109989e-06, "loss": 0.8709204196929932, "step": 1107 }, { "epoch": 0.2552995391705069, "grad_norm": 0.5771096865101828, "learning_rate": 1.9673086670770122e-06, "loss": 0.8838910460472107, "step": 1108 }, { "epoch": 0.2555299539170507, "grad_norm": 0.6122299943883392, "learning_rate": 1.967211978640463e-06, "loss": 0.9310617446899414, "step": 1109 }, { "epoch": 0.2557603686635945, "grad_norm": 0.5172180782022067, "learning_rate": 1.9671151498143756e-06, "loss": 0.8453254699707031, "step": 1110 }, { "epoch": 0.25599078341013826, "grad_norm": 0.6724028308795985, "learning_rate": 1.967018180612804e-06, "loss": 1.0201973915100098, "step": 1111 }, { "epoch": 0.25622119815668204, "grad_norm": 0.5304279166188671, "learning_rate": 1.9669210710498242e-06, "loss": 0.84140944480896, "step": 1112 }, { "epoch": 0.2564516129032258, "grad_norm": 0.5850181467371437, "learning_rate": 1.9668238211395308e-06, "loss": 0.9012273550033569, "step": 1113 }, { "epoch": 0.2566820276497696, "grad_norm": 0.5516270166899023, "learning_rate": 1.9667264308960394e-06, "loss": 0.820103645324707, "step": 1114 }, { "epoch": 0.2569124423963134, "grad_norm": 0.7253674338479518, "learning_rate": 1.9666289003334868e-06, "loss": 1.0709048509597778, "step": 1115 }, { "epoch": 0.2571428571428571, "grad_norm": 0.6606805333344365, "learning_rate": 1.966531229466029e-06, "loss": 0.9408602714538574, "step": 1116 }, { "epoch": 0.2573732718894009, "grad_norm": 0.7074764796406602, "learning_rate": 1.9664334183078425e-06, "loss": 0.967316210269928, "step": 1117 }, { "epoch": 0.2576036866359447, "grad_norm": 0.7069704403267734, "learning_rate": 1.9663354668731248e-06, "loss": 0.9483754634857178, "step": 1118 }, { "epoch": 0.25783410138248847, "grad_norm": 0.7072881911304519, "learning_rate": 1.966237375176093e-06, "loss": 0.7978509664535522, "step": 1119 }, { "epoch": 0.25806451612903225, "grad_norm": 0.5719987288484106, "learning_rate": 1.9661391432309862e-06, "loss": 0.8720531463623047, "step": 1120 }, { "epoch": 0.25829493087557603, "grad_norm": 0.6673697559796071, "learning_rate": 1.966040771052061e-06, "loss": 0.7984024286270142, "step": 1121 }, { "epoch": 0.2585253456221198, "grad_norm": 0.5693036626081565, "learning_rate": 1.965942258653597e-06, "loss": 0.9255385398864746, "step": 1122 }, { "epoch": 0.2587557603686636, "grad_norm": 0.5886763980683305, "learning_rate": 1.9658436060498927e-06, "loss": 0.9028007984161377, "step": 1123 }, { "epoch": 0.2589861751152074, "grad_norm": 0.5256574840125579, "learning_rate": 1.9657448132552677e-06, "loss": 0.8773014545440674, "step": 1124 }, { "epoch": 0.25921658986175117, "grad_norm": 0.5356122505196939, "learning_rate": 1.9656458802840617e-06, "loss": 0.9280908107757568, "step": 1125 }, { "epoch": 0.25944700460829495, "grad_norm": 0.6473213250874083, "learning_rate": 1.9655468071506344e-06, "loss": 0.820783793926239, "step": 1126 }, { "epoch": 0.25967741935483873, "grad_norm": 0.490374992394704, "learning_rate": 1.9654475938693663e-06, "loss": 0.7832465171813965, "step": 1127 }, { "epoch": 0.25990783410138246, "grad_norm": 0.6097626342555662, "learning_rate": 1.965348240454658e-06, "loss": 0.8824669122695923, "step": 1128 }, { "epoch": 0.26013824884792625, "grad_norm": 0.5472888524636408, "learning_rate": 1.9652487469209305e-06, "loss": 0.8782131671905518, "step": 1129 }, { "epoch": 0.26036866359447003, "grad_norm": 0.6689126051687625, "learning_rate": 1.9651491132826255e-06, "loss": 0.938920259475708, "step": 1130 }, { "epoch": 0.2605990783410138, "grad_norm": 0.5811243675216263, "learning_rate": 1.965049339554204e-06, "loss": 0.8733320236206055, "step": 1131 }, { "epoch": 0.2608294930875576, "grad_norm": 0.5773916722243296, "learning_rate": 1.9649494257501485e-06, "loss": 0.8688358664512634, "step": 1132 }, { "epoch": 0.2610599078341014, "grad_norm": 0.5867794198483245, "learning_rate": 1.9648493718849617e-06, "loss": 0.9250427484512329, "step": 1133 }, { "epoch": 0.26129032258064516, "grad_norm": 0.5093685293336041, "learning_rate": 1.9647491779731655e-06, "loss": 0.7890609502792358, "step": 1134 }, { "epoch": 0.26152073732718895, "grad_norm": 0.5526465355704269, "learning_rate": 1.964648844029303e-06, "loss": 0.83612060546875, "step": 1135 }, { "epoch": 0.26175115207373273, "grad_norm": 0.597714005790405, "learning_rate": 1.9645483700679387e-06, "loss": 0.7951240539550781, "step": 1136 }, { "epoch": 0.2619815668202765, "grad_norm": 0.5785889079746135, "learning_rate": 1.9644477561036546e-06, "loss": 0.9746277332305908, "step": 1137 }, { "epoch": 0.2622119815668203, "grad_norm": 0.6092572079482067, "learning_rate": 1.9643470021510556e-06, "loss": 0.856966495513916, "step": 1138 }, { "epoch": 0.2624423963133641, "grad_norm": 0.5158468607686231, "learning_rate": 1.9642461082247663e-06, "loss": 0.7419042587280273, "step": 1139 }, { "epoch": 0.2626728110599078, "grad_norm": 0.6141847224483623, "learning_rate": 1.9641450743394304e-06, "loss": 0.8868693709373474, "step": 1140 }, { "epoch": 0.2629032258064516, "grad_norm": 0.6400145867633011, "learning_rate": 1.9640439005097133e-06, "loss": 1.0111520290374756, "step": 1141 }, { "epoch": 0.2631336405529954, "grad_norm": 0.5946199662941717, "learning_rate": 1.9639425867503006e-06, "loss": 0.9379187226295471, "step": 1142 }, { "epoch": 0.26336405529953916, "grad_norm": 0.6188285038344139, "learning_rate": 1.9638411330758973e-06, "loss": 0.8451071977615356, "step": 1143 }, { "epoch": 0.26359447004608294, "grad_norm": 0.6988429276503174, "learning_rate": 1.9637395395012295e-06, "loss": 1.0407288074493408, "step": 1144 }, { "epoch": 0.2638248847926267, "grad_norm": 0.7122851693009883, "learning_rate": 1.9636378060410433e-06, "loss": 0.9594388008117676, "step": 1145 }, { "epoch": 0.2640552995391705, "grad_norm": 0.4400072369022715, "learning_rate": 1.9635359327101057e-06, "loss": 0.7940789461135864, "step": 1146 }, { "epoch": 0.2642857142857143, "grad_norm": 0.6347840140846547, "learning_rate": 1.9634339195232025e-06, "loss": 0.9707269668579102, "step": 1147 }, { "epoch": 0.2645161290322581, "grad_norm": 0.6349984514987448, "learning_rate": 1.9633317664951417e-06, "loss": 0.9554522037506104, "step": 1148 }, { "epoch": 0.26474654377880186, "grad_norm": 0.7144693638673882, "learning_rate": 1.9632294736407497e-06, "loss": 1.009516716003418, "step": 1149 }, { "epoch": 0.26497695852534564, "grad_norm": 0.5429306162333095, "learning_rate": 1.9631270409748754e-06, "loss": 0.8337735533714294, "step": 1150 }, { "epoch": 0.2652073732718894, "grad_norm": 0.5901765838606909, "learning_rate": 1.963024468512386e-06, "loss": 0.9103367328643799, "step": 1151 }, { "epoch": 0.2654377880184332, "grad_norm": 0.3703807183273661, "learning_rate": 1.9629217562681694e-06, "loss": 0.7258249521255493, "step": 1152 }, { "epoch": 0.26566820276497694, "grad_norm": 0.6322578847379198, "learning_rate": 1.962818904257135e-06, "loss": 0.7696776390075684, "step": 1153 }, { "epoch": 0.2658986175115207, "grad_norm": 0.5842074670437798, "learning_rate": 1.962715912494211e-06, "loss": 0.9027894139289856, "step": 1154 }, { "epoch": 0.2661290322580645, "grad_norm": 0.6016444551454023, "learning_rate": 1.962612780994347e-06, "loss": 1.0412788391113281, "step": 1155 }, { "epoch": 0.2663594470046083, "grad_norm": 0.5483158655152818, "learning_rate": 1.962509509772512e-06, "loss": 0.8656542897224426, "step": 1156 }, { "epoch": 0.26658986175115207, "grad_norm": 0.56350579921959, "learning_rate": 1.9624060988436964e-06, "loss": 0.9541186094284058, "step": 1157 }, { "epoch": 0.26682027649769585, "grad_norm": 0.6019903664727945, "learning_rate": 1.962302548222909e-06, "loss": 0.7684942483901978, "step": 1158 }, { "epoch": 0.26705069124423964, "grad_norm": 0.5978642328134118, "learning_rate": 1.962198857925181e-06, "loss": 0.8934941291809082, "step": 1159 }, { "epoch": 0.2672811059907834, "grad_norm": 0.8041491872239377, "learning_rate": 1.962095027965562e-06, "loss": 0.8674842715263367, "step": 1160 }, { "epoch": 0.2675115207373272, "grad_norm": 0.5520577783269698, "learning_rate": 1.9619910583591237e-06, "loss": 0.8850778937339783, "step": 1161 }, { "epoch": 0.267741935483871, "grad_norm": 0.5547632066870658, "learning_rate": 1.961886949120957e-06, "loss": 0.9140915870666504, "step": 1162 }, { "epoch": 0.26797235023041477, "grad_norm": 0.5171975434439527, "learning_rate": 1.9617827002661733e-06, "loss": 0.7557287812232971, "step": 1163 }, { "epoch": 0.26820276497695855, "grad_norm": 0.6409514019909783, "learning_rate": 1.9616783118099032e-06, "loss": 0.8780542612075806, "step": 1164 }, { "epoch": 0.2684331797235023, "grad_norm": 0.5407478984703894, "learning_rate": 1.9615737837672995e-06, "loss": 0.8352043628692627, "step": 1165 }, { "epoch": 0.26866359447004606, "grad_norm": 0.5628947650252879, "learning_rate": 1.961469116153534e-06, "loss": 0.8119357228279114, "step": 1166 }, { "epoch": 0.26889400921658985, "grad_norm": 0.5744461460266088, "learning_rate": 1.9613643089837992e-06, "loss": 0.8953120708465576, "step": 1167 }, { "epoch": 0.26912442396313363, "grad_norm": 0.5867925171054906, "learning_rate": 1.9612593622733074e-06, "loss": 0.9078162908554077, "step": 1168 }, { "epoch": 0.2693548387096774, "grad_norm": 0.5358654275940312, "learning_rate": 1.961154276037292e-06, "loss": 0.9118859767913818, "step": 1169 }, { "epoch": 0.2695852534562212, "grad_norm": 0.5501238198976731, "learning_rate": 1.9610490502910056e-06, "loss": 0.8456159234046936, "step": 1170 }, { "epoch": 0.269815668202765, "grad_norm": 0.6291583788438779, "learning_rate": 1.9609436850497222e-06, "loss": 0.7860552072525024, "step": 1171 }, { "epoch": 0.27004608294930876, "grad_norm": 0.5078912747038423, "learning_rate": 1.9608381803287343e-06, "loss": 0.8121567368507385, "step": 1172 }, { "epoch": 0.27027649769585255, "grad_norm": 0.6271384929565738, "learning_rate": 1.9607325361433574e-06, "loss": 0.9212384819984436, "step": 1173 }, { "epoch": 0.27050691244239633, "grad_norm": 0.5704107274797215, "learning_rate": 1.960626752508924e-06, "loss": 0.9528858661651611, "step": 1174 }, { "epoch": 0.2707373271889401, "grad_norm": 0.5901390376692353, "learning_rate": 1.9605208294407894e-06, "loss": 0.8561227321624756, "step": 1175 }, { "epoch": 0.2709677419354839, "grad_norm": 0.5308748660328867, "learning_rate": 1.960414766954328e-06, "loss": 0.9333669543266296, "step": 1176 }, { "epoch": 0.2711981566820276, "grad_norm": 0.5146250417484006, "learning_rate": 1.9603085650649345e-06, "loss": 0.8879388570785522, "step": 1177 }, { "epoch": 0.2714285714285714, "grad_norm": 0.6699060572110628, "learning_rate": 1.9602022237880244e-06, "loss": 1.0099214315414429, "step": 1178 }, { "epoch": 0.2716589861751152, "grad_norm": 0.5456103597772948, "learning_rate": 1.9600957431390324e-06, "loss": 0.9341822862625122, "step": 1179 }, { "epoch": 0.271889400921659, "grad_norm": 0.48145703185786454, "learning_rate": 1.9599891231334144e-06, "loss": 0.7616428136825562, "step": 1180 }, { "epoch": 0.27211981566820276, "grad_norm": 0.4889684884403523, "learning_rate": 1.959882363786646e-06, "loss": 0.8270235061645508, "step": 1181 }, { "epoch": 0.27235023041474654, "grad_norm": 0.5354748169041671, "learning_rate": 1.9597754651142233e-06, "loss": 0.8715114593505859, "step": 1182 }, { "epoch": 0.2725806451612903, "grad_norm": 0.5251650427533354, "learning_rate": 1.959668427131662e-06, "loss": 0.6910781860351562, "step": 1183 }, { "epoch": 0.2728110599078341, "grad_norm": 0.5425639259870759, "learning_rate": 1.9595612498544997e-06, "loss": 0.9158545136451721, "step": 1184 }, { "epoch": 0.2730414746543779, "grad_norm": 0.4274378587816055, "learning_rate": 1.9594539332982917e-06, "loss": 0.7129944562911987, "step": 1185 }, { "epoch": 0.2732718894009217, "grad_norm": 0.5549453334752472, "learning_rate": 1.9593464774786155e-06, "loss": 0.9487595558166504, "step": 1186 }, { "epoch": 0.27350230414746546, "grad_norm": 0.490496609840347, "learning_rate": 1.959238882411068e-06, "loss": 0.9455368518829346, "step": 1187 }, { "epoch": 0.27373271889400924, "grad_norm": 0.5638225468967204, "learning_rate": 1.959131148111267e-06, "loss": 0.9005390405654907, "step": 1188 }, { "epoch": 0.27396313364055297, "grad_norm": 0.6239187759866925, "learning_rate": 1.9590232745948494e-06, "loss": 0.91117262840271, "step": 1189 }, { "epoch": 0.27419354838709675, "grad_norm": 0.46530917608588857, "learning_rate": 1.958915261877473e-06, "loss": 0.7940579652786255, "step": 1190 }, { "epoch": 0.27442396313364054, "grad_norm": 0.5621028227805456, "learning_rate": 1.9588071099748155e-06, "loss": 1.0705196857452393, "step": 1191 }, { "epoch": 0.2746543778801843, "grad_norm": 0.7402334674842445, "learning_rate": 1.9586988189025756e-06, "loss": 0.9311869740486145, "step": 1192 }, { "epoch": 0.2748847926267281, "grad_norm": 0.5809380189675816, "learning_rate": 1.9585903886764715e-06, "loss": 0.9400506019592285, "step": 1193 }, { "epoch": 0.2751152073732719, "grad_norm": 0.5097271764516258, "learning_rate": 1.958481819312241e-06, "loss": 0.8282920122146606, "step": 1194 }, { "epoch": 0.27534562211981567, "grad_norm": 0.6446418001070287, "learning_rate": 1.9583731108256435e-06, "loss": 0.9111119508743286, "step": 1195 }, { "epoch": 0.27557603686635945, "grad_norm": 0.6208204199981331, "learning_rate": 1.9582642632324576e-06, "loss": 0.9486548900604248, "step": 1196 }, { "epoch": 0.27580645161290324, "grad_norm": 0.634036768829364, "learning_rate": 1.9581552765484828e-06, "loss": 0.8452764749526978, "step": 1197 }, { "epoch": 0.276036866359447, "grad_norm": 0.6457489846855801, "learning_rate": 1.958046150789538e-06, "loss": 0.8636663556098938, "step": 1198 }, { "epoch": 0.2762672811059908, "grad_norm": 0.6308230498005049, "learning_rate": 1.9579368859714623e-06, "loss": 0.9819158315658569, "step": 1199 }, { "epoch": 0.2764976958525346, "grad_norm": 0.6100305190055095, "learning_rate": 1.957827482110116e-06, "loss": 0.8010607957839966, "step": 1200 }, { "epoch": 0.2767281105990783, "grad_norm": 0.44236661935550003, "learning_rate": 1.957717939221379e-06, "loss": 0.7686241865158081, "step": 1201 }, { "epoch": 0.2769585253456221, "grad_norm": 0.5324278038856628, "learning_rate": 1.9576082573211507e-06, "loss": 0.8548723459243774, "step": 1202 }, { "epoch": 0.2771889400921659, "grad_norm": 0.5873649231612361, "learning_rate": 1.957498436425351e-06, "loss": 0.7866852283477783, "step": 1203 }, { "epoch": 0.27741935483870966, "grad_norm": 0.5578610745935356, "learning_rate": 1.9573884765499215e-06, "loss": 0.8086235523223877, "step": 1204 }, { "epoch": 0.27764976958525345, "grad_norm": 0.6489442522213279, "learning_rate": 1.9572783777108217e-06, "loss": 1.0310871601104736, "step": 1205 }, { "epoch": 0.27788018433179723, "grad_norm": 0.6639195648959771, "learning_rate": 1.957168139924033e-06, "loss": 0.9482970237731934, "step": 1206 }, { "epoch": 0.278110599078341, "grad_norm": 0.5595205782283428, "learning_rate": 1.957057763205556e-06, "loss": 0.809493899345398, "step": 1207 }, { "epoch": 0.2783410138248848, "grad_norm": 0.5835729385419335, "learning_rate": 1.956947247571411e-06, "loss": 0.8679298162460327, "step": 1208 }, { "epoch": 0.2785714285714286, "grad_norm": 0.5339273489408208, "learning_rate": 1.95683659303764e-06, "loss": 0.8870571255683899, "step": 1209 }, { "epoch": 0.27880184331797236, "grad_norm": 0.6400258685482293, "learning_rate": 1.9567257996203046e-06, "loss": 0.8452431559562683, "step": 1210 }, { "epoch": 0.27903225806451615, "grad_norm": 0.585371400581961, "learning_rate": 1.9566148673354855e-06, "loss": 0.8376550674438477, "step": 1211 }, { "epoch": 0.27926267281105993, "grad_norm": 0.468171015360779, "learning_rate": 1.9565037961992853e-06, "loss": 0.7686463594436646, "step": 1212 }, { "epoch": 0.2794930875576037, "grad_norm": 0.6305180956441923, "learning_rate": 1.956392586227825e-06, "loss": 1.0064536333084106, "step": 1213 }, { "epoch": 0.27972350230414744, "grad_norm": 0.5204866621768998, "learning_rate": 1.956281237437247e-06, "loss": 0.9087784290313721, "step": 1214 }, { "epoch": 0.2799539170506912, "grad_norm": 0.5800831908467822, "learning_rate": 1.9561697498437133e-06, "loss": 0.8528383374214172, "step": 1215 }, { "epoch": 0.280184331797235, "grad_norm": 0.492586251170718, "learning_rate": 1.9560581234634062e-06, "loss": 0.8229737281799316, "step": 1216 }, { "epoch": 0.2804147465437788, "grad_norm": 0.6543530371868361, "learning_rate": 1.9559463583125285e-06, "loss": 0.8957454562187195, "step": 1217 }, { "epoch": 0.2806451612903226, "grad_norm": 0.6116476174626837, "learning_rate": 1.955834454407302e-06, "loss": 0.8373404741287231, "step": 1218 }, { "epoch": 0.28087557603686636, "grad_norm": 0.6339166918490768, "learning_rate": 1.9557224117639698e-06, "loss": 0.9117659330368042, "step": 1219 }, { "epoch": 0.28110599078341014, "grad_norm": 0.7009847380548185, "learning_rate": 1.9556102303987946e-06, "loss": 0.9079498052597046, "step": 1220 }, { "epoch": 0.2813364055299539, "grad_norm": 0.6797187898490639, "learning_rate": 1.9554979103280597e-06, "loss": 0.8127235174179077, "step": 1221 }, { "epoch": 0.2815668202764977, "grad_norm": 0.4430544694455362, "learning_rate": 1.9553854515680684e-06, "loss": 0.6790676712989807, "step": 1222 }, { "epoch": 0.2817972350230415, "grad_norm": 0.547920786044559, "learning_rate": 1.955272854135143e-06, "loss": 0.93434739112854, "step": 1223 }, { "epoch": 0.2820276497695853, "grad_norm": 0.5831429716678932, "learning_rate": 1.9551601180456274e-06, "loss": 0.8624403476715088, "step": 1224 }, { "epoch": 0.28225806451612906, "grad_norm": 0.5942670172250124, "learning_rate": 1.9550472433158856e-06, "loss": 0.8871273994445801, "step": 1225 }, { "epoch": 0.2824884792626728, "grad_norm": 0.6403907324028919, "learning_rate": 1.9549342299623007e-06, "loss": 1.0226445198059082, "step": 1226 }, { "epoch": 0.28271889400921657, "grad_norm": 0.5570530371692032, "learning_rate": 1.9548210780012764e-06, "loss": 0.9232503771781921, "step": 1227 }, { "epoch": 0.28294930875576035, "grad_norm": 0.5562171255847491, "learning_rate": 1.9547077874492367e-06, "loss": 0.944965124130249, "step": 1228 }, { "epoch": 0.28317972350230414, "grad_norm": 0.7815951055502713, "learning_rate": 1.9545943583226255e-06, "loss": 0.9491870403289795, "step": 1229 }, { "epoch": 0.2834101382488479, "grad_norm": 0.5531880644641158, "learning_rate": 1.9544807906379065e-06, "loss": 0.8477638363838196, "step": 1230 }, { "epoch": 0.2836405529953917, "grad_norm": 0.6334904267465776, "learning_rate": 1.9543670844115647e-06, "loss": 0.9733752012252808, "step": 1231 }, { "epoch": 0.2838709677419355, "grad_norm": 0.5077250781055755, "learning_rate": 1.954253239660104e-06, "loss": 0.8158911466598511, "step": 1232 }, { "epoch": 0.28410138248847927, "grad_norm": 0.47003121688563365, "learning_rate": 1.9541392564000487e-06, "loss": 0.8814271092414856, "step": 1233 }, { "epoch": 0.28433179723502305, "grad_norm": 0.5974631149552703, "learning_rate": 1.9540251346479435e-06, "loss": 0.8366897106170654, "step": 1234 }, { "epoch": 0.28456221198156684, "grad_norm": 0.5122641090735244, "learning_rate": 1.953910874420353e-06, "loss": 0.8043497800827026, "step": 1235 }, { "epoch": 0.2847926267281106, "grad_norm": 0.6923450749153209, "learning_rate": 1.953796475733862e-06, "loss": 0.904765248298645, "step": 1236 }, { "epoch": 0.2850230414746544, "grad_norm": 0.6316427864189956, "learning_rate": 1.953681938605075e-06, "loss": 0.9092245101928711, "step": 1237 }, { "epoch": 0.28525345622119813, "grad_norm": 0.44433825637231683, "learning_rate": 1.953567263050617e-06, "loss": 0.9119021892547607, "step": 1238 }, { "epoch": 0.2854838709677419, "grad_norm": 0.5258256580858013, "learning_rate": 1.9534524490871336e-06, "loss": 0.8380709886550903, "step": 1239 }, { "epoch": 0.2857142857142857, "grad_norm": 0.6731382971935342, "learning_rate": 1.9533374967312894e-06, "loss": 0.9410983324050903, "step": 1240 }, { "epoch": 0.2859447004608295, "grad_norm": 0.5901005556596554, "learning_rate": 1.953222405999769e-06, "loss": 0.882665753364563, "step": 1241 }, { "epoch": 0.28617511520737327, "grad_norm": 0.600142706864601, "learning_rate": 1.953107176909279e-06, "loss": 0.9334039688110352, "step": 1242 }, { "epoch": 0.28640552995391705, "grad_norm": 0.649506044390801, "learning_rate": 1.9529918094765433e-06, "loss": 0.8743090033531189, "step": 1243 }, { "epoch": 0.28663594470046083, "grad_norm": 0.5149777367828677, "learning_rate": 1.9528763037183086e-06, "loss": 0.9017846584320068, "step": 1244 }, { "epoch": 0.2868663594470046, "grad_norm": 0.6718877038666831, "learning_rate": 1.95276065965134e-06, "loss": 0.9412289261817932, "step": 1245 }, { "epoch": 0.2870967741935484, "grad_norm": 0.5829455891585096, "learning_rate": 1.9526448772924222e-06, "loss": 0.9008835554122925, "step": 1246 }, { "epoch": 0.2873271889400922, "grad_norm": 0.5850809594667484, "learning_rate": 1.9525289566583622e-06, "loss": 0.803752064704895, "step": 1247 }, { "epoch": 0.28755760368663597, "grad_norm": 0.642250740432813, "learning_rate": 1.952412897765985e-06, "loss": 0.8354049921035767, "step": 1248 }, { "epoch": 0.28778801843317975, "grad_norm": 0.711123311118831, "learning_rate": 1.9522967006321363e-06, "loss": 1.047461748123169, "step": 1249 }, { "epoch": 0.2880184331797235, "grad_norm": 0.5664585984555107, "learning_rate": 1.9521803652736826e-06, "loss": 0.9036056399345398, "step": 1250 }, { "epoch": 0.28824884792626726, "grad_norm": 0.6380477461120507, "learning_rate": 1.952063891707509e-06, "loss": 0.9534894227981567, "step": 1251 }, { "epoch": 0.28847926267281104, "grad_norm": 0.6213868500155985, "learning_rate": 1.9519472799505217e-06, "loss": 0.9200841188430786, "step": 1252 }, { "epoch": 0.2887096774193548, "grad_norm": 0.6071864938745559, "learning_rate": 1.9518305300196475e-06, "loss": 0.8917449712753296, "step": 1253 }, { "epoch": 0.2889400921658986, "grad_norm": 0.43859246681042113, "learning_rate": 1.9517136419318317e-06, "loss": 0.92131507396698, "step": 1254 }, { "epoch": 0.2891705069124424, "grad_norm": 0.5459214675052779, "learning_rate": 1.951596615704041e-06, "loss": 0.8862432241439819, "step": 1255 }, { "epoch": 0.2894009216589862, "grad_norm": 0.5238034407201325, "learning_rate": 1.951479451353261e-06, "loss": 0.7789605855941772, "step": 1256 }, { "epoch": 0.28963133640552996, "grad_norm": 0.6480376013887345, "learning_rate": 1.951362148896498e-06, "loss": 0.8187062740325928, "step": 1257 }, { "epoch": 0.28986175115207374, "grad_norm": 0.651824990199355, "learning_rate": 1.9512447083507784e-06, "loss": 1.0575072765350342, "step": 1258 }, { "epoch": 0.2900921658986175, "grad_norm": 0.5300946141437952, "learning_rate": 1.9511271297331493e-06, "loss": 0.8027279376983643, "step": 1259 }, { "epoch": 0.2903225806451613, "grad_norm": 0.549023479491683, "learning_rate": 1.951009413060676e-06, "loss": 0.6641743183135986, "step": 1260 }, { "epoch": 0.2905529953917051, "grad_norm": 0.4919566770154341, "learning_rate": 1.950891558350446e-06, "loss": 0.7937613725662231, "step": 1261 }, { "epoch": 0.2907834101382488, "grad_norm": 0.6213972326398296, "learning_rate": 1.950773565619564e-06, "loss": 0.9600511193275452, "step": 1262 }, { "epoch": 0.2910138248847926, "grad_norm": 0.6514763319649333, "learning_rate": 1.9506554348851585e-06, "loss": 0.8275980353355408, "step": 1263 }, { "epoch": 0.2912442396313364, "grad_norm": 0.598467260157347, "learning_rate": 1.950537166164375e-06, "loss": 0.9008789658546448, "step": 1264 }, { "epoch": 0.29147465437788017, "grad_norm": 0.5520168646542984, "learning_rate": 1.95041875947438e-06, "loss": 0.8701465129852295, "step": 1265 }, { "epoch": 0.29170506912442395, "grad_norm": 0.5793489097336151, "learning_rate": 1.95030021483236e-06, "loss": 0.9313883781433105, "step": 1266 }, { "epoch": 0.29193548387096774, "grad_norm": 0.5738973536331494, "learning_rate": 1.9501815322555222e-06, "loss": 0.883125901222229, "step": 1267 }, { "epoch": 0.2921658986175115, "grad_norm": 0.5430628147775056, "learning_rate": 1.9500627117610927e-06, "loss": 0.8856269121170044, "step": 1268 }, { "epoch": 0.2923963133640553, "grad_norm": 0.4857560088008075, "learning_rate": 1.9499437533663184e-06, "loss": 0.8817840218544006, "step": 1269 }, { "epoch": 0.2926267281105991, "grad_norm": 0.7079159031386842, "learning_rate": 1.949824657088466e-06, "loss": 0.9911330342292786, "step": 1270 }, { "epoch": 0.29285714285714287, "grad_norm": 0.6283382634413396, "learning_rate": 1.949705422944822e-06, "loss": 0.8902890682220459, "step": 1271 }, { "epoch": 0.29308755760368665, "grad_norm": 0.5381213123876506, "learning_rate": 1.949586050952693e-06, "loss": 0.6846401691436768, "step": 1272 }, { "epoch": 0.29331797235023044, "grad_norm": 0.6164805880844991, "learning_rate": 1.9494665411294057e-06, "loss": 0.9186165928840637, "step": 1273 }, { "epoch": 0.29354838709677417, "grad_norm": 0.4648178531483389, "learning_rate": 1.949346893492307e-06, "loss": 0.8614095449447632, "step": 1274 }, { "epoch": 0.29377880184331795, "grad_norm": 0.6146731068970395, "learning_rate": 1.9492271080587637e-06, "loss": 0.7824405431747437, "step": 1275 }, { "epoch": 0.29400921658986173, "grad_norm": 0.5415059908334089, "learning_rate": 1.949107184846162e-06, "loss": 0.8694697618484497, "step": 1276 }, { "epoch": 0.2942396313364055, "grad_norm": 0.6070495052767576, "learning_rate": 1.948987123871909e-06, "loss": 0.8839597105979919, "step": 1277 }, { "epoch": 0.2944700460829493, "grad_norm": 0.5155544169686388, "learning_rate": 1.948866925153431e-06, "loss": 0.832268238067627, "step": 1278 }, { "epoch": 0.2947004608294931, "grad_norm": 0.48264272480740306, "learning_rate": 1.948746588708175e-06, "loss": 0.8243123888969421, "step": 1279 }, { "epoch": 0.29493087557603687, "grad_norm": 0.7516695382591614, "learning_rate": 1.948626114553608e-06, "loss": 0.99314284324646, "step": 1280 }, { "epoch": 0.29516129032258065, "grad_norm": 0.6001488755214682, "learning_rate": 1.948505502707216e-06, "loss": 0.8853542804718018, "step": 1281 }, { "epoch": 0.29539170506912443, "grad_norm": 0.7940640499991963, "learning_rate": 1.948384753186506e-06, "loss": 0.9623305797576904, "step": 1282 }, { "epoch": 0.2956221198156682, "grad_norm": 0.64774993620639, "learning_rate": 1.948263866009005e-06, "loss": 0.8321142792701721, "step": 1283 }, { "epoch": 0.295852534562212, "grad_norm": 0.6059595321597901, "learning_rate": 1.948142841192258e-06, "loss": 0.8911606669425964, "step": 1284 }, { "epoch": 0.2960829493087558, "grad_norm": 0.6228210357050852, "learning_rate": 1.948021678753834e-06, "loss": 0.9501996040344238, "step": 1285 }, { "epoch": 0.29631336405529957, "grad_norm": 0.5846881548888203, "learning_rate": 1.947900378711318e-06, "loss": 0.8555784225463867, "step": 1286 }, { "epoch": 0.2965437788018433, "grad_norm": 0.5726752466099971, "learning_rate": 1.9477789410823163e-06, "loss": 0.7703878283500671, "step": 1287 }, { "epoch": 0.2967741935483871, "grad_norm": 0.5629458043150717, "learning_rate": 1.947657365884457e-06, "loss": 1.0072009563446045, "step": 1288 }, { "epoch": 0.29700460829493086, "grad_norm": 0.5698014348408978, "learning_rate": 1.9475356531353847e-06, "loss": 0.7633493542671204, "step": 1289 }, { "epoch": 0.29723502304147464, "grad_norm": 0.5241558601711666, "learning_rate": 1.9474138028527674e-06, "loss": 0.88579261302948, "step": 1290 }, { "epoch": 0.2974654377880184, "grad_norm": 0.6037880677787516, "learning_rate": 1.94729181505429e-06, "loss": 0.8356794118881226, "step": 1291 }, { "epoch": 0.2976958525345622, "grad_norm": 0.6197051238228268, "learning_rate": 1.94716968975766e-06, "loss": 0.8330395817756653, "step": 1292 }, { "epoch": 0.297926267281106, "grad_norm": 0.6667932213948545, "learning_rate": 1.947047426980604e-06, "loss": 0.9219698905944824, "step": 1293 }, { "epoch": 0.2981566820276498, "grad_norm": 0.5409653154450632, "learning_rate": 1.9469250267408674e-06, "loss": 0.880803644657135, "step": 1294 }, { "epoch": 0.29838709677419356, "grad_norm": 0.5789679620224094, "learning_rate": 1.9468024890562165e-06, "loss": 0.8212012052536011, "step": 1295 }, { "epoch": 0.29861751152073734, "grad_norm": 0.6209106243517916, "learning_rate": 1.946679813944438e-06, "loss": 1.0118587017059326, "step": 1296 }, { "epoch": 0.2988479262672811, "grad_norm": 0.6374046746708436, "learning_rate": 1.9465570014233377e-06, "loss": 0.8708915710449219, "step": 1297 }, { "epoch": 0.2990783410138249, "grad_norm": 0.6373146041782783, "learning_rate": 1.9464340515107415e-06, "loss": 0.9386067986488342, "step": 1298 }, { "epoch": 0.29930875576036864, "grad_norm": 0.5346925830356088, "learning_rate": 1.9463109642244958e-06, "loss": 0.8672319650650024, "step": 1299 }, { "epoch": 0.2995391705069124, "grad_norm": 0.7198371333215221, "learning_rate": 1.9461877395824662e-06, "loss": 0.9002958536148071, "step": 1300 }, { "epoch": 0.2997695852534562, "grad_norm": 0.6247724220238058, "learning_rate": 1.946064377602539e-06, "loss": 0.9206029772758484, "step": 1301 }, { "epoch": 0.3, "grad_norm": 0.8295443472719992, "learning_rate": 1.94594087830262e-06, "loss": 1.0063598155975342, "step": 1302 }, { "epoch": 0.3002304147465438, "grad_norm": 0.5149695005553171, "learning_rate": 1.9458172417006346e-06, "loss": 0.7616912126541138, "step": 1303 }, { "epoch": 0.30046082949308756, "grad_norm": 0.5462398029065331, "learning_rate": 1.945693467814529e-06, "loss": 0.8385730385780334, "step": 1304 }, { "epoch": 0.30069124423963134, "grad_norm": 0.4854220181479302, "learning_rate": 1.9455695566622677e-06, "loss": 0.7032216787338257, "step": 1305 }, { "epoch": 0.3009216589861751, "grad_norm": 0.5554776786626977, "learning_rate": 1.9454455082618373e-06, "loss": 0.7647181749343872, "step": 1306 }, { "epoch": 0.3011520737327189, "grad_norm": 0.7119385935860951, "learning_rate": 1.945321322631243e-06, "loss": 0.9918918013572693, "step": 1307 }, { "epoch": 0.3013824884792627, "grad_norm": 0.5689741757687454, "learning_rate": 1.945196999788511e-06, "loss": 0.838451623916626, "step": 1308 }, { "epoch": 0.3016129032258065, "grad_norm": 0.7156229049064139, "learning_rate": 1.945072539751685e-06, "loss": 0.9739303588867188, "step": 1309 }, { "epoch": 0.30184331797235026, "grad_norm": 0.4850858592361209, "learning_rate": 1.9449479425388305e-06, "loss": 0.8233742713928223, "step": 1310 }, { "epoch": 0.302073732718894, "grad_norm": 0.666231819455408, "learning_rate": 1.944823208168034e-06, "loss": 0.9765088558197021, "step": 1311 }, { "epoch": 0.30230414746543777, "grad_norm": 0.5940530240559707, "learning_rate": 1.944698336657399e-06, "loss": 0.7614048719406128, "step": 1312 }, { "epoch": 0.30253456221198155, "grad_norm": 0.5807403996402337, "learning_rate": 1.9445733280250512e-06, "loss": 0.760692834854126, "step": 1313 }, { "epoch": 0.30276497695852533, "grad_norm": 0.710580819926471, "learning_rate": 1.944448182289135e-06, "loss": 0.8484706878662109, "step": 1314 }, { "epoch": 0.3029953917050691, "grad_norm": 0.6131916776262658, "learning_rate": 1.944322899467816e-06, "loss": 0.8857289552688599, "step": 1315 }, { "epoch": 0.3032258064516129, "grad_norm": 0.7120330171482998, "learning_rate": 1.944197479579278e-06, "loss": 0.8375179171562195, "step": 1316 }, { "epoch": 0.3034562211981567, "grad_norm": 0.5402001956337824, "learning_rate": 1.9440719226417263e-06, "loss": 0.8141925930976868, "step": 1317 }, { "epoch": 0.30368663594470047, "grad_norm": 0.7607357810019435, "learning_rate": 1.943946228673384e-06, "loss": 0.9970111846923828, "step": 1318 }, { "epoch": 0.30391705069124425, "grad_norm": 0.5721230302327327, "learning_rate": 1.9438203976924966e-06, "loss": 0.9542866349220276, "step": 1319 }, { "epoch": 0.30414746543778803, "grad_norm": 0.5904074306009988, "learning_rate": 1.943694429717328e-06, "loss": 0.8808399438858032, "step": 1320 }, { "epoch": 0.3043778801843318, "grad_norm": 0.5734964183027593, "learning_rate": 1.9435683247661623e-06, "loss": 0.8541150093078613, "step": 1321 }, { "epoch": 0.3046082949308756, "grad_norm": 0.7749551173384804, "learning_rate": 1.943442082857303e-06, "loss": 0.8887044191360474, "step": 1322 }, { "epoch": 0.30483870967741933, "grad_norm": 0.6530281616907251, "learning_rate": 1.9433157040090746e-06, "loss": 0.8699131011962891, "step": 1323 }, { "epoch": 0.3050691244239631, "grad_norm": 0.6811202971751444, "learning_rate": 1.9431891882398205e-06, "loss": 0.7096077799797058, "step": 1324 }, { "epoch": 0.3052995391705069, "grad_norm": 0.5279135582200482, "learning_rate": 1.9430625355679045e-06, "loss": 0.8040453195571899, "step": 1325 }, { "epoch": 0.3055299539170507, "grad_norm": 0.5904456084555657, "learning_rate": 1.9429357460117093e-06, "loss": 0.8275970220565796, "step": 1326 }, { "epoch": 0.30576036866359446, "grad_norm": 0.5947614996956965, "learning_rate": 1.9428088195896393e-06, "loss": 0.9724141359329224, "step": 1327 }, { "epoch": 0.30599078341013825, "grad_norm": 0.6696756628924122, "learning_rate": 1.9426817563201174e-06, "loss": 0.9293274879455566, "step": 1328 }, { "epoch": 0.30622119815668203, "grad_norm": 0.5976334939970911, "learning_rate": 1.9425545562215865e-06, "loss": 0.9454036951065063, "step": 1329 }, { "epoch": 0.3064516129032258, "grad_norm": 0.48928245529374687, "learning_rate": 1.9424272193125094e-06, "loss": 0.7751365900039673, "step": 1330 }, { "epoch": 0.3066820276497696, "grad_norm": 0.5211050083614731, "learning_rate": 1.942299745611369e-06, "loss": 0.8444688320159912, "step": 1331 }, { "epoch": 0.3069124423963134, "grad_norm": 0.6370602856216532, "learning_rate": 1.9421721351366684e-06, "loss": 0.7751414775848389, "step": 1332 }, { "epoch": 0.30714285714285716, "grad_norm": 0.6732034032956694, "learning_rate": 1.9420443879069287e-06, "loss": 0.912209153175354, "step": 1333 }, { "epoch": 0.30737327188940095, "grad_norm": 0.4990267188564962, "learning_rate": 1.941916503940694e-06, "loss": 0.8897542357444763, "step": 1334 }, { "epoch": 0.3076036866359447, "grad_norm": 0.6319943447022882, "learning_rate": 1.9417884832565257e-06, "loss": 0.8562046885490417, "step": 1335 }, { "epoch": 0.30783410138248846, "grad_norm": 0.6422557067377674, "learning_rate": 1.9416603258730055e-06, "loss": 0.8886401653289795, "step": 1336 }, { "epoch": 0.30806451612903224, "grad_norm": 0.5477788033894715, "learning_rate": 1.9415320318087354e-06, "loss": 0.7401903867721558, "step": 1337 }, { "epoch": 0.308294930875576, "grad_norm": 0.5300494853019255, "learning_rate": 1.941403601082338e-06, "loss": 0.761360764503479, "step": 1338 }, { "epoch": 0.3085253456221198, "grad_norm": 0.5079078108497779, "learning_rate": 1.9412750337124537e-06, "loss": 0.9223028421401978, "step": 1339 }, { "epoch": 0.3087557603686636, "grad_norm": 0.6370349934611669, "learning_rate": 1.9411463297177446e-06, "loss": 0.9287113547325134, "step": 1340 }, { "epoch": 0.3089861751152074, "grad_norm": 0.6186566628026451, "learning_rate": 1.941017489116891e-06, "loss": 0.8548502922058105, "step": 1341 }, { "epoch": 0.30921658986175116, "grad_norm": 0.6058300330585435, "learning_rate": 1.9408885119285953e-06, "loss": 0.8885709643363953, "step": 1342 }, { "epoch": 0.30944700460829494, "grad_norm": 0.6807261513363189, "learning_rate": 1.940759398171577e-06, "loss": 0.8970856666564941, "step": 1343 }, { "epoch": 0.3096774193548387, "grad_norm": 0.5664251996297385, "learning_rate": 1.9406301478645783e-06, "loss": 0.847138524055481, "step": 1344 }, { "epoch": 0.3099078341013825, "grad_norm": 0.5354847877369665, "learning_rate": 1.9405007610263584e-06, "loss": 0.7892216444015503, "step": 1345 }, { "epoch": 0.3101382488479263, "grad_norm": 0.6610649492142503, "learning_rate": 1.940371237675698e-06, "loss": 0.8869141340255737, "step": 1346 }, { "epoch": 0.3103686635944701, "grad_norm": 0.6628677961578048, "learning_rate": 1.9402415778313976e-06, "loss": 0.8405635356903076, "step": 1347 }, { "epoch": 0.3105990783410138, "grad_norm": 0.6646875425992601, "learning_rate": 1.9401117815122768e-06, "loss": 0.914352536201477, "step": 1348 }, { "epoch": 0.3108294930875576, "grad_norm": 0.5494930695935469, "learning_rate": 1.9399818487371756e-06, "loss": 0.9059416055679321, "step": 1349 }, { "epoch": 0.31105990783410137, "grad_norm": 0.7196818748440269, "learning_rate": 1.939851779524953e-06, "loss": 1.0513644218444824, "step": 1350 }, { "epoch": 0.31129032258064515, "grad_norm": 0.6538820317800585, "learning_rate": 1.9397215738944893e-06, "loss": 0.8673127889633179, "step": 1351 }, { "epoch": 0.31152073732718893, "grad_norm": 0.5818727011440767, "learning_rate": 1.9395912318646827e-06, "loss": 0.7907043695449829, "step": 1352 }, { "epoch": 0.3117511520737327, "grad_norm": 0.6065295506543811, "learning_rate": 1.9394607534544527e-06, "loss": 0.8127990961074829, "step": 1353 }, { "epoch": 0.3119815668202765, "grad_norm": 0.598693369273182, "learning_rate": 1.9393301386827387e-06, "loss": 0.877837061882019, "step": 1354 }, { "epoch": 0.3122119815668203, "grad_norm": 0.734976387219602, "learning_rate": 1.939199387568498e-06, "loss": 0.8518592715263367, "step": 1355 }, { "epoch": 0.31244239631336407, "grad_norm": 0.5473192670176156, "learning_rate": 1.9390685001307093e-06, "loss": 0.7151869535446167, "step": 1356 }, { "epoch": 0.31267281105990785, "grad_norm": 0.5581062201544951, "learning_rate": 1.9389374763883716e-06, "loss": 0.8325271606445312, "step": 1357 }, { "epoch": 0.31290322580645163, "grad_norm": 0.6052904551524719, "learning_rate": 1.938806316360502e-06, "loss": 0.6875327825546265, "step": 1358 }, { "epoch": 0.3131336405529954, "grad_norm": 0.5481097616797531, "learning_rate": 1.9386750200661382e-06, "loss": 0.8944000005722046, "step": 1359 }, { "epoch": 0.31336405529953915, "grad_norm": 0.4954617799257055, "learning_rate": 1.938543587524338e-06, "loss": 0.8544747829437256, "step": 1360 }, { "epoch": 0.31359447004608293, "grad_norm": 0.6938391730058001, "learning_rate": 1.9384120187541788e-06, "loss": 0.9216448068618774, "step": 1361 }, { "epoch": 0.3138248847926267, "grad_norm": 0.6834174981389874, "learning_rate": 1.938280313774757e-06, "loss": 0.9239494800567627, "step": 1362 }, { "epoch": 0.3140552995391705, "grad_norm": 0.6816430700209432, "learning_rate": 1.9381484726051896e-06, "loss": 0.8903297185897827, "step": 1363 }, { "epoch": 0.3142857142857143, "grad_norm": 0.46405748253250195, "learning_rate": 1.9380164952646137e-06, "loss": 0.7019625306129456, "step": 1364 }, { "epoch": 0.31451612903225806, "grad_norm": 0.6844663370999042, "learning_rate": 1.9378843817721854e-06, "loss": 0.8668909072875977, "step": 1365 }, { "epoch": 0.31474654377880185, "grad_norm": 0.6454844689755169, "learning_rate": 1.9377521321470804e-06, "loss": 0.8124282360076904, "step": 1366 }, { "epoch": 0.31497695852534563, "grad_norm": 0.5251795296125459, "learning_rate": 1.937619746408495e-06, "loss": 0.7543507814407349, "step": 1367 }, { "epoch": 0.3152073732718894, "grad_norm": 0.6140420297919054, "learning_rate": 1.9374872245756448e-06, "loss": 0.8355506062507629, "step": 1368 }, { "epoch": 0.3154377880184332, "grad_norm": 0.5898778959170111, "learning_rate": 1.937354566667765e-06, "loss": 0.9642060399055481, "step": 1369 }, { "epoch": 0.315668202764977, "grad_norm": 0.5556038505388771, "learning_rate": 1.93722177270411e-06, "loss": 0.9044197201728821, "step": 1370 }, { "epoch": 0.31589861751152076, "grad_norm": 0.4271939145273097, "learning_rate": 1.937088842703956e-06, "loss": 0.8077869415283203, "step": 1371 }, { "epoch": 0.3161290322580645, "grad_norm": 0.6032982707731396, "learning_rate": 1.9369557766865968e-06, "loss": 0.8319793939590454, "step": 1372 }, { "epoch": 0.3163594470046083, "grad_norm": 0.6304953638761566, "learning_rate": 1.9368225746713475e-06, "loss": 0.8233131170272827, "step": 1373 }, { "epoch": 0.31658986175115206, "grad_norm": 0.6631214954178034, "learning_rate": 1.936689236677541e-06, "loss": 0.7898514270782471, "step": 1374 }, { "epoch": 0.31682027649769584, "grad_norm": 0.6121849479571054, "learning_rate": 1.9365557627245326e-06, "loss": 0.9243249893188477, "step": 1375 }, { "epoch": 0.3170506912442396, "grad_norm": 0.5673475924264754, "learning_rate": 1.9364221528316946e-06, "loss": 0.8153131008148193, "step": 1376 }, { "epoch": 0.3172811059907834, "grad_norm": 0.6767166003638188, "learning_rate": 1.936288407018421e-06, "loss": 0.9203826189041138, "step": 1377 }, { "epoch": 0.3175115207373272, "grad_norm": 0.6187562743125278, "learning_rate": 1.936154525304124e-06, "loss": 0.902605414390564, "step": 1378 }, { "epoch": 0.317741935483871, "grad_norm": 0.6256929156852202, "learning_rate": 1.936020507708238e-06, "loss": 0.9504558444023132, "step": 1379 }, { "epoch": 0.31797235023041476, "grad_norm": 0.6737932441495208, "learning_rate": 1.9358863542502133e-06, "loss": 0.8068373203277588, "step": 1380 }, { "epoch": 0.31820276497695854, "grad_norm": 0.6309381884158767, "learning_rate": 1.935752064949524e-06, "loss": 1.00711989402771, "step": 1381 }, { "epoch": 0.3184331797235023, "grad_norm": 0.6297604875594859, "learning_rate": 1.935617639825661e-06, "loss": 0.8271746039390564, "step": 1382 }, { "epoch": 0.3186635944700461, "grad_norm": 0.658739150286029, "learning_rate": 1.9354830788981363e-06, "loss": 0.8478754758834839, "step": 1383 }, { "epoch": 0.31889400921658984, "grad_norm": 0.6165108812612344, "learning_rate": 1.935348382186481e-06, "loss": 0.9240723252296448, "step": 1384 }, { "epoch": 0.3191244239631336, "grad_norm": 0.6446571506984649, "learning_rate": 1.935213549710246e-06, "loss": 0.9275645613670349, "step": 1385 }, { "epoch": 0.3193548387096774, "grad_norm": 0.6060948743586713, "learning_rate": 1.9350785814890027e-06, "loss": 0.9838275909423828, "step": 1386 }, { "epoch": 0.3195852534562212, "grad_norm": 0.5765714017880346, "learning_rate": 1.934943477542341e-06, "loss": 0.9259177446365356, "step": 1387 }, { "epoch": 0.31981566820276497, "grad_norm": 0.6051365106169855, "learning_rate": 1.9348082378898714e-06, "loss": 0.9252835512161255, "step": 1388 }, { "epoch": 0.32004608294930875, "grad_norm": 0.5670107070091258, "learning_rate": 1.9346728625512235e-06, "loss": 0.8929460048675537, "step": 1389 }, { "epoch": 0.32027649769585254, "grad_norm": 0.5325931239107909, "learning_rate": 1.934537351546047e-06, "loss": 0.8909564018249512, "step": 1390 }, { "epoch": 0.3205069124423963, "grad_norm": 0.6295332947946368, "learning_rate": 1.934401704894011e-06, "loss": 0.8745983839035034, "step": 1391 }, { "epoch": 0.3207373271889401, "grad_norm": 0.5987888846505133, "learning_rate": 1.934265922614805e-06, "loss": 0.8622266054153442, "step": 1392 }, { "epoch": 0.3209677419354839, "grad_norm": 0.5587707056179402, "learning_rate": 1.9341300047281365e-06, "loss": 0.6796590089797974, "step": 1393 }, { "epoch": 0.32119815668202767, "grad_norm": 0.6156409956015295, "learning_rate": 1.9339939512537344e-06, "loss": 0.9012733697891235, "step": 1394 }, { "epoch": 0.32142857142857145, "grad_norm": 0.5898128750933246, "learning_rate": 1.933857762211347e-06, "loss": 0.9196282625198364, "step": 1395 }, { "epoch": 0.3216589861751152, "grad_norm": 0.716981638669288, "learning_rate": 1.9337214376207417e-06, "loss": 0.7717788219451904, "step": 1396 }, { "epoch": 0.32188940092165896, "grad_norm": 0.6574432706431985, "learning_rate": 1.9335849775017057e-06, "loss": 0.8516619801521301, "step": 1397 }, { "epoch": 0.32211981566820275, "grad_norm": 0.6319036543472709, "learning_rate": 1.933448381874046e-06, "loss": 0.8089120388031006, "step": 1398 }, { "epoch": 0.32235023041474653, "grad_norm": 0.7117992019263996, "learning_rate": 1.9333116507575895e-06, "loss": 0.8940925598144531, "step": 1399 }, { "epoch": 0.3225806451612903, "grad_norm": 1.1103495530975782, "learning_rate": 1.9331747841721827e-06, "loss": 1.0240859985351562, "step": 1400 }, { "epoch": 0.3228110599078341, "grad_norm": 0.6110124319562482, "learning_rate": 1.9330377821376916e-06, "loss": 0.742689847946167, "step": 1401 }, { "epoch": 0.3230414746543779, "grad_norm": 0.6830153635526487, "learning_rate": 1.932900644674001e-06, "loss": 0.9843875169754028, "step": 1402 }, { "epoch": 0.32327188940092166, "grad_norm": 0.6043326796009376, "learning_rate": 1.932763371801017e-06, "loss": 0.7289329767227173, "step": 1403 }, { "epoch": 0.32350230414746545, "grad_norm": 0.676828647698979, "learning_rate": 1.9326259635386644e-06, "loss": 0.7706295251846313, "step": 1404 }, { "epoch": 0.32373271889400923, "grad_norm": 0.526047650367784, "learning_rate": 1.932488419906888e-06, "loss": 0.87788325548172, "step": 1405 }, { "epoch": 0.323963133640553, "grad_norm": 0.5971998478662486, "learning_rate": 1.9323507409256515e-06, "loss": 0.863690972328186, "step": 1406 }, { "epoch": 0.3241935483870968, "grad_norm": 0.700825296208237, "learning_rate": 1.9322129266149396e-06, "loss": 0.9333875179290771, "step": 1407 }, { "epoch": 0.3244239631336406, "grad_norm": 0.6642455421211582, "learning_rate": 1.9320749769947555e-06, "loss": 0.9170523881912231, "step": 1408 }, { "epoch": 0.3246543778801843, "grad_norm": 0.7524235771818621, "learning_rate": 1.931936892085122e-06, "loss": 0.9337698221206665, "step": 1409 }, { "epoch": 0.3248847926267281, "grad_norm": 0.5832115844679703, "learning_rate": 1.9317986719060824e-06, "loss": 0.8436682224273682, "step": 1410 }, { "epoch": 0.3251152073732719, "grad_norm": 0.5569674571153642, "learning_rate": 1.9316603164776996e-06, "loss": 0.6652755737304688, "step": 1411 }, { "epoch": 0.32534562211981566, "grad_norm": 0.5895248621851672, "learning_rate": 1.931521825820055e-06, "loss": 0.7966932654380798, "step": 1412 }, { "epoch": 0.32557603686635944, "grad_norm": 0.7207375493085693, "learning_rate": 1.93138319995325e-06, "loss": 0.9791682958602905, "step": 1413 }, { "epoch": 0.3258064516129032, "grad_norm": 0.6505701538481653, "learning_rate": 1.931244438897407e-06, "loss": 0.7403467297554016, "step": 1414 }, { "epoch": 0.326036866359447, "grad_norm": 0.5881243698924259, "learning_rate": 1.931105542672667e-06, "loss": 0.7758523225784302, "step": 1415 }, { "epoch": 0.3262672811059908, "grad_norm": 0.6866613437755184, "learning_rate": 1.9309665112991894e-06, "loss": 0.8444551229476929, "step": 1416 }, { "epoch": 0.3264976958525346, "grad_norm": 0.6987387290897759, "learning_rate": 1.9308273447971553e-06, "loss": 0.8796061277389526, "step": 1417 }, { "epoch": 0.32672811059907836, "grad_norm": 0.6235742967720523, "learning_rate": 1.9306880431867643e-06, "loss": 0.8386640548706055, "step": 1418 }, { "epoch": 0.32695852534562214, "grad_norm": 0.669578268248941, "learning_rate": 1.930548606488236e-06, "loss": 0.9229142665863037, "step": 1419 }, { "epoch": 0.3271889400921659, "grad_norm": 0.6307605261613933, "learning_rate": 1.9304090347218094e-06, "loss": 0.9938615560531616, "step": 1420 }, { "epoch": 0.32741935483870965, "grad_norm": 0.6526253572614591, "learning_rate": 1.930269327907743e-06, "loss": 0.7946186661720276, "step": 1421 }, { "epoch": 0.32764976958525344, "grad_norm": 0.6717401804422498, "learning_rate": 1.930129486066315e-06, "loss": 0.9456713199615479, "step": 1422 }, { "epoch": 0.3278801843317972, "grad_norm": 0.5156577436912951, "learning_rate": 1.929989509217824e-06, "loss": 0.844656765460968, "step": 1423 }, { "epoch": 0.328110599078341, "grad_norm": 0.5219846430026822, "learning_rate": 1.9298493973825862e-06, "loss": 0.7534950971603394, "step": 1424 }, { "epoch": 0.3283410138248848, "grad_norm": 0.7328149629860281, "learning_rate": 1.92970915058094e-06, "loss": 0.934429407119751, "step": 1425 }, { "epoch": 0.32857142857142857, "grad_norm": 0.6913075282966522, "learning_rate": 1.929568768833241e-06, "loss": 0.9491959810256958, "step": 1426 }, { "epoch": 0.32880184331797235, "grad_norm": 0.6938433783461605, "learning_rate": 1.9294282521598657e-06, "loss": 0.9739001989364624, "step": 1427 }, { "epoch": 0.32903225806451614, "grad_norm": 0.7260904191446513, "learning_rate": 1.92928760058121e-06, "loss": 0.8159639835357666, "step": 1428 }, { "epoch": 0.3292626728110599, "grad_norm": 0.6287238530590293, "learning_rate": 1.9291468141176894e-06, "loss": 0.8752772808074951, "step": 1429 }, { "epoch": 0.3294930875576037, "grad_norm": 0.6480201898337635, "learning_rate": 1.929005892789739e-06, "loss": 0.8543882369995117, "step": 1430 }, { "epoch": 0.3297235023041475, "grad_norm": 0.7294679881265868, "learning_rate": 1.928864836617813e-06, "loss": 0.8837493658065796, "step": 1431 }, { "epoch": 0.32995391705069127, "grad_norm": 0.7638461032292205, "learning_rate": 1.9287236456223854e-06, "loss": 0.9320387840270996, "step": 1432 }, { "epoch": 0.330184331797235, "grad_norm": 0.5042343025936808, "learning_rate": 1.92858231982395e-06, "loss": 0.8272919654846191, "step": 1433 }, { "epoch": 0.3304147465437788, "grad_norm": 0.6965906133224807, "learning_rate": 1.9284408592430207e-06, "loss": 0.9415527582168579, "step": 1434 }, { "epoch": 0.33064516129032256, "grad_norm": 0.7215035047368656, "learning_rate": 1.928299263900129e-06, "loss": 0.91558837890625, "step": 1435 }, { "epoch": 0.33087557603686635, "grad_norm": 0.5956823050741555, "learning_rate": 1.9281575338158287e-06, "loss": 0.9333036541938782, "step": 1436 }, { "epoch": 0.33110599078341013, "grad_norm": 0.6051938214219355, "learning_rate": 1.928015669010691e-06, "loss": 0.7823847532272339, "step": 1437 }, { "epoch": 0.3313364055299539, "grad_norm": 0.7462826372754077, "learning_rate": 1.9278736695053075e-06, "loss": 0.8436610102653503, "step": 1438 }, { "epoch": 0.3315668202764977, "grad_norm": 0.7254037554281902, "learning_rate": 1.927731535320289e-06, "loss": 0.8658925890922546, "step": 1439 }, { "epoch": 0.3317972350230415, "grad_norm": 0.6229809292573231, "learning_rate": 1.9275892664762665e-06, "loss": 0.8510075807571411, "step": 1440 }, { "epoch": 0.33202764976958526, "grad_norm": 0.6349856559462502, "learning_rate": 1.9274468629938897e-06, "loss": 0.8002004623413086, "step": 1441 }, { "epoch": 0.33225806451612905, "grad_norm": 0.6766111098462606, "learning_rate": 1.9273043248938287e-06, "loss": 1.0030219554901123, "step": 1442 }, { "epoch": 0.33248847926267283, "grad_norm": 0.6313930076569801, "learning_rate": 1.9271616521967723e-06, "loss": 0.8415981531143188, "step": 1443 }, { "epoch": 0.3327188940092166, "grad_norm": 0.5599899399531522, "learning_rate": 1.9270188449234295e-06, "loss": 0.7704254388809204, "step": 1444 }, { "epoch": 0.33294930875576034, "grad_norm": 0.5742869826690059, "learning_rate": 1.9268759030945294e-06, "loss": 0.8350723385810852, "step": 1445 }, { "epoch": 0.3331797235023041, "grad_norm": 0.7177949171518314, "learning_rate": 1.926732826730818e-06, "loss": 0.8729690313339233, "step": 1446 }, { "epoch": 0.3334101382488479, "grad_norm": 0.64691268148931, "learning_rate": 1.926589615853064e-06, "loss": 0.7758746147155762, "step": 1447 }, { "epoch": 0.3336405529953917, "grad_norm": 0.6330035443782508, "learning_rate": 1.926446270482054e-06, "loss": 0.7895134687423706, "step": 1448 }, { "epoch": 0.3338709677419355, "grad_norm": 0.5710370240153678, "learning_rate": 1.9263027906385936e-06, "loss": 1.0239053964614868, "step": 1449 }, { "epoch": 0.33410138248847926, "grad_norm": 0.6423159813237256, "learning_rate": 1.9261591763435104e-06, "loss": 0.9294595122337341, "step": 1450 }, { "epoch": 0.33433179723502304, "grad_norm": 0.690830605411519, "learning_rate": 1.9260154276176484e-06, "loss": 0.9786148071289062, "step": 1451 }, { "epoch": 0.3345622119815668, "grad_norm": 0.5115027993477321, "learning_rate": 1.925871544481873e-06, "loss": 0.8513587117195129, "step": 1452 }, { "epoch": 0.3347926267281106, "grad_norm": 0.4974492616751121, "learning_rate": 1.9257275269570686e-06, "loss": 0.7737371921539307, "step": 1453 }, { "epoch": 0.3350230414746544, "grad_norm": 0.6186615203368176, "learning_rate": 1.9255833750641392e-06, "loss": 0.8567382097244263, "step": 1454 }, { "epoch": 0.3352534562211982, "grad_norm": 0.5498745898568592, "learning_rate": 1.9254390888240078e-06, "loss": 0.893741250038147, "step": 1455 }, { "epoch": 0.33548387096774196, "grad_norm": 0.5996544133152318, "learning_rate": 1.9252946682576184e-06, "loss": 0.9558119773864746, "step": 1456 }, { "epoch": 0.3357142857142857, "grad_norm": 0.6629164295929078, "learning_rate": 1.9251501133859323e-06, "loss": 0.7055593729019165, "step": 1457 }, { "epoch": 0.33594470046082947, "grad_norm": 0.652213418545905, "learning_rate": 1.9250054242299326e-06, "loss": 0.8409907817840576, "step": 1458 }, { "epoch": 0.33617511520737325, "grad_norm": 0.5648924790833157, "learning_rate": 1.9248606008106196e-06, "loss": 0.9459772109985352, "step": 1459 }, { "epoch": 0.33640552995391704, "grad_norm": 0.6285611694534835, "learning_rate": 1.924715643149015e-06, "loss": 0.7848879098892212, "step": 1460 }, { "epoch": 0.3366359447004608, "grad_norm": 0.8030718131506138, "learning_rate": 1.924570551266159e-06, "loss": 1.0365980863571167, "step": 1461 }, { "epoch": 0.3368663594470046, "grad_norm": 0.6014174038703485, "learning_rate": 1.924425325183111e-06, "loss": 0.7331318855285645, "step": 1462 }, { "epoch": 0.3370967741935484, "grad_norm": 0.6427865459032713, "learning_rate": 1.9242799649209515e-06, "loss": 0.8536237478256226, "step": 1463 }, { "epoch": 0.33732718894009217, "grad_norm": 0.6525839289073214, "learning_rate": 1.9241344705007784e-06, "loss": 0.9296326637268066, "step": 1464 }, { "epoch": 0.33755760368663595, "grad_norm": 0.887947392639257, "learning_rate": 1.92398884194371e-06, "loss": 0.9084932804107666, "step": 1465 }, { "epoch": 0.33778801843317974, "grad_norm": 0.5270165853452017, "learning_rate": 1.9238430792708847e-06, "loss": 0.7426833510398865, "step": 1466 }, { "epoch": 0.3380184331797235, "grad_norm": 0.5410658114261949, "learning_rate": 1.9236971825034595e-06, "loss": 0.7655431032180786, "step": 1467 }, { "epoch": 0.3382488479262673, "grad_norm": 0.8331011387344854, "learning_rate": 1.923551151662611e-06, "loss": 0.9463646411895752, "step": 1468 }, { "epoch": 0.3384792626728111, "grad_norm": 0.5486811314665706, "learning_rate": 1.9234049867695355e-06, "loss": 0.75661301612854, "step": 1469 }, { "epoch": 0.3387096774193548, "grad_norm": 0.6386489226368193, "learning_rate": 1.9232586878454486e-06, "loss": 0.7411723136901855, "step": 1470 }, { "epoch": 0.3389400921658986, "grad_norm": 0.6921074075590697, "learning_rate": 1.9231122549115854e-06, "loss": 0.9537360072135925, "step": 1471 }, { "epoch": 0.3391705069124424, "grad_norm": 0.6895160542670777, "learning_rate": 1.9229656879892004e-06, "loss": 0.9527197480201721, "step": 1472 }, { "epoch": 0.33940092165898617, "grad_norm": 0.7025720730409266, "learning_rate": 1.9228189870995674e-06, "loss": 0.9083822965621948, "step": 1473 }, { "epoch": 0.33963133640552995, "grad_norm": 0.5301970222083436, "learning_rate": 1.9226721522639804e-06, "loss": 0.8546823263168335, "step": 1474 }, { "epoch": 0.33986175115207373, "grad_norm": 0.6709689097402769, "learning_rate": 1.922525183503752e-06, "loss": 0.7429832816123962, "step": 1475 }, { "epoch": 0.3400921658986175, "grad_norm": 0.62032231336291, "learning_rate": 1.922378080840214e-06, "loss": 0.8805499076843262, "step": 1476 }, { "epoch": 0.3403225806451613, "grad_norm": 0.681736765273056, "learning_rate": 1.9222308442947193e-06, "loss": 1.0177074670791626, "step": 1477 }, { "epoch": 0.3405529953917051, "grad_norm": 0.5202393927717802, "learning_rate": 1.922083473888638e-06, "loss": 0.778317391872406, "step": 1478 }, { "epoch": 0.34078341013824887, "grad_norm": 0.5628134051805, "learning_rate": 1.921935969643361e-06, "loss": 0.8461896181106567, "step": 1479 }, { "epoch": 0.34101382488479265, "grad_norm": 0.5553667327802273, "learning_rate": 1.921788331580299e-06, "loss": 0.8028895258903503, "step": 1480 }, { "epoch": 0.34124423963133643, "grad_norm": 0.5368047903298083, "learning_rate": 1.9216405597208803e-06, "loss": 0.9071121215820312, "step": 1481 }, { "epoch": 0.34147465437788016, "grad_norm": 0.6427007304701287, "learning_rate": 1.921492654086555e-06, "loss": 0.7715062499046326, "step": 1482 }, { "epoch": 0.34170506912442394, "grad_norm": 0.5552851307839923, "learning_rate": 1.9213446146987907e-06, "loss": 0.8446664810180664, "step": 1483 }, { "epoch": 0.3419354838709677, "grad_norm": 0.712846002939772, "learning_rate": 1.9211964415790754e-06, "loss": 0.9835283756256104, "step": 1484 }, { "epoch": 0.3421658986175115, "grad_norm": 0.8210412746012221, "learning_rate": 1.921048134748916e-06, "loss": 1.0630817413330078, "step": 1485 }, { "epoch": 0.3423963133640553, "grad_norm": 0.6748930312757173, "learning_rate": 1.920899694229839e-06, "loss": 0.8514837622642517, "step": 1486 }, { "epoch": 0.3426267281105991, "grad_norm": 0.6222560657794074, "learning_rate": 1.920751120043391e-06, "loss": 0.7302432060241699, "step": 1487 }, { "epoch": 0.34285714285714286, "grad_norm": 0.7079869651359869, "learning_rate": 1.920602412211136e-06, "loss": 0.778337836265564, "step": 1488 }, { "epoch": 0.34308755760368664, "grad_norm": 0.6890026561089317, "learning_rate": 1.92045357075466e-06, "loss": 0.815348207950592, "step": 1489 }, { "epoch": 0.3433179723502304, "grad_norm": 0.5476065495891982, "learning_rate": 1.920304595695567e-06, "loss": 0.7844003438949585, "step": 1490 }, { "epoch": 0.3435483870967742, "grad_norm": 0.6758218109549144, "learning_rate": 1.92015548705548e-06, "loss": 0.9513435363769531, "step": 1491 }, { "epoch": 0.343778801843318, "grad_norm": 0.6450445262879821, "learning_rate": 1.9200062448560424e-06, "loss": 0.7506752610206604, "step": 1492 }, { "epoch": 0.3440092165898618, "grad_norm": 0.6233205865485715, "learning_rate": 1.919856869118916e-06, "loss": 0.739554762840271, "step": 1493 }, { "epoch": 0.3442396313364055, "grad_norm": 0.7436551378630792, "learning_rate": 1.9197073598657826e-06, "loss": 0.8167033791542053, "step": 1494 }, { "epoch": 0.3444700460829493, "grad_norm": 0.6904439986569212, "learning_rate": 1.919557717118344e-06, "loss": 0.9308677911758423, "step": 1495 }, { "epoch": 0.34470046082949307, "grad_norm": 0.6340340245140523, "learning_rate": 1.9194079408983197e-06, "loss": 0.8601467609405518, "step": 1496 }, { "epoch": 0.34493087557603686, "grad_norm": 0.5645119744435318, "learning_rate": 1.91925803122745e-06, "loss": 0.8062653541564941, "step": 1497 }, { "epoch": 0.34516129032258064, "grad_norm": 0.6267130901098985, "learning_rate": 1.9191079881274943e-06, "loss": 0.8910555839538574, "step": 1498 }, { "epoch": 0.3453917050691244, "grad_norm": 0.6398235864437706, "learning_rate": 1.9189578116202307e-06, "loss": 0.8604668378829956, "step": 1499 }, { "epoch": 0.3456221198156682, "grad_norm": 0.660935387898433, "learning_rate": 1.918807501727457e-06, "loss": 0.7255126237869263, "step": 1500 }, { "epoch": 0.345852534562212, "grad_norm": 0.6873891579533423, "learning_rate": 1.9186570584709912e-06, "loss": 0.998108983039856, "step": 1501 }, { "epoch": 0.34608294930875577, "grad_norm": 0.6220147185177797, "learning_rate": 1.918506481872669e-06, "loss": 0.7660422325134277, "step": 1502 }, { "epoch": 0.34631336405529956, "grad_norm": 0.6579892645247903, "learning_rate": 1.9183557719543472e-06, "loss": 0.868739902973175, "step": 1503 }, { "epoch": 0.34654377880184334, "grad_norm": 0.5789973673480234, "learning_rate": 1.918204928737901e-06, "loss": 0.6630350351333618, "step": 1504 }, { "epoch": 0.3467741935483871, "grad_norm": 0.5444610824332694, "learning_rate": 1.9180539522452247e-06, "loss": 0.8651586771011353, "step": 1505 }, { "epoch": 0.34700460829493085, "grad_norm": 0.5927111235913876, "learning_rate": 1.9179028424982326e-06, "loss": 0.8584417700767517, "step": 1506 }, { "epoch": 0.34723502304147463, "grad_norm": 0.5575547611441275, "learning_rate": 1.917751599518858e-06, "loss": 0.7793893814086914, "step": 1507 }, { "epoch": 0.3474654377880184, "grad_norm": 0.768634414143097, "learning_rate": 1.9176002233290542e-06, "loss": 0.8499815464019775, "step": 1508 }, { "epoch": 0.3476958525345622, "grad_norm": 0.7795460044280101, "learning_rate": 1.917448713950792e-06, "loss": 0.7914199829101562, "step": 1509 }, { "epoch": 0.347926267281106, "grad_norm": 0.8510793838671106, "learning_rate": 1.9172970714060637e-06, "loss": 0.942331850528717, "step": 1510 }, { "epoch": 0.34815668202764977, "grad_norm": 0.621963787262809, "learning_rate": 1.9171452957168803e-06, "loss": 0.7780032157897949, "step": 1511 }, { "epoch": 0.34838709677419355, "grad_norm": 0.6399045325995384, "learning_rate": 1.916993386905271e-06, "loss": 0.8544708490371704, "step": 1512 }, { "epoch": 0.34861751152073733, "grad_norm": 0.6890752127070114, "learning_rate": 1.9168413449932855e-06, "loss": 0.798173725605011, "step": 1513 }, { "epoch": 0.3488479262672811, "grad_norm": 0.7396810139453504, "learning_rate": 1.9166891700029922e-06, "loss": 0.9426852464675903, "step": 1514 }, { "epoch": 0.3490783410138249, "grad_norm": 0.7455227520654529, "learning_rate": 1.91653686195648e-06, "loss": 0.922240138053894, "step": 1515 }, { "epoch": 0.3493087557603687, "grad_norm": 0.630161091555718, "learning_rate": 1.9163844208758556e-06, "loss": 0.7997978925704956, "step": 1516 }, { "epoch": 0.34953917050691247, "grad_norm": 0.7560374253096135, "learning_rate": 1.9162318467832455e-06, "loss": 1.0597525835037231, "step": 1517 }, { "epoch": 0.3497695852534562, "grad_norm": 0.6669142658812499, "learning_rate": 1.9160791397007957e-06, "loss": 0.8211681842803955, "step": 1518 }, { "epoch": 0.35, "grad_norm": 0.6134468456903489, "learning_rate": 1.9159262996506716e-06, "loss": 0.8078022003173828, "step": 1519 }, { "epoch": 0.35023041474654376, "grad_norm": 0.8800559709758627, "learning_rate": 1.915773326655057e-06, "loss": 0.9449256658554077, "step": 1520 }, { "epoch": 0.35046082949308754, "grad_norm": 0.6806561068219223, "learning_rate": 1.915620220736157e-06, "loss": 0.8744012117385864, "step": 1521 }, { "epoch": 0.35069124423963133, "grad_norm": 0.501693303726274, "learning_rate": 1.9154669819161946e-06, "loss": 0.9503095746040344, "step": 1522 }, { "epoch": 0.3509216589861751, "grad_norm": 0.7422922368497302, "learning_rate": 1.9153136102174106e-06, "loss": 1.055432915687561, "step": 1523 }, { "epoch": 0.3511520737327189, "grad_norm": 0.7420134076461076, "learning_rate": 1.9151601056620684e-06, "loss": 0.8540226221084595, "step": 1524 }, { "epoch": 0.3513824884792627, "grad_norm": 0.6432500784024293, "learning_rate": 1.915006468272448e-06, "loss": 0.8846266865730286, "step": 1525 }, { "epoch": 0.35161290322580646, "grad_norm": 0.6065038491164693, "learning_rate": 1.9148526980708507e-06, "loss": 0.8941656947135925, "step": 1526 }, { "epoch": 0.35184331797235024, "grad_norm": 0.657637251938276, "learning_rate": 1.914698795079595e-06, "loss": 0.868419885635376, "step": 1527 }, { "epoch": 0.35207373271889403, "grad_norm": 0.6471997072963731, "learning_rate": 1.91454475932102e-06, "loss": 0.7375580072402954, "step": 1528 }, { "epoch": 0.3523041474654378, "grad_norm": 0.5813494020686044, "learning_rate": 1.9143905908174844e-06, "loss": 0.9415492415428162, "step": 1529 }, { "epoch": 0.35253456221198154, "grad_norm": 0.5845641741459107, "learning_rate": 1.9142362895913646e-06, "loss": 0.8395911455154419, "step": 1530 }, { "epoch": 0.3527649769585253, "grad_norm": 0.6214793611789142, "learning_rate": 1.914081855665057e-06, "loss": 0.831234335899353, "step": 1531 }, { "epoch": 0.3529953917050691, "grad_norm": 0.6337865377576076, "learning_rate": 1.9139272890609794e-06, "loss": 0.8975566029548645, "step": 1532 }, { "epoch": 0.3532258064516129, "grad_norm": 0.629586080319263, "learning_rate": 1.913772589801565e-06, "loss": 0.8134264945983887, "step": 1533 }, { "epoch": 0.3534562211981567, "grad_norm": 0.6728325426784268, "learning_rate": 1.913617757909269e-06, "loss": 0.9507275819778442, "step": 1534 }, { "epoch": 0.35368663594470046, "grad_norm": 0.6431752162471284, "learning_rate": 1.913462793406565e-06, "loss": 0.8839038610458374, "step": 1535 }, { "epoch": 0.35391705069124424, "grad_norm": 0.5543997844984022, "learning_rate": 1.9133076963159453e-06, "loss": 0.8708392381668091, "step": 1536 }, { "epoch": 0.354147465437788, "grad_norm": 0.6062385114401656, "learning_rate": 1.913152466659923e-06, "loss": 0.7609391212463379, "step": 1537 }, { "epoch": 0.3543778801843318, "grad_norm": 0.7180303128257083, "learning_rate": 1.912997104461029e-06, "loss": 0.9231283664703369, "step": 1538 }, { "epoch": 0.3546082949308756, "grad_norm": 0.6890910539107805, "learning_rate": 1.912841609741814e-06, "loss": 1.0297726392745972, "step": 1539 }, { "epoch": 0.3548387096774194, "grad_norm": 0.75971130189085, "learning_rate": 1.9126859825248475e-06, "loss": 0.8798987865447998, "step": 1540 }, { "epoch": 0.35506912442396316, "grad_norm": 0.7030378763019209, "learning_rate": 1.912530222832719e-06, "loss": 0.9104069471359253, "step": 1541 }, { "epoch": 0.35529953917050694, "grad_norm": 0.6534729730017157, "learning_rate": 1.9123743306880368e-06, "loss": 0.7618073225021362, "step": 1542 }, { "epoch": 0.35552995391705067, "grad_norm": 0.7461748863693719, "learning_rate": 1.912218306113428e-06, "loss": 0.8397510051727295, "step": 1543 }, { "epoch": 0.35576036866359445, "grad_norm": 0.7060377086024656, "learning_rate": 1.91206214913154e-06, "loss": 0.9884299039840698, "step": 1544 }, { "epoch": 0.35599078341013823, "grad_norm": 0.9576081524625122, "learning_rate": 1.9119058597650385e-06, "loss": 0.9878349304199219, "step": 1545 }, { "epoch": 0.356221198156682, "grad_norm": 0.6493274093007226, "learning_rate": 1.9117494380366086e-06, "loss": 0.8790488243103027, "step": 1546 }, { "epoch": 0.3564516129032258, "grad_norm": 0.5310131973918355, "learning_rate": 1.9115928839689546e-06, "loss": 0.7390745878219604, "step": 1547 }, { "epoch": 0.3566820276497696, "grad_norm": 0.6882029258971281, "learning_rate": 1.9114361975848004e-06, "loss": 0.7354288101196289, "step": 1548 }, { "epoch": 0.35691244239631337, "grad_norm": 0.7667535594605746, "learning_rate": 1.911279378906889e-06, "loss": 0.9234673976898193, "step": 1549 }, { "epoch": 0.35714285714285715, "grad_norm": 0.6115013610277281, "learning_rate": 1.911122427957982e-06, "loss": 0.8913710117340088, "step": 1550 }, { "epoch": 0.35737327188940093, "grad_norm": 0.7050561523779678, "learning_rate": 1.9109653447608605e-06, "loss": 0.754358172416687, "step": 1551 }, { "epoch": 0.3576036866359447, "grad_norm": 0.784312775933048, "learning_rate": 1.910808129338325e-06, "loss": 0.7361906170845032, "step": 1552 }, { "epoch": 0.3578341013824885, "grad_norm": 0.7799572736490341, "learning_rate": 1.9106507817131957e-06, "loss": 0.8167279362678528, "step": 1553 }, { "epoch": 0.3580645161290323, "grad_norm": 0.5335250967831215, "learning_rate": 1.910493301908311e-06, "loss": 0.7504739761352539, "step": 1554 }, { "epoch": 0.358294930875576, "grad_norm": 0.7032319483863736, "learning_rate": 1.9103356899465287e-06, "loss": 0.8452355861663818, "step": 1555 }, { "epoch": 0.3585253456221198, "grad_norm": 0.6126249946093243, "learning_rate": 1.9101779458507263e-06, "loss": 0.891547679901123, "step": 1556 }, { "epoch": 0.3587557603686636, "grad_norm": 0.6935978783962933, "learning_rate": 1.9100200696438e-06, "loss": 0.8132680654525757, "step": 1557 }, { "epoch": 0.35898617511520736, "grad_norm": 0.6519674133121284, "learning_rate": 1.9098620613486646e-06, "loss": 0.799482524394989, "step": 1558 }, { "epoch": 0.35921658986175115, "grad_norm": 0.5904521460015955, "learning_rate": 1.909703920988256e-06, "loss": 0.8490267992019653, "step": 1559 }, { "epoch": 0.35944700460829493, "grad_norm": 0.6819976276562522, "learning_rate": 1.9095456485855277e-06, "loss": 0.8608428239822388, "step": 1560 }, { "epoch": 0.3596774193548387, "grad_norm": 0.710056379748393, "learning_rate": 1.9093872441634526e-06, "loss": 0.8460499048233032, "step": 1561 }, { "epoch": 0.3599078341013825, "grad_norm": 0.7727130217690178, "learning_rate": 1.9092287077450226e-06, "loss": 0.9268433451652527, "step": 1562 }, { "epoch": 0.3601382488479263, "grad_norm": 0.612809776724531, "learning_rate": 1.90907003935325e-06, "loss": 0.7354154586791992, "step": 1563 }, { "epoch": 0.36036866359447006, "grad_norm": 0.6941943523357101, "learning_rate": 1.9089112390111637e-06, "loss": 0.87982177734375, "step": 1564 }, { "epoch": 0.36059907834101385, "grad_norm": 0.7092001355075633, "learning_rate": 1.9087523067418148e-06, "loss": 0.994953453540802, "step": 1565 }, { "epoch": 0.36082949308755763, "grad_norm": 0.7240785511234525, "learning_rate": 1.9085932425682715e-06, "loss": 0.8623256087303162, "step": 1566 }, { "epoch": 0.36105990783410136, "grad_norm": 0.7577571727617612, "learning_rate": 1.908434046513622e-06, "loss": 0.8752846717834473, "step": 1567 }, { "epoch": 0.36129032258064514, "grad_norm": 0.7538020694732109, "learning_rate": 1.908274718600973e-06, "loss": 0.9002033472061157, "step": 1568 }, { "epoch": 0.3615207373271889, "grad_norm": 0.6751938160957709, "learning_rate": 1.908115258853451e-06, "loss": 0.7290444374084473, "step": 1569 }, { "epoch": 0.3617511520737327, "grad_norm": 0.5739449847646289, "learning_rate": 1.9079556672942016e-06, "loss": 0.6833889484405518, "step": 1570 }, { "epoch": 0.3619815668202765, "grad_norm": 0.7271514059808825, "learning_rate": 1.907795943946389e-06, "loss": 1.0033842325210571, "step": 1571 }, { "epoch": 0.3622119815668203, "grad_norm": 0.7261786878454322, "learning_rate": 1.907636088833197e-06, "loss": 0.9590950012207031, "step": 1572 }, { "epoch": 0.36244239631336406, "grad_norm": 0.6796147019608265, "learning_rate": 1.907476101977828e-06, "loss": 0.8812122344970703, "step": 1573 }, { "epoch": 0.36267281105990784, "grad_norm": 0.5509770826635522, "learning_rate": 1.9073159834035045e-06, "loss": 0.7549433708190918, "step": 1574 }, { "epoch": 0.3629032258064516, "grad_norm": 0.8344983468044503, "learning_rate": 1.9071557331334667e-06, "loss": 0.9235562086105347, "step": 1575 }, { "epoch": 0.3631336405529954, "grad_norm": 0.6317903590715543, "learning_rate": 1.9069953511909755e-06, "loss": 0.8468542098999023, "step": 1576 }, { "epoch": 0.3633640552995392, "grad_norm": 0.5574642699953357, "learning_rate": 1.9068348375993096e-06, "loss": 0.8804000616073608, "step": 1577 }, { "epoch": 0.363594470046083, "grad_norm": 0.5912501411899118, "learning_rate": 1.9066741923817676e-06, "loss": 0.762598991394043, "step": 1578 }, { "epoch": 0.3638248847926267, "grad_norm": 0.7706966706442087, "learning_rate": 1.9065134155616666e-06, "loss": 0.8791940212249756, "step": 1579 }, { "epoch": 0.3640552995391705, "grad_norm": 0.7168527524200441, "learning_rate": 1.9063525071623439e-06, "loss": 0.7041842937469482, "step": 1580 }, { "epoch": 0.36428571428571427, "grad_norm": 0.6160916310238944, "learning_rate": 1.9061914672071543e-06, "loss": 0.9526468515396118, "step": 1581 }, { "epoch": 0.36451612903225805, "grad_norm": 0.7118890640067297, "learning_rate": 1.906030295719473e-06, "loss": 0.9388316869735718, "step": 1582 }, { "epoch": 0.36474654377880183, "grad_norm": 0.6899284739234433, "learning_rate": 1.9058689927226936e-06, "loss": 0.7295777797698975, "step": 1583 }, { "epoch": 0.3649769585253456, "grad_norm": 0.773766722090894, "learning_rate": 1.905707558240229e-06, "loss": 0.7540932297706604, "step": 1584 }, { "epoch": 0.3652073732718894, "grad_norm": 0.7012558071518832, "learning_rate": 1.9055459922955118e-06, "loss": 0.9457792639732361, "step": 1585 }, { "epoch": 0.3654377880184332, "grad_norm": 0.8248538436303866, "learning_rate": 1.9053842949119923e-06, "loss": 0.9121883511543274, "step": 1586 }, { "epoch": 0.36566820276497697, "grad_norm": 0.7283384308967912, "learning_rate": 1.905222466113141e-06, "loss": 0.8140746355056763, "step": 1587 }, { "epoch": 0.36589861751152075, "grad_norm": 0.6419705545105435, "learning_rate": 1.905060505922447e-06, "loss": 0.7403484582901001, "step": 1588 }, { "epoch": 0.36612903225806454, "grad_norm": 0.581047347336086, "learning_rate": 1.9048984143634188e-06, "loss": 0.9040734171867371, "step": 1589 }, { "epoch": 0.3663594470046083, "grad_norm": 0.8763582049227886, "learning_rate": 1.9047361914595834e-06, "loss": 0.9060958623886108, "step": 1590 }, { "epoch": 0.36658986175115205, "grad_norm": 0.563240407907546, "learning_rate": 1.904573837234488e-06, "loss": 0.6925936937332153, "step": 1591 }, { "epoch": 0.36682027649769583, "grad_norm": 0.6465995527416484, "learning_rate": 1.9044113517116973e-06, "loss": 0.8120197057723999, "step": 1592 }, { "epoch": 0.3670506912442396, "grad_norm": 0.6544256373051048, "learning_rate": 1.9042487349147965e-06, "loss": 0.796414852142334, "step": 1593 }, { "epoch": 0.3672811059907834, "grad_norm": 0.5916998574283423, "learning_rate": 1.9040859868673885e-06, "loss": 0.8390822410583496, "step": 1594 }, { "epoch": 0.3675115207373272, "grad_norm": 0.6567403008386238, "learning_rate": 1.9039231075930967e-06, "loss": 0.990093469619751, "step": 1595 }, { "epoch": 0.36774193548387096, "grad_norm": 0.733917290012865, "learning_rate": 1.9037600971155623e-06, "loss": 0.8548597097396851, "step": 1596 }, { "epoch": 0.36797235023041475, "grad_norm": 0.5429475903618856, "learning_rate": 1.9035969554584464e-06, "loss": 0.687299370765686, "step": 1597 }, { "epoch": 0.36820276497695853, "grad_norm": 0.9276548262086025, "learning_rate": 1.9034336826454282e-06, "loss": 0.7857942581176758, "step": 1598 }, { "epoch": 0.3684331797235023, "grad_norm": 0.7345227244712206, "learning_rate": 1.9032702787002072e-06, "loss": 0.8836538195610046, "step": 1599 }, { "epoch": 0.3686635944700461, "grad_norm": 0.723858907192251, "learning_rate": 1.9031067436465011e-06, "loss": 0.8132715225219727, "step": 1600 }, { "epoch": 0.3688940092165899, "grad_norm": 0.6649285274594987, "learning_rate": 1.9029430775080467e-06, "loss": 0.7632347345352173, "step": 1601 }, { "epoch": 0.36912442396313366, "grad_norm": 0.6319858893374919, "learning_rate": 1.9027792803086e-06, "loss": 0.8616297841072083, "step": 1602 }, { "epoch": 0.36935483870967745, "grad_norm": 0.6067565637769744, "learning_rate": 1.9026153520719358e-06, "loss": 0.8418172597885132, "step": 1603 }, { "epoch": 0.3695852534562212, "grad_norm": 0.7094320350542224, "learning_rate": 1.902451292821848e-06, "loss": 0.7253717184066772, "step": 1604 }, { "epoch": 0.36981566820276496, "grad_norm": 0.8059000016280097, "learning_rate": 1.90228710258215e-06, "loss": 0.9746035933494568, "step": 1605 }, { "epoch": 0.37004608294930874, "grad_norm": 0.5259402340057983, "learning_rate": 1.9021227813766733e-06, "loss": 0.7722853422164917, "step": 1606 }, { "epoch": 0.3702764976958525, "grad_norm": 0.6925264238716391, "learning_rate": 1.9019583292292693e-06, "loss": 0.8278614282608032, "step": 1607 }, { "epoch": 0.3705069124423963, "grad_norm": 0.6439238935194896, "learning_rate": 1.9017937461638078e-06, "loss": 0.7433085441589355, "step": 1608 }, { "epoch": 0.3707373271889401, "grad_norm": 0.5505689424398915, "learning_rate": 1.901629032204178e-06, "loss": 0.9194153547286987, "step": 1609 }, { "epoch": 0.3709677419354839, "grad_norm": 0.5866951472740422, "learning_rate": 1.9014641873742877e-06, "loss": 0.8502616882324219, "step": 1610 }, { "epoch": 0.37119815668202766, "grad_norm": 0.6242266615517361, "learning_rate": 1.9012992116980637e-06, "loss": 0.8494570255279541, "step": 1611 }, { "epoch": 0.37142857142857144, "grad_norm": 0.7369836132356214, "learning_rate": 1.9011341051994526e-06, "loss": 0.8567800521850586, "step": 1612 }, { "epoch": 0.3716589861751152, "grad_norm": 0.6246604791910833, "learning_rate": 1.9009688679024189e-06, "loss": 0.7739682197570801, "step": 1613 }, { "epoch": 0.371889400921659, "grad_norm": 0.754158311495332, "learning_rate": 1.900803499830947e-06, "loss": 0.8548814058303833, "step": 1614 }, { "epoch": 0.3721198156682028, "grad_norm": 0.5813822362984273, "learning_rate": 1.9006380010090395e-06, "loss": 0.7444359064102173, "step": 1615 }, { "epoch": 0.3723502304147465, "grad_norm": 1.02732235167255, "learning_rate": 1.9004723714607183e-06, "loss": 1.0483827590942383, "step": 1616 }, { "epoch": 0.3725806451612903, "grad_norm": 0.7020606936102383, "learning_rate": 1.9003066112100248e-06, "loss": 0.7734435200691223, "step": 1617 }, { "epoch": 0.3728110599078341, "grad_norm": 0.7388837596699729, "learning_rate": 1.9001407202810181e-06, "loss": 0.856806755065918, "step": 1618 }, { "epoch": 0.37304147465437787, "grad_norm": 0.6630252498689021, "learning_rate": 1.8999746986977776e-06, "loss": 0.8708832263946533, "step": 1619 }, { "epoch": 0.37327188940092165, "grad_norm": 0.7833548721469644, "learning_rate": 1.899808546484401e-06, "loss": 0.9295653104782104, "step": 1620 }, { "epoch": 0.37350230414746544, "grad_norm": 0.8120612065986471, "learning_rate": 1.8996422636650054e-06, "loss": 0.8799598217010498, "step": 1621 }, { "epoch": 0.3737327188940092, "grad_norm": 0.6113644757026901, "learning_rate": 1.8994758502637259e-06, "loss": 0.8014140725135803, "step": 1622 }, { "epoch": 0.373963133640553, "grad_norm": 0.7305462035644114, "learning_rate": 1.8993093063047174e-06, "loss": 0.8252615928649902, "step": 1623 }, { "epoch": 0.3741935483870968, "grad_norm": 0.5571708900709818, "learning_rate": 1.899142631812154e-06, "loss": 0.8617361783981323, "step": 1624 }, { "epoch": 0.37442396313364057, "grad_norm": 0.7088005059034134, "learning_rate": 1.8989758268102274e-06, "loss": 0.9316745400428772, "step": 1625 }, { "epoch": 0.37465437788018435, "grad_norm": 0.5449801119846465, "learning_rate": 1.89880889132315e-06, "loss": 0.8195457458496094, "step": 1626 }, { "epoch": 0.37488479262672814, "grad_norm": 0.7143201633211917, "learning_rate": 1.8986418253751516e-06, "loss": 0.7828787565231323, "step": 1627 }, { "epoch": 0.37511520737327186, "grad_norm": 0.6506165386805676, "learning_rate": 1.898474628990482e-06, "loss": 0.8130955696105957, "step": 1628 }, { "epoch": 0.37534562211981565, "grad_norm": 0.7388682274593752, "learning_rate": 1.8983073021934097e-06, "loss": 0.9925695657730103, "step": 1629 }, { "epoch": 0.37557603686635943, "grad_norm": 0.7851734301973293, "learning_rate": 1.8981398450082216e-06, "loss": 0.8547999858856201, "step": 1630 }, { "epoch": 0.3758064516129032, "grad_norm": 0.7016894400602667, "learning_rate": 1.897972257459224e-06, "loss": 0.8922954797744751, "step": 1631 }, { "epoch": 0.376036866359447, "grad_norm": 0.641235710173759, "learning_rate": 1.8978045395707415e-06, "loss": 0.8553646802902222, "step": 1632 }, { "epoch": 0.3762672811059908, "grad_norm": 0.6780369843564141, "learning_rate": 1.897636691367119e-06, "loss": 0.7854139804840088, "step": 1633 }, { "epoch": 0.37649769585253456, "grad_norm": 0.8291834208164379, "learning_rate": 1.897468712872719e-06, "loss": 0.8968626260757446, "step": 1634 }, { "epoch": 0.37672811059907835, "grad_norm": 0.8135056284613995, "learning_rate": 1.8973006041119234e-06, "loss": 0.8898152112960815, "step": 1635 }, { "epoch": 0.37695852534562213, "grad_norm": 0.7215595529410248, "learning_rate": 1.8971323651091332e-06, "loss": 0.8499374389648438, "step": 1636 }, { "epoch": 0.3771889400921659, "grad_norm": 0.5955881573233954, "learning_rate": 1.8969639958887677e-06, "loss": 0.7803430557250977, "step": 1637 }, { "epoch": 0.3774193548387097, "grad_norm": 0.672225539346555, "learning_rate": 1.8967954964752657e-06, "loss": 0.7669799327850342, "step": 1638 }, { "epoch": 0.3776497695852535, "grad_norm": 0.7164416850564317, "learning_rate": 1.8966268668930845e-06, "loss": 0.9085204601287842, "step": 1639 }, { "epoch": 0.3778801843317972, "grad_norm": 0.8492247946008473, "learning_rate": 1.8964581071667005e-06, "loss": 0.7793002724647522, "step": 1640 }, { "epoch": 0.378110599078341, "grad_norm": 0.6359200183287212, "learning_rate": 1.896289217320609e-06, "loss": 0.8649430274963379, "step": 1641 }, { "epoch": 0.3783410138248848, "grad_norm": 0.6424804906800053, "learning_rate": 1.8961201973793243e-06, "loss": 0.856898844242096, "step": 1642 }, { "epoch": 0.37857142857142856, "grad_norm": 0.7702312360726356, "learning_rate": 1.895951047367379e-06, "loss": 0.8221957087516785, "step": 1643 }, { "epoch": 0.37880184331797234, "grad_norm": 0.7163935487823062, "learning_rate": 1.8957817673093256e-06, "loss": 0.8158079385757446, "step": 1644 }, { "epoch": 0.3790322580645161, "grad_norm": 0.8008902981825888, "learning_rate": 1.8956123572297343e-06, "loss": 0.7803312540054321, "step": 1645 }, { "epoch": 0.3792626728110599, "grad_norm": 0.7902834195938876, "learning_rate": 1.8954428171531949e-06, "loss": 1.035685420036316, "step": 1646 }, { "epoch": 0.3794930875576037, "grad_norm": 0.6044824314396153, "learning_rate": 1.8952731471043161e-06, "loss": 0.6871123313903809, "step": 1647 }, { "epoch": 0.3797235023041475, "grad_norm": 0.6400629937897654, "learning_rate": 1.8951033471077253e-06, "loss": 0.9651780128479004, "step": 1648 }, { "epoch": 0.37995391705069126, "grad_norm": 0.7485926311468839, "learning_rate": 1.8949334171880687e-06, "loss": 1.018349528312683, "step": 1649 }, { "epoch": 0.38018433179723504, "grad_norm": 0.6571349103626993, "learning_rate": 1.894763357370011e-06, "loss": 0.6839278936386108, "step": 1650 }, { "epoch": 0.3804147465437788, "grad_norm": 0.6757724586058976, "learning_rate": 1.894593167678237e-06, "loss": 0.8442174196243286, "step": 1651 }, { "epoch": 0.38064516129032255, "grad_norm": 0.6368918088972565, "learning_rate": 1.8944228481374484e-06, "loss": 0.8224585056304932, "step": 1652 }, { "epoch": 0.38087557603686634, "grad_norm": 0.6970802562618803, "learning_rate": 1.8942523987723678e-06, "loss": 0.8570500612258911, "step": 1653 }, { "epoch": 0.3811059907834101, "grad_norm": 0.731718201815575, "learning_rate": 1.8940818196077354e-06, "loss": 0.7696554660797119, "step": 1654 }, { "epoch": 0.3813364055299539, "grad_norm": 0.7456139352122005, "learning_rate": 1.8939111106683103e-06, "loss": 0.822563886642456, "step": 1655 }, { "epoch": 0.3815668202764977, "grad_norm": 0.46565320695076334, "learning_rate": 1.8937402719788711e-06, "loss": 0.6537219882011414, "step": 1656 }, { "epoch": 0.38179723502304147, "grad_norm": 0.8414098679023442, "learning_rate": 1.8935693035642145e-06, "loss": 0.9081932306289673, "step": 1657 }, { "epoch": 0.38202764976958525, "grad_norm": 0.5018818977531995, "learning_rate": 1.8933982054491563e-06, "loss": 0.6839661598205566, "step": 1658 }, { "epoch": 0.38225806451612904, "grad_norm": 0.6964355972832653, "learning_rate": 1.8932269776585313e-06, "loss": 0.9187283515930176, "step": 1659 }, { "epoch": 0.3824884792626728, "grad_norm": 0.8100260748701062, "learning_rate": 1.893055620217193e-06, "loss": 0.9567047357559204, "step": 1660 }, { "epoch": 0.3827188940092166, "grad_norm": 0.7345697660292878, "learning_rate": 1.8928841331500136e-06, "loss": 0.785561203956604, "step": 1661 }, { "epoch": 0.3829493087557604, "grad_norm": 0.882033286363023, "learning_rate": 1.8927125164818842e-06, "loss": 0.8986088037490845, "step": 1662 }, { "epoch": 0.38317972350230417, "grad_norm": 0.7191553093714457, "learning_rate": 1.892540770237715e-06, "loss": 1.0027087926864624, "step": 1663 }, { "epoch": 0.38341013824884795, "grad_norm": 0.6970721775230337, "learning_rate": 1.8923688944424346e-06, "loss": 0.8502041697502136, "step": 1664 }, { "epoch": 0.3836405529953917, "grad_norm": 0.6684142159321271, "learning_rate": 1.8921968891209907e-06, "loss": 0.8526991605758667, "step": 1665 }, { "epoch": 0.38387096774193546, "grad_norm": 0.7082372977886758, "learning_rate": 1.8920247542983492e-06, "loss": 0.8084676265716553, "step": 1666 }, { "epoch": 0.38410138248847925, "grad_norm": 0.6206558140284871, "learning_rate": 1.8918524899994957e-06, "loss": 0.8922938704490662, "step": 1667 }, { "epoch": 0.38433179723502303, "grad_norm": 0.768771022868596, "learning_rate": 1.8916800962494337e-06, "loss": 0.7965600490570068, "step": 1668 }, { "epoch": 0.3845622119815668, "grad_norm": 0.6752105100256773, "learning_rate": 1.8915075730731865e-06, "loss": 0.9505549073219299, "step": 1669 }, { "epoch": 0.3847926267281106, "grad_norm": 0.6897214722687708, "learning_rate": 1.8913349204957947e-06, "loss": 0.9459924697875977, "step": 1670 }, { "epoch": 0.3850230414746544, "grad_norm": 0.6215985429421047, "learning_rate": 1.8911621385423195e-06, "loss": 0.8433674573898315, "step": 1671 }, { "epoch": 0.38525345622119817, "grad_norm": 0.7790027974124772, "learning_rate": 1.8909892272378398e-06, "loss": 0.8945955038070679, "step": 1672 }, { "epoch": 0.38548387096774195, "grad_norm": 0.6828005324330048, "learning_rate": 1.890816186607453e-06, "loss": 0.8580358624458313, "step": 1673 }, { "epoch": 0.38571428571428573, "grad_norm": 0.6249387555876122, "learning_rate": 1.8906430166762761e-06, "loss": 0.7708698511123657, "step": 1674 }, { "epoch": 0.3859447004608295, "grad_norm": 0.7418139824839276, "learning_rate": 1.8904697174694446e-06, "loss": 0.8647153377532959, "step": 1675 }, { "epoch": 0.3861751152073733, "grad_norm": 0.7428074816121766, "learning_rate": 1.890296289012112e-06, "loss": 0.9380506277084351, "step": 1676 }, { "epoch": 0.386405529953917, "grad_norm": 0.6218965089791644, "learning_rate": 1.8901227313294519e-06, "loss": 0.8814103603363037, "step": 1677 }, { "epoch": 0.3866359447004608, "grad_norm": 0.7768206335574417, "learning_rate": 1.8899490444466556e-06, "loss": 0.9348419904708862, "step": 1678 }, { "epoch": 0.3868663594470046, "grad_norm": 0.5956095891599564, "learning_rate": 1.8897752283889338e-06, "loss": 0.7502046823501587, "step": 1679 }, { "epoch": 0.3870967741935484, "grad_norm": 0.567040551050712, "learning_rate": 1.8896012831815155e-06, "loss": 0.8499769568443298, "step": 1680 }, { "epoch": 0.38732718894009216, "grad_norm": 0.6506272613615357, "learning_rate": 1.8894272088496487e-06, "loss": 0.8253993391990662, "step": 1681 }, { "epoch": 0.38755760368663594, "grad_norm": 0.7707626449058277, "learning_rate": 1.8892530054185998e-06, "loss": 0.8494073152542114, "step": 1682 }, { "epoch": 0.3877880184331797, "grad_norm": 0.7608738547672518, "learning_rate": 1.8890786729136546e-06, "loss": 0.8836106061935425, "step": 1683 }, { "epoch": 0.3880184331797235, "grad_norm": 0.636256009552465, "learning_rate": 1.8889042113601166e-06, "loss": 0.8949145078659058, "step": 1684 }, { "epoch": 0.3882488479262673, "grad_norm": 0.5966436023392323, "learning_rate": 1.8887296207833095e-06, "loss": 0.6210965514183044, "step": 1685 }, { "epoch": 0.3884792626728111, "grad_norm": 0.8527942588919344, "learning_rate": 1.8885549012085744e-06, "loss": 0.9216527938842773, "step": 1686 }, { "epoch": 0.38870967741935486, "grad_norm": 0.6878600463475216, "learning_rate": 1.8883800526612715e-06, "loss": 0.9266358613967896, "step": 1687 }, { "epoch": 0.38894009216589864, "grad_norm": 0.7261249184769291, "learning_rate": 1.88820507516678e-06, "loss": 0.8550606966018677, "step": 1688 }, { "epoch": 0.38917050691244237, "grad_norm": 0.702582367534852, "learning_rate": 1.888029968750498e-06, "loss": 0.8632181882858276, "step": 1689 }, { "epoch": 0.38940092165898615, "grad_norm": 0.8055419508573982, "learning_rate": 1.8878547334378415e-06, "loss": 0.8795493841171265, "step": 1690 }, { "epoch": 0.38963133640552994, "grad_norm": 0.8491490559655837, "learning_rate": 1.8876793692542456e-06, "loss": 0.9750456809997559, "step": 1691 }, { "epoch": 0.3898617511520737, "grad_norm": 0.7818793926101317, "learning_rate": 1.8875038762251645e-06, "loss": 0.9270161390304565, "step": 1692 }, { "epoch": 0.3900921658986175, "grad_norm": 0.7260894881906815, "learning_rate": 1.8873282543760705e-06, "loss": 0.8154089450836182, "step": 1693 }, { "epoch": 0.3903225806451613, "grad_norm": 0.692223503364103, "learning_rate": 1.887152503732455e-06, "loss": 0.9245043992996216, "step": 1694 }, { "epoch": 0.39055299539170507, "grad_norm": 0.7622355519095229, "learning_rate": 1.8869766243198284e-06, "loss": 0.9218056201934814, "step": 1695 }, { "epoch": 0.39078341013824885, "grad_norm": 0.5749624768358436, "learning_rate": 1.8868006161637192e-06, "loss": 0.7753894329071045, "step": 1696 }, { "epoch": 0.39101382488479264, "grad_norm": 0.7181901167791495, "learning_rate": 1.8866244792896739e-06, "loss": 0.8455277681350708, "step": 1697 }, { "epoch": 0.3912442396313364, "grad_norm": 0.7361657621974459, "learning_rate": 1.8864482137232596e-06, "loss": 0.8301571607589722, "step": 1698 }, { "epoch": 0.3914746543778802, "grad_norm": 0.5504243602930398, "learning_rate": 1.8862718194900602e-06, "loss": 0.9768285155296326, "step": 1699 }, { "epoch": 0.391705069124424, "grad_norm": 0.7416616964447972, "learning_rate": 1.8860952966156798e-06, "loss": 0.9659395217895508, "step": 1700 }, { "epoch": 0.3919354838709677, "grad_norm": 0.731283063502841, "learning_rate": 1.8859186451257401e-06, "loss": 0.9975444078445435, "step": 1701 }, { "epoch": 0.3921658986175115, "grad_norm": 0.712824030540976, "learning_rate": 1.8857418650458816e-06, "loss": 0.9248796701431274, "step": 1702 }, { "epoch": 0.3923963133640553, "grad_norm": 0.6864309886370629, "learning_rate": 1.8855649564017642e-06, "loss": 0.8792428970336914, "step": 1703 }, { "epoch": 0.39262672811059907, "grad_norm": 0.7264626081176593, "learning_rate": 1.8853879192190657e-06, "loss": 0.8387417197227478, "step": 1704 }, { "epoch": 0.39285714285714285, "grad_norm": 0.707677593822268, "learning_rate": 1.8852107535234828e-06, "loss": 0.7020218372344971, "step": 1705 }, { "epoch": 0.39308755760368663, "grad_norm": 0.673092322659609, "learning_rate": 1.885033459340731e-06, "loss": 0.7388321161270142, "step": 1706 }, { "epoch": 0.3933179723502304, "grad_norm": 0.7503922468030345, "learning_rate": 1.8848560366965441e-06, "loss": 0.7536240220069885, "step": 1707 }, { "epoch": 0.3935483870967742, "grad_norm": 0.7237343332600692, "learning_rate": 1.8846784856166746e-06, "loss": 0.747667670249939, "step": 1708 }, { "epoch": 0.393778801843318, "grad_norm": 0.7263541821971573, "learning_rate": 1.8845008061268945e-06, "loss": 0.8068975210189819, "step": 1709 }, { "epoch": 0.39400921658986177, "grad_norm": 0.7581453840562968, "learning_rate": 1.8843229982529932e-06, "loss": 0.7613410949707031, "step": 1710 }, { "epoch": 0.39423963133640555, "grad_norm": 0.6546080156681554, "learning_rate": 1.8841450620207793e-06, "loss": 0.8579158782958984, "step": 1711 }, { "epoch": 0.39447004608294933, "grad_norm": 0.6400652758844664, "learning_rate": 1.88396699745608e-06, "loss": 0.8754673004150391, "step": 1712 }, { "epoch": 0.39470046082949306, "grad_norm": 0.7227539443635326, "learning_rate": 1.8837888045847415e-06, "loss": 0.7988177537918091, "step": 1713 }, { "epoch": 0.39493087557603684, "grad_norm": 0.7533730909693769, "learning_rate": 1.8836104834326279e-06, "loss": 0.8658367395401001, "step": 1714 }, { "epoch": 0.3951612903225806, "grad_norm": 0.7819630929666835, "learning_rate": 1.8834320340256223e-06, "loss": 0.8777489066123962, "step": 1715 }, { "epoch": 0.3953917050691244, "grad_norm": 0.6763778401068745, "learning_rate": 1.8832534563896264e-06, "loss": 0.9785901308059692, "step": 1716 }, { "epoch": 0.3956221198156682, "grad_norm": 0.7796554840537433, "learning_rate": 1.883074750550561e-06, "loss": 0.847503125667572, "step": 1717 }, { "epoch": 0.395852534562212, "grad_norm": 0.7786503806499795, "learning_rate": 1.8828959165343643e-06, "loss": 1.0159538984298706, "step": 1718 }, { "epoch": 0.39608294930875576, "grad_norm": 0.8472423063084373, "learning_rate": 1.882716954366994e-06, "loss": 0.9064888954162598, "step": 1719 }, { "epoch": 0.39631336405529954, "grad_norm": 0.7664117713246195, "learning_rate": 1.8825378640744264e-06, "loss": 0.956849217414856, "step": 1720 }, { "epoch": 0.3965437788018433, "grad_norm": 0.758389558529891, "learning_rate": 1.882358645682656e-06, "loss": 0.8983441591262817, "step": 1721 }, { "epoch": 0.3967741935483871, "grad_norm": 0.5702990900386659, "learning_rate": 1.8821792992176967e-06, "loss": 0.7698956727981567, "step": 1722 }, { "epoch": 0.3970046082949309, "grad_norm": 0.8118873070872795, "learning_rate": 1.8819998247055797e-06, "loss": 0.9376351833343506, "step": 1723 }, { "epoch": 0.3972350230414747, "grad_norm": 0.8486728692509508, "learning_rate": 1.881820222172356e-06, "loss": 0.8776079416275024, "step": 1724 }, { "epoch": 0.39746543778801846, "grad_norm": 0.9552617438975642, "learning_rate": 1.8816404916440942e-06, "loss": 0.9776726961135864, "step": 1725 }, { "epoch": 0.3976958525345622, "grad_norm": 0.5841959382882552, "learning_rate": 1.8814606331468822e-06, "loss": 0.7699686288833618, "step": 1726 }, { "epoch": 0.39792626728110597, "grad_norm": 0.7581748259398383, "learning_rate": 1.8812806467068265e-06, "loss": 0.8256866931915283, "step": 1727 }, { "epoch": 0.39815668202764976, "grad_norm": 0.6320724280659841, "learning_rate": 1.881100532350051e-06, "loss": 0.8493847846984863, "step": 1728 }, { "epoch": 0.39838709677419354, "grad_norm": 0.6592895509903398, "learning_rate": 1.8809202901027002e-06, "loss": 0.8138688802719116, "step": 1729 }, { "epoch": 0.3986175115207373, "grad_norm": 0.7569638843586648, "learning_rate": 1.880739919990935e-06, "loss": 0.8637882471084595, "step": 1730 }, { "epoch": 0.3988479262672811, "grad_norm": 0.5847233582227849, "learning_rate": 1.880559422040937e-06, "loss": 0.8988152742385864, "step": 1731 }, { "epoch": 0.3990783410138249, "grad_norm": 0.4724369020135308, "learning_rate": 1.880378796278904e-06, "loss": 0.8247279524803162, "step": 1732 }, { "epoch": 0.39930875576036867, "grad_norm": 0.8071560192562027, "learning_rate": 1.8801980427310546e-06, "loss": 0.9699070453643799, "step": 1733 }, { "epoch": 0.39953917050691246, "grad_norm": 0.8108307817175047, "learning_rate": 1.8800171614236241e-06, "loss": 0.9516465663909912, "step": 1734 }, { "epoch": 0.39976958525345624, "grad_norm": 0.655632769560408, "learning_rate": 1.879836152382868e-06, "loss": 0.9553602933883667, "step": 1735 }, { "epoch": 0.4, "grad_norm": 0.666214042250043, "learning_rate": 1.879655015635059e-06, "loss": 0.7805094718933105, "step": 1736 }, { "epoch": 0.4002304147465438, "grad_norm": 0.730264537734651, "learning_rate": 1.8794737512064888e-06, "loss": 0.9509962797164917, "step": 1737 }, { "epoch": 0.40046082949308753, "grad_norm": 0.6755335543884481, "learning_rate": 1.8792923591234683e-06, "loss": 0.8663454055786133, "step": 1738 }, { "epoch": 0.4006912442396313, "grad_norm": 0.7325230471707477, "learning_rate": 1.8791108394123257e-06, "loss": 0.8773336410522461, "step": 1739 }, { "epoch": 0.4009216589861751, "grad_norm": 0.6493515009165077, "learning_rate": 1.8789291920994086e-06, "loss": 0.7201284766197205, "step": 1740 }, { "epoch": 0.4011520737327189, "grad_norm": 0.6665806307840867, "learning_rate": 1.8787474172110826e-06, "loss": 0.799161434173584, "step": 1741 }, { "epoch": 0.40138248847926267, "grad_norm": 0.8651407328311, "learning_rate": 1.8785655147737326e-06, "loss": 0.8987375497817993, "step": 1742 }, { "epoch": 0.40161290322580645, "grad_norm": 0.8706739093465035, "learning_rate": 1.878383484813761e-06, "loss": 0.8553296327590942, "step": 1743 }, { "epoch": 0.40184331797235023, "grad_norm": 0.6706596266673751, "learning_rate": 1.8782013273575895e-06, "loss": 0.8376551270484924, "step": 1744 }, { "epoch": 0.402073732718894, "grad_norm": 0.7963067027250083, "learning_rate": 1.8780190424316578e-06, "loss": 0.8220775723457336, "step": 1745 }, { "epoch": 0.4023041474654378, "grad_norm": 0.7339356821882034, "learning_rate": 1.8778366300624244e-06, "loss": 0.8614820241928101, "step": 1746 }, { "epoch": 0.4025345622119816, "grad_norm": 0.8065421465945496, "learning_rate": 1.8776540902763665e-06, "loss": 0.9434851408004761, "step": 1747 }, { "epoch": 0.40276497695852537, "grad_norm": 0.8102544073977809, "learning_rate": 1.877471423099979e-06, "loss": 0.8150373101234436, "step": 1748 }, { "epoch": 0.40299539170506915, "grad_norm": 0.5910178895755134, "learning_rate": 1.8772886285597762e-06, "loss": 0.7660368084907532, "step": 1749 }, { "epoch": 0.4032258064516129, "grad_norm": 0.7262631962712356, "learning_rate": 1.8771057066822903e-06, "loss": 0.7647032141685486, "step": 1750 }, { "epoch": 0.40345622119815666, "grad_norm": 0.6238918567790319, "learning_rate": 1.8769226574940723e-06, "loss": 0.6034061908721924, "step": 1751 }, { "epoch": 0.40368663594470044, "grad_norm": 0.7344154412243011, "learning_rate": 1.8767394810216914e-06, "loss": 1.0062675476074219, "step": 1752 }, { "epoch": 0.40391705069124423, "grad_norm": 0.6966552417777933, "learning_rate": 1.8765561772917354e-06, "loss": 0.9791489839553833, "step": 1753 }, { "epoch": 0.404147465437788, "grad_norm": 0.5825611392130148, "learning_rate": 1.8763727463308108e-06, "loss": 0.9054251909255981, "step": 1754 }, { "epoch": 0.4043778801843318, "grad_norm": 0.7455727854900284, "learning_rate": 1.8761891881655423e-06, "loss": 0.9156093597412109, "step": 1755 }, { "epoch": 0.4046082949308756, "grad_norm": 0.6983601123297067, "learning_rate": 1.876005502822573e-06, "loss": 0.7525647878646851, "step": 1756 }, { "epoch": 0.40483870967741936, "grad_norm": 0.6156689393045622, "learning_rate": 1.8758216903285643e-06, "loss": 0.8321493864059448, "step": 1757 }, { "epoch": 0.40506912442396314, "grad_norm": 0.888147060404811, "learning_rate": 1.8756377507101973e-06, "loss": 0.9937042593955994, "step": 1758 }, { "epoch": 0.40529953917050693, "grad_norm": 0.553604524827559, "learning_rate": 1.8754536839941694e-06, "loss": 0.7001460790634155, "step": 1759 }, { "epoch": 0.4055299539170507, "grad_norm": 0.7747422377442987, "learning_rate": 1.8752694902071986e-06, "loss": 1.0062569379806519, "step": 1760 }, { "epoch": 0.4057603686635945, "grad_norm": 0.7145787925683823, "learning_rate": 1.8750851693760199e-06, "loss": 0.7414188385009766, "step": 1761 }, { "epoch": 0.4059907834101382, "grad_norm": 0.6306403135362045, "learning_rate": 1.8749007215273873e-06, "loss": 0.7181771397590637, "step": 1762 }, { "epoch": 0.406221198156682, "grad_norm": 0.7763317855361268, "learning_rate": 1.8747161466880732e-06, "loss": 0.8797845244407654, "step": 1763 }, { "epoch": 0.4064516129032258, "grad_norm": 0.6123636271862207, "learning_rate": 1.8745314448848684e-06, "loss": 0.7774960398674011, "step": 1764 }, { "epoch": 0.4066820276497696, "grad_norm": 0.9110978120854332, "learning_rate": 1.874346616144582e-06, "loss": 0.8499422073364258, "step": 1765 }, { "epoch": 0.40691244239631336, "grad_norm": 0.6306854745937814, "learning_rate": 1.874161660494042e-06, "loss": 0.7070250511169434, "step": 1766 }, { "epoch": 0.40714285714285714, "grad_norm": 0.6762437905211294, "learning_rate": 1.8739765779600939e-06, "loss": 0.8009281158447266, "step": 1767 }, { "epoch": 0.4073732718894009, "grad_norm": 0.6084135312041689, "learning_rate": 1.8737913685696027e-06, "loss": 0.6866155862808228, "step": 1768 }, { "epoch": 0.4076036866359447, "grad_norm": 0.7813040754942882, "learning_rate": 1.873606032349451e-06, "loss": 0.8200059533119202, "step": 1769 }, { "epoch": 0.4078341013824885, "grad_norm": 0.629385301974861, "learning_rate": 1.8734205693265404e-06, "loss": 0.8413814902305603, "step": 1770 }, { "epoch": 0.4080645161290323, "grad_norm": 0.776612651465312, "learning_rate": 1.8732349795277903e-06, "loss": 0.9935271143913269, "step": 1771 }, { "epoch": 0.40829493087557606, "grad_norm": 0.6589503544607032, "learning_rate": 1.873049262980139e-06, "loss": 0.8718058466911316, "step": 1772 }, { "epoch": 0.40852534562211984, "grad_norm": 0.8620050398467397, "learning_rate": 1.8728634197105428e-06, "loss": 0.9009358882904053, "step": 1773 }, { "epoch": 0.40875576036866357, "grad_norm": 0.7755306532739165, "learning_rate": 1.8726774497459768e-06, "loss": 0.9128156900405884, "step": 1774 }, { "epoch": 0.40898617511520735, "grad_norm": 0.6450271750629438, "learning_rate": 1.8724913531134342e-06, "loss": 0.8524078130722046, "step": 1775 }, { "epoch": 0.40921658986175113, "grad_norm": 0.7569328214438452, "learning_rate": 1.872305129839927e-06, "loss": 0.9431420564651489, "step": 1776 }, { "epoch": 0.4094470046082949, "grad_norm": 0.6746261931292995, "learning_rate": 1.8721187799524846e-06, "loss": 0.7666694521903992, "step": 1777 }, { "epoch": 0.4096774193548387, "grad_norm": 0.6448149830483173, "learning_rate": 1.871932303478156e-06, "loss": 0.872551679611206, "step": 1778 }, { "epoch": 0.4099078341013825, "grad_norm": 0.6320914450645303, "learning_rate": 1.8717457004440079e-06, "loss": 0.7596250176429749, "step": 1779 }, { "epoch": 0.41013824884792627, "grad_norm": 0.9751786230729174, "learning_rate": 1.8715589708771253e-06, "loss": 1.0098414421081543, "step": 1780 }, { "epoch": 0.41036866359447005, "grad_norm": 0.9695096083628231, "learning_rate": 1.871372114804612e-06, "loss": 0.9961523413658142, "step": 1781 }, { "epoch": 0.41059907834101383, "grad_norm": 0.8458697864526913, "learning_rate": 1.8711851322535896e-06, "loss": 0.9065390825271606, "step": 1782 }, { "epoch": 0.4108294930875576, "grad_norm": 0.5445685826440523, "learning_rate": 1.8709980232511987e-06, "loss": 0.7906428575515747, "step": 1783 }, { "epoch": 0.4110599078341014, "grad_norm": 0.5783797348856774, "learning_rate": 1.8708107878245976e-06, "loss": 0.798285722732544, "step": 1784 }, { "epoch": 0.4112903225806452, "grad_norm": 0.7492534516122694, "learning_rate": 1.870623426000964e-06, "loss": 0.7809790372848511, "step": 1785 }, { "epoch": 0.4115207373271889, "grad_norm": 0.8776810150838931, "learning_rate": 1.8704359378074921e-06, "loss": 0.8931630849838257, "step": 1786 }, { "epoch": 0.4117511520737327, "grad_norm": 0.6321595970525742, "learning_rate": 1.870248323271396e-06, "loss": 0.8219889402389526, "step": 1787 }, { "epoch": 0.4119815668202765, "grad_norm": 0.9973808347817518, "learning_rate": 1.8700605824199084e-06, "loss": 0.8371819257736206, "step": 1788 }, { "epoch": 0.41221198156682026, "grad_norm": 0.7869196176383942, "learning_rate": 1.8698727152802789e-06, "loss": 0.951171875, "step": 1789 }, { "epoch": 0.41244239631336405, "grad_norm": 0.6763081680317143, "learning_rate": 1.8696847218797763e-06, "loss": 0.7678385972976685, "step": 1790 }, { "epoch": 0.41267281105990783, "grad_norm": 0.567634539573834, "learning_rate": 1.8694966022456872e-06, "loss": 0.9296993017196655, "step": 1791 }, { "epoch": 0.4129032258064516, "grad_norm": 0.5450828031444163, "learning_rate": 1.8693083564053178e-06, "loss": 0.8991763591766357, "step": 1792 }, { "epoch": 0.4131336405529954, "grad_norm": 0.5967294444907658, "learning_rate": 1.8691199843859913e-06, "loss": 0.8332901000976562, "step": 1793 }, { "epoch": 0.4133640552995392, "grad_norm": 0.7571962190593917, "learning_rate": 1.8689314862150497e-06, "loss": 0.7723548412322998, "step": 1794 }, { "epoch": 0.41359447004608296, "grad_norm": 0.6588409150246594, "learning_rate": 1.868742861919853e-06, "loss": 0.7768993377685547, "step": 1795 }, { "epoch": 0.41382488479262675, "grad_norm": 0.43193778142300604, "learning_rate": 1.86855411152778e-06, "loss": 0.6058932542800903, "step": 1796 }, { "epoch": 0.41405529953917053, "grad_norm": 0.8667574432138021, "learning_rate": 1.8683652350662274e-06, "loss": 0.8711605072021484, "step": 1797 }, { "epoch": 0.4142857142857143, "grad_norm": 0.8780154463369872, "learning_rate": 1.8681762325626104e-06, "loss": 0.9023469090461731, "step": 1798 }, { "epoch": 0.41451612903225804, "grad_norm": 0.6070102500189553, "learning_rate": 1.867987104044363e-06, "loss": 0.7735910415649414, "step": 1799 }, { "epoch": 0.4147465437788018, "grad_norm": 0.6293725885471063, "learning_rate": 1.8677978495389364e-06, "loss": 0.6609020829200745, "step": 1800 }, { "epoch": 0.4149769585253456, "grad_norm": 0.6485782104038655, "learning_rate": 1.8676084690738005e-06, "loss": 0.7823291420936584, "step": 1801 }, { "epoch": 0.4152073732718894, "grad_norm": 0.8472581681306268, "learning_rate": 1.867418962676444e-06, "loss": 0.9076563715934753, "step": 1802 }, { "epoch": 0.4154377880184332, "grad_norm": 0.561807586977654, "learning_rate": 1.8672293303743735e-06, "loss": 0.8645772933959961, "step": 1803 }, { "epoch": 0.41566820276497696, "grad_norm": 0.6821058596015542, "learning_rate": 1.8670395721951135e-06, "loss": 0.8071421384811401, "step": 1804 }, { "epoch": 0.41589861751152074, "grad_norm": 0.7396557376618352, "learning_rate": 1.8668496881662077e-06, "loss": 0.8459846377372742, "step": 1805 }, { "epoch": 0.4161290322580645, "grad_norm": 0.7167052224732033, "learning_rate": 1.866659678315217e-06, "loss": 0.8467865586280823, "step": 1806 }, { "epoch": 0.4163594470046083, "grad_norm": 0.8262164291061972, "learning_rate": 1.8664695426697215e-06, "loss": 0.8963291645050049, "step": 1807 }, { "epoch": 0.4165898617511521, "grad_norm": 0.528766323006704, "learning_rate": 1.8662792812573188e-06, "loss": 0.7901826500892639, "step": 1808 }, { "epoch": 0.4168202764976959, "grad_norm": 0.8974116604603759, "learning_rate": 1.8660888941056252e-06, "loss": 0.807115912437439, "step": 1809 }, { "epoch": 0.41705069124423966, "grad_norm": 0.6271237317374816, "learning_rate": 1.8658983812422753e-06, "loss": 0.8439537286758423, "step": 1810 }, { "epoch": 0.4172811059907834, "grad_norm": 0.8360600380108553, "learning_rate": 1.8657077426949214e-06, "loss": 0.6920834183692932, "step": 1811 }, { "epoch": 0.41751152073732717, "grad_norm": 0.7603232216568709, "learning_rate": 1.865516978491235e-06, "loss": 0.8712124824523926, "step": 1812 }, { "epoch": 0.41774193548387095, "grad_norm": 0.718498571919399, "learning_rate": 1.865326088658905e-06, "loss": 0.7720927596092224, "step": 1813 }, { "epoch": 0.41797235023041474, "grad_norm": 0.6953832780918029, "learning_rate": 1.8651350732256386e-06, "loss": 0.8003814220428467, "step": 1814 }, { "epoch": 0.4182027649769585, "grad_norm": 0.838076886250554, "learning_rate": 1.8649439322191616e-06, "loss": 0.8999850749969482, "step": 1815 }, { "epoch": 0.4184331797235023, "grad_norm": 0.584714014216153, "learning_rate": 1.8647526656672179e-06, "loss": 0.6752324104309082, "step": 1816 }, { "epoch": 0.4186635944700461, "grad_norm": 0.7365325720475113, "learning_rate": 1.8645612735975696e-06, "loss": 0.8521262407302856, "step": 1817 }, { "epoch": 0.41889400921658987, "grad_norm": 0.7194058023938104, "learning_rate": 1.864369756037997e-06, "loss": 0.8813315629959106, "step": 1818 }, { "epoch": 0.41912442396313365, "grad_norm": 0.742428235010686, "learning_rate": 1.8641781130162986e-06, "loss": 0.8358273506164551, "step": 1819 }, { "epoch": 0.41935483870967744, "grad_norm": 0.591500867449821, "learning_rate": 1.863986344560291e-06, "loss": 0.8051023483276367, "step": 1820 }, { "epoch": 0.4195852534562212, "grad_norm": 0.7791039105049288, "learning_rate": 1.863794450697809e-06, "loss": 0.768791675567627, "step": 1821 }, { "epoch": 0.419815668202765, "grad_norm": 0.9369354252226071, "learning_rate": 1.8636024314567065e-06, "loss": 0.8420040607452393, "step": 1822 }, { "epoch": 0.42004608294930873, "grad_norm": 0.673055652482875, "learning_rate": 1.8634102868648542e-06, "loss": 0.7670450806617737, "step": 1823 }, { "epoch": 0.4202764976958525, "grad_norm": 0.6699812957272996, "learning_rate": 1.863218016950142e-06, "loss": 0.8292283415794373, "step": 1824 }, { "epoch": 0.4205069124423963, "grad_norm": 0.6058254395333167, "learning_rate": 1.8630256217404767e-06, "loss": 0.8005781769752502, "step": 1825 }, { "epoch": 0.4207373271889401, "grad_norm": 0.923190166351158, "learning_rate": 1.8628331012637854e-06, "loss": 0.8214897513389587, "step": 1826 }, { "epoch": 0.42096774193548386, "grad_norm": 0.6734314204378448, "learning_rate": 1.8626404555480118e-06, "loss": 0.7938524484634399, "step": 1827 }, { "epoch": 0.42119815668202765, "grad_norm": 0.7824933974022145, "learning_rate": 1.862447684621118e-06, "loss": 1.0047048330307007, "step": 1828 }, { "epoch": 0.42142857142857143, "grad_norm": 0.7060449091561402, "learning_rate": 1.862254788511084e-06, "loss": 0.7660601139068604, "step": 1829 }, { "epoch": 0.4216589861751152, "grad_norm": 0.7940468118829026, "learning_rate": 1.8620617672459096e-06, "loss": 0.8227912783622742, "step": 1830 }, { "epoch": 0.421889400921659, "grad_norm": 0.8322274877206185, "learning_rate": 1.8618686208536106e-06, "loss": 0.8570956587791443, "step": 1831 }, { "epoch": 0.4221198156682028, "grad_norm": 0.6215191834076389, "learning_rate": 1.8616753493622221e-06, "loss": 0.7472532987594604, "step": 1832 }, { "epoch": 0.42235023041474656, "grad_norm": 0.702673502332975, "learning_rate": 1.8614819527997976e-06, "loss": 0.812872052192688, "step": 1833 }, { "epoch": 0.42258064516129035, "grad_norm": 0.7168526420375322, "learning_rate": 1.861288431194408e-06, "loss": 0.7801386117935181, "step": 1834 }, { "epoch": 0.4228110599078341, "grad_norm": 0.8740851917776313, "learning_rate": 1.8610947845741426e-06, "loss": 0.7834687829017639, "step": 1835 }, { "epoch": 0.42304147465437786, "grad_norm": 0.8009990500080056, "learning_rate": 1.8609010129671097e-06, "loss": 0.786865234375, "step": 1836 }, { "epoch": 0.42327188940092164, "grad_norm": 0.6559457181196078, "learning_rate": 1.860707116401434e-06, "loss": 0.7728738784790039, "step": 1837 }, { "epoch": 0.4235023041474654, "grad_norm": 0.6384024302830484, "learning_rate": 1.8605130949052598e-06, "loss": 0.6508793830871582, "step": 1838 }, { "epoch": 0.4237327188940092, "grad_norm": 0.6544986461362278, "learning_rate": 1.8603189485067492e-06, "loss": 0.7949484586715698, "step": 1839 }, { "epoch": 0.423963133640553, "grad_norm": 0.7679729608195138, "learning_rate": 1.8601246772340822e-06, "loss": 0.7151408195495605, "step": 1840 }, { "epoch": 0.4241935483870968, "grad_norm": 0.6910188883895837, "learning_rate": 1.859930281115457e-06, "loss": 0.7678598165512085, "step": 1841 }, { "epoch": 0.42442396313364056, "grad_norm": 0.6547923584739629, "learning_rate": 1.8597357601790895e-06, "loss": 0.8042058944702148, "step": 1842 }, { "epoch": 0.42465437788018434, "grad_norm": 0.6889925049755639, "learning_rate": 1.859541114453215e-06, "loss": 0.7328081130981445, "step": 1843 }, { "epoch": 0.4248847926267281, "grad_norm": 0.7385850960276812, "learning_rate": 1.8593463439660853e-06, "loss": 0.7646626234054565, "step": 1844 }, { "epoch": 0.4251152073732719, "grad_norm": 0.7455331415840897, "learning_rate": 1.8591514487459717e-06, "loss": 0.8965721726417542, "step": 1845 }, { "epoch": 0.4253456221198157, "grad_norm": 0.6783955368622289, "learning_rate": 1.8589564288211623e-06, "loss": 0.8892468810081482, "step": 1846 }, { "epoch": 0.4255760368663594, "grad_norm": 0.669354336924349, "learning_rate": 1.8587612842199648e-06, "loss": 0.8314409255981445, "step": 1847 }, { "epoch": 0.4258064516129032, "grad_norm": 0.7299222952808436, "learning_rate": 1.8585660149707034e-06, "loss": 0.7713892459869385, "step": 1848 }, { "epoch": 0.426036866359447, "grad_norm": 0.7583328231707663, "learning_rate": 1.8583706211017216e-06, "loss": 0.9349459409713745, "step": 1849 }, { "epoch": 0.42626728110599077, "grad_norm": 0.7309436500165829, "learning_rate": 1.8581751026413805e-06, "loss": 0.8438700437545776, "step": 1850 }, { "epoch": 0.42649769585253455, "grad_norm": 1.0171962155435006, "learning_rate": 1.8579794596180594e-06, "loss": 0.9559776782989502, "step": 1851 }, { "epoch": 0.42672811059907834, "grad_norm": 0.6701533748146308, "learning_rate": 1.8577836920601556e-06, "loss": 0.7124872803688049, "step": 1852 }, { "epoch": 0.4269585253456221, "grad_norm": 0.8613289026694887, "learning_rate": 1.8575877999960842e-06, "loss": 0.7935503125190735, "step": 1853 }, { "epoch": 0.4271889400921659, "grad_norm": 0.7107096707504692, "learning_rate": 1.8573917834542792e-06, "loss": 0.9145890474319458, "step": 1854 }, { "epoch": 0.4274193548387097, "grad_norm": 0.7290504646059204, "learning_rate": 1.8571956424631918e-06, "loss": 0.8239228129386902, "step": 1855 }, { "epoch": 0.42764976958525347, "grad_norm": 0.6018983094431002, "learning_rate": 1.8569993770512916e-06, "loss": 0.8767688274383545, "step": 1856 }, { "epoch": 0.42788018433179725, "grad_norm": 0.6742014961339767, "learning_rate": 1.8568029872470663e-06, "loss": 0.7860859632492065, "step": 1857 }, { "epoch": 0.42811059907834104, "grad_norm": 0.6990668023927343, "learning_rate": 1.8566064730790218e-06, "loss": 0.8855729103088379, "step": 1858 }, { "epoch": 0.4283410138248848, "grad_norm": 0.8518974155898882, "learning_rate": 1.8564098345756815e-06, "loss": 1.023299217224121, "step": 1859 }, { "epoch": 0.42857142857142855, "grad_norm": 0.7174059285774532, "learning_rate": 1.8562130717655878e-06, "loss": 0.7665202617645264, "step": 1860 }, { "epoch": 0.42880184331797233, "grad_norm": 0.7036772811538429, "learning_rate": 1.8560161846773e-06, "loss": 0.8456651568412781, "step": 1861 }, { "epoch": 0.4290322580645161, "grad_norm": 0.7229483822116546, "learning_rate": 1.8558191733393964e-06, "loss": 0.8920061588287354, "step": 1862 }, { "epoch": 0.4292626728110599, "grad_norm": 0.8104170426239989, "learning_rate": 1.8556220377804723e-06, "loss": 0.8686853051185608, "step": 1863 }, { "epoch": 0.4294930875576037, "grad_norm": 0.5832986779631602, "learning_rate": 1.8554247780291425e-06, "loss": 0.6976242065429688, "step": 1864 }, { "epoch": 0.42972350230414746, "grad_norm": 0.7347161353185314, "learning_rate": 1.8552273941140387e-06, "loss": 0.9612032771110535, "step": 1865 }, { "epoch": 0.42995391705069125, "grad_norm": 0.6243829709767468, "learning_rate": 1.8550298860638108e-06, "loss": 0.9288003444671631, "step": 1866 }, { "epoch": 0.43018433179723503, "grad_norm": 0.6743712494799082, "learning_rate": 1.8548322539071263e-06, "loss": 0.8397525548934937, "step": 1867 }, { "epoch": 0.4304147465437788, "grad_norm": 0.5881426126037044, "learning_rate": 1.8546344976726722e-06, "loss": 0.6311365365982056, "step": 1868 }, { "epoch": 0.4306451612903226, "grad_norm": 0.7497017851812813, "learning_rate": 1.8544366173891523e-06, "loss": 0.7868270874023438, "step": 1869 }, { "epoch": 0.4308755760368664, "grad_norm": 0.6265515804052451, "learning_rate": 1.8542386130852883e-06, "loss": 0.9197052717208862, "step": 1870 }, { "epoch": 0.43110599078341016, "grad_norm": 0.7018278829983491, "learning_rate": 1.8540404847898206e-06, "loss": 0.7875635027885437, "step": 1871 }, { "epoch": 0.4313364055299539, "grad_norm": 0.7789284724063816, "learning_rate": 1.853842232531507e-06, "loss": 0.9805077910423279, "step": 1872 }, { "epoch": 0.4315668202764977, "grad_norm": 0.838470325159009, "learning_rate": 1.8536438563391236e-06, "loss": 0.8906866312026978, "step": 1873 }, { "epoch": 0.43179723502304146, "grad_norm": 0.73247587866706, "learning_rate": 1.8534453562414649e-06, "loss": 0.7506693601608276, "step": 1874 }, { "epoch": 0.43202764976958524, "grad_norm": 0.6576915367586517, "learning_rate": 1.8532467322673422e-06, "loss": 0.6173181533813477, "step": 1875 }, { "epoch": 0.432258064516129, "grad_norm": 0.6907344817423696, "learning_rate": 1.853047984445586e-06, "loss": 0.9217972755432129, "step": 1876 }, { "epoch": 0.4324884792626728, "grad_norm": 0.8808471726659616, "learning_rate": 1.8528491128050442e-06, "loss": 0.8300588130950928, "step": 1877 }, { "epoch": 0.4327188940092166, "grad_norm": 0.7869544847637374, "learning_rate": 1.8526501173745826e-06, "loss": 0.8109279870986938, "step": 1878 }, { "epoch": 0.4329493087557604, "grad_norm": 0.8253705845492948, "learning_rate": 1.852450998183085e-06, "loss": 0.9243700504302979, "step": 1879 }, { "epoch": 0.43317972350230416, "grad_norm": 0.7291726511705204, "learning_rate": 1.8522517552594539e-06, "loss": 0.7983531951904297, "step": 1880 }, { "epoch": 0.43341013824884794, "grad_norm": 0.837506072245515, "learning_rate": 1.8520523886326088e-06, "loss": 0.9931240081787109, "step": 1881 }, { "epoch": 0.4336405529953917, "grad_norm": 0.7782064692415819, "learning_rate": 1.8518528983314874e-06, "loss": 0.923255443572998, "step": 1882 }, { "epoch": 0.4338709677419355, "grad_norm": 0.5003052765919304, "learning_rate": 1.8516532843850454e-06, "loss": 0.8470325469970703, "step": 1883 }, { "epoch": 0.43410138248847924, "grad_norm": 0.7497886449083292, "learning_rate": 1.8514535468222566e-06, "loss": 0.9175074696540833, "step": 1884 }, { "epoch": 0.434331797235023, "grad_norm": 0.7474680310474195, "learning_rate": 1.8512536856721126e-06, "loss": 0.8617827892303467, "step": 1885 }, { "epoch": 0.4345622119815668, "grad_norm": 0.6779026169933022, "learning_rate": 1.8510537009636231e-06, "loss": 0.6787248849868774, "step": 1886 }, { "epoch": 0.4347926267281106, "grad_norm": 0.6948062534132075, "learning_rate": 1.8508535927258157e-06, "loss": 0.8031569719314575, "step": 1887 }, { "epoch": 0.43502304147465437, "grad_norm": 0.8219581995376891, "learning_rate": 1.8506533609877354e-06, "loss": 1.0252577066421509, "step": 1888 }, { "epoch": 0.43525345622119815, "grad_norm": 0.6297691459816858, "learning_rate": 1.850453005778446e-06, "loss": 0.7947444915771484, "step": 1889 }, { "epoch": 0.43548387096774194, "grad_norm": 0.7974729793994046, "learning_rate": 1.8502525271270288e-06, "loss": 0.817523717880249, "step": 1890 }, { "epoch": 0.4357142857142857, "grad_norm": 0.905445482286677, "learning_rate": 1.850051925062583e-06, "loss": 0.8029658794403076, "step": 1891 }, { "epoch": 0.4359447004608295, "grad_norm": 0.7902601112013473, "learning_rate": 1.8498511996142253e-06, "loss": 0.871408224105835, "step": 1892 }, { "epoch": 0.4361751152073733, "grad_norm": 0.7279346643764769, "learning_rate": 1.849650350811091e-06, "loss": 1.0133098363876343, "step": 1893 }, { "epoch": 0.43640552995391707, "grad_norm": 0.5859043876213773, "learning_rate": 1.8494493786823333e-06, "loss": 0.8320624828338623, "step": 1894 }, { "epoch": 0.43663594470046085, "grad_norm": 0.7240549495084485, "learning_rate": 1.8492482832571225e-06, "loss": 0.7757631540298462, "step": 1895 }, { "epoch": 0.4368663594470046, "grad_norm": 0.7606146142454437, "learning_rate": 1.8490470645646479e-06, "loss": 0.8503100872039795, "step": 1896 }, { "epoch": 0.43709677419354837, "grad_norm": 0.7560932530175453, "learning_rate": 1.8488457226341158e-06, "loss": 0.8145939707756042, "step": 1897 }, { "epoch": 0.43732718894009215, "grad_norm": 0.8041258430075643, "learning_rate": 1.848644257494751e-06, "loss": 0.831500232219696, "step": 1898 }, { "epoch": 0.43755760368663593, "grad_norm": 0.6473340838552745, "learning_rate": 1.8484426691757956e-06, "loss": 0.9340692758560181, "step": 1899 }, { "epoch": 0.4377880184331797, "grad_norm": 0.7851684163129825, "learning_rate": 1.8482409577065097e-06, "loss": 1.011988639831543, "step": 1900 }, { "epoch": 0.4380184331797235, "grad_norm": 0.6819650200659566, "learning_rate": 1.848039123116172e-06, "loss": 0.8110378980636597, "step": 1901 }, { "epoch": 0.4382488479262673, "grad_norm": 0.6310651453357742, "learning_rate": 1.8478371654340779e-06, "loss": 0.8230330944061279, "step": 1902 }, { "epoch": 0.43847926267281107, "grad_norm": 0.8335502206603579, "learning_rate": 1.8476350846895419e-06, "loss": 0.875052809715271, "step": 1903 }, { "epoch": 0.43870967741935485, "grad_norm": 0.7394371211482306, "learning_rate": 1.8474328809118953e-06, "loss": 0.9373071193695068, "step": 1904 }, { "epoch": 0.43894009216589863, "grad_norm": 0.7538115820848524, "learning_rate": 1.847230554130488e-06, "loss": 0.8341633677482605, "step": 1905 }, { "epoch": 0.4391705069124424, "grad_norm": 0.6579829053639499, "learning_rate": 1.8470281043746873e-06, "loss": 0.8147767782211304, "step": 1906 }, { "epoch": 0.4394009216589862, "grad_norm": 0.6022228592985512, "learning_rate": 1.8468255316738785e-06, "loss": 0.740512490272522, "step": 1907 }, { "epoch": 0.4396313364055299, "grad_norm": 0.7743265443588842, "learning_rate": 1.846622836057465e-06, "loss": 0.7754743099212646, "step": 1908 }, { "epoch": 0.4398617511520737, "grad_norm": 0.7535493986684056, "learning_rate": 1.8464200175548677e-06, "loss": 0.9131484031677246, "step": 1909 }, { "epoch": 0.4400921658986175, "grad_norm": 0.7099012564704421, "learning_rate": 1.8462170761955252e-06, "loss": 0.7084713578224182, "step": 1910 }, { "epoch": 0.4403225806451613, "grad_norm": 0.7949281739735957, "learning_rate": 1.8460140120088945e-06, "loss": 0.8535224199295044, "step": 1911 }, { "epoch": 0.44055299539170506, "grad_norm": 0.8579322326008002, "learning_rate": 1.8458108250244498e-06, "loss": 0.7661323547363281, "step": 1912 }, { "epoch": 0.44078341013824884, "grad_norm": 0.7355189670899542, "learning_rate": 1.8456075152716837e-06, "loss": 0.8064024448394775, "step": 1913 }, { "epoch": 0.4410138248847926, "grad_norm": 0.7422340222781728, "learning_rate": 1.8454040827801058e-06, "loss": 0.7858735918998718, "step": 1914 }, { "epoch": 0.4412442396313364, "grad_norm": 0.6589873136371734, "learning_rate": 1.8452005275792448e-06, "loss": 0.9251735210418701, "step": 1915 }, { "epoch": 0.4414746543778802, "grad_norm": 0.718018605876598, "learning_rate": 1.8449968496986461e-06, "loss": 0.7237124443054199, "step": 1916 }, { "epoch": 0.441705069124424, "grad_norm": 0.7573893032737062, "learning_rate": 1.8447930491678732e-06, "loss": 0.8939133882522583, "step": 1917 }, { "epoch": 0.44193548387096776, "grad_norm": 0.8373489922925343, "learning_rate": 1.8445891260165076e-06, "loss": 0.8815577626228333, "step": 1918 }, { "epoch": 0.44216589861751154, "grad_norm": 0.8703539982402225, "learning_rate": 1.8443850802741485e-06, "loss": 0.943426787853241, "step": 1919 }, { "epoch": 0.4423963133640553, "grad_norm": 0.6998600920537428, "learning_rate": 1.8441809119704126e-06, "loss": 0.8001632690429688, "step": 1920 }, { "epoch": 0.44262672811059905, "grad_norm": 0.8531362441371287, "learning_rate": 1.8439766211349352e-06, "loss": 0.8656308650970459, "step": 1921 }, { "epoch": 0.44285714285714284, "grad_norm": 0.7261410922718881, "learning_rate": 1.8437722077973686e-06, "loss": 0.9774024486541748, "step": 1922 }, { "epoch": 0.4430875576036866, "grad_norm": 0.728823767818971, "learning_rate": 1.8435676719873827e-06, "loss": 0.7655738592147827, "step": 1923 }, { "epoch": 0.4433179723502304, "grad_norm": 0.6595509202419896, "learning_rate": 1.8433630137346657e-06, "loss": 0.6455004811286926, "step": 1924 }, { "epoch": 0.4435483870967742, "grad_norm": 0.7214853647491487, "learning_rate": 1.8431582330689243e-06, "loss": 0.8221153020858765, "step": 1925 }, { "epoch": 0.44377880184331797, "grad_norm": 0.7718374957528886, "learning_rate": 1.8429533300198816e-06, "loss": 0.7878339886665344, "step": 1926 }, { "epoch": 0.44400921658986175, "grad_norm": 0.7666174978175726, "learning_rate": 1.8427483046172787e-06, "loss": 0.8292763829231262, "step": 1927 }, { "epoch": 0.44423963133640554, "grad_norm": 0.7395800766154846, "learning_rate": 1.842543156890875e-06, "loss": 0.7774572372436523, "step": 1928 }, { "epoch": 0.4444700460829493, "grad_norm": 0.7419338266362171, "learning_rate": 1.8423378868704476e-06, "loss": 0.7327601909637451, "step": 1929 }, { "epoch": 0.4447004608294931, "grad_norm": 0.7176112305038147, "learning_rate": 1.8421324945857909e-06, "loss": 0.8067511320114136, "step": 1930 }, { "epoch": 0.4449308755760369, "grad_norm": 0.780684647138278, "learning_rate": 1.8419269800667173e-06, "loss": 0.851010799407959, "step": 1931 }, { "epoch": 0.44516129032258067, "grad_norm": 0.7848772154457995, "learning_rate": 1.8417213433430576e-06, "loss": 0.8402234315872192, "step": 1932 }, { "epoch": 0.4453917050691244, "grad_norm": 0.7848428302916386, "learning_rate": 1.8415155844446591e-06, "loss": 0.8857355117797852, "step": 1933 }, { "epoch": 0.4456221198156682, "grad_norm": 0.6465222204250215, "learning_rate": 1.841309703401387e-06, "loss": 0.7517881393432617, "step": 1934 }, { "epoch": 0.44585253456221197, "grad_norm": 0.8220839741097039, "learning_rate": 1.8411037002431257e-06, "loss": 0.8583779335021973, "step": 1935 }, { "epoch": 0.44608294930875575, "grad_norm": 0.7149579567670102, "learning_rate": 1.8408975749997758e-06, "loss": 0.7691524028778076, "step": 1936 }, { "epoch": 0.44631336405529953, "grad_norm": 0.6891731440130011, "learning_rate": 1.8406913277012558e-06, "loss": 0.9164496660232544, "step": 1937 }, { "epoch": 0.4465437788018433, "grad_norm": 0.6382978906826758, "learning_rate": 1.8404849583775025e-06, "loss": 0.843226432800293, "step": 1938 }, { "epoch": 0.4467741935483871, "grad_norm": 0.843769912689158, "learning_rate": 1.8402784670584706e-06, "loss": 0.8492633104324341, "step": 1939 }, { "epoch": 0.4470046082949309, "grad_norm": 0.7117202181402426, "learning_rate": 1.8400718537741314e-06, "loss": 0.8088324069976807, "step": 1940 }, { "epoch": 0.44723502304147467, "grad_norm": 0.8584564611753391, "learning_rate": 1.8398651185544746e-06, "loss": 0.8879667520523071, "step": 1941 }, { "epoch": 0.44746543778801845, "grad_norm": 0.6515549607308898, "learning_rate": 1.8396582614295078e-06, "loss": 0.8926588892936707, "step": 1942 }, { "epoch": 0.44769585253456223, "grad_norm": 0.6885634929225364, "learning_rate": 1.8394512824292558e-06, "loss": 0.8007583618164062, "step": 1943 }, { "epoch": 0.447926267281106, "grad_norm": 0.6940540666117992, "learning_rate": 1.8392441815837613e-06, "loss": 0.7420827746391296, "step": 1944 }, { "epoch": 0.44815668202764974, "grad_norm": 0.6846873323136197, "learning_rate": 1.839036958923085e-06, "loss": 0.7653264999389648, "step": 1945 }, { "epoch": 0.4483870967741935, "grad_norm": 0.6684685460178057, "learning_rate": 1.838829614477305e-06, "loss": 0.886576771736145, "step": 1946 }, { "epoch": 0.4486175115207373, "grad_norm": 0.7769567865097903, "learning_rate": 1.8386221482765168e-06, "loss": 0.904376745223999, "step": 1947 }, { "epoch": 0.4488479262672811, "grad_norm": 0.6833196213451335, "learning_rate": 1.838414560350834e-06, "loss": 0.6791579723358154, "step": 1948 }, { "epoch": 0.4490783410138249, "grad_norm": 0.8296885335278092, "learning_rate": 1.838206850730388e-06, "loss": 0.9402183294296265, "step": 1949 }, { "epoch": 0.44930875576036866, "grad_norm": 0.9215175287627321, "learning_rate": 1.8379990194453265e-06, "loss": 0.9756022691726685, "step": 1950 }, { "epoch": 0.44953917050691244, "grad_norm": 0.9502651388093868, "learning_rate": 1.8377910665258173e-06, "loss": 0.7311051487922668, "step": 1951 }, { "epoch": 0.4497695852534562, "grad_norm": 0.5687721596613555, "learning_rate": 1.8375829920020438e-06, "loss": 0.6966956853866577, "step": 1952 }, { "epoch": 0.45, "grad_norm": 0.7191813033419734, "learning_rate": 1.8373747959042076e-06, "loss": 0.7327426671981812, "step": 1953 }, { "epoch": 0.4502304147465438, "grad_norm": 0.8067848664348717, "learning_rate": 1.8371664782625285e-06, "loss": 0.8650925755500793, "step": 1954 }, { "epoch": 0.4504608294930876, "grad_norm": 0.8028206677205298, "learning_rate": 1.8369580391072431e-06, "loss": 0.876739501953125, "step": 1955 }, { "epoch": 0.45069124423963136, "grad_norm": 0.7092651204784524, "learning_rate": 1.8367494784686066e-06, "loss": 0.7787455320358276, "step": 1956 }, { "epoch": 0.4509216589861751, "grad_norm": 0.7762123563340246, "learning_rate": 1.836540796376891e-06, "loss": 0.8874029517173767, "step": 1957 }, { "epoch": 0.4511520737327189, "grad_norm": 0.7670080315961673, "learning_rate": 1.8363319928623862e-06, "loss": 0.8944835662841797, "step": 1958 }, { "epoch": 0.45138248847926266, "grad_norm": 0.570293089893543, "learning_rate": 1.8361230679553996e-06, "loss": 0.7106739282608032, "step": 1959 }, { "epoch": 0.45161290322580644, "grad_norm": 0.7068996407627426, "learning_rate": 1.835914021686257e-06, "loss": 0.8668634295463562, "step": 1960 }, { "epoch": 0.4518433179723502, "grad_norm": 0.7818076957354034, "learning_rate": 1.8357048540853003e-06, "loss": 0.8123712539672852, "step": 1961 }, { "epoch": 0.452073732718894, "grad_norm": 0.7369058807274856, "learning_rate": 1.8354955651828907e-06, "loss": 0.865728497505188, "step": 1962 }, { "epoch": 0.4523041474654378, "grad_norm": 0.7502978391788373, "learning_rate": 1.8352861550094056e-06, "loss": 0.8066651225090027, "step": 1963 }, { "epoch": 0.4525345622119816, "grad_norm": 1.2076261262226256, "learning_rate": 1.835076623595241e-06, "loss": 1.020591139793396, "step": 1964 }, { "epoch": 0.45276497695852536, "grad_norm": 0.7642119123557376, "learning_rate": 1.83486697097081e-06, "loss": 0.839346706867218, "step": 1965 }, { "epoch": 0.45299539170506914, "grad_norm": 0.663652311830839, "learning_rate": 1.8346571971665434e-06, "loss": 0.7707340121269226, "step": 1966 }, { "epoch": 0.4532258064516129, "grad_norm": 0.6603686601649886, "learning_rate": 1.8344473022128897e-06, "loss": 0.7969534397125244, "step": 1967 }, { "epoch": 0.4534562211981567, "grad_norm": 0.8431782882642489, "learning_rate": 1.8342372861403143e-06, "loss": 0.9371283650398254, "step": 1968 }, { "epoch": 0.45368663594470043, "grad_norm": 0.7102966402282939, "learning_rate": 1.8340271489793015e-06, "loss": 0.7915256023406982, "step": 1969 }, { "epoch": 0.4539170506912442, "grad_norm": 0.6028172078632871, "learning_rate": 1.8338168907603522e-06, "loss": 0.8394884467124939, "step": 1970 }, { "epoch": 0.454147465437788, "grad_norm": 0.8133055611447335, "learning_rate": 1.833606511513985e-06, "loss": 0.7786067128181458, "step": 1971 }, { "epoch": 0.4543778801843318, "grad_norm": 0.905741517676821, "learning_rate": 1.833396011270736e-06, "loss": 0.9237443208694458, "step": 1972 }, { "epoch": 0.45460829493087557, "grad_norm": 0.9055049100464759, "learning_rate": 1.8331853900611596e-06, "loss": 0.7530162334442139, "step": 1973 }, { "epoch": 0.45483870967741935, "grad_norm": 0.7172947421019107, "learning_rate": 1.8329746479158263e-06, "loss": 0.8349624872207642, "step": 1974 }, { "epoch": 0.45506912442396313, "grad_norm": 0.9222448487169791, "learning_rate": 1.8327637848653259e-06, "loss": 0.8748637437820435, "step": 1975 }, { "epoch": 0.4552995391705069, "grad_norm": 0.7416851295200875, "learning_rate": 1.832552800940265e-06, "loss": 0.9111478924751282, "step": 1976 }, { "epoch": 0.4555299539170507, "grad_norm": 0.6251856024732342, "learning_rate": 1.8323416961712665e-06, "loss": 0.8108797073364258, "step": 1977 }, { "epoch": 0.4557603686635945, "grad_norm": 0.9459625715160394, "learning_rate": 1.832130470588973e-06, "loss": 0.9266520738601685, "step": 1978 }, { "epoch": 0.45599078341013827, "grad_norm": 0.7773850051724754, "learning_rate": 1.831919124224043e-06, "loss": 0.9092522859573364, "step": 1979 }, { "epoch": 0.45622119815668205, "grad_norm": 0.664954530341155, "learning_rate": 1.8317076571071536e-06, "loss": 0.8249068260192871, "step": 1980 }, { "epoch": 0.45645161290322583, "grad_norm": 0.770896895795481, "learning_rate": 1.8314960692689992e-06, "loss": 0.7497084140777588, "step": 1981 }, { "epoch": 0.45668202764976956, "grad_norm": 0.7450904317902424, "learning_rate": 1.8312843607402907e-06, "loss": 0.7360142469406128, "step": 1982 }, { "epoch": 0.45691244239631335, "grad_norm": 0.7224490513690306, "learning_rate": 1.8310725315517578e-06, "loss": 0.8443512320518494, "step": 1983 }, { "epoch": 0.45714285714285713, "grad_norm": 0.6770718154001021, "learning_rate": 1.830860581734147e-06, "loss": 0.7995656728744507, "step": 1984 }, { "epoch": 0.4573732718894009, "grad_norm": 0.8305927985197211, "learning_rate": 1.8306485113182229e-06, "loss": 0.7396436929702759, "step": 1985 }, { "epoch": 0.4576036866359447, "grad_norm": 0.7351757860546534, "learning_rate": 1.8304363203347668e-06, "loss": 0.7415385246276855, "step": 1986 }, { "epoch": 0.4578341013824885, "grad_norm": 0.8416697439034252, "learning_rate": 1.8302240088145784e-06, "loss": 0.9316694736480713, "step": 1987 }, { "epoch": 0.45806451612903226, "grad_norm": 0.6482250359686991, "learning_rate": 1.830011576788474e-06, "loss": 0.7692697048187256, "step": 1988 }, { "epoch": 0.45829493087557605, "grad_norm": 0.7546540101557039, "learning_rate": 1.829799024287288e-06, "loss": 0.8377524614334106, "step": 1989 }, { "epoch": 0.45852534562211983, "grad_norm": 0.800432018333432, "learning_rate": 1.8295863513418724e-06, "loss": 0.8005630970001221, "step": 1990 }, { "epoch": 0.4587557603686636, "grad_norm": 0.6132717130341248, "learning_rate": 1.829373557983096e-06, "loss": 0.8609297275543213, "step": 1991 }, { "epoch": 0.4589861751152074, "grad_norm": 0.7611348757483902, "learning_rate": 1.8291606442418454e-06, "loss": 0.9111521244049072, "step": 1992 }, { "epoch": 0.4592165898617512, "grad_norm": 0.6486046074488622, "learning_rate": 1.8289476101490254e-06, "loss": 0.7540388107299805, "step": 1993 }, { "epoch": 0.4594470046082949, "grad_norm": 0.7891604292973137, "learning_rate": 1.8287344557355565e-06, "loss": 0.9018936157226562, "step": 1994 }, { "epoch": 0.4596774193548387, "grad_norm": 0.8558307889574596, "learning_rate": 1.8285211810323791e-06, "loss": 0.918912947177887, "step": 1995 }, { "epoch": 0.4599078341013825, "grad_norm": 0.6889746928021416, "learning_rate": 1.8283077860704488e-06, "loss": 0.7777351140975952, "step": 1996 }, { "epoch": 0.46013824884792626, "grad_norm": 0.8546199279018112, "learning_rate": 1.82809427088074e-06, "loss": 0.9283437132835388, "step": 1997 }, { "epoch": 0.46036866359447004, "grad_norm": 0.7206983576837674, "learning_rate": 1.8278806354942442e-06, "loss": 0.7032894492149353, "step": 1998 }, { "epoch": 0.4605990783410138, "grad_norm": 0.7084552833839082, "learning_rate": 1.8276668799419696e-06, "loss": 0.8392905592918396, "step": 1999 }, { "epoch": 0.4608294930875576, "grad_norm": 0.8216520324249929, "learning_rate": 1.8274530042549434e-06, "loss": 0.8059369325637817, "step": 2000 }, { "epoch": 0.4610599078341014, "grad_norm": 0.7022225516164876, "learning_rate": 1.827239008464209e-06, "loss": 0.7738519906997681, "step": 2001 }, { "epoch": 0.4612903225806452, "grad_norm": 0.894321981759021, "learning_rate": 1.8270248926008275e-06, "loss": 0.9189014434814453, "step": 2002 }, { "epoch": 0.46152073732718896, "grad_norm": 0.9750927332357222, "learning_rate": 1.8268106566958782e-06, "loss": 0.8878552913665771, "step": 2003 }, { "epoch": 0.46175115207373274, "grad_norm": 0.7601663032895281, "learning_rate": 1.826596300780456e-06, "loss": 0.9786058664321899, "step": 2004 }, { "epoch": 0.4619815668202765, "grad_norm": 0.7513085122069586, "learning_rate": 1.8263818248856754e-06, "loss": 0.7887653112411499, "step": 2005 }, { "epoch": 0.46221198156682025, "grad_norm": 0.7571825247765968, "learning_rate": 1.8261672290426668e-06, "loss": 0.8773549795150757, "step": 2006 }, { "epoch": 0.46244239631336403, "grad_norm": 0.6543768471355319, "learning_rate": 1.8259525132825786e-06, "loss": 0.6929831504821777, "step": 2007 }, { "epoch": 0.4626728110599078, "grad_norm": 0.8544099497368944, "learning_rate": 1.8257376776365765e-06, "loss": 0.9438232183456421, "step": 2008 }, { "epoch": 0.4629032258064516, "grad_norm": 0.6803330432545487, "learning_rate": 1.8255227221358435e-06, "loss": 0.7559594511985779, "step": 2009 }, { "epoch": 0.4631336405529954, "grad_norm": 0.7347158890455135, "learning_rate": 1.8253076468115805e-06, "loss": 0.8990212678909302, "step": 2010 }, { "epoch": 0.46336405529953917, "grad_norm": 0.7325838411869188, "learning_rate": 1.825092451695005e-06, "loss": 0.8638331890106201, "step": 2011 }, { "epoch": 0.46359447004608295, "grad_norm": 0.7537964319175384, "learning_rate": 1.8248771368173522e-06, "loss": 0.9262570142745972, "step": 2012 }, { "epoch": 0.46382488479262673, "grad_norm": 0.770620841657562, "learning_rate": 1.8246617022098754e-06, "loss": 0.7412514090538025, "step": 2013 }, { "epoch": 0.4640552995391705, "grad_norm": 0.8304378021605247, "learning_rate": 1.8244461479038437e-06, "loss": 0.8680287599563599, "step": 2014 }, { "epoch": 0.4642857142857143, "grad_norm": 0.7004084931574237, "learning_rate": 1.8242304739305457e-06, "loss": 0.7774302959442139, "step": 2015 }, { "epoch": 0.4645161290322581, "grad_norm": 0.8275882534036313, "learning_rate": 1.824014680321285e-06, "loss": 0.9278442859649658, "step": 2016 }, { "epoch": 0.46474654377880187, "grad_norm": 0.6808747325759799, "learning_rate": 1.8237987671073846e-06, "loss": 0.9617106914520264, "step": 2017 }, { "epoch": 0.4649769585253456, "grad_norm": 0.682915952128137, "learning_rate": 1.8235827343201838e-06, "loss": 0.7983255386352539, "step": 2018 }, { "epoch": 0.4652073732718894, "grad_norm": 0.7878897167758285, "learning_rate": 1.8233665819910393e-06, "loss": 0.7966747283935547, "step": 2019 }, { "epoch": 0.46543778801843316, "grad_norm": 0.893729443286113, "learning_rate": 1.8231503101513253e-06, "loss": 0.8977803587913513, "step": 2020 }, { "epoch": 0.46566820276497695, "grad_norm": 0.6522874054217892, "learning_rate": 1.8229339188324334e-06, "loss": 0.7098231911659241, "step": 2021 }, { "epoch": 0.46589861751152073, "grad_norm": 0.6971785978535421, "learning_rate": 1.822717408065773e-06, "loss": 0.6402776837348938, "step": 2022 }, { "epoch": 0.4661290322580645, "grad_norm": 0.7272467550896602, "learning_rate": 1.8225007778827698e-06, "loss": 0.797479510307312, "step": 2023 }, { "epoch": 0.4663594470046083, "grad_norm": 0.7464543289112394, "learning_rate": 1.8222840283148675e-06, "loss": 0.8205317258834839, "step": 2024 }, { "epoch": 0.4665898617511521, "grad_norm": 0.755319646803663, "learning_rate": 1.822067159393527e-06, "loss": 0.8123108148574829, "step": 2025 }, { "epoch": 0.46682027649769586, "grad_norm": 0.7470494916721893, "learning_rate": 1.8218501711502262e-06, "loss": 0.9103116989135742, "step": 2026 }, { "epoch": 0.46705069124423965, "grad_norm": 0.8399971318490079, "learning_rate": 1.8216330636164617e-06, "loss": 0.725040078163147, "step": 2027 }, { "epoch": 0.46728110599078343, "grad_norm": 0.8693243601175246, "learning_rate": 1.8214158368237456e-06, "loss": 0.8598217964172363, "step": 2028 }, { "epoch": 0.4675115207373272, "grad_norm": 0.9587381766929439, "learning_rate": 1.821198490803608e-06, "loss": 0.9139465093612671, "step": 2029 }, { "epoch": 0.46774193548387094, "grad_norm": 0.7850806397253399, "learning_rate": 1.8209810255875966e-06, "loss": 0.8331620097160339, "step": 2030 }, { "epoch": 0.4679723502304147, "grad_norm": 0.8908286579751021, "learning_rate": 1.8207634412072764e-06, "loss": 0.7901387810707092, "step": 2031 }, { "epoch": 0.4682027649769585, "grad_norm": 0.6861413854458724, "learning_rate": 1.8205457376942288e-06, "loss": 0.7651060819625854, "step": 2032 }, { "epoch": 0.4684331797235023, "grad_norm": 0.7738923235394239, "learning_rate": 1.820327915080054e-06, "loss": 0.7382134199142456, "step": 2033 }, { "epoch": 0.4686635944700461, "grad_norm": 0.6962774548883505, "learning_rate": 1.8201099733963682e-06, "loss": 0.7851507067680359, "step": 2034 }, { "epoch": 0.46889400921658986, "grad_norm": 0.8995005169228616, "learning_rate": 1.8198919126748056e-06, "loss": 0.9357708692550659, "step": 2035 }, { "epoch": 0.46912442396313364, "grad_norm": 0.8238296907521364, "learning_rate": 1.819673732947017e-06, "loss": 0.8188502788543701, "step": 2036 }, { "epoch": 0.4693548387096774, "grad_norm": 1.0258349340262545, "learning_rate": 1.8194554342446712e-06, "loss": 0.81590735912323, "step": 2037 }, { "epoch": 0.4695852534562212, "grad_norm": 0.811644542087897, "learning_rate": 1.8192370165994544e-06, "loss": 0.6879743933677673, "step": 2038 }, { "epoch": 0.469815668202765, "grad_norm": 0.8669848845646889, "learning_rate": 1.8190184800430686e-06, "loss": 0.9287742376327515, "step": 2039 }, { "epoch": 0.4700460829493088, "grad_norm": 0.9807524438459786, "learning_rate": 1.818799824607235e-06, "loss": 0.9625484943389893, "step": 2040 }, { "epoch": 0.47027649769585256, "grad_norm": 0.8259194997097902, "learning_rate": 1.8185810503236904e-06, "loss": 0.8267782926559448, "step": 2041 }, { "epoch": 0.4705069124423963, "grad_norm": 0.8404148332122154, "learning_rate": 1.8183621572241904e-06, "loss": 0.8827054500579834, "step": 2042 }, { "epoch": 0.47073732718894007, "grad_norm": 0.7550183773883651, "learning_rate": 1.8181431453405067e-06, "loss": 0.7755721807479858, "step": 2043 }, { "epoch": 0.47096774193548385, "grad_norm": 0.9234865066349518, "learning_rate": 1.8179240147044285e-06, "loss": 0.8320283889770508, "step": 2044 }, { "epoch": 0.47119815668202764, "grad_norm": 0.7077773446032107, "learning_rate": 1.8177047653477619e-06, "loss": 0.8737574815750122, "step": 2045 }, { "epoch": 0.4714285714285714, "grad_norm": 0.8821209974643925, "learning_rate": 1.8174853973023317e-06, "loss": 0.7007719278335571, "step": 2046 }, { "epoch": 0.4716589861751152, "grad_norm": 0.822666216900424, "learning_rate": 1.817265910599978e-06, "loss": 0.8062577247619629, "step": 2047 }, { "epoch": 0.471889400921659, "grad_norm": 0.6775605665320994, "learning_rate": 1.8170463052725594e-06, "loss": 0.7059667110443115, "step": 2048 }, { "epoch": 0.47211981566820277, "grad_norm": 0.7830423922028903, "learning_rate": 1.816826581351951e-06, "loss": 0.9025841951370239, "step": 2049 }, { "epoch": 0.47235023041474655, "grad_norm": 0.8388278274768075, "learning_rate": 1.8166067388700458e-06, "loss": 0.7534186840057373, "step": 2050 }, { "epoch": 0.47258064516129034, "grad_norm": 0.7623620329649421, "learning_rate": 1.8163867778587534e-06, "loss": 0.9447616338729858, "step": 2051 }, { "epoch": 0.4728110599078341, "grad_norm": 0.6423913345578718, "learning_rate": 1.8161666983500012e-06, "loss": 0.7092128992080688, "step": 2052 }, { "epoch": 0.4730414746543779, "grad_norm": 0.8648864734786782, "learning_rate": 1.815946500375733e-06, "loss": 0.8689497113227844, "step": 2053 }, { "epoch": 0.4732718894009217, "grad_norm": 0.8941588190294093, "learning_rate": 1.8157261839679105e-06, "loss": 0.9298638105392456, "step": 2054 }, { "epoch": 0.4735023041474654, "grad_norm": 0.6527064378770876, "learning_rate": 1.8155057491585125e-06, "loss": 0.7138030529022217, "step": 2055 }, { "epoch": 0.4737327188940092, "grad_norm": 0.6699370139228978, "learning_rate": 1.815285195979534e-06, "loss": 0.825221836566925, "step": 2056 }, { "epoch": 0.473963133640553, "grad_norm": 0.8559190132682327, "learning_rate": 1.8150645244629891e-06, "loss": 0.8643208742141724, "step": 2057 }, { "epoch": 0.47419354838709676, "grad_norm": 0.8338353738235549, "learning_rate": 1.8148437346409073e-06, "loss": 0.9611828327178955, "step": 2058 }, { "epoch": 0.47442396313364055, "grad_norm": 0.8119567978397472, "learning_rate": 1.8146228265453363e-06, "loss": 0.8609912991523743, "step": 2059 }, { "epoch": 0.47465437788018433, "grad_norm": 0.7540582566966652, "learning_rate": 1.8144018002083404e-06, "loss": 0.8277603387832642, "step": 2060 }, { "epoch": 0.4748847926267281, "grad_norm": 0.8438703930452028, "learning_rate": 1.814180655662001e-06, "loss": 0.8601360321044922, "step": 2061 }, { "epoch": 0.4751152073732719, "grad_norm": 0.7023202538855939, "learning_rate": 1.8139593929384178e-06, "loss": 0.8454653024673462, "step": 2062 }, { "epoch": 0.4753456221198157, "grad_norm": 0.8270167900724995, "learning_rate": 1.8137380120697059e-06, "loss": 0.870082437992096, "step": 2063 }, { "epoch": 0.47557603686635946, "grad_norm": 0.8497953303327396, "learning_rate": 1.8135165130879988e-06, "loss": 0.8064073324203491, "step": 2064 }, { "epoch": 0.47580645161290325, "grad_norm": 0.5532170457954219, "learning_rate": 1.813294896025447e-06, "loss": 0.829608678817749, "step": 2065 }, { "epoch": 0.47603686635944703, "grad_norm": 0.7131662100806325, "learning_rate": 1.8130731609142176e-06, "loss": 0.8185791969299316, "step": 2066 }, { "epoch": 0.47626728110599076, "grad_norm": 0.9405207635689381, "learning_rate": 1.812851307786495e-06, "loss": 0.8855293989181519, "step": 2067 }, { "epoch": 0.47649769585253454, "grad_norm": 0.6766659884445188, "learning_rate": 1.8126293366744815e-06, "loss": 0.7495461106300354, "step": 2068 }, { "epoch": 0.4767281105990783, "grad_norm": 0.9706294845402844, "learning_rate": 1.8124072476103956e-06, "loss": 0.9435098171234131, "step": 2069 }, { "epoch": 0.4769585253456221, "grad_norm": 0.7637936743615437, "learning_rate": 1.8121850406264727e-06, "loss": 0.9299448728561401, "step": 2070 }, { "epoch": 0.4771889400921659, "grad_norm": 0.9500813357187163, "learning_rate": 1.8119627157549665e-06, "loss": 0.9011991024017334, "step": 2071 }, { "epoch": 0.4774193548387097, "grad_norm": 0.6847341374863515, "learning_rate": 1.8117402730281476e-06, "loss": 0.7326598167419434, "step": 2072 }, { "epoch": 0.47764976958525346, "grad_norm": 0.7364560962143368, "learning_rate": 1.8115177124783024e-06, "loss": 0.8137445449829102, "step": 2073 }, { "epoch": 0.47788018433179724, "grad_norm": 0.9429635333298672, "learning_rate": 1.811295034137735e-06, "loss": 0.8653519153594971, "step": 2074 }, { "epoch": 0.478110599078341, "grad_norm": 0.8511205154632088, "learning_rate": 1.811072238038768e-06, "loss": 0.9140677452087402, "step": 2075 }, { "epoch": 0.4783410138248848, "grad_norm": 0.8012710450337872, "learning_rate": 1.810849324213739e-06, "loss": 0.8878934979438782, "step": 2076 }, { "epoch": 0.4785714285714286, "grad_norm": 0.6571390792752639, "learning_rate": 1.8106262926950045e-06, "loss": 0.8238190412521362, "step": 2077 }, { "epoch": 0.4788018433179724, "grad_norm": 0.8097531572330602, "learning_rate": 1.8104031435149362e-06, "loss": 0.7722488641738892, "step": 2078 }, { "epoch": 0.4790322580645161, "grad_norm": 0.890992078514086, "learning_rate": 1.8101798767059248e-06, "loss": 0.9338192939758301, "step": 2079 }, { "epoch": 0.4792626728110599, "grad_norm": 0.8000986035452533, "learning_rate": 1.8099564923003767e-06, "loss": 0.7342168688774109, "step": 2080 }, { "epoch": 0.47949308755760367, "grad_norm": 0.7644530181466097, "learning_rate": 1.809732990330716e-06, "loss": 0.8445772528648376, "step": 2081 }, { "epoch": 0.47972350230414745, "grad_norm": 0.7291725333905612, "learning_rate": 1.8095093708293839e-06, "loss": 0.825678825378418, "step": 2082 }, { "epoch": 0.47995391705069124, "grad_norm": 0.8072481370959372, "learning_rate": 1.8092856338288381e-06, "loss": 0.7995405197143555, "step": 2083 }, { "epoch": 0.480184331797235, "grad_norm": 0.8193777121106555, "learning_rate": 1.8090617793615536e-06, "loss": 0.7811745405197144, "step": 2084 }, { "epoch": 0.4804147465437788, "grad_norm": 0.7364459454678961, "learning_rate": 1.8088378074600231e-06, "loss": 0.842727780342102, "step": 2085 }, { "epoch": 0.4806451612903226, "grad_norm": 0.7640299868769393, "learning_rate": 1.808613718156756e-06, "loss": 0.840941309928894, "step": 2086 }, { "epoch": 0.48087557603686637, "grad_norm": 0.7783965916533324, "learning_rate": 1.808389511484278e-06, "loss": 0.9024466872215271, "step": 2087 }, { "epoch": 0.48110599078341015, "grad_norm": 0.8943218774431004, "learning_rate": 1.8081651874751325e-06, "loss": 0.9112771153450012, "step": 2088 }, { "epoch": 0.48133640552995394, "grad_norm": 0.6675207900987881, "learning_rate": 1.8079407461618797e-06, "loss": 0.834719181060791, "step": 2089 }, { "epoch": 0.4815668202764977, "grad_norm": 0.8421358450475633, "learning_rate": 1.8077161875770971e-06, "loss": 0.8472555875778198, "step": 2090 }, { "epoch": 0.48179723502304145, "grad_norm": 0.7303169649115268, "learning_rate": 1.8074915117533796e-06, "loss": 0.8459140062332153, "step": 2091 }, { "epoch": 0.48202764976958523, "grad_norm": 0.6945162401362365, "learning_rate": 1.807266718723338e-06, "loss": 0.6570066213607788, "step": 2092 }, { "epoch": 0.482258064516129, "grad_norm": 0.7314212575092469, "learning_rate": 1.8070418085196006e-06, "loss": 0.8897342681884766, "step": 2093 }, { "epoch": 0.4824884792626728, "grad_norm": 0.8312385191950623, "learning_rate": 1.8068167811748132e-06, "loss": 0.8339060544967651, "step": 2094 }, { "epoch": 0.4827188940092166, "grad_norm": 0.7547678583050421, "learning_rate": 1.8065916367216383e-06, "loss": 0.7972484827041626, "step": 2095 }, { "epoch": 0.48294930875576036, "grad_norm": 0.7424060773179767, "learning_rate": 1.806366375192755e-06, "loss": 0.7894760966300964, "step": 2096 }, { "epoch": 0.48317972350230415, "grad_norm": 0.7408232706643347, "learning_rate": 1.8061409966208597e-06, "loss": 0.713944673538208, "step": 2097 }, { "epoch": 0.48341013824884793, "grad_norm": 0.8423029874540192, "learning_rate": 1.8059155010386662e-06, "loss": 0.7832180261611938, "step": 2098 }, { "epoch": 0.4836405529953917, "grad_norm": 0.6563887159918735, "learning_rate": 1.8056898884789043e-06, "loss": 0.8873809576034546, "step": 2099 }, { "epoch": 0.4838709677419355, "grad_norm": 0.8864132111812594, "learning_rate": 1.8054641589743218e-06, "loss": 0.8174929618835449, "step": 2100 }, { "epoch": 0.4841013824884793, "grad_norm": 0.6797946394214075, "learning_rate": 1.805238312557683e-06, "loss": 0.876921534538269, "step": 2101 }, { "epoch": 0.48433179723502306, "grad_norm": 0.7629892942789464, "learning_rate": 1.8050123492617693e-06, "loss": 0.9455937147140503, "step": 2102 }, { "epoch": 0.4845622119815668, "grad_norm": 0.6880522665173857, "learning_rate": 1.8047862691193784e-06, "loss": 0.8146508932113647, "step": 2103 }, { "epoch": 0.4847926267281106, "grad_norm": 0.762873599305404, "learning_rate": 1.8045600721633262e-06, "loss": 0.8513495326042175, "step": 2104 }, { "epoch": 0.48502304147465436, "grad_norm": 0.8329533644475985, "learning_rate": 1.8043337584264443e-06, "loss": 0.8430027961730957, "step": 2105 }, { "epoch": 0.48525345622119814, "grad_norm": 0.6323595862794837, "learning_rate": 1.8041073279415826e-06, "loss": 0.7683960199356079, "step": 2106 }, { "epoch": 0.4854838709677419, "grad_norm": 0.6620613064117244, "learning_rate": 1.8038807807416067e-06, "loss": 0.7099664211273193, "step": 2107 }, { "epoch": 0.4857142857142857, "grad_norm": 0.725415262213876, "learning_rate": 1.8036541168593994e-06, "loss": 0.8046330213546753, "step": 2108 }, { "epoch": 0.4859447004608295, "grad_norm": 0.7817858416968994, "learning_rate": 1.803427336327861e-06, "loss": 0.8387504816055298, "step": 2109 }, { "epoch": 0.4861751152073733, "grad_norm": 0.7135784962709865, "learning_rate": 1.8032004391799085e-06, "loss": 0.883955717086792, "step": 2110 }, { "epoch": 0.48640552995391706, "grad_norm": 0.7408960119431725, "learning_rate": 1.8029734254484756e-06, "loss": 0.7622070908546448, "step": 2111 }, { "epoch": 0.48663594470046084, "grad_norm": 0.7726145388563513, "learning_rate": 1.802746295166513e-06, "loss": 0.6625584363937378, "step": 2112 }, { "epoch": 0.4868663594470046, "grad_norm": 0.8189497209718242, "learning_rate": 1.8025190483669878e-06, "loss": 0.8232327699661255, "step": 2113 }, { "epoch": 0.4870967741935484, "grad_norm": 0.8528139298235252, "learning_rate": 1.8022916850828857e-06, "loss": 0.9083148241043091, "step": 2114 }, { "epoch": 0.4873271889400922, "grad_norm": 0.7392938308731752, "learning_rate": 1.8020642053472074e-06, "loss": 0.8248398303985596, "step": 2115 }, { "epoch": 0.4875576036866359, "grad_norm": 0.7121240208517446, "learning_rate": 1.8018366091929717e-06, "loss": 0.8055423498153687, "step": 2116 }, { "epoch": 0.4877880184331797, "grad_norm": 0.778973471543998, "learning_rate": 1.8016088966532135e-06, "loss": 0.8716787695884705, "step": 2117 }, { "epoch": 0.4880184331797235, "grad_norm": 0.7561230225795058, "learning_rate": 1.801381067760985e-06, "loss": 0.8530780673027039, "step": 2118 }, { "epoch": 0.48824884792626727, "grad_norm": 0.6774037273322415, "learning_rate": 1.8011531225493557e-06, "loss": 0.7958484888076782, "step": 2119 }, { "epoch": 0.48847926267281105, "grad_norm": 0.8596146173926187, "learning_rate": 1.800925061051411e-06, "loss": 0.8312872648239136, "step": 2120 }, { "epoch": 0.48870967741935484, "grad_norm": 0.8135900564482533, "learning_rate": 1.8006968833002541e-06, "loss": 0.8097391128540039, "step": 2121 }, { "epoch": 0.4889400921658986, "grad_norm": 0.9139337120301166, "learning_rate": 1.8004685893290046e-06, "loss": 0.8636112213134766, "step": 2122 }, { "epoch": 0.4891705069124424, "grad_norm": 0.9088930992891967, "learning_rate": 1.800240179170799e-06, "loss": 0.9122721552848816, "step": 2123 }, { "epoch": 0.4894009216589862, "grad_norm": 0.914017678688966, "learning_rate": 1.8000116528587907e-06, "loss": 0.8172330856323242, "step": 2124 }, { "epoch": 0.48963133640552997, "grad_norm": 0.8007018337125341, "learning_rate": 1.7997830104261502e-06, "loss": 0.7377575635910034, "step": 2125 }, { "epoch": 0.48986175115207375, "grad_norm": 0.9218847107737449, "learning_rate": 1.7995542519060644e-06, "loss": 0.7278136014938354, "step": 2126 }, { "epoch": 0.49009216589861754, "grad_norm": 0.8808842591031234, "learning_rate": 1.7993253773317374e-06, "loss": 0.8977715969085693, "step": 2127 }, { "epoch": 0.49032258064516127, "grad_norm": 0.7019593909183576, "learning_rate": 1.7990963867363902e-06, "loss": 0.789979100227356, "step": 2128 }, { "epoch": 0.49055299539170505, "grad_norm": 0.7069412826082713, "learning_rate": 1.7988672801532602e-06, "loss": 0.8304328322410583, "step": 2129 }, { "epoch": 0.49078341013824883, "grad_norm": 0.7922910084647693, "learning_rate": 1.7986380576156019e-06, "loss": 0.7597516179084778, "step": 2130 }, { "epoch": 0.4910138248847926, "grad_norm": 0.6007262757544611, "learning_rate": 1.7984087191566873e-06, "loss": 0.661639928817749, "step": 2131 }, { "epoch": 0.4912442396313364, "grad_norm": 0.7484873666922557, "learning_rate": 1.7981792648098035e-06, "loss": 0.7871333360671997, "step": 2132 }, { "epoch": 0.4914746543778802, "grad_norm": 0.7758289248832314, "learning_rate": 1.7979496946082565e-06, "loss": 0.8166402578353882, "step": 2133 }, { "epoch": 0.49170506912442397, "grad_norm": 0.6906377275927077, "learning_rate": 1.7977200085853674e-06, "loss": 0.7112412452697754, "step": 2134 }, { "epoch": 0.49193548387096775, "grad_norm": 0.8103572300867555, "learning_rate": 1.7974902067744752e-06, "loss": 0.8358132839202881, "step": 2135 }, { "epoch": 0.49216589861751153, "grad_norm": 0.7103875590554449, "learning_rate": 1.7972602892089353e-06, "loss": 0.8544377088546753, "step": 2136 }, { "epoch": 0.4923963133640553, "grad_norm": 0.9004573017295656, "learning_rate": 1.7970302559221197e-06, "loss": 1.0105161666870117, "step": 2137 }, { "epoch": 0.4926267281105991, "grad_norm": 0.7525179633837843, "learning_rate": 1.7968001069474176e-06, "loss": 0.7666197419166565, "step": 2138 }, { "epoch": 0.4928571428571429, "grad_norm": 0.9209694432294897, "learning_rate": 1.7965698423182349e-06, "loss": 0.9250742197036743, "step": 2139 }, { "epoch": 0.4930875576036866, "grad_norm": 0.8066717978287462, "learning_rate": 1.7963394620679942e-06, "loss": 0.8269995450973511, "step": 2140 }, { "epoch": 0.4933179723502304, "grad_norm": 0.9533305612537857, "learning_rate": 1.7961089662301346e-06, "loss": 1.0431339740753174, "step": 2141 }, { "epoch": 0.4935483870967742, "grad_norm": 0.7107784117562762, "learning_rate": 1.7958783548381125e-06, "loss": 0.7474809288978577, "step": 2142 }, { "epoch": 0.49377880184331796, "grad_norm": 0.7729911498332706, "learning_rate": 1.7956476279254007e-06, "loss": 0.8850520849227905, "step": 2143 }, { "epoch": 0.49400921658986174, "grad_norm": 0.8566824172714074, "learning_rate": 1.7954167855254893e-06, "loss": 0.8898880481719971, "step": 2144 }, { "epoch": 0.4942396313364055, "grad_norm": 0.886855392770134, "learning_rate": 1.7951858276718842e-06, "loss": 0.8718239068984985, "step": 2145 }, { "epoch": 0.4944700460829493, "grad_norm": 0.7604278475621951, "learning_rate": 1.794954754398109e-06, "loss": 0.8407484292984009, "step": 2146 }, { "epoch": 0.4947004608294931, "grad_norm": 0.9582215314216729, "learning_rate": 1.7947235657377036e-06, "loss": 0.8453764915466309, "step": 2147 }, { "epoch": 0.4949308755760369, "grad_norm": 0.6332693049941237, "learning_rate": 1.794492261724225e-06, "loss": 0.5795568227767944, "step": 2148 }, { "epoch": 0.49516129032258066, "grad_norm": 0.9864343717736791, "learning_rate": 1.794260842391246e-06, "loss": 0.8601347208023071, "step": 2149 }, { "epoch": 0.49539170506912444, "grad_norm": 0.8909931853274754, "learning_rate": 1.7940293077723573e-06, "loss": 0.8328324556350708, "step": 2150 }, { "epoch": 0.4956221198156682, "grad_norm": 0.6691517417241877, "learning_rate": 1.7937976579011655e-06, "loss": 0.8924463391304016, "step": 2151 }, { "epoch": 0.49585253456221196, "grad_norm": 0.7983254161536232, "learning_rate": 1.7935658928112947e-06, "loss": 0.9725968837738037, "step": 2152 }, { "epoch": 0.49608294930875574, "grad_norm": 0.7649378566504706, "learning_rate": 1.7933340125363855e-06, "loss": 0.7814322710037231, "step": 2153 }, { "epoch": 0.4963133640552995, "grad_norm": 0.795129549448148, "learning_rate": 1.793102017110094e-06, "loss": 0.8022886514663696, "step": 2154 }, { "epoch": 0.4965437788018433, "grad_norm": 0.9455352743035539, "learning_rate": 1.7928699065660951e-06, "loss": 0.9747333526611328, "step": 2155 }, { "epoch": 0.4967741935483871, "grad_norm": 1.0353782305768249, "learning_rate": 1.7926376809380783e-06, "loss": 0.9039797782897949, "step": 2156 }, { "epoch": 0.49700460829493087, "grad_norm": 1.000992925643121, "learning_rate": 1.7924053402597518e-06, "loss": 0.9444677829742432, "step": 2157 }, { "epoch": 0.49723502304147466, "grad_norm": 0.7688551400180308, "learning_rate": 1.7921728845648393e-06, "loss": 0.8442031741142273, "step": 2158 }, { "epoch": 0.49746543778801844, "grad_norm": 0.8590371435800439, "learning_rate": 1.7919403138870813e-06, "loss": 0.9410362839698792, "step": 2159 }, { "epoch": 0.4976958525345622, "grad_norm": 0.8168398725206235, "learning_rate": 1.791707628260235e-06, "loss": 0.8929172158241272, "step": 2160 }, { "epoch": 0.497926267281106, "grad_norm": 0.970370102226972, "learning_rate": 1.7914748277180745e-06, "loss": 0.9259560108184814, "step": 2161 }, { "epoch": 0.4981566820276498, "grad_norm": 0.7778204252845836, "learning_rate": 1.7912419122943904e-06, "loss": 0.8201638460159302, "step": 2162 }, { "epoch": 0.49838709677419357, "grad_norm": 0.7628075269760098, "learning_rate": 1.7910088820229907e-06, "loss": 0.7554556131362915, "step": 2163 }, { "epoch": 0.4986175115207373, "grad_norm": 0.7698860809397133, "learning_rate": 1.7907757369376984e-06, "loss": 0.8206801414489746, "step": 2164 }, { "epoch": 0.4988479262672811, "grad_norm": 0.7606971261006891, "learning_rate": 1.7905424770723551e-06, "loss": 0.765400767326355, "step": 2165 }, { "epoch": 0.49907834101382487, "grad_norm": 0.9629614917036793, "learning_rate": 1.7903091024608177e-06, "loss": 0.9191527366638184, "step": 2166 }, { "epoch": 0.49930875576036865, "grad_norm": 1.0883591834210613, "learning_rate": 1.7900756131369601e-06, "loss": 0.8515042662620544, "step": 2167 }, { "epoch": 0.49953917050691243, "grad_norm": 0.7623230395498896, "learning_rate": 1.7898420091346736e-06, "loss": 0.8509752750396729, "step": 2168 }, { "epoch": 0.4997695852534562, "grad_norm": 0.7417934516303272, "learning_rate": 1.7896082904878647e-06, "loss": 0.8007084131240845, "step": 2169 }, { "epoch": 0.5, "grad_norm": 0.8597818097533757, "learning_rate": 1.789374457230458e-06, "loss": 0.8395413756370544, "step": 2170 }, { "epoch": 0.5002304147465437, "grad_norm": 0.7232889708808644, "learning_rate": 1.7891405093963937e-06, "loss": 0.8624853491783142, "step": 2171 }, { "epoch": 0.5004608294930876, "grad_norm": 0.6629899968556545, "learning_rate": 1.788906447019629e-06, "loss": 0.8141548037528992, "step": 2172 }, { "epoch": 0.5006912442396313, "grad_norm": 0.6495144260680482, "learning_rate": 1.7886722701341382e-06, "loss": 0.6764500141143799, "step": 2173 }, { "epoch": 0.5009216589861751, "grad_norm": 0.6701022764652186, "learning_rate": 1.7884379787739112e-06, "loss": 0.710756778717041, "step": 2174 }, { "epoch": 0.5011520737327189, "grad_norm": 0.8273999117205362, "learning_rate": 1.7882035729729555e-06, "loss": 0.8090574145317078, "step": 2175 }, { "epoch": 0.5013824884792627, "grad_norm": 0.6977221855783239, "learning_rate": 1.7879690527652943e-06, "loss": 0.7639138102531433, "step": 2176 }, { "epoch": 0.5016129032258064, "grad_norm": 0.9185836860641033, "learning_rate": 1.7877344181849687e-06, "loss": 0.8093903660774231, "step": 2177 }, { "epoch": 0.5018433179723503, "grad_norm": 0.7610855435865236, "learning_rate": 1.7874996692660348e-06, "loss": 0.8705824017524719, "step": 2178 }, { "epoch": 0.502073732718894, "grad_norm": 0.7815265219501579, "learning_rate": 1.7872648060425666e-06, "loss": 0.7365947961807251, "step": 2179 }, { "epoch": 0.5023041474654378, "grad_norm": 0.8989287933893153, "learning_rate": 1.787029828548654e-06, "loss": 0.9405299425125122, "step": 2180 }, { "epoch": 0.5025345622119816, "grad_norm": 0.907417749032586, "learning_rate": 1.7867947368184036e-06, "loss": 0.9232017993927002, "step": 2181 }, { "epoch": 0.5027649769585254, "grad_norm": 1.0801728154122552, "learning_rate": 1.7865595308859388e-06, "loss": 0.9941537380218506, "step": 2182 }, { "epoch": 0.5029953917050691, "grad_norm": 0.7341611336832391, "learning_rate": 1.7863242107853993e-06, "loss": 0.6981802582740784, "step": 2183 }, { "epoch": 0.5032258064516129, "grad_norm": 0.8346521198909456, "learning_rate": 1.7860887765509417e-06, "loss": 0.8155109882354736, "step": 2184 }, { "epoch": 0.5034562211981567, "grad_norm": 0.8846374910749497, "learning_rate": 1.7858532282167385e-06, "loss": 0.7246255874633789, "step": 2185 }, { "epoch": 0.5036866359447004, "grad_norm": 0.7027049895049993, "learning_rate": 1.7856175658169796e-06, "loss": 0.7042064666748047, "step": 2186 }, { "epoch": 0.5039170506912443, "grad_norm": 0.8633735424450812, "learning_rate": 1.7853817893858714e-06, "loss": 0.7522145509719849, "step": 2187 }, { "epoch": 0.504147465437788, "grad_norm": 0.8170927084265063, "learning_rate": 1.7851458989576359e-06, "loss": 1.0157709121704102, "step": 2188 }, { "epoch": 0.5043778801843318, "grad_norm": 0.8537305826863457, "learning_rate": 1.7849098945665127e-06, "loss": 0.7096433639526367, "step": 2189 }, { "epoch": 0.5046082949308756, "grad_norm": 0.8293401368813538, "learning_rate": 1.7846737762467572e-06, "loss": 0.7743037939071655, "step": 2190 }, { "epoch": 0.5048387096774194, "grad_norm": 0.802261593558941, "learning_rate": 1.784437544032642e-06, "loss": 0.7907241582870483, "step": 2191 }, { "epoch": 0.5050691244239631, "grad_norm": 0.9488985791352184, "learning_rate": 1.7842011979584557e-06, "loss": 0.8692185878753662, "step": 2192 }, { "epoch": 0.505299539170507, "grad_norm": 1.0636987469588612, "learning_rate": 1.783964738058504e-06, "loss": 0.9678715467453003, "step": 2193 }, { "epoch": 0.5055299539170507, "grad_norm": 0.7713527005281836, "learning_rate": 1.7837281643671077e-06, "loss": 0.855170726776123, "step": 2194 }, { "epoch": 0.5057603686635944, "grad_norm": 0.7469430705420217, "learning_rate": 1.7834914769186065e-06, "loss": 0.8452733755111694, "step": 2195 }, { "epoch": 0.5059907834101383, "grad_norm": 0.6866121153572871, "learning_rate": 1.7832546757473543e-06, "loss": 0.7517217397689819, "step": 2196 }, { "epoch": 0.506221198156682, "grad_norm": 0.7453227048555126, "learning_rate": 1.783017760887723e-06, "loss": 0.6971632838249207, "step": 2197 }, { "epoch": 0.5064516129032258, "grad_norm": 0.7964964192157018, "learning_rate": 1.7827807323741002e-06, "loss": 0.8638256192207336, "step": 2198 }, { "epoch": 0.5066820276497696, "grad_norm": 0.7941877452524988, "learning_rate": 1.7825435902408903e-06, "loss": 0.8410143256187439, "step": 2199 }, { "epoch": 0.5069124423963134, "grad_norm": 0.7902588767037179, "learning_rate": 1.7823063345225143e-06, "loss": 0.8127691745758057, "step": 2200 }, { "epoch": 0.5071428571428571, "grad_norm": 0.7618481515663807, "learning_rate": 1.7820689652534096e-06, "loss": 0.7351404428482056, "step": 2201 }, { "epoch": 0.507373271889401, "grad_norm": 0.6691944306500267, "learning_rate": 1.7818314824680298e-06, "loss": 0.7258716821670532, "step": 2202 }, { "epoch": 0.5076036866359447, "grad_norm": 1.0029859864492747, "learning_rate": 1.7815938862008454e-06, "loss": 0.9509599208831787, "step": 2203 }, { "epoch": 0.5078341013824885, "grad_norm": 0.7738532710061052, "learning_rate": 1.7813561764863429e-06, "loss": 0.8600929379463196, "step": 2204 }, { "epoch": 0.5080645161290323, "grad_norm": 0.9689099485850551, "learning_rate": 1.7811183533590257e-06, "loss": 0.8688119649887085, "step": 2205 }, { "epoch": 0.5082949308755761, "grad_norm": 0.7599344683888546, "learning_rate": 1.780880416853414e-06, "loss": 0.8447986841201782, "step": 2206 }, { "epoch": 0.5085253456221198, "grad_norm": 0.6953642388755117, "learning_rate": 1.7806423670040433e-06, "loss": 0.8262573480606079, "step": 2207 }, { "epoch": 0.5087557603686635, "grad_norm": 0.7640117945069856, "learning_rate": 1.7804042038454666e-06, "loss": 0.9534487724304199, "step": 2208 }, { "epoch": 0.5089861751152074, "grad_norm": 0.7513792438385134, "learning_rate": 1.7801659274122527e-06, "loss": 0.7712565064430237, "step": 2209 }, { "epoch": 0.5092165898617511, "grad_norm": 0.8714588056175714, "learning_rate": 1.7799275377389873e-06, "loss": 0.8190760016441345, "step": 2210 }, { "epoch": 0.509447004608295, "grad_norm": 0.9379540710774249, "learning_rate": 1.7796890348602722e-06, "loss": 0.8647592067718506, "step": 2211 }, { "epoch": 0.5096774193548387, "grad_norm": 0.7912467632232041, "learning_rate": 1.7794504188107257e-06, "loss": 0.7788198590278625, "step": 2212 }, { "epoch": 0.5099078341013825, "grad_norm": 0.7053754197084299, "learning_rate": 1.779211689624983e-06, "loss": 0.8610718250274658, "step": 2213 }, { "epoch": 0.5101382488479262, "grad_norm": 0.7783569383566119, "learning_rate": 1.7789728473376952e-06, "loss": 0.832200825214386, "step": 2214 }, { "epoch": 0.5103686635944701, "grad_norm": 0.7823482622118234, "learning_rate": 1.7787338919835298e-06, "loss": 0.7325488328933716, "step": 2215 }, { "epoch": 0.5105990783410138, "grad_norm": 0.8903627357495159, "learning_rate": 1.7784948235971707e-06, "loss": 0.8038203716278076, "step": 2216 }, { "epoch": 0.5108294930875577, "grad_norm": 0.6275186054972087, "learning_rate": 1.7782556422133185e-06, "loss": 0.7016317248344421, "step": 2217 }, { "epoch": 0.5110599078341014, "grad_norm": 0.8951545762278973, "learning_rate": 1.7780163478666905e-06, "loss": 0.7964655160903931, "step": 2218 }, { "epoch": 0.5112903225806451, "grad_norm": 0.7709224710894249, "learning_rate": 1.777776940592019e-06, "loss": 0.6681785583496094, "step": 2219 }, { "epoch": 0.511520737327189, "grad_norm": 0.8934880823893885, "learning_rate": 1.7775374204240547e-06, "loss": 0.835777759552002, "step": 2220 }, { "epoch": 0.5117511520737327, "grad_norm": 1.0248178001051076, "learning_rate": 1.777297787397563e-06, "loss": 0.9442443251609802, "step": 2221 }, { "epoch": 0.5119815668202765, "grad_norm": 1.072158922361294, "learning_rate": 1.7770580415473267e-06, "loss": 0.9351231455802917, "step": 2222 }, { "epoch": 0.5122119815668202, "grad_norm": 0.878332211622375, "learning_rate": 1.776818182908144e-06, "loss": 0.7238374352455139, "step": 2223 }, { "epoch": 0.5124423963133641, "grad_norm": 0.7001659306792695, "learning_rate": 1.7765782115148308e-06, "loss": 0.8206230998039246, "step": 2224 }, { "epoch": 0.5126728110599078, "grad_norm": 0.6546302150578799, "learning_rate": 1.7763381274022176e-06, "loss": 0.748784065246582, "step": 2225 }, { "epoch": 0.5129032258064516, "grad_norm": 0.7566703422977776, "learning_rate": 1.7760979306051533e-06, "loss": 0.7980858087539673, "step": 2226 }, { "epoch": 0.5131336405529954, "grad_norm": 0.8877968508757134, "learning_rate": 1.7758576211585018e-06, "loss": 0.8631168603897095, "step": 2227 }, { "epoch": 0.5133640552995392, "grad_norm": 0.7405217897025548, "learning_rate": 1.7756171990971441e-06, "loss": 0.9405999779701233, "step": 2228 }, { "epoch": 0.5135944700460829, "grad_norm": 0.8867257371824923, "learning_rate": 1.7753766644559763e-06, "loss": 0.9055094718933105, "step": 2229 }, { "epoch": 0.5138248847926268, "grad_norm": 0.827493910498757, "learning_rate": 1.775136017269912e-06, "loss": 0.7583146691322327, "step": 2230 }, { "epoch": 0.5140552995391705, "grad_norm": 0.8689067612775456, "learning_rate": 1.7748952575738811e-06, "loss": 0.8728743195533752, "step": 2231 }, { "epoch": 0.5142857142857142, "grad_norm": 0.7067707521741841, "learning_rate": 1.7746543854028295e-06, "loss": 0.8133460283279419, "step": 2232 }, { "epoch": 0.5145161290322581, "grad_norm": 0.7177694794353267, "learning_rate": 1.7744134007917194e-06, "loss": 0.8389721512794495, "step": 2233 }, { "epoch": 0.5147465437788018, "grad_norm": 0.9617522193850644, "learning_rate": 1.774172303775529e-06, "loss": 0.7016798257827759, "step": 2234 }, { "epoch": 0.5149769585253456, "grad_norm": 0.7999711451764379, "learning_rate": 1.7739310943892538e-06, "loss": 0.7920540571212769, "step": 2235 }, { "epoch": 0.5152073732718894, "grad_norm": 0.6990088891534603, "learning_rate": 1.7736897726679048e-06, "loss": 0.900149405002594, "step": 2236 }, { "epoch": 0.5154377880184332, "grad_norm": 0.743220745754201, "learning_rate": 1.7734483386465096e-06, "loss": 0.8537915349006653, "step": 2237 }, { "epoch": 0.5156682027649769, "grad_norm": 0.8134323205434837, "learning_rate": 1.7732067923601121e-06, "loss": 0.7418123483657837, "step": 2238 }, { "epoch": 0.5158986175115208, "grad_norm": 1.108361921569266, "learning_rate": 1.7729651338437721e-06, "loss": 0.8890011310577393, "step": 2239 }, { "epoch": 0.5161290322580645, "grad_norm": 0.9841321811418366, "learning_rate": 1.7727233631325663e-06, "loss": 0.9082813262939453, "step": 2240 }, { "epoch": 0.5163594470046083, "grad_norm": 0.9268737545625799, "learning_rate": 1.7724814802615868e-06, "loss": 0.8337695598602295, "step": 2241 }, { "epoch": 0.5165898617511521, "grad_norm": 1.1037050608526282, "learning_rate": 1.7722394852659437e-06, "loss": 0.8990765810012817, "step": 2242 }, { "epoch": 0.5168202764976959, "grad_norm": 0.8552834719912825, "learning_rate": 1.7719973781807614e-06, "loss": 0.720890998840332, "step": 2243 }, { "epoch": 0.5170506912442396, "grad_norm": 0.6406815235154244, "learning_rate": 1.7717551590411817e-06, "loss": 0.7966938018798828, "step": 2244 }, { "epoch": 0.5172811059907834, "grad_norm": 0.8614270693246835, "learning_rate": 1.7715128278823622e-06, "loss": 0.9290107488632202, "step": 2245 }, { "epoch": 0.5175115207373272, "grad_norm": 0.8755598994931274, "learning_rate": 1.771270384739477e-06, "loss": 0.8388533592224121, "step": 2246 }, { "epoch": 0.5177419354838709, "grad_norm": 0.8200932411512113, "learning_rate": 1.7710278296477169e-06, "loss": 0.8845043182373047, "step": 2247 }, { "epoch": 0.5179723502304148, "grad_norm": 0.8499976704860752, "learning_rate": 1.7707851626422875e-06, "loss": 0.879709780216217, "step": 2248 }, { "epoch": 0.5182027649769585, "grad_norm": 0.8407815201465851, "learning_rate": 1.7705423837584123e-06, "loss": 0.8215152025222778, "step": 2249 }, { "epoch": 0.5184331797235023, "grad_norm": 0.8770027311962882, "learning_rate": 1.7702994930313305e-06, "loss": 0.8108627796173096, "step": 2250 }, { "epoch": 0.5186635944700461, "grad_norm": 0.9106818329739914, "learning_rate": 1.7700564904962966e-06, "loss": 0.8391602039337158, "step": 2251 }, { "epoch": 0.5188940092165899, "grad_norm": 0.82724043269172, "learning_rate": 1.769813376188583e-06, "loss": 0.8664923906326294, "step": 2252 }, { "epoch": 0.5191244239631336, "grad_norm": 0.8478256896643234, "learning_rate": 1.7695701501434765e-06, "loss": 0.9670882821083069, "step": 2253 }, { "epoch": 0.5193548387096775, "grad_norm": 0.8831524743377538, "learning_rate": 1.7693268123962816e-06, "loss": 0.946273684501648, "step": 2254 }, { "epoch": 0.5195852534562212, "grad_norm": 0.7643743435262689, "learning_rate": 1.7690833629823184e-06, "loss": 0.9691795706748962, "step": 2255 }, { "epoch": 0.5198156682027649, "grad_norm": 0.7833370135674333, "learning_rate": 1.7688398019369232e-06, "loss": 0.8086103200912476, "step": 2256 }, { "epoch": 0.5200460829493088, "grad_norm": 0.8183770044685874, "learning_rate": 1.7685961292954486e-06, "loss": 0.8574277758598328, "step": 2257 }, { "epoch": 0.5202764976958525, "grad_norm": 0.7089387180946831, "learning_rate": 1.7683523450932633e-06, "loss": 0.7841963171958923, "step": 2258 }, { "epoch": 0.5205069124423963, "grad_norm": 0.7629735238937895, "learning_rate": 1.7681084493657523e-06, "loss": 0.6972980499267578, "step": 2259 }, { "epoch": 0.5207373271889401, "grad_norm": 0.7917333859989639, "learning_rate": 1.7678644421483163e-06, "loss": 0.9193723201751709, "step": 2260 }, { "epoch": 0.5209677419354839, "grad_norm": 0.9714597630384237, "learning_rate": 1.7676203234763736e-06, "loss": 0.7902654409408569, "step": 2261 }, { "epoch": 0.5211981566820276, "grad_norm": 0.7983060164629807, "learning_rate": 1.767376093385357e-06, "loss": 0.8804734945297241, "step": 2262 }, { "epoch": 0.5214285714285715, "grad_norm": 0.9065709846386143, "learning_rate": 1.7671317519107163e-06, "loss": 0.7884976863861084, "step": 2263 }, { "epoch": 0.5216589861751152, "grad_norm": 0.9252417906886758, "learning_rate": 1.7668872990879173e-06, "loss": 0.8233190774917603, "step": 2264 }, { "epoch": 0.521889400921659, "grad_norm": 0.7126124532622758, "learning_rate": 1.766642734952442e-06, "loss": 0.7985334396362305, "step": 2265 }, { "epoch": 0.5221198156682028, "grad_norm": 0.8073440338214538, "learning_rate": 1.7663980595397887e-06, "loss": 0.7805646657943726, "step": 2266 }, { "epoch": 0.5223502304147466, "grad_norm": 0.9455838488830395, "learning_rate": 1.7661532728854718e-06, "loss": 0.8528248071670532, "step": 2267 }, { "epoch": 0.5225806451612903, "grad_norm": 0.882590365173732, "learning_rate": 1.7659083750250215e-06, "loss": 0.7714066505432129, "step": 2268 }, { "epoch": 0.522811059907834, "grad_norm": 0.7632999883965862, "learning_rate": 1.7656633659939843e-06, "loss": 0.8250499963760376, "step": 2269 }, { "epoch": 0.5230414746543779, "grad_norm": 0.6787990523098465, "learning_rate": 1.7654182458279231e-06, "loss": 0.7878777384757996, "step": 2270 }, { "epoch": 0.5232718894009216, "grad_norm": 0.8263772967033729, "learning_rate": 1.7651730145624174e-06, "loss": 0.9080224633216858, "step": 2271 }, { "epoch": 0.5235023041474655, "grad_norm": 0.8137376292994275, "learning_rate": 1.7649276722330607e-06, "loss": 0.8010937571525574, "step": 2272 }, { "epoch": 0.5237327188940092, "grad_norm": 0.8996847055009526, "learning_rate": 1.7646822188754658e-06, "loss": 0.903404951095581, "step": 2273 }, { "epoch": 0.523963133640553, "grad_norm": 0.928692707021516, "learning_rate": 1.7644366545252589e-06, "loss": 0.9009061455726624, "step": 2274 }, { "epoch": 0.5241935483870968, "grad_norm": 0.7651260343716183, "learning_rate": 1.7641909792180834e-06, "loss": 0.7158697843551636, "step": 2275 }, { "epoch": 0.5244239631336406, "grad_norm": 0.8041302440889452, "learning_rate": 1.763945192989599e-06, "loss": 0.8101463317871094, "step": 2276 }, { "epoch": 0.5246543778801843, "grad_norm": 0.8174455436475604, "learning_rate": 1.7636992958754812e-06, "loss": 0.758610725402832, "step": 2277 }, { "epoch": 0.5248847926267282, "grad_norm": 0.9651314388158028, "learning_rate": 1.7634532879114216e-06, "loss": 0.9469501972198486, "step": 2278 }, { "epoch": 0.5251152073732719, "grad_norm": 0.6853415956002341, "learning_rate": 1.7632071691331281e-06, "loss": 0.7528036236763, "step": 2279 }, { "epoch": 0.5253456221198156, "grad_norm": 0.9124447697867164, "learning_rate": 1.7629609395763242e-06, "loss": 0.8519324064254761, "step": 2280 }, { "epoch": 0.5255760368663595, "grad_norm": 0.9239480610002251, "learning_rate": 1.7627145992767498e-06, "loss": 0.8620004653930664, "step": 2281 }, { "epoch": 0.5258064516129032, "grad_norm": 0.7831738680942184, "learning_rate": 1.762468148270161e-06, "loss": 0.8066067695617676, "step": 2282 }, { "epoch": 0.526036866359447, "grad_norm": 0.8314773622163678, "learning_rate": 1.7622215865923301e-06, "loss": 0.865642786026001, "step": 2283 }, { "epoch": 0.5262672811059907, "grad_norm": 0.7269170910166286, "learning_rate": 1.761974914279045e-06, "loss": 0.8478001356124878, "step": 2284 }, { "epoch": 0.5264976958525346, "grad_norm": 0.8461811606118353, "learning_rate": 1.7617281313661098e-06, "loss": 0.7984344363212585, "step": 2285 }, { "epoch": 0.5267281105990783, "grad_norm": 0.8489168247147351, "learning_rate": 1.7614812378893444e-06, "loss": 0.8480801582336426, "step": 2286 }, { "epoch": 0.5269585253456222, "grad_norm": 0.9126795310234661, "learning_rate": 1.7612342338845859e-06, "loss": 0.8667479753494263, "step": 2287 }, { "epoch": 0.5271889400921659, "grad_norm": 0.9533468835174431, "learning_rate": 1.7609871193876854e-06, "loss": 0.8431364297866821, "step": 2288 }, { "epoch": 0.5274193548387097, "grad_norm": 0.8628781350943807, "learning_rate": 1.7607398944345127e-06, "loss": 0.8544220924377441, "step": 2289 }, { "epoch": 0.5276497695852534, "grad_norm": 0.9575259696859837, "learning_rate": 1.760492559060951e-06, "loss": 0.9298971891403198, "step": 2290 }, { "epoch": 0.5278801843317973, "grad_norm": 0.8854664005974592, "learning_rate": 1.760245113302901e-06, "loss": 0.739667534828186, "step": 2291 }, { "epoch": 0.528110599078341, "grad_norm": 0.9418693515744256, "learning_rate": 1.7599975571962796e-06, "loss": 0.8981268405914307, "step": 2292 }, { "epoch": 0.5283410138248847, "grad_norm": 0.8489202000746718, "learning_rate": 1.7597498907770185e-06, "loss": 0.8027834892272949, "step": 2293 }, { "epoch": 0.5285714285714286, "grad_norm": 0.7244957329263912, "learning_rate": 1.7595021140810669e-06, "loss": 0.7018242478370667, "step": 2294 }, { "epoch": 0.5288018433179723, "grad_norm": 0.8699196704594798, "learning_rate": 1.7592542271443887e-06, "loss": 0.7655147910118103, "step": 2295 }, { "epoch": 0.5290322580645161, "grad_norm": 0.8169123509935803, "learning_rate": 1.7590062300029644e-06, "loss": 0.8283153772354126, "step": 2296 }, { "epoch": 0.5292626728110599, "grad_norm": 1.0550792201388366, "learning_rate": 1.7587581226927907e-06, "loss": 1.0430598258972168, "step": 2297 }, { "epoch": 0.5294930875576037, "grad_norm": 0.7609036061197976, "learning_rate": 1.7585099052498802e-06, "loss": 0.6683472990989685, "step": 2298 }, { "epoch": 0.5297235023041474, "grad_norm": 0.7278178698575015, "learning_rate": 1.7582615777102609e-06, "loss": 0.7254939079284668, "step": 2299 }, { "epoch": 0.5299539170506913, "grad_norm": 0.7049477325497308, "learning_rate": 1.7580131401099774e-06, "loss": 0.7913245558738708, "step": 2300 }, { "epoch": 0.530184331797235, "grad_norm": 0.8416230641508338, "learning_rate": 1.75776459248509e-06, "loss": 0.7832915782928467, "step": 2301 }, { "epoch": 0.5304147465437788, "grad_norm": 0.7722959383546871, "learning_rate": 1.7575159348716754e-06, "loss": 0.9754987955093384, "step": 2302 }, { "epoch": 0.5306451612903226, "grad_norm": 0.8614799765536667, "learning_rate": 1.7572671673058254e-06, "loss": 0.8343901634216309, "step": 2303 }, { "epoch": 0.5308755760368664, "grad_norm": 0.862069962418511, "learning_rate": 1.757018289823649e-06, "loss": 0.9836198091506958, "step": 2304 }, { "epoch": 0.5311059907834101, "grad_norm": 0.7978699236275345, "learning_rate": 1.7567693024612695e-06, "loss": 0.8258972764015198, "step": 2305 }, { "epoch": 0.5313364055299539, "grad_norm": 0.8169244061103897, "learning_rate": 1.7565202052548277e-06, "loss": 0.8822964429855347, "step": 2306 }, { "epoch": 0.5315668202764977, "grad_norm": 0.8094894252842574, "learning_rate": 1.7562709982404797e-06, "loss": 0.721222996711731, "step": 2307 }, { "epoch": 0.5317972350230414, "grad_norm": 0.7759663122688174, "learning_rate": 1.7560216814543974e-06, "loss": 0.7273069620132446, "step": 2308 }, { "epoch": 0.5320276497695853, "grad_norm": 0.749740659090673, "learning_rate": 1.755772254932769e-06, "loss": 0.8031520843505859, "step": 2309 }, { "epoch": 0.532258064516129, "grad_norm": 0.8746676083569236, "learning_rate": 1.7555227187117982e-06, "loss": 0.8767163157463074, "step": 2310 }, { "epoch": 0.5324884792626728, "grad_norm": 1.052374988916139, "learning_rate": 1.755273072827705e-06, "loss": 0.8018463850021362, "step": 2311 }, { "epoch": 0.5327188940092166, "grad_norm": 0.9632384627648846, "learning_rate": 1.7550233173167252e-06, "loss": 0.8281232118606567, "step": 2312 }, { "epoch": 0.5329493087557604, "grad_norm": 0.9472067369973646, "learning_rate": 1.7547734522151103e-06, "loss": 0.8802565336227417, "step": 2313 }, { "epoch": 0.5331797235023041, "grad_norm": 0.7195582219345643, "learning_rate": 1.754523477559128e-06, "loss": 0.8055544495582581, "step": 2314 }, { "epoch": 0.533410138248848, "grad_norm": 0.9358658916449707, "learning_rate": 1.754273393385062e-06, "loss": 0.8163481950759888, "step": 2315 }, { "epoch": 0.5336405529953917, "grad_norm": 0.9365559775291885, "learning_rate": 1.7540231997292111e-06, "loss": 0.8308255076408386, "step": 2316 }, { "epoch": 0.5338709677419354, "grad_norm": 0.9031429015213124, "learning_rate": 1.7537728966278913e-06, "loss": 0.8387685418128967, "step": 2317 }, { "epoch": 0.5341013824884793, "grad_norm": 0.7470153179334161, "learning_rate": 1.7535224841174333e-06, "loss": 0.8668780326843262, "step": 2318 }, { "epoch": 0.534331797235023, "grad_norm": 0.7449540611731051, "learning_rate": 1.7532719622341842e-06, "loss": 0.8394712209701538, "step": 2319 }, { "epoch": 0.5345622119815668, "grad_norm": 0.7539905771593468, "learning_rate": 1.7530213310145073e-06, "loss": 0.7755688428878784, "step": 2320 }, { "epoch": 0.5347926267281106, "grad_norm": 0.8150738821263226, "learning_rate": 1.7527705904947805e-06, "loss": 0.7714632749557495, "step": 2321 }, { "epoch": 0.5350230414746544, "grad_norm": 0.807680924946579, "learning_rate": 1.7525197407113997e-06, "loss": 0.8810869455337524, "step": 2322 }, { "epoch": 0.5352534562211981, "grad_norm": 1.0672299468188131, "learning_rate": 1.7522687817007742e-06, "loss": 0.8445242643356323, "step": 2323 }, { "epoch": 0.535483870967742, "grad_norm": 1.1338085945775938, "learning_rate": 1.7520177134993311e-06, "loss": 0.9602948427200317, "step": 2324 }, { "epoch": 0.5357142857142857, "grad_norm": 0.7789379367396811, "learning_rate": 1.7517665361435126e-06, "loss": 0.7865237593650818, "step": 2325 }, { "epoch": 0.5359447004608295, "grad_norm": 0.8870578602537817, "learning_rate": 1.7515152496697763e-06, "loss": 0.8062880039215088, "step": 2326 }, { "epoch": 0.5361751152073733, "grad_norm": 0.9742037408160464, "learning_rate": 1.7512638541145966e-06, "loss": 0.8386664986610413, "step": 2327 }, { "epoch": 0.5364055299539171, "grad_norm": 1.0154937609139327, "learning_rate": 1.7510123495144629e-06, "loss": 0.973692774772644, "step": 2328 }, { "epoch": 0.5366359447004608, "grad_norm": 0.9023959356834507, "learning_rate": 1.7507607359058808e-06, "loss": 0.8250089883804321, "step": 2329 }, { "epoch": 0.5368663594470046, "grad_norm": 0.8457870176131529, "learning_rate": 1.750509013325372e-06, "loss": 0.8578102588653564, "step": 2330 }, { "epoch": 0.5370967741935484, "grad_norm": 0.8804595958614453, "learning_rate": 1.7502571818094732e-06, "loss": 0.916475236415863, "step": 2331 }, { "epoch": 0.5373271889400921, "grad_norm": 0.9225430635370255, "learning_rate": 1.7500052413947377e-06, "loss": 0.8210046291351318, "step": 2332 }, { "epoch": 0.537557603686636, "grad_norm": 0.7091387099201478, "learning_rate": 1.7497531921177344e-06, "loss": 0.816267728805542, "step": 2333 }, { "epoch": 0.5377880184331797, "grad_norm": 0.9764630645457667, "learning_rate": 1.7495010340150478e-06, "loss": 1.0091882944107056, "step": 2334 }, { "epoch": 0.5380184331797235, "grad_norm": 0.982812584725329, "learning_rate": 1.7492487671232783e-06, "loss": 0.7549277544021606, "step": 2335 }, { "epoch": 0.5382488479262673, "grad_norm": 0.8589431412898547, "learning_rate": 1.7489963914790423e-06, "loss": 0.9584934711456299, "step": 2336 }, { "epoch": 0.5384792626728111, "grad_norm": 0.7167225081500926, "learning_rate": 1.7487439071189713e-06, "loss": 0.8189069628715515, "step": 2337 }, { "epoch": 0.5387096774193548, "grad_norm": 0.976466384445042, "learning_rate": 1.7484913140797138e-06, "loss": 0.7529993057250977, "step": 2338 }, { "epoch": 0.5389400921658987, "grad_norm": 0.9894954868399615, "learning_rate": 1.7482386123979324e-06, "loss": 0.8611496686935425, "step": 2339 }, { "epoch": 0.5391705069124424, "grad_norm": 1.2753256885249857, "learning_rate": 1.7479858021103074e-06, "loss": 0.9400241374969482, "step": 2340 }, { "epoch": 0.5394009216589861, "grad_norm": 0.7513824016722385, "learning_rate": 1.7477328832535332e-06, "loss": 0.6686737537384033, "step": 2341 }, { "epoch": 0.53963133640553, "grad_norm": 0.7834119073150019, "learning_rate": 1.747479855864321e-06, "loss": 0.864795982837677, "step": 2342 }, { "epoch": 0.5398617511520737, "grad_norm": 0.9942068845664563, "learning_rate": 1.7472267199793971e-06, "loss": 0.9579563140869141, "step": 2343 }, { "epoch": 0.5400921658986175, "grad_norm": 0.9464284115225821, "learning_rate": 1.746973475635504e-06, "loss": 0.7492884397506714, "step": 2344 }, { "epoch": 0.5403225806451613, "grad_norm": 1.1301826150440575, "learning_rate": 1.7467201228694e-06, "loss": 1.020420789718628, "step": 2345 }, { "epoch": 0.5405529953917051, "grad_norm": 0.8996882097606888, "learning_rate": 1.7464666617178585e-06, "loss": 0.8277238011360168, "step": 2346 }, { "epoch": 0.5407834101382488, "grad_norm": 0.8343415166384458, "learning_rate": 1.7462130922176694e-06, "loss": 0.8160337209701538, "step": 2347 }, { "epoch": 0.5410138248847927, "grad_norm": 0.940177897473061, "learning_rate": 1.7459594144056378e-06, "loss": 0.8742454648017883, "step": 2348 }, { "epoch": 0.5412442396313364, "grad_norm": 0.8263630155636004, "learning_rate": 1.7457056283185847e-06, "loss": 0.7987914085388184, "step": 2349 }, { "epoch": 0.5414746543778802, "grad_norm": 0.8096196719588583, "learning_rate": 1.7454517339933467e-06, "loss": 0.6917734146118164, "step": 2350 }, { "epoch": 0.541705069124424, "grad_norm": 0.9860357050478065, "learning_rate": 1.7451977314667763e-06, "loss": 0.8338258266448975, "step": 2351 }, { "epoch": 0.5419354838709678, "grad_norm": 0.6906626367704619, "learning_rate": 1.7449436207757418e-06, "loss": 0.8308743238449097, "step": 2352 }, { "epoch": 0.5421658986175115, "grad_norm": 0.7126371911422212, "learning_rate": 1.744689401957127e-06, "loss": 0.7843145728111267, "step": 2353 }, { "epoch": 0.5423963133640552, "grad_norm": 0.6637904176126797, "learning_rate": 1.7444350750478314e-06, "loss": 0.9088687896728516, "step": 2354 }, { "epoch": 0.5426267281105991, "grad_norm": 1.1601519737508017, "learning_rate": 1.74418064008477e-06, "loss": 0.876841127872467, "step": 2355 }, { "epoch": 0.5428571428571428, "grad_norm": 0.804702758707697, "learning_rate": 1.743926097104874e-06, "loss": 0.7169051170349121, "step": 2356 }, { "epoch": 0.5430875576036867, "grad_norm": 0.8414445338031196, "learning_rate": 1.7436714461450897e-06, "loss": 0.7979093194007874, "step": 2357 }, { "epoch": 0.5433179723502304, "grad_norm": 0.796767744969521, "learning_rate": 1.7434166872423795e-06, "loss": 0.9152545928955078, "step": 2358 }, { "epoch": 0.5435483870967742, "grad_norm": 0.8612716514728646, "learning_rate": 1.7431618204337212e-06, "loss": 0.8968983888626099, "step": 2359 }, { "epoch": 0.543778801843318, "grad_norm": 0.7451796864953032, "learning_rate": 1.7429068457561086e-06, "loss": 0.7591085433959961, "step": 2360 }, { "epoch": 0.5440092165898618, "grad_norm": 0.8434007797764556, "learning_rate": 1.7426517632465508e-06, "loss": 0.6931861639022827, "step": 2361 }, { "epoch": 0.5442396313364055, "grad_norm": 0.816030716232177, "learning_rate": 1.7423965729420729e-06, "loss": 0.7715095281600952, "step": 2362 }, { "epoch": 0.5444700460829494, "grad_norm": 0.7333839549943538, "learning_rate": 1.742141274879715e-06, "loss": 0.8282119035720825, "step": 2363 }, { "epoch": 0.5447004608294931, "grad_norm": 0.8282161479585932, "learning_rate": 1.7418858690965337e-06, "loss": 0.7595704197883606, "step": 2364 }, { "epoch": 0.5449308755760369, "grad_norm": 0.8861519618227073, "learning_rate": 1.7416303556296005e-06, "loss": 0.8738422393798828, "step": 2365 }, { "epoch": 0.5451612903225806, "grad_norm": 0.819062403403448, "learning_rate": 1.741374734516003e-06, "loss": 0.8399837017059326, "step": 2366 }, { "epoch": 0.5453917050691244, "grad_norm": 0.9147252373002325, "learning_rate": 1.7411190057928442e-06, "loss": 0.8213151693344116, "step": 2367 }, { "epoch": 0.5456221198156682, "grad_norm": 0.862161359681962, "learning_rate": 1.740863169497243e-06, "loss": 0.748835563659668, "step": 2368 }, { "epoch": 0.5458525345622119, "grad_norm": 0.6925915187477067, "learning_rate": 1.7406072256663333e-06, "loss": 0.9222339391708374, "step": 2369 }, { "epoch": 0.5460829493087558, "grad_norm": 0.6352006169320189, "learning_rate": 1.7403511743372655e-06, "loss": 0.6543160676956177, "step": 2370 }, { "epoch": 0.5463133640552995, "grad_norm": 0.9993386394035012, "learning_rate": 1.7400950155472046e-06, "loss": 0.9828567504882812, "step": 2371 }, { "epoch": 0.5465437788018433, "grad_norm": 0.9620494284169527, "learning_rate": 1.739838749333332e-06, "loss": 0.95346599817276, "step": 2372 }, { "epoch": 0.5467741935483871, "grad_norm": 0.4533946729074916, "learning_rate": 1.7395823757328442e-06, "loss": 0.626889705657959, "step": 2373 }, { "epoch": 0.5470046082949309, "grad_norm": 0.6641652944774505, "learning_rate": 1.739325894782954e-06, "loss": 0.8152071833610535, "step": 2374 }, { "epoch": 0.5472350230414746, "grad_norm": 0.7149653321076401, "learning_rate": 1.7390693065208889e-06, "loss": 0.8244980573654175, "step": 2375 }, { "epoch": 0.5474654377880185, "grad_norm": 0.8801604517186058, "learning_rate": 1.738812610983892e-06, "loss": 0.8234372138977051, "step": 2376 }, { "epoch": 0.5476958525345622, "grad_norm": 0.8626749383303203, "learning_rate": 1.7385558082092228e-06, "loss": 0.9334712624549866, "step": 2377 }, { "epoch": 0.5479262672811059, "grad_norm": 0.8866496689156442, "learning_rate": 1.7382988982341557e-06, "loss": 0.7873882055282593, "step": 2378 }, { "epoch": 0.5481566820276498, "grad_norm": 0.7814140858155267, "learning_rate": 1.7380418810959814e-06, "loss": 0.7971000671386719, "step": 2379 }, { "epoch": 0.5483870967741935, "grad_norm": 0.7452714019733373, "learning_rate": 1.7377847568320046e-06, "loss": 0.8617004156112671, "step": 2380 }, { "epoch": 0.5486175115207373, "grad_norm": 0.7316280745753603, "learning_rate": 1.7375275254795472e-06, "loss": 0.6798374056816101, "step": 2381 }, { "epoch": 0.5488479262672811, "grad_norm": 0.8600424341995414, "learning_rate": 1.7372701870759459e-06, "loss": 0.8621633052825928, "step": 2382 }, { "epoch": 0.5490783410138249, "grad_norm": 0.78685909041996, "learning_rate": 1.7370127416585527e-06, "loss": 0.6533470153808594, "step": 2383 }, { "epoch": 0.5493087557603686, "grad_norm": 0.9199843580999427, "learning_rate": 1.736755189264736e-06, "loss": 0.8854461908340454, "step": 2384 }, { "epoch": 0.5495391705069125, "grad_norm": 1.0020485772603467, "learning_rate": 1.7364975299318786e-06, "loss": 0.9461240768432617, "step": 2385 }, { "epoch": 0.5497695852534562, "grad_norm": 1.0179837516521926, "learning_rate": 1.73623976369738e-06, "loss": 0.8936882019042969, "step": 2386 }, { "epoch": 0.55, "grad_norm": 0.7527230779520249, "learning_rate": 1.7359818905986544e-06, "loss": 0.8177640438079834, "step": 2387 }, { "epoch": 0.5502304147465438, "grad_norm": 0.7539178622826256, "learning_rate": 1.7357239106731317e-06, "loss": 0.793328046798706, "step": 2388 }, { "epoch": 0.5504608294930876, "grad_norm": 0.8548599569350254, "learning_rate": 1.7354658239582572e-06, "loss": 0.8837069272994995, "step": 2389 }, { "epoch": 0.5506912442396313, "grad_norm": 0.8764277126116193, "learning_rate": 1.7352076304914918e-06, "loss": 0.8801138401031494, "step": 2390 }, { "epoch": 0.5509216589861751, "grad_norm": 0.7981260720892804, "learning_rate": 1.7349493303103123e-06, "loss": 0.865073025226593, "step": 2391 }, { "epoch": 0.5511520737327189, "grad_norm": 0.5938962289027067, "learning_rate": 1.7346909234522107e-06, "loss": 0.8712339401245117, "step": 2392 }, { "epoch": 0.5513824884792626, "grad_norm": 0.6857068624612402, "learning_rate": 1.7344324099546938e-06, "loss": 0.7689294815063477, "step": 2393 }, { "epoch": 0.5516129032258065, "grad_norm": 0.6784843872797971, "learning_rate": 1.7341737898552851e-06, "loss": 0.9228999614715576, "step": 2394 }, { "epoch": 0.5518433179723502, "grad_norm": 1.025443261317525, "learning_rate": 1.7339150631915228e-06, "loss": 0.9473327398300171, "step": 2395 }, { "epoch": 0.552073732718894, "grad_norm": 0.9317831571882359, "learning_rate": 1.7336562300009604e-06, "loss": 0.7724621295928955, "step": 2396 }, { "epoch": 0.5523041474654378, "grad_norm": 0.7823556125482615, "learning_rate": 1.7333972903211675e-06, "loss": 0.8646600246429443, "step": 2397 }, { "epoch": 0.5525345622119816, "grad_norm": 0.6673069571562762, "learning_rate": 1.7331382441897286e-06, "loss": 0.7143402099609375, "step": 2398 }, { "epoch": 0.5527649769585253, "grad_norm": 0.9600129950475998, "learning_rate": 1.7328790916442446e-06, "loss": 0.8229624032974243, "step": 2399 }, { "epoch": 0.5529953917050692, "grad_norm": 0.8815652742153803, "learning_rate": 1.7326198327223303e-06, "loss": 0.7244875431060791, "step": 2400 }, { "epoch": 0.5532258064516129, "grad_norm": 0.8586401947703556, "learning_rate": 1.7323604674616173e-06, "loss": 0.7797688245773315, "step": 2401 }, { "epoch": 0.5534562211981566, "grad_norm": 0.7923271764392044, "learning_rate": 1.7321009958997519e-06, "loss": 0.752421498298645, "step": 2402 }, { "epoch": 0.5536866359447005, "grad_norm": 0.880725843060538, "learning_rate": 1.7318414180743962e-06, "loss": 0.8285892009735107, "step": 2403 }, { "epoch": 0.5539170506912442, "grad_norm": 0.7844500606150882, "learning_rate": 1.7315817340232272e-06, "loss": 0.8247888088226318, "step": 2404 }, { "epoch": 0.554147465437788, "grad_norm": 0.7041289847587934, "learning_rate": 1.7313219437839384e-06, "loss": 0.7713418006896973, "step": 2405 }, { "epoch": 0.5543778801843318, "grad_norm": 0.8575067968238488, "learning_rate": 1.7310620473942374e-06, "loss": 0.8748825788497925, "step": 2406 }, { "epoch": 0.5546082949308756, "grad_norm": 0.899949436927101, "learning_rate": 1.730802044891848e-06, "loss": 0.9255902767181396, "step": 2407 }, { "epoch": 0.5548387096774193, "grad_norm": 0.7968868837370462, "learning_rate": 1.7305419363145093e-06, "loss": 0.7226976156234741, "step": 2408 }, { "epoch": 0.5550691244239632, "grad_norm": 0.8868777191693532, "learning_rate": 1.7302817216999754e-06, "loss": 0.9024704694747925, "step": 2409 }, { "epoch": 0.5552995391705069, "grad_norm": 0.8331382998314191, "learning_rate": 1.7300214010860168e-06, "loss": 0.7857767343521118, "step": 2410 }, { "epoch": 0.5555299539170507, "grad_norm": 0.7111146090264087, "learning_rate": 1.7297609745104183e-06, "loss": 0.7280064821243286, "step": 2411 }, { "epoch": 0.5557603686635945, "grad_norm": 0.8916895272866717, "learning_rate": 1.72950044201098e-06, "loss": 0.8909369111061096, "step": 2412 }, { "epoch": 0.5559907834101383, "grad_norm": 0.8724458169518867, "learning_rate": 1.7292398036255183e-06, "loss": 0.8543871641159058, "step": 2413 }, { "epoch": 0.556221198156682, "grad_norm": 0.7364121573266219, "learning_rate": 1.7289790593918648e-06, "loss": 0.6934928894042969, "step": 2414 }, { "epoch": 0.5564516129032258, "grad_norm": 0.7288921937743348, "learning_rate": 1.7287182093478658e-06, "loss": 0.6323058605194092, "step": 2415 }, { "epoch": 0.5566820276497696, "grad_norm": 0.9203399963548066, "learning_rate": 1.7284572535313833e-06, "loss": 0.8607437014579773, "step": 2416 }, { "epoch": 0.5569124423963133, "grad_norm": 0.8312318653257402, "learning_rate": 1.7281961919802948e-06, "loss": 0.932594358921051, "step": 2417 }, { "epoch": 0.5571428571428572, "grad_norm": 0.8132622554262421, "learning_rate": 1.727935024732493e-06, "loss": 0.7239062786102295, "step": 2418 }, { "epoch": 0.5573732718894009, "grad_norm": 0.770772581447816, "learning_rate": 1.727673751825886e-06, "loss": 0.7600498199462891, "step": 2419 }, { "epoch": 0.5576036866359447, "grad_norm": 0.9553759629640377, "learning_rate": 1.7274123732983977e-06, "loss": 0.6888710260391235, "step": 2420 }, { "epoch": 0.5578341013824885, "grad_norm": 0.9472816188704319, "learning_rate": 1.7271508891879657e-06, "loss": 0.9768370389938354, "step": 2421 }, { "epoch": 0.5580645161290323, "grad_norm": 0.7612474564207412, "learning_rate": 1.7268892995325453e-06, "loss": 0.7302272319793701, "step": 2422 }, { "epoch": 0.558294930875576, "grad_norm": 0.952809818405442, "learning_rate": 1.7266276043701052e-06, "loss": 0.7664496898651123, "step": 2423 }, { "epoch": 0.5585253456221199, "grad_norm": 0.7105308716985692, "learning_rate": 1.72636580373863e-06, "loss": 0.7672723531723022, "step": 2424 }, { "epoch": 0.5587557603686636, "grad_norm": 0.9094827818764729, "learning_rate": 1.7261038976761203e-06, "loss": 0.7467625141143799, "step": 2425 }, { "epoch": 0.5589861751152074, "grad_norm": 1.0609555724090778, "learning_rate": 1.7258418862205908e-06, "loss": 0.899692177772522, "step": 2426 }, { "epoch": 0.5592165898617512, "grad_norm": 0.8726314105037919, "learning_rate": 1.7255797694100724e-06, "loss": 0.9654138088226318, "step": 2427 }, { "epoch": 0.5594470046082949, "grad_norm": 1.0261431779245342, "learning_rate": 1.725317547282611e-06, "loss": 0.8487396836280823, "step": 2428 }, { "epoch": 0.5596774193548387, "grad_norm": 0.7692614118612008, "learning_rate": 1.7250552198762682e-06, "loss": 0.7785199284553528, "step": 2429 }, { "epoch": 0.5599078341013825, "grad_norm": 0.7931069179642137, "learning_rate": 1.7247927872291198e-06, "loss": 0.9243934750556946, "step": 2430 }, { "epoch": 0.5601382488479263, "grad_norm": 0.6935679959823647, "learning_rate": 1.724530249379258e-06, "loss": 0.8674443960189819, "step": 2431 }, { "epoch": 0.56036866359447, "grad_norm": 0.7564063858493598, "learning_rate": 1.7242676063647895e-06, "loss": 0.8022270202636719, "step": 2432 }, { "epoch": 0.5605990783410139, "grad_norm": 0.8222900385869091, "learning_rate": 1.7240048582238367e-06, "loss": 0.8696796894073486, "step": 2433 }, { "epoch": 0.5608294930875576, "grad_norm": 0.8560234672396506, "learning_rate": 1.7237420049945374e-06, "loss": 0.7752439975738525, "step": 2434 }, { "epoch": 0.5610599078341014, "grad_norm": 0.9286340475505503, "learning_rate": 1.723479046715044e-06, "loss": 0.7660201787948608, "step": 2435 }, { "epoch": 0.5612903225806452, "grad_norm": 0.7639410477119124, "learning_rate": 1.7232159834235249e-06, "loss": 0.9319918155670166, "step": 2436 }, { "epoch": 0.561520737327189, "grad_norm": 0.8121463742755932, "learning_rate": 1.722952815158163e-06, "loss": 0.8175421357154846, "step": 2437 }, { "epoch": 0.5617511520737327, "grad_norm": 0.5646145066796834, "learning_rate": 1.7226895419571573e-06, "loss": 0.6959598064422607, "step": 2438 }, { "epoch": 0.5619815668202764, "grad_norm": 0.9804875774075569, "learning_rate": 1.722426163858721e-06, "loss": 0.8629111051559448, "step": 2439 }, { "epoch": 0.5622119815668203, "grad_norm": 1.1148628556143985, "learning_rate": 1.7221626809010833e-06, "loss": 0.8222612142562866, "step": 2440 }, { "epoch": 0.562442396313364, "grad_norm": 0.7126052614291007, "learning_rate": 1.721899093122489e-06, "loss": 0.8329352140426636, "step": 2441 }, { "epoch": 0.5626728110599079, "grad_norm": 0.7803804718208336, "learning_rate": 1.7216354005611966e-06, "loss": 0.8777236938476562, "step": 2442 }, { "epoch": 0.5629032258064516, "grad_norm": 0.8601336969746237, "learning_rate": 1.7213716032554814e-06, "loss": 0.8487246036529541, "step": 2443 }, { "epoch": 0.5631336405529954, "grad_norm": 0.9035051311861264, "learning_rate": 1.7211077012436327e-06, "loss": 0.8429645299911499, "step": 2444 }, { "epoch": 0.5633640552995391, "grad_norm": 0.9883668092610399, "learning_rate": 1.720843694563956e-06, "loss": 0.7683241367340088, "step": 2445 }, { "epoch": 0.563594470046083, "grad_norm": 0.839045001132387, "learning_rate": 1.7205795832547715e-06, "loss": 0.8468153476715088, "step": 2446 }, { "epoch": 0.5638248847926267, "grad_norm": 0.7865527461309724, "learning_rate": 1.7203153673544136e-06, "loss": 0.7957276105880737, "step": 2447 }, { "epoch": 0.5640552995391706, "grad_norm": 0.7301149604369097, "learning_rate": 1.7200510469012343e-06, "loss": 0.703586757183075, "step": 2448 }, { "epoch": 0.5642857142857143, "grad_norm": 0.9237896103754119, "learning_rate": 1.7197866219335988e-06, "loss": 0.8399583101272583, "step": 2449 }, { "epoch": 0.5645161290322581, "grad_norm": 0.9147331037465749, "learning_rate": 1.7195220924898882e-06, "loss": 0.8198127746582031, "step": 2450 }, { "epoch": 0.5647465437788018, "grad_norm": 0.8751939719560463, "learning_rate": 1.7192574586084977e-06, "loss": 0.8345620632171631, "step": 2451 }, { "epoch": 0.5649769585253456, "grad_norm": 0.5798955427424709, "learning_rate": 1.71899272032784e-06, "loss": 0.7717207670211792, "step": 2452 }, { "epoch": 0.5652073732718894, "grad_norm": 1.0279650439820616, "learning_rate": 1.7187278776863402e-06, "loss": 0.9178022146224976, "step": 2453 }, { "epoch": 0.5654377880184331, "grad_norm": 0.8586126622693072, "learning_rate": 1.7184629307224405e-06, "loss": 0.802221417427063, "step": 2454 }, { "epoch": 0.565668202764977, "grad_norm": 0.9691589621671786, "learning_rate": 1.718197879474598e-06, "loss": 0.8785420656204224, "step": 2455 }, { "epoch": 0.5658986175115207, "grad_norm": 0.8087978885886937, "learning_rate": 1.7179327239812835e-06, "loss": 0.866797924041748, "step": 2456 }, { "epoch": 0.5661290322580645, "grad_norm": 0.7850858892434726, "learning_rate": 1.7176674642809848e-06, "loss": 0.8483223915100098, "step": 2457 }, { "epoch": 0.5663594470046083, "grad_norm": 0.7634922973789945, "learning_rate": 1.7174021004122038e-06, "loss": 0.815066933631897, "step": 2458 }, { "epoch": 0.5665898617511521, "grad_norm": 0.7286124953848899, "learning_rate": 1.7171366324134575e-06, "loss": 0.8584767580032349, "step": 2459 }, { "epoch": 0.5668202764976958, "grad_norm": 0.8250445352678845, "learning_rate": 1.7168710603232783e-06, "loss": 0.8710953593254089, "step": 2460 }, { "epoch": 0.5670506912442397, "grad_norm": 0.9434416859632441, "learning_rate": 1.7166053841802137e-06, "loss": 0.8174586892127991, "step": 2461 }, { "epoch": 0.5672811059907834, "grad_norm": 0.8270311207697365, "learning_rate": 1.7163396040228263e-06, "loss": 0.7240795493125916, "step": 2462 }, { "epoch": 0.5675115207373271, "grad_norm": 0.9011815170935621, "learning_rate": 1.7160737198896938e-06, "loss": 0.8026313781738281, "step": 2463 }, { "epoch": 0.567741935483871, "grad_norm": 0.906377679717593, "learning_rate": 1.7158077318194088e-06, "loss": 0.8170863389968872, "step": 2464 }, { "epoch": 0.5679723502304147, "grad_norm": 0.7708394273236241, "learning_rate": 1.7155416398505794e-06, "loss": 0.7524861097335815, "step": 2465 }, { "epoch": 0.5682027649769585, "grad_norm": 1.053627484653556, "learning_rate": 1.7152754440218278e-06, "loss": 0.9895739555358887, "step": 2466 }, { "epoch": 0.5684331797235023, "grad_norm": 0.8044893250734789, "learning_rate": 1.7150091443717924e-06, "loss": 0.840786874294281, "step": 2467 }, { "epoch": 0.5686635944700461, "grad_norm": 0.7235386782272144, "learning_rate": 1.7147427409391265e-06, "loss": 0.8896929025650024, "step": 2468 }, { "epoch": 0.5688940092165898, "grad_norm": 0.930785639448215, "learning_rate": 1.714476233762498e-06, "loss": 0.9940589666366577, "step": 2469 }, { "epoch": 0.5691244239631337, "grad_norm": 0.8541894175832414, "learning_rate": 1.7142096228805896e-06, "loss": 0.8827046155929565, "step": 2470 }, { "epoch": 0.5693548387096774, "grad_norm": 0.8477738552913107, "learning_rate": 1.7139429083321003e-06, "loss": 0.8402417302131653, "step": 2471 }, { "epoch": 0.5695852534562212, "grad_norm": 1.0681644319875638, "learning_rate": 1.7136760901557428e-06, "loss": 0.9298208951950073, "step": 2472 }, { "epoch": 0.569815668202765, "grad_norm": 0.799198798955049, "learning_rate": 1.7134091683902456e-06, "loss": 0.7272841930389404, "step": 2473 }, { "epoch": 0.5700460829493088, "grad_norm": 0.9504491625382946, "learning_rate": 1.7131421430743522e-06, "loss": 0.7767274379730225, "step": 2474 }, { "epoch": 0.5702764976958525, "grad_norm": 0.8321899881110706, "learning_rate": 1.7128750142468205e-06, "loss": 0.8381883502006531, "step": 2475 }, { "epoch": 0.5705069124423963, "grad_norm": 0.722993858034587, "learning_rate": 1.7126077819464247e-06, "loss": 0.6917109489440918, "step": 2476 }, { "epoch": 0.5707373271889401, "grad_norm": 0.8529687693157456, "learning_rate": 1.712340446211952e-06, "loss": 0.848122239112854, "step": 2477 }, { "epoch": 0.5709677419354838, "grad_norm": 0.8115142651418973, "learning_rate": 1.7120730070822074e-06, "loss": 0.7880194187164307, "step": 2478 }, { "epoch": 0.5711981566820277, "grad_norm": 0.7900923038142705, "learning_rate": 1.7118054645960077e-06, "loss": 0.8782297372817993, "step": 2479 }, { "epoch": 0.5714285714285714, "grad_norm": 0.8386744568018749, "learning_rate": 1.7115378187921876e-06, "loss": 0.9030005931854248, "step": 2480 }, { "epoch": 0.5716589861751152, "grad_norm": 1.0512780177061767, "learning_rate": 1.7112700697095953e-06, "loss": 0.9950683116912842, "step": 2481 }, { "epoch": 0.571889400921659, "grad_norm": 0.7851257012482162, "learning_rate": 1.7110022173870933e-06, "loss": 0.8825187683105469, "step": 2482 }, { "epoch": 0.5721198156682028, "grad_norm": 0.7742449968104124, "learning_rate": 1.710734261863561e-06, "loss": 0.7918775081634521, "step": 2483 }, { "epoch": 0.5723502304147465, "grad_norm": 0.8385191739759446, "learning_rate": 1.7104662031778916e-06, "loss": 1.0219467878341675, "step": 2484 }, { "epoch": 0.5725806451612904, "grad_norm": 0.7273611559924746, "learning_rate": 1.7101980413689931e-06, "loss": 0.7633316516876221, "step": 2485 }, { "epoch": 0.5728110599078341, "grad_norm": 0.9207367628977638, "learning_rate": 1.7099297764757891e-06, "loss": 0.8972171545028687, "step": 2486 }, { "epoch": 0.5730414746543778, "grad_norm": 0.9268590747994748, "learning_rate": 1.7096614085372183e-06, "loss": 0.9467268586158752, "step": 2487 }, { "epoch": 0.5732718894009217, "grad_norm": 0.6697903314360253, "learning_rate": 1.709392937592233e-06, "loss": 0.7688668370246887, "step": 2488 }, { "epoch": 0.5735023041474654, "grad_norm": 0.9069250629096394, "learning_rate": 1.7091243636798022e-06, "loss": 0.8521163463592529, "step": 2489 }, { "epoch": 0.5737327188940092, "grad_norm": 1.1876566208797892, "learning_rate": 1.7088556868389087e-06, "loss": 0.937403678894043, "step": 2490 }, { "epoch": 0.573963133640553, "grad_norm": 0.7484200220587712, "learning_rate": 1.7085869071085507e-06, "loss": 0.929175853729248, "step": 2491 }, { "epoch": 0.5741935483870968, "grad_norm": 0.75868423962596, "learning_rate": 1.708318024527741e-06, "loss": 0.8213154673576355, "step": 2492 }, { "epoch": 0.5744239631336405, "grad_norm": 0.8570973138589657, "learning_rate": 1.708049039135508e-06, "loss": 0.7666962146759033, "step": 2493 }, { "epoch": 0.5746543778801844, "grad_norm": 0.944726193523685, "learning_rate": 1.707779950970894e-06, "loss": 0.9787846803665161, "step": 2494 }, { "epoch": 0.5748847926267281, "grad_norm": 0.9499725243145639, "learning_rate": 1.7075107600729575e-06, "loss": 0.9688804149627686, "step": 2495 }, { "epoch": 0.5751152073732719, "grad_norm": 0.7169812071362754, "learning_rate": 1.7072414664807706e-06, "loss": 0.7186019420623779, "step": 2496 }, { "epoch": 0.5753456221198157, "grad_norm": 0.8737696103531859, "learning_rate": 1.706972070233421e-06, "loss": 0.814068615436554, "step": 2497 }, { "epoch": 0.5755760368663595, "grad_norm": 0.8930538892783126, "learning_rate": 1.7067025713700111e-06, "loss": 0.8439940214157104, "step": 2498 }, { "epoch": 0.5758064516129032, "grad_norm": 1.0358274070142592, "learning_rate": 1.706432969929659e-06, "loss": 1.0199556350708008, "step": 2499 }, { "epoch": 0.576036866359447, "grad_norm": 0.8418547467759998, "learning_rate": 1.7061632659514964e-06, "loss": 0.9422338008880615, "step": 2500 }, { "epoch": 0.5762672811059908, "grad_norm": 0.8692517624840741, "learning_rate": 1.7058934594746704e-06, "loss": 0.9307081699371338, "step": 2501 }, { "epoch": 0.5764976958525345, "grad_norm": 0.8121605874769848, "learning_rate": 1.7056235505383433e-06, "loss": 0.7202768325805664, "step": 2502 }, { "epoch": 0.5767281105990784, "grad_norm": 0.915285295701684, "learning_rate": 1.7053535391816923e-06, "loss": 1.0184223651885986, "step": 2503 }, { "epoch": 0.5769585253456221, "grad_norm": 0.8238573361353964, "learning_rate": 1.7050834254439085e-06, "loss": 0.7957574129104614, "step": 2504 }, { "epoch": 0.5771889400921659, "grad_norm": 0.9632097611385487, "learning_rate": 1.7048132093641989e-06, "loss": 0.9694541096687317, "step": 2505 }, { "epoch": 0.5774193548387097, "grad_norm": 0.7406781740567284, "learning_rate": 1.704542890981785e-06, "loss": 0.8427075147628784, "step": 2506 }, { "epoch": 0.5776497695852535, "grad_norm": 0.7137957479223747, "learning_rate": 1.7042724703359032e-06, "loss": 0.7745763063430786, "step": 2507 }, { "epoch": 0.5778801843317972, "grad_norm": 0.8935647722203462, "learning_rate": 1.7040019474658047e-06, "loss": 0.8179641962051392, "step": 2508 }, { "epoch": 0.5781105990783411, "grad_norm": 0.9010033541227577, "learning_rate": 1.7037313224107557e-06, "loss": 0.8118200302124023, "step": 2509 }, { "epoch": 0.5783410138248848, "grad_norm": 0.7297456575398072, "learning_rate": 1.7034605952100364e-06, "loss": 0.7892665863037109, "step": 2510 }, { "epoch": 0.5785714285714286, "grad_norm": 0.736874372872981, "learning_rate": 1.7031897659029434e-06, "loss": 0.7442026734352112, "step": 2511 }, { "epoch": 0.5788018433179724, "grad_norm": 0.9375581770522491, "learning_rate": 1.7029188345287865e-06, "loss": 0.8179585933685303, "step": 2512 }, { "epoch": 0.5790322580645161, "grad_norm": 0.8710660194733852, "learning_rate": 1.7026478011268918e-06, "loss": 0.7569797039031982, "step": 2513 }, { "epoch": 0.5792626728110599, "grad_norm": 0.8952615874674131, "learning_rate": 1.7023766657365984e-06, "loss": 0.8464581966400146, "step": 2514 }, { "epoch": 0.5794930875576036, "grad_norm": 0.9645554070219402, "learning_rate": 1.702105428397262e-06, "loss": 0.7326645255088806, "step": 2515 }, { "epoch": 0.5797235023041475, "grad_norm": 0.8243138835822689, "learning_rate": 1.7018340891482522e-06, "loss": 0.7993732690811157, "step": 2516 }, { "epoch": 0.5799539170506912, "grad_norm": 0.7406582307230963, "learning_rate": 1.7015626480289532e-06, "loss": 0.8124513626098633, "step": 2517 }, { "epoch": 0.580184331797235, "grad_norm": 0.7758431888553803, "learning_rate": 1.701291105078765e-06, "loss": 0.9075840711593628, "step": 2518 }, { "epoch": 0.5804147465437788, "grad_norm": 0.8900052121004013, "learning_rate": 1.7010194603371009e-06, "loss": 0.8212069272994995, "step": 2519 }, { "epoch": 0.5806451612903226, "grad_norm": 0.8737089153257858, "learning_rate": 1.7007477138433903e-06, "loss": 0.7582074999809265, "step": 2520 }, { "epoch": 0.5808755760368663, "grad_norm": 0.7402264811343096, "learning_rate": 1.7004758656370769e-06, "loss": 0.8917636871337891, "step": 2521 }, { "epoch": 0.5811059907834102, "grad_norm": 0.9496944008191128, "learning_rate": 1.7002039157576186e-06, "loss": 0.8919704556465149, "step": 2522 }, { "epoch": 0.5813364055299539, "grad_norm": 0.8803733592170607, "learning_rate": 1.699931864244489e-06, "loss": 0.7474988698959351, "step": 2523 }, { "epoch": 0.5815668202764976, "grad_norm": 0.9179665061824968, "learning_rate": 1.6996597111371758e-06, "loss": 0.8596241474151611, "step": 2524 }, { "epoch": 0.5817972350230415, "grad_norm": 0.8260474861422493, "learning_rate": 1.699387456475182e-06, "loss": 0.9316335916519165, "step": 2525 }, { "epoch": 0.5820276497695852, "grad_norm": 0.7937616616577486, "learning_rate": 1.6991151002980248e-06, "loss": 0.7364813089370728, "step": 2526 }, { "epoch": 0.582258064516129, "grad_norm": 0.9072210580359311, "learning_rate": 1.698842642645236e-06, "loss": 0.789472758769989, "step": 2527 }, { "epoch": 0.5824884792626728, "grad_norm": 0.9988239379820413, "learning_rate": 1.6985700835563627e-06, "loss": 1.024861216545105, "step": 2528 }, { "epoch": 0.5827188940092166, "grad_norm": 0.9746619752287254, "learning_rate": 1.6982974230709667e-06, "loss": 0.8465025424957275, "step": 2529 }, { "epoch": 0.5829493087557603, "grad_norm": 1.0146741583341603, "learning_rate": 1.6980246612286244e-06, "loss": 0.7502799034118652, "step": 2530 }, { "epoch": 0.5831797235023042, "grad_norm": 0.866831185770848, "learning_rate": 1.6977517980689264e-06, "loss": 0.8019870519638062, "step": 2531 }, { "epoch": 0.5834101382488479, "grad_norm": 0.783761351839215, "learning_rate": 1.6974788336314788e-06, "loss": 0.9048774242401123, "step": 2532 }, { "epoch": 0.5836405529953917, "grad_norm": 0.8577409607010705, "learning_rate": 1.6972057679559018e-06, "loss": 0.8411067724227905, "step": 2533 }, { "epoch": 0.5838709677419355, "grad_norm": 0.7158353942796929, "learning_rate": 1.6969326010818304e-06, "loss": 0.7399133443832397, "step": 2534 }, { "epoch": 0.5841013824884793, "grad_norm": 0.7309631229110555, "learning_rate": 1.6966593330489144e-06, "loss": 0.7553995847702026, "step": 2535 }, { "epoch": 0.584331797235023, "grad_norm": 0.7563702103772202, "learning_rate": 1.6963859638968188e-06, "loss": 0.8405054807662964, "step": 2536 }, { "epoch": 0.5845622119815668, "grad_norm": 0.739785555800379, "learning_rate": 1.6961124936652223e-06, "loss": 0.7619640231132507, "step": 2537 }, { "epoch": 0.5847926267281106, "grad_norm": 0.6189871014888121, "learning_rate": 1.6958389223938187e-06, "loss": 0.7785576581954956, "step": 2538 }, { "epoch": 0.5850230414746543, "grad_norm": 1.0593569746028593, "learning_rate": 1.695565250122317e-06, "loss": 0.9230754375457764, "step": 2539 }, { "epoch": 0.5852534562211982, "grad_norm": 0.9087046574881754, "learning_rate": 1.69529147689044e-06, "loss": 0.798599362373352, "step": 2540 }, { "epoch": 0.5854838709677419, "grad_norm": 0.7546263570181881, "learning_rate": 1.6950176027379253e-06, "loss": 0.8491491079330444, "step": 2541 }, { "epoch": 0.5857142857142857, "grad_norm": 0.9063392015432612, "learning_rate": 1.694743627704526e-06, "loss": 0.7906054854393005, "step": 2542 }, { "epoch": 0.5859447004608295, "grad_norm": 0.8834118839199732, "learning_rate": 1.6944695518300084e-06, "loss": 0.8178746700286865, "step": 2543 }, { "epoch": 0.5861751152073733, "grad_norm": 0.9444844508582247, "learning_rate": 1.6941953751541552e-06, "loss": 0.867972731590271, "step": 2544 }, { "epoch": 0.586405529953917, "grad_norm": 0.8815618278989616, "learning_rate": 1.6939210977167622e-06, "loss": 0.8000613451004028, "step": 2545 }, { "epoch": 0.5866359447004609, "grad_norm": 0.938056940810552, "learning_rate": 1.6936467195576403e-06, "loss": 0.8473562002182007, "step": 2546 }, { "epoch": 0.5868663594470046, "grad_norm": 0.960324746454341, "learning_rate": 1.6933722407166156e-06, "loss": 0.971686065196991, "step": 2547 }, { "epoch": 0.5870967741935483, "grad_norm": 0.718798566737211, "learning_rate": 1.6930976612335276e-06, "loss": 0.6679604053497314, "step": 2548 }, { "epoch": 0.5873271889400922, "grad_norm": 0.8662288511956259, "learning_rate": 1.692822981148232e-06, "loss": 0.81952303647995, "step": 2549 }, { "epoch": 0.5875576036866359, "grad_norm": 0.7171085968938, "learning_rate": 1.6925482005005978e-06, "loss": 0.8711779713630676, "step": 2550 }, { "epoch": 0.5877880184331797, "grad_norm": 0.8419799604008648, "learning_rate": 1.6922733193305093e-06, "loss": 0.930451512336731, "step": 2551 }, { "epoch": 0.5880184331797235, "grad_norm": 0.8349862719015169, "learning_rate": 1.6919983376778647e-06, "loss": 0.8435598611831665, "step": 2552 }, { "epoch": 0.5882488479262673, "grad_norm": 0.8491940209701643, "learning_rate": 1.6917232555825774e-06, "loss": 0.8868621587753296, "step": 2553 }, { "epoch": 0.588479262672811, "grad_norm": 0.7537041162487105, "learning_rate": 1.6914480730845752e-06, "loss": 0.6821786165237427, "step": 2554 }, { "epoch": 0.5887096774193549, "grad_norm": 0.8487688242201222, "learning_rate": 1.691172790223801e-06, "loss": 0.7241402864456177, "step": 2555 }, { "epoch": 0.5889400921658986, "grad_norm": 0.7422220828348832, "learning_rate": 1.690897407040211e-06, "loss": 0.7477490305900574, "step": 2556 }, { "epoch": 0.5891705069124424, "grad_norm": 0.7636915444427955, "learning_rate": 1.690621923573777e-06, "loss": 0.7881484031677246, "step": 2557 }, { "epoch": 0.5894009216589862, "grad_norm": 0.959692830610789, "learning_rate": 1.6903463398644848e-06, "loss": 0.8292979001998901, "step": 2558 }, { "epoch": 0.58963133640553, "grad_norm": 0.711937804642515, "learning_rate": 1.690070655952336e-06, "loss": 0.7068917751312256, "step": 2559 }, { "epoch": 0.5898617511520737, "grad_norm": 1.1143023950252693, "learning_rate": 1.6897948718773443e-06, "loss": 0.8907356262207031, "step": 2560 }, { "epoch": 0.5900921658986175, "grad_norm": 0.7930222105996996, "learning_rate": 1.6895189876795405e-06, "loss": 0.7762824892997742, "step": 2561 }, { "epoch": 0.5903225806451613, "grad_norm": 1.0922797891559575, "learning_rate": 1.6892430033989685e-06, "loss": 0.9682759046554565, "step": 2562 }, { "epoch": 0.590552995391705, "grad_norm": 0.8231082510824629, "learning_rate": 1.6889669190756866e-06, "loss": 0.7594735622406006, "step": 2563 }, { "epoch": 0.5907834101382489, "grad_norm": 0.8117866090414669, "learning_rate": 1.6886907347497687e-06, "loss": 0.8161605000495911, "step": 2564 }, { "epoch": 0.5910138248847926, "grad_norm": 0.8557086150703954, "learning_rate": 1.6884144504613023e-06, "loss": 0.9390331506729126, "step": 2565 }, { "epoch": 0.5912442396313364, "grad_norm": 0.9387748138594502, "learning_rate": 1.68813806625039e-06, "loss": 0.8895832300186157, "step": 2566 }, { "epoch": 0.5914746543778802, "grad_norm": 0.8802161511936953, "learning_rate": 1.687861582157148e-06, "loss": 0.7779919505119324, "step": 2567 }, { "epoch": 0.591705069124424, "grad_norm": 1.139110447936057, "learning_rate": 1.687584998221708e-06, "loss": 0.8974252343177795, "step": 2568 }, { "epoch": 0.5919354838709677, "grad_norm": 0.8073269492940187, "learning_rate": 1.687308314484216e-06, "loss": 0.8487393856048584, "step": 2569 }, { "epoch": 0.5921658986175116, "grad_norm": 0.8310515688854938, "learning_rate": 1.6870315309848318e-06, "loss": 0.8356295824050903, "step": 2570 }, { "epoch": 0.5923963133640553, "grad_norm": 0.9033360313158958, "learning_rate": 1.6867546477637307e-06, "loss": 0.8180248737335205, "step": 2571 }, { "epoch": 0.5926267281105991, "grad_norm": 0.6950974205275126, "learning_rate": 1.6864776648611013e-06, "loss": 0.8456830978393555, "step": 2572 }, { "epoch": 0.5928571428571429, "grad_norm": 0.9039181033590447, "learning_rate": 1.6862005823171476e-06, "loss": 0.8378905057907104, "step": 2573 }, { "epoch": 0.5930875576036866, "grad_norm": 0.835432630485808, "learning_rate": 1.685923400172088e-06, "loss": 0.8060408234596252, "step": 2574 }, { "epoch": 0.5933179723502304, "grad_norm": 0.8354491785263655, "learning_rate": 1.685646118466155e-06, "loss": 0.7550709247589111, "step": 2575 }, { "epoch": 0.5935483870967742, "grad_norm": 0.805260271869055, "learning_rate": 1.6853687372395955e-06, "loss": 0.8475208282470703, "step": 2576 }, { "epoch": 0.593778801843318, "grad_norm": 1.0626255995304192, "learning_rate": 1.6850912565326709e-06, "loss": 0.8681533336639404, "step": 2577 }, { "epoch": 0.5940092165898617, "grad_norm": 0.9000714044087056, "learning_rate": 1.6848136763856573e-06, "loss": 0.7756578922271729, "step": 2578 }, { "epoch": 0.5942396313364056, "grad_norm": 1.1163759985623336, "learning_rate": 1.6845359968388456e-06, "loss": 0.8910564184188843, "step": 2579 }, { "epoch": 0.5944700460829493, "grad_norm": 0.7484768523036672, "learning_rate": 1.6842582179325397e-06, "loss": 0.7293382883071899, "step": 2580 }, { "epoch": 0.5947004608294931, "grad_norm": 0.8208214849988605, "learning_rate": 1.6839803397070597e-06, "loss": 0.8497427105903625, "step": 2581 }, { "epoch": 0.5949308755760369, "grad_norm": 0.9124854441462121, "learning_rate": 1.6837023622027386e-06, "loss": 0.800891637802124, "step": 2582 }, { "epoch": 0.5951612903225807, "grad_norm": 0.8887114325795745, "learning_rate": 1.683424285459925e-06, "loss": 0.889703631401062, "step": 2583 }, { "epoch": 0.5953917050691244, "grad_norm": 0.83139201735135, "learning_rate": 1.6831461095189808e-06, "loss": 0.7500913143157959, "step": 2584 }, { "epoch": 0.5956221198156681, "grad_norm": 0.8260167845821169, "learning_rate": 1.6828678344202834e-06, "loss": 0.8575263023376465, "step": 2585 }, { "epoch": 0.595852534562212, "grad_norm": 0.8796083393133354, "learning_rate": 1.6825894602042238e-06, "loss": 0.7754372358322144, "step": 2586 }, { "epoch": 0.5960829493087557, "grad_norm": 1.0529816523070568, "learning_rate": 1.6823109869112074e-06, "loss": 0.8861502408981323, "step": 2587 }, { "epoch": 0.5963133640552996, "grad_norm": 0.7738036894554111, "learning_rate": 1.6820324145816548e-06, "loss": 0.725920557975769, "step": 2588 }, { "epoch": 0.5965437788018433, "grad_norm": 0.7887605961214393, "learning_rate": 1.6817537432559998e-06, "loss": 0.6195499897003174, "step": 2589 }, { "epoch": 0.5967741935483871, "grad_norm": 0.8405918169035362, "learning_rate": 1.6814749729746918e-06, "loss": 0.8757472038269043, "step": 2590 }, { "epoch": 0.5970046082949308, "grad_norm": 0.8710168774832879, "learning_rate": 1.6811961037781934e-06, "loss": 0.8024059534072876, "step": 2591 }, { "epoch": 0.5972350230414747, "grad_norm": 1.1763814328442668, "learning_rate": 1.6809171357069825e-06, "loss": 0.8397082090377808, "step": 2592 }, { "epoch": 0.5974654377880184, "grad_norm": 0.8163820389720032, "learning_rate": 1.6806380688015507e-06, "loss": 0.7693872451782227, "step": 2593 }, { "epoch": 0.5976958525345623, "grad_norm": 0.7668441612993817, "learning_rate": 1.6803589031024043e-06, "loss": 0.7918043732643127, "step": 2594 }, { "epoch": 0.597926267281106, "grad_norm": 0.7951277033960863, "learning_rate": 1.680079638650064e-06, "loss": 0.8046969175338745, "step": 2595 }, { "epoch": 0.5981566820276498, "grad_norm": 0.9724191958452253, "learning_rate": 1.6798002754850643e-06, "loss": 0.7889789938926697, "step": 2596 }, { "epoch": 0.5983870967741935, "grad_norm": 0.8356070849986357, "learning_rate": 1.6795208136479543e-06, "loss": 0.874780535697937, "step": 2597 }, { "epoch": 0.5986175115207373, "grad_norm": 0.8380940855873632, "learning_rate": 1.679241253179298e-06, "loss": 0.8728631734848022, "step": 2598 }, { "epoch": 0.5988479262672811, "grad_norm": 0.7909132896338992, "learning_rate": 1.678961594119673e-06, "loss": 0.5940345525741577, "step": 2599 }, { "epoch": 0.5990783410138248, "grad_norm": 0.7873638428289793, "learning_rate": 1.6786818365096712e-06, "loss": 0.8524528741836548, "step": 2600 }, { "epoch": 0.5993087557603687, "grad_norm": 1.2099119623298256, "learning_rate": 1.6784019803899e-06, "loss": 1.0738554000854492, "step": 2601 }, { "epoch": 0.5995391705069124, "grad_norm": 0.9987206599474828, "learning_rate": 1.6781220258009787e-06, "loss": 0.9146362543106079, "step": 2602 }, { "epoch": 0.5997695852534562, "grad_norm": 0.9546196333490053, "learning_rate": 1.6778419727835434e-06, "loss": 0.8846019506454468, "step": 2603 }, { "epoch": 0.6, "grad_norm": 1.0356705992849526, "learning_rate": 1.6775618213782427e-06, "loss": 0.9564694166183472, "step": 2604 }, { "epoch": 0.6002304147465438, "grad_norm": 0.8649265876220377, "learning_rate": 1.6772815716257411e-06, "loss": 0.7311475276947021, "step": 2605 }, { "epoch": 0.6004608294930875, "grad_norm": 0.9996641063184493, "learning_rate": 1.6770012235667157e-06, "loss": 0.8198719024658203, "step": 2606 }, { "epoch": 0.6006912442396314, "grad_norm": 0.8625199282325245, "learning_rate": 1.676720777241859e-06, "loss": 0.7667897939682007, "step": 2607 }, { "epoch": 0.6009216589861751, "grad_norm": 0.8068998344787891, "learning_rate": 1.6764402326918775e-06, "loss": 0.8438166379928589, "step": 2608 }, { "epoch": 0.6011520737327188, "grad_norm": 0.8540979807575545, "learning_rate": 1.6761595899574913e-06, "loss": 0.801039457321167, "step": 2609 }, { "epoch": 0.6013824884792627, "grad_norm": 0.8234203241271092, "learning_rate": 1.6758788490794362e-06, "loss": 0.8063384294509888, "step": 2610 }, { "epoch": 0.6016129032258064, "grad_norm": 0.6526013686548677, "learning_rate": 1.6755980100984609e-06, "loss": 0.7574378848075867, "step": 2611 }, { "epoch": 0.6018433179723502, "grad_norm": 0.9515660687698646, "learning_rate": 1.6753170730553285e-06, "loss": 0.7640282511711121, "step": 2612 }, { "epoch": 0.602073732718894, "grad_norm": 0.8028588885811085, "learning_rate": 1.675036037990817e-06, "loss": 0.8366582989692688, "step": 2613 }, { "epoch": 0.6023041474654378, "grad_norm": 0.9790278189412774, "learning_rate": 1.6747549049457184e-06, "loss": 0.851488471031189, "step": 2614 }, { "epoch": 0.6025345622119815, "grad_norm": 0.8888933014827352, "learning_rate": 1.6744736739608385e-06, "loss": 0.6821870803833008, "step": 2615 }, { "epoch": 0.6027649769585254, "grad_norm": 0.9884428615602953, "learning_rate": 1.6741923450769977e-06, "loss": 0.9263452887535095, "step": 2616 }, { "epoch": 0.6029953917050691, "grad_norm": 0.7660541738576696, "learning_rate": 1.6739109183350303e-06, "loss": 0.7471155524253845, "step": 2617 }, { "epoch": 0.603225806451613, "grad_norm": 0.8463548916487829, "learning_rate": 1.6736293937757858e-06, "loss": 0.8859940767288208, "step": 2618 }, { "epoch": 0.6034562211981567, "grad_norm": 0.7725702923302962, "learning_rate": 1.673347771440126e-06, "loss": 0.8078656792640686, "step": 2619 }, { "epoch": 0.6036866359447005, "grad_norm": 0.8796637852565455, "learning_rate": 1.673066051368929e-06, "loss": 0.7663185596466064, "step": 2620 }, { "epoch": 0.6039170506912442, "grad_norm": 0.7762146466532337, "learning_rate": 1.6727842336030855e-06, "loss": 0.7924770712852478, "step": 2621 }, { "epoch": 0.604147465437788, "grad_norm": 0.6362525346897695, "learning_rate": 1.672502318183501e-06, "loss": 0.7781439423561096, "step": 2622 }, { "epoch": 0.6043778801843318, "grad_norm": 0.7824821748809755, "learning_rate": 1.6722203051510953e-06, "loss": 0.9342260360717773, "step": 2623 }, { "epoch": 0.6046082949308755, "grad_norm": 0.9113412146225311, "learning_rate": 1.6719381945468024e-06, "loss": 0.8589230179786682, "step": 2624 }, { "epoch": 0.6048387096774194, "grad_norm": 0.9092021688294594, "learning_rate": 1.67165598641157e-06, "loss": 0.8692198991775513, "step": 2625 }, { "epoch": 0.6050691244239631, "grad_norm": 0.9811252814075038, "learning_rate": 1.6713736807863606e-06, "loss": 0.9220771789550781, "step": 2626 }, { "epoch": 0.6052995391705069, "grad_norm": 0.7869789442575379, "learning_rate": 1.6710912777121497e-06, "loss": 0.670639157295227, "step": 2627 }, { "epoch": 0.6055299539170507, "grad_norm": 0.8458627233906328, "learning_rate": 1.6708087772299287e-06, "loss": 0.780914306640625, "step": 2628 }, { "epoch": 0.6057603686635945, "grad_norm": 0.7718782555310939, "learning_rate": 1.6705261793807014e-06, "loss": 0.836430549621582, "step": 2629 }, { "epoch": 0.6059907834101382, "grad_norm": 0.8965474432723056, "learning_rate": 1.670243484205487e-06, "loss": 0.84266197681427, "step": 2630 }, { "epoch": 0.6062211981566821, "grad_norm": 0.8992013517980091, "learning_rate": 1.6699606917453184e-06, "loss": 0.9276752471923828, "step": 2631 }, { "epoch": 0.6064516129032258, "grad_norm": 0.8740634897243095, "learning_rate": 1.6696778020412418e-06, "loss": 0.8319100141525269, "step": 2632 }, { "epoch": 0.6066820276497696, "grad_norm": 0.9778851785690291, "learning_rate": 1.669394815134319e-06, "loss": 0.7511987686157227, "step": 2633 }, { "epoch": 0.6069124423963134, "grad_norm": 0.9559089829828732, "learning_rate": 1.6691117310656249e-06, "loss": 0.7847566604614258, "step": 2634 }, { "epoch": 0.6071428571428571, "grad_norm": 0.7352732117136743, "learning_rate": 1.668828549876249e-06, "loss": 0.8598428964614868, "step": 2635 }, { "epoch": 0.6073732718894009, "grad_norm": 0.9632462301651329, "learning_rate": 1.6685452716072942e-06, "loss": 0.8676267266273499, "step": 2636 }, { "epoch": 0.6076036866359447, "grad_norm": 0.9796050613045469, "learning_rate": 1.6682618962998787e-06, "loss": 0.8139858841896057, "step": 2637 }, { "epoch": 0.6078341013824885, "grad_norm": 0.9214980939594923, "learning_rate": 1.6679784239951334e-06, "loss": 0.878848671913147, "step": 2638 }, { "epoch": 0.6080645161290322, "grad_norm": 0.8942413316087445, "learning_rate": 1.6676948547342038e-06, "loss": 0.7094229459762573, "step": 2639 }, { "epoch": 0.6082949308755761, "grad_norm": 0.7183954232108332, "learning_rate": 1.6674111885582502e-06, "loss": 0.7908186912536621, "step": 2640 }, { "epoch": 0.6085253456221198, "grad_norm": 0.705517985038791, "learning_rate": 1.6671274255084465e-06, "loss": 0.7205992341041565, "step": 2641 }, { "epoch": 0.6087557603686636, "grad_norm": 0.937951031991606, "learning_rate": 1.6668435656259796e-06, "loss": 0.8098955750465393, "step": 2642 }, { "epoch": 0.6089861751152074, "grad_norm": 0.8047793122116887, "learning_rate": 1.6665596089520522e-06, "loss": 0.9344205856323242, "step": 2643 }, { "epoch": 0.6092165898617512, "grad_norm": 0.73132257965357, "learning_rate": 1.6662755555278798e-06, "loss": 0.6149121522903442, "step": 2644 }, { "epoch": 0.6094470046082949, "grad_norm": 1.1550816011183633, "learning_rate": 1.6659914053946929e-06, "loss": 0.790631115436554, "step": 2645 }, { "epoch": 0.6096774193548387, "grad_norm": 0.9832349740984434, "learning_rate": 1.6657071585937349e-06, "loss": 0.7789372801780701, "step": 2646 }, { "epoch": 0.6099078341013825, "grad_norm": 0.7425679816784971, "learning_rate": 1.6654228151662641e-06, "loss": 0.9119753837585449, "step": 2647 }, { "epoch": 0.6101382488479262, "grad_norm": 1.0635804319271085, "learning_rate": 1.6651383751535526e-06, "loss": 0.827568769454956, "step": 2648 }, { "epoch": 0.6103686635944701, "grad_norm": 0.9620609244203838, "learning_rate": 1.6648538385968865e-06, "loss": 0.8862377405166626, "step": 2649 }, { "epoch": 0.6105990783410138, "grad_norm": 0.7954209003880245, "learning_rate": 1.6645692055375658e-06, "loss": 0.7765665054321289, "step": 2650 }, { "epoch": 0.6108294930875576, "grad_norm": 0.7698374340240739, "learning_rate": 1.6642844760169048e-06, "loss": 0.7673745155334473, "step": 2651 }, { "epoch": 0.6110599078341014, "grad_norm": 1.051257553540871, "learning_rate": 1.6639996500762313e-06, "loss": 0.8539090752601624, "step": 2652 }, { "epoch": 0.6112903225806452, "grad_norm": 0.8676017636407886, "learning_rate": 1.663714727756888e-06, "loss": 0.9146299362182617, "step": 2653 }, { "epoch": 0.6115207373271889, "grad_norm": 0.9802646170879412, "learning_rate": 1.6634297091002304e-06, "loss": 0.6720675230026245, "step": 2654 }, { "epoch": 0.6117511520737328, "grad_norm": 0.9963804792413621, "learning_rate": 1.6631445941476287e-06, "loss": 0.876419186592102, "step": 2655 }, { "epoch": 0.6119815668202765, "grad_norm": 0.8251901500966289, "learning_rate": 1.6628593829404673e-06, "loss": 0.781826376914978, "step": 2656 }, { "epoch": 0.6122119815668203, "grad_norm": 1.0156308960299383, "learning_rate": 1.662574075520144e-06, "loss": 0.8700725436210632, "step": 2657 }, { "epoch": 0.6124423963133641, "grad_norm": 0.8730333366815507, "learning_rate": 1.6622886719280703e-06, "loss": 0.7927212715148926, "step": 2658 }, { "epoch": 0.6126728110599078, "grad_norm": 0.9472958125063492, "learning_rate": 1.6620031722056732e-06, "loss": 0.8402982354164124, "step": 2659 }, { "epoch": 0.6129032258064516, "grad_norm": 0.9246784332742947, "learning_rate": 1.6617175763943916e-06, "loss": 0.844031572341919, "step": 2660 }, { "epoch": 0.6131336405529954, "grad_norm": 1.1749754124811849, "learning_rate": 1.66143188453568e-06, "loss": 0.7927590608596802, "step": 2661 }, { "epoch": 0.6133640552995392, "grad_norm": 0.7562363270320578, "learning_rate": 1.6611460966710057e-06, "loss": 0.6881238222122192, "step": 2662 }, { "epoch": 0.6135944700460829, "grad_norm": 0.7503304726479316, "learning_rate": 1.6608602128418512e-06, "loss": 0.8782250881195068, "step": 2663 }, { "epoch": 0.6138248847926268, "grad_norm": 0.764429872232153, "learning_rate": 1.6605742330897112e-06, "loss": 0.810072124004364, "step": 2664 }, { "epoch": 0.6140552995391705, "grad_norm": 0.7959070796498304, "learning_rate": 1.660288157456096e-06, "loss": 0.9278649091720581, "step": 2665 }, { "epoch": 0.6142857142857143, "grad_norm": 0.8518702716538695, "learning_rate": 1.6600019859825287e-06, "loss": 0.7821990251541138, "step": 2666 }, { "epoch": 0.614516129032258, "grad_norm": 0.8000150810917545, "learning_rate": 1.6597157187105474e-06, "loss": 0.7945138216018677, "step": 2667 }, { "epoch": 0.6147465437788019, "grad_norm": 0.9158855636867193, "learning_rate": 1.659429355681702e-06, "loss": 0.7796168327331543, "step": 2668 }, { "epoch": 0.6149769585253456, "grad_norm": 0.8778480996767207, "learning_rate": 1.659142896937559e-06, "loss": 0.8412867784500122, "step": 2669 }, { "epoch": 0.6152073732718893, "grad_norm": 0.8776586025383009, "learning_rate": 1.6588563425196976e-06, "loss": 0.8507891893386841, "step": 2670 }, { "epoch": 0.6154377880184332, "grad_norm": 0.7470530836348557, "learning_rate": 1.6585696924697097e-06, "loss": 0.7538737654685974, "step": 2671 }, { "epoch": 0.6156682027649769, "grad_norm": 0.7938343055651664, "learning_rate": 1.6582829468292027e-06, "loss": 0.7241994142532349, "step": 2672 }, { "epoch": 0.6158986175115208, "grad_norm": 0.7740707689038899, "learning_rate": 1.6579961056397979e-06, "loss": 0.8282276391983032, "step": 2673 }, { "epoch": 0.6161290322580645, "grad_norm": 0.9834275785675608, "learning_rate": 1.657709168943129e-06, "loss": 0.7823094725608826, "step": 2674 }, { "epoch": 0.6163594470046083, "grad_norm": 0.7814560466718257, "learning_rate": 1.6574221367808452e-06, "loss": 0.7682117819786072, "step": 2675 }, { "epoch": 0.616589861751152, "grad_norm": 0.791790817396352, "learning_rate": 1.6571350091946084e-06, "loss": 0.7483188509941101, "step": 2676 }, { "epoch": 0.6168202764976959, "grad_norm": 0.7904062559480196, "learning_rate": 1.656847786226095e-06, "loss": 0.8244579434394836, "step": 2677 }, { "epoch": 0.6170506912442396, "grad_norm": 0.935192090002093, "learning_rate": 1.6565604679169951e-06, "loss": 0.9741685390472412, "step": 2678 }, { "epoch": 0.6172811059907835, "grad_norm": 1.2715516239943523, "learning_rate": 1.6562730543090122e-06, "loss": 1.0004706382751465, "step": 2679 }, { "epoch": 0.6175115207373272, "grad_norm": 0.7382412100690486, "learning_rate": 1.6559855454438644e-06, "loss": 0.6897011399269104, "step": 2680 }, { "epoch": 0.617741935483871, "grad_norm": 0.6330897297720288, "learning_rate": 1.6556979413632833e-06, "loss": 0.7250478267669678, "step": 2681 }, { "epoch": 0.6179723502304147, "grad_norm": 0.9717515360338855, "learning_rate": 1.6554102421090137e-06, "loss": 0.850714385509491, "step": 2682 }, { "epoch": 0.6182027649769585, "grad_norm": 0.917367886199939, "learning_rate": 1.6551224477228152e-06, "loss": 0.8389794230461121, "step": 2683 }, { "epoch": 0.6184331797235023, "grad_norm": 0.8244704754842406, "learning_rate": 1.6548345582464608e-06, "loss": 0.8004277944564819, "step": 2684 }, { "epoch": 0.618663594470046, "grad_norm": 0.9438052955461359, "learning_rate": 1.654546573721737e-06, "loss": 0.8439298868179321, "step": 2685 }, { "epoch": 0.6188940092165899, "grad_norm": 0.9506767899718855, "learning_rate": 1.6542584941904448e-06, "loss": 0.7715939283370972, "step": 2686 }, { "epoch": 0.6191244239631336, "grad_norm": 0.7277066195828455, "learning_rate": 1.6539703196943982e-06, "loss": 0.8521275520324707, "step": 2687 }, { "epoch": 0.6193548387096774, "grad_norm": 0.9502964788805838, "learning_rate": 1.6536820502754249e-06, "loss": 0.8773370981216431, "step": 2688 }, { "epoch": 0.6195852534562212, "grad_norm": 0.8896877670997408, "learning_rate": 1.653393685975368e-06, "loss": 0.7613356113433838, "step": 2689 }, { "epoch": 0.619815668202765, "grad_norm": 0.7872525626089157, "learning_rate": 1.6531052268360823e-06, "loss": 0.7534692287445068, "step": 2690 }, { "epoch": 0.6200460829493087, "grad_norm": 0.8888603991720845, "learning_rate": 1.652816672899438e-06, "loss": 0.861242413520813, "step": 2691 }, { "epoch": 0.6202764976958526, "grad_norm": 1.0955455640383855, "learning_rate": 1.652528024207317e-06, "loss": 0.9778954982757568, "step": 2692 }, { "epoch": 0.6205069124423963, "grad_norm": 0.8389124431813023, "learning_rate": 1.6522392808016176e-06, "loss": 0.7874879240989685, "step": 2693 }, { "epoch": 0.6207373271889401, "grad_norm": 1.038077147354541, "learning_rate": 1.6519504427242503e-06, "loss": 0.8306739330291748, "step": 2694 }, { "epoch": 0.6209677419354839, "grad_norm": 0.890554970207788, "learning_rate": 1.651661510017139e-06, "loss": 0.7617331743240356, "step": 2695 }, { "epoch": 0.6211981566820276, "grad_norm": 0.8325839299854928, "learning_rate": 1.6513724827222223e-06, "loss": 0.8912776708602905, "step": 2696 }, { "epoch": 0.6214285714285714, "grad_norm": 0.9626202232237234, "learning_rate": 1.6510833608814519e-06, "loss": 0.832025945186615, "step": 2697 }, { "epoch": 0.6216589861751152, "grad_norm": 0.8573045739455887, "learning_rate": 1.6507941445367934e-06, "loss": 0.7391358613967896, "step": 2698 }, { "epoch": 0.621889400921659, "grad_norm": 0.8417803604945624, "learning_rate": 1.6505048337302267e-06, "loss": 0.7968891263008118, "step": 2699 }, { "epoch": 0.6221198156682027, "grad_norm": 0.7943584636642551, "learning_rate": 1.6502154285037446e-06, "loss": 0.8268226981163025, "step": 2700 }, { "epoch": 0.6223502304147466, "grad_norm": 0.8943748659016423, "learning_rate": 1.6499259288993536e-06, "loss": 0.8727509379386902, "step": 2701 }, { "epoch": 0.6225806451612903, "grad_norm": 0.9781149876582625, "learning_rate": 1.6496363349590746e-06, "loss": 0.8419584035873413, "step": 2702 }, { "epoch": 0.6228110599078341, "grad_norm": 0.9222004845701074, "learning_rate": 1.6493466467249415e-06, "loss": 0.7753620743751526, "step": 2703 }, { "epoch": 0.6230414746543779, "grad_norm": 0.8188505837862442, "learning_rate": 1.6490568642390022e-06, "loss": 0.7735302448272705, "step": 2704 }, { "epoch": 0.6232718894009217, "grad_norm": 0.892742684163995, "learning_rate": 1.6487669875433183e-06, "loss": 0.8730747699737549, "step": 2705 }, { "epoch": 0.6235023041474654, "grad_norm": 1.081206789540213, "learning_rate": 1.648477016679965e-06, "loss": 1.026259183883667, "step": 2706 }, { "epoch": 0.6237327188940092, "grad_norm": 1.1700615414540931, "learning_rate": 1.6481869516910314e-06, "loss": 1.0710067749023438, "step": 2707 }, { "epoch": 0.623963133640553, "grad_norm": 0.8750649396873535, "learning_rate": 1.6478967926186196e-06, "loss": 0.8451842069625854, "step": 2708 }, { "epoch": 0.6241935483870967, "grad_norm": 1.0025312740636694, "learning_rate": 1.6476065395048463e-06, "loss": 0.8114550113677979, "step": 2709 }, { "epoch": 0.6244239631336406, "grad_norm": 0.9543936745980088, "learning_rate": 1.6473161923918408e-06, "loss": 0.9158897399902344, "step": 2710 }, { "epoch": 0.6246543778801843, "grad_norm": 0.9073320322912862, "learning_rate": 1.6470257513217471e-06, "loss": 0.8455985188484192, "step": 2711 }, { "epoch": 0.6248847926267281, "grad_norm": 0.9409835862192949, "learning_rate": 1.6467352163367224e-06, "loss": 0.7869806885719299, "step": 2712 }, { "epoch": 0.6251152073732719, "grad_norm": 0.9720046165998673, "learning_rate": 1.6464445874789369e-06, "loss": 0.7813467979431152, "step": 2713 }, { "epoch": 0.6253456221198157, "grad_norm": 0.9253768349404401, "learning_rate": 1.646153864790575e-06, "loss": 0.7607834339141846, "step": 2714 }, { "epoch": 0.6255760368663594, "grad_norm": 0.7655542834849622, "learning_rate": 1.6458630483138354e-06, "loss": 0.6316394209861755, "step": 2715 }, { "epoch": 0.6258064516129033, "grad_norm": 1.0037920503955002, "learning_rate": 1.6455721380909293e-06, "loss": 0.8613089323043823, "step": 2716 }, { "epoch": 0.626036866359447, "grad_norm": 0.900314234710346, "learning_rate": 1.6452811341640823e-06, "loss": 0.8521597385406494, "step": 2717 }, { "epoch": 0.6262672811059908, "grad_norm": 0.863334614503053, "learning_rate": 1.6449900365755322e-06, "loss": 0.7649816870689392, "step": 2718 }, { "epoch": 0.6264976958525346, "grad_norm": 0.7921235061169694, "learning_rate": 1.6446988453675327e-06, "loss": 0.669215738773346, "step": 2719 }, { "epoch": 0.6267281105990783, "grad_norm": 1.0085146323707468, "learning_rate": 1.6444075605823491e-06, "loss": 0.7795897722244263, "step": 2720 }, { "epoch": 0.6269585253456221, "grad_norm": 1.0985096718321175, "learning_rate": 1.6441161822622612e-06, "loss": 0.9773029088973999, "step": 2721 }, { "epoch": 0.6271889400921659, "grad_norm": 0.88062279724108, "learning_rate": 1.6438247104495622e-06, "loss": 0.8313496112823486, "step": 2722 }, { "epoch": 0.6274193548387097, "grad_norm": 0.8741823244787398, "learning_rate": 1.6435331451865589e-06, "loss": 0.822803258895874, "step": 2723 }, { "epoch": 0.6276497695852534, "grad_norm": 1.1191623839144935, "learning_rate": 1.643241486515571e-06, "loss": 0.8933405876159668, "step": 2724 }, { "epoch": 0.6278801843317973, "grad_norm": 0.8721873626078817, "learning_rate": 1.6429497344789334e-06, "loss": 0.865382194519043, "step": 2725 }, { "epoch": 0.628110599078341, "grad_norm": 0.6623424743433429, "learning_rate": 1.6426578891189929e-06, "loss": 0.5955609679222107, "step": 2726 }, { "epoch": 0.6283410138248848, "grad_norm": 0.9379654908769754, "learning_rate": 1.6423659504781102e-06, "loss": 0.7832648754119873, "step": 2727 }, { "epoch": 0.6285714285714286, "grad_norm": 0.9904172136436726, "learning_rate": 1.6420739185986606e-06, "loss": 0.8939651250839233, "step": 2728 }, { "epoch": 0.6288018433179724, "grad_norm": 0.8754504203733118, "learning_rate": 1.6417817935230316e-06, "loss": 0.7950553894042969, "step": 2729 }, { "epoch": 0.6290322580645161, "grad_norm": 0.7473547756110924, "learning_rate": 1.6414895752936247e-06, "loss": 0.7011410593986511, "step": 2730 }, { "epoch": 0.6292626728110599, "grad_norm": 0.8298073820867625, "learning_rate": 1.6411972639528553e-06, "loss": 0.8745814561843872, "step": 2731 }, { "epoch": 0.6294930875576037, "grad_norm": 0.9643129286331958, "learning_rate": 1.640904859543152e-06, "loss": 0.9487906694412231, "step": 2732 }, { "epoch": 0.6297235023041474, "grad_norm": 1.0003996457820634, "learning_rate": 1.6406123621069565e-06, "loss": 0.8493598103523254, "step": 2733 }, { "epoch": 0.6299539170506913, "grad_norm": 0.7043952970778223, "learning_rate": 1.640319771686725e-06, "loss": 0.8176105618476868, "step": 2734 }, { "epoch": 0.630184331797235, "grad_norm": 1.1365398207749948, "learning_rate": 1.640027088324926e-06, "loss": 0.8331952691078186, "step": 2735 }, { "epoch": 0.6304147465437788, "grad_norm": 0.9152153352251905, "learning_rate": 1.6397343120640428e-06, "loss": 0.7507727146148682, "step": 2736 }, { "epoch": 0.6306451612903226, "grad_norm": 0.8498087936716523, "learning_rate": 1.6394414429465707e-06, "loss": 0.7681083679199219, "step": 2737 }, { "epoch": 0.6308755760368664, "grad_norm": 1.0207970870125542, "learning_rate": 1.6391484810150197e-06, "loss": 0.86592036485672, "step": 2738 }, { "epoch": 0.6311059907834101, "grad_norm": 0.7893726077346048, "learning_rate": 1.6388554263119133e-06, "loss": 0.6561422348022461, "step": 2739 }, { "epoch": 0.631336405529954, "grad_norm": 0.8691518888981297, "learning_rate": 1.6385622788797871e-06, "loss": 1.0149214267730713, "step": 2740 }, { "epoch": 0.6315668202764977, "grad_norm": 3.1459869291369578, "learning_rate": 1.6382690387611912e-06, "loss": 0.8542313575744629, "step": 2741 }, { "epoch": 0.6317972350230415, "grad_norm": 0.8459688860048273, "learning_rate": 1.6379757059986898e-06, "loss": 0.8561190366744995, "step": 2742 }, { "epoch": 0.6320276497695853, "grad_norm": 0.8945733601522768, "learning_rate": 1.6376822806348591e-06, "loss": 0.7487457990646362, "step": 2743 }, { "epoch": 0.632258064516129, "grad_norm": 0.7710656021686645, "learning_rate": 1.6373887627122894e-06, "loss": 0.6169087886810303, "step": 2744 }, { "epoch": 0.6324884792626728, "grad_norm": 0.9363459151732765, "learning_rate": 1.6370951522735848e-06, "loss": 0.8384301662445068, "step": 2745 }, { "epoch": 0.6327188940092165, "grad_norm": 0.8816116065345285, "learning_rate": 1.636801449361362e-06, "loss": 0.8009958267211914, "step": 2746 }, { "epoch": 0.6329493087557604, "grad_norm": 0.7782605199549586, "learning_rate": 1.6365076540182518e-06, "loss": 0.7277840375900269, "step": 2747 }, { "epoch": 0.6331797235023041, "grad_norm": 0.8629211607674182, "learning_rate": 1.6362137662868988e-06, "loss": 0.7994974255561829, "step": 2748 }, { "epoch": 0.633410138248848, "grad_norm": 0.9972871876044257, "learning_rate": 1.6359197862099592e-06, "loss": 0.9940546751022339, "step": 2749 }, { "epoch": 0.6336405529953917, "grad_norm": 0.7083636808435892, "learning_rate": 1.6356257138301048e-06, "loss": 0.776983916759491, "step": 2750 }, { "epoch": 0.6338709677419355, "grad_norm": 1.0813287689618403, "learning_rate": 1.6353315491900194e-06, "loss": 0.8218704462051392, "step": 2751 }, { "epoch": 0.6341013824884792, "grad_norm": 0.9285197745822434, "learning_rate": 1.635037292332401e-06, "loss": 0.8437784910202026, "step": 2752 }, { "epoch": 0.6343317972350231, "grad_norm": 0.7951039096878332, "learning_rate": 1.63474294329996e-06, "loss": 0.7774004340171814, "step": 2753 }, { "epoch": 0.6345622119815668, "grad_norm": 0.7998446978982631, "learning_rate": 1.634448502135421e-06, "loss": 0.8480523824691772, "step": 2754 }, { "epoch": 0.6347926267281107, "grad_norm": 0.8710356721404071, "learning_rate": 1.634153968881522e-06, "loss": 0.838944673538208, "step": 2755 }, { "epoch": 0.6350230414746544, "grad_norm": 0.9609360504840417, "learning_rate": 1.633859343581014e-06, "loss": 0.7989159822463989, "step": 2756 }, { "epoch": 0.6352534562211981, "grad_norm": 0.8906618388597183, "learning_rate": 1.6335646262766612e-06, "loss": 0.8122522234916687, "step": 2757 }, { "epoch": 0.635483870967742, "grad_norm": 1.0306905026592958, "learning_rate": 1.6332698170112418e-06, "loss": 0.7472352981567383, "step": 2758 }, { "epoch": 0.6357142857142857, "grad_norm": 0.7470082329854858, "learning_rate": 1.6329749158275466e-06, "loss": 0.7160866260528564, "step": 2759 }, { "epoch": 0.6359447004608295, "grad_norm": 0.9276359862380839, "learning_rate": 1.6326799227683803e-06, "loss": 0.850339412689209, "step": 2760 }, { "epoch": 0.6361751152073732, "grad_norm": 0.8334408182150722, "learning_rate": 1.632384837876561e-06, "loss": 0.7683566808700562, "step": 2761 }, { "epoch": 0.6364055299539171, "grad_norm": 1.0070287688728312, "learning_rate": 1.6320896611949197e-06, "loss": 0.820326030254364, "step": 2762 }, { "epoch": 0.6366359447004608, "grad_norm": 0.9088399606663712, "learning_rate": 1.6317943927663005e-06, "loss": 0.9319206476211548, "step": 2763 }, { "epoch": 0.6368663594470046, "grad_norm": 0.854101738795234, "learning_rate": 1.6314990326335619e-06, "loss": 0.8473616242408752, "step": 2764 }, { "epoch": 0.6370967741935484, "grad_norm": 0.9083270544798837, "learning_rate": 1.6312035808395746e-06, "loss": 0.7515239715576172, "step": 2765 }, { "epoch": 0.6373271889400922, "grad_norm": 0.9691327918436982, "learning_rate": 1.630908037427223e-06, "loss": 0.8780150413513184, "step": 2766 }, { "epoch": 0.6375576036866359, "grad_norm": 0.8183908015853972, "learning_rate": 1.6306124024394051e-06, "loss": 0.7502909898757935, "step": 2767 }, { "epoch": 0.6377880184331797, "grad_norm": 1.0244030314506845, "learning_rate": 1.630316675919032e-06, "loss": 0.8440920114517212, "step": 2768 }, { "epoch": 0.6380184331797235, "grad_norm": 0.9479398820781787, "learning_rate": 1.6300208579090275e-06, "loss": 0.7769831418991089, "step": 2769 }, { "epoch": 0.6382488479262672, "grad_norm": 0.7616107153752498, "learning_rate": 1.6297249484523297e-06, "loss": 0.6217764616012573, "step": 2770 }, { "epoch": 0.6384792626728111, "grad_norm": 0.7961962297717475, "learning_rate": 1.6294289475918891e-06, "loss": 0.8726013898849487, "step": 2771 }, { "epoch": 0.6387096774193548, "grad_norm": 0.9993347618775529, "learning_rate": 1.6291328553706702e-06, "loss": 0.9624546766281128, "step": 2772 }, { "epoch": 0.6389400921658986, "grad_norm": 0.9073330627878557, "learning_rate": 1.62883667183165e-06, "loss": 0.733322024345398, "step": 2773 }, { "epoch": 0.6391705069124424, "grad_norm": 0.828990327728417, "learning_rate": 1.6285403970178197e-06, "loss": 0.7944040298461914, "step": 2774 }, { "epoch": 0.6394009216589862, "grad_norm": 0.945508092850191, "learning_rate": 1.6282440309721825e-06, "loss": 0.8006964921951294, "step": 2775 }, { "epoch": 0.6396313364055299, "grad_norm": 0.8235251563991838, "learning_rate": 1.6279475737377562e-06, "loss": 0.8226393461227417, "step": 2776 }, { "epoch": 0.6398617511520738, "grad_norm": 0.9205648176506509, "learning_rate": 1.6276510253575707e-06, "loss": 0.8216049671173096, "step": 2777 }, { "epoch": 0.6400921658986175, "grad_norm": 1.2879339929003093, "learning_rate": 1.6273543858746698e-06, "loss": 0.9556760191917419, "step": 2778 }, { "epoch": 0.6403225806451613, "grad_norm": 1.226309717633737, "learning_rate": 1.6270576553321103e-06, "loss": 0.9736160039901733, "step": 2779 }, { "epoch": 0.6405529953917051, "grad_norm": 0.7107959971647043, "learning_rate": 1.6267608337729622e-06, "loss": 0.6930527687072754, "step": 2780 }, { "epoch": 0.6407834101382488, "grad_norm": 0.8158686811134676, "learning_rate": 1.6264639212403089e-06, "loss": 0.8047456741333008, "step": 2781 }, { "epoch": 0.6410138248847926, "grad_norm": 0.8454524938044947, "learning_rate": 1.6261669177772465e-06, "loss": 0.7278450727462769, "step": 2782 }, { "epoch": 0.6412442396313364, "grad_norm": 0.8520417006771478, "learning_rate": 1.6258698234268852e-06, "loss": 0.7768574357032776, "step": 2783 }, { "epoch": 0.6414746543778802, "grad_norm": 1.0890287289964238, "learning_rate": 1.6255726382323475e-06, "loss": 0.7621645331382751, "step": 2784 }, { "epoch": 0.6417050691244239, "grad_norm": 0.7437513689171984, "learning_rate": 1.6252753622367695e-06, "loss": 0.7566754221916199, "step": 2785 }, { "epoch": 0.6419354838709678, "grad_norm": 0.8832427803322862, "learning_rate": 1.6249779954833005e-06, "loss": 0.7609840631484985, "step": 2786 }, { "epoch": 0.6421658986175115, "grad_norm": 0.7482883809435998, "learning_rate": 1.6246805380151028e-06, "loss": 0.7360000610351562, "step": 2787 }, { "epoch": 0.6423963133640553, "grad_norm": 1.1130271498528226, "learning_rate": 1.624382989875352e-06, "loss": 0.7951081395149231, "step": 2788 }, { "epoch": 0.6426267281105991, "grad_norm": 0.7939855049580037, "learning_rate": 1.6240853511072367e-06, "loss": 0.7273311614990234, "step": 2789 }, { "epoch": 0.6428571428571429, "grad_norm": 1.0416971384804878, "learning_rate": 1.6237876217539588e-06, "loss": 0.9270737171173096, "step": 2790 }, { "epoch": 0.6430875576036866, "grad_norm": 0.97801359210753, "learning_rate": 1.6234898018587336e-06, "loss": 0.7624385356903076, "step": 2791 }, { "epoch": 0.6433179723502304, "grad_norm": 0.8529799225121792, "learning_rate": 1.6231918914647889e-06, "loss": 0.8266719579696655, "step": 2792 }, { "epoch": 0.6435483870967742, "grad_norm": 0.6435153338840431, "learning_rate": 1.6228938906153663e-06, "loss": 0.7606902122497559, "step": 2793 }, { "epoch": 0.6437788018433179, "grad_norm": 1.022572162531227, "learning_rate": 1.6225957993537197e-06, "loss": 0.8239191174507141, "step": 2794 }, { "epoch": 0.6440092165898618, "grad_norm": 0.8871272102711673, "learning_rate": 1.6222976177231174e-06, "loss": 0.8313608169555664, "step": 2795 }, { "epoch": 0.6442396313364055, "grad_norm": 0.7541910127898682, "learning_rate": 1.6219993457668396e-06, "loss": 0.7725037932395935, "step": 2796 }, { "epoch": 0.6444700460829493, "grad_norm": 0.8887584465014293, "learning_rate": 1.6217009835281802e-06, "loss": 0.8791182041168213, "step": 2797 }, { "epoch": 0.6447004608294931, "grad_norm": 0.9285171614449231, "learning_rate": 1.621402531050446e-06, "loss": 0.7157453298568726, "step": 2798 }, { "epoch": 0.6449308755760369, "grad_norm": 0.9675001114911925, "learning_rate": 1.621103988376957e-06, "loss": 0.8248307704925537, "step": 2799 }, { "epoch": 0.6451612903225806, "grad_norm": 0.8114025469253138, "learning_rate": 1.6208053555510467e-06, "loss": 0.7094661593437195, "step": 2800 }, { "epoch": 0.6453917050691245, "grad_norm": 0.997320269594231, "learning_rate": 1.6205066326160605e-06, "loss": 0.9130781888961792, "step": 2801 }, { "epoch": 0.6456221198156682, "grad_norm": 0.8555561883924394, "learning_rate": 1.620207819615358e-06, "loss": 0.7140541076660156, "step": 2802 }, { "epoch": 0.645852534562212, "grad_norm": 0.8223075667705522, "learning_rate": 1.6199089165923116e-06, "loss": 0.8638602495193481, "step": 2803 }, { "epoch": 0.6460829493087558, "grad_norm": 0.8487880176317714, "learning_rate": 1.6196099235903068e-06, "loss": 0.9055536389350891, "step": 2804 }, { "epoch": 0.6463133640552995, "grad_norm": 0.9356547902583738, "learning_rate": 1.6193108406527416e-06, "loss": 0.7694590091705322, "step": 2805 }, { "epoch": 0.6465437788018433, "grad_norm": 0.9047595380936525, "learning_rate": 1.619011667823028e-06, "loss": 0.7512019872665405, "step": 2806 }, { "epoch": 0.646774193548387, "grad_norm": 0.8406537006369587, "learning_rate": 1.6187124051445903e-06, "loss": 0.6362565159797668, "step": 2807 }, { "epoch": 0.6470046082949309, "grad_norm": 1.328031327807814, "learning_rate": 1.6184130526608656e-06, "loss": 0.885259747505188, "step": 2808 }, { "epoch": 0.6472350230414746, "grad_norm": 0.9445009081248091, "learning_rate": 1.6181136104153054e-06, "loss": 0.7868754863739014, "step": 2809 }, { "epoch": 0.6474654377880185, "grad_norm": 0.901923102146858, "learning_rate": 1.6178140784513729e-06, "loss": 0.889660120010376, "step": 2810 }, { "epoch": 0.6476958525345622, "grad_norm": 0.7380215273328754, "learning_rate": 1.6175144568125444e-06, "loss": 0.8460343480110168, "step": 2811 }, { "epoch": 0.647926267281106, "grad_norm": 0.9963582050847237, "learning_rate": 1.6172147455423105e-06, "loss": 0.8729731440544128, "step": 2812 }, { "epoch": 0.6481566820276498, "grad_norm": 0.9500689129739934, "learning_rate": 1.616914944684173e-06, "loss": 0.7937173843383789, "step": 2813 }, { "epoch": 0.6483870967741936, "grad_norm": 1.068299419221943, "learning_rate": 1.6166150542816483e-06, "loss": 0.8764641284942627, "step": 2814 }, { "epoch": 0.6486175115207373, "grad_norm": 0.8942547003902331, "learning_rate": 1.6163150743782645e-06, "loss": 0.8078420758247375, "step": 2815 }, { "epoch": 0.6488479262672812, "grad_norm": 0.9410598977678883, "learning_rate": 1.6160150050175636e-06, "loss": 0.9124993085861206, "step": 2816 }, { "epoch": 0.6490783410138249, "grad_norm": 0.8852573714623596, "learning_rate": 1.6157148462431003e-06, "loss": 0.9584136009216309, "step": 2817 }, { "epoch": 0.6493087557603686, "grad_norm": 1.0833527157774228, "learning_rate": 1.6154145980984422e-06, "loss": 0.8404672145843506, "step": 2818 }, { "epoch": 0.6495391705069125, "grad_norm": 0.9498348014278839, "learning_rate": 1.6151142606271695e-06, "loss": 0.7928001880645752, "step": 2819 }, { "epoch": 0.6497695852534562, "grad_norm": 0.8444903444994009, "learning_rate": 1.6148138338728766e-06, "loss": 0.7877479791641235, "step": 2820 }, { "epoch": 0.65, "grad_norm": 0.814898961059689, "learning_rate": 1.6145133178791695e-06, "loss": 0.9502429366111755, "step": 2821 }, { "epoch": 0.6502304147465438, "grad_norm": 0.791549779828082, "learning_rate": 1.6142127126896679e-06, "loss": 0.7866412401199341, "step": 2822 }, { "epoch": 0.6504608294930876, "grad_norm": 0.7841896313928699, "learning_rate": 1.613912018348004e-06, "loss": 0.8315345644950867, "step": 2823 }, { "epoch": 0.6506912442396313, "grad_norm": 0.6841019539216254, "learning_rate": 1.6136112348978236e-06, "loss": 0.9718044400215149, "step": 2824 }, { "epoch": 0.6509216589861752, "grad_norm": 0.6502753552916141, "learning_rate": 1.6133103623827843e-06, "loss": 0.5874941349029541, "step": 2825 }, { "epoch": 0.6511520737327189, "grad_norm": 0.8954999916723304, "learning_rate": 1.613009400846558e-06, "loss": 0.9498391151428223, "step": 2826 }, { "epoch": 0.6513824884792627, "grad_norm": 0.9527387242959447, "learning_rate": 1.612708350332829e-06, "loss": 0.858715295791626, "step": 2827 }, { "epoch": 0.6516129032258065, "grad_norm": 0.7771583744459308, "learning_rate": 1.6124072108852938e-06, "loss": 0.8618113994598389, "step": 2828 }, { "epoch": 0.6518433179723502, "grad_norm": 0.7504136233680345, "learning_rate": 1.6121059825476628e-06, "loss": 0.8024446964263916, "step": 2829 }, { "epoch": 0.652073732718894, "grad_norm": 0.8461077162414828, "learning_rate": 1.6118046653636586e-06, "loss": 0.8021122813224792, "step": 2830 }, { "epoch": 0.6523041474654377, "grad_norm": 0.8330044091738112, "learning_rate": 1.6115032593770176e-06, "loss": 0.8092107772827148, "step": 2831 }, { "epoch": 0.6525345622119816, "grad_norm": 0.8480183578387018, "learning_rate": 1.6112017646314872e-06, "loss": 0.9842641353607178, "step": 2832 }, { "epoch": 0.6527649769585253, "grad_norm": 0.8051494817524167, "learning_rate": 1.6109001811708305e-06, "loss": 0.744353175163269, "step": 2833 }, { "epoch": 0.6529953917050692, "grad_norm": 1.0610555371871784, "learning_rate": 1.6105985090388209e-06, "loss": 0.7089616060256958, "step": 2834 }, { "epoch": 0.6532258064516129, "grad_norm": 0.9119028582239228, "learning_rate": 1.610296748279246e-06, "loss": 0.9043736457824707, "step": 2835 }, { "epoch": 0.6534562211981567, "grad_norm": 1.0078987757698072, "learning_rate": 1.6099948989359061e-06, "loss": 0.9170948266983032, "step": 2836 }, { "epoch": 0.6536866359447004, "grad_norm": 0.9289963097672949, "learning_rate": 1.6096929610526145e-06, "loss": 0.8275802135467529, "step": 2837 }, { "epoch": 0.6539170506912443, "grad_norm": 0.9146670757237039, "learning_rate": 1.6093909346731965e-06, "loss": 0.9180251955986023, "step": 2838 }, { "epoch": 0.654147465437788, "grad_norm": 0.708269208459363, "learning_rate": 1.6090888198414908e-06, "loss": 0.8041235208511353, "step": 2839 }, { "epoch": 0.6543778801843319, "grad_norm": 0.9431191202102605, "learning_rate": 1.6087866166013492e-06, "loss": 0.7833176851272583, "step": 2840 }, { "epoch": 0.6546082949308756, "grad_norm": 0.8680924352570318, "learning_rate": 1.6084843249966364e-06, "loss": 0.838886022567749, "step": 2841 }, { "epoch": 0.6548387096774193, "grad_norm": 0.8317233103954151, "learning_rate": 1.6081819450712293e-06, "loss": 0.837687611579895, "step": 2842 }, { "epoch": 0.6550691244239631, "grad_norm": 0.8737630969117387, "learning_rate": 1.607879476869018e-06, "loss": 0.6572843790054321, "step": 2843 }, { "epoch": 0.6552995391705069, "grad_norm": 0.8513917948170456, "learning_rate": 1.6075769204339053e-06, "loss": 0.7698653936386108, "step": 2844 }, { "epoch": 0.6555299539170507, "grad_norm": 0.9469558820500475, "learning_rate": 1.607274275809807e-06, "loss": 0.8639169335365295, "step": 2845 }, { "epoch": 0.6557603686635944, "grad_norm": 0.8250799867539951, "learning_rate": 1.6069715430406517e-06, "loss": 0.837492823600769, "step": 2846 }, { "epoch": 0.6559907834101383, "grad_norm": 0.9277000604833184, "learning_rate": 1.6066687221703803e-06, "loss": 0.8824087381362915, "step": 2847 }, { "epoch": 0.656221198156682, "grad_norm": 0.9304701724719217, "learning_rate": 1.6063658132429468e-06, "loss": 0.8161731958389282, "step": 2848 }, { "epoch": 0.6564516129032258, "grad_norm": 0.7988044282931124, "learning_rate": 1.6060628163023183e-06, "loss": 0.8365877270698547, "step": 2849 }, { "epoch": 0.6566820276497696, "grad_norm": 0.8477393490951164, "learning_rate": 1.6057597313924745e-06, "loss": 0.877829909324646, "step": 2850 }, { "epoch": 0.6569124423963134, "grad_norm": 0.857078285622655, "learning_rate": 1.6054565585574075e-06, "loss": 0.756903886795044, "step": 2851 }, { "epoch": 0.6571428571428571, "grad_norm": 1.0124401818225557, "learning_rate": 1.6051532978411223e-06, "loss": 0.7777276039123535, "step": 2852 }, { "epoch": 0.6573732718894009, "grad_norm": 0.9464152715401636, "learning_rate": 1.6048499492876375e-06, "loss": 0.9191532135009766, "step": 2853 }, { "epoch": 0.6576036866359447, "grad_norm": 0.7885787618366824, "learning_rate": 1.6045465129409829e-06, "loss": 0.7693309783935547, "step": 2854 }, { "epoch": 0.6578341013824884, "grad_norm": 0.8787314035574895, "learning_rate": 1.6042429888452024e-06, "loss": 0.7865023612976074, "step": 2855 }, { "epoch": 0.6580645161290323, "grad_norm": 0.8588996745183644, "learning_rate": 1.6039393770443521e-06, "loss": 0.844336748123169, "step": 2856 }, { "epoch": 0.658294930875576, "grad_norm": 0.9455502994869639, "learning_rate": 1.6036356775825009e-06, "loss": 0.9590705633163452, "step": 2857 }, { "epoch": 0.6585253456221198, "grad_norm": 0.904582718768817, "learning_rate": 1.6033318905037297e-06, "loss": 0.8687748312950134, "step": 2858 }, { "epoch": 0.6587557603686636, "grad_norm": 0.8848681311153475, "learning_rate": 1.6030280158521336e-06, "loss": 0.8669745922088623, "step": 2859 }, { "epoch": 0.6589861751152074, "grad_norm": 0.8829211466390271, "learning_rate": 1.6027240536718191e-06, "loss": 0.6929436922073364, "step": 2860 }, { "epoch": 0.6592165898617511, "grad_norm": 0.9047325967091919, "learning_rate": 1.6024200040069065e-06, "loss": 0.6965433359146118, "step": 2861 }, { "epoch": 0.659447004608295, "grad_norm": 0.9743729570848424, "learning_rate": 1.6021158669015273e-06, "loss": 0.780353307723999, "step": 2862 }, { "epoch": 0.6596774193548387, "grad_norm": 0.7726382879850381, "learning_rate": 1.6018116423998277e-06, "loss": 0.685762882232666, "step": 2863 }, { "epoch": 0.6599078341013825, "grad_norm": 0.8607619933867399, "learning_rate": 1.6015073305459646e-06, "loss": 0.8249918222427368, "step": 2864 }, { "epoch": 0.6601382488479263, "grad_norm": 0.7388237148259402, "learning_rate": 1.6012029313841086e-06, "loss": 0.7327184677124023, "step": 2865 }, { "epoch": 0.66036866359447, "grad_norm": 0.9554378042614118, "learning_rate": 1.6008984449584433e-06, "loss": 0.7785891890525818, "step": 2866 }, { "epoch": 0.6605990783410138, "grad_norm": 0.7196967379779726, "learning_rate": 1.600593871313164e-06, "loss": 0.7307751178741455, "step": 2867 }, { "epoch": 0.6608294930875576, "grad_norm": 1.2601680054093507, "learning_rate": 1.6002892104924796e-06, "loss": 0.8802257180213928, "step": 2868 }, { "epoch": 0.6610599078341014, "grad_norm": 1.0302753711943056, "learning_rate": 1.5999844625406106e-06, "loss": 0.8699140548706055, "step": 2869 }, { "epoch": 0.6612903225806451, "grad_norm": 0.8146336951608913, "learning_rate": 1.5996796275017914e-06, "loss": 0.6453604102134705, "step": 2870 }, { "epoch": 0.661520737327189, "grad_norm": 0.807532897551279, "learning_rate": 1.5993747054202682e-06, "loss": 0.7319324016571045, "step": 2871 }, { "epoch": 0.6617511520737327, "grad_norm": 0.9337023535064233, "learning_rate": 1.5990696963402998e-06, "loss": 0.8357574343681335, "step": 2872 }, { "epoch": 0.6619815668202765, "grad_norm": 0.854915024221744, "learning_rate": 1.5987646003061581e-06, "loss": 0.7647984027862549, "step": 2873 }, { "epoch": 0.6622119815668203, "grad_norm": 1.0099884737934117, "learning_rate": 1.5984594173621274e-06, "loss": 0.8542075753211975, "step": 2874 }, { "epoch": 0.6624423963133641, "grad_norm": 0.9685596460194386, "learning_rate": 1.5981541475525044e-06, "loss": 0.7689328193664551, "step": 2875 }, { "epoch": 0.6626728110599078, "grad_norm": 0.8183777315007433, "learning_rate": 1.5978487909215987e-06, "loss": 0.7459174990653992, "step": 2876 }, { "epoch": 0.6629032258064517, "grad_norm": 0.8697380019030229, "learning_rate": 1.5975433475137329e-06, "loss": 0.8268495202064514, "step": 2877 }, { "epoch": 0.6631336405529954, "grad_norm": 0.9013422410425754, "learning_rate": 1.5972378173732406e-06, "loss": 0.8254266977310181, "step": 2878 }, { "epoch": 0.6633640552995391, "grad_norm": 1.0427681980244552, "learning_rate": 1.59693220054447e-06, "loss": 0.8552727103233337, "step": 2879 }, { "epoch": 0.663594470046083, "grad_norm": 0.7469699255899254, "learning_rate": 1.596626497071781e-06, "loss": 0.7196269035339355, "step": 2880 }, { "epoch": 0.6638248847926267, "grad_norm": 0.9146202447996906, "learning_rate": 1.5963207069995455e-06, "loss": 0.815540075302124, "step": 2881 }, { "epoch": 0.6640552995391705, "grad_norm": 0.8585411055523222, "learning_rate": 1.596014830372149e-06, "loss": 0.8040128350257874, "step": 2882 }, { "epoch": 0.6642857142857143, "grad_norm": 0.8592608746136836, "learning_rate": 1.5957088672339887e-06, "loss": 0.7990812659263611, "step": 2883 }, { "epoch": 0.6645161290322581, "grad_norm": 0.9139395957334936, "learning_rate": 1.5954028176294746e-06, "loss": 0.956179141998291, "step": 2884 }, { "epoch": 0.6647465437788018, "grad_norm": 0.9544806325504157, "learning_rate": 1.5950966816030304e-06, "loss": 0.7730144262313843, "step": 2885 }, { "epoch": 0.6649769585253457, "grad_norm": 1.0230957824823068, "learning_rate": 1.5947904591990904e-06, "loss": 0.902834415435791, "step": 2886 }, { "epoch": 0.6652073732718894, "grad_norm": 0.8987169052425068, "learning_rate": 1.5944841504621027e-06, "loss": 0.7234599590301514, "step": 2887 }, { "epoch": 0.6654377880184332, "grad_norm": 0.9849005395145788, "learning_rate": 1.5941777554365271e-06, "loss": 1.0267843008041382, "step": 2888 }, { "epoch": 0.665668202764977, "grad_norm": 1.1615941669691254, "learning_rate": 1.5938712741668376e-06, "loss": 0.7431002855300903, "step": 2889 }, { "epoch": 0.6658986175115207, "grad_norm": 0.8013605201375282, "learning_rate": 1.5935647066975185e-06, "loss": 0.7843111753463745, "step": 2890 }, { "epoch": 0.6661290322580645, "grad_norm": 0.9498522711625995, "learning_rate": 1.593258053073068e-06, "loss": 0.8775256872177124, "step": 2891 }, { "epoch": 0.6663594470046083, "grad_norm": 0.8363878343517416, "learning_rate": 1.5929513133379966e-06, "loss": 0.7861695289611816, "step": 2892 }, { "epoch": 0.6665898617511521, "grad_norm": 1.1446598361432248, "learning_rate": 1.5926444875368267e-06, "loss": 0.8721977472305298, "step": 2893 }, { "epoch": 0.6668202764976958, "grad_norm": 0.7591669830135314, "learning_rate": 1.5923375757140941e-06, "loss": 0.648263692855835, "step": 2894 }, { "epoch": 0.6670506912442397, "grad_norm": 0.8984763952333247, "learning_rate": 1.592030577914347e-06, "loss": 0.8334729075431824, "step": 2895 }, { "epoch": 0.6672811059907834, "grad_norm": 0.7757586607492352, "learning_rate": 1.591723494182145e-06, "loss": 0.6105949878692627, "step": 2896 }, { "epoch": 0.6675115207373272, "grad_norm": 0.8562379620561761, "learning_rate": 1.5914163245620608e-06, "loss": 0.7895448207855225, "step": 2897 }, { "epoch": 0.667741935483871, "grad_norm": 0.9487051467126763, "learning_rate": 1.5911090690986805e-06, "loss": 0.8728576302528381, "step": 2898 }, { "epoch": 0.6679723502304148, "grad_norm": 0.7480056751597441, "learning_rate": 1.590801727836601e-06, "loss": 0.7637856006622314, "step": 2899 }, { "epoch": 0.6682027649769585, "grad_norm": 1.0125939986027075, "learning_rate": 1.590494300820433e-06, "loss": 0.8988397717475891, "step": 2900 }, { "epoch": 0.6684331797235024, "grad_norm": 0.9324485554010499, "learning_rate": 1.590186788094799e-06, "loss": 0.7486827373504639, "step": 2901 }, { "epoch": 0.6686635944700461, "grad_norm": 0.7629631437151, "learning_rate": 1.589879189704334e-06, "loss": 0.8212865591049194, "step": 2902 }, { "epoch": 0.6688940092165898, "grad_norm": 0.7640149838894683, "learning_rate": 1.5895715056936853e-06, "loss": 0.7421284914016724, "step": 2903 }, { "epoch": 0.6691244239631337, "grad_norm": 0.8407199034997399, "learning_rate": 1.5892637361075132e-06, "loss": 0.8721676468849182, "step": 2904 }, { "epoch": 0.6693548387096774, "grad_norm": 0.9214400782360851, "learning_rate": 1.58895588099049e-06, "loss": 0.7265836000442505, "step": 2905 }, { "epoch": 0.6695852534562212, "grad_norm": 0.959235173078028, "learning_rate": 1.5886479403873e-06, "loss": 0.863615870475769, "step": 2906 }, { "epoch": 0.669815668202765, "grad_norm": 0.788219849900096, "learning_rate": 1.588339914342641e-06, "loss": 0.8362177610397339, "step": 2907 }, { "epoch": 0.6700460829493088, "grad_norm": 1.0142262876785297, "learning_rate": 1.5880318029012223e-06, "loss": 0.9076892137527466, "step": 2908 }, { "epoch": 0.6702764976958525, "grad_norm": 0.957653217332238, "learning_rate": 1.5877236061077658e-06, "loss": 0.9149065017700195, "step": 2909 }, { "epoch": 0.6705069124423964, "grad_norm": 0.8820705070600866, "learning_rate": 1.5874153240070062e-06, "loss": 0.7761013507843018, "step": 2910 }, { "epoch": 0.6707373271889401, "grad_norm": 1.049261864076062, "learning_rate": 1.5871069566436894e-06, "loss": 0.8671830892562866, "step": 2911 }, { "epoch": 0.6709677419354839, "grad_norm": 0.9461120142941367, "learning_rate": 1.5867985040625755e-06, "loss": 0.9433870315551758, "step": 2912 }, { "epoch": 0.6711981566820276, "grad_norm": 0.934114103387592, "learning_rate": 1.5864899663084352e-06, "loss": 0.8009352684020996, "step": 2913 }, { "epoch": 0.6714285714285714, "grad_norm": 0.9285902098427739, "learning_rate": 1.5861813434260528e-06, "loss": 0.6813808083534241, "step": 2914 }, { "epoch": 0.6716589861751152, "grad_norm": 0.7891360814530397, "learning_rate": 1.5858726354602248e-06, "loss": 0.712783932685852, "step": 2915 }, { "epoch": 0.6718894009216589, "grad_norm": 0.9971879600214522, "learning_rate": 1.5855638424557588e-06, "loss": 0.7871056795120239, "step": 2916 }, { "epoch": 0.6721198156682028, "grad_norm": 0.9551471269364743, "learning_rate": 1.5852549644574766e-06, "loss": 0.8590981960296631, "step": 2917 }, { "epoch": 0.6723502304147465, "grad_norm": 0.9338373296128487, "learning_rate": 1.584946001510211e-06, "loss": 0.7952913641929626, "step": 2918 }, { "epoch": 0.6725806451612903, "grad_norm": 1.0716689971646949, "learning_rate": 1.5846369536588078e-06, "loss": 0.8567384481430054, "step": 2919 }, { "epoch": 0.6728110599078341, "grad_norm": 1.0797852963412387, "learning_rate": 1.5843278209481246e-06, "loss": 0.859541654586792, "step": 2920 }, { "epoch": 0.6730414746543779, "grad_norm": 1.1734504357127358, "learning_rate": 1.5840186034230318e-06, "loss": 0.7843801975250244, "step": 2921 }, { "epoch": 0.6732718894009216, "grad_norm": 0.7736885985619673, "learning_rate": 1.5837093011284118e-06, "loss": 0.7448940277099609, "step": 2922 }, { "epoch": 0.6735023041474655, "grad_norm": 1.0803788544256392, "learning_rate": 1.5833999141091593e-06, "loss": 0.9325242042541504, "step": 2923 }, { "epoch": 0.6737327188940092, "grad_norm": 1.2302390941080075, "learning_rate": 1.5830904424101816e-06, "loss": 0.8005647659301758, "step": 2924 }, { "epoch": 0.673963133640553, "grad_norm": 0.9271295903754758, "learning_rate": 1.5827808860763984e-06, "loss": 0.8897464275360107, "step": 2925 }, { "epoch": 0.6741935483870968, "grad_norm": 1.0218758099034497, "learning_rate": 1.5824712451527409e-06, "loss": 0.8319039344787598, "step": 2926 }, { "epoch": 0.6744239631336405, "grad_norm": 1.0734614103347653, "learning_rate": 1.5821615196841533e-06, "loss": 0.7638111114501953, "step": 2927 }, { "epoch": 0.6746543778801843, "grad_norm": 0.8552316991076688, "learning_rate": 1.581851709715592e-06, "loss": 0.7617092132568359, "step": 2928 }, { "epoch": 0.6748847926267281, "grad_norm": 1.0119419737078916, "learning_rate": 1.581541815292025e-06, "loss": 0.813319742679596, "step": 2929 }, { "epoch": 0.6751152073732719, "grad_norm": 0.8324815306646182, "learning_rate": 1.5812318364584334e-06, "loss": 0.7495343089103699, "step": 2930 }, { "epoch": 0.6753456221198156, "grad_norm": 1.0070331562925772, "learning_rate": 1.5809217732598103e-06, "loss": 0.9064745306968689, "step": 2931 }, { "epoch": 0.6755760368663595, "grad_norm": 0.77529378116571, "learning_rate": 1.580611625741161e-06, "loss": 0.699098527431488, "step": 2932 }, { "epoch": 0.6758064516129032, "grad_norm": 0.9525126023464006, "learning_rate": 1.5803013939475025e-06, "loss": 0.9168096780776978, "step": 2933 }, { "epoch": 0.676036866359447, "grad_norm": 0.8145178437764095, "learning_rate": 1.5799910779238652e-06, "loss": 0.8848644495010376, "step": 2934 }, { "epoch": 0.6762672811059908, "grad_norm": 0.8852934324704809, "learning_rate": 1.5796806777152903e-06, "loss": 0.7795228958129883, "step": 2935 }, { "epoch": 0.6764976958525346, "grad_norm": 0.9901973226971541, "learning_rate": 1.5793701933668327e-06, "loss": 0.9287698268890381, "step": 2936 }, { "epoch": 0.6767281105990783, "grad_norm": 0.9605403793187631, "learning_rate": 1.5790596249235587e-06, "loss": 0.8661396503448486, "step": 2937 }, { "epoch": 0.6769585253456222, "grad_norm": 1.0073544692346657, "learning_rate": 1.5787489724305464e-06, "loss": 0.7544706463813782, "step": 2938 }, { "epoch": 0.6771889400921659, "grad_norm": 1.350397583464208, "learning_rate": 1.5784382359328872e-06, "loss": 0.8613651990890503, "step": 2939 }, { "epoch": 0.6774193548387096, "grad_norm": 1.0225856960398716, "learning_rate": 1.5781274154756833e-06, "loss": 0.8695065975189209, "step": 2940 }, { "epoch": 0.6776497695852535, "grad_norm": 1.1450515007973723, "learning_rate": 1.577816511104051e-06, "loss": 0.9453287720680237, "step": 2941 }, { "epoch": 0.6778801843317972, "grad_norm": 0.7720442193305806, "learning_rate": 1.577505522863117e-06, "loss": 0.8599261045455933, "step": 2942 }, { "epoch": 0.678110599078341, "grad_norm": 0.8831442525084486, "learning_rate": 1.5771944507980205e-06, "loss": 0.8143391609191895, "step": 2943 }, { "epoch": 0.6783410138248848, "grad_norm": 0.9328639928073722, "learning_rate": 1.576883294953914e-06, "loss": 0.9558438062667847, "step": 2944 }, { "epoch": 0.6785714285714286, "grad_norm": 0.6484366074680237, "learning_rate": 1.5765720553759605e-06, "loss": 0.7348268628120422, "step": 2945 }, { "epoch": 0.6788018433179723, "grad_norm": 1.0387482604326927, "learning_rate": 1.5762607321093366e-06, "loss": 0.9361155033111572, "step": 2946 }, { "epoch": 0.6790322580645162, "grad_norm": 0.9855095789147831, "learning_rate": 1.5759493251992303e-06, "loss": 0.8094985485076904, "step": 2947 }, { "epoch": 0.6792626728110599, "grad_norm": 1.631714554631539, "learning_rate": 1.575637834690842e-06, "loss": 0.8746658563613892, "step": 2948 }, { "epoch": 0.6794930875576037, "grad_norm": 0.9249217331606766, "learning_rate": 1.575326260629384e-06, "loss": 0.7433050870895386, "step": 2949 }, { "epoch": 0.6797235023041475, "grad_norm": 0.9856239464338491, "learning_rate": 1.5750146030600808e-06, "loss": 0.8621053695678711, "step": 2950 }, { "epoch": 0.6799539170506912, "grad_norm": 0.9119478915395727, "learning_rate": 1.5747028620281695e-06, "loss": 0.7541971206665039, "step": 2951 }, { "epoch": 0.680184331797235, "grad_norm": 1.0099311239329205, "learning_rate": 1.5743910375788982e-06, "loss": 0.9817987680435181, "step": 2952 }, { "epoch": 0.6804147465437788, "grad_norm": 1.046074262522893, "learning_rate": 1.5740791297575283e-06, "loss": 0.7763534188270569, "step": 2953 }, { "epoch": 0.6806451612903226, "grad_norm": 1.0303747349913415, "learning_rate": 1.573767138609333e-06, "loss": 0.7482337355613708, "step": 2954 }, { "epoch": 0.6808755760368663, "grad_norm": 1.0308347032013807, "learning_rate": 1.5734550641795967e-06, "loss": 0.7352473735809326, "step": 2955 }, { "epoch": 0.6811059907834102, "grad_norm": 0.9086715245515472, "learning_rate": 1.573142906513617e-06, "loss": 0.8657293319702148, "step": 2956 }, { "epoch": 0.6813364055299539, "grad_norm": 0.9597438975913184, "learning_rate": 1.5728306656567033e-06, "loss": 0.8035376667976379, "step": 2957 }, { "epoch": 0.6815668202764977, "grad_norm": 0.9481340627224691, "learning_rate": 1.572518341654177e-06, "loss": 0.8030140399932861, "step": 2958 }, { "epoch": 0.6817972350230415, "grad_norm": 0.956950799259568, "learning_rate": 1.5722059345513711e-06, "loss": 0.797377347946167, "step": 2959 }, { "epoch": 0.6820276497695853, "grad_norm": 0.7086079395333297, "learning_rate": 1.5718934443936311e-06, "loss": 0.7041053175926208, "step": 2960 }, { "epoch": 0.682258064516129, "grad_norm": 1.0251660128790803, "learning_rate": 1.571580871226315e-06, "loss": 0.7911885976791382, "step": 2961 }, { "epoch": 0.6824884792626729, "grad_norm": 0.8834527581303466, "learning_rate": 1.5712682150947922e-06, "loss": 0.7908599376678467, "step": 2962 }, { "epoch": 0.6827188940092166, "grad_norm": 0.8159267525070817, "learning_rate": 1.5709554760444442e-06, "loss": 0.860281229019165, "step": 2963 }, { "epoch": 0.6829493087557603, "grad_norm": 0.8226887233242035, "learning_rate": 1.5706426541206645e-06, "loss": 0.6987707018852234, "step": 2964 }, { "epoch": 0.6831797235023042, "grad_norm": 0.8719992040747229, "learning_rate": 1.5703297493688592e-06, "loss": 0.7198495864868164, "step": 2965 }, { "epoch": 0.6834101382488479, "grad_norm": 1.1775957395401402, "learning_rate": 1.5700167618344455e-06, "loss": 0.8232598304748535, "step": 2966 }, { "epoch": 0.6836405529953917, "grad_norm": 0.8962037845514019, "learning_rate": 1.569703691562854e-06, "loss": 0.8425456285476685, "step": 2967 }, { "epoch": 0.6838709677419355, "grad_norm": 0.8746880672166448, "learning_rate": 1.5693905385995252e-06, "loss": 0.7758797407150269, "step": 2968 }, { "epoch": 0.6841013824884793, "grad_norm": 0.9739325658587258, "learning_rate": 1.569077302989914e-06, "loss": 0.7478910684585571, "step": 2969 }, { "epoch": 0.684331797235023, "grad_norm": 0.88099670074057, "learning_rate": 1.5687639847794854e-06, "loss": 0.8274309635162354, "step": 2970 }, { "epoch": 0.6845622119815669, "grad_norm": 0.9125307567181903, "learning_rate": 1.5684505840137173e-06, "loss": 0.6800183653831482, "step": 2971 }, { "epoch": 0.6847926267281106, "grad_norm": 1.1416810893109246, "learning_rate": 1.5681371007380996e-06, "loss": 0.7768006324768066, "step": 2972 }, { "epoch": 0.6850230414746544, "grad_norm": 0.8308804334079786, "learning_rate": 1.5678235349981338e-06, "loss": 0.7462732195854187, "step": 2973 }, { "epoch": 0.6852534562211982, "grad_norm": 0.935725297382271, "learning_rate": 1.5675098868393335e-06, "loss": 0.8461781144142151, "step": 2974 }, { "epoch": 0.6854838709677419, "grad_norm": 0.9717984846524689, "learning_rate": 1.5671961563072244e-06, "loss": 0.7968491911888123, "step": 2975 }, { "epoch": 0.6857142857142857, "grad_norm": 0.9710985084042064, "learning_rate": 1.5668823434473443e-06, "loss": 0.805394172668457, "step": 2976 }, { "epoch": 0.6859447004608294, "grad_norm": 0.9297793560483373, "learning_rate": 1.5665684483052424e-06, "loss": 0.7241736650466919, "step": 2977 }, { "epoch": 0.6861751152073733, "grad_norm": 0.9673260038513803, "learning_rate": 1.5662544709264801e-06, "loss": 0.7345866560935974, "step": 2978 }, { "epoch": 0.686405529953917, "grad_norm": 0.8604134561659843, "learning_rate": 1.5659404113566312e-06, "loss": 0.7605085372924805, "step": 2979 }, { "epoch": 0.6866359447004609, "grad_norm": 0.9618303204830516, "learning_rate": 1.5656262696412808e-06, "loss": 0.8555188179016113, "step": 2980 }, { "epoch": 0.6868663594470046, "grad_norm": 0.8604009092225049, "learning_rate": 1.5653120458260261e-06, "loss": 0.7139542698860168, "step": 2981 }, { "epoch": 0.6870967741935484, "grad_norm": 0.9290410772154322, "learning_rate": 1.564997739956476e-06, "loss": 0.8676587343215942, "step": 2982 }, { "epoch": 0.6873271889400921, "grad_norm": 0.9524807718966832, "learning_rate": 1.5646833520782523e-06, "loss": 0.8121025562286377, "step": 2983 }, { "epoch": 0.687557603686636, "grad_norm": 0.7889521702672326, "learning_rate": 1.5643688822369873e-06, "loss": 0.7757136821746826, "step": 2984 }, { "epoch": 0.6877880184331797, "grad_norm": 0.8884194014759353, "learning_rate": 1.5640543304783264e-06, "loss": 0.8357381820678711, "step": 2985 }, { "epoch": 0.6880184331797236, "grad_norm": 0.9725078170053829, "learning_rate": 1.563739696847926e-06, "loss": 0.8635811805725098, "step": 2986 }, { "epoch": 0.6882488479262673, "grad_norm": 0.9539959391598165, "learning_rate": 1.563424981391455e-06, "loss": 0.90900057554245, "step": 2987 }, { "epoch": 0.688479262672811, "grad_norm": 1.056070683011334, "learning_rate": 1.563110184154594e-06, "loss": 0.9001314043998718, "step": 2988 }, { "epoch": 0.6887096774193548, "grad_norm": 0.7893194308475292, "learning_rate": 1.5627953051830353e-06, "loss": 0.7482000589370728, "step": 2989 }, { "epoch": 0.6889400921658986, "grad_norm": 1.0183435769639337, "learning_rate": 1.5624803445224829e-06, "loss": 0.8504235744476318, "step": 2990 }, { "epoch": 0.6891705069124424, "grad_norm": 0.9687684393899343, "learning_rate": 1.5621653022186526e-06, "loss": 0.7887089252471924, "step": 2991 }, { "epoch": 0.6894009216589861, "grad_norm": 0.9412995775666883, "learning_rate": 1.5618501783172735e-06, "loss": 0.8745719790458679, "step": 2992 }, { "epoch": 0.68963133640553, "grad_norm": 0.8960957701589951, "learning_rate": 1.5615349728640848e-06, "loss": 0.8269633054733276, "step": 2993 }, { "epoch": 0.6898617511520737, "grad_norm": 0.802430248071724, "learning_rate": 1.5612196859048382e-06, "loss": 0.7355072498321533, "step": 2994 }, { "epoch": 0.6900921658986175, "grad_norm": 0.9768940563158048, "learning_rate": 1.5609043174852966e-06, "loss": 0.857653021812439, "step": 2995 }, { "epoch": 0.6903225806451613, "grad_norm": 1.0766498115550724, "learning_rate": 1.5605888676512365e-06, "loss": 0.8575785160064697, "step": 2996 }, { "epoch": 0.6905529953917051, "grad_norm": 0.8803208034747956, "learning_rate": 1.560273336448444e-06, "loss": 0.8631561994552612, "step": 2997 }, { "epoch": 0.6907834101382488, "grad_norm": 1.0014936433552548, "learning_rate": 1.5599577239227185e-06, "loss": 0.7993800044059753, "step": 2998 }, { "epoch": 0.6910138248847926, "grad_norm": 0.8990076202156756, "learning_rate": 1.5596420301198707e-06, "loss": 0.7961007356643677, "step": 2999 }, { "epoch": 0.6912442396313364, "grad_norm": 1.0216355950582598, "learning_rate": 1.5593262550857232e-06, "loss": 0.7536421418190002, "step": 3000 }, { "epoch": 0.6914746543778801, "grad_norm": 0.8348839196110558, "learning_rate": 1.55901039886611e-06, "loss": 0.70341956615448, "step": 3001 }, { "epoch": 0.691705069124424, "grad_norm": 1.0093771985733984, "learning_rate": 1.5586944615068776e-06, "loss": 0.8152127265930176, "step": 3002 }, { "epoch": 0.6919354838709677, "grad_norm": 0.9332692294841357, "learning_rate": 1.5583784430538838e-06, "loss": 0.6728770732879639, "step": 3003 }, { "epoch": 0.6921658986175115, "grad_norm": 1.0871891474224546, "learning_rate": 1.558062343552998e-06, "loss": 0.8406884670257568, "step": 3004 }, { "epoch": 0.6923963133640553, "grad_norm": 0.8920706269230131, "learning_rate": 1.5577461630501018e-06, "loss": 0.766754686832428, "step": 3005 }, { "epoch": 0.6926267281105991, "grad_norm": 0.714004026253109, "learning_rate": 1.5574299015910889e-06, "loss": 0.7456642389297485, "step": 3006 }, { "epoch": 0.6928571428571428, "grad_norm": 0.8290815943958627, "learning_rate": 1.557113559221863e-06, "loss": 0.7834097743034363, "step": 3007 }, { "epoch": 0.6930875576036867, "grad_norm": 0.91346801287595, "learning_rate": 1.556797135988342e-06, "loss": 0.7425946593284607, "step": 3008 }, { "epoch": 0.6933179723502304, "grad_norm": 1.0483330104966306, "learning_rate": 1.5564806319364534e-06, "loss": 0.7914093732833862, "step": 3009 }, { "epoch": 0.6935483870967742, "grad_norm": 0.9665010461345012, "learning_rate": 1.556164047112138e-06, "loss": 0.819783091545105, "step": 3010 }, { "epoch": 0.693778801843318, "grad_norm": 0.985903986481312, "learning_rate": 1.5558473815613474e-06, "loss": 0.7147302627563477, "step": 3011 }, { "epoch": 0.6940092165898617, "grad_norm": 1.1240220664371217, "learning_rate": 1.5555306353300452e-06, "loss": 0.7247470617294312, "step": 3012 }, { "epoch": 0.6942396313364055, "grad_norm": 1.2403633886338306, "learning_rate": 1.5552138084642067e-06, "loss": 0.8277294635772705, "step": 3013 }, { "epoch": 0.6944700460829493, "grad_norm": 0.9054626931882043, "learning_rate": 1.554896901009819e-06, "loss": 0.8014394640922546, "step": 3014 }, { "epoch": 0.6947004608294931, "grad_norm": 0.9274937399954835, "learning_rate": 1.5545799130128808e-06, "loss": 0.7468869686126709, "step": 3015 }, { "epoch": 0.6949308755760368, "grad_norm": 0.8904964499744723, "learning_rate": 1.554262844519402e-06, "loss": 0.7854933142662048, "step": 3016 }, { "epoch": 0.6951612903225807, "grad_norm": 0.9536718451900233, "learning_rate": 1.5539456955754053e-06, "loss": 0.8359543681144714, "step": 3017 }, { "epoch": 0.6953917050691244, "grad_norm": 0.8313774511874621, "learning_rate": 1.5536284662269243e-06, "loss": 0.7767773866653442, "step": 3018 }, { "epoch": 0.6956221198156682, "grad_norm": 0.7370790678700915, "learning_rate": 1.5533111565200044e-06, "loss": 0.8388162851333618, "step": 3019 }, { "epoch": 0.695852534562212, "grad_norm": 0.9159856551917743, "learning_rate": 1.5529937665007024e-06, "loss": 0.7791208028793335, "step": 3020 }, { "epoch": 0.6960829493087558, "grad_norm": 0.9740300384215894, "learning_rate": 1.5526762962150875e-06, "loss": 0.8662698864936829, "step": 3021 }, { "epoch": 0.6963133640552995, "grad_norm": 0.7004253764922403, "learning_rate": 1.5523587457092394e-06, "loss": 0.737492024898529, "step": 3022 }, { "epoch": 0.6965437788018434, "grad_norm": 1.0408775765092733, "learning_rate": 1.552041115029251e-06, "loss": 0.83610999584198, "step": 3023 }, { "epoch": 0.6967741935483871, "grad_norm": 1.1134023704947162, "learning_rate": 1.5517234042212254e-06, "loss": 0.930977463722229, "step": 3024 }, { "epoch": 0.6970046082949308, "grad_norm": 0.8756044667716456, "learning_rate": 1.551405613331278e-06, "loss": 0.7587058544158936, "step": 3025 }, { "epoch": 0.6972350230414747, "grad_norm": 0.7720525053545241, "learning_rate": 1.551087742405536e-06, "loss": 0.7549247741699219, "step": 3026 }, { "epoch": 0.6974654377880184, "grad_norm": 0.8108175030001162, "learning_rate": 1.5507697914901376e-06, "loss": 0.6906812787055969, "step": 3027 }, { "epoch": 0.6976958525345622, "grad_norm": 0.7358502568670926, "learning_rate": 1.5504517606312332e-06, "loss": 0.7806124687194824, "step": 3028 }, { "epoch": 0.697926267281106, "grad_norm": 0.8191496367359047, "learning_rate": 1.5501336498749846e-06, "loss": 0.8091036081314087, "step": 3029 }, { "epoch": 0.6981566820276498, "grad_norm": 0.923718506351422, "learning_rate": 1.5498154592675646e-06, "loss": 0.721937894821167, "step": 3030 }, { "epoch": 0.6983870967741935, "grad_norm": 0.729194360630959, "learning_rate": 1.5494971888551587e-06, "loss": 0.712378740310669, "step": 3031 }, { "epoch": 0.6986175115207374, "grad_norm": 0.9809936276606201, "learning_rate": 1.5491788386839635e-06, "loss": 0.8106495141983032, "step": 3032 }, { "epoch": 0.6988479262672811, "grad_norm": 1.0550994014291641, "learning_rate": 1.5488604088001866e-06, "loss": 0.7886521816253662, "step": 3033 }, { "epoch": 0.6990783410138249, "grad_norm": 0.9413909460240358, "learning_rate": 1.5485418992500479e-06, "loss": 0.7483402490615845, "step": 3034 }, { "epoch": 0.6993087557603687, "grad_norm": 0.9735513924670123, "learning_rate": 1.5482233100797788e-06, "loss": 0.6236725449562073, "step": 3035 }, { "epoch": 0.6995391705069124, "grad_norm": 1.023064942988146, "learning_rate": 1.5479046413356222e-06, "loss": 0.9477910995483398, "step": 3036 }, { "epoch": 0.6997695852534562, "grad_norm": 1.0993186685690193, "learning_rate": 1.5475858930638322e-06, "loss": 0.8921213746070862, "step": 3037 }, { "epoch": 0.7, "grad_norm": 0.7179145673247356, "learning_rate": 1.5472670653106744e-06, "loss": 0.7460963726043701, "step": 3038 }, { "epoch": 0.7002304147465438, "grad_norm": 0.8319225077693166, "learning_rate": 1.5469481581224271e-06, "loss": 0.6135849356651306, "step": 3039 }, { "epoch": 0.7004608294930875, "grad_norm": 0.8739744675210649, "learning_rate": 1.546629171545378e-06, "loss": 0.8039313554763794, "step": 3040 }, { "epoch": 0.7006912442396314, "grad_norm": 1.2210857419731846, "learning_rate": 1.5463101056258289e-06, "loss": 0.8751651048660278, "step": 3041 }, { "epoch": 0.7009216589861751, "grad_norm": 0.9070575590392688, "learning_rate": 1.545990960410091e-06, "loss": 0.7600879669189453, "step": 3042 }, { "epoch": 0.7011520737327189, "grad_norm": 0.9983949583794295, "learning_rate": 1.545671735944488e-06, "loss": 0.8118841648101807, "step": 3043 }, { "epoch": 0.7013824884792627, "grad_norm": 0.7470799565000998, "learning_rate": 1.5453524322753546e-06, "loss": 0.7144184112548828, "step": 3044 }, { "epoch": 0.7016129032258065, "grad_norm": 1.149288210915265, "learning_rate": 1.545033049449038e-06, "loss": 0.9730075001716614, "step": 3045 }, { "epoch": 0.7018433179723502, "grad_norm": 0.9334735321523672, "learning_rate": 1.5447135875118957e-06, "loss": 0.6930910348892212, "step": 3046 }, { "epoch": 0.7020737327188941, "grad_norm": 1.0190518922073715, "learning_rate": 1.5443940465102973e-06, "loss": 0.8517031669616699, "step": 3047 }, { "epoch": 0.7023041474654378, "grad_norm": 0.9199109424213672, "learning_rate": 1.5440744264906237e-06, "loss": 0.7939779758453369, "step": 3048 }, { "epoch": 0.7025345622119815, "grad_norm": 1.0310125567194028, "learning_rate": 1.5437547274992672e-06, "loss": 0.8946782350540161, "step": 3049 }, { "epoch": 0.7027649769585254, "grad_norm": 1.1682685309372194, "learning_rate": 1.543434949582632e-06, "loss": 0.9273954033851624, "step": 3050 }, { "epoch": 0.7029953917050691, "grad_norm": 0.8496559046178408, "learning_rate": 1.5431150927871333e-06, "loss": 0.7731457352638245, "step": 3051 }, { "epoch": 0.7032258064516129, "grad_norm": 0.9900519408386056, "learning_rate": 1.542795157159198e-06, "loss": 0.7982608079910278, "step": 3052 }, { "epoch": 0.7034562211981567, "grad_norm": 1.0252185126476046, "learning_rate": 1.542475142745264e-06, "loss": 0.8422989845275879, "step": 3053 }, { "epoch": 0.7036866359447005, "grad_norm": 1.1364598749635721, "learning_rate": 1.542155049591781e-06, "loss": 0.8344876766204834, "step": 3054 }, { "epoch": 0.7039170506912442, "grad_norm": 1.3240029855230715, "learning_rate": 1.541834877745211e-06, "loss": 0.8830629587173462, "step": 3055 }, { "epoch": 0.7041474654377881, "grad_norm": 0.8841605120149971, "learning_rate": 1.5415146272520247e-06, "loss": 0.823864221572876, "step": 3056 }, { "epoch": 0.7043778801843318, "grad_norm": 1.226256029650695, "learning_rate": 1.5411942981587077e-06, "loss": 0.8577016592025757, "step": 3057 }, { "epoch": 0.7046082949308756, "grad_norm": 0.9938154526101401, "learning_rate": 1.540873890511755e-06, "loss": 0.7431750297546387, "step": 3058 }, { "epoch": 0.7048387096774194, "grad_norm": 1.3100911793106818, "learning_rate": 1.5405534043576729e-06, "loss": 0.8219394683837891, "step": 3059 }, { "epoch": 0.7050691244239631, "grad_norm": 0.8179546123014678, "learning_rate": 1.5402328397429795e-06, "loss": 0.706437349319458, "step": 3060 }, { "epoch": 0.7052995391705069, "grad_norm": 0.9400567182130463, "learning_rate": 1.5399121967142051e-06, "loss": 0.8669443130493164, "step": 3061 }, { "epoch": 0.7055299539170506, "grad_norm": 0.9808762608140087, "learning_rate": 1.5395914753178897e-06, "loss": 0.7995564937591553, "step": 3062 }, { "epoch": 0.7057603686635945, "grad_norm": 1.0691077372052262, "learning_rate": 1.5392706756005862e-06, "loss": 0.7840889692306519, "step": 3063 }, { "epoch": 0.7059907834101382, "grad_norm": 0.9593102373354429, "learning_rate": 1.5389497976088582e-06, "loss": 0.8231604695320129, "step": 3064 }, { "epoch": 0.706221198156682, "grad_norm": 1.0423471516482703, "learning_rate": 1.5386288413892801e-06, "loss": 0.7821571826934814, "step": 3065 }, { "epoch": 0.7064516129032258, "grad_norm": 0.9221304357539406, "learning_rate": 1.538307806988439e-06, "loss": 0.736830472946167, "step": 3066 }, { "epoch": 0.7066820276497696, "grad_norm": 0.8124713959576904, "learning_rate": 1.537986694452932e-06, "loss": 0.7783113718032837, "step": 3067 }, { "epoch": 0.7069124423963133, "grad_norm": 0.8679700879266566, "learning_rate": 1.5376655038293692e-06, "loss": 0.8000421524047852, "step": 3068 }, { "epoch": 0.7071428571428572, "grad_norm": 0.8513728527683974, "learning_rate": 1.5373442351643696e-06, "loss": 0.7446980476379395, "step": 3069 }, { "epoch": 0.7073732718894009, "grad_norm": 0.8188336762916474, "learning_rate": 1.537022888504566e-06, "loss": 0.7018321752548218, "step": 3070 }, { "epoch": 0.7076036866359448, "grad_norm": 0.8259052522128728, "learning_rate": 1.5367014638966008e-06, "loss": 0.6903716325759888, "step": 3071 }, { "epoch": 0.7078341013824885, "grad_norm": 1.0909385113291765, "learning_rate": 1.5363799613871289e-06, "loss": 0.9635254144668579, "step": 3072 }, { "epoch": 0.7080645161290322, "grad_norm": 0.7335179559352851, "learning_rate": 1.5360583810228156e-06, "loss": 0.8612154722213745, "step": 3073 }, { "epoch": 0.708294930875576, "grad_norm": 0.9395034612023028, "learning_rate": 1.5357367228503376e-06, "loss": 0.8632407784461975, "step": 3074 }, { "epoch": 0.7085253456221198, "grad_norm": 0.9383639731759232, "learning_rate": 1.5354149869163839e-06, "loss": 0.8117856979370117, "step": 3075 }, { "epoch": 0.7087557603686636, "grad_norm": 0.9770895875008837, "learning_rate": 1.5350931732676538e-06, "loss": 0.8062559366226196, "step": 3076 }, { "epoch": 0.7089861751152073, "grad_norm": 0.9191794034062433, "learning_rate": 1.5347712819508576e-06, "loss": 0.7918965816497803, "step": 3077 }, { "epoch": 0.7092165898617512, "grad_norm": 0.7897301018455927, "learning_rate": 1.534449313012718e-06, "loss": 0.7564986944198608, "step": 3078 }, { "epoch": 0.7094470046082949, "grad_norm": 0.774017262501344, "learning_rate": 1.534127266499968e-06, "loss": 0.8261928558349609, "step": 3079 }, { "epoch": 0.7096774193548387, "grad_norm": 0.9288792217475005, "learning_rate": 1.5338051424593524e-06, "loss": 0.705269455909729, "step": 3080 }, { "epoch": 0.7099078341013825, "grad_norm": 0.8500383243043894, "learning_rate": 1.5334829409376271e-06, "loss": 0.823144793510437, "step": 3081 }, { "epoch": 0.7101382488479263, "grad_norm": 0.7512588375717618, "learning_rate": 1.5331606619815588e-06, "loss": 0.7772066593170166, "step": 3082 }, { "epoch": 0.71036866359447, "grad_norm": 1.0827682012637947, "learning_rate": 1.5328383056379265e-06, "loss": 0.8901097178459167, "step": 3083 }, { "epoch": 0.7105990783410139, "grad_norm": 0.9540489638748495, "learning_rate": 1.5325158719535196e-06, "loss": 0.8454819917678833, "step": 3084 }, { "epoch": 0.7108294930875576, "grad_norm": 0.8879734338037916, "learning_rate": 1.5321933609751388e-06, "loss": 0.8444693684577942, "step": 3085 }, { "epoch": 0.7110599078341013, "grad_norm": 1.0157021807199436, "learning_rate": 1.5318707727495964e-06, "loss": 0.7893826961517334, "step": 3086 }, { "epoch": 0.7112903225806452, "grad_norm": 0.9711563338551928, "learning_rate": 1.531548107323715e-06, "loss": 0.7536686658859253, "step": 3087 }, { "epoch": 0.7115207373271889, "grad_norm": 1.1272305964721914, "learning_rate": 1.53122536474433e-06, "loss": 0.8105358481407166, "step": 3088 }, { "epoch": 0.7117511520737327, "grad_norm": 0.8430783893005721, "learning_rate": 1.530902545058286e-06, "loss": 0.8104212284088135, "step": 3089 }, { "epoch": 0.7119815668202765, "grad_norm": 1.1740010494566606, "learning_rate": 1.5305796483124405e-06, "loss": 0.7738373279571533, "step": 3090 }, { "epoch": 0.7122119815668203, "grad_norm": 0.8346644560955941, "learning_rate": 1.5302566745536618e-06, "loss": 0.7583746910095215, "step": 3091 }, { "epoch": 0.712442396313364, "grad_norm": 1.0290772907257426, "learning_rate": 1.5299336238288286e-06, "loss": 0.8370871543884277, "step": 3092 }, { "epoch": 0.7126728110599079, "grad_norm": 0.8908237623549358, "learning_rate": 1.5296104961848314e-06, "loss": 0.7833988666534424, "step": 3093 }, { "epoch": 0.7129032258064516, "grad_norm": 1.135734716262211, "learning_rate": 1.5292872916685717e-06, "loss": 0.8024515509605408, "step": 3094 }, { "epoch": 0.7131336405529954, "grad_norm": 0.8156588034123838, "learning_rate": 1.5289640103269623e-06, "loss": 0.8044738173484802, "step": 3095 }, { "epoch": 0.7133640552995392, "grad_norm": 0.846268334708117, "learning_rate": 1.5286406522069273e-06, "loss": 0.7783721685409546, "step": 3096 }, { "epoch": 0.7135944700460829, "grad_norm": 0.8004616169511741, "learning_rate": 1.5283172173554014e-06, "loss": 0.693443238735199, "step": 3097 }, { "epoch": 0.7138248847926267, "grad_norm": 0.9862921565687749, "learning_rate": 1.527993705819331e-06, "loss": 0.8142237663269043, "step": 3098 }, { "epoch": 0.7140552995391705, "grad_norm": 0.9077662799949481, "learning_rate": 1.5276701176456726e-06, "loss": 0.790626049041748, "step": 3099 }, { "epoch": 0.7142857142857143, "grad_norm": 1.0485200242859731, "learning_rate": 1.5273464528813953e-06, "loss": 0.9460805654525757, "step": 3100 }, { "epoch": 0.714516129032258, "grad_norm": 0.902776913050398, "learning_rate": 1.5270227115734789e-06, "loss": 0.6906337738037109, "step": 3101 }, { "epoch": 0.7147465437788019, "grad_norm": 0.8514512995363496, "learning_rate": 1.526698893768913e-06, "loss": 0.8828556537628174, "step": 3102 }, { "epoch": 0.7149769585253456, "grad_norm": 1.0568586756231748, "learning_rate": 1.5263749995147004e-06, "loss": 0.8395771980285645, "step": 3103 }, { "epoch": 0.7152073732718894, "grad_norm": 0.814014727084384, "learning_rate": 1.5260510288578535e-06, "loss": 0.7103895545005798, "step": 3104 }, { "epoch": 0.7154377880184332, "grad_norm": 1.0670304040497072, "learning_rate": 1.5257269818453956e-06, "loss": 0.9780298471450806, "step": 3105 }, { "epoch": 0.715668202764977, "grad_norm": 0.777700102492748, "learning_rate": 1.525402858524363e-06, "loss": 0.8176128268241882, "step": 3106 }, { "epoch": 0.7158986175115207, "grad_norm": 0.8127092170976247, "learning_rate": 1.5250786589418008e-06, "loss": 0.6766567230224609, "step": 3107 }, { "epoch": 0.7161290322580646, "grad_norm": 0.8076252538068988, "learning_rate": 1.5247543831447662e-06, "loss": 0.7910950183868408, "step": 3108 }, { "epoch": 0.7163594470046083, "grad_norm": 0.76882132080824, "learning_rate": 1.5244300311803275e-06, "loss": 0.8444501161575317, "step": 3109 }, { "epoch": 0.716589861751152, "grad_norm": 0.9073390489490682, "learning_rate": 1.5241056030955642e-06, "loss": 0.7180038690567017, "step": 3110 }, { "epoch": 0.7168202764976959, "grad_norm": 0.8535510406326756, "learning_rate": 1.5237810989375663e-06, "loss": 0.8563181757926941, "step": 3111 }, { "epoch": 0.7170506912442396, "grad_norm": 0.7281554723991874, "learning_rate": 1.5234565187534353e-06, "loss": 0.7792840003967285, "step": 3112 }, { "epoch": 0.7172811059907834, "grad_norm": 1.2546504724448617, "learning_rate": 1.5231318625902835e-06, "loss": 0.8414837121963501, "step": 3113 }, { "epoch": 0.7175115207373272, "grad_norm": 0.9151299107605344, "learning_rate": 1.5228071304952348e-06, "loss": 0.8549888134002686, "step": 3114 }, { "epoch": 0.717741935483871, "grad_norm": 0.8858229770055023, "learning_rate": 1.5224823225154228e-06, "loss": 0.7973321676254272, "step": 3115 }, { "epoch": 0.7179723502304147, "grad_norm": 0.8923496131316503, "learning_rate": 1.5221574386979937e-06, "loss": 0.7328228950500488, "step": 3116 }, { "epoch": 0.7182027649769586, "grad_norm": 0.8315355877258431, "learning_rate": 1.5218324790901033e-06, "loss": 0.8953883051872253, "step": 3117 }, { "epoch": 0.7184331797235023, "grad_norm": 0.8252416441396693, "learning_rate": 1.5215074437389195e-06, "loss": 0.7804527282714844, "step": 3118 }, { "epoch": 0.7186635944700461, "grad_norm": 1.0592650685202745, "learning_rate": 1.5211823326916204e-06, "loss": 0.7581363320350647, "step": 3119 }, { "epoch": 0.7188940092165899, "grad_norm": 0.9812896234713268, "learning_rate": 1.520857145995396e-06, "loss": 0.7720214128494263, "step": 3120 }, { "epoch": 0.7191244239631336, "grad_norm": 0.8448153689850479, "learning_rate": 1.5205318836974463e-06, "loss": 0.7142826914787292, "step": 3121 }, { "epoch": 0.7193548387096774, "grad_norm": 1.0627992363231917, "learning_rate": 1.520206545844983e-06, "loss": 0.715612530708313, "step": 3122 }, { "epoch": 0.7195852534562212, "grad_norm": 1.1048993433011334, "learning_rate": 1.5198811324852277e-06, "loss": 0.8851219415664673, "step": 3123 }, { "epoch": 0.719815668202765, "grad_norm": 0.9292687584217408, "learning_rate": 1.5195556436654146e-06, "loss": 0.981631875038147, "step": 3124 }, { "epoch": 0.7200460829493087, "grad_norm": 1.043088312445038, "learning_rate": 1.5192300794327876e-06, "loss": 0.8586313724517822, "step": 3125 }, { "epoch": 0.7202764976958526, "grad_norm": 1.082548105463139, "learning_rate": 1.518904439834602e-06, "loss": 0.8863250017166138, "step": 3126 }, { "epoch": 0.7205069124423963, "grad_norm": 0.8136107336174612, "learning_rate": 1.5185787249181239e-06, "loss": 0.864910900592804, "step": 3127 }, { "epoch": 0.7207373271889401, "grad_norm": 0.9898417106954193, "learning_rate": 1.5182529347306302e-06, "loss": 0.8120951652526855, "step": 3128 }, { "epoch": 0.7209677419354839, "grad_norm": 1.008844559262399, "learning_rate": 1.517927069319409e-06, "loss": 0.7866026163101196, "step": 3129 }, { "epoch": 0.7211981566820277, "grad_norm": 0.9577789377394936, "learning_rate": 1.5176011287317598e-06, "loss": 0.8610655069351196, "step": 3130 }, { "epoch": 0.7214285714285714, "grad_norm": 0.8861108738387133, "learning_rate": 1.5172751130149915e-06, "loss": 0.7463846206665039, "step": 3131 }, { "epoch": 0.7216589861751153, "grad_norm": 0.7361410685782023, "learning_rate": 1.5169490222164254e-06, "loss": 0.6578936576843262, "step": 3132 }, { "epoch": 0.721889400921659, "grad_norm": 0.9361369886672088, "learning_rate": 1.516622856383393e-06, "loss": 0.6849668025970459, "step": 3133 }, { "epoch": 0.7221198156682027, "grad_norm": 1.0686822202217916, "learning_rate": 1.5162966155632372e-06, "loss": 0.9549611806869507, "step": 3134 }, { "epoch": 0.7223502304147466, "grad_norm": 0.9063080856885865, "learning_rate": 1.5159702998033113e-06, "loss": 0.8005616664886475, "step": 3135 }, { "epoch": 0.7225806451612903, "grad_norm": 1.089721709643384, "learning_rate": 1.5156439091509793e-06, "loss": 0.8980830311775208, "step": 3136 }, { "epoch": 0.7228110599078341, "grad_norm": 1.012161312959267, "learning_rate": 1.5153174436536166e-06, "loss": 0.8247464895248413, "step": 3137 }, { "epoch": 0.7230414746543778, "grad_norm": 0.9582357561913161, "learning_rate": 1.5149909033586088e-06, "loss": 0.818629264831543, "step": 3138 }, { "epoch": 0.7232718894009217, "grad_norm": 0.7730251673290138, "learning_rate": 1.5146642883133532e-06, "loss": 0.8928704261779785, "step": 3139 }, { "epoch": 0.7235023041474654, "grad_norm": 1.199560365249708, "learning_rate": 1.5143375985652576e-06, "loss": 0.9330282807350159, "step": 3140 }, { "epoch": 0.7237327188940093, "grad_norm": 0.9749101527395967, "learning_rate": 1.5140108341617405e-06, "loss": 0.7961822748184204, "step": 3141 }, { "epoch": 0.723963133640553, "grad_norm": 0.9244859383947029, "learning_rate": 1.513683995150231e-06, "loss": 0.8073769807815552, "step": 3142 }, { "epoch": 0.7241935483870968, "grad_norm": 1.0469784848396728, "learning_rate": 1.51335708157817e-06, "loss": 0.946292519569397, "step": 3143 }, { "epoch": 0.7244239631336405, "grad_norm": 0.8214787899217685, "learning_rate": 1.513030093493008e-06, "loss": 0.806084156036377, "step": 3144 }, { "epoch": 0.7246543778801844, "grad_norm": 0.9086362129225068, "learning_rate": 1.5127030309422072e-06, "loss": 0.8804534673690796, "step": 3145 }, { "epoch": 0.7248847926267281, "grad_norm": 0.973773267534968, "learning_rate": 1.51237589397324e-06, "loss": 0.7489848136901855, "step": 3146 }, { "epoch": 0.7251152073732718, "grad_norm": 1.047973105384132, "learning_rate": 1.5120486826335905e-06, "loss": 0.875586986541748, "step": 3147 }, { "epoch": 0.7253456221198157, "grad_norm": 0.8473382638758681, "learning_rate": 1.5117213969707522e-06, "loss": 0.8334758281707764, "step": 3148 }, { "epoch": 0.7255760368663594, "grad_norm": 0.8693445792084491, "learning_rate": 1.5113940370322306e-06, "loss": 0.8010859489440918, "step": 3149 }, { "epoch": 0.7258064516129032, "grad_norm": 0.8638975130346471, "learning_rate": 1.5110666028655417e-06, "loss": 0.7907547950744629, "step": 3150 }, { "epoch": 0.726036866359447, "grad_norm": 0.9542895726151109, "learning_rate": 1.5107390945182117e-06, "loss": 0.8922848105430603, "step": 3151 }, { "epoch": 0.7262672811059908, "grad_norm": 0.7865624103758176, "learning_rate": 1.5104115120377783e-06, "loss": 0.7418628931045532, "step": 3152 }, { "epoch": 0.7264976958525345, "grad_norm": 1.0285540479216404, "learning_rate": 1.51008385547179e-06, "loss": 0.9063338041305542, "step": 3153 }, { "epoch": 0.7267281105990784, "grad_norm": 1.0080575916686718, "learning_rate": 1.5097561248678047e-06, "loss": 0.8718822002410889, "step": 3154 }, { "epoch": 0.7269585253456221, "grad_norm": 1.0055226715830414, "learning_rate": 1.5094283202733934e-06, "loss": 0.950742244720459, "step": 3155 }, { "epoch": 0.727188940092166, "grad_norm": 1.126636802719941, "learning_rate": 1.5091004417361353e-06, "loss": 0.7963443994522095, "step": 3156 }, { "epoch": 0.7274193548387097, "grad_norm": 1.0644638923319971, "learning_rate": 1.5087724893036225e-06, "loss": 0.8428621888160706, "step": 3157 }, { "epoch": 0.7276497695852534, "grad_norm": 1.0421355661787988, "learning_rate": 1.508444463023456e-06, "loss": 0.8271539211273193, "step": 3158 }, { "epoch": 0.7278801843317972, "grad_norm": 0.7345991655152693, "learning_rate": 1.508116362943249e-06, "loss": 0.7899917364120483, "step": 3159 }, { "epoch": 0.728110599078341, "grad_norm": 1.1916065857121023, "learning_rate": 1.5077881891106246e-06, "loss": 0.8734809160232544, "step": 3160 }, { "epoch": 0.7283410138248848, "grad_norm": 1.0138536766133128, "learning_rate": 1.5074599415732164e-06, "loss": 0.7740491628646851, "step": 3161 }, { "epoch": 0.7285714285714285, "grad_norm": 0.8952462084516831, "learning_rate": 1.5071316203786698e-06, "loss": 0.7219515442848206, "step": 3162 }, { "epoch": 0.7288018433179724, "grad_norm": 0.7779518912065628, "learning_rate": 1.50680322557464e-06, "loss": 0.8122725486755371, "step": 3163 }, { "epoch": 0.7290322580645161, "grad_norm": 0.9965727720770509, "learning_rate": 1.5064747572087923e-06, "loss": 0.8280072212219238, "step": 3164 }, { "epoch": 0.7292626728110599, "grad_norm": 0.9097690003119847, "learning_rate": 1.5061462153288047e-06, "loss": 0.7287842035293579, "step": 3165 }, { "epoch": 0.7294930875576037, "grad_norm": 1.0497146109580189, "learning_rate": 1.5058175999823639e-06, "loss": 0.8404949903488159, "step": 3166 }, { "epoch": 0.7297235023041475, "grad_norm": 0.9887517999095412, "learning_rate": 1.505488911217168e-06, "loss": 0.6572415828704834, "step": 3167 }, { "epoch": 0.7299539170506912, "grad_norm": 1.0946078663351873, "learning_rate": 1.5051601490809257e-06, "loss": 0.8924484848976135, "step": 3168 }, { "epoch": 0.7301843317972351, "grad_norm": 1.1648951213224894, "learning_rate": 1.5048313136213566e-06, "loss": 0.8701428174972534, "step": 3169 }, { "epoch": 0.7304147465437788, "grad_norm": 1.1475520143482136, "learning_rate": 1.5045024048861906e-06, "loss": 0.8327716588973999, "step": 3170 }, { "epoch": 0.7306451612903225, "grad_norm": 0.9261768702303601, "learning_rate": 1.5041734229231686e-06, "loss": 0.8379253149032593, "step": 3171 }, { "epoch": 0.7308755760368664, "grad_norm": 0.944084791074753, "learning_rate": 1.5038443677800413e-06, "loss": 0.7475664019584656, "step": 3172 }, { "epoch": 0.7311059907834101, "grad_norm": 1.2226580752686416, "learning_rate": 1.5035152395045714e-06, "loss": 0.9002243280410767, "step": 3173 }, { "epoch": 0.7313364055299539, "grad_norm": 0.8355701729873874, "learning_rate": 1.503186038144531e-06, "loss": 0.6718685626983643, "step": 3174 }, { "epoch": 0.7315668202764977, "grad_norm": 0.8961232238271665, "learning_rate": 1.5028567637477033e-06, "loss": 0.6836501359939575, "step": 3175 }, { "epoch": 0.7317972350230415, "grad_norm": 0.8859536342600928, "learning_rate": 1.502527416361882e-06, "loss": 0.7548954486846924, "step": 3176 }, { "epoch": 0.7320276497695852, "grad_norm": 0.9826706955950207, "learning_rate": 1.5021979960348714e-06, "loss": 0.8385212421417236, "step": 3177 }, { "epoch": 0.7322580645161291, "grad_norm": 0.8341383572022868, "learning_rate": 1.5018685028144864e-06, "loss": 0.8605425357818604, "step": 3178 }, { "epoch": 0.7324884792626728, "grad_norm": 0.9464588739740442, "learning_rate": 1.501538936748553e-06, "loss": 0.8831393718719482, "step": 3179 }, { "epoch": 0.7327188940092166, "grad_norm": 0.8991947067614845, "learning_rate": 1.5012092978849062e-06, "loss": 0.6965172290802002, "step": 3180 }, { "epoch": 0.7329493087557604, "grad_norm": 1.0090692893685214, "learning_rate": 1.500879586271394e-06, "loss": 0.8062859773635864, "step": 3181 }, { "epoch": 0.7331797235023041, "grad_norm": 0.7952177607289516, "learning_rate": 1.5005498019558724e-06, "loss": 0.8285790681838989, "step": 3182 }, { "epoch": 0.7334101382488479, "grad_norm": 0.9848452236152132, "learning_rate": 1.50021994498621e-06, "loss": 0.612429141998291, "step": 3183 }, { "epoch": 0.7336405529953917, "grad_norm": 0.9156545700522013, "learning_rate": 1.4998900154102847e-06, "loss": 0.8271423578262329, "step": 3184 }, { "epoch": 0.7338709677419355, "grad_norm": 1.033787601007848, "learning_rate": 1.499560013275986e-06, "loss": 0.838964581489563, "step": 3185 }, { "epoch": 0.7341013824884792, "grad_norm": 0.973220548768116, "learning_rate": 1.4992299386312119e-06, "loss": 0.7902333736419678, "step": 3186 }, { "epoch": 0.7343317972350231, "grad_norm": 1.0086369878855088, "learning_rate": 1.4988997915238735e-06, "loss": 0.8520635366439819, "step": 3187 }, { "epoch": 0.7345622119815668, "grad_norm": 0.9892742658321851, "learning_rate": 1.4985695720018905e-06, "loss": 0.8666567206382751, "step": 3188 }, { "epoch": 0.7347926267281106, "grad_norm": 0.9672613309802366, "learning_rate": 1.4982392801131944e-06, "loss": 0.6930691003799438, "step": 3189 }, { "epoch": 0.7350230414746544, "grad_norm": 0.7049869743164157, "learning_rate": 1.4979089159057263e-06, "loss": 0.7957722544670105, "step": 3190 }, { "epoch": 0.7352534562211982, "grad_norm": 1.0247601673009343, "learning_rate": 1.4975784794274383e-06, "loss": 0.8966697454452515, "step": 3191 }, { "epoch": 0.7354838709677419, "grad_norm": 0.9082832739975722, "learning_rate": 1.4972479707262926e-06, "loss": 0.7478537559509277, "step": 3192 }, { "epoch": 0.7357142857142858, "grad_norm": 0.9541041339746362, "learning_rate": 1.4969173898502624e-06, "loss": 0.8862416744232178, "step": 3193 }, { "epoch": 0.7359447004608295, "grad_norm": 0.8171852448254098, "learning_rate": 1.4965867368473306e-06, "loss": 0.7910712957382202, "step": 3194 }, { "epoch": 0.7361751152073732, "grad_norm": 1.1219879646982642, "learning_rate": 1.4962560117654916e-06, "loss": 0.7371944785118103, "step": 3195 }, { "epoch": 0.7364055299539171, "grad_norm": 1.097733223938739, "learning_rate": 1.4959252146527496e-06, "loss": 0.7966737151145935, "step": 3196 }, { "epoch": 0.7366359447004608, "grad_norm": 1.0499505243286467, "learning_rate": 1.4955943455571188e-06, "loss": 0.8474653363227844, "step": 3197 }, { "epoch": 0.7368663594470046, "grad_norm": 1.1042914253537062, "learning_rate": 1.4952634045266249e-06, "loss": 1.0197458267211914, "step": 3198 }, { "epoch": 0.7370967741935484, "grad_norm": 1.054872102822339, "learning_rate": 1.4949323916093036e-06, "loss": 0.8813979625701904, "step": 3199 }, { "epoch": 0.7373271889400922, "grad_norm": 0.9264193586497762, "learning_rate": 1.4946013068532008e-06, "loss": 0.9323042631149292, "step": 3200 }, { "epoch": 0.7375576036866359, "grad_norm": 1.1184797510334814, "learning_rate": 1.494270150306373e-06, "loss": 0.8637902736663818, "step": 3201 }, { "epoch": 0.7377880184331798, "grad_norm": 1.1006860616870338, "learning_rate": 1.4939389220168875e-06, "loss": 0.8046854734420776, "step": 3202 }, { "epoch": 0.7380184331797235, "grad_norm": 0.9882241685181946, "learning_rate": 1.4936076220328211e-06, "loss": 0.7616177201271057, "step": 3203 }, { "epoch": 0.7382488479262673, "grad_norm": 1.0795779512267711, "learning_rate": 1.4932762504022619e-06, "loss": 0.8548959493637085, "step": 3204 }, { "epoch": 0.738479262672811, "grad_norm": 0.7907178615166577, "learning_rate": 1.492944807173308e-06, "loss": 0.8062562942504883, "step": 3205 }, { "epoch": 0.7387096774193549, "grad_norm": 1.3004819436990922, "learning_rate": 1.492613292394068e-06, "loss": 0.8776403069496155, "step": 3206 }, { "epoch": 0.7389400921658986, "grad_norm": 1.0654471822316505, "learning_rate": 1.4922817061126605e-06, "loss": 0.7528336048126221, "step": 3207 }, { "epoch": 0.7391705069124423, "grad_norm": 0.9288011243231857, "learning_rate": 1.4919500483772152e-06, "loss": 0.7441881895065308, "step": 3208 }, { "epoch": 0.7394009216589862, "grad_norm": 0.9496581250230889, "learning_rate": 1.4916183192358715e-06, "loss": 0.8925758004188538, "step": 3209 }, { "epoch": 0.7396313364055299, "grad_norm": 0.999519243113449, "learning_rate": 1.4912865187367798e-06, "loss": 0.7527008652687073, "step": 3210 }, { "epoch": 0.7398617511520738, "grad_norm": 0.8631940848050832, "learning_rate": 1.4909546469281e-06, "loss": 0.753572404384613, "step": 3211 }, { "epoch": 0.7400921658986175, "grad_norm": 0.938203260102219, "learning_rate": 1.4906227038580036e-06, "loss": 0.8884274959564209, "step": 3212 }, { "epoch": 0.7403225806451613, "grad_norm": 0.7835821294972823, "learning_rate": 1.4902906895746707e-06, "loss": 0.7702244520187378, "step": 3213 }, { "epoch": 0.740552995391705, "grad_norm": 1.0140732775513552, "learning_rate": 1.4899586041262936e-06, "loss": 0.8662835359573364, "step": 3214 }, { "epoch": 0.7407834101382489, "grad_norm": 1.0357827096613574, "learning_rate": 1.4896264475610736e-06, "loss": 0.9819997549057007, "step": 3215 }, { "epoch": 0.7410138248847926, "grad_norm": 1.0094197188590162, "learning_rate": 1.4892942199272232e-06, "loss": 0.9137614965438843, "step": 3216 }, { "epoch": 0.7412442396313365, "grad_norm": 0.8442315992670393, "learning_rate": 1.488961921272964e-06, "loss": 0.7554785013198853, "step": 3217 }, { "epoch": 0.7414746543778802, "grad_norm": 1.1172745597106868, "learning_rate": 1.4886295516465296e-06, "loss": 0.8528940677642822, "step": 3218 }, { "epoch": 0.7417050691244239, "grad_norm": 0.9056918439443091, "learning_rate": 1.4882971110961626e-06, "loss": 0.7212377786636353, "step": 3219 }, { "epoch": 0.7419354838709677, "grad_norm": 0.9349124518247459, "learning_rate": 1.4879645996701161e-06, "loss": 0.7767617702484131, "step": 3220 }, { "epoch": 0.7421658986175115, "grad_norm": 0.8749389005214587, "learning_rate": 1.4876320174166542e-06, "loss": 0.8083292245864868, "step": 3221 }, { "epoch": 0.7423963133640553, "grad_norm": 1.14484646357819, "learning_rate": 1.4872993643840506e-06, "loss": 0.8652364015579224, "step": 3222 }, { "epoch": 0.742626728110599, "grad_norm": 0.9176030431238368, "learning_rate": 1.486966640620589e-06, "loss": 0.7455019950866699, "step": 3223 }, { "epoch": 0.7428571428571429, "grad_norm": 1.0637469159007076, "learning_rate": 1.4866338461745644e-06, "loss": 0.7881917953491211, "step": 3224 }, { "epoch": 0.7430875576036866, "grad_norm": 1.0955814961304737, "learning_rate": 1.4863009810942813e-06, "loss": 0.8148372173309326, "step": 3225 }, { "epoch": 0.7433179723502304, "grad_norm": 0.7991384008669099, "learning_rate": 1.4859680454280547e-06, "loss": 0.6574658751487732, "step": 3226 }, { "epoch": 0.7435483870967742, "grad_norm": 0.9231484623709659, "learning_rate": 1.4856350392242094e-06, "loss": 0.7831655740737915, "step": 3227 }, { "epoch": 0.743778801843318, "grad_norm": 0.8080817272772121, "learning_rate": 1.485301962531081e-06, "loss": 0.7406231164932251, "step": 3228 }, { "epoch": 0.7440092165898617, "grad_norm": 0.9500561612529754, "learning_rate": 1.4849688153970154e-06, "loss": 0.8092324733734131, "step": 3229 }, { "epoch": 0.7442396313364056, "grad_norm": 0.969093760928221, "learning_rate": 1.4846355978703679e-06, "loss": 0.6662560701370239, "step": 3230 }, { "epoch": 0.7444700460829493, "grad_norm": 0.8941354868939383, "learning_rate": 1.4843023099995052e-06, "loss": 0.8064731359481812, "step": 3231 }, { "epoch": 0.744700460829493, "grad_norm": 1.0463529761361023, "learning_rate": 1.4839689518328037e-06, "loss": 0.7424519658088684, "step": 3232 }, { "epoch": 0.7449308755760369, "grad_norm": 0.9618875213680247, "learning_rate": 1.4836355234186489e-06, "loss": 0.7851438522338867, "step": 3233 }, { "epoch": 0.7451612903225806, "grad_norm": 1.2534680382280676, "learning_rate": 1.4833020248054381e-06, "loss": 0.896986722946167, "step": 3234 }, { "epoch": 0.7453917050691244, "grad_norm": 1.3688846458082455, "learning_rate": 1.4829684560415787e-06, "loss": 0.9469928741455078, "step": 3235 }, { "epoch": 0.7456221198156682, "grad_norm": 0.8653442286827894, "learning_rate": 1.4826348171754872e-06, "loss": 0.7527188062667847, "step": 3236 }, { "epoch": 0.745852534562212, "grad_norm": 0.9575212903893582, "learning_rate": 1.4823011082555907e-06, "loss": 0.7758080959320068, "step": 3237 }, { "epoch": 0.7460829493087557, "grad_norm": 0.9454436343118328, "learning_rate": 1.481967329330327e-06, "loss": 0.8359881043434143, "step": 3238 }, { "epoch": 0.7463133640552996, "grad_norm": 0.7567559878181612, "learning_rate": 1.4816334804481434e-06, "loss": 0.6576982736587524, "step": 3239 }, { "epoch": 0.7465437788018433, "grad_norm": 1.0012365138594377, "learning_rate": 1.4812995616574978e-06, "loss": 0.7919917106628418, "step": 3240 }, { "epoch": 0.7467741935483871, "grad_norm": 0.7865137499791297, "learning_rate": 1.480965573006858e-06, "loss": 0.7682263851165771, "step": 3241 }, { "epoch": 0.7470046082949309, "grad_norm": 1.0123241682054298, "learning_rate": 1.4806315145447017e-06, "loss": 0.8573193550109863, "step": 3242 }, { "epoch": 0.7472350230414746, "grad_norm": 0.8191884786597581, "learning_rate": 1.4802973863195174e-06, "loss": 0.8473606109619141, "step": 3243 }, { "epoch": 0.7474654377880184, "grad_norm": 0.8754073951862541, "learning_rate": 1.4799631883798033e-06, "loss": 0.8110678195953369, "step": 3244 }, { "epoch": 0.7476958525345622, "grad_norm": 1.2161581760732987, "learning_rate": 1.4796289207740681e-06, "loss": 0.6624661087989807, "step": 3245 }, { "epoch": 0.747926267281106, "grad_norm": 0.7356293873938221, "learning_rate": 1.47929458355083e-06, "loss": 0.8145536184310913, "step": 3246 }, { "epoch": 0.7481566820276497, "grad_norm": 0.921128997158793, "learning_rate": 1.4789601767586172e-06, "loss": 0.7819876074790955, "step": 3247 }, { "epoch": 0.7483870967741936, "grad_norm": 0.973465003660405, "learning_rate": 1.4786257004459692e-06, "loss": 0.7573810815811157, "step": 3248 }, { "epoch": 0.7486175115207373, "grad_norm": 1.061603620628762, "learning_rate": 1.4782911546614343e-06, "loss": 0.8149522542953491, "step": 3249 }, { "epoch": 0.7488479262672811, "grad_norm": 1.023358335101362, "learning_rate": 1.4779565394535714e-06, "loss": 0.9935284852981567, "step": 3250 }, { "epoch": 0.7490783410138249, "grad_norm": 0.8488935416479958, "learning_rate": 1.4776218548709497e-06, "loss": 0.8673371076583862, "step": 3251 }, { "epoch": 0.7493087557603687, "grad_norm": 1.0304468521950305, "learning_rate": 1.4772871009621477e-06, "loss": 0.8569149374961853, "step": 3252 }, { "epoch": 0.7495391705069124, "grad_norm": 0.8613722173703313, "learning_rate": 1.4769522777757551e-06, "loss": 0.7177854776382446, "step": 3253 }, { "epoch": 0.7497695852534563, "grad_norm": 1.0681726446759283, "learning_rate": 1.4766173853603706e-06, "loss": 0.8115622997283936, "step": 3254 }, { "epoch": 0.75, "grad_norm": 0.782977490159237, "learning_rate": 1.4762824237646038e-06, "loss": 0.7209019660949707, "step": 3255 }, { "epoch": 0.7502304147465437, "grad_norm": 0.9264325214188774, "learning_rate": 1.4759473930370736e-06, "loss": 0.8433470726013184, "step": 3256 }, { "epoch": 0.7504608294930876, "grad_norm": 1.0399152705693322, "learning_rate": 1.4756122932264093e-06, "loss": 0.853674054145813, "step": 3257 }, { "epoch": 0.7506912442396313, "grad_norm": 0.9978956076189626, "learning_rate": 1.4752771243812503e-06, "loss": 0.8645769357681274, "step": 3258 }, { "epoch": 0.7509216589861751, "grad_norm": 1.4046905803968728, "learning_rate": 1.474941886550246e-06, "loss": 0.927452564239502, "step": 3259 }, { "epoch": 0.7511520737327189, "grad_norm": 0.8642581213790671, "learning_rate": 1.4746065797820552e-06, "loss": 0.7461255788803101, "step": 3260 }, { "epoch": 0.7513824884792627, "grad_norm": 0.9230380534710827, "learning_rate": 1.4742712041253481e-06, "loss": 0.8737163543701172, "step": 3261 }, { "epoch": 0.7516129032258064, "grad_norm": 0.8624828182814519, "learning_rate": 1.4739357596288036e-06, "loss": 0.7148758172988892, "step": 3262 }, { "epoch": 0.7518433179723503, "grad_norm": 0.8930446588032352, "learning_rate": 1.4736002463411108e-06, "loss": 0.738334596157074, "step": 3263 }, { "epoch": 0.752073732718894, "grad_norm": 0.9237791770446419, "learning_rate": 1.4732646643109692e-06, "loss": 0.7733340263366699, "step": 3264 }, { "epoch": 0.7523041474654378, "grad_norm": 0.8815526032135323, "learning_rate": 1.4729290135870883e-06, "loss": 0.7882881164550781, "step": 3265 }, { "epoch": 0.7525345622119816, "grad_norm": 1.029688172185613, "learning_rate": 1.472593294218187e-06, "loss": 0.7908357381820679, "step": 3266 }, { "epoch": 0.7527649769585254, "grad_norm": 1.0791156682188368, "learning_rate": 1.4722575062529946e-06, "loss": 0.8818062543869019, "step": 3267 }, { "epoch": 0.7529953917050691, "grad_norm": 0.9552677127935061, "learning_rate": 1.4719216497402504e-06, "loss": 0.7152599692344666, "step": 3268 }, { "epoch": 0.7532258064516129, "grad_norm": 0.8322037056106782, "learning_rate": 1.4715857247287036e-06, "loss": 0.8503165245056152, "step": 3269 }, { "epoch": 0.7534562211981567, "grad_norm": 0.9223729567181368, "learning_rate": 1.4712497312671128e-06, "loss": 0.8382623195648193, "step": 3270 }, { "epoch": 0.7536866359447004, "grad_norm": 1.0456882119229616, "learning_rate": 1.4709136694042479e-06, "loss": 0.8358533382415771, "step": 3271 }, { "epoch": 0.7539170506912443, "grad_norm": 0.850717529465525, "learning_rate": 1.4705775391888868e-06, "loss": 0.6735624670982361, "step": 3272 }, { "epoch": 0.754147465437788, "grad_norm": 0.8890452669379437, "learning_rate": 1.470241340669819e-06, "loss": 0.8343949317932129, "step": 3273 }, { "epoch": 0.7543778801843318, "grad_norm": 0.9508610560109901, "learning_rate": 1.4699050738958434e-06, "loss": 0.8204318284988403, "step": 3274 }, { "epoch": 0.7546082949308756, "grad_norm": 0.9484772286558124, "learning_rate": 1.4695687389157684e-06, "loss": 0.7541854977607727, "step": 3275 }, { "epoch": 0.7548387096774194, "grad_norm": 0.8425504123859369, "learning_rate": 1.4692323357784122e-06, "loss": 0.8144943714141846, "step": 3276 }, { "epoch": 0.7550691244239631, "grad_norm": 0.8699783126306536, "learning_rate": 1.468895864532604e-06, "loss": 0.9045677781105042, "step": 3277 }, { "epoch": 0.755299539170507, "grad_norm": 1.1586104318366583, "learning_rate": 1.4685593252271816e-06, "loss": 0.8818730115890503, "step": 3278 }, { "epoch": 0.7555299539170507, "grad_norm": 1.013621065000431, "learning_rate": 1.4682227179109932e-06, "loss": 0.8582229614257812, "step": 3279 }, { "epoch": 0.7557603686635944, "grad_norm": 1.016541372354986, "learning_rate": 1.4678860426328977e-06, "loss": 0.8769974708557129, "step": 3280 }, { "epoch": 0.7559907834101383, "grad_norm": 0.8474484944100091, "learning_rate": 1.467549299441762e-06, "loss": 0.8034937381744385, "step": 3281 }, { "epoch": 0.756221198156682, "grad_norm": 0.9998169463505984, "learning_rate": 1.4672124883864646e-06, "loss": 0.9057378768920898, "step": 3282 }, { "epoch": 0.7564516129032258, "grad_norm": 0.9160359407680143, "learning_rate": 1.4668756095158929e-06, "loss": 0.8039969205856323, "step": 3283 }, { "epoch": 0.7566820276497696, "grad_norm": 0.7311572278532684, "learning_rate": 1.4665386628789448e-06, "loss": 0.887493908405304, "step": 3284 }, { "epoch": 0.7569124423963134, "grad_norm": 0.9749833066021305, "learning_rate": 1.4662016485245271e-06, "loss": 0.783561646938324, "step": 3285 }, { "epoch": 0.7571428571428571, "grad_norm": 1.1972955361865625, "learning_rate": 1.4658645665015579e-06, "loss": 0.7526337504386902, "step": 3286 }, { "epoch": 0.757373271889401, "grad_norm": 1.0074911468135093, "learning_rate": 1.4655274168589633e-06, "loss": 0.8583099842071533, "step": 3287 }, { "epoch": 0.7576036866359447, "grad_norm": 0.9193819222275846, "learning_rate": 1.4651901996456802e-06, "loss": 0.743253767490387, "step": 3288 }, { "epoch": 0.7578341013824885, "grad_norm": 0.9481332173734432, "learning_rate": 1.4648529149106555e-06, "loss": 0.8763987421989441, "step": 3289 }, { "epoch": 0.7580645161290323, "grad_norm": 0.9531439206540595, "learning_rate": 1.4645155627028455e-06, "loss": 0.8388645648956299, "step": 3290 }, { "epoch": 0.7582949308755761, "grad_norm": 0.9430549047432926, "learning_rate": 1.4641781430712167e-06, "loss": 0.8943589925765991, "step": 3291 }, { "epoch": 0.7585253456221198, "grad_norm": 0.897306276129885, "learning_rate": 1.463840656064745e-06, "loss": 0.9224259257316589, "step": 3292 }, { "epoch": 0.7587557603686635, "grad_norm": 0.7118962108569266, "learning_rate": 1.463503101732416e-06, "loss": 0.5836232900619507, "step": 3293 }, { "epoch": 0.7589861751152074, "grad_norm": 1.2610309452085111, "learning_rate": 1.4631654801232255e-06, "loss": 0.6700382828712463, "step": 3294 }, { "epoch": 0.7592165898617511, "grad_norm": 0.9159006934526643, "learning_rate": 1.4628277912861785e-06, "loss": 0.7876112461090088, "step": 3295 }, { "epoch": 0.759447004608295, "grad_norm": 0.9073380438964382, "learning_rate": 1.4624900352702905e-06, "loss": 0.8410799503326416, "step": 3296 }, { "epoch": 0.7596774193548387, "grad_norm": 0.931630117662002, "learning_rate": 1.4621522121245859e-06, "loss": 0.9615974426269531, "step": 3297 }, { "epoch": 0.7599078341013825, "grad_norm": 1.1213393394374043, "learning_rate": 1.4618143218980996e-06, "loss": 0.7973389625549316, "step": 3298 }, { "epoch": 0.7601382488479262, "grad_norm": 0.7835636014361216, "learning_rate": 1.461476364639876e-06, "loss": 0.7734094858169556, "step": 3299 }, { "epoch": 0.7603686635944701, "grad_norm": 0.9681758067915807, "learning_rate": 1.461138340398969e-06, "loss": 0.7365939617156982, "step": 3300 }, { "epoch": 0.7605990783410138, "grad_norm": 0.9251627601521192, "learning_rate": 1.4608002492244421e-06, "loss": 0.822052001953125, "step": 3301 }, { "epoch": 0.7608294930875577, "grad_norm": 0.83536047590978, "learning_rate": 1.460462091165369e-06, "loss": 0.7220577001571655, "step": 3302 }, { "epoch": 0.7610599078341014, "grad_norm": 0.9806834080573716, "learning_rate": 1.4601238662708332e-06, "loss": 0.9795923233032227, "step": 3303 }, { "epoch": 0.7612903225806451, "grad_norm": 1.0452301496717684, "learning_rate": 1.4597855745899273e-06, "loss": 0.804523229598999, "step": 3304 }, { "epoch": 0.761520737327189, "grad_norm": 0.936039712838613, "learning_rate": 1.4594472161717536e-06, "loss": 0.7630297541618347, "step": 3305 }, { "epoch": 0.7617511520737327, "grad_norm": 1.008258749087615, "learning_rate": 1.4591087910654254e-06, "loss": 0.7088560461997986, "step": 3306 }, { "epoch": 0.7619815668202765, "grad_norm": 0.8612515545716848, "learning_rate": 1.4587702993200637e-06, "loss": 0.6627416014671326, "step": 3307 }, { "epoch": 0.7622119815668202, "grad_norm": 1.0700034611745908, "learning_rate": 1.4584317409848001e-06, "loss": 0.7931111454963684, "step": 3308 }, { "epoch": 0.7624423963133641, "grad_norm": 0.918004873184285, "learning_rate": 1.4580931161087763e-06, "loss": 0.8107850551605225, "step": 3309 }, { "epoch": 0.7626728110599078, "grad_norm": 1.1251596055699022, "learning_rate": 1.4577544247411431e-06, "loss": 0.8211404085159302, "step": 3310 }, { "epoch": 0.7629032258064516, "grad_norm": 1.1825093837600291, "learning_rate": 1.457415666931061e-06, "loss": 0.9861341714859009, "step": 3311 }, { "epoch": 0.7631336405529954, "grad_norm": 1.0573079532917569, "learning_rate": 1.4570768427277007e-06, "loss": 0.8963409662246704, "step": 3312 }, { "epoch": 0.7633640552995392, "grad_norm": 1.1183054914337, "learning_rate": 1.4567379521802416e-06, "loss": 0.7510147094726562, "step": 3313 }, { "epoch": 0.7635944700460829, "grad_norm": 1.0312269750408198, "learning_rate": 1.4563989953378734e-06, "loss": 0.7761805057525635, "step": 3314 }, { "epoch": 0.7638248847926268, "grad_norm": 0.782434581691777, "learning_rate": 1.4560599722497953e-06, "loss": 0.6202781200408936, "step": 3315 }, { "epoch": 0.7640552995391705, "grad_norm": 0.9114320197488165, "learning_rate": 1.4557208829652159e-06, "loss": 0.711891770362854, "step": 3316 }, { "epoch": 0.7642857142857142, "grad_norm": 1.0888571874972786, "learning_rate": 1.4553817275333537e-06, "loss": 0.8689517974853516, "step": 3317 }, { "epoch": 0.7645161290322581, "grad_norm": 0.847547372029402, "learning_rate": 1.4550425060034365e-06, "loss": 0.7323688268661499, "step": 3318 }, { "epoch": 0.7647465437788018, "grad_norm": 0.954006429800706, "learning_rate": 1.4547032184247022e-06, "loss": 0.8934407234191895, "step": 3319 }, { "epoch": 0.7649769585253456, "grad_norm": 0.9830574702749578, "learning_rate": 1.4543638648463975e-06, "loss": 0.7729885578155518, "step": 3320 }, { "epoch": 0.7652073732718894, "grad_norm": 0.9967355019103026, "learning_rate": 1.454024445317779e-06, "loss": 0.8962388038635254, "step": 3321 }, { "epoch": 0.7654377880184332, "grad_norm": 0.8821073382766633, "learning_rate": 1.4536849598881137e-06, "loss": 0.8655213117599487, "step": 3322 }, { "epoch": 0.7656682027649769, "grad_norm": 0.8780656658271131, "learning_rate": 1.453345408606677e-06, "loss": 0.6471779346466064, "step": 3323 }, { "epoch": 0.7658986175115208, "grad_norm": 0.7335596828312507, "learning_rate": 1.4530057915227545e-06, "loss": 0.8665071129798889, "step": 3324 }, { "epoch": 0.7661290322580645, "grad_norm": 1.054528188345679, "learning_rate": 1.4526661086856407e-06, "loss": 0.9504371285438538, "step": 3325 }, { "epoch": 0.7663594470046083, "grad_norm": 1.017396914206461, "learning_rate": 1.452326360144641e-06, "loss": 0.8122013807296753, "step": 3326 }, { "epoch": 0.7665898617511521, "grad_norm": 1.0019111601549837, "learning_rate": 1.4519865459490687e-06, "loss": 0.817001223564148, "step": 3327 }, { "epoch": 0.7668202764976959, "grad_norm": 0.9387626004792055, "learning_rate": 1.4516466661482474e-06, "loss": 0.732322096824646, "step": 3328 }, { "epoch": 0.7670506912442396, "grad_norm": 0.8844021324185192, "learning_rate": 1.4513067207915106e-06, "loss": 0.7961580157279968, "step": 3329 }, { "epoch": 0.7672811059907834, "grad_norm": 0.9579783239612414, "learning_rate": 1.4509667099282007e-06, "loss": 0.7660717368125916, "step": 3330 }, { "epoch": 0.7675115207373272, "grad_norm": 0.8487336367256668, "learning_rate": 1.4506266336076698e-06, "loss": 0.8279193639755249, "step": 3331 }, { "epoch": 0.7677419354838709, "grad_norm": 0.8431407438554851, "learning_rate": 1.4502864918792796e-06, "loss": 0.7050153017044067, "step": 3332 }, { "epoch": 0.7679723502304148, "grad_norm": 0.9386347952909049, "learning_rate": 1.4499462847924013e-06, "loss": 0.8146064877510071, "step": 3333 }, { "epoch": 0.7682027649769585, "grad_norm": 0.8248232070769104, "learning_rate": 1.4496060123964153e-06, "loss": 0.8300814628601074, "step": 3334 }, { "epoch": 0.7684331797235023, "grad_norm": 0.848400587593364, "learning_rate": 1.4492656747407117e-06, "loss": 0.8240403532981873, "step": 3335 }, { "epoch": 0.7686635944700461, "grad_norm": 1.1661360506901004, "learning_rate": 1.4489252718746908e-06, "loss": 0.901625394821167, "step": 3336 }, { "epoch": 0.7688940092165899, "grad_norm": 0.8620744709914054, "learning_rate": 1.4485848038477604e-06, "loss": 0.827139675617218, "step": 3337 }, { "epoch": 0.7691244239631336, "grad_norm": 1.111541176491108, "learning_rate": 1.4482442707093397e-06, "loss": 0.7032946348190308, "step": 3338 }, { "epoch": 0.7693548387096775, "grad_norm": 0.8506038004087974, "learning_rate": 1.4479036725088564e-06, "loss": 0.6805816888809204, "step": 3339 }, { "epoch": 0.7695852534562212, "grad_norm": 0.8063208135295213, "learning_rate": 1.447563009295748e-06, "loss": 0.673591136932373, "step": 3340 }, { "epoch": 0.7698156682027649, "grad_norm": 0.8116035277545482, "learning_rate": 1.4472222811194614e-06, "loss": 0.6513386964797974, "step": 3341 }, { "epoch": 0.7700460829493088, "grad_norm": 0.7654089652768199, "learning_rate": 1.4468814880294529e-06, "loss": 0.7367297410964966, "step": 3342 }, { "epoch": 0.7702764976958525, "grad_norm": 1.0405555538712603, "learning_rate": 1.4465406300751878e-06, "loss": 0.7393670082092285, "step": 3343 }, { "epoch": 0.7705069124423963, "grad_norm": 0.7135144631405288, "learning_rate": 1.4461997073061411e-06, "loss": 0.7525930404663086, "step": 3344 }, { "epoch": 0.7707373271889401, "grad_norm": 0.7583677101512988, "learning_rate": 1.445858719771798e-06, "loss": 0.6679942011833191, "step": 3345 }, { "epoch": 0.7709677419354839, "grad_norm": 1.0903018310329022, "learning_rate": 1.4455176675216518e-06, "loss": 0.8440653085708618, "step": 3346 }, { "epoch": 0.7711981566820276, "grad_norm": 0.9929368208299709, "learning_rate": 1.4451765506052063e-06, "loss": 0.8765773177146912, "step": 3347 }, { "epoch": 0.7714285714285715, "grad_norm": 0.9183070258317377, "learning_rate": 1.4448353690719732e-06, "loss": 0.7309157848358154, "step": 3348 }, { "epoch": 0.7716589861751152, "grad_norm": 0.8130162073408548, "learning_rate": 1.4444941229714758e-06, "loss": 0.8043340444564819, "step": 3349 }, { "epoch": 0.771889400921659, "grad_norm": 0.8488386913998837, "learning_rate": 1.4441528123532443e-06, "loss": 0.6528831124305725, "step": 3350 }, { "epoch": 0.7721198156682028, "grad_norm": 0.7632405080168834, "learning_rate": 1.4438114372668202e-06, "loss": 0.7973155975341797, "step": 3351 }, { "epoch": 0.7723502304147466, "grad_norm": 0.8366450624031991, "learning_rate": 1.443469997761754e-06, "loss": 0.940142810344696, "step": 3352 }, { "epoch": 0.7725806451612903, "grad_norm": 1.0048812991349738, "learning_rate": 1.443128493887604e-06, "loss": 0.7936829328536987, "step": 3353 }, { "epoch": 0.772811059907834, "grad_norm": 0.8583665989338275, "learning_rate": 1.44278692569394e-06, "loss": 0.8369218111038208, "step": 3354 }, { "epoch": 0.7730414746543779, "grad_norm": 1.313808566044562, "learning_rate": 1.4424452932303398e-06, "loss": 0.9305802583694458, "step": 3355 }, { "epoch": 0.7732718894009216, "grad_norm": 0.8862565116465879, "learning_rate": 1.4421035965463916e-06, "loss": 0.913454532623291, "step": 3356 }, { "epoch": 0.7735023041474655, "grad_norm": 1.0772806984700294, "learning_rate": 1.4417618356916912e-06, "loss": 0.8552114963531494, "step": 3357 }, { "epoch": 0.7737327188940092, "grad_norm": 1.080720564237515, "learning_rate": 1.4414200107158452e-06, "loss": 0.8674488067626953, "step": 3358 }, { "epoch": 0.773963133640553, "grad_norm": 1.0999604158561203, "learning_rate": 1.441078121668469e-06, "loss": 0.9142898321151733, "step": 3359 }, { "epoch": 0.7741935483870968, "grad_norm": 1.0964749277789683, "learning_rate": 1.4407361685991872e-06, "loss": 0.8258639574050903, "step": 3360 }, { "epoch": 0.7744239631336406, "grad_norm": 1.062716295700188, "learning_rate": 1.4403941515576343e-06, "loss": 0.773646354675293, "step": 3361 }, { "epoch": 0.7746543778801843, "grad_norm": 1.1397221950146432, "learning_rate": 1.440052070593453e-06, "loss": 0.9481985569000244, "step": 3362 }, { "epoch": 0.7748847926267282, "grad_norm": 1.0332478363266029, "learning_rate": 1.4397099257562965e-06, "loss": 0.7915977239608765, "step": 3363 }, { "epoch": 0.7751152073732719, "grad_norm": 1.057946693927254, "learning_rate": 1.4393677170958261e-06, "loss": 0.887650191783905, "step": 3364 }, { "epoch": 0.7753456221198156, "grad_norm": 0.8250912024788589, "learning_rate": 1.4390254446617137e-06, "loss": 0.8516546487808228, "step": 3365 }, { "epoch": 0.7755760368663595, "grad_norm": 0.9895329351481195, "learning_rate": 1.4386831085036386e-06, "loss": 0.8076090812683105, "step": 3366 }, { "epoch": 0.7758064516129032, "grad_norm": 0.9203902257484836, "learning_rate": 1.4383407086712913e-06, "loss": 0.7480059862136841, "step": 3367 }, { "epoch": 0.776036866359447, "grad_norm": 1.1101542314671893, "learning_rate": 1.4379982452143704e-06, "loss": 0.8586190938949585, "step": 3368 }, { "epoch": 0.7762672811059907, "grad_norm": 0.9197679868181698, "learning_rate": 1.4376557181825842e-06, "loss": 0.7581472396850586, "step": 3369 }, { "epoch": 0.7764976958525346, "grad_norm": 1.2064630913320733, "learning_rate": 1.4373131276256495e-06, "loss": 0.7482568621635437, "step": 3370 }, { "epoch": 0.7767281105990783, "grad_norm": 1.2204489088505164, "learning_rate": 1.4369704735932935e-06, "loss": 0.8822590112686157, "step": 3371 }, { "epoch": 0.7769585253456222, "grad_norm": 0.9171528830764245, "learning_rate": 1.4366277561352517e-06, "loss": 0.7762279510498047, "step": 3372 }, { "epoch": 0.7771889400921659, "grad_norm": 0.9649262790570658, "learning_rate": 1.4362849753012692e-06, "loss": 0.8059147596359253, "step": 3373 }, { "epoch": 0.7774193548387097, "grad_norm": 1.0529652703364816, "learning_rate": 1.4359421311411e-06, "loss": 0.778538703918457, "step": 3374 }, { "epoch": 0.7776497695852534, "grad_norm": 1.1587212424703164, "learning_rate": 1.4355992237045077e-06, "loss": 0.9422975778579712, "step": 3375 }, { "epoch": 0.7778801843317973, "grad_norm": 1.0109308621512796, "learning_rate": 1.4352562530412645e-06, "loss": 0.7437118291854858, "step": 3376 }, { "epoch": 0.778110599078341, "grad_norm": 0.8961203034935337, "learning_rate": 1.4349132192011525e-06, "loss": 0.6935930252075195, "step": 3377 }, { "epoch": 0.7783410138248847, "grad_norm": 1.1629979064489353, "learning_rate": 1.4345701222339628e-06, "loss": 0.7797117829322815, "step": 3378 }, { "epoch": 0.7785714285714286, "grad_norm": 1.0591342199366531, "learning_rate": 1.434226962189495e-06, "loss": 0.8795931339263916, "step": 3379 }, { "epoch": 0.7788018433179723, "grad_norm": 1.071603440273884, "learning_rate": 1.433883739117558e-06, "loss": 0.8936992287635803, "step": 3380 }, { "epoch": 0.7790322580645161, "grad_norm": 1.0412928095771106, "learning_rate": 1.4335404530679708e-06, "loss": 0.9142701625823975, "step": 3381 }, { "epoch": 0.7792626728110599, "grad_norm": 1.0966643259622728, "learning_rate": 1.4331971040905613e-06, "loss": 0.8996907472610474, "step": 3382 }, { "epoch": 0.7794930875576037, "grad_norm": 1.020250921022328, "learning_rate": 1.4328536922351654e-06, "loss": 0.9645330905914307, "step": 3383 }, { "epoch": 0.7797235023041474, "grad_norm": 0.7173807290755059, "learning_rate": 1.4325102175516289e-06, "loss": 0.5122036933898926, "step": 3384 }, { "epoch": 0.7799539170506913, "grad_norm": 0.8487864939918429, "learning_rate": 1.432166680089807e-06, "loss": 0.6556990742683411, "step": 3385 }, { "epoch": 0.780184331797235, "grad_norm": 0.7980125905366343, "learning_rate": 1.4318230798995634e-06, "loss": 0.6642920970916748, "step": 3386 }, { "epoch": 0.7804147465437788, "grad_norm": 1.1205844690065134, "learning_rate": 1.4314794170307718e-06, "loss": 0.9373915195465088, "step": 3387 }, { "epoch": 0.7806451612903226, "grad_norm": 1.1583496011366634, "learning_rate": 1.4311356915333139e-06, "loss": 0.8295063972473145, "step": 3388 }, { "epoch": 0.7808755760368664, "grad_norm": 1.0075666840710995, "learning_rate": 1.4307919034570809e-06, "loss": 0.8167035579681396, "step": 3389 }, { "epoch": 0.7811059907834101, "grad_norm": 1.045465756545736, "learning_rate": 1.4304480528519736e-06, "loss": 0.8444087505340576, "step": 3390 }, { "epoch": 0.7813364055299539, "grad_norm": 0.9731986846355507, "learning_rate": 1.4301041397679012e-06, "loss": 0.7753941416740417, "step": 3391 }, { "epoch": 0.7815668202764977, "grad_norm": 1.0117493931274548, "learning_rate": 1.4297601642547824e-06, "loss": 0.7885915040969849, "step": 3392 }, { "epoch": 0.7817972350230414, "grad_norm": 0.9902641403084854, "learning_rate": 1.4294161263625444e-06, "loss": 0.730733335018158, "step": 3393 }, { "epoch": 0.7820276497695853, "grad_norm": 0.8781208509199174, "learning_rate": 1.4290720261411241e-06, "loss": 0.8505427837371826, "step": 3394 }, { "epoch": 0.782258064516129, "grad_norm": 0.9435888376510791, "learning_rate": 1.4287278636404676e-06, "loss": 0.7370787858963013, "step": 3395 }, { "epoch": 0.7824884792626728, "grad_norm": 0.8683550268652552, "learning_rate": 1.428383638910529e-06, "loss": 0.6776250600814819, "step": 3396 }, { "epoch": 0.7827188940092166, "grad_norm": 1.158711583120319, "learning_rate": 1.4280393520012726e-06, "loss": 0.8878101706504822, "step": 3397 }, { "epoch": 0.7829493087557604, "grad_norm": 1.0028929146104306, "learning_rate": 1.427695002962671e-06, "loss": 0.789238691329956, "step": 3398 }, { "epoch": 0.7831797235023041, "grad_norm": 1.0382561381902518, "learning_rate": 1.4273505918447052e-06, "loss": 0.772524356842041, "step": 3399 }, { "epoch": 0.783410138248848, "grad_norm": 0.8483839499127978, "learning_rate": 1.4270061186973673e-06, "loss": 0.682374119758606, "step": 3400 }, { "epoch": 0.7836405529953917, "grad_norm": 0.9396222987314208, "learning_rate": 1.4266615835706566e-06, "loss": 0.874775767326355, "step": 3401 }, { "epoch": 0.7838709677419354, "grad_norm": 1.3780294752863322, "learning_rate": 1.4263169865145816e-06, "loss": 0.9141736626625061, "step": 3402 }, { "epoch": 0.7841013824884793, "grad_norm": 1.0849695477918648, "learning_rate": 1.4259723275791603e-06, "loss": 0.8533145189285278, "step": 3403 }, { "epoch": 0.784331797235023, "grad_norm": 0.9340136683520418, "learning_rate": 1.4256276068144198e-06, "loss": 0.7920266389846802, "step": 3404 }, { "epoch": 0.7845622119815668, "grad_norm": 0.9462841256440514, "learning_rate": 1.4252828242703957e-06, "loss": 0.7822731733322144, "step": 3405 }, { "epoch": 0.7847926267281106, "grad_norm": 0.9890597976168253, "learning_rate": 1.4249379799971324e-06, "loss": 0.7103791832923889, "step": 3406 }, { "epoch": 0.7850230414746544, "grad_norm": 1.0298833059227221, "learning_rate": 1.4245930740446841e-06, "loss": 0.7857639789581299, "step": 3407 }, { "epoch": 0.7852534562211981, "grad_norm": 1.1065594183312877, "learning_rate": 1.4242481064631134e-06, "loss": 0.8069730997085571, "step": 3408 }, { "epoch": 0.785483870967742, "grad_norm": 1.0472042802008708, "learning_rate": 1.4239030773024912e-06, "loss": 0.8758031129837036, "step": 3409 }, { "epoch": 0.7857142857142857, "grad_norm": 1.015785019886056, "learning_rate": 1.4235579866128983e-06, "loss": 0.895712673664093, "step": 3410 }, { "epoch": 0.7859447004608295, "grad_norm": 0.9442660407745113, "learning_rate": 1.423212834444425e-06, "loss": 0.7904561758041382, "step": 3411 }, { "epoch": 0.7861751152073733, "grad_norm": 1.0957623852355893, "learning_rate": 1.4228676208471685e-06, "loss": 0.9322203993797302, "step": 3412 }, { "epoch": 0.7864055299539171, "grad_norm": 0.7668753687506044, "learning_rate": 1.422522345871237e-06, "loss": 0.9693628549575806, "step": 3413 }, { "epoch": 0.7866359447004608, "grad_norm": 0.8417164970136307, "learning_rate": 1.4221770095667462e-06, "loss": 0.6737014651298523, "step": 3414 }, { "epoch": 0.7868663594470046, "grad_norm": 1.1466654292657967, "learning_rate": 1.4218316119838215e-06, "loss": 0.8682050108909607, "step": 3415 }, { "epoch": 0.7870967741935484, "grad_norm": 1.058324160083765, "learning_rate": 1.4214861531725966e-06, "loss": 0.7920347452163696, "step": 3416 }, { "epoch": 0.7873271889400921, "grad_norm": 1.0147867893383273, "learning_rate": 1.4211406331832144e-06, "loss": 0.8330510854721069, "step": 3417 }, { "epoch": 0.787557603686636, "grad_norm": 0.8802491842183522, "learning_rate": 1.4207950520658272e-06, "loss": 0.8314074873924255, "step": 3418 }, { "epoch": 0.7877880184331797, "grad_norm": 1.069355954495663, "learning_rate": 1.420449409870595e-06, "loss": 0.7045331001281738, "step": 3419 }, { "epoch": 0.7880184331797235, "grad_norm": 0.9484390721895568, "learning_rate": 1.4201037066476876e-06, "loss": 0.7825411558151245, "step": 3420 }, { "epoch": 0.7882488479262673, "grad_norm": 0.86611108370867, "learning_rate": 1.4197579424472834e-06, "loss": 0.6960075497627258, "step": 3421 }, { "epoch": 0.7884792626728111, "grad_norm": 1.038692849963906, "learning_rate": 1.4194121173195694e-06, "loss": 0.8366748094558716, "step": 3422 }, { "epoch": 0.7887096774193548, "grad_norm": 0.8605441828045868, "learning_rate": 1.4190662313147419e-06, "loss": 0.8859039545059204, "step": 3423 }, { "epoch": 0.7889400921658987, "grad_norm": 1.0572382908005622, "learning_rate": 1.4187202844830057e-06, "loss": 0.7098245620727539, "step": 3424 }, { "epoch": 0.7891705069124424, "grad_norm": 0.9126448008384304, "learning_rate": 1.4183742768745743e-06, "loss": 0.7410455942153931, "step": 3425 }, { "epoch": 0.7894009216589861, "grad_norm": 0.8007200450015498, "learning_rate": 1.4180282085396706e-06, "loss": 0.7414010763168335, "step": 3426 }, { "epoch": 0.78963133640553, "grad_norm": 1.090062212374054, "learning_rate": 1.417682079528526e-06, "loss": 0.9043526649475098, "step": 3427 }, { "epoch": 0.7898617511520737, "grad_norm": 0.8510201071166715, "learning_rate": 1.4173358898913804e-06, "loss": 0.7709499597549438, "step": 3428 }, { "epoch": 0.7900921658986175, "grad_norm": 1.0829385459770577, "learning_rate": 1.416989639678483e-06, "loss": 0.7499940395355225, "step": 3429 }, { "epoch": 0.7903225806451613, "grad_norm": 0.766744185733082, "learning_rate": 1.4166433289400911e-06, "loss": 0.7401680946350098, "step": 3430 }, { "epoch": 0.7905529953917051, "grad_norm": 0.8802012939982503, "learning_rate": 1.4162969577264718e-06, "loss": 1.0132567882537842, "step": 3431 }, { "epoch": 0.7907834101382488, "grad_norm": 0.9758763490715631, "learning_rate": 1.4159505260879004e-06, "loss": 0.8438389301300049, "step": 3432 }, { "epoch": 0.7910138248847927, "grad_norm": 1.2075583274029744, "learning_rate": 1.4156040340746603e-06, "loss": 0.9149703979492188, "step": 3433 }, { "epoch": 0.7912442396313364, "grad_norm": 1.4960555955584764, "learning_rate": 1.4152574817370451e-06, "loss": 0.9141047596931458, "step": 3434 }, { "epoch": 0.7914746543778802, "grad_norm": 0.924125511762228, "learning_rate": 1.414910869125356e-06, "loss": 0.6896570324897766, "step": 3435 }, { "epoch": 0.791705069124424, "grad_norm": 0.9277571830040596, "learning_rate": 1.4145641962899035e-06, "loss": 0.742916464805603, "step": 3436 }, { "epoch": 0.7919354838709678, "grad_norm": 1.0041274553911197, "learning_rate": 1.414217463281007e-06, "loss": 0.9315029382705688, "step": 3437 }, { "epoch": 0.7921658986175115, "grad_norm": 0.9532695013501692, "learning_rate": 1.4138706701489942e-06, "loss": 0.7645175457000732, "step": 3438 }, { "epoch": 0.7923963133640552, "grad_norm": 1.0166687927137474, "learning_rate": 1.413523816944201e-06, "loss": 0.8253934383392334, "step": 3439 }, { "epoch": 0.7926267281105991, "grad_norm": 1.055807296618818, "learning_rate": 1.4131769037169736e-06, "loss": 0.8650136590003967, "step": 3440 }, { "epoch": 0.7928571428571428, "grad_norm": 1.0239985264965783, "learning_rate": 1.4128299305176654e-06, "loss": 0.7453975677490234, "step": 3441 }, { "epoch": 0.7930875576036867, "grad_norm": 1.1689392671270256, "learning_rate": 1.4124828973966392e-06, "loss": 0.9121813774108887, "step": 3442 }, { "epoch": 0.7933179723502304, "grad_norm": 1.16007005259146, "learning_rate": 1.4121358044042667e-06, "loss": 0.9097952842712402, "step": 3443 }, { "epoch": 0.7935483870967742, "grad_norm": 0.9263687778783555, "learning_rate": 1.4117886515909277e-06, "loss": 0.7185770273208618, "step": 3444 }, { "epoch": 0.793778801843318, "grad_norm": 0.9816189958888628, "learning_rate": 1.4114414390070111e-06, "loss": 0.8192715644836426, "step": 3445 }, { "epoch": 0.7940092165898618, "grad_norm": 0.8830372557771754, "learning_rate": 1.4110941667029143e-06, "loss": 0.7864251136779785, "step": 3446 }, { "epoch": 0.7942396313364055, "grad_norm": 0.9262266668392852, "learning_rate": 1.4107468347290431e-06, "loss": 0.7433357834815979, "step": 3447 }, { "epoch": 0.7944700460829494, "grad_norm": 0.8826486406616629, "learning_rate": 1.4103994431358133e-06, "loss": 0.8196350336074829, "step": 3448 }, { "epoch": 0.7947004608294931, "grad_norm": 1.0379031741076927, "learning_rate": 1.410051991973647e-06, "loss": 0.7698987126350403, "step": 3449 }, { "epoch": 0.7949308755760369, "grad_norm": 1.228700210939763, "learning_rate": 1.4097044812929776e-06, "loss": 0.9404128789901733, "step": 3450 }, { "epoch": 0.7951612903225806, "grad_norm": 0.9114628140508482, "learning_rate": 1.4093569111442443e-06, "loss": 0.827290952205658, "step": 3451 }, { "epoch": 0.7953917050691244, "grad_norm": 1.0612294009838623, "learning_rate": 1.4090092815778976e-06, "loss": 0.8126389384269714, "step": 3452 }, { "epoch": 0.7956221198156682, "grad_norm": 0.9598694992596972, "learning_rate": 1.4086615926443953e-06, "loss": 0.7439650297164917, "step": 3453 }, { "epoch": 0.7958525345622119, "grad_norm": 0.9952168701899716, "learning_rate": 1.4083138443942036e-06, "loss": 0.7505590915679932, "step": 3454 }, { "epoch": 0.7960829493087558, "grad_norm": 0.8299073365871691, "learning_rate": 1.407966036877798e-06, "loss": 0.7070168256759644, "step": 3455 }, { "epoch": 0.7963133640552995, "grad_norm": 0.9422601313607071, "learning_rate": 1.4076181701456623e-06, "loss": 0.8271987438201904, "step": 3456 }, { "epoch": 0.7965437788018433, "grad_norm": 0.8558890366072001, "learning_rate": 1.4072702442482886e-06, "loss": 0.72886061668396, "step": 3457 }, { "epoch": 0.7967741935483871, "grad_norm": 1.1355616522222822, "learning_rate": 1.4069222592361784e-06, "loss": 0.838603138923645, "step": 3458 }, { "epoch": 0.7970046082949309, "grad_norm": 1.1314183210174298, "learning_rate": 1.4065742151598408e-06, "loss": 0.9829634428024292, "step": 3459 }, { "epoch": 0.7972350230414746, "grad_norm": 1.0528251173572156, "learning_rate": 1.406226112069794e-06, "loss": 0.8269632458686829, "step": 3460 }, { "epoch": 0.7974654377880185, "grad_norm": 1.0290510208624037, "learning_rate": 1.405877950016565e-06, "loss": 0.7234654426574707, "step": 3461 }, { "epoch": 0.7976958525345622, "grad_norm": 0.89079385428478, "learning_rate": 1.4055297290506887e-06, "loss": 0.7843908071517944, "step": 3462 }, { "epoch": 0.7979262672811059, "grad_norm": 0.8247890912721374, "learning_rate": 1.4051814492227094e-06, "loss": 0.7294371128082275, "step": 3463 }, { "epoch": 0.7981566820276498, "grad_norm": 1.1727486785997119, "learning_rate": 1.4048331105831787e-06, "loss": 0.8805780410766602, "step": 3464 }, { "epoch": 0.7983870967741935, "grad_norm": 0.9922079942807702, "learning_rate": 1.404484713182658e-06, "loss": 0.6933708190917969, "step": 3465 }, { "epoch": 0.7986175115207373, "grad_norm": 1.0638183747733119, "learning_rate": 1.404136257071717e-06, "loss": 0.8720458745956421, "step": 3466 }, { "epoch": 0.7988479262672811, "grad_norm": 1.1404138575251217, "learning_rate": 1.403787742300933e-06, "loss": 0.7675988674163818, "step": 3467 }, { "epoch": 0.7990783410138249, "grad_norm": 1.0188982193786602, "learning_rate": 1.403439168920893e-06, "loss": 0.7630051374435425, "step": 3468 }, { "epoch": 0.7993087557603686, "grad_norm": 0.9607713149142998, "learning_rate": 1.4030905369821914e-06, "loss": 0.9195173978805542, "step": 3469 }, { "epoch": 0.7995391705069125, "grad_norm": 0.966603725031027, "learning_rate": 1.402741846535432e-06, "loss": 0.9347431659698486, "step": 3470 }, { "epoch": 0.7997695852534562, "grad_norm": 1.0423944793385256, "learning_rate": 1.4023930976312271e-06, "loss": 0.7812551259994507, "step": 3471 }, { "epoch": 0.8, "grad_norm": 1.0230073164776583, "learning_rate": 1.4020442903201963e-06, "loss": 0.7655330896377563, "step": 3472 }, { "epoch": 0.8002304147465438, "grad_norm": 1.2791975931288466, "learning_rate": 1.4016954246529694e-06, "loss": 0.7543904185295105, "step": 3473 }, { "epoch": 0.8004608294930876, "grad_norm": 0.8246426244987128, "learning_rate": 1.4013465006801833e-06, "loss": 0.9343980550765991, "step": 3474 }, { "epoch": 0.8006912442396313, "grad_norm": 1.1458439395589735, "learning_rate": 1.4009975184524838e-06, "loss": 0.7366182208061218, "step": 3475 }, { "epoch": 0.8009216589861751, "grad_norm": 1.0109168818205314, "learning_rate": 1.4006484780205254e-06, "loss": 0.7028899192810059, "step": 3476 }, { "epoch": 0.8011520737327189, "grad_norm": 1.1092959183189253, "learning_rate": 1.4002993794349708e-06, "loss": 0.9259153604507446, "step": 3477 }, { "epoch": 0.8013824884792626, "grad_norm": 1.091442085001374, "learning_rate": 1.3999502227464914e-06, "loss": 0.7263842225074768, "step": 3478 }, { "epoch": 0.8016129032258065, "grad_norm": 0.9964781390280828, "learning_rate": 1.3996010080057664e-06, "loss": 0.8177748918533325, "step": 3479 }, { "epoch": 0.8018433179723502, "grad_norm": 1.080145531043834, "learning_rate": 1.3992517352634842e-06, "loss": 0.8526895046234131, "step": 3480 }, { "epoch": 0.802073732718894, "grad_norm": 1.031018616296166, "learning_rate": 1.398902404570341e-06, "loss": 0.7914575338363647, "step": 3481 }, { "epoch": 0.8023041474654378, "grad_norm": 0.816157508913072, "learning_rate": 1.398553015977042e-06, "loss": 0.7546013593673706, "step": 3482 }, { "epoch": 0.8025345622119816, "grad_norm": 1.0408293581677805, "learning_rate": 1.3982035695343005e-06, "loss": 0.7250038385391235, "step": 3483 }, { "epoch": 0.8027649769585253, "grad_norm": 1.023275477136697, "learning_rate": 1.3978540652928376e-06, "loss": 0.8650141954421997, "step": 3484 }, { "epoch": 0.8029953917050692, "grad_norm": 0.9633891302798026, "learning_rate": 1.3975045033033838e-06, "loss": 0.8020066022872925, "step": 3485 }, { "epoch": 0.8032258064516129, "grad_norm": 0.9146174916063312, "learning_rate": 1.3971548836166782e-06, "loss": 0.7376772165298462, "step": 3486 }, { "epoch": 0.8034562211981566, "grad_norm": 0.9278800283054291, "learning_rate": 1.3968052062834665e-06, "loss": 0.8440769910812378, "step": 3487 }, { "epoch": 0.8036866359447005, "grad_norm": 0.8964312010034259, "learning_rate": 1.3964554713545047e-06, "loss": 0.7886836528778076, "step": 3488 }, { "epoch": 0.8039170506912442, "grad_norm": 0.9177920963823754, "learning_rate": 1.396105678880556e-06, "loss": 0.9167575836181641, "step": 3489 }, { "epoch": 0.804147465437788, "grad_norm": 0.8367032180339474, "learning_rate": 1.3957558289123922e-06, "loss": 0.6761677861213684, "step": 3490 }, { "epoch": 0.8043778801843318, "grad_norm": 0.9716984065235628, "learning_rate": 1.3954059215007938e-06, "loss": 0.7775592803955078, "step": 3491 }, { "epoch": 0.8046082949308756, "grad_norm": 1.00005526663364, "learning_rate": 1.3950559566965494e-06, "loss": 0.8127217292785645, "step": 3492 }, { "epoch": 0.8048387096774193, "grad_norm": 1.007116682040637, "learning_rate": 1.394705934550456e-06, "loss": 0.8134229779243469, "step": 3493 }, { "epoch": 0.8050691244239632, "grad_norm": 1.3224030787110577, "learning_rate": 1.3943558551133186e-06, "loss": 0.8853167295455933, "step": 3494 }, { "epoch": 0.8052995391705069, "grad_norm": 1.0544152264027669, "learning_rate": 1.3940057184359506e-06, "loss": 0.8024332523345947, "step": 3495 }, { "epoch": 0.8055299539170507, "grad_norm": 0.6779010833647611, "learning_rate": 1.3936555245691745e-06, "loss": 0.7581099271774292, "step": 3496 }, { "epoch": 0.8057603686635945, "grad_norm": 1.0509729333579008, "learning_rate": 1.3933052735638203e-06, "loss": 0.979412317276001, "step": 3497 }, { "epoch": 0.8059907834101383, "grad_norm": 0.9816833973848147, "learning_rate": 1.392954965470726e-06, "loss": 0.7917830944061279, "step": 3498 }, { "epoch": 0.806221198156682, "grad_norm": 0.9622725908619084, "learning_rate": 1.392604600340739e-06, "loss": 0.8565326929092407, "step": 3499 }, { "epoch": 0.8064516129032258, "grad_norm": 1.0170451339424116, "learning_rate": 1.3922541782247136e-06, "loss": 0.7276358604431152, "step": 3500 }, { "epoch": 0.8066820276497696, "grad_norm": 0.8351645839157906, "learning_rate": 1.3919036991735138e-06, "loss": 0.734528660774231, "step": 3501 }, { "epoch": 0.8069124423963133, "grad_norm": 1.1746648423168138, "learning_rate": 1.391553163238011e-06, "loss": 0.8786039352416992, "step": 3502 }, { "epoch": 0.8071428571428572, "grad_norm": 1.1050955424788658, "learning_rate": 1.3912025704690844e-06, "loss": 0.9509482383728027, "step": 3503 }, { "epoch": 0.8073732718894009, "grad_norm": 0.8741751886687131, "learning_rate": 1.3908519209176225e-06, "loss": 0.7188615202903748, "step": 3504 }, { "epoch": 0.8076036866359447, "grad_norm": 1.0307846021250762, "learning_rate": 1.3905012146345221e-06, "loss": 0.7681115865707397, "step": 3505 }, { "epoch": 0.8078341013824885, "grad_norm": 1.0988034793572021, "learning_rate": 1.3901504516706874e-06, "loss": 0.8835415840148926, "step": 3506 }, { "epoch": 0.8080645161290323, "grad_norm": 1.0724177836810997, "learning_rate": 1.389799632077031e-06, "loss": 0.8179003000259399, "step": 3507 }, { "epoch": 0.808294930875576, "grad_norm": 1.1244187286361234, "learning_rate": 1.3894487559044742e-06, "loss": 0.9690247774124146, "step": 3508 }, { "epoch": 0.8085253456221199, "grad_norm": 0.9601740737567672, "learning_rate": 1.389097823203946e-06, "loss": 0.9759812951087952, "step": 3509 }, { "epoch": 0.8087557603686636, "grad_norm": 0.8953376224758026, "learning_rate": 1.3887468340263838e-06, "loss": 0.6649112105369568, "step": 3510 }, { "epoch": 0.8089861751152074, "grad_norm": 0.8803647716437188, "learning_rate": 1.388395788422733e-06, "loss": 0.7824583053588867, "step": 3511 }, { "epoch": 0.8092165898617512, "grad_norm": 1.0776551292843717, "learning_rate": 1.3880446864439482e-06, "loss": 0.8226176500320435, "step": 3512 }, { "epoch": 0.8094470046082949, "grad_norm": 1.0775758718001336, "learning_rate": 1.3876935281409904e-06, "loss": 0.7708876729011536, "step": 3513 }, { "epoch": 0.8096774193548387, "grad_norm": 1.1275141981575327, "learning_rate": 1.3873423135648303e-06, "loss": 0.7162825465202332, "step": 3514 }, { "epoch": 0.8099078341013825, "grad_norm": 1.1973823780619761, "learning_rate": 1.3869910427664464e-06, "loss": 0.815816342830658, "step": 3515 }, { "epoch": 0.8101382488479263, "grad_norm": 1.0491570029475803, "learning_rate": 1.3866397157968248e-06, "loss": 0.9166251420974731, "step": 3516 }, { "epoch": 0.81036866359447, "grad_norm": 1.185963303947227, "learning_rate": 1.3862883327069606e-06, "loss": 0.9193897843360901, "step": 3517 }, { "epoch": 0.8105990783410139, "grad_norm": 1.1492579516601074, "learning_rate": 1.3859368935478557e-06, "loss": 0.9019489288330078, "step": 3518 }, { "epoch": 0.8108294930875576, "grad_norm": 1.0706438739080621, "learning_rate": 1.3855853983705222e-06, "loss": 0.8616153597831726, "step": 3519 }, { "epoch": 0.8110599078341014, "grad_norm": 0.9368530229676858, "learning_rate": 1.3852338472259782e-06, "loss": 0.8898462057113647, "step": 3520 }, { "epoch": 0.8112903225806452, "grad_norm": 0.9891797921278073, "learning_rate": 1.3848822401652513e-06, "loss": 0.770263135433197, "step": 3521 }, { "epoch": 0.811520737327189, "grad_norm": 0.950594228231774, "learning_rate": 1.384530577239377e-06, "loss": 0.7524563074111938, "step": 3522 }, { "epoch": 0.8117511520737327, "grad_norm": 0.8975349550091929, "learning_rate": 1.3841788584993981e-06, "loss": 0.776715874671936, "step": 3523 }, { "epoch": 0.8119815668202764, "grad_norm": 0.6412822466784485, "learning_rate": 1.3838270839963666e-06, "loss": 0.7165439128875732, "step": 3524 }, { "epoch": 0.8122119815668203, "grad_norm": 1.0082147827954213, "learning_rate": 1.383475253781342e-06, "loss": 0.7641004323959351, "step": 3525 }, { "epoch": 0.812442396313364, "grad_norm": 0.9278762834298543, "learning_rate": 1.3831233679053921e-06, "loss": 0.7493933439254761, "step": 3526 }, { "epoch": 0.8126728110599079, "grad_norm": 1.1064599998463516, "learning_rate": 1.3827714264195924e-06, "loss": 0.7981607913970947, "step": 3527 }, { "epoch": 0.8129032258064516, "grad_norm": 1.2555949352929368, "learning_rate": 1.3824194293750272e-06, "loss": 0.9130103588104248, "step": 3528 }, { "epoch": 0.8131336405529954, "grad_norm": 1.0192840808161379, "learning_rate": 1.3820673768227878e-06, "loss": 0.7208644151687622, "step": 3529 }, { "epoch": 0.8133640552995391, "grad_norm": 0.9880323858602741, "learning_rate": 1.3817152688139745e-06, "loss": 0.9134006500244141, "step": 3530 }, { "epoch": 0.813594470046083, "grad_norm": 0.836575472485664, "learning_rate": 1.381363105399695e-06, "loss": 0.7383376359939575, "step": 3531 }, { "epoch": 0.8138248847926267, "grad_norm": 1.4743208995655537, "learning_rate": 1.381010886631066e-06, "loss": 0.9143035411834717, "step": 3532 }, { "epoch": 0.8140552995391706, "grad_norm": 0.8030889519622723, "learning_rate": 1.3806586125592107e-06, "loss": 0.7972506284713745, "step": 3533 }, { "epoch": 0.8142857142857143, "grad_norm": 0.9706054308316248, "learning_rate": 1.380306283235262e-06, "loss": 0.8999859094619751, "step": 3534 }, { "epoch": 0.8145161290322581, "grad_norm": 1.4136312048518, "learning_rate": 1.37995389871036e-06, "loss": 0.7759672999382019, "step": 3535 }, { "epoch": 0.8147465437788018, "grad_norm": 0.8852561621502252, "learning_rate": 1.3796014590356522e-06, "loss": 0.7915023565292358, "step": 3536 }, { "epoch": 0.8149769585253456, "grad_norm": 1.0626460640648143, "learning_rate": 1.3792489642622956e-06, "loss": 0.8259623050689697, "step": 3537 }, { "epoch": 0.8152073732718894, "grad_norm": 0.9193643373115533, "learning_rate": 1.3788964144414534e-06, "loss": 0.7786526679992676, "step": 3538 }, { "epoch": 0.8154377880184331, "grad_norm": 0.8743120056652736, "learning_rate": 1.3785438096242987e-06, "loss": 0.8655314445495605, "step": 3539 }, { "epoch": 0.815668202764977, "grad_norm": 1.073925215345039, "learning_rate": 1.3781911498620108e-06, "loss": 0.8116016387939453, "step": 3540 }, { "epoch": 0.8158986175115207, "grad_norm": 1.07781870851745, "learning_rate": 1.3778384352057781e-06, "loss": 0.712907075881958, "step": 3541 }, { "epoch": 0.8161290322580645, "grad_norm": 0.9419481549244654, "learning_rate": 1.377485665706797e-06, "loss": 0.8271318674087524, "step": 3542 }, { "epoch": 0.8163594470046083, "grad_norm": 1.231349694992367, "learning_rate": 1.3771328414162713e-06, "loss": 0.9161353707313538, "step": 3543 }, { "epoch": 0.8165898617511521, "grad_norm": 1.1900246832578463, "learning_rate": 1.3767799623854125e-06, "loss": 0.9555908441543579, "step": 3544 }, { "epoch": 0.8168202764976958, "grad_norm": 0.9121338000164769, "learning_rate": 1.3764270286654414e-06, "loss": 0.7863249778747559, "step": 3545 }, { "epoch": 0.8170506912442397, "grad_norm": 1.0362996056258458, "learning_rate": 1.3760740403075853e-06, "loss": 0.9086883068084717, "step": 3546 }, { "epoch": 0.8172811059907834, "grad_norm": 0.9211768991499883, "learning_rate": 1.37572099736308e-06, "loss": 0.6231412887573242, "step": 3547 }, { "epoch": 0.8175115207373271, "grad_norm": 0.94903309328564, "learning_rate": 1.3753678998831692e-06, "loss": 0.8221716284751892, "step": 3548 }, { "epoch": 0.817741935483871, "grad_norm": 1.0641797094094223, "learning_rate": 1.375014747919105e-06, "loss": 0.8077783584594727, "step": 3549 }, { "epoch": 0.8179723502304147, "grad_norm": 1.0675643850007648, "learning_rate": 1.3746615415221463e-06, "loss": 0.6882060766220093, "step": 3550 }, { "epoch": 0.8182027649769585, "grad_norm": 0.8393670588117293, "learning_rate": 1.3743082807435614e-06, "loss": 0.700161337852478, "step": 3551 }, { "epoch": 0.8184331797235023, "grad_norm": 0.8856084645963668, "learning_rate": 1.3739549656346243e-06, "loss": 0.737981915473938, "step": 3552 }, { "epoch": 0.8186635944700461, "grad_norm": 0.8562104816360829, "learning_rate": 1.3736015962466193e-06, "loss": 0.8025717735290527, "step": 3553 }, { "epoch": 0.8188940092165898, "grad_norm": 1.1233745076434911, "learning_rate": 1.3732481726308372e-06, "loss": 0.8855722546577454, "step": 3554 }, { "epoch": 0.8191244239631337, "grad_norm": 1.2861487220187957, "learning_rate": 1.3728946948385768e-06, "loss": 0.819130539894104, "step": 3555 }, { "epoch": 0.8193548387096774, "grad_norm": 1.086213399760416, "learning_rate": 1.3725411629211454e-06, "loss": 0.8419625759124756, "step": 3556 }, { "epoch": 0.8195852534562212, "grad_norm": 0.8659477904111433, "learning_rate": 1.3721875769298575e-06, "loss": 0.8478890657424927, "step": 3557 }, { "epoch": 0.819815668202765, "grad_norm": 0.9446742102947047, "learning_rate": 1.371833936916035e-06, "loss": 0.8654077053070068, "step": 3558 }, { "epoch": 0.8200460829493088, "grad_norm": 1.132873117876266, "learning_rate": 1.371480242931009e-06, "loss": 0.8898686170578003, "step": 3559 }, { "epoch": 0.8202764976958525, "grad_norm": 1.0419861877874252, "learning_rate": 1.3711264950261176e-06, "loss": 0.873773455619812, "step": 3560 }, { "epoch": 0.8205069124423963, "grad_norm": 0.8068261635969198, "learning_rate": 1.3707726932527068e-06, "loss": 0.6323572397232056, "step": 3561 }, { "epoch": 0.8207373271889401, "grad_norm": 1.1038849604905803, "learning_rate": 1.3704188376621304e-06, "loss": 0.7018281817436218, "step": 3562 }, { "epoch": 0.8209677419354838, "grad_norm": 1.084497532058705, "learning_rate": 1.37006492830575e-06, "loss": 0.8052775859832764, "step": 3563 }, { "epoch": 0.8211981566820277, "grad_norm": 1.0795040103988192, "learning_rate": 1.3697109652349352e-06, "loss": 0.8057233095169067, "step": 3564 }, { "epoch": 0.8214285714285714, "grad_norm": 1.1240440402053398, "learning_rate": 1.3693569485010633e-06, "loss": 0.8647899627685547, "step": 3565 }, { "epoch": 0.8216589861751152, "grad_norm": 0.9167509343069911, "learning_rate": 1.369002878155519e-06, "loss": 0.8022265434265137, "step": 3566 }, { "epoch": 0.821889400921659, "grad_norm": 1.0569217144551386, "learning_rate": 1.368648754249696e-06, "loss": 0.8534140586853027, "step": 3567 }, { "epoch": 0.8221198156682028, "grad_norm": 1.1336199597215886, "learning_rate": 1.3682945768349935e-06, "loss": 0.905183732509613, "step": 3568 }, { "epoch": 0.8223502304147465, "grad_norm": 1.0114816874699049, "learning_rate": 1.3679403459628215e-06, "loss": 0.6096831560134888, "step": 3569 }, { "epoch": 0.8225806451612904, "grad_norm": 1.0433167842442863, "learning_rate": 1.367586061684595e-06, "loss": 0.7220188975334167, "step": 3570 }, { "epoch": 0.8228110599078341, "grad_norm": 1.2434665139770538, "learning_rate": 1.3672317240517386e-06, "loss": 0.8028903007507324, "step": 3571 }, { "epoch": 0.8230414746543778, "grad_norm": 0.8999816334081224, "learning_rate": 1.3668773331156831e-06, "loss": 0.8121141791343689, "step": 3572 }, { "epoch": 0.8232718894009217, "grad_norm": 0.9985064007808814, "learning_rate": 1.3665228889278687e-06, "loss": 0.8259282112121582, "step": 3573 }, { "epoch": 0.8235023041474654, "grad_norm": 1.0492496227314838, "learning_rate": 1.3661683915397423e-06, "loss": 0.9356029033660889, "step": 3574 }, { "epoch": 0.8237327188940092, "grad_norm": 0.9103215470779688, "learning_rate": 1.3658138410027582e-06, "loss": 0.738788366317749, "step": 3575 }, { "epoch": 0.823963133640553, "grad_norm": 0.9813034370683628, "learning_rate": 1.3654592373683794e-06, "loss": 0.7775605320930481, "step": 3576 }, { "epoch": 0.8241935483870968, "grad_norm": 1.0650813981062164, "learning_rate": 1.3651045806880766e-06, "loss": 0.7645376324653625, "step": 3577 }, { "epoch": 0.8244239631336405, "grad_norm": 0.9731809944135928, "learning_rate": 1.3647498710133272e-06, "loss": 0.7713958024978638, "step": 3578 }, { "epoch": 0.8246543778801844, "grad_norm": 1.148498187573576, "learning_rate": 1.3643951083956165e-06, "loss": 0.6920947432518005, "step": 3579 }, { "epoch": 0.8248847926267281, "grad_norm": 0.8263814798727009, "learning_rate": 1.3640402928864382e-06, "loss": 0.7108405828475952, "step": 3580 }, { "epoch": 0.8251152073732719, "grad_norm": 1.0141959867722847, "learning_rate": 1.3636854245372936e-06, "loss": 0.7879295945167542, "step": 3581 }, { "epoch": 0.8253456221198157, "grad_norm": 0.8796188222287911, "learning_rate": 1.3633305033996909e-06, "loss": 0.8173119425773621, "step": 3582 }, { "epoch": 0.8255760368663595, "grad_norm": 1.230625652029921, "learning_rate": 1.3629755295251466e-06, "loss": 0.8530454635620117, "step": 3583 }, { "epoch": 0.8258064516129032, "grad_norm": 0.7851178128331011, "learning_rate": 1.3626205029651846e-06, "loss": 0.7749553918838501, "step": 3584 }, { "epoch": 0.826036866359447, "grad_norm": 0.9879629515788971, "learning_rate": 1.362265423771337e-06, "loss": 0.8313847780227661, "step": 3585 }, { "epoch": 0.8262672811059908, "grad_norm": 0.9997153587851354, "learning_rate": 1.3619102919951424e-06, "loss": 0.7285455465316772, "step": 3586 }, { "epoch": 0.8264976958525345, "grad_norm": 1.053529475482116, "learning_rate": 1.361555107688148e-06, "loss": 0.8084003925323486, "step": 3587 }, { "epoch": 0.8267281105990784, "grad_norm": 1.1979034262658517, "learning_rate": 1.3611998709019088e-06, "loss": 0.8506543040275574, "step": 3588 }, { "epoch": 0.8269585253456221, "grad_norm": 1.150137696376644, "learning_rate": 1.3608445816879864e-06, "loss": 0.8320293426513672, "step": 3589 }, { "epoch": 0.8271889400921659, "grad_norm": 1.0954200087136678, "learning_rate": 1.3604892400979501e-06, "loss": 0.8116205930709839, "step": 3590 }, { "epoch": 0.8274193548387097, "grad_norm": 0.988607654244707, "learning_rate": 1.3601338461833785e-06, "loss": 0.8317450284957886, "step": 3591 }, { "epoch": 0.8276497695852535, "grad_norm": 1.0502248139840338, "learning_rate": 1.3597783999958553e-06, "loss": 0.7348642349243164, "step": 3592 }, { "epoch": 0.8278801843317972, "grad_norm": 0.8829971344500126, "learning_rate": 1.359422901586974e-06, "loss": 0.8087270259857178, "step": 3593 }, { "epoch": 0.8281105990783411, "grad_norm": 1.1012699484003496, "learning_rate": 1.3590673510083345e-06, "loss": 0.7964637875556946, "step": 3594 }, { "epoch": 0.8283410138248848, "grad_norm": 0.8597833865541051, "learning_rate": 1.358711748311544e-06, "loss": 0.6192176342010498, "step": 3595 }, { "epoch": 0.8285714285714286, "grad_norm": 1.458647590594062, "learning_rate": 1.3583560935482182e-06, "loss": 0.7735739946365356, "step": 3596 }, { "epoch": 0.8288018433179724, "grad_norm": 1.209934555151429, "learning_rate": 1.35800038676998e-06, "loss": 0.7965315580368042, "step": 3597 }, { "epoch": 0.8290322580645161, "grad_norm": 1.0086229436787473, "learning_rate": 1.3576446280284595e-06, "loss": 0.6489244699478149, "step": 3598 }, { "epoch": 0.8292626728110599, "grad_norm": 1.041271189758682, "learning_rate": 1.3572888173752946e-06, "loss": 0.8073695302009583, "step": 3599 }, { "epoch": 0.8294930875576036, "grad_norm": 0.7544591630478071, "learning_rate": 1.3569329548621309e-06, "loss": 0.7925900816917419, "step": 3600 }, { "epoch": 0.8297235023041475, "grad_norm": 1.1274353505725723, "learning_rate": 1.356577040540621e-06, "loss": 0.83954918384552, "step": 3601 }, { "epoch": 0.8299539170506912, "grad_norm": 0.69092010707332, "learning_rate": 1.356221074462426e-06, "loss": 0.6384706497192383, "step": 3602 }, { "epoch": 0.830184331797235, "grad_norm": 0.8604009933780791, "learning_rate": 1.3558650566792136e-06, "loss": 0.8308184146881104, "step": 3603 }, { "epoch": 0.8304147465437788, "grad_norm": 0.9893567222365065, "learning_rate": 1.3555089872426596e-06, "loss": 0.7972864508628845, "step": 3604 }, { "epoch": 0.8306451612903226, "grad_norm": 1.0575497381629144, "learning_rate": 1.3551528662044463e-06, "loss": 0.8038849830627441, "step": 3605 }, { "epoch": 0.8308755760368663, "grad_norm": 1.0146034272672162, "learning_rate": 1.3547966936162646e-06, "loss": 0.7735980749130249, "step": 3606 }, { "epoch": 0.8311059907834102, "grad_norm": 1.169701687059532, "learning_rate": 1.354440469529813e-06, "loss": 0.7717504501342773, "step": 3607 }, { "epoch": 0.8313364055299539, "grad_norm": 0.8981514617249363, "learning_rate": 1.3540841939967962e-06, "loss": 0.9405615329742432, "step": 3608 }, { "epoch": 0.8315668202764976, "grad_norm": 0.9913743440349779, "learning_rate": 1.3537278670689273e-06, "loss": 0.7730603814125061, "step": 3609 }, { "epoch": 0.8317972350230415, "grad_norm": 1.1958069213876743, "learning_rate": 1.353371488797927e-06, "loss": 0.8677463531494141, "step": 3610 }, { "epoch": 0.8320276497695852, "grad_norm": 1.0362704574624084, "learning_rate": 1.3530150592355227e-06, "loss": 0.8261700868606567, "step": 3611 }, { "epoch": 0.832258064516129, "grad_norm": 0.9430749395940993, "learning_rate": 1.35265857843345e-06, "loss": 0.6799050569534302, "step": 3612 }, { "epoch": 0.8324884792626728, "grad_norm": 1.0479319081515341, "learning_rate": 1.3523020464434514e-06, "loss": 0.9117664098739624, "step": 3613 }, { "epoch": 0.8327188940092166, "grad_norm": 1.0691436327470698, "learning_rate": 1.3519454633172771e-06, "loss": 0.8637168407440186, "step": 3614 }, { "epoch": 0.8329493087557603, "grad_norm": 0.8579929983536723, "learning_rate": 1.3515888291066848e-06, "loss": 0.8169793486595154, "step": 3615 }, { "epoch": 0.8331797235023042, "grad_norm": 0.920659117563804, "learning_rate": 1.3512321438634392e-06, "loss": 0.6901019811630249, "step": 3616 }, { "epoch": 0.8334101382488479, "grad_norm": 1.350300242304736, "learning_rate": 1.3508754076393133e-06, "loss": 0.868461012840271, "step": 3617 }, { "epoch": 0.8336405529953917, "grad_norm": 0.9765625383196332, "learning_rate": 1.3505186204860864e-06, "loss": 0.7916195392608643, "step": 3618 }, { "epoch": 0.8338709677419355, "grad_norm": 0.9685384546753151, "learning_rate": 1.3501617824555456e-06, "loss": 0.7078498601913452, "step": 3619 }, { "epoch": 0.8341013824884793, "grad_norm": 1.2242730037688179, "learning_rate": 1.3498048935994857e-06, "loss": 0.890669584274292, "step": 3620 }, { "epoch": 0.834331797235023, "grad_norm": 0.8358453705503323, "learning_rate": 1.3494479539697087e-06, "loss": 0.8162761926651001, "step": 3621 }, { "epoch": 0.8345622119815668, "grad_norm": 1.013077112717635, "learning_rate": 1.3490909636180233e-06, "loss": 0.7743235230445862, "step": 3622 }, { "epoch": 0.8347926267281106, "grad_norm": 1.0099386147746707, "learning_rate": 1.3487339225962472e-06, "loss": 0.8297950029373169, "step": 3623 }, { "epoch": 0.8350230414746543, "grad_norm": 1.1865830325248257, "learning_rate": 1.3483768309562035e-06, "loss": 0.9550352692604065, "step": 3624 }, { "epoch": 0.8352534562211982, "grad_norm": 0.9576603479694407, "learning_rate": 1.3480196887497242e-06, "loss": 0.7343823909759521, "step": 3625 }, { "epoch": 0.8354838709677419, "grad_norm": 1.0312198523972542, "learning_rate": 1.3476624960286479e-06, "loss": 0.8942683935165405, "step": 3626 }, { "epoch": 0.8357142857142857, "grad_norm": 1.0216203737583824, "learning_rate": 1.34730525284482e-06, "loss": 0.778289794921875, "step": 3627 }, { "epoch": 0.8359447004608295, "grad_norm": 0.8374039418656565, "learning_rate": 1.3469479592500951e-06, "loss": 0.5924088954925537, "step": 3628 }, { "epoch": 0.8361751152073733, "grad_norm": 1.6640914693337763, "learning_rate": 1.3465906152963329e-06, "loss": 1.0363706350326538, "step": 3629 }, { "epoch": 0.836405529953917, "grad_norm": 1.1094517477504633, "learning_rate": 1.346233221035402e-06, "loss": 0.7927669286727905, "step": 3630 }, { "epoch": 0.8366359447004609, "grad_norm": 1.017803676905956, "learning_rate": 1.345875776519177e-06, "loss": 0.8428707718849182, "step": 3631 }, { "epoch": 0.8368663594470046, "grad_norm": 1.0894705086513103, "learning_rate": 1.345518281799541e-06, "loss": 0.7975403070449829, "step": 3632 }, { "epoch": 0.8370967741935483, "grad_norm": 1.0032068733109394, "learning_rate": 1.3451607369283842e-06, "loss": 0.8383880853652954, "step": 3633 }, { "epoch": 0.8373271889400922, "grad_norm": 1.007543360201824, "learning_rate": 1.3448031419576028e-06, "loss": 0.9033386707305908, "step": 3634 }, { "epoch": 0.8375576036866359, "grad_norm": 1.1312406567077748, "learning_rate": 1.3444454969391021e-06, "loss": 0.8913514018058777, "step": 3635 }, { "epoch": 0.8377880184331797, "grad_norm": 1.4041014769308477, "learning_rate": 1.3440878019247936e-06, "loss": 0.9051915407180786, "step": 3636 }, { "epoch": 0.8380184331797235, "grad_norm": 0.9777048211867199, "learning_rate": 1.343730056966596e-06, "loss": 0.8240993618965149, "step": 3637 }, { "epoch": 0.8382488479262673, "grad_norm": 1.1788464491037272, "learning_rate": 1.3433722621164358e-06, "loss": 0.8276345133781433, "step": 3638 }, { "epoch": 0.838479262672811, "grad_norm": 1.1512835626079758, "learning_rate": 1.343014417426246e-06, "loss": 0.8250508904457092, "step": 3639 }, { "epoch": 0.8387096774193549, "grad_norm": 1.0066201319773938, "learning_rate": 1.342656522947968e-06, "loss": 0.7872868180274963, "step": 3640 }, { "epoch": 0.8389400921658986, "grad_norm": 0.8473767849665474, "learning_rate": 1.3422985787335491e-06, "loss": 0.7634146809577942, "step": 3641 }, { "epoch": 0.8391705069124424, "grad_norm": 0.9991956505737468, "learning_rate": 1.3419405848349448e-06, "loss": 0.63923180103302, "step": 3642 }, { "epoch": 0.8394009216589862, "grad_norm": 0.8936657519523178, "learning_rate": 1.3415825413041173e-06, "loss": 0.900942325592041, "step": 3643 }, { "epoch": 0.83963133640553, "grad_norm": 0.8086145892134451, "learning_rate": 1.341224448193036e-06, "loss": 0.6415199041366577, "step": 3644 }, { "epoch": 0.8398617511520737, "grad_norm": 0.7541710851332, "learning_rate": 1.3408663055536775e-06, "loss": 0.7750275135040283, "step": 3645 }, { "epoch": 0.8400921658986175, "grad_norm": 1.0677810215945565, "learning_rate": 1.3405081134380264e-06, "loss": 0.8159983158111572, "step": 3646 }, { "epoch": 0.8403225806451613, "grad_norm": 1.0361250834896671, "learning_rate": 1.3401498718980733e-06, "loss": 0.6870952844619751, "step": 3647 }, { "epoch": 0.840552995391705, "grad_norm": 1.0057736881312165, "learning_rate": 1.3397915809858168e-06, "loss": 0.8588749170303345, "step": 3648 }, { "epoch": 0.8407834101382489, "grad_norm": 0.8944864050117411, "learning_rate": 1.3394332407532619e-06, "loss": 0.6926778554916382, "step": 3649 }, { "epoch": 0.8410138248847926, "grad_norm": 0.9996715673645244, "learning_rate": 1.3390748512524213e-06, "loss": 0.7165309190750122, "step": 3650 }, { "epoch": 0.8412442396313364, "grad_norm": 0.8676606625906299, "learning_rate": 1.3387164125353149e-06, "loss": 0.7782741189002991, "step": 3651 }, { "epoch": 0.8414746543778802, "grad_norm": 1.2076812224962883, "learning_rate": 1.3383579246539698e-06, "loss": 0.9153795838356018, "step": 3652 }, { "epoch": 0.841705069124424, "grad_norm": 0.9194313077193984, "learning_rate": 1.33799938766042e-06, "loss": 0.8419643044471741, "step": 3653 }, { "epoch": 0.8419354838709677, "grad_norm": 0.9325821466469247, "learning_rate": 1.3376408016067064e-06, "loss": 0.6927728652954102, "step": 3654 }, { "epoch": 0.8421658986175116, "grad_norm": 0.8795285549516815, "learning_rate": 1.3372821665448774e-06, "loss": 0.7721414566040039, "step": 3655 }, { "epoch": 0.8423963133640553, "grad_norm": 0.8650877944504008, "learning_rate": 1.3369234825269887e-06, "loss": 0.7277967929840088, "step": 3656 }, { "epoch": 0.8426267281105991, "grad_norm": 0.8893990009557013, "learning_rate": 1.336564749605102e-06, "loss": 0.7764936089515686, "step": 3657 }, { "epoch": 0.8428571428571429, "grad_norm": 1.0366422012708214, "learning_rate": 1.336205967831288e-06, "loss": 0.7445545196533203, "step": 3658 }, { "epoch": 0.8430875576036866, "grad_norm": 0.9883734306246509, "learning_rate": 1.3358471372576227e-06, "loss": 0.8359465599060059, "step": 3659 }, { "epoch": 0.8433179723502304, "grad_norm": 1.1992732184975974, "learning_rate": 1.33548825793619e-06, "loss": 0.8634141683578491, "step": 3660 }, { "epoch": 0.8435483870967742, "grad_norm": 0.9932267949840192, "learning_rate": 1.3351293299190804e-06, "loss": 0.7365708351135254, "step": 3661 }, { "epoch": 0.843778801843318, "grad_norm": 1.0553779905834517, "learning_rate": 1.3347703532583927e-06, "loss": 0.7135465145111084, "step": 3662 }, { "epoch": 0.8440092165898617, "grad_norm": 0.9366872036776951, "learning_rate": 1.3344113280062313e-06, "loss": 0.7411447763442993, "step": 3663 }, { "epoch": 0.8442396313364056, "grad_norm": 1.1654296408446096, "learning_rate": 1.3340522542147081e-06, "loss": 0.7765100002288818, "step": 3664 }, { "epoch": 0.8444700460829493, "grad_norm": 0.9657216098787882, "learning_rate": 1.3336931319359426e-06, "loss": 0.7638096809387207, "step": 3665 }, { "epoch": 0.8447004608294931, "grad_norm": 0.8148482611092309, "learning_rate": 1.3333339612220606e-06, "loss": 0.7114577889442444, "step": 3666 }, { "epoch": 0.8449308755760369, "grad_norm": 1.075345107734405, "learning_rate": 1.3329747421251955e-06, "loss": 0.8702960014343262, "step": 3667 }, { "epoch": 0.8451612903225807, "grad_norm": 0.8702936794654799, "learning_rate": 1.3326154746974878e-06, "loss": 0.7248300313949585, "step": 3668 }, { "epoch": 0.8453917050691244, "grad_norm": 1.0810218150457531, "learning_rate": 1.332256158991084e-06, "loss": 0.7648389339447021, "step": 3669 }, { "epoch": 0.8456221198156681, "grad_norm": 1.1179174327015893, "learning_rate": 1.3318967950581383e-06, "loss": 0.7075401544570923, "step": 3670 }, { "epoch": 0.845852534562212, "grad_norm": 0.9497106076514022, "learning_rate": 1.3315373829508122e-06, "loss": 0.6923220157623291, "step": 3671 }, { "epoch": 0.8460829493087557, "grad_norm": 1.100773813694407, "learning_rate": 1.3311779227212742e-06, "loss": 0.7522361874580383, "step": 3672 }, { "epoch": 0.8463133640552996, "grad_norm": 1.026931960572947, "learning_rate": 1.3308184144216989e-06, "loss": 0.7087293863296509, "step": 3673 }, { "epoch": 0.8465437788018433, "grad_norm": 0.793322008156401, "learning_rate": 1.3304588581042688e-06, "loss": 0.782098650932312, "step": 3674 }, { "epoch": 0.8467741935483871, "grad_norm": 1.029621860148689, "learning_rate": 1.330099253821173e-06, "loss": 0.7671197652816772, "step": 3675 }, { "epoch": 0.8470046082949308, "grad_norm": 0.8604911309489864, "learning_rate": 1.3297396016246073e-06, "loss": 0.8098698258399963, "step": 3676 }, { "epoch": 0.8472350230414747, "grad_norm": 0.9021265860196932, "learning_rate": 1.3293799015667751e-06, "loss": 0.7671023011207581, "step": 3677 }, { "epoch": 0.8474654377880184, "grad_norm": 0.9115553667327773, "learning_rate": 1.3290201536998862e-06, "loss": 0.7448668479919434, "step": 3678 }, { "epoch": 0.8476958525345623, "grad_norm": 1.4463207292378697, "learning_rate": 1.3286603580761576e-06, "loss": 0.946117639541626, "step": 3679 }, { "epoch": 0.847926267281106, "grad_norm": 0.932975472082494, "learning_rate": 1.328300514747813e-06, "loss": 0.8134163618087769, "step": 3680 }, { "epoch": 0.8481566820276498, "grad_norm": 1.0433920810873991, "learning_rate": 1.327940623767083e-06, "loss": 0.725477933883667, "step": 3681 }, { "epoch": 0.8483870967741935, "grad_norm": 0.9434209059724857, "learning_rate": 1.3275806851862061e-06, "loss": 0.8278200626373291, "step": 3682 }, { "epoch": 0.8486175115207373, "grad_norm": 1.2837572025692205, "learning_rate": 1.327220699057426e-06, "loss": 0.8437181711196899, "step": 3683 }, { "epoch": 0.8488479262672811, "grad_norm": 1.0932618965520366, "learning_rate": 1.326860665432995e-06, "loss": 0.8921856880187988, "step": 3684 }, { "epoch": 0.8490783410138248, "grad_norm": 0.9850919430921788, "learning_rate": 1.326500584365171e-06, "loss": 0.7285119295120239, "step": 3685 }, { "epoch": 0.8493087557603687, "grad_norm": 1.0119244636074918, "learning_rate": 1.3261404559062196e-06, "loss": 0.8968918323516846, "step": 3686 }, { "epoch": 0.8495391705069124, "grad_norm": 0.9862869524570133, "learning_rate": 1.3257802801084123e-06, "loss": 0.6794285774230957, "step": 3687 }, { "epoch": 0.8497695852534562, "grad_norm": 1.1495746754769118, "learning_rate": 1.3254200570240291e-06, "loss": 0.869774341583252, "step": 3688 }, { "epoch": 0.85, "grad_norm": 1.1620464557259493, "learning_rate": 1.3250597867053553e-06, "loss": 0.7862332463264465, "step": 3689 }, { "epoch": 0.8502304147465438, "grad_norm": 1.1253065949092746, "learning_rate": 1.3246994692046835e-06, "loss": 0.8424299955368042, "step": 3690 }, { "epoch": 0.8504608294930875, "grad_norm": 0.7041532260107465, "learning_rate": 1.3243391045743137e-06, "loss": 0.6232138276100159, "step": 3691 }, { "epoch": 0.8506912442396314, "grad_norm": 0.9563538572085633, "learning_rate": 1.3239786928665523e-06, "loss": 0.7108159065246582, "step": 3692 }, { "epoch": 0.8509216589861751, "grad_norm": 1.0262733388108027, "learning_rate": 1.3236182341337126e-06, "loss": 0.7282330393791199, "step": 3693 }, { "epoch": 0.8511520737327188, "grad_norm": 1.2079736335999256, "learning_rate": 1.3232577284281147e-06, "loss": 0.7864304780960083, "step": 3694 }, { "epoch": 0.8513824884792627, "grad_norm": 0.9682428596442779, "learning_rate": 1.3228971758020852e-06, "loss": 0.7826365232467651, "step": 3695 }, { "epoch": 0.8516129032258064, "grad_norm": 1.0308498953586989, "learning_rate": 1.322536576307958e-06, "loss": 0.8429988026618958, "step": 3696 }, { "epoch": 0.8518433179723502, "grad_norm": 1.106791902142165, "learning_rate": 1.322175929998074e-06, "loss": 0.771148145198822, "step": 3697 }, { "epoch": 0.852073732718894, "grad_norm": 1.2323556662321768, "learning_rate": 1.3218152369247804e-06, "loss": 0.9610496759414673, "step": 3698 }, { "epoch": 0.8523041474654378, "grad_norm": 1.0124488299649408, "learning_rate": 1.321454497140431e-06, "loss": 0.7286547422409058, "step": 3699 }, { "epoch": 0.8525345622119815, "grad_norm": 0.8362780560832063, "learning_rate": 1.321093710697387e-06, "loss": 0.7446750402450562, "step": 3700 }, { "epoch": 0.8527649769585254, "grad_norm": 0.8774754337310029, "learning_rate": 1.3207328776480156e-06, "loss": 0.7211639881134033, "step": 3701 }, { "epoch": 0.8529953917050691, "grad_norm": 0.9667628641735269, "learning_rate": 1.320371998044692e-06, "loss": 0.765962541103363, "step": 3702 }, { "epoch": 0.853225806451613, "grad_norm": 1.0775083181101466, "learning_rate": 1.3200110719397967e-06, "loss": 0.9090084433555603, "step": 3703 }, { "epoch": 0.8534562211981567, "grad_norm": 0.9604272002153474, "learning_rate": 1.319650099385718e-06, "loss": 0.8222901225090027, "step": 3704 }, { "epoch": 0.8536866359447005, "grad_norm": 1.0297311955715076, "learning_rate": 1.3192890804348508e-06, "loss": 0.7929965853691101, "step": 3705 }, { "epoch": 0.8539170506912442, "grad_norm": 0.9788103737354025, "learning_rate": 1.318928015139596e-06, "loss": 0.89229816198349, "step": 3706 }, { "epoch": 0.854147465437788, "grad_norm": 1.1185541946390394, "learning_rate": 1.3185669035523621e-06, "loss": 0.8348276615142822, "step": 3707 }, { "epoch": 0.8543778801843318, "grad_norm": 1.0960703003892842, "learning_rate": 1.3182057457255639e-06, "loss": 0.9006820917129517, "step": 3708 }, { "epoch": 0.8546082949308755, "grad_norm": 0.8300224623954644, "learning_rate": 1.3178445417116233e-06, "loss": 0.665691614151001, "step": 3709 }, { "epoch": 0.8548387096774194, "grad_norm": 0.6677558949928035, "learning_rate": 1.3174832915629677e-06, "loss": 0.7073110342025757, "step": 3710 }, { "epoch": 0.8550691244239631, "grad_norm": 1.0807205184602706, "learning_rate": 1.317121995332033e-06, "loss": 0.7125800848007202, "step": 3711 }, { "epoch": 0.8552995391705069, "grad_norm": 1.1504081133401938, "learning_rate": 1.31676065307126e-06, "loss": 0.847205638885498, "step": 3712 }, { "epoch": 0.8555299539170507, "grad_norm": 1.1272186923536152, "learning_rate": 1.3163992648330979e-06, "loss": 0.860866904258728, "step": 3713 }, { "epoch": 0.8557603686635945, "grad_norm": 0.9974272492162177, "learning_rate": 1.3160378306700014e-06, "loss": 0.811161994934082, "step": 3714 }, { "epoch": 0.8559907834101382, "grad_norm": 1.059693566679631, "learning_rate": 1.3156763506344318e-06, "loss": 1.0276790857315063, "step": 3715 }, { "epoch": 0.8562211981566821, "grad_norm": 0.8617440282777447, "learning_rate": 1.3153148247788584e-06, "loss": 0.7462253570556641, "step": 3716 }, { "epoch": 0.8564516129032258, "grad_norm": 1.281384523734545, "learning_rate": 1.314953253155755e-06, "loss": 0.9181896448135376, "step": 3717 }, { "epoch": 0.8566820276497696, "grad_norm": 0.7940667691684741, "learning_rate": 1.3145916358176044e-06, "loss": 0.5943678021430969, "step": 3718 }, { "epoch": 0.8569124423963134, "grad_norm": 0.9268739898787507, "learning_rate": 1.3142299728168942e-06, "loss": 0.7908656597137451, "step": 3719 }, { "epoch": 0.8571428571428571, "grad_norm": 1.2242140267734891, "learning_rate": 1.3138682642061192e-06, "loss": 0.8716393709182739, "step": 3720 }, { "epoch": 0.8573732718894009, "grad_norm": 0.9921811812486295, "learning_rate": 1.3135065100377814e-06, "loss": 0.76909339427948, "step": 3721 }, { "epoch": 0.8576036866359447, "grad_norm": 1.0272733292998222, "learning_rate": 1.3131447103643884e-06, "loss": 0.7896728515625, "step": 3722 }, { "epoch": 0.8578341013824885, "grad_norm": 1.0326134494637835, "learning_rate": 1.3127828652384554e-06, "loss": 0.8458575010299683, "step": 3723 }, { "epoch": 0.8580645161290322, "grad_norm": 0.9849414066001893, "learning_rate": 1.3124209747125036e-06, "loss": 0.7419729232788086, "step": 3724 }, { "epoch": 0.8582949308755761, "grad_norm": 0.9131603734827297, "learning_rate": 1.3120590388390608e-06, "loss": 0.8801093697547913, "step": 3725 }, { "epoch": 0.8585253456221198, "grad_norm": 0.7986933302941567, "learning_rate": 1.3116970576706617e-06, "loss": 0.6337816715240479, "step": 3726 }, { "epoch": 0.8587557603686636, "grad_norm": 1.1352865331161706, "learning_rate": 1.3113350312598472e-06, "loss": 0.8099665641784668, "step": 3727 }, { "epoch": 0.8589861751152074, "grad_norm": 1.0467011868433627, "learning_rate": 1.3109729596591651e-06, "loss": 0.7430413961410522, "step": 3728 }, { "epoch": 0.8592165898617512, "grad_norm": 1.0569982664185076, "learning_rate": 1.3106108429211699e-06, "loss": 0.7374905347824097, "step": 3729 }, { "epoch": 0.8594470046082949, "grad_norm": 0.7857724004075162, "learning_rate": 1.3102486810984217e-06, "loss": 0.71753990650177, "step": 3730 }, { "epoch": 0.8596774193548387, "grad_norm": 1.0554970253272185, "learning_rate": 1.3098864742434885e-06, "loss": 0.9126461744308472, "step": 3731 }, { "epoch": 0.8599078341013825, "grad_norm": 1.1141466235187625, "learning_rate": 1.3095242224089434e-06, "loss": 0.846487283706665, "step": 3732 }, { "epoch": 0.8601382488479262, "grad_norm": 0.9640305278845377, "learning_rate": 1.3091619256473671e-06, "loss": 0.7026070952415466, "step": 3733 }, { "epoch": 0.8603686635944701, "grad_norm": 1.2209599470129553, "learning_rate": 1.3087995840113471e-06, "loss": 1.0044158697128296, "step": 3734 }, { "epoch": 0.8605990783410138, "grad_norm": 1.2732308696122019, "learning_rate": 1.3084371975534759e-06, "loss": 0.8061608076095581, "step": 3735 }, { "epoch": 0.8608294930875576, "grad_norm": 1.2155874878372677, "learning_rate": 1.308074766326354e-06, "loss": 0.9189345836639404, "step": 3736 }, { "epoch": 0.8610599078341014, "grad_norm": 3.0839554304770314, "learning_rate": 1.3077122903825875e-06, "loss": 0.8183290958404541, "step": 3737 }, { "epoch": 0.8612903225806452, "grad_norm": 0.9202037098580877, "learning_rate": 1.3073497697747893e-06, "loss": 0.860893726348877, "step": 3738 }, { "epoch": 0.8615207373271889, "grad_norm": 0.7717429741205805, "learning_rate": 1.306987204555579e-06, "loss": 0.6732957363128662, "step": 3739 }, { "epoch": 0.8617511520737328, "grad_norm": 0.9444170667577415, "learning_rate": 1.3066245947775821e-06, "loss": 0.7910758256912231, "step": 3740 }, { "epoch": 0.8619815668202765, "grad_norm": 1.316217805471382, "learning_rate": 1.3062619404934317e-06, "loss": 0.9422181844711304, "step": 3741 }, { "epoch": 0.8622119815668203, "grad_norm": 0.9698503213179374, "learning_rate": 1.3058992417557657e-06, "loss": 0.7731142044067383, "step": 3742 }, { "epoch": 0.8624423963133641, "grad_norm": 0.9561313394387324, "learning_rate": 1.3055364986172296e-06, "loss": 0.8419089317321777, "step": 3743 }, { "epoch": 0.8626728110599078, "grad_norm": 0.8852750785802604, "learning_rate": 1.3051737111304757e-06, "loss": 0.7535419464111328, "step": 3744 }, { "epoch": 0.8629032258064516, "grad_norm": 0.8636514927767351, "learning_rate": 1.3048108793481614e-06, "loss": 0.7744847536087036, "step": 3745 }, { "epoch": 0.8631336405529954, "grad_norm": 1.04058809416254, "learning_rate": 1.3044480033229513e-06, "loss": 0.7578398585319519, "step": 3746 }, { "epoch": 0.8633640552995392, "grad_norm": 1.2334871836764278, "learning_rate": 1.3040850831075168e-06, "loss": 0.8767418265342712, "step": 3747 }, { "epoch": 0.8635944700460829, "grad_norm": 1.1256734507930313, "learning_rate": 1.303722118754535e-06, "loss": 0.7484671473503113, "step": 3748 }, { "epoch": 0.8638248847926268, "grad_norm": 0.9064086460386975, "learning_rate": 1.3033591103166897e-06, "loss": 0.7231101989746094, "step": 3749 }, { "epoch": 0.8640552995391705, "grad_norm": 0.896473034432068, "learning_rate": 1.3029960578466709e-06, "loss": 0.7626307606697083, "step": 3750 }, { "epoch": 0.8642857142857143, "grad_norm": 1.0608055188685264, "learning_rate": 1.302632961397176e-06, "loss": 0.7244704961776733, "step": 3751 }, { "epoch": 0.864516129032258, "grad_norm": 1.0368271143877468, "learning_rate": 1.3022698210209066e-06, "loss": 0.8575884103775024, "step": 3752 }, { "epoch": 0.8647465437788019, "grad_norm": 1.050928094888414, "learning_rate": 1.3019066367705733e-06, "loss": 0.7617322206497192, "step": 3753 }, { "epoch": 0.8649769585253456, "grad_norm": 1.0524737157850867, "learning_rate": 1.3015434086988914e-06, "loss": 0.7899904251098633, "step": 3754 }, { "epoch": 0.8652073732718893, "grad_norm": 0.7826254299372721, "learning_rate": 1.3011801368585825e-06, "loss": 0.6405949592590332, "step": 3755 }, { "epoch": 0.8654377880184332, "grad_norm": 1.004484214855527, "learning_rate": 1.300816821302376e-06, "loss": 0.8473223447799683, "step": 3756 }, { "epoch": 0.8656682027649769, "grad_norm": 1.0318183916575985, "learning_rate": 1.3004534620830059e-06, "loss": 0.7843037843704224, "step": 3757 }, { "epoch": 0.8658986175115208, "grad_norm": 0.8527211236886993, "learning_rate": 1.3000900592532134e-06, "loss": 0.7418329119682312, "step": 3758 }, { "epoch": 0.8661290322580645, "grad_norm": 1.1686967012789897, "learning_rate": 1.2997266128657462e-06, "loss": 0.9007542133331299, "step": 3759 }, { "epoch": 0.8663594470046083, "grad_norm": 1.0002999248018631, "learning_rate": 1.2993631229733582e-06, "loss": 0.7214536666870117, "step": 3760 }, { "epoch": 0.866589861751152, "grad_norm": 1.060698383579802, "learning_rate": 1.2989995896288085e-06, "loss": 0.6538300514221191, "step": 3761 }, { "epoch": 0.8668202764976959, "grad_norm": 0.8939424364373206, "learning_rate": 1.2986360128848647e-06, "loss": 0.8132497668266296, "step": 3762 }, { "epoch": 0.8670506912442396, "grad_norm": 1.2692579875098073, "learning_rate": 1.2982723927942987e-06, "loss": 0.8940386176109314, "step": 3763 }, { "epoch": 0.8672811059907835, "grad_norm": 0.9095968882110219, "learning_rate": 1.2979087294098904e-06, "loss": 0.7426153421401978, "step": 3764 }, { "epoch": 0.8675115207373272, "grad_norm": 1.2314721218727755, "learning_rate": 1.2975450227844236e-06, "loss": 0.8140754103660583, "step": 3765 }, { "epoch": 0.867741935483871, "grad_norm": 1.165847048536148, "learning_rate": 1.2971812729706907e-06, "loss": 0.9078278541564941, "step": 3766 }, { "epoch": 0.8679723502304147, "grad_norm": 0.8581444329277982, "learning_rate": 1.29681748002149e-06, "loss": 0.6632627248764038, "step": 3767 }, { "epoch": 0.8682027649769585, "grad_norm": 1.0737542944031577, "learning_rate": 1.2964536439896245e-06, "loss": 0.913419246673584, "step": 3768 }, { "epoch": 0.8684331797235023, "grad_norm": 0.9232699220030103, "learning_rate": 1.2960897649279054e-06, "loss": 0.776391863822937, "step": 3769 }, { "epoch": 0.868663594470046, "grad_norm": 0.7836255693570048, "learning_rate": 1.2957258428891488e-06, "loss": 0.7171014547348022, "step": 3770 }, { "epoch": 0.8688940092165899, "grad_norm": 1.072840063629104, "learning_rate": 1.2953618779261776e-06, "loss": 0.8848521709442139, "step": 3771 }, { "epoch": 0.8691244239631336, "grad_norm": 0.9374655640180731, "learning_rate": 1.2949978700918207e-06, "loss": 0.6794570684432983, "step": 3772 }, { "epoch": 0.8693548387096774, "grad_norm": 1.1765914680464367, "learning_rate": 1.2946338194389137e-06, "loss": 0.7128770351409912, "step": 3773 }, { "epoch": 0.8695852534562212, "grad_norm": 1.0061805151394425, "learning_rate": 1.2942697260202976e-06, "loss": 0.7794370651245117, "step": 3774 }, { "epoch": 0.869815668202765, "grad_norm": 0.8201503807835805, "learning_rate": 1.2939055898888203e-06, "loss": 0.7946528196334839, "step": 3775 }, { "epoch": 0.8700460829493087, "grad_norm": 0.8253544658473864, "learning_rate": 1.2935414110973357e-06, "loss": 0.7052137851715088, "step": 3776 }, { "epoch": 0.8702764976958526, "grad_norm": 1.1148062721900278, "learning_rate": 1.293177189698704e-06, "loss": 0.785929799079895, "step": 3777 }, { "epoch": 0.8705069124423963, "grad_norm": 1.0434715730493578, "learning_rate": 1.2928129257457915e-06, "loss": 0.7907861471176147, "step": 3778 }, { "epoch": 0.8707373271889401, "grad_norm": 1.0141295879138945, "learning_rate": 1.2924486192914704e-06, "loss": 0.9145845770835876, "step": 3779 }, { "epoch": 0.8709677419354839, "grad_norm": 1.2821040685334846, "learning_rate": 1.2920842703886191e-06, "loss": 0.8332167863845825, "step": 3780 }, { "epoch": 0.8711981566820276, "grad_norm": 1.1443987508087015, "learning_rate": 1.2917198790901229e-06, "loss": 0.9593367576599121, "step": 3781 }, { "epoch": 0.8714285714285714, "grad_norm": 1.1001262078147525, "learning_rate": 1.2913554454488723e-06, "loss": 0.9269144535064697, "step": 3782 }, { "epoch": 0.8716589861751152, "grad_norm": 0.8577227656018163, "learning_rate": 1.2909909695177645e-06, "loss": 0.8474053144454956, "step": 3783 }, { "epoch": 0.871889400921659, "grad_norm": 1.0482742591675172, "learning_rate": 1.2906264513497027e-06, "loss": 0.8098207116127014, "step": 3784 }, { "epoch": 0.8721198156682027, "grad_norm": 0.9400670599728106, "learning_rate": 1.2902618909975962e-06, "loss": 0.7394517064094543, "step": 3785 }, { "epoch": 0.8723502304147466, "grad_norm": 1.199479550356467, "learning_rate": 1.2898972885143606e-06, "loss": 0.8667110204696655, "step": 3786 }, { "epoch": 0.8725806451612903, "grad_norm": 1.2600204383371998, "learning_rate": 1.289532643952917e-06, "loss": 0.826819121837616, "step": 3787 }, { "epoch": 0.8728110599078341, "grad_norm": 0.9212030006613351, "learning_rate": 1.2891679573661937e-06, "loss": 0.7765695452690125, "step": 3788 }, { "epoch": 0.8730414746543779, "grad_norm": 0.8409152224560986, "learning_rate": 1.2888032288071245e-06, "loss": 0.7180448770523071, "step": 3789 }, { "epoch": 0.8732718894009217, "grad_norm": 0.9734045628890519, "learning_rate": 1.2884384583286486e-06, "loss": 0.7619662880897522, "step": 3790 }, { "epoch": 0.8735023041474654, "grad_norm": 1.0439158459354512, "learning_rate": 1.2880736459837123e-06, "loss": 0.8332309126853943, "step": 3791 }, { "epoch": 0.8737327188940092, "grad_norm": 1.019583919621154, "learning_rate": 1.2877087918252676e-06, "loss": 0.9314864277839661, "step": 3792 }, { "epoch": 0.873963133640553, "grad_norm": 1.0252621742811456, "learning_rate": 1.287343895906273e-06, "loss": 0.8505650758743286, "step": 3793 }, { "epoch": 0.8741935483870967, "grad_norm": 1.1808911521686665, "learning_rate": 1.286978958279692e-06, "loss": 0.8086442351341248, "step": 3794 }, { "epoch": 0.8744239631336406, "grad_norm": 0.9931096763073582, "learning_rate": 1.2866139789984951e-06, "loss": 0.9369934797286987, "step": 3795 }, { "epoch": 0.8746543778801843, "grad_norm": 1.0923174237783717, "learning_rate": 1.2862489581156585e-06, "loss": 0.6776204705238342, "step": 3796 }, { "epoch": 0.8748847926267281, "grad_norm": 1.1437930163109349, "learning_rate": 1.2858838956841646e-06, "loss": 0.8742507100105286, "step": 3797 }, { "epoch": 0.8751152073732719, "grad_norm": 0.8088256156858264, "learning_rate": 1.285518791757002e-06, "loss": 0.6592123508453369, "step": 3798 }, { "epoch": 0.8753456221198157, "grad_norm": 1.064419209573929, "learning_rate": 1.2851536463871646e-06, "loss": 0.727974534034729, "step": 3799 }, { "epoch": 0.8755760368663594, "grad_norm": 1.1114963626056278, "learning_rate": 1.284788459627653e-06, "loss": 0.734921395778656, "step": 3800 }, { "epoch": 0.8758064516129033, "grad_norm": 1.1341924912712853, "learning_rate": 1.2844232315314734e-06, "loss": 0.8848391771316528, "step": 3801 }, { "epoch": 0.876036866359447, "grad_norm": 0.9036415522550547, "learning_rate": 1.284057962151638e-06, "loss": 0.7014757394790649, "step": 3802 }, { "epoch": 0.8762672811059908, "grad_norm": 1.1253352689452834, "learning_rate": 1.2836926515411662e-06, "loss": 0.9037606716156006, "step": 3803 }, { "epoch": 0.8764976958525346, "grad_norm": 1.0304179621449525, "learning_rate": 1.2833272997530808e-06, "loss": 0.7842103242874146, "step": 3804 }, { "epoch": 0.8767281105990783, "grad_norm": 0.8881021582469312, "learning_rate": 1.282961906840413e-06, "loss": 0.7233899831771851, "step": 3805 }, { "epoch": 0.8769585253456221, "grad_norm": 1.0965629604169354, "learning_rate": 1.2825964728561995e-06, "loss": 0.8439977169036865, "step": 3806 }, { "epoch": 0.8771889400921659, "grad_norm": 0.9011702646392625, "learning_rate": 1.2822309978534817e-06, "loss": 0.6734062433242798, "step": 3807 }, { "epoch": 0.8774193548387097, "grad_norm": 0.8611901516189409, "learning_rate": 1.2818654818853082e-06, "loss": 0.8132908344268799, "step": 3808 }, { "epoch": 0.8776497695852534, "grad_norm": 1.0055540352806662, "learning_rate": 1.2814999250047334e-06, "loss": 0.7867386341094971, "step": 3809 }, { "epoch": 0.8778801843317973, "grad_norm": 0.9631857828899055, "learning_rate": 1.2811343272648172e-06, "loss": 0.7367507219314575, "step": 3810 }, { "epoch": 0.878110599078341, "grad_norm": 0.9475758390620135, "learning_rate": 1.280768688718625e-06, "loss": 0.8154586553573608, "step": 3811 }, { "epoch": 0.8783410138248848, "grad_norm": 1.2471162716233217, "learning_rate": 1.2804030094192297e-06, "loss": 0.9962621331214905, "step": 3812 }, { "epoch": 0.8785714285714286, "grad_norm": 0.9442759022004834, "learning_rate": 1.280037289419709e-06, "loss": 0.8720508813858032, "step": 3813 }, { "epoch": 0.8788018433179724, "grad_norm": 0.9970556206238078, "learning_rate": 1.2796715287731461e-06, "loss": 0.7211558818817139, "step": 3814 }, { "epoch": 0.8790322580645161, "grad_norm": 1.0985560987492957, "learning_rate": 1.279305727532631e-06, "loss": 0.8354029059410095, "step": 3815 }, { "epoch": 0.8792626728110599, "grad_norm": 1.2983425606164107, "learning_rate": 1.2789398857512597e-06, "loss": 0.9136772155761719, "step": 3816 }, { "epoch": 0.8794930875576037, "grad_norm": 1.099731879502331, "learning_rate": 1.2785740034821328e-06, "loss": 0.7603391408920288, "step": 3817 }, { "epoch": 0.8797235023041474, "grad_norm": 1.0043618459346715, "learning_rate": 1.2782080807783582e-06, "loss": 0.8938640356063843, "step": 3818 }, { "epoch": 0.8799539170506913, "grad_norm": 0.9668042432935031, "learning_rate": 1.2778421176930492e-06, "loss": 0.8041675090789795, "step": 3819 }, { "epoch": 0.880184331797235, "grad_norm": 0.858269124078789, "learning_rate": 1.2774761142793246e-06, "loss": 0.7128704786300659, "step": 3820 }, { "epoch": 0.8804147465437788, "grad_norm": 1.01263470571454, "learning_rate": 1.277110070590309e-06, "loss": 0.7927603721618652, "step": 3821 }, { "epoch": 0.8806451612903226, "grad_norm": 0.8447601312860044, "learning_rate": 1.2767439866791342e-06, "loss": 0.8294891119003296, "step": 3822 }, { "epoch": 0.8808755760368664, "grad_norm": 1.0620381421224903, "learning_rate": 1.2763778625989354e-06, "loss": 0.8058860301971436, "step": 3823 }, { "epoch": 0.8811059907834101, "grad_norm": 1.1264235058600618, "learning_rate": 1.2760116984028559e-06, "loss": 0.9073271751403809, "step": 3824 }, { "epoch": 0.881336405529954, "grad_norm": 0.9871957246708625, "learning_rate": 1.2756454941440439e-06, "loss": 0.755131721496582, "step": 3825 }, { "epoch": 0.8815668202764977, "grad_norm": 0.9177831986454672, "learning_rate": 1.2752792498756532e-06, "loss": 0.7571133375167847, "step": 3826 }, { "epoch": 0.8817972350230415, "grad_norm": 1.0303718222421674, "learning_rate": 1.2749129656508438e-06, "loss": 0.8021755218505859, "step": 3827 }, { "epoch": 0.8820276497695853, "grad_norm": 0.9628359079626025, "learning_rate": 1.2745466415227812e-06, "loss": 0.7817519903182983, "step": 3828 }, { "epoch": 0.882258064516129, "grad_norm": 0.9923984386602839, "learning_rate": 1.2741802775446375e-06, "loss": 0.7144416570663452, "step": 3829 }, { "epoch": 0.8824884792626728, "grad_norm": 1.1770010674703593, "learning_rate": 1.2738138737695894e-06, "loss": 0.8154206275939941, "step": 3830 }, { "epoch": 0.8827188940092165, "grad_norm": 1.0860031408073831, "learning_rate": 1.2734474302508199e-06, "loss": 0.7478733062744141, "step": 3831 }, { "epoch": 0.8829493087557604, "grad_norm": 0.9998255564669785, "learning_rate": 1.2730809470415177e-06, "loss": 0.7792314291000366, "step": 3832 }, { "epoch": 0.8831797235023041, "grad_norm": 1.1952265957395494, "learning_rate": 1.2727144241948776e-06, "loss": 0.8550708293914795, "step": 3833 }, { "epoch": 0.883410138248848, "grad_norm": 1.14972903127367, "learning_rate": 1.2723478617641e-06, "loss": 0.9415113925933838, "step": 3834 }, { "epoch": 0.8836405529953917, "grad_norm": 1.1062517985394071, "learning_rate": 1.2719812598023909e-06, "loss": 0.8359560370445251, "step": 3835 }, { "epoch": 0.8838709677419355, "grad_norm": 1.2039080793867758, "learning_rate": 1.2716146183629618e-06, "loss": 0.9515634775161743, "step": 3836 }, { "epoch": 0.8841013824884792, "grad_norm": 1.1195735084656264, "learning_rate": 1.2712479374990302e-06, "loss": 0.9433277249336243, "step": 3837 }, { "epoch": 0.8843317972350231, "grad_norm": 1.022594144324791, "learning_rate": 1.27088121726382e-06, "loss": 0.809203028678894, "step": 3838 }, { "epoch": 0.8845622119815668, "grad_norm": 1.0243153152488458, "learning_rate": 1.2705144577105596e-06, "loss": 0.8003803491592407, "step": 3839 }, { "epoch": 0.8847926267281107, "grad_norm": 1.0509871208480976, "learning_rate": 1.2701476588924837e-06, "loss": 0.8258087038993835, "step": 3840 }, { "epoch": 0.8850230414746544, "grad_norm": 0.8336199164135607, "learning_rate": 1.2697808208628326e-06, "loss": 0.7337249517440796, "step": 3841 }, { "epoch": 0.8852534562211981, "grad_norm": 1.1988508685394492, "learning_rate": 1.269413943674853e-06, "loss": 0.6963306665420532, "step": 3842 }, { "epoch": 0.885483870967742, "grad_norm": 1.1494175494849699, "learning_rate": 1.2690470273817955e-06, "loss": 0.8849321603775024, "step": 3843 }, { "epoch": 0.8857142857142857, "grad_norm": 0.9311581320318796, "learning_rate": 1.2686800720369183e-06, "loss": 0.804117739200592, "step": 3844 }, { "epoch": 0.8859447004608295, "grad_norm": 0.9139368239237865, "learning_rate": 1.2683130776934848e-06, "loss": 0.7873985767364502, "step": 3845 }, { "epoch": 0.8861751152073732, "grad_norm": 1.0475484077031534, "learning_rate": 1.2679460444047627e-06, "loss": 0.7401156425476074, "step": 3846 }, { "epoch": 0.8864055299539171, "grad_norm": 1.1867976153376456, "learning_rate": 1.2675789722240274e-06, "loss": 0.8216343522071838, "step": 3847 }, { "epoch": 0.8866359447004608, "grad_norm": 1.1126927795380483, "learning_rate": 1.2672118612045583e-06, "loss": 0.9367883205413818, "step": 3848 }, { "epoch": 0.8868663594470046, "grad_norm": 1.333436966015092, "learning_rate": 1.2668447113996411e-06, "loss": 0.959208607673645, "step": 3849 }, { "epoch": 0.8870967741935484, "grad_norm": 1.019926575329533, "learning_rate": 1.2664775228625678e-06, "loss": 0.754011869430542, "step": 3850 }, { "epoch": 0.8873271889400922, "grad_norm": 1.0679613059424808, "learning_rate": 1.2661102956466343e-06, "loss": 0.7200918793678284, "step": 3851 }, { "epoch": 0.8875576036866359, "grad_norm": 1.1470470713937198, "learning_rate": 1.2657430298051441e-06, "loss": 0.7819997072219849, "step": 3852 }, { "epoch": 0.8877880184331797, "grad_norm": 0.7442261609023784, "learning_rate": 1.2653757253914045e-06, "loss": 0.6145305037498474, "step": 3853 }, { "epoch": 0.8880184331797235, "grad_norm": 1.0307629205268725, "learning_rate": 1.2650083824587298e-06, "loss": 0.8730908036231995, "step": 3854 }, { "epoch": 0.8882488479262672, "grad_norm": 0.8412211397931054, "learning_rate": 1.2646410010604395e-06, "loss": 0.7595944404602051, "step": 3855 }, { "epoch": 0.8884792626728111, "grad_norm": 1.1742884385001073, "learning_rate": 1.264273581249858e-06, "loss": 0.8533104658126831, "step": 3856 }, { "epoch": 0.8887096774193548, "grad_norm": 0.9075889816265436, "learning_rate": 1.263906123080316e-06, "loss": 0.7239818572998047, "step": 3857 }, { "epoch": 0.8889400921658986, "grad_norm": 1.1211735744208717, "learning_rate": 1.2635386266051498e-06, "loss": 0.7675650119781494, "step": 3858 }, { "epoch": 0.8891705069124424, "grad_norm": 1.03231156560467, "learning_rate": 1.2631710918777007e-06, "loss": 0.8886630535125732, "step": 3859 }, { "epoch": 0.8894009216589862, "grad_norm": 1.078590523668252, "learning_rate": 1.2628035189513159e-06, "loss": 0.798930287361145, "step": 3860 }, { "epoch": 0.8896313364055299, "grad_norm": 0.9635414297502106, "learning_rate": 1.2624359078793484e-06, "loss": 0.7189278602600098, "step": 3861 }, { "epoch": 0.8898617511520738, "grad_norm": 1.0909939790359444, "learning_rate": 1.2620682587151565e-06, "loss": 0.8187342882156372, "step": 3862 }, { "epoch": 0.8900921658986175, "grad_norm": 1.1174191800105742, "learning_rate": 1.2617005715121034e-06, "loss": 0.880839467048645, "step": 3863 }, { "epoch": 0.8903225806451613, "grad_norm": 0.9160208180175933, "learning_rate": 1.2613328463235586e-06, "loss": 0.84575355052948, "step": 3864 }, { "epoch": 0.8905529953917051, "grad_norm": 0.8361425077510937, "learning_rate": 1.2609650832028978e-06, "loss": 0.6823658347129822, "step": 3865 }, { "epoch": 0.8907834101382488, "grad_norm": 1.0695425966983703, "learning_rate": 1.2605972822035e-06, "loss": 0.8295711278915405, "step": 3866 }, { "epoch": 0.8910138248847926, "grad_norm": 1.1932993089448705, "learning_rate": 1.2602294433787518e-06, "loss": 0.8684213161468506, "step": 3867 }, { "epoch": 0.8912442396313364, "grad_norm": 0.8493371065418897, "learning_rate": 1.2598615667820447e-06, "loss": 0.6560889482498169, "step": 3868 }, { "epoch": 0.8914746543778802, "grad_norm": 1.0552959260029386, "learning_rate": 1.259493652466775e-06, "loss": 0.740487277507782, "step": 3869 }, { "epoch": 0.8917050691244239, "grad_norm": 0.9680726179927289, "learning_rate": 1.2591257004863453e-06, "loss": 0.8167253732681274, "step": 3870 }, { "epoch": 0.8919354838709678, "grad_norm": 0.8741208745575088, "learning_rate": 1.2587577108941634e-06, "loss": 0.8521690368652344, "step": 3871 }, { "epoch": 0.8921658986175115, "grad_norm": 1.263426910808872, "learning_rate": 1.2583896837436418e-06, "loss": 0.8830848932266235, "step": 3872 }, { "epoch": 0.8923963133640553, "grad_norm": 0.9234650272103238, "learning_rate": 1.2580216190881999e-06, "loss": 0.7080649137496948, "step": 3873 }, { "epoch": 0.8926267281105991, "grad_norm": 0.9098984938292525, "learning_rate": 1.2576535169812614e-06, "loss": 0.8013911247253418, "step": 3874 }, { "epoch": 0.8928571428571429, "grad_norm": 0.9781454154869316, "learning_rate": 1.2572853774762564e-06, "loss": 0.8307033777236938, "step": 3875 }, { "epoch": 0.8930875576036866, "grad_norm": 1.003074779947638, "learning_rate": 1.256917200626619e-06, "loss": 0.7514123916625977, "step": 3876 }, { "epoch": 0.8933179723502304, "grad_norm": 1.3024082731165083, "learning_rate": 1.2565489864857903e-06, "loss": 0.7608132362365723, "step": 3877 }, { "epoch": 0.8935483870967742, "grad_norm": 0.9570998315665514, "learning_rate": 1.256180735107216e-06, "loss": 0.8011139631271362, "step": 3878 }, { "epoch": 0.8937788018433179, "grad_norm": 1.134653936381734, "learning_rate": 1.2558124465443467e-06, "loss": 0.9760414958000183, "step": 3879 }, { "epoch": 0.8940092165898618, "grad_norm": 1.0547420638261442, "learning_rate": 1.2554441208506399e-06, "loss": 0.7292976379394531, "step": 3880 }, { "epoch": 0.8942396313364055, "grad_norm": 1.0683215421992245, "learning_rate": 1.255075758079557e-06, "loss": 0.819061279296875, "step": 3881 }, { "epoch": 0.8944700460829493, "grad_norm": 1.006803716245281, "learning_rate": 1.2547073582845652e-06, "loss": 0.8407306671142578, "step": 3882 }, { "epoch": 0.8947004608294931, "grad_norm": 0.8233707920449198, "learning_rate": 1.2543389215191379e-06, "loss": 0.7452164888381958, "step": 3883 }, { "epoch": 0.8949308755760369, "grad_norm": 1.049978361878961, "learning_rate": 1.2539704478367525e-06, "loss": 0.9001756310462952, "step": 3884 }, { "epoch": 0.8951612903225806, "grad_norm": 0.8057583780945189, "learning_rate": 1.253601937290893e-06, "loss": 0.7006322741508484, "step": 3885 }, { "epoch": 0.8953917050691245, "grad_norm": 0.9116907763776896, "learning_rate": 1.253233389935048e-06, "loss": 0.8464070558547974, "step": 3886 }, { "epoch": 0.8956221198156682, "grad_norm": 0.9768693849406578, "learning_rate": 1.2528648058227117e-06, "loss": 0.8153925538063049, "step": 3887 }, { "epoch": 0.895852534562212, "grad_norm": 0.9311867207234187, "learning_rate": 1.2524961850073835e-06, "loss": 0.7093103528022766, "step": 3888 }, { "epoch": 0.8960829493087558, "grad_norm": 0.8533841155936702, "learning_rate": 1.2521275275425685e-06, "loss": 0.676047682762146, "step": 3889 }, { "epoch": 0.8963133640552995, "grad_norm": 0.87097687176947, "learning_rate": 1.2517588334817765e-06, "loss": 0.6980170011520386, "step": 3890 }, { "epoch": 0.8965437788018433, "grad_norm": 0.9291831127411667, "learning_rate": 1.2513901028785232e-06, "loss": 0.7343952655792236, "step": 3891 }, { "epoch": 0.896774193548387, "grad_norm": 1.0285752510532034, "learning_rate": 1.251021335786329e-06, "loss": 0.6836012005805969, "step": 3892 }, { "epoch": 0.8970046082949309, "grad_norm": 0.9328635468922583, "learning_rate": 1.2506525322587204e-06, "loss": 0.7405731678009033, "step": 3893 }, { "epoch": 0.8972350230414746, "grad_norm": 0.9162563014074782, "learning_rate": 1.2502836923492288e-06, "loss": 0.7626791596412659, "step": 3894 }, { "epoch": 0.8974654377880185, "grad_norm": 0.8530894630449782, "learning_rate": 1.2499148161113904e-06, "loss": 0.951126754283905, "step": 3895 }, { "epoch": 0.8976958525345622, "grad_norm": 1.0356266230162976, "learning_rate": 1.249545903598747e-06, "loss": 0.8248430490493774, "step": 3896 }, { "epoch": 0.897926267281106, "grad_norm": 1.0696916510331513, "learning_rate": 1.2491769548648466e-06, "loss": 0.9306991100311279, "step": 3897 }, { "epoch": 0.8981566820276498, "grad_norm": 1.2546361240375576, "learning_rate": 1.2488079699632406e-06, "loss": 0.8529196977615356, "step": 3898 }, { "epoch": 0.8983870967741936, "grad_norm": 1.1432122269665714, "learning_rate": 1.2484389489474873e-06, "loss": 0.8614317178726196, "step": 3899 }, { "epoch": 0.8986175115207373, "grad_norm": 0.8777341649032664, "learning_rate": 1.2480698918711494e-06, "loss": 0.723548173904419, "step": 3900 }, { "epoch": 0.8988479262672812, "grad_norm": 0.8559428728446495, "learning_rate": 1.2477007987877953e-06, "loss": 0.9424235820770264, "step": 3901 }, { "epoch": 0.8990783410138249, "grad_norm": 1.1966583189697881, "learning_rate": 1.2473316697509982e-06, "loss": 0.8307658433914185, "step": 3902 }, { "epoch": 0.8993087557603686, "grad_norm": 0.9430977683906336, "learning_rate": 1.2469625048143364e-06, "loss": 0.7164772748947144, "step": 3903 }, { "epoch": 0.8995391705069125, "grad_norm": 1.0578567003352413, "learning_rate": 1.2465933040313941e-06, "loss": 0.824491024017334, "step": 3904 }, { "epoch": 0.8997695852534562, "grad_norm": 0.9955753469888821, "learning_rate": 1.24622406745576e-06, "loss": 0.7468826770782471, "step": 3905 }, { "epoch": 0.9, "grad_norm": 1.0419833775918754, "learning_rate": 1.2458547951410285e-06, "loss": 0.8049126863479614, "step": 3906 }, { "epoch": 0.9002304147465438, "grad_norm": 1.0794114769462158, "learning_rate": 1.245485487140799e-06, "loss": 0.658754825592041, "step": 3907 }, { "epoch": 0.9004608294930876, "grad_norm": 0.9848364091798514, "learning_rate": 1.245116143508676e-06, "loss": 0.6772202849388123, "step": 3908 }, { "epoch": 0.9006912442396313, "grad_norm": 0.9291487276824166, "learning_rate": 1.2447467642982697e-06, "loss": 0.8160394430160522, "step": 3909 }, { "epoch": 0.9009216589861752, "grad_norm": 1.3459000002689838, "learning_rate": 1.244377349563194e-06, "loss": 0.8289823532104492, "step": 3910 }, { "epoch": 0.9011520737327189, "grad_norm": 1.0130598759262572, "learning_rate": 1.24400789935707e-06, "loss": 0.7574084997177124, "step": 3911 }, { "epoch": 0.9013824884792627, "grad_norm": 0.9665886404424858, "learning_rate": 1.2436384137335218e-06, "loss": 0.8116365671157837, "step": 3912 }, { "epoch": 0.9016129032258065, "grad_norm": 1.0860329839978788, "learning_rate": 1.2432688927461808e-06, "loss": 0.814805805683136, "step": 3913 }, { "epoch": 0.9018433179723502, "grad_norm": 0.9783977746996081, "learning_rate": 1.2428993364486822e-06, "loss": 0.7947453260421753, "step": 3914 }, { "epoch": 0.902073732718894, "grad_norm": 1.1432103627131167, "learning_rate": 1.2425297448946661e-06, "loss": 0.939562976360321, "step": 3915 }, { "epoch": 0.9023041474654377, "grad_norm": 0.9342812306918719, "learning_rate": 1.2421601181377787e-06, "loss": 0.9460225105285645, "step": 3916 }, { "epoch": 0.9025345622119816, "grad_norm": 1.1417876456910938, "learning_rate": 1.241790456231671e-06, "loss": 0.9183799028396606, "step": 3917 }, { "epoch": 0.9027649769585253, "grad_norm": 1.1195959115117728, "learning_rate": 1.2414207592299984e-06, "loss": 0.6793398857116699, "step": 3918 }, { "epoch": 0.9029953917050692, "grad_norm": 0.9758451113738527, "learning_rate": 1.2410510271864222e-06, "loss": 0.7796125411987305, "step": 3919 }, { "epoch": 0.9032258064516129, "grad_norm": 1.127885346985943, "learning_rate": 1.2406812601546085e-06, "loss": 0.8164567351341248, "step": 3920 }, { "epoch": 0.9034562211981567, "grad_norm": 1.327729370966401, "learning_rate": 1.2403114581882288e-06, "loss": 0.7267879247665405, "step": 3921 }, { "epoch": 0.9036866359447004, "grad_norm": 0.9644037075475709, "learning_rate": 1.2399416213409586e-06, "loss": 0.7277103066444397, "step": 3922 }, { "epoch": 0.9039170506912443, "grad_norm": 1.1653209742127064, "learning_rate": 1.23957174966648e-06, "loss": 0.8507979512214661, "step": 3923 }, { "epoch": 0.904147465437788, "grad_norm": 1.2024221808183382, "learning_rate": 1.2392018432184792e-06, "loss": 0.9431333541870117, "step": 3924 }, { "epoch": 0.9043778801843319, "grad_norm": 0.9610849982223711, "learning_rate": 1.2388319020506473e-06, "loss": 0.669041633605957, "step": 3925 }, { "epoch": 0.9046082949308756, "grad_norm": 1.0428863031922808, "learning_rate": 1.2384619262166808e-06, "loss": 0.7639964818954468, "step": 3926 }, { "epoch": 0.9048387096774193, "grad_norm": 0.9055700075744166, "learning_rate": 1.2380919157702819e-06, "loss": 0.7390594482421875, "step": 3927 }, { "epoch": 0.9050691244239631, "grad_norm": 1.0183193149474203, "learning_rate": 1.2377218707651562e-06, "loss": 0.8320105075836182, "step": 3928 }, { "epoch": 0.9052995391705069, "grad_norm": 0.9604555269461571, "learning_rate": 1.237351791255016e-06, "loss": 0.6820249557495117, "step": 3929 }, { "epoch": 0.9055299539170507, "grad_norm": 1.0758012435150028, "learning_rate": 1.2369816772935773e-06, "loss": 0.8548537492752075, "step": 3930 }, { "epoch": 0.9057603686635944, "grad_norm": 1.0169473440313737, "learning_rate": 1.236611528934562e-06, "loss": 0.7226318120956421, "step": 3931 }, { "epoch": 0.9059907834101383, "grad_norm": 1.2196278844047388, "learning_rate": 1.2362413462316963e-06, "loss": 0.879987359046936, "step": 3932 }, { "epoch": 0.906221198156682, "grad_norm": 0.8628507992206548, "learning_rate": 1.2358711292387122e-06, "loss": 0.7919881343841553, "step": 3933 }, { "epoch": 0.9064516129032258, "grad_norm": 1.0779297510278616, "learning_rate": 1.2355008780093456e-06, "loss": 0.8232694268226624, "step": 3934 }, { "epoch": 0.9066820276497696, "grad_norm": 1.249487252121194, "learning_rate": 1.2351305925973385e-06, "loss": 0.80347740650177, "step": 3935 }, { "epoch": 0.9069124423963134, "grad_norm": 1.2510529509996382, "learning_rate": 1.234760273056437e-06, "loss": 0.7818408012390137, "step": 3936 }, { "epoch": 0.9071428571428571, "grad_norm": 1.1620371895322128, "learning_rate": 1.2343899194403931e-06, "loss": 0.8391210436820984, "step": 3937 }, { "epoch": 0.9073732718894009, "grad_norm": 1.1380529418025975, "learning_rate": 1.2340195318029622e-06, "loss": 0.7937500476837158, "step": 3938 }, { "epoch": 0.9076036866359447, "grad_norm": 0.973433345758839, "learning_rate": 1.2336491101979065e-06, "loss": 0.7158668041229248, "step": 3939 }, { "epoch": 0.9078341013824884, "grad_norm": 0.9549803277521113, "learning_rate": 1.2332786546789915e-06, "loss": 0.6956034898757935, "step": 3940 }, { "epoch": 0.9080645161290323, "grad_norm": 1.035574155623001, "learning_rate": 1.2329081652999887e-06, "loss": 0.7252948880195618, "step": 3941 }, { "epoch": 0.908294930875576, "grad_norm": 1.2086784459715743, "learning_rate": 1.2325376421146739e-06, "loss": 0.7131162881851196, "step": 3942 }, { "epoch": 0.9085253456221198, "grad_norm": 0.8781165558243194, "learning_rate": 1.2321670851768285e-06, "loss": 0.7383663654327393, "step": 3943 }, { "epoch": 0.9087557603686636, "grad_norm": 0.9355062944038273, "learning_rate": 1.2317964945402374e-06, "loss": 0.8296892642974854, "step": 3944 }, { "epoch": 0.9089861751152074, "grad_norm": 1.1131069336270092, "learning_rate": 1.2314258702586923e-06, "loss": 0.8314273357391357, "step": 3945 }, { "epoch": 0.9092165898617511, "grad_norm": 0.9647703306046335, "learning_rate": 1.2310552123859888e-06, "loss": 0.7264384031295776, "step": 3946 }, { "epoch": 0.909447004608295, "grad_norm": 0.7580621867286127, "learning_rate": 1.230684520975927e-06, "loss": 0.6757937073707581, "step": 3947 }, { "epoch": 0.9096774193548387, "grad_norm": 0.8884108342506404, "learning_rate": 1.230313796082312e-06, "loss": 0.8318504691123962, "step": 3948 }, { "epoch": 0.9099078341013825, "grad_norm": 0.7767337233620181, "learning_rate": 1.2299430377589547e-06, "loss": 0.7043207883834839, "step": 3949 }, { "epoch": 0.9101382488479263, "grad_norm": 1.0668368590995472, "learning_rate": 1.2295722460596696e-06, "loss": 0.8499487638473511, "step": 3950 }, { "epoch": 0.91036866359447, "grad_norm": 1.1145902688644103, "learning_rate": 1.2292014210382772e-06, "loss": 0.8219600319862366, "step": 3951 }, { "epoch": 0.9105990783410138, "grad_norm": 1.2329010539695853, "learning_rate": 1.2288305627486017e-06, "loss": 0.8136317133903503, "step": 3952 }, { "epoch": 0.9108294930875576, "grad_norm": 1.1220482069317936, "learning_rate": 1.2284596712444735e-06, "loss": 0.7858958840370178, "step": 3953 }, { "epoch": 0.9110599078341014, "grad_norm": 1.182019995516566, "learning_rate": 1.2280887465797259e-06, "loss": 0.8108563423156738, "step": 3954 }, { "epoch": 0.9112903225806451, "grad_norm": 1.17197106565382, "learning_rate": 1.2277177888081987e-06, "loss": 0.8061145544052124, "step": 3955 }, { "epoch": 0.911520737327189, "grad_norm": 1.1140830632516712, "learning_rate": 1.2273467979837361e-06, "loss": 0.7769665718078613, "step": 3956 }, { "epoch": 0.9117511520737327, "grad_norm": 1.5134088570090107, "learning_rate": 1.2269757741601867e-06, "loss": 1.0548570156097412, "step": 3957 }, { "epoch": 0.9119815668202765, "grad_norm": 0.9732476833800602, "learning_rate": 1.226604717391404e-06, "loss": 0.7095952033996582, "step": 3958 }, { "epoch": 0.9122119815668203, "grad_norm": 0.8435340807921997, "learning_rate": 1.226233627731247e-06, "loss": 0.7330363392829895, "step": 3959 }, { "epoch": 0.9124423963133641, "grad_norm": 0.9706068481575616, "learning_rate": 1.225862505233578e-06, "loss": 0.7328442931175232, "step": 3960 }, { "epoch": 0.9126728110599078, "grad_norm": 1.059740258312267, "learning_rate": 1.2254913499522656e-06, "loss": 0.7572993040084839, "step": 3961 }, { "epoch": 0.9129032258064517, "grad_norm": 1.0542941153492202, "learning_rate": 1.2251201619411823e-06, "loss": 0.7706469297409058, "step": 3962 }, { "epoch": 0.9131336405529954, "grad_norm": 1.1436826868313579, "learning_rate": 1.2247489412542053e-06, "loss": 0.7830193042755127, "step": 3963 }, { "epoch": 0.9133640552995391, "grad_norm": 1.0827904871592715, "learning_rate": 1.224377687945217e-06, "loss": 0.8415955901145935, "step": 3964 }, { "epoch": 0.913594470046083, "grad_norm": 1.1895924425921953, "learning_rate": 1.2240064020681044e-06, "loss": 0.7383062839508057, "step": 3965 }, { "epoch": 0.9138248847926267, "grad_norm": 1.1432920832791855, "learning_rate": 1.2236350836767593e-06, "loss": 0.7372882962226868, "step": 3966 }, { "epoch": 0.9140552995391705, "grad_norm": 1.0941013432151616, "learning_rate": 1.2232637328250776e-06, "loss": 0.7914254665374756, "step": 3967 }, { "epoch": 0.9142857142857143, "grad_norm": 0.9886213418734634, "learning_rate": 1.2228923495669605e-06, "loss": 0.8510675430297852, "step": 3968 }, { "epoch": 0.9145161290322581, "grad_norm": 1.045281864627849, "learning_rate": 1.2225209339563143e-06, "loss": 0.7391757369041443, "step": 3969 }, { "epoch": 0.9147465437788018, "grad_norm": 0.8746728562097662, "learning_rate": 1.2221494860470491e-06, "loss": 0.69194495677948, "step": 3970 }, { "epoch": 0.9149769585253457, "grad_norm": 1.0907421288179358, "learning_rate": 1.22177800589308e-06, "loss": 0.7593865394592285, "step": 3971 }, { "epoch": 0.9152073732718894, "grad_norm": 1.037234739347401, "learning_rate": 1.2214064935483268e-06, "loss": 0.7831966876983643, "step": 3972 }, { "epoch": 0.9154377880184332, "grad_norm": 1.1150279108134162, "learning_rate": 1.2210349490667145e-06, "loss": 0.8858723640441895, "step": 3973 }, { "epoch": 0.915668202764977, "grad_norm": 1.1381126617682915, "learning_rate": 1.2206633725021715e-06, "loss": 0.8645567893981934, "step": 3974 }, { "epoch": 0.9158986175115207, "grad_norm": 0.9188905804582469, "learning_rate": 1.2202917639086322e-06, "loss": 0.7619047164916992, "step": 3975 }, { "epoch": 0.9161290322580645, "grad_norm": 1.0126992141273314, "learning_rate": 1.2199201233400355e-06, "loss": 0.8652681112289429, "step": 3976 }, { "epoch": 0.9163594470046083, "grad_norm": 0.9961259698766619, "learning_rate": 1.2195484508503234e-06, "loss": 0.6860940456390381, "step": 3977 }, { "epoch": 0.9165898617511521, "grad_norm": 0.8860870600955693, "learning_rate": 1.2191767464934444e-06, "loss": 0.7372464537620544, "step": 3978 }, { "epoch": 0.9168202764976958, "grad_norm": 1.3495413684840594, "learning_rate": 1.218805010323351e-06, "loss": 0.8719853162765503, "step": 3979 }, { "epoch": 0.9170506912442397, "grad_norm": 0.9968927276513252, "learning_rate": 1.2184332423940003e-06, "loss": 0.8203779458999634, "step": 3980 }, { "epoch": 0.9172811059907834, "grad_norm": 1.197176686739939, "learning_rate": 1.218061442759353e-06, "loss": 0.8648861646652222, "step": 3981 }, { "epoch": 0.9175115207373272, "grad_norm": 1.0630748229990676, "learning_rate": 1.2176896114733766e-06, "loss": 0.7651659250259399, "step": 3982 }, { "epoch": 0.917741935483871, "grad_norm": 1.20459191964974, "learning_rate": 1.2173177485900408e-06, "loss": 0.8495512008666992, "step": 3983 }, { "epoch": 0.9179723502304148, "grad_norm": 1.3559959351470627, "learning_rate": 1.2169458541633216e-06, "loss": 0.7997228503227234, "step": 3984 }, { "epoch": 0.9182027649769585, "grad_norm": 0.9870494686008755, "learning_rate": 1.2165739282471987e-06, "loss": 0.8353173136711121, "step": 3985 }, { "epoch": 0.9184331797235024, "grad_norm": 1.2277323881843956, "learning_rate": 1.216201970895657e-06, "loss": 0.9039655327796936, "step": 3986 }, { "epoch": 0.9186635944700461, "grad_norm": 0.9209288499077958, "learning_rate": 1.2158299821626854e-06, "loss": 0.8158592581748962, "step": 3987 }, { "epoch": 0.9188940092165898, "grad_norm": 1.2007654555954255, "learning_rate": 1.2154579621022776e-06, "loss": 0.8443971872329712, "step": 3988 }, { "epoch": 0.9191244239631337, "grad_norm": 0.916322848733307, "learning_rate": 1.2150859107684318e-06, "loss": 0.7934167385101318, "step": 3989 }, { "epoch": 0.9193548387096774, "grad_norm": 1.1576910593833736, "learning_rate": 1.2147138282151512e-06, "loss": 0.750052809715271, "step": 3990 }, { "epoch": 0.9195852534562212, "grad_norm": 1.0948767691124337, "learning_rate": 1.2143417144964423e-06, "loss": 0.813056468963623, "step": 3991 }, { "epoch": 0.919815668202765, "grad_norm": 1.1487977592190233, "learning_rate": 1.2139695696663174e-06, "loss": 0.9478945732116699, "step": 3992 }, { "epoch": 0.9200460829493088, "grad_norm": 0.9711264468634061, "learning_rate": 1.2135973937787927e-06, "loss": 0.687637448310852, "step": 3993 }, { "epoch": 0.9202764976958525, "grad_norm": 1.071392128639805, "learning_rate": 1.213225186887889e-06, "loss": 0.8073818683624268, "step": 3994 }, { "epoch": 0.9205069124423964, "grad_norm": 1.1074324196567935, "learning_rate": 1.2128529490476318e-06, "loss": 0.6684166789054871, "step": 3995 }, { "epoch": 0.9207373271889401, "grad_norm": 1.1910033963986806, "learning_rate": 1.2124806803120506e-06, "loss": 0.7897466421127319, "step": 3996 }, { "epoch": 0.9209677419354839, "grad_norm": 1.0375797321803883, "learning_rate": 1.21210838073518e-06, "loss": 0.832312822341919, "step": 3997 }, { "epoch": 0.9211981566820276, "grad_norm": 1.036059468253791, "learning_rate": 1.2117360503710588e-06, "loss": 0.9536067247390747, "step": 3998 }, { "epoch": 0.9214285714285714, "grad_norm": 1.123926651312402, "learning_rate": 1.2113636892737302e-06, "loss": 0.8959759473800659, "step": 3999 }, { "epoch": 0.9216589861751152, "grad_norm": 0.9405530325495998, "learning_rate": 1.2109912974972422e-06, "loss": 0.6789166927337646, "step": 4000 }, { "epoch": 0.9218894009216589, "grad_norm": 0.9327551909921717, "learning_rate": 1.2106188750956464e-06, "loss": 0.7336491346359253, "step": 4001 }, { "epoch": 0.9221198156682028, "grad_norm": 0.8000293761487048, "learning_rate": 1.2102464221229997e-06, "loss": 0.7838259935379028, "step": 4002 }, { "epoch": 0.9223502304147465, "grad_norm": 1.2907858896278495, "learning_rate": 1.2098739386333631e-06, "loss": 0.9147623777389526, "step": 4003 }, { "epoch": 0.9225806451612903, "grad_norm": 1.3691019040487797, "learning_rate": 1.2095014246808022e-06, "loss": 0.7296491265296936, "step": 4004 }, { "epoch": 0.9228110599078341, "grad_norm": 1.1028104717001235, "learning_rate": 1.2091288803193868e-06, "loss": 0.7898432016372681, "step": 4005 }, { "epoch": 0.9230414746543779, "grad_norm": 1.1562470474736035, "learning_rate": 1.2087563056031914e-06, "loss": 0.8190659284591675, "step": 4006 }, { "epoch": 0.9232718894009216, "grad_norm": 1.4146112766933352, "learning_rate": 1.2083837005862945e-06, "loss": 0.8383443355560303, "step": 4007 }, { "epoch": 0.9235023041474655, "grad_norm": 0.7251077105825574, "learning_rate": 1.2080110653227796e-06, "loss": 0.5987120866775513, "step": 4008 }, { "epoch": 0.9237327188940092, "grad_norm": 1.056645940510342, "learning_rate": 1.2076383998667334e-06, "loss": 0.8811358213424683, "step": 4009 }, { "epoch": 0.923963133640553, "grad_norm": 0.8867108269493398, "learning_rate": 1.2072657042722486e-06, "loss": 0.7958807349205017, "step": 4010 }, { "epoch": 0.9241935483870968, "grad_norm": 1.1776412427000924, "learning_rate": 1.2068929785934215e-06, "loss": 0.7192457914352417, "step": 4011 }, { "epoch": 0.9244239631336405, "grad_norm": 1.0545419352254402, "learning_rate": 1.2065202228843523e-06, "loss": 0.6854838132858276, "step": 4012 }, { "epoch": 0.9246543778801843, "grad_norm": 1.0759672957343283, "learning_rate": 1.2061474371991457e-06, "loss": 0.7334680557250977, "step": 4013 }, { "epoch": 0.9248847926267281, "grad_norm": 0.9536076812745731, "learning_rate": 1.205774621591912e-06, "loss": 0.7614402770996094, "step": 4014 }, { "epoch": 0.9251152073732719, "grad_norm": 1.3871826739545572, "learning_rate": 1.2054017761167644e-06, "loss": 0.7502505779266357, "step": 4015 }, { "epoch": 0.9253456221198156, "grad_norm": 1.044146949688276, "learning_rate": 1.2050289008278205e-06, "loss": 0.7922523021697998, "step": 4016 }, { "epoch": 0.9255760368663595, "grad_norm": 1.2025329853302307, "learning_rate": 1.2046559957792032e-06, "loss": 0.7534265518188477, "step": 4017 }, { "epoch": 0.9258064516129032, "grad_norm": 0.9478426591249515, "learning_rate": 1.2042830610250395e-06, "loss": 0.6997093558311462, "step": 4018 }, { "epoch": 0.926036866359447, "grad_norm": 1.050086676036124, "learning_rate": 1.2039100966194594e-06, "loss": 0.7009599208831787, "step": 4019 }, { "epoch": 0.9262672811059908, "grad_norm": 1.108108705874163, "learning_rate": 1.203537102616599e-06, "loss": 0.795873761177063, "step": 4020 }, { "epoch": 0.9264976958525346, "grad_norm": 1.1836803264586404, "learning_rate": 1.2031640790705972e-06, "loss": 0.7860225439071655, "step": 4021 }, { "epoch": 0.9267281105990783, "grad_norm": 0.9036535621632875, "learning_rate": 1.2027910260355989e-06, "loss": 0.7657063007354736, "step": 4022 }, { "epoch": 0.9269585253456222, "grad_norm": 1.0407468417409953, "learning_rate": 1.2024179435657512e-06, "loss": 0.782909631729126, "step": 4023 }, { "epoch": 0.9271889400921659, "grad_norm": 0.8628791908243046, "learning_rate": 1.202044831715207e-06, "loss": 0.713431715965271, "step": 4024 }, { "epoch": 0.9274193548387096, "grad_norm": 0.9826922843740741, "learning_rate": 1.201671690538123e-06, "loss": 0.9126790165901184, "step": 4025 }, { "epoch": 0.9276497695852535, "grad_norm": 0.9552497173996132, "learning_rate": 1.20129852008866e-06, "loss": 0.8640999794006348, "step": 4026 }, { "epoch": 0.9278801843317972, "grad_norm": 1.0290580406520045, "learning_rate": 1.2009253204209832e-06, "loss": 0.723473072052002, "step": 4027 }, { "epoch": 0.928110599078341, "grad_norm": 0.9995947167655078, "learning_rate": 1.2005520915892626e-06, "loss": 0.6764041185379028, "step": 4028 }, { "epoch": 0.9283410138248848, "grad_norm": 1.1315388960653066, "learning_rate": 1.200178833647671e-06, "loss": 0.8525882959365845, "step": 4029 }, { "epoch": 0.9285714285714286, "grad_norm": 1.1279047416289067, "learning_rate": 1.1998055466503872e-06, "loss": 0.714957058429718, "step": 4030 }, { "epoch": 0.9288018433179723, "grad_norm": 0.9055007840106456, "learning_rate": 1.1994322306515925e-06, "loss": 0.8015910387039185, "step": 4031 }, { "epoch": 0.9290322580645162, "grad_norm": 1.1314666315910753, "learning_rate": 1.1990588857054733e-06, "loss": 1.0306739807128906, "step": 4032 }, { "epoch": 0.9292626728110599, "grad_norm": 1.0078215910327748, "learning_rate": 1.1986855118662205e-06, "loss": 0.8307464122772217, "step": 4033 }, { "epoch": 0.9294930875576037, "grad_norm": 0.9974753472669955, "learning_rate": 1.1983121091880286e-06, "loss": 0.8720347881317139, "step": 4034 }, { "epoch": 0.9297235023041475, "grad_norm": 1.0249437684832297, "learning_rate": 1.1979386777250968e-06, "loss": 0.7716174721717834, "step": 4035 }, { "epoch": 0.9299539170506912, "grad_norm": 0.9533075514678258, "learning_rate": 1.1975652175316279e-06, "loss": 0.8968960046768188, "step": 4036 }, { "epoch": 0.930184331797235, "grad_norm": 1.0235472692311864, "learning_rate": 1.197191728661829e-06, "loss": 0.7472472786903381, "step": 4037 }, { "epoch": 0.9304147465437788, "grad_norm": 1.209577738801564, "learning_rate": 1.196818211169912e-06, "loss": 0.7969691753387451, "step": 4038 }, { "epoch": 0.9306451612903226, "grad_norm": 0.8592343628435503, "learning_rate": 1.196444665110092e-06, "loss": 0.6187525987625122, "step": 4039 }, { "epoch": 0.9308755760368663, "grad_norm": 1.0503056259771648, "learning_rate": 1.1960710905365893e-06, "loss": 0.8715502619743347, "step": 4040 }, { "epoch": 0.9311059907834102, "grad_norm": 0.9918268480034713, "learning_rate": 1.1956974875036273e-06, "loss": 0.7174774408340454, "step": 4041 }, { "epoch": 0.9313364055299539, "grad_norm": 0.8743867275561935, "learning_rate": 1.1953238560654337e-06, "loss": 0.6546192169189453, "step": 4042 }, { "epoch": 0.9315668202764977, "grad_norm": 1.1024794232135675, "learning_rate": 1.194950196276241e-06, "loss": 0.8688700199127197, "step": 4043 }, { "epoch": 0.9317972350230415, "grad_norm": 1.0449187982587707, "learning_rate": 1.1945765081902856e-06, "loss": 0.7679718732833862, "step": 4044 }, { "epoch": 0.9320276497695853, "grad_norm": 0.9426197124643214, "learning_rate": 1.1942027918618073e-06, "loss": 0.6335175037384033, "step": 4045 }, { "epoch": 0.932258064516129, "grad_norm": 1.0452657366695544, "learning_rate": 1.1938290473450513e-06, "loss": 0.785153865814209, "step": 4046 }, { "epoch": 0.9324884792626729, "grad_norm": 0.9145063707903602, "learning_rate": 1.1934552746942653e-06, "loss": 0.6873019337654114, "step": 4047 }, { "epoch": 0.9327188940092166, "grad_norm": 0.9707470479007109, "learning_rate": 1.1930814739637025e-06, "loss": 0.7416094541549683, "step": 4048 }, { "epoch": 0.9329493087557603, "grad_norm": 1.2103943548089806, "learning_rate": 1.1927076452076193e-06, "loss": 0.7206372618675232, "step": 4049 }, { "epoch": 0.9331797235023042, "grad_norm": 1.1043264858931607, "learning_rate": 1.1923337884802767e-06, "loss": 0.8352477550506592, "step": 4050 }, { "epoch": 0.9334101382488479, "grad_norm": 1.116832001192149, "learning_rate": 1.191959903835939e-06, "loss": 0.8243483304977417, "step": 4051 }, { "epoch": 0.9336405529953917, "grad_norm": 1.4110893804735163, "learning_rate": 1.1915859913288756e-06, "loss": 0.827987790107727, "step": 4052 }, { "epoch": 0.9338709677419355, "grad_norm": 1.1514055762505417, "learning_rate": 1.1912120510133589e-06, "loss": 0.8624123334884644, "step": 4053 }, { "epoch": 0.9341013824884793, "grad_norm": 1.2091942284642192, "learning_rate": 1.1908380829436667e-06, "loss": 0.8615037202835083, "step": 4054 }, { "epoch": 0.934331797235023, "grad_norm": 1.2500115524653743, "learning_rate": 1.190464087174079e-06, "loss": 0.9367121458053589, "step": 4055 }, { "epoch": 0.9345622119815669, "grad_norm": 1.4503623207353766, "learning_rate": 1.190090063758881e-06, "loss": 0.927996039390564, "step": 4056 }, { "epoch": 0.9347926267281106, "grad_norm": 1.0709061746508743, "learning_rate": 1.1897160127523623e-06, "loss": 0.841314435005188, "step": 4057 }, { "epoch": 0.9350230414746544, "grad_norm": 1.1021939339887863, "learning_rate": 1.189341934208815e-06, "loss": 0.864904522895813, "step": 4058 }, { "epoch": 0.9352534562211982, "grad_norm": 1.148301781904619, "learning_rate": 1.188967828182537e-06, "loss": 0.9505404829978943, "step": 4059 }, { "epoch": 0.9354838709677419, "grad_norm": 1.0791372441668663, "learning_rate": 1.188593694727829e-06, "loss": 0.7347132563591003, "step": 4060 }, { "epoch": 0.9357142857142857, "grad_norm": 1.1367351426324537, "learning_rate": 1.1882195338989958e-06, "loss": 0.6267231106758118, "step": 4061 }, { "epoch": 0.9359447004608294, "grad_norm": 1.0946102482081315, "learning_rate": 1.1878453457503464e-06, "loss": 0.8052406907081604, "step": 4062 }, { "epoch": 0.9361751152073733, "grad_norm": 1.1032845960202522, "learning_rate": 1.1874711303361933e-06, "loss": 0.7928211688995361, "step": 4063 }, { "epoch": 0.936405529953917, "grad_norm": 1.1265414942472118, "learning_rate": 1.1870968877108545e-06, "loss": 0.8863959312438965, "step": 4064 }, { "epoch": 0.9366359447004609, "grad_norm": 1.0592501761240638, "learning_rate": 1.1867226179286496e-06, "loss": 0.8749874830245972, "step": 4065 }, { "epoch": 0.9368663594470046, "grad_norm": 0.9223254168257967, "learning_rate": 1.186348321043904e-06, "loss": 0.7516318559646606, "step": 4066 }, { "epoch": 0.9370967741935484, "grad_norm": 1.0863969007807137, "learning_rate": 1.1859739971109467e-06, "loss": 0.8435031771659851, "step": 4067 }, { "epoch": 0.9373271889400921, "grad_norm": 1.08570563607149, "learning_rate": 1.1855996461841093e-06, "loss": 0.8766932487487793, "step": 4068 }, { "epoch": 0.937557603686636, "grad_norm": 1.2630999347152494, "learning_rate": 1.1852252683177293e-06, "loss": 0.8748513460159302, "step": 4069 }, { "epoch": 0.9377880184331797, "grad_norm": 1.2689555695038703, "learning_rate": 1.184850863566147e-06, "loss": 0.8917855024337769, "step": 4070 }, { "epoch": 0.9380184331797236, "grad_norm": 1.0628114663297852, "learning_rate": 1.1844764319837064e-06, "loss": 0.7631640434265137, "step": 4071 }, { "epoch": 0.9382488479262673, "grad_norm": 1.0140155614547266, "learning_rate": 1.1841019736247557e-06, "loss": 0.8354158401489258, "step": 4072 }, { "epoch": 0.938479262672811, "grad_norm": 0.8561335978546013, "learning_rate": 1.1837274885436473e-06, "loss": 0.8122761845588684, "step": 4073 }, { "epoch": 0.9387096774193548, "grad_norm": 1.5776279194471237, "learning_rate": 1.1833529767947374e-06, "loss": 0.8281430006027222, "step": 4074 }, { "epoch": 0.9389400921658986, "grad_norm": 1.3828203317822199, "learning_rate": 1.1829784384323856e-06, "loss": 0.8291982412338257, "step": 4075 }, { "epoch": 0.9391705069124424, "grad_norm": 1.3096607265096822, "learning_rate": 1.1826038735109553e-06, "loss": 0.8951852321624756, "step": 4076 }, { "epoch": 0.9394009216589861, "grad_norm": 1.2165058417213606, "learning_rate": 1.182229282084815e-06, "loss": 0.7006446123123169, "step": 4077 }, { "epoch": 0.93963133640553, "grad_norm": 1.1269330295000342, "learning_rate": 1.1818546642083353e-06, "loss": 0.8944047689437866, "step": 4078 }, { "epoch": 0.9398617511520737, "grad_norm": 0.9351299115123082, "learning_rate": 1.1814800199358919e-06, "loss": 0.8252646923065186, "step": 4079 }, { "epoch": 0.9400921658986175, "grad_norm": 1.2255680666736817, "learning_rate": 1.181105349321864e-06, "loss": 0.7852828502655029, "step": 4080 }, { "epoch": 0.9403225806451613, "grad_norm": 1.0734973037527151, "learning_rate": 1.1807306524206347e-06, "loss": 0.7758563160896301, "step": 4081 }, { "epoch": 0.9405529953917051, "grad_norm": 1.0672387708424669, "learning_rate": 1.1803559292865899e-06, "loss": 0.7297114133834839, "step": 4082 }, { "epoch": 0.9407834101382488, "grad_norm": 1.1802096748579922, "learning_rate": 1.1799811799741209e-06, "loss": 0.7974321842193604, "step": 4083 }, { "epoch": 0.9410138248847926, "grad_norm": 1.2930194654348013, "learning_rate": 1.179606404537622e-06, "loss": 0.6406733989715576, "step": 4084 }, { "epoch": 0.9412442396313364, "grad_norm": 0.9862268230007224, "learning_rate": 1.179231603031491e-06, "loss": 0.6925486326217651, "step": 4085 }, { "epoch": 0.9414746543778801, "grad_norm": 0.9201295652583962, "learning_rate": 1.17885677551013e-06, "loss": 0.792647123336792, "step": 4086 }, { "epoch": 0.941705069124424, "grad_norm": 1.0460531669846371, "learning_rate": 1.1784819220279454e-06, "loss": 0.7499191761016846, "step": 4087 }, { "epoch": 0.9419354838709677, "grad_norm": 1.120763335726602, "learning_rate": 1.1781070426393455e-06, "loss": 0.8307451009750366, "step": 4088 }, { "epoch": 0.9421658986175115, "grad_norm": 1.1015455973526673, "learning_rate": 1.1777321373987445e-06, "loss": 0.7859289646148682, "step": 4089 }, { "epoch": 0.9423963133640553, "grad_norm": 1.0291702780651948, "learning_rate": 1.177357206360559e-06, "loss": 0.761134922504425, "step": 4090 }, { "epoch": 0.9426267281105991, "grad_norm": 1.240188832472171, "learning_rate": 1.1769822495792098e-06, "loss": 0.8697078227996826, "step": 4091 }, { "epoch": 0.9428571428571428, "grad_norm": 1.0395615260234665, "learning_rate": 1.1766072671091212e-06, "loss": 0.731541633605957, "step": 4092 }, { "epoch": 0.9430875576036867, "grad_norm": 1.1056530512213054, "learning_rate": 1.1762322590047219e-06, "loss": 0.7501940727233887, "step": 4093 }, { "epoch": 0.9433179723502304, "grad_norm": 1.1531150840189341, "learning_rate": 1.1758572253204431e-06, "loss": 0.9448602199554443, "step": 4094 }, { "epoch": 0.9435483870967742, "grad_norm": 0.8884441593083074, "learning_rate": 1.175482166110721e-06, "loss": 0.7704026699066162, "step": 4095 }, { "epoch": 0.943778801843318, "grad_norm": 0.8973060402184874, "learning_rate": 1.1751070814299947e-06, "loss": 0.7905057668685913, "step": 4096 }, { "epoch": 0.9440092165898617, "grad_norm": 1.238350046583652, "learning_rate": 1.1747319713327078e-06, "loss": 0.8957202434539795, "step": 4097 }, { "epoch": 0.9442396313364055, "grad_norm": 0.9896078596502195, "learning_rate": 1.174356835873306e-06, "loss": 0.7922521233558655, "step": 4098 }, { "epoch": 0.9444700460829493, "grad_norm": 0.9974151293119675, "learning_rate": 1.1739816751062404e-06, "loss": 0.6501933336257935, "step": 4099 }, { "epoch": 0.9447004608294931, "grad_norm": 0.9673699554437744, "learning_rate": 1.1736064890859654e-06, "loss": 0.6743361353874207, "step": 4100 }, { "epoch": 0.9449308755760368, "grad_norm": 1.0381670362595088, "learning_rate": 1.173231277866938e-06, "loss": 0.920632004737854, "step": 4101 }, { "epoch": 0.9451612903225807, "grad_norm": 0.872889135902432, "learning_rate": 1.1728560415036199e-06, "loss": 0.7498964071273804, "step": 4102 }, { "epoch": 0.9453917050691244, "grad_norm": 0.8444235514312883, "learning_rate": 1.1724807800504765e-06, "loss": 0.7665064334869385, "step": 4103 }, { "epoch": 0.9456221198156682, "grad_norm": 0.8729439782855682, "learning_rate": 1.172105493561976e-06, "loss": 0.75946044921875, "step": 4104 }, { "epoch": 0.945852534562212, "grad_norm": 1.016811663523364, "learning_rate": 1.1717301820925908e-06, "loss": 0.7701961398124695, "step": 4105 }, { "epoch": 0.9460829493087558, "grad_norm": 0.9708618505769702, "learning_rate": 1.1713548456967974e-06, "loss": 0.7775348424911499, "step": 4106 }, { "epoch": 0.9463133640552995, "grad_norm": 0.8519325609053343, "learning_rate": 1.1709794844290745e-06, "loss": 0.8149436712265015, "step": 4107 }, { "epoch": 0.9465437788018434, "grad_norm": 0.8519085263981432, "learning_rate": 1.170604098343906e-06, "loss": 0.7136009335517883, "step": 4108 }, { "epoch": 0.9467741935483871, "grad_norm": 1.2048256186284507, "learning_rate": 1.1702286874957786e-06, "loss": 0.7678873538970947, "step": 4109 }, { "epoch": 0.9470046082949308, "grad_norm": 0.9842223659547223, "learning_rate": 1.1698532519391827e-06, "loss": 0.7506710290908813, "step": 4110 }, { "epoch": 0.9472350230414747, "grad_norm": 0.900893049038478, "learning_rate": 1.1694777917286118e-06, "loss": 0.6646897792816162, "step": 4111 }, { "epoch": 0.9474654377880184, "grad_norm": 1.3857066059132386, "learning_rate": 1.1691023069185639e-06, "loss": 0.820647120475769, "step": 4112 }, { "epoch": 0.9476958525345622, "grad_norm": 0.9795728799566645, "learning_rate": 1.1687267975635402e-06, "loss": 0.872378408908844, "step": 4113 }, { "epoch": 0.947926267281106, "grad_norm": 1.0760361173899362, "learning_rate": 1.168351263718045e-06, "loss": 0.7920655608177185, "step": 4114 }, { "epoch": 0.9481566820276498, "grad_norm": 1.1709025489256302, "learning_rate": 1.1679757054365866e-06, "loss": 0.6593836545944214, "step": 4115 }, { "epoch": 0.9483870967741935, "grad_norm": 1.0965626572699905, "learning_rate": 1.1676001227736772e-06, "loss": 0.7473627328872681, "step": 4116 }, { "epoch": 0.9486175115207374, "grad_norm": 1.2027339281506744, "learning_rate": 1.1672245157838317e-06, "loss": 0.8001665472984314, "step": 4117 }, { "epoch": 0.9488479262672811, "grad_norm": 0.9543944768909415, "learning_rate": 1.1668488845215689e-06, "loss": 0.7342571020126343, "step": 4118 }, { "epoch": 0.9490783410138249, "grad_norm": 1.2428163281726954, "learning_rate": 1.1664732290414118e-06, "loss": 0.7616822719573975, "step": 4119 }, { "epoch": 0.9493087557603687, "grad_norm": 1.2486031522636918, "learning_rate": 1.1660975493978857e-06, "loss": 0.8885634541511536, "step": 4120 }, { "epoch": 0.9495391705069124, "grad_norm": 1.1323168185847523, "learning_rate": 1.1657218456455205e-06, "loss": 0.7816281318664551, "step": 4121 }, { "epoch": 0.9497695852534562, "grad_norm": 0.9570364600334796, "learning_rate": 1.1653461178388485e-06, "loss": 0.7412079572677612, "step": 4122 }, { "epoch": 0.95, "grad_norm": 0.957883425985998, "learning_rate": 1.1649703660324064e-06, "loss": 0.8096172213554382, "step": 4123 }, { "epoch": 0.9502304147465438, "grad_norm": 1.0359903594582591, "learning_rate": 1.164594590280734e-06, "loss": 0.6690856218338013, "step": 4124 }, { "epoch": 0.9504608294930875, "grad_norm": 0.9697541149080181, "learning_rate": 1.1642187906383746e-06, "loss": 0.7509289979934692, "step": 4125 }, { "epoch": 0.9506912442396314, "grad_norm": 0.8506285939807987, "learning_rate": 1.1638429671598754e-06, "loss": 0.6643730401992798, "step": 4126 }, { "epoch": 0.9509216589861751, "grad_norm": 0.994475544194171, "learning_rate": 1.1634671198997864e-06, "loss": 0.8100850582122803, "step": 4127 }, { "epoch": 0.9511520737327189, "grad_norm": 1.392121351288023, "learning_rate": 1.1630912489126612e-06, "loss": 0.919742226600647, "step": 4128 }, { "epoch": 0.9513824884792627, "grad_norm": 1.144319413666889, "learning_rate": 1.1627153542530571e-06, "loss": 0.8953771591186523, "step": 4129 }, { "epoch": 0.9516129032258065, "grad_norm": 0.9663802093818391, "learning_rate": 1.162339435975535e-06, "loss": 0.7401770949363708, "step": 4130 }, { "epoch": 0.9518433179723502, "grad_norm": 1.0071840947097435, "learning_rate": 1.1619634941346585e-06, "loss": 0.7618032097816467, "step": 4131 }, { "epoch": 0.9520737327188941, "grad_norm": 1.3156218418351784, "learning_rate": 1.1615875287849955e-06, "loss": 0.9134000539779663, "step": 4132 }, { "epoch": 0.9523041474654378, "grad_norm": 0.9617492928251477, "learning_rate": 1.1612115399811162e-06, "loss": 0.7555145025253296, "step": 4133 }, { "epoch": 0.9525345622119815, "grad_norm": 0.9434517704683025, "learning_rate": 1.1608355277775955e-06, "loss": 0.9125050902366638, "step": 4134 }, { "epoch": 0.9527649769585254, "grad_norm": 0.9082549396493419, "learning_rate": 1.1604594922290106e-06, "loss": 0.6575542688369751, "step": 4135 }, { "epoch": 0.9529953917050691, "grad_norm": 1.0750997369204898, "learning_rate": 1.1600834333899431e-06, "loss": 0.7530527114868164, "step": 4136 }, { "epoch": 0.9532258064516129, "grad_norm": 0.9603596342147773, "learning_rate": 1.159707351314977e-06, "loss": 0.8818701505661011, "step": 4137 }, { "epoch": 0.9534562211981567, "grad_norm": 0.9491169409805379, "learning_rate": 1.1593312460587003e-06, "loss": 0.7172919511795044, "step": 4138 }, { "epoch": 0.9536866359447005, "grad_norm": 1.1122266085503043, "learning_rate": 1.1589551176757044e-06, "loss": 0.8701400756835938, "step": 4139 }, { "epoch": 0.9539170506912442, "grad_norm": 1.3285866575691943, "learning_rate": 1.1585789662205834e-06, "loss": 0.867475152015686, "step": 4140 }, { "epoch": 0.9541474654377881, "grad_norm": 1.1851362026267, "learning_rate": 1.1582027917479356e-06, "loss": 0.7809052467346191, "step": 4141 }, { "epoch": 0.9543778801843318, "grad_norm": 1.1986202884801196, "learning_rate": 1.1578265943123619e-06, "loss": 0.8589099645614624, "step": 4142 }, { "epoch": 0.9546082949308756, "grad_norm": 0.893566517908755, "learning_rate": 1.157450373968467e-06, "loss": 0.7826642394065857, "step": 4143 }, { "epoch": 0.9548387096774194, "grad_norm": 1.3652425128856092, "learning_rate": 1.1570741307708585e-06, "loss": 0.9550029635429382, "step": 4144 }, { "epoch": 0.9550691244239631, "grad_norm": 1.0826442844044148, "learning_rate": 1.1566978647741478e-06, "loss": 0.8607431650161743, "step": 4145 }, { "epoch": 0.9552995391705069, "grad_norm": 0.8247649155112424, "learning_rate": 1.15632157603295e-06, "loss": 0.7350449562072754, "step": 4146 }, { "epoch": 0.9555299539170506, "grad_norm": 1.033301557916291, "learning_rate": 1.1559452646018818e-06, "loss": 0.853142261505127, "step": 4147 }, { "epoch": 0.9557603686635945, "grad_norm": 1.0495554531445934, "learning_rate": 1.1555689305355651e-06, "loss": 0.7137192487716675, "step": 4148 }, { "epoch": 0.9559907834101382, "grad_norm": 1.158813208265862, "learning_rate": 1.1551925738886244e-06, "loss": 0.9007513523101807, "step": 4149 }, { "epoch": 0.956221198156682, "grad_norm": 1.1071306366128357, "learning_rate": 1.1548161947156867e-06, "loss": 0.8499083518981934, "step": 4150 }, { "epoch": 0.9564516129032258, "grad_norm": 0.874419574252059, "learning_rate": 1.1544397930713836e-06, "loss": 0.8068628311157227, "step": 4151 }, { "epoch": 0.9566820276497696, "grad_norm": 1.1729788609256337, "learning_rate": 1.1540633690103487e-06, "loss": 0.8357307314872742, "step": 4152 }, { "epoch": 0.9569124423963133, "grad_norm": 1.262397502444813, "learning_rate": 1.1536869225872198e-06, "loss": 0.7650378942489624, "step": 4153 }, { "epoch": 0.9571428571428572, "grad_norm": 0.9933463317010283, "learning_rate": 1.1533104538566376e-06, "loss": 0.8717354536056519, "step": 4154 }, { "epoch": 0.9573732718894009, "grad_norm": 0.9807638290234347, "learning_rate": 1.152933962873246e-06, "loss": 0.6314762830734253, "step": 4155 }, { "epoch": 0.9576036866359448, "grad_norm": 1.1279705073097503, "learning_rate": 1.152557449691692e-06, "loss": 0.8949059844017029, "step": 4156 }, { "epoch": 0.9578341013824885, "grad_norm": 1.137203803563717, "learning_rate": 1.1521809143666261e-06, "loss": 0.7862699031829834, "step": 4157 }, { "epoch": 0.9580645161290322, "grad_norm": 0.8970512868442762, "learning_rate": 1.151804356952702e-06, "loss": 0.7954641580581665, "step": 4158 }, { "epoch": 0.958294930875576, "grad_norm": 1.0478069911824797, "learning_rate": 1.1514277775045766e-06, "loss": 0.7654163241386414, "step": 4159 }, { "epoch": 0.9585253456221198, "grad_norm": 1.0321973050954667, "learning_rate": 1.1510511760769097e-06, "loss": 0.7050681114196777, "step": 4160 }, { "epoch": 0.9587557603686636, "grad_norm": 1.0667493196933242, "learning_rate": 1.1506745527243646e-06, "loss": 0.8646515607833862, "step": 4161 }, { "epoch": 0.9589861751152073, "grad_norm": 0.9392654190881413, "learning_rate": 1.1502979075016078e-06, "loss": 0.7427883148193359, "step": 4162 }, { "epoch": 0.9592165898617512, "grad_norm": 1.2506151155745373, "learning_rate": 1.1499212404633083e-06, "loss": 0.7800190448760986, "step": 4163 }, { "epoch": 0.9594470046082949, "grad_norm": 1.0487739651932841, "learning_rate": 1.1495445516641394e-06, "loss": 0.789481520652771, "step": 4164 }, { "epoch": 0.9596774193548387, "grad_norm": 0.8332785453272284, "learning_rate": 1.1491678411587768e-06, "loss": 0.7975008487701416, "step": 4165 }, { "epoch": 0.9599078341013825, "grad_norm": 0.9306560917040928, "learning_rate": 1.1487911090018994e-06, "loss": 0.7964596748352051, "step": 4166 }, { "epoch": 0.9601382488479263, "grad_norm": 0.8915843631095149, "learning_rate": 1.1484143552481895e-06, "loss": 0.7008803486824036, "step": 4167 }, { "epoch": 0.96036866359447, "grad_norm": 0.888889684402262, "learning_rate": 1.1480375799523328e-06, "loss": 0.708189070224762, "step": 4168 }, { "epoch": 0.9605990783410139, "grad_norm": 1.1069917813185677, "learning_rate": 1.1476607831690167e-06, "loss": 0.8207682371139526, "step": 4169 }, { "epoch": 0.9608294930875576, "grad_norm": 1.200280235865814, "learning_rate": 1.1472839649529337e-06, "loss": 0.7682942152023315, "step": 4170 }, { "epoch": 0.9610599078341013, "grad_norm": 1.0122999990692296, "learning_rate": 1.1469071253587785e-06, "loss": 0.8435598611831665, "step": 4171 }, { "epoch": 0.9612903225806452, "grad_norm": 0.79536207500534, "learning_rate": 1.1465302644412483e-06, "loss": 0.7516113519668579, "step": 4172 }, { "epoch": 0.9615207373271889, "grad_norm": 0.881539477347835, "learning_rate": 1.1461533822550442e-06, "loss": 0.7125411629676819, "step": 4173 }, { "epoch": 0.9617511520737327, "grad_norm": 0.9108745928942158, "learning_rate": 1.14577647885487e-06, "loss": 0.7560747861862183, "step": 4174 }, { "epoch": 0.9619815668202765, "grad_norm": 0.9027443230900505, "learning_rate": 1.1453995542954332e-06, "loss": 0.6702673435211182, "step": 4175 }, { "epoch": 0.9622119815668203, "grad_norm": 1.1520258504461998, "learning_rate": 1.1450226086314433e-06, "loss": 0.8083088397979736, "step": 4176 }, { "epoch": 0.962442396313364, "grad_norm": 0.9906259449003554, "learning_rate": 1.1446456419176135e-06, "loss": 0.7579925060272217, "step": 4177 }, { "epoch": 0.9626728110599079, "grad_norm": 0.9460352601625827, "learning_rate": 1.1442686542086609e-06, "loss": 0.713416576385498, "step": 4178 }, { "epoch": 0.9629032258064516, "grad_norm": 1.1770844867552515, "learning_rate": 1.1438916455593035e-06, "loss": 0.7767639756202698, "step": 4179 }, { "epoch": 0.9631336405529954, "grad_norm": 1.0244180953454374, "learning_rate": 1.1435146160242645e-06, "loss": 0.7493964433670044, "step": 4180 }, { "epoch": 0.9633640552995392, "grad_norm": 1.1249907720020325, "learning_rate": 1.1431375656582692e-06, "loss": 0.8789365291595459, "step": 4181 }, { "epoch": 0.9635944700460829, "grad_norm": 1.177047767616621, "learning_rate": 1.1427604945160457e-06, "loss": 0.7750524878501892, "step": 4182 }, { "epoch": 0.9638248847926267, "grad_norm": 1.1195166665130392, "learning_rate": 1.142383402652325e-06, "loss": 0.9330715537071228, "step": 4183 }, { "epoch": 0.9640552995391705, "grad_norm": 0.933339002257347, "learning_rate": 1.142006290121842e-06, "loss": 0.6845035552978516, "step": 4184 }, { "epoch": 0.9642857142857143, "grad_norm": 0.9794843601160967, "learning_rate": 1.1416291569793343e-06, "loss": 0.7295390963554382, "step": 4185 }, { "epoch": 0.964516129032258, "grad_norm": 1.0666753158619988, "learning_rate": 1.1412520032795419e-06, "loss": 0.6869080066680908, "step": 4186 }, { "epoch": 0.9647465437788019, "grad_norm": 1.506743316898968, "learning_rate": 1.140874829077208e-06, "loss": 1.0916842222213745, "step": 4187 }, { "epoch": 0.9649769585253456, "grad_norm": 1.0539994363877199, "learning_rate": 1.1404976344270793e-06, "loss": 0.7487984299659729, "step": 4188 }, { "epoch": 0.9652073732718894, "grad_norm": 1.024674697115665, "learning_rate": 1.140120419383905e-06, "loss": 0.8852604627609253, "step": 4189 }, { "epoch": 0.9654377880184332, "grad_norm": 1.065174441144157, "learning_rate": 1.139743184002437e-06, "loss": 0.7384698987007141, "step": 4190 }, { "epoch": 0.965668202764977, "grad_norm": 1.2009691028192717, "learning_rate": 1.1393659283374312e-06, "loss": 0.8033223152160645, "step": 4191 }, { "epoch": 0.9658986175115207, "grad_norm": 1.2698866658546557, "learning_rate": 1.1389886524436453e-06, "loss": 0.8870355486869812, "step": 4192 }, { "epoch": 0.9661290322580646, "grad_norm": 1.1198376045036553, "learning_rate": 1.1386113563758405e-06, "loss": 0.869537353515625, "step": 4193 }, { "epoch": 0.9663594470046083, "grad_norm": 1.027781409519754, "learning_rate": 1.1382340401887808e-06, "loss": 0.8564068675041199, "step": 4194 }, { "epoch": 0.966589861751152, "grad_norm": 0.9894593103049535, "learning_rate": 1.1378567039372332e-06, "loss": 0.7988623380661011, "step": 4195 }, { "epoch": 0.9668202764976959, "grad_norm": 1.0843651981255995, "learning_rate": 1.1374793476759673e-06, "loss": 0.9405556917190552, "step": 4196 }, { "epoch": 0.9670506912442396, "grad_norm": 0.8756334921680484, "learning_rate": 1.137101971459756e-06, "loss": 0.6757407188415527, "step": 4197 }, { "epoch": 0.9672811059907834, "grad_norm": 1.1855730012050456, "learning_rate": 1.1367245753433757e-06, "loss": 0.7521541118621826, "step": 4198 }, { "epoch": 0.9675115207373272, "grad_norm": 1.0137943151941313, "learning_rate": 1.1363471593816037e-06, "loss": 0.7306162714958191, "step": 4199 }, { "epoch": 0.967741935483871, "grad_norm": 0.8912209844157076, "learning_rate": 1.135969723629222e-06, "loss": 0.6884766817092896, "step": 4200 }, { "epoch": 0.9679723502304147, "grad_norm": 1.2084507323846643, "learning_rate": 1.1355922681410152e-06, "loss": 0.8420373201370239, "step": 4201 }, { "epoch": 0.9682027649769586, "grad_norm": 0.7638761509020496, "learning_rate": 1.1352147929717704e-06, "loss": 0.7252322435379028, "step": 4202 }, { "epoch": 0.9684331797235023, "grad_norm": 0.9448982669089191, "learning_rate": 1.134837298176277e-06, "loss": 0.6375538110733032, "step": 4203 }, { "epoch": 0.9686635944700461, "grad_norm": 1.0629192948024473, "learning_rate": 1.1344597838093283e-06, "loss": 0.713671863079071, "step": 4204 }, { "epoch": 0.9688940092165899, "grad_norm": 1.0319385361068514, "learning_rate": 1.1340822499257201e-06, "loss": 0.8591479063034058, "step": 4205 }, { "epoch": 0.9691244239631336, "grad_norm": 1.0671754327237228, "learning_rate": 1.1337046965802505e-06, "loss": 0.7638808488845825, "step": 4206 }, { "epoch": 0.9693548387096774, "grad_norm": 1.1032489557963816, "learning_rate": 1.1333271238277215e-06, "loss": 0.8133253455162048, "step": 4207 }, { "epoch": 0.9695852534562212, "grad_norm": 0.9621754998556686, "learning_rate": 1.132949531722937e-06, "loss": 0.6938756704330444, "step": 4208 }, { "epoch": 0.969815668202765, "grad_norm": 1.171557608199449, "learning_rate": 1.132571920320704e-06, "loss": 0.793639063835144, "step": 4209 }, { "epoch": 0.9700460829493087, "grad_norm": 1.066219056403929, "learning_rate": 1.132194289675832e-06, "loss": 0.7188536524772644, "step": 4210 }, { "epoch": 0.9702764976958526, "grad_norm": 1.2873690827507545, "learning_rate": 1.1318166398431343e-06, "loss": 0.8076587319374084, "step": 4211 }, { "epoch": 0.9705069124423963, "grad_norm": 1.2434961707112964, "learning_rate": 1.1314389708774258e-06, "loss": 0.8390023708343506, "step": 4212 }, { "epoch": 0.9707373271889401, "grad_norm": 1.2800250293744322, "learning_rate": 1.1310612828335243e-06, "loss": 0.8395706415176392, "step": 4213 }, { "epoch": 0.9709677419354839, "grad_norm": 1.1156221851257155, "learning_rate": 1.1306835757662515e-06, "loss": 0.9672995805740356, "step": 4214 }, { "epoch": 0.9711981566820277, "grad_norm": 1.1859433022618981, "learning_rate": 1.1303058497304303e-06, "loss": 0.7716202735900879, "step": 4215 }, { "epoch": 0.9714285714285714, "grad_norm": 0.9257750691433206, "learning_rate": 1.1299281047808876e-06, "loss": 0.6318329572677612, "step": 4216 }, { "epoch": 0.9716589861751153, "grad_norm": 1.1802189065520408, "learning_rate": 1.1295503409724525e-06, "loss": 0.8287553787231445, "step": 4217 }, { "epoch": 0.971889400921659, "grad_norm": 0.835147088990129, "learning_rate": 1.129172558359957e-06, "loss": 0.6903107762336731, "step": 4218 }, { "epoch": 0.9721198156682027, "grad_norm": 0.9693907793654548, "learning_rate": 1.1287947569982355e-06, "loss": 0.684443473815918, "step": 4219 }, { "epoch": 0.9723502304147466, "grad_norm": 1.2152908203730401, "learning_rate": 1.1284169369421254e-06, "loss": 0.8566167950630188, "step": 4220 }, { "epoch": 0.9725806451612903, "grad_norm": 1.0787740661687364, "learning_rate": 1.1280390982464673e-06, "loss": 0.8103536367416382, "step": 4221 }, { "epoch": 0.9728110599078341, "grad_norm": 1.115333195517037, "learning_rate": 1.1276612409661036e-06, "loss": 0.8027071356773376, "step": 4222 }, { "epoch": 0.9730414746543778, "grad_norm": 1.1442493875477038, "learning_rate": 1.1272833651558796e-06, "loss": 0.8251115679740906, "step": 4223 }, { "epoch": 0.9732718894009217, "grad_norm": 1.1151561398542829, "learning_rate": 1.1269054708706437e-06, "loss": 0.6468047499656677, "step": 4224 }, { "epoch": 0.9735023041474654, "grad_norm": 1.129830296326307, "learning_rate": 1.1265275581652465e-06, "loss": 0.8085706233978271, "step": 4225 }, { "epoch": 0.9737327188940093, "grad_norm": 1.139574441171448, "learning_rate": 1.1261496270945418e-06, "loss": 0.8396503925323486, "step": 4226 }, { "epoch": 0.973963133640553, "grad_norm": 0.9978900351940978, "learning_rate": 1.1257716777133861e-06, "loss": 0.7860006093978882, "step": 4227 }, { "epoch": 0.9741935483870968, "grad_norm": 1.1484873689809545, "learning_rate": 1.1253937100766373e-06, "loss": 0.8630701303482056, "step": 4228 }, { "epoch": 0.9744239631336405, "grad_norm": 0.9488769562872501, "learning_rate": 1.1250157242391577e-06, "loss": 0.8363114595413208, "step": 4229 }, { "epoch": 0.9746543778801844, "grad_norm": 1.1415512207130691, "learning_rate": 1.1246377202558114e-06, "loss": 0.7837141156196594, "step": 4230 }, { "epoch": 0.9748847926267281, "grad_norm": 1.3474534084840375, "learning_rate": 1.1242596981814648e-06, "loss": 0.8283151984214783, "step": 4231 }, { "epoch": 0.9751152073732718, "grad_norm": 1.2728043293758005, "learning_rate": 1.1238816580709878e-06, "loss": 0.9232061505317688, "step": 4232 }, { "epoch": 0.9753456221198157, "grad_norm": 1.125514954365521, "learning_rate": 1.123503599979252e-06, "loss": 0.8721164464950562, "step": 4233 }, { "epoch": 0.9755760368663594, "grad_norm": 1.0382014546922784, "learning_rate": 1.1231255239611321e-06, "loss": 0.9398131370544434, "step": 4234 }, { "epoch": 0.9758064516129032, "grad_norm": 1.0916134182788353, "learning_rate": 1.1227474300715054e-06, "loss": 0.8124324083328247, "step": 4235 }, { "epoch": 0.976036866359447, "grad_norm": 0.8607187401974831, "learning_rate": 1.1223693183652515e-06, "loss": 0.8532534837722778, "step": 4236 }, { "epoch": 0.9762672811059908, "grad_norm": 1.10871517745179, "learning_rate": 1.1219911888972536e-06, "loss": 0.7547662258148193, "step": 4237 }, { "epoch": 0.9764976958525345, "grad_norm": 1.036940513326952, "learning_rate": 1.1216130417223956e-06, "loss": 0.7407231330871582, "step": 4238 }, { "epoch": 0.9767281105990784, "grad_norm": 1.0573090435680337, "learning_rate": 1.1212348768955657e-06, "loss": 0.8190197944641113, "step": 4239 }, { "epoch": 0.9769585253456221, "grad_norm": 1.111465926757279, "learning_rate": 1.1208566944716542e-06, "loss": 0.6641337871551514, "step": 4240 }, { "epoch": 0.977188940092166, "grad_norm": 1.224342353107687, "learning_rate": 1.120478494505553e-06, "loss": 0.8953202962875366, "step": 4241 }, { "epoch": 0.9774193548387097, "grad_norm": 0.9676272600083323, "learning_rate": 1.1201002770521583e-06, "loss": 0.7803191542625427, "step": 4242 }, { "epoch": 0.9776497695852534, "grad_norm": 1.1107043139306134, "learning_rate": 1.1197220421663674e-06, "loss": 0.6827100515365601, "step": 4243 }, { "epoch": 0.9778801843317972, "grad_norm": 1.2085442462659117, "learning_rate": 1.1193437899030802e-06, "loss": 0.8513565063476562, "step": 4244 }, { "epoch": 0.978110599078341, "grad_norm": 0.9785496460004156, "learning_rate": 1.1189655203172e-06, "loss": 0.7196829915046692, "step": 4245 }, { "epoch": 0.9783410138248848, "grad_norm": 1.0764048064511267, "learning_rate": 1.1185872334636319e-06, "loss": 0.7823485136032104, "step": 4246 }, { "epoch": 0.9785714285714285, "grad_norm": 1.0963006166840967, "learning_rate": 1.1182089293972841e-06, "loss": 0.7178136110305786, "step": 4247 }, { "epoch": 0.9788018433179724, "grad_norm": 1.0782886091125194, "learning_rate": 1.1178306081730664e-06, "loss": 0.7746715545654297, "step": 4248 }, { "epoch": 0.9790322580645161, "grad_norm": 0.9177757629071243, "learning_rate": 1.117452269845892e-06, "loss": 0.8829167485237122, "step": 4249 }, { "epoch": 0.9792626728110599, "grad_norm": 0.9096983569344097, "learning_rate": 1.1170739144706764e-06, "loss": 0.7592206001281738, "step": 4250 }, { "epoch": 0.9794930875576037, "grad_norm": 0.8361017174057647, "learning_rate": 1.1166955421023368e-06, "loss": 0.8107382655143738, "step": 4251 }, { "epoch": 0.9797235023041475, "grad_norm": 0.9837092835211146, "learning_rate": 1.116317152795794e-06, "loss": 0.6807001829147339, "step": 4252 }, { "epoch": 0.9799539170506912, "grad_norm": 1.1872199804636603, "learning_rate": 1.1159387466059705e-06, "loss": 0.7752517461776733, "step": 4253 }, { "epoch": 0.9801843317972351, "grad_norm": 0.8560133871531077, "learning_rate": 1.115560323587791e-06, "loss": 0.7484745383262634, "step": 4254 }, { "epoch": 0.9804147465437788, "grad_norm": 1.153488759551228, "learning_rate": 1.1151818837961838e-06, "loss": 0.877413809299469, "step": 4255 }, { "epoch": 0.9806451612903225, "grad_norm": 1.0087457568089837, "learning_rate": 1.1148034272860785e-06, "loss": 0.7806656360626221, "step": 4256 }, { "epoch": 0.9808755760368664, "grad_norm": 0.849135201735791, "learning_rate": 1.1144249541124078e-06, "loss": 0.6938076019287109, "step": 4257 }, { "epoch": 0.9811059907834101, "grad_norm": 1.0559339187336096, "learning_rate": 1.1140464643301064e-06, "loss": 0.8832957148551941, "step": 4258 }, { "epoch": 0.9813364055299539, "grad_norm": 1.1632523287766907, "learning_rate": 1.1136679579941117e-06, "loss": 0.7794016003608704, "step": 4259 }, { "epoch": 0.9815668202764977, "grad_norm": 0.9689102084269609, "learning_rate": 1.1132894351593636e-06, "loss": 0.6877585053443909, "step": 4260 }, { "epoch": 0.9817972350230415, "grad_norm": 1.0902109747190951, "learning_rate": 1.1129108958808037e-06, "loss": 0.8268473148345947, "step": 4261 }, { "epoch": 0.9820276497695852, "grad_norm": 1.0260596307079526, "learning_rate": 1.112532340213377e-06, "loss": 0.6717547178268433, "step": 4262 }, { "epoch": 0.9822580645161291, "grad_norm": 1.0646130416760407, "learning_rate": 1.11215376821203e-06, "loss": 0.849999725818634, "step": 4263 }, { "epoch": 0.9824884792626728, "grad_norm": 1.005034332417578, "learning_rate": 1.1117751799317118e-06, "loss": 0.6562552452087402, "step": 4264 }, { "epoch": 0.9827188940092166, "grad_norm": 1.0885536317886024, "learning_rate": 1.1113965754273743e-06, "loss": 0.7734784483909607, "step": 4265 }, { "epoch": 0.9829493087557604, "grad_norm": 1.0527283904271951, "learning_rate": 1.1110179547539717e-06, "loss": 0.7580564022064209, "step": 4266 }, { "epoch": 0.9831797235023041, "grad_norm": 1.121984331535499, "learning_rate": 1.1106393179664595e-06, "loss": 0.9207481145858765, "step": 4267 }, { "epoch": 0.9834101382488479, "grad_norm": 1.1182241685665208, "learning_rate": 1.1102606651197968e-06, "loss": 0.8987482786178589, "step": 4268 }, { "epoch": 0.9836405529953917, "grad_norm": 0.8558732255272679, "learning_rate": 1.1098819962689445e-06, "loss": 0.7486778497695923, "step": 4269 }, { "epoch": 0.9838709677419355, "grad_norm": 0.9905311956335509, "learning_rate": 1.1095033114688662e-06, "loss": 0.7387109994888306, "step": 4270 }, { "epoch": 0.9841013824884792, "grad_norm": 0.913366940312768, "learning_rate": 1.109124610774527e-06, "loss": 0.7337637543678284, "step": 4271 }, { "epoch": 0.9843317972350231, "grad_norm": 1.1127819698251733, "learning_rate": 1.1087458942408952e-06, "loss": 0.7419463396072388, "step": 4272 }, { "epoch": 0.9845622119815668, "grad_norm": 1.0024132905496845, "learning_rate": 1.1083671619229407e-06, "loss": 0.7525068521499634, "step": 4273 }, { "epoch": 0.9847926267281106, "grad_norm": 1.2794306882440036, "learning_rate": 1.107988413875636e-06, "loss": 0.8593931198120117, "step": 4274 }, { "epoch": 0.9850230414746544, "grad_norm": 1.1058497522784536, "learning_rate": 1.107609650153956e-06, "loss": 0.9123519659042358, "step": 4275 }, { "epoch": 0.9852534562211982, "grad_norm": 1.0134863035075283, "learning_rate": 1.107230870812878e-06, "loss": 0.7099615335464478, "step": 4276 }, { "epoch": 0.9854838709677419, "grad_norm": 1.0305482113277953, "learning_rate": 1.1068520759073807e-06, "loss": 0.9525141716003418, "step": 4277 }, { "epoch": 0.9857142857142858, "grad_norm": 1.078520213597711, "learning_rate": 1.106473265492446e-06, "loss": 0.8360154628753662, "step": 4278 }, { "epoch": 0.9859447004608295, "grad_norm": 0.835665323629814, "learning_rate": 1.106094439623058e-06, "loss": 0.7788960933685303, "step": 4279 }, { "epoch": 0.9861751152073732, "grad_norm": 1.4332707697001132, "learning_rate": 1.1057155983542024e-06, "loss": 0.76897132396698, "step": 4280 }, { "epoch": 0.9864055299539171, "grad_norm": 1.2788839563876278, "learning_rate": 1.1053367417408678e-06, "loss": 0.8062764406204224, "step": 4281 }, { "epoch": 0.9866359447004608, "grad_norm": 1.0759322336892816, "learning_rate": 1.1049578698380446e-06, "loss": 0.6796555519104004, "step": 4282 }, { "epoch": 0.9868663594470046, "grad_norm": 1.2156156083740777, "learning_rate": 1.1045789827007256e-06, "loss": 0.8495693206787109, "step": 4283 }, { "epoch": 0.9870967741935484, "grad_norm": 1.1065961656311563, "learning_rate": 1.1042000803839054e-06, "loss": 0.9202588200569153, "step": 4284 }, { "epoch": 0.9873271889400922, "grad_norm": 1.0492103887070696, "learning_rate": 1.1038211629425815e-06, "loss": 0.8204039335250854, "step": 4285 }, { "epoch": 0.9875576036866359, "grad_norm": 1.3424135227199923, "learning_rate": 1.1034422304317534e-06, "loss": 0.921082615852356, "step": 4286 }, { "epoch": 0.9877880184331798, "grad_norm": 1.1158968493314756, "learning_rate": 1.1030632829064225e-06, "loss": 0.8114739656448364, "step": 4287 }, { "epoch": 0.9880184331797235, "grad_norm": 1.160400130956272, "learning_rate": 1.1026843204215924e-06, "loss": 0.7394933700561523, "step": 4288 }, { "epoch": 0.9882488479262673, "grad_norm": 1.102093260654992, "learning_rate": 1.1023053430322692e-06, "loss": 0.9515210390090942, "step": 4289 }, { "epoch": 0.988479262672811, "grad_norm": 1.0914130901392678, "learning_rate": 1.1019263507934611e-06, "loss": 0.6729186773300171, "step": 4290 }, { "epoch": 0.9887096774193549, "grad_norm": 0.9547635126100301, "learning_rate": 1.1015473437601776e-06, "loss": 0.6455283164978027, "step": 4291 }, { "epoch": 0.9889400921658986, "grad_norm": 1.1259220869244864, "learning_rate": 1.1011683219874322e-06, "loss": 0.8071424961090088, "step": 4292 }, { "epoch": 0.9891705069124423, "grad_norm": 0.8980294635582122, "learning_rate": 1.1007892855302385e-06, "loss": 0.7287160754203796, "step": 4293 }, { "epoch": 0.9894009216589862, "grad_norm": 0.956104694967055, "learning_rate": 1.1004102344436135e-06, "loss": 0.7916513681411743, "step": 4294 }, { "epoch": 0.9896313364055299, "grad_norm": 0.948939194234829, "learning_rate": 1.1000311687825757e-06, "loss": 0.8075610399246216, "step": 4295 }, { "epoch": 0.9898617511520738, "grad_norm": 0.8467724433306772, "learning_rate": 1.0996520886021465e-06, "loss": 0.6144437193870544, "step": 4296 }, { "epoch": 0.9900921658986175, "grad_norm": 1.1816936561057356, "learning_rate": 1.0992729939573482e-06, "loss": 0.830337643623352, "step": 4297 }, { "epoch": 0.9903225806451613, "grad_norm": 1.1631921516982922, "learning_rate": 1.0988938849032063e-06, "loss": 0.7104393243789673, "step": 4298 }, { "epoch": 0.990552995391705, "grad_norm": 1.0166827801425276, "learning_rate": 1.0985147614947484e-06, "loss": 0.746238112449646, "step": 4299 }, { "epoch": 0.9907834101382489, "grad_norm": 0.8744941548736713, "learning_rate": 1.0981356237870027e-06, "loss": 0.7309597730636597, "step": 4300 }, { "epoch": 0.9910138248847926, "grad_norm": 1.1787483382236952, "learning_rate": 1.0977564718350013e-06, "loss": 0.799136757850647, "step": 4301 }, { "epoch": 0.9912442396313365, "grad_norm": 1.146252036070138, "learning_rate": 1.0973773056937776e-06, "loss": 0.7477747201919556, "step": 4302 }, { "epoch": 0.9914746543778802, "grad_norm": 1.1466743668258872, "learning_rate": 1.0969981254183668e-06, "loss": 0.8051053285598755, "step": 4303 }, { "epoch": 0.9917050691244239, "grad_norm": 0.9910519080633017, "learning_rate": 1.0966189310638063e-06, "loss": 0.8023163080215454, "step": 4304 }, { "epoch": 0.9919354838709677, "grad_norm": 0.9483313078672773, "learning_rate": 1.096239722685136e-06, "loss": 0.6804348230361938, "step": 4305 }, { "epoch": 0.9921658986175115, "grad_norm": 1.119857177527024, "learning_rate": 1.0958605003373976e-06, "loss": 0.8276509046554565, "step": 4306 }, { "epoch": 0.9923963133640553, "grad_norm": 1.2511674827094457, "learning_rate": 1.095481264075634e-06, "loss": 0.9733830690383911, "step": 4307 }, { "epoch": 0.992626728110599, "grad_norm": 1.070745120202566, "learning_rate": 1.0951020139548917e-06, "loss": 0.824803352355957, "step": 4308 }, { "epoch": 0.9928571428571429, "grad_norm": 1.100108017822232, "learning_rate": 1.094722750030218e-06, "loss": 0.8144090175628662, "step": 4309 }, { "epoch": 0.9930875576036866, "grad_norm": 1.1329325704330306, "learning_rate": 1.0943434723566623e-06, "loss": 0.8394016027450562, "step": 4310 }, { "epoch": 0.9933179723502304, "grad_norm": 1.0464489724076296, "learning_rate": 1.0939641809892766e-06, "loss": 0.7688177824020386, "step": 4311 }, { "epoch": 0.9935483870967742, "grad_norm": 1.0599291427198123, "learning_rate": 1.0935848759831144e-06, "loss": 0.8157391548156738, "step": 4312 }, { "epoch": 0.993778801843318, "grad_norm": 1.0072726544693649, "learning_rate": 1.0932055573932316e-06, "loss": 0.7618423700332642, "step": 4313 }, { "epoch": 0.9940092165898617, "grad_norm": 0.8996295977906229, "learning_rate": 1.0928262252746848e-06, "loss": 0.7404567003250122, "step": 4314 }, { "epoch": 0.9942396313364056, "grad_norm": 0.8729845318677907, "learning_rate": 1.092446879682535e-06, "loss": 0.6825613975524902, "step": 4315 }, { "epoch": 0.9944700460829493, "grad_norm": 0.886318283085954, "learning_rate": 1.0920675206718428e-06, "loss": 0.6607732772827148, "step": 4316 }, { "epoch": 0.994700460829493, "grad_norm": 1.1703494407740602, "learning_rate": 1.0916881482976716e-06, "loss": 0.715195894241333, "step": 4317 }, { "epoch": 0.9949308755760369, "grad_norm": 1.0266525014281969, "learning_rate": 1.0913087626150872e-06, "loss": 0.7593914270401001, "step": 4318 }, { "epoch": 0.9951612903225806, "grad_norm": 0.9546142286310197, "learning_rate": 1.090929363679157e-06, "loss": 0.8368399143218994, "step": 4319 }, { "epoch": 0.9953917050691244, "grad_norm": 1.0080836713071024, "learning_rate": 1.0905499515449499e-06, "loss": 0.7799170613288879, "step": 4320 }, { "epoch": 0.9956221198156682, "grad_norm": 1.0450181436512773, "learning_rate": 1.0901705262675372e-06, "loss": 0.8194636702537537, "step": 4321 }, { "epoch": 0.995852534562212, "grad_norm": 0.7482572391575254, "learning_rate": 1.0897910879019917e-06, "loss": 0.7150344848632812, "step": 4322 }, { "epoch": 0.9960829493087557, "grad_norm": 1.0624528328831144, "learning_rate": 1.089411636503389e-06, "loss": 0.737568736076355, "step": 4323 }, { "epoch": 0.9963133640552996, "grad_norm": 0.9578129661977193, "learning_rate": 1.0890321721268056e-06, "loss": 0.7037359476089478, "step": 4324 }, { "epoch": 0.9965437788018433, "grad_norm": 1.1660806477651886, "learning_rate": 1.0886526948273206e-06, "loss": 0.7664542198181152, "step": 4325 }, { "epoch": 0.9967741935483871, "grad_norm": 1.1927624722703807, "learning_rate": 1.0882732046600138e-06, "loss": 0.7700943946838379, "step": 4326 }, { "epoch": 0.9970046082949309, "grad_norm": 0.9828460552540413, "learning_rate": 1.0878937016799683e-06, "loss": 0.7634885311126709, "step": 4327 }, { "epoch": 0.9972350230414746, "grad_norm": 0.9138031795649807, "learning_rate": 1.0875141859422685e-06, "loss": 0.6784960031509399, "step": 4328 }, { "epoch": 0.9974654377880184, "grad_norm": 0.9227707667287056, "learning_rate": 1.0871346575020002e-06, "loss": 0.7224948406219482, "step": 4329 }, { "epoch": 0.9976958525345622, "grad_norm": 1.140456315375248, "learning_rate": 1.086755116414252e-06, "loss": 0.7886664867401123, "step": 4330 }, { "epoch": 0.997926267281106, "grad_norm": 0.8735584486255558, "learning_rate": 1.0863755627341133e-06, "loss": 0.7871295809745789, "step": 4331 }, { "epoch": 0.9981566820276497, "grad_norm": 0.9703663985745814, "learning_rate": 1.085995996516676e-06, "loss": 0.700717568397522, "step": 4332 }, { "epoch": 0.9983870967741936, "grad_norm": 1.0137806073331785, "learning_rate": 1.085616417817034e-06, "loss": 0.9090461730957031, "step": 4333 }, { "epoch": 0.9986175115207373, "grad_norm": 0.8161279565195018, "learning_rate": 1.0852368266902818e-06, "loss": 0.7697109580039978, "step": 4334 }, { "epoch": 0.9988479262672811, "grad_norm": 1.1335275167371797, "learning_rate": 1.0848572231915177e-06, "loss": 0.8135972023010254, "step": 4335 }, { "epoch": 0.9990783410138249, "grad_norm": 0.9620227504979613, "learning_rate": 1.0844776073758392e-06, "loss": 0.803811252117157, "step": 4336 }, { "epoch": 0.9993087557603687, "grad_norm": 1.1159399325844028, "learning_rate": 1.0840979792983482e-06, "loss": 0.874006986618042, "step": 4337 }, { "epoch": 0.9995391705069124, "grad_norm": 1.0695664725891423, "learning_rate": 1.0837183390141472e-06, "loss": 0.7424730062484741, "step": 4338 }, { "epoch": 0.9997695852534563, "grad_norm": 1.0413618177070603, "learning_rate": 1.0833386865783393e-06, "loss": 0.8219665884971619, "step": 4339 }, { "epoch": 1.0, "grad_norm": 1.2200287736254531, "learning_rate": 1.0829590220460319e-06, "loss": 0.7065195441246033, "step": 4340 }, { "epoch": 1.0002304147465437, "grad_norm": 1.4255251627812264, "learning_rate": 1.0825793454723324e-06, "loss": 0.7988346219062805, "step": 4341 }, { "epoch": 1.0004608294930875, "grad_norm": 0.9544404961531333, "learning_rate": 1.08219965691235e-06, "loss": 0.6731617450714111, "step": 4342 }, { "epoch": 1.0006912442396314, "grad_norm": 1.0713203032897287, "learning_rate": 1.0818199564211964e-06, "loss": 0.8058687448501587, "step": 4343 }, { "epoch": 1.0009216589861751, "grad_norm": 1.2330384736552804, "learning_rate": 1.081440244053984e-06, "loss": 0.8351448178291321, "step": 4344 }, { "epoch": 1.0011520737327189, "grad_norm": 0.9578484310628987, "learning_rate": 1.0810605198658286e-06, "loss": 0.8619185090065002, "step": 4345 }, { "epoch": 1.0013824884792626, "grad_norm": 1.030004028036847, "learning_rate": 1.0806807839118455e-06, "loss": 0.7600966691970825, "step": 4346 }, { "epoch": 1.0016129032258065, "grad_norm": 1.103182000242006, "learning_rate": 1.0803010362471536e-06, "loss": 0.8123422265052795, "step": 4347 }, { "epoch": 1.0018433179723503, "grad_norm": 1.0359331933938025, "learning_rate": 1.0799212769268727e-06, "loss": 0.8277603983879089, "step": 4348 }, { "epoch": 1.002073732718894, "grad_norm": 0.7466130076646643, "learning_rate": 1.079541506006124e-06, "loss": 0.6666774153709412, "step": 4349 }, { "epoch": 1.0023041474654377, "grad_norm": 1.0582236596847403, "learning_rate": 1.0791617235400313e-06, "loss": 0.8483254909515381, "step": 4350 }, { "epoch": 1.0025345622119817, "grad_norm": 0.9094409000603249, "learning_rate": 1.0787819295837193e-06, "loss": 0.6585661172866821, "step": 4351 }, { "epoch": 1.0027649769585254, "grad_norm": 1.0274936512349702, "learning_rate": 1.0784021241923142e-06, "loss": 0.7591124773025513, "step": 4352 }, { "epoch": 1.0029953917050691, "grad_norm": 1.0201165998262116, "learning_rate": 1.078022307420945e-06, "loss": 0.7305805683135986, "step": 4353 }, { "epoch": 1.0032258064516129, "grad_norm": 0.8894858318623733, "learning_rate": 1.0776424793247407e-06, "loss": 0.6558996438980103, "step": 4354 }, { "epoch": 1.0034562211981566, "grad_norm": 1.313034349644303, "learning_rate": 1.0772626399588336e-06, "loss": 0.6837360262870789, "step": 4355 }, { "epoch": 1.0036866359447005, "grad_norm": 0.9187212026563307, "learning_rate": 1.0768827893783562e-06, "loss": 0.778124988079071, "step": 4356 }, { "epoch": 1.0039170506912443, "grad_norm": 1.0828207561971888, "learning_rate": 1.0765029276384438e-06, "loss": 0.7676408886909485, "step": 4357 }, { "epoch": 1.004147465437788, "grad_norm": 1.1604376015370672, "learning_rate": 1.0761230547942333e-06, "loss": 0.854246973991394, "step": 4358 }, { "epoch": 1.0043778801843317, "grad_norm": 0.9177073619188721, "learning_rate": 1.0757431709008615e-06, "loss": 0.716766893863678, "step": 4359 }, { "epoch": 1.0046082949308757, "grad_norm": 0.9439720321299626, "learning_rate": 1.075363276013469e-06, "loss": 0.6827799081802368, "step": 4360 }, { "epoch": 1.0048387096774194, "grad_norm": 0.9539231430903122, "learning_rate": 1.074983370187197e-06, "loss": 0.7977348566055298, "step": 4361 }, { "epoch": 1.0050691244239631, "grad_norm": 1.1227456227969494, "learning_rate": 1.0746034534771878e-06, "loss": 0.6958035826683044, "step": 4362 }, { "epoch": 1.0052995391705069, "grad_norm": 0.9288361874867539, "learning_rate": 1.0742235259385861e-06, "loss": 0.8407979607582092, "step": 4363 }, { "epoch": 1.0055299539170508, "grad_norm": 0.8466973629768922, "learning_rate": 1.073843587626538e-06, "loss": 0.8180495500564575, "step": 4364 }, { "epoch": 1.0057603686635945, "grad_norm": 0.9973113541484702, "learning_rate": 1.0734636385961907e-06, "loss": 0.7551306486129761, "step": 4365 }, { "epoch": 1.0059907834101383, "grad_norm": 1.1054013447474482, "learning_rate": 1.0730836789026936e-06, "loss": 0.6598455309867859, "step": 4366 }, { "epoch": 1.006221198156682, "grad_norm": 0.9578758202335947, "learning_rate": 1.0727037086011971e-06, "loss": 0.9186126589775085, "step": 4367 }, { "epoch": 1.0064516129032257, "grad_norm": 1.0208878451508383, "learning_rate": 1.0723237277468538e-06, "loss": 0.8491259813308716, "step": 4368 }, { "epoch": 1.0066820276497697, "grad_norm": 1.0678483382751343, "learning_rate": 1.071943736394817e-06, "loss": 0.6938691139221191, "step": 4369 }, { "epoch": 1.0069124423963134, "grad_norm": 1.1084737690479445, "learning_rate": 1.0715637346002423e-06, "loss": 0.801313579082489, "step": 4370 }, { "epoch": 1.0071428571428571, "grad_norm": 0.983698557868892, "learning_rate": 1.071183722418286e-06, "loss": 0.7663706541061401, "step": 4371 }, { "epoch": 1.0073732718894008, "grad_norm": 0.8508185045615759, "learning_rate": 1.070803699904107e-06, "loss": 0.7434467077255249, "step": 4372 }, { "epoch": 1.0076036866359448, "grad_norm": 1.331303605136832, "learning_rate": 1.0704236671128643e-06, "loss": 0.8366774320602417, "step": 4373 }, { "epoch": 1.0078341013824885, "grad_norm": 1.276875198714222, "learning_rate": 1.07004362409972e-06, "loss": 0.7027710676193237, "step": 4374 }, { "epoch": 1.0080645161290323, "grad_norm": 1.1122995966371962, "learning_rate": 1.0696635709198357e-06, "loss": 0.7965548038482666, "step": 4375 }, { "epoch": 1.008294930875576, "grad_norm": 1.0387807228424288, "learning_rate": 1.0692835076283768e-06, "loss": 0.8058432340621948, "step": 4376 }, { "epoch": 1.0085253456221197, "grad_norm": 1.1870264013217662, "learning_rate": 1.0689034342805085e-06, "loss": 0.9056248068809509, "step": 4377 }, { "epoch": 1.0087557603686637, "grad_norm": 1.0069765876574615, "learning_rate": 1.0685233509313979e-06, "loss": 0.8407673835754395, "step": 4378 }, { "epoch": 1.0089861751152074, "grad_norm": 1.3133023777292065, "learning_rate": 1.0681432576362133e-06, "loss": 0.9138794541358948, "step": 4379 }, { "epoch": 1.0092165898617511, "grad_norm": 1.3361237624577444, "learning_rate": 1.067763154450125e-06, "loss": 0.6640630960464478, "step": 4380 }, { "epoch": 1.0094470046082948, "grad_norm": 1.4646712113013267, "learning_rate": 1.0673830414283051e-06, "loss": 0.9387146234512329, "step": 4381 }, { "epoch": 1.0096774193548388, "grad_norm": 1.0228212242769696, "learning_rate": 1.067002918625926e-06, "loss": 0.7288271188735962, "step": 4382 }, { "epoch": 1.0099078341013825, "grad_norm": 1.1693551967727813, "learning_rate": 1.0666227860981613e-06, "loss": 0.7886035442352295, "step": 4383 }, { "epoch": 1.0101382488479262, "grad_norm": 1.056596025284508, "learning_rate": 1.066242643900188e-06, "loss": 0.6929852962493896, "step": 4384 }, { "epoch": 1.01036866359447, "grad_norm": 0.9057033157053335, "learning_rate": 1.065862492087182e-06, "loss": 0.7709990739822388, "step": 4385 }, { "epoch": 1.010599078341014, "grad_norm": 1.0362803754904506, "learning_rate": 1.065482330714323e-06, "loss": 0.811382532119751, "step": 4386 }, { "epoch": 1.0108294930875577, "grad_norm": 1.2204693151649666, "learning_rate": 1.0651021598367905e-06, "loss": 0.8274353742599487, "step": 4387 }, { "epoch": 1.0110599078341014, "grad_norm": 0.9995911348883496, "learning_rate": 1.0647219795097651e-06, "loss": 0.7449204921722412, "step": 4388 }, { "epoch": 1.011290322580645, "grad_norm": 0.906861932756066, "learning_rate": 1.0643417897884303e-06, "loss": 0.675945520401001, "step": 4389 }, { "epoch": 1.0115207373271888, "grad_norm": 1.183632210098949, "learning_rate": 1.06396159072797e-06, "loss": 0.7329400777816772, "step": 4390 }, { "epoch": 1.0117511520737328, "grad_norm": 0.9566645616399831, "learning_rate": 1.0635813823835692e-06, "loss": 0.7809139490127563, "step": 4391 }, { "epoch": 1.0119815668202765, "grad_norm": 1.0167427862718812, "learning_rate": 1.0632011648104155e-06, "loss": 0.799081563949585, "step": 4392 }, { "epoch": 1.0122119815668202, "grad_norm": 1.0484890321007356, "learning_rate": 1.062820938063696e-06, "loss": 0.7738279104232788, "step": 4393 }, { "epoch": 1.012442396313364, "grad_norm": 0.9791695127555486, "learning_rate": 1.0624407021986007e-06, "loss": 0.895797610282898, "step": 4394 }, { "epoch": 1.012672811059908, "grad_norm": 0.9476041908693101, "learning_rate": 1.0620604572703198e-06, "loss": 0.6887848973274231, "step": 4395 }, { "epoch": 1.0129032258064516, "grad_norm": 1.0915270783702586, "learning_rate": 1.0616802033340457e-06, "loss": 0.9540888071060181, "step": 4396 }, { "epoch": 1.0131336405529954, "grad_norm": 1.3368596619746418, "learning_rate": 1.0612999404449721e-06, "loss": 0.9047783017158508, "step": 4397 }, { "epoch": 1.013364055299539, "grad_norm": 0.924946076870977, "learning_rate": 1.0609196686582931e-06, "loss": 0.7030448913574219, "step": 4398 }, { "epoch": 1.013594470046083, "grad_norm": 0.9501232585433265, "learning_rate": 1.0605393880292046e-06, "loss": 0.8097348213195801, "step": 4399 }, { "epoch": 1.0138248847926268, "grad_norm": 1.0163791343408108, "learning_rate": 1.0601590986129045e-06, "loss": 0.7446185350418091, "step": 4400 }, { "epoch": 1.0140552995391705, "grad_norm": 1.0548185515811, "learning_rate": 1.0597788004645908e-06, "loss": 0.7450964450836182, "step": 4401 }, { "epoch": 1.0142857142857142, "grad_norm": 1.1891450532947472, "learning_rate": 1.0593984936394632e-06, "loss": 0.8326355218887329, "step": 4402 }, { "epoch": 1.014516129032258, "grad_norm": 1.0194370020803867, "learning_rate": 1.0590181781927227e-06, "loss": 0.7013953924179077, "step": 4403 }, { "epoch": 1.014746543778802, "grad_norm": 1.2634402455639506, "learning_rate": 1.0586378541795723e-06, "loss": 0.7806364297866821, "step": 4404 }, { "epoch": 1.0149769585253456, "grad_norm": 1.2061797737844093, "learning_rate": 1.0582575216552146e-06, "loss": 0.8207389116287231, "step": 4405 }, { "epoch": 1.0152073732718894, "grad_norm": 1.123863770924685, "learning_rate": 1.0578771806748545e-06, "loss": 0.8042873740196228, "step": 4406 }, { "epoch": 1.015437788018433, "grad_norm": 0.9837741196260199, "learning_rate": 1.057496831293699e-06, "loss": 0.7225071787834167, "step": 4407 }, { "epoch": 1.015668202764977, "grad_norm": 0.8165867352878113, "learning_rate": 1.0571164735669538e-06, "loss": 0.7783743143081665, "step": 4408 }, { "epoch": 1.0158986175115208, "grad_norm": 1.1050702802288892, "learning_rate": 1.0567361075498286e-06, "loss": 0.7455039024353027, "step": 4409 }, { "epoch": 1.0161290322580645, "grad_norm": 1.0331220241961572, "learning_rate": 1.0563557332975322e-06, "loss": 0.7819615602493286, "step": 4410 }, { "epoch": 1.0163594470046082, "grad_norm": 1.052305833495017, "learning_rate": 1.0559753508652758e-06, "loss": 0.6466404795646667, "step": 4411 }, { "epoch": 1.0165898617511522, "grad_norm": 0.9503687927611121, "learning_rate": 1.0555949603082715e-06, "loss": 0.8728539943695068, "step": 4412 }, { "epoch": 1.016820276497696, "grad_norm": 0.9080353373358744, "learning_rate": 1.055214561681732e-06, "loss": 0.6082659959793091, "step": 4413 }, { "epoch": 1.0170506912442396, "grad_norm": 1.1401384988886654, "learning_rate": 1.054834155040872e-06, "loss": 0.8429103493690491, "step": 4414 }, { "epoch": 1.0172811059907834, "grad_norm": 0.9060045457810262, "learning_rate": 1.0544537404409073e-06, "loss": 0.7953135967254639, "step": 4415 }, { "epoch": 1.017511520737327, "grad_norm": 0.6713482182574511, "learning_rate": 1.0540733179370542e-06, "loss": 0.7243527173995972, "step": 4416 }, { "epoch": 1.017741935483871, "grad_norm": 1.4572192259453962, "learning_rate": 1.0536928875845303e-06, "loss": 0.6882613897323608, "step": 4417 }, { "epoch": 1.0179723502304148, "grad_norm": 0.9719982264568039, "learning_rate": 1.053312449438555e-06, "loss": 0.9157286882400513, "step": 4418 }, { "epoch": 1.0182027649769585, "grad_norm": 1.1196456434566004, "learning_rate": 1.0529320035543482e-06, "loss": 0.7224643230438232, "step": 4419 }, { "epoch": 1.0184331797235022, "grad_norm": 1.4712628070157254, "learning_rate": 1.0525515499871311e-06, "loss": 0.874829888343811, "step": 4420 }, { "epoch": 1.0186635944700462, "grad_norm": 0.9184049522457163, "learning_rate": 1.0521710887921262e-06, "loss": 0.6911267042160034, "step": 4421 }, { "epoch": 1.01889400921659, "grad_norm": 1.1423796554253005, "learning_rate": 1.051790620024557e-06, "loss": 0.9065574407577515, "step": 4422 }, { "epoch": 1.0191244239631336, "grad_norm": 1.225714416603257, "learning_rate": 1.0514101437396474e-06, "loss": 0.7671108245849609, "step": 4423 }, { "epoch": 1.0193548387096774, "grad_norm": 1.3506661037387142, "learning_rate": 1.051029659992624e-06, "loss": 0.8706510066986084, "step": 4424 }, { "epoch": 1.019585253456221, "grad_norm": 1.4185673299670827, "learning_rate": 1.0506491688387128e-06, "loss": 0.741087794303894, "step": 4425 }, { "epoch": 1.019815668202765, "grad_norm": 1.0122076007105019, "learning_rate": 1.0502686703331419e-06, "loss": 0.8045330047607422, "step": 4426 }, { "epoch": 1.0200460829493088, "grad_norm": 1.1768435258548835, "learning_rate": 1.0498881645311398e-06, "loss": 0.8464969992637634, "step": 4427 }, { "epoch": 1.0202764976958525, "grad_norm": 1.1260966872974236, "learning_rate": 1.0495076514879367e-06, "loss": 0.7660650610923767, "step": 4428 }, { "epoch": 1.0205069124423962, "grad_norm": 1.0026539513539563, "learning_rate": 1.0491271312587636e-06, "loss": 0.8565669059753418, "step": 4429 }, { "epoch": 1.0207373271889402, "grad_norm": 1.306851956145893, "learning_rate": 1.0487466038988525e-06, "loss": 0.8884295225143433, "step": 4430 }, { "epoch": 1.020967741935484, "grad_norm": 1.0672501887857282, "learning_rate": 1.0483660694634361e-06, "loss": 0.7300036549568176, "step": 4431 }, { "epoch": 1.0211981566820276, "grad_norm": 1.261937486377886, "learning_rate": 1.0479855280077493e-06, "loss": 0.7879898548126221, "step": 4432 }, { "epoch": 1.0214285714285714, "grad_norm": 1.5182696761272942, "learning_rate": 1.0476049795870263e-06, "loss": 0.9811698198318481, "step": 4433 }, { "epoch": 1.0216589861751153, "grad_norm": 1.1962738461411733, "learning_rate": 1.0472244242565034e-06, "loss": 0.7706241607666016, "step": 4434 }, { "epoch": 1.021889400921659, "grad_norm": 1.289215010975763, "learning_rate": 1.046843862071418e-06, "loss": 0.761093020439148, "step": 4435 }, { "epoch": 1.0221198156682028, "grad_norm": 1.2142929670752842, "learning_rate": 1.046463293087008e-06, "loss": 0.8306092619895935, "step": 4436 }, { "epoch": 1.0223502304147465, "grad_norm": 1.0820298518439184, "learning_rate": 1.0460827173585125e-06, "loss": 0.9669788479804993, "step": 4437 }, { "epoch": 1.0225806451612902, "grad_norm": 1.173748576404213, "learning_rate": 1.0457021349411715e-06, "loss": 0.8461639285087585, "step": 4438 }, { "epoch": 1.0228110599078342, "grad_norm": 1.0738697424760002, "learning_rate": 1.0453215458902262e-06, "loss": 0.7230383157730103, "step": 4439 }, { "epoch": 1.023041474654378, "grad_norm": 1.195555915731222, "learning_rate": 1.0449409502609186e-06, "loss": 0.7506514191627502, "step": 4440 }, { "epoch": 1.0232718894009216, "grad_norm": 1.2468090783946124, "learning_rate": 1.0445603481084914e-06, "loss": 0.7530048489570618, "step": 4441 }, { "epoch": 1.0235023041474653, "grad_norm": 1.1659142578592716, "learning_rate": 1.044179739488189e-06, "loss": 0.8402249813079834, "step": 4442 }, { "epoch": 1.0237327188940093, "grad_norm": 0.9379480482149454, "learning_rate": 1.0437991244552557e-06, "loss": 0.7661963701248169, "step": 4443 }, { "epoch": 1.023963133640553, "grad_norm": 1.484925993605904, "learning_rate": 1.043418503064937e-06, "loss": 0.7982668876647949, "step": 4444 }, { "epoch": 1.0241935483870968, "grad_norm": 1.5153078123946815, "learning_rate": 1.0430378753724807e-06, "loss": 0.899538516998291, "step": 4445 }, { "epoch": 1.0244239631336405, "grad_norm": 1.0283178313705175, "learning_rate": 1.0426572414331337e-06, "loss": 0.8027441501617432, "step": 4446 }, { "epoch": 1.0246543778801844, "grad_norm": 1.0275551729897887, "learning_rate": 1.0422766013021442e-06, "loss": 0.8575221300125122, "step": 4447 }, { "epoch": 1.0248847926267282, "grad_norm": 1.0529216327738424, "learning_rate": 1.0418959550347622e-06, "loss": 0.7001699209213257, "step": 4448 }, { "epoch": 1.0251152073732719, "grad_norm": 1.344629476023339, "learning_rate": 1.041515302686238e-06, "loss": 0.9296507835388184, "step": 4449 }, { "epoch": 1.0253456221198156, "grad_norm": 1.1736142719382505, "learning_rate": 1.0411346443118222e-06, "loss": 0.8214550018310547, "step": 4450 }, { "epoch": 1.0255760368663593, "grad_norm": 1.111485424859677, "learning_rate": 1.0407539799667673e-06, "loss": 0.7598673701286316, "step": 4451 }, { "epoch": 1.0258064516129033, "grad_norm": 1.1453890077051856, "learning_rate": 1.0403733097063265e-06, "loss": 0.8222990036010742, "step": 4452 }, { "epoch": 1.026036866359447, "grad_norm": 0.8681765527907143, "learning_rate": 1.039992633585753e-06, "loss": 0.7860872745513916, "step": 4453 }, { "epoch": 1.0262672811059907, "grad_norm": 0.7352315377021262, "learning_rate": 1.0396119516603018e-06, "loss": 0.6602796912193298, "step": 4454 }, { "epoch": 1.0264976958525345, "grad_norm": 0.7865024675454858, "learning_rate": 1.0392312639852278e-06, "loss": 0.554654598236084, "step": 4455 }, { "epoch": 1.0267281105990784, "grad_norm": 0.997694873166315, "learning_rate": 1.0388505706157885e-06, "loss": 0.7977210879325867, "step": 4456 }, { "epoch": 1.0269585253456222, "grad_norm": 0.9315155505189272, "learning_rate": 1.0384698716072398e-06, "loss": 0.8770938515663147, "step": 4457 }, { "epoch": 1.0271889400921659, "grad_norm": 1.1958306146081352, "learning_rate": 1.0380891670148403e-06, "loss": 0.710452675819397, "step": 4458 }, { "epoch": 1.0274193548387096, "grad_norm": 1.0231453414790668, "learning_rate": 1.0377084568938485e-06, "loss": 0.8876768946647644, "step": 4459 }, { "epoch": 1.0276497695852536, "grad_norm": 1.1707146109643827, "learning_rate": 1.0373277412995241e-06, "loss": 0.7770971059799194, "step": 4460 }, { "epoch": 1.0278801843317973, "grad_norm": 1.2438301523835749, "learning_rate": 1.0369470202871275e-06, "loss": 0.9199050068855286, "step": 4461 }, { "epoch": 1.028110599078341, "grad_norm": 1.225766455591599, "learning_rate": 1.0365662939119199e-06, "loss": 0.7931548357009888, "step": 4462 }, { "epoch": 1.0283410138248847, "grad_norm": 0.9403888957806107, "learning_rate": 1.0361855622291636e-06, "loss": 0.7484941482543945, "step": 4463 }, { "epoch": 1.0285714285714285, "grad_norm": 1.1077517121943607, "learning_rate": 1.03580482529412e-06, "loss": 0.7639475464820862, "step": 4464 }, { "epoch": 1.0288018433179724, "grad_norm": 0.9266455289292281, "learning_rate": 1.035424083162054e-06, "loss": 0.7705268859863281, "step": 4465 }, { "epoch": 1.0290322580645161, "grad_norm": 1.0602296301972336, "learning_rate": 1.0350433358882288e-06, "loss": 0.7714117169380188, "step": 4466 }, { "epoch": 1.0292626728110599, "grad_norm": 0.9812855436464868, "learning_rate": 1.0346625835279102e-06, "loss": 0.851073145866394, "step": 4467 }, { "epoch": 1.0294930875576036, "grad_norm": 0.9352903997309275, "learning_rate": 1.0342818261363631e-06, "loss": 0.8001583218574524, "step": 4468 }, { "epoch": 1.0297235023041476, "grad_norm": 1.1158901092617035, "learning_rate": 1.0339010637688547e-06, "loss": 0.8352588415145874, "step": 4469 }, { "epoch": 1.0299539170506913, "grad_norm": 0.91245372061127, "learning_rate": 1.0335202964806515e-06, "loss": 0.8136032223701477, "step": 4470 }, { "epoch": 1.030184331797235, "grad_norm": 1.1248571903620148, "learning_rate": 1.0331395243270215e-06, "loss": 0.8041108846664429, "step": 4471 }, { "epoch": 1.0304147465437787, "grad_norm": 0.9370378251466553, "learning_rate": 1.032758747363234e-06, "loss": 0.6961067914962769, "step": 4472 }, { "epoch": 1.0306451612903227, "grad_norm": 0.8328897533850071, "learning_rate": 1.0323779656445572e-06, "loss": 0.8063983917236328, "step": 4473 }, { "epoch": 1.0308755760368664, "grad_norm": 1.01915176563276, "learning_rate": 1.0319971792262618e-06, "loss": 0.706061601638794, "step": 4474 }, { "epoch": 1.0311059907834101, "grad_norm": 1.1193687254143303, "learning_rate": 1.0316163881636181e-06, "loss": 0.8510581254959106, "step": 4475 }, { "epoch": 1.0313364055299539, "grad_norm": 0.8459775762451333, "learning_rate": 1.0312355925118975e-06, "loss": 0.7169028520584106, "step": 4476 }, { "epoch": 1.0315668202764976, "grad_norm": 0.8345675502163972, "learning_rate": 1.0308547923263718e-06, "loss": 0.7513360977172852, "step": 4477 }, { "epoch": 1.0317972350230415, "grad_norm": 1.1826641384928935, "learning_rate": 1.030473987662314e-06, "loss": 0.7408783435821533, "step": 4478 }, { "epoch": 1.0320276497695853, "grad_norm": 1.2135549739175484, "learning_rate": 1.0300931785749974e-06, "loss": 0.8177747130393982, "step": 4479 }, { "epoch": 1.032258064516129, "grad_norm": 1.074036475926982, "learning_rate": 1.0297123651196954e-06, "loss": 0.7530791759490967, "step": 4480 }, { "epoch": 1.0324884792626727, "grad_norm": 1.2947307404575235, "learning_rate": 1.0293315473516832e-06, "loss": 0.7958859205245972, "step": 4481 }, { "epoch": 1.0327188940092167, "grad_norm": 1.2482360288136136, "learning_rate": 1.0289507253262357e-06, "loss": 0.8719943761825562, "step": 4482 }, { "epoch": 1.0329493087557604, "grad_norm": 1.0347953021678673, "learning_rate": 1.028569899098629e-06, "loss": 0.7584139108657837, "step": 4483 }, { "epoch": 1.0331797235023041, "grad_norm": 1.1621251755994506, "learning_rate": 1.0281890687241387e-06, "loss": 0.852983832359314, "step": 4484 }, { "epoch": 1.0334101382488479, "grad_norm": 0.995758429643109, "learning_rate": 1.027808234258043e-06, "loss": 0.7455692291259766, "step": 4485 }, { "epoch": 1.0336405529953918, "grad_norm": 0.9126434588001895, "learning_rate": 1.0274273957556185e-06, "loss": 0.7078343629837036, "step": 4486 }, { "epoch": 1.0338709677419355, "grad_norm": 1.056440353383354, "learning_rate": 1.027046553272144e-06, "loss": 0.7580842971801758, "step": 4487 }, { "epoch": 1.0341013824884793, "grad_norm": 0.9071452550966383, "learning_rate": 1.026665706862898e-06, "loss": 0.7271389961242676, "step": 4488 }, { "epoch": 1.034331797235023, "grad_norm": 1.3819767756673818, "learning_rate": 1.0262848565831599e-06, "loss": 0.8271546363830566, "step": 4489 }, { "epoch": 1.0345622119815667, "grad_norm": 1.1533046933911033, "learning_rate": 1.0259040024882098e-06, "loss": 0.6799920201301575, "step": 4490 }, { "epoch": 1.0347926267281107, "grad_norm": 0.7837273040397605, "learning_rate": 1.0255231446333277e-06, "loss": 0.6962645053863525, "step": 4491 }, { "epoch": 1.0350230414746544, "grad_norm": 1.2060107344479347, "learning_rate": 1.0251422830737955e-06, "loss": 0.8722797632217407, "step": 4492 }, { "epoch": 1.0352534562211981, "grad_norm": 1.0328841633467782, "learning_rate": 1.024761417864894e-06, "loss": 0.8054880499839783, "step": 4493 }, { "epoch": 1.0354838709677419, "grad_norm": 0.9178345615112383, "learning_rate": 1.0243805490619053e-06, "loss": 0.8196548223495483, "step": 4494 }, { "epoch": 1.0357142857142858, "grad_norm": 1.5010413914558958, "learning_rate": 1.0239996767201122e-06, "loss": 0.8197275400161743, "step": 4495 }, { "epoch": 1.0359447004608295, "grad_norm": 1.1223467429515472, "learning_rate": 1.0236188008947978e-06, "loss": 0.7704858779907227, "step": 4496 }, { "epoch": 1.0361751152073733, "grad_norm": 1.2288506828429187, "learning_rate": 1.0232379216412459e-06, "loss": 0.8296232223510742, "step": 4497 }, { "epoch": 1.036405529953917, "grad_norm": 1.1910482399414777, "learning_rate": 1.0228570390147404e-06, "loss": 0.6546601057052612, "step": 4498 }, { "epoch": 1.036635944700461, "grad_norm": 1.0493042801064925, "learning_rate": 1.0224761530705656e-06, "loss": 0.808987021446228, "step": 4499 }, { "epoch": 1.0368663594470047, "grad_norm": 1.0198435860671902, "learning_rate": 1.0220952638640073e-06, "loss": 0.862627387046814, "step": 4500 }, { "epoch": 1.0370967741935484, "grad_norm": 0.9314966888515314, "learning_rate": 1.0217143714503507e-06, "loss": 0.781114935874939, "step": 4501 }, { "epoch": 1.0373271889400921, "grad_norm": 1.1732597442137338, "learning_rate": 1.0213334758848814e-06, "loss": 0.7186112403869629, "step": 4502 }, { "epoch": 1.0375576036866359, "grad_norm": 0.9870711221115687, "learning_rate": 1.0209525772228868e-06, "loss": 0.8112529516220093, "step": 4503 }, { "epoch": 1.0377880184331798, "grad_norm": 1.1558866878107408, "learning_rate": 1.020571675519653e-06, "loss": 0.7364751100540161, "step": 4504 }, { "epoch": 1.0380184331797235, "grad_norm": 1.296821231113786, "learning_rate": 1.0201907708304681e-06, "loss": 0.7015886902809143, "step": 4505 }, { "epoch": 1.0382488479262673, "grad_norm": 0.8755063657778166, "learning_rate": 1.0198098632106197e-06, "loss": 0.7018470168113708, "step": 4506 }, { "epoch": 1.038479262672811, "grad_norm": 0.9958013421397902, "learning_rate": 1.0194289527153953e-06, "loss": 0.820391058921814, "step": 4507 }, { "epoch": 1.038709677419355, "grad_norm": 1.2026544914516983, "learning_rate": 1.0190480394000844e-06, "loss": 0.8341129422187805, "step": 4508 }, { "epoch": 1.0389400921658987, "grad_norm": 0.8606365913019236, "learning_rate": 1.0186671233199757e-06, "loss": 0.7345695495605469, "step": 4509 }, { "epoch": 1.0391705069124424, "grad_norm": 1.375974242893794, "learning_rate": 1.0182862045303589e-06, "loss": 0.8899500370025635, "step": 4510 }, { "epoch": 1.0394009216589861, "grad_norm": 1.001562990779633, "learning_rate": 1.0179052830865238e-06, "loss": 0.8158663511276245, "step": 4511 }, { "epoch": 1.0396313364055298, "grad_norm": 1.1574048409080129, "learning_rate": 1.0175243590437604e-06, "loss": 0.734848141670227, "step": 4512 }, { "epoch": 1.0398617511520738, "grad_norm": 1.062511127484639, "learning_rate": 1.0171434324573596e-06, "loss": 0.7920876741409302, "step": 4513 }, { "epoch": 1.0400921658986175, "grad_norm": 1.2131341489328324, "learning_rate": 1.0167625033826122e-06, "loss": 0.9224791526794434, "step": 4514 }, { "epoch": 1.0403225806451613, "grad_norm": 1.152494191321953, "learning_rate": 1.0163815718748096e-06, "loss": 0.7086025476455688, "step": 4515 }, { "epoch": 1.040552995391705, "grad_norm": 1.0223491213154539, "learning_rate": 1.0160006379892434e-06, "loss": 0.7657936811447144, "step": 4516 }, { "epoch": 1.040783410138249, "grad_norm": 1.11296257844156, "learning_rate": 1.0156197017812058e-06, "loss": 0.786298394203186, "step": 4517 }, { "epoch": 1.0410138248847927, "grad_norm": 1.1998728834800867, "learning_rate": 1.0152387633059895e-06, "loss": 0.8667294979095459, "step": 4518 }, { "epoch": 1.0412442396313364, "grad_norm": 1.0233425185279803, "learning_rate": 1.0148578226188866e-06, "loss": 0.8479517102241516, "step": 4519 }, { "epoch": 1.0414746543778801, "grad_norm": 0.8930216519245627, "learning_rate": 1.0144768797751904e-06, "loss": 0.6430692076683044, "step": 4520 }, { "epoch": 1.041705069124424, "grad_norm": 1.122852329570553, "learning_rate": 1.0140959348301946e-06, "loss": 0.874313473701477, "step": 4521 }, { "epoch": 1.0419354838709678, "grad_norm": 1.101097598838231, "learning_rate": 1.013714987839192e-06, "loss": 0.8439676761627197, "step": 4522 }, { "epoch": 1.0421658986175115, "grad_norm": 1.2477053670484948, "learning_rate": 1.0133340388574774e-06, "loss": 0.7480089664459229, "step": 4523 }, { "epoch": 1.0423963133640552, "grad_norm": 1.3143250159570112, "learning_rate": 1.012953087940345e-06, "loss": 0.8786139488220215, "step": 4524 }, { "epoch": 1.042626728110599, "grad_norm": 1.1897211165926171, "learning_rate": 1.0125721351430885e-06, "loss": 0.8333299160003662, "step": 4525 }, { "epoch": 1.042857142857143, "grad_norm": 1.055645356383861, "learning_rate": 1.0121911805210032e-06, "loss": 0.8201998472213745, "step": 4526 }, { "epoch": 1.0430875576036867, "grad_norm": 1.160199033506195, "learning_rate": 1.0118102241293847e-06, "loss": 0.7793110609054565, "step": 4527 }, { "epoch": 1.0433179723502304, "grad_norm": 1.045720270383819, "learning_rate": 1.0114292660235272e-06, "loss": 0.7148817777633667, "step": 4528 }, { "epoch": 1.043548387096774, "grad_norm": 1.0726942336798908, "learning_rate": 1.011048306258727e-06, "loss": 0.7945176362991333, "step": 4529 }, { "epoch": 1.043778801843318, "grad_norm": 1.0532791972453868, "learning_rate": 1.01066734489028e-06, "loss": 0.7246826887130737, "step": 4530 }, { "epoch": 1.0440092165898618, "grad_norm": 1.230297656368, "learning_rate": 1.0102863819734822e-06, "loss": 0.7342358827590942, "step": 4531 }, { "epoch": 1.0442396313364055, "grad_norm": 1.1072867148521375, "learning_rate": 1.0099054175636292e-06, "loss": 0.6837234497070312, "step": 4532 }, { "epoch": 1.0444700460829492, "grad_norm": 0.8847188010063922, "learning_rate": 1.0095244517160184e-06, "loss": 0.6941408514976501, "step": 4533 }, { "epoch": 1.0447004608294932, "grad_norm": 0.9992175314765978, "learning_rate": 1.009143484485946e-06, "loss": 0.7835201025009155, "step": 4534 }, { "epoch": 1.044930875576037, "grad_norm": 1.1533173348493126, "learning_rate": 1.0087625159287086e-06, "loss": 0.7887566089630127, "step": 4535 }, { "epoch": 1.0451612903225806, "grad_norm": 0.9980831932241371, "learning_rate": 1.0083815460996036e-06, "loss": 0.7106727361679077, "step": 4536 }, { "epoch": 1.0453917050691244, "grad_norm": 1.1003103489016812, "learning_rate": 1.0080005750539287e-06, "loss": 0.8316382169723511, "step": 4537 }, { "epoch": 1.045622119815668, "grad_norm": 1.278017855977623, "learning_rate": 1.0076196028469805e-06, "loss": 0.7535592317581177, "step": 4538 }, { "epoch": 1.045852534562212, "grad_norm": 1.2167524484109087, "learning_rate": 1.0072386295340571e-06, "loss": 0.9255459308624268, "step": 4539 }, { "epoch": 1.0460829493087558, "grad_norm": 0.9884104383515986, "learning_rate": 1.0068576551704561e-06, "loss": 0.7415009140968323, "step": 4540 }, { "epoch": 1.0463133640552995, "grad_norm": 0.9221193872044946, "learning_rate": 1.0064766798114758e-06, "loss": 0.673210620880127, "step": 4541 }, { "epoch": 1.0465437788018432, "grad_norm": 1.2907861596502346, "learning_rate": 1.006095703512414e-06, "loss": 0.7063118815422058, "step": 4542 }, { "epoch": 1.0467741935483872, "grad_norm": 1.0344490200256125, "learning_rate": 1.005714726328569e-06, "loss": 0.73606276512146, "step": 4543 }, { "epoch": 1.047004608294931, "grad_norm": 1.1024687809140408, "learning_rate": 1.005333748315239e-06, "loss": 0.6723713874816895, "step": 4544 }, { "epoch": 1.0472350230414746, "grad_norm": 1.0566239460690536, "learning_rate": 1.0049527695277223e-06, "loss": 0.643845796585083, "step": 4545 }, { "epoch": 1.0474654377880184, "grad_norm": 1.1196128686458957, "learning_rate": 1.0045717900213175e-06, "loss": 0.8820847272872925, "step": 4546 }, { "epoch": 1.047695852534562, "grad_norm": 1.177142500227169, "learning_rate": 1.0041908098513239e-06, "loss": 0.6555176973342896, "step": 4547 }, { "epoch": 1.047926267281106, "grad_norm": 1.4046987769414077, "learning_rate": 1.0038098290730394e-06, "loss": 0.8142974376678467, "step": 4548 }, { "epoch": 1.0481566820276498, "grad_norm": 1.3843242800793498, "learning_rate": 1.0034288477417634e-06, "loss": 0.8107532262802124, "step": 4549 }, { "epoch": 1.0483870967741935, "grad_norm": 1.093115680939654, "learning_rate": 1.0030478659127947e-06, "loss": 0.7078464031219482, "step": 4550 }, { "epoch": 1.0486175115207372, "grad_norm": 1.3647000829373368, "learning_rate": 1.0026668836414322e-06, "loss": 0.9168295860290527, "step": 4551 }, { "epoch": 1.0488479262672812, "grad_norm": 0.7154125463388302, "learning_rate": 1.0022859009829752e-06, "loss": 0.7384864091873169, "step": 4552 }, { "epoch": 1.049078341013825, "grad_norm": 0.9459016715465385, "learning_rate": 1.0019049179927229e-06, "loss": 0.6092562675476074, "step": 4553 }, { "epoch": 1.0493087557603686, "grad_norm": 1.159695075830992, "learning_rate": 1.001523934725974e-06, "loss": 0.713464617729187, "step": 4554 }, { "epoch": 1.0495391705069124, "grad_norm": 0.9471368467961162, "learning_rate": 1.001142951238028e-06, "loss": 0.7514123916625977, "step": 4555 }, { "epoch": 1.0497695852534563, "grad_norm": 1.1414214053095963, "learning_rate": 1.000761967584184e-06, "loss": 0.8092095851898193, "step": 4556 }, { "epoch": 1.05, "grad_norm": 0.830509770117895, "learning_rate": 1.000380983819742e-06, "loss": 0.7609254717826843, "step": 4557 }, { "epoch": 1.0502304147465438, "grad_norm": 0.8874333429433436, "learning_rate": 1e-06, "loss": 0.8363404273986816, "step": 4558 }, { "epoch": 1.0504608294930875, "grad_norm": 1.1983399653767088, "learning_rate": 9.996190161802584e-07, "loss": 0.8139501810073853, "step": 4559 }, { "epoch": 1.0506912442396312, "grad_norm": 0.8984420952696672, "learning_rate": 9.992380324158157e-07, "loss": 0.8064978122711182, "step": 4560 }, { "epoch": 1.0509216589861752, "grad_norm": 0.9258651657418774, "learning_rate": 9.988570487619721e-07, "loss": 0.7162975072860718, "step": 4561 }, { "epoch": 1.051152073732719, "grad_norm": 1.2196516767947119, "learning_rate": 9.984760652740261e-07, "loss": 0.9298074245452881, "step": 4562 }, { "epoch": 1.0513824884792626, "grad_norm": 1.0770268299074148, "learning_rate": 9.980950820072773e-07, "loss": 0.6929144859313965, "step": 4563 }, { "epoch": 1.0516129032258064, "grad_norm": 0.919564091111097, "learning_rate": 9.97714099017025e-07, "loss": 0.6516381502151489, "step": 4564 }, { "epoch": 1.0518433179723503, "grad_norm": 1.091105354713726, "learning_rate": 9.97333116358568e-07, "loss": 0.864730715751648, "step": 4565 }, { "epoch": 1.052073732718894, "grad_norm": 0.9113453911026408, "learning_rate": 9.969521340872052e-07, "loss": 0.7911246418952942, "step": 4566 }, { "epoch": 1.0523041474654378, "grad_norm": 1.032556518691269, "learning_rate": 9.965711522582367e-07, "loss": 0.7766593098640442, "step": 4567 }, { "epoch": 1.0525345622119815, "grad_norm": 1.1309615036566574, "learning_rate": 9.961901709269607e-07, "loss": 0.7703378200531006, "step": 4568 }, { "epoch": 1.0527649769585254, "grad_norm": 0.9296180823184125, "learning_rate": 9.958091901486762e-07, "loss": 0.7068926692008972, "step": 4569 }, { "epoch": 1.0529953917050692, "grad_norm": 1.0589255494911889, "learning_rate": 9.954282099786824e-07, "loss": 0.740556538105011, "step": 4570 }, { "epoch": 1.053225806451613, "grad_norm": 1.1264720214776667, "learning_rate": 9.950472304722778e-07, "loss": 0.798403263092041, "step": 4571 }, { "epoch": 1.0534562211981566, "grad_norm": 0.9551633921802427, "learning_rate": 9.94666251684761e-07, "loss": 0.6945887804031372, "step": 4572 }, { "epoch": 1.0536866359447004, "grad_norm": 1.0978186377940822, "learning_rate": 9.942852736714312e-07, "loss": 0.8257915377616882, "step": 4573 }, { "epoch": 1.0539170506912443, "grad_norm": 1.108870855150134, "learning_rate": 9.939042964875859e-07, "loss": 0.751315712928772, "step": 4574 }, { "epoch": 1.054147465437788, "grad_norm": 0.8929134755319279, "learning_rate": 9.935233201885241e-07, "loss": 0.6607721447944641, "step": 4575 }, { "epoch": 1.0543778801843318, "grad_norm": 1.1623094406064765, "learning_rate": 9.931423448295438e-07, "loss": 0.9135023355484009, "step": 4576 }, { "epoch": 1.0546082949308755, "grad_norm": 1.1079901137426853, "learning_rate": 9.927613704659428e-07, "loss": 0.8238483667373657, "step": 4577 }, { "epoch": 1.0548387096774194, "grad_norm": 1.0927838633299076, "learning_rate": 9.923803971530196e-07, "loss": 0.7657001614570618, "step": 4578 }, { "epoch": 1.0550691244239632, "grad_norm": 1.0858899027259339, "learning_rate": 9.919994249460717e-07, "loss": 0.6360250115394592, "step": 4579 }, { "epoch": 1.055299539170507, "grad_norm": 3.1983788784304843, "learning_rate": 9.916184539003963e-07, "loss": 0.6958763003349304, "step": 4580 }, { "epoch": 1.0555299539170506, "grad_norm": 1.0079237517587447, "learning_rate": 9.912374840712915e-07, "loss": 0.7093038558959961, "step": 4581 }, { "epoch": 1.0557603686635946, "grad_norm": 1.0680215254508902, "learning_rate": 9.908565155140544e-07, "loss": 0.7641304731369019, "step": 4582 }, { "epoch": 1.0559907834101383, "grad_norm": 0.8923201066182703, "learning_rate": 9.904755482839817e-07, "loss": 0.7976446151733398, "step": 4583 }, { "epoch": 1.056221198156682, "grad_norm": 1.0963737907088362, "learning_rate": 9.900945824363707e-07, "loss": 0.8407114744186401, "step": 4584 }, { "epoch": 1.0564516129032258, "grad_norm": 1.0695401976763876, "learning_rate": 9.897136180265181e-07, "loss": 0.7988634705543518, "step": 4585 }, { "epoch": 1.0566820276497695, "grad_norm": 1.072342293651018, "learning_rate": 9.893326551097198e-07, "loss": 0.7847359776496887, "step": 4586 }, { "epoch": 1.0569124423963134, "grad_norm": 1.0629893453410204, "learning_rate": 9.889516937412728e-07, "loss": 0.8458963632583618, "step": 4587 }, { "epoch": 1.0571428571428572, "grad_norm": 1.1301054626559641, "learning_rate": 9.88570733976473e-07, "loss": 0.8479788899421692, "step": 4588 }, { "epoch": 1.057373271889401, "grad_norm": 1.180492999769349, "learning_rate": 9.881897758706154e-07, "loss": 0.7467283010482788, "step": 4589 }, { "epoch": 1.0576036866359446, "grad_norm": 1.1676226241505752, "learning_rate": 9.878088194789967e-07, "loss": 0.9400098323822021, "step": 4590 }, { "epoch": 1.0578341013824886, "grad_norm": 1.2151292863225376, "learning_rate": 9.874278648569118e-07, "loss": 0.8901257514953613, "step": 4591 }, { "epoch": 1.0580645161290323, "grad_norm": 1.2956773767909102, "learning_rate": 9.870469120596552e-07, "loss": 0.840053379535675, "step": 4592 }, { "epoch": 1.058294930875576, "grad_norm": 0.9938952111506293, "learning_rate": 9.866659611425225e-07, "loss": 0.6825235486030579, "step": 4593 }, { "epoch": 1.0585253456221198, "grad_norm": 1.2521534530730631, "learning_rate": 9.86285012160808e-07, "loss": 0.7783857583999634, "step": 4594 }, { "epoch": 1.0587557603686637, "grad_norm": 1.0517032997656734, "learning_rate": 9.859040651698055e-07, "loss": 0.7901174426078796, "step": 4595 }, { "epoch": 1.0589861751152074, "grad_norm": 1.2211963787816231, "learning_rate": 9.855231202248097e-07, "loss": 0.9475124478340149, "step": 4596 }, { "epoch": 1.0592165898617512, "grad_norm": 1.1872676544788658, "learning_rate": 9.851421773811133e-07, "loss": 0.8582692742347717, "step": 4597 }, { "epoch": 1.0594470046082949, "grad_norm": 1.1723948726757356, "learning_rate": 9.847612366940106e-07, "loss": 0.7885586023330688, "step": 4598 }, { "epoch": 1.0596774193548386, "grad_norm": 1.17635061110199, "learning_rate": 9.843802982187943e-07, "loss": 0.7981748580932617, "step": 4599 }, { "epoch": 1.0599078341013826, "grad_norm": 0.9066343519689628, "learning_rate": 9.839993620107563e-07, "loss": 0.7060403823852539, "step": 4600 }, { "epoch": 1.0601382488479263, "grad_norm": 1.2126688495293467, "learning_rate": 9.836184281251905e-07, "loss": 0.7902223467826843, "step": 4601 }, { "epoch": 1.06036866359447, "grad_norm": 0.9972491115312556, "learning_rate": 9.83237496617388e-07, "loss": 0.7074719071388245, "step": 4602 }, { "epoch": 1.0605990783410137, "grad_norm": 0.9455936494800175, "learning_rate": 9.828565675426405e-07, "loss": 0.7180163264274597, "step": 4603 }, { "epoch": 1.0608294930875577, "grad_norm": 0.8990997781996365, "learning_rate": 9.824756409562397e-07, "loss": 0.7040787935256958, "step": 4604 }, { "epoch": 1.0610599078341014, "grad_norm": 1.0311368456712493, "learning_rate": 9.820947169134765e-07, "loss": 0.8387063145637512, "step": 4605 }, { "epoch": 1.0612903225806452, "grad_norm": 1.0692817612993422, "learning_rate": 9.81713795469641e-07, "loss": 0.8587188124656677, "step": 4606 }, { "epoch": 1.0615207373271889, "grad_norm": 1.0418289468184643, "learning_rate": 9.813328766800242e-07, "loss": 0.729094386100769, "step": 4607 }, { "epoch": 1.0617511520737328, "grad_norm": 1.1884134090864242, "learning_rate": 9.809519605999158e-07, "loss": 1.0576609373092651, "step": 4608 }, { "epoch": 1.0619815668202766, "grad_norm": 1.1124938149620707, "learning_rate": 9.805710472846044e-07, "loss": 0.7605572938919067, "step": 4609 }, { "epoch": 1.0622119815668203, "grad_norm": 0.9566684121068049, "learning_rate": 9.801901367893807e-07, "loss": 0.722477912902832, "step": 4610 }, { "epoch": 1.062442396313364, "grad_norm": 0.9185071862681494, "learning_rate": 9.79809229169532e-07, "loss": 0.7335925698280334, "step": 4611 }, { "epoch": 1.0626728110599077, "grad_norm": 1.0494538531790283, "learning_rate": 9.794283244803466e-07, "loss": 0.8116357922554016, "step": 4612 }, { "epoch": 1.0629032258064517, "grad_norm": 1.0519905027101895, "learning_rate": 9.79047422777113e-07, "loss": 0.8004311323165894, "step": 4613 }, { "epoch": 1.0631336405529954, "grad_norm": 0.9803128568921189, "learning_rate": 9.786665241151185e-07, "loss": 0.8198168277740479, "step": 4614 }, { "epoch": 1.0633640552995391, "grad_norm": 0.9841178854805237, "learning_rate": 9.782856285496494e-07, "loss": 0.7031205892562866, "step": 4615 }, { "epoch": 1.0635944700460829, "grad_norm": 1.055262322588535, "learning_rate": 9.779047361359928e-07, "loss": 0.7303737998008728, "step": 4616 }, { "epoch": 1.0638248847926268, "grad_norm": 1.1694198331033647, "learning_rate": 9.775238469294345e-07, "loss": 0.8775424957275391, "step": 4617 }, { "epoch": 1.0640552995391706, "grad_norm": 0.9013154484602001, "learning_rate": 9.771429609852597e-07, "loss": 0.7463759183883667, "step": 4618 }, { "epoch": 1.0642857142857143, "grad_norm": 0.8792691967623277, "learning_rate": 9.767620783587542e-07, "loss": 0.7200205326080322, "step": 4619 }, { "epoch": 1.064516129032258, "grad_norm": 0.9102194522316246, "learning_rate": 9.763811991052019e-07, "loss": 0.8255786299705505, "step": 4620 }, { "epoch": 1.064746543778802, "grad_norm": 1.2552865619465912, "learning_rate": 9.760003232798877e-07, "loss": 0.7975195050239563, "step": 4621 }, { "epoch": 1.0649769585253457, "grad_norm": 0.9993977940644363, "learning_rate": 9.756194509380948e-07, "loss": 0.6993064880371094, "step": 4622 }, { "epoch": 1.0652073732718894, "grad_norm": 1.314757658160511, "learning_rate": 9.752385821351062e-07, "loss": 0.818634033203125, "step": 4623 }, { "epoch": 1.0654377880184331, "grad_norm": 1.0949894149977886, "learning_rate": 9.748577169262046e-07, "loss": 0.707933783531189, "step": 4624 }, { "epoch": 1.0656682027649769, "grad_norm": 1.1439419332653986, "learning_rate": 9.744768553666723e-07, "loss": 0.8133440017700195, "step": 4625 }, { "epoch": 1.0658986175115208, "grad_norm": 1.1394394770433072, "learning_rate": 9.740959975117901e-07, "loss": 0.8818857669830322, "step": 4626 }, { "epoch": 1.0661290322580645, "grad_norm": 0.9617616601353652, "learning_rate": 9.737151434168402e-07, "loss": 0.6057544946670532, "step": 4627 }, { "epoch": 1.0663594470046083, "grad_norm": 1.047486055121172, "learning_rate": 9.733342931371023e-07, "loss": 0.7560185194015503, "step": 4628 }, { "epoch": 1.066589861751152, "grad_norm": 1.233360971442642, "learning_rate": 9.72953446727856e-07, "loss": 0.8196524381637573, "step": 4629 }, { "epoch": 1.066820276497696, "grad_norm": 1.031309795003994, "learning_rate": 9.725726042443814e-07, "loss": 0.8695862889289856, "step": 4630 }, { "epoch": 1.0670506912442397, "grad_norm": 0.9769847065094724, "learning_rate": 9.721917657419573e-07, "loss": 0.7753207683563232, "step": 4631 }, { "epoch": 1.0672811059907834, "grad_norm": 1.0908524037443617, "learning_rate": 9.718109312758612e-07, "loss": 0.8245481252670288, "step": 4632 }, { "epoch": 1.0675115207373271, "grad_norm": 1.201628166799481, "learning_rate": 9.71430100901371e-07, "loss": 0.8654806613922119, "step": 4633 }, { "epoch": 1.067741935483871, "grad_norm": 1.22982718965067, "learning_rate": 9.710492746737642e-07, "loss": 0.8667370080947876, "step": 4634 }, { "epoch": 1.0679723502304148, "grad_norm": 1.2635323967888392, "learning_rate": 9.706684526483167e-07, "loss": 0.7786421775817871, "step": 4635 }, { "epoch": 1.0682027649769585, "grad_norm": 1.037203898616246, "learning_rate": 9.702876348803045e-07, "loss": 0.7788090705871582, "step": 4636 }, { "epoch": 1.0684331797235023, "grad_norm": 1.1815160856137523, "learning_rate": 9.69906821425003e-07, "loss": 0.812332034111023, "step": 4637 }, { "epoch": 1.068663594470046, "grad_norm": 1.2578908038434822, "learning_rate": 9.69526012337686e-07, "loss": 0.7884202599525452, "step": 4638 }, { "epoch": 1.06889400921659, "grad_norm": 1.0539526708204177, "learning_rate": 9.69145207673628e-07, "loss": 0.725990891456604, "step": 4639 }, { "epoch": 1.0691244239631337, "grad_norm": 1.01343921612526, "learning_rate": 9.687644074881028e-07, "loss": 0.7277272343635559, "step": 4640 }, { "epoch": 1.0693548387096774, "grad_norm": 1.0871506025213427, "learning_rate": 9.683836118363818e-07, "loss": 0.8081945180892944, "step": 4641 }, { "epoch": 1.0695852534562211, "grad_norm": 1.1050642405984226, "learning_rate": 9.680028207737383e-07, "loss": 0.8633503913879395, "step": 4642 }, { "epoch": 1.069815668202765, "grad_norm": 0.9415461517108813, "learning_rate": 9.67622034355443e-07, "loss": 0.7873313426971436, "step": 4643 }, { "epoch": 1.0700460829493088, "grad_norm": 1.269353126640295, "learning_rate": 9.67241252636766e-07, "loss": 0.7927644848823547, "step": 4644 }, { "epoch": 1.0702764976958525, "grad_norm": 1.395156348091843, "learning_rate": 9.668604756729784e-07, "loss": 0.9458138942718506, "step": 4645 }, { "epoch": 1.0705069124423963, "grad_norm": 1.2621680271291411, "learning_rate": 9.664797035193484e-07, "loss": 0.7471280097961426, "step": 4646 }, { "epoch": 1.07073732718894, "grad_norm": 1.0373772164844823, "learning_rate": 9.660989362311455e-07, "loss": 0.7666789293289185, "step": 4647 }, { "epoch": 1.070967741935484, "grad_norm": 0.8355654249705468, "learning_rate": 9.65718173863637e-07, "loss": 0.7846331000328064, "step": 4648 }, { "epoch": 1.0711981566820277, "grad_norm": 1.1393955111251446, "learning_rate": 9.653374164720897e-07, "loss": 0.7790371179580688, "step": 4649 }, { "epoch": 1.0714285714285714, "grad_norm": 1.110758470727215, "learning_rate": 9.64956664111771e-07, "loss": 0.9056169986724854, "step": 4650 }, { "epoch": 1.0716589861751151, "grad_norm": 0.84240400487228, "learning_rate": 9.645759168379461e-07, "loss": 0.6839256286621094, "step": 4651 }, { "epoch": 1.071889400921659, "grad_norm": 1.377334701305697, "learning_rate": 9.641951747058799e-07, "loss": 0.7071784138679504, "step": 4652 }, { "epoch": 1.0721198156682028, "grad_norm": 1.1683127374870803, "learning_rate": 9.638144377708366e-07, "loss": 0.8166929483413696, "step": 4653 }, { "epoch": 1.0723502304147465, "grad_norm": 1.239204160701412, "learning_rate": 9.6343370608808e-07, "loss": 0.8013010621070862, "step": 4654 }, { "epoch": 1.0725806451612903, "grad_norm": 1.0825444957318084, "learning_rate": 9.630529797128722e-07, "loss": 0.8157169818878174, "step": 4655 }, { "epoch": 1.072811059907834, "grad_norm": 1.0890180382455945, "learning_rate": 9.626722587004758e-07, "loss": 0.6467397212982178, "step": 4656 }, { "epoch": 1.073041474654378, "grad_norm": 0.840613071204114, "learning_rate": 9.622915431061519e-07, "loss": 0.6623806953430176, "step": 4657 }, { "epoch": 1.0732718894009217, "grad_norm": 0.9242647901691624, "learning_rate": 9.619108329851596e-07, "loss": 0.8333703279495239, "step": 4658 }, { "epoch": 1.0735023041474654, "grad_norm": 1.1552752606597634, "learning_rate": 9.615301283927603e-07, "loss": 0.8798840045928955, "step": 4659 }, { "epoch": 1.0737327188940091, "grad_norm": 1.1547075721097313, "learning_rate": 9.611494293842119e-07, "loss": 0.8712242841720581, "step": 4660 }, { "epoch": 1.073963133640553, "grad_norm": 1.030127804248938, "learning_rate": 9.60768736014772e-07, "loss": 0.720801591873169, "step": 4661 }, { "epoch": 1.0741935483870968, "grad_norm": 1.0305643381766019, "learning_rate": 9.603880483396983e-07, "loss": 0.7974982857704163, "step": 4662 }, { "epoch": 1.0744239631336405, "grad_norm": 1.1569753217458012, "learning_rate": 9.600073664142471e-07, "loss": 0.7656542062759399, "step": 4663 }, { "epoch": 1.0746543778801843, "grad_norm": 1.2831377014983525, "learning_rate": 9.596266902936737e-07, "loss": 0.8274385333061218, "step": 4664 }, { "epoch": 1.0748847926267282, "grad_norm": 1.1261587516242995, "learning_rate": 9.592460200332328e-07, "loss": 0.6508798599243164, "step": 4665 }, { "epoch": 1.075115207373272, "grad_norm": 0.8712727383997491, "learning_rate": 9.588653556881781e-07, "loss": 0.6393407583236694, "step": 4666 }, { "epoch": 1.0753456221198157, "grad_norm": 0.8300127743505744, "learning_rate": 9.58484697313762e-07, "loss": 0.7857781052589417, "step": 4667 }, { "epoch": 1.0755760368663594, "grad_norm": 1.0591582120645788, "learning_rate": 9.58104044965238e-07, "loss": 0.7433615922927856, "step": 4668 }, { "epoch": 1.0758064516129031, "grad_norm": 0.9252765779736452, "learning_rate": 9.57723398697856e-07, "loss": 0.6694349646568298, "step": 4669 }, { "epoch": 1.076036866359447, "grad_norm": 1.06633744555344, "learning_rate": 9.573427585668664e-07, "loss": 0.7849506735801697, "step": 4670 }, { "epoch": 1.0762672811059908, "grad_norm": 0.948086558097784, "learning_rate": 9.569621246275194e-07, "loss": 0.5924462080001831, "step": 4671 }, { "epoch": 1.0764976958525345, "grad_norm": 1.0764379613448063, "learning_rate": 9.565814969350628e-07, "loss": 0.7679359316825867, "step": 4672 }, { "epoch": 1.0767281105990782, "grad_norm": 0.8770076747846444, "learning_rate": 9.562008755447444e-07, "loss": 0.803286612033844, "step": 4673 }, { "epoch": 1.0769585253456222, "grad_norm": 0.9139287879253918, "learning_rate": 9.558202605118112e-07, "loss": 0.6302975416183472, "step": 4674 }, { "epoch": 1.077188940092166, "grad_norm": 1.1929014758233443, "learning_rate": 9.554396518915085e-07, "loss": 0.7441667914390564, "step": 4675 }, { "epoch": 1.0774193548387097, "grad_norm": 1.1469726623234646, "learning_rate": 9.550590497390815e-07, "loss": 0.805221438407898, "step": 4676 }, { "epoch": 1.0776497695852534, "grad_norm": 1.1540692428304171, "learning_rate": 9.54678454109774e-07, "loss": 0.9557743072509766, "step": 4677 }, { "epoch": 1.0778801843317973, "grad_norm": 1.0781366924036009, "learning_rate": 9.542978650588284e-07, "loss": 0.7361980080604553, "step": 4678 }, { "epoch": 1.078110599078341, "grad_norm": 1.2143012487351885, "learning_rate": 9.539172826414876e-07, "loss": 0.7474843263626099, "step": 4679 }, { "epoch": 1.0783410138248848, "grad_norm": 1.0143818885553835, "learning_rate": 9.535367069129923e-07, "loss": 0.595927357673645, "step": 4680 }, { "epoch": 1.0785714285714285, "grad_norm": 1.1128254146821686, "learning_rate": 9.531561379285818e-07, "loss": 0.894598126411438, "step": 4681 }, { "epoch": 1.0788018433179722, "grad_norm": 1.3233034879697116, "learning_rate": 9.527755757434966e-07, "loss": 0.915902853012085, "step": 4682 }, { "epoch": 1.0790322580645162, "grad_norm": 1.3436084997047495, "learning_rate": 9.523950204129739e-07, "loss": 0.8670432567596436, "step": 4683 }, { "epoch": 1.07926267281106, "grad_norm": 1.119487791223308, "learning_rate": 9.520144719922508e-07, "loss": 0.7829893231391907, "step": 4684 }, { "epoch": 1.0794930875576036, "grad_norm": 1.1633745895382166, "learning_rate": 9.516339305365638e-07, "loss": 0.6584970951080322, "step": 4685 }, { "epoch": 1.0797235023041474, "grad_norm": 1.0240703451548752, "learning_rate": 9.512533961011478e-07, "loss": 0.7853457927703857, "step": 4686 }, { "epoch": 1.0799539170506913, "grad_norm": 0.8755927642296618, "learning_rate": 9.508728687412364e-07, "loss": 0.7890632152557373, "step": 4687 }, { "epoch": 1.080184331797235, "grad_norm": 1.1475809434863895, "learning_rate": 9.504923485120634e-07, "loss": 0.8281408548355103, "step": 4688 }, { "epoch": 1.0804147465437788, "grad_norm": 0.9222741947208914, "learning_rate": 9.501118354688605e-07, "loss": 0.7878601551055908, "step": 4689 }, { "epoch": 1.0806451612903225, "grad_norm": 1.3827368592572105, "learning_rate": 9.497313296668582e-07, "loss": 0.8332592844963074, "step": 4690 }, { "epoch": 1.0808755760368665, "grad_norm": 1.0564274993228098, "learning_rate": 9.493508311612874e-07, "loss": 0.7680759429931641, "step": 4691 }, { "epoch": 1.0811059907834102, "grad_norm": 0.9446139934289677, "learning_rate": 9.489703400073762e-07, "loss": 0.6368690729141235, "step": 4692 }, { "epoch": 1.081336405529954, "grad_norm": 1.1588361552017052, "learning_rate": 9.485898562603525e-07, "loss": 0.7018477916717529, "step": 4693 }, { "epoch": 1.0815668202764976, "grad_norm": 1.057066552712669, "learning_rate": 9.482093799754432e-07, "loss": 0.8494987487792969, "step": 4694 }, { "epoch": 1.0817972350230414, "grad_norm": 1.0119994692546468, "learning_rate": 9.478289112078736e-07, "loss": 0.8146306276321411, "step": 4695 }, { "epoch": 1.0820276497695853, "grad_norm": 1.054771760893497, "learning_rate": 9.474484500128689e-07, "loss": 0.7832612991333008, "step": 4696 }, { "epoch": 1.082258064516129, "grad_norm": 1.0487197763357414, "learning_rate": 9.470679964456519e-07, "loss": 0.8569360971450806, "step": 4697 }, { "epoch": 1.0824884792626728, "grad_norm": 1.1432115985173055, "learning_rate": 9.466875505614449e-07, "loss": 0.8145112991333008, "step": 4698 }, { "epoch": 1.0827188940092165, "grad_norm": 1.0578814317560323, "learning_rate": 9.463071124154697e-07, "loss": 0.6632689237594604, "step": 4699 }, { "epoch": 1.0829493087557605, "grad_norm": 1.1233922356996344, "learning_rate": 9.459266820629461e-07, "loss": 0.6299769878387451, "step": 4700 }, { "epoch": 1.0831797235023042, "grad_norm": 1.0275349813599226, "learning_rate": 9.455462595590925e-07, "loss": 0.7722063064575195, "step": 4701 }, { "epoch": 1.083410138248848, "grad_norm": 1.2023285008908922, "learning_rate": 9.451658449591278e-07, "loss": 0.8219027519226074, "step": 4702 }, { "epoch": 1.0836405529953916, "grad_norm": 1.1618110682341312, "learning_rate": 9.44785438318268e-07, "loss": 0.9078400731086731, "step": 4703 }, { "epoch": 1.0838709677419356, "grad_norm": 1.087404948952653, "learning_rate": 9.444050396917286e-07, "loss": 0.8062041997909546, "step": 4704 }, { "epoch": 1.0841013824884793, "grad_norm": 0.9599318157385525, "learning_rate": 9.440246491347242e-07, "loss": 0.6379001140594482, "step": 4705 }, { "epoch": 1.084331797235023, "grad_norm": 1.179840039843376, "learning_rate": 9.436442667024679e-07, "loss": 0.919986367225647, "step": 4706 }, { "epoch": 1.0845622119815668, "grad_norm": 1.025427308273649, "learning_rate": 9.432638924501715e-07, "loss": 0.6534138917922974, "step": 4707 }, { "epoch": 1.0847926267281105, "grad_norm": 1.1537368190719173, "learning_rate": 9.428835264330462e-07, "loss": 0.8340045809745789, "step": 4708 }, { "epoch": 1.0850230414746544, "grad_norm": 1.2598648406656967, "learning_rate": 9.425031687063014e-07, "loss": 0.8347625732421875, "step": 4709 }, { "epoch": 1.0852534562211982, "grad_norm": 1.080310831214647, "learning_rate": 9.421228193251452e-07, "loss": 0.807063639163971, "step": 4710 }, { "epoch": 1.085483870967742, "grad_norm": 0.8480154931503633, "learning_rate": 9.417424783447855e-07, "loss": 0.7375985383987427, "step": 4711 }, { "epoch": 1.0857142857142856, "grad_norm": 0.9219258926876724, "learning_rate": 9.413621458204281e-07, "loss": 0.5723168849945068, "step": 4712 }, { "epoch": 1.0859447004608296, "grad_norm": 1.20469026899904, "learning_rate": 9.409818218072772e-07, "loss": 0.8272668123245239, "step": 4713 }, { "epoch": 1.0861751152073733, "grad_norm": 1.0744380351617728, "learning_rate": 9.406015063605368e-07, "loss": 0.6400803327560425, "step": 4714 }, { "epoch": 1.086405529953917, "grad_norm": 0.9959690478635643, "learning_rate": 9.402211995354095e-07, "loss": 0.6829795837402344, "step": 4715 }, { "epoch": 1.0866359447004608, "grad_norm": 1.0434747079590168, "learning_rate": 9.398409013870954e-07, "loss": 0.8509865999221802, "step": 4716 }, { "epoch": 1.0868663594470047, "grad_norm": 1.0730582514021882, "learning_rate": 9.394606119707954e-07, "loss": 0.895818829536438, "step": 4717 }, { "epoch": 1.0870967741935484, "grad_norm": 1.2584943519033869, "learning_rate": 9.390803313417072e-07, "loss": 0.8534268140792847, "step": 4718 }, { "epoch": 1.0873271889400922, "grad_norm": 1.0910485662903118, "learning_rate": 9.38700059555028e-07, "loss": 0.8603401184082031, "step": 4719 }, { "epoch": 1.087557603686636, "grad_norm": 1.1060380385520165, "learning_rate": 9.383197966659542e-07, "loss": 0.8810417652130127, "step": 4720 }, { "epoch": 1.0877880184331796, "grad_norm": 1.078874247367276, "learning_rate": 9.3793954272968e-07, "loss": 0.7144299149513245, "step": 4721 }, { "epoch": 1.0880184331797236, "grad_norm": 1.3140311568193026, "learning_rate": 9.375592978013994e-07, "loss": 0.8780069351196289, "step": 4722 }, { "epoch": 1.0882488479262673, "grad_norm": 1.1329108063995987, "learning_rate": 9.371790619363041e-07, "loss": 0.7976780533790588, "step": 4723 }, { "epoch": 1.088479262672811, "grad_norm": 1.0979402846559465, "learning_rate": 9.367988351895846e-07, "loss": 0.9183385372161865, "step": 4724 }, { "epoch": 1.0887096774193548, "grad_norm": 1.0551038276717553, "learning_rate": 9.364186176164306e-07, "loss": 0.7891188859939575, "step": 4725 }, { "epoch": 1.0889400921658987, "grad_norm": 0.9930223107211231, "learning_rate": 9.360384092720301e-07, "loss": 0.7586535215377808, "step": 4726 }, { "epoch": 1.0891705069124424, "grad_norm": 1.1542507976324667, "learning_rate": 9.356582102115696e-07, "loss": 0.7915316224098206, "step": 4727 }, { "epoch": 1.0894009216589862, "grad_norm": 0.901378484170352, "learning_rate": 9.352780204902349e-07, "loss": 0.6608257293701172, "step": 4728 }, { "epoch": 1.08963133640553, "grad_norm": 1.1982692712799377, "learning_rate": 9.3489784016321e-07, "loss": 0.8375273942947388, "step": 4729 }, { "epoch": 1.0898617511520738, "grad_norm": 1.43591815259741, "learning_rate": 9.345176692856768e-07, "loss": 0.7629055976867676, "step": 4730 }, { "epoch": 1.0900921658986176, "grad_norm": 1.3741081876453818, "learning_rate": 9.341375079128177e-07, "loss": 0.8037875890731812, "step": 4731 }, { "epoch": 1.0903225806451613, "grad_norm": 1.1252370555828741, "learning_rate": 9.337573560998123e-07, "loss": 0.8843437433242798, "step": 4732 }, { "epoch": 1.090552995391705, "grad_norm": 1.058447534132799, "learning_rate": 9.333772139018387e-07, "loss": 0.7164910435676575, "step": 4733 }, { "epoch": 1.0907834101382488, "grad_norm": 1.144703504042011, "learning_rate": 9.329970813740742e-07, "loss": 0.8076978921890259, "step": 4734 }, { "epoch": 1.0910138248847927, "grad_norm": 1.091507904535434, "learning_rate": 9.326169585716949e-07, "loss": 0.7265340089797974, "step": 4735 }, { "epoch": 1.0912442396313364, "grad_norm": 0.9010611551057135, "learning_rate": 9.322368455498747e-07, "loss": 0.7438681125640869, "step": 4736 }, { "epoch": 1.0914746543778802, "grad_norm": 1.455573835192626, "learning_rate": 9.318567423637868e-07, "loss": 0.8760604858398438, "step": 4737 }, { "epoch": 1.0917050691244239, "grad_norm": 1.064698472707054, "learning_rate": 9.314766490686026e-07, "loss": 0.7216911315917969, "step": 4738 }, { "epoch": 1.0919354838709678, "grad_norm": 1.207051606070953, "learning_rate": 9.310965657194916e-07, "loss": 0.8003707528114319, "step": 4739 }, { "epoch": 1.0921658986175116, "grad_norm": 0.9484074376515712, "learning_rate": 9.307164923716233e-07, "loss": 0.6496548652648926, "step": 4740 }, { "epoch": 1.0923963133640553, "grad_norm": 1.0304975730869472, "learning_rate": 9.303364290801644e-07, "loss": 0.7659108638763428, "step": 4741 }, { "epoch": 1.092626728110599, "grad_norm": 1.016478094690519, "learning_rate": 9.299563759002802e-07, "loss": 0.7799512147903442, "step": 4742 }, { "epoch": 1.092857142857143, "grad_norm": 0.9921566283768914, "learning_rate": 9.295763328871357e-07, "loss": 0.7675691246986389, "step": 4743 }, { "epoch": 1.0930875576036867, "grad_norm": 1.0513054078420998, "learning_rate": 9.291963000958931e-07, "loss": 0.677080512046814, "step": 4744 }, { "epoch": 1.0933179723502304, "grad_norm": 1.0842277521538888, "learning_rate": 9.28816277581714e-07, "loss": 0.7885928153991699, "step": 4745 }, { "epoch": 1.0935483870967742, "grad_norm": 1.07543209238493, "learning_rate": 9.28436265399758e-07, "loss": 0.6568010449409485, "step": 4746 }, { "epoch": 1.0937788018433179, "grad_norm": 1.076830779801181, "learning_rate": 9.280562636051827e-07, "loss": 0.9438225030899048, "step": 4747 }, { "epoch": 1.0940092165898618, "grad_norm": 1.0420094595322553, "learning_rate": 9.276762722531461e-07, "loss": 0.8119498491287231, "step": 4748 }, { "epoch": 1.0942396313364056, "grad_norm": 0.8228863679585698, "learning_rate": 9.272962913988029e-07, "loss": 0.7570452690124512, "step": 4749 }, { "epoch": 1.0944700460829493, "grad_norm": 1.0990726312613297, "learning_rate": 9.269163210973063e-07, "loss": 0.7541190385818481, "step": 4750 }, { "epoch": 1.094700460829493, "grad_norm": 1.015570437282189, "learning_rate": 9.265363614038093e-07, "loss": 0.6481921672821045, "step": 4751 }, { "epoch": 1.094930875576037, "grad_norm": 1.1173263478947815, "learning_rate": 9.261564123734623e-07, "loss": 0.7997267246246338, "step": 4752 }, { "epoch": 1.0951612903225807, "grad_norm": 1.4388540160892265, "learning_rate": 9.25776474061414e-07, "loss": 0.9093008637428284, "step": 4753 }, { "epoch": 1.0953917050691244, "grad_norm": 1.3909093606880625, "learning_rate": 9.253965465228122e-07, "loss": 0.7609673142433167, "step": 4754 }, { "epoch": 1.0956221198156681, "grad_norm": 1.311027419629587, "learning_rate": 9.250166298128032e-07, "loss": 0.8338878154754639, "step": 4755 }, { "epoch": 1.095852534562212, "grad_norm": 1.1912490488387477, "learning_rate": 9.246367239865308e-07, "loss": 0.7503781318664551, "step": 4756 }, { "epoch": 1.0960829493087558, "grad_norm": 1.0417471668794835, "learning_rate": 9.242568290991384e-07, "loss": 0.7630816698074341, "step": 4757 }, { "epoch": 1.0963133640552996, "grad_norm": 1.4287601409586015, "learning_rate": 9.238769452057671e-07, "loss": 0.8026378154754639, "step": 4758 }, { "epoch": 1.0965437788018433, "grad_norm": 1.0309152969100308, "learning_rate": 9.234970723615558e-07, "loss": 0.8256090879440308, "step": 4759 }, { "epoch": 1.096774193548387, "grad_norm": 1.1197681925892131, "learning_rate": 9.231172106216437e-07, "loss": 0.7331836223602295, "step": 4760 }, { "epoch": 1.097004608294931, "grad_norm": 1.1300301361381715, "learning_rate": 9.227373600411667e-07, "loss": 0.886203944683075, "step": 4761 }, { "epoch": 1.0972350230414747, "grad_norm": 1.113695044174903, "learning_rate": 9.223575206752592e-07, "loss": 0.7802814245223999, "step": 4762 }, { "epoch": 1.0974654377880184, "grad_norm": 1.3075634566953063, "learning_rate": 9.219776925790552e-07, "loss": 0.9682798385620117, "step": 4763 }, { "epoch": 1.0976958525345621, "grad_norm": 1.1689607681364365, "learning_rate": 9.215978758076858e-07, "loss": 0.8733793497085571, "step": 4764 }, { "epoch": 1.097926267281106, "grad_norm": 1.0890238577837303, "learning_rate": 9.212180704162809e-07, "loss": 0.8403818607330322, "step": 4765 }, { "epoch": 1.0981566820276498, "grad_norm": 1.0898706001284595, "learning_rate": 9.208382764599688e-07, "loss": 0.7957059144973755, "step": 4766 }, { "epoch": 1.0983870967741935, "grad_norm": 1.290224136897281, "learning_rate": 9.204584939938761e-07, "loss": 0.8943477272987366, "step": 4767 }, { "epoch": 1.0986175115207373, "grad_norm": 1.0710230295284595, "learning_rate": 9.200787230731273e-07, "loss": 0.7084406018257141, "step": 4768 }, { "epoch": 1.098847926267281, "grad_norm": 1.190836398847277, "learning_rate": 9.196989637528465e-07, "loss": 0.8374637365341187, "step": 4769 }, { "epoch": 1.099078341013825, "grad_norm": 1.3757022429132086, "learning_rate": 9.193192160881543e-07, "loss": 0.6963578462600708, "step": 4770 }, { "epoch": 1.0993087557603687, "grad_norm": 0.9887346096468936, "learning_rate": 9.189394801341716e-07, "loss": 0.6732540130615234, "step": 4771 }, { "epoch": 1.0995391705069124, "grad_norm": 1.092710990198668, "learning_rate": 9.185597559460159e-07, "loss": 0.7104849219322205, "step": 4772 }, { "epoch": 1.0997695852534561, "grad_norm": 1.3885045688613133, "learning_rate": 9.181800435788037e-07, "loss": 0.8461153507232666, "step": 4773 }, { "epoch": 1.1, "grad_norm": 1.0447899457724443, "learning_rate": 9.178003430876502e-07, "loss": 0.7120847105979919, "step": 4774 }, { "epoch": 1.1002304147465438, "grad_norm": 1.0881207229188647, "learning_rate": 9.174206545276677e-07, "loss": 0.8108617067337036, "step": 4775 }, { "epoch": 1.1004608294930875, "grad_norm": 0.9153115264713604, "learning_rate": 9.170409779539678e-07, "loss": 0.7019558548927307, "step": 4776 }, { "epoch": 1.1006912442396313, "grad_norm": 0.9272452690627847, "learning_rate": 9.166613134216605e-07, "loss": 0.7563629150390625, "step": 4777 }, { "epoch": 1.100921658986175, "grad_norm": 0.9795708897837844, "learning_rate": 9.162816609858533e-07, "loss": 0.777009129524231, "step": 4778 }, { "epoch": 1.101152073732719, "grad_norm": 1.143317572483065, "learning_rate": 9.159020207016516e-07, "loss": 0.812334418296814, "step": 4779 }, { "epoch": 1.1013824884792627, "grad_norm": 0.8685579046345627, "learning_rate": 9.155223926241608e-07, "loss": 0.609114408493042, "step": 4780 }, { "epoch": 1.1016129032258064, "grad_norm": 1.1689773804888128, "learning_rate": 9.151427768084828e-07, "loss": 0.8277549147605896, "step": 4781 }, { "epoch": 1.1018433179723501, "grad_norm": 1.2556834532396843, "learning_rate": 9.147631733097179e-07, "loss": 0.8649400472640991, "step": 4782 }, { "epoch": 1.102073732718894, "grad_norm": 0.8878271909604711, "learning_rate": 9.14383582182966e-07, "loss": 0.7894293665885925, "step": 4783 }, { "epoch": 1.1023041474654378, "grad_norm": 1.3844953995401048, "learning_rate": 9.14004003483324e-07, "loss": 0.9121778011322021, "step": 4784 }, { "epoch": 1.1025345622119815, "grad_norm": 1.0899535734318635, "learning_rate": 9.136244372658867e-07, "loss": 0.7162299156188965, "step": 4785 }, { "epoch": 1.1027649769585253, "grad_norm": 1.1193596859001855, "learning_rate": 9.132448835857482e-07, "loss": 0.7059808969497681, "step": 4786 }, { "epoch": 1.1029953917050692, "grad_norm": 1.2034226051758443, "learning_rate": 9.128653424979999e-07, "loss": 0.8172405958175659, "step": 4787 }, { "epoch": 1.103225806451613, "grad_norm": 0.876114016677297, "learning_rate": 9.124858140577316e-07, "loss": 0.7672706842422485, "step": 4788 }, { "epoch": 1.1034562211981567, "grad_norm": 1.2578760464526295, "learning_rate": 9.121062983200318e-07, "loss": 0.7054900527000427, "step": 4789 }, { "epoch": 1.1036866359447004, "grad_norm": 1.0063162295686867, "learning_rate": 9.117267953399865e-07, "loss": 0.888538122177124, "step": 4790 }, { "epoch": 1.1039170506912441, "grad_norm": 1.1758406583219614, "learning_rate": 9.113473051726796e-07, "loss": 0.7918668985366821, "step": 4791 }, { "epoch": 1.104147465437788, "grad_norm": 1.220328177578168, "learning_rate": 9.109678278731942e-07, "loss": 0.7385697960853577, "step": 4792 }, { "epoch": 1.1043778801843318, "grad_norm": 1.0627777124669568, "learning_rate": 9.105883634966107e-07, "loss": 0.6394056081771851, "step": 4793 }, { "epoch": 1.1046082949308755, "grad_norm": 1.2147960582385422, "learning_rate": 9.102089120980081e-07, "loss": 0.8372077941894531, "step": 4794 }, { "epoch": 1.1048387096774193, "grad_norm": 1.0764884273918471, "learning_rate": 9.098294737324628e-07, "loss": 0.6944066286087036, "step": 4795 }, { "epoch": 1.1050691244239632, "grad_norm": 1.3210680270500303, "learning_rate": 9.0945004845505e-07, "loss": 0.8480994701385498, "step": 4796 }, { "epoch": 1.105299539170507, "grad_norm": 1.3778825395187644, "learning_rate": 9.090706363208431e-07, "loss": 0.837437629699707, "step": 4797 }, { "epoch": 1.1055299539170507, "grad_norm": 1.2126670676110476, "learning_rate": 9.086912373849128e-07, "loss": 0.8610002398490906, "step": 4798 }, { "epoch": 1.1057603686635944, "grad_norm": 1.1204211704902753, "learning_rate": 9.083118517023281e-07, "loss": 0.7323784828186035, "step": 4799 }, { "epoch": 1.1059907834101383, "grad_norm": 1.394483021595883, "learning_rate": 9.079324793281573e-07, "loss": 0.7838932871818542, "step": 4800 }, { "epoch": 1.106221198156682, "grad_norm": 1.1333807320340106, "learning_rate": 9.075531203174651e-07, "loss": 0.7655705213546753, "step": 4801 }, { "epoch": 1.1064516129032258, "grad_norm": 1.199812107745982, "learning_rate": 9.071737747253148e-07, "loss": 0.8320151567459106, "step": 4802 }, { "epoch": 1.1066820276497695, "grad_norm": 1.0428789095876687, "learning_rate": 9.067944426067687e-07, "loss": 0.7434612512588501, "step": 4803 }, { "epoch": 1.1069124423963133, "grad_norm": 1.348302596081637, "learning_rate": 9.064151240168857e-07, "loss": 0.8351321220397949, "step": 4804 }, { "epoch": 1.1071428571428572, "grad_norm": 0.9731377071478325, "learning_rate": 9.060358190107233e-07, "loss": 0.6648053526878357, "step": 4805 }, { "epoch": 1.107373271889401, "grad_norm": 1.236779616553706, "learning_rate": 9.056565276433377e-07, "loss": 0.7507585287094116, "step": 4806 }, { "epoch": 1.1076036866359447, "grad_norm": 1.0866303306873377, "learning_rate": 9.052772499697823e-07, "loss": 0.7638635635375977, "step": 4807 }, { "epoch": 1.1078341013824884, "grad_norm": 1.3204341922490346, "learning_rate": 9.048979860451081e-07, "loss": 0.8066626191139221, "step": 4808 }, { "epoch": 1.1080645161290323, "grad_norm": 0.9459322006964221, "learning_rate": 9.045187359243659e-07, "loss": 0.7090466022491455, "step": 4809 }, { "epoch": 1.108294930875576, "grad_norm": 1.1112578831827626, "learning_rate": 9.041394996626027e-07, "loss": 0.7071142792701721, "step": 4810 }, { "epoch": 1.1085253456221198, "grad_norm": 1.0134445673972028, "learning_rate": 9.037602773148638e-07, "loss": 0.7103942036628723, "step": 4811 }, { "epoch": 1.1087557603686635, "grad_norm": 1.1348721368793189, "learning_rate": 9.033810689361936e-07, "loss": 0.8408492207527161, "step": 4812 }, { "epoch": 1.1089861751152075, "grad_norm": 0.9439878571651674, "learning_rate": 9.030018745816335e-07, "loss": 0.7621495723724365, "step": 4813 }, { "epoch": 1.1092165898617512, "grad_norm": 1.152461687801826, "learning_rate": 9.026226943062225e-07, "loss": 0.7105196714401245, "step": 4814 }, { "epoch": 1.109447004608295, "grad_norm": 1.079152769158689, "learning_rate": 9.022435281649986e-07, "loss": 0.8733636140823364, "step": 4815 }, { "epoch": 1.1096774193548387, "grad_norm": 1.223534472251507, "learning_rate": 9.018643762129974e-07, "loss": 0.9097845554351807, "step": 4816 }, { "epoch": 1.1099078341013824, "grad_norm": 1.2220607424054495, "learning_rate": 9.014852385052519e-07, "loss": 0.8743059635162354, "step": 4817 }, { "epoch": 1.1101382488479263, "grad_norm": 1.0404677289419784, "learning_rate": 9.011061150967937e-07, "loss": 0.7898736000061035, "step": 4818 }, { "epoch": 1.11036866359447, "grad_norm": 1.1698125073586854, "learning_rate": 9.007270060426516e-07, "loss": 0.871254563331604, "step": 4819 }, { "epoch": 1.1105990783410138, "grad_norm": 1.323286168379092, "learning_rate": 9.003479113978536e-07, "loss": 0.6833579540252686, "step": 4820 }, { "epoch": 1.1108294930875575, "grad_norm": 1.285642784687423, "learning_rate": 8.999688312174243e-07, "loss": 0.8289071321487427, "step": 4821 }, { "epoch": 1.1110599078341015, "grad_norm": 1.1884737282905606, "learning_rate": 8.995897655563864e-07, "loss": 0.6798583269119263, "step": 4822 }, { "epoch": 1.1112903225806452, "grad_norm": 1.1108358813410262, "learning_rate": 8.992107144697614e-07, "loss": 0.6518250703811646, "step": 4823 }, { "epoch": 1.111520737327189, "grad_norm": 1.3596600109698966, "learning_rate": 8.988316780125679e-07, "loss": 0.9316667318344116, "step": 4824 }, { "epoch": 1.1117511520737327, "grad_norm": 0.9951654747842746, "learning_rate": 8.98452656239822e-07, "loss": 0.755483865737915, "step": 4825 }, { "epoch": 1.1119815668202766, "grad_norm": 1.0146600815927005, "learning_rate": 8.980736492065391e-07, "loss": 0.7892755270004272, "step": 4826 }, { "epoch": 1.1122119815668203, "grad_norm": 0.9930161298314518, "learning_rate": 8.976946569677308e-07, "loss": 0.703255295753479, "step": 4827 }, { "epoch": 1.112442396313364, "grad_norm": 1.1559327578235137, "learning_rate": 8.973156795784073e-07, "loss": 0.7885171175003052, "step": 4828 }, { "epoch": 1.1126728110599078, "grad_norm": 1.1407519814570228, "learning_rate": 8.969367170935776e-07, "loss": 0.8035199642181396, "step": 4829 }, { "epoch": 1.1129032258064515, "grad_norm": 1.0245821351407076, "learning_rate": 8.965577695682467e-07, "loss": 0.8272112607955933, "step": 4830 }, { "epoch": 1.1131336405529955, "grad_norm": 1.1104598721433627, "learning_rate": 8.961788370574182e-07, "loss": 0.8734478950500488, "step": 4831 }, { "epoch": 1.1133640552995392, "grad_norm": 1.2722110058519596, "learning_rate": 8.957999196160946e-07, "loss": 0.7487469911575317, "step": 4832 }, { "epoch": 1.113594470046083, "grad_norm": 1.3783344397611896, "learning_rate": 8.954210172992748e-07, "loss": 0.9193693399429321, "step": 4833 }, { "epoch": 1.1138248847926266, "grad_norm": 1.4522583636726432, "learning_rate": 8.950421301619555e-07, "loss": 0.8228428959846497, "step": 4834 }, { "epoch": 1.1140552995391706, "grad_norm": 0.9646412535671615, "learning_rate": 8.946632582591324e-07, "loss": 0.7419015169143677, "step": 4835 }, { "epoch": 1.1142857142857143, "grad_norm": 1.1957500872812925, "learning_rate": 8.942844016457975e-07, "loss": 0.827411949634552, "step": 4836 }, { "epoch": 1.114516129032258, "grad_norm": 0.9975223373000859, "learning_rate": 8.93905560376942e-07, "loss": 0.7066754102706909, "step": 4837 }, { "epoch": 1.1147465437788018, "grad_norm": 1.2336329306802043, "learning_rate": 8.93526734507554e-07, "loss": 0.7201621532440186, "step": 4838 }, { "epoch": 1.1149769585253457, "grad_norm": 0.8521980282185057, "learning_rate": 8.931479240926196e-07, "loss": 0.6363521814346313, "step": 4839 }, { "epoch": 1.1152073732718895, "grad_norm": 1.0065898101647581, "learning_rate": 8.927691291871223e-07, "loss": 0.8232909440994263, "step": 4840 }, { "epoch": 1.1154377880184332, "grad_norm": 1.0354249430711853, "learning_rate": 8.923903498460441e-07, "loss": 0.7006033658981323, "step": 4841 }, { "epoch": 1.115668202764977, "grad_norm": 1.1957171429651339, "learning_rate": 8.920115861243638e-07, "loss": 0.6982721090316772, "step": 4842 }, { "epoch": 1.1158986175115206, "grad_norm": 1.039109039901578, "learning_rate": 8.916328380770593e-07, "loss": 0.7735922336578369, "step": 4843 }, { "epoch": 1.1161290322580646, "grad_norm": 1.189307260310029, "learning_rate": 8.912541057591049e-07, "loss": 0.7430423498153687, "step": 4844 }, { "epoch": 1.1163594470046083, "grad_norm": 1.0189703427385546, "learning_rate": 8.908753892254729e-07, "loss": 0.7783932685852051, "step": 4845 }, { "epoch": 1.116589861751152, "grad_norm": 0.895546986970967, "learning_rate": 8.904966885311339e-07, "loss": 0.726211428642273, "step": 4846 }, { "epoch": 1.1168202764976958, "grad_norm": 1.0042101088511581, "learning_rate": 8.901180037310555e-07, "loss": 0.664351761341095, "step": 4847 }, { "epoch": 1.1170506912442397, "grad_norm": 1.192545271664204, "learning_rate": 8.897393348802031e-07, "loss": 0.8246554136276245, "step": 4848 }, { "epoch": 1.1172811059907835, "grad_norm": 1.3113785088290244, "learning_rate": 8.893606820335405e-07, "loss": 0.9435447454452515, "step": 4849 }, { "epoch": 1.1175115207373272, "grad_norm": 1.1196400925650334, "learning_rate": 8.889820452460286e-07, "loss": 0.8471171855926514, "step": 4850 }, { "epoch": 1.117741935483871, "grad_norm": 0.9950597161448561, "learning_rate": 8.886034245726254e-07, "loss": 0.6038233041763306, "step": 4851 }, { "epoch": 1.1179723502304149, "grad_norm": 1.1171540360532777, "learning_rate": 8.882248200682881e-07, "loss": 0.8186997771263123, "step": 4852 }, { "epoch": 1.1182027649769586, "grad_norm": 1.2436642718372632, "learning_rate": 8.878462317879702e-07, "loss": 0.789948582649231, "step": 4853 }, { "epoch": 1.1184331797235023, "grad_norm": 1.0789321556804603, "learning_rate": 8.87467659786623e-07, "loss": 0.7543652057647705, "step": 4854 }, { "epoch": 1.118663594470046, "grad_norm": 1.0717127208024606, "learning_rate": 8.870891041191963e-07, "loss": 0.5985269546508789, "step": 4855 }, { "epoch": 1.1188940092165898, "grad_norm": 1.109115113465042, "learning_rate": 8.867105648406364e-07, "loss": 0.7676643133163452, "step": 4856 }, { "epoch": 1.1191244239631337, "grad_norm": 1.0078052507528568, "learning_rate": 8.863320420058881e-07, "loss": 0.7317303419113159, "step": 4857 }, { "epoch": 1.1193548387096774, "grad_norm": 1.117240479042085, "learning_rate": 8.859535356698936e-07, "loss": 0.8357843160629272, "step": 4858 }, { "epoch": 1.1195852534562212, "grad_norm": 1.2827717071860176, "learning_rate": 8.855750458875923e-07, "loss": 0.7149945497512817, "step": 4859 }, { "epoch": 1.119815668202765, "grad_norm": 1.1258754685876486, "learning_rate": 8.851965727139214e-07, "loss": 0.7059169411659241, "step": 4860 }, { "epoch": 1.1200460829493089, "grad_norm": 1.0779991100813224, "learning_rate": 8.848181162038163e-07, "loss": 0.7530190944671631, "step": 4861 }, { "epoch": 1.1202764976958526, "grad_norm": 1.12578616970897, "learning_rate": 8.844396764122092e-07, "loss": 0.808814287185669, "step": 4862 }, { "epoch": 1.1205069124423963, "grad_norm": 1.174668121226261, "learning_rate": 8.840612533940295e-07, "loss": 0.7205604910850525, "step": 4863 }, { "epoch": 1.12073732718894, "grad_norm": 1.0284636891818573, "learning_rate": 8.83682847204206e-07, "loss": 0.7493274211883545, "step": 4864 }, { "epoch": 1.120967741935484, "grad_norm": 1.1974475439930412, "learning_rate": 8.833044578976631e-07, "loss": 0.8115849494934082, "step": 4865 }, { "epoch": 1.1211981566820277, "grad_norm": 1.2224514970634248, "learning_rate": 8.829260855293237e-07, "loss": 0.8188419342041016, "step": 4866 }, { "epoch": 1.1214285714285714, "grad_norm": 1.372584236180193, "learning_rate": 8.82547730154108e-07, "loss": 0.6152349710464478, "step": 4867 }, { "epoch": 1.1216589861751152, "grad_norm": 0.9364210771252817, "learning_rate": 8.821693918269333e-07, "loss": 0.7629969120025635, "step": 4868 }, { "epoch": 1.121889400921659, "grad_norm": 1.0637191210851928, "learning_rate": 8.81791070602716e-07, "loss": 0.7063733339309692, "step": 4869 }, { "epoch": 1.1221198156682028, "grad_norm": 1.2221996591019166, "learning_rate": 8.814127665363682e-07, "loss": 0.729676365852356, "step": 4870 }, { "epoch": 1.1223502304147466, "grad_norm": 1.2363948838699006, "learning_rate": 8.810344796827999e-07, "loss": 0.8188877105712891, "step": 4871 }, { "epoch": 1.1225806451612903, "grad_norm": 1.4364824515163135, "learning_rate": 8.806562100969199e-07, "loss": 0.70793217420578, "step": 4872 }, { "epoch": 1.122811059907834, "grad_norm": 1.2471671753090219, "learning_rate": 8.802779578336329e-07, "loss": 0.8086484670639038, "step": 4873 }, { "epoch": 1.123041474654378, "grad_norm": 1.209058465827679, "learning_rate": 8.798997229478417e-07, "loss": 0.8954081535339355, "step": 4874 }, { "epoch": 1.1232718894009217, "grad_norm": 1.0352094557860352, "learning_rate": 8.795215054944469e-07, "loss": 0.6615205407142639, "step": 4875 }, { "epoch": 1.1235023041474654, "grad_norm": 1.3182700744777898, "learning_rate": 8.79143305528346e-07, "loss": 0.6851116418838501, "step": 4876 }, { "epoch": 1.1237327188940092, "grad_norm": 0.9311237252586447, "learning_rate": 8.787651231044342e-07, "loss": 0.7594672441482544, "step": 4877 }, { "epoch": 1.123963133640553, "grad_norm": 1.2505187148095604, "learning_rate": 8.783869582776044e-07, "loss": 0.7170572280883789, "step": 4878 }, { "epoch": 1.1241935483870968, "grad_norm": 1.1244851690255748, "learning_rate": 8.780088111027467e-07, "loss": 0.9139137864112854, "step": 4879 }, { "epoch": 1.1244239631336406, "grad_norm": 1.2468380143920514, "learning_rate": 8.776306816347482e-07, "loss": 0.8716791868209839, "step": 4880 }, { "epoch": 1.1246543778801843, "grad_norm": 1.5043743610246187, "learning_rate": 8.772525699284946e-07, "loss": 0.840330958366394, "step": 4881 }, { "epoch": 1.124884792626728, "grad_norm": 1.28802116274467, "learning_rate": 8.768744760388681e-07, "loss": 0.7713445425033569, "step": 4882 }, { "epoch": 1.125115207373272, "grad_norm": 1.2058132743835892, "learning_rate": 8.764964000207479e-07, "loss": 0.8964767456054688, "step": 4883 }, { "epoch": 1.1253456221198157, "grad_norm": 1.12361515551762, "learning_rate": 8.761183419290121e-07, "loss": 0.8038421869277954, "step": 4884 }, { "epoch": 1.1255760368663594, "grad_norm": 0.7722654284456119, "learning_rate": 8.757403018185351e-07, "loss": 0.6601011753082275, "step": 4885 }, { "epoch": 1.1258064516129032, "grad_norm": 0.8011265369746955, "learning_rate": 8.753622797441885e-07, "loss": 0.8226664066314697, "step": 4886 }, { "epoch": 1.1260368663594469, "grad_norm": 1.0633366554284305, "learning_rate": 8.749842757608422e-07, "loss": 0.7062248587608337, "step": 4887 }, { "epoch": 1.1262672811059908, "grad_norm": 1.318395948514478, "learning_rate": 8.746062899233628e-07, "loss": 0.8642051815986633, "step": 4888 }, { "epoch": 1.1264976958525346, "grad_norm": 1.2332349128972684, "learning_rate": 8.74228322286614e-07, "loss": 0.8194048404693604, "step": 4889 }, { "epoch": 1.1267281105990783, "grad_norm": 1.121678775220638, "learning_rate": 8.738503729054583e-07, "loss": 0.6957820653915405, "step": 4890 }, { "epoch": 1.1269585253456222, "grad_norm": 0.9775692035561586, "learning_rate": 8.734724418347537e-07, "loss": 0.8107770681381226, "step": 4891 }, { "epoch": 1.127188940092166, "grad_norm": 1.1508754542191086, "learning_rate": 8.730945291293563e-07, "loss": 0.7727551460266113, "step": 4892 }, { "epoch": 1.1274193548387097, "grad_norm": 1.1347047929449647, "learning_rate": 8.727166348441207e-07, "loss": 0.7389936447143555, "step": 4893 }, { "epoch": 1.1276497695852534, "grad_norm": 1.2733389095695957, "learning_rate": 8.723387590338964e-07, "loss": 0.7666463851928711, "step": 4894 }, { "epoch": 1.1278801843317972, "grad_norm": 1.1990629153183452, "learning_rate": 8.719609017535328e-07, "loss": 0.7795453071594238, "step": 4895 }, { "epoch": 1.128110599078341, "grad_norm": 1.1062968437903737, "learning_rate": 8.715830630578746e-07, "loss": 0.8560752272605896, "step": 4896 }, { "epoch": 1.1283410138248848, "grad_norm": 1.2251043883259816, "learning_rate": 8.712052430017645e-07, "loss": 0.7574455738067627, "step": 4897 }, { "epoch": 1.1285714285714286, "grad_norm": 1.3025894471719623, "learning_rate": 8.708274416400432e-07, "loss": 0.8017276525497437, "step": 4898 }, { "epoch": 1.1288018433179723, "grad_norm": 0.9942840399227726, "learning_rate": 8.704496590275477e-07, "loss": 0.7046157121658325, "step": 4899 }, { "epoch": 1.129032258064516, "grad_norm": 1.187705347283351, "learning_rate": 8.700718952191124e-07, "loss": 0.7352035641670227, "step": 4900 }, { "epoch": 1.12926267281106, "grad_norm": 0.9471130432852718, "learning_rate": 8.696941502695698e-07, "loss": 0.6444690227508545, "step": 4901 }, { "epoch": 1.1294930875576037, "grad_norm": 1.0628821586759927, "learning_rate": 8.69316424233749e-07, "loss": 0.7909440994262695, "step": 4902 }, { "epoch": 1.1297235023041474, "grad_norm": 0.9483928902743061, "learning_rate": 8.689387171664756e-07, "loss": 0.646790087223053, "step": 4903 }, { "epoch": 1.1299539170506911, "grad_norm": 1.2796319408131067, "learning_rate": 8.685610291225744e-07, "loss": 0.786831796169281, "step": 4904 }, { "epoch": 1.130184331797235, "grad_norm": 1.143272972798168, "learning_rate": 8.681833601568657e-07, "loss": 0.8004348278045654, "step": 4905 }, { "epoch": 1.1304147465437788, "grad_norm": 0.996600703731369, "learning_rate": 8.678057103241677e-07, "loss": 0.6846532821655273, "step": 4906 }, { "epoch": 1.1306451612903226, "grad_norm": 1.299426572962062, "learning_rate": 8.67428079679296e-07, "loss": 0.7555707693099976, "step": 4907 }, { "epoch": 1.1308755760368663, "grad_norm": 1.3809719247833205, "learning_rate": 8.67050468277063e-07, "loss": 0.852725625038147, "step": 4908 }, { "epoch": 1.1311059907834102, "grad_norm": 0.9844151846464619, "learning_rate": 8.666728761722782e-07, "loss": 0.6990044713020325, "step": 4909 }, { "epoch": 1.131336405529954, "grad_norm": 1.223366973696945, "learning_rate": 8.662953034197493e-07, "loss": 0.8050999641418457, "step": 4910 }, { "epoch": 1.1315668202764977, "grad_norm": 1.3085197840977536, "learning_rate": 8.659177500742802e-07, "loss": 0.8169291019439697, "step": 4911 }, { "epoch": 1.1317972350230414, "grad_norm": 1.081294035300873, "learning_rate": 8.655402161906716e-07, "loss": 0.7814679145812988, "step": 4912 }, { "epoch": 1.1320276497695851, "grad_norm": 1.237970773045493, "learning_rate": 8.651627018237231e-07, "loss": 0.6734834313392639, "step": 4913 }, { "epoch": 1.132258064516129, "grad_norm": 1.1143770605215586, "learning_rate": 8.647852070282299e-07, "loss": 0.8765416145324707, "step": 4914 }, { "epoch": 1.1324884792626728, "grad_norm": 1.3797966848789986, "learning_rate": 8.644077318589847e-07, "loss": 1.0023764371871948, "step": 4915 }, { "epoch": 1.1327188940092165, "grad_norm": 1.0387287080137257, "learning_rate": 8.64030276370778e-07, "loss": 0.7561393976211548, "step": 4916 }, { "epoch": 1.1329493087557603, "grad_norm": 1.123376400728965, "learning_rate": 8.636528406183961e-07, "loss": 0.8252062797546387, "step": 4917 }, { "epoch": 1.1331797235023042, "grad_norm": 1.3939443114820729, "learning_rate": 8.632754246566246e-07, "loss": 0.7598097324371338, "step": 4918 }, { "epoch": 1.133410138248848, "grad_norm": 0.8823184534346743, "learning_rate": 8.628980285402438e-07, "loss": 0.6113640069961548, "step": 4919 }, { "epoch": 1.1336405529953917, "grad_norm": 1.096652563873467, "learning_rate": 8.625206523240325e-07, "loss": 0.7457853555679321, "step": 4920 }, { "epoch": 1.1338709677419354, "grad_norm": 1.0304826450193199, "learning_rate": 8.62143296062767e-07, "loss": 0.7334161996841431, "step": 4921 }, { "epoch": 1.1341013824884794, "grad_norm": 1.1383631487720753, "learning_rate": 8.617659598112195e-07, "loss": 0.7446962594985962, "step": 4922 }, { "epoch": 1.134331797235023, "grad_norm": 0.9360514056176105, "learning_rate": 8.613886436241594e-07, "loss": 0.7074497938156128, "step": 4923 }, { "epoch": 1.1345622119815668, "grad_norm": 0.9945384740922374, "learning_rate": 8.610113475563547e-07, "loss": 0.6728851795196533, "step": 4924 }, { "epoch": 1.1347926267281105, "grad_norm": 1.0533766436674836, "learning_rate": 8.606340716625689e-07, "loss": 0.7732793092727661, "step": 4925 }, { "epoch": 1.1350230414746543, "grad_norm": 1.2301857240081557, "learning_rate": 8.60256815997563e-07, "loss": 0.7514671683311462, "step": 4926 }, { "epoch": 1.1352534562211982, "grad_norm": 1.2507291163181513, "learning_rate": 8.598795806160952e-07, "loss": 0.7824795842170715, "step": 4927 }, { "epoch": 1.135483870967742, "grad_norm": 1.1585997268920079, "learning_rate": 8.59502365572921e-07, "loss": 0.789236307144165, "step": 4928 }, { "epoch": 1.1357142857142857, "grad_norm": 1.1796078109098491, "learning_rate": 8.591251709227919e-07, "loss": 0.7005175948143005, "step": 4929 }, { "epoch": 1.1359447004608294, "grad_norm": 1.2299124062921447, "learning_rate": 8.587479967204582e-07, "loss": 0.7851300239562988, "step": 4930 }, { "epoch": 1.1361751152073734, "grad_norm": 1.5129438725714193, "learning_rate": 8.583708430206658e-07, "loss": 0.8901405334472656, "step": 4931 }, { "epoch": 1.136405529953917, "grad_norm": 1.1049343524856345, "learning_rate": 8.579937098781576e-07, "loss": 0.8118528127670288, "step": 4932 }, { "epoch": 1.1366359447004608, "grad_norm": 1.0631974751851168, "learning_rate": 8.57616597347675e-07, "loss": 0.6500028371810913, "step": 4933 }, { "epoch": 1.1368663594470045, "grad_norm": 1.057066415615051, "learning_rate": 8.572395054839547e-07, "loss": 0.7752922773361206, "step": 4934 }, { "epoch": 1.1370967741935485, "grad_norm": 1.124364781444334, "learning_rate": 8.568624343417309e-07, "loss": 0.7346245050430298, "step": 4935 }, { "epoch": 1.1373271889400922, "grad_norm": 1.4547001781507483, "learning_rate": 8.564853839757356e-07, "loss": 0.9249104261398315, "step": 4936 }, { "epoch": 1.137557603686636, "grad_norm": 1.0350864816884677, "learning_rate": 8.561083544406965e-07, "loss": 0.7407078742980957, "step": 4937 }, { "epoch": 1.1377880184331797, "grad_norm": 1.197156559440129, "learning_rate": 8.557313457913393e-07, "loss": 0.7615865468978882, "step": 4938 }, { "epoch": 1.1380184331797234, "grad_norm": 1.2125718427071739, "learning_rate": 8.553543580823866e-07, "loss": 0.757561445236206, "step": 4939 }, { "epoch": 1.1382488479262673, "grad_norm": 1.1468001082336654, "learning_rate": 8.549773913685572e-07, "loss": 0.7130411863327026, "step": 4940 }, { "epoch": 1.138479262672811, "grad_norm": 1.1282357144069963, "learning_rate": 8.54600445704567e-07, "loss": 0.7507551312446594, "step": 4941 }, { "epoch": 1.1387096774193548, "grad_norm": 1.0556143227749322, "learning_rate": 8.542235211451301e-07, "loss": 0.896443247795105, "step": 4942 }, { "epoch": 1.1389400921658985, "grad_norm": 1.145222677509159, "learning_rate": 8.538466177449557e-07, "loss": 0.7530815601348877, "step": 4943 }, { "epoch": 1.1391705069124425, "grad_norm": 1.2481258172783056, "learning_rate": 8.534697355587517e-07, "loss": 0.8730431795120239, "step": 4944 }, { "epoch": 1.1394009216589862, "grad_norm": 1.3010516024158107, "learning_rate": 8.530928746412216e-07, "loss": 0.6452720165252686, "step": 4945 }, { "epoch": 1.13963133640553, "grad_norm": 1.1712957128451178, "learning_rate": 8.527160350470661e-07, "loss": 0.7679018974304199, "step": 4946 }, { "epoch": 1.1398617511520737, "grad_norm": 1.402874429077297, "learning_rate": 8.523392168309832e-07, "loss": 0.8186824321746826, "step": 4947 }, { "epoch": 1.1400921658986176, "grad_norm": 1.1669467278440648, "learning_rate": 8.519624200476676e-07, "loss": 0.666642427444458, "step": 4948 }, { "epoch": 1.1403225806451613, "grad_norm": 1.0160881327834055, "learning_rate": 8.515856447518104e-07, "loss": 0.7478682994842529, "step": 4949 }, { "epoch": 1.140552995391705, "grad_norm": 1.2340329971083113, "learning_rate": 8.512088909981007e-07, "loss": 0.7527793645858765, "step": 4950 }, { "epoch": 1.1407834101382488, "grad_norm": 1.136863530366948, "learning_rate": 8.508321588412235e-07, "loss": 0.7614094018936157, "step": 4951 }, { "epoch": 1.1410138248847925, "grad_norm": 1.2371366016065355, "learning_rate": 8.504554483358605e-07, "loss": 0.8294994831085205, "step": 4952 }, { "epoch": 1.1412442396313365, "grad_norm": 1.4759487382386114, "learning_rate": 8.500787595366919e-07, "loss": 0.8900095224380493, "step": 4953 }, { "epoch": 1.1414746543778802, "grad_norm": 1.0721192735972314, "learning_rate": 8.497020924983926e-07, "loss": 0.8403744697570801, "step": 4954 }, { "epoch": 1.141705069124424, "grad_norm": 1.0449510164412683, "learning_rate": 8.493254472756355e-07, "loss": 0.7046208381652832, "step": 4955 }, { "epoch": 1.1419354838709677, "grad_norm": 1.3018714779233174, "learning_rate": 8.489488239230904e-07, "loss": 0.8226789832115173, "step": 4956 }, { "epoch": 1.1421658986175116, "grad_norm": 1.058902427650911, "learning_rate": 8.485722224954236e-07, "loss": 0.7248969674110413, "step": 4957 }, { "epoch": 1.1423963133640553, "grad_norm": 1.1327549620980084, "learning_rate": 8.481956430472979e-07, "loss": 0.8116840124130249, "step": 4958 }, { "epoch": 1.142626728110599, "grad_norm": 1.062622286893391, "learning_rate": 8.478190856333739e-07, "loss": 0.7534138560295105, "step": 4959 }, { "epoch": 1.1428571428571428, "grad_norm": 1.3427980825750856, "learning_rate": 8.474425503083082e-07, "loss": 0.8945306539535522, "step": 4960 }, { "epoch": 1.1430875576036867, "grad_norm": 1.1592346473165394, "learning_rate": 8.47066037126754e-07, "loss": 0.7554503083229065, "step": 4961 }, { "epoch": 1.1433179723502305, "grad_norm": 1.4596388821753403, "learning_rate": 8.466895461433625e-07, "loss": 0.832726776599884, "step": 4962 }, { "epoch": 1.1435483870967742, "grad_norm": 1.250046955776058, "learning_rate": 8.463130774127804e-07, "loss": 0.8312773704528809, "step": 4963 }, { "epoch": 1.143778801843318, "grad_norm": 0.9153601791246997, "learning_rate": 8.459366309896512e-07, "loss": 0.6484537124633789, "step": 4964 }, { "epoch": 1.1440092165898617, "grad_norm": 1.2863432770713337, "learning_rate": 8.455602069286165e-07, "loss": 0.9216604828834534, "step": 4965 }, { "epoch": 1.1442396313364056, "grad_norm": 1.134985678431753, "learning_rate": 8.451838052843131e-07, "loss": 0.6213096380233765, "step": 4966 }, { "epoch": 1.1444700460829493, "grad_norm": 0.9562822723791001, "learning_rate": 8.448074261113756e-07, "loss": 0.6873677968978882, "step": 4967 }, { "epoch": 1.144700460829493, "grad_norm": 1.215560824144924, "learning_rate": 8.444310694644348e-07, "loss": 0.7883448600769043, "step": 4968 }, { "epoch": 1.1449308755760368, "grad_norm": 1.1944176371651494, "learning_rate": 8.440547353981178e-07, "loss": 0.724172830581665, "step": 4969 }, { "epoch": 1.1451612903225807, "grad_norm": 1.0792006702141475, "learning_rate": 8.4367842396705e-07, "loss": 0.7115252017974854, "step": 4970 }, { "epoch": 1.1453917050691245, "grad_norm": 1.0823773323138404, "learning_rate": 8.433021352258521e-07, "loss": 0.7165110111236572, "step": 4971 }, { "epoch": 1.1456221198156682, "grad_norm": 1.0874360604645514, "learning_rate": 8.429258692291413e-07, "loss": 0.7563315629959106, "step": 4972 }, { "epoch": 1.145852534562212, "grad_norm": 1.1334099478279698, "learning_rate": 8.425496260315331e-07, "loss": 0.7528449892997742, "step": 4973 }, { "epoch": 1.1460829493087559, "grad_norm": 1.1141426795021205, "learning_rate": 8.421734056876383e-07, "loss": 0.7976171970367432, "step": 4974 }, { "epoch": 1.1463133640552996, "grad_norm": 1.020985144100356, "learning_rate": 8.417972082520644e-07, "loss": 0.7498095035552979, "step": 4975 }, { "epoch": 1.1465437788018433, "grad_norm": 1.3446642320448154, "learning_rate": 8.414210337794165e-07, "loss": 0.9568856954574585, "step": 4976 }, { "epoch": 1.146774193548387, "grad_norm": 0.9499457055768262, "learning_rate": 8.410448823242957e-07, "loss": 0.6402908563613892, "step": 4977 }, { "epoch": 1.1470046082949308, "grad_norm": 1.1759709167305108, "learning_rate": 8.406687539412995e-07, "loss": 0.8224657773971558, "step": 4978 }, { "epoch": 1.1472350230414747, "grad_norm": 1.2886598107348421, "learning_rate": 8.402926486850229e-07, "loss": 0.7804544568061829, "step": 4979 }, { "epoch": 1.1474654377880185, "grad_norm": 1.1861127295236977, "learning_rate": 8.39916566610057e-07, "loss": 0.7920527458190918, "step": 4980 }, { "epoch": 1.1476958525345622, "grad_norm": 1.1244888328051699, "learning_rate": 8.395405077709891e-07, "loss": 0.7672078609466553, "step": 4981 }, { "epoch": 1.147926267281106, "grad_norm": 1.2427545332028853, "learning_rate": 8.391644722224047e-07, "loss": 0.6997950077056885, "step": 4982 }, { "epoch": 1.1481566820276499, "grad_norm": 1.057637628401912, "learning_rate": 8.38788460018884e-07, "loss": 0.7754349708557129, "step": 4983 }, { "epoch": 1.1483870967741936, "grad_norm": 1.1458978330134115, "learning_rate": 8.384124712150046e-07, "loss": 0.706238329410553, "step": 4984 }, { "epoch": 1.1486175115207373, "grad_norm": 0.8874927618348325, "learning_rate": 8.380365058653415e-07, "loss": 0.7115224599838257, "step": 4985 }, { "epoch": 1.148847926267281, "grad_norm": 1.349182229007694, "learning_rate": 8.376605640244652e-07, "loss": 0.9026098847389221, "step": 4986 }, { "epoch": 1.149078341013825, "grad_norm": 1.359066441839043, "learning_rate": 8.372846457469428e-07, "loss": 0.9123632311820984, "step": 4987 }, { "epoch": 1.1493087557603687, "grad_norm": 1.1389830084868187, "learning_rate": 8.369087510873389e-07, "loss": 0.8365681171417236, "step": 4988 }, { "epoch": 1.1495391705069125, "grad_norm": 1.1572327597453433, "learning_rate": 8.36532880100214e-07, "loss": 0.7506389617919922, "step": 4989 }, { "epoch": 1.1497695852534562, "grad_norm": 1.1932866122784214, "learning_rate": 8.361570328401246e-07, "loss": 0.7736936807632446, "step": 4990 }, { "epoch": 1.15, "grad_norm": 1.0939095427412457, "learning_rate": 8.357812093616254e-07, "loss": 0.7364238500595093, "step": 4991 }, { "epoch": 1.1502304147465439, "grad_norm": 1.154457809524142, "learning_rate": 8.354054097192659e-07, "loss": 0.8588067293167114, "step": 4992 }, { "epoch": 1.1504608294930876, "grad_norm": 1.0040260335609983, "learning_rate": 8.350296339675938e-07, "loss": 0.777319073677063, "step": 4993 }, { "epoch": 1.1506912442396313, "grad_norm": 1.2472613338245313, "learning_rate": 8.346538821611517e-07, "loss": 0.6695454716682434, "step": 4994 }, { "epoch": 1.150921658986175, "grad_norm": 1.1333204343634593, "learning_rate": 8.342781543544796e-07, "loss": 0.7785383462905884, "step": 4995 }, { "epoch": 1.1511520737327188, "grad_norm": 1.2063502081148214, "learning_rate": 8.339024506021143e-07, "loss": 0.7386239767074585, "step": 4996 }, { "epoch": 1.1513824884792627, "grad_norm": 1.015973129089863, "learning_rate": 8.335267709585884e-07, "loss": 0.8044750690460205, "step": 4997 }, { "epoch": 1.1516129032258065, "grad_norm": 0.991689333823338, "learning_rate": 8.331511154784307e-07, "loss": 0.6925652623176575, "step": 4998 }, { "epoch": 1.1518433179723502, "grad_norm": 1.1362021503644928, "learning_rate": 8.327754842161684e-07, "loss": 0.7906935214996338, "step": 4999 }, { "epoch": 1.1520737327188941, "grad_norm": 1.0865966340855062, "learning_rate": 8.323998772263231e-07, "loss": 0.7131960988044739, "step": 5000 }, { "epoch": 1.1523041474654379, "grad_norm": 1.0459163670419733, "learning_rate": 8.320242945634132e-07, "loss": 0.8412370085716248, "step": 5001 }, { "epoch": 1.1525345622119816, "grad_norm": 1.219248495471204, "learning_rate": 8.316487362819551e-07, "loss": 0.7800952792167664, "step": 5002 }, { "epoch": 1.1527649769585253, "grad_norm": 1.2269188284281454, "learning_rate": 8.312732024364602e-07, "loss": 0.8620247840881348, "step": 5003 }, { "epoch": 1.152995391705069, "grad_norm": 1.1576962368399284, "learning_rate": 8.30897693081436e-07, "loss": 0.7551721334457397, "step": 5004 }, { "epoch": 1.153225806451613, "grad_norm": 1.1081098689134552, "learning_rate": 8.305222082713882e-07, "loss": 0.8510593175888062, "step": 5005 }, { "epoch": 1.1534562211981567, "grad_norm": 1.0356186889640762, "learning_rate": 8.301467480608176e-07, "loss": 0.6503845453262329, "step": 5006 }, { "epoch": 1.1536866359447004, "grad_norm": 1.1593829978588668, "learning_rate": 8.297713125042212e-07, "loss": 0.7729237079620361, "step": 5007 }, { "epoch": 1.1539170506912442, "grad_norm": 1.0812796919286354, "learning_rate": 8.293959016560939e-07, "loss": 0.77802574634552, "step": 5008 }, { "epoch": 1.154147465437788, "grad_norm": 0.9915519400035699, "learning_rate": 8.290205155709256e-07, "loss": 0.7977825999259949, "step": 5009 }, { "epoch": 1.1543778801843319, "grad_norm": 1.1128731733324948, "learning_rate": 8.286451543032027e-07, "loss": 0.7479745149612427, "step": 5010 }, { "epoch": 1.1546082949308756, "grad_norm": 1.0554376798438097, "learning_rate": 8.282698179074092e-07, "loss": 0.7631532549858093, "step": 5011 }, { "epoch": 1.1548387096774193, "grad_norm": 1.1424098237872247, "learning_rate": 8.278945064380243e-07, "loss": 0.7437061071395874, "step": 5012 }, { "epoch": 1.1550691244239633, "grad_norm": 1.2208599961881346, "learning_rate": 8.275192199495236e-07, "loss": 0.9334282875061035, "step": 5013 }, { "epoch": 1.155299539170507, "grad_norm": 1.1846438304674103, "learning_rate": 8.2714395849638e-07, "loss": 0.7119227647781372, "step": 5014 }, { "epoch": 1.1555299539170507, "grad_norm": 1.202224273678675, "learning_rate": 8.267687221330619e-07, "loss": 0.8335816860198975, "step": 5015 }, { "epoch": 1.1557603686635944, "grad_norm": 1.290989413518125, "learning_rate": 8.263935109140347e-07, "loss": 0.6130940914154053, "step": 5016 }, { "epoch": 1.1559907834101382, "grad_norm": 1.1118999574659398, "learning_rate": 8.260183248937595e-07, "loss": 0.8223903179168701, "step": 5017 }, { "epoch": 1.1562211981566821, "grad_norm": 1.1042026567968168, "learning_rate": 8.256431641266938e-07, "loss": 0.8024790287017822, "step": 5018 }, { "epoch": 1.1564516129032258, "grad_norm": 1.2308316211864536, "learning_rate": 8.252680286672924e-07, "loss": 0.7425345182418823, "step": 5019 }, { "epoch": 1.1566820276497696, "grad_norm": 0.9907420981370885, "learning_rate": 8.248929185700053e-07, "loss": 0.7729727029800415, "step": 5020 }, { "epoch": 1.1569124423963133, "grad_norm": 1.096476255015683, "learning_rate": 8.245178338892788e-07, "loss": 0.8451874256134033, "step": 5021 }, { "epoch": 1.157142857142857, "grad_norm": 1.1584589365926052, "learning_rate": 8.241427746795569e-07, "loss": 0.8666542768478394, "step": 5022 }, { "epoch": 1.157373271889401, "grad_norm": 1.2897904410488261, "learning_rate": 8.237677409952784e-07, "loss": 0.740352988243103, "step": 5023 }, { "epoch": 1.1576036866359447, "grad_norm": 0.9937724952342799, "learning_rate": 8.233927328908788e-07, "loss": 0.6325985193252563, "step": 5024 }, { "epoch": 1.1578341013824884, "grad_norm": 1.0099472902179978, "learning_rate": 8.230177504207901e-07, "loss": 0.8075892925262451, "step": 5025 }, { "epoch": 1.1580645161290322, "grad_norm": 1.0459718249244707, "learning_rate": 8.22642793639441e-07, "loss": 0.7176432609558105, "step": 5026 }, { "epoch": 1.1582949308755761, "grad_norm": 1.1804726429614583, "learning_rate": 8.222678626012554e-07, "loss": 0.7734829187393188, "step": 5027 }, { "epoch": 1.1585253456221198, "grad_norm": 1.3220222245590558, "learning_rate": 8.218929573606544e-07, "loss": 0.8642655611038208, "step": 5028 }, { "epoch": 1.1587557603686636, "grad_norm": 1.0337487495481472, "learning_rate": 8.215180779720548e-07, "loss": 0.7788450121879578, "step": 5029 }, { "epoch": 1.1589861751152073, "grad_norm": 0.9361659768144168, "learning_rate": 8.211432244898696e-07, "loss": 0.7470313310623169, "step": 5030 }, { "epoch": 1.1592165898617512, "grad_norm": 0.9907043815397547, "learning_rate": 8.207683969685091e-07, "loss": 0.7691675424575806, "step": 5031 }, { "epoch": 1.159447004608295, "grad_norm": 0.9920310393320094, "learning_rate": 8.203935954623783e-07, "loss": 0.7060209512710571, "step": 5032 }, { "epoch": 1.1596774193548387, "grad_norm": 1.189958639239752, "learning_rate": 8.20018820025879e-07, "loss": 0.7617488503456116, "step": 5033 }, { "epoch": 1.1599078341013824, "grad_norm": 1.2174023482004634, "learning_rate": 8.196440707134102e-07, "loss": 0.7016350626945496, "step": 5034 }, { "epoch": 1.1601382488479262, "grad_norm": 1.3407340114210469, "learning_rate": 8.192693475793657e-07, "loss": 0.8375445604324341, "step": 5035 }, { "epoch": 1.16036866359447, "grad_norm": 1.2333127293881232, "learning_rate": 8.188946506781359e-07, "loss": 0.8903663158416748, "step": 5036 }, { "epoch": 1.1605990783410138, "grad_norm": 1.1046448662682735, "learning_rate": 8.18519980064108e-07, "loss": 0.7613073587417603, "step": 5037 }, { "epoch": 1.1608294930875576, "grad_norm": 1.2358045096315418, "learning_rate": 8.181453357916649e-07, "loss": 0.7443521022796631, "step": 5038 }, { "epoch": 1.1610599078341013, "grad_norm": 1.0132222940739166, "learning_rate": 8.17770717915185e-07, "loss": 0.7986443042755127, "step": 5039 }, { "epoch": 1.1612903225806452, "grad_norm": 1.1475221794766963, "learning_rate": 8.173961264890447e-07, "loss": 0.7128815650939941, "step": 5040 }, { "epoch": 1.161520737327189, "grad_norm": 2.1353174029488593, "learning_rate": 8.170215615676144e-07, "loss": 0.7189117074012756, "step": 5041 }, { "epoch": 1.1617511520737327, "grad_norm": 1.0970239097626442, "learning_rate": 8.166470232052626e-07, "loss": 0.8358731269836426, "step": 5042 }, { "epoch": 1.1619815668202764, "grad_norm": 1.3103703595946257, "learning_rate": 8.162725114563527e-07, "loss": 0.7734829187393188, "step": 5043 }, { "epoch": 1.1622119815668204, "grad_norm": 1.0836793655881298, "learning_rate": 8.158980263752443e-07, "loss": 0.842268705368042, "step": 5044 }, { "epoch": 1.162442396313364, "grad_norm": 1.0953254817646525, "learning_rate": 8.155235680162937e-07, "loss": 0.7973036766052246, "step": 5045 }, { "epoch": 1.1626728110599078, "grad_norm": 1.1431491680692596, "learning_rate": 8.151491364338532e-07, "loss": 0.743615984916687, "step": 5046 }, { "epoch": 1.1629032258064516, "grad_norm": 1.2354800674331334, "learning_rate": 8.147747316822705e-07, "loss": 0.799458384513855, "step": 5047 }, { "epoch": 1.1631336405529953, "grad_norm": 1.4365906916451476, "learning_rate": 8.144003538158907e-07, "loss": 0.8368128538131714, "step": 5048 }, { "epoch": 1.1633640552995392, "grad_norm": 1.0543438991079201, "learning_rate": 8.140260028890537e-07, "loss": 0.8543322086334229, "step": 5049 }, { "epoch": 1.163594470046083, "grad_norm": 1.4010693577495907, "learning_rate": 8.136516789560957e-07, "loss": 0.9586522579193115, "step": 5050 }, { "epoch": 1.1638248847926267, "grad_norm": 1.0831898931931903, "learning_rate": 8.132773820713505e-07, "loss": 0.7781316041946411, "step": 5051 }, { "epoch": 1.1640552995391704, "grad_norm": 1.1820241176000723, "learning_rate": 8.129031122891459e-07, "loss": 0.7726340293884277, "step": 5052 }, { "epoch": 1.1642857142857144, "grad_norm": 1.2561245635498344, "learning_rate": 8.125288696638064e-07, "loss": 0.886093258857727, "step": 5053 }, { "epoch": 1.164516129032258, "grad_norm": 1.1568232893052595, "learning_rate": 8.121546542496538e-07, "loss": 0.7896960973739624, "step": 5054 }, { "epoch": 1.1647465437788018, "grad_norm": 1.066019166680275, "learning_rate": 8.117804661010045e-07, "loss": 0.8272452354431152, "step": 5055 }, { "epoch": 1.1649769585253456, "grad_norm": 1.216096321256879, "learning_rate": 8.11406305272171e-07, "loss": 0.8452264070510864, "step": 5056 }, { "epoch": 1.1652073732718895, "grad_norm": 1.1423033593169452, "learning_rate": 8.11032171817463e-07, "loss": 0.7973369359970093, "step": 5057 }, { "epoch": 1.1654377880184332, "grad_norm": 0.9573952961126706, "learning_rate": 8.10658065791185e-07, "loss": 0.8045153617858887, "step": 5058 }, { "epoch": 1.165668202764977, "grad_norm": 1.2070626820317865, "learning_rate": 8.102839872476378e-07, "loss": 0.8921254873275757, "step": 5059 }, { "epoch": 1.1658986175115207, "grad_norm": 1.1196640968944265, "learning_rate": 8.099099362411191e-07, "loss": 0.7633669376373291, "step": 5060 }, { "epoch": 1.1661290322580644, "grad_norm": 1.4676357149183228, "learning_rate": 8.095359128259214e-07, "loss": 0.9303205013275146, "step": 5061 }, { "epoch": 1.1663594470046084, "grad_norm": 1.1532839170590041, "learning_rate": 8.091619170563335e-07, "loss": 0.867104709148407, "step": 5062 }, { "epoch": 1.166589861751152, "grad_norm": 1.2071495700843942, "learning_rate": 8.087879489866409e-07, "loss": 0.8136844038963318, "step": 5063 }, { "epoch": 1.1668202764976958, "grad_norm": 1.5482117252744063, "learning_rate": 8.084140086711246e-07, "loss": 0.9016939997673035, "step": 5064 }, { "epoch": 1.1670506912442395, "grad_norm": 1.5795186850129557, "learning_rate": 8.080400961640608e-07, "loss": 0.8621236085891724, "step": 5065 }, { "epoch": 1.1672811059907835, "grad_norm": 1.336449231038986, "learning_rate": 8.076662115197234e-07, "loss": 0.856648862361908, "step": 5066 }, { "epoch": 1.1675115207373272, "grad_norm": 1.3107118910408024, "learning_rate": 8.072923547923805e-07, "loss": 0.7752784490585327, "step": 5067 }, { "epoch": 1.167741935483871, "grad_norm": 1.3093385224686542, "learning_rate": 8.069185260362974e-07, "loss": 0.8573904037475586, "step": 5068 }, { "epoch": 1.1679723502304147, "grad_norm": 1.1636599679682322, "learning_rate": 8.065447253057347e-07, "loss": 0.724372148513794, "step": 5069 }, { "epoch": 1.1682027649769586, "grad_norm": 1.146758460237727, "learning_rate": 8.061709526549486e-07, "loss": 0.7428436875343323, "step": 5070 }, { "epoch": 1.1684331797235024, "grad_norm": 1.273017047999111, "learning_rate": 8.057972081381925e-07, "loss": 0.8888595104217529, "step": 5071 }, { "epoch": 1.168663594470046, "grad_norm": 0.9497262022662447, "learning_rate": 8.054234918097146e-07, "loss": 0.5753290057182312, "step": 5072 }, { "epoch": 1.1688940092165898, "grad_norm": 1.037170746248572, "learning_rate": 8.050498037237589e-07, "loss": 0.6724086999893188, "step": 5073 }, { "epoch": 1.1691244239631335, "grad_norm": 1.1504888789916348, "learning_rate": 8.046761439345664e-07, "loss": 0.7410751581192017, "step": 5074 }, { "epoch": 1.1693548387096775, "grad_norm": 1.2658920818717738, "learning_rate": 8.043025124963731e-07, "loss": 0.8522979021072388, "step": 5075 }, { "epoch": 1.1695852534562212, "grad_norm": 0.9918624551952729, "learning_rate": 8.039289094634109e-07, "loss": 0.6243441700935364, "step": 5076 }, { "epoch": 1.169815668202765, "grad_norm": 1.113826210544245, "learning_rate": 8.03555334889908e-07, "loss": 0.9332150220870972, "step": 5077 }, { "epoch": 1.1700460829493087, "grad_norm": 1.17170377289517, "learning_rate": 8.031817888300883e-07, "loss": 0.7620645761489868, "step": 5078 }, { "epoch": 1.1702764976958526, "grad_norm": 1.2693395517069683, "learning_rate": 8.028082713381708e-07, "loss": 0.6983245015144348, "step": 5079 }, { "epoch": 1.1705069124423964, "grad_norm": 1.049572082944252, "learning_rate": 8.024347824683723e-07, "loss": 0.6220129728317261, "step": 5080 }, { "epoch": 1.17073732718894, "grad_norm": 1.0906919021349344, "learning_rate": 8.020613222749034e-07, "loss": 0.7363810539245605, "step": 5081 }, { "epoch": 1.1709677419354838, "grad_norm": 1.1450127350480972, "learning_rate": 8.016878908119713e-07, "loss": 0.6864198446273804, "step": 5082 }, { "epoch": 1.1711981566820278, "grad_norm": 1.061738817269073, "learning_rate": 8.013144881337795e-07, "loss": 0.758607029914856, "step": 5083 }, { "epoch": 1.1714285714285715, "grad_norm": 1.038630253415404, "learning_rate": 8.009411142945269e-07, "loss": 0.7519336938858032, "step": 5084 }, { "epoch": 1.1716589861751152, "grad_norm": 1.132431622302542, "learning_rate": 8.005677693484076e-07, "loss": 0.7681798934936523, "step": 5085 }, { "epoch": 1.171889400921659, "grad_norm": 1.1022208744006678, "learning_rate": 8.00194453349613e-07, "loss": 0.6808522939682007, "step": 5086 }, { "epoch": 1.1721198156682027, "grad_norm": 1.039877694159321, "learning_rate": 7.99821166352329e-07, "loss": 0.7373358607292175, "step": 5087 }, { "epoch": 1.1723502304147466, "grad_norm": 1.0199898679930943, "learning_rate": 7.994479084107374e-07, "loss": 0.7272510528564453, "step": 5088 }, { "epoch": 1.1725806451612903, "grad_norm": 1.2473385255320408, "learning_rate": 7.990746795790166e-07, "loss": 0.845584511756897, "step": 5089 }, { "epoch": 1.172811059907834, "grad_norm": 1.188342902392479, "learning_rate": 7.987014799113397e-07, "loss": 0.7751157283782959, "step": 5090 }, { "epoch": 1.1730414746543778, "grad_norm": 1.1193246813934836, "learning_rate": 7.98328309461877e-07, "loss": 0.679701566696167, "step": 5091 }, { "epoch": 1.1732718894009218, "grad_norm": 1.1116687434739936, "learning_rate": 7.979551682847932e-07, "loss": 0.7630679607391357, "step": 5092 }, { "epoch": 1.1735023041474655, "grad_norm": 1.0309555153446328, "learning_rate": 7.975820564342487e-07, "loss": 0.700912594795227, "step": 5093 }, { "epoch": 1.1737327188940092, "grad_norm": 1.097867809116453, "learning_rate": 7.972089739644012e-07, "loss": 0.6789706945419312, "step": 5094 }, { "epoch": 1.173963133640553, "grad_norm": 1.411041629986285, "learning_rate": 7.968359209294027e-07, "loss": 0.6744855642318726, "step": 5095 }, { "epoch": 1.1741935483870969, "grad_norm": 1.060959542495881, "learning_rate": 7.964628973834011e-07, "loss": 0.7551798820495605, "step": 5096 }, { "epoch": 1.1744239631336406, "grad_norm": 0.9743982939550204, "learning_rate": 7.960899033805407e-07, "loss": 0.711478054523468, "step": 5097 }, { "epoch": 1.1746543778801843, "grad_norm": 1.1281696794434548, "learning_rate": 7.95716938974961e-07, "loss": 0.7464019060134888, "step": 5098 }, { "epoch": 1.174884792626728, "grad_norm": 1.2269121334355921, "learning_rate": 7.953440042207966e-07, "loss": 0.7667930126190186, "step": 5099 }, { "epoch": 1.1751152073732718, "grad_norm": 0.9314104563097803, "learning_rate": 7.949710991721796e-07, "loss": 0.7574796676635742, "step": 5100 }, { "epoch": 1.1753456221198157, "grad_norm": 0.9285474016256665, "learning_rate": 7.945982238832361e-07, "loss": 0.6627304553985596, "step": 5101 }, { "epoch": 1.1755760368663595, "grad_norm": 1.2503590742658475, "learning_rate": 7.942253784080879e-07, "loss": 0.6803916692733765, "step": 5102 }, { "epoch": 1.1758064516129032, "grad_norm": 1.1622603764445048, "learning_rate": 7.938525628008541e-07, "loss": 0.7107337713241577, "step": 5103 }, { "epoch": 1.176036866359447, "grad_norm": 1.0411872319848583, "learning_rate": 7.934797771156481e-07, "loss": 0.7669517993927002, "step": 5104 }, { "epoch": 1.1762672811059907, "grad_norm": 1.185214338142044, "learning_rate": 7.931070214065787e-07, "loss": 0.7431854605674744, "step": 5105 }, { "epoch": 1.1764976958525346, "grad_norm": 1.121798206744332, "learning_rate": 7.927342957277512e-07, "loss": 0.7778047323226929, "step": 5106 }, { "epoch": 1.1767281105990783, "grad_norm": 1.1095356364162186, "learning_rate": 7.923616001332666e-07, "loss": 0.7759886980056763, "step": 5107 }, { "epoch": 1.176958525345622, "grad_norm": 1.236811676128496, "learning_rate": 7.919889346772206e-07, "loss": 0.8010379076004028, "step": 5108 }, { "epoch": 1.177188940092166, "grad_norm": 1.06629818182004, "learning_rate": 7.916162994137055e-07, "loss": 0.6671626567840576, "step": 5109 }, { "epoch": 1.1774193548387097, "grad_norm": 1.3043487682811514, "learning_rate": 7.912436943968088e-07, "loss": 0.7521620988845825, "step": 5110 }, { "epoch": 1.1776497695852535, "grad_norm": 1.0243889894502596, "learning_rate": 7.908711196806131e-07, "loss": 0.7626729011535645, "step": 5111 }, { "epoch": 1.1778801843317972, "grad_norm": 1.2636422633100723, "learning_rate": 7.904985753191979e-07, "loss": 0.8247047066688538, "step": 5112 }, { "epoch": 1.178110599078341, "grad_norm": 0.9958902943746148, "learning_rate": 7.901260613666372e-07, "loss": 0.6851831078529358, "step": 5113 }, { "epoch": 1.1783410138248849, "grad_norm": 1.114469339271613, "learning_rate": 7.897535778770003e-07, "loss": 0.7752102613449097, "step": 5114 }, { "epoch": 1.1785714285714286, "grad_norm": 1.0998339013097813, "learning_rate": 7.893811249043537e-07, "loss": 0.8885148167610168, "step": 5115 }, { "epoch": 1.1788018433179723, "grad_norm": 1.3062040351627935, "learning_rate": 7.890087025027579e-07, "loss": 0.7530373334884644, "step": 5116 }, { "epoch": 1.179032258064516, "grad_norm": 1.0400370692656624, "learning_rate": 7.886363107262697e-07, "loss": 0.7795672416687012, "step": 5117 }, { "epoch": 1.1792626728110598, "grad_norm": 1.0719443222612952, "learning_rate": 7.882639496289413e-07, "loss": 0.7563966512680054, "step": 5118 }, { "epoch": 1.1794930875576037, "grad_norm": 0.9799024359449507, "learning_rate": 7.878916192648198e-07, "loss": 0.7218793630599976, "step": 5119 }, { "epoch": 1.1797235023041475, "grad_norm": 1.3292879414667447, "learning_rate": 7.875193196879494e-07, "loss": 0.8213250637054443, "step": 5120 }, { "epoch": 1.1799539170506912, "grad_norm": 1.118163280715499, "learning_rate": 7.871470509523685e-07, "loss": 0.8134827613830566, "step": 5121 }, { "epoch": 1.1801843317972351, "grad_norm": 0.9613119464109229, "learning_rate": 7.867748131121109e-07, "loss": 0.6135407090187073, "step": 5122 }, { "epoch": 1.1804147465437789, "grad_norm": 1.2999694720426915, "learning_rate": 7.864026062212073e-07, "loss": 0.8110366463661194, "step": 5123 }, { "epoch": 1.1806451612903226, "grad_norm": 0.9962674732824631, "learning_rate": 7.860304303336827e-07, "loss": 0.6723964214324951, "step": 5124 }, { "epoch": 1.1808755760368663, "grad_norm": 1.2942490465484493, "learning_rate": 7.856582855035577e-07, "loss": 0.8308886885643005, "step": 5125 }, { "epoch": 1.18110599078341, "grad_norm": 1.023999175845692, "learning_rate": 7.852861717848488e-07, "loss": 0.7960010766983032, "step": 5126 }, { "epoch": 1.181336405529954, "grad_norm": 1.2456351777125307, "learning_rate": 7.84914089231568e-07, "loss": 0.7931640148162842, "step": 5127 }, { "epoch": 1.1815668202764977, "grad_norm": 1.2288164842517166, "learning_rate": 7.845420378977222e-07, "loss": 0.762995719909668, "step": 5128 }, { "epoch": 1.1817972350230415, "grad_norm": 1.373671152705427, "learning_rate": 7.841700178373146e-07, "loss": 0.9416301250457764, "step": 5129 }, { "epoch": 1.1820276497695852, "grad_norm": 1.0032147289786453, "learning_rate": 7.837980291043431e-07, "loss": 0.7666923999786377, "step": 5130 }, { "epoch": 1.182258064516129, "grad_norm": 1.1123898953678502, "learning_rate": 7.834260717528012e-07, "loss": 0.7668861150741577, "step": 5131 }, { "epoch": 1.1824884792626729, "grad_norm": 1.1236616956881595, "learning_rate": 7.830541458366786e-07, "loss": 0.7576566934585571, "step": 5132 }, { "epoch": 1.1827188940092166, "grad_norm": 1.0432406760791426, "learning_rate": 7.826822514099595e-07, "loss": 0.6288204193115234, "step": 5133 }, { "epoch": 1.1829493087557603, "grad_norm": 1.2747953745069134, "learning_rate": 7.823103885266236e-07, "loss": 0.8332630395889282, "step": 5134 }, { "epoch": 1.1831797235023043, "grad_norm": 1.3987532245853456, "learning_rate": 7.819385572406469e-07, "loss": 0.9294546246528625, "step": 5135 }, { "epoch": 1.183410138248848, "grad_norm": 0.9911973140133253, "learning_rate": 7.81566757606e-07, "loss": 0.637617826461792, "step": 5136 }, { "epoch": 1.1836405529953917, "grad_norm": 1.2295561738436023, "learning_rate": 7.81194989676649e-07, "loss": 0.7614878416061401, "step": 5137 }, { "epoch": 1.1838709677419355, "grad_norm": 1.2939539056978149, "learning_rate": 7.808232535065556e-07, "loss": 0.8612164258956909, "step": 5138 }, { "epoch": 1.1841013824884792, "grad_norm": 1.0758125620247463, "learning_rate": 7.804515491496765e-07, "loss": 0.7530151605606079, "step": 5139 }, { "epoch": 1.1843317972350231, "grad_norm": 0.9883281570065391, "learning_rate": 7.800798766599648e-07, "loss": 0.7739782929420471, "step": 5140 }, { "epoch": 1.1845622119815669, "grad_norm": 1.0835226521428547, "learning_rate": 7.797082360913678e-07, "loss": 0.7992277145385742, "step": 5141 }, { "epoch": 1.1847926267281106, "grad_norm": 1.2343955942215838, "learning_rate": 7.793366274978284e-07, "loss": 0.8744574785232544, "step": 5142 }, { "epoch": 1.1850230414746543, "grad_norm": 0.9992165946111031, "learning_rate": 7.789650509332857e-07, "loss": 0.7522493600845337, "step": 5143 }, { "epoch": 1.185253456221198, "grad_norm": 1.1095107175779666, "learning_rate": 7.785935064516733e-07, "loss": 0.8811007142066956, "step": 5144 }, { "epoch": 1.185483870967742, "grad_norm": 0.9512882648642599, "learning_rate": 7.782219941069201e-07, "loss": 0.8141417503356934, "step": 5145 }, { "epoch": 1.1857142857142857, "grad_norm": 1.3048397777053706, "learning_rate": 7.778505139529509e-07, "loss": 0.9473680257797241, "step": 5146 }, { "epoch": 1.1859447004608294, "grad_norm": 1.1561666933094623, "learning_rate": 7.774790660436857e-07, "loss": 0.740132212638855, "step": 5147 }, { "epoch": 1.1861751152073732, "grad_norm": 1.1265716565789026, "learning_rate": 7.771076504330392e-07, "loss": 0.7904594540596008, "step": 5148 }, { "epoch": 1.1864055299539171, "grad_norm": 1.1481555737803508, "learning_rate": 7.767362671749224e-07, "loss": 0.8085094690322876, "step": 5149 }, { "epoch": 1.1866359447004609, "grad_norm": 1.3362082879917547, "learning_rate": 7.76364916323241e-07, "loss": 0.6954756379127502, "step": 5150 }, { "epoch": 1.1868663594470046, "grad_norm": 1.175085216674836, "learning_rate": 7.759935979318953e-07, "loss": 0.8575167059898376, "step": 5151 }, { "epoch": 1.1870967741935483, "grad_norm": 0.9330545417113619, "learning_rate": 7.756223120547829e-07, "loss": 0.6125110387802124, "step": 5152 }, { "epoch": 1.1873271889400923, "grad_norm": 1.1387987197615417, "learning_rate": 7.752510587457949e-07, "loss": 0.7737400531768799, "step": 5153 }, { "epoch": 1.187557603686636, "grad_norm": 0.9473095115528148, "learning_rate": 7.748798380588177e-07, "loss": 0.7300955653190613, "step": 5154 }, { "epoch": 1.1877880184331797, "grad_norm": 0.9479432315278626, "learning_rate": 7.745086500477343e-07, "loss": 0.7974356412887573, "step": 5155 }, { "epoch": 1.1880184331797234, "grad_norm": 1.120213603018525, "learning_rate": 7.74137494766422e-07, "loss": 0.8158693313598633, "step": 5156 }, { "epoch": 1.1882488479262672, "grad_norm": 0.9086968377624679, "learning_rate": 7.737663722687531e-07, "loss": 0.6656177639961243, "step": 5157 }, { "epoch": 1.1884792626728111, "grad_norm": 1.284345958176322, "learning_rate": 7.733952826085958e-07, "loss": 0.7796640992164612, "step": 5158 }, { "epoch": 1.1887096774193548, "grad_norm": 1.1079992534891525, "learning_rate": 7.730242258398135e-07, "loss": 0.9224779009819031, "step": 5159 }, { "epoch": 1.1889400921658986, "grad_norm": 1.2013047291849663, "learning_rate": 7.726532020162639e-07, "loss": 0.7105277180671692, "step": 5160 }, { "epoch": 1.1891705069124423, "grad_norm": 0.9139263319393289, "learning_rate": 7.722822111918012e-07, "loss": 0.5793930292129517, "step": 5161 }, { "epoch": 1.1894009216589863, "grad_norm": 0.9419478266668957, "learning_rate": 7.719112534202743e-07, "loss": 0.7319367527961731, "step": 5162 }, { "epoch": 1.18963133640553, "grad_norm": 1.182614737199728, "learning_rate": 7.715403287555266e-07, "loss": 0.7517954111099243, "step": 5163 }, { "epoch": 1.1898617511520737, "grad_norm": 1.1800441614309307, "learning_rate": 7.711694372513981e-07, "loss": 0.8633241057395935, "step": 5164 }, { "epoch": 1.1900921658986174, "grad_norm": 1.280920610105802, "learning_rate": 7.707985789617227e-07, "loss": 0.6453210115432739, "step": 5165 }, { "epoch": 1.1903225806451614, "grad_norm": 1.1209224749220659, "learning_rate": 7.704277539403303e-07, "loss": 0.7609909772872925, "step": 5166 }, { "epoch": 1.1905529953917051, "grad_norm": 1.1829891287159422, "learning_rate": 7.700569622410453e-07, "loss": 0.7419755458831787, "step": 5167 }, { "epoch": 1.1907834101382488, "grad_norm": 1.0759571852853795, "learning_rate": 7.696862039176879e-07, "loss": 0.849078357219696, "step": 5168 }, { "epoch": 1.1910138248847926, "grad_norm": 1.3077976619104341, "learning_rate": 7.693154790240732e-07, "loss": 0.8147921562194824, "step": 5169 }, { "epoch": 1.1912442396313363, "grad_norm": 1.1349568865686221, "learning_rate": 7.689447876140114e-07, "loss": 0.7660118937492371, "step": 5170 }, { "epoch": 1.1914746543778802, "grad_norm": 0.9919046297525586, "learning_rate": 7.685741297413075e-07, "loss": 0.7775185108184814, "step": 5171 }, { "epoch": 1.191705069124424, "grad_norm": 1.0634336005518812, "learning_rate": 7.682035054597624e-07, "loss": 0.7184321880340576, "step": 5172 }, { "epoch": 1.1919354838709677, "grad_norm": 0.9191067866194278, "learning_rate": 7.678329148231719e-07, "loss": 0.7108585834503174, "step": 5173 }, { "epoch": 1.1921658986175114, "grad_norm": 1.169972531551494, "learning_rate": 7.674623578853259e-07, "loss": 0.7252670526504517, "step": 5174 }, { "epoch": 1.1923963133640554, "grad_norm": 1.0227424567448893, "learning_rate": 7.670918347000113e-07, "loss": 0.818352460861206, "step": 5175 }, { "epoch": 1.192626728110599, "grad_norm": 0.8768631462521176, "learning_rate": 7.667213453210086e-07, "loss": 0.6538013815879822, "step": 5176 }, { "epoch": 1.1928571428571428, "grad_norm": 1.1216359209528128, "learning_rate": 7.663508898020935e-07, "loss": 0.7058148384094238, "step": 5177 }, { "epoch": 1.1930875576036866, "grad_norm": 1.0528263608484594, "learning_rate": 7.659804681970377e-07, "loss": 0.7003160715103149, "step": 5178 }, { "epoch": 1.1933179723502305, "grad_norm": 1.2339709506043992, "learning_rate": 7.656100805596072e-07, "loss": 0.84567791223526, "step": 5179 }, { "epoch": 1.1935483870967742, "grad_norm": 1.239861543806107, "learning_rate": 7.652397269435626e-07, "loss": 0.7994743585586548, "step": 5180 }, { "epoch": 1.193778801843318, "grad_norm": 1.3106444419652792, "learning_rate": 7.648694074026615e-07, "loss": 0.8177791833877563, "step": 5181 }, { "epoch": 1.1940092165898617, "grad_norm": 1.362939104353802, "learning_rate": 7.644991219906545e-07, "loss": 0.6663975715637207, "step": 5182 }, { "epoch": 1.1942396313364054, "grad_norm": 1.1422405746222943, "learning_rate": 7.641288707612878e-07, "loss": 0.8275883197784424, "step": 5183 }, { "epoch": 1.1944700460829494, "grad_norm": 1.1201157873973466, "learning_rate": 7.637586537683036e-07, "loss": 0.7710767388343811, "step": 5184 }, { "epoch": 1.194700460829493, "grad_norm": 1.1629669577400157, "learning_rate": 7.633884710654382e-07, "loss": 0.7628582715988159, "step": 5185 }, { "epoch": 1.1949308755760368, "grad_norm": 1.3793540006541976, "learning_rate": 7.630183227064227e-07, "loss": 0.7002676725387573, "step": 5186 }, { "epoch": 1.1951612903225806, "grad_norm": 0.9948455527839576, "learning_rate": 7.626482087449841e-07, "loss": 0.8272073268890381, "step": 5187 }, { "epoch": 1.1953917050691245, "grad_norm": 1.0711227380559258, "learning_rate": 7.622781292348435e-07, "loss": 0.7881417274475098, "step": 5188 }, { "epoch": 1.1956221198156682, "grad_norm": 1.0728428578693516, "learning_rate": 7.61908084229718e-07, "loss": 0.797294020652771, "step": 5189 }, { "epoch": 1.195852534562212, "grad_norm": 1.0264450399364256, "learning_rate": 7.615380737833191e-07, "loss": 0.7752290964126587, "step": 5190 }, { "epoch": 1.1960829493087557, "grad_norm": 1.0830464595218987, "learning_rate": 7.611680979493525e-07, "loss": 0.7299143075942993, "step": 5191 }, { "epoch": 1.1963133640552996, "grad_norm": 1.4839567137751186, "learning_rate": 7.60798156781521e-07, "loss": 0.6749997138977051, "step": 5192 }, { "epoch": 1.1965437788018434, "grad_norm": 1.2717197322235172, "learning_rate": 7.6042825033352e-07, "loss": 0.7933796048164368, "step": 5193 }, { "epoch": 1.196774193548387, "grad_norm": 1.1254669600910374, "learning_rate": 7.600583786590411e-07, "loss": 0.7214919328689575, "step": 5194 }, { "epoch": 1.1970046082949308, "grad_norm": 1.0000165841598083, "learning_rate": 7.596885418117713e-07, "loss": 0.7804256081581116, "step": 5195 }, { "epoch": 1.1972350230414746, "grad_norm": 1.2738023107912249, "learning_rate": 7.593187398453915e-07, "loss": 0.7615138292312622, "step": 5196 }, { "epoch": 1.1974654377880185, "grad_norm": 1.0493977127227612, "learning_rate": 7.589489728135778e-07, "loss": 0.8473657369613647, "step": 5197 }, { "epoch": 1.1976958525345622, "grad_norm": 1.2204301678409606, "learning_rate": 7.585792407700018e-07, "loss": 0.7302027940750122, "step": 5198 }, { "epoch": 1.197926267281106, "grad_norm": 1.123276567811957, "learning_rate": 7.582095437683294e-07, "loss": 0.7631692886352539, "step": 5199 }, { "epoch": 1.1981566820276497, "grad_norm": 1.339389807954867, "learning_rate": 7.578398818622211e-07, "loss": 0.7982754707336426, "step": 5200 }, { "epoch": 1.1983870967741936, "grad_norm": 1.3949436336418501, "learning_rate": 7.574702551053339e-07, "loss": 0.8445635437965393, "step": 5201 }, { "epoch": 1.1986175115207374, "grad_norm": 1.267881130363425, "learning_rate": 7.571006635513182e-07, "loss": 0.8486276268959045, "step": 5202 }, { "epoch": 1.198847926267281, "grad_norm": 1.2841422228776138, "learning_rate": 7.567311072538191e-07, "loss": 0.8433184623718262, "step": 5203 }, { "epoch": 1.1990783410138248, "grad_norm": 1.5895945882971518, "learning_rate": 7.56361586266478e-07, "loss": 0.9772260189056396, "step": 5204 }, { "epoch": 1.1993087557603688, "grad_norm": 1.1927959868338558, "learning_rate": 7.559921006429304e-07, "loss": 0.8349692821502686, "step": 5205 }, { "epoch": 1.1995391705069125, "grad_norm": 1.070076083870323, "learning_rate": 7.556226504368059e-07, "loss": 0.7454575300216675, "step": 5206 }, { "epoch": 1.1997695852534562, "grad_norm": 0.882927792535501, "learning_rate": 7.552532357017303e-07, "loss": 0.6680991649627686, "step": 5207 }, { "epoch": 1.2, "grad_norm": 1.1844993546767875, "learning_rate": 7.54883856491324e-07, "loss": 0.6528318524360657, "step": 5208 }, { "epoch": 1.2002304147465437, "grad_norm": 1.0482736751922475, "learning_rate": 7.545145128592008e-07, "loss": 0.7711834907531738, "step": 5209 }, { "epoch": 1.2004608294930876, "grad_norm": 1.022603342926927, "learning_rate": 7.541452048589714e-07, "loss": 0.6378746628761292, "step": 5210 }, { "epoch": 1.2006912442396314, "grad_norm": 0.9309859008896244, "learning_rate": 7.537759325442402e-07, "loss": 0.7489340305328369, "step": 5211 }, { "epoch": 1.200921658986175, "grad_norm": 1.0825673838806515, "learning_rate": 7.53406695968606e-07, "loss": 0.7869534492492676, "step": 5212 }, { "epoch": 1.2011520737327188, "grad_norm": 1.1316888770375757, "learning_rate": 7.530374951856637e-07, "loss": 0.7252482175827026, "step": 5213 }, { "epoch": 1.2013824884792628, "grad_norm": 1.1337087819491523, "learning_rate": 7.526683302490018e-07, "loss": 0.763259768486023, "step": 5214 }, { "epoch": 1.2016129032258065, "grad_norm": 1.405277715760194, "learning_rate": 7.522992012122046e-07, "loss": 0.8135688304901123, "step": 5215 }, { "epoch": 1.2018433179723502, "grad_norm": 1.5589534049714566, "learning_rate": 7.519301081288504e-07, "loss": 0.9282290935516357, "step": 5216 }, { "epoch": 1.202073732718894, "grad_norm": 1.2621340712897178, "learning_rate": 7.515610510525125e-07, "loss": 0.7968727946281433, "step": 5217 }, { "epoch": 1.202304147465438, "grad_norm": 1.4154309582650375, "learning_rate": 7.511920300367594e-07, "loss": 0.9495606422424316, "step": 5218 }, { "epoch": 1.2025345622119816, "grad_norm": 1.120709992771365, "learning_rate": 7.508230451351537e-07, "loss": 0.6790425181388855, "step": 5219 }, { "epoch": 1.2027649769585254, "grad_norm": 1.1216778132469425, "learning_rate": 7.504540964012527e-07, "loss": 0.7269036173820496, "step": 5220 }, { "epoch": 1.202995391705069, "grad_norm": 1.4394573291388193, "learning_rate": 7.500851838886097e-07, "loss": 0.820799708366394, "step": 5221 }, { "epoch": 1.2032258064516128, "grad_norm": 1.1080457725700354, "learning_rate": 7.497163076507715e-07, "loss": 0.7693401575088501, "step": 5222 }, { "epoch": 1.2034562211981568, "grad_norm": 1.1611837511561531, "learning_rate": 7.493474677412793e-07, "loss": 0.7687606811523438, "step": 5223 }, { "epoch": 1.2036866359447005, "grad_norm": 0.9784122136232752, "learning_rate": 7.489786642136709e-07, "loss": 0.6858488321304321, "step": 5224 }, { "epoch": 1.2039170506912442, "grad_norm": 0.8776412008252917, "learning_rate": 7.486098971214769e-07, "loss": 0.7575044631958008, "step": 5225 }, { "epoch": 1.204147465437788, "grad_norm": 0.8129887936087057, "learning_rate": 7.482411665182236e-07, "loss": 0.6799627542495728, "step": 5226 }, { "epoch": 1.2043778801843317, "grad_norm": 1.4994332488998736, "learning_rate": 7.478724724574317e-07, "loss": 0.8882759809494019, "step": 5227 }, { "epoch": 1.2046082949308756, "grad_norm": 1.10750930167245, "learning_rate": 7.475038149926165e-07, "loss": 0.7835016250610352, "step": 5228 }, { "epoch": 1.2048387096774194, "grad_norm": 1.3325922049902164, "learning_rate": 7.471351941772883e-07, "loss": 0.9264512062072754, "step": 5229 }, { "epoch": 1.205069124423963, "grad_norm": 1.225862576818596, "learning_rate": 7.467666100649521e-07, "loss": 0.8094228506088257, "step": 5230 }, { "epoch": 1.205299539170507, "grad_norm": 1.167425367358343, "learning_rate": 7.463980627091073e-07, "loss": 0.7782102823257446, "step": 5231 }, { "epoch": 1.2055299539170508, "grad_norm": 1.2892161969383955, "learning_rate": 7.460295521632474e-07, "loss": 0.7946768999099731, "step": 5232 }, { "epoch": 1.2057603686635945, "grad_norm": 1.2538288509415036, "learning_rate": 7.456610784808624e-07, "loss": 0.7571625709533691, "step": 5233 }, { "epoch": 1.2059907834101382, "grad_norm": 1.3786667467707436, "learning_rate": 7.45292641715435e-07, "loss": 0.9760236144065857, "step": 5234 }, { "epoch": 1.206221198156682, "grad_norm": 1.0717694328508904, "learning_rate": 7.449242419204431e-07, "loss": 0.6370055675506592, "step": 5235 }, { "epoch": 1.206451612903226, "grad_norm": 1.226412390848778, "learning_rate": 7.445558791493603e-07, "loss": 0.7991320490837097, "step": 5236 }, { "epoch": 1.2066820276497696, "grad_norm": 1.0607083796487833, "learning_rate": 7.441875534556531e-07, "loss": 0.8840054273605347, "step": 5237 }, { "epoch": 1.2069124423963133, "grad_norm": 1.0615184698087237, "learning_rate": 7.438192648927841e-07, "loss": 0.8634533882141113, "step": 5238 }, { "epoch": 1.207142857142857, "grad_norm": 0.9816687263450602, "learning_rate": 7.434510135142098e-07, "loss": 0.7081723213195801, "step": 5239 }, { "epoch": 1.2073732718894008, "grad_norm": 1.1398058732045784, "learning_rate": 7.430827993733808e-07, "loss": 0.7160249352455139, "step": 5240 }, { "epoch": 1.2076036866359448, "grad_norm": 0.8011837684152103, "learning_rate": 7.427146225237438e-07, "loss": 0.5323421955108643, "step": 5241 }, { "epoch": 1.2078341013824885, "grad_norm": 1.0448270993907307, "learning_rate": 7.423464830187386e-07, "loss": 0.6439197063446045, "step": 5242 }, { "epoch": 1.2080645161290322, "grad_norm": 1.2861588666790074, "learning_rate": 7.419783809117999e-07, "loss": 0.8268016576766968, "step": 5243 }, { "epoch": 1.2082949308755762, "grad_norm": 1.0010661947708184, "learning_rate": 7.416103162563582e-07, "loss": 0.8115339279174805, "step": 5244 }, { "epoch": 1.2085253456221199, "grad_norm": 1.05524382659239, "learning_rate": 7.41242289105837e-07, "loss": 0.8677197694778442, "step": 5245 }, { "epoch": 1.2087557603686636, "grad_norm": 1.3337261104998102, "learning_rate": 7.408742995136547e-07, "loss": 0.7942948937416077, "step": 5246 }, { "epoch": 1.2089861751152073, "grad_norm": 1.4261507552200647, "learning_rate": 7.405063475332249e-07, "loss": 0.8457766771316528, "step": 5247 }, { "epoch": 1.209216589861751, "grad_norm": 1.2992145711475631, "learning_rate": 7.401384332179552e-07, "loss": 0.8463923931121826, "step": 5248 }, { "epoch": 1.209447004608295, "grad_norm": 1.2576660242210724, "learning_rate": 7.397705566212479e-07, "loss": 0.9192875623703003, "step": 5249 }, { "epoch": 1.2096774193548387, "grad_norm": 1.257257688865163, "learning_rate": 7.394027177964999e-07, "loss": 0.7461347579956055, "step": 5250 }, { "epoch": 1.2099078341013825, "grad_norm": 1.150791607540225, "learning_rate": 7.390349167971025e-07, "loss": 0.6953321695327759, "step": 5251 }, { "epoch": 1.2101382488479262, "grad_norm": 1.0284326235023098, "learning_rate": 7.38667153676441e-07, "loss": 0.7226089835166931, "step": 5252 }, { "epoch": 1.21036866359447, "grad_norm": 0.8781484717910895, "learning_rate": 7.382994284878967e-07, "loss": 0.6746406555175781, "step": 5253 }, { "epoch": 1.2105990783410139, "grad_norm": 1.109396083619457, "learning_rate": 7.379317412848438e-07, "loss": 0.7600215673446655, "step": 5254 }, { "epoch": 1.2108294930875576, "grad_norm": 1.0821310147954002, "learning_rate": 7.375640921206514e-07, "loss": 0.7530734539031982, "step": 5255 }, { "epoch": 1.2110599078341013, "grad_norm": 1.0572444642243028, "learning_rate": 7.371964810486839e-07, "loss": 0.8103033304214478, "step": 5256 }, { "epoch": 1.2112903225806453, "grad_norm": 1.5370115848017, "learning_rate": 7.368289081222994e-07, "loss": 0.8916831016540527, "step": 5257 }, { "epoch": 1.211520737327189, "grad_norm": 0.9972990737801745, "learning_rate": 7.364613733948501e-07, "loss": 0.6728129386901855, "step": 5258 }, { "epoch": 1.2117511520737327, "grad_norm": 1.2459715050980873, "learning_rate": 7.360938769196841e-07, "loss": 0.8609380722045898, "step": 5259 }, { "epoch": 1.2119815668202765, "grad_norm": 1.2704694196315967, "learning_rate": 7.357264187501422e-07, "loss": 0.9370373487472534, "step": 5260 }, { "epoch": 1.2122119815668202, "grad_norm": 1.1080973982930933, "learning_rate": 7.353589989395604e-07, "loss": 0.6812434196472168, "step": 5261 }, { "epoch": 1.2124423963133641, "grad_norm": 1.1917998982451765, "learning_rate": 7.349916175412701e-07, "loss": 0.7661731243133545, "step": 5262 }, { "epoch": 1.2126728110599079, "grad_norm": 1.175052294784061, "learning_rate": 7.346242746085951e-07, "loss": 0.7306643128395081, "step": 5263 }, { "epoch": 1.2129032258064516, "grad_norm": 1.2065862060559862, "learning_rate": 7.34256970194856e-07, "loss": 0.7189076542854309, "step": 5264 }, { "epoch": 1.2131336405529953, "grad_norm": 0.8932044441494517, "learning_rate": 7.338897043533656e-07, "loss": 0.6935977935791016, "step": 5265 }, { "epoch": 1.213364055299539, "grad_norm": 1.1224428177486496, "learning_rate": 7.335224771374323e-07, "loss": 0.8451323509216309, "step": 5266 }, { "epoch": 1.213594470046083, "grad_norm": 1.1211043364668347, "learning_rate": 7.331552886003589e-07, "loss": 0.7936843037605286, "step": 5267 }, { "epoch": 1.2138248847926267, "grad_norm": 1.1507587511456696, "learning_rate": 7.327881387954418e-07, "loss": 0.7989950776100159, "step": 5268 }, { "epoch": 1.2140552995391705, "grad_norm": 1.1166217189865624, "learning_rate": 7.324210277759726e-07, "loss": 0.7579236030578613, "step": 5269 }, { "epoch": 1.2142857142857142, "grad_norm": 1.1276787851795544, "learning_rate": 7.320539555952372e-07, "loss": 0.7101268768310547, "step": 5270 }, { "epoch": 1.2145161290322581, "grad_norm": 1.0342829920040018, "learning_rate": 7.316869223065155e-07, "loss": 0.7920513153076172, "step": 5271 }, { "epoch": 1.2147465437788019, "grad_norm": 1.4357028015234437, "learning_rate": 7.313199279630814e-07, "loss": 0.9241428375244141, "step": 5272 }, { "epoch": 1.2149769585253456, "grad_norm": 1.1653282891915406, "learning_rate": 7.309529726182044e-07, "loss": 0.8278338313102722, "step": 5273 }, { "epoch": 1.2152073732718893, "grad_norm": 0.9443953324177181, "learning_rate": 7.305860563251473e-07, "loss": 0.8230598568916321, "step": 5274 }, { "epoch": 1.2154377880184333, "grad_norm": 0.9783962526324749, "learning_rate": 7.302191791371672e-07, "loss": 0.7791799902915955, "step": 5275 }, { "epoch": 1.215668202764977, "grad_norm": 1.1070826926760935, "learning_rate": 7.298523411075163e-07, "loss": 0.705475926399231, "step": 5276 }, { "epoch": 1.2158986175115207, "grad_norm": 1.2064718691511076, "learning_rate": 7.294855422894406e-07, "loss": 0.8078421354293823, "step": 5277 }, { "epoch": 1.2161290322580645, "grad_norm": 1.2182160993977798, "learning_rate": 7.2911878273618e-07, "loss": 0.8115853667259216, "step": 5278 }, { "epoch": 1.2163594470046082, "grad_norm": 1.0596504935928797, "learning_rate": 7.287520625009698e-07, "loss": 0.6917247772216797, "step": 5279 }, { "epoch": 1.2165898617511521, "grad_norm": 1.0522660082790807, "learning_rate": 7.283853816370386e-07, "loss": 0.7131551504135132, "step": 5280 }, { "epoch": 1.2168202764976959, "grad_norm": 0.9495683492221387, "learning_rate": 7.280187401976093e-07, "loss": 0.713994562625885, "step": 5281 }, { "epoch": 1.2170506912442396, "grad_norm": 1.0845439765546743, "learning_rate": 7.276521382359001e-07, "loss": 0.7123454809188843, "step": 5282 }, { "epoch": 1.2172811059907833, "grad_norm": 1.395671188469518, "learning_rate": 7.272855758051226e-07, "loss": 0.7805770635604858, "step": 5283 }, { "epoch": 1.2175115207373273, "grad_norm": 0.9191020761831104, "learning_rate": 7.269190529584823e-07, "loss": 0.756670355796814, "step": 5284 }, { "epoch": 1.217741935483871, "grad_norm": 0.9614002237797926, "learning_rate": 7.265525697491804e-07, "loss": 0.5992655754089355, "step": 5285 }, { "epoch": 1.2179723502304147, "grad_norm": 1.1857893348181308, "learning_rate": 7.26186126230411e-07, "loss": 0.7552722692489624, "step": 5286 }, { "epoch": 1.2182027649769585, "grad_norm": 1.3153742960319537, "learning_rate": 7.258197224553627e-07, "loss": 0.7189064025878906, "step": 5287 }, { "epoch": 1.2184331797235024, "grad_norm": 1.115820306372996, "learning_rate": 7.254533584772188e-07, "loss": 0.8277319669723511, "step": 5288 }, { "epoch": 1.2186635944700461, "grad_norm": 1.0584826489222536, "learning_rate": 7.250870343491561e-07, "loss": 0.6655987501144409, "step": 5289 }, { "epoch": 1.2188940092165899, "grad_norm": 1.3888484350972408, "learning_rate": 7.247207501243469e-07, "loss": 0.8654178380966187, "step": 5290 }, { "epoch": 1.2191244239631336, "grad_norm": 1.1781514985004269, "learning_rate": 7.243545058559564e-07, "loss": 0.9148486852645874, "step": 5291 }, { "epoch": 1.2193548387096773, "grad_norm": 1.0525236851594717, "learning_rate": 7.239883015971439e-07, "loss": 0.8003618717193604, "step": 5292 }, { "epoch": 1.2195852534562213, "grad_norm": 1.1614945814905475, "learning_rate": 7.236221374010647e-07, "loss": 0.7290889024734497, "step": 5293 }, { "epoch": 1.219815668202765, "grad_norm": 0.963434252776205, "learning_rate": 7.232560133208663e-07, "loss": 0.5989147424697876, "step": 5294 }, { "epoch": 1.2200460829493087, "grad_norm": 0.8766403983792901, "learning_rate": 7.228899294096907e-07, "loss": 0.8424522876739502, "step": 5295 }, { "epoch": 1.2202764976958524, "grad_norm": 1.1686896205403536, "learning_rate": 7.225238857206754e-07, "loss": 0.7753746509552002, "step": 5296 }, { "epoch": 1.2205069124423964, "grad_norm": 1.1424848742103464, "learning_rate": 7.221578823069508e-07, "loss": 0.693191647529602, "step": 5297 }, { "epoch": 1.2207373271889401, "grad_norm": 1.177332636609729, "learning_rate": 7.217919192216417e-07, "loss": 0.7561964988708496, "step": 5298 }, { "epoch": 1.2209677419354839, "grad_norm": 0.9927977088932712, "learning_rate": 7.214259965178673e-07, "loss": 0.7721199989318848, "step": 5299 }, { "epoch": 1.2211981566820276, "grad_norm": 1.39798744468456, "learning_rate": 7.210601142487407e-07, "loss": 0.8100659251213074, "step": 5300 }, { "epoch": 1.2214285714285715, "grad_norm": 1.0570396078634527, "learning_rate": 7.206942724673688e-07, "loss": 0.6753256916999817, "step": 5301 }, { "epoch": 1.2216589861751153, "grad_norm": 1.1020954128293505, "learning_rate": 7.20328471226854e-07, "loss": 0.7534425854682922, "step": 5302 }, { "epoch": 1.221889400921659, "grad_norm": 1.5962153366210945, "learning_rate": 7.199627105802913e-07, "loss": 0.8275027275085449, "step": 5303 }, { "epoch": 1.2221198156682027, "grad_norm": 1.1431238814592317, "learning_rate": 7.195969905807702e-07, "loss": 0.728579580783844, "step": 5304 }, { "epoch": 1.2223502304147464, "grad_norm": 1.1008777946014818, "learning_rate": 7.192313112813749e-07, "loss": 0.8221413493156433, "step": 5305 }, { "epoch": 1.2225806451612904, "grad_norm": 1.0255386420970887, "learning_rate": 7.188656727351832e-07, "loss": 0.7819123268127441, "step": 5306 }, { "epoch": 1.2228110599078341, "grad_norm": 1.1141595278176613, "learning_rate": 7.185000749952666e-07, "loss": 0.7474294900894165, "step": 5307 }, { "epoch": 1.2230414746543778, "grad_norm": 1.4333018176649106, "learning_rate": 7.181345181146919e-07, "loss": 0.8072259426116943, "step": 5308 }, { "epoch": 1.2232718894009216, "grad_norm": 1.3449246489382425, "learning_rate": 7.177690021465184e-07, "loss": 0.8718069791793823, "step": 5309 }, { "epoch": 1.2235023041474655, "grad_norm": 1.1090181258933243, "learning_rate": 7.174035271438006e-07, "loss": 0.8374875783920288, "step": 5310 }, { "epoch": 1.2237327188940093, "grad_norm": 1.2085386756305507, "learning_rate": 7.170380931595869e-07, "loss": 0.6669566631317139, "step": 5311 }, { "epoch": 1.223963133640553, "grad_norm": 1.1706882886588135, "learning_rate": 7.16672700246919e-07, "loss": 0.8735665678977966, "step": 5312 }, { "epoch": 1.2241935483870967, "grad_norm": 1.1826163019402958, "learning_rate": 7.16307348458834e-07, "loss": 0.8312361240386963, "step": 5313 }, { "epoch": 1.2244239631336407, "grad_norm": 1.1102424714986416, "learning_rate": 7.159420378483619e-07, "loss": 0.7927724123001099, "step": 5314 }, { "epoch": 1.2246543778801844, "grad_norm": 1.0527049283172933, "learning_rate": 7.155767684685264e-07, "loss": 0.7641698122024536, "step": 5315 }, { "epoch": 1.2248847926267281, "grad_norm": 1.0508850668326304, "learning_rate": 7.15211540372347e-07, "loss": 0.7490028142929077, "step": 5316 }, { "epoch": 1.2251152073732718, "grad_norm": 1.0604993776512237, "learning_rate": 7.148463536128354e-07, "loss": 0.7194815874099731, "step": 5317 }, { "epoch": 1.2253456221198156, "grad_norm": 1.2779756064695784, "learning_rate": 7.144812082429979e-07, "loss": 0.8328256607055664, "step": 5318 }, { "epoch": 1.2255760368663595, "grad_norm": 1.1539197608232337, "learning_rate": 7.141161043158352e-07, "loss": 0.9124876260757446, "step": 5319 }, { "epoch": 1.2258064516129032, "grad_norm": 1.346989410896588, "learning_rate": 7.137510418843416e-07, "loss": 0.8183319568634033, "step": 5320 }, { "epoch": 1.226036866359447, "grad_norm": 1.0902088619882297, "learning_rate": 7.133860210015048e-07, "loss": 0.8423885107040405, "step": 5321 }, { "epoch": 1.2262672811059907, "grad_norm": 1.064962271727849, "learning_rate": 7.130210417203082e-07, "loss": 0.8175387382507324, "step": 5322 }, { "epoch": 1.2264976958525347, "grad_norm": 1.0111617635250245, "learning_rate": 7.126561040937274e-07, "loss": 0.8415048718452454, "step": 5323 }, { "epoch": 1.2267281105990784, "grad_norm": 1.4241774929740556, "learning_rate": 7.122912081747321e-07, "loss": 0.6891156435012817, "step": 5324 }, { "epoch": 1.226958525345622, "grad_norm": 1.1236132104045742, "learning_rate": 7.119263540162876e-07, "loss": 0.667617678642273, "step": 5325 }, { "epoch": 1.2271889400921658, "grad_norm": 1.21591291521647, "learning_rate": 7.115615416713517e-07, "loss": 0.7752082347869873, "step": 5326 }, { "epoch": 1.2274193548387098, "grad_norm": 1.0094697644265302, "learning_rate": 7.111967711928757e-07, "loss": 0.6582639813423157, "step": 5327 }, { "epoch": 1.2276497695852535, "grad_norm": 0.9823209869062589, "learning_rate": 7.108320426338063e-07, "loss": 0.6996462345123291, "step": 5328 }, { "epoch": 1.2278801843317972, "grad_norm": 1.1364634127826816, "learning_rate": 7.104673560470828e-07, "loss": 0.7132028341293335, "step": 5329 }, { "epoch": 1.228110599078341, "grad_norm": 1.1959075580849723, "learning_rate": 7.101027114856395e-07, "loss": 0.7344096899032593, "step": 5330 }, { "epoch": 1.2283410138248847, "grad_norm": 1.2810764573761082, "learning_rate": 7.097381090024039e-07, "loss": 0.7805585861206055, "step": 5331 }, { "epoch": 1.2285714285714286, "grad_norm": 1.2310137220528714, "learning_rate": 7.093735486502976e-07, "loss": 0.6785855889320374, "step": 5332 }, { "epoch": 1.2288018433179724, "grad_norm": 1.3226389203047557, "learning_rate": 7.090090304822355e-07, "loss": 0.7465041875839233, "step": 5333 }, { "epoch": 1.229032258064516, "grad_norm": 1.0465247410006058, "learning_rate": 7.086445545511278e-07, "loss": 0.7400432825088501, "step": 5334 }, { "epoch": 1.2292626728110598, "grad_norm": 0.9732969942350592, "learning_rate": 7.082801209098774e-07, "loss": 0.8567768335342407, "step": 5335 }, { "epoch": 1.2294930875576038, "grad_norm": 1.133102602749406, "learning_rate": 7.079157296113807e-07, "loss": 0.7451025247573853, "step": 5336 }, { "epoch": 1.2297235023041475, "grad_norm": 1.2953309888801026, "learning_rate": 7.075513807085299e-07, "loss": 0.7178194522857666, "step": 5337 }, { "epoch": 1.2299539170506912, "grad_norm": 1.114794382407599, "learning_rate": 7.071870742542086e-07, "loss": 0.7538058161735535, "step": 5338 }, { "epoch": 1.230184331797235, "grad_norm": 1.2706015052011863, "learning_rate": 7.068228103012959e-07, "loss": 0.7853896021842957, "step": 5339 }, { "epoch": 1.230414746543779, "grad_norm": 1.6145088717882257, "learning_rate": 7.064585889026644e-07, "loss": 0.9359887838363647, "step": 5340 }, { "epoch": 1.2306451612903226, "grad_norm": 1.2876289498435494, "learning_rate": 7.060944101111797e-07, "loss": 0.8590530753135681, "step": 5341 }, { "epoch": 1.2308755760368664, "grad_norm": 1.0245387562303532, "learning_rate": 7.057302739797025e-07, "loss": 0.7047204971313477, "step": 5342 }, { "epoch": 1.23110599078341, "grad_norm": 1.3069544437359595, "learning_rate": 7.053661805610867e-07, "loss": 0.8826072216033936, "step": 5343 }, { "epoch": 1.2313364055299538, "grad_norm": 1.2593962984780245, "learning_rate": 7.050021299081792e-07, "loss": 0.9394192695617676, "step": 5344 }, { "epoch": 1.2315668202764978, "grad_norm": 1.1109567819341923, "learning_rate": 7.046381220738224e-07, "loss": 0.7814885377883911, "step": 5345 }, { "epoch": 1.2317972350230415, "grad_norm": 1.1819250736895568, "learning_rate": 7.042741571108512e-07, "loss": 0.781699538230896, "step": 5346 }, { "epoch": 1.2320276497695852, "grad_norm": 1.1116588757864085, "learning_rate": 7.039102350720946e-07, "loss": 0.6554632186889648, "step": 5347 }, { "epoch": 1.232258064516129, "grad_norm": 0.9564548780258206, "learning_rate": 7.035463560103753e-07, "loss": 0.6449903249740601, "step": 5348 }, { "epoch": 1.2324884792626727, "grad_norm": 1.3130676696714008, "learning_rate": 7.031825199785101e-07, "loss": 0.8222958445549011, "step": 5349 }, { "epoch": 1.2327188940092166, "grad_norm": 1.073654969776922, "learning_rate": 7.02818727029309e-07, "loss": 0.8315533399581909, "step": 5350 }, { "epoch": 1.2329493087557604, "grad_norm": 0.9980466179862664, "learning_rate": 7.024549772155764e-07, "loss": 0.8065732717514038, "step": 5351 }, { "epoch": 1.233179723502304, "grad_norm": 1.3823215182318742, "learning_rate": 7.020912705901101e-07, "loss": 0.7607216835021973, "step": 5352 }, { "epoch": 1.233410138248848, "grad_norm": 1.3000097773568569, "learning_rate": 7.01727607205701e-07, "loss": 0.877311110496521, "step": 5353 }, { "epoch": 1.2336405529953918, "grad_norm": 1.1855641794195606, "learning_rate": 7.013639871151354e-07, "loss": 0.7352526187896729, "step": 5354 }, { "epoch": 1.2338709677419355, "grad_norm": 1.1123782494693044, "learning_rate": 7.010004103711915e-07, "loss": 0.7676074504852295, "step": 5355 }, { "epoch": 1.2341013824884792, "grad_norm": 1.1035546011135826, "learning_rate": 7.00636877026642e-07, "loss": 0.7802003622055054, "step": 5356 }, { "epoch": 1.234331797235023, "grad_norm": 1.0576568317960378, "learning_rate": 7.002733871342537e-07, "loss": 0.747033953666687, "step": 5357 }, { "epoch": 1.234562211981567, "grad_norm": 1.1565555542506367, "learning_rate": 6.999099407467865e-07, "loss": 0.8086956739425659, "step": 5358 }, { "epoch": 1.2347926267281106, "grad_norm": 1.450692015608809, "learning_rate": 6.995465379169941e-07, "loss": 0.9362099170684814, "step": 5359 }, { "epoch": 1.2350230414746544, "grad_norm": 1.0699993470783844, "learning_rate": 6.991831786976241e-07, "loss": 0.6784812211990356, "step": 5360 }, { "epoch": 1.235253456221198, "grad_norm": 1.0206889971672557, "learning_rate": 6.988198631414171e-07, "loss": 0.7733708620071411, "step": 5361 }, { "epoch": 1.2354838709677418, "grad_norm": 1.1745502344238163, "learning_rate": 6.984565913011087e-07, "loss": 0.8747115135192871, "step": 5362 }, { "epoch": 1.2357142857142858, "grad_norm": 1.0659966645754941, "learning_rate": 6.980933632294268e-07, "loss": 0.6947430372238159, "step": 5363 }, { "epoch": 1.2359447004608295, "grad_norm": 1.206089262306805, "learning_rate": 6.97730178979093e-07, "loss": 0.7128404378890991, "step": 5364 }, { "epoch": 1.2361751152073732, "grad_norm": 1.1120167642627505, "learning_rate": 6.973670386028242e-07, "loss": 0.7190830707550049, "step": 5365 }, { "epoch": 1.2364055299539172, "grad_norm": 1.1367562157166997, "learning_rate": 6.970039421533291e-07, "loss": 0.7625770568847656, "step": 5366 }, { "epoch": 1.236635944700461, "grad_norm": 1.109720416461976, "learning_rate": 6.966408896833104e-07, "loss": 0.7942707538604736, "step": 5367 }, { "epoch": 1.2368663594470046, "grad_norm": 1.2413354296268997, "learning_rate": 6.962778812454652e-07, "loss": 0.8329455852508545, "step": 5368 }, { "epoch": 1.2370967741935484, "grad_norm": 0.8823115581397621, "learning_rate": 6.959149168924833e-07, "loss": 0.6034290790557861, "step": 5369 }, { "epoch": 1.237327188940092, "grad_norm": 1.1119487486974622, "learning_rate": 6.955519966770486e-07, "loss": 0.8424680233001709, "step": 5370 }, { "epoch": 1.237557603686636, "grad_norm": 1.4443979353165184, "learning_rate": 6.951891206518388e-07, "loss": 0.8670322895050049, "step": 5371 }, { "epoch": 1.2377880184331798, "grad_norm": 1.2577295715670245, "learning_rate": 6.948262888695244e-07, "loss": 0.7283621430397034, "step": 5372 }, { "epoch": 1.2380184331797235, "grad_norm": 1.1772858057268798, "learning_rate": 6.9446350138277e-07, "loss": 0.7990118265151978, "step": 5373 }, { "epoch": 1.2382488479262672, "grad_norm": 1.3359682917878526, "learning_rate": 6.941007582442342e-07, "loss": 0.945558488368988, "step": 5374 }, { "epoch": 1.238479262672811, "grad_norm": 1.186182272846314, "learning_rate": 6.937380595065685e-07, "loss": 0.6905936002731323, "step": 5375 }, { "epoch": 1.238709677419355, "grad_norm": 1.1665515184197677, "learning_rate": 6.933754052224176e-07, "loss": 0.7757662534713745, "step": 5376 }, { "epoch": 1.2389400921658986, "grad_norm": 1.1107589407670702, "learning_rate": 6.930127954444209e-07, "loss": 0.63062584400177, "step": 5377 }, { "epoch": 1.2391705069124423, "grad_norm": 1.2453155093106256, "learning_rate": 6.926502302252109e-07, "loss": 0.7341021299362183, "step": 5378 }, { "epoch": 1.2394009216589863, "grad_norm": 0.9019761448377311, "learning_rate": 6.922877096174127e-07, "loss": 0.572767972946167, "step": 5379 }, { "epoch": 1.23963133640553, "grad_norm": 1.274761976544521, "learning_rate": 6.919252336736463e-07, "loss": 0.630276083946228, "step": 5380 }, { "epoch": 1.2398617511520738, "grad_norm": 1.0769631455551745, "learning_rate": 6.915628024465244e-07, "loss": 0.668334424495697, "step": 5381 }, { "epoch": 1.2400921658986175, "grad_norm": 0.9444198657704267, "learning_rate": 6.912004159886529e-07, "loss": 0.6766513586044312, "step": 5382 }, { "epoch": 1.2403225806451612, "grad_norm": 1.3884668691330446, "learning_rate": 6.908380743526328e-07, "loss": 0.7016473412513733, "step": 5383 }, { "epoch": 1.2405529953917052, "grad_norm": 1.378738366714881, "learning_rate": 6.904757775910568e-07, "loss": 0.8837979435920715, "step": 5384 }, { "epoch": 1.2407834101382489, "grad_norm": 0.9305030195638431, "learning_rate": 6.901135257565116e-07, "loss": 0.7187714576721191, "step": 5385 }, { "epoch": 1.2410138248847926, "grad_norm": 1.0935814864632027, "learning_rate": 6.897513189015782e-07, "loss": 0.8227157592773438, "step": 5386 }, { "epoch": 1.2412442396313363, "grad_norm": 1.278600897043475, "learning_rate": 6.893891570788301e-07, "loss": 0.8812209367752075, "step": 5387 }, { "epoch": 1.24147465437788, "grad_norm": 1.0426681195674332, "learning_rate": 6.890270403408348e-07, "loss": 0.6702297925949097, "step": 5388 }, { "epoch": 1.241705069124424, "grad_norm": 1.1718249382850798, "learning_rate": 6.886649687401529e-07, "loss": 0.646358847618103, "step": 5389 }, { "epoch": 1.2419354838709677, "grad_norm": 1.1131010301922042, "learning_rate": 6.883029423293383e-07, "loss": 0.6514080762863159, "step": 5390 }, { "epoch": 1.2421658986175115, "grad_norm": 1.0826812738863971, "learning_rate": 6.879409611609393e-07, "loss": 0.6938437819480896, "step": 5391 }, { "epoch": 1.2423963133640552, "grad_norm": 1.3710627721954263, "learning_rate": 6.875790252874967e-07, "loss": 0.8601399064064026, "step": 5392 }, { "epoch": 1.2426267281105992, "grad_norm": 1.1590300352526421, "learning_rate": 6.872171347615445e-07, "loss": 0.6641080379486084, "step": 5393 }, { "epoch": 1.2428571428571429, "grad_norm": 1.0046628491787142, "learning_rate": 6.868552896356117e-07, "loss": 0.7109012603759766, "step": 5394 }, { "epoch": 1.2430875576036866, "grad_norm": 1.261042767669179, "learning_rate": 6.864934899622191e-07, "loss": 0.8558728694915771, "step": 5395 }, { "epoch": 1.2433179723502303, "grad_norm": 1.1243133400823155, "learning_rate": 6.861317357938807e-07, "loss": 0.6119382977485657, "step": 5396 }, { "epoch": 1.2435483870967743, "grad_norm": 1.2850449121793286, "learning_rate": 6.857700271831059e-07, "loss": 0.7527587413787842, "step": 5397 }, { "epoch": 1.243778801843318, "grad_norm": 1.3104214277299573, "learning_rate": 6.854083641823957e-07, "loss": 0.8082761168479919, "step": 5398 }, { "epoch": 1.2440092165898617, "grad_norm": 1.0664271007055484, "learning_rate": 6.850467468442447e-07, "loss": 0.7289307117462158, "step": 5399 }, { "epoch": 1.2442396313364055, "grad_norm": 1.2684124709337747, "learning_rate": 6.846851752211418e-07, "loss": 0.8824148178100586, "step": 5400 }, { "epoch": 1.2444700460829492, "grad_norm": 1.2011621536911168, "learning_rate": 6.843236493655682e-07, "loss": 0.7046724557876587, "step": 5401 }, { "epoch": 1.2447004608294931, "grad_norm": 1.0456601321771188, "learning_rate": 6.839621693299987e-07, "loss": 0.8192921876907349, "step": 5402 }, { "epoch": 1.2449308755760369, "grad_norm": 1.1031705508374716, "learning_rate": 6.83600735166902e-07, "loss": 0.7651070356369019, "step": 5403 }, { "epoch": 1.2451612903225806, "grad_norm": 1.10155120943284, "learning_rate": 6.832393469287401e-07, "loss": 0.7689340114593506, "step": 5404 }, { "epoch": 1.2453917050691243, "grad_norm": 1.438313566898243, "learning_rate": 6.828780046679671e-07, "loss": 0.9214832782745361, "step": 5405 }, { "epoch": 1.2456221198156683, "grad_norm": 1.1160237214981186, "learning_rate": 6.825167084370322e-07, "loss": 0.7210682034492493, "step": 5406 }, { "epoch": 1.245852534562212, "grad_norm": 1.1608936823977416, "learning_rate": 6.82155458288377e-07, "loss": 0.871317446231842, "step": 5407 }, { "epoch": 1.2460829493087557, "grad_norm": 1.2750147741770517, "learning_rate": 6.817942542744359e-07, "loss": 0.7669065594673157, "step": 5408 }, { "epoch": 1.2463133640552995, "grad_norm": 1.0693548196930358, "learning_rate": 6.814330964476379e-07, "loss": 0.7317448854446411, "step": 5409 }, { "epoch": 1.2465437788018434, "grad_norm": 1.2936969678285373, "learning_rate": 6.810719848604036e-07, "loss": 0.7873220443725586, "step": 5410 }, { "epoch": 1.2467741935483871, "grad_norm": 1.2973675980536, "learning_rate": 6.807109195651492e-07, "loss": 0.713294267654419, "step": 5411 }, { "epoch": 1.2470046082949309, "grad_norm": 1.2551238151306954, "learning_rate": 6.803499006142819e-07, "loss": 0.7592979669570923, "step": 5412 }, { "epoch": 1.2472350230414746, "grad_norm": 1.3113983649465133, "learning_rate": 6.79988928060203e-07, "loss": 0.7805737257003784, "step": 5413 }, { "epoch": 1.2474654377880183, "grad_norm": 0.8180058983934718, "learning_rate": 6.79628001955308e-07, "loss": 0.7706440687179565, "step": 5414 }, { "epoch": 1.2476958525345623, "grad_norm": 1.3696824329137627, "learning_rate": 6.792671223519844e-07, "loss": 0.772534966468811, "step": 5415 }, { "epoch": 1.247926267281106, "grad_norm": 1.2283026355612159, "learning_rate": 6.789062893026129e-07, "loss": 0.7939096093177795, "step": 5416 }, { "epoch": 1.2481566820276497, "grad_norm": 1.263037130888269, "learning_rate": 6.78545502859569e-07, "loss": 0.7062902450561523, "step": 5417 }, { "epoch": 1.2483870967741935, "grad_norm": 1.042353004558378, "learning_rate": 6.781847630752197e-07, "loss": 0.8296496868133545, "step": 5418 }, { "epoch": 1.2486175115207374, "grad_norm": 1.4186103660131706, "learning_rate": 6.778240700019258e-07, "loss": 0.926125168800354, "step": 5419 }, { "epoch": 1.2488479262672811, "grad_norm": 1.1816532525816696, "learning_rate": 6.774634236920419e-07, "loss": 0.7301739454269409, "step": 5420 }, { "epoch": 1.2490783410138249, "grad_norm": 1.366957713339659, "learning_rate": 6.771028241979151e-07, "loss": 0.7313426733016968, "step": 5421 }, { "epoch": 1.2493087557603686, "grad_norm": 0.9539446793763906, "learning_rate": 6.767422715718853e-07, "loss": 0.7193025946617126, "step": 5422 }, { "epoch": 1.2495391705069125, "grad_norm": 1.1735826178809459, "learning_rate": 6.763817658662874e-07, "loss": 0.6544638872146606, "step": 5423 }, { "epoch": 1.2497695852534563, "grad_norm": 1.1828661707349362, "learning_rate": 6.760213071334478e-07, "loss": 0.8402822613716125, "step": 5424 }, { "epoch": 1.25, "grad_norm": 1.1854670368859663, "learning_rate": 6.756608954256861e-07, "loss": 0.6840100288391113, "step": 5425 }, { "epoch": 1.2502304147465437, "grad_norm": 1.1842873946027908, "learning_rate": 6.753005307953165e-07, "loss": 0.7315107583999634, "step": 5426 }, { "epoch": 1.2504608294930875, "grad_norm": 0.9743094512393712, "learning_rate": 6.74940213294645e-07, "loss": 0.6369785070419312, "step": 5427 }, { "epoch": 1.2506912442396314, "grad_norm": 1.0769824502789231, "learning_rate": 6.745799429759711e-07, "loss": 0.7700424790382385, "step": 5428 }, { "epoch": 1.2509216589861751, "grad_norm": 1.2719323162039158, "learning_rate": 6.742197198915877e-07, "loss": 0.7436221241950989, "step": 5429 }, { "epoch": 1.2511520737327189, "grad_norm": 1.235326047289827, "learning_rate": 6.738595440937809e-07, "loss": 0.8028342723846436, "step": 5430 }, { "epoch": 1.2513824884792628, "grad_norm": 1.1651221420823998, "learning_rate": 6.734994156348288e-07, "loss": 0.7705515623092651, "step": 5431 }, { "epoch": 1.2516129032258063, "grad_norm": 1.509633589240068, "learning_rate": 6.73139334567005e-07, "loss": 0.7110899686813354, "step": 5432 }, { "epoch": 1.2518433179723503, "grad_norm": 1.0701201128505256, "learning_rate": 6.727793009425739e-07, "loss": 0.7495337128639221, "step": 5433 }, { "epoch": 1.252073732718894, "grad_norm": 1.1393040143384143, "learning_rate": 6.724193148137938e-07, "loss": 0.7735337018966675, "step": 5434 }, { "epoch": 1.2523041474654377, "grad_norm": 1.5709409365174263, "learning_rate": 6.720593762329167e-07, "loss": 0.8655617237091064, "step": 5435 }, { "epoch": 1.2525345622119817, "grad_norm": 1.0969772466203969, "learning_rate": 6.716994852521871e-07, "loss": 0.7989616394042969, "step": 5436 }, { "epoch": 1.2527649769585254, "grad_norm": 1.2186152186967236, "learning_rate": 6.713396419238424e-07, "loss": 0.8090296983718872, "step": 5437 }, { "epoch": 1.2529953917050691, "grad_norm": 1.175751705980128, "learning_rate": 6.709798463001138e-07, "loss": 0.7150726318359375, "step": 5438 }, { "epoch": 1.2532258064516129, "grad_norm": 1.1350361891486582, "learning_rate": 6.706200984332249e-07, "loss": 0.7136287689208984, "step": 5439 }, { "epoch": 1.2534562211981566, "grad_norm": 1.2991395376590593, "learning_rate": 6.702603983753927e-07, "loss": 0.8538687229156494, "step": 5440 }, { "epoch": 1.2536866359447005, "grad_norm": 1.5253402941485412, "learning_rate": 6.699007461788272e-07, "loss": 0.7960666418075562, "step": 5441 }, { "epoch": 1.2539170506912443, "grad_norm": 0.9539757778238315, "learning_rate": 6.695411418957309e-07, "loss": 0.7462595701217651, "step": 5442 }, { "epoch": 1.254147465437788, "grad_norm": 1.482445221768143, "learning_rate": 6.691815855783009e-07, "loss": 0.795913577079773, "step": 5443 }, { "epoch": 1.2543778801843317, "grad_norm": 1.071717267875031, "learning_rate": 6.688220772787258e-07, "loss": 0.7589330077171326, "step": 5444 }, { "epoch": 1.2546082949308754, "grad_norm": 1.4795497320121442, "learning_rate": 6.684626170491874e-07, "loss": 0.7719615697860718, "step": 5445 }, { "epoch": 1.2548387096774194, "grad_norm": 1.06581311441289, "learning_rate": 6.681032049418616e-07, "loss": 0.8516664505004883, "step": 5446 }, { "epoch": 1.2550691244239631, "grad_norm": 1.466555451116343, "learning_rate": 6.677438410089163e-07, "loss": 0.8597210049629211, "step": 5447 }, { "epoch": 1.2552995391705069, "grad_norm": 1.2172979010742704, "learning_rate": 6.673845253025124e-07, "loss": 0.7101171016693115, "step": 5448 }, { "epoch": 1.2555299539170508, "grad_norm": 1.105900547055049, "learning_rate": 6.670252578748044e-07, "loss": 0.6946178078651428, "step": 5449 }, { "epoch": 1.2557603686635945, "grad_norm": 1.687580161954866, "learning_rate": 6.666660387779395e-07, "loss": 0.9912126660346985, "step": 5450 }, { "epoch": 1.2559907834101383, "grad_norm": 1.087382323913162, "learning_rate": 6.663068680640573e-07, "loss": 0.6495379209518433, "step": 5451 }, { "epoch": 1.256221198156682, "grad_norm": 1.0213661473677353, "learning_rate": 6.65947745785292e-07, "loss": 0.6276426315307617, "step": 5452 }, { "epoch": 1.2564516129032257, "grad_norm": 1.082562870265783, "learning_rate": 6.655886719937691e-07, "loss": 0.7273461818695068, "step": 5453 }, { "epoch": 1.2566820276497697, "grad_norm": 1.258671733492057, "learning_rate": 6.652296467416073e-07, "loss": 0.8248249292373657, "step": 5454 }, { "epoch": 1.2569124423963134, "grad_norm": 1.2124691152915896, "learning_rate": 6.648706700809196e-07, "loss": 0.8709753751754761, "step": 5455 }, { "epoch": 1.2571428571428571, "grad_norm": 1.4025604957471465, "learning_rate": 6.645117420638105e-07, "loss": 0.8207283020019531, "step": 5456 }, { "epoch": 1.2573732718894008, "grad_norm": 1.0867491150840567, "learning_rate": 6.641528627423774e-07, "loss": 0.8222801685333252, "step": 5457 }, { "epoch": 1.2576036866359446, "grad_norm": 1.0891862457945214, "learning_rate": 6.637940321687121e-07, "loss": 0.7684904336929321, "step": 5458 }, { "epoch": 1.2578341013824885, "grad_norm": 1.106565522930133, "learning_rate": 6.634352503948979e-07, "loss": 0.7930517196655273, "step": 5459 }, { "epoch": 1.2580645161290323, "grad_norm": 1.255727738748605, "learning_rate": 6.630765174730116e-07, "loss": 0.7414563298225403, "step": 5460 }, { "epoch": 1.258294930875576, "grad_norm": 1.0415923536335177, "learning_rate": 6.627178334551227e-07, "loss": 0.7959232926368713, "step": 5461 }, { "epoch": 1.25852534562212, "grad_norm": 1.2823788828450395, "learning_rate": 6.623591983932935e-07, "loss": 0.6722866296768188, "step": 5462 }, { "epoch": 1.2587557603686637, "grad_norm": 1.0428819037253236, "learning_rate": 6.620006123395799e-07, "loss": 0.7688727378845215, "step": 5463 }, { "epoch": 1.2589861751152074, "grad_norm": 1.1454091886933473, "learning_rate": 6.616420753460301e-07, "loss": 0.7543724179267883, "step": 5464 }, { "epoch": 1.2592165898617511, "grad_norm": 1.3156243556780545, "learning_rate": 6.612835874646847e-07, "loss": 0.7097430229187012, "step": 5465 }, { "epoch": 1.2594470046082948, "grad_norm": 1.1699591097632744, "learning_rate": 6.609251487475786e-07, "loss": 0.8640443682670593, "step": 5466 }, { "epoch": 1.2596774193548388, "grad_norm": 1.4552439697890553, "learning_rate": 6.605667592467384e-07, "loss": 0.7872523069381714, "step": 5467 }, { "epoch": 1.2599078341013825, "grad_norm": 1.3601390048962447, "learning_rate": 6.602084190141835e-07, "loss": 0.8647557497024536, "step": 5468 }, { "epoch": 1.2601382488479262, "grad_norm": 0.9953963267515464, "learning_rate": 6.598501281019268e-07, "loss": 0.7323553562164307, "step": 5469 }, { "epoch": 1.26036866359447, "grad_norm": 1.2478057023441294, "learning_rate": 6.594918865619739e-07, "loss": 0.8214852809906006, "step": 5470 }, { "epoch": 1.2605990783410137, "grad_norm": 1.1743890995374524, "learning_rate": 6.591336944463223e-07, "loss": 0.8011265397071838, "step": 5471 }, { "epoch": 1.2608294930875577, "grad_norm": 0.9651307194588488, "learning_rate": 6.587755518069642e-07, "loss": 0.798862636089325, "step": 5472 }, { "epoch": 1.2610599078341014, "grad_norm": 1.1888872240865054, "learning_rate": 6.58417458695883e-07, "loss": 0.7231202721595764, "step": 5473 }, { "epoch": 1.261290322580645, "grad_norm": 1.25713690411949, "learning_rate": 6.580594151650551e-07, "loss": 0.8816685676574707, "step": 5474 }, { "epoch": 1.261520737327189, "grad_norm": 1.0218552259688816, "learning_rate": 6.577014212664509e-07, "loss": 0.6343427300453186, "step": 5475 }, { "epoch": 1.2617511520737328, "grad_norm": 1.2062270864209526, "learning_rate": 6.573434770520321e-07, "loss": 0.7785895466804504, "step": 5476 }, { "epoch": 1.2619815668202765, "grad_norm": 1.2086458816060426, "learning_rate": 6.569855825737536e-07, "loss": 0.7408698797225952, "step": 5477 }, { "epoch": 1.2622119815668202, "grad_norm": 1.2755490666336102, "learning_rate": 6.566277378835643e-07, "loss": 0.8481286764144897, "step": 5478 }, { "epoch": 1.262442396313364, "grad_norm": 1.0772225233745287, "learning_rate": 6.56269943033404e-07, "loss": 0.8221831917762756, "step": 5479 }, { "epoch": 1.262672811059908, "grad_norm": 1.1202704150930312, "learning_rate": 6.559121980752065e-07, "loss": 0.805405855178833, "step": 5480 }, { "epoch": 1.2629032258064516, "grad_norm": 1.4925713527432443, "learning_rate": 6.55554503060898e-07, "loss": 0.8643565773963928, "step": 5481 }, { "epoch": 1.2631336405529954, "grad_norm": 1.038997236699539, "learning_rate": 6.551968580423973e-07, "loss": 0.7087225914001465, "step": 5482 }, { "epoch": 1.263364055299539, "grad_norm": 1.3080505612178328, "learning_rate": 6.54839263071616e-07, "loss": 0.8401756882667542, "step": 5483 }, { "epoch": 1.2635944700460828, "grad_norm": 0.974231759030553, "learning_rate": 6.544817182004589e-07, "loss": 0.76345294713974, "step": 5484 }, { "epoch": 1.2638248847926268, "grad_norm": 0.9975788463971886, "learning_rate": 6.541242234808228e-07, "loss": 0.7177271842956543, "step": 5485 }, { "epoch": 1.2640552995391705, "grad_norm": 1.0524467641617976, "learning_rate": 6.537667789645981e-07, "loss": 0.7436186075210571, "step": 5486 }, { "epoch": 1.2642857142857142, "grad_norm": 1.025347292021162, "learning_rate": 6.53409384703667e-07, "loss": 0.6526673436164856, "step": 5487 }, { "epoch": 1.2645161290322582, "grad_norm": 1.4422505610217646, "learning_rate": 6.530520407499049e-07, "loss": 0.879219651222229, "step": 5488 }, { "epoch": 1.264746543778802, "grad_norm": 1.1643268817299548, "learning_rate": 6.526947471551798e-07, "loss": 0.7005003690719604, "step": 5489 }, { "epoch": 1.2649769585253456, "grad_norm": 1.276974659887974, "learning_rate": 6.523375039713525e-07, "loss": 0.716349720954895, "step": 5490 }, { "epoch": 1.2652073732718894, "grad_norm": 1.307490301718017, "learning_rate": 6.519803112502758e-07, "loss": 0.8524413704872131, "step": 5491 }, { "epoch": 1.265437788018433, "grad_norm": 1.3886244481055607, "learning_rate": 6.516231690437966e-07, "loss": 0.8032857179641724, "step": 5492 }, { "epoch": 1.265668202764977, "grad_norm": 1.3026581508138244, "learning_rate": 6.512660774037531e-07, "loss": 0.8912144899368286, "step": 5493 }, { "epoch": 1.2658986175115208, "grad_norm": 1.1001846572449894, "learning_rate": 6.509090363819764e-07, "loss": 0.6526974439620972, "step": 5494 }, { "epoch": 1.2661290322580645, "grad_norm": 1.1539964772442708, "learning_rate": 6.505520460302916e-07, "loss": 0.7436610460281372, "step": 5495 }, { "epoch": 1.2663594470046082, "grad_norm": 1.0590907210895066, "learning_rate": 6.501951064005145e-07, "loss": 0.7112951874732971, "step": 5496 }, { "epoch": 1.266589861751152, "grad_norm": 1.136772271419419, "learning_rate": 6.498382175444545e-07, "loss": 0.6908622980117798, "step": 5497 }, { "epoch": 1.266820276497696, "grad_norm": 1.2936126009346398, "learning_rate": 6.494813795139137e-07, "loss": 0.8169400691986084, "step": 5498 }, { "epoch": 1.2670506912442396, "grad_norm": 1.1611805763062155, "learning_rate": 6.491245923606868e-07, "loss": 0.7577871084213257, "step": 5499 }, { "epoch": 1.2672811059907834, "grad_norm": 1.2166617406598321, "learning_rate": 6.487678561365606e-07, "loss": 0.7470887303352356, "step": 5500 }, { "epoch": 1.2675115207373273, "grad_norm": 1.2499100792685887, "learning_rate": 6.484111708933153e-07, "loss": 0.7862193584442139, "step": 5501 }, { "epoch": 1.267741935483871, "grad_norm": 1.0856856438170979, "learning_rate": 6.48054536682723e-07, "loss": 0.6809444427490234, "step": 5502 }, { "epoch": 1.2679723502304148, "grad_norm": 1.1883483456973896, "learning_rate": 6.476979535565486e-07, "loss": 0.7560738921165466, "step": 5503 }, { "epoch": 1.2682027649769585, "grad_norm": 1.060654462751894, "learning_rate": 6.473414215665501e-07, "loss": 0.6961003541946411, "step": 5504 }, { "epoch": 1.2684331797235022, "grad_norm": 1.1318601167609275, "learning_rate": 6.469849407644775e-07, "loss": 0.762688159942627, "step": 5505 }, { "epoch": 1.2686635944700462, "grad_norm": 1.3318780914664468, "learning_rate": 6.46628511202073e-07, "loss": 0.8735007047653198, "step": 5506 }, { "epoch": 1.26889400921659, "grad_norm": 1.2498993266864264, "learning_rate": 6.462721329310727e-07, "loss": 0.7127432823181152, "step": 5507 }, { "epoch": 1.2691244239631336, "grad_norm": 1.1810894491038926, "learning_rate": 6.45915806003204e-07, "loss": 0.7720422744750977, "step": 5508 }, { "epoch": 1.2693548387096774, "grad_norm": 1.3742393921911886, "learning_rate": 6.455595304701871e-07, "loss": 0.8046890497207642, "step": 5509 }, { "epoch": 1.269585253456221, "grad_norm": 1.433035812490825, "learning_rate": 6.452033063837354e-07, "loss": 0.8218742609024048, "step": 5510 }, { "epoch": 1.269815668202765, "grad_norm": 1.3642640568886157, "learning_rate": 6.448471337955536e-07, "loss": 0.912622332572937, "step": 5511 }, { "epoch": 1.2700460829493088, "grad_norm": 1.3101181049427244, "learning_rate": 6.444910127573407e-07, "loss": 0.7940733432769775, "step": 5512 }, { "epoch": 1.2702764976958525, "grad_norm": 1.0982469100789136, "learning_rate": 6.441349433207864e-07, "loss": 0.7085565328598022, "step": 5513 }, { "epoch": 1.2705069124423964, "grad_norm": 1.241687978637031, "learning_rate": 6.437789255375739e-07, "loss": 0.9316935539245605, "step": 5514 }, { "epoch": 1.2707373271889402, "grad_norm": 0.9697190322352798, "learning_rate": 6.43422959459379e-07, "loss": 0.7412574291229248, "step": 5515 }, { "epoch": 1.270967741935484, "grad_norm": 0.9713506680995111, "learning_rate": 6.430670451378695e-07, "loss": 0.7476450204849243, "step": 5516 }, { "epoch": 1.2711981566820276, "grad_norm": 1.1272976564667934, "learning_rate": 6.427111826247056e-07, "loss": 0.8530189990997314, "step": 5517 }, { "epoch": 1.2714285714285714, "grad_norm": 1.3163108639601895, "learning_rate": 6.423553719715406e-07, "loss": 0.8193017840385437, "step": 5518 }, { "epoch": 1.2716589861751153, "grad_norm": 1.002275086425174, "learning_rate": 6.419996132300203e-07, "loss": 0.7444974780082703, "step": 5519 }, { "epoch": 1.271889400921659, "grad_norm": 1.0214749663440856, "learning_rate": 6.416439064517818e-07, "loss": 0.7422837018966675, "step": 5520 }, { "epoch": 1.2721198156682028, "grad_norm": 1.2499390785362547, "learning_rate": 6.412882516884562e-07, "loss": 1.0155640840530396, "step": 5521 }, { "epoch": 1.2723502304147465, "grad_norm": 1.489615968336023, "learning_rate": 6.409326489916658e-07, "loss": 0.8097087144851685, "step": 5522 }, { "epoch": 1.2725806451612902, "grad_norm": 1.293861875643454, "learning_rate": 6.405770984130257e-07, "loss": 0.8545565009117126, "step": 5523 }, { "epoch": 1.2728110599078342, "grad_norm": 0.9914622760341439, "learning_rate": 6.402216000041445e-07, "loss": 0.6765652298927307, "step": 5524 }, { "epoch": 1.273041474654378, "grad_norm": 1.103390848542702, "learning_rate": 6.398661538166217e-07, "loss": 0.7964426875114441, "step": 5525 }, { "epoch": 1.2732718894009216, "grad_norm": 1.2196724846653912, "learning_rate": 6.395107599020495e-07, "loss": 0.7449651956558228, "step": 5526 }, { "epoch": 1.2735023041474656, "grad_norm": 1.5614043870867116, "learning_rate": 6.391554183120138e-07, "loss": 0.8639888167381287, "step": 5527 }, { "epoch": 1.2737327188940093, "grad_norm": 1.046130673497984, "learning_rate": 6.388001290980914e-07, "loss": 0.7668901681900024, "step": 5528 }, { "epoch": 1.273963133640553, "grad_norm": 1.082923428749424, "learning_rate": 6.384448923118517e-07, "loss": 0.6461849212646484, "step": 5529 }, { "epoch": 1.2741935483870968, "grad_norm": 1.1539877219125736, "learning_rate": 6.380897080048576e-07, "loss": 0.7045707702636719, "step": 5530 }, { "epoch": 1.2744239631336405, "grad_norm": 1.1893221959186644, "learning_rate": 6.377345762286632e-07, "loss": 0.8303793668746948, "step": 5531 }, { "epoch": 1.2746543778801844, "grad_norm": 1.112799220738114, "learning_rate": 6.373794970348152e-07, "loss": 0.808259129524231, "step": 5532 }, { "epoch": 1.2748847926267282, "grad_norm": 1.527249581557179, "learning_rate": 6.370244704748535e-07, "loss": 0.8224689960479736, "step": 5533 }, { "epoch": 1.2751152073732719, "grad_norm": 1.4408900318423565, "learning_rate": 6.366694966003089e-07, "loss": 0.8559266328811646, "step": 5534 }, { "epoch": 1.2753456221198156, "grad_norm": 1.3225808297843282, "learning_rate": 6.363145754627063e-07, "loss": 0.7972407341003418, "step": 5535 }, { "epoch": 1.2755760368663593, "grad_norm": 0.9700139233174567, "learning_rate": 6.359597071135618e-07, "loss": 0.7750328779220581, "step": 5536 }, { "epoch": 1.2758064516129033, "grad_norm": 1.3472908531853058, "learning_rate": 6.356048916043836e-07, "loss": 0.807072639465332, "step": 5537 }, { "epoch": 1.276036866359447, "grad_norm": 1.2153299361350896, "learning_rate": 6.35250128986673e-07, "loss": 0.8459323048591614, "step": 5538 }, { "epoch": 1.2762672811059907, "grad_norm": 1.1921452547723677, "learning_rate": 6.348954193119233e-07, "loss": 0.7874447107315063, "step": 5539 }, { "epoch": 1.2764976958525347, "grad_norm": 1.243785118643696, "learning_rate": 6.345407626316202e-07, "loss": 0.8817394971847534, "step": 5540 }, { "epoch": 1.2767281105990782, "grad_norm": 1.0210963009280363, "learning_rate": 6.341861589972417e-07, "loss": 0.7936382293701172, "step": 5541 }, { "epoch": 1.2769585253456222, "grad_norm": 1.1288567171733945, "learning_rate": 6.33831608460258e-07, "loss": 0.7301348447799683, "step": 5542 }, { "epoch": 1.2771889400921659, "grad_norm": 0.9930019172389213, "learning_rate": 6.334771110721311e-07, "loss": 0.6546784043312073, "step": 5543 }, { "epoch": 1.2774193548387096, "grad_norm": 1.1320345708885517, "learning_rate": 6.331226668843168e-07, "loss": 0.798918604850769, "step": 5544 }, { "epoch": 1.2776497695852536, "grad_norm": 1.0677491026042323, "learning_rate": 6.327682759482618e-07, "loss": 0.6275264620780945, "step": 5545 }, { "epoch": 1.2778801843317973, "grad_norm": 1.1056891749814017, "learning_rate": 6.324139383154048e-07, "loss": 0.6870732307434082, "step": 5546 }, { "epoch": 1.278110599078341, "grad_norm": 1.113302907194177, "learning_rate": 6.320596540371785e-07, "loss": 0.8280556201934814, "step": 5547 }, { "epoch": 1.2783410138248847, "grad_norm": 1.0958194382001605, "learning_rate": 6.317054231650063e-07, "loss": 0.8053648471832275, "step": 5548 }, { "epoch": 1.2785714285714285, "grad_norm": 1.1500355966221105, "learning_rate": 6.313512457503043e-07, "loss": 0.7628893852233887, "step": 5549 }, { "epoch": 1.2788018433179724, "grad_norm": 1.1770420137500979, "learning_rate": 6.30997121844481e-07, "loss": 0.8075753450393677, "step": 5550 }, { "epoch": 1.2790322580645161, "grad_norm": 1.1420933628102303, "learning_rate": 6.306430514989371e-07, "loss": 0.7883275747299194, "step": 5551 }, { "epoch": 1.2792626728110599, "grad_norm": 1.238710939895555, "learning_rate": 6.302890347650648e-07, "loss": 0.7438768744468689, "step": 5552 }, { "epoch": 1.2794930875576038, "grad_norm": 1.261177122589368, "learning_rate": 6.299350716942501e-07, "loss": 0.7756023406982422, "step": 5553 }, { "epoch": 1.2797235023041473, "grad_norm": 1.0915753285175969, "learning_rate": 6.295811623378698e-07, "loss": 0.7128444910049438, "step": 5554 }, { "epoch": 1.2799539170506913, "grad_norm": 0.9707581386208312, "learning_rate": 6.292273067472931e-07, "loss": 0.7611228823661804, "step": 5555 }, { "epoch": 1.280184331797235, "grad_norm": 1.0553125250063393, "learning_rate": 6.288735049738822e-07, "loss": 0.7803670167922974, "step": 5556 }, { "epoch": 1.2804147465437787, "grad_norm": 1.0703973986821036, "learning_rate": 6.28519757068991e-07, "loss": 0.958204448223114, "step": 5557 }, { "epoch": 1.2806451612903227, "grad_norm": 1.1879640741186497, "learning_rate": 6.28166063083965e-07, "loss": 0.7220249772071838, "step": 5558 }, { "epoch": 1.2808755760368664, "grad_norm": 1.4250311227945265, "learning_rate": 6.278124230701427e-07, "loss": 0.7396695613861084, "step": 5559 }, { "epoch": 1.2811059907834101, "grad_norm": 1.1549531480718158, "learning_rate": 6.274588370788545e-07, "loss": 0.819474458694458, "step": 5560 }, { "epoch": 1.2813364055299539, "grad_norm": 1.0583859146786307, "learning_rate": 6.271053051614231e-07, "loss": 0.6997617483139038, "step": 5561 }, { "epoch": 1.2815668202764976, "grad_norm": 1.1462805534929357, "learning_rate": 6.26751827369163e-07, "loss": 0.7526183128356934, "step": 5562 }, { "epoch": 1.2817972350230415, "grad_norm": 1.3576714493720627, "learning_rate": 6.263984037533805e-07, "loss": 0.7185813188552856, "step": 5563 }, { "epoch": 1.2820276497695853, "grad_norm": 0.9722151716418193, "learning_rate": 6.260450343653757e-07, "loss": 0.7739845514297485, "step": 5564 }, { "epoch": 1.282258064516129, "grad_norm": 1.0387058407540612, "learning_rate": 6.25691719256439e-07, "loss": 0.698557436466217, "step": 5565 }, { "epoch": 1.2824884792626727, "grad_norm": 1.1402265972621366, "learning_rate": 6.253384584778534e-07, "loss": 0.6946271657943726, "step": 5566 }, { "epoch": 1.2827188940092165, "grad_norm": 1.2349626326096388, "learning_rate": 6.24985252080895e-07, "loss": 0.7746025323867798, "step": 5567 }, { "epoch": 1.2829493087557604, "grad_norm": 1.050385772264468, "learning_rate": 6.246321001168306e-07, "loss": 0.8759660720825195, "step": 5568 }, { "epoch": 1.2831797235023041, "grad_norm": 1.1535965526965875, "learning_rate": 6.2427900263692e-07, "loss": 0.741111159324646, "step": 5569 }, { "epoch": 1.2834101382488479, "grad_norm": 1.2619269860039752, "learning_rate": 6.239259596924149e-07, "loss": 0.8580630421638489, "step": 5570 }, { "epoch": 1.2836405529953918, "grad_norm": 1.0890841483076914, "learning_rate": 6.235729713345588e-07, "loss": 0.7139618992805481, "step": 5571 }, { "epoch": 1.2838709677419355, "grad_norm": 1.1260979019373678, "learning_rate": 6.232200376145873e-07, "loss": 0.8300976753234863, "step": 5572 }, { "epoch": 1.2841013824884793, "grad_norm": 1.091655687939806, "learning_rate": 6.228671585837288e-07, "loss": 0.7193114757537842, "step": 5573 }, { "epoch": 1.284331797235023, "grad_norm": 1.289214780103651, "learning_rate": 6.225143342932031e-07, "loss": 0.8802851438522339, "step": 5574 }, { "epoch": 1.2845622119815667, "grad_norm": 1.069264068692084, "learning_rate": 6.221615647942217e-07, "loss": 0.749543309211731, "step": 5575 }, { "epoch": 1.2847926267281107, "grad_norm": 1.1044047193035296, "learning_rate": 6.218088501379892e-07, "loss": 0.703508734703064, "step": 5576 }, { "epoch": 1.2850230414746544, "grad_norm": 1.4722305319077136, "learning_rate": 6.214561903757017e-07, "loss": 0.7519023418426514, "step": 5577 }, { "epoch": 1.2852534562211981, "grad_norm": 1.4130549197431626, "learning_rate": 6.211035855585466e-07, "loss": 0.9525241851806641, "step": 5578 }, { "epoch": 1.2854838709677419, "grad_norm": 1.3149636986285136, "learning_rate": 6.207510357377046e-07, "loss": 0.8288872241973877, "step": 5579 }, { "epoch": 1.2857142857142856, "grad_norm": 1.3691241647074333, "learning_rate": 6.203985409643478e-07, "loss": 0.8531112670898438, "step": 5580 }, { "epoch": 1.2859447004608295, "grad_norm": 1.121519108666965, "learning_rate": 6.200461012896401e-07, "loss": 0.7106495499610901, "step": 5581 }, { "epoch": 1.2861751152073733, "grad_norm": 1.426451214846877, "learning_rate": 6.19693716764738e-07, "loss": 0.714931845664978, "step": 5582 }, { "epoch": 1.286405529953917, "grad_norm": 1.3296169647206766, "learning_rate": 6.19341387440789e-07, "loss": 0.8281360268592834, "step": 5583 }, { "epoch": 1.286635944700461, "grad_norm": 1.4833656768811476, "learning_rate": 6.189891133689342e-07, "loss": 0.9155910611152649, "step": 5584 }, { "epoch": 1.2868663594470047, "grad_norm": 1.3432683189972507, "learning_rate": 6.186368946003051e-07, "loss": 0.7573060989379883, "step": 5585 }, { "epoch": 1.2870967741935484, "grad_norm": 1.2055594370265132, "learning_rate": 6.182847311860255e-07, "loss": 0.6994235515594482, "step": 5586 }, { "epoch": 1.2873271889400921, "grad_norm": 1.0775806715124838, "learning_rate": 6.179326231772123e-07, "loss": 0.771092414855957, "step": 5587 }, { "epoch": 1.2875576036866359, "grad_norm": 1.269208775599209, "learning_rate": 6.17580570624973e-07, "loss": 0.7470684051513672, "step": 5588 }, { "epoch": 1.2877880184331798, "grad_norm": 1.5425254092924614, "learning_rate": 6.172285735804075e-07, "loss": 0.918886125087738, "step": 5589 }, { "epoch": 1.2880184331797235, "grad_norm": 1.0377944178544696, "learning_rate": 6.16876632094608e-07, "loss": 0.7232617139816284, "step": 5590 }, { "epoch": 1.2882488479262673, "grad_norm": 1.1703799662994099, "learning_rate": 6.16524746218658e-07, "loss": 0.7367006540298462, "step": 5591 }, { "epoch": 1.288479262672811, "grad_norm": 1.1904508940632728, "learning_rate": 6.161729160036333e-07, "loss": 0.8783999681472778, "step": 5592 }, { "epoch": 1.2887096774193547, "grad_norm": 1.1869935665885074, "learning_rate": 6.158211415006019e-07, "loss": 0.8266523480415344, "step": 5593 }, { "epoch": 1.2889400921658987, "grad_norm": 1.1675308279856504, "learning_rate": 6.154694227606234e-07, "loss": 0.8528730869293213, "step": 5594 }, { "epoch": 1.2891705069124424, "grad_norm": 1.3182250244296418, "learning_rate": 6.151177598347485e-07, "loss": 0.7586283683776855, "step": 5595 }, { "epoch": 1.2894009216589861, "grad_norm": 1.4182043487427547, "learning_rate": 6.147661527740217e-07, "loss": 0.8671954870223999, "step": 5596 }, { "epoch": 1.28963133640553, "grad_norm": 1.081063839615246, "learning_rate": 6.14414601629478e-07, "loss": 0.7354376316070557, "step": 5597 }, { "epoch": 1.2898617511520738, "grad_norm": 1.051384434692424, "learning_rate": 6.140631064521443e-07, "loss": 0.8515663146972656, "step": 5598 }, { "epoch": 1.2900921658986175, "grad_norm": 1.3608023513745535, "learning_rate": 6.137116672930395e-07, "loss": 0.9068351984024048, "step": 5599 }, { "epoch": 1.2903225806451613, "grad_norm": 1.4956373283031226, "learning_rate": 6.133602842031752e-07, "loss": 0.7260826230049133, "step": 5600 }, { "epoch": 1.290552995391705, "grad_norm": 1.1400144341772105, "learning_rate": 6.130089572335535e-07, "loss": 0.7162504196166992, "step": 5601 }, { "epoch": 1.290783410138249, "grad_norm": 1.2203621133034757, "learning_rate": 6.126576864351695e-07, "loss": 0.7625414133071899, "step": 5602 }, { "epoch": 1.2910138248847927, "grad_norm": 1.0985405517526388, "learning_rate": 6.123064718590099e-07, "loss": 0.787274956703186, "step": 5603 }, { "epoch": 1.2912442396313364, "grad_norm": 1.0173148522997915, "learning_rate": 6.119553135560519e-07, "loss": 0.6539326310157776, "step": 5604 }, { "epoch": 1.2914746543778801, "grad_norm": 1.0405810111847797, "learning_rate": 6.11604211577267e-07, "loss": 0.8481189012527466, "step": 5605 }, { "epoch": 1.2917050691244238, "grad_norm": 1.1908108884253377, "learning_rate": 6.112531659736164e-07, "loss": 0.794892430305481, "step": 5606 }, { "epoch": 1.2919354838709678, "grad_norm": 1.0728869697567227, "learning_rate": 6.10902176796054e-07, "loss": 0.6738630533218384, "step": 5607 }, { "epoch": 1.2921658986175115, "grad_norm": 1.2190379429225964, "learning_rate": 6.105512440955258e-07, "loss": 0.7220937609672546, "step": 5608 }, { "epoch": 1.2923963133640552, "grad_norm": 0.9117229942004119, "learning_rate": 6.102003679229688e-07, "loss": 0.6831785440444946, "step": 5609 }, { "epoch": 1.2926267281105992, "grad_norm": 1.0925904509799125, "learning_rate": 6.098495483293125e-07, "loss": 0.7033277750015259, "step": 5610 }, { "epoch": 1.292857142857143, "grad_norm": 0.9024231402190447, "learning_rate": 6.094987853654779e-07, "loss": 0.7063429355621338, "step": 5611 }, { "epoch": 1.2930875576036867, "grad_norm": 1.1531814321684226, "learning_rate": 6.091480790823771e-07, "loss": 0.7791472673416138, "step": 5612 }, { "epoch": 1.2933179723502304, "grad_norm": 1.3904591821034944, "learning_rate": 6.087974295309157e-07, "loss": 0.8674220442771912, "step": 5613 }, { "epoch": 1.293548387096774, "grad_norm": 1.0513898416349883, "learning_rate": 6.084468367619895e-07, "loss": 0.7878479957580566, "step": 5614 }, { "epoch": 1.293778801843318, "grad_norm": 0.9253694996288483, "learning_rate": 6.080963008264861e-07, "loss": 0.7019612789154053, "step": 5615 }, { "epoch": 1.2940092165898618, "grad_norm": 1.1163623788947772, "learning_rate": 6.077458217752863e-07, "loss": 0.68759685754776, "step": 5616 }, { "epoch": 1.2942396313364055, "grad_norm": 1.1326420080908837, "learning_rate": 6.073953996592612e-07, "loss": 0.851733922958374, "step": 5617 }, { "epoch": 1.2944700460829492, "grad_norm": 1.1539848484030915, "learning_rate": 6.070450345292739e-07, "loss": 0.699798047542572, "step": 5618 }, { "epoch": 1.294700460829493, "grad_norm": 1.3439745934739915, "learning_rate": 6.066947264361798e-07, "loss": 0.8625125885009766, "step": 5619 }, { "epoch": 1.294930875576037, "grad_norm": 1.2395704270447963, "learning_rate": 6.063444754308253e-07, "loss": 0.759062647819519, "step": 5620 }, { "epoch": 1.2951612903225806, "grad_norm": 1.1349706072725887, "learning_rate": 6.059942815640491e-07, "loss": 0.7549973726272583, "step": 5621 }, { "epoch": 1.2953917050691244, "grad_norm": 1.2217826699562653, "learning_rate": 6.056441448866816e-07, "loss": 0.8142743110656738, "step": 5622 }, { "epoch": 1.2956221198156683, "grad_norm": 1.0818175637274867, "learning_rate": 6.052940654495442e-07, "loss": 0.7881144881248474, "step": 5623 }, { "epoch": 1.295852534562212, "grad_norm": 1.2201407031885296, "learning_rate": 6.049440433034505e-07, "loss": 0.7922053933143616, "step": 5624 }, { "epoch": 1.2960829493087558, "grad_norm": 1.1955381878542082, "learning_rate": 6.045940784992061e-07, "loss": 0.6808311939239502, "step": 5625 }, { "epoch": 1.2963133640552995, "grad_norm": 1.203534246478074, "learning_rate": 6.04244171087608e-07, "loss": 0.933373749256134, "step": 5626 }, { "epoch": 1.2965437788018432, "grad_norm": 1.3722573775025653, "learning_rate": 6.038943211194439e-07, "loss": 0.8077404499053955, "step": 5627 }, { "epoch": 1.2967741935483872, "grad_norm": 1.2263754202708472, "learning_rate": 6.035445286454953e-07, "loss": 0.7920867204666138, "step": 5628 }, { "epoch": 1.297004608294931, "grad_norm": 1.1574994086499075, "learning_rate": 6.031947937165335e-07, "loss": 0.5872117280960083, "step": 5629 }, { "epoch": 1.2972350230414746, "grad_norm": 1.2959093642025599, "learning_rate": 6.02845116383322e-07, "loss": 0.8593505620956421, "step": 5630 }, { "epoch": 1.2974654377880184, "grad_norm": 1.4149025135483138, "learning_rate": 6.02495496696616e-07, "loss": 0.8352359533309937, "step": 5631 }, { "epoch": 1.297695852534562, "grad_norm": 1.1724909355958724, "learning_rate": 6.021459347071623e-07, "loss": 0.7316182255744934, "step": 5632 }, { "epoch": 1.297926267281106, "grad_norm": 1.1972298924235394, "learning_rate": 6.017964304656997e-07, "loss": 0.7294400334358215, "step": 5633 }, { "epoch": 1.2981566820276498, "grad_norm": 1.0769002788322786, "learning_rate": 6.014469840229581e-07, "loss": 0.6595947742462158, "step": 5634 }, { "epoch": 1.2983870967741935, "grad_norm": 1.308087510592029, "learning_rate": 6.010975954296587e-07, "loss": 0.7849195003509521, "step": 5635 }, { "epoch": 1.2986175115207375, "grad_norm": 1.0709465804551583, "learning_rate": 6.007482647365159e-07, "loss": 0.6915944218635559, "step": 5636 }, { "epoch": 1.2988479262672812, "grad_norm": 1.1595852934519908, "learning_rate": 6.003989919942338e-07, "loss": 0.6821994781494141, "step": 5637 }, { "epoch": 1.299078341013825, "grad_norm": 1.0472078656298618, "learning_rate": 6.000497772535087e-07, "loss": 0.7333718538284302, "step": 5638 }, { "epoch": 1.2993087557603686, "grad_norm": 1.0656731272596272, "learning_rate": 5.997006205650292e-07, "loss": 0.8069280385971069, "step": 5639 }, { "epoch": 1.2995391705069124, "grad_norm": 1.0655856429852437, "learning_rate": 5.993515219794745e-07, "loss": 0.6989297866821289, "step": 5640 }, { "epoch": 1.2997695852534563, "grad_norm": 1.187477589278957, "learning_rate": 5.990024815475161e-07, "loss": 0.7784403562545776, "step": 5641 }, { "epoch": 1.3, "grad_norm": 1.2512602653388225, "learning_rate": 5.986534993198168e-07, "loss": 0.6554181575775146, "step": 5642 }, { "epoch": 1.3002304147465438, "grad_norm": 1.298436931300319, "learning_rate": 5.983045753470307e-07, "loss": 0.7647836208343506, "step": 5643 }, { "epoch": 1.3004608294930875, "grad_norm": 0.9269247679622435, "learning_rate": 5.979557096798033e-07, "loss": 0.7787084579467773, "step": 5644 }, { "epoch": 1.3006912442396312, "grad_norm": 1.0646184845326898, "learning_rate": 5.97606902368773e-07, "loss": 0.6367940902709961, "step": 5645 }, { "epoch": 1.3009216589861752, "grad_norm": 1.0481428990706296, "learning_rate": 5.972581534645679e-07, "loss": 0.7650243043899536, "step": 5646 }, { "epoch": 1.301152073732719, "grad_norm": 0.9452672150266047, "learning_rate": 5.969094630178084e-07, "loss": 0.6506018042564392, "step": 5647 }, { "epoch": 1.3013824884792626, "grad_norm": 1.4764262273840163, "learning_rate": 5.965608310791071e-07, "loss": 0.7351242303848267, "step": 5648 }, { "epoch": 1.3016129032258066, "grad_norm": 1.2210251097969258, "learning_rate": 5.96212257699067e-07, "loss": 0.7327077984809875, "step": 5649 }, { "epoch": 1.3018433179723503, "grad_norm": 1.0681197005600311, "learning_rate": 5.958637429282831e-07, "loss": 0.6448171138763428, "step": 5650 }, { "epoch": 1.302073732718894, "grad_norm": 1.18574113940407, "learning_rate": 5.955152868173418e-07, "loss": 0.8347861766815186, "step": 5651 }, { "epoch": 1.3023041474654378, "grad_norm": 1.2733315501094051, "learning_rate": 5.951668894168215e-07, "loss": 0.736280620098114, "step": 5652 }, { "epoch": 1.3025345622119815, "grad_norm": 1.2627292373923777, "learning_rate": 5.948185507772908e-07, "loss": 0.8677594661712646, "step": 5653 }, { "epoch": 1.3027649769585254, "grad_norm": 1.1729788728933164, "learning_rate": 5.944702709493113e-07, "loss": 0.6598676443099976, "step": 5654 }, { "epoch": 1.3029953917050692, "grad_norm": 1.1072155159392119, "learning_rate": 5.941220499834352e-07, "loss": 0.7795349359512329, "step": 5655 }, { "epoch": 1.303225806451613, "grad_norm": 1.1312979891837796, "learning_rate": 5.937738879302058e-07, "loss": 0.6929318904876709, "step": 5656 }, { "epoch": 1.3034562211981566, "grad_norm": 1.19931324162024, "learning_rate": 5.934257848401593e-07, "loss": 0.859328031539917, "step": 5657 }, { "epoch": 1.3036866359447004, "grad_norm": 1.435339518052459, "learning_rate": 5.930777407638216e-07, "loss": 1.0015549659729004, "step": 5658 }, { "epoch": 1.3039170506912443, "grad_norm": 1.0471647927751007, "learning_rate": 5.927297557517115e-07, "loss": 0.6775785088539124, "step": 5659 }, { "epoch": 1.304147465437788, "grad_norm": 1.0488503999959857, "learning_rate": 5.923818298543378e-07, "loss": 0.7228262424468994, "step": 5660 }, { "epoch": 1.3043778801843318, "grad_norm": 0.9177755631443217, "learning_rate": 5.92033963122202e-07, "loss": 0.6139897108078003, "step": 5661 }, { "epoch": 1.3046082949308757, "grad_norm": 1.062819188029367, "learning_rate": 5.916861556057965e-07, "loss": 0.7336323261260986, "step": 5662 }, { "epoch": 1.3048387096774192, "grad_norm": 1.1985877666304134, "learning_rate": 5.913384073556049e-07, "loss": 0.9223559498786926, "step": 5663 }, { "epoch": 1.3050691244239632, "grad_norm": 1.1960311086176088, "learning_rate": 5.909907184221023e-07, "loss": 0.7230484485626221, "step": 5664 }, { "epoch": 1.305299539170507, "grad_norm": 1.1557586988240278, "learning_rate": 5.906430888557556e-07, "loss": 0.753510594367981, "step": 5665 }, { "epoch": 1.3055299539170506, "grad_norm": 1.2167084005991546, "learning_rate": 5.902955187070229e-07, "loss": 0.8960593938827515, "step": 5666 }, { "epoch": 1.3057603686635946, "grad_norm": 0.9226031223011045, "learning_rate": 5.899480080263527e-07, "loss": 0.6865993738174438, "step": 5667 }, { "epoch": 1.3059907834101383, "grad_norm": 1.2350884878154553, "learning_rate": 5.896005568641868e-07, "loss": 0.7748720645904541, "step": 5668 }, { "epoch": 1.306221198156682, "grad_norm": 1.437104451012044, "learning_rate": 5.892531652709567e-07, "loss": 0.834233283996582, "step": 5669 }, { "epoch": 1.3064516129032258, "grad_norm": 1.2209490689427414, "learning_rate": 5.889058332970858e-07, "loss": 0.8398417234420776, "step": 5670 }, { "epoch": 1.3066820276497695, "grad_norm": 0.8546573405192346, "learning_rate": 5.885585609929891e-07, "loss": 0.6889529228210449, "step": 5671 }, { "epoch": 1.3069124423963134, "grad_norm": 1.1935289122089947, "learning_rate": 5.882113484090725e-07, "loss": 0.6625782251358032, "step": 5672 }, { "epoch": 1.3071428571428572, "grad_norm": 1.2286244905882078, "learning_rate": 5.878641955957334e-07, "loss": 0.7774407267570496, "step": 5673 }, { "epoch": 1.307373271889401, "grad_norm": 1.066003573867245, "learning_rate": 5.875171026033608e-07, "loss": 0.7799595594406128, "step": 5674 }, { "epoch": 1.3076036866359446, "grad_norm": 1.2859461118878832, "learning_rate": 5.87170069482335e-07, "loss": 0.800041913986206, "step": 5675 }, { "epoch": 1.3078341013824883, "grad_norm": 1.2986825545894243, "learning_rate": 5.868230962830265e-07, "loss": 0.7478667497634888, "step": 5676 }, { "epoch": 1.3080645161290323, "grad_norm": 0.9705514903251621, "learning_rate": 5.86476183055799e-07, "loss": 0.7538981437683105, "step": 5677 }, { "epoch": 1.308294930875576, "grad_norm": 1.4195819337110585, "learning_rate": 5.861293298510061e-07, "loss": 0.7556810975074768, "step": 5678 }, { "epoch": 1.3085253456221198, "grad_norm": 0.9225289666667563, "learning_rate": 5.85782536718993e-07, "loss": 0.670037031173706, "step": 5679 }, { "epoch": 1.3087557603686637, "grad_norm": 1.1667524105558311, "learning_rate": 5.854358037100964e-07, "loss": 0.6238662600517273, "step": 5680 }, { "epoch": 1.3089861751152074, "grad_norm": 1.1817165911107195, "learning_rate": 5.85089130874644e-07, "loss": 0.7972823977470398, "step": 5681 }, { "epoch": 1.3092165898617512, "grad_norm": 1.0746427307389195, "learning_rate": 5.847425182629549e-07, "loss": 0.7332338094711304, "step": 5682 }, { "epoch": 1.3094470046082949, "grad_norm": 1.2496997052714673, "learning_rate": 5.843959659253398e-07, "loss": 0.8186966180801392, "step": 5683 }, { "epoch": 1.3096774193548386, "grad_norm": 1.2708999919485935, "learning_rate": 5.840494739120996e-07, "loss": 0.8207032680511475, "step": 5684 }, { "epoch": 1.3099078341013826, "grad_norm": 1.4960688490449285, "learning_rate": 5.83703042273528e-07, "loss": 0.848265528678894, "step": 5685 }, { "epoch": 1.3101382488479263, "grad_norm": 1.0212687278019523, "learning_rate": 5.833566710599088e-07, "loss": 0.7766404151916504, "step": 5686 }, { "epoch": 1.31036866359447, "grad_norm": 1.2185059104564926, "learning_rate": 5.830103603215168e-07, "loss": 0.7570784687995911, "step": 5687 }, { "epoch": 1.3105990783410137, "grad_norm": 1.1006353524996257, "learning_rate": 5.826641101086194e-07, "loss": 0.7551493644714355, "step": 5688 }, { "epoch": 1.3108294930875575, "grad_norm": 1.3664942507199704, "learning_rate": 5.823179204714739e-07, "loss": 0.8589804172515869, "step": 5689 }, { "epoch": 1.3110599078341014, "grad_norm": 1.2869604696659869, "learning_rate": 5.819717914603288e-07, "loss": 0.8252761960029602, "step": 5690 }, { "epoch": 1.3112903225806452, "grad_norm": 1.0886628872971145, "learning_rate": 5.816257231254254e-07, "loss": 0.7784370183944702, "step": 5691 }, { "epoch": 1.3115207373271889, "grad_norm": 1.1343775846575583, "learning_rate": 5.812797155169942e-07, "loss": 0.8040215969085693, "step": 5692 }, { "epoch": 1.3117511520737328, "grad_norm": 1.013609351306971, "learning_rate": 5.809337686852582e-07, "loss": 0.8355100154876709, "step": 5693 }, { "epoch": 1.3119815668202766, "grad_norm": 1.466649672488184, "learning_rate": 5.805878826804303e-07, "loss": 0.8233312368392944, "step": 5694 }, { "epoch": 1.3122119815668203, "grad_norm": 1.1563119764352225, "learning_rate": 5.802420575527165e-07, "loss": 0.7756507992744446, "step": 5695 }, { "epoch": 1.312442396313364, "grad_norm": 1.1867005828091945, "learning_rate": 5.798962933523124e-07, "loss": 0.7503829002380371, "step": 5696 }, { "epoch": 1.3126728110599077, "grad_norm": 1.506327103479739, "learning_rate": 5.795505901294051e-07, "loss": 0.749663770198822, "step": 5697 }, { "epoch": 1.3129032258064517, "grad_norm": 1.440884605575443, "learning_rate": 5.792049479341732e-07, "loss": 0.9003115296363831, "step": 5698 }, { "epoch": 1.3131336405529954, "grad_norm": 1.059615932759845, "learning_rate": 5.788593668167854e-07, "loss": 0.655732274055481, "step": 5699 }, { "epoch": 1.3133640552995391, "grad_norm": 0.9900775273356892, "learning_rate": 5.785138468274036e-07, "loss": 0.7318822145462036, "step": 5700 }, { "epoch": 1.3135944700460829, "grad_norm": 0.9099775921199348, "learning_rate": 5.781683880161788e-07, "loss": 0.6512752771377563, "step": 5701 }, { "epoch": 1.3138248847926266, "grad_norm": 1.1289875219473309, "learning_rate": 5.778229904332537e-07, "loss": 0.7232785820960999, "step": 5702 }, { "epoch": 1.3140552995391706, "grad_norm": 1.2645196269426846, "learning_rate": 5.77477654128763e-07, "loss": 0.837032675743103, "step": 5703 }, { "epoch": 1.3142857142857143, "grad_norm": 1.4984544841183642, "learning_rate": 5.771323791528315e-07, "loss": 0.926714301109314, "step": 5704 }, { "epoch": 1.314516129032258, "grad_norm": 1.1221666474084682, "learning_rate": 5.76787165555575e-07, "loss": 0.7228986620903015, "step": 5705 }, { "epoch": 1.314746543778802, "grad_norm": 1.3618848390091767, "learning_rate": 5.764420133871015e-07, "loss": 0.8330450057983398, "step": 5706 }, { "epoch": 1.3149769585253457, "grad_norm": 1.2680150111326054, "learning_rate": 5.760969226975088e-07, "loss": 0.793700098991394, "step": 5707 }, { "epoch": 1.3152073732718894, "grad_norm": 1.2897950240071954, "learning_rate": 5.757518935368868e-07, "loss": 0.8797321319580078, "step": 5708 }, { "epoch": 1.3154377880184331, "grad_norm": 1.1147531221594877, "learning_rate": 5.754069259553159e-07, "loss": 0.8772039413452148, "step": 5709 }, { "epoch": 1.3156682027649769, "grad_norm": 0.820739065285044, "learning_rate": 5.750620200028672e-07, "loss": 0.5998358726501465, "step": 5710 }, { "epoch": 1.3158986175115208, "grad_norm": 1.7932534766511148, "learning_rate": 5.747171757296041e-07, "loss": 0.7694767713546753, "step": 5711 }, { "epoch": 1.3161290322580645, "grad_norm": 1.2782062967169578, "learning_rate": 5.7437239318558e-07, "loss": 0.8526760339736938, "step": 5712 }, { "epoch": 1.3163594470046083, "grad_norm": 1.199230266468518, "learning_rate": 5.740276724208396e-07, "loss": 0.8407987356185913, "step": 5713 }, { "epoch": 1.316589861751152, "grad_norm": 1.289466266523787, "learning_rate": 5.736830134854183e-07, "loss": 0.9731476306915283, "step": 5714 }, { "epoch": 1.3168202764976957, "grad_norm": 1.134122607422213, "learning_rate": 5.733384164293434e-07, "loss": 0.7230468988418579, "step": 5715 }, { "epoch": 1.3170506912442397, "grad_norm": 1.2031868742095575, "learning_rate": 5.729938813026327e-07, "loss": 0.8260238766670227, "step": 5716 }, { "epoch": 1.3172811059907834, "grad_norm": 1.0909604007760305, "learning_rate": 5.726494081552948e-07, "loss": 0.7616437673568726, "step": 5717 }, { "epoch": 1.3175115207373271, "grad_norm": 1.1614064666034054, "learning_rate": 5.723049970373295e-07, "loss": 0.7628509998321533, "step": 5718 }, { "epoch": 1.317741935483871, "grad_norm": 1.2522299219195512, "learning_rate": 5.719606479987273e-07, "loss": 0.744842529296875, "step": 5719 }, { "epoch": 1.3179723502304148, "grad_norm": 0.9975745357037148, "learning_rate": 5.716163610894708e-07, "loss": 0.7228065133094788, "step": 5720 }, { "epoch": 1.3182027649769585, "grad_norm": 1.5461378865588107, "learning_rate": 5.712721363595325e-07, "loss": 0.8764907121658325, "step": 5721 }, { "epoch": 1.3184331797235023, "grad_norm": 1.0737882176659082, "learning_rate": 5.709279738588757e-07, "loss": 0.7966248393058777, "step": 5722 }, { "epoch": 1.318663594470046, "grad_norm": 1.4239755183906653, "learning_rate": 5.705838736374558e-07, "loss": 0.8983157873153687, "step": 5723 }, { "epoch": 1.31889400921659, "grad_norm": 1.1693207378088453, "learning_rate": 5.70239835745218e-07, "loss": 0.7349347472190857, "step": 5724 }, { "epoch": 1.3191244239631337, "grad_norm": 1.4511397115268243, "learning_rate": 5.698958602320988e-07, "loss": 0.9297066926956177, "step": 5725 }, { "epoch": 1.3193548387096774, "grad_norm": 1.0721204261694746, "learning_rate": 5.695519471480266e-07, "loss": 0.7106038331985474, "step": 5726 }, { "epoch": 1.3195852534562211, "grad_norm": 1.3074916303787611, "learning_rate": 5.692080965429193e-07, "loss": 0.8759022951126099, "step": 5727 }, { "epoch": 1.3198156682027649, "grad_norm": 1.2039841953988952, "learning_rate": 5.688643084666862e-07, "loss": 0.8337300419807434, "step": 5728 }, { "epoch": 1.3200460829493088, "grad_norm": 1.2975435530580146, "learning_rate": 5.685205829692283e-07, "loss": 0.8543391227722168, "step": 5729 }, { "epoch": 1.3202764976958525, "grad_norm": 0.9960252179140261, "learning_rate": 5.681769201004366e-07, "loss": 0.7497329711914062, "step": 5730 }, { "epoch": 1.3205069124423963, "grad_norm": 1.0615580947761494, "learning_rate": 5.678333199101929e-07, "loss": 0.8190964460372925, "step": 5731 }, { "epoch": 1.3207373271889402, "grad_norm": 1.1486652227224357, "learning_rate": 5.674897824483711e-07, "loss": 0.8233011960983276, "step": 5732 }, { "epoch": 1.320967741935484, "grad_norm": 1.2086113696285639, "learning_rate": 5.671463077648348e-07, "loss": 0.75257408618927, "step": 5733 }, { "epoch": 1.3211981566820277, "grad_norm": 1.0357997575051858, "learning_rate": 5.668028959094386e-07, "loss": 0.6468796133995056, "step": 5734 }, { "epoch": 1.3214285714285714, "grad_norm": 0.869693175338726, "learning_rate": 5.664595469320288e-07, "loss": 0.6756174564361572, "step": 5735 }, { "epoch": 1.3216589861751151, "grad_norm": 1.2928038093451135, "learning_rate": 5.661162608824419e-07, "loss": 0.9040344953536987, "step": 5736 }, { "epoch": 1.321889400921659, "grad_norm": 1.013287726627938, "learning_rate": 5.657730378105055e-07, "loss": 0.8082150816917419, "step": 5737 }, { "epoch": 1.3221198156682028, "grad_norm": 1.2602760490074278, "learning_rate": 5.654298777660375e-07, "loss": 0.8760210275650024, "step": 5738 }, { "epoch": 1.3223502304147465, "grad_norm": 1.4464070872810626, "learning_rate": 5.650867807988473e-07, "loss": 0.6980990171432495, "step": 5739 }, { "epoch": 1.3225806451612903, "grad_norm": 0.927469939331727, "learning_rate": 5.647437469587355e-07, "loss": 0.6552839279174805, "step": 5740 }, { "epoch": 1.322811059907834, "grad_norm": 0.9934566913252004, "learning_rate": 5.644007762954925e-07, "loss": 0.8304816484451294, "step": 5741 }, { "epoch": 1.323041474654378, "grad_norm": 1.1691146043820817, "learning_rate": 5.640578688589e-07, "loss": 0.7977567315101624, "step": 5742 }, { "epoch": 1.3232718894009217, "grad_norm": 1.4376891352576404, "learning_rate": 5.637150246987308e-07, "loss": 0.7656992673873901, "step": 5743 }, { "epoch": 1.3235023041474654, "grad_norm": 1.1120822444951537, "learning_rate": 5.633722438647483e-07, "loss": 0.921256422996521, "step": 5744 }, { "epoch": 1.3237327188940093, "grad_norm": 1.2718785752085355, "learning_rate": 5.630295264067063e-07, "loss": 0.8012785315513611, "step": 5745 }, { "epoch": 1.323963133640553, "grad_norm": 1.2403067439539972, "learning_rate": 5.626868723743504e-07, "loss": 0.613241970539093, "step": 5746 }, { "epoch": 1.3241935483870968, "grad_norm": 1.34086331204533, "learning_rate": 5.623442818174161e-07, "loss": 0.7134846448898315, "step": 5747 }, { "epoch": 1.3244239631336405, "grad_norm": 1.3127547947642921, "learning_rate": 5.620017547856295e-07, "loss": 0.8963242173194885, "step": 5748 }, { "epoch": 1.3246543778801843, "grad_norm": 1.3476788930677732, "learning_rate": 5.616592913287087e-07, "loss": 0.8401378393173218, "step": 5749 }, { "epoch": 1.3248847926267282, "grad_norm": 1.0346861015576712, "learning_rate": 5.613168914963615e-07, "loss": 0.6455308198928833, "step": 5750 }, { "epoch": 1.325115207373272, "grad_norm": 1.105933895384034, "learning_rate": 5.609745553382863e-07, "loss": 0.6920031905174255, "step": 5751 }, { "epoch": 1.3253456221198157, "grad_norm": 1.1001754091297298, "learning_rate": 5.606322829041737e-07, "loss": 0.9099706411361694, "step": 5752 }, { "epoch": 1.3255760368663594, "grad_norm": 1.3286482905641974, "learning_rate": 5.602900742437036e-07, "loss": 0.8034265637397766, "step": 5753 }, { "epoch": 1.3258064516129031, "grad_norm": 0.9956708814709011, "learning_rate": 5.599479294065471e-07, "loss": 0.7216918468475342, "step": 5754 }, { "epoch": 1.326036866359447, "grad_norm": 1.1406371859334326, "learning_rate": 5.596058484423655e-07, "loss": 0.7428277730941772, "step": 5755 }, { "epoch": 1.3262672811059908, "grad_norm": 1.3052741120899958, "learning_rate": 5.592638314008127e-07, "loss": 0.7636011838912964, "step": 5756 }, { "epoch": 1.3264976958525345, "grad_norm": 1.3474656843000283, "learning_rate": 5.589218783315311e-07, "loss": 0.7765215635299683, "step": 5757 }, { "epoch": 1.3267281105990785, "grad_norm": 1.0612768168901736, "learning_rate": 5.585799892841551e-07, "loss": 0.6524033546447754, "step": 5758 }, { "epoch": 1.3269585253456222, "grad_norm": 1.134076107561282, "learning_rate": 5.582381643083087e-07, "loss": 0.8105186223983765, "step": 5759 }, { "epoch": 1.327188940092166, "grad_norm": 1.2647095323418043, "learning_rate": 5.578964034536084e-07, "loss": 0.7654449939727783, "step": 5760 }, { "epoch": 1.3274193548387097, "grad_norm": 1.0086580295069412, "learning_rate": 5.5755470676966e-07, "loss": 0.6545592546463013, "step": 5761 }, { "epoch": 1.3276497695852534, "grad_norm": 1.1744400728961766, "learning_rate": 5.572130743060597e-07, "loss": 0.7116275429725647, "step": 5762 }, { "epoch": 1.3278801843317973, "grad_norm": 1.246651079531453, "learning_rate": 5.568715061123959e-07, "loss": 0.8396822214126587, "step": 5763 }, { "epoch": 1.328110599078341, "grad_norm": 1.2492632037634621, "learning_rate": 5.565300022382464e-07, "loss": 0.6729685664176941, "step": 5764 }, { "epoch": 1.3283410138248848, "grad_norm": 1.1356361065972511, "learning_rate": 5.561885627331795e-07, "loss": 0.6891340017318726, "step": 5765 }, { "epoch": 1.3285714285714285, "grad_norm": 1.1361488307123824, "learning_rate": 5.558471876467556e-07, "loss": 0.7232956886291504, "step": 5766 }, { "epoch": 1.3288018433179722, "grad_norm": 1.3213229777584583, "learning_rate": 5.555058770285246e-07, "loss": 0.7800660133361816, "step": 5767 }, { "epoch": 1.3290322580645162, "grad_norm": 1.106817082140827, "learning_rate": 5.551646309280266e-07, "loss": 0.6794005036354065, "step": 5768 }, { "epoch": 1.32926267281106, "grad_norm": 1.223898149625345, "learning_rate": 5.548234493947939e-07, "loss": 0.7739551067352295, "step": 5769 }, { "epoch": 1.3294930875576036, "grad_norm": 1.0605861943491384, "learning_rate": 5.544823324783482e-07, "loss": 0.759978711605072, "step": 5770 }, { "epoch": 1.3297235023041476, "grad_norm": 0.9593168779270222, "learning_rate": 5.541412802282017e-07, "loss": 0.7563333511352539, "step": 5771 }, { "epoch": 1.3299539170506913, "grad_norm": 1.2126531853296405, "learning_rate": 5.538002926938587e-07, "loss": 0.6705852746963501, "step": 5772 }, { "epoch": 1.330184331797235, "grad_norm": 1.4207541622240418, "learning_rate": 5.534593699248124e-07, "loss": 0.8343281745910645, "step": 5773 }, { "epoch": 1.3304147465437788, "grad_norm": 1.4106880574063376, "learning_rate": 5.531185119705474e-07, "loss": 0.7158486843109131, "step": 5774 }, { "epoch": 1.3306451612903225, "grad_norm": 1.5132468140839932, "learning_rate": 5.527777188805385e-07, "loss": 0.8888766765594482, "step": 5775 }, { "epoch": 1.3308755760368665, "grad_norm": 1.0939731159249404, "learning_rate": 5.524369907042519e-07, "loss": 0.873813271522522, "step": 5776 }, { "epoch": 1.3311059907834102, "grad_norm": 1.1685988919933143, "learning_rate": 5.520963274911437e-07, "loss": 0.7654919624328613, "step": 5777 }, { "epoch": 1.331336405529954, "grad_norm": 0.8790821291361985, "learning_rate": 5.517557292906606e-07, "loss": 0.6976190805435181, "step": 5778 }, { "epoch": 1.3315668202764976, "grad_norm": 1.0920428500423505, "learning_rate": 5.5141519615224e-07, "loss": 0.8356388807296753, "step": 5779 }, { "epoch": 1.3317972350230414, "grad_norm": 1.1881219631842852, "learning_rate": 5.510747281253094e-07, "loss": 0.719998836517334, "step": 5780 }, { "epoch": 1.3320276497695853, "grad_norm": 1.4093262324111957, "learning_rate": 5.507343252592882e-07, "loss": 0.8432124853134155, "step": 5781 }, { "epoch": 1.332258064516129, "grad_norm": 1.2484869478133402, "learning_rate": 5.503939876035845e-07, "loss": 0.8426402807235718, "step": 5782 }, { "epoch": 1.3324884792626728, "grad_norm": 1.1843136616988323, "learning_rate": 5.500537152075986e-07, "loss": 0.8133292198181152, "step": 5783 }, { "epoch": 1.3327188940092167, "grad_norm": 1.2203561788081314, "learning_rate": 5.497135081207205e-07, "loss": 0.8097467422485352, "step": 5784 }, { "epoch": 1.3329493087557602, "grad_norm": 0.9964838781032684, "learning_rate": 5.493733663923299e-07, "loss": 0.6943382024765015, "step": 5785 }, { "epoch": 1.3331797235023042, "grad_norm": 0.8958647048569846, "learning_rate": 5.490332900717993e-07, "loss": 0.5896245837211609, "step": 5786 }, { "epoch": 1.333410138248848, "grad_norm": 1.2066217319918868, "learning_rate": 5.486932792084895e-07, "loss": 0.6837725639343262, "step": 5787 }, { "epoch": 1.3336405529953916, "grad_norm": 1.3459237431541746, "learning_rate": 5.483533338517523e-07, "loss": 0.8371915221214294, "step": 5788 }, { "epoch": 1.3338709677419356, "grad_norm": 1.1649657355507903, "learning_rate": 5.480134540509313e-07, "loss": 0.8001077175140381, "step": 5789 }, { "epoch": 1.3341013824884793, "grad_norm": 1.4458040399946648, "learning_rate": 5.476736398553591e-07, "loss": 0.9070717096328735, "step": 5790 }, { "epoch": 1.334331797235023, "grad_norm": 1.256932465914866, "learning_rate": 5.473338913143589e-07, "loss": 0.9061849117279053, "step": 5791 }, { "epoch": 1.3345622119815668, "grad_norm": 1.2993559451282939, "learning_rate": 5.469942084772454e-07, "loss": 0.8465786576271057, "step": 5792 }, { "epoch": 1.3347926267281105, "grad_norm": 1.2333173266054418, "learning_rate": 5.466545913933229e-07, "loss": 0.8221259117126465, "step": 5793 }, { "epoch": 1.3350230414746544, "grad_norm": 1.1214356414954587, "learning_rate": 5.463150401118864e-07, "loss": 0.594088077545166, "step": 5794 }, { "epoch": 1.3352534562211982, "grad_norm": 1.0900215939620008, "learning_rate": 5.459755546822207e-07, "loss": 0.6983529925346375, "step": 5795 }, { "epoch": 1.335483870967742, "grad_norm": 1.3561793320800521, "learning_rate": 5.456361351536027e-07, "loss": 0.7720709443092346, "step": 5796 }, { "epoch": 1.3357142857142856, "grad_norm": 1.1798730390657586, "learning_rate": 5.45296781575298e-07, "loss": 0.8087977766990662, "step": 5797 }, { "epoch": 1.3359447004608294, "grad_norm": 1.123982224882924, "learning_rate": 5.449574939965636e-07, "loss": 0.6808000802993774, "step": 5798 }, { "epoch": 1.3361751152073733, "grad_norm": 1.0634688756756818, "learning_rate": 5.446182724666466e-07, "loss": 0.7222881317138672, "step": 5799 }, { "epoch": 1.336405529953917, "grad_norm": 1.1919279054292256, "learning_rate": 5.44279117034784e-07, "loss": 0.872687578201294, "step": 5800 }, { "epoch": 1.3366359447004608, "grad_norm": 1.3648460603559815, "learning_rate": 5.439400277502048e-07, "loss": 0.7728114128112793, "step": 5801 }, { "epoch": 1.3368663594470047, "grad_norm": 1.0445795610107291, "learning_rate": 5.436010046621267e-07, "loss": 0.807528018951416, "step": 5802 }, { "epoch": 1.3370967741935484, "grad_norm": 1.151575262421327, "learning_rate": 5.432620478197583e-07, "loss": 0.6997063159942627, "step": 5803 }, { "epoch": 1.3373271889400922, "grad_norm": 1.309125931504039, "learning_rate": 5.429231572722995e-07, "loss": 0.797568678855896, "step": 5804 }, { "epoch": 1.337557603686636, "grad_norm": 1.0057472643211554, "learning_rate": 5.425843330689386e-07, "loss": 0.6412359476089478, "step": 5805 }, { "epoch": 1.3377880184331796, "grad_norm": 1.1290308654729904, "learning_rate": 5.422455752588569e-07, "loss": 0.8605507612228394, "step": 5806 }, { "epoch": 1.3380184331797236, "grad_norm": 1.0459165137008808, "learning_rate": 5.419068838912238e-07, "loss": 0.856192946434021, "step": 5807 }, { "epoch": 1.3382488479262673, "grad_norm": 1.1354202830657318, "learning_rate": 5.415682590151998e-07, "loss": 0.8614650368690491, "step": 5808 }, { "epoch": 1.338479262672811, "grad_norm": 1.6619444336877072, "learning_rate": 5.412297006799365e-07, "loss": 0.9675840139389038, "step": 5809 }, { "epoch": 1.3387096774193548, "grad_norm": 1.0659647985310448, "learning_rate": 5.408912089345747e-07, "loss": 0.7333405017852783, "step": 5810 }, { "epoch": 1.3389400921658985, "grad_norm": 1.3540380425785927, "learning_rate": 5.405527838282457e-07, "loss": 0.8271909952163696, "step": 5811 }, { "epoch": 1.3391705069124424, "grad_norm": 1.3562249096622705, "learning_rate": 5.402144254100724e-07, "loss": 0.8036069869995117, "step": 5812 }, { "epoch": 1.3394009216589862, "grad_norm": 1.3975514954701582, "learning_rate": 5.398761337291667e-07, "loss": 0.855912446975708, "step": 5813 }, { "epoch": 1.33963133640553, "grad_norm": 1.2830545749549949, "learning_rate": 5.395379088346309e-07, "loss": 0.8198536038398743, "step": 5814 }, { "epoch": 1.3398617511520738, "grad_norm": 1.2130249913700057, "learning_rate": 5.391997507755581e-07, "loss": 0.8931646347045898, "step": 5815 }, { "epoch": 1.3400921658986176, "grad_norm": 0.9981529734228639, "learning_rate": 5.388616596010312e-07, "loss": 0.7073954343795776, "step": 5816 }, { "epoch": 1.3403225806451613, "grad_norm": 1.2450035085912274, "learning_rate": 5.385236353601241e-07, "loss": 0.7758424282073975, "step": 5817 }, { "epoch": 1.340552995391705, "grad_norm": 1.1707291956273551, "learning_rate": 5.381856781019005e-07, "loss": 0.6805497407913208, "step": 5818 }, { "epoch": 1.3407834101382488, "grad_norm": 1.251540768140409, "learning_rate": 5.378477878754144e-07, "loss": 0.8956538438796997, "step": 5819 }, { "epoch": 1.3410138248847927, "grad_norm": 1.0594639846580987, "learning_rate": 5.375099647297096e-07, "loss": 0.7819657921791077, "step": 5820 }, { "epoch": 1.3412442396313364, "grad_norm": 1.0523114055014655, "learning_rate": 5.371722087138217e-07, "loss": 0.5764007568359375, "step": 5821 }, { "epoch": 1.3414746543778802, "grad_norm": 1.1661401559135987, "learning_rate": 5.368345198767749e-07, "loss": 0.697022557258606, "step": 5822 }, { "epoch": 1.3417050691244239, "grad_norm": 1.3065346909259405, "learning_rate": 5.364968982675839e-07, "loss": 0.7773014307022095, "step": 5823 }, { "epoch": 1.3419354838709676, "grad_norm": 1.3340944249973796, "learning_rate": 5.361593439352551e-07, "loss": 0.7395004034042358, "step": 5824 }, { "epoch": 1.3421658986175116, "grad_norm": 1.0762295080363014, "learning_rate": 5.358218569287834e-07, "loss": 0.7989716529846191, "step": 5825 }, { "epoch": 1.3423963133640553, "grad_norm": 1.280549478612159, "learning_rate": 5.354844372971543e-07, "loss": 0.8894884586334229, "step": 5826 }, { "epoch": 1.342626728110599, "grad_norm": 1.5586577225053506, "learning_rate": 5.351470850893446e-07, "loss": 0.8415021300315857, "step": 5827 }, { "epoch": 1.342857142857143, "grad_norm": 1.4272313895508615, "learning_rate": 5.3480980035432e-07, "loss": 0.9963078498840332, "step": 5828 }, { "epoch": 1.3430875576036867, "grad_norm": 1.1680739887228044, "learning_rate": 5.344725831410368e-07, "loss": 0.8489943742752075, "step": 5829 }, { "epoch": 1.3433179723502304, "grad_norm": 0.9897462108554296, "learning_rate": 5.341354334984422e-07, "loss": 0.6949954032897949, "step": 5830 }, { "epoch": 1.3435483870967742, "grad_norm": 1.1225897948987795, "learning_rate": 5.337983514754722e-07, "loss": 0.878408670425415, "step": 5831 }, { "epoch": 1.3437788018433179, "grad_norm": 1.2067617593706235, "learning_rate": 5.334613371210549e-07, "loss": 0.722877025604248, "step": 5832 }, { "epoch": 1.3440092165898618, "grad_norm": 1.04123853110292, "learning_rate": 5.331243904841068e-07, "loss": 0.670013427734375, "step": 5833 }, { "epoch": 1.3442396313364056, "grad_norm": 1.0789084686611892, "learning_rate": 5.327875116135354e-07, "loss": 0.8336968421936035, "step": 5834 }, { "epoch": 1.3444700460829493, "grad_norm": 1.2348261826059375, "learning_rate": 5.324507005582381e-07, "loss": 0.7917020916938782, "step": 5835 }, { "epoch": 1.344700460829493, "grad_norm": 1.288528901659057, "learning_rate": 5.321139573671024e-07, "loss": 0.7479217052459717, "step": 5836 }, { "epoch": 1.3449308755760367, "grad_norm": 1.206901718846971, "learning_rate": 5.317772820890068e-07, "loss": 0.8059084415435791, "step": 5837 }, { "epoch": 1.3451612903225807, "grad_norm": 1.0687058344207596, "learning_rate": 5.314406747728186e-07, "loss": 0.6853187680244446, "step": 5838 }, { "epoch": 1.3453917050691244, "grad_norm": 1.2007310227541288, "learning_rate": 5.311041354673964e-07, "loss": 0.7769491672515869, "step": 5839 }, { "epoch": 1.3456221198156681, "grad_norm": 1.007121872066712, "learning_rate": 5.307676642215877e-07, "loss": 0.6669384241104126, "step": 5840 }, { "epoch": 1.345852534562212, "grad_norm": 1.091111253411437, "learning_rate": 5.304312610842319e-07, "loss": 0.7884945869445801, "step": 5841 }, { "epoch": 1.3460829493087558, "grad_norm": 1.2799296704263758, "learning_rate": 5.300949261041567e-07, "loss": 0.8030047416687012, "step": 5842 }, { "epoch": 1.3463133640552996, "grad_norm": 1.293856241707333, "learning_rate": 5.297586593301806e-07, "loss": 0.7792675495147705, "step": 5843 }, { "epoch": 1.3465437788018433, "grad_norm": 1.450964712660266, "learning_rate": 5.29422460811113e-07, "loss": 0.8699119091033936, "step": 5844 }, { "epoch": 1.346774193548387, "grad_norm": 1.1164478098944863, "learning_rate": 5.290863305957523e-07, "loss": 0.8075394630432129, "step": 5845 }, { "epoch": 1.347004608294931, "grad_norm": 1.2025668698948455, "learning_rate": 5.287502687328868e-07, "loss": 0.7875077128410339, "step": 5846 }, { "epoch": 1.3472350230414747, "grad_norm": 1.2743475952279586, "learning_rate": 5.284142752712965e-07, "loss": 0.6799413561820984, "step": 5847 }, { "epoch": 1.3474654377880184, "grad_norm": 1.3570475044053845, "learning_rate": 5.280783502597496e-07, "loss": 0.914801299571991, "step": 5848 }, { "epoch": 1.3476958525345621, "grad_norm": 1.4096481978785727, "learning_rate": 5.277424937470052e-07, "loss": 0.8591992855072021, "step": 5849 }, { "epoch": 1.3479262672811059, "grad_norm": 1.1971358109064123, "learning_rate": 5.27406705781813e-07, "loss": 0.7830478549003601, "step": 5850 }, { "epoch": 1.3481566820276498, "grad_norm": 1.397466179292115, "learning_rate": 5.270709864129119e-07, "loss": 0.8365499973297119, "step": 5851 }, { "epoch": 1.3483870967741935, "grad_norm": 1.5417932199175834, "learning_rate": 5.267353356890305e-07, "loss": 0.8342669010162354, "step": 5852 }, { "epoch": 1.3486175115207373, "grad_norm": 1.0532947941417055, "learning_rate": 5.263997536588891e-07, "loss": 0.7802393436431885, "step": 5853 }, { "epoch": 1.3488479262672812, "grad_norm": 1.2005511445865484, "learning_rate": 5.260642403711964e-07, "loss": 0.8245328068733215, "step": 5854 }, { "epoch": 1.349078341013825, "grad_norm": 1.043405656704728, "learning_rate": 5.257287958746519e-07, "loss": 0.7209265232086182, "step": 5855 }, { "epoch": 1.3493087557603687, "grad_norm": 1.254105643009189, "learning_rate": 5.253934202179444e-07, "loss": 0.9258058071136475, "step": 5856 }, { "epoch": 1.3495391705069124, "grad_norm": 1.3493584028342165, "learning_rate": 5.25058113449754e-07, "loss": 0.6889467835426331, "step": 5857 }, { "epoch": 1.3497695852534561, "grad_norm": 1.113027412487739, "learning_rate": 5.247228756187498e-07, "loss": 0.8810057640075684, "step": 5858 }, { "epoch": 1.35, "grad_norm": 1.140989478824924, "learning_rate": 5.243877067735909e-07, "loss": 0.7236393690109253, "step": 5859 }, { "epoch": 1.3502304147465438, "grad_norm": 1.1712872152312954, "learning_rate": 5.240526069629264e-07, "loss": 0.8287979364395142, "step": 5860 }, { "epoch": 1.3504608294930875, "grad_norm": 0.9764543402246563, "learning_rate": 5.237175762353964e-07, "loss": 0.8268846869468689, "step": 5861 }, { "epoch": 1.3506912442396313, "grad_norm": 1.08770217121451, "learning_rate": 5.233826146396296e-07, "loss": 0.7995575666427612, "step": 5862 }, { "epoch": 1.350921658986175, "grad_norm": 1.185939350431103, "learning_rate": 5.230477222242449e-07, "loss": 0.7379493713378906, "step": 5863 }, { "epoch": 1.351152073732719, "grad_norm": 1.1532350043824988, "learning_rate": 5.227128990378524e-07, "loss": 0.729906439781189, "step": 5864 }, { "epoch": 1.3513824884792627, "grad_norm": 1.3775772205538213, "learning_rate": 5.223781451290506e-07, "loss": 0.8356789350509644, "step": 5865 }, { "epoch": 1.3516129032258064, "grad_norm": 1.4707388081384496, "learning_rate": 5.220434605464285e-07, "loss": 0.8130582571029663, "step": 5866 }, { "epoch": 1.3518433179723504, "grad_norm": 1.3840431554185126, "learning_rate": 5.217088453385658e-07, "loss": 0.7686447501182556, "step": 5867 }, { "epoch": 1.352073732718894, "grad_norm": 1.4824685151456765, "learning_rate": 5.213742995540309e-07, "loss": 0.7945844531059265, "step": 5868 }, { "epoch": 1.3523041474654378, "grad_norm": 0.9715413572597766, "learning_rate": 5.210398232413824e-07, "loss": 0.8082837462425232, "step": 5869 }, { "epoch": 1.3525345622119815, "grad_norm": 1.2398246007417328, "learning_rate": 5.2070541644917e-07, "loss": 0.7826153039932251, "step": 5870 }, { "epoch": 1.3527649769585253, "grad_norm": 1.2471684178108737, "learning_rate": 5.203710792259318e-07, "loss": 0.6853276491165161, "step": 5871 }, { "epoch": 1.3529953917050692, "grad_norm": 1.2891891865978977, "learning_rate": 5.200368116201962e-07, "loss": 0.8354780673980713, "step": 5872 }, { "epoch": 1.353225806451613, "grad_norm": 1.1178862343459024, "learning_rate": 5.197026136804823e-07, "loss": 0.7857648134231567, "step": 5873 }, { "epoch": 1.3534562211981567, "grad_norm": 0.9168225851850988, "learning_rate": 5.193684854552982e-07, "loss": 0.663504958152771, "step": 5874 }, { "epoch": 1.3536866359447004, "grad_norm": 1.329771615602396, "learning_rate": 5.190344269931423e-07, "loss": 0.8192203044891357, "step": 5875 }, { "epoch": 1.3539170506912441, "grad_norm": 1.4861685476717017, "learning_rate": 5.187004383425024e-07, "loss": 0.801753044128418, "step": 5876 }, { "epoch": 1.354147465437788, "grad_norm": 1.3551621393598028, "learning_rate": 5.183665195518566e-07, "loss": 0.9427206516265869, "step": 5877 }, { "epoch": 1.3543778801843318, "grad_norm": 1.1121835630605517, "learning_rate": 5.18032670669673e-07, "loss": 0.7801729440689087, "step": 5878 }, { "epoch": 1.3546082949308755, "grad_norm": 1.3936797390586833, "learning_rate": 5.176988917444094e-07, "loss": 0.8224533796310425, "step": 5879 }, { "epoch": 1.3548387096774195, "grad_norm": 0.9505008459531469, "learning_rate": 5.173651828245127e-07, "loss": 0.7800098657608032, "step": 5880 }, { "epoch": 1.3550691244239632, "grad_norm": 0.9654380749861797, "learning_rate": 5.170315439584212e-07, "loss": 0.7612746953964233, "step": 5881 }, { "epoch": 1.355299539170507, "grad_norm": 1.191616140078335, "learning_rate": 5.166979751945617e-07, "loss": 0.8027492761611938, "step": 5882 }, { "epoch": 1.3555299539170507, "grad_norm": 1.167147993456773, "learning_rate": 5.163644765813508e-07, "loss": 0.7509280443191528, "step": 5883 }, { "epoch": 1.3557603686635944, "grad_norm": 1.2102231125675782, "learning_rate": 5.160310481671966e-07, "loss": 0.7663145661354065, "step": 5884 }, { "epoch": 1.3559907834101383, "grad_norm": 1.246862901799125, "learning_rate": 5.156976900004948e-07, "loss": 0.7598870396614075, "step": 5885 }, { "epoch": 1.356221198156682, "grad_norm": 1.127184650819857, "learning_rate": 5.153644021296317e-07, "loss": 0.7923038005828857, "step": 5886 }, { "epoch": 1.3564516129032258, "grad_norm": 1.2664053097126295, "learning_rate": 5.150311846029846e-07, "loss": 0.8711799383163452, "step": 5887 }, { "epoch": 1.3566820276497695, "grad_norm": 1.294570667250746, "learning_rate": 5.146980374689191e-07, "loss": 0.7852096557617188, "step": 5888 }, { "epoch": 1.3569124423963133, "grad_norm": 1.1426360408928755, "learning_rate": 5.143649607757905e-07, "loss": 0.7259876132011414, "step": 5889 }, { "epoch": 1.3571428571428572, "grad_norm": 0.9810253925795782, "learning_rate": 5.140319545719454e-07, "loss": 0.7612321376800537, "step": 5890 }, { "epoch": 1.357373271889401, "grad_norm": 1.2919477789807814, "learning_rate": 5.136990189057187e-07, "loss": 0.7881298661231995, "step": 5891 }, { "epoch": 1.3576036866359447, "grad_norm": 1.0310706760740191, "learning_rate": 5.133661538254353e-07, "loss": 0.6956340074539185, "step": 5892 }, { "epoch": 1.3578341013824886, "grad_norm": 1.0277045355993415, "learning_rate": 5.130333593794107e-07, "loss": 0.7800698280334473, "step": 5893 }, { "epoch": 1.3580645161290323, "grad_norm": 1.0373100274796343, "learning_rate": 5.127006356159496e-07, "loss": 0.6920318603515625, "step": 5894 }, { "epoch": 1.358294930875576, "grad_norm": 0.9870224446835288, "learning_rate": 5.123679825833458e-07, "loss": 0.6972872018814087, "step": 5895 }, { "epoch": 1.3585253456221198, "grad_norm": 1.1473583592012562, "learning_rate": 5.12035400329884e-07, "loss": 0.8820276260375977, "step": 5896 }, { "epoch": 1.3587557603686635, "grad_norm": 1.1566218274104645, "learning_rate": 5.117028889038375e-07, "loss": 0.8834109306335449, "step": 5897 }, { "epoch": 1.3589861751152075, "grad_norm": 1.1393544418506285, "learning_rate": 5.113704483534704e-07, "loss": 0.6981096267700195, "step": 5898 }, { "epoch": 1.3592165898617512, "grad_norm": 1.329102048560067, "learning_rate": 5.11038078727036e-07, "loss": 0.7617249488830566, "step": 5899 }, { "epoch": 1.359447004608295, "grad_norm": 1.7116569149164136, "learning_rate": 5.107057800727773e-07, "loss": 0.8373798131942749, "step": 5900 }, { "epoch": 1.3596774193548387, "grad_norm": 1.3064563550321244, "learning_rate": 5.103735524389264e-07, "loss": 0.7176666855812073, "step": 5901 }, { "epoch": 1.3599078341013824, "grad_norm": 0.9003342699900779, "learning_rate": 5.100413958737067e-07, "loss": 0.7872966527938843, "step": 5902 }, { "epoch": 1.3601382488479263, "grad_norm": 1.1723157653802474, "learning_rate": 5.097093104253295e-07, "loss": 0.6668897271156311, "step": 5903 }, { "epoch": 1.36036866359447, "grad_norm": 1.2119302484042467, "learning_rate": 5.093772961419967e-07, "loss": 0.8413408994674683, "step": 5904 }, { "epoch": 1.3605990783410138, "grad_norm": 0.9857990015136971, "learning_rate": 5.090453530719e-07, "loss": 0.632825493812561, "step": 5905 }, { "epoch": 1.3608294930875577, "grad_norm": 1.2378128322555926, "learning_rate": 5.087134812632201e-07, "loss": 0.737346887588501, "step": 5906 }, { "epoch": 1.3610599078341012, "grad_norm": 1.2614434601380542, "learning_rate": 5.083816807641283e-07, "loss": 1.00008225440979, "step": 5907 }, { "epoch": 1.3612903225806452, "grad_norm": 1.2168755174090398, "learning_rate": 5.08049951622785e-07, "loss": 0.7844079732894897, "step": 5908 }, { "epoch": 1.361520737327189, "grad_norm": 1.0532681425474226, "learning_rate": 5.077182938873393e-07, "loss": 0.8615080118179321, "step": 5909 }, { "epoch": 1.3617511520737327, "grad_norm": 1.279562028421048, "learning_rate": 5.073867076059321e-07, "loss": 0.6930621862411499, "step": 5910 }, { "epoch": 1.3619815668202766, "grad_norm": 1.4077453728560791, "learning_rate": 5.07055192826692e-07, "loss": 0.7020307183265686, "step": 5911 }, { "epoch": 1.3622119815668203, "grad_norm": 1.4403791813866107, "learning_rate": 5.067237495977379e-07, "loss": 0.7281042337417603, "step": 5912 }, { "epoch": 1.362442396313364, "grad_norm": 1.014203177200115, "learning_rate": 5.063923779671789e-07, "loss": 0.8092719316482544, "step": 5913 }, { "epoch": 1.3626728110599078, "grad_norm": 1.2597384594296865, "learning_rate": 5.060610779831125e-07, "loss": 0.7323317527770996, "step": 5914 }, { "epoch": 1.3629032258064515, "grad_norm": 0.9804861205409557, "learning_rate": 5.05729849693627e-07, "loss": 0.7370069622993469, "step": 5915 }, { "epoch": 1.3631336405529955, "grad_norm": 1.1355071333670705, "learning_rate": 5.053986931467994e-07, "loss": 0.7175320386886597, "step": 5916 }, { "epoch": 1.3633640552995392, "grad_norm": 1.2834592450306632, "learning_rate": 5.050676083906964e-07, "loss": 0.8643501996994019, "step": 5917 }, { "epoch": 1.363594470046083, "grad_norm": 1.2479698704612106, "learning_rate": 5.047365954733752e-07, "loss": 0.9110950827598572, "step": 5918 }, { "epoch": 1.3638248847926266, "grad_norm": 1.6104451195946936, "learning_rate": 5.044056544428814e-07, "loss": 0.9242197275161743, "step": 5919 }, { "epoch": 1.3640552995391704, "grad_norm": 1.2769108446030992, "learning_rate": 5.040747853472509e-07, "loss": 0.9218860864639282, "step": 5920 }, { "epoch": 1.3642857142857143, "grad_norm": 1.3302527755174611, "learning_rate": 5.037439882345084e-07, "loss": 0.970054030418396, "step": 5921 }, { "epoch": 1.364516129032258, "grad_norm": 1.0075132364725619, "learning_rate": 5.034132631526695e-07, "loss": 0.7707182168960571, "step": 5922 }, { "epoch": 1.3647465437788018, "grad_norm": 1.1036594577594991, "learning_rate": 5.03082610149738e-07, "loss": 0.7673811912536621, "step": 5923 }, { "epoch": 1.3649769585253457, "grad_norm": 1.2758650519526258, "learning_rate": 5.027520292737073e-07, "loss": 0.7387198209762573, "step": 5924 }, { "epoch": 1.3652073732718895, "grad_norm": 1.139448521744241, "learning_rate": 5.024215205725619e-07, "loss": 0.7803019881248474, "step": 5925 }, { "epoch": 1.3654377880184332, "grad_norm": 1.3985269621197394, "learning_rate": 5.020910840942738e-07, "loss": 0.8753018379211426, "step": 5926 }, { "epoch": 1.365668202764977, "grad_norm": 1.0358625157915384, "learning_rate": 5.017607198868055e-07, "loss": 0.7917389869689941, "step": 5927 }, { "epoch": 1.3658986175115206, "grad_norm": 1.2995608187995562, "learning_rate": 5.014304279981095e-07, "loss": 0.8393691182136536, "step": 5928 }, { "epoch": 1.3661290322580646, "grad_norm": 1.2671721961788391, "learning_rate": 5.011002084761264e-07, "loss": 0.6635205745697021, "step": 5929 }, { "epoch": 1.3663594470046083, "grad_norm": 1.2038857805513816, "learning_rate": 5.007700613687879e-07, "loss": 0.7058769464492798, "step": 5930 }, { "epoch": 1.366589861751152, "grad_norm": 1.1784688857731938, "learning_rate": 5.004399867240143e-07, "loss": 0.841168224811554, "step": 5931 }, { "epoch": 1.3668202764976958, "grad_norm": 1.3760327619217738, "learning_rate": 5.001099845897148e-07, "loss": 0.7385121583938599, "step": 5932 }, { "epoch": 1.3670506912442395, "grad_norm": 1.1633525983686732, "learning_rate": 4.997800550137897e-07, "loss": 0.6525158882141113, "step": 5933 }, { "epoch": 1.3672811059907835, "grad_norm": 1.2331358286597804, "learning_rate": 4.994501980441274e-07, "loss": 0.7838844060897827, "step": 5934 }, { "epoch": 1.3675115207373272, "grad_norm": 1.4450953979822279, "learning_rate": 4.991204137286061e-07, "loss": 0.8831999897956848, "step": 5935 }, { "epoch": 1.367741935483871, "grad_norm": 1.0408031352355525, "learning_rate": 4.987907021150938e-07, "loss": 0.8053784966468811, "step": 5936 }, { "epoch": 1.3679723502304149, "grad_norm": 1.1356206370071746, "learning_rate": 4.984610632514475e-07, "loss": 0.8093301057815552, "step": 5937 }, { "epoch": 1.3682027649769586, "grad_norm": 1.0230530705292329, "learning_rate": 4.981314971855136e-07, "loss": 0.7609653472900391, "step": 5938 }, { "epoch": 1.3684331797235023, "grad_norm": 1.4109994154981755, "learning_rate": 4.978020039651288e-07, "loss": 0.7131600379943848, "step": 5939 }, { "epoch": 1.368663594470046, "grad_norm": 1.3192550042799691, "learning_rate": 4.974725836381184e-07, "loss": 0.6555063724517822, "step": 5940 }, { "epoch": 1.3688940092165898, "grad_norm": 1.1278604970222592, "learning_rate": 4.971432362522968e-07, "loss": 0.8349519968032837, "step": 5941 }, { "epoch": 1.3691244239631337, "grad_norm": 1.2138732932202303, "learning_rate": 4.968139618554691e-07, "loss": 0.7335611581802368, "step": 5942 }, { "epoch": 1.3693548387096774, "grad_norm": 1.050807913168598, "learning_rate": 4.964847604954287e-07, "loss": 0.8349814414978027, "step": 5943 }, { "epoch": 1.3695852534562212, "grad_norm": 1.07716704849378, "learning_rate": 4.961556322199585e-07, "loss": 0.6816729307174683, "step": 5944 }, { "epoch": 1.369815668202765, "grad_norm": 1.5220059571304148, "learning_rate": 4.958265770768315e-07, "loss": 0.847672164440155, "step": 5945 }, { "epoch": 1.3700460829493086, "grad_norm": 1.267067930725286, "learning_rate": 4.954975951138095e-07, "loss": 0.6674519777297974, "step": 5946 }, { "epoch": 1.3702764976958526, "grad_norm": 1.0820409905680344, "learning_rate": 4.951686863786432e-07, "loss": 0.7836427092552185, "step": 5947 }, { "epoch": 1.3705069124423963, "grad_norm": 1.0577780792239002, "learning_rate": 4.948398509190742e-07, "loss": 0.640183687210083, "step": 5948 }, { "epoch": 1.37073732718894, "grad_norm": 1.223963669470004, "learning_rate": 4.945110887828322e-07, "loss": 0.8438451290130615, "step": 5949 }, { "epoch": 1.370967741935484, "grad_norm": 1.5483267377377474, "learning_rate": 4.94182400017636e-07, "loss": 0.9311714172363281, "step": 5950 }, { "epoch": 1.3711981566820277, "grad_norm": 1.2352509732193302, "learning_rate": 4.938537846711952e-07, "loss": 0.7332801818847656, "step": 5951 }, { "epoch": 1.3714285714285714, "grad_norm": 1.127354832681604, "learning_rate": 4.935252427912075e-07, "loss": 0.7189289331436157, "step": 5952 }, { "epoch": 1.3716589861751152, "grad_norm": 1.451594181977691, "learning_rate": 4.9319677442536e-07, "loss": 0.827372670173645, "step": 5953 }, { "epoch": 1.371889400921659, "grad_norm": 1.2273788913776413, "learning_rate": 4.9286837962133e-07, "loss": 0.7607625722885132, "step": 5954 }, { "epoch": 1.3721198156682028, "grad_norm": 1.1935199245873378, "learning_rate": 4.925400584267836e-07, "loss": 0.9420886635780334, "step": 5955 }, { "epoch": 1.3723502304147466, "grad_norm": 1.1557325656206936, "learning_rate": 4.922118108893757e-07, "loss": 0.7605317831039429, "step": 5956 }, { "epoch": 1.3725806451612903, "grad_norm": 1.059494459687004, "learning_rate": 4.918836370567513e-07, "loss": 0.8353599309921265, "step": 5957 }, { "epoch": 1.372811059907834, "grad_norm": 1.2571100340874592, "learning_rate": 4.915555369765439e-07, "loss": 0.8540027141571045, "step": 5958 }, { "epoch": 1.3730414746543778, "grad_norm": 1.027809306304352, "learning_rate": 4.912275106963778e-07, "loss": 0.6965712308883667, "step": 5959 }, { "epoch": 1.3732718894009217, "grad_norm": 1.0356479101830274, "learning_rate": 4.908995582638648e-07, "loss": 0.7460787296295166, "step": 5960 }, { "epoch": 1.3735023041474654, "grad_norm": 1.1845566109999182, "learning_rate": 4.905716797266067e-07, "loss": 0.8652873039245605, "step": 5961 }, { "epoch": 1.3737327188940092, "grad_norm": 1.1300176885770365, "learning_rate": 4.902438751321952e-07, "loss": 0.7757953405380249, "step": 5962 }, { "epoch": 1.3739631336405531, "grad_norm": 1.2945741727860514, "learning_rate": 4.899161445282102e-07, "loss": 0.8842452168464661, "step": 5963 }, { "epoch": 1.3741935483870968, "grad_norm": 1.1415902309445607, "learning_rate": 4.895884879622215e-07, "loss": 0.7259113788604736, "step": 5964 }, { "epoch": 1.3744239631336406, "grad_norm": 1.3855842779268248, "learning_rate": 4.892609054817883e-07, "loss": 0.8871402144432068, "step": 5965 }, { "epoch": 1.3746543778801843, "grad_norm": 1.3262407740428463, "learning_rate": 4.889333971344586e-07, "loss": 0.7564518451690674, "step": 5966 }, { "epoch": 1.374884792626728, "grad_norm": 1.2010368462649357, "learning_rate": 4.886059629677692e-07, "loss": 0.7886015176773071, "step": 5967 }, { "epoch": 1.375115207373272, "grad_norm": 1.199947155848343, "learning_rate": 4.882786030292479e-07, "loss": 0.8256035447120667, "step": 5968 }, { "epoch": 1.3753456221198157, "grad_norm": 1.3084738837241086, "learning_rate": 4.879513173664099e-07, "loss": 0.9351227283477783, "step": 5969 }, { "epoch": 1.3755760368663594, "grad_norm": 1.1794682657820328, "learning_rate": 4.876241060267598e-07, "loss": 0.7221553921699524, "step": 5970 }, { "epoch": 1.3758064516129032, "grad_norm": 1.3959950512058854, "learning_rate": 4.872969690577928e-07, "loss": 0.7451514005661011, "step": 5971 }, { "epoch": 1.3760368663594469, "grad_norm": 2.704793745814284, "learning_rate": 4.86969906506992e-07, "loss": 0.810903787612915, "step": 5972 }, { "epoch": 1.3762672811059908, "grad_norm": 1.0363767093510534, "learning_rate": 4.866429184218298e-07, "loss": 0.6279938817024231, "step": 5973 }, { "epoch": 1.3764976958525346, "grad_norm": 1.4075128359986724, "learning_rate": 4.863160048497688e-07, "loss": 0.7742956876754761, "step": 5974 }, { "epoch": 1.3767281105990783, "grad_norm": 1.0416061346586747, "learning_rate": 4.859891658382597e-07, "loss": 0.7423844933509827, "step": 5975 }, { "epoch": 1.3769585253456222, "grad_norm": 1.0348526250721313, "learning_rate": 4.856624014347426e-07, "loss": 0.8387676477432251, "step": 5976 }, { "epoch": 1.377188940092166, "grad_norm": 1.3906652341525882, "learning_rate": 4.853357116866471e-07, "loss": 0.7959855794906616, "step": 5977 }, { "epoch": 1.3774193548387097, "grad_norm": 1.2781418274310543, "learning_rate": 4.850090966413913e-07, "loss": 0.7086259722709656, "step": 5978 }, { "epoch": 1.3776497695852534, "grad_norm": 1.113262974989995, "learning_rate": 4.846825563463838e-07, "loss": 0.7219396829605103, "step": 5979 }, { "epoch": 1.3778801843317972, "grad_norm": 1.2693838975886846, "learning_rate": 4.84356090849021e-07, "loss": 0.8383582830429077, "step": 5980 }, { "epoch": 1.378110599078341, "grad_norm": 1.2004259850017622, "learning_rate": 4.840297001966887e-07, "loss": 0.7624244689941406, "step": 5981 }, { "epoch": 1.3783410138248848, "grad_norm": 1.3275243269089372, "learning_rate": 4.837033844367626e-07, "loss": 0.7901623249053955, "step": 5982 }, { "epoch": 1.3785714285714286, "grad_norm": 1.0665581903589285, "learning_rate": 4.833771436166068e-07, "loss": 0.7732094526290894, "step": 5983 }, { "epoch": 1.3788018433179723, "grad_norm": 1.221680510593368, "learning_rate": 4.830509777835744e-07, "loss": 0.7882228493690491, "step": 5984 }, { "epoch": 1.379032258064516, "grad_norm": 1.3954212415484932, "learning_rate": 4.827248869850086e-07, "loss": 0.8601159453392029, "step": 5985 }, { "epoch": 1.37926267281106, "grad_norm": 1.442537797357167, "learning_rate": 4.823988712682406e-07, "loss": 0.8828538656234741, "step": 5986 }, { "epoch": 1.3794930875576037, "grad_norm": 1.2814445672112398, "learning_rate": 4.820729306805907e-07, "loss": 0.8586058020591736, "step": 5987 }, { "epoch": 1.3797235023041474, "grad_norm": 1.3476469386797916, "learning_rate": 4.8174706526937e-07, "loss": 0.8276243209838867, "step": 5988 }, { "epoch": 1.3799539170506914, "grad_norm": 1.1504215702512235, "learning_rate": 4.814212750818764e-07, "loss": 0.837665855884552, "step": 5989 }, { "epoch": 1.380184331797235, "grad_norm": 1.0830851541320008, "learning_rate": 4.810955601653978e-07, "loss": 0.7493194341659546, "step": 5990 }, { "epoch": 1.3804147465437788, "grad_norm": 0.9470923738615639, "learning_rate": 4.807699205672123e-07, "loss": 0.8382525444030762, "step": 5991 }, { "epoch": 1.3806451612903226, "grad_norm": 1.302996846441217, "learning_rate": 4.804443563345854e-07, "loss": 0.8152645826339722, "step": 5992 }, { "epoch": 1.3808755760368663, "grad_norm": 1.1087518210488847, "learning_rate": 4.801188675147719e-07, "loss": 0.7168164849281311, "step": 5993 }, { "epoch": 1.3811059907834102, "grad_norm": 1.3971974855003246, "learning_rate": 4.79793454155017e-07, "loss": 0.883512556552887, "step": 5994 }, { "epoch": 1.381336405529954, "grad_norm": 1.1775999496250547, "learning_rate": 4.794681163025536e-07, "loss": 0.7258438467979431, "step": 5995 }, { "epoch": 1.3815668202764977, "grad_norm": 1.102316858629444, "learning_rate": 4.79142854004604e-07, "loss": 0.8408991098403931, "step": 5996 }, { "epoch": 1.3817972350230414, "grad_norm": 1.2549882230845555, "learning_rate": 4.788176673083796e-07, "loss": 0.6506227254867554, "step": 5997 }, { "epoch": 1.3820276497695851, "grad_norm": 1.145761304273299, "learning_rate": 4.784925562610809e-07, "loss": 0.6971127986907959, "step": 5998 }, { "epoch": 1.382258064516129, "grad_norm": 1.3037562977083754, "learning_rate": 4.781675209098967e-07, "loss": 0.8399784564971924, "step": 5999 }, { "epoch": 1.3824884792626728, "grad_norm": 1.1085204750545832, "learning_rate": 4.778425613020067e-07, "loss": 0.6451772451400757, "step": 6000 }, { "epoch": 1.3827188940092165, "grad_norm": 1.2906420363235995, "learning_rate": 4.775176774845774e-07, "loss": 0.7794390916824341, "step": 6001 }, { "epoch": 1.3829493087557605, "grad_norm": 1.2681207047961411, "learning_rate": 4.771928695047652e-07, "loss": 0.7743663191795349, "step": 6002 }, { "epoch": 1.3831797235023042, "grad_norm": 1.3900227492937691, "learning_rate": 4.768681374097165e-07, "loss": 0.7654878497123718, "step": 6003 }, { "epoch": 1.383410138248848, "grad_norm": 0.9597367840932265, "learning_rate": 4.765434812465645e-07, "loss": 0.634769082069397, "step": 6004 }, { "epoch": 1.3836405529953917, "grad_norm": 1.506039076037628, "learning_rate": 4.762189010624337e-07, "loss": 0.7941944599151611, "step": 6005 }, { "epoch": 1.3838709677419354, "grad_norm": 1.015987334283248, "learning_rate": 4.75894396904436e-07, "loss": 0.7437179088592529, "step": 6006 }, { "epoch": 1.3841013824884794, "grad_norm": 1.4064808788220893, "learning_rate": 4.7556996881967236e-07, "loss": 0.7854535579681396, "step": 6007 }, { "epoch": 1.384331797235023, "grad_norm": 1.1454067558015728, "learning_rate": 4.752456168552339e-07, "loss": 0.7506910562515259, "step": 6008 }, { "epoch": 1.3845622119815668, "grad_norm": 1.3378490743548084, "learning_rate": 4.749213410581995e-07, "loss": 0.8967334032058716, "step": 6009 }, { "epoch": 1.3847926267281105, "grad_norm": 0.9073367214802157, "learning_rate": 4.7459714147563677e-07, "loss": 0.7053096294403076, "step": 6010 }, { "epoch": 1.3850230414746543, "grad_norm": 1.4011875457574152, "learning_rate": 4.7427301815460396e-07, "loss": 0.8759415149688721, "step": 6011 }, { "epoch": 1.3852534562211982, "grad_norm": 1.2083846258038176, "learning_rate": 4.739489711421466e-07, "loss": 0.8827483654022217, "step": 6012 }, { "epoch": 1.385483870967742, "grad_norm": 0.9892327750407551, "learning_rate": 4.736250004852993e-07, "loss": 0.7268258929252625, "step": 6013 }, { "epoch": 1.3857142857142857, "grad_norm": 1.3354283922456354, "learning_rate": 4.7330110623108665e-07, "loss": 0.7142586708068848, "step": 6014 }, { "epoch": 1.3859447004608296, "grad_norm": 0.9791582073391492, "learning_rate": 4.7297728842652116e-07, "loss": 0.7123303413391113, "step": 6015 }, { "epoch": 1.3861751152073734, "grad_norm": 1.1089770586845422, "learning_rate": 4.726535471186047e-07, "loss": 0.7548067569732666, "step": 6016 }, { "epoch": 1.386405529953917, "grad_norm": 1.205868893691031, "learning_rate": 4.723298823543277e-07, "loss": 0.7792191505432129, "step": 6017 }, { "epoch": 1.3866359447004608, "grad_norm": 1.313401532453458, "learning_rate": 4.7200629418066975e-07, "loss": 0.8658785820007324, "step": 6018 }, { "epoch": 1.3868663594470045, "grad_norm": 1.20345203638671, "learning_rate": 4.716827826445987e-07, "loss": 0.7173904776573181, "step": 6019 }, { "epoch": 1.3870967741935485, "grad_norm": 1.0016118220950732, "learning_rate": 4.7135934779307284e-07, "loss": 0.6675543785095215, "step": 6020 }, { "epoch": 1.3873271889400922, "grad_norm": 1.2559637316001069, "learning_rate": 4.710359896730378e-07, "loss": 0.8164724111557007, "step": 6021 }, { "epoch": 1.387557603686636, "grad_norm": 1.474439832240672, "learning_rate": 4.707127083314283e-07, "loss": 0.8354332447052002, "step": 6022 }, { "epoch": 1.3877880184331797, "grad_norm": 1.1544900465349175, "learning_rate": 4.7038950381516885e-07, "loss": 0.8414663672447205, "step": 6023 }, { "epoch": 1.3880184331797234, "grad_norm": 1.2150035811173532, "learning_rate": 4.700663761711717e-07, "loss": 0.7693418264389038, "step": 6024 }, { "epoch": 1.3882488479262673, "grad_norm": 1.0071958767588902, "learning_rate": 4.697433254463382e-07, "loss": 0.7809267044067383, "step": 6025 }, { "epoch": 1.388479262672811, "grad_norm": 1.203482571104156, "learning_rate": 4.6942035168755944e-07, "loss": 0.7455927133560181, "step": 6026 }, { "epoch": 1.3887096774193548, "grad_norm": 1.3018105004563159, "learning_rate": 4.6909745494171383e-07, "loss": 0.8217881917953491, "step": 6027 }, { "epoch": 1.3889400921658988, "grad_norm": 1.3723027057230852, "learning_rate": 4.687746352556703e-07, "loss": 0.8138882517814636, "step": 6028 }, { "epoch": 1.3891705069124423, "grad_norm": 1.241759909967513, "learning_rate": 4.6845189267628505e-07, "loss": 0.8926469087600708, "step": 6029 }, { "epoch": 1.3894009216589862, "grad_norm": 1.3027918343739477, "learning_rate": 4.681292272504036e-07, "loss": 0.797023355960846, "step": 6030 }, { "epoch": 1.38963133640553, "grad_norm": 0.8383796462842409, "learning_rate": 4.6780663902486104e-07, "loss": 0.6767498254776001, "step": 6031 }, { "epoch": 1.3898617511520737, "grad_norm": 1.2727364252127855, "learning_rate": 4.674841280464804e-07, "loss": 0.7514280080795288, "step": 6032 }, { "epoch": 1.3900921658986176, "grad_norm": 1.3853363805552346, "learning_rate": 4.671616943620731e-07, "loss": 0.8879726529121399, "step": 6033 }, { "epoch": 1.3903225806451613, "grad_norm": 0.8270134553121277, "learning_rate": 4.66839338018441e-07, "loss": 0.6674140095710754, "step": 6034 }, { "epoch": 1.390552995391705, "grad_norm": 1.078021820178179, "learning_rate": 4.6651705906237307e-07, "loss": 0.9094855785369873, "step": 6035 }, { "epoch": 1.3907834101382488, "grad_norm": 1.2561393182724931, "learning_rate": 4.661948575406478e-07, "loss": 0.8334506750106812, "step": 6036 }, { "epoch": 1.3910138248847925, "grad_norm": 1.040119500616202, "learning_rate": 4.658727335000323e-07, "loss": 0.6545997858047485, "step": 6037 }, { "epoch": 1.3912442396313365, "grad_norm": 1.1967093206075838, "learning_rate": 4.6555068698728237e-07, "loss": 0.7810590267181396, "step": 6038 }, { "epoch": 1.3914746543778802, "grad_norm": 1.0756703494881659, "learning_rate": 4.652287180491424e-07, "loss": 0.7581864595413208, "step": 6039 }, { "epoch": 1.391705069124424, "grad_norm": 1.2754594039466507, "learning_rate": 4.649068267323465e-07, "loss": 0.7134817242622375, "step": 6040 }, { "epoch": 1.3919354838709677, "grad_norm": 0.9730020123763279, "learning_rate": 4.645850130836162e-07, "loss": 0.7050445079803467, "step": 6041 }, { "epoch": 1.3921658986175114, "grad_norm": 1.146073776977597, "learning_rate": 4.642632771496622e-07, "loss": 0.8510535955429077, "step": 6042 }, { "epoch": 1.3923963133640553, "grad_norm": 1.3940656685053847, "learning_rate": 4.6394161897718454e-07, "loss": 0.8627035617828369, "step": 6043 }, { "epoch": 1.392626728110599, "grad_norm": 1.2671457951329919, "learning_rate": 4.6362003861287127e-07, "loss": 0.89891517162323, "step": 6044 }, { "epoch": 1.3928571428571428, "grad_norm": 1.3215265337916509, "learning_rate": 4.6329853610339896e-07, "loss": 0.7267141342163086, "step": 6045 }, { "epoch": 1.3930875576036867, "grad_norm": 1.4814794045534565, "learning_rate": 4.6297711149543405e-07, "loss": 0.8021189570426941, "step": 6046 }, { "epoch": 1.3933179723502305, "grad_norm": 1.0954918085269951, "learning_rate": 4.6265576483563054e-07, "loss": 0.7836861610412598, "step": 6047 }, { "epoch": 1.3935483870967742, "grad_norm": 1.1158269152355589, "learning_rate": 4.623344961706309e-07, "loss": 0.816940188407898, "step": 6048 }, { "epoch": 1.393778801843318, "grad_norm": 1.4383712223724088, "learning_rate": 4.6201330554706773e-07, "loss": 0.77923583984375, "step": 6049 }, { "epoch": 1.3940092165898617, "grad_norm": 1.3116759273395542, "learning_rate": 4.6169219301156117e-07, "loss": 0.8017981052398682, "step": 6050 }, { "epoch": 1.3942396313364056, "grad_norm": 0.9886522563222937, "learning_rate": 4.6137115861071973e-07, "loss": 0.6786847114562988, "step": 6051 }, { "epoch": 1.3944700460829493, "grad_norm": 1.1651814302030006, "learning_rate": 4.61050202391142e-07, "loss": 0.7802412509918213, "step": 6052 }, { "epoch": 1.394700460829493, "grad_norm": 1.1955845105043188, "learning_rate": 4.6072932439941347e-07, "loss": 0.7434886693954468, "step": 6053 }, { "epoch": 1.3949308755760368, "grad_norm": 1.2231160523968054, "learning_rate": 4.6040852468211e-07, "loss": 0.7590811252593994, "step": 6054 }, { "epoch": 1.3951612903225805, "grad_norm": 1.5534904257800726, "learning_rate": 4.600878032857949e-07, "loss": 0.8952670097351074, "step": 6055 }, { "epoch": 1.3953917050691245, "grad_norm": 1.1221688640413483, "learning_rate": 4.5976716025702036e-07, "loss": 0.8055328130722046, "step": 6056 }, { "epoch": 1.3956221198156682, "grad_norm": 1.2064570897657243, "learning_rate": 4.5944659564232725e-07, "loss": 0.8919316530227661, "step": 6057 }, { "epoch": 1.395852534562212, "grad_norm": 1.1074605434156857, "learning_rate": 4.591261094882453e-07, "loss": 0.701945960521698, "step": 6058 }, { "epoch": 1.3960829493087559, "grad_norm": 1.1766452414586335, "learning_rate": 4.5880570184129206e-07, "loss": 0.7457436323165894, "step": 6059 }, { "epoch": 1.3963133640552996, "grad_norm": 1.193782401804385, "learning_rate": 4.5848537274797527e-07, "loss": 0.8093513250350952, "step": 6060 }, { "epoch": 1.3965437788018433, "grad_norm": 1.5454221039375025, "learning_rate": 4.5816512225478965e-07, "loss": 0.7098822593688965, "step": 6061 }, { "epoch": 1.396774193548387, "grad_norm": 1.2339994165792372, "learning_rate": 4.578449504082189e-07, "loss": 0.7423167824745178, "step": 6062 }, { "epoch": 1.3970046082949308, "grad_norm": 1.1302042774482615, "learning_rate": 4.5752485725473624e-07, "loss": 0.8730076551437378, "step": 6063 }, { "epoch": 1.3972350230414747, "grad_norm": 1.124374396794659, "learning_rate": 4.572048428408024e-07, "loss": 0.6914420127868652, "step": 6064 }, { "epoch": 1.3974654377880185, "grad_norm": 1.3148006815381303, "learning_rate": 4.5688490721286664e-07, "loss": 0.8051402568817139, "step": 6065 }, { "epoch": 1.3976958525345622, "grad_norm": 1.548390651351193, "learning_rate": 4.5656505041736803e-07, "loss": 0.9185452461242676, "step": 6066 }, { "epoch": 1.397926267281106, "grad_norm": 1.1772485518113056, "learning_rate": 4.5624527250073287e-07, "loss": 0.766645073890686, "step": 6067 }, { "epoch": 1.3981566820276496, "grad_norm": 1.3246112666718692, "learning_rate": 4.559255735093763e-07, "loss": 0.8005224466323853, "step": 6068 }, { "epoch": 1.3983870967741936, "grad_norm": 1.2624209909197728, "learning_rate": 4.5560595348970275e-07, "loss": 0.8072810173034668, "step": 6069 }, { "epoch": 1.3986175115207373, "grad_norm": 1.2197415999956105, "learning_rate": 4.552864124881045e-07, "loss": 0.7537474632263184, "step": 6070 }, { "epoch": 1.398847926267281, "grad_norm": 1.3524984308216321, "learning_rate": 4.549669505509619e-07, "loss": 0.8396750092506409, "step": 6071 }, { "epoch": 1.399078341013825, "grad_norm": 1.3095033527266953, "learning_rate": 4.546475677246453e-07, "loss": 0.8456804752349854, "step": 6072 }, { "epoch": 1.3993087557603687, "grad_norm": 1.212970447769736, "learning_rate": 4.543282640555123e-07, "loss": 0.6150076389312744, "step": 6073 }, { "epoch": 1.3995391705069125, "grad_norm": 1.1345047277741707, "learning_rate": 4.540090395899089e-07, "loss": 0.667172908782959, "step": 6074 }, { "epoch": 1.3997695852534562, "grad_norm": 1.1269214154073468, "learning_rate": 4.5368989437417116e-07, "loss": 0.7918317914009094, "step": 6075 }, { "epoch": 1.4, "grad_norm": 1.070411671989194, "learning_rate": 4.5337082845462193e-07, "loss": 0.6800580024719238, "step": 6076 }, { "epoch": 1.4002304147465439, "grad_norm": 1.3908779413221009, "learning_rate": 4.530518418775733e-07, "loss": 0.9205034971237183, "step": 6077 }, { "epoch": 1.4004608294930876, "grad_norm": 0.9376373503434607, "learning_rate": 4.5273293468932585e-07, "loss": 0.7228822708129883, "step": 6078 }, { "epoch": 1.4006912442396313, "grad_norm": 1.0019153673681407, "learning_rate": 4.524141069361679e-07, "loss": 0.6827987432479858, "step": 6079 }, { "epoch": 1.400921658986175, "grad_norm": 1.086076018779761, "learning_rate": 4.520953586643779e-07, "loss": 0.6272581815719604, "step": 6080 }, { "epoch": 1.4011520737327188, "grad_norm": 1.1153873233388363, "learning_rate": 4.5177668992022125e-07, "loss": 0.8041881322860718, "step": 6081 }, { "epoch": 1.4013824884792627, "grad_norm": 0.986104576594979, "learning_rate": 4.5145810074995194e-07, "loss": 0.7284958362579346, "step": 6082 }, { "epoch": 1.4016129032258065, "grad_norm": 1.227152604501521, "learning_rate": 4.511395911998135e-07, "loss": 0.7653781175613403, "step": 6083 }, { "epoch": 1.4018433179723502, "grad_norm": 1.0466936448387898, "learning_rate": 4.5082116131603677e-07, "loss": 0.8037170171737671, "step": 6084 }, { "epoch": 1.4020737327188941, "grad_norm": 1.1911735797842866, "learning_rate": 4.505028111448411e-07, "loss": 0.783043384552002, "step": 6085 }, { "epoch": 1.4023041474654379, "grad_norm": 1.0547410930732963, "learning_rate": 4.501845407324354e-07, "loss": 0.6712161302566528, "step": 6086 }, { "epoch": 1.4025345622119816, "grad_norm": 1.6406574524985842, "learning_rate": 4.4986635012501575e-07, "loss": 0.9537261724472046, "step": 6087 }, { "epoch": 1.4027649769585253, "grad_norm": 1.4091085059994304, "learning_rate": 4.495482393687666e-07, "loss": 0.8984304666519165, "step": 6088 }, { "epoch": 1.402995391705069, "grad_norm": 1.0430973660752654, "learning_rate": 4.4923020850986224e-07, "loss": 0.6894555687904358, "step": 6089 }, { "epoch": 1.403225806451613, "grad_norm": 1.1542541609725157, "learning_rate": 4.489122575944639e-07, "loss": 0.685502290725708, "step": 6090 }, { "epoch": 1.4034562211981567, "grad_norm": 1.1082950627991512, "learning_rate": 4.485943866687216e-07, "loss": 0.6794239282608032, "step": 6091 }, { "epoch": 1.4036866359447004, "grad_norm": 1.0717636346133315, "learning_rate": 4.482765957787744e-07, "loss": 0.7647888660430908, "step": 6092 }, { "epoch": 1.4039170506912442, "grad_norm": 1.3476206179513355, "learning_rate": 4.4795888497074896e-07, "loss": 0.798794150352478, "step": 6093 }, { "epoch": 1.404147465437788, "grad_norm": 1.0358789181259667, "learning_rate": 4.4764125429076026e-07, "loss": 0.79430091381073, "step": 6094 }, { "epoch": 1.4043778801843319, "grad_norm": 1.4040182367122596, "learning_rate": 4.4732370378491255e-07, "loss": 0.9089795351028442, "step": 6095 }, { "epoch": 1.4046082949308756, "grad_norm": 0.9307801992196251, "learning_rate": 4.4700623349929757e-07, "loss": 0.8270718455314636, "step": 6096 }, { "epoch": 1.4048387096774193, "grad_norm": 1.082228260794844, "learning_rate": 4.466888434799958e-07, "loss": 0.7550361156463623, "step": 6097 }, { "epoch": 1.4050691244239633, "grad_norm": 1.15557625190535, "learning_rate": 4.463715337730759e-07, "loss": 0.7406442165374756, "step": 6098 }, { "epoch": 1.405299539170507, "grad_norm": 1.4065045960279658, "learning_rate": 4.460543044245949e-07, "loss": 0.830552875995636, "step": 6099 }, { "epoch": 1.4055299539170507, "grad_norm": 1.4160409051991987, "learning_rate": 4.45737155480598e-07, "loss": 0.8961822390556335, "step": 6100 }, { "epoch": 1.4057603686635944, "grad_norm": 1.2630678724710616, "learning_rate": 4.454200869871195e-07, "loss": 0.6307489275932312, "step": 6101 }, { "epoch": 1.4059907834101382, "grad_norm": 1.437795392364305, "learning_rate": 4.451030989901808e-07, "loss": 0.8682084083557129, "step": 6102 }, { "epoch": 1.4062211981566821, "grad_norm": 1.1897592960029226, "learning_rate": 4.4478619153579323e-07, "loss": 0.7157681584358215, "step": 6103 }, { "epoch": 1.4064516129032258, "grad_norm": 1.196767224907471, "learning_rate": 4.4446936466995486e-07, "loss": 0.7267071008682251, "step": 6104 }, { "epoch": 1.4066820276497696, "grad_norm": 1.1191501401801882, "learning_rate": 4.4415261843865246e-07, "loss": 0.8435063362121582, "step": 6105 }, { "epoch": 1.4069124423963133, "grad_norm": 1.2220260712556485, "learning_rate": 4.43835952887862e-07, "loss": 0.8895175457000732, "step": 6106 }, { "epoch": 1.407142857142857, "grad_norm": 1.0150052474935476, "learning_rate": 4.435193680635467e-07, "loss": 0.7470073699951172, "step": 6107 }, { "epoch": 1.407373271889401, "grad_norm": 1.376675993117338, "learning_rate": 4.432028640116581e-07, "loss": 0.7993630170822144, "step": 6108 }, { "epoch": 1.4076036866359447, "grad_norm": 1.2675455750766673, "learning_rate": 4.4288644077813695e-07, "loss": 0.823069155216217, "step": 6109 }, { "epoch": 1.4078341013824884, "grad_norm": 1.374585518914166, "learning_rate": 4.4257009840891146e-07, "loss": 0.7665367126464844, "step": 6110 }, { "epoch": 1.4080645161290324, "grad_norm": 1.1174810423449963, "learning_rate": 4.422538369498979e-07, "loss": 0.7173991799354553, "step": 6111 }, { "epoch": 1.4082949308755761, "grad_norm": 0.9476955630635919, "learning_rate": 4.4193765644700186e-07, "loss": 0.8288347125053406, "step": 6112 }, { "epoch": 1.4085253456221198, "grad_norm": 1.206088367901853, "learning_rate": 4.4162155694611636e-07, "loss": 0.8589911460876465, "step": 6113 }, { "epoch": 1.4087557603686636, "grad_norm": 1.2884473987369411, "learning_rate": 4.4130553849312213e-07, "loss": 0.8783868551254272, "step": 6114 }, { "epoch": 1.4089861751152073, "grad_norm": 1.0994332560949611, "learning_rate": 4.409896011338898e-07, "loss": 0.7625287771224976, "step": 6115 }, { "epoch": 1.4092165898617512, "grad_norm": 1.1571434855502665, "learning_rate": 4.406737449142769e-07, "loss": 0.7412571907043457, "step": 6116 }, { "epoch": 1.409447004608295, "grad_norm": 0.9525276096114424, "learning_rate": 4.4035796988012943e-07, "loss": 0.6248455047607422, "step": 6117 }, { "epoch": 1.4096774193548387, "grad_norm": 1.1843810443395109, "learning_rate": 4.400422760772817e-07, "loss": 0.7970919609069824, "step": 6118 }, { "epoch": 1.4099078341013824, "grad_norm": 1.0403384039115238, "learning_rate": 4.397266635515563e-07, "loss": 0.6184223294258118, "step": 6119 }, { "epoch": 1.4101382488479262, "grad_norm": 1.07818776364935, "learning_rate": 4.394111323487637e-07, "loss": 0.9014843702316284, "step": 6120 }, { "epoch": 1.41036866359447, "grad_norm": 1.1660248005288976, "learning_rate": 4.390956825147034e-07, "loss": 0.8468939661979675, "step": 6121 }, { "epoch": 1.4105990783410138, "grad_norm": 1.0810631729189881, "learning_rate": 4.3878031409516234e-07, "loss": 0.7832604646682739, "step": 6122 }, { "epoch": 1.4108294930875576, "grad_norm": 1.0700225295832282, "learning_rate": 4.3846502713591527e-07, "loss": 0.7202898263931274, "step": 6123 }, { "epoch": 1.4110599078341015, "grad_norm": 1.1788285042234896, "learning_rate": 4.3814982168272664e-07, "loss": 0.6785540580749512, "step": 6124 }, { "epoch": 1.4112903225806452, "grad_norm": 1.3040233352486812, "learning_rate": 4.378346977813474e-07, "loss": 0.795532763004303, "step": 6125 }, { "epoch": 1.411520737327189, "grad_norm": 0.8875056644654742, "learning_rate": 4.3751965547751735e-07, "loss": 0.7715259790420532, "step": 6126 }, { "epoch": 1.4117511520737327, "grad_norm": 1.4252318364105403, "learning_rate": 4.37204694816965e-07, "loss": 0.8657132983207703, "step": 6127 }, { "epoch": 1.4119815668202764, "grad_norm": 1.0207817658354317, "learning_rate": 4.3688981584540586e-07, "loss": 0.7253363132476807, "step": 6128 }, { "epoch": 1.4122119815668204, "grad_norm": 1.2055823367063212, "learning_rate": 4.365750186085447e-07, "loss": 0.8511998653411865, "step": 6129 }, { "epoch": 1.412442396313364, "grad_norm": 1.3256931814656627, "learning_rate": 4.3626030315207386e-07, "loss": 0.7936528921127319, "step": 6130 }, { "epoch": 1.4126728110599078, "grad_norm": 1.1878967804503957, "learning_rate": 4.3594566952167324e-07, "loss": 0.758521556854248, "step": 6131 }, { "epoch": 1.4129032258064516, "grad_norm": 1.242405288398936, "learning_rate": 4.3563111776301243e-07, "loss": 0.8202048540115356, "step": 6132 }, { "epoch": 1.4131336405529953, "grad_norm": 1.075213759854547, "learning_rate": 4.3531664792174773e-07, "loss": 0.7864067554473877, "step": 6133 }, { "epoch": 1.4133640552995392, "grad_norm": 1.472991105564755, "learning_rate": 4.350022600435236e-07, "loss": 0.8051233291625977, "step": 6134 }, { "epoch": 1.413594470046083, "grad_norm": 1.0811225554895896, "learning_rate": 4.34687954173974e-07, "loss": 0.7617348432540894, "step": 6135 }, { "epoch": 1.4138248847926267, "grad_norm": 1.299621377240526, "learning_rate": 4.3437373035871927e-07, "loss": 0.7899652719497681, "step": 6136 }, { "epoch": 1.4140552995391706, "grad_norm": 1.1704157180732915, "learning_rate": 4.340595886433689e-07, "loss": 0.8467222452163696, "step": 6137 }, { "epoch": 1.4142857142857144, "grad_norm": 1.294364382858993, "learning_rate": 4.3374552907352003e-07, "loss": 0.8451426029205322, "step": 6138 }, { "epoch": 1.414516129032258, "grad_norm": 1.1053072195052795, "learning_rate": 4.3343155169475797e-07, "loss": 0.7140414714813232, "step": 6139 }, { "epoch": 1.4147465437788018, "grad_norm": 1.365344165744123, "learning_rate": 4.331176565526558e-07, "loss": 0.7680803537368774, "step": 6140 }, { "epoch": 1.4149769585253456, "grad_norm": 1.0970331390876962, "learning_rate": 4.328038436927757e-07, "loss": 0.7262120246887207, "step": 6141 }, { "epoch": 1.4152073732718895, "grad_norm": 1.2176292189863585, "learning_rate": 4.3249011316066676e-07, "loss": 0.7788687944412231, "step": 6142 }, { "epoch": 1.4154377880184332, "grad_norm": 1.4880584379115793, "learning_rate": 4.321764650018662e-07, "loss": 0.7613503336906433, "step": 6143 }, { "epoch": 1.415668202764977, "grad_norm": 0.9554644370778598, "learning_rate": 4.3186289926190056e-07, "loss": 0.6778309345245361, "step": 6144 }, { "epoch": 1.4158986175115207, "grad_norm": 1.5159867718873894, "learning_rate": 4.315494159862829e-07, "loss": 0.8626673221588135, "step": 6145 }, { "epoch": 1.4161290322580644, "grad_norm": 1.194727935560369, "learning_rate": 4.312360152205147e-07, "loss": 0.8321051597595215, "step": 6146 }, { "epoch": 1.4163594470046084, "grad_norm": 1.146293428483721, "learning_rate": 4.309226970100861e-07, "loss": 0.9317119717597961, "step": 6147 }, { "epoch": 1.416589861751152, "grad_norm": 1.4669878139895565, "learning_rate": 4.306094614004748e-07, "loss": 0.9479870200157166, "step": 6148 }, { "epoch": 1.4168202764976958, "grad_norm": 1.0166991353273056, "learning_rate": 4.3029630843714606e-07, "loss": 0.8222699165344238, "step": 6149 }, { "epoch": 1.4170506912442398, "grad_norm": 1.427356205375722, "learning_rate": 4.2998323816555427e-07, "loss": 0.8232519030570984, "step": 6150 }, { "epoch": 1.4172811059907833, "grad_norm": 1.156719588287236, "learning_rate": 4.2967025063114057e-07, "loss": 0.7423735857009888, "step": 6151 }, { "epoch": 1.4175115207373272, "grad_norm": 1.1009896479281802, "learning_rate": 4.2935734587933527e-07, "loss": 0.6947557926177979, "step": 6152 }, { "epoch": 1.417741935483871, "grad_norm": 1.2980025668504918, "learning_rate": 4.290445239555558e-07, "loss": 0.789128303527832, "step": 6153 }, { "epoch": 1.4179723502304147, "grad_norm": 1.344185599290992, "learning_rate": 4.2873178490520745e-07, "loss": 0.8025885820388794, "step": 6154 }, { "epoch": 1.4182027649769586, "grad_norm": 1.3491619317054568, "learning_rate": 4.284191287736847e-07, "loss": 0.8139045238494873, "step": 6155 }, { "epoch": 1.4184331797235024, "grad_norm": 1.1246209635446252, "learning_rate": 4.2810655560636864e-07, "loss": 0.8154167532920837, "step": 6156 }, { "epoch": 1.418663594470046, "grad_norm": 1.0954033524128675, "learning_rate": 4.2779406544862896e-07, "loss": 0.6383910775184631, "step": 6157 }, { "epoch": 1.4188940092165898, "grad_norm": 1.217902628448707, "learning_rate": 4.2748165834582316e-07, "loss": 0.7008179426193237, "step": 6158 }, { "epoch": 1.4191244239631335, "grad_norm": 1.2584275851601723, "learning_rate": 4.2716933434329684e-07, "loss": 0.9458012580871582, "step": 6159 }, { "epoch": 1.4193548387096775, "grad_norm": 1.1170402428175406, "learning_rate": 4.268570934863829e-07, "loss": 0.7354133725166321, "step": 6160 }, { "epoch": 1.4195852534562212, "grad_norm": 1.050503834766047, "learning_rate": 4.265449358204034e-07, "loss": 0.7146268486976624, "step": 6161 }, { "epoch": 1.419815668202765, "grad_norm": 1.3602740783757037, "learning_rate": 4.262328613906674e-07, "loss": 0.7357315421104431, "step": 6162 }, { "epoch": 1.4200460829493087, "grad_norm": 1.5139772991772644, "learning_rate": 4.2592087024247157e-07, "loss": 0.8006314039230347, "step": 6163 }, { "epoch": 1.4202764976958524, "grad_norm": 1.2194249079603743, "learning_rate": 4.256089624211018e-07, "loss": 0.8299369812011719, "step": 6164 }, { "epoch": 1.4205069124423964, "grad_norm": 1.3878054713959478, "learning_rate": 4.252971379718308e-07, "loss": 0.7018890380859375, "step": 6165 }, { "epoch": 1.42073732718894, "grad_norm": 1.0332854509364862, "learning_rate": 4.24985396939919e-07, "loss": 0.6501315236091614, "step": 6166 }, { "epoch": 1.4209677419354838, "grad_norm": 1.6385767983913562, "learning_rate": 4.24673739370616e-07, "loss": 0.8379749059677124, "step": 6167 }, { "epoch": 1.4211981566820278, "grad_norm": 1.3590615179836698, "learning_rate": 4.24362165309158e-07, "loss": 0.7996747493743896, "step": 6168 }, { "epoch": 1.4214285714285715, "grad_norm": 1.2270246479776195, "learning_rate": 4.240506748007695e-07, "loss": 0.7258181571960449, "step": 6169 }, { "epoch": 1.4216589861751152, "grad_norm": 0.9997463365032918, "learning_rate": 4.237392678906633e-07, "loss": 0.6035803556442261, "step": 6170 }, { "epoch": 1.421889400921659, "grad_norm": 1.1041316785012205, "learning_rate": 4.2342794462403954e-07, "loss": 0.7668799757957458, "step": 6171 }, { "epoch": 1.4221198156682027, "grad_norm": 0.9385556238542058, "learning_rate": 4.23116705046086e-07, "loss": 0.7816733121871948, "step": 6172 }, { "epoch": 1.4223502304147466, "grad_norm": 1.2003519134278278, "learning_rate": 4.228055492019793e-07, "loss": 0.8753983974456787, "step": 6173 }, { "epoch": 1.4225806451612903, "grad_norm": 1.1591394093837553, "learning_rate": 4.224944771368831e-07, "loss": 0.8319464921951294, "step": 6174 }, { "epoch": 1.422811059907834, "grad_norm": 1.1444278460686073, "learning_rate": 4.2218348889594866e-07, "loss": 0.6670328378677368, "step": 6175 }, { "epoch": 1.4230414746543778, "grad_norm": 0.9949133230999909, "learning_rate": 4.218725845243163e-07, "loss": 0.7879645824432373, "step": 6176 }, { "epoch": 1.4232718894009215, "grad_norm": 1.1897456513351008, "learning_rate": 4.2156176406711287e-07, "loss": 0.709680438041687, "step": 6177 }, { "epoch": 1.4235023041474655, "grad_norm": 1.2454467445687987, "learning_rate": 4.2125102756945364e-07, "loss": 0.7990894317626953, "step": 6178 }, { "epoch": 1.4237327188940092, "grad_norm": 0.899401568311558, "learning_rate": 4.2094037507644165e-07, "loss": 0.7283308506011963, "step": 6179 }, { "epoch": 1.423963133640553, "grad_norm": 1.1017464258775596, "learning_rate": 4.2062980663316715e-07, "loss": 0.8763309717178345, "step": 6180 }, { "epoch": 1.4241935483870969, "grad_norm": 1.5313476968397717, "learning_rate": 4.2031932228470966e-07, "loss": 0.9370014667510986, "step": 6181 }, { "epoch": 1.4244239631336406, "grad_norm": 1.2317913481286529, "learning_rate": 4.2000892207613526e-07, "loss": 0.7883036136627197, "step": 6182 }, { "epoch": 1.4246543778801843, "grad_norm": 1.0986212570485994, "learning_rate": 4.196986060524975e-07, "loss": 0.7021682262420654, "step": 6183 }, { "epoch": 1.424884792626728, "grad_norm": 1.6809928588875014, "learning_rate": 4.193883742588393e-07, "loss": 0.842636227607727, "step": 6184 }, { "epoch": 1.4251152073732718, "grad_norm": 1.3804520546599122, "learning_rate": 4.190782267401899e-07, "loss": 0.8003957867622375, "step": 6185 }, { "epoch": 1.4253456221198157, "grad_norm": 1.4234115388616575, "learning_rate": 4.1876816354156655e-07, "loss": 0.9799495935440063, "step": 6186 }, { "epoch": 1.4255760368663595, "grad_norm": 1.4430834747300494, "learning_rate": 4.184581847079751e-07, "loss": 0.8726102113723755, "step": 6187 }, { "epoch": 1.4258064516129032, "grad_norm": 1.4779961873749974, "learning_rate": 4.181482902844082e-07, "loss": 0.8771729469299316, "step": 6188 }, { "epoch": 1.426036866359447, "grad_norm": 0.932904262005563, "learning_rate": 4.1783848031584644e-07, "loss": 0.5891281962394714, "step": 6189 }, { "epoch": 1.4262672811059907, "grad_norm": 1.0356433358815755, "learning_rate": 4.1752875484725904e-07, "loss": 0.8133054971694946, "step": 6190 }, { "epoch": 1.4264976958525346, "grad_norm": 1.2051464792634443, "learning_rate": 4.1721911392360164e-07, "loss": 0.7175684571266174, "step": 6191 }, { "epoch": 1.4267281105990783, "grad_norm": 1.2483759508518841, "learning_rate": 4.16909557589818e-07, "loss": 0.7112927436828613, "step": 6192 }, { "epoch": 1.426958525345622, "grad_norm": 1.3756845434805187, "learning_rate": 4.166000858908406e-07, "loss": 0.8564406037330627, "step": 6193 }, { "epoch": 1.427188940092166, "grad_norm": 1.2070686503198162, "learning_rate": 4.162906988715883e-07, "loss": 0.7630729675292969, "step": 6194 }, { "epoch": 1.4274193548387097, "grad_norm": 0.971140934311516, "learning_rate": 4.1598139657696806e-07, "loss": 0.6810768246650696, "step": 6195 }, { "epoch": 1.4276497695852535, "grad_norm": 0.9185719080310675, "learning_rate": 4.1567217905187535e-07, "loss": 0.8482312560081482, "step": 6196 }, { "epoch": 1.4278801843317972, "grad_norm": 1.4356078879259653, "learning_rate": 4.1536304634119225e-07, "loss": 0.845355749130249, "step": 6197 }, { "epoch": 1.428110599078341, "grad_norm": 1.3990653285356356, "learning_rate": 4.1505399848978896e-07, "loss": 0.8082824349403381, "step": 6198 }, { "epoch": 1.4283410138248849, "grad_norm": 1.5497395393382225, "learning_rate": 4.147450355425235e-07, "loss": 0.8141404390335083, "step": 6199 }, { "epoch": 1.4285714285714286, "grad_norm": 1.0209015709753073, "learning_rate": 4.14436157544241e-07, "loss": 0.8144549131393433, "step": 6200 }, { "epoch": 1.4288018433179723, "grad_norm": 1.2316152605954584, "learning_rate": 4.141273645397754e-07, "loss": 0.6554359793663025, "step": 6201 }, { "epoch": 1.429032258064516, "grad_norm": 1.2095729612520494, "learning_rate": 4.138186565739472e-07, "loss": 0.8035449981689453, "step": 6202 }, { "epoch": 1.4292626728110598, "grad_norm": 1.348688453980758, "learning_rate": 4.1351003369156467e-07, "loss": 0.7848105430603027, "step": 6203 }, { "epoch": 1.4294930875576037, "grad_norm": 1.167048125389705, "learning_rate": 4.132014959374246e-07, "loss": 0.7064214944839478, "step": 6204 }, { "epoch": 1.4297235023041475, "grad_norm": 1.236002479887974, "learning_rate": 4.128930433563107e-07, "loss": 0.7636318802833557, "step": 6205 }, { "epoch": 1.4299539170506912, "grad_norm": 1.2440935326289273, "learning_rate": 4.1258467599299395e-07, "loss": 0.6839499473571777, "step": 6206 }, { "epoch": 1.4301843317972351, "grad_norm": 1.1802386777878584, "learning_rate": 4.122763938922341e-07, "loss": 0.8355294466018677, "step": 6207 }, { "epoch": 1.4304147465437789, "grad_norm": 1.1238131581281627, "learning_rate": 4.1196819709877773e-07, "loss": 0.7563334107398987, "step": 6208 }, { "epoch": 1.4306451612903226, "grad_norm": 1.1336601077663977, "learning_rate": 4.116600856573588e-07, "loss": 0.6991991996765137, "step": 6209 }, { "epoch": 1.4308755760368663, "grad_norm": 1.2669311049959366, "learning_rate": 4.113520596126998e-07, "loss": 0.7249872088432312, "step": 6210 }, { "epoch": 1.43110599078341, "grad_norm": 0.9386622429459606, "learning_rate": 4.110441190095101e-07, "loss": 0.6570736169815063, "step": 6211 }, { "epoch": 1.431336405529954, "grad_norm": 1.0652944602016763, "learning_rate": 4.107362638924865e-07, "loss": 0.7137724161148071, "step": 6212 }, { "epoch": 1.4315668202764977, "grad_norm": 1.1571956532799377, "learning_rate": 4.1042849430631453e-07, "loss": 0.7620561122894287, "step": 6213 }, { "epoch": 1.4317972350230415, "grad_norm": 1.118516282963539, "learning_rate": 4.1012081029566616e-07, "loss": 0.8186367750167847, "step": 6214 }, { "epoch": 1.4320276497695852, "grad_norm": 1.2414517851095686, "learning_rate": 4.098132119052008e-07, "loss": 0.8068171739578247, "step": 6215 }, { "epoch": 1.432258064516129, "grad_norm": 1.3160335320341774, "learning_rate": 4.095056991795668e-07, "loss": 0.8640002012252808, "step": 6216 }, { "epoch": 1.4324884792626729, "grad_norm": 1.4376158954775202, "learning_rate": 4.0919827216339887e-07, "loss": 0.8886386156082153, "step": 6217 }, { "epoch": 1.4327188940092166, "grad_norm": 1.072787779438559, "learning_rate": 4.0889093090131965e-07, "loss": 0.6853137016296387, "step": 6218 }, { "epoch": 1.4329493087557603, "grad_norm": 1.0751813749856631, "learning_rate": 4.0858367543793923e-07, "loss": 0.7423670291900635, "step": 6219 }, { "epoch": 1.4331797235023043, "grad_norm": 1.2596005033506457, "learning_rate": 4.0827650581785544e-07, "loss": 0.7969200611114502, "step": 6220 }, { "epoch": 1.433410138248848, "grad_norm": 1.1441853902577663, "learning_rate": 4.079694220856531e-07, "loss": 0.8506221771240234, "step": 6221 }, { "epoch": 1.4336405529953917, "grad_norm": 1.107985966829949, "learning_rate": 4.076624242859058e-07, "loss": 0.6755083799362183, "step": 6222 }, { "epoch": 1.4338709677419355, "grad_norm": 1.0751582832116895, "learning_rate": 4.0735551246317333e-07, "loss": 0.7734944820404053, "step": 6223 }, { "epoch": 1.4341013824884792, "grad_norm": 1.1828392807290495, "learning_rate": 4.0704868666200345e-07, "loss": 0.8564216494560242, "step": 6224 }, { "epoch": 1.4343317972350231, "grad_norm": 0.8521811929477493, "learning_rate": 4.067419469269321e-07, "loss": 0.6858065128326416, "step": 6225 }, { "epoch": 1.4345622119815669, "grad_norm": 1.4454169020848073, "learning_rate": 4.064352933024813e-07, "loss": 0.684749960899353, "step": 6226 }, { "epoch": 1.4347926267281106, "grad_norm": 1.0124943930771644, "learning_rate": 4.061287258331624e-07, "loss": 0.7648766040802002, "step": 6227 }, { "epoch": 1.4350230414746543, "grad_norm": 1.2226521022766697, "learning_rate": 4.058222445634727e-07, "loss": 0.924850583076477, "step": 6228 }, { "epoch": 1.435253456221198, "grad_norm": 1.2841804739911125, "learning_rate": 4.055158495378972e-07, "loss": 0.906406581401825, "step": 6229 }, { "epoch": 1.435483870967742, "grad_norm": 1.1497462597145154, "learning_rate": 4.052095408009095e-07, "loss": 0.9169156551361084, "step": 6230 }, { "epoch": 1.4357142857142857, "grad_norm": 0.9291011874506654, "learning_rate": 4.0490331839696967e-07, "loss": 0.7367587685585022, "step": 6231 }, { "epoch": 1.4359447004608294, "grad_norm": 0.9837392218179005, "learning_rate": 4.045971823705249e-07, "loss": 0.7608749270439148, "step": 6232 }, { "epoch": 1.4361751152073734, "grad_norm": 1.006459600101246, "learning_rate": 4.0429113276601134e-07, "loss": 0.7008038759231567, "step": 6233 }, { "epoch": 1.4364055299539171, "grad_norm": 1.3644950830796674, "learning_rate": 4.039851696278511e-07, "loss": 0.8581372499465942, "step": 6234 }, { "epoch": 1.4366359447004609, "grad_norm": 1.1117269621825037, "learning_rate": 4.036792930004542e-07, "loss": 0.6602354049682617, "step": 6235 }, { "epoch": 1.4368663594470046, "grad_norm": 1.1136625894629528, "learning_rate": 4.0337350292821893e-07, "loss": 0.8560018539428711, "step": 6236 }, { "epoch": 1.4370967741935483, "grad_norm": 1.5699670277885023, "learning_rate": 4.030677994555298e-07, "loss": 0.8837640285491943, "step": 6237 }, { "epoch": 1.4373271889400923, "grad_norm": 1.1788518631283098, "learning_rate": 4.027621826267593e-07, "loss": 0.8214797973632812, "step": 6238 }, { "epoch": 1.437557603686636, "grad_norm": 1.091488147712342, "learning_rate": 4.024566524862675e-07, "loss": 0.7590944766998291, "step": 6239 }, { "epoch": 1.4377880184331797, "grad_norm": 1.5224250495012106, "learning_rate": 4.021512090784014e-07, "loss": 0.8792011141777039, "step": 6240 }, { "epoch": 1.4380184331797234, "grad_norm": 0.9801567843215049, "learning_rate": 4.0184585244749556e-07, "loss": 0.8309401273727417, "step": 6241 }, { "epoch": 1.4382488479262672, "grad_norm": 1.2518924977337436, "learning_rate": 4.015405826378727e-07, "loss": 0.7474797964096069, "step": 6242 }, { "epoch": 1.4384792626728111, "grad_norm": 1.0203221096159534, "learning_rate": 4.012353996938421e-07, "loss": 0.7376091480255127, "step": 6243 }, { "epoch": 1.4387096774193548, "grad_norm": 1.4049798692682764, "learning_rate": 4.0093030365970014e-07, "loss": 0.7809054851531982, "step": 6244 }, { "epoch": 1.4389400921658986, "grad_norm": 1.206100995388555, "learning_rate": 4.0062529457973194e-07, "loss": 0.8551669120788574, "step": 6245 }, { "epoch": 1.4391705069124425, "grad_norm": 1.3285364918408127, "learning_rate": 4.0032037249820874e-07, "loss": 0.7874705791473389, "step": 6246 }, { "epoch": 1.4394009216589863, "grad_norm": 1.220500481419073, "learning_rate": 4.0001553745938923e-07, "loss": 0.8032190799713135, "step": 6247 }, { "epoch": 1.43963133640553, "grad_norm": 1.1833761956090303, "learning_rate": 3.9971078950752057e-07, "loss": 0.7600107192993164, "step": 6248 }, { "epoch": 1.4398617511520737, "grad_norm": 1.0770488794400255, "learning_rate": 3.994061286868361e-07, "loss": 0.7738933563232422, "step": 6249 }, { "epoch": 1.4400921658986174, "grad_norm": 1.2036013798832181, "learning_rate": 3.9910155504155665e-07, "loss": 0.701007604598999, "step": 6250 }, { "epoch": 1.4403225806451614, "grad_norm": 1.2067244620095277, "learning_rate": 3.9879706861589126e-07, "loss": 0.8962818384170532, "step": 6251 }, { "epoch": 1.4405529953917051, "grad_norm": 1.4532648423769148, "learning_rate": 3.9849266945403513e-07, "loss": 0.7636146545410156, "step": 6252 }, { "epoch": 1.4407834101382488, "grad_norm": 1.4158432417231142, "learning_rate": 3.981883576001722e-07, "loss": 0.8816943168640137, "step": 6253 }, { "epoch": 1.4410138248847926, "grad_norm": 1.2321816109724755, "learning_rate": 3.978841330984725e-07, "loss": 0.7252858877182007, "step": 6254 }, { "epoch": 1.4412442396313363, "grad_norm": 1.1568327683598156, "learning_rate": 3.975799959930932e-07, "loss": 0.6720175743103027, "step": 6255 }, { "epoch": 1.4414746543778802, "grad_norm": 0.981779637597959, "learning_rate": 3.972759463281805e-07, "loss": 0.8000779151916504, "step": 6256 }, { "epoch": 1.441705069124424, "grad_norm": 1.2561538909400267, "learning_rate": 3.9697198414786626e-07, "loss": 0.7356371283531189, "step": 6257 }, { "epoch": 1.4419354838709677, "grad_norm": 1.3228468777834088, "learning_rate": 3.966681094962703e-07, "loss": 0.708438515663147, "step": 6258 }, { "epoch": 1.4421658986175117, "grad_norm": 1.1635121950639566, "learning_rate": 3.963643224174994e-07, "loss": 0.709287166595459, "step": 6259 }, { "epoch": 1.4423963133640554, "grad_norm": 1.2638923885979756, "learning_rate": 3.9606062295564813e-07, "loss": 0.743755578994751, "step": 6260 }, { "epoch": 1.442626728110599, "grad_norm": 1.119467668131696, "learning_rate": 3.9575701115479744e-07, "loss": 0.9727948904037476, "step": 6261 }, { "epoch": 1.4428571428571428, "grad_norm": 1.165539680123963, "learning_rate": 3.9545348705901703e-07, "loss": 0.9070688486099243, "step": 6262 }, { "epoch": 1.4430875576036866, "grad_norm": 1.3995169117674358, "learning_rate": 3.951500507123627e-07, "loss": 0.8167496919631958, "step": 6263 }, { "epoch": 1.4433179723502305, "grad_norm": 1.1204443462300027, "learning_rate": 3.948467021588775e-07, "loss": 0.7691773772239685, "step": 6264 }, { "epoch": 1.4435483870967742, "grad_norm": 1.2915211655205685, "learning_rate": 3.945434414425927e-07, "loss": 0.7638411521911621, "step": 6265 }, { "epoch": 1.443778801843318, "grad_norm": 1.0311097608426527, "learning_rate": 3.942402686075258e-07, "loss": 0.8138284683227539, "step": 6266 }, { "epoch": 1.4440092165898617, "grad_norm": 1.430800234304149, "learning_rate": 3.939371836976816e-07, "loss": 0.8404628038406372, "step": 6267 }, { "epoch": 1.4442396313364054, "grad_norm": 1.0744818989251388, "learning_rate": 3.936341867570533e-07, "loss": 0.7354726791381836, "step": 6268 }, { "epoch": 1.4444700460829494, "grad_norm": 1.2516347720495873, "learning_rate": 3.9333127782962003e-07, "loss": 0.8607511520385742, "step": 6269 }, { "epoch": 1.444700460829493, "grad_norm": 1.03787633948696, "learning_rate": 3.930284569593483e-07, "loss": 0.7372239232063293, "step": 6270 }, { "epoch": 1.4449308755760368, "grad_norm": 1.205690175362699, "learning_rate": 3.927257241901929e-07, "loss": 0.8902593851089478, "step": 6271 }, { "epoch": 1.4451612903225808, "grad_norm": 1.0978426997676995, "learning_rate": 3.924230795660947e-07, "loss": 0.7481765747070312, "step": 6272 }, { "epoch": 1.4453917050691243, "grad_norm": 1.1624854693895736, "learning_rate": 3.9212052313098177e-07, "loss": 0.6868888139724731, "step": 6273 }, { "epoch": 1.4456221198156682, "grad_norm": 1.219538424407328, "learning_rate": 3.918180549287705e-07, "loss": 0.6867324709892273, "step": 6274 }, { "epoch": 1.445852534562212, "grad_norm": 1.4192898010151693, "learning_rate": 3.9151567500336323e-07, "loss": 0.8473105430603027, "step": 6275 }, { "epoch": 1.4460829493087557, "grad_norm": 1.2236253801186994, "learning_rate": 3.912133833986504e-07, "loss": 0.7629631757736206, "step": 6276 }, { "epoch": 1.4463133640552996, "grad_norm": 1.0502703605539807, "learning_rate": 3.909111801585091e-07, "loss": 0.9501597881317139, "step": 6277 }, { "epoch": 1.4465437788018434, "grad_norm": 1.0568805239624584, "learning_rate": 3.906090653268037e-07, "loss": 0.7330536842346191, "step": 6278 }, { "epoch": 1.446774193548387, "grad_norm": 1.199243558298224, "learning_rate": 3.903070389473857e-07, "loss": 0.907101571559906, "step": 6279 }, { "epoch": 1.4470046082949308, "grad_norm": 1.1269939172893009, "learning_rate": 3.900051010640939e-07, "loss": 0.8177503347396851, "step": 6280 }, { "epoch": 1.4472350230414746, "grad_norm": 1.373102048695832, "learning_rate": 3.897032517207538e-07, "loss": 0.7851059436798096, "step": 6281 }, { "epoch": 1.4474654377880185, "grad_norm": 0.8801777971944739, "learning_rate": 3.8940149096117914e-07, "loss": 0.7056214809417725, "step": 6282 }, { "epoch": 1.4476958525345622, "grad_norm": 1.0831833275731695, "learning_rate": 3.8909981882916975e-07, "loss": 0.784143328666687, "step": 6283 }, { "epoch": 1.447926267281106, "grad_norm": 1.2368924313085696, "learning_rate": 3.8879823536851253e-07, "loss": 0.8157210350036621, "step": 6284 }, { "epoch": 1.4481566820276497, "grad_norm": 1.276176943713772, "learning_rate": 3.884967406229828e-07, "loss": 0.7329680323600769, "step": 6285 }, { "epoch": 1.4483870967741934, "grad_norm": 1.4518343581804805, "learning_rate": 3.8819533463634145e-07, "loss": 0.9214208722114563, "step": 6286 }, { "epoch": 1.4486175115207374, "grad_norm": 1.835142969551997, "learning_rate": 3.8789401745233706e-07, "loss": 0.8118722438812256, "step": 6287 }, { "epoch": 1.448847926267281, "grad_norm": 1.0485981202236783, "learning_rate": 3.8759278911470615e-07, "loss": 0.7517364025115967, "step": 6288 }, { "epoch": 1.4490783410138248, "grad_norm": 1.0879409814064, "learning_rate": 3.872916496671711e-07, "loss": 0.8979834318161011, "step": 6289 }, { "epoch": 1.4493087557603688, "grad_norm": 1.6674549792368192, "learning_rate": 3.8699059915344166e-07, "loss": 0.9159818887710571, "step": 6290 }, { "epoch": 1.4495391705069125, "grad_norm": 1.2582380909324238, "learning_rate": 3.8668963761721563e-07, "loss": 0.8176029324531555, "step": 6291 }, { "epoch": 1.4497695852534562, "grad_norm": 1.3257834277786367, "learning_rate": 3.8638876510217666e-07, "loss": 0.7077589631080627, "step": 6292 }, { "epoch": 1.45, "grad_norm": 1.0304546829516872, "learning_rate": 3.8608798165199585e-07, "loss": 0.8107718825340271, "step": 6293 }, { "epoch": 1.4502304147465437, "grad_norm": 1.278146889045901, "learning_rate": 3.8578728731033214e-07, "loss": 0.9021201133728027, "step": 6294 }, { "epoch": 1.4504608294930876, "grad_norm": 1.5907360314325336, "learning_rate": 3.854866821208306e-07, "loss": 0.9134507179260254, "step": 6295 }, { "epoch": 1.4506912442396314, "grad_norm": 1.2431886164023473, "learning_rate": 3.8518616612712317e-07, "loss": 0.9081463813781738, "step": 6296 }, { "epoch": 1.450921658986175, "grad_norm": 1.394869861453301, "learning_rate": 3.848857393728303e-07, "loss": 0.7892032861709595, "step": 6297 }, { "epoch": 1.4511520737327188, "grad_norm": 1.1702087372951315, "learning_rate": 3.8458540190155796e-07, "loss": 0.753928542137146, "step": 6298 }, { "epoch": 1.4513824884792625, "grad_norm": 1.1800339185606825, "learning_rate": 3.8428515375689996e-07, "loss": 0.6316792964935303, "step": 6299 }, { "epoch": 1.4516129032258065, "grad_norm": 1.0510746352372813, "learning_rate": 3.8398499498243665e-07, "loss": 0.6569210290908813, "step": 6300 }, { "epoch": 1.4518433179723502, "grad_norm": 1.2827982624069105, "learning_rate": 3.836849256217355e-07, "loss": 0.9082256555557251, "step": 6301 }, { "epoch": 1.452073732718894, "grad_norm": 1.2539326790404104, "learning_rate": 3.833849457183519e-07, "loss": 0.6533655524253845, "step": 6302 }, { "epoch": 1.452304147465438, "grad_norm": 1.1962706885387824, "learning_rate": 3.830850553158271e-07, "loss": 0.8181168437004089, "step": 6303 }, { "epoch": 1.4525345622119816, "grad_norm": 1.191632474290621, "learning_rate": 3.827852544576895e-07, "loss": 0.8258780241012573, "step": 6304 }, { "epoch": 1.4527649769585254, "grad_norm": 1.2200843626761786, "learning_rate": 3.824855431874555e-07, "loss": 0.7917114496231079, "step": 6305 }, { "epoch": 1.452995391705069, "grad_norm": 1.1119249100754447, "learning_rate": 3.821859215486274e-07, "loss": 0.7523643970489502, "step": 6306 }, { "epoch": 1.4532258064516128, "grad_norm": 1.173507656799684, "learning_rate": 3.818863895846945e-07, "loss": 0.7248106002807617, "step": 6307 }, { "epoch": 1.4534562211981568, "grad_norm": 1.0384099625968284, "learning_rate": 3.815869473391343e-07, "loss": 0.6663920879364014, "step": 6308 }, { "epoch": 1.4536866359447005, "grad_norm": 1.2904533830018654, "learning_rate": 3.8128759485540995e-07, "loss": 0.887082576751709, "step": 6309 }, { "epoch": 1.4539170506912442, "grad_norm": 1.176731626067417, "learning_rate": 3.8098833217697193e-07, "loss": 0.8491328954696655, "step": 6310 }, { "epoch": 1.454147465437788, "grad_norm": 0.995531509886264, "learning_rate": 3.806891593472582e-07, "loss": 0.6749746799468994, "step": 6311 }, { "epoch": 1.4543778801843317, "grad_norm": 1.2359927269681388, "learning_rate": 3.803900764096932e-07, "loss": 0.7607502937316895, "step": 6312 }, { "epoch": 1.4546082949308756, "grad_norm": 0.9855772687954082, "learning_rate": 3.8009108340768804e-07, "loss": 0.6713626980781555, "step": 6313 }, { "epoch": 1.4548387096774194, "grad_norm": 1.0335982949651026, "learning_rate": 3.797921803846419e-07, "loss": 0.7031810879707336, "step": 6314 }, { "epoch": 1.455069124423963, "grad_norm": 1.2499044478276522, "learning_rate": 3.7949336738393955e-07, "loss": 0.7233775854110718, "step": 6315 }, { "epoch": 1.455299539170507, "grad_norm": 1.1902627494977487, "learning_rate": 3.791946444489532e-07, "loss": 0.7446990013122559, "step": 6316 }, { "epoch": 1.4555299539170508, "grad_norm": 1.0356528338667375, "learning_rate": 3.7889601162304273e-07, "loss": 0.731992244720459, "step": 6317 }, { "epoch": 1.4557603686635945, "grad_norm": 0.9012124257356037, "learning_rate": 3.785974689495539e-07, "loss": 0.7167335152626038, "step": 6318 }, { "epoch": 1.4559907834101382, "grad_norm": 1.0367746360279544, "learning_rate": 3.7829901647181993e-07, "loss": 0.7634297609329224, "step": 6319 }, { "epoch": 1.456221198156682, "grad_norm": 1.323601627974345, "learning_rate": 3.7800065423316066e-07, "loss": 0.7584050893783569, "step": 6320 }, { "epoch": 1.456451612903226, "grad_norm": 1.3168506305563585, "learning_rate": 3.777023822768829e-07, "loss": 0.7150899171829224, "step": 6321 }, { "epoch": 1.4566820276497696, "grad_norm": 1.3142694869577929, "learning_rate": 3.7740420064628034e-07, "loss": 0.7821052670478821, "step": 6322 }, { "epoch": 1.4569124423963133, "grad_norm": 1.1890463822517086, "learning_rate": 3.7710610938463405e-07, "loss": 0.8678094148635864, "step": 6323 }, { "epoch": 1.457142857142857, "grad_norm": 1.0929926711457507, "learning_rate": 3.7680810853521107e-07, "loss": 0.6953635215759277, "step": 6324 }, { "epoch": 1.4573732718894008, "grad_norm": 1.392687245093679, "learning_rate": 3.765101981412665e-07, "loss": 0.765946626663208, "step": 6325 }, { "epoch": 1.4576036866359448, "grad_norm": 1.2287803375758581, "learning_rate": 3.7621237824604137e-07, "loss": 0.8828680515289307, "step": 6326 }, { "epoch": 1.4578341013824885, "grad_norm": 1.4191080683791804, "learning_rate": 3.7591464889276326e-07, "loss": 0.8916178345680237, "step": 6327 }, { "epoch": 1.4580645161290322, "grad_norm": 1.4414543071479498, "learning_rate": 3.756170101246481e-07, "loss": 0.7563039064407349, "step": 6328 }, { "epoch": 1.4582949308755762, "grad_norm": 1.1488058177567217, "learning_rate": 3.7531946198489725e-07, "loss": 0.8548855781555176, "step": 6329 }, { "epoch": 1.4585253456221199, "grad_norm": 1.2471941201918813, "learning_rate": 3.750220045166993e-07, "loss": 0.8337546586990356, "step": 6330 }, { "epoch": 1.4587557603686636, "grad_norm": 1.2665043024049272, "learning_rate": 3.7472463776323036e-07, "loss": 0.8909939527511597, "step": 6331 }, { "epoch": 1.4589861751152073, "grad_norm": 0.9459101838544814, "learning_rate": 3.744273617676524e-07, "loss": 0.629026472568512, "step": 6332 }, { "epoch": 1.459216589861751, "grad_norm": 1.245577103796106, "learning_rate": 3.7413017657311454e-07, "loss": 0.7264849543571472, "step": 6333 }, { "epoch": 1.459447004608295, "grad_norm": 1.0987416494814488, "learning_rate": 3.738330822227532e-07, "loss": 0.808081865310669, "step": 6334 }, { "epoch": 1.4596774193548387, "grad_norm": 1.145687515640666, "learning_rate": 3.7353607875969115e-07, "loss": 0.6092932820320129, "step": 6335 }, { "epoch": 1.4599078341013825, "grad_norm": 1.2636271324745916, "learning_rate": 3.7323916622703756e-07, "loss": 0.8700584173202515, "step": 6336 }, { "epoch": 1.4601382488479262, "grad_norm": 1.2867446987977476, "learning_rate": 3.7294234466788954e-07, "loss": 0.8424433469772339, "step": 6337 }, { "epoch": 1.46036866359447, "grad_norm": 1.1929868573019329, "learning_rate": 3.7264561412533013e-07, "loss": 0.8587443828582764, "step": 6338 }, { "epoch": 1.4605990783410139, "grad_norm": 1.1369944171843958, "learning_rate": 3.7234897464242934e-07, "loss": 0.7708064913749695, "step": 6339 }, { "epoch": 1.4608294930875576, "grad_norm": 0.9599493655503268, "learning_rate": 3.7205242626224395e-07, "loss": 0.8226567506790161, "step": 6340 }, { "epoch": 1.4610599078341013, "grad_norm": 1.6926769297162396, "learning_rate": 3.717559690278176e-07, "loss": 0.8414342403411865, "step": 6341 }, { "epoch": 1.4612903225806453, "grad_norm": 1.136325082903018, "learning_rate": 3.714596029821804e-07, "loss": 0.765863299369812, "step": 6342 }, { "epoch": 1.461520737327189, "grad_norm": 1.2033696575950952, "learning_rate": 3.7116332816834997e-07, "loss": 0.7253202199935913, "step": 6343 }, { "epoch": 1.4617511520737327, "grad_norm": 1.2614732245354896, "learning_rate": 3.7086714462933e-07, "loss": 0.786415696144104, "step": 6344 }, { "epoch": 1.4619815668202765, "grad_norm": 1.3398597613096093, "learning_rate": 3.705710524081108e-07, "loss": 0.8382824659347534, "step": 6345 }, { "epoch": 1.4622119815668202, "grad_norm": 1.1421503229190921, "learning_rate": 3.702750515476705e-07, "loss": 0.7953319549560547, "step": 6346 }, { "epoch": 1.4624423963133641, "grad_norm": 1.1953524657169348, "learning_rate": 3.699791420909727e-07, "loss": 0.7897430658340454, "step": 6347 }, { "epoch": 1.4626728110599079, "grad_norm": 1.0462269201726477, "learning_rate": 3.6968332408096804e-07, "loss": 0.7276254892349243, "step": 6348 }, { "epoch": 1.4629032258064516, "grad_norm": 1.2576670635193097, "learning_rate": 3.693875975605949e-07, "loss": 0.7318450212478638, "step": 6349 }, { "epoch": 1.4631336405529953, "grad_norm": 1.3298595608160129, "learning_rate": 3.6909196257277676e-07, "loss": 0.8438090085983276, "step": 6350 }, { "epoch": 1.463364055299539, "grad_norm": 1.1958819221255177, "learning_rate": 3.6879641916042534e-07, "loss": 0.7977915406227112, "step": 6351 }, { "epoch": 1.463594470046083, "grad_norm": 1.5876789525233332, "learning_rate": 3.685009673664382e-07, "loss": 0.8845348358154297, "step": 6352 }, { "epoch": 1.4638248847926267, "grad_norm": 1.1089282393569035, "learning_rate": 3.682056072336992e-07, "loss": 0.8971320986747742, "step": 6353 }, { "epoch": 1.4640552995391705, "grad_norm": 1.1499585685789093, "learning_rate": 3.679103388050803e-07, "loss": 0.7015302181243896, "step": 6354 }, { "epoch": 1.4642857142857144, "grad_norm": 1.058413373940715, "learning_rate": 3.676151621234389e-07, "loss": 0.5953146815299988, "step": 6355 }, { "epoch": 1.4645161290322581, "grad_norm": 0.940762320723037, "learning_rate": 3.673200772316193e-07, "loss": 0.5794636011123657, "step": 6356 }, { "epoch": 1.4647465437788019, "grad_norm": 1.4093031765021824, "learning_rate": 3.6702508417245324e-07, "loss": 0.8272292017936707, "step": 6357 }, { "epoch": 1.4649769585253456, "grad_norm": 1.2004626750502272, "learning_rate": 3.6673018298875826e-07, "loss": 0.7239755392074585, "step": 6358 }, { "epoch": 1.4652073732718893, "grad_norm": 1.0592207409293348, "learning_rate": 3.6643537372333886e-07, "loss": 0.8597465753555298, "step": 6359 }, { "epoch": 1.4654377880184333, "grad_norm": 1.3768417389873642, "learning_rate": 3.661406564189862e-07, "loss": 0.7540475130081177, "step": 6360 }, { "epoch": 1.465668202764977, "grad_norm": 1.2300552177842492, "learning_rate": 3.658460311184782e-07, "loss": 0.793259859085083, "step": 6361 }, { "epoch": 1.4658986175115207, "grad_norm": 1.1933122341650848, "learning_rate": 3.6555149786457883e-07, "loss": 0.797966718673706, "step": 6362 }, { "epoch": 1.4661290322580645, "grad_norm": 1.082541374270611, "learning_rate": 3.6525705670004016e-07, "loss": 0.7466796636581421, "step": 6363 }, { "epoch": 1.4663594470046082, "grad_norm": 0.9612262339874744, "learning_rate": 3.6496270766759927e-07, "loss": 0.7694044709205627, "step": 6364 }, { "epoch": 1.4665898617511521, "grad_norm": 1.753828188679532, "learning_rate": 3.6466845080998043e-07, "loss": 0.7701553106307983, "step": 6365 }, { "epoch": 1.4668202764976959, "grad_norm": 1.0670832455899337, "learning_rate": 3.643742861698952e-07, "loss": 0.6718326807022095, "step": 6366 }, { "epoch": 1.4670506912442396, "grad_norm": 1.1220075290963027, "learning_rate": 3.6408021379004086e-07, "loss": 0.7099052667617798, "step": 6367 }, { "epoch": 1.4672811059907835, "grad_norm": 1.0614563823752192, "learning_rate": 3.6378623371310126e-07, "loss": 0.8650654554367065, "step": 6368 }, { "epoch": 1.4675115207373273, "grad_norm": 1.18691798498221, "learning_rate": 3.6349234598174794e-07, "loss": 0.7920950055122375, "step": 6369 }, { "epoch": 1.467741935483871, "grad_norm": 1.3672164620265899, "learning_rate": 3.63198550638638e-07, "loss": 0.7927969098091125, "step": 6370 }, { "epoch": 1.4679723502304147, "grad_norm": 1.6817643007938734, "learning_rate": 3.6290484772641514e-07, "loss": 0.9403868913650513, "step": 6371 }, { "epoch": 1.4682027649769585, "grad_norm": 1.188245842937741, "learning_rate": 3.626112372877106e-07, "loss": 0.9157334566116333, "step": 6372 }, { "epoch": 1.4684331797235024, "grad_norm": 1.0918511661649737, "learning_rate": 3.6231771936514067e-07, "loss": 0.7742066979408264, "step": 6373 }, { "epoch": 1.4686635944700461, "grad_norm": 1.0472722321327697, "learning_rate": 3.6202429400131006e-07, "loss": 0.69399094581604, "step": 6374 }, { "epoch": 1.4688940092165899, "grad_norm": 1.243240675298042, "learning_rate": 3.6173096123880854e-07, "loss": 0.874832272529602, "step": 6375 }, { "epoch": 1.4691244239631336, "grad_norm": 0.9504044447465768, "learning_rate": 3.6143772112021275e-07, "loss": 0.6685272455215454, "step": 6376 }, { "epoch": 1.4693548387096773, "grad_norm": 1.2588614059189167, "learning_rate": 3.611445736880867e-07, "loss": 0.7422738671302795, "step": 6377 }, { "epoch": 1.4695852534562213, "grad_norm": 1.1563672807518934, "learning_rate": 3.6085151898498e-07, "loss": 0.8208622932434082, "step": 6378 }, { "epoch": 1.469815668202765, "grad_norm": 1.278791922768039, "learning_rate": 3.605585570534293e-07, "loss": 0.8001033663749695, "step": 6379 }, { "epoch": 1.4700460829493087, "grad_norm": 1.4073194030234843, "learning_rate": 3.6026568793595744e-07, "loss": 0.789332926273346, "step": 6380 }, { "epoch": 1.4702764976958527, "grad_norm": 1.1542499539799642, "learning_rate": 3.599729116750742e-07, "loss": 0.8071820139884949, "step": 6381 }, { "epoch": 1.4705069124423962, "grad_norm": 1.3369229588575535, "learning_rate": 3.5968022831327506e-07, "loss": 0.8028534054756165, "step": 6382 }, { "epoch": 1.4707373271889401, "grad_norm": 1.0119395143433376, "learning_rate": 3.593876378930435e-07, "loss": 0.6888329982757568, "step": 6383 }, { "epoch": 1.4709677419354839, "grad_norm": 1.285773441215651, "learning_rate": 3.590951404568483e-07, "loss": 0.8176132440567017, "step": 6384 }, { "epoch": 1.4711981566820276, "grad_norm": 0.9429108192029542, "learning_rate": 3.588027360471446e-07, "loss": 0.6715027689933777, "step": 6385 }, { "epoch": 1.4714285714285715, "grad_norm": 1.2177133807456715, "learning_rate": 3.585104247063753e-07, "loss": 0.8622937798500061, "step": 6386 }, { "epoch": 1.4716589861751153, "grad_norm": 1.252482813795077, "learning_rate": 3.5821820647696864e-07, "loss": 0.7244299650192261, "step": 6387 }, { "epoch": 1.471889400921659, "grad_norm": 1.2422776234152886, "learning_rate": 3.579260814013393e-07, "loss": 0.8130464553833008, "step": 6388 }, { "epoch": 1.4721198156682027, "grad_norm": 1.739841773852821, "learning_rate": 3.576340495218897e-07, "loss": 0.8563692569732666, "step": 6389 }, { "epoch": 1.4723502304147464, "grad_norm": 1.1474783445098509, "learning_rate": 3.573421108810073e-07, "loss": 0.8315908908843994, "step": 6390 }, { "epoch": 1.4725806451612904, "grad_norm": 1.0916407928923948, "learning_rate": 3.5705026552106645e-07, "loss": 0.653038740158081, "step": 6391 }, { "epoch": 1.4728110599078341, "grad_norm": 1.250110377436999, "learning_rate": 3.5675851348442876e-07, "loss": 0.7511966228485107, "step": 6392 }, { "epoch": 1.4730414746543778, "grad_norm": 1.226967151246929, "learning_rate": 3.564668548134413e-07, "loss": 0.8675990104675293, "step": 6393 }, { "epoch": 1.4732718894009218, "grad_norm": 1.2481066388566375, "learning_rate": 3.5617528955043765e-07, "loss": 0.7574094533920288, "step": 6394 }, { "epoch": 1.4735023041474653, "grad_norm": 1.3612516426224104, "learning_rate": 3.5588381773773866e-07, "loss": 0.7004787921905518, "step": 6395 }, { "epoch": 1.4737327188940093, "grad_norm": 1.193988835000252, "learning_rate": 3.555924394176508e-07, "loss": 0.680101215839386, "step": 6396 }, { "epoch": 1.473963133640553, "grad_norm": 1.2956197944669767, "learning_rate": 3.55301154632467e-07, "loss": 0.8340710401535034, "step": 6397 }, { "epoch": 1.4741935483870967, "grad_norm": 1.2156451361937963, "learning_rate": 3.5500996342446756e-07, "loss": 0.8307079076766968, "step": 6398 }, { "epoch": 1.4744239631336407, "grad_norm": 1.3824459968937755, "learning_rate": 3.547188658359179e-07, "loss": 0.9614958167076111, "step": 6399 }, { "epoch": 1.4746543778801844, "grad_norm": 1.2140973914551956, "learning_rate": 3.544278619090707e-07, "loss": 0.782494068145752, "step": 6400 }, { "epoch": 1.4748847926267281, "grad_norm": 1.372883571978596, "learning_rate": 3.5413695168616474e-07, "loss": 0.7474460601806641, "step": 6401 }, { "epoch": 1.4751152073732718, "grad_norm": 1.0929029713656226, "learning_rate": 3.5384613520942484e-07, "loss": 0.7182635068893433, "step": 6402 }, { "epoch": 1.4753456221198156, "grad_norm": 1.1562679128127753, "learning_rate": 3.5355541252106336e-07, "loss": 0.8116436004638672, "step": 6403 }, { "epoch": 1.4755760368663595, "grad_norm": 1.1320096436261353, "learning_rate": 3.5326478366327806e-07, "loss": 0.8007283210754395, "step": 6404 }, { "epoch": 1.4758064516129032, "grad_norm": 1.060451283065696, "learning_rate": 3.5297424867825276e-07, "loss": 0.7707732915878296, "step": 6405 }, { "epoch": 1.476036866359447, "grad_norm": 1.319974893721661, "learning_rate": 3.5268380760815917e-07, "loss": 0.8031977415084839, "step": 6406 }, { "epoch": 1.4762672811059907, "grad_norm": 1.0847497024921582, "learning_rate": 3.5239346049515397e-07, "loss": 0.7113008499145508, "step": 6407 }, { "epoch": 1.4764976958525344, "grad_norm": 1.490354792200027, "learning_rate": 3.521032073813802e-07, "loss": 0.8069616556167603, "step": 6408 }, { "epoch": 1.4767281105990784, "grad_norm": 1.6536617293382079, "learning_rate": 3.518130483089686e-07, "loss": 0.9780417680740356, "step": 6409 }, { "epoch": 1.476958525345622, "grad_norm": 1.0393285063529043, "learning_rate": 3.515229833200351e-07, "loss": 0.765299379825592, "step": 6410 }, { "epoch": 1.4771889400921658, "grad_norm": 0.9792702634570369, "learning_rate": 3.512330124566816e-07, "loss": 0.7279179096221924, "step": 6411 }, { "epoch": 1.4774193548387098, "grad_norm": 1.3765526641198769, "learning_rate": 3.509431357609978e-07, "loss": 0.8429825901985168, "step": 6412 }, { "epoch": 1.4776497695852535, "grad_norm": 1.2876523066268597, "learning_rate": 3.506533532750586e-07, "loss": 0.741936206817627, "step": 6413 }, { "epoch": 1.4778801843317972, "grad_norm": 1.0841845353527741, "learning_rate": 3.5036366504092527e-07, "loss": 0.6841387748718262, "step": 6414 }, { "epoch": 1.478110599078341, "grad_norm": 1.1361546476433346, "learning_rate": 3.5007407110064626e-07, "loss": 0.7136961221694946, "step": 6415 }, { "epoch": 1.4783410138248847, "grad_norm": 1.1942730912918724, "learning_rate": 3.497845714962554e-07, "loss": 0.8483344912528992, "step": 6416 }, { "epoch": 1.4785714285714286, "grad_norm": 1.1525838724707749, "learning_rate": 3.4949516626977294e-07, "loss": 0.7060235738754272, "step": 6417 }, { "epoch": 1.4788018433179724, "grad_norm": 1.2546190088001288, "learning_rate": 3.4920585546320625e-07, "loss": 0.7351587414741516, "step": 6418 }, { "epoch": 1.479032258064516, "grad_norm": 1.4082190266306274, "learning_rate": 3.489166391185482e-07, "loss": 0.7445269823074341, "step": 6419 }, { "epoch": 1.4792626728110598, "grad_norm": 1.2308828080413103, "learning_rate": 3.4862751727777796e-07, "loss": 0.795128583908081, "step": 6420 }, { "epoch": 1.4794930875576036, "grad_norm": 1.3455737723646244, "learning_rate": 3.4833848998286133e-07, "loss": 0.7916193008422852, "step": 6421 }, { "epoch": 1.4797235023041475, "grad_norm": 1.2062461099240058, "learning_rate": 3.480495572757497e-07, "loss": 0.8279474973678589, "step": 6422 }, { "epoch": 1.4799539170506912, "grad_norm": 1.3615355231577309, "learning_rate": 3.477607191983822e-07, "loss": 0.9339898824691772, "step": 6423 }, { "epoch": 1.480184331797235, "grad_norm": 1.2958649175302657, "learning_rate": 3.4747197579268296e-07, "loss": 0.8579660654067993, "step": 6424 }, { "epoch": 1.480414746543779, "grad_norm": 1.1935735021965341, "learning_rate": 3.471833271005622e-07, "loss": 0.7637878060340881, "step": 6425 }, { "epoch": 1.4806451612903226, "grad_norm": 1.2997741786350927, "learning_rate": 3.4689477316391756e-07, "loss": 0.8600465059280396, "step": 6426 }, { "epoch": 1.4808755760368664, "grad_norm": 0.9725758019670567, "learning_rate": 3.46606314024632e-07, "loss": 0.6576759815216064, "step": 6427 }, { "epoch": 1.48110599078341, "grad_norm": 1.1289750059608772, "learning_rate": 3.463179497245747e-07, "loss": 0.7556706666946411, "step": 6428 }, { "epoch": 1.4813364055299538, "grad_norm": 1.3449392913610907, "learning_rate": 3.4602968030560196e-07, "loss": 0.8826701641082764, "step": 6429 }, { "epoch": 1.4815668202764978, "grad_norm": 1.1499087478485694, "learning_rate": 3.457415058095554e-07, "loss": 0.7352213263511658, "step": 6430 }, { "epoch": 1.4817972350230415, "grad_norm": 1.4434298728988502, "learning_rate": 3.454534262782628e-07, "loss": 0.8108851909637451, "step": 6431 }, { "epoch": 1.4820276497695852, "grad_norm": 1.3070168078927469, "learning_rate": 3.4516544175353914e-07, "loss": 0.8595583438873291, "step": 6432 }, { "epoch": 1.482258064516129, "grad_norm": 1.1496814595283131, "learning_rate": 3.448775522771847e-07, "loss": 0.7194280028343201, "step": 6433 }, { "epoch": 1.4824884792626727, "grad_norm": 1.2788780172510947, "learning_rate": 3.445897578909861e-07, "loss": 0.8966056108474731, "step": 6434 }, { "epoch": 1.4827188940092166, "grad_norm": 1.4168806857520198, "learning_rate": 3.443020586367167e-07, "loss": 0.8089771270751953, "step": 6435 }, { "epoch": 1.4829493087557604, "grad_norm": 1.3086078413537297, "learning_rate": 3.4401445455613555e-07, "loss": 0.7835644483566284, "step": 6436 }, { "epoch": 1.483179723502304, "grad_norm": 1.242850049469479, "learning_rate": 3.4372694569098746e-07, "loss": 0.7285257577896118, "step": 6437 }, { "epoch": 1.483410138248848, "grad_norm": 1.4884020116718253, "learning_rate": 3.434395320830048e-07, "loss": 0.9108592867851257, "step": 6438 }, { "epoch": 1.4836405529953918, "grad_norm": 1.265305751937672, "learning_rate": 3.431522137739049e-07, "loss": 0.7154395580291748, "step": 6439 }, { "epoch": 1.4838709677419355, "grad_norm": 1.0883673646660943, "learning_rate": 3.428649908053917e-07, "loss": 0.6483602523803711, "step": 6440 }, { "epoch": 1.4841013824884792, "grad_norm": 1.457129029114168, "learning_rate": 3.425778632191551e-07, "loss": 0.8090662956237793, "step": 6441 }, { "epoch": 1.484331797235023, "grad_norm": 1.428702771444548, "learning_rate": 3.422908310568712e-07, "loss": 0.7884642481803894, "step": 6442 }, { "epoch": 1.484562211981567, "grad_norm": 1.2738553778883674, "learning_rate": 3.4200389436020225e-07, "loss": 0.8628194332122803, "step": 6443 }, { "epoch": 1.4847926267281106, "grad_norm": 1.1838310809928603, "learning_rate": 3.4171705317079723e-07, "loss": 0.8192269802093506, "step": 6444 }, { "epoch": 1.4850230414746544, "grad_norm": 1.316668872684636, "learning_rate": 3.4143030753029054e-07, "loss": 0.7768012285232544, "step": 6445 }, { "epoch": 1.485253456221198, "grad_norm": 1.2324282268735118, "learning_rate": 3.411436574803026e-07, "loss": 0.7420791387557983, "step": 6446 }, { "epoch": 1.4854838709677418, "grad_norm": 1.3102449774544425, "learning_rate": 3.4085710306244086e-07, "loss": 0.823938250541687, "step": 6447 }, { "epoch": 1.4857142857142858, "grad_norm": 1.1672900255965821, "learning_rate": 3.405706443182976e-07, "loss": 0.7215089201927185, "step": 6448 }, { "epoch": 1.4859447004608295, "grad_norm": 1.138949819615918, "learning_rate": 3.4028428128945286e-07, "loss": 0.8301436901092529, "step": 6449 }, { "epoch": 1.4861751152073732, "grad_norm": 1.1171858572091258, "learning_rate": 3.399980140174712e-07, "loss": 0.6727990508079529, "step": 6450 }, { "epoch": 1.4864055299539172, "grad_norm": 1.0969379356045603, "learning_rate": 3.397118425439038e-07, "loss": 0.8364754319190979, "step": 6451 }, { "epoch": 1.486635944700461, "grad_norm": 1.2714499604529865, "learning_rate": 3.394257669102887e-07, "loss": 0.7241604328155518, "step": 6452 }, { "epoch": 1.4868663594470046, "grad_norm": 1.429435383993002, "learning_rate": 3.3913978715814897e-07, "loss": 0.7762489914894104, "step": 6453 }, { "epoch": 1.4870967741935484, "grad_norm": 1.3862601382620485, "learning_rate": 3.38853903328994e-07, "loss": 0.9278200268745422, "step": 6454 }, { "epoch": 1.487327188940092, "grad_norm": 0.9454491284474441, "learning_rate": 3.3856811546431994e-07, "loss": 0.693070113658905, "step": 6455 }, { "epoch": 1.487557603686636, "grad_norm": 1.4631261008304832, "learning_rate": 3.382824236056084e-07, "loss": 0.8541949987411499, "step": 6456 }, { "epoch": 1.4877880184331798, "grad_norm": 1.1080747331787868, "learning_rate": 3.379968277943267e-07, "loss": 0.7638850212097168, "step": 6457 }, { "epoch": 1.4880184331797235, "grad_norm": 1.5396868765343736, "learning_rate": 3.377113280719295e-07, "loss": 0.8240739107131958, "step": 6458 }, { "epoch": 1.4882488479262672, "grad_norm": 1.037738997106509, "learning_rate": 3.374259244798562e-07, "loss": 0.7360633015632629, "step": 6459 }, { "epoch": 1.488479262672811, "grad_norm": 1.1287418173516828, "learning_rate": 3.371406170595328e-07, "loss": 0.8626362085342407, "step": 6460 }, { "epoch": 1.488709677419355, "grad_norm": 1.553133844655672, "learning_rate": 3.368554058523713e-07, "loss": 0.8499895334243774, "step": 6461 }, { "epoch": 1.4889400921658986, "grad_norm": 1.1568237777707882, "learning_rate": 3.3657029089976985e-07, "loss": 0.8335039615631104, "step": 6462 }, { "epoch": 1.4891705069124423, "grad_norm": 1.1957026633378731, "learning_rate": 3.3628527224311196e-07, "loss": 0.8154790997505188, "step": 6463 }, { "epoch": 1.4894009216589863, "grad_norm": 1.2851436413791164, "learning_rate": 3.3600034992376856e-07, "loss": 0.7952951192855835, "step": 6464 }, { "epoch": 1.48963133640553, "grad_norm": 1.5993164682006433, "learning_rate": 3.3571552398309535e-07, "loss": 0.7227598428726196, "step": 6465 }, { "epoch": 1.4898617511520738, "grad_norm": 1.1773028491207966, "learning_rate": 3.3543079446243404e-07, "loss": 0.6703250408172607, "step": 6466 }, { "epoch": 1.4900921658986175, "grad_norm": 1.152932493736184, "learning_rate": 3.351461614031136e-07, "loss": 0.7468122243881226, "step": 6467 }, { "epoch": 1.4903225806451612, "grad_norm": 1.2933114629854674, "learning_rate": 3.348616248464475e-07, "loss": 0.8649178743362427, "step": 6468 }, { "epoch": 1.4905529953917052, "grad_norm": 1.013990280281903, "learning_rate": 3.345771848337359e-07, "loss": 0.8229554295539856, "step": 6469 }, { "epoch": 1.4907834101382489, "grad_norm": 1.3471402030282535, "learning_rate": 3.342928414062652e-07, "loss": 0.7275597453117371, "step": 6470 }, { "epoch": 1.4910138248847926, "grad_norm": 1.095192106330462, "learning_rate": 3.3400859460530737e-07, "loss": 0.657899796962738, "step": 6471 }, { "epoch": 1.4912442396313363, "grad_norm": 1.0853913135805695, "learning_rate": 3.3372444447212e-07, "loss": 0.7579425573348999, "step": 6472 }, { "epoch": 1.49147465437788, "grad_norm": 1.1304988993649205, "learning_rate": 3.334403910479479e-07, "loss": 0.8707751631736755, "step": 6473 }, { "epoch": 1.491705069124424, "grad_norm": 1.3454806591137698, "learning_rate": 3.331564343740201e-07, "loss": 0.7923752665519714, "step": 6474 }, { "epoch": 1.4919354838709677, "grad_norm": 1.2646674876263875, "learning_rate": 3.328725744915536e-07, "loss": 0.8308948278427124, "step": 6475 }, { "epoch": 1.4921658986175115, "grad_norm": 1.4029553470676885, "learning_rate": 3.3258881144174967e-07, "loss": 0.8984559774398804, "step": 6476 }, { "epoch": 1.4923963133640554, "grad_norm": 1.2358798089346714, "learning_rate": 3.3230514526579614e-07, "loss": 0.9279792308807373, "step": 6477 }, { "epoch": 1.4926267281105992, "grad_norm": 1.4094728162225774, "learning_rate": 3.3202157600486655e-07, "loss": 0.7934520244598389, "step": 6478 }, { "epoch": 1.4928571428571429, "grad_norm": 1.658388461731414, "learning_rate": 3.3173810370012136e-07, "loss": 0.8463613390922546, "step": 6479 }, { "epoch": 1.4930875576036866, "grad_norm": 1.339159678666659, "learning_rate": 3.314547283927057e-07, "loss": 0.8087350130081177, "step": 6480 }, { "epoch": 1.4933179723502303, "grad_norm": 1.2350842201271304, "learning_rate": 3.3117145012375113e-07, "loss": 0.7711254358291626, "step": 6481 }, { "epoch": 1.4935483870967743, "grad_norm": 1.2753839749074636, "learning_rate": 3.3088826893437526e-07, "loss": 0.7140679359436035, "step": 6482 }, { "epoch": 1.493778801843318, "grad_norm": 1.1506161777222865, "learning_rate": 3.3060518486568103e-07, "loss": 0.7074463367462158, "step": 6483 }, { "epoch": 1.4940092165898617, "grad_norm": 0.8291232249474376, "learning_rate": 3.3032219795875827e-07, "loss": 0.7560559511184692, "step": 6484 }, { "epoch": 1.4942396313364055, "grad_norm": 1.4344445687170468, "learning_rate": 3.3003930825468194e-07, "loss": 0.7699435353279114, "step": 6485 }, { "epoch": 1.4944700460829492, "grad_norm": 1.277197987117764, "learning_rate": 3.297565157945129e-07, "loss": 0.817488431930542, "step": 6486 }, { "epoch": 1.4947004608294931, "grad_norm": 1.1511534488778172, "learning_rate": 3.294738206192985e-07, "loss": 0.7534141540527344, "step": 6487 }, { "epoch": 1.4949308755760369, "grad_norm": 1.1924480850963226, "learning_rate": 3.291912227700715e-07, "loss": 0.7423536777496338, "step": 6488 }, { "epoch": 1.4951612903225806, "grad_norm": 0.952322784205302, "learning_rate": 3.2890872228785003e-07, "loss": 0.7181985378265381, "step": 6489 }, { "epoch": 1.4953917050691246, "grad_norm": 1.270224090305602, "learning_rate": 3.286263192136396e-07, "loss": 0.7143938541412354, "step": 6490 }, { "epoch": 1.4956221198156683, "grad_norm": 1.3995714023195414, "learning_rate": 3.2834401358843e-07, "loss": 0.8247631788253784, "step": 6491 }, { "epoch": 1.495852534562212, "grad_norm": 1.1449759372564834, "learning_rate": 3.280618054531974e-07, "loss": 0.8627001047134399, "step": 6492 }, { "epoch": 1.4960829493087557, "grad_norm": 1.3482725665599868, "learning_rate": 3.2777969484890456e-07, "loss": 0.813239574432373, "step": 6493 }, { "epoch": 1.4963133640552995, "grad_norm": 0.9200346218481302, "learning_rate": 3.2749768181649904e-07, "loss": 0.6633884310722351, "step": 6494 }, { "epoch": 1.4965437788018434, "grad_norm": 1.4278232440541767, "learning_rate": 3.272157663969144e-07, "loss": 0.7760038375854492, "step": 6495 }, { "epoch": 1.4967741935483871, "grad_norm": 1.3200918095184475, "learning_rate": 3.2693394863107105e-07, "loss": 0.9352993369102478, "step": 6496 }, { "epoch": 1.4970046082949309, "grad_norm": 1.2344539392280847, "learning_rate": 3.2665222855987397e-07, "loss": 0.7011485695838928, "step": 6497 }, { "epoch": 1.4972350230414746, "grad_norm": 1.2183950494067446, "learning_rate": 3.263706062242142e-07, "loss": 0.9008398056030273, "step": 6498 }, { "epoch": 1.4974654377880183, "grad_norm": 1.194608222128912, "learning_rate": 3.260890816649694e-07, "loss": 0.768037736415863, "step": 6499 }, { "epoch": 1.4976958525345623, "grad_norm": 0.9220148240054391, "learning_rate": 3.258076549230024e-07, "loss": 0.7603639364242554, "step": 6500 }, { "epoch": 1.497926267281106, "grad_norm": 1.3821459764557307, "learning_rate": 3.2552632603916177e-07, "loss": 0.7984024286270142, "step": 6501 }, { "epoch": 1.4981566820276497, "grad_norm": 1.415424035035242, "learning_rate": 3.2524509505428187e-07, "loss": 0.8466978073120117, "step": 6502 }, { "epoch": 1.4983870967741937, "grad_norm": 1.3670825801142161, "learning_rate": 3.24963962009183e-07, "loss": 0.7964911460876465, "step": 6503 }, { "epoch": 1.4986175115207372, "grad_norm": 1.3123478568754847, "learning_rate": 3.246829269446716e-07, "loss": 0.7551665306091309, "step": 6504 }, { "epoch": 1.4988479262672811, "grad_norm": 1.3193018902055227, "learning_rate": 3.2440198990153945e-07, "loss": 0.6468057632446289, "step": 6505 }, { "epoch": 1.4990783410138249, "grad_norm": 1.2139801652485203, "learning_rate": 3.241211509205638e-07, "loss": 0.7739330530166626, "step": 6506 }, { "epoch": 1.4993087557603686, "grad_norm": 1.3659144717848737, "learning_rate": 3.238404100425085e-07, "loss": 0.8205568790435791, "step": 6507 }, { "epoch": 1.4995391705069125, "grad_norm": 0.958982052367848, "learning_rate": 3.235597673081227e-07, "loss": 0.667822003364563, "step": 6508 }, { "epoch": 1.4997695852534563, "grad_norm": 1.2374356667574686, "learning_rate": 3.232792227581409e-07, "loss": 0.7829990386962891, "step": 6509 }, { "epoch": 1.5, "grad_norm": 1.1404525757399535, "learning_rate": 3.229987764332843e-07, "loss": 0.768509566783905, "step": 6510 }, { "epoch": 1.5002304147465437, "grad_norm": 1.3651547247057954, "learning_rate": 3.227184283742591e-07, "loss": 0.8448585867881775, "step": 6511 }, { "epoch": 1.5004608294930875, "grad_norm": 1.2722097281432705, "learning_rate": 3.2243817862175705e-07, "loss": 0.6929391622543335, "step": 6512 }, { "epoch": 1.5006912442396314, "grad_norm": 0.8983294061831201, "learning_rate": 3.221580272164567e-07, "loss": 0.6453005075454712, "step": 6513 }, { "epoch": 1.5009216589861751, "grad_norm": 1.135934251126359, "learning_rate": 3.2187797419902143e-07, "loss": 0.7870811820030212, "step": 6514 }, { "epoch": 1.5011520737327189, "grad_norm": 1.264885386654941, "learning_rate": 3.2159801961010013e-07, "loss": 0.7032002210617065, "step": 6515 }, { "epoch": 1.5013824884792628, "grad_norm": 1.5122369312915371, "learning_rate": 3.213181634903285e-07, "loss": 0.8018448352813721, "step": 6516 }, { "epoch": 1.5016129032258063, "grad_norm": 1.0930874016239036, "learning_rate": 3.2103840588032707e-07, "loss": 0.7066134810447693, "step": 6517 }, { "epoch": 1.5018433179723503, "grad_norm": 1.049874936950677, "learning_rate": 3.207587468207018e-07, "loss": 0.6835265159606934, "step": 6518 }, { "epoch": 1.502073732718894, "grad_norm": 1.1994114231897615, "learning_rate": 3.204791863520455e-07, "loss": 0.6679749488830566, "step": 6519 }, { "epoch": 1.5023041474654377, "grad_norm": 1.1780261658003046, "learning_rate": 3.201997245149358e-07, "loss": 0.781232476234436, "step": 6520 }, { "epoch": 1.5025345622119817, "grad_norm": 1.156188659495686, "learning_rate": 3.1992036134993616e-07, "loss": 0.7853572368621826, "step": 6521 }, { "epoch": 1.5027649769585254, "grad_norm": 1.3156565650023675, "learning_rate": 3.1964109689759576e-07, "loss": 0.8220832943916321, "step": 6522 }, { "epoch": 1.5029953917050691, "grad_norm": 1.0874952614272322, "learning_rate": 3.193619311984491e-07, "loss": 0.8046013116836548, "step": 6523 }, { "epoch": 1.5032258064516129, "grad_norm": 1.1481673715256613, "learning_rate": 3.190828642930174e-07, "loss": 0.7123414874076843, "step": 6524 }, { "epoch": 1.5034562211981566, "grad_norm": 1.2507360463805697, "learning_rate": 3.188038962218066e-07, "loss": 0.7913625240325928, "step": 6525 }, { "epoch": 1.5036866359447005, "grad_norm": 1.2264479129016654, "learning_rate": 3.185250270253081e-07, "loss": 0.7837327718734741, "step": 6526 }, { "epoch": 1.5039170506912443, "grad_norm": 1.3223188543102071, "learning_rate": 3.182462567440002e-07, "loss": 0.7799992561340332, "step": 6527 }, { "epoch": 1.504147465437788, "grad_norm": 1.2906027927929307, "learning_rate": 3.1796758541834545e-07, "loss": 0.8591268062591553, "step": 6528 }, { "epoch": 1.504377880184332, "grad_norm": 1.1175058933428492, "learning_rate": 3.176890130887926e-07, "loss": 0.6886378526687622, "step": 6529 }, { "epoch": 1.5046082949308754, "grad_norm": 1.4969255628781877, "learning_rate": 3.1741053979577647e-07, "loss": 0.8641641139984131, "step": 6530 }, { "epoch": 1.5048387096774194, "grad_norm": 1.3022265823882768, "learning_rate": 3.1713216557971687e-07, "loss": 0.8215552568435669, "step": 6531 }, { "epoch": 1.5050691244239631, "grad_norm": 1.332125606212464, "learning_rate": 3.1685389048101906e-07, "loss": 0.8506371974945068, "step": 6532 }, { "epoch": 1.5052995391705069, "grad_norm": 1.371517957091787, "learning_rate": 3.1657571454007515e-07, "loss": 0.740912675857544, "step": 6533 }, { "epoch": 1.5055299539170508, "grad_norm": 1.0380741302125553, "learning_rate": 3.162976377972614e-07, "loss": 0.6458308696746826, "step": 6534 }, { "epoch": 1.5057603686635943, "grad_norm": 1.0737980819278299, "learning_rate": 3.1601966029294013e-07, "loss": 0.7368316650390625, "step": 6535 }, { "epoch": 1.5059907834101383, "grad_norm": 1.1008143995933475, "learning_rate": 3.1574178206746003e-07, "loss": 0.6648637056350708, "step": 6536 }, { "epoch": 1.506221198156682, "grad_norm": 1.2751679142768328, "learning_rate": 3.154640031611544e-07, "loss": 0.706688404083252, "step": 6537 }, { "epoch": 1.5064516129032257, "grad_norm": 1.0597131508477158, "learning_rate": 3.1518632361434263e-07, "loss": 0.722059965133667, "step": 6538 }, { "epoch": 1.5066820276497697, "grad_norm": 1.1420297201861054, "learning_rate": 3.14908743467329e-07, "loss": 0.7098807096481323, "step": 6539 }, { "epoch": 1.5069124423963134, "grad_norm": 1.1123804283277692, "learning_rate": 3.1463126276040454e-07, "loss": 0.7131781578063965, "step": 6540 }, { "epoch": 1.5071428571428571, "grad_norm": 0.757735402153, "learning_rate": 3.143538815338451e-07, "loss": 0.7292109727859497, "step": 6541 }, { "epoch": 1.507373271889401, "grad_norm": 1.1145586582073062, "learning_rate": 3.1407659982791204e-07, "loss": 0.7305347919464111, "step": 6542 }, { "epoch": 1.5076036866359446, "grad_norm": 1.3246030999705258, "learning_rate": 3.1379941768285247e-07, "loss": 0.8072094321250916, "step": 6543 }, { "epoch": 1.5078341013824885, "grad_norm": 1.2831968996332677, "learning_rate": 3.135223351388987e-07, "loss": 0.8772450685501099, "step": 6544 }, { "epoch": 1.5080645161290323, "grad_norm": 1.1816139196453221, "learning_rate": 3.1324535223626957e-07, "loss": 0.8463687896728516, "step": 6545 }, { "epoch": 1.508294930875576, "grad_norm": 1.1937564350019036, "learning_rate": 3.1296846901516806e-07, "loss": 0.6764696836471558, "step": 6546 }, { "epoch": 1.50852534562212, "grad_norm": 1.198918569491841, "learning_rate": 3.126916855157841e-07, "loss": 0.8395411968231201, "step": 6547 }, { "epoch": 1.5087557603686634, "grad_norm": 1.0607235882989698, "learning_rate": 3.1241500177829195e-07, "loss": 0.8227219581604004, "step": 6548 }, { "epoch": 1.5089861751152074, "grad_norm": 1.1677688606359355, "learning_rate": 3.121384178428519e-07, "loss": 0.7079675197601318, "step": 6549 }, { "epoch": 1.5092165898617511, "grad_norm": 1.2218836381096956, "learning_rate": 3.1186193374961014e-07, "loss": 0.7792578935623169, "step": 6550 }, { "epoch": 1.5094470046082948, "grad_norm": 1.403777710630671, "learning_rate": 3.1158554953869776e-07, "loss": 0.7821195125579834, "step": 6551 }, { "epoch": 1.5096774193548388, "grad_norm": 1.0979873084769438, "learning_rate": 3.1130926525023114e-07, "loss": 0.6640183329582214, "step": 6552 }, { "epoch": 1.5099078341013825, "grad_norm": 1.4504991573195685, "learning_rate": 3.110330809243134e-07, "loss": 0.8087342977523804, "step": 6553 }, { "epoch": 1.5101382488479262, "grad_norm": 1.283455986462282, "learning_rate": 3.1075699660103184e-07, "loss": 0.7716038227081299, "step": 6554 }, { "epoch": 1.5103686635944702, "grad_norm": 1.3208739442859028, "learning_rate": 3.1048101232045943e-07, "loss": 0.8408910632133484, "step": 6555 }, { "epoch": 1.5105990783410137, "grad_norm": 1.3122071557376964, "learning_rate": 3.1020512812265564e-07, "loss": 0.8799750804901123, "step": 6556 }, { "epoch": 1.5108294930875577, "grad_norm": 1.0380590960697713, "learning_rate": 3.0992934404766426e-07, "loss": 0.6534945368766785, "step": 6557 }, { "epoch": 1.5110599078341014, "grad_norm": 1.2848519038217197, "learning_rate": 3.0965366013551507e-07, "loss": 0.8293032646179199, "step": 6558 }, { "epoch": 1.511290322580645, "grad_norm": 1.2211391336842954, "learning_rate": 3.0937807642622295e-07, "loss": 0.7486997842788696, "step": 6559 }, { "epoch": 1.511520737327189, "grad_norm": 0.9417803138002018, "learning_rate": 3.0910259295978914e-07, "loss": 0.7431119680404663, "step": 6560 }, { "epoch": 1.5117511520737326, "grad_norm": 1.255174714268565, "learning_rate": 3.0882720977619927e-07, "loss": 0.945859432220459, "step": 6561 }, { "epoch": 1.5119815668202765, "grad_norm": 1.3772354918121292, "learning_rate": 3.0855192691542487e-07, "loss": 0.7894721031188965, "step": 6562 }, { "epoch": 1.5122119815668202, "grad_norm": 1.59783124946433, "learning_rate": 3.082767444174229e-07, "loss": 0.8593465089797974, "step": 6563 }, { "epoch": 1.512442396313364, "grad_norm": 1.1763179818399963, "learning_rate": 3.080016623221355e-07, "loss": 0.8297405242919922, "step": 6564 }, { "epoch": 1.512672811059908, "grad_norm": 1.2146736952008175, "learning_rate": 3.07726680669491e-07, "loss": 0.7372928857803345, "step": 6565 }, { "epoch": 1.5129032258064516, "grad_norm": 0.987456734373793, "learning_rate": 3.0745179949940235e-07, "loss": 0.7031347751617432, "step": 6566 }, { "epoch": 1.5131336405529954, "grad_norm": 0.9788253167457012, "learning_rate": 3.071770188517679e-07, "loss": 0.7086467742919922, "step": 6567 }, { "epoch": 1.5133640552995393, "grad_norm": 1.2854493361240282, "learning_rate": 3.069023387664723e-07, "loss": 0.9091345071792603, "step": 6568 }, { "epoch": 1.5135944700460828, "grad_norm": 1.3979394006170445, "learning_rate": 3.066277592833847e-07, "loss": 0.7470624446868896, "step": 6569 }, { "epoch": 1.5138248847926268, "grad_norm": 1.2458050386964743, "learning_rate": 3.0635328044235965e-07, "loss": 0.75694739818573, "step": 6570 }, { "epoch": 1.5140552995391705, "grad_norm": 1.1257752667184633, "learning_rate": 3.0607890228323796e-07, "loss": 0.7832024693489075, "step": 6571 }, { "epoch": 1.5142857142857142, "grad_norm": 1.4206979397737705, "learning_rate": 3.0580462484584455e-07, "loss": 0.6777220368385315, "step": 6572 }, { "epoch": 1.5145161290322582, "grad_norm": 1.1010797667803915, "learning_rate": 3.055304481699913e-07, "loss": 0.7748236060142517, "step": 6573 }, { "epoch": 1.5147465437788017, "grad_norm": 1.1639246159957346, "learning_rate": 3.052563722954741e-07, "loss": 0.7495633363723755, "step": 6574 }, { "epoch": 1.5149769585253456, "grad_norm": 1.1319897669216112, "learning_rate": 3.049823972620744e-07, "loss": 0.8011484742164612, "step": 6575 }, { "epoch": 1.5152073732718894, "grad_norm": 1.3878273723563577, "learning_rate": 3.0470852310956e-07, "loss": 0.7480140924453735, "step": 6576 }, { "epoch": 1.515437788018433, "grad_norm": 1.1963673851290149, "learning_rate": 3.0443474987768305e-07, "loss": 0.6561319828033447, "step": 6577 }, { "epoch": 1.515668202764977, "grad_norm": 1.1887729560806304, "learning_rate": 3.041610776061813e-07, "loss": 0.7437188029289246, "step": 6578 }, { "epoch": 1.5158986175115208, "grad_norm": 1.2420532978964127, "learning_rate": 3.0388750633477766e-07, "loss": 0.7429096698760986, "step": 6579 }, { "epoch": 1.5161290322580645, "grad_norm": 1.3505114972693866, "learning_rate": 3.0361403610318125e-07, "loss": 0.859411358833313, "step": 6580 }, { "epoch": 1.5163594470046085, "grad_norm": 0.9758931256825946, "learning_rate": 3.0334066695108565e-07, "loss": 0.7636305093765259, "step": 6581 }, { "epoch": 1.516589861751152, "grad_norm": 1.1796162666849943, "learning_rate": 3.030673989181699e-07, "loss": 0.8331989049911499, "step": 6582 }, { "epoch": 1.516820276497696, "grad_norm": 1.0763217337155384, "learning_rate": 3.0279423204409857e-07, "loss": 0.770574688911438, "step": 6583 }, { "epoch": 1.5170506912442396, "grad_norm": 1.3524367915089308, "learning_rate": 3.025211663685213e-07, "loss": 0.7470898628234863, "step": 6584 }, { "epoch": 1.5172811059907834, "grad_norm": 1.2515745730030696, "learning_rate": 3.022482019310736e-07, "loss": 0.7907510995864868, "step": 6585 }, { "epoch": 1.5175115207373273, "grad_norm": 1.1087989572536945, "learning_rate": 3.019753387713757e-07, "loss": 0.751417338848114, "step": 6586 }, { "epoch": 1.5177419354838708, "grad_norm": 1.3862652872284045, "learning_rate": 3.01702576929033e-07, "loss": 0.8987867832183838, "step": 6587 }, { "epoch": 1.5179723502304148, "grad_norm": 1.2098170472034613, "learning_rate": 3.0142991644363714e-07, "loss": 0.7618268728256226, "step": 6588 }, { "epoch": 1.5182027649769585, "grad_norm": 1.4029958928912587, "learning_rate": 3.011573573547641e-07, "loss": 0.9358207583427429, "step": 6589 }, { "epoch": 1.5184331797235022, "grad_norm": 1.4434031985489326, "learning_rate": 3.008848997019753e-07, "loss": 0.6549144387245178, "step": 6590 }, { "epoch": 1.5186635944700462, "grad_norm": 1.293720092884626, "learning_rate": 3.00612543524818e-07, "loss": 0.8642100095748901, "step": 6591 }, { "epoch": 1.51889400921659, "grad_norm": 1.2852982676947153, "learning_rate": 3.003402888628241e-07, "loss": 0.7348824143409729, "step": 6592 }, { "epoch": 1.5191244239631336, "grad_norm": 1.0897732641421132, "learning_rate": 3.000681357555108e-07, "loss": 0.8737039566040039, "step": 6593 }, { "epoch": 1.5193548387096776, "grad_norm": 1.3095413820866733, "learning_rate": 2.9979608424238134e-07, "loss": 0.749860405921936, "step": 6594 }, { "epoch": 1.519585253456221, "grad_norm": 1.4291988493830527, "learning_rate": 2.99524134362923e-07, "loss": 0.7583779096603394, "step": 6595 }, { "epoch": 1.519815668202765, "grad_norm": 1.1886499728868618, "learning_rate": 2.992522861566095e-07, "loss": 0.7096224427223206, "step": 6596 }, { "epoch": 1.5200460829493088, "grad_norm": 1.3265073494412316, "learning_rate": 2.9898053966289904e-07, "loss": 0.7813585996627808, "step": 6597 }, { "epoch": 1.5202764976958525, "grad_norm": 1.3753919073529044, "learning_rate": 2.9870889492123517e-07, "loss": 0.7744605541229248, "step": 6598 }, { "epoch": 1.5205069124423964, "grad_norm": 1.4661404938087315, "learning_rate": 2.984373519710469e-07, "loss": 0.8398552536964417, "step": 6599 }, { "epoch": 1.52073732718894, "grad_norm": 1.1837780856173943, "learning_rate": 2.981659108517478e-07, "loss": 0.6853294372558594, "step": 6600 }, { "epoch": 1.520967741935484, "grad_norm": 0.9892560165373243, "learning_rate": 2.97894571602738e-07, "loss": 0.7673987150192261, "step": 6601 }, { "epoch": 1.5211981566820276, "grad_norm": 1.0638042713840496, "learning_rate": 2.976233342634017e-07, "loss": 0.7000377774238586, "step": 6602 }, { "epoch": 1.5214285714285714, "grad_norm": 1.2089273111808856, "learning_rate": 2.9735219887310857e-07, "loss": 0.8429346680641174, "step": 6603 }, { "epoch": 1.5216589861751153, "grad_norm": 1.4255685153178952, "learning_rate": 2.970811654712133e-07, "loss": 0.9118648767471313, "step": 6604 }, { "epoch": 1.521889400921659, "grad_norm": 1.0974145188834663, "learning_rate": 2.9681023409705666e-07, "loss": 0.7745784521102905, "step": 6605 }, { "epoch": 1.5221198156682028, "grad_norm": 1.234720575381531, "learning_rate": 2.9653940478996367e-07, "loss": 0.8481245040893555, "step": 6606 }, { "epoch": 1.5223502304147467, "grad_norm": 1.1446582960275502, "learning_rate": 2.9626867758924436e-07, "loss": 0.8643463850021362, "step": 6607 }, { "epoch": 1.5225806451612902, "grad_norm": 1.6406368897457513, "learning_rate": 2.959980525341953e-07, "loss": 0.9524952173233032, "step": 6608 }, { "epoch": 1.5228110599078342, "grad_norm": 1.067119300713527, "learning_rate": 2.9572752966409686e-07, "loss": 0.7153829336166382, "step": 6609 }, { "epoch": 1.523041474654378, "grad_norm": 1.1739681134356785, "learning_rate": 2.954571090182149e-07, "loss": 0.8332774639129639, "step": 6610 }, { "epoch": 1.5232718894009216, "grad_norm": 1.3773090684366749, "learning_rate": 2.9518679063580123e-07, "loss": 0.7511743307113647, "step": 6611 }, { "epoch": 1.5235023041474656, "grad_norm": 1.2327774867248482, "learning_rate": 2.9491657455609175e-07, "loss": 0.715233325958252, "step": 6612 }, { "epoch": 1.523732718894009, "grad_norm": 1.139323635074032, "learning_rate": 2.946464608183078e-07, "loss": 0.7386246919631958, "step": 6613 }, { "epoch": 1.523963133640553, "grad_norm": 1.1904592003911236, "learning_rate": 2.943764494616565e-07, "loss": 0.8337790369987488, "step": 6614 }, { "epoch": 1.5241935483870968, "grad_norm": 1.394927398157402, "learning_rate": 2.941065405253296e-07, "loss": 0.8447855710983276, "step": 6615 }, { "epoch": 1.5244239631336405, "grad_norm": 1.1307960049130217, "learning_rate": 2.938367340485035e-07, "loss": 0.7430610060691833, "step": 6616 }, { "epoch": 1.5246543778801844, "grad_norm": 1.134552871583557, "learning_rate": 2.9356703007034087e-07, "loss": 0.7740806937217712, "step": 6617 }, { "epoch": 1.5248847926267282, "grad_norm": 1.2516085920875086, "learning_rate": 2.9329742862998875e-07, "loss": 0.7824152708053589, "step": 6618 }, { "epoch": 1.5251152073732719, "grad_norm": 1.0852675062610386, "learning_rate": 2.930279297665792e-07, "loss": 0.9222463965415955, "step": 6619 }, { "epoch": 1.5253456221198156, "grad_norm": 1.8096931577931101, "learning_rate": 2.927585335192294e-07, "loss": 0.9548497200012207, "step": 6620 }, { "epoch": 1.5255760368663593, "grad_norm": 1.497275795232007, "learning_rate": 2.9248923992704255e-07, "loss": 0.9007906913757324, "step": 6621 }, { "epoch": 1.5258064516129033, "grad_norm": 1.0647051889661132, "learning_rate": 2.9222004902910593e-07, "loss": 0.6932169198989868, "step": 6622 }, { "epoch": 1.526036866359447, "grad_norm": 0.9763599663388729, "learning_rate": 2.919509608644922e-07, "loss": 0.7327853441238403, "step": 6623 }, { "epoch": 1.5262672811059907, "grad_norm": 1.423305414970627, "learning_rate": 2.916819754722588e-07, "loss": 0.617963433265686, "step": 6624 }, { "epoch": 1.5264976958525347, "grad_norm": 1.3790687935494703, "learning_rate": 2.914130928914493e-07, "loss": 1.0567349195480347, "step": 6625 }, { "epoch": 1.5267281105990782, "grad_norm": 1.243824261339929, "learning_rate": 2.9114431316109145e-07, "loss": 0.7362378835678101, "step": 6626 }, { "epoch": 1.5269585253456222, "grad_norm": 1.1636178458595106, "learning_rate": 2.9087563632019774e-07, "loss": 0.6879991888999939, "step": 6627 }, { "epoch": 1.5271889400921659, "grad_norm": 1.2540530060828472, "learning_rate": 2.9060706240776686e-07, "loss": 0.7804177403450012, "step": 6628 }, { "epoch": 1.5274193548387096, "grad_norm": 1.2450061818881997, "learning_rate": 2.9033859146278197e-07, "loss": 0.7459548711776733, "step": 6629 }, { "epoch": 1.5276497695852536, "grad_norm": 1.1214229491247267, "learning_rate": 2.900702235242106e-07, "loss": 0.7392233610153198, "step": 6630 }, { "epoch": 1.5278801843317973, "grad_norm": 1.0862664338119448, "learning_rate": 2.8980195863100675e-07, "loss": 0.6956135034561157, "step": 6631 }, { "epoch": 1.528110599078341, "grad_norm": 1.1232709572579735, "learning_rate": 2.8953379682210856e-07, "loss": 0.7042561769485474, "step": 6632 }, { "epoch": 1.5283410138248847, "grad_norm": 1.070241779197473, "learning_rate": 2.8926573813643884e-07, "loss": 0.7114298343658447, "step": 6633 }, { "epoch": 1.5285714285714285, "grad_norm": 1.0297537166419386, "learning_rate": 2.8899778261290664e-07, "loss": 0.862826943397522, "step": 6634 }, { "epoch": 1.5288018433179724, "grad_norm": 1.3240716498057261, "learning_rate": 2.8872993029040506e-07, "loss": 0.8229889869689941, "step": 6635 }, { "epoch": 1.5290322580645161, "grad_norm": 1.2292174291080764, "learning_rate": 2.884621812078122e-07, "loss": 0.8058778047561646, "step": 6636 }, { "epoch": 1.5292626728110599, "grad_norm": 1.2782782809475366, "learning_rate": 2.881945354039921e-07, "loss": 0.8150385618209839, "step": 6637 }, { "epoch": 1.5294930875576038, "grad_norm": 1.1137449533588037, "learning_rate": 2.8792699291779276e-07, "loss": 0.7067136168479919, "step": 6638 }, { "epoch": 1.5297235023041473, "grad_norm": 1.2793329729310776, "learning_rate": 2.8765955378804784e-07, "loss": 0.7725155353546143, "step": 6639 }, { "epoch": 1.5299539170506913, "grad_norm": 1.0584861581127705, "learning_rate": 2.873922180535754e-07, "loss": 0.5956720113754272, "step": 6640 }, { "epoch": 1.530184331797235, "grad_norm": 1.1955034677005214, "learning_rate": 2.8712498575317934e-07, "loss": 0.6506170630455017, "step": 6641 }, { "epoch": 1.5304147465437787, "grad_norm": 1.0781697188392338, "learning_rate": 2.86857856925648e-07, "loss": 0.7860926985740662, "step": 6642 }, { "epoch": 1.5306451612903227, "grad_norm": 1.1840723689685375, "learning_rate": 2.8659083160975464e-07, "loss": 0.7003993391990662, "step": 6643 }, { "epoch": 1.5308755760368664, "grad_norm": 1.1562706768971642, "learning_rate": 2.8632390984425746e-07, "loss": 0.6887079477310181, "step": 6644 }, { "epoch": 1.5311059907834101, "grad_norm": 1.243117329825752, "learning_rate": 2.860570916678998e-07, "loss": 0.788282036781311, "step": 6645 }, { "epoch": 1.5313364055299539, "grad_norm": 1.273283187040626, "learning_rate": 2.8579037711941043e-07, "loss": 0.771350085735321, "step": 6646 }, { "epoch": 1.5315668202764976, "grad_norm": 1.1000030346921834, "learning_rate": 2.855237662375021e-07, "loss": 0.6418509483337402, "step": 6647 }, { "epoch": 1.5317972350230415, "grad_norm": 1.022873677691871, "learning_rate": 2.852572590608735e-07, "loss": 0.6606692671775818, "step": 6648 }, { "epoch": 1.5320276497695853, "grad_norm": 1.4727879897773712, "learning_rate": 2.849908556282076e-07, "loss": 0.8623934984207153, "step": 6649 }, { "epoch": 1.532258064516129, "grad_norm": 1.1678986803146219, "learning_rate": 2.8472455597817215e-07, "loss": 0.848737359046936, "step": 6650 }, { "epoch": 1.532488479262673, "grad_norm": 1.2265451299303025, "learning_rate": 2.844583601494207e-07, "loss": 0.7156505584716797, "step": 6651 }, { "epoch": 1.5327188940092165, "grad_norm": 1.157360063816448, "learning_rate": 2.8419226818059116e-07, "loss": 0.598319411277771, "step": 6652 }, { "epoch": 1.5329493087557604, "grad_norm": 1.0128877845083564, "learning_rate": 2.8392628011030585e-07, "loss": 0.6320680379867554, "step": 6653 }, { "epoch": 1.5331797235023041, "grad_norm": 1.2437383042471344, "learning_rate": 2.836603959771734e-07, "loss": 0.8770536184310913, "step": 6654 }, { "epoch": 1.5334101382488479, "grad_norm": 1.3327586940769975, "learning_rate": 2.833946158197862e-07, "loss": 0.896265983581543, "step": 6655 }, { "epoch": 1.5336405529953918, "grad_norm": 1.1058301341236145, "learning_rate": 2.8312893967672145e-07, "loss": 0.7194868326187134, "step": 6656 }, { "epoch": 1.5338709677419353, "grad_norm": 1.1479450761132848, "learning_rate": 2.828633675865425e-07, "loss": 0.7993383407592773, "step": 6657 }, { "epoch": 1.5341013824884793, "grad_norm": 1.3252275312162691, "learning_rate": 2.8259789958779635e-07, "loss": 0.6808127760887146, "step": 6658 }, { "epoch": 1.534331797235023, "grad_norm": 1.3083456260381565, "learning_rate": 2.823325357190153e-07, "loss": 0.7348822355270386, "step": 6659 }, { "epoch": 1.5345622119815667, "grad_norm": 1.4520629186425333, "learning_rate": 2.820672760187166e-07, "loss": 0.7729920744895935, "step": 6660 }, { "epoch": 1.5347926267281107, "grad_norm": 1.1927593175103235, "learning_rate": 2.818021205254021e-07, "loss": 0.803922176361084, "step": 6661 }, { "epoch": 1.5350230414746544, "grad_norm": 1.1316086785563555, "learning_rate": 2.815370692775594e-07, "loss": 0.7931007146835327, "step": 6662 }, { "epoch": 1.5352534562211981, "grad_norm": 0.9381855495475373, "learning_rate": 2.8127212231365995e-07, "loss": 0.7990511655807495, "step": 6663 }, { "epoch": 1.535483870967742, "grad_norm": 1.1449374360466444, "learning_rate": 2.8100727967216043e-07, "loss": 0.8163471817970276, "step": 6664 }, { "epoch": 1.5357142857142856, "grad_norm": 1.126530672311672, "learning_rate": 2.8074254139150225e-07, "loss": 0.7628358602523804, "step": 6665 }, { "epoch": 1.5359447004608295, "grad_norm": 1.216707261403855, "learning_rate": 2.8047790751011216e-07, "loss": 0.8008173704147339, "step": 6666 }, { "epoch": 1.5361751152073733, "grad_norm": 1.4385072008960633, "learning_rate": 2.802133780664013e-07, "loss": 0.9139487743377686, "step": 6667 }, { "epoch": 1.536405529953917, "grad_norm": 1.479452922561271, "learning_rate": 2.7994895309876555e-07, "loss": 0.9436901211738586, "step": 6668 }, { "epoch": 1.536635944700461, "grad_norm": 1.1137684825301204, "learning_rate": 2.7968463264558617e-07, "loss": 0.8072221875190735, "step": 6669 }, { "epoch": 1.5368663594470044, "grad_norm": 1.4031563621096825, "learning_rate": 2.7942041674522866e-07, "loss": 0.7434822916984558, "step": 6670 }, { "epoch": 1.5370967741935484, "grad_norm": 1.1245525381043615, "learning_rate": 2.7915630543604394e-07, "loss": 0.6729850769042969, "step": 6671 }, { "epoch": 1.5373271889400921, "grad_norm": 1.2279789151687839, "learning_rate": 2.7889229875636723e-07, "loss": 0.8752315044403076, "step": 6672 }, { "epoch": 1.5375576036866359, "grad_norm": 1.2125823370266373, "learning_rate": 2.786283967445184e-07, "loss": 0.8519413471221924, "step": 6673 }, { "epoch": 1.5377880184331798, "grad_norm": 1.2674824603159123, "learning_rate": 2.783645994388032e-07, "loss": 0.8868448734283447, "step": 6674 }, { "epoch": 1.5380184331797235, "grad_norm": 1.2984993367707722, "learning_rate": 2.78100906877511e-07, "loss": 0.9223456978797913, "step": 6675 }, { "epoch": 1.5382488479262673, "grad_norm": 1.0080180068423799, "learning_rate": 2.7783731909891616e-07, "loss": 0.799191951751709, "step": 6676 }, { "epoch": 1.5384792626728112, "grad_norm": 1.1987572506109172, "learning_rate": 2.775738361412788e-07, "loss": 0.7092995643615723, "step": 6677 }, { "epoch": 1.5387096774193547, "grad_norm": 1.2206610409098804, "learning_rate": 2.7731045804284283e-07, "loss": 0.674687385559082, "step": 6678 }, { "epoch": 1.5389400921658987, "grad_norm": 1.4910052625734944, "learning_rate": 2.77047184841837e-07, "loss": 0.7366930246353149, "step": 6679 }, { "epoch": 1.5391705069124424, "grad_norm": 1.169385374165895, "learning_rate": 2.767840165764753e-07, "loss": 0.838137149810791, "step": 6680 }, { "epoch": 1.5394009216589861, "grad_norm": 1.2120746756764942, "learning_rate": 2.765209532849558e-07, "loss": 0.7507175803184509, "step": 6681 }, { "epoch": 1.53963133640553, "grad_norm": 1.2981666739842812, "learning_rate": 2.7625799500546267e-07, "loss": 0.8157602548599243, "step": 6682 }, { "epoch": 1.5398617511520736, "grad_norm": 1.2345607869860449, "learning_rate": 2.7599514177616333e-07, "loss": 0.7779219150543213, "step": 6683 }, { "epoch": 1.5400921658986175, "grad_norm": 1.186692939443946, "learning_rate": 2.757323936352106e-07, "loss": 0.8261638879776001, "step": 6684 }, { "epoch": 1.5403225806451613, "grad_norm": 0.8917527422638705, "learning_rate": 2.7546975062074197e-07, "loss": 0.6139177680015564, "step": 6685 }, { "epoch": 1.540552995391705, "grad_norm": 1.0945474995666544, "learning_rate": 2.752072127708802e-07, "loss": 0.744202733039856, "step": 6686 }, { "epoch": 1.540783410138249, "grad_norm": 1.279582503351568, "learning_rate": 2.749447801237319e-07, "loss": 0.7685158848762512, "step": 6687 }, { "epoch": 1.5410138248847927, "grad_norm": 1.4134776465364736, "learning_rate": 2.7468245271738865e-07, "loss": 0.7483633756637573, "step": 6688 }, { "epoch": 1.5412442396313364, "grad_norm": 1.4452963556936742, "learning_rate": 2.7442023058992746e-07, "loss": 0.8967286348342896, "step": 6689 }, { "epoch": 1.5414746543778803, "grad_norm": 3.4447797406152922, "learning_rate": 2.7415811377940933e-07, "loss": 0.8035085201263428, "step": 6690 }, { "epoch": 1.5417050691244238, "grad_norm": 1.2535208224880003, "learning_rate": 2.738961023238798e-07, "loss": 0.8504149913787842, "step": 6691 }, { "epoch": 1.5419354838709678, "grad_norm": 1.408249398601243, "learning_rate": 2.736341962613701e-07, "loss": 0.7612431049346924, "step": 6692 }, { "epoch": 1.5421658986175115, "grad_norm": 1.3117649202054886, "learning_rate": 2.733723956298951e-07, "loss": 0.6974390745162964, "step": 6693 }, { "epoch": 1.5423963133640552, "grad_norm": 1.10015572050179, "learning_rate": 2.7311070046745476e-07, "loss": 0.7946817874908447, "step": 6694 }, { "epoch": 1.5426267281105992, "grad_norm": 1.3598767034128523, "learning_rate": 2.728491108120342e-07, "loss": 0.7801793813705444, "step": 6695 }, { "epoch": 1.5428571428571427, "grad_norm": 1.0989233619042245, "learning_rate": 2.725876267016023e-07, "loss": 0.720335066318512, "step": 6696 }, { "epoch": 1.5430875576036867, "grad_norm": 0.9331707903973574, "learning_rate": 2.7232624817411376e-07, "loss": 0.6820393800735474, "step": 6697 }, { "epoch": 1.5433179723502304, "grad_norm": 1.2636082158419006, "learning_rate": 2.7206497526750694e-07, "loss": 0.8217613697052002, "step": 6698 }, { "epoch": 1.543548387096774, "grad_norm": 1.2388683954169015, "learning_rate": 2.7180380801970525e-07, "loss": 0.7600520849227905, "step": 6699 }, { "epoch": 1.543778801843318, "grad_norm": 1.2564669684453122, "learning_rate": 2.7154274646861687e-07, "loss": 0.9402344226837158, "step": 6700 }, { "epoch": 1.5440092165898618, "grad_norm": 1.0720415723340906, "learning_rate": 2.7128179065213417e-07, "loss": 0.7470760345458984, "step": 6701 }, { "epoch": 1.5442396313364055, "grad_norm": 1.0091593723711232, "learning_rate": 2.710209406081353e-07, "loss": 0.6915948390960693, "step": 6702 }, { "epoch": 1.5444700460829495, "grad_norm": 1.1829806437851378, "learning_rate": 2.707601963744817e-07, "loss": 0.7554904222488403, "step": 6703 }, { "epoch": 1.544700460829493, "grad_norm": 0.9892324198221251, "learning_rate": 2.7049955798902026e-07, "loss": 0.8197575807571411, "step": 6704 }, { "epoch": 1.544930875576037, "grad_norm": 1.3144339350992138, "learning_rate": 2.702390254895819e-07, "loss": 0.7106794118881226, "step": 6705 }, { "epoch": 1.5451612903225806, "grad_norm": 1.1715761852419602, "learning_rate": 2.699785989139832e-07, "loss": 0.6320512294769287, "step": 6706 }, { "epoch": 1.5453917050691244, "grad_norm": 1.2156391686389374, "learning_rate": 2.697182783000246e-07, "loss": 0.8327566385269165, "step": 6707 }, { "epoch": 1.5456221198156683, "grad_norm": 1.2605126330062313, "learning_rate": 2.6945806368549063e-07, "loss": 0.8732178211212158, "step": 6708 }, { "epoch": 1.5458525345622118, "grad_norm": 1.3881676599881438, "learning_rate": 2.69197955108152e-07, "loss": 0.8709380626678467, "step": 6709 }, { "epoch": 1.5460829493087558, "grad_norm": 1.2029107229444744, "learning_rate": 2.689379526057628e-07, "loss": 0.7821739912033081, "step": 6710 }, { "epoch": 1.5463133640552995, "grad_norm": 1.2268892680878298, "learning_rate": 2.686780562160615e-07, "loss": 0.8658162355422974, "step": 6711 }, { "epoch": 1.5465437788018432, "grad_norm": 0.9914521746084854, "learning_rate": 2.6841826597677274e-07, "loss": 0.6354731321334839, "step": 6712 }, { "epoch": 1.5467741935483872, "grad_norm": 1.132983970089502, "learning_rate": 2.68158581925604e-07, "loss": 0.8000082969665527, "step": 6713 }, { "epoch": 1.547004608294931, "grad_norm": 1.0140012222754493, "learning_rate": 2.6789900410024804e-07, "loss": 0.7998030185699463, "step": 6714 }, { "epoch": 1.5472350230414746, "grad_norm": 1.2207312006862205, "learning_rate": 2.676395325383827e-07, "loss": 0.861609935760498, "step": 6715 }, { "epoch": 1.5474654377880186, "grad_norm": 1.2739007648131329, "learning_rate": 2.6738016727766976e-07, "loss": 0.8119577765464783, "step": 6716 }, { "epoch": 1.547695852534562, "grad_norm": 1.1272023201701244, "learning_rate": 2.671209083557553e-07, "loss": 0.7704594135284424, "step": 6717 }, { "epoch": 1.547926267281106, "grad_norm": 1.1924986504981143, "learning_rate": 2.6686175581027114e-07, "loss": 0.7577236890792847, "step": 6718 }, { "epoch": 1.5481566820276498, "grad_norm": 1.438095427566863, "learning_rate": 2.666027096788326e-07, "loss": 0.8362265825271606, "step": 6719 }, { "epoch": 1.5483870967741935, "grad_norm": 1.3282450269784174, "learning_rate": 2.6634376999903984e-07, "loss": 0.7604315280914307, "step": 6720 }, { "epoch": 1.5486175115207375, "grad_norm": 1.0996855935996066, "learning_rate": 2.6608493680847757e-07, "loss": 0.7181323766708374, "step": 6721 }, { "epoch": 1.548847926267281, "grad_norm": 1.408245929611007, "learning_rate": 2.6582621014471495e-07, "loss": 0.8613896369934082, "step": 6722 }, { "epoch": 1.549078341013825, "grad_norm": 1.1355853758662044, "learning_rate": 2.6556759004530616e-07, "loss": 0.6254151463508606, "step": 6723 }, { "epoch": 1.5493087557603686, "grad_norm": 1.1737642272227355, "learning_rate": 2.6530907654778957e-07, "loss": 0.7960973381996155, "step": 6724 }, { "epoch": 1.5495391705069124, "grad_norm": 1.1419390810119388, "learning_rate": 2.6505066968968747e-07, "loss": 0.7899094820022583, "step": 6725 }, { "epoch": 1.5497695852534563, "grad_norm": 0.9820941780775652, "learning_rate": 2.647923695085081e-07, "loss": 0.6578950881958008, "step": 6726 }, { "epoch": 1.55, "grad_norm": 1.3013325638388529, "learning_rate": 2.64534176041743e-07, "loss": 0.737798810005188, "step": 6727 }, { "epoch": 1.5502304147465438, "grad_norm": 0.9487414790323747, "learning_rate": 2.642760893268684e-07, "loss": 0.7809627056121826, "step": 6728 }, { "epoch": 1.5504608294930877, "grad_norm": 0.9991258167716155, "learning_rate": 2.640181094013456e-07, "loss": 0.6693655252456665, "step": 6729 }, { "epoch": 1.5506912442396312, "grad_norm": 0.8705752911958233, "learning_rate": 2.6376023630262003e-07, "loss": 0.7264609932899475, "step": 6730 }, { "epoch": 1.5509216589861752, "grad_norm": 1.0975251127061347, "learning_rate": 2.635024700681211e-07, "loss": 0.7585712671279907, "step": 6731 }, { "epoch": 1.551152073732719, "grad_norm": 1.520332751892112, "learning_rate": 2.6324481073526404e-07, "loss": 0.7335324287414551, "step": 6732 }, { "epoch": 1.5513824884792626, "grad_norm": 1.1271215778218124, "learning_rate": 2.629872583414473e-07, "loss": 0.835372805595398, "step": 6733 }, { "epoch": 1.5516129032258066, "grad_norm": 1.231737661164668, "learning_rate": 2.6272981292405405e-07, "loss": 0.8069926500320435, "step": 6734 }, { "epoch": 1.55184331797235, "grad_norm": 1.2110282300687614, "learning_rate": 2.6247247452045285e-07, "loss": 0.7548434138298035, "step": 6735 }, { "epoch": 1.552073732718894, "grad_norm": 1.281837931597139, "learning_rate": 2.6221524316799546e-07, "loss": 0.6907505989074707, "step": 6736 }, { "epoch": 1.5523041474654378, "grad_norm": 1.2384070012918627, "learning_rate": 2.619581189040185e-07, "loss": 0.8544988632202148, "step": 6737 }, { "epoch": 1.5525345622119815, "grad_norm": 1.024260684065218, "learning_rate": 2.6170110176584404e-07, "loss": 0.7176710367202759, "step": 6738 }, { "epoch": 1.5527649769585254, "grad_norm": 1.1771656195687117, "learning_rate": 2.6144419179077715e-07, "loss": 0.7160323858261108, "step": 6739 }, { "epoch": 1.5529953917050692, "grad_norm": 1.2619778254885654, "learning_rate": 2.6118738901610806e-07, "loss": 0.7749248743057251, "step": 6740 }, { "epoch": 1.553225806451613, "grad_norm": 1.3014936029444653, "learning_rate": 2.6093069347911145e-07, "loss": 0.7701436281204224, "step": 6741 }, { "epoch": 1.5534562211981566, "grad_norm": 1.2206842608778186, "learning_rate": 2.606741052170459e-07, "loss": 0.6725181341171265, "step": 6742 }, { "epoch": 1.5536866359447004, "grad_norm": 1.0193653205430255, "learning_rate": 2.6041762426715563e-07, "loss": 0.7730624675750732, "step": 6743 }, { "epoch": 1.5539170506912443, "grad_norm": 0.9417911057706564, "learning_rate": 2.601612506666682e-07, "loss": 0.7083867788314819, "step": 6744 }, { "epoch": 1.554147465437788, "grad_norm": 1.1436343405561136, "learning_rate": 2.599049844527953e-07, "loss": 0.7680408954620361, "step": 6745 }, { "epoch": 1.5543778801843318, "grad_norm": 0.9401611092461176, "learning_rate": 2.596488256627346e-07, "loss": 0.7145194411277771, "step": 6746 }, { "epoch": 1.5546082949308757, "grad_norm": 1.6305632532659482, "learning_rate": 2.593927743336667e-07, "loss": 0.8626812696456909, "step": 6747 }, { "epoch": 1.5548387096774192, "grad_norm": 1.1326626029703477, "learning_rate": 2.591368305027569e-07, "loss": 0.775201678276062, "step": 6748 }, { "epoch": 1.5550691244239632, "grad_norm": 1.1775115850016065, "learning_rate": 2.588809942071557e-07, "loss": 0.9363858699798584, "step": 6749 }, { "epoch": 1.555299539170507, "grad_norm": 1.0406152793499837, "learning_rate": 2.5862526548399697e-07, "loss": 0.8079385757446289, "step": 6750 }, { "epoch": 1.5555299539170506, "grad_norm": 1.2405408742249928, "learning_rate": 2.5836964437039934e-07, "loss": 0.8635082840919495, "step": 6751 }, { "epoch": 1.5557603686635946, "grad_norm": 1.072904507718934, "learning_rate": 2.581141309034662e-07, "loss": 0.7840827703475952, "step": 6752 }, { "epoch": 1.5559907834101383, "grad_norm": 1.202200191511419, "learning_rate": 2.5785872512028497e-07, "loss": 0.7833336591720581, "step": 6753 }, { "epoch": 1.556221198156682, "grad_norm": 1.2301348726534915, "learning_rate": 2.576034270579269e-07, "loss": 0.7340226173400879, "step": 6754 }, { "epoch": 1.5564516129032258, "grad_norm": 0.9782804135142905, "learning_rate": 2.5734823675344895e-07, "loss": 0.6423541307449341, "step": 6755 }, { "epoch": 1.5566820276497695, "grad_norm": 1.1992594758940591, "learning_rate": 2.570931542438913e-07, "loss": 0.7772454619407654, "step": 6756 }, { "epoch": 1.5569124423963134, "grad_norm": 1.192101331643462, "learning_rate": 2.568381795662785e-07, "loss": 0.8113390803337097, "step": 6757 }, { "epoch": 1.5571428571428572, "grad_norm": 1.1257023205339645, "learning_rate": 2.5658331275762045e-07, "loss": 0.6688467264175415, "step": 6758 }, { "epoch": 1.557373271889401, "grad_norm": 1.0966214019602503, "learning_rate": 2.5632855385491037e-07, "loss": 0.8140766620635986, "step": 6759 }, { "epoch": 1.5576036866359448, "grad_norm": 1.0260387911312179, "learning_rate": 2.560739028951262e-07, "loss": 0.7661154270172119, "step": 6760 }, { "epoch": 1.5578341013824883, "grad_norm": 1.2298722431512563, "learning_rate": 2.558193599152302e-07, "loss": 0.6781749725341797, "step": 6761 }, { "epoch": 1.5580645161290323, "grad_norm": 1.62266115954538, "learning_rate": 2.5556492495216865e-07, "loss": 0.8885331749916077, "step": 6762 }, { "epoch": 1.558294930875576, "grad_norm": 1.3197551931331304, "learning_rate": 2.55310598042873e-07, "loss": 0.799277663230896, "step": 6763 }, { "epoch": 1.5585253456221198, "grad_norm": 1.205426943239231, "learning_rate": 2.550563792242583e-07, "loss": 0.8288404941558838, "step": 6764 }, { "epoch": 1.5587557603686637, "grad_norm": 1.1206026594489704, "learning_rate": 2.5480226853322397e-07, "loss": 0.9452340602874756, "step": 6765 }, { "epoch": 1.5589861751152074, "grad_norm": 1.068059951967386, "learning_rate": 2.5454826600665347e-07, "loss": 0.6716231107711792, "step": 6766 }, { "epoch": 1.5592165898617512, "grad_norm": 0.9885922984637816, "learning_rate": 2.542943716814157e-07, "loss": 0.90239417552948, "step": 6767 }, { "epoch": 1.5594470046082949, "grad_norm": 1.306788685526263, "learning_rate": 2.5404058559436225e-07, "loss": 0.7895521521568298, "step": 6768 }, { "epoch": 1.5596774193548386, "grad_norm": 1.1707304874415911, "learning_rate": 2.537869077823307e-07, "loss": 0.8097352385520935, "step": 6769 }, { "epoch": 1.5599078341013826, "grad_norm": 1.2075274904697726, "learning_rate": 2.535333382821415e-07, "loss": 0.7599455118179321, "step": 6770 }, { "epoch": 1.5601382488479263, "grad_norm": 1.3869678105449568, "learning_rate": 2.5327987713059986e-07, "loss": 0.8735921382904053, "step": 6771 }, { "epoch": 1.56036866359447, "grad_norm": 1.3359870563601237, "learning_rate": 2.530265243644958e-07, "loss": 0.7263825535774231, "step": 6772 }, { "epoch": 1.560599078341014, "grad_norm": 1.2240386443766704, "learning_rate": 2.5277328002060296e-07, "loss": 0.8642966747283936, "step": 6773 }, { "epoch": 1.5608294930875575, "grad_norm": 0.9860249164323385, "learning_rate": 2.525201441356789e-07, "loss": 0.6928948163986206, "step": 6774 }, { "epoch": 1.5610599078341014, "grad_norm": 1.0977742625281808, "learning_rate": 2.522671167464667e-07, "loss": 0.7841427326202393, "step": 6775 }, { "epoch": 1.5612903225806452, "grad_norm": 1.102415991736206, "learning_rate": 2.5201419788969267e-07, "loss": 0.6539766192436218, "step": 6776 }, { "epoch": 1.5615207373271889, "grad_norm": 1.3087388284847004, "learning_rate": 2.5176138760206734e-07, "loss": 0.7817956805229187, "step": 6777 }, { "epoch": 1.5617511520737328, "grad_norm": 1.2176256508295467, "learning_rate": 2.5150868592028626e-07, "loss": 0.7847198843955994, "step": 6778 }, { "epoch": 1.5619815668202763, "grad_norm": 1.1618742898915668, "learning_rate": 2.5125609288102856e-07, "loss": 0.8248952627182007, "step": 6779 }, { "epoch": 1.5622119815668203, "grad_norm": 1.315546210832164, "learning_rate": 2.510036085209578e-07, "loss": 0.8099820613861084, "step": 6780 }, { "epoch": 1.562442396313364, "grad_norm": 1.0605499181430498, "learning_rate": 2.5075123287672173e-07, "loss": 0.7764754295349121, "step": 6781 }, { "epoch": 1.5626728110599077, "grad_norm": 1.376636441531992, "learning_rate": 2.5049896598495234e-07, "loss": 0.8055214285850525, "step": 6782 }, { "epoch": 1.5629032258064517, "grad_norm": 1.2086842805836235, "learning_rate": 2.502468078822656e-07, "loss": 0.7536123991012573, "step": 6783 }, { "epoch": 1.5631336405529954, "grad_norm": 1.3330366284043236, "learning_rate": 2.499947586052623e-07, "loss": 0.8212461471557617, "step": 6784 }, { "epoch": 1.5633640552995391, "grad_norm": 1.087165735027238, "learning_rate": 2.49742818190527e-07, "loss": 0.7297977209091187, "step": 6785 }, { "epoch": 1.563594470046083, "grad_norm": 1.3633339944793545, "learning_rate": 2.494909866746282e-07, "loss": 0.752082109451294, "step": 6786 }, { "epoch": 1.5638248847926266, "grad_norm": 1.231077416550479, "learning_rate": 2.4923926409411934e-07, "loss": 0.9181928634643555, "step": 6787 }, { "epoch": 1.5640552995391706, "grad_norm": 1.263799738870316, "learning_rate": 2.489876504855374e-07, "loss": 0.8607058525085449, "step": 6788 }, { "epoch": 1.5642857142857143, "grad_norm": 1.4864085600196295, "learning_rate": 2.4873614588540347e-07, "loss": 0.9659625887870789, "step": 6789 }, { "epoch": 1.564516129032258, "grad_norm": 1.7132447669994355, "learning_rate": 2.4848475033022377e-07, "loss": 0.8357822299003601, "step": 6790 }, { "epoch": 1.564746543778802, "grad_norm": 1.4493565138453182, "learning_rate": 2.482334638564877e-07, "loss": 0.7871281504631042, "step": 6791 }, { "epoch": 1.5649769585253455, "grad_norm": 0.9644716518923556, "learning_rate": 2.4798228650066874e-07, "loss": 0.7221591472625732, "step": 6792 }, { "epoch": 1.5652073732718894, "grad_norm": 1.217051022182652, "learning_rate": 2.4773121829922586e-07, "loss": 0.7399123907089233, "step": 6793 }, { "epoch": 1.5654377880184331, "grad_norm": 1.6036073035934815, "learning_rate": 2.474802592886003e-07, "loss": 0.8159279227256775, "step": 6794 }, { "epoch": 1.5656682027649769, "grad_norm": 1.208678395846015, "learning_rate": 2.472294095052192e-07, "loss": 0.8222753405570984, "step": 6795 }, { "epoch": 1.5658986175115208, "grad_norm": 1.0411919729384558, "learning_rate": 2.469786689854928e-07, "loss": 0.6586673259735107, "step": 6796 }, { "epoch": 1.5661290322580645, "grad_norm": 1.0728597460775429, "learning_rate": 2.467280377658154e-07, "loss": 0.8361790180206299, "step": 6797 }, { "epoch": 1.5663594470046083, "grad_norm": 1.2928413385952742, "learning_rate": 2.464775158825665e-07, "loss": 0.7669099569320679, "step": 6798 }, { "epoch": 1.5665898617511522, "grad_norm": 1.331214255352709, "learning_rate": 2.462271033721086e-07, "loss": 0.7876452207565308, "step": 6799 }, { "epoch": 1.5668202764976957, "grad_norm": 1.2617656160077577, "learning_rate": 2.459768002707887e-07, "loss": 0.7932916879653931, "step": 6800 }, { "epoch": 1.5670506912442397, "grad_norm": 1.1101874723309544, "learning_rate": 2.457266066149382e-07, "loss": 0.734020471572876, "step": 6801 }, { "epoch": 1.5672811059907834, "grad_norm": 1.2001011742733312, "learning_rate": 2.4547652244087216e-07, "loss": 0.6975284814834595, "step": 6802 }, { "epoch": 1.5675115207373271, "grad_norm": 1.213830843525294, "learning_rate": 2.452265477848896e-07, "loss": 0.7214465737342834, "step": 6803 }, { "epoch": 1.567741935483871, "grad_norm": 1.1586033079782525, "learning_rate": 2.4497668268327485e-07, "loss": 0.8645110130310059, "step": 6804 }, { "epoch": 1.5679723502304146, "grad_norm": 1.0991857687698348, "learning_rate": 2.4472692717229504e-07, "loss": 0.7389887571334839, "step": 6805 }, { "epoch": 1.5682027649769585, "grad_norm": 1.206958266137894, "learning_rate": 2.4447728128820165e-07, "loss": 0.8462876081466675, "step": 6806 }, { "epoch": 1.5684331797235023, "grad_norm": 1.2507487710365972, "learning_rate": 2.44227745067231e-07, "loss": 0.824936032295227, "step": 6807 }, { "epoch": 1.568663594470046, "grad_norm": 1.2566804457387248, "learning_rate": 2.439783185456027e-07, "loss": 0.8516823053359985, "step": 6808 }, { "epoch": 1.56889400921659, "grad_norm": 1.065798809017728, "learning_rate": 2.4372900175952015e-07, "loss": 0.6154674291610718, "step": 6809 }, { "epoch": 1.5691244239631337, "grad_norm": 1.2816681742105784, "learning_rate": 2.434797947451722e-07, "loss": 0.7769260406494141, "step": 6810 }, { "epoch": 1.5693548387096774, "grad_norm": 1.2232245245328917, "learning_rate": 2.432306975387306e-07, "loss": 0.9525332450866699, "step": 6811 }, { "epoch": 1.5695852534562214, "grad_norm": 1.3409057347397177, "learning_rate": 2.429817101763511e-07, "loss": 0.7537581920623779, "step": 6812 }, { "epoch": 1.5698156682027649, "grad_norm": 1.3548012775304474, "learning_rate": 2.427328326941744e-07, "loss": 0.814711332321167, "step": 6813 }, { "epoch": 1.5700460829493088, "grad_norm": 1.3820372699413255, "learning_rate": 2.4248406512832466e-07, "loss": 0.708736777305603, "step": 6814 }, { "epoch": 1.5702764976958525, "grad_norm": 1.1061554332755352, "learning_rate": 2.422354075149098e-07, "loss": 0.6757712960243225, "step": 6815 }, { "epoch": 1.5705069124423963, "grad_norm": 1.0865188505414496, "learning_rate": 2.4198685989002257e-07, "loss": 0.736266553401947, "step": 6816 }, { "epoch": 1.5707373271889402, "grad_norm": 1.1180343138508952, "learning_rate": 2.417384222897392e-07, "loss": 0.7423173189163208, "step": 6817 }, { "epoch": 1.5709677419354837, "grad_norm": 1.2076049425001651, "learning_rate": 2.414900947501197e-07, "loss": 0.7260550260543823, "step": 6818 }, { "epoch": 1.5711981566820277, "grad_norm": 1.241277027009942, "learning_rate": 2.4124187730720915e-07, "loss": 0.7125939130783081, "step": 6819 }, { "epoch": 1.5714285714285714, "grad_norm": 1.1330555560067848, "learning_rate": 2.409937699970356e-07, "loss": 0.7429558634757996, "step": 6820 }, { "epoch": 1.5716589861751151, "grad_norm": 1.1709438494600335, "learning_rate": 2.407457728556115e-07, "loss": 0.7166736721992493, "step": 6821 }, { "epoch": 1.571889400921659, "grad_norm": 1.1783418664080478, "learning_rate": 2.4049788591893336e-07, "loss": 0.7438491582870483, "step": 6822 }, { "epoch": 1.5721198156682028, "grad_norm": 1.3579191422740273, "learning_rate": 2.402501092229814e-07, "loss": 0.8031798601150513, "step": 6823 }, { "epoch": 1.5723502304147465, "grad_norm": 1.3256875261480106, "learning_rate": 2.400024428037206e-07, "loss": 0.7067087888717651, "step": 6824 }, { "epoch": 1.5725806451612905, "grad_norm": 1.1524386121511956, "learning_rate": 2.3975488669709906e-07, "loss": 0.7147783041000366, "step": 6825 }, { "epoch": 1.572811059907834, "grad_norm": 1.2529979656124484, "learning_rate": 2.395074409390491e-07, "loss": 0.8534795641899109, "step": 6826 }, { "epoch": 1.573041474654378, "grad_norm": 1.0527069171574706, "learning_rate": 2.392601055654875e-07, "loss": 0.7630984783172607, "step": 6827 }, { "epoch": 1.5732718894009217, "grad_norm": 1.3268090351372508, "learning_rate": 2.390128806123145e-07, "loss": 0.9395428895950317, "step": 6828 }, { "epoch": 1.5735023041474654, "grad_norm": 1.1905263432335205, "learning_rate": 2.3876576611541423e-07, "loss": 0.7086023092269897, "step": 6829 }, { "epoch": 1.5737327188940093, "grad_norm": 1.0320188306367468, "learning_rate": 2.385187621106555e-07, "loss": 0.6937201619148254, "step": 6830 }, { "epoch": 1.5739631336405528, "grad_norm": 1.1238131407833931, "learning_rate": 2.3827186863389037e-07, "loss": 0.7339247465133667, "step": 6831 }, { "epoch": 1.5741935483870968, "grad_norm": 0.9948868064813976, "learning_rate": 2.3802508572095493e-07, "loss": 0.8453131318092346, "step": 6832 }, { "epoch": 1.5744239631336405, "grad_norm": 1.2870129222879585, "learning_rate": 2.377784134076698e-07, "loss": 0.7303619384765625, "step": 6833 }, { "epoch": 1.5746543778801843, "grad_norm": 1.1663952236638828, "learning_rate": 2.3753185172983893e-07, "loss": 0.9635858535766602, "step": 6834 }, { "epoch": 1.5748847926267282, "grad_norm": 0.9711435467160289, "learning_rate": 2.3728540072324998e-07, "loss": 0.7174761295318604, "step": 6835 }, { "epoch": 1.575115207373272, "grad_norm": 1.0168865512931398, "learning_rate": 2.3703906042367584e-07, "loss": 0.7375633716583252, "step": 6836 }, { "epoch": 1.5753456221198157, "grad_norm": 1.0569071581049987, "learning_rate": 2.3679283086687206e-07, "loss": 0.8202652931213379, "step": 6837 }, { "epoch": 1.5755760368663596, "grad_norm": 1.4428887155533328, "learning_rate": 2.3654671208857823e-07, "loss": 0.8448499441146851, "step": 6838 }, { "epoch": 1.5758064516129031, "grad_norm": 1.3297185542360797, "learning_rate": 2.3630070412451864e-07, "loss": 0.7840893268585205, "step": 6839 }, { "epoch": 1.576036866359447, "grad_norm": 1.1930310177318706, "learning_rate": 2.3605480701040092e-07, "loss": 0.8036940693855286, "step": 6840 }, { "epoch": 1.5762672811059908, "grad_norm": 1.2730513650169084, "learning_rate": 2.3580902078191666e-07, "loss": 0.8333625793457031, "step": 6841 }, { "epoch": 1.5764976958525345, "grad_norm": 1.3288439351572012, "learning_rate": 2.3556334547474133e-07, "loss": 0.804919958114624, "step": 6842 }, { "epoch": 1.5767281105990785, "grad_norm": 1.2605928054638793, "learning_rate": 2.3531778112453416e-07, "loss": 0.752541720867157, "step": 6843 }, { "epoch": 1.576958525345622, "grad_norm": 1.0378289852617786, "learning_rate": 2.3507232776693896e-07, "loss": 0.647051215171814, "step": 6844 }, { "epoch": 1.577188940092166, "grad_norm": 1.1139826400416593, "learning_rate": 2.3482698543758285e-07, "loss": 0.7546517848968506, "step": 6845 }, { "epoch": 1.5774193548387097, "grad_norm": 1.0118514872509952, "learning_rate": 2.345817541720766e-07, "loss": 0.8773425817489624, "step": 6846 }, { "epoch": 1.5776497695852534, "grad_norm": 1.1485612061840695, "learning_rate": 2.3433663400601567e-07, "loss": 0.9538160562515259, "step": 6847 }, { "epoch": 1.5778801843317973, "grad_norm": 1.0298677066929223, "learning_rate": 2.340916249749787e-07, "loss": 0.6275157332420349, "step": 6848 }, { "epoch": 1.578110599078341, "grad_norm": 1.1889533964841936, "learning_rate": 2.3384672711452812e-07, "loss": 0.7729284167289734, "step": 6849 }, { "epoch": 1.5783410138248848, "grad_norm": 1.4210079123943715, "learning_rate": 2.3360194046021108e-07, "loss": 0.8361644148826599, "step": 6850 }, { "epoch": 1.5785714285714287, "grad_norm": 1.2305172757518368, "learning_rate": 2.3335726504755793e-07, "loss": 0.6782940626144409, "step": 6851 }, { "epoch": 1.5788018433179722, "grad_norm": 1.3612688278959233, "learning_rate": 2.3311270091208256e-07, "loss": 0.8036615252494812, "step": 6852 }, { "epoch": 1.5790322580645162, "grad_norm": 1.1729176601878941, "learning_rate": 2.3286824808928362e-07, "loss": 0.8450125455856323, "step": 6853 }, { "epoch": 1.57926267281106, "grad_norm": 1.2162582175159786, "learning_rate": 2.3262390661464303e-07, "loss": 0.6546198725700378, "step": 6854 }, { "epoch": 1.5794930875576036, "grad_norm": 1.4056383803669428, "learning_rate": 2.3237967652362612e-07, "loss": 0.8201385140419006, "step": 6855 }, { "epoch": 1.5797235023041476, "grad_norm": 1.3504561324932176, "learning_rate": 2.3213555785168336e-07, "loss": 0.8753508925437927, "step": 6856 }, { "epoch": 1.579953917050691, "grad_norm": 1.2672866740553073, "learning_rate": 2.3189155063424782e-07, "loss": 0.5884093642234802, "step": 6857 }, { "epoch": 1.580184331797235, "grad_norm": 1.0135145180947078, "learning_rate": 2.3164765490673654e-07, "loss": 0.6494029760360718, "step": 6858 }, { "epoch": 1.5804147465437788, "grad_norm": 1.1478304397345402, "learning_rate": 2.3140387070455126e-07, "loss": 0.7407097220420837, "step": 6859 }, { "epoch": 1.5806451612903225, "grad_norm": 1.3351942864944542, "learning_rate": 2.3116019806307673e-07, "loss": 0.8934177160263062, "step": 6860 }, { "epoch": 1.5808755760368665, "grad_norm": 1.0696222163552975, "learning_rate": 2.309166370176816e-07, "loss": 0.7487956881523132, "step": 6861 }, { "epoch": 1.5811059907834102, "grad_norm": 1.316829236490256, "learning_rate": 2.3067318760371845e-07, "loss": 0.7744357585906982, "step": 6862 }, { "epoch": 1.581336405529954, "grad_norm": 1.3202738468289819, "learning_rate": 2.304298498565237e-07, "loss": 0.8871743679046631, "step": 6863 }, { "epoch": 1.5815668202764976, "grad_norm": 1.3064493000042272, "learning_rate": 2.3018662381141717e-07, "loss": 0.7865666151046753, "step": 6864 }, { "epoch": 1.5817972350230414, "grad_norm": 1.4125222114326161, "learning_rate": 2.2994350950370334e-07, "loss": 0.8416531682014465, "step": 6865 }, { "epoch": 1.5820276497695853, "grad_norm": 1.5275008378701445, "learning_rate": 2.2970050696866972e-07, "loss": 0.8443950414657593, "step": 6866 }, { "epoch": 1.582258064516129, "grad_norm": 1.4005476364990852, "learning_rate": 2.2945761624158756e-07, "loss": 0.7770054340362549, "step": 6867 }, { "epoch": 1.5824884792626728, "grad_norm": 1.2159355438440163, "learning_rate": 2.2921483735771252e-07, "loss": 0.7263047695159912, "step": 6868 }, { "epoch": 1.5827188940092167, "grad_norm": 1.3958985609002883, "learning_rate": 2.2897217035228312e-07, "loss": 0.8288376927375793, "step": 6869 }, { "epoch": 1.5829493087557602, "grad_norm": 1.2630380344196672, "learning_rate": 2.2872961526052292e-07, "loss": 0.8325462937355042, "step": 6870 }, { "epoch": 1.5831797235023042, "grad_norm": 1.317005879944655, "learning_rate": 2.284871721176379e-07, "loss": 0.7412815093994141, "step": 6871 }, { "epoch": 1.583410138248848, "grad_norm": 1.2813974132427688, "learning_rate": 2.2824484095881823e-07, "loss": 0.8958117961883545, "step": 6872 }, { "epoch": 1.5836405529953916, "grad_norm": 1.228628782021168, "learning_rate": 2.2800262181923858e-07, "loss": 0.8374444246292114, "step": 6873 }, { "epoch": 1.5838709677419356, "grad_norm": 1.2394995315660131, "learning_rate": 2.2776051473405634e-07, "loss": 0.7900353670120239, "step": 6874 }, { "epoch": 1.5841013824884793, "grad_norm": 1.129671125708823, "learning_rate": 2.2751851973841285e-07, "loss": 0.7420408725738525, "step": 6875 }, { "epoch": 1.584331797235023, "grad_norm": 1.3245275433928243, "learning_rate": 2.2727663686743382e-07, "loss": 0.8902314305305481, "step": 6876 }, { "epoch": 1.5845622119815668, "grad_norm": 1.2122656586799572, "learning_rate": 2.27034866156228e-07, "loss": 0.739869236946106, "step": 6877 }, { "epoch": 1.5847926267281105, "grad_norm": 1.169654737499052, "learning_rate": 2.2679320763988775e-07, "loss": 0.8340646624565125, "step": 6878 }, { "epoch": 1.5850230414746544, "grad_norm": 1.3076425110312813, "learning_rate": 2.2655166135349013e-07, "loss": 0.7501030564308167, "step": 6879 }, { "epoch": 1.5852534562211982, "grad_norm": 1.0619799072208593, "learning_rate": 2.2631022733209504e-07, "loss": 0.722623348236084, "step": 6880 }, { "epoch": 1.585483870967742, "grad_norm": 1.4046404033814042, "learning_rate": 2.260689056107461e-07, "loss": 0.8319696187973022, "step": 6881 }, { "epoch": 1.5857142857142859, "grad_norm": 1.2824383261655956, "learning_rate": 2.2582769622447107e-07, "loss": 0.85502028465271, "step": 6882 }, { "epoch": 1.5859447004608294, "grad_norm": 1.444500113904039, "learning_rate": 2.2558659920828095e-07, "loss": 0.7942626476287842, "step": 6883 }, { "epoch": 1.5861751152073733, "grad_norm": 0.9346347634599198, "learning_rate": 2.253456145971705e-07, "loss": 0.6731030941009521, "step": 6884 }, { "epoch": 1.586405529953917, "grad_norm": 1.2567565363582325, "learning_rate": 2.2510474242611887e-07, "loss": 0.8479423522949219, "step": 6885 }, { "epoch": 1.5866359447004608, "grad_norm": 1.0824322707106273, "learning_rate": 2.2486398273008812e-07, "loss": 0.7398810386657715, "step": 6886 }, { "epoch": 1.5868663594470047, "grad_norm": 1.4531636253389437, "learning_rate": 2.246233355440238e-07, "loss": 0.8422881364822388, "step": 6887 }, { "epoch": 1.5870967741935482, "grad_norm": 1.4298247398214885, "learning_rate": 2.2438280090285612e-07, "loss": 0.8307279944419861, "step": 6888 }, { "epoch": 1.5873271889400922, "grad_norm": 1.3280924437525041, "learning_rate": 2.2414237884149821e-07, "loss": 0.8329004049301147, "step": 6889 }, { "epoch": 1.587557603686636, "grad_norm": 1.196093026387475, "learning_rate": 2.2390206939484645e-07, "loss": 0.801641583442688, "step": 6890 }, { "epoch": 1.5877880184331796, "grad_norm": 1.359543687074451, "learning_rate": 2.2366187259778235e-07, "loss": 0.9850986003875732, "step": 6891 }, { "epoch": 1.5880184331797236, "grad_norm": 1.2770195506897435, "learning_rate": 2.2342178848516935e-07, "loss": 0.7169715166091919, "step": 6892 }, { "epoch": 1.5882488479262673, "grad_norm": 1.1258655345605515, "learning_rate": 2.2318181709185603e-07, "loss": 0.7509033679962158, "step": 6893 }, { "epoch": 1.588479262672811, "grad_norm": 1.2429319924869415, "learning_rate": 2.2294195845267348e-07, "loss": 0.6974655985832214, "step": 6894 }, { "epoch": 1.588709677419355, "grad_norm": 1.1949954122245936, "learning_rate": 2.227022126024367e-07, "loss": 0.7388278245925903, "step": 6895 }, { "epoch": 1.5889400921658985, "grad_norm": 1.1219112420315915, "learning_rate": 2.2246257957594506e-07, "loss": 0.6479122638702393, "step": 6896 }, { "epoch": 1.5891705069124424, "grad_norm": 1.2556673774557678, "learning_rate": 2.222230594079807e-07, "loss": 0.759338915348053, "step": 6897 }, { "epoch": 1.5894009216589862, "grad_norm": 1.1747779352742982, "learning_rate": 2.2198365213330937e-07, "loss": 0.7299938201904297, "step": 6898 }, { "epoch": 1.58963133640553, "grad_norm": 1.2072520940330866, "learning_rate": 2.2174435778668122e-07, "loss": 0.707555890083313, "step": 6899 }, { "epoch": 1.5898617511520738, "grad_norm": 1.3083069601374675, "learning_rate": 2.2150517640282918e-07, "loss": 0.8311065435409546, "step": 6900 }, { "epoch": 1.5900921658986173, "grad_norm": 1.1585381591481734, "learning_rate": 2.2126610801647028e-07, "loss": 0.6494649648666382, "step": 6901 }, { "epoch": 1.5903225806451613, "grad_norm": 1.006735116508423, "learning_rate": 2.2102715266230486e-07, "loss": 0.6563294529914856, "step": 6902 }, { "epoch": 1.590552995391705, "grad_norm": 1.0365958828861261, "learning_rate": 2.207883103750171e-07, "loss": 0.7426891326904297, "step": 6903 }, { "epoch": 1.5907834101382488, "grad_norm": 0.9164747480191582, "learning_rate": 2.2054958118927413e-07, "loss": 0.7074661254882812, "step": 6904 }, { "epoch": 1.5910138248847927, "grad_norm": 1.4657092079572216, "learning_rate": 2.203109651397279e-07, "loss": 0.8407880663871765, "step": 6905 }, { "epoch": 1.5912442396313364, "grad_norm": 1.014884431152031, "learning_rate": 2.2007246226101296e-07, "loss": 0.7228440642356873, "step": 6906 }, { "epoch": 1.5914746543778802, "grad_norm": 1.1100543617790197, "learning_rate": 2.1983407258774733e-07, "loss": 0.6988812685012817, "step": 6907 }, { "epoch": 1.591705069124424, "grad_norm": 1.3237351414434337, "learning_rate": 2.195957961545335e-07, "loss": 0.793757438659668, "step": 6908 }, { "epoch": 1.5919354838709676, "grad_norm": 1.2485526093365642, "learning_rate": 2.1935763299595678e-07, "loss": 0.8621397018432617, "step": 6909 }, { "epoch": 1.5921658986175116, "grad_norm": 1.2314950700356975, "learning_rate": 2.1911958314658598e-07, "loss": 0.7661364078521729, "step": 6910 }, { "epoch": 1.5923963133640553, "grad_norm": 1.1937782252155265, "learning_rate": 2.1888164664097408e-07, "loss": 0.9322741031646729, "step": 6911 }, { "epoch": 1.592626728110599, "grad_norm": 1.1787479005369867, "learning_rate": 2.1864382351365717e-07, "loss": 0.8411989212036133, "step": 6912 }, { "epoch": 1.592857142857143, "grad_norm": 1.515351393614885, "learning_rate": 2.1840611379915464e-07, "loss": 0.8212479948997498, "step": 6913 }, { "epoch": 1.5930875576036865, "grad_norm": 1.5032885798825617, "learning_rate": 2.181685175319702e-07, "loss": 0.7875508069992065, "step": 6914 }, { "epoch": 1.5933179723502304, "grad_norm": 1.178669163135756, "learning_rate": 2.1793103474659047e-07, "loss": 0.8389852046966553, "step": 6915 }, { "epoch": 1.5935483870967742, "grad_norm": 1.388906101423199, "learning_rate": 2.1769366547748546e-07, "loss": 0.8223046660423279, "step": 6916 }, { "epoch": 1.5937788018433179, "grad_norm": 1.0682255683615596, "learning_rate": 2.1745640975910962e-07, "loss": 0.8427159786224365, "step": 6917 }, { "epoch": 1.5940092165898618, "grad_norm": 1.2770752550871127, "learning_rate": 2.172192676258996e-07, "loss": 0.7448060512542725, "step": 6918 }, { "epoch": 1.5942396313364056, "grad_norm": 1.1028377529507616, "learning_rate": 2.1698223911227686e-07, "loss": 0.7122288346290588, "step": 6919 }, { "epoch": 1.5944700460829493, "grad_norm": 1.3801420957349657, "learning_rate": 2.1674532425264548e-07, "loss": 0.7712994813919067, "step": 6920 }, { "epoch": 1.5947004608294932, "grad_norm": 1.4967308024498271, "learning_rate": 2.1650852308139355e-07, "loss": 0.9656664729118347, "step": 6921 }, { "epoch": 1.5949308755760367, "grad_norm": 1.3725078407101703, "learning_rate": 2.162718356328922e-07, "loss": 0.748894214630127, "step": 6922 }, { "epoch": 1.5951612903225807, "grad_norm": 1.0191925895935576, "learning_rate": 2.1603526194149635e-07, "loss": 0.6875454187393188, "step": 6923 }, { "epoch": 1.5953917050691244, "grad_norm": 1.099493651981713, "learning_rate": 2.1579880204154412e-07, "loss": 0.8258690237998962, "step": 6924 }, { "epoch": 1.5956221198156681, "grad_norm": 1.2228052738114181, "learning_rate": 2.15562455967358e-07, "loss": 0.7647902965545654, "step": 6925 }, { "epoch": 1.595852534562212, "grad_norm": 1.0716326843288577, "learning_rate": 2.1532622375324284e-07, "loss": 0.7004281282424927, "step": 6926 }, { "epoch": 1.5960829493087556, "grad_norm": 1.1015601686618846, "learning_rate": 2.1509010543348726e-07, "loss": 0.7500345706939697, "step": 6927 }, { "epoch": 1.5963133640552996, "grad_norm": 1.1261005927566234, "learning_rate": 2.148541010423641e-07, "loss": 0.7300195693969727, "step": 6928 }, { "epoch": 1.5965437788018433, "grad_norm": 1.0927883255058508, "learning_rate": 2.1461821061412876e-07, "loss": 0.7592284679412842, "step": 6929 }, { "epoch": 1.596774193548387, "grad_norm": 1.265065855875648, "learning_rate": 2.1438243418302016e-07, "loss": 0.7179796099662781, "step": 6930 }, { "epoch": 1.597004608294931, "grad_norm": 1.167267121775029, "learning_rate": 2.1414677178326157e-07, "loss": 0.8829631805419922, "step": 6931 }, { "epoch": 1.5972350230414747, "grad_norm": 1.4030936435750112, "learning_rate": 2.1391122344905865e-07, "loss": 0.8661972880363464, "step": 6932 }, { "epoch": 1.5974654377880184, "grad_norm": 1.4609293147197595, "learning_rate": 2.136757892146007e-07, "loss": 0.7774989604949951, "step": 6933 }, { "epoch": 1.5976958525345624, "grad_norm": 1.2556066222087972, "learning_rate": 2.1344046911406132e-07, "loss": 0.7343888878822327, "step": 6934 }, { "epoch": 1.5979262672811059, "grad_norm": 1.1442684268001395, "learning_rate": 2.132052631815966e-07, "loss": 0.7810107469558716, "step": 6935 }, { "epoch": 1.5981566820276498, "grad_norm": 1.343676205067389, "learning_rate": 2.12970171451346e-07, "loss": 0.7585299611091614, "step": 6936 }, { "epoch": 1.5983870967741935, "grad_norm": 1.2827689520736418, "learning_rate": 2.1273519395743344e-07, "loss": 0.886371910572052, "step": 6937 }, { "epoch": 1.5986175115207373, "grad_norm": 0.9767702062550015, "learning_rate": 2.1250033073396523e-07, "loss": 0.6986823081970215, "step": 6938 }, { "epoch": 1.5988479262672812, "grad_norm": 1.2062052682782651, "learning_rate": 2.122655818150312e-07, "loss": 0.7524189352989197, "step": 6939 }, { "epoch": 1.5990783410138247, "grad_norm": 1.1473232496595593, "learning_rate": 2.120309472347055e-07, "loss": 0.7699365615844727, "step": 6940 }, { "epoch": 1.5993087557603687, "grad_norm": 1.188421090787615, "learning_rate": 2.1179642702704458e-07, "loss": 0.8112696409225464, "step": 6941 }, { "epoch": 1.5995391705069124, "grad_norm": 1.377266755106213, "learning_rate": 2.115620212260889e-07, "loss": 0.7067416906356812, "step": 6942 }, { "epoch": 1.5997695852534561, "grad_norm": 0.80841875970131, "learning_rate": 2.1132772986586211e-07, "loss": 0.787110447883606, "step": 6943 }, { "epoch": 1.6, "grad_norm": 1.282613261539406, "learning_rate": 2.11093552980371e-07, "loss": 0.7356789112091064, "step": 6944 }, { "epoch": 1.6002304147465438, "grad_norm": 1.2747758780049527, "learning_rate": 2.1085949060360653e-07, "loss": 0.8057125806808472, "step": 6945 }, { "epoch": 1.6004608294930875, "grad_norm": 1.1828340962550294, "learning_rate": 2.1062554276954225e-07, "loss": 0.7169399261474609, "step": 6946 }, { "epoch": 1.6006912442396315, "grad_norm": 1.2018304676070681, "learning_rate": 2.1039170951213526e-07, "loss": 0.7219180464744568, "step": 6947 }, { "epoch": 1.600921658986175, "grad_norm": 1.2736335133966967, "learning_rate": 2.101579908653266e-07, "loss": 0.7530789375305176, "step": 6948 }, { "epoch": 1.601152073732719, "grad_norm": 1.2374620271602483, "learning_rate": 2.0992438686303993e-07, "loss": 0.8192377090454102, "step": 6949 }, { "epoch": 1.6013824884792627, "grad_norm": 1.0987195977670645, "learning_rate": 2.0969089753918223e-07, "loss": 0.6904648542404175, "step": 6950 }, { "epoch": 1.6016129032258064, "grad_norm": 1.2558183684284059, "learning_rate": 2.0945752292764495e-07, "loss": 0.7289770245552063, "step": 6951 }, { "epoch": 1.6018433179723504, "grad_norm": 1.260283902447682, "learning_rate": 2.0922426306230157e-07, "loss": 0.8467620611190796, "step": 6952 }, { "epoch": 1.6020737327188939, "grad_norm": 1.3650999598924758, "learning_rate": 2.089911179770093e-07, "loss": 0.7835153937339783, "step": 6953 }, { "epoch": 1.6023041474654378, "grad_norm": 0.847985634813149, "learning_rate": 2.0875808770560933e-07, "loss": 0.6696668267250061, "step": 6954 }, { "epoch": 1.6025345622119815, "grad_norm": 1.441689312728025, "learning_rate": 2.0852517228192556e-07, "loss": 0.8451364636421204, "step": 6955 }, { "epoch": 1.6027649769585253, "grad_norm": 1.2628900414882365, "learning_rate": 2.0829237173976487e-07, "loss": 0.7917240858078003, "step": 6956 }, { "epoch": 1.6029953917050692, "grad_norm": 1.2514606025933794, "learning_rate": 2.0805968611291867e-07, "loss": 0.791597843170166, "step": 6957 }, { "epoch": 1.603225806451613, "grad_norm": 1.2854657657217543, "learning_rate": 2.0782711543516063e-07, "loss": 0.7571247816085815, "step": 6958 }, { "epoch": 1.6034562211981567, "grad_norm": 1.1996866839711877, "learning_rate": 2.075946597402478e-07, "loss": 0.9196302890777588, "step": 6959 }, { "epoch": 1.6036866359447006, "grad_norm": 0.9955085341059975, "learning_rate": 2.0736231906192136e-07, "loss": 0.7106618881225586, "step": 6960 }, { "epoch": 1.6039170506912441, "grad_norm": 0.9090693582601959, "learning_rate": 2.071300934339051e-07, "loss": 0.8923465013504028, "step": 6961 }, { "epoch": 1.604147465437788, "grad_norm": 1.1524730844586952, "learning_rate": 2.0689798288990601e-07, "loss": 0.6929241418838501, "step": 6962 }, { "epoch": 1.6043778801843318, "grad_norm": 1.4736872345919192, "learning_rate": 2.0666598746361487e-07, "loss": 0.935944676399231, "step": 6963 }, { "epoch": 1.6046082949308755, "grad_norm": 1.3002916307222088, "learning_rate": 2.0643410718870536e-07, "loss": 0.7442188262939453, "step": 6964 }, { "epoch": 1.6048387096774195, "grad_norm": 1.13007905720726, "learning_rate": 2.0620234209883446e-07, "loss": 0.7340278625488281, "step": 6965 }, { "epoch": 1.605069124423963, "grad_norm": 1.1841454047560163, "learning_rate": 2.0597069222764297e-07, "loss": 0.7436190247535706, "step": 6966 }, { "epoch": 1.605299539170507, "grad_norm": 1.1998918795301519, "learning_rate": 2.0573915760875406e-07, "loss": 0.9109283685684204, "step": 6967 }, { "epoch": 1.6055299539170507, "grad_norm": 1.362187790875206, "learning_rate": 2.0550773827577518e-07, "loss": 0.86224365234375, "step": 6968 }, { "epoch": 1.6057603686635944, "grad_norm": 1.0973288140018649, "learning_rate": 2.0527643426229636e-07, "loss": 0.6873685121536255, "step": 6969 }, { "epoch": 1.6059907834101383, "grad_norm": 1.2862613183491987, "learning_rate": 2.0504524560189074e-07, "loss": 0.7634609937667847, "step": 6970 }, { "epoch": 1.606221198156682, "grad_norm": 1.642442078921259, "learning_rate": 2.0481417232811572e-07, "loss": 0.7940595149993896, "step": 6971 }, { "epoch": 1.6064516129032258, "grad_norm": 1.0579671129687211, "learning_rate": 2.0458321447451078e-07, "loss": 0.7109687924385071, "step": 6972 }, { "epoch": 1.6066820276497698, "grad_norm": 1.3780414286693414, "learning_rate": 2.04352372074599e-07, "loss": 0.9476398825645447, "step": 6973 }, { "epoch": 1.6069124423963133, "grad_norm": 1.3106188238946987, "learning_rate": 2.0412164516188747e-07, "loss": 0.7563579678535461, "step": 6974 }, { "epoch": 1.6071428571428572, "grad_norm": 1.1912217950342037, "learning_rate": 2.0389103376986538e-07, "loss": 0.7928751707077026, "step": 6975 }, { "epoch": 1.607373271889401, "grad_norm": 1.1927610489358789, "learning_rate": 2.0366053793200565e-07, "loss": 0.776961624622345, "step": 6976 }, { "epoch": 1.6076036866359447, "grad_norm": 1.1830668942381175, "learning_rate": 2.0343015768176496e-07, "loss": 0.6511167883872986, "step": 6977 }, { "epoch": 1.6078341013824886, "grad_norm": 1.3541662729221868, "learning_rate": 2.0319989305258235e-07, "loss": 0.6487337350845337, "step": 6978 }, { "epoch": 1.6080645161290321, "grad_norm": 1.5271951763204938, "learning_rate": 2.0296974407788004e-07, "loss": 0.921454131603241, "step": 6979 }, { "epoch": 1.608294930875576, "grad_norm": 1.0476613319531645, "learning_rate": 2.0273971079106467e-07, "loss": 0.8145809769630432, "step": 6980 }, { "epoch": 1.6085253456221198, "grad_norm": 0.9495439447317249, "learning_rate": 2.0250979322552474e-07, "loss": 0.6655904054641724, "step": 6981 }, { "epoch": 1.6087557603686635, "grad_norm": 1.1486957458539049, "learning_rate": 2.0227999141463258e-07, "loss": 0.777961254119873, "step": 6982 }, { "epoch": 1.6089861751152075, "grad_norm": 1.3274428663782127, "learning_rate": 2.0205030539174361e-07, "loss": 0.6543164253234863, "step": 6983 }, { "epoch": 1.6092165898617512, "grad_norm": 1.233780092778412, "learning_rate": 2.018207351901966e-07, "loss": 0.7842000722885132, "step": 6984 }, { "epoch": 1.609447004608295, "grad_norm": 0.999384175284256, "learning_rate": 2.0159128084331278e-07, "loss": 0.7264418005943298, "step": 6985 }, { "epoch": 1.6096774193548387, "grad_norm": 1.313414021265448, "learning_rate": 2.0136194238439795e-07, "loss": 0.8722596168518066, "step": 6986 }, { "epoch": 1.6099078341013824, "grad_norm": 1.3518278161266697, "learning_rate": 2.0113271984673997e-07, "loss": 0.8162735104560852, "step": 6987 }, { "epoch": 1.6101382488479263, "grad_norm": 1.212757185466248, "learning_rate": 2.0090361326360982e-07, "loss": 0.6962481737136841, "step": 6988 }, { "epoch": 1.61036866359447, "grad_norm": 1.133716172506403, "learning_rate": 2.0067462266826264e-07, "loss": 0.8186852931976318, "step": 6989 }, { "epoch": 1.6105990783410138, "grad_norm": 1.505728867210405, "learning_rate": 2.0044574809393543e-07, "loss": 0.8935987949371338, "step": 6990 }, { "epoch": 1.6108294930875577, "grad_norm": 1.2824355796337807, "learning_rate": 2.002169895738498e-07, "loss": 0.9152865409851074, "step": 6991 }, { "epoch": 1.6110599078341012, "grad_norm": 1.521529078332145, "learning_rate": 1.9998834714120928e-07, "loss": 0.8042874336242676, "step": 6992 }, { "epoch": 1.6112903225806452, "grad_norm": 1.3198117612600044, "learning_rate": 1.9975982082920083e-07, "loss": 0.9621129035949707, "step": 6993 }, { "epoch": 1.611520737327189, "grad_norm": 1.1154614331355635, "learning_rate": 1.9953141067099533e-07, "loss": 0.8296995162963867, "step": 6994 }, { "epoch": 1.6117511520737327, "grad_norm": 1.0827522335122797, "learning_rate": 1.9930311669974587e-07, "loss": 0.8129373788833618, "step": 6995 }, { "epoch": 1.6119815668202766, "grad_norm": 1.359695561767368, "learning_rate": 1.9907493894858874e-07, "loss": 0.7450911998748779, "step": 6996 }, { "epoch": 1.6122119815668203, "grad_norm": 1.2367503665171555, "learning_rate": 1.9884687745064422e-07, "loss": 0.798037052154541, "step": 6997 }, { "epoch": 1.612442396313364, "grad_norm": 1.218969884225304, "learning_rate": 1.9861893223901494e-07, "loss": 0.8118857145309448, "step": 6998 }, { "epoch": 1.6126728110599078, "grad_norm": 1.2176008366956401, "learning_rate": 1.9839110334678632e-07, "loss": 0.7954392433166504, "step": 6999 }, { "epoch": 1.6129032258064515, "grad_norm": 1.2233633618619175, "learning_rate": 1.9816339080702825e-07, "loss": 0.8055616617202759, "step": 7000 }, { "epoch": 1.6131336405529955, "grad_norm": 1.503254744382692, "learning_rate": 1.979357946527924e-07, "loss": 0.8949761986732483, "step": 7001 }, { "epoch": 1.6133640552995392, "grad_norm": 1.376056206509758, "learning_rate": 1.9770831491711427e-07, "loss": 0.8327617645263672, "step": 7002 }, { "epoch": 1.613594470046083, "grad_norm": 1.2867855951178133, "learning_rate": 1.9748095163301215e-07, "loss": 0.7593148946762085, "step": 7003 }, { "epoch": 1.6138248847926269, "grad_norm": 1.2449007241812073, "learning_rate": 1.9725370483348737e-07, "loss": 0.7639665603637695, "step": 7004 }, { "epoch": 1.6140552995391704, "grad_norm": 1.2839981076373308, "learning_rate": 1.9702657455152448e-07, "loss": 0.8561587929725647, "step": 7005 }, { "epoch": 1.6142857142857143, "grad_norm": 1.4345782240891563, "learning_rate": 1.9679956082009154e-07, "loss": 0.835313081741333, "step": 7006 }, { "epoch": 1.614516129032258, "grad_norm": 1.680229749258956, "learning_rate": 1.9657266367213898e-07, "loss": 0.831456184387207, "step": 7007 }, { "epoch": 1.6147465437788018, "grad_norm": 1.1797102347566437, "learning_rate": 1.963458831406005e-07, "loss": 0.699436604976654, "step": 7008 }, { "epoch": 1.6149769585253457, "grad_norm": 1.2382287230628872, "learning_rate": 1.9611921925839337e-07, "loss": 0.7821902632713318, "step": 7009 }, { "epoch": 1.6152073732718892, "grad_norm": 1.035873020643515, "learning_rate": 1.9589267205841742e-07, "loss": 0.7491241097450256, "step": 7010 }, { "epoch": 1.6154377880184332, "grad_norm": 1.3212550422299536, "learning_rate": 1.956662415735554e-07, "loss": 0.7299652099609375, "step": 7011 }, { "epoch": 1.615668202764977, "grad_norm": 1.2121144450441814, "learning_rate": 1.9543992783667385e-07, "loss": 0.692190408706665, "step": 7012 }, { "epoch": 1.6158986175115206, "grad_norm": 1.5397188528974992, "learning_rate": 1.9521373088062166e-07, "loss": 0.8727273941040039, "step": 7013 }, { "epoch": 1.6161290322580646, "grad_norm": 0.9576172656761047, "learning_rate": 1.9498765073823077e-07, "loss": 0.6441171169281006, "step": 7014 }, { "epoch": 1.6163594470046083, "grad_norm": 1.202013067822893, "learning_rate": 1.947616874423169e-07, "loss": 0.6960387229919434, "step": 7015 }, { "epoch": 1.616589861751152, "grad_norm": 1.3238157552069112, "learning_rate": 1.9453584102567788e-07, "loss": 0.9231700301170349, "step": 7016 }, { "epoch": 1.616820276497696, "grad_norm": 1.5395552640428811, "learning_rate": 1.9431011152109555e-07, "loss": 0.6957401037216187, "step": 7017 }, { "epoch": 1.6170506912442395, "grad_norm": 1.221595091148929, "learning_rate": 1.9408449896133384e-07, "loss": 0.6608580350875854, "step": 7018 }, { "epoch": 1.6172811059907835, "grad_norm": 1.386134285673899, "learning_rate": 1.9385900337913997e-07, "loss": 0.7322397232055664, "step": 7019 }, { "epoch": 1.6175115207373272, "grad_norm": 1.1188269604657235, "learning_rate": 1.9363362480724488e-07, "loss": 0.6996288299560547, "step": 7020 }, { "epoch": 1.617741935483871, "grad_norm": 1.022000935531768, "learning_rate": 1.9340836327836163e-07, "loss": 0.7928623557090759, "step": 7021 }, { "epoch": 1.6179723502304149, "grad_norm": 0.9992379944358776, "learning_rate": 1.9318321882518674e-07, "loss": 0.6275026202201843, "step": 7022 }, { "epoch": 1.6182027649769584, "grad_norm": 1.26569218150676, "learning_rate": 1.9295819148039948e-07, "loss": 0.6660110950469971, "step": 7023 }, { "epoch": 1.6184331797235023, "grad_norm": 1.0401535425644861, "learning_rate": 1.9273328127666232e-07, "loss": 0.8129480481147766, "step": 7024 }, { "epoch": 1.618663594470046, "grad_norm": 1.146646002030878, "learning_rate": 1.9250848824662046e-07, "loss": 0.8070700168609619, "step": 7025 }, { "epoch": 1.6188940092165898, "grad_norm": 1.4109951707076815, "learning_rate": 1.922838124229028e-07, "loss": 0.8123769760131836, "step": 7026 }, { "epoch": 1.6191244239631337, "grad_norm": 0.9906397496222884, "learning_rate": 1.920592538381205e-07, "loss": 0.6552244424819946, "step": 7027 }, { "epoch": 1.6193548387096774, "grad_norm": 1.0749749429025204, "learning_rate": 1.9183481252486767e-07, "loss": 0.8764367699623108, "step": 7028 }, { "epoch": 1.6195852534562212, "grad_norm": 1.8347518044142406, "learning_rate": 1.9161048851572215e-07, "loss": 0.9075809717178345, "step": 7029 }, { "epoch": 1.6198156682027651, "grad_norm": 1.1695152473088226, "learning_rate": 1.9138628184324412e-07, "loss": 0.7308327555656433, "step": 7030 }, { "epoch": 1.6200460829493086, "grad_norm": 1.4269673355519676, "learning_rate": 1.9116219253997655e-07, "loss": 0.838142991065979, "step": 7031 }, { "epoch": 1.6202764976958526, "grad_norm": 1.5286648636126694, "learning_rate": 1.9093822063844623e-07, "loss": 0.7681041359901428, "step": 7032 }, { "epoch": 1.6205069124423963, "grad_norm": 1.1858134701081806, "learning_rate": 1.907143661711621e-07, "loss": 0.7179980278015137, "step": 7033 }, { "epoch": 1.62073732718894, "grad_norm": 1.2400863874788628, "learning_rate": 1.9049062917061609e-07, "loss": 0.8688361644744873, "step": 7034 }, { "epoch": 1.620967741935484, "grad_norm": 1.0795907835047491, "learning_rate": 1.9026700966928388e-07, "loss": 0.6540178656578064, "step": 7035 }, { "epoch": 1.6211981566820275, "grad_norm": 0.9042431894176799, "learning_rate": 1.900435076996233e-07, "loss": 0.7834869623184204, "step": 7036 }, { "epoch": 1.6214285714285714, "grad_norm": 1.4376571546925008, "learning_rate": 1.8982012329407505e-07, "loss": 0.8895971775054932, "step": 7037 }, { "epoch": 1.6216589861751152, "grad_norm": 1.1211547009425467, "learning_rate": 1.8959685648506362e-07, "loss": 0.6625858545303345, "step": 7038 }, { "epoch": 1.621889400921659, "grad_norm": 1.4181930826937483, "learning_rate": 1.893737073049957e-07, "loss": 0.651193380355835, "step": 7039 }, { "epoch": 1.6221198156682028, "grad_norm": 1.49480203283565, "learning_rate": 1.8915067578626065e-07, "loss": 0.8716636896133423, "step": 7040 }, { "epoch": 1.6223502304147466, "grad_norm": 1.2037531898880258, "learning_rate": 1.8892776196123196e-07, "loss": 0.812637984752655, "step": 7041 }, { "epoch": 1.6225806451612903, "grad_norm": 1.4952425500537936, "learning_rate": 1.887049658622648e-07, "loss": 0.7803184986114502, "step": 7042 }, { "epoch": 1.6228110599078343, "grad_norm": 1.4542796613479354, "learning_rate": 1.8848228752169793e-07, "loss": 0.7884814739227295, "step": 7043 }, { "epoch": 1.6230414746543778, "grad_norm": 1.3474838088832628, "learning_rate": 1.8825972697185265e-07, "loss": 0.7250671982765198, "step": 7044 }, { "epoch": 1.6232718894009217, "grad_norm": 1.2055929150487366, "learning_rate": 1.880372842450332e-07, "loss": 0.8078780174255371, "step": 7045 }, { "epoch": 1.6235023041474654, "grad_norm": 1.2023825853188168, "learning_rate": 1.878149593735272e-07, "loss": 0.8523818254470825, "step": 7046 }, { "epoch": 1.6237327188940092, "grad_norm": 1.2683431455334386, "learning_rate": 1.875927523896047e-07, "loss": 0.8772249221801758, "step": 7047 }, { "epoch": 1.6239631336405531, "grad_norm": 1.0815338842817483, "learning_rate": 1.8737066332551843e-07, "loss": 0.7906323671340942, "step": 7048 }, { "epoch": 1.6241935483870966, "grad_norm": 1.3048529080567755, "learning_rate": 1.8714869221350492e-07, "loss": 0.8010337352752686, "step": 7049 }, { "epoch": 1.6244239631336406, "grad_norm": 1.365899691735964, "learning_rate": 1.8692683908578267e-07, "loss": 0.8978049755096436, "step": 7050 }, { "epoch": 1.6246543778801843, "grad_norm": 1.159165616843268, "learning_rate": 1.8670510397455297e-07, "loss": 0.6622864007949829, "step": 7051 }, { "epoch": 1.624884792626728, "grad_norm": 1.048079119212609, "learning_rate": 1.8648348691200112e-07, "loss": 0.7795406579971313, "step": 7052 }, { "epoch": 1.625115207373272, "grad_norm": 1.2605630326093136, "learning_rate": 1.8626198793029423e-07, "loss": 0.9152054786682129, "step": 7053 }, { "epoch": 1.6253456221198157, "grad_norm": 1.1757865506402991, "learning_rate": 1.860406070615822e-07, "loss": 0.719946563243866, "step": 7054 }, { "epoch": 1.6255760368663594, "grad_norm": 1.2991129477224903, "learning_rate": 1.8581934433799884e-07, "loss": 0.782962441444397, "step": 7055 }, { "epoch": 1.6258064516129034, "grad_norm": 1.118392005824248, "learning_rate": 1.855981997916597e-07, "loss": 0.8119732737541199, "step": 7056 }, { "epoch": 1.6260368663594469, "grad_norm": 1.2362407544063627, "learning_rate": 1.8537717345466351e-07, "loss": 0.7585981488227844, "step": 7057 }, { "epoch": 1.6262672811059908, "grad_norm": 1.158465388331893, "learning_rate": 1.8515626535909258e-07, "loss": 0.6846082210540771, "step": 7058 }, { "epoch": 1.6264976958525346, "grad_norm": 1.230933966400155, "learning_rate": 1.8493547553701083e-07, "loss": 0.7355546951293945, "step": 7059 }, { "epoch": 1.6267281105990783, "grad_norm": 1.15836260056471, "learning_rate": 1.847148040204657e-07, "loss": 0.6828340291976929, "step": 7060 }, { "epoch": 1.6269585253456222, "grad_norm": 1.0499975056987365, "learning_rate": 1.8449425084148763e-07, "loss": 0.8513988256454468, "step": 7061 }, { "epoch": 1.6271889400921657, "grad_norm": 1.0253802645646743, "learning_rate": 1.8427381603208947e-07, "loss": 0.6817762851715088, "step": 7062 }, { "epoch": 1.6274193548387097, "grad_norm": 0.9793159138955572, "learning_rate": 1.8405349962426699e-07, "loss": 0.7314180731773376, "step": 7063 }, { "epoch": 1.6276497695852534, "grad_norm": 1.326821994662743, "learning_rate": 1.8383330164999898e-07, "loss": 0.8193466663360596, "step": 7064 }, { "epoch": 1.6278801843317972, "grad_norm": 1.2511428182189692, "learning_rate": 1.8361322214124643e-07, "loss": 0.7469823360443115, "step": 7065 }, { "epoch": 1.628110599078341, "grad_norm": 1.4366505105110272, "learning_rate": 1.8339326112995423e-07, "loss": 0.8578816652297974, "step": 7066 }, { "epoch": 1.6283410138248848, "grad_norm": 1.4615192025781363, "learning_rate": 1.8317341864804903e-07, "loss": 0.8384239077568054, "step": 7067 }, { "epoch": 1.6285714285714286, "grad_norm": 1.122194991625306, "learning_rate": 1.829536947274406e-07, "loss": 0.8707646131515503, "step": 7068 }, { "epoch": 1.6288018433179725, "grad_norm": 1.2319397578647793, "learning_rate": 1.82734089400022e-07, "loss": 0.6869943141937256, "step": 7069 }, { "epoch": 1.629032258064516, "grad_norm": 1.3893487386527597, "learning_rate": 1.8251460269766848e-07, "loss": 0.7776129245758057, "step": 7070 }, { "epoch": 1.62926267281106, "grad_norm": 1.104887091227765, "learning_rate": 1.8229523465223785e-07, "loss": 0.8126854300498962, "step": 7071 }, { "epoch": 1.6294930875576037, "grad_norm": 1.0317016664034484, "learning_rate": 1.8207598529557166e-07, "loss": 0.6570720672607422, "step": 7072 }, { "epoch": 1.6297235023041474, "grad_norm": 0.8859395443506812, "learning_rate": 1.818568546594934e-07, "loss": 0.6485599875450134, "step": 7073 }, { "epoch": 1.6299539170506914, "grad_norm": 1.206554438869518, "learning_rate": 1.816378427758093e-07, "loss": 0.9132766723632812, "step": 7074 }, { "epoch": 1.6301843317972349, "grad_norm": 1.4945592359199265, "learning_rate": 1.8141894967630932e-07, "loss": 0.8277286291122437, "step": 7075 }, { "epoch": 1.6304147465437788, "grad_norm": 1.3670934774676884, "learning_rate": 1.812001753927651e-07, "loss": 0.7409358024597168, "step": 7076 }, { "epoch": 1.6306451612903226, "grad_norm": 1.2664504423738472, "learning_rate": 1.809815199569311e-07, "loss": 0.8233339786529541, "step": 7077 }, { "epoch": 1.6308755760368663, "grad_norm": 1.3727275296136565, "learning_rate": 1.8076298340054563e-07, "loss": 0.8704487085342407, "step": 7078 }, { "epoch": 1.6311059907834102, "grad_norm": 1.503472652590263, "learning_rate": 1.8054456575532862e-07, "loss": 0.8845789432525635, "step": 7079 }, { "epoch": 1.631336405529954, "grad_norm": 1.0523258046250148, "learning_rate": 1.8032626705298272e-07, "loss": 0.7241162061691284, "step": 7080 }, { "epoch": 1.6315668202764977, "grad_norm": 1.193290512437584, "learning_rate": 1.8010808732519433e-07, "loss": 0.7065681219100952, "step": 7081 }, { "epoch": 1.6317972350230416, "grad_norm": 1.281102564788521, "learning_rate": 1.7989002660363162e-07, "loss": 0.6492339372634888, "step": 7082 }, { "epoch": 1.6320276497695851, "grad_norm": 0.9673694389198546, "learning_rate": 1.79672084919946e-07, "loss": 0.7089248895645142, "step": 7083 }, { "epoch": 1.632258064516129, "grad_norm": 1.0367687290608978, "learning_rate": 1.794542623057712e-07, "loss": 0.7030316591262817, "step": 7084 }, { "epoch": 1.6324884792626728, "grad_norm": 1.1008255373775855, "learning_rate": 1.792365587927239e-07, "loss": 0.8626528978347778, "step": 7085 }, { "epoch": 1.6327188940092165, "grad_norm": 1.1079176271315754, "learning_rate": 1.7901897441240333e-07, "loss": 0.8468672037124634, "step": 7086 }, { "epoch": 1.6329493087557605, "grad_norm": 1.4611904004596754, "learning_rate": 1.7880150919639214e-07, "loss": 0.8546739816665649, "step": 7087 }, { "epoch": 1.633179723502304, "grad_norm": 1.1949871550520017, "learning_rate": 1.7858416317625468e-07, "loss": 0.9187895655632019, "step": 7088 }, { "epoch": 1.633410138248848, "grad_norm": 1.077248232790752, "learning_rate": 1.7836693638353827e-07, "loss": 0.7496293783187866, "step": 7089 }, { "epoch": 1.6336405529953917, "grad_norm": 1.0517765508552415, "learning_rate": 1.7814982884977358e-07, "loss": 0.682653546333313, "step": 7090 }, { "epoch": 1.6338709677419354, "grad_norm": 1.5003665522833143, "learning_rate": 1.7793284060647295e-07, "loss": 0.8065551519393921, "step": 7091 }, { "epoch": 1.6341013824884794, "grad_norm": 1.134711484772771, "learning_rate": 1.7771597168513263e-07, "loss": 0.6605588793754578, "step": 7092 }, { "epoch": 1.634331797235023, "grad_norm": 1.0012250391371058, "learning_rate": 1.7749922211723034e-07, "loss": 0.7257254123687744, "step": 7093 }, { "epoch": 1.6345622119815668, "grad_norm": 1.1831263140816395, "learning_rate": 1.772825919342269e-07, "loss": 0.7438890933990479, "step": 7094 }, { "epoch": 1.6347926267281108, "grad_norm": 1.250595895627981, "learning_rate": 1.770660811675664e-07, "loss": 0.8546249866485596, "step": 7095 }, { "epoch": 1.6350230414746543, "grad_norm": 1.1835928544530323, "learning_rate": 1.7684968984867466e-07, "loss": 0.727516770362854, "step": 7096 }, { "epoch": 1.6352534562211982, "grad_norm": 1.36586374940823, "learning_rate": 1.766334180089606e-07, "loss": 0.7578408718109131, "step": 7097 }, { "epoch": 1.635483870967742, "grad_norm": 1.4255838450352876, "learning_rate": 1.7641726567981606e-07, "loss": 0.8253650665283203, "step": 7098 }, { "epoch": 1.6357142857142857, "grad_norm": 1.3615057524495244, "learning_rate": 1.7620123289261523e-07, "loss": 0.8932347297668457, "step": 7099 }, { "epoch": 1.6359447004608296, "grad_norm": 1.0770953977682685, "learning_rate": 1.7598531967871465e-07, "loss": 0.6661143898963928, "step": 7100 }, { "epoch": 1.6361751152073731, "grad_norm": 1.2408264386151553, "learning_rate": 1.7576952606945415e-07, "loss": 0.8413572311401367, "step": 7101 }, { "epoch": 1.636405529953917, "grad_norm": 1.2084626250429713, "learning_rate": 1.7555385209615603e-07, "loss": 0.713816225528717, "step": 7102 }, { "epoch": 1.6366359447004608, "grad_norm": 1.67339389064804, "learning_rate": 1.7533829779012466e-07, "loss": 0.8588179349899292, "step": 7103 }, { "epoch": 1.6368663594470045, "grad_norm": 1.3521357251955939, "learning_rate": 1.7512286318264778e-07, "loss": 0.8666437864303589, "step": 7104 }, { "epoch": 1.6370967741935485, "grad_norm": 1.340257158830322, "learning_rate": 1.7490754830499522e-07, "loss": 0.9219843745231628, "step": 7105 }, { "epoch": 1.6373271889400922, "grad_norm": 1.3285275552241094, "learning_rate": 1.7469235318841956e-07, "loss": 0.93767249584198, "step": 7106 }, { "epoch": 1.637557603686636, "grad_norm": 1.2782247944953928, "learning_rate": 1.7447727786415644e-07, "loss": 0.7317457795143127, "step": 7107 }, { "epoch": 1.6377880184331797, "grad_norm": 1.1023935137429937, "learning_rate": 1.7426232236342365e-07, "loss": 0.850578784942627, "step": 7108 }, { "epoch": 1.6380184331797234, "grad_norm": 1.1932749051362488, "learning_rate": 1.7404748671742143e-07, "loss": 0.7580707669258118, "step": 7109 }, { "epoch": 1.6382488479262673, "grad_norm": 1.4967576950530754, "learning_rate": 1.738327709573333e-07, "loss": 0.8393806219100952, "step": 7110 }, { "epoch": 1.638479262672811, "grad_norm": 1.0170127852420416, "learning_rate": 1.7361817511432474e-07, "loss": 0.6641673445701599, "step": 7111 }, { "epoch": 1.6387096774193548, "grad_norm": 1.2746608671167614, "learning_rate": 1.734036992195438e-07, "loss": 0.7570137977600098, "step": 7112 }, { "epoch": 1.6389400921658988, "grad_norm": 1.1366436885649456, "learning_rate": 1.7318934330412194e-07, "loss": 0.78557288646698, "step": 7113 }, { "epoch": 1.6391705069124423, "grad_norm": 1.3443988626089514, "learning_rate": 1.729751073991721e-07, "loss": 0.8309692740440369, "step": 7114 }, { "epoch": 1.6394009216589862, "grad_norm": 1.0791152795033432, "learning_rate": 1.727609915357908e-07, "loss": 0.6409872770309448, "step": 7115 }, { "epoch": 1.63963133640553, "grad_norm": 1.0106967037974632, "learning_rate": 1.7254699574505648e-07, "loss": 0.7916153073310852, "step": 7116 }, { "epoch": 1.6398617511520737, "grad_norm": 1.5121844712494004, "learning_rate": 1.7233312005803015e-07, "loss": 0.7925357818603516, "step": 7117 }, { "epoch": 1.6400921658986176, "grad_norm": 1.5493448906965575, "learning_rate": 1.7211936450575594e-07, "loss": 0.9124211668968201, "step": 7118 }, { "epoch": 1.6403225806451613, "grad_norm": 1.2418161556418856, "learning_rate": 1.7190572911925994e-07, "loss": 0.8905198574066162, "step": 7119 }, { "epoch": 1.640552995391705, "grad_norm": 1.0755844253909046, "learning_rate": 1.716922139295509e-07, "loss": 0.8139728307723999, "step": 7120 }, { "epoch": 1.6407834101382488, "grad_norm": 1.3621014779170746, "learning_rate": 1.7147881896762074e-07, "loss": 0.7607166767120361, "step": 7121 }, { "epoch": 1.6410138248847925, "grad_norm": 1.282778120557478, "learning_rate": 1.7126554426444316e-07, "loss": 0.806864857673645, "step": 7122 }, { "epoch": 1.6412442396313365, "grad_norm": 1.352241351446694, "learning_rate": 1.710523898509747e-07, "loss": 0.697334885597229, "step": 7123 }, { "epoch": 1.6414746543778802, "grad_norm": 1.4205201103890581, "learning_rate": 1.7083935575815455e-07, "loss": 0.7313966751098633, "step": 7124 }, { "epoch": 1.641705069124424, "grad_norm": 1.3868798260826238, "learning_rate": 1.7062644201690413e-07, "loss": 0.8857930898666382, "step": 7125 }, { "epoch": 1.6419354838709679, "grad_norm": 1.0686783154078314, "learning_rate": 1.7041364865812758e-07, "loss": 0.7451884746551514, "step": 7126 }, { "epoch": 1.6421658986175114, "grad_norm": 1.2220777026134708, "learning_rate": 1.7020097571271186e-07, "loss": 0.7023841142654419, "step": 7127 }, { "epoch": 1.6423963133640553, "grad_norm": 1.2608302557028366, "learning_rate": 1.6998842321152607e-07, "loss": 0.708385705947876, "step": 7128 }, { "epoch": 1.642626728110599, "grad_norm": 1.3854146642080662, "learning_rate": 1.697759911854215e-07, "loss": 0.7885474562644958, "step": 7129 }, { "epoch": 1.6428571428571428, "grad_norm": 1.161295661131579, "learning_rate": 1.695636796652331e-07, "loss": 0.7054568529129028, "step": 7130 }, { "epoch": 1.6430875576036867, "grad_norm": 1.1652742930387396, "learning_rate": 1.6935148868177718e-07, "loss": 0.6899726986885071, "step": 7131 }, { "epoch": 1.6433179723502302, "grad_norm": 1.4011600897250127, "learning_rate": 1.6913941826585288e-07, "loss": 0.8558614253997803, "step": 7132 }, { "epoch": 1.6435483870967742, "grad_norm": 1.2947217762783314, "learning_rate": 1.6892746844824223e-07, "loss": 0.7741858959197998, "step": 7133 }, { "epoch": 1.643778801843318, "grad_norm": 1.130755528536183, "learning_rate": 1.6871563925970943e-07, "loss": 0.7332532405853271, "step": 7134 }, { "epoch": 1.6440092165898617, "grad_norm": 1.4331915051670545, "learning_rate": 1.6850393073100078e-07, "loss": 0.8288085460662842, "step": 7135 }, { "epoch": 1.6442396313364056, "grad_norm": 1.493040320153856, "learning_rate": 1.682923428928461e-07, "loss": 0.9470697641372681, "step": 7136 }, { "epoch": 1.6444700460829493, "grad_norm": 1.1093535752232264, "learning_rate": 1.6808087577595686e-07, "loss": 0.7123041749000549, "step": 7137 }, { "epoch": 1.644700460829493, "grad_norm": 1.3701909416221987, "learning_rate": 1.6786952941102694e-07, "loss": 0.8077690005302429, "step": 7138 }, { "epoch": 1.644930875576037, "grad_norm": 1.3400770079054931, "learning_rate": 1.6765830382873348e-07, "loss": 0.767215371131897, "step": 7139 }, { "epoch": 1.6451612903225805, "grad_norm": 1.3723903093182923, "learning_rate": 1.6744719905973502e-07, "loss": 0.7488540410995483, "step": 7140 }, { "epoch": 1.6453917050691245, "grad_norm": 1.4546211260208752, "learning_rate": 1.6723621513467378e-07, "loss": 0.7841323018074036, "step": 7141 }, { "epoch": 1.6456221198156682, "grad_norm": 1.2167195095267902, "learning_rate": 1.6702535208417346e-07, "loss": 0.65464186668396, "step": 7142 }, { "epoch": 1.645852534562212, "grad_norm": 1.3347329400915569, "learning_rate": 1.6681460993884056e-07, "loss": 0.8845036029815674, "step": 7143 }, { "epoch": 1.6460829493087559, "grad_norm": 1.3318983430245122, "learning_rate": 1.6660398872926396e-07, "loss": 0.6741687655448914, "step": 7144 }, { "epoch": 1.6463133640552994, "grad_norm": 1.4438874912830426, "learning_rate": 1.663934884860152e-07, "loss": 0.8656717538833618, "step": 7145 }, { "epoch": 1.6465437788018433, "grad_norm": 1.3298318800949103, "learning_rate": 1.6618310923964785e-07, "loss": 0.7588434219360352, "step": 7146 }, { "epoch": 1.646774193548387, "grad_norm": 1.3262924093620256, "learning_rate": 1.6597285102069846e-07, "loss": 0.7180176973342896, "step": 7147 }, { "epoch": 1.6470046082949308, "grad_norm": 1.2551409816382322, "learning_rate": 1.6576271385968576e-07, "loss": 0.8253776431083679, "step": 7148 }, { "epoch": 1.6472350230414747, "grad_norm": 1.2281736040805922, "learning_rate": 1.6555269778711046e-07, "loss": 0.7200941443443298, "step": 7149 }, { "epoch": 1.6474654377880185, "grad_norm": 1.1059198918963296, "learning_rate": 1.653428028334567e-07, "loss": 0.7076164484024048, "step": 7150 }, { "epoch": 1.6476958525345622, "grad_norm": 1.195055160265343, "learning_rate": 1.6513302902919003e-07, "loss": 0.8068090677261353, "step": 7151 }, { "epoch": 1.6479262672811061, "grad_norm": 1.3947857709427287, "learning_rate": 1.6492337640475884e-07, "loss": 0.9712029099464417, "step": 7152 }, { "epoch": 1.6481566820276496, "grad_norm": 1.406808701456467, "learning_rate": 1.6471384499059438e-07, "loss": 0.8359737992286682, "step": 7153 }, { "epoch": 1.6483870967741936, "grad_norm": 1.0570634795327605, "learning_rate": 1.645044348171094e-07, "loss": 0.8066359758377075, "step": 7154 }, { "epoch": 1.6486175115207373, "grad_norm": 1.3810484659709985, "learning_rate": 1.642951459146995e-07, "loss": 0.8717833757400513, "step": 7155 }, { "epoch": 1.648847926267281, "grad_norm": 1.0992736543757442, "learning_rate": 1.6408597831374305e-07, "loss": 0.7335910201072693, "step": 7156 }, { "epoch": 1.649078341013825, "grad_norm": 1.2397456033121492, "learning_rate": 1.6387693204460028e-07, "loss": 0.816049337387085, "step": 7157 }, { "epoch": 1.6493087557603685, "grad_norm": 1.4068842390673124, "learning_rate": 1.6366800713761364e-07, "loss": 0.8060640096664429, "step": 7158 }, { "epoch": 1.6495391705069125, "grad_norm": 1.2074799471388065, "learning_rate": 1.6345920362310894e-07, "loss": 0.8477619886398315, "step": 7159 }, { "epoch": 1.6497695852534562, "grad_norm": 1.332601091577715, "learning_rate": 1.6325052153139329e-07, "loss": 0.9793992638587952, "step": 7160 }, { "epoch": 1.65, "grad_norm": 1.1909988829986036, "learning_rate": 1.6304196089275658e-07, "loss": 0.8020002245903015, "step": 7161 }, { "epoch": 1.6502304147465439, "grad_norm": 1.3231428787162685, "learning_rate": 1.6283352173747146e-07, "loss": 0.8226429224014282, "step": 7162 }, { "epoch": 1.6504608294930876, "grad_norm": 1.2483952861501775, "learning_rate": 1.6262520409579227e-07, "loss": 0.7029248476028442, "step": 7163 }, { "epoch": 1.6506912442396313, "grad_norm": 1.0969129808942812, "learning_rate": 1.6241700799795631e-07, "loss": 0.7234015464782715, "step": 7164 }, { "epoch": 1.6509216589861753, "grad_norm": 1.3383637969539028, "learning_rate": 1.6220893347418285e-07, "loss": 0.854112982749939, "step": 7165 }, { "epoch": 1.6511520737327188, "grad_norm": 1.2277405230752314, "learning_rate": 1.6200098055467325e-07, "loss": 0.8098663091659546, "step": 7166 }, { "epoch": 1.6513824884792627, "grad_norm": 1.286099874995443, "learning_rate": 1.617931492696123e-07, "loss": 0.9032876491546631, "step": 7167 }, { "epoch": 1.6516129032258065, "grad_norm": 1.0239384348378415, "learning_rate": 1.6158543964916606e-07, "loss": 0.7048916816711426, "step": 7168 }, { "epoch": 1.6518433179723502, "grad_norm": 1.2354879671689736, "learning_rate": 1.6137785172348307e-07, "loss": 0.879542350769043, "step": 7169 }, { "epoch": 1.6520737327188941, "grad_norm": 1.1499858637392877, "learning_rate": 1.611703855226949e-07, "loss": 0.7851279377937317, "step": 7170 }, { "epoch": 1.6523041474654376, "grad_norm": 1.3219595195357319, "learning_rate": 1.6096304107691493e-07, "loss": 0.779682457447052, "step": 7171 }, { "epoch": 1.6525345622119816, "grad_norm": 1.2160096597693908, "learning_rate": 1.6075581841623854e-07, "loss": 0.7761027812957764, "step": 7172 }, { "epoch": 1.6527649769585253, "grad_norm": 1.2474814185415584, "learning_rate": 1.605487175707443e-07, "loss": 0.726230263710022, "step": 7173 }, { "epoch": 1.652995391705069, "grad_norm": 1.4211290590725025, "learning_rate": 1.6034173857049238e-07, "loss": 0.915956437587738, "step": 7174 }, { "epoch": 1.653225806451613, "grad_norm": 1.2631109729400856, "learning_rate": 1.6013488144552534e-07, "loss": 0.8435969352722168, "step": 7175 }, { "epoch": 1.6534562211981567, "grad_norm": 1.4370024530537882, "learning_rate": 1.599281462258687e-07, "loss": 0.7775791883468628, "step": 7176 }, { "epoch": 1.6536866359447004, "grad_norm": 1.2504716465033257, "learning_rate": 1.5972153294152945e-07, "loss": 0.7578383684158325, "step": 7177 }, { "epoch": 1.6539170506912444, "grad_norm": 1.25108951979748, "learning_rate": 1.5951504162249706e-07, "loss": 0.8378545045852661, "step": 7178 }, { "epoch": 1.654147465437788, "grad_norm": 0.8833465476140244, "learning_rate": 1.59308672298744e-07, "loss": 0.7071488499641418, "step": 7179 }, { "epoch": 1.6543778801843319, "grad_norm": 1.315489910714214, "learning_rate": 1.591024250002243e-07, "loss": 0.7424521446228027, "step": 7180 }, { "epoch": 1.6546082949308756, "grad_norm": 1.2002526550771535, "learning_rate": 1.5889629975687401e-07, "loss": 0.6503180265426636, "step": 7181 }, { "epoch": 1.6548387096774193, "grad_norm": 1.1861762089682637, "learning_rate": 1.5869029659861265e-07, "loss": 0.7589888572692871, "step": 7182 }, { "epoch": 1.6550691244239633, "grad_norm": 1.2877948406073703, "learning_rate": 1.5848441555534109e-07, "loss": 0.7609498500823975, "step": 7183 }, { "epoch": 1.6552995391705068, "grad_norm": 1.1756552735153392, "learning_rate": 1.582786566569425e-07, "loss": 0.7813476324081421, "step": 7184 }, { "epoch": 1.6555299539170507, "grad_norm": 1.1595327374780875, "learning_rate": 1.5807301993328258e-07, "loss": 0.7386292219161987, "step": 7185 }, { "epoch": 1.6557603686635944, "grad_norm": 1.4106740697965885, "learning_rate": 1.5786750541420922e-07, "loss": 1.0402865409851074, "step": 7186 }, { "epoch": 1.6559907834101382, "grad_norm": 1.071897744375966, "learning_rate": 1.5766211312955246e-07, "loss": 0.7375132441520691, "step": 7187 }, { "epoch": 1.6562211981566821, "grad_norm": 1.3721197645813625, "learning_rate": 1.574568431091251e-07, "loss": 0.7903615236282349, "step": 7188 }, { "epoch": 1.6564516129032258, "grad_norm": 1.1205445704505106, "learning_rate": 1.5725169538272132e-07, "loss": 0.6912896633148193, "step": 7189 }, { "epoch": 1.6566820276497696, "grad_norm": 1.2659829320834666, "learning_rate": 1.570466699801185e-07, "loss": 0.7181826233863831, "step": 7190 }, { "epoch": 1.6569124423963135, "grad_norm": 1.3941328099536103, "learning_rate": 1.5684176693107566e-07, "loss": 0.8328898549079895, "step": 7191 }, { "epoch": 1.657142857142857, "grad_norm": 1.275566962551196, "learning_rate": 1.5663698626533384e-07, "loss": 0.7775120735168457, "step": 7192 }, { "epoch": 1.657373271889401, "grad_norm": 1.3683527646177032, "learning_rate": 1.564323280126173e-07, "loss": 0.8412137031555176, "step": 7193 }, { "epoch": 1.6576036866359447, "grad_norm": 1.4192183215515342, "learning_rate": 1.562277922026316e-07, "loss": 0.7046825885772705, "step": 7194 }, { "epoch": 1.6578341013824884, "grad_norm": 1.3386632639806328, "learning_rate": 1.5602337886506468e-07, "loss": 0.7107498645782471, "step": 7195 }, { "epoch": 1.6580645161290324, "grad_norm": 1.1946522893092928, "learning_rate": 1.558190880295872e-07, "loss": 0.640724778175354, "step": 7196 }, { "epoch": 1.658294930875576, "grad_norm": 1.3093502483074915, "learning_rate": 1.556149197258515e-07, "loss": 0.7856858968734741, "step": 7197 }, { "epoch": 1.6585253456221198, "grad_norm": 1.4971129714340625, "learning_rate": 1.554108739834923e-07, "loss": 0.7956376075744629, "step": 7198 }, { "epoch": 1.6587557603686636, "grad_norm": 1.2753834260169075, "learning_rate": 1.5520695083212675e-07, "loss": 0.721325159072876, "step": 7199 }, { "epoch": 1.6589861751152073, "grad_norm": 1.060032555829029, "learning_rate": 1.550031503013539e-07, "loss": 0.7043335437774658, "step": 7200 }, { "epoch": 1.6592165898617512, "grad_norm": 1.2269468216437214, "learning_rate": 1.5479947242075496e-07, "loss": 0.7154408693313599, "step": 7201 }, { "epoch": 1.659447004608295, "grad_norm": 1.0598234159957265, "learning_rate": 1.5459591721989397e-07, "loss": 0.7353748083114624, "step": 7202 }, { "epoch": 1.6596774193548387, "grad_norm": 1.1815091781809732, "learning_rate": 1.5439248472831644e-07, "loss": 0.7404372692108154, "step": 7203 }, { "epoch": 1.6599078341013827, "grad_norm": 1.7521749620198364, "learning_rate": 1.541891749755503e-07, "loss": 0.8678613305091858, "step": 7204 }, { "epoch": 1.6601382488479262, "grad_norm": 1.2663476960491773, "learning_rate": 1.5398598799110562e-07, "loss": 0.7177796363830566, "step": 7205 }, { "epoch": 1.66036866359447, "grad_norm": 1.3475911636796425, "learning_rate": 1.537829238044749e-07, "loss": 0.7610895037651062, "step": 7206 }, { "epoch": 1.6605990783410138, "grad_norm": 1.355013126121341, "learning_rate": 1.5357998244513227e-07, "loss": 0.7340127825737, "step": 7207 }, { "epoch": 1.6608294930875576, "grad_norm": 1.2008253519594887, "learning_rate": 1.5337716394253498e-07, "loss": 0.7060200572013855, "step": 7208 }, { "epoch": 1.6610599078341015, "grad_norm": 1.306554098336219, "learning_rate": 1.5317446832612147e-07, "loss": 0.8592087030410767, "step": 7209 }, { "epoch": 1.661290322580645, "grad_norm": 1.1630740877062444, "learning_rate": 1.5297189562531264e-07, "loss": 0.8687897324562073, "step": 7210 }, { "epoch": 1.661520737327189, "grad_norm": 1.346256802747815, "learning_rate": 1.5276944586951202e-07, "loss": 0.8158563375473022, "step": 7211 }, { "epoch": 1.6617511520737327, "grad_norm": 1.2436624388230366, "learning_rate": 1.5256711908810482e-07, "loss": 0.7734059691429138, "step": 7212 }, { "epoch": 1.6619815668202764, "grad_norm": 1.4006583359216147, "learning_rate": 1.5236491531045815e-07, "loss": 0.8302994966506958, "step": 7213 }, { "epoch": 1.6622119815668204, "grad_norm": 1.3250021353738068, "learning_rate": 1.5216283456592216e-07, "loss": 0.8474830389022827, "step": 7214 }, { "epoch": 1.662442396313364, "grad_norm": 1.130266104375724, "learning_rate": 1.5196087688382808e-07, "loss": 0.7903469800949097, "step": 7215 }, { "epoch": 1.6626728110599078, "grad_norm": 1.1131412296095682, "learning_rate": 1.5175904229349035e-07, "loss": 0.7756912708282471, "step": 7216 }, { "epoch": 1.6629032258064518, "grad_norm": 1.4164367883683733, "learning_rate": 1.5155733082420463e-07, "loss": 0.7495905756950378, "step": 7217 }, { "epoch": 1.6631336405529953, "grad_norm": 1.3394708776746769, "learning_rate": 1.5135574250524897e-07, "loss": 0.8536649942398071, "step": 7218 }, { "epoch": 1.6633640552995392, "grad_norm": 1.3243776315844114, "learning_rate": 1.5115427736588404e-07, "loss": 0.7301580905914307, "step": 7219 }, { "epoch": 1.663594470046083, "grad_norm": 1.324768351380299, "learning_rate": 1.5095293543535203e-07, "loss": 0.7131164073944092, "step": 7220 }, { "epoch": 1.6638248847926267, "grad_norm": 1.0897989875613177, "learning_rate": 1.5075171674287712e-07, "loss": 0.708457350730896, "step": 7221 }, { "epoch": 1.6640552995391706, "grad_norm": 1.402833248483696, "learning_rate": 1.5055062131766662e-07, "loss": 0.7509758472442627, "step": 7222 }, { "epoch": 1.6642857142857141, "grad_norm": 1.1455053593625757, "learning_rate": 1.503496491889089e-07, "loss": 0.8401786088943481, "step": 7223 }, { "epoch": 1.664516129032258, "grad_norm": 1.3755379329147759, "learning_rate": 1.5014880038577482e-07, "loss": 0.8578320741653442, "step": 7224 }, { "epoch": 1.6647465437788018, "grad_norm": 1.0530962657504686, "learning_rate": 1.4994807493741723e-07, "loss": 0.6890276670455933, "step": 7225 }, { "epoch": 1.6649769585253456, "grad_norm": 1.1705604667481366, "learning_rate": 1.4974747287297128e-07, "loss": 0.785246729850769, "step": 7226 }, { "epoch": 1.6652073732718895, "grad_norm": 1.1145207566800768, "learning_rate": 1.4954699422155382e-07, "loss": 0.7826062440872192, "step": 7227 }, { "epoch": 1.6654377880184332, "grad_norm": 1.392497287743248, "learning_rate": 1.4934663901226452e-07, "loss": 0.807513952255249, "step": 7228 }, { "epoch": 1.665668202764977, "grad_norm": 1.0951466978132682, "learning_rate": 1.4914640727418448e-07, "loss": 0.8138872385025024, "step": 7229 }, { "epoch": 1.6658986175115207, "grad_norm": 1.0721150835685114, "learning_rate": 1.489462990363768e-07, "loss": 0.8465121984481812, "step": 7230 }, { "epoch": 1.6661290322580644, "grad_norm": 1.2125852838751665, "learning_rate": 1.4874631432788743e-07, "loss": 0.7649251222610474, "step": 7231 }, { "epoch": 1.6663594470046084, "grad_norm": 1.242983952838099, "learning_rate": 1.485464531777436e-07, "loss": 0.8297271132469177, "step": 7232 }, { "epoch": 1.666589861751152, "grad_norm": 1.4592304164798606, "learning_rate": 1.483467156149546e-07, "loss": 0.7873194217681885, "step": 7233 }, { "epoch": 1.6668202764976958, "grad_norm": 1.1529440121296932, "learning_rate": 1.4814710166851274e-07, "loss": 0.6924761533737183, "step": 7234 }, { "epoch": 1.6670506912442398, "grad_norm": 0.9776015930659686, "learning_rate": 1.4794761136739132e-07, "loss": 0.6600887179374695, "step": 7235 }, { "epoch": 1.6672811059907833, "grad_norm": 1.0700715817274216, "learning_rate": 1.477482447405458e-07, "loss": 0.6552041172981262, "step": 7236 }, { "epoch": 1.6675115207373272, "grad_norm": 1.1844260959064823, "learning_rate": 1.4754900181691465e-07, "loss": 0.8609327077865601, "step": 7237 }, { "epoch": 1.667741935483871, "grad_norm": 0.9877698580103615, "learning_rate": 1.4734988262541726e-07, "loss": 0.6970123052597046, "step": 7238 }, { "epoch": 1.6679723502304147, "grad_norm": 1.1422057607025191, "learning_rate": 1.4715088719495573e-07, "loss": 0.7859683036804199, "step": 7239 }, { "epoch": 1.6682027649769586, "grad_norm": 1.102405207717508, "learning_rate": 1.4695201555441393e-07, "loss": 0.7448029518127441, "step": 7240 }, { "epoch": 1.6684331797235024, "grad_norm": 1.136418636365662, "learning_rate": 1.4675326773265762e-07, "loss": 0.7566728591918945, "step": 7241 }, { "epoch": 1.668663594470046, "grad_norm": 1.183347797545015, "learning_rate": 1.465546437585351e-07, "loss": 0.7563366889953613, "step": 7242 }, { "epoch": 1.6688940092165898, "grad_norm": 1.2270668729431573, "learning_rate": 1.4635614366087623e-07, "loss": 0.8580834865570068, "step": 7243 }, { "epoch": 1.6691244239631335, "grad_norm": 1.261588467565845, "learning_rate": 1.4615776746849306e-07, "loss": 0.6200178861618042, "step": 7244 }, { "epoch": 1.6693548387096775, "grad_norm": 1.12353329539602, "learning_rate": 1.4595951521017958e-07, "loss": 0.8052491545677185, "step": 7245 }, { "epoch": 1.6695852534562212, "grad_norm": 1.7485044689788691, "learning_rate": 1.4576138691471186e-07, "loss": 0.7383530735969543, "step": 7246 }, { "epoch": 1.669815668202765, "grad_norm": 1.2061617795996018, "learning_rate": 1.4556338261084776e-07, "loss": 0.6735742092132568, "step": 7247 }, { "epoch": 1.670046082949309, "grad_norm": 1.1671720957777614, "learning_rate": 1.453655023273277e-07, "loss": 0.7570016980171204, "step": 7248 }, { "epoch": 1.6702764976958524, "grad_norm": 1.1212050061324152, "learning_rate": 1.4516774609287364e-07, "loss": 0.7271980047225952, "step": 7249 }, { "epoch": 1.6705069124423964, "grad_norm": 1.3773952001351246, "learning_rate": 1.449701139361894e-07, "loss": 0.8567354083061218, "step": 7250 }, { "epoch": 1.67073732718894, "grad_norm": 1.4372041287717652, "learning_rate": 1.447726058859614e-07, "loss": 0.8675428628921509, "step": 7251 }, { "epoch": 1.6709677419354838, "grad_norm": 1.6475511282046704, "learning_rate": 1.4457522197085748e-07, "loss": 0.9131098389625549, "step": 7252 }, { "epoch": 1.6711981566820278, "grad_norm": 0.9228526790942371, "learning_rate": 1.4437796221952748e-07, "loss": 0.7921037673950195, "step": 7253 }, { "epoch": 1.6714285714285713, "grad_norm": 1.3314958050470875, "learning_rate": 1.441808266606037e-07, "loss": 0.7559863328933716, "step": 7254 }, { "epoch": 1.6716589861751152, "grad_norm": 1.4253402064070324, "learning_rate": 1.4398381532269998e-07, "loss": 0.7433857917785645, "step": 7255 }, { "epoch": 1.671889400921659, "grad_norm": 1.340982715064525, "learning_rate": 1.4378692823441207e-07, "loss": 0.8171184062957764, "step": 7256 }, { "epoch": 1.6721198156682027, "grad_norm": 1.4295893582001031, "learning_rate": 1.4359016542431824e-07, "loss": 0.7296291589736938, "step": 7257 }, { "epoch": 1.6723502304147466, "grad_norm": 1.1566282275472088, "learning_rate": 1.4339352692097828e-07, "loss": 0.7397829294204712, "step": 7258 }, { "epoch": 1.6725806451612903, "grad_norm": 1.1030928795639288, "learning_rate": 1.431970127529335e-07, "loss": 0.6724194884300232, "step": 7259 }, { "epoch": 1.672811059907834, "grad_norm": 1.266832602935082, "learning_rate": 1.430006229487084e-07, "loss": 0.7711449861526489, "step": 7260 }, { "epoch": 1.673041474654378, "grad_norm": 1.0334522746934713, "learning_rate": 1.428043575368083e-07, "loss": 0.7581815719604492, "step": 7261 }, { "epoch": 1.6732718894009215, "grad_norm": 1.2775574658714877, "learning_rate": 1.4260821654572063e-07, "loss": 0.7092517614364624, "step": 7262 }, { "epoch": 1.6735023041474655, "grad_norm": 1.116987885688497, "learning_rate": 1.4241220000391562e-07, "loss": 0.646745502948761, "step": 7263 }, { "epoch": 1.6737327188940092, "grad_norm": 1.0897996116307995, "learning_rate": 1.4221630793984453e-07, "loss": 0.7364122867584229, "step": 7264 }, { "epoch": 1.673963133640553, "grad_norm": 1.0366138580080708, "learning_rate": 1.4202054038194068e-07, "loss": 0.8186795711517334, "step": 7265 }, { "epoch": 1.6741935483870969, "grad_norm": 1.178861697439358, "learning_rate": 1.4182489735861957e-07, "loss": 0.7172378301620483, "step": 7266 }, { "epoch": 1.6744239631336404, "grad_norm": 1.6433299949580555, "learning_rate": 1.416293788982783e-07, "loss": 0.8780974745750427, "step": 7267 }, { "epoch": 1.6746543778801843, "grad_norm": 1.303060213158533, "learning_rate": 1.4143398502929672e-07, "loss": 0.9034930467605591, "step": 7268 }, { "epoch": 1.674884792626728, "grad_norm": 1.283952582595571, "learning_rate": 1.4123871578003543e-07, "loss": 0.7994415760040283, "step": 7269 }, { "epoch": 1.6751152073732718, "grad_norm": 1.2332939563797212, "learning_rate": 1.410435711788376e-07, "loss": 0.8327854871749878, "step": 7270 }, { "epoch": 1.6753456221198157, "grad_norm": 1.3516689374751454, "learning_rate": 1.408485512540285e-07, "loss": 0.7667550444602966, "step": 7271 }, { "epoch": 1.6755760368663595, "grad_norm": 1.3721126007283877, "learning_rate": 1.4065365603391478e-07, "loss": 0.8073924779891968, "step": 7272 }, { "epoch": 1.6758064516129032, "grad_norm": 1.2537292403097655, "learning_rate": 1.4045888554678497e-07, "loss": 0.7265589237213135, "step": 7273 }, { "epoch": 1.6760368663594472, "grad_norm": 1.4008103355507637, "learning_rate": 1.402642398209104e-07, "loss": 0.6912035942077637, "step": 7274 }, { "epoch": 1.6762672811059907, "grad_norm": 1.4159985968960598, "learning_rate": 1.400697188845432e-07, "loss": 0.917953372001648, "step": 7275 }, { "epoch": 1.6764976958525346, "grad_norm": 1.1092123664048492, "learning_rate": 1.3987532276591774e-07, "loss": 0.6989340782165527, "step": 7276 }, { "epoch": 1.6767281105990783, "grad_norm": 1.0530722269060104, "learning_rate": 1.396810514932507e-07, "loss": 0.6648346185684204, "step": 7277 }, { "epoch": 1.676958525345622, "grad_norm": 1.152242717428616, "learning_rate": 1.3948690509474014e-07, "loss": 0.6462730169296265, "step": 7278 }, { "epoch": 1.677188940092166, "grad_norm": 1.0559078213581141, "learning_rate": 1.3929288359856584e-07, "loss": 0.6084051132202148, "step": 7279 }, { "epoch": 1.6774193548387095, "grad_norm": 1.2568155531692753, "learning_rate": 1.3909898703289037e-07, "loss": 0.8593035936355591, "step": 7280 }, { "epoch": 1.6776497695852535, "grad_norm": 1.432799112874992, "learning_rate": 1.389052154258572e-07, "loss": 0.8064925670623779, "step": 7281 }, { "epoch": 1.6778801843317972, "grad_norm": 1.3257643730794528, "learning_rate": 1.3871156880559186e-07, "loss": 0.7366064786911011, "step": 7282 }, { "epoch": 1.678110599078341, "grad_norm": 1.4541745835743052, "learning_rate": 1.3851804720020233e-07, "loss": 0.8090124726295471, "step": 7283 }, { "epoch": 1.6783410138248849, "grad_norm": 1.3768572400260246, "learning_rate": 1.3832465063777787e-07, "loss": 0.7326936721801758, "step": 7284 }, { "epoch": 1.6785714285714286, "grad_norm": 1.1036181265329146, "learning_rate": 1.3813137914638961e-07, "loss": 0.7142004370689392, "step": 7285 }, { "epoch": 1.6788018433179723, "grad_norm": 1.1850699819171153, "learning_rate": 1.3793823275409066e-07, "loss": 0.8358181715011597, "step": 7286 }, { "epoch": 1.6790322580645163, "grad_norm": 1.341055264970921, "learning_rate": 1.3774521148891583e-07, "loss": 0.7337081432342529, "step": 7287 }, { "epoch": 1.6792626728110598, "grad_norm": 1.079298746666331, "learning_rate": 1.3755231537888222e-07, "loss": 0.8029334545135498, "step": 7288 }, { "epoch": 1.6794930875576037, "grad_norm": 1.1362422930327392, "learning_rate": 1.373595444519884e-07, "loss": 0.8132611513137817, "step": 7289 }, { "epoch": 1.6797235023041475, "grad_norm": 1.2850987320352512, "learning_rate": 1.3716689873621446e-07, "loss": 0.7377278804779053, "step": 7290 }, { "epoch": 1.6799539170506912, "grad_norm": 1.5545938019119256, "learning_rate": 1.3697437825952307e-07, "loss": 0.788368284702301, "step": 7291 }, { "epoch": 1.6801843317972351, "grad_norm": 1.3811107908360538, "learning_rate": 1.3678198304985822e-07, "loss": 0.8288586139678955, "step": 7292 }, { "epoch": 1.6804147465437786, "grad_norm": 1.2973962244733976, "learning_rate": 1.3658971313514567e-07, "loss": 0.8534054160118103, "step": 7293 }, { "epoch": 1.6806451612903226, "grad_norm": 1.261356018830994, "learning_rate": 1.363975685432933e-07, "loss": 0.8730596303939819, "step": 7294 }, { "epoch": 1.6808755760368663, "grad_norm": 1.2262296688166254, "learning_rate": 1.3620554930219076e-07, "loss": 0.6891343593597412, "step": 7295 }, { "epoch": 1.68110599078341, "grad_norm": 1.4944659665191207, "learning_rate": 1.360136554397089e-07, "loss": 0.8575270175933838, "step": 7296 }, { "epoch": 1.681336405529954, "grad_norm": 1.1221716147697696, "learning_rate": 1.3582188698370134e-07, "loss": 0.82694011926651, "step": 7297 }, { "epoch": 1.6815668202764977, "grad_norm": 1.1921152491764102, "learning_rate": 1.3563024396200296e-07, "loss": 0.6468113660812378, "step": 7298 }, { "epoch": 1.6817972350230415, "grad_norm": 1.1634380991195066, "learning_rate": 1.3543872640243016e-07, "loss": 0.6818577647209167, "step": 7299 }, { "epoch": 1.6820276497695854, "grad_norm": 1.262155726089824, "learning_rate": 1.352473343327819e-07, "loss": 0.7630767822265625, "step": 7300 }, { "epoch": 1.682258064516129, "grad_norm": 1.3348546512512276, "learning_rate": 1.3505606778083832e-07, "loss": 0.9019678831100464, "step": 7301 }, { "epoch": 1.6824884792626729, "grad_norm": 1.1302876731614566, "learning_rate": 1.3486492677436123e-07, "loss": 0.821324348449707, "step": 7302 }, { "epoch": 1.6827188940092166, "grad_norm": 1.1997119452659193, "learning_rate": 1.3467391134109495e-07, "loss": 0.796151876449585, "step": 7303 }, { "epoch": 1.6829493087557603, "grad_norm": 1.298615109914031, "learning_rate": 1.3448302150876488e-07, "loss": 0.8020445108413696, "step": 7304 }, { "epoch": 1.6831797235023043, "grad_norm": 0.9490183941784253, "learning_rate": 1.3429225730507843e-07, "loss": 0.7215749025344849, "step": 7305 }, { "epoch": 1.6834101382488478, "grad_norm": 1.2708231250445967, "learning_rate": 1.3410161875772474e-07, "loss": 0.920941174030304, "step": 7306 }, { "epoch": 1.6836405529953917, "grad_norm": 1.4523260098562263, "learning_rate": 1.3391110589437494e-07, "loss": 0.8979494571685791, "step": 7307 }, { "epoch": 1.6838709677419355, "grad_norm": 1.3126261706157987, "learning_rate": 1.337207187426812e-07, "loss": 0.9125145673751831, "step": 7308 }, { "epoch": 1.6841013824884792, "grad_norm": 1.1179697975279568, "learning_rate": 1.3353045733027858e-07, "loss": 0.8205714225769043, "step": 7309 }, { "epoch": 1.6843317972350231, "grad_norm": 1.0993805126125902, "learning_rate": 1.3334032168478305e-07, "loss": 0.6914113759994507, "step": 7310 }, { "epoch": 1.6845622119815669, "grad_norm": 1.3165472089957067, "learning_rate": 1.3315031183379233e-07, "loss": 0.7355014085769653, "step": 7311 }, { "epoch": 1.6847926267281106, "grad_norm": 1.3581792517836289, "learning_rate": 1.3296042780488637e-07, "loss": 0.7564182281494141, "step": 7312 }, { "epoch": 1.6850230414746545, "grad_norm": 1.197316556809727, "learning_rate": 1.3277066962562643e-07, "loss": 0.8091372847557068, "step": 7313 }, { "epoch": 1.685253456221198, "grad_norm": 1.131878643977171, "learning_rate": 1.3258103732355586e-07, "loss": 0.7457877993583679, "step": 7314 }, { "epoch": 1.685483870967742, "grad_norm": 1.2462081986852567, "learning_rate": 1.3239153092619948e-07, "loss": 0.861819863319397, "step": 7315 }, { "epoch": 1.6857142857142857, "grad_norm": 1.2291218741883772, "learning_rate": 1.3220215046106353e-07, "loss": 0.7698357105255127, "step": 7316 }, { "epoch": 1.6859447004608294, "grad_norm": 1.2862793081172317, "learning_rate": 1.320128959556369e-07, "loss": 0.7889456152915955, "step": 7317 }, { "epoch": 1.6861751152073734, "grad_norm": 1.0926817497008894, "learning_rate": 1.3182376743738932e-07, "loss": 0.6467938423156738, "step": 7318 }, { "epoch": 1.686405529953917, "grad_norm": 0.962046315570081, "learning_rate": 1.3163476493377245e-07, "loss": 0.7202441692352295, "step": 7319 }, { "epoch": 1.6866359447004609, "grad_norm": 1.2860571238613498, "learning_rate": 1.3144588847222004e-07, "loss": 0.7464008331298828, "step": 7320 }, { "epoch": 1.6868663594470046, "grad_norm": 1.3323127704795366, "learning_rate": 1.3125713808014704e-07, "loss": 0.8924611806869507, "step": 7321 }, { "epoch": 1.6870967741935483, "grad_norm": 1.5027995023789942, "learning_rate": 1.3106851378495044e-07, "loss": 0.6943146586418152, "step": 7322 }, { "epoch": 1.6873271889400923, "grad_norm": 1.336362656918588, "learning_rate": 1.308800156140085e-07, "loss": 0.7335963249206543, "step": 7323 }, { "epoch": 1.687557603686636, "grad_norm": 1.1540515039280186, "learning_rate": 1.30691643594682e-07, "loss": 0.6900516748428345, "step": 7324 }, { "epoch": 1.6877880184331797, "grad_norm": 1.0161083273097216, "learning_rate": 1.3050339775431262e-07, "loss": 0.7230286598205566, "step": 7325 }, { "epoch": 1.6880184331797237, "grad_norm": 1.3577939883495977, "learning_rate": 1.3031527812022403e-07, "loss": 0.8069840669631958, "step": 7326 }, { "epoch": 1.6882488479262672, "grad_norm": 1.1850570268151976, "learning_rate": 1.3012728471972134e-07, "loss": 0.7598710060119629, "step": 7327 }, { "epoch": 1.6884792626728111, "grad_norm": 1.1081098309526143, "learning_rate": 1.2993941758009164e-07, "loss": 0.6817609071731567, "step": 7328 }, { "epoch": 1.6887096774193548, "grad_norm": 1.1578322948538884, "learning_rate": 1.2975167672860387e-07, "loss": 0.6958975791931152, "step": 7329 }, { "epoch": 1.6889400921658986, "grad_norm": 1.3026010781309694, "learning_rate": 1.2956406219250814e-07, "loss": 0.8270853757858276, "step": 7330 }, { "epoch": 1.6891705069124425, "grad_norm": 1.2716142402347783, "learning_rate": 1.2937657399903623e-07, "loss": 0.8045610189437866, "step": 7331 }, { "epoch": 1.689400921658986, "grad_norm": 1.3670021400758372, "learning_rate": 1.2918921217540224e-07, "loss": 0.6685627698898315, "step": 7332 }, { "epoch": 1.68963133640553, "grad_norm": 1.481483528763015, "learning_rate": 1.2900197674880142e-07, "loss": 0.8157398700714111, "step": 7333 }, { "epoch": 1.6898617511520737, "grad_norm": 1.1922253618562, "learning_rate": 1.2881486774641025e-07, "loss": 0.6142218112945557, "step": 7334 }, { "epoch": 1.6900921658986174, "grad_norm": 1.2611165552955415, "learning_rate": 1.2862788519538815e-07, "loss": 0.7849327921867371, "step": 7335 }, { "epoch": 1.6903225806451614, "grad_norm": 1.3074701765125263, "learning_rate": 1.2844102912287457e-07, "loss": 0.8035926818847656, "step": 7336 }, { "epoch": 1.6905529953917051, "grad_norm": 1.26449405816571, "learning_rate": 1.2825429955599209e-07, "loss": 0.8456575870513916, "step": 7337 }, { "epoch": 1.6907834101382488, "grad_norm": 1.0994096629111347, "learning_rate": 1.2806769652184402e-07, "loss": 0.7436026334762573, "step": 7338 }, { "epoch": 1.6910138248847926, "grad_norm": 1.3946687886072922, "learning_rate": 1.2788122004751522e-07, "loss": 0.8315454721450806, "step": 7339 }, { "epoch": 1.6912442396313363, "grad_norm": 1.1032652805797263, "learning_rate": 1.2769487016007307e-07, "loss": 0.7425665855407715, "step": 7340 }, { "epoch": 1.6914746543778802, "grad_norm": 1.210532059455236, "learning_rate": 1.2750864688656572e-07, "loss": 0.7899731993675232, "step": 7341 }, { "epoch": 1.691705069124424, "grad_norm": 1.2339006903630358, "learning_rate": 1.2732255025402327e-07, "loss": 0.7637509703636169, "step": 7342 }, { "epoch": 1.6919354838709677, "grad_norm": 1.2301886439270189, "learning_rate": 1.2713658028945717e-07, "loss": 0.793779730796814, "step": 7343 }, { "epoch": 1.6921658986175117, "grad_norm": 1.2351914671209905, "learning_rate": 1.2695073701986103e-07, "loss": 0.7248083353042603, "step": 7344 }, { "epoch": 1.6923963133640552, "grad_norm": 1.4318296651769333, "learning_rate": 1.2676502047220973e-07, "loss": 0.7506270408630371, "step": 7345 }, { "epoch": 1.692626728110599, "grad_norm": 1.248314789497465, "learning_rate": 1.2657943067345965e-07, "loss": 0.7921839952468872, "step": 7346 }, { "epoch": 1.6928571428571428, "grad_norm": 0.9630256947791611, "learning_rate": 1.263939676505491e-07, "loss": 0.7627893686294556, "step": 7347 }, { "epoch": 1.6930875576036866, "grad_norm": 1.039168896728356, "learning_rate": 1.262086314303973e-07, "loss": 0.788955807685852, "step": 7348 }, { "epoch": 1.6933179723502305, "grad_norm": 1.0370858136190912, "learning_rate": 1.2602342203990612e-07, "loss": 0.5527241826057434, "step": 7349 }, { "epoch": 1.6935483870967742, "grad_norm": 1.344465363325951, "learning_rate": 1.2583833950595825e-07, "loss": 0.7324573397636414, "step": 7350 }, { "epoch": 1.693778801843318, "grad_norm": 1.0731663336898336, "learning_rate": 1.256533838554179e-07, "loss": 0.6588207483291626, "step": 7351 }, { "epoch": 1.6940092165898617, "grad_norm": 1.417078203000081, "learning_rate": 1.2546855511513165e-07, "loss": 0.7597184181213379, "step": 7352 }, { "epoch": 1.6942396313364054, "grad_norm": 1.1748568881342167, "learning_rate": 1.2528385331192692e-07, "loss": 0.7487671375274658, "step": 7353 }, { "epoch": 1.6944700460829494, "grad_norm": 1.0203340332958148, "learning_rate": 1.250992784726126e-07, "loss": 0.757739245891571, "step": 7354 }, { "epoch": 1.694700460829493, "grad_norm": 1.314521719717035, "learning_rate": 1.249148306239801e-07, "loss": 0.616966724395752, "step": 7355 }, { "epoch": 1.6949308755760368, "grad_norm": 1.506626916778979, "learning_rate": 1.2473050979280142e-07, "loss": 0.9415719509124756, "step": 7356 }, { "epoch": 1.6951612903225808, "grad_norm": 1.0903568482188648, "learning_rate": 1.2454631600583044e-07, "loss": 0.7731447815895081, "step": 7357 }, { "epoch": 1.6953917050691243, "grad_norm": 1.2821570786422227, "learning_rate": 1.2436224928980276e-07, "loss": 0.800236701965332, "step": 7358 }, { "epoch": 1.6956221198156682, "grad_norm": 1.2900334463062004, "learning_rate": 1.241783096714356e-07, "loss": 0.8113845586776733, "step": 7359 }, { "epoch": 1.695852534562212, "grad_norm": 1.2157051726485628, "learning_rate": 1.2399449717742706e-07, "loss": 0.748763382434845, "step": 7360 }, { "epoch": 1.6960829493087557, "grad_norm": 1.3769466349570898, "learning_rate": 1.2381081183445774e-07, "loss": 0.8595450520515442, "step": 7361 }, { "epoch": 1.6963133640552996, "grad_norm": 1.240341465296028, "learning_rate": 1.2362725366918913e-07, "loss": 0.7800960540771484, "step": 7362 }, { "epoch": 1.6965437788018434, "grad_norm": 1.1951306648014712, "learning_rate": 1.2344382270826438e-07, "loss": 0.6549400687217712, "step": 7363 }, { "epoch": 1.696774193548387, "grad_norm": 1.1182982438102955, "learning_rate": 1.2326051897830858e-07, "loss": 0.7839380502700806, "step": 7364 }, { "epoch": 1.6970046082949308, "grad_norm": 1.2576690972053175, "learning_rate": 1.230773425059277e-07, "loss": 0.8436654806137085, "step": 7365 }, { "epoch": 1.6972350230414746, "grad_norm": 0.8415515075804344, "learning_rate": 1.2289429331770974e-07, "loss": 0.6517987251281738, "step": 7366 }, { "epoch": 1.6974654377880185, "grad_norm": 1.073572916121381, "learning_rate": 1.2271137144022392e-07, "loss": 0.7108355760574341, "step": 7367 }, { "epoch": 1.6976958525345622, "grad_norm": 1.138464806776697, "learning_rate": 1.2252857690002094e-07, "loss": 0.7801471948623657, "step": 7368 }, { "epoch": 1.697926267281106, "grad_norm": 0.9980466100193536, "learning_rate": 1.2234590972363358e-07, "loss": 0.8240209221839905, "step": 7369 }, { "epoch": 1.69815668202765, "grad_norm": 1.5026485017018454, "learning_rate": 1.2216336993757558e-07, "loss": 0.8119853138923645, "step": 7370 }, { "epoch": 1.6983870967741934, "grad_norm": 0.9448426506131885, "learning_rate": 1.2198095756834216e-07, "loss": 0.7685642838478088, "step": 7371 }, { "epoch": 1.6986175115207374, "grad_norm": 1.1884615399125027, "learning_rate": 1.217986726424106e-07, "loss": 0.7820984125137329, "step": 7372 }, { "epoch": 1.698847926267281, "grad_norm": 1.4933868054084445, "learning_rate": 1.2161651518623916e-07, "loss": 0.8051085472106934, "step": 7373 }, { "epoch": 1.6990783410138248, "grad_norm": 1.16418962691877, "learning_rate": 1.2143448522626742e-07, "loss": 0.828999400138855, "step": 7374 }, { "epoch": 1.6993087557603688, "grad_norm": 1.513005376638313, "learning_rate": 1.2125258278891738e-07, "loss": 0.8215579986572266, "step": 7375 }, { "epoch": 1.6995391705069123, "grad_norm": 1.2614405602995598, "learning_rate": 1.2107080790059156e-07, "loss": 0.9362014532089233, "step": 7376 }, { "epoch": 1.6997695852534562, "grad_norm": 1.014310262155135, "learning_rate": 1.2088916058767428e-07, "loss": 0.7789602279663086, "step": 7377 }, { "epoch": 1.7, "grad_norm": 1.322797235291574, "learning_rate": 1.2070764087653163e-07, "loss": 0.8371152877807617, "step": 7378 }, { "epoch": 1.7002304147465437, "grad_norm": 1.2225532720655308, "learning_rate": 1.2052624879351103e-07, "loss": 0.64423668384552, "step": 7379 }, { "epoch": 1.7004608294930876, "grad_norm": 1.3442813905677369, "learning_rate": 1.203449843649409e-07, "loss": 0.7635257244110107, "step": 7380 }, { "epoch": 1.7006912442396314, "grad_norm": 1.15010903043395, "learning_rate": 1.2016384761713194e-07, "loss": 0.7859230041503906, "step": 7381 }, { "epoch": 1.700921658986175, "grad_norm": 1.0218637195871514, "learning_rate": 1.199828385763757e-07, "loss": 0.7066336870193481, "step": 7382 }, { "epoch": 1.701152073732719, "grad_norm": 1.1069799499148123, "learning_rate": 1.198019572689455e-07, "loss": 0.7190531492233276, "step": 7383 }, { "epoch": 1.7013824884792625, "grad_norm": 1.520158585759741, "learning_rate": 1.1962120372109586e-07, "loss": 0.7389136552810669, "step": 7384 }, { "epoch": 1.7016129032258065, "grad_norm": 1.5406735409523549, "learning_rate": 1.1944057795906316e-07, "loss": 0.774425745010376, "step": 7385 }, { "epoch": 1.7018433179723502, "grad_norm": 1.0093305285556118, "learning_rate": 1.1926008000906484e-07, "loss": 0.7566725015640259, "step": 7386 }, { "epoch": 1.702073732718894, "grad_norm": 1.153413777620863, "learning_rate": 1.1907970989729987e-07, "loss": 0.6891475915908813, "step": 7387 }, { "epoch": 1.702304147465438, "grad_norm": 1.08541401133235, "learning_rate": 1.1889946764994873e-07, "loss": 0.6188378930091858, "step": 7388 }, { "epoch": 1.7025345622119814, "grad_norm": 1.1534210847497282, "learning_rate": 1.1871935329317362e-07, "loss": 0.703027069568634, "step": 7389 }, { "epoch": 1.7027649769585254, "grad_norm": 1.2738888238498793, "learning_rate": 1.1853936685311772e-07, "loss": 0.9253139495849609, "step": 7390 }, { "epoch": 1.702995391705069, "grad_norm": 1.015934424294919, "learning_rate": 1.1835950835590569e-07, "loss": 0.6504430770874023, "step": 7391 }, { "epoch": 1.7032258064516128, "grad_norm": 1.0145240040509695, "learning_rate": 1.18179777827644e-07, "loss": 0.6656354665756226, "step": 7392 }, { "epoch": 1.7034562211981568, "grad_norm": 1.451290987899464, "learning_rate": 1.1800017529442019e-07, "loss": 0.8534063100814819, "step": 7393 }, { "epoch": 1.7036866359447005, "grad_norm": 1.1896366783409809, "learning_rate": 1.178207007823031e-07, "loss": 0.8315893411636353, "step": 7394 }, { "epoch": 1.7039170506912442, "grad_norm": 1.1636407894423468, "learning_rate": 1.1764135431734367e-07, "loss": 0.8161677718162537, "step": 7395 }, { "epoch": 1.7041474654377882, "grad_norm": 1.418011015190517, "learning_rate": 1.1746213592557352e-07, "loss": 0.7942687273025513, "step": 7396 }, { "epoch": 1.7043778801843317, "grad_norm": 0.9938387819486493, "learning_rate": 1.1728304563300584e-07, "loss": 0.8056384325027466, "step": 7397 }, { "epoch": 1.7046082949308756, "grad_norm": 1.3626759695428086, "learning_rate": 1.1710408346563583e-07, "loss": 0.8535007238388062, "step": 7398 }, { "epoch": 1.7048387096774194, "grad_norm": 1.1491077351100174, "learning_rate": 1.1692524944943916e-07, "loss": 0.7729576826095581, "step": 7399 }, { "epoch": 1.705069124423963, "grad_norm": 1.2729586784281095, "learning_rate": 1.1674654361037328e-07, "loss": 0.7755489349365234, "step": 7400 }, { "epoch": 1.705299539170507, "grad_norm": 1.7008944920024607, "learning_rate": 1.1656796597437757e-07, "loss": 0.8752193450927734, "step": 7401 }, { "epoch": 1.7055299539170505, "grad_norm": 1.0505715773863387, "learning_rate": 1.1638951656737217e-07, "loss": 0.7135917544364929, "step": 7402 }, { "epoch": 1.7057603686635945, "grad_norm": 1.1807276735663779, "learning_rate": 1.1621119541525859e-07, "loss": 0.7378124594688416, "step": 7403 }, { "epoch": 1.7059907834101382, "grad_norm": 1.1699041912496186, "learning_rate": 1.1603300254391978e-07, "loss": 0.637479305267334, "step": 7404 }, { "epoch": 1.706221198156682, "grad_norm": 0.9107859734790176, "learning_rate": 1.1585493797922075e-07, "loss": 0.6162394881248474, "step": 7405 }, { "epoch": 1.706451612903226, "grad_norm": 1.0832025296305532, "learning_rate": 1.1567700174700701e-07, "loss": 0.7836494445800781, "step": 7406 }, { "epoch": 1.7066820276497696, "grad_norm": 1.3117851793296085, "learning_rate": 1.154991938731057e-07, "loss": 0.6297281980514526, "step": 7407 }, { "epoch": 1.7069124423963133, "grad_norm": 0.9987358693502671, "learning_rate": 1.1532151438332549e-07, "loss": 0.7190115451812744, "step": 7408 }, { "epoch": 1.7071428571428573, "grad_norm": 1.353324439932077, "learning_rate": 1.151439633034561e-07, "loss": 0.7578086853027344, "step": 7409 }, { "epoch": 1.7073732718894008, "grad_norm": 0.986158496671175, "learning_rate": 1.1496654065926925e-07, "loss": 0.7347216010093689, "step": 7410 }, { "epoch": 1.7076036866359448, "grad_norm": 1.2279759650694806, "learning_rate": 1.1478924647651711e-07, "loss": 0.7940168380737305, "step": 7411 }, { "epoch": 1.7078341013824885, "grad_norm": 1.2336717780625897, "learning_rate": 1.1461208078093431e-07, "loss": 0.7625843286514282, "step": 7412 }, { "epoch": 1.7080645161290322, "grad_norm": 1.5771280074431184, "learning_rate": 1.1443504359823585e-07, "loss": 0.7603492736816406, "step": 7413 }, { "epoch": 1.7082949308755762, "grad_norm": 1.1263740749103024, "learning_rate": 1.1425813495411817e-07, "loss": 0.8746018409729004, "step": 7414 }, { "epoch": 1.7085253456221197, "grad_norm": 1.2947959548271089, "learning_rate": 1.1408135487425996e-07, "loss": 0.72724449634552, "step": 7415 }, { "epoch": 1.7087557603686636, "grad_norm": 0.794129708213959, "learning_rate": 1.1390470338432023e-07, "loss": 0.6874721646308899, "step": 7416 }, { "epoch": 1.7089861751152073, "grad_norm": 0.9673124457868691, "learning_rate": 1.1372818050993959e-07, "loss": 0.7129265666007996, "step": 7417 }, { "epoch": 1.709216589861751, "grad_norm": 1.3811139782005308, "learning_rate": 1.1355178627674045e-07, "loss": 0.7505607008934021, "step": 7418 }, { "epoch": 1.709447004608295, "grad_norm": 1.1149863565678992, "learning_rate": 1.1337552071032608e-07, "loss": 0.7497769594192505, "step": 7419 }, { "epoch": 1.7096774193548387, "grad_norm": 1.342673457996757, "learning_rate": 1.1319938383628092e-07, "loss": 0.792352020740509, "step": 7420 }, { "epoch": 1.7099078341013825, "grad_norm": 1.1720516000619245, "learning_rate": 1.1302337568017139e-07, "loss": 0.780627965927124, "step": 7421 }, { "epoch": 1.7101382488479264, "grad_norm": 1.2702279678670012, "learning_rate": 1.1284749626754464e-07, "loss": 0.7024368047714233, "step": 7422 }, { "epoch": 1.71036866359447, "grad_norm": 1.2880158142162281, "learning_rate": 1.1267174562392945e-07, "loss": 0.756782591342926, "step": 7423 }, { "epoch": 1.7105990783410139, "grad_norm": 1.2881350167706749, "learning_rate": 1.1249612377483552e-07, "loss": 0.8585456609725952, "step": 7424 }, { "epoch": 1.7108294930875576, "grad_norm": 1.2079330064248406, "learning_rate": 1.1232063074575449e-07, "loss": 0.8610610961914062, "step": 7425 }, { "epoch": 1.7110599078341013, "grad_norm": 1.2629835504337044, "learning_rate": 1.1214526656215872e-07, "loss": 0.7493829131126404, "step": 7426 }, { "epoch": 1.7112903225806453, "grad_norm": 1.1677189056932475, "learning_rate": 1.1197003124950222e-07, "loss": 0.7479410171508789, "step": 7427 }, { "epoch": 1.7115207373271888, "grad_norm": 1.2024881147733253, "learning_rate": 1.1179492483322006e-07, "loss": 0.8056051135063171, "step": 7428 }, { "epoch": 1.7117511520737327, "grad_norm": 1.2393004464149642, "learning_rate": 1.1161994733872848e-07, "loss": 0.8448202610015869, "step": 7429 }, { "epoch": 1.7119815668202765, "grad_norm": 1.3170634810384778, "learning_rate": 1.1144509879142571e-07, "loss": 0.7783033847808838, "step": 7430 }, { "epoch": 1.7122119815668202, "grad_norm": 1.2589188548838177, "learning_rate": 1.1127037921669058e-07, "loss": 0.6591838598251343, "step": 7431 }, { "epoch": 1.7124423963133641, "grad_norm": 1.4141951291447457, "learning_rate": 1.1109578863988322e-07, "loss": 0.8508287668228149, "step": 7432 }, { "epoch": 1.7126728110599079, "grad_norm": 1.0110596601133535, "learning_rate": 1.1092132708634549e-07, "loss": 0.7981588840484619, "step": 7433 }, { "epoch": 1.7129032258064516, "grad_norm": 1.1560054105611206, "learning_rate": 1.1074699458140025e-07, "loss": 0.7754761576652527, "step": 7434 }, { "epoch": 1.7131336405529956, "grad_norm": 1.4234254723014017, "learning_rate": 1.1057279115035124e-07, "loss": 0.8487040996551514, "step": 7435 }, { "epoch": 1.713364055299539, "grad_norm": 1.2105987237993454, "learning_rate": 1.1039871681848433e-07, "loss": 0.8175803422927856, "step": 7436 }, { "epoch": 1.713594470046083, "grad_norm": 1.0010434545431337, "learning_rate": 1.1022477161106591e-07, "loss": 0.8361574411392212, "step": 7437 }, { "epoch": 1.7138248847926267, "grad_norm": 1.1841110354603608, "learning_rate": 1.1005095555334409e-07, "loss": 0.6253053545951843, "step": 7438 }, { "epoch": 1.7140552995391705, "grad_norm": 1.5361244402123166, "learning_rate": 1.0987726867054792e-07, "loss": 0.8035168647766113, "step": 7439 }, { "epoch": 1.7142857142857144, "grad_norm": 1.0148513511065955, "learning_rate": 1.0970371098788767e-07, "loss": 0.7352867722511292, "step": 7440 }, { "epoch": 1.714516129032258, "grad_norm": 1.1469128257526675, "learning_rate": 1.0953028253055541e-07, "loss": 0.7540202140808105, "step": 7441 }, { "epoch": 1.7147465437788019, "grad_norm": 1.2653522382652087, "learning_rate": 1.0935698332372379e-07, "loss": 0.7883191108703613, "step": 7442 }, { "epoch": 1.7149769585253456, "grad_norm": 1.2745739855530656, "learning_rate": 1.0918381339254701e-07, "loss": 0.7581819295883179, "step": 7443 }, { "epoch": 1.7152073732718893, "grad_norm": 1.1705192956080483, "learning_rate": 1.090107727621603e-07, "loss": 0.8066321611404419, "step": 7444 }, { "epoch": 1.7154377880184333, "grad_norm": 1.1820593590096908, "learning_rate": 1.0883786145768037e-07, "loss": 0.7427937984466553, "step": 7445 }, { "epoch": 1.715668202764977, "grad_norm": 1.3132499515834741, "learning_rate": 1.0866507950420523e-07, "loss": 0.7736409902572632, "step": 7446 }, { "epoch": 1.7158986175115207, "grad_norm": 1.1930714060597967, "learning_rate": 1.0849242692681382e-07, "loss": 0.7253416776657104, "step": 7447 }, { "epoch": 1.7161290322580647, "grad_norm": 0.9521960056037656, "learning_rate": 1.0831990375056643e-07, "loss": 0.7933270931243896, "step": 7448 }, { "epoch": 1.7163594470046082, "grad_norm": 1.407227257578247, "learning_rate": 1.0814751000050437e-07, "loss": 0.7946739196777344, "step": 7449 }, { "epoch": 1.7165898617511521, "grad_norm": 1.2776015375287177, "learning_rate": 1.0797524570165073e-07, "loss": 0.7798205614089966, "step": 7450 }, { "epoch": 1.7168202764976959, "grad_norm": 1.2558469001082564, "learning_rate": 1.078031108790094e-07, "loss": 0.616565465927124, "step": 7451 }, { "epoch": 1.7170506912442396, "grad_norm": 1.2221718815584264, "learning_rate": 1.0763110555756516e-07, "loss": 0.8406517505645752, "step": 7452 }, { "epoch": 1.7172811059907835, "grad_norm": 1.3773523411720476, "learning_rate": 1.0745922976228483e-07, "loss": 0.8827311992645264, "step": 7453 }, { "epoch": 1.717511520737327, "grad_norm": 1.2403910104019171, "learning_rate": 1.0728748351811567e-07, "loss": 0.585588812828064, "step": 7454 }, { "epoch": 1.717741935483871, "grad_norm": 0.9381679846122704, "learning_rate": 1.0711586684998631e-07, "loss": 0.6305320858955383, "step": 7455 }, { "epoch": 1.7179723502304147, "grad_norm": 1.0634674542520166, "learning_rate": 1.0694437978280701e-07, "loss": 0.7982319593429565, "step": 7456 }, { "epoch": 1.7182027649769585, "grad_norm": 1.3468349324058282, "learning_rate": 1.0677302234146879e-07, "loss": 0.7792943716049194, "step": 7457 }, { "epoch": 1.7184331797235024, "grad_norm": 1.308217346349807, "learning_rate": 1.0660179455084372e-07, "loss": 0.7019332051277161, "step": 7458 }, { "epoch": 1.7186635944700461, "grad_norm": 1.2330257329830192, "learning_rate": 1.0643069643578562e-07, "loss": 0.8088894486427307, "step": 7459 }, { "epoch": 1.7188940092165899, "grad_norm": 1.5573400915532798, "learning_rate": 1.0625972802112882e-07, "loss": 0.799231767654419, "step": 7460 }, { "epoch": 1.7191244239631336, "grad_norm": 0.950308854182165, "learning_rate": 1.0608888933168958e-07, "loss": 0.7265694737434387, "step": 7461 }, { "epoch": 1.7193548387096773, "grad_norm": 1.1717288459308963, "learning_rate": 1.0591818039226464e-07, "loss": 0.8566714525222778, "step": 7462 }, { "epoch": 1.7195852534562213, "grad_norm": 1.2255123057406947, "learning_rate": 1.0574760122763216e-07, "loss": 0.811874508857727, "step": 7463 }, { "epoch": 1.719815668202765, "grad_norm": 1.0493349652228454, "learning_rate": 1.0557715186255156e-07, "loss": 0.7990631461143494, "step": 7464 }, { "epoch": 1.7200460829493087, "grad_norm": 1.3183681626099089, "learning_rate": 1.0540683232176307e-07, "loss": 0.8108334541320801, "step": 7465 }, { "epoch": 1.7202764976958527, "grad_norm": 1.8420274096120763, "learning_rate": 1.0523664262998888e-07, "loss": 0.8927996158599854, "step": 7466 }, { "epoch": 1.7205069124423962, "grad_norm": 1.1733285346989661, "learning_rate": 1.0506658281193138e-07, "loss": 0.7277737855911255, "step": 7467 }, { "epoch": 1.7207373271889401, "grad_norm": 1.0503912207473127, "learning_rate": 1.0489665289227467e-07, "loss": 0.7229233980178833, "step": 7468 }, { "epoch": 1.7209677419354839, "grad_norm": 1.298634428768958, "learning_rate": 1.0472685289568373e-07, "loss": 0.7211846709251404, "step": 7469 }, { "epoch": 1.7211981566820276, "grad_norm": 1.1862135261022106, "learning_rate": 1.0455718284680504e-07, "loss": 0.8239504098892212, "step": 7470 }, { "epoch": 1.7214285714285715, "grad_norm": 1.2304377847970827, "learning_rate": 1.0438764277026579e-07, "loss": 0.7492972612380981, "step": 7471 }, { "epoch": 1.7216589861751153, "grad_norm": 1.3060072891774943, "learning_rate": 1.0421823269067442e-07, "loss": 0.7658303380012512, "step": 7472 }, { "epoch": 1.721889400921659, "grad_norm": 1.0618950256674606, "learning_rate": 1.0404895263262092e-07, "loss": 0.708244800567627, "step": 7473 }, { "epoch": 1.7221198156682027, "grad_norm": 1.1946101503339825, "learning_rate": 1.0387980262067575e-07, "loss": 0.7575969696044922, "step": 7474 }, { "epoch": 1.7223502304147464, "grad_norm": 1.3899740319803422, "learning_rate": 1.0371078267939082e-07, "loss": 0.7321910262107849, "step": 7475 }, { "epoch": 1.7225806451612904, "grad_norm": 1.3828231848460977, "learning_rate": 1.035418928332995e-07, "loss": 0.7812562584877014, "step": 7476 }, { "epoch": 1.7228110599078341, "grad_norm": 1.3136112254743646, "learning_rate": 1.0337313310691565e-07, "loss": 0.7272104620933533, "step": 7477 }, { "epoch": 1.7230414746543778, "grad_norm": 1.1508289944716614, "learning_rate": 1.032045035247343e-07, "loss": 0.7006442546844482, "step": 7478 }, { "epoch": 1.7232718894009218, "grad_norm": 1.138231534813956, "learning_rate": 1.0303600411123226e-07, "loss": 0.7082154750823975, "step": 7479 }, { "epoch": 1.7235023041474653, "grad_norm": 1.4157478972732351, "learning_rate": 1.0286763489086681e-07, "loss": 0.7204899191856384, "step": 7480 }, { "epoch": 1.7237327188940093, "grad_norm": 1.1954797848768004, "learning_rate": 1.026993958880763e-07, "loss": 0.9119626879692078, "step": 7481 }, { "epoch": 1.723963133640553, "grad_norm": 1.0923155592461768, "learning_rate": 1.0253128712728088e-07, "loss": 0.5961707830429077, "step": 7482 }, { "epoch": 1.7241935483870967, "grad_norm": 1.1032837677908203, "learning_rate": 1.023633086328809e-07, "loss": 0.7469611167907715, "step": 7483 }, { "epoch": 1.7244239631336407, "grad_norm": 1.2394445599695993, "learning_rate": 1.0219546042925841e-07, "loss": 0.8353795409202576, "step": 7484 }, { "epoch": 1.7246543778801844, "grad_norm": 1.120589163159477, "learning_rate": 1.0202774254077618e-07, "loss": 0.6587873101234436, "step": 7485 }, { "epoch": 1.7248847926267281, "grad_norm": 1.2182162589741892, "learning_rate": 1.0186015499177847e-07, "loss": 0.8595654964447021, "step": 7486 }, { "epoch": 1.7251152073732718, "grad_norm": 1.0966229129393803, "learning_rate": 1.0169269780659028e-07, "loss": 0.7683298587799072, "step": 7487 }, { "epoch": 1.7253456221198156, "grad_norm": 1.372358134101511, "learning_rate": 1.0152537100951786e-07, "loss": 0.888152003288269, "step": 7488 }, { "epoch": 1.7255760368663595, "grad_norm": 1.1162191205168919, "learning_rate": 1.013581746248482e-07, "loss": 0.7835309505462646, "step": 7489 }, { "epoch": 1.7258064516129032, "grad_norm": 1.4079534093347241, "learning_rate": 1.0119110867684999e-07, "loss": 0.9744646549224854, "step": 7490 }, { "epoch": 1.726036866359447, "grad_norm": 1.109483043922066, "learning_rate": 1.0102417318977251e-07, "loss": 0.6842091083526611, "step": 7491 }, { "epoch": 1.726267281105991, "grad_norm": 1.2357910065520838, "learning_rate": 1.0085736818784607e-07, "loss": 0.7435774207115173, "step": 7492 }, { "epoch": 1.7264976958525344, "grad_norm": 1.3316804792215136, "learning_rate": 1.0069069369528249e-07, "loss": 0.8430237770080566, "step": 7493 }, { "epoch": 1.7267281105990784, "grad_norm": 1.1766330255379311, "learning_rate": 1.0052414973627421e-07, "loss": 0.8203141689300537, "step": 7494 }, { "epoch": 1.726958525345622, "grad_norm": 1.291685708783942, "learning_rate": 1.0035773633499456e-07, "loss": 0.7491584420204163, "step": 7495 }, { "epoch": 1.7271889400921658, "grad_norm": 0.9475128549493947, "learning_rate": 1.0019145351559876e-07, "loss": 0.6738899946212769, "step": 7496 }, { "epoch": 1.7274193548387098, "grad_norm": 1.4107090522911332, "learning_rate": 1.0002530130222231e-07, "loss": 0.8628265857696533, "step": 7497 }, { "epoch": 1.7276497695852533, "grad_norm": 1.5650622568616335, "learning_rate": 9.985927971898178e-08, "loss": 1.0158125162124634, "step": 7498 }, { "epoch": 1.7278801843317972, "grad_norm": 1.2981782537446935, "learning_rate": 9.969338878997535e-08, "loss": 0.7269070148468018, "step": 7499 }, { "epoch": 1.728110599078341, "grad_norm": 1.3106792244331589, "learning_rate": 9.952762853928165e-08, "loss": 0.8769187927246094, "step": 7500 }, { "epoch": 1.7283410138248847, "grad_norm": 1.325563750244826, "learning_rate": 9.936199899096042e-08, "loss": 0.7841119170188904, "step": 7501 }, { "epoch": 1.7285714285714286, "grad_norm": 1.7907234255256992, "learning_rate": 9.91965001690529e-08, "loss": 0.9209425449371338, "step": 7502 }, { "epoch": 1.7288018433179724, "grad_norm": 1.110414701934764, "learning_rate": 9.903113209758096e-08, "loss": 0.7795250415802002, "step": 7503 }, { "epoch": 1.729032258064516, "grad_norm": 1.2158163264490913, "learning_rate": 9.886589480054741e-08, "loss": 0.7131094932556152, "step": 7504 }, { "epoch": 1.72926267281106, "grad_norm": 1.167789931248441, "learning_rate": 9.870078830193629e-08, "loss": 0.8090137839317322, "step": 7505 }, { "epoch": 1.7294930875576036, "grad_norm": 1.124104241227004, "learning_rate": 9.853581262571231e-08, "loss": 0.7797958850860596, "step": 7506 }, { "epoch": 1.7297235023041475, "grad_norm": 1.3470491669984355, "learning_rate": 9.83709677958221e-08, "loss": 0.6927989721298218, "step": 7507 }, { "epoch": 1.7299539170506912, "grad_norm": 1.152565458620573, "learning_rate": 9.820625383619219e-08, "loss": 0.8009092807769775, "step": 7508 }, { "epoch": 1.730184331797235, "grad_norm": 1.0970285369996284, "learning_rate": 9.804167077073056e-08, "loss": 0.761864423751831, "step": 7509 }, { "epoch": 1.730414746543779, "grad_norm": 1.5795757660336223, "learning_rate": 9.787721862332654e-08, "loss": 0.7459509372711182, "step": 7510 }, { "epoch": 1.7306451612903224, "grad_norm": 1.0401744024243509, "learning_rate": 9.771289741785005e-08, "loss": 0.8216449022293091, "step": 7511 }, { "epoch": 1.7308755760368664, "grad_norm": 1.3924364017238642, "learning_rate": 9.754870717815177e-08, "loss": 0.7860604524612427, "step": 7512 }, { "epoch": 1.73110599078341, "grad_norm": 1.146706612325942, "learning_rate": 9.738464792806422e-08, "loss": 0.7727769613265991, "step": 7513 }, { "epoch": 1.7313364055299538, "grad_norm": 1.2690787911964316, "learning_rate": 9.722071969140011e-08, "loss": 0.874458909034729, "step": 7514 }, { "epoch": 1.7315668202764978, "grad_norm": 1.1530798069952481, "learning_rate": 9.705692249195319e-08, "loss": 0.840191125869751, "step": 7515 }, { "epoch": 1.7317972350230415, "grad_norm": 1.1387350117516357, "learning_rate": 9.689325635349877e-08, "loss": 0.7169238924980164, "step": 7516 }, { "epoch": 1.7320276497695852, "grad_norm": 1.2478630540284088, "learning_rate": 9.672972129979273e-08, "loss": 0.7554492950439453, "step": 7517 }, { "epoch": 1.7322580645161292, "grad_norm": 1.2166706454141942, "learning_rate": 9.656631735457154e-08, "loss": 0.5734076499938965, "step": 7518 }, { "epoch": 1.7324884792626727, "grad_norm": 1.5466370383298045, "learning_rate": 9.640304454155369e-08, "loss": 0.7867637872695923, "step": 7519 }, { "epoch": 1.7327188940092166, "grad_norm": 1.2704443586099365, "learning_rate": 9.623990288443773e-08, "loss": 0.7330230474472046, "step": 7520 }, { "epoch": 1.7329493087557604, "grad_norm": 1.1352922714992866, "learning_rate": 9.607689240690319e-08, "loss": 0.7880058288574219, "step": 7521 }, { "epoch": 1.733179723502304, "grad_norm": 1.0605191939295662, "learning_rate": 9.591401313261139e-08, "loss": 0.796575665473938, "step": 7522 }, { "epoch": 1.733410138248848, "grad_norm": 1.4376273040997398, "learning_rate": 9.575126508520359e-08, "loss": 0.8101698160171509, "step": 7523 }, { "epoch": 1.7336405529953915, "grad_norm": 1.0868433692155355, "learning_rate": 9.55886482883026e-08, "loss": 0.7811597585678101, "step": 7524 }, { "epoch": 1.7338709677419355, "grad_norm": 1.1754841201094306, "learning_rate": 9.542616276551208e-08, "loss": 0.7680011987686157, "step": 7525 }, { "epoch": 1.7341013824884792, "grad_norm": 1.3670730603232781, "learning_rate": 9.526380854041638e-08, "loss": 0.8018794059753418, "step": 7526 }, { "epoch": 1.734331797235023, "grad_norm": 1.1232468645544793, "learning_rate": 9.510158563658133e-08, "loss": 0.7770500183105469, "step": 7527 }, { "epoch": 1.734562211981567, "grad_norm": 1.1848169541071576, "learning_rate": 9.493949407755309e-08, "loss": 0.7622300982475281, "step": 7528 }, { "epoch": 1.7347926267281106, "grad_norm": 1.5281654640943847, "learning_rate": 9.477753388685928e-08, "loss": 0.831570029258728, "step": 7529 }, { "epoch": 1.7350230414746544, "grad_norm": 1.1599086861943149, "learning_rate": 9.461570508800776e-08, "loss": 0.7987254858016968, "step": 7530 }, { "epoch": 1.7352534562211983, "grad_norm": 1.2752040500202788, "learning_rate": 9.44540077044883e-08, "loss": 0.8219848275184631, "step": 7531 }, { "epoch": 1.7354838709677418, "grad_norm": 1.298736989691398, "learning_rate": 9.429244175977092e-08, "loss": 0.8273369073867798, "step": 7532 }, { "epoch": 1.7357142857142858, "grad_norm": 1.2555474610105797, "learning_rate": 9.413100727730628e-08, "loss": 0.8241056203842163, "step": 7533 }, { "epoch": 1.7359447004608295, "grad_norm": 1.4118150886368108, "learning_rate": 9.396970428052697e-08, "loss": 0.6880715489387512, "step": 7534 }, { "epoch": 1.7361751152073732, "grad_norm": 1.092011806345561, "learning_rate": 9.380853279284551e-08, "loss": 0.7355446815490723, "step": 7535 }, { "epoch": 1.7364055299539172, "grad_norm": 1.2700711725839655, "learning_rate": 9.364749283765604e-08, "loss": 0.8835841417312622, "step": 7536 }, { "epoch": 1.7366359447004607, "grad_norm": 1.1984936737610834, "learning_rate": 9.348658443833313e-08, "loss": 0.80763840675354, "step": 7537 }, { "epoch": 1.7368663594470046, "grad_norm": 1.2855970061631397, "learning_rate": 9.332580761823227e-08, "loss": 0.7473145723342896, "step": 7538 }, { "epoch": 1.7370967741935484, "grad_norm": 1.2970951445867331, "learning_rate": 9.316516240069028e-08, "loss": 0.6618188619613647, "step": 7539 }, { "epoch": 1.737327188940092, "grad_norm": 1.3396426049949766, "learning_rate": 9.300464880902447e-08, "loss": 0.7432928085327148, "step": 7540 }, { "epoch": 1.737557603686636, "grad_norm": 1.1659381023507147, "learning_rate": 9.284426686653302e-08, "loss": 0.7915963530540466, "step": 7541 }, { "epoch": 1.7377880184331798, "grad_norm": 1.1552275821682043, "learning_rate": 9.26840165964955e-08, "loss": 0.6428440809249878, "step": 7542 }, { "epoch": 1.7380184331797235, "grad_norm": 1.1399241166482426, "learning_rate": 9.252389802217187e-08, "loss": 0.7142912149429321, "step": 7543 }, { "epoch": 1.7382488479262674, "grad_norm": 1.316337246157137, "learning_rate": 9.236391116680309e-08, "loss": 0.878044605255127, "step": 7544 }, { "epoch": 1.738479262672811, "grad_norm": 1.089416476430598, "learning_rate": 9.220405605361103e-08, "loss": 0.6861810684204102, "step": 7545 }, { "epoch": 1.738709677419355, "grad_norm": 1.3890455529154517, "learning_rate": 9.204433270579825e-08, "loss": 0.7638171911239624, "step": 7546 }, { "epoch": 1.7389400921658986, "grad_norm": 1.1532660265349828, "learning_rate": 9.188474114654876e-08, "loss": 0.7149873971939087, "step": 7547 }, { "epoch": 1.7391705069124423, "grad_norm": 1.1783502444227563, "learning_rate": 9.172528139902703e-08, "loss": 0.7249442338943481, "step": 7548 }, { "epoch": 1.7394009216589863, "grad_norm": 1.178650320628679, "learning_rate": 9.156595348637819e-08, "loss": 0.6846513748168945, "step": 7549 }, { "epoch": 1.7396313364055298, "grad_norm": 1.4706201914955974, "learning_rate": 9.140675743172843e-08, "loss": 0.9332281351089478, "step": 7550 }, { "epoch": 1.7398617511520738, "grad_norm": 1.1835891939139382, "learning_rate": 9.124769325818526e-08, "loss": 0.6878118515014648, "step": 7551 }, { "epoch": 1.7400921658986175, "grad_norm": 1.077038469987993, "learning_rate": 9.108876098883633e-08, "loss": 0.7695426344871521, "step": 7552 }, { "epoch": 1.7403225806451612, "grad_norm": 1.3278288479360603, "learning_rate": 9.09299606467503e-08, "loss": 0.7983303666114807, "step": 7553 }, { "epoch": 1.7405529953917052, "grad_norm": 1.4656214059917094, "learning_rate": 9.077129225497726e-08, "loss": 0.8158761262893677, "step": 7554 }, { "epoch": 1.7407834101382489, "grad_norm": 1.1519947124673093, "learning_rate": 9.061275583654748e-08, "loss": 0.8064214587211609, "step": 7555 }, { "epoch": 1.7410138248847926, "grad_norm": 1.2545881332280804, "learning_rate": 9.045435141447211e-08, "loss": 0.9058080911636353, "step": 7556 }, { "epoch": 1.7412442396313366, "grad_norm": 1.213639501339424, "learning_rate": 9.029607901174374e-08, "loss": 0.7392270565032959, "step": 7557 }, { "epoch": 1.74147465437788, "grad_norm": 1.0453486445607982, "learning_rate": 9.013793865133501e-08, "loss": 0.7114729881286621, "step": 7558 }, { "epoch": 1.741705069124424, "grad_norm": 1.2302263811033798, "learning_rate": 8.997993035620022e-08, "loss": 0.8675493597984314, "step": 7559 }, { "epoch": 1.7419354838709677, "grad_norm": 0.9934561818451934, "learning_rate": 8.98220541492738e-08, "loss": 0.8103020191192627, "step": 7560 }, { "epoch": 1.7421658986175115, "grad_norm": 1.2538115734834285, "learning_rate": 8.966431005347109e-08, "loss": 0.7339279651641846, "step": 7561 }, { "epoch": 1.7423963133640554, "grad_norm": 1.3510829475373114, "learning_rate": 8.950669809168887e-08, "loss": 0.6971707344055176, "step": 7562 }, { "epoch": 1.742626728110599, "grad_norm": 1.105458403928542, "learning_rate": 8.934921828680408e-08, "loss": 0.8633124232292175, "step": 7563 }, { "epoch": 1.7428571428571429, "grad_norm": 1.3082830118219664, "learning_rate": 8.919187066167466e-08, "loss": 0.7704664468765259, "step": 7564 }, { "epoch": 1.7430875576036866, "grad_norm": 1.1782653714880955, "learning_rate": 8.903465523913955e-08, "loss": 0.7063533067703247, "step": 7565 }, { "epoch": 1.7433179723502303, "grad_norm": 1.1177210535700517, "learning_rate": 8.887757204201817e-08, "loss": 0.7094486951828003, "step": 7566 }, { "epoch": 1.7435483870967743, "grad_norm": 1.4575572123890834, "learning_rate": 8.872062109311096e-08, "loss": 0.8743780255317688, "step": 7567 }, { "epoch": 1.743778801843318, "grad_norm": 1.5827740898240907, "learning_rate": 8.856380241519935e-08, "loss": 0.7282687425613403, "step": 7568 }, { "epoch": 1.7440092165898617, "grad_norm": 1.105316538989134, "learning_rate": 8.840711603104523e-08, "loss": 0.7507487535476685, "step": 7569 }, { "epoch": 1.7442396313364057, "grad_norm": 1.2820028807325874, "learning_rate": 8.82505619633912e-08, "loss": 0.807691216468811, "step": 7570 }, { "epoch": 1.7444700460829492, "grad_norm": 1.3537034886290398, "learning_rate": 8.809414023496142e-08, "loss": 0.8650702238082886, "step": 7571 }, { "epoch": 1.7447004608294931, "grad_norm": 0.9602033366804331, "learning_rate": 8.793785086845984e-08, "loss": 0.6872273683547974, "step": 7572 }, { "epoch": 1.7449308755760369, "grad_norm": 1.0979215212634434, "learning_rate": 8.778169388657163e-08, "loss": 0.7242698669433594, "step": 7573 }, { "epoch": 1.7451612903225806, "grad_norm": 1.0962988735603825, "learning_rate": 8.762566931196313e-08, "loss": 0.741705060005188, "step": 7574 }, { "epoch": 1.7453917050691246, "grad_norm": 1.06231801843056, "learning_rate": 8.746977716728099e-08, "loss": 0.7293061017990112, "step": 7575 }, { "epoch": 1.745622119815668, "grad_norm": 1.0145801945512316, "learning_rate": 8.731401747515244e-08, "loss": 0.8385475277900696, "step": 7576 }, { "epoch": 1.745852534562212, "grad_norm": 1.4891647422185605, "learning_rate": 8.715839025818617e-08, "loss": 0.8484489917755127, "step": 7577 }, { "epoch": 1.7460829493087557, "grad_norm": 1.1930293813449155, "learning_rate": 8.7002895538971e-08, "loss": 0.6511530876159668, "step": 7578 }, { "epoch": 1.7463133640552995, "grad_norm": 1.4360732745608953, "learning_rate": 8.684753334007688e-08, "loss": 0.8274673223495483, "step": 7579 }, { "epoch": 1.7465437788018434, "grad_norm": 1.081237944644138, "learning_rate": 8.669230368405456e-08, "loss": 0.7367755174636841, "step": 7580 }, { "epoch": 1.7467741935483871, "grad_norm": 1.2748877435171337, "learning_rate": 8.653720659343522e-08, "loss": 0.80199134349823, "step": 7581 }, { "epoch": 1.7470046082949309, "grad_norm": 1.1988639104811598, "learning_rate": 8.638224209073097e-08, "loss": 0.7782701253890991, "step": 7582 }, { "epoch": 1.7472350230414746, "grad_norm": 1.3660035419508034, "learning_rate": 8.622741019843504e-08, "loss": 0.7613752484321594, "step": 7583 }, { "epoch": 1.7474654377880183, "grad_norm": 1.3599194483251544, "learning_rate": 8.60727109390208e-08, "loss": 0.8213690519332886, "step": 7584 }, { "epoch": 1.7476958525345623, "grad_norm": 1.1411507368613496, "learning_rate": 8.59181443349426e-08, "loss": 0.7064045667648315, "step": 7585 }, { "epoch": 1.747926267281106, "grad_norm": 1.1189241999598565, "learning_rate": 8.576371040863573e-08, "loss": 0.6686617136001587, "step": 7586 }, { "epoch": 1.7481566820276497, "grad_norm": 1.0194951619872286, "learning_rate": 8.560940918251592e-08, "loss": 0.7520097494125366, "step": 7587 }, { "epoch": 1.7483870967741937, "grad_norm": 1.0822685191965165, "learning_rate": 8.545524067897991e-08, "loss": 0.8176038265228271, "step": 7588 }, { "epoch": 1.7486175115207372, "grad_norm": 1.3408318725531652, "learning_rate": 8.530120492040505e-08, "loss": 0.6680614948272705, "step": 7589 }, { "epoch": 1.7488479262672811, "grad_norm": 1.3621846138568519, "learning_rate": 8.514730192914921e-08, "loss": 0.7421592473983765, "step": 7590 }, { "epoch": 1.7490783410138249, "grad_norm": 1.2822263575200588, "learning_rate": 8.499353172755164e-08, "loss": 0.8869342803955078, "step": 7591 }, { "epoch": 1.7493087557603686, "grad_norm": 1.1206823186662898, "learning_rate": 8.48398943379316e-08, "loss": 0.6850584745407104, "step": 7592 }, { "epoch": 1.7495391705069125, "grad_norm": 1.0932592535391596, "learning_rate": 8.468638978258914e-08, "loss": 0.7433363199234009, "step": 7593 }, { "epoch": 1.7497695852534563, "grad_norm": 1.0269953798613225, "learning_rate": 8.453301808380564e-08, "loss": 0.7744357585906982, "step": 7594 }, { "epoch": 1.75, "grad_norm": 1.382126107142446, "learning_rate": 8.437977926384277e-08, "loss": 0.8236217498779297, "step": 7595 }, { "epoch": 1.7502304147465437, "grad_norm": 1.3329245666066865, "learning_rate": 8.422667334494249e-08, "loss": 0.8552603721618652, "step": 7596 }, { "epoch": 1.7504608294930875, "grad_norm": 1.4100651978644374, "learning_rate": 8.407370034932859e-08, "loss": 0.7755998373031616, "step": 7597 }, { "epoch": 1.7506912442396314, "grad_norm": 1.3033243035055457, "learning_rate": 8.392086029920442e-08, "loss": 0.8105130195617676, "step": 7598 }, { "epoch": 1.7509216589861751, "grad_norm": 1.290928258750675, "learning_rate": 8.376815321675457e-08, "loss": 0.8787405490875244, "step": 7599 }, { "epoch": 1.7511520737327189, "grad_norm": 1.1296910155342912, "learning_rate": 8.361557912414441e-08, "loss": 0.6107788681983948, "step": 7600 }, { "epoch": 1.7513824884792628, "grad_norm": 0.9941949428855014, "learning_rate": 8.34631380435199e-08, "loss": 0.6825795769691467, "step": 7601 }, { "epoch": 1.7516129032258063, "grad_norm": 1.5141115638242784, "learning_rate": 8.331082999700734e-08, "loss": 0.7069272994995117, "step": 7602 }, { "epoch": 1.7518433179723503, "grad_norm": 1.5687921139560086, "learning_rate": 8.315865500671449e-08, "loss": 0.7784801721572876, "step": 7603 }, { "epoch": 1.752073732718894, "grad_norm": 1.0771300382051838, "learning_rate": 8.300661309472912e-08, "loss": 0.7653795480728149, "step": 7604 }, { "epoch": 1.7523041474654377, "grad_norm": 1.5582480598587298, "learning_rate": 8.285470428311991e-08, "loss": 0.7386122941970825, "step": 7605 }, { "epoch": 1.7525345622119817, "grad_norm": 0.9515219540238303, "learning_rate": 8.270292859393613e-08, "loss": 0.7828700542449951, "step": 7606 }, { "epoch": 1.7527649769585254, "grad_norm": 1.5500733851956912, "learning_rate": 8.255128604920792e-08, "loss": 0.8955565094947815, "step": 7607 }, { "epoch": 1.7529953917050691, "grad_norm": 1.2505809950313513, "learning_rate": 8.2399776670946e-08, "loss": 0.9071576595306396, "step": 7608 }, { "epoch": 1.7532258064516129, "grad_norm": 1.3402860152327503, "learning_rate": 8.22484004811419e-08, "loss": 0.752417802810669, "step": 7609 }, { "epoch": 1.7534562211981566, "grad_norm": 1.367440429282924, "learning_rate": 8.209715750176727e-08, "loss": 0.8611370325088501, "step": 7610 }, { "epoch": 1.7536866359447005, "grad_norm": 1.232351895452084, "learning_rate": 8.19460477547752e-08, "loss": 0.745223879814148, "step": 7611 }, { "epoch": 1.7539170506912443, "grad_norm": 1.0415704016806513, "learning_rate": 8.179507126209906e-08, "loss": 0.7799668908119202, "step": 7612 }, { "epoch": 1.754147465437788, "grad_norm": 1.3761849870920217, "learning_rate": 8.164422804565263e-08, "loss": 0.8177207708358765, "step": 7613 }, { "epoch": 1.754377880184332, "grad_norm": 1.2017347256018391, "learning_rate": 8.149351812733085e-08, "loss": 0.7111436128616333, "step": 7614 }, { "epoch": 1.7546082949308754, "grad_norm": 1.2253776843179969, "learning_rate": 8.1342941529009e-08, "loss": 0.6840728521347046, "step": 7615 }, { "epoch": 1.7548387096774194, "grad_norm": 1.02983629791633, "learning_rate": 8.119249827254281e-08, "loss": 0.6115491986274719, "step": 7616 }, { "epoch": 1.7550691244239631, "grad_norm": 1.3870391302655596, "learning_rate": 8.104218837976939e-08, "loss": 0.7149351239204407, "step": 7617 }, { "epoch": 1.7552995391705069, "grad_norm": 1.2174150358988711, "learning_rate": 8.089201187250571e-08, "loss": 0.688147783279419, "step": 7618 }, { "epoch": 1.7555299539170508, "grad_norm": 1.2630937737290178, "learning_rate": 8.074196877254969e-08, "loss": 0.8092058300971985, "step": 7619 }, { "epoch": 1.7557603686635943, "grad_norm": 0.8375696110242734, "learning_rate": 8.05920591016801e-08, "loss": 0.7375935912132263, "step": 7620 }, { "epoch": 1.7559907834101383, "grad_norm": 1.1868565460321117, "learning_rate": 8.044228288165599e-08, "loss": 0.6793934106826782, "step": 7621 }, { "epoch": 1.756221198156682, "grad_norm": 1.2102446264436708, "learning_rate": 8.0292640134217e-08, "loss": 0.7395757436752319, "step": 7622 }, { "epoch": 1.7564516129032257, "grad_norm": 0.9259939168277553, "learning_rate": 8.014313088108394e-08, "loss": 0.546409010887146, "step": 7623 }, { "epoch": 1.7566820276497697, "grad_norm": 1.4575552468425101, "learning_rate": 7.999375514395778e-08, "loss": 0.7790534496307373, "step": 7624 }, { "epoch": 1.7569124423963134, "grad_norm": 1.0896798964233478, "learning_rate": 7.984451294452e-08, "loss": 0.7398231625556946, "step": 7625 }, { "epoch": 1.7571428571428571, "grad_norm": 1.2623646343227142, "learning_rate": 7.969540430443311e-08, "loss": 0.7414441108703613, "step": 7626 }, { "epoch": 1.757373271889401, "grad_norm": 1.1312110923091452, "learning_rate": 7.954642924533994e-08, "loss": 0.7548750638961792, "step": 7627 }, { "epoch": 1.7576036866359446, "grad_norm": 0.957909042850816, "learning_rate": 7.939758778886385e-08, "loss": 0.7546773552894592, "step": 7628 }, { "epoch": 1.7578341013824885, "grad_norm": 1.1252175485529645, "learning_rate": 7.924887995660945e-08, "loss": 0.7373867630958557, "step": 7629 }, { "epoch": 1.7580645161290323, "grad_norm": 0.9815120449405607, "learning_rate": 7.910030577016113e-08, "loss": 0.7271026968955994, "step": 7630 }, { "epoch": 1.758294930875576, "grad_norm": 1.3179911972781693, "learning_rate": 7.89518652510841e-08, "loss": 0.8723413944244385, "step": 7631 }, { "epoch": 1.75852534562212, "grad_norm": 1.3060473211580457, "learning_rate": 7.880355842092468e-08, "loss": 0.8282548189163208, "step": 7632 }, { "epoch": 1.7587557603686634, "grad_norm": 1.1089249458958528, "learning_rate": 7.865538530120918e-08, "loss": 0.7436991930007935, "step": 7633 }, { "epoch": 1.7589861751152074, "grad_norm": 1.0884201833829175, "learning_rate": 7.850734591344488e-08, "loss": 0.7750650644302368, "step": 7634 }, { "epoch": 1.7592165898617511, "grad_norm": 1.1544057740235625, "learning_rate": 7.835944027911957e-08, "loss": 0.6824958324432373, "step": 7635 }, { "epoch": 1.7594470046082948, "grad_norm": 1.1607504467923393, "learning_rate": 7.821166841970107e-08, "loss": 0.8500322103500366, "step": 7636 }, { "epoch": 1.7596774193548388, "grad_norm": 1.3527797330475602, "learning_rate": 7.806403035663889e-08, "loss": 0.7111128568649292, "step": 7637 }, { "epoch": 1.7599078341013825, "grad_norm": 1.1877365592337052, "learning_rate": 7.791652611136212e-08, "loss": 0.7320532202720642, "step": 7638 }, { "epoch": 1.7601382488479262, "grad_norm": 1.2292449607917257, "learning_rate": 7.776915570528076e-08, "loss": 0.8439149856567383, "step": 7639 }, { "epoch": 1.7603686635944702, "grad_norm": 1.0358127598823044, "learning_rate": 7.762191915978578e-08, "loss": 0.6489125490188599, "step": 7640 }, { "epoch": 1.7605990783410137, "grad_norm": 1.0601387517448153, "learning_rate": 7.74748164962482e-08, "loss": 0.8118616342544556, "step": 7641 }, { "epoch": 1.7608294930875577, "grad_norm": 1.0728808336951121, "learning_rate": 7.732784773601953e-08, "loss": 0.7528102397918701, "step": 7642 }, { "epoch": 1.7610599078341014, "grad_norm": 1.0747138773482348, "learning_rate": 7.718101290043244e-08, "loss": 0.7856849431991577, "step": 7643 }, { "epoch": 1.761290322580645, "grad_norm": 1.3272410706900097, "learning_rate": 7.703431201079969e-08, "loss": 0.8001973628997803, "step": 7644 }, { "epoch": 1.761520737327189, "grad_norm": 1.2721768499077395, "learning_rate": 7.688774508841478e-08, "loss": 0.8332471251487732, "step": 7645 }, { "epoch": 1.7617511520737326, "grad_norm": 1.2453092502270793, "learning_rate": 7.67413121545516e-08, "loss": 0.7534940242767334, "step": 7646 }, { "epoch": 1.7619815668202765, "grad_norm": 1.1709183629535718, "learning_rate": 7.65950132304647e-08, "loss": 0.737503170967102, "step": 7647 }, { "epoch": 1.7622119815668202, "grad_norm": 1.0899482570396566, "learning_rate": 7.644884833738896e-08, "loss": 0.7407201528549194, "step": 7648 }, { "epoch": 1.762442396313364, "grad_norm": 1.0059187913201149, "learning_rate": 7.630281749654055e-08, "loss": 0.7589377164840698, "step": 7649 }, { "epoch": 1.762672811059908, "grad_norm": 1.2624576054967966, "learning_rate": 7.615692072911528e-08, "loss": 0.6586496829986572, "step": 7650 }, { "epoch": 1.7629032258064516, "grad_norm": 1.1572434060771926, "learning_rate": 7.601115805628977e-08, "loss": 0.705591082572937, "step": 7651 }, { "epoch": 1.7631336405529954, "grad_norm": 1.2006722437863475, "learning_rate": 7.586552949922176e-08, "loss": 0.7889619469642639, "step": 7652 }, { "epoch": 1.7633640552995393, "grad_norm": 1.0348577197525213, "learning_rate": 7.572003507904868e-08, "loss": 0.6912282705307007, "step": 7653 }, { "epoch": 1.7635944700460828, "grad_norm": 1.1101374555344716, "learning_rate": 7.557467481688873e-08, "loss": 0.7374964952468872, "step": 7654 }, { "epoch": 1.7638248847926268, "grad_norm": 1.1479262514291408, "learning_rate": 7.542944873384105e-08, "loss": 0.7302298545837402, "step": 7655 }, { "epoch": 1.7640552995391705, "grad_norm": 1.2653276061660264, "learning_rate": 7.5284356850985e-08, "loss": 0.8323671817779541, "step": 7656 }, { "epoch": 1.7642857142857142, "grad_norm": 1.0548505840987745, "learning_rate": 7.513939918938028e-08, "loss": 0.6654655933380127, "step": 7657 }, { "epoch": 1.7645161290322582, "grad_norm": 1.1231001283574193, "learning_rate": 7.499457577006751e-08, "loss": 0.6371186375617981, "step": 7658 }, { "epoch": 1.7647465437788017, "grad_norm": 1.3299088323872645, "learning_rate": 7.484988661406733e-08, "loss": 0.7761695384979248, "step": 7659 }, { "epoch": 1.7649769585253456, "grad_norm": 1.1268786347378037, "learning_rate": 7.470533174238158e-08, "loss": 0.779335618019104, "step": 7660 }, { "epoch": 1.7652073732718894, "grad_norm": 1.26329747548588, "learning_rate": 7.456091117599195e-08, "loss": 0.7642731666564941, "step": 7661 }, { "epoch": 1.765437788018433, "grad_norm": 1.417392503393573, "learning_rate": 7.441662493586076e-08, "loss": 0.7490801215171814, "step": 7662 }, { "epoch": 1.765668202764977, "grad_norm": 1.6109060172749883, "learning_rate": 7.427247304293139e-08, "loss": 0.9480686187744141, "step": 7663 }, { "epoch": 1.7658986175115208, "grad_norm": 1.243245001745715, "learning_rate": 7.412845551812707e-08, "loss": 0.6208070516586304, "step": 7664 }, { "epoch": 1.7661290322580645, "grad_norm": 1.2606477635417679, "learning_rate": 7.398457238235167e-08, "loss": 0.7782050371170044, "step": 7665 }, { "epoch": 1.7663594470046085, "grad_norm": 1.1494295384377444, "learning_rate": 7.38408236564897e-08, "loss": 0.6725378632545471, "step": 7666 }, { "epoch": 1.766589861751152, "grad_norm": 1.4030647180836417, "learning_rate": 7.369720936140611e-08, "loss": 0.8247120380401611, "step": 7667 }, { "epoch": 1.766820276497696, "grad_norm": 1.2966757041323174, "learning_rate": 7.355372951794614e-08, "loss": 0.7866288423538208, "step": 7668 }, { "epoch": 1.7670506912442396, "grad_norm": 1.5029385474750363, "learning_rate": 7.341038414693613e-08, "loss": 0.8096400499343872, "step": 7669 }, { "epoch": 1.7672811059907834, "grad_norm": 1.5152361583075085, "learning_rate": 7.326717326918208e-08, "loss": 0.7799873352050781, "step": 7670 }, { "epoch": 1.7675115207373273, "grad_norm": 1.0568101452951337, "learning_rate": 7.312409690547095e-08, "loss": 0.809285044670105, "step": 7671 }, { "epoch": 1.7677419354838708, "grad_norm": 1.351048640166805, "learning_rate": 7.298115507657021e-08, "loss": 0.874248743057251, "step": 7672 }, { "epoch": 1.7679723502304148, "grad_norm": 1.1594085684678137, "learning_rate": 7.283834780322761e-08, "loss": 0.7418022155761719, "step": 7673 }, { "epoch": 1.7682027649769585, "grad_norm": 1.2895302232300179, "learning_rate": 7.269567510617126e-08, "loss": 0.720660388469696, "step": 7674 }, { "epoch": 1.7684331797235022, "grad_norm": 1.241628438381412, "learning_rate": 7.255313700611032e-08, "loss": 0.7655429840087891, "step": 7675 }, { "epoch": 1.7686635944700462, "grad_norm": 1.125747625986026, "learning_rate": 7.241073352373361e-08, "loss": 0.7303705215454102, "step": 7676 }, { "epoch": 1.76889400921659, "grad_norm": 1.1695690935051566, "learning_rate": 7.226846467971093e-08, "loss": 0.7997909188270569, "step": 7677 }, { "epoch": 1.7691244239631336, "grad_norm": 1.261135372954414, "learning_rate": 7.212633049469264e-08, "loss": 0.6546763181686401, "step": 7678 }, { "epoch": 1.7693548387096776, "grad_norm": 0.9669222373383191, "learning_rate": 7.1984330989309e-08, "loss": 0.6374444961547852, "step": 7679 }, { "epoch": 1.769585253456221, "grad_norm": 1.2966171484977755, "learning_rate": 7.184246618417111e-08, "loss": 0.7092937231063843, "step": 7680 }, { "epoch": 1.769815668202765, "grad_norm": 1.3237517845156634, "learning_rate": 7.17007360998706e-08, "loss": 0.7702305316925049, "step": 7681 }, { "epoch": 1.7700460829493088, "grad_norm": 0.978090031115468, "learning_rate": 7.155914075697933e-08, "loss": 0.7763724327087402, "step": 7682 }, { "epoch": 1.7702764976958525, "grad_norm": 0.9935287090208255, "learning_rate": 7.141768017604966e-08, "loss": 0.6409577131271362, "step": 7683 }, { "epoch": 1.7705069124423964, "grad_norm": 1.2265488041489598, "learning_rate": 7.127635437761459e-08, "loss": 0.7500795125961304, "step": 7684 }, { "epoch": 1.77073732718894, "grad_norm": 1.405023681248552, "learning_rate": 7.113516338218717e-08, "loss": 0.7312004566192627, "step": 7685 }, { "epoch": 1.770967741935484, "grad_norm": 0.910138776962328, "learning_rate": 7.099410721026112e-08, "loss": 0.823514997959137, "step": 7686 }, { "epoch": 1.7711981566820276, "grad_norm": 1.4146285511420962, "learning_rate": 7.085318588231048e-08, "loss": 0.9504063129425049, "step": 7687 }, { "epoch": 1.7714285714285714, "grad_norm": 0.8614868773221174, "learning_rate": 7.071239941878981e-08, "loss": 0.7850733399391174, "step": 7688 }, { "epoch": 1.7716589861751153, "grad_norm": 1.356738665999072, "learning_rate": 7.057174784013431e-08, "loss": 0.9447094798088074, "step": 7689 }, { "epoch": 1.771889400921659, "grad_norm": 1.134179637006652, "learning_rate": 7.04312311667592e-08, "loss": 0.6675062775611877, "step": 7690 }, { "epoch": 1.7721198156682028, "grad_norm": 0.9395193655643466, "learning_rate": 7.029084941906005e-08, "loss": 0.6875232458114624, "step": 7691 }, { "epoch": 1.7723502304147467, "grad_norm": 1.3573723926231736, "learning_rate": 7.015060261741357e-08, "loss": 0.7847919464111328, "step": 7692 }, { "epoch": 1.7725806451612902, "grad_norm": 1.300014614678359, "learning_rate": 7.001049078217613e-08, "loss": 0.7924584150314331, "step": 7693 }, { "epoch": 1.7728110599078342, "grad_norm": 1.4499718780004744, "learning_rate": 6.987051393368471e-08, "loss": 0.8802344799041748, "step": 7694 }, { "epoch": 1.773041474654378, "grad_norm": 1.425988233405148, "learning_rate": 6.973067209225692e-08, "loss": 0.7038631439208984, "step": 7695 }, { "epoch": 1.7732718894009216, "grad_norm": 1.1226859696380713, "learning_rate": 6.959096527819064e-08, "loss": 0.9016700387001038, "step": 7696 }, { "epoch": 1.7735023041474656, "grad_norm": 1.1967072079572705, "learning_rate": 6.945139351176387e-08, "loss": 0.7678165435791016, "step": 7697 }, { "epoch": 1.773732718894009, "grad_norm": 1.1001980127511188, "learning_rate": 6.931195681323565e-08, "loss": 0.6612143516540527, "step": 7698 }, { "epoch": 1.773963133640553, "grad_norm": 1.3968871696274494, "learning_rate": 6.917265520284476e-08, "loss": 0.840233325958252, "step": 7699 }, { "epoch": 1.7741935483870968, "grad_norm": 1.3698339080168875, "learning_rate": 6.90334887008106e-08, "loss": 0.7913506031036377, "step": 7700 }, { "epoch": 1.7744239631336405, "grad_norm": 1.3434994536689218, "learning_rate": 6.889445732733323e-08, "loss": 0.7523634433746338, "step": 7701 }, { "epoch": 1.7746543778801844, "grad_norm": 1.1357027982798495, "learning_rate": 6.875556110259273e-08, "loss": 0.7009792327880859, "step": 7702 }, { "epoch": 1.7748847926267282, "grad_norm": 0.9926018792518734, "learning_rate": 6.861680004674963e-08, "loss": 0.6533738970756531, "step": 7703 }, { "epoch": 1.7751152073732719, "grad_norm": 1.0969556014291875, "learning_rate": 6.847817417994517e-08, "loss": 0.860493540763855, "step": 7704 }, { "epoch": 1.7753456221198156, "grad_norm": 1.3425565367947665, "learning_rate": 6.833968352230057e-08, "loss": 0.810010552406311, "step": 7705 }, { "epoch": 1.7755760368663593, "grad_norm": 1.2400741621258158, "learning_rate": 6.820132809391743e-08, "loss": 0.8443198204040527, "step": 7706 }, { "epoch": 1.7758064516129033, "grad_norm": 1.1086679828690398, "learning_rate": 6.806310791487813e-08, "loss": 0.758772611618042, "step": 7707 }, { "epoch": 1.776036866359447, "grad_norm": 1.2474164003496853, "learning_rate": 6.792502300524472e-08, "loss": 0.8438040614128113, "step": 7708 }, { "epoch": 1.7762672811059907, "grad_norm": 1.154420265010753, "learning_rate": 6.778707338506051e-08, "loss": 0.7727431058883667, "step": 7709 }, { "epoch": 1.7764976958525347, "grad_norm": 1.6420516256349273, "learning_rate": 6.764925907434849e-08, "loss": 0.8118282556533813, "step": 7710 }, { "epoch": 1.7767281105990782, "grad_norm": 1.22888062854885, "learning_rate": 6.75115800931122e-08, "loss": 0.7667281627655029, "step": 7711 }, { "epoch": 1.7769585253456222, "grad_norm": 1.2558357954388057, "learning_rate": 6.737403646133566e-08, "loss": 0.7824913263320923, "step": 7712 }, { "epoch": 1.7771889400921659, "grad_norm": 1.176254722115087, "learning_rate": 6.723662819898312e-08, "loss": 0.7318419218063354, "step": 7713 }, { "epoch": 1.7774193548387096, "grad_norm": 1.2059569400095187, "learning_rate": 6.709935532599897e-08, "loss": 0.7060009241104126, "step": 7714 }, { "epoch": 1.7776497695852536, "grad_norm": 1.3093811884607869, "learning_rate": 6.69622178623086e-08, "loss": 0.7367588877677917, "step": 7715 }, { "epoch": 1.7778801843317973, "grad_norm": 1.3618967587860527, "learning_rate": 6.682521582781708e-08, "loss": 0.7340742349624634, "step": 7716 }, { "epoch": 1.778110599078341, "grad_norm": 1.257394780772999, "learning_rate": 6.668834924240995e-08, "loss": 0.6655991077423096, "step": 7717 }, { "epoch": 1.7783410138248847, "grad_norm": 1.3379718118337083, "learning_rate": 6.655161812595367e-08, "loss": 0.7562434673309326, "step": 7718 }, { "epoch": 1.7785714285714285, "grad_norm": 1.2416548769934193, "learning_rate": 6.641502249829423e-08, "loss": 0.8078730702400208, "step": 7719 }, { "epoch": 1.7788018433179724, "grad_norm": 1.1920319583326109, "learning_rate": 6.627856237925811e-08, "loss": 0.6285899877548218, "step": 7720 }, { "epoch": 1.7790322580645161, "grad_norm": 1.1055337731409536, "learning_rate": 6.61422377886528e-08, "loss": 0.6633951663970947, "step": 7721 }, { "epoch": 1.7792626728110599, "grad_norm": 1.0697990396462347, "learning_rate": 6.600604874626548e-08, "loss": 0.7273050546646118, "step": 7722 }, { "epoch": 1.7794930875576038, "grad_norm": 1.2680575632659172, "learning_rate": 6.586999527186354e-08, "loss": 0.6665729284286499, "step": 7723 }, { "epoch": 1.7797235023041473, "grad_norm": 1.275935674563519, "learning_rate": 6.573407738519531e-08, "loss": 0.7332675457000732, "step": 7724 }, { "epoch": 1.7799539170506913, "grad_norm": 1.0778234517601935, "learning_rate": 6.559829510598892e-08, "loss": 0.7439071536064148, "step": 7725 }, { "epoch": 1.780184331797235, "grad_norm": 1.3635129938987167, "learning_rate": 6.546264845395299e-08, "loss": 0.7104752063751221, "step": 7726 }, { "epoch": 1.7804147465437787, "grad_norm": 1.2639306988819587, "learning_rate": 6.53271374487765e-08, "loss": 0.7792220115661621, "step": 7727 }, { "epoch": 1.7806451612903227, "grad_norm": 1.0938522733418012, "learning_rate": 6.519176211012867e-08, "loss": 0.6379693746566772, "step": 7728 }, { "epoch": 1.7808755760368664, "grad_norm": 1.3289044633653213, "learning_rate": 6.505652245765881e-08, "loss": 0.7737444639205933, "step": 7729 }, { "epoch": 1.7811059907834101, "grad_norm": 1.1550683939038542, "learning_rate": 6.49214185109973e-08, "loss": 0.7681130170822144, "step": 7730 }, { "epoch": 1.7813364055299539, "grad_norm": 1.4083081227680676, "learning_rate": 6.478645028975372e-08, "loss": 0.8718420267105103, "step": 7731 }, { "epoch": 1.7815668202764976, "grad_norm": 1.1823677205039174, "learning_rate": 6.465161781351914e-08, "loss": 0.7557366490364075, "step": 7732 }, { "epoch": 1.7817972350230415, "grad_norm": 1.1999869902911706, "learning_rate": 6.45169211018638e-08, "loss": 0.6794936656951904, "step": 7733 }, { "epoch": 1.7820276497695853, "grad_norm": 1.2764239528790797, "learning_rate": 6.438236017433895e-08, "loss": 0.8390437364578247, "step": 7734 }, { "epoch": 1.782258064516129, "grad_norm": 1.134383511808464, "learning_rate": 6.424793505047599e-08, "loss": 0.8024254441261292, "step": 7735 }, { "epoch": 1.782488479262673, "grad_norm": 0.8536836629483899, "learning_rate": 6.411364574978651e-08, "loss": 0.6382162570953369, "step": 7736 }, { "epoch": 1.7827188940092165, "grad_norm": 1.1757601346145792, "learning_rate": 6.397949229176225e-08, "loss": 0.6832011938095093, "step": 7737 }, { "epoch": 1.7829493087557604, "grad_norm": 1.653357486541517, "learning_rate": 6.384547469587564e-08, "loss": 0.9003958702087402, "step": 7738 }, { "epoch": 1.7831797235023041, "grad_norm": 1.1523951728047304, "learning_rate": 6.371159298157913e-08, "loss": 0.7030328512191772, "step": 7739 }, { "epoch": 1.7834101382488479, "grad_norm": 1.2390057793357907, "learning_rate": 6.357784716830528e-08, "loss": 0.8153259754180908, "step": 7740 }, { "epoch": 1.7836405529953918, "grad_norm": 1.4244568607420958, "learning_rate": 6.344423727546744e-08, "loss": 0.8229082226753235, "step": 7741 }, { "epoch": 1.7838709677419353, "grad_norm": 1.3055755817113595, "learning_rate": 6.331076332245866e-08, "loss": 0.7306294441223145, "step": 7742 }, { "epoch": 1.7841013824884793, "grad_norm": 1.1085692686400792, "learning_rate": 6.317742532865233e-08, "loss": 0.7613078951835632, "step": 7743 }, { "epoch": 1.784331797235023, "grad_norm": 1.4694829399841158, "learning_rate": 6.304422331340275e-08, "loss": 0.9164611101150513, "step": 7744 }, { "epoch": 1.7845622119815667, "grad_norm": 1.4076564642652605, "learning_rate": 6.29111572960439e-08, "loss": 0.8770956993103027, "step": 7745 }, { "epoch": 1.7847926267281107, "grad_norm": 1.3274833988945276, "learning_rate": 6.277822729588989e-08, "loss": 0.7482821941375732, "step": 7746 }, { "epoch": 1.7850230414746544, "grad_norm": 1.3149565308569835, "learning_rate": 6.264543333223549e-08, "loss": 0.7850298881530762, "step": 7747 }, { "epoch": 1.7852534562211981, "grad_norm": 1.0844733877563915, "learning_rate": 6.251277542435552e-08, "loss": 0.5781385898590088, "step": 7748 }, { "epoch": 1.785483870967742, "grad_norm": 1.2619844590894689, "learning_rate": 6.238025359150501e-08, "loss": 0.8217513561248779, "step": 7749 }, { "epoch": 1.7857142857142856, "grad_norm": 1.2512912228822737, "learning_rate": 6.224786785291969e-08, "loss": 0.8500482439994812, "step": 7750 }, { "epoch": 1.7859447004608295, "grad_norm": 1.458025138254964, "learning_rate": 6.211561822781474e-08, "loss": 0.8146470785140991, "step": 7751 }, { "epoch": 1.7861751152073733, "grad_norm": 1.248354775738917, "learning_rate": 6.198350473538616e-08, "loss": 0.7351702451705933, "step": 7752 }, { "epoch": 1.786405529953917, "grad_norm": 1.2620887228989164, "learning_rate": 6.185152739481026e-08, "loss": 0.7993056774139404, "step": 7753 }, { "epoch": 1.786635944700461, "grad_norm": 1.4031526672609798, "learning_rate": 6.171968622524315e-08, "loss": 0.8570160865783691, "step": 7754 }, { "epoch": 1.7868663594470044, "grad_norm": 1.1948359150749444, "learning_rate": 6.158798124582143e-08, "loss": 0.6200212836265564, "step": 7755 }, { "epoch": 1.7870967741935484, "grad_norm": 1.2592084852014216, "learning_rate": 6.145641247566202e-08, "loss": 0.8196465373039246, "step": 7756 }, { "epoch": 1.7873271889400921, "grad_norm": 0.9917037331823602, "learning_rate": 6.132497993386165e-08, "loss": 0.7038032412528992, "step": 7757 }, { "epoch": 1.7875576036866359, "grad_norm": 1.2428262727857045, "learning_rate": 6.119368363949806e-08, "loss": 0.7222307324409485, "step": 7758 }, { "epoch": 1.7877880184331798, "grad_norm": 1.2991738769607613, "learning_rate": 6.106252361162834e-08, "loss": 0.8457501530647278, "step": 7759 }, { "epoch": 1.7880184331797235, "grad_norm": 1.5487287329891364, "learning_rate": 6.093149986929025e-08, "loss": 0.7543236017227173, "step": 7760 }, { "epoch": 1.7882488479262673, "grad_norm": 1.294614145507911, "learning_rate": 6.080061243150191e-08, "loss": 0.5728875398635864, "step": 7761 }, { "epoch": 1.7884792626728112, "grad_norm": 1.3902935059609232, "learning_rate": 6.066986131726138e-08, "loss": 0.6864895820617676, "step": 7762 }, { "epoch": 1.7887096774193547, "grad_norm": 1.1640824452811938, "learning_rate": 6.053924654554687e-08, "loss": 0.8580472469329834, "step": 7763 }, { "epoch": 1.7889400921658987, "grad_norm": 1.358237067906671, "learning_rate": 6.040876813531714e-08, "loss": 0.7670924663543701, "step": 7764 }, { "epoch": 1.7891705069124424, "grad_norm": 1.2558108988688055, "learning_rate": 6.027842610551082e-08, "loss": 0.6558287739753723, "step": 7765 }, { "epoch": 1.7894009216589861, "grad_norm": 1.2875975662335684, "learning_rate": 6.014822047504697e-08, "loss": 0.8186839818954468, "step": 7766 }, { "epoch": 1.78963133640553, "grad_norm": 1.2720662525098447, "learning_rate": 6.001815126282462e-08, "loss": 0.7862167358398438, "step": 7767 }, { "epoch": 1.7898617511520736, "grad_norm": 1.1119662378593531, "learning_rate": 5.98882184877233e-08, "loss": 0.8594048023223877, "step": 7768 }, { "epoch": 1.7900921658986175, "grad_norm": 1.3277176558233812, "learning_rate": 5.975842216860238e-08, "loss": 0.804019033908844, "step": 7769 }, { "epoch": 1.7903225806451613, "grad_norm": 1.1244948347974122, "learning_rate": 5.962876232430192e-08, "loss": 0.7404098510742188, "step": 7770 }, { "epoch": 1.790552995391705, "grad_norm": 1.3595838567399194, "learning_rate": 5.949923897364173e-08, "loss": 0.7726024389266968, "step": 7771 }, { "epoch": 1.790783410138249, "grad_norm": 1.5060671287860161, "learning_rate": 5.936985213542178e-08, "loss": 0.8225048184394836, "step": 7772 }, { "epoch": 1.7910138248847927, "grad_norm": 1.4217281972238225, "learning_rate": 5.924060182842272e-08, "loss": 0.8485706448554993, "step": 7773 }, { "epoch": 1.7912442396313364, "grad_norm": 1.189460803975086, "learning_rate": 5.9111488071404867e-08, "loss": 0.6580322980880737, "step": 7774 }, { "epoch": 1.7914746543778803, "grad_norm": 1.1783786831629417, "learning_rate": 5.898251088310879e-08, "loss": 0.7486656904220581, "step": 7775 }, { "epoch": 1.7917050691244238, "grad_norm": 1.5948072851449393, "learning_rate": 5.885367028225574e-08, "loss": 0.9068334102630615, "step": 7776 }, { "epoch": 1.7919354838709678, "grad_norm": 1.1107745619546634, "learning_rate": 5.872496628754653e-08, "loss": 0.7091449499130249, "step": 7777 }, { "epoch": 1.7921658986175115, "grad_norm": 1.3473785107334575, "learning_rate": 5.8596398917662107e-08, "loss": 0.7248316407203674, "step": 7778 }, { "epoch": 1.7923963133640552, "grad_norm": 1.2057819957098448, "learning_rate": 5.8467968191264315e-08, "loss": 0.7740335464477539, "step": 7779 }, { "epoch": 1.7926267281105992, "grad_norm": 1.267573304949112, "learning_rate": 5.833967412699448e-08, "loss": 0.7810479402542114, "step": 7780 }, { "epoch": 1.7928571428571427, "grad_norm": 1.004282792701847, "learning_rate": 5.821151674347435e-08, "loss": 0.7072443962097168, "step": 7781 }, { "epoch": 1.7930875576036867, "grad_norm": 1.1829190770666373, "learning_rate": 5.808349605930585e-08, "loss": 0.8218289613723755, "step": 7782 }, { "epoch": 1.7933179723502304, "grad_norm": 1.393265214120735, "learning_rate": 5.795561209307087e-08, "loss": 0.8928433656692505, "step": 7783 }, { "epoch": 1.793548387096774, "grad_norm": 1.455083354855402, "learning_rate": 5.7827864863331796e-08, "loss": 0.765188455581665, "step": 7784 }, { "epoch": 1.793778801843318, "grad_norm": 1.0118039506572176, "learning_rate": 5.7700254388630795e-08, "loss": 0.7149494886398315, "step": 7785 }, { "epoch": 1.7940092165898618, "grad_norm": 1.6638445812749356, "learning_rate": 5.75727806874905e-08, "loss": 0.8144164085388184, "step": 7786 }, { "epoch": 1.7942396313364055, "grad_norm": 1.1101501647130416, "learning_rate": 5.744544377841354e-08, "loss": 0.7549517154693604, "step": 7787 }, { "epoch": 1.7944700460829495, "grad_norm": 1.1805002478026116, "learning_rate": 5.731824367988258e-08, "loss": 0.7820652723312378, "step": 7788 }, { "epoch": 1.794700460829493, "grad_norm": 1.2187125462499315, "learning_rate": 5.719118041036042e-08, "loss": 0.8253183364868164, "step": 7789 }, { "epoch": 1.794930875576037, "grad_norm": 1.3044045265020685, "learning_rate": 5.70642539882904e-08, "loss": 0.8177148103713989, "step": 7790 }, { "epoch": 1.7951612903225806, "grad_norm": 1.2453642288062106, "learning_rate": 5.69374644320958e-08, "loss": 0.722260594367981, "step": 7791 }, { "epoch": 1.7953917050691244, "grad_norm": 1.3322495120015716, "learning_rate": 5.6810811760179434e-08, "loss": 0.8128643035888672, "step": 7792 }, { "epoch": 1.7956221198156683, "grad_norm": 1.2461980802133077, "learning_rate": 5.6684295990925394e-08, "loss": 0.8267233371734619, "step": 7793 }, { "epoch": 1.7958525345622118, "grad_norm": 1.1467604985666775, "learning_rate": 5.655791714269697e-08, "loss": 0.8385082483291626, "step": 7794 }, { "epoch": 1.7960829493087558, "grad_norm": 1.2035138425735283, "learning_rate": 5.643167523383785e-08, "loss": 0.8705167770385742, "step": 7795 }, { "epoch": 1.7963133640552995, "grad_norm": 1.263928906996047, "learning_rate": 5.6305570282672024e-08, "loss": 0.7628496885299683, "step": 7796 }, { "epoch": 1.7965437788018432, "grad_norm": 1.2993701262886028, "learning_rate": 5.61796023075034e-08, "loss": 0.8246536254882812, "step": 7797 }, { "epoch": 1.7967741935483872, "grad_norm": 1.2920173759654132, "learning_rate": 5.6053771326615815e-08, "loss": 0.7103257179260254, "step": 7798 }, { "epoch": 1.797004608294931, "grad_norm": 1.318695367926756, "learning_rate": 5.5928077358273984e-08, "loss": 0.614989161491394, "step": 7799 }, { "epoch": 1.7972350230414746, "grad_norm": 1.6404840895868877, "learning_rate": 5.5802520420721866e-08, "loss": 0.9876137971878052, "step": 7800 }, { "epoch": 1.7974654377880186, "grad_norm": 1.2467848598458215, "learning_rate": 5.5677100532183775e-08, "loss": 0.7023773193359375, "step": 7801 }, { "epoch": 1.797695852534562, "grad_norm": 1.1844278512776936, "learning_rate": 5.555181771086459e-08, "loss": 0.6680843830108643, "step": 7802 }, { "epoch": 1.797926267281106, "grad_norm": 1.0826933828880965, "learning_rate": 5.542667197494877e-08, "loss": 0.7221776843070984, "step": 7803 }, { "epoch": 1.7981566820276498, "grad_norm": 1.0071738664190577, "learning_rate": 5.5301663342601e-08, "loss": 0.7473262548446655, "step": 7804 }, { "epoch": 1.7983870967741935, "grad_norm": 1.2499370802188474, "learning_rate": 5.517679183196622e-08, "loss": 0.8690468072891235, "step": 7805 }, { "epoch": 1.7986175115207375, "grad_norm": 1.0933317196070476, "learning_rate": 5.505205746116937e-08, "loss": 0.8353981971740723, "step": 7806 }, { "epoch": 1.798847926267281, "grad_norm": 1.177111485427447, "learning_rate": 5.4927460248315405e-08, "loss": 0.7691711187362671, "step": 7807 }, { "epoch": 1.799078341013825, "grad_norm": 1.034283547212154, "learning_rate": 5.480300021148953e-08, "loss": 0.6732556819915771, "step": 7808 }, { "epoch": 1.7993087557603686, "grad_norm": 1.1520777556370354, "learning_rate": 5.467867736875664e-08, "loss": 0.7273567914962769, "step": 7809 }, { "epoch": 1.7995391705069124, "grad_norm": 1.201774068977123, "learning_rate": 5.455449173816251e-08, "loss": 0.7951864004135132, "step": 7810 }, { "epoch": 1.7997695852534563, "grad_norm": 1.4133736179333027, "learning_rate": 5.4430443337732276e-08, "loss": 0.7073169350624084, "step": 7811 }, { "epoch": 1.8, "grad_norm": 1.0101637387022209, "learning_rate": 5.430653218547132e-08, "loss": 0.682072639465332, "step": 7812 }, { "epoch": 1.8002304147465438, "grad_norm": 0.9949453624163476, "learning_rate": 5.4182758299365364e-08, "loss": 0.7512049674987793, "step": 7813 }, { "epoch": 1.8004608294930877, "grad_norm": 1.2218170088515747, "learning_rate": 5.405912169738003e-08, "loss": 0.7470980882644653, "step": 7814 }, { "epoch": 1.8006912442396312, "grad_norm": 1.1792295753175266, "learning_rate": 5.3935622397460634e-08, "loss": 0.792417049407959, "step": 7815 }, { "epoch": 1.8009216589861752, "grad_norm": 1.4508025797803343, "learning_rate": 5.3812260417533505e-08, "loss": 0.8600934743881226, "step": 7816 }, { "epoch": 1.801152073732719, "grad_norm": 1.2411035382017865, "learning_rate": 5.36890357755041e-08, "loss": 0.6931058168411255, "step": 7817 }, { "epoch": 1.8013824884792626, "grad_norm": 1.1047587345616248, "learning_rate": 5.3565948489258216e-08, "loss": 0.7382420897483826, "step": 7818 }, { "epoch": 1.8016129032258066, "grad_norm": 1.5724454012098283, "learning_rate": 5.344299857666224e-08, "loss": 0.6811971068382263, "step": 7819 }, { "epoch": 1.80184331797235, "grad_norm": 1.3142032735909368, "learning_rate": 5.332018605556188e-08, "loss": 0.8551425933837891, "step": 7820 }, { "epoch": 1.802073732718894, "grad_norm": 1.298840655183536, "learning_rate": 5.319751094378322e-08, "loss": 0.7907109260559082, "step": 7821 }, { "epoch": 1.8023041474654378, "grad_norm": 1.462185741805911, "learning_rate": 5.3074973259132464e-08, "loss": 0.6995817422866821, "step": 7822 }, { "epoch": 1.8025345622119815, "grad_norm": 1.2098230160416081, "learning_rate": 5.295257301939582e-08, "loss": 0.9157558679580688, "step": 7823 }, { "epoch": 1.8027649769585254, "grad_norm": 1.3503599705143554, "learning_rate": 5.283031024233942e-08, "loss": 0.8181086778640747, "step": 7824 }, { "epoch": 1.8029953917050692, "grad_norm": 1.061101797749781, "learning_rate": 5.270818494570961e-08, "loss": 0.7170151472091675, "step": 7825 }, { "epoch": 1.803225806451613, "grad_norm": 1.3415396727620215, "learning_rate": 5.258619714723278e-08, "loss": 0.7548947334289551, "step": 7826 }, { "epoch": 1.8034562211981566, "grad_norm": 1.309211881034751, "learning_rate": 5.2464346864615204e-08, "loss": 0.7482869625091553, "step": 7827 }, { "epoch": 1.8036866359447004, "grad_norm": 1.2839346666214595, "learning_rate": 5.234263411554329e-08, "loss": 0.6984925270080566, "step": 7828 }, { "epoch": 1.8039170506912443, "grad_norm": 1.4972180990250632, "learning_rate": 5.222105891768347e-08, "loss": 0.910038948059082, "step": 7829 }, { "epoch": 1.804147465437788, "grad_norm": 1.4071380742837927, "learning_rate": 5.2099621288682174e-08, "loss": 0.8936711549758911, "step": 7830 }, { "epoch": 1.8043778801843318, "grad_norm": 1.2841490446822148, "learning_rate": 5.197832124616608e-08, "loss": 0.7376326322555542, "step": 7831 }, { "epoch": 1.8046082949308757, "grad_norm": 1.6922079171273652, "learning_rate": 5.1857158807741554e-08, "loss": 0.8373547792434692, "step": 7832 }, { "epoch": 1.8048387096774192, "grad_norm": 1.1938115721747944, "learning_rate": 5.17361339909953e-08, "loss": 0.7018512487411499, "step": 7833 }, { "epoch": 1.8050691244239632, "grad_norm": 1.0051532014919082, "learning_rate": 5.161524681349394e-08, "loss": 0.6111225485801697, "step": 7834 }, { "epoch": 1.805299539170507, "grad_norm": 1.1643316930206133, "learning_rate": 5.149449729278388e-08, "loss": 0.6961934566497803, "step": 7835 }, { "epoch": 1.8055299539170506, "grad_norm": 1.1662486414151942, "learning_rate": 5.137388544639198e-08, "loss": 0.677324116230011, "step": 7836 }, { "epoch": 1.8057603686635946, "grad_norm": 1.1241341054985654, "learning_rate": 5.125341129182481e-08, "loss": 0.7124897837638855, "step": 7837 }, { "epoch": 1.8059907834101383, "grad_norm": 1.1858041195501718, "learning_rate": 5.1133074846568815e-08, "loss": 0.7474578619003296, "step": 7838 }, { "epoch": 1.806221198156682, "grad_norm": 1.0832413753523613, "learning_rate": 5.101287612809102e-08, "loss": 0.699856162071228, "step": 7839 }, { "epoch": 1.8064516129032258, "grad_norm": 1.2510053638983376, "learning_rate": 5.089281515383803e-08, "loss": 0.6548302173614502, "step": 7840 }, { "epoch": 1.8066820276497695, "grad_norm": 1.4067864996197734, "learning_rate": 5.077289194123624e-08, "loss": 0.8376108407974243, "step": 7841 }, { "epoch": 1.8069124423963134, "grad_norm": 1.4168917230935398, "learning_rate": 5.065310650769283e-08, "loss": 0.741931140422821, "step": 7842 }, { "epoch": 1.8071428571428572, "grad_norm": 1.0130617353418785, "learning_rate": 5.053345887059413e-08, "loss": 0.7253270149230957, "step": 7843 }, { "epoch": 1.807373271889401, "grad_norm": 1.452385981822963, "learning_rate": 5.0413949047306894e-08, "loss": 0.8248677849769592, "step": 7844 }, { "epoch": 1.8076036866359448, "grad_norm": 1.2182337218961132, "learning_rate": 5.0294577055177925e-08, "loss": 0.7571253776550293, "step": 7845 }, { "epoch": 1.8078341013824883, "grad_norm": 1.3374870147899762, "learning_rate": 5.017534291153391e-08, "loss": 0.8256274461746216, "step": 7846 }, { "epoch": 1.8080645161290323, "grad_norm": 1.0202351482491858, "learning_rate": 5.0056246633681356e-08, "loss": 0.8609060049057007, "step": 7847 }, { "epoch": 1.808294930875576, "grad_norm": 1.0533455142790622, "learning_rate": 4.9937288238907196e-08, "loss": 0.7005047798156738, "step": 7848 }, { "epoch": 1.8085253456221198, "grad_norm": 1.508707208071474, "learning_rate": 4.981846774447784e-08, "loss": 0.8640049695968628, "step": 7849 }, { "epoch": 1.8087557603686637, "grad_norm": 1.2891784390675838, "learning_rate": 4.969978516763984e-08, "loss": 0.8385862112045288, "step": 7850 }, { "epoch": 1.8089861751152074, "grad_norm": 1.4569260681358536, "learning_rate": 4.9581240525620184e-08, "loss": 0.845676064491272, "step": 7851 }, { "epoch": 1.8092165898617512, "grad_norm": 1.1553749249891685, "learning_rate": 4.9462833835625327e-08, "loss": 0.7638444304466248, "step": 7852 }, { "epoch": 1.8094470046082949, "grad_norm": 1.3732641737808478, "learning_rate": 4.934456511484153e-08, "loss": 0.813924252986908, "step": 7853 }, { "epoch": 1.8096774193548386, "grad_norm": 1.1884602060780909, "learning_rate": 4.9226434380435835e-08, "loss": 0.8773660659790039, "step": 7854 }, { "epoch": 1.8099078341013826, "grad_norm": 1.1706588526128812, "learning_rate": 4.91084416495543e-08, "loss": 0.6703497171401978, "step": 7855 }, { "epoch": 1.8101382488479263, "grad_norm": 1.149648913375334, "learning_rate": 4.8990586939323896e-08, "loss": 0.760738730430603, "step": 7856 }, { "epoch": 1.81036866359447, "grad_norm": 1.0947557024146697, "learning_rate": 4.887287026685072e-08, "loss": 0.6466494798660278, "step": 7857 }, { "epoch": 1.810599078341014, "grad_norm": 1.2590537115606641, "learning_rate": 4.8755291649221206e-08, "loss": 0.7416050434112549, "step": 7858 }, { "epoch": 1.8108294930875575, "grad_norm": 1.200153480710325, "learning_rate": 4.863785110350205e-08, "loss": 0.6841444373130798, "step": 7859 }, { "epoch": 1.8110599078341014, "grad_norm": 0.9841555488440165, "learning_rate": 4.8520548646739265e-08, "loss": 0.7818359732627869, "step": 7860 }, { "epoch": 1.8112903225806452, "grad_norm": 1.287409595467329, "learning_rate": 4.840338429595914e-08, "loss": 0.8802354335784912, "step": 7861 }, { "epoch": 1.8115207373271889, "grad_norm": 1.3745378964034318, "learning_rate": 4.8286358068168055e-08, "loss": 0.8105144500732422, "step": 7862 }, { "epoch": 1.8117511520737328, "grad_norm": 1.2395900259386188, "learning_rate": 4.816946998035232e-08, "loss": 0.6613968014717102, "step": 7863 }, { "epoch": 1.8119815668202763, "grad_norm": 1.191330407638407, "learning_rate": 4.80527200494778e-08, "loss": 0.7945050001144409, "step": 7864 }, { "epoch": 1.8122119815668203, "grad_norm": 1.1914418584370128, "learning_rate": 4.793610829249084e-08, "loss": 0.7268643379211426, "step": 7865 }, { "epoch": 1.812442396313364, "grad_norm": 1.0567405581917244, "learning_rate": 4.781963472631745e-08, "loss": 0.7416445016860962, "step": 7866 }, { "epoch": 1.8126728110599077, "grad_norm": 1.246941647908604, "learning_rate": 4.770329936786355e-08, "loss": 0.8536533117294312, "step": 7867 }, { "epoch": 1.8129032258064517, "grad_norm": 1.2429467538833636, "learning_rate": 4.7587102234015074e-08, "loss": 0.8258422017097473, "step": 7868 }, { "epoch": 1.8131336405529954, "grad_norm": 1.189598318299626, "learning_rate": 4.7471043341637874e-08, "loss": 0.6976941823959351, "step": 7869 }, { "epoch": 1.8133640552995391, "grad_norm": 1.222534835599988, "learning_rate": 4.735512270757758e-08, "loss": 0.8213087916374207, "step": 7870 }, { "epoch": 1.813594470046083, "grad_norm": 1.1756801964544004, "learning_rate": 4.723934034866028e-08, "loss": 0.8012057542800903, "step": 7871 }, { "epoch": 1.8138248847926266, "grad_norm": 1.0419940327131916, "learning_rate": 4.7123696281691436e-08, "loss": 0.7802866697311401, "step": 7872 }, { "epoch": 1.8140552995391706, "grad_norm": 1.1630887083640626, "learning_rate": 4.700819052345639e-08, "loss": 0.8024426698684692, "step": 7873 }, { "epoch": 1.8142857142857143, "grad_norm": 0.9709635675133196, "learning_rate": 4.689282309072107e-08, "loss": 0.6383114457130432, "step": 7874 }, { "epoch": 1.814516129032258, "grad_norm": 1.2768186922012608, "learning_rate": 4.677759400023085e-08, "loss": 0.7226015329360962, "step": 7875 }, { "epoch": 1.814746543778802, "grad_norm": 1.0424513670531574, "learning_rate": 4.6662503268710684e-08, "loss": 0.8390164971351624, "step": 7876 }, { "epoch": 1.8149769585253455, "grad_norm": 1.0443665370850939, "learning_rate": 4.654755091286633e-08, "loss": 0.8120134472846985, "step": 7877 }, { "epoch": 1.8152073732718894, "grad_norm": 1.305111160234168, "learning_rate": 4.6432736949382656e-08, "loss": 0.6554470062255859, "step": 7878 }, { "epoch": 1.8154377880184331, "grad_norm": 1.1780234915455678, "learning_rate": 4.631806139492478e-08, "loss": 0.7268370985984802, "step": 7879 }, { "epoch": 1.8156682027649769, "grad_norm": 1.4051894182356444, "learning_rate": 4.620352426613794e-08, "loss": 0.7991992831230164, "step": 7880 }, { "epoch": 1.8158986175115208, "grad_norm": 1.1268859101296151, "learning_rate": 4.608912557964673e-08, "loss": 0.7695842981338501, "step": 7881 }, { "epoch": 1.8161290322580645, "grad_norm": 1.9896156470888766, "learning_rate": 4.59748653520563e-08, "loss": 0.8633268475532532, "step": 7882 }, { "epoch": 1.8163594470046083, "grad_norm": 1.1364981478494263, "learning_rate": 4.586074359995118e-08, "loss": 0.7018440961837769, "step": 7883 }, { "epoch": 1.8165898617511522, "grad_norm": 1.1022691462384118, "learning_rate": 4.574676033989589e-08, "loss": 0.7304259538650513, "step": 7884 }, { "epoch": 1.8168202764976957, "grad_norm": 1.2520833867580832, "learning_rate": 4.563291558843518e-08, "loss": 0.7408654689788818, "step": 7885 }, { "epoch": 1.8170506912442397, "grad_norm": 0.8583590816187824, "learning_rate": 4.55192093620933e-08, "loss": 0.6378169059753418, "step": 7886 }, { "epoch": 1.8172811059907834, "grad_norm": 1.2929203847720665, "learning_rate": 4.540564167737471e-08, "loss": 0.8854331374168396, "step": 7887 }, { "epoch": 1.8175115207373271, "grad_norm": 1.3325768500609418, "learning_rate": 4.529221255076343e-08, "loss": 0.6948372721672058, "step": 7888 }, { "epoch": 1.817741935483871, "grad_norm": 1.0169430034347062, "learning_rate": 4.517892199872364e-08, "loss": 0.8199236392974854, "step": 7889 }, { "epoch": 1.8179723502304146, "grad_norm": 1.2358305635738154, "learning_rate": 4.506577003769918e-08, "loss": 0.6967995762825012, "step": 7890 }, { "epoch": 1.8182027649769585, "grad_norm": 1.5521492896589208, "learning_rate": 4.495275668411425e-08, "loss": 0.848435640335083, "step": 7891 }, { "epoch": 1.8184331797235023, "grad_norm": 1.0482582355280439, "learning_rate": 4.483988195437227e-08, "loss": 0.7085731029510498, "step": 7892 }, { "epoch": 1.818663594470046, "grad_norm": 1.540410469929121, "learning_rate": 4.472714586485682e-08, "loss": 0.7400653958320618, "step": 7893 }, { "epoch": 1.81889400921659, "grad_norm": 1.3011192141788026, "learning_rate": 4.461454843193169e-08, "loss": 0.7636830806732178, "step": 7894 }, { "epoch": 1.8191244239631337, "grad_norm": 0.9509851989309867, "learning_rate": 4.4502089671940135e-08, "loss": 0.6902754306793213, "step": 7895 }, { "epoch": 1.8193548387096774, "grad_norm": 1.4497717090666749, "learning_rate": 4.438976960120522e-08, "loss": 0.8397349119186401, "step": 7896 }, { "epoch": 1.8195852534562214, "grad_norm": 1.1317263019718502, "learning_rate": 4.4277588236030226e-08, "loss": 0.7505836486816406, "step": 7897 }, { "epoch": 1.8198156682027649, "grad_norm": 1.4213425196027163, "learning_rate": 4.416554559269814e-08, "loss": 0.9310287833213806, "step": 7898 }, { "epoch": 1.8200460829493088, "grad_norm": 1.0910777164101302, "learning_rate": 4.405364168747161e-08, "loss": 0.724685549736023, "step": 7899 }, { "epoch": 1.8202764976958525, "grad_norm": 0.99356469827684, "learning_rate": 4.394187653659365e-08, "loss": 0.6554735898971558, "step": 7900 }, { "epoch": 1.8205069124423963, "grad_norm": 1.5629584518265682, "learning_rate": 4.383025015628661e-08, "loss": 0.7494597434997559, "step": 7901 }, { "epoch": 1.8207373271889402, "grad_norm": 1.3596683636243805, "learning_rate": 4.371876256275287e-08, "loss": 0.817386269569397, "step": 7902 }, { "epoch": 1.8209677419354837, "grad_norm": 1.2645292088995888, "learning_rate": 4.3607413772174806e-08, "loss": 0.8668064475059509, "step": 7903 }, { "epoch": 1.8211981566820277, "grad_norm": 1.2001673372629817, "learning_rate": 4.34962038007145e-08, "loss": 0.7400633096694946, "step": 7904 }, { "epoch": 1.8214285714285714, "grad_norm": 1.018878326746976, "learning_rate": 4.3385132664514046e-08, "loss": 0.7273544073104858, "step": 7905 }, { "epoch": 1.8216589861751151, "grad_norm": 1.149057253315942, "learning_rate": 4.3274200379695315e-08, "loss": 0.7133193016052246, "step": 7906 }, { "epoch": 1.821889400921659, "grad_norm": 1.2433089389356335, "learning_rate": 4.316340696235976e-08, "loss": 0.9390736222267151, "step": 7907 }, { "epoch": 1.8221198156682028, "grad_norm": 1.1318410882734156, "learning_rate": 4.3052752428588966e-08, "loss": 0.7065613269805908, "step": 7908 }, { "epoch": 1.8223502304147465, "grad_norm": 1.2803518971044316, "learning_rate": 4.294223679444442e-08, "loss": 0.813999354839325, "step": 7909 }, { "epoch": 1.8225806451612905, "grad_norm": 1.616827704611462, "learning_rate": 4.2831860075966955e-08, "loss": 0.9234256148338318, "step": 7910 }, { "epoch": 1.822811059907834, "grad_norm": 1.4124883659201861, "learning_rate": 4.272162228917808e-08, "loss": 0.8630207777023315, "step": 7911 }, { "epoch": 1.823041474654378, "grad_norm": 1.382424983437882, "learning_rate": 4.2611523450078456e-08, "loss": 0.7827208042144775, "step": 7912 }, { "epoch": 1.8232718894009217, "grad_norm": 1.3479238410287269, "learning_rate": 4.250156357464873e-08, "loss": 0.884107232093811, "step": 7913 }, { "epoch": 1.8235023041474654, "grad_norm": 1.3064700630797408, "learning_rate": 4.2391742678849484e-08, "loss": 0.8615697026252747, "step": 7914 }, { "epoch": 1.8237327188940093, "grad_norm": 1.4410161390206035, "learning_rate": 4.2282060778621174e-08, "loss": 0.8001279830932617, "step": 7915 }, { "epoch": 1.8239631336405528, "grad_norm": 1.1016373373524035, "learning_rate": 4.217251788988374e-08, "loss": 0.7183214426040649, "step": 7916 }, { "epoch": 1.8241935483870968, "grad_norm": 1.2680472029966925, "learning_rate": 4.206311402853746e-08, "loss": 0.7751119136810303, "step": 7917 }, { "epoch": 1.8244239631336405, "grad_norm": 1.287058032235602, "learning_rate": 4.195384921046208e-08, "loss": 0.8073426485061646, "step": 7918 }, { "epoch": 1.8246543778801843, "grad_norm": 1.053407718143569, "learning_rate": 4.1844723451517017e-08, "loss": 0.7918455600738525, "step": 7919 }, { "epoch": 1.8248847926267282, "grad_norm": 1.1789390806182918, "learning_rate": 4.1735736767542054e-08, "loss": 0.8070017099380493, "step": 7920 }, { "epoch": 1.825115207373272, "grad_norm": 1.1456133687492283, "learning_rate": 4.1626889174356306e-08, "loss": 0.7202159762382507, "step": 7921 }, { "epoch": 1.8253456221198157, "grad_norm": 1.304718816677761, "learning_rate": 4.15181806877587e-08, "loss": 0.8412283658981323, "step": 7922 }, { "epoch": 1.8255760368663596, "grad_norm": 1.079962569087528, "learning_rate": 4.140961132352849e-08, "loss": 0.6230478286743164, "step": 7923 }, { "epoch": 1.8258064516129031, "grad_norm": 1.184647211526077, "learning_rate": 4.1301181097424196e-08, "loss": 0.6475099921226501, "step": 7924 }, { "epoch": 1.826036866359447, "grad_norm": 1.1526955390848261, "learning_rate": 4.1192890025184223e-08, "loss": 0.6277462244033813, "step": 7925 }, { "epoch": 1.8262672811059908, "grad_norm": 1.048650750687635, "learning_rate": 4.1084738122527e-08, "loss": 0.784058690071106, "step": 7926 }, { "epoch": 1.8264976958525345, "grad_norm": 1.2758998200943634, "learning_rate": 4.097672540515063e-08, "loss": 0.7214534282684326, "step": 7927 }, { "epoch": 1.8267281105990785, "grad_norm": 1.3299220547069754, "learning_rate": 4.086885188873302e-08, "loss": 0.7504015564918518, "step": 7928 }, { "epoch": 1.826958525345622, "grad_norm": 1.3115105618474625, "learning_rate": 4.076111758893175e-08, "loss": 0.8837840557098389, "step": 7929 }, { "epoch": 1.827188940092166, "grad_norm": 0.9756920709009218, "learning_rate": 4.065352252138443e-08, "loss": 0.6903706789016724, "step": 7930 }, { "epoch": 1.8274193548387097, "grad_norm": 1.0882078909648618, "learning_rate": 4.054606670170824e-08, "loss": 0.6120485067367554, "step": 7931 }, { "epoch": 1.8276497695852534, "grad_norm": 1.3933670864132435, "learning_rate": 4.043875014550047e-08, "loss": 0.9566253423690796, "step": 7932 }, { "epoch": 1.8278801843317973, "grad_norm": 1.143561158140067, "learning_rate": 4.033157286833766e-08, "loss": 0.7702776193618774, "step": 7933 }, { "epoch": 1.828110599078341, "grad_norm": 1.3861853644171394, "learning_rate": 4.0224534885776706e-08, "loss": 0.7326529026031494, "step": 7934 }, { "epoch": 1.8283410138248848, "grad_norm": 1.199651876611857, "learning_rate": 4.011763621335395e-08, "loss": 0.8161343336105347, "step": 7935 }, { "epoch": 1.8285714285714287, "grad_norm": 1.2385311136965618, "learning_rate": 4.001087686658544e-08, "loss": 0.7167537212371826, "step": 7936 }, { "epoch": 1.8288018433179722, "grad_norm": 1.5866479195226006, "learning_rate": 3.9904256860967433e-08, "loss": 0.9195249080657959, "step": 7937 }, { "epoch": 1.8290322580645162, "grad_norm": 1.4492337682663832, "learning_rate": 3.979777621197544e-08, "loss": 0.9483609199523926, "step": 7938 }, { "epoch": 1.82926267281106, "grad_norm": 1.1520857488925356, "learning_rate": 3.96914349350651e-08, "loss": 0.6521364450454712, "step": 7939 }, { "epoch": 1.8294930875576036, "grad_norm": 1.1394847291425385, "learning_rate": 3.958523304567174e-08, "loss": 0.714328408241272, "step": 7940 }, { "epoch": 1.8297235023041476, "grad_norm": 1.2749952242619191, "learning_rate": 3.9479170559210464e-08, "loss": 0.705136775970459, "step": 7941 }, { "epoch": 1.829953917050691, "grad_norm": 1.2310686937076982, "learning_rate": 3.937324749107584e-08, "loss": 0.9096843004226685, "step": 7942 }, { "epoch": 1.830184331797235, "grad_norm": 1.1347026880501985, "learning_rate": 3.9267463856642704e-08, "loss": 0.7797929048538208, "step": 7943 }, { "epoch": 1.8304147465437788, "grad_norm": 1.1418375010830168, "learning_rate": 3.9161819671265414e-08, "loss": 0.739689290523529, "step": 7944 }, { "epoch": 1.8306451612903225, "grad_norm": 1.2414926332489717, "learning_rate": 3.905631495027795e-08, "loss": 0.7297589778900146, "step": 7945 }, { "epoch": 1.8308755760368665, "grad_norm": 1.1411747974433366, "learning_rate": 3.895094970899426e-08, "loss": 0.6632317900657654, "step": 7946 }, { "epoch": 1.8311059907834102, "grad_norm": 1.1035263718417188, "learning_rate": 3.884572396270802e-08, "loss": 0.8075754642486572, "step": 7947 }, { "epoch": 1.831336405529954, "grad_norm": 1.1206981689667126, "learning_rate": 3.874063772669256e-08, "loss": 0.879385232925415, "step": 7948 }, { "epoch": 1.8315668202764976, "grad_norm": 1.1296410172019098, "learning_rate": 3.86356910162009e-08, "loss": 0.7182341814041138, "step": 7949 }, { "epoch": 1.8317972350230414, "grad_norm": 1.3256415462362086, "learning_rate": 3.853088384646608e-08, "loss": 0.8980770111083984, "step": 7950 }, { "epoch": 1.8320276497695853, "grad_norm": 1.2399263879902838, "learning_rate": 3.8426216232700483e-08, "loss": 0.7798547744750977, "step": 7951 }, { "epoch": 1.832258064516129, "grad_norm": 1.30590072600508, "learning_rate": 3.832168819009685e-08, "loss": 0.7545509934425354, "step": 7952 }, { "epoch": 1.8324884792626728, "grad_norm": 1.4626138945450415, "learning_rate": 3.821729973382681e-08, "loss": 0.7394163608551025, "step": 7953 }, { "epoch": 1.8327188940092167, "grad_norm": 1.095086275435991, "learning_rate": 3.811305087904271e-08, "loss": 0.7771584987640381, "step": 7954 }, { "epoch": 1.8329493087557602, "grad_norm": 1.0772465088176202, "learning_rate": 3.800894164087587e-08, "loss": 0.6490596532821655, "step": 7955 }, { "epoch": 1.8331797235023042, "grad_norm": 1.6261572682115344, "learning_rate": 3.7904972034437546e-08, "loss": 0.8465416431427002, "step": 7956 }, { "epoch": 1.833410138248848, "grad_norm": 1.1256653812684285, "learning_rate": 3.780114207481899e-08, "loss": 0.6769351363182068, "step": 7957 }, { "epoch": 1.8336405529953916, "grad_norm": 1.1157448396752008, "learning_rate": 3.769745177709094e-08, "loss": 0.8187215328216553, "step": 7958 }, { "epoch": 1.8338709677419356, "grad_norm": 0.9478307441179703, "learning_rate": 3.759390115630356e-08, "loss": 0.7524763345718384, "step": 7959 }, { "epoch": 1.8341013824884793, "grad_norm": 1.3846707864730958, "learning_rate": 3.749049022748762e-08, "loss": 0.8019517064094543, "step": 7960 }, { "epoch": 1.834331797235023, "grad_norm": 1.2301171101661803, "learning_rate": 3.738721900565278e-08, "loss": 0.7732158899307251, "step": 7961 }, { "epoch": 1.8345622119815668, "grad_norm": 1.1624945144679932, "learning_rate": 3.728408750578871e-08, "loss": 0.7152917385101318, "step": 7962 }, { "epoch": 1.8347926267281105, "grad_norm": 1.2249354034345745, "learning_rate": 3.7181095742864876e-08, "loss": 0.7117735147476196, "step": 7963 }, { "epoch": 1.8350230414746544, "grad_norm": 1.1387667941982393, "learning_rate": 3.7078243731830436e-08, "loss": 0.7651360034942627, "step": 7964 }, { "epoch": 1.8352534562211982, "grad_norm": 1.103224145154883, "learning_rate": 3.697553148761412e-08, "loss": 0.6686996817588806, "step": 7965 }, { "epoch": 1.835483870967742, "grad_norm": 1.4148867918515446, "learning_rate": 3.687295902512455e-08, "loss": 0.8654145002365112, "step": 7966 }, { "epoch": 1.8357142857142859, "grad_norm": 1.2014603088046913, "learning_rate": 3.6770526359250046e-08, "loss": 0.7883874177932739, "step": 7967 }, { "epoch": 1.8359447004608294, "grad_norm": 1.3036366063511584, "learning_rate": 3.666823350485848e-08, "loss": 0.7270755767822266, "step": 7968 }, { "epoch": 1.8361751152073733, "grad_norm": 1.2757403346821974, "learning_rate": 3.656608047679744e-08, "loss": 0.654710054397583, "step": 7969 }, { "epoch": 1.836405529953917, "grad_norm": 1.3173622827867584, "learning_rate": 3.6464067289894485e-08, "loss": 0.688032329082489, "step": 7970 }, { "epoch": 1.8366359447004608, "grad_norm": 1.610615012564481, "learning_rate": 3.6362193958956457e-08, "loss": 0.901115894317627, "step": 7971 }, { "epoch": 1.8368663594470047, "grad_norm": 1.116601972108686, "learning_rate": 3.6260460498770404e-08, "loss": 0.7335774302482605, "step": 7972 }, { "epoch": 1.8370967741935482, "grad_norm": 1.386903572934919, "learning_rate": 3.615886692410275e-08, "loss": 0.8056570291519165, "step": 7973 }, { "epoch": 1.8373271889400922, "grad_norm": 1.0398578754417405, "learning_rate": 3.6057413249699356e-08, "loss": 0.82081538438797, "step": 7974 }, { "epoch": 1.837557603686636, "grad_norm": 1.2589683870881863, "learning_rate": 3.595609949028655e-08, "loss": 0.7741475105285645, "step": 7975 }, { "epoch": 1.8377880184331796, "grad_norm": 1.4550225731476647, "learning_rate": 3.5854925660569693e-08, "loss": 0.9020792245864868, "step": 7976 }, { "epoch": 1.8380184331797236, "grad_norm": 1.395018589671643, "learning_rate": 3.57538917752338e-08, "loss": 0.759677529335022, "step": 7977 }, { "epoch": 1.8382488479262673, "grad_norm": 1.2528132061795532, "learning_rate": 3.565299784894427e-08, "loss": 0.6658498644828796, "step": 7978 }, { "epoch": 1.838479262672811, "grad_norm": 1.156561409904186, "learning_rate": 3.5552243896345254e-08, "loss": 0.8359798192977905, "step": 7979 }, { "epoch": 1.838709677419355, "grad_norm": 0.9586985661683237, "learning_rate": 3.545162993206141e-08, "loss": 0.656216025352478, "step": 7980 }, { "epoch": 1.8389400921658985, "grad_norm": 1.1907827843907386, "learning_rate": 3.53511559706966e-08, "loss": 0.7783077359199524, "step": 7981 }, { "epoch": 1.8391705069124424, "grad_norm": 1.315887741405374, "learning_rate": 3.525082202683427e-08, "loss": 0.7726818919181824, "step": 7982 }, { "epoch": 1.8394009216589862, "grad_norm": 1.203190333477806, "learning_rate": 3.5150628115038213e-08, "loss": 0.6797339916229248, "step": 7983 }, { "epoch": 1.83963133640553, "grad_norm": 1.6491537372199485, "learning_rate": 3.505057424985114e-08, "loss": 0.818444013595581, "step": 7984 }, { "epoch": 1.8398617511520738, "grad_norm": 1.2385444618355612, "learning_rate": 3.495066044579564e-08, "loss": 0.716003954410553, "step": 7985 }, { "epoch": 1.8400921658986173, "grad_norm": 1.1184726381698433, "learning_rate": 3.485088671737435e-08, "loss": 0.8214380741119385, "step": 7986 }, { "epoch": 1.8403225806451613, "grad_norm": 1.2891166927609845, "learning_rate": 3.475125307906923e-08, "loss": 0.8004239797592163, "step": 7987 }, { "epoch": 1.840552995391705, "grad_norm": 1.0064244623457703, "learning_rate": 3.465175954534183e-08, "loss": 0.724868655204773, "step": 7988 }, { "epoch": 1.8407834101382488, "grad_norm": 1.2194713737299876, "learning_rate": 3.455240613063359e-08, "loss": 0.6774435043334961, "step": 7989 }, { "epoch": 1.8410138248847927, "grad_norm": 1.2000954990034474, "learning_rate": 3.445319284936543e-08, "loss": 0.7618406414985657, "step": 7990 }, { "epoch": 1.8412442396313364, "grad_norm": 1.2446761227229344, "learning_rate": 3.4354119715938154e-08, "loss": 0.8176794648170471, "step": 7991 }, { "epoch": 1.8414746543778802, "grad_norm": 1.3311989323291133, "learning_rate": 3.4255186744732045e-08, "loss": 0.7540123462677002, "step": 7992 }, { "epoch": 1.841705069124424, "grad_norm": 0.8317940065053944, "learning_rate": 3.4156393950107164e-08, "loss": 0.6888976097106934, "step": 7993 }, { "epoch": 1.8419354838709676, "grad_norm": 0.9229557772464766, "learning_rate": 3.405774134640294e-08, "loss": 0.6719028949737549, "step": 7994 }, { "epoch": 1.8421658986175116, "grad_norm": 1.2216480626353798, "learning_rate": 3.3959228947938903e-08, "loss": 0.817806601524353, "step": 7995 }, { "epoch": 1.8423963133640553, "grad_norm": 1.176727717908757, "learning_rate": 3.3860856769013955e-08, "loss": 0.6681252717971802, "step": 7996 }, { "epoch": 1.842626728110599, "grad_norm": 1.261442308873967, "learning_rate": 3.3762624823906574e-08, "loss": 0.7965174317359924, "step": 7997 }, { "epoch": 1.842857142857143, "grad_norm": 1.163849986057629, "learning_rate": 3.366453312687512e-08, "loss": 0.714171826839447, "step": 7998 }, { "epoch": 1.8430875576036865, "grad_norm": 1.2077995913515678, "learning_rate": 3.356658169215743e-08, "loss": 0.7489287853240967, "step": 7999 }, { "epoch": 1.8433179723502304, "grad_norm": 1.270011813451473, "learning_rate": 3.34687705339709e-08, "loss": 0.790866494178772, "step": 8000 }, { "epoch": 1.8435483870967742, "grad_norm": 0.9665221846950844, "learning_rate": 3.337109966651297e-08, "loss": 0.8208349943161011, "step": 8001 }, { "epoch": 1.8437788018433179, "grad_norm": 1.1715709525124653, "learning_rate": 3.3273569103960174e-08, "loss": 0.7974207401275635, "step": 8002 }, { "epoch": 1.8440092165898618, "grad_norm": 1.1483232930238036, "learning_rate": 3.317617886046908e-08, "loss": 0.751643180847168, "step": 8003 }, { "epoch": 1.8442396313364056, "grad_norm": 1.3210448516681466, "learning_rate": 3.3078928950175724e-08, "loss": 0.9231137037277222, "step": 8004 }, { "epoch": 1.8444700460829493, "grad_norm": 1.1496984894908708, "learning_rate": 3.2981819387195683e-08, "loss": 0.7975907325744629, "step": 8005 }, { "epoch": 1.8447004608294932, "grad_norm": 1.1807761173209448, "learning_rate": 3.288485018562448e-08, "loss": 0.7467124462127686, "step": 8006 }, { "epoch": 1.8449308755760367, "grad_norm": 1.1558703241619663, "learning_rate": 3.278802135953706e-08, "loss": 0.7983080148696899, "step": 8007 }, { "epoch": 1.8451612903225807, "grad_norm": 1.2273424689042212, "learning_rate": 3.269133292298787e-08, "loss": 0.7991635799407959, "step": 8008 }, { "epoch": 1.8453917050691244, "grad_norm": 1.3284825495150037, "learning_rate": 3.259478489001111e-08, "loss": 0.9309900403022766, "step": 8009 }, { "epoch": 1.8456221198156681, "grad_norm": 1.4898197506974649, "learning_rate": 3.249837727462068e-08, "loss": 0.7667444944381714, "step": 8010 }, { "epoch": 1.845852534562212, "grad_norm": 1.0693184262343387, "learning_rate": 3.2402110090809955e-08, "loss": 0.722775936126709, "step": 8011 }, { "epoch": 1.8460829493087556, "grad_norm": 1.2061345728793884, "learning_rate": 3.230598335255208e-08, "loss": 0.7049660682678223, "step": 8012 }, { "epoch": 1.8463133640552996, "grad_norm": 1.2538545243397632, "learning_rate": 3.220999707379957e-08, "loss": 0.7543717622756958, "step": 8013 }, { "epoch": 1.8465437788018433, "grad_norm": 1.0254969440317054, "learning_rate": 3.2114151268484825e-08, "loss": 0.705594539642334, "step": 8014 }, { "epoch": 1.846774193548387, "grad_norm": 1.3381301652737214, "learning_rate": 3.201844595051972e-08, "loss": 0.8663946390151978, "step": 8015 }, { "epoch": 1.847004608294931, "grad_norm": 1.2931743474180666, "learning_rate": 3.192288113379582e-08, "loss": 0.6990827918052673, "step": 8016 }, { "epoch": 1.8472350230414747, "grad_norm": 1.3047302382268444, "learning_rate": 3.182745683218391e-08, "loss": 0.8494592905044556, "step": 8017 }, { "epoch": 1.8474654377880184, "grad_norm": 1.1964557388323078, "learning_rate": 3.173217305953524e-08, "loss": 0.7689815163612366, "step": 8018 }, { "epoch": 1.8476958525345624, "grad_norm": 1.0869127948311592, "learning_rate": 3.163702982967964e-08, "loss": 0.7961923480033875, "step": 8019 }, { "epoch": 1.8479262672811059, "grad_norm": 1.1859545141002084, "learning_rate": 3.154202715642729e-08, "loss": 0.7290681600570679, "step": 8020 }, { "epoch": 1.8481566820276498, "grad_norm": 1.2696204436408378, "learning_rate": 3.1447165053567594e-08, "loss": 0.7486605048179626, "step": 8021 }, { "epoch": 1.8483870967741935, "grad_norm": 1.2409295752272667, "learning_rate": 3.135244353486977e-08, "loss": 0.8263967633247375, "step": 8022 }, { "epoch": 1.8486175115207373, "grad_norm": 1.3436046094044156, "learning_rate": 3.1257862614082254e-08, "loss": 0.7462657690048218, "step": 8023 }, { "epoch": 1.8488479262672812, "grad_norm": 1.7105756282592546, "learning_rate": 3.116342230493374e-08, "loss": 0.9305819272994995, "step": 8024 }, { "epoch": 1.8490783410138247, "grad_norm": 1.1597494849443377, "learning_rate": 3.1069122621131925e-08, "loss": 0.7202557325363159, "step": 8025 }, { "epoch": 1.8493087557603687, "grad_norm": 1.0985806176068067, "learning_rate": 3.097496357636409e-08, "loss": 0.723913311958313, "step": 8026 }, { "epoch": 1.8495391705069124, "grad_norm": 1.427360065972912, "learning_rate": 3.088094518429751e-08, "loss": 0.7067763805389404, "step": 8027 }, { "epoch": 1.8497695852534561, "grad_norm": 1.3110685780585822, "learning_rate": 3.078706745857884e-08, "loss": 0.7853527665138245, "step": 8028 }, { "epoch": 1.85, "grad_norm": 1.228901367807535, "learning_rate": 3.0693330412834285e-08, "loss": 0.7183133363723755, "step": 8029 }, { "epoch": 1.8502304147465438, "grad_norm": 1.1077136741228983, "learning_rate": 3.0599734060669626e-08, "loss": 0.8041096925735474, "step": 8030 }, { "epoch": 1.8504608294930875, "grad_norm": 1.0495776729925357, "learning_rate": 3.050627841567022e-08, "loss": 0.7259166240692139, "step": 8031 }, { "epoch": 1.8506912442396315, "grad_norm": 1.5016516908972768, "learning_rate": 3.041296349140099e-08, "loss": 0.8844292163848877, "step": 8032 }, { "epoch": 1.850921658986175, "grad_norm": 1.2846098007302502, "learning_rate": 3.031978930140666e-08, "loss": 0.7566810846328735, "step": 8033 }, { "epoch": 1.851152073732719, "grad_norm": 1.4566612706299762, "learning_rate": 3.0226755859211085e-08, "loss": 0.8365379571914673, "step": 8034 }, { "epoch": 1.8513824884792627, "grad_norm": 1.03909937329538, "learning_rate": 3.013386317831823e-08, "loss": 0.6786175966262817, "step": 8035 }, { "epoch": 1.8516129032258064, "grad_norm": 0.8445952555360507, "learning_rate": 3.0041111272211206e-08, "loss": 0.5450198650360107, "step": 8036 }, { "epoch": 1.8518433179723504, "grad_norm": 1.3789732970427235, "learning_rate": 2.994850015435269e-08, "loss": 0.8792393207550049, "step": 8037 }, { "epoch": 1.8520737327188939, "grad_norm": 1.1270074296152806, "learning_rate": 2.985602983818525e-08, "loss": 0.8463287353515625, "step": 8038 }, { "epoch": 1.8523041474654378, "grad_norm": 1.2927452986312467, "learning_rate": 2.9763700337130827e-08, "loss": 0.77659010887146, "step": 8039 }, { "epoch": 1.8525345622119815, "grad_norm": 0.8652026295993711, "learning_rate": 2.9671511664590698e-08, "loss": 0.6180428266525269, "step": 8040 }, { "epoch": 1.8527649769585253, "grad_norm": 1.2049419514211082, "learning_rate": 2.9579463833946273e-08, "loss": 0.7886658906936646, "step": 8041 }, { "epoch": 1.8529953917050692, "grad_norm": 1.35078980115234, "learning_rate": 2.9487556858557972e-08, "loss": 0.8371871709823608, "step": 8042 }, { "epoch": 1.853225806451613, "grad_norm": 1.1555875449847217, "learning_rate": 2.9395790751765904e-08, "loss": 0.7082366347312927, "step": 8043 }, { "epoch": 1.8534562211981567, "grad_norm": 1.2745414422252506, "learning_rate": 2.930416552689008e-08, "loss": 0.7866584062576294, "step": 8044 }, { "epoch": 1.8536866359447006, "grad_norm": 1.229235509048025, "learning_rate": 2.9212681197229527e-08, "loss": 0.8789514303207397, "step": 8045 }, { "epoch": 1.8539170506912441, "grad_norm": 1.0208282620264577, "learning_rate": 2.9121337776063072e-08, "loss": 0.7041239738464355, "step": 8046 }, { "epoch": 1.854147465437788, "grad_norm": 1.3204473756112607, "learning_rate": 2.9030135276649215e-08, "loss": 0.8290516138076782, "step": 8047 }, { "epoch": 1.8543778801843318, "grad_norm": 1.2424965520320617, "learning_rate": 2.8939073712225813e-08, "loss": 0.8532444834709167, "step": 8048 }, { "epoch": 1.8546082949308755, "grad_norm": 1.375111764710695, "learning_rate": 2.8848153096010407e-08, "loss": 0.8635869026184082, "step": 8049 }, { "epoch": 1.8548387096774195, "grad_norm": 1.3481674122248803, "learning_rate": 2.8757373441199885e-08, "loss": 0.723747730255127, "step": 8050 }, { "epoch": 1.855069124423963, "grad_norm": 1.3399875040651272, "learning_rate": 2.8666734760970925e-08, "loss": 0.893456220626831, "step": 8051 }, { "epoch": 1.855299539170507, "grad_norm": 1.2732338285848108, "learning_rate": 2.8576237068479335e-08, "loss": 0.6871381998062134, "step": 8052 }, { "epoch": 1.8555299539170507, "grad_norm": 1.0534516506243037, "learning_rate": 2.848588037686106e-08, "loss": 0.7820594906806946, "step": 8053 }, { "epoch": 1.8557603686635944, "grad_norm": 1.0873243123362593, "learning_rate": 2.839566469923105e-08, "loss": 0.7783479690551758, "step": 8054 }, { "epoch": 1.8559907834101383, "grad_norm": 1.25602911336094, "learning_rate": 2.8305590048684268e-08, "loss": 0.7612866163253784, "step": 8055 }, { "epoch": 1.856221198156682, "grad_norm": 1.0752346215773687, "learning_rate": 2.82156564382946e-08, "loss": 0.7483590841293335, "step": 8056 }, { "epoch": 1.8564516129032258, "grad_norm": 1.0547692532993052, "learning_rate": 2.812586388111582e-08, "loss": 0.7553579807281494, "step": 8057 }, { "epoch": 1.8566820276497698, "grad_norm": 1.0828193353243305, "learning_rate": 2.80362123901815e-08, "loss": 0.8895602226257324, "step": 8058 }, { "epoch": 1.8569124423963133, "grad_norm": 1.1481937931103232, "learning_rate": 2.794670197850424e-08, "loss": 0.7974053621292114, "step": 8059 }, { "epoch": 1.8571428571428572, "grad_norm": 1.0112292806236838, "learning_rate": 2.7857332659076193e-08, "loss": 0.7730135917663574, "step": 8060 }, { "epoch": 1.857373271889401, "grad_norm": 1.115608079627536, "learning_rate": 2.7768104444869434e-08, "loss": 0.7258738279342651, "step": 8061 }, { "epoch": 1.8576036866359447, "grad_norm": 1.3030363105586589, "learning_rate": 2.7679017348835264e-08, "loss": 0.7068890333175659, "step": 8062 }, { "epoch": 1.8578341013824886, "grad_norm": 1.3041822573340287, "learning_rate": 2.7590071383904568e-08, "loss": 0.8741557002067566, "step": 8063 }, { "epoch": 1.8580645161290321, "grad_norm": 1.3236368529143523, "learning_rate": 2.750126656298768e-08, "loss": 0.8723797798156738, "step": 8064 }, { "epoch": 1.858294930875576, "grad_norm": 1.2019235064586495, "learning_rate": 2.7412602898974514e-08, "loss": 0.8510957956314087, "step": 8065 }, { "epoch": 1.8585253456221198, "grad_norm": 0.8996466342772348, "learning_rate": 2.732408040473444e-08, "loss": 0.6875216960906982, "step": 8066 }, { "epoch": 1.8587557603686635, "grad_norm": 1.235948717542994, "learning_rate": 2.7235699093116515e-08, "loss": 0.8057721257209778, "step": 8067 }, { "epoch": 1.8589861751152075, "grad_norm": 1.1066694710477807, "learning_rate": 2.7147458976949145e-08, "loss": 0.7547335624694824, "step": 8068 }, { "epoch": 1.8592165898617512, "grad_norm": 1.2565080056809024, "learning_rate": 2.7059360069040193e-08, "loss": 0.8301708102226257, "step": 8069 }, { "epoch": 1.859447004608295, "grad_norm": 1.354839024861171, "learning_rate": 2.69714023821771e-08, "loss": 0.8313431143760681, "step": 8070 }, { "epoch": 1.8596774193548387, "grad_norm": 1.2482736529337517, "learning_rate": 2.6883585929126872e-08, "loss": 0.6631792783737183, "step": 8071 }, { "epoch": 1.8599078341013824, "grad_norm": 1.342165180678223, "learning_rate": 2.679591072263576e-08, "loss": 0.7643609046936035, "step": 8072 }, { "epoch": 1.8601382488479263, "grad_norm": 1.5670037508761703, "learning_rate": 2.670837677543003e-08, "loss": 0.8543407917022705, "step": 8073 }, { "epoch": 1.86036866359447, "grad_norm": 1.0908415634382522, "learning_rate": 2.662098410021485e-08, "loss": 0.8051489591598511, "step": 8074 }, { "epoch": 1.8605990783410138, "grad_norm": 1.1493604797084143, "learning_rate": 2.653373270967518e-08, "loss": 0.7065767645835876, "step": 8075 }, { "epoch": 1.8608294930875577, "grad_norm": 0.9852441728403762, "learning_rate": 2.6446622616475566e-08, "loss": 0.672603189945221, "step": 8076 }, { "epoch": 1.8610599078341012, "grad_norm": 1.2739019796547877, "learning_rate": 2.6359653833259776e-08, "loss": 0.7201080918312073, "step": 8077 }, { "epoch": 1.8612903225806452, "grad_norm": 1.156933357533599, "learning_rate": 2.627282637265149e-08, "loss": 0.7147494554519653, "step": 8078 }, { "epoch": 1.861520737327189, "grad_norm": 1.3793116889121875, "learning_rate": 2.6186140247253297e-08, "loss": 0.7051082253456116, "step": 8079 }, { "epoch": 1.8617511520737327, "grad_norm": 1.2253670327071573, "learning_rate": 2.6099595469647683e-08, "loss": 0.5786069631576538, "step": 8080 }, { "epoch": 1.8619815668202766, "grad_norm": 1.2391603364729231, "learning_rate": 2.6013192052396493e-08, "loss": 0.8880232572555542, "step": 8081 }, { "epoch": 1.8622119815668203, "grad_norm": 1.3577487615179598, "learning_rate": 2.5926930008041137e-08, "loss": 0.9295729398727417, "step": 8082 }, { "epoch": 1.862442396313364, "grad_norm": 1.1507407274303025, "learning_rate": 2.5840809349102378e-08, "loss": 0.6963248252868652, "step": 8083 }, { "epoch": 1.8626728110599078, "grad_norm": 1.2547838683138512, "learning_rate": 2.5754830088080548e-08, "loss": 0.8788298964500427, "step": 8084 }, { "epoch": 1.8629032258064515, "grad_norm": 1.3540782368440085, "learning_rate": 2.5668992237455334e-08, "loss": 0.7454242706298828, "step": 8085 }, { "epoch": 1.8631336405529955, "grad_norm": 1.1950812039913048, "learning_rate": 2.558329580968599e-08, "loss": 0.7659780383110046, "step": 8086 }, { "epoch": 1.8633640552995392, "grad_norm": 1.5016734977487585, "learning_rate": 2.5497740817211456e-08, "loss": 0.8799881935119629, "step": 8087 }, { "epoch": 1.863594470046083, "grad_norm": 0.9825172132169212, "learning_rate": 2.5412327272449684e-08, "loss": 0.7319198846817017, "step": 8088 }, { "epoch": 1.8638248847926269, "grad_norm": 1.0689400870779366, "learning_rate": 2.532705518779854e-08, "loss": 0.6450645923614502, "step": 8089 }, { "epoch": 1.8640552995391704, "grad_norm": 1.1783740361717576, "learning_rate": 2.52419245756349e-08, "loss": 0.7213672399520874, "step": 8090 }, { "epoch": 1.8642857142857143, "grad_norm": 1.3483335750734096, "learning_rate": 2.515693544831554e-08, "loss": 0.790163516998291, "step": 8091 }, { "epoch": 1.864516129032258, "grad_norm": 1.2871905619529331, "learning_rate": 2.507208781817638e-08, "loss": 0.8324074745178223, "step": 8092 }, { "epoch": 1.8647465437788018, "grad_norm": 1.4095960145667545, "learning_rate": 2.4987381697533227e-08, "loss": 0.879224419593811, "step": 8093 }, { "epoch": 1.8649769585253457, "grad_norm": 1.4121148041878757, "learning_rate": 2.4902817098680807e-08, "loss": 0.8668204545974731, "step": 8094 }, { "epoch": 1.8652073732718892, "grad_norm": 1.1605042845973315, "learning_rate": 2.481839403389341e-08, "loss": 0.6737711429595947, "step": 8095 }, { "epoch": 1.8654377880184332, "grad_norm": 1.3482506919608122, "learning_rate": 2.4734112515425343e-08, "loss": 0.8948237299919128, "step": 8096 }, { "epoch": 1.865668202764977, "grad_norm": 1.2927456093148797, "learning_rate": 2.4649972555509823e-08, "loss": 0.6866592168807983, "step": 8097 }, { "epoch": 1.8658986175115206, "grad_norm": 1.2040358944727056, "learning_rate": 2.4565974166359416e-08, "loss": 0.8852076530456543, "step": 8098 }, { "epoch": 1.8661290322580646, "grad_norm": 1.1474664367024714, "learning_rate": 2.44821173601667e-08, "loss": 0.7402448654174805, "step": 8099 }, { "epoch": 1.8663594470046083, "grad_norm": 1.299234544884085, "learning_rate": 2.439840214910316e-08, "loss": 0.8536320924758911, "step": 8100 }, { "epoch": 1.866589861751152, "grad_norm": 1.1550631938568499, "learning_rate": 2.4314828545319965e-08, "loss": 0.6408628225326538, "step": 8101 }, { "epoch": 1.866820276497696, "grad_norm": 1.188548223378954, "learning_rate": 2.4231396560947858e-08, "loss": 0.9578930735588074, "step": 8102 }, { "epoch": 1.8670506912442395, "grad_norm": 1.8289817367376688, "learning_rate": 2.4148106208096708e-08, "loss": 0.7606109976768494, "step": 8103 }, { "epoch": 1.8672811059907835, "grad_norm": 0.9826738512020193, "learning_rate": 2.4064957498856177e-08, "loss": 0.7446529865264893, "step": 8104 }, { "epoch": 1.8675115207373272, "grad_norm": 1.0744366993530696, "learning_rate": 2.398195044529505e-08, "loss": 0.6086497902870178, "step": 8105 }, { "epoch": 1.867741935483871, "grad_norm": 1.5561440229209103, "learning_rate": 2.389908505946181e-08, "loss": 0.9348995685577393, "step": 8106 }, { "epoch": 1.8679723502304149, "grad_norm": 1.1497120508700005, "learning_rate": 2.381636135338405e-08, "loss": 0.6817007660865784, "step": 8107 }, { "epoch": 1.8682027649769584, "grad_norm": 1.0815805532535518, "learning_rate": 2.373377933906917e-08, "loss": 0.7228778600692749, "step": 8108 }, { "epoch": 1.8684331797235023, "grad_norm": 1.2824972753864794, "learning_rate": 2.3651339028503913e-08, "loss": 0.6974154114723206, "step": 8109 }, { "epoch": 1.868663594470046, "grad_norm": 1.2746687740486187, "learning_rate": 2.3569040433654264e-08, "loss": 0.8025680780410767, "step": 8110 }, { "epoch": 1.8688940092165898, "grad_norm": 1.0439186994105132, "learning_rate": 2.3486883566465777e-08, "loss": 0.7570391893386841, "step": 8111 }, { "epoch": 1.8691244239631337, "grad_norm": 1.1353343636911755, "learning_rate": 2.3404868438863246e-08, "loss": 0.7982438802719116, "step": 8112 }, { "epoch": 1.8693548387096774, "grad_norm": 0.948053216671403, "learning_rate": 2.3322995062751372e-08, "loss": 0.6615588665008545, "step": 8113 }, { "epoch": 1.8695852534562212, "grad_norm": 1.1794145616088556, "learning_rate": 2.324126345001376e-08, "loss": 0.7748852968215942, "step": 8114 }, { "epoch": 1.8698156682027651, "grad_norm": 1.146675047414541, "learning_rate": 2.3159673612513587e-08, "loss": 0.7238468527793884, "step": 8115 }, { "epoch": 1.8700460829493086, "grad_norm": 1.2843830020573481, "learning_rate": 2.3078225562093822e-08, "loss": 0.8146705627441406, "step": 8116 }, { "epoch": 1.8702764976958526, "grad_norm": 1.0747488287412188, "learning_rate": 2.2996919310576235e-08, "loss": 0.8393594026565552, "step": 8117 }, { "epoch": 1.8705069124423963, "grad_norm": 1.6346887094004536, "learning_rate": 2.2915754869762384e-08, "loss": 0.9619652032852173, "step": 8118 }, { "epoch": 1.87073732718894, "grad_norm": 1.6641290836048537, "learning_rate": 2.2834732251433286e-08, "loss": 0.8301321268081665, "step": 8119 }, { "epoch": 1.870967741935484, "grad_norm": 1.2687107297135523, "learning_rate": 2.2753851467349206e-08, "loss": 0.8236079812049866, "step": 8120 }, { "epoch": 1.8711981566820275, "grad_norm": 1.430457986003777, "learning_rate": 2.267311252924975e-08, "loss": 0.9007565379142761, "step": 8121 }, { "epoch": 1.8714285714285714, "grad_norm": 1.1827948115854126, "learning_rate": 2.2592515448854432e-08, "loss": 0.7430707216262817, "step": 8122 }, { "epoch": 1.8716589861751152, "grad_norm": 1.17432989990484, "learning_rate": 2.2512060237861452e-08, "loss": 0.7562465667724609, "step": 8123 }, { "epoch": 1.871889400921659, "grad_norm": 1.1839994711227122, "learning_rate": 2.24317469079488e-08, "loss": 0.7736096978187561, "step": 8124 }, { "epoch": 1.8721198156682028, "grad_norm": 1.1809968020267403, "learning_rate": 2.2351575470774153e-08, "loss": 0.7652724981307983, "step": 8125 }, { "epoch": 1.8723502304147466, "grad_norm": 1.4664554269524215, "learning_rate": 2.2271545937973978e-08, "loss": 0.8034792542457581, "step": 8126 }, { "epoch": 1.8725806451612903, "grad_norm": 1.2107856133228136, "learning_rate": 2.219165832116454e-08, "loss": 0.6158101558685303, "step": 8127 }, { "epoch": 1.8728110599078343, "grad_norm": 1.1984460742665393, "learning_rate": 2.2111912631941564e-08, "loss": 0.6514682769775391, "step": 8128 }, { "epoch": 1.8730414746543778, "grad_norm": 1.1090676234846621, "learning_rate": 2.203230888187979e-08, "loss": 0.833041787147522, "step": 8129 }, { "epoch": 1.8732718894009217, "grad_norm": 1.3944148742352294, "learning_rate": 2.1952847082533864e-08, "loss": 0.8033208250999451, "step": 8130 }, { "epoch": 1.8735023041474654, "grad_norm": 1.2067904980609332, "learning_rate": 2.187352724543734e-08, "loss": 0.742051362991333, "step": 8131 }, { "epoch": 1.8737327188940092, "grad_norm": 1.2058964422107643, "learning_rate": 2.1794349382103337e-08, "loss": 0.7411169409751892, "step": 8132 }, { "epoch": 1.8739631336405531, "grad_norm": 1.3201479261882787, "learning_rate": 2.171531350402467e-08, "loss": 0.7517165541648865, "step": 8133 }, { "epoch": 1.8741935483870966, "grad_norm": 1.2371172479380752, "learning_rate": 2.1636419622673263e-08, "loss": 0.8010021448135376, "step": 8134 }, { "epoch": 1.8744239631336406, "grad_norm": 1.2501522956166489, "learning_rate": 2.1557667749500187e-08, "loss": 0.7265241742134094, "step": 8135 }, { "epoch": 1.8746543778801843, "grad_norm": 1.191380870353666, "learning_rate": 2.1479057895936403e-08, "loss": 0.6809227466583252, "step": 8136 }, { "epoch": 1.874884792626728, "grad_norm": 1.2737037893770147, "learning_rate": 2.140059007339201e-08, "loss": 0.8235769271850586, "step": 8137 }, { "epoch": 1.875115207373272, "grad_norm": 1.1356268338575812, "learning_rate": 2.132226429325634e-08, "loss": 0.7556289434432983, "step": 8138 }, { "epoch": 1.8753456221198157, "grad_norm": 1.257264783564694, "learning_rate": 2.1244080566898638e-08, "loss": 0.7765048742294312, "step": 8139 }, { "epoch": 1.8755760368663594, "grad_norm": 1.1776465139256578, "learning_rate": 2.1166038905666816e-08, "loss": 0.7637666463851929, "step": 8140 }, { "epoch": 1.8758064516129034, "grad_norm": 1.2471130614608452, "learning_rate": 2.10881393208886e-08, "loss": 0.8413453698158264, "step": 8141 }, { "epoch": 1.8760368663594469, "grad_norm": 1.443351972543058, "learning_rate": 2.101038182387105e-08, "loss": 0.7937475442886353, "step": 8142 }, { "epoch": 1.8762672811059908, "grad_norm": 1.1772607773578063, "learning_rate": 2.0932766425900585e-08, "loss": 0.7654982805252075, "step": 8143 }, { "epoch": 1.8764976958525346, "grad_norm": 1.53397176108589, "learning_rate": 2.0855293138242968e-08, "loss": 0.8950663805007935, "step": 8144 }, { "epoch": 1.8767281105990783, "grad_norm": 1.250929142335872, "learning_rate": 2.077796197214332e-08, "loss": 0.6405420303344727, "step": 8145 }, { "epoch": 1.8769585253456222, "grad_norm": 1.085136655013558, "learning_rate": 2.0700772938826217e-08, "loss": 0.7724314332008362, "step": 8146 }, { "epoch": 1.8771889400921657, "grad_norm": 1.09160242748488, "learning_rate": 2.0623726049495472e-08, "loss": 0.7929061651229858, "step": 8147 }, { "epoch": 1.8774193548387097, "grad_norm": 1.0975195498555617, "learning_rate": 2.0546821315334363e-08, "loss": 0.7207096815109253, "step": 8148 }, { "epoch": 1.8776497695852534, "grad_norm": 1.347240880442127, "learning_rate": 2.0470058747505513e-08, "loss": 0.9234127402305603, "step": 8149 }, { "epoch": 1.8778801843317972, "grad_norm": 1.2189429089634525, "learning_rate": 2.0393438357150906e-08, "loss": 0.9006322026252747, "step": 8150 }, { "epoch": 1.878110599078341, "grad_norm": 0.9863507376975118, "learning_rate": 2.0316960155391972e-08, "loss": 0.6289799809455872, "step": 8151 }, { "epoch": 1.8783410138248848, "grad_norm": 1.117182475586666, "learning_rate": 2.0240624153329168e-08, "loss": 0.8551793098449707, "step": 8152 }, { "epoch": 1.8785714285714286, "grad_norm": 1.1253834649892556, "learning_rate": 2.016443036204285e-08, "loss": 0.8065170645713806, "step": 8153 }, { "epoch": 1.8788018433179725, "grad_norm": 1.0124272640628642, "learning_rate": 2.0088378792592286e-08, "loss": 0.6361274719238281, "step": 8154 }, { "epoch": 1.879032258064516, "grad_norm": 1.3966308966349001, "learning_rate": 2.0012469456016312e-08, "loss": 0.8539700508117676, "step": 8155 }, { "epoch": 1.87926267281106, "grad_norm": 1.380681857214056, "learning_rate": 1.9936702363333115e-08, "loss": 0.7424989938735962, "step": 8156 }, { "epoch": 1.8794930875576037, "grad_norm": 1.0795560964001287, "learning_rate": 1.9861077525540116e-08, "loss": 0.5831520557403564, "step": 8157 }, { "epoch": 1.8797235023041474, "grad_norm": 1.3034651332513367, "learning_rate": 1.9785594953614093e-08, "loss": 0.8080646991729736, "step": 8158 }, { "epoch": 1.8799539170506914, "grad_norm": 1.3028494466110516, "learning_rate": 1.9710254658511392e-08, "loss": 0.8008537292480469, "step": 8159 }, { "epoch": 1.8801843317972349, "grad_norm": 0.7838996508063781, "learning_rate": 1.9635056651167492e-08, "loss": 0.7317294478416443, "step": 8160 }, { "epoch": 1.8804147465437788, "grad_norm": 1.240068145392807, "learning_rate": 1.956000094249721e-08, "loss": 0.803238034248352, "step": 8161 }, { "epoch": 1.8806451612903226, "grad_norm": 1.1592302203633778, "learning_rate": 1.948508754339506e-08, "loss": 0.7202219367027283, "step": 8162 }, { "epoch": 1.8808755760368663, "grad_norm": 1.3406292816176746, "learning_rate": 1.9410316464734233e-08, "loss": 0.7691160440444946, "step": 8163 }, { "epoch": 1.8811059907834102, "grad_norm": 1.0898220168427848, "learning_rate": 1.933568771736782e-08, "loss": 0.7092962265014648, "step": 8164 }, { "epoch": 1.881336405529954, "grad_norm": 1.3165421464208054, "learning_rate": 1.9261201312128274e-08, "loss": 0.819804310798645, "step": 8165 }, { "epoch": 1.8815668202764977, "grad_norm": 1.2278633726487793, "learning_rate": 1.918685725982694e-08, "loss": 0.9127538204193115, "step": 8166 }, { "epoch": 1.8817972350230416, "grad_norm": 1.198181344272901, "learning_rate": 1.9112655571254855e-08, "loss": 0.8023328185081482, "step": 8167 }, { "epoch": 1.8820276497695851, "grad_norm": 1.1150363141436184, "learning_rate": 1.903859625718218e-08, "loss": 0.723065972328186, "step": 8168 }, { "epoch": 1.882258064516129, "grad_norm": 1.329775802249569, "learning_rate": 1.896467932835877e-08, "loss": 0.7838670611381531, "step": 8169 }, { "epoch": 1.8824884792626728, "grad_norm": 1.0221481880663403, "learning_rate": 1.8890904795513475e-08, "loss": 0.6029871702194214, "step": 8170 }, { "epoch": 1.8827188940092165, "grad_norm": 1.1179619592038208, "learning_rate": 1.8817272669354512e-08, "loss": 0.7622933387756348, "step": 8171 }, { "epoch": 1.8829493087557605, "grad_norm": 1.3471730261003036, "learning_rate": 1.8743782960569444e-08, "loss": 0.7702913284301758, "step": 8172 }, { "epoch": 1.883179723502304, "grad_norm": 1.1115192812221177, "learning_rate": 1.867043567982518e-08, "loss": 0.6385080814361572, "step": 8173 }, { "epoch": 1.883410138248848, "grad_norm": 1.1957117872616694, "learning_rate": 1.8597230837768208e-08, "loss": 0.6886409521102905, "step": 8174 }, { "epoch": 1.8836405529953917, "grad_norm": 1.2615274538141057, "learning_rate": 1.8524168445023803e-08, "loss": 0.7697125673294067, "step": 8175 }, { "epoch": 1.8838709677419354, "grad_norm": 1.2703572064059772, "learning_rate": 1.8451248512197148e-08, "loss": 0.7942332029342651, "step": 8176 }, { "epoch": 1.8841013824884794, "grad_norm": 1.2486681210000266, "learning_rate": 1.8378471049872445e-08, "loss": 0.7751410007476807, "step": 8177 }, { "epoch": 1.884331797235023, "grad_norm": 1.4135289386452112, "learning_rate": 1.8305836068613023e-08, "loss": 0.8650992512702942, "step": 8178 }, { "epoch": 1.8845622119815668, "grad_norm": 1.255590367160678, "learning_rate": 1.8233343578962e-08, "loss": 0.7084495425224304, "step": 8179 }, { "epoch": 1.8847926267281108, "grad_norm": 1.2065933395861381, "learning_rate": 1.8160993591441408e-08, "loss": 0.7428494691848755, "step": 8180 }, { "epoch": 1.8850230414746543, "grad_norm": 1.2721568643853003, "learning_rate": 1.8088786116552844e-08, "loss": 0.7431809902191162, "step": 8181 }, { "epoch": 1.8852534562211982, "grad_norm": 1.5234831289492186, "learning_rate": 1.801672116477715e-08, "loss": 0.8312518000602722, "step": 8182 }, { "epoch": 1.885483870967742, "grad_norm": 1.412977003038852, "learning_rate": 1.7944798746574285e-08, "loss": 0.8574832081794739, "step": 8183 }, { "epoch": 1.8857142857142857, "grad_norm": 1.209006694724365, "learning_rate": 1.7873018872383793e-08, "loss": 0.7716966867446899, "step": 8184 }, { "epoch": 1.8859447004608296, "grad_norm": 1.1984291768693995, "learning_rate": 1.780138155262456e-08, "loss": 0.8536000847816467, "step": 8185 }, { "epoch": 1.8861751152073731, "grad_norm": 1.4411910829910872, "learning_rate": 1.7729886797694606e-08, "loss": 0.6559889316558838, "step": 8186 }, { "epoch": 1.886405529953917, "grad_norm": 1.4146541158068258, "learning_rate": 1.7658534617971065e-08, "loss": 0.7371512651443481, "step": 8187 }, { "epoch": 1.8866359447004608, "grad_norm": 1.5920989952321163, "learning_rate": 1.7587325023810773e-08, "loss": 0.8092008829116821, "step": 8188 }, { "epoch": 1.8868663594470045, "grad_norm": 1.1485577131831675, "learning_rate": 1.751625802554979e-08, "loss": 0.7793067693710327, "step": 8189 }, { "epoch": 1.8870967741935485, "grad_norm": 1.3107398360408737, "learning_rate": 1.7445333633503312e-08, "loss": 0.8102752566337585, "step": 8190 }, { "epoch": 1.8873271889400922, "grad_norm": 0.9411355693415201, "learning_rate": 1.737455185796588e-08, "loss": 0.7141490578651428, "step": 8191 }, { "epoch": 1.887557603686636, "grad_norm": 1.3771499753857814, "learning_rate": 1.7303912709211497e-08, "loss": 0.8010870218276978, "step": 8192 }, { "epoch": 1.8877880184331797, "grad_norm": 1.0040229371574219, "learning_rate": 1.723341619749319e-08, "loss": 0.7945431470870972, "step": 8193 }, { "epoch": 1.8880184331797234, "grad_norm": 1.5084700431378903, "learning_rate": 1.7163062333043544e-08, "loss": 0.765398383140564, "step": 8194 }, { "epoch": 1.8882488479262673, "grad_norm": 1.141763186710756, "learning_rate": 1.709285112607428e-08, "loss": 0.8645910024642944, "step": 8195 }, { "epoch": 1.888479262672811, "grad_norm": 1.4294051802947438, "learning_rate": 1.7022782586776363e-08, "loss": 0.7650351524353027, "step": 8196 }, { "epoch": 1.8887096774193548, "grad_norm": 1.148441042244908, "learning_rate": 1.695285672532043e-08, "loss": 0.8059902191162109, "step": 8197 }, { "epoch": 1.8889400921658988, "grad_norm": 1.3019488561633756, "learning_rate": 1.688307355185592e-08, "loss": 0.8389305472373962, "step": 8198 }, { "epoch": 1.8891705069124423, "grad_norm": 1.3363862822981094, "learning_rate": 1.681343307651173e-08, "loss": 0.755578875541687, "step": 8199 }, { "epoch": 1.8894009216589862, "grad_norm": 1.2754809499843205, "learning_rate": 1.6743935309396218e-08, "loss": 0.822825014591217, "step": 8200 }, { "epoch": 1.88963133640553, "grad_norm": 1.2571266177044025, "learning_rate": 1.667458026059676e-08, "loss": 0.8229342699050903, "step": 8201 }, { "epoch": 1.8898617511520737, "grad_norm": 1.3086181916191966, "learning_rate": 1.6605367940180303e-08, "loss": 0.7142254114151001, "step": 8202 }, { "epoch": 1.8900921658986176, "grad_norm": 1.1722391698259569, "learning_rate": 1.6536298358192812e-08, "loss": 0.8904600739479065, "step": 8203 }, { "epoch": 1.8903225806451613, "grad_norm": 1.151403763105922, "learning_rate": 1.6467371524659603e-08, "loss": 0.8758517503738403, "step": 8204 }, { "epoch": 1.890552995391705, "grad_norm": 1.3083947750625244, "learning_rate": 1.6398587449585555e-08, "loss": 0.7609111666679382, "step": 8205 }, { "epoch": 1.8907834101382488, "grad_norm": 0.9406449994318669, "learning_rate": 1.6329946142954353e-08, "loss": 0.8177064657211304, "step": 8206 }, { "epoch": 1.8910138248847925, "grad_norm": 1.1366142550146048, "learning_rate": 1.626144761472925e-08, "loss": 0.6342105865478516, "step": 8207 }, { "epoch": 1.8912442396313365, "grad_norm": 0.8903675484312013, "learning_rate": 1.6193091874852627e-08, "loss": 0.6025499105453491, "step": 8208 }, { "epoch": 1.8914746543778802, "grad_norm": 1.3017839387858507, "learning_rate": 1.6124878933246543e-08, "loss": 0.78373783826828, "step": 8209 }, { "epoch": 1.891705069124424, "grad_norm": 1.336095893979754, "learning_rate": 1.605680879981164e-08, "loss": 0.8072086572647095, "step": 8210 }, { "epoch": 1.8919354838709679, "grad_norm": 1.5597980072939257, "learning_rate": 1.5988881484428453e-08, "loss": 0.9057372212409973, "step": 8211 }, { "epoch": 1.8921658986175114, "grad_norm": 1.2099616448625954, "learning_rate": 1.592109699695643e-08, "loss": 0.8235929012298584, "step": 8212 }, { "epoch": 1.8923963133640553, "grad_norm": 1.2417707847492958, "learning_rate": 1.5853455347234366e-08, "loss": 0.6610825061798096, "step": 8213 }, { "epoch": 1.892626728110599, "grad_norm": 1.4158986087253451, "learning_rate": 1.5785956545080415e-08, "loss": 0.7152366638183594, "step": 8214 }, { "epoch": 1.8928571428571428, "grad_norm": 1.330885873092923, "learning_rate": 1.5718600600292066e-08, "loss": 0.7971903085708618, "step": 8215 }, { "epoch": 1.8930875576036867, "grad_norm": 1.226467557812747, "learning_rate": 1.565138752264572e-08, "loss": 0.7639449238777161, "step": 8216 }, { "epoch": 1.8933179723502302, "grad_norm": 1.0517976072639703, "learning_rate": 1.5584317321897356e-08, "loss": 0.6396117806434631, "step": 8217 }, { "epoch": 1.8935483870967742, "grad_norm": 1.328962567982178, "learning_rate": 1.5517390007782183e-08, "loss": 0.790566086769104, "step": 8218 }, { "epoch": 1.893778801843318, "grad_norm": 1.6769404862380202, "learning_rate": 1.5450605590014544e-08, "loss": 0.7948310971260071, "step": 8219 }, { "epoch": 1.8940092165898617, "grad_norm": 1.2378052027269906, "learning_rate": 1.5383964078288124e-08, "loss": 0.9425654411315918, "step": 8220 }, { "epoch": 1.8942396313364056, "grad_norm": 1.2441112834124675, "learning_rate": 1.531746548227586e-08, "loss": 0.8001678586006165, "step": 8221 }, { "epoch": 1.8944700460829493, "grad_norm": 0.9072642646135723, "learning_rate": 1.5251109811629915e-08, "loss": 0.6636781692504883, "step": 8222 }, { "epoch": 1.894700460829493, "grad_norm": 1.0313464437335311, "learning_rate": 1.5184897075981807e-08, "loss": 0.7884416580200195, "step": 8223 }, { "epoch": 1.894930875576037, "grad_norm": 1.0907885139753422, "learning_rate": 1.511882728494218e-08, "loss": 0.6888208389282227, "step": 8224 }, { "epoch": 1.8951612903225805, "grad_norm": 1.3461823033287323, "learning_rate": 1.5052900448100815e-08, "loss": 0.7253614664077759, "step": 8225 }, { "epoch": 1.8953917050691245, "grad_norm": 1.2272377599078015, "learning_rate": 1.498711657502716e-08, "loss": 0.7865983843803406, "step": 8226 }, { "epoch": 1.8956221198156682, "grad_norm": 1.4908955714231082, "learning_rate": 1.492147567526947e-08, "loss": 0.8778063654899597, "step": 8227 }, { "epoch": 1.895852534562212, "grad_norm": 1.2263224402103408, "learning_rate": 1.4855977758355675e-08, "loss": 0.7812581062316895, "step": 8228 }, { "epoch": 1.8960829493087559, "grad_norm": 1.2890011409819144, "learning_rate": 1.4790622833792287e-08, "loss": 0.7160226106643677, "step": 8229 }, { "epoch": 1.8963133640552994, "grad_norm": 1.1613199880989007, "learning_rate": 1.472541091106594e-08, "loss": 0.8187412619590759, "step": 8230 }, { "epoch": 1.8965437788018433, "grad_norm": 1.1653251647412382, "learning_rate": 1.4660341999641834e-08, "loss": 0.7517846822738647, "step": 8231 }, { "epoch": 1.896774193548387, "grad_norm": 1.3673338656755198, "learning_rate": 1.4595416108964753e-08, "loss": 0.9230127334594727, "step": 8232 }, { "epoch": 1.8970046082949308, "grad_norm": 1.228175308993719, "learning_rate": 1.4530633248458269e-08, "loss": 0.6803582906723022, "step": 8233 }, { "epoch": 1.8972350230414747, "grad_norm": 1.2890219242119376, "learning_rate": 1.4465993427525968e-08, "loss": 0.8444511294364929, "step": 8234 }, { "epoch": 1.8974654377880185, "grad_norm": 1.4479761110450609, "learning_rate": 1.4401496655550016e-08, "loss": 0.7622519731521606, "step": 8235 }, { "epoch": 1.8976958525345622, "grad_norm": 1.20875065982799, "learning_rate": 1.4337142941892033e-08, "loss": 0.687129020690918, "step": 8236 }, { "epoch": 1.8979262672811061, "grad_norm": 1.1827775538431895, "learning_rate": 1.4272932295892992e-08, "loss": 0.6421219110488892, "step": 8237 }, { "epoch": 1.8981566820276496, "grad_norm": 1.2669401147896007, "learning_rate": 1.4208864726872772e-08, "loss": 0.7829388380050659, "step": 8238 }, { "epoch": 1.8983870967741936, "grad_norm": 1.3482974956529734, "learning_rate": 1.4144940244130821e-08, "loss": 0.7754424810409546, "step": 8239 }, { "epoch": 1.8986175115207373, "grad_norm": 1.1130898544931584, "learning_rate": 1.4081158856945719e-08, "loss": 0.6544859409332275, "step": 8240 }, { "epoch": 1.898847926267281, "grad_norm": 1.0822240775455856, "learning_rate": 1.4017520574575282e-08, "loss": 0.8020427227020264, "step": 8241 }, { "epoch": 1.899078341013825, "grad_norm": 1.1350657169907092, "learning_rate": 1.3954025406256343e-08, "loss": 0.7343212366104126, "step": 8242 }, { "epoch": 1.8993087557603685, "grad_norm": 1.2792336145941459, "learning_rate": 1.3890673361205418e-08, "loss": 0.7643232345581055, "step": 8243 }, { "epoch": 1.8995391705069125, "grad_norm": 1.212662168320899, "learning_rate": 1.3827464448617709e-08, "loss": 0.7806165814399719, "step": 8244 }, { "epoch": 1.8997695852534562, "grad_norm": 1.6104194734157218, "learning_rate": 1.3764398677667988e-08, "loss": 0.8533280491828918, "step": 8245 }, { "epoch": 1.9, "grad_norm": 1.1289941083869026, "learning_rate": 1.3701476057510264e-08, "loss": 0.773565411567688, "step": 8246 }, { "epoch": 1.9002304147465439, "grad_norm": 1.1091300492504157, "learning_rate": 1.3638696597277677e-08, "loss": 0.7752503752708435, "step": 8247 }, { "epoch": 1.9004608294930876, "grad_norm": 0.9880656776459645, "learning_rate": 1.3576060306082383e-08, "loss": 0.7466747760772705, "step": 8248 }, { "epoch": 1.9006912442396313, "grad_norm": 1.2177337280417093, "learning_rate": 1.3513567193016106e-08, "loss": 0.8103033304214478, "step": 8249 }, { "epoch": 1.9009216589861753, "grad_norm": 1.0248826665714235, "learning_rate": 1.3451217267149595e-08, "loss": 0.6501287817955017, "step": 8250 }, { "epoch": 1.9011520737327188, "grad_norm": 1.210107770730306, "learning_rate": 1.3389010537532941e-08, "loss": 0.7329230308532715, "step": 8251 }, { "epoch": 1.9013824884792627, "grad_norm": 1.3978474783131303, "learning_rate": 1.3326947013195255e-08, "loss": 0.8413917422294617, "step": 8252 }, { "epoch": 1.9016129032258065, "grad_norm": 1.4081927433558092, "learning_rate": 1.3265026703144999e-08, "loss": 0.7283090353012085, "step": 8253 }, { "epoch": 1.9018433179723502, "grad_norm": 1.2553133709092965, "learning_rate": 1.3203249616369872e-08, "loss": 0.8378126621246338, "step": 8254 }, { "epoch": 1.9020737327188941, "grad_norm": 1.099276496142028, "learning_rate": 1.3141615761836811e-08, "loss": 0.7675777673721313, "step": 8255 }, { "epoch": 1.9023041474654376, "grad_norm": 1.6916159414604328, "learning_rate": 1.308012514849155e-08, "loss": 0.6448104381561279, "step": 8256 }, { "epoch": 1.9025345622119816, "grad_norm": 1.3264486635424506, "learning_rate": 1.3018777785259838e-08, "loss": 0.8024395704269409, "step": 8257 }, { "epoch": 1.9027649769585253, "grad_norm": 1.1900370575281645, "learning_rate": 1.2957573681045887e-08, "loss": 0.8159325122833252, "step": 8258 }, { "epoch": 1.902995391705069, "grad_norm": 1.1100937535082447, "learning_rate": 1.2896512844733365e-08, "loss": 0.7916233539581299, "step": 8259 }, { "epoch": 1.903225806451613, "grad_norm": 1.2408177778484295, "learning_rate": 1.2835595285185296e-08, "loss": 0.798140823841095, "step": 8260 }, { "epoch": 1.9034562211981567, "grad_norm": 1.2142666252173266, "learning_rate": 1.277482101124383e-08, "loss": 0.7881651520729065, "step": 8261 }, { "epoch": 1.9036866359447004, "grad_norm": 1.3615775077613546, "learning_rate": 1.2714190031730021e-08, "loss": 0.7023189663887024, "step": 8262 }, { "epoch": 1.9039170506912444, "grad_norm": 1.2537620544817238, "learning_rate": 1.2653702355444606e-08, "loss": 0.8286309242248535, "step": 8263 }, { "epoch": 1.904147465437788, "grad_norm": 1.4181409914325045, "learning_rate": 1.259335799116723e-08, "loss": 0.7626973986625671, "step": 8264 }, { "epoch": 1.9043778801843319, "grad_norm": 1.7640804361655256, "learning_rate": 1.2533156947656665e-08, "loss": 1.0350267887115479, "step": 8265 }, { "epoch": 1.9046082949308756, "grad_norm": 1.0808972871053977, "learning_rate": 1.2473099233651251e-08, "loss": 0.6378228664398193, "step": 8266 }, { "epoch": 1.9048387096774193, "grad_norm": 1.1012549826430145, "learning_rate": 1.2413184857868241e-08, "loss": 0.8265732526779175, "step": 8267 }, { "epoch": 1.9050691244239633, "grad_norm": 1.102740322591124, "learning_rate": 1.23534138290039e-08, "loss": 0.8545348644256592, "step": 8268 }, { "epoch": 1.9052995391705068, "grad_norm": 1.1667419775790697, "learning_rate": 1.2293786155734176e-08, "loss": 0.660080075263977, "step": 8269 }, { "epoch": 1.9055299539170507, "grad_norm": 1.4258566183231558, "learning_rate": 1.2234301846713813e-08, "loss": 0.8409689664840698, "step": 8270 }, { "epoch": 1.9057603686635944, "grad_norm": 1.3639053971310304, "learning_rate": 1.2174960910576904e-08, "loss": 0.8026434183120728, "step": 8271 }, { "epoch": 1.9059907834101382, "grad_norm": 1.1477802786886386, "learning_rate": 1.2115763355936671e-08, "loss": 0.8315812945365906, "step": 8272 }, { "epoch": 1.9062211981566821, "grad_norm": 1.1488868543504023, "learning_rate": 1.2056709191385572e-08, "loss": 0.7373194694519043, "step": 8273 }, { "epoch": 1.9064516129032258, "grad_norm": 1.28219548502893, "learning_rate": 1.1997798425495309e-08, "loss": 0.7502317428588867, "step": 8274 }, { "epoch": 1.9066820276497696, "grad_norm": 1.1940555150789485, "learning_rate": 1.1939031066816707e-08, "loss": 0.8208760023117065, "step": 8275 }, { "epoch": 1.9069124423963135, "grad_norm": 1.2690336009694645, "learning_rate": 1.188040712387961e-08, "loss": 0.7584094405174255, "step": 8276 }, { "epoch": 1.907142857142857, "grad_norm": 1.3136164329476003, "learning_rate": 1.1821926605193433e-08, "loss": 0.7776647210121155, "step": 8277 }, { "epoch": 1.907373271889401, "grad_norm": 1.0778088332238458, "learning_rate": 1.1763589519246387e-08, "loss": 0.7739659547805786, "step": 8278 }, { "epoch": 1.9076036866359447, "grad_norm": 1.3752880267959628, "learning_rate": 1.170539587450603e-08, "loss": 0.7276068925857544, "step": 8279 }, { "epoch": 1.9078341013824884, "grad_norm": 1.1782987713077362, "learning_rate": 1.1647345679419163e-08, "loss": 0.624208927154541, "step": 8280 }, { "epoch": 1.9080645161290324, "grad_norm": 1.0744404873031923, "learning_rate": 1.1589438942411712e-08, "loss": 0.7865229845046997, "step": 8281 }, { "epoch": 1.908294930875576, "grad_norm": 1.1655122856650737, "learning_rate": 1.1531675671888619e-08, "loss": 0.8290715217590332, "step": 8282 }, { "epoch": 1.9085253456221198, "grad_norm": 1.4733922787626827, "learning_rate": 1.1474055876234289e-08, "loss": 0.8750064969062805, "step": 8283 }, { "epoch": 1.9087557603686636, "grad_norm": 1.0358743027064434, "learning_rate": 1.1416579563812146e-08, "loss": 0.7946900129318237, "step": 8284 }, { "epoch": 1.9089861751152073, "grad_norm": 1.1260650941834194, "learning_rate": 1.1359246742964623e-08, "loss": 0.6673855781555176, "step": 8285 }, { "epoch": 1.9092165898617512, "grad_norm": 1.5734371068415847, "learning_rate": 1.1302057422013734e-08, "loss": 0.8423609137535095, "step": 8286 }, { "epoch": 1.909447004608295, "grad_norm": 1.1774099615686673, "learning_rate": 1.124501160926039e-08, "loss": 0.7583299279212952, "step": 8287 }, { "epoch": 1.9096774193548387, "grad_norm": 1.3632188021099019, "learning_rate": 1.1188109312984639e-08, "loss": 0.8489730358123779, "step": 8288 }, { "epoch": 1.9099078341013827, "grad_norm": 1.268317857067217, "learning_rate": 1.1131350541445871e-08, "loss": 0.7460636496543884, "step": 8289 }, { "epoch": 1.9101382488479262, "grad_norm": 1.1951667787690143, "learning_rate": 1.1074735302882387e-08, "loss": 0.7310905456542969, "step": 8290 }, { "epoch": 1.91036866359447, "grad_norm": 1.1692661015812214, "learning_rate": 1.1018263605511946e-08, "loss": 0.8411405086517334, "step": 8291 }, { "epoch": 1.9105990783410138, "grad_norm": 1.12451343736832, "learning_rate": 1.0961935457531323e-08, "loss": 0.7980802059173584, "step": 8292 }, { "epoch": 1.9108294930875576, "grad_norm": 1.2914760603674136, "learning_rate": 1.0905750867116426e-08, "loss": 0.779492974281311, "step": 8293 }, { "epoch": 1.9110599078341015, "grad_norm": 1.0940139924335759, "learning_rate": 1.0849709842422283e-08, "loss": 0.7893733978271484, "step": 8294 }, { "epoch": 1.911290322580645, "grad_norm": 1.367510888792546, "learning_rate": 1.07938123915835e-08, "loss": 0.8281872272491455, "step": 8295 }, { "epoch": 1.911520737327189, "grad_norm": 1.3626141199750628, "learning_rate": 1.0738058522713144e-08, "loss": 0.721331775188446, "step": 8296 }, { "epoch": 1.9117511520737327, "grad_norm": 0.9302233955509024, "learning_rate": 1.0682448243904073e-08, "loss": 0.6043491363525391, "step": 8297 }, { "epoch": 1.9119815668202764, "grad_norm": 1.002380139729753, "learning_rate": 1.0626981563227943e-08, "loss": 0.7737481594085693, "step": 8298 }, { "epoch": 1.9122119815668204, "grad_norm": 1.20563258082351, "learning_rate": 1.0571658488735536e-08, "loss": 0.771499514579773, "step": 8299 }, { "epoch": 1.912442396313364, "grad_norm": 1.1334287395884057, "learning_rate": 1.0516479028457204e-08, "loss": 0.6711971759796143, "step": 8300 }, { "epoch": 1.9126728110599078, "grad_norm": 1.1514161835446617, "learning_rate": 1.0461443190402097e-08, "loss": 0.691685140132904, "step": 8301 }, { "epoch": 1.9129032258064518, "grad_norm": 1.0627327279898275, "learning_rate": 1.0406550982558382e-08, "loss": 0.7339159250259399, "step": 8302 }, { "epoch": 1.9131336405529953, "grad_norm": 1.098827920572517, "learning_rate": 1.0351802412893796e-08, "loss": 0.7832008600234985, "step": 8303 }, { "epoch": 1.9133640552995392, "grad_norm": 1.8976948304927823, "learning_rate": 1.0297197489355092e-08, "loss": 0.862671971321106, "step": 8304 }, { "epoch": 1.913594470046083, "grad_norm": 1.2340137918284608, "learning_rate": 1.0242736219867821e-08, "loss": 0.6442357897758484, "step": 8305 }, { "epoch": 1.9138248847926267, "grad_norm": 1.3262423414476558, "learning_rate": 1.0188418612337102e-08, "loss": 0.8777452707290649, "step": 8306 }, { "epoch": 1.9140552995391706, "grad_norm": 1.2308393583128812, "learning_rate": 1.0134244674647186e-08, "loss": 0.7672470808029175, "step": 8307 }, { "epoch": 1.9142857142857141, "grad_norm": 0.9277990008899878, "learning_rate": 1.0080214414661226e-08, "loss": 0.7338177561759949, "step": 8308 }, { "epoch": 1.914516129032258, "grad_norm": 1.3815065909330264, "learning_rate": 1.0026327840221727e-08, "loss": 0.7546414136886597, "step": 8309 }, { "epoch": 1.9147465437788018, "grad_norm": 1.0116807626508924, "learning_rate": 9.972584959149988e-09, "loss": 0.621455192565918, "step": 8310 }, { "epoch": 1.9149769585253456, "grad_norm": 1.0385626369203964, "learning_rate": 9.918985779247102e-09, "loss": 0.7403131723403931, "step": 8311 }, { "epoch": 1.9152073732718895, "grad_norm": 1.1027069898803628, "learning_rate": 9.865530308292624e-09, "loss": 0.7924279570579529, "step": 8312 }, { "epoch": 1.9154377880184332, "grad_norm": 1.1362295208393791, "learning_rate": 9.81221855404568e-09, "loss": 0.8831228017807007, "step": 8313 }, { "epoch": 1.915668202764977, "grad_norm": 1.1281945792188444, "learning_rate": 9.759050524244417e-09, "loss": 0.6786219477653503, "step": 8314 }, { "epoch": 1.9158986175115207, "grad_norm": 1.2807157366480393, "learning_rate": 9.70602622660599e-09, "loss": 0.7311046123504639, "step": 8315 }, { "epoch": 1.9161290322580644, "grad_norm": 1.3847340573145779, "learning_rate": 9.653145668826912e-09, "loss": 0.8914301991462708, "step": 8316 }, { "epoch": 1.9163594470046084, "grad_norm": 1.4027670914288322, "learning_rate": 9.600408858582709e-09, "loss": 0.8144292831420898, "step": 8317 }, { "epoch": 1.916589861751152, "grad_norm": 1.1077379444431534, "learning_rate": 9.547815803528036e-09, "loss": 0.6670823097229004, "step": 8318 }, { "epoch": 1.9168202764976958, "grad_norm": 1.2434106495167774, "learning_rate": 9.495366511296676e-09, "loss": 0.6801552772521973, "step": 8319 }, { "epoch": 1.9170506912442398, "grad_norm": 1.0098918722618904, "learning_rate": 9.44306098950165e-09, "loss": 0.8144240379333496, "step": 8320 }, { "epoch": 1.9172811059907833, "grad_norm": 1.0515221920732627, "learning_rate": 9.390899245734995e-09, "loss": 0.6352888345718384, "step": 8321 }, { "epoch": 1.9175115207373272, "grad_norm": 1.2296941092807456, "learning_rate": 9.33888128756788e-09, "loss": 0.7513711452484131, "step": 8322 }, { "epoch": 1.917741935483871, "grad_norm": 1.4377668264686976, "learning_rate": 9.287007122550705e-09, "loss": 0.7699171304702759, "step": 8323 }, { "epoch": 1.9179723502304147, "grad_norm": 1.591632209718944, "learning_rate": 9.235276758212895e-09, "loss": 0.8321002721786499, "step": 8324 }, { "epoch": 1.9182027649769586, "grad_norm": 1.0453744404830132, "learning_rate": 9.183690202062999e-09, "loss": 0.6815298795700073, "step": 8325 }, { "epoch": 1.9184331797235024, "grad_norm": 1.0030633247337575, "learning_rate": 9.132247461588915e-09, "loss": 0.7135178446769714, "step": 8326 }, { "epoch": 1.918663594470046, "grad_norm": 1.3123190228023687, "learning_rate": 9.080948544257338e-09, "loss": 0.8452005982398987, "step": 8327 }, { "epoch": 1.9188940092165898, "grad_norm": 1.1270879003396566, "learning_rate": 9.029793457514312e-09, "loss": 0.7449440956115723, "step": 8328 }, { "epoch": 1.9191244239631335, "grad_norm": 1.2310904327231214, "learning_rate": 8.978782208784897e-09, "loss": 0.8172955513000488, "step": 8329 }, { "epoch": 1.9193548387096775, "grad_norm": 1.0097624251077932, "learning_rate": 8.92791480547317e-09, "loss": 0.6682305335998535, "step": 8330 }, { "epoch": 1.9195852534562212, "grad_norm": 1.1974701853493588, "learning_rate": 8.877191254962779e-09, "loss": 0.6874973773956299, "step": 8331 }, { "epoch": 1.919815668202765, "grad_norm": 1.1728345166861331, "learning_rate": 8.826611564615949e-09, "loss": 0.8371694684028625, "step": 8332 }, { "epoch": 1.920046082949309, "grad_norm": 1.1837626119929445, "learning_rate": 8.77617574177425e-09, "loss": 0.7147493362426758, "step": 8333 }, { "epoch": 1.9202764976958524, "grad_norm": 1.2783488550083906, "learning_rate": 8.725883793758382e-09, "loss": 0.7444115877151489, "step": 8334 }, { "epoch": 1.9205069124423964, "grad_norm": 1.3799268170287549, "learning_rate": 8.675735727868283e-09, "loss": 0.7772307395935059, "step": 8335 }, { "epoch": 1.92073732718894, "grad_norm": 1.2730237375907167, "learning_rate": 8.625731551382798e-09, "loss": 0.702937126159668, "step": 8336 }, { "epoch": 1.9209677419354838, "grad_norm": 1.316574939310684, "learning_rate": 8.575871271559898e-09, "loss": 0.7404709458351135, "step": 8337 }, { "epoch": 1.9211981566820278, "grad_norm": 1.4216605594412726, "learning_rate": 8.526154895636906e-09, "loss": 0.7142058610916138, "step": 8338 }, { "epoch": 1.9214285714285713, "grad_norm": 1.381037068322115, "learning_rate": 8.476582430830048e-09, "loss": 0.8950545191764832, "step": 8339 }, { "epoch": 1.9216589861751152, "grad_norm": 1.2364573338693037, "learning_rate": 8.42715388433446e-09, "loss": 0.6939054131507874, "step": 8340 }, { "epoch": 1.921889400921659, "grad_norm": 1.3248307922164142, "learning_rate": 8.377869263324954e-09, "loss": 0.7916324138641357, "step": 8341 }, { "epoch": 1.9221198156682027, "grad_norm": 1.3092539218499513, "learning_rate": 8.328728574954924e-09, "loss": 0.8059754371643066, "step": 8342 }, { "epoch": 1.9223502304147466, "grad_norm": 1.1195879983393067, "learning_rate": 8.279731826357105e-09, "loss": 0.650648295879364, "step": 8343 }, { "epoch": 1.9225806451612903, "grad_norm": 0.9135397053997126, "learning_rate": 8.230879024643478e-09, "loss": 0.6912552118301392, "step": 8344 }, { "epoch": 1.922811059907834, "grad_norm": 0.8588678436998939, "learning_rate": 8.182170176904702e-09, "loss": 0.7430927753448486, "step": 8345 }, { "epoch": 1.923041474654378, "grad_norm": 1.1000327691208154, "learning_rate": 8.133605290210898e-09, "loss": 0.7550772428512573, "step": 8346 }, { "epoch": 1.9232718894009215, "grad_norm": 1.1138393113278757, "learning_rate": 8.08518437161132e-09, "loss": 0.7235819101333618, "step": 8347 }, { "epoch": 1.9235023041474655, "grad_norm": 1.085631464611088, "learning_rate": 8.036907428134121e-09, "loss": 0.790582537651062, "step": 8348 }, { "epoch": 1.9237327188940092, "grad_norm": 1.2928878399763604, "learning_rate": 7.988774466786585e-09, "loss": 0.7350871562957764, "step": 8349 }, { "epoch": 1.923963133640553, "grad_norm": 1.3980478677422172, "learning_rate": 7.940785494555124e-09, "loss": 0.86177659034729, "step": 8350 }, { "epoch": 1.9241935483870969, "grad_norm": 1.196963381013611, "learning_rate": 7.892940518405499e-09, "loss": 0.8039232492446899, "step": 8351 }, { "epoch": 1.9244239631336404, "grad_norm": 1.231295549355971, "learning_rate": 7.845239545282046e-09, "loss": 0.7130967378616333, "step": 8352 }, { "epoch": 1.9246543778801843, "grad_norm": 1.0830506625128473, "learning_rate": 7.797682582108667e-09, "loss": 0.7297911047935486, "step": 8353 }, { "epoch": 1.924884792626728, "grad_norm": 1.2576048144274934, "learning_rate": 7.750269635788065e-09, "loss": 0.7302875518798828, "step": 8354 }, { "epoch": 1.9251152073732718, "grad_norm": 1.1228331103171292, "learning_rate": 7.703000713202401e-09, "loss": 0.7976555824279785, "step": 8355 }, { "epoch": 1.9253456221198157, "grad_norm": 1.1181213613597878, "learning_rate": 7.65587582121252e-09, "loss": 0.6747829914093018, "step": 8356 }, { "epoch": 1.9255760368663595, "grad_norm": 1.3086474559444063, "learning_rate": 7.608894966658509e-09, "loss": 0.7217142581939697, "step": 8357 }, { "epoch": 1.9258064516129032, "grad_norm": 1.3893709396765357, "learning_rate": 7.562058156359685e-09, "loss": 0.8635888695716858, "step": 8358 }, { "epoch": 1.9260368663594472, "grad_norm": 1.3318330118319255, "learning_rate": 7.515365397114282e-09, "loss": 0.8435994386672974, "step": 8359 }, { "epoch": 1.9262672811059907, "grad_norm": 1.4490671236886896, "learning_rate": 7.468816695699653e-09, "loss": 0.8632286787033081, "step": 8360 }, { "epoch": 1.9264976958525346, "grad_norm": 1.501498499241499, "learning_rate": 7.422412058872396e-09, "loss": 0.7916556596755981, "step": 8361 }, { "epoch": 1.9267281105990783, "grad_norm": 1.1808854932681303, "learning_rate": 7.376151493368121e-09, "loss": 0.8307663202285767, "step": 8362 }, { "epoch": 1.926958525345622, "grad_norm": 1.4156996026964064, "learning_rate": 7.330035005901236e-09, "loss": 0.9020388126373291, "step": 8363 }, { "epoch": 1.927188940092166, "grad_norm": 1.222606934693838, "learning_rate": 7.28406260316572e-09, "loss": 0.7926114797592163, "step": 8364 }, { "epoch": 1.9274193548387095, "grad_norm": 1.0417046174216056, "learning_rate": 7.2382342918343446e-09, "loss": 0.7609784603118896, "step": 8365 }, { "epoch": 1.9276497695852535, "grad_norm": 1.3729827404737949, "learning_rate": 7.192550078559012e-09, "loss": 0.6010490655899048, "step": 8366 }, { "epoch": 1.9278801843317972, "grad_norm": 1.495271329234438, "learning_rate": 7.147009969970641e-09, "loss": 0.8219606876373291, "step": 8367 }, { "epoch": 1.928110599078341, "grad_norm": 1.207499145814505, "learning_rate": 7.101613972679499e-09, "loss": 0.8688151836395264, "step": 8368 }, { "epoch": 1.9283410138248849, "grad_norm": 1.0608698410629562, "learning_rate": 7.0563620932747595e-09, "loss": 0.7654411792755127, "step": 8369 }, { "epoch": 1.9285714285714286, "grad_norm": 1.0982841652537483, "learning_rate": 7.01125433832439e-09, "loss": 0.6878413558006287, "step": 8370 }, { "epoch": 1.9288018433179723, "grad_norm": 1.0662803206592244, "learning_rate": 6.966290714375933e-09, "loss": 0.6703332662582397, "step": 8371 }, { "epoch": 1.9290322580645163, "grad_norm": 1.1405585467491617, "learning_rate": 6.921471227955833e-09, "loss": 0.752200722694397, "step": 8372 }, { "epoch": 1.9292626728110598, "grad_norm": 1.1122335677850106, "learning_rate": 6.8767958855695526e-09, "loss": 0.8107069730758667, "step": 8373 }, { "epoch": 1.9294930875576037, "grad_norm": 1.4102834771954489, "learning_rate": 6.832264693701573e-09, "loss": 0.8816967010498047, "step": 8374 }, { "epoch": 1.9297235023041475, "grad_norm": 1.2593635712728732, "learning_rate": 6.78787765881561e-09, "loss": 0.7889697551727295, "step": 8375 }, { "epoch": 1.9299539170506912, "grad_norm": 1.2377942170623384, "learning_rate": 6.743634787354291e-09, "loss": 0.7218060493469238, "step": 8376 }, { "epoch": 1.9301843317972351, "grad_norm": 1.2786458190631131, "learning_rate": 6.699536085739588e-09, "loss": 0.8061347007751465, "step": 8377 }, { "epoch": 1.9304147465437786, "grad_norm": 1.0571211016932303, "learning_rate": 6.655581560372159e-09, "loss": 0.7320632934570312, "step": 8378 }, { "epoch": 1.9306451612903226, "grad_norm": 1.2201688729332103, "learning_rate": 6.611771217632123e-09, "loss": 0.7039695978164673, "step": 8379 }, { "epoch": 1.9308755760368663, "grad_norm": 1.0152325785443144, "learning_rate": 6.568105063878393e-09, "loss": 0.7056317925453186, "step": 8380 }, { "epoch": 1.93110599078341, "grad_norm": 1.3442992098354511, "learning_rate": 6.524583105449122e-09, "loss": 0.9265607595443726, "step": 8381 }, { "epoch": 1.931336405529954, "grad_norm": 0.9980232024455323, "learning_rate": 6.481205348661367e-09, "loss": 0.7249365448951721, "step": 8382 }, { "epoch": 1.9315668202764977, "grad_norm": 1.0217670095742197, "learning_rate": 6.4379717998114256e-09, "loss": 0.8216372728347778, "step": 8383 }, { "epoch": 1.9317972350230415, "grad_norm": 1.0731967820570871, "learning_rate": 6.394882465174611e-09, "loss": 0.6750606894493103, "step": 8384 }, { "epoch": 1.9320276497695854, "grad_norm": 1.1382732221343326, "learning_rate": 6.351937351005143e-09, "loss": 0.8265045285224915, "step": 8385 }, { "epoch": 1.932258064516129, "grad_norm": 1.2033626019579449, "learning_rate": 6.309136463536591e-09, "loss": 0.5992317795753479, "step": 8386 }, { "epoch": 1.9324884792626729, "grad_norm": 1.026760102298627, "learning_rate": 6.266479808981428e-09, "loss": 0.6586567163467407, "step": 8387 }, { "epoch": 1.9327188940092166, "grad_norm": 1.1335080912138158, "learning_rate": 6.223967393531259e-09, "loss": 0.7496415376663208, "step": 8388 }, { "epoch": 1.9329493087557603, "grad_norm": 1.2743344602397095, "learning_rate": 6.181599223356593e-09, "loss": 0.8637027740478516, "step": 8389 }, { "epoch": 1.9331797235023043, "grad_norm": 1.3348493633535858, "learning_rate": 6.139375304607064e-09, "loss": 0.6925984621047974, "step": 8390 }, { "epoch": 1.9334101382488478, "grad_norm": 1.3338549311969345, "learning_rate": 6.0972956434115485e-09, "loss": 0.8345432877540588, "step": 8391 }, { "epoch": 1.9336405529953917, "grad_norm": 1.211546505819517, "learning_rate": 6.055360245877938e-09, "loss": 0.797752857208252, "step": 8392 }, { "epoch": 1.9338709677419355, "grad_norm": 1.025513773253857, "learning_rate": 6.013569118092809e-09, "loss": 0.7460094690322876, "step": 8393 }, { "epoch": 1.9341013824884792, "grad_norm": 1.0501792229397418, "learning_rate": 5.97192226612242e-09, "loss": 0.7695547342300415, "step": 8394 }, { "epoch": 1.9343317972350231, "grad_norm": 1.3341559418127071, "learning_rate": 5.9304196960113795e-09, "loss": 0.8372104167938232, "step": 8395 }, { "epoch": 1.9345622119815669, "grad_norm": 1.174939684239835, "learning_rate": 5.889061413784091e-09, "loss": 0.7647950053215027, "step": 8396 }, { "epoch": 1.9347926267281106, "grad_norm": 1.0568987578487792, "learning_rate": 5.84784742544353e-09, "loss": 0.6958519220352173, "step": 8397 }, { "epoch": 1.9350230414746545, "grad_norm": 1.1905008025272417, "learning_rate": 5.806777736971691e-09, "loss": 0.8488763570785522, "step": 8398 }, { "epoch": 1.935253456221198, "grad_norm": 1.1975357379056275, "learning_rate": 5.765852354330025e-09, "loss": 0.6448318958282471, "step": 8399 }, { "epoch": 1.935483870967742, "grad_norm": 1.288117894635522, "learning_rate": 5.725071283458671e-09, "loss": 0.7449144124984741, "step": 8400 }, { "epoch": 1.9357142857142857, "grad_norm": 1.2060473887345362, "learning_rate": 5.684434530277005e-09, "loss": 0.8339489102363586, "step": 8401 }, { "epoch": 1.9359447004608294, "grad_norm": 1.355663998015665, "learning_rate": 5.643942100683308e-09, "loss": 0.7758409380912781, "step": 8402 }, { "epoch": 1.9361751152073734, "grad_norm": 1.2457476365021507, "learning_rate": 5.60359400055499e-09, "loss": 0.8604291081428528, "step": 8403 }, { "epoch": 1.936405529953917, "grad_norm": 0.9800977546704353, "learning_rate": 5.5633902357487e-09, "loss": 0.7379741668701172, "step": 8404 }, { "epoch": 1.9366359447004609, "grad_norm": 1.0501931597758303, "learning_rate": 5.52333081209988e-09, "loss": 0.6943101286888123, "step": 8405 }, { "epoch": 1.9368663594470046, "grad_norm": 1.193280273833338, "learning_rate": 5.483415735422992e-09, "loss": 0.7397646903991699, "step": 8406 }, { "epoch": 1.9370967741935483, "grad_norm": 1.1298510822998358, "learning_rate": 5.443645011511844e-09, "loss": 0.7566234469413757, "step": 8407 }, { "epoch": 1.9373271889400923, "grad_norm": 1.322820355956732, "learning_rate": 5.40401864613893e-09, "loss": 0.6345827579498291, "step": 8408 }, { "epoch": 1.937557603686636, "grad_norm": 1.6653451978671274, "learning_rate": 5.3645366450560944e-09, "loss": 0.7259831428527832, "step": 8409 }, { "epoch": 1.9377880184331797, "grad_norm": 1.347964952979272, "learning_rate": 5.325199013993975e-09, "loss": 0.7897600531578064, "step": 8410 }, { "epoch": 1.9380184331797237, "grad_norm": 1.3016062068490681, "learning_rate": 5.286005758662448e-09, "loss": 0.8421739339828491, "step": 8411 }, { "epoch": 1.9382488479262672, "grad_norm": 1.3347958532899202, "learning_rate": 5.2469568847504085e-09, "loss": 0.7652501463890076, "step": 8412 }, { "epoch": 1.9384792626728111, "grad_norm": 1.3105993577298032, "learning_rate": 5.2080523979256556e-09, "loss": 0.6397069096565247, "step": 8413 }, { "epoch": 1.9387096774193548, "grad_norm": 1.2689574006754154, "learning_rate": 5.169292303835116e-09, "loss": 0.840052604675293, "step": 8414 }, { "epoch": 1.9389400921658986, "grad_norm": 1.344062608291919, "learning_rate": 5.130676608104845e-09, "loss": 0.8453920483589172, "step": 8415 }, { "epoch": 1.9391705069124425, "grad_norm": 1.3358429095342716, "learning_rate": 5.092205316339915e-09, "loss": 0.8301386833190918, "step": 8416 }, { "epoch": 1.939400921658986, "grad_norm": 1.0570862677742232, "learning_rate": 5.0538784341241924e-09, "loss": 0.6682429313659668, "step": 8417 }, { "epoch": 1.93963133640553, "grad_norm": 1.4370850274204425, "learning_rate": 5.0156959670208945e-09, "loss": 0.7881286144256592, "step": 8418 }, { "epoch": 1.9398617511520737, "grad_norm": 1.1170749783406635, "learning_rate": 4.9776579205721424e-09, "loss": 0.7413277626037598, "step": 8419 }, { "epoch": 1.9400921658986174, "grad_norm": 1.2672048797390025, "learning_rate": 4.939764300299187e-09, "loss": 0.6718757152557373, "step": 8420 }, { "epoch": 1.9403225806451614, "grad_norm": 1.1707673461814823, "learning_rate": 4.9020151117019625e-09, "loss": 0.8595068454742432, "step": 8421 }, { "epoch": 1.9405529953917051, "grad_norm": 1.0350774696905816, "learning_rate": 4.864410360260085e-09, "loss": 0.6985205411911011, "step": 8422 }, { "epoch": 1.9407834101382488, "grad_norm": 1.222465370246094, "learning_rate": 4.826950051431522e-09, "loss": 0.7148889303207397, "step": 8423 }, { "epoch": 1.9410138248847926, "grad_norm": 1.320040251210183, "learning_rate": 4.789634190653813e-09, "loss": 0.8109019994735718, "step": 8424 }, { "epoch": 1.9412442396313363, "grad_norm": 1.4762486891336946, "learning_rate": 4.752462783343292e-09, "loss": 0.8268437385559082, "step": 8425 }, { "epoch": 1.9414746543778802, "grad_norm": 0.9708535634361853, "learning_rate": 4.715435834895088e-09, "loss": 0.7300432920455933, "step": 8426 }, { "epoch": 1.941705069124424, "grad_norm": 1.3017508085468754, "learning_rate": 4.6785533506839005e-09, "loss": 0.848440408706665, "step": 8427 }, { "epoch": 1.9419354838709677, "grad_norm": 1.0873655680994063, "learning_rate": 4.6418153360630044e-09, "loss": 0.7526305913925171, "step": 8428 }, { "epoch": 1.9421658986175117, "grad_norm": 1.1186105868292944, "learning_rate": 4.605221796365022e-09, "loss": 0.6987402439117432, "step": 8429 }, { "epoch": 1.9423963133640552, "grad_norm": 1.5889483697201847, "learning_rate": 4.568772736901261e-09, "loss": 0.7944519519805908, "step": 8430 }, { "epoch": 1.942626728110599, "grad_norm": 1.0443704220390153, "learning_rate": 4.532468162962378e-09, "loss": 0.7206175327301025, "step": 8431 }, { "epoch": 1.9428571428571428, "grad_norm": 1.332362884391146, "learning_rate": 4.4963080798179345e-09, "loss": 0.6892992854118347, "step": 8432 }, { "epoch": 1.9430875576036866, "grad_norm": 1.0826330060160456, "learning_rate": 4.460292492716511e-09, "loss": 0.696158766746521, "step": 8433 }, { "epoch": 1.9433179723502305, "grad_norm": 0.9789941295444919, "learning_rate": 4.424421406885704e-09, "loss": 0.8007163405418396, "step": 8434 }, { "epoch": 1.9435483870967742, "grad_norm": 1.1286085842961833, "learning_rate": 4.3886948275320135e-09, "loss": 0.7969222068786621, "step": 8435 }, { "epoch": 1.943778801843318, "grad_norm": 1.2183409512094359, "learning_rate": 4.353112759841404e-09, "loss": 0.7752852439880371, "step": 8436 }, { "epoch": 1.9440092165898617, "grad_norm": 1.1860536416754315, "learning_rate": 4.317675208978411e-09, "loss": 0.7788258790969849, "step": 8437 }, { "epoch": 1.9442396313364054, "grad_norm": 1.1863849018136006, "learning_rate": 4.2823821800866964e-09, "loss": 0.838456392288208, "step": 8438 }, { "epoch": 1.9444700460829494, "grad_norm": 1.0569456831140607, "learning_rate": 4.2472336782890525e-09, "loss": 0.7503675222396851, "step": 8439 }, { "epoch": 1.944700460829493, "grad_norm": 0.9808278818485672, "learning_rate": 4.212229708687287e-09, "loss": 0.810901403427124, "step": 8440 }, { "epoch": 1.9449308755760368, "grad_norm": 1.0050063922171069, "learning_rate": 4.1773702763621135e-09, "loss": 0.7551805973052979, "step": 8441 }, { "epoch": 1.9451612903225808, "grad_norm": 1.2275039222333026, "learning_rate": 4.142655386373373e-09, "loss": 0.9387043714523315, "step": 8442 }, { "epoch": 1.9453917050691243, "grad_norm": 1.034577232879954, "learning_rate": 4.1080850437598124e-09, "loss": 0.7508292198181152, "step": 8443 }, { "epoch": 1.9456221198156682, "grad_norm": 0.9799945991508818, "learning_rate": 4.073659253539308e-09, "loss": 0.737107515335083, "step": 8444 }, { "epoch": 1.945852534562212, "grad_norm": 1.477967097078984, "learning_rate": 4.03937802070875e-09, "loss": 0.86794114112854, "step": 8445 }, { "epoch": 1.9460829493087557, "grad_norm": 0.9207750837260967, "learning_rate": 4.005241350243937e-09, "loss": 0.7629859447479248, "step": 8446 }, { "epoch": 1.9463133640552996, "grad_norm": 1.4180879805115079, "learning_rate": 3.971249247099906e-09, "loss": 0.7455410957336426, "step": 8447 }, { "epoch": 1.9465437788018434, "grad_norm": 1.1941620926103322, "learning_rate": 3.937401716210376e-09, "loss": 0.8322222828865051, "step": 8448 }, { "epoch": 1.946774193548387, "grad_norm": 1.510433091637528, "learning_rate": 3.903698762488528e-09, "loss": 0.7961260676383972, "step": 8449 }, { "epoch": 1.9470046082949308, "grad_norm": 1.2160569883363423, "learning_rate": 3.870140390826005e-09, "loss": 0.8144096732139587, "step": 8450 }, { "epoch": 1.9472350230414746, "grad_norm": 1.2123613138822447, "learning_rate": 3.8367266060939095e-09, "loss": 0.7973348498344421, "step": 8451 }, { "epoch": 1.9474654377880185, "grad_norm": 1.4038735969349747, "learning_rate": 3.803457413142253e-09, "loss": 0.8311715126037598, "step": 8452 }, { "epoch": 1.9476958525345622, "grad_norm": 0.9815978065709688, "learning_rate": 3.770332816799948e-09, "loss": 0.7851812839508057, "step": 8453 }, { "epoch": 1.947926267281106, "grad_norm": 1.3820548975058524, "learning_rate": 3.737352821875039e-09, "loss": 0.8721193075180054, "step": 8454 }, { "epoch": 1.94815668202765, "grad_norm": 1.2337347998012935, "learning_rate": 3.704517433154364e-09, "loss": 0.8594118356704712, "step": 8455 }, { "epoch": 1.9483870967741934, "grad_norm": 0.9620755666197012, "learning_rate": 3.671826655404109e-09, "loss": 0.6526527404785156, "step": 8456 }, { "epoch": 1.9486175115207374, "grad_norm": 0.9198704876253201, "learning_rate": 3.639280493369368e-09, "loss": 0.7577145099639893, "step": 8457 }, { "epoch": 1.948847926267281, "grad_norm": 1.4898349304718468, "learning_rate": 3.6068789517739173e-09, "loss": 0.9176833629608154, "step": 8458 }, { "epoch": 1.9490783410138248, "grad_norm": 1.5070373914502264, "learning_rate": 3.5746220353209956e-09, "loss": 0.8947671055793762, "step": 8459 }, { "epoch": 1.9493087557603688, "grad_norm": 1.2654885409411176, "learning_rate": 3.542509748692524e-09, "loss": 0.8791666030883789, "step": 8460 }, { "epoch": 1.9495391705069123, "grad_norm": 0.9247331783476281, "learning_rate": 3.5105420965496626e-09, "loss": 0.7431247234344482, "step": 8461 }, { "epoch": 1.9497695852534562, "grad_norm": 1.3437504272827105, "learning_rate": 3.4787190835324775e-09, "loss": 0.7998695373535156, "step": 8462 }, { "epoch": 1.95, "grad_norm": 1.359553043789141, "learning_rate": 3.447040714259941e-09, "loss": 0.8120161294937134, "step": 8463 }, { "epoch": 1.9502304147465437, "grad_norm": 1.063781533705899, "learning_rate": 3.415506993330153e-09, "loss": 0.8062546849250793, "step": 8464 }, { "epoch": 1.9504608294930876, "grad_norm": 1.3290963135655427, "learning_rate": 3.384117925320229e-09, "loss": 0.8100919723510742, "step": 8465 }, { "epoch": 1.9506912442396314, "grad_norm": 1.410960677080016, "learning_rate": 3.352873514786303e-09, "loss": 0.7376535534858704, "step": 8466 }, { "epoch": 1.950921658986175, "grad_norm": 1.1333962819853984, "learning_rate": 3.321773766263303e-09, "loss": 0.7534361481666565, "step": 8467 }, { "epoch": 1.951152073732719, "grad_norm": 0.956942860373484, "learning_rate": 3.290818684265506e-09, "loss": 0.6914925575256348, "step": 8468 }, { "epoch": 1.9513824884792625, "grad_norm": 1.40322423242457, "learning_rate": 3.2600082732858746e-09, "loss": 0.837024450302124, "step": 8469 }, { "epoch": 1.9516129032258065, "grad_norm": 1.3077639635125993, "learning_rate": 3.229342537796609e-09, "loss": 0.7960337400436401, "step": 8470 }, { "epoch": 1.9518433179723502, "grad_norm": 1.1044299774108808, "learning_rate": 3.1988214822485928e-09, "loss": 0.6611788868904114, "step": 8471 }, { "epoch": 1.952073732718894, "grad_norm": 1.2652589643459276, "learning_rate": 3.16844511107206e-09, "loss": 0.8798158168792725, "step": 8472 }, { "epoch": 1.952304147465438, "grad_norm": 1.3477135835069336, "learning_rate": 3.1382134286761506e-09, "loss": 0.790015459060669, "step": 8473 }, { "epoch": 1.9525345622119814, "grad_norm": 1.062422263250462, "learning_rate": 3.1081264394489103e-09, "loss": 0.7676407098770142, "step": 8474 }, { "epoch": 1.9527649769585254, "grad_norm": 1.1707572290080033, "learning_rate": 3.07818414775729e-09, "loss": 0.8213051557540894, "step": 8475 }, { "epoch": 1.952995391705069, "grad_norm": 1.328203051872804, "learning_rate": 3.048386557947591e-09, "loss": 0.8909401893615723, "step": 8476 }, { "epoch": 1.9532258064516128, "grad_norm": 1.2206551189591073, "learning_rate": 3.0187336743446867e-09, "loss": 0.838227391242981, "step": 8477 }, { "epoch": 1.9534562211981568, "grad_norm": 1.1958685930192579, "learning_rate": 2.9892255012528013e-09, "loss": 0.7297696471214294, "step": 8478 }, { "epoch": 1.9536866359447005, "grad_norm": 1.508389266534061, "learning_rate": 2.9598620429550636e-09, "loss": 1.0060585737228394, "step": 8479 }, { "epoch": 1.9539170506912442, "grad_norm": 1.1858328009290373, "learning_rate": 2.9306433037132873e-09, "loss": 0.7812967300415039, "step": 8480 }, { "epoch": 1.9541474654377882, "grad_norm": 1.196629989025656, "learning_rate": 2.901569287768746e-09, "loss": 0.7349315881729126, "step": 8481 }, { "epoch": 1.9543778801843317, "grad_norm": 1.1580071941270487, "learning_rate": 2.8726399993415085e-09, "loss": 0.7083498239517212, "step": 8482 }, { "epoch": 1.9546082949308756, "grad_norm": 1.3308451395414542, "learning_rate": 2.8438554426304386e-09, "loss": 0.7969732880592346, "step": 8483 }, { "epoch": 1.9548387096774194, "grad_norm": 1.405840014033905, "learning_rate": 2.815215621813749e-09, "loss": 0.7701122164726257, "step": 8484 }, { "epoch": 1.955069124423963, "grad_norm": 1.0487330945577633, "learning_rate": 2.7867205410484485e-09, "loss": 0.7323017120361328, "step": 8485 }, { "epoch": 1.955299539170507, "grad_norm": 0.9842598310766136, "learning_rate": 2.7583702044704504e-09, "loss": 0.8357248306274414, "step": 8486 }, { "epoch": 1.9555299539170505, "grad_norm": 1.4806137218761686, "learning_rate": 2.7301646161947966e-09, "loss": 0.8164674043655396, "step": 8487 }, { "epoch": 1.9557603686635945, "grad_norm": 1.2641967325925645, "learning_rate": 2.7021037803156566e-09, "loss": 0.7972782850265503, "step": 8488 }, { "epoch": 1.9559907834101382, "grad_norm": 1.2417679147004388, "learning_rate": 2.6741877009058835e-09, "loss": 0.864342987537384, "step": 8489 }, { "epoch": 1.956221198156682, "grad_norm": 1.1067561191492752, "learning_rate": 2.646416382017458e-09, "loss": 0.7428402900695801, "step": 8490 }, { "epoch": 1.956451612903226, "grad_norm": 1.3211414352422526, "learning_rate": 2.618789827681378e-09, "loss": 0.7164437770843506, "step": 8491 }, { "epoch": 1.9566820276497696, "grad_norm": 1.153189225005644, "learning_rate": 2.5913080419075473e-09, "loss": 0.6997767686843872, "step": 8492 }, { "epoch": 1.9569124423963133, "grad_norm": 1.2481992001614755, "learning_rate": 2.563971028684886e-09, "loss": 0.6399234533309937, "step": 8493 }, { "epoch": 1.9571428571428573, "grad_norm": 1.1639751659112805, "learning_rate": 2.536778791981553e-09, "loss": 0.7642914056777954, "step": 8494 }, { "epoch": 1.9573732718894008, "grad_norm": 1.218382512158835, "learning_rate": 2.5097313357442806e-09, "loss": 0.8284746408462524, "step": 8495 }, { "epoch": 1.9576036866359448, "grad_norm": 1.2221524988832009, "learning_rate": 2.4828286638989282e-09, "loss": 0.6680238246917725, "step": 8496 }, { "epoch": 1.9578341013824885, "grad_norm": 1.2965002342798193, "learning_rate": 2.4560707803504834e-09, "loss": 0.7621040344238281, "step": 8497 }, { "epoch": 1.9580645161290322, "grad_norm": 1.2947556724815892, "learning_rate": 2.4294576889827278e-09, "loss": 0.7326159477233887, "step": 8498 }, { "epoch": 1.9582949308755762, "grad_norm": 1.0656455780738308, "learning_rate": 2.4029893936586833e-09, "loss": 0.6496877670288086, "step": 8499 }, { "epoch": 1.9585253456221197, "grad_norm": 1.241192579535759, "learning_rate": 2.376665898220054e-09, "loss": 0.665170431137085, "step": 8500 }, { "epoch": 1.9587557603686636, "grad_norm": 1.2593646350179877, "learning_rate": 2.3504872064876724e-09, "loss": 0.7238261699676514, "step": 8501 }, { "epoch": 1.9589861751152073, "grad_norm": 1.5709730629781664, "learning_rate": 2.3244533222613882e-09, "loss": 0.6696983575820923, "step": 8502 }, { "epoch": 1.959216589861751, "grad_norm": 1.6011689537620306, "learning_rate": 2.2985642493199563e-09, "loss": 0.8414099216461182, "step": 8503 }, { "epoch": 1.959447004608295, "grad_norm": 1.1680069988943498, "learning_rate": 2.2728199914210377e-09, "loss": 0.7390140295028687, "step": 8504 }, { "epoch": 1.9596774193548387, "grad_norm": 1.1922421298842674, "learning_rate": 2.247220552301532e-09, "loss": 0.7910370826721191, "step": 8505 }, { "epoch": 1.9599078341013825, "grad_norm": 1.2059164746419144, "learning_rate": 2.2217659356771334e-09, "loss": 0.8111266493797302, "step": 8506 }, { "epoch": 1.9601382488479264, "grad_norm": 1.382769681983927, "learning_rate": 2.1964561452425535e-09, "loss": 0.8748809099197388, "step": 8507 }, { "epoch": 1.96036866359447, "grad_norm": 1.4348415171969837, "learning_rate": 2.1712911846714088e-09, "loss": 0.726898193359375, "step": 8508 }, { "epoch": 1.9605990783410139, "grad_norm": 0.903624770648156, "learning_rate": 2.1462710576163335e-09, "loss": 0.5221005082130432, "step": 8509 }, { "epoch": 1.9608294930875576, "grad_norm": 0.9979524654583228, "learning_rate": 2.1213957677090887e-09, "loss": 0.7336875200271606, "step": 8510 }, { "epoch": 1.9610599078341013, "grad_norm": 1.4570574984679434, "learning_rate": 2.096665318560231e-09, "loss": 0.9653327465057373, "step": 8511 }, { "epoch": 1.9612903225806453, "grad_norm": 1.3910033326033395, "learning_rate": 2.0720797137594448e-09, "loss": 0.8309473991394043, "step": 8512 }, { "epoch": 1.9615207373271888, "grad_norm": 1.250491052702372, "learning_rate": 2.047638956874986e-09, "loss": 0.7829124331474304, "step": 8513 }, { "epoch": 1.9617511520737327, "grad_norm": 1.6063542888921636, "learning_rate": 2.0233430514547955e-09, "loss": 0.8399544358253479, "step": 8514 }, { "epoch": 1.9619815668202765, "grad_norm": 1.2304488854915971, "learning_rate": 1.999192001025163e-09, "loss": 0.7827579975128174, "step": 8515 }, { "epoch": 1.9622119815668202, "grad_norm": 1.7023781342726942, "learning_rate": 1.9751858090916174e-09, "loss": 0.8617441654205322, "step": 8516 }, { "epoch": 1.9624423963133641, "grad_norm": 1.124873706648068, "learning_rate": 1.951324479138594e-09, "loss": 0.758098840713501, "step": 8517 }, { "epoch": 1.9626728110599079, "grad_norm": 1.407820551284048, "learning_rate": 1.927608014629656e-09, "loss": 0.738059937953949, "step": 8518 }, { "epoch": 1.9629032258064516, "grad_norm": 1.2924313700222672, "learning_rate": 1.9040364190070492e-09, "loss": 0.6286636590957642, "step": 8519 }, { "epoch": 1.9631336405529956, "grad_norm": 1.4040969276884698, "learning_rate": 1.88060969569237e-09, "loss": 0.764518141746521, "step": 8520 }, { "epoch": 1.963364055299539, "grad_norm": 0.9848782890607348, "learning_rate": 1.8573278480857878e-09, "loss": 0.775516152381897, "step": 8521 }, { "epoch": 1.963594470046083, "grad_norm": 1.2592904992793421, "learning_rate": 1.8341908795665994e-09, "loss": 0.8513185977935791, "step": 8522 }, { "epoch": 1.9638248847926267, "grad_norm": 1.4423039825526616, "learning_rate": 1.8111987934933404e-09, "loss": 0.7300710082054138, "step": 8523 }, { "epoch": 1.9640552995391705, "grad_norm": 1.1896167974085796, "learning_rate": 1.788351593203119e-09, "loss": 0.7346746921539307, "step": 8524 }, { "epoch": 1.9642857142857144, "grad_norm": 1.3610028359172472, "learning_rate": 1.7656492820121715e-09, "loss": 0.8231781721115112, "step": 8525 }, { "epoch": 1.964516129032258, "grad_norm": 1.2672154264769777, "learning_rate": 1.743091863215751e-09, "loss": 0.6972112655639648, "step": 8526 }, { "epoch": 1.9647465437788019, "grad_norm": 1.013160541626117, "learning_rate": 1.720679340088016e-09, "loss": 0.6512203812599182, "step": 8527 }, { "epoch": 1.9649769585253456, "grad_norm": 1.220658103943082, "learning_rate": 1.698411715882253e-09, "loss": 0.6755591630935669, "step": 8528 }, { "epoch": 1.9652073732718893, "grad_norm": 1.115552383506669, "learning_rate": 1.6762889938303215e-09, "loss": 0.6858727335929871, "step": 8529 }, { "epoch": 1.9654377880184333, "grad_norm": 1.1810577023934496, "learning_rate": 1.6543111771434303e-09, "loss": 0.7820768356323242, "step": 8530 }, { "epoch": 1.965668202764977, "grad_norm": 1.512690235242737, "learning_rate": 1.6324782690116944e-09, "loss": 0.7841604948043823, "step": 8531 }, { "epoch": 1.9658986175115207, "grad_norm": 1.4015300039500524, "learning_rate": 1.6107902726040234e-09, "loss": 0.8665674328804016, "step": 8532 }, { "epoch": 1.9661290322580647, "grad_norm": 1.1307460450405855, "learning_rate": 1.5892471910684547e-09, "loss": 0.6764376163482666, "step": 8533 }, { "epoch": 1.9663594470046082, "grad_norm": 1.4229790787582275, "learning_rate": 1.5678490275319312e-09, "loss": 0.8453094959259033, "step": 8534 }, { "epoch": 1.9665898617511521, "grad_norm": 1.0573142140796512, "learning_rate": 1.546595785100413e-09, "loss": 0.7798272371292114, "step": 8535 }, { "epoch": 1.9668202764976959, "grad_norm": 1.1791892730982974, "learning_rate": 1.5254874668586548e-09, "loss": 0.7426424026489258, "step": 8536 }, { "epoch": 1.9670506912442396, "grad_norm": 1.1309739514060748, "learning_rate": 1.5045240758706501e-09, "loss": 0.8443984985351562, "step": 8537 }, { "epoch": 1.9672811059907835, "grad_norm": 1.1053257066980806, "learning_rate": 1.4837056151790762e-09, "loss": 0.8439072370529175, "step": 8538 }, { "epoch": 1.967511520737327, "grad_norm": 1.4135182916864908, "learning_rate": 1.463032087805849e-09, "loss": 0.8307704925537109, "step": 8539 }, { "epoch": 1.967741935483871, "grad_norm": 1.1593054366438007, "learning_rate": 1.442503496751568e-09, "loss": 0.678236722946167, "step": 8540 }, { "epoch": 1.9679723502304147, "grad_norm": 1.3372006359269073, "learning_rate": 1.4221198449960724e-09, "loss": 0.7072663307189941, "step": 8541 }, { "epoch": 1.9682027649769585, "grad_norm": 1.194618240695654, "learning_rate": 1.4018811354977732e-09, "loss": 0.7825980186462402, "step": 8542 }, { "epoch": 1.9684331797235024, "grad_norm": 1.8366711172437336, "learning_rate": 1.3817873711945426e-09, "loss": 0.786361813545227, "step": 8543 }, { "epoch": 1.9686635944700461, "grad_norm": 1.6047169504491765, "learning_rate": 1.3618385550029365e-09, "loss": 1.00287926197052, "step": 8544 }, { "epoch": 1.9688940092165899, "grad_norm": 1.336810745652672, "learning_rate": 1.3420346898183054e-09, "loss": 0.7320775389671326, "step": 8545 }, { "epoch": 1.9691244239631336, "grad_norm": 1.0018804515064612, "learning_rate": 1.322375778515461e-09, "loss": 0.7127507925033569, "step": 8546 }, { "epoch": 1.9693548387096773, "grad_norm": 1.4124185296399752, "learning_rate": 1.3028618239475652e-09, "loss": 0.818395733833313, "step": 8547 }, { "epoch": 1.9695852534562213, "grad_norm": 1.2063998497880193, "learning_rate": 1.2834928289472413e-09, "loss": 0.6384972929954529, "step": 8548 }, { "epoch": 1.969815668202765, "grad_norm": 1.240783999344712, "learning_rate": 1.2642687963256849e-09, "loss": 0.7358517646789551, "step": 8549 }, { "epoch": 1.9700460829493087, "grad_norm": 1.1083546443376424, "learning_rate": 1.2451897288734414e-09, "loss": 0.7311068773269653, "step": 8550 }, { "epoch": 1.9702764976958527, "grad_norm": 1.5415338816809878, "learning_rate": 1.2262556293597403e-09, "loss": 0.8390932083129883, "step": 8551 }, { "epoch": 1.9705069124423962, "grad_norm": 1.2045586519715463, "learning_rate": 1.2074665005328277e-09, "loss": 0.8114689588546753, "step": 8552 }, { "epoch": 1.9707373271889401, "grad_norm": 1.4445688810441233, "learning_rate": 1.1888223451199665e-09, "loss": 1.0044716596603394, "step": 8553 }, { "epoch": 1.9709677419354839, "grad_norm": 1.2243432992298795, "learning_rate": 1.170323165827214e-09, "loss": 0.7566370368003845, "step": 8554 }, { "epoch": 1.9711981566820276, "grad_norm": 1.2230365473762954, "learning_rate": 1.1519689653397557e-09, "loss": 0.7543225288391113, "step": 8555 }, { "epoch": 1.9714285714285715, "grad_norm": 1.306226883529119, "learning_rate": 1.1337597463217941e-09, "loss": 0.8291902542114258, "step": 8556 }, { "epoch": 1.9716589861751153, "grad_norm": 1.1360827313333892, "learning_rate": 1.1156955114162147e-09, "loss": 0.7363135814666748, "step": 8557 }, { "epoch": 1.971889400921659, "grad_norm": 1.102255040931488, "learning_rate": 1.0977762632451427e-09, "loss": 0.7180813550949097, "step": 8558 }, { "epoch": 1.9721198156682027, "grad_norm": 1.1849465839861355, "learning_rate": 1.0800020044093861e-09, "loss": 0.7220569849014282, "step": 8559 }, { "epoch": 1.9723502304147464, "grad_norm": 1.2915012101962247, "learning_rate": 1.0623727374889925e-09, "loss": 0.8839110136032104, "step": 8560 }, { "epoch": 1.9725806451612904, "grad_norm": 1.2553727673767463, "learning_rate": 1.0448884650426926e-09, "loss": 0.7210807800292969, "step": 8561 }, { "epoch": 1.9728110599078341, "grad_norm": 1.3474393893445982, "learning_rate": 1.0275491896084565e-09, "loss": 0.6993537545204163, "step": 8562 }, { "epoch": 1.9730414746543778, "grad_norm": 1.0591927963671788, "learning_rate": 1.0103549137030486e-09, "loss": 0.6951562166213989, "step": 8563 }, { "epoch": 1.9732718894009218, "grad_norm": 1.0760064093903359, "learning_rate": 9.933056398220285e-10, "loss": 0.855778694152832, "step": 8564 }, { "epoch": 1.9735023041474653, "grad_norm": 1.3238204379730676, "learning_rate": 9.76401370440194e-10, "loss": 0.8461301326751709, "step": 8565 }, { "epoch": 1.9737327188940093, "grad_norm": 1.0765880280550415, "learning_rate": 9.596421080112493e-10, "loss": 0.6144053936004639, "step": 8566 }, { "epoch": 1.973963133640553, "grad_norm": 1.226899728476588, "learning_rate": 9.430278549675818e-10, "loss": 0.6623581647872925, "step": 8567 }, { "epoch": 1.9741935483870967, "grad_norm": 1.6396403159587711, "learning_rate": 9.265586137209292e-10, "loss": 0.9540686011314392, "step": 8568 }, { "epoch": 1.9744239631336407, "grad_norm": 1.1121119945854705, "learning_rate": 9.102343866616014e-10, "loss": 0.7231987714767456, "step": 8569 }, { "epoch": 1.9746543778801844, "grad_norm": 1.3771440446346792, "learning_rate": 8.940551761592585e-10, "loss": 0.7759320735931396, "step": 8570 }, { "epoch": 1.9748847926267281, "grad_norm": 1.3995632478363096, "learning_rate": 8.780209845621334e-10, "loss": 0.8277846574783325, "step": 8571 }, { "epoch": 1.9751152073732718, "grad_norm": 1.1615989785178322, "learning_rate": 8.621318141974754e-10, "loss": 0.7913431525230408, "step": 8572 }, { "epoch": 1.9753456221198156, "grad_norm": 1.1262387789302248, "learning_rate": 8.46387667371773e-10, "loss": 0.7011829614639282, "step": 8573 }, { "epoch": 1.9755760368663595, "grad_norm": 1.358126505769676, "learning_rate": 8.30788546370198e-10, "loss": 0.8762087821960449, "step": 8574 }, { "epoch": 1.9758064516129032, "grad_norm": 1.5337262034773564, "learning_rate": 8.153344534569396e-10, "loss": 0.7944581508636475, "step": 8575 }, { "epoch": 1.976036866359447, "grad_norm": 0.9763562202292912, "learning_rate": 8.00025390875203e-10, "loss": 0.7086907625198364, "step": 8576 }, { "epoch": 1.976267281105991, "grad_norm": 1.3716397771498143, "learning_rate": 7.848613608468779e-10, "loss": 0.7263821959495544, "step": 8577 }, { "epoch": 1.9764976958525344, "grad_norm": 1.0912146553836337, "learning_rate": 7.698423655732034e-10, "loss": 0.714054524898529, "step": 8578 }, { "epoch": 1.9767281105990784, "grad_norm": 1.0671768990247028, "learning_rate": 7.549684072341023e-10, "loss": 0.817487359046936, "step": 8579 }, { "epoch": 1.976958525345622, "grad_norm": 1.3039849886057633, "learning_rate": 7.402394879885143e-10, "loss": 0.7933021783828735, "step": 8580 }, { "epoch": 1.9771889400921658, "grad_norm": 1.1473238275849764, "learning_rate": 7.25655609974396e-10, "loss": 0.8699008822441101, "step": 8581 }, { "epoch": 1.9774193548387098, "grad_norm": 1.2250569758639698, "learning_rate": 7.112167753083876e-10, "loss": 0.804245114326477, "step": 8582 }, { "epoch": 1.9776497695852533, "grad_norm": 1.322132271674899, "learning_rate": 6.969229860863679e-10, "loss": 0.8334434628486633, "step": 8583 }, { "epoch": 1.9778801843317972, "grad_norm": 1.1368298808414594, "learning_rate": 6.827742443831219e-10, "loss": 0.7549147605895996, "step": 8584 }, { "epoch": 1.978110599078341, "grad_norm": 0.9036184179111577, "learning_rate": 6.687705522522291e-10, "loss": 0.69701087474823, "step": 8585 }, { "epoch": 1.9783410138248847, "grad_norm": 1.259028975685209, "learning_rate": 6.549119117263969e-10, "loss": 0.727588415145874, "step": 8586 }, { "epoch": 1.9785714285714286, "grad_norm": 1.0967653076646233, "learning_rate": 6.411983248171271e-10, "loss": 0.7309392094612122, "step": 8587 }, { "epoch": 1.9788018433179724, "grad_norm": 1.6515699626026994, "learning_rate": 6.276297935149388e-10, "loss": 0.8299658298492432, "step": 8588 }, { "epoch": 1.979032258064516, "grad_norm": 1.362481943616663, "learning_rate": 6.142063197892566e-10, "loss": 0.9731055498123169, "step": 8589 }, { "epoch": 1.97926267281106, "grad_norm": 1.1190783849934713, "learning_rate": 6.009279055885219e-10, "loss": 0.6292351484298706, "step": 8590 }, { "epoch": 1.9794930875576036, "grad_norm": 1.2981523800262795, "learning_rate": 5.877945528400818e-10, "loss": 0.7881810665130615, "step": 8591 }, { "epoch": 1.9797235023041475, "grad_norm": 1.2430793849512602, "learning_rate": 5.748062634501894e-10, "loss": 0.7910494804382324, "step": 8592 }, { "epoch": 1.9799539170506912, "grad_norm": 1.3789958651744842, "learning_rate": 5.619630393042252e-10, "loss": 0.8255902528762817, "step": 8593 }, { "epoch": 1.980184331797235, "grad_norm": 1.0908110861505123, "learning_rate": 5.492648822660318e-10, "loss": 0.788017749786377, "step": 8594 }, { "epoch": 1.980414746543779, "grad_norm": 1.2052887418241187, "learning_rate": 5.367117941791343e-10, "loss": 0.8717716932296753, "step": 8595 }, { "epoch": 1.9806451612903224, "grad_norm": 1.3810911920135494, "learning_rate": 5.243037768652981e-10, "loss": 0.7220178246498108, "step": 8596 }, { "epoch": 1.9808755760368664, "grad_norm": 1.2221356933031184, "learning_rate": 5.120408321256376e-10, "loss": 0.7536830902099609, "step": 8597 }, { "epoch": 1.98110599078341, "grad_norm": 1.13011497917934, "learning_rate": 4.999229617401735e-10, "loss": 0.7480939626693726, "step": 8598 }, { "epoch": 1.9813364055299538, "grad_norm": 1.1029404069670388, "learning_rate": 4.879501674676101e-10, "loss": 0.7168867588043213, "step": 8599 }, { "epoch": 1.9815668202764978, "grad_norm": 1.1019009005346911, "learning_rate": 4.761224510460016e-10, "loss": 0.8352792263031006, "step": 8600 }, { "epoch": 1.9817972350230415, "grad_norm": 1.2827894099174693, "learning_rate": 4.644398141919748e-10, "loss": 0.6987372636795044, "step": 8601 }, { "epoch": 1.9820276497695852, "grad_norm": 1.2120343684069002, "learning_rate": 4.5290225860128426e-10, "loss": 0.6844612956047058, "step": 8602 }, { "epoch": 1.9822580645161292, "grad_norm": 1.4290404101727392, "learning_rate": 4.4150978594859055e-10, "loss": 0.7659348249435425, "step": 8603 }, { "epoch": 1.9824884792626727, "grad_norm": 1.1012416889537506, "learning_rate": 4.3026239788757077e-10, "loss": 0.8163154125213623, "step": 8604 }, { "epoch": 1.9827188940092166, "grad_norm": 1.3238497684740367, "learning_rate": 4.191600960505859e-10, "loss": 0.8688125610351562, "step": 8605 }, { "epoch": 1.9829493087557604, "grad_norm": 1.13771312339099, "learning_rate": 4.082028820493466e-10, "loss": 0.8250670433044434, "step": 8606 }, { "epoch": 1.983179723502304, "grad_norm": 1.1783821953258633, "learning_rate": 3.973907574741364e-10, "loss": 0.9378982782363892, "step": 8607 }, { "epoch": 1.983410138248848, "grad_norm": 1.1593506126073094, "learning_rate": 3.867237238943666e-10, "loss": 0.8764913082122803, "step": 8608 }, { "epoch": 1.9836405529953915, "grad_norm": 1.271012232850208, "learning_rate": 3.762017828583541e-10, "loss": 0.7690116763114929, "step": 8609 }, { "epoch": 1.9838709677419355, "grad_norm": 1.2648955747200947, "learning_rate": 3.6582493589332187e-10, "loss": 0.6977133750915527, "step": 8610 }, { "epoch": 1.9841013824884792, "grad_norm": 1.0674977135329127, "learning_rate": 3.5559318450539835e-10, "loss": 0.7362618446350098, "step": 8611 }, { "epoch": 1.984331797235023, "grad_norm": 1.4639153789709758, "learning_rate": 3.455065301798399e-10, "loss": 0.7065306305885315, "step": 8612 }, { "epoch": 1.984562211981567, "grad_norm": 1.2869199371326872, "learning_rate": 3.355649743805866e-10, "loss": 0.812393307685852, "step": 8613 }, { "epoch": 1.9847926267281106, "grad_norm": 1.1854338312494677, "learning_rate": 3.2576851855070644e-10, "loss": 0.6947695016860962, "step": 8614 }, { "epoch": 1.9850230414746544, "grad_norm": 1.2703269278379015, "learning_rate": 3.161171641121729e-10, "loss": 0.6745340824127197, "step": 8615 }, { "epoch": 1.9852534562211983, "grad_norm": 1.2827625316731694, "learning_rate": 3.0661091246575454e-10, "loss": 0.7426450848579407, "step": 8616 }, { "epoch": 1.9854838709677418, "grad_norm": 1.1091238879569632, "learning_rate": 2.9724976499134745e-10, "loss": 0.7769409418106079, "step": 8617 }, { "epoch": 1.9857142857142858, "grad_norm": 1.3932431178326243, "learning_rate": 2.8803372304775365e-10, "loss": 0.9591978192329407, "step": 8618 }, { "epoch": 1.9859447004608295, "grad_norm": 1.0484296609281079, "learning_rate": 2.789627879725698e-10, "loss": 0.7504953742027283, "step": 8619 }, { "epoch": 1.9861751152073732, "grad_norm": 1.4071650291408113, "learning_rate": 2.700369610825204e-10, "loss": 0.8990021347999573, "step": 8620 }, { "epoch": 1.9864055299539172, "grad_norm": 1.3278656398693938, "learning_rate": 2.612562436731247e-10, "loss": 0.786778450012207, "step": 8621 }, { "epoch": 1.9866359447004607, "grad_norm": 1.3996268794778322, "learning_rate": 2.526206370189188e-10, "loss": 0.7387717366218567, "step": 8622 }, { "epoch": 1.9868663594470046, "grad_norm": 1.1375614144189101, "learning_rate": 2.4413014237323336e-10, "loss": 0.7672144174575806, "step": 8623 }, { "epoch": 1.9870967741935484, "grad_norm": 1.0342597373081839, "learning_rate": 2.357847609686381e-10, "loss": 0.6191907525062561, "step": 8624 }, { "epoch": 1.987327188940092, "grad_norm": 1.1782057870810292, "learning_rate": 2.2758449401638624e-10, "loss": 0.7257785201072693, "step": 8625 }, { "epoch": 1.987557603686636, "grad_norm": 1.0162904765762713, "learning_rate": 2.195293427066369e-10, "loss": 0.6997271776199341, "step": 8626 }, { "epoch": 1.9877880184331798, "grad_norm": 1.0539587989000714, "learning_rate": 2.1161930820878804e-10, "loss": 0.7813891768455505, "step": 8627 }, { "epoch": 1.9880184331797235, "grad_norm": 1.4503426709948117, "learning_rate": 2.0385439167069917e-10, "loss": 0.8003429174423218, "step": 8628 }, { "epoch": 1.9882488479262674, "grad_norm": 1.0912355930233222, "learning_rate": 1.962345942196908e-10, "loss": 0.8020645380020142, "step": 8629 }, { "epoch": 1.988479262672811, "grad_norm": 1.6862187526303312, "learning_rate": 1.8875991696165604e-10, "loss": 0.9189429879188538, "step": 8630 }, { "epoch": 1.988709677419355, "grad_norm": 1.2154025001234743, "learning_rate": 1.8143036098150487e-10, "loss": 0.7399884462356567, "step": 8631 }, { "epoch": 1.9889400921658986, "grad_norm": 1.346746600672021, "learning_rate": 1.7424592734316402e-10, "loss": 0.7725361585617065, "step": 8632 }, { "epoch": 1.9891705069124423, "grad_norm": 1.2077720906172131, "learning_rate": 1.6720661708946593e-10, "loss": 0.7887094020843506, "step": 8633 }, { "epoch": 1.9894009216589863, "grad_norm": 1.1656177751476533, "learning_rate": 1.6031243124203786e-10, "loss": 0.8007388114929199, "step": 8634 }, { "epoch": 1.9896313364055298, "grad_norm": 1.1324445653667632, "learning_rate": 1.5356337080174587e-10, "loss": 0.6478462219238281, "step": 8635 }, { "epoch": 1.9898617511520738, "grad_norm": 1.0566661119746916, "learning_rate": 1.469594367480287e-10, "loss": 0.8274422287940979, "step": 8636 }, { "epoch": 1.9900921658986175, "grad_norm": 1.2865497814691733, "learning_rate": 1.4050063003956391e-10, "loss": 0.7919641733169556, "step": 8637 }, { "epoch": 1.9903225806451612, "grad_norm": 1.1652783170900007, "learning_rate": 1.3418695161382388e-10, "loss": 0.7973719239234924, "step": 8638 }, { "epoch": 1.9905529953917052, "grad_norm": 1.2951125509591672, "learning_rate": 1.280184023870756e-10, "loss": 0.8002075552940369, "step": 8639 }, { "epoch": 1.9907834101382489, "grad_norm": 1.2388157581694845, "learning_rate": 1.2199498325482506e-10, "loss": 0.748448371887207, "step": 8640 }, { "epoch": 1.9910138248847926, "grad_norm": 1.1203681158314, "learning_rate": 1.1611669509137278e-10, "loss": 0.7333977222442627, "step": 8641 }, { "epoch": 1.9912442396313366, "grad_norm": 1.2174560450430658, "learning_rate": 1.1038353874992524e-10, "loss": 0.7760608196258545, "step": 8642 }, { "epoch": 1.99147465437788, "grad_norm": 1.0029836343944154, "learning_rate": 1.0479551506259455e-10, "loss": 0.6129526495933533, "step": 8643 }, { "epoch": 1.991705069124424, "grad_norm": 1.2201657055802861, "learning_rate": 9.935262484062068e-11, "loss": 0.68567955493927, "step": 8644 }, { "epoch": 1.9919354838709677, "grad_norm": 1.1159731481420905, "learning_rate": 9.405486887381631e-11, "loss": 0.9042092561721802, "step": 8645 }, { "epoch": 1.9921658986175115, "grad_norm": 1.1343470197220147, "learning_rate": 8.890224793123291e-11, "loss": 0.7143117189407349, "step": 8646 }, { "epoch": 1.9923963133640554, "grad_norm": 1.1380121853465122, "learning_rate": 8.389476276071672e-11, "loss": 0.7486213445663452, "step": 8647 }, { "epoch": 1.992626728110599, "grad_norm": 1.0074777840055806, "learning_rate": 7.903241408924177e-11, "loss": 0.8554232716560364, "step": 8648 }, { "epoch": 1.9928571428571429, "grad_norm": 1.2464259863760472, "learning_rate": 7.431520262246582e-11, "loss": 0.6604819297790527, "step": 8649 }, { "epoch": 1.9930875576036866, "grad_norm": 1.450236790683031, "learning_rate": 6.974312904517443e-11, "loss": 0.8032737970352173, "step": 8650 }, { "epoch": 1.9933179723502303, "grad_norm": 1.417412341607897, "learning_rate": 6.531619402083687e-11, "loss": 0.7712494730949402, "step": 8651 }, { "epoch": 1.9935483870967743, "grad_norm": 1.1501531132473004, "learning_rate": 6.103439819216127e-11, "loss": 0.7894617915153503, "step": 8652 }, { "epoch": 1.993778801843318, "grad_norm": 1.585587469758744, "learning_rate": 5.689774218065046e-11, "loss": 0.8386135697364807, "step": 8653 }, { "epoch": 1.9940092165898617, "grad_norm": 1.151150154599754, "learning_rate": 5.290622658660204e-11, "loss": 0.744853138923645, "step": 8654 }, { "epoch": 1.9942396313364057, "grad_norm": 1.2019290228056547, "learning_rate": 4.90598519894414e-11, "loss": 0.7604823112487793, "step": 8655 }, { "epoch": 1.9944700460829492, "grad_norm": 1.3088329290176663, "learning_rate": 4.53586189474997e-11, "loss": 0.7552424669265747, "step": 8656 }, { "epoch": 1.9947004608294931, "grad_norm": 1.1999629002739178, "learning_rate": 4.180252799801387e-11, "loss": 0.9652698636054993, "step": 8657 }, { "epoch": 1.9949308755760369, "grad_norm": 3.0754205014147553, "learning_rate": 3.839157965712658e-11, "loss": 0.9589856266975403, "step": 8658 }, { "epoch": 1.9951612903225806, "grad_norm": 1.0149584356506736, "learning_rate": 3.512577441988629e-11, "loss": 0.6802269220352173, "step": 8659 }, { "epoch": 1.9953917050691246, "grad_norm": 1.3402861462863225, "learning_rate": 3.200511276035822e-11, "loss": 0.8262367248535156, "step": 8660 }, { "epoch": 1.995622119815668, "grad_norm": 1.166077707630556, "learning_rate": 2.9029595131513372e-11, "loss": 0.8353632688522339, "step": 8661 }, { "epoch": 1.995852534562212, "grad_norm": 1.3494376018654042, "learning_rate": 2.61992219652285e-11, "loss": 0.8807231187820435, "step": 8662 }, { "epoch": 1.9960829493087557, "grad_norm": 1.1520030195581032, "learning_rate": 2.3513993672397148e-11, "loss": 0.8394359350204468, "step": 8663 }, { "epoch": 1.9963133640552995, "grad_norm": 0.9751046818624397, "learning_rate": 2.0973910642707592e-11, "loss": 0.8343399167060852, "step": 8664 }, { "epoch": 1.9965437788018434, "grad_norm": 0.9230235584546375, "learning_rate": 1.857897324475388e-11, "loss": 0.7168834209442139, "step": 8665 }, { "epoch": 1.9967741935483871, "grad_norm": 1.1469086275708407, "learning_rate": 1.6329181826257866e-11, "loss": 0.7825703620910645, "step": 8666 }, { "epoch": 1.9970046082949309, "grad_norm": 1.2013625464128237, "learning_rate": 1.4224536713847157e-11, "loss": 0.6497002840042114, "step": 8667 }, { "epoch": 1.9972350230414746, "grad_norm": 1.204271254016415, "learning_rate": 1.2265038212944112e-11, "loss": 0.8188776969909668, "step": 8668 }, { "epoch": 1.9974654377880183, "grad_norm": 2.0423246677180056, "learning_rate": 1.0450686607987869e-11, "loss": 0.898658812046051, "step": 8669 }, { "epoch": 1.9976958525345623, "grad_norm": 1.5435875726791675, "learning_rate": 8.781482162212306e-12, "loss": 0.8580871820449829, "step": 8670 }, { "epoch": 1.997926267281106, "grad_norm": 1.3046658328904006, "learning_rate": 7.25742511797911e-12, "loss": 0.7657710313796997, "step": 8671 }, { "epoch": 1.9981566820276497, "grad_norm": 1.4371880227275262, "learning_rate": 5.87851569655573e-12, "loss": 0.7881382703781128, "step": 8672 }, { "epoch": 1.9983870967741937, "grad_norm": 1.3805751034431293, "learning_rate": 4.644754098004356e-12, "loss": 0.8711144924163818, "step": 8673 }, { "epoch": 1.9986175115207372, "grad_norm": 1.1130398802574797, "learning_rate": 3.5561405015149814e-12, "loss": 0.6993192434310913, "step": 8674 }, { "epoch": 1.9988479262672811, "grad_norm": 0.9709196628106886, "learning_rate": 2.6126750650723452e-12, "loss": 0.7348669767379761, "step": 8675 }, { "epoch": 1.9990783410138249, "grad_norm": 1.0686467754804958, "learning_rate": 1.8143579254559227e-12, "loss": 0.7356513142585754, "step": 8676 }, { "epoch": 1.9993087557603686, "grad_norm": 1.6789468220081696, "learning_rate": 1.1611891986840206e-12, "loss": 0.7969627380371094, "step": 8677 }, { "epoch": 1.9995391705069125, "grad_norm": 1.3374302292197147, "learning_rate": 6.531689795696848e-13, "loss": 0.7247132062911987, "step": 8678 }, { "epoch": 1.9997695852534563, "grad_norm": 1.3757463334176048, "learning_rate": 2.902973418317245e-13, "loss": 0.6177656650543213, "step": 8679 }, { "epoch": 2.0, "grad_norm": 1.3149906641620008, "learning_rate": 7.25743380947108e-14, "loss": 0.8378380537033081, "step": 8680 }, { "epoch": 2.0, "step": 8680, "total_flos": 7249753014763520.0, "train_loss": 0.8227015781100444, "train_runtime": 44224.0669, "train_samples_per_second": 0.785, "train_steps_per_second": 0.196 } ], "logging_steps": 1, "max_steps": 8680, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7249753014763520.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }