{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 69383, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014412752403326464, "grad_norm": 2.8250595659469355, "learning_rate": 1.297016861219196e-08, "loss": 0.8123, "step": 10 }, { "epoch": 0.0002882550480665293, "grad_norm": 3.87418200578353, "learning_rate": 2.7381467070183025e-08, "loss": 0.8427, "step": 20 }, { "epoch": 0.0004323825720997939, "grad_norm": 3.263717515562004, "learning_rate": 4.1792765528174095e-08, "loss": 0.883, "step": 30 }, { "epoch": 0.0005765100961330586, "grad_norm": 3.040436779529905, "learning_rate": 5.620406398616516e-08, "loss": 0.8529, "step": 40 }, { "epoch": 0.0007206376201663231, "grad_norm": 3.4085728844163903, "learning_rate": 7.061536244415622e-08, "loss": 0.8288, "step": 50 }, { "epoch": 0.0008647651441995878, "grad_norm": 3.4974267271840973, "learning_rate": 8.50266609021473e-08, "loss": 0.843, "step": 60 }, { "epoch": 0.0010088926682328525, "grad_norm": 3.1336904260974445, "learning_rate": 9.943795936013835e-08, "loss": 0.8455, "step": 70 }, { "epoch": 0.0011530201922661171, "grad_norm": 3.246048378995215, "learning_rate": 1.1384925781812943e-07, "loss": 0.8675, "step": 80 }, { "epoch": 0.0012971477162993816, "grad_norm": 2.8200429541364715, "learning_rate": 1.282605562761205e-07, "loss": 0.809, "step": 90 }, { "epoch": 0.0014412752403326463, "grad_norm": 2.8630479571188534, "learning_rate": 1.4267185473411154e-07, "loss": 0.8168, "step": 100 }, { "epoch": 0.001585402764365911, "grad_norm": 2.548459485591285, "learning_rate": 1.570831531921026e-07, "loss": 0.7452, "step": 110 }, { "epoch": 0.0017295302883991756, "grad_norm": 2.7409744946800987, "learning_rate": 1.714944516500937e-07, "loss": 0.8187, "step": 120 }, { "epoch": 0.0018736578124324403, "grad_norm": 3.702443634452223, "learning_rate": 1.8590575010808476e-07, "loss": 0.818, "step": 130 }, { "epoch": 0.002017785336465705, "grad_norm": 2.3194845568389018, "learning_rate": 2.0031704856607582e-07, "loss": 0.7807, "step": 140 }, { "epoch": 0.0021619128604989696, "grad_norm": 2.4656969022917137, "learning_rate": 2.147283470240669e-07, "loss": 0.7325, "step": 150 }, { "epoch": 0.0023060403845322343, "grad_norm": 2.122033631863646, "learning_rate": 2.2913964548205795e-07, "loss": 0.7389, "step": 160 }, { "epoch": 0.002450167908565499, "grad_norm": 1.9158136496612432, "learning_rate": 2.43550943940049e-07, "loss": 0.7846, "step": 170 }, { "epoch": 0.002594295432598763, "grad_norm": 2.003366453020241, "learning_rate": 2.579622423980401e-07, "loss": 0.7531, "step": 180 }, { "epoch": 0.002738422956632028, "grad_norm": 1.934278473033145, "learning_rate": 2.7237354085603116e-07, "loss": 0.7331, "step": 190 }, { "epoch": 0.0028825504806652925, "grad_norm": 2.225945293084102, "learning_rate": 2.8678483931402223e-07, "loss": 0.7341, "step": 200 }, { "epoch": 0.003026678004698557, "grad_norm": 1.8328736031437345, "learning_rate": 3.011961377720133e-07, "loss": 0.7171, "step": 210 }, { "epoch": 0.003170805528731822, "grad_norm": 2.0946270586176428, "learning_rate": 3.1560743623000435e-07, "loss": 0.7334, "step": 220 }, { "epoch": 0.0033149330527650865, "grad_norm": 2.07936064151572, "learning_rate": 3.3001873468799547e-07, "loss": 0.6924, "step": 230 }, { "epoch": 0.003459060576798351, "grad_norm": 1.8865783609643025, "learning_rate": 3.444300331459865e-07, "loss": 0.7149, "step": 240 }, { "epoch": 0.003603188100831616, "grad_norm": 2.0358377728840082, "learning_rate": 3.5884133160397754e-07, "loss": 0.6794, "step": 250 }, { "epoch": 0.0037473156248648805, "grad_norm": 1.7251853704601103, "learning_rate": 3.732526300619686e-07, "loss": 0.6843, "step": 260 }, { "epoch": 0.003891443148898145, "grad_norm": 1.888669761437406, "learning_rate": 3.8766392851995967e-07, "loss": 0.7043, "step": 270 }, { "epoch": 0.00403557067293141, "grad_norm": 2.107486728657345, "learning_rate": 4.020752269779508e-07, "loss": 0.7041, "step": 280 }, { "epoch": 0.0041796981969646745, "grad_norm": 1.6231033102895864, "learning_rate": 4.164865254359418e-07, "loss": 0.6706, "step": 290 }, { "epoch": 0.004323825720997939, "grad_norm": 1.9736643805621747, "learning_rate": 4.3089782389393286e-07, "loss": 0.6666, "step": 300 }, { "epoch": 0.004467953245031204, "grad_norm": 1.9625898279534186, "learning_rate": 4.45309122351924e-07, "loss": 0.6742, "step": 310 }, { "epoch": 0.0046120807690644685, "grad_norm": 2.053127654327487, "learning_rate": 4.59720420809915e-07, "loss": 0.6492, "step": 320 }, { "epoch": 0.004756208293097733, "grad_norm": 2.0026672923061084, "learning_rate": 4.741317192679061e-07, "loss": 0.6533, "step": 330 }, { "epoch": 0.004900335817130998, "grad_norm": 1.8625184304275488, "learning_rate": 4.885430177258971e-07, "loss": 0.6375, "step": 340 }, { "epoch": 0.0050444633411642625, "grad_norm": 2.0343155917983995, "learning_rate": 5.029543161838882e-07, "loss": 0.6362, "step": 350 }, { "epoch": 0.005188590865197526, "grad_norm": 1.9353418648881633, "learning_rate": 5.173656146418792e-07, "loss": 0.6242, "step": 360 }, { "epoch": 0.005332718389230791, "grad_norm": 1.8572416596297734, "learning_rate": 5.317769130998703e-07, "loss": 0.6249, "step": 370 }, { "epoch": 0.005476845913264056, "grad_norm": 1.7761057590801574, "learning_rate": 5.461882115578615e-07, "loss": 0.6436, "step": 380 }, { "epoch": 0.00562097343729732, "grad_norm": 1.988302765743044, "learning_rate": 5.605995100158524e-07, "loss": 0.6492, "step": 390 }, { "epoch": 0.005765100961330585, "grad_norm": 1.8410343426926474, "learning_rate": 5.750108084738435e-07, "loss": 0.6324, "step": 400 }, { "epoch": 0.00590922848536385, "grad_norm": 1.6603924733379634, "learning_rate": 5.894221069318347e-07, "loss": 0.6181, "step": 410 }, { "epoch": 0.006053356009397114, "grad_norm": 1.520003536127697, "learning_rate": 6.038334053898256e-07, "loss": 0.5994, "step": 420 }, { "epoch": 0.006197483533430379, "grad_norm": 1.779404767410018, "learning_rate": 6.182447038478168e-07, "loss": 0.6197, "step": 430 }, { "epoch": 0.006341611057463644, "grad_norm": 2.0029232052964563, "learning_rate": 6.326560023058079e-07, "loss": 0.6323, "step": 440 }, { "epoch": 0.006485738581496908, "grad_norm": 2.3229577157889696, "learning_rate": 6.470673007637988e-07, "loss": 0.6279, "step": 450 }, { "epoch": 0.006629866105530173, "grad_norm": 1.7638319431201854, "learning_rate": 6.6147859922179e-07, "loss": 0.6488, "step": 460 }, { "epoch": 0.006773993629563438, "grad_norm": 1.9867034238761472, "learning_rate": 6.75889897679781e-07, "loss": 0.6658, "step": 470 }, { "epoch": 0.006918121153596702, "grad_norm": 2.5151974724075794, "learning_rate": 6.90301196137772e-07, "loss": 0.625, "step": 480 }, { "epoch": 0.007062248677629967, "grad_norm": 1.9433250538572369, "learning_rate": 7.047124945957632e-07, "loss": 0.6191, "step": 490 }, { "epoch": 0.007206376201663232, "grad_norm": 2.008722198831636, "learning_rate": 7.191237930537542e-07, "loss": 0.6053, "step": 500 }, { "epoch": 0.007350503725696496, "grad_norm": 1.9039866537510146, "learning_rate": 7.335350915117452e-07, "loss": 0.5939, "step": 510 }, { "epoch": 0.007494631249729761, "grad_norm": 2.136962078412343, "learning_rate": 7.479463899697364e-07, "loss": 0.6237, "step": 520 }, { "epoch": 0.007638758773763026, "grad_norm": 2.1877812779117884, "learning_rate": 7.623576884277274e-07, "loss": 0.6135, "step": 530 }, { "epoch": 0.00778288629779629, "grad_norm": 1.8689467175194021, "learning_rate": 7.767689868857185e-07, "loss": 0.633, "step": 540 }, { "epoch": 0.007927013821829554, "grad_norm": 2.444345499102546, "learning_rate": 7.911802853437094e-07, "loss": 0.6622, "step": 550 }, { "epoch": 0.00807114134586282, "grad_norm": 1.5994916716527894, "learning_rate": 8.055915838017006e-07, "loss": 0.5969, "step": 560 }, { "epoch": 0.008215268869896084, "grad_norm": 2.067846010213442, "learning_rate": 8.200028822596917e-07, "loss": 0.6654, "step": 570 }, { "epoch": 0.008359396393929349, "grad_norm": 1.7669541801058253, "learning_rate": 8.344141807176826e-07, "loss": 0.6103, "step": 580 }, { "epoch": 0.008503523917962613, "grad_norm": 1.880162542877261, "learning_rate": 8.488254791756738e-07, "loss": 0.6257, "step": 590 }, { "epoch": 0.008647651441995878, "grad_norm": 1.9332847866877554, "learning_rate": 8.632367776336649e-07, "loss": 0.6224, "step": 600 }, { "epoch": 0.008791778966029142, "grad_norm": 1.6391457117656467, "learning_rate": 8.776480760916558e-07, "loss": 0.5728, "step": 610 }, { "epoch": 0.008935906490062408, "grad_norm": 1.947663516769443, "learning_rate": 8.92059374549647e-07, "loss": 0.6095, "step": 620 }, { "epoch": 0.009080034014095672, "grad_norm": 1.8754855990190764, "learning_rate": 9.064706730076381e-07, "loss": 0.625, "step": 630 }, { "epoch": 0.009224161538128937, "grad_norm": 1.7415003292000424, "learning_rate": 9.208819714656292e-07, "loss": 0.6174, "step": 640 }, { "epoch": 0.0093682890621622, "grad_norm": 1.9300243420815975, "learning_rate": 9.352932699236202e-07, "loss": 0.6474, "step": 650 }, { "epoch": 0.009512416586195466, "grad_norm": 2.1553171600596284, "learning_rate": 9.497045683816113e-07, "loss": 0.6325, "step": 660 }, { "epoch": 0.00965654411022873, "grad_norm": 2.2602748629987435, "learning_rate": 9.641158668396023e-07, "loss": 0.6356, "step": 670 }, { "epoch": 0.009800671634261996, "grad_norm": 1.9806284936225267, "learning_rate": 9.785271652975934e-07, "loss": 0.6504, "step": 680 }, { "epoch": 0.00994479915829526, "grad_norm": 1.5960485449099273, "learning_rate": 9.929384637555844e-07, "loss": 0.556, "step": 690 }, { "epoch": 0.010088926682328525, "grad_norm": 2.116101379498486, "learning_rate": 1.0073497622135755e-06, "loss": 0.5824, "step": 700 }, { "epoch": 0.010233054206361789, "grad_norm": 2.0001950745666135, "learning_rate": 1.0217610606715666e-06, "loss": 0.5642, "step": 710 }, { "epoch": 0.010377181730395053, "grad_norm": 1.6082559264671357, "learning_rate": 1.0361723591295576e-06, "loss": 0.5853, "step": 720 }, { "epoch": 0.010521309254428318, "grad_norm": 1.9579751978702478, "learning_rate": 1.0505836575875487e-06, "loss": 0.5966, "step": 730 }, { "epoch": 0.010665436778461582, "grad_norm": 2.1562189003008263, "learning_rate": 1.0649949560455398e-06, "loss": 0.6029, "step": 740 }, { "epoch": 0.010809564302494848, "grad_norm": 2.202707195991483, "learning_rate": 1.0794062545035308e-06, "loss": 0.6209, "step": 750 }, { "epoch": 0.010953691826528111, "grad_norm": 1.8629514708209154, "learning_rate": 1.0938175529615219e-06, "loss": 0.5935, "step": 760 }, { "epoch": 0.011097819350561377, "grad_norm": 1.98809034138322, "learning_rate": 1.108228851419513e-06, "loss": 0.6291, "step": 770 }, { "epoch": 0.01124194687459464, "grad_norm": 1.9838497817383034, "learning_rate": 1.122640149877504e-06, "loss": 0.5703, "step": 780 }, { "epoch": 0.011386074398627906, "grad_norm": 1.7084612404305402, "learning_rate": 1.137051448335495e-06, "loss": 0.5943, "step": 790 }, { "epoch": 0.01153020192266117, "grad_norm": 2.107557257560781, "learning_rate": 1.1514627467934861e-06, "loss": 0.5926, "step": 800 }, { "epoch": 0.011674329446694436, "grad_norm": 1.7898750511142008, "learning_rate": 1.1658740452514774e-06, "loss": 0.6055, "step": 810 }, { "epoch": 0.0118184569707277, "grad_norm": 1.754445553086163, "learning_rate": 1.1802853437094683e-06, "loss": 0.6121, "step": 820 }, { "epoch": 0.011962584494760965, "grad_norm": 1.9058146546879122, "learning_rate": 1.1946966421674593e-06, "loss": 0.6355, "step": 830 }, { "epoch": 0.012106712018794229, "grad_norm": 1.526037010525371, "learning_rate": 1.2091079406254506e-06, "loss": 0.6013, "step": 840 }, { "epoch": 0.012250839542827494, "grad_norm": 1.8758153646763451, "learning_rate": 1.2235192390834415e-06, "loss": 0.6041, "step": 850 }, { "epoch": 0.012394967066860758, "grad_norm": 1.5748516600421418, "learning_rate": 1.2379305375414325e-06, "loss": 0.6314, "step": 860 }, { "epoch": 0.012539094590894024, "grad_norm": 2.1243313959402075, "learning_rate": 1.2523418359994236e-06, "loss": 0.5845, "step": 870 }, { "epoch": 0.012683222114927287, "grad_norm": 2.0956520253297546, "learning_rate": 1.2667531344574147e-06, "loss": 0.5764, "step": 880 }, { "epoch": 0.012827349638960553, "grad_norm": 1.86614421794014, "learning_rate": 1.2811644329154057e-06, "loss": 0.6282, "step": 890 }, { "epoch": 0.012971477162993817, "grad_norm": 2.0859747678215195, "learning_rate": 1.2955757313733968e-06, "loss": 0.5859, "step": 900 }, { "epoch": 0.013115604687027082, "grad_norm": 1.9269333342994515, "learning_rate": 1.309987029831388e-06, "loss": 0.6062, "step": 910 }, { "epoch": 0.013259732211060346, "grad_norm": 1.9701756713163028, "learning_rate": 1.3243983282893791e-06, "loss": 0.6318, "step": 920 }, { "epoch": 0.013403859735093612, "grad_norm": 2.6781408676250233, "learning_rate": 1.33880962674737e-06, "loss": 0.5843, "step": 930 }, { "epoch": 0.013547987259126875, "grad_norm": 1.6750391037714871, "learning_rate": 1.353220925205361e-06, "loss": 0.5482, "step": 940 }, { "epoch": 0.013692114783160141, "grad_norm": 1.9590872924608154, "learning_rate": 1.367632223663352e-06, "loss": 0.5693, "step": 950 }, { "epoch": 0.013836242307193405, "grad_norm": 1.895972083341478, "learning_rate": 1.3820435221213432e-06, "loss": 0.5907, "step": 960 }, { "epoch": 0.013980369831226669, "grad_norm": 2.1561720256475496, "learning_rate": 1.3964548205793344e-06, "loss": 0.5913, "step": 970 }, { "epoch": 0.014124497355259934, "grad_norm": 2.1260488345980457, "learning_rate": 1.4108661190373255e-06, "loss": 0.5876, "step": 980 }, { "epoch": 0.014268624879293198, "grad_norm": 1.5534476023784216, "learning_rate": 1.4252774174953166e-06, "loss": 0.5857, "step": 990 }, { "epoch": 0.014412752403326463, "grad_norm": 2.0103913704726497, "learning_rate": 1.4396887159533074e-06, "loss": 0.5921, "step": 1000 }, { "epoch": 0.014556879927359727, "grad_norm": 1.8424369977565422, "learning_rate": 1.4541000144112985e-06, "loss": 0.5647, "step": 1010 }, { "epoch": 0.014701007451392993, "grad_norm": 1.9288322240282996, "learning_rate": 1.4685113128692895e-06, "loss": 0.6015, "step": 1020 }, { "epoch": 0.014845134975426257, "grad_norm": 1.7890338433594688, "learning_rate": 1.4829226113272808e-06, "loss": 0.556, "step": 1030 }, { "epoch": 0.014989262499459522, "grad_norm": 1.8308486187584458, "learning_rate": 1.4973339097852719e-06, "loss": 0.5979, "step": 1040 }, { "epoch": 0.015133390023492786, "grad_norm": 1.7558165723321617, "learning_rate": 1.511745208243263e-06, "loss": 0.5594, "step": 1050 }, { "epoch": 0.015277517547526051, "grad_norm": 1.8965705039884069, "learning_rate": 1.526156506701254e-06, "loss": 0.5573, "step": 1060 }, { "epoch": 0.015421645071559315, "grad_norm": 2.028678585672367, "learning_rate": 1.5405678051592449e-06, "loss": 0.5848, "step": 1070 }, { "epoch": 0.01556577259559258, "grad_norm": 2.119949296213452, "learning_rate": 1.554979103617236e-06, "loss": 0.5806, "step": 1080 }, { "epoch": 0.015709900119625846, "grad_norm": 1.6397054915548126, "learning_rate": 1.569390402075227e-06, "loss": 0.5312, "step": 1090 }, { "epoch": 0.01585402764365911, "grad_norm": 1.8656872368909592, "learning_rate": 1.5838017005332183e-06, "loss": 0.5684, "step": 1100 }, { "epoch": 0.015998155167692374, "grad_norm": 5.480888092138683, "learning_rate": 1.5982129989912093e-06, "loss": 0.5936, "step": 1110 }, { "epoch": 0.01614228269172564, "grad_norm": 2.027192228555228, "learning_rate": 1.6126242974492004e-06, "loss": 0.5586, "step": 1120 }, { "epoch": 0.016286410215758905, "grad_norm": 1.9240545521283632, "learning_rate": 1.6270355959071912e-06, "loss": 0.5702, "step": 1130 }, { "epoch": 0.016430537739792167, "grad_norm": 1.742132798293045, "learning_rate": 1.6414468943651823e-06, "loss": 0.5605, "step": 1140 }, { "epoch": 0.016574665263825433, "grad_norm": 1.848901595141633, "learning_rate": 1.6558581928231734e-06, "loss": 0.574, "step": 1150 }, { "epoch": 0.016718792787858698, "grad_norm": 1.8908094345949775, "learning_rate": 1.6702694912811646e-06, "loss": 0.5902, "step": 1160 }, { "epoch": 0.016862920311891964, "grad_norm": 1.9553467047954143, "learning_rate": 1.6846807897391557e-06, "loss": 0.5827, "step": 1170 }, { "epoch": 0.017007047835925226, "grad_norm": 1.7189058254477252, "learning_rate": 1.6990920881971468e-06, "loss": 0.5788, "step": 1180 }, { "epoch": 0.01715117535995849, "grad_norm": 1.7190726009659631, "learning_rate": 1.7135033866551378e-06, "loss": 0.5738, "step": 1190 }, { "epoch": 0.017295302883991757, "grad_norm": 2.2086572000180382, "learning_rate": 1.7279146851131287e-06, "loss": 0.5623, "step": 1200 }, { "epoch": 0.01743943040802502, "grad_norm": 1.932267965963001, "learning_rate": 1.7423259835711197e-06, "loss": 0.5756, "step": 1210 }, { "epoch": 0.017583557932058284, "grad_norm": 1.872235658535138, "learning_rate": 1.756737282029111e-06, "loss": 0.5971, "step": 1220 }, { "epoch": 0.01772768545609155, "grad_norm": 1.6038862620684708, "learning_rate": 1.771148580487102e-06, "loss": 0.5815, "step": 1230 }, { "epoch": 0.017871812980124815, "grad_norm": 2.0581094338195878, "learning_rate": 1.7855598789450931e-06, "loss": 0.5827, "step": 1240 }, { "epoch": 0.018015940504158078, "grad_norm": 1.967685717432428, "learning_rate": 1.7999711774030842e-06, "loss": 0.5532, "step": 1250 }, { "epoch": 0.018160068028191343, "grad_norm": 1.6670964279105838, "learning_rate": 1.8143824758610753e-06, "loss": 0.559, "step": 1260 }, { "epoch": 0.01830419555222461, "grad_norm": 1.5677526780251057, "learning_rate": 1.8287937743190661e-06, "loss": 0.5287, "step": 1270 }, { "epoch": 0.018448323076257874, "grad_norm": 1.9973788815722542, "learning_rate": 1.8432050727770572e-06, "loss": 0.5685, "step": 1280 }, { "epoch": 0.018592450600291136, "grad_norm": 1.7539575282999487, "learning_rate": 1.8576163712350485e-06, "loss": 0.5545, "step": 1290 }, { "epoch": 0.0187365781243244, "grad_norm": 1.899221690474535, "learning_rate": 1.8720276696930395e-06, "loss": 0.5952, "step": 1300 }, { "epoch": 0.018880705648357667, "grad_norm": 1.9390350051679957, "learning_rate": 1.8864389681510306e-06, "loss": 0.5799, "step": 1310 }, { "epoch": 0.019024833172390933, "grad_norm": 1.5949540000650273, "learning_rate": 1.9008502666090217e-06, "loss": 0.548, "step": 1320 }, { "epoch": 0.019168960696424195, "grad_norm": 11.075299361734768, "learning_rate": 1.915261565067013e-06, "loss": 0.5476, "step": 1330 }, { "epoch": 0.01931308822045746, "grad_norm": 1.826087273836159, "learning_rate": 1.9296728635250036e-06, "loss": 0.5826, "step": 1340 }, { "epoch": 0.019457215744490726, "grad_norm": 2.038013970356889, "learning_rate": 1.9440841619829946e-06, "loss": 0.6065, "step": 1350 }, { "epoch": 0.01960134326852399, "grad_norm": 1.9544325796083613, "learning_rate": 1.9584954604409857e-06, "loss": 0.5521, "step": 1360 }, { "epoch": 0.019745470792557254, "grad_norm": 2.1748375875142094, "learning_rate": 1.972906758898977e-06, "loss": 0.5961, "step": 1370 }, { "epoch": 0.01988959831659052, "grad_norm": 1.8746850528902321, "learning_rate": 1.9873180573569682e-06, "loss": 0.5668, "step": 1380 }, { "epoch": 0.020033725840623785, "grad_norm": 1.7603701948577464, "learning_rate": 2.0017293558149593e-06, "loss": 0.5557, "step": 1390 }, { "epoch": 0.02017785336465705, "grad_norm": 2.123440558670354, "learning_rate": 2.01614065427295e-06, "loss": 0.5844, "step": 1400 }, { "epoch": 0.020321980888690312, "grad_norm": 1.871869489034027, "learning_rate": 2.030551952730941e-06, "loss": 0.5645, "step": 1410 }, { "epoch": 0.020466108412723578, "grad_norm": 1.6440777455348796, "learning_rate": 2.044963251188932e-06, "loss": 0.5501, "step": 1420 }, { "epoch": 0.020610235936756843, "grad_norm": 1.7587395195891489, "learning_rate": 2.059374549646923e-06, "loss": 0.5524, "step": 1430 }, { "epoch": 0.020754363460790105, "grad_norm": 2.2158937801451395, "learning_rate": 2.0737858481049146e-06, "loss": 0.5791, "step": 1440 }, { "epoch": 0.02089849098482337, "grad_norm": 2.064808965234018, "learning_rate": 2.0881971465629057e-06, "loss": 0.5473, "step": 1450 }, { "epoch": 0.021042618508856636, "grad_norm": 1.729152211311561, "learning_rate": 2.1026084450208968e-06, "loss": 0.5732, "step": 1460 }, { "epoch": 0.021186746032889902, "grad_norm": 1.8198534669622943, "learning_rate": 2.1170197434788874e-06, "loss": 0.5622, "step": 1470 }, { "epoch": 0.021330873556923164, "grad_norm": 1.898876449394025, "learning_rate": 2.1314310419368785e-06, "loss": 0.5671, "step": 1480 }, { "epoch": 0.02147500108095643, "grad_norm": 1.7608430711671064, "learning_rate": 2.1458423403948695e-06, "loss": 0.5564, "step": 1490 }, { "epoch": 0.021619128604989695, "grad_norm": 2.128480433587752, "learning_rate": 2.160253638852861e-06, "loss": 0.5876, "step": 1500 }, { "epoch": 0.02176325612902296, "grad_norm": 1.848536760424887, "learning_rate": 2.174664937310852e-06, "loss": 0.5758, "step": 1510 }, { "epoch": 0.021907383653056223, "grad_norm": 1.6894417044242536, "learning_rate": 2.189076235768843e-06, "loss": 0.5578, "step": 1520 }, { "epoch": 0.022051511177089488, "grad_norm": 1.9270516971561633, "learning_rate": 2.203487534226834e-06, "loss": 0.5598, "step": 1530 }, { "epoch": 0.022195638701122754, "grad_norm": 1.8884450602777532, "learning_rate": 2.217898832684825e-06, "loss": 0.5639, "step": 1540 }, { "epoch": 0.02233976622515602, "grad_norm": 1.8878088199732062, "learning_rate": 2.232310131142816e-06, "loss": 0.5793, "step": 1550 }, { "epoch": 0.02248389374918928, "grad_norm": 1.5812326729076118, "learning_rate": 2.2467214296008074e-06, "loss": 0.5631, "step": 1560 }, { "epoch": 0.022628021273222547, "grad_norm": 2.0634149724272697, "learning_rate": 2.2611327280587985e-06, "loss": 0.5679, "step": 1570 }, { "epoch": 0.022772148797255812, "grad_norm": 1.711177867930457, "learning_rate": 2.2755440265167895e-06, "loss": 0.5659, "step": 1580 }, { "epoch": 0.022916276321289078, "grad_norm": 2.440910554282084, "learning_rate": 2.2899553249747806e-06, "loss": 0.5837, "step": 1590 }, { "epoch": 0.02306040384532234, "grad_norm": 2.1201403885019605, "learning_rate": 2.3043666234327712e-06, "loss": 0.543, "step": 1600 }, { "epoch": 0.023204531369355606, "grad_norm": 1.5941391596958392, "learning_rate": 2.3187779218907623e-06, "loss": 0.5211, "step": 1610 }, { "epoch": 0.02334865889338887, "grad_norm": 1.836705090558692, "learning_rate": 2.3331892203487533e-06, "loss": 0.5132, "step": 1620 }, { "epoch": 0.023492786417422137, "grad_norm": 1.899276178775375, "learning_rate": 2.347600518806745e-06, "loss": 0.56, "step": 1630 }, { "epoch": 0.0236369139414554, "grad_norm": 2.125966165062158, "learning_rate": 2.362011817264736e-06, "loss": 0.5689, "step": 1640 }, { "epoch": 0.023781041465488664, "grad_norm": 1.8052593818908749, "learning_rate": 2.376423115722727e-06, "loss": 0.5462, "step": 1650 }, { "epoch": 0.02392516898952193, "grad_norm": 1.9073683624725561, "learning_rate": 2.390834414180718e-06, "loss": 0.5443, "step": 1660 }, { "epoch": 0.024069296513555195, "grad_norm": 2.086930342166401, "learning_rate": 2.4052457126387087e-06, "loss": 0.5575, "step": 1670 }, { "epoch": 0.024213424037588457, "grad_norm": 1.745940463866409, "learning_rate": 2.4196570110966997e-06, "loss": 0.5241, "step": 1680 }, { "epoch": 0.024357551561621723, "grad_norm": 1.9435617999809223, "learning_rate": 2.4340683095546912e-06, "loss": 0.5466, "step": 1690 }, { "epoch": 0.02450167908565499, "grad_norm": 1.9457307719688643, "learning_rate": 2.4484796080126823e-06, "loss": 0.574, "step": 1700 }, { "epoch": 0.02464580660968825, "grad_norm": 1.932563266407429, "learning_rate": 2.4628909064706733e-06, "loss": 0.5445, "step": 1710 }, { "epoch": 0.024789934133721516, "grad_norm": 2.112744305993978, "learning_rate": 2.4773022049286644e-06, "loss": 0.5648, "step": 1720 }, { "epoch": 0.02493406165775478, "grad_norm": 2.0588464951407928, "learning_rate": 2.4917135033866555e-06, "loss": 0.5909, "step": 1730 }, { "epoch": 0.025078189181788047, "grad_norm": 1.9016816890427382, "learning_rate": 2.5061248018446465e-06, "loss": 0.5814, "step": 1740 }, { "epoch": 0.02522231670582131, "grad_norm": 1.748764782254277, "learning_rate": 2.5205361003026376e-06, "loss": 0.5583, "step": 1750 }, { "epoch": 0.025366444229854575, "grad_norm": 1.7324172612771005, "learning_rate": 2.5349473987606287e-06, "loss": 0.5808, "step": 1760 }, { "epoch": 0.02551057175388784, "grad_norm": 1.8505124409459635, "learning_rate": 2.5493586972186197e-06, "loss": 0.5546, "step": 1770 }, { "epoch": 0.025654699277921106, "grad_norm": 1.7115329877102612, "learning_rate": 2.5637699956766104e-06, "loss": 0.5826, "step": 1780 }, { "epoch": 0.025798826801954368, "grad_norm": 2.2076421118285157, "learning_rate": 2.578181294134602e-06, "loss": 0.548, "step": 1790 }, { "epoch": 0.025942954325987633, "grad_norm": 2.016617519757607, "learning_rate": 2.5925925925925925e-06, "loss": 0.5547, "step": 1800 }, { "epoch": 0.0260870818500209, "grad_norm": 1.9049967873660159, "learning_rate": 2.607003891050584e-06, "loss": 0.5402, "step": 1810 }, { "epoch": 0.026231209374054165, "grad_norm": 1.5750854879549638, "learning_rate": 2.621415189508575e-06, "loss": 0.5509, "step": 1820 }, { "epoch": 0.026375336898087427, "grad_norm": 1.8704154209066808, "learning_rate": 2.635826487966566e-06, "loss": 0.5648, "step": 1830 }, { "epoch": 0.026519464422120692, "grad_norm": 3.5063422504073434, "learning_rate": 2.650237786424557e-06, "loss": 0.5485, "step": 1840 }, { "epoch": 0.026663591946153958, "grad_norm": 2.2115480992525676, "learning_rate": 2.664649084882548e-06, "loss": 0.5592, "step": 1850 }, { "epoch": 0.026807719470187223, "grad_norm": 1.6432552911874874, "learning_rate": 2.6790603833405393e-06, "loss": 0.5144, "step": 1860 }, { "epoch": 0.026951846994220485, "grad_norm": 2.064372476614481, "learning_rate": 2.69347168179853e-06, "loss": 0.5494, "step": 1870 }, { "epoch": 0.02709597451825375, "grad_norm": 1.855590835979979, "learning_rate": 2.7078829802565214e-06, "loss": 0.5788, "step": 1880 }, { "epoch": 0.027240102042287016, "grad_norm": 2.063529141464548, "learning_rate": 2.7222942787145125e-06, "loss": 0.5847, "step": 1890 }, { "epoch": 0.027384229566320282, "grad_norm": 2.149572369395929, "learning_rate": 2.7367055771725035e-06, "loss": 0.5778, "step": 1900 }, { "epoch": 0.027528357090353544, "grad_norm": 1.8720355142846898, "learning_rate": 2.7511168756304946e-06, "loss": 0.5264, "step": 1910 }, { "epoch": 0.02767248461438681, "grad_norm": 2.0530033177056874, "learning_rate": 2.7655281740884853e-06, "loss": 0.5797, "step": 1920 }, { "epoch": 0.027816612138420075, "grad_norm": 1.9153385120343174, "learning_rate": 2.7799394725464767e-06, "loss": 0.5349, "step": 1930 }, { "epoch": 0.027960739662453337, "grad_norm": 1.738990268165123, "learning_rate": 2.794350771004468e-06, "loss": 0.5295, "step": 1940 }, { "epoch": 0.028104867186486603, "grad_norm": 1.6793753478353217, "learning_rate": 2.808762069462459e-06, "loss": 0.5603, "step": 1950 }, { "epoch": 0.028248994710519868, "grad_norm": 2.118418724390783, "learning_rate": 2.82317336792045e-06, "loss": 0.5692, "step": 1960 }, { "epoch": 0.028393122234553134, "grad_norm": 2.6342162224141115, "learning_rate": 2.8375846663784406e-06, "loss": 0.5336, "step": 1970 }, { "epoch": 0.028537249758586396, "grad_norm": 1.7264181458388663, "learning_rate": 2.851995964836432e-06, "loss": 0.5208, "step": 1980 }, { "epoch": 0.02868137728261966, "grad_norm": 1.808870715816935, "learning_rate": 2.8664072632944227e-06, "loss": 0.5217, "step": 1990 }, { "epoch": 0.028825504806652927, "grad_norm": 1.9458463591075323, "learning_rate": 2.880818561752414e-06, "loss": 0.5652, "step": 2000 }, { "epoch": 0.028969632330686192, "grad_norm": 2.0907884487071997, "learning_rate": 2.8952298602104052e-06, "loss": 0.5315, "step": 2010 }, { "epoch": 0.029113759854719454, "grad_norm": 1.8243580302262745, "learning_rate": 2.9096411586683963e-06, "loss": 0.5544, "step": 2020 }, { "epoch": 0.02925788737875272, "grad_norm": 1.85887392368298, "learning_rate": 2.9240524571263874e-06, "loss": 0.5518, "step": 2030 }, { "epoch": 0.029402014902785985, "grad_norm": 1.708990698492463, "learning_rate": 2.938463755584378e-06, "loss": 0.5566, "step": 2040 }, { "epoch": 0.02954614242681925, "grad_norm": 1.9009007242624858, "learning_rate": 2.9528750540423695e-06, "loss": 0.5037, "step": 2050 }, { "epoch": 0.029690269950852513, "grad_norm": 1.8089129713401537, "learning_rate": 2.96728635250036e-06, "loss": 0.5439, "step": 2060 }, { "epoch": 0.02983439747488578, "grad_norm": 1.8040898192867185, "learning_rate": 2.9816976509583516e-06, "loss": 0.5348, "step": 2070 }, { "epoch": 0.029978524998919044, "grad_norm": 1.8963482954267787, "learning_rate": 2.9961089494163427e-06, "loss": 0.529, "step": 2080 }, { "epoch": 0.03012265252295231, "grad_norm": 1.7662984545175526, "learning_rate": 3.0105202478743338e-06, "loss": 0.5856, "step": 2090 }, { "epoch": 0.030266780046985572, "grad_norm": 1.9638193453449695, "learning_rate": 3.024931546332325e-06, "loss": 0.5557, "step": 2100 }, { "epoch": 0.030410907571018837, "grad_norm": 1.997876407797777, "learning_rate": 3.0393428447903155e-06, "loss": 0.5673, "step": 2110 }, { "epoch": 0.030555035095052103, "grad_norm": 2.179308271654043, "learning_rate": 3.053754143248307e-06, "loss": 0.5451, "step": 2120 }, { "epoch": 0.03069916261908537, "grad_norm": 1.889010909169802, "learning_rate": 3.068165441706298e-06, "loss": 0.5518, "step": 2130 }, { "epoch": 0.03084329014311863, "grad_norm": 1.5709065993569018, "learning_rate": 3.082576740164289e-06, "loss": 0.5115, "step": 2140 }, { "epoch": 0.030987417667151896, "grad_norm": 2.193331814782688, "learning_rate": 3.09698803862228e-06, "loss": 0.6086, "step": 2150 }, { "epoch": 0.03113154519118516, "grad_norm": 1.7508892127444393, "learning_rate": 3.1113993370802716e-06, "loss": 0.5522, "step": 2160 }, { "epoch": 0.031275672715218424, "grad_norm": 5.319384304351234, "learning_rate": 3.1258106355382623e-06, "loss": 0.5153, "step": 2170 }, { "epoch": 0.03141980023925169, "grad_norm": 1.835302390118333, "learning_rate": 3.140221933996253e-06, "loss": 0.5437, "step": 2180 }, { "epoch": 0.031563927763284955, "grad_norm": 1.9041082542446799, "learning_rate": 3.1546332324542444e-06, "loss": 0.5559, "step": 2190 }, { "epoch": 0.03170805528731822, "grad_norm": 1.8540191848090755, "learning_rate": 3.1690445309122355e-06, "loss": 0.5653, "step": 2200 }, { "epoch": 0.031852182811351486, "grad_norm": 1.7937222616274862, "learning_rate": 3.1834558293702265e-06, "loss": 0.512, "step": 2210 }, { "epoch": 0.03199631033538475, "grad_norm": 2.4029545865247393, "learning_rate": 3.1978671278282176e-06, "loss": 0.5413, "step": 2220 }, { "epoch": 0.03214043785941801, "grad_norm": 2.003701287978441, "learning_rate": 3.212278426286209e-06, "loss": 0.5544, "step": 2230 }, { "epoch": 0.03228456538345128, "grad_norm": 1.7241653228212495, "learning_rate": 3.2266897247441997e-06, "loss": 0.522, "step": 2240 }, { "epoch": 0.03242869290748454, "grad_norm": 2.0586547400526887, "learning_rate": 3.2411010232021903e-06, "loss": 0.5365, "step": 2250 }, { "epoch": 0.03257282043151781, "grad_norm": 1.7084273904141078, "learning_rate": 3.255512321660182e-06, "loss": 0.5373, "step": 2260 }, { "epoch": 0.03271694795555107, "grad_norm": 3.1683216526097873, "learning_rate": 3.269923620118173e-06, "loss": 0.5301, "step": 2270 }, { "epoch": 0.032861075479584334, "grad_norm": 1.9970152627953215, "learning_rate": 3.284334918576164e-06, "loss": 0.5362, "step": 2280 }, { "epoch": 0.0330052030036176, "grad_norm": 2.020548017568989, "learning_rate": 3.298746217034155e-06, "loss": 0.5466, "step": 2290 }, { "epoch": 0.033149330527650865, "grad_norm": 1.893327714077033, "learning_rate": 3.3131575154921465e-06, "loss": 0.5551, "step": 2300 }, { "epoch": 0.03329345805168413, "grad_norm": 1.9620945532931866, "learning_rate": 3.327568813950137e-06, "loss": 0.5697, "step": 2310 }, { "epoch": 0.033437585575717396, "grad_norm": 1.8763030701905103, "learning_rate": 3.3419801124081282e-06, "loss": 0.5677, "step": 2320 }, { "epoch": 0.03358171309975066, "grad_norm": 1.6978589645669804, "learning_rate": 3.3563914108661193e-06, "loss": 0.5598, "step": 2330 }, { "epoch": 0.03372584062378393, "grad_norm": 1.7577601833425427, "learning_rate": 3.3708027093241103e-06, "loss": 0.577, "step": 2340 }, { "epoch": 0.03386996814781719, "grad_norm": 1.9723820990615915, "learning_rate": 3.385214007782102e-06, "loss": 0.5473, "step": 2350 }, { "epoch": 0.03401409567185045, "grad_norm": 1.5312819061592722, "learning_rate": 3.3996253062400925e-06, "loss": 0.5201, "step": 2360 }, { "epoch": 0.03415822319588372, "grad_norm": 3.031453779776954, "learning_rate": 3.414036604698084e-06, "loss": 0.526, "step": 2370 }, { "epoch": 0.03430235071991698, "grad_norm": 1.7829908438031097, "learning_rate": 3.4284479031560746e-06, "loss": 0.5514, "step": 2380 }, { "epoch": 0.034446478243950245, "grad_norm": 2.505165858646116, "learning_rate": 3.4428592016140657e-06, "loss": 0.5509, "step": 2390 }, { "epoch": 0.034590605767983514, "grad_norm": 2.0006738019512698, "learning_rate": 3.4572705000720567e-06, "loss": 0.5644, "step": 2400 }, { "epoch": 0.034734733292016776, "grad_norm": 1.6566629795889591, "learning_rate": 3.4716817985300478e-06, "loss": 0.5429, "step": 2410 }, { "epoch": 0.03487886081605004, "grad_norm": 1.8237802843634847, "learning_rate": 3.4860930969880393e-06, "loss": 0.5638, "step": 2420 }, { "epoch": 0.03502298834008331, "grad_norm": 1.8339204310658102, "learning_rate": 3.50050439544603e-06, "loss": 0.5289, "step": 2430 }, { "epoch": 0.03516711586411657, "grad_norm": 1.9070324600178938, "learning_rate": 3.5149156939040206e-06, "loss": 0.5408, "step": 2440 }, { "epoch": 0.03531124338814984, "grad_norm": 1.951447126700897, "learning_rate": 3.529326992362012e-06, "loss": 0.5567, "step": 2450 }, { "epoch": 0.0354553709121831, "grad_norm": 1.5941858979437709, "learning_rate": 3.543738290820003e-06, "loss": 0.5214, "step": 2460 }, { "epoch": 0.03559949843621636, "grad_norm": 2.015071978387826, "learning_rate": 3.558149589277994e-06, "loss": 0.542, "step": 2470 }, { "epoch": 0.03574362596024963, "grad_norm": 1.694582055769199, "learning_rate": 3.5725608877359852e-06, "loss": 0.512, "step": 2480 }, { "epoch": 0.03588775348428289, "grad_norm": 2.0590891715963267, "learning_rate": 3.5869721861939767e-06, "loss": 0.5355, "step": 2490 }, { "epoch": 0.036031881008316155, "grad_norm": 1.9816345057918725, "learning_rate": 3.6013834846519674e-06, "loss": 0.5109, "step": 2500 }, { "epoch": 0.036176008532349424, "grad_norm": 1.925747298324837, "learning_rate": 3.6157947831099584e-06, "loss": 0.5378, "step": 2510 }, { "epoch": 0.036320136056382686, "grad_norm": 1.8356628378924813, "learning_rate": 3.6302060815679495e-06, "loss": 0.5644, "step": 2520 }, { "epoch": 0.036464263580415955, "grad_norm": 1.87177415244673, "learning_rate": 3.6446173800259405e-06, "loss": 0.567, "step": 2530 }, { "epoch": 0.03660839110444922, "grad_norm": 2.3573030266465396, "learning_rate": 3.659028678483932e-06, "loss": 0.5178, "step": 2540 }, { "epoch": 0.03675251862848248, "grad_norm": 2.3404398621401814, "learning_rate": 3.6734399769419227e-06, "loss": 0.5422, "step": 2550 }, { "epoch": 0.03689664615251575, "grad_norm": 1.8258515652851137, "learning_rate": 3.687851275399914e-06, "loss": 0.5214, "step": 2560 }, { "epoch": 0.03704077367654901, "grad_norm": 1.8102895179712952, "learning_rate": 3.702262573857905e-06, "loss": 0.5705, "step": 2570 }, { "epoch": 0.03718490120058227, "grad_norm": 1.920743331644228, "learning_rate": 3.716673872315896e-06, "loss": 0.5686, "step": 2580 }, { "epoch": 0.03732902872461554, "grad_norm": 1.844201670422106, "learning_rate": 3.731085170773887e-06, "loss": 0.5603, "step": 2590 }, { "epoch": 0.0374731562486488, "grad_norm": 1.9835049660778645, "learning_rate": 3.745496469231878e-06, "loss": 0.5451, "step": 2600 }, { "epoch": 0.03761728377268207, "grad_norm": 1.6965963345482522, "learning_rate": 3.7599077676898695e-06, "loss": 0.522, "step": 2610 }, { "epoch": 0.037761411296715335, "grad_norm": 2.177135060568417, "learning_rate": 3.77431906614786e-06, "loss": 0.579, "step": 2620 }, { "epoch": 0.0379055388207486, "grad_norm": 1.9329875138140442, "learning_rate": 3.7887303646058516e-06, "loss": 0.5488, "step": 2630 }, { "epoch": 0.038049666344781866, "grad_norm": 1.8092400476281032, "learning_rate": 3.8031416630638422e-06, "loss": 0.5274, "step": 2640 }, { "epoch": 0.03819379386881513, "grad_norm": 2.0170508979200057, "learning_rate": 3.817552961521833e-06, "loss": 0.5151, "step": 2650 }, { "epoch": 0.03833792139284839, "grad_norm": 1.8558658903155256, "learning_rate": 3.831964259979825e-06, "loss": 0.5213, "step": 2660 }, { "epoch": 0.03848204891688166, "grad_norm": 2.046962293923311, "learning_rate": 3.846375558437815e-06, "loss": 0.5583, "step": 2670 }, { "epoch": 0.03862617644091492, "grad_norm": 2.1153842648430223, "learning_rate": 3.860786856895807e-06, "loss": 0.5243, "step": 2680 }, { "epoch": 0.03877030396494818, "grad_norm": 1.6003902575171718, "learning_rate": 3.875198155353798e-06, "loss": 0.5191, "step": 2690 }, { "epoch": 0.03891443148898145, "grad_norm": 1.8552296393656451, "learning_rate": 3.889609453811789e-06, "loss": 0.5517, "step": 2700 }, { "epoch": 0.039058559013014714, "grad_norm": 1.729069662511443, "learning_rate": 3.90402075226978e-06, "loss": 0.5388, "step": 2710 }, { "epoch": 0.03920268653704798, "grad_norm": 2.2389034696309538, "learning_rate": 3.91843205072777e-06, "loss": 0.5592, "step": 2720 }, { "epoch": 0.039346814061081245, "grad_norm": 1.9947846246614016, "learning_rate": 3.932843349185762e-06, "loss": 0.5301, "step": 2730 }, { "epoch": 0.03949094158511451, "grad_norm": 1.8039842195680897, "learning_rate": 3.9472546476437525e-06, "loss": 0.5212, "step": 2740 }, { "epoch": 0.039635069109147776, "grad_norm": 1.7371912691733458, "learning_rate": 3.961665946101744e-06, "loss": 0.5293, "step": 2750 }, { "epoch": 0.03977919663318104, "grad_norm": 1.686939945831398, "learning_rate": 3.9760772445597354e-06, "loss": 0.5621, "step": 2760 }, { "epoch": 0.0399233241572143, "grad_norm": 1.8616946945778698, "learning_rate": 3.9904885430177265e-06, "loss": 0.5705, "step": 2770 }, { "epoch": 0.04006745168124757, "grad_norm": 1.7032563271696537, "learning_rate": 4.0048998414757176e-06, "loss": 0.5527, "step": 2780 }, { "epoch": 0.04021157920528083, "grad_norm": 2.7247919492004997, "learning_rate": 4.019311139933708e-06, "loss": 0.5246, "step": 2790 }, { "epoch": 0.0403557067293141, "grad_norm": 2.026670705236375, "learning_rate": 4.0337224383917e-06, "loss": 0.5495, "step": 2800 }, { "epoch": 0.04049983425334736, "grad_norm": 2.0218962395589917, "learning_rate": 4.048133736849691e-06, "loss": 0.5492, "step": 2810 }, { "epoch": 0.040643961777380624, "grad_norm": 1.9191195088515047, "learning_rate": 4.062545035307682e-06, "loss": 0.5347, "step": 2820 }, { "epoch": 0.04078808930141389, "grad_norm": 2.28881978533632, "learning_rate": 4.076956333765673e-06, "loss": 0.5546, "step": 2830 }, { "epoch": 0.040932216825447156, "grad_norm": 1.7608670826707924, "learning_rate": 4.091367632223664e-06, "loss": 0.5349, "step": 2840 }, { "epoch": 0.04107634434948042, "grad_norm": 1.8288518528919047, "learning_rate": 4.105778930681655e-06, "loss": 0.5316, "step": 2850 }, { "epoch": 0.04122047187351369, "grad_norm": 1.759399006916404, "learning_rate": 4.120190229139645e-06, "loss": 0.5401, "step": 2860 }, { "epoch": 0.04136459939754695, "grad_norm": 2.0713713796619375, "learning_rate": 4.134601527597637e-06, "loss": 0.5315, "step": 2870 }, { "epoch": 0.04150872692158021, "grad_norm": 2.07311458434203, "learning_rate": 4.149012826055628e-06, "loss": 0.5282, "step": 2880 }, { "epoch": 0.04165285444561348, "grad_norm": 1.6681969782272115, "learning_rate": 4.163424124513619e-06, "loss": 0.526, "step": 2890 }, { "epoch": 0.04179698196964674, "grad_norm": 1.9529754892447158, "learning_rate": 4.17783542297161e-06, "loss": 0.543, "step": 2900 }, { "epoch": 0.04194110949368001, "grad_norm": 1.6138999673010035, "learning_rate": 4.1922467214296005e-06, "loss": 0.5624, "step": 2910 }, { "epoch": 0.04208523701771327, "grad_norm": 2.8159073394796366, "learning_rate": 4.2066580198875924e-06, "loss": 0.5389, "step": 2920 }, { "epoch": 0.042229364541746535, "grad_norm": 1.7907884480248406, "learning_rate": 4.2210693183455835e-06, "loss": 0.5147, "step": 2930 }, { "epoch": 0.042373492065779804, "grad_norm": 2.108888828634336, "learning_rate": 4.2354806168035746e-06, "loss": 0.5648, "step": 2940 }, { "epoch": 0.042517619589813066, "grad_norm": 2.197148891632498, "learning_rate": 4.249891915261566e-06, "loss": 0.57, "step": 2950 }, { "epoch": 0.04266174711384633, "grad_norm": 1.8903949739498729, "learning_rate": 4.264303213719557e-06, "loss": 0.5527, "step": 2960 }, { "epoch": 0.0428058746378796, "grad_norm": 4.1031392147661645, "learning_rate": 4.278714512177548e-06, "loss": 0.5364, "step": 2970 }, { "epoch": 0.04295000216191286, "grad_norm": 1.7498265391553125, "learning_rate": 4.293125810635538e-06, "loss": 0.5495, "step": 2980 }, { "epoch": 0.04309412968594613, "grad_norm": 1.9286544232225815, "learning_rate": 4.30753710909353e-06, "loss": 0.522, "step": 2990 }, { "epoch": 0.04323825720997939, "grad_norm": 1.9665431093456354, "learning_rate": 4.321948407551521e-06, "loss": 0.5334, "step": 3000 }, { "epoch": 0.04338238473401265, "grad_norm": 2.2413490542145005, "learning_rate": 4.336359706009512e-06, "loss": 0.5615, "step": 3010 }, { "epoch": 0.04352651225804592, "grad_norm": 1.8095148534936072, "learning_rate": 4.350771004467503e-06, "loss": 0.5425, "step": 3020 }, { "epoch": 0.04367063978207918, "grad_norm": 1.8923836833774954, "learning_rate": 4.365182302925494e-06, "loss": 0.5499, "step": 3030 }, { "epoch": 0.043814767306112445, "grad_norm": 2.0602256147024005, "learning_rate": 4.379593601383485e-06, "loss": 0.5543, "step": 3040 }, { "epoch": 0.043958894830145714, "grad_norm": 1.8573182780361777, "learning_rate": 4.3940048998414754e-06, "loss": 0.5572, "step": 3050 }, { "epoch": 0.044103022354178976, "grad_norm": 2.001742907468787, "learning_rate": 4.408416198299467e-06, "loss": 0.5353, "step": 3060 }, { "epoch": 0.044247149878212245, "grad_norm": 2.07466200175124, "learning_rate": 4.422827496757458e-06, "loss": 0.5644, "step": 3070 }, { "epoch": 0.04439127740224551, "grad_norm": 1.5880577653591674, "learning_rate": 4.4372387952154495e-06, "loss": 0.5083, "step": 3080 }, { "epoch": 0.04453540492627877, "grad_norm": 2.0368327507138755, "learning_rate": 4.4516500936734405e-06, "loss": 0.4969, "step": 3090 }, { "epoch": 0.04467953245031204, "grad_norm": 1.9794759192434028, "learning_rate": 4.466061392131432e-06, "loss": 0.5449, "step": 3100 }, { "epoch": 0.0448236599743453, "grad_norm": 1.749333516175665, "learning_rate": 4.480472690589423e-06, "loss": 0.5183, "step": 3110 }, { "epoch": 0.04496778749837856, "grad_norm": 1.9728798708756903, "learning_rate": 4.494883989047414e-06, "loss": 0.5289, "step": 3120 }, { "epoch": 0.04511191502241183, "grad_norm": 1.8462323536275602, "learning_rate": 4.509295287505405e-06, "loss": 0.5055, "step": 3130 }, { "epoch": 0.045256042546445094, "grad_norm": 1.950235665459322, "learning_rate": 4.523706585963396e-06, "loss": 0.5418, "step": 3140 }, { "epoch": 0.045400170070478356, "grad_norm": 1.7934363409952017, "learning_rate": 4.538117884421387e-06, "loss": 0.5515, "step": 3150 }, { "epoch": 0.045544297594511625, "grad_norm": 1.7994105237645774, "learning_rate": 4.552529182879378e-06, "loss": 0.5486, "step": 3160 }, { "epoch": 0.04568842511854489, "grad_norm": 2.0543465495717474, "learning_rate": 4.566940481337369e-06, "loss": 0.5244, "step": 3170 }, { "epoch": 0.045832552642578156, "grad_norm": 1.95397200264783, "learning_rate": 4.58135177979536e-06, "loss": 0.515, "step": 3180 }, { "epoch": 0.04597668016661142, "grad_norm": 2.0344853797117293, "learning_rate": 4.595763078253351e-06, "loss": 0.5283, "step": 3190 }, { "epoch": 0.04612080769064468, "grad_norm": 1.8800509856298193, "learning_rate": 4.610174376711342e-06, "loss": 0.553, "step": 3200 }, { "epoch": 0.04626493521467795, "grad_norm": 1.9645371383098096, "learning_rate": 4.624585675169333e-06, "loss": 0.5125, "step": 3210 }, { "epoch": 0.04640906273871121, "grad_norm": 2.0269614219145913, "learning_rate": 4.638996973627324e-06, "loss": 0.5495, "step": 3220 }, { "epoch": 0.04655319026274447, "grad_norm": 1.9473762505882206, "learning_rate": 4.653408272085315e-06, "loss": 0.529, "step": 3230 }, { "epoch": 0.04669731778677774, "grad_norm": 1.779993003009082, "learning_rate": 4.6678195705433065e-06, "loss": 0.535, "step": 3240 }, { "epoch": 0.046841445310811004, "grad_norm": 2.027331078438228, "learning_rate": 4.6822308690012975e-06, "loss": 0.5229, "step": 3250 }, { "epoch": 0.04698557283484427, "grad_norm": 1.669575887237267, "learning_rate": 4.696642167459289e-06, "loss": 0.5526, "step": 3260 }, { "epoch": 0.047129700358877535, "grad_norm": 4.494346551658855, "learning_rate": 4.71105346591728e-06, "loss": 0.5686, "step": 3270 }, { "epoch": 0.0472738278829108, "grad_norm": 1.907582863704028, "learning_rate": 4.725464764375271e-06, "loss": 0.5483, "step": 3280 }, { "epoch": 0.047417955406944066, "grad_norm": 2.1360284326366217, "learning_rate": 4.739876062833262e-06, "loss": 0.5153, "step": 3290 }, { "epoch": 0.04756208293097733, "grad_norm": 1.9685443817136299, "learning_rate": 4.754287361291253e-06, "loss": 0.5342, "step": 3300 }, { "epoch": 0.04770621045501059, "grad_norm": 1.895495891682974, "learning_rate": 4.768698659749244e-06, "loss": 0.5219, "step": 3310 }, { "epoch": 0.04785033797904386, "grad_norm": 2.0673151755358243, "learning_rate": 4.783109958207235e-06, "loss": 0.5249, "step": 3320 }, { "epoch": 0.04799446550307712, "grad_norm": 1.8466905871750159, "learning_rate": 4.797521256665226e-06, "loss": 0.5177, "step": 3330 }, { "epoch": 0.04813859302711039, "grad_norm": 1.7920878432172447, "learning_rate": 4.811932555123217e-06, "loss": 0.5295, "step": 3340 }, { "epoch": 0.04828272055114365, "grad_norm": 2.0913458172603727, "learning_rate": 4.826343853581208e-06, "loss": 0.5617, "step": 3350 }, { "epoch": 0.048426848075176915, "grad_norm": 1.748377527281263, "learning_rate": 4.840755152039199e-06, "loss": 0.5309, "step": 3360 }, { "epoch": 0.048570975599210184, "grad_norm": 1.7081313645088392, "learning_rate": 4.85516645049719e-06, "loss": 0.5438, "step": 3370 }, { "epoch": 0.048715103123243446, "grad_norm": 1.909469583767992, "learning_rate": 4.869577748955181e-06, "loss": 0.5165, "step": 3380 }, { "epoch": 0.04885923064727671, "grad_norm": 1.826815717615458, "learning_rate": 4.8839890474131724e-06, "loss": 0.5056, "step": 3390 }, { "epoch": 0.04900335817130998, "grad_norm": 1.923448128981492, "learning_rate": 4.8984003458711635e-06, "loss": 0.5385, "step": 3400 }, { "epoch": 0.04914748569534324, "grad_norm": 2.083915497259617, "learning_rate": 4.9128116443291546e-06, "loss": 0.5435, "step": 3410 }, { "epoch": 0.0492916132193765, "grad_norm": 1.7561939731763008, "learning_rate": 4.927222942787146e-06, "loss": 0.5433, "step": 3420 }, { "epoch": 0.04943574074340977, "grad_norm": 1.9654959566108496, "learning_rate": 4.941634241245137e-06, "loss": 0.5519, "step": 3430 }, { "epoch": 0.04957986826744303, "grad_norm": 1.9837424821345682, "learning_rate": 4.956045539703128e-06, "loss": 0.541, "step": 3440 }, { "epoch": 0.0497239957914763, "grad_norm": 1.892480669157962, "learning_rate": 4.970456838161119e-06, "loss": 0.5453, "step": 3450 }, { "epoch": 0.04986812331550956, "grad_norm": 1.7018914098502327, "learning_rate": 4.98486813661911e-06, "loss": 0.5346, "step": 3460 }, { "epoch": 0.050012250839542825, "grad_norm": 2.0328539769314378, "learning_rate": 4.999279435077101e-06, "loss": 0.5528, "step": 3470 }, { "epoch": 0.050156378363576094, "grad_norm": 1.8726363685673524, "learning_rate": 5.013690733535091e-06, "loss": 0.5499, "step": 3480 }, { "epoch": 0.050300505887609356, "grad_norm": 2.4368305212337016, "learning_rate": 5.028102031993083e-06, "loss": 0.5383, "step": 3490 }, { "epoch": 0.05044463341164262, "grad_norm": 2.016839933633307, "learning_rate": 5.042513330451074e-06, "loss": 0.5523, "step": 3500 }, { "epoch": 0.05058876093567589, "grad_norm": 1.756858378358942, "learning_rate": 5.056924628909066e-06, "loss": 0.5512, "step": 3510 }, { "epoch": 0.05073288845970915, "grad_norm": 3.0947806393557094, "learning_rate": 5.071335927367056e-06, "loss": 0.5515, "step": 3520 }, { "epoch": 0.05087701598374242, "grad_norm": 1.892894717994704, "learning_rate": 5.085747225825047e-06, "loss": 0.5391, "step": 3530 }, { "epoch": 0.05102114350777568, "grad_norm": 1.7413487073203087, "learning_rate": 5.100158524283038e-06, "loss": 0.5227, "step": 3540 }, { "epoch": 0.05116527103180894, "grad_norm": 1.9944580643948657, "learning_rate": 5.114569822741029e-06, "loss": 0.5232, "step": 3550 }, { "epoch": 0.05130939855584221, "grad_norm": 1.7314136830571483, "learning_rate": 5.1289811211990205e-06, "loss": 0.5535, "step": 3560 }, { "epoch": 0.051453526079875474, "grad_norm": 1.9039904356543373, "learning_rate": 5.1433924196570116e-06, "loss": 0.5273, "step": 3570 }, { "epoch": 0.051597653603908736, "grad_norm": 1.7762516937815593, "learning_rate": 5.1578037181150035e-06, "loss": 0.5456, "step": 3580 }, { "epoch": 0.051741781127942005, "grad_norm": 2.076711743015058, "learning_rate": 5.172215016572994e-06, "loss": 0.5364, "step": 3590 }, { "epoch": 0.05188590865197527, "grad_norm": 1.7123128850916234, "learning_rate": 5.186626315030985e-06, "loss": 0.524, "step": 3600 }, { "epoch": 0.05203003617600853, "grad_norm": 1.9995547271383296, "learning_rate": 5.201037613488976e-06, "loss": 0.5083, "step": 3610 }, { "epoch": 0.0521741637000418, "grad_norm": 2.2400244258915767, "learning_rate": 5.215448911946966e-06, "loss": 0.558, "step": 3620 }, { "epoch": 0.05231829122407506, "grad_norm": 1.8504718722816458, "learning_rate": 5.229860210404958e-06, "loss": 0.4976, "step": 3630 }, { "epoch": 0.05246241874810833, "grad_norm": 1.8301368444746662, "learning_rate": 5.244271508862949e-06, "loss": 0.5644, "step": 3640 }, { "epoch": 0.05260654627214159, "grad_norm": 1.9299716925461392, "learning_rate": 5.258682807320939e-06, "loss": 0.5341, "step": 3650 }, { "epoch": 0.05275067379617485, "grad_norm": 1.7159960380132446, "learning_rate": 5.273094105778931e-06, "loss": 0.5486, "step": 3660 }, { "epoch": 0.05289480132020812, "grad_norm": 2.249549923625344, "learning_rate": 5.287505404236922e-06, "loss": 0.5586, "step": 3670 }, { "epoch": 0.053038928844241384, "grad_norm": 1.9349481687799475, "learning_rate": 5.301916702694913e-06, "loss": 0.563, "step": 3680 }, { "epoch": 0.053183056368274646, "grad_norm": 1.9628588346596152, "learning_rate": 5.316328001152904e-06, "loss": 0.5285, "step": 3690 }, { "epoch": 0.053327183892307915, "grad_norm": 2.139559494353378, "learning_rate": 5.330739299610895e-06, "loss": 0.5268, "step": 3700 }, { "epoch": 0.05347131141634118, "grad_norm": 1.7755644577187, "learning_rate": 5.3451505980688865e-06, "loss": 0.4915, "step": 3710 }, { "epoch": 0.053615438940374446, "grad_norm": 2.1987976544763574, "learning_rate": 5.359561896526877e-06, "loss": 0.5136, "step": 3720 }, { "epoch": 0.05375956646440771, "grad_norm": 2.160676876421243, "learning_rate": 5.373973194984869e-06, "loss": 0.5425, "step": 3730 }, { "epoch": 0.05390369398844097, "grad_norm": 2.0038161911529135, "learning_rate": 5.38838449344286e-06, "loss": 0.5378, "step": 3740 }, { "epoch": 0.05404782151247424, "grad_norm": 1.8772729306052764, "learning_rate": 5.402795791900851e-06, "loss": 0.5573, "step": 3750 }, { "epoch": 0.0541919490365075, "grad_norm": 1.9441239143655835, "learning_rate": 5.417207090358842e-06, "loss": 0.5403, "step": 3760 }, { "epoch": 0.054336076560540764, "grad_norm": 20.24781982107497, "learning_rate": 5.431618388816833e-06, "loss": 0.5385, "step": 3770 }, { "epoch": 0.05448020408457403, "grad_norm": 1.7069370536512578, "learning_rate": 5.446029687274824e-06, "loss": 0.5366, "step": 3780 }, { "epoch": 0.054624331608607295, "grad_norm": 1.7804179986949065, "learning_rate": 5.460440985732814e-06, "loss": 0.5404, "step": 3790 }, { "epoch": 0.054768459132640564, "grad_norm": 1.8294865128572777, "learning_rate": 5.474852284190806e-06, "loss": 0.5594, "step": 3800 }, { "epoch": 0.054912586656673826, "grad_norm": 2.136350916173363, "learning_rate": 5.489263582648797e-06, "loss": 0.5339, "step": 3810 }, { "epoch": 0.05505671418070709, "grad_norm": 1.7514844648278873, "learning_rate": 5.503674881106789e-06, "loss": 0.5426, "step": 3820 }, { "epoch": 0.05520084170474036, "grad_norm": 1.6538503304650878, "learning_rate": 5.518086179564779e-06, "loss": 0.5222, "step": 3830 }, { "epoch": 0.05534496922877362, "grad_norm": 2.132369341666534, "learning_rate": 5.53249747802277e-06, "loss": 0.5527, "step": 3840 }, { "epoch": 0.05548909675280688, "grad_norm": 2.0253980813214576, "learning_rate": 5.546908776480761e-06, "loss": 0.5499, "step": 3850 }, { "epoch": 0.05563322427684015, "grad_norm": 2.4853098083329277, "learning_rate": 5.5613200749387516e-06, "loss": 0.5119, "step": 3860 }, { "epoch": 0.05577735180087341, "grad_norm": 1.7953554377949, "learning_rate": 5.5757313733967435e-06, "loss": 0.522, "step": 3870 }, { "epoch": 0.055921479324906674, "grad_norm": 1.809996361060591, "learning_rate": 5.5901426718547345e-06, "loss": 0.5644, "step": 3880 }, { "epoch": 0.05606560684893994, "grad_norm": 2.5617872844095366, "learning_rate": 5.6045539703127265e-06, "loss": 0.5307, "step": 3890 }, { "epoch": 0.056209734372973205, "grad_norm": 1.9056286748778126, "learning_rate": 5.618965268770717e-06, "loss": 0.5389, "step": 3900 }, { "epoch": 0.056353861897006474, "grad_norm": 1.960793459729946, "learning_rate": 5.633376567228708e-06, "loss": 0.5298, "step": 3910 }, { "epoch": 0.056497989421039736, "grad_norm": 2.4752561788859544, "learning_rate": 5.647787865686699e-06, "loss": 0.5707, "step": 3920 }, { "epoch": 0.056642116945073, "grad_norm": 1.5768751937154168, "learning_rate": 5.662199164144689e-06, "loss": 0.506, "step": 3930 }, { "epoch": 0.05678624446910627, "grad_norm": 1.8352882966833057, "learning_rate": 5.676610462602681e-06, "loss": 0.5268, "step": 3940 }, { "epoch": 0.05693037199313953, "grad_norm": 1.722485530352799, "learning_rate": 5.691021761060672e-06, "loss": 0.5396, "step": 3950 }, { "epoch": 0.05707449951717279, "grad_norm": 2.189131868031384, "learning_rate": 5.705433059518664e-06, "loss": 0.548, "step": 3960 }, { "epoch": 0.05721862704120606, "grad_norm": 1.5962057672233378, "learning_rate": 5.719844357976654e-06, "loss": 0.5358, "step": 3970 }, { "epoch": 0.05736275456523932, "grad_norm": 1.8168031021094881, "learning_rate": 5.734255656434645e-06, "loss": 0.5095, "step": 3980 }, { "epoch": 0.05750688208927259, "grad_norm": 1.9813812782374354, "learning_rate": 5.748666954892636e-06, "loss": 0.5546, "step": 3990 }, { "epoch": 0.057651009613305854, "grad_norm": 1.898862832389609, "learning_rate": 5.7630782533506265e-06, "loss": 0.5277, "step": 4000 }, { "epoch": 0.057795137137339116, "grad_norm": 2.150501925374042, "learning_rate": 5.777489551808618e-06, "loss": 0.5163, "step": 4010 }, { "epoch": 0.057939264661372385, "grad_norm": 2.0348639622670004, "learning_rate": 5.7919008502666094e-06, "loss": 0.5492, "step": 4020 }, { "epoch": 0.05808339218540565, "grad_norm": 1.6727351179385, "learning_rate": 5.806312148724601e-06, "loss": 0.5334, "step": 4030 }, { "epoch": 0.05822751970943891, "grad_norm": 7.1169776154035596, "learning_rate": 5.8207234471825916e-06, "loss": 0.5238, "step": 4040 }, { "epoch": 0.05837164723347218, "grad_norm": 1.9311943701027507, "learning_rate": 5.835134745640583e-06, "loss": 0.5185, "step": 4050 }, { "epoch": 0.05851577475750544, "grad_norm": 1.9360126848553338, "learning_rate": 5.849546044098574e-06, "loss": 0.5319, "step": 4060 }, { "epoch": 0.05865990228153871, "grad_norm": 2.1578821223813818, "learning_rate": 5.863957342556565e-06, "loss": 0.5687, "step": 4070 }, { "epoch": 0.05880402980557197, "grad_norm": 1.8994862304258433, "learning_rate": 5.878368641014556e-06, "loss": 0.5619, "step": 4080 }, { "epoch": 0.05894815732960523, "grad_norm": 1.990944424024575, "learning_rate": 5.892779939472547e-06, "loss": 0.4988, "step": 4090 }, { "epoch": 0.0590922848536385, "grad_norm": 1.8271614498265167, "learning_rate": 5.907191237930539e-06, "loss": 0.5361, "step": 4100 }, { "epoch": 0.059236412377671764, "grad_norm": 1.9190520427579085, "learning_rate": 5.921602536388529e-06, "loss": 0.5235, "step": 4110 }, { "epoch": 0.059380539901705026, "grad_norm": 1.8273694397460394, "learning_rate": 5.93601383484652e-06, "loss": 0.491, "step": 4120 }, { "epoch": 0.059524667425738295, "grad_norm": 2.0260116677528948, "learning_rate": 5.950425133304511e-06, "loss": 0.5598, "step": 4130 }, { "epoch": 0.05966879494977156, "grad_norm": 1.8837176964527003, "learning_rate": 5.964836431762502e-06, "loss": 0.5473, "step": 4140 }, { "epoch": 0.05981292247380482, "grad_norm": 1.7756601460637638, "learning_rate": 5.979247730220493e-06, "loss": 0.5534, "step": 4150 }, { "epoch": 0.05995704999783809, "grad_norm": 1.4908312428124506, "learning_rate": 5.993659028678484e-06, "loss": 0.5242, "step": 4160 }, { "epoch": 0.06010117752187135, "grad_norm": 1.948755769534712, "learning_rate": 6.008070327136476e-06, "loss": 0.5481, "step": 4170 }, { "epoch": 0.06024530504590462, "grad_norm": 1.5091182748447864, "learning_rate": 6.0224816255944664e-06, "loss": 0.4993, "step": 4180 }, { "epoch": 0.06038943256993788, "grad_norm": 1.669862421696947, "learning_rate": 6.0368929240524575e-06, "loss": 0.5255, "step": 4190 }, { "epoch": 0.060533560093971144, "grad_norm": 1.8455810310462406, "learning_rate": 6.051304222510449e-06, "loss": 0.4994, "step": 4200 }, { "epoch": 0.06067768761800441, "grad_norm": 1.8216506343355505, "learning_rate": 6.06571552096844e-06, "loss": 0.5485, "step": 4210 }, { "epoch": 0.060821815142037675, "grad_norm": 2.0339938192784777, "learning_rate": 6.080126819426431e-06, "loss": 0.5475, "step": 4220 }, { "epoch": 0.06096594266607094, "grad_norm": 1.7687951520352247, "learning_rate": 6.094538117884422e-06, "loss": 0.5237, "step": 4230 }, { "epoch": 0.061110070190104206, "grad_norm": 1.7905795315111708, "learning_rate": 6.108949416342414e-06, "loss": 0.5204, "step": 4240 }, { "epoch": 0.06125419771413747, "grad_norm": 1.9211939812943544, "learning_rate": 6.123360714800404e-06, "loss": 0.545, "step": 4250 }, { "epoch": 0.06139832523817074, "grad_norm": 1.698082838656227, "learning_rate": 6.137772013258395e-06, "loss": 0.5208, "step": 4260 }, { "epoch": 0.061542452762204, "grad_norm": 2.874650285438022, "learning_rate": 6.152183311716387e-06, "loss": 0.5515, "step": 4270 }, { "epoch": 0.06168658028623726, "grad_norm": 1.733523317135266, "learning_rate": 6.166594610174377e-06, "loss": 0.5484, "step": 4280 }, { "epoch": 0.06183070781027053, "grad_norm": 1.6661793066343293, "learning_rate": 6.181005908632368e-06, "loss": 0.5306, "step": 4290 }, { "epoch": 0.06197483533430379, "grad_norm": 2.1152278985512516, "learning_rate": 6.195417207090359e-06, "loss": 0.5707, "step": 4300 }, { "epoch": 0.062118962858337054, "grad_norm": 1.7421014269779413, "learning_rate": 6.209828505548351e-06, "loss": 0.4993, "step": 4310 }, { "epoch": 0.06226309038237032, "grad_norm": 1.8359776002055923, "learning_rate": 6.224239804006341e-06, "loss": 0.522, "step": 4320 }, { "epoch": 0.062407217906403585, "grad_norm": 1.7662563833267644, "learning_rate": 6.238651102464332e-06, "loss": 0.5246, "step": 4330 }, { "epoch": 0.06255134543043685, "grad_norm": 1.8042821271657845, "learning_rate": 6.253062400922324e-06, "loss": 0.5525, "step": 4340 }, { "epoch": 0.06269547295447012, "grad_norm": 2.138642176181497, "learning_rate": 6.2674736993803145e-06, "loss": 0.5629, "step": 4350 }, { "epoch": 0.06283960047850339, "grad_norm": 1.7442859884959994, "learning_rate": 6.281884997838306e-06, "loss": 0.5313, "step": 4360 }, { "epoch": 0.06298372800253664, "grad_norm": 1.8494612141598095, "learning_rate": 6.296296296296297e-06, "loss": 0.5009, "step": 4370 }, { "epoch": 0.06312785552656991, "grad_norm": 1.7308667888250067, "learning_rate": 6.3107075947542886e-06, "loss": 0.5089, "step": 4380 }, { "epoch": 0.06327198305060318, "grad_norm": 1.9240615171690698, "learning_rate": 6.325118893212279e-06, "loss": 0.5477, "step": 4390 }, { "epoch": 0.06341611057463643, "grad_norm": 1.9848546732851444, "learning_rate": 6.33953019167027e-06, "loss": 0.5604, "step": 4400 }, { "epoch": 0.0635602380986697, "grad_norm": 2.0035407395889484, "learning_rate": 6.353941490128262e-06, "loss": 0.5311, "step": 4410 }, { "epoch": 0.06370436562270297, "grad_norm": 1.6653144721374893, "learning_rate": 6.368352788586252e-06, "loss": 0.5616, "step": 4420 }, { "epoch": 0.06384849314673623, "grad_norm": 1.6707448273282763, "learning_rate": 6.382764087044243e-06, "loss": 0.4993, "step": 4430 }, { "epoch": 0.0639926206707695, "grad_norm": 1.7096949639219527, "learning_rate": 6.397175385502234e-06, "loss": 0.5398, "step": 4440 }, { "epoch": 0.06413674819480276, "grad_norm": 2.1163167486972956, "learning_rate": 6.411586683960226e-06, "loss": 0.5499, "step": 4450 }, { "epoch": 0.06428087571883602, "grad_norm": 1.5416981821778326, "learning_rate": 6.425997982418216e-06, "loss": 0.5099, "step": 4460 }, { "epoch": 0.06442500324286929, "grad_norm": 1.6701940432641529, "learning_rate": 6.440409280876207e-06, "loss": 0.5381, "step": 4470 }, { "epoch": 0.06456913076690256, "grad_norm": 2.0839218972363516, "learning_rate": 6.454820579334199e-06, "loss": 0.5388, "step": 4480 }, { "epoch": 0.06471325829093583, "grad_norm": 1.9668682086945497, "learning_rate": 6.469231877792189e-06, "loss": 0.5209, "step": 4490 }, { "epoch": 0.06485738581496908, "grad_norm": 2.0652308889615703, "learning_rate": 6.4836431762501805e-06, "loss": 0.524, "step": 4500 }, { "epoch": 0.06500151333900235, "grad_norm": 1.5966343502228346, "learning_rate": 6.4980544747081715e-06, "loss": 0.5419, "step": 4510 }, { "epoch": 0.06514564086303562, "grad_norm": 2.2879234973477693, "learning_rate": 6.5124657731661635e-06, "loss": 0.5482, "step": 4520 }, { "epoch": 0.06528976838706887, "grad_norm": 1.8595984427029222, "learning_rate": 6.526877071624154e-06, "loss": 0.5257, "step": 4530 }, { "epoch": 0.06543389591110214, "grad_norm": 1.5766156451545525, "learning_rate": 6.541288370082145e-06, "loss": 0.5273, "step": 4540 }, { "epoch": 0.06557802343513541, "grad_norm": 1.909964073581362, "learning_rate": 6.555699668540137e-06, "loss": 0.5322, "step": 4550 }, { "epoch": 0.06572215095916867, "grad_norm": 2.3385304329412886, "learning_rate": 6.570110966998127e-06, "loss": 0.5362, "step": 4560 }, { "epoch": 0.06586627848320194, "grad_norm": 1.9510042444990017, "learning_rate": 6.584522265456118e-06, "loss": 0.541, "step": 4570 }, { "epoch": 0.0660104060072352, "grad_norm": 1.7493048093532648, "learning_rate": 6.59893356391411e-06, "loss": 0.5294, "step": 4580 }, { "epoch": 0.06615453353126846, "grad_norm": 1.8043307141640894, "learning_rate": 6.6133448623721e-06, "loss": 0.5379, "step": 4590 }, { "epoch": 0.06629866105530173, "grad_norm": 1.7614105526857522, "learning_rate": 6.627756160830091e-06, "loss": 0.5023, "step": 4600 }, { "epoch": 0.066442788579335, "grad_norm": 1.7741591863125101, "learning_rate": 6.642167459288082e-06, "loss": 0.529, "step": 4610 }, { "epoch": 0.06658691610336825, "grad_norm": 1.8709288866262723, "learning_rate": 6.656578757746074e-06, "loss": 0.5205, "step": 4620 }, { "epoch": 0.06673104362740152, "grad_norm": 1.6512715913955838, "learning_rate": 6.670990056204064e-06, "loss": 0.526, "step": 4630 }, { "epoch": 0.06687517115143479, "grad_norm": 1.8590856484030247, "learning_rate": 6.685401354662055e-06, "loss": 0.5174, "step": 4640 }, { "epoch": 0.06701929867546805, "grad_norm": 1.7057604448842392, "learning_rate": 6.699812653120047e-06, "loss": 0.5423, "step": 4650 }, { "epoch": 0.06716342619950132, "grad_norm": 2.1882720546558474, "learning_rate": 6.7142239515780375e-06, "loss": 0.56, "step": 4660 }, { "epoch": 0.06730755372353459, "grad_norm": 1.8129917931256923, "learning_rate": 6.7286352500360286e-06, "loss": 0.5561, "step": 4670 }, { "epoch": 0.06745168124756785, "grad_norm": 1.8355753217421684, "learning_rate": 6.74304654849402e-06, "loss": 0.5588, "step": 4680 }, { "epoch": 0.06759580877160111, "grad_norm": 1.4939324884018956, "learning_rate": 6.7574578469520115e-06, "loss": 0.495, "step": 4690 }, { "epoch": 0.06773993629563438, "grad_norm": 2.085233019433524, "learning_rate": 6.771869145410002e-06, "loss": 0.5388, "step": 4700 }, { "epoch": 0.06788406381966765, "grad_norm": 1.8511395365105758, "learning_rate": 6.786280443867993e-06, "loss": 0.5117, "step": 4710 }, { "epoch": 0.0680281913437009, "grad_norm": 2.4305182081441297, "learning_rate": 6.800691742325985e-06, "loss": 0.5688, "step": 4720 }, { "epoch": 0.06817231886773417, "grad_norm": 2.0051181815151304, "learning_rate": 6.815103040783975e-06, "loss": 0.5272, "step": 4730 }, { "epoch": 0.06831644639176744, "grad_norm": 1.8310613553194357, "learning_rate": 6.829514339241966e-06, "loss": 0.5356, "step": 4740 }, { "epoch": 0.0684605739158007, "grad_norm": 1.948073389220678, "learning_rate": 6.843925637699957e-06, "loss": 0.5661, "step": 4750 }, { "epoch": 0.06860470143983396, "grad_norm": 2.0356268057151254, "learning_rate": 6.858336936157949e-06, "loss": 0.5426, "step": 4760 }, { "epoch": 0.06874882896386723, "grad_norm": 1.934837360511284, "learning_rate": 6.872748234615939e-06, "loss": 0.4963, "step": 4770 }, { "epoch": 0.06889295648790049, "grad_norm": 1.71992024024452, "learning_rate": 6.88715953307393e-06, "loss": 0.5185, "step": 4780 }, { "epoch": 0.06903708401193376, "grad_norm": 1.9109405223605018, "learning_rate": 6.901570831531922e-06, "loss": 0.5, "step": 4790 }, { "epoch": 0.06918121153596703, "grad_norm": 1.8786034143352044, "learning_rate": 6.915982129989912e-06, "loss": 0.5304, "step": 4800 }, { "epoch": 0.06932533906000028, "grad_norm": 1.8035362806738047, "learning_rate": 6.9303934284479034e-06, "loss": 0.5363, "step": 4810 }, { "epoch": 0.06946946658403355, "grad_norm": 2.125219531623248, "learning_rate": 6.9448047269058945e-06, "loss": 0.5233, "step": 4820 }, { "epoch": 0.06961359410806682, "grad_norm": 1.8077869323852913, "learning_rate": 6.959216025363886e-06, "loss": 0.53, "step": 4830 }, { "epoch": 0.06975772163210008, "grad_norm": 1.951022711852785, "learning_rate": 6.973627323821877e-06, "loss": 0.5329, "step": 4840 }, { "epoch": 0.06990184915613334, "grad_norm": 2.0703830252567923, "learning_rate": 6.988038622279868e-06, "loss": 0.5439, "step": 4850 }, { "epoch": 0.07004597668016661, "grad_norm": 1.7787826411060008, "learning_rate": 7.00244992073786e-06, "loss": 0.521, "step": 4860 }, { "epoch": 0.07019010420419988, "grad_norm": 1.9032227779425248, "learning_rate": 7.01686121919585e-06, "loss": 0.563, "step": 4870 }, { "epoch": 0.07033423172823314, "grad_norm": 1.8624044292547142, "learning_rate": 7.031272517653841e-06, "loss": 0.5376, "step": 4880 }, { "epoch": 0.0704783592522664, "grad_norm": 1.8270029049155334, "learning_rate": 7.045683816111833e-06, "loss": 0.5263, "step": 4890 }, { "epoch": 0.07062248677629968, "grad_norm": 2.0222897511007605, "learning_rate": 7.060095114569824e-06, "loss": 0.5408, "step": 4900 }, { "epoch": 0.07076661430033293, "grad_norm": 1.5028514097076284, "learning_rate": 7.074506413027814e-06, "loss": 0.5326, "step": 4910 }, { "epoch": 0.0709107418243662, "grad_norm": 1.7746178496190856, "learning_rate": 7.088917711485805e-06, "loss": 0.5565, "step": 4920 }, { "epoch": 0.07105486934839947, "grad_norm": 2.017836114934992, "learning_rate": 7.103329009943797e-06, "loss": 0.5615, "step": 4930 }, { "epoch": 0.07119899687243272, "grad_norm": 1.647542333097911, "learning_rate": 7.117740308401787e-06, "loss": 0.5456, "step": 4940 }, { "epoch": 0.07134312439646599, "grad_norm": 1.7413216202176642, "learning_rate": 7.132151606859778e-06, "loss": 0.5423, "step": 4950 }, { "epoch": 0.07148725192049926, "grad_norm": 1.8777451668696914, "learning_rate": 7.14656290531777e-06, "loss": 0.533, "step": 4960 }, { "epoch": 0.07163137944453252, "grad_norm": 1.660096328947355, "learning_rate": 7.160974203775761e-06, "loss": 0.5383, "step": 4970 }, { "epoch": 0.07177550696856579, "grad_norm": 1.6759600110921489, "learning_rate": 7.1753855022337515e-06, "loss": 0.5162, "step": 4980 }, { "epoch": 0.07191963449259905, "grad_norm": 1.7829592090401205, "learning_rate": 7.189796800691743e-06, "loss": 0.5473, "step": 4990 }, { "epoch": 0.07206376201663231, "grad_norm": 1.5842247076900686, "learning_rate": 7.2042080991497345e-06, "loss": 0.5096, "step": 5000 }, { "epoch": 0.07220788954066558, "grad_norm": 1.7897505123093747, "learning_rate": 7.218619397607725e-06, "loss": 0.5374, "step": 5010 }, { "epoch": 0.07235201706469885, "grad_norm": 2.236540082847642, "learning_rate": 7.233030696065716e-06, "loss": 0.574, "step": 5020 }, { "epoch": 0.07249614458873212, "grad_norm": 1.9882697998659122, "learning_rate": 7.247441994523708e-06, "loss": 0.5693, "step": 5030 }, { "epoch": 0.07264027211276537, "grad_norm": 1.819414428586229, "learning_rate": 7.261853292981699e-06, "loss": 0.5391, "step": 5040 }, { "epoch": 0.07278439963679864, "grad_norm": 1.7273209289270848, "learning_rate": 7.276264591439689e-06, "loss": 0.5333, "step": 5050 }, { "epoch": 0.07292852716083191, "grad_norm": 1.5288533808490188, "learning_rate": 7.29067588989768e-06, "loss": 0.4971, "step": 5060 }, { "epoch": 0.07307265468486517, "grad_norm": 1.9810441406635917, "learning_rate": 7.305087188355672e-06, "loss": 0.5749, "step": 5070 }, { "epoch": 0.07321678220889843, "grad_norm": 2.0146176943228737, "learning_rate": 7.319498486813662e-06, "loss": 0.5144, "step": 5080 }, { "epoch": 0.0733609097329317, "grad_norm": 2.122422545745788, "learning_rate": 7.333909785271653e-06, "loss": 0.5319, "step": 5090 }, { "epoch": 0.07350503725696496, "grad_norm": 1.8830840284752108, "learning_rate": 7.348321083729645e-06, "loss": 0.5186, "step": 5100 }, { "epoch": 0.07364916478099823, "grad_norm": 1.8875604010836786, "learning_rate": 7.362732382187636e-06, "loss": 0.5368, "step": 5110 }, { "epoch": 0.0737932923050315, "grad_norm": 2.1570390826015884, "learning_rate": 7.377143680645626e-06, "loss": 0.5446, "step": 5120 }, { "epoch": 0.07393741982906475, "grad_norm": 2.0817161538807314, "learning_rate": 7.3915549791036175e-06, "loss": 0.5451, "step": 5130 }, { "epoch": 0.07408154735309802, "grad_norm": 1.8572939503962713, "learning_rate": 7.405966277561609e-06, "loss": 0.5163, "step": 5140 }, { "epoch": 0.07422567487713129, "grad_norm": 1.600217947180802, "learning_rate": 7.4203775760196e-06, "loss": 0.5427, "step": 5150 }, { "epoch": 0.07436980240116454, "grad_norm": 1.963801565868538, "learning_rate": 7.434788874477591e-06, "loss": 0.5254, "step": 5160 }, { "epoch": 0.07451392992519781, "grad_norm": 1.8664500344451156, "learning_rate": 7.449200172935583e-06, "loss": 0.5356, "step": 5170 }, { "epoch": 0.07465805744923108, "grad_norm": 1.4856629585559646, "learning_rate": 7.463611471393574e-06, "loss": 0.5402, "step": 5180 }, { "epoch": 0.07480218497326434, "grad_norm": 2.001472683157249, "learning_rate": 7.478022769851564e-06, "loss": 0.5405, "step": 5190 }, { "epoch": 0.0749463124972976, "grad_norm": 1.8291295458246353, "learning_rate": 7.492434068309555e-06, "loss": 0.5524, "step": 5200 }, { "epoch": 0.07509044002133088, "grad_norm": 1.685593722250548, "learning_rate": 7.506845366767547e-06, "loss": 0.521, "step": 5210 }, { "epoch": 0.07523456754536414, "grad_norm": 1.931205724857888, "learning_rate": 7.521256665225537e-06, "loss": 0.5157, "step": 5220 }, { "epoch": 0.0753786950693974, "grad_norm": 2.530168256051908, "learning_rate": 7.535667963683528e-06, "loss": 0.5318, "step": 5230 }, { "epoch": 0.07552282259343067, "grad_norm": 1.6278129364843004, "learning_rate": 7.55007926214152e-06, "loss": 0.5476, "step": 5240 }, { "epoch": 0.07566695011746394, "grad_norm": 1.7849432824939047, "learning_rate": 7.564490560599511e-06, "loss": 0.5412, "step": 5250 }, { "epoch": 0.0758110776414972, "grad_norm": 1.7899640369161618, "learning_rate": 7.578901859057501e-06, "loss": 0.5268, "step": 5260 }, { "epoch": 0.07595520516553046, "grad_norm": 1.8711452703869986, "learning_rate": 7.593313157515493e-06, "loss": 0.5248, "step": 5270 }, { "epoch": 0.07609933268956373, "grad_norm": 1.6890115397104026, "learning_rate": 7.607724455973484e-06, "loss": 0.5213, "step": 5280 }, { "epoch": 0.07624346021359699, "grad_norm": 1.9441693007021394, "learning_rate": 7.6221357544314745e-06, "loss": 0.5474, "step": 5290 }, { "epoch": 0.07638758773763026, "grad_norm": 1.881219716981986, "learning_rate": 7.636547052889466e-06, "loss": 0.513, "step": 5300 }, { "epoch": 0.07653171526166352, "grad_norm": 1.5054781990396469, "learning_rate": 7.650958351347457e-06, "loss": 0.5217, "step": 5310 }, { "epoch": 0.07667584278569678, "grad_norm": 1.8782780275647675, "learning_rate": 7.665369649805449e-06, "loss": 0.5409, "step": 5320 }, { "epoch": 0.07681997030973005, "grad_norm": 1.947357881553378, "learning_rate": 7.67978094826344e-06, "loss": 0.5526, "step": 5330 }, { "epoch": 0.07696409783376332, "grad_norm": 1.7854097236298743, "learning_rate": 7.69419224672143e-06, "loss": 0.5289, "step": 5340 }, { "epoch": 0.07710822535779657, "grad_norm": 1.7300381826350495, "learning_rate": 7.708603545179422e-06, "loss": 0.5414, "step": 5350 }, { "epoch": 0.07725235288182984, "grad_norm": 1.577691403860615, "learning_rate": 7.723014843637413e-06, "loss": 0.5238, "step": 5360 }, { "epoch": 0.07739648040586311, "grad_norm": 1.7278192900780656, "learning_rate": 7.737426142095404e-06, "loss": 0.5155, "step": 5370 }, { "epoch": 0.07754060792989637, "grad_norm": 1.6772245402786692, "learning_rate": 7.751837440553395e-06, "loss": 0.5057, "step": 5380 }, { "epoch": 0.07768473545392963, "grad_norm": 2.1129755912470163, "learning_rate": 7.766248739011386e-06, "loss": 0.5309, "step": 5390 }, { "epoch": 0.0778288629779629, "grad_norm": 1.8289613466091001, "learning_rate": 7.780660037469377e-06, "loss": 0.5445, "step": 5400 }, { "epoch": 0.07797299050199617, "grad_norm": 1.5902878717671793, "learning_rate": 7.795071335927368e-06, "loss": 0.5058, "step": 5410 }, { "epoch": 0.07811711802602943, "grad_norm": 1.787503238195541, "learning_rate": 7.80948263438536e-06, "loss": 0.5335, "step": 5420 }, { "epoch": 0.0782612455500627, "grad_norm": 1.5563594670674519, "learning_rate": 7.82389393284335e-06, "loss": 0.5074, "step": 5430 }, { "epoch": 0.07840537307409597, "grad_norm": 1.842707232362684, "learning_rate": 7.838305231301341e-06, "loss": 0.547, "step": 5440 }, { "epoch": 0.07854950059812922, "grad_norm": 1.8454444501757257, "learning_rate": 7.852716529759332e-06, "loss": 0.511, "step": 5450 }, { "epoch": 0.07869362812216249, "grad_norm": 2.076229182625897, "learning_rate": 7.867127828217323e-06, "loss": 0.5392, "step": 5460 }, { "epoch": 0.07883775564619576, "grad_norm": 2.208272265410678, "learning_rate": 7.881539126675314e-06, "loss": 0.5475, "step": 5470 }, { "epoch": 0.07898188317022901, "grad_norm": 1.662123408519741, "learning_rate": 7.895950425133306e-06, "loss": 0.5084, "step": 5480 }, { "epoch": 0.07912601069426228, "grad_norm": 1.7005033536668743, "learning_rate": 7.910361723591297e-06, "loss": 0.5147, "step": 5490 }, { "epoch": 0.07927013821829555, "grad_norm": 1.9167425773824684, "learning_rate": 7.924773022049288e-06, "loss": 0.5589, "step": 5500 }, { "epoch": 0.07941426574232881, "grad_norm": 1.570640665046883, "learning_rate": 7.939184320507279e-06, "loss": 0.5068, "step": 5510 }, { "epoch": 0.07955839326636208, "grad_norm": 1.7798532559246043, "learning_rate": 7.95359561896527e-06, "loss": 0.5428, "step": 5520 }, { "epoch": 0.07970252079039535, "grad_norm": 1.675456326759839, "learning_rate": 7.96800691742326e-06, "loss": 0.533, "step": 5530 }, { "epoch": 0.0798466483144286, "grad_norm": 1.9386912022316687, "learning_rate": 7.982418215881252e-06, "loss": 0.5092, "step": 5540 }, { "epoch": 0.07999077583846187, "grad_norm": 1.96561979082192, "learning_rate": 7.996829514339243e-06, "loss": 0.5443, "step": 5550 }, { "epoch": 0.08013490336249514, "grad_norm": 1.588098979043936, "learning_rate": 8.011240812797234e-06, "loss": 0.5354, "step": 5560 }, { "epoch": 0.0802790308865284, "grad_norm": 1.904498518010621, "learning_rate": 8.025652111255225e-06, "loss": 0.538, "step": 5570 }, { "epoch": 0.08042315841056166, "grad_norm": 1.7982394348350277, "learning_rate": 8.040063409713216e-06, "loss": 0.5336, "step": 5580 }, { "epoch": 0.08056728593459493, "grad_norm": 1.5693707712152516, "learning_rate": 8.054474708171207e-06, "loss": 0.5448, "step": 5590 }, { "epoch": 0.0807114134586282, "grad_norm": 1.8826796656693146, "learning_rate": 8.068886006629197e-06, "loss": 0.521, "step": 5600 }, { "epoch": 0.08085554098266146, "grad_norm": 1.873358148631293, "learning_rate": 8.08329730508719e-06, "loss": 0.5165, "step": 5610 }, { "epoch": 0.08099966850669472, "grad_norm": 1.7330957208025286, "learning_rate": 8.09770860354518e-06, "loss": 0.5084, "step": 5620 }, { "epoch": 0.081143796030728, "grad_norm": 1.5991378202262287, "learning_rate": 8.112119902003172e-06, "loss": 0.5235, "step": 5630 }, { "epoch": 0.08128792355476125, "grad_norm": 1.7948437716721168, "learning_rate": 8.126531200461163e-06, "loss": 0.546, "step": 5640 }, { "epoch": 0.08143205107879452, "grad_norm": 1.7864320339226158, "learning_rate": 8.140942498919154e-06, "loss": 0.5141, "step": 5650 }, { "epoch": 0.08157617860282779, "grad_norm": 1.7751557740330326, "learning_rate": 8.155353797377145e-06, "loss": 0.5238, "step": 5660 }, { "epoch": 0.08172030612686104, "grad_norm": 1.6666481176010033, "learning_rate": 8.169765095835134e-06, "loss": 0.5021, "step": 5670 }, { "epoch": 0.08186443365089431, "grad_norm": 1.9291214531540661, "learning_rate": 8.184176394293127e-06, "loss": 0.5361, "step": 5680 }, { "epoch": 0.08200856117492758, "grad_norm": 1.6227918775727312, "learning_rate": 8.198587692751118e-06, "loss": 0.5139, "step": 5690 }, { "epoch": 0.08215268869896084, "grad_norm": 2.2007065112622026, "learning_rate": 8.212998991209109e-06, "loss": 0.553, "step": 5700 }, { "epoch": 0.0822968162229941, "grad_norm": 1.8063380813760475, "learning_rate": 8.2274102896671e-06, "loss": 0.5358, "step": 5710 }, { "epoch": 0.08244094374702737, "grad_norm": 1.9662208883433, "learning_rate": 8.241821588125091e-06, "loss": 0.5291, "step": 5720 }, { "epoch": 0.08258507127106063, "grad_norm": 1.7959386903806303, "learning_rate": 8.256232886583082e-06, "loss": 0.5028, "step": 5730 }, { "epoch": 0.0827291987950939, "grad_norm": 1.6185918086791968, "learning_rate": 8.270644185041073e-06, "loss": 0.5334, "step": 5740 }, { "epoch": 0.08287332631912717, "grad_norm": 1.8929291293054358, "learning_rate": 8.285055483499064e-06, "loss": 0.5361, "step": 5750 }, { "epoch": 0.08301745384316042, "grad_norm": 2.5711773459536342, "learning_rate": 8.299466781957055e-06, "loss": 0.5038, "step": 5760 }, { "epoch": 0.08316158136719369, "grad_norm": 2.021806217367333, "learning_rate": 8.313878080415046e-06, "loss": 0.5563, "step": 5770 }, { "epoch": 0.08330570889122696, "grad_norm": 1.8915463945846578, "learning_rate": 8.328289378873037e-06, "loss": 0.545, "step": 5780 }, { "epoch": 0.08344983641526023, "grad_norm": 1.789715713535288, "learning_rate": 8.342700677331029e-06, "loss": 0.5477, "step": 5790 }, { "epoch": 0.08359396393929348, "grad_norm": 1.9034152139020692, "learning_rate": 8.35711197578902e-06, "loss": 0.5476, "step": 5800 }, { "epoch": 0.08373809146332675, "grad_norm": 1.5461896508477144, "learning_rate": 8.37152327424701e-06, "loss": 0.5348, "step": 5810 }, { "epoch": 0.08388221898736002, "grad_norm": 2.0722106467611194, "learning_rate": 8.385934572705002e-06, "loss": 0.5431, "step": 5820 }, { "epoch": 0.08402634651139328, "grad_norm": 1.5623992781745188, "learning_rate": 8.400345871162993e-06, "loss": 0.5437, "step": 5830 }, { "epoch": 0.08417047403542655, "grad_norm": 1.8369394432548305, "learning_rate": 8.414757169620984e-06, "loss": 0.5146, "step": 5840 }, { "epoch": 0.08431460155945981, "grad_norm": 1.8738492226066634, "learning_rate": 8.429168468078975e-06, "loss": 0.5435, "step": 5850 }, { "epoch": 0.08445872908349307, "grad_norm": 2.2458965516952416, "learning_rate": 8.443579766536966e-06, "loss": 0.5307, "step": 5860 }, { "epoch": 0.08460285660752634, "grad_norm": 1.8393183669377977, "learning_rate": 8.457991064994957e-06, "loss": 0.5174, "step": 5870 }, { "epoch": 0.08474698413155961, "grad_norm": 1.7805298924773227, "learning_rate": 8.472402363452948e-06, "loss": 0.4984, "step": 5880 }, { "epoch": 0.08489111165559286, "grad_norm": 1.5873253911155047, "learning_rate": 8.486813661910939e-06, "loss": 0.5217, "step": 5890 }, { "epoch": 0.08503523917962613, "grad_norm": 1.8510771104423658, "learning_rate": 8.50122496036893e-06, "loss": 0.5369, "step": 5900 }, { "epoch": 0.0851793667036594, "grad_norm": 2.017105142983983, "learning_rate": 8.515636258826921e-06, "loss": 0.5117, "step": 5910 }, { "epoch": 0.08532349422769266, "grad_norm": 1.5846361015713786, "learning_rate": 8.530047557284912e-06, "loss": 0.5336, "step": 5920 }, { "epoch": 0.08546762175172593, "grad_norm": 1.6760899188430327, "learning_rate": 8.544458855742903e-06, "loss": 0.5492, "step": 5930 }, { "epoch": 0.0856117492757592, "grad_norm": 1.4834913280650521, "learning_rate": 8.558870154200894e-06, "loss": 0.5276, "step": 5940 }, { "epoch": 0.08575587679979246, "grad_norm": 1.6996776484883387, "learning_rate": 8.573281452658886e-06, "loss": 0.5357, "step": 5950 }, { "epoch": 0.08590000432382572, "grad_norm": 1.8043238917782727, "learning_rate": 8.587692751116877e-06, "loss": 0.5025, "step": 5960 }, { "epoch": 0.08604413184785899, "grad_norm": 1.5703471681892278, "learning_rate": 8.602104049574868e-06, "loss": 0.5154, "step": 5970 }, { "epoch": 0.08618825937189226, "grad_norm": 1.606341298927273, "learning_rate": 8.616515348032859e-06, "loss": 0.5484, "step": 5980 }, { "epoch": 0.08633238689592551, "grad_norm": 1.643135256063983, "learning_rate": 8.63092664649085e-06, "loss": 0.5337, "step": 5990 }, { "epoch": 0.08647651441995878, "grad_norm": 1.8561029621084761, "learning_rate": 8.645337944948841e-06, "loss": 0.5386, "step": 6000 }, { "epoch": 0.08662064194399205, "grad_norm": 1.8408781470661717, "learning_rate": 8.659749243406832e-06, "loss": 0.5302, "step": 6010 }, { "epoch": 0.0867647694680253, "grad_norm": 1.9575475731734735, "learning_rate": 8.674160541864823e-06, "loss": 0.5416, "step": 6020 }, { "epoch": 0.08690889699205857, "grad_norm": 1.818386768267519, "learning_rate": 8.688571840322814e-06, "loss": 0.5826, "step": 6030 }, { "epoch": 0.08705302451609184, "grad_norm": 1.4680212398504346, "learning_rate": 8.702983138780805e-06, "loss": 0.5099, "step": 6040 }, { "epoch": 0.0871971520401251, "grad_norm": 1.9487646624529726, "learning_rate": 8.717394437238796e-06, "loss": 0.5215, "step": 6050 }, { "epoch": 0.08734127956415837, "grad_norm": 1.7920397872403477, "learning_rate": 8.731805735696787e-06, "loss": 0.5051, "step": 6060 }, { "epoch": 0.08748540708819164, "grad_norm": 2.2004657395961815, "learning_rate": 8.746217034154778e-06, "loss": 0.5618, "step": 6070 }, { "epoch": 0.08762953461222489, "grad_norm": 1.7229951058326598, "learning_rate": 8.76062833261277e-06, "loss": 0.5144, "step": 6080 }, { "epoch": 0.08777366213625816, "grad_norm": 1.916454810715728, "learning_rate": 8.77503963107076e-06, "loss": 0.5225, "step": 6090 }, { "epoch": 0.08791778966029143, "grad_norm": 1.9052728558511467, "learning_rate": 8.789450929528751e-06, "loss": 0.5491, "step": 6100 }, { "epoch": 0.08806191718432468, "grad_norm": 1.9600743991789622, "learning_rate": 8.803862227986743e-06, "loss": 0.5492, "step": 6110 }, { "epoch": 0.08820604470835795, "grad_norm": 1.8263004401817862, "learning_rate": 8.818273526444734e-06, "loss": 0.5458, "step": 6120 }, { "epoch": 0.08835017223239122, "grad_norm": 1.541307547845343, "learning_rate": 8.832684824902725e-06, "loss": 0.5024, "step": 6130 }, { "epoch": 0.08849429975642449, "grad_norm": 1.5025843295170653, "learning_rate": 8.847096123360716e-06, "loss": 0.564, "step": 6140 }, { "epoch": 0.08863842728045775, "grad_norm": 1.8105882134317655, "learning_rate": 8.861507421818707e-06, "loss": 0.5468, "step": 6150 }, { "epoch": 0.08878255480449102, "grad_norm": 1.7228834163080804, "learning_rate": 8.875918720276698e-06, "loss": 0.5612, "step": 6160 }, { "epoch": 0.08892668232852428, "grad_norm": 1.6586565167995062, "learning_rate": 8.890330018734689e-06, "loss": 0.5599, "step": 6170 }, { "epoch": 0.08907080985255754, "grad_norm": 1.6722983164628273, "learning_rate": 8.90474131719268e-06, "loss": 0.5405, "step": 6180 }, { "epoch": 0.08921493737659081, "grad_norm": 1.6464194268564392, "learning_rate": 8.919152615650671e-06, "loss": 0.5316, "step": 6190 }, { "epoch": 0.08935906490062408, "grad_norm": 2.3439713544178913, "learning_rate": 8.933563914108662e-06, "loss": 0.5382, "step": 6200 }, { "epoch": 0.08950319242465733, "grad_norm": 1.54149071608989, "learning_rate": 8.947975212566653e-06, "loss": 0.5119, "step": 6210 }, { "epoch": 0.0896473199486906, "grad_norm": 1.7632827835480627, "learning_rate": 8.962386511024644e-06, "loss": 0.5222, "step": 6220 }, { "epoch": 0.08979144747272387, "grad_norm": 1.796905558957626, "learning_rate": 8.976797809482635e-06, "loss": 0.5265, "step": 6230 }, { "epoch": 0.08993557499675713, "grad_norm": 1.7722442160712681, "learning_rate": 8.991209107940626e-06, "loss": 0.551, "step": 6240 }, { "epoch": 0.0900797025207904, "grad_norm": 1.9180741846313667, "learning_rate": 9.005620406398617e-06, "loss": 0.5181, "step": 6250 }, { "epoch": 0.09022383004482366, "grad_norm": 1.894281656248424, "learning_rate": 9.020031704856609e-06, "loss": 0.5287, "step": 6260 }, { "epoch": 0.09036795756885692, "grad_norm": 2.295889608685035, "learning_rate": 9.0344430033146e-06, "loss": 0.5278, "step": 6270 }, { "epoch": 0.09051208509289019, "grad_norm": 1.5358331881821146, "learning_rate": 9.04885430177259e-06, "loss": 0.5547, "step": 6280 }, { "epoch": 0.09065621261692346, "grad_norm": 2.948259031482607, "learning_rate": 9.063265600230582e-06, "loss": 0.5096, "step": 6290 }, { "epoch": 0.09080034014095671, "grad_norm": 1.9381280103041842, "learning_rate": 9.077676898688573e-06, "loss": 0.5591, "step": 6300 }, { "epoch": 0.09094446766498998, "grad_norm": 1.6184958199187638, "learning_rate": 9.092088197146564e-06, "loss": 0.4998, "step": 6310 }, { "epoch": 0.09108859518902325, "grad_norm": 2.209736980063481, "learning_rate": 9.106499495604555e-06, "loss": 0.5491, "step": 6320 }, { "epoch": 0.09123272271305652, "grad_norm": 1.7264789198883568, "learning_rate": 9.120910794062546e-06, "loss": 0.5388, "step": 6330 }, { "epoch": 0.09137685023708977, "grad_norm": 2.6332690275056243, "learning_rate": 9.135322092520537e-06, "loss": 0.524, "step": 6340 }, { "epoch": 0.09152097776112304, "grad_norm": 1.617778471824364, "learning_rate": 9.149733390978528e-06, "loss": 0.5522, "step": 6350 }, { "epoch": 0.09166510528515631, "grad_norm": 1.6255321530519957, "learning_rate": 9.164144689436519e-06, "loss": 0.5146, "step": 6360 }, { "epoch": 0.09180923280918957, "grad_norm": 1.5059694775109476, "learning_rate": 9.17855598789451e-06, "loss": 0.5094, "step": 6370 }, { "epoch": 0.09195336033322284, "grad_norm": 1.9005046559435599, "learning_rate": 9.192967286352501e-06, "loss": 0.5525, "step": 6380 }, { "epoch": 0.0920974878572561, "grad_norm": 2.127609442111397, "learning_rate": 9.207378584810492e-06, "loss": 0.5033, "step": 6390 }, { "epoch": 0.09224161538128936, "grad_norm": 1.6689652825798116, "learning_rate": 9.221789883268483e-06, "loss": 0.5266, "step": 6400 }, { "epoch": 0.09238574290532263, "grad_norm": 2.0460894258804583, "learning_rate": 9.236201181726474e-06, "loss": 0.5506, "step": 6410 }, { "epoch": 0.0925298704293559, "grad_norm": 2.1920502667254085, "learning_rate": 9.250612480184466e-06, "loss": 0.5433, "step": 6420 }, { "epoch": 0.09267399795338915, "grad_norm": 1.9604661406002089, "learning_rate": 9.265023778642457e-06, "loss": 0.5165, "step": 6430 }, { "epoch": 0.09281812547742242, "grad_norm": 1.7176227103373007, "learning_rate": 9.279435077100448e-06, "loss": 0.5298, "step": 6440 }, { "epoch": 0.09296225300145569, "grad_norm": 1.9164726920645228, "learning_rate": 9.293846375558439e-06, "loss": 0.5493, "step": 6450 }, { "epoch": 0.09310638052548895, "grad_norm": 1.4962807390822495, "learning_rate": 9.30825767401643e-06, "loss": 0.5264, "step": 6460 }, { "epoch": 0.09325050804952222, "grad_norm": 2.433661853160552, "learning_rate": 9.32266897247442e-06, "loss": 0.5556, "step": 6470 }, { "epoch": 0.09339463557355548, "grad_norm": 1.5139459230767514, "learning_rate": 9.337080270932412e-06, "loss": 0.5301, "step": 6480 }, { "epoch": 0.09353876309758874, "grad_norm": 2.0468444808500403, "learning_rate": 9.351491569390403e-06, "loss": 0.5485, "step": 6490 }, { "epoch": 0.09368289062162201, "grad_norm": 1.6825745964057788, "learning_rate": 9.365902867848394e-06, "loss": 0.5412, "step": 6500 }, { "epoch": 0.09382701814565528, "grad_norm": 1.7698113569439535, "learning_rate": 9.380314166306385e-06, "loss": 0.5032, "step": 6510 }, { "epoch": 0.09397114566968855, "grad_norm": 1.4027347656202136, "learning_rate": 9.394725464764376e-06, "loss": 0.5347, "step": 6520 }, { "epoch": 0.0941152731937218, "grad_norm": 1.9206344173828878, "learning_rate": 9.409136763222367e-06, "loss": 0.5226, "step": 6530 }, { "epoch": 0.09425940071775507, "grad_norm": 1.6502828022239844, "learning_rate": 9.423548061680358e-06, "loss": 0.544, "step": 6540 }, { "epoch": 0.09440352824178834, "grad_norm": 1.7233454771823902, "learning_rate": 9.43795936013835e-06, "loss": 0.5593, "step": 6550 }, { "epoch": 0.0945476557658216, "grad_norm": 1.6644919953724262, "learning_rate": 9.45237065859634e-06, "loss": 0.5241, "step": 6560 }, { "epoch": 0.09469178328985486, "grad_norm": 1.7652471093569913, "learning_rate": 9.466781957054331e-06, "loss": 0.5139, "step": 6570 }, { "epoch": 0.09483591081388813, "grad_norm": 1.459182291163728, "learning_rate": 9.481193255512323e-06, "loss": 0.5384, "step": 6580 }, { "epoch": 0.09498003833792139, "grad_norm": 1.6021186968158192, "learning_rate": 9.495604553970314e-06, "loss": 0.5136, "step": 6590 }, { "epoch": 0.09512416586195466, "grad_norm": 1.95545157640721, "learning_rate": 9.510015852428305e-06, "loss": 0.5134, "step": 6600 }, { "epoch": 0.09526829338598793, "grad_norm": 1.7510170007614017, "learning_rate": 9.524427150886296e-06, "loss": 0.5016, "step": 6610 }, { "epoch": 0.09541242091002118, "grad_norm": 1.4258582871932526, "learning_rate": 9.538838449344287e-06, "loss": 0.5244, "step": 6620 }, { "epoch": 0.09555654843405445, "grad_norm": 1.685211759203824, "learning_rate": 9.553249747802278e-06, "loss": 0.5248, "step": 6630 }, { "epoch": 0.09570067595808772, "grad_norm": 1.720658919325262, "learning_rate": 9.567661046260269e-06, "loss": 0.5376, "step": 6640 }, { "epoch": 0.09584480348212097, "grad_norm": 2.042357293111902, "learning_rate": 9.58207234471826e-06, "loss": 0.5451, "step": 6650 }, { "epoch": 0.09598893100615424, "grad_norm": 1.6819467149890122, "learning_rate": 9.596483643176251e-06, "loss": 0.5119, "step": 6660 }, { "epoch": 0.09613305853018751, "grad_norm": 1.7346866625555404, "learning_rate": 9.610894941634242e-06, "loss": 0.5592, "step": 6670 }, { "epoch": 0.09627718605422078, "grad_norm": 1.5244364334185834, "learning_rate": 9.625306240092233e-06, "loss": 0.5151, "step": 6680 }, { "epoch": 0.09642131357825404, "grad_norm": 2.4874687656442416, "learning_rate": 9.639717538550224e-06, "loss": 0.5336, "step": 6690 }, { "epoch": 0.0965654411022873, "grad_norm": 1.4764766515127346, "learning_rate": 9.654128837008215e-06, "loss": 0.4958, "step": 6700 }, { "epoch": 0.09670956862632057, "grad_norm": 1.4884584053209053, "learning_rate": 9.668540135466206e-06, "loss": 0.4942, "step": 6710 }, { "epoch": 0.09685369615035383, "grad_norm": 1.6062798959533968, "learning_rate": 9.682951433924197e-06, "loss": 0.5168, "step": 6720 }, { "epoch": 0.0969978236743871, "grad_norm": 1.6275886864673907, "learning_rate": 9.697362732382188e-06, "loss": 0.5235, "step": 6730 }, { "epoch": 0.09714195119842037, "grad_norm": 1.7708126367337969, "learning_rate": 9.71177403084018e-06, "loss": 0.5264, "step": 6740 }, { "epoch": 0.09728607872245362, "grad_norm": 1.7954002724546871, "learning_rate": 9.72618532929817e-06, "loss": 0.5343, "step": 6750 }, { "epoch": 0.09743020624648689, "grad_norm": 1.567070435745308, "learning_rate": 9.740596627756162e-06, "loss": 0.5394, "step": 6760 }, { "epoch": 0.09757433377052016, "grad_norm": 1.6257476875840848, "learning_rate": 9.755007926214153e-06, "loss": 0.5181, "step": 6770 }, { "epoch": 0.09771846129455342, "grad_norm": 1.670743980232274, "learning_rate": 9.769419224672144e-06, "loss": 0.5311, "step": 6780 }, { "epoch": 0.09786258881858668, "grad_norm": 1.7125575159839392, "learning_rate": 9.783830523130135e-06, "loss": 0.5406, "step": 6790 }, { "epoch": 0.09800671634261995, "grad_norm": 1.7905385172112733, "learning_rate": 9.798241821588126e-06, "loss": 0.5321, "step": 6800 }, { "epoch": 0.09815084386665321, "grad_norm": 1.825392461242357, "learning_rate": 9.812653120046117e-06, "loss": 0.5418, "step": 6810 }, { "epoch": 0.09829497139068648, "grad_norm": 1.6020941951118888, "learning_rate": 9.827064418504108e-06, "loss": 0.535, "step": 6820 }, { "epoch": 0.09843909891471975, "grad_norm": 2.047843679196522, "learning_rate": 9.841475716962099e-06, "loss": 0.5362, "step": 6830 }, { "epoch": 0.098583226438753, "grad_norm": 6.167237797662502, "learning_rate": 9.85588701542009e-06, "loss": 0.5198, "step": 6840 }, { "epoch": 0.09872735396278627, "grad_norm": 1.8380327208940106, "learning_rate": 9.870298313878081e-06, "loss": 0.5259, "step": 6850 }, { "epoch": 0.09887148148681954, "grad_norm": 1.6439304935590902, "learning_rate": 9.884709612336072e-06, "loss": 0.5161, "step": 6860 }, { "epoch": 0.09901560901085281, "grad_norm": 2.0933030301516578, "learning_rate": 9.899120910794063e-06, "loss": 0.5403, "step": 6870 }, { "epoch": 0.09915973653488606, "grad_norm": 1.5394278882699717, "learning_rate": 9.913532209252054e-06, "loss": 0.5043, "step": 6880 }, { "epoch": 0.09930386405891933, "grad_norm": 1.7393201628553396, "learning_rate": 9.927943507710046e-06, "loss": 0.5341, "step": 6890 }, { "epoch": 0.0994479915829526, "grad_norm": 1.5474367548362735, "learning_rate": 9.942354806168037e-06, "loss": 0.5148, "step": 6900 }, { "epoch": 0.09959211910698586, "grad_norm": 1.7279447346418257, "learning_rate": 9.956766104626028e-06, "loss": 0.543, "step": 6910 }, { "epoch": 0.09973624663101913, "grad_norm": 1.5629868508457365, "learning_rate": 9.971177403084019e-06, "loss": 0.4874, "step": 6920 }, { "epoch": 0.0998803741550524, "grad_norm": 1.870782818042444, "learning_rate": 9.98558870154201e-06, "loss": 0.5324, "step": 6930 }, { "epoch": 0.10002450167908565, "grad_norm": 1.417008304605477, "learning_rate": 1e-05, "loss": 0.5291, "step": 6940 }, { "epoch": 0.10016862920311892, "grad_norm": 1.7874372570526766, "learning_rate": 9.999999367211883e-06, "loss": 0.5269, "step": 6950 }, { "epoch": 0.10031275672715219, "grad_norm": 1.5758466573556253, "learning_rate": 9.999997468847694e-06, "loss": 0.5245, "step": 6960 }, { "epoch": 0.10045688425118544, "grad_norm": 1.7203366771261963, "learning_rate": 9.999994304907911e-06, "loss": 0.5105, "step": 6970 }, { "epoch": 0.10060101177521871, "grad_norm": 1.7868415986436368, "learning_rate": 9.999989875393337e-06, "loss": 0.5505, "step": 6980 }, { "epoch": 0.10074513929925198, "grad_norm": 1.9149396103678, "learning_rate": 9.999984180305093e-06, "loss": 0.5286, "step": 6990 }, { "epoch": 0.10088926682328524, "grad_norm": 1.8192727038614163, "learning_rate": 9.999977219644618e-06, "loss": 0.5464, "step": 7000 }, { "epoch": 0.1010333943473185, "grad_norm": 1.9372395495462487, "learning_rate": 9.999968993413678e-06, "loss": 0.5157, "step": 7010 }, { "epoch": 0.10117752187135177, "grad_norm": 1.681140577738144, "learning_rate": 9.999959501614352e-06, "loss": 0.5522, "step": 7020 }, { "epoch": 0.10132164939538503, "grad_norm": 1.7473061072181084, "learning_rate": 9.999948744249043e-06, "loss": 0.5525, "step": 7030 }, { "epoch": 0.1014657769194183, "grad_norm": 1.8108500896467732, "learning_rate": 9.999936721320475e-06, "loss": 0.5358, "step": 7040 }, { "epoch": 0.10160990444345157, "grad_norm": 1.68217265269267, "learning_rate": 9.999923432831692e-06, "loss": 0.5215, "step": 7050 }, { "epoch": 0.10175403196748484, "grad_norm": 2.207799709705657, "learning_rate": 9.999908878786053e-06, "loss": 0.5416, "step": 7060 }, { "epoch": 0.10189815949151809, "grad_norm": 1.6173162455075925, "learning_rate": 9.999893059187247e-06, "loss": 0.5145, "step": 7070 }, { "epoch": 0.10204228701555136, "grad_norm": 1.9315698989636958, "learning_rate": 9.999875974039275e-06, "loss": 0.5341, "step": 7080 }, { "epoch": 0.10218641453958463, "grad_norm": 1.8508091638976965, "learning_rate": 9.999857623346463e-06, "loss": 0.5514, "step": 7090 }, { "epoch": 0.10233054206361789, "grad_norm": 1.6258546490157069, "learning_rate": 9.999838007113455e-06, "loss": 0.5232, "step": 7100 }, { "epoch": 0.10247466958765115, "grad_norm": 1.8264627565846483, "learning_rate": 9.999817125345216e-06, "loss": 0.5423, "step": 7110 }, { "epoch": 0.10261879711168442, "grad_norm": 1.5246684393651728, "learning_rate": 9.999794978047034e-06, "loss": 0.5407, "step": 7120 }, { "epoch": 0.10276292463571768, "grad_norm": 1.9039998649255017, "learning_rate": 9.999771565224512e-06, "loss": 0.5062, "step": 7130 }, { "epoch": 0.10290705215975095, "grad_norm": 1.6706460397366263, "learning_rate": 9.999746886883577e-06, "loss": 0.53, "step": 7140 }, { "epoch": 0.10305117968378422, "grad_norm": 1.8153438503257748, "learning_rate": 9.999720943030476e-06, "loss": 0.5574, "step": 7150 }, { "epoch": 0.10319530720781747, "grad_norm": 1.574498993887471, "learning_rate": 9.999693733671774e-06, "loss": 0.5401, "step": 7160 }, { "epoch": 0.10333943473185074, "grad_norm": 1.9974781650315987, "learning_rate": 9.999665258814359e-06, "loss": 0.5349, "step": 7170 }, { "epoch": 0.10348356225588401, "grad_norm": 1.6532168943105785, "learning_rate": 9.99963551846544e-06, "loss": 0.5242, "step": 7180 }, { "epoch": 0.10362768977991726, "grad_norm": 1.8989469961525791, "learning_rate": 9.999604512632545e-06, "loss": 0.5277, "step": 7190 }, { "epoch": 0.10377181730395053, "grad_norm": 1.8431577169487372, "learning_rate": 9.999572241323519e-06, "loss": 0.5608, "step": 7200 }, { "epoch": 0.1039159448279838, "grad_norm": 1.6798778641389216, "learning_rate": 9.999538704546532e-06, "loss": 0.5151, "step": 7210 }, { "epoch": 0.10406007235201706, "grad_norm": 1.7805596667549806, "learning_rate": 9.999503902310073e-06, "loss": 0.5172, "step": 7220 }, { "epoch": 0.10420419987605033, "grad_norm": 1.7104774302280519, "learning_rate": 9.99946783462295e-06, "loss": 0.5398, "step": 7230 }, { "epoch": 0.1043483274000836, "grad_norm": 1.6169009049894512, "learning_rate": 9.999430501494294e-06, "loss": 0.5189, "step": 7240 }, { "epoch": 0.10449245492411686, "grad_norm": 1.8370645187759385, "learning_rate": 9.999391902933552e-06, "loss": 0.5075, "step": 7250 }, { "epoch": 0.10463658244815012, "grad_norm": 2.4041110686776417, "learning_rate": 9.999352038950497e-06, "loss": 0.4963, "step": 7260 }, { "epoch": 0.10478070997218339, "grad_norm": 1.6393571472983768, "learning_rate": 9.999310909555215e-06, "loss": 0.5521, "step": 7270 }, { "epoch": 0.10492483749621666, "grad_norm": 1.6766923930571511, "learning_rate": 9.99926851475812e-06, "loss": 0.5206, "step": 7280 }, { "epoch": 0.10506896502024991, "grad_norm": 1.8511486129430146, "learning_rate": 9.999224854569944e-06, "loss": 0.5427, "step": 7290 }, { "epoch": 0.10521309254428318, "grad_norm": 1.708570804142864, "learning_rate": 9.999179929001735e-06, "loss": 0.5335, "step": 7300 }, { "epoch": 0.10535722006831645, "grad_norm": 1.5668761073255868, "learning_rate": 9.999133738064863e-06, "loss": 0.5345, "step": 7310 }, { "epoch": 0.1055013475923497, "grad_norm": 1.4517874642604753, "learning_rate": 9.999086281771025e-06, "loss": 0.5381, "step": 7320 }, { "epoch": 0.10564547511638298, "grad_norm": 1.8628745645159135, "learning_rate": 9.999037560132227e-06, "loss": 0.54, "step": 7330 }, { "epoch": 0.10578960264041624, "grad_norm": 1.816867976827627, "learning_rate": 9.998987573160805e-06, "loss": 0.504, "step": 7340 }, { "epoch": 0.1059337301644495, "grad_norm": 1.4712774475398949, "learning_rate": 9.99893632086941e-06, "loss": 0.5287, "step": 7350 }, { "epoch": 0.10607785768848277, "grad_norm": 1.6579812724251384, "learning_rate": 9.998883803271016e-06, "loss": 0.5255, "step": 7360 }, { "epoch": 0.10622198521251604, "grad_norm": 1.8883623867752495, "learning_rate": 9.998830020378913e-06, "loss": 0.5591, "step": 7370 }, { "epoch": 0.10636611273654929, "grad_norm": 1.7871089827241182, "learning_rate": 9.99877497220672e-06, "loss": 0.5473, "step": 7380 }, { "epoch": 0.10651024026058256, "grad_norm": 1.6874518335309874, "learning_rate": 9.998718658768364e-06, "loss": 0.5449, "step": 7390 }, { "epoch": 0.10665436778461583, "grad_norm": 1.6694544990423508, "learning_rate": 9.998661080078103e-06, "loss": 0.5355, "step": 7400 }, { "epoch": 0.1067984953086491, "grad_norm": 1.7190333398345934, "learning_rate": 9.99860223615051e-06, "loss": 0.5333, "step": 7410 }, { "epoch": 0.10694262283268235, "grad_norm": 3.019450520394973, "learning_rate": 9.998542127000479e-06, "loss": 0.5312, "step": 7420 }, { "epoch": 0.10708675035671562, "grad_norm": 1.655134445053616, "learning_rate": 9.998480752643222e-06, "loss": 0.5176, "step": 7430 }, { "epoch": 0.10723087788074889, "grad_norm": 1.6730742119188293, "learning_rate": 9.998418113094279e-06, "loss": 0.5356, "step": 7440 }, { "epoch": 0.10737500540478215, "grad_norm": 1.7851291031876433, "learning_rate": 9.998354208369503e-06, "loss": 0.5497, "step": 7450 }, { "epoch": 0.10751913292881542, "grad_norm": 1.7009728923567038, "learning_rate": 9.998289038485068e-06, "loss": 0.5379, "step": 7460 }, { "epoch": 0.10766326045284869, "grad_norm": 1.6035364780831798, "learning_rate": 9.998222603457468e-06, "loss": 0.5591, "step": 7470 }, { "epoch": 0.10780738797688194, "grad_norm": 1.6940037179558092, "learning_rate": 9.998154903303522e-06, "loss": 0.536, "step": 7480 }, { "epoch": 0.10795151550091521, "grad_norm": 1.7897008577049272, "learning_rate": 9.998085938040367e-06, "loss": 0.5406, "step": 7490 }, { "epoch": 0.10809564302494848, "grad_norm": 1.836733965062655, "learning_rate": 9.998015707685456e-06, "loss": 0.5499, "step": 7500 }, { "epoch": 0.10823977054898173, "grad_norm": 1.7343266644635325, "learning_rate": 9.997944212256565e-06, "loss": 0.5211, "step": 7510 }, { "epoch": 0.108383898073015, "grad_norm": 1.6466684870765482, "learning_rate": 9.997871451771792e-06, "loss": 0.5449, "step": 7520 }, { "epoch": 0.10852802559704827, "grad_norm": 1.6107333685584986, "learning_rate": 9.997797426249555e-06, "loss": 0.5491, "step": 7530 }, { "epoch": 0.10867215312108153, "grad_norm": 1.4917232732600179, "learning_rate": 9.99772213570859e-06, "loss": 0.5263, "step": 7540 }, { "epoch": 0.1088162806451148, "grad_norm": 1.7192249755434812, "learning_rate": 9.997645580167953e-06, "loss": 0.5299, "step": 7550 }, { "epoch": 0.10896040816914807, "grad_norm": 1.6852282585517941, "learning_rate": 9.997567759647022e-06, "loss": 0.541, "step": 7560 }, { "epoch": 0.10910453569318132, "grad_norm": 1.9747481894775645, "learning_rate": 9.997488674165496e-06, "loss": 0.5518, "step": 7570 }, { "epoch": 0.10924866321721459, "grad_norm": 1.6376079080722976, "learning_rate": 9.997408323743389e-06, "loss": 0.5467, "step": 7580 }, { "epoch": 0.10939279074124786, "grad_norm": 1.4773410180269857, "learning_rate": 9.997326708401045e-06, "loss": 0.5879, "step": 7590 }, { "epoch": 0.10953691826528113, "grad_norm": 1.8113016039653294, "learning_rate": 9.997243828159116e-06, "loss": 0.5652, "step": 7600 }, { "epoch": 0.10968104578931438, "grad_norm": 1.5214736025537243, "learning_rate": 9.997159683038584e-06, "loss": 0.5576, "step": 7610 }, { "epoch": 0.10982517331334765, "grad_norm": 1.6562199475649675, "learning_rate": 9.997074273060746e-06, "loss": 0.52, "step": 7620 }, { "epoch": 0.10996930083738092, "grad_norm": 1.6727647483663202, "learning_rate": 9.996987598247221e-06, "loss": 0.5656, "step": 7630 }, { "epoch": 0.11011342836141418, "grad_norm": 1.5757182049316736, "learning_rate": 9.996899658619948e-06, "loss": 0.5195, "step": 7640 }, { "epoch": 0.11025755588544744, "grad_norm": 1.858289800131217, "learning_rate": 9.996810454201185e-06, "loss": 0.5412, "step": 7650 }, { "epoch": 0.11040168340948071, "grad_norm": 1.7358502021801598, "learning_rate": 9.99671998501351e-06, "loss": 0.553, "step": 7660 }, { "epoch": 0.11054581093351397, "grad_norm": 2.3733059692864216, "learning_rate": 9.996628251079825e-06, "loss": 0.5227, "step": 7670 }, { "epoch": 0.11068993845754724, "grad_norm": 1.7422791514960994, "learning_rate": 9.996535252423347e-06, "loss": 0.5427, "step": 7680 }, { "epoch": 0.1108340659815805, "grad_norm": 1.606932350697185, "learning_rate": 9.996440989067617e-06, "loss": 0.528, "step": 7690 }, { "epoch": 0.11097819350561376, "grad_norm": 1.6827708008397346, "learning_rate": 9.996345461036494e-06, "loss": 0.5144, "step": 7700 }, { "epoch": 0.11112232102964703, "grad_norm": 3.200230354165058, "learning_rate": 9.996248668354156e-06, "loss": 0.5151, "step": 7710 }, { "epoch": 0.1112664485536803, "grad_norm": 1.5524247353189395, "learning_rate": 9.996150611045103e-06, "loss": 0.5416, "step": 7720 }, { "epoch": 0.11141057607771356, "grad_norm": 1.5144891947183547, "learning_rate": 9.99605128913416e-06, "loss": 0.557, "step": 7730 }, { "epoch": 0.11155470360174682, "grad_norm": 1.638198576046771, "learning_rate": 9.995950702646458e-06, "loss": 0.5496, "step": 7740 }, { "epoch": 0.1116988311257801, "grad_norm": 1.6467876115318552, "learning_rate": 9.995848851607464e-06, "loss": 0.4839, "step": 7750 }, { "epoch": 0.11184295864981335, "grad_norm": 1.7550849456617894, "learning_rate": 9.995745736042954e-06, "loss": 0.5341, "step": 7760 }, { "epoch": 0.11198708617384662, "grad_norm": 1.7911818552704497, "learning_rate": 9.995641355979032e-06, "loss": 0.5357, "step": 7770 }, { "epoch": 0.11213121369787989, "grad_norm": 1.9212882575841714, "learning_rate": 9.995535711442116e-06, "loss": 0.5307, "step": 7780 }, { "epoch": 0.11227534122191316, "grad_norm": 1.7963843258053904, "learning_rate": 9.995428802458944e-06, "loss": 0.5464, "step": 7790 }, { "epoch": 0.11241946874594641, "grad_norm": 1.464288004226909, "learning_rate": 9.99532062905658e-06, "loss": 0.5535, "step": 7800 }, { "epoch": 0.11256359626997968, "grad_norm": 1.4985945972702615, "learning_rate": 9.995211191262402e-06, "loss": 0.5337, "step": 7810 }, { "epoch": 0.11270772379401295, "grad_norm": 1.2506173580443345, "learning_rate": 9.995100489104112e-06, "loss": 0.5283, "step": 7820 }, { "epoch": 0.1128518513180462, "grad_norm": 1.7606544792320755, "learning_rate": 9.99498852260973e-06, "loss": 0.5275, "step": 7830 }, { "epoch": 0.11299597884207947, "grad_norm": 1.9611905405234424, "learning_rate": 9.994875291807595e-06, "loss": 0.5133, "step": 7840 }, { "epoch": 0.11314010636611274, "grad_norm": 1.900260299553958, "learning_rate": 9.994760796726368e-06, "loss": 0.5781, "step": 7850 }, { "epoch": 0.113284233890146, "grad_norm": 1.5579404855508112, "learning_rate": 9.994645037395031e-06, "loss": 0.5185, "step": 7860 }, { "epoch": 0.11342836141417927, "grad_norm": 1.7963082762246747, "learning_rate": 9.994528013842884e-06, "loss": 0.535, "step": 7870 }, { "epoch": 0.11357248893821253, "grad_norm": 1.6902237419623998, "learning_rate": 9.994409726099546e-06, "loss": 0.5537, "step": 7880 }, { "epoch": 0.11371661646224579, "grad_norm": 1.4110641457773698, "learning_rate": 9.994290174194958e-06, "loss": 0.5307, "step": 7890 }, { "epoch": 0.11386074398627906, "grad_norm": 1.681457769358045, "learning_rate": 9.994169358159381e-06, "loss": 0.5318, "step": 7900 }, { "epoch": 0.11400487151031233, "grad_norm": 1.6145493270351643, "learning_rate": 9.994047278023396e-06, "loss": 0.501, "step": 7910 }, { "epoch": 0.11414899903434558, "grad_norm": 1.437200311869969, "learning_rate": 9.993923933817902e-06, "loss": 0.5223, "step": 7920 }, { "epoch": 0.11429312655837885, "grad_norm": 1.7209958618907815, "learning_rate": 9.993799325574118e-06, "loss": 0.5122, "step": 7930 }, { "epoch": 0.11443725408241212, "grad_norm": 2.142281202938283, "learning_rate": 9.993673453323588e-06, "loss": 0.5281, "step": 7940 }, { "epoch": 0.11458138160644538, "grad_norm": 1.6736602078255731, "learning_rate": 9.993546317098169e-06, "loss": 0.5179, "step": 7950 }, { "epoch": 0.11472550913047865, "grad_norm": 1.6102718399720597, "learning_rate": 9.993417916930043e-06, "loss": 0.5118, "step": 7960 }, { "epoch": 0.11486963665451191, "grad_norm": 1.9557443348413084, "learning_rate": 9.99328825285171e-06, "loss": 0.5677, "step": 7970 }, { "epoch": 0.11501376417854518, "grad_norm": 1.7717035600996525, "learning_rate": 9.993157324895987e-06, "loss": 0.5464, "step": 7980 }, { "epoch": 0.11515789170257844, "grad_norm": 1.7230625461909546, "learning_rate": 9.993025133096017e-06, "loss": 0.5194, "step": 7990 }, { "epoch": 0.11530201922661171, "grad_norm": 1.847332603618014, "learning_rate": 9.99289167748526e-06, "loss": 0.5431, "step": 8000 }, { "epoch": 0.11544614675064498, "grad_norm": 1.9098750089116265, "learning_rate": 9.992756958097493e-06, "loss": 0.5185, "step": 8010 }, { "epoch": 0.11559027427467823, "grad_norm": 1.8376296438227884, "learning_rate": 9.992620974966818e-06, "loss": 0.5209, "step": 8020 }, { "epoch": 0.1157344017987115, "grad_norm": 1.6241116158740048, "learning_rate": 9.992483728127653e-06, "loss": 0.5301, "step": 8030 }, { "epoch": 0.11587852932274477, "grad_norm": 1.6574580928534361, "learning_rate": 9.992345217614738e-06, "loss": 0.5567, "step": 8040 }, { "epoch": 0.11602265684677802, "grad_norm": 1.901756494501504, "learning_rate": 9.992205443463132e-06, "loss": 0.5768, "step": 8050 }, { "epoch": 0.1161667843708113, "grad_norm": 1.6334163988192771, "learning_rate": 9.992064405708214e-06, "loss": 0.5238, "step": 8060 }, { "epoch": 0.11631091189484456, "grad_norm": 3.7515705149894374, "learning_rate": 9.991922104385683e-06, "loss": 0.4951, "step": 8070 }, { "epoch": 0.11645503941887782, "grad_norm": 1.9040534991665512, "learning_rate": 9.991778539531555e-06, "loss": 0.5213, "step": 8080 }, { "epoch": 0.11659916694291109, "grad_norm": 1.7701724908822458, "learning_rate": 9.991633711182174e-06, "loss": 0.5381, "step": 8090 }, { "epoch": 0.11674329446694436, "grad_norm": 1.6098330238710503, "learning_rate": 9.991487619374193e-06, "loss": 0.5191, "step": 8100 }, { "epoch": 0.11688742199097761, "grad_norm": 1.5607383367693388, "learning_rate": 9.991340264144592e-06, "loss": 0.5404, "step": 8110 }, { "epoch": 0.11703154951501088, "grad_norm": 1.603410989487066, "learning_rate": 9.991191645530669e-06, "loss": 0.5248, "step": 8120 }, { "epoch": 0.11717567703904415, "grad_norm": 1.4694052952686167, "learning_rate": 9.991041763570041e-06, "loss": 0.5116, "step": 8130 }, { "epoch": 0.11731980456307742, "grad_norm": 1.5989712167162238, "learning_rate": 9.990890618300648e-06, "loss": 0.5104, "step": 8140 }, { "epoch": 0.11746393208711067, "grad_norm": 1.7917499154766752, "learning_rate": 9.990738209760745e-06, "loss": 0.5438, "step": 8150 }, { "epoch": 0.11760805961114394, "grad_norm": 1.7359860926006179, "learning_rate": 9.990584537988907e-06, "loss": 0.5331, "step": 8160 }, { "epoch": 0.11775218713517721, "grad_norm": 1.5381110148572188, "learning_rate": 9.990429603024032e-06, "loss": 0.5279, "step": 8170 }, { "epoch": 0.11789631465921047, "grad_norm": 1.4331674061930157, "learning_rate": 9.99027340490534e-06, "loss": 0.5325, "step": 8180 }, { "epoch": 0.11804044218324374, "grad_norm": 1.5456694838322123, "learning_rate": 9.990115943672363e-06, "loss": 0.5317, "step": 8190 }, { "epoch": 0.118184569707277, "grad_norm": 1.9777030829684663, "learning_rate": 9.989957219364957e-06, "loss": 0.5308, "step": 8200 }, { "epoch": 0.11832869723131026, "grad_norm": 1.8386856236785947, "learning_rate": 9.989797232023299e-06, "loss": 0.5507, "step": 8210 }, { "epoch": 0.11847282475534353, "grad_norm": 1.5049225566621878, "learning_rate": 9.989635981687886e-06, "loss": 0.5036, "step": 8220 }, { "epoch": 0.1186169522793768, "grad_norm": 1.8481232327726178, "learning_rate": 9.989473468399529e-06, "loss": 0.5525, "step": 8230 }, { "epoch": 0.11876107980341005, "grad_norm": 1.487342733183939, "learning_rate": 9.989309692199366e-06, "loss": 0.5167, "step": 8240 }, { "epoch": 0.11890520732744332, "grad_norm": 1.7299375680798659, "learning_rate": 9.989144653128846e-06, "loss": 0.5371, "step": 8250 }, { "epoch": 0.11904933485147659, "grad_norm": 1.4151982458672274, "learning_rate": 9.98897835122975e-06, "loss": 0.5055, "step": 8260 }, { "epoch": 0.11919346237550985, "grad_norm": 1.7235700646136718, "learning_rate": 9.988810786544167e-06, "loss": 0.5403, "step": 8270 }, { "epoch": 0.11933758989954311, "grad_norm": 1.6692304362799573, "learning_rate": 9.988641959114512e-06, "loss": 0.5409, "step": 8280 }, { "epoch": 0.11948171742357638, "grad_norm": 2.739131812623566, "learning_rate": 9.988471868983515e-06, "loss": 0.4867, "step": 8290 }, { "epoch": 0.11962584494760964, "grad_norm": 1.738005906295289, "learning_rate": 9.988300516194232e-06, "loss": 0.5199, "step": 8300 }, { "epoch": 0.11976997247164291, "grad_norm": 1.5353478113211756, "learning_rate": 9.988127900790033e-06, "loss": 0.5385, "step": 8310 }, { "epoch": 0.11991409999567618, "grad_norm": 1.94752523633879, "learning_rate": 9.98795402281461e-06, "loss": 0.5487, "step": 8320 }, { "epoch": 0.12005822751970945, "grad_norm": 1.5858678673343527, "learning_rate": 9.987778882311974e-06, "loss": 0.5258, "step": 8330 }, { "epoch": 0.1202023550437427, "grad_norm": 1.349652859662599, "learning_rate": 9.987602479326456e-06, "loss": 0.5266, "step": 8340 }, { "epoch": 0.12034648256777597, "grad_norm": 2.1117470442029393, "learning_rate": 9.987424813902706e-06, "loss": 0.5031, "step": 8350 }, { "epoch": 0.12049061009180924, "grad_norm": 1.60703383363991, "learning_rate": 9.987245886085694e-06, "loss": 0.5359, "step": 8360 }, { "epoch": 0.1206347376158425, "grad_norm": 2.3983817701526213, "learning_rate": 9.98706569592071e-06, "loss": 0.5238, "step": 8370 }, { "epoch": 0.12077886513987576, "grad_norm": 1.600113722382585, "learning_rate": 9.986884243453363e-06, "loss": 0.5089, "step": 8380 }, { "epoch": 0.12092299266390903, "grad_norm": 1.522293902268806, "learning_rate": 9.98670152872958e-06, "loss": 0.5462, "step": 8390 }, { "epoch": 0.12106712018794229, "grad_norm": 1.7968857850389028, "learning_rate": 9.986517551795609e-06, "loss": 0.5662, "step": 8400 }, { "epoch": 0.12121124771197556, "grad_norm": 1.5229039495727346, "learning_rate": 9.986332312698019e-06, "loss": 0.534, "step": 8410 }, { "epoch": 0.12135537523600883, "grad_norm": 1.492405910745058, "learning_rate": 9.986145811483693e-06, "loss": 0.5521, "step": 8420 }, { "epoch": 0.12149950276004208, "grad_norm": 1.6425215832243432, "learning_rate": 9.985958048199845e-06, "loss": 0.5595, "step": 8430 }, { "epoch": 0.12164363028407535, "grad_norm": 1.359592278259538, "learning_rate": 9.985769022893991e-06, "loss": 0.5369, "step": 8440 }, { "epoch": 0.12178775780810862, "grad_norm": 1.6079607057660121, "learning_rate": 9.985578735613984e-06, "loss": 0.5323, "step": 8450 }, { "epoch": 0.12193188533214187, "grad_norm": 1.6157307523027062, "learning_rate": 9.985387186407984e-06, "loss": 0.5332, "step": 8460 }, { "epoch": 0.12207601285617514, "grad_norm": 3.00670346411315, "learning_rate": 9.985194375324479e-06, "loss": 0.5308, "step": 8470 }, { "epoch": 0.12222014038020841, "grad_norm": 1.8737644565114768, "learning_rate": 9.985000302412267e-06, "loss": 0.5318, "step": 8480 }, { "epoch": 0.12236426790424167, "grad_norm": 1.485186640450143, "learning_rate": 9.984804967720476e-06, "loss": 0.5274, "step": 8490 }, { "epoch": 0.12250839542827494, "grad_norm": 1.4762183985957191, "learning_rate": 9.984608371298544e-06, "loss": 0.5559, "step": 8500 }, { "epoch": 0.1226525229523082, "grad_norm": 1.6136807892314837, "learning_rate": 9.984410513196237e-06, "loss": 0.5116, "step": 8510 }, { "epoch": 0.12279665047634147, "grad_norm": 1.3698502652630684, "learning_rate": 9.984211393463631e-06, "loss": 0.5087, "step": 8520 }, { "epoch": 0.12294077800037473, "grad_norm": 1.7652210875835268, "learning_rate": 9.984011012151131e-06, "loss": 0.5411, "step": 8530 }, { "epoch": 0.123084905524408, "grad_norm": 1.6134384378154019, "learning_rate": 9.983809369309454e-06, "loss": 0.5178, "step": 8540 }, { "epoch": 0.12322903304844127, "grad_norm": 2.204406172032703, "learning_rate": 9.983606464989638e-06, "loss": 0.5216, "step": 8550 }, { "epoch": 0.12337316057247452, "grad_norm": 1.5239852075826696, "learning_rate": 9.983402299243044e-06, "loss": 0.4976, "step": 8560 }, { "epoch": 0.12351728809650779, "grad_norm": 1.8294069632961867, "learning_rate": 9.983196872121345e-06, "loss": 0.5364, "step": 8570 }, { "epoch": 0.12366141562054106, "grad_norm": 1.6811809006771152, "learning_rate": 9.982990183676543e-06, "loss": 0.5142, "step": 8580 }, { "epoch": 0.12380554314457431, "grad_norm": 1.9643187356159564, "learning_rate": 9.982782233960952e-06, "loss": 0.5456, "step": 8590 }, { "epoch": 0.12394967066860758, "grad_norm": 1.4688294318047102, "learning_rate": 9.982573023027205e-06, "loss": 0.5156, "step": 8600 }, { "epoch": 0.12409379819264085, "grad_norm": 1.555206707675048, "learning_rate": 9.98236255092826e-06, "loss": 0.536, "step": 8610 }, { "epoch": 0.12423792571667411, "grad_norm": 1.7166536965080452, "learning_rate": 9.982150817717388e-06, "loss": 0.5181, "step": 8620 }, { "epoch": 0.12438205324070738, "grad_norm": 1.7025725459950034, "learning_rate": 9.981937823448182e-06, "loss": 0.5341, "step": 8630 }, { "epoch": 0.12452618076474065, "grad_norm": 1.5507916759370004, "learning_rate": 9.981723568174557e-06, "loss": 0.5246, "step": 8640 }, { "epoch": 0.1246703082887739, "grad_norm": 1.719758804461833, "learning_rate": 9.981508051950741e-06, "loss": 0.5114, "step": 8650 }, { "epoch": 0.12481443581280717, "grad_norm": 1.4799270827511561, "learning_rate": 9.981291274831286e-06, "loss": 0.5505, "step": 8660 }, { "epoch": 0.12495856333684044, "grad_norm": 1.8657396681445255, "learning_rate": 9.981073236871062e-06, "loss": 0.5563, "step": 8670 }, { "epoch": 0.1251026908608737, "grad_norm": 1.7345225879976012, "learning_rate": 9.980853938125257e-06, "loss": 0.54, "step": 8680 }, { "epoch": 0.12524681838490698, "grad_norm": 1.5777330427881668, "learning_rate": 9.980633378649378e-06, "loss": 0.5148, "step": 8690 }, { "epoch": 0.12539094590894023, "grad_norm": 1.7491194558165002, "learning_rate": 9.980411558499253e-06, "loss": 0.5176, "step": 8700 }, { "epoch": 0.1255350734329735, "grad_norm": 1.7648147481451293, "learning_rate": 9.980188477731028e-06, "loss": 0.5219, "step": 8710 }, { "epoch": 0.12567920095700677, "grad_norm": 1.4816960168726703, "learning_rate": 9.979964136401167e-06, "loss": 0.5214, "step": 8720 }, { "epoch": 0.12582332848104003, "grad_norm": 1.7162715401283484, "learning_rate": 9.979738534566456e-06, "loss": 0.5175, "step": 8730 }, { "epoch": 0.12596745600507328, "grad_norm": 1.583063320272237, "learning_rate": 9.979511672283999e-06, "loss": 0.5517, "step": 8740 }, { "epoch": 0.12611158352910656, "grad_norm": 1.9451197328768055, "learning_rate": 9.979283549611215e-06, "loss": 0.5475, "step": 8750 }, { "epoch": 0.12625571105313982, "grad_norm": 1.6215306357919952, "learning_rate": 9.979054166605848e-06, "loss": 0.5038, "step": 8760 }, { "epoch": 0.12639983857717307, "grad_norm": 1.6446661181743814, "learning_rate": 9.978823523325959e-06, "loss": 0.5421, "step": 8770 }, { "epoch": 0.12654396610120636, "grad_norm": 1.6027773268164476, "learning_rate": 9.978591619829924e-06, "loss": 0.5169, "step": 8780 }, { "epoch": 0.1266880936252396, "grad_norm": 1.4531042905902065, "learning_rate": 9.978358456176444e-06, "loss": 0.5362, "step": 8790 }, { "epoch": 0.12683222114927287, "grad_norm": 1.7559299657384442, "learning_rate": 9.978124032424534e-06, "loss": 0.5525, "step": 8800 }, { "epoch": 0.12697634867330615, "grad_norm": 1.5623085920492965, "learning_rate": 9.977888348633534e-06, "loss": 0.532, "step": 8810 }, { "epoch": 0.1271204761973394, "grad_norm": 1.7436117496250412, "learning_rate": 9.977651404863094e-06, "loss": 0.5225, "step": 8820 }, { "epoch": 0.12726460372137266, "grad_norm": 1.5894982714441628, "learning_rate": 9.977413201173192e-06, "loss": 0.534, "step": 8830 }, { "epoch": 0.12740873124540594, "grad_norm": 1.709670347938831, "learning_rate": 9.97717373762412e-06, "loss": 0.5198, "step": 8840 }, { "epoch": 0.1275528587694392, "grad_norm": 1.85593619823613, "learning_rate": 9.976933014276491e-06, "loss": 0.5303, "step": 8850 }, { "epoch": 0.12769698629347245, "grad_norm": 1.5378146257798249, "learning_rate": 9.976691031191232e-06, "loss": 0.5131, "step": 8860 }, { "epoch": 0.12784111381750574, "grad_norm": 1.657209962992534, "learning_rate": 9.976447788429597e-06, "loss": 0.5275, "step": 8870 }, { "epoch": 0.127985241341539, "grad_norm": 1.8537191830305242, "learning_rate": 9.976203286053153e-06, "loss": 0.5106, "step": 8880 }, { "epoch": 0.12812936886557225, "grad_norm": 1.4878465462899284, "learning_rate": 9.975957524123786e-06, "loss": 0.5255, "step": 8890 }, { "epoch": 0.12827349638960553, "grad_norm": 1.6152029491016693, "learning_rate": 9.975710502703703e-06, "loss": 0.5377, "step": 8900 }, { "epoch": 0.12841762391363878, "grad_norm": 1.6181228038301394, "learning_rate": 9.975462221855428e-06, "loss": 0.5089, "step": 8910 }, { "epoch": 0.12856175143767204, "grad_norm": 1.6559427461943652, "learning_rate": 9.975212681641806e-06, "loss": 0.5381, "step": 8920 }, { "epoch": 0.12870587896170532, "grad_norm": 1.7060748672416923, "learning_rate": 9.974961882126e-06, "loss": 0.5277, "step": 8930 }, { "epoch": 0.12885000648573858, "grad_norm": 1.4396780476978914, "learning_rate": 9.97470982337149e-06, "loss": 0.5296, "step": 8940 }, { "epoch": 0.12899413400977183, "grad_norm": 1.6802000414869929, "learning_rate": 9.974456505442073e-06, "loss": 0.5287, "step": 8950 }, { "epoch": 0.12913826153380512, "grad_norm": 1.533507316041997, "learning_rate": 9.974201928401874e-06, "loss": 0.5307, "step": 8960 }, { "epoch": 0.12928238905783837, "grad_norm": 1.8133829079682198, "learning_rate": 9.973946092315326e-06, "loss": 0.5538, "step": 8970 }, { "epoch": 0.12942651658187165, "grad_norm": 1.785125107937249, "learning_rate": 9.973688997247186e-06, "loss": 0.55, "step": 8980 }, { "epoch": 0.1295706441059049, "grad_norm": 1.865206657746021, "learning_rate": 9.973430643262526e-06, "loss": 0.5291, "step": 8990 }, { "epoch": 0.12971477162993816, "grad_norm": 1.7472079141728347, "learning_rate": 9.973171030426747e-06, "loss": 0.5289, "step": 9000 }, { "epoch": 0.12985889915397145, "grad_norm": 1.5502223960139028, "learning_rate": 9.972910158805553e-06, "loss": 0.5208, "step": 9010 }, { "epoch": 0.1300030266780047, "grad_norm": 1.5255408697392228, "learning_rate": 9.972648028464977e-06, "loss": 0.5385, "step": 9020 }, { "epoch": 0.13014715420203796, "grad_norm": 1.4642931521972244, "learning_rate": 9.972384639471372e-06, "loss": 0.5188, "step": 9030 }, { "epoch": 0.13029128172607124, "grad_norm": 2.3082210642873853, "learning_rate": 9.9721199918914e-06, "loss": 0.532, "step": 9040 }, { "epoch": 0.1304354092501045, "grad_norm": 1.6385450514536717, "learning_rate": 9.97185408579205e-06, "loss": 0.538, "step": 9050 }, { "epoch": 0.13057953677413775, "grad_norm": 1.772449196285532, "learning_rate": 9.971586921240626e-06, "loss": 0.5253, "step": 9060 }, { "epoch": 0.13072366429817103, "grad_norm": 1.6510414635486976, "learning_rate": 9.971318498304753e-06, "loss": 0.5398, "step": 9070 }, { "epoch": 0.1308677918222043, "grad_norm": 1.658287692687586, "learning_rate": 9.971048817052371e-06, "loss": 0.5452, "step": 9080 }, { "epoch": 0.13101191934623754, "grad_norm": 1.4319456225450187, "learning_rate": 9.970777877551744e-06, "loss": 0.5153, "step": 9090 }, { "epoch": 0.13115604687027083, "grad_norm": 1.5945581474143333, "learning_rate": 9.970505679871447e-06, "loss": 0.5595, "step": 9100 }, { "epoch": 0.13130017439430408, "grad_norm": 1.35440641126855, "learning_rate": 9.970232224080378e-06, "loss": 0.4961, "step": 9110 }, { "epoch": 0.13144430191833734, "grad_norm": 1.860938180044808, "learning_rate": 9.969957510247757e-06, "loss": 0.5198, "step": 9120 }, { "epoch": 0.13158842944237062, "grad_norm": 1.3547242908306234, "learning_rate": 9.969681538443112e-06, "loss": 0.5351, "step": 9130 }, { "epoch": 0.13173255696640387, "grad_norm": 1.5880963946453364, "learning_rate": 9.969404308736298e-06, "loss": 0.5516, "step": 9140 }, { "epoch": 0.13187668449043713, "grad_norm": 1.7275315218493335, "learning_rate": 9.969125821197488e-06, "loss": 0.5119, "step": 9150 }, { "epoch": 0.1320208120144704, "grad_norm": 1.5629204145347302, "learning_rate": 9.96884607589717e-06, "loss": 0.5034, "step": 9160 }, { "epoch": 0.13216493953850367, "grad_norm": 1.576284297364984, "learning_rate": 9.968565072906153e-06, "loss": 0.5179, "step": 9170 }, { "epoch": 0.13230906706253692, "grad_norm": 1.6527944839553077, "learning_rate": 9.96828281229556e-06, "loss": 0.5163, "step": 9180 }, { "epoch": 0.1324531945865702, "grad_norm": 1.63443000666485, "learning_rate": 9.967999294136838e-06, "loss": 0.578, "step": 9190 }, { "epoch": 0.13259732211060346, "grad_norm": 1.7076429634783834, "learning_rate": 9.96771451850175e-06, "loss": 0.5377, "step": 9200 }, { "epoch": 0.13274144963463672, "grad_norm": 1.8200334243834035, "learning_rate": 9.967428485462375e-06, "loss": 0.5429, "step": 9210 }, { "epoch": 0.13288557715867, "grad_norm": 1.787787183734275, "learning_rate": 9.967141195091116e-06, "loss": 0.5504, "step": 9220 }, { "epoch": 0.13302970468270325, "grad_norm": 1.7250784797527372, "learning_rate": 9.966852647460687e-06, "loss": 0.5228, "step": 9230 }, { "epoch": 0.1331738322067365, "grad_norm": 1.489204089445697, "learning_rate": 9.966562842644125e-06, "loss": 0.5329, "step": 9240 }, { "epoch": 0.1333179597307698, "grad_norm": 2.194164491529095, "learning_rate": 9.966271780714784e-06, "loss": 0.5398, "step": 9250 }, { "epoch": 0.13346208725480305, "grad_norm": 1.674837562071664, "learning_rate": 9.965979461746335e-06, "loss": 0.5006, "step": 9260 }, { "epoch": 0.1336062147788363, "grad_norm": 1.5997190570730162, "learning_rate": 9.965685885812773e-06, "loss": 0.5537, "step": 9270 }, { "epoch": 0.13375034230286958, "grad_norm": 1.7076823186627987, "learning_rate": 9.9653910529884e-06, "loss": 0.5186, "step": 9280 }, { "epoch": 0.13389446982690284, "grad_norm": 1.6744303704364507, "learning_rate": 9.965094963347846e-06, "loss": 0.524, "step": 9290 }, { "epoch": 0.1340385973509361, "grad_norm": 1.7744615731747797, "learning_rate": 9.964797616966058e-06, "loss": 0.525, "step": 9300 }, { "epoch": 0.13418272487496938, "grad_norm": 1.6800636985908362, "learning_rate": 9.964499013918294e-06, "loss": 0.5335, "step": 9310 }, { "epoch": 0.13432685239900263, "grad_norm": 1.6983053216730377, "learning_rate": 9.96419915428014e-06, "loss": 0.5341, "step": 9320 }, { "epoch": 0.13447097992303592, "grad_norm": 1.7728276435559616, "learning_rate": 9.963898038127491e-06, "loss": 0.548, "step": 9330 }, { "epoch": 0.13461510744706917, "grad_norm": 1.5786083406682985, "learning_rate": 9.963595665536567e-06, "loss": 0.5509, "step": 9340 }, { "epoch": 0.13475923497110243, "grad_norm": 1.8145981760115104, "learning_rate": 9.9632920365839e-06, "loss": 0.529, "step": 9350 }, { "epoch": 0.1349033624951357, "grad_norm": 1.6609673761819554, "learning_rate": 9.962987151346347e-06, "loss": 0.5345, "step": 9360 }, { "epoch": 0.13504749001916896, "grad_norm": 1.5633857814339276, "learning_rate": 9.962681009901075e-06, "loss": 0.5297, "step": 9370 }, { "epoch": 0.13519161754320222, "grad_norm": 1.3657384514686495, "learning_rate": 9.962373612325575e-06, "loss": 0.5249, "step": 9380 }, { "epoch": 0.1353357450672355, "grad_norm": 1.6341529502260153, "learning_rate": 9.962064958697657e-06, "loss": 0.5491, "step": 9390 }, { "epoch": 0.13547987259126876, "grad_norm": 1.796473427351042, "learning_rate": 9.961755049095441e-06, "loss": 0.51, "step": 9400 }, { "epoch": 0.135624000115302, "grad_norm": 1.5775396105282762, "learning_rate": 9.961443883597371e-06, "loss": 0.5091, "step": 9410 }, { "epoch": 0.1357681276393353, "grad_norm": 1.8951030039251344, "learning_rate": 9.96113146228221e-06, "loss": 0.5132, "step": 9420 }, { "epoch": 0.13591225516336855, "grad_norm": 1.6162052541947365, "learning_rate": 9.960817785229035e-06, "loss": 0.5351, "step": 9430 }, { "epoch": 0.1360563826874018, "grad_norm": 1.613666276877378, "learning_rate": 9.960502852517243e-06, "loss": 0.5358, "step": 9440 }, { "epoch": 0.1362005102114351, "grad_norm": 1.6587249415243808, "learning_rate": 9.960186664226547e-06, "loss": 0.5137, "step": 9450 }, { "epoch": 0.13634463773546834, "grad_norm": 1.5982424525271524, "learning_rate": 9.959869220436982e-06, "loss": 0.5117, "step": 9460 }, { "epoch": 0.1364887652595016, "grad_norm": 1.5941400087686848, "learning_rate": 9.959550521228894e-06, "loss": 0.5054, "step": 9470 }, { "epoch": 0.13663289278353488, "grad_norm": 1.6913348921865032, "learning_rate": 9.959230566682953e-06, "loss": 0.4997, "step": 9480 }, { "epoch": 0.13677702030756814, "grad_norm": 1.4567554699946976, "learning_rate": 9.958909356880144e-06, "loss": 0.5382, "step": 9490 }, { "epoch": 0.1369211478316014, "grad_norm": 1.520422627164141, "learning_rate": 9.95858689190177e-06, "loss": 0.5049, "step": 9500 }, { "epoch": 0.13706527535563467, "grad_norm": 1.5178931343704998, "learning_rate": 9.958263171829452e-06, "loss": 0.5176, "step": 9510 }, { "epoch": 0.13720940287966793, "grad_norm": 1.5817005259507062, "learning_rate": 9.957938196745127e-06, "loss": 0.5242, "step": 9520 }, { "epoch": 0.13735353040370119, "grad_norm": 1.62081547743582, "learning_rate": 9.957611966731055e-06, "loss": 0.5447, "step": 9530 }, { "epoch": 0.13749765792773447, "grad_norm": 1.8404151846844703, "learning_rate": 9.957284481869805e-06, "loss": 0.5426, "step": 9540 }, { "epoch": 0.13764178545176772, "grad_norm": 1.8250741719792734, "learning_rate": 9.956955742244272e-06, "loss": 0.5263, "step": 9550 }, { "epoch": 0.13778591297580098, "grad_norm": 1.4852823911434496, "learning_rate": 9.956625747937662e-06, "loss": 0.509, "step": 9560 }, { "epoch": 0.13793004049983426, "grad_norm": 1.4509008526579001, "learning_rate": 9.956294499033505e-06, "loss": 0.5337, "step": 9570 }, { "epoch": 0.13807416802386752, "grad_norm": 1.8783029686131798, "learning_rate": 9.955961995615641e-06, "loss": 0.552, "step": 9580 }, { "epoch": 0.13821829554790077, "grad_norm": 1.7236661621172147, "learning_rate": 9.955628237768237e-06, "loss": 0.5312, "step": 9590 }, { "epoch": 0.13836242307193405, "grad_norm": 1.5807354528496438, "learning_rate": 9.955293225575767e-06, "loss": 0.5082, "step": 9600 }, { "epoch": 0.1385065505959673, "grad_norm": 1.9496021311256757, "learning_rate": 9.954956959123031e-06, "loss": 0.5313, "step": 9610 }, { "epoch": 0.13865067812000056, "grad_norm": 1.6644849340171817, "learning_rate": 9.954619438495142e-06, "loss": 0.5492, "step": 9620 }, { "epoch": 0.13879480564403385, "grad_norm": 1.4740931476349823, "learning_rate": 9.954280663777531e-06, "loss": 0.5377, "step": 9630 }, { "epoch": 0.1389389331680671, "grad_norm": 1.6394062915123808, "learning_rate": 9.95394063505595e-06, "loss": 0.5468, "step": 9640 }, { "epoch": 0.13908306069210036, "grad_norm": 1.6489664253379264, "learning_rate": 9.953599352416462e-06, "loss": 0.5155, "step": 9650 }, { "epoch": 0.13922718821613364, "grad_norm": 1.7188702690130606, "learning_rate": 9.953256815945452e-06, "loss": 0.5509, "step": 9660 }, { "epoch": 0.1393713157401669, "grad_norm": 1.517455440583056, "learning_rate": 9.95291302572962e-06, "loss": 0.5069, "step": 9670 }, { "epoch": 0.13951544326420015, "grad_norm": 1.5767614178723555, "learning_rate": 9.952567981855988e-06, "loss": 0.5162, "step": 9680 }, { "epoch": 0.13965957078823343, "grad_norm": 1.6123415692187126, "learning_rate": 9.952221684411888e-06, "loss": 0.5163, "step": 9690 }, { "epoch": 0.1398036983122667, "grad_norm": 1.7175226267172674, "learning_rate": 9.951874133484975e-06, "loss": 0.5537, "step": 9700 }, { "epoch": 0.13994782583629997, "grad_norm": 1.5586782194640085, "learning_rate": 9.95152532916322e-06, "loss": 0.5125, "step": 9710 }, { "epoch": 0.14009195336033323, "grad_norm": 2.495536322191336, "learning_rate": 9.95117527153491e-06, "loss": 0.539, "step": 9720 }, { "epoch": 0.14023608088436648, "grad_norm": 1.454823521468968, "learning_rate": 9.950823960688649e-06, "loss": 0.5528, "step": 9730 }, { "epoch": 0.14038020840839976, "grad_norm": 1.5112339694175088, "learning_rate": 9.95047139671336e-06, "loss": 0.5267, "step": 9740 }, { "epoch": 0.14052433593243302, "grad_norm": 1.6004613768657043, "learning_rate": 9.950117579698285e-06, "loss": 0.5504, "step": 9750 }, { "epoch": 0.14066846345646628, "grad_norm": 1.4854254499667392, "learning_rate": 9.949762509732976e-06, "loss": 0.5479, "step": 9760 }, { "epoch": 0.14081259098049956, "grad_norm": 1.3320784570155408, "learning_rate": 9.949406186907308e-06, "loss": 0.5112, "step": 9770 }, { "epoch": 0.1409567185045328, "grad_norm": 1.6674876949901936, "learning_rate": 9.949048611311471e-06, "loss": 0.5132, "step": 9780 }, { "epoch": 0.14110084602856607, "grad_norm": 1.466334231038367, "learning_rate": 9.948689783035976e-06, "loss": 0.5403, "step": 9790 }, { "epoch": 0.14124497355259935, "grad_norm": 1.7589737159165764, "learning_rate": 9.948329702171647e-06, "loss": 0.5483, "step": 9800 }, { "epoch": 0.1413891010766326, "grad_norm": 1.571972708741959, "learning_rate": 9.947968368809624e-06, "loss": 0.5317, "step": 9810 }, { "epoch": 0.14153322860066586, "grad_norm": 1.5955847647083927, "learning_rate": 9.947605783041365e-06, "loss": 0.5096, "step": 9820 }, { "epoch": 0.14167735612469914, "grad_norm": 1.6584268036977814, "learning_rate": 9.94724194495865e-06, "loss": 0.5726, "step": 9830 }, { "epoch": 0.1418214836487324, "grad_norm": 1.5579422198435615, "learning_rate": 9.94687685465357e-06, "loss": 0.4988, "step": 9840 }, { "epoch": 0.14196561117276565, "grad_norm": 1.681566040776198, "learning_rate": 9.946510512218532e-06, "loss": 0.5283, "step": 9850 }, { "epoch": 0.14210973869679894, "grad_norm": 1.3785677050921588, "learning_rate": 9.946142917746267e-06, "loss": 0.5188, "step": 9860 }, { "epoch": 0.1422538662208322, "grad_norm": 1.646128616669466, "learning_rate": 9.94577407132982e-06, "loss": 0.5479, "step": 9870 }, { "epoch": 0.14239799374486545, "grad_norm": 1.6968382237346853, "learning_rate": 9.945403973062545e-06, "loss": 0.522, "step": 9880 }, { "epoch": 0.14254212126889873, "grad_norm": 1.7983952784369435, "learning_rate": 9.945032623038125e-06, "loss": 0.5483, "step": 9890 }, { "epoch": 0.14268624879293199, "grad_norm": 1.5854533290285393, "learning_rate": 9.944660021350553e-06, "loss": 0.5203, "step": 9900 }, { "epoch": 0.14283037631696524, "grad_norm": 1.5948043992132597, "learning_rate": 9.944286168094141e-06, "loss": 0.5431, "step": 9910 }, { "epoch": 0.14297450384099852, "grad_norm": 1.6455964246119719, "learning_rate": 9.943911063363516e-06, "loss": 0.4999, "step": 9920 }, { "epoch": 0.14311863136503178, "grad_norm": 1.8210753094168066, "learning_rate": 9.94353470725362e-06, "loss": 0.5175, "step": 9930 }, { "epoch": 0.14326275888906503, "grad_norm": 1.69210055891606, "learning_rate": 9.943157099859722e-06, "loss": 0.5158, "step": 9940 }, { "epoch": 0.14340688641309832, "grad_norm": 1.3990694404261157, "learning_rate": 9.942778241277392e-06, "loss": 0.5074, "step": 9950 }, { "epoch": 0.14355101393713157, "grad_norm": 1.7024462005488405, "learning_rate": 9.94239813160253e-06, "loss": 0.5433, "step": 9960 }, { "epoch": 0.14369514146116483, "grad_norm": 1.440356268516134, "learning_rate": 9.942016770931344e-06, "loss": 0.5416, "step": 9970 }, { "epoch": 0.1438392689851981, "grad_norm": 1.6295827735592527, "learning_rate": 9.941634159360365e-06, "loss": 0.5417, "step": 9980 }, { "epoch": 0.14398339650923137, "grad_norm": 1.562339981108788, "learning_rate": 9.941250296986437e-06, "loss": 0.4953, "step": 9990 }, { "epoch": 0.14412752403326462, "grad_norm": 1.6079863960101626, "learning_rate": 9.94086518390672e-06, "loss": 0.5176, "step": 10000 }, { "epoch": 0.1442716515572979, "grad_norm": 1.6331761852206441, "learning_rate": 9.940478820218695e-06, "loss": 0.5344, "step": 10010 }, { "epoch": 0.14441577908133116, "grad_norm": 1.5839841010886857, "learning_rate": 9.940091206020155e-06, "loss": 0.5067, "step": 10020 }, { "epoch": 0.1445599066053644, "grad_norm": 1.4644740951109425, "learning_rate": 9.939702341409209e-06, "loss": 0.4946, "step": 10030 }, { "epoch": 0.1447040341293977, "grad_norm": 1.4769066625575058, "learning_rate": 9.939312226484288e-06, "loss": 0.5352, "step": 10040 }, { "epoch": 0.14484816165343095, "grad_norm": 1.6601776060585394, "learning_rate": 9.938920861344135e-06, "loss": 0.5455, "step": 10050 }, { "epoch": 0.14499228917746423, "grad_norm": 1.471490723360633, "learning_rate": 9.93852824608781e-06, "loss": 0.5279, "step": 10060 }, { "epoch": 0.1451364167014975, "grad_norm": 1.3987818842463533, "learning_rate": 9.938134380814689e-06, "loss": 0.5347, "step": 10070 }, { "epoch": 0.14528054422553074, "grad_norm": 1.5949521360571508, "learning_rate": 9.937739265624465e-06, "loss": 0.4917, "step": 10080 }, { "epoch": 0.14542467174956403, "grad_norm": 1.6360563504990169, "learning_rate": 9.93734290061715e-06, "loss": 0.5126, "step": 10090 }, { "epoch": 0.14556879927359728, "grad_norm": 1.6762236555873233, "learning_rate": 9.936945285893069e-06, "loss": 0.5528, "step": 10100 }, { "epoch": 0.14571292679763054, "grad_norm": 1.7687894810380427, "learning_rate": 9.936546421552865e-06, "loss": 0.5218, "step": 10110 }, { "epoch": 0.14585705432166382, "grad_norm": 1.8228962024536335, "learning_rate": 9.936146307697494e-06, "loss": 0.5193, "step": 10120 }, { "epoch": 0.14600118184569708, "grad_norm": 1.3470237330200716, "learning_rate": 9.935744944428234e-06, "loss": 0.5015, "step": 10130 }, { "epoch": 0.14614530936973033, "grad_norm": 1.580744523582911, "learning_rate": 9.935342331846673e-06, "loss": 0.5319, "step": 10140 }, { "epoch": 0.1462894368937636, "grad_norm": 1.8071323280786205, "learning_rate": 9.934938470054723e-06, "loss": 0.5527, "step": 10150 }, { "epoch": 0.14643356441779687, "grad_norm": 1.6293631428081266, "learning_rate": 9.934533359154604e-06, "loss": 0.5265, "step": 10160 }, { "epoch": 0.14657769194183012, "grad_norm": 1.7741250659450314, "learning_rate": 9.934126999248857e-06, "loss": 0.5422, "step": 10170 }, { "epoch": 0.1467218194658634, "grad_norm": 1.6940963514605087, "learning_rate": 9.933719390440337e-06, "loss": 0.5167, "step": 10180 }, { "epoch": 0.14686594698989666, "grad_norm": 1.5908854915787674, "learning_rate": 9.933310532832217e-06, "loss": 0.5365, "step": 10190 }, { "epoch": 0.14701007451392992, "grad_norm": 1.6618080413884013, "learning_rate": 9.932900426527985e-06, "loss": 0.5262, "step": 10200 }, { "epoch": 0.1471542020379632, "grad_norm": 1.4002877872882082, "learning_rate": 9.932489071631444e-06, "loss": 0.4962, "step": 10210 }, { "epoch": 0.14729832956199645, "grad_norm": 1.4045604477990365, "learning_rate": 9.932076468246714e-06, "loss": 0.5071, "step": 10220 }, { "epoch": 0.1474424570860297, "grad_norm": 1.7555189538359202, "learning_rate": 9.931662616478234e-06, "loss": 0.5133, "step": 10230 }, { "epoch": 0.147586584610063, "grad_norm": 1.560842322927479, "learning_rate": 9.931247516430754e-06, "loss": 0.5157, "step": 10240 }, { "epoch": 0.14773071213409625, "grad_norm": 1.7176989876307869, "learning_rate": 9.930831168209344e-06, "loss": 0.5316, "step": 10250 }, { "epoch": 0.1478748396581295, "grad_norm": 1.8436169895964836, "learning_rate": 9.930413571919383e-06, "loss": 0.5368, "step": 10260 }, { "epoch": 0.1480189671821628, "grad_norm": 1.8491156791722627, "learning_rate": 9.929994727666576e-06, "loss": 0.534, "step": 10270 }, { "epoch": 0.14816309470619604, "grad_norm": 1.6296120982081974, "learning_rate": 9.92957463555694e-06, "loss": 0.5371, "step": 10280 }, { "epoch": 0.1483072222302293, "grad_norm": 1.5544820389203797, "learning_rate": 9.929153295696803e-06, "loss": 0.536, "step": 10290 }, { "epoch": 0.14845134975426258, "grad_norm": 1.529899877896656, "learning_rate": 9.928730708192811e-06, "loss": 0.5383, "step": 10300 }, { "epoch": 0.14859547727829583, "grad_norm": 1.7202072487765119, "learning_rate": 9.928306873151934e-06, "loss": 0.5406, "step": 10310 }, { "epoch": 0.1487396048023291, "grad_norm": 1.6880451821072107, "learning_rate": 9.927881790681444e-06, "loss": 0.5032, "step": 10320 }, { "epoch": 0.14888373232636237, "grad_norm": 1.5537083524550879, "learning_rate": 9.927455460888942e-06, "loss": 0.522, "step": 10330 }, { "epoch": 0.14902785985039563, "grad_norm": 1.6294111748675555, "learning_rate": 9.927027883882333e-06, "loss": 0.5589, "step": 10340 }, { "epoch": 0.14917198737442888, "grad_norm": 1.6004012316933973, "learning_rate": 9.926599059769848e-06, "loss": 0.5393, "step": 10350 }, { "epoch": 0.14931611489846217, "grad_norm": 1.3463470311907668, "learning_rate": 9.926168988660025e-06, "loss": 0.5049, "step": 10360 }, { "epoch": 0.14946024242249542, "grad_norm": 1.5570061099298047, "learning_rate": 9.925737670661724e-06, "loss": 0.5264, "step": 10370 }, { "epoch": 0.14960436994652868, "grad_norm": 1.7590062994726918, "learning_rate": 9.925305105884118e-06, "loss": 0.5178, "step": 10380 }, { "epoch": 0.14974849747056196, "grad_norm": 1.500013650854343, "learning_rate": 9.924871294436695e-06, "loss": 0.5112, "step": 10390 }, { "epoch": 0.1498926249945952, "grad_norm": 1.4258717000087666, "learning_rate": 9.92443623642926e-06, "loss": 0.4814, "step": 10400 }, { "epoch": 0.15003675251862847, "grad_norm": 1.6793544154798357, "learning_rate": 9.923999931971933e-06, "loss": 0.5229, "step": 10410 }, { "epoch": 0.15018088004266175, "grad_norm": 1.7097114602303365, "learning_rate": 9.923562381175147e-06, "loss": 0.5758, "step": 10420 }, { "epoch": 0.150325007566695, "grad_norm": 1.3876006249852573, "learning_rate": 9.923123584149655e-06, "loss": 0.49, "step": 10430 }, { "epoch": 0.1504691350907283, "grad_norm": 1.6956271989723564, "learning_rate": 9.922683541006522e-06, "loss": 0.4979, "step": 10440 }, { "epoch": 0.15061326261476154, "grad_norm": 1.636616739162311, "learning_rate": 9.922242251857132e-06, "loss": 0.5267, "step": 10450 }, { "epoch": 0.1507573901387948, "grad_norm": 1.684758060698122, "learning_rate": 9.921799716813178e-06, "loss": 0.5241, "step": 10460 }, { "epoch": 0.15090151766282808, "grad_norm": 1.868316874699705, "learning_rate": 9.921355935986677e-06, "loss": 0.5306, "step": 10470 }, { "epoch": 0.15104564518686134, "grad_norm": 1.6672268315606935, "learning_rate": 9.920910909489954e-06, "loss": 0.5334, "step": 10480 }, { "epoch": 0.1511897727108946, "grad_norm": 1.6280752443147315, "learning_rate": 9.920464637435652e-06, "loss": 0.5295, "step": 10490 }, { "epoch": 0.15133390023492788, "grad_norm": 1.682606164948002, "learning_rate": 9.92001711993673e-06, "loss": 0.5384, "step": 10500 }, { "epoch": 0.15147802775896113, "grad_norm": 1.4896124991736668, "learning_rate": 9.91956835710646e-06, "loss": 0.4921, "step": 10510 }, { "epoch": 0.1516221552829944, "grad_norm": 1.8625860103466876, "learning_rate": 9.919118349058433e-06, "loss": 0.5548, "step": 10520 }, { "epoch": 0.15176628280702767, "grad_norm": 1.5454188738549612, "learning_rate": 9.91866709590655e-06, "loss": 0.4907, "step": 10530 }, { "epoch": 0.15191041033106092, "grad_norm": 1.703867878569658, "learning_rate": 9.918214597765035e-06, "loss": 0.5502, "step": 10540 }, { "epoch": 0.15205453785509418, "grad_norm": 1.7331412874622234, "learning_rate": 9.917760854748415e-06, "loss": 0.5544, "step": 10550 }, { "epoch": 0.15219866537912746, "grad_norm": 1.9460737427912198, "learning_rate": 9.917305866971547e-06, "loss": 0.5154, "step": 10560 }, { "epoch": 0.15234279290316072, "grad_norm": 1.7293641535736937, "learning_rate": 9.91684963454959e-06, "loss": 0.5231, "step": 10570 }, { "epoch": 0.15248692042719397, "grad_norm": 1.682904013784273, "learning_rate": 9.916392157598025e-06, "loss": 0.5099, "step": 10580 }, { "epoch": 0.15263104795122726, "grad_norm": 1.8346118642512015, "learning_rate": 9.915933436232645e-06, "loss": 0.5029, "step": 10590 }, { "epoch": 0.1527751754752605, "grad_norm": 1.8237635735884452, "learning_rate": 9.915473470569562e-06, "loss": 0.5261, "step": 10600 }, { "epoch": 0.15291930299929377, "grad_norm": 1.565538877622725, "learning_rate": 9.915012260725198e-06, "loss": 0.5229, "step": 10610 }, { "epoch": 0.15306343052332705, "grad_norm": 1.5539754336579246, "learning_rate": 9.914549806816296e-06, "loss": 0.5188, "step": 10620 }, { "epoch": 0.1532075580473603, "grad_norm": 1.5785558653506266, "learning_rate": 9.914086108959907e-06, "loss": 0.5072, "step": 10630 }, { "epoch": 0.15335168557139356, "grad_norm": 1.9573595721453882, "learning_rate": 9.913621167273396e-06, "loss": 0.528, "step": 10640 }, { "epoch": 0.15349581309542684, "grad_norm": 1.5428518698853602, "learning_rate": 9.913154981874456e-06, "loss": 0.5304, "step": 10650 }, { "epoch": 0.1536399406194601, "grad_norm": 1.5567528802138544, "learning_rate": 9.91268755288108e-06, "loss": 0.5438, "step": 10660 }, { "epoch": 0.15378406814349335, "grad_norm": 1.6083863577163826, "learning_rate": 9.912218880411582e-06, "loss": 0.5409, "step": 10670 }, { "epoch": 0.15392819566752663, "grad_norm": 1.6496103169990346, "learning_rate": 9.91174896458459e-06, "loss": 0.5381, "step": 10680 }, { "epoch": 0.1540723231915599, "grad_norm": 1.2863811748171123, "learning_rate": 9.91127780551905e-06, "loss": 0.5112, "step": 10690 }, { "epoch": 0.15421645071559315, "grad_norm": 1.5255088107138264, "learning_rate": 9.910805403334212e-06, "loss": 0.5116, "step": 10700 }, { "epoch": 0.15436057823962643, "grad_norm": 1.7009821143883654, "learning_rate": 9.910331758149655e-06, "loss": 0.5326, "step": 10710 }, { "epoch": 0.15450470576365968, "grad_norm": 1.4912358001879122, "learning_rate": 9.909856870085265e-06, "loss": 0.5213, "step": 10720 }, { "epoch": 0.15464883328769294, "grad_norm": 1.5726579307077195, "learning_rate": 9.909380739261241e-06, "loss": 0.4744, "step": 10730 }, { "epoch": 0.15479296081172622, "grad_norm": 1.408369250852814, "learning_rate": 9.908903365798101e-06, "loss": 0.516, "step": 10740 }, { "epoch": 0.15493708833575948, "grad_norm": 1.5037891610671674, "learning_rate": 9.908424749816676e-06, "loss": 0.5229, "step": 10750 }, { "epoch": 0.15508121585979273, "grad_norm": 1.3889571819634474, "learning_rate": 9.907944891438108e-06, "loss": 0.506, "step": 10760 }, { "epoch": 0.15522534338382601, "grad_norm": 4.797165519646846, "learning_rate": 9.907463790783858e-06, "loss": 0.5199, "step": 10770 }, { "epoch": 0.15536947090785927, "grad_norm": 1.6529525877480602, "learning_rate": 9.9069814479757e-06, "loss": 0.499, "step": 10780 }, { "epoch": 0.15551359843189255, "grad_norm": 1.594953275312236, "learning_rate": 9.906497863135724e-06, "loss": 0.5238, "step": 10790 }, { "epoch": 0.1556577259559258, "grad_norm": 2.049308381875278, "learning_rate": 9.90601303638633e-06, "loss": 0.5042, "step": 10800 }, { "epoch": 0.15580185347995906, "grad_norm": 1.6059495849593297, "learning_rate": 9.905526967850237e-06, "loss": 0.5422, "step": 10810 }, { "epoch": 0.15594598100399235, "grad_norm": 1.4630280053126783, "learning_rate": 9.905039657650474e-06, "loss": 0.5199, "step": 10820 }, { "epoch": 0.1560901085280256, "grad_norm": 1.5130515785534144, "learning_rate": 9.90455110591039e-06, "loss": 0.5347, "step": 10830 }, { "epoch": 0.15623423605205886, "grad_norm": 1.6485418022096934, "learning_rate": 9.904061312753643e-06, "loss": 0.532, "step": 10840 }, { "epoch": 0.15637836357609214, "grad_norm": 1.6000978680369629, "learning_rate": 9.903570278304207e-06, "loss": 0.5433, "step": 10850 }, { "epoch": 0.1565224911001254, "grad_norm": 1.3670169099701037, "learning_rate": 9.90307800268637e-06, "loss": 0.4983, "step": 10860 }, { "epoch": 0.15666661862415865, "grad_norm": 1.9807184135474374, "learning_rate": 9.902584486024736e-06, "loss": 0.5366, "step": 10870 }, { "epoch": 0.15681074614819193, "grad_norm": 2.2392760920259587, "learning_rate": 9.90208972844422e-06, "loss": 0.516, "step": 10880 }, { "epoch": 0.1569548736722252, "grad_norm": 1.646898700852732, "learning_rate": 9.901593730070051e-06, "loss": 0.5047, "step": 10890 }, { "epoch": 0.15709900119625844, "grad_norm": 1.7137462323918071, "learning_rate": 9.90109649102778e-06, "loss": 0.5021, "step": 10900 }, { "epoch": 0.15724312872029172, "grad_norm": 1.4726719266672363, "learning_rate": 9.900598011443258e-06, "loss": 0.5531, "step": 10910 }, { "epoch": 0.15738725624432498, "grad_norm": 1.9492232994326484, "learning_rate": 9.900098291442663e-06, "loss": 0.5428, "step": 10920 }, { "epoch": 0.15753138376835824, "grad_norm": 1.6702668403696321, "learning_rate": 9.899597331152482e-06, "loss": 0.5418, "step": 10930 }, { "epoch": 0.15767551129239152, "grad_norm": 1.7417863845333483, "learning_rate": 9.899095130699513e-06, "loss": 0.4987, "step": 10940 }, { "epoch": 0.15781963881642477, "grad_norm": 1.6263055541733278, "learning_rate": 9.898591690210871e-06, "loss": 0.5119, "step": 10950 }, { "epoch": 0.15796376634045803, "grad_norm": 1.7460393402322365, "learning_rate": 9.898087009813985e-06, "loss": 0.5206, "step": 10960 }, { "epoch": 0.1581078938644913, "grad_norm": 1.6867517071179892, "learning_rate": 9.897581089636597e-06, "loss": 0.5161, "step": 10970 }, { "epoch": 0.15825202138852457, "grad_norm": 1.387856750434108, "learning_rate": 9.897073929806764e-06, "loss": 0.5149, "step": 10980 }, { "epoch": 0.15839614891255782, "grad_norm": 1.5155602374714858, "learning_rate": 9.896565530452857e-06, "loss": 0.5001, "step": 10990 }, { "epoch": 0.1585402764365911, "grad_norm": 1.6570985511502707, "learning_rate": 9.896055891703556e-06, "loss": 0.5187, "step": 11000 }, { "epoch": 0.15868440396062436, "grad_norm": 1.4805475443680869, "learning_rate": 9.895545013687861e-06, "loss": 0.5319, "step": 11010 }, { "epoch": 0.15882853148465761, "grad_norm": 1.5070742118345957, "learning_rate": 9.895032896535081e-06, "loss": 0.4812, "step": 11020 }, { "epoch": 0.1589726590086909, "grad_norm": 1.6719915034146042, "learning_rate": 9.894519540374845e-06, "loss": 0.5262, "step": 11030 }, { "epoch": 0.15911678653272415, "grad_norm": 1.66020726996552, "learning_rate": 9.894004945337085e-06, "loss": 0.5445, "step": 11040 }, { "epoch": 0.1592609140567574, "grad_norm": 1.612721443888506, "learning_rate": 9.89348911155206e-06, "loss": 0.4991, "step": 11050 }, { "epoch": 0.1594050415807907, "grad_norm": 1.4935551347717924, "learning_rate": 9.892972039150329e-06, "loss": 0.5242, "step": 11060 }, { "epoch": 0.15954916910482395, "grad_norm": 1.3483943525207163, "learning_rate": 9.892453728262772e-06, "loss": 0.5246, "step": 11070 }, { "epoch": 0.1596932966288572, "grad_norm": 1.8695056974349344, "learning_rate": 9.891934179020585e-06, "loss": 0.4998, "step": 11080 }, { "epoch": 0.15983742415289048, "grad_norm": 1.6887458509289548, "learning_rate": 9.891413391555271e-06, "loss": 0.5057, "step": 11090 }, { "epoch": 0.15998155167692374, "grad_norm": 1.588206694924183, "learning_rate": 9.89089136599865e-06, "loss": 0.5385, "step": 11100 }, { "epoch": 0.160125679200957, "grad_norm": 1.6484197026402876, "learning_rate": 9.890368102482855e-06, "loss": 0.4899, "step": 11110 }, { "epoch": 0.16026980672499028, "grad_norm": 1.7357595184272403, "learning_rate": 9.88984360114033e-06, "loss": 0.5289, "step": 11120 }, { "epoch": 0.16041393424902353, "grad_norm": 1.6307634088989467, "learning_rate": 9.889317862103837e-06, "loss": 0.5175, "step": 11130 }, { "epoch": 0.1605580617730568, "grad_norm": 1.5135863148773603, "learning_rate": 9.888790885506448e-06, "loss": 0.514, "step": 11140 }, { "epoch": 0.16070218929709007, "grad_norm": 1.6149140951362468, "learning_rate": 9.888262671481548e-06, "loss": 0.5254, "step": 11150 }, { "epoch": 0.16084631682112333, "grad_norm": 1.5892798446894743, "learning_rate": 9.887733220162834e-06, "loss": 0.5299, "step": 11160 }, { "epoch": 0.1609904443451566, "grad_norm": 2.3708765591404632, "learning_rate": 9.887202531684322e-06, "loss": 0.5204, "step": 11170 }, { "epoch": 0.16113457186918986, "grad_norm": 1.6843173571042662, "learning_rate": 9.886670606180336e-06, "loss": 0.5158, "step": 11180 }, { "epoch": 0.16127869939322312, "grad_norm": 1.662647298917777, "learning_rate": 9.886137443785514e-06, "loss": 0.5383, "step": 11190 }, { "epoch": 0.1614228269172564, "grad_norm": 1.612732255079154, "learning_rate": 9.885603044634808e-06, "loss": 0.5506, "step": 11200 }, { "epoch": 0.16156695444128966, "grad_norm": 1.6056686504294448, "learning_rate": 9.885067408863482e-06, "loss": 0.5147, "step": 11210 }, { "epoch": 0.1617110819653229, "grad_norm": 1.8014426330028541, "learning_rate": 9.884530536607112e-06, "loss": 0.5133, "step": 11220 }, { "epoch": 0.1618552094893562, "grad_norm": 1.6362925853768928, "learning_rate": 9.883992428001595e-06, "loss": 0.5255, "step": 11230 }, { "epoch": 0.16199933701338945, "grad_norm": 1.4328127383210247, "learning_rate": 9.883453083183126e-06, "loss": 0.5353, "step": 11240 }, { "epoch": 0.1621434645374227, "grad_norm": 1.878188513570915, "learning_rate": 9.882912502288226e-06, "loss": 0.5458, "step": 11250 }, { "epoch": 0.162287592061456, "grad_norm": 1.764949007857695, "learning_rate": 9.882370685453723e-06, "loss": 0.5016, "step": 11260 }, { "epoch": 0.16243171958548924, "grad_norm": 1.447364765289072, "learning_rate": 9.881827632816761e-06, "loss": 0.5211, "step": 11270 }, { "epoch": 0.1625758471095225, "grad_norm": 1.5701636058838808, "learning_rate": 9.881283344514793e-06, "loss": 0.4942, "step": 11280 }, { "epoch": 0.16271997463355578, "grad_norm": 1.4491992895330128, "learning_rate": 9.880737820685589e-06, "loss": 0.5059, "step": 11290 }, { "epoch": 0.16286410215758904, "grad_norm": 1.5733031411620868, "learning_rate": 9.880191061467225e-06, "loss": 0.5088, "step": 11300 }, { "epoch": 0.1630082296816223, "grad_norm": 1.6018276788387238, "learning_rate": 9.879643066998099e-06, "loss": 0.5191, "step": 11310 }, { "epoch": 0.16315235720565557, "grad_norm": 1.889059979702948, "learning_rate": 9.879093837416913e-06, "loss": 0.5613, "step": 11320 }, { "epoch": 0.16329648472968883, "grad_norm": 1.6896162215542931, "learning_rate": 9.878543372862688e-06, "loss": 0.5306, "step": 11330 }, { "epoch": 0.16344061225372208, "grad_norm": 1.7771730460951303, "learning_rate": 9.877991673474753e-06, "loss": 0.5312, "step": 11340 }, { "epoch": 0.16358473977775537, "grad_norm": 1.6232648142345079, "learning_rate": 9.877438739392753e-06, "loss": 0.5352, "step": 11350 }, { "epoch": 0.16372886730178862, "grad_norm": 2.7332965808411, "learning_rate": 9.876884570756643e-06, "loss": 0.5003, "step": 11360 }, { "epoch": 0.16387299482582188, "grad_norm": 2.104456116651174, "learning_rate": 9.87632916770669e-06, "loss": 0.5122, "step": 11370 }, { "epoch": 0.16401712234985516, "grad_norm": 1.5105610418986453, "learning_rate": 9.875772530383481e-06, "loss": 0.4909, "step": 11380 }, { "epoch": 0.16416124987388842, "grad_norm": 1.6734611643731485, "learning_rate": 9.875214658927904e-06, "loss": 0.5163, "step": 11390 }, { "epoch": 0.16430537739792167, "grad_norm": 1.548727114519337, "learning_rate": 9.874655553481164e-06, "loss": 0.4867, "step": 11400 }, { "epoch": 0.16444950492195495, "grad_norm": 1.547503362659037, "learning_rate": 9.874095214184782e-06, "loss": 0.4907, "step": 11410 }, { "epoch": 0.1645936324459882, "grad_norm": 1.544742609241974, "learning_rate": 9.873533641180588e-06, "loss": 0.5169, "step": 11420 }, { "epoch": 0.16473775997002146, "grad_norm": 1.642483726816812, "learning_rate": 9.872970834610724e-06, "loss": 0.5038, "step": 11430 }, { "epoch": 0.16488188749405475, "grad_norm": 2.0871235595826096, "learning_rate": 9.872406794617648e-06, "loss": 0.5359, "step": 11440 }, { "epoch": 0.165026015018088, "grad_norm": 1.7406799360590173, "learning_rate": 9.871841521344122e-06, "loss": 0.5233, "step": 11450 }, { "epoch": 0.16517014254212126, "grad_norm": 1.4930656553272532, "learning_rate": 9.871275014933226e-06, "loss": 0.5017, "step": 11460 }, { "epoch": 0.16531427006615454, "grad_norm": 1.5855535246026553, "learning_rate": 9.870707275528357e-06, "loss": 0.5253, "step": 11470 }, { "epoch": 0.1654583975901878, "grad_norm": 1.4891093706786265, "learning_rate": 9.87013830327321e-06, "loss": 0.5495, "step": 11480 }, { "epoch": 0.16560252511422105, "grad_norm": 1.7157408190709555, "learning_rate": 9.86956809831181e-06, "loss": 0.5329, "step": 11490 }, { "epoch": 0.16574665263825433, "grad_norm": 1.9754582476118565, "learning_rate": 9.868996660788477e-06, "loss": 0.5375, "step": 11500 }, { "epoch": 0.1658907801622876, "grad_norm": 1.4764876349972642, "learning_rate": 9.868423990847852e-06, "loss": 0.4983, "step": 11510 }, { "epoch": 0.16603490768632084, "grad_norm": 1.7450889700480754, "learning_rate": 9.867850088634889e-06, "loss": 0.5433, "step": 11520 }, { "epoch": 0.16617903521035413, "grad_norm": 1.700522431041132, "learning_rate": 9.867274954294852e-06, "loss": 0.5004, "step": 11530 }, { "epoch": 0.16632316273438738, "grad_norm": 1.4196506909113318, "learning_rate": 9.866698587973312e-06, "loss": 0.5237, "step": 11540 }, { "epoch": 0.16646729025842066, "grad_norm": 1.9111635843121184, "learning_rate": 9.86612098981616e-06, "loss": 0.5289, "step": 11550 }, { "epoch": 0.16661141778245392, "grad_norm": 1.751024536488573, "learning_rate": 9.865542159969593e-06, "loss": 0.514, "step": 11560 }, { "epoch": 0.16675554530648717, "grad_norm": 1.3823942074397615, "learning_rate": 9.864962098580123e-06, "loss": 0.4979, "step": 11570 }, { "epoch": 0.16689967283052046, "grad_norm": 1.6165861263836068, "learning_rate": 9.864380805794569e-06, "loss": 0.5272, "step": 11580 }, { "epoch": 0.1670438003545537, "grad_norm": 1.6367154569138824, "learning_rate": 9.863798281760068e-06, "loss": 0.5074, "step": 11590 }, { "epoch": 0.16718792787858697, "grad_norm": 1.7630120341778643, "learning_rate": 9.863214526624065e-06, "loss": 0.5179, "step": 11600 }, { "epoch": 0.16733205540262025, "grad_norm": 1.4655214932751215, "learning_rate": 9.86262954053432e-06, "loss": 0.5121, "step": 11610 }, { "epoch": 0.1674761829266535, "grad_norm": 1.7114828252009937, "learning_rate": 9.862043323638896e-06, "loss": 0.5336, "step": 11620 }, { "epoch": 0.16762031045068676, "grad_norm": 1.5894123831468083, "learning_rate": 9.861455876086181e-06, "loss": 0.5336, "step": 11630 }, { "epoch": 0.16776443797472004, "grad_norm": 1.7790403960043817, "learning_rate": 9.86086719802486e-06, "loss": 0.5286, "step": 11640 }, { "epoch": 0.1679085654987533, "grad_norm": 1.6967311434776569, "learning_rate": 9.86027728960394e-06, "loss": 0.5428, "step": 11650 }, { "epoch": 0.16805269302278655, "grad_norm": 1.6841535050771572, "learning_rate": 9.859686150972735e-06, "loss": 0.544, "step": 11660 }, { "epoch": 0.16819682054681984, "grad_norm": 1.4481385348713371, "learning_rate": 9.859093782280871e-06, "loss": 0.5096, "step": 11670 }, { "epoch": 0.1683409480708531, "grad_norm": 1.3280722838729975, "learning_rate": 9.858500183678285e-06, "loss": 0.5131, "step": 11680 }, { "epoch": 0.16848507559488635, "grad_norm": 1.5988281273526197, "learning_rate": 9.857905355315229e-06, "loss": 0.481, "step": 11690 }, { "epoch": 0.16862920311891963, "grad_norm": 1.7402136452010113, "learning_rate": 9.857309297342259e-06, "loss": 0.5564, "step": 11700 }, { "epoch": 0.16877333064295288, "grad_norm": 1.5734276100831937, "learning_rate": 9.856712009910248e-06, "loss": 0.5274, "step": 11710 }, { "epoch": 0.16891745816698614, "grad_norm": 1.6005080589938296, "learning_rate": 9.856113493170379e-06, "loss": 0.5233, "step": 11720 }, { "epoch": 0.16906158569101942, "grad_norm": 1.6505360787485344, "learning_rate": 9.855513747274146e-06, "loss": 0.5014, "step": 11730 }, { "epoch": 0.16920571321505268, "grad_norm": 1.7632231769415996, "learning_rate": 9.854912772373353e-06, "loss": 0.4828, "step": 11740 }, { "epoch": 0.16934984073908593, "grad_norm": 1.6682303362040085, "learning_rate": 9.854310568620116e-06, "loss": 0.5329, "step": 11750 }, { "epoch": 0.16949396826311922, "grad_norm": 2.02737594851901, "learning_rate": 9.853707136166861e-06, "loss": 0.5237, "step": 11760 }, { "epoch": 0.16963809578715247, "grad_norm": 1.5512684687045886, "learning_rate": 9.853102475166328e-06, "loss": 0.5052, "step": 11770 }, { "epoch": 0.16978222331118573, "grad_norm": 1.6130648927674123, "learning_rate": 9.852496585771566e-06, "loss": 0.5055, "step": 11780 }, { "epoch": 0.169926350835219, "grad_norm": 1.699430779820969, "learning_rate": 9.851889468135933e-06, "loss": 0.5275, "step": 11790 }, { "epoch": 0.17007047835925226, "grad_norm": 1.5006142574822, "learning_rate": 9.8512811224131e-06, "loss": 0.4879, "step": 11800 }, { "epoch": 0.17021460588328552, "grad_norm": 1.7368255363969805, "learning_rate": 9.850671548757049e-06, "loss": 0.5185, "step": 11810 }, { "epoch": 0.1703587334073188, "grad_norm": 1.9825594680612388, "learning_rate": 9.850060747322072e-06, "loss": 0.5151, "step": 11820 }, { "epoch": 0.17050286093135206, "grad_norm": 1.390559631895014, "learning_rate": 9.849448718262774e-06, "loss": 0.5175, "step": 11830 }, { "epoch": 0.1706469884553853, "grad_norm": 1.8080835485860225, "learning_rate": 9.848835461734067e-06, "loss": 0.5253, "step": 11840 }, { "epoch": 0.1707911159794186, "grad_norm": 1.6521722047113891, "learning_rate": 9.848220977891177e-06, "loss": 0.5049, "step": 11850 }, { "epoch": 0.17093524350345185, "grad_norm": 1.5133596770715543, "learning_rate": 9.847605266889637e-06, "loss": 0.5264, "step": 11860 }, { "epoch": 0.1710793710274851, "grad_norm": 1.6974982234056932, "learning_rate": 9.846988328885293e-06, "loss": 0.546, "step": 11870 }, { "epoch": 0.1712234985515184, "grad_norm": 1.5886595860748405, "learning_rate": 9.846370164034306e-06, "loss": 0.53, "step": 11880 }, { "epoch": 0.17136762607555164, "grad_norm": 1.789719253466655, "learning_rate": 9.845750772493136e-06, "loss": 0.504, "step": 11890 }, { "epoch": 0.17151175359958493, "grad_norm": 1.6180402695534897, "learning_rate": 9.845130154418567e-06, "loss": 0.5398, "step": 11900 }, { "epoch": 0.17165588112361818, "grad_norm": 1.5794809299656059, "learning_rate": 9.844508309967683e-06, "loss": 0.5034, "step": 11910 }, { "epoch": 0.17180000864765144, "grad_norm": 1.4810481225013172, "learning_rate": 9.843885239297883e-06, "loss": 0.5219, "step": 11920 }, { "epoch": 0.17194413617168472, "grad_norm": 1.5168614086119132, "learning_rate": 9.843260942566876e-06, "loss": 0.4991, "step": 11930 }, { "epoch": 0.17208826369571797, "grad_norm": 1.550097418902657, "learning_rate": 9.842635419932681e-06, "loss": 0.4977, "step": 11940 }, { "epoch": 0.17223239121975123, "grad_norm": 1.7202102197644922, "learning_rate": 9.842008671553626e-06, "loss": 0.5251, "step": 11950 }, { "epoch": 0.1723765187437845, "grad_norm": 1.717769830422512, "learning_rate": 9.841380697588353e-06, "loss": 0.5087, "step": 11960 }, { "epoch": 0.17252064626781777, "grad_norm": 1.6483148471312175, "learning_rate": 9.840751498195811e-06, "loss": 0.5263, "step": 11970 }, { "epoch": 0.17266477379185102, "grad_norm": 1.418060424464346, "learning_rate": 9.840121073535258e-06, "loss": 0.5026, "step": 11980 }, { "epoch": 0.1728089013158843, "grad_norm": 1.3031483238267725, "learning_rate": 9.839489423766266e-06, "loss": 0.5131, "step": 11990 }, { "epoch": 0.17295302883991756, "grad_norm": 1.51611734699334, "learning_rate": 9.838856549048716e-06, "loss": 0.527, "step": 12000 }, { "epoch": 0.17309715636395082, "grad_norm": 1.4024196213801388, "learning_rate": 9.838222449542796e-06, "loss": 0.4992, "step": 12010 }, { "epoch": 0.1732412838879841, "grad_norm": 1.6146448021086381, "learning_rate": 9.837587125409008e-06, "loss": 0.5533, "step": 12020 }, { "epoch": 0.17338541141201735, "grad_norm": 1.4736669078208833, "learning_rate": 9.83695057680816e-06, "loss": 0.5185, "step": 12030 }, { "epoch": 0.1735295389360506, "grad_norm": 1.7102833268173496, "learning_rate": 9.836312803901376e-06, "loss": 0.5274, "step": 12040 }, { "epoch": 0.1736736664600839, "grad_norm": 1.6340057983406302, "learning_rate": 9.835673806850081e-06, "loss": 0.5279, "step": 12050 }, { "epoch": 0.17381779398411715, "grad_norm": 1.803065764967815, "learning_rate": 9.835033585816021e-06, "loss": 0.5322, "step": 12060 }, { "epoch": 0.1739619215081504, "grad_norm": 1.8170414007956939, "learning_rate": 9.83439214096124e-06, "loss": 0.5533, "step": 12070 }, { "epoch": 0.17410604903218369, "grad_norm": 1.6492621555682794, "learning_rate": 9.8337494724481e-06, "loss": 0.5286, "step": 12080 }, { "epoch": 0.17425017655621694, "grad_norm": 1.5521579152116116, "learning_rate": 9.83310558043927e-06, "loss": 0.4879, "step": 12090 }, { "epoch": 0.1743943040802502, "grad_norm": 1.5468949337839768, "learning_rate": 9.83246046509773e-06, "loss": 0.4827, "step": 12100 }, { "epoch": 0.17453843160428348, "grad_norm": 1.586577032192172, "learning_rate": 9.831814126586766e-06, "loss": 0.4942, "step": 12110 }, { "epoch": 0.17468255912831673, "grad_norm": 1.5913850189475194, "learning_rate": 9.831166565069979e-06, "loss": 0.5384, "step": 12120 }, { "epoch": 0.17482668665235, "grad_norm": 1.4433324786511998, "learning_rate": 9.830517780711274e-06, "loss": 0.5097, "step": 12130 }, { "epoch": 0.17497081417638327, "grad_norm": 1.4675394480824842, "learning_rate": 9.82986777367487e-06, "loss": 0.5029, "step": 12140 }, { "epoch": 0.17511494170041653, "grad_norm": 1.7907753573708092, "learning_rate": 9.829216544125293e-06, "loss": 0.4946, "step": 12150 }, { "epoch": 0.17525906922444978, "grad_norm": 1.8822931010335235, "learning_rate": 9.82856409222738e-06, "loss": 0.5143, "step": 12160 }, { "epoch": 0.17540319674848306, "grad_norm": 1.5914266880230987, "learning_rate": 9.827910418146275e-06, "loss": 0.5194, "step": 12170 }, { "epoch": 0.17554732427251632, "grad_norm": 1.3628138549370277, "learning_rate": 9.827255522047434e-06, "loss": 0.4937, "step": 12180 }, { "epoch": 0.17569145179654957, "grad_norm": 1.7083076429212316, "learning_rate": 9.82659940409662e-06, "loss": 0.5242, "step": 12190 }, { "epoch": 0.17583557932058286, "grad_norm": 1.7111261663572463, "learning_rate": 9.825942064459909e-06, "loss": 0.5366, "step": 12200 }, { "epoch": 0.1759797068446161, "grad_norm": 1.6757581664685364, "learning_rate": 9.82528350330368e-06, "loss": 0.487, "step": 12210 }, { "epoch": 0.17612383436864937, "grad_norm": 1.386305678344102, "learning_rate": 9.824623720794628e-06, "loss": 0.5176, "step": 12220 }, { "epoch": 0.17626796189268265, "grad_norm": 1.4639586977002002, "learning_rate": 9.823962717099754e-06, "loss": 0.5047, "step": 12230 }, { "epoch": 0.1764120894167159, "grad_norm": 1.3759980438912354, "learning_rate": 9.823300492386366e-06, "loss": 0.5233, "step": 12240 }, { "epoch": 0.17655621694074916, "grad_norm": 1.760265662834426, "learning_rate": 9.822637046822083e-06, "loss": 0.5145, "step": 12250 }, { "epoch": 0.17670034446478244, "grad_norm": 1.8963429206545588, "learning_rate": 9.821972380574836e-06, "loss": 0.5158, "step": 12260 }, { "epoch": 0.1768444719888157, "grad_norm": 1.6981098814852944, "learning_rate": 9.82130649381286e-06, "loss": 0.5085, "step": 12270 }, { "epoch": 0.17698859951284898, "grad_norm": 1.609430306040321, "learning_rate": 9.8206393867047e-06, "loss": 0.507, "step": 12280 }, { "epoch": 0.17713272703688224, "grad_norm": 1.574901955256671, "learning_rate": 9.819971059419215e-06, "loss": 0.5318, "step": 12290 }, { "epoch": 0.1772768545609155, "grad_norm": 1.6643490843814372, "learning_rate": 9.819301512125565e-06, "loss": 0.5475, "step": 12300 }, { "epoch": 0.17742098208494878, "grad_norm": 1.7629512690919387, "learning_rate": 9.818630744993226e-06, "loss": 0.5032, "step": 12310 }, { "epoch": 0.17756510960898203, "grad_norm": 2.037814423523097, "learning_rate": 9.817958758191974e-06, "loss": 0.5138, "step": 12320 }, { "epoch": 0.17770923713301529, "grad_norm": 1.556669601285521, "learning_rate": 9.817285551891905e-06, "loss": 0.4891, "step": 12330 }, { "epoch": 0.17785336465704857, "grad_norm": 1.3827986168354331, "learning_rate": 9.816611126263415e-06, "loss": 0.5423, "step": 12340 }, { "epoch": 0.17799749218108182, "grad_norm": 1.5755120138873158, "learning_rate": 9.815935481477213e-06, "loss": 0.5446, "step": 12350 }, { "epoch": 0.17814161970511508, "grad_norm": 1.5849006674637942, "learning_rate": 9.81525861770431e-06, "loss": 0.5298, "step": 12360 }, { "epoch": 0.17828574722914836, "grad_norm": 1.722444121861003, "learning_rate": 9.814580535116039e-06, "loss": 0.5527, "step": 12370 }, { "epoch": 0.17842987475318162, "grad_norm": 1.5912663002873106, "learning_rate": 9.813901233884025e-06, "loss": 0.5265, "step": 12380 }, { "epoch": 0.17857400227721487, "grad_norm": 1.6485719111058277, "learning_rate": 9.813220714180215e-06, "loss": 0.5461, "step": 12390 }, { "epoch": 0.17871812980124815, "grad_norm": 1.7242596213417372, "learning_rate": 9.812538976176854e-06, "loss": 0.5434, "step": 12400 }, { "epoch": 0.1788622573252814, "grad_norm": 1.4376310312845695, "learning_rate": 9.811856020046505e-06, "loss": 0.5167, "step": 12410 }, { "epoch": 0.17900638484931466, "grad_norm": 1.6483344599750587, "learning_rate": 9.81117184596203e-06, "loss": 0.5388, "step": 12420 }, { "epoch": 0.17915051237334795, "grad_norm": 1.3964776106006147, "learning_rate": 9.81048645409661e-06, "loss": 0.4882, "step": 12430 }, { "epoch": 0.1792946398973812, "grad_norm": 1.6088907847602287, "learning_rate": 9.809799844623724e-06, "loss": 0.5444, "step": 12440 }, { "epoch": 0.17943876742141446, "grad_norm": 1.5997975754811549, "learning_rate": 9.809112017717163e-06, "loss": 0.5259, "step": 12450 }, { "epoch": 0.17958289494544774, "grad_norm": 1.5525005838458807, "learning_rate": 9.808422973551027e-06, "loss": 0.5072, "step": 12460 }, { "epoch": 0.179727022469481, "grad_norm": 1.498933669971197, "learning_rate": 9.807732712299725e-06, "loss": 0.5276, "step": 12470 }, { "epoch": 0.17987114999351425, "grad_norm": 1.5542747945246633, "learning_rate": 9.80704123413797e-06, "loss": 0.4964, "step": 12480 }, { "epoch": 0.18001527751754753, "grad_norm": 1.5657066174354564, "learning_rate": 9.806348539240789e-06, "loss": 0.5154, "step": 12490 }, { "epoch": 0.1801594050415808, "grad_norm": 1.684550370796612, "learning_rate": 9.805654627783511e-06, "loss": 0.5258, "step": 12500 }, { "epoch": 0.18030353256561404, "grad_norm": 1.5743730542039927, "learning_rate": 9.80495949994178e-06, "loss": 0.5237, "step": 12510 }, { "epoch": 0.18044766008964733, "grad_norm": 1.8197029217360117, "learning_rate": 9.804263155891536e-06, "loss": 0.4998, "step": 12520 }, { "epoch": 0.18059178761368058, "grad_norm": 1.6762942084553596, "learning_rate": 9.80356559580904e-06, "loss": 0.5037, "step": 12530 }, { "epoch": 0.18073591513771384, "grad_norm": 1.851892555065324, "learning_rate": 9.802866819870853e-06, "loss": 0.508, "step": 12540 }, { "epoch": 0.18088004266174712, "grad_norm": 1.4148237947650877, "learning_rate": 9.802166828253845e-06, "loss": 0.5097, "step": 12550 }, { "epoch": 0.18102417018578038, "grad_norm": 1.519926954114353, "learning_rate": 9.801465621135198e-06, "loss": 0.4777, "step": 12560 }, { "epoch": 0.18116829770981363, "grad_norm": 2.198419524981991, "learning_rate": 9.800763198692394e-06, "loss": 0.5203, "step": 12570 }, { "epoch": 0.1813124252338469, "grad_norm": 1.396381201910422, "learning_rate": 9.80005956110323e-06, "loss": 0.4957, "step": 12580 }, { "epoch": 0.18145655275788017, "grad_norm": 1.525852370873749, "learning_rate": 9.799354708545807e-06, "loss": 0.5148, "step": 12590 }, { "epoch": 0.18160068028191342, "grad_norm": 1.5823565247976092, "learning_rate": 9.798648641198531e-06, "loss": 0.5525, "step": 12600 }, { "epoch": 0.1817448078059467, "grad_norm": 1.8973305402295848, "learning_rate": 9.797941359240122e-06, "loss": 0.5126, "step": 12610 }, { "epoch": 0.18188893532997996, "grad_norm": 1.5546510950399073, "learning_rate": 9.797232862849601e-06, "loss": 0.5263, "step": 12620 }, { "epoch": 0.18203306285401324, "grad_norm": 1.6823520012009774, "learning_rate": 9.796523152206303e-06, "loss": 0.5028, "step": 12630 }, { "epoch": 0.1821771903780465, "grad_norm": 1.358765810352795, "learning_rate": 9.795812227489863e-06, "loss": 0.5136, "step": 12640 }, { "epoch": 0.18232131790207975, "grad_norm": 1.5174326737859996, "learning_rate": 9.795100088880226e-06, "loss": 0.5025, "step": 12650 }, { "epoch": 0.18246544542611304, "grad_norm": 1.6494805466152571, "learning_rate": 9.794386736557651e-06, "loss": 0.516, "step": 12660 }, { "epoch": 0.1826095729501463, "grad_norm": 1.8467112535179238, "learning_rate": 9.793672170702693e-06, "loss": 0.5178, "step": 12670 }, { "epoch": 0.18275370047417955, "grad_norm": 1.5730899129268128, "learning_rate": 9.792956391496222e-06, "loss": 0.538, "step": 12680 }, { "epoch": 0.18289782799821283, "grad_norm": 1.4334996105801376, "learning_rate": 9.79223939911941e-06, "loss": 0.5157, "step": 12690 }, { "epoch": 0.18304195552224609, "grad_norm": 1.5784252522651552, "learning_rate": 9.791521193753742e-06, "loss": 0.5283, "step": 12700 }, { "epoch": 0.18318608304627934, "grad_norm": 1.3235069402146267, "learning_rate": 9.790801775581004e-06, "loss": 0.5277, "step": 12710 }, { "epoch": 0.18333021057031262, "grad_norm": 1.6844245607221693, "learning_rate": 9.790081144783294e-06, "loss": 0.5096, "step": 12720 }, { "epoch": 0.18347433809434588, "grad_norm": 1.5906524438472158, "learning_rate": 9.789359301543014e-06, "loss": 0.5309, "step": 12730 }, { "epoch": 0.18361846561837913, "grad_norm": 1.8906226867831069, "learning_rate": 9.788636246042871e-06, "loss": 0.5467, "step": 12740 }, { "epoch": 0.18376259314241242, "grad_norm": 1.6854091671138725, "learning_rate": 9.787911978465884e-06, "loss": 0.5136, "step": 12750 }, { "epoch": 0.18390672066644567, "grad_norm": 1.4403870622388653, "learning_rate": 9.787186498995376e-06, "loss": 0.5055, "step": 12760 }, { "epoch": 0.18405084819047893, "grad_norm": 1.6238581723004653, "learning_rate": 9.786459807814976e-06, "loss": 0.5111, "step": 12770 }, { "epoch": 0.1841949757145122, "grad_norm": 1.5829313200257897, "learning_rate": 9.785731905108621e-06, "loss": 0.5161, "step": 12780 }, { "epoch": 0.18433910323854547, "grad_norm": 1.4874276917052396, "learning_rate": 9.785002791060557e-06, "loss": 0.5281, "step": 12790 }, { "epoch": 0.18448323076257872, "grad_norm": 1.5840744133099647, "learning_rate": 9.78427246585533e-06, "loss": 0.5285, "step": 12800 }, { "epoch": 0.184627358286612, "grad_norm": 2.087275537971018, "learning_rate": 9.783540929677798e-06, "loss": 0.5375, "step": 12810 }, { "epoch": 0.18477148581064526, "grad_norm": 1.3853621346816585, "learning_rate": 9.782808182713122e-06, "loss": 0.5093, "step": 12820 }, { "epoch": 0.1849156133346785, "grad_norm": 1.664513042616467, "learning_rate": 9.782074225146773e-06, "loss": 0.4966, "step": 12830 }, { "epoch": 0.1850597408587118, "grad_norm": 1.824224797770038, "learning_rate": 9.781339057164528e-06, "loss": 0.5195, "step": 12840 }, { "epoch": 0.18520386838274505, "grad_norm": 1.477681164999277, "learning_rate": 9.780602678952469e-06, "loss": 0.5248, "step": 12850 }, { "epoch": 0.1853479959067783, "grad_norm": 1.1598877042046316, "learning_rate": 9.779865090696983e-06, "loss": 0.4942, "step": 12860 }, { "epoch": 0.1854921234308116, "grad_norm": 1.7961987446408978, "learning_rate": 9.779126292584765e-06, "loss": 0.5214, "step": 12870 }, { "epoch": 0.18563625095484484, "grad_norm": 1.6221179009637692, "learning_rate": 9.778386284802817e-06, "loss": 0.5096, "step": 12880 }, { "epoch": 0.1857803784788781, "grad_norm": 1.7279424637265641, "learning_rate": 9.777645067538446e-06, "loss": 0.5421, "step": 12890 }, { "epoch": 0.18592450600291138, "grad_norm": 1.4453774936335322, "learning_rate": 9.776902640979266e-06, "loss": 0.5253, "step": 12900 }, { "epoch": 0.18606863352694464, "grad_norm": 1.5177793938281672, "learning_rate": 9.776159005313195e-06, "loss": 0.5158, "step": 12910 }, { "epoch": 0.1862127610509779, "grad_norm": 1.4933130958335392, "learning_rate": 9.775414160728459e-06, "loss": 0.5294, "step": 12920 }, { "epoch": 0.18635688857501118, "grad_norm": 1.4159817639822114, "learning_rate": 9.77466810741359e-06, "loss": 0.4925, "step": 12930 }, { "epoch": 0.18650101609904443, "grad_norm": 1.6172209528311, "learning_rate": 9.773920845557427e-06, "loss": 0.5016, "step": 12940 }, { "epoch": 0.1866451436230777, "grad_norm": 1.748765636040724, "learning_rate": 9.77317237534911e-06, "loss": 0.5175, "step": 12950 }, { "epoch": 0.18678927114711097, "grad_norm": 1.307662427996525, "learning_rate": 9.77242269697809e-06, "loss": 0.4977, "step": 12960 }, { "epoch": 0.18693339867114422, "grad_norm": 2.1804588306252137, "learning_rate": 9.771671810634123e-06, "loss": 0.5175, "step": 12970 }, { "epoch": 0.18707752619517748, "grad_norm": 1.380463031194979, "learning_rate": 9.770919716507266e-06, "loss": 0.5215, "step": 12980 }, { "epoch": 0.18722165371921076, "grad_norm": 1.6894258121058014, "learning_rate": 9.770166414787891e-06, "loss": 0.5162, "step": 12990 }, { "epoch": 0.18736578124324402, "grad_norm": 1.554082916374579, "learning_rate": 9.769411905666665e-06, "loss": 0.5016, "step": 13000 }, { "epoch": 0.1875099087672773, "grad_norm": 1.59895289979835, "learning_rate": 9.76865618933457e-06, "loss": 0.5057, "step": 13010 }, { "epoch": 0.18765403629131056, "grad_norm": 1.5768155817129847, "learning_rate": 9.767899265982888e-06, "loss": 0.4921, "step": 13020 }, { "epoch": 0.1877981638153438, "grad_norm": 1.7366585665446916, "learning_rate": 9.767141135803204e-06, "loss": 0.5137, "step": 13030 }, { "epoch": 0.1879422913393771, "grad_norm": 1.5732104821111832, "learning_rate": 9.766381798987419e-06, "loss": 0.5113, "step": 13040 }, { "epoch": 0.18808641886341035, "grad_norm": 1.722712667901084, "learning_rate": 9.765621255727728e-06, "loss": 0.5231, "step": 13050 }, { "epoch": 0.1882305463874436, "grad_norm": 1.3911222902941425, "learning_rate": 9.764859506216637e-06, "loss": 0.5074, "step": 13060 }, { "epoch": 0.1883746739114769, "grad_norm": 1.6997664257549154, "learning_rate": 9.764096550646957e-06, "loss": 0.5118, "step": 13070 }, { "epoch": 0.18851880143551014, "grad_norm": 1.7931172132325484, "learning_rate": 9.763332389211803e-06, "loss": 0.4954, "step": 13080 }, { "epoch": 0.1886629289595434, "grad_norm": 1.4730952376973478, "learning_rate": 9.762567022104597e-06, "loss": 0.5097, "step": 13090 }, { "epoch": 0.18880705648357668, "grad_norm": 1.7989530629744757, "learning_rate": 9.761800449519064e-06, "loss": 0.5336, "step": 13100 }, { "epoch": 0.18895118400760993, "grad_norm": 1.3533195679243841, "learning_rate": 9.761032671649237e-06, "loss": 0.5036, "step": 13110 }, { "epoch": 0.1890953115316432, "grad_norm": 1.362146241012182, "learning_rate": 9.76026368868945e-06, "loss": 0.506, "step": 13120 }, { "epoch": 0.18923943905567647, "grad_norm": 1.5724374533432803, "learning_rate": 9.759493500834345e-06, "loss": 0.507, "step": 13130 }, { "epoch": 0.18938356657970973, "grad_norm": 1.593691477506538, "learning_rate": 9.75872210827887e-06, "loss": 0.5339, "step": 13140 }, { "epoch": 0.18952769410374298, "grad_norm": 1.8799814988435701, "learning_rate": 9.757949511218273e-06, "loss": 0.5253, "step": 13150 }, { "epoch": 0.18967182162777627, "grad_norm": 1.6435846542823411, "learning_rate": 9.757175709848113e-06, "loss": 0.5275, "step": 13160 }, { "epoch": 0.18981594915180952, "grad_norm": 1.561747226603953, "learning_rate": 9.756400704364249e-06, "loss": 0.5293, "step": 13170 }, { "epoch": 0.18996007667584278, "grad_norm": 1.627422574497872, "learning_rate": 9.755624494962848e-06, "loss": 0.5013, "step": 13180 }, { "epoch": 0.19010420419987606, "grad_norm": 1.5065381728415508, "learning_rate": 9.754847081840381e-06, "loss": 0.5339, "step": 13190 }, { "epoch": 0.19024833172390931, "grad_norm": 1.6223675939283495, "learning_rate": 9.75406846519362e-06, "loss": 0.4985, "step": 13200 }, { "epoch": 0.19039245924794257, "grad_norm": 1.5244014776943817, "learning_rate": 9.753288645219649e-06, "loss": 0.5075, "step": 13210 }, { "epoch": 0.19053658677197585, "grad_norm": 1.376721738492903, "learning_rate": 9.752507622115848e-06, "loss": 0.5067, "step": 13220 }, { "epoch": 0.1906807142960091, "grad_norm": 1.5611711621890023, "learning_rate": 9.75172539607991e-06, "loss": 0.5089, "step": 13230 }, { "epoch": 0.19082484182004236, "grad_norm": 1.4301476302873637, "learning_rate": 9.750941967309826e-06, "loss": 0.4929, "step": 13240 }, { "epoch": 0.19096896934407565, "grad_norm": 1.8114204754359278, "learning_rate": 9.750157336003892e-06, "loss": 0.5103, "step": 13250 }, { "epoch": 0.1911130968681089, "grad_norm": 1.4863637996637593, "learning_rate": 9.749371502360714e-06, "loss": 0.5174, "step": 13260 }, { "epoch": 0.19125722439214216, "grad_norm": 1.4984871067873886, "learning_rate": 9.748584466579196e-06, "loss": 0.5321, "step": 13270 }, { "epoch": 0.19140135191617544, "grad_norm": 1.660721680979183, "learning_rate": 9.74779622885855e-06, "loss": 0.4866, "step": 13280 }, { "epoch": 0.1915454794402087, "grad_norm": 1.4036453339868846, "learning_rate": 9.747006789398291e-06, "loss": 0.5297, "step": 13290 }, { "epoch": 0.19168960696424195, "grad_norm": 1.5408663170881876, "learning_rate": 9.746216148398238e-06, "loss": 0.5174, "step": 13300 }, { "epoch": 0.19183373448827523, "grad_norm": 1.555808599003675, "learning_rate": 9.745424306058512e-06, "loss": 0.529, "step": 13310 }, { "epoch": 0.1919778620123085, "grad_norm": 1.5011161394974544, "learning_rate": 9.744631262579542e-06, "loss": 0.5237, "step": 13320 }, { "epoch": 0.19212198953634174, "grad_norm": 1.618903846813848, "learning_rate": 9.743837018162061e-06, "loss": 0.5147, "step": 13330 }, { "epoch": 0.19226611706037502, "grad_norm": 1.4866020527507227, "learning_rate": 9.743041573007105e-06, "loss": 0.503, "step": 13340 }, { "epoch": 0.19241024458440828, "grad_norm": 1.6450576959312495, "learning_rate": 9.742244927316007e-06, "loss": 0.5503, "step": 13350 }, { "epoch": 0.19255437210844156, "grad_norm": 1.4371265344207147, "learning_rate": 9.741447081290418e-06, "loss": 0.49, "step": 13360 }, { "epoch": 0.19269849963247482, "grad_norm": 1.4978831217148405, "learning_rate": 9.740648035132281e-06, "loss": 0.5374, "step": 13370 }, { "epoch": 0.19284262715650807, "grad_norm": 1.5649772585282857, "learning_rate": 9.739847789043847e-06, "loss": 0.5178, "step": 13380 }, { "epoch": 0.19298675468054136, "grad_norm": 1.6573149706363757, "learning_rate": 9.73904634322767e-06, "loss": 0.5137, "step": 13390 }, { "epoch": 0.1931308822045746, "grad_norm": 1.6463854195975667, "learning_rate": 9.738243697886609e-06, "loss": 0.4896, "step": 13400 }, { "epoch": 0.19327500972860787, "grad_norm": 1.5690364843878064, "learning_rate": 9.737439853223826e-06, "loss": 0.5171, "step": 13410 }, { "epoch": 0.19341913725264115, "grad_norm": 1.5757829292520107, "learning_rate": 9.736634809442786e-06, "loss": 0.5172, "step": 13420 }, { "epoch": 0.1935632647766744, "grad_norm": 1.5192077305113985, "learning_rate": 9.735828566747257e-06, "loss": 0.5089, "step": 13430 }, { "epoch": 0.19370739230070766, "grad_norm": 1.2806243883629782, "learning_rate": 9.735021125341313e-06, "loss": 0.4918, "step": 13440 }, { "epoch": 0.19385151982474094, "grad_norm": 1.613041977934406, "learning_rate": 9.734212485429329e-06, "loss": 0.506, "step": 13450 }, { "epoch": 0.1939956473487742, "grad_norm": 1.6413432872963682, "learning_rate": 9.733402647215986e-06, "loss": 0.5064, "step": 13460 }, { "epoch": 0.19413977487280745, "grad_norm": 1.67281027648677, "learning_rate": 9.732591610906261e-06, "loss": 0.5201, "step": 13470 }, { "epoch": 0.19428390239684074, "grad_norm": 1.7063917975035146, "learning_rate": 9.731779376705445e-06, "loss": 0.506, "step": 13480 }, { "epoch": 0.194428029920874, "grad_norm": 1.5890665945609708, "learning_rate": 9.730965944819125e-06, "loss": 0.5161, "step": 13490 }, { "epoch": 0.19457215744490725, "grad_norm": 1.8597105259771023, "learning_rate": 9.73015131545319e-06, "loss": 0.5514, "step": 13500 }, { "epoch": 0.19471628496894053, "grad_norm": 1.6990826303983249, "learning_rate": 9.729335488813843e-06, "loss": 0.4885, "step": 13510 }, { "epoch": 0.19486041249297378, "grad_norm": 2.436808394525393, "learning_rate": 9.728518465107575e-06, "loss": 0.5155, "step": 13520 }, { "epoch": 0.19500454001700704, "grad_norm": 1.6263665733067871, "learning_rate": 9.727700244541189e-06, "loss": 0.4963, "step": 13530 }, { "epoch": 0.19514866754104032, "grad_norm": 1.4188608824600228, "learning_rate": 9.72688082732179e-06, "loss": 0.4639, "step": 13540 }, { "epoch": 0.19529279506507358, "grad_norm": 1.6544836788003074, "learning_rate": 9.726060213656782e-06, "loss": 0.4931, "step": 13550 }, { "epoch": 0.19543692258910683, "grad_norm": 1.5887291080290527, "learning_rate": 9.72523840375388e-06, "loss": 0.4908, "step": 13560 }, { "epoch": 0.19558105011314011, "grad_norm": 1.5030165207047146, "learning_rate": 9.724415397821095e-06, "loss": 0.4946, "step": 13570 }, { "epoch": 0.19572517763717337, "grad_norm": 1.6319634227889166, "learning_rate": 9.72359119606674e-06, "loss": 0.5528, "step": 13580 }, { "epoch": 0.19586930516120662, "grad_norm": 1.5933197583439727, "learning_rate": 9.722765798699433e-06, "loss": 0.5386, "step": 13590 }, { "epoch": 0.1960134326852399, "grad_norm": 1.5238628588266319, "learning_rate": 9.721939205928096e-06, "loss": 0.4935, "step": 13600 }, { "epoch": 0.19615756020927316, "grad_norm": 1.7452603603761019, "learning_rate": 9.721111417961954e-06, "loss": 0.5138, "step": 13610 }, { "epoch": 0.19630168773330642, "grad_norm": 1.6572904478045045, "learning_rate": 9.720282435010531e-06, "loss": 0.5099, "step": 13620 }, { "epoch": 0.1964458152573397, "grad_norm": 1.4287361800538834, "learning_rate": 9.719452257283653e-06, "loss": 0.5138, "step": 13630 }, { "epoch": 0.19658994278137296, "grad_norm": 1.9047719688860405, "learning_rate": 9.718620884991455e-06, "loss": 0.4983, "step": 13640 }, { "epoch": 0.1967340703054062, "grad_norm": 1.627039038030197, "learning_rate": 9.717788318344366e-06, "loss": 0.5334, "step": 13650 }, { "epoch": 0.1968781978294395, "grad_norm": 1.4706857099208344, "learning_rate": 9.716954557553125e-06, "loss": 0.5115, "step": 13660 }, { "epoch": 0.19702232535347275, "grad_norm": 1.6973823788875118, "learning_rate": 9.716119602828766e-06, "loss": 0.5209, "step": 13670 }, { "epoch": 0.197166452877506, "grad_norm": 1.516741428880145, "learning_rate": 9.71528345438263e-06, "loss": 0.5285, "step": 13680 }, { "epoch": 0.1973105804015393, "grad_norm": 1.6623143114171606, "learning_rate": 9.714446112426362e-06, "loss": 0.5229, "step": 13690 }, { "epoch": 0.19745470792557254, "grad_norm": 1.669614740920426, "learning_rate": 9.713607577171901e-06, "loss": 0.5419, "step": 13700 }, { "epoch": 0.1975988354496058, "grad_norm": 1.5593861134122748, "learning_rate": 9.712767848831497e-06, "loss": 0.4898, "step": 13710 }, { "epoch": 0.19774296297363908, "grad_norm": 1.5439972399973771, "learning_rate": 9.711926927617697e-06, "loss": 0.5617, "step": 13720 }, { "epoch": 0.19788709049767234, "grad_norm": 1.6272813006784468, "learning_rate": 9.71108481374335e-06, "loss": 0.5196, "step": 13730 }, { "epoch": 0.19803121802170562, "grad_norm": 1.4166914257169854, "learning_rate": 9.710241507421607e-06, "loss": 0.5216, "step": 13740 }, { "epoch": 0.19817534554573887, "grad_norm": 1.5537096087526165, "learning_rate": 9.709397008865923e-06, "loss": 0.5183, "step": 13750 }, { "epoch": 0.19831947306977213, "grad_norm": 1.5905727664068336, "learning_rate": 9.708551318290057e-06, "loss": 0.5125, "step": 13760 }, { "epoch": 0.1984636005938054, "grad_norm": 1.7917618615772868, "learning_rate": 9.707704435908059e-06, "loss": 0.5389, "step": 13770 }, { "epoch": 0.19860772811783867, "grad_norm": 1.4237568472687303, "learning_rate": 9.706856361934293e-06, "loss": 0.4892, "step": 13780 }, { "epoch": 0.19875185564187192, "grad_norm": 1.728426332300924, "learning_rate": 9.706007096583417e-06, "loss": 0.5015, "step": 13790 }, { "epoch": 0.1988959831659052, "grad_norm": 1.4836434778392569, "learning_rate": 9.705156640070395e-06, "loss": 0.4849, "step": 13800 }, { "epoch": 0.19904011068993846, "grad_norm": 1.5234082315472113, "learning_rate": 9.70430499261049e-06, "loss": 0.5087, "step": 13810 }, { "epoch": 0.19918423821397171, "grad_norm": 1.5133481769175257, "learning_rate": 9.703452154419265e-06, "loss": 0.5288, "step": 13820 }, { "epoch": 0.199328365738005, "grad_norm": 1.6384643716784537, "learning_rate": 9.702598125712588e-06, "loss": 0.5326, "step": 13830 }, { "epoch": 0.19947249326203825, "grad_norm": 1.6391222651093178, "learning_rate": 9.701742906706626e-06, "loss": 0.511, "step": 13840 }, { "epoch": 0.1996166207860715, "grad_norm": 1.5835409751770038, "learning_rate": 9.700886497617849e-06, "loss": 0.5268, "step": 13850 }, { "epoch": 0.1997607483101048, "grad_norm": 1.666565764570527, "learning_rate": 9.700028898663028e-06, "loss": 0.5062, "step": 13860 }, { "epoch": 0.19990487583413805, "grad_norm": 1.6989829545671837, "learning_rate": 9.699170110059231e-06, "loss": 0.5374, "step": 13870 }, { "epoch": 0.2000490033581713, "grad_norm": 1.6009712011350452, "learning_rate": 9.698310132023835e-06, "loss": 0.5212, "step": 13880 }, { "epoch": 0.20019313088220458, "grad_norm": 1.4440622108891943, "learning_rate": 9.697448964774509e-06, "loss": 0.4997, "step": 13890 }, { "epoch": 0.20033725840623784, "grad_norm": 1.616260931391138, "learning_rate": 9.69658660852923e-06, "loss": 0.5203, "step": 13900 }, { "epoch": 0.2004813859302711, "grad_norm": 1.57468399438094, "learning_rate": 9.695723063506272e-06, "loss": 0.4843, "step": 13910 }, { "epoch": 0.20062551345430438, "grad_norm": 1.6743602968119322, "learning_rate": 9.694858329924215e-06, "loss": 0.5171, "step": 13920 }, { "epoch": 0.20076964097833763, "grad_norm": 1.5053768015191729, "learning_rate": 9.693992408001934e-06, "loss": 0.5169, "step": 13930 }, { "epoch": 0.2009137685023709, "grad_norm": 1.7364873006779118, "learning_rate": 9.693125297958605e-06, "loss": 0.5577, "step": 13940 }, { "epoch": 0.20105789602640417, "grad_norm": 1.8998368092639262, "learning_rate": 9.692257000013708e-06, "loss": 0.5091, "step": 13950 }, { "epoch": 0.20120202355043743, "grad_norm": 1.5296730914369885, "learning_rate": 9.691387514387025e-06, "loss": 0.5197, "step": 13960 }, { "epoch": 0.20134615107447068, "grad_norm": 1.4198073498264123, "learning_rate": 9.690516841298633e-06, "loss": 0.5044, "step": 13970 }, { "epoch": 0.20149027859850396, "grad_norm": 1.7654708076540224, "learning_rate": 9.689644980968914e-06, "loss": 0.522, "step": 13980 }, { "epoch": 0.20163440612253722, "grad_norm": 1.1080730576887359, "learning_rate": 9.68877193361855e-06, "loss": 0.5215, "step": 13990 }, { "epoch": 0.20177853364657047, "grad_norm": 1.5944210636954126, "learning_rate": 9.68789769946852e-06, "loss": 0.5409, "step": 14000 }, { "epoch": 0.20192266117060376, "grad_norm": 1.6568720424107248, "learning_rate": 9.687022278740107e-06, "loss": 0.5056, "step": 14010 }, { "epoch": 0.202066788694637, "grad_norm": 1.7044329174791875, "learning_rate": 9.686145671654896e-06, "loss": 0.5133, "step": 14020 }, { "epoch": 0.20221091621867027, "grad_norm": 1.6704770204424497, "learning_rate": 9.685267878434766e-06, "loss": 0.5149, "step": 14030 }, { "epoch": 0.20235504374270355, "grad_norm": 1.0639599421223942, "learning_rate": 9.684388899301902e-06, "loss": 0.4555, "step": 14040 }, { "epoch": 0.2024991712667368, "grad_norm": 1.6805647267009212, "learning_rate": 9.683508734478786e-06, "loss": 0.4985, "step": 14050 }, { "epoch": 0.20264329879077006, "grad_norm": 1.538381031292001, "learning_rate": 9.682627384188202e-06, "loss": 0.4994, "step": 14060 }, { "epoch": 0.20278742631480334, "grad_norm": 1.5866325527003244, "learning_rate": 9.681744848653231e-06, "loss": 0.5314, "step": 14070 }, { "epoch": 0.2029315538388366, "grad_norm": 1.4805883224386713, "learning_rate": 9.68086112809726e-06, "loss": 0.4961, "step": 14080 }, { "epoch": 0.20307568136286988, "grad_norm": 1.47191936691781, "learning_rate": 9.679976222743968e-06, "loss": 0.5384, "step": 14090 }, { "epoch": 0.20321980888690314, "grad_norm": 1.6331923289320116, "learning_rate": 9.67909013281734e-06, "loss": 0.5328, "step": 14100 }, { "epoch": 0.2033639364109364, "grad_norm": 1.4666050643311401, "learning_rate": 9.67820285854166e-06, "loss": 0.4984, "step": 14110 }, { "epoch": 0.20350806393496967, "grad_norm": 1.6050951449825541, "learning_rate": 9.677314400141511e-06, "loss": 0.5162, "step": 14120 }, { "epoch": 0.20365219145900293, "grad_norm": 1.5554053209495384, "learning_rate": 9.676424757841772e-06, "loss": 0.5143, "step": 14130 }, { "epoch": 0.20379631898303618, "grad_norm": 1.7106452941656671, "learning_rate": 9.675533931867626e-06, "loss": 0.5048, "step": 14140 }, { "epoch": 0.20394044650706947, "grad_norm": 1.5955057537861679, "learning_rate": 9.67464192244456e-06, "loss": 0.4934, "step": 14150 }, { "epoch": 0.20408457403110272, "grad_norm": 1.4737770184200254, "learning_rate": 9.673748729798346e-06, "loss": 0.507, "step": 14160 }, { "epoch": 0.20422870155513598, "grad_norm": 2.1370865570150253, "learning_rate": 9.672854354155071e-06, "loss": 0.5407, "step": 14170 }, { "epoch": 0.20437282907916926, "grad_norm": 1.6831306508063848, "learning_rate": 9.671958795741114e-06, "loss": 0.5007, "step": 14180 }, { "epoch": 0.20451695660320252, "grad_norm": 1.6977899616147483, "learning_rate": 9.671062054783154e-06, "loss": 0.5104, "step": 14190 }, { "epoch": 0.20466108412723577, "grad_norm": 1.5601217482160823, "learning_rate": 9.670164131508168e-06, "loss": 0.529, "step": 14200 }, { "epoch": 0.20480521165126905, "grad_norm": 1.7578064496774235, "learning_rate": 9.669265026143438e-06, "loss": 0.4991, "step": 14210 }, { "epoch": 0.2049493391753023, "grad_norm": 1.2810999272846997, "learning_rate": 9.668364738916539e-06, "loss": 0.5013, "step": 14220 }, { "epoch": 0.20509346669933556, "grad_norm": 1.7800289260210085, "learning_rate": 9.667463270055348e-06, "loss": 0.5257, "step": 14230 }, { "epoch": 0.20523759422336885, "grad_norm": 1.8767411514536887, "learning_rate": 9.666560619788038e-06, "loss": 0.5095, "step": 14240 }, { "epoch": 0.2053817217474021, "grad_norm": 1.6749248759024609, "learning_rate": 9.665656788343089e-06, "loss": 0.5322, "step": 14250 }, { "epoch": 0.20552584927143536, "grad_norm": 1.5438853008204563, "learning_rate": 9.664751775949268e-06, "loss": 0.5092, "step": 14260 }, { "epoch": 0.20566997679546864, "grad_norm": 1.3499879108080304, "learning_rate": 9.663845582835653e-06, "loss": 0.511, "step": 14270 }, { "epoch": 0.2058141043195019, "grad_norm": 1.7987926908700211, "learning_rate": 9.662938209231611e-06, "loss": 0.4961, "step": 14280 }, { "epoch": 0.20595823184353515, "grad_norm": 1.4519367165388326, "learning_rate": 9.662029655366816e-06, "loss": 0.5256, "step": 14290 }, { "epoch": 0.20610235936756843, "grad_norm": 1.4766564588221007, "learning_rate": 9.661119921471234e-06, "loss": 0.5183, "step": 14300 }, { "epoch": 0.2062464868916017, "grad_norm": 1.4285357602436748, "learning_rate": 9.660209007775133e-06, "loss": 0.5054, "step": 14310 }, { "epoch": 0.20639061441563494, "grad_norm": 1.7957374549555754, "learning_rate": 9.659296914509079e-06, "loss": 0.5404, "step": 14320 }, { "epoch": 0.20653474193966823, "grad_norm": 1.8076376958737408, "learning_rate": 9.658383641903938e-06, "loss": 0.5298, "step": 14330 }, { "epoch": 0.20667886946370148, "grad_norm": 1.2813481000973708, "learning_rate": 9.657469190190873e-06, "loss": 0.5024, "step": 14340 }, { "epoch": 0.20682299698773474, "grad_norm": 1.4713850008062965, "learning_rate": 9.656553559601344e-06, "loss": 0.4994, "step": 14350 }, { "epoch": 0.20696712451176802, "grad_norm": 1.7739231711490107, "learning_rate": 9.655636750367114e-06, "loss": 0.5069, "step": 14360 }, { "epoch": 0.20711125203580127, "grad_norm": 1.7683079602385063, "learning_rate": 9.654718762720238e-06, "loss": 0.5139, "step": 14370 }, { "epoch": 0.20725537955983453, "grad_norm": 1.4690048542712217, "learning_rate": 9.653799596893076e-06, "loss": 0.5115, "step": 14380 }, { "epoch": 0.2073995070838678, "grad_norm": 1.465000750244727, "learning_rate": 9.65287925311828e-06, "loss": 0.4862, "step": 14390 }, { "epoch": 0.20754363460790107, "grad_norm": 1.5937260636981183, "learning_rate": 9.651957731628802e-06, "loss": 0.5352, "step": 14400 }, { "epoch": 0.20768776213193432, "grad_norm": 1.5831048372661058, "learning_rate": 9.651035032657898e-06, "loss": 0.5116, "step": 14410 }, { "epoch": 0.2078318896559676, "grad_norm": 1.775951754197746, "learning_rate": 9.650111156439114e-06, "loss": 0.4978, "step": 14420 }, { "epoch": 0.20797601718000086, "grad_norm": 1.460785678121549, "learning_rate": 9.649186103206298e-06, "loss": 0.489, "step": 14430 }, { "epoch": 0.20812014470403412, "grad_norm": 1.7832920161057084, "learning_rate": 9.648259873193595e-06, "loss": 0.5161, "step": 14440 }, { "epoch": 0.2082642722280674, "grad_norm": 1.5582248524995463, "learning_rate": 9.647332466635446e-06, "loss": 0.5118, "step": 14450 }, { "epoch": 0.20840839975210065, "grad_norm": 1.4417709892901758, "learning_rate": 9.646403883766596e-06, "loss": 0.4917, "step": 14460 }, { "epoch": 0.20855252727613394, "grad_norm": 1.5619816326286136, "learning_rate": 9.645474124822078e-06, "loss": 0.5151, "step": 14470 }, { "epoch": 0.2086966548001672, "grad_norm": 1.6518978459722542, "learning_rate": 9.644543190037232e-06, "loss": 0.4849, "step": 14480 }, { "epoch": 0.20884078232420045, "grad_norm": 1.3857059554818805, "learning_rate": 9.643611079647693e-06, "loss": 0.5216, "step": 14490 }, { "epoch": 0.20898490984823373, "grad_norm": 1.5829180744900813, "learning_rate": 9.642677793889387e-06, "loss": 0.5338, "step": 14500 }, { "epoch": 0.20912903737226698, "grad_norm": 1.392978320481961, "learning_rate": 9.641743332998547e-06, "loss": 0.5083, "step": 14510 }, { "epoch": 0.20927316489630024, "grad_norm": 1.7464794082913238, "learning_rate": 9.6408076972117e-06, "loss": 0.5114, "step": 14520 }, { "epoch": 0.20941729242033352, "grad_norm": 1.771846991275052, "learning_rate": 9.639870886765666e-06, "loss": 0.5102, "step": 14530 }, { "epoch": 0.20956141994436678, "grad_norm": 1.3863598486583133, "learning_rate": 9.638932901897568e-06, "loss": 0.4928, "step": 14540 }, { "epoch": 0.20970554746840003, "grad_norm": 1.7185349559025431, "learning_rate": 9.637993742844828e-06, "loss": 0.5479, "step": 14550 }, { "epoch": 0.20984967499243332, "grad_norm": 1.3277612417875044, "learning_rate": 9.637053409845155e-06, "loss": 0.5201, "step": 14560 }, { "epoch": 0.20999380251646657, "grad_norm": 1.3136508087348997, "learning_rate": 9.636111903136562e-06, "loss": 0.5174, "step": 14570 }, { "epoch": 0.21013793004049983, "grad_norm": 1.621225355688089, "learning_rate": 9.635169222957363e-06, "loss": 0.5294, "step": 14580 }, { "epoch": 0.2102820575645331, "grad_norm": 1.3152790389579814, "learning_rate": 9.634225369546163e-06, "loss": 0.497, "step": 14590 }, { "epoch": 0.21042618508856636, "grad_norm": 1.6120638934077873, "learning_rate": 9.633280343141868e-06, "loss": 0.5028, "step": 14600 }, { "epoch": 0.21057031261259962, "grad_norm": 1.5736237445789305, "learning_rate": 9.632334143983674e-06, "loss": 0.546, "step": 14610 }, { "epoch": 0.2107144401366329, "grad_norm": 1.2206669589122976, "learning_rate": 9.63138677231108e-06, "loss": 0.5078, "step": 14620 }, { "epoch": 0.21085856766066616, "grad_norm": 1.29886673633247, "learning_rate": 9.630438228363881e-06, "loss": 0.5228, "step": 14630 }, { "epoch": 0.2110026951846994, "grad_norm": 1.6853961112681777, "learning_rate": 9.629488512382168e-06, "loss": 0.5075, "step": 14640 }, { "epoch": 0.2111468227087327, "grad_norm": 1.4857797215917583, "learning_rate": 9.62853762460633e-06, "loss": 0.5303, "step": 14650 }, { "epoch": 0.21129095023276595, "grad_norm": 1.493914321577519, "learning_rate": 9.627585565277048e-06, "loss": 0.5209, "step": 14660 }, { "epoch": 0.2114350777567992, "grad_norm": 1.5354794547436907, "learning_rate": 9.626632334635304e-06, "loss": 0.5166, "step": 14670 }, { "epoch": 0.2115792052808325, "grad_norm": 1.6065441033244907, "learning_rate": 9.625677932922375e-06, "loss": 0.4946, "step": 14680 }, { "epoch": 0.21172333280486574, "grad_norm": 1.6623644964818802, "learning_rate": 9.624722360379835e-06, "loss": 0.5178, "step": 14690 }, { "epoch": 0.211867460328899, "grad_norm": 1.4583878962595096, "learning_rate": 9.623765617249555e-06, "loss": 0.5235, "step": 14700 }, { "epoch": 0.21201158785293228, "grad_norm": 1.3419598340439094, "learning_rate": 9.6228077037737e-06, "loss": 0.4903, "step": 14710 }, { "epoch": 0.21215571537696554, "grad_norm": 1.4979279868412732, "learning_rate": 9.621848620194732e-06, "loss": 0.5282, "step": 14720 }, { "epoch": 0.2122998429009988, "grad_norm": 1.4789600607431288, "learning_rate": 9.620888366755412e-06, "loss": 0.5112, "step": 14730 }, { "epoch": 0.21244397042503207, "grad_norm": 1.4957531304452905, "learning_rate": 9.619926943698792e-06, "loss": 0.5242, "step": 14740 }, { "epoch": 0.21258809794906533, "grad_norm": 1.668033201040191, "learning_rate": 9.618964351268225e-06, "loss": 0.5039, "step": 14750 }, { "epoch": 0.21273222547309859, "grad_norm": 1.3281916087361918, "learning_rate": 9.618000589707357e-06, "loss": 0.5032, "step": 14760 }, { "epoch": 0.21287635299713187, "grad_norm": 1.3963977446264502, "learning_rate": 9.61703565926013e-06, "loss": 0.4986, "step": 14770 }, { "epoch": 0.21302048052116512, "grad_norm": 1.4962420434520638, "learning_rate": 9.616069560170782e-06, "loss": 0.5009, "step": 14780 }, { "epoch": 0.21316460804519838, "grad_norm": 1.7078121186007187, "learning_rate": 9.61510229268385e-06, "loss": 0.518, "step": 14790 }, { "epoch": 0.21330873556923166, "grad_norm": 1.7260608158939346, "learning_rate": 9.614133857044164e-06, "loss": 0.5119, "step": 14800 }, { "epoch": 0.21345286309326492, "grad_norm": 1.3826609859250407, "learning_rate": 9.613164253496847e-06, "loss": 0.4724, "step": 14810 }, { "epoch": 0.2135969906172982, "grad_norm": 1.5271576964448463, "learning_rate": 9.612193482287325e-06, "loss": 0.4943, "step": 14820 }, { "epoch": 0.21374111814133145, "grad_norm": 1.7562124573292492, "learning_rate": 9.611221543661309e-06, "loss": 0.5188, "step": 14830 }, { "epoch": 0.2138852456653647, "grad_norm": 1.3970057851837199, "learning_rate": 9.610248437864815e-06, "loss": 0.5153, "step": 14840 }, { "epoch": 0.214029373189398, "grad_norm": 1.5235703785854338, "learning_rate": 9.60927416514415e-06, "loss": 0.4928, "step": 14850 }, { "epoch": 0.21417350071343125, "grad_norm": 1.3218436545044299, "learning_rate": 9.60829872574592e-06, "loss": 0.4904, "step": 14860 }, { "epoch": 0.2143176282374645, "grad_norm": 1.730439278899538, "learning_rate": 9.607322119917021e-06, "loss": 0.5395, "step": 14870 }, { "epoch": 0.21446175576149779, "grad_norm": 1.537187350494359, "learning_rate": 9.606344347904646e-06, "loss": 0.5128, "step": 14880 }, { "epoch": 0.21460588328553104, "grad_norm": 1.662208906996588, "learning_rate": 9.605365409956285e-06, "loss": 0.5061, "step": 14890 }, { "epoch": 0.2147500108095643, "grad_norm": 1.6420749181820253, "learning_rate": 9.604385306319724e-06, "loss": 0.5129, "step": 14900 }, { "epoch": 0.21489413833359758, "grad_norm": 1.4534288795399921, "learning_rate": 9.603404037243039e-06, "loss": 0.5156, "step": 14910 }, { "epoch": 0.21503826585763083, "grad_norm": 1.3823770952589973, "learning_rate": 9.602421602974605e-06, "loss": 0.5162, "step": 14920 }, { "epoch": 0.2151823933816641, "grad_norm": 1.3685683425642818, "learning_rate": 9.601438003763095e-06, "loss": 0.5344, "step": 14930 }, { "epoch": 0.21532652090569737, "grad_norm": 1.756844876395684, "learning_rate": 9.600453239857466e-06, "loss": 0.5275, "step": 14940 }, { "epoch": 0.21547064842973063, "grad_norm": 1.6126822632436548, "learning_rate": 9.599467311506983e-06, "loss": 0.4825, "step": 14950 }, { "epoch": 0.21561477595376388, "grad_norm": 1.3943160574809086, "learning_rate": 9.598480218961195e-06, "loss": 0.5018, "step": 14960 }, { "epoch": 0.21575890347779716, "grad_norm": 1.3551479633244783, "learning_rate": 9.597491962469953e-06, "loss": 0.4898, "step": 14970 }, { "epoch": 0.21590303100183042, "grad_norm": 1.4702974685511483, "learning_rate": 9.596502542283399e-06, "loss": 0.5359, "step": 14980 }, { "epoch": 0.21604715852586368, "grad_norm": 1.5643562201859962, "learning_rate": 9.59551195865197e-06, "loss": 0.514, "step": 14990 }, { "epoch": 0.21619128604989696, "grad_norm": 1.4442573703898303, "learning_rate": 9.594520211826398e-06, "loss": 0.4838, "step": 15000 }, { "epoch": 0.2163354135739302, "grad_norm": 1.453086904137718, "learning_rate": 9.59352730205771e-06, "loss": 0.4849, "step": 15010 }, { "epoch": 0.21647954109796347, "grad_norm": 3.3734629547601895, "learning_rate": 9.592533229597227e-06, "loss": 0.5319, "step": 15020 }, { "epoch": 0.21662366862199675, "grad_norm": 1.5891201730024493, "learning_rate": 9.591537994696561e-06, "loss": 0.501, "step": 15030 }, { "epoch": 0.21676779614603, "grad_norm": 1.5454332422072565, "learning_rate": 9.590541597607624e-06, "loss": 0.5053, "step": 15040 }, { "epoch": 0.21691192367006326, "grad_norm": 1.7448139946030623, "learning_rate": 9.589544038582617e-06, "loss": 0.5284, "step": 15050 }, { "epoch": 0.21705605119409654, "grad_norm": 1.6266611834809817, "learning_rate": 9.58854531787404e-06, "loss": 0.4942, "step": 15060 }, { "epoch": 0.2172001787181298, "grad_norm": 1.4237464486064597, "learning_rate": 9.587545435734682e-06, "loss": 0.5088, "step": 15070 }, { "epoch": 0.21734430624216305, "grad_norm": 1.9325137330491282, "learning_rate": 9.58654439241763e-06, "loss": 0.4948, "step": 15080 }, { "epoch": 0.21748843376619634, "grad_norm": 1.4960942970237507, "learning_rate": 9.585542188176262e-06, "loss": 0.522, "step": 15090 }, { "epoch": 0.2176325612902296, "grad_norm": 1.6010977957184318, "learning_rate": 9.584538823264254e-06, "loss": 0.5196, "step": 15100 }, { "epoch": 0.21777668881426285, "grad_norm": 1.697054638703512, "learning_rate": 9.583534297935569e-06, "loss": 0.4903, "step": 15110 }, { "epoch": 0.21792081633829613, "grad_norm": 1.5791659418016641, "learning_rate": 9.582528612444469e-06, "loss": 0.5224, "step": 15120 }, { "epoch": 0.21806494386232939, "grad_norm": 1.540837737178636, "learning_rate": 9.58152176704551e-06, "loss": 0.4741, "step": 15130 }, { "epoch": 0.21820907138636264, "grad_norm": 1.6954789078185493, "learning_rate": 9.58051376199354e-06, "loss": 0.5253, "step": 15140 }, { "epoch": 0.21835319891039592, "grad_norm": 1.6627776493291035, "learning_rate": 9.579504597543699e-06, "loss": 0.5322, "step": 15150 }, { "epoch": 0.21849732643442918, "grad_norm": 1.3329578547602856, "learning_rate": 9.578494273951421e-06, "loss": 0.5402, "step": 15160 }, { "epoch": 0.21864145395846243, "grad_norm": 1.8181295843934644, "learning_rate": 9.577482791472436e-06, "loss": 0.4929, "step": 15170 }, { "epoch": 0.21878558148249572, "grad_norm": 1.672390553362502, "learning_rate": 9.576470150362765e-06, "loss": 0.5117, "step": 15180 }, { "epoch": 0.21892970900652897, "grad_norm": 1.7393130394995626, "learning_rate": 9.575456350878724e-06, "loss": 0.4911, "step": 15190 }, { "epoch": 0.21907383653056225, "grad_norm": 1.7737759824487176, "learning_rate": 9.57444139327692e-06, "loss": 0.516, "step": 15200 }, { "epoch": 0.2192179640545955, "grad_norm": 1.535386593751621, "learning_rate": 9.573425277814253e-06, "loss": 0.5206, "step": 15210 }, { "epoch": 0.21936209157862877, "grad_norm": 1.3834934137412624, "learning_rate": 9.57240800474792e-06, "loss": 0.5067, "step": 15220 }, { "epoch": 0.21950621910266205, "grad_norm": 1.740695720243414, "learning_rate": 9.571389574335407e-06, "loss": 0.4945, "step": 15230 }, { "epoch": 0.2196503466266953, "grad_norm": 1.4580675052125898, "learning_rate": 9.570369986834493e-06, "loss": 0.5187, "step": 15240 }, { "epoch": 0.21979447415072856, "grad_norm": 1.6935125434336622, "learning_rate": 9.569349242503253e-06, "loss": 0.5041, "step": 15250 }, { "epoch": 0.21993860167476184, "grad_norm": 1.4536142222073594, "learning_rate": 9.568327341600055e-06, "loss": 0.5052, "step": 15260 }, { "epoch": 0.2200827291987951, "grad_norm": 1.7363165880304605, "learning_rate": 9.567304284383551e-06, "loss": 0.5059, "step": 15270 }, { "epoch": 0.22022685672282835, "grad_norm": 1.565863158700179, "learning_rate": 9.5662800711127e-06, "loss": 0.5047, "step": 15280 }, { "epoch": 0.22037098424686163, "grad_norm": 1.5489589201770277, "learning_rate": 9.565254702046742e-06, "loss": 0.5016, "step": 15290 }, { "epoch": 0.2205151117708949, "grad_norm": 1.5882312634449909, "learning_rate": 9.564228177445212e-06, "loss": 0.5115, "step": 15300 }, { "epoch": 0.22065923929492814, "grad_norm": 1.400167777782419, "learning_rate": 9.563200497567943e-06, "loss": 0.5316, "step": 15310 }, { "epoch": 0.22080336681896143, "grad_norm": 1.9472981395596516, "learning_rate": 9.562171662675054e-06, "loss": 0.4833, "step": 15320 }, { "epoch": 0.22094749434299468, "grad_norm": 1.4746482445371305, "learning_rate": 9.56114167302696e-06, "loss": 0.487, "step": 15330 }, { "epoch": 0.22109162186702794, "grad_norm": 1.462905821643579, "learning_rate": 9.560110528884364e-06, "loss": 0.5125, "step": 15340 }, { "epoch": 0.22123574939106122, "grad_norm": 1.7294927641385696, "learning_rate": 9.559078230508268e-06, "loss": 0.5145, "step": 15350 }, { "epoch": 0.22137987691509448, "grad_norm": 1.5357208028305311, "learning_rate": 9.558044778159962e-06, "loss": 0.5087, "step": 15360 }, { "epoch": 0.22152400443912773, "grad_norm": 1.686670325708887, "learning_rate": 9.557010172101026e-06, "loss": 0.5115, "step": 15370 }, { "epoch": 0.221668131963161, "grad_norm": 1.4976503650810453, "learning_rate": 9.555974412593339e-06, "loss": 0.5353, "step": 15380 }, { "epoch": 0.22181225948719427, "grad_norm": 1.2552011247927677, "learning_rate": 9.554937499899062e-06, "loss": 0.5028, "step": 15390 }, { "epoch": 0.22195638701122752, "grad_norm": 1.7022821817550622, "learning_rate": 9.553899434280657e-06, "loss": 0.4981, "step": 15400 }, { "epoch": 0.2221005145352608, "grad_norm": 1.348122981408367, "learning_rate": 9.552860216000874e-06, "loss": 0.4846, "step": 15410 }, { "epoch": 0.22224464205929406, "grad_norm": 1.6932331080679532, "learning_rate": 9.551819845322756e-06, "loss": 0.5343, "step": 15420 }, { "epoch": 0.22238876958332732, "grad_norm": 1.4416318377609934, "learning_rate": 9.550778322509633e-06, "loss": 0.5074, "step": 15430 }, { "epoch": 0.2225328971073606, "grad_norm": 1.6443166445315538, "learning_rate": 9.549735647825132e-06, "loss": 0.5288, "step": 15440 }, { "epoch": 0.22267702463139386, "grad_norm": 1.6554268732919208, "learning_rate": 9.548691821533172e-06, "loss": 0.4883, "step": 15450 }, { "epoch": 0.2228211521554271, "grad_norm": 1.4734696474627744, "learning_rate": 9.547646843897959e-06, "loss": 0.5026, "step": 15460 }, { "epoch": 0.2229652796794604, "grad_norm": 1.5021942317598513, "learning_rate": 9.546600715183993e-06, "loss": 0.5067, "step": 15470 }, { "epoch": 0.22310940720349365, "grad_norm": 1.497561931431536, "learning_rate": 9.545553435656065e-06, "loss": 0.4906, "step": 15480 }, { "epoch": 0.2232535347275269, "grad_norm": 1.7205565908499005, "learning_rate": 9.544505005579259e-06, "loss": 0.5043, "step": 15490 }, { "epoch": 0.2233976622515602, "grad_norm": 1.6328445318173042, "learning_rate": 9.543455425218947e-06, "loss": 0.5317, "step": 15500 }, { "epoch": 0.22354178977559344, "grad_norm": 1.5376142136377524, "learning_rate": 9.542404694840794e-06, "loss": 0.5338, "step": 15510 }, { "epoch": 0.2236859172996267, "grad_norm": 2.096570210640558, "learning_rate": 9.541352814710758e-06, "loss": 0.5126, "step": 15520 }, { "epoch": 0.22383004482365998, "grad_norm": 1.8379985653998197, "learning_rate": 9.540299785095082e-06, "loss": 0.5588, "step": 15530 }, { "epoch": 0.22397417234769323, "grad_norm": 1.5238997056963048, "learning_rate": 9.539245606260308e-06, "loss": 0.4754, "step": 15540 }, { "epoch": 0.22411829987172652, "grad_norm": 1.4995417889272216, "learning_rate": 9.538190278473261e-06, "loss": 0.4932, "step": 15550 }, { "epoch": 0.22426242739575977, "grad_norm": 1.5565093651612532, "learning_rate": 9.537133802001063e-06, "loss": 0.5061, "step": 15560 }, { "epoch": 0.22440655491979303, "grad_norm": 1.5101497101160644, "learning_rate": 9.536076177111124e-06, "loss": 0.5169, "step": 15570 }, { "epoch": 0.2245506824438263, "grad_norm": 1.596232473250312, "learning_rate": 9.535017404071146e-06, "loss": 0.4989, "step": 15580 }, { "epoch": 0.22469480996785957, "grad_norm": 1.6664386818910264, "learning_rate": 9.533957483149117e-06, "loss": 0.4976, "step": 15590 }, { "epoch": 0.22483893749189282, "grad_norm": 1.5153192633363541, "learning_rate": 9.532896414613321e-06, "loss": 0.5212, "step": 15600 }, { "epoch": 0.2249830650159261, "grad_norm": 1.5649690187228502, "learning_rate": 9.53183419873233e-06, "loss": 0.5058, "step": 15610 }, { "epoch": 0.22512719253995936, "grad_norm": 1.3905664301094507, "learning_rate": 9.530770835775012e-06, "loss": 0.4916, "step": 15620 }, { "epoch": 0.22527132006399261, "grad_norm": 1.4768121813213855, "learning_rate": 9.529706326010512e-06, "loss": 0.5226, "step": 15630 }, { "epoch": 0.2254154475880259, "grad_norm": 1.084119282185439, "learning_rate": 9.528640669708282e-06, "loss": 0.5048, "step": 15640 }, { "epoch": 0.22555957511205915, "grad_norm": 1.4472735187057322, "learning_rate": 9.52757386713805e-06, "loss": 0.5161, "step": 15650 }, { "epoch": 0.2257037026360924, "grad_norm": 1.3500817734586144, "learning_rate": 9.526505918569841e-06, "loss": 0.5013, "step": 15660 }, { "epoch": 0.2258478301601257, "grad_norm": 1.7771779645173187, "learning_rate": 9.52543682427397e-06, "loss": 0.5272, "step": 15670 }, { "epoch": 0.22599195768415895, "grad_norm": 1.6540863709294313, "learning_rate": 9.524366584521042e-06, "loss": 0.5373, "step": 15680 }, { "epoch": 0.2261360852081922, "grad_norm": 1.4925984599895976, "learning_rate": 9.52329519958195e-06, "loss": 0.5043, "step": 15690 }, { "epoch": 0.22628021273222548, "grad_norm": 1.7016962122055246, "learning_rate": 9.522222669727877e-06, "loss": 0.4989, "step": 15700 }, { "epoch": 0.22642434025625874, "grad_norm": 1.4606864888094642, "learning_rate": 9.521148995230298e-06, "loss": 0.4782, "step": 15710 }, { "epoch": 0.226568467780292, "grad_norm": 1.384472213948412, "learning_rate": 9.520074176360977e-06, "loss": 0.4956, "step": 15720 }, { "epoch": 0.22671259530432528, "grad_norm": 1.6532977477379185, "learning_rate": 9.518998213391965e-06, "loss": 0.5307, "step": 15730 }, { "epoch": 0.22685672282835853, "grad_norm": 1.4611750778503152, "learning_rate": 9.517921106595605e-06, "loss": 0.4965, "step": 15740 }, { "epoch": 0.2270008503523918, "grad_norm": 1.579827377359887, "learning_rate": 9.51684285624453e-06, "loss": 0.4946, "step": 15750 }, { "epoch": 0.22714497787642507, "grad_norm": 1.8105093424766137, "learning_rate": 9.515763462611662e-06, "loss": 0.5023, "step": 15760 }, { "epoch": 0.22728910540045832, "grad_norm": 1.6364312770324878, "learning_rate": 9.51468292597021e-06, "loss": 0.496, "step": 15770 }, { "epoch": 0.22743323292449158, "grad_norm": 1.4446648191341827, "learning_rate": 9.513601246593677e-06, "loss": 0.5148, "step": 15780 }, { "epoch": 0.22757736044852486, "grad_norm": 1.3385029171670058, "learning_rate": 9.51251842475585e-06, "loss": 0.4899, "step": 15790 }, { "epoch": 0.22772148797255812, "grad_norm": 1.5455022314869118, "learning_rate": 9.51143446073081e-06, "loss": 0.5068, "step": 15800 }, { "epoch": 0.22786561549659137, "grad_norm": 1.5891262884448176, "learning_rate": 9.510349354792925e-06, "loss": 0.4771, "step": 15810 }, { "epoch": 0.22800974302062466, "grad_norm": 1.4459226362678588, "learning_rate": 9.509263107216849e-06, "loss": 0.5173, "step": 15820 }, { "epoch": 0.2281538705446579, "grad_norm": 1.552566015458476, "learning_rate": 9.508175718277529e-06, "loss": 0.4813, "step": 15830 }, { "epoch": 0.22829799806869117, "grad_norm": 1.3877108127744342, "learning_rate": 9.5070871882502e-06, "loss": 0.4829, "step": 15840 }, { "epoch": 0.22844212559272445, "grad_norm": 1.6424402444865749, "learning_rate": 9.505997517410386e-06, "loss": 0.5272, "step": 15850 }, { "epoch": 0.2285862531167577, "grad_norm": 1.5023134140751977, "learning_rate": 9.504906706033899e-06, "loss": 0.5149, "step": 15860 }, { "epoch": 0.22873038064079096, "grad_norm": 1.3318052504247386, "learning_rate": 9.50381475439684e-06, "loss": 0.4762, "step": 15870 }, { "epoch": 0.22887450816482424, "grad_norm": 1.6667460999030461, "learning_rate": 9.502721662775597e-06, "loss": 0.5166, "step": 15880 }, { "epoch": 0.2290186356888575, "grad_norm": 1.4308345194470016, "learning_rate": 9.501627431446851e-06, "loss": 0.4997, "step": 15890 }, { "epoch": 0.22916276321289075, "grad_norm": 1.7887774547266493, "learning_rate": 9.500532060687569e-06, "loss": 0.4957, "step": 15900 }, { "epoch": 0.22930689073692404, "grad_norm": 1.459438721418775, "learning_rate": 9.499435550775002e-06, "loss": 0.4899, "step": 15910 }, { "epoch": 0.2294510182609573, "grad_norm": 1.608672134412968, "learning_rate": 9.498337901986698e-06, "loss": 0.5081, "step": 15920 }, { "epoch": 0.22959514578499057, "grad_norm": 1.3866608396356963, "learning_rate": 9.497239114600484e-06, "loss": 0.5084, "step": 15930 }, { "epoch": 0.22973927330902383, "grad_norm": 1.6146113261659496, "learning_rate": 9.496139188894484e-06, "loss": 0.5097, "step": 15940 }, { "epoch": 0.22988340083305708, "grad_norm": 1.6430784643458007, "learning_rate": 9.495038125147102e-06, "loss": 0.4921, "step": 15950 }, { "epoch": 0.23002752835709037, "grad_norm": 1.49589748402394, "learning_rate": 9.493935923637038e-06, "loss": 0.4829, "step": 15960 }, { "epoch": 0.23017165588112362, "grad_norm": 1.5803823249995306, "learning_rate": 9.492832584643275e-06, "loss": 0.5185, "step": 15970 }, { "epoch": 0.23031578340515688, "grad_norm": 1.514746964603345, "learning_rate": 9.491728108445082e-06, "loss": 0.4934, "step": 15980 }, { "epoch": 0.23045991092919016, "grad_norm": 1.4767661100611067, "learning_rate": 9.490622495322022e-06, "loss": 0.512, "step": 15990 }, { "epoch": 0.23060403845322341, "grad_norm": 1.5974611581426132, "learning_rate": 9.489515745553942e-06, "loss": 0.513, "step": 16000 }, { "epoch": 0.23074816597725667, "grad_norm": 1.5171608881597796, "learning_rate": 9.488407859420977e-06, "loss": 0.5044, "step": 16010 }, { "epoch": 0.23089229350128995, "grad_norm": 1.4900908638181543, "learning_rate": 9.487298837203548e-06, "loss": 0.524, "step": 16020 }, { "epoch": 0.2310364210253232, "grad_norm": 1.564581189343271, "learning_rate": 9.486188679182367e-06, "loss": 0.5, "step": 16030 }, { "epoch": 0.23118054854935646, "grad_norm": 1.9243041387795359, "learning_rate": 9.485077385638433e-06, "loss": 0.5171, "step": 16040 }, { "epoch": 0.23132467607338975, "grad_norm": 1.556014100585604, "learning_rate": 9.48396495685303e-06, "loss": 0.5161, "step": 16050 }, { "epoch": 0.231468803597423, "grad_norm": 1.5431442397460937, "learning_rate": 9.482851393107731e-06, "loss": 0.4926, "step": 16060 }, { "epoch": 0.23161293112145626, "grad_norm": 1.4060282882380508, "learning_rate": 9.481736694684396e-06, "loss": 0.5086, "step": 16070 }, { "epoch": 0.23175705864548954, "grad_norm": 1.2787358048967061, "learning_rate": 9.480620861865172e-06, "loss": 0.5109, "step": 16080 }, { "epoch": 0.2319011861695228, "grad_norm": 1.6949173555851667, "learning_rate": 9.479503894932494e-06, "loss": 0.5161, "step": 16090 }, { "epoch": 0.23204531369355605, "grad_norm": 2.077989117460975, "learning_rate": 9.47838579416908e-06, "loss": 0.5334, "step": 16100 }, { "epoch": 0.23218944121758933, "grad_norm": 1.5652686992092895, "learning_rate": 9.477266559857945e-06, "loss": 0.4917, "step": 16110 }, { "epoch": 0.2323335687416226, "grad_norm": 1.3660148917328818, "learning_rate": 9.476146192282378e-06, "loss": 0.5166, "step": 16120 }, { "epoch": 0.23247769626565584, "grad_norm": 1.64744530898909, "learning_rate": 9.475024691725962e-06, "loss": 0.473, "step": 16130 }, { "epoch": 0.23262182378968912, "grad_norm": 1.5747373492894128, "learning_rate": 9.473902058472571e-06, "loss": 0.5301, "step": 16140 }, { "epoch": 0.23276595131372238, "grad_norm": 1.6939857867203896, "learning_rate": 9.472778292806356e-06, "loss": 0.5289, "step": 16150 }, { "epoch": 0.23291007883775564, "grad_norm": 1.4952025346139373, "learning_rate": 9.471653395011758e-06, "loss": 0.4892, "step": 16160 }, { "epoch": 0.23305420636178892, "grad_norm": 1.8224381880403397, "learning_rate": 9.47052736537351e-06, "loss": 0.4911, "step": 16170 }, { "epoch": 0.23319833388582217, "grad_norm": 1.5389294826369293, "learning_rate": 9.469400204176624e-06, "loss": 0.5111, "step": 16180 }, { "epoch": 0.23334246140985543, "grad_norm": 1.486870050889536, "learning_rate": 9.468271911706404e-06, "loss": 0.4981, "step": 16190 }, { "epoch": 0.2334865889338887, "grad_norm": 1.5708614257004443, "learning_rate": 9.467142488248437e-06, "loss": 0.5064, "step": 16200 }, { "epoch": 0.23363071645792197, "grad_norm": 1.775898922834519, "learning_rate": 9.466011934088595e-06, "loss": 0.5024, "step": 16210 }, { "epoch": 0.23377484398195522, "grad_norm": 1.7156970550427104, "learning_rate": 9.464880249513043e-06, "loss": 0.4746, "step": 16220 }, { "epoch": 0.2339189715059885, "grad_norm": 1.7513748262015763, "learning_rate": 9.463747434808227e-06, "loss": 0.5075, "step": 16230 }, { "epoch": 0.23406309903002176, "grad_norm": 1.4330769154453527, "learning_rate": 9.462613490260875e-06, "loss": 0.5091, "step": 16240 }, { "epoch": 0.23420722655405501, "grad_norm": 1.3497315994962984, "learning_rate": 9.46147841615801e-06, "loss": 0.4898, "step": 16250 }, { "epoch": 0.2343513540780883, "grad_norm": 1.7613605410860327, "learning_rate": 9.460342212786933e-06, "loss": 0.5035, "step": 16260 }, { "epoch": 0.23449548160212155, "grad_norm": 1.6808681079622434, "learning_rate": 9.45920488043524e-06, "loss": 0.5265, "step": 16270 }, { "epoch": 0.23463960912615484, "grad_norm": 1.5327800376299778, "learning_rate": 9.458066419390802e-06, "loss": 0.4968, "step": 16280 }, { "epoch": 0.2347837366501881, "grad_norm": 1.4732370009330713, "learning_rate": 9.456926829941781e-06, "loss": 0.4824, "step": 16290 }, { "epoch": 0.23492786417422135, "grad_norm": 1.6154534913952363, "learning_rate": 9.455786112376629e-06, "loss": 0.4961, "step": 16300 }, { "epoch": 0.23507199169825463, "grad_norm": 1.816170578721868, "learning_rate": 9.454644266984075e-06, "loss": 0.5023, "step": 16310 }, { "epoch": 0.23521611922228788, "grad_norm": 1.4686140159823626, "learning_rate": 9.453501294053139e-06, "loss": 0.4733, "step": 16320 }, { "epoch": 0.23536024674632114, "grad_norm": 1.5226346025747615, "learning_rate": 9.452357193873123e-06, "loss": 0.51, "step": 16330 }, { "epoch": 0.23550437427035442, "grad_norm": 1.7128954544313824, "learning_rate": 9.45121196673362e-06, "loss": 0.5256, "step": 16340 }, { "epoch": 0.23564850179438768, "grad_norm": 1.9254809911611017, "learning_rate": 9.4500656129245e-06, "loss": 0.5139, "step": 16350 }, { "epoch": 0.23579262931842093, "grad_norm": 1.5102983847378788, "learning_rate": 9.448918132735927e-06, "loss": 0.5043, "step": 16360 }, { "epoch": 0.23593675684245421, "grad_norm": 1.7455763875945467, "learning_rate": 9.44776952645834e-06, "loss": 0.5183, "step": 16370 }, { "epoch": 0.23608088436648747, "grad_norm": 1.9757553535245853, "learning_rate": 9.446619794382476e-06, "loss": 0.5253, "step": 16380 }, { "epoch": 0.23622501189052073, "grad_norm": 1.5958408383663867, "learning_rate": 9.445468936799345e-06, "loss": 0.518, "step": 16390 }, { "epoch": 0.236369139414554, "grad_norm": 1.7406107452578305, "learning_rate": 9.444316954000246e-06, "loss": 0.529, "step": 16400 }, { "epoch": 0.23651326693858726, "grad_norm": 1.8745677000010466, "learning_rate": 9.443163846276765e-06, "loss": 0.4943, "step": 16410 }, { "epoch": 0.23665739446262052, "grad_norm": 1.4583284605357187, "learning_rate": 9.442009613920773e-06, "loss": 0.5101, "step": 16420 }, { "epoch": 0.2368015219866538, "grad_norm": 1.6135394147069755, "learning_rate": 9.440854257224421e-06, "loss": 0.5157, "step": 16430 }, { "epoch": 0.23694564951068706, "grad_norm": 1.4623100371009234, "learning_rate": 9.439697776480148e-06, "loss": 0.5041, "step": 16440 }, { "epoch": 0.2370897770347203, "grad_norm": 1.5084790306716414, "learning_rate": 9.438540171980679e-06, "loss": 0.4972, "step": 16450 }, { "epoch": 0.2372339045587536, "grad_norm": 1.6563498688420963, "learning_rate": 9.437381444019016e-06, "loss": 0.5025, "step": 16460 }, { "epoch": 0.23737803208278685, "grad_norm": 1.655203614946033, "learning_rate": 9.436221592888457e-06, "loss": 0.4912, "step": 16470 }, { "epoch": 0.2375221596068201, "grad_norm": 1.542008499036839, "learning_rate": 9.435060618882576e-06, "loss": 0.5162, "step": 16480 }, { "epoch": 0.2376662871308534, "grad_norm": 1.3743140250831434, "learning_rate": 9.43389852229523e-06, "loss": 0.5139, "step": 16490 }, { "epoch": 0.23781041465488664, "grad_norm": 1.5403319374405051, "learning_rate": 9.432735303420569e-06, "loss": 0.5324, "step": 16500 }, { "epoch": 0.2379545421789199, "grad_norm": 1.4453455504440529, "learning_rate": 9.431570962553014e-06, "loss": 0.4872, "step": 16510 }, { "epoch": 0.23809866970295318, "grad_norm": 1.6649671792974063, "learning_rate": 9.430405499987285e-06, "loss": 0.4987, "step": 16520 }, { "epoch": 0.23824279722698644, "grad_norm": 3.207081957329882, "learning_rate": 9.429238916018375e-06, "loss": 0.5236, "step": 16530 }, { "epoch": 0.2383869247510197, "grad_norm": 1.3658200717322564, "learning_rate": 9.428071210941563e-06, "loss": 0.4905, "step": 16540 }, { "epoch": 0.23853105227505297, "grad_norm": 1.7188013527834827, "learning_rate": 9.426902385052414e-06, "loss": 0.4986, "step": 16550 }, { "epoch": 0.23867517979908623, "grad_norm": 1.4961485417508216, "learning_rate": 9.425732438646777e-06, "loss": 0.5015, "step": 16560 }, { "epoch": 0.23881930732311948, "grad_norm": 1.755730143447268, "learning_rate": 9.424561372020782e-06, "loss": 0.5239, "step": 16570 }, { "epoch": 0.23896343484715277, "grad_norm": 1.6794491769564235, "learning_rate": 9.423389185470844e-06, "loss": 0.4984, "step": 16580 }, { "epoch": 0.23910756237118602, "grad_norm": 1.709459698770187, "learning_rate": 9.422215879293661e-06, "loss": 0.5072, "step": 16590 }, { "epoch": 0.23925168989521928, "grad_norm": 1.6646937969229862, "learning_rate": 9.421041453786215e-06, "loss": 0.521, "step": 16600 }, { "epoch": 0.23939581741925256, "grad_norm": 1.5759405997705682, "learning_rate": 9.419865909245771e-06, "loss": 0.4862, "step": 16610 }, { "epoch": 0.23953994494328582, "grad_norm": 1.7931755236709603, "learning_rate": 9.418689245969877e-06, "loss": 0.5115, "step": 16620 }, { "epoch": 0.23968407246731907, "grad_norm": 1.379025998523165, "learning_rate": 9.417511464256366e-06, "loss": 0.4935, "step": 16630 }, { "epoch": 0.23982819999135235, "grad_norm": 1.707910125568107, "learning_rate": 9.41633256440335e-06, "loss": 0.5245, "step": 16640 }, { "epoch": 0.2399723275153856, "grad_norm": 1.7383945398579599, "learning_rate": 9.415152546709226e-06, "loss": 0.485, "step": 16650 }, { "epoch": 0.2401164550394189, "grad_norm": 1.5112286793993284, "learning_rate": 9.413971411472677e-06, "loss": 0.5097, "step": 16660 }, { "epoch": 0.24026058256345215, "grad_norm": 1.3753242928006584, "learning_rate": 9.412789158992667e-06, "loss": 0.4739, "step": 16670 }, { "epoch": 0.2404047100874854, "grad_norm": 1.574053409847868, "learning_rate": 9.41160578956844e-06, "loss": 0.5238, "step": 16680 }, { "epoch": 0.24054883761151868, "grad_norm": 1.4963547901068457, "learning_rate": 9.410421303499525e-06, "loss": 0.5319, "step": 16690 }, { "epoch": 0.24069296513555194, "grad_norm": 1.723016808432745, "learning_rate": 9.409235701085735e-06, "loss": 0.4817, "step": 16700 }, { "epoch": 0.2408370926595852, "grad_norm": 1.5659945517153373, "learning_rate": 9.40804898262716e-06, "loss": 0.4953, "step": 16710 }, { "epoch": 0.24098122018361848, "grad_norm": 1.5789806413543566, "learning_rate": 9.406861148424182e-06, "loss": 0.5121, "step": 16720 }, { "epoch": 0.24112534770765173, "grad_norm": 1.5229991337789857, "learning_rate": 9.405672198777457e-06, "loss": 0.4998, "step": 16730 }, { "epoch": 0.241269475231685, "grad_norm": 1.5743883594794497, "learning_rate": 9.404482133987927e-06, "loss": 0.4875, "step": 16740 }, { "epoch": 0.24141360275571827, "grad_norm": 1.5360376429500322, "learning_rate": 9.403290954356815e-06, "loss": 0.5139, "step": 16750 }, { "epoch": 0.24155773027975153, "grad_norm": 1.2950928476392327, "learning_rate": 9.402098660185626e-06, "loss": 0.4758, "step": 16760 }, { "epoch": 0.24170185780378478, "grad_norm": 2.0656480252032186, "learning_rate": 9.400905251776151e-06, "loss": 0.4766, "step": 16770 }, { "epoch": 0.24184598532781806, "grad_norm": 1.5677207461050926, "learning_rate": 9.399710729430456e-06, "loss": 0.5084, "step": 16780 }, { "epoch": 0.24199011285185132, "grad_norm": 1.8194136716153393, "learning_rate": 9.398515093450895e-06, "loss": 0.5243, "step": 16790 }, { "epoch": 0.24213424037588457, "grad_norm": 1.5505865045417113, "learning_rate": 9.397318344140101e-06, "loss": 0.5141, "step": 16800 }, { "epoch": 0.24227836789991786, "grad_norm": 1.3800459335226387, "learning_rate": 9.39612048180099e-06, "loss": 0.5112, "step": 16810 }, { "epoch": 0.2424224954239511, "grad_norm": 1.6928333568390435, "learning_rate": 9.394921506736759e-06, "loss": 0.4875, "step": 16820 }, { "epoch": 0.24256662294798437, "grad_norm": 1.4348544379374266, "learning_rate": 9.393721419250885e-06, "loss": 0.4731, "step": 16830 }, { "epoch": 0.24271075047201765, "grad_norm": 1.665832981770375, "learning_rate": 9.392520219647134e-06, "loss": 0.5285, "step": 16840 }, { "epoch": 0.2428548779960509, "grad_norm": 1.5560564288623844, "learning_rate": 9.39131790822954e-06, "loss": 0.5251, "step": 16850 }, { "epoch": 0.24299900552008416, "grad_norm": 1.5258753113327825, "learning_rate": 9.390114485302433e-06, "loss": 0.5212, "step": 16860 }, { "epoch": 0.24314313304411744, "grad_norm": 1.759541419660075, "learning_rate": 9.388909951170416e-06, "loss": 0.4988, "step": 16870 }, { "epoch": 0.2432872605681507, "grad_norm": 1.53764242759161, "learning_rate": 9.387704306138372e-06, "loss": 0.5408, "step": 16880 }, { "epoch": 0.24343138809218395, "grad_norm": 4.208254761854586, "learning_rate": 9.38649755051147e-06, "loss": 0.5426, "step": 16890 }, { "epoch": 0.24357551561621724, "grad_norm": 1.5018671499831584, "learning_rate": 9.38528968459516e-06, "loss": 0.4848, "step": 16900 }, { "epoch": 0.2437196431402505, "grad_norm": 1.6241399979800626, "learning_rate": 9.38408070869517e-06, "loss": 0.5217, "step": 16910 }, { "epoch": 0.24386377066428375, "grad_norm": 1.5043551867416365, "learning_rate": 9.38287062311751e-06, "loss": 0.5334, "step": 16920 }, { "epoch": 0.24400789818831703, "grad_norm": 1.6053543269781383, "learning_rate": 9.38165942816847e-06, "loss": 0.5049, "step": 16930 }, { "epoch": 0.24415202571235028, "grad_norm": 1.7771073581297285, "learning_rate": 9.380447124154623e-06, "loss": 0.4786, "step": 16940 }, { "epoch": 0.24429615323638354, "grad_norm": 1.6442872440666412, "learning_rate": 9.379233711382823e-06, "loss": 0.503, "step": 16950 }, { "epoch": 0.24444028076041682, "grad_norm": 1.6386232506410368, "learning_rate": 9.378019190160202e-06, "loss": 0.4867, "step": 16960 }, { "epoch": 0.24458440828445008, "grad_norm": 1.5079389966470202, "learning_rate": 9.376803560794173e-06, "loss": 0.5194, "step": 16970 }, { "epoch": 0.24472853580848333, "grad_norm": 1.6084787499661748, "learning_rate": 9.375586823592432e-06, "loss": 0.5102, "step": 16980 }, { "epoch": 0.24487266333251662, "grad_norm": 1.4101657780871464, "learning_rate": 9.374368978862952e-06, "loss": 0.534, "step": 16990 }, { "epoch": 0.24501679085654987, "grad_norm": 1.605515817747498, "learning_rate": 9.37315002691399e-06, "loss": 0.5015, "step": 17000 }, { "epoch": 0.24516091838058315, "grad_norm": 1.4790657011982657, "learning_rate": 9.371929968054079e-06, "loss": 0.4997, "step": 17010 }, { "epoch": 0.2453050459046164, "grad_norm": 1.7697376156675733, "learning_rate": 9.370708802592037e-06, "loss": 0.5003, "step": 17020 }, { "epoch": 0.24544917342864966, "grad_norm": 1.4598343889244616, "learning_rate": 9.369486530836958e-06, "loss": 0.5348, "step": 17030 }, { "epoch": 0.24559330095268295, "grad_norm": 1.3959584686606386, "learning_rate": 9.368263153098215e-06, "loss": 0.4825, "step": 17040 }, { "epoch": 0.2457374284767162, "grad_norm": 1.683432359294007, "learning_rate": 9.36703866968547e-06, "loss": 0.4983, "step": 17050 }, { "epoch": 0.24588155600074946, "grad_norm": 1.587009253737403, "learning_rate": 9.365813080908655e-06, "loss": 0.5148, "step": 17060 }, { "epoch": 0.24602568352478274, "grad_norm": 1.8625543357263155, "learning_rate": 9.364586387077985e-06, "loss": 0.5335, "step": 17070 }, { "epoch": 0.246169811048816, "grad_norm": 1.4649602744877235, "learning_rate": 9.363358588503954e-06, "loss": 0.5195, "step": 17080 }, { "epoch": 0.24631393857284925, "grad_norm": 1.4411403040008428, "learning_rate": 9.36212968549734e-06, "loss": 0.5053, "step": 17090 }, { "epoch": 0.24645806609688253, "grad_norm": 4.110442822173882, "learning_rate": 9.360899678369192e-06, "loss": 0.5024, "step": 17100 }, { "epoch": 0.2466021936209158, "grad_norm": 1.592793069613827, "learning_rate": 9.359668567430846e-06, "loss": 0.4971, "step": 17110 }, { "epoch": 0.24674632114494904, "grad_norm": 1.555566109554284, "learning_rate": 9.358436352993917e-06, "loss": 0.5208, "step": 17120 }, { "epoch": 0.24689044866898233, "grad_norm": 1.8325170947818263, "learning_rate": 9.357203035370294e-06, "loss": 0.5109, "step": 17130 }, { "epoch": 0.24703457619301558, "grad_norm": 1.6697604126690613, "learning_rate": 9.35596861487215e-06, "loss": 0.5156, "step": 17140 }, { "epoch": 0.24717870371704884, "grad_norm": 1.6516643640601292, "learning_rate": 9.354733091811936e-06, "loss": 0.5134, "step": 17150 }, { "epoch": 0.24732283124108212, "grad_norm": 1.6189731529452764, "learning_rate": 9.353496466502383e-06, "loss": 0.504, "step": 17160 }, { "epoch": 0.24746695876511537, "grad_norm": 1.5244737979744332, "learning_rate": 9.352258739256497e-06, "loss": 0.5044, "step": 17170 }, { "epoch": 0.24761108628914863, "grad_norm": 1.5838711523719156, "learning_rate": 9.351019910387567e-06, "loss": 0.5174, "step": 17180 }, { "epoch": 0.2477552138131819, "grad_norm": 1.3743041711708313, "learning_rate": 9.349779980209157e-06, "loss": 0.5119, "step": 17190 }, { "epoch": 0.24789934133721517, "grad_norm": 1.6072544842431593, "learning_rate": 9.348538949035117e-06, "loss": 0.4837, "step": 17200 }, { "epoch": 0.24804346886124842, "grad_norm": 1.3125991929194052, "learning_rate": 9.347296817179568e-06, "loss": 0.4867, "step": 17210 }, { "epoch": 0.2481875963852817, "grad_norm": 2.0609736781215147, "learning_rate": 9.346053584956911e-06, "loss": 0.5118, "step": 17220 }, { "epoch": 0.24833172390931496, "grad_norm": 1.5647260924053767, "learning_rate": 9.34480925268183e-06, "loss": 0.5079, "step": 17230 }, { "epoch": 0.24847585143334822, "grad_norm": 1.5355817492607204, "learning_rate": 9.343563820669284e-06, "loss": 0.5051, "step": 17240 }, { "epoch": 0.2486199789573815, "grad_norm": 1.5743980908809185, "learning_rate": 9.342317289234511e-06, "loss": 0.4885, "step": 17250 }, { "epoch": 0.24876410648141475, "grad_norm": 1.391450716445083, "learning_rate": 9.341069658693025e-06, "loss": 0.4997, "step": 17260 }, { "epoch": 0.248908234005448, "grad_norm": 1.4502864664155386, "learning_rate": 9.33982092936062e-06, "loss": 0.4976, "step": 17270 }, { "epoch": 0.2490523615294813, "grad_norm": 1.4103783871127145, "learning_rate": 9.338571101553372e-06, "loss": 0.4989, "step": 17280 }, { "epoch": 0.24919648905351455, "grad_norm": 1.451319099886889, "learning_rate": 9.337320175587629e-06, "loss": 0.4948, "step": 17290 }, { "epoch": 0.2493406165775478, "grad_norm": 1.643917808183572, "learning_rate": 9.33606815178002e-06, "loss": 0.4941, "step": 17300 }, { "epoch": 0.24948474410158109, "grad_norm": 1.4934376668142577, "learning_rate": 9.33481503044745e-06, "loss": 0.5139, "step": 17310 }, { "epoch": 0.24962887162561434, "grad_norm": 1.7724505643433568, "learning_rate": 9.333560811907106e-06, "loss": 0.4942, "step": 17320 }, { "epoch": 0.2497729991496476, "grad_norm": 1.5877752139897399, "learning_rate": 9.332305496476448e-06, "loss": 0.5183, "step": 17330 }, { "epoch": 0.24991712667368088, "grad_norm": 1.8382188745490406, "learning_rate": 9.331049084473217e-06, "loss": 0.5194, "step": 17340 }, { "epoch": 0.25006125419771413, "grad_norm": 1.6302125088284634, "learning_rate": 9.329791576215425e-06, "loss": 0.5426, "step": 17350 }, { "epoch": 0.2502053817217474, "grad_norm": 1.4154745349773192, "learning_rate": 9.328532972021374e-06, "loss": 0.5095, "step": 17360 }, { "epoch": 0.25034950924578064, "grad_norm": 1.6008501810082867, "learning_rate": 9.327273272209629e-06, "loss": 0.5022, "step": 17370 }, { "epoch": 0.25049363676981395, "grad_norm": 1.4685703132403751, "learning_rate": 9.326012477099044e-06, "loss": 0.5205, "step": 17380 }, { "epoch": 0.2506377642938472, "grad_norm": 1.692953047498698, "learning_rate": 9.324750587008744e-06, "loss": 0.4974, "step": 17390 }, { "epoch": 0.25078189181788046, "grad_norm": 1.7759561046774368, "learning_rate": 9.323487602258133e-06, "loss": 0.5349, "step": 17400 }, { "epoch": 0.2509260193419137, "grad_norm": 1.5677310997446745, "learning_rate": 9.32222352316689e-06, "loss": 0.5013, "step": 17410 }, { "epoch": 0.251070146865947, "grad_norm": 1.7475649033233198, "learning_rate": 9.320958350054974e-06, "loss": 0.501, "step": 17420 }, { "epoch": 0.25121427438998023, "grad_norm": 1.7217522441187005, "learning_rate": 9.31969208324262e-06, "loss": 0.5005, "step": 17430 }, { "epoch": 0.25135840191401354, "grad_norm": 1.5620153638380259, "learning_rate": 9.318424723050337e-06, "loss": 0.5268, "step": 17440 }, { "epoch": 0.2515025294380468, "grad_norm": 1.3978359751481368, "learning_rate": 9.317156269798917e-06, "loss": 0.4966, "step": 17450 }, { "epoch": 0.25164665696208005, "grad_norm": 1.7147521262398333, "learning_rate": 9.315886723809422e-06, "loss": 0.5145, "step": 17460 }, { "epoch": 0.2517907844861133, "grad_norm": 1.3942503075837407, "learning_rate": 9.314616085403194e-06, "loss": 0.5031, "step": 17470 }, { "epoch": 0.25193491201014656, "grad_norm": 1.671281975095723, "learning_rate": 9.313344354901851e-06, "loss": 0.5079, "step": 17480 }, { "epoch": 0.2520790395341798, "grad_norm": 1.3868618058053686, "learning_rate": 9.31207153262729e-06, "loss": 0.4692, "step": 17490 }, { "epoch": 0.2522231670582131, "grad_norm": 1.4137530289756957, "learning_rate": 9.31079761890168e-06, "loss": 0.4911, "step": 17500 }, { "epoch": 0.2523672945822464, "grad_norm": 1.4985633998679944, "learning_rate": 9.309522614047463e-06, "loss": 0.5311, "step": 17510 }, { "epoch": 0.25251142210627964, "grad_norm": 1.6445299320413598, "learning_rate": 9.30824651838737e-06, "loss": 0.4905, "step": 17520 }, { "epoch": 0.2526555496303129, "grad_norm": 1.5230524956232423, "learning_rate": 9.306969332244397e-06, "loss": 0.513, "step": 17530 }, { "epoch": 0.25279967715434615, "grad_norm": 1.5135886739728082, "learning_rate": 9.305691055941817e-06, "loss": 0.501, "step": 17540 }, { "epoch": 0.2529438046783794, "grad_norm": 1.6701529418574257, "learning_rate": 9.304411689803185e-06, "loss": 0.5044, "step": 17550 }, { "epoch": 0.2530879322024127, "grad_norm": 1.7763804680245274, "learning_rate": 9.303131234152327e-06, "loss": 0.488, "step": 17560 }, { "epoch": 0.25323205972644597, "grad_norm": 1.8477782854505, "learning_rate": 9.301849689313343e-06, "loss": 0.5133, "step": 17570 }, { "epoch": 0.2533761872504792, "grad_norm": 1.7684974180723643, "learning_rate": 9.300567055610615e-06, "loss": 0.5124, "step": 17580 }, { "epoch": 0.2535203147745125, "grad_norm": 1.5895977608841196, "learning_rate": 9.299283333368796e-06, "loss": 0.51, "step": 17590 }, { "epoch": 0.25366444229854573, "grad_norm": 5.013116439296314, "learning_rate": 9.297998522912816e-06, "loss": 0.4786, "step": 17600 }, { "epoch": 0.25380856982257904, "grad_norm": 1.6591021414781348, "learning_rate": 9.296712624567878e-06, "loss": 0.5078, "step": 17610 }, { "epoch": 0.2539526973466123, "grad_norm": 1.4845748692897895, "learning_rate": 9.295425638659467e-06, "loss": 0.5284, "step": 17620 }, { "epoch": 0.25409682487064555, "grad_norm": 1.502372600882935, "learning_rate": 9.294137565513335e-06, "loss": 0.499, "step": 17630 }, { "epoch": 0.2542409523946788, "grad_norm": 1.4397253875759073, "learning_rate": 9.292848405455512e-06, "loss": 0.5, "step": 17640 }, { "epoch": 0.25438507991871206, "grad_norm": 1.660301100565604, "learning_rate": 9.291558158812309e-06, "loss": 0.5008, "step": 17650 }, { "epoch": 0.2545292074427453, "grad_norm": 1.3849439957706033, "learning_rate": 9.2902668259103e-06, "loss": 0.4914, "step": 17660 }, { "epoch": 0.25467333496677863, "grad_norm": 1.5418728188406006, "learning_rate": 9.28897440707635e-06, "loss": 0.5047, "step": 17670 }, { "epoch": 0.2548174624908119, "grad_norm": 1.6854326748559778, "learning_rate": 9.287680902637583e-06, "loss": 0.5111, "step": 17680 }, { "epoch": 0.25496159001484514, "grad_norm": 1.4626722943052979, "learning_rate": 9.286386312921406e-06, "loss": 0.4896, "step": 17690 }, { "epoch": 0.2551057175388784, "grad_norm": 1.5846819329441417, "learning_rate": 9.2850906382555e-06, "loss": 0.5195, "step": 17700 }, { "epoch": 0.25524984506291165, "grad_norm": 1.4262458577060926, "learning_rate": 9.283793878967822e-06, "loss": 0.5291, "step": 17710 }, { "epoch": 0.2553939725869449, "grad_norm": 3.0521228693622797, "learning_rate": 9.282496035386597e-06, "loss": 0.5233, "step": 17720 }, { "epoch": 0.2555381001109782, "grad_norm": 1.4858365325075935, "learning_rate": 9.281197107840334e-06, "loss": 0.5042, "step": 17730 }, { "epoch": 0.25568222763501147, "grad_norm": 1.599754600787325, "learning_rate": 9.279897096657809e-06, "loss": 0.4883, "step": 17740 }, { "epoch": 0.2558263551590447, "grad_norm": 1.5578421128680187, "learning_rate": 9.278596002168073e-06, "loss": 0.5033, "step": 17750 }, { "epoch": 0.255970482683078, "grad_norm": 1.8247730779831075, "learning_rate": 9.277293824700455e-06, "loss": 0.4986, "step": 17760 }, { "epoch": 0.25611461020711124, "grad_norm": 1.4173459086524691, "learning_rate": 9.275990564584558e-06, "loss": 0.4926, "step": 17770 }, { "epoch": 0.2562587377311445, "grad_norm": 1.6120740910022882, "learning_rate": 9.274686222150251e-06, "loss": 0.5097, "step": 17780 }, { "epoch": 0.2564028652551778, "grad_norm": 1.6386594217965367, "learning_rate": 9.273380797727688e-06, "loss": 0.5114, "step": 17790 }, { "epoch": 0.25654699277921106, "grad_norm": 1.5116803505753602, "learning_rate": 9.272074291647292e-06, "loss": 0.5255, "step": 17800 }, { "epoch": 0.2566911203032443, "grad_norm": 1.310307578874569, "learning_rate": 9.270766704239755e-06, "loss": 0.4712, "step": 17810 }, { "epoch": 0.25683524782727757, "grad_norm": 1.6991320757149575, "learning_rate": 9.269458035836051e-06, "loss": 0.5095, "step": 17820 }, { "epoch": 0.2569793753513108, "grad_norm": 1.4665871571207352, "learning_rate": 9.268148286767426e-06, "loss": 0.5093, "step": 17830 }, { "epoch": 0.2571235028753441, "grad_norm": 1.5801008145924758, "learning_rate": 9.266837457365391e-06, "loss": 0.5091, "step": 17840 }, { "epoch": 0.2572676303993774, "grad_norm": 1.5803014187435644, "learning_rate": 9.265525547961742e-06, "loss": 0.5245, "step": 17850 }, { "epoch": 0.25741175792341064, "grad_norm": 1.5550939747290657, "learning_rate": 9.264212558888544e-06, "loss": 0.5074, "step": 17860 }, { "epoch": 0.2575558854474439, "grad_norm": 1.5871990566441019, "learning_rate": 9.26289849047813e-06, "loss": 0.5055, "step": 17870 }, { "epoch": 0.25770001297147715, "grad_norm": 1.478604814159882, "learning_rate": 9.261583343063112e-06, "loss": 0.4951, "step": 17880 }, { "epoch": 0.2578441404955104, "grad_norm": 1.638562684092191, "learning_rate": 9.260267116976376e-06, "loss": 0.5128, "step": 17890 }, { "epoch": 0.25798826801954367, "grad_norm": 1.6699774736641486, "learning_rate": 9.258949812551079e-06, "loss": 0.5358, "step": 17900 }, { "epoch": 0.258132395543577, "grad_norm": 1.4602191039678958, "learning_rate": 9.257631430120648e-06, "loss": 0.5081, "step": 17910 }, { "epoch": 0.25827652306761023, "grad_norm": 1.0268162840209836, "learning_rate": 9.25631197001879e-06, "loss": 0.4816, "step": 17920 }, { "epoch": 0.2584206505916435, "grad_norm": 1.726686660420199, "learning_rate": 9.254991432579476e-06, "loss": 0.4832, "step": 17930 }, { "epoch": 0.25856477811567674, "grad_norm": 1.5045881421880187, "learning_rate": 9.253669818136956e-06, "loss": 0.5366, "step": 17940 }, { "epoch": 0.25870890563971, "grad_norm": 1.4916566352751304, "learning_rate": 9.252347127025751e-06, "loss": 0.5222, "step": 17950 }, { "epoch": 0.2588530331637433, "grad_norm": 1.5607939101176118, "learning_rate": 9.251023359580653e-06, "loss": 0.5189, "step": 17960 }, { "epoch": 0.25899716068777656, "grad_norm": 1.566259933296684, "learning_rate": 9.24969851613673e-06, "loss": 0.4747, "step": 17970 }, { "epoch": 0.2591412882118098, "grad_norm": 1.3340758833323265, "learning_rate": 9.248372597029319e-06, "loss": 0.5019, "step": 17980 }, { "epoch": 0.25928541573584307, "grad_norm": 1.2634969443661057, "learning_rate": 9.24704560259403e-06, "loss": 0.497, "step": 17990 }, { "epoch": 0.2594295432598763, "grad_norm": 1.6090210308109356, "learning_rate": 9.245717533166745e-06, "loss": 0.518, "step": 18000 }, { "epoch": 0.2595736707839096, "grad_norm": 1.5005556988687092, "learning_rate": 9.24438838908362e-06, "loss": 0.4913, "step": 18010 }, { "epoch": 0.2597177983079429, "grad_norm": 1.6064181457480788, "learning_rate": 9.243058170681082e-06, "loss": 0.4698, "step": 18020 }, { "epoch": 0.25986192583197615, "grad_norm": 1.503336682831016, "learning_rate": 9.241726878295827e-06, "loss": 0.5023, "step": 18030 }, { "epoch": 0.2600060533560094, "grad_norm": 1.8589493338033674, "learning_rate": 9.240394512264827e-06, "loss": 0.5121, "step": 18040 }, { "epoch": 0.26015018088004266, "grad_norm": 1.670298074431591, "learning_rate": 9.239061072925326e-06, "loss": 0.4863, "step": 18050 }, { "epoch": 0.2602943084040759, "grad_norm": 1.4247015381376837, "learning_rate": 9.237726560614835e-06, "loss": 0.4983, "step": 18060 }, { "epoch": 0.26043843592810917, "grad_norm": 1.4350177285618158, "learning_rate": 9.236390975671139e-06, "loss": 0.5046, "step": 18070 }, { "epoch": 0.2605825634521425, "grad_norm": 1.4018306502889197, "learning_rate": 9.235054318432297e-06, "loss": 0.488, "step": 18080 }, { "epoch": 0.26072669097617573, "grad_norm": 1.5910656339275029, "learning_rate": 9.233716589236637e-06, "loss": 0.4805, "step": 18090 }, { "epoch": 0.260870818500209, "grad_norm": 1.6143004586855807, "learning_rate": 9.232377788422759e-06, "loss": 0.5384, "step": 18100 }, { "epoch": 0.26101494602424224, "grad_norm": 1.7258531863678679, "learning_rate": 9.231037916329532e-06, "loss": 0.5196, "step": 18110 }, { "epoch": 0.2611590735482755, "grad_norm": 1.76108834752249, "learning_rate": 9.229696973296098e-06, "loss": 0.5016, "step": 18120 }, { "epoch": 0.26130320107230876, "grad_norm": 1.585113391708516, "learning_rate": 9.228354959661873e-06, "loss": 0.5277, "step": 18130 }, { "epoch": 0.26144732859634207, "grad_norm": 1.3944205012084259, "learning_rate": 9.227011875766541e-06, "loss": 0.4956, "step": 18140 }, { "epoch": 0.2615914561203753, "grad_norm": 1.6796781482872118, "learning_rate": 9.225667721950052e-06, "loss": 0.4951, "step": 18150 }, { "epoch": 0.2617355836444086, "grad_norm": 1.60723607987115, "learning_rate": 9.224322498552638e-06, "loss": 0.4607, "step": 18160 }, { "epoch": 0.26187971116844183, "grad_norm": 1.686331595332679, "learning_rate": 9.222976205914791e-06, "loss": 0.4689, "step": 18170 }, { "epoch": 0.2620238386924751, "grad_norm": 1.6530436935259745, "learning_rate": 9.22162884437728e-06, "loss": 0.5175, "step": 18180 }, { "epoch": 0.26216796621650834, "grad_norm": 1.7037352995135173, "learning_rate": 9.220280414281144e-06, "loss": 0.5362, "step": 18190 }, { "epoch": 0.26231209374054165, "grad_norm": 1.6718220308283886, "learning_rate": 9.21893091596769e-06, "loss": 0.5175, "step": 18200 }, { "epoch": 0.2624562212645749, "grad_norm": 1.5917538886848857, "learning_rate": 9.217580349778495e-06, "loss": 0.5169, "step": 18210 }, { "epoch": 0.26260034878860816, "grad_norm": 1.584879948646271, "learning_rate": 9.21622871605541e-06, "loss": 0.5347, "step": 18220 }, { "epoch": 0.2627444763126414, "grad_norm": 1.5000868291368374, "learning_rate": 9.214876015140555e-06, "loss": 0.5033, "step": 18230 }, { "epoch": 0.2628886038366747, "grad_norm": 1.4654746635353673, "learning_rate": 9.213522247376317e-06, "loss": 0.496, "step": 18240 }, { "epoch": 0.2630327313607079, "grad_norm": 1.4972103056054207, "learning_rate": 9.212167413105356e-06, "loss": 0.4838, "step": 18250 }, { "epoch": 0.26317685888474124, "grad_norm": 1.5627305101587536, "learning_rate": 9.210811512670601e-06, "loss": 0.4882, "step": 18260 }, { "epoch": 0.2633209864087745, "grad_norm": 1.3195852658537512, "learning_rate": 9.209454546415252e-06, "loss": 0.5109, "step": 18270 }, { "epoch": 0.26346511393280775, "grad_norm": 1.5505026871245762, "learning_rate": 9.208096514682777e-06, "loss": 0.4924, "step": 18280 }, { "epoch": 0.263609241456841, "grad_norm": 1.5356253675981546, "learning_rate": 9.206737417816915e-06, "loss": 0.5128, "step": 18290 }, { "epoch": 0.26375336898087426, "grad_norm": 1.4000641451141842, "learning_rate": 9.205377256161673e-06, "loss": 0.4806, "step": 18300 }, { "epoch": 0.26389749650490757, "grad_norm": 1.3451686610777467, "learning_rate": 9.20401603006133e-06, "loss": 0.4838, "step": 18310 }, { "epoch": 0.2640416240289408, "grad_norm": 1.633554193588703, "learning_rate": 9.202653739860432e-06, "loss": 0.5304, "step": 18320 }, { "epoch": 0.2641857515529741, "grad_norm": 1.3680211766971557, "learning_rate": 9.201290385903796e-06, "loss": 0.5131, "step": 18330 }, { "epoch": 0.26432987907700733, "grad_norm": 1.4770417677363714, "learning_rate": 9.19992596853651e-06, "loss": 0.4894, "step": 18340 }, { "epoch": 0.2644740066010406, "grad_norm": 1.530010647751496, "learning_rate": 9.198560488103924e-06, "loss": 0.4944, "step": 18350 }, { "epoch": 0.26461813412507385, "grad_norm": 1.6144556792546247, "learning_rate": 9.197193944951665e-06, "loss": 0.538, "step": 18360 }, { "epoch": 0.26476226164910716, "grad_norm": 1.215033777447035, "learning_rate": 9.195826339425626e-06, "loss": 0.5169, "step": 18370 }, { "epoch": 0.2649063891731404, "grad_norm": 1.4850211699488107, "learning_rate": 9.194457671871969e-06, "loss": 0.4724, "step": 18380 }, { "epoch": 0.26505051669717367, "grad_norm": 1.530753998960096, "learning_rate": 9.193087942637122e-06, "loss": 0.5035, "step": 18390 }, { "epoch": 0.2651946442212069, "grad_norm": 1.7234810489925334, "learning_rate": 9.191717152067786e-06, "loss": 0.493, "step": 18400 }, { "epoch": 0.2653387717452402, "grad_norm": 1.6819543006395548, "learning_rate": 9.19034530051093e-06, "loss": 0.5054, "step": 18410 }, { "epoch": 0.26548289926927343, "grad_norm": 1.1992847360340635, "learning_rate": 9.18897238831379e-06, "loss": 0.4925, "step": 18420 }, { "epoch": 0.26562702679330674, "grad_norm": 1.7393205227106003, "learning_rate": 9.18759841582387e-06, "loss": 0.4839, "step": 18430 }, { "epoch": 0.26577115431734, "grad_norm": 1.4201248108669557, "learning_rate": 9.186223383388943e-06, "loss": 0.486, "step": 18440 }, { "epoch": 0.26591528184137325, "grad_norm": 1.8894247254211993, "learning_rate": 9.184847291357056e-06, "loss": 0.5325, "step": 18450 }, { "epoch": 0.2660594093654065, "grad_norm": 1.6063215438648677, "learning_rate": 9.18347014007651e-06, "loss": 0.4935, "step": 18460 }, { "epoch": 0.26620353688943976, "grad_norm": 1.370081822389216, "learning_rate": 9.18209192989589e-06, "loss": 0.503, "step": 18470 }, { "epoch": 0.266347664413473, "grad_norm": 1.5388082595938142, "learning_rate": 9.180712661164038e-06, "loss": 0.4986, "step": 18480 }, { "epoch": 0.26649179193750633, "grad_norm": 1.3653693379103453, "learning_rate": 9.179332334230071e-06, "loss": 0.5116, "step": 18490 }, { "epoch": 0.2666359194615396, "grad_norm": 1.8943998209374266, "learning_rate": 9.17795094944337e-06, "loss": 0.5104, "step": 18500 }, { "epoch": 0.26678004698557284, "grad_norm": 1.362987334098584, "learning_rate": 9.176568507153583e-06, "loss": 0.5019, "step": 18510 }, { "epoch": 0.2669241745096061, "grad_norm": 1.870579155639326, "learning_rate": 9.175185007710628e-06, "loss": 0.505, "step": 18520 }, { "epoch": 0.26706830203363935, "grad_norm": 1.5169447928290725, "learning_rate": 9.17380045146469e-06, "loss": 0.514, "step": 18530 }, { "epoch": 0.2672124295576726, "grad_norm": 1.475731060940306, "learning_rate": 9.172414838766221e-06, "loss": 0.4881, "step": 18540 }, { "epoch": 0.2673565570817059, "grad_norm": 1.749106074868346, "learning_rate": 9.17102816996594e-06, "loss": 0.5044, "step": 18550 }, { "epoch": 0.26750068460573917, "grad_norm": 1.4331857412718942, "learning_rate": 9.169640445414836e-06, "loss": 0.4977, "step": 18560 }, { "epoch": 0.2676448121297724, "grad_norm": 1.2438835333449838, "learning_rate": 9.168251665464163e-06, "loss": 0.4884, "step": 18570 }, { "epoch": 0.2677889396538057, "grad_norm": 3.5153434657456755, "learning_rate": 9.16686183046544e-06, "loss": 0.5158, "step": 18580 }, { "epoch": 0.26793306717783894, "grad_norm": 1.3100614017506174, "learning_rate": 9.165470940770458e-06, "loss": 0.5077, "step": 18590 }, { "epoch": 0.2680771947018722, "grad_norm": 2.061983716040426, "learning_rate": 9.164078996731271e-06, "loss": 0.5182, "step": 18600 }, { "epoch": 0.2682213222259055, "grad_norm": 1.701901305100135, "learning_rate": 9.162685998700202e-06, "loss": 0.4687, "step": 18610 }, { "epoch": 0.26836544974993876, "grad_norm": 1.4929558688934892, "learning_rate": 9.16129194702984e-06, "loss": 0.4989, "step": 18620 }, { "epoch": 0.268509577273972, "grad_norm": 1.4657844013915107, "learning_rate": 9.15989684207304e-06, "loss": 0.4922, "step": 18630 }, { "epoch": 0.26865370479800527, "grad_norm": 3.208820621910888, "learning_rate": 9.158500684182925e-06, "loss": 0.5066, "step": 18640 }, { "epoch": 0.2687978323220385, "grad_norm": 1.8161352875703822, "learning_rate": 9.157103473712883e-06, "loss": 0.5103, "step": 18650 }, { "epoch": 0.26894195984607183, "grad_norm": 1.6937972832061323, "learning_rate": 9.15570521101657e-06, "loss": 0.5142, "step": 18660 }, { "epoch": 0.2690860873701051, "grad_norm": 3.1733561545719695, "learning_rate": 9.154305896447908e-06, "loss": 0.5249, "step": 18670 }, { "epoch": 0.26923021489413834, "grad_norm": 1.4845878360753322, "learning_rate": 9.152905530361085e-06, "loss": 0.4673, "step": 18680 }, { "epoch": 0.2693743424181716, "grad_norm": 1.7930571496008074, "learning_rate": 9.151504113110552e-06, "loss": 0.5247, "step": 18690 }, { "epoch": 0.26951846994220485, "grad_norm": 1.6286058338357903, "learning_rate": 9.150101645051033e-06, "loss": 0.4956, "step": 18700 }, { "epoch": 0.2696625974662381, "grad_norm": 1.822966075624408, "learning_rate": 9.148698126537512e-06, "loss": 0.5165, "step": 18710 }, { "epoch": 0.2698067249902714, "grad_norm": 1.4816031476278222, "learning_rate": 9.147293557925243e-06, "loss": 0.5032, "step": 18720 }, { "epoch": 0.2699508525143047, "grad_norm": 1.4776266922380181, "learning_rate": 9.14588793956974e-06, "loss": 0.5164, "step": 18730 }, { "epoch": 0.27009498003833793, "grad_norm": 1.4191267545427637, "learning_rate": 9.14448127182679e-06, "loss": 0.5044, "step": 18740 }, { "epoch": 0.2702391075623712, "grad_norm": 1.4435287059387938, "learning_rate": 9.14307355505244e-06, "loss": 0.4975, "step": 18750 }, { "epoch": 0.27038323508640444, "grad_norm": 1.563731303661308, "learning_rate": 9.141664789603005e-06, "loss": 0.4739, "step": 18760 }, { "epoch": 0.2705273626104377, "grad_norm": 1.7779955897601298, "learning_rate": 9.140254975835066e-06, "loss": 0.5385, "step": 18770 }, { "epoch": 0.270671490134471, "grad_norm": 2.0119879111040277, "learning_rate": 9.138844114105467e-06, "loss": 0.5001, "step": 18780 }, { "epoch": 0.27081561765850426, "grad_norm": 1.6571031769451814, "learning_rate": 9.137432204771319e-06, "loss": 0.4978, "step": 18790 }, { "epoch": 0.2709597451825375, "grad_norm": 1.7310939986987341, "learning_rate": 9.136019248189999e-06, "loss": 0.5057, "step": 18800 }, { "epoch": 0.27110387270657077, "grad_norm": 1.5077200963095905, "learning_rate": 9.134605244719144e-06, "loss": 0.5056, "step": 18810 }, { "epoch": 0.271248000230604, "grad_norm": 1.4308969649126033, "learning_rate": 9.133190194716666e-06, "loss": 0.5074, "step": 18820 }, { "epoch": 0.2713921277546373, "grad_norm": 1.495919697843187, "learning_rate": 9.131774098540732e-06, "loss": 0.4931, "step": 18830 }, { "epoch": 0.2715362552786706, "grad_norm": 1.4726729802283582, "learning_rate": 9.130356956549775e-06, "loss": 0.4797, "step": 18840 }, { "epoch": 0.27168038280270385, "grad_norm": 1.5485893652429894, "learning_rate": 9.1289387691025e-06, "loss": 0.4961, "step": 18850 }, { "epoch": 0.2718245103267371, "grad_norm": 1.5570078257092363, "learning_rate": 9.127519536557871e-06, "loss": 0.4698, "step": 18860 }, { "epoch": 0.27196863785077036, "grad_norm": 1.6179454998501102, "learning_rate": 9.126099259275116e-06, "loss": 0.518, "step": 18870 }, { "epoch": 0.2721127653748036, "grad_norm": 1.2363095216903086, "learning_rate": 9.124677937613729e-06, "loss": 0.4941, "step": 18880 }, { "epoch": 0.27225689289883687, "grad_norm": 2.634395480971051, "learning_rate": 9.123255571933468e-06, "loss": 0.4965, "step": 18890 }, { "epoch": 0.2724010204228702, "grad_norm": 1.4930522310573542, "learning_rate": 9.121832162594355e-06, "loss": 0.5137, "step": 18900 }, { "epoch": 0.27254514794690343, "grad_norm": 1.541648761222855, "learning_rate": 9.12040770995668e-06, "loss": 0.5199, "step": 18910 }, { "epoch": 0.2726892754709367, "grad_norm": 1.3548550247114861, "learning_rate": 9.118982214380989e-06, "loss": 0.4973, "step": 18920 }, { "epoch": 0.27283340299496994, "grad_norm": 1.6152524842560212, "learning_rate": 9.1175556762281e-06, "loss": 0.5077, "step": 18930 }, { "epoch": 0.2729775305190032, "grad_norm": 1.7090749092611501, "learning_rate": 9.11612809585909e-06, "loss": 0.4852, "step": 18940 }, { "epoch": 0.27312165804303645, "grad_norm": 1.7129819611327721, "learning_rate": 9.114699473635303e-06, "loss": 0.4879, "step": 18950 }, { "epoch": 0.27326578556706976, "grad_norm": 2.8830790656403105, "learning_rate": 9.113269809918342e-06, "loss": 0.5032, "step": 18960 }, { "epoch": 0.273409913091103, "grad_norm": 1.2436342868687849, "learning_rate": 9.11183910507008e-06, "loss": 0.4888, "step": 18970 }, { "epoch": 0.2735540406151363, "grad_norm": 1.4697278822348159, "learning_rate": 9.110407359452647e-06, "loss": 0.5082, "step": 18980 }, { "epoch": 0.27369816813916953, "grad_norm": 1.4796483965053169, "learning_rate": 9.108974573428441e-06, "loss": 0.5232, "step": 18990 }, { "epoch": 0.2738422956632028, "grad_norm": 1.734371562201327, "learning_rate": 9.107540747360124e-06, "loss": 0.5087, "step": 19000 }, { "epoch": 0.27398642318723604, "grad_norm": 1.5403841042417938, "learning_rate": 9.106105881610617e-06, "loss": 0.4951, "step": 19010 }, { "epoch": 0.27413055071126935, "grad_norm": 1.4134467484753863, "learning_rate": 9.104669976543107e-06, "loss": 0.4829, "step": 19020 }, { "epoch": 0.2742746782353026, "grad_norm": 1.481824650903399, "learning_rate": 9.103233032521044e-06, "loss": 0.4803, "step": 19030 }, { "epoch": 0.27441880575933586, "grad_norm": 1.5595059519643488, "learning_rate": 9.10179504990814e-06, "loss": 0.4809, "step": 19040 }, { "epoch": 0.2745629332833691, "grad_norm": 1.6440346660035692, "learning_rate": 9.10035602906837e-06, "loss": 0.514, "step": 19050 }, { "epoch": 0.27470706080740237, "grad_norm": 1.4025040777596596, "learning_rate": 9.098915970365972e-06, "loss": 0.5, "step": 19060 }, { "epoch": 0.2748511883314357, "grad_norm": 1.550819166450456, "learning_rate": 9.097474874165447e-06, "loss": 0.4908, "step": 19070 }, { "epoch": 0.27499531585546894, "grad_norm": 1.5312941484159848, "learning_rate": 9.096032740831558e-06, "loss": 0.4777, "step": 19080 }, { "epoch": 0.2751394433795022, "grad_norm": 1.4301277682125255, "learning_rate": 9.094589570729334e-06, "loss": 0.467, "step": 19090 }, { "epoch": 0.27528357090353545, "grad_norm": 1.4358433550359953, "learning_rate": 9.093145364224058e-06, "loss": 0.495, "step": 19100 }, { "epoch": 0.2754276984275687, "grad_norm": 1.4712224681284654, "learning_rate": 9.091700121681283e-06, "loss": 0.4769, "step": 19110 }, { "epoch": 0.27557182595160196, "grad_norm": 1.491050459276978, "learning_rate": 9.090253843466825e-06, "loss": 0.4914, "step": 19120 }, { "epoch": 0.27571595347563527, "grad_norm": 1.4659215893545046, "learning_rate": 9.088806529946756e-06, "loss": 0.4631, "step": 19130 }, { "epoch": 0.2758600809996685, "grad_norm": 1.4412826900930369, "learning_rate": 9.087358181487413e-06, "loss": 0.5062, "step": 19140 }, { "epoch": 0.2760042085237018, "grad_norm": 1.596558984975651, "learning_rate": 9.085908798455397e-06, "loss": 0.4803, "step": 19150 }, { "epoch": 0.27614833604773503, "grad_norm": 1.483998063726885, "learning_rate": 9.084458381217565e-06, "loss": 0.4838, "step": 19160 }, { "epoch": 0.2762924635717683, "grad_norm": 1.550801893076972, "learning_rate": 9.083006930141046e-06, "loss": 0.5052, "step": 19170 }, { "epoch": 0.27643659109580154, "grad_norm": 1.6145897024840945, "learning_rate": 9.081554445593217e-06, "loss": 0.5044, "step": 19180 }, { "epoch": 0.27658071861983485, "grad_norm": 1.4655354460064496, "learning_rate": 9.08010092794173e-06, "loss": 0.5058, "step": 19190 }, { "epoch": 0.2767248461438681, "grad_norm": 1.5235452166001042, "learning_rate": 9.07864637755449e-06, "loss": 0.5052, "step": 19200 }, { "epoch": 0.27686897366790136, "grad_norm": 1.4942694798902076, "learning_rate": 9.077190794799666e-06, "loss": 0.5076, "step": 19210 }, { "epoch": 0.2770131011919346, "grad_norm": 1.219667544290117, "learning_rate": 9.075734180045687e-06, "loss": 0.4743, "step": 19220 }, { "epoch": 0.2771572287159679, "grad_norm": 1.4644701348275693, "learning_rate": 9.074276533661248e-06, "loss": 0.5106, "step": 19230 }, { "epoch": 0.27730135624000113, "grad_norm": 1.3366030860797606, "learning_rate": 9.072817856015298e-06, "loss": 0.4955, "step": 19240 }, { "epoch": 0.27744548376403444, "grad_norm": 1.7621786261117491, "learning_rate": 9.071358147477051e-06, "loss": 0.5204, "step": 19250 }, { "epoch": 0.2775896112880677, "grad_norm": 1.5463004286123467, "learning_rate": 9.069897408415985e-06, "loss": 0.4788, "step": 19260 }, { "epoch": 0.27773373881210095, "grad_norm": 1.7912170461700387, "learning_rate": 9.06843563920183e-06, "loss": 0.5183, "step": 19270 }, { "epoch": 0.2778778663361342, "grad_norm": 1.5356144724577911, "learning_rate": 9.066972840204586e-06, "loss": 0.5115, "step": 19280 }, { "epoch": 0.27802199386016746, "grad_norm": 1.378796154412908, "learning_rate": 9.065509011794508e-06, "loss": 0.4657, "step": 19290 }, { "epoch": 0.2781661213842007, "grad_norm": 1.9125201218162184, "learning_rate": 9.064044154342114e-06, "loss": 0.5038, "step": 19300 }, { "epoch": 0.278310248908234, "grad_norm": 1.258530062144723, "learning_rate": 9.06257826821818e-06, "loss": 0.5094, "step": 19310 }, { "epoch": 0.2784543764322673, "grad_norm": 1.685946550021358, "learning_rate": 9.061111353793748e-06, "loss": 0.4752, "step": 19320 }, { "epoch": 0.27859850395630054, "grad_norm": 1.4148652252316691, "learning_rate": 9.059643411440113e-06, "loss": 0.4919, "step": 19330 }, { "epoch": 0.2787426314803338, "grad_norm": 1.6559989882049582, "learning_rate": 9.058174441528834e-06, "loss": 0.4919, "step": 19340 }, { "epoch": 0.27888675900436705, "grad_norm": 1.5430088286642085, "learning_rate": 9.05670444443173e-06, "loss": 0.489, "step": 19350 }, { "epoch": 0.2790308865284003, "grad_norm": 1.510902429299377, "learning_rate": 9.05523342052088e-06, "loss": 0.4722, "step": 19360 }, { "epoch": 0.2791750140524336, "grad_norm": 1.5687591103005096, "learning_rate": 9.053761370168624e-06, "loss": 0.5233, "step": 19370 }, { "epoch": 0.27931914157646687, "grad_norm": 1.525943906388916, "learning_rate": 9.052288293747557e-06, "loss": 0.4865, "step": 19380 }, { "epoch": 0.2794632691005001, "grad_norm": 1.678307453643725, "learning_rate": 9.05081419163054e-06, "loss": 0.4965, "step": 19390 }, { "epoch": 0.2796073966245334, "grad_norm": 1.4526043897584495, "learning_rate": 9.04933906419069e-06, "loss": 0.4884, "step": 19400 }, { "epoch": 0.27975152414856663, "grad_norm": 1.2323579198863346, "learning_rate": 9.047862911801384e-06, "loss": 0.4897, "step": 19410 }, { "epoch": 0.27989565167259994, "grad_norm": 1.6198020925474452, "learning_rate": 9.046385734836257e-06, "loss": 0.5206, "step": 19420 }, { "epoch": 0.2800397791966332, "grad_norm": 1.6039085599461995, "learning_rate": 9.044907533669208e-06, "loss": 0.4924, "step": 19430 }, { "epoch": 0.28018390672066645, "grad_norm": 1.4402695625717963, "learning_rate": 9.043428308674392e-06, "loss": 0.4923, "step": 19440 }, { "epoch": 0.2803280342446997, "grad_norm": 1.6215562473710066, "learning_rate": 9.04194806022622e-06, "loss": 0.5195, "step": 19450 }, { "epoch": 0.28047216176873296, "grad_norm": 1.378804160474966, "learning_rate": 9.040466788699368e-06, "loss": 0.4833, "step": 19460 }, { "epoch": 0.2806162892927662, "grad_norm": 1.9415841329704073, "learning_rate": 9.03898449446877e-06, "loss": 0.4771, "step": 19470 }, { "epoch": 0.28076041681679953, "grad_norm": 1.4172571764434272, "learning_rate": 9.037501177909615e-06, "loss": 0.4987, "step": 19480 }, { "epoch": 0.2809045443408328, "grad_norm": 1.519906883045466, "learning_rate": 9.036016839397352e-06, "loss": 0.5028, "step": 19490 }, { "epoch": 0.28104867186486604, "grad_norm": 1.9252613178417475, "learning_rate": 9.034531479307694e-06, "loss": 0.498, "step": 19500 }, { "epoch": 0.2811927993888993, "grad_norm": 1.4351254982260173, "learning_rate": 9.033045098016602e-06, "loss": 0.4858, "step": 19510 }, { "epoch": 0.28133692691293255, "grad_norm": 1.3677406987771266, "learning_rate": 9.031557695900308e-06, "loss": 0.4844, "step": 19520 }, { "epoch": 0.2814810544369658, "grad_norm": 1.3348281271110907, "learning_rate": 9.030069273335291e-06, "loss": 0.5319, "step": 19530 }, { "epoch": 0.2816251819609991, "grad_norm": 1.6818909747521436, "learning_rate": 9.0285798306983e-06, "loss": 0.5057, "step": 19540 }, { "epoch": 0.28176930948503237, "grad_norm": 1.595940571952654, "learning_rate": 9.027089368366326e-06, "loss": 0.5038, "step": 19550 }, { "epoch": 0.2819134370090656, "grad_norm": 1.4798662419912272, "learning_rate": 9.025597886716636e-06, "loss": 0.4814, "step": 19560 }, { "epoch": 0.2820575645330989, "grad_norm": 1.7537358627376431, "learning_rate": 9.024105386126744e-06, "loss": 0.514, "step": 19570 }, { "epoch": 0.28220169205713214, "grad_norm": 1.6098013956547441, "learning_rate": 9.022611866974424e-06, "loss": 0.4774, "step": 19580 }, { "epoch": 0.2823458195811654, "grad_norm": 1.3650315834514637, "learning_rate": 9.02111732963771e-06, "loss": 0.5075, "step": 19590 }, { "epoch": 0.2824899471051987, "grad_norm": 1.5676106910704626, "learning_rate": 9.01962177449489e-06, "loss": 0.516, "step": 19600 }, { "epoch": 0.28263407462923196, "grad_norm": 1.4236965359841665, "learning_rate": 9.018125201924514e-06, "loss": 0.5071, "step": 19610 }, { "epoch": 0.2827782021532652, "grad_norm": 1.468733288999916, "learning_rate": 9.016627612305388e-06, "loss": 0.4905, "step": 19620 }, { "epoch": 0.28292232967729847, "grad_norm": 1.6632569184092447, "learning_rate": 9.01512900601657e-06, "loss": 0.5163, "step": 19630 }, { "epoch": 0.2830664572013317, "grad_norm": 1.3679429153590443, "learning_rate": 9.013629383437386e-06, "loss": 0.5036, "step": 19640 }, { "epoch": 0.283210584725365, "grad_norm": 1.580467260332999, "learning_rate": 9.012128744947408e-06, "loss": 0.514, "step": 19650 }, { "epoch": 0.2833547122493983, "grad_norm": 1.5188730386785667, "learning_rate": 9.010627090926477e-06, "loss": 0.5073, "step": 19660 }, { "epoch": 0.28349883977343154, "grad_norm": 1.3750731533683809, "learning_rate": 9.009124421754676e-06, "loss": 0.4988, "step": 19670 }, { "epoch": 0.2836429672974648, "grad_norm": 1.5555333234145237, "learning_rate": 9.007620737812361e-06, "loss": 0.5032, "step": 19680 }, { "epoch": 0.28378709482149805, "grad_norm": 1.3422754377109396, "learning_rate": 9.006116039480133e-06, "loss": 0.4957, "step": 19690 }, { "epoch": 0.2839312223455313, "grad_norm": 1.6342921681109868, "learning_rate": 9.004610327138857e-06, "loss": 0.4916, "step": 19700 }, { "epoch": 0.28407534986956456, "grad_norm": 1.5498323675973944, "learning_rate": 9.003103601169649e-06, "loss": 0.4741, "step": 19710 }, { "epoch": 0.2842194773935979, "grad_norm": 1.2521314664860372, "learning_rate": 9.001595861953887e-06, "loss": 0.4981, "step": 19720 }, { "epoch": 0.28436360491763113, "grad_norm": 1.680625507528597, "learning_rate": 9.0000871098732e-06, "loss": 0.5269, "step": 19730 }, { "epoch": 0.2845077324416644, "grad_norm": 1.5287231340094591, "learning_rate": 8.998577345309479e-06, "loss": 0.4822, "step": 19740 }, { "epoch": 0.28465185996569764, "grad_norm": 1.5746951852256406, "learning_rate": 8.997066568644865e-06, "loss": 0.5, "step": 19750 }, { "epoch": 0.2847959874897309, "grad_norm": 1.8081508825967045, "learning_rate": 8.995554780261762e-06, "loss": 0.509, "step": 19760 }, { "epoch": 0.2849401150137642, "grad_norm": 1.6789661990806297, "learning_rate": 8.994041980542825e-06, "loss": 0.5412, "step": 19770 }, { "epoch": 0.28508424253779746, "grad_norm": 1.4299440357257238, "learning_rate": 8.992528169870966e-06, "loss": 0.5019, "step": 19780 }, { "epoch": 0.2852283700618307, "grad_norm": 1.9132492478503862, "learning_rate": 8.991013348629355e-06, "loss": 0.495, "step": 19790 }, { "epoch": 0.28537249758586397, "grad_norm": 1.6030356636422898, "learning_rate": 8.989497517201417e-06, "loss": 0.4875, "step": 19800 }, { "epoch": 0.2855166251098972, "grad_norm": 1.4544820284068032, "learning_rate": 8.987980675970832e-06, "loss": 0.496, "step": 19810 }, { "epoch": 0.2856607526339305, "grad_norm": 1.4776407533954958, "learning_rate": 8.986462825321532e-06, "loss": 0.4919, "step": 19820 }, { "epoch": 0.2858048801579638, "grad_norm": 1.6124220830800748, "learning_rate": 8.984943965637711e-06, "loss": 0.4846, "step": 19830 }, { "epoch": 0.28594900768199705, "grad_norm": 1.4460058533937548, "learning_rate": 8.983424097303815e-06, "loss": 0.5154, "step": 19840 }, { "epoch": 0.2860931352060303, "grad_norm": 1.7068008000963226, "learning_rate": 8.981903220704546e-06, "loss": 0.5212, "step": 19850 }, { "epoch": 0.28623726273006356, "grad_norm": 1.366221997928728, "learning_rate": 8.980381336224862e-06, "loss": 0.482, "step": 19860 }, { "epoch": 0.2863813902540968, "grad_norm": 1.4177769021945426, "learning_rate": 8.978858444249975e-06, "loss": 0.4982, "step": 19870 }, { "epoch": 0.28652551777813007, "grad_norm": 1.4096050566340799, "learning_rate": 8.97733454516535e-06, "loss": 0.4844, "step": 19880 }, { "epoch": 0.2866696453021634, "grad_norm": 1.476559183793358, "learning_rate": 8.975809639356712e-06, "loss": 0.4665, "step": 19890 }, { "epoch": 0.28681377282619663, "grad_norm": 1.6605736582448336, "learning_rate": 8.974283727210035e-06, "loss": 0.4822, "step": 19900 }, { "epoch": 0.2869579003502299, "grad_norm": 1.6463344840556722, "learning_rate": 8.972756809111553e-06, "loss": 0.4775, "step": 19910 }, { "epoch": 0.28710202787426314, "grad_norm": 1.5719993762221478, "learning_rate": 8.971228885447751e-06, "loss": 0.4909, "step": 19920 }, { "epoch": 0.2872461553982964, "grad_norm": 1.6279435913791416, "learning_rate": 8.96969995660537e-06, "loss": 0.4589, "step": 19930 }, { "epoch": 0.28739028292232965, "grad_norm": 1.4725493881326277, "learning_rate": 8.968170022971406e-06, "loss": 0.5046, "step": 19940 }, { "epoch": 0.28753441044636296, "grad_norm": 1.6477571196356775, "learning_rate": 8.966639084933106e-06, "loss": 0.5053, "step": 19950 }, { "epoch": 0.2876785379703962, "grad_norm": 1.563509325133638, "learning_rate": 8.965107142877977e-06, "loss": 0.4968, "step": 19960 }, { "epoch": 0.2878226654944295, "grad_norm": 1.676701061106771, "learning_rate": 8.963574197193777e-06, "loss": 0.5036, "step": 19970 }, { "epoch": 0.28796679301846273, "grad_norm": 1.5149273705098099, "learning_rate": 8.962040248268516e-06, "loss": 0.5157, "step": 19980 }, { "epoch": 0.288110920542496, "grad_norm": 1.556271634896753, "learning_rate": 8.960505296490458e-06, "loss": 0.518, "step": 19990 }, { "epoch": 0.28825504806652924, "grad_norm": 2.7848908741916394, "learning_rate": 8.958969342248127e-06, "loss": 0.5064, "step": 20000 }, { "epoch": 0.28839917559056255, "grad_norm": 1.4728772525070326, "learning_rate": 8.957432385930294e-06, "loss": 0.4479, "step": 20010 }, { "epoch": 0.2885433031145958, "grad_norm": 1.7216084211779639, "learning_rate": 8.955894427925985e-06, "loss": 0.4755, "step": 20020 }, { "epoch": 0.28868743063862906, "grad_norm": 1.4076271943224912, "learning_rate": 8.954355468624484e-06, "loss": 0.5085, "step": 20030 }, { "epoch": 0.2888315581626623, "grad_norm": 1.3725373793404527, "learning_rate": 8.952815508415322e-06, "loss": 0.4682, "step": 20040 }, { "epoch": 0.28897568568669557, "grad_norm": 1.527745460691309, "learning_rate": 8.951274547688289e-06, "loss": 0.4708, "step": 20050 }, { "epoch": 0.2891198132107288, "grad_norm": 1.4233259983003106, "learning_rate": 8.949732586833422e-06, "loss": 0.4833, "step": 20060 }, { "epoch": 0.28926394073476214, "grad_norm": 1.1231312856203979, "learning_rate": 8.94818962624102e-06, "loss": 0.467, "step": 20070 }, { "epoch": 0.2894080682587954, "grad_norm": 1.6823940977915461, "learning_rate": 8.946645666301624e-06, "loss": 0.4932, "step": 20080 }, { "epoch": 0.28955219578282865, "grad_norm": 1.708705106160182, "learning_rate": 8.945100707406037e-06, "loss": 0.4996, "step": 20090 }, { "epoch": 0.2896963233068619, "grad_norm": 1.5779932863385904, "learning_rate": 8.943554749945314e-06, "loss": 0.4963, "step": 20100 }, { "epoch": 0.28984045083089516, "grad_norm": 1.2911125508968466, "learning_rate": 8.942007794310756e-06, "loss": 0.5059, "step": 20110 }, { "epoch": 0.28998457835492847, "grad_norm": 1.5950379495577112, "learning_rate": 8.940459840893923e-06, "loss": 0.5133, "step": 20120 }, { "epoch": 0.2901287058789617, "grad_norm": 1.6290150724215344, "learning_rate": 8.938910890086626e-06, "loss": 0.4745, "step": 20130 }, { "epoch": 0.290272833402995, "grad_norm": 1.3162953651657021, "learning_rate": 8.937360942280927e-06, "loss": 0.5082, "step": 20140 }, { "epoch": 0.29041696092702823, "grad_norm": 1.3360248110963113, "learning_rate": 8.935809997869142e-06, "loss": 0.4749, "step": 20150 }, { "epoch": 0.2905610884510615, "grad_norm": 1.8936891909814384, "learning_rate": 8.934258057243839e-06, "loss": 0.5065, "step": 20160 }, { "epoch": 0.29070521597509474, "grad_norm": 1.581497419219118, "learning_rate": 8.932705120797836e-06, "loss": 0.5182, "step": 20170 }, { "epoch": 0.29084934349912805, "grad_norm": 1.2682759944985327, "learning_rate": 8.931151188924208e-06, "loss": 0.4609, "step": 20180 }, { "epoch": 0.2909934710231613, "grad_norm": 1.4655840280784815, "learning_rate": 8.929596262016275e-06, "loss": 0.459, "step": 20190 }, { "epoch": 0.29113759854719456, "grad_norm": 1.656588578029911, "learning_rate": 8.928040340467617e-06, "loss": 0.4926, "step": 20200 }, { "epoch": 0.2912817260712278, "grad_norm": 1.4059582886536979, "learning_rate": 8.926483424672059e-06, "loss": 0.4697, "step": 20210 }, { "epoch": 0.2914258535952611, "grad_norm": 1.5915520765619036, "learning_rate": 8.92492551502368e-06, "loss": 0.5165, "step": 20220 }, { "epoch": 0.29156998111929433, "grad_norm": 1.4265825365279614, "learning_rate": 8.923366611916812e-06, "loss": 0.4876, "step": 20230 }, { "epoch": 0.29171410864332764, "grad_norm": 1.8260035218143407, "learning_rate": 8.921806715746035e-06, "loss": 0.4922, "step": 20240 }, { "epoch": 0.2918582361673609, "grad_norm": 1.663222317548681, "learning_rate": 8.920245826906184e-06, "loss": 0.5227, "step": 20250 }, { "epoch": 0.29200236369139415, "grad_norm": 1.5349739712195125, "learning_rate": 8.918683945792345e-06, "loss": 0.5016, "step": 20260 }, { "epoch": 0.2921464912154274, "grad_norm": 1.7039417026212975, "learning_rate": 8.917121072799851e-06, "loss": 0.5076, "step": 20270 }, { "epoch": 0.29229061873946066, "grad_norm": 1.4675720391292568, "learning_rate": 8.915557208324291e-06, "loss": 0.4978, "step": 20280 }, { "epoch": 0.2924347462634939, "grad_norm": 1.5257905356913974, "learning_rate": 8.913992352761502e-06, "loss": 0.4829, "step": 20290 }, { "epoch": 0.2925788737875272, "grad_norm": 1.545615233457301, "learning_rate": 8.912426506507575e-06, "loss": 0.5128, "step": 20300 }, { "epoch": 0.2927230013115605, "grad_norm": 1.533614819833131, "learning_rate": 8.910859669958845e-06, "loss": 0.4995, "step": 20310 }, { "epoch": 0.29286712883559374, "grad_norm": 1.6599121646900565, "learning_rate": 8.909291843511907e-06, "loss": 0.5112, "step": 20320 }, { "epoch": 0.293011256359627, "grad_norm": 1.4436829320723168, "learning_rate": 8.9077230275636e-06, "loss": 0.4804, "step": 20330 }, { "epoch": 0.29315538388366025, "grad_norm": 1.3564072529513493, "learning_rate": 8.906153222511014e-06, "loss": 0.477, "step": 20340 }, { "epoch": 0.2932995114076935, "grad_norm": 1.5672769925028989, "learning_rate": 8.904582428751491e-06, "loss": 0.4999, "step": 20350 }, { "epoch": 0.2934436389317268, "grad_norm": 1.4041605256248981, "learning_rate": 8.903010646682625e-06, "loss": 0.505, "step": 20360 }, { "epoch": 0.29358776645576007, "grad_norm": 1.5233502799871734, "learning_rate": 8.901437876702255e-06, "loss": 0.516, "step": 20370 }, { "epoch": 0.2937318939797933, "grad_norm": 1.4043378054726245, "learning_rate": 8.899864119208475e-06, "loss": 0.4885, "step": 20380 }, { "epoch": 0.2938760215038266, "grad_norm": 1.3900589850426064, "learning_rate": 8.898289374599627e-06, "loss": 0.5037, "step": 20390 }, { "epoch": 0.29402014902785983, "grad_norm": 1.5264211064499844, "learning_rate": 8.896713643274303e-06, "loss": 0.5142, "step": 20400 }, { "epoch": 0.2941642765518931, "grad_norm": 1.2837111212617953, "learning_rate": 8.895136925631343e-06, "loss": 0.4809, "step": 20410 }, { "epoch": 0.2943084040759264, "grad_norm": 1.3341349944190422, "learning_rate": 8.893559222069838e-06, "loss": 0.4806, "step": 20420 }, { "epoch": 0.29445253159995965, "grad_norm": 1.411752274917171, "learning_rate": 8.891980532989134e-06, "loss": 0.5164, "step": 20430 }, { "epoch": 0.2945966591239929, "grad_norm": 1.4938056608155792, "learning_rate": 8.890400858788815e-06, "loss": 0.4923, "step": 20440 }, { "epoch": 0.29474078664802617, "grad_norm": 1.6808927597935703, "learning_rate": 8.888820199868722e-06, "loss": 0.5124, "step": 20450 }, { "epoch": 0.2948849141720594, "grad_norm": 1.5106636227321932, "learning_rate": 8.887238556628947e-06, "loss": 0.493, "step": 20460 }, { "epoch": 0.2950290416960927, "grad_norm": 1.6097011740278122, "learning_rate": 8.885655929469825e-06, "loss": 0.5322, "step": 20470 }, { "epoch": 0.295173169220126, "grad_norm": 1.558448580201426, "learning_rate": 8.884072318791943e-06, "loss": 0.5141, "step": 20480 }, { "epoch": 0.29531729674415924, "grad_norm": 1.5411549100664048, "learning_rate": 8.882487724996141e-06, "loss": 0.5006, "step": 20490 }, { "epoch": 0.2954614242681925, "grad_norm": 1.7293069125237155, "learning_rate": 8.880902148483497e-06, "loss": 0.4976, "step": 20500 }, { "epoch": 0.29560555179222575, "grad_norm": 1.3984789843515628, "learning_rate": 8.879315589655351e-06, "loss": 0.4866, "step": 20510 }, { "epoch": 0.295749679316259, "grad_norm": 1.646637491282316, "learning_rate": 8.877728048913281e-06, "loss": 0.5008, "step": 20520 }, { "epoch": 0.2958938068402923, "grad_norm": 1.3752629067159061, "learning_rate": 8.876139526659121e-06, "loss": 0.5116, "step": 20530 }, { "epoch": 0.2960379343643256, "grad_norm": 1.544570408678835, "learning_rate": 8.874550023294947e-06, "loss": 0.4918, "step": 20540 }, { "epoch": 0.2961820618883588, "grad_norm": 1.5376964455743978, "learning_rate": 8.872959539223091e-06, "loss": 0.5011, "step": 20550 }, { "epoch": 0.2963261894123921, "grad_norm": 1.4516956124236213, "learning_rate": 8.871368074846124e-06, "loss": 0.4917, "step": 20560 }, { "epoch": 0.29647031693642534, "grad_norm": 1.5299763597695366, "learning_rate": 8.869775630566872e-06, "loss": 0.4961, "step": 20570 }, { "epoch": 0.2966144444604586, "grad_norm": 1.3536655336782462, "learning_rate": 8.868182206788407e-06, "loss": 0.4757, "step": 20580 }, { "epoch": 0.2967585719844919, "grad_norm": 1.4471443100796064, "learning_rate": 8.866587803914048e-06, "loss": 0.4927, "step": 20590 }, { "epoch": 0.29690269950852516, "grad_norm": 1.6251369067517465, "learning_rate": 8.864992422347362e-06, "loss": 0.4703, "step": 20600 }, { "epoch": 0.2970468270325584, "grad_norm": 1.5423744654312292, "learning_rate": 8.86339606249217e-06, "loss": 0.5131, "step": 20610 }, { "epoch": 0.29719095455659167, "grad_norm": 1.5212421851663225, "learning_rate": 8.861798724752527e-06, "loss": 0.4962, "step": 20620 }, { "epoch": 0.2973350820806249, "grad_norm": 1.519435289898147, "learning_rate": 8.86020040953275e-06, "loss": 0.5021, "step": 20630 }, { "epoch": 0.2974792096046582, "grad_norm": 1.5130049807928827, "learning_rate": 8.858601117237391e-06, "loss": 0.4938, "step": 20640 }, { "epoch": 0.2976233371286915, "grad_norm": 1.6135427682913106, "learning_rate": 8.85700084827126e-06, "loss": 0.4962, "step": 20650 }, { "epoch": 0.29776746465272474, "grad_norm": 1.5462607175040044, "learning_rate": 8.855399603039407e-06, "loss": 0.4897, "step": 20660 }, { "epoch": 0.297911592176758, "grad_norm": 1.7355092238263048, "learning_rate": 8.853797381947134e-06, "loss": 0.5142, "step": 20670 }, { "epoch": 0.29805571970079126, "grad_norm": 1.3907043909427113, "learning_rate": 8.852194185399986e-06, "loss": 0.5063, "step": 20680 }, { "epoch": 0.2981998472248245, "grad_norm": 1.4728662346987198, "learning_rate": 8.850590013803756e-06, "loss": 0.4689, "step": 20690 }, { "epoch": 0.29834397474885777, "grad_norm": 1.98741866004667, "learning_rate": 8.848984867564485e-06, "loss": 0.5271, "step": 20700 }, { "epoch": 0.2984881022728911, "grad_norm": 1.687948129937522, "learning_rate": 8.84737874708846e-06, "loss": 0.5177, "step": 20710 }, { "epoch": 0.29863222979692433, "grad_norm": 1.5143027179833264, "learning_rate": 8.845771652782214e-06, "loss": 0.526, "step": 20720 }, { "epoch": 0.2987763573209576, "grad_norm": 1.3378659609209107, "learning_rate": 8.844163585052527e-06, "loss": 0.4682, "step": 20730 }, { "epoch": 0.29892048484499084, "grad_norm": 1.6136819431865062, "learning_rate": 8.842554544306428e-06, "loss": 0.4606, "step": 20740 }, { "epoch": 0.2990646123690241, "grad_norm": 1.662217222397663, "learning_rate": 8.840944530951187e-06, "loss": 0.4941, "step": 20750 }, { "epoch": 0.29920873989305735, "grad_norm": 1.3978744953141249, "learning_rate": 8.839333545394324e-06, "loss": 0.4979, "step": 20760 }, { "epoch": 0.29935286741709066, "grad_norm": 1.4209105021452664, "learning_rate": 8.837721588043602e-06, "loss": 0.5106, "step": 20770 }, { "epoch": 0.2994969949411239, "grad_norm": 1.6756261234002001, "learning_rate": 8.836108659307034e-06, "loss": 0.4818, "step": 20780 }, { "epoch": 0.2996411224651572, "grad_norm": 1.4624787189773887, "learning_rate": 8.834494759592877e-06, "loss": 0.479, "step": 20790 }, { "epoch": 0.2997852499891904, "grad_norm": 1.5180934423707855, "learning_rate": 8.832879889309633e-06, "loss": 0.5107, "step": 20800 }, { "epoch": 0.2999293775132237, "grad_norm": 1.588350504605968, "learning_rate": 8.831264048866052e-06, "loss": 0.4972, "step": 20810 }, { "epoch": 0.30007350503725694, "grad_norm": 1.5963897275329704, "learning_rate": 8.829647238671124e-06, "loss": 0.5135, "step": 20820 }, { "epoch": 0.30021763256129025, "grad_norm": 1.6489052863429585, "learning_rate": 8.828029459134092e-06, "loss": 0.501, "step": 20830 }, { "epoch": 0.3003617600853235, "grad_norm": 1.5651533743641382, "learning_rate": 8.826410710664439e-06, "loss": 0.4952, "step": 20840 }, { "epoch": 0.30050588760935676, "grad_norm": 1.79889855476252, "learning_rate": 8.824790993671894e-06, "loss": 0.5225, "step": 20850 }, { "epoch": 0.30065001513339, "grad_norm": 1.477167512324674, "learning_rate": 8.823170308566434e-06, "loss": 0.4801, "step": 20860 }, { "epoch": 0.30079414265742327, "grad_norm": 1.4361609758624594, "learning_rate": 8.82154865575828e-06, "loss": 0.4621, "step": 20870 }, { "epoch": 0.3009382701814566, "grad_norm": 1.4307599992247593, "learning_rate": 8.819926035657893e-06, "loss": 0.4732, "step": 20880 }, { "epoch": 0.30108239770548983, "grad_norm": 1.547664676964735, "learning_rate": 8.818302448675987e-06, "loss": 0.4987, "step": 20890 }, { "epoch": 0.3012265252295231, "grad_norm": 1.4918273512176015, "learning_rate": 8.816677895223513e-06, "loss": 0.4777, "step": 20900 }, { "epoch": 0.30137065275355635, "grad_norm": 1.6235207634028628, "learning_rate": 8.815052375711674e-06, "loss": 0.4885, "step": 20910 }, { "epoch": 0.3015147802775896, "grad_norm": 1.679607032095568, "learning_rate": 8.81342589055191e-06, "loss": 0.4902, "step": 20920 }, { "epoch": 0.30165890780162286, "grad_norm": 1.5487643008613368, "learning_rate": 8.811798440155913e-06, "loss": 0.4964, "step": 20930 }, { "epoch": 0.30180303532565617, "grad_norm": 1.8400744174151087, "learning_rate": 8.810170024935612e-06, "loss": 0.4801, "step": 20940 }, { "epoch": 0.3019471628496894, "grad_norm": 1.7191841745812453, "learning_rate": 8.808540645303187e-06, "loss": 0.4843, "step": 20950 }, { "epoch": 0.3020912903737227, "grad_norm": 1.4093699120203977, "learning_rate": 8.806910301671056e-06, "loss": 0.5018, "step": 20960 }, { "epoch": 0.30223541789775593, "grad_norm": 1.6058868779779802, "learning_rate": 8.805278994451886e-06, "loss": 0.4953, "step": 20970 }, { "epoch": 0.3023795454217892, "grad_norm": 1.5183218966145369, "learning_rate": 8.803646724058584e-06, "loss": 0.4791, "step": 20980 }, { "epoch": 0.30252367294582244, "grad_norm": 1.3602354840100332, "learning_rate": 8.802013490904303e-06, "loss": 0.4862, "step": 20990 }, { "epoch": 0.30266780046985575, "grad_norm": 1.5077113864832208, "learning_rate": 8.80037929540244e-06, "loss": 0.5276, "step": 21000 }, { "epoch": 0.302811927993889, "grad_norm": 1.4652162685041534, "learning_rate": 8.798744137966634e-06, "loss": 0.4851, "step": 21010 }, { "epoch": 0.30295605551792226, "grad_norm": 1.805102917363076, "learning_rate": 8.797108019010768e-06, "loss": 0.5141, "step": 21020 }, { "epoch": 0.3031001830419555, "grad_norm": 1.4532432091564367, "learning_rate": 8.79547093894897e-06, "loss": 0.5111, "step": 21030 }, { "epoch": 0.3032443105659888, "grad_norm": 1.7947209556338093, "learning_rate": 8.793832898195609e-06, "loss": 0.4798, "step": 21040 }, { "epoch": 0.30338843809002203, "grad_norm": 1.5105360962246122, "learning_rate": 8.792193897165298e-06, "loss": 0.498, "step": 21050 }, { "epoch": 0.30353256561405534, "grad_norm": 1.1948749970040997, "learning_rate": 8.790553936272894e-06, "loss": 0.4922, "step": 21060 }, { "epoch": 0.3036766931380886, "grad_norm": 1.2974287093946082, "learning_rate": 8.788913015933495e-06, "loss": 0.4858, "step": 21070 }, { "epoch": 0.30382082066212185, "grad_norm": 1.4375652199522693, "learning_rate": 8.787271136562443e-06, "loss": 0.4684, "step": 21080 }, { "epoch": 0.3039649481861551, "grad_norm": 1.506689875875043, "learning_rate": 8.785628298575324e-06, "loss": 0.5014, "step": 21090 }, { "epoch": 0.30410907571018836, "grad_norm": 1.60369090213001, "learning_rate": 8.783984502387964e-06, "loss": 0.4882, "step": 21100 }, { "epoch": 0.3042532032342216, "grad_norm": 1.4961838241586631, "learning_rate": 8.782339748416432e-06, "loss": 0.4982, "step": 21110 }, { "epoch": 0.3043973307582549, "grad_norm": 1.2761813087814138, "learning_rate": 8.780694037077043e-06, "loss": 0.517, "step": 21120 }, { "epoch": 0.3045414582822882, "grad_norm": 1.4302013839273677, "learning_rate": 8.779047368786352e-06, "loss": 0.4826, "step": 21130 }, { "epoch": 0.30468558580632144, "grad_norm": 1.433608745162409, "learning_rate": 8.77739974396115e-06, "loss": 0.4982, "step": 21140 }, { "epoch": 0.3048297133303547, "grad_norm": 1.6844561760489716, "learning_rate": 8.775751163018483e-06, "loss": 0.498, "step": 21150 }, { "epoch": 0.30497384085438795, "grad_norm": 1.6124540840295762, "learning_rate": 8.774101626375629e-06, "loss": 0.4735, "step": 21160 }, { "epoch": 0.3051179683784212, "grad_norm": 3.4107103792472784, "learning_rate": 8.77245113445011e-06, "loss": 0.4946, "step": 21170 }, { "epoch": 0.3052620959024545, "grad_norm": 1.3963286318022958, "learning_rate": 8.770799687659693e-06, "loss": 0.488, "step": 21180 }, { "epoch": 0.30540622342648777, "grad_norm": 1.3416896167536452, "learning_rate": 8.769147286422381e-06, "loss": 0.4873, "step": 21190 }, { "epoch": 0.305550350950521, "grad_norm": 1.8561729539560676, "learning_rate": 8.767493931156424e-06, "loss": 0.4836, "step": 21200 }, { "epoch": 0.3056944784745543, "grad_norm": 1.48673413720329, "learning_rate": 8.765839622280312e-06, "loss": 0.4791, "step": 21210 }, { "epoch": 0.30583860599858753, "grad_norm": 1.6227802713798987, "learning_rate": 8.764184360212776e-06, "loss": 0.4949, "step": 21220 }, { "epoch": 0.30598273352262084, "grad_norm": 1.5805566148973316, "learning_rate": 8.762528145372786e-06, "loss": 0.5002, "step": 21230 }, { "epoch": 0.3061268610466541, "grad_norm": 1.5719093648016749, "learning_rate": 8.760870978179557e-06, "loss": 0.4813, "step": 21240 }, { "epoch": 0.30627098857068735, "grad_norm": 1.5814596355309298, "learning_rate": 8.759212859052543e-06, "loss": 0.5091, "step": 21250 }, { "epoch": 0.3064151160947206, "grad_norm": 2.254323030893624, "learning_rate": 8.757553788411439e-06, "loss": 0.4971, "step": 21260 }, { "epoch": 0.30655924361875386, "grad_norm": 1.6732752980910144, "learning_rate": 8.755893766676181e-06, "loss": 0.4841, "step": 21270 }, { "epoch": 0.3067033711427871, "grad_norm": 1.689241277858554, "learning_rate": 8.754232794266944e-06, "loss": 0.5021, "step": 21280 }, { "epoch": 0.30684749866682043, "grad_norm": 1.4869796732731848, "learning_rate": 8.75257087160415e-06, "loss": 0.4816, "step": 21290 }, { "epoch": 0.3069916261908537, "grad_norm": 1.6438646975388946, "learning_rate": 8.750907999108454e-06, "loss": 0.4843, "step": 21300 }, { "epoch": 0.30713575371488694, "grad_norm": 1.4353020075866323, "learning_rate": 8.749244177200752e-06, "loss": 0.4567, "step": 21310 }, { "epoch": 0.3072798812389202, "grad_norm": 2.0913283524723054, "learning_rate": 8.747579406302187e-06, "loss": 0.5029, "step": 21320 }, { "epoch": 0.30742400876295345, "grad_norm": 1.5011937617070585, "learning_rate": 8.745913686834137e-06, "loss": 0.4893, "step": 21330 }, { "epoch": 0.3075681362869867, "grad_norm": 1.470821512582109, "learning_rate": 8.744247019218219e-06, "loss": 0.5048, "step": 21340 }, { "epoch": 0.30771226381102, "grad_norm": 1.6771579437430089, "learning_rate": 8.742579403876294e-06, "loss": 0.5056, "step": 21350 }, { "epoch": 0.30785639133505327, "grad_norm": 1.5596081678709874, "learning_rate": 8.740910841230458e-06, "loss": 0.4964, "step": 21360 }, { "epoch": 0.3080005188590865, "grad_norm": 1.8118012398321897, "learning_rate": 8.739241331703054e-06, "loss": 0.5137, "step": 21370 }, { "epoch": 0.3081446463831198, "grad_norm": 1.6343032020712847, "learning_rate": 8.73757087571666e-06, "loss": 0.4786, "step": 21380 }, { "epoch": 0.30828877390715304, "grad_norm": 1.6904750273031843, "learning_rate": 8.735899473694087e-06, "loss": 0.4913, "step": 21390 }, { "epoch": 0.3084329014311863, "grad_norm": 1.4474032996520716, "learning_rate": 8.7342271260584e-06, "loss": 0.5085, "step": 21400 }, { "epoch": 0.3085770289552196, "grad_norm": 1.5942447293593607, "learning_rate": 8.732553833232893e-06, "loss": 0.5028, "step": 21410 }, { "epoch": 0.30872115647925286, "grad_norm": 1.4412895439393716, "learning_rate": 8.7308795956411e-06, "loss": 0.5003, "step": 21420 }, { "epoch": 0.3088652840032861, "grad_norm": 1.6996646756614373, "learning_rate": 8.7292044137068e-06, "loss": 0.4744, "step": 21430 }, { "epoch": 0.30900941152731937, "grad_norm": 1.7352512532853948, "learning_rate": 8.727528287854004e-06, "loss": 0.5057, "step": 21440 }, { "epoch": 0.3091535390513526, "grad_norm": 1.5731243929431544, "learning_rate": 8.725851218506967e-06, "loss": 0.506, "step": 21450 }, { "epoch": 0.3092976665753859, "grad_norm": 1.5291605311602339, "learning_rate": 8.724173206090178e-06, "loss": 0.4675, "step": 21460 }, { "epoch": 0.3094417940994192, "grad_norm": 1.4751384283493085, "learning_rate": 8.72249425102837e-06, "loss": 0.5021, "step": 21470 }, { "epoch": 0.30958592162345244, "grad_norm": 1.7625008144078511, "learning_rate": 8.720814353746512e-06, "loss": 0.4854, "step": 21480 }, { "epoch": 0.3097300491474857, "grad_norm": 2.472892300240524, "learning_rate": 8.71913351466981e-06, "loss": 0.5076, "step": 21490 }, { "epoch": 0.30987417667151895, "grad_norm": 1.5982982224105937, "learning_rate": 8.717451734223714e-06, "loss": 0.5369, "step": 21500 }, { "epoch": 0.3100183041955522, "grad_norm": 1.6649307219010152, "learning_rate": 8.715769012833903e-06, "loss": 0.465, "step": 21510 }, { "epoch": 0.31016243171958546, "grad_norm": 1.4522540257334458, "learning_rate": 8.714085350926303e-06, "loss": 0.4998, "step": 21520 }, { "epoch": 0.3103065592436188, "grad_norm": 2.2746570677382936, "learning_rate": 8.712400748927072e-06, "loss": 0.4978, "step": 21530 }, { "epoch": 0.31045068676765203, "grad_norm": 1.3500038957961806, "learning_rate": 8.71071520726261e-06, "loss": 0.4781, "step": 21540 }, { "epoch": 0.3105948142916853, "grad_norm": 1.6961664518294628, "learning_rate": 8.709028726359554e-06, "loss": 0.508, "step": 21550 }, { "epoch": 0.31073894181571854, "grad_norm": 1.2877575529491396, "learning_rate": 8.707341306644776e-06, "loss": 0.4639, "step": 21560 }, { "epoch": 0.3108830693397518, "grad_norm": 1.2373730083261323, "learning_rate": 8.70565294854539e-06, "loss": 0.4668, "step": 21570 }, { "epoch": 0.3110271968637851, "grad_norm": 1.6263509390090973, "learning_rate": 8.703963652488741e-06, "loss": 0.5165, "step": 21580 }, { "epoch": 0.31117132438781836, "grad_norm": 1.480886832203662, "learning_rate": 8.70227341890242e-06, "loss": 0.5113, "step": 21590 }, { "epoch": 0.3113154519118516, "grad_norm": 1.4701540070707182, "learning_rate": 8.70058224821425e-06, "loss": 0.4854, "step": 21600 }, { "epoch": 0.31145957943588487, "grad_norm": 1.6244584432218485, "learning_rate": 8.698890140852292e-06, "loss": 0.5058, "step": 21610 }, { "epoch": 0.3116037069599181, "grad_norm": 2.1620742286899826, "learning_rate": 8.697197097244843e-06, "loss": 0.5209, "step": 21620 }, { "epoch": 0.3117478344839514, "grad_norm": 1.3757749522339682, "learning_rate": 8.695503117820438e-06, "loss": 0.4998, "step": 21630 }, { "epoch": 0.3118919620079847, "grad_norm": 1.466000144678914, "learning_rate": 8.693808203007853e-06, "loss": 0.5023, "step": 21640 }, { "epoch": 0.31203608953201795, "grad_norm": 1.5574830244040625, "learning_rate": 8.692112353236089e-06, "loss": 0.4827, "step": 21650 }, { "epoch": 0.3121802170560512, "grad_norm": 1.6983090575603614, "learning_rate": 8.690415568934399e-06, "loss": 0.5014, "step": 21660 }, { "epoch": 0.31232434458008446, "grad_norm": 1.601588058829328, "learning_rate": 8.68871785053226e-06, "loss": 0.4729, "step": 21670 }, { "epoch": 0.3124684721041177, "grad_norm": 1.6379367980974302, "learning_rate": 8.687019198459395e-06, "loss": 0.4751, "step": 21680 }, { "epoch": 0.31261259962815097, "grad_norm": 1.3513774719526985, "learning_rate": 8.685319613145754e-06, "loss": 0.4956, "step": 21690 }, { "epoch": 0.3127567271521843, "grad_norm": 1.5936604753320915, "learning_rate": 8.683619095021531e-06, "loss": 0.5045, "step": 21700 }, { "epoch": 0.31290085467621753, "grad_norm": 1.387573425197257, "learning_rate": 8.681917644517153e-06, "loss": 0.4746, "step": 21710 }, { "epoch": 0.3130449822002508, "grad_norm": 1.432722783343834, "learning_rate": 8.680215262063281e-06, "loss": 0.5058, "step": 21720 }, { "epoch": 0.31318910972428404, "grad_norm": 1.658501269665513, "learning_rate": 8.678511948090815e-06, "loss": 0.5087, "step": 21730 }, { "epoch": 0.3133332372483173, "grad_norm": 1.4733202051067418, "learning_rate": 8.676807703030892e-06, "loss": 0.4908, "step": 21740 }, { "epoch": 0.31347736477235055, "grad_norm": 1.5611178974496691, "learning_rate": 8.675102527314877e-06, "loss": 0.469, "step": 21750 }, { "epoch": 0.31362149229638386, "grad_norm": 1.5917309327258482, "learning_rate": 8.673396421374383e-06, "loss": 0.5292, "step": 21760 }, { "epoch": 0.3137656198204171, "grad_norm": 1.6367966220354762, "learning_rate": 8.671689385641245e-06, "loss": 0.5274, "step": 21770 }, { "epoch": 0.3139097473444504, "grad_norm": 1.3417063210444358, "learning_rate": 8.669981420547543e-06, "loss": 0.4733, "step": 21780 }, { "epoch": 0.31405387486848363, "grad_norm": 1.6371330758130658, "learning_rate": 8.66827252652559e-06, "loss": 0.4974, "step": 21790 }, { "epoch": 0.3141980023925169, "grad_norm": 1.8025726505190693, "learning_rate": 8.66656270400793e-06, "loss": 0.4857, "step": 21800 }, { "epoch": 0.31434212991655014, "grad_norm": 1.4545885796544409, "learning_rate": 8.664851953427346e-06, "loss": 0.4849, "step": 21810 }, { "epoch": 0.31448625744058345, "grad_norm": 1.7775734994124057, "learning_rate": 8.663140275216859e-06, "loss": 0.4866, "step": 21820 }, { "epoch": 0.3146303849646167, "grad_norm": 1.3906364980489243, "learning_rate": 8.661427669809714e-06, "loss": 0.5014, "step": 21830 }, { "epoch": 0.31477451248864996, "grad_norm": 1.6747593025746086, "learning_rate": 8.659714137639404e-06, "loss": 0.5119, "step": 21840 }, { "epoch": 0.3149186400126832, "grad_norm": 1.611053119836708, "learning_rate": 8.657999679139646e-06, "loss": 0.5058, "step": 21850 }, { "epoch": 0.31506276753671647, "grad_norm": 1.6160951268201205, "learning_rate": 8.656284294744398e-06, "loss": 0.51, "step": 21860 }, { "epoch": 0.3152068950607497, "grad_norm": 1.507652834087306, "learning_rate": 8.654567984887849e-06, "loss": 0.5013, "step": 21870 }, { "epoch": 0.31535102258478304, "grad_norm": 1.6072545638931177, "learning_rate": 8.652850750004422e-06, "loss": 0.5051, "step": 21880 }, { "epoch": 0.3154951501088163, "grad_norm": 1.6353917149384036, "learning_rate": 8.651132590528777e-06, "loss": 0.4915, "step": 21890 }, { "epoch": 0.31563927763284955, "grad_norm": 1.700481412697666, "learning_rate": 8.649413506895804e-06, "loss": 0.5084, "step": 21900 }, { "epoch": 0.3157834051568828, "grad_norm": 1.6165655154834921, "learning_rate": 8.647693499540634e-06, "loss": 0.5092, "step": 21910 }, { "epoch": 0.31592753268091606, "grad_norm": 1.607251591546005, "learning_rate": 8.645972568898624e-06, "loss": 0.4988, "step": 21920 }, { "epoch": 0.3160716602049493, "grad_norm": 1.6392967092758692, "learning_rate": 8.644250715405367e-06, "loss": 0.4677, "step": 21930 }, { "epoch": 0.3162157877289826, "grad_norm": 1.448850089574158, "learning_rate": 8.64252793949669e-06, "loss": 0.477, "step": 21940 }, { "epoch": 0.3163599152530159, "grad_norm": 1.7003415833994007, "learning_rate": 8.640804241608655e-06, "loss": 0.4795, "step": 21950 }, { "epoch": 0.31650404277704913, "grad_norm": 1.7271256398185373, "learning_rate": 8.63907962217756e-06, "loss": 0.4964, "step": 21960 }, { "epoch": 0.3166481703010824, "grad_norm": 2.857928874029946, "learning_rate": 8.637354081639924e-06, "loss": 0.5097, "step": 21970 }, { "epoch": 0.31679229782511564, "grad_norm": 1.5005119048932958, "learning_rate": 8.635627620432516e-06, "loss": 0.5167, "step": 21980 }, { "epoch": 0.31693642534914895, "grad_norm": 1.3116283904667665, "learning_rate": 8.633900238992325e-06, "loss": 0.4813, "step": 21990 }, { "epoch": 0.3170805528731822, "grad_norm": 1.501592287219655, "learning_rate": 8.632171937756579e-06, "loss": 0.501, "step": 22000 }, { "epoch": 0.31722468039721546, "grad_norm": 1.6453297870674868, "learning_rate": 8.630442717162736e-06, "loss": 0.4899, "step": 22010 }, { "epoch": 0.3173688079212487, "grad_norm": 1.7252916497216833, "learning_rate": 8.62871257764849e-06, "loss": 0.474, "step": 22020 }, { "epoch": 0.317512935445282, "grad_norm": 1.5176207984862915, "learning_rate": 8.626981519651764e-06, "loss": 0.4969, "step": 22030 }, { "epoch": 0.31765706296931523, "grad_norm": 1.2884256879001361, "learning_rate": 8.625249543610718e-06, "loss": 0.487, "step": 22040 }, { "epoch": 0.31780119049334854, "grad_norm": 1.6452592806196236, "learning_rate": 8.623516649963739e-06, "loss": 0.5206, "step": 22050 }, { "epoch": 0.3179453180173818, "grad_norm": 1.6316339493984242, "learning_rate": 8.621782839149449e-06, "loss": 0.4785, "step": 22060 }, { "epoch": 0.31808944554141505, "grad_norm": 1.2542420632386457, "learning_rate": 8.620048111606701e-06, "loss": 0.4813, "step": 22070 }, { "epoch": 0.3182335730654483, "grad_norm": 1.805002631720874, "learning_rate": 8.618312467774583e-06, "loss": 0.5013, "step": 22080 }, { "epoch": 0.31837770058948156, "grad_norm": 1.8846376467895571, "learning_rate": 8.616575908092413e-06, "loss": 0.4995, "step": 22090 }, { "epoch": 0.3185218281135148, "grad_norm": 1.7465157110361285, "learning_rate": 8.61483843299974e-06, "loss": 0.5036, "step": 22100 }, { "epoch": 0.3186659556375481, "grad_norm": 1.5800325296550535, "learning_rate": 8.613100042936345e-06, "loss": 0.5015, "step": 22110 }, { "epoch": 0.3188100831615814, "grad_norm": 1.753544342974804, "learning_rate": 8.611360738342241e-06, "loss": 0.4918, "step": 22120 }, { "epoch": 0.31895421068561464, "grad_norm": 1.4164350621062018, "learning_rate": 8.609620519657674e-06, "loss": 0.5179, "step": 22130 }, { "epoch": 0.3190983382096479, "grad_norm": 1.6615176762067998, "learning_rate": 8.607879387323119e-06, "loss": 0.486, "step": 22140 }, { "epoch": 0.31924246573368115, "grad_norm": 1.5955518154691222, "learning_rate": 8.606137341779282e-06, "loss": 0.4979, "step": 22150 }, { "epoch": 0.3193865932577144, "grad_norm": 1.4762179504604822, "learning_rate": 8.604394383467103e-06, "loss": 0.5031, "step": 22160 }, { "epoch": 0.3195307207817477, "grad_norm": 1.6816510967355272, "learning_rate": 8.60265051282775e-06, "loss": 0.4994, "step": 22170 }, { "epoch": 0.31967484830578097, "grad_norm": 1.3457778827028228, "learning_rate": 8.600905730302622e-06, "loss": 0.4806, "step": 22180 }, { "epoch": 0.3198189758298142, "grad_norm": 1.4590350236593643, "learning_rate": 8.599160036333354e-06, "loss": 0.4811, "step": 22190 }, { "epoch": 0.3199631033538475, "grad_norm": 2.028429718711958, "learning_rate": 8.597413431361807e-06, "loss": 0.5314, "step": 22200 }, { "epoch": 0.32010723087788073, "grad_norm": 1.3288304347570317, "learning_rate": 8.59566591583007e-06, "loss": 0.467, "step": 22210 }, { "epoch": 0.320251358401914, "grad_norm": 1.8551384231089652, "learning_rate": 8.593917490180468e-06, "loss": 0.4937, "step": 22220 }, { "epoch": 0.3203954859259473, "grad_norm": 1.5412919667264615, "learning_rate": 8.592168154855554e-06, "loss": 0.4928, "step": 22230 }, { "epoch": 0.32053961344998055, "grad_norm": 1.4391094606691197, "learning_rate": 8.590417910298112e-06, "loss": 0.4935, "step": 22240 }, { "epoch": 0.3206837409740138, "grad_norm": 1.3041072916689587, "learning_rate": 8.588666756951154e-06, "loss": 0.4821, "step": 22250 }, { "epoch": 0.32082786849804706, "grad_norm": 1.5082746757037662, "learning_rate": 8.586914695257927e-06, "loss": 0.4633, "step": 22260 }, { "epoch": 0.3209719960220803, "grad_norm": 1.7336376324096032, "learning_rate": 8.5851617256619e-06, "loss": 0.5151, "step": 22270 }, { "epoch": 0.3211161235461136, "grad_norm": 1.453260395112286, "learning_rate": 8.58340784860678e-06, "loss": 0.4991, "step": 22280 }, { "epoch": 0.3212602510701469, "grad_norm": 1.271744157733236, "learning_rate": 8.581653064536497e-06, "loss": 0.4989, "step": 22290 }, { "epoch": 0.32140437859418014, "grad_norm": 1.5097200748094406, "learning_rate": 8.579897373895216e-06, "loss": 0.4911, "step": 22300 }, { "epoch": 0.3215485061182134, "grad_norm": 1.4283674016261976, "learning_rate": 8.578140777127328e-06, "loss": 0.5048, "step": 22310 }, { "epoch": 0.32169263364224665, "grad_norm": 1.7123659705531253, "learning_rate": 8.576383274677455e-06, "loss": 0.5162, "step": 22320 }, { "epoch": 0.3218367611662799, "grad_norm": 1.4679213228147434, "learning_rate": 8.574624866990446e-06, "loss": 0.4667, "step": 22330 }, { "epoch": 0.3219808886903132, "grad_norm": 1.528714479566648, "learning_rate": 8.572865554511382e-06, "loss": 0.5069, "step": 22340 }, { "epoch": 0.32212501621434647, "grad_norm": 1.6864616952135507, "learning_rate": 8.571105337685575e-06, "loss": 0.4861, "step": 22350 }, { "epoch": 0.3222691437383797, "grad_norm": 1.5870525413135934, "learning_rate": 8.569344216958556e-06, "loss": 0.4987, "step": 22360 }, { "epoch": 0.322413271262413, "grad_norm": 1.5352204569319674, "learning_rate": 8.567582192776098e-06, "loss": 0.4746, "step": 22370 }, { "epoch": 0.32255739878644624, "grad_norm": 1.7790899501390696, "learning_rate": 8.565819265584193e-06, "loss": 0.517, "step": 22380 }, { "epoch": 0.3227015263104795, "grad_norm": 1.609306712733651, "learning_rate": 8.564055435829063e-06, "loss": 0.4947, "step": 22390 }, { "epoch": 0.3228456538345128, "grad_norm": 1.632896739620472, "learning_rate": 8.562290703957163e-06, "loss": 0.5078, "step": 22400 }, { "epoch": 0.32298978135854606, "grad_norm": 1.9212565245766184, "learning_rate": 8.560525070415172e-06, "loss": 0.492, "step": 22410 }, { "epoch": 0.3231339088825793, "grad_norm": 1.4929231726778143, "learning_rate": 8.558758535650002e-06, "loss": 0.4855, "step": 22420 }, { "epoch": 0.32327803640661257, "grad_norm": 1.6477734985531847, "learning_rate": 8.556991100108786e-06, "loss": 0.5036, "step": 22430 }, { "epoch": 0.3234221639306458, "grad_norm": 1.5842634999801928, "learning_rate": 8.555222764238891e-06, "loss": 0.483, "step": 22440 }, { "epoch": 0.3235662914546791, "grad_norm": 1.6504711429502967, "learning_rate": 8.553453528487907e-06, "loss": 0.5015, "step": 22450 }, { "epoch": 0.3237104189787124, "grad_norm": 1.468006685548674, "learning_rate": 8.551683393303659e-06, "loss": 0.4967, "step": 22460 }, { "epoch": 0.32385454650274564, "grad_norm": 1.6470550587589863, "learning_rate": 8.549912359134193e-06, "loss": 0.5046, "step": 22470 }, { "epoch": 0.3239986740267789, "grad_norm": 1.5077214242395474, "learning_rate": 8.548140426427783e-06, "loss": 0.5039, "step": 22480 }, { "epoch": 0.32414280155081215, "grad_norm": 1.6117378442013806, "learning_rate": 8.546367595632935e-06, "loss": 0.5003, "step": 22490 }, { "epoch": 0.3242869290748454, "grad_norm": 1.4628721188784246, "learning_rate": 8.544593867198378e-06, "loss": 0.476, "step": 22500 }, { "epoch": 0.32443105659887866, "grad_norm": 1.5649787292626884, "learning_rate": 8.54281924157307e-06, "loss": 0.4665, "step": 22510 }, { "epoch": 0.324575184122912, "grad_norm": 1.3907541675501054, "learning_rate": 8.541043719206195e-06, "loss": 0.4807, "step": 22520 }, { "epoch": 0.32471931164694523, "grad_norm": 1.3130233512589407, "learning_rate": 8.539267300547166e-06, "loss": 0.48, "step": 22530 }, { "epoch": 0.3248634391709785, "grad_norm": 1.4144572707970404, "learning_rate": 8.537489986045622e-06, "loss": 0.4954, "step": 22540 }, { "epoch": 0.32500756669501174, "grad_norm": 1.3830077783946555, "learning_rate": 8.535711776151428e-06, "loss": 0.4736, "step": 22550 }, { "epoch": 0.325151694219045, "grad_norm": 1.556758055982479, "learning_rate": 8.533932671314674e-06, "loss": 0.5184, "step": 22560 }, { "epoch": 0.32529582174307825, "grad_norm": 1.674112966786992, "learning_rate": 8.53215267198568e-06, "loss": 0.4786, "step": 22570 }, { "epoch": 0.32543994926711156, "grad_norm": 1.377474965007943, "learning_rate": 8.530371778614994e-06, "loss": 0.4796, "step": 22580 }, { "epoch": 0.3255840767911448, "grad_norm": 1.575169954243282, "learning_rate": 8.528589991653382e-06, "loss": 0.4846, "step": 22590 }, { "epoch": 0.32572820431517807, "grad_norm": 1.732206108839217, "learning_rate": 8.526807311551844e-06, "loss": 0.5113, "step": 22600 }, { "epoch": 0.3258723318392113, "grad_norm": 1.538925094314631, "learning_rate": 8.525023738761605e-06, "loss": 0.5082, "step": 22610 }, { "epoch": 0.3260164593632446, "grad_norm": 1.4518972495661806, "learning_rate": 8.523239273734111e-06, "loss": 0.4736, "step": 22620 }, { "epoch": 0.32616058688727784, "grad_norm": 1.4376561530933747, "learning_rate": 8.52145391692104e-06, "loss": 0.4718, "step": 22630 }, { "epoch": 0.32630471441131115, "grad_norm": 1.646895701215853, "learning_rate": 8.519667668774295e-06, "loss": 0.4898, "step": 22640 }, { "epoch": 0.3264488419353444, "grad_norm": 1.785511095482636, "learning_rate": 8.517880529745995e-06, "loss": 0.5192, "step": 22650 }, { "epoch": 0.32659296945937766, "grad_norm": 1.4312014317721458, "learning_rate": 8.5160925002885e-06, "loss": 0.473, "step": 22660 }, { "epoch": 0.3267370969834109, "grad_norm": 1.8247264816188242, "learning_rate": 8.514303580854384e-06, "loss": 0.4848, "step": 22670 }, { "epoch": 0.32688122450744417, "grad_norm": 1.7190722128282112, "learning_rate": 8.51251377189645e-06, "loss": 0.5032, "step": 22680 }, { "epoch": 0.3270253520314775, "grad_norm": 1.6037267733652376, "learning_rate": 8.510723073867727e-06, "loss": 0.5035, "step": 22690 }, { "epoch": 0.32716947955551073, "grad_norm": 1.6390840728923914, "learning_rate": 8.508931487221466e-06, "loss": 0.4718, "step": 22700 }, { "epoch": 0.327313607079544, "grad_norm": 1.5379079813601313, "learning_rate": 8.507139012411147e-06, "loss": 0.4801, "step": 22710 }, { "epoch": 0.32745773460357724, "grad_norm": 1.3894070688087832, "learning_rate": 8.50534564989047e-06, "loss": 0.4677, "step": 22720 }, { "epoch": 0.3276018621276105, "grad_norm": 1.8087451850738203, "learning_rate": 8.503551400113369e-06, "loss": 0.4767, "step": 22730 }, { "epoch": 0.32774598965164375, "grad_norm": 1.5563422750021292, "learning_rate": 8.501756263533986e-06, "loss": 0.5053, "step": 22740 }, { "epoch": 0.32789011717567706, "grad_norm": 1.6614368914365791, "learning_rate": 8.499960240606704e-06, "loss": 0.4994, "step": 22750 }, { "epoch": 0.3280342446997103, "grad_norm": 1.6900032795225455, "learning_rate": 8.498163331786122e-06, "loss": 0.478, "step": 22760 }, { "epoch": 0.3281783722237436, "grad_norm": 1.5264410179269072, "learning_rate": 8.496365537527067e-06, "loss": 0.5014, "step": 22770 }, { "epoch": 0.32832249974777683, "grad_norm": 1.5608975922511643, "learning_rate": 8.494566858284588e-06, "loss": 0.4845, "step": 22780 }, { "epoch": 0.3284666272718101, "grad_norm": 1.3002283041006368, "learning_rate": 8.492767294513953e-06, "loss": 0.4808, "step": 22790 }, { "epoch": 0.32861075479584334, "grad_norm": 1.594466450455827, "learning_rate": 8.490966846670665e-06, "loss": 0.4831, "step": 22800 }, { "epoch": 0.32875488231987665, "grad_norm": 1.4009669712934973, "learning_rate": 8.489165515210442e-06, "loss": 0.4919, "step": 22810 }, { "epoch": 0.3288990098439099, "grad_norm": 1.3758521613589196, "learning_rate": 8.487363300589228e-06, "loss": 0.4816, "step": 22820 }, { "epoch": 0.32904313736794316, "grad_norm": 1.6190953044237983, "learning_rate": 8.485560203263194e-06, "loss": 0.4876, "step": 22830 }, { "epoch": 0.3291872648919764, "grad_norm": 1.3641547885647831, "learning_rate": 8.483756223688728e-06, "loss": 0.5042, "step": 22840 }, { "epoch": 0.32933139241600967, "grad_norm": 1.6588384744483298, "learning_rate": 8.481951362322447e-06, "loss": 0.4701, "step": 22850 }, { "epoch": 0.3294755199400429, "grad_norm": 1.3491726593380637, "learning_rate": 8.480145619621188e-06, "loss": 0.481, "step": 22860 }, { "epoch": 0.32961964746407624, "grad_norm": 3.070862929397874, "learning_rate": 8.47833899604201e-06, "loss": 0.4937, "step": 22870 }, { "epoch": 0.3297637749881095, "grad_norm": 1.624148295215629, "learning_rate": 8.476531492042202e-06, "loss": 0.5203, "step": 22880 }, { "epoch": 0.32990790251214275, "grad_norm": 1.3835069059247274, "learning_rate": 8.474723108079266e-06, "loss": 0.4867, "step": 22890 }, { "epoch": 0.330052030036176, "grad_norm": 1.4141060860227332, "learning_rate": 8.472913844610935e-06, "loss": 0.484, "step": 22900 }, { "epoch": 0.33019615756020926, "grad_norm": 1.3127712225388284, "learning_rate": 8.471103702095158e-06, "loss": 0.4824, "step": 22910 }, { "epoch": 0.3303402850842425, "grad_norm": 1.4494265758060219, "learning_rate": 8.469292680990114e-06, "loss": 0.4907, "step": 22920 }, { "epoch": 0.3304844126082758, "grad_norm": 1.6472446995421117, "learning_rate": 8.467480781754193e-06, "loss": 0.5067, "step": 22930 }, { "epoch": 0.3306285401323091, "grad_norm": 1.4414696564405272, "learning_rate": 8.465668004846021e-06, "loss": 0.4972, "step": 22940 }, { "epoch": 0.33077266765634233, "grad_norm": 1.483476247403405, "learning_rate": 8.463854350724439e-06, "loss": 0.4832, "step": 22950 }, { "epoch": 0.3309167951803756, "grad_norm": 1.4700199760919945, "learning_rate": 8.462039819848505e-06, "loss": 0.4715, "step": 22960 }, { "epoch": 0.33106092270440884, "grad_norm": 1.3575206637001491, "learning_rate": 8.460224412677511e-06, "loss": 0.4892, "step": 22970 }, { "epoch": 0.3312050502284421, "grad_norm": 1.6757482986346717, "learning_rate": 8.458408129670958e-06, "loss": 0.5153, "step": 22980 }, { "epoch": 0.3313491777524754, "grad_norm": 1.6574106962302768, "learning_rate": 8.456590971288582e-06, "loss": 0.5046, "step": 22990 }, { "epoch": 0.33149330527650867, "grad_norm": 1.6602009989378377, "learning_rate": 8.454772937990328e-06, "loss": 0.4906, "step": 23000 }, { "epoch": 0.3316374328005419, "grad_norm": 1.752100672319164, "learning_rate": 8.452954030236368e-06, "loss": 0.5033, "step": 23010 }, { "epoch": 0.3317815603245752, "grad_norm": 1.4414509330176555, "learning_rate": 8.4511342484871e-06, "loss": 0.5016, "step": 23020 }, { "epoch": 0.33192568784860843, "grad_norm": 1.3843409746255577, "learning_rate": 8.449313593203134e-06, "loss": 0.485, "step": 23030 }, { "epoch": 0.3320698153726417, "grad_norm": 1.6075193142518398, "learning_rate": 8.447492064845307e-06, "loss": 0.4872, "step": 23040 }, { "epoch": 0.332213942896675, "grad_norm": 1.8355472862914868, "learning_rate": 8.445669663874676e-06, "loss": 0.4985, "step": 23050 }, { "epoch": 0.33235807042070825, "grad_norm": 1.6735180663428442, "learning_rate": 8.443846390752519e-06, "loss": 0.49, "step": 23060 }, { "epoch": 0.3325021979447415, "grad_norm": 1.5089333852084035, "learning_rate": 8.442022245940331e-06, "loss": 0.4992, "step": 23070 }, { "epoch": 0.33264632546877476, "grad_norm": 1.5706285417744041, "learning_rate": 8.440197229899835e-06, "loss": 0.4877, "step": 23080 }, { "epoch": 0.332790452992808, "grad_norm": 1.5529464846500918, "learning_rate": 8.438371343092968e-06, "loss": 0.5141, "step": 23090 }, { "epoch": 0.3329345805168413, "grad_norm": 1.4594911821748011, "learning_rate": 8.43654458598189e-06, "loss": 0.4968, "step": 23100 }, { "epoch": 0.3330787080408746, "grad_norm": 1.6196393091538064, "learning_rate": 8.434716959028983e-06, "loss": 0.4674, "step": 23110 }, { "epoch": 0.33322283556490784, "grad_norm": 1.5014551816660062, "learning_rate": 8.432888462696844e-06, "loss": 0.4906, "step": 23120 }, { "epoch": 0.3333669630889411, "grad_norm": 1.4246920787956097, "learning_rate": 8.431059097448297e-06, "loss": 0.5182, "step": 23130 }, { "epoch": 0.33351109061297435, "grad_norm": 1.5928911609287897, "learning_rate": 8.42922886374638e-06, "loss": 0.5162, "step": 23140 }, { "epoch": 0.3336552181370076, "grad_norm": 1.6668981334733242, "learning_rate": 8.42739776205435e-06, "loss": 0.504, "step": 23150 }, { "epoch": 0.3337993456610409, "grad_norm": 1.5492247818797296, "learning_rate": 8.425565792835692e-06, "loss": 0.4909, "step": 23160 }, { "epoch": 0.33394347318507417, "grad_norm": 1.7798191833520127, "learning_rate": 8.423732956554105e-06, "loss": 0.4964, "step": 23170 }, { "epoch": 0.3340876007091074, "grad_norm": 1.458581365216235, "learning_rate": 8.421899253673505e-06, "loss": 0.482, "step": 23180 }, { "epoch": 0.3342317282331407, "grad_norm": 1.5544109808811988, "learning_rate": 8.420064684658031e-06, "loss": 0.4791, "step": 23190 }, { "epoch": 0.33437585575717393, "grad_norm": 1.629163631781272, "learning_rate": 8.418229249972041e-06, "loss": 0.5298, "step": 23200 }, { "epoch": 0.3345199832812072, "grad_norm": 1.196925119404604, "learning_rate": 8.416392950080112e-06, "loss": 0.4816, "step": 23210 }, { "epoch": 0.3346641108052405, "grad_norm": 1.5589892309678557, "learning_rate": 8.414555785447038e-06, "loss": 0.506, "step": 23220 }, { "epoch": 0.33480823832927376, "grad_norm": 1.7144962802191366, "learning_rate": 8.412717756537834e-06, "loss": 0.4841, "step": 23230 }, { "epoch": 0.334952365853307, "grad_norm": 1.6705267157462018, "learning_rate": 8.410878863817734e-06, "loss": 0.5009, "step": 23240 }, { "epoch": 0.33509649337734027, "grad_norm": 1.6230317198829793, "learning_rate": 8.40903910775219e-06, "loss": 0.4909, "step": 23250 }, { "epoch": 0.3352406209013735, "grad_norm": 1.683058823132643, "learning_rate": 8.407198488806869e-06, "loss": 0.5317, "step": 23260 }, { "epoch": 0.3353847484254068, "grad_norm": 1.5305530693406113, "learning_rate": 8.405357007447662e-06, "loss": 0.5197, "step": 23270 }, { "epoch": 0.3355288759494401, "grad_norm": 1.5192653449753863, "learning_rate": 8.40351466414068e-06, "loss": 0.5147, "step": 23280 }, { "epoch": 0.33567300347347334, "grad_norm": 1.540058162076374, "learning_rate": 8.40167145935224e-06, "loss": 0.5066, "step": 23290 }, { "epoch": 0.3358171309975066, "grad_norm": 1.9464175519744928, "learning_rate": 8.399827393548891e-06, "loss": 0.5187, "step": 23300 }, { "epoch": 0.33596125852153985, "grad_norm": 1.508926393905796, "learning_rate": 8.397982467197394e-06, "loss": 0.4682, "step": 23310 }, { "epoch": 0.3361053860455731, "grad_norm": 1.7451715666242356, "learning_rate": 8.396136680764724e-06, "loss": 0.5144, "step": 23320 }, { "epoch": 0.33624951356960636, "grad_norm": 1.3315557861233438, "learning_rate": 8.394290034718083e-06, "loss": 0.4878, "step": 23330 }, { "epoch": 0.3363936410936397, "grad_norm": 1.3738395443549076, "learning_rate": 8.392442529524883e-06, "loss": 0.4591, "step": 23340 }, { "epoch": 0.3365377686176729, "grad_norm": 1.6351478379513518, "learning_rate": 8.390594165652752e-06, "loss": 0.5041, "step": 23350 }, { "epoch": 0.3366818961417062, "grad_norm": 1.3986140238701719, "learning_rate": 8.388744943569545e-06, "loss": 0.4903, "step": 23360 }, { "epoch": 0.33682602366573944, "grad_norm": 1.3727193305308998, "learning_rate": 8.386894863743325e-06, "loss": 0.5079, "step": 23370 }, { "epoch": 0.3369701511897727, "grad_norm": 1.2660926504115257, "learning_rate": 8.385043926642377e-06, "loss": 0.4792, "step": 23380 }, { "epoch": 0.33711427871380595, "grad_norm": 1.6079938017919786, "learning_rate": 8.383192132735199e-06, "loss": 0.4751, "step": 23390 }, { "epoch": 0.33725840623783926, "grad_norm": 1.532401519620708, "learning_rate": 8.381339482490511e-06, "loss": 0.4655, "step": 23400 }, { "epoch": 0.3374025337618725, "grad_norm": 1.4946619685532627, "learning_rate": 8.379485976377246e-06, "loss": 0.4502, "step": 23410 }, { "epoch": 0.33754666128590577, "grad_norm": 1.7662696255900336, "learning_rate": 8.377631614864554e-06, "loss": 0.4735, "step": 23420 }, { "epoch": 0.337690788809939, "grad_norm": 1.8868926023405548, "learning_rate": 8.375776398421803e-06, "loss": 0.4688, "step": 23430 }, { "epoch": 0.3378349163339723, "grad_norm": 1.3531169921277524, "learning_rate": 8.373920327518575e-06, "loss": 0.4629, "step": 23440 }, { "epoch": 0.3379790438580056, "grad_norm": 1.5905964666944814, "learning_rate": 8.372063402624673e-06, "loss": 0.4933, "step": 23450 }, { "epoch": 0.33812317138203885, "grad_norm": 1.4428179485781254, "learning_rate": 8.37020562421011e-06, "loss": 0.4957, "step": 23460 }, { "epoch": 0.3382672989060721, "grad_norm": 1.6670165242145927, "learning_rate": 8.36834699274512e-06, "loss": 0.5097, "step": 23470 }, { "epoch": 0.33841142643010536, "grad_norm": 1.9350522560971621, "learning_rate": 8.36648750870015e-06, "loss": 0.5041, "step": 23480 }, { "epoch": 0.3385555539541386, "grad_norm": 1.507771059698322, "learning_rate": 8.364627172545863e-06, "loss": 0.4732, "step": 23490 }, { "epoch": 0.33869968147817187, "grad_norm": 1.2897963709969698, "learning_rate": 8.362765984753139e-06, "loss": 0.4706, "step": 23500 }, { "epoch": 0.3388438090022052, "grad_norm": 1.5196070878344545, "learning_rate": 8.360903945793074e-06, "loss": 0.4842, "step": 23510 }, { "epoch": 0.33898793652623843, "grad_norm": 1.5831024645376488, "learning_rate": 8.359041056136978e-06, "loss": 0.4849, "step": 23520 }, { "epoch": 0.3391320640502717, "grad_norm": 1.6014879852691055, "learning_rate": 8.357177316256377e-06, "loss": 0.5254, "step": 23530 }, { "epoch": 0.33927619157430494, "grad_norm": 1.4741822635254853, "learning_rate": 8.35531272662301e-06, "loss": 0.478, "step": 23540 }, { "epoch": 0.3394203190983382, "grad_norm": 1.4893866256052266, "learning_rate": 8.353447287708836e-06, "loss": 0.4558, "step": 23550 }, { "epoch": 0.33956444662237145, "grad_norm": 1.4388502437278405, "learning_rate": 8.351580999986023e-06, "loss": 0.5072, "step": 23560 }, { "epoch": 0.33970857414640476, "grad_norm": 1.7381911589132584, "learning_rate": 8.34971386392696e-06, "loss": 0.4831, "step": 23570 }, { "epoch": 0.339852701670438, "grad_norm": 1.632039234734675, "learning_rate": 8.347845880004247e-06, "loss": 0.5105, "step": 23580 }, { "epoch": 0.3399968291944713, "grad_norm": 1.5974864080830644, "learning_rate": 8.345977048690696e-06, "loss": 0.4737, "step": 23590 }, { "epoch": 0.34014095671850453, "grad_norm": 1.6832571930142148, "learning_rate": 8.34410737045934e-06, "loss": 0.4977, "step": 23600 }, { "epoch": 0.3402850842425378, "grad_norm": 1.4512517430775198, "learning_rate": 8.342236845783423e-06, "loss": 0.4854, "step": 23610 }, { "epoch": 0.34042921176657104, "grad_norm": 1.9715991428077273, "learning_rate": 8.340365475136401e-06, "loss": 0.5116, "step": 23620 }, { "epoch": 0.34057333929060435, "grad_norm": 1.4362828865527888, "learning_rate": 8.338493258991949e-06, "loss": 0.4672, "step": 23630 }, { "epoch": 0.3407174668146376, "grad_norm": 1.6554753487836675, "learning_rate": 8.33662019782395e-06, "loss": 0.5026, "step": 23640 }, { "epoch": 0.34086159433867086, "grad_norm": 1.536474529183721, "learning_rate": 8.334746292106507e-06, "loss": 0.477, "step": 23650 }, { "epoch": 0.3410057218627041, "grad_norm": 1.459269164542457, "learning_rate": 8.332871542313936e-06, "loss": 0.5113, "step": 23660 }, { "epoch": 0.34114984938673737, "grad_norm": 1.3079936206235476, "learning_rate": 8.330995948920763e-06, "loss": 0.4969, "step": 23670 }, { "epoch": 0.3412939769107706, "grad_norm": 1.322460900591121, "learning_rate": 8.329119512401726e-06, "loss": 0.4784, "step": 23680 }, { "epoch": 0.34143810443480394, "grad_norm": 1.322521671480489, "learning_rate": 8.327242233231784e-06, "loss": 0.4773, "step": 23690 }, { "epoch": 0.3415822319588372, "grad_norm": 1.46632889853428, "learning_rate": 8.325364111886103e-06, "loss": 0.4943, "step": 23700 }, { "epoch": 0.34172635948287045, "grad_norm": 1.441202194440829, "learning_rate": 8.323485148840065e-06, "loss": 0.5093, "step": 23710 }, { "epoch": 0.3418704870069037, "grad_norm": 1.5527226106264946, "learning_rate": 8.321605344569263e-06, "loss": 0.4956, "step": 23720 }, { "epoch": 0.34201461453093696, "grad_norm": 1.471374836755284, "learning_rate": 8.319724699549507e-06, "loss": 0.4867, "step": 23730 }, { "epoch": 0.3421587420549702, "grad_norm": 1.2959925015082956, "learning_rate": 8.317843214256814e-06, "loss": 0.459, "step": 23740 }, { "epoch": 0.3423028695790035, "grad_norm": 1.5285775335504797, "learning_rate": 8.315960889167416e-06, "loss": 0.4772, "step": 23750 }, { "epoch": 0.3424469971030368, "grad_norm": 1.5457625476016414, "learning_rate": 8.31407772475776e-06, "loss": 0.4942, "step": 23760 }, { "epoch": 0.34259112462707003, "grad_norm": 1.4829297424625583, "learning_rate": 8.312193721504505e-06, "loss": 0.4695, "step": 23770 }, { "epoch": 0.3427352521511033, "grad_norm": 1.5936348625278107, "learning_rate": 8.310308879884516e-06, "loss": 0.5055, "step": 23780 }, { "epoch": 0.34287937967513654, "grad_norm": 1.7807728238729394, "learning_rate": 8.308423200374883e-06, "loss": 0.5187, "step": 23790 }, { "epoch": 0.34302350719916985, "grad_norm": 1.6040707221487307, "learning_rate": 8.306536683452892e-06, "loss": 0.4857, "step": 23800 }, { "epoch": 0.3431676347232031, "grad_norm": 1.929993542634815, "learning_rate": 8.304649329596053e-06, "loss": 0.4951, "step": 23810 }, { "epoch": 0.34331176224723636, "grad_norm": 1.5803848861116745, "learning_rate": 8.302761139282086e-06, "loss": 0.4854, "step": 23820 }, { "epoch": 0.3434558897712696, "grad_norm": 1.5073925901935463, "learning_rate": 8.300872112988915e-06, "loss": 0.4996, "step": 23830 }, { "epoch": 0.3436000172953029, "grad_norm": 1.5670871148604573, "learning_rate": 8.298982251194687e-06, "loss": 0.4789, "step": 23840 }, { "epoch": 0.34374414481933613, "grad_norm": 1.667623389531267, "learning_rate": 8.297091554377754e-06, "loss": 0.4863, "step": 23850 }, { "epoch": 0.34388827234336944, "grad_norm": 1.3333810899454028, "learning_rate": 8.295200023016678e-06, "loss": 0.4917, "step": 23860 }, { "epoch": 0.3440323998674027, "grad_norm": 1.610601273569789, "learning_rate": 8.293307657590235e-06, "loss": 0.5036, "step": 23870 }, { "epoch": 0.34417652739143595, "grad_norm": 1.5625672209698123, "learning_rate": 8.291414458577412e-06, "loss": 0.4832, "step": 23880 }, { "epoch": 0.3443206549154692, "grad_norm": 1.7547471206142236, "learning_rate": 8.289520426457406e-06, "loss": 0.4744, "step": 23890 }, { "epoch": 0.34446478243950246, "grad_norm": 1.5983187507656067, "learning_rate": 8.287625561709626e-06, "loss": 0.4987, "step": 23900 }, { "epoch": 0.3446089099635357, "grad_norm": 1.581530875336681, "learning_rate": 8.285729864813692e-06, "loss": 0.4632, "step": 23910 }, { "epoch": 0.344753037487569, "grad_norm": 1.5188686666514335, "learning_rate": 8.283833336249431e-06, "loss": 0.4511, "step": 23920 }, { "epoch": 0.3448971650116023, "grad_norm": 1.4966781883806022, "learning_rate": 8.281935976496888e-06, "loss": 0.4926, "step": 23930 }, { "epoch": 0.34504129253563554, "grad_norm": 1.6085346009674781, "learning_rate": 8.28003778603631e-06, "loss": 0.4886, "step": 23940 }, { "epoch": 0.3451854200596688, "grad_norm": 1.5857942851127855, "learning_rate": 8.278138765348158e-06, "loss": 0.4889, "step": 23950 }, { "epoch": 0.34532954758370205, "grad_norm": 1.5042304199470184, "learning_rate": 8.276238914913103e-06, "loss": 0.465, "step": 23960 }, { "epoch": 0.3454736751077353, "grad_norm": 1.8864363460588796, "learning_rate": 8.274338235212029e-06, "loss": 0.4579, "step": 23970 }, { "epoch": 0.3456178026317686, "grad_norm": 1.3783159152381612, "learning_rate": 8.272436726726023e-06, "loss": 0.489, "step": 23980 }, { "epoch": 0.34576193015580187, "grad_norm": 1.3000262755305725, "learning_rate": 8.270534389936389e-06, "loss": 0.4589, "step": 23990 }, { "epoch": 0.3459060576798351, "grad_norm": 1.7206817587070196, "learning_rate": 8.268631225324636e-06, "loss": 0.4606, "step": 24000 }, { "epoch": 0.3460501852038684, "grad_norm": 1.4695746978666808, "learning_rate": 8.266727233372484e-06, "loss": 0.479, "step": 24010 }, { "epoch": 0.34619431272790163, "grad_norm": 1.305895725298234, "learning_rate": 8.264822414561861e-06, "loss": 0.4615, "step": 24020 }, { "epoch": 0.3463384402519349, "grad_norm": 1.4881214232362716, "learning_rate": 8.262916769374909e-06, "loss": 0.517, "step": 24030 }, { "epoch": 0.3464825677759682, "grad_norm": 1.5230867548390834, "learning_rate": 8.261010298293974e-06, "loss": 0.5091, "step": 24040 }, { "epoch": 0.34662669530000145, "grad_norm": 1.4868417809052858, "learning_rate": 8.259103001801612e-06, "loss": 0.4772, "step": 24050 }, { "epoch": 0.3467708228240347, "grad_norm": 1.6746179250178375, "learning_rate": 8.257194880380592e-06, "loss": 0.4963, "step": 24060 }, { "epoch": 0.34691495034806796, "grad_norm": 1.5928966968911893, "learning_rate": 8.255285934513886e-06, "loss": 0.4978, "step": 24070 }, { "epoch": 0.3470590778721012, "grad_norm": 1.8432392838862643, "learning_rate": 8.253376164684676e-06, "loss": 0.4978, "step": 24080 }, { "epoch": 0.3472032053961345, "grad_norm": 1.5455916065211044, "learning_rate": 8.251465571376356e-06, "loss": 0.4866, "step": 24090 }, { "epoch": 0.3473473329201678, "grad_norm": 1.5566933666874372, "learning_rate": 8.249554155072526e-06, "loss": 0.4788, "step": 24100 }, { "epoch": 0.34749146044420104, "grad_norm": 1.5710189776332206, "learning_rate": 8.247641916256995e-06, "loss": 0.4943, "step": 24110 }, { "epoch": 0.3476355879682343, "grad_norm": 1.4715428415258005, "learning_rate": 8.245728855413779e-06, "loss": 0.4832, "step": 24120 }, { "epoch": 0.34777971549226755, "grad_norm": 1.5433616656070241, "learning_rate": 8.243814973027105e-06, "loss": 0.5044, "step": 24130 }, { "epoch": 0.3479238430163008, "grad_norm": 1.5713199721292022, "learning_rate": 8.241900269581402e-06, "loss": 0.4826, "step": 24140 }, { "epoch": 0.3480679705403341, "grad_norm": 1.5724393754721504, "learning_rate": 8.239984745561311e-06, "loss": 0.5316, "step": 24150 }, { "epoch": 0.34821209806436737, "grad_norm": 1.4879892999841902, "learning_rate": 8.238068401451686e-06, "loss": 0.4558, "step": 24160 }, { "epoch": 0.3483562255884006, "grad_norm": 1.4948854571811176, "learning_rate": 8.236151237737576e-06, "loss": 0.465, "step": 24170 }, { "epoch": 0.3485003531124339, "grad_norm": 1.5311215252156112, "learning_rate": 8.234233254904249e-06, "loss": 0.5007, "step": 24180 }, { "epoch": 0.34864448063646714, "grad_norm": 1.4289410477176256, "learning_rate": 8.232314453437173e-06, "loss": 0.4864, "step": 24190 }, { "epoch": 0.3487886081605004, "grad_norm": 1.7236296029533023, "learning_rate": 8.230394833822027e-06, "loss": 0.5188, "step": 24200 }, { "epoch": 0.3489327356845337, "grad_norm": 1.5452813555961793, "learning_rate": 8.228474396544695e-06, "loss": 0.499, "step": 24210 }, { "epoch": 0.34907686320856696, "grad_norm": 1.4834338672131753, "learning_rate": 8.226553142091271e-06, "loss": 0.4658, "step": 24220 }, { "epoch": 0.3492209907326002, "grad_norm": 1.598199382491659, "learning_rate": 8.224631070948051e-06, "loss": 0.4846, "step": 24230 }, { "epoch": 0.34936511825663347, "grad_norm": 1.5641390513254128, "learning_rate": 8.222708183601544e-06, "loss": 0.5227, "step": 24240 }, { "epoch": 0.3495092457806667, "grad_norm": 1.4379957904701892, "learning_rate": 8.22078448053846e-06, "loss": 0.486, "step": 24250 }, { "epoch": 0.3496533733047, "grad_norm": 1.576806593937036, "learning_rate": 8.218859962245716e-06, "loss": 0.5039, "step": 24260 }, { "epoch": 0.3497975008287333, "grad_norm": 1.5047983816449866, "learning_rate": 8.21693462921044e-06, "loss": 0.5017, "step": 24270 }, { "epoch": 0.34994162835276654, "grad_norm": 1.516514486516148, "learning_rate": 8.215008481919961e-06, "loss": 0.489, "step": 24280 }, { "epoch": 0.3500857558767998, "grad_norm": 1.268895794725444, "learning_rate": 8.213081520861819e-06, "loss": 0.4718, "step": 24290 }, { "epoch": 0.35022988340083305, "grad_norm": 1.6116671659401736, "learning_rate": 8.211153746523755e-06, "loss": 0.5315, "step": 24300 }, { "epoch": 0.3503740109248663, "grad_norm": 1.500528584682504, "learning_rate": 8.209225159393717e-06, "loss": 0.4873, "step": 24310 }, { "epoch": 0.35051813844889956, "grad_norm": 1.5420324868094761, "learning_rate": 8.20729575995986e-06, "loss": 0.5055, "step": 24320 }, { "epoch": 0.3506622659729329, "grad_norm": 1.4577116684581588, "learning_rate": 8.205365548710549e-06, "loss": 0.5006, "step": 24330 }, { "epoch": 0.35080639349696613, "grad_norm": 1.5244476758935166, "learning_rate": 8.203434526134346e-06, "loss": 0.4912, "step": 24340 }, { "epoch": 0.3509505210209994, "grad_norm": 1.6823750083771711, "learning_rate": 8.20150269272002e-06, "loss": 0.4916, "step": 24350 }, { "epoch": 0.35109464854503264, "grad_norm": 1.5601090486468143, "learning_rate": 8.199570048956553e-06, "loss": 0.4828, "step": 24360 }, { "epoch": 0.3512387760690659, "grad_norm": 1.6508200372749449, "learning_rate": 8.197636595333122e-06, "loss": 0.5112, "step": 24370 }, { "epoch": 0.35138290359309915, "grad_norm": 1.5877293447759318, "learning_rate": 8.195702332339117e-06, "loss": 0.4536, "step": 24380 }, { "epoch": 0.35152703111713246, "grad_norm": 1.290120999655211, "learning_rate": 8.193767260464128e-06, "loss": 0.4567, "step": 24390 }, { "epoch": 0.3516711586411657, "grad_norm": 1.6040841448328627, "learning_rate": 8.19183138019795e-06, "loss": 0.489, "step": 24400 }, { "epoch": 0.35181528616519897, "grad_norm": 1.4396057078677553, "learning_rate": 8.189894692030585e-06, "loss": 0.5155, "step": 24410 }, { "epoch": 0.3519594136892322, "grad_norm": 1.5972884347316367, "learning_rate": 8.187957196452239e-06, "loss": 0.5013, "step": 24420 }, { "epoch": 0.3521035412132655, "grad_norm": 1.6354899802807332, "learning_rate": 8.18601889395332e-06, "loss": 0.4672, "step": 24430 }, { "epoch": 0.35224766873729874, "grad_norm": 1.603717354928902, "learning_rate": 8.184079785024443e-06, "loss": 0.4907, "step": 24440 }, { "epoch": 0.35239179626133205, "grad_norm": 1.6412681759018037, "learning_rate": 8.182139870156426e-06, "loss": 0.5012, "step": 24450 }, { "epoch": 0.3525359237853653, "grad_norm": 1.5430173279253172, "learning_rate": 8.180199149840292e-06, "loss": 0.4889, "step": 24460 }, { "epoch": 0.35268005130939856, "grad_norm": 1.473017994493567, "learning_rate": 8.178257624567264e-06, "loss": 0.4941, "step": 24470 }, { "epoch": 0.3528241788334318, "grad_norm": 1.8460664498387247, "learning_rate": 8.176315294828773e-06, "loss": 0.506, "step": 24480 }, { "epoch": 0.35296830635746507, "grad_norm": 1.344949880122946, "learning_rate": 8.174372161116454e-06, "loss": 0.4966, "step": 24490 }, { "epoch": 0.3531124338814983, "grad_norm": 1.578582438478479, "learning_rate": 8.172428223922141e-06, "loss": 0.4764, "step": 24500 }, { "epoch": 0.35325656140553163, "grad_norm": 1.4862876726123675, "learning_rate": 8.17048348373788e-06, "loss": 0.4644, "step": 24510 }, { "epoch": 0.3534006889295649, "grad_norm": 1.6195062193556757, "learning_rate": 8.168537941055906e-06, "loss": 0.4916, "step": 24520 }, { "epoch": 0.35354481645359814, "grad_norm": 1.7221551164901117, "learning_rate": 8.166591596368669e-06, "loss": 0.4937, "step": 24530 }, { "epoch": 0.3536889439776314, "grad_norm": 2.556645388024475, "learning_rate": 8.164644450168821e-06, "loss": 0.5108, "step": 24540 }, { "epoch": 0.35383307150166465, "grad_norm": 1.9799128868248284, "learning_rate": 8.162696502949212e-06, "loss": 0.4939, "step": 24550 }, { "epoch": 0.35397719902569796, "grad_norm": 1.3977584817174191, "learning_rate": 8.160747755202897e-06, "loss": 0.4922, "step": 24560 }, { "epoch": 0.3541213265497312, "grad_norm": 1.3668221232874544, "learning_rate": 8.158798207423136e-06, "loss": 0.4708, "step": 24570 }, { "epoch": 0.3542654540737645, "grad_norm": 1.4242115902012413, "learning_rate": 8.156847860103385e-06, "loss": 0.4883, "step": 24580 }, { "epoch": 0.35440958159779773, "grad_norm": 1.792906894398166, "learning_rate": 8.15489671373731e-06, "loss": 0.5334, "step": 24590 }, { "epoch": 0.354553709121831, "grad_norm": 1.6259054479431023, "learning_rate": 8.152944768818775e-06, "loss": 0.4718, "step": 24600 }, { "epoch": 0.35469783664586424, "grad_norm": 1.4264492980873296, "learning_rate": 8.150992025841849e-06, "loss": 0.4868, "step": 24610 }, { "epoch": 0.35484196416989755, "grad_norm": 1.4091882594865817, "learning_rate": 8.149038485300798e-06, "loss": 0.4784, "step": 24620 }, { "epoch": 0.3549860916939308, "grad_norm": 1.5932629561147376, "learning_rate": 8.147084147690094e-06, "loss": 0.5036, "step": 24630 }, { "epoch": 0.35513021921796406, "grad_norm": 1.8236247985130598, "learning_rate": 8.14512901350441e-06, "loss": 0.4836, "step": 24640 }, { "epoch": 0.3552743467419973, "grad_norm": 1.633054862252565, "learning_rate": 8.143173083238619e-06, "loss": 0.4865, "step": 24650 }, { "epoch": 0.35541847426603057, "grad_norm": 1.5668719899240757, "learning_rate": 8.1412163573878e-06, "loss": 0.5033, "step": 24660 }, { "epoch": 0.3555626017900638, "grad_norm": 1.3776305097379282, "learning_rate": 8.139258836447226e-06, "loss": 0.4976, "step": 24670 }, { "epoch": 0.35570672931409714, "grad_norm": 1.473812613950512, "learning_rate": 8.137300520912377e-06, "loss": 0.4882, "step": 24680 }, { "epoch": 0.3558508568381304, "grad_norm": 1.7107054074247365, "learning_rate": 8.135341411278935e-06, "loss": 0.4801, "step": 24690 }, { "epoch": 0.35599498436216365, "grad_norm": 1.6582784023045565, "learning_rate": 8.133381508042774e-06, "loss": 0.4932, "step": 24700 }, { "epoch": 0.3561391118861969, "grad_norm": 1.4589847480525076, "learning_rate": 8.131420811699984e-06, "loss": 0.4945, "step": 24710 }, { "epoch": 0.35628323941023016, "grad_norm": 1.260414239508003, "learning_rate": 8.129459322746843e-06, "loss": 0.4759, "step": 24720 }, { "epoch": 0.3564273669342634, "grad_norm": 1.4354527046054564, "learning_rate": 8.12749704167983e-06, "loss": 0.4588, "step": 24730 }, { "epoch": 0.3565714944582967, "grad_norm": 1.7427463498436802, "learning_rate": 8.125533968995633e-06, "loss": 0.4672, "step": 24740 }, { "epoch": 0.35671562198233, "grad_norm": 1.545886516967061, "learning_rate": 8.123570105191137e-06, "loss": 0.4827, "step": 24750 }, { "epoch": 0.35685974950636323, "grad_norm": 1.44202873330819, "learning_rate": 8.121605450763422e-06, "loss": 0.4987, "step": 24760 }, { "epoch": 0.3570038770303965, "grad_norm": 1.411928391302774, "learning_rate": 8.119640006209772e-06, "loss": 0.4663, "step": 24770 }, { "epoch": 0.35714800455442974, "grad_norm": 1.7605418496758525, "learning_rate": 8.117673772027675e-06, "loss": 0.5102, "step": 24780 }, { "epoch": 0.357292132078463, "grad_norm": 1.6049361771852, "learning_rate": 8.11570674871481e-06, "loss": 0.5026, "step": 24790 }, { "epoch": 0.3574362596024963, "grad_norm": 1.529792048438458, "learning_rate": 8.113738936769065e-06, "loss": 0.5093, "step": 24800 }, { "epoch": 0.35758038712652956, "grad_norm": 1.4983995646090633, "learning_rate": 8.111770336688517e-06, "loss": 0.4796, "step": 24810 }, { "epoch": 0.3577245146505628, "grad_norm": 2.1589115167290123, "learning_rate": 8.109800948971456e-06, "loss": 0.4849, "step": 24820 }, { "epoch": 0.3578686421745961, "grad_norm": 1.5457829382746124, "learning_rate": 8.10783077411636e-06, "loss": 0.4819, "step": 24830 }, { "epoch": 0.35801276969862933, "grad_norm": 1.3455432000687306, "learning_rate": 8.105859812621911e-06, "loss": 0.4873, "step": 24840 }, { "epoch": 0.3581568972226626, "grad_norm": 1.6766242250626748, "learning_rate": 8.10388806498699e-06, "loss": 0.474, "step": 24850 }, { "epoch": 0.3583010247466959, "grad_norm": 1.2806559386717409, "learning_rate": 8.101915531710676e-06, "loss": 0.4624, "step": 24860 }, { "epoch": 0.35844515227072915, "grad_norm": 1.4256927880118413, "learning_rate": 8.099942213292244e-06, "loss": 0.475, "step": 24870 }, { "epoch": 0.3585892797947624, "grad_norm": 1.5292916333060353, "learning_rate": 8.097968110231177e-06, "loss": 0.5108, "step": 24880 }, { "epoch": 0.35873340731879566, "grad_norm": 1.5194749417367255, "learning_rate": 8.095993223027147e-06, "loss": 0.4849, "step": 24890 }, { "epoch": 0.3588775348428289, "grad_norm": 1.536990029749382, "learning_rate": 8.094017552180028e-06, "loss": 0.4744, "step": 24900 }, { "epoch": 0.3590216623668622, "grad_norm": 1.7745456236890815, "learning_rate": 8.092041098189896e-06, "loss": 0.4693, "step": 24910 }, { "epoch": 0.3591657898908955, "grad_norm": 1.692255373144302, "learning_rate": 8.090063861557016e-06, "loss": 0.4828, "step": 24920 }, { "epoch": 0.35930991741492874, "grad_norm": 1.5940088495672624, "learning_rate": 8.08808584278186e-06, "loss": 0.4961, "step": 24930 }, { "epoch": 0.359454044938962, "grad_norm": 1.4927091660507934, "learning_rate": 8.086107042365094e-06, "loss": 0.5085, "step": 24940 }, { "epoch": 0.35959817246299525, "grad_norm": 1.6038954752029084, "learning_rate": 8.084127460807584e-06, "loss": 0.4903, "step": 24950 }, { "epoch": 0.3597422999870285, "grad_norm": 1.541905605989773, "learning_rate": 8.082147098610392e-06, "loss": 0.4698, "step": 24960 }, { "epoch": 0.3598864275110618, "grad_norm": 1.4760196710391615, "learning_rate": 8.080165956274773e-06, "loss": 0.491, "step": 24970 }, { "epoch": 0.36003055503509507, "grad_norm": 1.5274792292367587, "learning_rate": 8.07818403430219e-06, "loss": 0.4595, "step": 24980 }, { "epoch": 0.3601746825591283, "grad_norm": 1.5387498419722954, "learning_rate": 8.076201333194296e-06, "loss": 0.5174, "step": 24990 }, { "epoch": 0.3603188100831616, "grad_norm": 1.5489611690981733, "learning_rate": 8.074217853452943e-06, "loss": 0.4965, "step": 25000 }, { "epoch": 0.36046293760719483, "grad_norm": 1.8065974145372108, "learning_rate": 8.07223359558018e-06, "loss": 0.4851, "step": 25010 }, { "epoch": 0.3606070651312281, "grad_norm": 1.6850147357661216, "learning_rate": 8.070248560078253e-06, "loss": 0.5041, "step": 25020 }, { "epoch": 0.3607511926552614, "grad_norm": 1.3836316299454294, "learning_rate": 8.068262747449604e-06, "loss": 0.4804, "step": 25030 }, { "epoch": 0.36089532017929465, "grad_norm": 1.296698891807581, "learning_rate": 8.066276158196873e-06, "loss": 0.476, "step": 25040 }, { "epoch": 0.3610394477033279, "grad_norm": 1.613264564074983, "learning_rate": 8.064288792822897e-06, "loss": 0.5063, "step": 25050 }, { "epoch": 0.36118357522736116, "grad_norm": 1.344491719304081, "learning_rate": 8.062300651830705e-06, "loss": 0.4974, "step": 25060 }, { "epoch": 0.3613277027513944, "grad_norm": 1.693718332609815, "learning_rate": 8.06031173572353e-06, "loss": 0.5083, "step": 25070 }, { "epoch": 0.3614718302754277, "grad_norm": 1.3763157257265424, "learning_rate": 8.058322045004794e-06, "loss": 0.4916, "step": 25080 }, { "epoch": 0.361615957799461, "grad_norm": 1.4616778963474357, "learning_rate": 8.056331580178118e-06, "loss": 0.4619, "step": 25090 }, { "epoch": 0.36176008532349424, "grad_norm": 1.6833647780250518, "learning_rate": 8.054340341747323e-06, "loss": 0.495, "step": 25100 }, { "epoch": 0.3619042128475275, "grad_norm": 1.6715558597571394, "learning_rate": 8.052348330216417e-06, "loss": 0.4871, "step": 25110 }, { "epoch": 0.36204834037156075, "grad_norm": 1.4787156819510086, "learning_rate": 8.050355546089612e-06, "loss": 0.4989, "step": 25120 }, { "epoch": 0.362192467895594, "grad_norm": 1.608054702295075, "learning_rate": 8.04836198987131e-06, "loss": 0.4866, "step": 25130 }, { "epoch": 0.36233659541962726, "grad_norm": 1.4369308255397073, "learning_rate": 8.04636766206611e-06, "loss": 0.5009, "step": 25140 }, { "epoch": 0.36248072294366057, "grad_norm": 1.5239944501814942, "learning_rate": 8.04437256317881e-06, "loss": 0.4855, "step": 25150 }, { "epoch": 0.3626248504676938, "grad_norm": 1.3433326081730226, "learning_rate": 8.042376693714396e-06, "loss": 0.462, "step": 25160 }, { "epoch": 0.3627689779917271, "grad_norm": 1.4807948441655712, "learning_rate": 8.040380054178056e-06, "loss": 0.4822, "step": 25170 }, { "epoch": 0.36291310551576034, "grad_norm": 1.4938292702745843, "learning_rate": 8.038382645075168e-06, "loss": 0.4869, "step": 25180 }, { "epoch": 0.3630572330397936, "grad_norm": 1.6418523804255265, "learning_rate": 8.036384466911308e-06, "loss": 0.4721, "step": 25190 }, { "epoch": 0.36320136056382685, "grad_norm": 1.7176341240019941, "learning_rate": 8.034385520192243e-06, "loss": 0.5075, "step": 25200 }, { "epoch": 0.36334548808786016, "grad_norm": 1.449363957666174, "learning_rate": 8.03238580542394e-06, "loss": 0.483, "step": 25210 }, { "epoch": 0.3634896156118934, "grad_norm": 1.4075131713259745, "learning_rate": 8.030385323112555e-06, "loss": 0.4571, "step": 25220 }, { "epoch": 0.36363374313592667, "grad_norm": 1.576136806210559, "learning_rate": 8.028384073764442e-06, "loss": 0.4895, "step": 25230 }, { "epoch": 0.3637778706599599, "grad_norm": 1.7530190500611247, "learning_rate": 8.026382057886148e-06, "loss": 0.4885, "step": 25240 }, { "epoch": 0.3639219981839932, "grad_norm": 1.5176697348961241, "learning_rate": 8.024379275984412e-06, "loss": 0.4974, "step": 25250 }, { "epoch": 0.3640661257080265, "grad_norm": 1.5135962481168748, "learning_rate": 8.02237572856617e-06, "loss": 0.4892, "step": 25260 }, { "epoch": 0.36421025323205974, "grad_norm": 1.5345511521174975, "learning_rate": 8.02037141613855e-06, "loss": 0.5031, "step": 25270 }, { "epoch": 0.364354380756093, "grad_norm": 1.5230446078263133, "learning_rate": 8.018366339208873e-06, "loss": 0.4813, "step": 25280 }, { "epoch": 0.36449850828012625, "grad_norm": 1.5648259741881398, "learning_rate": 8.016360498284657e-06, "loss": 0.495, "step": 25290 }, { "epoch": 0.3646426358041595, "grad_norm": 1.7189342474121951, "learning_rate": 8.014353893873609e-06, "loss": 0.4709, "step": 25300 }, { "epoch": 0.36478676332819276, "grad_norm": 1.548549304139978, "learning_rate": 8.01234652648363e-06, "loss": 0.4701, "step": 25310 }, { "epoch": 0.3649308908522261, "grad_norm": 1.860292005587478, "learning_rate": 8.01033839662282e-06, "loss": 0.4801, "step": 25320 }, { "epoch": 0.36507501837625933, "grad_norm": 1.5769350448226969, "learning_rate": 8.008329504799461e-06, "loss": 0.4924, "step": 25330 }, { "epoch": 0.3652191459002926, "grad_norm": 1.6183658958382332, "learning_rate": 8.006319851522039e-06, "loss": 0.4893, "step": 25340 }, { "epoch": 0.36536327342432584, "grad_norm": 1.5376355070234453, "learning_rate": 8.004309437299226e-06, "loss": 0.4851, "step": 25350 }, { "epoch": 0.3655074009483591, "grad_norm": 1.7845393577360535, "learning_rate": 8.00229826263989e-06, "loss": 0.5067, "step": 25360 }, { "epoch": 0.36565152847239235, "grad_norm": 1.493026242228404, "learning_rate": 8.000286328053088e-06, "loss": 0.4795, "step": 25370 }, { "epoch": 0.36579565599642566, "grad_norm": 1.6514431270082537, "learning_rate": 7.99827363404807e-06, "loss": 0.4864, "step": 25380 }, { "epoch": 0.3659397835204589, "grad_norm": 1.4213122549097665, "learning_rate": 7.996260181134285e-06, "loss": 0.5089, "step": 25390 }, { "epoch": 0.36608391104449217, "grad_norm": 1.5158399558104028, "learning_rate": 7.994245969821364e-06, "loss": 0.4862, "step": 25400 }, { "epoch": 0.3662280385685254, "grad_norm": 1.9431707073803268, "learning_rate": 7.992231000619135e-06, "loss": 0.4874, "step": 25410 }, { "epoch": 0.3663721660925587, "grad_norm": 1.3982000656905602, "learning_rate": 7.990215274037618e-06, "loss": 0.4792, "step": 25420 }, { "epoch": 0.36651629361659194, "grad_norm": 1.4954323594986274, "learning_rate": 7.988198790587024e-06, "loss": 0.4856, "step": 25430 }, { "epoch": 0.36666042114062525, "grad_norm": 1.8763108399287571, "learning_rate": 7.98618155077776e-06, "loss": 0.5045, "step": 25440 }, { "epoch": 0.3668045486646585, "grad_norm": 1.5824331976432573, "learning_rate": 7.984163555120412e-06, "loss": 0.4777, "step": 25450 }, { "epoch": 0.36694867618869176, "grad_norm": 1.3794239922892904, "learning_rate": 7.982144804125771e-06, "loss": 0.4838, "step": 25460 }, { "epoch": 0.367092803712725, "grad_norm": 1.3991436469801284, "learning_rate": 7.980125298304811e-06, "loss": 0.4734, "step": 25470 }, { "epoch": 0.36723693123675827, "grad_norm": 1.495333975316378, "learning_rate": 7.978105038168702e-06, "loss": 0.4746, "step": 25480 }, { "epoch": 0.3673810587607915, "grad_norm": 1.4582339742410622, "learning_rate": 7.976084024228802e-06, "loss": 0.5323, "step": 25490 }, { "epoch": 0.36752518628482483, "grad_norm": 1.6303375886405553, "learning_rate": 7.97406225699666e-06, "loss": 0.4918, "step": 25500 }, { "epoch": 0.3676693138088581, "grad_norm": 1.500617990221579, "learning_rate": 7.972039736984017e-06, "loss": 0.4924, "step": 25510 }, { "epoch": 0.36781344133289134, "grad_norm": 1.5028720331975234, "learning_rate": 7.970016464702804e-06, "loss": 0.4922, "step": 25520 }, { "epoch": 0.3679575688569246, "grad_norm": 1.7198063037278557, "learning_rate": 7.967992440665137e-06, "loss": 0.4626, "step": 25530 }, { "epoch": 0.36810169638095785, "grad_norm": 1.5889456082080922, "learning_rate": 7.965967665383334e-06, "loss": 0.4931, "step": 25540 }, { "epoch": 0.3682458239049911, "grad_norm": 1.6053092944345349, "learning_rate": 7.963942139369893e-06, "loss": 0.4899, "step": 25550 }, { "epoch": 0.3683899514290244, "grad_norm": 1.5655719272736937, "learning_rate": 7.961915863137507e-06, "loss": 0.49, "step": 25560 }, { "epoch": 0.3685340789530577, "grad_norm": 1.2037601410093548, "learning_rate": 7.959888837199056e-06, "loss": 0.4805, "step": 25570 }, { "epoch": 0.36867820647709093, "grad_norm": 1.6571727627268447, "learning_rate": 7.957861062067614e-06, "loss": 0.5123, "step": 25580 }, { "epoch": 0.3688223340011242, "grad_norm": 1.6370037317156387, "learning_rate": 7.955832538256436e-06, "loss": 0.5088, "step": 25590 }, { "epoch": 0.36896646152515744, "grad_norm": 1.4374294447272113, "learning_rate": 7.95380326627898e-06, "loss": 0.497, "step": 25600 }, { "epoch": 0.36911058904919075, "grad_norm": 1.3733551045128674, "learning_rate": 7.951773246648878e-06, "loss": 0.4762, "step": 25610 }, { "epoch": 0.369254716573224, "grad_norm": 1.8003353287534523, "learning_rate": 7.949742479879965e-06, "loss": 0.5296, "step": 25620 }, { "epoch": 0.36939884409725726, "grad_norm": 1.5418097047420116, "learning_rate": 7.947710966486256e-06, "loss": 0.4916, "step": 25630 }, { "epoch": 0.3695429716212905, "grad_norm": 1.5731993903123274, "learning_rate": 7.945678706981957e-06, "loss": 0.4611, "step": 25640 }, { "epoch": 0.36968709914532377, "grad_norm": 1.5625418966605038, "learning_rate": 7.943645701881468e-06, "loss": 0.4928, "step": 25650 }, { "epoch": 0.369831226669357, "grad_norm": 1.4678951095602946, "learning_rate": 7.94161195169937e-06, "loss": 0.4973, "step": 25660 }, { "epoch": 0.36997535419339034, "grad_norm": 1.373389140003975, "learning_rate": 7.939577456950438e-06, "loss": 0.4703, "step": 25670 }, { "epoch": 0.3701194817174236, "grad_norm": 1.48599023325054, "learning_rate": 7.937542218149632e-06, "loss": 0.4629, "step": 25680 }, { "epoch": 0.37026360924145685, "grad_norm": 1.4130945146744438, "learning_rate": 7.935506235812104e-06, "loss": 0.4861, "step": 25690 }, { "epoch": 0.3704077367654901, "grad_norm": 1.5274257942955858, "learning_rate": 7.933469510453189e-06, "loss": 0.4852, "step": 25700 }, { "epoch": 0.37055186428952336, "grad_norm": 1.48562684825372, "learning_rate": 7.931432042588418e-06, "loss": 0.4741, "step": 25710 }, { "epoch": 0.3706959918135566, "grad_norm": 1.4062337175932451, "learning_rate": 7.929393832733501e-06, "loss": 0.4755, "step": 25720 }, { "epoch": 0.3708401193375899, "grad_norm": 1.4662329326585224, "learning_rate": 7.92735488140434e-06, "loss": 0.4779, "step": 25730 }, { "epoch": 0.3709842468616232, "grad_norm": 1.5427589941251851, "learning_rate": 7.925315189117029e-06, "loss": 0.4849, "step": 25740 }, { "epoch": 0.37112837438565643, "grad_norm": 1.6559790407361477, "learning_rate": 7.923274756387841e-06, "loss": 0.514, "step": 25750 }, { "epoch": 0.3712725019096897, "grad_norm": 1.2830244890037992, "learning_rate": 7.921233583733242e-06, "loss": 0.4641, "step": 25760 }, { "epoch": 0.37141662943372294, "grad_norm": 1.6459970054487019, "learning_rate": 7.919191671669884e-06, "loss": 0.5007, "step": 25770 }, { "epoch": 0.3715607569577562, "grad_norm": 1.7750437166496646, "learning_rate": 7.917149020714606e-06, "loss": 0.4922, "step": 25780 }, { "epoch": 0.3717048844817895, "grad_norm": 3.0146054054298776, "learning_rate": 7.915105631384434e-06, "loss": 0.5075, "step": 25790 }, { "epoch": 0.37184901200582277, "grad_norm": 1.5708157121461188, "learning_rate": 7.913061504196583e-06, "loss": 0.4936, "step": 25800 }, { "epoch": 0.371993139529856, "grad_norm": 1.5375335171264515, "learning_rate": 7.911016639668448e-06, "loss": 0.4739, "step": 25810 }, { "epoch": 0.3721372670538893, "grad_norm": 1.4152016787782453, "learning_rate": 7.908971038317618e-06, "loss": 0.4947, "step": 25820 }, { "epoch": 0.37228139457792253, "grad_norm": 1.373921468833761, "learning_rate": 7.906924700661869e-06, "loss": 0.4842, "step": 25830 }, { "epoch": 0.3724255221019558, "grad_norm": 1.4081189248775046, "learning_rate": 7.904877627219156e-06, "loss": 0.4667, "step": 25840 }, { "epoch": 0.3725696496259891, "grad_norm": 1.6944864167485016, "learning_rate": 7.902829818507625e-06, "loss": 0.5112, "step": 25850 }, { "epoch": 0.37271377715002235, "grad_norm": 1.3493461471465067, "learning_rate": 7.900781275045608e-06, "loss": 0.4873, "step": 25860 }, { "epoch": 0.3728579046740556, "grad_norm": 1.7138822140196242, "learning_rate": 7.898731997351625e-06, "loss": 0.5106, "step": 25870 }, { "epoch": 0.37300203219808886, "grad_norm": 1.597744564691175, "learning_rate": 7.896681985944377e-06, "loss": 0.5003, "step": 25880 }, { "epoch": 0.3731461597221221, "grad_norm": 1.2673982545719187, "learning_rate": 7.894631241342753e-06, "loss": 0.4797, "step": 25890 }, { "epoch": 0.3732902872461554, "grad_norm": 1.6479923423247964, "learning_rate": 7.892579764065828e-06, "loss": 0.4733, "step": 25900 }, { "epoch": 0.3734344147701887, "grad_norm": 1.4591425165813925, "learning_rate": 7.890527554632865e-06, "loss": 0.4562, "step": 25910 }, { "epoch": 0.37357854229422194, "grad_norm": 1.5144158513799286, "learning_rate": 7.888474613563304e-06, "loss": 0.4798, "step": 25920 }, { "epoch": 0.3737226698182552, "grad_norm": 1.3535154049706932, "learning_rate": 7.88642094137678e-06, "loss": 0.4913, "step": 25930 }, { "epoch": 0.37386679734228845, "grad_norm": 1.4889332241042446, "learning_rate": 7.884366538593108e-06, "loss": 0.4939, "step": 25940 }, { "epoch": 0.3740109248663217, "grad_norm": 1.6310413674230426, "learning_rate": 7.882311405732288e-06, "loss": 0.4851, "step": 25950 }, { "epoch": 0.37415505239035496, "grad_norm": 1.4900485789371904, "learning_rate": 7.880255543314505e-06, "loss": 0.4728, "step": 25960 }, { "epoch": 0.37429917991438827, "grad_norm": 1.6096983546141939, "learning_rate": 7.87819895186013e-06, "loss": 0.4838, "step": 25970 }, { "epoch": 0.3744433074384215, "grad_norm": 1.5478444041730843, "learning_rate": 7.876141631889717e-06, "loss": 0.4993, "step": 25980 }, { "epoch": 0.3745874349624548, "grad_norm": 1.3740553362794328, "learning_rate": 7.874083583924005e-06, "loss": 0.4773, "step": 25990 }, { "epoch": 0.37473156248648803, "grad_norm": 1.365660534385458, "learning_rate": 7.87202480848392e-06, "loss": 0.4823, "step": 26000 }, { "epoch": 0.3748756900105213, "grad_norm": 2.4015996037604523, "learning_rate": 7.869965306090565e-06, "loss": 0.4674, "step": 26010 }, { "epoch": 0.3750198175345546, "grad_norm": 1.1810496802633508, "learning_rate": 7.867905077265236e-06, "loss": 0.4848, "step": 26020 }, { "epoch": 0.37516394505858786, "grad_norm": 1.4869845790890146, "learning_rate": 7.865844122529403e-06, "loss": 0.4906, "step": 26030 }, { "epoch": 0.3753080725826211, "grad_norm": 1.5616387714043778, "learning_rate": 7.86378244240473e-06, "loss": 0.4975, "step": 26040 }, { "epoch": 0.37545220010665437, "grad_norm": 1.6893140326421165, "learning_rate": 7.861720037413057e-06, "loss": 0.4853, "step": 26050 }, { "epoch": 0.3755963276306876, "grad_norm": 1.666112062239544, "learning_rate": 7.859656908076411e-06, "loss": 0.4905, "step": 26060 }, { "epoch": 0.3757404551547209, "grad_norm": 1.4792661936381828, "learning_rate": 7.857593054917002e-06, "loss": 0.4834, "step": 26070 }, { "epoch": 0.3758845826787542, "grad_norm": 1.4011363045806522, "learning_rate": 7.85552847845722e-06, "loss": 0.4678, "step": 26080 }, { "epoch": 0.37602871020278744, "grad_norm": 1.2838011549128474, "learning_rate": 7.853463179219645e-06, "loss": 0.453, "step": 26090 }, { "epoch": 0.3761728377268207, "grad_norm": 1.5865315365235086, "learning_rate": 7.851397157727033e-06, "loss": 0.4754, "step": 26100 }, { "epoch": 0.37631696525085395, "grad_norm": 1.6950213685937143, "learning_rate": 7.849330414502325e-06, "loss": 0.4885, "step": 26110 }, { "epoch": 0.3764610927748872, "grad_norm": 1.4252678072473222, "learning_rate": 7.847262950068649e-06, "loss": 0.5006, "step": 26120 }, { "epoch": 0.37660522029892046, "grad_norm": 1.3066726437975067, "learning_rate": 7.845194764949306e-06, "loss": 0.4774, "step": 26130 }, { "epoch": 0.3767493478229538, "grad_norm": 1.4236495578085957, "learning_rate": 7.84312585966779e-06, "loss": 0.496, "step": 26140 }, { "epoch": 0.37689347534698703, "grad_norm": 1.3853633731183994, "learning_rate": 7.84105623474777e-06, "loss": 0.4618, "step": 26150 }, { "epoch": 0.3770376028710203, "grad_norm": 1.8378700180674112, "learning_rate": 7.838985890713103e-06, "loss": 0.4955, "step": 26160 }, { "epoch": 0.37718173039505354, "grad_norm": 1.3174391783375368, "learning_rate": 7.836914828087818e-06, "loss": 0.4642, "step": 26170 }, { "epoch": 0.3773258579190868, "grad_norm": 1.7317274506672065, "learning_rate": 7.83484304739614e-06, "loss": 0.5031, "step": 26180 }, { "epoch": 0.37746998544312005, "grad_norm": 1.4937730851205038, "learning_rate": 7.832770549162463e-06, "loss": 0.4731, "step": 26190 }, { "epoch": 0.37761411296715336, "grad_norm": 1.706095290537538, "learning_rate": 7.830697333911368e-06, "loss": 0.4749, "step": 26200 }, { "epoch": 0.3777582404911866, "grad_norm": 1.4394487829870297, "learning_rate": 7.828623402167623e-06, "loss": 0.4752, "step": 26210 }, { "epoch": 0.37790236801521987, "grad_norm": 1.47437850955074, "learning_rate": 7.826548754456164e-06, "loss": 0.472, "step": 26220 }, { "epoch": 0.3780464955392531, "grad_norm": 1.3619704530039556, "learning_rate": 7.824473391302124e-06, "loss": 0.4867, "step": 26230 }, { "epoch": 0.3781906230632864, "grad_norm": 1.3432433778310888, "learning_rate": 7.822397313230802e-06, "loss": 0.4518, "step": 26240 }, { "epoch": 0.37833475058731963, "grad_norm": 1.58190450004438, "learning_rate": 7.820320520767687e-06, "loss": 0.4798, "step": 26250 }, { "epoch": 0.37847887811135295, "grad_norm": 1.4316672240624717, "learning_rate": 7.818243014438451e-06, "loss": 0.4653, "step": 26260 }, { "epoch": 0.3786230056353862, "grad_norm": 1.0611287635502862, "learning_rate": 7.816164794768937e-06, "loss": 0.4893, "step": 26270 }, { "epoch": 0.37876713315941946, "grad_norm": 1.3251196500301095, "learning_rate": 7.814085862285177e-06, "loss": 0.4602, "step": 26280 }, { "epoch": 0.3789112606834527, "grad_norm": 1.2802360374137225, "learning_rate": 7.812006217513379e-06, "loss": 0.4682, "step": 26290 }, { "epoch": 0.37905538820748597, "grad_norm": 2.240521380025867, "learning_rate": 7.809925860979936e-06, "loss": 0.4878, "step": 26300 }, { "epoch": 0.3791995157315192, "grad_norm": 1.3792643013594819, "learning_rate": 7.807844793211412e-06, "loss": 0.4648, "step": 26310 }, { "epoch": 0.37934364325555253, "grad_norm": 1.4307131534649944, "learning_rate": 7.805763014734565e-06, "loss": 0.4795, "step": 26320 }, { "epoch": 0.3794877707795858, "grad_norm": 1.4418940351536635, "learning_rate": 7.803680526076315e-06, "loss": 0.4725, "step": 26330 }, { "epoch": 0.37963189830361904, "grad_norm": 1.6928711927804625, "learning_rate": 7.801597327763782e-06, "loss": 0.4793, "step": 26340 }, { "epoch": 0.3797760258276523, "grad_norm": 1.4057786335891154, "learning_rate": 7.79951342032425e-06, "loss": 0.5044, "step": 26350 }, { "epoch": 0.37992015335168555, "grad_norm": 1.3911092444761117, "learning_rate": 7.797428804285186e-06, "loss": 0.4807, "step": 26360 }, { "epoch": 0.38006428087571886, "grad_norm": 1.7985236915187421, "learning_rate": 7.79534348017424e-06, "loss": 0.4578, "step": 26370 }, { "epoch": 0.3802084083997521, "grad_norm": 1.4794308209846936, "learning_rate": 7.79325744851924e-06, "loss": 0.5009, "step": 26380 }, { "epoch": 0.3803525359237854, "grad_norm": 1.4575722891825873, "learning_rate": 7.791170709848193e-06, "loss": 0.5058, "step": 26390 }, { "epoch": 0.38049666344781863, "grad_norm": 1.57867042196659, "learning_rate": 7.789083264689283e-06, "loss": 0.4843, "step": 26400 }, { "epoch": 0.3806407909718519, "grad_norm": 1.3609503947780195, "learning_rate": 7.786995113570874e-06, "loss": 0.4994, "step": 26410 }, { "epoch": 0.38078491849588514, "grad_norm": 1.6205357857229903, "learning_rate": 7.78490625702151e-06, "loss": 0.4783, "step": 26420 }, { "epoch": 0.38092904601991845, "grad_norm": 1.4577989955441755, "learning_rate": 7.78281669556991e-06, "loss": 0.4658, "step": 26430 }, { "epoch": 0.3810731735439517, "grad_norm": 1.5090330683310205, "learning_rate": 7.780726429744977e-06, "loss": 0.4726, "step": 26440 }, { "epoch": 0.38121730106798496, "grad_norm": 1.5818798594586523, "learning_rate": 7.778635460075789e-06, "loss": 0.5053, "step": 26450 }, { "epoch": 0.3813614285920182, "grad_norm": 1.8012864906274137, "learning_rate": 7.776543787091599e-06, "loss": 0.4594, "step": 26460 }, { "epoch": 0.38150555611605147, "grad_norm": 1.6897263869298254, "learning_rate": 7.774451411321844e-06, "loss": 0.505, "step": 26470 }, { "epoch": 0.3816496836400847, "grad_norm": 1.5776527198802412, "learning_rate": 7.772358333296137e-06, "loss": 0.4665, "step": 26480 }, { "epoch": 0.38179381116411804, "grad_norm": 1.671305185220988, "learning_rate": 7.770264553544267e-06, "loss": 0.5046, "step": 26490 }, { "epoch": 0.3819379386881513, "grad_norm": 1.3762253287133717, "learning_rate": 7.7681700725962e-06, "loss": 0.4613, "step": 26500 }, { "epoch": 0.38208206621218455, "grad_norm": 1.6086857049651064, "learning_rate": 7.766074890982082e-06, "loss": 0.4654, "step": 26510 }, { "epoch": 0.3822261937362178, "grad_norm": 1.8202829679550678, "learning_rate": 7.763979009232237e-06, "loss": 0.5216, "step": 26520 }, { "epoch": 0.38237032126025106, "grad_norm": 1.3191347986995008, "learning_rate": 7.761882427877162e-06, "loss": 0.4713, "step": 26530 }, { "epoch": 0.3825144487842843, "grad_norm": 1.4593488130032366, "learning_rate": 7.759785147447535e-06, "loss": 0.4979, "step": 26540 }, { "epoch": 0.3826585763083176, "grad_norm": 1.5469662434534444, "learning_rate": 7.757687168474211e-06, "loss": 0.5123, "step": 26550 }, { "epoch": 0.3828027038323509, "grad_norm": 1.668828198225028, "learning_rate": 7.755588491488218e-06, "loss": 0.5155, "step": 26560 }, { "epoch": 0.38294683135638413, "grad_norm": 1.5005913740978916, "learning_rate": 7.753489117020766e-06, "loss": 0.49, "step": 26570 }, { "epoch": 0.3830909588804174, "grad_norm": 1.5316843831221507, "learning_rate": 7.751389045603234e-06, "loss": 0.5272, "step": 26580 }, { "epoch": 0.38323508640445064, "grad_norm": 1.35703038324906, "learning_rate": 7.749288277767187e-06, "loss": 0.5064, "step": 26590 }, { "epoch": 0.3833792139284839, "grad_norm": 1.6769215588449824, "learning_rate": 7.747186814044359e-06, "loss": 0.4799, "step": 26600 }, { "epoch": 0.3835233414525172, "grad_norm": 1.4650625360393614, "learning_rate": 7.745084654966664e-06, "loss": 0.4512, "step": 26610 }, { "epoch": 0.38366746897655046, "grad_norm": 1.6916551009826206, "learning_rate": 7.742981801066187e-06, "loss": 0.4771, "step": 26620 }, { "epoch": 0.3838115965005837, "grad_norm": 1.550872639696266, "learning_rate": 7.740878252875199e-06, "loss": 0.5051, "step": 26630 }, { "epoch": 0.383955724024617, "grad_norm": 1.6381471635925606, "learning_rate": 7.73877401092613e-06, "loss": 0.495, "step": 26640 }, { "epoch": 0.38409985154865023, "grad_norm": 1.649767077474349, "learning_rate": 7.736669075751607e-06, "loss": 0.4762, "step": 26650 }, { "epoch": 0.3842439790726835, "grad_norm": 1.7323066752067564, "learning_rate": 7.734563447884413e-06, "loss": 0.5077, "step": 26660 }, { "epoch": 0.3843881065967168, "grad_norm": 1.6379689538286784, "learning_rate": 7.73245712785752e-06, "loss": 0.475, "step": 26670 }, { "epoch": 0.38453223412075005, "grad_norm": 1.4624995781173167, "learning_rate": 7.730350116204065e-06, "loss": 0.4417, "step": 26680 }, { "epoch": 0.3846763616447833, "grad_norm": 1.2952589900044484, "learning_rate": 7.728242413457368e-06, "loss": 0.4768, "step": 26690 }, { "epoch": 0.38482048916881656, "grad_norm": 1.5846480140812857, "learning_rate": 7.726134020150917e-06, "loss": 0.4721, "step": 26700 }, { "epoch": 0.3849646166928498, "grad_norm": 1.502426521053713, "learning_rate": 7.724024936818384e-06, "loss": 0.4849, "step": 26710 }, { "epoch": 0.3851087442168831, "grad_norm": 1.430160508894515, "learning_rate": 7.721915163993606e-06, "loss": 0.4621, "step": 26720 }, { "epoch": 0.3852528717409164, "grad_norm": 1.3436893110449608, "learning_rate": 7.719804702210601e-06, "loss": 0.4916, "step": 26730 }, { "epoch": 0.38539699926494964, "grad_norm": 1.3264053277774412, "learning_rate": 7.717693552003559e-06, "loss": 0.4976, "step": 26740 }, { "epoch": 0.3855411267889829, "grad_norm": 1.5225606890982488, "learning_rate": 7.71558171390684e-06, "loss": 0.4887, "step": 26750 }, { "epoch": 0.38568525431301615, "grad_norm": 1.384122452851286, "learning_rate": 7.713469188454985e-06, "loss": 0.457, "step": 26760 }, { "epoch": 0.3858293818370494, "grad_norm": 1.5085069552694588, "learning_rate": 7.711355976182711e-06, "loss": 0.5065, "step": 26770 }, { "epoch": 0.3859735093610827, "grad_norm": 1.7844202762832573, "learning_rate": 7.709242077624898e-06, "loss": 0.4945, "step": 26780 }, { "epoch": 0.38611763688511597, "grad_norm": 1.4673527935820696, "learning_rate": 7.707127493316608e-06, "loss": 0.4854, "step": 26790 }, { "epoch": 0.3862617644091492, "grad_norm": 1.7482786948708746, "learning_rate": 7.705012223793075e-06, "loss": 0.508, "step": 26800 }, { "epoch": 0.3864058919331825, "grad_norm": 1.323900044290503, "learning_rate": 7.702896269589705e-06, "loss": 0.475, "step": 26810 }, { "epoch": 0.38655001945721573, "grad_norm": 1.3627963057195571, "learning_rate": 7.700779631242082e-06, "loss": 0.474, "step": 26820 }, { "epoch": 0.386694146981249, "grad_norm": 1.3143422162324865, "learning_rate": 7.698662309285955e-06, "loss": 0.4563, "step": 26830 }, { "epoch": 0.3868382745052823, "grad_norm": 1.9041128970552208, "learning_rate": 7.696544304257252e-06, "loss": 0.4788, "step": 26840 }, { "epoch": 0.38698240202931555, "grad_norm": 1.6139221139081616, "learning_rate": 7.694425616692072e-06, "loss": 0.5141, "step": 26850 }, { "epoch": 0.3871265295533488, "grad_norm": 1.5577472773092271, "learning_rate": 7.692306247126686e-06, "loss": 0.5084, "step": 26860 }, { "epoch": 0.38727065707738206, "grad_norm": 1.500092001852779, "learning_rate": 7.690186196097543e-06, "loss": 0.4937, "step": 26870 }, { "epoch": 0.3874147846014153, "grad_norm": 1.433199824321318, "learning_rate": 7.688065464141256e-06, "loss": 0.461, "step": 26880 }, { "epoch": 0.3875589121254486, "grad_norm": 1.590105449510327, "learning_rate": 7.685944051794615e-06, "loss": 0.481, "step": 26890 }, { "epoch": 0.3877030396494819, "grad_norm": 1.7076203682796312, "learning_rate": 7.683821959594585e-06, "loss": 0.5238, "step": 26900 }, { "epoch": 0.38784716717351514, "grad_norm": 1.6079293826976706, "learning_rate": 7.681699188078296e-06, "loss": 0.513, "step": 26910 }, { "epoch": 0.3879912946975484, "grad_norm": 1.388472822029969, "learning_rate": 7.679575737783056e-06, "loss": 0.485, "step": 26920 }, { "epoch": 0.38813542222158165, "grad_norm": 1.2448163751406245, "learning_rate": 7.677451609246343e-06, "loss": 0.5065, "step": 26930 }, { "epoch": 0.3882795497456149, "grad_norm": 5.334839181548182, "learning_rate": 7.675326803005805e-06, "loss": 0.4855, "step": 26940 }, { "epoch": 0.38842367726964816, "grad_norm": 1.4847117149096256, "learning_rate": 7.673201319599262e-06, "loss": 0.4696, "step": 26950 }, { "epoch": 0.38856780479368147, "grad_norm": 1.5361356832246962, "learning_rate": 7.671075159564709e-06, "loss": 0.4851, "step": 26960 }, { "epoch": 0.3887119323177147, "grad_norm": 1.6544874107809315, "learning_rate": 7.668948323440309e-06, "loss": 0.4715, "step": 26970 }, { "epoch": 0.388856059841748, "grad_norm": 1.6106422227902155, "learning_rate": 7.666820811764395e-06, "loss": 0.5084, "step": 26980 }, { "epoch": 0.38900018736578124, "grad_norm": 1.5326112464806922, "learning_rate": 7.664692625075473e-06, "loss": 0.4686, "step": 26990 }, { "epoch": 0.3891443148898145, "grad_norm": 1.5208700500045533, "learning_rate": 7.662563763912219e-06, "loss": 0.4996, "step": 27000 }, { "epoch": 0.38928844241384775, "grad_norm": 1.5506355801256382, "learning_rate": 7.660434228813484e-06, "loss": 0.4595, "step": 27010 }, { "epoch": 0.38943256993788106, "grad_norm": 1.396048622604807, "learning_rate": 7.658304020318281e-06, "loss": 0.465, "step": 27020 }, { "epoch": 0.3895766974619143, "grad_norm": 1.667529912080469, "learning_rate": 7.6561731389658e-06, "loss": 0.481, "step": 27030 }, { "epoch": 0.38972082498594757, "grad_norm": 1.6362005017595977, "learning_rate": 7.6540415852954e-06, "loss": 0.4807, "step": 27040 }, { "epoch": 0.3898649525099808, "grad_norm": 1.775076265798186, "learning_rate": 7.65190935984661e-06, "loss": 0.4788, "step": 27050 }, { "epoch": 0.3900090800340141, "grad_norm": 1.4734417509868816, "learning_rate": 7.649776463159129e-06, "loss": 0.472, "step": 27060 }, { "epoch": 0.3901532075580474, "grad_norm": 1.6317696908151291, "learning_rate": 7.647642895772825e-06, "loss": 0.517, "step": 27070 }, { "epoch": 0.39029733508208064, "grad_norm": 1.6471052930283885, "learning_rate": 7.645508658227733e-06, "loss": 0.4926, "step": 27080 }, { "epoch": 0.3904414626061139, "grad_norm": 1.5345403506349653, "learning_rate": 7.643373751064067e-06, "loss": 0.4748, "step": 27090 }, { "epoch": 0.39058559013014715, "grad_norm": 1.4932581603784245, "learning_rate": 7.641238174822203e-06, "loss": 0.5022, "step": 27100 }, { "epoch": 0.3907297176541804, "grad_norm": 1.8414235348778478, "learning_rate": 7.639101930042687e-06, "loss": 0.4959, "step": 27110 }, { "epoch": 0.39087384517821366, "grad_norm": 1.883824778700384, "learning_rate": 7.636965017266232e-06, "loss": 0.4866, "step": 27120 }, { "epoch": 0.391017972702247, "grad_norm": 1.798261441434388, "learning_rate": 7.634827437033728e-06, "loss": 0.4516, "step": 27130 }, { "epoch": 0.39116210022628023, "grad_norm": 1.4501332628216197, "learning_rate": 7.632689189886227e-06, "loss": 0.492, "step": 27140 }, { "epoch": 0.3913062277503135, "grad_norm": 1.861186385196663, "learning_rate": 7.630550276364952e-06, "loss": 0.4848, "step": 27150 }, { "epoch": 0.39145035527434674, "grad_norm": 1.3204865875622889, "learning_rate": 7.628410697011297e-06, "loss": 0.4693, "step": 27160 }, { "epoch": 0.39159448279838, "grad_norm": 1.5107201479001287, "learning_rate": 7.62627045236682e-06, "loss": 0.481, "step": 27170 }, { "epoch": 0.39173861032241325, "grad_norm": 1.4941171429396074, "learning_rate": 7.624129542973248e-06, "loss": 0.4335, "step": 27180 }, { "epoch": 0.39188273784644656, "grad_norm": 1.4345342294068204, "learning_rate": 7.621987969372479e-06, "loss": 0.4658, "step": 27190 }, { "epoch": 0.3920268653704798, "grad_norm": 1.637159624004935, "learning_rate": 7.6198457321065785e-06, "loss": 0.4858, "step": 27200 }, { "epoch": 0.39217099289451307, "grad_norm": 1.5059729759929146, "learning_rate": 7.617702831717781e-06, "loss": 0.4616, "step": 27210 }, { "epoch": 0.3923151204185463, "grad_norm": 1.3201688491532273, "learning_rate": 7.6155592687484845e-06, "loss": 0.481, "step": 27220 }, { "epoch": 0.3924592479425796, "grad_norm": 1.2787133222231777, "learning_rate": 7.613415043741259e-06, "loss": 0.4783, "step": 27230 }, { "epoch": 0.39260337546661284, "grad_norm": 1.4511124087790779, "learning_rate": 7.61127015723884e-06, "loss": 0.5014, "step": 27240 }, { "epoch": 0.39274750299064615, "grad_norm": 1.6181906846029463, "learning_rate": 7.609124609784131e-06, "loss": 0.4728, "step": 27250 }, { "epoch": 0.3928916305146794, "grad_norm": 1.7399738807913292, "learning_rate": 7.606978401920202e-06, "loss": 0.5078, "step": 27260 }, { "epoch": 0.39303575803871266, "grad_norm": 1.5838961120294863, "learning_rate": 7.604831534190293e-06, "loss": 0.4993, "step": 27270 }, { "epoch": 0.3931798855627459, "grad_norm": 1.4829582754074624, "learning_rate": 7.602684007137808e-06, "loss": 0.4769, "step": 27280 }, { "epoch": 0.39332401308677917, "grad_norm": 1.5651490578883602, "learning_rate": 7.600535821306318e-06, "loss": 0.4901, "step": 27290 }, { "epoch": 0.3934681406108124, "grad_norm": 1.669549005838766, "learning_rate": 7.5983869772395626e-06, "loss": 0.4938, "step": 27300 }, { "epoch": 0.39361226813484573, "grad_norm": 1.4798685395111868, "learning_rate": 7.596237475481445e-06, "loss": 0.4745, "step": 27310 }, { "epoch": 0.393756395658879, "grad_norm": 1.569659573691684, "learning_rate": 7.59408731657604e-06, "loss": 0.5098, "step": 27320 }, { "epoch": 0.39390052318291224, "grad_norm": 1.4627987280157346, "learning_rate": 7.5919365010675825e-06, "loss": 0.4854, "step": 27330 }, { "epoch": 0.3940446507069455, "grad_norm": 1.5809932047542221, "learning_rate": 7.589785029500479e-06, "loss": 0.4779, "step": 27340 }, { "epoch": 0.39418877823097875, "grad_norm": 1.4762447566124255, "learning_rate": 7.587632902419301e-06, "loss": 0.4711, "step": 27350 }, { "epoch": 0.394332905755012, "grad_norm": 1.4430563223297892, "learning_rate": 7.585480120368779e-06, "loss": 0.4744, "step": 27360 }, { "epoch": 0.3944770332790453, "grad_norm": 1.3117377694753594, "learning_rate": 7.5833266838938216e-06, "loss": 0.5086, "step": 27370 }, { "epoch": 0.3946211608030786, "grad_norm": 1.6310734538814788, "learning_rate": 7.581172593539492e-06, "loss": 0.496, "step": 27380 }, { "epoch": 0.39476528832711183, "grad_norm": 1.570708335020682, "learning_rate": 7.579017849851025e-06, "loss": 0.4754, "step": 27390 }, { "epoch": 0.3949094158511451, "grad_norm": 1.5931943705513671, "learning_rate": 7.576862453373819e-06, "loss": 0.4843, "step": 27400 }, { "epoch": 0.39505354337517834, "grad_norm": 1.55138011684392, "learning_rate": 7.5747064046534355e-06, "loss": 0.4916, "step": 27410 }, { "epoch": 0.3951976708992116, "grad_norm": 1.5230417904806248, "learning_rate": 7.572549704235608e-06, "loss": 0.4882, "step": 27420 }, { "epoch": 0.3953417984232449, "grad_norm": 1.4377419057475567, "learning_rate": 7.570392352666227e-06, "loss": 0.4731, "step": 27430 }, { "epoch": 0.39548592594727816, "grad_norm": 1.4394223201211218, "learning_rate": 7.56823435049135e-06, "loss": 0.4917, "step": 27440 }, { "epoch": 0.3956300534713114, "grad_norm": 1.7103572361464354, "learning_rate": 7.566075698257205e-06, "loss": 0.4838, "step": 27450 }, { "epoch": 0.39577418099534467, "grad_norm": 1.5912253438489292, "learning_rate": 7.563916396510173e-06, "loss": 0.4714, "step": 27460 }, { "epoch": 0.3959183085193779, "grad_norm": 1.9935352966295388, "learning_rate": 7.561756445796812e-06, "loss": 0.517, "step": 27470 }, { "epoch": 0.39606243604341124, "grad_norm": 1.530580558219414, "learning_rate": 7.559595846663837e-06, "loss": 0.4761, "step": 27480 }, { "epoch": 0.3962065635674445, "grad_norm": 1.538969268443982, "learning_rate": 7.5574345996581265e-06, "loss": 0.4584, "step": 27490 }, { "epoch": 0.39635069109147775, "grad_norm": 1.5611866499803, "learning_rate": 7.5552727053267284e-06, "loss": 0.4796, "step": 27500 }, { "epoch": 0.396494818615511, "grad_norm": 1.541685386180318, "learning_rate": 7.553110164216848e-06, "loss": 0.4629, "step": 27510 }, { "epoch": 0.39663894613954426, "grad_norm": 1.521529755267886, "learning_rate": 7.550946976875857e-06, "loss": 0.4697, "step": 27520 }, { "epoch": 0.3967830736635775, "grad_norm": 1.5497847506717708, "learning_rate": 7.548783143851295e-06, "loss": 0.4909, "step": 27530 }, { "epoch": 0.3969272011876108, "grad_norm": 1.2137373892518666, "learning_rate": 7.546618665690857e-06, "loss": 0.4724, "step": 27540 }, { "epoch": 0.3970713287116441, "grad_norm": 1.759997378211116, "learning_rate": 7.5444535429424095e-06, "loss": 0.4734, "step": 27550 }, { "epoch": 0.39721545623567733, "grad_norm": 1.6477473156244742, "learning_rate": 7.542287776153975e-06, "loss": 0.4753, "step": 27560 }, { "epoch": 0.3973595837597106, "grad_norm": 2.024312283579598, "learning_rate": 7.540121365873743e-06, "loss": 0.4974, "step": 27570 }, { "epoch": 0.39750371128374384, "grad_norm": 1.7200818914399203, "learning_rate": 7.537954312650065e-06, "loss": 0.5026, "step": 27580 }, { "epoch": 0.3976478388077771, "grad_norm": 1.5218109640001656, "learning_rate": 7.535786617031453e-06, "loss": 0.4824, "step": 27590 }, { "epoch": 0.3977919663318104, "grad_norm": 1.742688927763601, "learning_rate": 7.533618279566588e-06, "loss": 0.4709, "step": 27600 }, { "epoch": 0.39793609385584366, "grad_norm": 1.5954236452238533, "learning_rate": 7.531449300804308e-06, "loss": 0.449, "step": 27610 }, { "epoch": 0.3980802213798769, "grad_norm": 1.6257656343386688, "learning_rate": 7.529279681293613e-06, "loss": 0.4705, "step": 27620 }, { "epoch": 0.3982243489039102, "grad_norm": 1.465125519630452, "learning_rate": 7.527109421583667e-06, "loss": 0.4769, "step": 27630 }, { "epoch": 0.39836847642794343, "grad_norm": 1.1935260452609295, "learning_rate": 7.524938522223797e-06, "loss": 0.4721, "step": 27640 }, { "epoch": 0.3985126039519767, "grad_norm": 1.649714302854257, "learning_rate": 7.52276698376349e-06, "loss": 0.4878, "step": 27650 }, { "epoch": 0.39865673147601, "grad_norm": 1.680094255161812, "learning_rate": 7.520594806752397e-06, "loss": 0.4548, "step": 27660 }, { "epoch": 0.39880085900004325, "grad_norm": 1.3210519480794545, "learning_rate": 7.518421991740326e-06, "loss": 0.4969, "step": 27670 }, { "epoch": 0.3989449865240765, "grad_norm": 1.2914615049282216, "learning_rate": 7.516248539277251e-06, "loss": 0.5007, "step": 27680 }, { "epoch": 0.39908911404810976, "grad_norm": 1.4835235892995826, "learning_rate": 7.514074449913307e-06, "loss": 0.4821, "step": 27690 }, { "epoch": 0.399233241572143, "grad_norm": 1.5262661522844514, "learning_rate": 7.511899724198789e-06, "loss": 0.4863, "step": 27700 }, { "epoch": 0.39937736909617627, "grad_norm": 1.7360140181412367, "learning_rate": 7.509724362684153e-06, "loss": 0.4904, "step": 27710 }, { "epoch": 0.3995214966202096, "grad_norm": 1.446325291976817, "learning_rate": 7.5075483659200155e-06, "loss": 0.4633, "step": 27720 }, { "epoch": 0.39966562414424284, "grad_norm": 1.3400269836472645, "learning_rate": 7.505371734457154e-06, "loss": 0.5054, "step": 27730 }, { "epoch": 0.3998097516682761, "grad_norm": 1.3965478746678262, "learning_rate": 7.50319446884651e-06, "loss": 0.4764, "step": 27740 }, { "epoch": 0.39995387919230935, "grad_norm": 1.5412482415904045, "learning_rate": 7.501016569639178e-06, "loss": 0.489, "step": 27750 }, { "epoch": 0.4000980067163426, "grad_norm": 1.3941962066433073, "learning_rate": 7.498838037386422e-06, "loss": 0.5002, "step": 27760 }, { "epoch": 0.40024213424037586, "grad_norm": 1.3585762203577971, "learning_rate": 7.496658872639659e-06, "loss": 0.4763, "step": 27770 }, { "epoch": 0.40038626176440917, "grad_norm": 1.6452634527277037, "learning_rate": 7.49447907595047e-06, "loss": 0.4863, "step": 27780 }, { "epoch": 0.4005303892884424, "grad_norm": 1.4487743776346376, "learning_rate": 7.492298647870595e-06, "loss": 0.4801, "step": 27790 }, { "epoch": 0.4006745168124757, "grad_norm": 1.9287696247267396, "learning_rate": 7.490117588951931e-06, "loss": 0.509, "step": 27800 }, { "epoch": 0.40081864433650893, "grad_norm": 1.3022623216119986, "learning_rate": 7.487935899746542e-06, "loss": 0.4536, "step": 27810 }, { "epoch": 0.4009627718605422, "grad_norm": 1.473814308584845, "learning_rate": 7.4857535808066415e-06, "loss": 0.4745, "step": 27820 }, { "epoch": 0.4011068993845755, "grad_norm": 1.3447784548805681, "learning_rate": 7.483570632684611e-06, "loss": 0.5118, "step": 27830 }, { "epoch": 0.40125102690860875, "grad_norm": 1.5499410301979024, "learning_rate": 7.481387055932989e-06, "loss": 0.4932, "step": 27840 }, { "epoch": 0.401395154432642, "grad_norm": 1.3455161662835249, "learning_rate": 7.479202851104468e-06, "loss": 0.4932, "step": 27850 }, { "epoch": 0.40153928195667526, "grad_norm": 1.431757514191807, "learning_rate": 7.477018018751906e-06, "loss": 0.4596, "step": 27860 }, { "epoch": 0.4016834094807085, "grad_norm": 1.7020686965678196, "learning_rate": 7.474832559428319e-06, "loss": 0.4873, "step": 27870 }, { "epoch": 0.4018275370047418, "grad_norm": 1.5350979331818124, "learning_rate": 7.4726464736868765e-06, "loss": 0.4766, "step": 27880 }, { "epoch": 0.4019716645287751, "grad_norm": 1.5823413208997088, "learning_rate": 7.470459762080913e-06, "loss": 0.4761, "step": 27890 }, { "epoch": 0.40211579205280834, "grad_norm": 1.6125605085926638, "learning_rate": 7.468272425163917e-06, "loss": 0.4805, "step": 27900 }, { "epoch": 0.4022599195768416, "grad_norm": 1.5177279721884018, "learning_rate": 7.466084463489537e-06, "loss": 0.4468, "step": 27910 }, { "epoch": 0.40240404710087485, "grad_norm": 1.2701072898648875, "learning_rate": 7.46389587761158e-06, "loss": 0.4614, "step": 27920 }, { "epoch": 0.4025481746249081, "grad_norm": 2.432668238145178, "learning_rate": 7.46170666808401e-06, "loss": 0.4722, "step": 27930 }, { "epoch": 0.40269230214894136, "grad_norm": 1.581882586781673, "learning_rate": 7.459516835460951e-06, "loss": 0.4778, "step": 27940 }, { "epoch": 0.40283642967297467, "grad_norm": 1.5344699708127674, "learning_rate": 7.457326380296681e-06, "loss": 0.49, "step": 27950 }, { "epoch": 0.4029805571970079, "grad_norm": 1.3189790558215773, "learning_rate": 7.455135303145638e-06, "loss": 0.4936, "step": 27960 }, { "epoch": 0.4031246847210412, "grad_norm": 1.3563745693521765, "learning_rate": 7.452943604562415e-06, "loss": 0.509, "step": 27970 }, { "epoch": 0.40326881224507444, "grad_norm": 2.046308414137757, "learning_rate": 7.4507512851017686e-06, "loss": 0.466, "step": 27980 }, { "epoch": 0.4034129397691077, "grad_norm": 1.704990135422287, "learning_rate": 7.448558345318605e-06, "loss": 0.507, "step": 27990 }, { "epoch": 0.40355706729314095, "grad_norm": 1.5224626064981979, "learning_rate": 7.446364785767995e-06, "loss": 0.4936, "step": 28000 }, { "epoch": 0.40370119481717426, "grad_norm": 1.5958101880180098, "learning_rate": 7.444170607005155e-06, "loss": 0.4665, "step": 28010 }, { "epoch": 0.4038453223412075, "grad_norm": 1.4464831740681818, "learning_rate": 7.44197580958547e-06, "loss": 0.476, "step": 28020 }, { "epoch": 0.40398944986524077, "grad_norm": 1.6055784755705182, "learning_rate": 7.439780394064476e-06, "loss": 0.4737, "step": 28030 }, { "epoch": 0.404133577389274, "grad_norm": 1.6464350511399506, "learning_rate": 7.4375843609978656e-06, "loss": 0.4786, "step": 28040 }, { "epoch": 0.4042777049133073, "grad_norm": 1.4707392369224757, "learning_rate": 7.4353877109414895e-06, "loss": 0.4665, "step": 28050 }, { "epoch": 0.40442183243734053, "grad_norm": 1.568586202604824, "learning_rate": 7.4331904444513516e-06, "loss": 0.4882, "step": 28060 }, { "epoch": 0.40456595996137384, "grad_norm": 2.150436623726638, "learning_rate": 7.4309925620836144e-06, "loss": 0.4899, "step": 28070 }, { "epoch": 0.4047100874854071, "grad_norm": 1.4818862944346376, "learning_rate": 7.428794064394593e-06, "loss": 0.5006, "step": 28080 }, { "epoch": 0.40485421500944035, "grad_norm": 1.376924400776098, "learning_rate": 7.426594951940766e-06, "loss": 0.4432, "step": 28090 }, { "epoch": 0.4049983425334736, "grad_norm": 1.7721783647199114, "learning_rate": 7.424395225278758e-06, "loss": 0.476, "step": 28100 }, { "epoch": 0.40514247005750686, "grad_norm": 1.285671442575613, "learning_rate": 7.4221948849653545e-06, "loss": 0.4508, "step": 28110 }, { "epoch": 0.4052865975815401, "grad_norm": 1.570330040261905, "learning_rate": 7.419993931557495e-06, "loss": 0.4974, "step": 28120 }, { "epoch": 0.40543072510557343, "grad_norm": 1.4013072857213937, "learning_rate": 7.417792365612275e-06, "loss": 0.4729, "step": 28130 }, { "epoch": 0.4055748526296067, "grad_norm": 1.495982267026689, "learning_rate": 7.415590187686943e-06, "loss": 0.4653, "step": 28140 }, { "epoch": 0.40571898015363994, "grad_norm": 1.628081483926935, "learning_rate": 7.4133873983389075e-06, "loss": 0.4671, "step": 28150 }, { "epoch": 0.4058631076776732, "grad_norm": 1.5436326331441215, "learning_rate": 7.411183998125723e-06, "loss": 0.4753, "step": 28160 }, { "epoch": 0.40600723520170645, "grad_norm": 1.2302059145037711, "learning_rate": 7.408979987605106e-06, "loss": 0.4587, "step": 28170 }, { "epoch": 0.40615136272573976, "grad_norm": 1.6410655983315257, "learning_rate": 7.406775367334926e-06, "loss": 0.4815, "step": 28180 }, { "epoch": 0.406295490249773, "grad_norm": 1.506075332585115, "learning_rate": 7.404570137873205e-06, "loss": 0.4771, "step": 28190 }, { "epoch": 0.40643961777380627, "grad_norm": 1.6420067584440812, "learning_rate": 7.40236429977812e-06, "loss": 0.4702, "step": 28200 }, { "epoch": 0.4065837452978395, "grad_norm": 1.3485736879757406, "learning_rate": 7.400157853608005e-06, "loss": 0.4738, "step": 28210 }, { "epoch": 0.4067278728218728, "grad_norm": 1.2579814210508746, "learning_rate": 7.397950799921341e-06, "loss": 0.4711, "step": 28220 }, { "epoch": 0.40687200034590604, "grad_norm": 1.5152094098471758, "learning_rate": 7.39574313927677e-06, "loss": 0.4646, "step": 28230 }, { "epoch": 0.40701612786993935, "grad_norm": 1.5631487946987783, "learning_rate": 7.393534872233081e-06, "loss": 0.4463, "step": 28240 }, { "epoch": 0.4071602553939726, "grad_norm": 1.5766138806772447, "learning_rate": 7.391325999349224e-06, "loss": 0.5002, "step": 28250 }, { "epoch": 0.40730438291800586, "grad_norm": 1.633205081867234, "learning_rate": 7.389116521184296e-06, "loss": 0.506, "step": 28260 }, { "epoch": 0.4074485104420391, "grad_norm": 1.5851143022447953, "learning_rate": 7.386906438297552e-06, "loss": 0.5086, "step": 28270 }, { "epoch": 0.40759263796607237, "grad_norm": 1.8060462870393406, "learning_rate": 7.384695751248394e-06, "loss": 0.4667, "step": 28280 }, { "epoch": 0.4077367654901056, "grad_norm": 1.1364032864428006, "learning_rate": 7.382484460596385e-06, "loss": 0.5006, "step": 28290 }, { "epoch": 0.40788089301413893, "grad_norm": 1.6844832086544799, "learning_rate": 7.380272566901232e-06, "loss": 0.4857, "step": 28300 }, { "epoch": 0.4080250205381722, "grad_norm": 1.534240810481546, "learning_rate": 7.378060070722804e-06, "loss": 0.4369, "step": 28310 }, { "epoch": 0.40816914806220544, "grad_norm": 1.7411948847875212, "learning_rate": 7.375846972621113e-06, "loss": 0.5053, "step": 28320 }, { "epoch": 0.4083132755862387, "grad_norm": 1.5891666964901754, "learning_rate": 7.373633273156328e-06, "loss": 0.5139, "step": 28330 }, { "epoch": 0.40845740311027195, "grad_norm": 1.8821458365877342, "learning_rate": 7.371418972888774e-06, "loss": 0.5027, "step": 28340 }, { "epoch": 0.4086015306343052, "grad_norm": 1.5533152733450541, "learning_rate": 7.36920407237892e-06, "loss": 0.476, "step": 28350 }, { "epoch": 0.4087456581583385, "grad_norm": 1.5149245706903247, "learning_rate": 7.366988572187394e-06, "loss": 0.4622, "step": 28360 }, { "epoch": 0.4088897856823718, "grad_norm": 1.447719790751373, "learning_rate": 7.36477247287497e-06, "loss": 0.4972, "step": 28370 }, { "epoch": 0.40903391320640503, "grad_norm": 1.526861509989161, "learning_rate": 7.36255577500258e-06, "loss": 0.4665, "step": 28380 }, { "epoch": 0.4091780407304383, "grad_norm": 1.5916079528785907, "learning_rate": 7.360338479131303e-06, "loss": 0.4854, "step": 28390 }, { "epoch": 0.40932216825447154, "grad_norm": 1.5067525012343241, "learning_rate": 7.358120585822368e-06, "loss": 0.5078, "step": 28400 }, { "epoch": 0.4094662957785048, "grad_norm": 1.3188573772855587, "learning_rate": 7.35590209563716e-06, "loss": 0.4816, "step": 28410 }, { "epoch": 0.4096104233025381, "grad_norm": 1.6635197351129376, "learning_rate": 7.353683009137211e-06, "loss": 0.5184, "step": 28420 }, { "epoch": 0.40975455082657136, "grad_norm": 1.804492257040842, "learning_rate": 7.3514633268842075e-06, "loss": 0.47, "step": 28430 }, { "epoch": 0.4098986783506046, "grad_norm": 1.50687940551377, "learning_rate": 7.349243049439985e-06, "loss": 0.4582, "step": 28440 }, { "epoch": 0.4100428058746379, "grad_norm": 1.5478609655192468, "learning_rate": 7.347022177366526e-06, "loss": 0.4852, "step": 28450 }, { "epoch": 0.4101869333986711, "grad_norm": 1.4733237787873728, "learning_rate": 7.3448007112259716e-06, "loss": 0.475, "step": 28460 }, { "epoch": 0.4103310609227044, "grad_norm": 1.2004974409028366, "learning_rate": 7.342578651580605e-06, "loss": 0.4538, "step": 28470 }, { "epoch": 0.4104751884467377, "grad_norm": 1.663197540018624, "learning_rate": 7.340355998992867e-06, "loss": 0.4487, "step": 28480 }, { "epoch": 0.41061931597077095, "grad_norm": 1.372777825154454, "learning_rate": 7.338132754025343e-06, "loss": 0.5, "step": 28490 }, { "epoch": 0.4107634434948042, "grad_norm": 1.4157463227951694, "learning_rate": 7.33590891724077e-06, "loss": 0.4702, "step": 28500 }, { "epoch": 0.41090757101883746, "grad_norm": 1.4638772582290007, "learning_rate": 7.333684489202034e-06, "loss": 0.449, "step": 28510 }, { "epoch": 0.4110516985428707, "grad_norm": 1.2469780285540728, "learning_rate": 7.331459470472174e-06, "loss": 0.4645, "step": 28520 }, { "epoch": 0.411195826066904, "grad_norm": 1.6289658501043545, "learning_rate": 7.329233861614376e-06, "loss": 0.4847, "step": 28530 }, { "epoch": 0.4113399535909373, "grad_norm": 1.532981898267358, "learning_rate": 7.327007663191974e-06, "loss": 0.4692, "step": 28540 }, { "epoch": 0.41148408111497053, "grad_norm": 1.7005273426927583, "learning_rate": 7.324780875768453e-06, "loss": 0.476, "step": 28550 }, { "epoch": 0.4116282086390038, "grad_norm": 1.4283794185077, "learning_rate": 7.322553499907447e-06, "loss": 0.4742, "step": 28560 }, { "epoch": 0.41177233616303704, "grad_norm": 1.5264435305700408, "learning_rate": 7.320325536172739e-06, "loss": 0.4683, "step": 28570 }, { "epoch": 0.4119164636870703, "grad_norm": 1.3588451520123896, "learning_rate": 7.31809698512826e-06, "loss": 0.4958, "step": 28580 }, { "epoch": 0.4120605912111036, "grad_norm": 1.3164648343522445, "learning_rate": 7.315867847338092e-06, "loss": 0.4793, "step": 28590 }, { "epoch": 0.41220471873513687, "grad_norm": 1.6178440035405184, "learning_rate": 7.313638123366462e-06, "loss": 0.4616, "step": 28600 }, { "epoch": 0.4123488462591701, "grad_norm": 1.5524870160410629, "learning_rate": 7.311407813777748e-06, "loss": 0.4624, "step": 28610 }, { "epoch": 0.4124929737832034, "grad_norm": 1.2470667714453594, "learning_rate": 7.309176919136475e-06, "loss": 0.4833, "step": 28620 }, { "epoch": 0.41263710130723663, "grad_norm": 1.5348293236910981, "learning_rate": 7.306945440007316e-06, "loss": 0.476, "step": 28630 }, { "epoch": 0.4127812288312699, "grad_norm": 1.514844748314905, "learning_rate": 7.304713376955092e-06, "loss": 0.4794, "step": 28640 }, { "epoch": 0.4129253563553032, "grad_norm": 1.4786532064082671, "learning_rate": 7.302480730544776e-06, "loss": 0.5175, "step": 28650 }, { "epoch": 0.41306948387933645, "grad_norm": 1.9099306579314752, "learning_rate": 7.300247501341481e-06, "loss": 0.4785, "step": 28660 }, { "epoch": 0.4132136114033697, "grad_norm": 1.6087003477141641, "learning_rate": 7.298013689910472e-06, "loss": 0.4879, "step": 28670 }, { "epoch": 0.41335773892740296, "grad_norm": 1.6055324490939396, "learning_rate": 7.2957792968171614e-06, "loss": 0.466, "step": 28680 }, { "epoch": 0.4135018664514362, "grad_norm": 1.3395092517650604, "learning_rate": 7.293544322627106e-06, "loss": 0.4976, "step": 28690 }, { "epoch": 0.4136459939754695, "grad_norm": 1.6757397561761533, "learning_rate": 7.291308767906015e-06, "loss": 0.4851, "step": 28700 }, { "epoch": 0.4137901214995028, "grad_norm": 1.420949702203535, "learning_rate": 7.28907263321974e-06, "loss": 0.4606, "step": 28710 }, { "epoch": 0.41393424902353604, "grad_norm": 1.5243021165036639, "learning_rate": 7.286835919134282e-06, "loss": 0.4943, "step": 28720 }, { "epoch": 0.4140783765475693, "grad_norm": 1.457807328871015, "learning_rate": 7.2845986262157854e-06, "loss": 0.4895, "step": 28730 }, { "epoch": 0.41422250407160255, "grad_norm": 1.520782284235389, "learning_rate": 7.282360755030543e-06, "loss": 0.4755, "step": 28740 }, { "epoch": 0.4143666315956358, "grad_norm": 1.5685432368386107, "learning_rate": 7.280122306144997e-06, "loss": 0.4774, "step": 28750 }, { "epoch": 0.41451075911966906, "grad_norm": 1.5381529183794984, "learning_rate": 7.277883280125729e-06, "loss": 0.4486, "step": 28760 }, { "epoch": 0.41465488664370237, "grad_norm": 1.7284681697509656, "learning_rate": 7.275643677539475e-06, "loss": 0.4729, "step": 28770 }, { "epoch": 0.4147990141677356, "grad_norm": 1.511171447666252, "learning_rate": 7.273403498953109e-06, "loss": 0.4542, "step": 28780 }, { "epoch": 0.4149431416917689, "grad_norm": 1.7902279419473932, "learning_rate": 7.271162744933655e-06, "loss": 0.4722, "step": 28790 }, { "epoch": 0.41508726921580213, "grad_norm": 1.6167260989638412, "learning_rate": 7.268921416048283e-06, "loss": 0.4982, "step": 28800 }, { "epoch": 0.4152313967398354, "grad_norm": 1.5054873231929529, "learning_rate": 7.266679512864308e-06, "loss": 0.4629, "step": 28810 }, { "epoch": 0.41537552426386865, "grad_norm": 1.3493435454652218, "learning_rate": 7.264437035949188e-06, "loss": 0.4753, "step": 28820 }, { "epoch": 0.41551965178790196, "grad_norm": 1.5319899176675815, "learning_rate": 7.262193985870529e-06, "loss": 0.472, "step": 28830 }, { "epoch": 0.4156637793119352, "grad_norm": 1.669650184195024, "learning_rate": 7.259950363196082e-06, "loss": 0.4906, "step": 28840 }, { "epoch": 0.41580790683596847, "grad_norm": 1.3782028774070605, "learning_rate": 7.257706168493741e-06, "loss": 0.5056, "step": 28850 }, { "epoch": 0.4159520343600017, "grad_norm": 1.5428556322666644, "learning_rate": 7.255461402331546e-06, "loss": 0.4817, "step": 28860 }, { "epoch": 0.416096161884035, "grad_norm": 1.9785420265842106, "learning_rate": 7.2532160652776815e-06, "loss": 0.4713, "step": 28870 }, { "epoch": 0.41624028940806823, "grad_norm": 1.5744240486469212, "learning_rate": 7.250970157900479e-06, "loss": 0.476, "step": 28880 }, { "epoch": 0.41638441693210154, "grad_norm": 1.4024365677501822, "learning_rate": 7.248723680768408e-06, "loss": 0.4686, "step": 28890 }, { "epoch": 0.4165285444561348, "grad_norm": 1.503878220927161, "learning_rate": 7.2464766344500885e-06, "loss": 0.4908, "step": 28900 }, { "epoch": 0.41667267198016805, "grad_norm": 1.6355042299268345, "learning_rate": 7.2442290195142815e-06, "loss": 0.45, "step": 28910 }, { "epoch": 0.4168167995042013, "grad_norm": 1.497026984676697, "learning_rate": 7.241980836529892e-06, "loss": 0.5034, "step": 28920 }, { "epoch": 0.41696092702823456, "grad_norm": 1.4806894939100927, "learning_rate": 7.23973208606597e-06, "loss": 0.4528, "step": 28930 }, { "epoch": 0.4171050545522679, "grad_norm": 1.541374560807645, "learning_rate": 7.237482768691711e-06, "loss": 0.4748, "step": 28940 }, { "epoch": 0.41724918207630113, "grad_norm": 5.8945089525320435, "learning_rate": 7.235232884976446e-06, "loss": 0.4993, "step": 28950 }, { "epoch": 0.4173933096003344, "grad_norm": 1.7357335474271047, "learning_rate": 7.23298243548966e-06, "loss": 0.497, "step": 28960 }, { "epoch": 0.41753743712436764, "grad_norm": 1.7430493892740926, "learning_rate": 7.230731420800972e-06, "loss": 0.5066, "step": 28970 }, { "epoch": 0.4176815646484009, "grad_norm": 1.4259632662788329, "learning_rate": 7.228479841480151e-06, "loss": 0.4821, "step": 28980 }, { "epoch": 0.41782569217243415, "grad_norm": 1.3785034386416952, "learning_rate": 7.226227698097108e-06, "loss": 0.4599, "step": 28990 }, { "epoch": 0.41796981969646746, "grad_norm": 1.4923730782692353, "learning_rate": 7.223974991221889e-06, "loss": 0.4953, "step": 29000 }, { "epoch": 0.4181139472205007, "grad_norm": 1.6635150684512325, "learning_rate": 7.221721721424692e-06, "loss": 0.4736, "step": 29010 }, { "epoch": 0.41825807474453397, "grad_norm": 1.4368665569357688, "learning_rate": 7.219467889275854e-06, "loss": 0.479, "step": 29020 }, { "epoch": 0.4184022022685672, "grad_norm": 1.7861487279703971, "learning_rate": 7.217213495345854e-06, "loss": 0.4767, "step": 29030 }, { "epoch": 0.4185463297926005, "grad_norm": 1.3119527086911795, "learning_rate": 7.2149585402053135e-06, "loss": 0.487, "step": 29040 }, { "epoch": 0.41869045731663374, "grad_norm": 1.4861043248629349, "learning_rate": 7.212703024424995e-06, "loss": 0.4867, "step": 29050 }, { "epoch": 0.41883458484066705, "grad_norm": 1.443008656968503, "learning_rate": 7.210446948575805e-06, "loss": 0.4795, "step": 29060 }, { "epoch": 0.4189787123647003, "grad_norm": 1.635002549975949, "learning_rate": 7.208190313228791e-06, "loss": 0.4671, "step": 29070 }, { "epoch": 0.41912283988873356, "grad_norm": 1.4887869839018018, "learning_rate": 7.205933118955139e-06, "loss": 0.4751, "step": 29080 }, { "epoch": 0.4192669674127668, "grad_norm": 1.3967985294338505, "learning_rate": 7.203675366326184e-06, "loss": 0.4388, "step": 29090 }, { "epoch": 0.41941109493680007, "grad_norm": 1.4551752505183175, "learning_rate": 7.201417055913394e-06, "loss": 0.4741, "step": 29100 }, { "epoch": 0.4195552224608333, "grad_norm": 4.346049024600536, "learning_rate": 7.199158188288382e-06, "loss": 0.473, "step": 29110 }, { "epoch": 0.41969934998486663, "grad_norm": 1.8467156077862645, "learning_rate": 7.196898764022905e-06, "loss": 0.4476, "step": 29120 }, { "epoch": 0.4198434775088999, "grad_norm": 1.4203033996392216, "learning_rate": 7.194638783688852e-06, "loss": 0.5002, "step": 29130 }, { "epoch": 0.41998760503293314, "grad_norm": 1.3820962937090375, "learning_rate": 7.192378247858265e-06, "loss": 0.4835, "step": 29140 }, { "epoch": 0.4201317325569664, "grad_norm": 1.4476012195769359, "learning_rate": 7.190117157103317e-06, "loss": 0.4605, "step": 29150 }, { "epoch": 0.42027586008099965, "grad_norm": 1.4328529826511542, "learning_rate": 7.187855511996322e-06, "loss": 0.4467, "step": 29160 }, { "epoch": 0.4204199876050329, "grad_norm": 1.364767432949932, "learning_rate": 7.185593313109743e-06, "loss": 0.4894, "step": 29170 }, { "epoch": 0.4205641151290662, "grad_norm": 1.371605015620643, "learning_rate": 7.18333056101617e-06, "loss": 0.4761, "step": 29180 }, { "epoch": 0.4207082426530995, "grad_norm": 1.5454691436362638, "learning_rate": 7.1810672562883465e-06, "loss": 0.4904, "step": 29190 }, { "epoch": 0.42085237017713273, "grad_norm": 1.4885677801521746, "learning_rate": 7.178803399499145e-06, "loss": 0.453, "step": 29200 }, { "epoch": 0.420996497701166, "grad_norm": 1.1867713488031892, "learning_rate": 7.176538991221583e-06, "loss": 0.48, "step": 29210 }, { "epoch": 0.42114062522519924, "grad_norm": 1.547168086814566, "learning_rate": 7.1742740320288185e-06, "loss": 0.4675, "step": 29220 }, { "epoch": 0.4212847527492325, "grad_norm": 1.299638714913525, "learning_rate": 7.172008522494146e-06, "loss": 0.4974, "step": 29230 }, { "epoch": 0.4214288802732658, "grad_norm": 1.5875871468149687, "learning_rate": 7.169742463190998e-06, "loss": 0.4499, "step": 29240 }, { "epoch": 0.42157300779729906, "grad_norm": 1.7330955534957249, "learning_rate": 7.167475854692955e-06, "loss": 0.4738, "step": 29250 }, { "epoch": 0.4217171353213323, "grad_norm": 1.4905540801112886, "learning_rate": 7.165208697573724e-06, "loss": 0.4688, "step": 29260 }, { "epoch": 0.42186126284536557, "grad_norm": 1.5393856307012124, "learning_rate": 7.162940992407162e-06, "loss": 0.4544, "step": 29270 }, { "epoch": 0.4220053903693988, "grad_norm": 1.5553729001798513, "learning_rate": 7.160672739767256e-06, "loss": 0.4796, "step": 29280 }, { "epoch": 0.42214951789343214, "grad_norm": 1.5235347837577813, "learning_rate": 7.158403940228136e-06, "loss": 0.4566, "step": 29290 }, { "epoch": 0.4222936454174654, "grad_norm": 1.3684504675303801, "learning_rate": 7.15613459436407e-06, "loss": 0.4595, "step": 29300 }, { "epoch": 0.42243777294149865, "grad_norm": 1.282518547844895, "learning_rate": 7.153864702749464e-06, "loss": 0.4653, "step": 29310 }, { "epoch": 0.4225819004655319, "grad_norm": 1.195334985632303, "learning_rate": 7.1515942659588634e-06, "loss": 0.4644, "step": 29320 }, { "epoch": 0.42272602798956516, "grad_norm": 1.7343726623551774, "learning_rate": 7.14932328456695e-06, "loss": 0.4885, "step": 29330 }, { "epoch": 0.4228701555135984, "grad_norm": 1.4416628065235502, "learning_rate": 7.147051759148542e-06, "loss": 0.4824, "step": 29340 }, { "epoch": 0.4230142830376317, "grad_norm": 1.7298214976327686, "learning_rate": 7.1447796902786e-06, "loss": 0.4748, "step": 29350 }, { "epoch": 0.423158410561665, "grad_norm": 1.4407572661465045, "learning_rate": 7.142507078532216e-06, "loss": 0.4735, "step": 29360 }, { "epoch": 0.42330253808569823, "grad_norm": 1.593922073957958, "learning_rate": 7.1402339244846245e-06, "loss": 0.4737, "step": 29370 }, { "epoch": 0.4234466656097315, "grad_norm": 1.6382957644573768, "learning_rate": 7.137960228711196e-06, "loss": 0.4634, "step": 29380 }, { "epoch": 0.42359079313376474, "grad_norm": 1.4965958552331267, "learning_rate": 7.135685991787438e-06, "loss": 0.4603, "step": 29390 }, { "epoch": 0.423734920657798, "grad_norm": 1.6882262796687764, "learning_rate": 7.133411214288991e-06, "loss": 0.4847, "step": 29400 }, { "epoch": 0.4238790481818313, "grad_norm": 1.7670066120315895, "learning_rate": 7.131135896791639e-06, "loss": 0.4812, "step": 29410 }, { "epoch": 0.42402317570586456, "grad_norm": 1.4650648246217095, "learning_rate": 7.1288600398713e-06, "loss": 0.481, "step": 29420 }, { "epoch": 0.4241673032298978, "grad_norm": 1.4301771205217213, "learning_rate": 7.126583644104026e-06, "loss": 0.4635, "step": 29430 }, { "epoch": 0.4243114307539311, "grad_norm": 1.2563729290051726, "learning_rate": 7.124306710066008e-06, "loss": 0.4783, "step": 29440 }, { "epoch": 0.42445555827796433, "grad_norm": 1.589850931896046, "learning_rate": 7.122029238333574e-06, "loss": 0.5147, "step": 29450 }, { "epoch": 0.4245996858019976, "grad_norm": 1.5292781373812943, "learning_rate": 7.119751229483185e-06, "loss": 0.4799, "step": 29460 }, { "epoch": 0.4247438133260309, "grad_norm": 1.9980427137369192, "learning_rate": 7.117472684091441e-06, "loss": 0.4787, "step": 29470 }, { "epoch": 0.42488794085006415, "grad_norm": 1.473347217560487, "learning_rate": 7.115193602735078e-06, "loss": 0.4713, "step": 29480 }, { "epoch": 0.4250320683740974, "grad_norm": 1.7171352190016251, "learning_rate": 7.112913985990962e-06, "loss": 0.4437, "step": 29490 }, { "epoch": 0.42517619589813066, "grad_norm": 1.6183058003503656, "learning_rate": 7.110633834436102e-06, "loss": 0.4727, "step": 29500 }, { "epoch": 0.4253203234221639, "grad_norm": 1.6629912089934882, "learning_rate": 7.10835314864764e-06, "loss": 0.4818, "step": 29510 }, { "epoch": 0.42546445094619717, "grad_norm": 1.7970083682399185, "learning_rate": 7.106071929202848e-06, "loss": 0.4756, "step": 29520 }, { "epoch": 0.4256085784702305, "grad_norm": 1.480881367338096, "learning_rate": 7.103790176679143e-06, "loss": 0.4689, "step": 29530 }, { "epoch": 0.42575270599426374, "grad_norm": 1.632291644263292, "learning_rate": 7.101507891654067e-06, "loss": 0.4719, "step": 29540 }, { "epoch": 0.425896833518297, "grad_norm": 1.5220616891922147, "learning_rate": 7.099225074705303e-06, "loss": 0.4777, "step": 29550 }, { "epoch": 0.42604096104233025, "grad_norm": 1.468459615856853, "learning_rate": 7.096941726410667e-06, "loss": 0.471, "step": 29560 }, { "epoch": 0.4261850885663635, "grad_norm": 1.3231554412575697, "learning_rate": 7.094657847348105e-06, "loss": 0.503, "step": 29570 }, { "epoch": 0.42632921609039676, "grad_norm": 1.4355704177361681, "learning_rate": 7.0923734380957084e-06, "loss": 0.4896, "step": 29580 }, { "epoch": 0.42647334361443007, "grad_norm": 1.4917397075588243, "learning_rate": 7.090088499231694e-06, "loss": 0.5077, "step": 29590 }, { "epoch": 0.4266174711384633, "grad_norm": 1.6003627793935171, "learning_rate": 7.087803031334413e-06, "loss": 0.4574, "step": 29600 }, { "epoch": 0.4267615986624966, "grad_norm": 1.6918371776297878, "learning_rate": 7.085517034982352e-06, "loss": 0.4897, "step": 29610 }, { "epoch": 0.42690572618652983, "grad_norm": 1.5053387167877246, "learning_rate": 7.083230510754132e-06, "loss": 0.4735, "step": 29620 }, { "epoch": 0.4270498537105631, "grad_norm": 1.5812230771319624, "learning_rate": 7.080943459228506e-06, "loss": 0.4915, "step": 29630 }, { "epoch": 0.4271939812345964, "grad_norm": 1.2708974174805083, "learning_rate": 7.078655880984366e-06, "loss": 0.4525, "step": 29640 }, { "epoch": 0.42733810875862965, "grad_norm": 1.3607017045604544, "learning_rate": 7.076367776600729e-06, "loss": 0.4695, "step": 29650 }, { "epoch": 0.4274822362826629, "grad_norm": 1.4712653543288359, "learning_rate": 7.07407914665675e-06, "loss": 0.4609, "step": 29660 }, { "epoch": 0.42762636380669616, "grad_norm": 1.576365800994945, "learning_rate": 7.071789991731717e-06, "loss": 0.4835, "step": 29670 }, { "epoch": 0.4277704913307294, "grad_norm": 1.93185822793807, "learning_rate": 7.069500312405048e-06, "loss": 0.5046, "step": 29680 }, { "epoch": 0.4279146188547627, "grad_norm": 1.4870440869182608, "learning_rate": 7.067210109256297e-06, "loss": 0.4612, "step": 29690 }, { "epoch": 0.428058746378796, "grad_norm": 1.660900832228145, "learning_rate": 7.064919382865149e-06, "loss": 0.4789, "step": 29700 }, { "epoch": 0.42820287390282924, "grad_norm": 1.3489536794154249, "learning_rate": 7.062628133811423e-06, "loss": 0.4925, "step": 29710 }, { "epoch": 0.4283470014268625, "grad_norm": 1.4577478053262238, "learning_rate": 7.060336362675069e-06, "loss": 0.4589, "step": 29720 }, { "epoch": 0.42849112895089575, "grad_norm": 1.7318627491596834, "learning_rate": 7.058044070036167e-06, "loss": 0.4665, "step": 29730 }, { "epoch": 0.428635256474929, "grad_norm": 1.5148088394938728, "learning_rate": 7.055751256474932e-06, "loss": 0.4724, "step": 29740 }, { "epoch": 0.42877938399896226, "grad_norm": 1.7138196882646304, "learning_rate": 7.053457922571712e-06, "loss": 0.4765, "step": 29750 }, { "epoch": 0.42892351152299557, "grad_norm": 1.4230660214018303, "learning_rate": 7.051164068906982e-06, "loss": 0.4742, "step": 29760 }, { "epoch": 0.4290676390470288, "grad_norm": 1.5136416586149481, "learning_rate": 7.0488696960613546e-06, "loss": 0.4899, "step": 29770 }, { "epoch": 0.4292117665710621, "grad_norm": 1.43219157761143, "learning_rate": 7.046574804615568e-06, "loss": 0.4752, "step": 29780 }, { "epoch": 0.42935589409509534, "grad_norm": 1.6322148596172887, "learning_rate": 7.0442793951504936e-06, "loss": 0.4629, "step": 29790 }, { "epoch": 0.4295000216191286, "grad_norm": 1.6552033478999142, "learning_rate": 7.041983468247137e-06, "loss": 0.4914, "step": 29800 }, { "epoch": 0.42964414914316185, "grad_norm": 1.3443585486937892, "learning_rate": 7.0396870244866315e-06, "loss": 0.4627, "step": 29810 }, { "epoch": 0.42978827666719516, "grad_norm": 1.668889588115687, "learning_rate": 7.037390064450242e-06, "loss": 0.4728, "step": 29820 }, { "epoch": 0.4299324041912284, "grad_norm": 1.3189440597274489, "learning_rate": 7.0350925887193635e-06, "loss": 0.4775, "step": 29830 }, { "epoch": 0.43007653171526167, "grad_norm": 1.581585302160226, "learning_rate": 7.032794597875521e-06, "loss": 0.4631, "step": 29840 }, { "epoch": 0.4302206592392949, "grad_norm": 1.5701688464114145, "learning_rate": 7.030496092500374e-06, "loss": 0.468, "step": 29850 }, { "epoch": 0.4303647867633282, "grad_norm": 1.4745940273573663, "learning_rate": 7.028197073175707e-06, "loss": 0.4824, "step": 29860 }, { "epoch": 0.43050891428736143, "grad_norm": 1.5520466919638949, "learning_rate": 7.025897540483438e-06, "loss": 0.4596, "step": 29870 }, { "epoch": 0.43065304181139474, "grad_norm": 1.5780938430787568, "learning_rate": 7.023597495005613e-06, "loss": 0.4778, "step": 29880 }, { "epoch": 0.430797169335428, "grad_norm": 1.55105914709262, "learning_rate": 7.021296937324409e-06, "loss": 0.4711, "step": 29890 }, { "epoch": 0.43094129685946125, "grad_norm": 1.692456964459829, "learning_rate": 7.018995868022133e-06, "loss": 0.4877, "step": 29900 }, { "epoch": 0.4310854243834945, "grad_norm": 1.6206784049106595, "learning_rate": 7.016694287681218e-06, "loss": 0.4564, "step": 29910 }, { "epoch": 0.43122955190752776, "grad_norm": 1.8289301535169407, "learning_rate": 7.014392196884232e-06, "loss": 0.4653, "step": 29920 }, { "epoch": 0.431373679431561, "grad_norm": 1.3579307340886435, "learning_rate": 7.012089596213869e-06, "loss": 0.4878, "step": 29930 }, { "epoch": 0.43151780695559433, "grad_norm": 1.5549341062551933, "learning_rate": 7.009786486252951e-06, "loss": 0.4585, "step": 29940 }, { "epoch": 0.4316619344796276, "grad_norm": 1.3695438923953738, "learning_rate": 7.007482867584431e-06, "loss": 0.4501, "step": 29950 }, { "epoch": 0.43180606200366084, "grad_norm": 1.4255998041914388, "learning_rate": 7.005178740791388e-06, "loss": 0.4302, "step": 29960 }, { "epoch": 0.4319501895276941, "grad_norm": 1.173466378126782, "learning_rate": 7.002874106457035e-06, "loss": 0.4648, "step": 29970 }, { "epoch": 0.43209431705172735, "grad_norm": 1.5586350542429925, "learning_rate": 7.000568965164709e-06, "loss": 0.5077, "step": 29980 }, { "epoch": 0.4322384445757606, "grad_norm": 1.416387633135208, "learning_rate": 6.998263317497875e-06, "loss": 0.4467, "step": 29990 }, { "epoch": 0.4323825720997939, "grad_norm": 1.861489070356398, "learning_rate": 6.99595716404013e-06, "loss": 0.5018, "step": 30000 }, { "epoch": 0.43252669962382717, "grad_norm": 1.2445893824089433, "learning_rate": 6.993650505375193e-06, "loss": 0.4619, "step": 30010 }, { "epoch": 0.4326708271478604, "grad_norm": 1.5165795872072736, "learning_rate": 6.991343342086917e-06, "loss": 0.4705, "step": 30020 }, { "epoch": 0.4328149546718937, "grad_norm": 1.5979091953039999, "learning_rate": 6.989035674759281e-06, "loss": 0.4873, "step": 30030 }, { "epoch": 0.43295908219592694, "grad_norm": 1.6779754667514837, "learning_rate": 6.986727503976388e-06, "loss": 0.4796, "step": 30040 }, { "epoch": 0.43310320971996025, "grad_norm": 1.5836265157203173, "learning_rate": 6.984418830322475e-06, "loss": 0.4514, "step": 30050 }, { "epoch": 0.4332473372439935, "grad_norm": 1.4987450733872054, "learning_rate": 6.982109654381899e-06, "loss": 0.4736, "step": 30060 }, { "epoch": 0.43339146476802676, "grad_norm": 1.9685811581627728, "learning_rate": 6.979799976739148e-06, "loss": 0.4783, "step": 30070 }, { "epoch": 0.43353559229206, "grad_norm": 1.5236838848531686, "learning_rate": 6.97748979797884e-06, "loss": 0.4487, "step": 30080 }, { "epoch": 0.43367971981609327, "grad_norm": 1.634538652367483, "learning_rate": 6.975179118685711e-06, "loss": 0.4831, "step": 30090 }, { "epoch": 0.4338238473401265, "grad_norm": 1.5245718361166094, "learning_rate": 6.972867939444634e-06, "loss": 0.4708, "step": 30100 }, { "epoch": 0.43396797486415983, "grad_norm": 1.4953280203650468, "learning_rate": 6.970556260840602e-06, "loss": 0.4936, "step": 30110 }, { "epoch": 0.4341121023881931, "grad_norm": 1.5736742042845289, "learning_rate": 6.968244083458735e-06, "loss": 0.4701, "step": 30120 }, { "epoch": 0.43425622991222634, "grad_norm": 1.5789780884081277, "learning_rate": 6.965931407884282e-06, "loss": 0.4552, "step": 30130 }, { "epoch": 0.4344003574362596, "grad_norm": 1.3525445465713386, "learning_rate": 6.963618234702616e-06, "loss": 0.4948, "step": 30140 }, { "epoch": 0.43454448496029285, "grad_norm": 1.5849340135472907, "learning_rate": 6.961304564499235e-06, "loss": 0.5184, "step": 30150 }, { "epoch": 0.4346886124843261, "grad_norm": 1.3784405510909723, "learning_rate": 6.958990397859768e-06, "loss": 0.4399, "step": 30160 }, { "epoch": 0.4348327400083594, "grad_norm": 1.61143750177957, "learning_rate": 6.956675735369961e-06, "loss": 0.4828, "step": 30170 }, { "epoch": 0.4349768675323927, "grad_norm": 1.4923497433256732, "learning_rate": 6.954360577615693e-06, "loss": 0.4672, "step": 30180 }, { "epoch": 0.43512099505642593, "grad_norm": 1.5869905081790308, "learning_rate": 6.952044925182965e-06, "loss": 0.4538, "step": 30190 }, { "epoch": 0.4352651225804592, "grad_norm": 1.6136468836333522, "learning_rate": 6.949728778657905e-06, "loss": 0.4701, "step": 30200 }, { "epoch": 0.43540925010449244, "grad_norm": 1.580264084799024, "learning_rate": 6.947412138626765e-06, "loss": 0.4599, "step": 30210 }, { "epoch": 0.4355533776285257, "grad_norm": 1.638104277958598, "learning_rate": 6.945095005675919e-06, "loss": 0.4473, "step": 30220 }, { "epoch": 0.435697505152559, "grad_norm": 1.7848693722730222, "learning_rate": 6.942777380391872e-06, "loss": 0.452, "step": 30230 }, { "epoch": 0.43584163267659226, "grad_norm": 1.1909442241513026, "learning_rate": 6.9404592633612486e-06, "loss": 0.4589, "step": 30240 }, { "epoch": 0.4359857602006255, "grad_norm": 1.3735832415053504, "learning_rate": 6.938140655170801e-06, "loss": 0.4525, "step": 30250 }, { "epoch": 0.43612988772465877, "grad_norm": 1.537561004679077, "learning_rate": 6.935821556407404e-06, "loss": 0.4851, "step": 30260 }, { "epoch": 0.436274015248692, "grad_norm": 1.6660827557662814, "learning_rate": 6.9335019676580555e-06, "loss": 0.4964, "step": 30270 }, { "epoch": 0.4364181427727253, "grad_norm": 1.5047214965794564, "learning_rate": 6.93118188950988e-06, "loss": 0.473, "step": 30280 }, { "epoch": 0.4365622702967586, "grad_norm": 1.2674239981290465, "learning_rate": 6.928861322550124e-06, "loss": 0.4777, "step": 30290 }, { "epoch": 0.43670639782079185, "grad_norm": 1.4967491784566778, "learning_rate": 6.926540267366157e-06, "loss": 0.4575, "step": 30300 }, { "epoch": 0.4368505253448251, "grad_norm": 1.4056405622426404, "learning_rate": 6.924218724545476e-06, "loss": 0.4813, "step": 30310 }, { "epoch": 0.43699465286885836, "grad_norm": 1.6003680542447998, "learning_rate": 6.921896694675699e-06, "loss": 0.4751, "step": 30320 }, { "epoch": 0.4371387803928916, "grad_norm": 1.5603052340206554, "learning_rate": 6.919574178344566e-06, "loss": 0.4845, "step": 30330 }, { "epoch": 0.43728290791692487, "grad_norm": 1.2607214153043809, "learning_rate": 6.917251176139941e-06, "loss": 0.4499, "step": 30340 }, { "epoch": 0.4374270354409582, "grad_norm": 1.6980930714943312, "learning_rate": 6.91492768864981e-06, "loss": 0.4895, "step": 30350 }, { "epoch": 0.43757116296499143, "grad_norm": 1.4259384347832245, "learning_rate": 6.912603716462286e-06, "loss": 0.4924, "step": 30360 }, { "epoch": 0.4377152904890247, "grad_norm": 1.5070257582435749, "learning_rate": 6.910279260165601e-06, "loss": 0.4724, "step": 30370 }, { "epoch": 0.43785941801305794, "grad_norm": 1.7361346551992645, "learning_rate": 6.9079543203481094e-06, "loss": 0.4909, "step": 30380 }, { "epoch": 0.4380035455370912, "grad_norm": 1.8156584216759344, "learning_rate": 6.9056288975982896e-06, "loss": 0.4754, "step": 30390 }, { "epoch": 0.4381476730611245, "grad_norm": 1.575441657324562, "learning_rate": 6.903302992504742e-06, "loss": 0.4769, "step": 30400 }, { "epoch": 0.43829180058515776, "grad_norm": 1.6066278194179544, "learning_rate": 6.9009766056561865e-06, "loss": 0.4436, "step": 30410 }, { "epoch": 0.438435928109191, "grad_norm": 1.6154621769273432, "learning_rate": 6.89864973764147e-06, "loss": 0.4617, "step": 30420 }, { "epoch": 0.4385800556332243, "grad_norm": 1.5259419009496527, "learning_rate": 6.896322389049556e-06, "loss": 0.459, "step": 30430 }, { "epoch": 0.43872418315725753, "grad_norm": 1.1657812748626881, "learning_rate": 6.8939945604695325e-06, "loss": 0.444, "step": 30440 }, { "epoch": 0.4388683106812908, "grad_norm": 1.5645098841135145, "learning_rate": 6.8916662524906095e-06, "loss": 0.4843, "step": 30450 }, { "epoch": 0.4390124382053241, "grad_norm": 1.6047221124031084, "learning_rate": 6.889337465702115e-06, "loss": 0.4495, "step": 30460 }, { "epoch": 0.43915656572935735, "grad_norm": 1.4160365944964857, "learning_rate": 6.8870082006935035e-06, "loss": 0.4673, "step": 30470 }, { "epoch": 0.4393006932533906, "grad_norm": 1.171170146856432, "learning_rate": 6.8846784580543455e-06, "loss": 0.4324, "step": 30480 }, { "epoch": 0.43944482077742386, "grad_norm": 1.5411743871487464, "learning_rate": 6.882348238374334e-06, "loss": 0.471, "step": 30490 }, { "epoch": 0.4395889483014571, "grad_norm": 1.745090528428787, "learning_rate": 6.880017542243286e-06, "loss": 0.4732, "step": 30500 }, { "epoch": 0.43973307582549037, "grad_norm": 1.8849880374026171, "learning_rate": 6.877686370251131e-06, "loss": 0.4417, "step": 30510 }, { "epoch": 0.4398772033495237, "grad_norm": 1.502021563259431, "learning_rate": 6.875354722987929e-06, "loss": 0.4645, "step": 30520 }, { "epoch": 0.44002133087355694, "grad_norm": 1.4944000616366506, "learning_rate": 6.873022601043853e-06, "loss": 0.4689, "step": 30530 }, { "epoch": 0.4401654583975902, "grad_norm": 1.7417410004308744, "learning_rate": 6.8706900050092e-06, "loss": 0.4773, "step": 30540 }, { "epoch": 0.44030958592162345, "grad_norm": 1.5013838053844315, "learning_rate": 6.868356935474385e-06, "loss": 0.4903, "step": 30550 }, { "epoch": 0.4404537134456567, "grad_norm": 1.5326150150661515, "learning_rate": 6.866023393029944e-06, "loss": 0.4712, "step": 30560 }, { "epoch": 0.44059784096968996, "grad_norm": 1.735637448608301, "learning_rate": 6.863689378266529e-06, "loss": 0.4771, "step": 30570 }, { "epoch": 0.44074196849372327, "grad_norm": 1.3898280308633242, "learning_rate": 6.861354891774919e-06, "loss": 0.4609, "step": 30580 }, { "epoch": 0.4408860960177565, "grad_norm": 1.5689863269889277, "learning_rate": 6.859019934146008e-06, "loss": 0.4775, "step": 30590 }, { "epoch": 0.4410302235417898, "grad_norm": 1.7136842513285306, "learning_rate": 6.856684505970806e-06, "loss": 0.4951, "step": 30600 }, { "epoch": 0.44117435106582303, "grad_norm": 1.3864125012928796, "learning_rate": 6.854348607840448e-06, "loss": 0.4709, "step": 30610 }, { "epoch": 0.4413184785898563, "grad_norm": 1.292010798072243, "learning_rate": 6.852012240346184e-06, "loss": 0.49, "step": 30620 }, { "epoch": 0.44146260611388954, "grad_norm": 1.501819278957208, "learning_rate": 6.849675404079385e-06, "loss": 0.4789, "step": 30630 }, { "epoch": 0.44160673363792285, "grad_norm": 1.5811713388547481, "learning_rate": 6.84733809963154e-06, "loss": 0.5072, "step": 30640 }, { "epoch": 0.4417508611619561, "grad_norm": 1.6124686092199645, "learning_rate": 6.845000327594257e-06, "loss": 0.4902, "step": 30650 }, { "epoch": 0.44189498868598936, "grad_norm": 1.53290673989619, "learning_rate": 6.84266208855926e-06, "loss": 0.4657, "step": 30660 }, { "epoch": 0.4420391162100226, "grad_norm": 1.6177073603392105, "learning_rate": 6.840323383118394e-06, "loss": 0.5227, "step": 30670 }, { "epoch": 0.4421832437340559, "grad_norm": 1.4935466143011356, "learning_rate": 6.837984211863622e-06, "loss": 0.4535, "step": 30680 }, { "epoch": 0.44232737125808913, "grad_norm": 1.6000091095453024, "learning_rate": 6.835644575387022e-06, "loss": 0.479, "step": 30690 }, { "epoch": 0.44247149878212244, "grad_norm": 1.6760343007702683, "learning_rate": 6.833304474280793e-06, "loss": 0.4738, "step": 30700 }, { "epoch": 0.4426156263061557, "grad_norm": 1.4827882765468319, "learning_rate": 6.8309639091372496e-06, "loss": 0.4669, "step": 30710 }, { "epoch": 0.44275975383018895, "grad_norm": 2.3756636457317066, "learning_rate": 6.828622880548825e-06, "loss": 0.4796, "step": 30720 }, { "epoch": 0.4429038813542222, "grad_norm": 1.7262737793092755, "learning_rate": 6.826281389108068e-06, "loss": 0.4771, "step": 30730 }, { "epoch": 0.44304800887825546, "grad_norm": 1.273128186471354, "learning_rate": 6.823939435407647e-06, "loss": 0.469, "step": 30740 }, { "epoch": 0.44319213640228877, "grad_norm": 1.606802384006068, "learning_rate": 6.8215970200403465e-06, "loss": 0.4763, "step": 30750 }, { "epoch": 0.443336263926322, "grad_norm": 3.455183173172844, "learning_rate": 6.819254143599068e-06, "loss": 0.4391, "step": 30760 }, { "epoch": 0.4434803914503553, "grad_norm": 1.6977452900032262, "learning_rate": 6.816910806676826e-06, "loss": 0.4932, "step": 30770 }, { "epoch": 0.44362451897438854, "grad_norm": 1.5820391746377194, "learning_rate": 6.814567009866758e-06, "loss": 0.4911, "step": 30780 }, { "epoch": 0.4437686464984218, "grad_norm": 1.610056338914254, "learning_rate": 6.812222753762113e-06, "loss": 0.4599, "step": 30790 }, { "epoch": 0.44391277402245505, "grad_norm": 1.5409415225882488, "learning_rate": 6.80987803895626e-06, "loss": 0.452, "step": 30800 }, { "epoch": 0.44405690154648836, "grad_norm": 1.4085533821282321, "learning_rate": 6.807532866042681e-06, "loss": 0.4735, "step": 30810 }, { "epoch": 0.4442010290705216, "grad_norm": 1.8305201205492234, "learning_rate": 6.805187235614974e-06, "loss": 0.4804, "step": 30820 }, { "epoch": 0.44434515659455487, "grad_norm": 1.7962093551452383, "learning_rate": 6.8028411482668544e-06, "loss": 0.4808, "step": 30830 }, { "epoch": 0.4444892841185881, "grad_norm": 1.4527948816540293, "learning_rate": 6.800494604592154e-06, "loss": 0.4733, "step": 30840 }, { "epoch": 0.4446334116426214, "grad_norm": 1.7160399792438477, "learning_rate": 6.798147605184815e-06, "loss": 0.4761, "step": 30850 }, { "epoch": 0.44477753916665463, "grad_norm": 1.6700663336003865, "learning_rate": 6.7958001506389035e-06, "loss": 0.4779, "step": 30860 }, { "epoch": 0.44492166669068794, "grad_norm": 1.7256224488197045, "learning_rate": 6.793452241548594e-06, "loss": 0.4834, "step": 30870 }, { "epoch": 0.4450657942147212, "grad_norm": 1.5087800906608209, "learning_rate": 6.7911038785081765e-06, "loss": 0.4714, "step": 30880 }, { "epoch": 0.44520992173875445, "grad_norm": 1.6346339986214058, "learning_rate": 6.7887550621120616e-06, "loss": 0.4681, "step": 30890 }, { "epoch": 0.4453540492627877, "grad_norm": 1.5713406025781405, "learning_rate": 6.786405792954764e-06, "loss": 0.4788, "step": 30900 }, { "epoch": 0.44549817678682097, "grad_norm": 1.5848958173153813, "learning_rate": 6.7840560716309264e-06, "loss": 0.4832, "step": 30910 }, { "epoch": 0.4456423043108542, "grad_norm": 1.2260047060394506, "learning_rate": 6.7817058987352956e-06, "loss": 0.4798, "step": 30920 }, { "epoch": 0.44578643183488753, "grad_norm": 1.4887847000570242, "learning_rate": 6.779355274862738e-06, "loss": 0.4368, "step": 30930 }, { "epoch": 0.4459305593589208, "grad_norm": 1.5541525944181536, "learning_rate": 6.77700420060823e-06, "loss": 0.484, "step": 30940 }, { "epoch": 0.44607468688295404, "grad_norm": 1.4643952968239635, "learning_rate": 6.774652676566865e-06, "loss": 0.4743, "step": 30950 }, { "epoch": 0.4462188144069873, "grad_norm": 1.5339431547711577, "learning_rate": 6.7723007033338505e-06, "loss": 0.4785, "step": 30960 }, { "epoch": 0.44636294193102055, "grad_norm": 1.7100271664395086, "learning_rate": 6.769948281504506e-06, "loss": 0.4877, "step": 30970 }, { "epoch": 0.4465070694550538, "grad_norm": 2.1081732247370493, "learning_rate": 6.767595411674266e-06, "loss": 0.4808, "step": 30980 }, { "epoch": 0.4466511969790871, "grad_norm": 1.5095892518162066, "learning_rate": 6.765242094438678e-06, "loss": 0.4708, "step": 30990 }, { "epoch": 0.4467953245031204, "grad_norm": 1.5386222043776887, "learning_rate": 6.7628883303934e-06, "loss": 0.4485, "step": 31000 }, { "epoch": 0.4469394520271536, "grad_norm": 1.4826236418650276, "learning_rate": 6.760534120134209e-06, "loss": 0.4669, "step": 31010 }, { "epoch": 0.4470835795511869, "grad_norm": 1.4393260580580411, "learning_rate": 6.75817946425699e-06, "loss": 0.4766, "step": 31020 }, { "epoch": 0.44722770707522014, "grad_norm": 1.5969949317412429, "learning_rate": 6.755824363357741e-06, "loss": 0.4559, "step": 31030 }, { "epoch": 0.4473718345992534, "grad_norm": 1.4505787303042643, "learning_rate": 6.753468818032576e-06, "loss": 0.4664, "step": 31040 }, { "epoch": 0.4475159621232867, "grad_norm": 1.5834549970254665, "learning_rate": 6.751112828877719e-06, "loss": 0.4587, "step": 31050 }, { "epoch": 0.44766008964731996, "grad_norm": 1.6214048275098112, "learning_rate": 6.7487563964895066e-06, "loss": 0.4427, "step": 31060 }, { "epoch": 0.4478042171713532, "grad_norm": 1.7106508417695856, "learning_rate": 6.746399521464386e-06, "loss": 0.4448, "step": 31070 }, { "epoch": 0.44794834469538647, "grad_norm": 1.677041249989253, "learning_rate": 6.744042204398921e-06, "loss": 0.4373, "step": 31080 }, { "epoch": 0.4480924722194197, "grad_norm": 1.534825129612206, "learning_rate": 6.741684445889782e-06, "loss": 0.4644, "step": 31090 }, { "epoch": 0.44823659974345303, "grad_norm": 1.7370526381780456, "learning_rate": 6.7393262465337575e-06, "loss": 0.4827, "step": 31100 }, { "epoch": 0.4483807272674863, "grad_norm": 1.344484849308861, "learning_rate": 6.736967606927738e-06, "loss": 0.4831, "step": 31110 }, { "epoch": 0.44852485479151954, "grad_norm": 1.6026455891618876, "learning_rate": 6.734608527668736e-06, "loss": 0.4921, "step": 31120 }, { "epoch": 0.4486689823155528, "grad_norm": 1.4128878712000579, "learning_rate": 6.732249009353868e-06, "loss": 0.4866, "step": 31130 }, { "epoch": 0.44881310983958606, "grad_norm": 1.5286339931516773, "learning_rate": 6.729889052580365e-06, "loss": 0.4565, "step": 31140 }, { "epoch": 0.4489572373636193, "grad_norm": 1.5471375542598234, "learning_rate": 6.727528657945569e-06, "loss": 0.4568, "step": 31150 }, { "epoch": 0.4491013648876526, "grad_norm": 1.529873297367653, "learning_rate": 6.7251678260469286e-06, "loss": 0.4594, "step": 31160 }, { "epoch": 0.4492454924116859, "grad_norm": 1.6271097680062314, "learning_rate": 6.7228065574820085e-06, "loss": 0.5191, "step": 31170 }, { "epoch": 0.44938961993571913, "grad_norm": 1.3974242120090568, "learning_rate": 6.720444852848484e-06, "loss": 0.4866, "step": 31180 }, { "epoch": 0.4495337474597524, "grad_norm": 1.6429568520524207, "learning_rate": 6.718082712744134e-06, "loss": 0.4569, "step": 31190 }, { "epoch": 0.44967787498378564, "grad_norm": 1.9152873839405122, "learning_rate": 6.715720137766855e-06, "loss": 0.4848, "step": 31200 }, { "epoch": 0.4498220025078189, "grad_norm": 1.8556494224111173, "learning_rate": 6.713357128514651e-06, "loss": 0.4869, "step": 31210 }, { "epoch": 0.4499661300318522, "grad_norm": 1.930650257078906, "learning_rate": 6.710993685585633e-06, "loss": 0.4835, "step": 31220 }, { "epoch": 0.45011025755588546, "grad_norm": 1.414525435759156, "learning_rate": 6.708629809578028e-06, "loss": 0.455, "step": 31230 }, { "epoch": 0.4502543850799187, "grad_norm": 1.8035311549791309, "learning_rate": 6.7062655010901656e-06, "loss": 0.4757, "step": 31240 }, { "epoch": 0.450398512603952, "grad_norm": 1.524771281668877, "learning_rate": 6.703900760720492e-06, "loss": 0.4835, "step": 31250 }, { "epoch": 0.45054264012798523, "grad_norm": 1.317843678605566, "learning_rate": 6.701535589067555e-06, "loss": 0.5053, "step": 31260 }, { "epoch": 0.4506867676520185, "grad_norm": 1.437076830349391, "learning_rate": 6.699169986730018e-06, "loss": 0.4859, "step": 31270 }, { "epoch": 0.4508308951760518, "grad_norm": 1.7662886724572844, "learning_rate": 6.696803954306654e-06, "loss": 0.4703, "step": 31280 }, { "epoch": 0.45097502270008505, "grad_norm": 1.6573899652317252, "learning_rate": 6.694437492396334e-06, "loss": 0.4759, "step": 31290 }, { "epoch": 0.4511191502241183, "grad_norm": 1.6000034915120969, "learning_rate": 6.692070601598053e-06, "loss": 0.4489, "step": 31300 }, { "epoch": 0.45126327774815156, "grad_norm": 1.3792178144036495, "learning_rate": 6.6897032825109035e-06, "loss": 0.4849, "step": 31310 }, { "epoch": 0.4514074052721848, "grad_norm": 1.5614981700734398, "learning_rate": 6.68733553573409e-06, "loss": 0.4748, "step": 31320 }, { "epoch": 0.45155153279621807, "grad_norm": 1.6533305189906957, "learning_rate": 6.684967361866927e-06, "loss": 0.4877, "step": 31330 }, { "epoch": 0.4516956603202514, "grad_norm": 1.4354970842880195, "learning_rate": 6.682598761508835e-06, "loss": 0.4731, "step": 31340 }, { "epoch": 0.45183978784428463, "grad_norm": 1.7484841364702073, "learning_rate": 6.680229735259341e-06, "loss": 0.4614, "step": 31350 }, { "epoch": 0.4519839153683179, "grad_norm": 1.461409054271455, "learning_rate": 6.677860283718085e-06, "loss": 0.4802, "step": 31360 }, { "epoch": 0.45212804289235115, "grad_norm": 1.5074787013530773, "learning_rate": 6.675490407484808e-06, "loss": 0.4469, "step": 31370 }, { "epoch": 0.4522721704163844, "grad_norm": 1.548181787028199, "learning_rate": 6.673120107159366e-06, "loss": 0.4669, "step": 31380 }, { "epoch": 0.45241629794041766, "grad_norm": 1.5409901104946968, "learning_rate": 6.670749383341712e-06, "loss": 0.4548, "step": 31390 }, { "epoch": 0.45256042546445097, "grad_norm": 1.3887267749030485, "learning_rate": 6.668378236631916e-06, "loss": 0.467, "step": 31400 }, { "epoch": 0.4527045529884842, "grad_norm": 1.5892533446915384, "learning_rate": 6.666006667630153e-06, "loss": 0.4676, "step": 31410 }, { "epoch": 0.4528486805125175, "grad_norm": 1.5047518593987599, "learning_rate": 6.6636346769367e-06, "loss": 0.4543, "step": 31420 }, { "epoch": 0.45299280803655073, "grad_norm": 1.6543825193503767, "learning_rate": 6.661262265151947e-06, "loss": 0.4809, "step": 31430 }, { "epoch": 0.453136935560584, "grad_norm": 1.39432680211292, "learning_rate": 6.658889432876386e-06, "loss": 0.4936, "step": 31440 }, { "epoch": 0.45328106308461724, "grad_norm": 1.7409201660803495, "learning_rate": 6.656516180710615e-06, "loss": 0.4657, "step": 31450 }, { "epoch": 0.45342519060865055, "grad_norm": 1.5096012588615182, "learning_rate": 6.654142509255343e-06, "loss": 0.4849, "step": 31460 }, { "epoch": 0.4535693181326838, "grad_norm": 1.538464139886356, "learning_rate": 6.651768419111382e-06, "loss": 0.4661, "step": 31470 }, { "epoch": 0.45371344565671706, "grad_norm": 1.3952349351710869, "learning_rate": 6.649393910879649e-06, "loss": 0.471, "step": 31480 }, { "epoch": 0.4538575731807503, "grad_norm": 1.4516632643757947, "learning_rate": 6.647018985161172e-06, "loss": 0.4831, "step": 31490 }, { "epoch": 0.4540017007047836, "grad_norm": 1.7988359178989008, "learning_rate": 6.644643642557076e-06, "loss": 0.4963, "step": 31500 }, { "epoch": 0.4541458282288169, "grad_norm": 1.3967555222128178, "learning_rate": 6.642267883668601e-06, "loss": 0.4605, "step": 31510 }, { "epoch": 0.45428995575285014, "grad_norm": 1.6498188076270421, "learning_rate": 6.639891709097083e-06, "loss": 0.4524, "step": 31520 }, { "epoch": 0.4544340832768834, "grad_norm": 1.596667543298379, "learning_rate": 6.637515119443971e-06, "loss": 0.4776, "step": 31530 }, { "epoch": 0.45457821080091665, "grad_norm": 1.645488374828854, "learning_rate": 6.6351381153108176e-06, "loss": 0.4612, "step": 31540 }, { "epoch": 0.4547223383249499, "grad_norm": 1.6729455958880826, "learning_rate": 6.6327606972992765e-06, "loss": 0.481, "step": 31550 }, { "epoch": 0.45486646584898316, "grad_norm": 1.3919844762571498, "learning_rate": 6.630382866011107e-06, "loss": 0.4667, "step": 31560 }, { "epoch": 0.45501059337301647, "grad_norm": 1.5710661913500725, "learning_rate": 6.628004622048177e-06, "loss": 0.5032, "step": 31570 }, { "epoch": 0.4551547208970497, "grad_norm": 1.51265437950114, "learning_rate": 6.625625966012456e-06, "loss": 0.4582, "step": 31580 }, { "epoch": 0.455298848421083, "grad_norm": 1.3354965440310993, "learning_rate": 6.6232468985060175e-06, "loss": 0.4906, "step": 31590 }, { "epoch": 0.45544297594511624, "grad_norm": 1.742998274714732, "learning_rate": 6.62086742013104e-06, "loss": 0.4843, "step": 31600 }, { "epoch": 0.4555871034691495, "grad_norm": 1.621034063248442, "learning_rate": 6.618487531489805e-06, "loss": 0.4847, "step": 31610 }, { "epoch": 0.45573123099318275, "grad_norm": 1.7023539867492983, "learning_rate": 6.6161072331847e-06, "loss": 0.4939, "step": 31620 }, { "epoch": 0.45587535851721606, "grad_norm": 1.5463783983413426, "learning_rate": 6.613726525818214e-06, "loss": 0.4785, "step": 31630 }, { "epoch": 0.4560194860412493, "grad_norm": 1.5305878097751882, "learning_rate": 6.61134540999294e-06, "loss": 0.4584, "step": 31640 }, { "epoch": 0.45616361356528257, "grad_norm": 1.4999500169282007, "learning_rate": 6.6089638863115745e-06, "loss": 0.4691, "step": 31650 }, { "epoch": 0.4563077410893158, "grad_norm": 1.6317291610615514, "learning_rate": 6.606581955376918e-06, "loss": 0.4654, "step": 31660 }, { "epoch": 0.4564518686133491, "grad_norm": 1.2178687355613524, "learning_rate": 6.604199617791877e-06, "loss": 0.4564, "step": 31670 }, { "epoch": 0.45659599613738233, "grad_norm": 1.544634329690222, "learning_rate": 6.601816874159449e-06, "loss": 0.4935, "step": 31680 }, { "epoch": 0.45674012366141564, "grad_norm": 1.7123625217554284, "learning_rate": 6.599433725082751e-06, "loss": 0.4372, "step": 31690 }, { "epoch": 0.4568842511854489, "grad_norm": 1.4839659559650362, "learning_rate": 6.597050171164991e-06, "loss": 0.4917, "step": 31700 }, { "epoch": 0.45702837870948215, "grad_norm": 1.3696663320097864, "learning_rate": 6.5946662130094816e-06, "loss": 0.4663, "step": 31710 }, { "epoch": 0.4571725062335154, "grad_norm": 1.4899397281657318, "learning_rate": 6.5922818512196415e-06, "loss": 0.4713, "step": 31720 }, { "epoch": 0.45731663375754866, "grad_norm": 1.580251166568806, "learning_rate": 6.589897086398989e-06, "loss": 0.4615, "step": 31730 }, { "epoch": 0.4574607612815819, "grad_norm": 1.6231452445551424, "learning_rate": 6.587511919151139e-06, "loss": 0.4809, "step": 31740 }, { "epoch": 0.45760488880561523, "grad_norm": 1.8379867004304613, "learning_rate": 6.585126350079823e-06, "loss": 0.4535, "step": 31750 }, { "epoch": 0.4577490163296485, "grad_norm": 1.5451158556217777, "learning_rate": 6.582740379788857e-06, "loss": 0.4363, "step": 31760 }, { "epoch": 0.45789314385368174, "grad_norm": 1.3974737974127538, "learning_rate": 6.58035400888217e-06, "loss": 0.5008, "step": 31770 }, { "epoch": 0.458037271377715, "grad_norm": 1.6124873442885723, "learning_rate": 6.57796723796379e-06, "loss": 0.4807, "step": 31780 }, { "epoch": 0.45818139890174825, "grad_norm": 1.3462139755850269, "learning_rate": 6.575580067637839e-06, "loss": 0.4643, "step": 31790 }, { "epoch": 0.4583255264257815, "grad_norm": 1.5456921135326458, "learning_rate": 6.573192498508555e-06, "loss": 0.4671, "step": 31800 }, { "epoch": 0.4584696539498148, "grad_norm": 1.5076717510879205, "learning_rate": 6.570804531180262e-06, "loss": 0.4765, "step": 31810 }, { "epoch": 0.45861378147384807, "grad_norm": 1.912891148447214, "learning_rate": 6.568416166257392e-06, "loss": 0.5023, "step": 31820 }, { "epoch": 0.4587579089978813, "grad_norm": 1.2183862585876257, "learning_rate": 6.566027404344479e-06, "loss": 0.4569, "step": 31830 }, { "epoch": 0.4589020365219146, "grad_norm": 1.4681656461594619, "learning_rate": 6.563638246046151e-06, "loss": 0.4539, "step": 31840 }, { "epoch": 0.45904616404594784, "grad_norm": 1.8787996186465146, "learning_rate": 6.561248691967144e-06, "loss": 0.4771, "step": 31850 }, { "epoch": 0.45919029156998115, "grad_norm": 1.369951461415793, "learning_rate": 6.558858742712288e-06, "loss": 0.4592, "step": 31860 }, { "epoch": 0.4593344190940144, "grad_norm": 1.5861802823149456, "learning_rate": 6.556468398886519e-06, "loss": 0.4609, "step": 31870 }, { "epoch": 0.45947854661804766, "grad_norm": 1.5382643088353858, "learning_rate": 6.554077661094867e-06, "loss": 0.4666, "step": 31880 }, { "epoch": 0.4596226741420809, "grad_norm": 1.2096106168103284, "learning_rate": 6.551686529942464e-06, "loss": 0.4789, "step": 31890 }, { "epoch": 0.45976680166611417, "grad_norm": 1.795446079869341, "learning_rate": 6.549295006034542e-06, "loss": 0.4706, "step": 31900 }, { "epoch": 0.4599109291901474, "grad_norm": 1.375855305958925, "learning_rate": 6.54690308997643e-06, "loss": 0.4577, "step": 31910 }, { "epoch": 0.46005505671418073, "grad_norm": 1.448652365425306, "learning_rate": 6.5445107823735645e-06, "loss": 0.4547, "step": 31920 }, { "epoch": 0.460199184238214, "grad_norm": 1.4479012162706375, "learning_rate": 6.5421180838314705e-06, "loss": 0.4786, "step": 31930 }, { "epoch": 0.46034331176224724, "grad_norm": 1.436529086656878, "learning_rate": 6.539724994955776e-06, "loss": 0.487, "step": 31940 }, { "epoch": 0.4604874392862805, "grad_norm": 2.73985264750567, "learning_rate": 6.537331516352211e-06, "loss": 0.4757, "step": 31950 }, { "epoch": 0.46063156681031375, "grad_norm": 1.4926387270999941, "learning_rate": 6.5349376486265995e-06, "loss": 0.4814, "step": 31960 }, { "epoch": 0.460775694334347, "grad_norm": 1.661457856685825, "learning_rate": 6.532543392384866e-06, "loss": 0.4801, "step": 31970 }, { "epoch": 0.4609198218583803, "grad_norm": 1.4665738823156211, "learning_rate": 6.530148748233035e-06, "loss": 0.4483, "step": 31980 }, { "epoch": 0.4610639493824136, "grad_norm": 1.5739260074571675, "learning_rate": 6.527753716777225e-06, "loss": 0.4719, "step": 31990 }, { "epoch": 0.46120807690644683, "grad_norm": 2.0556555545083977, "learning_rate": 6.525358298623656e-06, "loss": 0.5169, "step": 32000 }, { "epoch": 0.4613522044304801, "grad_norm": 1.604968063525497, "learning_rate": 6.522962494378646e-06, "loss": 0.4369, "step": 32010 }, { "epoch": 0.46149633195451334, "grad_norm": 1.7172767801254836, "learning_rate": 6.5205663046486075e-06, "loss": 0.4829, "step": 32020 }, { "epoch": 0.4616404594785466, "grad_norm": 1.4775605917865537, "learning_rate": 6.518169730040056e-06, "loss": 0.4476, "step": 32030 }, { "epoch": 0.4617845870025799, "grad_norm": 1.6418338939144768, "learning_rate": 6.515772771159596e-06, "loss": 0.4961, "step": 32040 }, { "epoch": 0.46192871452661316, "grad_norm": 1.4706105756231571, "learning_rate": 6.513375428613939e-06, "loss": 0.4774, "step": 32050 }, { "epoch": 0.4620728420506464, "grad_norm": 1.4830465470252467, "learning_rate": 6.5109777030098875e-06, "loss": 0.4597, "step": 32060 }, { "epoch": 0.46221696957467967, "grad_norm": 1.6495419455689675, "learning_rate": 6.5085795949543394e-06, "loss": 0.4764, "step": 32070 }, { "epoch": 0.4623610970987129, "grad_norm": 1.8613154243969456, "learning_rate": 6.5061811050542965e-06, "loss": 0.4815, "step": 32080 }, { "epoch": 0.4625052246227462, "grad_norm": 1.4451771499268267, "learning_rate": 6.503782233916852e-06, "loss": 0.4636, "step": 32090 }, { "epoch": 0.4626493521467795, "grad_norm": 1.6061494063096702, "learning_rate": 6.501382982149197e-06, "loss": 0.4924, "step": 32100 }, { "epoch": 0.46279347967081275, "grad_norm": 1.898404117467885, "learning_rate": 6.4989833503586176e-06, "loss": 0.4698, "step": 32110 }, { "epoch": 0.462937607194846, "grad_norm": 1.474730862194186, "learning_rate": 6.496583339152497e-06, "loss": 0.4638, "step": 32120 }, { "epoch": 0.46308173471887926, "grad_norm": 1.512779197228842, "learning_rate": 6.494182949138315e-06, "loss": 0.4905, "step": 32130 }, { "epoch": 0.4632258622429125, "grad_norm": 1.9864198223351512, "learning_rate": 6.491782180923649e-06, "loss": 0.4823, "step": 32140 }, { "epoch": 0.46336998976694577, "grad_norm": 1.8622715999395658, "learning_rate": 6.489381035116167e-06, "loss": 0.465, "step": 32150 }, { "epoch": 0.4635141172909791, "grad_norm": 1.9112067950795735, "learning_rate": 6.486979512323638e-06, "loss": 0.4878, "step": 32160 }, { "epoch": 0.46365824481501233, "grad_norm": 1.760222258539101, "learning_rate": 6.4845776131539225e-06, "loss": 0.4636, "step": 32170 }, { "epoch": 0.4638023723390456, "grad_norm": 1.4717148380577911, "learning_rate": 6.4821753382149775e-06, "loss": 0.4629, "step": 32180 }, { "epoch": 0.46394649986307884, "grad_norm": 1.6899672361199065, "learning_rate": 6.479772688114857e-06, "loss": 0.4912, "step": 32190 }, { "epoch": 0.4640906273871121, "grad_norm": 1.4659218680902293, "learning_rate": 6.477369663461708e-06, "loss": 0.4942, "step": 32200 }, { "epoch": 0.4642347549111454, "grad_norm": 1.5059961082284836, "learning_rate": 6.4749662648637714e-06, "loss": 0.482, "step": 32210 }, { "epoch": 0.46437888243517866, "grad_norm": 1.4267119485119277, "learning_rate": 6.472562492929386e-06, "loss": 0.4715, "step": 32220 }, { "epoch": 0.4645230099592119, "grad_norm": 1.429154152320503, "learning_rate": 6.470158348266979e-06, "loss": 0.4614, "step": 32230 }, { "epoch": 0.4646671374832452, "grad_norm": 1.4457580156462502, "learning_rate": 6.4677538314850814e-06, "loss": 0.4919, "step": 32240 }, { "epoch": 0.46481126500727843, "grad_norm": 1.5577489800048598, "learning_rate": 6.465348943192311e-06, "loss": 0.5077, "step": 32250 }, { "epoch": 0.4649553925313117, "grad_norm": 1.3626310993652773, "learning_rate": 6.46294368399738e-06, "loss": 0.4542, "step": 32260 }, { "epoch": 0.465099520055345, "grad_norm": 1.55032417755583, "learning_rate": 6.4605380545091e-06, "loss": 0.4876, "step": 32270 }, { "epoch": 0.46524364757937825, "grad_norm": 1.4955715593126517, "learning_rate": 6.458132055336368e-06, "loss": 0.4485, "step": 32280 }, { "epoch": 0.4653877751034115, "grad_norm": 1.5678554500496038, "learning_rate": 6.455725687088182e-06, "loss": 0.4889, "step": 32290 }, { "epoch": 0.46553190262744476, "grad_norm": 1.810115731739358, "learning_rate": 6.45331895037363e-06, "loss": 0.4564, "step": 32300 }, { "epoch": 0.465676030151478, "grad_norm": 1.2329295984716235, "learning_rate": 6.450911845801891e-06, "loss": 0.4807, "step": 32310 }, { "epoch": 0.46582015767551127, "grad_norm": 1.5256515269688056, "learning_rate": 6.448504373982245e-06, "loss": 0.469, "step": 32320 }, { "epoch": 0.4659642851995446, "grad_norm": 1.3405185026122457, "learning_rate": 6.446096535524056e-06, "loss": 0.4499, "step": 32330 }, { "epoch": 0.46610841272357784, "grad_norm": 1.6599780526433408, "learning_rate": 6.443688331036786e-06, "loss": 0.4733, "step": 32340 }, { "epoch": 0.4662525402476111, "grad_norm": 1.4633285583560265, "learning_rate": 6.441279761129987e-06, "loss": 0.4762, "step": 32350 }, { "epoch": 0.46639666777164435, "grad_norm": 1.6840111224871037, "learning_rate": 6.438870826413307e-06, "loss": 0.4863, "step": 32360 }, { "epoch": 0.4665407952956776, "grad_norm": 1.3300990961653008, "learning_rate": 6.4364615274964824e-06, "loss": 0.4723, "step": 32370 }, { "epoch": 0.46668492281971086, "grad_norm": 1.542681852866588, "learning_rate": 6.4340518649893445e-06, "loss": 0.467, "step": 32380 }, { "epoch": 0.46682905034374417, "grad_norm": 1.514527473492163, "learning_rate": 6.431641839501815e-06, "loss": 0.4788, "step": 32390 }, { "epoch": 0.4669731778677774, "grad_norm": 1.6018762511817537, "learning_rate": 6.429231451643907e-06, "loss": 0.4845, "step": 32400 }, { "epoch": 0.4671173053918107, "grad_norm": 1.3896182501026864, "learning_rate": 6.426820702025728e-06, "loss": 0.4862, "step": 32410 }, { "epoch": 0.46726143291584393, "grad_norm": 1.661741389159456, "learning_rate": 6.4244095912574764e-06, "loss": 0.4749, "step": 32420 }, { "epoch": 0.4674055604398772, "grad_norm": 1.5012501431828147, "learning_rate": 6.4219981199494385e-06, "loss": 0.4824, "step": 32430 }, { "epoch": 0.46754968796391044, "grad_norm": 1.5983836349386014, "learning_rate": 6.419586288711995e-06, "loss": 0.488, "step": 32440 }, { "epoch": 0.46769381548794375, "grad_norm": 1.5437559903291003, "learning_rate": 6.417174098155621e-06, "loss": 0.4761, "step": 32450 }, { "epoch": 0.467837943011977, "grad_norm": 1.3403853263372403, "learning_rate": 6.414761548890871e-06, "loss": 0.487, "step": 32460 }, { "epoch": 0.46798207053601026, "grad_norm": 1.2929188432446326, "learning_rate": 6.4123486415284056e-06, "loss": 0.4642, "step": 32470 }, { "epoch": 0.4681261980600435, "grad_norm": 1.512104880288404, "learning_rate": 6.409935376678966e-06, "loss": 0.4735, "step": 32480 }, { "epoch": 0.4682703255840768, "grad_norm": 1.804023490280619, "learning_rate": 6.407521754953384e-06, "loss": 0.4999, "step": 32490 }, { "epoch": 0.46841445310811003, "grad_norm": 1.5702450571046032, "learning_rate": 6.405107776962587e-06, "loss": 0.4886, "step": 32500 }, { "epoch": 0.46855858063214334, "grad_norm": 1.6329780844799744, "learning_rate": 6.402693443317587e-06, "loss": 0.4971, "step": 32510 }, { "epoch": 0.4687027081561766, "grad_norm": 1.481562091510773, "learning_rate": 6.4002787546294905e-06, "loss": 0.4605, "step": 32520 }, { "epoch": 0.46884683568020985, "grad_norm": 1.5190839770849098, "learning_rate": 6.397863711509493e-06, "loss": 0.4856, "step": 32530 }, { "epoch": 0.4689909632042431, "grad_norm": 1.403042364560666, "learning_rate": 6.395448314568877e-06, "loss": 0.4751, "step": 32540 }, { "epoch": 0.46913509072827636, "grad_norm": 1.4691279042587828, "learning_rate": 6.393032564419016e-06, "loss": 0.4842, "step": 32550 }, { "epoch": 0.46927921825230967, "grad_norm": 1.505114206848178, "learning_rate": 6.390616461671375e-06, "loss": 0.4699, "step": 32560 }, { "epoch": 0.4694233457763429, "grad_norm": 1.5341248783620112, "learning_rate": 6.388200006937503e-06, "loss": 0.4944, "step": 32570 }, { "epoch": 0.4695674733003762, "grad_norm": 1.4715729555024897, "learning_rate": 6.385783200829045e-06, "loss": 0.4691, "step": 32580 }, { "epoch": 0.46971160082440944, "grad_norm": 1.5404595848848444, "learning_rate": 6.383366043957732e-06, "loss": 0.4684, "step": 32590 }, { "epoch": 0.4698557283484427, "grad_norm": 1.6680704880823731, "learning_rate": 6.380948536935379e-06, "loss": 0.51, "step": 32600 }, { "epoch": 0.46999985587247595, "grad_norm": 1.3894922204133011, "learning_rate": 6.378530680373899e-06, "loss": 0.4715, "step": 32610 }, { "epoch": 0.47014398339650926, "grad_norm": 1.5106014718802414, "learning_rate": 6.376112474885283e-06, "loss": 0.4727, "step": 32620 }, { "epoch": 0.4702881109205425, "grad_norm": 1.5476921227731266, "learning_rate": 6.373693921081621e-06, "loss": 0.4833, "step": 32630 }, { "epoch": 0.47043223844457577, "grad_norm": 1.4586113823862241, "learning_rate": 6.371275019575082e-06, "loss": 0.4837, "step": 32640 }, { "epoch": 0.470576365968609, "grad_norm": 1.4612736178776822, "learning_rate": 6.368855770977928e-06, "loss": 0.4499, "step": 32650 }, { "epoch": 0.4707204934926423, "grad_norm": 1.3350367257830604, "learning_rate": 6.366436175902509e-06, "loss": 0.4688, "step": 32660 }, { "epoch": 0.47086462101667553, "grad_norm": 1.6103942230303874, "learning_rate": 6.3640162349612585e-06, "loss": 0.4827, "step": 32670 }, { "epoch": 0.47100874854070884, "grad_norm": 1.3453657928102682, "learning_rate": 6.361595948766703e-06, "loss": 0.4491, "step": 32680 }, { "epoch": 0.4711528760647421, "grad_norm": 1.6614158853596668, "learning_rate": 6.359175317931454e-06, "loss": 0.4334, "step": 32690 }, { "epoch": 0.47129700358877535, "grad_norm": 2.2405904566722215, "learning_rate": 6.356754343068209e-06, "loss": 0.4736, "step": 32700 }, { "epoch": 0.4714411311128086, "grad_norm": 1.6163368806701053, "learning_rate": 6.354333024789754e-06, "loss": 0.4754, "step": 32710 }, { "epoch": 0.47158525863684186, "grad_norm": 1.5095935570816141, "learning_rate": 6.351911363708961e-06, "loss": 0.4627, "step": 32720 }, { "epoch": 0.4717293861608751, "grad_norm": 1.3391216142629325, "learning_rate": 6.349489360438787e-06, "loss": 0.459, "step": 32730 }, { "epoch": 0.47187351368490843, "grad_norm": 1.7228316458453656, "learning_rate": 6.3470670155922845e-06, "loss": 0.4509, "step": 32740 }, { "epoch": 0.4720176412089417, "grad_norm": 1.7717939788736299, "learning_rate": 6.344644329782581e-06, "loss": 0.4929, "step": 32750 }, { "epoch": 0.47216176873297494, "grad_norm": 2.2057749589402302, "learning_rate": 6.342221303622896e-06, "loss": 0.4325, "step": 32760 }, { "epoch": 0.4723058962570082, "grad_norm": 1.4461213180260846, "learning_rate": 6.339797937726535e-06, "loss": 0.4768, "step": 32770 }, { "epoch": 0.47245002378104145, "grad_norm": 1.4458047929310907, "learning_rate": 6.337374232706887e-06, "loss": 0.4608, "step": 32780 }, { "epoch": 0.4725941513050747, "grad_norm": 1.3933644698209264, "learning_rate": 6.334950189177432e-06, "loss": 0.4506, "step": 32790 }, { "epoch": 0.472738278829108, "grad_norm": 1.3859441415986367, "learning_rate": 6.332525807751731e-06, "loss": 0.4679, "step": 32800 }, { "epoch": 0.47288240635314127, "grad_norm": 1.61540954066461, "learning_rate": 6.330101089043429e-06, "loss": 0.4414, "step": 32810 }, { "epoch": 0.4730265338771745, "grad_norm": 1.3247382035890805, "learning_rate": 6.3276760336662645e-06, "loss": 0.4558, "step": 32820 }, { "epoch": 0.4731706614012078, "grad_norm": 1.69083633315459, "learning_rate": 6.325250642234052e-06, "loss": 0.464, "step": 32830 }, { "epoch": 0.47331478892524104, "grad_norm": 1.5210558488706827, "learning_rate": 6.322824915360696e-06, "loss": 0.4822, "step": 32840 }, { "epoch": 0.4734589164492743, "grad_norm": 1.6982983131772504, "learning_rate": 6.320398853660186e-06, "loss": 0.4604, "step": 32850 }, { "epoch": 0.4736030439733076, "grad_norm": 1.3791747756897543, "learning_rate": 6.3179724577465925e-06, "loss": 0.4564, "step": 32860 }, { "epoch": 0.47374717149734086, "grad_norm": 1.2355605840337545, "learning_rate": 6.315545728234078e-06, "loss": 0.4862, "step": 32870 }, { "epoch": 0.4738912990213741, "grad_norm": 1.6144060319699316, "learning_rate": 6.313118665736881e-06, "loss": 0.4621, "step": 32880 }, { "epoch": 0.47403542654540737, "grad_norm": 1.5444509644049114, "learning_rate": 6.310691270869329e-06, "loss": 0.4799, "step": 32890 }, { "epoch": 0.4741795540694406, "grad_norm": 1.6113324418280746, "learning_rate": 6.308263544245832e-06, "loss": 0.4604, "step": 32900 }, { "epoch": 0.4743236815934739, "grad_norm": 1.4636934337912433, "learning_rate": 6.305835486480886e-06, "loss": 0.4851, "step": 32910 }, { "epoch": 0.4744678091175072, "grad_norm": 1.430965763515956, "learning_rate": 6.303407098189069e-06, "loss": 0.4763, "step": 32920 }, { "epoch": 0.47461193664154044, "grad_norm": 1.398925072975072, "learning_rate": 6.300978379985043e-06, "loss": 0.4524, "step": 32930 }, { "epoch": 0.4747560641655737, "grad_norm": 1.6766104094119696, "learning_rate": 6.298549332483552e-06, "loss": 0.4635, "step": 32940 }, { "epoch": 0.47490019168960695, "grad_norm": 1.8207236076682833, "learning_rate": 6.2961199562994275e-06, "loss": 0.4671, "step": 32950 }, { "epoch": 0.4750443192136402, "grad_norm": 1.317167585612634, "learning_rate": 6.293690252047581e-06, "loss": 0.4699, "step": 32960 }, { "epoch": 0.4751884467376735, "grad_norm": 1.6877659704263275, "learning_rate": 6.291260220343007e-06, "loss": 0.4731, "step": 32970 }, { "epoch": 0.4753325742617068, "grad_norm": 1.7275972911627322, "learning_rate": 6.288829861800784e-06, "loss": 0.5028, "step": 32980 }, { "epoch": 0.47547670178574003, "grad_norm": 2.0383713438935493, "learning_rate": 6.286399177036071e-06, "loss": 0.4722, "step": 32990 }, { "epoch": 0.4756208293097733, "grad_norm": 1.6039566263143656, "learning_rate": 6.283968166664115e-06, "loss": 0.4766, "step": 33000 }, { "epoch": 0.47576495683380654, "grad_norm": 1.3442858802824424, "learning_rate": 6.281536831300238e-06, "loss": 0.4543, "step": 33010 }, { "epoch": 0.4759090843578398, "grad_norm": 1.4976471139289518, "learning_rate": 6.2791051715598515e-06, "loss": 0.4813, "step": 33020 }, { "epoch": 0.4760532118818731, "grad_norm": 1.4187202583067728, "learning_rate": 6.276673188058443e-06, "loss": 0.4568, "step": 33030 }, { "epoch": 0.47619733940590636, "grad_norm": 1.3597018447476963, "learning_rate": 6.274240881411586e-06, "loss": 0.4793, "step": 33040 }, { "epoch": 0.4763414669299396, "grad_norm": 1.5448163346360717, "learning_rate": 6.2718082522349344e-06, "loss": 0.4806, "step": 33050 }, { "epoch": 0.47648559445397287, "grad_norm": 1.6167065185419345, "learning_rate": 6.269375301144222e-06, "loss": 0.4696, "step": 33060 }, { "epoch": 0.4766297219780061, "grad_norm": 1.6978369187548785, "learning_rate": 6.266942028755269e-06, "loss": 0.473, "step": 33070 }, { "epoch": 0.4767738495020394, "grad_norm": 1.3865025031949951, "learning_rate": 6.26450843568397e-06, "loss": 0.4586, "step": 33080 }, { "epoch": 0.4769179770260727, "grad_norm": 1.4238411873232115, "learning_rate": 6.262074522546308e-06, "loss": 0.4839, "step": 33090 }, { "epoch": 0.47706210455010595, "grad_norm": 1.797018776940375, "learning_rate": 6.259640289958342e-06, "loss": 0.4546, "step": 33100 }, { "epoch": 0.4772062320741392, "grad_norm": 1.496179827847296, "learning_rate": 6.257205738536212e-06, "loss": 0.4608, "step": 33110 }, { "epoch": 0.47735035959817246, "grad_norm": 1.5242619872017433, "learning_rate": 6.254770868896141e-06, "loss": 0.4719, "step": 33120 }, { "epoch": 0.4774944871222057, "grad_norm": 1.5986771483647155, "learning_rate": 6.252335681654433e-06, "loss": 0.4461, "step": 33130 }, { "epoch": 0.47763861464623897, "grad_norm": 1.4013272360173046, "learning_rate": 6.249900177427471e-06, "loss": 0.493, "step": 33140 }, { "epoch": 0.4777827421702723, "grad_norm": 1.6156322123577502, "learning_rate": 6.247464356831716e-06, "loss": 0.4733, "step": 33150 }, { "epoch": 0.47792686969430553, "grad_norm": 1.5519462922168366, "learning_rate": 6.245028220483713e-06, "loss": 0.4904, "step": 33160 }, { "epoch": 0.4780709972183388, "grad_norm": 1.444275402946011, "learning_rate": 6.242591769000085e-06, "loss": 0.4875, "step": 33170 }, { "epoch": 0.47821512474237204, "grad_norm": 1.5734198990157033, "learning_rate": 6.240155002997535e-06, "loss": 0.474, "step": 33180 }, { "epoch": 0.4783592522664053, "grad_norm": 1.4714598589398584, "learning_rate": 6.2377179230928445e-06, "loss": 0.4984, "step": 33190 }, { "epoch": 0.47850337979043855, "grad_norm": 1.6135307908316487, "learning_rate": 6.235280529902877e-06, "loss": 0.483, "step": 33200 }, { "epoch": 0.47864750731447187, "grad_norm": 1.36923847968873, "learning_rate": 6.2328428240445745e-06, "loss": 0.4538, "step": 33210 }, { "epoch": 0.4787916348385051, "grad_norm": 2.3425130500225926, "learning_rate": 6.2304048061349564e-06, "loss": 0.4583, "step": 33220 }, { "epoch": 0.4789357623625384, "grad_norm": 1.3618268262802324, "learning_rate": 6.227966476791121e-06, "loss": 0.4689, "step": 33230 }, { "epoch": 0.47907988988657163, "grad_norm": 1.3331588195653032, "learning_rate": 6.225527836630248e-06, "loss": 0.4741, "step": 33240 }, { "epoch": 0.4792240174106049, "grad_norm": 1.6460044393420512, "learning_rate": 6.223088886269595e-06, "loss": 0.4589, "step": 33250 }, { "epoch": 0.47936814493463814, "grad_norm": 1.5729121941653692, "learning_rate": 6.220649626326498e-06, "loss": 0.4773, "step": 33260 }, { "epoch": 0.47951227245867145, "grad_norm": 1.784075071289667, "learning_rate": 6.218210057418369e-06, "loss": 0.4583, "step": 33270 }, { "epoch": 0.4796563999827047, "grad_norm": 1.6446985143025128, "learning_rate": 6.2157701801627e-06, "loss": 0.4835, "step": 33280 }, { "epoch": 0.47980052750673796, "grad_norm": 1.5180704272264063, "learning_rate": 6.213329995177062e-06, "loss": 0.4563, "step": 33290 }, { "epoch": 0.4799446550307712, "grad_norm": 1.5933532852272083, "learning_rate": 6.210889503079104e-06, "loss": 0.4689, "step": 33300 }, { "epoch": 0.48008878255480447, "grad_norm": 1.5213040706219703, "learning_rate": 6.20844870448655e-06, "loss": 0.493, "step": 33310 }, { "epoch": 0.4802329100788378, "grad_norm": 1.2182895723349851, "learning_rate": 6.206007600017204e-06, "loss": 0.46, "step": 33320 }, { "epoch": 0.48037703760287104, "grad_norm": 1.6270954308853631, "learning_rate": 6.203566190288948e-06, "loss": 0.4862, "step": 33330 }, { "epoch": 0.4805211651269043, "grad_norm": 1.7007658911718744, "learning_rate": 6.201124475919738e-06, "loss": 0.4464, "step": 33340 }, { "epoch": 0.48066529265093755, "grad_norm": 1.5148247570253228, "learning_rate": 6.198682457527609e-06, "loss": 0.4486, "step": 33350 }, { "epoch": 0.4808094201749708, "grad_norm": 1.6336158003876948, "learning_rate": 6.1962401357306765e-06, "loss": 0.4694, "step": 33360 }, { "epoch": 0.48095354769900406, "grad_norm": 1.4052025394492922, "learning_rate": 6.193797511147125e-06, "loss": 0.4707, "step": 33370 }, { "epoch": 0.48109767522303737, "grad_norm": 1.591711530315452, "learning_rate": 6.191354584395223e-06, "loss": 0.4627, "step": 33380 }, { "epoch": 0.4812418027470706, "grad_norm": 1.509393703488179, "learning_rate": 6.188911356093312e-06, "loss": 0.4638, "step": 33390 }, { "epoch": 0.4813859302711039, "grad_norm": 1.4484473948449157, "learning_rate": 6.186467826859808e-06, "loss": 0.4694, "step": 33400 }, { "epoch": 0.48153005779513713, "grad_norm": 1.3929114711316022, "learning_rate": 6.184023997313209e-06, "loss": 0.4971, "step": 33410 }, { "epoch": 0.4816741853191704, "grad_norm": 1.3331224130132315, "learning_rate": 6.181579868072083e-06, "loss": 0.4655, "step": 33420 }, { "epoch": 0.48181831284320364, "grad_norm": 1.650434736855968, "learning_rate": 6.1791354397550775e-06, "loss": 0.4415, "step": 33430 }, { "epoch": 0.48196244036723696, "grad_norm": 1.497268690967772, "learning_rate": 6.176690712980915e-06, "loss": 0.457, "step": 33440 }, { "epoch": 0.4821065678912702, "grad_norm": 1.7008426070171354, "learning_rate": 6.17424568836839e-06, "loss": 0.4666, "step": 33450 }, { "epoch": 0.48225069541530347, "grad_norm": 1.5225106419462218, "learning_rate": 6.17180036653638e-06, "loss": 0.4672, "step": 33460 }, { "epoch": 0.4823948229393367, "grad_norm": 1.2671611085633645, "learning_rate": 6.169354748103832e-06, "loss": 0.4834, "step": 33470 }, { "epoch": 0.48253895046337, "grad_norm": 1.7566456196973934, "learning_rate": 6.166908833689766e-06, "loss": 0.4856, "step": 33480 }, { "epoch": 0.48268307798740323, "grad_norm": 1.111642586044883, "learning_rate": 6.164462623913286e-06, "loss": 0.4466, "step": 33490 }, { "epoch": 0.48282720551143654, "grad_norm": 1.4977736619194058, "learning_rate": 6.162016119393559e-06, "loss": 0.4608, "step": 33500 }, { "epoch": 0.4829713330354698, "grad_norm": 1.5573218432916558, "learning_rate": 6.159569320749835e-06, "loss": 0.4733, "step": 33510 }, { "epoch": 0.48311546055950305, "grad_norm": 1.6157737945153463, "learning_rate": 6.157122228601439e-06, "loss": 0.4566, "step": 33520 }, { "epoch": 0.4832595880835363, "grad_norm": 1.4307740762103442, "learning_rate": 6.154674843567763e-06, "loss": 0.4646, "step": 33530 }, { "epoch": 0.48340371560756956, "grad_norm": 1.2720141683912531, "learning_rate": 6.152227166268279e-06, "loss": 0.4624, "step": 33540 }, { "epoch": 0.4835478431316028, "grad_norm": 1.634981714957599, "learning_rate": 6.149779197322533e-06, "loss": 0.4626, "step": 33550 }, { "epoch": 0.4836919706556361, "grad_norm": 1.4212568424848535, "learning_rate": 6.14733093735014e-06, "loss": 0.4783, "step": 33560 }, { "epoch": 0.4838360981796694, "grad_norm": 6.663468662139285, "learning_rate": 6.144882386970795e-06, "loss": 0.4984, "step": 33570 }, { "epoch": 0.48398022570370264, "grad_norm": 1.322616137771127, "learning_rate": 6.142433546804261e-06, "loss": 0.4604, "step": 33580 }, { "epoch": 0.4841243532277359, "grad_norm": 1.505227997432047, "learning_rate": 6.13998441747038e-06, "loss": 0.4716, "step": 33590 }, { "epoch": 0.48426848075176915, "grad_norm": 1.582361330702915, "learning_rate": 6.137534999589061e-06, "loss": 0.4562, "step": 33600 }, { "epoch": 0.4844126082758024, "grad_norm": 1.5841796627986564, "learning_rate": 6.135085293780291e-06, "loss": 0.4833, "step": 33610 }, { "epoch": 0.4845567357998357, "grad_norm": 1.6073231582314118, "learning_rate": 6.132635300664125e-06, "loss": 0.481, "step": 33620 }, { "epoch": 0.48470086332386897, "grad_norm": 1.450763103594595, "learning_rate": 6.130185020860696e-06, "loss": 0.4559, "step": 33630 }, { "epoch": 0.4848449908479022, "grad_norm": 1.5122035564849174, "learning_rate": 6.1277344549902066e-06, "loss": 0.4611, "step": 33640 }, { "epoch": 0.4849891183719355, "grad_norm": 1.6696903419097968, "learning_rate": 6.125283603672933e-06, "loss": 0.4806, "step": 33650 }, { "epoch": 0.48513324589596873, "grad_norm": 1.3507079571629885, "learning_rate": 6.122832467529223e-06, "loss": 0.4478, "step": 33660 }, { "epoch": 0.48527737342000205, "grad_norm": 1.5525531185222683, "learning_rate": 6.120381047179496e-06, "loss": 0.456, "step": 33670 }, { "epoch": 0.4854215009440353, "grad_norm": 1.6846136459576821, "learning_rate": 6.1179293432442425e-06, "loss": 0.4583, "step": 33680 }, { "epoch": 0.48556562846806856, "grad_norm": 1.5823209013595907, "learning_rate": 6.115477356344028e-06, "loss": 0.4901, "step": 33690 }, { "epoch": 0.4857097559921018, "grad_norm": 1.6687633749063358, "learning_rate": 6.113025087099489e-06, "loss": 0.4565, "step": 33700 }, { "epoch": 0.48585388351613507, "grad_norm": 1.530030253831995, "learning_rate": 6.110572536131328e-06, "loss": 0.4718, "step": 33710 }, { "epoch": 0.4859980110401683, "grad_norm": 1.5156852222127237, "learning_rate": 6.108119704060327e-06, "loss": 0.4817, "step": 33720 }, { "epoch": 0.48614213856420163, "grad_norm": 1.3746696704331713, "learning_rate": 6.105666591507333e-06, "loss": 0.4724, "step": 33730 }, { "epoch": 0.4862862660882349, "grad_norm": 1.5399957613199609, "learning_rate": 6.103213199093267e-06, "loss": 0.4607, "step": 33740 }, { "epoch": 0.48643039361226814, "grad_norm": 1.4554199929775795, "learning_rate": 6.100759527439122e-06, "loss": 0.4649, "step": 33750 }, { "epoch": 0.4865745211363014, "grad_norm": 1.4924018255663203, "learning_rate": 6.098305577165956e-06, "loss": 0.4764, "step": 33760 }, { "epoch": 0.48671864866033465, "grad_norm": 1.6463199670016677, "learning_rate": 6.0958513488949035e-06, "loss": 0.462, "step": 33770 }, { "epoch": 0.4868627761843679, "grad_norm": 1.5707277231512602, "learning_rate": 6.093396843247167e-06, "loss": 0.4687, "step": 33780 }, { "epoch": 0.4870069037084012, "grad_norm": 1.5201967955536912, "learning_rate": 6.090942060844016e-06, "loss": 0.4353, "step": 33790 }, { "epoch": 0.4871510312324345, "grad_norm": 1.6726714482871485, "learning_rate": 6.0884870023068e-06, "loss": 0.4732, "step": 33800 }, { "epoch": 0.48729515875646773, "grad_norm": 1.4183084650585034, "learning_rate": 6.086031668256925e-06, "loss": 0.4949, "step": 33810 }, { "epoch": 0.487439286280501, "grad_norm": 1.4905876287894497, "learning_rate": 6.083576059315877e-06, "loss": 0.4659, "step": 33820 }, { "epoch": 0.48758341380453424, "grad_norm": 1.59259631444836, "learning_rate": 6.0811201761052096e-06, "loss": 0.4684, "step": 33830 }, { "epoch": 0.4877275413285675, "grad_norm": 1.4410239814233345, "learning_rate": 6.07866401924654e-06, "loss": 0.4767, "step": 33840 }, { "epoch": 0.4878716688526008, "grad_norm": 1.5057455085728049, "learning_rate": 6.076207589361561e-06, "loss": 0.4684, "step": 33850 }, { "epoch": 0.48801579637663406, "grad_norm": 1.6429511259758618, "learning_rate": 6.073750887072034e-06, "loss": 0.4555, "step": 33860 }, { "epoch": 0.4881599239006673, "grad_norm": 1.775666308308627, "learning_rate": 6.071293912999785e-06, "loss": 0.4599, "step": 33870 }, { "epoch": 0.48830405142470057, "grad_norm": 1.3585948195774953, "learning_rate": 6.068836667766714e-06, "loss": 0.4813, "step": 33880 }, { "epoch": 0.4884481789487338, "grad_norm": 1.3139255393566278, "learning_rate": 6.066379151994787e-06, "loss": 0.4658, "step": 33890 }, { "epoch": 0.4885923064727671, "grad_norm": 1.8450531090985636, "learning_rate": 6.063921366306035e-06, "loss": 0.5002, "step": 33900 }, { "epoch": 0.4887364339968004, "grad_norm": 1.512162357399115, "learning_rate": 6.061463311322566e-06, "loss": 0.4771, "step": 33910 }, { "epoch": 0.48888056152083365, "grad_norm": 1.6841343641360635, "learning_rate": 6.059004987666549e-06, "loss": 0.4875, "step": 33920 }, { "epoch": 0.4890246890448669, "grad_norm": 1.558065353023061, "learning_rate": 6.056546395960223e-06, "loss": 0.4632, "step": 33930 }, { "epoch": 0.48916881656890016, "grad_norm": 1.7191172396821335, "learning_rate": 6.054087536825897e-06, "loss": 0.4641, "step": 33940 }, { "epoch": 0.4893129440929334, "grad_norm": 1.4567292938927334, "learning_rate": 6.0516284108859416e-06, "loss": 0.4342, "step": 33950 }, { "epoch": 0.48945707161696667, "grad_norm": 1.6300438019851047, "learning_rate": 6.049169018762803e-06, "loss": 0.472, "step": 33960 }, { "epoch": 0.489601199141, "grad_norm": 1.5742739009300668, "learning_rate": 6.046709361078989e-06, "loss": 0.4581, "step": 33970 }, { "epoch": 0.48974532666503323, "grad_norm": 1.4182871682777611, "learning_rate": 6.044249438457076e-06, "loss": 0.4584, "step": 33980 }, { "epoch": 0.4898894541890665, "grad_norm": 1.473323910563921, "learning_rate": 6.041789251519711e-06, "loss": 0.4441, "step": 33990 }, { "epoch": 0.49003358171309974, "grad_norm": 1.6851459969812115, "learning_rate": 6.039328800889601e-06, "loss": 0.4721, "step": 34000 }, { "epoch": 0.490177709237133, "grad_norm": 1.6637946794142007, "learning_rate": 6.036868087189526e-06, "loss": 0.4665, "step": 34010 }, { "epoch": 0.4903218367611663, "grad_norm": 1.5924316037088613, "learning_rate": 6.034407111042328e-06, "loss": 0.4946, "step": 34020 }, { "epoch": 0.49046596428519956, "grad_norm": 1.5706299307326816, "learning_rate": 6.03194587307092e-06, "loss": 0.4831, "step": 34030 }, { "epoch": 0.4906100918092328, "grad_norm": 1.385352980062478, "learning_rate": 6.029484373898279e-06, "loss": 0.4657, "step": 34040 }, { "epoch": 0.4907542193332661, "grad_norm": 1.6322010993740188, "learning_rate": 6.027022614147445e-06, "loss": 0.4529, "step": 34050 }, { "epoch": 0.49089834685729933, "grad_norm": 1.5181528275283867, "learning_rate": 6.0245605944415285e-06, "loss": 0.4426, "step": 34060 }, { "epoch": 0.4910424743813326, "grad_norm": 1.552663003536218, "learning_rate": 6.022098315403706e-06, "loss": 0.4651, "step": 34070 }, { "epoch": 0.4911866019053659, "grad_norm": 1.6291980531364596, "learning_rate": 6.019635777657214e-06, "loss": 0.476, "step": 34080 }, { "epoch": 0.49133072942939915, "grad_norm": 1.3439272569776637, "learning_rate": 6.017172981825363e-06, "loss": 0.5019, "step": 34090 }, { "epoch": 0.4914748569534324, "grad_norm": 1.4498172066158923, "learning_rate": 6.0147099285315214e-06, "loss": 0.4955, "step": 34100 }, { "epoch": 0.49161898447746566, "grad_norm": 1.3720628567777722, "learning_rate": 6.012246618399125e-06, "loss": 0.4703, "step": 34110 }, { "epoch": 0.4917631120014989, "grad_norm": 2.2277377880965794, "learning_rate": 6.009783052051676e-06, "loss": 0.4617, "step": 34120 }, { "epoch": 0.49190723952553217, "grad_norm": 1.5542424344748764, "learning_rate": 6.0073192301127415e-06, "loss": 0.4845, "step": 34130 }, { "epoch": 0.4920513670495655, "grad_norm": 1.4488529430509267, "learning_rate": 6.004855153205952e-06, "loss": 0.4786, "step": 34140 }, { "epoch": 0.49219549457359874, "grad_norm": 1.514492277510532, "learning_rate": 6.002390821955002e-06, "loss": 0.4585, "step": 34150 }, { "epoch": 0.492339622097632, "grad_norm": 1.3595077366837036, "learning_rate": 5.9999262369836516e-06, "loss": 0.4537, "step": 34160 }, { "epoch": 0.49248374962166525, "grad_norm": 1.387211118204698, "learning_rate": 5.9974613989157245e-06, "loss": 0.4763, "step": 34170 }, { "epoch": 0.4926278771456985, "grad_norm": 1.3593556958205686, "learning_rate": 5.994996308375111e-06, "loss": 0.4656, "step": 34180 }, { "epoch": 0.49277200466973176, "grad_norm": 1.4777426821992352, "learning_rate": 5.99253096598576e-06, "loss": 0.4681, "step": 34190 }, { "epoch": 0.49291613219376507, "grad_norm": 1.6407829885488339, "learning_rate": 5.99006537237169e-06, "loss": 0.4699, "step": 34200 }, { "epoch": 0.4930602597177983, "grad_norm": 1.6839663293539713, "learning_rate": 5.987599528156977e-06, "loss": 0.4695, "step": 34210 }, { "epoch": 0.4932043872418316, "grad_norm": 2.0243781226685744, "learning_rate": 5.985133433965769e-06, "loss": 0.466, "step": 34220 }, { "epoch": 0.49334851476586483, "grad_norm": 1.420389549289776, "learning_rate": 5.9826670904222664e-06, "loss": 0.4863, "step": 34230 }, { "epoch": 0.4934926422898981, "grad_norm": 1.6077409428809843, "learning_rate": 5.980200498150741e-06, "loss": 0.489, "step": 34240 }, { "epoch": 0.49363676981393134, "grad_norm": 1.6577067645603607, "learning_rate": 5.977733657775526e-06, "loss": 0.4686, "step": 34250 }, { "epoch": 0.49378089733796465, "grad_norm": 1.3423568843368923, "learning_rate": 5.975266569921014e-06, "loss": 0.4805, "step": 34260 }, { "epoch": 0.4939250248619979, "grad_norm": 1.543571009518091, "learning_rate": 5.972799235211664e-06, "loss": 0.4597, "step": 34270 }, { "epoch": 0.49406915238603116, "grad_norm": 1.542772441141585, "learning_rate": 5.970331654271996e-06, "loss": 0.4711, "step": 34280 }, { "epoch": 0.4942132799100644, "grad_norm": 1.4695483336105015, "learning_rate": 5.967863827726591e-06, "loss": 0.4657, "step": 34290 }, { "epoch": 0.4943574074340977, "grad_norm": 1.5947113196863738, "learning_rate": 5.965395756200095e-06, "loss": 0.4857, "step": 34300 }, { "epoch": 0.49450153495813093, "grad_norm": 1.7010469839010218, "learning_rate": 5.962927440317214e-06, "loss": 0.4678, "step": 34310 }, { "epoch": 0.49464566248216424, "grad_norm": 1.8575071668733356, "learning_rate": 5.960458880702718e-06, "loss": 0.489, "step": 34320 }, { "epoch": 0.4947897900061975, "grad_norm": 1.531779373855491, "learning_rate": 5.957990077981434e-06, "loss": 0.4402, "step": 34330 }, { "epoch": 0.49493391753023075, "grad_norm": 1.6301541976536529, "learning_rate": 5.955521032778254e-06, "loss": 0.4586, "step": 34340 }, { "epoch": 0.495078045054264, "grad_norm": 1.7037715578874524, "learning_rate": 5.953051745718134e-06, "loss": 0.4781, "step": 34350 }, { "epoch": 0.49522217257829726, "grad_norm": 1.5030383419125604, "learning_rate": 5.950582217426085e-06, "loss": 0.4388, "step": 34360 }, { "epoch": 0.4953663001023305, "grad_norm": 1.7005554905199414, "learning_rate": 5.948112448527185e-06, "loss": 0.4731, "step": 34370 }, { "epoch": 0.4955104276263638, "grad_norm": 1.43014689449633, "learning_rate": 5.945642439646569e-06, "loss": 0.4546, "step": 34380 }, { "epoch": 0.4956545551503971, "grad_norm": 1.6878068999614169, "learning_rate": 5.94317219140943e-06, "loss": 0.478, "step": 34390 }, { "epoch": 0.49579868267443034, "grad_norm": 1.5626727942159164, "learning_rate": 5.940701704441033e-06, "loss": 0.4566, "step": 34400 }, { "epoch": 0.4959428101984636, "grad_norm": 1.4314867969468794, "learning_rate": 5.938230979366691e-06, "loss": 0.4589, "step": 34410 }, { "epoch": 0.49608693772249685, "grad_norm": 1.6562262853238219, "learning_rate": 5.935760016811783e-06, "loss": 0.4469, "step": 34420 }, { "epoch": 0.49623106524653016, "grad_norm": 2.6095738069157925, "learning_rate": 5.933288817401748e-06, "loss": 0.4684, "step": 34430 }, { "epoch": 0.4963751927705634, "grad_norm": 1.4724562530194814, "learning_rate": 5.930817381762083e-06, "loss": 0.4664, "step": 34440 }, { "epoch": 0.49651932029459667, "grad_norm": 1.7975358603008658, "learning_rate": 5.9283457105183465e-06, "loss": 0.4927, "step": 34450 }, { "epoch": 0.4966634478186299, "grad_norm": 1.4667661083142884, "learning_rate": 5.925873804296157e-06, "loss": 0.4575, "step": 34460 }, { "epoch": 0.4968075753426632, "grad_norm": 1.7392563186186227, "learning_rate": 5.9234016637211915e-06, "loss": 0.4826, "step": 34470 }, { "epoch": 0.49695170286669643, "grad_norm": 1.8196517770038048, "learning_rate": 5.920929289419186e-06, "loss": 0.4685, "step": 34480 }, { "epoch": 0.49709583039072974, "grad_norm": 1.6998444620201147, "learning_rate": 5.9184566820159364e-06, "loss": 0.4882, "step": 34490 }, { "epoch": 0.497239957914763, "grad_norm": 1.6906717259383155, "learning_rate": 5.915983842137296e-06, "loss": 0.4852, "step": 34500 }, { "epoch": 0.49738408543879625, "grad_norm": 1.7621626271147104, "learning_rate": 5.913510770409181e-06, "loss": 0.4703, "step": 34510 }, { "epoch": 0.4975282129628295, "grad_norm": 1.2298342705041683, "learning_rate": 5.911037467457561e-06, "loss": 0.4448, "step": 34520 }, { "epoch": 0.49767234048686276, "grad_norm": 1.5799114595609627, "learning_rate": 5.908563933908469e-06, "loss": 0.4795, "step": 34530 }, { "epoch": 0.497816468010896, "grad_norm": 1.5043415395330393, "learning_rate": 5.906090170387992e-06, "loss": 0.4956, "step": 34540 }, { "epoch": 0.49796059553492933, "grad_norm": 1.6651296573767334, "learning_rate": 5.903616177522277e-06, "loss": 0.4636, "step": 34550 }, { "epoch": 0.4981047230589626, "grad_norm": 1.7719082877346322, "learning_rate": 5.901141955937532e-06, "loss": 0.4597, "step": 34560 }, { "epoch": 0.49824885058299584, "grad_norm": 1.5089846244923268, "learning_rate": 5.898667506260016e-06, "loss": 0.4992, "step": 34570 }, { "epoch": 0.4983929781070291, "grad_norm": 1.6570183061451793, "learning_rate": 5.896192829116055e-06, "loss": 0.4773, "step": 34580 }, { "epoch": 0.49853710563106235, "grad_norm": 1.4666672521869555, "learning_rate": 5.893717925132025e-06, "loss": 0.4638, "step": 34590 }, { "epoch": 0.4986812331550956, "grad_norm": 1.4240385854755329, "learning_rate": 5.891242794934361e-06, "loss": 0.4939, "step": 34600 }, { "epoch": 0.4988253606791289, "grad_norm": 1.3201694791760223, "learning_rate": 5.888767439149558e-06, "loss": 0.4385, "step": 34610 }, { "epoch": 0.49896948820316217, "grad_norm": 1.6930214949258091, "learning_rate": 5.886291858404164e-06, "loss": 0.4726, "step": 34620 }, { "epoch": 0.4991136157271954, "grad_norm": 2.2878386233919374, "learning_rate": 5.883816053324788e-06, "loss": 0.486, "step": 34630 }, { "epoch": 0.4992577432512287, "grad_norm": 1.8086216623564142, "learning_rate": 5.881340024538096e-06, "loss": 0.4582, "step": 34640 }, { "epoch": 0.49940187077526194, "grad_norm": 1.6701932137147701, "learning_rate": 5.878863772670803e-06, "loss": 0.4459, "step": 34650 }, { "epoch": 0.4995459982992952, "grad_norm": 1.7220622529976812, "learning_rate": 5.87638729834969e-06, "loss": 0.4813, "step": 34660 }, { "epoch": 0.4996901258233285, "grad_norm": 1.7424038701744526, "learning_rate": 5.873910602201591e-06, "loss": 0.4789, "step": 34670 }, { "epoch": 0.49983425334736176, "grad_norm": 1.599006663861622, "learning_rate": 5.871433684853393e-06, "loss": 0.4698, "step": 34680 }, { "epoch": 0.499978380871395, "grad_norm": 1.9841445998073342, "learning_rate": 5.868956546932046e-06, "loss": 0.4577, "step": 34690 }, { "epoch": 0.5001225083954283, "grad_norm": 1.4900653395339152, "learning_rate": 5.866479189064547e-06, "loss": 0.4459, "step": 34700 }, { "epoch": 0.5002666359194615, "grad_norm": 1.9705243987150474, "learning_rate": 5.864001611877952e-06, "loss": 0.4753, "step": 34710 }, { "epoch": 0.5004107634434948, "grad_norm": 1.5620720945431452, "learning_rate": 5.86152381599938e-06, "loss": 0.4588, "step": 34720 }, { "epoch": 0.500554890967528, "grad_norm": 1.646032627520629, "learning_rate": 5.859045802055989e-06, "loss": 0.4855, "step": 34730 }, { "epoch": 0.5006990184915613, "grad_norm": 1.5225013976553503, "learning_rate": 5.856567570675012e-06, "loss": 0.4935, "step": 34740 }, { "epoch": 0.5008431460155947, "grad_norm": 1.556303788085655, "learning_rate": 5.8540891224837224e-06, "loss": 0.4935, "step": 34750 }, { "epoch": 0.5009872735396279, "grad_norm": 1.355905091735987, "learning_rate": 5.851610458109453e-06, "loss": 0.462, "step": 34760 }, { "epoch": 0.5011314010636612, "grad_norm": 1.3212783419070926, "learning_rate": 5.849131578179595e-06, "loss": 0.4655, "step": 34770 }, { "epoch": 0.5012755285876944, "grad_norm": 1.730978008407635, "learning_rate": 5.846652483321585e-06, "loss": 0.4671, "step": 34780 }, { "epoch": 0.5014196561117277, "grad_norm": 1.3488005376452936, "learning_rate": 5.8441731741629244e-06, "loss": 0.4687, "step": 34790 }, { "epoch": 0.5015637836357609, "grad_norm": 1.5094464815906052, "learning_rate": 5.841693651331163e-06, "loss": 0.442, "step": 34800 }, { "epoch": 0.5017079111597942, "grad_norm": 1.6790103533315406, "learning_rate": 5.839213915453905e-06, "loss": 0.4552, "step": 34810 }, { "epoch": 0.5018520386838274, "grad_norm": 1.3522302498928713, "learning_rate": 5.83673396715881e-06, "loss": 0.4854, "step": 34820 }, { "epoch": 0.5019961662078607, "grad_norm": 1.5744102146406331, "learning_rate": 5.83425380707359e-06, "loss": 0.4929, "step": 34830 }, { "epoch": 0.502140293731894, "grad_norm": 1.1680063842570276, "learning_rate": 5.831773435826011e-06, "loss": 0.4469, "step": 34840 }, { "epoch": 0.5022844212559272, "grad_norm": 1.648428128688303, "learning_rate": 5.829292854043896e-06, "loss": 0.4619, "step": 34850 }, { "epoch": 0.5024285487799605, "grad_norm": 1.4815929623522615, "learning_rate": 5.826812062355114e-06, "loss": 0.4405, "step": 34860 }, { "epoch": 0.5025726763039938, "grad_norm": 1.3604552560859016, "learning_rate": 5.824331061387594e-06, "loss": 0.46, "step": 34870 }, { "epoch": 0.5027168038280271, "grad_norm": 1.4671819611579455, "learning_rate": 5.821849851769312e-06, "loss": 0.4509, "step": 34880 }, { "epoch": 0.5028609313520603, "grad_norm": 1.6433529776825708, "learning_rate": 5.819368434128303e-06, "loss": 0.4563, "step": 34890 }, { "epoch": 0.5030050588760936, "grad_norm": 1.5249355746109952, "learning_rate": 5.816886809092651e-06, "loss": 0.4859, "step": 34900 }, { "epoch": 0.5031491864001268, "grad_norm": 1.6216172604117154, "learning_rate": 5.814404977290493e-06, "loss": 0.4789, "step": 34910 }, { "epoch": 0.5032933139241601, "grad_norm": 1.6163730974413588, "learning_rate": 5.811922939350019e-06, "loss": 0.4759, "step": 34920 }, { "epoch": 0.5034374414481934, "grad_norm": 1.4453263259691114, "learning_rate": 5.809440695899469e-06, "loss": 0.475, "step": 34930 }, { "epoch": 0.5035815689722266, "grad_norm": 2.283151471196203, "learning_rate": 5.806958247567138e-06, "loss": 0.4637, "step": 34940 }, { "epoch": 0.5037256964962599, "grad_norm": 1.5246304907394588, "learning_rate": 5.80447559498137e-06, "loss": 0.4721, "step": 34950 }, { "epoch": 0.5038698240202931, "grad_norm": 1.587604154191411, "learning_rate": 5.801992738770565e-06, "loss": 0.4632, "step": 34960 }, { "epoch": 0.5040139515443264, "grad_norm": 1.641983737865006, "learning_rate": 5.799509679563169e-06, "loss": 0.4893, "step": 34970 }, { "epoch": 0.5041580790683596, "grad_norm": 1.6483137824559624, "learning_rate": 5.797026417987684e-06, "loss": 0.4727, "step": 34980 }, { "epoch": 0.504302206592393, "grad_norm": 1.5452236607712688, "learning_rate": 5.794542954672659e-06, "loss": 0.4633, "step": 34990 }, { "epoch": 0.5044463341164263, "grad_norm": 1.618630444082143, "learning_rate": 5.7920592902466995e-06, "loss": 0.4789, "step": 35000 }, { "epoch": 0.5045904616404595, "grad_norm": 1.5954551452510315, "learning_rate": 5.789575425338457e-06, "loss": 0.4686, "step": 35010 }, { "epoch": 0.5047345891644928, "grad_norm": 1.6717645476830358, "learning_rate": 5.7870913605766355e-06, "loss": 0.476, "step": 35020 }, { "epoch": 0.504878716688526, "grad_norm": 1.7984186321698288, "learning_rate": 5.784607096589992e-06, "loss": 0.4673, "step": 35030 }, { "epoch": 0.5050228442125593, "grad_norm": 1.5511064287406655, "learning_rate": 5.7821226340073275e-06, "loss": 0.4929, "step": 35040 }, { "epoch": 0.5051669717365925, "grad_norm": 1.6851034231748847, "learning_rate": 5.779637973457501e-06, "loss": 0.4501, "step": 35050 }, { "epoch": 0.5053110992606258, "grad_norm": 1.3683545402786943, "learning_rate": 5.777153115569416e-06, "loss": 0.4572, "step": 35060 }, { "epoch": 0.505455226784659, "grad_norm": 1.4846147283803224, "learning_rate": 5.7746680609720285e-06, "loss": 0.4674, "step": 35070 }, { "epoch": 0.5055993543086923, "grad_norm": 1.3252150293475913, "learning_rate": 5.7721828102943445e-06, "loss": 0.4286, "step": 35080 }, { "epoch": 0.5057434818327255, "grad_norm": 1.3265298419006526, "learning_rate": 5.769697364165416e-06, "loss": 0.4529, "step": 35090 }, { "epoch": 0.5058876093567588, "grad_norm": 1.5075989272195447, "learning_rate": 5.767211723214351e-06, "loss": 0.4448, "step": 35100 }, { "epoch": 0.5060317368807922, "grad_norm": 1.6171903256836175, "learning_rate": 5.7647258880703e-06, "loss": 0.4733, "step": 35110 }, { "epoch": 0.5061758644048254, "grad_norm": 1.9858571041463322, "learning_rate": 5.762239859362469e-06, "loss": 0.4924, "step": 35120 }, { "epoch": 0.5063199919288587, "grad_norm": 1.6814782617407271, "learning_rate": 5.759753637720108e-06, "loss": 0.4777, "step": 35130 }, { "epoch": 0.5064641194528919, "grad_norm": 1.4763814514160565, "learning_rate": 5.757267223772516e-06, "loss": 0.4552, "step": 35140 }, { "epoch": 0.5066082469769252, "grad_norm": 1.4716428343031522, "learning_rate": 5.754780618149045e-06, "loss": 0.4714, "step": 35150 }, { "epoch": 0.5067523745009584, "grad_norm": 1.5618269741180484, "learning_rate": 5.7522938214790915e-06, "loss": 0.4684, "step": 35160 }, { "epoch": 0.5068965020249917, "grad_norm": 1.3512658848086858, "learning_rate": 5.749806834392101e-06, "loss": 0.4372, "step": 35170 }, { "epoch": 0.507040629549025, "grad_norm": 1.4678124308619842, "learning_rate": 5.747319657517569e-06, "loss": 0.4475, "step": 35180 }, { "epoch": 0.5071847570730582, "grad_norm": 1.392319511415949, "learning_rate": 5.744832291485038e-06, "loss": 0.454, "step": 35190 }, { "epoch": 0.5073288845970915, "grad_norm": 1.8317010013359407, "learning_rate": 5.742344736924098e-06, "loss": 0.4874, "step": 35200 }, { "epoch": 0.5074730121211247, "grad_norm": 1.6124815607157095, "learning_rate": 5.739856994464387e-06, "loss": 0.4623, "step": 35210 }, { "epoch": 0.5076171396451581, "grad_norm": 1.6572869266653527, "learning_rate": 5.73736906473559e-06, "loss": 0.4824, "step": 35220 }, { "epoch": 0.5077612671691913, "grad_norm": 1.4460995650891921, "learning_rate": 5.7348809483674385e-06, "loss": 0.4698, "step": 35230 }, { "epoch": 0.5079053946932246, "grad_norm": 1.4191151917061473, "learning_rate": 5.732392645989717e-06, "loss": 0.4679, "step": 35240 }, { "epoch": 0.5080495222172579, "grad_norm": 1.460723255333908, "learning_rate": 5.729904158232249e-06, "loss": 0.475, "step": 35250 }, { "epoch": 0.5081936497412911, "grad_norm": 1.3968519921575766, "learning_rate": 5.727415485724911e-06, "loss": 0.4492, "step": 35260 }, { "epoch": 0.5083377772653244, "grad_norm": 1.6826709034481617, "learning_rate": 5.724926629097622e-06, "loss": 0.4587, "step": 35270 }, { "epoch": 0.5084819047893576, "grad_norm": 1.4600524044310803, "learning_rate": 5.72243758898035e-06, "loss": 0.4344, "step": 35280 }, { "epoch": 0.5086260323133909, "grad_norm": 1.4130029630379273, "learning_rate": 5.719948366003111e-06, "loss": 0.4696, "step": 35290 }, { "epoch": 0.5087701598374241, "grad_norm": 1.6511652121613203, "learning_rate": 5.717458960795963e-06, "loss": 0.4798, "step": 35300 }, { "epoch": 0.5089142873614574, "grad_norm": 1.4422198355553308, "learning_rate": 5.714969373989012e-06, "loss": 0.4966, "step": 35310 }, { "epoch": 0.5090584148854906, "grad_norm": 1.509659354567045, "learning_rate": 5.7124796062124146e-06, "loss": 0.4585, "step": 35320 }, { "epoch": 0.5092025424095239, "grad_norm": 1.411522300845506, "learning_rate": 5.7099896580963635e-06, "loss": 0.4591, "step": 35330 }, { "epoch": 0.5093466699335573, "grad_norm": 1.5119203232607328, "learning_rate": 5.7074995302711045e-06, "loss": 0.4752, "step": 35340 }, { "epoch": 0.5094907974575905, "grad_norm": 1.5699477612275063, "learning_rate": 5.7050092233669284e-06, "loss": 0.4568, "step": 35350 }, { "epoch": 0.5096349249816238, "grad_norm": 1.6383939710019169, "learning_rate": 5.702518738014169e-06, "loss": 0.4591, "step": 35360 }, { "epoch": 0.509779052505657, "grad_norm": 1.2616620942005863, "learning_rate": 5.7000280748432065e-06, "loss": 0.4322, "step": 35370 }, { "epoch": 0.5099231800296903, "grad_norm": 1.3494810089407288, "learning_rate": 5.697537234484464e-06, "loss": 0.4439, "step": 35380 }, { "epoch": 0.5100673075537235, "grad_norm": 1.6525222029736448, "learning_rate": 5.695046217568413e-06, "loss": 0.4786, "step": 35390 }, { "epoch": 0.5102114350777568, "grad_norm": 1.665650558810209, "learning_rate": 5.692555024725566e-06, "loss": 0.4619, "step": 35400 }, { "epoch": 0.51035556260179, "grad_norm": 1.3838959584937938, "learning_rate": 5.690063656586484e-06, "loss": 0.4598, "step": 35410 }, { "epoch": 0.5104996901258233, "grad_norm": 1.3841636202449847, "learning_rate": 5.687572113781771e-06, "loss": 0.4639, "step": 35420 }, { "epoch": 0.5106438176498566, "grad_norm": 1.5414661355883046, "learning_rate": 5.6850803969420705e-06, "loss": 0.4475, "step": 35430 }, { "epoch": 0.5107879451738898, "grad_norm": 1.467590120019346, "learning_rate": 5.682588506698077e-06, "loss": 0.4798, "step": 35440 }, { "epoch": 0.5109320726979231, "grad_norm": 1.5842795387684612, "learning_rate": 5.680096443680525e-06, "loss": 0.4583, "step": 35450 }, { "epoch": 0.5110762002219564, "grad_norm": 1.501255610711296, "learning_rate": 5.677604208520193e-06, "loss": 0.4561, "step": 35460 }, { "epoch": 0.5112203277459897, "grad_norm": 1.5593876158528854, "learning_rate": 5.675111801847908e-06, "loss": 0.4792, "step": 35470 }, { "epoch": 0.5113644552700229, "grad_norm": 1.4148408614102332, "learning_rate": 5.67261922429453e-06, "loss": 0.4524, "step": 35480 }, { "epoch": 0.5115085827940562, "grad_norm": 1.6880865928122053, "learning_rate": 5.670126476490972e-06, "loss": 0.4842, "step": 35490 }, { "epoch": 0.5116527103180895, "grad_norm": 1.830483940314315, "learning_rate": 5.6676335590681845e-06, "loss": 0.4508, "step": 35500 }, { "epoch": 0.5117968378421227, "grad_norm": 1.2859976531193986, "learning_rate": 5.665140472657164e-06, "loss": 0.442, "step": 35510 }, { "epoch": 0.511940965366156, "grad_norm": 1.6960322517906234, "learning_rate": 5.662647217888951e-06, "loss": 0.472, "step": 35520 }, { "epoch": 0.5120850928901892, "grad_norm": 1.4281043015240678, "learning_rate": 5.660153795394623e-06, "loss": 0.4327, "step": 35530 }, { "epoch": 0.5122292204142225, "grad_norm": 1.5516823206255022, "learning_rate": 5.6576602058053035e-06, "loss": 0.4507, "step": 35540 }, { "epoch": 0.5123733479382557, "grad_norm": 1.7306305491862122, "learning_rate": 5.6551664497521596e-06, "loss": 0.4674, "step": 35550 }, { "epoch": 0.512517475462289, "grad_norm": 1.6127546092031577, "learning_rate": 5.652672527866396e-06, "loss": 0.4599, "step": 35560 }, { "epoch": 0.5126616029863224, "grad_norm": 1.3682581873757909, "learning_rate": 5.650178440779268e-06, "loss": 0.4337, "step": 35570 }, { "epoch": 0.5128057305103556, "grad_norm": 1.4112184310633455, "learning_rate": 5.647684189122061e-06, "loss": 0.4561, "step": 35580 }, { "epoch": 0.5129498580343889, "grad_norm": 1.6174034078104422, "learning_rate": 5.6451897735261115e-06, "loss": 0.4438, "step": 35590 }, { "epoch": 0.5130939855584221, "grad_norm": 1.63140227178853, "learning_rate": 5.642695194622795e-06, "loss": 0.4533, "step": 35600 }, { "epoch": 0.5132381130824554, "grad_norm": 1.4296572703448835, "learning_rate": 5.640200453043523e-06, "loss": 0.4235, "step": 35610 }, { "epoch": 0.5133822406064886, "grad_norm": 1.4969348591328966, "learning_rate": 5.637705549419756e-06, "loss": 0.4495, "step": 35620 }, { "epoch": 0.5135263681305219, "grad_norm": 1.5239579892192985, "learning_rate": 5.635210484382992e-06, "loss": 0.4644, "step": 35630 }, { "epoch": 0.5136704956545551, "grad_norm": 1.6278800815085497, "learning_rate": 5.632715258564771e-06, "loss": 0.4881, "step": 35640 }, { "epoch": 0.5138146231785884, "grad_norm": 1.8125929176493745, "learning_rate": 5.6302198725966695e-06, "loss": 0.4674, "step": 35650 }, { "epoch": 0.5139587507026216, "grad_norm": 1.6085094323836764, "learning_rate": 5.62772432711031e-06, "loss": 0.4573, "step": 35660 }, { "epoch": 0.5141028782266549, "grad_norm": 1.8659866198628965, "learning_rate": 5.625228622737351e-06, "loss": 0.4863, "step": 35670 }, { "epoch": 0.5142470057506882, "grad_norm": 1.7191645487093998, "learning_rate": 5.622732760109498e-06, "loss": 0.438, "step": 35680 }, { "epoch": 0.5143911332747215, "grad_norm": 1.5427945586831362, "learning_rate": 5.620236739858487e-06, "loss": 0.4717, "step": 35690 }, { "epoch": 0.5145352607987548, "grad_norm": 1.4071973988914337, "learning_rate": 5.6177405626161e-06, "loss": 0.4654, "step": 35700 }, { "epoch": 0.514679388322788, "grad_norm": 2.1200919291207776, "learning_rate": 5.615244229014161e-06, "loss": 0.4534, "step": 35710 }, { "epoch": 0.5148235158468213, "grad_norm": 1.510684653516439, "learning_rate": 5.612747739684524e-06, "loss": 0.4726, "step": 35720 }, { "epoch": 0.5149676433708545, "grad_norm": 1.4753239565349252, "learning_rate": 5.6102510952590934e-06, "loss": 0.4747, "step": 35730 }, { "epoch": 0.5151117708948878, "grad_norm": 1.5418789937416424, "learning_rate": 5.607754296369806e-06, "loss": 0.4633, "step": 35740 }, { "epoch": 0.515255898418921, "grad_norm": 1.5034450317492585, "learning_rate": 5.60525734364864e-06, "loss": 0.4742, "step": 35750 }, { "epoch": 0.5154000259429543, "grad_norm": 1.5690595355244623, "learning_rate": 5.602760237727613e-06, "loss": 0.4733, "step": 35760 }, { "epoch": 0.5155441534669876, "grad_norm": 1.6818686624639065, "learning_rate": 5.600262979238779e-06, "loss": 0.4461, "step": 35770 }, { "epoch": 0.5156882809910208, "grad_norm": 1.6502076423135903, "learning_rate": 5.597765568814232e-06, "loss": 0.4985, "step": 35780 }, { "epoch": 0.5158324085150541, "grad_norm": 1.685514755121466, "learning_rate": 5.595268007086106e-06, "loss": 0.4549, "step": 35790 }, { "epoch": 0.5159765360390873, "grad_norm": 1.6665570522256696, "learning_rate": 5.592770294686571e-06, "loss": 0.4744, "step": 35800 }, { "epoch": 0.5161206635631207, "grad_norm": 1.179964669416231, "learning_rate": 5.59027243224784e-06, "loss": 0.4725, "step": 35810 }, { "epoch": 0.516264791087154, "grad_norm": 1.7027118451965586, "learning_rate": 5.5877744204021535e-06, "loss": 0.4584, "step": 35820 }, { "epoch": 0.5164089186111872, "grad_norm": 1.5893230630574584, "learning_rate": 5.5852762597818e-06, "loss": 0.4636, "step": 35830 }, { "epoch": 0.5165530461352205, "grad_norm": 1.4947409829461717, "learning_rate": 5.582777951019102e-06, "loss": 0.4818, "step": 35840 }, { "epoch": 0.5166971736592537, "grad_norm": 1.5373484901711778, "learning_rate": 5.580279494746419e-06, "loss": 0.4789, "step": 35850 }, { "epoch": 0.516841301183287, "grad_norm": 1.5226189177079912, "learning_rate": 5.57778089159615e-06, "loss": 0.4563, "step": 35860 }, { "epoch": 0.5169854287073202, "grad_norm": 1.8656006146231294, "learning_rate": 5.575282142200726e-06, "loss": 0.4576, "step": 35870 }, { "epoch": 0.5171295562313535, "grad_norm": 1.6551548333254233, "learning_rate": 5.572783247192621e-06, "loss": 0.439, "step": 35880 }, { "epoch": 0.5172736837553867, "grad_norm": 1.7758903362621563, "learning_rate": 5.570284207204344e-06, "loss": 0.4464, "step": 35890 }, { "epoch": 0.51741781127942, "grad_norm": 1.6955410665311015, "learning_rate": 5.5677850228684395e-06, "loss": 0.4683, "step": 35900 }, { "epoch": 0.5175619388034532, "grad_norm": 1.424953162818623, "learning_rate": 5.565285694817488e-06, "loss": 0.4558, "step": 35910 }, { "epoch": 0.5177060663274866, "grad_norm": 2.1328049649988468, "learning_rate": 5.562786223684109e-06, "loss": 0.4595, "step": 35920 }, { "epoch": 0.5178501938515199, "grad_norm": 1.4894710134356584, "learning_rate": 5.560286610100956e-06, "loss": 0.487, "step": 35930 }, { "epoch": 0.5179943213755531, "grad_norm": 1.465789609152315, "learning_rate": 5.557786854700719e-06, "loss": 0.4651, "step": 35940 }, { "epoch": 0.5181384488995864, "grad_norm": 1.5701669346496168, "learning_rate": 5.555286958116125e-06, "loss": 0.4743, "step": 35950 }, { "epoch": 0.5182825764236196, "grad_norm": 1.2720598579253146, "learning_rate": 5.552786920979935e-06, "loss": 0.458, "step": 35960 }, { "epoch": 0.5184267039476529, "grad_norm": 1.6222188150421477, "learning_rate": 5.550286743924949e-06, "loss": 0.4886, "step": 35970 }, { "epoch": 0.5185708314716861, "grad_norm": 1.5649009322767198, "learning_rate": 5.547786427583997e-06, "loss": 0.4644, "step": 35980 }, { "epoch": 0.5187149589957194, "grad_norm": 1.6558443694042724, "learning_rate": 5.545285972589949e-06, "loss": 0.4701, "step": 35990 }, { "epoch": 0.5188590865197527, "grad_norm": 1.6745778469939774, "learning_rate": 5.5427853795757055e-06, "loss": 0.4809, "step": 36000 }, { "epoch": 0.5190032140437859, "grad_norm": 1.6235368908631318, "learning_rate": 5.540284649174208e-06, "loss": 0.4765, "step": 36010 }, { "epoch": 0.5191473415678192, "grad_norm": 1.4568529380961321, "learning_rate": 5.5377837820184285e-06, "loss": 0.466, "step": 36020 }, { "epoch": 0.5192914690918524, "grad_norm": 1.6700103080044368, "learning_rate": 5.535282778741374e-06, "loss": 0.4415, "step": 36030 }, { "epoch": 0.5194355966158858, "grad_norm": 1.5079617301003985, "learning_rate": 5.532781639976087e-06, "loss": 0.4697, "step": 36040 }, { "epoch": 0.519579724139919, "grad_norm": 1.4991917633078604, "learning_rate": 5.530280366355645e-06, "loss": 0.4429, "step": 36050 }, { "epoch": 0.5197238516639523, "grad_norm": 1.4151654985491366, "learning_rate": 5.5277789585131545e-06, "loss": 0.4581, "step": 36060 }, { "epoch": 0.5198679791879856, "grad_norm": 1.7715557122016827, "learning_rate": 5.525277417081766e-06, "loss": 0.4947, "step": 36070 }, { "epoch": 0.5200121067120188, "grad_norm": 1.4002456825871927, "learning_rate": 5.522775742694653e-06, "loss": 0.4692, "step": 36080 }, { "epoch": 0.5201562342360521, "grad_norm": 1.659139667022029, "learning_rate": 5.520273935985028e-06, "loss": 0.4819, "step": 36090 }, { "epoch": 0.5203003617600853, "grad_norm": 1.7351771967574021, "learning_rate": 5.517771997586139e-06, "loss": 0.4878, "step": 36100 }, { "epoch": 0.5204444892841186, "grad_norm": 1.3584235566684566, "learning_rate": 5.515269928131262e-06, "loss": 0.4865, "step": 36110 }, { "epoch": 0.5205886168081518, "grad_norm": 1.4753329408532723, "learning_rate": 5.512767728253711e-06, "loss": 0.4527, "step": 36120 }, { "epoch": 0.5207327443321851, "grad_norm": 1.6762828425316814, "learning_rate": 5.510265398586828e-06, "loss": 0.4776, "step": 36130 }, { "epoch": 0.5208768718562183, "grad_norm": 1.4415843981517176, "learning_rate": 5.5077629397639944e-06, "loss": 0.454, "step": 36140 }, { "epoch": 0.5210209993802516, "grad_norm": 1.6692837886286118, "learning_rate": 5.5052603524186196e-06, "loss": 0.4721, "step": 36150 }, { "epoch": 0.521165126904285, "grad_norm": 1.3682434495362785, "learning_rate": 5.502757637184145e-06, "loss": 0.4435, "step": 36160 }, { "epoch": 0.5213092544283182, "grad_norm": 1.6242722814078345, "learning_rate": 5.500254794694048e-06, "loss": 0.4624, "step": 36170 }, { "epoch": 0.5214533819523515, "grad_norm": 1.7905045989207853, "learning_rate": 5.497751825581834e-06, "loss": 0.4808, "step": 36180 }, { "epoch": 0.5215975094763847, "grad_norm": 1.5379505985603088, "learning_rate": 5.495248730481044e-06, "loss": 0.4545, "step": 36190 }, { "epoch": 0.521741637000418, "grad_norm": 1.6965223765287918, "learning_rate": 5.49274551002525e-06, "loss": 0.4484, "step": 36200 }, { "epoch": 0.5218857645244512, "grad_norm": 1.4497940633626472, "learning_rate": 5.490242164848055e-06, "loss": 0.457, "step": 36210 }, { "epoch": 0.5220298920484845, "grad_norm": 1.494777644284114, "learning_rate": 5.4877386955830935e-06, "loss": 0.4654, "step": 36220 }, { "epoch": 0.5221740195725177, "grad_norm": 1.3844736238420419, "learning_rate": 5.4852351028640304e-06, "loss": 0.4436, "step": 36230 }, { "epoch": 0.522318147096551, "grad_norm": 1.589232470633262, "learning_rate": 5.482731387324566e-06, "loss": 0.4528, "step": 36240 }, { "epoch": 0.5224622746205843, "grad_norm": 1.5395897887341476, "learning_rate": 5.480227549598427e-06, "loss": 0.477, "step": 36250 }, { "epoch": 0.5226064021446175, "grad_norm": 1.4979515531724932, "learning_rate": 5.477723590319374e-06, "loss": 0.4522, "step": 36260 }, { "epoch": 0.5227505296686509, "grad_norm": 1.587819390305418, "learning_rate": 5.4752195101211955e-06, "loss": 0.4782, "step": 36270 }, { "epoch": 0.5228946571926841, "grad_norm": 1.5612895487937921, "learning_rate": 5.472715309637714e-06, "loss": 0.4475, "step": 36280 }, { "epoch": 0.5230387847167174, "grad_norm": 1.361127447970817, "learning_rate": 5.47021098950278e-06, "loss": 0.4645, "step": 36290 }, { "epoch": 0.5231829122407506, "grad_norm": 1.6388019391713853, "learning_rate": 5.467706550350275e-06, "loss": 0.4634, "step": 36300 }, { "epoch": 0.5233270397647839, "grad_norm": 1.4650014470943706, "learning_rate": 5.465201992814112e-06, "loss": 0.5034, "step": 36310 }, { "epoch": 0.5234711672888172, "grad_norm": 1.3498046443724856, "learning_rate": 5.4626973175282315e-06, "loss": 0.4495, "step": 36320 }, { "epoch": 0.5236152948128504, "grad_norm": 1.607318475444032, "learning_rate": 5.460192525126605e-06, "loss": 0.4693, "step": 36330 }, { "epoch": 0.5237594223368837, "grad_norm": 1.3063900580216086, "learning_rate": 5.457687616243234e-06, "loss": 0.4605, "step": 36340 }, { "epoch": 0.5239035498609169, "grad_norm": 1.579560766187891, "learning_rate": 5.45518259151215e-06, "loss": 0.4613, "step": 36350 }, { "epoch": 0.5240476773849502, "grad_norm": 1.3491603680972253, "learning_rate": 5.452677451567413e-06, "loss": 0.4394, "step": 36360 }, { "epoch": 0.5241918049089834, "grad_norm": 1.5240984101076565, "learning_rate": 5.450172197043109e-06, "loss": 0.4381, "step": 36370 }, { "epoch": 0.5243359324330167, "grad_norm": 1.5577294549102159, "learning_rate": 5.447666828573359e-06, "loss": 0.4541, "step": 36380 }, { "epoch": 0.52448005995705, "grad_norm": 1.4553613782735777, "learning_rate": 5.445161346792309e-06, "loss": 0.4726, "step": 36390 }, { "epoch": 0.5246241874810833, "grad_norm": 1.6110051716637386, "learning_rate": 5.442655752334135e-06, "loss": 0.469, "step": 36400 }, { "epoch": 0.5247683150051166, "grad_norm": 1.644016104536547, "learning_rate": 5.4401500458330425e-06, "loss": 0.4642, "step": 36410 }, { "epoch": 0.5249124425291498, "grad_norm": 1.4166830693164443, "learning_rate": 5.437644227923261e-06, "loss": 0.4829, "step": 36420 }, { "epoch": 0.5250565700531831, "grad_norm": 1.5421672478988608, "learning_rate": 5.435138299239053e-06, "loss": 0.464, "step": 36430 }, { "epoch": 0.5252006975772163, "grad_norm": 1.6104825118466404, "learning_rate": 5.432632260414707e-06, "loss": 0.4399, "step": 36440 }, { "epoch": 0.5253448251012496, "grad_norm": 1.4850211829863103, "learning_rate": 5.43012611208454e-06, "loss": 0.4864, "step": 36450 }, { "epoch": 0.5254889526252828, "grad_norm": 1.4781536506033626, "learning_rate": 5.427619854882898e-06, "loss": 0.4757, "step": 36460 }, { "epoch": 0.5256330801493161, "grad_norm": 1.5146245458427043, "learning_rate": 5.4251134894441486e-06, "loss": 0.4738, "step": 36470 }, { "epoch": 0.5257772076733493, "grad_norm": 1.6000578790228415, "learning_rate": 5.4226070164026934e-06, "loss": 0.4605, "step": 36480 }, { "epoch": 0.5259213351973826, "grad_norm": 1.665651999863201, "learning_rate": 5.42010043639296e-06, "loss": 0.4571, "step": 36490 }, { "epoch": 0.5260654627214159, "grad_norm": 1.7448717635031183, "learning_rate": 5.417593750049399e-06, "loss": 0.479, "step": 36500 }, { "epoch": 0.5262095902454492, "grad_norm": 1.6497017850229312, "learning_rate": 5.4150869580064954e-06, "loss": 0.4508, "step": 36510 }, { "epoch": 0.5263537177694825, "grad_norm": 1.7199265231346528, "learning_rate": 5.412580060898753e-06, "loss": 0.4802, "step": 36520 }, { "epoch": 0.5264978452935157, "grad_norm": 1.710759179434894, "learning_rate": 5.410073059360706e-06, "loss": 0.4538, "step": 36530 }, { "epoch": 0.526641972817549, "grad_norm": 1.5857994906685557, "learning_rate": 5.4075659540269175e-06, "loss": 0.4522, "step": 36540 }, { "epoch": 0.5267861003415822, "grad_norm": 1.800721430606689, "learning_rate": 5.40505874553197e-06, "loss": 0.4482, "step": 36550 }, { "epoch": 0.5269302278656155, "grad_norm": 1.595449790823619, "learning_rate": 5.402551434510479e-06, "loss": 0.4641, "step": 36560 }, { "epoch": 0.5270743553896488, "grad_norm": 1.782799944396859, "learning_rate": 5.400044021597083e-06, "loss": 0.483, "step": 36570 }, { "epoch": 0.527218482913682, "grad_norm": 1.6761162089210482, "learning_rate": 5.397536507426444e-06, "loss": 0.4673, "step": 36580 }, { "epoch": 0.5273626104377153, "grad_norm": 1.418984311162267, "learning_rate": 5.3950288926332564e-06, "loss": 0.4477, "step": 36590 }, { "epoch": 0.5275067379617485, "grad_norm": 1.5522954263603688, "learning_rate": 5.392521177852231e-06, "loss": 0.4788, "step": 36600 }, { "epoch": 0.5276508654857818, "grad_norm": 1.235425110940512, "learning_rate": 5.390013363718111e-06, "loss": 0.4425, "step": 36610 }, { "epoch": 0.5277949930098151, "grad_norm": 1.2450148701368138, "learning_rate": 5.387505450865663e-06, "loss": 0.4458, "step": 36620 }, { "epoch": 0.5279391205338484, "grad_norm": 1.301573836974629, "learning_rate": 5.384997439929678e-06, "loss": 0.4327, "step": 36630 }, { "epoch": 0.5280832480578816, "grad_norm": 1.723814658088641, "learning_rate": 5.382489331544971e-06, "loss": 0.476, "step": 36640 }, { "epoch": 0.5282273755819149, "grad_norm": 1.664268890297088, "learning_rate": 5.379981126346381e-06, "loss": 0.4916, "step": 36650 }, { "epoch": 0.5283715031059482, "grad_norm": 1.4478485442787845, "learning_rate": 5.377472824968775e-06, "loss": 0.4836, "step": 36660 }, { "epoch": 0.5285156306299814, "grad_norm": 1.541532600287598, "learning_rate": 5.374964428047041e-06, "loss": 0.4771, "step": 36670 }, { "epoch": 0.5286597581540147, "grad_norm": 1.4713247488182324, "learning_rate": 5.372455936216094e-06, "loss": 0.4567, "step": 36680 }, { "epoch": 0.5288038856780479, "grad_norm": 2.8518569454902307, "learning_rate": 5.369947350110872e-06, "loss": 0.4602, "step": 36690 }, { "epoch": 0.5289480132020812, "grad_norm": 1.632571755532429, "learning_rate": 5.367438670366335e-06, "loss": 0.491, "step": 36700 }, { "epoch": 0.5290921407261144, "grad_norm": 1.401770035870708, "learning_rate": 5.364929897617467e-06, "loss": 0.4607, "step": 36710 }, { "epoch": 0.5292362682501477, "grad_norm": 1.625073873366912, "learning_rate": 5.362421032499279e-06, "loss": 0.4676, "step": 36720 }, { "epoch": 0.529380395774181, "grad_norm": 1.3390246095632665, "learning_rate": 5.359912075646801e-06, "loss": 0.4581, "step": 36730 }, { "epoch": 0.5295245232982143, "grad_norm": 1.7822495079207166, "learning_rate": 5.3574030276950894e-06, "loss": 0.4705, "step": 36740 }, { "epoch": 0.5296686508222476, "grad_norm": 1.6539145219291567, "learning_rate": 5.354893889279223e-06, "loss": 0.4805, "step": 36750 }, { "epoch": 0.5298127783462808, "grad_norm": 1.8610742543216565, "learning_rate": 5.352384661034302e-06, "loss": 0.4602, "step": 36760 }, { "epoch": 0.5299569058703141, "grad_norm": 1.6168547812472653, "learning_rate": 5.349875343595451e-06, "loss": 0.4728, "step": 36770 }, { "epoch": 0.5301010333943473, "grad_norm": 1.3311142929946957, "learning_rate": 5.347365937597815e-06, "loss": 0.4528, "step": 36780 }, { "epoch": 0.5302451609183806, "grad_norm": 1.3663760402345282, "learning_rate": 5.344856443676565e-06, "loss": 0.4118, "step": 36790 }, { "epoch": 0.5303892884424138, "grad_norm": 1.5101787260211221, "learning_rate": 5.342346862466892e-06, "loss": 0.4575, "step": 36800 }, { "epoch": 0.5305334159664471, "grad_norm": 1.4715912517788998, "learning_rate": 5.339837194604006e-06, "loss": 0.4755, "step": 36810 }, { "epoch": 0.5306775434904804, "grad_norm": 1.6537956759773456, "learning_rate": 5.337327440723146e-06, "loss": 0.4846, "step": 36820 }, { "epoch": 0.5308216710145136, "grad_norm": 1.5969910837002772, "learning_rate": 5.334817601459568e-06, "loss": 0.4745, "step": 36830 }, { "epoch": 0.5309657985385469, "grad_norm": 1.5439109140090246, "learning_rate": 5.332307677448549e-06, "loss": 0.483, "step": 36840 }, { "epoch": 0.5311099260625801, "grad_norm": 1.5959934386769437, "learning_rate": 5.329797669325392e-06, "loss": 0.4388, "step": 36850 }, { "epoch": 0.5312540535866135, "grad_norm": 1.5420890325146106, "learning_rate": 5.327287577725415e-06, "loss": 0.4395, "step": 36860 }, { "epoch": 0.5313981811106467, "grad_norm": 1.857644812004084, "learning_rate": 5.324777403283962e-06, "loss": 0.4653, "step": 36870 }, { "epoch": 0.53154230863468, "grad_norm": 1.598212321605816, "learning_rate": 5.3222671466363995e-06, "loss": 0.4733, "step": 36880 }, { "epoch": 0.5316864361587132, "grad_norm": 1.7316741220045635, "learning_rate": 5.319756808418105e-06, "loss": 0.5015, "step": 36890 }, { "epoch": 0.5318305636827465, "grad_norm": 1.5100788903622804, "learning_rate": 5.317246389264489e-06, "loss": 0.477, "step": 36900 }, { "epoch": 0.5319746912067798, "grad_norm": 1.443485210776272, "learning_rate": 5.314735889810975e-06, "loss": 0.4904, "step": 36910 }, { "epoch": 0.532118818730813, "grad_norm": 1.7068484809873392, "learning_rate": 5.312225310693006e-06, "loss": 0.4793, "step": 36920 }, { "epoch": 0.5322629462548463, "grad_norm": 1.4493775830605058, "learning_rate": 5.309714652546054e-06, "loss": 0.4842, "step": 36930 }, { "epoch": 0.5324070737788795, "grad_norm": 1.543906744145479, "learning_rate": 5.307203916005599e-06, "loss": 0.4447, "step": 36940 }, { "epoch": 0.5325512013029128, "grad_norm": 1.5779916610486964, "learning_rate": 5.30469310170715e-06, "loss": 0.4343, "step": 36950 }, { "epoch": 0.532695328826946, "grad_norm": 1.5925314014083312, "learning_rate": 5.302182210286232e-06, "loss": 0.4445, "step": 36960 }, { "epoch": 0.5328394563509794, "grad_norm": 1.5257000765389548, "learning_rate": 5.299671242378388e-06, "loss": 0.4465, "step": 36970 }, { "epoch": 0.5329835838750127, "grad_norm": 1.439789023765836, "learning_rate": 5.297160198619184e-06, "loss": 0.4807, "step": 36980 }, { "epoch": 0.5331277113990459, "grad_norm": 1.5408972295976184, "learning_rate": 5.294649079644204e-06, "loss": 0.4503, "step": 36990 }, { "epoch": 0.5332718389230792, "grad_norm": 1.7073567091652593, "learning_rate": 5.292137886089047e-06, "loss": 0.4741, "step": 37000 }, { "epoch": 0.5334159664471124, "grad_norm": 1.4473315664291206, "learning_rate": 5.28962661858934e-06, "loss": 0.4385, "step": 37010 }, { "epoch": 0.5335600939711457, "grad_norm": 3.7974044060837766, "learning_rate": 5.287115277780718e-06, "loss": 0.4846, "step": 37020 }, { "epoch": 0.5337042214951789, "grad_norm": 1.4593506556285791, "learning_rate": 5.2846038642988415e-06, "loss": 0.4526, "step": 37030 }, { "epoch": 0.5338483490192122, "grad_norm": 1.7130288361078858, "learning_rate": 5.28209237877939e-06, "loss": 0.4466, "step": 37040 }, { "epoch": 0.5339924765432454, "grad_norm": 1.7728680669303243, "learning_rate": 5.279580821858054e-06, "loss": 0.4638, "step": 37050 }, { "epoch": 0.5341366040672787, "grad_norm": 1.4910931105582903, "learning_rate": 5.27706919417055e-06, "loss": 0.4781, "step": 37060 }, { "epoch": 0.534280731591312, "grad_norm": 1.4151814393604718, "learning_rate": 5.274557496352609e-06, "loss": 0.473, "step": 37070 }, { "epoch": 0.5344248591153452, "grad_norm": 2.127451875291446, "learning_rate": 5.272045729039978e-06, "loss": 0.484, "step": 37080 }, { "epoch": 0.5345689866393786, "grad_norm": 1.7254462216771858, "learning_rate": 5.269533892868428e-06, "loss": 0.4832, "step": 37090 }, { "epoch": 0.5347131141634118, "grad_norm": 1.623492549897459, "learning_rate": 5.267021988473738e-06, "loss": 0.4411, "step": 37100 }, { "epoch": 0.5348572416874451, "grad_norm": 1.4480206966388924, "learning_rate": 5.264510016491713e-06, "loss": 0.442, "step": 37110 }, { "epoch": 0.5350013692114783, "grad_norm": 1.578227982793027, "learning_rate": 5.261997977558169e-06, "loss": 0.4604, "step": 37120 }, { "epoch": 0.5351454967355116, "grad_norm": 2.0322157212184715, "learning_rate": 5.259485872308942e-06, "loss": 0.4735, "step": 37130 }, { "epoch": 0.5352896242595448, "grad_norm": 1.7434522681637667, "learning_rate": 5.256973701379886e-06, "loss": 0.4504, "step": 37140 }, { "epoch": 0.5354337517835781, "grad_norm": 1.5827539110461128, "learning_rate": 5.254461465406867e-06, "loss": 0.4763, "step": 37150 }, { "epoch": 0.5355778793076114, "grad_norm": 1.4504966496767076, "learning_rate": 5.251949165025772e-06, "loss": 0.4505, "step": 37160 }, { "epoch": 0.5357220068316446, "grad_norm": 1.624740336249175, "learning_rate": 5.2494368008725004e-06, "loss": 0.4771, "step": 37170 }, { "epoch": 0.5358661343556779, "grad_norm": 1.6128305444111608, "learning_rate": 5.246924373582972e-06, "loss": 0.4432, "step": 37180 }, { "epoch": 0.5360102618797111, "grad_norm": 1.3468787676776834, "learning_rate": 5.244411883793121e-06, "loss": 0.4674, "step": 37190 }, { "epoch": 0.5361543894037444, "grad_norm": 1.714973117365849, "learning_rate": 5.241899332138896e-06, "loss": 0.4566, "step": 37200 }, { "epoch": 0.5362985169277777, "grad_norm": 1.5790418000591935, "learning_rate": 5.2393867192562605e-06, "loss": 0.4487, "step": 37210 }, { "epoch": 0.536442644451811, "grad_norm": 1.6637895983479978, "learning_rate": 5.236874045781198e-06, "loss": 0.4808, "step": 37220 }, { "epoch": 0.5365867719758443, "grad_norm": 1.6879187638488253, "learning_rate": 5.234361312349701e-06, "loss": 0.466, "step": 37230 }, { "epoch": 0.5367308994998775, "grad_norm": 1.6822095050633201, "learning_rate": 5.231848519597784e-06, "loss": 0.4605, "step": 37240 }, { "epoch": 0.5368750270239108, "grad_norm": 1.66888109959196, "learning_rate": 5.229335668161472e-06, "loss": 0.4608, "step": 37250 }, { "epoch": 0.537019154547944, "grad_norm": 1.7360610550002948, "learning_rate": 5.226822758676804e-06, "loss": 0.4697, "step": 37260 }, { "epoch": 0.5371632820719773, "grad_norm": 1.6579820007593118, "learning_rate": 5.22430979177984e-06, "loss": 0.4634, "step": 37270 }, { "epoch": 0.5373074095960105, "grad_norm": 1.6311653576309784, "learning_rate": 5.221796768106645e-06, "loss": 0.484, "step": 37280 }, { "epoch": 0.5374515371200438, "grad_norm": 1.35522642378553, "learning_rate": 5.219283688293308e-06, "loss": 0.4477, "step": 37290 }, { "epoch": 0.537595664644077, "grad_norm": 1.5959927943147754, "learning_rate": 5.2167705529759265e-06, "loss": 0.4778, "step": 37300 }, { "epoch": 0.5377397921681103, "grad_norm": 1.6005512008298515, "learning_rate": 5.214257362790612e-06, "loss": 0.4547, "step": 37310 }, { "epoch": 0.5378839196921437, "grad_norm": 1.6947381939944752, "learning_rate": 5.211744118373493e-06, "loss": 0.4868, "step": 37320 }, { "epoch": 0.5380280472161769, "grad_norm": 1.6802126705550624, "learning_rate": 5.209230820360708e-06, "loss": 0.4619, "step": 37330 }, { "epoch": 0.5381721747402102, "grad_norm": 1.3048675115152515, "learning_rate": 5.206717469388412e-06, "loss": 0.4326, "step": 37340 }, { "epoch": 0.5383163022642434, "grad_norm": 1.6482517272510766, "learning_rate": 5.204204066092777e-06, "loss": 0.4798, "step": 37350 }, { "epoch": 0.5384604297882767, "grad_norm": 1.676479677134855, "learning_rate": 5.201690611109975e-06, "loss": 0.4832, "step": 37360 }, { "epoch": 0.5386045573123099, "grad_norm": 1.530730528597889, "learning_rate": 5.199177105076206e-06, "loss": 0.4806, "step": 37370 }, { "epoch": 0.5387486848363432, "grad_norm": 1.5181186256975168, "learning_rate": 5.196663548627675e-06, "loss": 0.4479, "step": 37380 }, { "epoch": 0.5388928123603764, "grad_norm": 1.4804361125405487, "learning_rate": 5.194149942400599e-06, "loss": 0.4774, "step": 37390 }, { "epoch": 0.5390369398844097, "grad_norm": 1.6152463220774174, "learning_rate": 5.191636287031215e-06, "loss": 0.4847, "step": 37400 }, { "epoch": 0.539181067408443, "grad_norm": 1.51060378704119, "learning_rate": 5.1891225831557646e-06, "loss": 0.481, "step": 37410 }, { "epoch": 0.5393251949324762, "grad_norm": 1.4067689775197914, "learning_rate": 5.186608831410504e-06, "loss": 0.4455, "step": 37420 }, { "epoch": 0.5394693224565095, "grad_norm": 1.3187880064076791, "learning_rate": 5.184095032431705e-06, "loss": 0.4591, "step": 37430 }, { "epoch": 0.5396134499805428, "grad_norm": 1.5936139618371956, "learning_rate": 5.1815811868556445e-06, "loss": 0.4817, "step": 37440 }, { "epoch": 0.5397575775045761, "grad_norm": 1.3932913560111855, "learning_rate": 5.179067295318618e-06, "loss": 0.4627, "step": 37450 }, { "epoch": 0.5399017050286093, "grad_norm": 1.393847004501826, "learning_rate": 5.176553358456929e-06, "loss": 0.4766, "step": 37460 }, { "epoch": 0.5400458325526426, "grad_norm": 1.5413265830923188, "learning_rate": 5.1740393769068915e-06, "loss": 0.4502, "step": 37470 }, { "epoch": 0.5401899600766759, "grad_norm": 1.6111837164131053, "learning_rate": 5.171525351304837e-06, "loss": 0.4374, "step": 37480 }, { "epoch": 0.5403340876007091, "grad_norm": 1.5554508554583262, "learning_rate": 5.1690112822870985e-06, "loss": 0.4598, "step": 37490 }, { "epoch": 0.5404782151247424, "grad_norm": 1.5874777699894982, "learning_rate": 5.166497170490028e-06, "loss": 0.4799, "step": 37500 }, { "epoch": 0.5406223426487756, "grad_norm": 1.3444718411610108, "learning_rate": 5.163983016549986e-06, "loss": 0.4602, "step": 37510 }, { "epoch": 0.5407664701728089, "grad_norm": 1.3888526262660599, "learning_rate": 5.161468821103341e-06, "loss": 0.4667, "step": 37520 }, { "epoch": 0.5409105976968421, "grad_norm": 1.4745564544477106, "learning_rate": 5.158954584786477e-06, "loss": 0.4634, "step": 37530 }, { "epoch": 0.5410547252208754, "grad_norm": 1.6170116035412505, "learning_rate": 5.156440308235784e-06, "loss": 0.4743, "step": 37540 }, { "epoch": 0.5411988527449086, "grad_norm": 1.5384867506994426, "learning_rate": 5.153925992087663e-06, "loss": 0.4405, "step": 37550 }, { "epoch": 0.541342980268942, "grad_norm": 1.9137166862098434, "learning_rate": 5.151411636978526e-06, "loss": 0.4556, "step": 37560 }, { "epoch": 0.5414871077929753, "grad_norm": 1.8240241620539113, "learning_rate": 5.148897243544796e-06, "loss": 0.4707, "step": 37570 }, { "epoch": 0.5416312353170085, "grad_norm": 1.4491547744068984, "learning_rate": 5.146382812422904e-06, "loss": 0.4328, "step": 37580 }, { "epoch": 0.5417753628410418, "grad_norm": 1.58733168892316, "learning_rate": 5.14386834424929e-06, "loss": 0.4454, "step": 37590 }, { "epoch": 0.541919490365075, "grad_norm": 1.8055728943337441, "learning_rate": 5.141353839660403e-06, "loss": 0.4637, "step": 37600 }, { "epoch": 0.5420636178891083, "grad_norm": 2.272660671464405, "learning_rate": 5.138839299292706e-06, "loss": 0.4262, "step": 37610 }, { "epoch": 0.5422077454131415, "grad_norm": 1.5395536186764165, "learning_rate": 5.136324723782664e-06, "loss": 0.4845, "step": 37620 }, { "epoch": 0.5423518729371748, "grad_norm": 1.4903644247391237, "learning_rate": 5.1338101137667575e-06, "loss": 0.488, "step": 37630 }, { "epoch": 0.542496000461208, "grad_norm": 1.6239057126293144, "learning_rate": 5.13129546988147e-06, "loss": 0.4671, "step": 37640 }, { "epoch": 0.5426401279852413, "grad_norm": 1.700645092304939, "learning_rate": 5.128780792763298e-06, "loss": 0.4707, "step": 37650 }, { "epoch": 0.5427842555092746, "grad_norm": 1.3933719621887843, "learning_rate": 5.126266083048743e-06, "loss": 0.48, "step": 37660 }, { "epoch": 0.5429283830333078, "grad_norm": 1.3218728295299165, "learning_rate": 5.123751341374317e-06, "loss": 0.4387, "step": 37670 }, { "epoch": 0.5430725105573412, "grad_norm": 1.5102465916918784, "learning_rate": 5.121236568376542e-06, "loss": 0.4405, "step": 37680 }, { "epoch": 0.5432166380813744, "grad_norm": 1.75351741700287, "learning_rate": 5.118721764691941e-06, "loss": 0.4631, "step": 37690 }, { "epoch": 0.5433607656054077, "grad_norm": 1.383318415856007, "learning_rate": 5.1162069309570504e-06, "loss": 0.4562, "step": 37700 }, { "epoch": 0.543504893129441, "grad_norm": 1.6018641353511593, "learning_rate": 5.113692067808417e-06, "loss": 0.4785, "step": 37710 }, { "epoch": 0.5436490206534742, "grad_norm": 2.0947890788117576, "learning_rate": 5.111177175882585e-06, "loss": 0.4786, "step": 37720 }, { "epoch": 0.5437931481775075, "grad_norm": 1.6360574899363416, "learning_rate": 5.1086622558161144e-06, "loss": 0.4652, "step": 37730 }, { "epoch": 0.5439372757015407, "grad_norm": 1.811215470175936, "learning_rate": 5.106147308245573e-06, "loss": 0.449, "step": 37740 }, { "epoch": 0.544081403225574, "grad_norm": 1.597737585561143, "learning_rate": 5.1036323338075265e-06, "loss": 0.4777, "step": 37750 }, { "epoch": 0.5442255307496072, "grad_norm": 1.6241393580301062, "learning_rate": 5.101117333138558e-06, "loss": 0.474, "step": 37760 }, { "epoch": 0.5443696582736405, "grad_norm": 1.6593615378372573, "learning_rate": 5.09860230687525e-06, "loss": 0.4549, "step": 37770 }, { "epoch": 0.5445137857976737, "grad_norm": 1.5115273657028585, "learning_rate": 5.096087255654195e-06, "loss": 0.4322, "step": 37780 }, { "epoch": 0.5446579133217071, "grad_norm": 1.5775114655697955, "learning_rate": 5.093572180111992e-06, "loss": 0.4694, "step": 37790 }, { "epoch": 0.5448020408457404, "grad_norm": 1.4662400787313532, "learning_rate": 5.091057080885241e-06, "loss": 0.4924, "step": 37800 }, { "epoch": 0.5449461683697736, "grad_norm": 1.4000139311933373, "learning_rate": 5.088541958610557e-06, "loss": 0.4587, "step": 37810 }, { "epoch": 0.5450902958938069, "grad_norm": 1.5219028390751566, "learning_rate": 5.0860268139245516e-06, "loss": 0.4467, "step": 37820 }, { "epoch": 0.5452344234178401, "grad_norm": 1.449116794766293, "learning_rate": 5.083511647463848e-06, "loss": 0.477, "step": 37830 }, { "epoch": 0.5453785509418734, "grad_norm": 1.6014230421733375, "learning_rate": 5.080996459865074e-06, "loss": 0.4614, "step": 37840 }, { "epoch": 0.5455226784659066, "grad_norm": 1.5709078043843459, "learning_rate": 5.078481251764861e-06, "loss": 0.4498, "step": 37850 }, { "epoch": 0.5456668059899399, "grad_norm": 1.3548885484426991, "learning_rate": 5.075966023799845e-06, "loss": 0.4613, "step": 37860 }, { "epoch": 0.5458109335139731, "grad_norm": 1.7079252176317272, "learning_rate": 5.073450776606673e-06, "loss": 0.4581, "step": 37870 }, { "epoch": 0.5459550610380064, "grad_norm": 1.6341083950646875, "learning_rate": 5.070935510821987e-06, "loss": 0.4541, "step": 37880 }, { "epoch": 0.5460991885620397, "grad_norm": 1.6817648835710204, "learning_rate": 5.068420227082444e-06, "loss": 0.4712, "step": 37890 }, { "epoch": 0.5462433160860729, "grad_norm": 1.625874616230247, "learning_rate": 5.0659049260246965e-06, "loss": 0.4668, "step": 37900 }, { "epoch": 0.5463874436101063, "grad_norm": 1.551552912855378, "learning_rate": 5.063389608285407e-06, "loss": 0.4544, "step": 37910 }, { "epoch": 0.5465315711341395, "grad_norm": 1.532693316313019, "learning_rate": 5.0608742745012425e-06, "loss": 0.4732, "step": 37920 }, { "epoch": 0.5466756986581728, "grad_norm": 1.4033680544866831, "learning_rate": 5.058358925308872e-06, "loss": 0.4578, "step": 37930 }, { "epoch": 0.546819826182206, "grad_norm": 1.4480312526447525, "learning_rate": 5.055843561344965e-06, "loss": 0.4557, "step": 37940 }, { "epoch": 0.5469639537062393, "grad_norm": 1.5197948835157706, "learning_rate": 5.053328183246203e-06, "loss": 0.458, "step": 37950 }, { "epoch": 0.5471080812302725, "grad_norm": 1.5783005050991215, "learning_rate": 5.050812791649263e-06, "loss": 0.4264, "step": 37960 }, { "epoch": 0.5472522087543058, "grad_norm": 1.47717238196859, "learning_rate": 5.0482973871908335e-06, "loss": 0.4908, "step": 37970 }, { "epoch": 0.5473963362783391, "grad_norm": 1.660778469557244, "learning_rate": 5.045781970507597e-06, "loss": 0.4694, "step": 37980 }, { "epoch": 0.5475404638023723, "grad_norm": 1.802771653486674, "learning_rate": 5.0432665422362456e-06, "loss": 0.4676, "step": 37990 }, { "epoch": 0.5476845913264056, "grad_norm": 1.8646000455327754, "learning_rate": 5.040751103013474e-06, "loss": 0.4543, "step": 38000 }, { "epoch": 0.5478287188504388, "grad_norm": 1.5011678471878214, "learning_rate": 5.038235653475976e-06, "loss": 0.468, "step": 38010 }, { "epoch": 0.5479728463744721, "grad_norm": 1.5657441189827925, "learning_rate": 5.035720194260452e-06, "loss": 0.4718, "step": 38020 }, { "epoch": 0.5481169738985054, "grad_norm": 1.6162885610850177, "learning_rate": 5.033204726003601e-06, "loss": 0.4533, "step": 38030 }, { "epoch": 0.5482611014225387, "grad_norm": 1.6345390905864108, "learning_rate": 5.030689249342128e-06, "loss": 0.4525, "step": 38040 }, { "epoch": 0.548405228946572, "grad_norm": 1.7110419281429632, "learning_rate": 5.028173764912737e-06, "loss": 0.4959, "step": 38050 }, { "epoch": 0.5485493564706052, "grad_norm": 1.3062528787284153, "learning_rate": 5.025658273352138e-06, "loss": 0.4327, "step": 38060 }, { "epoch": 0.5486934839946385, "grad_norm": 1.6041440505645792, "learning_rate": 5.0231427752970384e-06, "loss": 0.4777, "step": 38070 }, { "epoch": 0.5488376115186717, "grad_norm": 1.5589079139280912, "learning_rate": 5.020627271384151e-06, "loss": 0.4741, "step": 38080 }, { "epoch": 0.548981739042705, "grad_norm": 1.3161275682417568, "learning_rate": 5.018111762250184e-06, "loss": 0.4598, "step": 38090 }, { "epoch": 0.5491258665667382, "grad_norm": 1.7712028132247586, "learning_rate": 5.015596248531857e-06, "loss": 0.4628, "step": 38100 }, { "epoch": 0.5492699940907715, "grad_norm": 1.7058011787730687, "learning_rate": 5.013080730865878e-06, "loss": 0.4572, "step": 38110 }, { "epoch": 0.5494141216148047, "grad_norm": 1.4476790869824583, "learning_rate": 5.010565209888968e-06, "loss": 0.462, "step": 38120 }, { "epoch": 0.549558249138838, "grad_norm": 1.42667969921962, "learning_rate": 5.008049686237844e-06, "loss": 0.4413, "step": 38130 }, { "epoch": 0.5497023766628714, "grad_norm": 1.4767126484863689, "learning_rate": 5.00553416054922e-06, "loss": 0.4459, "step": 38140 }, { "epoch": 0.5498465041869046, "grad_norm": 1.6753408067888498, "learning_rate": 5.0030186334598164e-06, "loss": 0.4803, "step": 38150 }, { "epoch": 0.5499906317109379, "grad_norm": 1.4557216855593744, "learning_rate": 5.000503105606351e-06, "loss": 0.4686, "step": 38160 }, { "epoch": 0.5501347592349711, "grad_norm": 1.253330500219252, "learning_rate": 4.99798757762554e-06, "loss": 0.4536, "step": 38170 }, { "epoch": 0.5502788867590044, "grad_norm": 1.5720876569934683, "learning_rate": 4.995472050154106e-06, "loss": 0.4818, "step": 38180 }, { "epoch": 0.5504230142830376, "grad_norm": 2.353863565242887, "learning_rate": 4.9929565238287635e-06, "loss": 0.4469, "step": 38190 }, { "epoch": 0.5505671418070709, "grad_norm": 1.8105817740291112, "learning_rate": 4.990440999286233e-06, "loss": 0.4447, "step": 38200 }, { "epoch": 0.5507112693311041, "grad_norm": 1.5693411195497207, "learning_rate": 4.987925477163232e-06, "loss": 0.4335, "step": 38210 }, { "epoch": 0.5508553968551374, "grad_norm": 1.5138032018469965, "learning_rate": 4.985409958096476e-06, "loss": 0.4594, "step": 38220 }, { "epoch": 0.5509995243791707, "grad_norm": 1.7821747658132419, "learning_rate": 4.982894442722681e-06, "loss": 0.4686, "step": 38230 }, { "epoch": 0.5511436519032039, "grad_norm": 1.434068457770524, "learning_rate": 4.980378931678564e-06, "loss": 0.4864, "step": 38240 }, { "epoch": 0.5512877794272372, "grad_norm": 1.6165793129992487, "learning_rate": 4.977863425600839e-06, "loss": 0.4373, "step": 38250 }, { "epoch": 0.5514319069512705, "grad_norm": 1.4378739131006757, "learning_rate": 4.975347925126219e-06, "loss": 0.4353, "step": 38260 }, { "epoch": 0.5515760344753038, "grad_norm": 1.3394344005764869, "learning_rate": 4.972832430891413e-06, "loss": 0.4501, "step": 38270 }, { "epoch": 0.551720161999337, "grad_norm": 1.581963009869215, "learning_rate": 4.970316943533136e-06, "loss": 0.4429, "step": 38280 }, { "epoch": 0.5518642895233703, "grad_norm": 1.369065993148357, "learning_rate": 4.9678014636880915e-06, "loss": 0.4698, "step": 38290 }, { "epoch": 0.5520084170474036, "grad_norm": 1.7763820288714998, "learning_rate": 4.965285991992987e-06, "loss": 0.4627, "step": 38300 }, { "epoch": 0.5521525445714368, "grad_norm": 1.4508466665733433, "learning_rate": 4.962770529084528e-06, "loss": 0.4414, "step": 38310 }, { "epoch": 0.5522966720954701, "grad_norm": 1.3128925087289822, "learning_rate": 4.960255075599416e-06, "loss": 0.4681, "step": 38320 }, { "epoch": 0.5524407996195033, "grad_norm": 1.412732023303427, "learning_rate": 4.95773963217435e-06, "loss": 0.4629, "step": 38330 }, { "epoch": 0.5525849271435366, "grad_norm": 1.5224813300428965, "learning_rate": 4.955224199446027e-06, "loss": 0.4554, "step": 38340 }, { "epoch": 0.5527290546675698, "grad_norm": 1.5990583444845325, "learning_rate": 4.952708778051142e-06, "loss": 0.4673, "step": 38350 }, { "epoch": 0.5528731821916031, "grad_norm": 2.1119458819288113, "learning_rate": 4.950193368626384e-06, "loss": 0.4379, "step": 38360 }, { "epoch": 0.5530173097156363, "grad_norm": 1.5713225953635677, "learning_rate": 4.947677971808446e-06, "loss": 0.4596, "step": 38370 }, { "epoch": 0.5531614372396697, "grad_norm": 1.6568904965960622, "learning_rate": 4.945162588234011e-06, "loss": 0.4482, "step": 38380 }, { "epoch": 0.553305564763703, "grad_norm": 1.4638290207002025, "learning_rate": 4.942647218539763e-06, "loss": 0.4563, "step": 38390 }, { "epoch": 0.5534496922877362, "grad_norm": 1.5925363681324722, "learning_rate": 4.940131863362376e-06, "loss": 0.4589, "step": 38400 }, { "epoch": 0.5535938198117695, "grad_norm": 1.6095981727936977, "learning_rate": 4.9376165233385284e-06, "loss": 0.4284, "step": 38410 }, { "epoch": 0.5537379473358027, "grad_norm": 1.6419777618029003, "learning_rate": 4.935101199104889e-06, "loss": 0.4374, "step": 38420 }, { "epoch": 0.553882074859836, "grad_norm": 2.25148011786883, "learning_rate": 4.9325858912981265e-06, "loss": 0.4726, "step": 38430 }, { "epoch": 0.5540262023838692, "grad_norm": 1.637621710705603, "learning_rate": 4.930070600554903e-06, "loss": 0.4643, "step": 38440 }, { "epoch": 0.5541703299079025, "grad_norm": 1.6997442067911144, "learning_rate": 4.927555327511875e-06, "loss": 0.4504, "step": 38450 }, { "epoch": 0.5543144574319357, "grad_norm": 1.4555229049256009, "learning_rate": 4.9250400728056985e-06, "loss": 0.4429, "step": 38460 }, { "epoch": 0.554458584955969, "grad_norm": 1.8754903541099321, "learning_rate": 4.922524837073022e-06, "loss": 0.4614, "step": 38470 }, { "epoch": 0.5546027124800023, "grad_norm": 1.5433616313950231, "learning_rate": 4.9200096209504905e-06, "loss": 0.4385, "step": 38480 }, { "epoch": 0.5547468400040356, "grad_norm": 1.5333904485218586, "learning_rate": 4.917494425074745e-06, "loss": 0.4537, "step": 38490 }, { "epoch": 0.5548909675280689, "grad_norm": 1.7383077786046761, "learning_rate": 4.9149792500824174e-06, "loss": 0.4502, "step": 38500 }, { "epoch": 0.5550350950521021, "grad_norm": 1.3563869932052375, "learning_rate": 4.912464096610139e-06, "loss": 0.4731, "step": 38510 }, { "epoch": 0.5551792225761354, "grad_norm": 1.4976129044813686, "learning_rate": 4.909948965294533e-06, "loss": 0.4568, "step": 38520 }, { "epoch": 0.5553233501001686, "grad_norm": 1.2349793737125923, "learning_rate": 4.907433856772214e-06, "loss": 0.4334, "step": 38530 }, { "epoch": 0.5554674776242019, "grad_norm": 1.433866142881104, "learning_rate": 4.904918771679799e-06, "loss": 0.4404, "step": 38540 }, { "epoch": 0.5556116051482352, "grad_norm": 1.7951908320022145, "learning_rate": 4.902403710653893e-06, "loss": 0.4972, "step": 38550 }, { "epoch": 0.5557557326722684, "grad_norm": 1.6407768225041803, "learning_rate": 4.8998886743310955e-06, "loss": 0.4306, "step": 38560 }, { "epoch": 0.5558998601963017, "grad_norm": 1.6242650050752776, "learning_rate": 4.897373663348e-06, "loss": 0.4486, "step": 38570 }, { "epoch": 0.5560439877203349, "grad_norm": 1.4888085089758774, "learning_rate": 4.894858678341195e-06, "loss": 0.4467, "step": 38580 }, { "epoch": 0.5561881152443682, "grad_norm": 2.1261812574134624, "learning_rate": 4.892343719947262e-06, "loss": 0.4478, "step": 38590 }, { "epoch": 0.5563322427684014, "grad_norm": 1.8415447845202455, "learning_rate": 4.8898287888027755e-06, "loss": 0.469, "step": 38600 }, { "epoch": 0.5564763702924348, "grad_norm": 1.448150305840433, "learning_rate": 4.887313885544301e-06, "loss": 0.4422, "step": 38610 }, { "epoch": 0.556620497816468, "grad_norm": 1.4550360666858624, "learning_rate": 4.884799010808402e-06, "loss": 0.4527, "step": 38620 }, { "epoch": 0.5567646253405013, "grad_norm": 1.4904300544037468, "learning_rate": 4.882284165231629e-06, "loss": 0.4622, "step": 38630 }, { "epoch": 0.5569087528645346, "grad_norm": 1.5622579546139823, "learning_rate": 4.8797693494505274e-06, "loss": 0.4774, "step": 38640 }, { "epoch": 0.5570528803885678, "grad_norm": 1.5921330896412096, "learning_rate": 4.877254564101639e-06, "loss": 0.4471, "step": 38650 }, { "epoch": 0.5571970079126011, "grad_norm": 1.4851570864114383, "learning_rate": 4.874739809821489e-06, "loss": 0.4785, "step": 38660 }, { "epoch": 0.5573411354366343, "grad_norm": 2.36929348994567, "learning_rate": 4.872225087246604e-06, "loss": 0.4802, "step": 38670 }, { "epoch": 0.5574852629606676, "grad_norm": 1.584667449570066, "learning_rate": 4.869710397013499e-06, "loss": 0.4798, "step": 38680 }, { "epoch": 0.5576293904847008, "grad_norm": 2.214034095347911, "learning_rate": 4.867195739758675e-06, "loss": 0.4462, "step": 38690 }, { "epoch": 0.5577735180087341, "grad_norm": 1.5684794588405948, "learning_rate": 4.864681116118639e-06, "loss": 0.453, "step": 38700 }, { "epoch": 0.5579176455327673, "grad_norm": 1.238534861492573, "learning_rate": 4.862166526729873e-06, "loss": 0.4307, "step": 38710 }, { "epoch": 0.5580617730568006, "grad_norm": 1.634551271541335, "learning_rate": 4.859651972228861e-06, "loss": 0.4657, "step": 38720 }, { "epoch": 0.558205900580834, "grad_norm": 1.4960726833855762, "learning_rate": 4.857137453252075e-06, "loss": 0.4705, "step": 38730 }, { "epoch": 0.5583500281048672, "grad_norm": 1.8010404859333446, "learning_rate": 4.854622970435977e-06, "loss": 0.4943, "step": 38740 }, { "epoch": 0.5584941556289005, "grad_norm": 1.4489978797896257, "learning_rate": 4.8521085244170245e-06, "loss": 0.4675, "step": 38750 }, { "epoch": 0.5586382831529337, "grad_norm": 1.3734630082944377, "learning_rate": 4.849594115831656e-06, "loss": 0.4328, "step": 38760 }, { "epoch": 0.558782410676967, "grad_norm": 1.2069160831524954, "learning_rate": 4.847079745316311e-06, "loss": 0.4691, "step": 38770 }, { "epoch": 0.5589265382010002, "grad_norm": 1.5715842452953404, "learning_rate": 4.844565413507415e-06, "loss": 0.4706, "step": 38780 }, { "epoch": 0.5590706657250335, "grad_norm": 1.3820002648804308, "learning_rate": 4.842051121041382e-06, "loss": 0.4111, "step": 38790 }, { "epoch": 0.5592147932490668, "grad_norm": 1.5399613475211764, "learning_rate": 4.839536868554616e-06, "loss": 0.4612, "step": 38800 }, { "epoch": 0.5593589207731, "grad_norm": 1.6923355032471878, "learning_rate": 4.837022656683518e-06, "loss": 0.4335, "step": 38810 }, { "epoch": 0.5595030482971333, "grad_norm": 1.6290628671150222, "learning_rate": 4.834508486064469e-06, "loss": 0.4528, "step": 38820 }, { "epoch": 0.5596471758211665, "grad_norm": 1.5170934515051187, "learning_rate": 4.831994357333847e-06, "loss": 0.4627, "step": 38830 }, { "epoch": 0.5597913033451999, "grad_norm": 2.011909487702345, "learning_rate": 4.829480271128014e-06, "loss": 0.4823, "step": 38840 }, { "epoch": 0.5599354308692331, "grad_norm": 1.7514509475689042, "learning_rate": 4.826966228083324e-06, "loss": 0.4558, "step": 38850 }, { "epoch": 0.5600795583932664, "grad_norm": 1.5413292929368823, "learning_rate": 4.824452228836121e-06, "loss": 0.4381, "step": 38860 }, { "epoch": 0.5602236859172997, "grad_norm": 1.464078332259744, "learning_rate": 4.821938274022734e-06, "loss": 0.5032, "step": 38870 }, { "epoch": 0.5603678134413329, "grad_norm": 1.6359585523421512, "learning_rate": 4.819424364279484e-06, "loss": 0.4686, "step": 38880 }, { "epoch": 0.5605119409653662, "grad_norm": 1.5635700910297992, "learning_rate": 4.8169105002426825e-06, "loss": 0.4535, "step": 38890 }, { "epoch": 0.5606560684893994, "grad_norm": 1.3964637100432529, "learning_rate": 4.814396682548624e-06, "loss": 0.4321, "step": 38900 }, { "epoch": 0.5608001960134327, "grad_norm": 1.5720392250544915, "learning_rate": 4.811882911833594e-06, "loss": 0.459, "step": 38910 }, { "epoch": 0.5609443235374659, "grad_norm": 1.4356324997899466, "learning_rate": 4.809369188733867e-06, "loss": 0.4489, "step": 38920 }, { "epoch": 0.5610884510614992, "grad_norm": 1.7757196131403563, "learning_rate": 4.806855513885707e-06, "loss": 0.4593, "step": 38930 }, { "epoch": 0.5612325785855324, "grad_norm": 1.5766101978241818, "learning_rate": 4.80434188792536e-06, "loss": 0.4578, "step": 38940 }, { "epoch": 0.5613767061095657, "grad_norm": 1.6380738784840494, "learning_rate": 4.801828311489064e-06, "loss": 0.45, "step": 38950 }, { "epoch": 0.5615208336335991, "grad_norm": 1.3838551785145947, "learning_rate": 4.799314785213045e-06, "loss": 0.4615, "step": 38960 }, { "epoch": 0.5616649611576323, "grad_norm": 1.8290247250406337, "learning_rate": 4.796801309733513e-06, "loss": 0.4735, "step": 38970 }, { "epoch": 0.5618090886816656, "grad_norm": 1.743858972335429, "learning_rate": 4.7942878856866676e-06, "loss": 0.442, "step": 38980 }, { "epoch": 0.5619532162056988, "grad_norm": 1.7694288692478437, "learning_rate": 4.791774513708695e-06, "loss": 0.4315, "step": 38990 }, { "epoch": 0.5620973437297321, "grad_norm": 1.6522475148414966, "learning_rate": 4.789261194435768e-06, "loss": 0.4532, "step": 39000 }, { "epoch": 0.5622414712537653, "grad_norm": 1.455382838033725, "learning_rate": 4.786747928504044e-06, "loss": 0.449, "step": 39010 }, { "epoch": 0.5623855987777986, "grad_norm": 1.5499274978328876, "learning_rate": 4.784234716549671e-06, "loss": 0.4649, "step": 39020 }, { "epoch": 0.5625297263018318, "grad_norm": 1.5515931838906545, "learning_rate": 4.781721559208782e-06, "loss": 0.4414, "step": 39030 }, { "epoch": 0.5626738538258651, "grad_norm": 1.5820320547424298, "learning_rate": 4.779208457117495e-06, "loss": 0.468, "step": 39040 }, { "epoch": 0.5628179813498984, "grad_norm": 1.5007360526693598, "learning_rate": 4.776695410911913e-06, "loss": 0.4703, "step": 39050 }, { "epoch": 0.5629621088739316, "grad_norm": 1.4912725996682992, "learning_rate": 4.774182421228128e-06, "loss": 0.4687, "step": 39060 }, { "epoch": 0.5631062363979649, "grad_norm": 1.757767426565982, "learning_rate": 4.771669488702214e-06, "loss": 0.4721, "step": 39070 }, { "epoch": 0.5632503639219982, "grad_norm": 1.7012098468057897, "learning_rate": 4.769156613970233e-06, "loss": 0.4966, "step": 39080 }, { "epoch": 0.5633944914460315, "grad_norm": 1.646465376555122, "learning_rate": 4.766643797668234e-06, "loss": 0.4726, "step": 39090 }, { "epoch": 0.5635386189700647, "grad_norm": 1.604210159298889, "learning_rate": 4.7641310404322475e-06, "loss": 0.4346, "step": 39100 }, { "epoch": 0.563682746494098, "grad_norm": 1.414628654557984, "learning_rate": 4.761618342898291e-06, "loss": 0.4344, "step": 39110 }, { "epoch": 0.5638268740181313, "grad_norm": 1.5274404075805808, "learning_rate": 4.759105705702366e-06, "loss": 0.459, "step": 39120 }, { "epoch": 0.5639710015421645, "grad_norm": 1.5607876144764925, "learning_rate": 4.756593129480458e-06, "loss": 0.4665, "step": 39130 }, { "epoch": 0.5641151290661978, "grad_norm": 1.5464147012235703, "learning_rate": 4.754080614868543e-06, "loss": 0.4396, "step": 39140 }, { "epoch": 0.564259256590231, "grad_norm": 1.4243839439456834, "learning_rate": 4.7515681625025725e-06, "loss": 0.4361, "step": 39150 }, { "epoch": 0.5644033841142643, "grad_norm": 1.4069430869813824, "learning_rate": 4.749055773018488e-06, "loss": 0.4368, "step": 39160 }, { "epoch": 0.5645475116382975, "grad_norm": 2.187305586321528, "learning_rate": 4.746543447052214e-06, "loss": 0.4678, "step": 39170 }, { "epoch": 0.5646916391623308, "grad_norm": 1.4958178070591042, "learning_rate": 4.744031185239657e-06, "loss": 0.437, "step": 39180 }, { "epoch": 0.5648357666863641, "grad_norm": 1.1528146769010235, "learning_rate": 4.74151898821671e-06, "loss": 0.4413, "step": 39190 }, { "epoch": 0.5649798942103974, "grad_norm": 1.5397352037266536, "learning_rate": 4.739006856619249e-06, "loss": 0.472, "step": 39200 }, { "epoch": 0.5651240217344307, "grad_norm": 1.4341843660113185, "learning_rate": 4.73649479108313e-06, "loss": 0.4456, "step": 39210 }, { "epoch": 0.5652681492584639, "grad_norm": 1.6340053554202276, "learning_rate": 4.733982792244198e-06, "loss": 0.4605, "step": 39220 }, { "epoch": 0.5654122767824972, "grad_norm": 1.6392357089644851, "learning_rate": 4.731470860738278e-06, "loss": 0.4457, "step": 39230 }, { "epoch": 0.5655564043065304, "grad_norm": 1.6063668023503712, "learning_rate": 4.728958997201175e-06, "loss": 0.4439, "step": 39240 }, { "epoch": 0.5657005318305637, "grad_norm": 1.6543077273465598, "learning_rate": 4.726447202268683e-06, "loss": 0.4534, "step": 39250 }, { "epoch": 0.5658446593545969, "grad_norm": 2.3173889922526407, "learning_rate": 4.723935476576576e-06, "loss": 0.443, "step": 39260 }, { "epoch": 0.5659887868786302, "grad_norm": 1.3230126949948322, "learning_rate": 4.721423820760609e-06, "loss": 0.4627, "step": 39270 }, { "epoch": 0.5661329144026634, "grad_norm": 1.8528209669931628, "learning_rate": 4.71891223545652e-06, "loss": 0.4727, "step": 39280 }, { "epoch": 0.5662770419266967, "grad_norm": 1.4379942809245247, "learning_rate": 4.716400721300029e-06, "loss": 0.455, "step": 39290 }, { "epoch": 0.56642116945073, "grad_norm": 1.5485855020360997, "learning_rate": 4.7138892789268406e-06, "loss": 0.4624, "step": 39300 }, { "epoch": 0.5665652969747633, "grad_norm": 1.5326260020638593, "learning_rate": 4.711377908972637e-06, "loss": 0.4382, "step": 39310 }, { "epoch": 0.5667094244987966, "grad_norm": 1.46526475658046, "learning_rate": 4.708866612073086e-06, "loss": 0.4692, "step": 39320 }, { "epoch": 0.5668535520228298, "grad_norm": 1.4239082179893425, "learning_rate": 4.706355388863835e-06, "loss": 0.4417, "step": 39330 }, { "epoch": 0.5669976795468631, "grad_norm": 1.521896425026199, "learning_rate": 4.703844239980511e-06, "loss": 0.4432, "step": 39340 }, { "epoch": 0.5671418070708963, "grad_norm": 1.6099052630701784, "learning_rate": 4.7013331660587235e-06, "loss": 0.476, "step": 39350 }, { "epoch": 0.5672859345949296, "grad_norm": 1.6035540675554898, "learning_rate": 4.698822167734067e-06, "loss": 0.4589, "step": 39360 }, { "epoch": 0.5674300621189629, "grad_norm": 1.5961152661036135, "learning_rate": 4.696311245642112e-06, "loss": 0.4471, "step": 39370 }, { "epoch": 0.5675741896429961, "grad_norm": 1.6928483449150535, "learning_rate": 4.693800400418411e-06, "loss": 0.4617, "step": 39380 }, { "epoch": 0.5677183171670294, "grad_norm": 1.4427253835347087, "learning_rate": 4.691289632698497e-06, "loss": 0.4502, "step": 39390 }, { "epoch": 0.5678624446910626, "grad_norm": 1.4578582579031296, "learning_rate": 4.688778943117884e-06, "loss": 0.4731, "step": 39400 }, { "epoch": 0.5680065722150959, "grad_norm": 1.6924155913556165, "learning_rate": 4.686268332312066e-06, "loss": 0.4916, "step": 39410 }, { "epoch": 0.5681506997391291, "grad_norm": 1.7548104027778544, "learning_rate": 4.683757800916515e-06, "loss": 0.4852, "step": 39420 }, { "epoch": 0.5682948272631625, "grad_norm": 1.5370115203622803, "learning_rate": 4.681247349566689e-06, "loss": 0.4694, "step": 39430 }, { "epoch": 0.5684389547871957, "grad_norm": 1.3261669452237022, "learning_rate": 4.678736978898016e-06, "loss": 0.4279, "step": 39440 }, { "epoch": 0.568583082311229, "grad_norm": 1.5662876439326205, "learning_rate": 4.676226689545912e-06, "loss": 0.4611, "step": 39450 }, { "epoch": 0.5687272098352623, "grad_norm": 1.4497515227418323, "learning_rate": 4.6737164821457685e-06, "loss": 0.4561, "step": 39460 }, { "epoch": 0.5688713373592955, "grad_norm": 1.4551960718094406, "learning_rate": 4.67120635733296e-06, "loss": 0.4723, "step": 39470 }, { "epoch": 0.5690154648833288, "grad_norm": 1.5144064216820907, "learning_rate": 4.668696315742835e-06, "loss": 0.4674, "step": 39480 }, { "epoch": 0.569159592407362, "grad_norm": 1.436301195321993, "learning_rate": 4.666186358010724e-06, "loss": 0.4624, "step": 39490 }, { "epoch": 0.5693037199313953, "grad_norm": 1.639335090349737, "learning_rate": 4.663676484771935e-06, "loss": 0.4495, "step": 39500 }, { "epoch": 0.5694478474554285, "grad_norm": 1.6515919718990435, "learning_rate": 4.661166696661756e-06, "loss": 0.4661, "step": 39510 }, { "epoch": 0.5695919749794618, "grad_norm": 1.6313200933570768, "learning_rate": 4.658656994315452e-06, "loss": 0.4785, "step": 39520 }, { "epoch": 0.569736102503495, "grad_norm": 1.7022178960049656, "learning_rate": 4.656147378368266e-06, "loss": 0.4546, "step": 39530 }, { "epoch": 0.5698802300275284, "grad_norm": 1.647157596077954, "learning_rate": 4.653637849455423e-06, "loss": 0.4305, "step": 39540 }, { "epoch": 0.5700243575515617, "grad_norm": 1.6113771047745924, "learning_rate": 4.65112840821212e-06, "loss": 0.4421, "step": 39550 }, { "epoch": 0.5701684850755949, "grad_norm": 1.5271236529246603, "learning_rate": 4.6486190552735375e-06, "loss": 0.4714, "step": 39560 }, { "epoch": 0.5703126125996282, "grad_norm": 1.4910033527923223, "learning_rate": 4.646109791274826e-06, "loss": 0.4936, "step": 39570 }, { "epoch": 0.5704567401236614, "grad_norm": 1.402459975756256, "learning_rate": 4.6436006168511245e-06, "loss": 0.4017, "step": 39580 }, { "epoch": 0.5706008676476947, "grad_norm": 1.4868950161784458, "learning_rate": 4.64109153263754e-06, "loss": 0.453, "step": 39590 }, { "epoch": 0.5707449951717279, "grad_norm": 1.645846561207322, "learning_rate": 4.638582539269161e-06, "loss": 0.4741, "step": 39600 }, { "epoch": 0.5708891226957612, "grad_norm": 1.3052992838115447, "learning_rate": 4.6360736373810525e-06, "loss": 0.4431, "step": 39610 }, { "epoch": 0.5710332502197945, "grad_norm": 1.6248056788581124, "learning_rate": 4.633564827608254e-06, "loss": 0.4442, "step": 39620 }, { "epoch": 0.5711773777438277, "grad_norm": 1.5669537653081425, "learning_rate": 4.631056110585785e-06, "loss": 0.445, "step": 39630 }, { "epoch": 0.571321505267861, "grad_norm": 1.5562855063044034, "learning_rate": 4.62854748694864e-06, "loss": 0.488, "step": 39640 }, { "epoch": 0.5714656327918942, "grad_norm": 1.4757720158153558, "learning_rate": 4.626038957331789e-06, "loss": 0.4494, "step": 39650 }, { "epoch": 0.5716097603159276, "grad_norm": 1.3244772925193675, "learning_rate": 4.623530522370179e-06, "loss": 0.4391, "step": 39660 }, { "epoch": 0.5717538878399608, "grad_norm": 1.4687792208040507, "learning_rate": 4.621022182698736e-06, "loss": 0.463, "step": 39670 }, { "epoch": 0.5718980153639941, "grad_norm": 1.6256380221761735, "learning_rate": 4.618513938952353e-06, "loss": 0.4864, "step": 39680 }, { "epoch": 0.5720421428880273, "grad_norm": 1.3034819202689538, "learning_rate": 4.61600579176591e-06, "loss": 0.4463, "step": 39690 }, { "epoch": 0.5721862704120606, "grad_norm": 1.5507886802173492, "learning_rate": 4.613497741774256e-06, "loss": 0.4506, "step": 39700 }, { "epoch": 0.5723303979360939, "grad_norm": 1.5002392433072318, "learning_rate": 4.610989789612217e-06, "loss": 0.443, "step": 39710 }, { "epoch": 0.5724745254601271, "grad_norm": 1.6274249926743738, "learning_rate": 4.6084819359145925e-06, "loss": 0.4866, "step": 39720 }, { "epoch": 0.5726186529841604, "grad_norm": 3.841546011594747, "learning_rate": 4.60597418131616e-06, "loss": 0.4485, "step": 39730 }, { "epoch": 0.5727627805081936, "grad_norm": 1.3947628283259905, "learning_rate": 4.60346652645167e-06, "loss": 0.4623, "step": 39740 }, { "epoch": 0.5729069080322269, "grad_norm": 1.609418695313543, "learning_rate": 4.600958971955847e-06, "loss": 0.4615, "step": 39750 }, { "epoch": 0.5730510355562601, "grad_norm": 1.4855992166350769, "learning_rate": 4.598451518463393e-06, "loss": 0.4523, "step": 39760 }, { "epoch": 0.5731951630802934, "grad_norm": 1.5084408395929563, "learning_rate": 4.595944166608982e-06, "loss": 0.4731, "step": 39770 }, { "epoch": 0.5733392906043268, "grad_norm": 1.4844403766295202, "learning_rate": 4.593436917027263e-06, "loss": 0.4537, "step": 39780 }, { "epoch": 0.57348341812836, "grad_norm": 1.4766040032693564, "learning_rate": 4.590929770352858e-06, "loss": 0.4541, "step": 39790 }, { "epoch": 0.5736275456523933, "grad_norm": 1.4440324366509347, "learning_rate": 4.588422727220365e-06, "loss": 0.4484, "step": 39800 }, { "epoch": 0.5737716731764265, "grad_norm": 1.7011094993644404, "learning_rate": 4.585915788264354e-06, "loss": 0.4397, "step": 39810 }, { "epoch": 0.5739158007004598, "grad_norm": 1.4388141096341651, "learning_rate": 4.583408954119374e-06, "loss": 0.4511, "step": 39820 }, { "epoch": 0.574059928224493, "grad_norm": 1.9006285647319316, "learning_rate": 4.580902225419937e-06, "loss": 0.4712, "step": 39830 }, { "epoch": 0.5742040557485263, "grad_norm": 1.2063191435572356, "learning_rate": 4.578395602800537e-06, "loss": 0.4501, "step": 39840 }, { "epoch": 0.5743481832725595, "grad_norm": 1.6228341258915433, "learning_rate": 4.575889086895638e-06, "loss": 0.4688, "step": 39850 }, { "epoch": 0.5744923107965928, "grad_norm": 1.5232152603576066, "learning_rate": 4.573382678339677e-06, "loss": 0.4184, "step": 39860 }, { "epoch": 0.574636438320626, "grad_norm": 1.5244585879825994, "learning_rate": 4.570876377767065e-06, "loss": 0.4586, "step": 39870 }, { "epoch": 0.5747805658446593, "grad_norm": 1.489794260203928, "learning_rate": 4.568370185812185e-06, "loss": 0.4337, "step": 39880 }, { "epoch": 0.5749246933686927, "grad_norm": 1.5882085584629642, "learning_rate": 4.5658641031093914e-06, "loss": 0.4454, "step": 39890 }, { "epoch": 0.5750688208927259, "grad_norm": 1.684731793676535, "learning_rate": 4.563358130293013e-06, "loss": 0.4498, "step": 39900 }, { "epoch": 0.5752129484167592, "grad_norm": 1.3640289000949173, "learning_rate": 4.560852267997346e-06, "loss": 0.4492, "step": 39910 }, { "epoch": 0.5753570759407924, "grad_norm": 1.6700552479487725, "learning_rate": 4.558346516856669e-06, "loss": 0.4631, "step": 39920 }, { "epoch": 0.5755012034648257, "grad_norm": 1.534008459666677, "learning_rate": 4.555840877505221e-06, "loss": 0.4464, "step": 39930 }, { "epoch": 0.575645330988859, "grad_norm": 1.2984264329906434, "learning_rate": 4.553335350577219e-06, "loss": 0.4546, "step": 39940 }, { "epoch": 0.5757894585128922, "grad_norm": 1.434831752842008, "learning_rate": 4.5508299367068506e-06, "loss": 0.4528, "step": 39950 }, { "epoch": 0.5759335860369255, "grad_norm": 1.5074060353831122, "learning_rate": 4.5483246365282726e-06, "loss": 0.4353, "step": 39960 }, { "epoch": 0.5760777135609587, "grad_norm": 1.5338007396517197, "learning_rate": 4.545819450675616e-06, "loss": 0.4496, "step": 39970 }, { "epoch": 0.576221841084992, "grad_norm": 1.534142494381964, "learning_rate": 4.543314379782981e-06, "loss": 0.4635, "step": 39980 }, { "epoch": 0.5763659686090252, "grad_norm": 1.7916210395093684, "learning_rate": 4.54080942448444e-06, "loss": 0.47, "step": 39990 }, { "epoch": 0.5765100961330585, "grad_norm": 1.878420747071709, "learning_rate": 4.538304585414034e-06, "loss": 0.4584, "step": 40000 }, { "epoch": 0.5766542236570918, "grad_norm": 1.5632691405732693, "learning_rate": 4.535799863205778e-06, "loss": 0.4427, "step": 40010 }, { "epoch": 0.5767983511811251, "grad_norm": 1.8885378134669004, "learning_rate": 4.5332952584936515e-06, "loss": 0.4318, "step": 40020 }, { "epoch": 0.5769424787051584, "grad_norm": 1.5396377007233844, "learning_rate": 4.530790771911613e-06, "loss": 0.4564, "step": 40030 }, { "epoch": 0.5770866062291916, "grad_norm": 1.4578645626348046, "learning_rate": 4.528286404093583e-06, "loss": 0.4592, "step": 40040 }, { "epoch": 0.5772307337532249, "grad_norm": 1.7401504379698458, "learning_rate": 4.525782155673458e-06, "loss": 0.4661, "step": 40050 }, { "epoch": 0.5773748612772581, "grad_norm": 1.5600121119210784, "learning_rate": 4.523278027285098e-06, "loss": 0.4711, "step": 40060 }, { "epoch": 0.5775189888012914, "grad_norm": 1.6236889396105083, "learning_rate": 4.520774019562337e-06, "loss": 0.4436, "step": 40070 }, { "epoch": 0.5776631163253246, "grad_norm": 1.5054410676573435, "learning_rate": 4.518270133138981e-06, "loss": 0.4455, "step": 40080 }, { "epoch": 0.5778072438493579, "grad_norm": 1.336529998231639, "learning_rate": 4.515766368648797e-06, "loss": 0.4788, "step": 40090 }, { "epoch": 0.5779513713733911, "grad_norm": 1.4770550689078121, "learning_rate": 4.513262726725528e-06, "loss": 0.4561, "step": 40100 }, { "epoch": 0.5780954988974244, "grad_norm": 1.475190120375319, "learning_rate": 4.510759208002885e-06, "loss": 0.462, "step": 40110 }, { "epoch": 0.5782396264214577, "grad_norm": 1.4248953825453488, "learning_rate": 4.5082558131145445e-06, "loss": 0.4521, "step": 40120 }, { "epoch": 0.578383753945491, "grad_norm": 1.5901077222906292, "learning_rate": 4.5057525426941555e-06, "loss": 0.4699, "step": 40130 }, { "epoch": 0.5785278814695243, "grad_norm": 1.9297700213603697, "learning_rate": 4.503249397375333e-06, "loss": 0.4636, "step": 40140 }, { "epoch": 0.5786720089935575, "grad_norm": 1.6716155921667522, "learning_rate": 4.500746377791662e-06, "loss": 0.4459, "step": 40150 }, { "epoch": 0.5788161365175908, "grad_norm": 1.264867832679635, "learning_rate": 4.498243484576697e-06, "loss": 0.4239, "step": 40160 }, { "epoch": 0.578960264041624, "grad_norm": 1.5647294906180982, "learning_rate": 4.495740718363954e-06, "loss": 0.4649, "step": 40170 }, { "epoch": 0.5791043915656573, "grad_norm": 1.4368110235893334, "learning_rate": 4.4932380797869235e-06, "loss": 0.4414, "step": 40180 }, { "epoch": 0.5792485190896906, "grad_norm": 1.6120667924339704, "learning_rate": 4.490735569479063e-06, "loss": 0.47, "step": 40190 }, { "epoch": 0.5793926466137238, "grad_norm": 1.5548991168287742, "learning_rate": 4.488233188073793e-06, "loss": 0.4866, "step": 40200 }, { "epoch": 0.5795367741377571, "grad_norm": 1.609650022026108, "learning_rate": 4.4857309362045065e-06, "loss": 0.4653, "step": 40210 }, { "epoch": 0.5796809016617903, "grad_norm": 1.4179520077767291, "learning_rate": 4.483228814504561e-06, "loss": 0.4758, "step": 40220 }, { "epoch": 0.5798250291858236, "grad_norm": 1.4088570523366817, "learning_rate": 4.48072682360728e-06, "loss": 0.4363, "step": 40230 }, { "epoch": 0.5799691567098569, "grad_norm": 1.5991140120764296, "learning_rate": 4.4782249641459574e-06, "loss": 0.4854, "step": 40240 }, { "epoch": 0.5801132842338902, "grad_norm": 1.6379200770109583, "learning_rate": 4.475723236753853e-06, "loss": 0.4738, "step": 40250 }, { "epoch": 0.5802574117579234, "grad_norm": 1.5870210166184486, "learning_rate": 4.473221642064191e-06, "loss": 0.4725, "step": 40260 }, { "epoch": 0.5804015392819567, "grad_norm": 1.2336345408274636, "learning_rate": 4.470720180710162e-06, "loss": 0.4481, "step": 40270 }, { "epoch": 0.58054566680599, "grad_norm": 1.6350866908034152, "learning_rate": 4.468218853324926e-06, "loss": 0.4784, "step": 40280 }, { "epoch": 0.5806897943300232, "grad_norm": 1.5388786577036813, "learning_rate": 4.465717660541606e-06, "loss": 0.4771, "step": 40290 }, { "epoch": 0.5808339218540565, "grad_norm": 1.485845953330438, "learning_rate": 4.463216602993291e-06, "loss": 0.4307, "step": 40300 }, { "epoch": 0.5809780493780897, "grad_norm": 1.413343469757955, "learning_rate": 4.460715681313039e-06, "loss": 0.4603, "step": 40310 }, { "epoch": 0.581122176902123, "grad_norm": 1.9763696538231466, "learning_rate": 4.45821489613387e-06, "loss": 0.4412, "step": 40320 }, { "epoch": 0.5812663044261562, "grad_norm": 1.9507404054004254, "learning_rate": 4.455714248088771e-06, "loss": 0.4704, "step": 40330 }, { "epoch": 0.5814104319501895, "grad_norm": 1.6251183888371967, "learning_rate": 4.4532137378106945e-06, "loss": 0.4505, "step": 40340 }, { "epoch": 0.5815545594742227, "grad_norm": 1.61664603892641, "learning_rate": 4.450713365932557e-06, "loss": 0.4717, "step": 40350 }, { "epoch": 0.5816986869982561, "grad_norm": 1.4955245436894506, "learning_rate": 4.448213133087243e-06, "loss": 0.4594, "step": 40360 }, { "epoch": 0.5818428145222894, "grad_norm": 1.445331815336829, "learning_rate": 4.445713039907596e-06, "loss": 0.4419, "step": 40370 }, { "epoch": 0.5819869420463226, "grad_norm": 1.4942298941433119, "learning_rate": 4.44321308702643e-06, "loss": 0.4596, "step": 40380 }, { "epoch": 0.5821310695703559, "grad_norm": 1.4302263916405478, "learning_rate": 4.440713275076522e-06, "loss": 0.463, "step": 40390 }, { "epoch": 0.5822751970943891, "grad_norm": 1.7466530029973806, "learning_rate": 4.438213604690611e-06, "loss": 0.4363, "step": 40400 }, { "epoch": 0.5824193246184224, "grad_norm": 1.5083939582459935, "learning_rate": 4.4357140765014015e-06, "loss": 0.4527, "step": 40410 }, { "epoch": 0.5825634521424556, "grad_norm": 1.6080848644534085, "learning_rate": 4.433214691141563e-06, "loss": 0.4701, "step": 40420 }, { "epoch": 0.5827075796664889, "grad_norm": 1.3763663461125761, "learning_rate": 4.430715449243728e-06, "loss": 0.4806, "step": 40430 }, { "epoch": 0.5828517071905222, "grad_norm": 1.5284689694062417, "learning_rate": 4.428216351440492e-06, "loss": 0.4519, "step": 40440 }, { "epoch": 0.5829958347145554, "grad_norm": 1.8062286630891145, "learning_rate": 4.425717398364416e-06, "loss": 0.4626, "step": 40450 }, { "epoch": 0.5831399622385887, "grad_norm": 1.5381661871820378, "learning_rate": 4.423218590648019e-06, "loss": 0.4458, "step": 40460 }, { "epoch": 0.5832840897626219, "grad_norm": 1.1656343267802713, "learning_rate": 4.420719928923795e-06, "loss": 0.4366, "step": 40470 }, { "epoch": 0.5834282172866553, "grad_norm": 1.70711833280701, "learning_rate": 4.418221413824187e-06, "loss": 0.4463, "step": 40480 }, { "epoch": 0.5835723448106885, "grad_norm": 1.5351912141814104, "learning_rate": 4.41572304598161e-06, "loss": 0.4437, "step": 40490 }, { "epoch": 0.5837164723347218, "grad_norm": 2.653509408708213, "learning_rate": 4.413224826028439e-06, "loss": 0.4762, "step": 40500 }, { "epoch": 0.583860599858755, "grad_norm": 1.4212653330418965, "learning_rate": 4.410726754597009e-06, "loss": 0.4702, "step": 40510 }, { "epoch": 0.5840047273827883, "grad_norm": 9.780242694371962, "learning_rate": 4.4082288323196245e-06, "loss": 0.5016, "step": 40520 }, { "epoch": 0.5841488549068216, "grad_norm": 1.8302495273436241, "learning_rate": 4.405731059828543e-06, "loss": 0.4644, "step": 40530 }, { "epoch": 0.5842929824308548, "grad_norm": 1.6389685102061926, "learning_rate": 4.403233437755992e-06, "loss": 0.4647, "step": 40540 }, { "epoch": 0.5844371099548881, "grad_norm": 1.639309941926737, "learning_rate": 4.400735966734156e-06, "loss": 0.449, "step": 40550 }, { "epoch": 0.5845812374789213, "grad_norm": 1.4215297108484466, "learning_rate": 4.398238647395183e-06, "loss": 0.4619, "step": 40560 }, { "epoch": 0.5847253650029546, "grad_norm": 1.8364622034918205, "learning_rate": 4.395741480371183e-06, "loss": 0.4741, "step": 40570 }, { "epoch": 0.5848694925269878, "grad_norm": 1.556936764614015, "learning_rate": 4.393244466294228e-06, "loss": 0.4335, "step": 40580 }, { "epoch": 0.5850136200510211, "grad_norm": 1.428583733181042, "learning_rate": 4.390747605796349e-06, "loss": 0.4236, "step": 40590 }, { "epoch": 0.5851577475750545, "grad_norm": 1.5906274945184706, "learning_rate": 4.388250899509541e-06, "loss": 0.4535, "step": 40600 }, { "epoch": 0.5853018750990877, "grad_norm": 1.5122076555523039, "learning_rate": 4.385754348065757e-06, "loss": 0.4542, "step": 40610 }, { "epoch": 0.585446002623121, "grad_norm": 2.2328937921273275, "learning_rate": 4.383257952096912e-06, "loss": 0.4615, "step": 40620 }, { "epoch": 0.5855901301471542, "grad_norm": 1.4858292011497403, "learning_rate": 4.380761712234885e-06, "loss": 0.4453, "step": 40630 }, { "epoch": 0.5857342576711875, "grad_norm": 1.5525295116211706, "learning_rate": 4.378265629111508e-06, "loss": 0.4275, "step": 40640 }, { "epoch": 0.5858783851952207, "grad_norm": 1.5046319119136329, "learning_rate": 4.37576970335858e-06, "loss": 0.4484, "step": 40650 }, { "epoch": 0.586022512719254, "grad_norm": 1.3489167821952361, "learning_rate": 4.373273935607858e-06, "loss": 0.4496, "step": 40660 }, { "epoch": 0.5861666402432872, "grad_norm": 1.77715835237768, "learning_rate": 4.370778326491057e-06, "loss": 0.449, "step": 40670 }, { "epoch": 0.5863107677673205, "grad_norm": 1.657784939281618, "learning_rate": 4.368282876639856e-06, "loss": 0.4574, "step": 40680 }, { "epoch": 0.5864548952913538, "grad_norm": 1.533073371378726, "learning_rate": 4.3657875866858894e-06, "loss": 0.4422, "step": 40690 }, { "epoch": 0.586599022815387, "grad_norm": 1.8040856169906758, "learning_rate": 4.363292457260756e-06, "loss": 0.4505, "step": 40700 }, { "epoch": 0.5867431503394204, "grad_norm": 1.5223826375784084, "learning_rate": 4.360797488996008e-06, "loss": 0.4352, "step": 40710 }, { "epoch": 0.5868872778634536, "grad_norm": 1.7131584569770315, "learning_rate": 4.358302682523162e-06, "loss": 0.4598, "step": 40720 }, { "epoch": 0.5870314053874869, "grad_norm": 1.6543168822651757, "learning_rate": 4.355808038473691e-06, "loss": 0.4626, "step": 40730 }, { "epoch": 0.5871755329115201, "grad_norm": 1.6122469700707807, "learning_rate": 4.353313557479027e-06, "loss": 0.436, "step": 40740 }, { "epoch": 0.5873196604355534, "grad_norm": 1.6109664534857318, "learning_rate": 4.3508192401705605e-06, "loss": 0.4696, "step": 40750 }, { "epoch": 0.5874637879595866, "grad_norm": 1.4539608137869473, "learning_rate": 4.348325087179644e-06, "loss": 0.4617, "step": 40760 }, { "epoch": 0.5876079154836199, "grad_norm": 1.7180826550530608, "learning_rate": 4.345831099137583e-06, "loss": 0.4762, "step": 40770 }, { "epoch": 0.5877520430076532, "grad_norm": 1.4134968875864233, "learning_rate": 4.343337276675644e-06, "loss": 0.446, "step": 40780 }, { "epoch": 0.5878961705316864, "grad_norm": 1.6723344663988444, "learning_rate": 4.340843620425051e-06, "loss": 0.4448, "step": 40790 }, { "epoch": 0.5880402980557197, "grad_norm": 1.563834547427807, "learning_rate": 4.338350131016989e-06, "loss": 0.4451, "step": 40800 }, { "epoch": 0.5881844255797529, "grad_norm": 1.9272697980025082, "learning_rate": 4.335856809082597e-06, "loss": 0.4709, "step": 40810 }, { "epoch": 0.5883285531037862, "grad_norm": 1.593802494602787, "learning_rate": 4.333363655252973e-06, "loss": 0.4794, "step": 40820 }, { "epoch": 0.5884726806278195, "grad_norm": 1.6533275934438088, "learning_rate": 4.330870670159173e-06, "loss": 0.4347, "step": 40830 }, { "epoch": 0.5886168081518528, "grad_norm": 1.4768171654358764, "learning_rate": 4.328377854432206e-06, "loss": 0.4588, "step": 40840 }, { "epoch": 0.588760935675886, "grad_norm": 1.6795463684561733, "learning_rate": 4.325885208703044e-06, "loss": 0.4637, "step": 40850 }, { "epoch": 0.5889050631999193, "grad_norm": 1.8638377216784339, "learning_rate": 4.3233927336026154e-06, "loss": 0.4674, "step": 40860 }, { "epoch": 0.5890491907239526, "grad_norm": 1.3091387691829786, "learning_rate": 4.3209004297618004e-06, "loss": 0.453, "step": 40870 }, { "epoch": 0.5891933182479858, "grad_norm": 1.5700869949010026, "learning_rate": 4.318408297811441e-06, "loss": 0.464, "step": 40880 }, { "epoch": 0.5893374457720191, "grad_norm": 1.5822253437389031, "learning_rate": 4.315916338382335e-06, "loss": 0.4598, "step": 40890 }, { "epoch": 0.5894815732960523, "grad_norm": 1.5202102499395784, "learning_rate": 4.31342455210523e-06, "loss": 0.4631, "step": 40900 }, { "epoch": 0.5896257008200856, "grad_norm": 1.6037254204954725, "learning_rate": 4.310932939610842e-06, "loss": 0.4687, "step": 40910 }, { "epoch": 0.5897698283441188, "grad_norm": 1.5003656055189565, "learning_rate": 4.308441501529832e-06, "loss": 0.4554, "step": 40920 }, { "epoch": 0.5899139558681521, "grad_norm": 1.4166828994078573, "learning_rate": 4.305950238492822e-06, "loss": 0.464, "step": 40930 }, { "epoch": 0.5900580833921854, "grad_norm": 1.6098123115155614, "learning_rate": 4.303459151130389e-06, "loss": 0.4698, "step": 40940 }, { "epoch": 0.5902022109162187, "grad_norm": 1.5422329799716363, "learning_rate": 4.300968240073064e-06, "loss": 0.4475, "step": 40950 }, { "epoch": 0.590346338440252, "grad_norm": 1.826180352901743, "learning_rate": 4.298477505951335e-06, "loss": 0.4359, "step": 40960 }, { "epoch": 0.5904904659642852, "grad_norm": 1.3310360217286419, "learning_rate": 4.295986949395647e-06, "loss": 0.4495, "step": 40970 }, { "epoch": 0.5906345934883185, "grad_norm": 1.4926654795264842, "learning_rate": 4.293496571036395e-06, "loss": 0.4366, "step": 40980 }, { "epoch": 0.5907787210123517, "grad_norm": 1.7246540955067524, "learning_rate": 4.291006371503932e-06, "loss": 0.4581, "step": 40990 }, { "epoch": 0.590922848536385, "grad_norm": 1.5700468728148123, "learning_rate": 4.2885163514285676e-06, "loss": 0.451, "step": 41000 }, { "epoch": 0.5910669760604182, "grad_norm": 1.509256045295471, "learning_rate": 4.28602651144056e-06, "loss": 0.4406, "step": 41010 }, { "epoch": 0.5912111035844515, "grad_norm": 1.322773805988006, "learning_rate": 4.283536852170129e-06, "loss": 0.442, "step": 41020 }, { "epoch": 0.5913552311084848, "grad_norm": 1.4617270367170545, "learning_rate": 4.281047374247445e-06, "loss": 0.4576, "step": 41030 }, { "epoch": 0.591499358632518, "grad_norm": 1.4914717067855827, "learning_rate": 4.278558078302633e-06, "loss": 0.4603, "step": 41040 }, { "epoch": 0.5916434861565513, "grad_norm": 1.6263777661571706, "learning_rate": 4.276068964965769e-06, "loss": 0.4396, "step": 41050 }, { "epoch": 0.5917876136805846, "grad_norm": 1.5478956899939142, "learning_rate": 4.2735800348668885e-06, "loss": 0.4683, "step": 41060 }, { "epoch": 0.5919317412046179, "grad_norm": 1.6125619793694759, "learning_rate": 4.271091288635978e-06, "loss": 0.4824, "step": 41070 }, { "epoch": 0.5920758687286511, "grad_norm": 1.7029733651812315, "learning_rate": 4.2686027269029736e-06, "loss": 0.4575, "step": 41080 }, { "epoch": 0.5922199962526844, "grad_norm": 1.5638132771348614, "learning_rate": 4.266114350297771e-06, "loss": 0.4497, "step": 41090 }, { "epoch": 0.5923641237767177, "grad_norm": 1.5847866317902337, "learning_rate": 4.263626159450217e-06, "loss": 0.4481, "step": 41100 }, { "epoch": 0.5925082513007509, "grad_norm": 1.4823894224030003, "learning_rate": 4.2611381549901085e-06, "loss": 0.455, "step": 41110 }, { "epoch": 0.5926523788247842, "grad_norm": 1.4386323734636988, "learning_rate": 4.258650337547197e-06, "loss": 0.4348, "step": 41120 }, { "epoch": 0.5927965063488174, "grad_norm": 1.8597635075547545, "learning_rate": 4.256162707751188e-06, "loss": 0.4166, "step": 41130 }, { "epoch": 0.5929406338728507, "grad_norm": 1.3057256642359012, "learning_rate": 4.25367526623174e-06, "loss": 0.443, "step": 41140 }, { "epoch": 0.5930847613968839, "grad_norm": 1.6914836571359189, "learning_rate": 4.251188013618462e-06, "loss": 0.432, "step": 41150 }, { "epoch": 0.5932288889209172, "grad_norm": 1.431434343434237, "learning_rate": 4.248700950540912e-06, "loss": 0.4502, "step": 41160 }, { "epoch": 0.5933730164449504, "grad_norm": 1.6664540227971392, "learning_rate": 4.246214077628609e-06, "loss": 0.4311, "step": 41170 }, { "epoch": 0.5935171439689838, "grad_norm": 1.733911186862913, "learning_rate": 4.243727395511013e-06, "loss": 0.4548, "step": 41180 }, { "epoch": 0.5936612714930171, "grad_norm": 1.4069542462494917, "learning_rate": 4.2412409048175455e-06, "loss": 0.4554, "step": 41190 }, { "epoch": 0.5938053990170503, "grad_norm": 1.7705926530370082, "learning_rate": 4.238754606177574e-06, "loss": 0.4673, "step": 41200 }, { "epoch": 0.5939495265410836, "grad_norm": 1.6271390523215954, "learning_rate": 4.236268500220417e-06, "loss": 0.4687, "step": 41210 }, { "epoch": 0.5940936540651168, "grad_norm": 2.0415183036980635, "learning_rate": 4.233782587575347e-06, "loss": 0.4526, "step": 41220 }, { "epoch": 0.5942377815891501, "grad_norm": 1.5428619014588818, "learning_rate": 4.231296868871587e-06, "loss": 0.4635, "step": 41230 }, { "epoch": 0.5943819091131833, "grad_norm": 1.3677453986172805, "learning_rate": 4.2288113447383075e-06, "loss": 0.4244, "step": 41240 }, { "epoch": 0.5945260366372166, "grad_norm": 1.8039065496490914, "learning_rate": 4.226326015804636e-06, "loss": 0.4764, "step": 41250 }, { "epoch": 0.5946701641612498, "grad_norm": 1.4109983262942745, "learning_rate": 4.223840882699647e-06, "loss": 0.4569, "step": 41260 }, { "epoch": 0.5948142916852831, "grad_norm": 1.325296733970194, "learning_rate": 4.221355946052363e-06, "loss": 0.4494, "step": 41270 }, { "epoch": 0.5949584192093164, "grad_norm": 1.6919746583149728, "learning_rate": 4.218871206491762e-06, "loss": 0.4513, "step": 41280 }, { "epoch": 0.5951025467333496, "grad_norm": 1.5454093816323802, "learning_rate": 4.216386664646768e-06, "loss": 0.4474, "step": 41290 }, { "epoch": 0.595246674257383, "grad_norm": 1.6215188876219517, "learning_rate": 4.213902321146255e-06, "loss": 0.4271, "step": 41300 }, { "epoch": 0.5953908017814162, "grad_norm": 1.2265753417772283, "learning_rate": 4.211418176619052e-06, "loss": 0.4509, "step": 41310 }, { "epoch": 0.5955349293054495, "grad_norm": 1.8400268045122679, "learning_rate": 4.2089342316939316e-06, "loss": 0.4617, "step": 41320 }, { "epoch": 0.5956790568294827, "grad_norm": 1.4121399534021717, "learning_rate": 4.206450486999617e-06, "loss": 0.4793, "step": 41330 }, { "epoch": 0.595823184353516, "grad_norm": 1.4992527170349481, "learning_rate": 4.203966943164784e-06, "loss": 0.4757, "step": 41340 }, { "epoch": 0.5959673118775493, "grad_norm": 1.4239002556033615, "learning_rate": 4.2014836008180536e-06, "loss": 0.4552, "step": 41350 }, { "epoch": 0.5961114394015825, "grad_norm": 1.5124085490626096, "learning_rate": 4.199000460587999e-06, "loss": 0.4506, "step": 41360 }, { "epoch": 0.5962555669256158, "grad_norm": 1.4106278000982218, "learning_rate": 4.1965175231031395e-06, "loss": 0.4557, "step": 41370 }, { "epoch": 0.596399694449649, "grad_norm": 1.365971530959136, "learning_rate": 4.194034788991947e-06, "loss": 0.4586, "step": 41380 }, { "epoch": 0.5965438219736823, "grad_norm": 1.4477314133608499, "learning_rate": 4.191552258882837e-06, "loss": 0.4479, "step": 41390 }, { "epoch": 0.5966879494977155, "grad_norm": 1.6330806089731629, "learning_rate": 4.1890699334041765e-06, "loss": 0.4514, "step": 41400 }, { "epoch": 0.5968320770217489, "grad_norm": 1.5135444236234712, "learning_rate": 4.186587813184281e-06, "loss": 0.4723, "step": 41410 }, { "epoch": 0.5969762045457822, "grad_norm": 1.5013813650220944, "learning_rate": 4.18410589885141e-06, "loss": 0.4648, "step": 41420 }, { "epoch": 0.5971203320698154, "grad_norm": 1.725742791285683, "learning_rate": 4.181624191033776e-06, "loss": 0.4781, "step": 41430 }, { "epoch": 0.5972644595938487, "grad_norm": 1.5197738452880831, "learning_rate": 4.1791426903595375e-06, "loss": 0.488, "step": 41440 }, { "epoch": 0.5974085871178819, "grad_norm": 1.5534111731713387, "learning_rate": 4.176661397456799e-06, "loss": 0.4914, "step": 41450 }, { "epoch": 0.5975527146419152, "grad_norm": 1.336558506439783, "learning_rate": 4.174180312953612e-06, "loss": 0.4365, "step": 41460 }, { "epoch": 0.5976968421659484, "grad_norm": 1.3177451226958268, "learning_rate": 4.1716994374779805e-06, "loss": 0.4186, "step": 41470 }, { "epoch": 0.5978409696899817, "grad_norm": 1.4900096235025442, "learning_rate": 4.169218771657852e-06, "loss": 0.4305, "step": 41480 }, { "epoch": 0.5979850972140149, "grad_norm": 1.5466385890689158, "learning_rate": 4.166738316121116e-06, "loss": 0.46, "step": 41490 }, { "epoch": 0.5981292247380482, "grad_norm": 1.5871159618694837, "learning_rate": 4.164258071495619e-06, "loss": 0.4374, "step": 41500 }, { "epoch": 0.5982733522620814, "grad_norm": 1.8271340971166241, "learning_rate": 4.1617780384091456e-06, "loss": 0.428, "step": 41510 }, { "epoch": 0.5984174797861147, "grad_norm": 1.3797322688630573, "learning_rate": 4.15929821748943e-06, "loss": 0.4509, "step": 41520 }, { "epoch": 0.5985616073101481, "grad_norm": 1.3803965223705286, "learning_rate": 4.156818609364154e-06, "loss": 0.4561, "step": 41530 }, { "epoch": 0.5987057348341813, "grad_norm": 1.7227570175741833, "learning_rate": 4.154339214660944e-06, "loss": 0.4311, "step": 41540 }, { "epoch": 0.5988498623582146, "grad_norm": 1.4822469465208887, "learning_rate": 4.151860034007371e-06, "loss": 0.482, "step": 41550 }, { "epoch": 0.5989939898822478, "grad_norm": 1.4735607475605597, "learning_rate": 4.1493810680309565e-06, "loss": 0.4758, "step": 41560 }, { "epoch": 0.5991381174062811, "grad_norm": 1.5621722871076942, "learning_rate": 4.1469023173591605e-06, "loss": 0.4674, "step": 41570 }, { "epoch": 0.5992822449303143, "grad_norm": 1.4275763363320033, "learning_rate": 4.144423782619396e-06, "loss": 0.4683, "step": 41580 }, { "epoch": 0.5994263724543476, "grad_norm": 1.5713752232038407, "learning_rate": 4.141945464439018e-06, "loss": 0.468, "step": 41590 }, { "epoch": 0.5995704999783809, "grad_norm": 1.6361270915698172, "learning_rate": 4.1394673634453236e-06, "loss": 0.4333, "step": 41600 }, { "epoch": 0.5997146275024141, "grad_norm": 1.614799965479121, "learning_rate": 4.13698948026556e-06, "loss": 0.4545, "step": 41610 }, { "epoch": 0.5998587550264474, "grad_norm": 1.6166611321819107, "learning_rate": 4.134511815526919e-06, "loss": 0.4311, "step": 41620 }, { "epoch": 0.6000028825504806, "grad_norm": 1.9311024350529749, "learning_rate": 4.132034369856532e-06, "loss": 0.4513, "step": 41630 }, { "epoch": 0.6001470100745139, "grad_norm": 1.5463522289254603, "learning_rate": 4.129557143881479e-06, "loss": 0.4527, "step": 41640 }, { "epoch": 0.6002911375985472, "grad_norm": 1.749512202935596, "learning_rate": 4.127080138228785e-06, "loss": 0.4823, "step": 41650 }, { "epoch": 0.6004352651225805, "grad_norm": 1.2657768072835514, "learning_rate": 4.124603353525416e-06, "loss": 0.4119, "step": 41660 }, { "epoch": 0.6005793926466138, "grad_norm": 1.6543803932453636, "learning_rate": 4.122126790398287e-06, "loss": 0.4411, "step": 41670 }, { "epoch": 0.600723520170647, "grad_norm": 1.8080804560912012, "learning_rate": 4.1196504494742485e-06, "loss": 0.4605, "step": 41680 }, { "epoch": 0.6008676476946803, "grad_norm": 1.5661067848503107, "learning_rate": 4.117174331380107e-06, "loss": 0.4338, "step": 41690 }, { "epoch": 0.6010117752187135, "grad_norm": 1.7856791090100501, "learning_rate": 4.114698436742601e-06, "loss": 0.4573, "step": 41700 }, { "epoch": 0.6011559027427468, "grad_norm": 1.4716314322528183, "learning_rate": 4.112222766188418e-06, "loss": 0.4426, "step": 41710 }, { "epoch": 0.60130003026678, "grad_norm": 1.7961468343885867, "learning_rate": 4.1097473203441905e-06, "loss": 0.4566, "step": 41720 }, { "epoch": 0.6014441577908133, "grad_norm": 1.5131308020230971, "learning_rate": 4.107272099836488e-06, "loss": 0.4726, "step": 41730 }, { "epoch": 0.6015882853148465, "grad_norm": 2.0185905308626904, "learning_rate": 4.104797105291829e-06, "loss": 0.4563, "step": 41740 }, { "epoch": 0.6017324128388798, "grad_norm": 1.5364133636921293, "learning_rate": 4.102322337336672e-06, "loss": 0.4533, "step": 41750 }, { "epoch": 0.6018765403629132, "grad_norm": 1.4563305078909852, "learning_rate": 4.099847796597417e-06, "loss": 0.4704, "step": 41760 }, { "epoch": 0.6020206678869464, "grad_norm": 1.7551565317921582, "learning_rate": 4.0973734837004086e-06, "loss": 0.4611, "step": 41770 }, { "epoch": 0.6021647954109797, "grad_norm": 1.8887980165680276, "learning_rate": 4.094899399271935e-06, "loss": 0.4369, "step": 41780 }, { "epoch": 0.6023089229350129, "grad_norm": 1.8659215580316526, "learning_rate": 4.09242554393822e-06, "loss": 0.4731, "step": 41790 }, { "epoch": 0.6024530504590462, "grad_norm": 1.5931897709946135, "learning_rate": 4.08995191832544e-06, "loss": 0.4453, "step": 41800 }, { "epoch": 0.6025971779830794, "grad_norm": 1.7251011872469024, "learning_rate": 4.0874785230597035e-06, "loss": 0.4508, "step": 41810 }, { "epoch": 0.6027413055071127, "grad_norm": 1.9117974182001485, "learning_rate": 4.085005358767067e-06, "loss": 0.4402, "step": 41820 }, { "epoch": 0.602885433031146, "grad_norm": 1.613113934484344, "learning_rate": 4.082532426073524e-06, "loss": 0.4447, "step": 41830 }, { "epoch": 0.6030295605551792, "grad_norm": 1.4554570404254863, "learning_rate": 4.080059725605013e-06, "loss": 0.4534, "step": 41840 }, { "epoch": 0.6031736880792125, "grad_norm": 1.750197167632994, "learning_rate": 4.077587257987411e-06, "loss": 0.4564, "step": 41850 }, { "epoch": 0.6033178156032457, "grad_norm": 1.4631769407290762, "learning_rate": 4.0751150238465376e-06, "loss": 0.4424, "step": 41860 }, { "epoch": 0.603461943127279, "grad_norm": 1.3901703421802698, "learning_rate": 4.072643023808153e-06, "loss": 0.4395, "step": 41870 }, { "epoch": 0.6036060706513123, "grad_norm": 1.6929916472435553, "learning_rate": 4.070171258497959e-06, "loss": 0.4556, "step": 41880 }, { "epoch": 0.6037501981753456, "grad_norm": 1.4681928643377524, "learning_rate": 4.067699728541595e-06, "loss": 0.4656, "step": 41890 }, { "epoch": 0.6038943256993788, "grad_norm": 1.5180521610569035, "learning_rate": 4.065228434564644e-06, "loss": 0.4747, "step": 41900 }, { "epoch": 0.6040384532234121, "grad_norm": 1.6923136327040633, "learning_rate": 4.062757377192629e-06, "loss": 0.4721, "step": 41910 }, { "epoch": 0.6041825807474454, "grad_norm": 1.3699374072236146, "learning_rate": 4.060286557051011e-06, "loss": 0.4455, "step": 41920 }, { "epoch": 0.6043267082714786, "grad_norm": 1.4747523914844323, "learning_rate": 4.057815974765194e-06, "loss": 0.4339, "step": 41930 }, { "epoch": 0.6044708357955119, "grad_norm": 1.452607279402355, "learning_rate": 4.055345630960519e-06, "loss": 0.4632, "step": 41940 }, { "epoch": 0.6046149633195451, "grad_norm": 1.5801933149254173, "learning_rate": 4.052875526262267e-06, "loss": 0.4786, "step": 41950 }, { "epoch": 0.6047590908435784, "grad_norm": 1.8043071893889755, "learning_rate": 4.0504056612956606e-06, "loss": 0.4904, "step": 41960 }, { "epoch": 0.6049032183676116, "grad_norm": 1.6032021110757182, "learning_rate": 4.047936036685859e-06, "loss": 0.4274, "step": 41970 }, { "epoch": 0.6050473458916449, "grad_norm": 1.2562408692909226, "learning_rate": 4.045466653057962e-06, "loss": 0.4532, "step": 41980 }, { "epoch": 0.6051914734156781, "grad_norm": 1.5496324758165436, "learning_rate": 4.042997511037009e-06, "loss": 0.4301, "step": 41990 }, { "epoch": 0.6053356009397115, "grad_norm": 1.7306371219782397, "learning_rate": 4.040528611247977e-06, "loss": 0.4665, "step": 42000 }, { "epoch": 0.6054797284637448, "grad_norm": 1.2545159673385984, "learning_rate": 4.03805995431578e-06, "loss": 0.4765, "step": 42010 }, { "epoch": 0.605623855987778, "grad_norm": 1.621578960237222, "learning_rate": 4.0355915408652765e-06, "loss": 0.4665, "step": 42020 }, { "epoch": 0.6057679835118113, "grad_norm": 1.6516097023540832, "learning_rate": 4.0331233715212596e-06, "loss": 0.4429, "step": 42030 }, { "epoch": 0.6059121110358445, "grad_norm": 1.6691778147806426, "learning_rate": 4.030655446908458e-06, "loss": 0.4521, "step": 42040 }, { "epoch": 0.6060562385598778, "grad_norm": 1.6612544078273541, "learning_rate": 4.028187767651541e-06, "loss": 0.454, "step": 42050 }, { "epoch": 0.606200366083911, "grad_norm": 1.5401522925363225, "learning_rate": 4.025720334375118e-06, "loss": 0.4546, "step": 42060 }, { "epoch": 0.6063444936079443, "grad_norm": 1.4487480738430765, "learning_rate": 4.023253147703733e-06, "loss": 0.4607, "step": 42070 }, { "epoch": 0.6064886211319775, "grad_norm": 1.6985067701682608, "learning_rate": 4.0207862082618675e-06, "loss": 0.4553, "step": 42080 }, { "epoch": 0.6066327486560108, "grad_norm": 1.3126993679025025, "learning_rate": 4.0183195166739445e-06, "loss": 0.4775, "step": 42090 }, { "epoch": 0.6067768761800441, "grad_norm": 1.5711248314057595, "learning_rate": 4.015853073564317e-06, "loss": 0.4581, "step": 42100 }, { "epoch": 0.6069210037040774, "grad_norm": 1.399893298412513, "learning_rate": 4.0133868795572815e-06, "loss": 0.471, "step": 42110 }, { "epoch": 0.6070651312281107, "grad_norm": 1.3131403171287248, "learning_rate": 4.010920935277069e-06, "loss": 0.4265, "step": 42120 }, { "epoch": 0.6072092587521439, "grad_norm": 1.6974243642137483, "learning_rate": 4.00845524134785e-06, "loss": 0.4524, "step": 42130 }, { "epoch": 0.6073533862761772, "grad_norm": 1.4979478528477737, "learning_rate": 4.005989798393725e-06, "loss": 0.4472, "step": 42140 }, { "epoch": 0.6074975138002104, "grad_norm": 1.6874192581363383, "learning_rate": 4.003524607038739e-06, "loss": 0.4798, "step": 42150 }, { "epoch": 0.6076416413242437, "grad_norm": 1.4835812088703015, "learning_rate": 4.001059667906868e-06, "loss": 0.4467, "step": 42160 }, { "epoch": 0.607785768848277, "grad_norm": 1.5463327778817215, "learning_rate": 3.998594981622025e-06, "loss": 0.4492, "step": 42170 }, { "epoch": 0.6079298963723102, "grad_norm": 1.4711062836089162, "learning_rate": 3.996130548808059e-06, "loss": 0.4431, "step": 42180 }, { "epoch": 0.6080740238963435, "grad_norm": 1.7459864597556807, "learning_rate": 3.993666370088759e-06, "loss": 0.4659, "step": 42190 }, { "epoch": 0.6082181514203767, "grad_norm": 1.6358711649115545, "learning_rate": 3.991202446087843e-06, "loss": 0.4599, "step": 42200 }, { "epoch": 0.60836227894441, "grad_norm": 1.4406994522023633, "learning_rate": 3.988738777428968e-06, "loss": 0.404, "step": 42210 }, { "epoch": 0.6085064064684432, "grad_norm": 1.407606279619941, "learning_rate": 3.986275364735728e-06, "loss": 0.4364, "step": 42220 }, { "epoch": 0.6086505339924766, "grad_norm": 1.6426725037086378, "learning_rate": 3.983812208631646e-06, "loss": 0.4723, "step": 42230 }, { "epoch": 0.6087946615165098, "grad_norm": 1.731198440889505, "learning_rate": 3.98134930974019e-06, "loss": 0.4408, "step": 42240 }, { "epoch": 0.6089387890405431, "grad_norm": 1.5632666893182774, "learning_rate": 3.978886668684755e-06, "loss": 0.4514, "step": 42250 }, { "epoch": 0.6090829165645764, "grad_norm": 1.547861504911312, "learning_rate": 3.9764242860886724e-06, "loss": 0.4695, "step": 42260 }, { "epoch": 0.6092270440886096, "grad_norm": 1.5294438385094278, "learning_rate": 3.973962162575209e-06, "loss": 0.4434, "step": 42270 }, { "epoch": 0.6093711716126429, "grad_norm": 1.4488614037190952, "learning_rate": 3.971500298767566e-06, "loss": 0.4434, "step": 42280 }, { "epoch": 0.6095152991366761, "grad_norm": 1.7162538079756435, "learning_rate": 3.96903869528888e-06, "loss": 0.4678, "step": 42290 }, { "epoch": 0.6096594266607094, "grad_norm": 1.6401272573554015, "learning_rate": 3.9665773527622164e-06, "loss": 0.4701, "step": 42300 }, { "epoch": 0.6098035541847426, "grad_norm": 1.4654534135122679, "learning_rate": 3.964116271810582e-06, "loss": 0.4818, "step": 42310 }, { "epoch": 0.6099476817087759, "grad_norm": 1.9121652376068887, "learning_rate": 3.961655453056914e-06, "loss": 0.4566, "step": 42320 }, { "epoch": 0.6100918092328091, "grad_norm": 1.7138592695592199, "learning_rate": 3.95919489712408e-06, "loss": 0.4429, "step": 42330 }, { "epoch": 0.6102359367568424, "grad_norm": 1.4776728088288136, "learning_rate": 3.956734604634886e-06, "loss": 0.4515, "step": 42340 }, { "epoch": 0.6103800642808758, "grad_norm": 1.5831287477697518, "learning_rate": 3.954274576212071e-06, "loss": 0.4511, "step": 42350 }, { "epoch": 0.610524191804909, "grad_norm": 1.5725403954068622, "learning_rate": 3.951814812478305e-06, "loss": 0.4894, "step": 42360 }, { "epoch": 0.6106683193289423, "grad_norm": 1.6503495249653024, "learning_rate": 3.949355314056192e-06, "loss": 0.4472, "step": 42370 }, { "epoch": 0.6108124468529755, "grad_norm": 1.5967062101678335, "learning_rate": 3.9468960815682676e-06, "loss": 0.4463, "step": 42380 }, { "epoch": 0.6109565743770088, "grad_norm": 1.441677407190797, "learning_rate": 3.944437115637e-06, "loss": 0.446, "step": 42390 }, { "epoch": 0.611100701901042, "grad_norm": 1.8824550048655526, "learning_rate": 3.941978416884794e-06, "loss": 0.4663, "step": 42400 }, { "epoch": 0.6112448294250753, "grad_norm": 1.5170180507900395, "learning_rate": 3.93951998593398e-06, "loss": 0.433, "step": 42410 }, { "epoch": 0.6113889569491086, "grad_norm": 1.57690886365611, "learning_rate": 3.937061823406827e-06, "loss": 0.4386, "step": 42420 }, { "epoch": 0.6115330844731418, "grad_norm": 1.5369614072391329, "learning_rate": 3.934603929925534e-06, "loss": 0.4458, "step": 42430 }, { "epoch": 0.6116772119971751, "grad_norm": 1.6384884343524062, "learning_rate": 3.9321463061122285e-06, "loss": 0.454, "step": 42440 }, { "epoch": 0.6118213395212083, "grad_norm": 1.6387259555816844, "learning_rate": 3.929688952588974e-06, "loss": 0.4616, "step": 42450 }, { "epoch": 0.6119654670452417, "grad_norm": 1.652557554219597, "learning_rate": 3.927231869977765e-06, "loss": 0.4734, "step": 42460 }, { "epoch": 0.6121095945692749, "grad_norm": 1.8060838594512516, "learning_rate": 3.924775058900526e-06, "loss": 0.4592, "step": 42470 }, { "epoch": 0.6122537220933082, "grad_norm": 1.5552875695310802, "learning_rate": 3.922318519979112e-06, "loss": 0.4545, "step": 42480 }, { "epoch": 0.6123978496173414, "grad_norm": 1.4624978878220478, "learning_rate": 3.919862253835312e-06, "loss": 0.4623, "step": 42490 }, { "epoch": 0.6125419771413747, "grad_norm": 3.2253069369724527, "learning_rate": 3.917406261090846e-06, "loss": 0.4628, "step": 42500 }, { "epoch": 0.612686104665408, "grad_norm": 1.5187564468909127, "learning_rate": 3.914950542367359e-06, "loss": 0.4516, "step": 42510 }, { "epoch": 0.6128302321894412, "grad_norm": 1.2483779086448838, "learning_rate": 3.912495098286434e-06, "loss": 0.4396, "step": 42520 }, { "epoch": 0.6129743597134745, "grad_norm": 1.534886003730892, "learning_rate": 3.910039929469582e-06, "loss": 0.4509, "step": 42530 }, { "epoch": 0.6131184872375077, "grad_norm": 1.4256423261044682, "learning_rate": 3.90758503653824e-06, "loss": 0.4165, "step": 42540 }, { "epoch": 0.613262614761541, "grad_norm": 1.7540247535308362, "learning_rate": 3.905130420113781e-06, "loss": 0.4541, "step": 42550 }, { "epoch": 0.6134067422855742, "grad_norm": 1.5913186143592057, "learning_rate": 3.902676080817507e-06, "loss": 0.4716, "step": 42560 }, { "epoch": 0.6135508698096075, "grad_norm": 1.4058706564502579, "learning_rate": 3.900222019270644e-06, "loss": 0.4463, "step": 42570 }, { "epoch": 0.6136949973336409, "grad_norm": 1.4925740240913783, "learning_rate": 3.89776823609436e-06, "loss": 0.4295, "step": 42580 }, { "epoch": 0.6138391248576741, "grad_norm": 1.5948323072832835, "learning_rate": 3.895314731909739e-06, "loss": 0.4422, "step": 42590 }, { "epoch": 0.6139832523817074, "grad_norm": 1.56662715964143, "learning_rate": 3.892861507337802e-06, "loss": 0.4565, "step": 42600 }, { "epoch": 0.6141273799057406, "grad_norm": 1.7216702833244961, "learning_rate": 3.8904085629994984e-06, "loss": 0.4481, "step": 42610 }, { "epoch": 0.6142715074297739, "grad_norm": 1.5526431479158511, "learning_rate": 3.887955899515704e-06, "loss": 0.4711, "step": 42620 }, { "epoch": 0.6144156349538071, "grad_norm": 1.4598801870860203, "learning_rate": 3.885503517507229e-06, "loss": 0.4431, "step": 42630 }, { "epoch": 0.6145597624778404, "grad_norm": 1.540834258885347, "learning_rate": 3.883051417594804e-06, "loss": 0.4405, "step": 42640 }, { "epoch": 0.6147038900018736, "grad_norm": 1.5825919028568627, "learning_rate": 3.880599600399094e-06, "loss": 0.4567, "step": 42650 }, { "epoch": 0.6148480175259069, "grad_norm": 1.5406391465892428, "learning_rate": 3.878148066540695e-06, "loss": 0.4578, "step": 42660 }, { "epoch": 0.6149921450499402, "grad_norm": 1.488165762924287, "learning_rate": 3.875696816640123e-06, "loss": 0.4535, "step": 42670 }, { "epoch": 0.6151362725739734, "grad_norm": 1.6919200928697216, "learning_rate": 3.873245851317827e-06, "loss": 0.4364, "step": 42680 }, { "epoch": 0.6152804000980067, "grad_norm": 1.33204740365368, "learning_rate": 3.870795171194187e-06, "loss": 0.4403, "step": 42690 }, { "epoch": 0.61542452762204, "grad_norm": 1.546604966855585, "learning_rate": 3.868344776889505e-06, "loss": 0.447, "step": 42700 }, { "epoch": 0.6155686551460733, "grad_norm": 1.639940484409849, "learning_rate": 3.8658946690240165e-06, "loss": 0.4349, "step": 42710 }, { "epoch": 0.6157127826701065, "grad_norm": 1.6596808988923901, "learning_rate": 3.863444848217876e-06, "loss": 0.4485, "step": 42720 }, { "epoch": 0.6158569101941398, "grad_norm": 1.5184018390254717, "learning_rate": 3.860995315091174e-06, "loss": 0.449, "step": 42730 }, { "epoch": 0.616001037718173, "grad_norm": 1.6340159351849828, "learning_rate": 3.8585460702639245e-06, "loss": 0.4409, "step": 42740 }, { "epoch": 0.6161451652422063, "grad_norm": 1.6463819976642655, "learning_rate": 3.856097114356067e-06, "loss": 0.4753, "step": 42750 }, { "epoch": 0.6162892927662396, "grad_norm": 1.7788234926762054, "learning_rate": 3.85364844798747e-06, "loss": 0.4469, "step": 42760 }, { "epoch": 0.6164334202902728, "grad_norm": 1.5110566708983222, "learning_rate": 3.85120007177793e-06, "loss": 0.4627, "step": 42770 }, { "epoch": 0.6165775478143061, "grad_norm": 1.5112705976152732, "learning_rate": 3.848751986347167e-06, "loss": 0.4287, "step": 42780 }, { "epoch": 0.6167216753383393, "grad_norm": 1.4939562575522818, "learning_rate": 3.846304192314827e-06, "loss": 0.4092, "step": 42790 }, { "epoch": 0.6168658028623726, "grad_norm": 1.616636437054175, "learning_rate": 3.8438566903004874e-06, "loss": 0.4516, "step": 42800 }, { "epoch": 0.617009930386406, "grad_norm": 1.3984652446342274, "learning_rate": 3.841409480923648e-06, "loss": 0.4364, "step": 42810 }, { "epoch": 0.6171540579104392, "grad_norm": 1.5105681423757134, "learning_rate": 3.838962564803732e-06, "loss": 0.4418, "step": 42820 }, { "epoch": 0.6172981854344725, "grad_norm": 1.4730332881774002, "learning_rate": 3.836515942560094e-06, "loss": 0.4584, "step": 42830 }, { "epoch": 0.6174423129585057, "grad_norm": 1.5740619179471296, "learning_rate": 3.834069614812009e-06, "loss": 0.4781, "step": 42840 }, { "epoch": 0.617586440482539, "grad_norm": 1.6533405300485489, "learning_rate": 3.831623582178682e-06, "loss": 0.4324, "step": 42850 }, { "epoch": 0.6177305680065722, "grad_norm": 1.3589548720675773, "learning_rate": 3.829177845279239e-06, "loss": 0.4419, "step": 42860 }, { "epoch": 0.6178746955306055, "grad_norm": 1.299415448088216, "learning_rate": 3.826732404732736e-06, "loss": 0.4414, "step": 42870 }, { "epoch": 0.6180188230546387, "grad_norm": 1.5024777714910287, "learning_rate": 3.82428726115815e-06, "loss": 0.4541, "step": 42880 }, { "epoch": 0.618162950578672, "grad_norm": 1.2506140190506305, "learning_rate": 3.821842415174382e-06, "loss": 0.4291, "step": 42890 }, { "epoch": 0.6183070781027052, "grad_norm": 1.6373143313667684, "learning_rate": 3.819397867400261e-06, "loss": 0.4503, "step": 42900 }, { "epoch": 0.6184512056267385, "grad_norm": 1.4415627736193752, "learning_rate": 3.816953618454542e-06, "loss": 0.4488, "step": 42910 }, { "epoch": 0.6185953331507718, "grad_norm": 1.7061960767873638, "learning_rate": 3.8145096689558995e-06, "loss": 0.4577, "step": 42920 }, { "epoch": 0.6187394606748051, "grad_norm": 1.5048773622943765, "learning_rate": 3.8120660195229344e-06, "loss": 0.4414, "step": 42930 }, { "epoch": 0.6188835881988384, "grad_norm": 1.752834342202424, "learning_rate": 3.8096226707741722e-06, "loss": 0.4437, "step": 42940 }, { "epoch": 0.6190277157228716, "grad_norm": 1.6976179381311938, "learning_rate": 3.8071796233280606e-06, "loss": 0.4383, "step": 42950 }, { "epoch": 0.6191718432469049, "grad_norm": 1.3375842598645864, "learning_rate": 3.8047368778029724e-06, "loss": 0.4746, "step": 42960 }, { "epoch": 0.6193159707709381, "grad_norm": 2.056392389165613, "learning_rate": 3.8022944348172063e-06, "loss": 0.4806, "step": 42970 }, { "epoch": 0.6194600982949714, "grad_norm": 1.6271186664876527, "learning_rate": 3.7998522949889777e-06, "loss": 0.4277, "step": 42980 }, { "epoch": 0.6196042258190047, "grad_norm": 1.3740970507279924, "learning_rate": 3.797410458936432e-06, "loss": 0.4291, "step": 42990 }, { "epoch": 0.6197483533430379, "grad_norm": 1.7390759928127733, "learning_rate": 3.794968927277636e-06, "loss": 0.4601, "step": 43000 }, { "epoch": 0.6198924808670712, "grad_norm": 1.6848300787747654, "learning_rate": 3.7925277006305726e-06, "loss": 0.4786, "step": 43010 }, { "epoch": 0.6200366083911044, "grad_norm": 1.680624414500806, "learning_rate": 3.7900867796131624e-06, "loss": 0.4488, "step": 43020 }, { "epoch": 0.6201807359151377, "grad_norm": 1.3213490062222504, "learning_rate": 3.7876461648432337e-06, "loss": 0.4135, "step": 43030 }, { "epoch": 0.6203248634391709, "grad_norm": 3.160706531468403, "learning_rate": 3.785205856938546e-06, "loss": 0.4283, "step": 43040 }, { "epoch": 0.6204689909632043, "grad_norm": 1.5636638356875685, "learning_rate": 3.782765856516778e-06, "loss": 0.464, "step": 43050 }, { "epoch": 0.6206131184872375, "grad_norm": 1.6822200700617262, "learning_rate": 3.780326164195529e-06, "loss": 0.4249, "step": 43060 }, { "epoch": 0.6207572460112708, "grad_norm": 1.519183075727348, "learning_rate": 3.777886780592325e-06, "loss": 0.4729, "step": 43070 }, { "epoch": 0.6209013735353041, "grad_norm": 1.5463132034998064, "learning_rate": 3.7754477063246096e-06, "loss": 0.4479, "step": 43080 }, { "epoch": 0.6210455010593373, "grad_norm": 1.6550096831209293, "learning_rate": 3.7730089420097495e-06, "loss": 0.4545, "step": 43090 }, { "epoch": 0.6211896285833706, "grad_norm": 1.4446848522406854, "learning_rate": 3.770570488265034e-06, "loss": 0.4724, "step": 43100 }, { "epoch": 0.6213337561074038, "grad_norm": 1.4026518329489177, "learning_rate": 3.7681323457076735e-06, "loss": 0.425, "step": 43110 }, { "epoch": 0.6214778836314371, "grad_norm": 1.4193897334371417, "learning_rate": 3.765694514954796e-06, "loss": 0.4211, "step": 43120 }, { "epoch": 0.6216220111554703, "grad_norm": 1.5912279460046428, "learning_rate": 3.763256996623457e-06, "loss": 0.4485, "step": 43130 }, { "epoch": 0.6217661386795036, "grad_norm": 1.4316770922665953, "learning_rate": 3.760819791330628e-06, "loss": 0.4245, "step": 43140 }, { "epoch": 0.6219102662035368, "grad_norm": 1.7271493702308753, "learning_rate": 3.7583828996932043e-06, "loss": 0.4719, "step": 43150 }, { "epoch": 0.6220543937275702, "grad_norm": 1.8156852369560068, "learning_rate": 3.755946322327998e-06, "loss": 0.4485, "step": 43160 }, { "epoch": 0.6221985212516035, "grad_norm": 1.6031467632204515, "learning_rate": 3.7535100598517453e-06, "loss": 0.461, "step": 43170 }, { "epoch": 0.6223426487756367, "grad_norm": 1.5530095316867738, "learning_rate": 3.751074112881103e-06, "loss": 0.4425, "step": 43180 }, { "epoch": 0.62248677629967, "grad_norm": 3.607583152809227, "learning_rate": 3.7486384820326426e-06, "loss": 0.4318, "step": 43190 }, { "epoch": 0.6226309038237032, "grad_norm": 1.7461577635159826, "learning_rate": 3.7462031679228623e-06, "loss": 0.4544, "step": 43200 }, { "epoch": 0.6227750313477365, "grad_norm": 1.330984964409724, "learning_rate": 3.7437681711681774e-06, "loss": 0.4514, "step": 43210 }, { "epoch": 0.6229191588717697, "grad_norm": 1.5338599656103469, "learning_rate": 3.7413334923849197e-06, "loss": 0.4526, "step": 43220 }, { "epoch": 0.623063286395803, "grad_norm": 1.5581538564216695, "learning_rate": 3.7388991321893455e-06, "loss": 0.4401, "step": 43230 }, { "epoch": 0.6232074139198363, "grad_norm": 1.6450920819791046, "learning_rate": 3.7364650911976287e-06, "loss": 0.4506, "step": 43240 }, { "epoch": 0.6233515414438695, "grad_norm": 1.625295243184275, "learning_rate": 3.7340313700258635e-06, "loss": 0.4902, "step": 43250 }, { "epoch": 0.6234956689679028, "grad_norm": 1.5498339093435347, "learning_rate": 3.7315979692900594e-06, "loss": 0.4338, "step": 43260 }, { "epoch": 0.623639796491936, "grad_norm": 1.7280005259654636, "learning_rate": 3.7291648896061484e-06, "loss": 0.4633, "step": 43270 }, { "epoch": 0.6237839240159694, "grad_norm": 1.4702223388340876, "learning_rate": 3.726732131589981e-06, "loss": 0.4251, "step": 43280 }, { "epoch": 0.6239280515400026, "grad_norm": 1.7210971810995115, "learning_rate": 3.7242996958573218e-06, "loss": 0.4542, "step": 43290 }, { "epoch": 0.6240721790640359, "grad_norm": 1.6394004699471125, "learning_rate": 3.721867583023861e-06, "loss": 0.4349, "step": 43300 }, { "epoch": 0.6242163065880691, "grad_norm": 1.342400623924549, "learning_rate": 3.719435793705203e-06, "loss": 0.4585, "step": 43310 }, { "epoch": 0.6243604341121024, "grad_norm": 1.5079023751452856, "learning_rate": 3.717004328516869e-06, "loss": 0.4509, "step": 43320 }, { "epoch": 0.6245045616361357, "grad_norm": 1.6647461683871443, "learning_rate": 3.7145731880743e-06, "loss": 0.464, "step": 43330 }, { "epoch": 0.6246486891601689, "grad_norm": 1.2805427673147847, "learning_rate": 3.712142372992855e-06, "loss": 0.4382, "step": 43340 }, { "epoch": 0.6247928166842022, "grad_norm": 1.8250202193614682, "learning_rate": 3.709711883887812e-06, "loss": 0.4742, "step": 43350 }, { "epoch": 0.6249369442082354, "grad_norm": 1.6060885522251551, "learning_rate": 3.707281721374365e-06, "loss": 0.4533, "step": 43360 }, { "epoch": 0.6250810717322687, "grad_norm": 1.4569027798662815, "learning_rate": 3.704851886067623e-06, "loss": 0.4688, "step": 43370 }, { "epoch": 0.6252251992563019, "grad_norm": 1.2896973418769817, "learning_rate": 3.702422378582615e-06, "loss": 0.4479, "step": 43380 }, { "epoch": 0.6253693267803352, "grad_norm": 1.484618523574335, "learning_rate": 3.699993199534288e-06, "loss": 0.4601, "step": 43390 }, { "epoch": 0.6255134543043686, "grad_norm": 1.4915296585029867, "learning_rate": 3.6975643495375014e-06, "loss": 0.4633, "step": 43400 }, { "epoch": 0.6256575818284018, "grad_norm": 1.9916604973356666, "learning_rate": 3.695135829207036e-06, "loss": 0.4119, "step": 43410 }, { "epoch": 0.6258017093524351, "grad_norm": 1.2490995049397202, "learning_rate": 3.692707639157589e-06, "loss": 0.4501, "step": 43420 }, { "epoch": 0.6259458368764683, "grad_norm": 1.6968221051187642, "learning_rate": 3.690279780003768e-06, "loss": 0.4476, "step": 43430 }, { "epoch": 0.6260899644005016, "grad_norm": 1.4811845091531608, "learning_rate": 3.687852252360105e-06, "loss": 0.4699, "step": 43440 }, { "epoch": 0.6262340919245348, "grad_norm": 1.5595410045396112, "learning_rate": 3.68542505684104e-06, "loss": 0.428, "step": 43450 }, { "epoch": 0.6263782194485681, "grad_norm": 1.3690841374027813, "learning_rate": 3.6829981940609382e-06, "loss": 0.4202, "step": 43460 }, { "epoch": 0.6265223469726013, "grad_norm": 1.8190820907140814, "learning_rate": 3.6805716646340733e-06, "loss": 0.4723, "step": 43470 }, { "epoch": 0.6266664744966346, "grad_norm": 1.6200937612418596, "learning_rate": 3.6781454691746367e-06, "loss": 0.4713, "step": 43480 }, { "epoch": 0.6268106020206679, "grad_norm": 1.4504369552593774, "learning_rate": 3.675719608296736e-06, "loss": 0.4568, "step": 43490 }, { "epoch": 0.6269547295447011, "grad_norm": 1.5125551463352036, "learning_rate": 3.6732940826143936e-06, "loss": 0.4287, "step": 43500 }, { "epoch": 0.6270988570687344, "grad_norm": 1.7204926473091682, "learning_rate": 3.6708688927415457e-06, "loss": 0.4563, "step": 43510 }, { "epoch": 0.6272429845927677, "grad_norm": 1.438708424205809, "learning_rate": 3.6684440392920473e-06, "loss": 0.4503, "step": 43520 }, { "epoch": 0.627387112116801, "grad_norm": 1.564008428819224, "learning_rate": 3.6660195228796636e-06, "loss": 0.444, "step": 43530 }, { "epoch": 0.6275312396408342, "grad_norm": 1.7707272740898163, "learning_rate": 3.6635953441180764e-06, "loss": 0.4375, "step": 43540 }, { "epoch": 0.6276753671648675, "grad_norm": 1.720643309837536, "learning_rate": 3.661171503620885e-06, "loss": 0.4463, "step": 43550 }, { "epoch": 0.6278194946889007, "grad_norm": 1.4918476439191242, "learning_rate": 3.6587480020015963e-06, "loss": 0.4287, "step": 43560 }, { "epoch": 0.627963622212934, "grad_norm": 1.7641251666858555, "learning_rate": 3.6563248398736396e-06, "loss": 0.4584, "step": 43570 }, { "epoch": 0.6281077497369673, "grad_norm": 1.7761620379728835, "learning_rate": 3.653902017850352e-06, "loss": 0.4539, "step": 43580 }, { "epoch": 0.6282518772610005, "grad_norm": 1.4949268884386695, "learning_rate": 3.6514795365449886e-06, "loss": 0.4527, "step": 43590 }, { "epoch": 0.6283960047850338, "grad_norm": 1.5124737054068775, "learning_rate": 3.6490573965707134e-06, "loss": 0.4336, "step": 43600 }, { "epoch": 0.628540132309067, "grad_norm": 1.5114615698255716, "learning_rate": 3.6466355985406087e-06, "loss": 0.4512, "step": 43610 }, { "epoch": 0.6286842598331003, "grad_norm": 1.6406873615783946, "learning_rate": 3.6442141430676694e-06, "loss": 0.4391, "step": 43620 }, { "epoch": 0.6288283873571336, "grad_norm": 1.682933996935155, "learning_rate": 3.6417930307648004e-06, "loss": 0.4641, "step": 43630 }, { "epoch": 0.6289725148811669, "grad_norm": 1.3813132094239045, "learning_rate": 3.6393722622448237e-06, "loss": 0.4058, "step": 43640 }, { "epoch": 0.6291166424052002, "grad_norm": 1.7836674661180503, "learning_rate": 3.636951838120473e-06, "loss": 0.4452, "step": 43650 }, { "epoch": 0.6292607699292334, "grad_norm": 1.3865632643963943, "learning_rate": 3.6345317590043938e-06, "loss": 0.4374, "step": 43660 }, { "epoch": 0.6294048974532667, "grad_norm": 1.3591609235728912, "learning_rate": 3.6321120255091437e-06, "loss": 0.4471, "step": 43670 }, { "epoch": 0.6295490249772999, "grad_norm": 1.4587014340668127, "learning_rate": 3.6296926382471963e-06, "loss": 0.4422, "step": 43680 }, { "epoch": 0.6296931525013332, "grad_norm": 1.4955342008174386, "learning_rate": 3.627273597830935e-06, "loss": 0.4655, "step": 43690 }, { "epoch": 0.6298372800253664, "grad_norm": 1.58225961927405, "learning_rate": 3.6248549048726567e-06, "loss": 0.4601, "step": 43700 }, { "epoch": 0.6299814075493997, "grad_norm": 1.7405958037422302, "learning_rate": 3.622436559984568e-06, "loss": 0.4543, "step": 43710 }, { "epoch": 0.6301255350734329, "grad_norm": 1.593477973521786, "learning_rate": 3.620018563778788e-06, "loss": 0.4407, "step": 43720 }, { "epoch": 0.6302696625974662, "grad_norm": 1.4341664884074181, "learning_rate": 3.6176009168673514e-06, "loss": 0.4253, "step": 43730 }, { "epoch": 0.6304137901214995, "grad_norm": 1.4602440476874112, "learning_rate": 3.6151836198621983e-06, "loss": 0.4528, "step": 43740 }, { "epoch": 0.6305579176455328, "grad_norm": 1.679818199851561, "learning_rate": 3.6127666733751853e-06, "loss": 0.4265, "step": 43750 }, { "epoch": 0.6307020451695661, "grad_norm": 1.724448357074446, "learning_rate": 3.610350078018079e-06, "loss": 0.4287, "step": 43760 }, { "epoch": 0.6308461726935993, "grad_norm": 1.5308162966533416, "learning_rate": 3.607933834402554e-06, "loss": 0.4111, "step": 43770 }, { "epoch": 0.6309903002176326, "grad_norm": 1.455158044171446, "learning_rate": 3.6055179431401988e-06, "loss": 0.4435, "step": 43780 }, { "epoch": 0.6311344277416658, "grad_norm": 1.4892604293917628, "learning_rate": 3.6031024048425146e-06, "loss": 0.4422, "step": 43790 }, { "epoch": 0.6312785552656991, "grad_norm": 1.5004440689088334, "learning_rate": 3.600687220120911e-06, "loss": 0.4635, "step": 43800 }, { "epoch": 0.6314226827897323, "grad_norm": 1.7098058141444972, "learning_rate": 3.598272389586705e-06, "loss": 0.4897, "step": 43810 }, { "epoch": 0.6315668103137656, "grad_norm": 1.3957289224309424, "learning_rate": 3.5958579138511295e-06, "loss": 0.4513, "step": 43820 }, { "epoch": 0.6317109378377989, "grad_norm": 1.6325371497129013, "learning_rate": 3.5934437935253253e-06, "loss": 0.459, "step": 43830 }, { "epoch": 0.6318550653618321, "grad_norm": 1.9711180182037478, "learning_rate": 3.5910300292203416e-06, "loss": 0.44, "step": 43840 }, { "epoch": 0.6319991928858654, "grad_norm": 1.7454762351804476, "learning_rate": 3.588616621547139e-06, "loss": 0.4571, "step": 43850 }, { "epoch": 0.6321433204098986, "grad_norm": 1.4941447910449466, "learning_rate": 3.58620357111659e-06, "loss": 0.4421, "step": 43860 }, { "epoch": 0.632287447933932, "grad_norm": 1.3909393384118636, "learning_rate": 3.5837908785394715e-06, "loss": 0.4389, "step": 43870 }, { "epoch": 0.6324315754579652, "grad_norm": 1.2549077501628672, "learning_rate": 3.5813785444264737e-06, "loss": 0.4568, "step": 43880 }, { "epoch": 0.6325757029819985, "grad_norm": 1.448301899111112, "learning_rate": 3.578966569388197e-06, "loss": 0.4273, "step": 43890 }, { "epoch": 0.6327198305060318, "grad_norm": 1.769634632099174, "learning_rate": 3.576554954035145e-06, "loss": 0.4317, "step": 43900 }, { "epoch": 0.632863958030065, "grad_norm": 1.6853987062966014, "learning_rate": 3.5741436989777387e-06, "loss": 0.4417, "step": 43910 }, { "epoch": 0.6330080855540983, "grad_norm": 3.19066086405174, "learning_rate": 3.571732804826302e-06, "loss": 0.4717, "step": 43920 }, { "epoch": 0.6331522130781315, "grad_norm": 1.5544778212206705, "learning_rate": 3.5693222721910692e-06, "loss": 0.4363, "step": 43930 }, { "epoch": 0.6332963406021648, "grad_norm": 1.6468769952895674, "learning_rate": 3.5669121016821807e-06, "loss": 0.4457, "step": 43940 }, { "epoch": 0.633440468126198, "grad_norm": 1.661972451740062, "learning_rate": 3.5645022939096898e-06, "loss": 0.4828, "step": 43950 }, { "epoch": 0.6335845956502313, "grad_norm": 1.4761604907106347, "learning_rate": 3.5620928494835562e-06, "loss": 0.4227, "step": 43960 }, { "epoch": 0.6337287231742645, "grad_norm": 1.5610019742258592, "learning_rate": 3.559683769013644e-06, "loss": 0.44, "step": 43970 }, { "epoch": 0.6338728506982979, "grad_norm": 1.6614952352828694, "learning_rate": 3.55727505310973e-06, "loss": 0.4568, "step": 43980 }, { "epoch": 0.6340169782223312, "grad_norm": 1.50543234227529, "learning_rate": 3.5548667023814975e-06, "loss": 0.4485, "step": 43990 }, { "epoch": 0.6341611057463644, "grad_norm": 1.6000232197531918, "learning_rate": 3.552458717438536e-06, "loss": 0.4084, "step": 44000 }, { "epoch": 0.6343052332703977, "grad_norm": 1.5184125868482272, "learning_rate": 3.5500510988903413e-06, "loss": 0.4445, "step": 44010 }, { "epoch": 0.6344493607944309, "grad_norm": 1.5886319981472852, "learning_rate": 3.5476438473463213e-06, "loss": 0.4468, "step": 44020 }, { "epoch": 0.6345934883184642, "grad_norm": 1.4060470937508405, "learning_rate": 3.545236963415788e-06, "loss": 0.4263, "step": 44030 }, { "epoch": 0.6347376158424974, "grad_norm": 1.3387821773870416, "learning_rate": 3.54283044770796e-06, "loss": 0.445, "step": 44040 }, { "epoch": 0.6348817433665307, "grad_norm": 1.686039638929588, "learning_rate": 3.5404243008319618e-06, "loss": 0.4334, "step": 44050 }, { "epoch": 0.635025870890564, "grad_norm": 1.5555462696303186, "learning_rate": 3.5380185233968267e-06, "loss": 0.4391, "step": 44060 }, { "epoch": 0.6351699984145972, "grad_norm": 1.4874023657361364, "learning_rate": 3.535613116011495e-06, "loss": 0.4529, "step": 44070 }, { "epoch": 0.6353141259386305, "grad_norm": 1.5261540081864942, "learning_rate": 3.533208079284809e-06, "loss": 0.4559, "step": 44080 }, { "epoch": 0.6354582534626637, "grad_norm": 1.5634047640703712, "learning_rate": 3.530803413825523e-06, "loss": 0.4528, "step": 44090 }, { "epoch": 0.6356023809866971, "grad_norm": 1.9726309758675118, "learning_rate": 3.5283991202422922e-06, "loss": 0.4671, "step": 44100 }, { "epoch": 0.6357465085107303, "grad_norm": 1.5339552229466162, "learning_rate": 3.525995199143681e-06, "loss": 0.4399, "step": 44110 }, { "epoch": 0.6358906360347636, "grad_norm": 1.5743634883343784, "learning_rate": 3.5235916511381578e-06, "loss": 0.4784, "step": 44120 }, { "epoch": 0.6360347635587968, "grad_norm": 1.5463561010040716, "learning_rate": 3.5211884768340987e-06, "loss": 0.427, "step": 44130 }, { "epoch": 0.6361788910828301, "grad_norm": 1.7837756201608417, "learning_rate": 3.5187856768397843e-06, "loss": 0.4694, "step": 44140 }, { "epoch": 0.6363230186068634, "grad_norm": 1.72458484778355, "learning_rate": 3.5163832517633976e-06, "loss": 0.4443, "step": 44150 }, { "epoch": 0.6364671461308966, "grad_norm": 1.722623804334363, "learning_rate": 3.5139812022130304e-06, "loss": 0.4535, "step": 44160 }, { "epoch": 0.6366112736549299, "grad_norm": 1.5388642586301204, "learning_rate": 3.5115795287966788e-06, "loss": 0.4462, "step": 44170 }, { "epoch": 0.6367554011789631, "grad_norm": 1.3231375998655337, "learning_rate": 3.50917823212224e-06, "loss": 0.4435, "step": 44180 }, { "epoch": 0.6368995287029964, "grad_norm": 1.4088919851363293, "learning_rate": 3.5067773127975212e-06, "loss": 0.4627, "step": 44190 }, { "epoch": 0.6370436562270296, "grad_norm": 1.7794561205814652, "learning_rate": 3.504376771430232e-06, "loss": 0.4395, "step": 44200 }, { "epoch": 0.6371877837510629, "grad_norm": 1.4982833578576942, "learning_rate": 3.501976608627985e-06, "loss": 0.4066, "step": 44210 }, { "epoch": 0.6373319112750963, "grad_norm": 1.6744307701680428, "learning_rate": 3.4995768249982975e-06, "loss": 0.4725, "step": 44220 }, { "epoch": 0.6374760387991295, "grad_norm": 1.6294005611571252, "learning_rate": 3.497177421148591e-06, "loss": 0.4634, "step": 44230 }, { "epoch": 0.6376201663231628, "grad_norm": 1.149411321580332, "learning_rate": 3.4947783976861944e-06, "loss": 0.4696, "step": 44240 }, { "epoch": 0.637764293847196, "grad_norm": 1.950475166538651, "learning_rate": 3.4923797552183335e-06, "loss": 0.4408, "step": 44250 }, { "epoch": 0.6379084213712293, "grad_norm": 1.6461277416015583, "learning_rate": 3.489981494352144e-06, "loss": 0.4319, "step": 44260 }, { "epoch": 0.6380525488952625, "grad_norm": 1.5894243199154994, "learning_rate": 3.487583615694661e-06, "loss": 0.4586, "step": 44270 }, { "epoch": 0.6381966764192958, "grad_norm": 1.4777139673137958, "learning_rate": 3.485186119852823e-06, "loss": 0.441, "step": 44280 }, { "epoch": 0.638340803943329, "grad_norm": 1.5115999655766166, "learning_rate": 3.482789007433474e-06, "loss": 0.4467, "step": 44290 }, { "epoch": 0.6384849314673623, "grad_norm": 1.7412546403320244, "learning_rate": 3.4803922790433607e-06, "loss": 0.4417, "step": 44300 }, { "epoch": 0.6386290589913955, "grad_norm": 1.4595009232053229, "learning_rate": 3.47799593528913e-06, "loss": 0.4661, "step": 44310 }, { "epoch": 0.6387731865154288, "grad_norm": 1.5524215562379504, "learning_rate": 3.475599976777333e-06, "loss": 0.4406, "step": 44320 }, { "epoch": 0.6389173140394622, "grad_norm": 1.5975261067729256, "learning_rate": 3.473204404114425e-06, "loss": 0.444, "step": 44330 }, { "epoch": 0.6390614415634954, "grad_norm": 1.7466692240084363, "learning_rate": 3.470809217906758e-06, "loss": 0.4403, "step": 44340 }, { "epoch": 0.6392055690875287, "grad_norm": 1.5641178068103851, "learning_rate": 3.4684144187605963e-06, "loss": 0.4337, "step": 44350 }, { "epoch": 0.6393496966115619, "grad_norm": 1.5589985815358336, "learning_rate": 3.4660200072820948e-06, "loss": 0.4558, "step": 44360 }, { "epoch": 0.6394938241355952, "grad_norm": 1.7569223516735273, "learning_rate": 3.463625984077318e-06, "loss": 0.4436, "step": 44370 }, { "epoch": 0.6396379516596284, "grad_norm": 1.4298407032889422, "learning_rate": 3.4612323497522304e-06, "loss": 0.4372, "step": 44380 }, { "epoch": 0.6397820791836617, "grad_norm": 1.515643787725272, "learning_rate": 3.458839104912695e-06, "loss": 0.4519, "step": 44390 }, { "epoch": 0.639926206707695, "grad_norm": 1.6601045048002894, "learning_rate": 3.4564462501644814e-06, "loss": 0.4428, "step": 44400 }, { "epoch": 0.6400703342317282, "grad_norm": 1.5672438017933061, "learning_rate": 3.4540537861132538e-06, "loss": 0.4387, "step": 44410 }, { "epoch": 0.6402144617557615, "grad_norm": 1.5738867782995682, "learning_rate": 3.4516617133645836e-06, "loss": 0.4464, "step": 44420 }, { "epoch": 0.6403585892797947, "grad_norm": 1.513699425020062, "learning_rate": 3.4492700325239427e-06, "loss": 0.4586, "step": 44430 }, { "epoch": 0.640502716803828, "grad_norm": 1.3019457241689982, "learning_rate": 3.446878744196698e-06, "loss": 0.4329, "step": 44440 }, { "epoch": 0.6406468443278613, "grad_norm": 1.6118320796091592, "learning_rate": 3.444487848988122e-06, "loss": 0.4536, "step": 44450 }, { "epoch": 0.6407909718518946, "grad_norm": 1.355880007986958, "learning_rate": 3.4420973475033894e-06, "loss": 0.4392, "step": 44460 }, { "epoch": 0.6409350993759279, "grad_norm": 1.6547711313631972, "learning_rate": 3.4397072403475705e-06, "loss": 0.422, "step": 44470 }, { "epoch": 0.6410792268999611, "grad_norm": 1.3197315652265262, "learning_rate": 3.4373175281256397e-06, "loss": 0.4406, "step": 44480 }, { "epoch": 0.6412233544239944, "grad_norm": 1.477831180650535, "learning_rate": 3.434928211442467e-06, "loss": 0.435, "step": 44490 }, { "epoch": 0.6413674819480276, "grad_norm": 1.6093786129768501, "learning_rate": 3.432539290902826e-06, "loss": 0.435, "step": 44500 }, { "epoch": 0.6415116094720609, "grad_norm": 1.4677199982576599, "learning_rate": 3.43015076711139e-06, "loss": 0.4578, "step": 44510 }, { "epoch": 0.6416557369960941, "grad_norm": 1.5389395511919486, "learning_rate": 3.4277626406727293e-06, "loss": 0.459, "step": 44520 }, { "epoch": 0.6417998645201274, "grad_norm": 1.514942812647361, "learning_rate": 3.425374912191315e-06, "loss": 0.4551, "step": 44530 }, { "epoch": 0.6419439920441606, "grad_norm": 2.5347436089176734, "learning_rate": 3.4229875822715195e-06, "loss": 0.4619, "step": 44540 }, { "epoch": 0.6420881195681939, "grad_norm": 1.610476463846873, "learning_rate": 3.42060065151761e-06, "loss": 0.4582, "step": 44550 }, { "epoch": 0.6422322470922271, "grad_norm": 1.8256745071419545, "learning_rate": 3.418214120533755e-06, "loss": 0.452, "step": 44560 }, { "epoch": 0.6423763746162605, "grad_norm": 1.546589192032217, "learning_rate": 3.415827989924024e-06, "loss": 0.4429, "step": 44570 }, { "epoch": 0.6425205021402938, "grad_norm": 1.4100014712191344, "learning_rate": 3.4134422602923833e-06, "loss": 0.4448, "step": 44580 }, { "epoch": 0.642664629664327, "grad_norm": 1.3691806073462098, "learning_rate": 3.4110569322426956e-06, "loss": 0.4517, "step": 44590 }, { "epoch": 0.6428087571883603, "grad_norm": 1.4217429796525543, "learning_rate": 3.4086720063787236e-06, "loss": 0.4352, "step": 44600 }, { "epoch": 0.6429528847123935, "grad_norm": 1.6183842430125146, "learning_rate": 3.406287483304131e-06, "loss": 0.4842, "step": 44610 }, { "epoch": 0.6430970122364268, "grad_norm": 1.7373909054331196, "learning_rate": 3.4039033636224737e-06, "loss": 0.4474, "step": 44620 }, { "epoch": 0.64324113976046, "grad_norm": 1.6908080467913587, "learning_rate": 3.4015196479372104e-06, "loss": 0.4276, "step": 44630 }, { "epoch": 0.6433852672844933, "grad_norm": 1.8459516279926649, "learning_rate": 3.3991363368516966e-06, "loss": 0.4525, "step": 44640 }, { "epoch": 0.6435293948085266, "grad_norm": 1.6763400281929564, "learning_rate": 3.396753430969183e-06, "loss": 0.4591, "step": 44650 }, { "epoch": 0.6436735223325598, "grad_norm": 1.5778963835894413, "learning_rate": 3.3943709308928196e-06, "loss": 0.4385, "step": 44660 }, { "epoch": 0.6438176498565931, "grad_norm": 1.5598659251295892, "learning_rate": 3.3919888372256533e-06, "loss": 0.4217, "step": 44670 }, { "epoch": 0.6439617773806264, "grad_norm": 1.3465809146400278, "learning_rate": 3.38960715057063e-06, "loss": 0.4646, "step": 44680 }, { "epoch": 0.6441059049046597, "grad_norm": 1.5005032415116843, "learning_rate": 3.3872258715305915e-06, "loss": 0.4261, "step": 44690 }, { "epoch": 0.6442500324286929, "grad_norm": 1.622058110845444, "learning_rate": 3.384845000708273e-06, "loss": 0.4698, "step": 44700 }, { "epoch": 0.6443941599527262, "grad_norm": 1.5033857593210687, "learning_rate": 3.3824645387063117e-06, "loss": 0.4517, "step": 44710 }, { "epoch": 0.6445382874767595, "grad_norm": 1.5958385134907223, "learning_rate": 3.3800844861272365e-06, "loss": 0.4568, "step": 44720 }, { "epoch": 0.6446824150007927, "grad_norm": 1.4980129093088657, "learning_rate": 3.377704843573476e-06, "loss": 0.4164, "step": 44730 }, { "epoch": 0.644826542524826, "grad_norm": 1.528617308553592, "learning_rate": 3.3753256116473555e-06, "loss": 0.4489, "step": 44740 }, { "epoch": 0.6449706700488592, "grad_norm": 1.7971673241984718, "learning_rate": 3.3729467909510914e-06, "loss": 0.4405, "step": 44750 }, { "epoch": 0.6451147975728925, "grad_norm": 1.691627420712971, "learning_rate": 3.370568382086803e-06, "loss": 0.4418, "step": 44760 }, { "epoch": 0.6452589250969257, "grad_norm": 1.588000143656118, "learning_rate": 3.3681903856565013e-06, "loss": 0.4579, "step": 44770 }, { "epoch": 0.645403052620959, "grad_norm": 1.5442342493266048, "learning_rate": 3.3658128022620895e-06, "loss": 0.4726, "step": 44780 }, { "epoch": 0.6455471801449922, "grad_norm": 1.3496434715367471, "learning_rate": 3.363435632505376e-06, "loss": 0.4159, "step": 44790 }, { "epoch": 0.6456913076690256, "grad_norm": 1.5215761756885844, "learning_rate": 3.361058876988055e-06, "loss": 0.4426, "step": 44800 }, { "epoch": 0.6458354351930589, "grad_norm": 1.7174380941438043, "learning_rate": 3.3586825363117207e-06, "loss": 0.443, "step": 44810 }, { "epoch": 0.6459795627170921, "grad_norm": 1.5205918912298009, "learning_rate": 3.3563066110778626e-06, "loss": 0.4385, "step": 44820 }, { "epoch": 0.6461236902411254, "grad_norm": 1.4227650768247948, "learning_rate": 3.3539311018878606e-06, "loss": 0.4414, "step": 44830 }, { "epoch": 0.6462678177651586, "grad_norm": 1.5508547227137481, "learning_rate": 3.351556009342994e-06, "loss": 0.4533, "step": 44840 }, { "epoch": 0.6464119452891919, "grad_norm": 1.6528690912589938, "learning_rate": 3.3491813340444357e-06, "loss": 0.4544, "step": 44850 }, { "epoch": 0.6465560728132251, "grad_norm": 1.477130403524539, "learning_rate": 3.3468070765932497e-06, "loss": 0.4152, "step": 44860 }, { "epoch": 0.6467002003372584, "grad_norm": 1.5978804953280343, "learning_rate": 3.3444332375903988e-06, "loss": 0.4793, "step": 44870 }, { "epoch": 0.6468443278612916, "grad_norm": 1.6817209875413084, "learning_rate": 3.342059817636738e-06, "loss": 0.4652, "step": 44880 }, { "epoch": 0.6469884553853249, "grad_norm": 1.4923657213102859, "learning_rate": 3.339686817333014e-06, "loss": 0.4125, "step": 44890 }, { "epoch": 0.6471325829093582, "grad_norm": 1.8404084666181384, "learning_rate": 3.337314237279871e-06, "loss": 0.4538, "step": 44900 }, { "epoch": 0.6472767104333914, "grad_norm": 1.6608538436329356, "learning_rate": 3.3349420780778453e-06, "loss": 0.4422, "step": 44910 }, { "epoch": 0.6474208379574248, "grad_norm": 1.4946296660587302, "learning_rate": 3.332570340327367e-06, "loss": 0.4513, "step": 44920 }, { "epoch": 0.647564965481458, "grad_norm": 1.6784011013640512, "learning_rate": 3.3301990246287574e-06, "loss": 0.4523, "step": 44930 }, { "epoch": 0.6477090930054913, "grad_norm": 1.523942633196496, "learning_rate": 3.3278281315822337e-06, "loss": 0.4387, "step": 44940 }, { "epoch": 0.6478532205295245, "grad_norm": 1.7461498353456113, "learning_rate": 3.325457661787906e-06, "loss": 0.4168, "step": 44950 }, { "epoch": 0.6479973480535578, "grad_norm": 1.6138323112122575, "learning_rate": 3.3230876158457735e-06, "loss": 0.4241, "step": 44960 }, { "epoch": 0.648141475577591, "grad_norm": 1.626871325935654, "learning_rate": 3.320717994355734e-06, "loss": 0.4633, "step": 44970 }, { "epoch": 0.6482856031016243, "grad_norm": 1.492251878512997, "learning_rate": 3.3183487979175738e-06, "loss": 0.4533, "step": 44980 }, { "epoch": 0.6484297306256576, "grad_norm": 1.7581340479629908, "learning_rate": 3.315980027130972e-06, "loss": 0.4452, "step": 44990 }, { "epoch": 0.6485738581496908, "grad_norm": 1.7425282468679735, "learning_rate": 3.313611682595499e-06, "loss": 0.4379, "step": 45000 }, { "epoch": 0.6487179856737241, "grad_norm": 1.4607067222111072, "learning_rate": 3.3112437649106223e-06, "loss": 0.4467, "step": 45010 }, { "epoch": 0.6488621131977573, "grad_norm": 1.5097761755790873, "learning_rate": 3.3088762746756963e-06, "loss": 0.4537, "step": 45020 }, { "epoch": 0.6490062407217907, "grad_norm": 1.7024409703268015, "learning_rate": 3.3065092124899713e-06, "loss": 0.4286, "step": 45030 }, { "epoch": 0.649150368245824, "grad_norm": 1.4561137861667062, "learning_rate": 3.304142578952583e-06, "loss": 0.4366, "step": 45040 }, { "epoch": 0.6492944957698572, "grad_norm": 1.5697585958222928, "learning_rate": 3.3017763746625648e-06, "loss": 0.427, "step": 45050 }, { "epoch": 0.6494386232938905, "grad_norm": 1.2903802282991104, "learning_rate": 3.2994106002188373e-06, "loss": 0.4249, "step": 45060 }, { "epoch": 0.6495827508179237, "grad_norm": 1.4822917149034958, "learning_rate": 3.297045256220216e-06, "loss": 0.4503, "step": 45070 }, { "epoch": 0.649726878341957, "grad_norm": 1.5551644740957649, "learning_rate": 3.294680343265405e-06, "loss": 0.4134, "step": 45080 }, { "epoch": 0.6498710058659902, "grad_norm": 1.5745063424889227, "learning_rate": 3.2923158619529994e-06, "loss": 0.4495, "step": 45090 }, { "epoch": 0.6500151333900235, "grad_norm": 1.6391982420853755, "learning_rate": 3.2899518128814843e-06, "loss": 0.4418, "step": 45100 }, { "epoch": 0.6501592609140567, "grad_norm": 1.481261527925262, "learning_rate": 3.287588196649238e-06, "loss": 0.4611, "step": 45110 }, { "epoch": 0.65030338843809, "grad_norm": 1.994613431807514, "learning_rate": 3.2852250138545282e-06, "loss": 0.4456, "step": 45120 }, { "epoch": 0.6504475159621232, "grad_norm": 1.7062616164347773, "learning_rate": 3.2828622650955133e-06, "loss": 0.4578, "step": 45130 }, { "epoch": 0.6505916434861565, "grad_norm": 1.2469928950977778, "learning_rate": 3.2804999509702383e-06, "loss": 0.4509, "step": 45140 }, { "epoch": 0.6507357710101899, "grad_norm": 1.702100877953611, "learning_rate": 3.278138072076642e-06, "loss": 0.433, "step": 45150 }, { "epoch": 0.6508798985342231, "grad_norm": 1.509488577641911, "learning_rate": 3.275776629012554e-06, "loss": 0.4321, "step": 45160 }, { "epoch": 0.6510240260582564, "grad_norm": 1.5272948058081857, "learning_rate": 3.2734156223756888e-06, "loss": 0.4326, "step": 45170 }, { "epoch": 0.6511681535822896, "grad_norm": 1.6137144787076807, "learning_rate": 3.2710550527636543e-06, "loss": 0.459, "step": 45180 }, { "epoch": 0.6513122811063229, "grad_norm": 1.2452790303514365, "learning_rate": 3.268694920773947e-06, "loss": 0.4545, "step": 45190 }, { "epoch": 0.6514564086303561, "grad_norm": 1.9356020083648207, "learning_rate": 3.2663352270039518e-06, "loss": 0.4432, "step": 45200 }, { "epoch": 0.6516005361543894, "grad_norm": 1.9260089521897317, "learning_rate": 3.2639759720509423e-06, "loss": 0.4729, "step": 45210 }, { "epoch": 0.6517446636784227, "grad_norm": 1.6699227341569862, "learning_rate": 3.261617156512082e-06, "loss": 0.454, "step": 45220 }, { "epoch": 0.6518887912024559, "grad_norm": 1.4861097896389588, "learning_rate": 3.259258780984426e-06, "loss": 0.4535, "step": 45230 }, { "epoch": 0.6520329187264892, "grad_norm": 1.4144490706607524, "learning_rate": 3.256900846064912e-06, "loss": 0.4319, "step": 45240 }, { "epoch": 0.6521770462505224, "grad_norm": 1.8746288827384114, "learning_rate": 3.2545433523503706e-06, "loss": 0.4471, "step": 45250 }, { "epoch": 0.6523211737745557, "grad_norm": 1.878720239813831, "learning_rate": 3.25218630043752e-06, "loss": 0.4277, "step": 45260 }, { "epoch": 0.652465301298589, "grad_norm": 1.9319540469134384, "learning_rate": 3.2498296909229636e-06, "loss": 0.4546, "step": 45270 }, { "epoch": 0.6526094288226223, "grad_norm": 1.3699997781065836, "learning_rate": 3.2474735244031973e-06, "loss": 0.4511, "step": 45280 }, { "epoch": 0.6527535563466556, "grad_norm": 1.116228361404753, "learning_rate": 3.2451178014746033e-06, "loss": 0.407, "step": 45290 }, { "epoch": 0.6528976838706888, "grad_norm": 1.59844011533765, "learning_rate": 3.242762522733448e-06, "loss": 0.477, "step": 45300 }, { "epoch": 0.6530418113947221, "grad_norm": 1.9032797994794044, "learning_rate": 3.2404076887758905e-06, "loss": 0.4098, "step": 45310 }, { "epoch": 0.6531859389187553, "grad_norm": 5.04330683908247, "learning_rate": 3.2380533001979757e-06, "loss": 0.4455, "step": 45320 }, { "epoch": 0.6533300664427886, "grad_norm": 1.5908050364692778, "learning_rate": 3.2356993575956343e-06, "loss": 0.4333, "step": 45330 }, { "epoch": 0.6534741939668218, "grad_norm": 1.994507168016046, "learning_rate": 3.233345861564683e-06, "loss": 0.4798, "step": 45340 }, { "epoch": 0.6536183214908551, "grad_norm": 1.6832840051144489, "learning_rate": 3.2309928127008315e-06, "loss": 0.4491, "step": 45350 }, { "epoch": 0.6537624490148883, "grad_norm": 1.6746217729894275, "learning_rate": 3.228640211599671e-06, "loss": 0.4604, "step": 45360 }, { "epoch": 0.6539065765389216, "grad_norm": 1.5167072315801888, "learning_rate": 3.22628805885668e-06, "loss": 0.4426, "step": 45370 }, { "epoch": 0.654050704062955, "grad_norm": 1.4210868462149706, "learning_rate": 3.2239363550672235e-06, "loss": 0.4236, "step": 45380 }, { "epoch": 0.6541948315869882, "grad_norm": 1.8164561422990373, "learning_rate": 3.2215851008265563e-06, "loss": 0.4552, "step": 45390 }, { "epoch": 0.6543389591110215, "grad_norm": 1.700632066778551, "learning_rate": 3.219234296729814e-06, "loss": 0.4483, "step": 45400 }, { "epoch": 0.6544830866350547, "grad_norm": 1.5866573444900116, "learning_rate": 3.216883943372021e-06, "loss": 0.4385, "step": 45410 }, { "epoch": 0.654627214159088, "grad_norm": 1.481043637382776, "learning_rate": 3.2145340413480908e-06, "loss": 0.4221, "step": 45420 }, { "epoch": 0.6547713416831212, "grad_norm": 1.7843465673446899, "learning_rate": 3.2121845912528157e-06, "loss": 0.4464, "step": 45430 }, { "epoch": 0.6549154692071545, "grad_norm": 3.3981374338495507, "learning_rate": 3.2098355936808777e-06, "loss": 0.416, "step": 45440 }, { "epoch": 0.6550595967311877, "grad_norm": 1.7182977219256728, "learning_rate": 3.207487049226845e-06, "loss": 0.4328, "step": 45450 }, { "epoch": 0.655203724255221, "grad_norm": 1.7866711475493127, "learning_rate": 3.2051389584851713e-06, "loss": 0.4848, "step": 45460 }, { "epoch": 0.6553478517792543, "grad_norm": 1.780572339996317, "learning_rate": 3.2027913220501928e-06, "loss": 0.4527, "step": 45470 }, { "epoch": 0.6554919793032875, "grad_norm": 1.5217719606027176, "learning_rate": 3.200444140516132e-06, "loss": 0.4462, "step": 45480 }, { "epoch": 0.6556361068273208, "grad_norm": 1.7151725672756837, "learning_rate": 3.198097414477096e-06, "loss": 0.4455, "step": 45490 }, { "epoch": 0.6557802343513541, "grad_norm": 1.6168874588335067, "learning_rate": 3.1957511445270783e-06, "loss": 0.445, "step": 45500 }, { "epoch": 0.6559243618753874, "grad_norm": 1.5043034460448725, "learning_rate": 3.1934053312599537e-06, "loss": 0.4355, "step": 45510 }, { "epoch": 0.6560684893994206, "grad_norm": 1.523040189154208, "learning_rate": 3.191059975269485e-06, "loss": 0.4468, "step": 45520 }, { "epoch": 0.6562126169234539, "grad_norm": 1.7950419710649004, "learning_rate": 3.188715077149317e-06, "loss": 0.4687, "step": 45530 }, { "epoch": 0.6563567444474872, "grad_norm": 3.1490521287383535, "learning_rate": 3.186370637492978e-06, "loss": 0.4409, "step": 45540 }, { "epoch": 0.6565008719715204, "grad_norm": 1.6677689637691933, "learning_rate": 3.1840266568938837e-06, "loss": 0.4265, "step": 45550 }, { "epoch": 0.6566449994955537, "grad_norm": 1.4231377315929854, "learning_rate": 3.181683135945327e-06, "loss": 0.4192, "step": 45560 }, { "epoch": 0.6567891270195869, "grad_norm": 1.7554348067271806, "learning_rate": 3.179340075240495e-06, "loss": 0.4235, "step": 45570 }, { "epoch": 0.6569332545436202, "grad_norm": 1.6724426438326947, "learning_rate": 3.1769974753724477e-06, "loss": 0.4123, "step": 45580 }, { "epoch": 0.6570773820676534, "grad_norm": 1.4681816362441154, "learning_rate": 3.1746553369341342e-06, "loss": 0.4338, "step": 45590 }, { "epoch": 0.6572215095916867, "grad_norm": 1.5534013496768415, "learning_rate": 3.172313660518387e-06, "loss": 0.4474, "step": 45600 }, { "epoch": 0.6573656371157199, "grad_norm": 1.4275172991582596, "learning_rate": 3.169972446717916e-06, "loss": 0.4324, "step": 45610 }, { "epoch": 0.6575097646397533, "grad_norm": 1.4347259026847523, "learning_rate": 3.1676316961253217e-06, "loss": 0.4377, "step": 45620 }, { "epoch": 0.6576538921637866, "grad_norm": 1.5188718426277148, "learning_rate": 3.1652914093330835e-06, "loss": 0.4531, "step": 45630 }, { "epoch": 0.6577980196878198, "grad_norm": 1.5175810508866516, "learning_rate": 3.1629515869335616e-06, "loss": 0.4692, "step": 45640 }, { "epoch": 0.6579421472118531, "grad_norm": 1.7189533743685905, "learning_rate": 3.1606122295190013e-06, "loss": 0.4688, "step": 45650 }, { "epoch": 0.6580862747358863, "grad_norm": 1.5013214545106406, "learning_rate": 3.1582733376815317e-06, "loss": 0.433, "step": 45660 }, { "epoch": 0.6582304022599196, "grad_norm": 1.7942413134447226, "learning_rate": 3.1559349120131584e-06, "loss": 0.4502, "step": 45670 }, { "epoch": 0.6583745297839528, "grad_norm": 1.5767311866707565, "learning_rate": 3.1535969531057754e-06, "loss": 0.4301, "step": 45680 }, { "epoch": 0.6585186573079861, "grad_norm": 1.6348060359029037, "learning_rate": 3.151259461551156e-06, "loss": 0.4576, "step": 45690 }, { "epoch": 0.6586627848320193, "grad_norm": 1.836940395660697, "learning_rate": 3.1489224379409545e-06, "loss": 0.4711, "step": 45700 }, { "epoch": 0.6588069123560526, "grad_norm": 1.5183111376513148, "learning_rate": 3.146585882866705e-06, "loss": 0.4186, "step": 45710 }, { "epoch": 0.6589510398800859, "grad_norm": 1.6366689119204298, "learning_rate": 3.1442497969198275e-06, "loss": 0.4646, "step": 45720 }, { "epoch": 0.6590951674041192, "grad_norm": 1.4958473406220836, "learning_rate": 3.141914180691622e-06, "loss": 0.4564, "step": 45730 }, { "epoch": 0.6592392949281525, "grad_norm": 1.718361428493663, "learning_rate": 3.139579034773265e-06, "loss": 0.4309, "step": 45740 }, { "epoch": 0.6593834224521857, "grad_norm": 1.5281462032170245, "learning_rate": 3.13724435975582e-06, "loss": 0.4507, "step": 45750 }, { "epoch": 0.659527549976219, "grad_norm": 1.7476965569990617, "learning_rate": 3.1349101562302285e-06, "loss": 0.4422, "step": 45760 }, { "epoch": 0.6596716775002522, "grad_norm": 3.4035569914621533, "learning_rate": 3.1325764247873132e-06, "loss": 0.4477, "step": 45770 }, { "epoch": 0.6598158050242855, "grad_norm": 1.4153915129046575, "learning_rate": 3.1302431660177744e-06, "loss": 0.442, "step": 45780 }, { "epoch": 0.6599599325483188, "grad_norm": 1.7050080027654804, "learning_rate": 3.1279103805122e-06, "loss": 0.4592, "step": 45790 }, { "epoch": 0.660104060072352, "grad_norm": 1.7269336325675384, "learning_rate": 3.125578068861051e-06, "loss": 0.4552, "step": 45800 }, { "epoch": 0.6602481875963853, "grad_norm": 1.5831463214269423, "learning_rate": 3.123246231654672e-06, "loss": 0.4461, "step": 45810 }, { "epoch": 0.6603923151204185, "grad_norm": 1.591845146255107, "learning_rate": 3.1209148694832858e-06, "loss": 0.462, "step": 45820 }, { "epoch": 0.6605364426444518, "grad_norm": 1.742336493402345, "learning_rate": 3.1185839829369953e-06, "loss": 0.4511, "step": 45830 }, { "epoch": 0.660680570168485, "grad_norm": 1.502845230325684, "learning_rate": 3.116253572605785e-06, "loss": 0.4411, "step": 45840 }, { "epoch": 0.6608246976925184, "grad_norm": 1.757466796802797, "learning_rate": 3.113923639079516e-06, "loss": 0.4371, "step": 45850 }, { "epoch": 0.6609688252165516, "grad_norm": 1.2509488380462401, "learning_rate": 3.11159418294793e-06, "loss": 0.4383, "step": 45860 }, { "epoch": 0.6611129527405849, "grad_norm": 1.4607824990438945, "learning_rate": 3.109265204800648e-06, "loss": 0.4434, "step": 45870 }, { "epoch": 0.6612570802646182, "grad_norm": 1.449919301303756, "learning_rate": 3.1069367052271703e-06, "loss": 0.4369, "step": 45880 }, { "epoch": 0.6614012077886514, "grad_norm": 1.504926647679212, "learning_rate": 3.104608684816874e-06, "loss": 0.4301, "step": 45890 }, { "epoch": 0.6615453353126847, "grad_norm": 1.540945723998452, "learning_rate": 3.102281144159019e-06, "loss": 0.4301, "step": 45900 }, { "epoch": 0.6616894628367179, "grad_norm": 1.819096441156242, "learning_rate": 3.099954083842741e-06, "loss": 0.4745, "step": 45910 }, { "epoch": 0.6618335903607512, "grad_norm": 1.4962621703820027, "learning_rate": 3.0976275044570535e-06, "loss": 0.4255, "step": 45920 }, { "epoch": 0.6619777178847844, "grad_norm": 1.5280568436729187, "learning_rate": 3.0953014065908487e-06, "loss": 0.4588, "step": 45930 }, { "epoch": 0.6621218454088177, "grad_norm": 1.6769043087949171, "learning_rate": 3.0929757908328993e-06, "loss": 0.4441, "step": 45940 }, { "epoch": 0.6622659729328509, "grad_norm": 2.220883447487129, "learning_rate": 3.090650657771852e-06, "loss": 0.4416, "step": 45950 }, { "epoch": 0.6624101004568842, "grad_norm": 2.419656415183907, "learning_rate": 3.088326007996235e-06, "loss": 0.4302, "step": 45960 }, { "epoch": 0.6625542279809176, "grad_norm": 1.525364904602922, "learning_rate": 3.0860018420944525e-06, "loss": 0.4597, "step": 45970 }, { "epoch": 0.6626983555049508, "grad_norm": 1.8068117400028907, "learning_rate": 3.083678160654785e-06, "loss": 0.4456, "step": 45980 }, { "epoch": 0.6628424830289841, "grad_norm": 1.7355868198153732, "learning_rate": 3.081354964265392e-06, "loss": 0.419, "step": 45990 }, { "epoch": 0.6629866105530173, "grad_norm": 1.616827144402018, "learning_rate": 3.0790322535143104e-06, "loss": 0.4282, "step": 46000 }, { "epoch": 0.6631307380770506, "grad_norm": 1.6652063745766026, "learning_rate": 3.076710028989455e-06, "loss": 0.4544, "step": 46010 }, { "epoch": 0.6632748656010838, "grad_norm": 1.6113789240543746, "learning_rate": 3.074388291278614e-06, "loss": 0.4299, "step": 46020 }, { "epoch": 0.6634189931251171, "grad_norm": 1.4694468891074453, "learning_rate": 3.072067040969456e-06, "loss": 0.4609, "step": 46030 }, { "epoch": 0.6635631206491504, "grad_norm": 1.6828900115995138, "learning_rate": 3.069746278649525e-06, "loss": 0.4301, "step": 46040 }, { "epoch": 0.6637072481731836, "grad_norm": 1.494758015209708, "learning_rate": 3.0674260049062405e-06, "loss": 0.4256, "step": 46050 }, { "epoch": 0.6638513756972169, "grad_norm": 1.4933964797702914, "learning_rate": 3.065106220326899e-06, "loss": 0.4481, "step": 46060 }, { "epoch": 0.6639955032212501, "grad_norm": 1.6315882223546327, "learning_rate": 3.0627869254986746e-06, "loss": 0.4628, "step": 46070 }, { "epoch": 0.6641396307452834, "grad_norm": 1.5460901825372784, "learning_rate": 3.0604681210086152e-06, "loss": 0.4123, "step": 46080 }, { "epoch": 0.6642837582693167, "grad_norm": 1.5030532160702934, "learning_rate": 3.0581498074436445e-06, "loss": 0.4226, "step": 46090 }, { "epoch": 0.66442788579335, "grad_norm": 1.7128743403007622, "learning_rate": 3.055831985390566e-06, "loss": 0.4709, "step": 46100 }, { "epoch": 0.6645720133173832, "grad_norm": 1.6457044932159077, "learning_rate": 3.0535146554360513e-06, "loss": 0.4599, "step": 46110 }, { "epoch": 0.6647161408414165, "grad_norm": 1.5260376629360048, "learning_rate": 3.0511978181666576e-06, "loss": 0.4756, "step": 46120 }, { "epoch": 0.6648602683654498, "grad_norm": 1.5663507581831297, "learning_rate": 3.0488814741688074e-06, "loss": 0.4261, "step": 46130 }, { "epoch": 0.665004395889483, "grad_norm": 1.680348942852007, "learning_rate": 3.046565624028804e-06, "loss": 0.4337, "step": 46140 }, { "epoch": 0.6651485234135163, "grad_norm": 1.495262987939226, "learning_rate": 3.044250268332826e-06, "loss": 0.4601, "step": 46150 }, { "epoch": 0.6652926509375495, "grad_norm": 1.569023239559878, "learning_rate": 3.0419354076669227e-06, "loss": 0.4583, "step": 46160 }, { "epoch": 0.6654367784615828, "grad_norm": 1.4248729932502258, "learning_rate": 3.0396210426170215e-06, "loss": 0.4402, "step": 46170 }, { "epoch": 0.665580905985616, "grad_norm": 1.8121598604711748, "learning_rate": 3.037307173768925e-06, "loss": 0.4349, "step": 46180 }, { "epoch": 0.6657250335096493, "grad_norm": 3.2815624501674945, "learning_rate": 3.034993801708306e-06, "loss": 0.4384, "step": 46190 }, { "epoch": 0.6658691610336827, "grad_norm": 1.7136599085609723, "learning_rate": 3.0326809270207165e-06, "loss": 0.4453, "step": 46200 }, { "epoch": 0.6660132885577159, "grad_norm": 1.6650926070500047, "learning_rate": 3.0303685502915783e-06, "loss": 0.4593, "step": 46210 }, { "epoch": 0.6661574160817492, "grad_norm": 1.7087664363893085, "learning_rate": 3.0280566721061894e-06, "loss": 0.444, "step": 46220 }, { "epoch": 0.6663015436057824, "grad_norm": 1.5440941463962026, "learning_rate": 3.0257452930497233e-06, "loss": 0.4371, "step": 46230 }, { "epoch": 0.6664456711298157, "grad_norm": 2.297765081234399, "learning_rate": 3.0234344137072246e-06, "loss": 0.4562, "step": 46240 }, { "epoch": 0.6665897986538489, "grad_norm": 1.7193777837351318, "learning_rate": 3.021124034663613e-06, "loss": 0.4338, "step": 46250 }, { "epoch": 0.6667339261778822, "grad_norm": 1.539254707899385, "learning_rate": 3.018814156503678e-06, "loss": 0.451, "step": 46260 }, { "epoch": 0.6668780537019154, "grad_norm": 1.4655567271105077, "learning_rate": 3.016504779812086e-06, "loss": 0.4425, "step": 46270 }, { "epoch": 0.6670221812259487, "grad_norm": 1.700845062630006, "learning_rate": 3.0141959051733772e-06, "loss": 0.4588, "step": 46280 }, { "epoch": 0.667166308749982, "grad_norm": 1.5475281946829216, "learning_rate": 3.011887533171961e-06, "loss": 0.4227, "step": 46290 }, { "epoch": 0.6673104362740152, "grad_norm": 1.62786975336037, "learning_rate": 3.0095796643921228e-06, "loss": 0.4422, "step": 46300 }, { "epoch": 0.6674545637980485, "grad_norm": 1.4086557007410048, "learning_rate": 3.007272299418019e-06, "loss": 0.4351, "step": 46310 }, { "epoch": 0.6675986913220818, "grad_norm": 1.5427024185117384, "learning_rate": 3.0049654388336776e-06, "loss": 0.4395, "step": 46320 }, { "epoch": 0.6677428188461151, "grad_norm": 1.622975299809041, "learning_rate": 3.002659083223e-06, "loss": 0.4092, "step": 46330 }, { "epoch": 0.6678869463701483, "grad_norm": 1.4698512393415455, "learning_rate": 3.0003532331697625e-06, "loss": 0.42, "step": 46340 }, { "epoch": 0.6680310738941816, "grad_norm": 1.7362115937958542, "learning_rate": 2.998047889257611e-06, "loss": 0.471, "step": 46350 }, { "epoch": 0.6681752014182148, "grad_norm": 1.6622261874304562, "learning_rate": 2.9957430520700605e-06, "loss": 0.4156, "step": 46360 }, { "epoch": 0.6683193289422481, "grad_norm": 1.4972494095959568, "learning_rate": 2.9934387221905016e-06, "loss": 0.4491, "step": 46370 }, { "epoch": 0.6684634564662814, "grad_norm": 1.354227648485922, "learning_rate": 2.9911349002021974e-06, "loss": 0.4227, "step": 46380 }, { "epoch": 0.6686075839903146, "grad_norm": 1.9963289204847836, "learning_rate": 2.988831586688276e-06, "loss": 0.4332, "step": 46390 }, { "epoch": 0.6687517115143479, "grad_norm": 1.7757119434259452, "learning_rate": 2.9865287822317445e-06, "loss": 0.4355, "step": 46400 }, { "epoch": 0.6688958390383811, "grad_norm": 1.8129874887262056, "learning_rate": 2.9842264874154775e-06, "loss": 0.4222, "step": 46410 }, { "epoch": 0.6690399665624144, "grad_norm": 1.408486066970904, "learning_rate": 2.9819247028222198e-06, "loss": 0.4294, "step": 46420 }, { "epoch": 0.6691840940864476, "grad_norm": 1.890731738546372, "learning_rate": 2.979623429034588e-06, "loss": 0.4755, "step": 46430 }, { "epoch": 0.669328221610481, "grad_norm": 3.6599840887745856, "learning_rate": 2.9773226666350694e-06, "loss": 0.435, "step": 46440 }, { "epoch": 0.6694723491345143, "grad_norm": 1.5786703280731462, "learning_rate": 2.975022416206025e-06, "loss": 0.4511, "step": 46450 }, { "epoch": 0.6696164766585475, "grad_norm": 1.6688434789830873, "learning_rate": 2.9727226783296816e-06, "loss": 0.4101, "step": 46460 }, { "epoch": 0.6697606041825808, "grad_norm": 1.468370163553962, "learning_rate": 2.970423453588136e-06, "loss": 0.4558, "step": 46470 }, { "epoch": 0.669904731706614, "grad_norm": 1.758739598071942, "learning_rate": 2.9681247425633584e-06, "loss": 0.448, "step": 46480 }, { "epoch": 0.6700488592306473, "grad_norm": 1.4398552157350617, "learning_rate": 2.9658265458371897e-06, "loss": 0.4822, "step": 46490 }, { "epoch": 0.6701929867546805, "grad_norm": 1.6082866552973376, "learning_rate": 2.963528863991335e-06, "loss": 0.4504, "step": 46500 }, { "epoch": 0.6703371142787138, "grad_norm": 1.4953602959570869, "learning_rate": 2.961231697607376e-06, "loss": 0.442, "step": 46510 }, { "epoch": 0.670481241802747, "grad_norm": 1.4944252522799535, "learning_rate": 2.9589350472667564e-06, "loss": 0.4331, "step": 46520 }, { "epoch": 0.6706253693267803, "grad_norm": 1.4442945145589083, "learning_rate": 2.9566389135507965e-06, "loss": 0.4494, "step": 46530 }, { "epoch": 0.6707694968508136, "grad_norm": 1.4900712046215234, "learning_rate": 2.954343297040683e-06, "loss": 0.4395, "step": 46540 }, { "epoch": 0.6709136243748469, "grad_norm": 1.744343574188276, "learning_rate": 2.9520481983174675e-06, "loss": 0.4514, "step": 46550 }, { "epoch": 0.6710577518988802, "grad_norm": 1.8347472935789526, "learning_rate": 2.9497536179620802e-06, "loss": 0.4821, "step": 46560 }, { "epoch": 0.6712018794229134, "grad_norm": 1.3207623892821143, "learning_rate": 2.947459556555311e-06, "loss": 0.4243, "step": 46570 }, { "epoch": 0.6713460069469467, "grad_norm": 1.6285623567009275, "learning_rate": 2.945166014677822e-06, "loss": 0.4393, "step": 46580 }, { "epoch": 0.6714901344709799, "grad_norm": 1.60052814486207, "learning_rate": 2.942872992910145e-06, "loss": 0.465, "step": 46590 }, { "epoch": 0.6716342619950132, "grad_norm": 1.5406511785082466, "learning_rate": 2.9405804918326773e-06, "loss": 0.4827, "step": 46600 }, { "epoch": 0.6717783895190464, "grad_norm": 1.6114166669594299, "learning_rate": 2.938288512025686e-06, "loss": 0.4364, "step": 46610 }, { "epoch": 0.6719225170430797, "grad_norm": 1.7327640562205644, "learning_rate": 2.9359970540693072e-06, "loss": 0.4294, "step": 46620 }, { "epoch": 0.672066644567113, "grad_norm": 1.5149447426883094, "learning_rate": 2.9337061185435427e-06, "loss": 0.4398, "step": 46630 }, { "epoch": 0.6722107720911462, "grad_norm": 1.5952007561426358, "learning_rate": 2.9314157060282634e-06, "loss": 0.4303, "step": 46640 }, { "epoch": 0.6723548996151795, "grad_norm": 1.9045167534811607, "learning_rate": 2.929125817103209e-06, "loss": 0.4591, "step": 46650 }, { "epoch": 0.6724990271392127, "grad_norm": 1.7425325076486855, "learning_rate": 2.926836452347982e-06, "loss": 0.4188, "step": 46660 }, { "epoch": 0.6726431546632461, "grad_norm": 1.5463866973209448, "learning_rate": 2.924547612342059e-06, "loss": 0.4491, "step": 46670 }, { "epoch": 0.6727872821872793, "grad_norm": 1.464543453519204, "learning_rate": 2.922259297664779e-06, "loss": 0.4422, "step": 46680 }, { "epoch": 0.6729314097113126, "grad_norm": 1.5913939872697602, "learning_rate": 2.9199715088953506e-06, "loss": 0.4249, "step": 46690 }, { "epoch": 0.6730755372353459, "grad_norm": 1.6052693974559238, "learning_rate": 2.9176842466128443e-06, "loss": 0.4372, "step": 46700 }, { "epoch": 0.6732196647593791, "grad_norm": 1.6419789878367457, "learning_rate": 2.915397511396205e-06, "loss": 0.445, "step": 46710 }, { "epoch": 0.6733637922834124, "grad_norm": 1.5703702395086874, "learning_rate": 2.91311130382424e-06, "loss": 0.4594, "step": 46720 }, { "epoch": 0.6735079198074456, "grad_norm": 1.7600722103770094, "learning_rate": 2.9108256244756195e-06, "loss": 0.4604, "step": 46730 }, { "epoch": 0.6736520473314789, "grad_norm": 1.6629812257058423, "learning_rate": 2.9085404739288885e-06, "loss": 0.4412, "step": 46740 }, { "epoch": 0.6737961748555121, "grad_norm": 1.6861284563611205, "learning_rate": 2.906255852762451e-06, "loss": 0.4565, "step": 46750 }, { "epoch": 0.6739403023795454, "grad_norm": 1.5984647963504786, "learning_rate": 2.9039717615545804e-06, "loss": 0.4634, "step": 46760 }, { "epoch": 0.6740844299035786, "grad_norm": 1.5332608341951277, "learning_rate": 2.901688200883412e-06, "loss": 0.43, "step": 46770 }, { "epoch": 0.6742285574276119, "grad_norm": 1.5341010976516434, "learning_rate": 2.899405171326952e-06, "loss": 0.4492, "step": 46780 }, { "epoch": 0.6743726849516453, "grad_norm": 1.7858083275079426, "learning_rate": 2.897122673463072e-06, "loss": 0.4416, "step": 46790 }, { "epoch": 0.6745168124756785, "grad_norm": 1.5512538354667933, "learning_rate": 2.894840707869504e-06, "loss": 0.4526, "step": 46800 }, { "epoch": 0.6746609399997118, "grad_norm": 1.5130947307646008, "learning_rate": 2.892559275123851e-06, "loss": 0.4298, "step": 46810 }, { "epoch": 0.674805067523745, "grad_norm": 1.6642753598973556, "learning_rate": 2.890278375803577e-06, "loss": 0.4377, "step": 46820 }, { "epoch": 0.6749491950477783, "grad_norm": 1.5954327154003212, "learning_rate": 2.8879980104860096e-06, "loss": 0.4327, "step": 46830 }, { "epoch": 0.6750933225718115, "grad_norm": 1.5200709189198067, "learning_rate": 2.885718179748349e-06, "loss": 0.4607, "step": 46840 }, { "epoch": 0.6752374500958448, "grad_norm": 1.5174596749749063, "learning_rate": 2.8834388841676535e-06, "loss": 0.4559, "step": 46850 }, { "epoch": 0.675381577619878, "grad_norm": 1.7301103508840343, "learning_rate": 2.881160124320844e-06, "loss": 0.4592, "step": 46860 }, { "epoch": 0.6755257051439113, "grad_norm": 1.3745383967823772, "learning_rate": 2.878881900784715e-06, "loss": 0.4287, "step": 46870 }, { "epoch": 0.6756698326679446, "grad_norm": 1.7525622646722192, "learning_rate": 2.876604214135917e-06, "loss": 0.4818, "step": 46880 }, { "epoch": 0.6758139601919778, "grad_norm": 1.5454623527719327, "learning_rate": 2.874327064950965e-06, "loss": 0.4385, "step": 46890 }, { "epoch": 0.6759580877160112, "grad_norm": 1.7001864147933428, "learning_rate": 2.8720504538062417e-06, "loss": 0.4665, "step": 46900 }, { "epoch": 0.6761022152400444, "grad_norm": 1.3756092135776035, "learning_rate": 2.869774381277995e-06, "loss": 0.4455, "step": 46910 }, { "epoch": 0.6762463427640777, "grad_norm": 1.5692910580579393, "learning_rate": 2.867498847942331e-06, "loss": 0.4607, "step": 46920 }, { "epoch": 0.676390470288111, "grad_norm": 1.3647719732184438, "learning_rate": 2.8652238543752204e-06, "loss": 0.442, "step": 46930 }, { "epoch": 0.6765345978121442, "grad_norm": 1.535113829065749, "learning_rate": 2.862949401152502e-06, "loss": 0.4418, "step": 46940 }, { "epoch": 0.6766787253361775, "grad_norm": 1.57236209599821, "learning_rate": 2.8606754888498735e-06, "loss": 0.4119, "step": 46950 }, { "epoch": 0.6768228528602107, "grad_norm": 1.587506006779159, "learning_rate": 2.8584021180428945e-06, "loss": 0.4481, "step": 46960 }, { "epoch": 0.676966980384244, "grad_norm": 1.5150301103007857, "learning_rate": 2.856129289306993e-06, "loss": 0.4381, "step": 46970 }, { "epoch": 0.6771111079082772, "grad_norm": 1.9918305232963358, "learning_rate": 2.8538570032174553e-06, "loss": 0.4447, "step": 46980 }, { "epoch": 0.6772552354323105, "grad_norm": 1.6130794849504546, "learning_rate": 2.8515852603494297e-06, "loss": 0.4423, "step": 46990 }, { "epoch": 0.6773993629563437, "grad_norm": 1.6006656598120765, "learning_rate": 2.8493140612779315e-06, "loss": 0.452, "step": 47000 }, { "epoch": 0.677543490480377, "grad_norm": 1.574519193567147, "learning_rate": 2.8470434065778363e-06, "loss": 0.4451, "step": 47010 }, { "epoch": 0.6776876180044104, "grad_norm": 1.483762879721842, "learning_rate": 2.8447732968238805e-06, "loss": 0.455, "step": 47020 }, { "epoch": 0.6778317455284436, "grad_norm": 1.7605418310295864, "learning_rate": 2.8425037325906614e-06, "loss": 0.4508, "step": 47030 }, { "epoch": 0.6779758730524769, "grad_norm": 1.4303880325539702, "learning_rate": 2.8402347144526445e-06, "loss": 0.4326, "step": 47040 }, { "epoch": 0.6781200005765101, "grad_norm": 1.6409348305020701, "learning_rate": 2.8379662429841506e-06, "loss": 0.4328, "step": 47050 }, { "epoch": 0.6782641281005434, "grad_norm": 1.8367939908129818, "learning_rate": 2.835698318759361e-06, "loss": 0.4522, "step": 47060 }, { "epoch": 0.6784082556245766, "grad_norm": 1.4983692570189728, "learning_rate": 2.833430942352329e-06, "loss": 0.4309, "step": 47070 }, { "epoch": 0.6785523831486099, "grad_norm": 1.9518319196743108, "learning_rate": 2.831164114336957e-06, "loss": 0.4386, "step": 47080 }, { "epoch": 0.6786965106726431, "grad_norm": 1.2778668975743435, "learning_rate": 2.8288978352870146e-06, "loss": 0.435, "step": 47090 }, { "epoch": 0.6788406381966764, "grad_norm": 1.690851808863212, "learning_rate": 2.826632105776133e-06, "loss": 0.4572, "step": 47100 }, { "epoch": 0.6789847657207096, "grad_norm": 1.7543767285993153, "learning_rate": 2.8243669263778005e-06, "loss": 0.4425, "step": 47110 }, { "epoch": 0.6791288932447429, "grad_norm": 1.718601245286868, "learning_rate": 2.822102297665372e-06, "loss": 0.4637, "step": 47120 }, { "epoch": 0.6792730207687762, "grad_norm": 1.5037277998602323, "learning_rate": 2.8198382202120554e-06, "loss": 0.4563, "step": 47130 }, { "epoch": 0.6794171482928095, "grad_norm": 1.5714416981013128, "learning_rate": 2.8175746945909277e-06, "loss": 0.452, "step": 47140 }, { "epoch": 0.6795612758168428, "grad_norm": 1.5282109121960428, "learning_rate": 2.8153117213749193e-06, "loss": 0.4506, "step": 47150 }, { "epoch": 0.679705403340876, "grad_norm": 1.6391060367152723, "learning_rate": 2.813049301136822e-06, "loss": 0.4369, "step": 47160 }, { "epoch": 0.6798495308649093, "grad_norm": 1.449263513777128, "learning_rate": 2.8107874344492915e-06, "loss": 0.4367, "step": 47170 }, { "epoch": 0.6799936583889425, "grad_norm": 1.6899710719783456, "learning_rate": 2.8085261218848403e-06, "loss": 0.4338, "step": 47180 }, { "epoch": 0.6801377859129758, "grad_norm": 1.8112499492544643, "learning_rate": 2.8062653640158377e-06, "loss": 0.44, "step": 47190 }, { "epoch": 0.6802819134370091, "grad_norm": 1.7297665448001471, "learning_rate": 2.8040051614145203e-06, "loss": 0.4538, "step": 47200 }, { "epoch": 0.6804260409610423, "grad_norm": 1.6308167789325159, "learning_rate": 2.801745514652979e-06, "loss": 0.4461, "step": 47210 }, { "epoch": 0.6805701684850756, "grad_norm": 1.742843132776276, "learning_rate": 2.799486424303161e-06, "loss": 0.4516, "step": 47220 }, { "epoch": 0.6807142960091088, "grad_norm": 1.6559614235277829, "learning_rate": 2.79722789093688e-06, "loss": 0.4377, "step": 47230 }, { "epoch": 0.6808584235331421, "grad_norm": 1.6549306894016498, "learning_rate": 2.794969915125807e-06, "loss": 0.445, "step": 47240 }, { "epoch": 0.6810025510571754, "grad_norm": 1.5767598050956968, "learning_rate": 2.7927124974414675e-06, "loss": 0.4365, "step": 47250 }, { "epoch": 0.6811466785812087, "grad_norm": 1.6145277895448282, "learning_rate": 2.7904556384552463e-06, "loss": 0.4404, "step": 47260 }, { "epoch": 0.681290806105242, "grad_norm": 1.5487512430769779, "learning_rate": 2.788199338738394e-06, "loss": 0.4621, "step": 47270 }, { "epoch": 0.6814349336292752, "grad_norm": 1.695085175958239, "learning_rate": 2.785943598862011e-06, "loss": 0.4313, "step": 47280 }, { "epoch": 0.6815790611533085, "grad_norm": 1.3550914658485014, "learning_rate": 2.7836884193970583e-06, "loss": 0.451, "step": 47290 }, { "epoch": 0.6817231886773417, "grad_norm": 1.5558740212303324, "learning_rate": 2.7814338009143593e-06, "loss": 0.4552, "step": 47300 }, { "epoch": 0.681867316201375, "grad_norm": 1.7725422103157307, "learning_rate": 2.7791797439845923e-06, "loss": 0.4382, "step": 47310 }, { "epoch": 0.6820114437254082, "grad_norm": 1.6122148411507895, "learning_rate": 2.776926249178289e-06, "loss": 0.4708, "step": 47320 }, { "epoch": 0.6821555712494415, "grad_norm": 3.3608397247899644, "learning_rate": 2.774673317065847e-06, "loss": 0.4368, "step": 47330 }, { "epoch": 0.6822996987734747, "grad_norm": 1.6183568108335054, "learning_rate": 2.772420948217519e-06, "loss": 0.4262, "step": 47340 }, { "epoch": 0.682443826297508, "grad_norm": 1.5687177318096466, "learning_rate": 2.7701691432034105e-06, "loss": 0.4509, "step": 47350 }, { "epoch": 0.6825879538215412, "grad_norm": 1.5661658142972086, "learning_rate": 2.7679179025934916e-06, "loss": 0.4436, "step": 47360 }, { "epoch": 0.6827320813455746, "grad_norm": 1.6241014361279036, "learning_rate": 2.7656672269575826e-06, "loss": 0.4348, "step": 47370 }, { "epoch": 0.6828762088696079, "grad_norm": 1.5118972631735637, "learning_rate": 2.7634171168653635e-06, "loss": 0.4325, "step": 47380 }, { "epoch": 0.6830203363936411, "grad_norm": 1.6521139393962379, "learning_rate": 2.7611675728863734e-06, "loss": 0.4275, "step": 47390 }, { "epoch": 0.6831644639176744, "grad_norm": 1.5950622725299828, "learning_rate": 2.7589185955900056e-06, "loss": 0.4566, "step": 47400 }, { "epoch": 0.6833085914417076, "grad_norm": 1.690622659315358, "learning_rate": 2.7566701855455108e-06, "loss": 0.4411, "step": 47410 }, { "epoch": 0.6834527189657409, "grad_norm": 1.5920636596605746, "learning_rate": 2.754422343321993e-06, "loss": 0.456, "step": 47420 }, { "epoch": 0.6835968464897741, "grad_norm": 1.441269892091372, "learning_rate": 2.7521750694884197e-06, "loss": 0.4498, "step": 47430 }, { "epoch": 0.6837409740138074, "grad_norm": 1.5217521543130583, "learning_rate": 2.749928364613606e-06, "loss": 0.4732, "step": 47440 }, { "epoch": 0.6838851015378407, "grad_norm": 1.6018614508245341, "learning_rate": 2.747682229266229e-06, "loss": 0.4469, "step": 47450 }, { "epoch": 0.6840292290618739, "grad_norm": 1.6932362781053367, "learning_rate": 2.7454366640148227e-06, "loss": 0.4476, "step": 47460 }, { "epoch": 0.6841733565859072, "grad_norm": 1.4003733016222328, "learning_rate": 2.743191669427771e-06, "loss": 0.4452, "step": 47470 }, { "epoch": 0.6843174841099404, "grad_norm": 1.260887255400385, "learning_rate": 2.7409472460733145e-06, "loss": 0.4373, "step": 47480 }, { "epoch": 0.6844616116339738, "grad_norm": 1.4165937496778085, "learning_rate": 2.738703394519555e-06, "loss": 0.418, "step": 47490 }, { "epoch": 0.684605739158007, "grad_norm": 1.7849891947200127, "learning_rate": 2.736460115334444e-06, "loss": 0.4486, "step": 47500 }, { "epoch": 0.6847498666820403, "grad_norm": 1.605506250930403, "learning_rate": 2.7342174090857866e-06, "loss": 0.4403, "step": 47510 }, { "epoch": 0.6848939942060736, "grad_norm": 1.5143753229845738, "learning_rate": 2.731975276341251e-06, "loss": 0.4469, "step": 47520 }, { "epoch": 0.6850381217301068, "grad_norm": 1.5471720638163486, "learning_rate": 2.7297337176683523e-06, "loss": 0.4684, "step": 47530 }, { "epoch": 0.6851822492541401, "grad_norm": 1.553328148331682, "learning_rate": 2.727492733634462e-06, "loss": 0.4294, "step": 47540 }, { "epoch": 0.6853263767781733, "grad_norm": 1.5710126579506372, "learning_rate": 2.7252523248068085e-06, "loss": 0.426, "step": 47550 }, { "epoch": 0.6854705043022066, "grad_norm": 1.5303527833954949, "learning_rate": 2.723012491752477e-06, "loss": 0.42, "step": 47560 }, { "epoch": 0.6856146318262398, "grad_norm": 1.7481060088972473, "learning_rate": 2.7207732350383992e-06, "loss": 0.4545, "step": 47570 }, { "epoch": 0.6857587593502731, "grad_norm": 1.9953657991216467, "learning_rate": 2.7185345552313645e-06, "loss": 0.481, "step": 47580 }, { "epoch": 0.6859028868743063, "grad_norm": 1.5176042288764924, "learning_rate": 2.7162964528980205e-06, "loss": 0.422, "step": 47590 }, { "epoch": 0.6860470143983397, "grad_norm": 1.7570056420208497, "learning_rate": 2.7140589286048633e-06, "loss": 0.4318, "step": 47600 }, { "epoch": 0.686191141922373, "grad_norm": 1.4925787149909464, "learning_rate": 2.7118219829182425e-06, "loss": 0.4418, "step": 47610 }, { "epoch": 0.6863352694464062, "grad_norm": 1.476370614306045, "learning_rate": 2.7095856164043665e-06, "loss": 0.453, "step": 47620 }, { "epoch": 0.6864793969704395, "grad_norm": 1.695080058012233, "learning_rate": 2.7073498296292922e-06, "loss": 0.4494, "step": 47630 }, { "epoch": 0.6866235244944727, "grad_norm": 1.7624862632588818, "learning_rate": 2.7051146231589287e-06, "loss": 0.4455, "step": 47640 }, { "epoch": 0.686767652018506, "grad_norm": 1.9440873906121565, "learning_rate": 2.7028799975590457e-06, "loss": 0.4582, "step": 47650 }, { "epoch": 0.6869117795425392, "grad_norm": 1.6974064224856535, "learning_rate": 2.700645953395258e-06, "loss": 0.435, "step": 47660 }, { "epoch": 0.6870559070665725, "grad_norm": 1.6358796033433924, "learning_rate": 2.6984124912330374e-06, "loss": 0.4604, "step": 47670 }, { "epoch": 0.6872000345906057, "grad_norm": 1.5836154606279793, "learning_rate": 2.6961796116377052e-06, "loss": 0.4361, "step": 47680 }, { "epoch": 0.687344162114639, "grad_norm": 1.1806497291930815, "learning_rate": 2.693947315174441e-06, "loss": 0.3948, "step": 47690 }, { "epoch": 0.6874882896386723, "grad_norm": 1.5538273740781918, "learning_rate": 2.6917156024082708e-06, "loss": 0.4503, "step": 47700 }, { "epoch": 0.6876324171627055, "grad_norm": 1.4485770394349535, "learning_rate": 2.6894844739040736e-06, "loss": 0.4262, "step": 47710 }, { "epoch": 0.6877765446867389, "grad_norm": 1.4837844774654734, "learning_rate": 2.6872539302265856e-06, "loss": 0.4157, "step": 47720 }, { "epoch": 0.6879206722107721, "grad_norm": 1.5913225598897043, "learning_rate": 2.685023971940389e-06, "loss": 0.4146, "step": 47730 }, { "epoch": 0.6880647997348054, "grad_norm": 1.6852858787948053, "learning_rate": 2.682794599609919e-06, "loss": 0.4278, "step": 47740 }, { "epoch": 0.6882089272588386, "grad_norm": 1.7516832803173976, "learning_rate": 2.680565813799467e-06, "loss": 0.4452, "step": 47750 }, { "epoch": 0.6883530547828719, "grad_norm": 1.7868408719267528, "learning_rate": 2.6783376150731722e-06, "loss": 0.4374, "step": 47760 }, { "epoch": 0.6884971823069052, "grad_norm": 1.9665819530442963, "learning_rate": 2.6761100039950224e-06, "loss": 0.4281, "step": 47770 }, { "epoch": 0.6886413098309384, "grad_norm": 1.8861644953957244, "learning_rate": 2.673882981128861e-06, "loss": 0.4636, "step": 47780 }, { "epoch": 0.6887854373549717, "grad_norm": 1.5980545836635351, "learning_rate": 2.671656547038386e-06, "loss": 0.4486, "step": 47790 }, { "epoch": 0.6889295648790049, "grad_norm": 1.6620232919371114, "learning_rate": 2.669430702287138e-06, "loss": 0.4607, "step": 47800 }, { "epoch": 0.6890736924030382, "grad_norm": 1.594044485894039, "learning_rate": 2.6672054474385102e-06, "loss": 0.4244, "step": 47810 }, { "epoch": 0.6892178199270714, "grad_norm": 1.642370122559594, "learning_rate": 2.6649807830557527e-06, "loss": 0.458, "step": 47820 }, { "epoch": 0.6893619474511047, "grad_norm": 1.6762303280560302, "learning_rate": 2.6627567097019613e-06, "loss": 0.4469, "step": 47830 }, { "epoch": 0.689506074975138, "grad_norm": 1.5644933777403787, "learning_rate": 2.6605332279400792e-06, "loss": 0.4109, "step": 47840 }, { "epoch": 0.6896502024991713, "grad_norm": 1.6962335424226767, "learning_rate": 2.6583103383329077e-06, "loss": 0.4562, "step": 47850 }, { "epoch": 0.6897943300232046, "grad_norm": 1.6468799466888602, "learning_rate": 2.6560880414430935e-06, "loss": 0.4272, "step": 47860 }, { "epoch": 0.6899384575472378, "grad_norm": 1.5181590545545645, "learning_rate": 2.6538663378331305e-06, "loss": 0.4239, "step": 47870 }, { "epoch": 0.6900825850712711, "grad_norm": 1.6085239261679691, "learning_rate": 2.651645228065368e-06, "loss": 0.4333, "step": 47880 }, { "epoch": 0.6902267125953043, "grad_norm": 1.4717575845511315, "learning_rate": 2.6494247127020047e-06, "loss": 0.4294, "step": 47890 }, { "epoch": 0.6903708401193376, "grad_norm": 1.5416430366440474, "learning_rate": 2.6472047923050846e-06, "loss": 0.4377, "step": 47900 }, { "epoch": 0.6905149676433708, "grad_norm": 1.4716119174577165, "learning_rate": 2.6449854674365027e-06, "loss": 0.4374, "step": 47910 }, { "epoch": 0.6906590951674041, "grad_norm": 1.676363905763221, "learning_rate": 2.642766738658007e-06, "loss": 0.4576, "step": 47920 }, { "epoch": 0.6908032226914373, "grad_norm": 1.6178218287374126, "learning_rate": 2.640548606531189e-06, "loss": 0.4407, "step": 47930 }, { "epoch": 0.6909473502154706, "grad_norm": 1.583308816586073, "learning_rate": 2.6383310716174908e-06, "loss": 0.4473, "step": 47940 }, { "epoch": 0.691091477739504, "grad_norm": 1.6004354751700733, "learning_rate": 2.636114134478208e-06, "loss": 0.4353, "step": 47950 }, { "epoch": 0.6912356052635372, "grad_norm": 1.6803054000595385, "learning_rate": 2.6338977956744792e-06, "loss": 0.4794, "step": 47960 }, { "epoch": 0.6913797327875705, "grad_norm": 1.9515044395249785, "learning_rate": 2.631682055767291e-06, "loss": 0.4617, "step": 47970 }, { "epoch": 0.6915238603116037, "grad_norm": 1.4846548687682473, "learning_rate": 2.6294669153174857e-06, "loss": 0.4432, "step": 47980 }, { "epoch": 0.691667987835637, "grad_norm": 1.8277483581496292, "learning_rate": 2.627252374885745e-06, "loss": 0.4555, "step": 47990 }, { "epoch": 0.6918121153596702, "grad_norm": 1.7502587579356625, "learning_rate": 2.625038435032605e-06, "loss": 0.4542, "step": 48000 }, { "epoch": 0.6919562428837035, "grad_norm": 1.645255754469987, "learning_rate": 2.622825096318449e-06, "loss": 0.4616, "step": 48010 }, { "epoch": 0.6921003704077368, "grad_norm": 1.7354173801372181, "learning_rate": 2.620612359303504e-06, "loss": 0.4558, "step": 48020 }, { "epoch": 0.69224449793177, "grad_norm": 1.7941898024052083, "learning_rate": 2.61840022454785e-06, "loss": 0.4761, "step": 48030 }, { "epoch": 0.6923886254558033, "grad_norm": 1.5348055233798577, "learning_rate": 2.6161886926114075e-06, "loss": 0.4299, "step": 48040 }, { "epoch": 0.6925327529798365, "grad_norm": 1.5634099550525704, "learning_rate": 2.6139777640539553e-06, "loss": 0.4716, "step": 48050 }, { "epoch": 0.6926768805038698, "grad_norm": 1.7386587485749685, "learning_rate": 2.611767439435109e-06, "loss": 0.4166, "step": 48060 }, { "epoch": 0.6928210080279031, "grad_norm": 1.6405556107108195, "learning_rate": 2.6095577193143344e-06, "loss": 0.4419, "step": 48070 }, { "epoch": 0.6929651355519364, "grad_norm": 1.7199163508644986, "learning_rate": 2.6073486042509497e-06, "loss": 0.4224, "step": 48080 }, { "epoch": 0.6931092630759697, "grad_norm": 1.5412153161663018, "learning_rate": 2.6051400948041127e-06, "loss": 0.4428, "step": 48090 }, { "epoch": 0.6932533906000029, "grad_norm": 1.458397724062923, "learning_rate": 2.60293219153283e-06, "loss": 0.4523, "step": 48100 }, { "epoch": 0.6933975181240362, "grad_norm": 1.263595280716506, "learning_rate": 2.6007248949959564e-06, "loss": 0.421, "step": 48110 }, { "epoch": 0.6935416456480694, "grad_norm": 1.7588246967596501, "learning_rate": 2.598518205752195e-06, "loss": 0.4628, "step": 48120 }, { "epoch": 0.6936857731721027, "grad_norm": 1.7682809728554088, "learning_rate": 2.596312124360088e-06, "loss": 0.4477, "step": 48130 }, { "epoch": 0.6938299006961359, "grad_norm": 1.6261503206291263, "learning_rate": 2.594106651378032e-06, "loss": 0.4348, "step": 48140 }, { "epoch": 0.6939740282201692, "grad_norm": 1.7660016803869183, "learning_rate": 2.5919017873642656e-06, "loss": 0.4322, "step": 48150 }, { "epoch": 0.6941181557442024, "grad_norm": 1.1963729662183895, "learning_rate": 2.5896975328768696e-06, "loss": 0.4392, "step": 48160 }, { "epoch": 0.6942622832682357, "grad_norm": 1.6550337273571338, "learning_rate": 2.5874938884737795e-06, "loss": 0.4433, "step": 48170 }, { "epoch": 0.694406410792269, "grad_norm": 1.665836373667024, "learning_rate": 2.5852908547127686e-06, "loss": 0.435, "step": 48180 }, { "epoch": 0.6945505383163023, "grad_norm": 1.4717139498813618, "learning_rate": 2.583088432151456e-06, "loss": 0.4444, "step": 48190 }, { "epoch": 0.6946946658403356, "grad_norm": 1.7463130436707066, "learning_rate": 2.5808866213473138e-06, "loss": 0.4527, "step": 48200 }, { "epoch": 0.6948387933643688, "grad_norm": 1.5477462615727673, "learning_rate": 2.5786854228576508e-06, "loss": 0.4342, "step": 48210 }, { "epoch": 0.6949829208884021, "grad_norm": 1.589866112637717, "learning_rate": 2.5764848372396224e-06, "loss": 0.4284, "step": 48220 }, { "epoch": 0.6951270484124353, "grad_norm": 1.5598374377815734, "learning_rate": 2.574284865050233e-06, "loss": 0.4412, "step": 48230 }, { "epoch": 0.6952711759364686, "grad_norm": 2.006871458306546, "learning_rate": 2.572085506846329e-06, "loss": 0.4373, "step": 48240 }, { "epoch": 0.6954153034605018, "grad_norm": 1.7404646967743296, "learning_rate": 2.569886763184601e-06, "loss": 0.4598, "step": 48250 }, { "epoch": 0.6955594309845351, "grad_norm": 1.6144287072763732, "learning_rate": 2.5676886346215825e-06, "loss": 0.4392, "step": 48260 }, { "epoch": 0.6957035585085684, "grad_norm": 1.516849625660156, "learning_rate": 2.5654911217136576e-06, "loss": 0.4484, "step": 48270 }, { "epoch": 0.6958476860326016, "grad_norm": 1.5229978051824058, "learning_rate": 2.563294225017047e-06, "loss": 0.4207, "step": 48280 }, { "epoch": 0.6959918135566349, "grad_norm": 1.6851863927683175, "learning_rate": 2.5610979450878187e-06, "loss": 0.4026, "step": 48290 }, { "epoch": 0.6961359410806682, "grad_norm": 1.6235489209628493, "learning_rate": 2.558902282481887e-06, "loss": 0.4513, "step": 48300 }, { "epoch": 0.6962800686047015, "grad_norm": 1.4409193455239988, "learning_rate": 2.556707237755006e-06, "loss": 0.4456, "step": 48310 }, { "epoch": 0.6964241961287347, "grad_norm": 1.438773678563683, "learning_rate": 2.5545128114627738e-06, "loss": 0.4117, "step": 48320 }, { "epoch": 0.696568323652768, "grad_norm": 1.7107226137035065, "learning_rate": 2.5523190041606343e-06, "loss": 0.4229, "step": 48330 }, { "epoch": 0.6967124511768013, "grad_norm": 1.6679787111974502, "learning_rate": 2.5501258164038755e-06, "loss": 0.4564, "step": 48340 }, { "epoch": 0.6968565787008345, "grad_norm": 1.6367123231064251, "learning_rate": 2.547933248747625e-06, "loss": 0.4356, "step": 48350 }, { "epoch": 0.6970007062248678, "grad_norm": 1.5828435399562026, "learning_rate": 2.5457413017468533e-06, "loss": 0.4532, "step": 48360 }, { "epoch": 0.697144833748901, "grad_norm": 1.525743756143241, "learning_rate": 2.54354997595638e-06, "loss": 0.4262, "step": 48370 }, { "epoch": 0.6972889612729343, "grad_norm": 1.4936217911085352, "learning_rate": 2.541359271930859e-06, "loss": 0.4365, "step": 48380 }, { "epoch": 0.6974330887969675, "grad_norm": 1.6328349477886202, "learning_rate": 2.5391691902247916e-06, "loss": 0.4623, "step": 48390 }, { "epoch": 0.6975772163210008, "grad_norm": 1.7759383740279757, "learning_rate": 2.5369797313925226e-06, "loss": 0.4545, "step": 48400 }, { "epoch": 0.697721343845034, "grad_norm": 1.6435536861823337, "learning_rate": 2.534790895988237e-06, "loss": 0.4332, "step": 48410 }, { "epoch": 0.6978654713690674, "grad_norm": 1.5143549003913706, "learning_rate": 2.5326026845659596e-06, "loss": 0.4377, "step": 48420 }, { "epoch": 0.6980095988931007, "grad_norm": 1.4740455079014936, "learning_rate": 2.5304150976795643e-06, "loss": 0.402, "step": 48430 }, { "epoch": 0.6981537264171339, "grad_norm": 1.387860082027487, "learning_rate": 2.5282281358827586e-06, "loss": 0.4258, "step": 48440 }, { "epoch": 0.6982978539411672, "grad_norm": 1.7129089494811653, "learning_rate": 2.5260417997291003e-06, "loss": 0.4411, "step": 48450 }, { "epoch": 0.6984419814652004, "grad_norm": 1.59534996105311, "learning_rate": 2.52385608977198e-06, "loss": 0.4336, "step": 48460 }, { "epoch": 0.6985861089892337, "grad_norm": 1.9144813327423171, "learning_rate": 2.5216710065646377e-06, "loss": 0.437, "step": 48470 }, { "epoch": 0.6987302365132669, "grad_norm": 1.4876960072629786, "learning_rate": 2.5194865506601507e-06, "loss": 0.4431, "step": 48480 }, { "epoch": 0.6988743640373002, "grad_norm": 1.4561692764002214, "learning_rate": 2.517302722611435e-06, "loss": 0.4207, "step": 48490 }, { "epoch": 0.6990184915613334, "grad_norm": 1.7197486043876575, "learning_rate": 2.515119522971255e-06, "loss": 0.4356, "step": 48500 }, { "epoch": 0.6991626190853667, "grad_norm": 1.494315880475146, "learning_rate": 2.5129369522922098e-06, "loss": 0.4498, "step": 48510 }, { "epoch": 0.6993067466094, "grad_norm": 1.831873636454799, "learning_rate": 2.510755011126739e-06, "loss": 0.4586, "step": 48520 }, { "epoch": 0.6994508741334332, "grad_norm": 1.4415179874589135, "learning_rate": 2.5085737000271304e-06, "loss": 0.4423, "step": 48530 }, { "epoch": 0.6995950016574666, "grad_norm": 1.3906951433947585, "learning_rate": 2.5063930195455032e-06, "loss": 0.4545, "step": 48540 }, { "epoch": 0.6997391291814998, "grad_norm": 1.4627680849934128, "learning_rate": 2.504212970233821e-06, "loss": 0.4486, "step": 48550 }, { "epoch": 0.6998832567055331, "grad_norm": 1.5136237222509448, "learning_rate": 2.502033552643888e-06, "loss": 0.4327, "step": 48560 }, { "epoch": 0.7000273842295663, "grad_norm": 1.4846884055905947, "learning_rate": 2.4998547673273498e-06, "loss": 0.431, "step": 48570 }, { "epoch": 0.7001715117535996, "grad_norm": 1.621389048468725, "learning_rate": 2.4976766148356894e-06, "loss": 0.4525, "step": 48580 }, { "epoch": 0.7003156392776329, "grad_norm": 1.355607912072252, "learning_rate": 2.495499095720228e-06, "loss": 0.4309, "step": 48590 }, { "epoch": 0.7004597668016661, "grad_norm": 1.7136818408896837, "learning_rate": 2.4933222105321326e-06, "loss": 0.4324, "step": 48600 }, { "epoch": 0.7006038943256994, "grad_norm": 1.7664313362402877, "learning_rate": 2.4911459598224043e-06, "loss": 0.45, "step": 48610 }, { "epoch": 0.7007480218497326, "grad_norm": 1.8063749603253811, "learning_rate": 2.488970344141883e-06, "loss": 0.4417, "step": 48620 }, { "epoch": 0.7008921493737659, "grad_norm": 1.7085504464157635, "learning_rate": 2.4867953640412545e-06, "loss": 0.4482, "step": 48630 }, { "epoch": 0.7010362768977991, "grad_norm": 1.3832932105465185, "learning_rate": 2.4846210200710376e-06, "loss": 0.4117, "step": 48640 }, { "epoch": 0.7011804044218325, "grad_norm": 1.6765638599886405, "learning_rate": 2.482447312781589e-06, "loss": 0.4438, "step": 48650 }, { "epoch": 0.7013245319458657, "grad_norm": 1.3152182803129175, "learning_rate": 2.48027424272311e-06, "loss": 0.4218, "step": 48660 }, { "epoch": 0.701468659469899, "grad_norm": 1.620814349420251, "learning_rate": 2.47810181044564e-06, "loss": 0.4169, "step": 48670 }, { "epoch": 0.7016127869939323, "grad_norm": 1.7846273634941654, "learning_rate": 2.475930016499052e-06, "loss": 0.4762, "step": 48680 }, { "epoch": 0.7017569145179655, "grad_norm": 1.4459907468844335, "learning_rate": 2.4737588614330584e-06, "loss": 0.4058, "step": 48690 }, { "epoch": 0.7019010420419988, "grad_norm": 1.7633449216325947, "learning_rate": 2.4715883457972157e-06, "loss": 0.4539, "step": 48700 }, { "epoch": 0.702045169566032, "grad_norm": 1.5483723814057948, "learning_rate": 2.4694184701409123e-06, "loss": 0.425, "step": 48710 }, { "epoch": 0.7021892970900653, "grad_norm": 1.4841941866747477, "learning_rate": 2.4672492350133753e-06, "loss": 0.4627, "step": 48720 }, { "epoch": 0.7023334246140985, "grad_norm": 1.6230512419609306, "learning_rate": 2.4650806409636745e-06, "loss": 0.4388, "step": 48730 }, { "epoch": 0.7024775521381318, "grad_norm": 1.3374835285330084, "learning_rate": 2.462912688540713e-06, "loss": 0.4246, "step": 48740 }, { "epoch": 0.702621679662165, "grad_norm": 1.7259057859490978, "learning_rate": 2.46074537829323e-06, "loss": 0.4593, "step": 48750 }, { "epoch": 0.7027658071861983, "grad_norm": 1.3601762964234931, "learning_rate": 2.458578710769809e-06, "loss": 0.4087, "step": 48760 }, { "epoch": 0.7029099347102317, "grad_norm": 1.4523590163718088, "learning_rate": 2.456412686518862e-06, "loss": 0.4366, "step": 48770 }, { "epoch": 0.7030540622342649, "grad_norm": 1.491643233409415, "learning_rate": 2.454247306088646e-06, "loss": 0.432, "step": 48780 }, { "epoch": 0.7031981897582982, "grad_norm": 1.9083578911628531, "learning_rate": 2.4520825700272527e-06, "loss": 0.4144, "step": 48790 }, { "epoch": 0.7033423172823314, "grad_norm": 1.8323139433200628, "learning_rate": 2.449918478882608e-06, "loss": 0.4545, "step": 48800 }, { "epoch": 0.7034864448063647, "grad_norm": 1.845002124169119, "learning_rate": 2.4477550332024747e-06, "loss": 0.4538, "step": 48810 }, { "epoch": 0.7036305723303979, "grad_norm": 1.6854146278345628, "learning_rate": 2.4455922335344575e-06, "loss": 0.4686, "step": 48820 }, { "epoch": 0.7037746998544312, "grad_norm": 1.586216737067374, "learning_rate": 2.443430080425992e-06, "loss": 0.4442, "step": 48830 }, { "epoch": 0.7039188273784645, "grad_norm": 1.5293790752915015, "learning_rate": 2.4412685744243508e-06, "loss": 0.4482, "step": 48840 }, { "epoch": 0.7040629549024977, "grad_norm": 1.5084589719849293, "learning_rate": 2.439107716076647e-06, "loss": 0.4555, "step": 48850 }, { "epoch": 0.704207082426531, "grad_norm": 1.540210726035104, "learning_rate": 2.4369475059298255e-06, "loss": 0.4391, "step": 48860 }, { "epoch": 0.7043512099505642, "grad_norm": 1.8013847511581582, "learning_rate": 2.434787944530668e-06, "loss": 0.4275, "step": 48870 }, { "epoch": 0.7044953374745975, "grad_norm": 1.5797440127903182, "learning_rate": 2.4326290324257896e-06, "loss": 0.4507, "step": 48880 }, { "epoch": 0.7046394649986308, "grad_norm": 1.750345090387699, "learning_rate": 2.43047077016165e-06, "loss": 0.4711, "step": 48890 }, { "epoch": 0.7047835925226641, "grad_norm": 1.5239140351029061, "learning_rate": 2.4283131582845355e-06, "loss": 0.4391, "step": 48900 }, { "epoch": 0.7049277200466973, "grad_norm": 1.6957808507926422, "learning_rate": 2.426156197340568e-06, "loss": 0.4359, "step": 48910 }, { "epoch": 0.7050718475707306, "grad_norm": 1.3942540775316588, "learning_rate": 2.4239998878757115e-06, "loss": 0.4414, "step": 48920 }, { "epoch": 0.7052159750947639, "grad_norm": 1.6863789806140133, "learning_rate": 2.421844230435759e-06, "loss": 0.472, "step": 48930 }, { "epoch": 0.7053601026187971, "grad_norm": 1.6510061735623425, "learning_rate": 2.4196892255663377e-06, "loss": 0.4375, "step": 48940 }, { "epoch": 0.7055042301428304, "grad_norm": 1.8270395239823778, "learning_rate": 2.4175348738129173e-06, "loss": 0.4522, "step": 48950 }, { "epoch": 0.7056483576668636, "grad_norm": 1.3990906896219708, "learning_rate": 2.4153811757207935e-06, "loss": 0.4462, "step": 48960 }, { "epoch": 0.7057924851908969, "grad_norm": 1.4266439290879787, "learning_rate": 2.4132281318350992e-06, "loss": 0.4352, "step": 48970 }, { "epoch": 0.7059366127149301, "grad_norm": 1.4994729989814024, "learning_rate": 2.4110757427008062e-06, "loss": 0.46, "step": 48980 }, { "epoch": 0.7060807402389634, "grad_norm": 1.8409286332187844, "learning_rate": 2.4089240088627134e-06, "loss": 0.4335, "step": 48990 }, { "epoch": 0.7062248677629966, "grad_norm": 1.455613409479865, "learning_rate": 2.406772930865461e-06, "loss": 0.4326, "step": 49000 }, { "epoch": 0.70636899528703, "grad_norm": 1.5624742914029457, "learning_rate": 2.4046225092535157e-06, "loss": 0.4435, "step": 49010 }, { "epoch": 0.7065131228110633, "grad_norm": 1.4808485889113214, "learning_rate": 2.4024727445711866e-06, "loss": 0.4274, "step": 49020 }, { "epoch": 0.7066572503350965, "grad_norm": 1.4763980972815183, "learning_rate": 2.4003236373626095e-06, "loss": 0.4326, "step": 49030 }, { "epoch": 0.7068013778591298, "grad_norm": 1.8939576101119686, "learning_rate": 2.3981751881717537e-06, "loss": 0.4475, "step": 49040 }, { "epoch": 0.706945505383163, "grad_norm": 1.635526497307048, "learning_rate": 2.396027397542429e-06, "loss": 0.4224, "step": 49050 }, { "epoch": 0.7070896329071963, "grad_norm": 1.5612305329012022, "learning_rate": 2.393880266018272e-06, "loss": 0.4545, "step": 49060 }, { "epoch": 0.7072337604312295, "grad_norm": 1.7254605253547772, "learning_rate": 2.391733794142752e-06, "loss": 0.4259, "step": 49070 }, { "epoch": 0.7073778879552628, "grad_norm": 1.6814977747212692, "learning_rate": 2.3895879824591785e-06, "loss": 0.4188, "step": 49080 }, { "epoch": 0.707522015479296, "grad_norm": 1.3440059598263387, "learning_rate": 2.387442831510686e-06, "loss": 0.4475, "step": 49090 }, { "epoch": 0.7076661430033293, "grad_norm": 1.695346363094425, "learning_rate": 2.385298341840244e-06, "loss": 0.4646, "step": 49100 }, { "epoch": 0.7078102705273626, "grad_norm": 1.7228519990100053, "learning_rate": 2.3831545139906575e-06, "loss": 0.4495, "step": 49110 }, { "epoch": 0.7079543980513959, "grad_norm": 1.6718513871532594, "learning_rate": 2.3810113485045634e-06, "loss": 0.4551, "step": 49120 }, { "epoch": 0.7080985255754292, "grad_norm": 1.967002369839841, "learning_rate": 2.3788688459244274e-06, "loss": 0.4258, "step": 49130 }, { "epoch": 0.7082426530994624, "grad_norm": 1.6493148811164478, "learning_rate": 2.3767270067925484e-06, "loss": 0.4389, "step": 49140 }, { "epoch": 0.7083867806234957, "grad_norm": 1.7164718438632054, "learning_rate": 2.374585831651062e-06, "loss": 0.4514, "step": 49150 }, { "epoch": 0.708530908147529, "grad_norm": 1.5123077304679864, "learning_rate": 2.3724453210419306e-06, "loss": 0.4205, "step": 49160 }, { "epoch": 0.7086750356715622, "grad_norm": 1.603428246770768, "learning_rate": 2.3703054755069476e-06, "loss": 0.4374, "step": 49170 }, { "epoch": 0.7088191631955955, "grad_norm": 1.6024002314053192, "learning_rate": 2.368166295587745e-06, "loss": 0.4607, "step": 49180 }, { "epoch": 0.7089632907196287, "grad_norm": 1.7006800738542862, "learning_rate": 2.3660277818257794e-06, "loss": 0.4572, "step": 49190 }, { "epoch": 0.709107418243662, "grad_norm": 1.661148266008358, "learning_rate": 2.36388993476234e-06, "loss": 0.4278, "step": 49200 }, { "epoch": 0.7092515457676952, "grad_norm": 1.7443089383930783, "learning_rate": 2.3617527549385493e-06, "loss": 0.4487, "step": 49210 }, { "epoch": 0.7093956732917285, "grad_norm": 1.7078984793551755, "learning_rate": 2.3596162428953635e-06, "loss": 0.4623, "step": 49220 }, { "epoch": 0.7095398008157617, "grad_norm": 1.5023565121808005, "learning_rate": 2.357480399173563e-06, "loss": 0.4089, "step": 49230 }, { "epoch": 0.7096839283397951, "grad_norm": 1.5743313233689702, "learning_rate": 2.3553452243137616e-06, "loss": 0.4464, "step": 49240 }, { "epoch": 0.7098280558638284, "grad_norm": 1.6754641088069235, "learning_rate": 2.3532107188564074e-06, "loss": 0.4239, "step": 49250 }, { "epoch": 0.7099721833878616, "grad_norm": 1.5323060462505493, "learning_rate": 2.351076883341775e-06, "loss": 0.4387, "step": 49260 }, { "epoch": 0.7101163109118949, "grad_norm": 1.4858292869496033, "learning_rate": 2.348943718309969e-06, "loss": 0.4502, "step": 49270 }, { "epoch": 0.7102604384359281, "grad_norm": 1.6385047728329978, "learning_rate": 2.3468112243009294e-06, "loss": 0.4143, "step": 49280 }, { "epoch": 0.7104045659599614, "grad_norm": 1.8411136942827406, "learning_rate": 2.3446794018544215e-06, "loss": 0.4241, "step": 49290 }, { "epoch": 0.7105486934839946, "grad_norm": 1.8313255802683273, "learning_rate": 2.3425482515100394e-06, "loss": 0.4245, "step": 49300 }, { "epoch": 0.7106928210080279, "grad_norm": 1.6984794560465268, "learning_rate": 2.340417773807215e-06, "loss": 0.4565, "step": 49310 }, { "epoch": 0.7108369485320611, "grad_norm": 1.7941546095349792, "learning_rate": 2.338287969285199e-06, "loss": 0.4486, "step": 49320 }, { "epoch": 0.7109810760560944, "grad_norm": 1.814852950161679, "learning_rate": 2.336158838483082e-06, "loss": 0.439, "step": 49330 }, { "epoch": 0.7111252035801277, "grad_norm": 1.8521826417929288, "learning_rate": 2.334030381939776e-06, "loss": 0.4296, "step": 49340 }, { "epoch": 0.7112693311041609, "grad_norm": 1.570625135602766, "learning_rate": 2.331902600194029e-06, "loss": 0.4175, "step": 49350 }, { "epoch": 0.7114134586281943, "grad_norm": 1.456092352980655, "learning_rate": 2.329775493784414e-06, "loss": 0.4284, "step": 49360 }, { "epoch": 0.7115575861522275, "grad_norm": 1.6728851915730931, "learning_rate": 2.3276490632493315e-06, "loss": 0.4625, "step": 49370 }, { "epoch": 0.7117017136762608, "grad_norm": 1.6424397612555954, "learning_rate": 2.3255233091270174e-06, "loss": 0.4222, "step": 49380 }, { "epoch": 0.711845841200294, "grad_norm": 1.9249025795451968, "learning_rate": 2.3233982319555304e-06, "loss": 0.4592, "step": 49390 }, { "epoch": 0.7119899687243273, "grad_norm": 1.61428907997761, "learning_rate": 2.321273832272758e-06, "loss": 0.4497, "step": 49400 }, { "epoch": 0.7121340962483605, "grad_norm": 1.587799027797903, "learning_rate": 2.3191501106164225e-06, "loss": 0.4466, "step": 49410 }, { "epoch": 0.7122782237723938, "grad_norm": 1.7408668824564495, "learning_rate": 2.3170270675240682e-06, "loss": 0.4547, "step": 49420 }, { "epoch": 0.7124223512964271, "grad_norm": 1.7475130866164788, "learning_rate": 2.3149047035330652e-06, "loss": 0.4407, "step": 49430 }, { "epoch": 0.7125664788204603, "grad_norm": 1.5119826029187784, "learning_rate": 2.3127830191806243e-06, "loss": 0.4426, "step": 49440 }, { "epoch": 0.7127106063444936, "grad_norm": 1.4360235439022218, "learning_rate": 2.310662015003773e-06, "loss": 0.4218, "step": 49450 }, { "epoch": 0.7128547338685268, "grad_norm": 1.7006219031524068, "learning_rate": 2.3085416915393666e-06, "loss": 0.4564, "step": 49460 }, { "epoch": 0.7129988613925602, "grad_norm": 1.6526836078621883, "learning_rate": 2.306422049324096e-06, "loss": 0.4566, "step": 49470 }, { "epoch": 0.7131429889165934, "grad_norm": 1.663731949011887, "learning_rate": 2.304303088894472e-06, "loss": 0.443, "step": 49480 }, { "epoch": 0.7132871164406267, "grad_norm": 1.5876060275641162, "learning_rate": 2.3021848107868376e-06, "loss": 0.4444, "step": 49490 }, { "epoch": 0.71343124396466, "grad_norm": 1.6698886703033373, "learning_rate": 2.3000672155373577e-06, "loss": 0.4226, "step": 49500 }, { "epoch": 0.7135753714886932, "grad_norm": 1.6018053785887054, "learning_rate": 2.2979503036820327e-06, "loss": 0.4238, "step": 49510 }, { "epoch": 0.7137194990127265, "grad_norm": 2.1243450142680307, "learning_rate": 2.295834075756683e-06, "loss": 0.4404, "step": 49520 }, { "epoch": 0.7138636265367597, "grad_norm": 1.4948248928472851, "learning_rate": 2.2937185322969562e-06, "loss": 0.4574, "step": 49530 }, { "epoch": 0.714007754060793, "grad_norm": 1.7253860620075225, "learning_rate": 2.291603673838333e-06, "loss": 0.429, "step": 49540 }, { "epoch": 0.7141518815848262, "grad_norm": 1.4957046366991542, "learning_rate": 2.289489500916111e-06, "loss": 0.4343, "step": 49550 }, { "epoch": 0.7142960091088595, "grad_norm": 1.5830853562576999, "learning_rate": 2.2873760140654235e-06, "loss": 0.4329, "step": 49560 }, { "epoch": 0.7144401366328927, "grad_norm": 1.5484614569349955, "learning_rate": 2.2852632138212266e-06, "loss": 0.4521, "step": 49570 }, { "epoch": 0.714584264156926, "grad_norm": 1.765775452546839, "learning_rate": 2.2831511007183014e-06, "loss": 0.4733, "step": 49580 }, { "epoch": 0.7147283916809594, "grad_norm": 1.6319793209399438, "learning_rate": 2.2810396752912534e-06, "loss": 0.4546, "step": 49590 }, { "epoch": 0.7148725192049926, "grad_norm": 1.6024419227882303, "learning_rate": 2.2789289380745207e-06, "loss": 0.4323, "step": 49600 }, { "epoch": 0.7150166467290259, "grad_norm": 1.5559824836855303, "learning_rate": 2.276818889602361e-06, "loss": 0.4369, "step": 49610 }, { "epoch": 0.7151607742530591, "grad_norm": 1.550245017495668, "learning_rate": 2.2747095304088574e-06, "loss": 0.4308, "step": 49620 }, { "epoch": 0.7153049017770924, "grad_norm": 1.5666776761682324, "learning_rate": 2.2726008610279255e-06, "loss": 0.4328, "step": 49630 }, { "epoch": 0.7154490293011256, "grad_norm": 1.607874157987465, "learning_rate": 2.270492881993299e-06, "loss": 0.4523, "step": 49640 }, { "epoch": 0.7155931568251589, "grad_norm": 1.7178986549053035, "learning_rate": 2.2683855938385384e-06, "loss": 0.418, "step": 49650 }, { "epoch": 0.7157372843491921, "grad_norm": 1.7555498599983193, "learning_rate": 2.2662789970970316e-06, "loss": 0.428, "step": 49660 }, { "epoch": 0.7158814118732254, "grad_norm": 1.345148250154459, "learning_rate": 2.2641730923019927e-06, "loss": 0.4454, "step": 49670 }, { "epoch": 0.7160255393972587, "grad_norm": 1.5846855746970856, "learning_rate": 2.262067879986457e-06, "loss": 0.4389, "step": 49680 }, { "epoch": 0.7161696669212919, "grad_norm": 1.5422880441132725, "learning_rate": 2.259963360683283e-06, "loss": 0.462, "step": 49690 }, { "epoch": 0.7163137944453252, "grad_norm": 1.7347584404742986, "learning_rate": 2.2578595349251603e-06, "loss": 0.454, "step": 49700 }, { "epoch": 0.7164579219693585, "grad_norm": 1.8993271884353136, "learning_rate": 2.2557564032445977e-06, "loss": 0.4498, "step": 49710 }, { "epoch": 0.7166020494933918, "grad_norm": 1.7696567584470522, "learning_rate": 2.2536539661739287e-06, "loss": 0.4538, "step": 49720 }, { "epoch": 0.716746177017425, "grad_norm": 1.556593899269549, "learning_rate": 2.251552224245315e-06, "loss": 0.4361, "step": 49730 }, { "epoch": 0.7168903045414583, "grad_norm": 1.9941251310647246, "learning_rate": 2.249451177990738e-06, "loss": 0.4536, "step": 49740 }, { "epoch": 0.7170344320654916, "grad_norm": 1.6179121730443518, "learning_rate": 2.2473508279420026e-06, "loss": 0.4519, "step": 49750 }, { "epoch": 0.7171785595895248, "grad_norm": 1.3914501259790928, "learning_rate": 2.245251174630743e-06, "loss": 0.4576, "step": 49760 }, { "epoch": 0.7173226871135581, "grad_norm": 1.1763476918890394, "learning_rate": 2.243152218588411e-06, "loss": 0.4399, "step": 49770 }, { "epoch": 0.7174668146375913, "grad_norm": 1.8523199828564751, "learning_rate": 2.2410539603462856e-06, "loss": 0.4392, "step": 49780 }, { "epoch": 0.7176109421616246, "grad_norm": 1.741948086375241, "learning_rate": 2.2389564004354663e-06, "loss": 0.4526, "step": 49790 }, { "epoch": 0.7177550696856578, "grad_norm": 1.595535845777749, "learning_rate": 2.236859539386881e-06, "loss": 0.4101, "step": 49800 }, { "epoch": 0.7178991972096911, "grad_norm": 1.716719711114598, "learning_rate": 2.2347633777312743e-06, "loss": 0.4525, "step": 49810 }, { "epoch": 0.7180433247337245, "grad_norm": 1.5566886213182398, "learning_rate": 2.232667915999216e-06, "loss": 0.436, "step": 49820 }, { "epoch": 0.7181874522577577, "grad_norm": 1.6484940134639734, "learning_rate": 2.2305731547211025e-06, "loss": 0.4399, "step": 49830 }, { "epoch": 0.718331579781791, "grad_norm": 1.733488140905345, "learning_rate": 2.2284790944271474e-06, "loss": 0.4303, "step": 49840 }, { "epoch": 0.7184757073058242, "grad_norm": 1.5237397879478893, "learning_rate": 2.2263857356473886e-06, "loss": 0.4284, "step": 49850 }, { "epoch": 0.7186198348298575, "grad_norm": 1.7569088770079844, "learning_rate": 2.22429307891169e-06, "loss": 0.4315, "step": 49860 }, { "epoch": 0.7187639623538907, "grad_norm": 1.5993123678704648, "learning_rate": 2.2222011247497338e-06, "loss": 0.4424, "step": 49870 }, { "epoch": 0.718908089877924, "grad_norm": 1.8300306093052303, "learning_rate": 2.2201098736910233e-06, "loss": 0.4202, "step": 49880 }, { "epoch": 0.7190522174019572, "grad_norm": 1.7186795843447398, "learning_rate": 2.218019326264887e-06, "loss": 0.4545, "step": 49890 }, { "epoch": 0.7191963449259905, "grad_norm": 1.7571067130243536, "learning_rate": 2.2159294830004775e-06, "loss": 0.4451, "step": 49900 }, { "epoch": 0.7193404724500237, "grad_norm": 1.4971457388993643, "learning_rate": 2.2138403444267637e-06, "loss": 0.4439, "step": 49910 }, { "epoch": 0.719484599974057, "grad_norm": 1.8818121225297668, "learning_rate": 2.211751911072536e-06, "loss": 0.4424, "step": 49920 }, { "epoch": 0.7196287274980903, "grad_norm": 1.8340486092614976, "learning_rate": 2.2096641834664136e-06, "loss": 0.4514, "step": 49930 }, { "epoch": 0.7197728550221236, "grad_norm": 1.5639649598231322, "learning_rate": 2.207577162136829e-06, "loss": 0.454, "step": 49940 }, { "epoch": 0.7199169825461569, "grad_norm": 1.6379533916207523, "learning_rate": 2.2054908476120383e-06, "loss": 0.4206, "step": 49950 }, { "epoch": 0.7200611100701901, "grad_norm": 1.6598257225490742, "learning_rate": 2.2034052404201227e-06, "loss": 0.4399, "step": 49960 }, { "epoch": 0.7202052375942234, "grad_norm": 1.420851697488361, "learning_rate": 2.20132034108898e-06, "loss": 0.4343, "step": 49970 }, { "epoch": 0.7203493651182566, "grad_norm": 1.6399317066359276, "learning_rate": 2.199236150146327e-06, "loss": 0.4437, "step": 49980 }, { "epoch": 0.7204934926422899, "grad_norm": 1.9811456332349264, "learning_rate": 2.197152668119707e-06, "loss": 0.458, "step": 49990 }, { "epoch": 0.7206376201663232, "grad_norm": 1.5849225976647254, "learning_rate": 2.1950698955364824e-06, "loss": 0.4375, "step": 50000 }, { "epoch": 0.7207817476903564, "grad_norm": 1.5398401791534317, "learning_rate": 2.192987832923833e-06, "loss": 0.4222, "step": 50010 }, { "epoch": 0.7209258752143897, "grad_norm": 1.2691218075386799, "learning_rate": 2.190906480808759e-06, "loss": 0.4188, "step": 50020 }, { "epoch": 0.7210700027384229, "grad_norm": 1.5325381669747482, "learning_rate": 2.1888258397180857e-06, "loss": 0.4215, "step": 50030 }, { "epoch": 0.7212141302624562, "grad_norm": 1.4480847780363493, "learning_rate": 2.1867459101784534e-06, "loss": 0.4447, "step": 50040 }, { "epoch": 0.7213582577864894, "grad_norm": 1.7516135730988742, "learning_rate": 2.1846666927163225e-06, "loss": 0.4373, "step": 50050 }, { "epoch": 0.7215023853105228, "grad_norm": 1.8283064570499263, "learning_rate": 2.1825881878579776e-06, "loss": 0.4224, "step": 50060 }, { "epoch": 0.721646512834556, "grad_norm": 1.5988562215267639, "learning_rate": 2.1805103961295183e-06, "loss": 0.4436, "step": 50070 }, { "epoch": 0.7217906403585893, "grad_norm": 2.0620897430575096, "learning_rate": 2.1784333180568636e-06, "loss": 0.4419, "step": 50080 }, { "epoch": 0.7219347678826226, "grad_norm": 1.5177805538909204, "learning_rate": 2.1763569541657575e-06, "loss": 0.4549, "step": 50090 }, { "epoch": 0.7220788954066558, "grad_norm": 1.5896200542053571, "learning_rate": 2.1742813049817557e-06, "loss": 0.4493, "step": 50100 }, { "epoch": 0.7222230229306891, "grad_norm": 1.7879832573004346, "learning_rate": 2.1722063710302383e-06, "loss": 0.4215, "step": 50110 }, { "epoch": 0.7223671504547223, "grad_norm": 1.692925186830242, "learning_rate": 2.1701321528364043e-06, "loss": 0.4272, "step": 50120 }, { "epoch": 0.7225112779787556, "grad_norm": 1.5257146009671947, "learning_rate": 2.1680586509252693e-06, "loss": 0.4472, "step": 50130 }, { "epoch": 0.7226554055027888, "grad_norm": 1.622936414486525, "learning_rate": 2.1659858658216664e-06, "loss": 0.4215, "step": 50140 }, { "epoch": 0.7227995330268221, "grad_norm": 1.7757345747896336, "learning_rate": 2.163913798050249e-06, "loss": 0.4541, "step": 50150 }, { "epoch": 0.7229436605508553, "grad_norm": 1.6303474177779023, "learning_rate": 2.1618424481354914e-06, "loss": 0.4603, "step": 50160 }, { "epoch": 0.7230877880748887, "grad_norm": 1.4238032985338505, "learning_rate": 2.1597718166016834e-06, "loss": 0.409, "step": 50170 }, { "epoch": 0.723231915598922, "grad_norm": 1.3585970033364192, "learning_rate": 2.1577019039729303e-06, "loss": 0.4547, "step": 50180 }, { "epoch": 0.7233760431229552, "grad_norm": 1.5969558589214081, "learning_rate": 2.1556327107731624e-06, "loss": 0.4314, "step": 50190 }, { "epoch": 0.7235201706469885, "grad_norm": 1.749598811713255, "learning_rate": 2.153564237526123e-06, "loss": 0.4496, "step": 50200 }, { "epoch": 0.7236642981710217, "grad_norm": 1.7126137487079884, "learning_rate": 2.15149648475537e-06, "loss": 0.4494, "step": 50210 }, { "epoch": 0.723808425695055, "grad_norm": 1.6220384609242893, "learning_rate": 2.1494294529842898e-06, "loss": 0.4218, "step": 50220 }, { "epoch": 0.7239525532190882, "grad_norm": 1.5639054564778676, "learning_rate": 2.1473631427360774e-06, "loss": 0.4364, "step": 50230 }, { "epoch": 0.7240966807431215, "grad_norm": 1.5671989467260543, "learning_rate": 2.1452975545337445e-06, "loss": 0.4502, "step": 50240 }, { "epoch": 0.7242408082671548, "grad_norm": 1.6752702019725674, "learning_rate": 2.1432326889001276e-06, "loss": 0.4712, "step": 50250 }, { "epoch": 0.724384935791188, "grad_norm": 1.4430017441588625, "learning_rate": 2.141168546357873e-06, "loss": 0.4238, "step": 50260 }, { "epoch": 0.7245290633152213, "grad_norm": 1.692518337576632, "learning_rate": 2.139105127429445e-06, "loss": 0.4344, "step": 50270 }, { "epoch": 0.7246731908392545, "grad_norm": 1.5454828393248916, "learning_rate": 2.1370424326371304e-06, "loss": 0.4248, "step": 50280 }, { "epoch": 0.7248173183632879, "grad_norm": 1.6547475976461439, "learning_rate": 2.134980462503026e-06, "loss": 0.4291, "step": 50290 }, { "epoch": 0.7249614458873211, "grad_norm": 1.733574828669323, "learning_rate": 2.1329192175490473e-06, "loss": 0.4305, "step": 50300 }, { "epoch": 0.7251055734113544, "grad_norm": 1.7706936340563282, "learning_rate": 2.1308586982969296e-06, "loss": 0.43, "step": 50310 }, { "epoch": 0.7252497009353877, "grad_norm": 1.6343933700027533, "learning_rate": 2.1287989052682185e-06, "loss": 0.4519, "step": 50320 }, { "epoch": 0.7253938284594209, "grad_norm": 1.7259170632064382, "learning_rate": 2.1267398389842826e-06, "loss": 0.3806, "step": 50330 }, { "epoch": 0.7255379559834542, "grad_norm": 1.5201427677171309, "learning_rate": 2.124681499966299e-06, "loss": 0.4614, "step": 50340 }, { "epoch": 0.7256820835074874, "grad_norm": 2.149980927562327, "learning_rate": 2.1226238887352683e-06, "loss": 0.4505, "step": 50350 }, { "epoch": 0.7258262110315207, "grad_norm": 1.4773069817736788, "learning_rate": 2.1205670058120028e-06, "loss": 0.4358, "step": 50360 }, { "epoch": 0.7259703385555539, "grad_norm": 1.4986441854360617, "learning_rate": 2.118510851717127e-06, "loss": 0.4079, "step": 50370 }, { "epoch": 0.7261144660795872, "grad_norm": 1.6888198187989356, "learning_rate": 2.1164554269710897e-06, "loss": 0.4208, "step": 50380 }, { "epoch": 0.7262585936036204, "grad_norm": 1.5073684849553735, "learning_rate": 2.1144007320941483e-06, "loss": 0.4301, "step": 50390 }, { "epoch": 0.7264027211276537, "grad_norm": 1.9922558249517701, "learning_rate": 2.1123467676063757e-06, "loss": 0.4194, "step": 50400 }, { "epoch": 0.7265468486516871, "grad_norm": 1.2965328112677768, "learning_rate": 2.110293534027665e-06, "loss": 0.4411, "step": 50410 }, { "epoch": 0.7266909761757203, "grad_norm": 1.581319566729781, "learning_rate": 2.108241031877719e-06, "loss": 0.4684, "step": 50420 }, { "epoch": 0.7268351036997536, "grad_norm": 1.5512515075198094, "learning_rate": 2.106189261676056e-06, "loss": 0.4288, "step": 50430 }, { "epoch": 0.7269792312237868, "grad_norm": 1.4401365097423193, "learning_rate": 2.1041382239420118e-06, "loss": 0.44, "step": 50440 }, { "epoch": 0.7271233587478201, "grad_norm": 1.7085566924386035, "learning_rate": 2.1020879191947368e-06, "loss": 0.4473, "step": 50450 }, { "epoch": 0.7272674862718533, "grad_norm": 1.4798860406932879, "learning_rate": 2.1000383479531934e-06, "loss": 0.4209, "step": 50460 }, { "epoch": 0.7274116137958866, "grad_norm": 1.4083569384155763, "learning_rate": 2.0979895107361563e-06, "loss": 0.4182, "step": 50470 }, { "epoch": 0.7275557413199198, "grad_norm": 1.6627936371302272, "learning_rate": 2.0959414080622226e-06, "loss": 0.4478, "step": 50480 }, { "epoch": 0.7276998688439531, "grad_norm": 1.4108532564448466, "learning_rate": 2.093894040449795e-06, "loss": 0.4338, "step": 50490 }, { "epoch": 0.7278439963679864, "grad_norm": 1.7361979313129947, "learning_rate": 2.091847408417093e-06, "loss": 0.435, "step": 50500 }, { "epoch": 0.7279881238920196, "grad_norm": 1.6483433647665822, "learning_rate": 2.089801512482153e-06, "loss": 0.4127, "step": 50510 }, { "epoch": 0.728132251416053, "grad_norm": 1.4874950413697463, "learning_rate": 2.087756353162821e-06, "loss": 0.4589, "step": 50520 }, { "epoch": 0.7282763789400862, "grad_norm": 1.6658934722159722, "learning_rate": 2.0857119309767565e-06, "loss": 0.4415, "step": 50530 }, { "epoch": 0.7284205064641195, "grad_norm": 1.5248676897240232, "learning_rate": 2.083668246441435e-06, "loss": 0.4318, "step": 50540 }, { "epoch": 0.7285646339881527, "grad_norm": 1.8446450641059926, "learning_rate": 2.0816253000741467e-06, "loss": 0.4194, "step": 50550 }, { "epoch": 0.728708761512186, "grad_norm": 1.546882101556937, "learning_rate": 2.0795830923919904e-06, "loss": 0.4416, "step": 50560 }, { "epoch": 0.7288528890362193, "grad_norm": 1.4678761086345262, "learning_rate": 2.077541623911878e-06, "loss": 0.4247, "step": 50570 }, { "epoch": 0.7289970165602525, "grad_norm": 1.699583344597938, "learning_rate": 2.07550089515054e-06, "loss": 0.4569, "step": 50580 }, { "epoch": 0.7291411440842858, "grad_norm": 2.051410771975666, "learning_rate": 2.0734609066245144e-06, "loss": 0.4335, "step": 50590 }, { "epoch": 0.729285271608319, "grad_norm": 1.9236822456032177, "learning_rate": 2.071421658850152e-06, "loss": 0.4538, "step": 50600 }, { "epoch": 0.7294293991323523, "grad_norm": 1.38153480070696, "learning_rate": 2.0693831523436196e-06, "loss": 0.4474, "step": 50610 }, { "epoch": 0.7295735266563855, "grad_norm": 1.3733386976618178, "learning_rate": 2.067345387620894e-06, "loss": 0.4109, "step": 50620 }, { "epoch": 0.7297176541804188, "grad_norm": 1.7304124539024053, "learning_rate": 2.0653083651977624e-06, "loss": 0.4459, "step": 50630 }, { "epoch": 0.7298617817044522, "grad_norm": 2.0086796590042453, "learning_rate": 2.0632720855898293e-06, "loss": 0.4218, "step": 50640 }, { "epoch": 0.7300059092284854, "grad_norm": 1.4562849008264163, "learning_rate": 2.0612365493125045e-06, "loss": 0.4474, "step": 50650 }, { "epoch": 0.7301500367525187, "grad_norm": 1.827562634890574, "learning_rate": 2.0592017568810173e-06, "loss": 0.4398, "step": 50660 }, { "epoch": 0.7302941642765519, "grad_norm": 1.430540392277313, "learning_rate": 2.0571677088104015e-06, "loss": 0.4422, "step": 50670 }, { "epoch": 0.7304382918005852, "grad_norm": 1.489106097824292, "learning_rate": 2.055134405615508e-06, "loss": 0.4455, "step": 50680 }, { "epoch": 0.7305824193246184, "grad_norm": 1.5111647705206586, "learning_rate": 2.0531018478109967e-06, "loss": 0.4249, "step": 50690 }, { "epoch": 0.7307265468486517, "grad_norm": 1.6429544684715165, "learning_rate": 2.0510700359113354e-06, "loss": 0.4349, "step": 50700 }, { "epoch": 0.7308706743726849, "grad_norm": 1.825034911297173, "learning_rate": 2.049038970430812e-06, "loss": 0.4127, "step": 50710 }, { "epoch": 0.7310148018967182, "grad_norm": 1.8416129596843138, "learning_rate": 2.047008651883517e-06, "loss": 0.4625, "step": 50720 }, { "epoch": 0.7311589294207514, "grad_norm": 1.6032911237319658, "learning_rate": 2.0449790807833537e-06, "loss": 0.434, "step": 50730 }, { "epoch": 0.7313030569447847, "grad_norm": 1.615008845894603, "learning_rate": 2.0429502576440407e-06, "loss": 0.4059, "step": 50740 }, { "epoch": 0.731447184468818, "grad_norm": 1.7212281805572964, "learning_rate": 2.0409221829791028e-06, "loss": 0.4402, "step": 50750 }, { "epoch": 0.7315913119928513, "grad_norm": 1.5708443195979263, "learning_rate": 2.0388948573018744e-06, "loss": 0.4233, "step": 50760 }, { "epoch": 0.7317354395168846, "grad_norm": 1.7343381756681135, "learning_rate": 2.0368682811255053e-06, "loss": 0.4282, "step": 50770 }, { "epoch": 0.7318795670409178, "grad_norm": 1.5868777100953309, "learning_rate": 2.0348424549629537e-06, "loss": 0.4531, "step": 50780 }, { "epoch": 0.7320236945649511, "grad_norm": 1.4058698402401573, "learning_rate": 2.032817379326985e-06, "loss": 0.4257, "step": 50790 }, { "epoch": 0.7321678220889843, "grad_norm": 1.3912793893940347, "learning_rate": 2.0307930547301755e-06, "loss": 0.4399, "step": 50800 }, { "epoch": 0.7323119496130176, "grad_norm": 1.9324572109187703, "learning_rate": 2.0287694816849165e-06, "loss": 0.4919, "step": 50810 }, { "epoch": 0.7324560771370509, "grad_norm": 1.3542060949823098, "learning_rate": 2.0267466607034027e-06, "loss": 0.4128, "step": 50820 }, { "epoch": 0.7326002046610841, "grad_norm": 1.5815969871916666, "learning_rate": 2.0247245922976395e-06, "loss": 0.4445, "step": 50830 }, { "epoch": 0.7327443321851174, "grad_norm": 1.6582967618179216, "learning_rate": 2.022703276979447e-06, "loss": 0.463, "step": 50840 }, { "epoch": 0.7328884597091506, "grad_norm": 1.580273227871161, "learning_rate": 2.0206827152604487e-06, "loss": 0.4248, "step": 50850 }, { "epoch": 0.7330325872331839, "grad_norm": 1.6413634148852685, "learning_rate": 2.018662907652078e-06, "loss": 0.4424, "step": 50860 }, { "epoch": 0.7331767147572172, "grad_norm": 1.9191307826733648, "learning_rate": 2.0166438546655828e-06, "loss": 0.4429, "step": 50870 }, { "epoch": 0.7333208422812505, "grad_norm": 1.4388531059878693, "learning_rate": 2.014625556812012e-06, "loss": 0.425, "step": 50880 }, { "epoch": 0.7334649698052838, "grad_norm": 1.6265664347455766, "learning_rate": 2.01260801460223e-06, "loss": 0.4705, "step": 50890 }, { "epoch": 0.733609097329317, "grad_norm": 1.5329429858595398, "learning_rate": 2.0105912285469086e-06, "loss": 0.4192, "step": 50900 }, { "epoch": 0.7337532248533503, "grad_norm": 1.781210953213935, "learning_rate": 2.0085751991565267e-06, "loss": 0.4642, "step": 50910 }, { "epoch": 0.7338973523773835, "grad_norm": 1.712185231579369, "learning_rate": 2.006559926941369e-06, "loss": 0.4611, "step": 50920 }, { "epoch": 0.7340414799014168, "grad_norm": 1.4503149694889725, "learning_rate": 2.004545412411535e-06, "loss": 0.4344, "step": 50930 }, { "epoch": 0.73418560742545, "grad_norm": 1.6239103710171985, "learning_rate": 2.0025316560769283e-06, "loss": 0.4312, "step": 50940 }, { "epoch": 0.7343297349494833, "grad_norm": 1.8349662543615677, "learning_rate": 2.0005186584472587e-06, "loss": 0.4736, "step": 50950 }, { "epoch": 0.7344738624735165, "grad_norm": 1.6444636483652482, "learning_rate": 1.9985064200320505e-06, "loss": 0.4578, "step": 50960 }, { "epoch": 0.7346179899975498, "grad_norm": 1.727736302855119, "learning_rate": 1.9964949413406297e-06, "loss": 0.4204, "step": 50970 }, { "epoch": 0.734762117521583, "grad_norm": 1.4645186767284601, "learning_rate": 1.994484222882134e-06, "loss": 0.4339, "step": 50980 }, { "epoch": 0.7349062450456164, "grad_norm": 1.4844242646408374, "learning_rate": 1.9924742651655006e-06, "loss": 0.4458, "step": 50990 }, { "epoch": 0.7350503725696497, "grad_norm": 1.7210248545578615, "learning_rate": 1.9904650686994897e-06, "loss": 0.4566, "step": 51000 }, { "epoch": 0.7351945000936829, "grad_norm": 1.6018938426416678, "learning_rate": 1.9884566339926547e-06, "loss": 0.414, "step": 51010 }, { "epoch": 0.7353386276177162, "grad_norm": 1.7011138419641716, "learning_rate": 1.9864489615533605e-06, "loss": 0.4452, "step": 51020 }, { "epoch": 0.7354827551417494, "grad_norm": 1.4530132261197688, "learning_rate": 1.984442051889782e-06, "loss": 0.4515, "step": 51030 }, { "epoch": 0.7356268826657827, "grad_norm": 1.520330979137856, "learning_rate": 1.982435905509898e-06, "loss": 0.4287, "step": 51040 }, { "epoch": 0.7357710101898159, "grad_norm": 1.5552986826356054, "learning_rate": 1.9804305229214915e-06, "loss": 0.4142, "step": 51050 }, { "epoch": 0.7359151377138492, "grad_norm": 1.4672746683823665, "learning_rate": 1.97842590463216e-06, "loss": 0.4399, "step": 51060 }, { "epoch": 0.7360592652378825, "grad_norm": 1.619541776461693, "learning_rate": 1.976422051149301e-06, "loss": 0.4366, "step": 51070 }, { "epoch": 0.7362033927619157, "grad_norm": 1.4199704135446662, "learning_rate": 1.9744189629801184e-06, "loss": 0.4507, "step": 51080 }, { "epoch": 0.736347520285949, "grad_norm": 1.3540586201486347, "learning_rate": 1.972416640631629e-06, "loss": 0.4434, "step": 51090 }, { "epoch": 0.7364916478099822, "grad_norm": 1.6923558052633063, "learning_rate": 1.970415084610645e-06, "loss": 0.4487, "step": 51100 }, { "epoch": 0.7366357753340156, "grad_norm": 1.5534870178987816, "learning_rate": 1.968414295423797e-06, "loss": 0.4677, "step": 51110 }, { "epoch": 0.7367799028580488, "grad_norm": 1.6420834318516002, "learning_rate": 1.9664142735775104e-06, "loss": 0.4557, "step": 51120 }, { "epoch": 0.7369240303820821, "grad_norm": 1.529652994330532, "learning_rate": 1.9644150195780248e-06, "loss": 0.4344, "step": 51130 }, { "epoch": 0.7370681579061154, "grad_norm": 1.5794734083782185, "learning_rate": 1.96241653393138e-06, "loss": 0.4241, "step": 51140 }, { "epoch": 0.7372122854301486, "grad_norm": 1.8439818329603337, "learning_rate": 1.9604188171434224e-06, "loss": 0.4289, "step": 51150 }, { "epoch": 0.7373564129541819, "grad_norm": 1.4837033574413456, "learning_rate": 1.958421869719807e-06, "loss": 0.4145, "step": 51160 }, { "epoch": 0.7375005404782151, "grad_norm": 1.6330012289597877, "learning_rate": 1.95642569216599e-06, "loss": 0.4411, "step": 51170 }, { "epoch": 0.7376446680022484, "grad_norm": 1.5064657920000555, "learning_rate": 1.954430284987233e-06, "loss": 0.4667, "step": 51180 }, { "epoch": 0.7377887955262816, "grad_norm": 1.5116960392686707, "learning_rate": 1.9524356486886076e-06, "loss": 0.4399, "step": 51190 }, { "epoch": 0.7379329230503149, "grad_norm": 1.7167414645581487, "learning_rate": 1.9504417837749844e-06, "loss": 0.434, "step": 51200 }, { "epoch": 0.7380770505743481, "grad_norm": 1.6674876996033343, "learning_rate": 1.9484486907510405e-06, "loss": 0.4341, "step": 51210 }, { "epoch": 0.7382211780983815, "grad_norm": 1.76759678871903, "learning_rate": 1.946456370121258e-06, "loss": 0.4394, "step": 51220 }, { "epoch": 0.7383653056224148, "grad_norm": 1.501579523303625, "learning_rate": 1.944464822389926e-06, "loss": 0.4165, "step": 51230 }, { "epoch": 0.738509433146448, "grad_norm": 1.4741588189097359, "learning_rate": 1.942474048061135e-06, "loss": 0.4303, "step": 51240 }, { "epoch": 0.7386535606704813, "grad_norm": 1.761075093054551, "learning_rate": 1.940484047638777e-06, "loss": 0.452, "step": 51250 }, { "epoch": 0.7387976881945145, "grad_norm": 1.6257532452180383, "learning_rate": 1.9384948216265565e-06, "loss": 0.4423, "step": 51260 }, { "epoch": 0.7389418157185478, "grad_norm": 1.5745214735503181, "learning_rate": 1.9365063705279735e-06, "loss": 0.4268, "step": 51270 }, { "epoch": 0.739085943242581, "grad_norm": 1.6879540058772287, "learning_rate": 1.9345186948463347e-06, "loss": 0.4268, "step": 51280 }, { "epoch": 0.7392300707666143, "grad_norm": 1.8561285388542699, "learning_rate": 1.9325317950847545e-06, "loss": 0.4423, "step": 51290 }, { "epoch": 0.7393741982906475, "grad_norm": 1.786031219107292, "learning_rate": 1.9305456717461456e-06, "loss": 0.459, "step": 51300 }, { "epoch": 0.7395183258146808, "grad_norm": 1.4398939033490885, "learning_rate": 1.928560325333224e-06, "loss": 0.4285, "step": 51310 }, { "epoch": 0.739662453338714, "grad_norm": 1.6549246814907947, "learning_rate": 1.926575756348514e-06, "loss": 0.4486, "step": 51320 }, { "epoch": 0.7398065808627473, "grad_norm": 1.3833771993774582, "learning_rate": 1.9245919652943407e-06, "loss": 0.4231, "step": 51330 }, { "epoch": 0.7399507083867807, "grad_norm": 1.7547816014857953, "learning_rate": 1.9226089526728303e-06, "loss": 0.4351, "step": 51340 }, { "epoch": 0.7400948359108139, "grad_norm": 1.7540720629448665, "learning_rate": 1.920626718985913e-06, "loss": 0.4395, "step": 51350 }, { "epoch": 0.7402389634348472, "grad_norm": 1.3507305990780827, "learning_rate": 1.918645264735324e-06, "loss": 0.4111, "step": 51360 }, { "epoch": 0.7403830909588804, "grad_norm": 1.5988607638106764, "learning_rate": 1.9166645904225994e-06, "loss": 0.4409, "step": 51370 }, { "epoch": 0.7405272184829137, "grad_norm": 1.6306829416752557, "learning_rate": 1.914684696549076e-06, "loss": 0.4319, "step": 51380 }, { "epoch": 0.740671346006947, "grad_norm": 1.490876937882348, "learning_rate": 1.912705583615898e-06, "loss": 0.4405, "step": 51390 }, { "epoch": 0.7408154735309802, "grad_norm": 1.7184644743548145, "learning_rate": 1.9107272521240085e-06, "loss": 0.4313, "step": 51400 }, { "epoch": 0.7409596010550135, "grad_norm": 1.500091548346066, "learning_rate": 1.9087497025741496e-06, "loss": 0.4301, "step": 51410 }, { "epoch": 0.7411037285790467, "grad_norm": 1.7213351010067697, "learning_rate": 1.9067729354668747e-06, "loss": 0.4268, "step": 51420 }, { "epoch": 0.74124785610308, "grad_norm": 1.4515578424173234, "learning_rate": 1.9047969513025295e-06, "loss": 0.4145, "step": 51430 }, { "epoch": 0.7413919836271132, "grad_norm": 1.9448860118020008, "learning_rate": 1.902821750581269e-06, "loss": 0.4525, "step": 51440 }, { "epoch": 0.7415361111511465, "grad_norm": 1.636767536140509, "learning_rate": 1.900847333803043e-06, "loss": 0.4598, "step": 51450 }, { "epoch": 0.7416802386751798, "grad_norm": 1.69282982239877, "learning_rate": 1.8988737014676106e-06, "loss": 0.4375, "step": 51460 }, { "epoch": 0.7418243661992131, "grad_norm": 1.429287249882354, "learning_rate": 1.8969008540745265e-06, "loss": 0.4498, "step": 51470 }, { "epoch": 0.7419684937232464, "grad_norm": 2.0420638730576908, "learning_rate": 1.8949287921231458e-06, "loss": 0.4382, "step": 51480 }, { "epoch": 0.7421126212472796, "grad_norm": 1.4816230519333777, "learning_rate": 1.8929575161126318e-06, "loss": 0.4532, "step": 51490 }, { "epoch": 0.7422567487713129, "grad_norm": 1.800375552168596, "learning_rate": 1.8909870265419422e-06, "loss": 0.4573, "step": 51500 }, { "epoch": 0.7424008762953461, "grad_norm": 1.520954336021461, "learning_rate": 1.889017323909837e-06, "loss": 0.421, "step": 51510 }, { "epoch": 0.7425450038193794, "grad_norm": 1.2463609497637396, "learning_rate": 1.8870484087148805e-06, "loss": 0.4, "step": 51520 }, { "epoch": 0.7426891313434126, "grad_norm": 1.7550860806772217, "learning_rate": 1.8850802814554336e-06, "loss": 0.4743, "step": 51530 }, { "epoch": 0.7428332588674459, "grad_norm": 1.3896852157067083, "learning_rate": 1.8831129426296567e-06, "loss": 0.4283, "step": 51540 }, { "epoch": 0.7429773863914791, "grad_norm": 1.7681291943544661, "learning_rate": 1.8811463927355194e-06, "loss": 0.4189, "step": 51550 }, { "epoch": 0.7431215139155124, "grad_norm": 1.6189271418173767, "learning_rate": 1.8791806322707818e-06, "loss": 0.445, "step": 51560 }, { "epoch": 0.7432656414395457, "grad_norm": 1.5155394745236919, "learning_rate": 1.8772156617330072e-06, "loss": 0.4418, "step": 51570 }, { "epoch": 0.743409768963579, "grad_norm": 1.5842733162351093, "learning_rate": 1.8752514816195615e-06, "loss": 0.4533, "step": 51580 }, { "epoch": 0.7435538964876123, "grad_norm": 1.5710765153909954, "learning_rate": 1.8732880924276086e-06, "loss": 0.4476, "step": 51590 }, { "epoch": 0.7436980240116455, "grad_norm": 1.5876936189794815, "learning_rate": 1.871325494654111e-06, "loss": 0.4387, "step": 51600 }, { "epoch": 0.7438421515356788, "grad_norm": 1.7582536638682875, "learning_rate": 1.8693636887958311e-06, "loss": 0.4317, "step": 51610 }, { "epoch": 0.743986279059712, "grad_norm": 1.6888146638950978, "learning_rate": 1.8674026753493346e-06, "loss": 0.4374, "step": 51620 }, { "epoch": 0.7441304065837453, "grad_norm": 1.5215835773250646, "learning_rate": 1.8654424548109828e-06, "loss": 0.4045, "step": 51630 }, { "epoch": 0.7442745341077786, "grad_norm": 1.6558778629683364, "learning_rate": 1.8634830276769356e-06, "loss": 0.4725, "step": 51640 }, { "epoch": 0.7444186616318118, "grad_norm": 1.7310048813439218, "learning_rate": 1.8615243944431554e-06, "loss": 0.427, "step": 51650 }, { "epoch": 0.7445627891558451, "grad_norm": 1.7541658535150289, "learning_rate": 1.8595665556054037e-06, "loss": 0.434, "step": 51660 }, { "epoch": 0.7447069166798783, "grad_norm": 1.5448193678660183, "learning_rate": 1.8576095116592363e-06, "loss": 0.3956, "step": 51670 }, { "epoch": 0.7448510442039116, "grad_norm": 1.6468585288923467, "learning_rate": 1.8556532631000145e-06, "loss": 0.4182, "step": 51680 }, { "epoch": 0.7449951717279449, "grad_norm": 1.7433439921353822, "learning_rate": 1.853697810422892e-06, "loss": 0.4367, "step": 51690 }, { "epoch": 0.7451392992519782, "grad_norm": 1.6644538507891087, "learning_rate": 1.8517431541228232e-06, "loss": 0.4354, "step": 51700 }, { "epoch": 0.7452834267760114, "grad_norm": 1.3135233886169675, "learning_rate": 1.8497892946945638e-06, "loss": 0.4276, "step": 51710 }, { "epoch": 0.7454275543000447, "grad_norm": 1.8499125304518809, "learning_rate": 1.8478362326326644e-06, "loss": 0.4529, "step": 51720 }, { "epoch": 0.745571681824078, "grad_norm": 1.682541460482283, "learning_rate": 1.8458839684314723e-06, "loss": 0.418, "step": 51730 }, { "epoch": 0.7457158093481112, "grad_norm": 1.7325017933427873, "learning_rate": 1.8439325025851395e-06, "loss": 0.448, "step": 51740 }, { "epoch": 0.7458599368721445, "grad_norm": 1.6252311562289325, "learning_rate": 1.8419818355876096e-06, "loss": 0.4541, "step": 51750 }, { "epoch": 0.7460040643961777, "grad_norm": 1.4657485049779935, "learning_rate": 1.8400319679326245e-06, "loss": 0.4393, "step": 51760 }, { "epoch": 0.746148191920211, "grad_norm": 1.5046061819700873, "learning_rate": 1.8380829001137273e-06, "loss": 0.4219, "step": 51770 }, { "epoch": 0.7462923194442442, "grad_norm": 1.446975447980143, "learning_rate": 1.836134632624259e-06, "loss": 0.4414, "step": 51780 }, { "epoch": 0.7464364469682775, "grad_norm": 2.237419576896236, "learning_rate": 1.834187165957353e-06, "loss": 0.4384, "step": 51790 }, { "epoch": 0.7465805744923107, "grad_norm": 1.6959073663592994, "learning_rate": 1.832240500605942e-06, "loss": 0.4494, "step": 51800 }, { "epoch": 0.7467247020163441, "grad_norm": 1.6853216624594414, "learning_rate": 1.8302946370627594e-06, "loss": 0.4207, "step": 51810 }, { "epoch": 0.7468688295403774, "grad_norm": 1.651593958785194, "learning_rate": 1.8283495758203318e-06, "loss": 0.4307, "step": 51820 }, { "epoch": 0.7470129570644106, "grad_norm": 1.646779414381745, "learning_rate": 1.8264053173709817e-06, "loss": 0.4274, "step": 51830 }, { "epoch": 0.7471570845884439, "grad_norm": 1.5580214276674362, "learning_rate": 1.8244618622068344e-06, "loss": 0.4156, "step": 51840 }, { "epoch": 0.7473012121124771, "grad_norm": 1.5283483156873654, "learning_rate": 1.8225192108198065e-06, "loss": 0.4394, "step": 51850 }, { "epoch": 0.7474453396365104, "grad_norm": 1.4921903815965911, "learning_rate": 1.82057736370161e-06, "loss": 0.4033, "step": 51860 }, { "epoch": 0.7475894671605436, "grad_norm": 1.6340878791766804, "learning_rate": 1.8186363213437586e-06, "loss": 0.431, "step": 51870 }, { "epoch": 0.7477335946845769, "grad_norm": 1.6870387246949696, "learning_rate": 1.8166960842375609e-06, "loss": 0.4615, "step": 51880 }, { "epoch": 0.7478777222086102, "grad_norm": 1.7053211422166563, "learning_rate": 1.814756652874119e-06, "loss": 0.461, "step": 51890 }, { "epoch": 0.7480218497326434, "grad_norm": 1.7267137267964694, "learning_rate": 1.8128180277443307e-06, "loss": 0.4571, "step": 51900 }, { "epoch": 0.7481659772566767, "grad_norm": 1.5553306797721063, "learning_rate": 1.8108802093388955e-06, "loss": 0.4337, "step": 51910 }, { "epoch": 0.7483101047807099, "grad_norm": 1.3291916225437423, "learning_rate": 1.8089431981483025e-06, "loss": 0.4273, "step": 51920 }, { "epoch": 0.7484542323047433, "grad_norm": 1.5006306278579686, "learning_rate": 1.8070069946628378e-06, "loss": 0.4215, "step": 51930 }, { "epoch": 0.7485983598287765, "grad_norm": 1.676963438410771, "learning_rate": 1.8050715993725864e-06, "loss": 0.4533, "step": 51940 }, { "epoch": 0.7487424873528098, "grad_norm": 1.7574527604209567, "learning_rate": 1.8031370127674253e-06, "loss": 0.4587, "step": 51950 }, { "epoch": 0.748886614876843, "grad_norm": 1.482790467017506, "learning_rate": 1.8012032353370261e-06, "loss": 0.444, "step": 51960 }, { "epoch": 0.7490307424008763, "grad_norm": 1.776282771198771, "learning_rate": 1.7992702675708607e-06, "loss": 0.4353, "step": 51970 }, { "epoch": 0.7491748699249096, "grad_norm": 1.4864957391990827, "learning_rate": 1.7973381099581889e-06, "loss": 0.4792, "step": 51980 }, { "epoch": 0.7493189974489428, "grad_norm": 1.9306461816405565, "learning_rate": 1.7954067629880734e-06, "loss": 0.4225, "step": 51990 }, { "epoch": 0.7494631249729761, "grad_norm": 1.7567430685359815, "learning_rate": 1.7934762271493638e-06, "loss": 0.452, "step": 52000 }, { "epoch": 0.7496072524970093, "grad_norm": 1.6071281977559067, "learning_rate": 1.7915465029307116e-06, "loss": 0.4322, "step": 52010 }, { "epoch": 0.7497513800210426, "grad_norm": 1.5197464681121078, "learning_rate": 1.7896175908205577e-06, "loss": 0.431, "step": 52020 }, { "epoch": 0.7498955075450758, "grad_norm": 1.4657231070374463, "learning_rate": 1.7876894913071375e-06, "loss": 0.4146, "step": 52030 }, { "epoch": 0.7500396350691092, "grad_norm": 1.64805649716231, "learning_rate": 1.7857622048784856e-06, "loss": 0.421, "step": 52040 }, { "epoch": 0.7501837625931425, "grad_norm": 1.7341556595003946, "learning_rate": 1.7838357320224265e-06, "loss": 0.4364, "step": 52050 }, { "epoch": 0.7503278901171757, "grad_norm": 1.578761105043543, "learning_rate": 1.7819100732265771e-06, "loss": 0.4276, "step": 52060 }, { "epoch": 0.750472017641209, "grad_norm": 2.129552659373591, "learning_rate": 1.7799852289783547e-06, "loss": 0.4404, "step": 52070 }, { "epoch": 0.7506161451652422, "grad_norm": 1.6813959849299174, "learning_rate": 1.778061199764966e-06, "loss": 0.4349, "step": 52080 }, { "epoch": 0.7507602726892755, "grad_norm": 1.7080474520269042, "learning_rate": 1.7761379860734096e-06, "loss": 0.447, "step": 52090 }, { "epoch": 0.7509044002133087, "grad_norm": 1.7815120125871575, "learning_rate": 1.7742155883904817e-06, "loss": 0.4401, "step": 52100 }, { "epoch": 0.751048527737342, "grad_norm": 1.3999008541148166, "learning_rate": 1.7722940072027728e-06, "loss": 0.4137, "step": 52110 }, { "epoch": 0.7511926552613752, "grad_norm": 1.490890013905584, "learning_rate": 1.7703732429966625e-06, "loss": 0.4435, "step": 52120 }, { "epoch": 0.7513367827854085, "grad_norm": 1.6005378569830775, "learning_rate": 1.7684532962583233e-06, "loss": 0.4241, "step": 52130 }, { "epoch": 0.7514809103094418, "grad_norm": 1.9584825781219353, "learning_rate": 1.7665341674737268e-06, "loss": 0.4426, "step": 52140 }, { "epoch": 0.751625037833475, "grad_norm": 1.4079104281472052, "learning_rate": 1.7646158571286326e-06, "loss": 0.4361, "step": 52150 }, { "epoch": 0.7517691653575084, "grad_norm": 1.5208063435287826, "learning_rate": 1.7626983657085917e-06, "loss": 0.4221, "step": 52160 }, { "epoch": 0.7519132928815416, "grad_norm": 1.1833066562719046, "learning_rate": 1.7607816936989541e-06, "loss": 0.3941, "step": 52170 }, { "epoch": 0.7520574204055749, "grad_norm": 1.5655464459093873, "learning_rate": 1.758865841584857e-06, "loss": 0.3943, "step": 52180 }, { "epoch": 0.7522015479296081, "grad_norm": 1.4926935924369078, "learning_rate": 1.7569508098512305e-06, "loss": 0.4551, "step": 52190 }, { "epoch": 0.7523456754536414, "grad_norm": 1.6267987627524998, "learning_rate": 1.7550365989827995e-06, "loss": 0.4483, "step": 52200 }, { "epoch": 0.7524898029776746, "grad_norm": 1.456639004222271, "learning_rate": 1.7531232094640815e-06, "loss": 0.4355, "step": 52210 }, { "epoch": 0.7526339305017079, "grad_norm": 1.4359165207216629, "learning_rate": 1.7512106417793823e-06, "loss": 0.4345, "step": 52220 }, { "epoch": 0.7527780580257412, "grad_norm": 1.556175831475567, "learning_rate": 1.7492988964128038e-06, "loss": 0.4324, "step": 52230 }, { "epoch": 0.7529221855497744, "grad_norm": 1.4751087511983365, "learning_rate": 1.7473879738482375e-06, "loss": 0.4475, "step": 52240 }, { "epoch": 0.7530663130738077, "grad_norm": 1.6337118319411033, "learning_rate": 1.7454778745693656e-06, "loss": 0.4269, "step": 52250 }, { "epoch": 0.7532104405978409, "grad_norm": 1.5544191168634864, "learning_rate": 1.7435685990596636e-06, "loss": 0.4438, "step": 52260 }, { "epoch": 0.7533545681218742, "grad_norm": 1.713497499083675, "learning_rate": 1.7416601478023998e-06, "loss": 0.442, "step": 52270 }, { "epoch": 0.7534986956459075, "grad_norm": 1.5465710459753121, "learning_rate": 1.7397525212806316e-06, "loss": 0.4393, "step": 52280 }, { "epoch": 0.7536428231699408, "grad_norm": 1.836953845042981, "learning_rate": 1.7378457199772065e-06, "loss": 0.4311, "step": 52290 }, { "epoch": 0.7537869506939741, "grad_norm": 1.8884981193553814, "learning_rate": 1.7359397443747684e-06, "loss": 0.4409, "step": 52300 }, { "epoch": 0.7539310782180073, "grad_norm": 1.3326251287342326, "learning_rate": 1.7340345949557468e-06, "loss": 0.45, "step": 52310 }, { "epoch": 0.7540752057420406, "grad_norm": 1.5876969042729574, "learning_rate": 1.7321302722023608e-06, "loss": 0.4443, "step": 52320 }, { "epoch": 0.7542193332660738, "grad_norm": 1.7110398552340338, "learning_rate": 1.7302267765966306e-06, "loss": 0.403, "step": 52330 }, { "epoch": 0.7543634607901071, "grad_norm": 1.7999819779917299, "learning_rate": 1.7283241086203562e-06, "loss": 0.4313, "step": 52340 }, { "epoch": 0.7545075883141403, "grad_norm": 1.7883703671633688, "learning_rate": 1.7264222687551307e-06, "loss": 0.4486, "step": 52350 }, { "epoch": 0.7546517158381736, "grad_norm": 2.7721442614737146, "learning_rate": 1.7245212574823417e-06, "loss": 0.4267, "step": 52360 }, { "epoch": 0.7547958433622068, "grad_norm": 1.6029066307051862, "learning_rate": 1.7226210752831618e-06, "loss": 0.4299, "step": 52370 }, { "epoch": 0.7549399708862401, "grad_norm": 1.7714092466542848, "learning_rate": 1.7207217226385558e-06, "loss": 0.41, "step": 52380 }, { "epoch": 0.7550840984102735, "grad_norm": 1.418999488156182, "learning_rate": 1.7188232000292816e-06, "loss": 0.4369, "step": 52390 }, { "epoch": 0.7552282259343067, "grad_norm": 1.6792202724164509, "learning_rate": 1.7169255079358822e-06, "loss": 0.4471, "step": 52400 }, { "epoch": 0.75537235345834, "grad_norm": 1.4570315150927688, "learning_rate": 1.7150286468386906e-06, "loss": 0.4621, "step": 52410 }, { "epoch": 0.7555164809823732, "grad_norm": 1.642685796131738, "learning_rate": 1.713132617217836e-06, "loss": 0.4482, "step": 52420 }, { "epoch": 0.7556606085064065, "grad_norm": 1.5393632429263067, "learning_rate": 1.7112374195532272e-06, "loss": 0.4409, "step": 52430 }, { "epoch": 0.7558047360304397, "grad_norm": 1.6561618861803826, "learning_rate": 1.709343054324572e-06, "loss": 0.4344, "step": 52440 }, { "epoch": 0.755948863554473, "grad_norm": 1.632360395621149, "learning_rate": 1.7074495220113596e-06, "loss": 0.4367, "step": 52450 }, { "epoch": 0.7560929910785062, "grad_norm": 1.8216824320668832, "learning_rate": 1.7055568230928748e-06, "loss": 0.4435, "step": 52460 }, { "epoch": 0.7562371186025395, "grad_norm": 1.7010552678453787, "learning_rate": 1.703664958048188e-06, "loss": 0.4428, "step": 52470 }, { "epoch": 0.7563812461265728, "grad_norm": 1.6726058533714796, "learning_rate": 1.7017739273561567e-06, "loss": 0.456, "step": 52480 }, { "epoch": 0.756525373650606, "grad_norm": 1.4422970644939126, "learning_rate": 1.6998837314954325e-06, "loss": 0.4365, "step": 52490 }, { "epoch": 0.7566695011746393, "grad_norm": 1.5094784342571805, "learning_rate": 1.697994370944452e-06, "loss": 0.4226, "step": 52500 }, { "epoch": 0.7568136286986726, "grad_norm": 1.8380612644773715, "learning_rate": 1.6961058461814389e-06, "loss": 0.4192, "step": 52510 }, { "epoch": 0.7569577562227059, "grad_norm": 1.6315477325004264, "learning_rate": 1.6942181576844114e-06, "loss": 0.4373, "step": 52520 }, { "epoch": 0.7571018837467391, "grad_norm": 1.7076673144189818, "learning_rate": 1.6923313059311696e-06, "loss": 0.4314, "step": 52530 }, { "epoch": 0.7572460112707724, "grad_norm": 1.4761919373773267, "learning_rate": 1.690445291399304e-06, "loss": 0.4287, "step": 52540 }, { "epoch": 0.7573901387948057, "grad_norm": 1.6989859945077417, "learning_rate": 1.6885601145661951e-06, "loss": 0.4245, "step": 52550 }, { "epoch": 0.7575342663188389, "grad_norm": 1.378727709007465, "learning_rate": 1.6866757759090108e-06, "loss": 0.4275, "step": 52560 }, { "epoch": 0.7576783938428722, "grad_norm": 1.5820441465217014, "learning_rate": 1.6847922759047046e-06, "loss": 0.4447, "step": 52570 }, { "epoch": 0.7578225213669054, "grad_norm": 1.602012870803603, "learning_rate": 1.6829096150300178e-06, "loss": 0.4532, "step": 52580 }, { "epoch": 0.7579666488909387, "grad_norm": 1.65127713457745, "learning_rate": 1.6810277937614839e-06, "loss": 0.4454, "step": 52590 }, { "epoch": 0.7581107764149719, "grad_norm": 1.8440832332849586, "learning_rate": 1.6791468125754184e-06, "loss": 0.4534, "step": 52600 }, { "epoch": 0.7582549039390052, "grad_norm": 1.5310611225255124, "learning_rate": 1.6772666719479247e-06, "loss": 0.4502, "step": 52610 }, { "epoch": 0.7583990314630384, "grad_norm": 1.7402909019275514, "learning_rate": 1.6753873723548986e-06, "loss": 0.4553, "step": 52620 }, { "epoch": 0.7585431589870718, "grad_norm": 1.5897241070277495, "learning_rate": 1.673508914272018e-06, "loss": 0.4472, "step": 52630 }, { "epoch": 0.7586872865111051, "grad_norm": 2.0869657722102564, "learning_rate": 1.6716312981747474e-06, "loss": 0.4457, "step": 52640 }, { "epoch": 0.7588314140351383, "grad_norm": 1.7312952845967338, "learning_rate": 1.6697545245383412e-06, "loss": 0.4538, "step": 52650 }, { "epoch": 0.7589755415591716, "grad_norm": 1.474696643277913, "learning_rate": 1.6678785938378423e-06, "loss": 0.4009, "step": 52660 }, { "epoch": 0.7591196690832048, "grad_norm": 1.4673784105781764, "learning_rate": 1.6660035065480746e-06, "loss": 0.4204, "step": 52670 }, { "epoch": 0.7592637966072381, "grad_norm": 1.5312114511162795, "learning_rate": 1.66412926314365e-06, "loss": 0.4145, "step": 52680 }, { "epoch": 0.7594079241312713, "grad_norm": 3.3368277152754184, "learning_rate": 1.6622558640989717e-06, "loss": 0.438, "step": 52690 }, { "epoch": 0.7595520516553046, "grad_norm": 1.62204987392226, "learning_rate": 1.6603833098882232e-06, "loss": 0.4265, "step": 52700 }, { "epoch": 0.7596961791793378, "grad_norm": 1.6372610669789587, "learning_rate": 1.6585116009853757e-06, "loss": 0.4263, "step": 52710 }, { "epoch": 0.7598403067033711, "grad_norm": 1.8248098779983553, "learning_rate": 1.6566407378641903e-06, "loss": 0.4099, "step": 52720 }, { "epoch": 0.7599844342274044, "grad_norm": 1.6220926254404517, "learning_rate": 1.654770720998209e-06, "loss": 0.4165, "step": 52730 }, { "epoch": 0.7601285617514377, "grad_norm": 1.5736978225070373, "learning_rate": 1.65290155086076e-06, "loss": 0.4213, "step": 52740 }, { "epoch": 0.760272689275471, "grad_norm": 1.5949670609146227, "learning_rate": 1.6510332279249624e-06, "loss": 0.4464, "step": 52750 }, { "epoch": 0.7604168167995042, "grad_norm": 1.4779964461385726, "learning_rate": 1.6491657526637133e-06, "loss": 0.4234, "step": 52760 }, { "epoch": 0.7605609443235375, "grad_norm": 1.6378636286118036, "learning_rate": 1.6472991255497034e-06, "loss": 0.4181, "step": 52770 }, { "epoch": 0.7607050718475707, "grad_norm": 1.820709709397795, "learning_rate": 1.6454333470554e-06, "loss": 0.4392, "step": 52780 }, { "epoch": 0.760849199371604, "grad_norm": 1.5473731833829862, "learning_rate": 1.6435684176530646e-06, "loss": 0.4211, "step": 52790 }, { "epoch": 0.7609933268956373, "grad_norm": 1.8501605573160136, "learning_rate": 1.641704337814737e-06, "loss": 0.4163, "step": 52800 }, { "epoch": 0.7611374544196705, "grad_norm": 1.7193742738409026, "learning_rate": 1.6398411080122424e-06, "loss": 0.4438, "step": 52810 }, { "epoch": 0.7612815819437038, "grad_norm": 1.826924765724245, "learning_rate": 1.637978728717196e-06, "loss": 0.4076, "step": 52820 }, { "epoch": 0.761425709467737, "grad_norm": 1.643779367145216, "learning_rate": 1.6361172004009935e-06, "loss": 0.4466, "step": 52830 }, { "epoch": 0.7615698369917703, "grad_norm": 1.6971774329471336, "learning_rate": 1.6342565235348135e-06, "loss": 0.4572, "step": 52840 }, { "epoch": 0.7617139645158035, "grad_norm": 1.5534746403657673, "learning_rate": 1.6323966985896255e-06, "loss": 0.4293, "step": 52850 }, { "epoch": 0.7618580920398369, "grad_norm": 1.7192888932132468, "learning_rate": 1.6305377260361776e-06, "loss": 0.4386, "step": 52860 }, { "epoch": 0.7620022195638702, "grad_norm": 1.3636891243127331, "learning_rate": 1.6286796063450028e-06, "loss": 0.4629, "step": 52870 }, { "epoch": 0.7621463470879034, "grad_norm": 1.7003968788332064, "learning_rate": 1.6268223399864214e-06, "loss": 0.4244, "step": 52880 }, { "epoch": 0.7622904746119367, "grad_norm": 2.042576397611525, "learning_rate": 1.6249659274305363e-06, "loss": 0.4476, "step": 52890 }, { "epoch": 0.7624346021359699, "grad_norm": 1.8356591419282922, "learning_rate": 1.6231103691472334e-06, "loss": 0.4634, "step": 52900 }, { "epoch": 0.7625787296600032, "grad_norm": 1.7392802901146933, "learning_rate": 1.6212556656061807e-06, "loss": 0.429, "step": 52910 }, { "epoch": 0.7627228571840364, "grad_norm": 1.4471760400443496, "learning_rate": 1.619401817276835e-06, "loss": 0.44, "step": 52920 }, { "epoch": 0.7628669847080697, "grad_norm": 1.6483041667256353, "learning_rate": 1.617548824628432e-06, "loss": 0.4167, "step": 52930 }, { "epoch": 0.7630111122321029, "grad_norm": 1.465082973164752, "learning_rate": 1.6156966881299913e-06, "loss": 0.4178, "step": 52940 }, { "epoch": 0.7631552397561362, "grad_norm": 1.7660183345836213, "learning_rate": 1.6138454082503197e-06, "loss": 0.4544, "step": 52950 }, { "epoch": 0.7632993672801694, "grad_norm": 1.480753973029858, "learning_rate": 1.611994985458003e-06, "loss": 0.417, "step": 52960 }, { "epoch": 0.7634434948042027, "grad_norm": 1.7455062703083046, "learning_rate": 1.6101454202214096e-06, "loss": 0.448, "step": 52970 }, { "epoch": 0.7635876223282361, "grad_norm": 1.4765072684328366, "learning_rate": 1.6082967130086935e-06, "loss": 0.4312, "step": 52980 }, { "epoch": 0.7637317498522693, "grad_norm": 1.681041930901877, "learning_rate": 1.606448864287794e-06, "loss": 0.4447, "step": 52990 }, { "epoch": 0.7638758773763026, "grad_norm": 1.6573708247379215, "learning_rate": 1.6046018745264253e-06, "loss": 0.4305, "step": 53000 }, { "epoch": 0.7640200049003358, "grad_norm": 1.8722174331727761, "learning_rate": 1.6027557441920921e-06, "loss": 0.4132, "step": 53010 }, { "epoch": 0.7641641324243691, "grad_norm": 1.7909240509777116, "learning_rate": 1.600910473752077e-06, "loss": 0.4299, "step": 53020 }, { "epoch": 0.7643082599484023, "grad_norm": 1.5777261013097508, "learning_rate": 1.599066063673444e-06, "loss": 0.4583, "step": 53030 }, { "epoch": 0.7644523874724356, "grad_norm": 1.8530553794895348, "learning_rate": 1.5972225144230447e-06, "loss": 0.4445, "step": 53040 }, { "epoch": 0.7645965149964689, "grad_norm": 1.6029757838096836, "learning_rate": 1.595379826467508e-06, "loss": 0.429, "step": 53050 }, { "epoch": 0.7647406425205021, "grad_norm": 1.4100212007073687, "learning_rate": 1.593538000273246e-06, "loss": 0.4107, "step": 53060 }, { "epoch": 0.7648847700445354, "grad_norm": 1.7817552586562504, "learning_rate": 1.591697036306452e-06, "loss": 0.4367, "step": 53070 }, { "epoch": 0.7650288975685686, "grad_norm": 1.5845669328605103, "learning_rate": 1.589856935033105e-06, "loss": 0.4316, "step": 53080 }, { "epoch": 0.765173025092602, "grad_norm": 1.640779649241548, "learning_rate": 1.5880176969189593e-06, "loss": 0.443, "step": 53090 }, { "epoch": 0.7653171526166352, "grad_norm": 1.7386359077868179, "learning_rate": 1.5861793224295557e-06, "loss": 0.4492, "step": 53100 }, { "epoch": 0.7654612801406685, "grad_norm": 1.5689668125934575, "learning_rate": 1.5843418120302167e-06, "loss": 0.4342, "step": 53110 }, { "epoch": 0.7656054076647018, "grad_norm": 1.9275281426005648, "learning_rate": 1.5825051661860425e-06, "loss": 0.4378, "step": 53120 }, { "epoch": 0.765749535188735, "grad_norm": 1.4254594811537227, "learning_rate": 1.5806693853619143e-06, "loss": 0.4455, "step": 53130 }, { "epoch": 0.7658936627127683, "grad_norm": 1.5387822242667613, "learning_rate": 1.5788344700224994e-06, "loss": 0.4444, "step": 53140 }, { "epoch": 0.7660377902368015, "grad_norm": 1.584205113145228, "learning_rate": 1.577000420632242e-06, "loss": 0.4342, "step": 53150 }, { "epoch": 0.7661819177608348, "grad_norm": 1.553144755764281, "learning_rate": 1.5751672376553655e-06, "loss": 0.4403, "step": 53160 }, { "epoch": 0.766326045284868, "grad_norm": 1.540136807644499, "learning_rate": 1.5733349215558802e-06, "loss": 0.4231, "step": 53170 }, { "epoch": 0.7664701728089013, "grad_norm": 1.6608298176455527, "learning_rate": 1.5715034727975715e-06, "loss": 0.4203, "step": 53180 }, { "epoch": 0.7666143003329345, "grad_norm": 1.461592246844186, "learning_rate": 1.5696728918440052e-06, "loss": 0.4178, "step": 53190 }, { "epoch": 0.7667584278569678, "grad_norm": 1.6875014389331868, "learning_rate": 1.5678431791585308e-06, "loss": 0.4427, "step": 53200 }, { "epoch": 0.7669025553810012, "grad_norm": 1.5941029296550346, "learning_rate": 1.5660143352042784e-06, "loss": 0.4347, "step": 53210 }, { "epoch": 0.7670466829050344, "grad_norm": 1.4573203249203974, "learning_rate": 1.564186360444155e-06, "loss": 0.4391, "step": 53220 }, { "epoch": 0.7671908104290677, "grad_norm": 1.5793023177729704, "learning_rate": 1.5623592553408462e-06, "loss": 0.4474, "step": 53230 }, { "epoch": 0.7673349379531009, "grad_norm": 1.667560195318803, "learning_rate": 1.5605330203568241e-06, "loss": 0.4389, "step": 53240 }, { "epoch": 0.7674790654771342, "grad_norm": 1.8603990491711029, "learning_rate": 1.5587076559543352e-06, "loss": 0.4375, "step": 53250 }, { "epoch": 0.7676231930011674, "grad_norm": 1.6056066413012982, "learning_rate": 1.556883162595405e-06, "loss": 0.4241, "step": 53260 }, { "epoch": 0.7677673205252007, "grad_norm": 1.7308277256047326, "learning_rate": 1.5550595407418439e-06, "loss": 0.454, "step": 53270 }, { "epoch": 0.767911448049234, "grad_norm": 1.5896089553288648, "learning_rate": 1.553236790855237e-06, "loss": 0.4378, "step": 53280 }, { "epoch": 0.7680555755732672, "grad_norm": 1.7647997303967216, "learning_rate": 1.5514149133969485e-06, "loss": 0.447, "step": 53290 }, { "epoch": 0.7681997030973005, "grad_norm": 1.6150846853001792, "learning_rate": 1.5495939088281258e-06, "loss": 0.4196, "step": 53300 }, { "epoch": 0.7683438306213337, "grad_norm": 1.5565159601971623, "learning_rate": 1.5477737776096912e-06, "loss": 0.4382, "step": 53310 }, { "epoch": 0.768487958145367, "grad_norm": 1.828560134764145, "learning_rate": 1.5459545202023497e-06, "loss": 0.4345, "step": 53320 }, { "epoch": 0.7686320856694003, "grad_norm": 1.6474517310924066, "learning_rate": 1.5441361370665796e-06, "loss": 0.4568, "step": 53330 }, { "epoch": 0.7687762131934336, "grad_norm": 1.6033530471541977, "learning_rate": 1.5423186286626458e-06, "loss": 0.4133, "step": 53340 }, { "epoch": 0.7689203407174668, "grad_norm": 1.598004020426878, "learning_rate": 1.5405019954505852e-06, "loss": 0.4361, "step": 53350 }, { "epoch": 0.7690644682415001, "grad_norm": 1.5599492087921647, "learning_rate": 1.538686237890213e-06, "loss": 0.4211, "step": 53360 }, { "epoch": 0.7692085957655334, "grad_norm": 1.6508687320930913, "learning_rate": 1.5368713564411298e-06, "loss": 0.4077, "step": 53370 }, { "epoch": 0.7693527232895666, "grad_norm": 1.6324913006735697, "learning_rate": 1.535057351562707e-06, "loss": 0.4373, "step": 53380 }, { "epoch": 0.7694968508135999, "grad_norm": 1.6114977297781732, "learning_rate": 1.533244223714096e-06, "loss": 0.4452, "step": 53390 }, { "epoch": 0.7696409783376331, "grad_norm": 1.8893906436028602, "learning_rate": 1.53143197335423e-06, "loss": 0.4334, "step": 53400 }, { "epoch": 0.7697851058616664, "grad_norm": 1.7241912585053574, "learning_rate": 1.5296206009418152e-06, "loss": 0.4105, "step": 53410 }, { "epoch": 0.7699292333856996, "grad_norm": 1.669639303331813, "learning_rate": 1.5278101069353368e-06, "loss": 0.4044, "step": 53420 }, { "epoch": 0.7700733609097329, "grad_norm": 1.5211569127761255, "learning_rate": 1.5260004917930592e-06, "loss": 0.4448, "step": 53430 }, { "epoch": 0.7702174884337663, "grad_norm": 1.6684704975795408, "learning_rate": 1.5241917559730257e-06, "loss": 0.4404, "step": 53440 }, { "epoch": 0.7703616159577995, "grad_norm": 1.6447835066442402, "learning_rate": 1.5223838999330538e-06, "loss": 0.4258, "step": 53450 }, { "epoch": 0.7705057434818328, "grad_norm": 1.722899637450616, "learning_rate": 1.5205769241307366e-06, "loss": 0.4492, "step": 53460 }, { "epoch": 0.770649871005866, "grad_norm": 2.5219911038193614, "learning_rate": 1.5187708290234515e-06, "loss": 0.4118, "step": 53470 }, { "epoch": 0.7707939985298993, "grad_norm": 1.4656819884215524, "learning_rate": 1.5169656150683466e-06, "loss": 0.4138, "step": 53480 }, { "epoch": 0.7709381260539325, "grad_norm": 1.622692451495825, "learning_rate": 1.5151612827223472e-06, "loss": 0.422, "step": 53490 }, { "epoch": 0.7710822535779658, "grad_norm": 1.6614647453825517, "learning_rate": 1.513357832442161e-06, "loss": 0.448, "step": 53500 }, { "epoch": 0.771226381101999, "grad_norm": 1.6496734731649174, "learning_rate": 1.5115552646842669e-06, "loss": 0.4213, "step": 53510 }, { "epoch": 0.7713705086260323, "grad_norm": 1.8835692444045822, "learning_rate": 1.5097535799049213e-06, "loss": 0.4488, "step": 53520 }, { "epoch": 0.7715146361500655, "grad_norm": 1.5459184458185675, "learning_rate": 1.5079527785601588e-06, "loss": 0.4168, "step": 53530 }, { "epoch": 0.7716587636740988, "grad_norm": 1.6021357318433982, "learning_rate": 1.5061528611057917e-06, "loss": 0.4559, "step": 53540 }, { "epoch": 0.7718028911981321, "grad_norm": 2.018485393522879, "learning_rate": 1.504353827997405e-06, "loss": 0.4349, "step": 53550 }, { "epoch": 0.7719470187221654, "grad_norm": 1.6246556922256046, "learning_rate": 1.50255567969036e-06, "loss": 0.4293, "step": 53560 }, { "epoch": 0.7720911462461987, "grad_norm": 1.7476412016777054, "learning_rate": 1.5007584166397987e-06, "loss": 0.45, "step": 53570 }, { "epoch": 0.7722352737702319, "grad_norm": 1.6361553910039537, "learning_rate": 1.4989620393006332e-06, "loss": 0.4394, "step": 53580 }, { "epoch": 0.7723794012942652, "grad_norm": 1.6761460819791387, "learning_rate": 1.4971665481275532e-06, "loss": 0.4519, "step": 53590 }, { "epoch": 0.7725235288182984, "grad_norm": 1.6375272580966973, "learning_rate": 1.495371943575028e-06, "loss": 0.4481, "step": 53600 }, { "epoch": 0.7726676563423317, "grad_norm": 1.4481886485023798, "learning_rate": 1.493578226097298e-06, "loss": 0.4164, "step": 53610 }, { "epoch": 0.772811783866365, "grad_norm": 1.5884708297506986, "learning_rate": 1.491785396148378e-06, "loss": 0.4279, "step": 53620 }, { "epoch": 0.7729559113903982, "grad_norm": 1.4226091915154166, "learning_rate": 1.489993454182065e-06, "loss": 0.4148, "step": 53630 }, { "epoch": 0.7731000389144315, "grad_norm": 1.7768301448836894, "learning_rate": 1.488202400651924e-06, "loss": 0.428, "step": 53640 }, { "epoch": 0.7732441664384647, "grad_norm": 1.6714494405218232, "learning_rate": 1.4864122360112958e-06, "loss": 0.4389, "step": 53650 }, { "epoch": 0.773388293962498, "grad_norm": 1.634108149942759, "learning_rate": 1.4846229607133045e-06, "loss": 0.4174, "step": 53660 }, { "epoch": 0.7735324214865312, "grad_norm": 1.5891429748260706, "learning_rate": 1.4828345752108392e-06, "loss": 0.4234, "step": 53670 }, { "epoch": 0.7736765490105646, "grad_norm": 1.3799924894347035, "learning_rate": 1.4810470799565669e-06, "loss": 0.4391, "step": 53680 }, { "epoch": 0.7738206765345979, "grad_norm": 1.891761697907302, "learning_rate": 1.4792604754029315e-06, "loss": 0.4729, "step": 53690 }, { "epoch": 0.7739648040586311, "grad_norm": 1.7015513755620715, "learning_rate": 1.4774747620021502e-06, "loss": 0.4522, "step": 53700 }, { "epoch": 0.7741089315826644, "grad_norm": 1.4503314594395695, "learning_rate": 1.475689940206213e-06, "loss": 0.4293, "step": 53710 }, { "epoch": 0.7742530591066976, "grad_norm": 1.733548859372547, "learning_rate": 1.4739060104668845e-06, "loss": 0.4281, "step": 53720 }, { "epoch": 0.7743971866307309, "grad_norm": 1.6545752731314982, "learning_rate": 1.4721229732357073e-06, "loss": 0.4239, "step": 53730 }, { "epoch": 0.7745413141547641, "grad_norm": 1.3598949453503268, "learning_rate": 1.4703408289639936e-06, "loss": 0.3994, "step": 53740 }, { "epoch": 0.7746854416787974, "grad_norm": 1.5756795094059617, "learning_rate": 1.4685595781028306e-06, "loss": 0.4284, "step": 53750 }, { "epoch": 0.7748295692028306, "grad_norm": 1.805899018486738, "learning_rate": 1.4667792211030807e-06, "loss": 0.4256, "step": 53760 }, { "epoch": 0.7749736967268639, "grad_norm": 1.8184101943050714, "learning_rate": 1.4649997584153813e-06, "loss": 0.4438, "step": 53770 }, { "epoch": 0.7751178242508971, "grad_norm": 1.6739027195956195, "learning_rate": 1.4632211904901384e-06, "loss": 0.4217, "step": 53780 }, { "epoch": 0.7752619517749305, "grad_norm": 1.4020362820281314, "learning_rate": 1.4614435177775376e-06, "loss": 0.4342, "step": 53790 }, { "epoch": 0.7754060792989638, "grad_norm": 1.5733235315299603, "learning_rate": 1.459666740727534e-06, "loss": 0.4267, "step": 53800 }, { "epoch": 0.775550206822997, "grad_norm": 1.7179178620079116, "learning_rate": 1.457890859789855e-06, "loss": 0.4368, "step": 53810 }, { "epoch": 0.7756943343470303, "grad_norm": 1.7885018549666445, "learning_rate": 1.456115875414006e-06, "loss": 0.4533, "step": 53820 }, { "epoch": 0.7758384618710635, "grad_norm": 1.5355083872490267, "learning_rate": 1.4543417880492617e-06, "loss": 0.4378, "step": 53830 }, { "epoch": 0.7759825893950968, "grad_norm": 1.555073719240842, "learning_rate": 1.452568598144668e-06, "loss": 0.4357, "step": 53840 }, { "epoch": 0.77612671691913, "grad_norm": 1.6052260860723557, "learning_rate": 1.450796306149051e-06, "loss": 0.448, "step": 53850 }, { "epoch": 0.7762708444431633, "grad_norm": 1.8337495261625758, "learning_rate": 1.4490249125110018e-06, "loss": 0.4185, "step": 53860 }, { "epoch": 0.7764149719671966, "grad_norm": 1.9869268653656882, "learning_rate": 1.4472544176788861e-06, "loss": 0.4639, "step": 53870 }, { "epoch": 0.7765590994912298, "grad_norm": 1.7039949357879192, "learning_rate": 1.4454848221008456e-06, "loss": 0.4394, "step": 53880 }, { "epoch": 0.7767032270152631, "grad_norm": 1.9643594669609645, "learning_rate": 1.4437161262247918e-06, "loss": 0.4158, "step": 53890 }, { "epoch": 0.7768473545392963, "grad_norm": 1.8605735935761933, "learning_rate": 1.4419483304984083e-06, "loss": 0.4113, "step": 53900 }, { "epoch": 0.7769914820633297, "grad_norm": 1.619866245215594, "learning_rate": 1.440181435369149e-06, "loss": 0.4559, "step": 53910 }, { "epoch": 0.7771356095873629, "grad_norm": 1.7765767325348634, "learning_rate": 1.4384154412842455e-06, "loss": 0.4308, "step": 53920 }, { "epoch": 0.7772797371113962, "grad_norm": 1.957064486942348, "learning_rate": 1.4366503486906957e-06, "loss": 0.4371, "step": 53930 }, { "epoch": 0.7774238646354295, "grad_norm": 2.3376432821685684, "learning_rate": 1.4348861580352702e-06, "loss": 0.4331, "step": 53940 }, { "epoch": 0.7775679921594627, "grad_norm": 1.735318411163263, "learning_rate": 1.433122869764516e-06, "loss": 0.4267, "step": 53950 }, { "epoch": 0.777712119683496, "grad_norm": 1.5258982152060303, "learning_rate": 1.4313604843247465e-06, "loss": 0.4299, "step": 53960 }, { "epoch": 0.7778562472075292, "grad_norm": 1.6244739124027714, "learning_rate": 1.4295990021620465e-06, "loss": 0.428, "step": 53970 }, { "epoch": 0.7780003747315625, "grad_norm": 1.7875022870566035, "learning_rate": 1.4278384237222758e-06, "loss": 0.4192, "step": 53980 }, { "epoch": 0.7781445022555957, "grad_norm": 1.6881401611083426, "learning_rate": 1.4260787494510653e-06, "loss": 0.4674, "step": 53990 }, { "epoch": 0.778288629779629, "grad_norm": 1.4986745559996424, "learning_rate": 1.4243199797938135e-06, "loss": 0.4198, "step": 54000 }, { "epoch": 0.7784327573036622, "grad_norm": 1.6459295758030903, "learning_rate": 1.4225621151956903e-06, "loss": 0.414, "step": 54010 }, { "epoch": 0.7785768848276955, "grad_norm": 1.3775413245773769, "learning_rate": 1.4208051561016417e-06, "loss": 0.4075, "step": 54020 }, { "epoch": 0.7787210123517289, "grad_norm": 1.8064691505007133, "learning_rate": 1.4190491029563785e-06, "loss": 0.4118, "step": 54030 }, { "epoch": 0.7788651398757621, "grad_norm": 1.2867581549780438, "learning_rate": 1.417293956204383e-06, "loss": 0.4593, "step": 54040 }, { "epoch": 0.7790092673997954, "grad_norm": 1.6419616212212578, "learning_rate": 1.4155397162899137e-06, "loss": 0.466, "step": 54050 }, { "epoch": 0.7791533949238286, "grad_norm": 1.2665577322792703, "learning_rate": 1.4137863836569926e-06, "loss": 0.4178, "step": 54060 }, { "epoch": 0.7792975224478619, "grad_norm": 1.6955756984598067, "learning_rate": 1.4120339587494142e-06, "loss": 0.4404, "step": 54070 }, { "epoch": 0.7794416499718951, "grad_norm": 1.539852733303061, "learning_rate": 1.4102824420107469e-06, "loss": 0.4253, "step": 54080 }, { "epoch": 0.7795857774959284, "grad_norm": 1.5978113495475708, "learning_rate": 1.4085318338843235e-06, "loss": 0.42, "step": 54090 }, { "epoch": 0.7797299050199616, "grad_norm": 1.6175584211516039, "learning_rate": 1.406782134813252e-06, "loss": 0.4417, "step": 54100 }, { "epoch": 0.7798740325439949, "grad_norm": 1.3915706580253722, "learning_rate": 1.4050333452404051e-06, "loss": 0.4321, "step": 54110 }, { "epoch": 0.7800181600680282, "grad_norm": 1.5022003652929388, "learning_rate": 1.4032854656084317e-06, "loss": 0.4228, "step": 54120 }, { "epoch": 0.7801622875920614, "grad_norm": 1.6957322082173836, "learning_rate": 1.4015384963597446e-06, "loss": 0.4389, "step": 54130 }, { "epoch": 0.7803064151160948, "grad_norm": 1.7857010935746278, "learning_rate": 1.3997924379365274e-06, "loss": 0.4335, "step": 54140 }, { "epoch": 0.780450542640128, "grad_norm": 1.7617874669980125, "learning_rate": 1.3980472907807363e-06, "loss": 0.4459, "step": 54150 }, { "epoch": 0.7805946701641613, "grad_norm": 1.8068275941370704, "learning_rate": 1.396303055334094e-06, "loss": 0.4334, "step": 54160 }, { "epoch": 0.7807387976881945, "grad_norm": 1.6737157637933797, "learning_rate": 1.3945597320380904e-06, "loss": 0.4067, "step": 54170 }, { "epoch": 0.7808829252122278, "grad_norm": 1.6638400381250757, "learning_rate": 1.3928173213339917e-06, "loss": 0.4354, "step": 54180 }, { "epoch": 0.781027052736261, "grad_norm": 1.5232145361530558, "learning_rate": 1.3910758236628263e-06, "loss": 0.4168, "step": 54190 }, { "epoch": 0.7811711802602943, "grad_norm": 2.6398161751468914, "learning_rate": 1.3893352394653932e-06, "loss": 0.4259, "step": 54200 }, { "epoch": 0.7813153077843276, "grad_norm": 1.5237424581493246, "learning_rate": 1.3875955691822607e-06, "loss": 0.4108, "step": 54210 }, { "epoch": 0.7814594353083608, "grad_norm": 1.6560468016888543, "learning_rate": 1.3858568132537682e-06, "loss": 0.4348, "step": 54220 }, { "epoch": 0.7816035628323941, "grad_norm": 1.7200875174325285, "learning_rate": 1.38411897212002e-06, "loss": 0.4259, "step": 54230 }, { "epoch": 0.7817476903564273, "grad_norm": 1.3299610372452488, "learning_rate": 1.3823820462208886e-06, "loss": 0.4425, "step": 54240 }, { "epoch": 0.7818918178804606, "grad_norm": 2.045273536068148, "learning_rate": 1.3806460359960195e-06, "loss": 0.4508, "step": 54250 }, { "epoch": 0.782035945404494, "grad_norm": 1.6754056220974778, "learning_rate": 1.3789109418848219e-06, "loss": 0.4386, "step": 54260 }, { "epoch": 0.7821800729285272, "grad_norm": 1.7374739216559465, "learning_rate": 1.3771767643264722e-06, "loss": 0.4271, "step": 54270 }, { "epoch": 0.7823242004525605, "grad_norm": 1.7064554205032374, "learning_rate": 1.3754435037599213e-06, "loss": 0.4396, "step": 54280 }, { "epoch": 0.7824683279765937, "grad_norm": 1.74946905280716, "learning_rate": 1.3737111606238822e-06, "loss": 0.4336, "step": 54290 }, { "epoch": 0.782612455500627, "grad_norm": 1.777375876241475, "learning_rate": 1.3719797353568353e-06, "loss": 0.441, "step": 54300 }, { "epoch": 0.7827565830246602, "grad_norm": 1.543030156489122, "learning_rate": 1.3702492283970326e-06, "loss": 0.4381, "step": 54310 }, { "epoch": 0.7829007105486935, "grad_norm": 1.423441019847351, "learning_rate": 1.3685196401824935e-06, "loss": 0.4256, "step": 54320 }, { "epoch": 0.7830448380727267, "grad_norm": 1.1017476927289285, "learning_rate": 1.3667909711510007e-06, "loss": 0.4381, "step": 54330 }, { "epoch": 0.78318896559676, "grad_norm": 1.5882942703464715, "learning_rate": 1.3650632217401066e-06, "loss": 0.4409, "step": 54340 }, { "epoch": 0.7833330931207932, "grad_norm": 1.808185815934507, "learning_rate": 1.3633363923871328e-06, "loss": 0.4472, "step": 54350 }, { "epoch": 0.7834772206448265, "grad_norm": 1.7226926103049849, "learning_rate": 1.3616104835291654e-06, "loss": 0.4307, "step": 54360 }, { "epoch": 0.7836213481688598, "grad_norm": 1.7973216712615507, "learning_rate": 1.3598854956030566e-06, "loss": 0.4125, "step": 54370 }, { "epoch": 0.7837654756928931, "grad_norm": 1.438455526668344, "learning_rate": 1.3581614290454293e-06, "loss": 0.4443, "step": 54380 }, { "epoch": 0.7839096032169264, "grad_norm": 1.6404131160868185, "learning_rate": 1.3564382842926704e-06, "loss": 0.4198, "step": 54390 }, { "epoch": 0.7840537307409596, "grad_norm": 1.7579667263448877, "learning_rate": 1.3547160617809325e-06, "loss": 0.4428, "step": 54400 }, { "epoch": 0.7841978582649929, "grad_norm": 1.5689934476856695, "learning_rate": 1.3529947619461391e-06, "loss": 0.4351, "step": 54410 }, { "epoch": 0.7843419857890261, "grad_norm": 1.5279331396088074, "learning_rate": 1.3512743852239746e-06, "loss": 0.4516, "step": 54420 }, { "epoch": 0.7844861133130594, "grad_norm": 1.7152627240792044, "learning_rate": 1.349554932049894e-06, "loss": 0.4192, "step": 54430 }, { "epoch": 0.7846302408370927, "grad_norm": 1.7079819741467106, "learning_rate": 1.347836402859119e-06, "loss": 0.4304, "step": 54440 }, { "epoch": 0.7847743683611259, "grad_norm": 1.7522422724233968, "learning_rate": 1.3461187980866336e-06, "loss": 0.4344, "step": 54450 }, { "epoch": 0.7849184958851592, "grad_norm": 1.7726008664194934, "learning_rate": 1.3444021181671885e-06, "loss": 0.4232, "step": 54460 }, { "epoch": 0.7850626234091924, "grad_norm": 1.9199912038766742, "learning_rate": 1.3426863635353037e-06, "loss": 0.4081, "step": 54470 }, { "epoch": 0.7852067509332257, "grad_norm": 1.6475883049819573, "learning_rate": 1.3409715346252628e-06, "loss": 0.4439, "step": 54480 }, { "epoch": 0.7853508784572589, "grad_norm": 1.653303938898634, "learning_rate": 1.3392576318711126e-06, "loss": 0.4208, "step": 54490 }, { "epoch": 0.7854950059812923, "grad_norm": 1.7284587774840137, "learning_rate": 1.3375446557066717e-06, "loss": 0.4665, "step": 54500 }, { "epoch": 0.7856391335053255, "grad_norm": 1.6594449633234525, "learning_rate": 1.3358326065655187e-06, "loss": 0.4353, "step": 54510 }, { "epoch": 0.7857832610293588, "grad_norm": 1.250731591979954, "learning_rate": 1.3341214848809976e-06, "loss": 0.441, "step": 54520 }, { "epoch": 0.7859273885533921, "grad_norm": 1.9427274721141294, "learning_rate": 1.3324112910862209e-06, "loss": 0.4223, "step": 54530 }, { "epoch": 0.7860715160774253, "grad_norm": 1.6449257673740407, "learning_rate": 1.3307020256140663e-06, "loss": 0.4332, "step": 54540 }, { "epoch": 0.7862156436014586, "grad_norm": 1.506689638400721, "learning_rate": 1.3289936888971738e-06, "loss": 0.4083, "step": 54550 }, { "epoch": 0.7863597711254918, "grad_norm": 1.2859103836465207, "learning_rate": 1.3272862813679472e-06, "loss": 0.4217, "step": 54560 }, { "epoch": 0.7865038986495251, "grad_norm": 1.604395927998628, "learning_rate": 1.3255798034585616e-06, "loss": 0.4316, "step": 54570 }, { "epoch": 0.7866480261735583, "grad_norm": 1.490306109368618, "learning_rate": 1.3238742556009498e-06, "loss": 0.4455, "step": 54580 }, { "epoch": 0.7867921536975916, "grad_norm": 1.5144124481770596, "learning_rate": 1.3221696382268113e-06, "loss": 0.4263, "step": 54590 }, { "epoch": 0.7869362812216248, "grad_norm": 1.8425630552095027, "learning_rate": 1.3204659517676132e-06, "loss": 0.4539, "step": 54600 }, { "epoch": 0.7870804087456582, "grad_norm": 1.635904018638228, "learning_rate": 1.3187631966545838e-06, "loss": 0.4225, "step": 54610 }, { "epoch": 0.7872245362696915, "grad_norm": 1.4989418133855652, "learning_rate": 1.3170613733187144e-06, "loss": 0.4012, "step": 54620 }, { "epoch": 0.7873686637937247, "grad_norm": 1.692634056272059, "learning_rate": 1.315360482190765e-06, "loss": 0.4264, "step": 54630 }, { "epoch": 0.787512791317758, "grad_norm": 1.5974732314044429, "learning_rate": 1.3136605237012546e-06, "loss": 0.4079, "step": 54640 }, { "epoch": 0.7876569188417912, "grad_norm": 1.6291415608774795, "learning_rate": 1.3119614982804712e-06, "loss": 0.4364, "step": 54650 }, { "epoch": 0.7878010463658245, "grad_norm": 1.6618591067695812, "learning_rate": 1.3102634063584618e-06, "loss": 0.4258, "step": 54660 }, { "epoch": 0.7879451738898577, "grad_norm": 1.7578416684009266, "learning_rate": 1.3085662483650418e-06, "loss": 0.3908, "step": 54670 }, { "epoch": 0.788089301413891, "grad_norm": 1.6671583174013196, "learning_rate": 1.3068700247297867e-06, "loss": 0.4292, "step": 54680 }, { "epoch": 0.7882334289379243, "grad_norm": 1.553710114401341, "learning_rate": 1.3051747358820344e-06, "loss": 0.4354, "step": 54690 }, { "epoch": 0.7883775564619575, "grad_norm": 1.873547292101309, "learning_rate": 1.3034803822508912e-06, "loss": 0.429, "step": 54700 }, { "epoch": 0.7885216839859908, "grad_norm": 1.758246082359055, "learning_rate": 1.301786964265223e-06, "loss": 0.4221, "step": 54710 }, { "epoch": 0.788665811510024, "grad_norm": 1.5910284490860203, "learning_rate": 1.3000944823536589e-06, "loss": 0.4045, "step": 54720 }, { "epoch": 0.7888099390340574, "grad_norm": 1.7389834450109933, "learning_rate": 1.2984029369445933e-06, "loss": 0.4419, "step": 54730 }, { "epoch": 0.7889540665580906, "grad_norm": 1.6507995995343523, "learning_rate": 1.296712328466181e-06, "loss": 0.3963, "step": 54740 }, { "epoch": 0.7890981940821239, "grad_norm": 1.860060989634454, "learning_rate": 1.2950226573463404e-06, "loss": 0.4226, "step": 54750 }, { "epoch": 0.7892423216061571, "grad_norm": 1.3864003356982468, "learning_rate": 1.293333924012753e-06, "loss": 0.4419, "step": 54760 }, { "epoch": 0.7893864491301904, "grad_norm": 1.751544773931915, "learning_rate": 1.2916461288928656e-06, "loss": 0.44, "step": 54770 }, { "epoch": 0.7895305766542237, "grad_norm": 1.667272267479007, "learning_rate": 1.2899592724138832e-06, "loss": 0.4344, "step": 54780 }, { "epoch": 0.7896747041782569, "grad_norm": 1.7357819917204322, "learning_rate": 1.2882733550027726e-06, "loss": 0.4524, "step": 54790 }, { "epoch": 0.7898188317022902, "grad_norm": 1.672576467291069, "learning_rate": 1.2865883770862692e-06, "loss": 0.4321, "step": 54800 }, { "epoch": 0.7899629592263234, "grad_norm": 1.5499284314230433, "learning_rate": 1.2849043390908643e-06, "loss": 0.446, "step": 54810 }, { "epoch": 0.7901070867503567, "grad_norm": 1.6952933786025268, "learning_rate": 1.2832212414428124e-06, "loss": 0.4225, "step": 54820 }, { "epoch": 0.7902512142743899, "grad_norm": 1.4417141045244308, "learning_rate": 1.2815390845681347e-06, "loss": 0.3993, "step": 54830 }, { "epoch": 0.7903953417984232, "grad_norm": 1.61380926690884, "learning_rate": 1.279857868892609e-06, "loss": 0.4287, "step": 54840 }, { "epoch": 0.7905394693224566, "grad_norm": 1.7572982027155348, "learning_rate": 1.278177594841774e-06, "loss": 0.4638, "step": 54850 }, { "epoch": 0.7906835968464898, "grad_norm": 1.6984406905312746, "learning_rate": 1.2764982628409356e-06, "loss": 0.4282, "step": 54860 }, { "epoch": 0.7908277243705231, "grad_norm": 1.669645047408781, "learning_rate": 1.2748198733151596e-06, "loss": 0.4315, "step": 54870 }, { "epoch": 0.7909718518945563, "grad_norm": 1.7004007051234407, "learning_rate": 1.27314242668927e-06, "loss": 0.4294, "step": 54880 }, { "epoch": 0.7911159794185896, "grad_norm": 2.5101530727215953, "learning_rate": 1.2714659233878523e-06, "loss": 0.4253, "step": 54890 }, { "epoch": 0.7912601069426228, "grad_norm": 1.837129496000666, "learning_rate": 1.2697903638352594e-06, "loss": 0.4486, "step": 54900 }, { "epoch": 0.7914042344666561, "grad_norm": 1.7213918553579985, "learning_rate": 1.2681157484555978e-06, "loss": 0.4352, "step": 54910 }, { "epoch": 0.7915483619906893, "grad_norm": 1.7779134933948348, "learning_rate": 1.2664420776727383e-06, "loss": 0.4135, "step": 54920 }, { "epoch": 0.7916924895147226, "grad_norm": 1.5955129319883827, "learning_rate": 1.2647693519103144e-06, "loss": 0.4363, "step": 54930 }, { "epoch": 0.7918366170387559, "grad_norm": 1.8782485710691217, "learning_rate": 1.263097571591717e-06, "loss": 0.4289, "step": 54940 }, { "epoch": 0.7919807445627891, "grad_norm": 1.7946221902924302, "learning_rate": 1.2614267371400984e-06, "loss": 0.4614, "step": 54950 }, { "epoch": 0.7921248720868225, "grad_norm": 1.589779928448815, "learning_rate": 1.2597568489783747e-06, "loss": 0.4225, "step": 54960 }, { "epoch": 0.7922689996108557, "grad_norm": 1.6151782269873372, "learning_rate": 1.2580879075292174e-06, "loss": 0.4633, "step": 54970 }, { "epoch": 0.792413127134889, "grad_norm": 1.4867766271006733, "learning_rate": 1.2564199132150634e-06, "loss": 0.4273, "step": 54980 }, { "epoch": 0.7925572546589222, "grad_norm": 1.7224832048724548, "learning_rate": 1.2547528664581054e-06, "loss": 0.4247, "step": 54990 }, { "epoch": 0.7927013821829555, "grad_norm": 1.6800314130249505, "learning_rate": 1.253086767680301e-06, "loss": 0.4174, "step": 55000 }, { "epoch": 0.7928455097069887, "grad_norm": 1.4013639592873286, "learning_rate": 1.2514216173033633e-06, "loss": 0.4348, "step": 55010 }, { "epoch": 0.792989637231022, "grad_norm": 1.4235329634250053, "learning_rate": 1.2497574157487658e-06, "loss": 0.4264, "step": 55020 }, { "epoch": 0.7931337647550553, "grad_norm": 1.5169092559520214, "learning_rate": 1.248094163437747e-06, "loss": 0.4251, "step": 55030 }, { "epoch": 0.7932778922790885, "grad_norm": 1.3113336157206668, "learning_rate": 1.246431860791299e-06, "loss": 0.4035, "step": 55040 }, { "epoch": 0.7934220198031218, "grad_norm": 1.5809954471914422, "learning_rate": 1.2447705082301754e-06, "loss": 0.4279, "step": 55050 }, { "epoch": 0.793566147327155, "grad_norm": 1.946495252285814, "learning_rate": 1.2431101061748924e-06, "loss": 0.44, "step": 55060 }, { "epoch": 0.7937102748511883, "grad_norm": 1.6404166640487212, "learning_rate": 1.241450655045721e-06, "loss": 0.4336, "step": 55070 }, { "epoch": 0.7938544023752216, "grad_norm": 1.3587336630997935, "learning_rate": 1.2397921552626929e-06, "loss": 0.4266, "step": 55080 }, { "epoch": 0.7939985298992549, "grad_norm": 1.7707146794563222, "learning_rate": 1.2381346072456007e-06, "loss": 0.4213, "step": 55090 }, { "epoch": 0.7941426574232882, "grad_norm": 1.6004781932450345, "learning_rate": 1.2364780114139968e-06, "loss": 0.4206, "step": 55100 }, { "epoch": 0.7942867849473214, "grad_norm": 1.579967496616124, "learning_rate": 1.2348223681871884e-06, "loss": 0.4322, "step": 55110 }, { "epoch": 0.7944309124713547, "grad_norm": 1.8235063397502171, "learning_rate": 1.233167677984246e-06, "loss": 0.4291, "step": 55120 }, { "epoch": 0.7945750399953879, "grad_norm": 1.7006072390272482, "learning_rate": 1.2315139412239958e-06, "loss": 0.4403, "step": 55130 }, { "epoch": 0.7947191675194212, "grad_norm": 1.6980431569432315, "learning_rate": 1.2298611583250224e-06, "loss": 0.4459, "step": 55140 }, { "epoch": 0.7948632950434544, "grad_norm": 1.6894934941771373, "learning_rate": 1.2282093297056735e-06, "loss": 0.4601, "step": 55150 }, { "epoch": 0.7950074225674877, "grad_norm": 1.2932527564442162, "learning_rate": 1.2265584557840503e-06, "loss": 0.4076, "step": 55160 }, { "epoch": 0.7951515500915209, "grad_norm": 1.7670731643296054, "learning_rate": 1.2249085369780145e-06, "loss": 0.4303, "step": 55170 }, { "epoch": 0.7952956776155542, "grad_norm": 1.5161584132623642, "learning_rate": 1.223259573705184e-06, "loss": 0.4296, "step": 55180 }, { "epoch": 0.7954398051395875, "grad_norm": 1.7017530056793133, "learning_rate": 1.2216115663829393e-06, "loss": 0.4357, "step": 55190 }, { "epoch": 0.7955839326636208, "grad_norm": 1.9625862463543915, "learning_rate": 1.219964515428414e-06, "loss": 0.4321, "step": 55200 }, { "epoch": 0.7957280601876541, "grad_norm": 1.8097025164330975, "learning_rate": 1.218318421258502e-06, "loss": 0.4545, "step": 55210 }, { "epoch": 0.7958721877116873, "grad_norm": 1.5101549331980264, "learning_rate": 1.2166732842898576e-06, "loss": 0.4337, "step": 55220 }, { "epoch": 0.7960163152357206, "grad_norm": 1.5071913766066858, "learning_rate": 1.2150291049388879e-06, "loss": 0.4345, "step": 55230 }, { "epoch": 0.7961604427597538, "grad_norm": 1.5432854195405286, "learning_rate": 1.2133858836217582e-06, "loss": 0.4168, "step": 55240 }, { "epoch": 0.7963045702837871, "grad_norm": 1.8423679273450055, "learning_rate": 1.2117436207543964e-06, "loss": 0.422, "step": 55250 }, { "epoch": 0.7964486978078203, "grad_norm": 1.6644693642435722, "learning_rate": 1.2101023167524823e-06, "loss": 0.4375, "step": 55260 }, { "epoch": 0.7965928253318536, "grad_norm": 1.5907794031311688, "learning_rate": 1.2084619720314534e-06, "loss": 0.4285, "step": 55270 }, { "epoch": 0.7967369528558869, "grad_norm": 1.6088262582890467, "learning_rate": 1.2068225870065091e-06, "loss": 0.4053, "step": 55280 }, { "epoch": 0.7968810803799201, "grad_norm": 1.5198422157761566, "learning_rate": 1.2051841620926013e-06, "loss": 0.4455, "step": 55290 }, { "epoch": 0.7970252079039534, "grad_norm": 1.5864633824975267, "learning_rate": 1.203546697704439e-06, "loss": 0.4341, "step": 55300 }, { "epoch": 0.7971693354279867, "grad_norm": 1.701244020220154, "learning_rate": 1.201910194256491e-06, "loss": 0.4352, "step": 55310 }, { "epoch": 0.79731346295202, "grad_norm": 1.8977587858627447, "learning_rate": 1.2002746521629822e-06, "loss": 0.4317, "step": 55320 }, { "epoch": 0.7974575904760532, "grad_norm": 1.5934222208398612, "learning_rate": 1.198640071837892e-06, "loss": 0.4382, "step": 55330 }, { "epoch": 0.7976017180000865, "grad_norm": 1.8730246708746494, "learning_rate": 1.1970064536949566e-06, "loss": 0.4568, "step": 55340 }, { "epoch": 0.7977458455241198, "grad_norm": 1.5742596430263949, "learning_rate": 1.1953737981476716e-06, "loss": 0.4579, "step": 55350 }, { "epoch": 0.797889973048153, "grad_norm": 1.6474348765730376, "learning_rate": 1.1937421056092863e-06, "loss": 0.443, "step": 55360 }, { "epoch": 0.7980341005721863, "grad_norm": 1.5562478960248443, "learning_rate": 1.1921113764928054e-06, "loss": 0.4299, "step": 55370 }, { "epoch": 0.7981782280962195, "grad_norm": 1.5828663472052442, "learning_rate": 1.190481611210994e-06, "loss": 0.4301, "step": 55380 }, { "epoch": 0.7983223556202528, "grad_norm": 1.9089219472492005, "learning_rate": 1.188852810176369e-06, "loss": 0.4485, "step": 55390 }, { "epoch": 0.798466483144286, "grad_norm": 1.6574877450483776, "learning_rate": 1.1872249738012043e-06, "loss": 0.4247, "step": 55400 }, { "epoch": 0.7986106106683193, "grad_norm": 1.5464355131954564, "learning_rate": 1.1855981024975315e-06, "loss": 0.4484, "step": 55410 }, { "epoch": 0.7987547381923525, "grad_norm": 1.6634130137951713, "learning_rate": 1.1839721966771344e-06, "loss": 0.4375, "step": 55420 }, { "epoch": 0.7988988657163859, "grad_norm": 1.642130662241345, "learning_rate": 1.1823472567515581e-06, "loss": 0.4416, "step": 55430 }, { "epoch": 0.7990429932404192, "grad_norm": 1.4776379223304184, "learning_rate": 1.1807232831320953e-06, "loss": 0.4129, "step": 55440 }, { "epoch": 0.7991871207644524, "grad_norm": 1.7704590776695466, "learning_rate": 1.1791002762298026e-06, "loss": 0.4309, "step": 55450 }, { "epoch": 0.7993312482884857, "grad_norm": 1.750475026533567, "learning_rate": 1.177478236455486e-06, "loss": 0.4159, "step": 55460 }, { "epoch": 0.7994753758125189, "grad_norm": 1.9453953076451693, "learning_rate": 1.1758571642197069e-06, "loss": 0.4755, "step": 55470 }, { "epoch": 0.7996195033365522, "grad_norm": 1.7907289435380955, "learning_rate": 1.1742370599327863e-06, "loss": 0.4342, "step": 55480 }, { "epoch": 0.7997636308605854, "grad_norm": 2.104952074146845, "learning_rate": 1.1726179240047957e-06, "loss": 0.4192, "step": 55490 }, { "epoch": 0.7999077583846187, "grad_norm": 1.6046340614817367, "learning_rate": 1.1709997568455621e-06, "loss": 0.4368, "step": 55500 }, { "epoch": 0.800051885908652, "grad_norm": 1.7472997087540036, "learning_rate": 1.1693825588646712e-06, "loss": 0.4327, "step": 55510 }, { "epoch": 0.8001960134326852, "grad_norm": 1.4173126358777546, "learning_rate": 1.1677663304714587e-06, "loss": 0.4156, "step": 55520 }, { "epoch": 0.8003401409567185, "grad_norm": 1.7214994335559561, "learning_rate": 1.1661510720750153e-06, "loss": 0.4304, "step": 55530 }, { "epoch": 0.8004842684807517, "grad_norm": 1.7421232633684547, "learning_rate": 1.1645367840841888e-06, "loss": 0.4309, "step": 55540 }, { "epoch": 0.8006283960047851, "grad_norm": 1.6632801016334309, "learning_rate": 1.162923466907581e-06, "loss": 0.4561, "step": 55550 }, { "epoch": 0.8007725235288183, "grad_norm": 1.7292662292768912, "learning_rate": 1.1613111209535472e-06, "loss": 0.4424, "step": 55560 }, { "epoch": 0.8009166510528516, "grad_norm": 1.6520036160988454, "learning_rate": 1.159699746630194e-06, "loss": 0.4347, "step": 55570 }, { "epoch": 0.8010607785768848, "grad_norm": 1.8492441558019537, "learning_rate": 1.1580893443453883e-06, "loss": 0.4489, "step": 55580 }, { "epoch": 0.8012049061009181, "grad_norm": 1.570991967729416, "learning_rate": 1.1564799145067458e-06, "loss": 0.4373, "step": 55590 }, { "epoch": 0.8013490336249514, "grad_norm": 1.7835261417337627, "learning_rate": 1.1548714575216364e-06, "loss": 0.4363, "step": 55600 }, { "epoch": 0.8014931611489846, "grad_norm": 1.3951493038446217, "learning_rate": 1.1532639737971873e-06, "loss": 0.44, "step": 55610 }, { "epoch": 0.8016372886730179, "grad_norm": 1.6675417926314027, "learning_rate": 1.1516574637402772e-06, "loss": 0.4363, "step": 55620 }, { "epoch": 0.8017814161970511, "grad_norm": 1.329768128933742, "learning_rate": 1.150051927757535e-06, "loss": 0.4061, "step": 55630 }, { "epoch": 0.8019255437210844, "grad_norm": 2.0295680168572225, "learning_rate": 1.1484473662553497e-06, "loss": 0.4565, "step": 55640 }, { "epoch": 0.8020696712451176, "grad_norm": 1.6506859036327768, "learning_rate": 1.1468437796398602e-06, "loss": 0.4335, "step": 55650 }, { "epoch": 0.802213798769151, "grad_norm": 1.4949474894100234, "learning_rate": 1.145241168316958e-06, "loss": 0.4381, "step": 55660 }, { "epoch": 0.8023579262931843, "grad_norm": 1.843775530961705, "learning_rate": 1.1436395326922866e-06, "loss": 0.4548, "step": 55670 }, { "epoch": 0.8025020538172175, "grad_norm": 1.6015749983861087, "learning_rate": 1.1420388731712473e-06, "loss": 0.4214, "step": 55680 }, { "epoch": 0.8026461813412508, "grad_norm": 1.6816583221960995, "learning_rate": 1.1404391901589906e-06, "loss": 0.4192, "step": 55690 }, { "epoch": 0.802790308865284, "grad_norm": 1.7720142896603064, "learning_rate": 1.138840484060419e-06, "loss": 0.429, "step": 55700 }, { "epoch": 0.8029344363893173, "grad_norm": 1.815288055608563, "learning_rate": 1.1372427552801918e-06, "loss": 0.4045, "step": 55710 }, { "epoch": 0.8030785639133505, "grad_norm": 2.123484153281622, "learning_rate": 1.1356460042227174e-06, "loss": 0.4449, "step": 55720 }, { "epoch": 0.8032226914373838, "grad_norm": 1.7240259364071413, "learning_rate": 1.1340502312921563e-06, "loss": 0.4761, "step": 55730 }, { "epoch": 0.803366818961417, "grad_norm": 1.8402494002153837, "learning_rate": 1.1324554368924262e-06, "loss": 0.4441, "step": 55740 }, { "epoch": 0.8035109464854503, "grad_norm": 1.4390386372990414, "learning_rate": 1.1308616214271901e-06, "loss": 0.4068, "step": 55750 }, { "epoch": 0.8036550740094835, "grad_norm": 1.570234386200347, "learning_rate": 1.1292687852998697e-06, "loss": 0.4304, "step": 55760 }, { "epoch": 0.8037992015335168, "grad_norm": 1.9035225200445947, "learning_rate": 1.1276769289136364e-06, "loss": 0.4387, "step": 55770 }, { "epoch": 0.8039433290575502, "grad_norm": 1.4676103757034122, "learning_rate": 1.1260860526714129e-06, "loss": 0.4198, "step": 55780 }, { "epoch": 0.8040874565815834, "grad_norm": 1.7689097163512992, "learning_rate": 1.1244961569758718e-06, "loss": 0.4328, "step": 55790 }, { "epoch": 0.8042315841056167, "grad_norm": 1.6295733384414732, "learning_rate": 1.1229072422294435e-06, "loss": 0.446, "step": 55800 }, { "epoch": 0.8043757116296499, "grad_norm": 1.545531458481788, "learning_rate": 1.1213193088343045e-06, "loss": 0.4317, "step": 55810 }, { "epoch": 0.8045198391536832, "grad_norm": 1.7341078106261578, "learning_rate": 1.1197323571923858e-06, "loss": 0.4408, "step": 55820 }, { "epoch": 0.8046639666777164, "grad_norm": 1.6526368283610229, "learning_rate": 1.1181463877053673e-06, "loss": 0.421, "step": 55830 }, { "epoch": 0.8048080942017497, "grad_norm": 1.7080031737642922, "learning_rate": 1.1165614007746845e-06, "loss": 0.4118, "step": 55840 }, { "epoch": 0.804952221725783, "grad_norm": 1.538993511594361, "learning_rate": 1.1149773968015205e-06, "loss": 0.4198, "step": 55850 }, { "epoch": 0.8050963492498162, "grad_norm": 1.472597888567655, "learning_rate": 1.1133943761868082e-06, "loss": 0.4099, "step": 55860 }, { "epoch": 0.8052404767738495, "grad_norm": 1.79449269562781, "learning_rate": 1.1118123393312397e-06, "loss": 0.4398, "step": 55870 }, { "epoch": 0.8053846042978827, "grad_norm": 1.6241137808579127, "learning_rate": 1.1102312866352505e-06, "loss": 0.4316, "step": 55880 }, { "epoch": 0.805528731821916, "grad_norm": 1.6293012190393674, "learning_rate": 1.1086512184990266e-06, "loss": 0.4405, "step": 55890 }, { "epoch": 0.8056728593459493, "grad_norm": 1.4928481533715203, "learning_rate": 1.1070721353225112e-06, "loss": 0.4211, "step": 55900 }, { "epoch": 0.8058169868699826, "grad_norm": 1.6886780841868134, "learning_rate": 1.1054940375053925e-06, "loss": 0.4161, "step": 55910 }, { "epoch": 0.8059611143940159, "grad_norm": 1.408969152963364, "learning_rate": 1.1039169254471093e-06, "loss": 0.4195, "step": 55920 }, { "epoch": 0.8061052419180491, "grad_norm": 2.1015701211841735, "learning_rate": 1.102340799546856e-06, "loss": 0.4616, "step": 55930 }, { "epoch": 0.8062493694420824, "grad_norm": 1.6486471437513008, "learning_rate": 1.1007656602035726e-06, "loss": 0.4326, "step": 55940 }, { "epoch": 0.8063934969661156, "grad_norm": 1.882384432362265, "learning_rate": 1.0991915078159498e-06, "loss": 0.4164, "step": 55950 }, { "epoch": 0.8065376244901489, "grad_norm": 1.5311546284689712, "learning_rate": 1.0976183427824316e-06, "loss": 0.4041, "step": 55960 }, { "epoch": 0.8066817520141821, "grad_norm": 1.5537459637251907, "learning_rate": 1.0960461655012083e-06, "loss": 0.4416, "step": 55970 }, { "epoch": 0.8068258795382154, "grad_norm": 1.5502207949283258, "learning_rate": 1.0944749763702238e-06, "loss": 0.4552, "step": 55980 }, { "epoch": 0.8069700070622486, "grad_norm": 1.5978542533976225, "learning_rate": 1.0929047757871674e-06, "loss": 0.4187, "step": 55990 }, { "epoch": 0.8071141345862819, "grad_norm": 1.6538523992717695, "learning_rate": 1.0913355641494845e-06, "loss": 0.4414, "step": 56000 }, { "epoch": 0.8072582621103153, "grad_norm": 1.8058043981306515, "learning_rate": 1.0897673418543635e-06, "loss": 0.4343, "step": 56010 }, { "epoch": 0.8074023896343485, "grad_norm": 1.6757180126921778, "learning_rate": 1.0882001092987454e-06, "loss": 0.4267, "step": 56020 }, { "epoch": 0.8075465171583818, "grad_norm": 5.6806210480350146, "learning_rate": 1.0866338668793224e-06, "loss": 0.433, "step": 56030 }, { "epoch": 0.807690644682415, "grad_norm": 1.5105945426085738, "learning_rate": 1.0850686149925338e-06, "loss": 0.4367, "step": 56040 }, { "epoch": 0.8078347722064483, "grad_norm": 1.590817561504414, "learning_rate": 1.083504354034567e-06, "loss": 0.4488, "step": 56050 }, { "epoch": 0.8079788997304815, "grad_norm": 1.6111697882454832, "learning_rate": 1.0819410844013629e-06, "loss": 0.4355, "step": 56060 }, { "epoch": 0.8081230272545148, "grad_norm": 1.5309608582626988, "learning_rate": 1.0803788064886078e-06, "loss": 0.4274, "step": 56070 }, { "epoch": 0.808267154778548, "grad_norm": 1.7208827527192316, "learning_rate": 1.0788175206917362e-06, "loss": 0.4422, "step": 56080 }, { "epoch": 0.8084112823025813, "grad_norm": 1.483284365657305, "learning_rate": 1.0772572274059357e-06, "loss": 0.4253, "step": 56090 }, { "epoch": 0.8085554098266146, "grad_norm": 1.6329308193518475, "learning_rate": 1.0756979270261402e-06, "loss": 0.3947, "step": 56100 }, { "epoch": 0.8086995373506478, "grad_norm": 1.5882860876518772, "learning_rate": 1.0741396199470322e-06, "loss": 0.4294, "step": 56110 }, { "epoch": 0.8088436648746811, "grad_norm": 1.426567209244864, "learning_rate": 1.0725823065630413e-06, "loss": 0.4396, "step": 56120 }, { "epoch": 0.8089877923987144, "grad_norm": 1.5902617208612395, "learning_rate": 1.07102598726835e-06, "loss": 0.4202, "step": 56130 }, { "epoch": 0.8091319199227477, "grad_norm": 1.2675051385208729, "learning_rate": 1.069470662456885e-06, "loss": 0.4294, "step": 56140 }, { "epoch": 0.8092760474467809, "grad_norm": 1.7069357614301728, "learning_rate": 1.0679163325223213e-06, "loss": 0.4405, "step": 56150 }, { "epoch": 0.8094201749708142, "grad_norm": 1.8332835133491534, "learning_rate": 1.0663629978580863e-06, "loss": 0.4361, "step": 56160 }, { "epoch": 0.8095643024948475, "grad_norm": 1.2995142997193083, "learning_rate": 1.0648106588573514e-06, "loss": 0.4276, "step": 56170 }, { "epoch": 0.8097084300188807, "grad_norm": 1.5523839859939366, "learning_rate": 1.063259315913036e-06, "loss": 0.4232, "step": 56180 }, { "epoch": 0.809852557542914, "grad_norm": 1.6255530674397642, "learning_rate": 1.0617089694178096e-06, "loss": 0.4548, "step": 56190 }, { "epoch": 0.8099966850669472, "grad_norm": 1.913739943088046, "learning_rate": 1.0601596197640906e-06, "loss": 0.442, "step": 56200 }, { "epoch": 0.8101408125909805, "grad_norm": 1.6256201428992272, "learning_rate": 1.0586112673440407e-06, "loss": 0.4388, "step": 56210 }, { "epoch": 0.8102849401150137, "grad_norm": 1.3868696117294934, "learning_rate": 1.0570639125495712e-06, "loss": 0.4426, "step": 56220 }, { "epoch": 0.810429067639047, "grad_norm": 1.5955997429314104, "learning_rate": 1.055517555772343e-06, "loss": 0.4337, "step": 56230 }, { "epoch": 0.8105731951630802, "grad_norm": 1.4166306963154052, "learning_rate": 1.053972197403762e-06, "loss": 0.4361, "step": 56240 }, { "epoch": 0.8107173226871136, "grad_norm": 1.5833272599343486, "learning_rate": 1.0524278378349794e-06, "loss": 0.4354, "step": 56250 }, { "epoch": 0.8108614502111469, "grad_norm": 1.7113622486227131, "learning_rate": 1.0508844774569005e-06, "loss": 0.4429, "step": 56260 }, { "epoch": 0.8110055777351801, "grad_norm": 1.784877233208649, "learning_rate": 1.0493421166601709e-06, "loss": 0.4395, "step": 56270 }, { "epoch": 0.8111497052592134, "grad_norm": 1.6382841629720524, "learning_rate": 1.0478007558351843e-06, "loss": 0.4186, "step": 56280 }, { "epoch": 0.8112938327832466, "grad_norm": 1.5245295946383193, "learning_rate": 1.0462603953720857e-06, "loss": 0.4118, "step": 56290 }, { "epoch": 0.8114379603072799, "grad_norm": 1.8951440706425333, "learning_rate": 1.0447210356607612e-06, "loss": 0.419, "step": 56300 }, { "epoch": 0.8115820878313131, "grad_norm": 1.6753138125985705, "learning_rate": 1.0431826770908487e-06, "loss": 0.4529, "step": 56310 }, { "epoch": 0.8117262153553464, "grad_norm": 1.585622062568285, "learning_rate": 1.0416453200517274e-06, "loss": 0.4202, "step": 56320 }, { "epoch": 0.8118703428793796, "grad_norm": 1.7312870729770002, "learning_rate": 1.0401089649325286e-06, "loss": 0.4334, "step": 56330 }, { "epoch": 0.8120144704034129, "grad_norm": 2.0864328205468583, "learning_rate": 1.0385736121221257e-06, "loss": 0.4345, "step": 56340 }, { "epoch": 0.8121585979274462, "grad_norm": 1.7571494819268683, "learning_rate": 1.0370392620091392e-06, "loss": 0.407, "step": 56350 }, { "epoch": 0.8123027254514795, "grad_norm": 1.5880957922936498, "learning_rate": 1.035505914981938e-06, "loss": 0.4439, "step": 56360 }, { "epoch": 0.8124468529755128, "grad_norm": 1.7268280039252415, "learning_rate": 1.0339735714286352e-06, "loss": 0.4441, "step": 56370 }, { "epoch": 0.812590980499546, "grad_norm": 1.5210214878183461, "learning_rate": 1.0324422317370892e-06, "loss": 0.4362, "step": 56380 }, { "epoch": 0.8127351080235793, "grad_norm": 1.7864730867161276, "learning_rate": 1.0309118962949078e-06, "loss": 0.4358, "step": 56390 }, { "epoch": 0.8128792355476125, "grad_norm": 1.6509046591363297, "learning_rate": 1.0293825654894402e-06, "loss": 0.4572, "step": 56400 }, { "epoch": 0.8130233630716458, "grad_norm": 1.344132219922776, "learning_rate": 1.0278542397077812e-06, "loss": 0.4251, "step": 56410 }, { "epoch": 0.813167490595679, "grad_norm": 1.2859470675318418, "learning_rate": 1.0263269193367792e-06, "loss": 0.4621, "step": 56420 }, { "epoch": 0.8133116181197123, "grad_norm": 1.71069993105173, "learning_rate": 1.0248006047630187e-06, "loss": 0.4628, "step": 56430 }, { "epoch": 0.8134557456437456, "grad_norm": 1.8410744233546896, "learning_rate": 1.0232752963728342e-06, "loss": 0.4158, "step": 56440 }, { "epoch": 0.8135998731677788, "grad_norm": 1.8912944306401898, "learning_rate": 1.021750994552302e-06, "loss": 0.4351, "step": 56450 }, { "epoch": 0.8137440006918121, "grad_norm": 1.8751036579223144, "learning_rate": 1.0202276996872495e-06, "loss": 0.4406, "step": 56460 }, { "epoch": 0.8138881282158453, "grad_norm": 1.5722916585469955, "learning_rate": 1.018705412163245e-06, "loss": 0.4547, "step": 56470 }, { "epoch": 0.8140322557398787, "grad_norm": 3.3067930578002565, "learning_rate": 1.0171841323656002e-06, "loss": 0.457, "step": 56480 }, { "epoch": 0.814176383263912, "grad_norm": 1.467413335143864, "learning_rate": 1.0156638606793779e-06, "loss": 0.4118, "step": 56490 }, { "epoch": 0.8143205107879452, "grad_norm": 1.968277492286416, "learning_rate": 1.01414459748938e-06, "loss": 0.4507, "step": 56500 }, { "epoch": 0.8144646383119785, "grad_norm": 1.961249217855374, "learning_rate": 1.0126263431801535e-06, "loss": 0.4067, "step": 56510 }, { "epoch": 0.8146087658360117, "grad_norm": 1.7237789889734334, "learning_rate": 1.0111090981359961e-06, "loss": 0.4036, "step": 56520 }, { "epoch": 0.814752893360045, "grad_norm": 1.6510707661923958, "learning_rate": 1.0095928627409412e-06, "loss": 0.4314, "step": 56530 }, { "epoch": 0.8148970208840782, "grad_norm": 1.4345853460581275, "learning_rate": 1.0080776373787732e-06, "loss": 0.4547, "step": 56540 }, { "epoch": 0.8150411484081115, "grad_norm": 1.7311546266607967, "learning_rate": 1.00656342243302e-06, "loss": 0.4433, "step": 56550 }, { "epoch": 0.8151852759321447, "grad_norm": 1.8368841615625222, "learning_rate": 1.0050502182869514e-06, "loss": 0.4293, "step": 56560 }, { "epoch": 0.815329403456178, "grad_norm": 2.3055732608068236, "learning_rate": 1.0035380253235805e-06, "loss": 0.43, "step": 56570 }, { "epoch": 0.8154735309802112, "grad_norm": 1.50296921481571, "learning_rate": 1.0020268439256691e-06, "loss": 0.4314, "step": 56580 }, { "epoch": 0.8156176585042445, "grad_norm": 1.5536101731001044, "learning_rate": 1.00051667447572e-06, "loss": 0.4405, "step": 56590 }, { "epoch": 0.8157617860282779, "grad_norm": 1.5992324746017628, "learning_rate": 9.990075173559776e-07, "loss": 0.4378, "step": 56600 }, { "epoch": 0.8159059135523111, "grad_norm": 1.7443819072851687, "learning_rate": 9.974993729484355e-07, "loss": 0.4196, "step": 56610 }, { "epoch": 0.8160500410763444, "grad_norm": 1.7084141305359026, "learning_rate": 9.959922416348273e-07, "loss": 0.4366, "step": 56620 }, { "epoch": 0.8161941686003776, "grad_norm": 1.4524096405182243, "learning_rate": 9.944861237966302e-07, "loss": 0.4164, "step": 56630 }, { "epoch": 0.8163382961244109, "grad_norm": 1.8060327376570535, "learning_rate": 9.92981019815063e-07, "loss": 0.4276, "step": 56640 }, { "epoch": 0.8164824236484441, "grad_norm": 1.6647125581472808, "learning_rate": 9.914769300710963e-07, "loss": 0.4435, "step": 56650 }, { "epoch": 0.8166265511724774, "grad_norm": 1.5172886800426577, "learning_rate": 9.899738549454352e-07, "loss": 0.4221, "step": 56660 }, { "epoch": 0.8167706786965107, "grad_norm": 1.4390282556724885, "learning_rate": 9.884717948185291e-07, "loss": 0.4292, "step": 56670 }, { "epoch": 0.8169148062205439, "grad_norm": 1.7724650427835162, "learning_rate": 9.86970750070576e-07, "loss": 0.4447, "step": 56680 }, { "epoch": 0.8170589337445772, "grad_norm": 1.8274725913666767, "learning_rate": 9.854707210815112e-07, "loss": 0.455, "step": 56690 }, { "epoch": 0.8172030612686104, "grad_norm": 1.7907697346750366, "learning_rate": 9.839717082310135e-07, "loss": 0.4498, "step": 56700 }, { "epoch": 0.8173471887926438, "grad_norm": 1.5758267531493295, "learning_rate": 9.82473711898509e-07, "loss": 0.4358, "step": 56710 }, { "epoch": 0.817491316316677, "grad_norm": 1.582205171151574, "learning_rate": 9.809767324631613e-07, "loss": 0.4296, "step": 56720 }, { "epoch": 0.8176354438407103, "grad_norm": 4.790461006560925, "learning_rate": 9.794807703038788e-07, "loss": 0.4444, "step": 56730 }, { "epoch": 0.8177795713647436, "grad_norm": 1.349754801216206, "learning_rate": 9.779858257993136e-07, "loss": 0.4501, "step": 56740 }, { "epoch": 0.8179236988887768, "grad_norm": 1.731022228203461, "learning_rate": 9.76491899327857e-07, "loss": 0.439, "step": 56750 }, { "epoch": 0.8180678264128101, "grad_norm": 1.6203257198941046, "learning_rate": 9.749989912676472e-07, "loss": 0.4282, "step": 56760 }, { "epoch": 0.8182119539368433, "grad_norm": 1.8159617037322329, "learning_rate": 9.735071019965587e-07, "loss": 0.4415, "step": 56770 }, { "epoch": 0.8183560814608766, "grad_norm": 1.5224779329713507, "learning_rate": 9.72016231892215e-07, "loss": 0.3968, "step": 56780 }, { "epoch": 0.8185002089849098, "grad_norm": 1.6102392573239963, "learning_rate": 9.705263813319759e-07, "loss": 0.401, "step": 56790 }, { "epoch": 0.8186443365089431, "grad_norm": 1.608857862961687, "learning_rate": 9.69037550692945e-07, "loss": 0.4263, "step": 56800 }, { "epoch": 0.8187884640329763, "grad_norm": 1.8350587566203613, "learning_rate": 9.675497403519696e-07, "loss": 0.4386, "step": 56810 }, { "epoch": 0.8189325915570096, "grad_norm": 1.390533812902729, "learning_rate": 9.660629506856361e-07, "loss": 0.4104, "step": 56820 }, { "epoch": 0.819076719081043, "grad_norm": 1.5132148601508173, "learning_rate": 9.645771820702727e-07, "loss": 0.4036, "step": 56830 }, { "epoch": 0.8192208466050762, "grad_norm": 1.5938357420076064, "learning_rate": 9.630924348819525e-07, "loss": 0.4388, "step": 56840 }, { "epoch": 0.8193649741291095, "grad_norm": 1.779327818341265, "learning_rate": 9.616087094964866e-07, "loss": 0.4109, "step": 56850 }, { "epoch": 0.8195091016531427, "grad_norm": 1.612306145678643, "learning_rate": 9.60126006289427e-07, "loss": 0.4079, "step": 56860 }, { "epoch": 0.819653229177176, "grad_norm": 1.8168649360532036, "learning_rate": 9.586443256360694e-07, "loss": 0.4352, "step": 56870 }, { "epoch": 0.8197973567012092, "grad_norm": 1.688496714850703, "learning_rate": 9.571636679114516e-07, "loss": 0.4292, "step": 56880 }, { "epoch": 0.8199414842252425, "grad_norm": 1.4869188300698877, "learning_rate": 9.556840334903495e-07, "loss": 0.4201, "step": 56890 }, { "epoch": 0.8200856117492757, "grad_norm": 1.985833830937119, "learning_rate": 9.54205422747279e-07, "loss": 0.4115, "step": 56900 }, { "epoch": 0.820229739273309, "grad_norm": 1.5088534455697036, "learning_rate": 9.527278360565024e-07, "loss": 0.4453, "step": 56910 }, { "epoch": 0.8203738667973423, "grad_norm": 1.5195253958376407, "learning_rate": 9.512512737920182e-07, "loss": 0.4512, "step": 56920 }, { "epoch": 0.8205179943213755, "grad_norm": 1.6402310766728116, "learning_rate": 9.497757363275644e-07, "loss": 0.4115, "step": 56930 }, { "epoch": 0.8206621218454088, "grad_norm": 1.9396852722826232, "learning_rate": 9.483012240366263e-07, "loss": 0.4528, "step": 56940 }, { "epoch": 0.8208062493694421, "grad_norm": 1.9910378546095275, "learning_rate": 9.468277372924228e-07, "loss": 0.4264, "step": 56950 }, { "epoch": 0.8209503768934754, "grad_norm": 1.857764236274395, "learning_rate": 9.453552764679147e-07, "loss": 0.4353, "step": 56960 }, { "epoch": 0.8210945044175086, "grad_norm": 1.5040205272369433, "learning_rate": 9.438838419358065e-07, "loss": 0.4189, "step": 56970 }, { "epoch": 0.8212386319415419, "grad_norm": 1.8601540558213994, "learning_rate": 9.42413434068541e-07, "loss": 0.4297, "step": 56980 }, { "epoch": 0.8213827594655752, "grad_norm": 1.8663729182480902, "learning_rate": 9.409440532383002e-07, "loss": 0.4164, "step": 56990 }, { "epoch": 0.8215268869896084, "grad_norm": 1.6858132406275932, "learning_rate": 9.394756998170051e-07, "loss": 0.4391, "step": 57000 }, { "epoch": 0.8216710145136417, "grad_norm": 1.6720519852738571, "learning_rate": 9.380083741763208e-07, "loss": 0.4399, "step": 57010 }, { "epoch": 0.8218151420376749, "grad_norm": 1.693020110046579, "learning_rate": 9.365420766876493e-07, "loss": 0.4219, "step": 57020 }, { "epoch": 0.8219592695617082, "grad_norm": 1.6323612065592263, "learning_rate": 9.350768077221306e-07, "loss": 0.431, "step": 57030 }, { "epoch": 0.8221033970857414, "grad_norm": 1.6678926218967496, "learning_rate": 9.336125676506503e-07, "loss": 0.4287, "step": 57040 }, { "epoch": 0.8222475246097747, "grad_norm": 1.6551531776205746, "learning_rate": 9.321493568438273e-07, "loss": 0.4068, "step": 57050 }, { "epoch": 0.822391652133808, "grad_norm": 1.6882728918112928, "learning_rate": 9.306871756720221e-07, "loss": 0.4479, "step": 57060 }, { "epoch": 0.8225357796578413, "grad_norm": 1.6398484078380389, "learning_rate": 9.292260245053375e-07, "loss": 0.4341, "step": 57070 }, { "epoch": 0.8226799071818746, "grad_norm": 1.7071308098802425, "learning_rate": 9.277659037136106e-07, "loss": 0.4463, "step": 57080 }, { "epoch": 0.8228240347059078, "grad_norm": 1.829247354419662, "learning_rate": 9.263068136664227e-07, "loss": 0.4134, "step": 57090 }, { "epoch": 0.8229681622299411, "grad_norm": 1.8529467263607986, "learning_rate": 9.248487547330887e-07, "loss": 0.4199, "step": 57100 }, { "epoch": 0.8231122897539743, "grad_norm": 1.607311248934434, "learning_rate": 9.233917272826693e-07, "loss": 0.4166, "step": 57110 }, { "epoch": 0.8232564172780076, "grad_norm": 1.7440807458466578, "learning_rate": 9.219357316839578e-07, "loss": 0.437, "step": 57120 }, { "epoch": 0.8234005448020408, "grad_norm": 1.5691374516259795, "learning_rate": 9.204807683054884e-07, "loss": 0.4326, "step": 57130 }, { "epoch": 0.8235446723260741, "grad_norm": 1.62050347684357, "learning_rate": 9.19026837515537e-07, "loss": 0.4187, "step": 57140 }, { "epoch": 0.8236887998501073, "grad_norm": 1.6595644376276972, "learning_rate": 9.175739396821138e-07, "loss": 0.432, "step": 57150 }, { "epoch": 0.8238329273741406, "grad_norm": 1.6425260682395122, "learning_rate": 9.161220751729688e-07, "loss": 0.4214, "step": 57160 }, { "epoch": 0.8239770548981739, "grad_norm": 1.5346525382255363, "learning_rate": 9.146712443555933e-07, "loss": 0.4218, "step": 57170 }, { "epoch": 0.8241211824222072, "grad_norm": 1.8242062646329933, "learning_rate": 9.132214475972135e-07, "loss": 0.4306, "step": 57180 }, { "epoch": 0.8242653099462405, "grad_norm": 1.8222200329592115, "learning_rate": 9.117726852647924e-07, "loss": 0.4335, "step": 57190 }, { "epoch": 0.8244094374702737, "grad_norm": 1.6872595712234157, "learning_rate": 9.103249577250389e-07, "loss": 0.4431, "step": 57200 }, { "epoch": 0.824553564994307, "grad_norm": 1.6791938659945378, "learning_rate": 9.088782653443928e-07, "loss": 0.4492, "step": 57210 }, { "epoch": 0.8246976925183402, "grad_norm": 1.3348595993353918, "learning_rate": 9.074326084890323e-07, "loss": 0.4144, "step": 57220 }, { "epoch": 0.8248418200423735, "grad_norm": 1.569034940937884, "learning_rate": 9.059879875248772e-07, "loss": 0.4123, "step": 57230 }, { "epoch": 0.8249859475664068, "grad_norm": 1.6437024698071514, "learning_rate": 9.045444028175832e-07, "loss": 0.4196, "step": 57240 }, { "epoch": 0.82513007509044, "grad_norm": 1.6940555182300556, "learning_rate": 9.031018547325416e-07, "loss": 0.4108, "step": 57250 }, { "epoch": 0.8252742026144733, "grad_norm": 1.6323384150143407, "learning_rate": 9.016603436348853e-07, "loss": 0.4196, "step": 57260 }, { "epoch": 0.8254183301385065, "grad_norm": 1.3731549114254487, "learning_rate": 9.002198698894821e-07, "loss": 0.4152, "step": 57270 }, { "epoch": 0.8255624576625398, "grad_norm": 1.4644919732254178, "learning_rate": 8.98780433860938e-07, "loss": 0.4339, "step": 57280 }, { "epoch": 0.825706585186573, "grad_norm": 1.8775664708900783, "learning_rate": 8.973420359135948e-07, "loss": 0.4524, "step": 57290 }, { "epoch": 0.8258507127106064, "grad_norm": 1.4273074141005926, "learning_rate": 8.959046764115337e-07, "loss": 0.4525, "step": 57300 }, { "epoch": 0.8259948402346396, "grad_norm": 1.6678900768527927, "learning_rate": 8.944683557185746e-07, "loss": 0.447, "step": 57310 }, { "epoch": 0.8261389677586729, "grad_norm": 1.2464318536203223, "learning_rate": 8.930330741982685e-07, "loss": 0.4385, "step": 57320 }, { "epoch": 0.8262830952827062, "grad_norm": 1.7566014859647603, "learning_rate": 8.915988322139102e-07, "loss": 0.4104, "step": 57330 }, { "epoch": 0.8264272228067394, "grad_norm": 1.9164752155999147, "learning_rate": 8.901656301285272e-07, "loss": 0.4273, "step": 57340 }, { "epoch": 0.8265713503307727, "grad_norm": 1.5726124477039576, "learning_rate": 8.887334683048827e-07, "loss": 0.4057, "step": 57350 }, { "epoch": 0.8267154778548059, "grad_norm": 1.649563864280244, "learning_rate": 8.873023471054821e-07, "loss": 0.4527, "step": 57360 }, { "epoch": 0.8268596053788392, "grad_norm": 1.7080207947151536, "learning_rate": 8.858722668925618e-07, "loss": 0.4441, "step": 57370 }, { "epoch": 0.8270037329028724, "grad_norm": 1.6503948575186407, "learning_rate": 8.844432280280968e-07, "loss": 0.448, "step": 57380 }, { "epoch": 0.8271478604269057, "grad_norm": 1.5508639883666382, "learning_rate": 8.830152308738e-07, "loss": 0.4054, "step": 57390 }, { "epoch": 0.827291987950939, "grad_norm": 1.7575809824741364, "learning_rate": 8.815882757911187e-07, "loss": 0.438, "step": 57400 }, { "epoch": 0.8274361154749722, "grad_norm": 1.920242709945805, "learning_rate": 8.80162363141236e-07, "loss": 0.4113, "step": 57410 }, { "epoch": 0.8275802429990056, "grad_norm": 2.8187169022726217, "learning_rate": 8.787374932850723e-07, "loss": 0.4431, "step": 57420 }, { "epoch": 0.8277243705230388, "grad_norm": 1.712165191563382, "learning_rate": 8.77313666583286e-07, "loss": 0.4657, "step": 57430 }, { "epoch": 0.8278684980470721, "grad_norm": 1.5534876782040565, "learning_rate": 8.758908833962682e-07, "loss": 0.4363, "step": 57440 }, { "epoch": 0.8280126255711053, "grad_norm": 1.8573906606793384, "learning_rate": 8.744691440841446e-07, "loss": 0.4285, "step": 57450 }, { "epoch": 0.8281567530951386, "grad_norm": 1.6111021228970945, "learning_rate": 8.730484490067831e-07, "loss": 0.4055, "step": 57460 }, { "epoch": 0.8283008806191718, "grad_norm": 2.91248038413954, "learning_rate": 8.716287985237815e-07, "loss": 0.408, "step": 57470 }, { "epoch": 0.8284450081432051, "grad_norm": 1.8924410454039748, "learning_rate": 8.702101929944729e-07, "loss": 0.43, "step": 57480 }, { "epoch": 0.8285891356672384, "grad_norm": 2.0258709772397316, "learning_rate": 8.687926327779317e-07, "loss": 0.4448, "step": 57490 }, { "epoch": 0.8287332631912716, "grad_norm": 1.7470847597940713, "learning_rate": 8.67376118232962e-07, "loss": 0.4565, "step": 57500 }, { "epoch": 0.8288773907153049, "grad_norm": 1.6435091537113082, "learning_rate": 8.659606497181044e-07, "loss": 0.4431, "step": 57510 }, { "epoch": 0.8290215182393381, "grad_norm": 1.5109250159964769, "learning_rate": 8.645462275916367e-07, "loss": 0.4254, "step": 57520 }, { "epoch": 0.8291656457633715, "grad_norm": 1.5581679776547115, "learning_rate": 8.631328522115717e-07, "loss": 0.4433, "step": 57530 }, { "epoch": 0.8293097732874047, "grad_norm": 1.6684852016505476, "learning_rate": 8.617205239356552e-07, "loss": 0.4673, "step": 57540 }, { "epoch": 0.829453900811438, "grad_norm": 1.5405440945145945, "learning_rate": 8.603092431213678e-07, "loss": 0.4326, "step": 57550 }, { "epoch": 0.8295980283354712, "grad_norm": 1.625436243995845, "learning_rate": 8.588990101259287e-07, "loss": 0.4069, "step": 57560 }, { "epoch": 0.8297421558595045, "grad_norm": 1.843094961114778, "learning_rate": 8.574898253062879e-07, "loss": 0.4374, "step": 57570 }, { "epoch": 0.8298862833835378, "grad_norm": 1.6524674061522164, "learning_rate": 8.560816890191309e-07, "loss": 0.4381, "step": 57580 }, { "epoch": 0.830030410907571, "grad_norm": 1.5845278441638664, "learning_rate": 8.546746016208802e-07, "loss": 0.4388, "step": 57590 }, { "epoch": 0.8301745384316043, "grad_norm": 1.7892169848807677, "learning_rate": 8.532685634676901e-07, "loss": 0.4257, "step": 57600 }, { "epoch": 0.8303186659556375, "grad_norm": 1.386514085263582, "learning_rate": 8.518635749154497e-07, "loss": 0.4244, "step": 57610 }, { "epoch": 0.8304627934796708, "grad_norm": 1.8728322007636529, "learning_rate": 8.504596363197842e-07, "loss": 0.4498, "step": 57620 }, { "epoch": 0.830606921003704, "grad_norm": 2.1585000866472046, "learning_rate": 8.490567480360506e-07, "loss": 0.4258, "step": 57630 }, { "epoch": 0.8307510485277373, "grad_norm": 1.7195017051563495, "learning_rate": 8.476549104193432e-07, "loss": 0.4176, "step": 57640 }, { "epoch": 0.8308951760517707, "grad_norm": 1.7414177941833693, "learning_rate": 8.462541238244859e-07, "loss": 0.4295, "step": 57650 }, { "epoch": 0.8310393035758039, "grad_norm": 1.3285083739733885, "learning_rate": 8.448543886060423e-07, "loss": 0.4066, "step": 57660 }, { "epoch": 0.8311834310998372, "grad_norm": 1.5657772379703911, "learning_rate": 8.434557051183046e-07, "loss": 0.4451, "step": 57670 }, { "epoch": 0.8313275586238704, "grad_norm": 1.835982580954093, "learning_rate": 8.420580737153001e-07, "loss": 0.4388, "step": 57680 }, { "epoch": 0.8314716861479037, "grad_norm": 1.5344158338165883, "learning_rate": 8.40661494750793e-07, "loss": 0.3954, "step": 57690 }, { "epoch": 0.8316158136719369, "grad_norm": 1.5723863480968276, "learning_rate": 8.392659685782778e-07, "loss": 0.4426, "step": 57700 }, { "epoch": 0.8317599411959702, "grad_norm": 1.5922343014002434, "learning_rate": 8.37871495550982e-07, "loss": 0.4189, "step": 57710 }, { "epoch": 0.8319040687200034, "grad_norm": 1.62721792962643, "learning_rate": 8.364780760218704e-07, "loss": 0.4332, "step": 57720 }, { "epoch": 0.8320481962440367, "grad_norm": 1.6053292209683245, "learning_rate": 8.350857103436378e-07, "loss": 0.4112, "step": 57730 }, { "epoch": 0.83219232376807, "grad_norm": 1.890524238802288, "learning_rate": 8.336943988687112e-07, "loss": 0.4344, "step": 57740 }, { "epoch": 0.8323364512921032, "grad_norm": 1.8538129510894368, "learning_rate": 8.32304141949255e-07, "loss": 0.4332, "step": 57750 }, { "epoch": 0.8324805788161365, "grad_norm": 1.5137517051738623, "learning_rate": 8.309149399371647e-07, "loss": 0.3998, "step": 57760 }, { "epoch": 0.8326247063401698, "grad_norm": 2.019544083759248, "learning_rate": 8.295267931840678e-07, "loss": 0.4295, "step": 57770 }, { "epoch": 0.8327688338642031, "grad_norm": 1.6189251722431164, "learning_rate": 8.281397020413245e-07, "loss": 0.4228, "step": 57780 }, { "epoch": 0.8329129613882363, "grad_norm": 1.5855469259309931, "learning_rate": 8.267536668600302e-07, "loss": 0.4184, "step": 57790 }, { "epoch": 0.8330570889122696, "grad_norm": 1.6120998276353566, "learning_rate": 8.253686879910111e-07, "loss": 0.4242, "step": 57800 }, { "epoch": 0.8332012164363028, "grad_norm": 1.6967244676428517, "learning_rate": 8.239847657848249e-07, "loss": 0.4171, "step": 57810 }, { "epoch": 0.8333453439603361, "grad_norm": 1.10013695036445, "learning_rate": 8.226019005917662e-07, "loss": 0.4128, "step": 57820 }, { "epoch": 0.8334894714843694, "grad_norm": 1.7086558989907057, "learning_rate": 8.212200927618574e-07, "loss": 0.4602, "step": 57830 }, { "epoch": 0.8336335990084026, "grad_norm": 1.4679814216909783, "learning_rate": 8.198393426448547e-07, "loss": 0.4261, "step": 57840 }, { "epoch": 0.8337777265324359, "grad_norm": 2.3819657304384134, "learning_rate": 8.184596505902492e-07, "loss": 0.4401, "step": 57850 }, { "epoch": 0.8339218540564691, "grad_norm": 1.7667499155885436, "learning_rate": 8.170810169472593e-07, "loss": 0.4457, "step": 57860 }, { "epoch": 0.8340659815805024, "grad_norm": 1.742261101233003, "learning_rate": 8.157034420648391e-07, "loss": 0.4502, "step": 57870 }, { "epoch": 0.8342101091045357, "grad_norm": 1.6161133407270174, "learning_rate": 8.143269262916758e-07, "loss": 0.4513, "step": 57880 }, { "epoch": 0.834354236628569, "grad_norm": 1.936178895332988, "learning_rate": 8.129514699761848e-07, "loss": 0.4455, "step": 57890 }, { "epoch": 0.8344983641526023, "grad_norm": 1.47452086577939, "learning_rate": 8.115770734665157e-07, "loss": 0.4203, "step": 57900 }, { "epoch": 0.8346424916766355, "grad_norm": 1.75053734103711, "learning_rate": 8.10203737110547e-07, "loss": 0.4326, "step": 57910 }, { "epoch": 0.8347866192006688, "grad_norm": 1.826965205005753, "learning_rate": 8.088314612558946e-07, "loss": 0.4225, "step": 57920 }, { "epoch": 0.834930746724702, "grad_norm": 1.729239684120623, "learning_rate": 8.074602462499003e-07, "loss": 0.4443, "step": 57930 }, { "epoch": 0.8350748742487353, "grad_norm": 1.727156574563864, "learning_rate": 8.060900924396386e-07, "loss": 0.4232, "step": 57940 }, { "epoch": 0.8352190017727685, "grad_norm": 1.8027806487851608, "learning_rate": 8.047210001719185e-07, "loss": 0.4191, "step": 57950 }, { "epoch": 0.8353631292968018, "grad_norm": 1.9192580814334914, "learning_rate": 8.033529697932779e-07, "loss": 0.4438, "step": 57960 }, { "epoch": 0.835507256820835, "grad_norm": 1.2930799854243304, "learning_rate": 8.019860016499825e-07, "loss": 0.4283, "step": 57970 }, { "epoch": 0.8356513843448683, "grad_norm": 1.6115092230064962, "learning_rate": 8.00620096088039e-07, "loss": 0.4264, "step": 57980 }, { "epoch": 0.8357955118689016, "grad_norm": 1.901091269413093, "learning_rate": 7.99255253453175e-07, "loss": 0.4135, "step": 57990 }, { "epoch": 0.8359396393929349, "grad_norm": 1.8557059359054784, "learning_rate": 7.978914740908522e-07, "loss": 0.436, "step": 58000 }, { "epoch": 0.8360837669169682, "grad_norm": 2.215640395072219, "learning_rate": 7.965287583462672e-07, "loss": 0.4295, "step": 58010 }, { "epoch": 0.8362278944410014, "grad_norm": 2.100629018286657, "learning_rate": 7.951671065643424e-07, "loss": 0.4303, "step": 58020 }, { "epoch": 0.8363720219650347, "grad_norm": 1.59747013482262, "learning_rate": 7.938065190897309e-07, "loss": 0.3931, "step": 58030 }, { "epoch": 0.8365161494890679, "grad_norm": 2.198894841486332, "learning_rate": 7.924469962668213e-07, "loss": 0.4512, "step": 58040 }, { "epoch": 0.8366602770131012, "grad_norm": 1.7706731711069175, "learning_rate": 7.910885384397277e-07, "loss": 0.4521, "step": 58050 }, { "epoch": 0.8368044045371344, "grad_norm": 1.5876794892089583, "learning_rate": 7.897311459522961e-07, "loss": 0.4469, "step": 58060 }, { "epoch": 0.8369485320611677, "grad_norm": 1.6698920661481662, "learning_rate": 7.88374819148105e-07, "loss": 0.4071, "step": 58070 }, { "epoch": 0.837092659585201, "grad_norm": 2.2426415471271097, "learning_rate": 7.870195583704593e-07, "loss": 0.4261, "step": 58080 }, { "epoch": 0.8372367871092342, "grad_norm": 1.9683090388063804, "learning_rate": 7.856653639623979e-07, "loss": 0.4478, "step": 58090 }, { "epoch": 0.8373809146332675, "grad_norm": 1.7276797665310126, "learning_rate": 7.843122362666866e-07, "loss": 0.442, "step": 58100 }, { "epoch": 0.8375250421573007, "grad_norm": 1.7807365449818529, "learning_rate": 7.829601756258248e-07, "loss": 0.4221, "step": 58110 }, { "epoch": 0.8376691696813341, "grad_norm": 1.7671973651453994, "learning_rate": 7.816091823820382e-07, "loss": 0.4364, "step": 58120 }, { "epoch": 0.8378132972053673, "grad_norm": 1.6573975976567301, "learning_rate": 7.802592568772827e-07, "loss": 0.4578, "step": 58130 }, { "epoch": 0.8379574247294006, "grad_norm": 1.7011825498119022, "learning_rate": 7.789103994532471e-07, "loss": 0.4271, "step": 58140 }, { "epoch": 0.8381015522534339, "grad_norm": 1.8088493628817472, "learning_rate": 7.775626104513473e-07, "loss": 0.4388, "step": 58150 }, { "epoch": 0.8382456797774671, "grad_norm": 1.722931355638949, "learning_rate": 7.762158902127276e-07, "loss": 0.4225, "step": 58160 }, { "epoch": 0.8383898073015004, "grad_norm": 1.3151332984705015, "learning_rate": 7.748702390782653e-07, "loss": 0.4056, "step": 58170 }, { "epoch": 0.8385339348255336, "grad_norm": 1.743675554724438, "learning_rate": 7.735256573885652e-07, "loss": 0.4265, "step": 58180 }, { "epoch": 0.8386780623495669, "grad_norm": 1.6835832125049954, "learning_rate": 7.721821454839595e-07, "loss": 0.4151, "step": 58190 }, { "epoch": 0.8388221898736001, "grad_norm": 1.7524920492022298, "learning_rate": 7.708397037045129e-07, "loss": 0.434, "step": 58200 }, { "epoch": 0.8389663173976334, "grad_norm": 1.590043856927459, "learning_rate": 7.694983323900191e-07, "loss": 0.4106, "step": 58210 }, { "epoch": 0.8391104449216666, "grad_norm": 1.576333472800532, "learning_rate": 7.68158031879998e-07, "loss": 0.4469, "step": 58220 }, { "epoch": 0.8392545724457, "grad_norm": 1.7023652783439718, "learning_rate": 7.668188025136997e-07, "loss": 0.4342, "step": 58230 }, { "epoch": 0.8393986999697333, "grad_norm": 1.6817295140821342, "learning_rate": 7.65480644630105e-07, "loss": 0.4279, "step": 58240 }, { "epoch": 0.8395428274937665, "grad_norm": 1.538580603132655, "learning_rate": 7.641435585679219e-07, "loss": 0.4172, "step": 58250 }, { "epoch": 0.8396869550177998, "grad_norm": 1.4351474611877555, "learning_rate": 7.628075446655852e-07, "loss": 0.4194, "step": 58260 }, { "epoch": 0.839831082541833, "grad_norm": 1.5094906406024444, "learning_rate": 7.614726032612635e-07, "loss": 0.4171, "step": 58270 }, { "epoch": 0.8399752100658663, "grad_norm": 1.5371118057570008, "learning_rate": 7.601387346928491e-07, "loss": 0.4261, "step": 58280 }, { "epoch": 0.8401193375898995, "grad_norm": 1.527954135756852, "learning_rate": 7.588059392979635e-07, "loss": 0.4389, "step": 58290 }, { "epoch": 0.8402634651139328, "grad_norm": 1.8217787507045629, "learning_rate": 7.574742174139588e-07, "loss": 0.4339, "step": 58300 }, { "epoch": 0.840407592637966, "grad_norm": 1.9409947689995257, "learning_rate": 7.561435693779151e-07, "loss": 0.4179, "step": 58310 }, { "epoch": 0.8405517201619993, "grad_norm": 1.5295284478617452, "learning_rate": 7.548139955266387e-07, "loss": 0.4059, "step": 58320 }, { "epoch": 0.8406958476860326, "grad_norm": 1.5961734240740455, "learning_rate": 7.534854961966637e-07, "loss": 0.4425, "step": 58330 }, { "epoch": 0.8408399752100658, "grad_norm": 1.6849771774399123, "learning_rate": 7.521580717242554e-07, "loss": 0.4719, "step": 58340 }, { "epoch": 0.8409841027340992, "grad_norm": 1.6702556980268293, "learning_rate": 7.508317224454053e-07, "loss": 0.4175, "step": 58350 }, { "epoch": 0.8411282302581324, "grad_norm": 1.6581307347629894, "learning_rate": 7.495064486958303e-07, "loss": 0.4115, "step": 58360 }, { "epoch": 0.8412723577821657, "grad_norm": 1.472580786795677, "learning_rate": 7.481822508109803e-07, "loss": 0.4189, "step": 58370 }, { "epoch": 0.841416485306199, "grad_norm": 2.050354892345516, "learning_rate": 7.468591291260285e-07, "loss": 0.4296, "step": 58380 }, { "epoch": 0.8415606128302322, "grad_norm": 1.5235092623323887, "learning_rate": 7.455370839758758e-07, "loss": 0.4308, "step": 58390 }, { "epoch": 0.8417047403542655, "grad_norm": 1.695308340242877, "learning_rate": 7.44216115695155e-07, "loss": 0.4503, "step": 58400 }, { "epoch": 0.8418488678782987, "grad_norm": 1.8088183759183638, "learning_rate": 7.428962246182208e-07, "loss": 0.3997, "step": 58410 }, { "epoch": 0.841992995402332, "grad_norm": 1.80761531484309, "learning_rate": 7.415774110791596e-07, "loss": 0.4252, "step": 58420 }, { "epoch": 0.8421371229263652, "grad_norm": 1.4863976240208683, "learning_rate": 7.402596754117813e-07, "loss": 0.4153, "step": 58430 }, { "epoch": 0.8422812504503985, "grad_norm": 1.6217805734609387, "learning_rate": 7.389430179496271e-07, "loss": 0.4203, "step": 58440 }, { "epoch": 0.8424253779744317, "grad_norm": 1.7519855904478225, "learning_rate": 7.376274390259619e-07, "loss": 0.4337, "step": 58450 }, { "epoch": 0.842569505498465, "grad_norm": 1.3820729790301929, "learning_rate": 7.363129389737777e-07, "loss": 0.4622, "step": 58460 }, { "epoch": 0.8427136330224984, "grad_norm": 1.8166845485530578, "learning_rate": 7.349995181257969e-07, "loss": 0.4453, "step": 58470 }, { "epoch": 0.8428577605465316, "grad_norm": 1.6473129239091122, "learning_rate": 7.336871768144649e-07, "loss": 0.434, "step": 58480 }, { "epoch": 0.8430018880705649, "grad_norm": 1.8106876562756031, "learning_rate": 7.323759153719546e-07, "loss": 0.4323, "step": 58490 }, { "epoch": 0.8431460155945981, "grad_norm": 1.916782999541291, "learning_rate": 7.310657341301674e-07, "loss": 0.4496, "step": 58500 }, { "epoch": 0.8432901431186314, "grad_norm": 1.4243113019254887, "learning_rate": 7.297566334207307e-07, "loss": 0.4248, "step": 58510 }, { "epoch": 0.8434342706426646, "grad_norm": 1.5167506055630662, "learning_rate": 7.284486135749946e-07, "loss": 0.4211, "step": 58520 }, { "epoch": 0.8435783981666979, "grad_norm": 1.3410987624790294, "learning_rate": 7.271416749240435e-07, "loss": 0.4165, "step": 58530 }, { "epoch": 0.8437225256907311, "grad_norm": 1.5263373748741236, "learning_rate": 7.258358177986812e-07, "loss": 0.4116, "step": 58540 }, { "epoch": 0.8438666532147644, "grad_norm": 1.5861571702528723, "learning_rate": 7.245310425294399e-07, "loss": 0.4178, "step": 58550 }, { "epoch": 0.8440107807387977, "grad_norm": 1.4813904094960797, "learning_rate": 7.232273494465775e-07, "loss": 0.407, "step": 58560 }, { "epoch": 0.8441549082628309, "grad_norm": 1.8982060491678276, "learning_rate": 7.219247388800804e-07, "loss": 0.4312, "step": 58570 }, { "epoch": 0.8442990357868643, "grad_norm": 1.5517156339137248, "learning_rate": 7.206232111596584e-07, "loss": 0.4322, "step": 58580 }, { "epoch": 0.8444431633108975, "grad_norm": 1.6440421145726185, "learning_rate": 7.193227666147468e-07, "loss": 0.4461, "step": 58590 }, { "epoch": 0.8445872908349308, "grad_norm": 1.6209819415091675, "learning_rate": 7.1802340557451e-07, "loss": 0.4349, "step": 58600 }, { "epoch": 0.844731418358964, "grad_norm": 1.5386294075383122, "learning_rate": 7.16725128367835e-07, "loss": 0.4163, "step": 58610 }, { "epoch": 0.8448755458829973, "grad_norm": 1.8674573417072167, "learning_rate": 7.154279353233346e-07, "loss": 0.4349, "step": 58620 }, { "epoch": 0.8450196734070305, "grad_norm": 1.6303046215417112, "learning_rate": 7.141318267693487e-07, "loss": 0.4268, "step": 58630 }, { "epoch": 0.8451638009310638, "grad_norm": 1.933328527185691, "learning_rate": 7.128368030339444e-07, "loss": 0.4474, "step": 58640 }, { "epoch": 0.8453079284550971, "grad_norm": 2.5227227246407415, "learning_rate": 7.115428644449085e-07, "loss": 0.4249, "step": 58650 }, { "epoch": 0.8454520559791303, "grad_norm": 1.6467112399810986, "learning_rate": 7.102500113297595e-07, "loss": 0.4105, "step": 58660 }, { "epoch": 0.8455961835031636, "grad_norm": 1.4516623211326574, "learning_rate": 7.089582440157372e-07, "loss": 0.4426, "step": 58670 }, { "epoch": 0.8457403110271968, "grad_norm": 1.6076133310106249, "learning_rate": 7.076675628298063e-07, "loss": 0.4697, "step": 58680 }, { "epoch": 0.8458844385512301, "grad_norm": 1.6122115025475965, "learning_rate": 7.063779680986599e-07, "loss": 0.4474, "step": 58690 }, { "epoch": 0.8460285660752634, "grad_norm": 1.5735317079974587, "learning_rate": 7.05089460148713e-07, "loss": 0.435, "step": 58700 }, { "epoch": 0.8461726935992967, "grad_norm": 1.3959771923094306, "learning_rate": 7.038020393061057e-07, "loss": 0.4448, "step": 58710 }, { "epoch": 0.84631682112333, "grad_norm": 1.6556626822943574, "learning_rate": 7.025157058967064e-07, "loss": 0.4073, "step": 58720 }, { "epoch": 0.8464609486473632, "grad_norm": 1.6725391326428791, "learning_rate": 7.012304602461034e-07, "loss": 0.4489, "step": 58730 }, { "epoch": 0.8466050761713965, "grad_norm": 1.695970335974282, "learning_rate": 6.999463026796121e-07, "loss": 0.4611, "step": 58740 }, { "epoch": 0.8467492036954297, "grad_norm": 1.6831777967243007, "learning_rate": 6.986632335222731e-07, "loss": 0.4452, "step": 58750 }, { "epoch": 0.846893331219463, "grad_norm": 1.7598937095291647, "learning_rate": 6.973812530988522e-07, "loss": 0.4016, "step": 58760 }, { "epoch": 0.8470374587434962, "grad_norm": 2.253047766297032, "learning_rate": 6.961003617338363e-07, "loss": 0.4148, "step": 58770 }, { "epoch": 0.8471815862675295, "grad_norm": 1.748382570436172, "learning_rate": 6.948205597514374e-07, "loss": 0.4428, "step": 58780 }, { "epoch": 0.8473257137915627, "grad_norm": 1.8602452612555083, "learning_rate": 6.935418474755962e-07, "loss": 0.46, "step": 58790 }, { "epoch": 0.847469841315596, "grad_norm": 1.7259168121189585, "learning_rate": 6.922642252299722e-07, "loss": 0.4213, "step": 58800 }, { "epoch": 0.8476139688396293, "grad_norm": 1.8213574606206857, "learning_rate": 6.9098769333795e-07, "loss": 0.4094, "step": 58810 }, { "epoch": 0.8477580963636626, "grad_norm": 1.5383347744864284, "learning_rate": 6.897122521226423e-07, "loss": 0.4336, "step": 58820 }, { "epoch": 0.8479022238876959, "grad_norm": 1.6647998300510864, "learning_rate": 6.88437901906881e-07, "loss": 0.4222, "step": 58830 }, { "epoch": 0.8480463514117291, "grad_norm": 1.6294433096704677, "learning_rate": 6.871646430132223e-07, "loss": 0.4111, "step": 58840 }, { "epoch": 0.8481904789357624, "grad_norm": 1.6794005987380063, "learning_rate": 6.858924757639484e-07, "loss": 0.4183, "step": 58850 }, { "epoch": 0.8483346064597956, "grad_norm": 1.2902419749847267, "learning_rate": 6.846214004810664e-07, "loss": 0.4291, "step": 58860 }, { "epoch": 0.8484787339838289, "grad_norm": 1.5345176196010586, "learning_rate": 6.833514174863026e-07, "loss": 0.4116, "step": 58870 }, { "epoch": 0.8486228615078621, "grad_norm": 1.545271449289884, "learning_rate": 6.820825271011083e-07, "loss": 0.4141, "step": 58880 }, { "epoch": 0.8487669890318954, "grad_norm": 1.5839359416392387, "learning_rate": 6.808147296466617e-07, "loss": 0.4456, "step": 58890 }, { "epoch": 0.8489111165559287, "grad_norm": 1.7273049363198159, "learning_rate": 6.795480254438597e-07, "loss": 0.4186, "step": 58900 }, { "epoch": 0.8490552440799619, "grad_norm": 1.7176982526806484, "learning_rate": 6.782824148133238e-07, "loss": 0.4367, "step": 58910 }, { "epoch": 0.8491993716039952, "grad_norm": 1.4833527748514235, "learning_rate": 6.770178980754022e-07, "loss": 0.4488, "step": 58920 }, { "epoch": 0.8493434991280285, "grad_norm": 1.6661321303491265, "learning_rate": 6.75754475550161e-07, "loss": 0.4225, "step": 58930 }, { "epoch": 0.8494876266520618, "grad_norm": 1.4237930474633742, "learning_rate": 6.744921475573913e-07, "loss": 0.4289, "step": 58940 }, { "epoch": 0.849631754176095, "grad_norm": 1.8960511526673312, "learning_rate": 6.7323091441661e-07, "loss": 0.446, "step": 58950 }, { "epoch": 0.8497758817001283, "grad_norm": 1.631181428978591, "learning_rate": 6.719707764470518e-07, "loss": 0.4164, "step": 58960 }, { "epoch": 0.8499200092241616, "grad_norm": 1.7043766548354087, "learning_rate": 6.707117339676789e-07, "loss": 0.4576, "step": 58970 }, { "epoch": 0.8500641367481948, "grad_norm": 1.9380865073812485, "learning_rate": 6.694537872971724e-07, "loss": 0.427, "step": 58980 }, { "epoch": 0.8502082642722281, "grad_norm": 1.6062605605122378, "learning_rate": 6.681969367539398e-07, "loss": 0.4357, "step": 58990 }, { "epoch": 0.8503523917962613, "grad_norm": 1.640560399141422, "learning_rate": 6.669411826561078e-07, "loss": 0.4472, "step": 59000 }, { "epoch": 0.8504965193202946, "grad_norm": 1.3408144921549583, "learning_rate": 6.656865253215262e-07, "loss": 0.4434, "step": 59010 }, { "epoch": 0.8506406468443278, "grad_norm": 1.5006205881120547, "learning_rate": 6.6443296506777e-07, "loss": 0.4364, "step": 59020 }, { "epoch": 0.8507847743683611, "grad_norm": 1.4614435312084826, "learning_rate": 6.631805022121329e-07, "loss": 0.439, "step": 59030 }, { "epoch": 0.8509289018923943, "grad_norm": 1.6732557105357273, "learning_rate": 6.619291370716319e-07, "loss": 0.4131, "step": 59040 }, { "epoch": 0.8510730294164277, "grad_norm": 1.706542612608019, "learning_rate": 6.606788699630085e-07, "loss": 0.4318, "step": 59050 }, { "epoch": 0.851217156940461, "grad_norm": 1.7080131600539485, "learning_rate": 6.594297012027234e-07, "loss": 0.4371, "step": 59060 }, { "epoch": 0.8513612844644942, "grad_norm": 1.6519151443530256, "learning_rate": 6.581816311069588e-07, "loss": 0.4156, "step": 59070 }, { "epoch": 0.8515054119885275, "grad_norm": 1.6911280333148289, "learning_rate": 6.569346599916215e-07, "loss": 0.4228, "step": 59080 }, { "epoch": 0.8516495395125607, "grad_norm": 1.9576349320900877, "learning_rate": 6.556887881723401e-07, "loss": 0.4289, "step": 59090 }, { "epoch": 0.851793667036594, "grad_norm": 1.6450034542868888, "learning_rate": 6.544440159644628e-07, "loss": 0.4154, "step": 59100 }, { "epoch": 0.8519377945606272, "grad_norm": 1.8098337713109331, "learning_rate": 6.532003436830586e-07, "loss": 0.4269, "step": 59110 }, { "epoch": 0.8520819220846605, "grad_norm": 1.6991263898608193, "learning_rate": 6.519577716429232e-07, "loss": 0.4381, "step": 59120 }, { "epoch": 0.8522260496086937, "grad_norm": 1.8444418595415772, "learning_rate": 6.507163001585687e-07, "loss": 0.4358, "step": 59130 }, { "epoch": 0.852370177132727, "grad_norm": 2.6498533929864796, "learning_rate": 6.494759295442299e-07, "loss": 0.4431, "step": 59140 }, { "epoch": 0.8525143046567603, "grad_norm": 1.7574614402146447, "learning_rate": 6.482366601138652e-07, "loss": 0.4513, "step": 59150 }, { "epoch": 0.8526584321807935, "grad_norm": 2.2256578489332064, "learning_rate": 6.469984921811512e-07, "loss": 0.4196, "step": 59160 }, { "epoch": 0.8528025597048269, "grad_norm": 1.7133486232119235, "learning_rate": 6.457614260594869e-07, "loss": 0.4364, "step": 59170 }, { "epoch": 0.8529466872288601, "grad_norm": 1.7012131378882984, "learning_rate": 6.445254620619934e-07, "loss": 0.4558, "step": 59180 }, { "epoch": 0.8530908147528934, "grad_norm": 1.4672880016764185, "learning_rate": 6.432906005015127e-07, "loss": 0.4232, "step": 59190 }, { "epoch": 0.8532349422769266, "grad_norm": 1.6156452155373502, "learning_rate": 6.420568416906059e-07, "loss": 0.4391, "step": 59200 }, { "epoch": 0.8533790698009599, "grad_norm": 1.4313388939541984, "learning_rate": 6.408241859415559e-07, "loss": 0.4314, "step": 59210 }, { "epoch": 0.8535231973249932, "grad_norm": 1.8780315326316763, "learning_rate": 6.395926335663682e-07, "loss": 0.4349, "step": 59220 }, { "epoch": 0.8536673248490264, "grad_norm": 1.8010966873819738, "learning_rate": 6.383621848767668e-07, "loss": 0.4192, "step": 59230 }, { "epoch": 0.8538114523730597, "grad_norm": 1.870351567868265, "learning_rate": 6.371328401841958e-07, "loss": 0.4495, "step": 59240 }, { "epoch": 0.8539555798970929, "grad_norm": 1.5820749638318752, "learning_rate": 6.359045997998231e-07, "loss": 0.4392, "step": 59250 }, { "epoch": 0.8540997074211262, "grad_norm": 1.6599728160880853, "learning_rate": 6.346774640345338e-07, "loss": 0.4432, "step": 59260 }, { "epoch": 0.8542438349451594, "grad_norm": 1.675009667879129, "learning_rate": 6.334514331989339e-07, "loss": 0.4108, "step": 59270 }, { "epoch": 0.8543879624691928, "grad_norm": 1.841948424747236, "learning_rate": 6.322265076033523e-07, "loss": 0.431, "step": 59280 }, { "epoch": 0.854532089993226, "grad_norm": 1.361952286224052, "learning_rate": 6.310026875578362e-07, "loss": 0.4304, "step": 59290 }, { "epoch": 0.8546762175172593, "grad_norm": 1.5894367068659536, "learning_rate": 6.297799733721499e-07, "loss": 0.462, "step": 59300 }, { "epoch": 0.8548203450412926, "grad_norm": 1.4383240626794176, "learning_rate": 6.285583653557859e-07, "loss": 0.4218, "step": 59310 }, { "epoch": 0.8549644725653258, "grad_norm": 1.6679836787481044, "learning_rate": 6.273378638179495e-07, "loss": 0.4386, "step": 59320 }, { "epoch": 0.8551086000893591, "grad_norm": 2.5340582054348615, "learning_rate": 6.26118469067567e-07, "loss": 0.4429, "step": 59330 }, { "epoch": 0.8552527276133923, "grad_norm": 1.6153912967525756, "learning_rate": 6.249001814132882e-07, "loss": 0.4167, "step": 59340 }, { "epoch": 0.8553968551374256, "grad_norm": 1.7734207631882846, "learning_rate": 6.236830011634793e-07, "loss": 0.4502, "step": 59350 }, { "epoch": 0.8555409826614588, "grad_norm": 1.7154513547718486, "learning_rate": 6.224669286262258e-07, "loss": 0.4316, "step": 59360 }, { "epoch": 0.8556851101854921, "grad_norm": 2.3079841393412366, "learning_rate": 6.212519641093367e-07, "loss": 0.4376, "step": 59370 }, { "epoch": 0.8558292377095253, "grad_norm": 1.8264637779601212, "learning_rate": 6.200381079203366e-07, "loss": 0.4393, "step": 59380 }, { "epoch": 0.8559733652335586, "grad_norm": 1.4833754383663744, "learning_rate": 6.188253603664707e-07, "loss": 0.4265, "step": 59390 }, { "epoch": 0.856117492757592, "grad_norm": 1.6200349423457776, "learning_rate": 6.176137217547041e-07, "loss": 0.4464, "step": 59400 }, { "epoch": 0.8562616202816252, "grad_norm": 1.8066874291299317, "learning_rate": 6.164031923917207e-07, "loss": 0.4216, "step": 59410 }, { "epoch": 0.8564057478056585, "grad_norm": 1.7737462607999872, "learning_rate": 6.151937725839253e-07, "loss": 0.4428, "step": 59420 }, { "epoch": 0.8565498753296917, "grad_norm": 1.5984687325655502, "learning_rate": 6.139854626374392e-07, "loss": 0.3955, "step": 59430 }, { "epoch": 0.856694002853725, "grad_norm": 1.6716684087877898, "learning_rate": 6.127782628581053e-07, "loss": 0.4352, "step": 59440 }, { "epoch": 0.8568381303777582, "grad_norm": 1.6827786132995552, "learning_rate": 6.11572173551484e-07, "loss": 0.4185, "step": 59450 }, { "epoch": 0.8569822579017915, "grad_norm": 1.8309767386914952, "learning_rate": 6.103671950228529e-07, "loss": 0.4014, "step": 59460 }, { "epoch": 0.8571263854258248, "grad_norm": 1.6056192513595209, "learning_rate": 6.09163327577213e-07, "loss": 0.4274, "step": 59470 }, { "epoch": 0.857270512949858, "grad_norm": 1.6772192412689255, "learning_rate": 6.079605715192805e-07, "loss": 0.4209, "step": 59480 }, { "epoch": 0.8574146404738913, "grad_norm": 1.6219692732069475, "learning_rate": 6.067589271534901e-07, "loss": 0.4437, "step": 59490 }, { "epoch": 0.8575587679979245, "grad_norm": 1.693842394325585, "learning_rate": 6.055583947839988e-07, "loss": 0.4523, "step": 59500 }, { "epoch": 0.8577028955219578, "grad_norm": 1.6215107684043788, "learning_rate": 6.043589747146783e-07, "loss": 0.433, "step": 59510 }, { "epoch": 0.8578470230459911, "grad_norm": 1.454443169545881, "learning_rate": 6.031606672491191e-07, "loss": 0.4301, "step": 59520 }, { "epoch": 0.8579911505700244, "grad_norm": 1.681940284550722, "learning_rate": 6.019634726906321e-07, "loss": 0.4171, "step": 59530 }, { "epoch": 0.8581352780940577, "grad_norm": 1.6805970185281143, "learning_rate": 6.007673913422462e-07, "loss": 0.4396, "step": 59540 }, { "epoch": 0.8582794056180909, "grad_norm": 1.6493285380006257, "learning_rate": 5.99572423506708e-07, "loss": 0.4298, "step": 59550 }, { "epoch": 0.8584235331421242, "grad_norm": 1.5891813330942757, "learning_rate": 5.983785694864791e-07, "loss": 0.4404, "step": 59560 }, { "epoch": 0.8585676606661574, "grad_norm": 1.9017035848187618, "learning_rate": 5.971858295837462e-07, "loss": 0.4433, "step": 59570 }, { "epoch": 0.8587117881901907, "grad_norm": 1.619123405707459, "learning_rate": 5.959942041004074e-07, "loss": 0.4127, "step": 59580 }, { "epoch": 0.8588559157142239, "grad_norm": 1.6889171231958955, "learning_rate": 5.948036933380813e-07, "loss": 0.4414, "step": 59590 }, { "epoch": 0.8590000432382572, "grad_norm": 1.7830090900682845, "learning_rate": 5.936142975981057e-07, "loss": 0.4461, "step": 59600 }, { "epoch": 0.8591441707622904, "grad_norm": 1.7095429991589686, "learning_rate": 5.924260171815338e-07, "loss": 0.4358, "step": 59610 }, { "epoch": 0.8592882982863237, "grad_norm": 1.8586723258989912, "learning_rate": 5.912388523891372e-07, "loss": 0.4209, "step": 59620 }, { "epoch": 0.8594324258103571, "grad_norm": 1.7805828628255909, "learning_rate": 5.900528035214054e-07, "loss": 0.4363, "step": 59630 }, { "epoch": 0.8595765533343903, "grad_norm": 1.3715431403386549, "learning_rate": 5.888678708785472e-07, "loss": 0.4296, "step": 59640 }, { "epoch": 0.8597206808584236, "grad_norm": 1.9331969972059646, "learning_rate": 5.876840547604862e-07, "loss": 0.4252, "step": 59650 }, { "epoch": 0.8598648083824568, "grad_norm": 1.6677298754472527, "learning_rate": 5.86501355466863e-07, "loss": 0.4073, "step": 59660 }, { "epoch": 0.8600089359064901, "grad_norm": 1.5055830957313137, "learning_rate": 5.853197732970383e-07, "loss": 0.4315, "step": 59670 }, { "epoch": 0.8601530634305233, "grad_norm": 1.635197779360385, "learning_rate": 5.841393085500885e-07, "loss": 0.4363, "step": 59680 }, { "epoch": 0.8602971909545566, "grad_norm": 1.5366926974423936, "learning_rate": 5.829599615248055e-07, "loss": 0.4505, "step": 59690 }, { "epoch": 0.8604413184785898, "grad_norm": 1.819786892543634, "learning_rate": 5.817817325197028e-07, "loss": 0.4385, "step": 59700 }, { "epoch": 0.8605854460026231, "grad_norm": 1.5886339896093618, "learning_rate": 5.806046218330063e-07, "loss": 0.4545, "step": 59710 }, { "epoch": 0.8607295735266564, "grad_norm": 1.4154520762165994, "learning_rate": 5.7942862976266e-07, "loss": 0.4241, "step": 59720 }, { "epoch": 0.8608737010506896, "grad_norm": 1.575609842388483, "learning_rate": 5.782537566063274e-07, "loss": 0.4172, "step": 59730 }, { "epoch": 0.8610178285747229, "grad_norm": 1.7919402520819538, "learning_rate": 5.770800026613849e-07, "loss": 0.4294, "step": 59740 }, { "epoch": 0.8611619560987562, "grad_norm": 1.6302874519711688, "learning_rate": 5.7590736822493e-07, "loss": 0.4459, "step": 59750 }, { "epoch": 0.8613060836227895, "grad_norm": 1.6927383972709669, "learning_rate": 5.747358535937714e-07, "loss": 0.4166, "step": 59760 }, { "epoch": 0.8614502111468227, "grad_norm": 1.6929593771714901, "learning_rate": 5.735654590644396e-07, "loss": 0.4581, "step": 59770 }, { "epoch": 0.861594338670856, "grad_norm": 1.6866695403119212, "learning_rate": 5.723961849331783e-07, "loss": 0.4238, "step": 59780 }, { "epoch": 0.8617384661948893, "grad_norm": 2.2528652063730044, "learning_rate": 5.712280314959478e-07, "loss": 0.4477, "step": 59790 }, { "epoch": 0.8618825937189225, "grad_norm": 1.8167846232829317, "learning_rate": 5.70060999048428e-07, "loss": 0.4129, "step": 59800 }, { "epoch": 0.8620267212429558, "grad_norm": 1.6538713366439475, "learning_rate": 5.688950878860106e-07, "loss": 0.386, "step": 59810 }, { "epoch": 0.862170848766989, "grad_norm": 1.4222537331171698, "learning_rate": 5.677302983038052e-07, "loss": 0.4348, "step": 59820 }, { "epoch": 0.8623149762910223, "grad_norm": 1.5047626222900328, "learning_rate": 5.665666305966394e-07, "loss": 0.4238, "step": 59830 }, { "epoch": 0.8624591038150555, "grad_norm": 1.654216039817174, "learning_rate": 5.654040850590547e-07, "loss": 0.4128, "step": 59840 }, { "epoch": 0.8626032313390888, "grad_norm": 1.8688414375592324, "learning_rate": 5.642426619853076e-07, "loss": 0.4184, "step": 59850 }, { "epoch": 0.862747358863122, "grad_norm": 1.7032721781551772, "learning_rate": 5.630823616693731e-07, "loss": 0.4398, "step": 59860 }, { "epoch": 0.8628914863871554, "grad_norm": 1.6476933880751377, "learning_rate": 5.619231844049422e-07, "loss": 0.4237, "step": 59870 }, { "epoch": 0.8630356139111887, "grad_norm": 1.6588779750593605, "learning_rate": 5.607651304854195e-07, "loss": 0.4419, "step": 59880 }, { "epoch": 0.8631797414352219, "grad_norm": 1.5714754922085767, "learning_rate": 5.596082002039243e-07, "loss": 0.4517, "step": 59890 }, { "epoch": 0.8633238689592552, "grad_norm": 1.713323770675039, "learning_rate": 5.584523938532959e-07, "loss": 0.4148, "step": 59900 }, { "epoch": 0.8634679964832884, "grad_norm": 1.7157383196284914, "learning_rate": 5.572977117260852e-07, "loss": 0.4483, "step": 59910 }, { "epoch": 0.8636121240073217, "grad_norm": 1.6946191196115776, "learning_rate": 5.56144154114559e-07, "loss": 0.4096, "step": 59920 }, { "epoch": 0.8637562515313549, "grad_norm": 1.8487515490496014, "learning_rate": 5.549917213107026e-07, "loss": 0.4361, "step": 59930 }, { "epoch": 0.8639003790553882, "grad_norm": 1.5542476258810987, "learning_rate": 5.538404136062131e-07, "loss": 0.4022, "step": 59940 }, { "epoch": 0.8640445065794214, "grad_norm": 1.577603659134278, "learning_rate": 5.526902312925026e-07, "loss": 0.4381, "step": 59950 }, { "epoch": 0.8641886341034547, "grad_norm": 1.4927967542941516, "learning_rate": 5.515411746607014e-07, "loss": 0.4332, "step": 59960 }, { "epoch": 0.864332761627488, "grad_norm": 2.037992733293959, "learning_rate": 5.50393244001654e-07, "loss": 0.4469, "step": 59970 }, { "epoch": 0.8644768891515212, "grad_norm": 1.3400862840728387, "learning_rate": 5.492464396059172e-07, "loss": 0.4072, "step": 59980 }, { "epoch": 0.8646210166755546, "grad_norm": 1.5836703462688733, "learning_rate": 5.481007617637669e-07, "loss": 0.4371, "step": 59990 }, { "epoch": 0.8647651441995878, "grad_norm": 1.6611356206255448, "learning_rate": 5.469562107651899e-07, "loss": 0.4665, "step": 60000 }, { "epoch": 0.8649092717236211, "grad_norm": 1.8125919973952742, "learning_rate": 5.458127868998897e-07, "loss": 0.4101, "step": 60010 }, { "epoch": 0.8650533992476543, "grad_norm": 1.7981087622038419, "learning_rate": 5.446704904572841e-07, "loss": 0.451, "step": 60020 }, { "epoch": 0.8651975267716876, "grad_norm": 1.8894901472369614, "learning_rate": 5.435293217265075e-07, "loss": 0.4132, "step": 60030 }, { "epoch": 0.8653416542957209, "grad_norm": 1.5199507835358868, "learning_rate": 5.423892809964055e-07, "loss": 0.3819, "step": 60040 }, { "epoch": 0.8654857818197541, "grad_norm": 1.8226694276411037, "learning_rate": 5.412503685555392e-07, "loss": 0.4267, "step": 60050 }, { "epoch": 0.8656299093437874, "grad_norm": 1.7723737122485572, "learning_rate": 5.401125846921862e-07, "loss": 0.4298, "step": 60060 }, { "epoch": 0.8657740368678206, "grad_norm": 1.537261132426841, "learning_rate": 5.389759296943359e-07, "loss": 0.4627, "step": 60070 }, { "epoch": 0.8659181643918539, "grad_norm": 1.6607168448250669, "learning_rate": 5.378404038496931e-07, "loss": 0.4206, "step": 60080 }, { "epoch": 0.8660622919158871, "grad_norm": 1.7155709147307239, "learning_rate": 5.367060074456787e-07, "loss": 0.4452, "step": 60090 }, { "epoch": 0.8662064194399205, "grad_norm": 1.7727048922542126, "learning_rate": 5.355727407694239e-07, "loss": 0.4444, "step": 60100 }, { "epoch": 0.8663505469639537, "grad_norm": 1.8522789540366247, "learning_rate": 5.344406041077749e-07, "loss": 0.4395, "step": 60110 }, { "epoch": 0.866494674487987, "grad_norm": 1.4037957675334125, "learning_rate": 5.333095977472946e-07, "loss": 0.4453, "step": 60120 }, { "epoch": 0.8666388020120203, "grad_norm": 1.7144736034807666, "learning_rate": 5.321797219742575e-07, "loss": 0.4065, "step": 60130 }, { "epoch": 0.8667829295360535, "grad_norm": 2.096774836344585, "learning_rate": 5.310509770746508e-07, "loss": 0.4295, "step": 60140 }, { "epoch": 0.8669270570600868, "grad_norm": 1.4546702413703725, "learning_rate": 5.2992336333418e-07, "loss": 0.4568, "step": 60150 }, { "epoch": 0.86707118458412, "grad_norm": 1.6958275532083709, "learning_rate": 5.287968810382587e-07, "loss": 0.3883, "step": 60160 }, { "epoch": 0.8672153121081533, "grad_norm": 2.0376557670640203, "learning_rate": 5.276715304720176e-07, "loss": 0.4906, "step": 60170 }, { "epoch": 0.8673594396321865, "grad_norm": 1.5407519901301552, "learning_rate": 5.265473119202997e-07, "loss": 0.4313, "step": 60180 }, { "epoch": 0.8675035671562198, "grad_norm": 1.461346291447983, "learning_rate": 5.254242256676634e-07, "loss": 0.4249, "step": 60190 }, { "epoch": 0.867647694680253, "grad_norm": 1.6277543346620345, "learning_rate": 5.243022719983781e-07, "loss": 0.4225, "step": 60200 }, { "epoch": 0.8677918222042863, "grad_norm": 1.694155354699263, "learning_rate": 5.231814511964256e-07, "loss": 0.4248, "step": 60210 }, { "epoch": 0.8679359497283197, "grad_norm": 1.7151192587348238, "learning_rate": 5.220617635455061e-07, "loss": 0.4383, "step": 60220 }, { "epoch": 0.8680800772523529, "grad_norm": 1.5275962248691286, "learning_rate": 5.209432093290273e-07, "loss": 0.4272, "step": 60230 }, { "epoch": 0.8682242047763862, "grad_norm": 1.6499866919090787, "learning_rate": 5.198257888301117e-07, "loss": 0.3966, "step": 60240 }, { "epoch": 0.8683683323004194, "grad_norm": 1.8243707688037336, "learning_rate": 5.18709502331598e-07, "loss": 0.4639, "step": 60250 }, { "epoch": 0.8685124598244527, "grad_norm": 1.5581308633592665, "learning_rate": 5.175943501160341e-07, "loss": 0.4044, "step": 60260 }, { "epoch": 0.8686565873484859, "grad_norm": 1.619050492802656, "learning_rate": 5.164803324656809e-07, "loss": 0.4249, "step": 60270 }, { "epoch": 0.8688007148725192, "grad_norm": 1.8691556682231116, "learning_rate": 5.153674496625149e-07, "loss": 0.4617, "step": 60280 }, { "epoch": 0.8689448423965525, "grad_norm": 1.400584442738805, "learning_rate": 5.142557019882221e-07, "loss": 0.4371, "step": 60290 }, { "epoch": 0.8690889699205857, "grad_norm": 1.750309131763282, "learning_rate": 5.131450897242046e-07, "loss": 0.4141, "step": 60300 }, { "epoch": 0.869233097444619, "grad_norm": 1.9452415693122338, "learning_rate": 5.120356131515736e-07, "loss": 0.4468, "step": 60310 }, { "epoch": 0.8693772249686522, "grad_norm": 1.8112296614779952, "learning_rate": 5.109272725511566e-07, "loss": 0.4622, "step": 60320 }, { "epoch": 0.8695213524926855, "grad_norm": 2.0456662057716613, "learning_rate": 5.098200682034898e-07, "loss": 0.4453, "step": 60330 }, { "epoch": 0.8696654800167188, "grad_norm": 1.4385621639949464, "learning_rate": 5.08714000388823e-07, "loss": 0.4372, "step": 60340 }, { "epoch": 0.8698096075407521, "grad_norm": 1.6477120794726121, "learning_rate": 5.07609069387121e-07, "loss": 0.4292, "step": 60350 }, { "epoch": 0.8699537350647853, "grad_norm": 1.5472359594665865, "learning_rate": 5.065052754780575e-07, "loss": 0.4208, "step": 60360 }, { "epoch": 0.8700978625888186, "grad_norm": 2.1937064888745224, "learning_rate": 5.054026189410183e-07, "loss": 0.3899, "step": 60370 }, { "epoch": 0.8702419901128519, "grad_norm": 1.462388914943136, "learning_rate": 5.043011000551052e-07, "loss": 0.4287, "step": 60380 }, { "epoch": 0.8703861176368851, "grad_norm": 1.7254015040541, "learning_rate": 5.032007190991278e-07, "loss": 0.4437, "step": 60390 }, { "epoch": 0.8705302451609184, "grad_norm": 1.6903008847950496, "learning_rate": 5.021014763516085e-07, "loss": 0.4424, "step": 60400 }, { "epoch": 0.8706743726849516, "grad_norm": 1.8472382960664773, "learning_rate": 5.010033720907836e-07, "loss": 0.4195, "step": 60410 }, { "epoch": 0.8708185002089849, "grad_norm": 1.7645194661505246, "learning_rate": 4.999064065946007e-07, "loss": 0.427, "step": 60420 }, { "epoch": 0.8709626277330181, "grad_norm": 1.45076094744917, "learning_rate": 4.988105801407178e-07, "loss": 0.4185, "step": 60430 }, { "epoch": 0.8711067552570514, "grad_norm": 1.8344099387901913, "learning_rate": 4.97715893006504e-07, "loss": 0.446, "step": 60440 }, { "epoch": 0.8712508827810848, "grad_norm": 1.7053594122525655, "learning_rate": 4.966223454690439e-07, "loss": 0.4456, "step": 60450 }, { "epoch": 0.871395010305118, "grad_norm": 1.564755939188648, "learning_rate": 4.955299378051293e-07, "loss": 0.4183, "step": 60460 }, { "epoch": 0.8715391378291513, "grad_norm": 2.014215771912221, "learning_rate": 4.944386702912645e-07, "loss": 0.4509, "step": 60470 }, { "epoch": 0.8716832653531845, "grad_norm": 1.8968099207313396, "learning_rate": 4.933485432036677e-07, "loss": 0.4746, "step": 60480 }, { "epoch": 0.8718273928772178, "grad_norm": 1.8664076608371842, "learning_rate": 4.922595568182664e-07, "loss": 0.4306, "step": 60490 }, { "epoch": 0.871971520401251, "grad_norm": 1.6478465821187351, "learning_rate": 4.911717114106985e-07, "loss": 0.4355, "step": 60500 }, { "epoch": 0.8721156479252843, "grad_norm": 1.795750223667297, "learning_rate": 4.900850072563146e-07, "loss": 0.427, "step": 60510 }, { "epoch": 0.8722597754493175, "grad_norm": 1.665797715046043, "learning_rate": 4.889994446301777e-07, "loss": 0.4423, "step": 60520 }, { "epoch": 0.8724039029733508, "grad_norm": 1.5212582872880323, "learning_rate": 4.879150238070585e-07, "loss": 0.4276, "step": 60530 }, { "epoch": 0.872548030497384, "grad_norm": 1.5658089016296084, "learning_rate": 4.868317450614407e-07, "loss": 0.4305, "step": 60540 }, { "epoch": 0.8726921580214173, "grad_norm": 1.6391266940211153, "learning_rate": 4.857496086675195e-07, "loss": 0.4308, "step": 60550 }, { "epoch": 0.8728362855454506, "grad_norm": 1.6408683100781356, "learning_rate": 4.846686148991997e-07, "loss": 0.4507, "step": 60560 }, { "epoch": 0.8729804130694839, "grad_norm": 1.4658731452832021, "learning_rate": 4.835887640300957e-07, "loss": 0.4136, "step": 60570 }, { "epoch": 0.8731245405935172, "grad_norm": 1.790408025075511, "learning_rate": 4.825100563335372e-07, "loss": 0.4509, "step": 60580 }, { "epoch": 0.8732686681175504, "grad_norm": 1.7489611850476645, "learning_rate": 4.814324920825603e-07, "loss": 0.4622, "step": 60590 }, { "epoch": 0.8734127956415837, "grad_norm": 2.2619988060642675, "learning_rate": 4.803560715499111e-07, "loss": 0.4387, "step": 60600 }, { "epoch": 0.873556923165617, "grad_norm": 1.686117465462127, "learning_rate": 4.792807950080508e-07, "loss": 0.3986, "step": 60610 }, { "epoch": 0.8737010506896502, "grad_norm": 1.7804368688486958, "learning_rate": 4.782066627291471e-07, "loss": 0.4375, "step": 60620 }, { "epoch": 0.8738451782136835, "grad_norm": 1.7545451065521662, "learning_rate": 4.771336749850786e-07, "loss": 0.4193, "step": 60630 }, { "epoch": 0.8739893057377167, "grad_norm": 1.878137045221044, "learning_rate": 4.760618320474353e-07, "loss": 0.4275, "step": 60640 }, { "epoch": 0.87413343326175, "grad_norm": 1.683936595500172, "learning_rate": 4.749911341875185e-07, "loss": 0.4336, "step": 60650 }, { "epoch": 0.8742775607857832, "grad_norm": 1.6199415425265045, "learning_rate": 4.739215816763365e-07, "loss": 0.4169, "step": 60660 }, { "epoch": 0.8744216883098165, "grad_norm": 1.778190905912577, "learning_rate": 4.7285317478460935e-07, "loss": 0.4168, "step": 60670 }, { "epoch": 0.8745658158338497, "grad_norm": 1.649926585400354, "learning_rate": 4.7178591378276794e-07, "loss": 0.4322, "step": 60680 }, { "epoch": 0.8747099433578831, "grad_norm": 1.4093695173572438, "learning_rate": 4.707197989409518e-07, "loss": 0.4044, "step": 60690 }, { "epoch": 0.8748540708819164, "grad_norm": 2.1003410492674393, "learning_rate": 4.6965483052901053e-07, "loss": 0.4257, "step": 60700 }, { "epoch": 0.8749981984059496, "grad_norm": 1.6070570616597404, "learning_rate": 4.6859100881650466e-07, "loss": 0.4427, "step": 60710 }, { "epoch": 0.8751423259299829, "grad_norm": 2.5056522865403186, "learning_rate": 4.6752833407270327e-07, "loss": 0.4198, "step": 60720 }, { "epoch": 0.8752864534540161, "grad_norm": 1.6979085242881293, "learning_rate": 4.66466806566585e-07, "loss": 0.4171, "step": 60730 }, { "epoch": 0.8754305809780494, "grad_norm": 1.9087544811190134, "learning_rate": 4.65406426566839e-07, "loss": 0.4372, "step": 60740 }, { "epoch": 0.8755747085020826, "grad_norm": 1.6605439891745966, "learning_rate": 4.643471943418648e-07, "loss": 0.4468, "step": 60750 }, { "epoch": 0.8757188360261159, "grad_norm": 1.937872224935206, "learning_rate": 4.6328911015976864e-07, "loss": 0.4583, "step": 60760 }, { "epoch": 0.8758629635501491, "grad_norm": 1.619591741079567, "learning_rate": 4.6223217428836895e-07, "loss": 0.4379, "step": 60770 }, { "epoch": 0.8760070910741824, "grad_norm": 1.5525077429043324, "learning_rate": 4.6117638699519184e-07, "loss": 0.435, "step": 60780 }, { "epoch": 0.8761512185982157, "grad_norm": 1.7973586254279128, "learning_rate": 4.601217485474718e-07, "loss": 0.4101, "step": 60790 }, { "epoch": 0.876295346122249, "grad_norm": 1.5101956499199864, "learning_rate": 4.590682592121565e-07, "loss": 0.4166, "step": 60800 }, { "epoch": 0.8764394736462823, "grad_norm": 1.5356602256600291, "learning_rate": 4.580159192558986e-07, "loss": 0.4379, "step": 60810 }, { "epoch": 0.8765836011703155, "grad_norm": 1.7018041843811669, "learning_rate": 4.5696472894506074e-07, "loss": 0.4548, "step": 60820 }, { "epoch": 0.8767277286943488, "grad_norm": 2.0681913446194335, "learning_rate": 4.559146885457172e-07, "loss": 0.446, "step": 60830 }, { "epoch": 0.876871856218382, "grad_norm": 1.3397851690157625, "learning_rate": 4.5486579832364764e-07, "loss": 0.3821, "step": 60840 }, { "epoch": 0.8770159837424153, "grad_norm": 1.797860267766215, "learning_rate": 4.5381805854434193e-07, "loss": 0.4147, "step": 60850 }, { "epoch": 0.8771601112664486, "grad_norm": 1.7008839506738525, "learning_rate": 4.5277146947299954e-07, "loss": 0.4352, "step": 60860 }, { "epoch": 0.8773042387904818, "grad_norm": 1.492662886916976, "learning_rate": 4.5172603137452974e-07, "loss": 0.4424, "step": 60870 }, { "epoch": 0.8774483663145151, "grad_norm": 1.528657717790666, "learning_rate": 4.5068174451354695e-07, "loss": 0.4581, "step": 60880 }, { "epoch": 0.8775924938385483, "grad_norm": 1.7587591889775527, "learning_rate": 4.496386091543753e-07, "loss": 0.4113, "step": 60890 }, { "epoch": 0.8777366213625816, "grad_norm": 1.6414209701100941, "learning_rate": 4.4859662556105077e-07, "loss": 0.4367, "step": 60900 }, { "epoch": 0.8778807488866148, "grad_norm": 1.314495244538257, "learning_rate": 4.4755579399731407e-07, "loss": 0.4337, "step": 60910 }, { "epoch": 0.8780248764106482, "grad_norm": 1.9565879413850296, "learning_rate": 4.465161147266145e-07, "loss": 0.4478, "step": 60920 }, { "epoch": 0.8781690039346814, "grad_norm": 1.5428571717902508, "learning_rate": 4.4547758801211207e-07, "loss": 0.4211, "step": 60930 }, { "epoch": 0.8783131314587147, "grad_norm": 1.823615760532702, "learning_rate": 4.444402141166737e-07, "loss": 0.3961, "step": 60940 }, { "epoch": 0.878457258982748, "grad_norm": 1.97496270102262, "learning_rate": 4.434039933028733e-07, "loss": 0.4019, "step": 60950 }, { "epoch": 0.8786013865067812, "grad_norm": 1.8157684450776674, "learning_rate": 4.423689258329955e-07, "loss": 0.4178, "step": 60960 }, { "epoch": 0.8787455140308145, "grad_norm": 1.6544055237195499, "learning_rate": 4.413350119690313e-07, "loss": 0.4422, "step": 60970 }, { "epoch": 0.8788896415548477, "grad_norm": 1.7243211859348877, "learning_rate": 4.403022519726807e-07, "loss": 0.4277, "step": 60980 }, { "epoch": 0.879033769078881, "grad_norm": 1.2932582226838987, "learning_rate": 4.3927064610534867e-07, "loss": 0.4235, "step": 60990 }, { "epoch": 0.8791778966029142, "grad_norm": 1.8415605230593388, "learning_rate": 4.382401946281534e-07, "loss": 0.4385, "step": 61000 }, { "epoch": 0.8793220241269475, "grad_norm": 1.6511671662818352, "learning_rate": 4.3721089780191627e-07, "loss": 0.4336, "step": 61010 }, { "epoch": 0.8794661516509807, "grad_norm": 1.2939661778226585, "learning_rate": 4.361827558871673e-07, "loss": 0.4271, "step": 61020 }, { "epoch": 0.879610279175014, "grad_norm": 1.7458148306209686, "learning_rate": 4.351557691441466e-07, "loss": 0.4144, "step": 61030 }, { "epoch": 0.8797544066990474, "grad_norm": 1.5223582352187803, "learning_rate": 4.3412993783279953e-07, "loss": 0.4506, "step": 61040 }, { "epoch": 0.8798985342230806, "grad_norm": 1.7845797839085893, "learning_rate": 4.331052622127785e-07, "loss": 0.4368, "step": 61050 }, { "epoch": 0.8800426617471139, "grad_norm": 1.6021100951073772, "learning_rate": 4.320817425434459e-07, "loss": 0.4267, "step": 61060 }, { "epoch": 0.8801867892711471, "grad_norm": 1.6653465317784282, "learning_rate": 4.310593790838696e-07, "loss": 0.4228, "step": 61070 }, { "epoch": 0.8803309167951804, "grad_norm": 1.7517784388459172, "learning_rate": 4.3003817209282596e-07, "loss": 0.4142, "step": 61080 }, { "epoch": 0.8804750443192136, "grad_norm": 1.7501158320002541, "learning_rate": 4.29018121828797e-07, "loss": 0.4301, "step": 61090 }, { "epoch": 0.8806191718432469, "grad_norm": 1.6412026789057963, "learning_rate": 4.27999228549974e-07, "loss": 0.425, "step": 61100 }, { "epoch": 0.8807632993672802, "grad_norm": 1.6001638746257334, "learning_rate": 4.26981492514254e-07, "loss": 0.409, "step": 61110 }, { "epoch": 0.8809074268913134, "grad_norm": 1.79879315335898, "learning_rate": 4.25964913979241e-07, "loss": 0.431, "step": 61120 }, { "epoch": 0.8810515544153467, "grad_norm": 1.6735215752935118, "learning_rate": 4.24949493202248e-07, "loss": 0.4532, "step": 61130 }, { "epoch": 0.8811956819393799, "grad_norm": 1.620702187079473, "learning_rate": 4.2393523044029215e-07, "loss": 0.4203, "step": 61140 }, { "epoch": 0.8813398094634133, "grad_norm": 1.717055863490049, "learning_rate": 4.229221259500982e-07, "loss": 0.4251, "step": 61150 }, { "epoch": 0.8814839369874465, "grad_norm": 1.5714643612011254, "learning_rate": 4.2191017998810037e-07, "loss": 0.4068, "step": 61160 }, { "epoch": 0.8816280645114798, "grad_norm": 1.7355016388749611, "learning_rate": 4.2089939281043655e-07, "loss": 0.4537, "step": 61170 }, { "epoch": 0.881772192035513, "grad_norm": 1.4953128970028897, "learning_rate": 4.198897646729516e-07, "loss": 0.4072, "step": 61180 }, { "epoch": 0.8819163195595463, "grad_norm": 1.8302993285701954, "learning_rate": 4.188812958311983e-07, "loss": 0.4327, "step": 61190 }, { "epoch": 0.8820604470835796, "grad_norm": 1.6822760534930974, "learning_rate": 4.1787398654043685e-07, "loss": 0.4387, "step": 61200 }, { "epoch": 0.8822045746076128, "grad_norm": 1.6347719789185755, "learning_rate": 4.1686783705563115e-07, "loss": 0.4262, "step": 61210 }, { "epoch": 0.8823487021316461, "grad_norm": 1.7411881012599562, "learning_rate": 4.1586284763145303e-07, "loss": 0.4329, "step": 61220 }, { "epoch": 0.8824928296556793, "grad_norm": 1.9218075245704005, "learning_rate": 4.148590185222812e-07, "loss": 0.4267, "step": 61230 }, { "epoch": 0.8826369571797126, "grad_norm": 1.4686371035574133, "learning_rate": 4.1385634998220015e-07, "loss": 0.4442, "step": 61240 }, { "epoch": 0.8827810847037458, "grad_norm": 1.8691574066679437, "learning_rate": 4.1285484226499975e-07, "loss": 0.4216, "step": 61250 }, { "epoch": 0.8829252122277791, "grad_norm": 1.849333774051145, "learning_rate": 4.1185449562417765e-07, "loss": 0.4276, "step": 61260 }, { "epoch": 0.8830693397518125, "grad_norm": 2.032567801300828, "learning_rate": 4.10855310312937e-07, "loss": 0.4326, "step": 61270 }, { "epoch": 0.8832134672758457, "grad_norm": 1.5223975263077372, "learning_rate": 4.0985728658418544e-07, "loss": 0.4132, "step": 61280 }, { "epoch": 0.883357594799879, "grad_norm": 1.5295519590953286, "learning_rate": 4.0886042469053866e-07, "loss": 0.4302, "step": 61290 }, { "epoch": 0.8835017223239122, "grad_norm": 1.8650706478724437, "learning_rate": 4.0786472488431927e-07, "loss": 0.4384, "step": 61300 }, { "epoch": 0.8836458498479455, "grad_norm": 1.9252732256287588, "learning_rate": 4.0687018741755234e-07, "loss": 0.4528, "step": 61310 }, { "epoch": 0.8837899773719787, "grad_norm": 1.7129036024621829, "learning_rate": 4.0587681254196986e-07, "loss": 0.4486, "step": 61320 }, { "epoch": 0.883934104896012, "grad_norm": 1.4902470317667413, "learning_rate": 4.0488460050901235e-07, "loss": 0.427, "step": 61330 }, { "epoch": 0.8840782324200452, "grad_norm": 1.6620063323615197, "learning_rate": 4.038935515698228e-07, "loss": 0.4436, "step": 61340 }, { "epoch": 0.8842223599440785, "grad_norm": 1.6667560422596375, "learning_rate": 4.0290366597524943e-07, "loss": 0.4187, "step": 61350 }, { "epoch": 0.8843664874681118, "grad_norm": 1.702367767913968, "learning_rate": 4.019149439758496e-07, "loss": 0.4197, "step": 61360 }, { "epoch": 0.884510614992145, "grad_norm": 1.6729869551575103, "learning_rate": 4.009273858218826e-07, "loss": 0.4177, "step": 61370 }, { "epoch": 0.8846547425161783, "grad_norm": 1.5887832512637652, "learning_rate": 3.99940991763314e-07, "loss": 0.4148, "step": 61380 }, { "epoch": 0.8847988700402116, "grad_norm": 2.1851797965644058, "learning_rate": 3.9895576204981634e-07, "loss": 0.4676, "step": 61390 }, { "epoch": 0.8849429975642449, "grad_norm": 1.6462779482663905, "learning_rate": 3.9797169693076567e-07, "loss": 0.4297, "step": 61400 }, { "epoch": 0.8850871250882781, "grad_norm": 1.7845708845245294, "learning_rate": 3.969887966552438e-07, "loss": 0.411, "step": 61410 }, { "epoch": 0.8852312526123114, "grad_norm": 1.834921742846996, "learning_rate": 3.960070614720385e-07, "loss": 0.422, "step": 61420 }, { "epoch": 0.8853753801363446, "grad_norm": 2.142735943114194, "learning_rate": 3.9502649162964203e-07, "loss": 0.4482, "step": 61430 }, { "epoch": 0.8855195076603779, "grad_norm": 1.7757986921379365, "learning_rate": 3.9404708737624974e-07, "loss": 0.4598, "step": 61440 }, { "epoch": 0.8856636351844112, "grad_norm": 1.8591681813397358, "learning_rate": 3.9306884895976617e-07, "loss": 0.4239, "step": 61450 }, { "epoch": 0.8858077627084444, "grad_norm": 1.4224340672238194, "learning_rate": 3.9209177662779705e-07, "loss": 0.4011, "step": 61460 }, { "epoch": 0.8859518902324777, "grad_norm": 1.5033661575041692, "learning_rate": 3.911158706276541e-07, "loss": 0.4329, "step": 61470 }, { "epoch": 0.8860960177565109, "grad_norm": 1.5718273601380246, "learning_rate": 3.901411312063541e-07, "loss": 0.3924, "step": 61480 }, { "epoch": 0.8862401452805442, "grad_norm": 1.9905008440058791, "learning_rate": 3.891675586106192e-07, "loss": 0.436, "step": 61490 }, { "epoch": 0.8863842728045775, "grad_norm": 1.6788483310892346, "learning_rate": 3.881951530868749e-07, "loss": 0.4296, "step": 61500 }, { "epoch": 0.8865284003286108, "grad_norm": 1.6848173057430644, "learning_rate": 3.8722391488125065e-07, "loss": 0.3969, "step": 61510 }, { "epoch": 0.886672527852644, "grad_norm": 1.7357096986668068, "learning_rate": 3.862538442395847e-07, "loss": 0.4206, "step": 61520 }, { "epoch": 0.8868166553766773, "grad_norm": 1.6466832077925764, "learning_rate": 3.852849414074145e-07, "loss": 0.4016, "step": 61530 }, { "epoch": 0.8869607829007106, "grad_norm": 1.5357475594357528, "learning_rate": 3.8431720662998384e-07, "loss": 0.4192, "step": 61540 }, { "epoch": 0.8871049104247438, "grad_norm": 1.6480691909320035, "learning_rate": 3.83350640152243e-07, "loss": 0.4336, "step": 61550 }, { "epoch": 0.8872490379487771, "grad_norm": 1.6259001654340999, "learning_rate": 3.823852422188434e-07, "loss": 0.427, "step": 61560 }, { "epoch": 0.8873931654728103, "grad_norm": 1.641520973619801, "learning_rate": 3.814210130741414e-07, "loss": 0.4432, "step": 61570 }, { "epoch": 0.8875372929968436, "grad_norm": 1.7774125277967507, "learning_rate": 3.804579529621999e-07, "loss": 0.4412, "step": 61580 }, { "epoch": 0.8876814205208768, "grad_norm": 2.697599991230594, "learning_rate": 3.794960621267829e-07, "loss": 0.4546, "step": 61590 }, { "epoch": 0.8878255480449101, "grad_norm": 1.5317941197963505, "learning_rate": 3.7853534081135903e-07, "loss": 0.3892, "step": 61600 }, { "epoch": 0.8879696755689434, "grad_norm": 1.5709933619078642, "learning_rate": 3.775757892591031e-07, "loss": 0.4193, "step": 61610 }, { "epoch": 0.8881138030929767, "grad_norm": 1.5857606974319043, "learning_rate": 3.7661740771289036e-07, "loss": 0.456, "step": 61620 }, { "epoch": 0.88825793061701, "grad_norm": 1.8538763936532343, "learning_rate": 3.7566019641530395e-07, "loss": 0.4469, "step": 61630 }, { "epoch": 0.8884020581410432, "grad_norm": 1.6033589635850363, "learning_rate": 3.747041556086267e-07, "loss": 0.4272, "step": 61640 }, { "epoch": 0.8885461856650765, "grad_norm": 1.6101926809950513, "learning_rate": 3.73749285534849e-07, "loss": 0.4347, "step": 61650 }, { "epoch": 0.8886903131891097, "grad_norm": 1.6607730671429437, "learning_rate": 3.7279558643566137e-07, "loss": 0.4266, "step": 61660 }, { "epoch": 0.888834440713143, "grad_norm": 1.7052685606081848, "learning_rate": 3.718430585524596e-07, "loss": 0.4269, "step": 61670 }, { "epoch": 0.8889785682371762, "grad_norm": 1.7368638166975736, "learning_rate": 3.7089170212634475e-07, "loss": 0.4232, "step": 61680 }, { "epoch": 0.8891226957612095, "grad_norm": 1.522211168841321, "learning_rate": 3.6994151739811855e-07, "loss": 0.4499, "step": 61690 }, { "epoch": 0.8892668232852428, "grad_norm": 1.727716720554557, "learning_rate": 3.6899250460828597e-07, "loss": 0.4125, "step": 61700 }, { "epoch": 0.889410950809276, "grad_norm": 1.6707790121933068, "learning_rate": 3.680446639970592e-07, "loss": 0.412, "step": 61710 }, { "epoch": 0.8895550783333093, "grad_norm": 1.6462011094082234, "learning_rate": 3.6709799580434914e-07, "loss": 0.4248, "step": 61720 }, { "epoch": 0.8896992058573425, "grad_norm": 1.8212763397888065, "learning_rate": 3.661525002697719e-07, "loss": 0.4428, "step": 61730 }, { "epoch": 0.8898433333813759, "grad_norm": 1.6962124345460678, "learning_rate": 3.652081776326477e-07, "loss": 0.4205, "step": 61740 }, { "epoch": 0.8899874609054091, "grad_norm": 1.764049689373594, "learning_rate": 3.642650281319998e-07, "loss": 0.435, "step": 61750 }, { "epoch": 0.8901315884294424, "grad_norm": 1.5972469215758536, "learning_rate": 3.633230520065528e-07, "loss": 0.4244, "step": 61760 }, { "epoch": 0.8902757159534757, "grad_norm": 1.618986455993786, "learning_rate": 3.623822494947343e-07, "loss": 0.4192, "step": 61770 }, { "epoch": 0.8904198434775089, "grad_norm": 1.8077286762476599, "learning_rate": 3.614426208346772e-07, "loss": 0.4174, "step": 61780 }, { "epoch": 0.8905639710015422, "grad_norm": 2.007688739870771, "learning_rate": 3.6050416626421504e-07, "loss": 0.4267, "step": 61790 }, { "epoch": 0.8907080985255754, "grad_norm": 1.5734761377570277, "learning_rate": 3.595668860208845e-07, "loss": 0.4476, "step": 61800 }, { "epoch": 0.8908522260496087, "grad_norm": 1.5234308967674688, "learning_rate": 3.5863078034192756e-07, "loss": 0.4038, "step": 61810 }, { "epoch": 0.8909963535736419, "grad_norm": 1.8215004301370137, "learning_rate": 3.5769584946428513e-07, "loss": 0.4506, "step": 61820 }, { "epoch": 0.8911404810976752, "grad_norm": 1.7448102436845272, "learning_rate": 3.567620936246019e-07, "loss": 0.4126, "step": 61830 }, { "epoch": 0.8912846086217084, "grad_norm": 1.795701744107655, "learning_rate": 3.558295130592265e-07, "loss": 0.4217, "step": 61840 }, { "epoch": 0.8914287361457418, "grad_norm": 1.6669656363012662, "learning_rate": 3.5489810800421023e-07, "loss": 0.4148, "step": 61850 }, { "epoch": 0.8915728636697751, "grad_norm": 1.6193513114842863, "learning_rate": 3.539678786953055e-07, "loss": 0.4393, "step": 61860 }, { "epoch": 0.8917169911938083, "grad_norm": 1.845639165752408, "learning_rate": 3.5303882536796616e-07, "loss": 0.4515, "step": 61870 }, { "epoch": 0.8918611187178416, "grad_norm": 1.734649779642527, "learning_rate": 3.5211094825735147e-07, "loss": 0.4293, "step": 61880 }, { "epoch": 0.8920052462418748, "grad_norm": 1.898264025751943, "learning_rate": 3.5118424759832014e-07, "loss": 0.43, "step": 61890 }, { "epoch": 0.8921493737659081, "grad_norm": 2.26433780288896, "learning_rate": 3.5025872362543456e-07, "loss": 0.4312, "step": 61900 }, { "epoch": 0.8922935012899413, "grad_norm": 1.8142210027492736, "learning_rate": 3.4933437657295957e-07, "loss": 0.426, "step": 61910 }, { "epoch": 0.8924376288139746, "grad_norm": 1.4762694747526333, "learning_rate": 3.484112066748607e-07, "loss": 0.4115, "step": 61920 }, { "epoch": 0.8925817563380078, "grad_norm": 1.5442747578357712, "learning_rate": 3.474892141648056e-07, "loss": 0.4502, "step": 61930 }, { "epoch": 0.8927258838620411, "grad_norm": 1.7068337494410344, "learning_rate": 3.46568399276167e-07, "loss": 0.4191, "step": 61940 }, { "epoch": 0.8928700113860744, "grad_norm": 1.8012968270078487, "learning_rate": 3.4564876224201514e-07, "loss": 0.4462, "step": 61950 }, { "epoch": 0.8930141389101076, "grad_norm": 1.4163672647273995, "learning_rate": 3.447303032951249e-07, "loss": 0.426, "step": 61960 }, { "epoch": 0.893158266434141, "grad_norm": 1.8847779081238765, "learning_rate": 3.43813022667972e-07, "loss": 0.4286, "step": 61970 }, { "epoch": 0.8933023939581742, "grad_norm": 1.8803261209372655, "learning_rate": 3.4289692059273516e-07, "loss": 0.4321, "step": 61980 }, { "epoch": 0.8934465214822075, "grad_norm": 1.9182740423920444, "learning_rate": 3.419819973012928e-07, "loss": 0.4385, "step": 61990 }, { "epoch": 0.8935906490062407, "grad_norm": 1.5801345605362733, "learning_rate": 3.4106825302522516e-07, "loss": 0.4421, "step": 62000 }, { "epoch": 0.893734776530274, "grad_norm": 1.526930113270712, "learning_rate": 3.401556879958168e-07, "loss": 0.4228, "step": 62010 }, { "epoch": 0.8938789040543073, "grad_norm": 1.5816775734932265, "learning_rate": 3.392443024440506e-07, "loss": 0.4288, "step": 62020 }, { "epoch": 0.8940230315783405, "grad_norm": 1.5306213471133032, "learning_rate": 3.3833409660061155e-07, "loss": 0.4393, "step": 62030 }, { "epoch": 0.8941671591023738, "grad_norm": 1.678237154466543, "learning_rate": 3.374250706958887e-07, "loss": 0.4093, "step": 62040 }, { "epoch": 0.894311286626407, "grad_norm": 1.552745001698903, "learning_rate": 3.3651722495996905e-07, "loss": 0.4354, "step": 62050 }, { "epoch": 0.8944554141504403, "grad_norm": 1.581429532502726, "learning_rate": 3.356105596226411e-07, "loss": 0.4124, "step": 62060 }, { "epoch": 0.8945995416744735, "grad_norm": 1.6963189990787129, "learning_rate": 3.347050749133973e-07, "loss": 0.4639, "step": 62070 }, { "epoch": 0.8947436691985068, "grad_norm": 1.8503874554460265, "learning_rate": 3.338007710614294e-07, "loss": 0.4294, "step": 62080 }, { "epoch": 0.8948877967225402, "grad_norm": 1.4644787852557712, "learning_rate": 3.3289764829562976e-07, "loss": 0.4136, "step": 62090 }, { "epoch": 0.8950319242465734, "grad_norm": 1.7073881494239793, "learning_rate": 3.319957068445939e-07, "loss": 0.4335, "step": 62100 }, { "epoch": 0.8951760517706067, "grad_norm": 1.6275193451403946, "learning_rate": 3.310949469366165e-07, "loss": 0.4431, "step": 62110 }, { "epoch": 0.8953201792946399, "grad_norm": 2.0416956045632144, "learning_rate": 3.301953687996928e-07, "loss": 0.4373, "step": 62120 }, { "epoch": 0.8954643068186732, "grad_norm": 1.6264411575692799, "learning_rate": 3.2929697266151917e-07, "loss": 0.4232, "step": 62130 }, { "epoch": 0.8956084343427064, "grad_norm": 1.548610753662318, "learning_rate": 3.283997587494953e-07, "loss": 0.4187, "step": 62140 }, { "epoch": 0.8957525618667397, "grad_norm": 1.819644242310398, "learning_rate": 3.2750372729071887e-07, "loss": 0.4265, "step": 62150 }, { "epoch": 0.8958966893907729, "grad_norm": 1.6115287187506318, "learning_rate": 3.266088785119881e-07, "loss": 0.4484, "step": 62160 }, { "epoch": 0.8960408169148062, "grad_norm": 1.3734957008586623, "learning_rate": 3.25715212639805e-07, "loss": 0.425, "step": 62170 }, { "epoch": 0.8961849444388394, "grad_norm": 1.783785554726618, "learning_rate": 3.2482272990036756e-07, "loss": 0.4084, "step": 62180 }, { "epoch": 0.8963290719628727, "grad_norm": 1.4472308516402907, "learning_rate": 3.2393143051957786e-07, "loss": 0.441, "step": 62190 }, { "epoch": 0.8964731994869061, "grad_norm": 1.7336377737885784, "learning_rate": 3.2304131472303877e-07, "loss": 0.4298, "step": 62200 }, { "epoch": 0.8966173270109393, "grad_norm": 1.7860662670459648, "learning_rate": 3.2215238273605064e-07, "loss": 0.4379, "step": 62210 }, { "epoch": 0.8967614545349726, "grad_norm": 1.680870031419123, "learning_rate": 3.2126463478361515e-07, "loss": 0.4269, "step": 62220 }, { "epoch": 0.8969055820590058, "grad_norm": 1.6182465441432297, "learning_rate": 3.203780710904364e-07, "loss": 0.4295, "step": 62230 }, { "epoch": 0.8970497095830391, "grad_norm": 1.6438059061734562, "learning_rate": 3.1949269188091657e-07, "loss": 0.4376, "step": 62240 }, { "epoch": 0.8971938371070723, "grad_norm": 1.8800692995392145, "learning_rate": 3.1860849737915754e-07, "loss": 0.4272, "step": 62250 }, { "epoch": 0.8973379646311056, "grad_norm": 1.7335356039956396, "learning_rate": 3.177254878089642e-07, "loss": 0.4168, "step": 62260 }, { "epoch": 0.8974820921551389, "grad_norm": 1.5777083487217591, "learning_rate": 3.168436633938393e-07, "loss": 0.4324, "step": 62270 }, { "epoch": 0.8976262196791721, "grad_norm": 1.5649055548250304, "learning_rate": 3.1596302435698446e-07, "loss": 0.4089, "step": 62280 }, { "epoch": 0.8977703472032054, "grad_norm": 1.655186297401593, "learning_rate": 3.150835709213046e-07, "loss": 0.4269, "step": 62290 }, { "epoch": 0.8979144747272386, "grad_norm": 1.8355278864040723, "learning_rate": 3.1420530330940225e-07, "loss": 0.4156, "step": 62300 }, { "epoch": 0.8980586022512719, "grad_norm": 1.5721364350153908, "learning_rate": 3.133282217435807e-07, "loss": 0.4245, "step": 62310 }, { "epoch": 0.8982027297753052, "grad_norm": 1.8305660183695296, "learning_rate": 3.1245232644584176e-07, "loss": 0.4424, "step": 62320 }, { "epoch": 0.8983468572993385, "grad_norm": 1.6177237406189968, "learning_rate": 3.1157761763788873e-07, "loss": 0.4362, "step": 62330 }, { "epoch": 0.8984909848233718, "grad_norm": 1.7041383103449121, "learning_rate": 3.107040955411239e-07, "loss": 0.4419, "step": 62340 }, { "epoch": 0.898635112347405, "grad_norm": 1.8756321628289605, "learning_rate": 3.0983176037664765e-07, "loss": 0.4048, "step": 62350 }, { "epoch": 0.8987792398714383, "grad_norm": 1.6524212195232413, "learning_rate": 3.0896061236526277e-07, "loss": 0.4167, "step": 62360 }, { "epoch": 0.8989233673954715, "grad_norm": 1.4865452683218672, "learning_rate": 3.0809065172746957e-07, "loss": 0.4301, "step": 62370 }, { "epoch": 0.8990674949195048, "grad_norm": 1.6700469294729263, "learning_rate": 3.07221878683468e-07, "loss": 0.4478, "step": 62380 }, { "epoch": 0.899211622443538, "grad_norm": 1.6404450682778224, "learning_rate": 3.063542934531583e-07, "loss": 0.4123, "step": 62390 }, { "epoch": 0.8993557499675713, "grad_norm": 1.9326273706897388, "learning_rate": 3.0548789625613917e-07, "loss": 0.4543, "step": 62400 }, { "epoch": 0.8994998774916045, "grad_norm": 1.5541933453109391, "learning_rate": 3.046226873117092e-07, "loss": 0.3969, "step": 62410 }, { "epoch": 0.8996440050156378, "grad_norm": 1.3411974279941319, "learning_rate": 3.037586668388659e-07, "loss": 0.4027, "step": 62420 }, { "epoch": 0.899788132539671, "grad_norm": 1.715623825052082, "learning_rate": 3.0289583505630606e-07, "loss": 0.4509, "step": 62430 }, { "epoch": 0.8999322600637044, "grad_norm": 1.6777420766338242, "learning_rate": 3.0203419218242604e-07, "loss": 0.443, "step": 62440 }, { "epoch": 0.9000763875877377, "grad_norm": 1.883777255568555, "learning_rate": 3.011737384353192e-07, "loss": 0.4371, "step": 62450 }, { "epoch": 0.9002205151117709, "grad_norm": 1.772220646263764, "learning_rate": 3.0031447403278123e-07, "loss": 0.4363, "step": 62460 }, { "epoch": 0.9003646426358042, "grad_norm": 1.8435491591118909, "learning_rate": 2.994563991923044e-07, "loss": 0.4607, "step": 62470 }, { "epoch": 0.9005087701598374, "grad_norm": 1.5671723791263044, "learning_rate": 2.985995141310799e-07, "loss": 0.4072, "step": 62480 }, { "epoch": 0.9006528976838707, "grad_norm": 1.5527743736031163, "learning_rate": 2.9774381906599927e-07, "loss": 0.4272, "step": 62490 }, { "epoch": 0.900797025207904, "grad_norm": 1.9240671857861278, "learning_rate": 2.9688931421365253e-07, "loss": 0.4268, "step": 62500 }, { "epoch": 0.9009411527319372, "grad_norm": 1.7253940098305118, "learning_rate": 2.960359997903256e-07, "loss": 0.4429, "step": 62510 }, { "epoch": 0.9010852802559705, "grad_norm": 1.5998846591928886, "learning_rate": 2.951838760120074e-07, "loss": 0.398, "step": 62520 }, { "epoch": 0.9012294077800037, "grad_norm": 1.6366990771572636, "learning_rate": 2.943329430943831e-07, "loss": 0.4293, "step": 62530 }, { "epoch": 0.901373535304037, "grad_norm": 1.495263912312282, "learning_rate": 2.9348320125283713e-07, "loss": 0.4277, "step": 62540 }, { "epoch": 0.9015176628280703, "grad_norm": 1.6834041029537203, "learning_rate": 2.926346507024502e-07, "loss": 0.4207, "step": 62550 }, { "epoch": 0.9016617903521036, "grad_norm": 1.5615946194875658, "learning_rate": 2.917872916580061e-07, "loss": 0.417, "step": 62560 }, { "epoch": 0.9018059178761368, "grad_norm": 1.7804416320610599, "learning_rate": 2.909411243339827e-07, "loss": 0.4434, "step": 62570 }, { "epoch": 0.9019500454001701, "grad_norm": 1.9441804638743112, "learning_rate": 2.900961489445575e-07, "loss": 0.4205, "step": 62580 }, { "epoch": 0.9020941729242034, "grad_norm": 1.638673704457773, "learning_rate": 2.892523657036078e-07, "loss": 0.4331, "step": 62590 }, { "epoch": 0.9022383004482366, "grad_norm": 1.7984870339931416, "learning_rate": 2.8840977482470775e-07, "loss": 0.4081, "step": 62600 }, { "epoch": 0.9023824279722699, "grad_norm": 1.273845950451001, "learning_rate": 2.87568376521129e-07, "loss": 0.4243, "step": 62610 }, { "epoch": 0.9025265554963031, "grad_norm": 2.028860674202935, "learning_rate": 2.8672817100584226e-07, "loss": 0.4638, "step": 62620 }, { "epoch": 0.9026706830203364, "grad_norm": 1.7229883495776093, "learning_rate": 2.8588915849151855e-07, "loss": 0.4323, "step": 62630 }, { "epoch": 0.9028148105443696, "grad_norm": 1.5466146285596019, "learning_rate": 2.85051339190523e-07, "loss": 0.4348, "step": 62640 }, { "epoch": 0.9029589380684029, "grad_norm": 1.729599633984599, "learning_rate": 2.8421471331492036e-07, "loss": 0.4235, "step": 62650 }, { "epoch": 0.9031030655924361, "grad_norm": 1.7271223335492296, "learning_rate": 2.833792810764741e-07, "loss": 0.4444, "step": 62660 }, { "epoch": 0.9032471931164695, "grad_norm": 1.4652927503935336, "learning_rate": 2.825450426866444e-07, "loss": 0.442, "step": 62670 }, { "epoch": 0.9033913206405028, "grad_norm": 1.6230996434314802, "learning_rate": 2.817119983565891e-07, "loss": 0.4219, "step": 62680 }, { "epoch": 0.903535448164536, "grad_norm": 1.6418836467791516, "learning_rate": 2.808801482971657e-07, "loss": 0.4295, "step": 62690 }, { "epoch": 0.9036795756885693, "grad_norm": 1.7406324759767635, "learning_rate": 2.800494927189279e-07, "loss": 0.4466, "step": 62700 }, { "epoch": 0.9038237032126025, "grad_norm": 1.658489678809848, "learning_rate": 2.792200318321264e-07, "loss": 0.4275, "step": 62710 }, { "epoch": 0.9039678307366358, "grad_norm": 1.8685124101181987, "learning_rate": 2.783917658467111e-07, "loss": 0.4234, "step": 62720 }, { "epoch": 0.904111958260669, "grad_norm": 1.5831925214072875, "learning_rate": 2.7756469497232816e-07, "loss": 0.4165, "step": 62730 }, { "epoch": 0.9042560857847023, "grad_norm": 1.4358724486407952, "learning_rate": 2.7673881941832345e-07, "loss": 0.4203, "step": 62740 }, { "epoch": 0.9044002133087355, "grad_norm": 1.4916866417764783, "learning_rate": 2.759141393937359e-07, "loss": 0.4316, "step": 62750 }, { "epoch": 0.9045443408327688, "grad_norm": 1.5372162226431956, "learning_rate": 2.750906551073074e-07, "loss": 0.4071, "step": 62760 }, { "epoch": 0.9046884683568021, "grad_norm": 1.5306797928040599, "learning_rate": 2.7426836676747335e-07, "loss": 0.4298, "step": 62770 }, { "epoch": 0.9048325958808353, "grad_norm": 1.6862255170116387, "learning_rate": 2.734472745823663e-07, "loss": 0.4533, "step": 62780 }, { "epoch": 0.9049767234048687, "grad_norm": 1.8207789028660806, "learning_rate": 2.7262737875981883e-07, "loss": 0.4187, "step": 62790 }, { "epoch": 0.9051208509289019, "grad_norm": 1.6774484529959846, "learning_rate": 2.7180867950735825e-07, "loss": 0.4341, "step": 62800 }, { "epoch": 0.9052649784529352, "grad_norm": 1.6074202912510054, "learning_rate": 2.7099117703220933e-07, "loss": 0.4239, "step": 62810 }, { "epoch": 0.9054091059769684, "grad_norm": 1.839871496122434, "learning_rate": 2.7017487154129606e-07, "loss": 0.4324, "step": 62820 }, { "epoch": 0.9055532335010017, "grad_norm": 1.5814058991025994, "learning_rate": 2.69359763241237e-07, "loss": 0.4394, "step": 62830 }, { "epoch": 0.905697361025035, "grad_norm": 1.619211103001716, "learning_rate": 2.685458523383466e-07, "loss": 0.4275, "step": 62840 }, { "epoch": 0.9058414885490682, "grad_norm": 1.7916772411978144, "learning_rate": 2.677331390386412e-07, "loss": 0.4114, "step": 62850 }, { "epoch": 0.9059856160731015, "grad_norm": 1.5320875861765686, "learning_rate": 2.669216235478295e-07, "loss": 0.4043, "step": 62860 }, { "epoch": 0.9061297435971347, "grad_norm": 1.52251679491105, "learning_rate": 2.6611130607131776e-07, "loss": 0.4391, "step": 62870 }, { "epoch": 0.906273871121168, "grad_norm": 1.5330263148892018, "learning_rate": 2.653021868142114e-07, "loss": 0.4271, "step": 62880 }, { "epoch": 0.9064179986452012, "grad_norm": 1.6235934029557626, "learning_rate": 2.6449426598130934e-07, "loss": 0.3994, "step": 62890 }, { "epoch": 0.9065621261692345, "grad_norm": 1.756385572992713, "learning_rate": 2.6368754377710914e-07, "loss": 0.4255, "step": 62900 }, { "epoch": 0.9067062536932678, "grad_norm": 1.9034843686071337, "learning_rate": 2.628820204058047e-07, "loss": 0.4348, "step": 62910 }, { "epoch": 0.9068503812173011, "grad_norm": 1.9185048602408377, "learning_rate": 2.6207769607128675e-07, "loss": 0.4066, "step": 62920 }, { "epoch": 0.9069945087413344, "grad_norm": 1.4753417328376945, "learning_rate": 2.612745709771408e-07, "loss": 0.4283, "step": 62930 }, { "epoch": 0.9071386362653676, "grad_norm": 1.7143953549030468, "learning_rate": 2.6047264532664985e-07, "loss": 0.4172, "step": 62940 }, { "epoch": 0.9072827637894009, "grad_norm": 1.5564671048340217, "learning_rate": 2.596719193227948e-07, "loss": 0.4606, "step": 62950 }, { "epoch": 0.9074268913134341, "grad_norm": 1.7149012535558605, "learning_rate": 2.5887239316825186e-07, "loss": 0.4227, "step": 62960 }, { "epoch": 0.9075710188374674, "grad_norm": 1.6670547980582855, "learning_rate": 2.580740670653914e-07, "loss": 0.444, "step": 62970 }, { "epoch": 0.9077151463615006, "grad_norm": 1.661576060092272, "learning_rate": 2.57276941216284e-07, "loss": 0.4574, "step": 62980 }, { "epoch": 0.9078592738855339, "grad_norm": 1.4839895015023052, "learning_rate": 2.5648101582269325e-07, "loss": 0.4265, "step": 62990 }, { "epoch": 0.9080034014095671, "grad_norm": 1.7565622597185784, "learning_rate": 2.556862910860797e-07, "loss": 0.4281, "step": 63000 }, { "epoch": 0.9081475289336004, "grad_norm": 1.6219552338369467, "learning_rate": 2.548927672076018e-07, "loss": 0.4191, "step": 63010 }, { "epoch": 0.9082916564576338, "grad_norm": 1.4619984767741343, "learning_rate": 2.541004443881112e-07, "loss": 0.4425, "step": 63020 }, { "epoch": 0.908435783981667, "grad_norm": 1.757202242071179, "learning_rate": 2.5330932282815635e-07, "loss": 0.4423, "step": 63030 }, { "epoch": 0.9085799115057003, "grad_norm": 1.6894428781192659, "learning_rate": 2.5251940272798367e-07, "loss": 0.4211, "step": 63040 }, { "epoch": 0.9087240390297335, "grad_norm": 1.6940568284264412, "learning_rate": 2.517306842875339e-07, "loss": 0.4208, "step": 63050 }, { "epoch": 0.9088681665537668, "grad_norm": 1.5626101403360528, "learning_rate": 2.509431677064422e-07, "loss": 0.4356, "step": 63060 }, { "epoch": 0.9090122940778, "grad_norm": 2.072834185704468, "learning_rate": 2.50156853184042e-07, "loss": 0.4304, "step": 63070 }, { "epoch": 0.9091564216018333, "grad_norm": 1.524729506600102, "learning_rate": 2.493717409193619e-07, "loss": 0.3967, "step": 63080 }, { "epoch": 0.9093005491258666, "grad_norm": 1.7464987795384708, "learning_rate": 2.485878311111256e-07, "loss": 0.4427, "step": 63090 }, { "epoch": 0.9094446766498998, "grad_norm": 1.6654906940531107, "learning_rate": 2.478051239577517e-07, "loss": 0.4132, "step": 63100 }, { "epoch": 0.9095888041739331, "grad_norm": 1.4731196076772664, "learning_rate": 2.4702361965735664e-07, "loss": 0.4317, "step": 63110 }, { "epoch": 0.9097329316979663, "grad_norm": 1.6865206674803421, "learning_rate": 2.4624331840775063e-07, "loss": 0.4286, "step": 63120 }, { "epoch": 0.9098770592219996, "grad_norm": 1.653674924903928, "learning_rate": 2.454642204064389e-07, "loss": 0.4014, "step": 63130 }, { "epoch": 0.9100211867460329, "grad_norm": 1.891299705795535, "learning_rate": 2.446863258506243e-07, "loss": 0.4649, "step": 63140 }, { "epoch": 0.9101653142700662, "grad_norm": 1.7464041556652674, "learning_rate": 2.439096349372033e-07, "loss": 0.4222, "step": 63150 }, { "epoch": 0.9103094417940994, "grad_norm": 1.6128592358811795, "learning_rate": 2.4313414786276747e-07, "loss": 0.4083, "step": 63160 }, { "epoch": 0.9104535693181327, "grad_norm": 1.5864795928308737, "learning_rate": 2.423598648236053e-07, "loss": 0.4083, "step": 63170 }, { "epoch": 0.910597696842166, "grad_norm": 1.5893037163772183, "learning_rate": 2.415867860156995e-07, "loss": 0.4403, "step": 63180 }, { "epoch": 0.9107418243661992, "grad_norm": 1.2465198395788006, "learning_rate": 2.408149116347286e-07, "loss": 0.4097, "step": 63190 }, { "epoch": 0.9108859518902325, "grad_norm": 1.920831304481136, "learning_rate": 2.400442418760646e-07, "loss": 0.4261, "step": 63200 }, { "epoch": 0.9110300794142657, "grad_norm": 1.6415240477675501, "learning_rate": 2.3927477693477694e-07, "loss": 0.4399, "step": 63210 }, { "epoch": 0.911174206938299, "grad_norm": 1.7194926155148134, "learning_rate": 2.385065170056283e-07, "loss": 0.4142, "step": 63220 }, { "epoch": 0.9113183344623322, "grad_norm": 1.5404728726709764, "learning_rate": 2.3773946228307642e-07, "loss": 0.4223, "step": 63230 }, { "epoch": 0.9114624619863655, "grad_norm": 2.0439704538660948, "learning_rate": 2.3697361296127597e-07, "loss": 0.4364, "step": 63240 }, { "epoch": 0.9116065895103987, "grad_norm": 1.9602120746439144, "learning_rate": 2.362089692340741e-07, "loss": 0.442, "step": 63250 }, { "epoch": 0.9117507170344321, "grad_norm": 1.362622683269053, "learning_rate": 2.3544553129501323e-07, "loss": 0.4145, "step": 63260 }, { "epoch": 0.9118948445584654, "grad_norm": 1.81215337839584, "learning_rate": 2.3468329933733268e-07, "loss": 0.429, "step": 63270 }, { "epoch": 0.9120389720824986, "grad_norm": 1.5956290742760204, "learning_rate": 2.3392227355396368e-07, "loss": 0.4484, "step": 63280 }, { "epoch": 0.9121830996065319, "grad_norm": 1.4465032854699178, "learning_rate": 2.3316245413753492e-07, "loss": 0.4394, "step": 63290 }, { "epoch": 0.9123272271305651, "grad_norm": 1.7570188484319509, "learning_rate": 2.3240384128036642e-07, "loss": 0.4262, "step": 63300 }, { "epoch": 0.9124713546545984, "grad_norm": 2.585354341461092, "learning_rate": 2.316464351744757e-07, "loss": 0.4364, "step": 63310 }, { "epoch": 0.9126154821786316, "grad_norm": 1.4757134210571012, "learning_rate": 2.3089023601157445e-07, "loss": 0.4122, "step": 63320 }, { "epoch": 0.9127596097026649, "grad_norm": 1.5918345506215916, "learning_rate": 2.3013524398306675e-07, "loss": 0.4286, "step": 63330 }, { "epoch": 0.9129037372266982, "grad_norm": 1.6494242060084534, "learning_rate": 2.293814592800536e-07, "loss": 0.4314, "step": 63340 }, { "epoch": 0.9130478647507314, "grad_norm": 1.7515244373457741, "learning_rate": 2.2862888209332913e-07, "loss": 0.4297, "step": 63350 }, { "epoch": 0.9131919922747647, "grad_norm": 1.720664430291723, "learning_rate": 2.2787751261338142e-07, "loss": 0.4416, "step": 63360 }, { "epoch": 0.913336119798798, "grad_norm": 1.6371255629068953, "learning_rate": 2.2712735103039453e-07, "loss": 0.4275, "step": 63370 }, { "epoch": 0.9134802473228313, "grad_norm": 1.6331260091575535, "learning_rate": 2.2637839753424594e-07, "loss": 0.4184, "step": 63380 }, { "epoch": 0.9136243748468645, "grad_norm": 1.5212877385020211, "learning_rate": 2.2563065231450577e-07, "loss": 0.4284, "step": 63390 }, { "epoch": 0.9137685023708978, "grad_norm": 1.702897094097612, "learning_rate": 2.2488411556044087e-07, "loss": 0.4135, "step": 63400 }, { "epoch": 0.913912629894931, "grad_norm": 1.8187950939924822, "learning_rate": 2.2413878746101125e-07, "loss": 0.4362, "step": 63410 }, { "epoch": 0.9140567574189643, "grad_norm": 1.505693939864453, "learning_rate": 2.23394668204871e-07, "loss": 0.4219, "step": 63420 }, { "epoch": 0.9142008849429976, "grad_norm": 1.645477405269082, "learning_rate": 2.226517579803661e-07, "loss": 0.4332, "step": 63430 }, { "epoch": 0.9143450124670308, "grad_norm": 1.970341432263468, "learning_rate": 2.2191005697554113e-07, "loss": 0.4458, "step": 63440 }, { "epoch": 0.9144891399910641, "grad_norm": 1.839604659568886, "learning_rate": 2.2116956537813038e-07, "loss": 0.4493, "step": 63450 }, { "epoch": 0.9146332675150973, "grad_norm": 1.586929341682851, "learning_rate": 2.2043028337556283e-07, "loss": 0.4388, "step": 63460 }, { "epoch": 0.9147773950391306, "grad_norm": 1.853212061634742, "learning_rate": 2.196922111549643e-07, "loss": 0.4544, "step": 63470 }, { "epoch": 0.9149215225631638, "grad_norm": 1.5567878984021937, "learning_rate": 2.189553489031504e-07, "loss": 0.427, "step": 63480 }, { "epoch": 0.9150656500871972, "grad_norm": 1.992885568954758, "learning_rate": 2.182196968066319e-07, "loss": 0.4273, "step": 63490 }, { "epoch": 0.9152097776112305, "grad_norm": 2.0081373005706316, "learning_rate": 2.174852550516149e-07, "loss": 0.4314, "step": 63500 }, { "epoch": 0.9153539051352637, "grad_norm": 1.958341322613441, "learning_rate": 2.1675202382399673e-07, "loss": 0.4443, "step": 63510 }, { "epoch": 0.915498032659297, "grad_norm": 1.8694357557461803, "learning_rate": 2.160200033093701e-07, "loss": 0.4426, "step": 63520 }, { "epoch": 0.9156421601833302, "grad_norm": 1.470320927916226, "learning_rate": 2.152891936930207e-07, "loss": 0.4043, "step": 63530 }, { "epoch": 0.9157862877073635, "grad_norm": 1.7132545693979362, "learning_rate": 2.145595951599272e-07, "loss": 0.4316, "step": 63540 }, { "epoch": 0.9159304152313967, "grad_norm": 1.8255755002446272, "learning_rate": 2.1383120789476186e-07, "loss": 0.4195, "step": 63550 }, { "epoch": 0.91607454275543, "grad_norm": 2.8034556367979446, "learning_rate": 2.1310403208189113e-07, "loss": 0.3976, "step": 63560 }, { "epoch": 0.9162186702794632, "grad_norm": 1.9037147139674404, "learning_rate": 2.1237806790537385e-07, "loss": 0.4342, "step": 63570 }, { "epoch": 0.9163627978034965, "grad_norm": 1.5787292693891044, "learning_rate": 2.1165331554896306e-07, "loss": 0.4376, "step": 63580 }, { "epoch": 0.9165069253275298, "grad_norm": 1.6110691065862823, "learning_rate": 2.109297751961037e-07, "loss": 0.4149, "step": 63590 }, { "epoch": 0.916651052851563, "grad_norm": 1.7921395215764022, "learning_rate": 2.1020744702993645e-07, "loss": 0.4481, "step": 63600 }, { "epoch": 0.9167951803755964, "grad_norm": 1.6049705474877654, "learning_rate": 2.094863312332923e-07, "loss": 0.4395, "step": 63610 }, { "epoch": 0.9169393078996296, "grad_norm": 1.510660149701149, "learning_rate": 2.0876642798869584e-07, "loss": 0.4393, "step": 63620 }, { "epoch": 0.9170834354236629, "grad_norm": 1.6693906473374227, "learning_rate": 2.0804773747836848e-07, "loss": 0.4391, "step": 63630 }, { "epoch": 0.9172275629476961, "grad_norm": 1.6070501847833112, "learning_rate": 2.0733025988421918e-07, "loss": 0.435, "step": 63640 }, { "epoch": 0.9173716904717294, "grad_norm": 1.521994001388293, "learning_rate": 2.0661399538785322e-07, "loss": 0.4316, "step": 63650 }, { "epoch": 0.9175158179957627, "grad_norm": 1.6717531740943088, "learning_rate": 2.0589894417056887e-07, "loss": 0.449, "step": 63660 }, { "epoch": 0.9176599455197959, "grad_norm": 1.5592921187404711, "learning_rate": 2.0518510641335532e-07, "loss": 0.4143, "step": 63670 }, { "epoch": 0.9178040730438292, "grad_norm": 1.4912980059092609, "learning_rate": 2.0447248229689575e-07, "loss": 0.4227, "step": 63680 }, { "epoch": 0.9179482005678624, "grad_norm": 1.6347443115191305, "learning_rate": 2.0376107200156758e-07, "loss": 0.4278, "step": 63690 }, { "epoch": 0.9180923280918957, "grad_norm": 1.774118939642558, "learning_rate": 2.0305087570743843e-07, "loss": 0.4222, "step": 63700 }, { "epoch": 0.9182364556159289, "grad_norm": 1.9283127703682952, "learning_rate": 2.0234189359426948e-07, "loss": 0.4387, "step": 63710 }, { "epoch": 0.9183805831399623, "grad_norm": 1.7743924039804155, "learning_rate": 2.0163412584151608e-07, "loss": 0.4423, "step": 63720 }, { "epoch": 0.9185247106639955, "grad_norm": 1.7426670187458384, "learning_rate": 2.0092757262832385e-07, "loss": 0.4318, "step": 63730 }, { "epoch": 0.9186688381880288, "grad_norm": 1.6601983060935346, "learning_rate": 2.0022223413353359e-07, "loss": 0.4348, "step": 63740 }, { "epoch": 0.9188129657120621, "grad_norm": 1.7839668282056351, "learning_rate": 1.9951811053567526e-07, "loss": 0.4168, "step": 63750 }, { "epoch": 0.9189570932360953, "grad_norm": 1.6885302164711222, "learning_rate": 1.9881520201297577e-07, "loss": 0.4379, "step": 63760 }, { "epoch": 0.9191012207601286, "grad_norm": 1.543843094618937, "learning_rate": 1.9811350874335e-07, "loss": 0.4286, "step": 63770 }, { "epoch": 0.9192453482841618, "grad_norm": 1.6041247302679376, "learning_rate": 1.9741303090440754e-07, "loss": 0.4392, "step": 63780 }, { "epoch": 0.9193894758081951, "grad_norm": 1.9028078857414417, "learning_rate": 1.967137686734516e-07, "loss": 0.4264, "step": 63790 }, { "epoch": 0.9195336033322283, "grad_norm": 1.8326339005382553, "learning_rate": 1.960157222274739e-07, "loss": 0.4437, "step": 63800 }, { "epoch": 0.9196777308562616, "grad_norm": 1.5073465350412916, "learning_rate": 1.9531889174316143e-07, "loss": 0.406, "step": 63810 }, { "epoch": 0.9198218583802948, "grad_norm": 1.611796345269405, "learning_rate": 1.9462327739689312e-07, "loss": 0.4227, "step": 63820 }, { "epoch": 0.9199659859043281, "grad_norm": 1.5877189994151863, "learning_rate": 1.939288793647398e-07, "loss": 0.4188, "step": 63830 }, { "epoch": 0.9201101134283615, "grad_norm": 1.7335440769637396, "learning_rate": 1.9323569782246254e-07, "loss": 0.4436, "step": 63840 }, { "epoch": 0.9202542409523947, "grad_norm": 1.723252242183029, "learning_rate": 1.9254373294551764e-07, "loss": 0.4475, "step": 63850 }, { "epoch": 0.920398368476428, "grad_norm": 1.5294320762587592, "learning_rate": 1.9185298490905168e-07, "loss": 0.3886, "step": 63860 }, { "epoch": 0.9205424960004612, "grad_norm": 1.5087037305247417, "learning_rate": 1.911634538879037e-07, "loss": 0.4245, "step": 63870 }, { "epoch": 0.9206866235244945, "grad_norm": 2.048358658773896, "learning_rate": 1.904751400566035e-07, "loss": 0.4234, "step": 63880 }, { "epoch": 0.9208307510485277, "grad_norm": 1.8071374553376154, "learning_rate": 1.8978804358937508e-07, "loss": 0.435, "step": 63890 }, { "epoch": 0.920974878572561, "grad_norm": 1.6411912199975167, "learning_rate": 1.8910216466013265e-07, "loss": 0.4396, "step": 63900 }, { "epoch": 0.9211190060965943, "grad_norm": 1.4541125000978528, "learning_rate": 1.8841750344248176e-07, "loss": 0.4276, "step": 63910 }, { "epoch": 0.9212631336206275, "grad_norm": 1.7549183891460263, "learning_rate": 1.8773406010972162e-07, "loss": 0.4415, "step": 63920 }, { "epoch": 0.9214072611446608, "grad_norm": 1.6491892193614455, "learning_rate": 1.8705183483484214e-07, "loss": 0.446, "step": 63930 }, { "epoch": 0.921551388668694, "grad_norm": 1.7213166495153684, "learning_rate": 1.8637082779052407e-07, "loss": 0.4585, "step": 63940 }, { "epoch": 0.9216955161927273, "grad_norm": 1.755392031225814, "learning_rate": 1.8569103914914067e-07, "loss": 0.4539, "step": 63950 }, { "epoch": 0.9218396437167606, "grad_norm": 1.5940528229996849, "learning_rate": 1.8501246908275872e-07, "loss": 0.4166, "step": 63960 }, { "epoch": 0.9219837712407939, "grad_norm": 1.7679964636345349, "learning_rate": 1.8433511776313305e-07, "loss": 0.4206, "step": 63970 }, { "epoch": 0.9221278987648271, "grad_norm": 1.6697469278289345, "learning_rate": 1.83658985361711e-07, "loss": 0.4325, "step": 63980 }, { "epoch": 0.9222720262888604, "grad_norm": 1.5017092394163079, "learning_rate": 1.8298407204963398e-07, "loss": 0.4328, "step": 63990 }, { "epoch": 0.9224161538128937, "grad_norm": 1.9281062336425003, "learning_rate": 1.8231037799773087e-07, "loss": 0.4318, "step": 64000 }, { "epoch": 0.9225602813369269, "grad_norm": 1.8498220083639136, "learning_rate": 1.8163790337652475e-07, "loss": 0.4021, "step": 64010 }, { "epoch": 0.9227044088609602, "grad_norm": 1.8701997955945902, "learning_rate": 1.8096664835623e-07, "loss": 0.4272, "step": 64020 }, { "epoch": 0.9228485363849934, "grad_norm": 1.7062065833050937, "learning_rate": 1.8029661310675017e-07, "loss": 0.435, "step": 64030 }, { "epoch": 0.9229926639090267, "grad_norm": 1.6995866935304573, "learning_rate": 1.796277977976818e-07, "loss": 0.4193, "step": 64040 }, { "epoch": 0.9231367914330599, "grad_norm": 1.822796190274373, "learning_rate": 1.7896020259831226e-07, "loss": 0.4263, "step": 64050 }, { "epoch": 0.9232809189570932, "grad_norm": 1.555121936648908, "learning_rate": 1.7829382767762027e-07, "loss": 0.4255, "step": 64060 }, { "epoch": 0.9234250464811266, "grad_norm": 1.4172540712093413, "learning_rate": 1.776286732042759e-07, "loss": 0.4206, "step": 64070 }, { "epoch": 0.9235691740051598, "grad_norm": 1.4555907410132833, "learning_rate": 1.769647393466384e-07, "loss": 0.4248, "step": 64080 }, { "epoch": 0.9237133015291931, "grad_norm": 1.489207672372582, "learning_rate": 1.763020262727616e-07, "loss": 0.4206, "step": 64090 }, { "epoch": 0.9238574290532263, "grad_norm": 1.6458171197902816, "learning_rate": 1.7564053415038695e-07, "loss": 0.4157, "step": 64100 }, { "epoch": 0.9240015565772596, "grad_norm": 1.605936933038544, "learning_rate": 1.7498026314694772e-07, "loss": 0.4217, "step": 64110 }, { "epoch": 0.9241456841012928, "grad_norm": 1.806189908211933, "learning_rate": 1.743212134295702e-07, "loss": 0.4113, "step": 64120 }, { "epoch": 0.9242898116253261, "grad_norm": 1.5257316331918618, "learning_rate": 1.7366338516506875e-07, "loss": 0.429, "step": 64130 }, { "epoch": 0.9244339391493593, "grad_norm": 1.5059123896568534, "learning_rate": 1.7300677851994964e-07, "loss": 0.408, "step": 64140 }, { "epoch": 0.9245780666733926, "grad_norm": 2.5089739082489033, "learning_rate": 1.7235139366041042e-07, "loss": 0.4185, "step": 64150 }, { "epoch": 0.9247221941974259, "grad_norm": 1.4617264909997878, "learning_rate": 1.7169723075233958e-07, "loss": 0.4387, "step": 64160 }, { "epoch": 0.9248663217214591, "grad_norm": 1.760332494636966, "learning_rate": 1.7104428996131352e-07, "loss": 0.4085, "step": 64170 }, { "epoch": 0.9250104492454924, "grad_norm": 1.273621986262402, "learning_rate": 1.7039257145260447e-07, "loss": 0.4108, "step": 64180 }, { "epoch": 0.9251545767695257, "grad_norm": 1.942999942106693, "learning_rate": 1.6974207539117106e-07, "loss": 0.4247, "step": 64190 }, { "epoch": 0.925298704293559, "grad_norm": 1.7566492605323352, "learning_rate": 1.6909280194166377e-07, "loss": 0.4489, "step": 64200 }, { "epoch": 0.9254428318175922, "grad_norm": 1.811723852116638, "learning_rate": 1.6844475126842286e-07, "loss": 0.4046, "step": 64210 }, { "epoch": 0.9255869593416255, "grad_norm": 1.6828956675699025, "learning_rate": 1.677979235354804e-07, "loss": 0.4268, "step": 64220 }, { "epoch": 0.9257310868656587, "grad_norm": 1.5131218416048982, "learning_rate": 1.6715231890655935e-07, "loss": 0.4158, "step": 64230 }, { "epoch": 0.925875214389692, "grad_norm": 1.3968328613141252, "learning_rate": 1.665079375450701e-07, "loss": 0.4268, "step": 64240 }, { "epoch": 0.9260193419137253, "grad_norm": 1.6669708153372673, "learning_rate": 1.6586477961411662e-07, "loss": 0.4348, "step": 64250 }, { "epoch": 0.9261634694377585, "grad_norm": 1.8219845240510342, "learning_rate": 1.6522284527649256e-07, "loss": 0.4368, "step": 64260 }, { "epoch": 0.9263075969617918, "grad_norm": 1.539671319170859, "learning_rate": 1.6458213469467964e-07, "loss": 0.4134, "step": 64270 }, { "epoch": 0.926451724485825, "grad_norm": 1.612832698135599, "learning_rate": 1.6394264803085203e-07, "loss": 0.4535, "step": 64280 }, { "epoch": 0.9265958520098583, "grad_norm": 1.8655909324577364, "learning_rate": 1.6330438544687522e-07, "loss": 0.4534, "step": 64290 }, { "epoch": 0.9267399795338915, "grad_norm": 1.750962918091019, "learning_rate": 1.6266734710430054e-07, "loss": 0.3992, "step": 64300 }, { "epoch": 0.9268841070579249, "grad_norm": 1.7560281102096682, "learning_rate": 1.6203153316437458e-07, "loss": 0.4395, "step": 64310 }, { "epoch": 0.9270282345819582, "grad_norm": 1.4338554961218857, "learning_rate": 1.6139694378803028e-07, "loss": 0.4378, "step": 64320 }, { "epoch": 0.9271723621059914, "grad_norm": 1.7074103473338358, "learning_rate": 1.6076357913589135e-07, "loss": 0.4499, "step": 64330 }, { "epoch": 0.9273164896300247, "grad_norm": 1.6252248424567772, "learning_rate": 1.6013143936827346e-07, "loss": 0.4619, "step": 64340 }, { "epoch": 0.9274606171540579, "grad_norm": 1.5387717019694893, "learning_rate": 1.5950052464517974e-07, "loss": 0.4346, "step": 64350 }, { "epoch": 0.9276047446780912, "grad_norm": 1.422666820258154, "learning_rate": 1.5887083512630463e-07, "loss": 0.4265, "step": 64360 }, { "epoch": 0.9277488722021244, "grad_norm": 1.5759529219123178, "learning_rate": 1.5824237097103234e-07, "loss": 0.4246, "step": 64370 }, { "epoch": 0.9278929997261577, "grad_norm": 1.9458851937028936, "learning_rate": 1.5761513233843673e-07, "loss": 0.4604, "step": 64380 }, { "epoch": 0.9280371272501909, "grad_norm": 1.3394723060009186, "learning_rate": 1.5698911938728023e-07, "loss": 0.4191, "step": 64390 }, { "epoch": 0.9281812547742242, "grad_norm": 1.6658088152339259, "learning_rate": 1.5636433227601778e-07, "loss": 0.4186, "step": 64400 }, { "epoch": 0.9283253822982575, "grad_norm": 1.6180277268470276, "learning_rate": 1.5574077116279286e-07, "loss": 0.4589, "step": 64410 }, { "epoch": 0.9284695098222908, "grad_norm": 1.5356276377146765, "learning_rate": 1.5511843620543698e-07, "loss": 0.4518, "step": 64420 }, { "epoch": 0.9286136373463241, "grad_norm": 1.4392400947679038, "learning_rate": 1.5449732756147305e-07, "loss": 0.4085, "step": 64430 }, { "epoch": 0.9287577648703573, "grad_norm": 1.400562057419743, "learning_rate": 1.5387744538811367e-07, "loss": 0.4076, "step": 64440 }, { "epoch": 0.9289018923943906, "grad_norm": 1.7841491685811006, "learning_rate": 1.5325878984225995e-07, "loss": 0.4299, "step": 64450 }, { "epoch": 0.9290460199184238, "grad_norm": 1.6155658686233516, "learning_rate": 1.5264136108050276e-07, "loss": 0.4242, "step": 64460 }, { "epoch": 0.9291901474424571, "grad_norm": 1.6742502977397076, "learning_rate": 1.520251592591232e-07, "loss": 0.4302, "step": 64470 }, { "epoch": 0.9293342749664903, "grad_norm": 1.8961709881153215, "learning_rate": 1.514101845340915e-07, "loss": 0.4292, "step": 64480 }, { "epoch": 0.9294784024905236, "grad_norm": 2.81153889375623, "learning_rate": 1.5079643706106652e-07, "loss": 0.4189, "step": 64490 }, { "epoch": 0.9296225300145569, "grad_norm": 1.7939117869345877, "learning_rate": 1.501839169953967e-07, "loss": 0.4386, "step": 64500 }, { "epoch": 0.9297666575385901, "grad_norm": 1.9806249343203206, "learning_rate": 1.4957262449212251e-07, "loss": 0.4453, "step": 64510 }, { "epoch": 0.9299107850626234, "grad_norm": 1.7972026056037784, "learning_rate": 1.4896255970596906e-07, "loss": 0.4227, "step": 64520 }, { "epoch": 0.9300549125866566, "grad_norm": 1.5408409810107078, "learning_rate": 1.4835372279135395e-07, "loss": 0.4277, "step": 64530 }, { "epoch": 0.93019904011069, "grad_norm": 1.5175723289866019, "learning_rate": 1.477461139023828e-07, "loss": 0.409, "step": 64540 }, { "epoch": 0.9303431676347232, "grad_norm": 1.9199708221334664, "learning_rate": 1.4713973319285147e-07, "loss": 0.4048, "step": 64550 }, { "epoch": 0.9304872951587565, "grad_norm": 1.5467081724364287, "learning_rate": 1.465345808162427e-07, "loss": 0.4362, "step": 64560 }, { "epoch": 0.9306314226827898, "grad_norm": 1.748669327557665, "learning_rate": 1.4593065692573126e-07, "loss": 0.4459, "step": 64570 }, { "epoch": 0.930775550206823, "grad_norm": 1.5487748572131879, "learning_rate": 1.4532796167417928e-07, "loss": 0.4179, "step": 64580 }, { "epoch": 0.9309196777308563, "grad_norm": 1.3901450538571143, "learning_rate": 1.447264952141364e-07, "loss": 0.4258, "step": 64590 }, { "epoch": 0.9310638052548895, "grad_norm": 1.8080163233215856, "learning_rate": 1.4412625769784528e-07, "loss": 0.4537, "step": 64600 }, { "epoch": 0.9312079327789228, "grad_norm": 1.6932060547816254, "learning_rate": 1.4352724927723382e-07, "loss": 0.4234, "step": 64610 }, { "epoch": 0.931352060302956, "grad_norm": 1.4917211632596354, "learning_rate": 1.4292947010392078e-07, "loss": 0.4247, "step": 64620 }, { "epoch": 0.9314961878269893, "grad_norm": 1.6419222574811094, "learning_rate": 1.423329203292123e-07, "loss": 0.4404, "step": 64630 }, { "epoch": 0.9316403153510225, "grad_norm": 1.6759964091510984, "learning_rate": 1.4173760010410543e-07, "loss": 0.4373, "step": 64640 }, { "epoch": 0.9317844428750558, "grad_norm": 1.6622706053299157, "learning_rate": 1.4114350957928456e-07, "loss": 0.3935, "step": 64650 }, { "epoch": 0.9319285703990892, "grad_norm": 1.6349250935035209, "learning_rate": 1.4055064890512172e-07, "loss": 0.4221, "step": 64660 }, { "epoch": 0.9320726979231224, "grad_norm": 1.7464509792396894, "learning_rate": 1.3995901823168067e-07, "loss": 0.4155, "step": 64670 }, { "epoch": 0.9322168254471557, "grad_norm": 1.7122042683710865, "learning_rate": 1.393686177087117e-07, "loss": 0.4363, "step": 64680 }, { "epoch": 0.9323609529711889, "grad_norm": 1.5535261929333446, "learning_rate": 1.3877944748565352e-07, "loss": 0.404, "step": 64690 }, { "epoch": 0.9325050804952222, "grad_norm": 1.5208468128293524, "learning_rate": 1.381915077116347e-07, "loss": 0.4341, "step": 64700 }, { "epoch": 0.9326492080192554, "grad_norm": 1.5171534184281215, "learning_rate": 1.376047985354717e-07, "loss": 0.4021, "step": 64710 }, { "epoch": 0.9327933355432887, "grad_norm": 1.9557720157814327, "learning_rate": 1.370193201056691e-07, "loss": 0.4253, "step": 64720 }, { "epoch": 0.932937463067322, "grad_norm": 1.7475735495863751, "learning_rate": 1.3643507257042053e-07, "loss": 0.428, "step": 64730 }, { "epoch": 0.9330815905913552, "grad_norm": 1.6753882289156277, "learning_rate": 1.3585205607760887e-07, "loss": 0.413, "step": 64740 }, { "epoch": 0.9332257181153885, "grad_norm": 2.5240962262562796, "learning_rate": 1.3527027077480382e-07, "loss": 0.4479, "step": 64750 }, { "epoch": 0.9333698456394217, "grad_norm": 1.460610931390494, "learning_rate": 1.346897168092637e-07, "loss": 0.4323, "step": 64760 }, { "epoch": 0.9335139731634551, "grad_norm": 1.5219747833415251, "learning_rate": 1.3411039432793593e-07, "loss": 0.4102, "step": 64770 }, { "epoch": 0.9336581006874883, "grad_norm": 2.126688004011397, "learning_rate": 1.335323034774566e-07, "loss": 0.4292, "step": 64780 }, { "epoch": 0.9338022282115216, "grad_norm": 1.7569862832008192, "learning_rate": 1.3295544440414754e-07, "loss": 0.4196, "step": 64790 }, { "epoch": 0.9339463557355548, "grad_norm": 1.7769499821831976, "learning_rate": 1.323798172540225e-07, "loss": 0.4216, "step": 64800 }, { "epoch": 0.9340904832595881, "grad_norm": 1.6679955867689604, "learning_rate": 1.3180542217278048e-07, "loss": 0.4384, "step": 64810 }, { "epoch": 0.9342346107836214, "grad_norm": 1.9644758277524028, "learning_rate": 1.3123225930580963e-07, "loss": 0.4218, "step": 64820 }, { "epoch": 0.9343787383076546, "grad_norm": 1.9851439353660385, "learning_rate": 1.3066032879818612e-07, "loss": 0.4495, "step": 64830 }, { "epoch": 0.9345228658316879, "grad_norm": 1.6516304972518485, "learning_rate": 1.3008963079467475e-07, "loss": 0.444, "step": 64840 }, { "epoch": 0.9346669933557211, "grad_norm": 1.651122938197978, "learning_rate": 1.2952016543972768e-07, "loss": 0.4349, "step": 64850 }, { "epoch": 0.9348111208797544, "grad_norm": 1.3500886181719662, "learning_rate": 1.289519328774852e-07, "loss": 0.4019, "step": 64860 }, { "epoch": 0.9349552484037876, "grad_norm": 1.6115901784880382, "learning_rate": 1.2838493325177614e-07, "loss": 0.4146, "step": 64870 }, { "epoch": 0.9350993759278209, "grad_norm": 1.6862609104413522, "learning_rate": 1.2781916670611627e-07, "loss": 0.4182, "step": 64880 }, { "epoch": 0.9352435034518543, "grad_norm": 1.6062943645193195, "learning_rate": 1.272546333837088e-07, "loss": 0.4143, "step": 64890 }, { "epoch": 0.9353876309758875, "grad_norm": 1.7952757868311462, "learning_rate": 1.2669133342744776e-07, "loss": 0.4411, "step": 64900 }, { "epoch": 0.9355317584999208, "grad_norm": 1.390111435631372, "learning_rate": 1.261292669799119e-07, "loss": 0.4137, "step": 64910 }, { "epoch": 0.935675886023954, "grad_norm": 1.7416291309958287, "learning_rate": 1.2556843418336796e-07, "loss": 0.4302, "step": 64920 }, { "epoch": 0.9358200135479873, "grad_norm": 1.7503475526921943, "learning_rate": 1.250088351797729e-07, "loss": 0.4356, "step": 64930 }, { "epoch": 0.9359641410720205, "grad_norm": 1.5519950157424933, "learning_rate": 1.244504701107685e-07, "loss": 0.4388, "step": 64940 }, { "epoch": 0.9361082685960538, "grad_norm": 1.7365180057554064, "learning_rate": 1.2389333911768554e-07, "loss": 0.4467, "step": 64950 }, { "epoch": 0.936252396120087, "grad_norm": 1.6788035930432343, "learning_rate": 1.2333744234154343e-07, "loss": 0.3908, "step": 64960 }, { "epoch": 0.9363965236441203, "grad_norm": 1.6403073618680062, "learning_rate": 1.2278277992304744e-07, "loss": 0.4304, "step": 64970 }, { "epoch": 0.9365406511681535, "grad_norm": 1.6253148393539067, "learning_rate": 1.222293520025908e-07, "loss": 0.4238, "step": 64980 }, { "epoch": 0.9366847786921868, "grad_norm": 2.0657829914345434, "learning_rate": 1.216771587202553e-07, "loss": 0.4244, "step": 64990 }, { "epoch": 0.9368289062162201, "grad_norm": 1.602804028893397, "learning_rate": 1.2112620021580868e-07, "loss": 0.4293, "step": 65000 }, { "epoch": 0.9369730337402534, "grad_norm": 1.5503420886622146, "learning_rate": 1.205764766287071e-07, "loss": 0.4099, "step": 65010 }, { "epoch": 0.9371171612642867, "grad_norm": 1.6579629685399462, "learning_rate": 1.2002798809809478e-07, "loss": 0.4333, "step": 65020 }, { "epoch": 0.9372612887883199, "grad_norm": 1.6669664275289133, "learning_rate": 1.1948073476280187e-07, "loss": 0.4236, "step": 65030 }, { "epoch": 0.9374054163123532, "grad_norm": 1.7025233367741186, "learning_rate": 1.1893471676134638e-07, "loss": 0.4348, "step": 65040 }, { "epoch": 0.9375495438363864, "grad_norm": 1.7484921598691967, "learning_rate": 1.183899342319339e-07, "loss": 0.4509, "step": 65050 }, { "epoch": 0.9376936713604197, "grad_norm": 1.7839799363119764, "learning_rate": 1.178463873124569e-07, "loss": 0.4305, "step": 65060 }, { "epoch": 0.937837798884453, "grad_norm": 1.5954664352722367, "learning_rate": 1.1730407614049644e-07, "loss": 0.4337, "step": 65070 }, { "epoch": 0.9379819264084862, "grad_norm": 1.8592644069771718, "learning_rate": 1.1676300085331826e-07, "loss": 0.4399, "step": 65080 }, { "epoch": 0.9381260539325195, "grad_norm": 1.994031163928509, "learning_rate": 1.1622316158787894e-07, "loss": 0.4736, "step": 65090 }, { "epoch": 0.9382701814565527, "grad_norm": 1.8562206704609967, "learning_rate": 1.1568455848081806e-07, "loss": 0.4355, "step": 65100 }, { "epoch": 0.938414308980586, "grad_norm": 1.6919890265017972, "learning_rate": 1.1514719166846433e-07, "loss": 0.4232, "step": 65110 }, { "epoch": 0.9385584365046193, "grad_norm": 1.1660911744112015, "learning_rate": 1.1461106128683453e-07, "loss": 0.4455, "step": 65120 }, { "epoch": 0.9387025640286526, "grad_norm": 1.6161368936975562, "learning_rate": 1.1407616747163119e-07, "loss": 0.4372, "step": 65130 }, { "epoch": 0.9388466915526859, "grad_norm": 1.5024374172591095, "learning_rate": 1.1354251035824326e-07, "loss": 0.4503, "step": 65140 }, { "epoch": 0.9389908190767191, "grad_norm": 1.882698223377005, "learning_rate": 1.1301009008174879e-07, "loss": 0.441, "step": 65150 }, { "epoch": 0.9391349466007524, "grad_norm": 1.6905503496376517, "learning_rate": 1.1247890677691053e-07, "loss": 0.4396, "step": 65160 }, { "epoch": 0.9392790741247856, "grad_norm": 1.8847345027030293, "learning_rate": 1.1194896057817928e-07, "loss": 0.4227, "step": 65170 }, { "epoch": 0.9394232016488189, "grad_norm": 1.46830794103866, "learning_rate": 1.1142025161969216e-07, "loss": 0.4335, "step": 65180 }, { "epoch": 0.9395673291728521, "grad_norm": 1.9781469552290516, "learning_rate": 1.1089278003527438e-07, "loss": 0.4219, "step": 65190 }, { "epoch": 0.9397114566968854, "grad_norm": 1.9716120889757434, "learning_rate": 1.1036654595843632e-07, "loss": 0.4255, "step": 65200 }, { "epoch": 0.9398555842209186, "grad_norm": 1.8217346469273834, "learning_rate": 1.0984154952237592e-07, "loss": 0.3973, "step": 65210 }, { "epoch": 0.9399997117449519, "grad_norm": 1.8348512028612793, "learning_rate": 1.0931779085997796e-07, "loss": 0.399, "step": 65220 }, { "epoch": 0.9401438392689851, "grad_norm": 1.5048309824713506, "learning_rate": 1.0879527010381419e-07, "loss": 0.4431, "step": 65230 }, { "epoch": 0.9402879667930185, "grad_norm": 1.6248008207140596, "learning_rate": 1.0827398738614104e-07, "loss": 0.4225, "step": 65240 }, { "epoch": 0.9404320943170518, "grad_norm": 1.6535974217271872, "learning_rate": 1.0775394283890461e-07, "loss": 0.4621, "step": 65250 }, { "epoch": 0.940576221841085, "grad_norm": 1.1746756730315806, "learning_rate": 1.072351365937363e-07, "loss": 0.4321, "step": 65260 }, { "epoch": 0.9407203493651183, "grad_norm": 1.5501389017346208, "learning_rate": 1.0671756878195216e-07, "loss": 0.4146, "step": 65270 }, { "epoch": 0.9408644768891515, "grad_norm": 1.9014764038431353, "learning_rate": 1.0620123953455796e-07, "loss": 0.43, "step": 65280 }, { "epoch": 0.9410086044131848, "grad_norm": 1.4375811785278596, "learning_rate": 1.0568614898224416e-07, "loss": 0.4475, "step": 65290 }, { "epoch": 0.941152731937218, "grad_norm": 1.7576656667694932, "learning_rate": 1.0517229725538814e-07, "loss": 0.409, "step": 65300 }, { "epoch": 0.9412968594612513, "grad_norm": 1.5254457851726415, "learning_rate": 1.0465968448405306e-07, "loss": 0.4315, "step": 65310 }, { "epoch": 0.9414409869852846, "grad_norm": 1.5302714144300984, "learning_rate": 1.041483107979896e-07, "loss": 0.4156, "step": 65320 }, { "epoch": 0.9415851145093178, "grad_norm": 2.0362561884716186, "learning_rate": 1.0363817632663476e-07, "loss": 0.4225, "step": 65330 }, { "epoch": 0.9417292420333511, "grad_norm": 1.695465857482426, "learning_rate": 1.0312928119910915e-07, "loss": 0.4281, "step": 65340 }, { "epoch": 0.9418733695573843, "grad_norm": 1.5500900746106503, "learning_rate": 1.0262162554422472e-07, "loss": 0.4163, "step": 65350 }, { "epoch": 0.9420174970814177, "grad_norm": 1.7257437569526177, "learning_rate": 1.0211520949047481e-07, "loss": 0.4038, "step": 65360 }, { "epoch": 0.9421616246054509, "grad_norm": 1.5220521272766356, "learning_rate": 1.016100331660419e-07, "loss": 0.3936, "step": 65370 }, { "epoch": 0.9423057521294842, "grad_norm": 1.6130540630316323, "learning_rate": 1.0110609669879368e-07, "loss": 0.4049, "step": 65380 }, { "epoch": 0.9424498796535175, "grad_norm": 1.6635945099474503, "learning_rate": 1.0060340021628368e-07, "loss": 0.4402, "step": 65390 }, { "epoch": 0.9425940071775507, "grad_norm": 1.601285336422593, "learning_rate": 1.0010194384575345e-07, "loss": 0.4197, "step": 65400 }, { "epoch": 0.942738134701584, "grad_norm": 1.6579675461532946, "learning_rate": 9.960172771412702e-08, "loss": 0.4395, "step": 65410 }, { "epoch": 0.9428822622256172, "grad_norm": 1.7553844037434765, "learning_rate": 9.910275194801921e-08, "loss": 0.4382, "step": 65420 }, { "epoch": 0.9430263897496505, "grad_norm": 1.4804321812073051, "learning_rate": 9.86050166737268e-08, "loss": 0.4166, "step": 65430 }, { "epoch": 0.9431705172736837, "grad_norm": 1.4410355526834722, "learning_rate": 9.810852201723398e-08, "loss": 0.4505, "step": 65440 }, { "epoch": 0.943314644797717, "grad_norm": 1.509461256730521, "learning_rate": 9.761326810421246e-08, "loss": 0.4104, "step": 65450 }, { "epoch": 0.9434587723217502, "grad_norm": 1.7958974025990002, "learning_rate": 9.711925506001751e-08, "loss": 0.4269, "step": 65460 }, { "epoch": 0.9436028998457836, "grad_norm": 1.7276019164860181, "learning_rate": 9.662648300969135e-08, "loss": 0.422, "step": 65470 }, { "epoch": 0.9437470273698169, "grad_norm": 1.4922080516457308, "learning_rate": 9.613495207796309e-08, "loss": 0.4368, "step": 65480 }, { "epoch": 0.9438911548938501, "grad_norm": 1.7881648710522082, "learning_rate": 9.564466238924541e-08, "loss": 0.3895, "step": 65490 }, { "epoch": 0.9440352824178834, "grad_norm": 1.898302292008932, "learning_rate": 9.515561406763852e-08, "loss": 0.4337, "step": 65500 }, { "epoch": 0.9441794099419166, "grad_norm": 1.5735624200305953, "learning_rate": 9.466780723692836e-08, "loss": 0.4247, "step": 65510 }, { "epoch": 0.9443235374659499, "grad_norm": 1.9122106861358188, "learning_rate": 9.41812420205862e-08, "loss": 0.4397, "step": 65520 }, { "epoch": 0.9444676649899831, "grad_norm": 2.0294116160434736, "learning_rate": 9.369591854176962e-08, "loss": 0.415, "step": 65530 }, { "epoch": 0.9446117925140164, "grad_norm": 1.664026729908959, "learning_rate": 9.321183692331981e-08, "loss": 0.4258, "step": 65540 }, { "epoch": 0.9447559200380496, "grad_norm": 1.6525860909509493, "learning_rate": 9.272899728776653e-08, "loss": 0.4293, "step": 65550 }, { "epoch": 0.9449000475620829, "grad_norm": 1.5975410132383658, "learning_rate": 9.224739975732366e-08, "loss": 0.4385, "step": 65560 }, { "epoch": 0.9450441750861162, "grad_norm": 1.678772692195743, "learning_rate": 9.176704445389039e-08, "loss": 0.4422, "step": 65570 }, { "epoch": 0.9451883026101494, "grad_norm": 1.6397434168812781, "learning_rate": 9.12879314990528e-08, "loss": 0.4444, "step": 65580 }, { "epoch": 0.9453324301341828, "grad_norm": 2.4668221136372286, "learning_rate": 9.081006101408108e-08, "loss": 0.4309, "step": 65590 }, { "epoch": 0.945476557658216, "grad_norm": 1.7909584648922885, "learning_rate": 9.033343311993126e-08, "loss": 0.4245, "step": 65600 }, { "epoch": 0.9456206851822493, "grad_norm": 1.5611648098522437, "learning_rate": 8.985804793724572e-08, "loss": 0.4196, "step": 65610 }, { "epoch": 0.9457648127062825, "grad_norm": 1.7582628192824261, "learning_rate": 8.938390558635157e-08, "loss": 0.4141, "step": 65620 }, { "epoch": 0.9459089402303158, "grad_norm": 1.530070274409612, "learning_rate": 8.891100618726112e-08, "loss": 0.4292, "step": 65630 }, { "epoch": 0.946053067754349, "grad_norm": 1.7415056310119637, "learning_rate": 8.843934985967306e-08, "loss": 0.4423, "step": 65640 }, { "epoch": 0.9461971952783823, "grad_norm": 1.6664091814303643, "learning_rate": 8.796893672297025e-08, "loss": 0.4347, "step": 65650 }, { "epoch": 0.9463413228024156, "grad_norm": 1.7469816314178557, "learning_rate": 8.749976689622131e-08, "loss": 0.4344, "step": 65660 }, { "epoch": 0.9464854503264488, "grad_norm": 1.6470104535849721, "learning_rate": 8.703184049818125e-08, "loss": 0.4509, "step": 65670 }, { "epoch": 0.9466295778504821, "grad_norm": 1.5659491713999543, "learning_rate": 8.656515764728868e-08, "loss": 0.4117, "step": 65680 }, { "epoch": 0.9467737053745153, "grad_norm": 1.639855818461225, "learning_rate": 8.609971846166798e-08, "loss": 0.431, "step": 65690 }, { "epoch": 0.9469178328985486, "grad_norm": 1.6889637602784937, "learning_rate": 8.563552305912825e-08, "loss": 0.4526, "step": 65700 }, { "epoch": 0.947061960422582, "grad_norm": 1.8684076836325592, "learning_rate": 8.517257155716607e-08, "loss": 0.4243, "step": 65710 }, { "epoch": 0.9472060879466152, "grad_norm": 1.6599290361748869, "learning_rate": 8.471086407296047e-08, "loss": 0.4315, "step": 65720 }, { "epoch": 0.9473502154706485, "grad_norm": 1.865691906206184, "learning_rate": 8.425040072337687e-08, "loss": 0.418, "step": 65730 }, { "epoch": 0.9474943429946817, "grad_norm": 1.5424921431333354, "learning_rate": 8.379118162496592e-08, "loss": 0.4058, "step": 65740 }, { "epoch": 0.947638470518715, "grad_norm": 1.7688356276839108, "learning_rate": 8.33332068939624e-08, "loss": 0.4076, "step": 65750 }, { "epoch": 0.9477825980427482, "grad_norm": 1.6473825911323408, "learning_rate": 8.287647664628639e-08, "loss": 0.421, "step": 65760 }, { "epoch": 0.9479267255667815, "grad_norm": 1.393820089826911, "learning_rate": 8.242099099754486e-08, "loss": 0.4266, "step": 65770 }, { "epoch": 0.9480708530908147, "grad_norm": 1.8231731628846086, "learning_rate": 8.196675006302668e-08, "loss": 0.4456, "step": 65780 }, { "epoch": 0.948214980614848, "grad_norm": 1.8529143584916241, "learning_rate": 8.151375395770766e-08, "loss": 0.4268, "step": 65790 }, { "epoch": 0.9483591081388812, "grad_norm": 1.6676725337633393, "learning_rate": 8.106200279624776e-08, "loss": 0.4176, "step": 65800 }, { "epoch": 0.9485032356629145, "grad_norm": 1.7337725565156625, "learning_rate": 8.061149669299273e-08, "loss": 0.4349, "step": 65810 }, { "epoch": 0.9486473631869478, "grad_norm": 1.753854813920592, "learning_rate": 8.016223576197135e-08, "loss": 0.4455, "step": 65820 }, { "epoch": 0.9487914907109811, "grad_norm": 1.7270167292446166, "learning_rate": 7.971422011689989e-08, "loss": 0.399, "step": 65830 }, { "epoch": 0.9489356182350144, "grad_norm": 1.5251805342488203, "learning_rate": 7.926744987117707e-08, "loss": 0.4262, "step": 65840 }, { "epoch": 0.9490797457590476, "grad_norm": 1.679685456295759, "learning_rate": 7.882192513788744e-08, "loss": 0.4289, "step": 65850 }, { "epoch": 0.9492238732830809, "grad_norm": 1.4230257377928295, "learning_rate": 7.837764602979969e-08, "loss": 0.4238, "step": 65860 }, { "epoch": 0.9493680008071141, "grad_norm": 1.8404401160081774, "learning_rate": 7.793461265936831e-08, "loss": 0.4572, "step": 65870 }, { "epoch": 0.9495121283311474, "grad_norm": 1.4442543714263127, "learning_rate": 7.74928251387319e-08, "loss": 0.4071, "step": 65880 }, { "epoch": 0.9496562558551807, "grad_norm": 1.822818808519335, "learning_rate": 7.705228357971217e-08, "loss": 0.4577, "step": 65890 }, { "epoch": 0.9498003833792139, "grad_norm": 1.6811131344024928, "learning_rate": 7.661298809381878e-08, "loss": 0.429, "step": 65900 }, { "epoch": 0.9499445109032472, "grad_norm": 1.793236477241939, "learning_rate": 7.617493879224335e-08, "loss": 0.4316, "step": 65910 }, { "epoch": 0.9500886384272804, "grad_norm": 1.7589785213230544, "learning_rate": 7.57381357858622e-08, "loss": 0.4231, "step": 65920 }, { "epoch": 0.9502327659513137, "grad_norm": 1.4567792159264066, "learning_rate": 7.530257918523798e-08, "loss": 0.4145, "step": 65930 }, { "epoch": 0.950376893475347, "grad_norm": 1.816495001202815, "learning_rate": 7.486826910061529e-08, "loss": 0.4348, "step": 65940 }, { "epoch": 0.9505210209993803, "grad_norm": 1.820254564141088, "learning_rate": 7.443520564192619e-08, "loss": 0.4186, "step": 65950 }, { "epoch": 0.9506651485234136, "grad_norm": 1.900435837747483, "learning_rate": 7.400338891878467e-08, "loss": 0.4039, "step": 65960 }, { "epoch": 0.9508092760474468, "grad_norm": 1.4368882224690296, "learning_rate": 7.357281904049107e-08, "loss": 0.4103, "step": 65970 }, { "epoch": 0.9509534035714801, "grad_norm": 1.6381792616412854, "learning_rate": 7.314349611602767e-08, "loss": 0.4195, "step": 65980 }, { "epoch": 0.9510975310955133, "grad_norm": 1.862519276287891, "learning_rate": 7.271542025406364e-08, "loss": 0.4145, "step": 65990 }, { "epoch": 0.9512416586195466, "grad_norm": 1.7936022973325678, "learning_rate": 7.228859156295176e-08, "loss": 0.4211, "step": 66000 }, { "epoch": 0.9513857861435798, "grad_norm": 1.4762808221914192, "learning_rate": 7.186301015072838e-08, "loss": 0.4155, "step": 66010 }, { "epoch": 0.9515299136676131, "grad_norm": 1.6353010955981584, "learning_rate": 7.143867612511402e-08, "loss": 0.4214, "step": 66020 }, { "epoch": 0.9516740411916463, "grad_norm": 1.8673639827750181, "learning_rate": 7.10155895935155e-08, "loss": 0.3943, "step": 66030 }, { "epoch": 0.9518181687156796, "grad_norm": 1.5253639783455817, "learning_rate": 7.059375066302221e-08, "loss": 0.42, "step": 66040 }, { "epoch": 0.9519622962397128, "grad_norm": 1.6363258392622313, "learning_rate": 7.017315944040648e-08, "loss": 0.4217, "step": 66050 }, { "epoch": 0.9521064237637462, "grad_norm": 1.8661949158683897, "learning_rate": 6.975381603212761e-08, "loss": 0.4347, "step": 66060 }, { "epoch": 0.9522505512877795, "grad_norm": 4.900100690660882, "learning_rate": 6.933572054432846e-08, "loss": 0.4154, "step": 66070 }, { "epoch": 0.9523946788118127, "grad_norm": 1.5600019765238258, "learning_rate": 6.891887308283441e-08, "loss": 0.4223, "step": 66080 }, { "epoch": 0.952538806335846, "grad_norm": 1.8835155134494768, "learning_rate": 6.850327375315546e-08, "loss": 0.4426, "step": 66090 }, { "epoch": 0.9526829338598792, "grad_norm": 1.5393853263564166, "learning_rate": 6.80889226604875e-08, "loss": 0.4371, "step": 66100 }, { "epoch": 0.9528270613839125, "grad_norm": 1.5415149553668739, "learning_rate": 6.767581990970773e-08, "loss": 0.4449, "step": 66110 }, { "epoch": 0.9529711889079457, "grad_norm": 1.6819694949146604, "learning_rate": 6.726396560537973e-08, "loss": 0.4402, "step": 66120 }, { "epoch": 0.953115316431979, "grad_norm": 1.3861259216068795, "learning_rate": 6.685335985174957e-08, "loss": 0.3986, "step": 66130 }, { "epoch": 0.9532594439560123, "grad_norm": 1.604266381760518, "learning_rate": 6.644400275274854e-08, "loss": 0.4085, "step": 66140 }, { "epoch": 0.9534035714800455, "grad_norm": 1.9034529688175015, "learning_rate": 6.603589441198987e-08, "loss": 0.4148, "step": 66150 }, { "epoch": 0.9535476990040788, "grad_norm": 1.872693830476899, "learning_rate": 6.562903493277317e-08, "loss": 0.4395, "step": 66160 }, { "epoch": 0.953691826528112, "grad_norm": 1.3656409816541466, "learning_rate": 6.52234244180805e-08, "loss": 0.4494, "step": 66170 }, { "epoch": 0.9538359540521454, "grad_norm": 1.7599514292415148, "learning_rate": 6.481906297057806e-08, "loss": 0.4454, "step": 66180 }, { "epoch": 0.9539800815761786, "grad_norm": 2.0398892620879594, "learning_rate": 6.441595069261509e-08, "loss": 0.4371, "step": 66190 }, { "epoch": 0.9541242091002119, "grad_norm": 1.6575983749336611, "learning_rate": 6.401408768622663e-08, "loss": 0.4068, "step": 66200 }, { "epoch": 0.9542683366242452, "grad_norm": 1.6472333570507045, "learning_rate": 6.361347405313024e-08, "loss": 0.4439, "step": 66210 }, { "epoch": 0.9544124641482784, "grad_norm": 1.6309552997758217, "learning_rate": 6.321410989472588e-08, "loss": 0.4168, "step": 66220 }, { "epoch": 0.9545565916723117, "grad_norm": 1.7337246289824413, "learning_rate": 6.281599531210104e-08, "loss": 0.4483, "step": 66230 }, { "epoch": 0.9547007191963449, "grad_norm": 1.8725982912012467, "learning_rate": 6.241913040602232e-08, "loss": 0.401, "step": 66240 }, { "epoch": 0.9548448467203782, "grad_norm": 1.921946341940077, "learning_rate": 6.202351527694383e-08, "loss": 0.4473, "step": 66250 }, { "epoch": 0.9549889742444114, "grad_norm": 1.5617878154690372, "learning_rate": 6.162915002500103e-08, "loss": 0.4003, "step": 66260 }, { "epoch": 0.9551331017684447, "grad_norm": 1.5301305084201777, "learning_rate": 6.123603475001461e-08, "loss": 0.4263, "step": 66270 }, { "epoch": 0.9552772292924779, "grad_norm": 1.8532611665293175, "learning_rate": 6.084416955148608e-08, "loss": 0.4039, "step": 66280 }, { "epoch": 0.9554213568165113, "grad_norm": 1.8178567486968369, "learning_rate": 6.045355452860502e-08, "loss": 0.4286, "step": 66290 }, { "epoch": 0.9555654843405446, "grad_norm": 1.829722096295612, "learning_rate": 6.006418978024064e-08, "loss": 0.414, "step": 66300 }, { "epoch": 0.9557096118645778, "grad_norm": 1.7291841999807844, "learning_rate": 5.967607540494691e-08, "loss": 0.4245, "step": 66310 }, { "epoch": 0.9558537393886111, "grad_norm": 1.6217952348178604, "learning_rate": 5.9289211500961894e-08, "loss": 0.4142, "step": 66320 }, { "epoch": 0.9559978669126443, "grad_norm": 1.562124391942096, "learning_rate": 5.8903598166207273e-08, "loss": 0.4107, "step": 66330 }, { "epoch": 0.9561419944366776, "grad_norm": 1.6431009154156144, "learning_rate": 5.85192354982872e-08, "loss": 0.4291, "step": 66340 }, { "epoch": 0.9562861219607108, "grad_norm": 1.9657031792228115, "learning_rate": 5.8136123594489394e-08, "loss": 0.4433, "step": 66350 }, { "epoch": 0.9564302494847441, "grad_norm": 1.4430872624294806, "learning_rate": 5.7754262551785736e-08, "loss": 0.4209, "step": 66360 }, { "epoch": 0.9565743770087773, "grad_norm": 1.8942530338895707, "learning_rate": 5.7373652466831134e-08, "loss": 0.4066, "step": 66370 }, { "epoch": 0.9567185045328106, "grad_norm": 1.7582629778235177, "learning_rate": 5.6994293435962966e-08, "loss": 0.414, "step": 66380 }, { "epoch": 0.9568626320568439, "grad_norm": 1.4984970582075174, "learning_rate": 5.661618555520332e-08, "loss": 0.4195, "step": 66390 }, { "epoch": 0.9570067595808771, "grad_norm": 1.6499958127169243, "learning_rate": 5.623932892025785e-08, "loss": 0.4423, "step": 66400 }, { "epoch": 0.9571508871049105, "grad_norm": 2.067575800572338, "learning_rate": 5.58637236265136e-08, "loss": 0.416, "step": 66410 }, { "epoch": 0.9572950146289437, "grad_norm": 1.7857236895271287, "learning_rate": 5.548936976904284e-08, "loss": 0.4411, "step": 66420 }, { "epoch": 0.957439142152977, "grad_norm": 1.7373864249414759, "learning_rate": 5.511626744259979e-08, "loss": 0.4163, "step": 66430 }, { "epoch": 0.9575832696770102, "grad_norm": 1.517661501026764, "learning_rate": 5.474441674162223e-08, "loss": 0.4485, "step": 66440 }, { "epoch": 0.9577273972010435, "grad_norm": 1.603860364337207, "learning_rate": 5.437381776023154e-08, "loss": 0.4217, "step": 66450 }, { "epoch": 0.9578715247250768, "grad_norm": 1.843624143831151, "learning_rate": 5.4004470592232125e-08, "loss": 0.4477, "step": 66460 }, { "epoch": 0.95801565224911, "grad_norm": 1.645677020058575, "learning_rate": 5.363637533111032e-08, "loss": 0.4132, "step": 66470 }, { "epoch": 0.9581597797731433, "grad_norm": 1.5747433717332833, "learning_rate": 5.326953207003771e-08, "loss": 0.4199, "step": 66480 }, { "epoch": 0.9583039072971765, "grad_norm": 1.4984814973435272, "learning_rate": 5.290394090186835e-08, "loss": 0.4172, "step": 66490 }, { "epoch": 0.9584480348212098, "grad_norm": 1.814552764451491, "learning_rate": 5.25396019191371e-08, "loss": 0.4341, "step": 66500 }, { "epoch": 0.958592162345243, "grad_norm": 2.1512469016377245, "learning_rate": 5.2176515214065196e-08, "loss": 0.4283, "step": 66510 }, { "epoch": 0.9587362898692763, "grad_norm": 1.3357108904187738, "learning_rate": 5.1814680878555234e-08, "loss": 0.4269, "step": 66520 }, { "epoch": 0.9588804173933096, "grad_norm": 1.6900418615448942, "learning_rate": 5.1454099004192847e-08, "loss": 0.4244, "step": 66530 }, { "epoch": 0.9590245449173429, "grad_norm": 1.630748733820928, "learning_rate": 5.1094769682246695e-08, "loss": 0.4279, "step": 66540 }, { "epoch": 0.9591686724413762, "grad_norm": 1.688679520401951, "learning_rate": 5.073669300366846e-08, "loss": 0.4258, "step": 66550 }, { "epoch": 0.9593127999654094, "grad_norm": 1.6162149060365703, "learning_rate": 5.0379869059093425e-08, "loss": 0.4169, "step": 66560 }, { "epoch": 0.9594569274894427, "grad_norm": 1.754047105116342, "learning_rate": 5.0024297938838227e-08, "loss": 0.4058, "step": 66570 }, { "epoch": 0.9596010550134759, "grad_norm": 1.7734844420699682, "learning_rate": 4.9669979732904215e-08, "loss": 0.4019, "step": 66580 }, { "epoch": 0.9597451825375092, "grad_norm": 1.7756163221782229, "learning_rate": 4.9316914530974094e-08, "loss": 0.4232, "step": 66590 }, { "epoch": 0.9598893100615424, "grad_norm": 1.832473258399126, "learning_rate": 4.896510242241359e-08, "loss": 0.4443, "step": 66600 }, { "epoch": 0.9600334375855757, "grad_norm": 1.710142681870665, "learning_rate": 4.861454349627315e-08, "loss": 0.4394, "step": 66610 }, { "epoch": 0.9601775651096089, "grad_norm": 1.7290684399041836, "learning_rate": 4.8265237841283455e-08, "loss": 0.4401, "step": 66620 }, { "epoch": 0.9603216926336422, "grad_norm": 1.4594889208533723, "learning_rate": 4.791718554585989e-08, "loss": 0.4248, "step": 66630 }, { "epoch": 0.9604658201576756, "grad_norm": 1.9993281436820802, "learning_rate": 4.757038669809866e-08, "loss": 0.4025, "step": 66640 }, { "epoch": 0.9606099476817088, "grad_norm": 1.8204200204912575, "learning_rate": 4.722484138578121e-08, "loss": 0.425, "step": 66650 }, { "epoch": 0.9607540752057421, "grad_norm": 1.6348779965137437, "learning_rate": 4.6880549696369794e-08, "loss": 0.4434, "step": 66660 }, { "epoch": 0.9608982027297753, "grad_norm": 1.8720203605584202, "learning_rate": 4.653751171700916e-08, "loss": 0.4537, "step": 66670 }, { "epoch": 0.9610423302538086, "grad_norm": 2.0466135834065944, "learning_rate": 4.619572753452817e-08, "loss": 0.4207, "step": 66680 }, { "epoch": 0.9611864577778418, "grad_norm": 1.5345613811906929, "learning_rate": 4.5855197235438184e-08, "loss": 0.4228, "step": 66690 }, { "epoch": 0.9613305853018751, "grad_norm": 1.802135241268105, "learning_rate": 4.5515920905930824e-08, "loss": 0.437, "step": 66700 }, { "epoch": 0.9614747128259084, "grad_norm": 1.730516439396735, "learning_rate": 4.517789863188404e-08, "loss": 0.4194, "step": 66710 }, { "epoch": 0.9616188403499416, "grad_norm": 1.5834740415831627, "learning_rate": 4.4841130498854965e-08, "loss": 0.4469, "step": 66720 }, { "epoch": 0.9617629678739749, "grad_norm": 1.7526018430017032, "learning_rate": 4.450561659208597e-08, "loss": 0.446, "step": 66730 }, { "epoch": 0.9619070953980081, "grad_norm": 1.9236768506089506, "learning_rate": 4.417135699649966e-08, "loss": 0.4414, "step": 66740 }, { "epoch": 0.9620512229220414, "grad_norm": 1.5033240222636466, "learning_rate": 4.383835179670281e-08, "loss": 0.4217, "step": 66750 }, { "epoch": 0.9621953504460747, "grad_norm": 1.5991664070688816, "learning_rate": 4.350660107698412e-08, "loss": 0.3957, "step": 66760 }, { "epoch": 0.962339477970108, "grad_norm": 1.5929373472178912, "learning_rate": 4.317610492131474e-08, "loss": 0.4418, "step": 66770 }, { "epoch": 0.9624836054941412, "grad_norm": 1.7898547273737546, "learning_rate": 4.284686341334776e-08, "loss": 0.4036, "step": 66780 }, { "epoch": 0.9626277330181745, "grad_norm": 1.5175885004379681, "learning_rate": 4.25188766364204e-08, "loss": 0.4268, "step": 66790 }, { "epoch": 0.9627718605422078, "grad_norm": 1.833708926571794, "learning_rate": 4.219214467354959e-08, "loss": 0.4242, "step": 66800 }, { "epoch": 0.962915988066241, "grad_norm": 1.9906565286878413, "learning_rate": 4.186666760743752e-08, "loss": 0.436, "step": 66810 }, { "epoch": 0.9630601155902743, "grad_norm": 1.812728610266906, "learning_rate": 4.15424455204666e-08, "loss": 0.4036, "step": 66820 }, { "epoch": 0.9632042431143075, "grad_norm": 1.7577351034679005, "learning_rate": 4.1219478494702314e-08, "loss": 0.4283, "step": 66830 }, { "epoch": 0.9633483706383408, "grad_norm": 1.7382614109111347, "learning_rate": 4.089776661189315e-08, "loss": 0.425, "step": 66840 }, { "epoch": 0.963492498162374, "grad_norm": 1.8391614272156858, "learning_rate": 4.057730995346898e-08, "loss": 0.4373, "step": 66850 }, { "epoch": 0.9636366256864073, "grad_norm": 1.5365233999913865, "learning_rate": 4.0258108600542135e-08, "loss": 0.4203, "step": 66860 }, { "epoch": 0.9637807532104405, "grad_norm": 1.6781194752702449, "learning_rate": 3.994016263390743e-08, "loss": 0.4379, "step": 66870 }, { "epoch": 0.9639248807344739, "grad_norm": 1.5093621025236463, "learning_rate": 3.9623472134042165e-08, "loss": 0.4357, "step": 66880 }, { "epoch": 0.9640690082585072, "grad_norm": 1.71305424301772, "learning_rate": 3.930803718110499e-08, "loss": 0.4196, "step": 66890 }, { "epoch": 0.9642131357825404, "grad_norm": 1.6898443684786308, "learning_rate": 3.899385785493759e-08, "loss": 0.4404, "step": 66900 }, { "epoch": 0.9643572633065737, "grad_norm": 1.4149574184222877, "learning_rate": 3.8680934235064136e-08, "loss": 0.4088, "step": 66910 }, { "epoch": 0.9645013908306069, "grad_norm": 1.3769557706496696, "learning_rate": 3.836926640068905e-08, "loss": 0.4363, "step": 66920 }, { "epoch": 0.9646455183546402, "grad_norm": 1.7009637362773462, "learning_rate": 3.80588544307009e-08, "loss": 0.4259, "step": 66930 }, { "epoch": 0.9647896458786734, "grad_norm": 1.7861469961185104, "learning_rate": 3.7749698403670154e-08, "loss": 0.4298, "step": 66940 }, { "epoch": 0.9649337734027067, "grad_norm": 1.7295366396244287, "learning_rate": 3.744179839784812e-08, "loss": 0.414, "step": 66950 }, { "epoch": 0.96507790092674, "grad_norm": 1.316818990849817, "learning_rate": 3.713515449116967e-08, "loss": 0.434, "step": 66960 }, { "epoch": 0.9652220284507732, "grad_norm": 1.648551664678556, "learning_rate": 3.682976676124994e-08, "loss": 0.4196, "step": 66970 }, { "epoch": 0.9653661559748065, "grad_norm": 1.8767719274165255, "learning_rate": 3.652563528538877e-08, "loss": 0.4327, "step": 66980 }, { "epoch": 0.9655102834988398, "grad_norm": 1.795452003627315, "learning_rate": 3.6222760140565696e-08, "loss": 0.4255, "step": 66990 }, { "epoch": 0.9656544110228731, "grad_norm": 1.7473780134856993, "learning_rate": 3.5921141403442716e-08, "loss": 0.4492, "step": 67000 }, { "epoch": 0.9657985385469063, "grad_norm": 1.556646754009691, "learning_rate": 3.562077915036433e-08, "loss": 0.4277, "step": 67010 }, { "epoch": 0.9659426660709396, "grad_norm": 1.4061889098248652, "learning_rate": 3.5321673457357487e-08, "loss": 0.4036, "step": 67020 }, { "epoch": 0.9660867935949728, "grad_norm": 1.656010263180573, "learning_rate": 3.502382440012886e-08, "loss": 0.442, "step": 67030 }, { "epoch": 0.9662309211190061, "grad_norm": 1.7665967369134115, "learning_rate": 3.472723205407036e-08, "loss": 0.4523, "step": 67040 }, { "epoch": 0.9663750486430394, "grad_norm": 1.8742403371040692, "learning_rate": 3.443189649425305e-08, "loss": 0.4445, "step": 67050 }, { "epoch": 0.9665191761670726, "grad_norm": 1.633052033425889, "learning_rate": 3.413781779543046e-08, "loss": 0.4098, "step": 67060 }, { "epoch": 0.9666633036911059, "grad_norm": 1.6580067927149609, "learning_rate": 3.3844996032039724e-08, "loss": 0.4262, "step": 67070 }, { "epoch": 0.9668074312151391, "grad_norm": 1.7995925073052035, "learning_rate": 3.355343127819766e-08, "loss": 0.4381, "step": 67080 }, { "epoch": 0.9669515587391724, "grad_norm": 1.8580660860840583, "learning_rate": 3.3263123607703564e-08, "loss": 0.4318, "step": 67090 }, { "epoch": 0.9670956862632056, "grad_norm": 1.9008596784230019, "learning_rate": 3.2974073094039214e-08, "loss": 0.4271, "step": 67100 }, { "epoch": 0.967239813787239, "grad_norm": 1.402127372845815, "learning_rate": 3.268627981036776e-08, "loss": 0.4206, "step": 67110 }, { "epoch": 0.9673839413112723, "grad_norm": 1.6424241287213919, "learning_rate": 3.239974382953315e-08, "loss": 0.4099, "step": 67120 }, { "epoch": 0.9675280688353055, "grad_norm": 1.885630900196806, "learning_rate": 3.211446522406347e-08, "loss": 0.4306, "step": 67130 }, { "epoch": 0.9676721963593388, "grad_norm": 1.6197925925006509, "learning_rate": 3.1830444066165976e-08, "loss": 0.4286, "step": 67140 }, { "epoch": 0.967816323883372, "grad_norm": 1.9274316250839962, "learning_rate": 3.154768042773093e-08, "loss": 0.4444, "step": 67150 }, { "epoch": 0.9679604514074053, "grad_norm": 1.674844877576883, "learning_rate": 3.126617438032942e-08, "loss": 0.4119, "step": 67160 }, { "epoch": 0.9681045789314385, "grad_norm": 1.873273901991671, "learning_rate": 3.098592599521666e-08, "loss": 0.417, "step": 67170 }, { "epoch": 0.9682487064554718, "grad_norm": 1.703082589295493, "learning_rate": 3.0706935343327025e-08, "loss": 0.4451, "step": 67180 }, { "epoch": 0.968392833979505, "grad_norm": 1.7565298674670236, "learning_rate": 3.042920249527681e-08, "loss": 0.4482, "step": 67190 }, { "epoch": 0.9685369615035383, "grad_norm": 1.5011968950829846, "learning_rate": 3.0152727521364774e-08, "loss": 0.417, "step": 67200 }, { "epoch": 0.9686810890275716, "grad_norm": 1.498451284175867, "learning_rate": 2.9877510491571614e-08, "loss": 0.4199, "step": 67210 }, { "epoch": 0.9688252165516048, "grad_norm": 1.5208828006098363, "learning_rate": 2.9603551475557156e-08, "loss": 0.413, "step": 67220 }, { "epoch": 0.9689693440756382, "grad_norm": 1.6150166101274421, "learning_rate": 2.933085054266649e-08, "loss": 0.4196, "step": 67230 }, { "epoch": 0.9691134715996714, "grad_norm": 1.7581268292054755, "learning_rate": 2.905940776192384e-08, "loss": 0.411, "step": 67240 }, { "epoch": 0.9692575991237047, "grad_norm": 1.8029794235231085, "learning_rate": 2.878922320203481e-08, "loss": 0.4397, "step": 67250 }, { "epoch": 0.9694017266477379, "grad_norm": 1.6480961645574208, "learning_rate": 2.852029693138747e-08, "loss": 0.4081, "step": 67260 }, { "epoch": 0.9695458541717712, "grad_norm": 1.680286663575062, "learning_rate": 2.8252629018051813e-08, "loss": 0.4143, "step": 67270 }, { "epoch": 0.9696899816958044, "grad_norm": 1.6086676383026168, "learning_rate": 2.7986219529778646e-08, "loss": 0.4232, "step": 67280 }, { "epoch": 0.9698341092198377, "grad_norm": 1.781117908268036, "learning_rate": 2.772106853399903e-08, "loss": 0.4271, "step": 67290 }, { "epoch": 0.969978236743871, "grad_norm": 1.6208345966813629, "learning_rate": 2.7457176097828165e-08, "loss": 0.4436, "step": 67300 }, { "epoch": 0.9701223642679042, "grad_norm": 2.0231849598520446, "learning_rate": 2.7194542288060956e-08, "loss": 0.4399, "step": 67310 }, { "epoch": 0.9702664917919375, "grad_norm": 2.5880123951991623, "learning_rate": 2.6933167171173114e-08, "loss": 0.41, "step": 67320 }, { "epoch": 0.9704106193159707, "grad_norm": 1.7607877519611488, "learning_rate": 2.6673050813323388e-08, "loss": 0.45, "step": 67330 }, { "epoch": 0.9705547468400041, "grad_norm": 1.998914363451013, "learning_rate": 2.641419328035133e-08, "loss": 0.4414, "step": 67340 }, { "epoch": 0.9706988743640373, "grad_norm": 1.6181706623689849, "learning_rate": 2.615659463777731e-08, "loss": 0.4233, "step": 67350 }, { "epoch": 0.9708430018880706, "grad_norm": 1.6060979685900314, "learning_rate": 2.590025495080417e-08, "loss": 0.4361, "step": 67360 }, { "epoch": 0.9709871294121039, "grad_norm": 1.619133619750623, "learning_rate": 2.5645174284314455e-08, "loss": 0.426, "step": 67370 }, { "epoch": 0.9711312569361371, "grad_norm": 1.3980950643379775, "learning_rate": 2.5391352702873186e-08, "loss": 0.4144, "step": 67380 }, { "epoch": 0.9712753844601704, "grad_norm": 1.69285929423982, "learning_rate": 2.5138790270726742e-08, "loss": 0.4156, "step": 67390 }, { "epoch": 0.9714195119842036, "grad_norm": 1.6466304645202252, "learning_rate": 2.4887487051802884e-08, "loss": 0.4428, "step": 67400 }, { "epoch": 0.9715636395082369, "grad_norm": 1.852696323365912, "learning_rate": 2.4637443109709613e-08, "loss": 0.4362, "step": 67410 }, { "epoch": 0.9717077670322701, "grad_norm": 1.9124628242030721, "learning_rate": 2.4388658507737417e-08, "loss": 0.4267, "step": 67420 }, { "epoch": 0.9718518945563034, "grad_norm": 1.6844490763564606, "learning_rate": 2.4141133308857035e-08, "loss": 0.4108, "step": 67430 }, { "epoch": 0.9719960220803366, "grad_norm": 1.96568002801418, "learning_rate": 2.3894867575721125e-08, "loss": 0.4442, "step": 67440 }, { "epoch": 0.9721401496043699, "grad_norm": 1.751192126113049, "learning_rate": 2.364986137066261e-08, "loss": 0.438, "step": 67450 }, { "epoch": 0.9722842771284033, "grad_norm": 1.6576471396715773, "learning_rate": 2.340611475569743e-08, "loss": 0.4498, "step": 67460 }, { "epoch": 0.9724284046524365, "grad_norm": 1.213861472034385, "learning_rate": 2.316362779252068e-08, "loss": 0.4055, "step": 67470 }, { "epoch": 0.9725725321764698, "grad_norm": 1.7395076290312186, "learning_rate": 2.292240054250994e-08, "loss": 0.4277, "step": 67480 }, { "epoch": 0.972716659700503, "grad_norm": 1.664996747792808, "learning_rate": 2.2682433066723574e-08, "loss": 0.4187, "step": 67490 }, { "epoch": 0.9728607872245363, "grad_norm": 1.7196411524487996, "learning_rate": 2.2443725425900786e-08, "loss": 0.4319, "step": 67500 }, { "epoch": 0.9730049147485695, "grad_norm": 1.5728872281285358, "learning_rate": 2.220627768046213e-08, "loss": 0.4059, "step": 67510 }, { "epoch": 0.9731490422726028, "grad_norm": 1.7334508906350337, "learning_rate": 2.1970089890509527e-08, "loss": 0.4325, "step": 67520 }, { "epoch": 0.973293169796636, "grad_norm": 1.6726283904729815, "learning_rate": 2.173516211582516e-08, "loss": 0.4509, "step": 67530 }, { "epoch": 0.9734372973206693, "grad_norm": 1.8392333077211591, "learning_rate": 2.150149441587368e-08, "loss": 0.4255, "step": 67540 }, { "epoch": 0.9735814248447026, "grad_norm": 1.9320757031096718, "learning_rate": 2.126908684979889e-08, "loss": 0.4506, "step": 67550 }, { "epoch": 0.9737255523687358, "grad_norm": 1.744438097752348, "learning_rate": 2.103793947642707e-08, "loss": 0.4196, "step": 67560 }, { "epoch": 0.9738696798927691, "grad_norm": 1.94465528925598, "learning_rate": 2.0808052354265862e-08, "loss": 0.4266, "step": 67570 }, { "epoch": 0.9740138074168024, "grad_norm": 1.7979948672289223, "learning_rate": 2.057942554150205e-08, "loss": 0.4187, "step": 67580 }, { "epoch": 0.9741579349408357, "grad_norm": 1.862538444300126, "learning_rate": 2.0352059096005462e-08, "loss": 0.4458, "step": 67590 }, { "epoch": 0.974302062464869, "grad_norm": 1.7775968138612086, "learning_rate": 2.0125953075326166e-08, "loss": 0.4486, "step": 67600 }, { "epoch": 0.9744461899889022, "grad_norm": 1.503503924289898, "learning_rate": 1.990110753669394e-08, "loss": 0.4052, "step": 67610 }, { "epoch": 0.9745903175129355, "grad_norm": 1.7744699077800987, "learning_rate": 1.967752253702104e-08, "loss": 0.4196, "step": 67620 }, { "epoch": 0.9747344450369687, "grad_norm": 1.780859202887377, "learning_rate": 1.945519813290109e-08, "loss": 0.44, "step": 67630 }, { "epoch": 0.974878572561002, "grad_norm": 1.6245540605981625, "learning_rate": 1.923413438060684e-08, "loss": 0.4298, "step": 67640 }, { "epoch": 0.9750227000850352, "grad_norm": 1.7868241656658972, "learning_rate": 1.9014331336092983e-08, "loss": 0.4294, "step": 67650 }, { "epoch": 0.9751668276090685, "grad_norm": 1.8746554370617434, "learning_rate": 1.8795789054995573e-08, "loss": 0.4558, "step": 67660 }, { "epoch": 0.9753109551331017, "grad_norm": 1.5066878371597667, "learning_rate": 1.8578507592630356e-08, "loss": 0.414, "step": 67670 }, { "epoch": 0.975455082657135, "grad_norm": 1.4905817181864611, "learning_rate": 1.836248700399501e-08, "loss": 0.4258, "step": 67680 }, { "epoch": 0.9755992101811684, "grad_norm": 1.769809416477447, "learning_rate": 1.8147727343767462e-08, "loss": 0.4329, "step": 67690 }, { "epoch": 0.9757433377052016, "grad_norm": 1.7284850502502815, "learning_rate": 1.7934228666307007e-08, "loss": 0.4177, "step": 67700 }, { "epoch": 0.9758874652292349, "grad_norm": 1.7384625391470174, "learning_rate": 1.7721991025653195e-08, "loss": 0.4248, "step": 67710 }, { "epoch": 0.9760315927532681, "grad_norm": 1.7415697659328349, "learning_rate": 1.7511014475525833e-08, "loss": 0.4236, "step": 67720 }, { "epoch": 0.9761757202773014, "grad_norm": 1.6976109511285016, "learning_rate": 1.730129906932776e-08, "loss": 0.424, "step": 67730 }, { "epoch": 0.9763198478013346, "grad_norm": 1.6483787729612336, "learning_rate": 1.7092844860139845e-08, "loss": 0.4459, "step": 67740 }, { "epoch": 0.9764639753253679, "grad_norm": 1.6150058491507469, "learning_rate": 1.688565190072544e-08, "loss": 0.4289, "step": 67750 }, { "epoch": 0.9766081028494011, "grad_norm": 1.4634340821391556, "learning_rate": 1.6679720243529263e-08, "loss": 0.4278, "step": 67760 }, { "epoch": 0.9767522303734344, "grad_norm": 1.4588750396300965, "learning_rate": 1.647504994067406e-08, "loss": 0.4075, "step": 67770 }, { "epoch": 0.9768963578974676, "grad_norm": 1.7037530263265561, "learning_rate": 1.6271641043966168e-08, "loss": 0.4238, "step": 67780 }, { "epoch": 0.9770404854215009, "grad_norm": 2.093041323668409, "learning_rate": 1.6069493604891074e-08, "loss": 0.4311, "step": 67790 }, { "epoch": 0.9771846129455342, "grad_norm": 1.9812728077170203, "learning_rate": 1.5868607674615065e-08, "loss": 0.4493, "step": 67800 }, { "epoch": 0.9773287404695675, "grad_norm": 1.5081065131480103, "learning_rate": 1.566898330398636e-08, "loss": 0.4014, "step": 67810 }, { "epoch": 0.9774728679936008, "grad_norm": 1.8228203946126749, "learning_rate": 1.5470620543531766e-08, "loss": 0.4397, "step": 67820 }, { "epoch": 0.977616995517634, "grad_norm": 1.7124136818012088, "learning_rate": 1.527351944346056e-08, "loss": 0.4417, "step": 67830 }, { "epoch": 0.9777611230416673, "grad_norm": 1.6506126760505044, "learning_rate": 1.5077680053662282e-08, "loss": 0.4122, "step": 67840 }, { "epoch": 0.9779052505657005, "grad_norm": 1.6721016526824186, "learning_rate": 1.488310242370672e-08, "loss": 0.4374, "step": 67850 }, { "epoch": 0.9780493780897338, "grad_norm": 1.5911646086718194, "learning_rate": 1.4689786602843926e-08, "loss": 0.4205, "step": 67860 }, { "epoch": 0.9781935056137671, "grad_norm": 1.4476085089670996, "learning_rate": 1.4497732640005868e-08, "loss": 0.4249, "step": 67870 }, { "epoch": 0.9783376331378003, "grad_norm": 1.6363733805455236, "learning_rate": 1.4306940583803664e-08, "loss": 0.4272, "step": 67880 }, { "epoch": 0.9784817606618336, "grad_norm": 1.7823324232943734, "learning_rate": 1.4117410482529792e-08, "loss": 0.4215, "step": 67890 }, { "epoch": 0.9786258881858668, "grad_norm": 2.076835498845369, "learning_rate": 1.3929142384157545e-08, "loss": 0.4437, "step": 67900 }, { "epoch": 0.9787700157099001, "grad_norm": 1.8599060481621765, "learning_rate": 1.3742136336340473e-08, "loss": 0.4375, "step": 67910 }, { "epoch": 0.9789141432339333, "grad_norm": 3.3636267413764953, "learning_rate": 1.3556392386412376e-08, "loss": 0.4308, "step": 67920 }, { "epoch": 0.9790582707579667, "grad_norm": 1.712523428797583, "learning_rate": 1.3371910581387315e-08, "loss": 0.4305, "step": 67930 }, { "epoch": 0.979202398282, "grad_norm": 1.3929851875383787, "learning_rate": 1.3188690967961826e-08, "loss": 0.4128, "step": 67940 }, { "epoch": 0.9793465258060332, "grad_norm": 1.7356569309935426, "learning_rate": 1.3006733592510478e-08, "loss": 0.4663, "step": 67950 }, { "epoch": 0.9794906533300665, "grad_norm": 1.5690009647395713, "learning_rate": 1.2826038501089767e-08, "loss": 0.4247, "step": 67960 }, { "epoch": 0.9796347808540997, "grad_norm": 1.831418238800077, "learning_rate": 1.264660573943699e-08, "loss": 0.4546, "step": 67970 }, { "epoch": 0.979778908378133, "grad_norm": 1.8105534484893582, "learning_rate": 1.2468435352968045e-08, "loss": 0.4545, "step": 67980 }, { "epoch": 0.9799230359021662, "grad_norm": 1.6868663004436857, "learning_rate": 1.2291527386781854e-08, "loss": 0.4538, "step": 67990 }, { "epoch": 0.9800671634261995, "grad_norm": 1.7064000613604426, "learning_rate": 1.2115881885655379e-08, "loss": 0.4165, "step": 68000 }, { "epoch": 0.9802112909502327, "grad_norm": 1.8885073832710413, "learning_rate": 1.1941498894048609e-08, "loss": 0.4174, "step": 68010 }, { "epoch": 0.980355418474266, "grad_norm": 1.6633696693105788, "learning_rate": 1.1768378456099017e-08, "loss": 0.424, "step": 68020 }, { "epoch": 0.9804995459982992, "grad_norm": 1.6952006861488331, "learning_rate": 1.1596520615626549e-08, "loss": 0.4172, "step": 68030 }, { "epoch": 0.9806436735223326, "grad_norm": 1.6310835633394685, "learning_rate": 1.1425925416131412e-08, "loss": 0.3979, "step": 68040 }, { "epoch": 0.9807878010463659, "grad_norm": 1.8018997494949376, "learning_rate": 1.125659290079406e-08, "loss": 0.446, "step": 68050 }, { "epoch": 0.9809319285703991, "grad_norm": 1.5270452691579437, "learning_rate": 1.1088523112474103e-08, "loss": 0.4321, "step": 68060 }, { "epoch": 0.9810760560944324, "grad_norm": 1.5695412460099474, "learning_rate": 1.092171609371362e-08, "loss": 0.4159, "step": 68070 }, { "epoch": 0.9812201836184656, "grad_norm": 1.7820548098183546, "learning_rate": 1.075617188673328e-08, "loss": 0.4514, "step": 68080 }, { "epoch": 0.9813643111424989, "grad_norm": 2.0476176540036564, "learning_rate": 1.0591890533435123e-08, "loss": 0.4482, "step": 68090 }, { "epoch": 0.9815084386665321, "grad_norm": 1.967942840196384, "learning_rate": 1.0428872075400887e-08, "loss": 0.4333, "step": 68100 }, { "epoch": 0.9816525661905654, "grad_norm": 1.8200829468314417, "learning_rate": 1.0267116553894229e-08, "loss": 0.4251, "step": 68110 }, { "epoch": 0.9817966937145987, "grad_norm": 1.740474240321703, "learning_rate": 1.0106624009856292e-08, "loss": 0.4494, "step": 68120 }, { "epoch": 0.9819408212386319, "grad_norm": 1.6882028714061588, "learning_rate": 9.947394483911798e-09, "loss": 0.4293, "step": 68130 }, { "epoch": 0.9820849487626652, "grad_norm": 1.6981747608476245, "learning_rate": 9.789428016362956e-09, "loss": 0.4311, "step": 68140 }, { "epoch": 0.9822290762866984, "grad_norm": 1.8179645351277218, "learning_rate": 9.632724647193892e-09, "loss": 0.4317, "step": 68150 }, { "epoch": 0.9823732038107318, "grad_norm": 1.5823754041285611, "learning_rate": 9.477284416068432e-09, "loss": 0.4045, "step": 68160 }, { "epoch": 0.982517331334765, "grad_norm": 1.5245364196869566, "learning_rate": 9.323107362331219e-09, "loss": 0.42, "step": 68170 }, { "epoch": 0.9826614588587983, "grad_norm": 1.8291384459328015, "learning_rate": 9.170193525006587e-09, "loss": 0.4276, "step": 68180 }, { "epoch": 0.9828055863828316, "grad_norm": 1.6281814541017237, "learning_rate": 9.018542942799691e-09, "loss": 0.4211, "step": 68190 }, { "epoch": 0.9829497139068648, "grad_norm": 2.002474642211567, "learning_rate": 8.868155654095379e-09, "loss": 0.4255, "step": 68200 }, { "epoch": 0.9830938414308981, "grad_norm": 1.6107569001158502, "learning_rate": 8.719031696958757e-09, "loss": 0.3979, "step": 68210 }, { "epoch": 0.9832379689549313, "grad_norm": 1.601439765350871, "learning_rate": 8.571171109135746e-09, "loss": 0.4078, "step": 68220 }, { "epoch": 0.9833820964789646, "grad_norm": 1.703237699393066, "learning_rate": 8.424573928051404e-09, "loss": 0.4631, "step": 68230 }, { "epoch": 0.9835262240029978, "grad_norm": 1.708581090135575, "learning_rate": 8.279240190812721e-09, "loss": 0.4303, "step": 68240 }, { "epoch": 0.9836703515270311, "grad_norm": 1.7801916932972686, "learning_rate": 8.135169934205268e-09, "loss": 0.4432, "step": 68250 }, { "epoch": 0.9838144790510643, "grad_norm": 1.567649157236905, "learning_rate": 7.99236319469543e-09, "loss": 0.4055, "step": 68260 }, { "epoch": 0.9839586065750976, "grad_norm": 1.7013587737812574, "learning_rate": 7.85082000842985e-09, "loss": 0.4469, "step": 68270 }, { "epoch": 0.984102734099131, "grad_norm": 1.7107352240060074, "learning_rate": 7.710540411235978e-09, "loss": 0.3914, "step": 68280 }, { "epoch": 0.9842468616231642, "grad_norm": 1.6184720830170993, "learning_rate": 7.571524438619305e-09, "loss": 0.4163, "step": 68290 }, { "epoch": 0.9843909891471975, "grad_norm": 1.8483627326019338, "learning_rate": 7.433772125767791e-09, "loss": 0.4444, "step": 68300 }, { "epoch": 0.9845351166712307, "grad_norm": 1.5318991654800151, "learning_rate": 7.297283507548547e-09, "loss": 0.4224, "step": 68310 }, { "epoch": 0.984679244195264, "grad_norm": 1.5024197175732956, "learning_rate": 7.162058618508938e-09, "loss": 0.4191, "step": 68320 }, { "epoch": 0.9848233717192972, "grad_norm": 1.6059610941611238, "learning_rate": 7.028097492876584e-09, "loss": 0.4169, "step": 68330 }, { "epoch": 0.9849674992433305, "grad_norm": 1.400594172493725, "learning_rate": 6.895400164558252e-09, "loss": 0.4113, "step": 68340 }, { "epoch": 0.9851116267673637, "grad_norm": 1.8543334807644065, "learning_rate": 6.7639666671426295e-09, "loss": 0.4297, "step": 68350 }, { "epoch": 0.985255754291397, "grad_norm": 2.04101687846601, "learning_rate": 6.633797033897549e-09, "loss": 0.429, "step": 68360 }, { "epoch": 0.9853998818154303, "grad_norm": 1.7664204995203145, "learning_rate": 6.504891297769988e-09, "loss": 0.449, "step": 68370 }, { "epoch": 0.9855440093394635, "grad_norm": 1.7963973251913055, "learning_rate": 6.377249491388848e-09, "loss": 0.4411, "step": 68380 }, { "epoch": 0.9856881368634968, "grad_norm": 1.6820371491683885, "learning_rate": 6.250871647061618e-09, "loss": 0.423, "step": 68390 }, { "epoch": 0.9858322643875301, "grad_norm": 1.758343534685433, "learning_rate": 6.125757796777154e-09, "loss": 0.4638, "step": 68400 }, { "epoch": 0.9859763919115634, "grad_norm": 1.464240504269536, "learning_rate": 6.001907972202903e-09, "loss": 0.419, "step": 68410 }, { "epoch": 0.9861205194355966, "grad_norm": 1.5569609661382275, "learning_rate": 5.8793222046876766e-09, "loss": 0.4155, "step": 68420 }, { "epoch": 0.9862646469596299, "grad_norm": 1.427484309246035, "learning_rate": 5.758000525259988e-09, "loss": 0.44, "step": 68430 }, { "epoch": 0.9864087744836632, "grad_norm": 2.03656894876445, "learning_rate": 5.637942964627496e-09, "loss": 0.4191, "step": 68440 }, { "epoch": 0.9865529020076964, "grad_norm": 1.7501486401240813, "learning_rate": 5.519149553178671e-09, "loss": 0.4238, "step": 68450 }, { "epoch": 0.9866970295317297, "grad_norm": 1.8332526800940474, "learning_rate": 5.401620320982792e-09, "loss": 0.4341, "step": 68460 }, { "epoch": 0.9868411570557629, "grad_norm": 1.647688136166394, "learning_rate": 5.285355297787176e-09, "loss": 0.4289, "step": 68470 }, { "epoch": 0.9869852845797962, "grad_norm": 1.645061108933858, "learning_rate": 5.1703545130216135e-09, "loss": 0.417, "step": 68480 }, { "epoch": 0.9871294121038294, "grad_norm": 1.6281684971718589, "learning_rate": 5.056617995792823e-09, "loss": 0.4424, "step": 68490 }, { "epoch": 0.9872735396278627, "grad_norm": 1.8341145688738856, "learning_rate": 4.944145774890552e-09, "loss": 0.4253, "step": 68500 }, { "epoch": 0.987417667151896, "grad_norm": 1.5733829464333438, "learning_rate": 4.832937878783139e-09, "loss": 0.4223, "step": 68510 }, { "epoch": 0.9875617946759293, "grad_norm": 1.8754421552647722, "learning_rate": 4.722994335618625e-09, "loss": 0.4318, "step": 68520 }, { "epoch": 0.9877059221999626, "grad_norm": 1.685743981160708, "learning_rate": 4.614315173225303e-09, "loss": 0.4412, "step": 68530 }, { "epoch": 0.9878500497239958, "grad_norm": 2.0482885036430174, "learning_rate": 4.506900419112281e-09, "loss": 0.4433, "step": 68540 }, { "epoch": 0.9879941772480291, "grad_norm": 1.748824118875449, "learning_rate": 4.4007501004667e-09, "loss": 0.4356, "step": 68550 }, { "epoch": 0.9881383047720623, "grad_norm": 1.6170239014768433, "learning_rate": 4.2958642441576215e-09, "loss": 0.4355, "step": 68560 }, { "epoch": 0.9882824322960956, "grad_norm": 1.649734469028705, "learning_rate": 4.192242876733255e-09, "loss": 0.4133, "step": 68570 }, { "epoch": 0.9884265598201288, "grad_norm": 1.6098204995820884, "learning_rate": 4.089886024421508e-09, "loss": 0.4237, "step": 68580 }, { "epoch": 0.9885706873441621, "grad_norm": 1.3216537030815088, "learning_rate": 3.988793713130546e-09, "loss": 0.394, "step": 68590 }, { "epoch": 0.9887148148681953, "grad_norm": 1.4132985447596793, "learning_rate": 3.888965968448233e-09, "loss": 0.4189, "step": 68600 }, { "epoch": 0.9888589423922286, "grad_norm": 1.632902152211534, "learning_rate": 3.790402815642691e-09, "loss": 0.4479, "step": 68610 }, { "epoch": 0.9890030699162619, "grad_norm": 1.6783980458552035, "learning_rate": 3.693104279661741e-09, "loss": 0.4434, "step": 68620 }, { "epoch": 0.9891471974402952, "grad_norm": 1.5856913169856197, "learning_rate": 3.597070385132906e-09, "loss": 0.4267, "step": 68630 }, { "epoch": 0.9892913249643285, "grad_norm": 1.7987945212207974, "learning_rate": 3.502301156364518e-09, "loss": 0.4259, "step": 68640 }, { "epoch": 0.9894354524883617, "grad_norm": 1.8723559901563263, "learning_rate": 3.4087966173429465e-09, "loss": 0.4066, "step": 68650 }, { "epoch": 0.989579580012395, "grad_norm": 2.133134010899348, "learning_rate": 3.3165567917364803e-09, "loss": 0.4321, "step": 68660 }, { "epoch": 0.9897237075364282, "grad_norm": 1.6370031134085485, "learning_rate": 3.225581702892555e-09, "loss": 0.4243, "step": 68670 }, { "epoch": 0.9898678350604615, "grad_norm": 1.694169239907301, "learning_rate": 3.135871373837196e-09, "loss": 0.4427, "step": 68680 }, { "epoch": 0.9900119625844948, "grad_norm": 1.7638739449218042, "learning_rate": 3.047425827278905e-09, "loss": 0.4321, "step": 68690 }, { "epoch": 0.990156090108528, "grad_norm": 1.8102929934587895, "learning_rate": 2.9602450856036635e-09, "loss": 0.4345, "step": 68700 }, { "epoch": 0.9903002176325613, "grad_norm": 1.6186041090542187, "learning_rate": 2.87432917087882e-09, "loss": 0.4344, "step": 68710 }, { "epoch": 0.9904443451565945, "grad_norm": 1.6314082333827418, "learning_rate": 2.789678104850313e-09, "loss": 0.4123, "step": 68720 }, { "epoch": 0.9905884726806278, "grad_norm": 1.8606954620485932, "learning_rate": 2.7062919089454467e-09, "loss": 0.4258, "step": 68730 }, { "epoch": 0.990732600204661, "grad_norm": 1.5391790948957575, "learning_rate": 2.6241706042695604e-09, "loss": 0.4708, "step": 68740 }, { "epoch": 0.9908767277286944, "grad_norm": 2.070144374205012, "learning_rate": 2.543314211609915e-09, "loss": 0.4562, "step": 68750 }, { "epoch": 0.9910208552527277, "grad_norm": 1.6039243225273607, "learning_rate": 2.4637227514318073e-09, "loss": 0.4182, "step": 68760 }, { "epoch": 0.9911649827767609, "grad_norm": 1.7264365639647583, "learning_rate": 2.385396243881344e-09, "loss": 0.4248, "step": 68770 }, { "epoch": 0.9913091103007942, "grad_norm": 1.7364165635892874, "learning_rate": 2.308334708783222e-09, "loss": 0.4108, "step": 68780 }, { "epoch": 0.9914532378248274, "grad_norm": 2.0468368369798924, "learning_rate": 2.2325381656446155e-09, "loss": 0.4491, "step": 68790 }, { "epoch": 0.9915973653488607, "grad_norm": 1.6024660857868585, "learning_rate": 2.1580066336490677e-09, "loss": 0.4075, "step": 68800 }, { "epoch": 0.9917414928728939, "grad_norm": 1.5174079584937659, "learning_rate": 2.0847401316631545e-09, "loss": 0.4497, "step": 68810 }, { "epoch": 0.9918856203969272, "grad_norm": 1.887040995665082, "learning_rate": 2.012738678230375e-09, "loss": 0.432, "step": 68820 }, { "epoch": 0.9920297479209604, "grad_norm": 1.576632064209749, "learning_rate": 1.9420022915761507e-09, "loss": 0.4389, "step": 68830 }, { "epoch": 0.9921738754449937, "grad_norm": 1.7755960175527237, "learning_rate": 1.872530989605048e-09, "loss": 0.4417, "step": 68840 }, { "epoch": 0.992318002969027, "grad_norm": 1.7693708930906422, "learning_rate": 1.804324789901335e-09, "loss": 0.4473, "step": 68850 }, { "epoch": 0.9924621304930603, "grad_norm": 1.6903842961654676, "learning_rate": 1.737383709728424e-09, "loss": 0.4409, "step": 68860 }, { "epoch": 0.9926062580170936, "grad_norm": 1.623988009437292, "learning_rate": 1.6717077660305391e-09, "loss": 0.4167, "step": 68870 }, { "epoch": 0.9927503855411268, "grad_norm": 1.4922371120569224, "learning_rate": 1.6072969754310497e-09, "loss": 0.4398, "step": 68880 }, { "epoch": 0.9928945130651601, "grad_norm": 1.8308200223891886, "learning_rate": 1.5441513542335806e-09, "loss": 0.4281, "step": 68890 }, { "epoch": 0.9930386405891933, "grad_norm": 1.8449157428824918, "learning_rate": 1.4822709184209028e-09, "loss": 0.4476, "step": 68900 }, { "epoch": 0.9931827681132266, "grad_norm": 1.7217240168596908, "learning_rate": 1.4216556836565976e-09, "loss": 0.4281, "step": 68910 }, { "epoch": 0.9933268956372598, "grad_norm": 1.506377464551682, "learning_rate": 1.3623056652822818e-09, "loss": 0.4032, "step": 68920 }, { "epoch": 0.9934710231612931, "grad_norm": 1.8733614031513381, "learning_rate": 1.3042208783214939e-09, "loss": 0.4339, "step": 68930 }, { "epoch": 0.9936151506853264, "grad_norm": 1.688924939379104, "learning_rate": 1.2474013374758065e-09, "loss": 0.4241, "step": 68940 }, { "epoch": 0.9937592782093596, "grad_norm": 1.6682486622238402, "learning_rate": 1.191847057126494e-09, "loss": 0.434, "step": 68950 }, { "epoch": 0.9939034057333929, "grad_norm": 1.5566438457279994, "learning_rate": 1.137558051336196e-09, "loss": 0.4393, "step": 68960 }, { "epoch": 0.9940475332574261, "grad_norm": 1.4668418958432246, "learning_rate": 1.0845343338455883e-09, "loss": 0.4404, "step": 68970 }, { "epoch": 0.9941916607814595, "grad_norm": 1.6977149629401467, "learning_rate": 1.0327759180767117e-09, "loss": 0.4229, "step": 68980 }, { "epoch": 0.9943357883054927, "grad_norm": 1.7939806861496657, "learning_rate": 9.822828171290877e-10, "loss": 0.4344, "step": 68990 }, { "epoch": 0.994479915829526, "grad_norm": 1.7603885605834235, "learning_rate": 9.330550437847141e-10, "loss": 0.4325, "step": 69000 }, { "epoch": 0.9946240433535593, "grad_norm": 1.5212588428957803, "learning_rate": 8.850926105025137e-10, "loss": 0.437, "step": 69010 }, { "epoch": 0.9947681708775925, "grad_norm": 1.7332305002020891, "learning_rate": 8.383955294233304e-10, "loss": 0.4208, "step": 69020 }, { "epoch": 0.9949122984016258, "grad_norm": 1.5492141405604063, "learning_rate": 7.929638123665984e-10, "loss": 0.4435, "step": 69030 }, { "epoch": 0.995056425925659, "grad_norm": 1.7732287250876235, "learning_rate": 7.487974708325629e-10, "loss": 0.4057, "step": 69040 }, { "epoch": 0.9952005534496923, "grad_norm": 1.637423217585823, "learning_rate": 7.058965159989495e-10, "loss": 0.4406, "step": 69050 }, { "epoch": 0.9953446809737255, "grad_norm": 1.7848886557842707, "learning_rate": 6.642609587259596e-10, "loss": 0.4521, "step": 69060 }, { "epoch": 0.9954888084977588, "grad_norm": 1.6843496791244659, "learning_rate": 6.238908095512752e-10, "loss": 0.4542, "step": 69070 }, { "epoch": 0.995632936021792, "grad_norm": 1.5299200968743847, "learning_rate": 5.847860786939441e-10, "loss": 0.4264, "step": 69080 }, { "epoch": 0.9957770635458253, "grad_norm": 1.5652964553702877, "learning_rate": 5.469467760516045e-10, "loss": 0.4165, "step": 69090 }, { "epoch": 0.9959211910698587, "grad_norm": 1.7710203047035136, "learning_rate": 5.103729112015953e-10, "loss": 0.4451, "step": 69100 }, { "epoch": 0.9960653185938919, "grad_norm": 1.7430810785363542, "learning_rate": 4.750644934020665e-10, "loss": 0.4415, "step": 69110 }, { "epoch": 0.9962094461179252, "grad_norm": 1.5015722624907601, "learning_rate": 4.4102153158975815e-10, "loss": 0.4153, "step": 69120 }, { "epoch": 0.9963535736419584, "grad_norm": 1.61513543212553, "learning_rate": 4.0824403438111113e-10, "loss": 0.4385, "step": 69130 }, { "epoch": 0.9964977011659917, "grad_norm": 1.7257099178361945, "learning_rate": 3.7673201007337734e-10, "loss": 0.4426, "step": 69140 }, { "epoch": 0.9966418286900249, "grad_norm": 1.5933381567724794, "learning_rate": 3.464854666418438e-10, "loss": 0.401, "step": 69150 }, { "epoch": 0.9967859562140582, "grad_norm": 1.7166857502043025, "learning_rate": 3.175044117431636e-10, "loss": 0.421, "step": 69160 }, { "epoch": 0.9969300837380914, "grad_norm": 1.7569548515748805, "learning_rate": 2.897888527125803e-10, "loss": 0.4422, "step": 69170 }, { "epoch": 0.9970742112621247, "grad_norm": 1.6932551862554301, "learning_rate": 2.63338796565038e-10, "loss": 0.4194, "step": 69180 }, { "epoch": 0.997218338786158, "grad_norm": 1.5020110771536122, "learning_rate": 2.3815424999629186e-10, "loss": 0.4243, "step": 69190 }, { "epoch": 0.9973624663101912, "grad_norm": 1.5746089400909227, "learning_rate": 2.1423521937957714e-10, "loss": 0.3995, "step": 69200 }, { "epoch": 0.9975065938342246, "grad_norm": 1.8934085817179442, "learning_rate": 1.9158171077060528e-10, "loss": 0.4278, "step": 69210 }, { "epoch": 0.9976507213582578, "grad_norm": 1.752882473014528, "learning_rate": 1.7019372990201289e-10, "loss": 0.4224, "step": 69220 }, { "epoch": 0.9977948488822911, "grad_norm": 1.7203097417389726, "learning_rate": 1.5007128218835764e-10, "loss": 0.4157, "step": 69230 }, { "epoch": 0.9979389764063243, "grad_norm": 1.6897439129303073, "learning_rate": 1.312143727227877e-10, "loss": 0.4223, "step": 69240 }, { "epoch": 0.9980831039303576, "grad_norm": 1.8291203904060285, "learning_rate": 1.1362300627815182e-10, "loss": 0.4198, "step": 69250 }, { "epoch": 0.9982272314543909, "grad_norm": 1.5535257191028586, "learning_rate": 9.729718730699944e-11, "loss": 0.4137, "step": 69260 }, { "epoch": 0.9983713589784241, "grad_norm": 1.5561846725942405, "learning_rate": 8.223691994158068e-11, "loss": 0.4418, "step": 69270 }, { "epoch": 0.9985154865024574, "grad_norm": 1.6971617342103047, "learning_rate": 6.844220799440138e-11, "loss": 0.433, "step": 69280 }, { "epoch": 0.9986596140264906, "grad_norm": 1.7297460199596089, "learning_rate": 5.5913054956557854e-11, "loss": 0.434, "step": 69290 }, { "epoch": 0.9988037415505239, "grad_norm": 1.7198585300882803, "learning_rate": 4.464946399940218e-11, "loss": 0.4145, "step": 69300 }, { "epoch": 0.9989478690745571, "grad_norm": 1.5000888727742663, "learning_rate": 3.465143797398707e-11, "loss": 0.3947, "step": 69310 }, { "epoch": 0.9990919965985904, "grad_norm": 1.8439965971037204, "learning_rate": 2.5918979411065914e-11, "loss": 0.4355, "step": 69320 }, { "epoch": 0.9992361241226237, "grad_norm": 1.6428368730986571, "learning_rate": 1.8452090521092757e-11, "loss": 0.4439, "step": 69330 }, { "epoch": 0.999380251646657, "grad_norm": 1.742621567778271, "learning_rate": 1.2250773193667188e-11, "loss": 0.4085, "step": 69340 }, { "epoch": 0.9995243791706903, "grad_norm": 1.9121736275710284, "learning_rate": 7.315028999199669e-12, "loss": 0.4319, "step": 69350 }, { "epoch": 0.9996685066947235, "grad_norm": 1.536903413396275, "learning_rate": 3.644859186135996e-12, "loss": 0.4004, "step": 69360 }, { "epoch": 0.9998126342187568, "grad_norm": 1.515727014907349, "learning_rate": 1.2402646837328392e-12, "loss": 0.4216, "step": 69370 }, { "epoch": 0.99995676174279, "grad_norm": 1.632831832930296, "learning_rate": 1.012461003924159e-13, "loss": 0.4336, "step": 69380 }, { "epoch": 1.0, "step": 69383, "total_flos": 6127412891811840.0, "train_loss": 0.47471993926994643, "train_runtime": 207211.5693, "train_samples_per_second": 2.679, "train_steps_per_second": 0.335 } ], "logging_steps": 10, "max_steps": 69383, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 999999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6127412891811840.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }