{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 52718, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00018968853143138965, "grad_norm": 3.791473956704384, "learning_rate": 1.7071320182094085e-08, "loss": 0.7858, "step": 10 }, { "epoch": 0.0003793770628627793, "grad_norm": 2.4975598427742574, "learning_rate": 3.603945371775418e-08, "loss": 0.818, "step": 20 }, { "epoch": 0.000569065594294169, "grad_norm": 2.793911347086056, "learning_rate": 5.500758725341427e-08, "loss": 0.8063, "step": 30 }, { "epoch": 0.0007587541257255586, "grad_norm": 3.071025828179853, "learning_rate": 7.397572078907435e-08, "loss": 0.8353, "step": 40 }, { "epoch": 0.0009484426571569483, "grad_norm": 3.8795884798805766, "learning_rate": 9.294385432473446e-08, "loss": 0.8204, "step": 50 }, { "epoch": 0.001138131188588338, "grad_norm": 3.28101798698911, "learning_rate": 1.1191198786039454e-07, "loss": 0.7982, "step": 60 }, { "epoch": 0.0013278197200197277, "grad_norm": 2.347259719154062, "learning_rate": 1.3088012139605464e-07, "loss": 0.7861, "step": 70 }, { "epoch": 0.0015175082514511172, "grad_norm": 2.257539772085425, "learning_rate": 1.4984825493171475e-07, "loss": 0.7328, "step": 80 }, { "epoch": 0.0017071967828825069, "grad_norm": 2.2987234941390913, "learning_rate": 1.688163884673748e-07, "loss": 0.8032, "step": 90 }, { "epoch": 0.0018968853143138966, "grad_norm": 2.4644256614305498, "learning_rate": 1.8778452200303492e-07, "loss": 0.8065, "step": 100 }, { "epoch": 0.0020865738457452863, "grad_norm": 2.39247141319451, "learning_rate": 2.0675265553869503e-07, "loss": 0.7583, "step": 110 }, { "epoch": 0.002276262377176676, "grad_norm": 2.3384144852855013, "learning_rate": 2.2572078907435508e-07, "loss": 0.7329, "step": 120 }, { "epoch": 0.0024659509086080657, "grad_norm": 2.0042020744912947, "learning_rate": 2.446889226100152e-07, "loss": 0.7584, "step": 130 }, { "epoch": 0.0026556394400394554, "grad_norm": 3.106115284605038, "learning_rate": 2.6365705614567525e-07, "loss": 0.7949, "step": 140 }, { "epoch": 0.0028453279714708447, "grad_norm": 2.020350735491634, "learning_rate": 2.8262518968133536e-07, "loss": 0.7004, "step": 150 }, { "epoch": 0.0030350165029022344, "grad_norm": 1.6695697192632857, "learning_rate": 3.0159332321699547e-07, "loss": 0.6977, "step": 160 }, { "epoch": 0.003224705034333624, "grad_norm": 3.0518626816851575, "learning_rate": 3.205614567526556e-07, "loss": 0.6854, "step": 170 }, { "epoch": 0.0034143935657650138, "grad_norm": 9.700022357333797, "learning_rate": 3.3952959028831563e-07, "loss": 0.6383, "step": 180 }, { "epoch": 0.0036040820971964035, "grad_norm": 1.8687854050789774, "learning_rate": 3.5849772382397574e-07, "loss": 0.7155, "step": 190 }, { "epoch": 0.003793770628627793, "grad_norm": 2.019421318542884, "learning_rate": 3.7746585735963585e-07, "loss": 0.6918, "step": 200 }, { "epoch": 0.003983459160059183, "grad_norm": 1.9908367905221558, "learning_rate": 3.964339908952959e-07, "loss": 0.6866, "step": 210 }, { "epoch": 0.004173147691490573, "grad_norm": 1.972849727767741, "learning_rate": 4.1540212443095607e-07, "loss": 0.6487, "step": 220 }, { "epoch": 0.004362836222921962, "grad_norm": 1.6918865390991027, "learning_rate": 4.3437025796661613e-07, "loss": 0.6584, "step": 230 }, { "epoch": 0.004552524754353352, "grad_norm": 1.6304617766986143, "learning_rate": 4.533383915022762e-07, "loss": 0.7027, "step": 240 }, { "epoch": 0.004742213285784742, "grad_norm": 1.7100588980120381, "learning_rate": 4.7230652503793635e-07, "loss": 0.6306, "step": 250 }, { "epoch": 0.004931901817216131, "grad_norm": 1.816246776189567, "learning_rate": 4.912746585735965e-07, "loss": 0.6321, "step": 260 }, { "epoch": 0.005121590348647521, "grad_norm": 1.855850535774215, "learning_rate": 5.102427921092565e-07, "loss": 0.62, "step": 270 }, { "epoch": 0.005311278880078911, "grad_norm": 1.6131757955196862, "learning_rate": 5.292109256449166e-07, "loss": 0.656, "step": 280 }, { "epoch": 0.0055009674115103005, "grad_norm": 2.050287985971541, "learning_rate": 5.481790591805767e-07, "loss": 0.6278, "step": 290 }, { "epoch": 0.005690655942941689, "grad_norm": 2.0799455476806035, "learning_rate": 5.671471927162368e-07, "loss": 0.6429, "step": 300 }, { "epoch": 0.005880344474373079, "grad_norm": 1.6531353379885525, "learning_rate": 5.861153262518968e-07, "loss": 0.6144, "step": 310 }, { "epoch": 0.006070033005804469, "grad_norm": 1.6921687643617416, "learning_rate": 6.05083459787557e-07, "loss": 0.6299, "step": 320 }, { "epoch": 0.0062597215372358584, "grad_norm": 1.8664720873863816, "learning_rate": 6.24051593323217e-07, "loss": 0.6225, "step": 330 }, { "epoch": 0.006449410068667248, "grad_norm": 1.9123920903035612, "learning_rate": 6.430197268588771e-07, "loss": 0.6072, "step": 340 }, { "epoch": 0.006639098600098638, "grad_norm": 1.66229735564831, "learning_rate": 6.619878603945372e-07, "loss": 0.6197, "step": 350 }, { "epoch": 0.0068287871315300275, "grad_norm": 1.4579727877371542, "learning_rate": 6.809559939301972e-07, "loss": 0.618, "step": 360 }, { "epoch": 0.007018475662961417, "grad_norm": 1.8011621762030345, "learning_rate": 6.999241274658575e-07, "loss": 0.6528, "step": 370 }, { "epoch": 0.007208164194392807, "grad_norm": 1.7723919493443605, "learning_rate": 7.188922610015176e-07, "loss": 0.6, "step": 380 }, { "epoch": 0.007397852725824197, "grad_norm": 1.7192143422465238, "learning_rate": 7.378603945371776e-07, "loss": 0.5892, "step": 390 }, { "epoch": 0.007587541257255586, "grad_norm": 1.8989850089017786, "learning_rate": 7.568285280728377e-07, "loss": 0.6412, "step": 400 }, { "epoch": 0.007777229788686976, "grad_norm": 1.516385966668898, "learning_rate": 7.757966616084978e-07, "loss": 0.6098, "step": 410 }, { "epoch": 0.007966918320118366, "grad_norm": 1.4980168306834598, "learning_rate": 7.947647951441578e-07, "loss": 0.6103, "step": 420 }, { "epoch": 0.008156606851549755, "grad_norm": 1.8833620506157993, "learning_rate": 8.13732928679818e-07, "loss": 0.6222, "step": 430 }, { "epoch": 0.008346295382981145, "grad_norm": 1.5040848423861612, "learning_rate": 8.327010622154781e-07, "loss": 0.5879, "step": 440 }, { "epoch": 0.008535983914412534, "grad_norm": 1.9076522376250369, "learning_rate": 8.516691957511382e-07, "loss": 0.6026, "step": 450 }, { "epoch": 0.008725672445843925, "grad_norm": 1.9902090037789353, "learning_rate": 8.706373292867982e-07, "loss": 0.593, "step": 460 }, { "epoch": 0.008915360977275313, "grad_norm": 1.8084897888579459, "learning_rate": 8.896054628224583e-07, "loss": 0.6069, "step": 470 }, { "epoch": 0.009105049508706704, "grad_norm": 1.5548702638288967, "learning_rate": 9.085735963581184e-07, "loss": 0.5872, "step": 480 }, { "epoch": 0.009294738040138093, "grad_norm": 1.69944458760748, "learning_rate": 9.275417298937785e-07, "loss": 0.5776, "step": 490 }, { "epoch": 0.009484426571569483, "grad_norm": 1.8639805144831643, "learning_rate": 9.465098634294387e-07, "loss": 0.6019, "step": 500 }, { "epoch": 0.009674115103000872, "grad_norm": 1.5928447570110509, "learning_rate": 9.654779969650987e-07, "loss": 0.6088, "step": 510 }, { "epoch": 0.009863803634432263, "grad_norm": 1.2010754642518502, "learning_rate": 9.844461305007588e-07, "loss": 0.551, "step": 520 }, { "epoch": 0.010053492165863652, "grad_norm": 1.7466324470372094, "learning_rate": 1.0034142640364189e-06, "loss": 0.5579, "step": 530 }, { "epoch": 0.010243180697295042, "grad_norm": 1.7206188114874807, "learning_rate": 1.022382397572079e-06, "loss": 0.6199, "step": 540 }, { "epoch": 0.010432869228726431, "grad_norm": 2.285044247502816, "learning_rate": 1.0413505311077391e-06, "loss": 0.6022, "step": 550 }, { "epoch": 0.010622557760157822, "grad_norm": 1.8545310609350356, "learning_rate": 1.060318664643399e-06, "loss": 0.607, "step": 560 }, { "epoch": 0.01081224629158921, "grad_norm": 1.8037532045907683, "learning_rate": 1.0792867981790593e-06, "loss": 0.5598, "step": 570 }, { "epoch": 0.011001934823020601, "grad_norm": 1.8059804491026195, "learning_rate": 1.0982549317147194e-06, "loss": 0.5853, "step": 580 }, { "epoch": 0.01119162335445199, "grad_norm": 1.6436579047184678, "learning_rate": 1.1172230652503795e-06, "loss": 0.602, "step": 590 }, { "epoch": 0.011381311885883379, "grad_norm": 2.204461525370445, "learning_rate": 1.1361911987860394e-06, "loss": 0.5833, "step": 600 }, { "epoch": 0.01157100041731477, "grad_norm": 1.3628173556967456, "learning_rate": 1.1551593323216996e-06, "loss": 0.543, "step": 610 }, { "epoch": 0.011760688948746158, "grad_norm": 1.6982483761154568, "learning_rate": 1.1741274658573597e-06, "loss": 0.5924, "step": 620 }, { "epoch": 0.011950377480177549, "grad_norm": 1.7312199019957861, "learning_rate": 1.19309559939302e-06, "loss": 0.5898, "step": 630 }, { "epoch": 0.012140066011608937, "grad_norm": 1.9046646807420844, "learning_rate": 1.2120637329286799e-06, "loss": 0.6016, "step": 640 }, { "epoch": 0.012329754543040328, "grad_norm": 1.9585721906545346, "learning_rate": 1.23103186646434e-06, "loss": 0.5532, "step": 650 }, { "epoch": 0.012519443074471717, "grad_norm": 1.6691653400161859, "learning_rate": 1.25e-06, "loss": 0.5667, "step": 660 }, { "epoch": 0.012709131605903107, "grad_norm": 2.114932422596338, "learning_rate": 1.2689681335356602e-06, "loss": 0.591, "step": 670 }, { "epoch": 0.012898820137334496, "grad_norm": 1.8595228869116993, "learning_rate": 1.2879362670713203e-06, "loss": 0.5691, "step": 680 }, { "epoch": 0.013088508668765887, "grad_norm": 1.5799982060136797, "learning_rate": 1.3069044006069802e-06, "loss": 0.5562, "step": 690 }, { "epoch": 0.013278197200197276, "grad_norm": 2.054300999842608, "learning_rate": 1.3258725341426403e-06, "loss": 0.5704, "step": 700 }, { "epoch": 0.013467885731628666, "grad_norm": 1.7026370837106817, "learning_rate": 1.3448406676783004e-06, "loss": 0.5571, "step": 710 }, { "epoch": 0.013657574263060055, "grad_norm": 1.3655431848858934, "learning_rate": 1.3638088012139605e-06, "loss": 0.535, "step": 720 }, { "epoch": 0.013847262794491446, "grad_norm": 1.8665810877522857, "learning_rate": 1.3827769347496209e-06, "loss": 0.588, "step": 730 }, { "epoch": 0.014036951325922834, "grad_norm": 1.7832308483006092, "learning_rate": 1.401745068285281e-06, "loss": 0.5773, "step": 740 }, { "epoch": 0.014226639857354225, "grad_norm": 2.0454504077505744, "learning_rate": 1.420713201820941e-06, "loss": 0.5623, "step": 750 }, { "epoch": 0.014416328388785614, "grad_norm": 1.9662126898051238, "learning_rate": 1.4396813353566012e-06, "loss": 0.595, "step": 760 }, { "epoch": 0.014606016920217004, "grad_norm": 1.4595619618890585, "learning_rate": 1.458649468892261e-06, "loss": 0.5586, "step": 770 }, { "epoch": 0.014795705451648393, "grad_norm": 1.8098038279255806, "learning_rate": 1.4776176024279212e-06, "loss": 0.5912, "step": 780 }, { "epoch": 0.014985393983079782, "grad_norm": 1.919638988591767, "learning_rate": 1.4965857359635813e-06, "loss": 0.5907, "step": 790 }, { "epoch": 0.015175082514511173, "grad_norm": 1.7037226006406623, "learning_rate": 1.5155538694992414e-06, "loss": 0.5857, "step": 800 }, { "epoch": 0.015364771045942562, "grad_norm": 1.8994553380579542, "learning_rate": 1.5345220030349015e-06, "loss": 0.567, "step": 810 }, { "epoch": 0.015554459577373952, "grad_norm": 1.8331375315236398, "learning_rate": 1.5534901365705614e-06, "loss": 0.5915, "step": 820 }, { "epoch": 0.015744148108805343, "grad_norm": 1.7745156452963555, "learning_rate": 1.5724582701062215e-06, "loss": 0.5774, "step": 830 }, { "epoch": 0.01593383664023673, "grad_norm": 2.0659239050048503, "learning_rate": 1.5914264036418817e-06, "loss": 0.5473, "step": 840 }, { "epoch": 0.01612352517166812, "grad_norm": 1.900889461035925, "learning_rate": 1.6103945371775418e-06, "loss": 0.5805, "step": 850 }, { "epoch": 0.01631321370309951, "grad_norm": 1.8098706231533759, "learning_rate": 1.629362670713202e-06, "loss": 0.5846, "step": 860 }, { "epoch": 0.0165029022345309, "grad_norm": 1.9273148492987757, "learning_rate": 1.6483308042488622e-06, "loss": 0.5678, "step": 870 }, { "epoch": 0.01669259076596229, "grad_norm": 1.937211192629668, "learning_rate": 1.6672989377845223e-06, "loss": 0.5641, "step": 880 }, { "epoch": 0.01688227929739368, "grad_norm": 2.10297048167858, "learning_rate": 1.6862670713201822e-06, "loss": 0.5563, "step": 890 }, { "epoch": 0.017071967828825068, "grad_norm": 1.9712231582281818, "learning_rate": 1.7052352048558423e-06, "loss": 0.5857, "step": 900 }, { "epoch": 0.01726165636025646, "grad_norm": 1.8242225686729077, "learning_rate": 1.7242033383915024e-06, "loss": 0.5747, "step": 910 }, { "epoch": 0.01745134489168785, "grad_norm": 2.1923102168348723, "learning_rate": 1.7431714719271625e-06, "loss": 0.5713, "step": 920 }, { "epoch": 0.017641033423119238, "grad_norm": 1.6948215700475573, "learning_rate": 1.7621396054628226e-06, "loss": 0.566, "step": 930 }, { "epoch": 0.017830721954550627, "grad_norm": 2.0221728180889462, "learning_rate": 1.7811077389984827e-06, "loss": 0.5877, "step": 940 }, { "epoch": 0.01802041048598202, "grad_norm": 1.8824263032395443, "learning_rate": 1.8000758725341426e-06, "loss": 0.5503, "step": 950 }, { "epoch": 0.018210099017413408, "grad_norm": 1.8220315247992431, "learning_rate": 1.8190440060698028e-06, "loss": 0.5703, "step": 960 }, { "epoch": 0.018399787548844797, "grad_norm": 1.7295328847045348, "learning_rate": 1.8380121396054629e-06, "loss": 0.5536, "step": 970 }, { "epoch": 0.018589476080276186, "grad_norm": 1.858333260888776, "learning_rate": 1.856980273141123e-06, "loss": 0.5649, "step": 980 }, { "epoch": 0.018779164611707574, "grad_norm": 1.7903843122842442, "learning_rate": 1.8759484066767833e-06, "loss": 0.5789, "step": 990 }, { "epoch": 0.018968853143138967, "grad_norm": 1.735095643049438, "learning_rate": 1.8949165402124434e-06, "loss": 0.5619, "step": 1000 }, { "epoch": 0.019158541674570356, "grad_norm": 1.7721372473039767, "learning_rate": 1.9138846737481035e-06, "loss": 0.5845, "step": 1010 }, { "epoch": 0.019348230206001744, "grad_norm": 1.8118912973676777, "learning_rate": 1.9328528072837634e-06, "loss": 0.5619, "step": 1020 }, { "epoch": 0.019537918737433133, "grad_norm": 1.8895623959632737, "learning_rate": 1.9518209408194237e-06, "loss": 0.5468, "step": 1030 }, { "epoch": 0.019727607268864526, "grad_norm": 1.9369464648477677, "learning_rate": 1.9707890743550836e-06, "loss": 0.5494, "step": 1040 }, { "epoch": 0.019917295800295914, "grad_norm": 1.7735214926852216, "learning_rate": 1.9897572078907435e-06, "loss": 0.5436, "step": 1050 }, { "epoch": 0.020106984331727303, "grad_norm": 1.525384877567519, "learning_rate": 2.008725341426404e-06, "loss": 0.5197, "step": 1060 }, { "epoch": 0.020296672863158692, "grad_norm": 2.1085683695402233, "learning_rate": 2.0276934749620638e-06, "loss": 0.5603, "step": 1070 }, { "epoch": 0.020486361394590084, "grad_norm": 1.979001508761242, "learning_rate": 2.046661608497724e-06, "loss": 0.555, "step": 1080 }, { "epoch": 0.020676049926021473, "grad_norm": 1.7450890584583412, "learning_rate": 2.065629742033384e-06, "loss": 0.5541, "step": 1090 }, { "epoch": 0.020865738457452862, "grad_norm": 1.7479102379076459, "learning_rate": 2.0845978755690443e-06, "loss": 0.5437, "step": 1100 }, { "epoch": 0.02105542698888425, "grad_norm": 2.1060127958150163, "learning_rate": 2.103566009104704e-06, "loss": 0.5635, "step": 1110 }, { "epoch": 0.021245115520315643, "grad_norm": 1.9235217520242374, "learning_rate": 2.122534142640364e-06, "loss": 0.5263, "step": 1120 }, { "epoch": 0.021434804051747032, "grad_norm": 1.5968108571997337, "learning_rate": 2.1415022761760244e-06, "loss": 0.5791, "step": 1130 }, { "epoch": 0.02162449258317842, "grad_norm": 1.747669362159445, "learning_rate": 2.1604704097116847e-06, "loss": 0.5296, "step": 1140 }, { "epoch": 0.02181418111460981, "grad_norm": 1.83856539586502, "learning_rate": 2.1794385432473446e-06, "loss": 0.5713, "step": 1150 }, { "epoch": 0.022003869646041202, "grad_norm": 1.677911057585291, "learning_rate": 2.198406676783005e-06, "loss": 0.5424, "step": 1160 }, { "epoch": 0.02219355817747259, "grad_norm": 2.001057460981959, "learning_rate": 2.217374810318665e-06, "loss": 0.5511, "step": 1170 }, { "epoch": 0.02238324670890398, "grad_norm": 1.9646008554518322, "learning_rate": 2.2363429438543247e-06, "loss": 0.5566, "step": 1180 }, { "epoch": 0.02257293524033537, "grad_norm": 1.6433735947793464, "learning_rate": 2.255311077389985e-06, "loss": 0.5429, "step": 1190 }, { "epoch": 0.022762623771766757, "grad_norm": 1.913835886204066, "learning_rate": 2.274279210925645e-06, "loss": 0.5632, "step": 1200 }, { "epoch": 0.02295231230319815, "grad_norm": 1.8952540086730558, "learning_rate": 2.2932473444613053e-06, "loss": 0.5591, "step": 1210 }, { "epoch": 0.02314200083462954, "grad_norm": 1.7636571361556452, "learning_rate": 2.312215477996965e-06, "loss": 0.5575, "step": 1220 }, { "epoch": 0.023331689366060927, "grad_norm": 1.896866660341218, "learning_rate": 2.331183611532625e-06, "loss": 0.5227, "step": 1230 }, { "epoch": 0.023521377897492316, "grad_norm": 1.5501481962445187, "learning_rate": 2.3501517450682854e-06, "loss": 0.5457, "step": 1240 }, { "epoch": 0.02371106642892371, "grad_norm": 1.9205782928911126, "learning_rate": 2.3691198786039453e-06, "loss": 0.5166, "step": 1250 }, { "epoch": 0.023900754960355097, "grad_norm": 1.7183776297546145, "learning_rate": 2.3880880121396056e-06, "loss": 0.5426, "step": 1260 }, { "epoch": 0.024090443491786486, "grad_norm": 1.7625232005002158, "learning_rate": 2.407056145675266e-06, "loss": 0.5503, "step": 1270 }, { "epoch": 0.024280132023217875, "grad_norm": 2.086352380503355, "learning_rate": 2.426024279210926e-06, "loss": 0.5572, "step": 1280 }, { "epoch": 0.024469820554649267, "grad_norm": 1.6346963426605774, "learning_rate": 2.444992412746586e-06, "loss": 0.5295, "step": 1290 }, { "epoch": 0.024659509086080656, "grad_norm": 1.7465315284374276, "learning_rate": 2.463960546282246e-06, "loss": 0.528, "step": 1300 }, { "epoch": 0.024849197617512045, "grad_norm": 1.744318647493091, "learning_rate": 2.482928679817906e-06, "loss": 0.5549, "step": 1310 }, { "epoch": 0.025038886148943434, "grad_norm": 1.8036599519494523, "learning_rate": 2.5018968133535663e-06, "loss": 0.5622, "step": 1320 }, { "epoch": 0.025228574680374826, "grad_norm": 1.6166590950098119, "learning_rate": 2.5208649468892266e-06, "loss": 0.5478, "step": 1330 }, { "epoch": 0.025418263211806215, "grad_norm": 2.3209937273982537, "learning_rate": 2.5398330804248865e-06, "loss": 0.565, "step": 1340 }, { "epoch": 0.025607951743237604, "grad_norm": 2.1419638073692244, "learning_rate": 2.558801213960547e-06, "loss": 0.5528, "step": 1350 }, { "epoch": 0.025797640274668993, "grad_norm": 1.5945880874631546, "learning_rate": 2.5777693474962067e-06, "loss": 0.5394, "step": 1360 }, { "epoch": 0.025987328806100385, "grad_norm": 2.189505616568391, "learning_rate": 2.596737481031867e-06, "loss": 0.5455, "step": 1370 }, { "epoch": 0.026177017337531774, "grad_norm": 2.245733766391652, "learning_rate": 2.615705614567527e-06, "loss": 0.5355, "step": 1380 }, { "epoch": 0.026366705868963163, "grad_norm": 1.8327015960840927, "learning_rate": 2.634673748103187e-06, "loss": 0.5413, "step": 1390 }, { "epoch": 0.02655639440039455, "grad_norm": 2.182535479731654, "learning_rate": 2.653641881638847e-06, "loss": 0.5665, "step": 1400 }, { "epoch": 0.02674608293182594, "grad_norm": 1.9096875827573143, "learning_rate": 2.672610015174507e-06, "loss": 0.5288, "step": 1410 }, { "epoch": 0.026935771463257333, "grad_norm": 1.8388130273273893, "learning_rate": 2.6915781487101674e-06, "loss": 0.535, "step": 1420 }, { "epoch": 0.02712545999468872, "grad_norm": 1.6667145077745231, "learning_rate": 2.7105462822458273e-06, "loss": 0.5173, "step": 1430 }, { "epoch": 0.02731514852612011, "grad_norm": 1.748121801147781, "learning_rate": 2.729514415781487e-06, "loss": 0.5522, "step": 1440 }, { "epoch": 0.0275048370575515, "grad_norm": 2.283570865375367, "learning_rate": 2.7484825493171475e-06, "loss": 0.5461, "step": 1450 }, { "epoch": 0.02769452558898289, "grad_norm": 1.8363790315315196, "learning_rate": 2.7674506828528074e-06, "loss": 0.5112, "step": 1460 }, { "epoch": 0.02788421412041428, "grad_norm": 1.650537714157058, "learning_rate": 2.7864188163884677e-06, "loss": 0.5218, "step": 1470 }, { "epoch": 0.02807390265184567, "grad_norm": 1.8211726896375189, "learning_rate": 2.8053869499241276e-06, "loss": 0.5332, "step": 1480 }, { "epoch": 0.028263591183277058, "grad_norm": 1.629672432771682, "learning_rate": 2.8243550834597875e-06, "loss": 0.5355, "step": 1490 }, { "epoch": 0.02845327971470845, "grad_norm": 1.9978400353759973, "learning_rate": 2.843323216995448e-06, "loss": 0.5619, "step": 1500 }, { "epoch": 0.02864296824613984, "grad_norm": 2.0285080275585283, "learning_rate": 2.8622913505311077e-06, "loss": 0.5275, "step": 1510 }, { "epoch": 0.028832656777571228, "grad_norm": 1.9157684381840174, "learning_rate": 2.881259484066768e-06, "loss": 0.5683, "step": 1520 }, { "epoch": 0.029022345309002617, "grad_norm": 1.8360904904344924, "learning_rate": 2.900227617602428e-06, "loss": 0.5128, "step": 1530 }, { "epoch": 0.02921203384043401, "grad_norm": 1.6840056966883346, "learning_rate": 2.9191957511380883e-06, "loss": 0.5339, "step": 1540 }, { "epoch": 0.029401722371865398, "grad_norm": 1.8523786581013, "learning_rate": 2.938163884673748e-06, "loss": 0.541, "step": 1550 }, { "epoch": 0.029591410903296787, "grad_norm": 1.975986118507894, "learning_rate": 2.957132018209408e-06, "loss": 0.551, "step": 1560 }, { "epoch": 0.029781099434728175, "grad_norm": 1.598225494460456, "learning_rate": 2.9761001517450684e-06, "loss": 0.541, "step": 1570 }, { "epoch": 0.029970787966159564, "grad_norm": 1.91909369080613, "learning_rate": 2.9950682852807283e-06, "loss": 0.5313, "step": 1580 }, { "epoch": 0.030160476497590957, "grad_norm": 1.9122148655653084, "learning_rate": 3.014036418816389e-06, "loss": 0.5353, "step": 1590 }, { "epoch": 0.030350165029022345, "grad_norm": 1.8453503761255399, "learning_rate": 3.033004552352049e-06, "loss": 0.5239, "step": 1600 }, { "epoch": 0.030539853560453734, "grad_norm": 1.85954461989685, "learning_rate": 3.0519726858877092e-06, "loss": 0.544, "step": 1610 }, { "epoch": 0.030729542091885123, "grad_norm": 1.5746799377764205, "learning_rate": 3.070940819423369e-06, "loss": 0.5524, "step": 1620 }, { "epoch": 0.030919230623316515, "grad_norm": 1.890690693180639, "learning_rate": 3.0899089529590295e-06, "loss": 0.5224, "step": 1630 }, { "epoch": 0.031108919154747904, "grad_norm": 1.5881881680777024, "learning_rate": 3.1088770864946894e-06, "loss": 0.5412, "step": 1640 }, { "epoch": 0.03129860768617929, "grad_norm": 1.7449676041847455, "learning_rate": 3.1278452200303493e-06, "loss": 0.5356, "step": 1650 }, { "epoch": 0.031488296217610685, "grad_norm": 1.870161854795933, "learning_rate": 3.1468133535660096e-06, "loss": 0.5417, "step": 1660 }, { "epoch": 0.03167798474904207, "grad_norm": 1.8624880823713046, "learning_rate": 3.1657814871016695e-06, "loss": 0.5174, "step": 1670 }, { "epoch": 0.03186767328047346, "grad_norm": 1.924576434911169, "learning_rate": 3.18474962063733e-06, "loss": 0.5248, "step": 1680 }, { "epoch": 0.032057361811904855, "grad_norm": 1.5834463923779913, "learning_rate": 3.2037177541729897e-06, "loss": 0.5556, "step": 1690 }, { "epoch": 0.03224705034333624, "grad_norm": 1.7197539759152733, "learning_rate": 3.2226858877086496e-06, "loss": 0.5423, "step": 1700 }, { "epoch": 0.03243673887476763, "grad_norm": 1.7245821779748662, "learning_rate": 3.24165402124431e-06, "loss": 0.5222, "step": 1710 }, { "epoch": 0.03262642740619902, "grad_norm": 1.499380119861583, "learning_rate": 3.26062215477997e-06, "loss": 0.5423, "step": 1720 }, { "epoch": 0.03281611593763041, "grad_norm": 1.6287646768374344, "learning_rate": 3.27959028831563e-06, "loss": 0.5328, "step": 1730 }, { "epoch": 0.0330058044690618, "grad_norm": 1.715482398854966, "learning_rate": 3.29855842185129e-06, "loss": 0.5405, "step": 1740 }, { "epoch": 0.03319549300049319, "grad_norm": 1.8448072116650935, "learning_rate": 3.31752655538695e-06, "loss": 0.5607, "step": 1750 }, { "epoch": 0.03338518153192458, "grad_norm": 1.5512763832807486, "learning_rate": 3.3364946889226103e-06, "loss": 0.5109, "step": 1760 }, { "epoch": 0.03357487006335597, "grad_norm": 1.6284107759814537, "learning_rate": 3.35546282245827e-06, "loss": 0.5406, "step": 1770 }, { "epoch": 0.03376455859478736, "grad_norm": 6.310659143608582, "learning_rate": 3.3744309559939305e-06, "loss": 0.5527, "step": 1780 }, { "epoch": 0.03395424712621875, "grad_norm": 2.032327837723218, "learning_rate": 3.3933990895295904e-06, "loss": 0.5329, "step": 1790 }, { "epoch": 0.034143935657650136, "grad_norm": 3.4774075743793498, "learning_rate": 3.4123672230652503e-06, "loss": 0.5318, "step": 1800 }, { "epoch": 0.03433362418908153, "grad_norm": 2.1536296853057686, "learning_rate": 3.4313353566009106e-06, "loss": 0.5252, "step": 1810 }, { "epoch": 0.03452331272051292, "grad_norm": 1.7351831263030448, "learning_rate": 3.4503034901365705e-06, "loss": 0.5186, "step": 1820 }, { "epoch": 0.034713001251944306, "grad_norm": 2.284645178638098, "learning_rate": 3.469271623672231e-06, "loss": 0.5511, "step": 1830 }, { "epoch": 0.0349026897833757, "grad_norm": 1.569681093253331, "learning_rate": 3.4882397572078907e-06, "loss": 0.531, "step": 1840 }, { "epoch": 0.035092378314807084, "grad_norm": 1.8299633940754034, "learning_rate": 3.5072078907435515e-06, "loss": 0.5217, "step": 1850 }, { "epoch": 0.035282066846238476, "grad_norm": 1.8089859433607942, "learning_rate": 3.5261760242792114e-06, "loss": 0.5457, "step": 1860 }, { "epoch": 0.03547175537766987, "grad_norm": 1.713422176390333, "learning_rate": 3.5451441578148717e-06, "loss": 0.5243, "step": 1870 }, { "epoch": 0.035661443909101254, "grad_norm": 1.8633029349519452, "learning_rate": 3.5641122913505316e-06, "loss": 0.5371, "step": 1880 }, { "epoch": 0.035851132440532646, "grad_norm": 1.6968742143786002, "learning_rate": 3.5830804248861915e-06, "loss": 0.5389, "step": 1890 }, { "epoch": 0.03604082097196404, "grad_norm": 2.1598044256757443, "learning_rate": 3.602048558421852e-06, "loss": 0.5532, "step": 1900 }, { "epoch": 0.036230509503395424, "grad_norm": 1.7843108779968904, "learning_rate": 3.6210166919575117e-06, "loss": 0.5327, "step": 1910 }, { "epoch": 0.036420198034826816, "grad_norm": 1.7233829187300977, "learning_rate": 3.639984825493172e-06, "loss": 0.5419, "step": 1920 }, { "epoch": 0.0366098865662582, "grad_norm": 1.7677012216900871, "learning_rate": 3.658952959028832e-06, "loss": 0.5353, "step": 1930 }, { "epoch": 0.036799575097689594, "grad_norm": 1.776753040575546, "learning_rate": 3.6779210925644922e-06, "loss": 0.5059, "step": 1940 }, { "epoch": 0.036989263629120986, "grad_norm": 1.6938943971420617, "learning_rate": 3.696889226100152e-06, "loss": 0.5345, "step": 1950 }, { "epoch": 0.03717895216055237, "grad_norm": 1.7377467662748056, "learning_rate": 3.715857359635812e-06, "loss": 0.5683, "step": 1960 }, { "epoch": 0.037368640691983764, "grad_norm": 1.903352498743982, "learning_rate": 3.7348254931714723e-06, "loss": 0.5321, "step": 1970 }, { "epoch": 0.03755832922341515, "grad_norm": 2.0013754342383967, "learning_rate": 3.7537936267071322e-06, "loss": 0.521, "step": 1980 }, { "epoch": 0.03774801775484654, "grad_norm": 1.4444983260380362, "learning_rate": 3.7727617602427926e-06, "loss": 0.5051, "step": 1990 }, { "epoch": 0.037937706286277934, "grad_norm": 1.9780449867382695, "learning_rate": 3.7917298937784525e-06, "loss": 0.5414, "step": 2000 }, { "epoch": 0.03812739481770932, "grad_norm": 1.9130843092432692, "learning_rate": 3.8106980273141124e-06, "loss": 0.5187, "step": 2010 }, { "epoch": 0.03831708334914071, "grad_norm": 3.0414453469744784, "learning_rate": 3.829666160849772e-06, "loss": 0.5407, "step": 2020 }, { "epoch": 0.038506771880572103, "grad_norm": 1.6458958341343932, "learning_rate": 3.848634294385433e-06, "loss": 0.5386, "step": 2030 }, { "epoch": 0.03869646041200349, "grad_norm": 1.761353124313499, "learning_rate": 3.867602427921093e-06, "loss": 0.5402, "step": 2040 }, { "epoch": 0.03888614894343488, "grad_norm": 1.9812202299196597, "learning_rate": 3.886570561456753e-06, "loss": 0.5316, "step": 2050 }, { "epoch": 0.039075837474866267, "grad_norm": 2.217170507514349, "learning_rate": 3.905538694992413e-06, "loss": 0.4918, "step": 2060 }, { "epoch": 0.03926552600629766, "grad_norm": 1.8530867601879877, "learning_rate": 3.924506828528073e-06, "loss": 0.5329, "step": 2070 }, { "epoch": 0.03945521453772905, "grad_norm": 1.7497944880699692, "learning_rate": 3.943474962063733e-06, "loss": 0.5296, "step": 2080 }, { "epoch": 0.039644903069160436, "grad_norm": 1.9247793035955192, "learning_rate": 3.962443095599393e-06, "loss": 0.5297, "step": 2090 }, { "epoch": 0.03983459160059183, "grad_norm": 1.7399832549472969, "learning_rate": 3.981411229135053e-06, "loss": 0.5365, "step": 2100 }, { "epoch": 0.04002428013202322, "grad_norm": 1.7514715069315585, "learning_rate": 4.000379362670714e-06, "loss": 0.4923, "step": 2110 }, { "epoch": 0.040213968663454606, "grad_norm": 1.9919634904317571, "learning_rate": 4.019347496206374e-06, "loss": 0.5491, "step": 2120 }, { "epoch": 0.040403657194886, "grad_norm": 1.6510281118938166, "learning_rate": 4.038315629742034e-06, "loss": 0.5341, "step": 2130 }, { "epoch": 0.040593345726317384, "grad_norm": 1.6567139691821973, "learning_rate": 4.057283763277694e-06, "loss": 0.5378, "step": 2140 }, { "epoch": 0.040783034257748776, "grad_norm": 1.8860227116916437, "learning_rate": 4.076251896813354e-06, "loss": 0.5202, "step": 2150 }, { "epoch": 0.04097272278918017, "grad_norm": 1.5000433046667172, "learning_rate": 4.095220030349014e-06, "loss": 0.5199, "step": 2160 }, { "epoch": 0.041162411320611554, "grad_norm": 2.0945548791125677, "learning_rate": 4.114188163884674e-06, "loss": 0.5261, "step": 2170 }, { "epoch": 0.041352099852042946, "grad_norm": 1.5663273616762419, "learning_rate": 4.133156297420334e-06, "loss": 0.5308, "step": 2180 }, { "epoch": 0.04154178838347433, "grad_norm": 1.6440264408459173, "learning_rate": 4.152124430955995e-06, "loss": 0.5153, "step": 2190 }, { "epoch": 0.041731476914905724, "grad_norm": 1.7976178995915593, "learning_rate": 4.171092564491655e-06, "loss": 0.5431, "step": 2200 }, { "epoch": 0.041921165446337116, "grad_norm": 1.755603763528081, "learning_rate": 4.1900606980273146e-06, "loss": 0.5309, "step": 2210 }, { "epoch": 0.0421108539777685, "grad_norm": 1.7633902969284994, "learning_rate": 4.2090288315629745e-06, "loss": 0.521, "step": 2220 }, { "epoch": 0.042300542509199894, "grad_norm": 1.9069116840898541, "learning_rate": 4.227996965098634e-06, "loss": 0.5214, "step": 2230 }, { "epoch": 0.042490231040631286, "grad_norm": 1.5287401216590146, "learning_rate": 4.246965098634295e-06, "loss": 0.5056, "step": 2240 }, { "epoch": 0.04267991957206267, "grad_norm": 1.7946959649540402, "learning_rate": 4.265933232169955e-06, "loss": 0.525, "step": 2250 }, { "epoch": 0.042869608103494064, "grad_norm": 1.6459806944643776, "learning_rate": 4.284901365705615e-06, "loss": 0.5392, "step": 2260 }, { "epoch": 0.04305929663492545, "grad_norm": 2.280857309347471, "learning_rate": 4.303869499241275e-06, "loss": 0.5657, "step": 2270 }, { "epoch": 0.04324898516635684, "grad_norm": 1.6669997698016392, "learning_rate": 4.322837632776935e-06, "loss": 0.5338, "step": 2280 }, { "epoch": 0.043438673697788234, "grad_norm": 1.8124963645343206, "learning_rate": 4.3418057663125954e-06, "loss": 0.5343, "step": 2290 }, { "epoch": 0.04362836222921962, "grad_norm": 1.8891789097642393, "learning_rate": 4.360773899848255e-06, "loss": 0.5124, "step": 2300 }, { "epoch": 0.04381805076065101, "grad_norm": 1.8822673566036001, "learning_rate": 4.379742033383915e-06, "loss": 0.5493, "step": 2310 }, { "epoch": 0.044007739292082404, "grad_norm": 1.8077732570775413, "learning_rate": 4.398710166919575e-06, "loss": 0.5093, "step": 2320 }, { "epoch": 0.04419742782351379, "grad_norm": 1.435128798758343, "learning_rate": 4.417678300455235e-06, "loss": 0.5221, "step": 2330 }, { "epoch": 0.04438711635494518, "grad_norm": 1.6384920457624232, "learning_rate": 4.436646433990896e-06, "loss": 0.5355, "step": 2340 }, { "epoch": 0.04457680488637657, "grad_norm": 1.8388305478327025, "learning_rate": 4.455614567526556e-06, "loss": 0.5201, "step": 2350 }, { "epoch": 0.04476649341780796, "grad_norm": 1.7116752305356528, "learning_rate": 4.4745827010622156e-06, "loss": 0.5262, "step": 2360 }, { "epoch": 0.04495618194923935, "grad_norm": 1.7768454650747083, "learning_rate": 4.4935508345978755e-06, "loss": 0.5524, "step": 2370 }, { "epoch": 0.04514587048067074, "grad_norm": 1.4688511917276552, "learning_rate": 4.512518968133536e-06, "loss": 0.5401, "step": 2380 }, { "epoch": 0.04533555901210213, "grad_norm": 1.780511597092053, "learning_rate": 4.531487101669196e-06, "loss": 0.5316, "step": 2390 }, { "epoch": 0.045525247543533515, "grad_norm": 1.7135489863862183, "learning_rate": 4.550455235204857e-06, "loss": 0.5199, "step": 2400 }, { "epoch": 0.04571493607496491, "grad_norm": 1.8778375894074775, "learning_rate": 4.569423368740517e-06, "loss": 0.5081, "step": 2410 }, { "epoch": 0.0459046246063963, "grad_norm": 1.8184171423527011, "learning_rate": 4.588391502276177e-06, "loss": 0.5216, "step": 2420 }, { "epoch": 0.046094313137827685, "grad_norm": 1.7235220159758853, "learning_rate": 4.6073596358118365e-06, "loss": 0.5401, "step": 2430 }, { "epoch": 0.04628400166925908, "grad_norm": 1.6622818854410404, "learning_rate": 4.6263277693474964e-06, "loss": 0.54, "step": 2440 }, { "epoch": 0.04647369020069047, "grad_norm": 1.7192506171011046, "learning_rate": 4.645295902883157e-06, "loss": 0.5302, "step": 2450 }, { "epoch": 0.046663378732121855, "grad_norm": 1.7925893483537665, "learning_rate": 4.664264036418817e-06, "loss": 0.5756, "step": 2460 }, { "epoch": 0.04685306726355325, "grad_norm": 1.906856364471289, "learning_rate": 4.683232169954477e-06, "loss": 0.5516, "step": 2470 }, { "epoch": 0.04704275579498463, "grad_norm": 1.5520118994202579, "learning_rate": 4.702200303490137e-06, "loss": 0.5231, "step": 2480 }, { "epoch": 0.047232444326416025, "grad_norm": 2.023479687932391, "learning_rate": 4.721168437025797e-06, "loss": 0.5168, "step": 2490 }, { "epoch": 0.04742213285784742, "grad_norm": 2.122203402414034, "learning_rate": 4.7401365705614575e-06, "loss": 0.5545, "step": 2500 }, { "epoch": 0.0476118213892788, "grad_norm": 1.6308249285389684, "learning_rate": 4.759104704097117e-06, "loss": 0.5307, "step": 2510 }, { "epoch": 0.047801509920710195, "grad_norm": 1.6795965333582155, "learning_rate": 4.778072837632777e-06, "loss": 0.5358, "step": 2520 }, { "epoch": 0.04799119845214159, "grad_norm": 1.5467555376639337, "learning_rate": 4.797040971168437e-06, "loss": 0.5288, "step": 2530 }, { "epoch": 0.04818088698357297, "grad_norm": 1.5405362657134662, "learning_rate": 4.816009104704097e-06, "loss": 0.5317, "step": 2540 }, { "epoch": 0.048370575515004365, "grad_norm": 1.739959579640685, "learning_rate": 4.834977238239758e-06, "loss": 0.5279, "step": 2550 }, { "epoch": 0.04856026404643575, "grad_norm": 2.0113862567678864, "learning_rate": 4.853945371775418e-06, "loss": 0.52, "step": 2560 }, { "epoch": 0.04874995257786714, "grad_norm": 2.077057268359139, "learning_rate": 4.872913505311078e-06, "loss": 0.4994, "step": 2570 }, { "epoch": 0.048939641109298535, "grad_norm": 1.5981600945780574, "learning_rate": 4.8918816388467376e-06, "loss": 0.528, "step": 2580 }, { "epoch": 0.04912932964072992, "grad_norm": 1.5148190039959295, "learning_rate": 4.9108497723823974e-06, "loss": 0.5216, "step": 2590 }, { "epoch": 0.04931901817216131, "grad_norm": 1.8000584764784378, "learning_rate": 4.929817905918058e-06, "loss": 0.5209, "step": 2600 }, { "epoch": 0.0495087067035927, "grad_norm": 1.5337016926537326, "learning_rate": 4.948786039453718e-06, "loss": 0.4912, "step": 2610 }, { "epoch": 0.04969839523502409, "grad_norm": 1.8081917580199791, "learning_rate": 4.967754172989378e-06, "loss": 0.5216, "step": 2620 }, { "epoch": 0.04988808376645548, "grad_norm": 1.569241095721757, "learning_rate": 4.986722306525038e-06, "loss": 0.5267, "step": 2630 }, { "epoch": 0.05007777229788687, "grad_norm": 1.6416421384492061, "learning_rate": 5.005690440060699e-06, "loss": 0.5165, "step": 2640 }, { "epoch": 0.05026746082931826, "grad_norm": 1.7640545682355695, "learning_rate": 5.0246585735963585e-06, "loss": 0.5367, "step": 2650 }, { "epoch": 0.05045714936074965, "grad_norm": 1.6709132902122366, "learning_rate": 5.0436267071320184e-06, "loss": 0.5171, "step": 2660 }, { "epoch": 0.05064683789218104, "grad_norm": 2.2361461740797526, "learning_rate": 5.062594840667678e-06, "loss": 0.5325, "step": 2670 }, { "epoch": 0.05083652642361243, "grad_norm": 1.6609954234331445, "learning_rate": 5.081562974203339e-06, "loss": 0.5184, "step": 2680 }, { "epoch": 0.051026214955043815, "grad_norm": 1.604377235796367, "learning_rate": 5.100531107738998e-06, "loss": 0.5696, "step": 2690 }, { "epoch": 0.05121590348647521, "grad_norm": 1.7416539590452913, "learning_rate": 5.119499241274659e-06, "loss": 0.5273, "step": 2700 }, { "epoch": 0.0514055920179066, "grad_norm": 1.7081473188553637, "learning_rate": 5.138467374810319e-06, "loss": 0.5386, "step": 2710 }, { "epoch": 0.051595280549337985, "grad_norm": 1.7810361440589504, "learning_rate": 5.1574355083459795e-06, "loss": 0.5464, "step": 2720 }, { "epoch": 0.05178496908076938, "grad_norm": 1.575670392873096, "learning_rate": 5.1764036418816386e-06, "loss": 0.5319, "step": 2730 }, { "epoch": 0.05197465761220077, "grad_norm": 1.9890752144718618, "learning_rate": 5.195371775417299e-06, "loss": 0.5385, "step": 2740 }, { "epoch": 0.052164346143632155, "grad_norm": 1.5099405009884792, "learning_rate": 5.214339908952959e-06, "loss": 0.508, "step": 2750 }, { "epoch": 0.05235403467506355, "grad_norm": 1.7671182205731615, "learning_rate": 5.23330804248862e-06, "loss": 0.5152, "step": 2760 }, { "epoch": 0.05254372320649493, "grad_norm": 2.6053621222610928, "learning_rate": 5.25227617602428e-06, "loss": 0.5107, "step": 2770 }, { "epoch": 0.052733411737926325, "grad_norm": 1.766438334683734, "learning_rate": 5.27124430955994e-06, "loss": 0.5549, "step": 2780 }, { "epoch": 0.05292310026935772, "grad_norm": 1.8330737094311205, "learning_rate": 5.2902124430956005e-06, "loss": 0.5155, "step": 2790 }, { "epoch": 0.0531127888007891, "grad_norm": 1.8604213683299282, "learning_rate": 5.3091805766312595e-06, "loss": 0.5439, "step": 2800 }, { "epoch": 0.053302477332220495, "grad_norm": 2.1052699972801068, "learning_rate": 5.32814871016692e-06, "loss": 0.5447, "step": 2810 }, { "epoch": 0.05349216586365188, "grad_norm": 2.0282164430291547, "learning_rate": 5.34711684370258e-06, "loss": 0.4911, "step": 2820 }, { "epoch": 0.05368185439508327, "grad_norm": 1.567331179827999, "learning_rate": 5.366084977238241e-06, "loss": 0.5255, "step": 2830 }, { "epoch": 0.053871542926514665, "grad_norm": 1.6323566367612492, "learning_rate": 5.3850531107739e-06, "loss": 0.5395, "step": 2840 }, { "epoch": 0.05406123145794605, "grad_norm": 1.7740621464160256, "learning_rate": 5.404021244309561e-06, "loss": 0.5247, "step": 2850 }, { "epoch": 0.05425091998937744, "grad_norm": 1.9236342190826616, "learning_rate": 5.422989377845221e-06, "loss": 0.5483, "step": 2860 }, { "epoch": 0.054440608520808835, "grad_norm": 1.9620809053974655, "learning_rate": 5.4419575113808805e-06, "loss": 0.5312, "step": 2870 }, { "epoch": 0.05463029705224022, "grad_norm": 1.9317658682836438, "learning_rate": 5.46092564491654e-06, "loss": 0.5057, "step": 2880 }, { "epoch": 0.05481998558367161, "grad_norm": 1.730095488975269, "learning_rate": 5.479893778452201e-06, "loss": 0.5308, "step": 2890 }, { "epoch": 0.055009674115103, "grad_norm": 1.7249818943877582, "learning_rate": 5.49886191198786e-06, "loss": 0.5068, "step": 2900 }, { "epoch": 0.05519936264653439, "grad_norm": 1.6564714950956148, "learning_rate": 5.517830045523521e-06, "loss": 0.5079, "step": 2910 }, { "epoch": 0.05538905117796578, "grad_norm": 1.7751408859875253, "learning_rate": 5.536798179059181e-06, "loss": 0.4983, "step": 2920 }, { "epoch": 0.05557873970939717, "grad_norm": 1.2412746698984132, "learning_rate": 5.555766312594842e-06, "loss": 0.5309, "step": 2930 }, { "epoch": 0.05576842824082856, "grad_norm": 1.6452218522303867, "learning_rate": 5.574734446130501e-06, "loss": 0.5197, "step": 2940 }, { "epoch": 0.05595811677225995, "grad_norm": 1.7565554553153633, "learning_rate": 5.593702579666161e-06, "loss": 0.5265, "step": 2950 }, { "epoch": 0.05614780530369134, "grad_norm": 1.7264566550732638, "learning_rate": 5.612670713201821e-06, "loss": 0.4834, "step": 2960 }, { "epoch": 0.05633749383512273, "grad_norm": 1.6193681180740045, "learning_rate": 5.631638846737482e-06, "loss": 0.5278, "step": 2970 }, { "epoch": 0.056527182366554116, "grad_norm": 1.7832130024500148, "learning_rate": 5.650606980273141e-06, "loss": 0.5282, "step": 2980 }, { "epoch": 0.05671687089798551, "grad_norm": 2.125528805047113, "learning_rate": 5.669575113808802e-06, "loss": 0.5508, "step": 2990 }, { "epoch": 0.0569065594294169, "grad_norm": 1.7189343870019864, "learning_rate": 5.688543247344461e-06, "loss": 0.544, "step": 3000 }, { "epoch": 0.057096247960848286, "grad_norm": 1.4127676478874107, "learning_rate": 5.707511380880122e-06, "loss": 0.5086, "step": 3010 }, { "epoch": 0.05728593649227968, "grad_norm": 1.751855889719644, "learning_rate": 5.7264795144157815e-06, "loss": 0.5252, "step": 3020 }, { "epoch": 0.05747562502371106, "grad_norm": 2.099851016962707, "learning_rate": 5.745447647951442e-06, "loss": 0.5212, "step": 3030 }, { "epoch": 0.057665313555142456, "grad_norm": 2.154462684854332, "learning_rate": 5.764415781487103e-06, "loss": 0.5616, "step": 3040 }, { "epoch": 0.05785500208657385, "grad_norm": 1.9703885261104115, "learning_rate": 5.783383915022762e-06, "loss": 0.5341, "step": 3050 }, { "epoch": 0.05804469061800523, "grad_norm": 1.6486578834171874, "learning_rate": 5.802352048558423e-06, "loss": 0.5241, "step": 3060 }, { "epoch": 0.058234379149436626, "grad_norm": 1.7467611120307944, "learning_rate": 5.821320182094083e-06, "loss": 0.5026, "step": 3070 }, { "epoch": 0.05842406768086802, "grad_norm": 1.6779834801378963, "learning_rate": 5.840288315629743e-06, "loss": 0.5215, "step": 3080 }, { "epoch": 0.0586137562122994, "grad_norm": 2.4752112323579234, "learning_rate": 5.8592564491654025e-06, "loss": 0.5444, "step": 3090 }, { "epoch": 0.058803444743730796, "grad_norm": 1.7473656972862668, "learning_rate": 5.878224582701063e-06, "loss": 0.5291, "step": 3100 }, { "epoch": 0.05899313327516218, "grad_norm": 2.1762843899829987, "learning_rate": 5.897192716236722e-06, "loss": 0.5137, "step": 3110 }, { "epoch": 0.05918282180659357, "grad_norm": 1.9272312956614541, "learning_rate": 5.916160849772383e-06, "loss": 0.5096, "step": 3120 }, { "epoch": 0.059372510338024966, "grad_norm": 1.5297586179337879, "learning_rate": 5.935128983308043e-06, "loss": 0.5014, "step": 3130 }, { "epoch": 0.05956219886945635, "grad_norm": 1.978834771943619, "learning_rate": 5.954097116843704e-06, "loss": 0.5425, "step": 3140 }, { "epoch": 0.05975188740088774, "grad_norm": 1.8016629012660914, "learning_rate": 5.973065250379363e-06, "loss": 0.5339, "step": 3150 }, { "epoch": 0.05994157593231913, "grad_norm": 2.9753745256924797, "learning_rate": 5.9920333839150235e-06, "loss": 0.5527, "step": 3160 }, { "epoch": 0.06013126446375052, "grad_norm": 1.9250898896930528, "learning_rate": 6.011001517450683e-06, "loss": 0.5463, "step": 3170 }, { "epoch": 0.06032095299518191, "grad_norm": 1.7880270025550353, "learning_rate": 6.029969650986343e-06, "loss": 0.5213, "step": 3180 }, { "epoch": 0.0605106415266133, "grad_norm": 2.2295041852479365, "learning_rate": 6.048937784522003e-06, "loss": 0.551, "step": 3190 }, { "epoch": 0.06070033005804469, "grad_norm": 1.8335338256621854, "learning_rate": 6.067905918057664e-06, "loss": 0.5203, "step": 3200 }, { "epoch": 0.06089001858947608, "grad_norm": 2.8129764277685148, "learning_rate": 6.086874051593323e-06, "loss": 0.4907, "step": 3210 }, { "epoch": 0.06107970712090747, "grad_norm": 1.6385426129687175, "learning_rate": 6.105842185128984e-06, "loss": 0.5166, "step": 3220 }, { "epoch": 0.06126939565233886, "grad_norm": 1.9431246490516707, "learning_rate": 6.124810318664644e-06, "loss": 0.5364, "step": 3230 }, { "epoch": 0.061459084183770246, "grad_norm": 1.9304329509561318, "learning_rate": 6.143778452200304e-06, "loss": 0.5307, "step": 3240 }, { "epoch": 0.06164877271520164, "grad_norm": 1.965015874496107, "learning_rate": 6.162746585735963e-06, "loss": 0.5407, "step": 3250 }, { "epoch": 0.06183846124663303, "grad_norm": 1.7708194802066957, "learning_rate": 6.181714719271624e-06, "loss": 0.5346, "step": 3260 }, { "epoch": 0.062028149778064416, "grad_norm": 1.7112165729101456, "learning_rate": 6.200682852807284e-06, "loss": 0.5365, "step": 3270 }, { "epoch": 0.06221783830949581, "grad_norm": 1.9884446822284112, "learning_rate": 6.219650986342945e-06, "loss": 0.5055, "step": 3280 }, { "epoch": 0.0624075268409272, "grad_norm": 1.670398334426901, "learning_rate": 6.238619119878604e-06, "loss": 0.534, "step": 3290 }, { "epoch": 0.06259721537235859, "grad_norm": 1.747836787253228, "learning_rate": 6.257587253414265e-06, "loss": 0.5062, "step": 3300 }, { "epoch": 0.06278690390378998, "grad_norm": 1.839473099346884, "learning_rate": 6.276555386949925e-06, "loss": 0.5281, "step": 3310 }, { "epoch": 0.06297659243522137, "grad_norm": 2.2610471156561314, "learning_rate": 6.295523520485584e-06, "loss": 0.5685, "step": 3320 }, { "epoch": 0.06316628096665276, "grad_norm": 1.6464712437064681, "learning_rate": 6.314491654021245e-06, "loss": 0.5149, "step": 3330 }, { "epoch": 0.06335596949808414, "grad_norm": 1.6336987082391035, "learning_rate": 6.333459787556905e-06, "loss": 0.5068, "step": 3340 }, { "epoch": 0.06354565802951553, "grad_norm": 1.8124057264703761, "learning_rate": 6.352427921092566e-06, "loss": 0.5623, "step": 3350 }, { "epoch": 0.06373534656094693, "grad_norm": 1.6121346193755535, "learning_rate": 6.371396054628225e-06, "loss": 0.5412, "step": 3360 }, { "epoch": 0.06392503509237832, "grad_norm": 1.954493799532843, "learning_rate": 6.390364188163886e-06, "loss": 0.556, "step": 3370 }, { "epoch": 0.06411472362380971, "grad_norm": 1.6659526767295891, "learning_rate": 6.4093323216995455e-06, "loss": 0.512, "step": 3380 }, { "epoch": 0.06430441215524109, "grad_norm": 2.588835351119702, "learning_rate": 6.428300455235205e-06, "loss": 0.5393, "step": 3390 }, { "epoch": 0.06449410068667248, "grad_norm": 1.837882995941782, "learning_rate": 6.447268588770865e-06, "loss": 0.5151, "step": 3400 }, { "epoch": 0.06468378921810387, "grad_norm": 1.4278861587447795, "learning_rate": 6.466236722306526e-06, "loss": 0.4907, "step": 3410 }, { "epoch": 0.06487347774953527, "grad_norm": 1.7809390439818826, "learning_rate": 6.485204855842185e-06, "loss": 0.5055, "step": 3420 }, { "epoch": 0.06506316628096666, "grad_norm": 1.5749788679783094, "learning_rate": 6.504172989377846e-06, "loss": 0.4977, "step": 3430 }, { "epoch": 0.06525285481239804, "grad_norm": 1.690466071856457, "learning_rate": 6.523141122913506e-06, "loss": 0.4991, "step": 3440 }, { "epoch": 0.06544254334382943, "grad_norm": 2.035786368251427, "learning_rate": 6.5421092564491665e-06, "loss": 0.5553, "step": 3450 }, { "epoch": 0.06563223187526082, "grad_norm": 1.4643772262862884, "learning_rate": 6.5610773899848255e-06, "loss": 0.514, "step": 3460 }, { "epoch": 0.06582192040669221, "grad_norm": 1.8877327520072833, "learning_rate": 6.580045523520486e-06, "loss": 0.5534, "step": 3470 }, { "epoch": 0.0660116089381236, "grad_norm": 1.6200226177762709, "learning_rate": 6.599013657056146e-06, "loss": 0.5438, "step": 3480 }, { "epoch": 0.06620129746955498, "grad_norm": 1.4762939084975968, "learning_rate": 6.617981790591806e-06, "loss": 0.5151, "step": 3490 }, { "epoch": 0.06639098600098638, "grad_norm": 1.5373172130268997, "learning_rate": 6.636949924127466e-06, "loss": 0.5283, "step": 3500 }, { "epoch": 0.06658067453241777, "grad_norm": 1.731210034514663, "learning_rate": 6.655918057663127e-06, "loss": 0.5081, "step": 3510 }, { "epoch": 0.06677036306384916, "grad_norm": 1.5093950178919953, "learning_rate": 6.674886191198786e-06, "loss": 0.5068, "step": 3520 }, { "epoch": 0.06696005159528055, "grad_norm": 6.619950474749183, "learning_rate": 6.6938543247344465e-06, "loss": 0.495, "step": 3530 }, { "epoch": 0.06714974012671195, "grad_norm": 1.722886880172219, "learning_rate": 6.712822458270106e-06, "loss": 0.5367, "step": 3540 }, { "epoch": 0.06733942865814332, "grad_norm": 1.6382370629362097, "learning_rate": 6.731790591805767e-06, "loss": 0.5004, "step": 3550 }, { "epoch": 0.06752911718957472, "grad_norm": 1.5976404571561131, "learning_rate": 6.750758725341428e-06, "loss": 0.516, "step": 3560 }, { "epoch": 0.06771880572100611, "grad_norm": 2.0016621421611562, "learning_rate": 6.769726858877087e-06, "loss": 0.5174, "step": 3570 }, { "epoch": 0.0679084942524375, "grad_norm": 1.632839852890159, "learning_rate": 6.788694992412748e-06, "loss": 0.5209, "step": 3580 }, { "epoch": 0.0680981827838689, "grad_norm": 1.4535468610068338, "learning_rate": 6.8076631259484076e-06, "loss": 0.5115, "step": 3590 }, { "epoch": 0.06828787131530027, "grad_norm": 1.8750487127156923, "learning_rate": 6.8266312594840675e-06, "loss": 0.5037, "step": 3600 }, { "epoch": 0.06847755984673166, "grad_norm": 1.6131307089491926, "learning_rate": 6.845599393019727e-06, "loss": 0.5173, "step": 3610 }, { "epoch": 0.06866724837816306, "grad_norm": 1.6308566526903086, "learning_rate": 6.864567526555388e-06, "loss": 0.5136, "step": 3620 }, { "epoch": 0.06885693690959445, "grad_norm": 1.6466230273191538, "learning_rate": 6.883535660091047e-06, "loss": 0.5374, "step": 3630 }, { "epoch": 0.06904662544102584, "grad_norm": 2.3327610048028693, "learning_rate": 6.902503793626708e-06, "loss": 0.5255, "step": 3640 }, { "epoch": 0.06923631397245722, "grad_norm": 1.706769508452958, "learning_rate": 6.921471927162368e-06, "loss": 0.5529, "step": 3650 }, { "epoch": 0.06942600250388861, "grad_norm": 1.7074490857933462, "learning_rate": 6.9404400606980285e-06, "loss": 0.4913, "step": 3660 }, { "epoch": 0.06961569103532, "grad_norm": 3.104944732725076, "learning_rate": 6.959408194233688e-06, "loss": 0.5612, "step": 3670 }, { "epoch": 0.0698053795667514, "grad_norm": 1.4786910285613208, "learning_rate": 6.978376327769348e-06, "loss": 0.5166, "step": 3680 }, { "epoch": 0.06999506809818279, "grad_norm": 1.4297389863024628, "learning_rate": 6.997344461305008e-06, "loss": 0.5077, "step": 3690 }, { "epoch": 0.07018475662961417, "grad_norm": 1.837494614416053, "learning_rate": 7.016312594840668e-06, "loss": 0.5449, "step": 3700 }, { "epoch": 0.07037444516104556, "grad_norm": 1.721499214307457, "learning_rate": 7.035280728376328e-06, "loss": 0.5082, "step": 3710 }, { "epoch": 0.07056413369247695, "grad_norm": 1.7824133432256304, "learning_rate": 7.054248861911989e-06, "loss": 0.5515, "step": 3720 }, { "epoch": 0.07075382222390834, "grad_norm": 1.817815490619233, "learning_rate": 7.073216995447648e-06, "loss": 0.5263, "step": 3730 }, { "epoch": 0.07094351075533974, "grad_norm": 1.9002439937637092, "learning_rate": 7.0921851289833086e-06, "loss": 0.5214, "step": 3740 }, { "epoch": 0.07113319928677111, "grad_norm": 2.8103938855968, "learning_rate": 7.1111532625189685e-06, "loss": 0.5361, "step": 3750 }, { "epoch": 0.07132288781820251, "grad_norm": 1.7371018334633601, "learning_rate": 7.130121396054629e-06, "loss": 0.517, "step": 3760 }, { "epoch": 0.0715125763496339, "grad_norm": 1.654597537320439, "learning_rate": 7.149089529590288e-06, "loss": 0.5426, "step": 3770 }, { "epoch": 0.07170226488106529, "grad_norm": 2.2365481793251876, "learning_rate": 7.168057663125949e-06, "loss": 0.5282, "step": 3780 }, { "epoch": 0.07189195341249668, "grad_norm": 1.750338185486541, "learning_rate": 7.187025796661609e-06, "loss": 0.5023, "step": 3790 }, { "epoch": 0.07208164194392808, "grad_norm": 1.969535663650574, "learning_rate": 7.205993930197269e-06, "loss": 0.5184, "step": 3800 }, { "epoch": 0.07227133047535945, "grad_norm": 1.8248716363607198, "learning_rate": 7.224962063732929e-06, "loss": 0.5271, "step": 3810 }, { "epoch": 0.07246101900679085, "grad_norm": 1.8359533178841112, "learning_rate": 7.2439301972685895e-06, "loss": 0.5135, "step": 3820 }, { "epoch": 0.07265070753822224, "grad_norm": 1.8358261970853282, "learning_rate": 7.26289833080425e-06, "loss": 0.522, "step": 3830 }, { "epoch": 0.07284039606965363, "grad_norm": 1.5816015997255914, "learning_rate": 7.281866464339909e-06, "loss": 0.5203, "step": 3840 }, { "epoch": 0.07303008460108502, "grad_norm": 1.7184108881433102, "learning_rate": 7.30083459787557e-06, "loss": 0.5431, "step": 3850 }, { "epoch": 0.0732197731325164, "grad_norm": 1.5857735362450505, "learning_rate": 7.31980273141123e-06, "loss": 0.5162, "step": 3860 }, { "epoch": 0.0734094616639478, "grad_norm": 1.897079551577131, "learning_rate": 7.338770864946891e-06, "loss": 0.5316, "step": 3870 }, { "epoch": 0.07359915019537919, "grad_norm": 1.6855235066665333, "learning_rate": 7.35773899848255e-06, "loss": 0.5186, "step": 3880 }, { "epoch": 0.07378883872681058, "grad_norm": 1.5183425602034712, "learning_rate": 7.3767071320182104e-06, "loss": 0.5228, "step": 3890 }, { "epoch": 0.07397852725824197, "grad_norm": 1.6955705782369739, "learning_rate": 7.39567526555387e-06, "loss": 0.5003, "step": 3900 }, { "epoch": 0.07416821578967335, "grad_norm": 1.6537427934356166, "learning_rate": 7.41464339908953e-06, "loss": 0.5074, "step": 3910 }, { "epoch": 0.07435790432110474, "grad_norm": 1.8259059906854676, "learning_rate": 7.43361153262519e-06, "loss": 0.5331, "step": 3920 }, { "epoch": 0.07454759285253613, "grad_norm": 1.6494869525625147, "learning_rate": 7.452579666160851e-06, "loss": 0.531, "step": 3930 }, { "epoch": 0.07473728138396753, "grad_norm": 1.6604514658884504, "learning_rate": 7.47154779969651e-06, "loss": 0.4692, "step": 3940 }, { "epoch": 0.07492696991539892, "grad_norm": 1.6936956994901886, "learning_rate": 7.490515933232171e-06, "loss": 0.5084, "step": 3950 }, { "epoch": 0.0751166584468303, "grad_norm": 1.8173456858489967, "learning_rate": 7.5094840667678306e-06, "loss": 0.5291, "step": 3960 }, { "epoch": 0.07530634697826169, "grad_norm": 1.4436303066911385, "learning_rate": 7.528452200303491e-06, "loss": 0.5193, "step": 3970 }, { "epoch": 0.07549603550969308, "grad_norm": 1.595039606181745, "learning_rate": 7.54742033383915e-06, "loss": 0.5167, "step": 3980 }, { "epoch": 0.07568572404112447, "grad_norm": 1.8772448701886832, "learning_rate": 7.566388467374811e-06, "loss": 0.5564, "step": 3990 }, { "epoch": 0.07587541257255587, "grad_norm": 1.86275410753034, "learning_rate": 7.585356600910471e-06, "loss": 0.5199, "step": 4000 }, { "epoch": 0.07606510110398726, "grad_norm": 1.6744463164368621, "learning_rate": 7.604324734446131e-06, "loss": 0.504, "step": 4010 }, { "epoch": 0.07625478963541864, "grad_norm": 1.7423855397250128, "learning_rate": 7.623292867981791e-06, "loss": 0.5343, "step": 4020 }, { "epoch": 0.07644447816685003, "grad_norm": 1.495462525677658, "learning_rate": 7.64226100151745e-06, "loss": 0.5178, "step": 4030 }, { "epoch": 0.07663416669828142, "grad_norm": 1.7076722474248176, "learning_rate": 7.661229135053111e-06, "loss": 0.5298, "step": 4040 }, { "epoch": 0.07682385522971281, "grad_norm": 1.5626896561108987, "learning_rate": 7.680197268588772e-06, "loss": 0.5293, "step": 4050 }, { "epoch": 0.07701354376114421, "grad_norm": 1.6254450814863435, "learning_rate": 7.699165402124431e-06, "loss": 0.526, "step": 4060 }, { "epoch": 0.07720323229257559, "grad_norm": 1.7600625041361495, "learning_rate": 7.718133535660092e-06, "loss": 0.5071, "step": 4070 }, { "epoch": 0.07739292082400698, "grad_norm": 1.6683588580383302, "learning_rate": 7.737101669195751e-06, "loss": 0.529, "step": 4080 }, { "epoch": 0.07758260935543837, "grad_norm": 2.0909380190999705, "learning_rate": 7.756069802731412e-06, "loss": 0.5426, "step": 4090 }, { "epoch": 0.07777229788686976, "grad_norm": 1.7227841918314641, "learning_rate": 7.775037936267073e-06, "loss": 0.5478, "step": 4100 }, { "epoch": 0.07796198641830115, "grad_norm": 1.8511489437938242, "learning_rate": 7.794006069802732e-06, "loss": 0.5281, "step": 4110 }, { "epoch": 0.07815167494973253, "grad_norm": 1.9129164386441089, "learning_rate": 7.812974203338392e-06, "loss": 0.5435, "step": 4120 }, { "epoch": 0.07834136348116393, "grad_norm": 1.7234410953367512, "learning_rate": 7.831942336874051e-06, "loss": 0.5524, "step": 4130 }, { "epoch": 0.07853105201259532, "grad_norm": 1.917161710298174, "learning_rate": 7.850910470409712e-06, "loss": 0.5523, "step": 4140 }, { "epoch": 0.07872074054402671, "grad_norm": 1.634345508361669, "learning_rate": 7.869878603945373e-06, "loss": 0.5265, "step": 4150 }, { "epoch": 0.0789104290754581, "grad_norm": 1.442885486308895, "learning_rate": 7.888846737481034e-06, "loss": 0.5228, "step": 4160 }, { "epoch": 0.07910011760688948, "grad_norm": 1.5537546837760554, "learning_rate": 7.907814871016693e-06, "loss": 0.5409, "step": 4170 }, { "epoch": 0.07928980613832087, "grad_norm": 1.9320715596799019, "learning_rate": 7.926783004552353e-06, "loss": 0.535, "step": 4180 }, { "epoch": 0.07947949466975227, "grad_norm": 1.957485200231192, "learning_rate": 7.945751138088012e-06, "loss": 0.51, "step": 4190 }, { "epoch": 0.07966918320118366, "grad_norm": 1.5206838119498054, "learning_rate": 7.964719271623673e-06, "loss": 0.4888, "step": 4200 }, { "epoch": 0.07985887173261505, "grad_norm": 1.4668572097783774, "learning_rate": 7.983687405159332e-06, "loss": 0.494, "step": 4210 }, { "epoch": 0.08004856026404644, "grad_norm": 1.6021382400127502, "learning_rate": 8.002655538694993e-06, "loss": 0.5271, "step": 4220 }, { "epoch": 0.08023824879547782, "grad_norm": 1.6851813275754508, "learning_rate": 8.021623672230652e-06, "loss": 0.5074, "step": 4230 }, { "epoch": 0.08042793732690921, "grad_norm": 1.7753534709076926, "learning_rate": 8.040591805766313e-06, "loss": 0.5001, "step": 4240 }, { "epoch": 0.0806176258583406, "grad_norm": 1.7439662512595695, "learning_rate": 8.059559939301974e-06, "loss": 0.5318, "step": 4250 }, { "epoch": 0.080807314389772, "grad_norm": 1.5747450903750766, "learning_rate": 8.078528072837634e-06, "loss": 0.52, "step": 4260 }, { "epoch": 0.08099700292120339, "grad_norm": 1.9718960425543668, "learning_rate": 8.097496206373293e-06, "loss": 0.5453, "step": 4270 }, { "epoch": 0.08118669145263477, "grad_norm": 1.667842229238603, "learning_rate": 8.116464339908954e-06, "loss": 0.5396, "step": 4280 }, { "epoch": 0.08137637998406616, "grad_norm": 1.4204951907910859, "learning_rate": 8.135432473444613e-06, "loss": 0.5193, "step": 4290 }, { "epoch": 0.08156606851549755, "grad_norm": 1.858944167884541, "learning_rate": 8.154400606980274e-06, "loss": 0.5154, "step": 4300 }, { "epoch": 0.08175575704692895, "grad_norm": 1.1586923690291893, "learning_rate": 8.173368740515933e-06, "loss": 0.4996, "step": 4310 }, { "epoch": 0.08194544557836034, "grad_norm": 2.0485540242199702, "learning_rate": 8.192336874051594e-06, "loss": 0.5432, "step": 4320 }, { "epoch": 0.08213513410979172, "grad_norm": 1.694111318114411, "learning_rate": 8.211305007587254e-06, "loss": 0.5414, "step": 4330 }, { "epoch": 0.08232482264122311, "grad_norm": 1.762943833966317, "learning_rate": 8.230273141122913e-06, "loss": 0.5371, "step": 4340 }, { "epoch": 0.0825145111726545, "grad_norm": 3.8343419923150828, "learning_rate": 8.249241274658574e-06, "loss": 0.5287, "step": 4350 }, { "epoch": 0.08270419970408589, "grad_norm": 1.802923546018819, "learning_rate": 8.268209408194235e-06, "loss": 0.5043, "step": 4360 }, { "epoch": 0.08289388823551729, "grad_norm": 1.4793460741260012, "learning_rate": 8.287177541729896e-06, "loss": 0.5232, "step": 4370 }, { "epoch": 0.08308357676694866, "grad_norm": 1.7273470036918204, "learning_rate": 8.306145675265555e-06, "loss": 0.5045, "step": 4380 }, { "epoch": 0.08327326529838006, "grad_norm": 1.9487278498106522, "learning_rate": 8.325113808801215e-06, "loss": 0.5253, "step": 4390 }, { "epoch": 0.08346295382981145, "grad_norm": 1.8919178102962362, "learning_rate": 8.344081942336875e-06, "loss": 0.5276, "step": 4400 }, { "epoch": 0.08365264236124284, "grad_norm": 1.9145381057260042, "learning_rate": 8.363050075872535e-06, "loss": 0.5455, "step": 4410 }, { "epoch": 0.08384233089267423, "grad_norm": 1.8242731741047793, "learning_rate": 8.382018209408194e-06, "loss": 0.5439, "step": 4420 }, { "epoch": 0.08403201942410563, "grad_norm": 1.7360381406255851, "learning_rate": 8.400986342943855e-06, "loss": 0.5102, "step": 4430 }, { "epoch": 0.084221707955537, "grad_norm": 1.7721331830450198, "learning_rate": 8.419954476479514e-06, "loss": 0.5221, "step": 4440 }, { "epoch": 0.0844113964869684, "grad_norm": 1.6775800951412767, "learning_rate": 8.438922610015175e-06, "loss": 0.5247, "step": 4450 }, { "epoch": 0.08460108501839979, "grad_norm": 1.4109012943970047, "learning_rate": 8.457890743550836e-06, "loss": 0.5189, "step": 4460 }, { "epoch": 0.08479077354983118, "grad_norm": 1.881613163271371, "learning_rate": 8.476858877086496e-06, "loss": 0.5325, "step": 4470 }, { "epoch": 0.08498046208126257, "grad_norm": 1.6615014617722554, "learning_rate": 8.495827010622155e-06, "loss": 0.5366, "step": 4480 }, { "epoch": 0.08517015061269395, "grad_norm": 2.2772113667328915, "learning_rate": 8.514795144157816e-06, "loss": 0.5201, "step": 4490 }, { "epoch": 0.08535983914412534, "grad_norm": 1.5852797814942887, "learning_rate": 8.533763277693475e-06, "loss": 0.5023, "step": 4500 }, { "epoch": 0.08554952767555674, "grad_norm": 1.7191097625051552, "learning_rate": 8.552731411229136e-06, "loss": 0.5234, "step": 4510 }, { "epoch": 0.08573921620698813, "grad_norm": 1.4502513354183233, "learning_rate": 8.571699544764795e-06, "loss": 0.5087, "step": 4520 }, { "epoch": 0.08592890473841952, "grad_norm": 1.6223575612606063, "learning_rate": 8.590667678300456e-06, "loss": 0.4712, "step": 4530 }, { "epoch": 0.0861185932698509, "grad_norm": 1.8677209729911717, "learning_rate": 8.609635811836115e-06, "loss": 0.5324, "step": 4540 }, { "epoch": 0.08630828180128229, "grad_norm": 1.910483403118022, "learning_rate": 8.628603945371776e-06, "loss": 0.5621, "step": 4550 }, { "epoch": 0.08649797033271368, "grad_norm": 1.7861958952300077, "learning_rate": 8.647572078907436e-06, "loss": 0.5275, "step": 4560 }, { "epoch": 0.08668765886414508, "grad_norm": 1.7248351789902145, "learning_rate": 8.666540212443097e-06, "loss": 0.5297, "step": 4570 }, { "epoch": 0.08687734739557647, "grad_norm": 1.394504427748179, "learning_rate": 8.685508345978756e-06, "loss": 0.4933, "step": 4580 }, { "epoch": 0.08706703592700785, "grad_norm": 1.6256655642535507, "learning_rate": 8.704476479514417e-06, "loss": 0.5305, "step": 4590 }, { "epoch": 0.08725672445843924, "grad_norm": 2.600008039748113, "learning_rate": 8.723444613050076e-06, "loss": 0.5381, "step": 4600 }, { "epoch": 0.08744641298987063, "grad_norm": 1.8860694394545592, "learning_rate": 8.742412746585737e-06, "loss": 0.5048, "step": 4610 }, { "epoch": 0.08763610152130202, "grad_norm": 1.634101343420773, "learning_rate": 8.761380880121397e-06, "loss": 0.4881, "step": 4620 }, { "epoch": 0.08782579005273342, "grad_norm": 1.648030019045957, "learning_rate": 8.780349013657056e-06, "loss": 0.5227, "step": 4630 }, { "epoch": 0.08801547858416481, "grad_norm": 1.5741551494871722, "learning_rate": 8.799317147192717e-06, "loss": 0.501, "step": 4640 }, { "epoch": 0.08820516711559619, "grad_norm": 1.7399781457892414, "learning_rate": 8.818285280728376e-06, "loss": 0.514, "step": 4650 }, { "epoch": 0.08839485564702758, "grad_norm": 1.5751667647710643, "learning_rate": 8.837253414264037e-06, "loss": 0.4955, "step": 4660 }, { "epoch": 0.08858454417845897, "grad_norm": 1.7607122902599417, "learning_rate": 8.856221547799698e-06, "loss": 0.5342, "step": 4670 }, { "epoch": 0.08877423270989036, "grad_norm": 1.7110548230440934, "learning_rate": 8.875189681335358e-06, "loss": 0.556, "step": 4680 }, { "epoch": 0.08896392124132176, "grad_norm": 1.3423586847901627, "learning_rate": 8.894157814871018e-06, "loss": 0.5003, "step": 4690 }, { "epoch": 0.08915360977275313, "grad_norm": 1.6067347749690177, "learning_rate": 8.913125948406678e-06, "loss": 0.5264, "step": 4700 }, { "epoch": 0.08934329830418453, "grad_norm": 1.4945014979841578, "learning_rate": 8.932094081942337e-06, "loss": 0.5128, "step": 4710 }, { "epoch": 0.08953298683561592, "grad_norm": 1.5937326370400773, "learning_rate": 8.951062215477998e-06, "loss": 0.5219, "step": 4720 }, { "epoch": 0.08972267536704731, "grad_norm": 1.554942266043332, "learning_rate": 8.970030349013657e-06, "loss": 0.499, "step": 4730 }, { "epoch": 0.0899123638984787, "grad_norm": 1.8910770581371488, "learning_rate": 8.988998482549318e-06, "loss": 0.4994, "step": 4740 }, { "epoch": 0.09010205242991008, "grad_norm": 1.842003844375404, "learning_rate": 9.007966616084977e-06, "loss": 0.5665, "step": 4750 }, { "epoch": 0.09029174096134147, "grad_norm": 1.3389848586622595, "learning_rate": 9.026934749620638e-06, "loss": 0.512, "step": 4760 }, { "epoch": 0.09048142949277287, "grad_norm": 1.5279939377057983, "learning_rate": 9.045902883156298e-06, "loss": 0.5308, "step": 4770 }, { "epoch": 0.09067111802420426, "grad_norm": 1.7835847730186112, "learning_rate": 9.064871016691959e-06, "loss": 0.5508, "step": 4780 }, { "epoch": 0.09086080655563565, "grad_norm": 1.555711289473708, "learning_rate": 9.083839150227618e-06, "loss": 0.5619, "step": 4790 }, { "epoch": 0.09105049508706703, "grad_norm": 1.791984139504516, "learning_rate": 9.102807283763279e-06, "loss": 0.5266, "step": 4800 }, { "epoch": 0.09124018361849842, "grad_norm": 1.4864266589239945, "learning_rate": 9.121775417298938e-06, "loss": 0.5029, "step": 4810 }, { "epoch": 0.09142987214992981, "grad_norm": 1.3991900587096893, "learning_rate": 9.140743550834599e-06, "loss": 0.5048, "step": 4820 }, { "epoch": 0.0916195606813612, "grad_norm": 1.5766340116389443, "learning_rate": 9.159711684370258e-06, "loss": 0.5187, "step": 4830 }, { "epoch": 0.0918092492127926, "grad_norm": 3.437639243812229, "learning_rate": 9.178679817905919e-06, "loss": 0.5214, "step": 4840 }, { "epoch": 0.09199893774422399, "grad_norm": 1.8294735589814684, "learning_rate": 9.197647951441578e-06, "loss": 0.5231, "step": 4850 }, { "epoch": 0.09218862627565537, "grad_norm": 1.5312297384065605, "learning_rate": 9.216616084977238e-06, "loss": 0.5243, "step": 4860 }, { "epoch": 0.09237831480708676, "grad_norm": 1.6955375885541757, "learning_rate": 9.235584218512899e-06, "loss": 0.5134, "step": 4870 }, { "epoch": 0.09256800333851815, "grad_norm": 1.520672980867336, "learning_rate": 9.25455235204856e-06, "loss": 0.546, "step": 4880 }, { "epoch": 0.09275769186994955, "grad_norm": 1.4006587724578885, "learning_rate": 9.27352048558422e-06, "loss": 0.5222, "step": 4890 }, { "epoch": 0.09294738040138094, "grad_norm": 1.5544506800206788, "learning_rate": 9.29248861911988e-06, "loss": 0.4943, "step": 4900 }, { "epoch": 0.09313706893281232, "grad_norm": 1.509767314391753, "learning_rate": 9.31145675265554e-06, "loss": 0.5061, "step": 4910 }, { "epoch": 0.09332675746424371, "grad_norm": 1.8951445911842166, "learning_rate": 9.3304248861912e-06, "loss": 0.5241, "step": 4920 }, { "epoch": 0.0935164459956751, "grad_norm": 1.585163394560726, "learning_rate": 9.34939301972686e-06, "loss": 0.5216, "step": 4930 }, { "epoch": 0.0937061345271065, "grad_norm": 1.8443926260430512, "learning_rate": 9.36836115326252e-06, "loss": 0.5265, "step": 4940 }, { "epoch": 0.09389582305853789, "grad_norm": 1.738820033198625, "learning_rate": 9.38732928679818e-06, "loss": 0.527, "step": 4950 }, { "epoch": 0.09408551158996926, "grad_norm": 1.9220888422594118, "learning_rate": 9.406297420333839e-06, "loss": 0.5297, "step": 4960 }, { "epoch": 0.09427520012140066, "grad_norm": 2.518975527375163, "learning_rate": 9.4252655538695e-06, "loss": 0.5177, "step": 4970 }, { "epoch": 0.09446488865283205, "grad_norm": 1.8465570092892483, "learning_rate": 9.44423368740516e-06, "loss": 0.5396, "step": 4980 }, { "epoch": 0.09465457718426344, "grad_norm": 1.5780551864121788, "learning_rate": 9.463201820940821e-06, "loss": 0.518, "step": 4990 }, { "epoch": 0.09484426571569483, "grad_norm": 1.6019109657401942, "learning_rate": 9.48216995447648e-06, "loss": 0.5515, "step": 5000 }, { "epoch": 0.09503395424712621, "grad_norm": 1.4370772508325513, "learning_rate": 9.501138088012141e-06, "loss": 0.5321, "step": 5010 }, { "epoch": 0.0952236427785576, "grad_norm": 1.736735982619308, "learning_rate": 9.5201062215478e-06, "loss": 0.5234, "step": 5020 }, { "epoch": 0.095413331309989, "grad_norm": 1.594589110013727, "learning_rate": 9.53907435508346e-06, "loss": 0.5397, "step": 5030 }, { "epoch": 0.09560301984142039, "grad_norm": 1.6614096779768195, "learning_rate": 9.55804248861912e-06, "loss": 0.5193, "step": 5040 }, { "epoch": 0.09579270837285178, "grad_norm": 1.6862427788941927, "learning_rate": 9.57701062215478e-06, "loss": 0.5532, "step": 5050 }, { "epoch": 0.09598239690428317, "grad_norm": 1.8707065930155369, "learning_rate": 9.59597875569044e-06, "loss": 0.5095, "step": 5060 }, { "epoch": 0.09617208543571455, "grad_norm": 1.9395815065402864, "learning_rate": 9.6149468892261e-06, "loss": 0.5394, "step": 5070 }, { "epoch": 0.09636177396714594, "grad_norm": 1.3638048871380215, "learning_rate": 9.633915022761761e-06, "loss": 0.505, "step": 5080 }, { "epoch": 0.09655146249857734, "grad_norm": 1.5991772248109886, "learning_rate": 9.652883156297422e-06, "loss": 0.5079, "step": 5090 }, { "epoch": 0.09674115103000873, "grad_norm": 1.637140863772353, "learning_rate": 9.671851289833081e-06, "loss": 0.516, "step": 5100 }, { "epoch": 0.09693083956144012, "grad_norm": 1.7329608345236929, "learning_rate": 9.690819423368742e-06, "loss": 0.5236, "step": 5110 }, { "epoch": 0.0971205280928715, "grad_norm": 1.7828573598341488, "learning_rate": 9.7097875569044e-06, "loss": 0.5275, "step": 5120 }, { "epoch": 0.09731021662430289, "grad_norm": 1.6497102640635406, "learning_rate": 9.728755690440061e-06, "loss": 0.5091, "step": 5130 }, { "epoch": 0.09749990515573428, "grad_norm": 1.6544875570195157, "learning_rate": 9.74772382397572e-06, "loss": 0.5159, "step": 5140 }, { "epoch": 0.09768959368716568, "grad_norm": 1.7564968440882436, "learning_rate": 9.766691957511381e-06, "loss": 0.5495, "step": 5150 }, { "epoch": 0.09787928221859707, "grad_norm": 1.546087411823132, "learning_rate": 9.785660091047042e-06, "loss": 0.5245, "step": 5160 }, { "epoch": 0.09806897075002845, "grad_norm": 1.7700514015177573, "learning_rate": 9.804628224582701e-06, "loss": 0.5429, "step": 5170 }, { "epoch": 0.09825865928145984, "grad_norm": 1.4725725463043116, "learning_rate": 9.823596358118362e-06, "loss": 0.5269, "step": 5180 }, { "epoch": 0.09844834781289123, "grad_norm": 1.7685715032064018, "learning_rate": 9.842564491654023e-06, "loss": 0.5225, "step": 5190 }, { "epoch": 0.09863803634432262, "grad_norm": 1.6566678996570943, "learning_rate": 9.861532625189683e-06, "loss": 0.5376, "step": 5200 }, { "epoch": 0.09882772487575402, "grad_norm": 1.6580213992880812, "learning_rate": 9.880500758725342e-06, "loss": 0.5058, "step": 5210 }, { "epoch": 0.0990174134071854, "grad_norm": 1.6090401358450293, "learning_rate": 9.899468892261003e-06, "loss": 0.507, "step": 5220 }, { "epoch": 0.09920710193861679, "grad_norm": 1.9540397802182232, "learning_rate": 9.918437025796662e-06, "loss": 0.5196, "step": 5230 }, { "epoch": 0.09939679047004818, "grad_norm": 1.6911356443116914, "learning_rate": 9.937405159332323e-06, "loss": 0.5167, "step": 5240 }, { "epoch": 0.09958647900147957, "grad_norm": 1.5740193374444158, "learning_rate": 9.956373292867982e-06, "loss": 0.5373, "step": 5250 }, { "epoch": 0.09977616753291096, "grad_norm": 1.7396343174812763, "learning_rate": 9.975341426403643e-06, "loss": 0.5216, "step": 5260 }, { "epoch": 0.09996585606434236, "grad_norm": 1.4718453098216215, "learning_rate": 9.994309559939302e-06, "loss": 0.5337, "step": 5270 }, { "epoch": 0.10015554459577374, "grad_norm": 1.5244848158600346, "learning_rate": 9.999999462922923e-06, "loss": 0.5164, "step": 5280 }, { "epoch": 0.10034523312720513, "grad_norm": 1.7764508891121285, "learning_rate": 9.999996832341596e-06, "loss": 0.5058, "step": 5290 }, { "epoch": 0.10053492165863652, "grad_norm": 1.5310082383872803, "learning_rate": 9.99999200961036e-06, "loss": 0.5367, "step": 5300 }, { "epoch": 0.10072461019006791, "grad_norm": 1.8891816434820234, "learning_rate": 9.99998499473133e-06, "loss": 0.5293, "step": 5310 }, { "epoch": 0.1009142987214993, "grad_norm": 1.3344741053570583, "learning_rate": 9.99997578770758e-06, "loss": 0.5226, "step": 5320 }, { "epoch": 0.10110398725293068, "grad_norm": 1.4164913815047453, "learning_rate": 9.999964388543151e-06, "loss": 0.5399, "step": 5330 }, { "epoch": 0.10129367578436207, "grad_norm": 1.5954018795917078, "learning_rate": 9.999950797243037e-06, "loss": 0.5045, "step": 5340 }, { "epoch": 0.10148336431579347, "grad_norm": 1.6029283883492103, "learning_rate": 9.999935013813198e-06, "loss": 0.5254, "step": 5350 }, { "epoch": 0.10167305284722486, "grad_norm": 1.6211194785417973, "learning_rate": 9.999917038260553e-06, "loss": 0.5194, "step": 5360 }, { "epoch": 0.10186274137865625, "grad_norm": 1.267771240694528, "learning_rate": 9.999896870592984e-06, "loss": 0.4741, "step": 5370 }, { "epoch": 0.10205242991008763, "grad_norm": 2.028438642458747, "learning_rate": 9.999874510819334e-06, "loss": 0.5509, "step": 5380 }, { "epoch": 0.10224211844151902, "grad_norm": 1.4203509193046686, "learning_rate": 9.999849958949406e-06, "loss": 0.5115, "step": 5390 }, { "epoch": 0.10243180697295041, "grad_norm": 1.7157983717879641, "learning_rate": 9.999823214993962e-06, "loss": 0.5205, "step": 5400 }, { "epoch": 0.10262149550438181, "grad_norm": 1.6065671085281121, "learning_rate": 9.999794278964728e-06, "loss": 0.5286, "step": 5410 }, { "epoch": 0.1028111840358132, "grad_norm": 1.4961703125337353, "learning_rate": 9.999763150874394e-06, "loss": 0.5305, "step": 5420 }, { "epoch": 0.10300087256724458, "grad_norm": 1.5496472618373247, "learning_rate": 9.999729830736602e-06, "loss": 0.513, "step": 5430 }, { "epoch": 0.10319056109867597, "grad_norm": 1.7777880771447339, "learning_rate": 9.999694318565963e-06, "loss": 0.5125, "step": 5440 }, { "epoch": 0.10338024963010736, "grad_norm": 1.6003244609748908, "learning_rate": 9.999656614378048e-06, "loss": 0.5126, "step": 5450 }, { "epoch": 0.10356993816153875, "grad_norm": 7.5356333727096425, "learning_rate": 9.999616718189387e-06, "loss": 0.5335, "step": 5460 }, { "epoch": 0.10375962669297015, "grad_norm": 1.6474781844661246, "learning_rate": 9.99957463001747e-06, "loss": 0.5338, "step": 5470 }, { "epoch": 0.10394931522440154, "grad_norm": 1.6525645341464847, "learning_rate": 9.999530349880752e-06, "loss": 0.5244, "step": 5480 }, { "epoch": 0.10413900375583292, "grad_norm": 1.340711922139197, "learning_rate": 9.999483877798646e-06, "loss": 0.5093, "step": 5490 }, { "epoch": 0.10432869228726431, "grad_norm": 1.6665502480199648, "learning_rate": 9.999435213791526e-06, "loss": 0.5299, "step": 5500 }, { "epoch": 0.1045183808186957, "grad_norm": 1.5781920459784784, "learning_rate": 9.999384357880728e-06, "loss": 0.5184, "step": 5510 }, { "epoch": 0.1047080693501271, "grad_norm": 1.7589445227482683, "learning_rate": 9.99933131008855e-06, "loss": 0.4949, "step": 5520 }, { "epoch": 0.10489775788155849, "grad_norm": 1.724191780518539, "learning_rate": 9.999276070438247e-06, "loss": 0.5268, "step": 5530 }, { "epoch": 0.10508744641298987, "grad_norm": 1.835387372037156, "learning_rate": 9.99921863895404e-06, "loss": 0.5319, "step": 5540 }, { "epoch": 0.10527713494442126, "grad_norm": 1.5872720738896073, "learning_rate": 9.99915901566111e-06, "loss": 0.5224, "step": 5550 }, { "epoch": 0.10546682347585265, "grad_norm": 1.5504025061270645, "learning_rate": 9.999097200585595e-06, "loss": 0.5333, "step": 5560 }, { "epoch": 0.10565651200728404, "grad_norm": 1.6590389818190634, "learning_rate": 9.999033193754598e-06, "loss": 0.4998, "step": 5570 }, { "epoch": 0.10584620053871543, "grad_norm": 1.3767382163516353, "learning_rate": 9.998966995196178e-06, "loss": 0.5288, "step": 5580 }, { "epoch": 0.10603588907014681, "grad_norm": 1.7295524359524646, "learning_rate": 9.998898604939365e-06, "loss": 0.5402, "step": 5590 }, { "epoch": 0.1062255776015782, "grad_norm": 1.8504169141773872, "learning_rate": 9.998828023014137e-06, "loss": 0.5132, "step": 5600 }, { "epoch": 0.1064152661330096, "grad_norm": 1.5829859808986788, "learning_rate": 9.998755249451444e-06, "loss": 0.5212, "step": 5610 }, { "epoch": 0.10660495466444099, "grad_norm": 1.8579008624687354, "learning_rate": 9.99868028428319e-06, "loss": 0.5222, "step": 5620 }, { "epoch": 0.10679464319587238, "grad_norm": 1.7397375242766482, "learning_rate": 9.998603127542241e-06, "loss": 0.4857, "step": 5630 }, { "epoch": 0.10698433172730376, "grad_norm": 1.392842512307985, "learning_rate": 9.998523779262428e-06, "loss": 0.5068, "step": 5640 }, { "epoch": 0.10717402025873515, "grad_norm": 1.610529012834568, "learning_rate": 9.998442239478536e-06, "loss": 0.5143, "step": 5650 }, { "epoch": 0.10736370879016655, "grad_norm": 1.3639077745487145, "learning_rate": 9.998358508226318e-06, "loss": 0.507, "step": 5660 }, { "epoch": 0.10755339732159794, "grad_norm": 1.6710839228485266, "learning_rate": 9.998272585542481e-06, "loss": 0.4923, "step": 5670 }, { "epoch": 0.10774308585302933, "grad_norm": 1.4636508375217288, "learning_rate": 9.998184471464698e-06, "loss": 0.5003, "step": 5680 }, { "epoch": 0.10793277438446072, "grad_norm": 1.6511651323881065, "learning_rate": 9.9980941660316e-06, "loss": 0.4884, "step": 5690 }, { "epoch": 0.1081224629158921, "grad_norm": 2.1036701978670562, "learning_rate": 9.998001669282783e-06, "loss": 0.5263, "step": 5700 }, { "epoch": 0.1083121514473235, "grad_norm": 1.3425455770769295, "learning_rate": 9.997906981258793e-06, "loss": 0.5106, "step": 5710 }, { "epoch": 0.10850183997875489, "grad_norm": 1.675502015816788, "learning_rate": 9.997810102001153e-06, "loss": 0.5491, "step": 5720 }, { "epoch": 0.10869152851018628, "grad_norm": 1.2912760676217636, "learning_rate": 9.99771103155233e-06, "loss": 0.5064, "step": 5730 }, { "epoch": 0.10888121704161767, "grad_norm": 1.4649299977343713, "learning_rate": 9.997609769955765e-06, "loss": 0.5139, "step": 5740 }, { "epoch": 0.10907090557304905, "grad_norm": 1.6284830403289356, "learning_rate": 9.997506317255852e-06, "loss": 0.5392, "step": 5750 }, { "epoch": 0.10926059410448044, "grad_norm": 1.5137300750321374, "learning_rate": 9.997400673497947e-06, "loss": 0.5165, "step": 5760 }, { "epoch": 0.10945028263591183, "grad_norm": 1.9848778005368246, "learning_rate": 9.997292838728368e-06, "loss": 0.5238, "step": 5770 }, { "epoch": 0.10963997116734323, "grad_norm": 1.807690588615623, "learning_rate": 9.997182812994395e-06, "loss": 0.522, "step": 5780 }, { "epoch": 0.10982965969877462, "grad_norm": 1.6254478041070206, "learning_rate": 9.997070596344264e-06, "loss": 0.5221, "step": 5790 }, { "epoch": 0.110019348230206, "grad_norm": 1.6404179430444694, "learning_rate": 9.996956188827175e-06, "loss": 0.5504, "step": 5800 }, { "epoch": 0.11020903676163739, "grad_norm": 1.5235530753436644, "learning_rate": 9.996839590493289e-06, "loss": 0.5146, "step": 5810 }, { "epoch": 0.11039872529306878, "grad_norm": 1.7173738256391105, "learning_rate": 9.996720801393723e-06, "loss": 0.5099, "step": 5820 }, { "epoch": 0.11058841382450017, "grad_norm": 1.6274862187099586, "learning_rate": 9.99659982158056e-06, "loss": 0.5352, "step": 5830 }, { "epoch": 0.11077810235593157, "grad_norm": 1.5848145359798718, "learning_rate": 9.996476651106841e-06, "loss": 0.507, "step": 5840 }, { "epoch": 0.11096779088736294, "grad_norm": 1.4772684790313972, "learning_rate": 9.996351290026569e-06, "loss": 0.5042, "step": 5850 }, { "epoch": 0.11115747941879434, "grad_norm": 1.6374284450280139, "learning_rate": 9.996223738394704e-06, "loss": 0.5161, "step": 5860 }, { "epoch": 0.11134716795022573, "grad_norm": 1.5433552243738744, "learning_rate": 9.996093996267169e-06, "loss": 0.5354, "step": 5870 }, { "epoch": 0.11153685648165712, "grad_norm": 1.465695438687943, "learning_rate": 9.995962063700848e-06, "loss": 0.5293, "step": 5880 }, { "epoch": 0.11172654501308851, "grad_norm": 1.4251271900769367, "learning_rate": 9.995827940753583e-06, "loss": 0.5359, "step": 5890 }, { "epoch": 0.1119162335445199, "grad_norm": 1.3395942175556312, "learning_rate": 9.99569162748418e-06, "loss": 0.5169, "step": 5900 }, { "epoch": 0.11210592207595128, "grad_norm": 1.52166062871243, "learning_rate": 9.995553123952397e-06, "loss": 0.5013, "step": 5910 }, { "epoch": 0.11229561060738268, "grad_norm": 1.391568450740038, "learning_rate": 9.995412430218963e-06, "loss": 0.5101, "step": 5920 }, { "epoch": 0.11248529913881407, "grad_norm": 1.565956164670117, "learning_rate": 9.995269546345562e-06, "loss": 0.5103, "step": 5930 }, { "epoch": 0.11267498767024546, "grad_norm": 1.8013771441694488, "learning_rate": 9.995124472394838e-06, "loss": 0.5137, "step": 5940 }, { "epoch": 0.11286467620167685, "grad_norm": 1.6316382041523474, "learning_rate": 9.994977208430394e-06, "loss": 0.5226, "step": 5950 }, { "epoch": 0.11305436473310823, "grad_norm": 1.805520407776098, "learning_rate": 9.994827754516799e-06, "loss": 0.5431, "step": 5960 }, { "epoch": 0.11324405326453962, "grad_norm": 1.8364150426415244, "learning_rate": 9.994676110719575e-06, "loss": 0.5232, "step": 5970 }, { "epoch": 0.11343374179597102, "grad_norm": 1.5847008203217392, "learning_rate": 9.994522277105208e-06, "loss": 0.507, "step": 5980 }, { "epoch": 0.11362343032740241, "grad_norm": 1.616209468225034, "learning_rate": 9.994366253741143e-06, "loss": 0.5194, "step": 5990 }, { "epoch": 0.1138131188588338, "grad_norm": 1.628599365180203, "learning_rate": 9.994208040695786e-06, "loss": 0.5221, "step": 6000 }, { "epoch": 0.11400280739026518, "grad_norm": 1.3983106502445637, "learning_rate": 9.994047638038502e-06, "loss": 0.5117, "step": 6010 }, { "epoch": 0.11419249592169657, "grad_norm": 1.4946222589923936, "learning_rate": 9.993885045839617e-06, "loss": 0.5282, "step": 6020 }, { "epoch": 0.11438218445312796, "grad_norm": 1.6091579478887057, "learning_rate": 9.993720264170417e-06, "loss": 0.5565, "step": 6030 }, { "epoch": 0.11457187298455936, "grad_norm": 1.4036162281139457, "learning_rate": 9.993553293103144e-06, "loss": 0.5247, "step": 6040 }, { "epoch": 0.11476156151599075, "grad_norm": 1.8637080700798658, "learning_rate": 9.993384132711006e-06, "loss": 0.4908, "step": 6050 }, { "epoch": 0.11495125004742213, "grad_norm": 1.4917593590328584, "learning_rate": 9.993212783068167e-06, "loss": 0.5392, "step": 6060 }, { "epoch": 0.11514093857885352, "grad_norm": 1.6320079875337712, "learning_rate": 9.993039244249752e-06, "loss": 0.508, "step": 6070 }, { "epoch": 0.11533062711028491, "grad_norm": 1.4781647086945136, "learning_rate": 9.992863516331846e-06, "loss": 0.5321, "step": 6080 }, { "epoch": 0.1155203156417163, "grad_norm": 1.7544504554912264, "learning_rate": 9.992685599391494e-06, "loss": 0.5178, "step": 6090 }, { "epoch": 0.1157100041731477, "grad_norm": 1.4830862934536249, "learning_rate": 9.9925054935067e-06, "loss": 0.4936, "step": 6100 }, { "epoch": 0.11589969270457907, "grad_norm": 2.0200625887282784, "learning_rate": 9.992323198756426e-06, "loss": 0.5259, "step": 6110 }, { "epoch": 0.11608938123601047, "grad_norm": 1.5234415419234377, "learning_rate": 9.992138715220597e-06, "loss": 0.5209, "step": 6120 }, { "epoch": 0.11627906976744186, "grad_norm": 2.0208332754697214, "learning_rate": 9.991952042980096e-06, "loss": 0.5075, "step": 6130 }, { "epoch": 0.11646875829887325, "grad_norm": 1.9570026839863899, "learning_rate": 9.991763182116766e-06, "loss": 0.5363, "step": 6140 }, { "epoch": 0.11665844683030464, "grad_norm": 1.6608348197239358, "learning_rate": 9.99157213271341e-06, "loss": 0.5085, "step": 6150 }, { "epoch": 0.11684813536173604, "grad_norm": 1.6790445077109826, "learning_rate": 9.991378894853787e-06, "loss": 0.5396, "step": 6160 }, { "epoch": 0.11703782389316741, "grad_norm": 1.9248536742478846, "learning_rate": 9.991183468622622e-06, "loss": 0.5665, "step": 6170 }, { "epoch": 0.1172275124245988, "grad_norm": 1.5546846245547543, "learning_rate": 9.990985854105593e-06, "loss": 0.5304, "step": 6180 }, { "epoch": 0.1174172009560302, "grad_norm": 1.5380944470747544, "learning_rate": 9.99078605138934e-06, "loss": 0.5016, "step": 6190 }, { "epoch": 0.11760688948746159, "grad_norm": 2.365716255978723, "learning_rate": 9.990584060561467e-06, "loss": 0.4965, "step": 6200 }, { "epoch": 0.11779657801889298, "grad_norm": 1.470410907949162, "learning_rate": 9.990379881710528e-06, "loss": 0.5033, "step": 6210 }, { "epoch": 0.11798626655032436, "grad_norm": 1.540062850970352, "learning_rate": 9.990173514926042e-06, "loss": 0.512, "step": 6220 }, { "epoch": 0.11817595508175575, "grad_norm": 3.0169824604512896, "learning_rate": 9.989964960298488e-06, "loss": 0.5218, "step": 6230 }, { "epoch": 0.11836564361318715, "grad_norm": 1.2197488930482918, "learning_rate": 9.989754217919303e-06, "loss": 0.5013, "step": 6240 }, { "epoch": 0.11855533214461854, "grad_norm": 1.7933573650090533, "learning_rate": 9.989541287880881e-06, "loss": 0.5323, "step": 6250 }, { "epoch": 0.11874502067604993, "grad_norm": 1.5970020646988103, "learning_rate": 9.989326170276578e-06, "loss": 0.4929, "step": 6260 }, { "epoch": 0.11893470920748131, "grad_norm": 2.172710269236764, "learning_rate": 9.989108865200707e-06, "loss": 0.5208, "step": 6270 }, { "epoch": 0.1191243977389127, "grad_norm": 1.5521294807245771, "learning_rate": 9.988889372748543e-06, "loss": 0.5282, "step": 6280 }, { "epoch": 0.1193140862703441, "grad_norm": 1.6698580408142065, "learning_rate": 9.988667693016316e-06, "loss": 0.5389, "step": 6290 }, { "epoch": 0.11950377480177549, "grad_norm": 1.6636416067724094, "learning_rate": 9.988443826101218e-06, "loss": 0.5136, "step": 6300 }, { "epoch": 0.11969346333320688, "grad_norm": 1.5990858131870536, "learning_rate": 9.988217772101401e-06, "loss": 0.4944, "step": 6310 }, { "epoch": 0.11988315186463826, "grad_norm": 1.7838869885283872, "learning_rate": 9.987989531115972e-06, "loss": 0.5455, "step": 6320 }, { "epoch": 0.12007284039606965, "grad_norm": 1.4204311403689078, "learning_rate": 9.987759103244996e-06, "loss": 0.5079, "step": 6330 }, { "epoch": 0.12026252892750104, "grad_norm": 1.6064836257095236, "learning_rate": 9.987526488589505e-06, "loss": 0.5118, "step": 6340 }, { "epoch": 0.12045221745893243, "grad_norm": 1.7082804477239635, "learning_rate": 9.987291687251482e-06, "loss": 0.5164, "step": 6350 }, { "epoch": 0.12064190599036383, "grad_norm": 1.8072071938489174, "learning_rate": 9.98705469933387e-06, "loss": 0.5134, "step": 6360 }, { "epoch": 0.12083159452179522, "grad_norm": 1.3743795146938442, "learning_rate": 9.986815524940571e-06, "loss": 0.5281, "step": 6370 }, { "epoch": 0.1210212830532266, "grad_norm": 1.3418551297536003, "learning_rate": 9.98657416417645e-06, "loss": 0.535, "step": 6380 }, { "epoch": 0.12121097158465799, "grad_norm": 1.3977688526159346, "learning_rate": 9.986330617147323e-06, "loss": 0.4887, "step": 6390 }, { "epoch": 0.12140066011608938, "grad_norm": 1.5846754729276278, "learning_rate": 9.98608488395997e-06, "loss": 0.5489, "step": 6400 }, { "epoch": 0.12159034864752077, "grad_norm": 1.5240706334884335, "learning_rate": 9.98583696472213e-06, "loss": 0.5034, "step": 6410 }, { "epoch": 0.12178003717895217, "grad_norm": 1.7820548645358176, "learning_rate": 9.985586859542494e-06, "loss": 0.528, "step": 6420 }, { "epoch": 0.12196972571038354, "grad_norm": 1.4990207517128002, "learning_rate": 9.985334568530719e-06, "loss": 0.511, "step": 6430 }, { "epoch": 0.12215941424181494, "grad_norm": 1.6307766109166024, "learning_rate": 9.985080091797415e-06, "loss": 0.5069, "step": 6440 }, { "epoch": 0.12234910277324633, "grad_norm": 2.05017618051032, "learning_rate": 9.984823429454152e-06, "loss": 0.5556, "step": 6450 }, { "epoch": 0.12253879130467772, "grad_norm": 1.610684548393351, "learning_rate": 9.984564581613461e-06, "loss": 0.5147, "step": 6460 }, { "epoch": 0.12272847983610911, "grad_norm": 1.3890636766182398, "learning_rate": 9.984303548388827e-06, "loss": 0.5309, "step": 6470 }, { "epoch": 0.12291816836754049, "grad_norm": 1.6571171512166656, "learning_rate": 9.984040329894697e-06, "loss": 0.5299, "step": 6480 }, { "epoch": 0.12310785689897188, "grad_norm": 1.4947763877352016, "learning_rate": 9.98377492624647e-06, "loss": 0.5223, "step": 6490 }, { "epoch": 0.12329754543040328, "grad_norm": 1.434764178419925, "learning_rate": 9.98350733756051e-06, "loss": 0.5264, "step": 6500 }, { "epoch": 0.12348723396183467, "grad_norm": 1.7137376944482088, "learning_rate": 9.983237563954136e-06, "loss": 0.5347, "step": 6510 }, { "epoch": 0.12367692249326606, "grad_norm": 1.692279596325619, "learning_rate": 9.982965605545623e-06, "loss": 0.5093, "step": 6520 }, { "epoch": 0.12386661102469744, "grad_norm": 1.3293684224380733, "learning_rate": 9.982691462454206e-06, "loss": 0.5258, "step": 6530 }, { "epoch": 0.12405629955612883, "grad_norm": 1.2423976712261153, "learning_rate": 9.982415134800081e-06, "loss": 0.5412, "step": 6540 }, { "epoch": 0.12424598808756022, "grad_norm": 1.6150324635406605, "learning_rate": 9.982136622704393e-06, "loss": 0.5159, "step": 6550 }, { "epoch": 0.12443567661899162, "grad_norm": 1.4341085180769397, "learning_rate": 9.981855926289255e-06, "loss": 0.508, "step": 6560 }, { "epoch": 0.12462536515042301, "grad_norm": 1.6595894720907234, "learning_rate": 9.981573045677731e-06, "loss": 0.5067, "step": 6570 }, { "epoch": 0.1248150536818544, "grad_norm": 1.4614405088604454, "learning_rate": 9.981287980993844e-06, "loss": 0.5156, "step": 6580 }, { "epoch": 0.12500474221328578, "grad_norm": 1.7075821299712357, "learning_rate": 9.981000732362574e-06, "loss": 0.5275, "step": 6590 }, { "epoch": 0.12519443074471717, "grad_norm": 1.533538816372119, "learning_rate": 9.980711299909863e-06, "loss": 0.5248, "step": 6600 }, { "epoch": 0.12538411927614856, "grad_norm": 1.5832914584863325, "learning_rate": 9.980419683762602e-06, "loss": 0.5099, "step": 6610 }, { "epoch": 0.12557380780757996, "grad_norm": 1.6894366169125548, "learning_rate": 9.980125884048647e-06, "loss": 0.4907, "step": 6620 }, { "epoch": 0.12576349633901135, "grad_norm": 1.5369711825892942, "learning_rate": 9.97982990089681e-06, "loss": 0.5429, "step": 6630 }, { "epoch": 0.12595318487044274, "grad_norm": 1.3322412448853984, "learning_rate": 9.979531734436859e-06, "loss": 0.5102, "step": 6640 }, { "epoch": 0.12614287340187413, "grad_norm": 1.4635672136721651, "learning_rate": 9.979231384799517e-06, "loss": 0.5403, "step": 6650 }, { "epoch": 0.12633256193330553, "grad_norm": 1.5230214429811273, "learning_rate": 9.978928852116467e-06, "loss": 0.5205, "step": 6660 }, { "epoch": 0.1265222504647369, "grad_norm": 1.467701226789839, "learning_rate": 9.97862413652035e-06, "loss": 0.5078, "step": 6670 }, { "epoch": 0.12671193899616828, "grad_norm": 1.5669978465763539, "learning_rate": 9.978317238144759e-06, "loss": 0.5081, "step": 6680 }, { "epoch": 0.12690162752759968, "grad_norm": 1.5415561322966398, "learning_rate": 9.978008157124251e-06, "loss": 0.5192, "step": 6690 }, { "epoch": 0.12709131605903107, "grad_norm": 1.7647079853305625, "learning_rate": 9.977696893594333e-06, "loss": 0.5119, "step": 6700 }, { "epoch": 0.12728100459046246, "grad_norm": 1.3677302426299163, "learning_rate": 9.977383447691477e-06, "loss": 0.5083, "step": 6710 }, { "epoch": 0.12747069312189385, "grad_norm": 1.4520070245462517, "learning_rate": 9.977067819553104e-06, "loss": 0.5176, "step": 6720 }, { "epoch": 0.12766038165332524, "grad_norm": 1.6051567623322114, "learning_rate": 9.976750009317597e-06, "loss": 0.5117, "step": 6730 }, { "epoch": 0.12785007018475664, "grad_norm": 1.380819806189477, "learning_rate": 9.976430017124291e-06, "loss": 0.5257, "step": 6740 }, { "epoch": 0.12803975871618803, "grad_norm": 1.445904995779815, "learning_rate": 9.976107843113484e-06, "loss": 0.5296, "step": 6750 }, { "epoch": 0.12822944724761942, "grad_norm": 1.46984952635008, "learning_rate": 9.975783487426421e-06, "loss": 0.52, "step": 6760 }, { "epoch": 0.12841913577905079, "grad_norm": 1.6266593739388455, "learning_rate": 9.975456950205314e-06, "loss": 0.4919, "step": 6770 }, { "epoch": 0.12860882431048218, "grad_norm": 1.6707955246902713, "learning_rate": 9.975128231593325e-06, "loss": 0.526, "step": 6780 }, { "epoch": 0.12879851284191357, "grad_norm": 1.5462104202598892, "learning_rate": 9.974797331734575e-06, "loss": 0.5327, "step": 6790 }, { "epoch": 0.12898820137334496, "grad_norm": 2.1960962460232203, "learning_rate": 9.974464250774144e-06, "loss": 0.4962, "step": 6800 }, { "epoch": 0.12917788990477636, "grad_norm": 1.5439548257036662, "learning_rate": 9.974128988858057e-06, "loss": 0.4894, "step": 6810 }, { "epoch": 0.12936757843620775, "grad_norm": 1.4539262719673907, "learning_rate": 9.973791546133307e-06, "loss": 0.4996, "step": 6820 }, { "epoch": 0.12955726696763914, "grad_norm": 1.418965153028487, "learning_rate": 9.97345192274784e-06, "loss": 0.5107, "step": 6830 }, { "epoch": 0.12974695549907053, "grad_norm": 1.5266008420369255, "learning_rate": 9.973110118850556e-06, "loss": 0.4993, "step": 6840 }, { "epoch": 0.12993664403050192, "grad_norm": 1.556422915666801, "learning_rate": 9.972766134591314e-06, "loss": 0.4936, "step": 6850 }, { "epoch": 0.13012633256193332, "grad_norm": 1.224485276044693, "learning_rate": 9.972419970120924e-06, "loss": 0.5022, "step": 6860 }, { "epoch": 0.1303160210933647, "grad_norm": 1.4208326378882743, "learning_rate": 9.972071625591158e-06, "loss": 0.5134, "step": 6870 }, { "epoch": 0.13050570962479607, "grad_norm": 1.3029416914747363, "learning_rate": 9.971721101154737e-06, "loss": 0.5019, "step": 6880 }, { "epoch": 0.13069539815622747, "grad_norm": 1.996210632527502, "learning_rate": 9.971368396965345e-06, "loss": 0.5152, "step": 6890 }, { "epoch": 0.13088508668765886, "grad_norm": 1.6481209006053261, "learning_rate": 9.971013513177617e-06, "loss": 0.5124, "step": 6900 }, { "epoch": 0.13107477521909025, "grad_norm": 1.4752349215773322, "learning_rate": 9.970656449947145e-06, "loss": 0.5239, "step": 6910 }, { "epoch": 0.13126446375052164, "grad_norm": 1.4789746235202463, "learning_rate": 9.970297207430476e-06, "loss": 0.5222, "step": 6920 }, { "epoch": 0.13145415228195304, "grad_norm": 1.5417989300989754, "learning_rate": 9.969935785785115e-06, "loss": 0.5138, "step": 6930 }, { "epoch": 0.13164384081338443, "grad_norm": 1.3316494886721888, "learning_rate": 9.969572185169515e-06, "loss": 0.5332, "step": 6940 }, { "epoch": 0.13183352934481582, "grad_norm": 1.5885344427044323, "learning_rate": 9.969206405743096e-06, "loss": 0.5287, "step": 6950 }, { "epoch": 0.1320232178762472, "grad_norm": 1.567327148230765, "learning_rate": 9.968838447666219e-06, "loss": 0.5186, "step": 6960 }, { "epoch": 0.1322129064076786, "grad_norm": 1.6281704991889407, "learning_rate": 9.968468311100214e-06, "loss": 0.5264, "step": 6970 }, { "epoch": 0.13240259493910997, "grad_norm": 1.330272356518888, "learning_rate": 9.96809599620736e-06, "loss": 0.5049, "step": 6980 }, { "epoch": 0.13259228347054136, "grad_norm": 1.4485063825189546, "learning_rate": 9.96772150315089e-06, "loss": 0.539, "step": 6990 }, { "epoch": 0.13278197200197275, "grad_norm": 1.451927416315254, "learning_rate": 9.96734483209499e-06, "loss": 0.4937, "step": 7000 }, { "epoch": 0.13297166053340415, "grad_norm": 1.4471234635169947, "learning_rate": 9.966965983204809e-06, "loss": 0.4726, "step": 7010 }, { "epoch": 0.13316134906483554, "grad_norm": 1.5515607075837619, "learning_rate": 9.96658495664644e-06, "loss": 0.5047, "step": 7020 }, { "epoch": 0.13335103759626693, "grad_norm": 1.4697261678752658, "learning_rate": 9.966201752586943e-06, "loss": 0.4954, "step": 7030 }, { "epoch": 0.13354072612769832, "grad_norm": 1.825858251789552, "learning_rate": 9.965816371194323e-06, "loss": 0.5398, "step": 7040 }, { "epoch": 0.13373041465912971, "grad_norm": 1.541159685102043, "learning_rate": 9.965428812637542e-06, "loss": 0.5145, "step": 7050 }, { "epoch": 0.1339201031905611, "grad_norm": 1.3829143726322644, "learning_rate": 9.96503907708652e-06, "loss": 0.5071, "step": 7060 }, { "epoch": 0.1341097917219925, "grad_norm": 1.4034486222094174, "learning_rate": 9.964647164712128e-06, "loss": 0.5432, "step": 7070 }, { "epoch": 0.1342994802534239, "grad_norm": 1.333010333719979, "learning_rate": 9.96425307568619e-06, "loss": 0.522, "step": 7080 }, { "epoch": 0.13448916878485526, "grad_norm": 1.5473893583940708, "learning_rate": 9.963856810181489e-06, "loss": 0.5023, "step": 7090 }, { "epoch": 0.13467885731628665, "grad_norm": 1.7225030262212453, "learning_rate": 9.96345836837176e-06, "loss": 0.5299, "step": 7100 }, { "epoch": 0.13486854584771804, "grad_norm": 1.5192576512262348, "learning_rate": 9.963057750431688e-06, "loss": 0.5292, "step": 7110 }, { "epoch": 0.13505823437914943, "grad_norm": 1.386561694072886, "learning_rate": 9.96265495653692e-06, "loss": 0.503, "step": 7120 }, { "epoch": 0.13524792291058083, "grad_norm": 1.5757623121112072, "learning_rate": 9.962249986864053e-06, "loss": 0.5009, "step": 7130 }, { "epoch": 0.13543761144201222, "grad_norm": 1.7295644604559643, "learning_rate": 9.961842841590637e-06, "loss": 0.4838, "step": 7140 }, { "epoch": 0.1356272999734436, "grad_norm": 1.7414309087408801, "learning_rate": 9.961433520895175e-06, "loss": 0.5399, "step": 7150 }, { "epoch": 0.135816988504875, "grad_norm": 1.5814773929730135, "learning_rate": 9.96102202495713e-06, "loss": 0.5095, "step": 7160 }, { "epoch": 0.1360066770363064, "grad_norm": 1.3219645851187605, "learning_rate": 9.96060835395691e-06, "loss": 0.4698, "step": 7170 }, { "epoch": 0.1361963655677378, "grad_norm": 1.5475666685787208, "learning_rate": 9.960192508075883e-06, "loss": 0.5338, "step": 7180 }, { "epoch": 0.13638605409916915, "grad_norm": 1.4379336870504007, "learning_rate": 9.959774487496368e-06, "loss": 0.5183, "step": 7190 }, { "epoch": 0.13657574263060054, "grad_norm": 1.735203988193066, "learning_rate": 9.959354292401637e-06, "loss": 0.517, "step": 7200 }, { "epoch": 0.13676543116203194, "grad_norm": 1.4945816942534453, "learning_rate": 9.958931922975916e-06, "loss": 0.5206, "step": 7210 }, { "epoch": 0.13695511969346333, "grad_norm": 1.6271586380246825, "learning_rate": 9.958507379404387e-06, "loss": 0.5143, "step": 7220 }, { "epoch": 0.13714480822489472, "grad_norm": 1.4188581374262759, "learning_rate": 9.958080661873182e-06, "loss": 0.5151, "step": 7230 }, { "epoch": 0.1373344967563261, "grad_norm": 1.3920514125944927, "learning_rate": 9.957651770569385e-06, "loss": 0.5009, "step": 7240 }, { "epoch": 0.1375241852877575, "grad_norm": 1.6054247996902573, "learning_rate": 9.957220705681036e-06, "loss": 0.4886, "step": 7250 }, { "epoch": 0.1377138738191889, "grad_norm": 1.4811901261689688, "learning_rate": 9.95678746739713e-06, "loss": 0.5428, "step": 7260 }, { "epoch": 0.1379035623506203, "grad_norm": 1.7198820137396118, "learning_rate": 9.956352055907605e-06, "loss": 0.5175, "step": 7270 }, { "epoch": 0.13809325088205168, "grad_norm": 1.4471098775641134, "learning_rate": 9.955914471403363e-06, "loss": 0.5105, "step": 7280 }, { "epoch": 0.13828293941348305, "grad_norm": 1.7359811521719402, "learning_rate": 9.955474714076254e-06, "loss": 0.5119, "step": 7290 }, { "epoch": 0.13847262794491444, "grad_norm": 1.7495386266077462, "learning_rate": 9.955032784119082e-06, "loss": 0.4943, "step": 7300 }, { "epoch": 0.13866231647634583, "grad_norm": 1.5353687983091866, "learning_rate": 9.9545886817256e-06, "loss": 0.5449, "step": 7310 }, { "epoch": 0.13885200500777722, "grad_norm": 1.7621933183092893, "learning_rate": 9.954142407090518e-06, "loss": 0.5528, "step": 7320 }, { "epoch": 0.13904169353920862, "grad_norm": 1.3700841914084432, "learning_rate": 9.953693960409493e-06, "loss": 0.5062, "step": 7330 }, { "epoch": 0.13923138207064, "grad_norm": 1.5585484383362636, "learning_rate": 9.95324334187914e-06, "loss": 0.4939, "step": 7340 }, { "epoch": 0.1394210706020714, "grad_norm": 1.6188472580959188, "learning_rate": 9.952790551697024e-06, "loss": 0.5076, "step": 7350 }, { "epoch": 0.1396107591335028, "grad_norm": 1.5496199360255882, "learning_rate": 9.952335590061664e-06, "loss": 0.5183, "step": 7360 }, { "epoch": 0.13980044766493419, "grad_norm": 1.5808609212069589, "learning_rate": 9.951878457172523e-06, "loss": 0.5025, "step": 7370 }, { "epoch": 0.13999013619636558, "grad_norm": 1.3974605458705842, "learning_rate": 9.951419153230028e-06, "loss": 0.5293, "step": 7380 }, { "epoch": 0.14017982472779697, "grad_norm": 1.6177625244278102, "learning_rate": 9.95095767843555e-06, "loss": 0.5142, "step": 7390 }, { "epoch": 0.14036951325922833, "grad_norm": 1.5048853782803406, "learning_rate": 9.950494032991409e-06, "loss": 0.5099, "step": 7400 }, { "epoch": 0.14055920179065973, "grad_norm": 1.5311561852239726, "learning_rate": 9.950028217100886e-06, "loss": 0.5209, "step": 7410 }, { "epoch": 0.14074889032209112, "grad_norm": 3.6614849523426685, "learning_rate": 9.94956023096821e-06, "loss": 0.5008, "step": 7420 }, { "epoch": 0.1409385788535225, "grad_norm": 1.5193210434589919, "learning_rate": 9.949090074798556e-06, "loss": 0.5223, "step": 7430 }, { "epoch": 0.1411282673849539, "grad_norm": 1.6846218941674365, "learning_rate": 9.948617748798057e-06, "loss": 0.5209, "step": 7440 }, { "epoch": 0.1413179559163853, "grad_norm": 1.6288828783508045, "learning_rate": 9.948143253173794e-06, "loss": 0.5137, "step": 7450 }, { "epoch": 0.1415076444478167, "grad_norm": 1.5875205652050723, "learning_rate": 9.947666588133802e-06, "loss": 0.513, "step": 7460 }, { "epoch": 0.14169733297924808, "grad_norm": 1.5757926935402913, "learning_rate": 9.947187753887063e-06, "loss": 0.4957, "step": 7470 }, { "epoch": 0.14188702151067947, "grad_norm": 1.3774343524868298, "learning_rate": 9.946706750643514e-06, "loss": 0.473, "step": 7480 }, { "epoch": 0.14207671004211087, "grad_norm": 1.6132645607179967, "learning_rate": 9.946223578614042e-06, "loss": 0.4932, "step": 7490 }, { "epoch": 0.14226639857354223, "grad_norm": 1.5655266454438492, "learning_rate": 9.945738238010482e-06, "loss": 0.5411, "step": 7500 }, { "epoch": 0.14245608710497362, "grad_norm": 1.3441778317860753, "learning_rate": 9.945250729045623e-06, "loss": 0.4925, "step": 7510 }, { "epoch": 0.14264577563640501, "grad_norm": 1.6256769657627475, "learning_rate": 9.944761051933206e-06, "loss": 0.5214, "step": 7520 }, { "epoch": 0.1428354641678364, "grad_norm": 1.5130725399640397, "learning_rate": 9.944269206887917e-06, "loss": 0.5006, "step": 7530 }, { "epoch": 0.1430251526992678, "grad_norm": 1.5137974087154094, "learning_rate": 9.943775194125397e-06, "loss": 0.5272, "step": 7540 }, { "epoch": 0.1432148412306992, "grad_norm": 5.387398992984719, "learning_rate": 9.943279013862237e-06, "loss": 0.4998, "step": 7550 }, { "epoch": 0.14340452976213058, "grad_norm": 1.4258488385879986, "learning_rate": 9.942780666315974e-06, "loss": 0.4955, "step": 7560 }, { "epoch": 0.14359421829356198, "grad_norm": 2.7672140645958514, "learning_rate": 9.942280151705103e-06, "loss": 0.5076, "step": 7570 }, { "epoch": 0.14378390682499337, "grad_norm": 1.4758285220341567, "learning_rate": 9.941777470249062e-06, "loss": 0.5187, "step": 7580 }, { "epoch": 0.14397359535642476, "grad_norm": 1.3076148715406901, "learning_rate": 9.941272622168245e-06, "loss": 0.4964, "step": 7590 }, { "epoch": 0.14416328388785615, "grad_norm": 1.6219209744024685, "learning_rate": 9.940765607683988e-06, "loss": 0.5264, "step": 7600 }, { "epoch": 0.14435297241928752, "grad_norm": 1.462649030574997, "learning_rate": 9.940256427018587e-06, "loss": 0.5204, "step": 7610 }, { "epoch": 0.1445426609507189, "grad_norm": 1.613022668436343, "learning_rate": 9.939745080395277e-06, "loss": 0.5333, "step": 7620 }, { "epoch": 0.1447323494821503, "grad_norm": 2.661839606841312, "learning_rate": 9.93923156803825e-06, "loss": 0.516, "step": 7630 }, { "epoch": 0.1449220380135817, "grad_norm": 1.4991089866730234, "learning_rate": 9.938715890172645e-06, "loss": 0.5231, "step": 7640 }, { "epoch": 0.1451117265450131, "grad_norm": 1.465873155104159, "learning_rate": 9.93819804702455e-06, "loss": 0.518, "step": 7650 }, { "epoch": 0.14530141507644448, "grad_norm": 1.4558185181960652, "learning_rate": 9.937678038821005e-06, "loss": 0.5329, "step": 7660 }, { "epoch": 0.14549110360787587, "grad_norm": 1.5473948617480637, "learning_rate": 9.937155865789998e-06, "loss": 0.525, "step": 7670 }, { "epoch": 0.14568079213930726, "grad_norm": 1.6711318515606082, "learning_rate": 9.936631528160463e-06, "loss": 0.4932, "step": 7680 }, { "epoch": 0.14587048067073866, "grad_norm": 1.5932290159808948, "learning_rate": 9.936105026162286e-06, "loss": 0.5303, "step": 7690 }, { "epoch": 0.14606016920217005, "grad_norm": 1.6348951307964783, "learning_rate": 9.935576360026302e-06, "loss": 0.5181, "step": 7700 }, { "epoch": 0.1462498577336014, "grad_norm": 1.3980653345645864, "learning_rate": 9.935045529984296e-06, "loss": 0.5169, "step": 7710 }, { "epoch": 0.1464395462650328, "grad_norm": 1.3207318517427538, "learning_rate": 9.934512536268996e-06, "loss": 0.5276, "step": 7720 }, { "epoch": 0.1466292347964642, "grad_norm": 1.6703681327709965, "learning_rate": 9.933977379114084e-06, "loss": 0.5272, "step": 7730 }, { "epoch": 0.1468189233278956, "grad_norm": 1.6114396151784875, "learning_rate": 9.933440058754191e-06, "loss": 0.5124, "step": 7740 }, { "epoch": 0.14700861185932698, "grad_norm": 1.684442757506366, "learning_rate": 9.932900575424893e-06, "loss": 0.511, "step": 7750 }, { "epoch": 0.14719830039075837, "grad_norm": 1.5395296192132952, "learning_rate": 9.932358929362715e-06, "loss": 0.5147, "step": 7760 }, { "epoch": 0.14738798892218977, "grad_norm": 1.3103865484979187, "learning_rate": 9.931815120805135e-06, "loss": 0.4978, "step": 7770 }, { "epoch": 0.14757767745362116, "grad_norm": 1.604875247821696, "learning_rate": 9.93126914999057e-06, "loss": 0.5115, "step": 7780 }, { "epoch": 0.14776736598505255, "grad_norm": 1.5258810938535667, "learning_rate": 9.930721017158391e-06, "loss": 0.5067, "step": 7790 }, { "epoch": 0.14795705451648394, "grad_norm": 1.7365290568067384, "learning_rate": 9.93017072254892e-06, "loss": 0.5479, "step": 7800 }, { "epoch": 0.14814674304791534, "grad_norm": 1.314815456406119, "learning_rate": 9.929618266403419e-06, "loss": 0.5373, "step": 7810 }, { "epoch": 0.1483364315793467, "grad_norm": 1.4921619043857888, "learning_rate": 9.929063648964101e-06, "loss": 0.5197, "step": 7820 }, { "epoch": 0.1485261201107781, "grad_norm": 1.9846801608662963, "learning_rate": 9.92850687047413e-06, "loss": 0.5168, "step": 7830 }, { "epoch": 0.14871580864220948, "grad_norm": 1.3351987776110472, "learning_rate": 9.927947931177614e-06, "loss": 0.5203, "step": 7840 }, { "epoch": 0.14890549717364088, "grad_norm": 1.5397439134519921, "learning_rate": 9.927386831319608e-06, "loss": 0.5163, "step": 7850 }, { "epoch": 0.14909518570507227, "grad_norm": 1.3663152068295255, "learning_rate": 9.926823571146113e-06, "loss": 0.494, "step": 7860 }, { "epoch": 0.14928487423650366, "grad_norm": 1.6541950631988074, "learning_rate": 9.926258150904085e-06, "loss": 0.5092, "step": 7870 }, { "epoch": 0.14947456276793505, "grad_norm": 1.6589883078353098, "learning_rate": 9.925690570841415e-06, "loss": 0.5299, "step": 7880 }, { "epoch": 0.14966425129936645, "grad_norm": 1.2559099622910366, "learning_rate": 9.925120831206952e-06, "loss": 0.5094, "step": 7890 }, { "epoch": 0.14985393983079784, "grad_norm": 1.4789992254034852, "learning_rate": 9.924548932250483e-06, "loss": 0.5262, "step": 7900 }, { "epoch": 0.15004362836222923, "grad_norm": 1.33787411955403, "learning_rate": 9.923974874222751e-06, "loss": 0.4816, "step": 7910 }, { "epoch": 0.1502333168936606, "grad_norm": 1.677315281629099, "learning_rate": 9.923398657375437e-06, "loss": 0.532, "step": 7920 }, { "epoch": 0.150423005425092, "grad_norm": 1.4970387524131392, "learning_rate": 9.922820281961172e-06, "loss": 0.4966, "step": 7930 }, { "epoch": 0.15061269395652338, "grad_norm": 1.3726204429608875, "learning_rate": 9.922239748233533e-06, "loss": 0.486, "step": 7940 }, { "epoch": 0.15080238248795477, "grad_norm": 1.5299004443390263, "learning_rate": 9.921657056447046e-06, "loss": 0.5295, "step": 7950 }, { "epoch": 0.15099207101938616, "grad_norm": 1.2898279384039357, "learning_rate": 9.921072206857176e-06, "loss": 0.4804, "step": 7960 }, { "epoch": 0.15118175955081756, "grad_norm": 1.6605522870875131, "learning_rate": 9.920485199720345e-06, "loss": 0.4954, "step": 7970 }, { "epoch": 0.15137144808224895, "grad_norm": 1.4559694458548933, "learning_rate": 9.919896035293908e-06, "loss": 0.4873, "step": 7980 }, { "epoch": 0.15156113661368034, "grad_norm": 1.483239539633467, "learning_rate": 9.919304713836179e-06, "loss": 0.5294, "step": 7990 }, { "epoch": 0.15175082514511173, "grad_norm": 1.6590361335569104, "learning_rate": 9.918711235606406e-06, "loss": 0.4956, "step": 8000 }, { "epoch": 0.15194051367654313, "grad_norm": 1.6648647774954077, "learning_rate": 9.918115600864792e-06, "loss": 0.5172, "step": 8010 }, { "epoch": 0.15213020220797452, "grad_norm": 1.4428895151286714, "learning_rate": 9.917517809872478e-06, "loss": 0.4994, "step": 8020 }, { "epoch": 0.15231989073940588, "grad_norm": 1.5522814300094956, "learning_rate": 9.916917862891555e-06, "loss": 0.4739, "step": 8030 }, { "epoch": 0.15250957927083728, "grad_norm": 1.4808702514357444, "learning_rate": 9.916315760185058e-06, "loss": 0.4852, "step": 8040 }, { "epoch": 0.15269926780226867, "grad_norm": 1.3574185711711166, "learning_rate": 9.915711502016967e-06, "loss": 0.525, "step": 8050 }, { "epoch": 0.15288895633370006, "grad_norm": 1.7752012903067156, "learning_rate": 9.915105088652206e-06, "loss": 0.5073, "step": 8060 }, { "epoch": 0.15307864486513145, "grad_norm": 1.8325226442304616, "learning_rate": 9.914496520356646e-06, "loss": 0.516, "step": 8070 }, { "epoch": 0.15326833339656284, "grad_norm": 1.5441504789425684, "learning_rate": 9.913885797397102e-06, "loss": 0.5168, "step": 8080 }, { "epoch": 0.15345802192799424, "grad_norm": 1.4138470123571678, "learning_rate": 9.913272920041335e-06, "loss": 0.5072, "step": 8090 }, { "epoch": 0.15364771045942563, "grad_norm": 1.4142398654361206, "learning_rate": 9.912657888558044e-06, "loss": 0.4873, "step": 8100 }, { "epoch": 0.15383739899085702, "grad_norm": 1.4397640361109627, "learning_rate": 9.912040703216882e-06, "loss": 0.5092, "step": 8110 }, { "epoch": 0.15402708752228841, "grad_norm": 1.6663677505089247, "learning_rate": 9.91142136428844e-06, "loss": 0.4742, "step": 8120 }, { "epoch": 0.15421677605371978, "grad_norm": 1.5434008130747745, "learning_rate": 9.910799872044255e-06, "loss": 0.5126, "step": 8130 }, { "epoch": 0.15440646458515117, "grad_norm": 1.583159235502757, "learning_rate": 9.91017622675681e-06, "loss": 0.5076, "step": 8140 }, { "epoch": 0.15459615311658256, "grad_norm": 1.6305856952489484, "learning_rate": 9.909550428699527e-06, "loss": 0.505, "step": 8150 }, { "epoch": 0.15478584164801396, "grad_norm": 1.334558502898081, "learning_rate": 9.908922478146774e-06, "loss": 0.4884, "step": 8160 }, { "epoch": 0.15497553017944535, "grad_norm": 1.5054146762566842, "learning_rate": 9.908292375373866e-06, "loss": 0.5225, "step": 8170 }, { "epoch": 0.15516521871087674, "grad_norm": 1.8841299153749915, "learning_rate": 9.907660120657059e-06, "loss": 0.493, "step": 8180 }, { "epoch": 0.15535490724230813, "grad_norm": 1.4298968432755634, "learning_rate": 9.907025714273552e-06, "loss": 0.5209, "step": 8190 }, { "epoch": 0.15554459577373952, "grad_norm": 1.5719335995834047, "learning_rate": 9.90638915650149e-06, "loss": 0.5006, "step": 8200 }, { "epoch": 0.15573428430517092, "grad_norm": 1.8930600405168347, "learning_rate": 9.905750447619956e-06, "loss": 0.5125, "step": 8210 }, { "epoch": 0.1559239728366023, "grad_norm": 1.1977154900765499, "learning_rate": 9.90510958790898e-06, "loss": 0.4577, "step": 8220 }, { "epoch": 0.1561136613680337, "grad_norm": 1.3913711868408811, "learning_rate": 9.904466577649534e-06, "loss": 0.5057, "step": 8230 }, { "epoch": 0.15630334989946507, "grad_norm": 1.5342202309707158, "learning_rate": 9.903821417123536e-06, "loss": 0.505, "step": 8240 }, { "epoch": 0.15649303843089646, "grad_norm": 1.708865773376042, "learning_rate": 9.903174106613842e-06, "loss": 0.5498, "step": 8250 }, { "epoch": 0.15668272696232785, "grad_norm": 1.306685969472039, "learning_rate": 9.902524646404252e-06, "loss": 0.4834, "step": 8260 }, { "epoch": 0.15687241549375924, "grad_norm": 1.3460276048673028, "learning_rate": 9.90187303677951e-06, "loss": 0.5146, "step": 8270 }, { "epoch": 0.15706210402519064, "grad_norm": 1.6080151717375188, "learning_rate": 9.901219278025298e-06, "loss": 0.4903, "step": 8280 }, { "epoch": 0.15725179255662203, "grad_norm": 1.9091829463378778, "learning_rate": 9.900563370428248e-06, "loss": 0.4954, "step": 8290 }, { "epoch": 0.15744148108805342, "grad_norm": 1.5326201345611055, "learning_rate": 9.89990531427593e-06, "loss": 0.4992, "step": 8300 }, { "epoch": 0.1576311696194848, "grad_norm": 1.5081933493703006, "learning_rate": 9.899245109856853e-06, "loss": 0.5038, "step": 8310 }, { "epoch": 0.1578208581509162, "grad_norm": 1.3939707857536332, "learning_rate": 9.898582757460471e-06, "loss": 0.5055, "step": 8320 }, { "epoch": 0.1580105466823476, "grad_norm": 1.4859383522590421, "learning_rate": 9.89791825737718e-06, "loss": 0.543, "step": 8330 }, { "epoch": 0.15820023521377896, "grad_norm": 1.3164672084468998, "learning_rate": 9.897251609898317e-06, "loss": 0.5076, "step": 8340 }, { "epoch": 0.15838992374521035, "grad_norm": 1.5624007779667761, "learning_rate": 9.89658281531616e-06, "loss": 0.4974, "step": 8350 }, { "epoch": 0.15857961227664175, "grad_norm": 1.5423637939566388, "learning_rate": 9.895911873923929e-06, "loss": 0.5208, "step": 8360 }, { "epoch": 0.15876930080807314, "grad_norm": 1.6794650328565859, "learning_rate": 9.895238786015786e-06, "loss": 0.5138, "step": 8370 }, { "epoch": 0.15895898933950453, "grad_norm": 1.7725545550790054, "learning_rate": 9.894563551886831e-06, "loss": 0.5084, "step": 8380 }, { "epoch": 0.15914867787093592, "grad_norm": 1.3334026520530713, "learning_rate": 9.893886171833107e-06, "loss": 0.5333, "step": 8390 }, { "epoch": 0.15933836640236732, "grad_norm": 1.4767018304208195, "learning_rate": 9.893206646151601e-06, "loss": 0.5082, "step": 8400 }, { "epoch": 0.1595280549337987, "grad_norm": 1.600546339954834, "learning_rate": 9.892524975140236e-06, "loss": 0.5153, "step": 8410 }, { "epoch": 0.1597177434652301, "grad_norm": 1.5949286709460158, "learning_rate": 9.891841159097876e-06, "loss": 0.5315, "step": 8420 }, { "epoch": 0.1599074319966615, "grad_norm": 1.3965448336157817, "learning_rate": 9.891155198324328e-06, "loss": 0.5047, "step": 8430 }, { "epoch": 0.16009712052809288, "grad_norm": 1.3684175423385783, "learning_rate": 9.890467093120337e-06, "loss": 0.5117, "step": 8440 }, { "epoch": 0.16028680905952425, "grad_norm": 1.439821523888261, "learning_rate": 9.88977684378759e-06, "loss": 0.5226, "step": 8450 }, { "epoch": 0.16047649759095564, "grad_norm": 1.44462603639424, "learning_rate": 9.889084450628712e-06, "loss": 0.5035, "step": 8460 }, { "epoch": 0.16066618612238703, "grad_norm": 1.4050027939435623, "learning_rate": 9.888389913947271e-06, "loss": 0.5176, "step": 8470 }, { "epoch": 0.16085587465381843, "grad_norm": 1.5022550873951517, "learning_rate": 9.887693234047771e-06, "loss": 0.5208, "step": 8480 }, { "epoch": 0.16104556318524982, "grad_norm": 1.278573836663318, "learning_rate": 9.88699441123566e-06, "loss": 0.489, "step": 8490 }, { "epoch": 0.1612352517166812, "grad_norm": 1.6104007620217686, "learning_rate": 9.886293445817321e-06, "loss": 0.5233, "step": 8500 }, { "epoch": 0.1614249402481126, "grad_norm": 1.500389760843655, "learning_rate": 9.885590338100077e-06, "loss": 0.4797, "step": 8510 }, { "epoch": 0.161614628779544, "grad_norm": 1.5448463775319723, "learning_rate": 9.884885088392196e-06, "loss": 0.4974, "step": 8520 }, { "epoch": 0.1618043173109754, "grad_norm": 1.4718071880566428, "learning_rate": 9.884177697002878e-06, "loss": 0.5191, "step": 8530 }, { "epoch": 0.16199400584240678, "grad_norm": 1.8865463936431064, "learning_rate": 9.883468164242265e-06, "loss": 0.516, "step": 8540 }, { "epoch": 0.16218369437383814, "grad_norm": 1.7864037349854118, "learning_rate": 9.882756490421437e-06, "loss": 0.5076, "step": 8550 }, { "epoch": 0.16237338290526954, "grad_norm": 1.469845609031576, "learning_rate": 9.882042675852416e-06, "loss": 0.5148, "step": 8560 }, { "epoch": 0.16256307143670093, "grad_norm": 1.4303787468736033, "learning_rate": 9.881326720848156e-06, "loss": 0.4834, "step": 8570 }, { "epoch": 0.16275275996813232, "grad_norm": 1.4600928740173587, "learning_rate": 9.880608625722556e-06, "loss": 0.5118, "step": 8580 }, { "epoch": 0.1629424484995637, "grad_norm": 1.3564295835423592, "learning_rate": 9.879888390790452e-06, "loss": 0.4852, "step": 8590 }, { "epoch": 0.1631321370309951, "grad_norm": 1.5377872273830617, "learning_rate": 9.879166016367615e-06, "loss": 0.5264, "step": 8600 }, { "epoch": 0.1633218255624265, "grad_norm": 1.414588104457917, "learning_rate": 9.878441502770752e-06, "loss": 0.4647, "step": 8610 }, { "epoch": 0.1635115140938579, "grad_norm": 1.6477807917993212, "learning_rate": 9.87771485031752e-06, "loss": 0.4988, "step": 8620 }, { "epoch": 0.16370120262528928, "grad_norm": 1.5698635025510512, "learning_rate": 9.876986059326496e-06, "loss": 0.5202, "step": 8630 }, { "epoch": 0.16389089115672067, "grad_norm": 1.735657165355934, "learning_rate": 9.876255130117211e-06, "loss": 0.4949, "step": 8640 }, { "epoch": 0.16408057968815207, "grad_norm": 1.6144833542188215, "learning_rate": 9.875522063010126e-06, "loss": 0.5467, "step": 8650 }, { "epoch": 0.16427026821958343, "grad_norm": 1.5600264195171196, "learning_rate": 9.874786858326637e-06, "loss": 0.491, "step": 8660 }, { "epoch": 0.16445995675101482, "grad_norm": 1.4707936053751332, "learning_rate": 9.874049516389079e-06, "loss": 0.5332, "step": 8670 }, { "epoch": 0.16464964528244622, "grad_norm": 1.6358857399941136, "learning_rate": 9.873310037520729e-06, "loss": 0.4815, "step": 8680 }, { "epoch": 0.1648393338138776, "grad_norm": 1.5500221404871861, "learning_rate": 9.872568422045796e-06, "loss": 0.5106, "step": 8690 }, { "epoch": 0.165029022345309, "grad_norm": 1.5013087280704753, "learning_rate": 9.871824670289424e-06, "loss": 0.5085, "step": 8700 }, { "epoch": 0.1652187108767404, "grad_norm": 1.7393800588510042, "learning_rate": 9.871078782577698e-06, "loss": 0.5048, "step": 8710 }, { "epoch": 0.16540839940817179, "grad_norm": 1.3809317026575296, "learning_rate": 9.870330759237638e-06, "loss": 0.5318, "step": 8720 }, { "epoch": 0.16559808793960318, "grad_norm": 1.5340015341111146, "learning_rate": 9.8695806005972e-06, "loss": 0.4992, "step": 8730 }, { "epoch": 0.16578777647103457, "grad_norm": 1.6985987557830182, "learning_rate": 9.868828306985277e-06, "loss": 0.4956, "step": 8740 }, { "epoch": 0.16597746500246596, "grad_norm": 1.6009186667921318, "learning_rate": 9.868073878731694e-06, "loss": 0.5344, "step": 8750 }, { "epoch": 0.16616715353389733, "grad_norm": 1.3732474211678312, "learning_rate": 9.86731731616722e-06, "loss": 0.5042, "step": 8760 }, { "epoch": 0.16635684206532872, "grad_norm": 1.1377996958963965, "learning_rate": 9.866558619623549e-06, "loss": 0.4872, "step": 8770 }, { "epoch": 0.1665465305967601, "grad_norm": 1.5713873442959438, "learning_rate": 9.865797789433324e-06, "loss": 0.4882, "step": 8780 }, { "epoch": 0.1667362191281915, "grad_norm": 1.7092374940328063, "learning_rate": 9.865034825930108e-06, "loss": 0.5077, "step": 8790 }, { "epoch": 0.1669259076596229, "grad_norm": 1.5836394311462092, "learning_rate": 9.864269729448415e-06, "loss": 0.492, "step": 8800 }, { "epoch": 0.1671155961910543, "grad_norm": 1.693117735238425, "learning_rate": 9.86350250032368e-06, "loss": 0.526, "step": 8810 }, { "epoch": 0.16730528472248568, "grad_norm": 1.260266532986945, "learning_rate": 9.862733138892282e-06, "loss": 0.5159, "step": 8820 }, { "epoch": 0.16749497325391707, "grad_norm": 1.551683990414855, "learning_rate": 9.861961645491534e-06, "loss": 0.5283, "step": 8830 }, { "epoch": 0.16768466178534847, "grad_norm": 1.718143844239898, "learning_rate": 9.86118802045968e-06, "loss": 0.5004, "step": 8840 }, { "epoch": 0.16787435031677986, "grad_norm": 1.6448092479385952, "learning_rate": 9.8604122641359e-06, "loss": 0.5297, "step": 8850 }, { "epoch": 0.16806403884821125, "grad_norm": 1.53661729981769, "learning_rate": 9.859634376860313e-06, "loss": 0.547, "step": 8860 }, { "epoch": 0.16825372737964261, "grad_norm": 1.4960938572485325, "learning_rate": 9.858854358973961e-06, "loss": 0.5156, "step": 8870 }, { "epoch": 0.168443415911074, "grad_norm": 1.5568135837715293, "learning_rate": 9.858072210818835e-06, "loss": 0.4926, "step": 8880 }, { "epoch": 0.1686331044425054, "grad_norm": 1.5840442948515152, "learning_rate": 9.857287932737849e-06, "loss": 0.5074, "step": 8890 }, { "epoch": 0.1688227929739368, "grad_norm": 1.416835869572044, "learning_rate": 9.856501525074852e-06, "loss": 0.5084, "step": 8900 }, { "epoch": 0.16901248150536818, "grad_norm": 1.7580425927769716, "learning_rate": 9.855712988174636e-06, "loss": 0.5102, "step": 8910 }, { "epoch": 0.16920217003679958, "grad_norm": 1.2855428247801621, "learning_rate": 9.854922322382911e-06, "loss": 0.5232, "step": 8920 }, { "epoch": 0.16939185856823097, "grad_norm": 1.4068408511265202, "learning_rate": 9.854129528046335e-06, "loss": 0.5288, "step": 8930 }, { "epoch": 0.16958154709966236, "grad_norm": 1.3723992896310133, "learning_rate": 9.85333460551249e-06, "loss": 0.496, "step": 8940 }, { "epoch": 0.16977123563109375, "grad_norm": 1.625387772257246, "learning_rate": 9.852537555129895e-06, "loss": 0.5007, "step": 8950 }, { "epoch": 0.16996092416252515, "grad_norm": 1.5449592177498312, "learning_rate": 9.851738377248002e-06, "loss": 0.4894, "step": 8960 }, { "epoch": 0.1701506126939565, "grad_norm": 1.5328834151367674, "learning_rate": 9.850937072217191e-06, "loss": 0.5334, "step": 8970 }, { "epoch": 0.1703403012253879, "grad_norm": 1.3570028969456709, "learning_rate": 9.850133640388783e-06, "loss": 0.5107, "step": 8980 }, { "epoch": 0.1705299897568193, "grad_norm": 1.5733526381641143, "learning_rate": 9.849328082115024e-06, "loss": 0.5043, "step": 8990 }, { "epoch": 0.1707196782882507, "grad_norm": 1.608688821627803, "learning_rate": 9.848520397749096e-06, "loss": 0.5077, "step": 9000 }, { "epoch": 0.17090936681968208, "grad_norm": 1.4514527411560216, "learning_rate": 9.847710587645112e-06, "loss": 0.491, "step": 9010 }, { "epoch": 0.17109905535111347, "grad_norm": 1.5908819229197753, "learning_rate": 9.846898652158118e-06, "loss": 0.4883, "step": 9020 }, { "epoch": 0.17128874388254486, "grad_norm": 1.6185426476500542, "learning_rate": 9.84608459164409e-06, "loss": 0.496, "step": 9030 }, { "epoch": 0.17147843241397626, "grad_norm": 1.6198328909490285, "learning_rate": 9.845268406459938e-06, "loss": 0.5006, "step": 9040 }, { "epoch": 0.17166812094540765, "grad_norm": 1.4451224546374493, "learning_rate": 9.844450096963502e-06, "loss": 0.5086, "step": 9050 }, { "epoch": 0.17185780947683904, "grad_norm": 1.2765230878242237, "learning_rate": 9.843629663513552e-06, "loss": 0.4803, "step": 9060 }, { "epoch": 0.17204749800827043, "grad_norm": 1.3849228417882784, "learning_rate": 9.842807106469793e-06, "loss": 0.5247, "step": 9070 }, { "epoch": 0.1722371865397018, "grad_norm": 1.639731571419582, "learning_rate": 9.841982426192858e-06, "loss": 0.5153, "step": 9080 }, { "epoch": 0.1724268750711332, "grad_norm": 1.6420544525421066, "learning_rate": 9.84115562304431e-06, "loss": 0.4964, "step": 9090 }, { "epoch": 0.17261656360256458, "grad_norm": 1.6882773395789195, "learning_rate": 9.840326697386648e-06, "loss": 0.5215, "step": 9100 }, { "epoch": 0.17280625213399597, "grad_norm": 1.5850044728073283, "learning_rate": 9.839495649583297e-06, "loss": 0.5039, "step": 9110 }, { "epoch": 0.17299594066542737, "grad_norm": 1.7654405078202373, "learning_rate": 9.838662479998611e-06, "loss": 0.5116, "step": 9120 }, { "epoch": 0.17318562919685876, "grad_norm": 1.568028212447477, "learning_rate": 9.83782718899788e-06, "loss": 0.5309, "step": 9130 }, { "epoch": 0.17337531772829015, "grad_norm": 1.2977176633239151, "learning_rate": 9.83698977694732e-06, "loss": 0.5081, "step": 9140 }, { "epoch": 0.17356500625972154, "grad_norm": 1.5468512652533806, "learning_rate": 9.836150244214075e-06, "loss": 0.5084, "step": 9150 }, { "epoch": 0.17375469479115294, "grad_norm": 1.5526760283803416, "learning_rate": 9.835308591166226e-06, "loss": 0.5444, "step": 9160 }, { "epoch": 0.17394438332258433, "grad_norm": 2.280247127082352, "learning_rate": 9.834464818172775e-06, "loss": 0.5007, "step": 9170 }, { "epoch": 0.1741340718540157, "grad_norm": 1.6239509542356847, "learning_rate": 9.833618925603662e-06, "loss": 0.5113, "step": 9180 }, { "epoch": 0.17432376038544709, "grad_norm": 2.064013603736334, "learning_rate": 9.832770913829748e-06, "loss": 0.5018, "step": 9190 }, { "epoch": 0.17451344891687848, "grad_norm": 1.5165896460559547, "learning_rate": 9.83192078322283e-06, "loss": 0.5188, "step": 9200 }, { "epoch": 0.17470313744830987, "grad_norm": 1.6247336308664517, "learning_rate": 9.831068534155628e-06, "loss": 0.4848, "step": 9210 }, { "epoch": 0.17489282597974126, "grad_norm": 1.2541427500338695, "learning_rate": 9.830214167001795e-06, "loss": 0.5001, "step": 9220 }, { "epoch": 0.17508251451117265, "grad_norm": 1.8199309434666817, "learning_rate": 9.829357682135912e-06, "loss": 0.5037, "step": 9230 }, { "epoch": 0.17527220304260405, "grad_norm": 1.5772079342183156, "learning_rate": 9.82849907993349e-06, "loss": 0.5382, "step": 9240 }, { "epoch": 0.17546189157403544, "grad_norm": 1.5231242034162205, "learning_rate": 9.82763836077096e-06, "loss": 0.5152, "step": 9250 }, { "epoch": 0.17565158010546683, "grad_norm": 1.6514964297128436, "learning_rate": 9.826775525025692e-06, "loss": 0.4994, "step": 9260 }, { "epoch": 0.17584126863689822, "grad_norm": 2.0652898384677676, "learning_rate": 9.82591057307598e-06, "loss": 0.4909, "step": 9270 }, { "epoch": 0.17603095716832962, "grad_norm": 1.4732745826329388, "learning_rate": 9.825043505301041e-06, "loss": 0.4929, "step": 9280 }, { "epoch": 0.17622064569976098, "grad_norm": 1.3692234996977102, "learning_rate": 9.82417432208103e-06, "loss": 0.5001, "step": 9290 }, { "epoch": 0.17641033423119237, "grad_norm": 1.5721384170593315, "learning_rate": 9.823303023797017e-06, "loss": 0.4767, "step": 9300 }, { "epoch": 0.17660002276262377, "grad_norm": 1.7844342988264157, "learning_rate": 9.822429610831009e-06, "loss": 0.5098, "step": 9310 }, { "epoch": 0.17678971129405516, "grad_norm": 1.4795675475468533, "learning_rate": 9.821554083565935e-06, "loss": 0.5215, "step": 9320 }, { "epoch": 0.17697939982548655, "grad_norm": 1.5684031261688, "learning_rate": 9.820676442385652e-06, "loss": 0.4915, "step": 9330 }, { "epoch": 0.17716908835691794, "grad_norm": 1.4035570166335571, "learning_rate": 9.819796687674948e-06, "loss": 0.4821, "step": 9340 }, { "epoch": 0.17735877688834933, "grad_norm": 1.5904839451415813, "learning_rate": 9.818914819819532e-06, "loss": 0.4954, "step": 9350 }, { "epoch": 0.17754846541978073, "grad_norm": 1.4008194168795514, "learning_rate": 9.81803083920604e-06, "loss": 0.4999, "step": 9360 }, { "epoch": 0.17773815395121212, "grad_norm": 1.7018828361337273, "learning_rate": 9.817144746222039e-06, "loss": 0.5007, "step": 9370 }, { "epoch": 0.1779278424826435, "grad_norm": 1.5532308339659822, "learning_rate": 9.816256541256016e-06, "loss": 0.5245, "step": 9380 }, { "epoch": 0.17811753101407488, "grad_norm": 1.3590247698143836, "learning_rate": 9.815366224697386e-06, "loss": 0.5131, "step": 9390 }, { "epoch": 0.17830721954550627, "grad_norm": 1.469071849591338, "learning_rate": 9.814473796936497e-06, "loss": 0.5248, "step": 9400 }, { "epoch": 0.17849690807693766, "grad_norm": 1.7637188122921383, "learning_rate": 9.81357925836461e-06, "loss": 0.504, "step": 9410 }, { "epoch": 0.17868659660836905, "grad_norm": 1.7826824796241811, "learning_rate": 9.812682609373921e-06, "loss": 0.5013, "step": 9420 }, { "epoch": 0.17887628513980044, "grad_norm": 1.500966055101482, "learning_rate": 9.811783850357546e-06, "loss": 0.492, "step": 9430 }, { "epoch": 0.17906597367123184, "grad_norm": 1.3383567025030398, "learning_rate": 9.81088298170953e-06, "loss": 0.5197, "step": 9440 }, { "epoch": 0.17925566220266323, "grad_norm": 1.569628953720144, "learning_rate": 9.80998000382484e-06, "loss": 0.5165, "step": 9450 }, { "epoch": 0.17944535073409462, "grad_norm": 1.6917043433296952, "learning_rate": 9.809074917099368e-06, "loss": 0.5139, "step": 9460 }, { "epoch": 0.17963503926552601, "grad_norm": 1.1048356339368384, "learning_rate": 9.808167721929933e-06, "loss": 0.49, "step": 9470 }, { "epoch": 0.1798247277969574, "grad_norm": 1.4887711470185754, "learning_rate": 9.807258418714278e-06, "loss": 0.4952, "step": 9480 }, { "epoch": 0.1800144163283888, "grad_norm": 1.367801417262415, "learning_rate": 9.806347007851065e-06, "loss": 0.4921, "step": 9490 }, { "epoch": 0.18020410485982016, "grad_norm": 1.5632887438933232, "learning_rate": 9.80543348973989e-06, "loss": 0.5055, "step": 9500 }, { "epoch": 0.18039379339125156, "grad_norm": 1.5251879110302138, "learning_rate": 9.80451786478126e-06, "loss": 0.52, "step": 9510 }, { "epoch": 0.18058348192268295, "grad_norm": 1.717315340976967, "learning_rate": 9.803600133376617e-06, "loss": 0.5128, "step": 9520 }, { "epoch": 0.18077317045411434, "grad_norm": 1.5382346644098805, "learning_rate": 9.802680295928321e-06, "loss": 0.4962, "step": 9530 }, { "epoch": 0.18096285898554573, "grad_norm": 1.6987324727642217, "learning_rate": 9.801758352839658e-06, "loss": 0.5191, "step": 9540 }, { "epoch": 0.18115254751697712, "grad_norm": 1.4105413278966845, "learning_rate": 9.800834304514835e-06, "loss": 0.512, "step": 9550 }, { "epoch": 0.18134223604840852, "grad_norm": 1.163327236542609, "learning_rate": 9.799908151358981e-06, "loss": 0.4755, "step": 9560 }, { "epoch": 0.1815319245798399, "grad_norm": 1.457543794350864, "learning_rate": 9.798979893778153e-06, "loss": 0.5259, "step": 9570 }, { "epoch": 0.1817216131112713, "grad_norm": 1.4190416840781732, "learning_rate": 9.798049532179324e-06, "loss": 0.4991, "step": 9580 }, { "epoch": 0.1819113016427027, "grad_norm": 1.995009360458491, "learning_rate": 9.797117066970394e-06, "loss": 0.5253, "step": 9590 }, { "epoch": 0.18210099017413406, "grad_norm": 1.5672362294759479, "learning_rate": 9.796182498560185e-06, "loss": 0.5361, "step": 9600 }, { "epoch": 0.18229067870556545, "grad_norm": 1.1548319926151187, "learning_rate": 9.795245827358437e-06, "loss": 0.5103, "step": 9610 }, { "epoch": 0.18248036723699684, "grad_norm": 1.2584070230627555, "learning_rate": 9.794307053775817e-06, "loss": 0.4785, "step": 9620 }, { "epoch": 0.18267005576842824, "grad_norm": 1.3936180299194634, "learning_rate": 9.793366178223913e-06, "loss": 0.5122, "step": 9630 }, { "epoch": 0.18285974429985963, "grad_norm": 1.248341282071258, "learning_rate": 9.792423201115231e-06, "loss": 0.458, "step": 9640 }, { "epoch": 0.18304943283129102, "grad_norm": 1.5529442420268824, "learning_rate": 9.791478122863202e-06, "loss": 0.4924, "step": 9650 }, { "epoch": 0.1832391213627224, "grad_norm": 1.5533847624600114, "learning_rate": 9.790530943882177e-06, "loss": 0.5027, "step": 9660 }, { "epoch": 0.1834288098941538, "grad_norm": 1.4664359882526263, "learning_rate": 9.789581664587426e-06, "loss": 0.5151, "step": 9670 }, { "epoch": 0.1836184984255852, "grad_norm": 1.6296841228445162, "learning_rate": 9.788630285395146e-06, "loss": 0.5131, "step": 9680 }, { "epoch": 0.1838081869570166, "grad_norm": 1.4330308006735821, "learning_rate": 9.787676806722447e-06, "loss": 0.5066, "step": 9690 }, { "epoch": 0.18399787548844798, "grad_norm": 1.573940232621826, "learning_rate": 9.786721228987361e-06, "loss": 0.5061, "step": 9700 }, { "epoch": 0.18418756401987935, "grad_norm": 1.4395591969715993, "learning_rate": 9.785763552608848e-06, "loss": 0.4831, "step": 9710 }, { "epoch": 0.18437725255131074, "grad_norm": 1.508467333754892, "learning_rate": 9.784803778006777e-06, "loss": 0.5384, "step": 9720 }, { "epoch": 0.18456694108274213, "grad_norm": 1.5332695583014113, "learning_rate": 9.783841905601943e-06, "loss": 0.5235, "step": 9730 }, { "epoch": 0.18475662961417352, "grad_norm": 1.0275591524012238, "learning_rate": 9.782877935816064e-06, "loss": 0.4722, "step": 9740 }, { "epoch": 0.18494631814560492, "grad_norm": 1.640710818190215, "learning_rate": 9.78191186907177e-06, "loss": 0.4998, "step": 9750 }, { "epoch": 0.1851360066770363, "grad_norm": 1.4578341921615134, "learning_rate": 9.780943705792615e-06, "loss": 0.4974, "step": 9760 }, { "epoch": 0.1853256952084677, "grad_norm": 1.7723353374618782, "learning_rate": 9.77997344640307e-06, "loss": 0.4994, "step": 9770 }, { "epoch": 0.1855153837398991, "grad_norm": 1.646057582093707, "learning_rate": 9.779001091328529e-06, "loss": 0.4831, "step": 9780 }, { "epoch": 0.18570507227133048, "grad_norm": 1.4097290617204756, "learning_rate": 9.778026640995298e-06, "loss": 0.4936, "step": 9790 }, { "epoch": 0.18589476080276188, "grad_norm": 1.6989470257068786, "learning_rate": 9.777050095830607e-06, "loss": 0.5264, "step": 9800 }, { "epoch": 0.18608444933419324, "grad_norm": 1.3553585975710076, "learning_rate": 9.776071456262603e-06, "loss": 0.4954, "step": 9810 }, { "epoch": 0.18627413786562463, "grad_norm": 1.2585508511343098, "learning_rate": 9.775090722720352e-06, "loss": 0.537, "step": 9820 }, { "epoch": 0.18646382639705603, "grad_norm": 1.4431294044686254, "learning_rate": 9.774107895633837e-06, "loss": 0.4808, "step": 9830 }, { "epoch": 0.18665351492848742, "grad_norm": 1.6046977696571898, "learning_rate": 9.77312297543396e-06, "loss": 0.5211, "step": 9840 }, { "epoch": 0.1868432034599188, "grad_norm": 1.4624404522212093, "learning_rate": 9.772135962552535e-06, "loss": 0.4981, "step": 9850 }, { "epoch": 0.1870328919913502, "grad_norm": 1.9028053422582583, "learning_rate": 9.771146857422304e-06, "loss": 0.5104, "step": 9860 }, { "epoch": 0.1872225805227816, "grad_norm": 1.6219019278654467, "learning_rate": 9.770155660476919e-06, "loss": 0.4998, "step": 9870 }, { "epoch": 0.187412269054213, "grad_norm": 1.4371790394914978, "learning_rate": 9.769162372150951e-06, "loss": 0.523, "step": 9880 }, { "epoch": 0.18760195758564438, "grad_norm": 1.306232319069455, "learning_rate": 9.768166992879886e-06, "loss": 0.4988, "step": 9890 }, { "epoch": 0.18779164611707577, "grad_norm": 1.7176041005008333, "learning_rate": 9.76716952310013e-06, "loss": 0.4708, "step": 9900 }, { "epoch": 0.18798133464850716, "grad_norm": 1.4275231369555914, "learning_rate": 9.766169963249002e-06, "loss": 0.5036, "step": 9910 }, { "epoch": 0.18817102317993853, "grad_norm": 1.5931165707950568, "learning_rate": 9.765168313764741e-06, "loss": 0.5165, "step": 9920 }, { "epoch": 0.18836071171136992, "grad_norm": 2.461686970913694, "learning_rate": 9.764164575086501e-06, "loss": 0.5003, "step": 9930 }, { "epoch": 0.1885504002428013, "grad_norm": 1.483747467899293, "learning_rate": 9.76315874765435e-06, "loss": 0.494, "step": 9940 }, { "epoch": 0.1887400887742327, "grad_norm": 1.450325407581652, "learning_rate": 9.762150831909272e-06, "loss": 0.4981, "step": 9950 }, { "epoch": 0.1889297773056641, "grad_norm": 1.3375714951252062, "learning_rate": 9.761140828293172e-06, "loss": 0.5065, "step": 9960 }, { "epoch": 0.1891194658370955, "grad_norm": 1.4434472409485537, "learning_rate": 9.760128737248861e-06, "loss": 0.5193, "step": 9970 }, { "epoch": 0.18930915436852688, "grad_norm": 1.4561010906705698, "learning_rate": 9.759114559220074e-06, "loss": 0.5296, "step": 9980 }, { "epoch": 0.18949884289995828, "grad_norm": 1.3821594556789951, "learning_rate": 9.758098294651456e-06, "loss": 0.4752, "step": 9990 }, { "epoch": 0.18968853143138967, "grad_norm": 1.578190960635779, "learning_rate": 9.75707994398857e-06, "loss": 0.5081, "step": 10000 }, { "epoch": 0.18987821996282106, "grad_norm": 1.5279453959577487, "learning_rate": 9.756059507677887e-06, "loss": 0.5141, "step": 10010 }, { "epoch": 0.19006790849425242, "grad_norm": 1.5647737466247051, "learning_rate": 9.755036986166802e-06, "loss": 0.5209, "step": 10020 }, { "epoch": 0.19025759702568382, "grad_norm": 1.7308492915954792, "learning_rate": 9.754012379903618e-06, "loss": 0.4906, "step": 10030 }, { "epoch": 0.1904472855571152, "grad_norm": 1.4190266866719257, "learning_rate": 9.752985689337552e-06, "loss": 0.5109, "step": 10040 }, { "epoch": 0.1906369740885466, "grad_norm": 1.5074243353860455, "learning_rate": 9.751956914918737e-06, "loss": 0.5185, "step": 10050 }, { "epoch": 0.190826662619978, "grad_norm": 1.5065132375617816, "learning_rate": 9.75092605709822e-06, "loss": 0.5026, "step": 10060 }, { "epoch": 0.19101635115140939, "grad_norm": 1.3750322223391656, "learning_rate": 9.74989311632796e-06, "loss": 0.4885, "step": 10070 }, { "epoch": 0.19120603968284078, "grad_norm": 1.3749751919723643, "learning_rate": 9.748858093060827e-06, "loss": 0.517, "step": 10080 }, { "epoch": 0.19139572821427217, "grad_norm": 1.594784490430636, "learning_rate": 9.747820987750608e-06, "loss": 0.4591, "step": 10090 }, { "epoch": 0.19158541674570356, "grad_norm": 1.478181766393693, "learning_rate": 9.746781800852004e-06, "loss": 0.5331, "step": 10100 }, { "epoch": 0.19177510527713496, "grad_norm": 1.571597971665521, "learning_rate": 9.745740532820622e-06, "loss": 0.5154, "step": 10110 }, { "epoch": 0.19196479380856635, "grad_norm": 1.4156926840229709, "learning_rate": 9.744697184112987e-06, "loss": 0.5176, "step": 10120 }, { "epoch": 0.1921544823399977, "grad_norm": 1.4404414013871458, "learning_rate": 9.743651755186536e-06, "loss": 0.4921, "step": 10130 }, { "epoch": 0.1923441708714291, "grad_norm": 1.5129343247184313, "learning_rate": 9.742604246499615e-06, "loss": 0.5213, "step": 10140 }, { "epoch": 0.1925338594028605, "grad_norm": 1.5717455433265086, "learning_rate": 9.741554658511483e-06, "loss": 0.5215, "step": 10150 }, { "epoch": 0.1927235479342919, "grad_norm": 1.3293297702855933, "learning_rate": 9.740502991682312e-06, "loss": 0.5213, "step": 10160 }, { "epoch": 0.19291323646572328, "grad_norm": 1.6152034903476062, "learning_rate": 9.739449246473185e-06, "loss": 0.503, "step": 10170 }, { "epoch": 0.19310292499715467, "grad_norm": 1.4367819748257546, "learning_rate": 9.738393423346095e-06, "loss": 0.5019, "step": 10180 }, { "epoch": 0.19329261352858607, "grad_norm": 1.39217369423044, "learning_rate": 9.737335522763947e-06, "loss": 0.4845, "step": 10190 }, { "epoch": 0.19348230206001746, "grad_norm": 1.482534623578316, "learning_rate": 9.736275545190558e-06, "loss": 0.5178, "step": 10200 }, { "epoch": 0.19367199059144885, "grad_norm": 1.366279046323342, "learning_rate": 9.735213491090651e-06, "loss": 0.5108, "step": 10210 }, { "epoch": 0.19386167912288024, "grad_norm": 1.6030630302412374, "learning_rate": 9.734149360929866e-06, "loss": 0.5217, "step": 10220 }, { "epoch": 0.1940513676543116, "grad_norm": 1.2964443540079997, "learning_rate": 9.733083155174749e-06, "loss": 0.4985, "step": 10230 }, { "epoch": 0.194241056185743, "grad_norm": 1.5593744136910688, "learning_rate": 9.732014874292757e-06, "loss": 0.4758, "step": 10240 }, { "epoch": 0.1944307447171744, "grad_norm": 1.4495203177682434, "learning_rate": 9.730944518752254e-06, "loss": 0.5057, "step": 10250 }, { "epoch": 0.19462043324860578, "grad_norm": 1.3979531292462881, "learning_rate": 9.729872089022519e-06, "loss": 0.4785, "step": 10260 }, { "epoch": 0.19481012178003718, "grad_norm": 1.345316885361852, "learning_rate": 9.728797585573737e-06, "loss": 0.4943, "step": 10270 }, { "epoch": 0.19499981031146857, "grad_norm": 1.539777351564505, "learning_rate": 9.727721008877004e-06, "loss": 0.512, "step": 10280 }, { "epoch": 0.19518949884289996, "grad_norm": 1.5048120753039977, "learning_rate": 9.726642359404322e-06, "loss": 0.5057, "step": 10290 }, { "epoch": 0.19537918737433135, "grad_norm": 1.5273289475245786, "learning_rate": 9.725561637628604e-06, "loss": 0.4732, "step": 10300 }, { "epoch": 0.19556887590576275, "grad_norm": 1.239865674220142, "learning_rate": 9.724478844023671e-06, "loss": 0.4915, "step": 10310 }, { "epoch": 0.19575856443719414, "grad_norm": 1.3572816141163109, "learning_rate": 9.723393979064254e-06, "loss": 0.5245, "step": 10320 }, { "epoch": 0.19594825296862553, "grad_norm": 1.457982235193582, "learning_rate": 9.722307043225986e-06, "loss": 0.501, "step": 10330 }, { "epoch": 0.1961379415000569, "grad_norm": 1.5528967841070176, "learning_rate": 9.721218036985421e-06, "loss": 0.4738, "step": 10340 }, { "epoch": 0.1963276300314883, "grad_norm": 1.6293160616036606, "learning_rate": 9.720126960820004e-06, "loss": 0.4893, "step": 10350 }, { "epoch": 0.19651731856291968, "grad_norm": 1.5518765871601106, "learning_rate": 9.719033815208098e-06, "loss": 0.4639, "step": 10360 }, { "epoch": 0.19670700709435107, "grad_norm": 1.5458900208486226, "learning_rate": 9.717938600628975e-06, "loss": 0.5144, "step": 10370 }, { "epoch": 0.19689669562578246, "grad_norm": 1.5223002362450606, "learning_rate": 9.716841317562807e-06, "loss": 0.4917, "step": 10380 }, { "epoch": 0.19708638415721386, "grad_norm": 1.2340053969895466, "learning_rate": 9.715741966490674e-06, "loss": 0.4982, "step": 10390 }, { "epoch": 0.19727607268864525, "grad_norm": 1.4684645383488026, "learning_rate": 9.714640547894571e-06, "loss": 0.4964, "step": 10400 }, { "epoch": 0.19746576122007664, "grad_norm": 1.4427888372726259, "learning_rate": 9.713537062257388e-06, "loss": 0.5004, "step": 10410 }, { "epoch": 0.19765544975150803, "grad_norm": 1.0946181030999391, "learning_rate": 9.712431510062926e-06, "loss": 0.4791, "step": 10420 }, { "epoch": 0.19784513828293943, "grad_norm": 1.5075161074119348, "learning_rate": 9.711323891795897e-06, "loss": 0.5058, "step": 10430 }, { "epoch": 0.1980348268143708, "grad_norm": 1.0870522303552888, "learning_rate": 9.71021420794191e-06, "loss": 0.4799, "step": 10440 }, { "epoch": 0.19822451534580218, "grad_norm": 1.578432197196379, "learning_rate": 9.709102458987486e-06, "loss": 0.4975, "step": 10450 }, { "epoch": 0.19841420387723357, "grad_norm": 1.5179473092208369, "learning_rate": 9.707988645420052e-06, "loss": 0.494, "step": 10460 }, { "epoch": 0.19860389240866497, "grad_norm": 1.6875110851048836, "learning_rate": 9.706872767727933e-06, "loss": 0.4769, "step": 10470 }, { "epoch": 0.19879358094009636, "grad_norm": 2.060296073193915, "learning_rate": 9.705754826400365e-06, "loss": 0.4979, "step": 10480 }, { "epoch": 0.19898326947152775, "grad_norm": 1.4216134271278518, "learning_rate": 9.704634821927487e-06, "loss": 0.5024, "step": 10490 }, { "epoch": 0.19917295800295914, "grad_norm": 1.5238994831984534, "learning_rate": 9.703512754800347e-06, "loss": 0.5168, "step": 10500 }, { "epoch": 0.19936264653439054, "grad_norm": 1.5766255414448995, "learning_rate": 9.702388625510886e-06, "loss": 0.507, "step": 10510 }, { "epoch": 0.19955233506582193, "grad_norm": 1.5610162137450054, "learning_rate": 9.701262434551962e-06, "loss": 0.4959, "step": 10520 }, { "epoch": 0.19974202359725332, "grad_norm": 1.7359778865142264, "learning_rate": 9.700134182417329e-06, "loss": 0.5277, "step": 10530 }, { "epoch": 0.1999317121286847, "grad_norm": 1.6937710637714107, "learning_rate": 9.699003869601647e-06, "loss": 0.5129, "step": 10540 }, { "epoch": 0.20012140066011608, "grad_norm": 1.5499135549883338, "learning_rate": 9.697871496600478e-06, "loss": 0.485, "step": 10550 }, { "epoch": 0.20031108919154747, "grad_norm": 1.2869493727488452, "learning_rate": 9.69673706391029e-06, "loss": 0.5056, "step": 10560 }, { "epoch": 0.20050077772297886, "grad_norm": 1.5217155943656808, "learning_rate": 9.695600572028457e-06, "loss": 0.4949, "step": 10570 }, { "epoch": 0.20069046625441025, "grad_norm": 1.6277927917613708, "learning_rate": 9.694462021453243e-06, "loss": 0.5041, "step": 10580 }, { "epoch": 0.20088015478584165, "grad_norm": 1.456867173734366, "learning_rate": 9.693321412683827e-06, "loss": 0.4867, "step": 10590 }, { "epoch": 0.20106984331727304, "grad_norm": 1.654467736809909, "learning_rate": 9.692178746220287e-06, "loss": 0.4812, "step": 10600 }, { "epoch": 0.20125953184870443, "grad_norm": 1.3791541849983686, "learning_rate": 9.691034022563603e-06, "loss": 0.4901, "step": 10610 }, { "epoch": 0.20144922038013582, "grad_norm": 1.5468140682502218, "learning_rate": 9.689887242215656e-06, "loss": 0.5247, "step": 10620 }, { "epoch": 0.20163890891156722, "grad_norm": 1.4460254513051933, "learning_rate": 9.688738405679228e-06, "loss": 0.5118, "step": 10630 }, { "epoch": 0.2018285974429986, "grad_norm": 1.2317762347119412, "learning_rate": 9.687587513458003e-06, "loss": 0.4615, "step": 10640 }, { "epoch": 0.20201828597442997, "grad_norm": 1.4424756264522238, "learning_rate": 9.686434566056571e-06, "loss": 0.4732, "step": 10650 }, { "epoch": 0.20220797450586137, "grad_norm": 1.657907926376119, "learning_rate": 9.685279563980414e-06, "loss": 0.5114, "step": 10660 }, { "epoch": 0.20239766303729276, "grad_norm": 1.5428167573675966, "learning_rate": 9.684122507735924e-06, "loss": 0.4912, "step": 10670 }, { "epoch": 0.20258735156872415, "grad_norm": 1.6792754332172048, "learning_rate": 9.682963397830387e-06, "loss": 0.5203, "step": 10680 }, { "epoch": 0.20277704010015554, "grad_norm": 1.6096363919743213, "learning_rate": 9.681802234771993e-06, "loss": 0.511, "step": 10690 }, { "epoch": 0.20296672863158693, "grad_norm": 1.5867805003877065, "learning_rate": 9.68063901906983e-06, "loss": 0.522, "step": 10700 }, { "epoch": 0.20315641716301833, "grad_norm": 1.3082517958889455, "learning_rate": 9.679473751233887e-06, "loss": 0.488, "step": 10710 }, { "epoch": 0.20334610569444972, "grad_norm": 1.420259347961252, "learning_rate": 9.678306431775055e-06, "loss": 0.5024, "step": 10720 }, { "epoch": 0.2035357942258811, "grad_norm": 1.465355271314803, "learning_rate": 9.677137061205119e-06, "loss": 0.5076, "step": 10730 }, { "epoch": 0.2037254827573125, "grad_norm": 1.45207182641384, "learning_rate": 9.675965640036771e-06, "loss": 0.4768, "step": 10740 }, { "epoch": 0.2039151712887439, "grad_norm": 1.7488306846386057, "learning_rate": 9.67479216878359e-06, "loss": 0.5019, "step": 10750 }, { "epoch": 0.20410485982017526, "grad_norm": 1.330734464108164, "learning_rate": 9.67361664796007e-06, "loss": 0.4753, "step": 10760 }, { "epoch": 0.20429454835160665, "grad_norm": 1.4337075186682933, "learning_rate": 9.672439078081588e-06, "loss": 0.5167, "step": 10770 }, { "epoch": 0.20448423688303805, "grad_norm": 2.685942898212742, "learning_rate": 9.671259459664428e-06, "loss": 0.5058, "step": 10780 }, { "epoch": 0.20467392541446944, "grad_norm": 1.4304373031088697, "learning_rate": 9.670077793225773e-06, "loss": 0.5116, "step": 10790 }, { "epoch": 0.20486361394590083, "grad_norm": 1.6609163983448754, "learning_rate": 9.6688940792837e-06, "loss": 0.4947, "step": 10800 }, { "epoch": 0.20505330247733222, "grad_norm": 1.2937139510079871, "learning_rate": 9.667708318357183e-06, "loss": 0.4902, "step": 10810 }, { "epoch": 0.20524299100876361, "grad_norm": 1.4488767748917408, "learning_rate": 9.6665205109661e-06, "loss": 0.503, "step": 10820 }, { "epoch": 0.205432679540195, "grad_norm": 1.3019516477559443, "learning_rate": 9.665330657631216e-06, "loss": 0.5029, "step": 10830 }, { "epoch": 0.2056223680716264, "grad_norm": 1.5566632608751756, "learning_rate": 9.664138758874202e-06, "loss": 0.5176, "step": 10840 }, { "epoch": 0.2058120566030578, "grad_norm": 1.5037690791296607, "learning_rate": 9.662944815217621e-06, "loss": 0.5123, "step": 10850 }, { "epoch": 0.20600174513448916, "grad_norm": 1.4705018398062648, "learning_rate": 9.661748827184936e-06, "loss": 0.4946, "step": 10860 }, { "epoch": 0.20619143366592055, "grad_norm": 1.4716062131247722, "learning_rate": 9.660550795300503e-06, "loss": 0.4879, "step": 10870 }, { "epoch": 0.20638112219735194, "grad_norm": 1.5645476105847487, "learning_rate": 9.659350720089577e-06, "loss": 0.4912, "step": 10880 }, { "epoch": 0.20657081072878333, "grad_norm": 1.4459980494662092, "learning_rate": 9.658148602078306e-06, "loss": 0.481, "step": 10890 }, { "epoch": 0.20676049926021473, "grad_norm": 1.7381096353038648, "learning_rate": 9.656944441793735e-06, "loss": 0.5098, "step": 10900 }, { "epoch": 0.20695018779164612, "grad_norm": 1.3607155828473823, "learning_rate": 9.655738239763803e-06, "loss": 0.5045, "step": 10910 }, { "epoch": 0.2071398763230775, "grad_norm": 1.3925362036639688, "learning_rate": 9.654529996517347e-06, "loss": 0.5021, "step": 10920 }, { "epoch": 0.2073295648545089, "grad_norm": 1.4834182421944557, "learning_rate": 9.653319712584097e-06, "loss": 0.5196, "step": 10930 }, { "epoch": 0.2075192533859403, "grad_norm": 1.6982378651054821, "learning_rate": 9.65210738849468e-06, "loss": 0.5092, "step": 10940 }, { "epoch": 0.2077089419173717, "grad_norm": 1.4462202189458566, "learning_rate": 9.650893024780611e-06, "loss": 0.5016, "step": 10950 }, { "epoch": 0.20789863044880308, "grad_norm": 1.5417299642225353, "learning_rate": 9.649676621974309e-06, "loss": 0.4741, "step": 10960 }, { "epoch": 0.20808831898023444, "grad_norm": 1.4124396571611013, "learning_rate": 9.648458180609079e-06, "loss": 0.4768, "step": 10970 }, { "epoch": 0.20827800751166584, "grad_norm": 1.2813113691308764, "learning_rate": 9.647237701219121e-06, "loss": 0.5141, "step": 10980 }, { "epoch": 0.20846769604309723, "grad_norm": 1.2254235934255573, "learning_rate": 9.646015184339535e-06, "loss": 0.5112, "step": 10990 }, { "epoch": 0.20865738457452862, "grad_norm": 1.5466453546139067, "learning_rate": 9.644790630506305e-06, "loss": 0.5079, "step": 11000 }, { "epoch": 0.20884707310596, "grad_norm": 1.2847983712516464, "learning_rate": 9.643564040256313e-06, "loss": 0.4876, "step": 11010 }, { "epoch": 0.2090367616373914, "grad_norm": 1.3654926360448452, "learning_rate": 9.642335414127335e-06, "loss": 0.5159, "step": 11020 }, { "epoch": 0.2092264501688228, "grad_norm": 1.5048430375835802, "learning_rate": 9.641104752658036e-06, "loss": 0.5029, "step": 11030 }, { "epoch": 0.2094161387002542, "grad_norm": 1.458526470700299, "learning_rate": 9.639872056387978e-06, "loss": 0.5043, "step": 11040 }, { "epoch": 0.20960582723168558, "grad_norm": 1.2684796313858198, "learning_rate": 9.63863732585761e-06, "loss": 0.4964, "step": 11050 }, { "epoch": 0.20979551576311697, "grad_norm": 1.753521321033982, "learning_rate": 9.637400561608277e-06, "loss": 0.5029, "step": 11060 }, { "epoch": 0.20998520429454834, "grad_norm": 1.3156603191375325, "learning_rate": 9.636161764182213e-06, "loss": 0.494, "step": 11070 }, { "epoch": 0.21017489282597973, "grad_norm": 1.3880992664104634, "learning_rate": 9.634920934122542e-06, "loss": 0.4762, "step": 11080 }, { "epoch": 0.21036458135741112, "grad_norm": 1.235626995217612, "learning_rate": 9.633678071973284e-06, "loss": 0.5072, "step": 11090 }, { "epoch": 0.21055426988884252, "grad_norm": 1.6630321600254054, "learning_rate": 9.632433178279347e-06, "loss": 0.53, "step": 11100 }, { "epoch": 0.2107439584202739, "grad_norm": 1.6729372199526917, "learning_rate": 9.631186253586532e-06, "loss": 0.4981, "step": 11110 }, { "epoch": 0.2109336469517053, "grad_norm": 1.5252380844613656, "learning_rate": 9.629937298441522e-06, "loss": 0.4985, "step": 11120 }, { "epoch": 0.2111233354831367, "grad_norm": 1.3741701078123525, "learning_rate": 9.628686313391904e-06, "loss": 0.4801, "step": 11130 }, { "epoch": 0.21131302401456808, "grad_norm": 1.2376084693970109, "learning_rate": 9.627433298986144e-06, "loss": 0.4808, "step": 11140 }, { "epoch": 0.21150271254599948, "grad_norm": 1.5894270047023764, "learning_rate": 9.626178255773604e-06, "loss": 0.4906, "step": 11150 }, { "epoch": 0.21169240107743087, "grad_norm": 1.400267288071837, "learning_rate": 9.62492118430453e-06, "loss": 0.4955, "step": 11160 }, { "epoch": 0.21188208960886226, "grad_norm": 1.851748176034251, "learning_rate": 9.62366208513006e-06, "loss": 0.5318, "step": 11170 }, { "epoch": 0.21207177814029363, "grad_norm": 1.4913815441404743, "learning_rate": 9.622400958802224e-06, "loss": 0.5204, "step": 11180 }, { "epoch": 0.21226146667172502, "grad_norm": 1.687354075089361, "learning_rate": 9.621137805873935e-06, "loss": 0.5001, "step": 11190 }, { "epoch": 0.2124511552031564, "grad_norm": 1.5473378765266455, "learning_rate": 9.619872626899e-06, "loss": 0.4738, "step": 11200 }, { "epoch": 0.2126408437345878, "grad_norm": 1.558333668920554, "learning_rate": 9.618605422432112e-06, "loss": 0.4777, "step": 11210 }, { "epoch": 0.2128305322660192, "grad_norm": 1.3588867652464611, "learning_rate": 9.61733619302885e-06, "loss": 0.5172, "step": 11220 }, { "epoch": 0.2130202207974506, "grad_norm": 1.6041936522569173, "learning_rate": 9.616064939245681e-06, "loss": 0.5039, "step": 11230 }, { "epoch": 0.21320990932888198, "grad_norm": 1.7429905260221987, "learning_rate": 9.614791661639965e-06, "loss": 0.497, "step": 11240 }, { "epoch": 0.21339959786031337, "grad_norm": 1.5957555293162056, "learning_rate": 9.613516360769944e-06, "loss": 0.4992, "step": 11250 }, { "epoch": 0.21358928639174476, "grad_norm": 1.3000263248544706, "learning_rate": 9.612239037194746e-06, "loss": 0.4823, "step": 11260 }, { "epoch": 0.21377897492317616, "grad_norm": 1.308230921079254, "learning_rate": 9.610959691474392e-06, "loss": 0.4982, "step": 11270 }, { "epoch": 0.21396866345460752, "grad_norm": 1.5816424283494224, "learning_rate": 9.609678324169784e-06, "loss": 0.5197, "step": 11280 }, { "epoch": 0.21415835198603891, "grad_norm": 1.5538478342259523, "learning_rate": 9.608394935842713e-06, "loss": 0.5003, "step": 11290 }, { "epoch": 0.2143480405174703, "grad_norm": 1.2961696733272863, "learning_rate": 9.607109527055853e-06, "loss": 0.5062, "step": 11300 }, { "epoch": 0.2145377290489017, "grad_norm": 1.4644980011643105, "learning_rate": 9.60582209837277e-06, "loss": 0.4797, "step": 11310 }, { "epoch": 0.2147274175803331, "grad_norm": 1.2939398725468902, "learning_rate": 9.60453265035791e-06, "loss": 0.5203, "step": 11320 }, { "epoch": 0.21491710611176448, "grad_norm": 1.5772917351951512, "learning_rate": 9.603241183576604e-06, "loss": 0.543, "step": 11330 }, { "epoch": 0.21510679464319588, "grad_norm": 1.4011455027289494, "learning_rate": 9.60194769859507e-06, "loss": 0.5125, "step": 11340 }, { "epoch": 0.21529648317462727, "grad_norm": 1.504100315443313, "learning_rate": 9.600652195980415e-06, "loss": 0.5159, "step": 11350 }, { "epoch": 0.21548617170605866, "grad_norm": 1.7024456153653482, "learning_rate": 9.599354676300625e-06, "loss": 0.4552, "step": 11360 }, { "epoch": 0.21567586023749005, "grad_norm": 1.3111267164809939, "learning_rate": 9.59805514012457e-06, "loss": 0.5086, "step": 11370 }, { "epoch": 0.21586554876892144, "grad_norm": 1.638462981070576, "learning_rate": 9.596753588022008e-06, "loss": 0.4965, "step": 11380 }, { "epoch": 0.2160552373003528, "grad_norm": 1.550606476649364, "learning_rate": 9.595450020563576e-06, "loss": 0.4947, "step": 11390 }, { "epoch": 0.2162449258317842, "grad_norm": 1.4803506028013034, "learning_rate": 9.5941444383208e-06, "loss": 0.5206, "step": 11400 }, { "epoch": 0.2164346143632156, "grad_norm": 1.6367781136311774, "learning_rate": 9.592836841866086e-06, "loss": 0.4851, "step": 11410 }, { "epoch": 0.216624302894647, "grad_norm": 1.3465687648602112, "learning_rate": 9.591527231772724e-06, "loss": 0.5155, "step": 11420 }, { "epoch": 0.21681399142607838, "grad_norm": 1.9129108461976345, "learning_rate": 9.590215608614888e-06, "loss": 0.4904, "step": 11430 }, { "epoch": 0.21700367995750977, "grad_norm": 1.7213589015673505, "learning_rate": 9.58890197296763e-06, "loss": 0.4868, "step": 11440 }, { "epoch": 0.21719336848894116, "grad_norm": 3.292001912212146, "learning_rate": 9.587586325406892e-06, "loss": 0.4906, "step": 11450 }, { "epoch": 0.21738305702037256, "grad_norm": 1.4991627353778962, "learning_rate": 9.586268666509488e-06, "loss": 0.481, "step": 11460 }, { "epoch": 0.21757274555180395, "grad_norm": 1.5470525385059206, "learning_rate": 9.584948996853125e-06, "loss": 0.5076, "step": 11470 }, { "epoch": 0.21776243408323534, "grad_norm": 1.694232612040648, "learning_rate": 9.583627317016383e-06, "loss": 0.4927, "step": 11480 }, { "epoch": 0.2179521226146667, "grad_norm": 1.7020256478434472, "learning_rate": 9.582303627578728e-06, "loss": 0.5138, "step": 11490 }, { "epoch": 0.2181418111460981, "grad_norm": 1.594144510034667, "learning_rate": 9.580977929120505e-06, "loss": 0.5178, "step": 11500 }, { "epoch": 0.2183314996775295, "grad_norm": 1.3650898645401044, "learning_rate": 9.57965022222294e-06, "loss": 0.5065, "step": 11510 }, { "epoch": 0.21852118820896088, "grad_norm": 1.3726575119455102, "learning_rate": 9.57832050746814e-06, "loss": 0.4619, "step": 11520 }, { "epoch": 0.21871087674039227, "grad_norm": 1.4534557881091505, "learning_rate": 9.57698878543909e-06, "loss": 0.5107, "step": 11530 }, { "epoch": 0.21890056527182367, "grad_norm": 1.5056320101159015, "learning_rate": 9.575655056719661e-06, "loss": 0.4891, "step": 11540 }, { "epoch": 0.21909025380325506, "grad_norm": 1.622998248158072, "learning_rate": 9.5743193218946e-06, "loss": 0.5291, "step": 11550 }, { "epoch": 0.21927994233468645, "grad_norm": 1.4326307707987114, "learning_rate": 9.572981581549531e-06, "loss": 0.5038, "step": 11560 }, { "epoch": 0.21946963086611784, "grad_norm": 1.7753310927224204, "learning_rate": 9.571641836270959e-06, "loss": 0.5027, "step": 11570 }, { "epoch": 0.21965931939754924, "grad_norm": 1.4701157522553947, "learning_rate": 9.570300086646274e-06, "loss": 0.4875, "step": 11580 }, { "epoch": 0.21984900792898063, "grad_norm": 1.4883096845958388, "learning_rate": 9.568956333263735e-06, "loss": 0.5149, "step": 11590 }, { "epoch": 0.220038696460412, "grad_norm": 1.6045296277923304, "learning_rate": 9.567610576712484e-06, "loss": 0.4868, "step": 11600 }, { "epoch": 0.22022838499184338, "grad_norm": 1.6413223480275874, "learning_rate": 9.566262817582546e-06, "loss": 0.5073, "step": 11610 }, { "epoch": 0.22041807352327478, "grad_norm": 1.773324176415152, "learning_rate": 9.564913056464813e-06, "loss": 0.5153, "step": 11620 }, { "epoch": 0.22060776205470617, "grad_norm": 1.3873777909421676, "learning_rate": 9.563561293951068e-06, "loss": 0.4969, "step": 11630 }, { "epoch": 0.22079745058613756, "grad_norm": 1.223442632768257, "learning_rate": 9.562207530633958e-06, "loss": 0.5108, "step": 11640 }, { "epoch": 0.22098713911756895, "grad_norm": 1.5680996969039707, "learning_rate": 9.560851767107018e-06, "loss": 0.5103, "step": 11650 }, { "epoch": 0.22117682764900035, "grad_norm": 1.4186840166456685, "learning_rate": 9.559494003964654e-06, "loss": 0.5043, "step": 11660 }, { "epoch": 0.22136651618043174, "grad_norm": 1.3982039002934679, "learning_rate": 9.55813424180215e-06, "loss": 0.5078, "step": 11670 }, { "epoch": 0.22155620471186313, "grad_norm": 1.5504413206553276, "learning_rate": 9.55677248121567e-06, "loss": 0.5043, "step": 11680 }, { "epoch": 0.22174589324329452, "grad_norm": 1.1317690107644396, "learning_rate": 9.555408722802249e-06, "loss": 0.4849, "step": 11690 }, { "epoch": 0.2219355817747259, "grad_norm": 1.4176318328425335, "learning_rate": 9.554042967159797e-06, "loss": 0.499, "step": 11700 }, { "epoch": 0.22212527030615728, "grad_norm": 1.4100275728989664, "learning_rate": 9.552675214887108e-06, "loss": 0.4684, "step": 11710 }, { "epoch": 0.22231495883758867, "grad_norm": 1.5252063867549408, "learning_rate": 9.551305466583845e-06, "loss": 0.5157, "step": 11720 }, { "epoch": 0.22250464736902006, "grad_norm": 1.3845518779751897, "learning_rate": 9.549933722850541e-06, "loss": 0.4978, "step": 11730 }, { "epoch": 0.22269433590045146, "grad_norm": 1.473971721688146, "learning_rate": 9.548559984288619e-06, "loss": 0.4981, "step": 11740 }, { "epoch": 0.22288402443188285, "grad_norm": 1.3777963744066393, "learning_rate": 9.54718425150036e-06, "loss": 0.4777, "step": 11750 }, { "epoch": 0.22307371296331424, "grad_norm": 1.2793724757264793, "learning_rate": 9.545806525088932e-06, "loss": 0.5041, "step": 11760 }, { "epoch": 0.22326340149474563, "grad_norm": 1.254035579689412, "learning_rate": 9.544426805658368e-06, "loss": 0.513, "step": 11770 }, { "epoch": 0.22345309002617703, "grad_norm": 1.2318200941729658, "learning_rate": 9.543045093813583e-06, "loss": 0.4686, "step": 11780 }, { "epoch": 0.22364277855760842, "grad_norm": 1.4174334466503724, "learning_rate": 9.541661390160357e-06, "loss": 0.5059, "step": 11790 }, { "epoch": 0.2238324670890398, "grad_norm": 1.722920308271938, "learning_rate": 9.54027569530535e-06, "loss": 0.4988, "step": 11800 }, { "epoch": 0.22402215562047118, "grad_norm": 1.3681766988588349, "learning_rate": 9.538888009856094e-06, "loss": 0.5031, "step": 11810 }, { "epoch": 0.22421184415190257, "grad_norm": 1.62457471774164, "learning_rate": 9.537498334420989e-06, "loss": 0.4824, "step": 11820 }, { "epoch": 0.22440153268333396, "grad_norm": 1.355013542077469, "learning_rate": 9.536106669609311e-06, "loss": 0.4918, "step": 11830 }, { "epoch": 0.22459122121476535, "grad_norm": 1.461864423812874, "learning_rate": 9.534713016031209e-06, "loss": 0.5228, "step": 11840 }, { "epoch": 0.22478090974619674, "grad_norm": 1.4780754686546436, "learning_rate": 9.533317374297705e-06, "loss": 0.4834, "step": 11850 }, { "epoch": 0.22497059827762814, "grad_norm": 1.424899868095105, "learning_rate": 9.531919745020688e-06, "loss": 0.5001, "step": 11860 }, { "epoch": 0.22516028680905953, "grad_norm": 1.5444107665380102, "learning_rate": 9.53052012881292e-06, "loss": 0.4986, "step": 11870 }, { "epoch": 0.22534997534049092, "grad_norm": 1.4450645757652687, "learning_rate": 9.529118526288037e-06, "loss": 0.5265, "step": 11880 }, { "epoch": 0.2255396638719223, "grad_norm": 1.29455538198575, "learning_rate": 9.527714938060544e-06, "loss": 0.511, "step": 11890 }, { "epoch": 0.2257293524033537, "grad_norm": 1.4910543716028115, "learning_rate": 9.526309364745818e-06, "loss": 0.4976, "step": 11900 }, { "epoch": 0.22591904093478507, "grad_norm": 1.3222563622040153, "learning_rate": 9.5249018069601e-06, "loss": 0.47, "step": 11910 }, { "epoch": 0.22610872946621646, "grad_norm": 1.7276231230226102, "learning_rate": 9.52349226532051e-06, "loss": 0.4979, "step": 11920 }, { "epoch": 0.22629841799764785, "grad_norm": 1.4295046753373564, "learning_rate": 9.522080740445035e-06, "loss": 0.4921, "step": 11930 }, { "epoch": 0.22648810652907925, "grad_norm": 1.3980289617175863, "learning_rate": 9.520667232952526e-06, "loss": 0.4978, "step": 11940 }, { "epoch": 0.22667779506051064, "grad_norm": 1.4905106417265297, "learning_rate": 9.519251743462707e-06, "loss": 0.4679, "step": 11950 }, { "epoch": 0.22686748359194203, "grad_norm": 1.21399259002925, "learning_rate": 9.517834272596175e-06, "loss": 0.4772, "step": 11960 }, { "epoch": 0.22705717212337342, "grad_norm": 1.4200198114528095, "learning_rate": 9.516414820974393e-06, "loss": 0.4718, "step": 11970 }, { "epoch": 0.22724686065480482, "grad_norm": 1.3857649106960515, "learning_rate": 9.514993389219688e-06, "loss": 0.5018, "step": 11980 }, { "epoch": 0.2274365491862362, "grad_norm": 2.156113450282381, "learning_rate": 9.513569977955259e-06, "loss": 0.5228, "step": 11990 }, { "epoch": 0.2276262377176676, "grad_norm": 1.2863584724743775, "learning_rate": 9.512144587805175e-06, "loss": 0.5126, "step": 12000 }, { "epoch": 0.22781592624909897, "grad_norm": 1.4609813895859527, "learning_rate": 9.51071721939437e-06, "loss": 0.4901, "step": 12010 }, { "epoch": 0.22800561478053036, "grad_norm": 1.7357984306637595, "learning_rate": 9.509287873348641e-06, "loss": 0.5267, "step": 12020 }, { "epoch": 0.22819530331196175, "grad_norm": 1.6414888146078717, "learning_rate": 9.507856550294663e-06, "loss": 0.4965, "step": 12030 }, { "epoch": 0.22838499184339314, "grad_norm": 1.0824746096303837, "learning_rate": 9.506423250859967e-06, "loss": 0.5266, "step": 12040 }, { "epoch": 0.22857468037482453, "grad_norm": 1.6435007343654056, "learning_rate": 9.504987975672956e-06, "loss": 0.5134, "step": 12050 }, { "epoch": 0.22876436890625593, "grad_norm": 1.536066080428704, "learning_rate": 9.503550725362898e-06, "loss": 0.4987, "step": 12060 }, { "epoch": 0.22895405743768732, "grad_norm": 1.254375882158513, "learning_rate": 9.502111500559928e-06, "loss": 0.4781, "step": 12070 }, { "epoch": 0.2291437459691187, "grad_norm": 1.3105386333496012, "learning_rate": 9.500670301895043e-06, "loss": 0.5085, "step": 12080 }, { "epoch": 0.2293334345005501, "grad_norm": 1.5128283846099855, "learning_rate": 9.499227130000112e-06, "loss": 0.5067, "step": 12090 }, { "epoch": 0.2295231230319815, "grad_norm": 3.2377999535890254, "learning_rate": 9.49778198550786e-06, "loss": 0.5165, "step": 12100 }, { "epoch": 0.2297128115634129, "grad_norm": 1.6212527241783532, "learning_rate": 9.49633486905189e-06, "loss": 0.4999, "step": 12110 }, { "epoch": 0.22990250009484425, "grad_norm": 1.7598544846914506, "learning_rate": 9.494885781266655e-06, "loss": 0.5202, "step": 12120 }, { "epoch": 0.23009218862627565, "grad_norm": 1.3981452328732233, "learning_rate": 9.49343472278748e-06, "loss": 0.4945, "step": 12130 }, { "epoch": 0.23028187715770704, "grad_norm": 1.399927024495097, "learning_rate": 9.491981694250553e-06, "loss": 0.448, "step": 12140 }, { "epoch": 0.23047156568913843, "grad_norm": 1.4866135924500918, "learning_rate": 9.490526696292927e-06, "loss": 0.4966, "step": 12150 }, { "epoch": 0.23066125422056982, "grad_norm": 1.4598349463010696, "learning_rate": 9.489069729552519e-06, "loss": 0.5036, "step": 12160 }, { "epoch": 0.23085094275200121, "grad_norm": 1.5965838565629749, "learning_rate": 9.487610794668103e-06, "loss": 0.5022, "step": 12170 }, { "epoch": 0.2310406312834326, "grad_norm": 1.5427982231080528, "learning_rate": 9.48614989227932e-06, "loss": 0.5121, "step": 12180 }, { "epoch": 0.231230319814864, "grad_norm": 1.5860446168708064, "learning_rate": 9.484687023026679e-06, "loss": 0.5189, "step": 12190 }, { "epoch": 0.2314200083462954, "grad_norm": 1.459867573442538, "learning_rate": 9.483222187551544e-06, "loss": 0.4976, "step": 12200 }, { "epoch": 0.23160969687772678, "grad_norm": 1.322327917734079, "learning_rate": 9.481755386496141e-06, "loss": 0.4859, "step": 12210 }, { "epoch": 0.23179938540915815, "grad_norm": 1.7516706251467438, "learning_rate": 9.48028662050356e-06, "loss": 0.4993, "step": 12220 }, { "epoch": 0.23198907394058954, "grad_norm": 1.3933666111663179, "learning_rate": 9.478815890217754e-06, "loss": 0.4774, "step": 12230 }, { "epoch": 0.23217876247202093, "grad_norm": 1.1884029043332698, "learning_rate": 9.477343196283537e-06, "loss": 0.4991, "step": 12240 }, { "epoch": 0.23236845100345233, "grad_norm": 1.5924489686707566, "learning_rate": 9.47586853934658e-06, "loss": 0.5072, "step": 12250 }, { "epoch": 0.23255813953488372, "grad_norm": 1.544939193472435, "learning_rate": 9.474391920053419e-06, "loss": 0.4781, "step": 12260 }, { "epoch": 0.2327478280663151, "grad_norm": 1.331954085161139, "learning_rate": 9.472913339051447e-06, "loss": 0.5115, "step": 12270 }, { "epoch": 0.2329375165977465, "grad_norm": 1.418790014926632, "learning_rate": 9.471432796988917e-06, "loss": 0.4868, "step": 12280 }, { "epoch": 0.2331272051291779, "grad_norm": 1.1775986011867665, "learning_rate": 9.469950294514949e-06, "loss": 0.4824, "step": 12290 }, { "epoch": 0.2333168936606093, "grad_norm": 1.91955691373141, "learning_rate": 9.468465832279512e-06, "loss": 0.4887, "step": 12300 }, { "epoch": 0.23350658219204068, "grad_norm": 1.7255148395419995, "learning_rate": 9.466979410933442e-06, "loss": 0.5192, "step": 12310 }, { "epoch": 0.23369627072347207, "grad_norm": 1.6120538745656672, "learning_rate": 9.46549103112843e-06, "loss": 0.4881, "step": 12320 }, { "epoch": 0.23388595925490344, "grad_norm": 1.2058717763430888, "learning_rate": 9.464000693517026e-06, "loss": 0.47, "step": 12330 }, { "epoch": 0.23407564778633483, "grad_norm": 1.3908619794156671, "learning_rate": 9.462508398752642e-06, "loss": 0.4978, "step": 12340 }, { "epoch": 0.23426533631776622, "grad_norm": 1.508192669614504, "learning_rate": 9.46101414748954e-06, "loss": 0.4978, "step": 12350 }, { "epoch": 0.2344550248491976, "grad_norm": 1.3349433614002526, "learning_rate": 9.459517940382849e-06, "loss": 0.5296, "step": 12360 }, { "epoch": 0.234644713380629, "grad_norm": 1.1374074096782527, "learning_rate": 9.458019778088552e-06, "loss": 0.5129, "step": 12370 }, { "epoch": 0.2348344019120604, "grad_norm": 1.5579733452862925, "learning_rate": 9.456519661263486e-06, "loss": 0.4943, "step": 12380 }, { "epoch": 0.2350240904434918, "grad_norm": 1.313321489313903, "learning_rate": 9.455017590565349e-06, "loss": 0.4845, "step": 12390 }, { "epoch": 0.23521377897492318, "grad_norm": 1.3461001978329106, "learning_rate": 9.453513566652694e-06, "loss": 0.4907, "step": 12400 }, { "epoch": 0.23540346750635457, "grad_norm": 1.368474842844559, "learning_rate": 9.45200759018493e-06, "loss": 0.4832, "step": 12410 }, { "epoch": 0.23559315603778597, "grad_norm": 1.4492307306188565, "learning_rate": 9.450499661822325e-06, "loss": 0.4918, "step": 12420 }, { "epoch": 0.23578284456921733, "grad_norm": 1.5333958503089375, "learning_rate": 9.448989782225996e-06, "loss": 0.4877, "step": 12430 }, { "epoch": 0.23597253310064872, "grad_norm": 1.2556804945348186, "learning_rate": 9.447477952057925e-06, "loss": 0.5026, "step": 12440 }, { "epoch": 0.23616222163208012, "grad_norm": 1.3039486904762405, "learning_rate": 9.445964171980942e-06, "loss": 0.4923, "step": 12450 }, { "epoch": 0.2363519101635115, "grad_norm": 1.1866104852630857, "learning_rate": 9.444448442658732e-06, "loss": 0.5099, "step": 12460 }, { "epoch": 0.2365415986949429, "grad_norm": 1.7140903497071585, "learning_rate": 9.44293076475584e-06, "loss": 0.5074, "step": 12470 }, { "epoch": 0.2367312872263743, "grad_norm": 1.4334379934222725, "learning_rate": 9.441411138937658e-06, "loss": 0.5019, "step": 12480 }, { "epoch": 0.23692097575780569, "grad_norm": 1.5427979381646004, "learning_rate": 9.43988956587044e-06, "loss": 0.4955, "step": 12490 }, { "epoch": 0.23711066428923708, "grad_norm": 1.4119926659001019, "learning_rate": 9.438366046221288e-06, "loss": 0.5052, "step": 12500 }, { "epoch": 0.23730035282066847, "grad_norm": 1.3886292940824734, "learning_rate": 9.436840580658157e-06, "loss": 0.4742, "step": 12510 }, { "epoch": 0.23749004135209986, "grad_norm": 1.2754173885027658, "learning_rate": 9.43531316984986e-06, "loss": 0.5212, "step": 12520 }, { "epoch": 0.23767972988353125, "grad_norm": 1.366885985881385, "learning_rate": 9.43378381446606e-06, "loss": 0.5072, "step": 12530 }, { "epoch": 0.23786941841496262, "grad_norm": 1.3823694802924686, "learning_rate": 9.432252515177271e-06, "loss": 0.4797, "step": 12540 }, { "epoch": 0.238059106946394, "grad_norm": 1.6103080997840693, "learning_rate": 9.430719272654864e-06, "loss": 0.4636, "step": 12550 }, { "epoch": 0.2382487954778254, "grad_norm": 1.5267930106556162, "learning_rate": 9.429184087571054e-06, "loss": 0.4781, "step": 12560 }, { "epoch": 0.2384384840092568, "grad_norm": 1.2704356868902045, "learning_rate": 9.427646960598919e-06, "loss": 0.4825, "step": 12570 }, { "epoch": 0.2386281725406882, "grad_norm": 1.3353516419733207, "learning_rate": 9.426107892412377e-06, "loss": 0.4468, "step": 12580 }, { "epoch": 0.23881786107211958, "grad_norm": 1.5538602425036223, "learning_rate": 9.4245668836862e-06, "loss": 0.5084, "step": 12590 }, { "epoch": 0.23900754960355097, "grad_norm": 1.339617414870888, "learning_rate": 9.423023935096021e-06, "loss": 0.481, "step": 12600 }, { "epoch": 0.23919723813498237, "grad_norm": 1.515576168730627, "learning_rate": 9.42147904731831e-06, "loss": 0.4824, "step": 12610 }, { "epoch": 0.23938692666641376, "grad_norm": 1.3918710935689373, "learning_rate": 9.419932221030392e-06, "loss": 0.491, "step": 12620 }, { "epoch": 0.23957661519784515, "grad_norm": 1.3664800503759968, "learning_rate": 9.418383456910444e-06, "loss": 0.4814, "step": 12630 }, { "epoch": 0.23976630372927651, "grad_norm": 1.3203012097914244, "learning_rate": 9.416832755637491e-06, "loss": 0.4996, "step": 12640 }, { "epoch": 0.2399559922607079, "grad_norm": 1.4527569389749018, "learning_rate": 9.415280117891407e-06, "loss": 0.488, "step": 12650 }, { "epoch": 0.2401456807921393, "grad_norm": 1.3603111002086052, "learning_rate": 9.413725544352916e-06, "loss": 0.5227, "step": 12660 }, { "epoch": 0.2403353693235707, "grad_norm": 1.5558020876330456, "learning_rate": 9.412169035703589e-06, "loss": 0.5077, "step": 12670 }, { "epoch": 0.24052505785500208, "grad_norm": 1.399267123792341, "learning_rate": 9.410610592625846e-06, "loss": 0.4926, "step": 12680 }, { "epoch": 0.24071474638643348, "grad_norm": 1.5840173450850574, "learning_rate": 9.409050215802957e-06, "loss": 0.4713, "step": 12690 }, { "epoch": 0.24090443491786487, "grad_norm": 1.5520168878009393, "learning_rate": 9.407487905919039e-06, "loss": 0.481, "step": 12700 }, { "epoch": 0.24109412344929626, "grad_norm": 1.2743428245315338, "learning_rate": 9.405923663659056e-06, "loss": 0.4824, "step": 12710 }, { "epoch": 0.24128381198072765, "grad_norm": 1.3867409174774479, "learning_rate": 9.404357489708817e-06, "loss": 0.4628, "step": 12720 }, { "epoch": 0.24147350051215904, "grad_norm": 1.6733818463254486, "learning_rate": 9.402789384754981e-06, "loss": 0.5024, "step": 12730 }, { "epoch": 0.24166318904359044, "grad_norm": 1.6504649635941784, "learning_rate": 9.401219349485053e-06, "loss": 0.4845, "step": 12740 }, { "epoch": 0.2418528775750218, "grad_norm": 1.5128015363844916, "learning_rate": 9.399647384587384e-06, "loss": 0.5118, "step": 12750 }, { "epoch": 0.2420425661064532, "grad_norm": 1.3862864882243575, "learning_rate": 9.398073490751171e-06, "loss": 0.4965, "step": 12760 }, { "epoch": 0.2422322546378846, "grad_norm": 1.5984901521216721, "learning_rate": 9.396497668666456e-06, "loss": 0.5048, "step": 12770 }, { "epoch": 0.24242194316931598, "grad_norm": 2.0422376738999986, "learning_rate": 9.394919919024128e-06, "loss": 0.5073, "step": 12780 }, { "epoch": 0.24261163170074737, "grad_norm": 1.5955545614165851, "learning_rate": 9.393340242515919e-06, "loss": 0.4855, "step": 12790 }, { "epoch": 0.24280132023217876, "grad_norm": 1.5157374644958346, "learning_rate": 9.391758639834408e-06, "loss": 0.5167, "step": 12800 }, { "epoch": 0.24299100876361016, "grad_norm": 1.7193033083025835, "learning_rate": 9.390175111673019e-06, "loss": 0.4875, "step": 12810 }, { "epoch": 0.24318069729504155, "grad_norm": 1.219747993403934, "learning_rate": 9.388589658726015e-06, "loss": 0.4907, "step": 12820 }, { "epoch": 0.24337038582647294, "grad_norm": 1.4022353728636323, "learning_rate": 9.387002281688508e-06, "loss": 0.4879, "step": 12830 }, { "epoch": 0.24356007435790433, "grad_norm": 1.3692189571314308, "learning_rate": 9.385412981256454e-06, "loss": 0.4884, "step": 12840 }, { "epoch": 0.2437497628893357, "grad_norm": 1.4542538200506974, "learning_rate": 9.383821758126649e-06, "loss": 0.496, "step": 12850 }, { "epoch": 0.2439394514207671, "grad_norm": 1.3040276190250142, "learning_rate": 9.382228612996732e-06, "loss": 0.4885, "step": 12860 }, { "epoch": 0.24412913995219848, "grad_norm": 1.1504797018528232, "learning_rate": 9.380633546565187e-06, "loss": 0.4914, "step": 12870 }, { "epoch": 0.24431882848362987, "grad_norm": 1.5536212551868092, "learning_rate": 9.379036559531343e-06, "loss": 0.4776, "step": 12880 }, { "epoch": 0.24450851701506127, "grad_norm": 1.2801515960430778, "learning_rate": 9.37743765259536e-06, "loss": 0.498, "step": 12890 }, { "epoch": 0.24469820554649266, "grad_norm": 1.4292857617735484, "learning_rate": 9.375836826458255e-06, "loss": 0.482, "step": 12900 }, { "epoch": 0.24488789407792405, "grad_norm": 1.439786534253733, "learning_rate": 9.374234081821874e-06, "loss": 0.4995, "step": 12910 }, { "epoch": 0.24507758260935544, "grad_norm": 1.732656962262285, "learning_rate": 9.372629419388907e-06, "loss": 0.5163, "step": 12920 }, { "epoch": 0.24526727114078684, "grad_norm": 1.3711742936221267, "learning_rate": 9.371022839862894e-06, "loss": 0.4874, "step": 12930 }, { "epoch": 0.24545695967221823, "grad_norm": 1.538547827911795, "learning_rate": 9.3694143439482e-06, "loss": 0.4878, "step": 12940 }, { "epoch": 0.24564664820364962, "grad_norm": 1.6951291833577542, "learning_rate": 9.367803932350041e-06, "loss": 0.5148, "step": 12950 }, { "epoch": 0.24583633673508098, "grad_norm": 1.436591582464131, "learning_rate": 9.366191605774473e-06, "loss": 0.5175, "step": 12960 }, { "epoch": 0.24602602526651238, "grad_norm": 1.4209865225077154, "learning_rate": 9.364577364928386e-06, "loss": 0.4976, "step": 12970 }, { "epoch": 0.24621571379794377, "grad_norm": 1.2610176293656132, "learning_rate": 9.362961210519512e-06, "loss": 0.4766, "step": 12980 }, { "epoch": 0.24640540232937516, "grad_norm": 1.5604620302625567, "learning_rate": 9.361343143256423e-06, "loss": 0.5065, "step": 12990 }, { "epoch": 0.24659509086080655, "grad_norm": 1.488013768381261, "learning_rate": 9.35972316384853e-06, "loss": 0.5208, "step": 13000 }, { "epoch": 0.24678477939223795, "grad_norm": 2.1097903097922215, "learning_rate": 9.358101273006077e-06, "loss": 0.4839, "step": 13010 }, { "epoch": 0.24697446792366934, "grad_norm": 1.5975651164288274, "learning_rate": 9.356477471440152e-06, "loss": 0.53, "step": 13020 }, { "epoch": 0.24716415645510073, "grad_norm": 1.5363016907706717, "learning_rate": 9.354851759862683e-06, "loss": 0.4963, "step": 13030 }, { "epoch": 0.24735384498653212, "grad_norm": 1.4249086717866775, "learning_rate": 9.353224138986424e-06, "loss": 0.522, "step": 13040 }, { "epoch": 0.24754353351796352, "grad_norm": 1.4218020065696422, "learning_rate": 9.351594609524977e-06, "loss": 0.4887, "step": 13050 }, { "epoch": 0.24773322204939488, "grad_norm": 1.267805100144563, "learning_rate": 9.349963172192775e-06, "loss": 0.4942, "step": 13060 }, { "epoch": 0.24792291058082627, "grad_norm": 1.8941958178975389, "learning_rate": 9.348329827705091e-06, "loss": 0.5078, "step": 13070 }, { "epoch": 0.24811259911225766, "grad_norm": 1.2556620485972187, "learning_rate": 9.346694576778034e-06, "loss": 0.498, "step": 13080 }, { "epoch": 0.24830228764368906, "grad_norm": 1.253474783195086, "learning_rate": 9.345057420128547e-06, "loss": 0.4721, "step": 13090 }, { "epoch": 0.24849197617512045, "grad_norm": 1.4515005029372703, "learning_rate": 9.343418358474406e-06, "loss": 0.499, "step": 13100 }, { "epoch": 0.24868166470655184, "grad_norm": 1.174125850169572, "learning_rate": 9.341777392534227e-06, "loss": 0.5023, "step": 13110 }, { "epoch": 0.24887135323798323, "grad_norm": 1.6409360164464715, "learning_rate": 9.340134523027461e-06, "loss": 0.4845, "step": 13120 }, { "epoch": 0.24906104176941463, "grad_norm": 1.3990218845640021, "learning_rate": 9.338489750674391e-06, "loss": 0.481, "step": 13130 }, { "epoch": 0.24925073030084602, "grad_norm": 1.484282807431582, "learning_rate": 9.336843076196134e-06, "loss": 0.492, "step": 13140 }, { "epoch": 0.2494404188322774, "grad_norm": 1.5391268486622458, "learning_rate": 9.335194500314639e-06, "loss": 0.4963, "step": 13150 }, { "epoch": 0.2496301073637088, "grad_norm": 1.6756351103311136, "learning_rate": 9.333544023752698e-06, "loss": 0.4925, "step": 13160 }, { "epoch": 0.24981979589514017, "grad_norm": 1.3659670032003783, "learning_rate": 9.331891647233925e-06, "loss": 0.4948, "step": 13170 }, { "epoch": 0.25000948442657156, "grad_norm": 3.0781915570280205, "learning_rate": 9.330237371482773e-06, "loss": 0.4771, "step": 13180 }, { "epoch": 0.250199172958003, "grad_norm": 1.2485203375637348, "learning_rate": 9.328581197224526e-06, "loss": 0.5057, "step": 13190 }, { "epoch": 0.25038886148943434, "grad_norm": 1.753365581301381, "learning_rate": 9.326923125185303e-06, "loss": 0.523, "step": 13200 }, { "epoch": 0.25057855002086576, "grad_norm": 1.363724635937623, "learning_rate": 9.325263156092052e-06, "loss": 0.4982, "step": 13210 }, { "epoch": 0.25076823855229713, "grad_norm": 1.2465966531057349, "learning_rate": 9.323601290672554e-06, "loss": 0.4933, "step": 13220 }, { "epoch": 0.2509579270837285, "grad_norm": 1.3158036683444616, "learning_rate": 9.32193752965542e-06, "loss": 0.5241, "step": 13230 }, { "epoch": 0.2511476156151599, "grad_norm": 1.4433371414783724, "learning_rate": 9.320271873770093e-06, "loss": 0.5091, "step": 13240 }, { "epoch": 0.2513373041465913, "grad_norm": 1.6558744598854516, "learning_rate": 9.318604323746846e-06, "loss": 0.5158, "step": 13250 }, { "epoch": 0.2515269926780227, "grad_norm": 1.2724232898538987, "learning_rate": 9.316934880316789e-06, "loss": 0.5041, "step": 13260 }, { "epoch": 0.25171668120945406, "grad_norm": 1.5792413512506018, "learning_rate": 9.31526354421185e-06, "loss": 0.5264, "step": 13270 }, { "epoch": 0.2519063697408855, "grad_norm": 1.305620718958354, "learning_rate": 9.313590316164796e-06, "loss": 0.4749, "step": 13280 }, { "epoch": 0.25209605827231685, "grad_norm": 1.499820645016185, "learning_rate": 9.311915196909221e-06, "loss": 0.4876, "step": 13290 }, { "epoch": 0.25228574680374827, "grad_norm": 1.3884114210901102, "learning_rate": 9.310238187179548e-06, "loss": 0.4822, "step": 13300 }, { "epoch": 0.25247543533517963, "grad_norm": 1.3800072322616606, "learning_rate": 9.308559287711028e-06, "loss": 0.5133, "step": 13310 }, { "epoch": 0.25266512386661105, "grad_norm": 1.4938128615823516, "learning_rate": 9.306878499239742e-06, "loss": 0.5051, "step": 13320 }, { "epoch": 0.2528548123980424, "grad_norm": 1.2453093433141282, "learning_rate": 9.305195822502597e-06, "loss": 0.4843, "step": 13330 }, { "epoch": 0.2530445009294738, "grad_norm": 1.2006375046169686, "learning_rate": 9.303511258237332e-06, "loss": 0.476, "step": 13340 }, { "epoch": 0.2532341894609052, "grad_norm": 1.4378941890670092, "learning_rate": 9.301824807182509e-06, "loss": 0.4997, "step": 13350 }, { "epoch": 0.25342387799233657, "grad_norm": 1.650369377271437, "learning_rate": 9.300136470077522e-06, "loss": 0.5032, "step": 13360 }, { "epoch": 0.253613566523768, "grad_norm": 1.5800448765929616, "learning_rate": 9.298446247662584e-06, "loss": 0.4701, "step": 13370 }, { "epoch": 0.25380325505519935, "grad_norm": 1.49672566753785, "learning_rate": 9.296754140678743e-06, "loss": 0.4976, "step": 13380 }, { "epoch": 0.25399294358663077, "grad_norm": 1.3552038221600793, "learning_rate": 9.295060149867871e-06, "loss": 0.5084, "step": 13390 }, { "epoch": 0.25418263211806214, "grad_norm": 1.303550243663329, "learning_rate": 9.293364275972661e-06, "loss": 0.4724, "step": 13400 }, { "epoch": 0.25437232064949356, "grad_norm": 1.4119996499175542, "learning_rate": 9.29166651973664e-06, "loss": 0.5007, "step": 13410 }, { "epoch": 0.2545620091809249, "grad_norm": 1.3857298398621514, "learning_rate": 9.289966881904151e-06, "loss": 0.5102, "step": 13420 }, { "epoch": 0.2547516977123563, "grad_norm": 1.3527865219741584, "learning_rate": 9.288265363220372e-06, "loss": 0.4855, "step": 13430 }, { "epoch": 0.2549413862437877, "grad_norm": 1.3618120918372494, "learning_rate": 9.286561964431295e-06, "loss": 0.5118, "step": 13440 }, { "epoch": 0.25513107477521907, "grad_norm": 1.6413222054430328, "learning_rate": 9.284856686283745e-06, "loss": 0.5327, "step": 13450 }, { "epoch": 0.2553207633066505, "grad_norm": 1.803667678247114, "learning_rate": 9.283149529525366e-06, "loss": 0.5095, "step": 13460 }, { "epoch": 0.25551045183808185, "grad_norm": 1.6371191599710488, "learning_rate": 9.281440494904627e-06, "loss": 0.5024, "step": 13470 }, { "epoch": 0.2557001403695133, "grad_norm": 1.5594045328658317, "learning_rate": 9.279729583170822e-06, "loss": 0.5002, "step": 13480 }, { "epoch": 0.25588982890094464, "grad_norm": 1.3804950030608127, "learning_rate": 9.278016795074066e-06, "loss": 0.4863, "step": 13490 }, { "epoch": 0.25607951743237606, "grad_norm": 1.7159306059114532, "learning_rate": 9.276302131365296e-06, "loss": 0.4729, "step": 13500 }, { "epoch": 0.2562692059638074, "grad_norm": 1.301847840347257, "learning_rate": 9.274585592796274e-06, "loss": 0.4599, "step": 13510 }, { "epoch": 0.25645889449523884, "grad_norm": 1.4367807985855265, "learning_rate": 9.27286718011958e-06, "loss": 0.5051, "step": 13520 }, { "epoch": 0.2566485830266702, "grad_norm": 1.456783020062055, "learning_rate": 9.27114689408862e-06, "loss": 0.4785, "step": 13530 }, { "epoch": 0.25683827155810157, "grad_norm": 1.2700401232040313, "learning_rate": 9.269424735457622e-06, "loss": 0.4901, "step": 13540 }, { "epoch": 0.257027960089533, "grad_norm": 1.5298456032977867, "learning_rate": 9.267700704981627e-06, "loss": 0.4858, "step": 13550 }, { "epoch": 0.25721764862096436, "grad_norm": 1.4308900145489791, "learning_rate": 9.265974803416505e-06, "loss": 0.4784, "step": 13560 }, { "epoch": 0.2574073371523958, "grad_norm": 1.557555244525776, "learning_rate": 9.264247031518945e-06, "loss": 0.4906, "step": 13570 }, { "epoch": 0.25759702568382714, "grad_norm": 1.3203234308800145, "learning_rate": 9.262517390046451e-06, "loss": 0.4879, "step": 13580 }, { "epoch": 0.25778671421525856, "grad_norm": 1.4432809556100585, "learning_rate": 9.260785879757352e-06, "loss": 0.4767, "step": 13590 }, { "epoch": 0.2579764027466899, "grad_norm": 1.5768159235688202, "learning_rate": 9.259052501410795e-06, "loss": 0.5124, "step": 13600 }, { "epoch": 0.25816609127812135, "grad_norm": 1.547353940557032, "learning_rate": 9.257317255766745e-06, "loss": 0.5041, "step": 13610 }, { "epoch": 0.2583557798095527, "grad_norm": 1.484400217225698, "learning_rate": 9.255580143585985e-06, "loss": 0.511, "step": 13620 }, { "epoch": 0.25854546834098413, "grad_norm": 1.6107092596844155, "learning_rate": 9.25384116563012e-06, "loss": 0.4951, "step": 13630 }, { "epoch": 0.2587351568724155, "grad_norm": 1.2733608062429087, "learning_rate": 9.252100322661567e-06, "loss": 0.4991, "step": 13640 }, { "epoch": 0.25892484540384686, "grad_norm": 1.5501448090540795, "learning_rate": 9.25035761544357e-06, "loss": 0.4882, "step": 13650 }, { "epoch": 0.2591145339352783, "grad_norm": 1.4456154896321896, "learning_rate": 9.248613044740177e-06, "loss": 0.4781, "step": 13660 }, { "epoch": 0.25930422246670964, "grad_norm": 1.651926735649223, "learning_rate": 9.246866611316268e-06, "loss": 0.4906, "step": 13670 }, { "epoch": 0.25949391099814106, "grad_norm": 1.3300558248427556, "learning_rate": 9.245118315937528e-06, "loss": 0.5194, "step": 13680 }, { "epoch": 0.25968359952957243, "grad_norm": 1.4700884743242972, "learning_rate": 9.243368159370463e-06, "loss": 0.4858, "step": 13690 }, { "epoch": 0.25987328806100385, "grad_norm": 1.640582440394035, "learning_rate": 9.241616142382395e-06, "loss": 0.4942, "step": 13700 }, { "epoch": 0.2600629765924352, "grad_norm": 1.5101559693338127, "learning_rate": 9.239862265741461e-06, "loss": 0.4571, "step": 13710 }, { "epoch": 0.26025266512386663, "grad_norm": 1.2607838849634487, "learning_rate": 9.238106530216615e-06, "loss": 0.4675, "step": 13720 }, { "epoch": 0.260442353655298, "grad_norm": 1.4698038048620392, "learning_rate": 9.236348936577623e-06, "loss": 0.4752, "step": 13730 }, { "epoch": 0.2606320421867294, "grad_norm": 1.4641614277857458, "learning_rate": 9.234589485595067e-06, "loss": 0.4613, "step": 13740 }, { "epoch": 0.2608217307181608, "grad_norm": 1.4960167385540626, "learning_rate": 9.232828178040344e-06, "loss": 0.4893, "step": 13750 }, { "epoch": 0.26101141924959215, "grad_norm": 1.6904492615221696, "learning_rate": 9.231065014685667e-06, "loss": 0.4891, "step": 13760 }, { "epoch": 0.26120110778102357, "grad_norm": 1.7021499784817602, "learning_rate": 9.229299996304058e-06, "loss": 0.491, "step": 13770 }, { "epoch": 0.26139079631245493, "grad_norm": 1.3162784090116193, "learning_rate": 9.227533123669353e-06, "loss": 0.4731, "step": 13780 }, { "epoch": 0.26158048484388635, "grad_norm": 1.6422386512480598, "learning_rate": 9.225764397556204e-06, "loss": 0.5077, "step": 13790 }, { "epoch": 0.2617701733753177, "grad_norm": 1.527935897820861, "learning_rate": 9.223993818740074e-06, "loss": 0.4934, "step": 13800 }, { "epoch": 0.26195986190674914, "grad_norm": 1.5949465001038747, "learning_rate": 9.222221387997238e-06, "loss": 0.4731, "step": 13810 }, { "epoch": 0.2621495504381805, "grad_norm": 1.3139060550448622, "learning_rate": 9.220447106104784e-06, "loss": 0.4947, "step": 13820 }, { "epoch": 0.2623392389696119, "grad_norm": 1.3153335951093557, "learning_rate": 9.21867097384061e-06, "loss": 0.4668, "step": 13830 }, { "epoch": 0.2625289275010433, "grad_norm": 1.3482802281433128, "learning_rate": 9.216892991983427e-06, "loss": 0.4743, "step": 13840 }, { "epoch": 0.26271861603247465, "grad_norm": 1.5803986392691158, "learning_rate": 9.215113161312754e-06, "loss": 0.4958, "step": 13850 }, { "epoch": 0.26290830456390607, "grad_norm": 1.3103186416371169, "learning_rate": 9.213331482608926e-06, "loss": 0.4691, "step": 13860 }, { "epoch": 0.26309799309533743, "grad_norm": 1.7507489964423144, "learning_rate": 9.211547956653083e-06, "loss": 0.491, "step": 13870 }, { "epoch": 0.26328768162676885, "grad_norm": 1.4361698922743211, "learning_rate": 9.209762584227176e-06, "loss": 0.4936, "step": 13880 }, { "epoch": 0.2634773701582002, "grad_norm": 1.3965632812074555, "learning_rate": 9.207975366113968e-06, "loss": 0.5056, "step": 13890 }, { "epoch": 0.26366705868963164, "grad_norm": 1.5358804326847726, "learning_rate": 9.206186303097028e-06, "loss": 0.499, "step": 13900 }, { "epoch": 0.263856747221063, "grad_norm": 1.3917722099643302, "learning_rate": 9.204395395960736e-06, "loss": 0.4547, "step": 13910 }, { "epoch": 0.2640464357524944, "grad_norm": 1.3288147910044632, "learning_rate": 9.20260264549028e-06, "loss": 0.5052, "step": 13920 }, { "epoch": 0.2642361242839258, "grad_norm": 1.4421133359384672, "learning_rate": 9.200808052471655e-06, "loss": 0.4909, "step": 13930 }, { "epoch": 0.2644258128153572, "grad_norm": 1.4114798943963847, "learning_rate": 9.199011617691666e-06, "loss": 0.4939, "step": 13940 }, { "epoch": 0.2646155013467886, "grad_norm": 2.2100581035664173, "learning_rate": 9.197213341937925e-06, "loss": 0.4856, "step": 13950 }, { "epoch": 0.26480518987821994, "grad_norm": 1.5448259665021071, "learning_rate": 9.19541322599885e-06, "loss": 0.4876, "step": 13960 }, { "epoch": 0.26499487840965136, "grad_norm": 1.4655466883726154, "learning_rate": 9.193611270663666e-06, "loss": 0.5041, "step": 13970 }, { "epoch": 0.2651845669410827, "grad_norm": 1.3756276251565782, "learning_rate": 9.191807476722402e-06, "loss": 0.4927, "step": 13980 }, { "epoch": 0.26537425547251414, "grad_norm": 1.556897978538492, "learning_rate": 9.190001844965902e-06, "loss": 0.5247, "step": 13990 }, { "epoch": 0.2655639440039455, "grad_norm": 1.4198274128570492, "learning_rate": 9.188194376185804e-06, "loss": 0.5116, "step": 14000 }, { "epoch": 0.2657536325353769, "grad_norm": 1.5552649048166245, "learning_rate": 9.18638507117456e-06, "loss": 0.4934, "step": 14010 }, { "epoch": 0.2659433210668083, "grad_norm": 1.5223531502728243, "learning_rate": 9.184573930725423e-06, "loss": 0.468, "step": 14020 }, { "epoch": 0.2661330095982397, "grad_norm": 1.3910797704043683, "learning_rate": 9.182760955632453e-06, "loss": 0.5024, "step": 14030 }, { "epoch": 0.2663226981296711, "grad_norm": 1.5558009888781907, "learning_rate": 9.18094614669051e-06, "loss": 0.4978, "step": 14040 }, { "epoch": 0.2665123866611025, "grad_norm": 1.3350245684726332, "learning_rate": 9.179129504695265e-06, "loss": 0.502, "step": 14050 }, { "epoch": 0.26670207519253386, "grad_norm": 1.4010484962174594, "learning_rate": 9.177311030443185e-06, "loss": 0.4897, "step": 14060 }, { "epoch": 0.2668917637239652, "grad_norm": 1.7193080325194001, "learning_rate": 9.175490724731549e-06, "loss": 0.5147, "step": 14070 }, { "epoch": 0.26708145225539665, "grad_norm": 1.4339671908790979, "learning_rate": 9.17366858835843e-06, "loss": 0.4682, "step": 14080 }, { "epoch": 0.267271140786828, "grad_norm": 3.3960523776252387, "learning_rate": 9.171844622122709e-06, "loss": 0.4632, "step": 14090 }, { "epoch": 0.26746082931825943, "grad_norm": 1.5171985102344585, "learning_rate": 9.170018826824069e-06, "loss": 0.5106, "step": 14100 }, { "epoch": 0.2676505178496908, "grad_norm": 1.4634593687237074, "learning_rate": 9.168191203262993e-06, "loss": 0.4974, "step": 14110 }, { "epoch": 0.2678402063811222, "grad_norm": 1.5865930633177032, "learning_rate": 9.166361752240764e-06, "loss": 0.4668, "step": 14120 }, { "epoch": 0.2680298949125536, "grad_norm": 1.7016278927673265, "learning_rate": 9.164530474559474e-06, "loss": 0.5, "step": 14130 }, { "epoch": 0.268219583443985, "grad_norm": 1.5580879153966447, "learning_rate": 9.162697371022006e-06, "loss": 0.5044, "step": 14140 }, { "epoch": 0.26840927197541636, "grad_norm": 1.5473851219514188, "learning_rate": 9.160862442432051e-06, "loss": 0.501, "step": 14150 }, { "epoch": 0.2685989605068478, "grad_norm": 1.4037489092140991, "learning_rate": 9.159025689594095e-06, "loss": 0.4682, "step": 14160 }, { "epoch": 0.26878864903827915, "grad_norm": 1.2169099052727639, "learning_rate": 9.157187113313425e-06, "loss": 0.4781, "step": 14170 }, { "epoch": 0.2689783375697105, "grad_norm": 1.7086661414013915, "learning_rate": 9.155346714396134e-06, "loss": 0.501, "step": 14180 }, { "epoch": 0.26916802610114193, "grad_norm": 2.033601860515618, "learning_rate": 9.153504493649103e-06, "loss": 0.5121, "step": 14190 }, { "epoch": 0.2693577146325733, "grad_norm": 1.4676952012453286, "learning_rate": 9.15166045188002e-06, "loss": 0.4906, "step": 14200 }, { "epoch": 0.2695474031640047, "grad_norm": 1.7849961866242892, "learning_rate": 9.149814589897368e-06, "loss": 0.5113, "step": 14210 }, { "epoch": 0.2697370916954361, "grad_norm": 1.5449348895146724, "learning_rate": 9.147966908510429e-06, "loss": 0.457, "step": 14220 }, { "epoch": 0.2699267802268675, "grad_norm": 1.5528594415468997, "learning_rate": 9.146117408529283e-06, "loss": 0.499, "step": 14230 }, { "epoch": 0.27011646875829887, "grad_norm": 1.392017558204682, "learning_rate": 9.144266090764803e-06, "loss": 0.4444, "step": 14240 }, { "epoch": 0.2703061572897303, "grad_norm": 1.5648684435374838, "learning_rate": 9.14241295602867e-06, "loss": 0.4619, "step": 14250 }, { "epoch": 0.27049584582116165, "grad_norm": 1.249561219183363, "learning_rate": 9.140558005133347e-06, "loss": 0.4992, "step": 14260 }, { "epoch": 0.270685534352593, "grad_norm": 1.517277827118045, "learning_rate": 9.138701238892104e-06, "loss": 0.4875, "step": 14270 }, { "epoch": 0.27087522288402444, "grad_norm": 1.3659045339905513, "learning_rate": 9.136842658119005e-06, "loss": 0.5013, "step": 14280 }, { "epoch": 0.2710649114154558, "grad_norm": 1.5257717043809436, "learning_rate": 9.134982263628904e-06, "loss": 0.4813, "step": 14290 }, { "epoch": 0.2712545999468872, "grad_norm": 1.5243720052351462, "learning_rate": 9.133120056237457e-06, "loss": 0.4977, "step": 14300 }, { "epoch": 0.2714442884783186, "grad_norm": 1.2362642338780547, "learning_rate": 9.131256036761111e-06, "loss": 0.4799, "step": 14310 }, { "epoch": 0.27163397700975, "grad_norm": 1.3720965109248395, "learning_rate": 9.12939020601711e-06, "loss": 0.5009, "step": 14320 }, { "epoch": 0.27182366554118137, "grad_norm": 2.119020437582983, "learning_rate": 9.12752256482349e-06, "loss": 0.469, "step": 14330 }, { "epoch": 0.2720133540726128, "grad_norm": 1.1586107010061564, "learning_rate": 9.12565311399908e-06, "loss": 0.4795, "step": 14340 }, { "epoch": 0.27220304260404415, "grad_norm": 1.469655383814442, "learning_rate": 9.123781854363508e-06, "loss": 0.4823, "step": 14350 }, { "epoch": 0.2723927311354756, "grad_norm": 2.1695454070326705, "learning_rate": 9.121908786737183e-06, "loss": 0.4829, "step": 14360 }, { "epoch": 0.27258241966690694, "grad_norm": 1.6729438389616147, "learning_rate": 9.120033911941324e-06, "loss": 0.4723, "step": 14370 }, { "epoch": 0.2727721081983383, "grad_norm": 1.506122189364878, "learning_rate": 9.118157230797926e-06, "loss": 0.5284, "step": 14380 }, { "epoch": 0.2729617967297697, "grad_norm": 1.543364795291856, "learning_rate": 9.116278744129783e-06, "loss": 0.4476, "step": 14390 }, { "epoch": 0.2731514852612011, "grad_norm": 1.6187831455590365, "learning_rate": 9.114398452760484e-06, "loss": 0.4977, "step": 14400 }, { "epoch": 0.2733411737926325, "grad_norm": 2.326599165463778, "learning_rate": 9.112516357514404e-06, "loss": 0.4747, "step": 14410 }, { "epoch": 0.2735308623240639, "grad_norm": 1.5991186681501932, "learning_rate": 9.110632459216713e-06, "loss": 0.4688, "step": 14420 }, { "epoch": 0.2737205508554953, "grad_norm": 1.5760562205481905, "learning_rate": 9.108746758693363e-06, "loss": 0.4976, "step": 14430 }, { "epoch": 0.27391023938692666, "grad_norm": 1.3413371431453145, "learning_rate": 9.106859256771108e-06, "loss": 0.4971, "step": 14440 }, { "epoch": 0.2740999279183581, "grad_norm": 1.6085545873852285, "learning_rate": 9.104969954277483e-06, "loss": 0.503, "step": 14450 }, { "epoch": 0.27428961644978944, "grad_norm": 1.45142208741508, "learning_rate": 9.103078852040815e-06, "loss": 0.4989, "step": 14460 }, { "epoch": 0.27447930498122086, "grad_norm": 1.7255135458192086, "learning_rate": 9.101185950890223e-06, "loss": 0.4811, "step": 14470 }, { "epoch": 0.2746689935126522, "grad_norm": 1.560445081874364, "learning_rate": 9.09929125165561e-06, "loss": 0.4813, "step": 14480 }, { "epoch": 0.2748586820440836, "grad_norm": 1.377215535165055, "learning_rate": 9.09739475516767e-06, "loss": 0.5064, "step": 14490 }, { "epoch": 0.275048370575515, "grad_norm": 1.4541004226384917, "learning_rate": 9.095496462257884e-06, "loss": 0.5174, "step": 14500 }, { "epoch": 0.2752380591069464, "grad_norm": 1.2775145327912232, "learning_rate": 9.093596373758522e-06, "loss": 0.5079, "step": 14510 }, { "epoch": 0.2754277476383778, "grad_norm": 1.3348337684122435, "learning_rate": 9.09169449050264e-06, "loss": 0.5041, "step": 14520 }, { "epoch": 0.27561743616980916, "grad_norm": 1.327298392024903, "learning_rate": 9.08979081332408e-06, "loss": 0.4771, "step": 14530 }, { "epoch": 0.2758071247012406, "grad_norm": 1.6039915068175794, "learning_rate": 9.087885343057473e-06, "loss": 0.4816, "step": 14540 }, { "epoch": 0.27599681323267194, "grad_norm": 1.5199786262698003, "learning_rate": 9.085978080538236e-06, "loss": 0.4856, "step": 14550 }, { "epoch": 0.27618650176410336, "grad_norm": 1.2527422294168775, "learning_rate": 9.084069026602569e-06, "loss": 0.4738, "step": 14560 }, { "epoch": 0.27637619029553473, "grad_norm": 1.3918992114087625, "learning_rate": 9.08215818208746e-06, "loss": 0.4883, "step": 14570 }, { "epoch": 0.2765658788269661, "grad_norm": 1.4328206010544093, "learning_rate": 9.080245547830678e-06, "loss": 0.4819, "step": 14580 }, { "epoch": 0.2767555673583975, "grad_norm": 1.51318662183964, "learning_rate": 9.078331124670782e-06, "loss": 0.4743, "step": 14590 }, { "epoch": 0.2769452558898289, "grad_norm": 1.8252041156731054, "learning_rate": 9.076414913447113e-06, "loss": 0.4915, "step": 14600 }, { "epoch": 0.2771349444212603, "grad_norm": 1.3892873473372884, "learning_rate": 9.0744969149998e-06, "loss": 0.4746, "step": 14610 }, { "epoch": 0.27732463295269166, "grad_norm": 1.5974236288905472, "learning_rate": 9.072577130169742e-06, "loss": 0.5058, "step": 14620 }, { "epoch": 0.2775143214841231, "grad_norm": 1.4719926543882373, "learning_rate": 9.070655559798638e-06, "loss": 0.4891, "step": 14630 }, { "epoch": 0.27770401001555445, "grad_norm": 1.3418215862152183, "learning_rate": 9.068732204728962e-06, "loss": 0.4991, "step": 14640 }, { "epoch": 0.27789369854698587, "grad_norm": 1.557667548993602, "learning_rate": 9.06680706580397e-06, "loss": 0.4904, "step": 14650 }, { "epoch": 0.27808338707841723, "grad_norm": 1.3931329406605635, "learning_rate": 9.064880143867701e-06, "loss": 0.4961, "step": 14660 }, { "epoch": 0.27827307560984865, "grad_norm": 1.0005357816966531, "learning_rate": 9.062951439764975e-06, "loss": 0.4853, "step": 14670 }, { "epoch": 0.27846276414128, "grad_norm": 1.4380784031593605, "learning_rate": 9.061020954341398e-06, "loss": 0.4843, "step": 14680 }, { "epoch": 0.2786524526727114, "grad_norm": 1.6137459851466796, "learning_rate": 9.059088688443348e-06, "loss": 0.4939, "step": 14690 }, { "epoch": 0.2788421412041428, "grad_norm": 1.3813302182507685, "learning_rate": 9.057154642917991e-06, "loss": 0.485, "step": 14700 }, { "epoch": 0.27903182973557417, "grad_norm": 1.4635506860096232, "learning_rate": 9.055218818613271e-06, "loss": 0.4969, "step": 14710 }, { "epoch": 0.2792215182670056, "grad_norm": 1.436525954236254, "learning_rate": 9.053281216377914e-06, "loss": 0.5165, "step": 14720 }, { "epoch": 0.27941120679843695, "grad_norm": 1.3483517934216336, "learning_rate": 9.051341837061423e-06, "loss": 0.4915, "step": 14730 }, { "epoch": 0.27960089532986837, "grad_norm": 1.2690559871046054, "learning_rate": 9.049400681514077e-06, "loss": 0.486, "step": 14740 }, { "epoch": 0.27979058386129974, "grad_norm": 1.4940010146600802, "learning_rate": 9.04745775058694e-06, "loss": 0.4891, "step": 14750 }, { "epoch": 0.27998027239273116, "grad_norm": 1.7573872739976972, "learning_rate": 9.045513045131852e-06, "loss": 0.4877, "step": 14760 }, { "epoch": 0.2801699609241625, "grad_norm": 1.6645446960763242, "learning_rate": 9.04356656600143e-06, "loss": 0.4762, "step": 14770 }, { "epoch": 0.28035964945559394, "grad_norm": 1.4687621468057785, "learning_rate": 9.04161831404907e-06, "loss": 0.4633, "step": 14780 }, { "epoch": 0.2805493379870253, "grad_norm": 1.6149548135553842, "learning_rate": 9.039668290128942e-06, "loss": 0.5129, "step": 14790 }, { "epoch": 0.28073902651845667, "grad_norm": 1.67088851023797, "learning_rate": 9.037716495096e-06, "loss": 0.5088, "step": 14800 }, { "epoch": 0.2809287150498881, "grad_norm": 1.440726973900628, "learning_rate": 9.035762929805968e-06, "loss": 0.4925, "step": 14810 }, { "epoch": 0.28111840358131945, "grad_norm": 1.6876892824753416, "learning_rate": 9.033807595115344e-06, "loss": 0.4614, "step": 14820 }, { "epoch": 0.2813080921127509, "grad_norm": 1.373488498235833, "learning_rate": 9.031850491881413e-06, "loss": 0.4975, "step": 14830 }, { "epoch": 0.28149778064418224, "grad_norm": 1.1918525798537418, "learning_rate": 9.029891620962224e-06, "loss": 0.4868, "step": 14840 }, { "epoch": 0.28168746917561366, "grad_norm": 1.376782376172575, "learning_rate": 9.027930983216606e-06, "loss": 0.5023, "step": 14850 }, { "epoch": 0.281877157707045, "grad_norm": 1.5647646020474142, "learning_rate": 9.025968579504163e-06, "loss": 0.4908, "step": 14860 }, { "epoch": 0.28206684623847644, "grad_norm": 1.9898740013858127, "learning_rate": 9.02400441068527e-06, "loss": 0.4919, "step": 14870 }, { "epoch": 0.2822565347699078, "grad_norm": 1.5680477892561127, "learning_rate": 9.022038477621081e-06, "loss": 0.4966, "step": 14880 }, { "epoch": 0.2824462233013392, "grad_norm": 1.5786958168043383, "learning_rate": 9.020070781173515e-06, "loss": 0.4909, "step": 14890 }, { "epoch": 0.2826359118327706, "grad_norm": 1.5047836452601746, "learning_rate": 9.018101322205277e-06, "loss": 0.5129, "step": 14900 }, { "epoch": 0.28282560036420196, "grad_norm": 1.8494495671472966, "learning_rate": 9.016130101579834e-06, "loss": 0.5046, "step": 14910 }, { "epoch": 0.2830152888956334, "grad_norm": 1.175023454659598, "learning_rate": 9.014157120161427e-06, "loss": 0.5074, "step": 14920 }, { "epoch": 0.28320497742706474, "grad_norm": 1.394320637963658, "learning_rate": 9.012182378815073e-06, "loss": 0.4874, "step": 14930 }, { "epoch": 0.28339466595849616, "grad_norm": 1.631743864812664, "learning_rate": 9.010205878406559e-06, "loss": 0.491, "step": 14940 }, { "epoch": 0.2835843544899275, "grad_norm": 1.6421128022533498, "learning_rate": 9.008227619802439e-06, "loss": 0.4903, "step": 14950 }, { "epoch": 0.28377404302135895, "grad_norm": 1.4254450952647353, "learning_rate": 9.006247603870046e-06, "loss": 0.4797, "step": 14960 }, { "epoch": 0.2839637315527903, "grad_norm": 1.478590928669197, "learning_rate": 9.004265831477477e-06, "loss": 0.4509, "step": 14970 }, { "epoch": 0.28415342008422173, "grad_norm": 1.3293038680611238, "learning_rate": 9.002282303493598e-06, "loss": 0.4749, "step": 14980 }, { "epoch": 0.2843431086156531, "grad_norm": 1.6935379509500634, "learning_rate": 9.000297020788053e-06, "loss": 0.5094, "step": 14990 }, { "epoch": 0.28453279714708446, "grad_norm": 1.476176273149372, "learning_rate": 8.998309984231246e-06, "loss": 0.4735, "step": 15000 }, { "epoch": 0.2847224856785159, "grad_norm": 1.4575944770159732, "learning_rate": 8.996321194694356e-06, "loss": 0.4904, "step": 15010 }, { "epoch": 0.28491217420994724, "grad_norm": 1.5259352134132254, "learning_rate": 8.994330653049328e-06, "loss": 0.4817, "step": 15020 }, { "epoch": 0.28510186274137866, "grad_norm": 1.3170339311273944, "learning_rate": 8.992338360168876e-06, "loss": 0.4708, "step": 15030 }, { "epoch": 0.28529155127281003, "grad_norm": 1.2513958619658776, "learning_rate": 8.99034431692648e-06, "loss": 0.4743, "step": 15040 }, { "epoch": 0.28548123980424145, "grad_norm": 1.4532016784761863, "learning_rate": 8.988348524196392e-06, "loss": 0.4861, "step": 15050 }, { "epoch": 0.2856709283356728, "grad_norm": 1.6605211763427856, "learning_rate": 8.986350982853622e-06, "loss": 0.4749, "step": 15060 }, { "epoch": 0.28586061686710423, "grad_norm": 1.482491771333399, "learning_rate": 8.98435169377396e-06, "loss": 0.4848, "step": 15070 }, { "epoch": 0.2860503053985356, "grad_norm": 1.551502502520213, "learning_rate": 8.98235065783395e-06, "loss": 0.4584, "step": 15080 }, { "epoch": 0.286239993929967, "grad_norm": 1.2674968816614642, "learning_rate": 8.980347875910907e-06, "loss": 0.453, "step": 15090 }, { "epoch": 0.2864296824613984, "grad_norm": 1.484521024382017, "learning_rate": 8.978343348882914e-06, "loss": 0.4984, "step": 15100 }, { "epoch": 0.28661937099282975, "grad_norm": 1.9316671964140257, "learning_rate": 8.976337077628813e-06, "loss": 0.5018, "step": 15110 }, { "epoch": 0.28680905952426117, "grad_norm": 1.3810063732283355, "learning_rate": 8.974329063028215e-06, "loss": 0.5128, "step": 15120 }, { "epoch": 0.28699874805569253, "grad_norm": 1.5592148026954114, "learning_rate": 8.972319305961498e-06, "loss": 0.5043, "step": 15130 }, { "epoch": 0.28718843658712395, "grad_norm": 1.4767706851907432, "learning_rate": 8.970307807309794e-06, "loss": 0.4764, "step": 15140 }, { "epoch": 0.2873781251185553, "grad_norm": 1.432233679358938, "learning_rate": 8.968294567955009e-06, "loss": 0.4955, "step": 15150 }, { "epoch": 0.28756781364998674, "grad_norm": 1.7677026605244504, "learning_rate": 8.966279588779808e-06, "loss": 0.4691, "step": 15160 }, { "epoch": 0.2877575021814181, "grad_norm": 2.0752623333719304, "learning_rate": 8.964262870667617e-06, "loss": 0.4994, "step": 15170 }, { "epoch": 0.2879471907128495, "grad_norm": 1.4337102997983782, "learning_rate": 8.962244414502628e-06, "loss": 0.4834, "step": 15180 }, { "epoch": 0.2881368792442809, "grad_norm": 1.2964702242249624, "learning_rate": 8.960224221169793e-06, "loss": 0.4639, "step": 15190 }, { "epoch": 0.2883265677757123, "grad_norm": 1.4328367873719774, "learning_rate": 8.958202291554823e-06, "loss": 0.4882, "step": 15200 }, { "epoch": 0.28851625630714367, "grad_norm": 1.4115936532621363, "learning_rate": 8.956178626544197e-06, "loss": 0.4723, "step": 15210 }, { "epoch": 0.28870594483857503, "grad_norm": 1.6196537365350379, "learning_rate": 8.95415322702515e-06, "loss": 0.5167, "step": 15220 }, { "epoch": 0.28889563337000645, "grad_norm": 1.3468495655897803, "learning_rate": 8.952126093885677e-06, "loss": 0.4957, "step": 15230 }, { "epoch": 0.2890853219014378, "grad_norm": 1.0727138695762772, "learning_rate": 8.950097228014537e-06, "loss": 0.4911, "step": 15240 }, { "epoch": 0.28927501043286924, "grad_norm": 1.3004347144999553, "learning_rate": 8.948066630301242e-06, "loss": 0.453, "step": 15250 }, { "epoch": 0.2894646989643006, "grad_norm": 1.4134267835975216, "learning_rate": 8.946034301636071e-06, "loss": 0.4856, "step": 15260 }, { "epoch": 0.289654387495732, "grad_norm": 1.3798709111666936, "learning_rate": 8.944000242910058e-06, "loss": 0.514, "step": 15270 }, { "epoch": 0.2898440760271634, "grad_norm": 1.3055031844697658, "learning_rate": 8.941964455014995e-06, "loss": 0.4871, "step": 15280 }, { "epoch": 0.2900337645585948, "grad_norm": 1.4097906097774904, "learning_rate": 8.939926938843433e-06, "loss": 0.4924, "step": 15290 }, { "epoch": 0.2902234530900262, "grad_norm": 1.5156912608773672, "learning_rate": 8.937887695288681e-06, "loss": 0.4925, "step": 15300 }, { "epoch": 0.2904131416214576, "grad_norm": 1.343526673392391, "learning_rate": 8.935846725244806e-06, "loss": 0.4825, "step": 15310 }, { "epoch": 0.29060283015288896, "grad_norm": 1.3879955149216812, "learning_rate": 8.933804029606629e-06, "loss": 0.4535, "step": 15320 }, { "epoch": 0.2907925186843203, "grad_norm": 1.544495567124889, "learning_rate": 8.931759609269733e-06, "loss": 0.5019, "step": 15330 }, { "epoch": 0.29098220721575174, "grad_norm": 1.5875288123837472, "learning_rate": 8.92971346513045e-06, "loss": 0.4886, "step": 15340 }, { "epoch": 0.2911718957471831, "grad_norm": 1.5563435141710824, "learning_rate": 8.927665598085873e-06, "loss": 0.5059, "step": 15350 }, { "epoch": 0.2913615842786145, "grad_norm": 1.4833293401552108, "learning_rate": 8.925616009033849e-06, "loss": 0.4617, "step": 15360 }, { "epoch": 0.2915512728100459, "grad_norm": 1.3383310454031776, "learning_rate": 8.92356469887298e-06, "loss": 0.4785, "step": 15370 }, { "epoch": 0.2917409613414773, "grad_norm": 1.2697458474224952, "learning_rate": 8.921511668502623e-06, "loss": 0.4927, "step": 15380 }, { "epoch": 0.2919306498729087, "grad_norm": 1.355685797028599, "learning_rate": 8.919456918822887e-06, "loss": 0.4551, "step": 15390 }, { "epoch": 0.2921203384043401, "grad_norm": 1.3847711987003888, "learning_rate": 8.917400450734636e-06, "loss": 0.4705, "step": 15400 }, { "epoch": 0.29231002693577146, "grad_norm": 1.674373933005323, "learning_rate": 8.91534226513949e-06, "loss": 0.5038, "step": 15410 }, { "epoch": 0.2924997154672028, "grad_norm": 1.6341042310554181, "learning_rate": 8.91328236293982e-06, "loss": 0.4636, "step": 15420 }, { "epoch": 0.29268940399863425, "grad_norm": 1.4710176395239531, "learning_rate": 8.911220745038747e-06, "loss": 0.4957, "step": 15430 }, { "epoch": 0.2928790925300656, "grad_norm": 1.7123003736685334, "learning_rate": 8.90915741234015e-06, "loss": 0.5022, "step": 15440 }, { "epoch": 0.29306878106149703, "grad_norm": 1.6867624413333222, "learning_rate": 8.907092365748653e-06, "loss": 0.5098, "step": 15450 }, { "epoch": 0.2932584695929284, "grad_norm": 1.480963172793533, "learning_rate": 8.905025606169637e-06, "loss": 0.4804, "step": 15460 }, { "epoch": 0.2934481581243598, "grad_norm": 1.4906801662207416, "learning_rate": 8.902957134509232e-06, "loss": 0.4733, "step": 15470 }, { "epoch": 0.2936378466557912, "grad_norm": 1.4367054671547133, "learning_rate": 8.900886951674317e-06, "loss": 0.4652, "step": 15480 }, { "epoch": 0.2938275351872226, "grad_norm": 1.4497160879949649, "learning_rate": 8.898815058572524e-06, "loss": 0.4612, "step": 15490 }, { "epoch": 0.29401722371865396, "grad_norm": 1.399430658436859, "learning_rate": 8.896741456112234e-06, "loss": 0.4846, "step": 15500 }, { "epoch": 0.2942069122500854, "grad_norm": 1.4392649327035678, "learning_rate": 8.894666145202577e-06, "loss": 0.4825, "step": 15510 }, { "epoch": 0.29439660078151675, "grad_norm": 1.37434734798085, "learning_rate": 8.89258912675343e-06, "loss": 0.4865, "step": 15520 }, { "epoch": 0.2945862893129481, "grad_norm": 2.0700152527007893, "learning_rate": 8.890510401675422e-06, "loss": 0.4969, "step": 15530 }, { "epoch": 0.29477597784437953, "grad_norm": 1.2809216791873292, "learning_rate": 8.888429970879932e-06, "loss": 0.4937, "step": 15540 }, { "epoch": 0.2949656663758109, "grad_norm": 2.2569308930161944, "learning_rate": 8.88634783527908e-06, "loss": 0.4979, "step": 15550 }, { "epoch": 0.2951553549072423, "grad_norm": 1.6667748662085116, "learning_rate": 8.884263995785737e-06, "loss": 0.4912, "step": 15560 }, { "epoch": 0.2953450434386737, "grad_norm": 1.3489269515978068, "learning_rate": 8.882178453313523e-06, "loss": 0.4547, "step": 15570 }, { "epoch": 0.2955347319701051, "grad_norm": 1.4120806301680444, "learning_rate": 8.880091208776805e-06, "loss": 0.4593, "step": 15580 }, { "epoch": 0.29572442050153647, "grad_norm": 1.574911877140649, "learning_rate": 8.878002263090689e-06, "loss": 0.4972, "step": 15590 }, { "epoch": 0.2959141090329679, "grad_norm": 1.3854453391528578, "learning_rate": 8.875911617171036e-06, "loss": 0.4954, "step": 15600 }, { "epoch": 0.29610379756439925, "grad_norm": 1.3064658004723286, "learning_rate": 8.873819271934447e-06, "loss": 0.4852, "step": 15610 }, { "epoch": 0.29629348609583067, "grad_norm": 1.4786643095587741, "learning_rate": 8.87172522829827e-06, "loss": 0.4823, "step": 15620 }, { "epoch": 0.29648317462726204, "grad_norm": 1.5074070782066589, "learning_rate": 8.869629487180595e-06, "loss": 0.5116, "step": 15630 }, { "epoch": 0.2966728631586934, "grad_norm": 1.6256738390560452, "learning_rate": 8.867532049500262e-06, "loss": 0.4592, "step": 15640 }, { "epoch": 0.2968625516901248, "grad_norm": 1.3060584647772344, "learning_rate": 8.865432916176849e-06, "loss": 0.4901, "step": 15650 }, { "epoch": 0.2970522402215562, "grad_norm": 6.636305369337919, "learning_rate": 8.863332088130681e-06, "loss": 0.4942, "step": 15660 }, { "epoch": 0.2972419287529876, "grad_norm": 1.6754466734566185, "learning_rate": 8.86122956628282e-06, "loss": 0.5035, "step": 15670 }, { "epoch": 0.29743161728441897, "grad_norm": 1.543552469484648, "learning_rate": 8.85912535155508e-06, "loss": 0.5212, "step": 15680 }, { "epoch": 0.2976213058158504, "grad_norm": 1.3906751477109023, "learning_rate": 8.857019444870012e-06, "loss": 0.4679, "step": 15690 }, { "epoch": 0.29781099434728175, "grad_norm": 1.3459609834976676, "learning_rate": 8.854911847150909e-06, "loss": 0.5062, "step": 15700 }, { "epoch": 0.2980006828787132, "grad_norm": 1.4567987274863858, "learning_rate": 8.852802559321803e-06, "loss": 0.5093, "step": 15710 }, { "epoch": 0.29819037141014454, "grad_norm": 1.4459321940407854, "learning_rate": 8.850691582307472e-06, "loss": 0.4747, "step": 15720 }, { "epoch": 0.29838005994157596, "grad_norm": 1.5770060316927237, "learning_rate": 8.84857891703343e-06, "loss": 0.501, "step": 15730 }, { "epoch": 0.2985697484730073, "grad_norm": 1.495479795235324, "learning_rate": 8.846464564425938e-06, "loss": 0.4847, "step": 15740 }, { "epoch": 0.2987594370044387, "grad_norm": 1.4315932504455025, "learning_rate": 8.844348525411988e-06, "loss": 0.4816, "step": 15750 }, { "epoch": 0.2989491255358701, "grad_norm": 1.706483994345748, "learning_rate": 8.842230800919315e-06, "loss": 0.4547, "step": 15760 }, { "epoch": 0.2991388140673015, "grad_norm": 1.1974984942541023, "learning_rate": 8.840111391876395e-06, "loss": 0.4979, "step": 15770 }, { "epoch": 0.2993285025987329, "grad_norm": 1.299993403140954, "learning_rate": 8.837990299212444e-06, "loss": 0.4462, "step": 15780 }, { "epoch": 0.29951819113016426, "grad_norm": 1.5285281640061719, "learning_rate": 8.835867523857409e-06, "loss": 0.4812, "step": 15790 }, { "epoch": 0.2997078796615957, "grad_norm": 1.7362127263667013, "learning_rate": 8.83374306674198e-06, "loss": 0.4914, "step": 15800 }, { "epoch": 0.29989756819302704, "grad_norm": 1.593659969864873, "learning_rate": 8.831616928797586e-06, "loss": 0.5124, "step": 15810 }, { "epoch": 0.30008725672445846, "grad_norm": 1.4677888583626681, "learning_rate": 8.829489110956385e-06, "loss": 0.4718, "step": 15820 }, { "epoch": 0.3002769452558898, "grad_norm": 1.4272571900917341, "learning_rate": 8.827359614151282e-06, "loss": 0.4726, "step": 15830 }, { "epoch": 0.3004666337873212, "grad_norm": 1.5760928394628628, "learning_rate": 8.82522843931591e-06, "loss": 0.4517, "step": 15840 }, { "epoch": 0.3006563223187526, "grad_norm": 1.2543882859500786, "learning_rate": 8.82309558738464e-06, "loss": 0.4633, "step": 15850 }, { "epoch": 0.300846010850184, "grad_norm": 1.2224859639511572, "learning_rate": 8.820961059292578e-06, "loss": 0.4582, "step": 15860 }, { "epoch": 0.3010356993816154, "grad_norm": 1.4724653465678423, "learning_rate": 8.818824855975572e-06, "loss": 0.482, "step": 15870 }, { "epoch": 0.30122538791304676, "grad_norm": 1.3802543763506425, "learning_rate": 8.81668697837019e-06, "loss": 0.4689, "step": 15880 }, { "epoch": 0.3014150764444782, "grad_norm": 1.434994960300729, "learning_rate": 8.814547427413747e-06, "loss": 0.4983, "step": 15890 }, { "epoch": 0.30160476497590955, "grad_norm": 1.7271174587936153, "learning_rate": 8.812406204044286e-06, "loss": 0.4789, "step": 15900 }, { "epoch": 0.30179445350734097, "grad_norm": 1.468688442238247, "learning_rate": 8.810263309200585e-06, "loss": 0.4803, "step": 15910 }, { "epoch": 0.30198414203877233, "grad_norm": 1.3958465097537325, "learning_rate": 8.80811874382215e-06, "loss": 0.4876, "step": 15920 }, { "epoch": 0.30217383057020375, "grad_norm": 1.6248740744721153, "learning_rate": 8.80597250884923e-06, "loss": 0.4972, "step": 15930 }, { "epoch": 0.3023635191016351, "grad_norm": 1.238375432103168, "learning_rate": 8.803824605222796e-06, "loss": 0.5284, "step": 15940 }, { "epoch": 0.3025532076330665, "grad_norm": 1.5172836158105043, "learning_rate": 8.80167503388455e-06, "loss": 0.4871, "step": 15950 }, { "epoch": 0.3027428961644979, "grad_norm": 1.3784012942472794, "learning_rate": 8.799523795776934e-06, "loss": 0.4667, "step": 15960 }, { "epoch": 0.30293258469592926, "grad_norm": 1.1106686221186963, "learning_rate": 8.797370891843114e-06, "loss": 0.492, "step": 15970 }, { "epoch": 0.3031222732273607, "grad_norm": 1.4437116210222987, "learning_rate": 8.795216323026989e-06, "loss": 0.478, "step": 15980 }, { "epoch": 0.30331196175879205, "grad_norm": 1.4207537995675703, "learning_rate": 8.793060090273188e-06, "loss": 0.4509, "step": 15990 }, { "epoch": 0.30350165029022347, "grad_norm": 1.5906559926504187, "learning_rate": 8.790902194527067e-06, "loss": 0.4875, "step": 16000 }, { "epoch": 0.30369133882165483, "grad_norm": 1.5921431828792265, "learning_rate": 8.788742636734713e-06, "loss": 0.5092, "step": 16010 }, { "epoch": 0.30388102735308625, "grad_norm": 1.5180292852152313, "learning_rate": 8.786581417842943e-06, "loss": 0.4635, "step": 16020 }, { "epoch": 0.3040707158845176, "grad_norm": 1.2742571044190727, "learning_rate": 8.784418538799299e-06, "loss": 0.4675, "step": 16030 }, { "epoch": 0.30426040441594904, "grad_norm": 1.4244374800521755, "learning_rate": 8.782254000552053e-06, "loss": 0.5023, "step": 16040 }, { "epoch": 0.3044500929473804, "grad_norm": 1.6116976349967562, "learning_rate": 8.780087804050203e-06, "loss": 0.4749, "step": 16050 }, { "epoch": 0.30463978147881177, "grad_norm": 1.524776972421378, "learning_rate": 8.77791995024348e-06, "loss": 0.4552, "step": 16060 }, { "epoch": 0.3048294700102432, "grad_norm": 1.2873724199552752, "learning_rate": 8.77575044008233e-06, "loss": 0.4858, "step": 16070 }, { "epoch": 0.30501915854167455, "grad_norm": 1.3950309888072725, "learning_rate": 8.773579274517936e-06, "loss": 0.5024, "step": 16080 }, { "epoch": 0.30520884707310597, "grad_norm": 1.4815891923949103, "learning_rate": 8.771406454502203e-06, "loss": 0.4737, "step": 16090 }, { "epoch": 0.30539853560453734, "grad_norm": 1.571500033277056, "learning_rate": 8.769231980987758e-06, "loss": 0.4986, "step": 16100 }, { "epoch": 0.30558822413596876, "grad_norm": 1.7079299110618118, "learning_rate": 8.76705585492796e-06, "loss": 0.4886, "step": 16110 }, { "epoch": 0.3057779126674001, "grad_norm": 1.825920965329622, "learning_rate": 8.764878077276884e-06, "loss": 0.489, "step": 16120 }, { "epoch": 0.30596760119883154, "grad_norm": 1.199767447362885, "learning_rate": 8.762698648989335e-06, "loss": 0.4579, "step": 16130 }, { "epoch": 0.3061572897302629, "grad_norm": 1.3336641068025006, "learning_rate": 8.760517571020843e-06, "loss": 0.499, "step": 16140 }, { "epoch": 0.3063469782616943, "grad_norm": 1.6181851503023381, "learning_rate": 8.758334844327658e-06, "loss": 0.4677, "step": 16150 }, { "epoch": 0.3065366667931257, "grad_norm": 1.4926646572973414, "learning_rate": 8.756150469866751e-06, "loss": 0.4606, "step": 16160 }, { "epoch": 0.30672635532455705, "grad_norm": 4.15612754007255, "learning_rate": 8.753964448595818e-06, "loss": 0.4953, "step": 16170 }, { "epoch": 0.3069160438559885, "grad_norm": 1.3804908929054283, "learning_rate": 8.751776781473279e-06, "loss": 0.4766, "step": 16180 }, { "epoch": 0.30710573238741984, "grad_norm": 1.2287705889438871, "learning_rate": 8.749587469458271e-06, "loss": 0.4592, "step": 16190 }, { "epoch": 0.30729542091885126, "grad_norm": 1.3206107007045451, "learning_rate": 8.747396513510659e-06, "loss": 0.4806, "step": 16200 }, { "epoch": 0.3074851094502826, "grad_norm": 1.8125629591254648, "learning_rate": 8.745203914591018e-06, "loss": 0.4995, "step": 16210 }, { "epoch": 0.30767479798171404, "grad_norm": 1.7276211507612071, "learning_rate": 8.743009673660657e-06, "loss": 0.481, "step": 16220 }, { "epoch": 0.3078644865131454, "grad_norm": 1.5498726101461275, "learning_rate": 8.74081379168159e-06, "loss": 0.4735, "step": 16230 }, { "epoch": 0.30805417504457683, "grad_norm": 1.498405673790738, "learning_rate": 8.738616269616564e-06, "loss": 0.4582, "step": 16240 }, { "epoch": 0.3082438635760082, "grad_norm": 1.5098585593132048, "learning_rate": 8.736417108429036e-06, "loss": 0.4957, "step": 16250 }, { "epoch": 0.30843355210743956, "grad_norm": 1.4425429397731615, "learning_rate": 8.734216309083187e-06, "loss": 0.482, "step": 16260 }, { "epoch": 0.308623240638871, "grad_norm": 1.4653021633723422, "learning_rate": 8.732013872543912e-06, "loss": 0.4662, "step": 16270 }, { "epoch": 0.30881292917030234, "grad_norm": 1.2806471423008168, "learning_rate": 8.729809799776827e-06, "loss": 0.4726, "step": 16280 }, { "epoch": 0.30900261770173376, "grad_norm": 1.1363925909828285, "learning_rate": 8.727604091748263e-06, "loss": 0.4914, "step": 16290 }, { "epoch": 0.3091923062331651, "grad_norm": 1.5438687124135977, "learning_rate": 8.725396749425272e-06, "loss": 0.4932, "step": 16300 }, { "epoch": 0.30938199476459655, "grad_norm": 1.4578298602433815, "learning_rate": 8.723187773775616e-06, "loss": 0.4649, "step": 16310 }, { "epoch": 0.3095716832960279, "grad_norm": 1.771520201177536, "learning_rate": 8.720977165767779e-06, "loss": 0.4836, "step": 16320 }, { "epoch": 0.30976137182745933, "grad_norm": 1.5804892454981807, "learning_rate": 8.71876492637096e-06, "loss": 0.4912, "step": 16330 }, { "epoch": 0.3099510603588907, "grad_norm": 1.3334811198001228, "learning_rate": 8.716551056555067e-06, "loss": 0.4867, "step": 16340 }, { "epoch": 0.3101407488903221, "grad_norm": 1.4852229822982224, "learning_rate": 8.71433555729073e-06, "loss": 0.4944, "step": 16350 }, { "epoch": 0.3103304374217535, "grad_norm": 1.900141072842528, "learning_rate": 8.712118429549291e-06, "loss": 0.4775, "step": 16360 }, { "epoch": 0.31052012595318484, "grad_norm": 1.685551737196893, "learning_rate": 8.709899674302807e-06, "loss": 0.5031, "step": 16370 }, { "epoch": 0.31070981448461626, "grad_norm": 1.3096058913458304, "learning_rate": 8.707679292524047e-06, "loss": 0.432, "step": 16380 }, { "epoch": 0.31089950301604763, "grad_norm": 1.346997719842997, "learning_rate": 8.705457285186492e-06, "loss": 0.4964, "step": 16390 }, { "epoch": 0.31108919154747905, "grad_norm": 1.576827515199479, "learning_rate": 8.703233653264337e-06, "loss": 0.4642, "step": 16400 }, { "epoch": 0.3112788800789104, "grad_norm": 2.1192260506653944, "learning_rate": 8.701008397732493e-06, "loss": 0.5189, "step": 16410 }, { "epoch": 0.31146856861034183, "grad_norm": 1.0307453826905975, "learning_rate": 8.698781519566574e-06, "loss": 0.4906, "step": 16420 }, { "epoch": 0.3116582571417732, "grad_norm": 1.6412666922388612, "learning_rate": 8.696553019742917e-06, "loss": 0.5009, "step": 16430 }, { "epoch": 0.3118479456732046, "grad_norm": 1.4860157372494165, "learning_rate": 8.694322899238558e-06, "loss": 0.4937, "step": 16440 }, { "epoch": 0.312037634204636, "grad_norm": 1.4236065791843528, "learning_rate": 8.692091159031255e-06, "loss": 0.5067, "step": 16450 }, { "epoch": 0.3122273227360674, "grad_norm": 1.2400870174897871, "learning_rate": 8.689857800099465e-06, "loss": 0.472, "step": 16460 }, { "epoch": 0.31241701126749877, "grad_norm": 1.5138427710980167, "learning_rate": 8.687622823422364e-06, "loss": 0.4772, "step": 16470 }, { "epoch": 0.31260669979893013, "grad_norm": 1.3640471660567945, "learning_rate": 8.68538622997983e-06, "loss": 0.4955, "step": 16480 }, { "epoch": 0.31279638833036155, "grad_norm": 1.2106233061229923, "learning_rate": 8.683148020752456e-06, "loss": 0.4806, "step": 16490 }, { "epoch": 0.3129860768617929, "grad_norm": 1.5480787423411664, "learning_rate": 8.68090819672154e-06, "loss": 0.4949, "step": 16500 }, { "epoch": 0.31317576539322434, "grad_norm": 1.3879020583285748, "learning_rate": 8.678666758869088e-06, "loss": 0.481, "step": 16510 }, { "epoch": 0.3133654539246557, "grad_norm": 1.502121693818913, "learning_rate": 8.676423708177815e-06, "loss": 0.4781, "step": 16520 }, { "epoch": 0.3135551424560871, "grad_norm": 1.6535642742886703, "learning_rate": 8.674179045631142e-06, "loss": 0.4653, "step": 16530 }, { "epoch": 0.3137448309875185, "grad_norm": 1.4667262055786014, "learning_rate": 8.671932772213195e-06, "loss": 0.4994, "step": 16540 }, { "epoch": 0.3139345195189499, "grad_norm": 1.5426555077483242, "learning_rate": 8.669684888908813e-06, "loss": 0.4834, "step": 16550 }, { "epoch": 0.31412420805038127, "grad_norm": 1.4933276722378654, "learning_rate": 8.667435396703532e-06, "loss": 0.4827, "step": 16560 }, { "epoch": 0.3143138965818127, "grad_norm": 1.6717729171886935, "learning_rate": 8.665184296583598e-06, "loss": 0.5026, "step": 16570 }, { "epoch": 0.31450358511324406, "grad_norm": 1.3099148414551482, "learning_rate": 8.662931589535964e-06, "loss": 0.4925, "step": 16580 }, { "epoch": 0.3146932736446754, "grad_norm": 1.370015604854444, "learning_rate": 8.66067727654828e-06, "loss": 0.4703, "step": 16590 }, { "epoch": 0.31488296217610684, "grad_norm": 1.523998395390378, "learning_rate": 8.658421358608909e-06, "loss": 0.4795, "step": 16600 }, { "epoch": 0.3150726507075382, "grad_norm": 1.2979209473078688, "learning_rate": 8.656163836706915e-06, "loss": 0.4939, "step": 16610 }, { "epoch": 0.3152623392389696, "grad_norm": 1.3821656851116453, "learning_rate": 8.653904711832058e-06, "loss": 0.476, "step": 16620 }, { "epoch": 0.315452027770401, "grad_norm": 1.566933497066839, "learning_rate": 8.651643984974812e-06, "loss": 0.4952, "step": 16630 }, { "epoch": 0.3156417163018324, "grad_norm": 1.4002571284574334, "learning_rate": 8.649381657126346e-06, "loss": 0.5229, "step": 16640 }, { "epoch": 0.3158314048332638, "grad_norm": 1.4931895974277618, "learning_rate": 8.647117729278534e-06, "loss": 0.4953, "step": 16650 }, { "epoch": 0.3160210933646952, "grad_norm": 1.7483100904864106, "learning_rate": 8.64485220242395e-06, "loss": 0.4793, "step": 16660 }, { "epoch": 0.31621078189612656, "grad_norm": 1.5601761613505072, "learning_rate": 8.64258507755587e-06, "loss": 0.4879, "step": 16670 }, { "epoch": 0.3164004704275579, "grad_norm": 1.4057408119026515, "learning_rate": 8.640316355668269e-06, "loss": 0.4911, "step": 16680 }, { "epoch": 0.31659015895898934, "grad_norm": 1.201763405555499, "learning_rate": 8.638046037755821e-06, "loss": 0.456, "step": 16690 }, { "epoch": 0.3167798474904207, "grad_norm": 1.4785239688996403, "learning_rate": 8.635774124813908e-06, "loss": 0.4914, "step": 16700 }, { "epoch": 0.3169695360218521, "grad_norm": 1.35222230711774, "learning_rate": 8.633500617838601e-06, "loss": 0.4755, "step": 16710 }, { "epoch": 0.3171592245532835, "grad_norm": 1.7443779910533546, "learning_rate": 8.631225517826675e-06, "loss": 0.4855, "step": 16720 }, { "epoch": 0.3173489130847149, "grad_norm": 1.6190009843704305, "learning_rate": 8.628948825775604e-06, "loss": 0.4907, "step": 16730 }, { "epoch": 0.3175386016161463, "grad_norm": 1.4197936137726956, "learning_rate": 8.62667054268356e-06, "loss": 0.4506, "step": 16740 }, { "epoch": 0.3177282901475777, "grad_norm": 1.6177195803724063, "learning_rate": 8.624390669549407e-06, "loss": 0.4878, "step": 16750 }, { "epoch": 0.31791797867900906, "grad_norm": 1.3754096257875195, "learning_rate": 8.622109207372711e-06, "loss": 0.4513, "step": 16760 }, { "epoch": 0.3181076672104405, "grad_norm": 1.463717850528942, "learning_rate": 8.619826157153737e-06, "loss": 0.456, "step": 16770 }, { "epoch": 0.31829735574187185, "grad_norm": 1.3913428779276678, "learning_rate": 8.617541519893441e-06, "loss": 0.4865, "step": 16780 }, { "epoch": 0.3184870442733032, "grad_norm": 2.051173018076694, "learning_rate": 8.615255296593481e-06, "loss": 0.491, "step": 16790 }, { "epoch": 0.31867673280473463, "grad_norm": 1.4716564627052493, "learning_rate": 8.612967488256201e-06, "loss": 0.4914, "step": 16800 }, { "epoch": 0.318866421336166, "grad_norm": 1.536518551281412, "learning_rate": 8.610678095884648e-06, "loss": 0.5062, "step": 16810 }, { "epoch": 0.3190561098675974, "grad_norm": 1.431822503442325, "learning_rate": 8.608387120482563e-06, "loss": 0.472, "step": 16820 }, { "epoch": 0.3192457983990288, "grad_norm": 1.3482483311950397, "learning_rate": 8.606094563054373e-06, "loss": 0.4859, "step": 16830 }, { "epoch": 0.3194354869304602, "grad_norm": 1.4914077812473687, "learning_rate": 8.60380042460521e-06, "loss": 0.4935, "step": 16840 }, { "epoch": 0.31962517546189156, "grad_norm": 1.4881551974603884, "learning_rate": 8.601504706140892e-06, "loss": 0.4831, "step": 16850 }, { "epoch": 0.319814863993323, "grad_norm": 1.4560795058948, "learning_rate": 8.599207408667932e-06, "loss": 0.4953, "step": 16860 }, { "epoch": 0.32000455252475435, "grad_norm": 1.4262812069277107, "learning_rate": 8.596908533193533e-06, "loss": 0.4993, "step": 16870 }, { "epoch": 0.32019424105618577, "grad_norm": 1.4052662916154595, "learning_rate": 8.594608080725593e-06, "loss": 0.4803, "step": 16880 }, { "epoch": 0.32038392958761713, "grad_norm": 1.4408062768181384, "learning_rate": 8.592306052272699e-06, "loss": 0.5152, "step": 16890 }, { "epoch": 0.3205736181190485, "grad_norm": 1.4940657874687968, "learning_rate": 8.59000244884413e-06, "loss": 0.486, "step": 16900 }, { "epoch": 0.3207633066504799, "grad_norm": 1.1994017481852244, "learning_rate": 8.587697271449857e-06, "loss": 0.4837, "step": 16910 }, { "epoch": 0.3209529951819113, "grad_norm": 1.4221768940122947, "learning_rate": 8.585390521100541e-06, "loss": 0.4957, "step": 16920 }, { "epoch": 0.3211426837133427, "grad_norm": 1.5341676680199534, "learning_rate": 8.583082198807525e-06, "loss": 0.4883, "step": 16930 }, { "epoch": 0.32133237224477407, "grad_norm": 1.3942703960150824, "learning_rate": 8.580772305582854e-06, "loss": 0.5027, "step": 16940 }, { "epoch": 0.3215220607762055, "grad_norm": 1.551905973905717, "learning_rate": 8.578460842439249e-06, "loss": 0.4529, "step": 16950 }, { "epoch": 0.32171174930763685, "grad_norm": 1.6309067121932486, "learning_rate": 8.576147810390132e-06, "loss": 0.468, "step": 16960 }, { "epoch": 0.32190143783906827, "grad_norm": 1.4508776271101231, "learning_rate": 8.5738332104496e-06, "loss": 0.4749, "step": 16970 }, { "epoch": 0.32209112637049964, "grad_norm": 1.2428446495055707, "learning_rate": 8.571517043632448e-06, "loss": 0.4624, "step": 16980 }, { "epoch": 0.32228081490193106, "grad_norm": 1.2391350752936503, "learning_rate": 8.56919931095415e-06, "loss": 0.4928, "step": 16990 }, { "epoch": 0.3224705034333624, "grad_norm": 1.4287555851652223, "learning_rate": 8.566880013430874e-06, "loss": 0.4712, "step": 17000 }, { "epoch": 0.3226601919647938, "grad_norm": 1.379853177554836, "learning_rate": 8.564559152079469e-06, "loss": 0.455, "step": 17010 }, { "epoch": 0.3228498804962252, "grad_norm": 1.489402279752962, "learning_rate": 8.56223672791747e-06, "loss": 0.4999, "step": 17020 }, { "epoch": 0.32303956902765657, "grad_norm": 1.4497555912307514, "learning_rate": 8.559912741963096e-06, "loss": 0.4687, "step": 17030 }, { "epoch": 0.323229257559088, "grad_norm": 1.3497571218697475, "learning_rate": 8.557587195235257e-06, "loss": 0.4935, "step": 17040 }, { "epoch": 0.32341894609051935, "grad_norm": 1.5802932512333443, "learning_rate": 8.55526008875354e-06, "loss": 0.5043, "step": 17050 }, { "epoch": 0.3236086346219508, "grad_norm": 1.315158284387478, "learning_rate": 8.552931423538221e-06, "loss": 0.507, "step": 17060 }, { "epoch": 0.32379832315338214, "grad_norm": 1.6128518273499863, "learning_rate": 8.550601200610255e-06, "loss": 0.4954, "step": 17070 }, { "epoch": 0.32398801168481356, "grad_norm": 1.834330587171514, "learning_rate": 8.548269420991286e-06, "loss": 0.482, "step": 17080 }, { "epoch": 0.3241777002162449, "grad_norm": 1.553469570948052, "learning_rate": 8.545936085703633e-06, "loss": 0.4852, "step": 17090 }, { "epoch": 0.3243673887476763, "grad_norm": 1.4764886083664108, "learning_rate": 8.543601195770302e-06, "loss": 0.4844, "step": 17100 }, { "epoch": 0.3245570772791077, "grad_norm": 1.2389580575148582, "learning_rate": 8.541264752214979e-06, "loss": 0.4857, "step": 17110 }, { "epoch": 0.3247467658105391, "grad_norm": 1.3477150596779912, "learning_rate": 8.538926756062032e-06, "loss": 0.4967, "step": 17120 }, { "epoch": 0.3249364543419705, "grad_norm": 1.4402539131622876, "learning_rate": 8.536587208336508e-06, "loss": 0.4621, "step": 17130 }, { "epoch": 0.32512614287340186, "grad_norm": 1.4118553302357737, "learning_rate": 8.53424611006414e-06, "loss": 0.4701, "step": 17140 }, { "epoch": 0.3253158314048333, "grad_norm": 1.4730953817851902, "learning_rate": 8.53190346227133e-06, "loss": 0.4934, "step": 17150 }, { "epoch": 0.32550551993626464, "grad_norm": 1.4599490866223348, "learning_rate": 8.529559265985171e-06, "loss": 0.4505, "step": 17160 }, { "epoch": 0.32569520846769606, "grad_norm": 1.3323398659594872, "learning_rate": 8.527213522233426e-06, "loss": 0.4942, "step": 17170 }, { "epoch": 0.3258848969991274, "grad_norm": 1.460687430259785, "learning_rate": 8.524866232044543e-06, "loss": 0.4929, "step": 17180 }, { "epoch": 0.32607458553055885, "grad_norm": 1.4170063554903587, "learning_rate": 8.522517396447642e-06, "loss": 0.5077, "step": 17190 }, { "epoch": 0.3262642740619902, "grad_norm": 1.5712744932975016, "learning_rate": 8.520167016472524e-06, "loss": 0.4952, "step": 17200 }, { "epoch": 0.3264539625934216, "grad_norm": 1.4199748838612023, "learning_rate": 8.517815093149671e-06, "loss": 0.4743, "step": 17210 }, { "epoch": 0.326643651124853, "grad_norm": 1.5586667787670487, "learning_rate": 8.51546162751023e-06, "loss": 0.4664, "step": 17220 }, { "epoch": 0.32683333965628436, "grad_norm": 1.4141612968439479, "learning_rate": 8.513106620586039e-06, "loss": 0.4564, "step": 17230 }, { "epoch": 0.3270230281877158, "grad_norm": 1.442160692997381, "learning_rate": 8.510750073409601e-06, "loss": 0.4594, "step": 17240 }, { "epoch": 0.32721271671914715, "grad_norm": 1.5625909281439245, "learning_rate": 8.508391987014095e-06, "loss": 0.4973, "step": 17250 }, { "epoch": 0.32740240525057857, "grad_norm": 1.4380874733575224, "learning_rate": 8.506032362433383e-06, "loss": 0.4651, "step": 17260 }, { "epoch": 0.32759209378200993, "grad_norm": 1.513523035760784, "learning_rate": 8.50367120070199e-06, "loss": 0.4629, "step": 17270 }, { "epoch": 0.32778178231344135, "grad_norm": 1.4250409205005856, "learning_rate": 8.501308502855125e-06, "loss": 0.5013, "step": 17280 }, { "epoch": 0.3279714708448727, "grad_norm": 1.2426659917240896, "learning_rate": 8.498944269928665e-06, "loss": 0.4545, "step": 17290 }, { "epoch": 0.32816115937630413, "grad_norm": 1.6512419860442746, "learning_rate": 8.496578502959161e-06, "loss": 0.4844, "step": 17300 }, { "epoch": 0.3283508479077355, "grad_norm": 1.3902109108102627, "learning_rate": 8.494211202983837e-06, "loss": 0.4946, "step": 17310 }, { "epoch": 0.32854053643916686, "grad_norm": 1.5103966336081427, "learning_rate": 8.49184237104059e-06, "loss": 0.4671, "step": 17320 }, { "epoch": 0.3287302249705983, "grad_norm": 1.4750144794514148, "learning_rate": 8.489472008167983e-06, "loss": 0.4634, "step": 17330 }, { "epoch": 0.32891991350202965, "grad_norm": 1.5360582810030454, "learning_rate": 8.48710011540526e-06, "loss": 0.4531, "step": 17340 }, { "epoch": 0.32910960203346107, "grad_norm": 1.3445877447120382, "learning_rate": 8.484726693792329e-06, "loss": 0.4584, "step": 17350 }, { "epoch": 0.32929929056489243, "grad_norm": 1.337449284109544, "learning_rate": 8.482351744369769e-06, "loss": 0.4685, "step": 17360 }, { "epoch": 0.32948897909632385, "grad_norm": 1.3643266559678677, "learning_rate": 8.47997526817883e-06, "loss": 0.4793, "step": 17370 }, { "epoch": 0.3296786676277552, "grad_norm": 1.4973980047741544, "learning_rate": 8.477597266261432e-06, "loss": 0.4701, "step": 17380 }, { "epoch": 0.32986835615918664, "grad_norm": 1.4397586435345422, "learning_rate": 8.475217739660162e-06, "loss": 0.4837, "step": 17390 }, { "epoch": 0.330058044690618, "grad_norm": 1.293513073864488, "learning_rate": 8.472836689418278e-06, "loss": 0.4784, "step": 17400 }, { "epoch": 0.3302477332220494, "grad_norm": 1.3084357016629609, "learning_rate": 8.470454116579703e-06, "loss": 0.4946, "step": 17410 }, { "epoch": 0.3304374217534808, "grad_norm": 0.9277533525255505, "learning_rate": 8.468070022189028e-06, "loss": 0.4747, "step": 17420 }, { "epoch": 0.33062711028491215, "grad_norm": 1.3964592567666045, "learning_rate": 8.465684407291514e-06, "loss": 0.4742, "step": 17430 }, { "epoch": 0.33081679881634357, "grad_norm": 1.6675097852301473, "learning_rate": 8.463297272933088e-06, "loss": 0.5036, "step": 17440 }, { "epoch": 0.33100648734777494, "grad_norm": 1.4953011061883426, "learning_rate": 8.46090862016034e-06, "loss": 0.4607, "step": 17450 }, { "epoch": 0.33119617587920636, "grad_norm": 1.4798685220957422, "learning_rate": 8.458518450020526e-06, "loss": 0.4649, "step": 17460 }, { "epoch": 0.3313858644106377, "grad_norm": 1.2719613140300896, "learning_rate": 8.456126763561572e-06, "loss": 0.4825, "step": 17470 }, { "epoch": 0.33157555294206914, "grad_norm": 1.4198242299455497, "learning_rate": 8.453733561832063e-06, "loss": 0.4902, "step": 17480 }, { "epoch": 0.3317652414735005, "grad_norm": 1.3329239024093298, "learning_rate": 8.451338845881254e-06, "loss": 0.4603, "step": 17490 }, { "epoch": 0.3319549300049319, "grad_norm": 1.3526495175390931, "learning_rate": 8.44894261675906e-06, "loss": 0.4836, "step": 17500 }, { "epoch": 0.3321446185363633, "grad_norm": 1.6989083337489168, "learning_rate": 8.446544875516058e-06, "loss": 0.4842, "step": 17510 }, { "epoch": 0.33233430706779465, "grad_norm": 1.7985636609631344, "learning_rate": 8.444145623203492e-06, "loss": 0.4792, "step": 17520 }, { "epoch": 0.3325239955992261, "grad_norm": 1.4496527510393125, "learning_rate": 8.441744860873269e-06, "loss": 0.4768, "step": 17530 }, { "epoch": 0.33271368413065744, "grad_norm": 1.521046225404569, "learning_rate": 8.439342589577953e-06, "loss": 0.4801, "step": 17540 }, { "epoch": 0.33290337266208886, "grad_norm": 1.5932011263541186, "learning_rate": 8.436938810370772e-06, "loss": 0.5118, "step": 17550 }, { "epoch": 0.3330930611935202, "grad_norm": 1.4348060405567984, "learning_rate": 8.434533524305617e-06, "loss": 0.5094, "step": 17560 }, { "epoch": 0.33328274972495164, "grad_norm": 1.3993442368636022, "learning_rate": 8.43212673243704e-06, "loss": 0.4754, "step": 17570 }, { "epoch": 0.333472438256383, "grad_norm": 1.540118237126327, "learning_rate": 8.429718435820246e-06, "loss": 0.4808, "step": 17580 }, { "epoch": 0.33366212678781443, "grad_norm": 1.3813259761219177, "learning_rate": 8.42730863551111e-06, "loss": 0.4812, "step": 17590 }, { "epoch": 0.3338518153192458, "grad_norm": 1.2748431957929995, "learning_rate": 8.424897332566162e-06, "loss": 0.4764, "step": 17600 }, { "epoch": 0.3340415038506772, "grad_norm": 1.4161113410844892, "learning_rate": 8.422484528042585e-06, "loss": 0.4685, "step": 17610 }, { "epoch": 0.3342311923821086, "grad_norm": 1.5737242902326263, "learning_rate": 8.420070222998229e-06, "loss": 0.4697, "step": 17620 }, { "epoch": 0.33442088091353994, "grad_norm": 1.3806793365348924, "learning_rate": 8.417654418491598e-06, "loss": 0.4913, "step": 17630 }, { "epoch": 0.33461056944497136, "grad_norm": 1.3971054747470835, "learning_rate": 8.415237115581855e-06, "loss": 0.487, "step": 17640 }, { "epoch": 0.3348002579764027, "grad_norm": 1.3205794711456291, "learning_rate": 8.412818315328817e-06, "loss": 0.4783, "step": 17650 }, { "epoch": 0.33498994650783415, "grad_norm": 1.0911858513898343, "learning_rate": 8.41039801879296e-06, "loss": 0.4614, "step": 17660 }, { "epoch": 0.3351796350392655, "grad_norm": 1.378662502213396, "learning_rate": 8.407976227035414e-06, "loss": 0.4819, "step": 17670 }, { "epoch": 0.33536932357069693, "grad_norm": 1.2056423048748195, "learning_rate": 8.405552941117967e-06, "loss": 0.46, "step": 17680 }, { "epoch": 0.3355590121021283, "grad_norm": 1.657797577269143, "learning_rate": 8.40312816210306e-06, "loss": 0.4983, "step": 17690 }, { "epoch": 0.3357487006335597, "grad_norm": 1.7570356421474889, "learning_rate": 8.40070189105379e-06, "loss": 0.4888, "step": 17700 }, { "epoch": 0.3359383891649911, "grad_norm": 1.40227070143786, "learning_rate": 8.398274129033907e-06, "loss": 0.4803, "step": 17710 }, { "epoch": 0.3361280776964225, "grad_norm": 1.330839265871238, "learning_rate": 8.39584487710782e-06, "loss": 0.4484, "step": 17720 }, { "epoch": 0.33631776622785386, "grad_norm": 1.5974671376044043, "learning_rate": 8.393414136340578e-06, "loss": 0.4939, "step": 17730 }, { "epoch": 0.33650745475928523, "grad_norm": 2.330274679375041, "learning_rate": 8.390981907797898e-06, "loss": 0.4925, "step": 17740 }, { "epoch": 0.33669714329071665, "grad_norm": 1.3495856422407717, "learning_rate": 8.38854819254614e-06, "loss": 0.4673, "step": 17750 }, { "epoch": 0.336886831822148, "grad_norm": 1.4447905036720479, "learning_rate": 8.38611299165232e-06, "loss": 0.491, "step": 17760 }, { "epoch": 0.33707652035357943, "grad_norm": 1.2007225981581302, "learning_rate": 8.383676306184101e-06, "loss": 0.4908, "step": 17770 }, { "epoch": 0.3372662088850108, "grad_norm": 1.2712370349354734, "learning_rate": 8.381238137209803e-06, "loss": 0.4836, "step": 17780 }, { "epoch": 0.3374558974164422, "grad_norm": 1.4187893825366398, "learning_rate": 8.37879848579839e-06, "loss": 0.4708, "step": 17790 }, { "epoch": 0.3376455859478736, "grad_norm": 1.3693555563556217, "learning_rate": 8.376357353019479e-06, "loss": 0.4536, "step": 17800 }, { "epoch": 0.337835274479305, "grad_norm": 1.3447047834529422, "learning_rate": 8.37391473994334e-06, "loss": 0.4698, "step": 17810 }, { "epoch": 0.33802496301073637, "grad_norm": 1.4676372319537476, "learning_rate": 8.371470647640886e-06, "loss": 0.5062, "step": 17820 }, { "epoch": 0.3382146515421678, "grad_norm": 1.4929403856742276, "learning_rate": 8.369025077183681e-06, "loss": 0.4722, "step": 17830 }, { "epoch": 0.33840434007359915, "grad_norm": 1.3920927333034283, "learning_rate": 8.366578029643938e-06, "loss": 0.5011, "step": 17840 }, { "epoch": 0.3385940286050305, "grad_norm": 1.587448777527757, "learning_rate": 8.364129506094516e-06, "loss": 0.4753, "step": 17850 }, { "epoch": 0.33878371713646194, "grad_norm": 1.3464414680045087, "learning_rate": 8.361679507608921e-06, "loss": 0.4602, "step": 17860 }, { "epoch": 0.3389734056678933, "grad_norm": 1.4129961296999214, "learning_rate": 8.359228035261307e-06, "loss": 0.5028, "step": 17870 }, { "epoch": 0.3391630941993247, "grad_norm": 1.2234443564642867, "learning_rate": 8.356775090126475e-06, "loss": 0.4824, "step": 17880 }, { "epoch": 0.3393527827307561, "grad_norm": 1.3920895939651141, "learning_rate": 8.35432067327987e-06, "loss": 0.5055, "step": 17890 }, { "epoch": 0.3395424712621875, "grad_norm": 1.4870954902708207, "learning_rate": 8.351864785797582e-06, "loss": 0.505, "step": 17900 }, { "epoch": 0.33973215979361887, "grad_norm": 1.217454886681824, "learning_rate": 8.349407428756344e-06, "loss": 0.4542, "step": 17910 }, { "epoch": 0.3399218483250503, "grad_norm": 1.4007820546732506, "learning_rate": 8.34694860323354e-06, "loss": 0.47, "step": 17920 }, { "epoch": 0.34011153685648166, "grad_norm": 1.5110535010156465, "learning_rate": 8.344488310307193e-06, "loss": 0.4884, "step": 17930 }, { "epoch": 0.340301225387913, "grad_norm": 1.3857894653097207, "learning_rate": 8.342026551055966e-06, "loss": 0.5134, "step": 17940 }, { "epoch": 0.34049091391934444, "grad_norm": 1.5530961159560297, "learning_rate": 8.339563326559173e-06, "loss": 0.4762, "step": 17950 }, { "epoch": 0.3406806024507758, "grad_norm": 1.3761144683370676, "learning_rate": 8.337098637896763e-06, "loss": 0.489, "step": 17960 }, { "epoch": 0.3408702909822072, "grad_norm": 1.3941692978856783, "learning_rate": 8.334632486149333e-06, "loss": 0.4984, "step": 17970 }, { "epoch": 0.3410599795136386, "grad_norm": 1.3753498100500743, "learning_rate": 8.332164872398116e-06, "loss": 0.4625, "step": 17980 }, { "epoch": 0.34124966804507, "grad_norm": 1.4415958632982089, "learning_rate": 8.32969579772499e-06, "loss": 0.4428, "step": 17990 }, { "epoch": 0.3414393565765014, "grad_norm": 1.2772331896968547, "learning_rate": 8.32722526321247e-06, "loss": 0.4877, "step": 18000 }, { "epoch": 0.3416290451079328, "grad_norm": 1.3660434420562182, "learning_rate": 8.324753269943717e-06, "loss": 0.4698, "step": 18010 }, { "epoch": 0.34181873363936416, "grad_norm": 1.3788140077889106, "learning_rate": 8.322279819002524e-06, "loss": 0.4831, "step": 18020 }, { "epoch": 0.3420084221707956, "grad_norm": 1.22066976204121, "learning_rate": 8.319804911473329e-06, "loss": 0.4953, "step": 18030 }, { "epoch": 0.34219811070222694, "grad_norm": 1.403913082433395, "learning_rate": 8.317328548441203e-06, "loss": 0.4808, "step": 18040 }, { "epoch": 0.3423877992336583, "grad_norm": 1.4276101131118761, "learning_rate": 8.314850730991863e-06, "loss": 0.5123, "step": 18050 }, { "epoch": 0.3425774877650897, "grad_norm": 1.2142105517163617, "learning_rate": 8.312371460211658e-06, "loss": 0.4652, "step": 18060 }, { "epoch": 0.3427671762965211, "grad_norm": 1.4473569065799807, "learning_rate": 8.309890737187574e-06, "loss": 0.4969, "step": 18070 }, { "epoch": 0.3429568648279525, "grad_norm": 1.5232873490572671, "learning_rate": 8.307408563007233e-06, "loss": 0.4674, "step": 18080 }, { "epoch": 0.3431465533593839, "grad_norm": 1.3091593924750584, "learning_rate": 8.3049249387589e-06, "loss": 0.4671, "step": 18090 }, { "epoch": 0.3433362418908153, "grad_norm": 1.6268406450866468, "learning_rate": 8.302439865531468e-06, "loss": 0.4834, "step": 18100 }, { "epoch": 0.34352593042224666, "grad_norm": 1.187530535509114, "learning_rate": 8.29995334441447e-06, "loss": 0.4402, "step": 18110 }, { "epoch": 0.3437156189536781, "grad_norm": 1.6897414651900746, "learning_rate": 8.29746537649807e-06, "loss": 0.4868, "step": 18120 }, { "epoch": 0.34390530748510945, "grad_norm": 1.6401721752537055, "learning_rate": 8.294975962873069e-06, "loss": 0.4798, "step": 18130 }, { "epoch": 0.34409499601654087, "grad_norm": 1.2763056764586886, "learning_rate": 8.292485104630903e-06, "loss": 0.4808, "step": 18140 }, { "epoch": 0.34428468454797223, "grad_norm": 1.5440426783332635, "learning_rate": 8.289992802863638e-06, "loss": 0.4738, "step": 18150 }, { "epoch": 0.3444743730794036, "grad_norm": 1.350878611378826, "learning_rate": 8.287499058663974e-06, "loss": 0.4448, "step": 18160 }, { "epoch": 0.344664061610835, "grad_norm": 1.2117722810969416, "learning_rate": 8.285003873125248e-06, "loss": 0.4735, "step": 18170 }, { "epoch": 0.3448537501422664, "grad_norm": 1.5482367083726103, "learning_rate": 8.28250724734142e-06, "loss": 0.4923, "step": 18180 }, { "epoch": 0.3450434386736978, "grad_norm": 1.1142924843384783, "learning_rate": 8.280009182407087e-06, "loss": 0.4702, "step": 18190 }, { "epoch": 0.34523312720512916, "grad_norm": 1.4236221133473859, "learning_rate": 8.277509679417476e-06, "loss": 0.4688, "step": 18200 }, { "epoch": 0.3454228157365606, "grad_norm": 1.325258962593217, "learning_rate": 8.27500873946845e-06, "loss": 0.4996, "step": 18210 }, { "epoch": 0.34561250426799195, "grad_norm": 1.5013567455161414, "learning_rate": 8.27250636365649e-06, "loss": 0.4626, "step": 18220 }, { "epoch": 0.34580219279942337, "grad_norm": 1.2288356237871811, "learning_rate": 8.270002553078717e-06, "loss": 0.4753, "step": 18230 }, { "epoch": 0.34599188133085473, "grad_norm": 1.6120038945915491, "learning_rate": 8.267497308832877e-06, "loss": 0.4846, "step": 18240 }, { "epoch": 0.34618156986228615, "grad_norm": 1.4274503389975925, "learning_rate": 8.264990632017343e-06, "loss": 0.4814, "step": 18250 }, { "epoch": 0.3463712583937175, "grad_norm": 1.5782241378725017, "learning_rate": 8.262482523731119e-06, "loss": 0.4682, "step": 18260 }, { "epoch": 0.3465609469251489, "grad_norm": 1.291002483459425, "learning_rate": 8.259972985073836e-06, "loss": 0.4523, "step": 18270 }, { "epoch": 0.3467506354565803, "grad_norm": 1.5056782672745788, "learning_rate": 8.257462017145752e-06, "loss": 0.4841, "step": 18280 }, { "epoch": 0.34694032398801167, "grad_norm": 1.4568466599253211, "learning_rate": 8.25494962104775e-06, "loss": 0.4972, "step": 18290 }, { "epoch": 0.3471300125194431, "grad_norm": 1.4377078431935948, "learning_rate": 8.252435797881341e-06, "loss": 0.4701, "step": 18300 }, { "epoch": 0.34731970105087445, "grad_norm": 1.6381170998756047, "learning_rate": 8.249920548748664e-06, "loss": 0.4928, "step": 18310 }, { "epoch": 0.34750938958230587, "grad_norm": 1.153711946664799, "learning_rate": 8.247403874752475e-06, "loss": 0.4864, "step": 18320 }, { "epoch": 0.34769907811373724, "grad_norm": 1.6787473459252107, "learning_rate": 8.244885776996164e-06, "loss": 0.4601, "step": 18330 }, { "epoch": 0.34788876664516866, "grad_norm": 1.4617477668085699, "learning_rate": 8.24236625658374e-06, "loss": 0.4792, "step": 18340 }, { "epoch": 0.3480784551766, "grad_norm": 1.5671385880402375, "learning_rate": 8.239845314619836e-06, "loss": 0.4595, "step": 18350 }, { "epoch": 0.3482681437080314, "grad_norm": 1.647634971133222, "learning_rate": 8.23732295220971e-06, "loss": 0.4699, "step": 18360 }, { "epoch": 0.3484578322394628, "grad_norm": 1.4856271910084673, "learning_rate": 8.234799170459241e-06, "loss": 0.5088, "step": 18370 }, { "epoch": 0.34864752077089417, "grad_norm": 4.548956230895349, "learning_rate": 8.232273970474937e-06, "loss": 0.5011, "step": 18380 }, { "epoch": 0.3488372093023256, "grad_norm": 1.4971383711789672, "learning_rate": 8.229747353363914e-06, "loss": 0.4808, "step": 18390 }, { "epoch": 0.34902689783375695, "grad_norm": 1.272691357260161, "learning_rate": 8.227219320233923e-06, "loss": 0.4716, "step": 18400 }, { "epoch": 0.3492165863651884, "grad_norm": 1.0935872857384565, "learning_rate": 8.224689872193328e-06, "loss": 0.4424, "step": 18410 }, { "epoch": 0.34940627489661974, "grad_norm": 1.536766290072999, "learning_rate": 8.222159010351114e-06, "loss": 0.4914, "step": 18420 }, { "epoch": 0.34959596342805116, "grad_norm": 1.4720267665636781, "learning_rate": 8.21962673581689e-06, "loss": 0.479, "step": 18430 }, { "epoch": 0.3497856519594825, "grad_norm": 1.478996498060629, "learning_rate": 8.217093049700883e-06, "loss": 0.4755, "step": 18440 }, { "epoch": 0.34997534049091394, "grad_norm": 1.4552006423990829, "learning_rate": 8.214557953113932e-06, "loss": 0.4782, "step": 18450 }, { "epoch": 0.3501650290223453, "grad_norm": 1.604036128554052, "learning_rate": 8.212021447167507e-06, "loss": 0.4353, "step": 18460 }, { "epoch": 0.3503547175537767, "grad_norm": 1.3783475367950297, "learning_rate": 8.209483532973683e-06, "loss": 0.4714, "step": 18470 }, { "epoch": 0.3505444060852081, "grad_norm": 1.2079222525977622, "learning_rate": 8.206944211645164e-06, "loss": 0.4599, "step": 18480 }, { "epoch": 0.35073409461663946, "grad_norm": 1.3433717750432561, "learning_rate": 8.20440348429526e-06, "loss": 0.4628, "step": 18490 }, { "epoch": 0.3509237831480709, "grad_norm": 1.5237315349850133, "learning_rate": 8.201861352037906e-06, "loss": 0.5055, "step": 18500 }, { "epoch": 0.35111347167950224, "grad_norm": 1.60275556776077, "learning_rate": 8.19931781598765e-06, "loss": 0.4786, "step": 18510 }, { "epoch": 0.35130316021093366, "grad_norm": 1.409256658450161, "learning_rate": 8.196772877259651e-06, "loss": 0.4722, "step": 18520 }, { "epoch": 0.351492848742365, "grad_norm": 1.5601325846183156, "learning_rate": 8.194226536969691e-06, "loss": 0.4857, "step": 18530 }, { "epoch": 0.35168253727379645, "grad_norm": 1.6087849594220365, "learning_rate": 8.191678796234164e-06, "loss": 0.5169, "step": 18540 }, { "epoch": 0.3518722258052278, "grad_norm": 1.394462465259266, "learning_rate": 8.189129656170072e-06, "loss": 0.4877, "step": 18550 }, { "epoch": 0.35206191433665923, "grad_norm": 1.4292199064772138, "learning_rate": 8.186579117895038e-06, "loss": 0.4723, "step": 18560 }, { "epoch": 0.3522516028680906, "grad_norm": 1.4725033254909203, "learning_rate": 8.184027182527295e-06, "loss": 0.4487, "step": 18570 }, { "epoch": 0.35244129139952196, "grad_norm": 1.2289340115411922, "learning_rate": 8.181473851185688e-06, "loss": 0.488, "step": 18580 }, { "epoch": 0.3526309799309534, "grad_norm": 1.6222153638331405, "learning_rate": 8.178919124989677e-06, "loss": 0.4892, "step": 18590 }, { "epoch": 0.35282066846238475, "grad_norm": 1.4763462491017547, "learning_rate": 8.176363005059327e-06, "loss": 0.489, "step": 18600 }, { "epoch": 0.35301035699381617, "grad_norm": 1.6031040619713866, "learning_rate": 8.173805492515322e-06, "loss": 0.5078, "step": 18610 }, { "epoch": 0.35320004552524753, "grad_norm": 1.4453689412357411, "learning_rate": 8.171246588478949e-06, "loss": 0.4981, "step": 18620 }, { "epoch": 0.35338973405667895, "grad_norm": 1.8164074959815115, "learning_rate": 8.168686294072115e-06, "loss": 0.4777, "step": 18630 }, { "epoch": 0.3535794225881103, "grad_norm": 1.352003118689137, "learning_rate": 8.166124610417326e-06, "loss": 0.4935, "step": 18640 }, { "epoch": 0.35376911111954173, "grad_norm": 1.4116176238949578, "learning_rate": 8.163561538637702e-06, "loss": 0.4682, "step": 18650 }, { "epoch": 0.3539587996509731, "grad_norm": 1.4014408118461361, "learning_rate": 8.16099707985697e-06, "loss": 0.482, "step": 18660 }, { "epoch": 0.3541484881824045, "grad_norm": 1.4498811509429725, "learning_rate": 8.158431235199472e-06, "loss": 0.473, "step": 18670 }, { "epoch": 0.3543381767138359, "grad_norm": 1.3806889223020173, "learning_rate": 8.155864005790147e-06, "loss": 0.5007, "step": 18680 }, { "epoch": 0.35452786524526725, "grad_norm": 1.5167030184281596, "learning_rate": 8.153295392754546e-06, "loss": 0.4955, "step": 18690 }, { "epoch": 0.35471755377669867, "grad_norm": 1.5675196027287306, "learning_rate": 8.15072539721883e-06, "loss": 0.5112, "step": 18700 }, { "epoch": 0.35490724230813003, "grad_norm": 1.330197644662648, "learning_rate": 8.14815402030976e-06, "loss": 0.4768, "step": 18710 }, { "epoch": 0.35509693083956145, "grad_norm": 1.3901442118638603, "learning_rate": 8.145581263154705e-06, "loss": 0.5151, "step": 18720 }, { "epoch": 0.3552866193709928, "grad_norm": 1.4924089614141423, "learning_rate": 8.143007126881641e-06, "loss": 0.4851, "step": 18730 }, { "epoch": 0.35547630790242424, "grad_norm": 1.2936711591679995, "learning_rate": 8.140431612619148e-06, "loss": 0.4537, "step": 18740 }, { "epoch": 0.3556659964338556, "grad_norm": 1.3507042182512101, "learning_rate": 8.137854721496408e-06, "loss": 0.4511, "step": 18750 }, { "epoch": 0.355855684965287, "grad_norm": 1.8763363493081535, "learning_rate": 8.135276454643209e-06, "loss": 0.4667, "step": 18760 }, { "epoch": 0.3560453734967184, "grad_norm": 1.300972251155895, "learning_rate": 8.132696813189939e-06, "loss": 0.4872, "step": 18770 }, { "epoch": 0.35623506202814975, "grad_norm": 1.6705375090261527, "learning_rate": 8.130115798267593e-06, "loss": 0.4467, "step": 18780 }, { "epoch": 0.35642475055958117, "grad_norm": 1.3141276592985347, "learning_rate": 8.127533411007767e-06, "loss": 0.4519, "step": 18790 }, { "epoch": 0.35661443909101254, "grad_norm": 1.6238582499835073, "learning_rate": 8.124949652542656e-06, "loss": 0.4779, "step": 18800 }, { "epoch": 0.35680412762244396, "grad_norm": 1.3967146485233979, "learning_rate": 8.122364524005058e-06, "loss": 0.4663, "step": 18810 }, { "epoch": 0.3569938161538753, "grad_norm": 1.4638026106472495, "learning_rate": 8.119778026528368e-06, "loss": 0.448, "step": 18820 }, { "epoch": 0.35718350468530674, "grad_norm": 1.4762359442603774, "learning_rate": 8.117190161246593e-06, "loss": 0.4684, "step": 18830 }, { "epoch": 0.3573731932167381, "grad_norm": 1.3253315879387684, "learning_rate": 8.114600929294326e-06, "loss": 0.4667, "step": 18840 }, { "epoch": 0.3575628817481695, "grad_norm": 1.3892744355898066, "learning_rate": 8.112010331806765e-06, "loss": 0.4641, "step": 18850 }, { "epoch": 0.3577525702796009, "grad_norm": 1.3771691848753842, "learning_rate": 8.109418369919707e-06, "loss": 0.4735, "step": 18860 }, { "epoch": 0.3579422588110323, "grad_norm": 1.5067842728341694, "learning_rate": 8.106825044769547e-06, "loss": 0.4879, "step": 18870 }, { "epoch": 0.3581319473424637, "grad_norm": 1.3855497746354621, "learning_rate": 8.104230357493276e-06, "loss": 0.4656, "step": 18880 }, { "epoch": 0.35832163587389504, "grad_norm": 1.3599718786597446, "learning_rate": 8.101634309228485e-06, "loss": 0.4768, "step": 18890 }, { "epoch": 0.35851132440532646, "grad_norm": 1.4915599110911542, "learning_rate": 8.099036901113358e-06, "loss": 0.4579, "step": 18900 }, { "epoch": 0.3587010129367578, "grad_norm": 1.5820467030044025, "learning_rate": 8.096438134286679e-06, "loss": 0.4677, "step": 18910 }, { "epoch": 0.35889070146818924, "grad_norm": 1.3214643749557997, "learning_rate": 8.093838009887825e-06, "loss": 0.4992, "step": 18920 }, { "epoch": 0.3590803899996206, "grad_norm": 1.4126872422902654, "learning_rate": 8.09123652905677e-06, "loss": 0.4711, "step": 18930 }, { "epoch": 0.35927007853105203, "grad_norm": 1.3724506695026293, "learning_rate": 8.088633692934082e-06, "loss": 0.4722, "step": 18940 }, { "epoch": 0.3594597670624834, "grad_norm": 1.3810551546687813, "learning_rate": 8.086029502660921e-06, "loss": 0.4653, "step": 18950 }, { "epoch": 0.3596494555939148, "grad_norm": 1.4609291366696664, "learning_rate": 8.083423959379046e-06, "loss": 0.4449, "step": 18960 }, { "epoch": 0.3598391441253462, "grad_norm": 1.3705055917283453, "learning_rate": 8.080817064230805e-06, "loss": 0.4802, "step": 18970 }, { "epoch": 0.3600288326567776, "grad_norm": 1.372727696622747, "learning_rate": 8.07820881835914e-06, "loss": 0.4941, "step": 18980 }, { "epoch": 0.36021852118820896, "grad_norm": 1.390950409733369, "learning_rate": 8.075599222907582e-06, "loss": 0.4538, "step": 18990 }, { "epoch": 0.3604082097196403, "grad_norm": 1.4307202310171658, "learning_rate": 8.072988279020262e-06, "loss": 0.4976, "step": 19000 }, { "epoch": 0.36059789825107175, "grad_norm": 1.5386377228781254, "learning_rate": 8.070375987841892e-06, "loss": 0.469, "step": 19010 }, { "epoch": 0.3607875867825031, "grad_norm": 1.2080646672028095, "learning_rate": 8.06776235051778e-06, "loss": 0.4657, "step": 19020 }, { "epoch": 0.36097727531393453, "grad_norm": 1.5182671325434014, "learning_rate": 8.065147368193825e-06, "loss": 0.4764, "step": 19030 }, { "epoch": 0.3611669638453659, "grad_norm": 2.0207310079475116, "learning_rate": 8.062531042016515e-06, "loss": 0.4602, "step": 19040 }, { "epoch": 0.3613566523767973, "grad_norm": 1.4694901908931315, "learning_rate": 8.059913373132926e-06, "loss": 0.4813, "step": 19050 }, { "epoch": 0.3615463409082287, "grad_norm": 1.7049465972102869, "learning_rate": 8.057294362690721e-06, "loss": 0.4797, "step": 19060 }, { "epoch": 0.3617360294396601, "grad_norm": 1.2558937930143093, "learning_rate": 8.054674011838158e-06, "loss": 0.492, "step": 19070 }, { "epoch": 0.36192571797109147, "grad_norm": 1.5578862686650932, "learning_rate": 8.052052321724075e-06, "loss": 0.4801, "step": 19080 }, { "epoch": 0.3621154065025229, "grad_norm": 1.4040973636711984, "learning_rate": 8.049429293497901e-06, "loss": 0.4845, "step": 19090 }, { "epoch": 0.36230509503395425, "grad_norm": 1.4872891471986156, "learning_rate": 8.04680492830965e-06, "loss": 0.4643, "step": 19100 }, { "epoch": 0.3624947835653856, "grad_norm": 1.469576908221357, "learning_rate": 8.044179227309924e-06, "loss": 0.4775, "step": 19110 }, { "epoch": 0.36268447209681703, "grad_norm": 1.5061067678056796, "learning_rate": 8.041552191649908e-06, "loss": 0.4722, "step": 19120 }, { "epoch": 0.3628741606282484, "grad_norm": 1.2653936908933507, "learning_rate": 8.038923822481379e-06, "loss": 0.4689, "step": 19130 }, { "epoch": 0.3630638491596798, "grad_norm": 1.4315236324609328, "learning_rate": 8.036294120956688e-06, "loss": 0.468, "step": 19140 }, { "epoch": 0.3632535376911112, "grad_norm": 1.5605959554629971, "learning_rate": 8.033663088228777e-06, "loss": 0.5148, "step": 19150 }, { "epoch": 0.3634432262225426, "grad_norm": 1.4665293201386405, "learning_rate": 8.031030725451173e-06, "loss": 0.4581, "step": 19160 }, { "epoch": 0.36363291475397397, "grad_norm": 1.4643628998614262, "learning_rate": 8.028397033777981e-06, "loss": 0.4904, "step": 19170 }, { "epoch": 0.3638226032854054, "grad_norm": 1.4600932246888103, "learning_rate": 8.025762014363893e-06, "loss": 0.4697, "step": 19180 }, { "epoch": 0.36401229181683675, "grad_norm": 1.35623638249219, "learning_rate": 8.023125668364179e-06, "loss": 0.457, "step": 19190 }, { "epoch": 0.3642019803482681, "grad_norm": 1.3422945198837573, "learning_rate": 8.020487996934694e-06, "loss": 0.443, "step": 19200 }, { "epoch": 0.36439166887969954, "grad_norm": 1.4893181768791763, "learning_rate": 8.017849001231873e-06, "loss": 0.4801, "step": 19210 }, { "epoch": 0.3645813574111309, "grad_norm": 1.4839733776112123, "learning_rate": 8.015208682412732e-06, "loss": 0.4814, "step": 19220 }, { "epoch": 0.3647710459425623, "grad_norm": 1.6852113556418133, "learning_rate": 8.012567041634866e-06, "loss": 0.4867, "step": 19230 }, { "epoch": 0.3649607344739937, "grad_norm": 1.544191037545056, "learning_rate": 8.00992408005645e-06, "loss": 0.4773, "step": 19240 }, { "epoch": 0.3651504230054251, "grad_norm": 1.5351727916300708, "learning_rate": 8.007279798836239e-06, "loss": 0.495, "step": 19250 }, { "epoch": 0.36534011153685647, "grad_norm": 1.6974501647976146, "learning_rate": 8.004634199133566e-06, "loss": 0.471, "step": 19260 }, { "epoch": 0.3655298000682879, "grad_norm": 1.2236879740268967, "learning_rate": 8.001987282108341e-06, "loss": 0.4873, "step": 19270 }, { "epoch": 0.36571948859971926, "grad_norm": 1.5373573239565608, "learning_rate": 7.999339048921053e-06, "loss": 0.4656, "step": 19280 }, { "epoch": 0.3659091771311507, "grad_norm": 1.2232349462058059, "learning_rate": 7.996689500732769e-06, "loss": 0.4871, "step": 19290 }, { "epoch": 0.36609886566258204, "grad_norm": 1.5483136587942072, "learning_rate": 7.994038638705128e-06, "loss": 0.4844, "step": 19300 }, { "epoch": 0.3662885541940134, "grad_norm": 1.6074414588108876, "learning_rate": 7.99138646400035e-06, "loss": 0.4919, "step": 19310 }, { "epoch": 0.3664782427254448, "grad_norm": 1.540611686901592, "learning_rate": 7.98873297778123e-06, "loss": 0.4891, "step": 19320 }, { "epoch": 0.3666679312568762, "grad_norm": 1.4129956053578092, "learning_rate": 7.986078181211135e-06, "loss": 0.4997, "step": 19330 }, { "epoch": 0.3668576197883076, "grad_norm": 1.5250765009462777, "learning_rate": 7.983422075454006e-06, "loss": 0.4458, "step": 19340 }, { "epoch": 0.367047308319739, "grad_norm": 1.5839711567175208, "learning_rate": 7.980764661674366e-06, "loss": 0.5075, "step": 19350 }, { "epoch": 0.3672369968511704, "grad_norm": 1.3127559565144244, "learning_rate": 7.9781059410373e-06, "loss": 0.4527, "step": 19360 }, { "epoch": 0.36742668538260176, "grad_norm": 1.4646213920064, "learning_rate": 7.975445914708472e-06, "loss": 0.5067, "step": 19370 }, { "epoch": 0.3676163739140332, "grad_norm": 1.6582743666901127, "learning_rate": 7.972784583854122e-06, "loss": 0.465, "step": 19380 }, { "epoch": 0.36780606244546454, "grad_norm": 1.593587847782907, "learning_rate": 7.970121949641056e-06, "loss": 0.4695, "step": 19390 }, { "epoch": 0.36799575097689596, "grad_norm": 2.1398586402046718, "learning_rate": 7.967458013236653e-06, "loss": 0.4822, "step": 19400 }, { "epoch": 0.36818543950832733, "grad_norm": 1.4495791702295802, "learning_rate": 7.964792775808862e-06, "loss": 0.4637, "step": 19410 }, { "epoch": 0.3683751280397587, "grad_norm": 1.6392870598372795, "learning_rate": 7.962126238526208e-06, "loss": 0.4726, "step": 19420 }, { "epoch": 0.3685648165711901, "grad_norm": 1.1562799241438753, "learning_rate": 7.959458402557777e-06, "loss": 0.4497, "step": 19430 }, { "epoch": 0.3687545051026215, "grad_norm": 1.4064728212718718, "learning_rate": 7.95678926907323e-06, "loss": 0.4724, "step": 19440 }, { "epoch": 0.3689441936340529, "grad_norm": 1.488791328679968, "learning_rate": 7.954118839242799e-06, "loss": 0.4831, "step": 19450 }, { "epoch": 0.36913388216548426, "grad_norm": 1.342530408342459, "learning_rate": 7.951447114237277e-06, "loss": 0.4469, "step": 19460 }, { "epoch": 0.3693235706969157, "grad_norm": 1.6147600382471337, "learning_rate": 7.948774095228031e-06, "loss": 0.4679, "step": 19470 }, { "epoch": 0.36951325922834705, "grad_norm": 1.8801097969523715, "learning_rate": 7.946099783386994e-06, "loss": 0.4561, "step": 19480 }, { "epoch": 0.36970294775977847, "grad_norm": 1.3499916135136996, "learning_rate": 7.943424179886665e-06, "loss": 0.4786, "step": 19490 }, { "epoch": 0.36989263629120983, "grad_norm": 1.4479414976819553, "learning_rate": 7.94074728590011e-06, "loss": 0.5127, "step": 19500 }, { "epoch": 0.3700823248226412, "grad_norm": 1.5263941481748144, "learning_rate": 7.938069102600958e-06, "loss": 0.4437, "step": 19510 }, { "epoch": 0.3702720133540726, "grad_norm": 1.3007720714105289, "learning_rate": 7.935389631163408e-06, "loss": 0.4775, "step": 19520 }, { "epoch": 0.370461701885504, "grad_norm": 1.7958247136972074, "learning_rate": 7.93270887276222e-06, "loss": 0.4748, "step": 19530 }, { "epoch": 0.3706513904169354, "grad_norm": 1.5519384620967904, "learning_rate": 7.93002682857272e-06, "loss": 0.4748, "step": 19540 }, { "epoch": 0.37084107894836676, "grad_norm": 1.4689688311817883, "learning_rate": 7.927343499770798e-06, "loss": 0.4803, "step": 19550 }, { "epoch": 0.3710307674797982, "grad_norm": 1.2325331225629799, "learning_rate": 7.924658887532905e-06, "loss": 0.4792, "step": 19560 }, { "epoch": 0.37122045601122955, "grad_norm": 1.4162535395294475, "learning_rate": 7.921972993036059e-06, "loss": 0.4671, "step": 19570 }, { "epoch": 0.37141014454266097, "grad_norm": 1.6275226176256827, "learning_rate": 7.919285817457834e-06, "loss": 0.4629, "step": 19580 }, { "epoch": 0.37159983307409233, "grad_norm": 1.285118076519333, "learning_rate": 7.916597361976372e-06, "loss": 0.4711, "step": 19590 }, { "epoch": 0.37178952160552375, "grad_norm": 1.5479468259074607, "learning_rate": 7.91390762777037e-06, "loss": 0.472, "step": 19600 }, { "epoch": 0.3719792101369551, "grad_norm": 1.7856003787248942, "learning_rate": 7.911216616019092e-06, "loss": 0.4574, "step": 19610 }, { "epoch": 0.3721688986683865, "grad_norm": 1.6263727547254583, "learning_rate": 7.908524327902359e-06, "loss": 0.4643, "step": 19620 }, { "epoch": 0.3723585871998179, "grad_norm": 1.4495774505030934, "learning_rate": 7.905830764600548e-06, "loss": 0.4836, "step": 19630 }, { "epoch": 0.37254827573124927, "grad_norm": 1.3395768182544296, "learning_rate": 7.903135927294602e-06, "loss": 0.5021, "step": 19640 }, { "epoch": 0.3727379642626807, "grad_norm": 1.4352181279809755, "learning_rate": 7.90043981716602e-06, "loss": 0.4956, "step": 19650 }, { "epoch": 0.37292765279411205, "grad_norm": 1.2320237184585545, "learning_rate": 7.897742435396855e-06, "loss": 0.4597, "step": 19660 }, { "epoch": 0.3731173413255435, "grad_norm": 1.3568631551552104, "learning_rate": 7.895043783169722e-06, "loss": 0.4592, "step": 19670 }, { "epoch": 0.37330702985697484, "grad_norm": 1.5992522744271604, "learning_rate": 7.892343861667793e-06, "loss": 0.4695, "step": 19680 }, { "epoch": 0.37349671838840626, "grad_norm": 1.635733046606226, "learning_rate": 7.889642672074793e-06, "loss": 0.4854, "step": 19690 }, { "epoch": 0.3736864069198376, "grad_norm": 1.5157025022989568, "learning_rate": 7.886940215575008e-06, "loss": 0.4984, "step": 19700 }, { "epoch": 0.37387609545126904, "grad_norm": 1.4854656453871364, "learning_rate": 7.884236493353275e-06, "loss": 0.4677, "step": 19710 }, { "epoch": 0.3740657839827004, "grad_norm": 1.2810713675612455, "learning_rate": 7.881531506594989e-06, "loss": 0.4482, "step": 19720 }, { "epoch": 0.37425547251413177, "grad_norm": 1.340829892809061, "learning_rate": 7.878825256486094e-06, "loss": 0.4694, "step": 19730 }, { "epoch": 0.3744451610455632, "grad_norm": 1.2938077060834572, "learning_rate": 7.876117744213097e-06, "loss": 0.4467, "step": 19740 }, { "epoch": 0.37463484957699456, "grad_norm": 1.4715340708539755, "learning_rate": 7.873408970963053e-06, "loss": 0.4775, "step": 19750 }, { "epoch": 0.374824538108426, "grad_norm": 1.5530774001972607, "learning_rate": 7.870698937923567e-06, "loss": 0.4942, "step": 19760 }, { "epoch": 0.37501422663985734, "grad_norm": 1.324712253232126, "learning_rate": 7.867987646282799e-06, "loss": 0.4863, "step": 19770 }, { "epoch": 0.37520391517128876, "grad_norm": 1.4591186122935131, "learning_rate": 7.865275097229466e-06, "loss": 0.4804, "step": 19780 }, { "epoch": 0.3753936037027201, "grad_norm": 1.6146463128383863, "learning_rate": 7.862561291952826e-06, "loss": 0.4739, "step": 19790 }, { "epoch": 0.37558329223415154, "grad_norm": 1.595505915883136, "learning_rate": 7.859846231642697e-06, "loss": 0.4638, "step": 19800 }, { "epoch": 0.3757729807655829, "grad_norm": 1.348298854072047, "learning_rate": 7.857129917489445e-06, "loss": 0.4699, "step": 19810 }, { "epoch": 0.37596266929701433, "grad_norm": 1.339059095097965, "learning_rate": 7.85441235068398e-06, "loss": 0.4766, "step": 19820 }, { "epoch": 0.3761523578284457, "grad_norm": 1.5715856097765952, "learning_rate": 7.851693532417767e-06, "loss": 0.4623, "step": 19830 }, { "epoch": 0.37634204635987706, "grad_norm": 1.3436948026976538, "learning_rate": 7.848973463882821e-06, "loss": 0.4716, "step": 19840 }, { "epoch": 0.3765317348913085, "grad_norm": 1.518789240597304, "learning_rate": 7.8462521462717e-06, "loss": 0.4715, "step": 19850 }, { "epoch": 0.37672142342273984, "grad_norm": 1.4584598904053865, "learning_rate": 7.843529580777512e-06, "loss": 0.4809, "step": 19860 }, { "epoch": 0.37691111195417126, "grad_norm": 1.6900791848521983, "learning_rate": 7.840805768593912e-06, "loss": 0.5031, "step": 19870 }, { "epoch": 0.3771008004856026, "grad_norm": 1.6626641524951054, "learning_rate": 7.8380807109151e-06, "loss": 0.5019, "step": 19880 }, { "epoch": 0.37729048901703405, "grad_norm": 1.4122704151451726, "learning_rate": 7.835354408935828e-06, "loss": 0.4587, "step": 19890 }, { "epoch": 0.3774801775484654, "grad_norm": 1.5393486610514733, "learning_rate": 7.832626863851384e-06, "loss": 0.5084, "step": 19900 }, { "epoch": 0.37766986607989683, "grad_norm": 1.481227352434755, "learning_rate": 7.829898076857612e-06, "loss": 0.4668, "step": 19910 }, { "epoch": 0.3778595546113282, "grad_norm": 1.4679657219223097, "learning_rate": 7.827168049150892e-06, "loss": 0.4771, "step": 19920 }, { "epoch": 0.37804924314275956, "grad_norm": 1.5116644505956078, "learning_rate": 7.824436781928151e-06, "loss": 0.4707, "step": 19930 }, { "epoch": 0.378238931674191, "grad_norm": 1.5691031456676692, "learning_rate": 7.821704276386857e-06, "loss": 0.4499, "step": 19940 }, { "epoch": 0.37842862020562235, "grad_norm": 1.4404717539437115, "learning_rate": 7.818970533725025e-06, "loss": 0.4692, "step": 19950 }, { "epoch": 0.37861830873705377, "grad_norm": 1.4454470476119832, "learning_rate": 7.816235555141213e-06, "loss": 0.4852, "step": 19960 }, { "epoch": 0.37880799726848513, "grad_norm": 1.3142907996837323, "learning_rate": 7.813499341834515e-06, "loss": 0.488, "step": 19970 }, { "epoch": 0.37899768579991655, "grad_norm": 1.5482894332159622, "learning_rate": 7.81076189500457e-06, "loss": 0.4878, "step": 19980 }, { "epoch": 0.3791873743313479, "grad_norm": 1.5454047619676252, "learning_rate": 7.808023215851559e-06, "loss": 0.4921, "step": 19990 }, { "epoch": 0.37937706286277934, "grad_norm": 1.537405274863433, "learning_rate": 7.8052833055762e-06, "loss": 0.4645, "step": 20000 }, { "epoch": 0.3795667513942107, "grad_norm": 1.222635221088207, "learning_rate": 7.802542165379754e-06, "loss": 0.4393, "step": 20010 }, { "epoch": 0.3797564399256421, "grad_norm": 1.2477735668856789, "learning_rate": 7.79979979646402e-06, "loss": 0.4685, "step": 20020 }, { "epoch": 0.3799461284570735, "grad_norm": 1.1766953733512304, "learning_rate": 7.797056200031335e-06, "loss": 0.4551, "step": 20030 }, { "epoch": 0.38013581698850485, "grad_norm": 1.2314644403787742, "learning_rate": 7.794311377284573e-06, "loss": 0.4491, "step": 20040 }, { "epoch": 0.38032550551993627, "grad_norm": 1.515804521771148, "learning_rate": 7.79156532942715e-06, "loss": 0.4724, "step": 20050 }, { "epoch": 0.38051519405136763, "grad_norm": 1.4314057488643765, "learning_rate": 7.788818057663016e-06, "loss": 0.4663, "step": 20060 }, { "epoch": 0.38070488258279905, "grad_norm": 1.491980623722452, "learning_rate": 7.786069563196657e-06, "loss": 0.4708, "step": 20070 }, { "epoch": 0.3808945711142304, "grad_norm": 1.384110074196164, "learning_rate": 7.783319847233097e-06, "loss": 0.4767, "step": 20080 }, { "epoch": 0.38108425964566184, "grad_norm": 1.7322202982004473, "learning_rate": 7.780568910977894e-06, "loss": 0.4647, "step": 20090 }, { "epoch": 0.3812739481770932, "grad_norm": 1.3279525829246308, "learning_rate": 7.777816755637143e-06, "loss": 0.4541, "step": 20100 }, { "epoch": 0.3814636367085246, "grad_norm": 1.3752343135472764, "learning_rate": 7.77506338241747e-06, "loss": 0.4758, "step": 20110 }, { "epoch": 0.381653325239956, "grad_norm": 1.4675771376174358, "learning_rate": 7.772308792526037e-06, "loss": 0.4833, "step": 20120 }, { "epoch": 0.3818430137713874, "grad_norm": 1.4141972821209703, "learning_rate": 7.769552987170543e-06, "loss": 0.4767, "step": 20130 }, { "epoch": 0.38203270230281877, "grad_norm": 1.2543495504978028, "learning_rate": 7.766795967559213e-06, "loss": 0.4626, "step": 20140 }, { "epoch": 0.38222239083425014, "grad_norm": 1.3982952637415171, "learning_rate": 7.764037734900811e-06, "loss": 0.4716, "step": 20150 }, { "epoch": 0.38241207936568156, "grad_norm": 1.920567769713812, "learning_rate": 7.761278290404627e-06, "loss": 0.4652, "step": 20160 }, { "epoch": 0.3826017678971129, "grad_norm": 1.1165818101159208, "learning_rate": 7.758517635280486e-06, "loss": 0.4985, "step": 20170 }, { "epoch": 0.38279145642854434, "grad_norm": 1.3377619284439377, "learning_rate": 7.755755770738741e-06, "loss": 0.4775, "step": 20180 }, { "epoch": 0.3829811449599757, "grad_norm": 1.4619442123428408, "learning_rate": 7.75299269799028e-06, "loss": 0.4739, "step": 20190 }, { "epoch": 0.3831708334914071, "grad_norm": 1.535352538568172, "learning_rate": 7.750228418246517e-06, "loss": 0.4611, "step": 20200 }, { "epoch": 0.3833605220228385, "grad_norm": 1.730809014561608, "learning_rate": 7.747462932719394e-06, "loss": 0.4838, "step": 20210 }, { "epoch": 0.3835502105542699, "grad_norm": 1.4262747935192053, "learning_rate": 7.744696242621385e-06, "loss": 0.4667, "step": 20220 }, { "epoch": 0.3837398990857013, "grad_norm": 1.5368568117293069, "learning_rate": 7.741928349165488e-06, "loss": 0.4889, "step": 20230 }, { "epoch": 0.3839295876171327, "grad_norm": 1.6251413361763865, "learning_rate": 7.739159253565237e-06, "loss": 0.4471, "step": 20240 }, { "epoch": 0.38411927614856406, "grad_norm": 1.4312441320834355, "learning_rate": 7.736388957034682e-06, "loss": 0.5043, "step": 20250 }, { "epoch": 0.3843089646799954, "grad_norm": 1.3805438072558833, "learning_rate": 7.733617460788407e-06, "loss": 0.4747, "step": 20260 }, { "epoch": 0.38449865321142684, "grad_norm": 1.5018961158524886, "learning_rate": 7.73084476604152e-06, "loss": 0.4991, "step": 20270 }, { "epoch": 0.3846883417428582, "grad_norm": 1.640186603011992, "learning_rate": 7.728070874009655e-06, "loss": 0.4558, "step": 20280 }, { "epoch": 0.38487803027428963, "grad_norm": 1.6666419369630878, "learning_rate": 7.725295785908966e-06, "loss": 0.4777, "step": 20290 }, { "epoch": 0.385067718805721, "grad_norm": 1.5219936906657612, "learning_rate": 7.722519502956141e-06, "loss": 0.4697, "step": 20300 }, { "epoch": 0.3852574073371524, "grad_norm": 1.6595181024874313, "learning_rate": 7.719742026368383e-06, "loss": 0.4816, "step": 20310 }, { "epoch": 0.3854470958685838, "grad_norm": 1.4453741792738621, "learning_rate": 7.716963357363424e-06, "loss": 0.4746, "step": 20320 }, { "epoch": 0.3856367844000152, "grad_norm": 1.3174000723162804, "learning_rate": 7.714183497159514e-06, "loss": 0.4819, "step": 20330 }, { "epoch": 0.38582647293144656, "grad_norm": 1.385536373967774, "learning_rate": 7.71140244697543e-06, "loss": 0.4606, "step": 20340 }, { "epoch": 0.3860161614628779, "grad_norm": 1.5612525799925083, "learning_rate": 7.708620208030466e-06, "loss": 0.4922, "step": 20350 }, { "epoch": 0.38620584999430935, "grad_norm": 1.582017256986177, "learning_rate": 7.705836781544443e-06, "loss": 0.4625, "step": 20360 }, { "epoch": 0.3863955385257407, "grad_norm": 1.4441046351112596, "learning_rate": 7.703052168737697e-06, "loss": 0.4958, "step": 20370 }, { "epoch": 0.38658522705717213, "grad_norm": 1.5499275491075761, "learning_rate": 7.700266370831089e-06, "loss": 0.4734, "step": 20380 }, { "epoch": 0.3867749155886035, "grad_norm": 1.5067304529046834, "learning_rate": 7.697479389045993e-06, "loss": 0.4644, "step": 20390 }, { "epoch": 0.3869646041200349, "grad_norm": 1.4246047604219432, "learning_rate": 7.694691224604311e-06, "loss": 0.4492, "step": 20400 }, { "epoch": 0.3871542926514663, "grad_norm": 1.550916423452603, "learning_rate": 7.691901878728455e-06, "loss": 0.4615, "step": 20410 }, { "epoch": 0.3873439811828977, "grad_norm": 1.5434657562426053, "learning_rate": 7.68911135264136e-06, "loss": 0.4535, "step": 20420 }, { "epoch": 0.38753366971432907, "grad_norm": 1.56039108649645, "learning_rate": 7.68631964756648e-06, "loss": 0.4644, "step": 20430 }, { "epoch": 0.3877233582457605, "grad_norm": 1.4764983918395456, "learning_rate": 7.683526764727777e-06, "loss": 0.4792, "step": 20440 }, { "epoch": 0.38791304677719185, "grad_norm": 1.5224308281495058, "learning_rate": 7.680732705349738e-06, "loss": 0.4939, "step": 20450 }, { "epoch": 0.3881027353086232, "grad_norm": 2.4513427136746704, "learning_rate": 7.677937470657367e-06, "loss": 0.4794, "step": 20460 }, { "epoch": 0.38829242384005463, "grad_norm": 1.4828934247577241, "learning_rate": 7.675141061876173e-06, "loss": 0.4703, "step": 20470 }, { "epoch": 0.388482112371486, "grad_norm": 1.4884061982849486, "learning_rate": 7.672343480232189e-06, "loss": 0.4742, "step": 20480 }, { "epoch": 0.3886718009029174, "grad_norm": 1.3956434163446025, "learning_rate": 7.669544726951962e-06, "loss": 0.4773, "step": 20490 }, { "epoch": 0.3888614894343488, "grad_norm": 1.483453180403653, "learning_rate": 7.666744803262546e-06, "loss": 0.4817, "step": 20500 }, { "epoch": 0.3890511779657802, "grad_norm": 1.4815206398942018, "learning_rate": 7.663943710391516e-06, "loss": 0.4692, "step": 20510 }, { "epoch": 0.38924086649721157, "grad_norm": 1.4890067005107797, "learning_rate": 7.66114144956695e-06, "loss": 0.4906, "step": 20520 }, { "epoch": 0.389430555028643, "grad_norm": 1.3155309636827657, "learning_rate": 7.658338022017452e-06, "loss": 0.4318, "step": 20530 }, { "epoch": 0.38962024356007435, "grad_norm": 1.3750582566497225, "learning_rate": 7.655533428972123e-06, "loss": 0.4765, "step": 20540 }, { "epoch": 0.3898099320915058, "grad_norm": 1.4469957388950614, "learning_rate": 7.652727671660585e-06, "loss": 0.4737, "step": 20550 }, { "epoch": 0.38999962062293714, "grad_norm": 1.3368734645226112, "learning_rate": 7.649920751312967e-06, "loss": 0.4511, "step": 20560 }, { "epoch": 0.3901893091543685, "grad_norm": 1.5036593872954274, "learning_rate": 7.647112669159904e-06, "loss": 0.4582, "step": 20570 }, { "epoch": 0.3903789976857999, "grad_norm": 1.3329601126654245, "learning_rate": 7.644303426432546e-06, "loss": 0.4792, "step": 20580 }, { "epoch": 0.3905686862172313, "grad_norm": 1.1237194784210034, "learning_rate": 7.641493024362554e-06, "loss": 0.4559, "step": 20590 }, { "epoch": 0.3907583747486627, "grad_norm": 1.7393793713152303, "learning_rate": 7.638681464182087e-06, "loss": 0.489, "step": 20600 }, { "epoch": 0.39094806328009407, "grad_norm": 1.6072036117073554, "learning_rate": 7.635868747123825e-06, "loss": 0.4658, "step": 20610 }, { "epoch": 0.3911377518115255, "grad_norm": 1.7098163571472298, "learning_rate": 7.633054874420942e-06, "loss": 0.4782, "step": 20620 }, { "epoch": 0.39132744034295686, "grad_norm": 1.4286973511029997, "learning_rate": 7.630239847307127e-06, "loss": 0.4713, "step": 20630 }, { "epoch": 0.3915171288743883, "grad_norm": 1.4408908147640787, "learning_rate": 7.6274236670165735e-06, "loss": 0.4914, "step": 20640 }, { "epoch": 0.39170681740581964, "grad_norm": 1.2750057856839345, "learning_rate": 7.624606334783981e-06, "loss": 0.4536, "step": 20650 }, { "epoch": 0.39189650593725106, "grad_norm": 1.8438781143516607, "learning_rate": 7.621787851844552e-06, "loss": 0.4615, "step": 20660 }, { "epoch": 0.3920861944686824, "grad_norm": 1.4231630616807465, "learning_rate": 7.6189682194339945e-06, "loss": 0.4613, "step": 20670 }, { "epoch": 0.3922758830001138, "grad_norm": 1.2470354288105037, "learning_rate": 7.61614743878852e-06, "loss": 0.4856, "step": 20680 }, { "epoch": 0.3924655715315452, "grad_norm": 1.8457938093251796, "learning_rate": 7.613325511144846e-06, "loss": 0.5164, "step": 20690 }, { "epoch": 0.3926552600629766, "grad_norm": 1.5897654698617796, "learning_rate": 7.610502437740193e-06, "loss": 0.4616, "step": 20700 }, { "epoch": 0.392844948594408, "grad_norm": 1.3629018961017771, "learning_rate": 7.607678219812276e-06, "loss": 0.4512, "step": 20710 }, { "epoch": 0.39303463712583936, "grad_norm": 1.4294388563478273, "learning_rate": 7.604852858599321e-06, "loss": 0.4866, "step": 20720 }, { "epoch": 0.3932243256572708, "grad_norm": 1.3665471607174855, "learning_rate": 7.602026355340051e-06, "loss": 0.4963, "step": 20730 }, { "epoch": 0.39341401418870214, "grad_norm": 1.5285107686549209, "learning_rate": 7.599198711273693e-06, "loss": 0.4559, "step": 20740 }, { "epoch": 0.39360370272013356, "grad_norm": 1.465573725164412, "learning_rate": 7.596369927639969e-06, "loss": 0.4862, "step": 20750 }, { "epoch": 0.39379339125156493, "grad_norm": 1.4092920371560842, "learning_rate": 7.593540005679103e-06, "loss": 0.4704, "step": 20760 }, { "epoch": 0.3939830797829963, "grad_norm": 1.5678420736928917, "learning_rate": 7.59070894663182e-06, "loss": 0.4851, "step": 20770 }, { "epoch": 0.3941727683144277, "grad_norm": 1.4675543332978183, "learning_rate": 7.587876751739342e-06, "loss": 0.4833, "step": 20780 }, { "epoch": 0.3943624568458591, "grad_norm": 2.1934217505615274, "learning_rate": 7.585043422243388e-06, "loss": 0.4399, "step": 20790 }, { "epoch": 0.3945521453772905, "grad_norm": 1.356674554732635, "learning_rate": 7.582208959386175e-06, "loss": 0.485, "step": 20800 }, { "epoch": 0.39474183390872186, "grad_norm": 1.8302757178266782, "learning_rate": 7.57937336441042e-06, "loss": 0.4732, "step": 20810 }, { "epoch": 0.3949315224401533, "grad_norm": 3.006265806885921, "learning_rate": 7.57653663855933e-06, "loss": 0.4527, "step": 20820 }, { "epoch": 0.39512121097158465, "grad_norm": 1.2485190357608311, "learning_rate": 7.573698783076613e-06, "loss": 0.4941, "step": 20830 }, { "epoch": 0.39531089950301607, "grad_norm": 1.2061157325718725, "learning_rate": 7.570859799206472e-06, "loss": 0.4618, "step": 20840 }, { "epoch": 0.39550058803444743, "grad_norm": 1.7147174429332335, "learning_rate": 7.568019688193602e-06, "loss": 0.4594, "step": 20850 }, { "epoch": 0.39569027656587885, "grad_norm": 1.3975018544391038, "learning_rate": 7.565178451283193e-06, "loss": 0.4816, "step": 20860 }, { "epoch": 0.3958799650973102, "grad_norm": 1.410009577689328, "learning_rate": 7.562336089720932e-06, "loss": 0.4923, "step": 20870 }, { "epoch": 0.3960696536287416, "grad_norm": 1.4563937419388298, "learning_rate": 7.559492604752993e-06, "loss": 0.4665, "step": 20880 }, { "epoch": 0.396259342160173, "grad_norm": 1.52793264856889, "learning_rate": 7.556647997626047e-06, "loss": 0.4859, "step": 20890 }, { "epoch": 0.39644903069160436, "grad_norm": 1.486540446877573, "learning_rate": 7.553802269587256e-06, "loss": 0.4802, "step": 20900 }, { "epoch": 0.3966387192230358, "grad_norm": 1.5435381566065438, "learning_rate": 7.550955421884274e-06, "loss": 0.4532, "step": 20910 }, { "epoch": 0.39682840775446715, "grad_norm": 1.7017157783876657, "learning_rate": 7.548107455765245e-06, "loss": 0.4847, "step": 20920 }, { "epoch": 0.39701809628589857, "grad_norm": 1.1722208861665773, "learning_rate": 7.545258372478803e-06, "loss": 0.4423, "step": 20930 }, { "epoch": 0.39720778481732993, "grad_norm": 1.3918793216777743, "learning_rate": 7.542408173274074e-06, "loss": 0.4663, "step": 20940 }, { "epoch": 0.39739747334876135, "grad_norm": 1.512795021733048, "learning_rate": 7.539556859400667e-06, "loss": 0.4955, "step": 20950 }, { "epoch": 0.3975871618801927, "grad_norm": 1.510507895109745, "learning_rate": 7.536704432108689e-06, "loss": 0.461, "step": 20960 }, { "epoch": 0.39777685041162414, "grad_norm": 4.129223482421032, "learning_rate": 7.533850892648728e-06, "loss": 0.485, "step": 20970 }, { "epoch": 0.3979665389430555, "grad_norm": 1.525419017575158, "learning_rate": 7.530996242271864e-06, "loss": 0.494, "step": 20980 }, { "epoch": 0.39815622747448687, "grad_norm": 1.9307133794745934, "learning_rate": 7.52814048222966e-06, "loss": 0.4782, "step": 20990 }, { "epoch": 0.3983459160059183, "grad_norm": 1.4405696011878402, "learning_rate": 7.525283613774169e-06, "loss": 0.4774, "step": 21000 }, { "epoch": 0.39853560453734965, "grad_norm": 1.6484542251116239, "learning_rate": 7.522425638157926e-06, "loss": 0.4633, "step": 21010 }, { "epoch": 0.3987252930687811, "grad_norm": 1.5111432061109986, "learning_rate": 7.5195665566339595e-06, "loss": 0.4855, "step": 21020 }, { "epoch": 0.39891498160021244, "grad_norm": 1.3729330784896285, "learning_rate": 7.516706370455773e-06, "loss": 0.4735, "step": 21030 }, { "epoch": 0.39910467013164386, "grad_norm": 1.1781089572303807, "learning_rate": 7.513845080877358e-06, "loss": 0.4882, "step": 21040 }, { "epoch": 0.3992943586630752, "grad_norm": 1.4005678012520564, "learning_rate": 7.5109826891531935e-06, "loss": 0.4744, "step": 21050 }, { "epoch": 0.39948404719450664, "grad_norm": 3.0950035523844517, "learning_rate": 7.508119196538237e-06, "loss": 0.4725, "step": 21060 }, { "epoch": 0.399673735725938, "grad_norm": 1.349116879132479, "learning_rate": 7.50525460428793e-06, "loss": 0.4318, "step": 21070 }, { "epoch": 0.3998634242573694, "grad_norm": 1.2036192739738825, "learning_rate": 7.502388913658198e-06, "loss": 0.4701, "step": 21080 }, { "epoch": 0.4000531127888008, "grad_norm": 1.2526561376109353, "learning_rate": 7.499522125905443e-06, "loss": 0.4676, "step": 21090 }, { "epoch": 0.40024280132023216, "grad_norm": 1.349134611330848, "learning_rate": 7.496654242286556e-06, "loss": 0.4828, "step": 21100 }, { "epoch": 0.4004324898516636, "grad_norm": 1.3669227466468148, "learning_rate": 7.493785264058902e-06, "loss": 0.4637, "step": 21110 }, { "epoch": 0.40062217838309494, "grad_norm": 1.3398256450731907, "learning_rate": 7.4909151924803284e-06, "loss": 0.5015, "step": 21120 }, { "epoch": 0.40081186691452636, "grad_norm": 1.1926866421701041, "learning_rate": 7.488044028809158e-06, "loss": 0.4554, "step": 21130 }, { "epoch": 0.4010015554459577, "grad_norm": 1.302589297065083, "learning_rate": 7.485171774304199e-06, "loss": 0.4743, "step": 21140 }, { "epoch": 0.40119124397738914, "grad_norm": 1.6638024349989493, "learning_rate": 7.482298430224737e-06, "loss": 0.4586, "step": 21150 }, { "epoch": 0.4013809325088205, "grad_norm": 1.358088626642925, "learning_rate": 7.479423997830528e-06, "loss": 0.4697, "step": 21160 }, { "epoch": 0.40157062104025193, "grad_norm": 1.362736912331566, "learning_rate": 7.476548478381813e-06, "loss": 0.4974, "step": 21170 }, { "epoch": 0.4017603095716833, "grad_norm": 1.4084370798741614, "learning_rate": 7.4736718731393065e-06, "loss": 0.4668, "step": 21180 }, { "epoch": 0.40194999810311466, "grad_norm": 2.2095093325393678, "learning_rate": 7.470794183364198e-06, "loss": 0.4704, "step": 21190 }, { "epoch": 0.4021396866345461, "grad_norm": 1.62823086070595, "learning_rate": 7.4679154103181565e-06, "loss": 0.4639, "step": 21200 }, { "epoch": 0.40232937516597744, "grad_norm": 1.3967279764057274, "learning_rate": 7.4650355552633205e-06, "loss": 0.4701, "step": 21210 }, { "epoch": 0.40251906369740886, "grad_norm": 1.4216985571531187, "learning_rate": 7.4621546194623065e-06, "loss": 0.4658, "step": 21220 }, { "epoch": 0.4027087522288402, "grad_norm": 1.419812535194673, "learning_rate": 7.459272604178203e-06, "loss": 0.4796, "step": 21230 }, { "epoch": 0.40289844076027165, "grad_norm": 1.5498089971680569, "learning_rate": 7.456389510674574e-06, "loss": 0.4994, "step": 21240 }, { "epoch": 0.403088129291703, "grad_norm": 1.6259562641536154, "learning_rate": 7.4535053402154566e-06, "loss": 0.4987, "step": 21250 }, { "epoch": 0.40327781782313443, "grad_norm": 1.315498529280489, "learning_rate": 7.4506200940653525e-06, "loss": 0.4682, "step": 21260 }, { "epoch": 0.4034675063545658, "grad_norm": 1.313379550791671, "learning_rate": 7.447733773489247e-06, "loss": 0.4765, "step": 21270 }, { "epoch": 0.4036571948859972, "grad_norm": 1.3877185715429199, "learning_rate": 7.4448463797525905e-06, "loss": 0.4651, "step": 21280 }, { "epoch": 0.4038468834174286, "grad_norm": 1.394895943507151, "learning_rate": 7.441957914121298e-06, "loss": 0.4916, "step": 21290 }, { "epoch": 0.40403657194885995, "grad_norm": 1.3804904380302956, "learning_rate": 7.439068377861766e-06, "loss": 0.4591, "step": 21300 }, { "epoch": 0.40422626048029137, "grad_norm": 1.2776462641016644, "learning_rate": 7.436177772240852e-06, "loss": 0.4457, "step": 21310 }, { "epoch": 0.40441594901172273, "grad_norm": 1.6934266063750787, "learning_rate": 7.433286098525884e-06, "loss": 0.4923, "step": 21320 }, { "epoch": 0.40460563754315415, "grad_norm": 1.364117928409184, "learning_rate": 7.430393357984662e-06, "loss": 0.4503, "step": 21330 }, { "epoch": 0.4047953260745855, "grad_norm": 1.3519025629292478, "learning_rate": 7.427499551885448e-06, "loss": 0.472, "step": 21340 }, { "epoch": 0.40498501460601694, "grad_norm": 1.7087958924465603, "learning_rate": 7.424604681496977e-06, "loss": 0.4879, "step": 21350 }, { "epoch": 0.4051747031374483, "grad_norm": 1.4868391545003057, "learning_rate": 7.421708748088445e-06, "loss": 0.4908, "step": 21360 }, { "epoch": 0.4053643916688797, "grad_norm": 1.9669147010180132, "learning_rate": 7.41881175292952e-06, "loss": 0.4774, "step": 21370 }, { "epoch": 0.4055540802003111, "grad_norm": 1.446221287690438, "learning_rate": 7.41591369729033e-06, "loss": 0.4435, "step": 21380 }, { "epoch": 0.4057437687317425, "grad_norm": 1.7220391486440532, "learning_rate": 7.413014582441472e-06, "loss": 0.4653, "step": 21390 }, { "epoch": 0.40593345726317387, "grad_norm": 1.465685598500072, "learning_rate": 7.410114409654002e-06, "loss": 0.5023, "step": 21400 }, { "epoch": 0.40612314579460523, "grad_norm": 1.372008218487047, "learning_rate": 7.40721318019945e-06, "loss": 0.4728, "step": 21410 }, { "epoch": 0.40631283432603665, "grad_norm": 1.3638877215547038, "learning_rate": 7.404310895349796e-06, "loss": 0.4512, "step": 21420 }, { "epoch": 0.406502522857468, "grad_norm": 1.2835379078100682, "learning_rate": 7.401407556377494e-06, "loss": 0.4621, "step": 21430 }, { "epoch": 0.40669221138889944, "grad_norm": 1.5239171533207703, "learning_rate": 7.398503164555452e-06, "loss": 0.4782, "step": 21440 }, { "epoch": 0.4068818999203308, "grad_norm": 1.502769987576892, "learning_rate": 7.395597721157046e-06, "loss": 0.4736, "step": 21450 }, { "epoch": 0.4070715884517622, "grad_norm": 1.5935378285641641, "learning_rate": 7.39269122745611e-06, "loss": 0.4546, "step": 21460 }, { "epoch": 0.4072612769831936, "grad_norm": 1.229811176926998, "learning_rate": 7.3897836847269386e-06, "loss": 0.4458, "step": 21470 }, { "epoch": 0.407450965514625, "grad_norm": 1.3241068235991889, "learning_rate": 7.386875094244288e-06, "loss": 0.4985, "step": 21480 }, { "epoch": 0.40764065404605637, "grad_norm": 1.2040156248826335, "learning_rate": 7.383965457283368e-06, "loss": 0.4344, "step": 21490 }, { "epoch": 0.4078303425774878, "grad_norm": 1.482773799094743, "learning_rate": 7.3810547751198555e-06, "loss": 0.481, "step": 21500 }, { "epoch": 0.40802003110891916, "grad_norm": 1.4498549452674758, "learning_rate": 7.37814304902988e-06, "loss": 0.4531, "step": 21510 }, { "epoch": 0.4082097196403505, "grad_norm": 1.50866923610873, "learning_rate": 7.375230280290031e-06, "loss": 0.4796, "step": 21520 }, { "epoch": 0.40839940817178194, "grad_norm": 1.4692684374790848, "learning_rate": 7.372316470177354e-06, "loss": 0.4701, "step": 21530 }, { "epoch": 0.4085890967032133, "grad_norm": 1.2626481322664431, "learning_rate": 7.369401619969351e-06, "loss": 0.4676, "step": 21540 }, { "epoch": 0.4087787852346447, "grad_norm": 1.5144898112670406, "learning_rate": 7.3664857309439795e-06, "loss": 0.4632, "step": 21550 }, { "epoch": 0.4089684737660761, "grad_norm": 1.3892196778630859, "learning_rate": 7.363568804379657e-06, "loss": 0.4645, "step": 21560 }, { "epoch": 0.4091581622975075, "grad_norm": 1.443834052283334, "learning_rate": 7.36065084155525e-06, "loss": 0.4686, "step": 21570 }, { "epoch": 0.4093478508289389, "grad_norm": 1.6397947927449672, "learning_rate": 7.3577318437500835e-06, "loss": 0.4742, "step": 21580 }, { "epoch": 0.4095375393603703, "grad_norm": 1.2810935548457916, "learning_rate": 7.3548118122439315e-06, "loss": 0.4597, "step": 21590 }, { "epoch": 0.40972722789180166, "grad_norm": 1.2588152533139134, "learning_rate": 7.3518907483170256e-06, "loss": 0.4687, "step": 21600 }, { "epoch": 0.409916916423233, "grad_norm": 1.7108659529192687, "learning_rate": 7.34896865325005e-06, "loss": 0.4762, "step": 21610 }, { "epoch": 0.41010660495466444, "grad_norm": 1.0184073369560094, "learning_rate": 7.346045528324138e-06, "loss": 0.4276, "step": 21620 }, { "epoch": 0.4102962934860958, "grad_norm": 1.2913966852919965, "learning_rate": 7.3431213748208764e-06, "loss": 0.4778, "step": 21630 }, { "epoch": 0.41048598201752723, "grad_norm": 1.3850527529566192, "learning_rate": 7.340196194022302e-06, "loss": 0.4874, "step": 21640 }, { "epoch": 0.4106756705489586, "grad_norm": 1.4999080992538232, "learning_rate": 7.337269987210903e-06, "loss": 0.4916, "step": 21650 }, { "epoch": 0.41086535908039, "grad_norm": 1.3924126031203372, "learning_rate": 7.334342755669619e-06, "loss": 0.4533, "step": 21660 }, { "epoch": 0.4110550476118214, "grad_norm": 1.7752685475203036, "learning_rate": 7.331414500681833e-06, "loss": 0.4453, "step": 21670 }, { "epoch": 0.4112447361432528, "grad_norm": 1.3946933577613958, "learning_rate": 7.328485223531383e-06, "loss": 0.4383, "step": 21680 }, { "epoch": 0.41143442467468416, "grad_norm": 1.4869055575931356, "learning_rate": 7.325554925502554e-06, "loss": 0.4644, "step": 21690 }, { "epoch": 0.4116241132061156, "grad_norm": 1.294261917315372, "learning_rate": 7.322623607880074e-06, "loss": 0.4871, "step": 21700 }, { "epoch": 0.41181380173754695, "grad_norm": 1.2864582837643432, "learning_rate": 7.319691271949123e-06, "loss": 0.4667, "step": 21710 }, { "epoch": 0.4120034902689783, "grad_norm": 2.2496759929535513, "learning_rate": 7.316757918995326e-06, "loss": 0.4756, "step": 21720 }, { "epoch": 0.41219317880040973, "grad_norm": 9.79321829776654, "learning_rate": 7.313823550304754e-06, "loss": 0.4574, "step": 21730 }, { "epoch": 0.4123828673318411, "grad_norm": 1.3366133358800079, "learning_rate": 7.3108881671639234e-06, "loss": 0.4762, "step": 21740 }, { "epoch": 0.4125725558632725, "grad_norm": 1.4486270086020796, "learning_rate": 7.307951770859792e-06, "loss": 0.4846, "step": 21750 }, { "epoch": 0.4127622443947039, "grad_norm": 1.3691272622062076, "learning_rate": 7.30501436267977e-06, "loss": 0.4713, "step": 21760 }, { "epoch": 0.4129519329261353, "grad_norm": 2.172904528424453, "learning_rate": 7.302075943911701e-06, "loss": 0.4775, "step": 21770 }, { "epoch": 0.41314162145756667, "grad_norm": 1.3554515813371968, "learning_rate": 7.299136515843878e-06, "loss": 0.4683, "step": 21780 }, { "epoch": 0.4133313099889981, "grad_norm": 1.5392752738591229, "learning_rate": 7.296196079765038e-06, "loss": 0.4603, "step": 21790 }, { "epoch": 0.41352099852042945, "grad_norm": 1.3828555190715988, "learning_rate": 7.293254636964353e-06, "loss": 0.4937, "step": 21800 }, { "epoch": 0.41371068705186087, "grad_norm": 1.528144340802723, "learning_rate": 7.290312188731444e-06, "loss": 0.4672, "step": 21810 }, { "epoch": 0.41390037558329223, "grad_norm": 1.2345052621156143, "learning_rate": 7.287368736356368e-06, "loss": 0.4586, "step": 21820 }, { "epoch": 0.4140900641147236, "grad_norm": 1.5046367761527322, "learning_rate": 7.284424281129623e-06, "loss": 0.4586, "step": 21830 }, { "epoch": 0.414279752646155, "grad_norm": 1.4682402883882775, "learning_rate": 7.281478824342148e-06, "loss": 0.4508, "step": 21840 }, { "epoch": 0.4144694411775864, "grad_norm": 1.54723082787977, "learning_rate": 7.278532367285319e-06, "loss": 0.4803, "step": 21850 }, { "epoch": 0.4146591297090178, "grad_norm": 1.5293440450436488, "learning_rate": 7.275584911250955e-06, "loss": 0.4708, "step": 21860 }, { "epoch": 0.41484881824044917, "grad_norm": 1.5803274906840423, "learning_rate": 7.2726364575313065e-06, "loss": 0.4467, "step": 21870 }, { "epoch": 0.4150385067718806, "grad_norm": 1.380893597168172, "learning_rate": 7.269687007419066e-06, "loss": 0.4848, "step": 21880 }, { "epoch": 0.41522819530331195, "grad_norm": 1.2196769038681021, "learning_rate": 7.266736562207365e-06, "loss": 0.4645, "step": 21890 }, { "epoch": 0.4154178838347434, "grad_norm": 1.5236754499483272, "learning_rate": 7.263785123189763e-06, "loss": 0.4795, "step": 21900 }, { "epoch": 0.41560757236617474, "grad_norm": 1.3676788313284804, "learning_rate": 7.2608326916602614e-06, "loss": 0.4654, "step": 21910 }, { "epoch": 0.41579726089760616, "grad_norm": 1.7996801088713317, "learning_rate": 7.257879268913297e-06, "loss": 0.4691, "step": 21920 }, { "epoch": 0.4159869494290375, "grad_norm": 1.4341750988182478, "learning_rate": 7.254924856243739e-06, "loss": 0.4628, "step": 21930 }, { "epoch": 0.4161766379604689, "grad_norm": 1.5619864674628052, "learning_rate": 7.251969454946892e-06, "loss": 0.4845, "step": 21940 }, { "epoch": 0.4163663264919003, "grad_norm": 1.5298419593384625, "learning_rate": 7.249013066318492e-06, "loss": 0.4704, "step": 21950 }, { "epoch": 0.41655601502333167, "grad_norm": 1.4479340496915867, "learning_rate": 7.24605569165471e-06, "loss": 0.4714, "step": 21960 }, { "epoch": 0.4167457035547631, "grad_norm": 1.4330063973903067, "learning_rate": 7.243097332252148e-06, "loss": 0.4563, "step": 21970 }, { "epoch": 0.41693539208619446, "grad_norm": 1.3308359106583323, "learning_rate": 7.240137989407841e-06, "loss": 0.4808, "step": 21980 }, { "epoch": 0.4171250806176259, "grad_norm": 1.4916364219666656, "learning_rate": 7.237177664419254e-06, "loss": 0.4726, "step": 21990 }, { "epoch": 0.41731476914905724, "grad_norm": 1.4245833096911091, "learning_rate": 7.234216358584284e-06, "loss": 0.4968, "step": 22000 }, { "epoch": 0.41750445768048866, "grad_norm": 1.4345968303139345, "learning_rate": 7.2312540732012545e-06, "loss": 0.4692, "step": 22010 }, { "epoch": 0.41769414621192, "grad_norm": 1.463221480320871, "learning_rate": 7.2282908095689265e-06, "loss": 0.4616, "step": 22020 }, { "epoch": 0.4178838347433514, "grad_norm": 1.6162194498573088, "learning_rate": 7.225326568986477e-06, "loss": 0.4966, "step": 22030 }, { "epoch": 0.4180735232747828, "grad_norm": 1.608141961239944, "learning_rate": 7.222361352753524e-06, "loss": 0.4548, "step": 22040 }, { "epoch": 0.4182632118062142, "grad_norm": 1.7014575038666016, "learning_rate": 7.219395162170108e-06, "loss": 0.4858, "step": 22050 }, { "epoch": 0.4184529003376456, "grad_norm": 2.129899459287553, "learning_rate": 7.216427998536692e-06, "loss": 0.4699, "step": 22060 }, { "epoch": 0.41864258886907696, "grad_norm": 1.6248738872949362, "learning_rate": 7.213459863154177e-06, "loss": 0.4959, "step": 22070 }, { "epoch": 0.4188322774005084, "grad_norm": 1.3715676696929815, "learning_rate": 7.210490757323878e-06, "loss": 0.4702, "step": 22080 }, { "epoch": 0.41902196593193974, "grad_norm": 1.5989810675889466, "learning_rate": 7.207520682347544e-06, "loss": 0.4523, "step": 22090 }, { "epoch": 0.41921165446337116, "grad_norm": 1.5843576983145253, "learning_rate": 7.204549639527343e-06, "loss": 0.4927, "step": 22100 }, { "epoch": 0.41940134299480253, "grad_norm": 1.679885243982517, "learning_rate": 7.201577630165872e-06, "loss": 0.4765, "step": 22110 }, { "epoch": 0.41959103152623395, "grad_norm": 1.6217342266599633, "learning_rate": 7.198604655566151e-06, "loss": 0.4857, "step": 22120 }, { "epoch": 0.4197807200576653, "grad_norm": 1.3511910242416045, "learning_rate": 7.195630717031619e-06, "loss": 0.4878, "step": 22130 }, { "epoch": 0.4199704085890967, "grad_norm": 1.4389002732590528, "learning_rate": 7.192655815866143e-06, "loss": 0.469, "step": 22140 }, { "epoch": 0.4201600971205281, "grad_norm": 1.743951083942627, "learning_rate": 7.1896799533740095e-06, "loss": 0.4814, "step": 22150 }, { "epoch": 0.42034978565195946, "grad_norm": 1.2203468059476037, "learning_rate": 7.186703130859924e-06, "loss": 0.4437, "step": 22160 }, { "epoch": 0.4205394741833909, "grad_norm": 1.3163643766433415, "learning_rate": 7.183725349629018e-06, "loss": 0.4592, "step": 22170 }, { "epoch": 0.42072916271482225, "grad_norm": 1.3280112970773812, "learning_rate": 7.180746610986842e-06, "loss": 0.4529, "step": 22180 }, { "epoch": 0.42091885124625367, "grad_norm": 1.6200382135814517, "learning_rate": 7.177766916239361e-06, "loss": 0.4264, "step": 22190 }, { "epoch": 0.42110853977768503, "grad_norm": 1.5850518778672333, "learning_rate": 7.174786266692969e-06, "loss": 0.4506, "step": 22200 }, { "epoch": 0.42129822830911645, "grad_norm": 1.4210615307393908, "learning_rate": 7.171804663654467e-06, "loss": 0.5056, "step": 22210 }, { "epoch": 0.4214879168405478, "grad_norm": 1.5356508278312355, "learning_rate": 7.1688221084310864e-06, "loss": 0.4891, "step": 22220 }, { "epoch": 0.42167760537197924, "grad_norm": 1.5273586965564907, "learning_rate": 7.165838602330462e-06, "loss": 0.4857, "step": 22230 }, { "epoch": 0.4218672939034106, "grad_norm": 1.774353333396026, "learning_rate": 7.16285414666066e-06, "loss": 0.4696, "step": 22240 }, { "epoch": 0.42205698243484197, "grad_norm": 1.5862276171238823, "learning_rate": 7.159868742730153e-06, "loss": 0.487, "step": 22250 }, { "epoch": 0.4222466709662734, "grad_norm": 1.6597534020201319, "learning_rate": 7.156882391847833e-06, "loss": 0.4777, "step": 22260 }, { "epoch": 0.42243635949770475, "grad_norm": 1.5435823632766954, "learning_rate": 7.153895095323006e-06, "loss": 0.4399, "step": 22270 }, { "epoch": 0.42262604802913617, "grad_norm": 1.5259795650645593, "learning_rate": 7.150906854465394e-06, "loss": 0.4797, "step": 22280 }, { "epoch": 0.42281573656056753, "grad_norm": 1.3559206505813246, "learning_rate": 7.147917670585129e-06, "loss": 0.4246, "step": 22290 }, { "epoch": 0.42300542509199895, "grad_norm": 1.5602076862523204, "learning_rate": 7.144927544992767e-06, "loss": 0.4758, "step": 22300 }, { "epoch": 0.4231951136234303, "grad_norm": 1.4959615432842284, "learning_rate": 7.141936478999263e-06, "loss": 0.4698, "step": 22310 }, { "epoch": 0.42338480215486174, "grad_norm": 1.6820646114158149, "learning_rate": 7.138944473915992e-06, "loss": 0.4658, "step": 22320 }, { "epoch": 0.4235744906862931, "grad_norm": 3.957694445797216, "learning_rate": 7.135951531054741e-06, "loss": 0.4497, "step": 22330 }, { "epoch": 0.4237641792177245, "grad_norm": 1.350240105301941, "learning_rate": 7.132957651727706e-06, "loss": 0.4833, "step": 22340 }, { "epoch": 0.4239538677491559, "grad_norm": 1.618604116009303, "learning_rate": 7.129962837247496e-06, "loss": 0.4697, "step": 22350 }, { "epoch": 0.42414355628058725, "grad_norm": 1.4984145961757118, "learning_rate": 7.126967088927126e-06, "loss": 0.4521, "step": 22360 }, { "epoch": 0.4243332448120187, "grad_norm": 1.1850028951086862, "learning_rate": 7.123970408080021e-06, "loss": 0.4673, "step": 22370 }, { "epoch": 0.42452293334345004, "grad_norm": 1.2922374662134597, "learning_rate": 7.120972796020021e-06, "loss": 0.4599, "step": 22380 }, { "epoch": 0.42471262187488146, "grad_norm": 1.3708394104552848, "learning_rate": 7.117974254061369e-06, "loss": 0.4727, "step": 22390 }, { "epoch": 0.4249023104063128, "grad_norm": 1.6680726743891596, "learning_rate": 7.114974783518715e-06, "loss": 0.4601, "step": 22400 }, { "epoch": 0.42509199893774424, "grad_norm": 1.3573265029621089, "learning_rate": 7.1119743857071145e-06, "loss": 0.4689, "step": 22410 }, { "epoch": 0.4252816874691756, "grad_norm": 1.3840417562866132, "learning_rate": 7.108973061942039e-06, "loss": 0.4852, "step": 22420 }, { "epoch": 0.425471376000607, "grad_norm": 1.4986368798256624, "learning_rate": 7.105970813539358e-06, "loss": 0.4619, "step": 22430 }, { "epoch": 0.4256610645320384, "grad_norm": 1.6219219941942065, "learning_rate": 7.102967641815345e-06, "loss": 0.4546, "step": 22440 }, { "epoch": 0.42585075306346976, "grad_norm": 1.4056675762755988, "learning_rate": 7.099963548086684e-06, "loss": 0.445, "step": 22450 }, { "epoch": 0.4260404415949012, "grad_norm": 1.3463728108000883, "learning_rate": 7.096958533670458e-06, "loss": 0.4561, "step": 22460 }, { "epoch": 0.42623013012633254, "grad_norm": 1.4838227566882134, "learning_rate": 7.09395259988416e-06, "loss": 0.4534, "step": 22470 }, { "epoch": 0.42641981865776396, "grad_norm": 1.475668825087277, "learning_rate": 7.090945748045681e-06, "loss": 0.4752, "step": 22480 }, { "epoch": 0.4266095071891953, "grad_norm": 3.24971694199291, "learning_rate": 7.087937979473314e-06, "loss": 0.4743, "step": 22490 }, { "epoch": 0.42679919572062675, "grad_norm": 1.5976592735074746, "learning_rate": 7.084929295485756e-06, "loss": 0.4542, "step": 22500 }, { "epoch": 0.4269888842520581, "grad_norm": 1.4806877610957088, "learning_rate": 7.081919697402109e-06, "loss": 0.4194, "step": 22510 }, { "epoch": 0.42717857278348953, "grad_norm": 1.641495035328654, "learning_rate": 7.078909186541867e-06, "loss": 0.4675, "step": 22520 }, { "epoch": 0.4273682613149209, "grad_norm": 1.701564016580942, "learning_rate": 7.0758977642249315e-06, "loss": 0.459, "step": 22530 }, { "epoch": 0.4275579498463523, "grad_norm": 1.4091752690437949, "learning_rate": 7.072885431771599e-06, "loss": 0.4595, "step": 22540 }, { "epoch": 0.4277476383777837, "grad_norm": 1.4035429098154237, "learning_rate": 7.069872190502572e-06, "loss": 0.5027, "step": 22550 }, { "epoch": 0.42793732690921504, "grad_norm": 1.6350459478524755, "learning_rate": 7.066858041738942e-06, "loss": 0.4901, "step": 22560 }, { "epoch": 0.42812701544064646, "grad_norm": 1.4795125156352742, "learning_rate": 7.063842986802204e-06, "loss": 0.4544, "step": 22570 }, { "epoch": 0.42831670397207783, "grad_norm": 1.364464519701592, "learning_rate": 7.06082702701425e-06, "loss": 0.4737, "step": 22580 }, { "epoch": 0.42850639250350925, "grad_norm": 1.6074854470692228, "learning_rate": 7.057810163697368e-06, "loss": 0.4327, "step": 22590 }, { "epoch": 0.4286960810349406, "grad_norm": 1.5809774905508156, "learning_rate": 7.054792398174243e-06, "loss": 0.5003, "step": 22600 }, { "epoch": 0.42888576956637203, "grad_norm": 1.5009776171365636, "learning_rate": 7.0517737317679545e-06, "loss": 0.5029, "step": 22610 }, { "epoch": 0.4290754580978034, "grad_norm": 1.4617598985324418, "learning_rate": 7.048754165801974e-06, "loss": 0.4917, "step": 22620 }, { "epoch": 0.4292651466292348, "grad_norm": 1.538777829583372, "learning_rate": 7.045733701600175e-06, "loss": 0.4782, "step": 22630 }, { "epoch": 0.4294548351606662, "grad_norm": 1.7454077910692405, "learning_rate": 7.0427123404868184e-06, "loss": 0.5056, "step": 22640 }, { "epoch": 0.4296445236920976, "grad_norm": 1.5102421217815776, "learning_rate": 7.039690083786559e-06, "loss": 0.5086, "step": 22650 }, { "epoch": 0.42983421222352897, "grad_norm": 1.2087992388752373, "learning_rate": 7.036666932824448e-06, "loss": 0.4256, "step": 22660 }, { "epoch": 0.43002390075496033, "grad_norm": 1.2803636772737905, "learning_rate": 7.033642888925925e-06, "loss": 0.5026, "step": 22670 }, { "epoch": 0.43021358928639175, "grad_norm": 1.4596926622191817, "learning_rate": 7.030617953416823e-06, "loss": 0.4851, "step": 22680 }, { "epoch": 0.4304032778178231, "grad_norm": 1.3224561462327689, "learning_rate": 7.0275921276233665e-06, "loss": 0.4453, "step": 22690 }, { "epoch": 0.43059296634925454, "grad_norm": 1.4934161446378613, "learning_rate": 7.024565412872164e-06, "loss": 0.4725, "step": 22700 }, { "epoch": 0.4307826548806859, "grad_norm": 1.4253017029459445, "learning_rate": 7.0215378104902256e-06, "loss": 0.4712, "step": 22710 }, { "epoch": 0.4309723434121173, "grad_norm": 1.7383697705302716, "learning_rate": 7.018509321804938e-06, "loss": 0.4656, "step": 22720 }, { "epoch": 0.4311620319435487, "grad_norm": 1.2837366606018543, "learning_rate": 7.015479948144086e-06, "loss": 0.4375, "step": 22730 }, { "epoch": 0.4313517204749801, "grad_norm": 1.3607439945491913, "learning_rate": 7.012449690835839e-06, "loss": 0.4687, "step": 22740 }, { "epoch": 0.43154140900641147, "grad_norm": 1.3738135620639507, "learning_rate": 7.009418551208749e-06, "loss": 0.4777, "step": 22750 }, { "epoch": 0.4317310975378429, "grad_norm": 1.599666569780175, "learning_rate": 7.006386530591765e-06, "loss": 0.4715, "step": 22760 }, { "epoch": 0.43192078606927425, "grad_norm": 1.2776093791687353, "learning_rate": 7.003353630314211e-06, "loss": 0.4409, "step": 22770 }, { "epoch": 0.4321104746007056, "grad_norm": 1.4223596453164782, "learning_rate": 7.000319851705807e-06, "loss": 0.4469, "step": 22780 }, { "epoch": 0.43230016313213704, "grad_norm": 1.3238003636054574, "learning_rate": 6.997285196096651e-06, "loss": 0.4629, "step": 22790 }, { "epoch": 0.4324898516635684, "grad_norm": 1.4274547054770959, "learning_rate": 6.994249664817227e-06, "loss": 0.469, "step": 22800 }, { "epoch": 0.4326795401949998, "grad_norm": 1.341091878915102, "learning_rate": 6.991213259198408e-06, "loss": 0.4728, "step": 22810 }, { "epoch": 0.4328692287264312, "grad_norm": 1.604000615516371, "learning_rate": 6.988175980571442e-06, "loss": 0.4577, "step": 22820 }, { "epoch": 0.4330589172578626, "grad_norm": 1.3651071944073125, "learning_rate": 6.985137830267964e-06, "loss": 0.469, "step": 22830 }, { "epoch": 0.433248605789294, "grad_norm": 1.3182232348223342, "learning_rate": 6.982098809619994e-06, "loss": 0.439, "step": 22840 }, { "epoch": 0.4334382943207254, "grad_norm": 1.5079647760055412, "learning_rate": 6.979058919959927e-06, "loss": 0.4727, "step": 22850 }, { "epoch": 0.43362798285215676, "grad_norm": 1.2730070643829001, "learning_rate": 6.976018162620545e-06, "loss": 0.4826, "step": 22860 }, { "epoch": 0.4338176713835881, "grad_norm": 1.4357594055696241, "learning_rate": 6.972976538935007e-06, "loss": 0.4636, "step": 22870 }, { "epoch": 0.43400735991501954, "grad_norm": 1.2163826282250678, "learning_rate": 6.969934050236853e-06, "loss": 0.4396, "step": 22880 }, { "epoch": 0.4341970484464509, "grad_norm": 1.1631814269489045, "learning_rate": 6.966890697860005e-06, "loss": 0.4533, "step": 22890 }, { "epoch": 0.4343867369778823, "grad_norm": 1.4660013555075333, "learning_rate": 6.963846483138755e-06, "loss": 0.4676, "step": 22900 }, { "epoch": 0.4345764255093137, "grad_norm": 1.7581691140787772, "learning_rate": 6.960801407407782e-06, "loss": 0.4685, "step": 22910 }, { "epoch": 0.4347661140407451, "grad_norm": 1.6669231030082896, "learning_rate": 6.957755472002141e-06, "loss": 0.475, "step": 22920 }, { "epoch": 0.4349558025721765, "grad_norm": 1.6181910023377417, "learning_rate": 6.954708678257259e-06, "loss": 0.4698, "step": 22930 }, { "epoch": 0.4351454911036079, "grad_norm": 1.18787368644545, "learning_rate": 6.951661027508946e-06, "loss": 0.4781, "step": 22940 }, { "epoch": 0.43533517963503926, "grad_norm": 1.5398017248261806, "learning_rate": 6.948612521093379e-06, "loss": 0.4899, "step": 22950 }, { "epoch": 0.4355248681664707, "grad_norm": 1.2679622775688733, "learning_rate": 6.945563160347122e-06, "loss": 0.4601, "step": 22960 }, { "epoch": 0.43571455669790204, "grad_norm": 1.895691511224617, "learning_rate": 6.942512946607103e-06, "loss": 0.4761, "step": 22970 }, { "epoch": 0.4359042452293334, "grad_norm": 1.2318203085190649, "learning_rate": 6.9394618812106295e-06, "loss": 0.4503, "step": 22980 }, { "epoch": 0.43609393376076483, "grad_norm": 1.4621035061401848, "learning_rate": 6.9364099654953786e-06, "loss": 0.4597, "step": 22990 }, { "epoch": 0.4362836222921962, "grad_norm": 1.2407067753122867, "learning_rate": 6.933357200799405e-06, "loss": 0.4687, "step": 23000 }, { "epoch": 0.4364733108236276, "grad_norm": 1.3111231718469354, "learning_rate": 6.930303588461133e-06, "loss": 0.4726, "step": 23010 }, { "epoch": 0.436662999355059, "grad_norm": 1.186819442952292, "learning_rate": 6.927249129819358e-06, "loss": 0.4727, "step": 23020 }, { "epoch": 0.4368526878864904, "grad_norm": 1.2104822262581285, "learning_rate": 6.924193826213246e-06, "loss": 0.4711, "step": 23030 }, { "epoch": 0.43704237641792176, "grad_norm": 1.5399315069466548, "learning_rate": 6.921137678982336e-06, "loss": 0.4595, "step": 23040 }, { "epoch": 0.4372320649493532, "grad_norm": 1.7190327330780986, "learning_rate": 6.918080689466535e-06, "loss": 0.4737, "step": 23050 }, { "epoch": 0.43742175348078455, "grad_norm": 1.6814420875739047, "learning_rate": 6.91502285900612e-06, "loss": 0.4684, "step": 23060 }, { "epoch": 0.43761144201221597, "grad_norm": 1.4355709363246398, "learning_rate": 6.9119641889417365e-06, "loss": 0.4902, "step": 23070 }, { "epoch": 0.43780113054364733, "grad_norm": 1.3644157639138121, "learning_rate": 6.908904680614396e-06, "loss": 0.4704, "step": 23080 }, { "epoch": 0.4379908190750787, "grad_norm": 1.483931034966368, "learning_rate": 6.905844335365483e-06, "loss": 0.5025, "step": 23090 }, { "epoch": 0.4381805076065101, "grad_norm": 1.629851099830194, "learning_rate": 6.902783154536743e-06, "loss": 0.4619, "step": 23100 }, { "epoch": 0.4383701961379415, "grad_norm": 1.3506766993840313, "learning_rate": 6.89972113947029e-06, "loss": 0.4485, "step": 23110 }, { "epoch": 0.4385598846693729, "grad_norm": 1.3960859449243772, "learning_rate": 6.896658291508607e-06, "loss": 0.4669, "step": 23120 }, { "epoch": 0.43874957320080427, "grad_norm": 1.3581043527877055, "learning_rate": 6.893594611994535e-06, "loss": 0.4889, "step": 23130 }, { "epoch": 0.4389392617322357, "grad_norm": 1.7795307755756522, "learning_rate": 6.8905301022712865e-06, "loss": 0.4839, "step": 23140 }, { "epoch": 0.43912895026366705, "grad_norm": 1.4096361512780238, "learning_rate": 6.8874647636824365e-06, "loss": 0.4755, "step": 23150 }, { "epoch": 0.43931863879509847, "grad_norm": 1.599494716764829, "learning_rate": 6.884398597571919e-06, "loss": 0.4648, "step": 23160 }, { "epoch": 0.43950832732652984, "grad_norm": 1.182546071131612, "learning_rate": 6.881331605284036e-06, "loss": 0.4508, "step": 23170 }, { "epoch": 0.43969801585796126, "grad_norm": 1.4814559037572044, "learning_rate": 6.87826378816345e-06, "loss": 0.4443, "step": 23180 }, { "epoch": 0.4398877043893926, "grad_norm": 1.3389491625225938, "learning_rate": 6.875195147555183e-06, "loss": 0.4312, "step": 23190 }, { "epoch": 0.440077392920824, "grad_norm": 1.3771593907577413, "learning_rate": 6.8721256848046205e-06, "loss": 0.4531, "step": 23200 }, { "epoch": 0.4402670814522554, "grad_norm": 1.2357361029488845, "learning_rate": 6.869055401257509e-06, "loss": 0.4675, "step": 23210 }, { "epoch": 0.44045676998368677, "grad_norm": 1.4766266510526151, "learning_rate": 6.865984298259953e-06, "loss": 0.4878, "step": 23220 }, { "epoch": 0.4406464585151182, "grad_norm": 1.319061584946929, "learning_rate": 6.862912377158418e-06, "loss": 0.4799, "step": 23230 }, { "epoch": 0.44083614704654955, "grad_norm": 1.8478328652886462, "learning_rate": 6.8598396392997246e-06, "loss": 0.4676, "step": 23240 }, { "epoch": 0.441025835577981, "grad_norm": 1.4802914523742523, "learning_rate": 6.856766086031057e-06, "loss": 0.4797, "step": 23250 }, { "epoch": 0.44121552410941234, "grad_norm": 1.5027789433585002, "learning_rate": 6.8536917186999515e-06, "loss": 0.4408, "step": 23260 }, { "epoch": 0.44140521264084376, "grad_norm": 1.3566731247735637, "learning_rate": 6.850616538654306e-06, "loss": 0.466, "step": 23270 }, { "epoch": 0.4415949011722751, "grad_norm": 1.4544652995563199, "learning_rate": 6.847540547242371e-06, "loss": 0.4943, "step": 23280 }, { "epoch": 0.4417845897037065, "grad_norm": 1.355837882540347, "learning_rate": 6.844463745812754e-06, "loss": 0.4665, "step": 23290 }, { "epoch": 0.4419742782351379, "grad_norm": 1.311374477864285, "learning_rate": 6.841386135714419e-06, "loss": 0.428, "step": 23300 }, { "epoch": 0.44216396676656927, "grad_norm": 1.283755579124715, "learning_rate": 6.838307718296683e-06, "loss": 0.4509, "step": 23310 }, { "epoch": 0.4423536552980007, "grad_norm": 1.2432252758193971, "learning_rate": 6.835228494909217e-06, "loss": 0.4633, "step": 23320 }, { "epoch": 0.44254334382943206, "grad_norm": 1.2244714736993014, "learning_rate": 6.832148466902046e-06, "loss": 0.4331, "step": 23330 }, { "epoch": 0.4427330323608635, "grad_norm": 1.437328952430779, "learning_rate": 6.829067635625546e-06, "loss": 0.4716, "step": 23340 }, { "epoch": 0.44292272089229484, "grad_norm": 1.7791925612410686, "learning_rate": 6.8259860024304505e-06, "loss": 0.4831, "step": 23350 }, { "epoch": 0.44311240942372626, "grad_norm": 1.6673335449615234, "learning_rate": 6.822903568667836e-06, "loss": 0.4584, "step": 23360 }, { "epoch": 0.4433020979551576, "grad_norm": 1.4640644103037275, "learning_rate": 6.819820335689138e-06, "loss": 0.4812, "step": 23370 }, { "epoch": 0.44349178648658905, "grad_norm": 1.3620243725611598, "learning_rate": 6.8167363048461375e-06, "loss": 0.474, "step": 23380 }, { "epoch": 0.4436814750180204, "grad_norm": 1.2832430434749775, "learning_rate": 6.813651477490968e-06, "loss": 0.4723, "step": 23390 }, { "epoch": 0.4438711635494518, "grad_norm": 1.3560813549889852, "learning_rate": 6.810565854976112e-06, "loss": 0.4505, "step": 23400 }, { "epoch": 0.4440608520808832, "grad_norm": 1.6120051127680464, "learning_rate": 6.807479438654396e-06, "loss": 0.4843, "step": 23410 }, { "epoch": 0.44425054061231456, "grad_norm": 1.3427244532628766, "learning_rate": 6.804392229879002e-06, "loss": 0.471, "step": 23420 }, { "epoch": 0.444440229143746, "grad_norm": 1.434954052413221, "learning_rate": 6.801304230003455e-06, "loss": 0.4729, "step": 23430 }, { "epoch": 0.44462991767517734, "grad_norm": 1.4023038620809074, "learning_rate": 6.798215440381627e-06, "loss": 0.4843, "step": 23440 }, { "epoch": 0.44481960620660876, "grad_norm": 1.409989342616447, "learning_rate": 6.795125862367736e-06, "loss": 0.4802, "step": 23450 }, { "epoch": 0.44500929473804013, "grad_norm": 1.396027278729306, "learning_rate": 6.792035497316348e-06, "loss": 0.4709, "step": 23460 }, { "epoch": 0.44519898326947155, "grad_norm": 1.695840177300891, "learning_rate": 6.7889443465823725e-06, "loss": 0.4721, "step": 23470 }, { "epoch": 0.4453886718009029, "grad_norm": 1.4923012342233621, "learning_rate": 6.785852411521063e-06, "loss": 0.451, "step": 23480 }, { "epoch": 0.44557836033233433, "grad_norm": 1.4292136627036451, "learning_rate": 6.782759693488017e-06, "loss": 0.4463, "step": 23490 }, { "epoch": 0.4457680488637657, "grad_norm": 1.4002450991370206, "learning_rate": 6.779666193839176e-06, "loss": 0.4777, "step": 23500 }, { "epoch": 0.44595773739519706, "grad_norm": 1.6491166756877842, "learning_rate": 6.776571913930825e-06, "loss": 0.4709, "step": 23510 }, { "epoch": 0.4461474259266285, "grad_norm": 1.5446616486771496, "learning_rate": 6.7734768551195875e-06, "loss": 0.4537, "step": 23520 }, { "epoch": 0.44633711445805985, "grad_norm": 1.4851516888966634, "learning_rate": 6.770381018762433e-06, "loss": 0.5013, "step": 23530 }, { "epoch": 0.44652680298949127, "grad_norm": 1.4772055556907633, "learning_rate": 6.76728440621667e-06, "loss": 0.4796, "step": 23540 }, { "epoch": 0.44671649152092263, "grad_norm": 1.5254759019616073, "learning_rate": 6.764187018839945e-06, "loss": 0.4879, "step": 23550 }, { "epoch": 0.44690618005235405, "grad_norm": 1.7795040665255484, "learning_rate": 6.761088857990249e-06, "loss": 0.4828, "step": 23560 }, { "epoch": 0.4470958685837854, "grad_norm": 1.4083020554696384, "learning_rate": 6.757989925025905e-06, "loss": 0.459, "step": 23570 }, { "epoch": 0.44728555711521684, "grad_norm": 1.565151563419642, "learning_rate": 6.754890221305585e-06, "loss": 0.4718, "step": 23580 }, { "epoch": 0.4474752456466482, "grad_norm": 1.1802702204534774, "learning_rate": 6.75178974818829e-06, "loss": 0.4595, "step": 23590 }, { "epoch": 0.4476649341780796, "grad_norm": 1.336178955651597, "learning_rate": 6.748688507033361e-06, "loss": 0.4371, "step": 23600 }, { "epoch": 0.447854622709511, "grad_norm": 1.435366838305594, "learning_rate": 6.745586499200476e-06, "loss": 0.4843, "step": 23610 }, { "epoch": 0.44804431124094235, "grad_norm": 1.284672874119895, "learning_rate": 6.742483726049649e-06, "loss": 0.4925, "step": 23620 }, { "epoch": 0.44823399977237377, "grad_norm": 1.318791514090566, "learning_rate": 6.7393801889412306e-06, "loss": 0.476, "step": 23630 }, { "epoch": 0.44842368830380513, "grad_norm": 1.6213551541770779, "learning_rate": 6.736275889235904e-06, "loss": 0.4804, "step": 23640 }, { "epoch": 0.44861337683523655, "grad_norm": 1.4159347725758635, "learning_rate": 6.7331708282946885e-06, "loss": 0.469, "step": 23650 }, { "epoch": 0.4488030653666679, "grad_norm": 1.3749863721279152, "learning_rate": 6.730065007478938e-06, "loss": 0.4442, "step": 23660 }, { "epoch": 0.44899275389809934, "grad_norm": 1.4220054475891801, "learning_rate": 6.726958428150336e-06, "loss": 0.4556, "step": 23670 }, { "epoch": 0.4491824424295307, "grad_norm": 1.6208434443951112, "learning_rate": 6.723851091670906e-06, "loss": 0.4688, "step": 23680 }, { "epoch": 0.4493721309609621, "grad_norm": 1.6157808985984172, "learning_rate": 6.720742999402991e-06, "loss": 0.4733, "step": 23690 }, { "epoch": 0.4495618194923935, "grad_norm": 1.493731927543378, "learning_rate": 6.717634152709277e-06, "loss": 0.4695, "step": 23700 }, { "epoch": 0.44975150802382485, "grad_norm": 1.3621144940964185, "learning_rate": 6.714524552952777e-06, "loss": 0.4707, "step": 23710 }, { "epoch": 0.4499411965552563, "grad_norm": 1.4100516791169588, "learning_rate": 6.711414201496832e-06, "loss": 0.4657, "step": 23720 }, { "epoch": 0.45013088508668764, "grad_norm": 1.4720525885387037, "learning_rate": 6.708303099705114e-06, "loss": 0.4916, "step": 23730 }, { "epoch": 0.45032057361811906, "grad_norm": 1.311449480050586, "learning_rate": 6.705191248941625e-06, "loss": 0.4526, "step": 23740 }, { "epoch": 0.4505102621495504, "grad_norm": 1.4845001669605962, "learning_rate": 6.702078650570696e-06, "loss": 0.4981, "step": 23750 }, { "epoch": 0.45069995068098184, "grad_norm": 1.6141481154528219, "learning_rate": 6.698965305956982e-06, "loss": 0.481, "step": 23760 }, { "epoch": 0.4508896392124132, "grad_norm": 1.4033643807429128, "learning_rate": 6.695851216465466e-06, "loss": 0.4722, "step": 23770 }, { "epoch": 0.4510793277438446, "grad_norm": 1.2629664632915227, "learning_rate": 6.692736383461463e-06, "loss": 0.4585, "step": 23780 }, { "epoch": 0.451269016275276, "grad_norm": 1.2750353199371487, "learning_rate": 6.689620808310608e-06, "loss": 0.4919, "step": 23790 }, { "epoch": 0.4514587048067074, "grad_norm": 1.5929504658856541, "learning_rate": 6.686504492378864e-06, "loss": 0.4771, "step": 23800 }, { "epoch": 0.4516483933381388, "grad_norm": 1.3327252097728712, "learning_rate": 6.683387437032518e-06, "loss": 0.4506, "step": 23810 }, { "epoch": 0.45183808186957014, "grad_norm": 1.3956901305466776, "learning_rate": 6.6802696436381795e-06, "loss": 0.4543, "step": 23820 }, { "epoch": 0.45202777040100156, "grad_norm": 1.6171538242632064, "learning_rate": 6.6771511135627875e-06, "loss": 0.4918, "step": 23830 }, { "epoch": 0.4522174589324329, "grad_norm": 1.5760859959363376, "learning_rate": 6.674031848173598e-06, "loss": 0.4632, "step": 23840 }, { "epoch": 0.45240714746386435, "grad_norm": 1.6446232512524035, "learning_rate": 6.67091184883819e-06, "loss": 0.4998, "step": 23850 }, { "epoch": 0.4525968359952957, "grad_norm": 1.4138535027352983, "learning_rate": 6.667791116924466e-06, "loss": 0.4488, "step": 23860 }, { "epoch": 0.45278652452672713, "grad_norm": 1.191753546950133, "learning_rate": 6.664669653800651e-06, "loss": 0.4642, "step": 23870 }, { "epoch": 0.4529762130581585, "grad_norm": 1.4387987575996508, "learning_rate": 6.6615474608352874e-06, "loss": 0.4759, "step": 23880 }, { "epoch": 0.4531659015895899, "grad_norm": 1.4075201150581869, "learning_rate": 6.658424539397239e-06, "loss": 0.4948, "step": 23890 }, { "epoch": 0.4533555901210213, "grad_norm": 1.5671485370313383, "learning_rate": 6.65530089085569e-06, "loss": 0.4904, "step": 23900 }, { "epoch": 0.4535452786524527, "grad_norm": 1.519599241830159, "learning_rate": 6.6521765165801426e-06, "loss": 0.4848, "step": 23910 }, { "epoch": 0.45373496718388406, "grad_norm": 1.4559492219771768, "learning_rate": 6.649051417940416e-06, "loss": 0.4831, "step": 23920 }, { "epoch": 0.45392465571531543, "grad_norm": 1.6306476752263117, "learning_rate": 6.645925596306647e-06, "loss": 0.4717, "step": 23930 }, { "epoch": 0.45411434424674685, "grad_norm": 1.2067456336663314, "learning_rate": 6.642799053049294e-06, "loss": 0.4784, "step": 23940 }, { "epoch": 0.4543040327781782, "grad_norm": 1.72220586079825, "learning_rate": 6.639671789539124e-06, "loss": 0.4546, "step": 23950 }, { "epoch": 0.45449372130960963, "grad_norm": 1.1418520390734441, "learning_rate": 6.636543807147226e-06, "loss": 0.4559, "step": 23960 }, { "epoch": 0.454683409841041, "grad_norm": 1.4045076713382356, "learning_rate": 6.633415107245005e-06, "loss": 0.4918, "step": 23970 }, { "epoch": 0.4548730983724724, "grad_norm": 1.4009445961492184, "learning_rate": 6.630285691204171e-06, "loss": 0.4659, "step": 23980 }, { "epoch": 0.4550627869039038, "grad_norm": 1.573178356222765, "learning_rate": 6.627155560396762e-06, "loss": 0.4474, "step": 23990 }, { "epoch": 0.4552524754353352, "grad_norm": 1.5311792865755578, "learning_rate": 6.6240247161951165e-06, "loss": 0.4469, "step": 24000 }, { "epoch": 0.45544216396676657, "grad_norm": 1.1600008853123855, "learning_rate": 6.620893159971894e-06, "loss": 0.4307, "step": 24010 }, { "epoch": 0.45563185249819793, "grad_norm": 1.6282641695327826, "learning_rate": 6.617760893100063e-06, "loss": 0.433, "step": 24020 }, { "epoch": 0.45582154102962935, "grad_norm": 1.4107096354924447, "learning_rate": 6.614627916952903e-06, "loss": 0.463, "step": 24030 }, { "epoch": 0.4560112295610607, "grad_norm": 1.2502055600573094, "learning_rate": 6.611494232904007e-06, "loss": 0.4654, "step": 24040 }, { "epoch": 0.45620091809249214, "grad_norm": 1.4294332006418269, "learning_rate": 6.6083598423272765e-06, "loss": 0.4582, "step": 24050 }, { "epoch": 0.4563906066239235, "grad_norm": 1.423291622532806, "learning_rate": 6.605224746596923e-06, "loss": 0.462, "step": 24060 }, { "epoch": 0.4565802951553549, "grad_norm": 1.3113965964208163, "learning_rate": 6.6020889470874685e-06, "loss": 0.4739, "step": 24070 }, { "epoch": 0.4567699836867863, "grad_norm": 1.6753196417107716, "learning_rate": 6.59895244517374e-06, "loss": 0.4617, "step": 24080 }, { "epoch": 0.4569596722182177, "grad_norm": 1.533367151635791, "learning_rate": 6.595815242230877e-06, "loss": 0.4473, "step": 24090 }, { "epoch": 0.45714936074964907, "grad_norm": 1.4988471403596373, "learning_rate": 6.592677339634323e-06, "loss": 0.4771, "step": 24100 }, { "epoch": 0.4573390492810805, "grad_norm": 3.119341763295953, "learning_rate": 6.589538738759828e-06, "loss": 0.4636, "step": 24110 }, { "epoch": 0.45752873781251185, "grad_norm": 1.5000810185325821, "learning_rate": 6.586399440983454e-06, "loss": 0.4635, "step": 24120 }, { "epoch": 0.4577184263439432, "grad_norm": 1.3718117648382617, "learning_rate": 6.58325944768156e-06, "loss": 0.443, "step": 24130 }, { "epoch": 0.45790811487537464, "grad_norm": 1.6602576511781555, "learning_rate": 6.580118760230817e-06, "loss": 0.4668, "step": 24140 }, { "epoch": 0.458097803406806, "grad_norm": 1.6514789395782896, "learning_rate": 6.576977380008195e-06, "loss": 0.4795, "step": 24150 }, { "epoch": 0.4582874919382374, "grad_norm": 1.2399769290415144, "learning_rate": 6.573835308390971e-06, "loss": 0.4648, "step": 24160 }, { "epoch": 0.4584771804696688, "grad_norm": 1.4774278669024439, "learning_rate": 6.570692546756726e-06, "loss": 0.4733, "step": 24170 }, { "epoch": 0.4586668690011002, "grad_norm": 1.3912831187828205, "learning_rate": 6.567549096483339e-06, "loss": 0.4879, "step": 24180 }, { "epoch": 0.4588565575325316, "grad_norm": 1.297134858888106, "learning_rate": 6.564404958948995e-06, "loss": 0.4781, "step": 24190 }, { "epoch": 0.459046246063963, "grad_norm": 1.4404096639015183, "learning_rate": 6.561260135532179e-06, "loss": 0.474, "step": 24200 }, { "epoch": 0.45923593459539436, "grad_norm": 1.5517142409388074, "learning_rate": 6.558114627611677e-06, "loss": 0.4406, "step": 24210 }, { "epoch": 0.4594256231268258, "grad_norm": 1.472339148137617, "learning_rate": 6.5549684365665755e-06, "loss": 0.4775, "step": 24220 }, { "epoch": 0.45961531165825714, "grad_norm": 1.3970412595671904, "learning_rate": 6.5518215637762575e-06, "loss": 0.4507, "step": 24230 }, { "epoch": 0.4598050001896885, "grad_norm": 2.0499949846061187, "learning_rate": 6.5486740106204085e-06, "loss": 0.4773, "step": 24240 }, { "epoch": 0.4599946887211199, "grad_norm": 1.495134047171575, "learning_rate": 6.545525778479013e-06, "loss": 0.4728, "step": 24250 }, { "epoch": 0.4601843772525513, "grad_norm": 19.137001739901912, "learning_rate": 6.542376868732349e-06, "loss": 0.4624, "step": 24260 }, { "epoch": 0.4603740657839827, "grad_norm": 1.2575164857363268, "learning_rate": 6.539227282760993e-06, "loss": 0.4616, "step": 24270 }, { "epoch": 0.4605637543154141, "grad_norm": 1.3960596434063035, "learning_rate": 6.536077021945821e-06, "loss": 0.4404, "step": 24280 }, { "epoch": 0.4607534428468455, "grad_norm": 1.4635716698365577, "learning_rate": 6.5329260876680016e-06, "loss": 0.4622, "step": 24290 }, { "epoch": 0.46094313137827686, "grad_norm": 1.4865414174318485, "learning_rate": 6.529774481309e-06, "loss": 0.4555, "step": 24300 }, { "epoch": 0.4611328199097083, "grad_norm": 1.0687977187109017, "learning_rate": 6.526622204250574e-06, "loss": 0.4554, "step": 24310 }, { "epoch": 0.46132250844113964, "grad_norm": 1.484557144774892, "learning_rate": 6.523469257874783e-06, "loss": 0.4506, "step": 24320 }, { "epoch": 0.46151219697257106, "grad_norm": 1.3234164085899462, "learning_rate": 6.5203156435639666e-06, "loss": 0.4209, "step": 24330 }, { "epoch": 0.46170188550400243, "grad_norm": 1.3856031535810673, "learning_rate": 6.51716136270077e-06, "loss": 0.4858, "step": 24340 }, { "epoch": 0.4618915740354338, "grad_norm": 1.3614706712090938, "learning_rate": 6.514006416668124e-06, "loss": 0.4526, "step": 24350 }, { "epoch": 0.4620812625668652, "grad_norm": 1.420933160868267, "learning_rate": 6.510850806849249e-06, "loss": 0.4523, "step": 24360 }, { "epoch": 0.4622709510982966, "grad_norm": 1.3767365493390595, "learning_rate": 6.507694534627667e-06, "loss": 0.4671, "step": 24370 }, { "epoch": 0.462460639629728, "grad_norm": 1.438242785700753, "learning_rate": 6.504537601387175e-06, "loss": 0.4523, "step": 24380 }, { "epoch": 0.46265032816115936, "grad_norm": 1.695134584776196, "learning_rate": 6.5013800085118735e-06, "loss": 0.4755, "step": 24390 }, { "epoch": 0.4628400166925908, "grad_norm": 1.391619204982393, "learning_rate": 6.498221757386146e-06, "loss": 0.4565, "step": 24400 }, { "epoch": 0.46302970522402215, "grad_norm": 1.5793069990177286, "learning_rate": 6.495062849394664e-06, "loss": 0.4453, "step": 24410 }, { "epoch": 0.46321939375545357, "grad_norm": 1.4309094990665723, "learning_rate": 6.49190328592239e-06, "loss": 0.4357, "step": 24420 }, { "epoch": 0.46340908228688493, "grad_norm": 1.5309187834773632, "learning_rate": 6.4887430683545716e-06, "loss": 0.4726, "step": 24430 }, { "epoch": 0.4635987708183163, "grad_norm": 1.189801958862558, "learning_rate": 6.485582198076742e-06, "loss": 0.4891, "step": 24440 }, { "epoch": 0.4637884593497477, "grad_norm": 1.480994634654949, "learning_rate": 6.4824206764747256e-06, "loss": 0.4759, "step": 24450 }, { "epoch": 0.4639781478811791, "grad_norm": 1.8421987231098171, "learning_rate": 6.479258504934626e-06, "loss": 0.4865, "step": 24460 }, { "epoch": 0.4641678364126105, "grad_norm": 1.356010534520551, "learning_rate": 6.476095684842837e-06, "loss": 0.4388, "step": 24470 }, { "epoch": 0.46435752494404187, "grad_norm": 1.5033940922768387, "learning_rate": 6.472932217586033e-06, "loss": 0.4464, "step": 24480 }, { "epoch": 0.4645472134754733, "grad_norm": 1.4493271355020925, "learning_rate": 6.469768104551176e-06, "loss": 0.4764, "step": 24490 }, { "epoch": 0.46473690200690465, "grad_norm": 1.537604886330132, "learning_rate": 6.466603347125509e-06, "loss": 0.4694, "step": 24500 }, { "epoch": 0.46492659053833607, "grad_norm": 1.6486963880963847, "learning_rate": 6.463437946696553e-06, "loss": 0.4419, "step": 24510 }, { "epoch": 0.46511627906976744, "grad_norm": 2.4405400478060937, "learning_rate": 6.460271904652119e-06, "loss": 0.4529, "step": 24520 }, { "epoch": 0.46530596760119886, "grad_norm": 1.326806878418735, "learning_rate": 6.457105222380297e-06, "loss": 0.4861, "step": 24530 }, { "epoch": 0.4654956561326302, "grad_norm": 1.4267799689012295, "learning_rate": 6.453937901269452e-06, "loss": 0.4696, "step": 24540 }, { "epoch": 0.4656853446640616, "grad_norm": 1.542798433369737, "learning_rate": 6.450769942708237e-06, "loss": 0.4749, "step": 24550 }, { "epoch": 0.465875033195493, "grad_norm": 1.5158489568382523, "learning_rate": 6.447601348085579e-06, "loss": 0.4506, "step": 24560 }, { "epoch": 0.46606472172692437, "grad_norm": 1.5349723334203835, "learning_rate": 6.444432118790687e-06, "loss": 0.4803, "step": 24570 }, { "epoch": 0.4662544102583558, "grad_norm": 1.3091759146216249, "learning_rate": 6.4412622562130465e-06, "loss": 0.4827, "step": 24580 }, { "epoch": 0.46644409878978715, "grad_norm": 1.1421936133788533, "learning_rate": 6.438091761742421e-06, "loss": 0.4417, "step": 24590 }, { "epoch": 0.4666337873212186, "grad_norm": 1.6042472221709667, "learning_rate": 6.434920636768851e-06, "loss": 0.4631, "step": 24600 }, { "epoch": 0.46682347585264994, "grad_norm": 1.4354702789390064, "learning_rate": 6.431748882682655e-06, "loss": 0.434, "step": 24610 }, { "epoch": 0.46701316438408136, "grad_norm": 1.178364558553312, "learning_rate": 6.428576500874424e-06, "loss": 0.4478, "step": 24620 }, { "epoch": 0.4672028529155127, "grad_norm": 1.371945899505461, "learning_rate": 6.425403492735027e-06, "loss": 0.441, "step": 24630 }, { "epoch": 0.46739254144694414, "grad_norm": 1.6059471728163708, "learning_rate": 6.422229859655606e-06, "loss": 0.5029, "step": 24640 }, { "epoch": 0.4675822299783755, "grad_norm": 1.5261112602399818, "learning_rate": 6.419055603027582e-06, "loss": 0.4557, "step": 24650 }, { "epoch": 0.46777191850980687, "grad_norm": 1.2895489925637416, "learning_rate": 6.415880724242639e-06, "loss": 0.4642, "step": 24660 }, { "epoch": 0.4679616070412383, "grad_norm": 5.511228768547208, "learning_rate": 6.412705224692743e-06, "loss": 0.446, "step": 24670 }, { "epoch": 0.46815129557266966, "grad_norm": 1.480448239334362, "learning_rate": 6.40952910577013e-06, "loss": 0.4604, "step": 24680 }, { "epoch": 0.4683409841041011, "grad_norm": 1.4315002661632992, "learning_rate": 6.4063523688673035e-06, "loss": 0.476, "step": 24690 }, { "epoch": 0.46853067263553244, "grad_norm": 1.4748885874031383, "learning_rate": 6.4031750153770445e-06, "loss": 0.471, "step": 24700 }, { "epoch": 0.46872036116696386, "grad_norm": 1.4147732304933829, "learning_rate": 6.399997046692401e-06, "loss": 0.4464, "step": 24710 }, { "epoch": 0.4689100496983952, "grad_norm": 1.7502007224394909, "learning_rate": 6.396818464206687e-06, "loss": 0.4907, "step": 24720 }, { "epoch": 0.46909973822982665, "grad_norm": 1.4941562238048698, "learning_rate": 6.393639269313492e-06, "loss": 0.472, "step": 24730 }, { "epoch": 0.469289426761258, "grad_norm": 1.4018063484672585, "learning_rate": 6.39045946340667e-06, "loss": 0.4419, "step": 24740 }, { "epoch": 0.46947911529268943, "grad_norm": 1.520120407295168, "learning_rate": 6.387279047880345e-06, "loss": 0.4913, "step": 24750 }, { "epoch": 0.4696688038241208, "grad_norm": 1.4232023398273956, "learning_rate": 6.384098024128908e-06, "loss": 0.45, "step": 24760 }, { "epoch": 0.46985849235555216, "grad_norm": 1.5938454723137883, "learning_rate": 6.3809163935470145e-06, "loss": 0.4806, "step": 24770 }, { "epoch": 0.4700481808869836, "grad_norm": 1.304737664790739, "learning_rate": 6.37773415752959e-06, "loss": 0.454, "step": 24780 }, { "epoch": 0.47023786941841494, "grad_norm": 1.3487608154421868, "learning_rate": 6.374551317471818e-06, "loss": 0.45, "step": 24790 }, { "epoch": 0.47042755794984636, "grad_norm": 1.8167023002910148, "learning_rate": 6.371367874769157e-06, "loss": 0.4564, "step": 24800 }, { "epoch": 0.47061724648127773, "grad_norm": 1.523675904523999, "learning_rate": 6.368183830817323e-06, "loss": 0.4768, "step": 24810 }, { "epoch": 0.47080693501270915, "grad_norm": 1.071417362471939, "learning_rate": 6.364999187012297e-06, "loss": 0.4437, "step": 24820 }, { "epoch": 0.4709966235441405, "grad_norm": 1.592449171344647, "learning_rate": 6.361813944750323e-06, "loss": 0.4872, "step": 24830 }, { "epoch": 0.47118631207557193, "grad_norm": 1.9359911992637506, "learning_rate": 6.358628105427909e-06, "loss": 0.4587, "step": 24840 }, { "epoch": 0.4713760006070033, "grad_norm": 1.353198018624165, "learning_rate": 6.355441670441822e-06, "loss": 0.4755, "step": 24850 }, { "epoch": 0.47156568913843466, "grad_norm": 1.3991616151160742, "learning_rate": 6.352254641189092e-06, "loss": 0.4617, "step": 24860 }, { "epoch": 0.4717553776698661, "grad_norm": 1.8573146444727722, "learning_rate": 6.349067019067008e-06, "loss": 0.4399, "step": 24870 }, { "epoch": 0.47194506620129745, "grad_norm": 1.4999322662733856, "learning_rate": 6.345878805473121e-06, "loss": 0.429, "step": 24880 }, { "epoch": 0.47213475473272887, "grad_norm": 1.270712016767341, "learning_rate": 6.34269000180524e-06, "loss": 0.4651, "step": 24890 }, { "epoch": 0.47232444326416023, "grad_norm": 1.3382113113391638, "learning_rate": 6.339500609461433e-06, "loss": 0.5117, "step": 24900 }, { "epoch": 0.47251413179559165, "grad_norm": 1.3795045912520598, "learning_rate": 6.336310629840028e-06, "loss": 0.4382, "step": 24910 }, { "epoch": 0.472703820327023, "grad_norm": 1.2466081671209293, "learning_rate": 6.333120064339606e-06, "loss": 0.472, "step": 24920 }, { "epoch": 0.47289350885845444, "grad_norm": 1.556254796707311, "learning_rate": 6.329928914359007e-06, "loss": 0.4571, "step": 24930 }, { "epoch": 0.4730831973898858, "grad_norm": 1.4067718716185265, "learning_rate": 6.3267371812973314e-06, "loss": 0.468, "step": 24940 }, { "epoch": 0.4732728859213172, "grad_norm": 1.518356321625548, "learning_rate": 6.323544866553928e-06, "loss": 0.4751, "step": 24950 }, { "epoch": 0.4734625744527486, "grad_norm": 1.5022588545030426, "learning_rate": 6.3203519715284054e-06, "loss": 0.4636, "step": 24960 }, { "epoch": 0.47365226298417995, "grad_norm": 1.4501692306511373, "learning_rate": 6.317158497620624e-06, "loss": 0.4642, "step": 24970 }, { "epoch": 0.47384195151561137, "grad_norm": 1.378438103540124, "learning_rate": 6.313964446230703e-06, "loss": 0.4356, "step": 24980 }, { "epoch": 0.47403164004704273, "grad_norm": 1.3516093593720668, "learning_rate": 6.310769818759009e-06, "loss": 0.4882, "step": 24990 }, { "epoch": 0.47422132857847415, "grad_norm": 1.4043032501872548, "learning_rate": 6.307574616606163e-06, "loss": 0.4743, "step": 25000 }, { "epoch": 0.4744110171099055, "grad_norm": 1.3632761465853893, "learning_rate": 6.304378841173037e-06, "loss": 0.4756, "step": 25010 }, { "epoch": 0.47460070564133694, "grad_norm": 1.4991090285470796, "learning_rate": 6.301182493860758e-06, "loss": 0.4663, "step": 25020 }, { "epoch": 0.4747903941727683, "grad_norm": 1.339271950345401, "learning_rate": 6.297985576070701e-06, "loss": 0.4584, "step": 25030 }, { "epoch": 0.4749800827041997, "grad_norm": 1.3318392175931126, "learning_rate": 6.294788089204492e-06, "loss": 0.4786, "step": 25040 }, { "epoch": 0.4751697712356311, "grad_norm": 2.049915474622519, "learning_rate": 6.2915900346640045e-06, "loss": 0.4479, "step": 25050 }, { "epoch": 0.4753594597670625, "grad_norm": 1.3404696063682233, "learning_rate": 6.288391413851362e-06, "loss": 0.45, "step": 25060 }, { "epoch": 0.4755491482984939, "grad_norm": 1.4851839313456041, "learning_rate": 6.28519222816894e-06, "loss": 0.4787, "step": 25070 }, { "epoch": 0.47573883682992524, "grad_norm": 1.491271993320599, "learning_rate": 6.281992479019354e-06, "loss": 0.4535, "step": 25080 }, { "epoch": 0.47592852536135666, "grad_norm": 1.388232112851654, "learning_rate": 6.278792167805474e-06, "loss": 0.442, "step": 25090 }, { "epoch": 0.476118213892788, "grad_norm": 1.5678774945727105, "learning_rate": 6.27559129593041e-06, "loss": 0.4552, "step": 25100 }, { "epoch": 0.47630790242421944, "grad_norm": 1.4511978404876564, "learning_rate": 6.2723898647975255e-06, "loss": 0.4602, "step": 25110 }, { "epoch": 0.4764975909556508, "grad_norm": 1.1809512127950155, "learning_rate": 6.269187875810422e-06, "loss": 0.4362, "step": 25120 }, { "epoch": 0.4766872794870822, "grad_norm": 1.71925776447124, "learning_rate": 6.265985330372949e-06, "loss": 0.4375, "step": 25130 }, { "epoch": 0.4768769680185136, "grad_norm": 1.7447053212545756, "learning_rate": 6.2627822298891985e-06, "loss": 0.4726, "step": 25140 }, { "epoch": 0.477066656549945, "grad_norm": 1.536908759648597, "learning_rate": 6.259578575763507e-06, "loss": 0.4544, "step": 25150 }, { "epoch": 0.4772563450813764, "grad_norm": 1.8676119120552612, "learning_rate": 6.256374369400456e-06, "loss": 0.4649, "step": 25160 }, { "epoch": 0.4774460336128078, "grad_norm": 1.9163896433995795, "learning_rate": 6.2531696122048625e-06, "loss": 0.4589, "step": 25170 }, { "epoch": 0.47763572214423916, "grad_norm": 1.4467176403611102, "learning_rate": 6.2499643055817895e-06, "loss": 0.4789, "step": 25180 }, { "epoch": 0.4778254106756705, "grad_norm": 1.4418573697139994, "learning_rate": 6.246758450936544e-06, "loss": 0.4638, "step": 25190 }, { "epoch": 0.47801509920710195, "grad_norm": 1.465974165872065, "learning_rate": 6.2435520496746675e-06, "loss": 0.4792, "step": 25200 }, { "epoch": 0.4782047877385333, "grad_norm": 1.4637742801334581, "learning_rate": 6.240345103201941e-06, "loss": 0.4588, "step": 25210 }, { "epoch": 0.47839447626996473, "grad_norm": 1.8808162078619242, "learning_rate": 6.237137612924391e-06, "loss": 0.4927, "step": 25220 }, { "epoch": 0.4785841648013961, "grad_norm": 1.9717352993112918, "learning_rate": 6.233929580248275e-06, "loss": 0.469, "step": 25230 }, { "epoch": 0.4787738533328275, "grad_norm": 1.5314765884288146, "learning_rate": 6.230721006580095e-06, "loss": 0.4584, "step": 25240 }, { "epoch": 0.4789635418642589, "grad_norm": 1.3748319959435673, "learning_rate": 6.2275118933265845e-06, "loss": 0.4555, "step": 25250 }, { "epoch": 0.4791532303956903, "grad_norm": 1.243589414433877, "learning_rate": 6.224302241894714e-06, "loss": 0.4633, "step": 25260 }, { "epoch": 0.47934291892712166, "grad_norm": 1.5151331569022244, "learning_rate": 6.221092053691698e-06, "loss": 0.45, "step": 25270 }, { "epoch": 0.47953260745855303, "grad_norm": 1.3528190996433822, "learning_rate": 6.217881330124973e-06, "loss": 0.4744, "step": 25280 }, { "epoch": 0.47972229598998445, "grad_norm": 1.3963480503754555, "learning_rate": 6.21467007260222e-06, "loss": 0.4618, "step": 25290 }, { "epoch": 0.4799119845214158, "grad_norm": 1.5312358194432167, "learning_rate": 6.211458282531354e-06, "loss": 0.4459, "step": 25300 }, { "epoch": 0.48010167305284723, "grad_norm": 1.218458626803612, "learning_rate": 6.208245961320517e-06, "loss": 0.457, "step": 25310 }, { "epoch": 0.4802913615842786, "grad_norm": 1.3064248727457117, "learning_rate": 6.205033110378092e-06, "loss": 0.4711, "step": 25320 }, { "epoch": 0.48048105011571, "grad_norm": 1.7477339588719842, "learning_rate": 6.201819731112686e-06, "loss": 0.4613, "step": 25330 }, { "epoch": 0.4806707386471414, "grad_norm": 1.4508942574684525, "learning_rate": 6.198605824933143e-06, "loss": 0.4811, "step": 25340 }, { "epoch": 0.4808604271785728, "grad_norm": 1.4448415050798147, "learning_rate": 6.1953913932485385e-06, "loss": 0.4814, "step": 25350 }, { "epoch": 0.48105011571000417, "grad_norm": 1.7387283628345953, "learning_rate": 6.192176437468176e-06, "loss": 0.4787, "step": 25360 }, { "epoch": 0.4812398042414356, "grad_norm": 2.3483196698417657, "learning_rate": 6.188960959001589e-06, "loss": 0.4637, "step": 25370 }, { "epoch": 0.48142949277286695, "grad_norm": 1.6236794668721122, "learning_rate": 6.185744959258538e-06, "loss": 0.466, "step": 25380 }, { "epoch": 0.4816191813042983, "grad_norm": 1.430445112538219, "learning_rate": 6.182528439649017e-06, "loss": 0.4316, "step": 25390 }, { "epoch": 0.48180886983572974, "grad_norm": 1.3976239101287988, "learning_rate": 6.179311401583248e-06, "loss": 0.4694, "step": 25400 }, { "epoch": 0.4819985583671611, "grad_norm": 1.4215234226767413, "learning_rate": 6.176093846471674e-06, "loss": 0.438, "step": 25410 }, { "epoch": 0.4821882468985925, "grad_norm": 1.547530061140425, "learning_rate": 6.172875775724969e-06, "loss": 0.4744, "step": 25420 }, { "epoch": 0.4823779354300239, "grad_norm": 1.2556451369485424, "learning_rate": 6.169657190754033e-06, "loss": 0.4573, "step": 25430 }, { "epoch": 0.4825676239614553, "grad_norm": 1.3544591817402594, "learning_rate": 6.166438092969993e-06, "loss": 0.4557, "step": 25440 }, { "epoch": 0.48275731249288667, "grad_norm": 1.420690943756009, "learning_rate": 6.163218483784198e-06, "loss": 0.4633, "step": 25450 }, { "epoch": 0.4829470010243181, "grad_norm": 1.314873139329424, "learning_rate": 6.1599983646082195e-06, "loss": 0.4356, "step": 25460 }, { "epoch": 0.48313668955574945, "grad_norm": 1.5385000731314402, "learning_rate": 6.156777736853857e-06, "loss": 0.4741, "step": 25470 }, { "epoch": 0.4833263780871809, "grad_norm": 1.4485735551142083, "learning_rate": 6.1535566019331316e-06, "loss": 0.4564, "step": 25480 }, { "epoch": 0.48351606661861224, "grad_norm": 1.3113577193137118, "learning_rate": 6.150334961258286e-06, "loss": 0.4432, "step": 25490 }, { "epoch": 0.4837057551500436, "grad_norm": 1.4785521356473792, "learning_rate": 6.147112816241787e-06, "loss": 0.4441, "step": 25500 }, { "epoch": 0.483895443681475, "grad_norm": 1.5882255825911373, "learning_rate": 6.143890168296315e-06, "loss": 0.4527, "step": 25510 }, { "epoch": 0.4840851322129064, "grad_norm": 1.2748377349405555, "learning_rate": 6.1406670188347805e-06, "loss": 0.4822, "step": 25520 }, { "epoch": 0.4842748207443378, "grad_norm": 1.3370969958097738, "learning_rate": 6.137443369270312e-06, "loss": 0.5123, "step": 25530 }, { "epoch": 0.4844645092757692, "grad_norm": 1.416403699000312, "learning_rate": 6.1342192210162484e-06, "loss": 0.4979, "step": 25540 }, { "epoch": 0.4846541978072006, "grad_norm": 1.5324635776597417, "learning_rate": 6.13099457548616e-06, "loss": 0.4635, "step": 25550 }, { "epoch": 0.48484388633863196, "grad_norm": 1.3723834689279493, "learning_rate": 6.127769434093824e-06, "loss": 0.4728, "step": 25560 }, { "epoch": 0.4850335748700634, "grad_norm": 1.5568790862841886, "learning_rate": 6.1245437982532445e-06, "loss": 0.4602, "step": 25570 }, { "epoch": 0.48522326340149474, "grad_norm": 1.249590462980019, "learning_rate": 6.121317669378636e-06, "loss": 0.4557, "step": 25580 }, { "epoch": 0.48541295193292616, "grad_norm": 2.356829535302866, "learning_rate": 6.118091048884429e-06, "loss": 0.4481, "step": 25590 }, { "epoch": 0.4856026404643575, "grad_norm": 1.4355856480698184, "learning_rate": 6.114863938185275e-06, "loss": 0.4325, "step": 25600 }, { "epoch": 0.4857923289957889, "grad_norm": 1.2309198021316297, "learning_rate": 6.1116363386960345e-06, "loss": 0.4615, "step": 25610 }, { "epoch": 0.4859820175272203, "grad_norm": 1.3548975860821284, "learning_rate": 6.108408251831786e-06, "loss": 0.4723, "step": 25620 }, { "epoch": 0.4861717060586517, "grad_norm": 1.4732805458313007, "learning_rate": 6.105179679007821e-06, "loss": 0.486, "step": 25630 }, { "epoch": 0.4863613945900831, "grad_norm": 1.6035815837213612, "learning_rate": 6.101950621639641e-06, "loss": 0.443, "step": 25640 }, { "epoch": 0.48655108312151446, "grad_norm": 1.5046716184793005, "learning_rate": 6.098721081142964e-06, "loss": 0.456, "step": 25650 }, { "epoch": 0.4867407716529459, "grad_norm": 1.3446055710669118, "learning_rate": 6.09549105893372e-06, "loss": 0.4754, "step": 25660 }, { "epoch": 0.48693046018437725, "grad_norm": 1.4294635824511195, "learning_rate": 6.092260556428045e-06, "loss": 0.4563, "step": 25670 }, { "epoch": 0.48712014871580867, "grad_norm": 1.4282081261211386, "learning_rate": 6.089029575042292e-06, "loss": 0.4712, "step": 25680 }, { "epoch": 0.48730983724724003, "grad_norm": 1.412036459880029, "learning_rate": 6.08579811619302e-06, "loss": 0.4541, "step": 25690 }, { "epoch": 0.4874995257786714, "grad_norm": 1.4716147016022842, "learning_rate": 6.082566181296997e-06, "loss": 0.4627, "step": 25700 }, { "epoch": 0.4876892143101028, "grad_norm": 1.8142790570262939, "learning_rate": 6.079333771771204e-06, "loss": 0.4687, "step": 25710 }, { "epoch": 0.4878789028415342, "grad_norm": 1.5664895245545307, "learning_rate": 6.0761008890328245e-06, "loss": 0.4272, "step": 25720 }, { "epoch": 0.4880685913729656, "grad_norm": 1.4099145715147032, "learning_rate": 6.072867534499254e-06, "loss": 0.434, "step": 25730 }, { "epoch": 0.48825827990439696, "grad_norm": 1.6690287648636908, "learning_rate": 6.069633709588091e-06, "loss": 0.4688, "step": 25740 }, { "epoch": 0.4884479684358284, "grad_norm": 1.4742142348969114, "learning_rate": 6.066399415717143e-06, "loss": 0.4769, "step": 25750 }, { "epoch": 0.48863765696725975, "grad_norm": 1.2695946881276654, "learning_rate": 6.063164654304423e-06, "loss": 0.4672, "step": 25760 }, { "epoch": 0.48882734549869117, "grad_norm": 1.2186507864361833, "learning_rate": 6.059929426768147e-06, "loss": 0.4437, "step": 25770 }, { "epoch": 0.48901703403012253, "grad_norm": 1.4370868346718642, "learning_rate": 6.05669373452674e-06, "loss": 0.4674, "step": 25780 }, { "epoch": 0.48920672256155395, "grad_norm": 1.4092332655666102, "learning_rate": 6.053457578998823e-06, "loss": 0.4576, "step": 25790 }, { "epoch": 0.4893964110929853, "grad_norm": 1.459397192909951, "learning_rate": 6.050220961603225e-06, "loss": 0.4955, "step": 25800 }, { "epoch": 0.4895860996244167, "grad_norm": 1.383937341552094, "learning_rate": 6.046983883758981e-06, "loss": 0.4493, "step": 25810 }, { "epoch": 0.4897757881558481, "grad_norm": 1.3115754495831184, "learning_rate": 6.043746346885319e-06, "loss": 0.4522, "step": 25820 }, { "epoch": 0.48996547668727947, "grad_norm": 1.5474134511210407, "learning_rate": 6.040508352401677e-06, "loss": 0.4651, "step": 25830 }, { "epoch": 0.4901551652187109, "grad_norm": 1.5728375443457854, "learning_rate": 6.037269901727687e-06, "loss": 0.493, "step": 25840 }, { "epoch": 0.49034485375014225, "grad_norm": 1.4903763590122845, "learning_rate": 6.0340309962831855e-06, "loss": 0.4626, "step": 25850 }, { "epoch": 0.49053454228157367, "grad_norm": 2.1694602445839366, "learning_rate": 6.030791637488207e-06, "loss": 0.4548, "step": 25860 }, { "epoch": 0.49072423081300504, "grad_norm": 1.3035129482051844, "learning_rate": 6.027551826762982e-06, "loss": 0.4823, "step": 25870 }, { "epoch": 0.49091391934443646, "grad_norm": 1.404885686385415, "learning_rate": 6.0243115655279425e-06, "loss": 0.4888, "step": 25880 }, { "epoch": 0.4911036078758678, "grad_norm": 1.3533228644491437, "learning_rate": 6.021070855203719e-06, "loss": 0.4566, "step": 25890 }, { "epoch": 0.49129329640729924, "grad_norm": 1.3447873948001137, "learning_rate": 6.017829697211135e-06, "loss": 0.4753, "step": 25900 }, { "epoch": 0.4914829849387306, "grad_norm": 1.4270600567844491, "learning_rate": 6.014588092971214e-06, "loss": 0.4696, "step": 25910 }, { "epoch": 0.49167267347016197, "grad_norm": 1.3371752577843286, "learning_rate": 6.01134604390517e-06, "loss": 0.4604, "step": 25920 }, { "epoch": 0.4918623620015934, "grad_norm": 1.7111051979154672, "learning_rate": 6.008103551434419e-06, "loss": 0.4826, "step": 25930 }, { "epoch": 0.49205205053302475, "grad_norm": 1.4372576520481386, "learning_rate": 6.004860616980567e-06, "loss": 0.4291, "step": 25940 }, { "epoch": 0.4922417390644562, "grad_norm": 1.53501496355933, "learning_rate": 6.001617241965413e-06, "loss": 0.4694, "step": 25950 }, { "epoch": 0.49243142759588754, "grad_norm": 1.4950747084257896, "learning_rate": 5.998373427810953e-06, "loss": 0.4429, "step": 25960 }, { "epoch": 0.49262111612731896, "grad_norm": 1.5121586734075678, "learning_rate": 5.995129175939369e-06, "loss": 0.4571, "step": 25970 }, { "epoch": 0.4928108046587503, "grad_norm": 1.4668615422153084, "learning_rate": 5.991884487773044e-06, "loss": 0.4541, "step": 25980 }, { "epoch": 0.49300049319018174, "grad_norm": 3.240250852577892, "learning_rate": 5.988639364734548e-06, "loss": 0.4666, "step": 25990 }, { "epoch": 0.4931901817216131, "grad_norm": 1.7016405651310356, "learning_rate": 5.9853938082466356e-06, "loss": 0.4519, "step": 26000 }, { "epoch": 0.49337987025304453, "grad_norm": 1.451873670280076, "learning_rate": 5.9821478197322615e-06, "loss": 0.4325, "step": 26010 }, { "epoch": 0.4935695587844759, "grad_norm": 1.3076186511752175, "learning_rate": 5.978901400614564e-06, "loss": 0.4696, "step": 26020 }, { "epoch": 0.49375924731590726, "grad_norm": 1.4022701974146148, "learning_rate": 5.9756545523168705e-06, "loss": 0.4478, "step": 26030 }, { "epoch": 0.4939489358473387, "grad_norm": 1.528311626883123, "learning_rate": 5.972407276262701e-06, "loss": 0.4616, "step": 26040 }, { "epoch": 0.49413862437877004, "grad_norm": 1.4462869402878877, "learning_rate": 5.9691595738757535e-06, "loss": 0.4801, "step": 26050 }, { "epoch": 0.49432831291020146, "grad_norm": 1.2240808593890056, "learning_rate": 5.965911446579926e-06, "loss": 0.4452, "step": 26060 }, { "epoch": 0.4945180014416328, "grad_norm": 1.369984680732786, "learning_rate": 5.96266289579929e-06, "loss": 0.4571, "step": 26070 }, { "epoch": 0.49470768997306425, "grad_norm": 1.3356411521921014, "learning_rate": 5.959413922958112e-06, "loss": 0.4733, "step": 26080 }, { "epoch": 0.4948973785044956, "grad_norm": 1.629719309050422, "learning_rate": 5.956164529480839e-06, "loss": 0.4769, "step": 26090 }, { "epoch": 0.49508706703592703, "grad_norm": 1.503007567425116, "learning_rate": 5.952914716792103e-06, "loss": 0.4672, "step": 26100 }, { "epoch": 0.4952767555673584, "grad_norm": 1.2603905064787422, "learning_rate": 5.949664486316721e-06, "loss": 0.4479, "step": 26110 }, { "epoch": 0.49546644409878976, "grad_norm": 1.486151697914851, "learning_rate": 5.946413839479692e-06, "loss": 0.4551, "step": 26120 }, { "epoch": 0.4956561326302212, "grad_norm": 1.3526596905254014, "learning_rate": 5.943162777706196e-06, "loss": 0.4315, "step": 26130 }, { "epoch": 0.49584582116165254, "grad_norm": 1.4185185084500302, "learning_rate": 5.939911302421601e-06, "loss": 0.4483, "step": 26140 }, { "epoch": 0.49603550969308396, "grad_norm": 1.524888868862193, "learning_rate": 5.936659415051449e-06, "loss": 0.4503, "step": 26150 }, { "epoch": 0.49622519822451533, "grad_norm": 1.3475783207155334, "learning_rate": 5.933407117021466e-06, "loss": 0.4544, "step": 26160 }, { "epoch": 0.49641488675594675, "grad_norm": 1.3826769812650106, "learning_rate": 5.930154409757558e-06, "loss": 0.4905, "step": 26170 }, { "epoch": 0.4966045752873781, "grad_norm": 1.4487912005921457, "learning_rate": 5.926901294685812e-06, "loss": 0.4395, "step": 26180 }, { "epoch": 0.49679426381880953, "grad_norm": 1.4195082274742759, "learning_rate": 5.923647773232493e-06, "loss": 0.4838, "step": 26190 }, { "epoch": 0.4969839523502409, "grad_norm": 1.6039527758328294, "learning_rate": 5.920393846824038e-06, "loss": 0.4785, "step": 26200 }, { "epoch": 0.4971736408816723, "grad_norm": 1.6310119130687417, "learning_rate": 5.917139516887069e-06, "loss": 0.4648, "step": 26210 }, { "epoch": 0.4973633294131037, "grad_norm": 1.3873345260898333, "learning_rate": 5.9138847848483875e-06, "loss": 0.46, "step": 26220 }, { "epoch": 0.49755301794453505, "grad_norm": 1.5326996159495676, "learning_rate": 5.9106296521349605e-06, "loss": 0.4634, "step": 26230 }, { "epoch": 0.49774270647596647, "grad_norm": 1.0935446666484931, "learning_rate": 5.907374120173939e-06, "loss": 0.431, "step": 26240 }, { "epoch": 0.49793239500739783, "grad_norm": 2.095652393616246, "learning_rate": 5.9041181903926456e-06, "loss": 0.4508, "step": 26250 }, { "epoch": 0.49812208353882925, "grad_norm": 1.3368129167264347, "learning_rate": 5.900861864218581e-06, "loss": 0.4714, "step": 26260 }, { "epoch": 0.4983117720702606, "grad_norm": 1.285494751951941, "learning_rate": 5.897605143079417e-06, "loss": 0.4349, "step": 26270 }, { "epoch": 0.49850146060169204, "grad_norm": 1.5391882997222166, "learning_rate": 5.894348028402994e-06, "loss": 0.4736, "step": 26280 }, { "epoch": 0.4986911491331234, "grad_norm": 1.4041256996568974, "learning_rate": 5.8910905216173354e-06, "loss": 0.4675, "step": 26290 }, { "epoch": 0.4988808376645548, "grad_norm": 1.459552934762875, "learning_rate": 5.8878326241506266e-06, "loss": 0.4506, "step": 26300 }, { "epoch": 0.4990705261959862, "grad_norm": 1.3891237779443304, "learning_rate": 5.88457433743123e-06, "loss": 0.4649, "step": 26310 }, { "epoch": 0.4992602147274176, "grad_norm": 1.3299339820143672, "learning_rate": 5.881315662887679e-06, "loss": 0.4583, "step": 26320 }, { "epoch": 0.49944990325884897, "grad_norm": 1.382658854374168, "learning_rate": 5.878056601948672e-06, "loss": 0.4726, "step": 26330 }, { "epoch": 0.49963959179028034, "grad_norm": 1.3863044843939765, "learning_rate": 5.874797156043081e-06, "loss": 0.4637, "step": 26340 }, { "epoch": 0.49982928032171176, "grad_norm": 1.4118409945104786, "learning_rate": 5.871537326599946e-06, "loss": 0.4616, "step": 26350 }, { "epoch": 0.5000189688531431, "grad_norm": 1.6546895586856891, "learning_rate": 5.868277115048475e-06, "loss": 0.4685, "step": 26360 }, { "epoch": 0.5002086573845745, "grad_norm": 1.3977761305182428, "learning_rate": 5.865016522818044e-06, "loss": 0.4533, "step": 26370 }, { "epoch": 0.500398345916006, "grad_norm": 1.2997356683372108, "learning_rate": 5.861755551338193e-06, "loss": 0.4636, "step": 26380 }, { "epoch": 0.5005880344474373, "grad_norm": 1.4473474922142073, "learning_rate": 5.858494202038633e-06, "loss": 0.4329, "step": 26390 }, { "epoch": 0.5007777229788687, "grad_norm": 1.3121460413121244, "learning_rate": 5.8552324763492375e-06, "loss": 0.4561, "step": 26400 }, { "epoch": 0.5009674115103001, "grad_norm": 1.1716759333102482, "learning_rate": 5.851970375700045e-06, "loss": 0.4246, "step": 26410 }, { "epoch": 0.5011571000417315, "grad_norm": 1.6725209287736007, "learning_rate": 5.84870790152126e-06, "loss": 0.4461, "step": 26420 }, { "epoch": 0.5013467885731628, "grad_norm": 1.3423926300740239, "learning_rate": 5.845445055243249e-06, "loss": 0.4512, "step": 26430 }, { "epoch": 0.5015364771045943, "grad_norm": 1.2907759273492203, "learning_rate": 5.842181838296543e-06, "loss": 0.4762, "step": 26440 }, { "epoch": 0.5017261656360257, "grad_norm": 1.3748900492109584, "learning_rate": 5.838918252111837e-06, "loss": 0.469, "step": 26450 }, { "epoch": 0.501915854167457, "grad_norm": 1.3776608876055096, "learning_rate": 5.835654298119981e-06, "loss": 0.4593, "step": 26460 }, { "epoch": 0.5021055426988884, "grad_norm": 1.4143921048619308, "learning_rate": 5.832389977751997e-06, "loss": 0.4443, "step": 26470 }, { "epoch": 0.5022952312303198, "grad_norm": 1.3697568036418086, "learning_rate": 5.829125292439058e-06, "loss": 0.4334, "step": 26480 }, { "epoch": 0.5024849197617512, "grad_norm": 1.606980488967349, "learning_rate": 5.8258602436125025e-06, "loss": 0.4694, "step": 26490 }, { "epoch": 0.5026746082931826, "grad_norm": 1.6715711052008289, "learning_rate": 5.822594832703825e-06, "loss": 0.4825, "step": 26500 }, { "epoch": 0.502864296824614, "grad_norm": 1.474384713157998, "learning_rate": 5.819329061144683e-06, "loss": 0.4789, "step": 26510 }, { "epoch": 0.5030539853560454, "grad_norm": 1.5301614518207909, "learning_rate": 5.816062930366887e-06, "loss": 0.4565, "step": 26520 }, { "epoch": 0.5032436738874768, "grad_norm": 1.4423806655657458, "learning_rate": 5.812796441802409e-06, "loss": 0.4558, "step": 26530 }, { "epoch": 0.5034333624189081, "grad_norm": 1.496723368603156, "learning_rate": 5.809529596883377e-06, "loss": 0.4791, "step": 26540 }, { "epoch": 0.5036230509503395, "grad_norm": 1.4816725786377976, "learning_rate": 5.806262397042073e-06, "loss": 0.4659, "step": 26550 }, { "epoch": 0.503812739481771, "grad_norm": 1.648422480814036, "learning_rate": 5.802994843710937e-06, "loss": 0.4495, "step": 26560 }, { "epoch": 0.5040024280132023, "grad_norm": 1.5692542852978473, "learning_rate": 5.799726938322564e-06, "loss": 0.4869, "step": 26570 }, { "epoch": 0.5041921165446337, "grad_norm": 1.5461559054041216, "learning_rate": 5.7964586823097025e-06, "loss": 0.4578, "step": 26580 }, { "epoch": 0.5043818050760651, "grad_norm": 2.1829600920211227, "learning_rate": 5.793190077105252e-06, "loss": 0.4457, "step": 26590 }, { "epoch": 0.5045714936074965, "grad_norm": 1.4675097499862573, "learning_rate": 5.789921124142273e-06, "loss": 0.4742, "step": 26600 }, { "epoch": 0.5047611821389278, "grad_norm": 1.1892476743342455, "learning_rate": 5.786651824853969e-06, "loss": 0.4482, "step": 26610 }, { "epoch": 0.5049508706703593, "grad_norm": 1.4256910185805671, "learning_rate": 5.783382180673701e-06, "loss": 0.4599, "step": 26620 }, { "epoch": 0.5051405592017907, "grad_norm": 1.4035281015581136, "learning_rate": 5.780112193034981e-06, "loss": 0.4546, "step": 26630 }, { "epoch": 0.5053302477332221, "grad_norm": 1.2228486254747752, "learning_rate": 5.77684186337147e-06, "loss": 0.4843, "step": 26640 }, { "epoch": 0.5055199362646534, "grad_norm": 1.5136833254284479, "learning_rate": 5.77357119311698e-06, "loss": 0.4427, "step": 26650 }, { "epoch": 0.5057096247960848, "grad_norm": 1.566915542416598, "learning_rate": 5.770300183705471e-06, "loss": 0.4341, "step": 26660 }, { "epoch": 0.5058993133275163, "grad_norm": 1.3551074342688618, "learning_rate": 5.767028836571049e-06, "loss": 0.468, "step": 26670 }, { "epoch": 0.5060890018589476, "grad_norm": 1.3543041361135892, "learning_rate": 5.76375715314798e-06, "loss": 0.4651, "step": 26680 }, { "epoch": 0.506278690390379, "grad_norm": 1.1844516182821543, "learning_rate": 5.760485134870662e-06, "loss": 0.4389, "step": 26690 }, { "epoch": 0.5064683789218104, "grad_norm": 1.479400790479177, "learning_rate": 5.757212783173649e-06, "loss": 0.4558, "step": 26700 }, { "epoch": 0.5066580674532418, "grad_norm": 1.455578645605707, "learning_rate": 5.753940099491639e-06, "loss": 0.4769, "step": 26710 }, { "epoch": 0.5068477559846731, "grad_norm": 1.3095498575167108, "learning_rate": 5.750667085259476e-06, "loss": 0.4381, "step": 26720 }, { "epoch": 0.5070374445161046, "grad_norm": 1.3366663478708176, "learning_rate": 5.747393741912148e-06, "loss": 0.4885, "step": 26730 }, { "epoch": 0.507227133047536, "grad_norm": 1.341898762256724, "learning_rate": 5.7441200708847885e-06, "loss": 0.4645, "step": 26740 }, { "epoch": 0.5074168215789673, "grad_norm": 1.629060029602303, "learning_rate": 5.740846073612672e-06, "loss": 0.49, "step": 26750 }, { "epoch": 0.5076065101103987, "grad_norm": 1.2907934374668362, "learning_rate": 5.7375717515312206e-06, "loss": 0.4603, "step": 26760 }, { "epoch": 0.5077961986418301, "grad_norm": 1.0276288429374492, "learning_rate": 5.734297106075993e-06, "loss": 0.4484, "step": 26770 }, { "epoch": 0.5079858871732615, "grad_norm": 1.334278092760005, "learning_rate": 5.731022138682697e-06, "loss": 0.4581, "step": 26780 }, { "epoch": 0.5081755757046929, "grad_norm": 1.404839747788908, "learning_rate": 5.727746850787173e-06, "loss": 0.462, "step": 26790 }, { "epoch": 0.5083652642361243, "grad_norm": 1.55426518894309, "learning_rate": 5.724471243825409e-06, "loss": 0.4592, "step": 26800 }, { "epoch": 0.5085549527675557, "grad_norm": 1.5002594611116291, "learning_rate": 5.721195319233531e-06, "loss": 0.4847, "step": 26810 }, { "epoch": 0.5087446412989871, "grad_norm": 1.3274034180002185, "learning_rate": 5.717919078447801e-06, "loss": 0.4734, "step": 26820 }, { "epoch": 0.5089343298304184, "grad_norm": 1.4483551298432247, "learning_rate": 5.714642522904623e-06, "loss": 0.4603, "step": 26830 }, { "epoch": 0.5091240183618498, "grad_norm": 1.5442711822376651, "learning_rate": 5.711365654040538e-06, "loss": 0.4545, "step": 26840 }, { "epoch": 0.5093137068932813, "grad_norm": 1.5566262807948534, "learning_rate": 5.708088473292226e-06, "loss": 0.4779, "step": 26850 }, { "epoch": 0.5095033954247126, "grad_norm": 1.460044774222057, "learning_rate": 5.7048109820965e-06, "loss": 0.447, "step": 26860 }, { "epoch": 0.509693083956144, "grad_norm": 1.5383458735159925, "learning_rate": 5.701533181890312e-06, "loss": 0.4855, "step": 26870 }, { "epoch": 0.5098827724875754, "grad_norm": 1.5561262580488664, "learning_rate": 5.69825507411075e-06, "loss": 0.4963, "step": 26880 }, { "epoch": 0.5100724610190068, "grad_norm": 1.4183122622343882, "learning_rate": 5.694976660195034e-06, "loss": 0.4407, "step": 26890 }, { "epoch": 0.5102621495504381, "grad_norm": 2.295353802059023, "learning_rate": 5.691697941580521e-06, "loss": 0.4676, "step": 26900 }, { "epoch": 0.5104518380818696, "grad_norm": 1.3302893297877547, "learning_rate": 5.688418919704701e-06, "loss": 0.4527, "step": 26910 }, { "epoch": 0.510641526613301, "grad_norm": 1.5115842671274495, "learning_rate": 5.685139596005193e-06, "loss": 0.4723, "step": 26920 }, { "epoch": 0.5108312151447324, "grad_norm": 1.3342814296674004, "learning_rate": 5.681859971919756e-06, "loss": 0.4582, "step": 26930 }, { "epoch": 0.5110209036761637, "grad_norm": 1.2698820186950317, "learning_rate": 5.678580048886274e-06, "loss": 0.4372, "step": 26940 }, { "epoch": 0.5112105922075951, "grad_norm": 1.3960571426170978, "learning_rate": 5.675299828342767e-06, "loss": 0.476, "step": 26950 }, { "epoch": 0.5114002807390265, "grad_norm": 1.4085176047661587, "learning_rate": 5.672019311727378e-06, "loss": 0.4586, "step": 26960 }, { "epoch": 0.5115899692704579, "grad_norm": 1.3817889471560072, "learning_rate": 5.668738500478389e-06, "loss": 0.4568, "step": 26970 }, { "epoch": 0.5117796578018893, "grad_norm": 1.2681206060308159, "learning_rate": 5.665457396034206e-06, "loss": 0.4576, "step": 26980 }, { "epoch": 0.5119693463333207, "grad_norm": 1.3252388221878113, "learning_rate": 5.662175999833365e-06, "loss": 0.454, "step": 26990 }, { "epoch": 0.5121590348647521, "grad_norm": 1.4245548970455015, "learning_rate": 5.658894313314527e-06, "loss": 0.4364, "step": 27000 }, { "epoch": 0.5123487233961834, "grad_norm": 1.3653745737470275, "learning_rate": 5.655612337916485e-06, "loss": 0.456, "step": 27010 }, { "epoch": 0.5125384119276148, "grad_norm": 1.5742505985711406, "learning_rate": 5.652330075078157e-06, "loss": 0.4658, "step": 27020 }, { "epoch": 0.5127281004590463, "grad_norm": 1.4584581360025737, "learning_rate": 5.649047526238582e-06, "loss": 0.4391, "step": 27030 }, { "epoch": 0.5129177889904777, "grad_norm": 1.3686684197051815, "learning_rate": 5.645764692836933e-06, "loss": 0.4642, "step": 27040 }, { "epoch": 0.513107477521909, "grad_norm": 1.519326794741162, "learning_rate": 5.642481576312503e-06, "loss": 0.4629, "step": 27050 }, { "epoch": 0.5132971660533404, "grad_norm": 1.5498073524752982, "learning_rate": 5.639198178104709e-06, "loss": 0.4836, "step": 27060 }, { "epoch": 0.5134868545847718, "grad_norm": 1.6959175422926434, "learning_rate": 5.635914499653091e-06, "loss": 0.4533, "step": 27070 }, { "epoch": 0.5136765431162031, "grad_norm": 1.3661296611115532, "learning_rate": 5.632630542397313e-06, "loss": 0.4595, "step": 27080 }, { "epoch": 0.5138662316476346, "grad_norm": 1.5289327454365902, "learning_rate": 5.629346307777165e-06, "loss": 0.478, "step": 27090 }, { "epoch": 0.514055920179066, "grad_norm": 1.6363890662602003, "learning_rate": 5.626061797232549e-06, "loss": 0.4744, "step": 27100 }, { "epoch": 0.5142456087104974, "grad_norm": 1.3800674755820426, "learning_rate": 5.622777012203499e-06, "loss": 0.4623, "step": 27110 }, { "epoch": 0.5144352972419287, "grad_norm": 1.6169277563577282, "learning_rate": 5.6194919541301606e-06, "loss": 0.4685, "step": 27120 }, { "epoch": 0.5146249857733601, "grad_norm": 1.3586151379738847, "learning_rate": 5.616206624452804e-06, "loss": 0.445, "step": 27130 }, { "epoch": 0.5148146743047916, "grad_norm": 1.5377802329740244, "learning_rate": 5.61292102461182e-06, "loss": 0.44, "step": 27140 }, { "epoch": 0.515004362836223, "grad_norm": 1.568778789468528, "learning_rate": 5.609635156047709e-06, "loss": 0.4711, "step": 27150 }, { "epoch": 0.5151940513676543, "grad_norm": 1.4911735095397454, "learning_rate": 5.6063490202010995e-06, "loss": 0.4629, "step": 27160 }, { "epoch": 0.5153837398990857, "grad_norm": 1.6356093706980603, "learning_rate": 5.603062618512732e-06, "loss": 0.4528, "step": 27170 }, { "epoch": 0.5155734284305171, "grad_norm": 1.4017991902485838, "learning_rate": 5.599775952423464e-06, "loss": 0.452, "step": 27180 }, { "epoch": 0.5157631169619484, "grad_norm": 1.4919432819032574, "learning_rate": 5.596489023374269e-06, "loss": 0.4602, "step": 27190 }, { "epoch": 0.5159528054933799, "grad_norm": 1.4911975528449326, "learning_rate": 5.593201832806236e-06, "loss": 0.4282, "step": 27200 }, { "epoch": 0.5161424940248113, "grad_norm": 1.541326237099629, "learning_rate": 5.5899143821605715e-06, "loss": 0.4787, "step": 27210 }, { "epoch": 0.5163321825562427, "grad_norm": 1.5665914880917018, "learning_rate": 5.586626672878592e-06, "loss": 0.4705, "step": 27220 }, { "epoch": 0.516521871087674, "grad_norm": 1.3618506725418391, "learning_rate": 5.5833387064017265e-06, "loss": 0.4437, "step": 27230 }, { "epoch": 0.5167115596191054, "grad_norm": 1.6245378904483714, "learning_rate": 5.580050484171522e-06, "loss": 0.4907, "step": 27240 }, { "epoch": 0.5169012481505368, "grad_norm": 1.303926671084538, "learning_rate": 5.5767620076296306e-06, "loss": 0.4529, "step": 27250 }, { "epoch": 0.5170909366819683, "grad_norm": 1.4356207469022828, "learning_rate": 5.573473278217825e-06, "loss": 0.4567, "step": 27260 }, { "epoch": 0.5172806252133996, "grad_norm": 1.2837222562085204, "learning_rate": 5.5701842973779806e-06, "loss": 0.4587, "step": 27270 }, { "epoch": 0.517470313744831, "grad_norm": 1.6532030739456456, "learning_rate": 5.566895066552086e-06, "loss": 0.4506, "step": 27280 }, { "epoch": 0.5176600022762624, "grad_norm": 1.4002741116264408, "learning_rate": 5.563605587182241e-06, "loss": 0.4435, "step": 27290 }, { "epoch": 0.5178496908076937, "grad_norm": 1.4167929959776, "learning_rate": 5.5603158607106525e-06, "loss": 0.4958, "step": 27300 }, { "epoch": 0.5180393793391251, "grad_norm": 1.606101855391575, "learning_rate": 5.557025888579635e-06, "loss": 0.43, "step": 27310 }, { "epoch": 0.5182290678705566, "grad_norm": 1.392397271622879, "learning_rate": 5.553735672231614e-06, "loss": 0.455, "step": 27320 }, { "epoch": 0.518418756401988, "grad_norm": 1.4524707526307854, "learning_rate": 5.5504452131091155e-06, "loss": 0.4422, "step": 27330 }, { "epoch": 0.5186084449334193, "grad_norm": 1.6195597846814012, "learning_rate": 5.547154512654783e-06, "loss": 0.47, "step": 27340 }, { "epoch": 0.5187981334648507, "grad_norm": 1.4695911422449837, "learning_rate": 5.543863572311354e-06, "loss": 0.4592, "step": 27350 }, { "epoch": 0.5189878219962821, "grad_norm": 1.3713331335532375, "learning_rate": 5.540572393521678e-06, "loss": 0.4425, "step": 27360 }, { "epoch": 0.5191775105277135, "grad_norm": 1.409684252703003, "learning_rate": 5.537280977728707e-06, "loss": 0.4821, "step": 27370 }, { "epoch": 0.5193671990591449, "grad_norm": 1.6723265976299695, "learning_rate": 5.5339893263754985e-06, "loss": 0.4566, "step": 27380 }, { "epoch": 0.5195568875905763, "grad_norm": 1.3405877327049578, "learning_rate": 5.530697440905209e-06, "loss": 0.4315, "step": 27390 }, { "epoch": 0.5197465761220077, "grad_norm": 1.5107222928614563, "learning_rate": 5.527405322761106e-06, "loss": 0.4538, "step": 27400 }, { "epoch": 0.519936264653439, "grad_norm": 1.211524144690998, "learning_rate": 5.5241129733865474e-06, "loss": 0.4512, "step": 27410 }, { "epoch": 0.5201259531848704, "grad_norm": 1.5788828246764024, "learning_rate": 5.520820394225005e-06, "loss": 0.4573, "step": 27420 }, { "epoch": 0.5203156417163018, "grad_norm": 1.3972637879788066, "learning_rate": 5.51752758672004e-06, "loss": 0.4549, "step": 27430 }, { "epoch": 0.5205053302477333, "grad_norm": 1.4052688580158117, "learning_rate": 5.51423455231532e-06, "loss": 0.4653, "step": 27440 }, { "epoch": 0.5206950187791646, "grad_norm": 1.450598470590522, "learning_rate": 5.510941292454614e-06, "loss": 0.4556, "step": 27450 }, { "epoch": 0.520884707310596, "grad_norm": 1.4450283129213273, "learning_rate": 5.507647808581783e-06, "loss": 0.4512, "step": 27460 }, { "epoch": 0.5210743958420274, "grad_norm": 1.420604521408006, "learning_rate": 5.504354102140795e-06, "loss": 0.4363, "step": 27470 }, { "epoch": 0.5212640843734588, "grad_norm": 1.5544141382337482, "learning_rate": 5.501060174575703e-06, "loss": 0.4575, "step": 27480 }, { "epoch": 0.5214537729048901, "grad_norm": 1.2955711480151462, "learning_rate": 5.497766027330669e-06, "loss": 0.4564, "step": 27490 }, { "epoch": 0.5216434614363216, "grad_norm": 1.3615330886100467, "learning_rate": 5.4944716618499485e-06, "loss": 0.4579, "step": 27500 }, { "epoch": 0.521833149967753, "grad_norm": 1.453221311278276, "learning_rate": 5.491177079577889e-06, "loss": 0.4926, "step": 27510 }, { "epoch": 0.5220228384991843, "grad_norm": 1.4534210686320939, "learning_rate": 5.487882281958933e-06, "loss": 0.4453, "step": 27520 }, { "epoch": 0.5222125270306157, "grad_norm": 1.3517220383330955, "learning_rate": 5.4845872704376236e-06, "loss": 0.4496, "step": 27530 }, { "epoch": 0.5224022155620471, "grad_norm": 1.1323259743698626, "learning_rate": 5.481292046458589e-06, "loss": 0.4653, "step": 27540 }, { "epoch": 0.5225919040934786, "grad_norm": 1.4744296777046555, "learning_rate": 5.4779966114665606e-06, "loss": 0.4795, "step": 27550 }, { "epoch": 0.5227815926249099, "grad_norm": 1.4874428934382407, "learning_rate": 5.474700966906353e-06, "loss": 0.4269, "step": 27560 }, { "epoch": 0.5229712811563413, "grad_norm": 1.5199876066325113, "learning_rate": 5.471405114222877e-06, "loss": 0.4568, "step": 27570 }, { "epoch": 0.5231609696877727, "grad_norm": 1.490515413409648, "learning_rate": 5.468109054861134e-06, "loss": 0.4439, "step": 27580 }, { "epoch": 0.523350658219204, "grad_norm": 1.430483831051571, "learning_rate": 5.464812790266216e-06, "loss": 0.4953, "step": 27590 }, { "epoch": 0.5235403467506354, "grad_norm": 1.4706824324951842, "learning_rate": 5.461516321883309e-06, "loss": 0.4476, "step": 27600 }, { "epoch": 0.5237300352820669, "grad_norm": 1.820454129707675, "learning_rate": 5.458219651157678e-06, "loss": 0.4505, "step": 27610 }, { "epoch": 0.5239197238134983, "grad_norm": 1.3840571824601842, "learning_rate": 5.4549227795346885e-06, "loss": 0.4692, "step": 27620 }, { "epoch": 0.5241094123449296, "grad_norm": 1.3476700923717926, "learning_rate": 5.451625708459785e-06, "loss": 0.4807, "step": 27630 }, { "epoch": 0.524299100876361, "grad_norm": 1.4116076256597025, "learning_rate": 5.4483284393785055e-06, "loss": 0.456, "step": 27640 }, { "epoch": 0.5244887894077924, "grad_norm": 1.645521517806782, "learning_rate": 5.445030973736472e-06, "loss": 0.4809, "step": 27650 }, { "epoch": 0.5246784779392238, "grad_norm": 1.4623827041601365, "learning_rate": 5.441733312979393e-06, "loss": 0.4697, "step": 27660 }, { "epoch": 0.5248681664706552, "grad_norm": 1.2049759304096361, "learning_rate": 5.438435458553064e-06, "loss": 0.4313, "step": 27670 }, { "epoch": 0.5250578550020866, "grad_norm": 1.1940332691010347, "learning_rate": 5.4351374119033625e-06, "loss": 0.452, "step": 27680 }, { "epoch": 0.525247543533518, "grad_norm": 1.2159725323179693, "learning_rate": 5.431839174476253e-06, "loss": 0.454, "step": 27690 }, { "epoch": 0.5254372320649493, "grad_norm": 1.646350096269824, "learning_rate": 5.428540747717782e-06, "loss": 0.4461, "step": 27700 }, { "epoch": 0.5256269205963807, "grad_norm": 1.5045289988270043, "learning_rate": 5.42524213307408e-06, "loss": 0.4651, "step": 27710 }, { "epoch": 0.5258166091278121, "grad_norm": 1.351696630827547, "learning_rate": 5.421943331991358e-06, "loss": 0.4469, "step": 27720 }, { "epoch": 0.5260062976592436, "grad_norm": 1.4192576488507014, "learning_rate": 5.418644345915914e-06, "loss": 0.4421, "step": 27730 }, { "epoch": 0.5261959861906749, "grad_norm": 1.5168798192099364, "learning_rate": 5.415345176294119e-06, "loss": 0.4729, "step": 27740 }, { "epoch": 0.5263856747221063, "grad_norm": 1.4229192419011842, "learning_rate": 5.412045824572432e-06, "loss": 0.4529, "step": 27750 }, { "epoch": 0.5265753632535377, "grad_norm": 1.2572807572691602, "learning_rate": 5.408746292197385e-06, "loss": 0.4473, "step": 27760 }, { "epoch": 0.5267650517849691, "grad_norm": 1.4983106532877408, "learning_rate": 5.405446580615596e-06, "loss": 0.4834, "step": 27770 }, { "epoch": 0.5269547403164004, "grad_norm": 1.3769830662420712, "learning_rate": 5.402146691273759e-06, "loss": 0.4611, "step": 27780 }, { "epoch": 0.5271444288478319, "grad_norm": 1.4335060211282733, "learning_rate": 5.398846625618642e-06, "loss": 0.4296, "step": 27790 }, { "epoch": 0.5273341173792633, "grad_norm": 1.4733494585383167, "learning_rate": 5.395546385097096e-06, "loss": 0.458, "step": 27800 }, { "epoch": 0.5275238059106946, "grad_norm": 1.382607680514603, "learning_rate": 5.392245971156047e-06, "loss": 0.4729, "step": 27810 }, { "epoch": 0.527713494442126, "grad_norm": 1.396098829122922, "learning_rate": 5.388945385242492e-06, "loss": 0.4606, "step": 27820 }, { "epoch": 0.5279031829735574, "grad_norm": 1.6212319422516739, "learning_rate": 5.3856446288035135e-06, "loss": 0.4446, "step": 27830 }, { "epoch": 0.5280928715049888, "grad_norm": 1.3277495008663251, "learning_rate": 5.38234370328626e-06, "loss": 0.4411, "step": 27840 }, { "epoch": 0.5282825600364202, "grad_norm": 1.5794502873489213, "learning_rate": 5.379042610137957e-06, "loss": 0.4649, "step": 27850 }, { "epoch": 0.5284722485678516, "grad_norm": 1.396893745673152, "learning_rate": 5.375741350805906e-06, "loss": 0.4436, "step": 27860 }, { "epoch": 0.528661937099283, "grad_norm": 1.3763633122122567, "learning_rate": 5.372439926737472e-06, "loss": 0.4361, "step": 27870 }, { "epoch": 0.5288516256307144, "grad_norm": 1.4551568797734062, "learning_rate": 5.36913833938011e-06, "loss": 0.4755, "step": 27880 }, { "epoch": 0.5290413141621457, "grad_norm": 1.4935124905131196, "learning_rate": 5.365836590181327e-06, "loss": 0.4513, "step": 27890 }, { "epoch": 0.5292310026935771, "grad_norm": 1.612434519280013, "learning_rate": 5.362534680588714e-06, "loss": 0.4607, "step": 27900 }, { "epoch": 0.5294206912250086, "grad_norm": 1.2763215348535755, "learning_rate": 5.359232612049926e-06, "loss": 0.4461, "step": 27910 }, { "epoch": 0.5296103797564399, "grad_norm": 1.5198196891625575, "learning_rate": 5.355930386012691e-06, "loss": 0.4557, "step": 27920 }, { "epoch": 0.5298000682878713, "grad_norm": 1.4080341408930492, "learning_rate": 5.352628003924806e-06, "loss": 0.4549, "step": 27930 }, { "epoch": 0.5299897568193027, "grad_norm": 1.381795812028002, "learning_rate": 5.3493254672341325e-06, "loss": 0.4529, "step": 27940 }, { "epoch": 0.5301794453507341, "grad_norm": 1.604676598086687, "learning_rate": 5.346022777388602e-06, "loss": 0.4872, "step": 27950 }, { "epoch": 0.5303691338821654, "grad_norm": 1.4306723626600342, "learning_rate": 5.342719935836218e-06, "loss": 0.4539, "step": 27960 }, { "epoch": 0.5305588224135969, "grad_norm": 1.257124610087744, "learning_rate": 5.339416944025043e-06, "loss": 0.471, "step": 27970 }, { "epoch": 0.5307485109450283, "grad_norm": 1.3164737334780965, "learning_rate": 5.336113803403209e-06, "loss": 0.4353, "step": 27980 }, { "epoch": 0.5309381994764597, "grad_norm": 1.3100275389840177, "learning_rate": 5.332810515418914e-06, "loss": 0.4437, "step": 27990 }, { "epoch": 0.531127888007891, "grad_norm": 1.4483886836701811, "learning_rate": 5.329507081520416e-06, "loss": 0.463, "step": 28000 }, { "epoch": 0.5313175765393224, "grad_norm": 1.5078018972950653, "learning_rate": 5.326203503156045e-06, "loss": 0.4346, "step": 28010 }, { "epoch": 0.5315072650707539, "grad_norm": 1.2296600602436638, "learning_rate": 5.322899781774187e-06, "loss": 0.4915, "step": 28020 }, { "epoch": 0.5316969536021852, "grad_norm": 1.1576215741322087, "learning_rate": 5.319595918823294e-06, "loss": 0.4471, "step": 28030 }, { "epoch": 0.5318866421336166, "grad_norm": 1.1983061243198014, "learning_rate": 5.316291915751878e-06, "loss": 0.4567, "step": 28040 }, { "epoch": 0.532076330665048, "grad_norm": 1.3201431252458722, "learning_rate": 5.312987774008517e-06, "loss": 0.4484, "step": 28050 }, { "epoch": 0.5322660191964794, "grad_norm": 1.3579706114106975, "learning_rate": 5.309683495041844e-06, "loss": 0.4268, "step": 28060 }, { "epoch": 0.5324557077279107, "grad_norm": 1.4179400867512784, "learning_rate": 5.306379080300555e-06, "loss": 0.4477, "step": 28070 }, { "epoch": 0.5326453962593422, "grad_norm": 1.5992731448577209, "learning_rate": 5.3030745312334085e-06, "loss": 0.4584, "step": 28080 }, { "epoch": 0.5328350847907736, "grad_norm": 1.5602605311311966, "learning_rate": 5.299769849289216e-06, "loss": 0.4487, "step": 28090 }, { "epoch": 0.533024773322205, "grad_norm": 1.2997557857668296, "learning_rate": 5.296465035916851e-06, "loss": 0.4725, "step": 28100 }, { "epoch": 0.5332144618536363, "grad_norm": 1.5339551401323015, "learning_rate": 5.293160092565243e-06, "loss": 0.4606, "step": 28110 }, { "epoch": 0.5334041503850677, "grad_norm": 1.6054762513399863, "learning_rate": 5.289855020683378e-06, "loss": 0.468, "step": 28120 }, { "epoch": 0.5335938389164991, "grad_norm": 1.4021020518759701, "learning_rate": 5.286549821720303e-06, "loss": 0.4561, "step": 28130 }, { "epoch": 0.5337835274479305, "grad_norm": 1.1986404181552717, "learning_rate": 5.2832444971251165e-06, "loss": 0.4461, "step": 28140 }, { "epoch": 0.5339732159793619, "grad_norm": 1.6380015639809313, "learning_rate": 5.279939048346968e-06, "loss": 0.4576, "step": 28150 }, { "epoch": 0.5341629045107933, "grad_norm": 1.589455140040522, "learning_rate": 5.276633476835072e-06, "loss": 0.4647, "step": 28160 }, { "epoch": 0.5343525930422247, "grad_norm": 1.5103444373439072, "learning_rate": 5.27332778403869e-06, "loss": 0.438, "step": 28170 }, { "epoch": 0.534542281573656, "grad_norm": 1.4938069057225491, "learning_rate": 5.270021971407135e-06, "loss": 0.4651, "step": 28180 }, { "epoch": 0.5347319701050874, "grad_norm": 1.7760885758761928, "learning_rate": 5.266716040389779e-06, "loss": 0.4541, "step": 28190 }, { "epoch": 0.5349216586365189, "grad_norm": 1.372375404188667, "learning_rate": 5.2634099924360374e-06, "loss": 0.4629, "step": 28200 }, { "epoch": 0.5351113471679503, "grad_norm": 1.3515462048201317, "learning_rate": 5.260103828995386e-06, "loss": 0.4445, "step": 28210 }, { "epoch": 0.5353010356993816, "grad_norm": 1.4407762283280168, "learning_rate": 5.256797551517346e-06, "loss": 0.4512, "step": 28220 }, { "epoch": 0.535490724230813, "grad_norm": 1.139607948572595, "learning_rate": 5.253491161451488e-06, "loss": 0.4499, "step": 28230 }, { "epoch": 0.5356804127622444, "grad_norm": 1.4623932375376383, "learning_rate": 5.250184660247435e-06, "loss": 0.4468, "step": 28240 }, { "epoch": 0.5358701012936757, "grad_norm": 1.5681201683485566, "learning_rate": 5.246878049354855e-06, "loss": 0.4506, "step": 28250 }, { "epoch": 0.5360597898251072, "grad_norm": 1.276387611880325, "learning_rate": 5.243571330223469e-06, "loss": 0.4301, "step": 28260 }, { "epoch": 0.5362494783565386, "grad_norm": 1.472664445675011, "learning_rate": 5.240264504303042e-06, "loss": 0.4918, "step": 28270 }, { "epoch": 0.53643916688797, "grad_norm": 1.4585535017146023, "learning_rate": 5.236957573043384e-06, "loss": 0.4383, "step": 28280 }, { "epoch": 0.5366288554194013, "grad_norm": 1.0938140554123261, "learning_rate": 5.233650537894358e-06, "loss": 0.4448, "step": 28290 }, { "epoch": 0.5368185439508327, "grad_norm": 1.4356990295800758, "learning_rate": 5.230343400305867e-06, "loss": 0.4531, "step": 28300 }, { "epoch": 0.5370082324822641, "grad_norm": 1.5268141449222863, "learning_rate": 5.2270361617278566e-06, "loss": 0.4574, "step": 28310 }, { "epoch": 0.5371979210136956, "grad_norm": 2.0620175929983224, "learning_rate": 5.223728823610323e-06, "loss": 0.4471, "step": 28320 }, { "epoch": 0.5373876095451269, "grad_norm": 1.3813473450969085, "learning_rate": 5.2204213874033035e-06, "loss": 0.4733, "step": 28330 }, { "epoch": 0.5375772980765583, "grad_norm": 1.654008659267068, "learning_rate": 5.21711385455688e-06, "loss": 0.4777, "step": 28340 }, { "epoch": 0.5377669866079897, "grad_norm": 1.6507556436090756, "learning_rate": 5.213806226521171e-06, "loss": 0.4616, "step": 28350 }, { "epoch": 0.537956675139421, "grad_norm": 1.3542451813386631, "learning_rate": 5.210498504746342e-06, "loss": 0.4322, "step": 28360 }, { "epoch": 0.5381463636708524, "grad_norm": 1.1294257090457238, "learning_rate": 5.207190690682601e-06, "loss": 0.4411, "step": 28370 }, { "epoch": 0.5383360522022839, "grad_norm": 1.3771453103083273, "learning_rate": 5.20388278578019e-06, "loss": 0.4508, "step": 28380 }, { "epoch": 0.5385257407337153, "grad_norm": 1.2101023050429913, "learning_rate": 5.2005747914893965e-06, "loss": 0.4439, "step": 28390 }, { "epoch": 0.5387154292651466, "grad_norm": 1.4036631089928242, "learning_rate": 5.197266709260545e-06, "loss": 0.4352, "step": 28400 }, { "epoch": 0.538905117796578, "grad_norm": 1.36960198433709, "learning_rate": 5.193958540544001e-06, "loss": 0.4441, "step": 28410 }, { "epoch": 0.5390948063280094, "grad_norm": 1.4650437001769954, "learning_rate": 5.190650286790163e-06, "loss": 0.4621, "step": 28420 }, { "epoch": 0.5392844948594407, "grad_norm": 1.5167946854125298, "learning_rate": 5.1873419494494695e-06, "loss": 0.4281, "step": 28430 }, { "epoch": 0.5394741833908722, "grad_norm": 1.899191637091855, "learning_rate": 5.184033529972396e-06, "loss": 0.4347, "step": 28440 }, { "epoch": 0.5396638719223036, "grad_norm": 1.6136033418767242, "learning_rate": 5.180725029809455e-06, "loss": 0.4411, "step": 28450 }, { "epoch": 0.539853560453735, "grad_norm": 1.7716769001863906, "learning_rate": 5.177416450411192e-06, "loss": 0.4596, "step": 28460 }, { "epoch": 0.5400432489851663, "grad_norm": 1.6284492409528832, "learning_rate": 5.174107793228191e-06, "loss": 0.4531, "step": 28470 }, { "epoch": 0.5402329375165977, "grad_norm": 1.317314036014362, "learning_rate": 5.17079905971106e-06, "loss": 0.4793, "step": 28480 }, { "epoch": 0.5404226260480292, "grad_norm": 1.4388005132613442, "learning_rate": 5.167490251310457e-06, "loss": 0.4684, "step": 28490 }, { "epoch": 0.5406123145794606, "grad_norm": 1.5534593059291026, "learning_rate": 5.164181369477058e-06, "loss": 0.4339, "step": 28500 }, { "epoch": 0.5408020031108919, "grad_norm": 1.4169899094458438, "learning_rate": 5.16087241566158e-06, "loss": 0.453, "step": 28510 }, { "epoch": 0.5409916916423233, "grad_norm": 1.7276663820683922, "learning_rate": 5.157563391314767e-06, "loss": 0.431, "step": 28520 }, { "epoch": 0.5411813801737547, "grad_norm": 1.1491493773890074, "learning_rate": 5.154254297887394e-06, "loss": 0.4347, "step": 28530 }, { "epoch": 0.541371068705186, "grad_norm": 1.3962756199277513, "learning_rate": 5.150945136830269e-06, "loss": 0.4651, "step": 28540 }, { "epoch": 0.5415607572366175, "grad_norm": 1.5912458956026436, "learning_rate": 5.147635909594229e-06, "loss": 0.4617, "step": 28550 }, { "epoch": 0.5417504457680489, "grad_norm": 1.2988127517324126, "learning_rate": 5.1443266176301395e-06, "loss": 0.4316, "step": 28560 }, { "epoch": 0.5419401342994803, "grad_norm": 1.1801667316322568, "learning_rate": 5.141017262388893e-06, "loss": 0.4402, "step": 28570 }, { "epoch": 0.5421298228309116, "grad_norm": 1.4236061833215445, "learning_rate": 5.137707845321412e-06, "loss": 0.4454, "step": 28580 }, { "epoch": 0.542319511362343, "grad_norm": 1.3903826646954671, "learning_rate": 5.134398367878644e-06, "loss": 0.4509, "step": 28590 }, { "epoch": 0.5425091998937744, "grad_norm": 1.5321854409692943, "learning_rate": 5.131088831511566e-06, "loss": 0.4562, "step": 28600 }, { "epoch": 0.5426988884252059, "grad_norm": 1.6944390805476122, "learning_rate": 5.127779237671176e-06, "loss": 0.4639, "step": 28610 }, { "epoch": 0.5428885769566372, "grad_norm": 1.4578861115545707, "learning_rate": 5.124469587808504e-06, "loss": 0.4368, "step": 28620 }, { "epoch": 0.5430782654880686, "grad_norm": 1.4773351396087004, "learning_rate": 5.121159883374596e-06, "loss": 0.4355, "step": 28630 }, { "epoch": 0.5432679540195, "grad_norm": 1.437036978653553, "learning_rate": 5.117850125820529e-06, "loss": 0.4439, "step": 28640 }, { "epoch": 0.5434576425509313, "grad_norm": 1.4569253596828078, "learning_rate": 5.114540316597401e-06, "loss": 0.4568, "step": 28650 }, { "epoch": 0.5436473310823627, "grad_norm": 1.3864387836760876, "learning_rate": 5.111230457156333e-06, "loss": 0.4897, "step": 28660 }, { "epoch": 0.5438370196137942, "grad_norm": 1.1980870842762419, "learning_rate": 5.107920548948466e-06, "loss": 0.4611, "step": 28670 }, { "epoch": 0.5440267081452256, "grad_norm": 1.9995387185310303, "learning_rate": 5.104610593424967e-06, "loss": 0.4765, "step": 28680 }, { "epoch": 0.5442163966766569, "grad_norm": 1.3074093966687308, "learning_rate": 5.101300592037015e-06, "loss": 0.4209, "step": 28690 }, { "epoch": 0.5444060852080883, "grad_norm": 1.4015129219842721, "learning_rate": 5.09799054623582e-06, "loss": 0.4487, "step": 28700 }, { "epoch": 0.5445957737395197, "grad_norm": 1.6660330010139985, "learning_rate": 5.094680457472605e-06, "loss": 0.4543, "step": 28710 }, { "epoch": 0.5447854622709511, "grad_norm": 1.5754490909518593, "learning_rate": 5.091370327198611e-06, "loss": 0.4915, "step": 28720 }, { "epoch": 0.5449751508023825, "grad_norm": 1.513645541435606, "learning_rate": 5.088060156865101e-06, "loss": 0.4416, "step": 28730 }, { "epoch": 0.5451648393338139, "grad_norm": 1.211771702758897, "learning_rate": 5.084749947923354e-06, "loss": 0.4474, "step": 28740 }, { "epoch": 0.5453545278652453, "grad_norm": 1.5017644143353577, "learning_rate": 5.081439701824666e-06, "loss": 0.4574, "step": 28750 }, { "epoch": 0.5455442163966766, "grad_norm": 1.5926005767738478, "learning_rate": 5.0781294200203476e-06, "loss": 0.4413, "step": 28760 }, { "epoch": 0.545733904928108, "grad_norm": 1.4333264222609, "learning_rate": 5.074819103961727e-06, "loss": 0.4401, "step": 28770 }, { "epoch": 0.5459235934595394, "grad_norm": 1.4240978702138496, "learning_rate": 5.071508755100148e-06, "loss": 0.4686, "step": 28780 }, { "epoch": 0.5461132819909709, "grad_norm": 1.4701985428823339, "learning_rate": 5.068198374886967e-06, "loss": 0.4399, "step": 28790 }, { "epoch": 0.5463029705224022, "grad_norm": 1.0975313685874615, "learning_rate": 5.064887964773554e-06, "loss": 0.4362, "step": 28800 }, { "epoch": 0.5464926590538336, "grad_norm": 1.471400315936144, "learning_rate": 5.061577526211296e-06, "loss": 0.441, "step": 28810 }, { "epoch": 0.546682347585265, "grad_norm": 1.166887127802254, "learning_rate": 5.058267060651584e-06, "loss": 0.4628, "step": 28820 }, { "epoch": 0.5468720361166964, "grad_norm": 1.2776411897721276, "learning_rate": 5.054956569545832e-06, "loss": 0.4226, "step": 28830 }, { "epoch": 0.5470617246481277, "grad_norm": 1.3014254679808448, "learning_rate": 5.051646054345455e-06, "loss": 0.4602, "step": 28840 }, { "epoch": 0.5472514131795592, "grad_norm": 1.6884566284914633, "learning_rate": 5.048335516501886e-06, "loss": 0.4643, "step": 28850 }, { "epoch": 0.5474411017109906, "grad_norm": 1.4256588101328396, "learning_rate": 5.045024957466563e-06, "loss": 0.434, "step": 28860 }, { "epoch": 0.5476307902424219, "grad_norm": 1.5324658193858165, "learning_rate": 5.041714378690937e-06, "loss": 0.4431, "step": 28870 }, { "epoch": 0.5478204787738533, "grad_norm": 1.4116739758534118, "learning_rate": 5.038403781626466e-06, "loss": 0.4487, "step": 28880 }, { "epoch": 0.5480101673052847, "grad_norm": 1.395165861000514, "learning_rate": 5.0350931677246136e-06, "loss": 0.4383, "step": 28890 }, { "epoch": 0.5481998558367162, "grad_norm": 1.5801251735134334, "learning_rate": 5.031782538436855e-06, "loss": 0.4639, "step": 28900 }, { "epoch": 0.5483895443681475, "grad_norm": 1.1807301564294204, "learning_rate": 5.028471895214671e-06, "loss": 0.4281, "step": 28910 }, { "epoch": 0.5485792328995789, "grad_norm": 1.7766749146519993, "learning_rate": 5.025161239509545e-06, "loss": 0.4537, "step": 28920 }, { "epoch": 0.5487689214310103, "grad_norm": 1.4896289729447199, "learning_rate": 5.021850572772972e-06, "loss": 0.4505, "step": 28930 }, { "epoch": 0.5489586099624417, "grad_norm": 1.4683338327212239, "learning_rate": 5.018539896456443e-06, "loss": 0.4537, "step": 28940 }, { "epoch": 0.549148298493873, "grad_norm": 1.4098849727414922, "learning_rate": 5.015229212011466e-06, "loss": 0.4678, "step": 28950 }, { "epoch": 0.5493379870253045, "grad_norm": 1.4881920581323438, "learning_rate": 5.011918520889542e-06, "loss": 0.4754, "step": 28960 }, { "epoch": 0.5495276755567359, "grad_norm": 1.4087486209109668, "learning_rate": 5.008607824542176e-06, "loss": 0.4521, "step": 28970 }, { "epoch": 0.5497173640881672, "grad_norm": 1.5050397003197478, "learning_rate": 5.005297124420879e-06, "loss": 0.4599, "step": 28980 }, { "epoch": 0.5499070526195986, "grad_norm": 7.172051590006274, "learning_rate": 5.001986421977163e-06, "loss": 0.4295, "step": 28990 }, { "epoch": 0.55009674115103, "grad_norm": 1.395494760612361, "learning_rate": 4.998675718662539e-06, "loss": 0.4548, "step": 29000 }, { "epoch": 0.5502864296824614, "grad_norm": 1.4883157783892844, "learning_rate": 4.99536501592852e-06, "loss": 0.4501, "step": 29010 }, { "epoch": 0.5504761182138928, "grad_norm": 1.3908674340925944, "learning_rate": 4.99205431522662e-06, "loss": 0.442, "step": 29020 }, { "epoch": 0.5506658067453242, "grad_norm": 1.658961012969458, "learning_rate": 4.988743618008347e-06, "loss": 0.4689, "step": 29030 }, { "epoch": 0.5508554952767556, "grad_norm": 1.5396795332276816, "learning_rate": 4.9854329257252115e-06, "loss": 0.4739, "step": 29040 }, { "epoch": 0.551045183808187, "grad_norm": 1.3988007152110031, "learning_rate": 4.982122239828724e-06, "loss": 0.4303, "step": 29050 }, { "epoch": 0.5512348723396183, "grad_norm": 1.25881083169418, "learning_rate": 4.978811561770388e-06, "loss": 0.4489, "step": 29060 }, { "epoch": 0.5514245608710497, "grad_norm": 1.634260914588607, "learning_rate": 4.975500893001704e-06, "loss": 0.4714, "step": 29070 }, { "epoch": 0.5516142494024812, "grad_norm": 1.5557028089778397, "learning_rate": 4.972190234974168e-06, "loss": 0.4613, "step": 29080 }, { "epoch": 0.5518039379339125, "grad_norm": 1.3489153139227275, "learning_rate": 4.968879589139276e-06, "loss": 0.4453, "step": 29090 }, { "epoch": 0.5519936264653439, "grad_norm": 1.394867854739491, "learning_rate": 4.965568956948514e-06, "loss": 0.4356, "step": 29100 }, { "epoch": 0.5521833149967753, "grad_norm": 1.5006155234158172, "learning_rate": 4.962258339853363e-06, "loss": 0.4435, "step": 29110 }, { "epoch": 0.5523730035282067, "grad_norm": 1.4890847853998044, "learning_rate": 4.958947739305297e-06, "loss": 0.4705, "step": 29120 }, { "epoch": 0.552562692059638, "grad_norm": 1.702641847891812, "learning_rate": 4.955637156755786e-06, "loss": 0.4696, "step": 29130 }, { "epoch": 0.5527523805910695, "grad_norm": 1.2184642278535172, "learning_rate": 4.952326593656286e-06, "loss": 0.4515, "step": 29140 }, { "epoch": 0.5529420691225009, "grad_norm": 1.4414456725910163, "learning_rate": 4.949016051458251e-06, "loss": 0.4879, "step": 29150 }, { "epoch": 0.5531317576539322, "grad_norm": 1.323774191550401, "learning_rate": 4.945705531613122e-06, "loss": 0.452, "step": 29160 }, { "epoch": 0.5533214461853636, "grad_norm": 1.21208120733551, "learning_rate": 4.942395035572329e-06, "loss": 0.432, "step": 29170 }, { "epoch": 0.553511134716795, "grad_norm": 1.5180510598881718, "learning_rate": 4.939084564787298e-06, "loss": 0.4531, "step": 29180 }, { "epoch": 0.5537008232482264, "grad_norm": 1.4224711715820468, "learning_rate": 4.935774120709435e-06, "loss": 0.4478, "step": 29190 }, { "epoch": 0.5538905117796578, "grad_norm": 1.2128003528900297, "learning_rate": 4.932463704790144e-06, "loss": 0.4592, "step": 29200 }, { "epoch": 0.5540802003110892, "grad_norm": 1.223326455095073, "learning_rate": 4.929153318480805e-06, "loss": 0.4469, "step": 29210 }, { "epoch": 0.5542698888425206, "grad_norm": 1.1516332257198199, "learning_rate": 4.925842963232796e-06, "loss": 0.4509, "step": 29220 }, { "epoch": 0.554459577373952, "grad_norm": 1.3495514300599172, "learning_rate": 4.9225326404974765e-06, "loss": 0.4208, "step": 29230 }, { "epoch": 0.5546492659053833, "grad_norm": 1.3530885963056127, "learning_rate": 4.919222351726191e-06, "loss": 0.4673, "step": 29240 }, { "epoch": 0.5548389544368147, "grad_norm": 1.4168637252136331, "learning_rate": 4.91591209837027e-06, "loss": 0.4767, "step": 29250 }, { "epoch": 0.5550286429682462, "grad_norm": 1.5046162794433922, "learning_rate": 4.912601881881031e-06, "loss": 0.4729, "step": 29260 }, { "epoch": 0.5552183314996775, "grad_norm": 1.1965634064529254, "learning_rate": 4.909291703709771e-06, "loss": 0.4476, "step": 29270 }, { "epoch": 0.5554080200311089, "grad_norm": 1.1773206045445195, "learning_rate": 4.9059815653077746e-06, "loss": 0.4391, "step": 29280 }, { "epoch": 0.5555977085625403, "grad_norm": 1.3360932497919489, "learning_rate": 4.902671468126302e-06, "loss": 0.4507, "step": 29290 }, { "epoch": 0.5557873970939717, "grad_norm": 1.5858253518545145, "learning_rate": 4.899361413616603e-06, "loss": 0.4821, "step": 29300 }, { "epoch": 0.555977085625403, "grad_norm": 1.3840938631349249, "learning_rate": 4.896051403229908e-06, "loss": 0.4733, "step": 29310 }, { "epoch": 0.5561667741568345, "grad_norm": 1.4631706897718957, "learning_rate": 4.892741438417422e-06, "loss": 0.4449, "step": 29320 }, { "epoch": 0.5563564626882659, "grad_norm": 1.5654359401263425, "learning_rate": 4.889431520630333e-06, "loss": 0.4906, "step": 29330 }, { "epoch": 0.5565461512196973, "grad_norm": 1.5424998214924959, "learning_rate": 4.8861216513198155e-06, "loss": 0.459, "step": 29340 }, { "epoch": 0.5567358397511286, "grad_norm": 1.3113762571309637, "learning_rate": 4.882811831937008e-06, "loss": 0.441, "step": 29350 }, { "epoch": 0.55692552828256, "grad_norm": 2.0482394702502775, "learning_rate": 4.8795020639330415e-06, "loss": 0.4706, "step": 29360 }, { "epoch": 0.5571152168139915, "grad_norm": 1.5620690937923982, "learning_rate": 4.876192348759016e-06, "loss": 0.4797, "step": 29370 }, { "epoch": 0.5573049053454228, "grad_norm": 1.4810007069206654, "learning_rate": 4.872882687866009e-06, "loss": 0.4671, "step": 29380 }, { "epoch": 0.5574945938768542, "grad_norm": 1.2672320984016567, "learning_rate": 4.8695730827050816e-06, "loss": 0.4122, "step": 29390 }, { "epoch": 0.5576842824082856, "grad_norm": 1.4098981010084075, "learning_rate": 4.8662635347272595e-06, "loss": 0.4596, "step": 29400 }, { "epoch": 0.557873970939717, "grad_norm": 1.3038176567639808, "learning_rate": 4.862954045383551e-06, "loss": 0.4733, "step": 29410 }, { "epoch": 0.5580636594711483, "grad_norm": 1.2282633194371368, "learning_rate": 4.859644616124936e-06, "loss": 0.4494, "step": 29420 }, { "epoch": 0.5582533480025798, "grad_norm": 1.2890868231126054, "learning_rate": 4.8563352484023664e-06, "loss": 0.4492, "step": 29430 }, { "epoch": 0.5584430365340112, "grad_norm": 1.4824916711243945, "learning_rate": 4.853025943666774e-06, "loss": 0.4707, "step": 29440 }, { "epoch": 0.5586327250654426, "grad_norm": 1.3692228635806951, "learning_rate": 4.849716703369054e-06, "loss": 0.4341, "step": 29450 }, { "epoch": 0.5588224135968739, "grad_norm": 1.2926099371973696, "learning_rate": 4.846407528960078e-06, "loss": 0.4751, "step": 29460 }, { "epoch": 0.5590121021283053, "grad_norm": 1.488586937215835, "learning_rate": 4.843098421890692e-06, "loss": 0.4495, "step": 29470 }, { "epoch": 0.5592017906597367, "grad_norm": 1.458149576137237, "learning_rate": 4.839789383611702e-06, "loss": 0.4565, "step": 29480 }, { "epoch": 0.559391479191168, "grad_norm": 1.166952244154251, "learning_rate": 4.836480415573896e-06, "loss": 0.4654, "step": 29490 }, { "epoch": 0.5595811677225995, "grad_norm": 1.4630920930330296, "learning_rate": 4.833171519228023e-06, "loss": 0.4562, "step": 29500 }, { "epoch": 0.5597708562540309, "grad_norm": 1.427048310853004, "learning_rate": 4.829862696024803e-06, "loss": 0.4537, "step": 29510 }, { "epoch": 0.5599605447854623, "grad_norm": 1.2154399402920582, "learning_rate": 4.826553947414929e-06, "loss": 0.4463, "step": 29520 }, { "epoch": 0.5601502333168936, "grad_norm": 1.4045077902220566, "learning_rate": 4.823245274849049e-06, "loss": 0.4456, "step": 29530 }, { "epoch": 0.560339921848325, "grad_norm": 1.588581517053551, "learning_rate": 4.8199366797777905e-06, "loss": 0.4481, "step": 29540 }, { "epoch": 0.5605296103797565, "grad_norm": 1.5981286753338295, "learning_rate": 4.816628163651741e-06, "loss": 0.454, "step": 29550 }, { "epoch": 0.5607192989111879, "grad_norm": 1.3386345629090943, "learning_rate": 4.813319727921452e-06, "loss": 0.4383, "step": 29560 }, { "epoch": 0.5609089874426192, "grad_norm": 1.3476520742740012, "learning_rate": 4.8100113740374444e-06, "loss": 0.458, "step": 29570 }, { "epoch": 0.5610986759740506, "grad_norm": 1.4300526544748444, "learning_rate": 4.806703103450198e-06, "loss": 0.4663, "step": 29580 }, { "epoch": 0.561288364505482, "grad_norm": 1.6802350170167821, "learning_rate": 4.803394917610161e-06, "loss": 0.439, "step": 29590 }, { "epoch": 0.5614780530369133, "grad_norm": 1.563262920556094, "learning_rate": 4.800086817967742e-06, "loss": 0.4561, "step": 29600 }, { "epoch": 0.5616677415683448, "grad_norm": 1.265427320271987, "learning_rate": 4.796778805973309e-06, "loss": 0.4631, "step": 29610 }, { "epoch": 0.5618574300997762, "grad_norm": 1.354757333364259, "learning_rate": 4.793470883077197e-06, "loss": 0.4305, "step": 29620 }, { "epoch": 0.5620471186312076, "grad_norm": 1.3117767176687478, "learning_rate": 4.790163050729702e-06, "loss": 0.4803, "step": 29630 }, { "epoch": 0.5622368071626389, "grad_norm": 1.5141725255112095, "learning_rate": 4.786855310381072e-06, "loss": 0.4363, "step": 29640 }, { "epoch": 0.5624264956940703, "grad_norm": 1.5199749115798387, "learning_rate": 4.783547663481527e-06, "loss": 0.4626, "step": 29650 }, { "epoch": 0.5626161842255017, "grad_norm": 1.2895811599156621, "learning_rate": 4.780240111481233e-06, "loss": 0.4301, "step": 29660 }, { "epoch": 0.5628058727569332, "grad_norm": 1.4816112498109366, "learning_rate": 4.776932655830325e-06, "loss": 0.4435, "step": 29670 }, { "epoch": 0.5629955612883645, "grad_norm": 1.3010961480463032, "learning_rate": 4.7736252979788915e-06, "loss": 0.4137, "step": 29680 }, { "epoch": 0.5631852498197959, "grad_norm": 1.6701895740536823, "learning_rate": 4.770318039376975e-06, "loss": 0.4535, "step": 29690 }, { "epoch": 0.5633749383512273, "grad_norm": 1.3598698709045824, "learning_rate": 4.767010881474581e-06, "loss": 0.4798, "step": 29700 }, { "epoch": 0.5635646268826586, "grad_norm": 1.3822423806236954, "learning_rate": 4.763703825721668e-06, "loss": 0.4334, "step": 29710 }, { "epoch": 0.56375431541409, "grad_norm": 1.5526947539921832, "learning_rate": 4.7603968735681475e-06, "loss": 0.4607, "step": 29720 }, { "epoch": 0.5639440039455215, "grad_norm": 1.4344468319496242, "learning_rate": 4.757090026463888e-06, "loss": 0.4455, "step": 29730 }, { "epoch": 0.5641336924769529, "grad_norm": 1.5548808808384447, "learning_rate": 4.7537832858587105e-06, "loss": 0.4517, "step": 29740 }, { "epoch": 0.5643233810083842, "grad_norm": 1.4449101358742267, "learning_rate": 4.7504766532023905e-06, "loss": 0.4689, "step": 29750 }, { "epoch": 0.5645130695398156, "grad_norm": 1.215847205529556, "learning_rate": 4.747170129944657e-06, "loss": 0.4516, "step": 29760 }, { "epoch": 0.564702758071247, "grad_norm": 1.465076256827275, "learning_rate": 4.7438637175351885e-06, "loss": 0.4501, "step": 29770 }, { "epoch": 0.5648924466026785, "grad_norm": 1.2861060008606855, "learning_rate": 4.740557417423619e-06, "loss": 0.447, "step": 29780 }, { "epoch": 0.5650821351341098, "grad_norm": 1.3883465286527514, "learning_rate": 4.7372512310595246e-06, "loss": 0.451, "step": 29790 }, { "epoch": 0.5652718236655412, "grad_norm": 1.681213824922198, "learning_rate": 4.733945159892443e-06, "loss": 0.452, "step": 29800 }, { "epoch": 0.5654615121969726, "grad_norm": 1.6332225621994405, "learning_rate": 4.730639205371855e-06, "loss": 0.461, "step": 29810 }, { "epoch": 0.5656512007284039, "grad_norm": 1.4490943985042133, "learning_rate": 4.727333368947187e-06, "loss": 0.4697, "step": 29820 }, { "epoch": 0.5658408892598353, "grad_norm": 1.2532615532559486, "learning_rate": 4.724027652067823e-06, "loss": 0.4232, "step": 29830 }, { "epoch": 0.5660305777912668, "grad_norm": 1.4746534778064933, "learning_rate": 4.720722056183088e-06, "loss": 0.4665, "step": 29840 }, { "epoch": 0.5662202663226982, "grad_norm": 1.4880602159211151, "learning_rate": 4.717416582742253e-06, "loss": 0.4471, "step": 29850 }, { "epoch": 0.5664099548541295, "grad_norm": 1.279550837182813, "learning_rate": 4.71411123319454e-06, "loss": 0.4294, "step": 29860 }, { "epoch": 0.5665996433855609, "grad_norm": 1.559382229516376, "learning_rate": 4.710806008989111e-06, "loss": 0.4447, "step": 29870 }, { "epoch": 0.5667893319169923, "grad_norm": 1.5587007343587966, "learning_rate": 4.707500911575079e-06, "loss": 0.4444, "step": 29880 }, { "epoch": 0.5669790204484237, "grad_norm": 1.2095364385841643, "learning_rate": 4.7041959424015e-06, "loss": 0.4496, "step": 29890 }, { "epoch": 0.567168708979855, "grad_norm": 1.6379653475744682, "learning_rate": 4.70089110291737e-06, "loss": 0.4796, "step": 29900 }, { "epoch": 0.5673583975112865, "grad_norm": 1.3717821749717447, "learning_rate": 4.69758639457163e-06, "loss": 0.4633, "step": 29910 }, { "epoch": 0.5675480860427179, "grad_norm": 1.4362287629080666, "learning_rate": 4.694281818813167e-06, "loss": 0.4713, "step": 29920 }, { "epoch": 0.5677377745741492, "grad_norm": 1.5777854050865439, "learning_rate": 4.690977377090805e-06, "loss": 0.4683, "step": 29930 }, { "epoch": 0.5679274631055806, "grad_norm": 1.3744777504932326, "learning_rate": 4.687673070853313e-06, "loss": 0.4247, "step": 29940 }, { "epoch": 0.568117151637012, "grad_norm": 1.3226672124459586, "learning_rate": 4.684368901549395e-06, "loss": 0.4713, "step": 29950 }, { "epoch": 0.5683068401684435, "grad_norm": 1.3646193965541127, "learning_rate": 4.681064870627702e-06, "loss": 0.4489, "step": 29960 }, { "epoch": 0.5684965286998748, "grad_norm": 1.4827052421759155, "learning_rate": 4.677760979536823e-06, "loss": 0.4675, "step": 29970 }, { "epoch": 0.5686862172313062, "grad_norm": 1.587600537681114, "learning_rate": 4.674457229725279e-06, "loss": 0.4521, "step": 29980 }, { "epoch": 0.5688759057627376, "grad_norm": 1.43335126258364, "learning_rate": 4.671153622641535e-06, "loss": 0.466, "step": 29990 }, { "epoch": 0.5690655942941689, "grad_norm": 1.455787697258167, "learning_rate": 4.667850159733996e-06, "loss": 0.4625, "step": 30000 }, { "epoch": 0.5692552828256003, "grad_norm": 1.4736561865050692, "learning_rate": 4.664546842450998e-06, "loss": 0.4094, "step": 30010 }, { "epoch": 0.5694449713570318, "grad_norm": 1.1414610052280025, "learning_rate": 4.661243672240814e-06, "loss": 0.4327, "step": 30020 }, { "epoch": 0.5696346598884632, "grad_norm": 1.1056458268145504, "learning_rate": 4.6579406505516545e-06, "loss": 0.4312, "step": 30030 }, { "epoch": 0.5698243484198945, "grad_norm": 1.3919516708093127, "learning_rate": 4.6546377788316634e-06, "loss": 0.4775, "step": 30040 }, { "epoch": 0.5700140369513259, "grad_norm": 1.7655484756162343, "learning_rate": 4.651335058528922e-06, "loss": 0.4603, "step": 30050 }, { "epoch": 0.5702037254827573, "grad_norm": 1.267889960075006, "learning_rate": 4.648032491091441e-06, "loss": 0.4431, "step": 30060 }, { "epoch": 0.5703934140141887, "grad_norm": 1.335180560840944, "learning_rate": 4.644730077967165e-06, "loss": 0.4622, "step": 30070 }, { "epoch": 0.5705831025456201, "grad_norm": 1.3463555937835578, "learning_rate": 4.641427820603974e-06, "loss": 0.4279, "step": 30080 }, { "epoch": 0.5707727910770515, "grad_norm": 1.320056262414886, "learning_rate": 4.638125720449677e-06, "loss": 0.4363, "step": 30090 }, { "epoch": 0.5709624796084829, "grad_norm": 1.4177516014442428, "learning_rate": 4.634823778952015e-06, "loss": 0.4641, "step": 30100 }, { "epoch": 0.5711521681399142, "grad_norm": 1.590476849818066, "learning_rate": 4.631521997558657e-06, "loss": 0.4667, "step": 30110 }, { "epoch": 0.5713418566713456, "grad_norm": 1.171297104687076, "learning_rate": 4.6282203777172045e-06, "loss": 0.4415, "step": 30120 }, { "epoch": 0.571531545202777, "grad_norm": 1.5298003626694878, "learning_rate": 4.62491892087519e-06, "loss": 0.4482, "step": 30130 }, { "epoch": 0.5717212337342085, "grad_norm": 1.460618109942677, "learning_rate": 4.621617628480069e-06, "loss": 0.4345, "step": 30140 }, { "epoch": 0.5719109222656398, "grad_norm": 1.2672636810948466, "learning_rate": 4.618316501979231e-06, "loss": 0.4463, "step": 30150 }, { "epoch": 0.5721006107970712, "grad_norm": 1.4808921826707395, "learning_rate": 4.615015542819987e-06, "loss": 0.4654, "step": 30160 }, { "epoch": 0.5722902993285026, "grad_norm": 1.4885929894552214, "learning_rate": 4.611714752449577e-06, "loss": 0.436, "step": 30170 }, { "epoch": 0.572479987859934, "grad_norm": 1.5206197482543573, "learning_rate": 4.6084141323151714e-06, "loss": 0.4566, "step": 30180 }, { "epoch": 0.5726696763913653, "grad_norm": 1.3850374106474113, "learning_rate": 4.6051136838638575e-06, "loss": 0.4452, "step": 30190 }, { "epoch": 0.5728593649227968, "grad_norm": 1.5438289229754796, "learning_rate": 4.6018134085426534e-06, "loss": 0.4966, "step": 30200 }, { "epoch": 0.5730490534542282, "grad_norm": 1.5796987132340155, "learning_rate": 4.598513307798502e-06, "loss": 0.4257, "step": 30210 }, { "epoch": 0.5732387419856595, "grad_norm": 1.358499335846172, "learning_rate": 4.595213383078264e-06, "loss": 0.4379, "step": 30220 }, { "epoch": 0.5734284305170909, "grad_norm": 1.4715948584834795, "learning_rate": 4.591913635828728e-06, "loss": 0.4381, "step": 30230 }, { "epoch": 0.5736181190485223, "grad_norm": 1.4055828821969993, "learning_rate": 4.588614067496602e-06, "loss": 0.4391, "step": 30240 }, { "epoch": 0.5738078075799538, "grad_norm": 1.5383825094543009, "learning_rate": 4.585314679528515e-06, "loss": 0.4369, "step": 30250 }, { "epoch": 0.5739974961113851, "grad_norm": 1.5752707895258313, "learning_rate": 4.582015473371024e-06, "loss": 0.437, "step": 30260 }, { "epoch": 0.5741871846428165, "grad_norm": 1.2991445772321193, "learning_rate": 4.578716450470596e-06, "loss": 0.4502, "step": 30270 }, { "epoch": 0.5743768731742479, "grad_norm": 1.3406407222530996, "learning_rate": 4.575417612273624e-06, "loss": 0.4588, "step": 30280 }, { "epoch": 0.5745665617056793, "grad_norm": 1.6150304116354641, "learning_rate": 4.5721189602264195e-06, "loss": 0.4297, "step": 30290 }, { "epoch": 0.5747562502371106, "grad_norm": 1.6042570128994997, "learning_rate": 4.568820495775208e-06, "loss": 0.447, "step": 30300 }, { "epoch": 0.574945938768542, "grad_norm": 1.421749745876949, "learning_rate": 4.5655222203661405e-06, "loss": 0.455, "step": 30310 }, { "epoch": 0.5751356272999735, "grad_norm": 1.2395505126015924, "learning_rate": 4.562224135445278e-06, "loss": 0.4444, "step": 30320 }, { "epoch": 0.5753253158314048, "grad_norm": 1.5187854733506667, "learning_rate": 4.5589262424585995e-06, "loss": 0.461, "step": 30330 }, { "epoch": 0.5755150043628362, "grad_norm": 1.2853961580096296, "learning_rate": 4.555628542852006e-06, "loss": 0.4639, "step": 30340 }, { "epoch": 0.5757046928942676, "grad_norm": 1.594052301072847, "learning_rate": 4.552331038071304e-06, "loss": 0.4674, "step": 30350 }, { "epoch": 0.575894381425699, "grad_norm": 1.3176899680121121, "learning_rate": 4.5490337295622205e-06, "loss": 0.4461, "step": 30360 }, { "epoch": 0.5760840699571304, "grad_norm": 1.6217000436620828, "learning_rate": 4.545736618770398e-06, "loss": 0.4453, "step": 30370 }, { "epoch": 0.5762737584885618, "grad_norm": 1.2652014794032225, "learning_rate": 4.542439707141385e-06, "loss": 0.4536, "step": 30380 }, { "epoch": 0.5764634470199932, "grad_norm": 1.399443738028146, "learning_rate": 4.539142996120652e-06, "loss": 0.4376, "step": 30390 }, { "epoch": 0.5766531355514246, "grad_norm": 1.5291727685367966, "learning_rate": 4.535846487153574e-06, "loss": 0.4607, "step": 30400 }, { "epoch": 0.5768428240828559, "grad_norm": 1.474347111289582, "learning_rate": 4.532550181685441e-06, "loss": 0.4359, "step": 30410 }, { "epoch": 0.5770325126142873, "grad_norm": 1.335048168646705, "learning_rate": 4.529254081161454e-06, "loss": 0.4425, "step": 30420 }, { "epoch": 0.5772222011457188, "grad_norm": 1.3625791535920537, "learning_rate": 4.525958187026721e-06, "loss": 0.438, "step": 30430 }, { "epoch": 0.5774118896771501, "grad_norm": 1.8288425689611425, "learning_rate": 4.5226625007262645e-06, "loss": 0.4356, "step": 30440 }, { "epoch": 0.5776015782085815, "grad_norm": 1.4231676645039368, "learning_rate": 4.51936702370501e-06, "loss": 0.4508, "step": 30450 }, { "epoch": 0.5777912667400129, "grad_norm": 1.3447317818590117, "learning_rate": 4.516071757407795e-06, "loss": 0.4278, "step": 30460 }, { "epoch": 0.5779809552714443, "grad_norm": 1.9465775955201718, "learning_rate": 4.512776703279369e-06, "loss": 0.4499, "step": 30470 }, { "epoch": 0.5781706438028756, "grad_norm": 1.2290948513840492, "learning_rate": 4.509481862764376e-06, "loss": 0.4207, "step": 30480 }, { "epoch": 0.5783603323343071, "grad_norm": 1.2449989402433457, "learning_rate": 4.5061872373073784e-06, "loss": 0.4733, "step": 30490 }, { "epoch": 0.5785500208657385, "grad_norm": 1.4959213454052878, "learning_rate": 4.502892828352839e-06, "loss": 0.4781, "step": 30500 }, { "epoch": 0.5787397093971699, "grad_norm": 1.4780613775034495, "learning_rate": 4.499598637345124e-06, "loss": 0.4289, "step": 30510 }, { "epoch": 0.5789293979286012, "grad_norm": 1.3569259101056306, "learning_rate": 4.496304665728512e-06, "loss": 0.4348, "step": 30520 }, { "epoch": 0.5791190864600326, "grad_norm": 1.2949992405668855, "learning_rate": 4.493010914947171e-06, "loss": 0.4573, "step": 30530 }, { "epoch": 0.579308774991464, "grad_norm": 1.6082417883725757, "learning_rate": 4.489717386445188e-06, "loss": 0.4454, "step": 30540 }, { "epoch": 0.5794984635228954, "grad_norm": 1.3224949136561928, "learning_rate": 4.486424081666546e-06, "loss": 0.4708, "step": 30550 }, { "epoch": 0.5796881520543268, "grad_norm": 1.2740707684293122, "learning_rate": 4.483131002055123e-06, "loss": 0.4418, "step": 30560 }, { "epoch": 0.5798778405857582, "grad_norm": 1.3231152177462833, "learning_rate": 4.479838149054712e-06, "loss": 0.4584, "step": 30570 }, { "epoch": 0.5800675291171896, "grad_norm": 1.3794382191460508, "learning_rate": 4.476545524108995e-06, "loss": 0.4532, "step": 30580 }, { "epoch": 0.5802572176486209, "grad_norm": 1.474952338070174, "learning_rate": 4.47325312866156e-06, "loss": 0.4469, "step": 30590 }, { "epoch": 0.5804469061800523, "grad_norm": 1.617744567080518, "learning_rate": 4.469960964155894e-06, "loss": 0.4811, "step": 30600 }, { "epoch": 0.5806365947114838, "grad_norm": 1.4397926862131856, "learning_rate": 4.4666690320353774e-06, "loss": 0.4714, "step": 30610 }, { "epoch": 0.5808262832429152, "grad_norm": 1.5623137210006128, "learning_rate": 4.463377333743296e-06, "loss": 0.4885, "step": 30620 }, { "epoch": 0.5810159717743465, "grad_norm": 1.3978550232651206, "learning_rate": 4.4600858707228305e-06, "loss": 0.461, "step": 30630 }, { "epoch": 0.5812056603057779, "grad_norm": 1.3207888169564486, "learning_rate": 4.456794644417055e-06, "loss": 0.4188, "step": 30640 }, { "epoch": 0.5813953488372093, "grad_norm": 1.459957546243944, "learning_rate": 4.453503656268945e-06, "loss": 0.4405, "step": 30650 }, { "epoch": 0.5815850373686406, "grad_norm": 1.294605838761504, "learning_rate": 4.450212907721371e-06, "loss": 0.4662, "step": 30660 }, { "epoch": 0.5817747259000721, "grad_norm": 1.4738840015923715, "learning_rate": 4.4469224002170925e-06, "loss": 0.438, "step": 30670 }, { "epoch": 0.5819644144315035, "grad_norm": 1.3086406936133188, "learning_rate": 4.443632135198771e-06, "loss": 0.4614, "step": 30680 }, { "epoch": 0.5821541029629349, "grad_norm": 1.6271109108118997, "learning_rate": 4.4403421141089544e-06, "loss": 0.4064, "step": 30690 }, { "epoch": 0.5823437914943662, "grad_norm": 1.3351432568584074, "learning_rate": 4.437052338390091e-06, "loss": 0.4639, "step": 30700 }, { "epoch": 0.5825334800257976, "grad_norm": 1.395405192973698, "learning_rate": 4.433762809484517e-06, "loss": 0.4411, "step": 30710 }, { "epoch": 0.582723168557229, "grad_norm": 1.30875668964769, "learning_rate": 4.430473528834462e-06, "loss": 0.4221, "step": 30720 }, { "epoch": 0.5829128570886605, "grad_norm": 1.3881149114449534, "learning_rate": 4.4271844978820465e-06, "loss": 0.4424, "step": 30730 }, { "epoch": 0.5831025456200918, "grad_norm": 1.3219927305215184, "learning_rate": 4.423895718069276e-06, "loss": 0.4338, "step": 30740 }, { "epoch": 0.5832922341515232, "grad_norm": 1.4813677033400394, "learning_rate": 4.420607190838058e-06, "loss": 0.4454, "step": 30750 }, { "epoch": 0.5834819226829546, "grad_norm": 1.2880501549429504, "learning_rate": 4.4173189176301814e-06, "loss": 0.4536, "step": 30760 }, { "epoch": 0.5836716112143859, "grad_norm": 1.397576998032133, "learning_rate": 4.4140308998873195e-06, "loss": 0.4459, "step": 30770 }, { "epoch": 0.5838612997458174, "grad_norm": 1.4062764964987777, "learning_rate": 4.410743139051044e-06, "loss": 0.4481, "step": 30780 }, { "epoch": 0.5840509882772488, "grad_norm": 1.6434966896638084, "learning_rate": 4.407455636562807e-06, "loss": 0.4319, "step": 30790 }, { "epoch": 0.5842406768086802, "grad_norm": 1.5891418406277016, "learning_rate": 4.404168393863948e-06, "loss": 0.4749, "step": 30800 }, { "epoch": 0.5844303653401115, "grad_norm": 1.561477682261398, "learning_rate": 4.400881412395697e-06, "loss": 0.4558, "step": 30810 }, { "epoch": 0.5846200538715429, "grad_norm": 1.8594841809290246, "learning_rate": 4.39759469359916e-06, "loss": 0.441, "step": 30820 }, { "epoch": 0.5848097424029743, "grad_norm": 1.490495355352727, "learning_rate": 4.394308238915339e-06, "loss": 0.4286, "step": 30830 }, { "epoch": 0.5849994309344057, "grad_norm": 1.6714362683245718, "learning_rate": 4.391022049785116e-06, "loss": 0.4436, "step": 30840 }, { "epoch": 0.5851891194658371, "grad_norm": 1.4595269510105553, "learning_rate": 4.387736127649252e-06, "loss": 0.4522, "step": 30850 }, { "epoch": 0.5853788079972685, "grad_norm": 1.5412591539224896, "learning_rate": 4.384450473948396e-06, "loss": 0.4527, "step": 30860 }, { "epoch": 0.5855684965286999, "grad_norm": 1.5102928735128707, "learning_rate": 4.3811650901230805e-06, "loss": 0.4508, "step": 30870 }, { "epoch": 0.5857581850601312, "grad_norm": 1.645337841446648, "learning_rate": 4.377879977613714e-06, "loss": 0.4551, "step": 30880 }, { "epoch": 0.5859478735915626, "grad_norm": 1.3835002758935566, "learning_rate": 4.374595137860591e-06, "loss": 0.4535, "step": 30890 }, { "epoch": 0.5861375621229941, "grad_norm": 1.3848955164886059, "learning_rate": 4.371310572303882e-06, "loss": 0.4087, "step": 30900 }, { "epoch": 0.5863272506544255, "grad_norm": 1.7610316624729434, "learning_rate": 4.368026282383642e-06, "loss": 0.4693, "step": 30910 }, { "epoch": 0.5865169391858568, "grad_norm": 1.242756578390984, "learning_rate": 4.3647422695398054e-06, "loss": 0.4382, "step": 30920 }, { "epoch": 0.5867066277172882, "grad_norm": 1.541898996437468, "learning_rate": 4.361458535212178e-06, "loss": 0.4423, "step": 30930 }, { "epoch": 0.5868963162487196, "grad_norm": 1.1794850506925394, "learning_rate": 4.35817508084045e-06, "loss": 0.4467, "step": 30940 }, { "epoch": 0.5870860047801509, "grad_norm": 1.400926698627615, "learning_rate": 4.3548919078641904e-06, "loss": 0.4434, "step": 30950 }, { "epoch": 0.5872756933115824, "grad_norm": 1.296520672519891, "learning_rate": 4.351609017722838e-06, "loss": 0.4521, "step": 30960 }, { "epoch": 0.5874653818430138, "grad_norm": 1.049761822708201, "learning_rate": 4.348326411855713e-06, "loss": 0.4173, "step": 30970 }, { "epoch": 0.5876550703744452, "grad_norm": 1.291631242805578, "learning_rate": 4.345044091702007e-06, "loss": 0.45, "step": 30980 }, { "epoch": 0.5878447589058765, "grad_norm": 1.6403511987401345, "learning_rate": 4.341762058700789e-06, "loss": 0.454, "step": 30990 }, { "epoch": 0.5880344474373079, "grad_norm": 1.540272569071127, "learning_rate": 4.3384803142910045e-06, "loss": 0.4423, "step": 31000 }, { "epoch": 0.5882241359687393, "grad_norm": 1.6161020438899685, "learning_rate": 4.3351988599114665e-06, "loss": 0.4352, "step": 31010 }, { "epoch": 0.5884138245001708, "grad_norm": 1.3777407802452446, "learning_rate": 4.331917697000863e-06, "loss": 0.4167, "step": 31020 }, { "epoch": 0.5886035130316021, "grad_norm": 1.3869882074260678, "learning_rate": 4.328636826997761e-06, "loss": 0.4649, "step": 31030 }, { "epoch": 0.5887932015630335, "grad_norm": 1.507023688568238, "learning_rate": 4.325356251340586e-06, "loss": 0.4508, "step": 31040 }, { "epoch": 0.5889828900944649, "grad_norm": 1.4254435103655811, "learning_rate": 4.322075971467646e-06, "loss": 0.4266, "step": 31050 }, { "epoch": 0.5891725786258962, "grad_norm": 1.375644416735551, "learning_rate": 4.318795988817113e-06, "loss": 0.459, "step": 31060 }, { "epoch": 0.5893622671573276, "grad_norm": 1.6011837337643895, "learning_rate": 4.3155163048270306e-06, "loss": 0.4564, "step": 31070 }, { "epoch": 0.5895519556887591, "grad_norm": 1.432571929244935, "learning_rate": 4.312236920935314e-06, "loss": 0.4247, "step": 31080 }, { "epoch": 0.5897416442201905, "grad_norm": 1.233428402937021, "learning_rate": 4.308957838579742e-06, "loss": 0.4332, "step": 31090 }, { "epoch": 0.5899313327516218, "grad_norm": 1.696631624822855, "learning_rate": 4.305679059197964e-06, "loss": 0.4805, "step": 31100 }, { "epoch": 0.5901210212830532, "grad_norm": 1.4453044910347175, "learning_rate": 4.302400584227496e-06, "loss": 0.4407, "step": 31110 }, { "epoch": 0.5903107098144846, "grad_norm": 1.2720220188363094, "learning_rate": 4.29912241510572e-06, "loss": 0.4224, "step": 31120 }, { "epoch": 0.590500398345916, "grad_norm": 1.3100410408155476, "learning_rate": 4.2958445532698865e-06, "loss": 0.4441, "step": 31130 }, { "epoch": 0.5906900868773474, "grad_norm": 1.6075745554059848, "learning_rate": 4.2925670001571084e-06, "loss": 0.4451, "step": 31140 }, { "epoch": 0.5908797754087788, "grad_norm": 1.5072699116139172, "learning_rate": 4.289289757204363e-06, "loss": 0.4663, "step": 31150 }, { "epoch": 0.5910694639402102, "grad_norm": 1.668480238633598, "learning_rate": 4.286012825848495e-06, "loss": 0.4376, "step": 31160 }, { "epoch": 0.5912591524716415, "grad_norm": 1.4552912048094233, "learning_rate": 4.282736207526208e-06, "loss": 0.4618, "step": 31170 }, { "epoch": 0.5914488410030729, "grad_norm": 1.3510725880062455, "learning_rate": 4.2794599036740716e-06, "loss": 0.4594, "step": 31180 }, { "epoch": 0.5916385295345044, "grad_norm": 2.3562416176528176, "learning_rate": 4.276183915728517e-06, "loss": 0.4565, "step": 31190 }, { "epoch": 0.5918282180659358, "grad_norm": 1.3312792836256169, "learning_rate": 4.272908245125835e-06, "loss": 0.4484, "step": 31200 }, { "epoch": 0.5920179065973671, "grad_norm": 1.3117664230866295, "learning_rate": 4.269632893302182e-06, "loss": 0.445, "step": 31210 }, { "epoch": 0.5922075951287985, "grad_norm": 1.3282090681253726, "learning_rate": 4.2663578616935665e-06, "loss": 0.4632, "step": 31220 }, { "epoch": 0.5923972836602299, "grad_norm": 1.4313073811155332, "learning_rate": 4.263083151735864e-06, "loss": 0.4452, "step": 31230 }, { "epoch": 0.5925869721916613, "grad_norm": 1.299863141323395, "learning_rate": 4.259808764864809e-06, "loss": 0.431, "step": 31240 }, { "epoch": 0.5927766607230927, "grad_norm": 1.4687629988229964, "learning_rate": 4.256534702515987e-06, "loss": 0.4335, "step": 31250 }, { "epoch": 0.5929663492545241, "grad_norm": 1.4333743218973505, "learning_rate": 4.25326096612485e-06, "loss": 0.435, "step": 31260 }, { "epoch": 0.5931560377859555, "grad_norm": 1.9133548463093055, "learning_rate": 4.2499875571267e-06, "loss": 0.4736, "step": 31270 }, { "epoch": 0.5933457263173868, "grad_norm": 1.458636186454773, "learning_rate": 4.2467144769567e-06, "loss": 0.4459, "step": 31280 }, { "epoch": 0.5935354148488182, "grad_norm": 1.5000556492629988, "learning_rate": 4.243441727049868e-06, "loss": 0.4603, "step": 31290 }, { "epoch": 0.5937251033802496, "grad_norm": 1.4749527506436257, "learning_rate": 4.240169308841075e-06, "loss": 0.4353, "step": 31300 }, { "epoch": 0.5939147919116811, "grad_norm": 1.281012394205957, "learning_rate": 4.2368972237650484e-06, "loss": 0.4387, "step": 31310 }, { "epoch": 0.5941044804431124, "grad_norm": 1.5730008678680483, "learning_rate": 4.233625473256372e-06, "loss": 0.4375, "step": 31320 }, { "epoch": 0.5942941689745438, "grad_norm": 1.5052368934732183, "learning_rate": 4.230354058749476e-06, "loss": 0.4596, "step": 31330 }, { "epoch": 0.5944838575059752, "grad_norm": 1.1090889132494366, "learning_rate": 4.227082981678653e-06, "loss": 0.4329, "step": 31340 }, { "epoch": 0.5946735460374066, "grad_norm": 1.4496699071429289, "learning_rate": 4.2238122434780355e-06, "loss": 0.4421, "step": 31350 }, { "epoch": 0.5948632345688379, "grad_norm": 1.6292198288546171, "learning_rate": 4.220541845581618e-06, "loss": 0.4656, "step": 31360 }, { "epoch": 0.5950529231002694, "grad_norm": 1.6189939872473735, "learning_rate": 4.217271789423243e-06, "loss": 0.463, "step": 31370 }, { "epoch": 0.5952426116317008, "grad_norm": 1.6298480694223487, "learning_rate": 4.214002076436598e-06, "loss": 0.4624, "step": 31380 }, { "epoch": 0.5954323001631321, "grad_norm": 1.4473164299985586, "learning_rate": 4.21073270805523e-06, "loss": 0.4271, "step": 31390 }, { "epoch": 0.5956219886945635, "grad_norm": 1.5893089770295767, "learning_rate": 4.207463685712523e-06, "loss": 0.4729, "step": 31400 }, { "epoch": 0.5958116772259949, "grad_norm": 1.551039113912539, "learning_rate": 4.204195010841718e-06, "loss": 0.4587, "step": 31410 }, { "epoch": 0.5960013657574263, "grad_norm": 1.3046746759105778, "learning_rate": 4.200926684875902e-06, "loss": 0.4255, "step": 31420 }, { "epoch": 0.5961910542888577, "grad_norm": 1.4993007571892907, "learning_rate": 4.197658709248005e-06, "loss": 0.4465, "step": 31430 }, { "epoch": 0.5963807428202891, "grad_norm": 1.1046672736250833, "learning_rate": 4.194391085390809e-06, "loss": 0.4308, "step": 31440 }, { "epoch": 0.5965704313517205, "grad_norm": 1.4996046676629318, "learning_rate": 4.19112381473694e-06, "loss": 0.4453, "step": 31450 }, { "epoch": 0.5967601198831519, "grad_norm": 1.6375408890637755, "learning_rate": 4.187856898718865e-06, "loss": 0.4521, "step": 31460 }, { "epoch": 0.5969498084145832, "grad_norm": 1.2336447174732965, "learning_rate": 4.184590338768903e-06, "loss": 0.461, "step": 31470 }, { "epoch": 0.5971394969460146, "grad_norm": 1.4372233347206822, "learning_rate": 4.181324136319206e-06, "loss": 0.3975, "step": 31480 }, { "epoch": 0.5973291854774461, "grad_norm": 1.4313721616333324, "learning_rate": 4.1780582928017825e-06, "loss": 0.4351, "step": 31490 }, { "epoch": 0.5975188740088774, "grad_norm": 1.526778565768069, "learning_rate": 4.1747928096484765e-06, "loss": 0.4317, "step": 31500 }, { "epoch": 0.5977085625403088, "grad_norm": 1.2872009386737735, "learning_rate": 4.17152768829097e-06, "loss": 0.4166, "step": 31510 }, { "epoch": 0.5978982510717402, "grad_norm": 1.2387190414899494, "learning_rate": 4.168262930160796e-06, "loss": 0.4409, "step": 31520 }, { "epoch": 0.5980879396031716, "grad_norm": 1.4654582292752247, "learning_rate": 4.164998536689321e-06, "loss": 0.4521, "step": 31530 }, { "epoch": 0.598277628134603, "grad_norm": 1.5232044633739215, "learning_rate": 4.161734509307754e-06, "loss": 0.4274, "step": 31540 }, { "epoch": 0.5984673166660344, "grad_norm": 1.6257379021859515, "learning_rate": 4.158470849447145e-06, "loss": 0.4601, "step": 31550 }, { "epoch": 0.5986570051974658, "grad_norm": 1.671936155345529, "learning_rate": 4.155207558538377e-06, "loss": 0.4732, "step": 31560 }, { "epoch": 0.5988466937288972, "grad_norm": 1.3379641480796793, "learning_rate": 4.15194463801218e-06, "loss": 0.4431, "step": 31570 }, { "epoch": 0.5990363822603285, "grad_norm": 1.80060771475653, "learning_rate": 4.148682089299116e-06, "loss": 0.4535, "step": 31580 }, { "epoch": 0.5992260707917599, "grad_norm": 1.2599847150362744, "learning_rate": 4.145419913829584e-06, "loss": 0.4359, "step": 31590 }, { "epoch": 0.5994157593231914, "grad_norm": 1.3350434805674913, "learning_rate": 4.1421581130338185e-06, "loss": 0.4508, "step": 31600 }, { "epoch": 0.5996054478546227, "grad_norm": 1.7106819428517053, "learning_rate": 4.1388966883418965e-06, "loss": 0.4376, "step": 31610 }, { "epoch": 0.5997951363860541, "grad_norm": 1.3817910883806988, "learning_rate": 4.135635641183723e-06, "loss": 0.4815, "step": 31620 }, { "epoch": 0.5999848249174855, "grad_norm": 1.2710545869320533, "learning_rate": 4.13237497298904e-06, "loss": 0.4303, "step": 31630 }, { "epoch": 0.6001745134489169, "grad_norm": 1.3598856046495411, "learning_rate": 4.129114685187419e-06, "loss": 0.468, "step": 31640 }, { "epoch": 0.6003642019803482, "grad_norm": 1.418812323002581, "learning_rate": 4.125854779208275e-06, "loss": 0.4482, "step": 31650 }, { "epoch": 0.6005538905117797, "grad_norm": 1.3619694371981526, "learning_rate": 4.122595256480849e-06, "loss": 0.4674, "step": 31660 }, { "epoch": 0.6007435790432111, "grad_norm": 1.2962940330472505, "learning_rate": 4.11933611843421e-06, "loss": 0.4213, "step": 31670 }, { "epoch": 0.6009332675746424, "grad_norm": 1.420395711836784, "learning_rate": 4.116077366497266e-06, "loss": 0.4422, "step": 31680 }, { "epoch": 0.6011229561060738, "grad_norm": 1.3372416878146745, "learning_rate": 4.112819002098749e-06, "loss": 0.4527, "step": 31690 }, { "epoch": 0.6013126446375052, "grad_norm": 1.7849585981660552, "learning_rate": 4.109561026667229e-06, "loss": 0.4403, "step": 31700 }, { "epoch": 0.6015023331689366, "grad_norm": 1.5214306469374785, "learning_rate": 4.1063034416311e-06, "loss": 0.4338, "step": 31710 }, { "epoch": 0.601692021700368, "grad_norm": 1.402703657754914, "learning_rate": 4.103046248418584e-06, "loss": 0.4805, "step": 31720 }, { "epoch": 0.6018817102317994, "grad_norm": 1.1993375211003288, "learning_rate": 4.099789448457732e-06, "loss": 0.4473, "step": 31730 }, { "epoch": 0.6020713987632308, "grad_norm": 1.3919441017388716, "learning_rate": 4.096533043176429e-06, "loss": 0.4446, "step": 31740 }, { "epoch": 0.6022610872946622, "grad_norm": 1.5349640495691945, "learning_rate": 4.0932770340023765e-06, "loss": 0.464, "step": 31750 }, { "epoch": 0.6024507758260935, "grad_norm": 1.384167358050999, "learning_rate": 4.090021422363111e-06, "loss": 0.4391, "step": 31760 }, { "epoch": 0.6026404643575249, "grad_norm": 1.118979021746307, "learning_rate": 4.0867662096859875e-06, "loss": 0.4367, "step": 31770 }, { "epoch": 0.6028301528889564, "grad_norm": 1.3756808764384953, "learning_rate": 4.083511397398191e-06, "loss": 0.4503, "step": 31780 }, { "epoch": 0.6030198414203877, "grad_norm": 1.2754347401108037, "learning_rate": 4.080256986926731e-06, "loss": 0.4105, "step": 31790 }, { "epoch": 0.6032095299518191, "grad_norm": 1.5444940046752003, "learning_rate": 4.07700297969844e-06, "loss": 0.4505, "step": 31800 }, { "epoch": 0.6033992184832505, "grad_norm": 1.4794076925236688, "learning_rate": 4.073749377139969e-06, "loss": 0.424, "step": 31810 }, { "epoch": 0.6035889070146819, "grad_norm": 1.2512862789349724, "learning_rate": 4.070496180677802e-06, "loss": 0.4524, "step": 31820 }, { "epoch": 0.6037785955461132, "grad_norm": 1.3998156617376172, "learning_rate": 4.067243391738235e-06, "loss": 0.4369, "step": 31830 }, { "epoch": 0.6039682840775447, "grad_norm": 1.571012141893095, "learning_rate": 4.063991011747391e-06, "loss": 0.4456, "step": 31840 }, { "epoch": 0.6041579726089761, "grad_norm": 1.5154412641578385, "learning_rate": 4.060739042131208e-06, "loss": 0.452, "step": 31850 }, { "epoch": 0.6043476611404075, "grad_norm": 1.388538180855185, "learning_rate": 4.05748748431545e-06, "loss": 0.4387, "step": 31860 }, { "epoch": 0.6045373496718388, "grad_norm": 1.272840878583925, "learning_rate": 4.054236339725701e-06, "loss": 0.4247, "step": 31870 }, { "epoch": 0.6047270382032702, "grad_norm": 1.3724429411357217, "learning_rate": 4.050985609787358e-06, "loss": 0.4365, "step": 31880 }, { "epoch": 0.6049167267347016, "grad_norm": 1.3956666167482537, "learning_rate": 4.0477352959256385e-06, "loss": 0.4289, "step": 31890 }, { "epoch": 0.605106415266133, "grad_norm": 1.6979675354894979, "learning_rate": 4.044485399565585e-06, "loss": 0.4507, "step": 31900 }, { "epoch": 0.6052961037975644, "grad_norm": 1.4834383105502942, "learning_rate": 4.041235922132041e-06, "loss": 0.4248, "step": 31910 }, { "epoch": 0.6054857923289958, "grad_norm": 1.4130006324425328, "learning_rate": 4.037986865049684e-06, "loss": 0.4657, "step": 31920 }, { "epoch": 0.6056754808604272, "grad_norm": 1.3356026953626041, "learning_rate": 4.034738229742992e-06, "loss": 0.4503, "step": 31930 }, { "epoch": 0.6058651693918585, "grad_norm": 1.4352084261658697, "learning_rate": 4.031490017636269e-06, "loss": 0.429, "step": 31940 }, { "epoch": 0.60605485792329, "grad_norm": 1.3685214408713144, "learning_rate": 4.0282422301536305e-06, "loss": 0.4458, "step": 31950 }, { "epoch": 0.6062445464547214, "grad_norm": 1.359772699088533, "learning_rate": 4.024994868719002e-06, "loss": 0.4617, "step": 31960 }, { "epoch": 0.6064342349861528, "grad_norm": 1.5369949662991627, "learning_rate": 4.021747934756126e-06, "loss": 0.4489, "step": 31970 }, { "epoch": 0.6066239235175841, "grad_norm": 1.4234992789232614, "learning_rate": 4.018501429688558e-06, "loss": 0.4412, "step": 31980 }, { "epoch": 0.6068136120490155, "grad_norm": 1.6439193094722697, "learning_rate": 4.015255354939661e-06, "loss": 0.4274, "step": 31990 }, { "epoch": 0.6070033005804469, "grad_norm": 1.4079222364879942, "learning_rate": 4.0120097119326155e-06, "loss": 0.4643, "step": 32000 }, { "epoch": 0.6071929891118782, "grad_norm": 1.4899735000401901, "learning_rate": 4.008764502090408e-06, "loss": 0.4339, "step": 32010 }, { "epoch": 0.6073826776433097, "grad_norm": 1.3784733205180142, "learning_rate": 4.005519726835836e-06, "loss": 0.4418, "step": 32020 }, { "epoch": 0.6075723661747411, "grad_norm": 1.5789569593372095, "learning_rate": 4.002275387591511e-06, "loss": 0.4612, "step": 32030 }, { "epoch": 0.6077620547061725, "grad_norm": 1.33037110091202, "learning_rate": 3.999031485779843e-06, "loss": 0.4383, "step": 32040 }, { "epoch": 0.6079517432376038, "grad_norm": 1.5846969371179942, "learning_rate": 3.9957880228230635e-06, "loss": 0.441, "step": 32050 }, { "epoch": 0.6081414317690352, "grad_norm": 1.233288812488895, "learning_rate": 3.9925450001431995e-06, "loss": 0.4378, "step": 32060 }, { "epoch": 0.6083311203004667, "grad_norm": 1.3665672198814456, "learning_rate": 3.989302419162092e-06, "loss": 0.4475, "step": 32070 }, { "epoch": 0.6085208088318981, "grad_norm": 1.422280213880445, "learning_rate": 3.9860602813013905e-06, "loss": 0.4602, "step": 32080 }, { "epoch": 0.6087104973633294, "grad_norm": 1.418220358139045, "learning_rate": 3.9828185879825396e-06, "loss": 0.4568, "step": 32090 }, { "epoch": 0.6089001858947608, "grad_norm": 1.3978382602222674, "learning_rate": 3.9795773406268e-06, "loss": 0.4646, "step": 32100 }, { "epoch": 0.6090898744261922, "grad_norm": 1.2881372649160048, "learning_rate": 3.976336540655232e-06, "loss": 0.4682, "step": 32110 }, { "epoch": 0.6092795629576235, "grad_norm": 1.706968397169209, "learning_rate": 3.973096189488699e-06, "loss": 0.4215, "step": 32120 }, { "epoch": 0.609469251489055, "grad_norm": 1.3582909760220336, "learning_rate": 3.969856288547872e-06, "loss": 0.427, "step": 32130 }, { "epoch": 0.6096589400204864, "grad_norm": 1.3989738863205674, "learning_rate": 3.966616839253218e-06, "loss": 0.4129, "step": 32140 }, { "epoch": 0.6098486285519178, "grad_norm": 1.4923803608122723, "learning_rate": 3.963377843025011e-06, "loss": 0.4448, "step": 32150 }, { "epoch": 0.6100383170833491, "grad_norm": 1.1537971612416988, "learning_rate": 3.960139301283327e-06, "loss": 0.4469, "step": 32160 }, { "epoch": 0.6102280056147805, "grad_norm": 1.3354755390384951, "learning_rate": 3.956901215448037e-06, "loss": 0.4208, "step": 32170 }, { "epoch": 0.6104176941462119, "grad_norm": 1.392999393977368, "learning_rate": 3.953663586938818e-06, "loss": 0.4315, "step": 32180 }, { "epoch": 0.6106073826776434, "grad_norm": 1.2107206526307355, "learning_rate": 3.950426417175146e-06, "loss": 0.4546, "step": 32190 }, { "epoch": 0.6107970712090747, "grad_norm": 1.538175496857217, "learning_rate": 3.947189707576289e-06, "loss": 0.4498, "step": 32200 }, { "epoch": 0.6109867597405061, "grad_norm": 1.4788100419926402, "learning_rate": 3.943953459561326e-06, "loss": 0.4406, "step": 32210 }, { "epoch": 0.6111764482719375, "grad_norm": 1.2320594307781876, "learning_rate": 3.940717674549118e-06, "loss": 0.4106, "step": 32220 }, { "epoch": 0.6113661368033688, "grad_norm": 1.4004168979577305, "learning_rate": 3.9374823539583354e-06, "loss": 0.4407, "step": 32230 }, { "epoch": 0.6115558253348002, "grad_norm": 1.309490205691461, "learning_rate": 3.934247499207442e-06, "loss": 0.4591, "step": 32240 }, { "epoch": 0.6117455138662317, "grad_norm": 1.4574900480962834, "learning_rate": 3.931013111714691e-06, "loss": 0.4641, "step": 32250 }, { "epoch": 0.6119352023976631, "grad_norm": 1.3315674619815285, "learning_rate": 3.927779192898139e-06, "loss": 0.4357, "step": 32260 }, { "epoch": 0.6121248909290944, "grad_norm": 1.3363777686608236, "learning_rate": 3.924545744175636e-06, "loss": 0.4484, "step": 32270 }, { "epoch": 0.6123145794605258, "grad_norm": 1.3252495452257853, "learning_rate": 3.92131276696482e-06, "loss": 0.4431, "step": 32280 }, { "epoch": 0.6125042679919572, "grad_norm": 1.461575849386338, "learning_rate": 3.918080262683129e-06, "loss": 0.5008, "step": 32290 }, { "epoch": 0.6126939565233886, "grad_norm": 1.5237632598399549, "learning_rate": 3.914848232747786e-06, "loss": 0.4167, "step": 32300 }, { "epoch": 0.61288364505482, "grad_norm": 1.599120535790058, "learning_rate": 3.911616678575815e-06, "loss": 0.4396, "step": 32310 }, { "epoch": 0.6130733335862514, "grad_norm": 1.2637342424225309, "learning_rate": 3.908385601584028e-06, "loss": 0.4486, "step": 32320 }, { "epoch": 0.6132630221176828, "grad_norm": 1.4116004558502224, "learning_rate": 3.905155003189022e-06, "loss": 0.4527, "step": 32330 }, { "epoch": 0.6134527106491141, "grad_norm": 1.4684346281108371, "learning_rate": 3.901924884807195e-06, "loss": 0.45, "step": 32340 }, { "epoch": 0.6136423991805455, "grad_norm": 1.4249758293430443, "learning_rate": 3.898695247854723e-06, "loss": 0.4354, "step": 32350 }, { "epoch": 0.613832087711977, "grad_norm": 1.1602011720885597, "learning_rate": 3.895466093747579e-06, "loss": 0.4423, "step": 32360 }, { "epoch": 0.6140217762434084, "grad_norm": 1.1969306053393682, "learning_rate": 3.892237423901525e-06, "loss": 0.465, "step": 32370 }, { "epoch": 0.6142114647748397, "grad_norm": 1.4424972065222226, "learning_rate": 3.889009239732101e-06, "loss": 0.4332, "step": 32380 }, { "epoch": 0.6144011533062711, "grad_norm": 1.626664853949643, "learning_rate": 3.885781542654647e-06, "loss": 0.4542, "step": 32390 }, { "epoch": 0.6145908418377025, "grad_norm": 1.9267992250117443, "learning_rate": 3.882554334084279e-06, "loss": 0.4601, "step": 32400 }, { "epoch": 0.6147805303691339, "grad_norm": 1.4889562725996512, "learning_rate": 3.879327615435905e-06, "loss": 0.4504, "step": 32410 }, { "epoch": 0.6149702189005652, "grad_norm": 1.3765152170803792, "learning_rate": 3.876101388124216e-06, "loss": 0.4638, "step": 32420 }, { "epoch": 0.6151599074319967, "grad_norm": 1.4470392289944722, "learning_rate": 3.872875653563685e-06, "loss": 0.4651, "step": 32430 }, { "epoch": 0.6153495959634281, "grad_norm": 1.0864501457847469, "learning_rate": 3.869650413168574e-06, "loss": 0.439, "step": 32440 }, { "epoch": 0.6155392844948594, "grad_norm": 1.2752681002659598, "learning_rate": 3.866425668352927e-06, "loss": 0.4534, "step": 32450 }, { "epoch": 0.6157289730262908, "grad_norm": 1.364591884181209, "learning_rate": 3.863201420530566e-06, "loss": 0.4234, "step": 32460 }, { "epoch": 0.6159186615577222, "grad_norm": 1.283320463979868, "learning_rate": 3.859977671115102e-06, "loss": 0.4473, "step": 32470 }, { "epoch": 0.6161083500891537, "grad_norm": 1.3463154240393116, "learning_rate": 3.856754421519925e-06, "loss": 0.4358, "step": 32480 }, { "epoch": 0.616298038620585, "grad_norm": 1.2905362680154486, "learning_rate": 3.853531673158202e-06, "loss": 0.4323, "step": 32490 }, { "epoch": 0.6164877271520164, "grad_norm": 1.4693924095139783, "learning_rate": 3.850309427442886e-06, "loss": 0.4573, "step": 32500 }, { "epoch": 0.6166774156834478, "grad_norm": 1.2817217406151913, "learning_rate": 3.847087685786704e-06, "loss": 0.4348, "step": 32510 }, { "epoch": 0.6168671042148791, "grad_norm": 1.5123418454557938, "learning_rate": 3.843866449602168e-06, "loss": 0.4715, "step": 32520 }, { "epoch": 0.6170567927463105, "grad_norm": 1.7995857342287382, "learning_rate": 3.840645720301566e-06, "loss": 0.4604, "step": 32530 }, { "epoch": 0.617246481277742, "grad_norm": 1.4373981892358207, "learning_rate": 3.83742549929696e-06, "loss": 0.4578, "step": 32540 }, { "epoch": 0.6174361698091734, "grad_norm": 1.9366075982113096, "learning_rate": 3.834205788000193e-06, "loss": 0.4579, "step": 32550 }, { "epoch": 0.6176258583406047, "grad_norm": 1.3877079623091546, "learning_rate": 3.8309865878228866e-06, "loss": 0.4494, "step": 32560 }, { "epoch": 0.6178155468720361, "grad_norm": 1.2133916533458857, "learning_rate": 3.827767900176434e-06, "loss": 0.4274, "step": 32570 }, { "epoch": 0.6180052354034675, "grad_norm": 1.2918422335299895, "learning_rate": 3.824549726472005e-06, "loss": 0.4252, "step": 32580 }, { "epoch": 0.6181949239348989, "grad_norm": 1.1798453297565719, "learning_rate": 3.821332068120543e-06, "loss": 0.4281, "step": 32590 }, { "epoch": 0.6183846124663303, "grad_norm": 1.5986939915158727, "learning_rate": 3.818114926532768e-06, "loss": 0.4552, "step": 32600 }, { "epoch": 0.6185743009977617, "grad_norm": 1.3779256232060046, "learning_rate": 3.814898303119174e-06, "loss": 0.4417, "step": 32610 }, { "epoch": 0.6187639895291931, "grad_norm": 1.409585842399665, "learning_rate": 3.8116821992900234e-06, "loss": 0.4309, "step": 32620 }, { "epoch": 0.6189536780606244, "grad_norm": 1.6522731256417507, "learning_rate": 3.808466616455353e-06, "loss": 0.4341, "step": 32630 }, { "epoch": 0.6191433665920558, "grad_norm": 1.4756861080166985, "learning_rate": 3.805251556024976e-06, "loss": 0.4589, "step": 32640 }, { "epoch": 0.6193330551234872, "grad_norm": 1.5153550711697814, "learning_rate": 3.802037019408467e-06, "loss": 0.4425, "step": 32650 }, { "epoch": 0.6195227436549187, "grad_norm": 1.444751780784455, "learning_rate": 3.7988230080151805e-06, "loss": 0.4573, "step": 32660 }, { "epoch": 0.61971243218635, "grad_norm": 1.448934550099616, "learning_rate": 3.7956095232542328e-06, "loss": 0.4548, "step": 32670 }, { "epoch": 0.6199021207177814, "grad_norm": 1.3657606681695882, "learning_rate": 3.792396566534513e-06, "loss": 0.4693, "step": 32680 }, { "epoch": 0.6200918092492128, "grad_norm": 1.280159819145446, "learning_rate": 3.7891841392646824e-06, "loss": 0.4432, "step": 32690 }, { "epoch": 0.6202814977806442, "grad_norm": 1.4853791605661546, "learning_rate": 3.785972242853162e-06, "loss": 0.4452, "step": 32700 }, { "epoch": 0.6204711863120755, "grad_norm": 1.7878610848590932, "learning_rate": 3.7827608787081477e-06, "loss": 0.4532, "step": 32710 }, { "epoch": 0.620660874843507, "grad_norm": 1.4016152264077353, "learning_rate": 3.7795500482375955e-06, "loss": 0.4378, "step": 32720 }, { "epoch": 0.6208505633749384, "grad_norm": 1.6901253716529088, "learning_rate": 3.776339752849232e-06, "loss": 0.4338, "step": 32730 }, { "epoch": 0.6210402519063697, "grad_norm": 1.5418478189566982, "learning_rate": 3.7731299939505495e-06, "loss": 0.4739, "step": 32740 }, { "epoch": 0.6212299404378011, "grad_norm": 1.336877990430374, "learning_rate": 3.769920772948802e-06, "loss": 0.3978, "step": 32750 }, { "epoch": 0.6214196289692325, "grad_norm": 1.5795698005463923, "learning_rate": 3.766712091251007e-06, "loss": 0.4331, "step": 32760 }, { "epoch": 0.621609317500664, "grad_norm": 1.437167175243793, "learning_rate": 3.763503950263953e-06, "loss": 0.4184, "step": 32770 }, { "epoch": 0.6217990060320953, "grad_norm": 1.3494953458943997, "learning_rate": 3.7602963513941806e-06, "loss": 0.4567, "step": 32780 }, { "epoch": 0.6219886945635267, "grad_norm": 1.2886233459275196, "learning_rate": 3.7570892960480015e-06, "loss": 0.4388, "step": 32790 }, { "epoch": 0.6221783830949581, "grad_norm": 1.4700820904659215, "learning_rate": 3.753882785631484e-06, "loss": 0.4422, "step": 32800 }, { "epoch": 0.6223680716263895, "grad_norm": 1.5211486790081907, "learning_rate": 3.7506768215504596e-06, "loss": 0.45, "step": 32810 }, { "epoch": 0.6225577601578208, "grad_norm": 1.160646084322301, "learning_rate": 3.747471405210522e-06, "loss": 0.4376, "step": 32820 }, { "epoch": 0.6227474486892522, "grad_norm": 1.2085459886784948, "learning_rate": 3.7442665380170205e-06, "loss": 0.4324, "step": 32830 }, { "epoch": 0.6229371372206837, "grad_norm": 1.3387363381026989, "learning_rate": 3.741062221375066e-06, "loss": 0.4274, "step": 32840 }, { "epoch": 0.623126825752115, "grad_norm": 1.4436494713950356, "learning_rate": 3.737858456689529e-06, "loss": 0.4409, "step": 32850 }, { "epoch": 0.6233165142835464, "grad_norm": 1.146433644602283, "learning_rate": 3.7346552453650354e-06, "loss": 0.4436, "step": 32860 }, { "epoch": 0.6235062028149778, "grad_norm": 1.4582513503963481, "learning_rate": 3.7314525888059728e-06, "loss": 0.4311, "step": 32870 }, { "epoch": 0.6236958913464092, "grad_norm": 1.5190602430693303, "learning_rate": 3.728250488416479e-06, "loss": 0.4768, "step": 32880 }, { "epoch": 0.6238855798778405, "grad_norm": 1.6367676158863607, "learning_rate": 3.7250489456004522e-06, "loss": 0.4183, "step": 32890 }, { "epoch": 0.624075268409272, "grad_norm": 1.5824717232686119, "learning_rate": 3.721847961761551e-06, "loss": 0.4533, "step": 32900 }, { "epoch": 0.6242649569407034, "grad_norm": 1.302646863155381, "learning_rate": 3.718647538303176e-06, "loss": 0.438, "step": 32910 }, { "epoch": 0.6244546454721348, "grad_norm": 1.4695215058424005, "learning_rate": 3.715447676628494e-06, "loss": 0.4441, "step": 32920 }, { "epoch": 0.6246443340035661, "grad_norm": 1.509529528196352, "learning_rate": 3.7122483781404217e-06, "loss": 0.4457, "step": 32930 }, { "epoch": 0.6248340225349975, "grad_norm": 1.2030509013589654, "learning_rate": 3.7090496442416247e-06, "loss": 0.4391, "step": 32940 }, { "epoch": 0.625023711066429, "grad_norm": 1.7390811014193324, "learning_rate": 3.7058514763345294e-06, "loss": 0.4542, "step": 32950 }, { "epoch": 0.6252133995978603, "grad_norm": 1.532572316787173, "learning_rate": 3.702653875821306e-06, "loss": 0.4383, "step": 32960 }, { "epoch": 0.6254030881292917, "grad_norm": 1.6235359104883549, "learning_rate": 3.69945684410388e-06, "loss": 0.4652, "step": 32970 }, { "epoch": 0.6255927766607231, "grad_norm": 1.6780786369731475, "learning_rate": 3.696260382583929e-06, "loss": 0.4359, "step": 32980 }, { "epoch": 0.6257824651921545, "grad_norm": 1.5638737841047972, "learning_rate": 3.693064492662874e-06, "loss": 0.4451, "step": 32990 }, { "epoch": 0.6259721537235858, "grad_norm": 1.6606358232812009, "learning_rate": 3.6898691757418947e-06, "loss": 0.4553, "step": 33000 }, { "epoch": 0.6261618422550173, "grad_norm": 1.234996398813695, "learning_rate": 3.6866744332219117e-06, "loss": 0.4146, "step": 33010 }, { "epoch": 0.6263515307864487, "grad_norm": 1.5573763006231158, "learning_rate": 3.6834802665035964e-06, "loss": 0.456, "step": 33020 }, { "epoch": 0.6265412193178801, "grad_norm": 1.5510630826291198, "learning_rate": 3.6802866769873724e-06, "loss": 0.4514, "step": 33030 }, { "epoch": 0.6267309078493114, "grad_norm": 1.1981636012613475, "learning_rate": 3.6770936660734e-06, "loss": 0.4414, "step": 33040 }, { "epoch": 0.6269205963807428, "grad_norm": 1.4540390063887498, "learning_rate": 3.6739012351615965e-06, "loss": 0.4471, "step": 33050 }, { "epoch": 0.6271102849121742, "grad_norm": 1.580694688281262, "learning_rate": 3.6707093856516185e-06, "loss": 0.4439, "step": 33060 }, { "epoch": 0.6272999734436056, "grad_norm": 1.3674400980110129, "learning_rate": 3.667518118942868e-06, "loss": 0.4681, "step": 33070 }, { "epoch": 0.627489661975037, "grad_norm": 1.4203121793665985, "learning_rate": 3.664327436434497e-06, "loss": 0.4566, "step": 33080 }, { "epoch": 0.6276793505064684, "grad_norm": 1.3190748332844078, "learning_rate": 3.6611373395253913e-06, "loss": 0.4396, "step": 33090 }, { "epoch": 0.6278690390378998, "grad_norm": 1.5829896603086842, "learning_rate": 3.6579478296141907e-06, "loss": 0.4475, "step": 33100 }, { "epoch": 0.6280587275693311, "grad_norm": 1.4125184030825528, "learning_rate": 3.654758908099272e-06, "loss": 0.4151, "step": 33110 }, { "epoch": 0.6282484161007625, "grad_norm": 1.3952139185000698, "learning_rate": 3.6515705763787524e-06, "loss": 0.4393, "step": 33120 }, { "epoch": 0.628438104632194, "grad_norm": 1.428468454055854, "learning_rate": 3.648382835850496e-06, "loss": 0.4288, "step": 33130 }, { "epoch": 0.6286277931636254, "grad_norm": 1.5789324422843098, "learning_rate": 3.645195687912104e-06, "loss": 0.4444, "step": 33140 }, { "epoch": 0.6288174816950567, "grad_norm": 2.5587237234269415, "learning_rate": 3.642009133960917e-06, "loss": 0.4445, "step": 33150 }, { "epoch": 0.6290071702264881, "grad_norm": 1.6146390622375935, "learning_rate": 3.638823175394019e-06, "loss": 0.4525, "step": 33160 }, { "epoch": 0.6291968587579195, "grad_norm": 1.5032875455915273, "learning_rate": 3.635637813608227e-06, "loss": 0.4685, "step": 33170 }, { "epoch": 0.6293865472893508, "grad_norm": 1.6156792754242166, "learning_rate": 3.632453050000104e-06, "loss": 0.4626, "step": 33180 }, { "epoch": 0.6295762358207823, "grad_norm": 1.4576770444302043, "learning_rate": 3.629268885965945e-06, "loss": 0.4416, "step": 33190 }, { "epoch": 0.6297659243522137, "grad_norm": 1.3695271733333683, "learning_rate": 3.626085322901783e-06, "loss": 0.4259, "step": 33200 }, { "epoch": 0.6299556128836451, "grad_norm": 1.3832646467686631, "learning_rate": 3.62290236220339e-06, "loss": 0.446, "step": 33210 }, { "epoch": 0.6301453014150764, "grad_norm": 1.5416229898207265, "learning_rate": 3.619720005266272e-06, "loss": 0.452, "step": 33220 }, { "epoch": 0.6303349899465078, "grad_norm": 1.270272840575282, "learning_rate": 3.61653825348567e-06, "loss": 0.4091, "step": 33230 }, { "epoch": 0.6305246784779392, "grad_norm": 1.4298914110042782, "learning_rate": 3.613357108256561e-06, "loss": 0.438, "step": 33240 }, { "epoch": 0.6307143670093707, "grad_norm": 1.4450667632773782, "learning_rate": 3.6101765709736526e-06, "loss": 0.422, "step": 33250 }, { "epoch": 0.630904055540802, "grad_norm": 1.309217854139199, "learning_rate": 3.606996643031392e-06, "loss": 0.4216, "step": 33260 }, { "epoch": 0.6310937440722334, "grad_norm": 1.5700050415657005, "learning_rate": 3.603817325823955e-06, "loss": 0.458, "step": 33270 }, { "epoch": 0.6312834326036648, "grad_norm": 1.2621553473776566, "learning_rate": 3.60063862074525e-06, "loss": 0.4795, "step": 33280 }, { "epoch": 0.6314731211350961, "grad_norm": 1.2400934042109306, "learning_rate": 3.5974605291889177e-06, "loss": 0.4562, "step": 33290 }, { "epoch": 0.6316628096665275, "grad_norm": 1.5088820574717647, "learning_rate": 3.5942830525483276e-06, "loss": 0.4432, "step": 33300 }, { "epoch": 0.631852498197959, "grad_norm": 1.2833438938836912, "learning_rate": 3.5911061922165844e-06, "loss": 0.4281, "step": 33310 }, { "epoch": 0.6320421867293904, "grad_norm": 1.5023782190096708, "learning_rate": 3.5879299495865204e-06, "loss": 0.4302, "step": 33320 }, { "epoch": 0.6322318752608217, "grad_norm": 1.7207101154222637, "learning_rate": 3.5847543260506925e-06, "loss": 0.4481, "step": 33330 }, { "epoch": 0.6324215637922531, "grad_norm": 1.3911272722507373, "learning_rate": 3.581579323001394e-06, "loss": 0.4213, "step": 33340 }, { "epoch": 0.6326112523236845, "grad_norm": 1.6015537401364712, "learning_rate": 3.578404941830641e-06, "loss": 0.4511, "step": 33350 }, { "epoch": 0.6328009408551158, "grad_norm": 1.2692341726752672, "learning_rate": 3.575231183930178e-06, "loss": 0.4249, "step": 33360 }, { "epoch": 0.6329906293865473, "grad_norm": 1.1507674736540126, "learning_rate": 3.572058050691477e-06, "loss": 0.4457, "step": 33370 }, { "epoch": 0.6331803179179787, "grad_norm": 1.5644671133410355, "learning_rate": 3.568885543505734e-06, "loss": 0.4609, "step": 33380 }, { "epoch": 0.6333700064494101, "grad_norm": 1.274546825652551, "learning_rate": 3.565713663763874e-06, "loss": 0.4239, "step": 33390 }, { "epoch": 0.6335596949808414, "grad_norm": 1.4551245193845077, "learning_rate": 3.5625424128565454e-06, "loss": 0.4416, "step": 33400 }, { "epoch": 0.6337493835122728, "grad_norm": 1.2993902590794721, "learning_rate": 3.559371792174119e-06, "loss": 0.4453, "step": 33410 }, { "epoch": 0.6339390720437043, "grad_norm": 1.4259389022323168, "learning_rate": 3.5562018031066904e-06, "loss": 0.4553, "step": 33420 }, { "epoch": 0.6341287605751357, "grad_norm": 1.2891764157738514, "learning_rate": 3.5530324470440815e-06, "loss": 0.4351, "step": 33430 }, { "epoch": 0.634318449106567, "grad_norm": 1.2268139070338135, "learning_rate": 3.549863725375831e-06, "loss": 0.4544, "step": 33440 }, { "epoch": 0.6345081376379984, "grad_norm": 1.7016296010295426, "learning_rate": 3.546695639491205e-06, "loss": 0.4268, "step": 33450 }, { "epoch": 0.6346978261694298, "grad_norm": 1.2631128462049668, "learning_rate": 3.5435281907791856e-06, "loss": 0.4267, "step": 33460 }, { "epoch": 0.6348875147008611, "grad_norm": 1.4952577502722306, "learning_rate": 3.5403613806284786e-06, "loss": 0.4249, "step": 33470 }, { "epoch": 0.6350772032322926, "grad_norm": 1.2059681971938092, "learning_rate": 3.537195210427512e-06, "loss": 0.4178, "step": 33480 }, { "epoch": 0.635266891763724, "grad_norm": 1.4518557777035588, "learning_rate": 3.534029681564428e-06, "loss": 0.4374, "step": 33490 }, { "epoch": 0.6354565802951554, "grad_norm": 1.383217618556541, "learning_rate": 3.5308647954270895e-06, "loss": 0.4367, "step": 33500 }, { "epoch": 0.6356462688265867, "grad_norm": 1.2038145245093206, "learning_rate": 3.527700553403082e-06, "loss": 0.4489, "step": 33510 }, { "epoch": 0.6358359573580181, "grad_norm": 1.345674973231324, "learning_rate": 3.5245369568797023e-06, "loss": 0.417, "step": 33520 }, { "epoch": 0.6360256458894495, "grad_norm": 1.9597026992764883, "learning_rate": 3.5213740072439685e-06, "loss": 0.4327, "step": 33530 }, { "epoch": 0.636215334420881, "grad_norm": 1.498374164481802, "learning_rate": 3.5182117058826105e-06, "loss": 0.4377, "step": 33540 }, { "epoch": 0.6364050229523123, "grad_norm": 1.624364726624077, "learning_rate": 3.5150500541820777e-06, "loss": 0.4226, "step": 33550 }, { "epoch": 0.6365947114837437, "grad_norm": 1.6211314005410804, "learning_rate": 3.5118890535285377e-06, "loss": 0.4059, "step": 33560 }, { "epoch": 0.6367844000151751, "grad_norm": 1.4926100466123131, "learning_rate": 3.5087287053078645e-06, "loss": 0.4457, "step": 33570 }, { "epoch": 0.6369740885466064, "grad_norm": 1.4694391149070405, "learning_rate": 3.5055690109056506e-06, "loss": 0.4367, "step": 33580 }, { "epoch": 0.6371637770780378, "grad_norm": 1.2110909092942783, "learning_rate": 3.502409971707207e-06, "loss": 0.4284, "step": 33590 }, { "epoch": 0.6373534656094693, "grad_norm": 1.495508536643047, "learning_rate": 3.4992515890975436e-06, "loss": 0.4574, "step": 33600 }, { "epoch": 0.6375431541409007, "grad_norm": 2.206341632554717, "learning_rate": 3.4960938644613984e-06, "loss": 0.4222, "step": 33610 }, { "epoch": 0.637732842672332, "grad_norm": 1.488023269308877, "learning_rate": 3.492936799183209e-06, "loss": 0.4357, "step": 33620 }, { "epoch": 0.6379225312037634, "grad_norm": 1.4907069600311922, "learning_rate": 3.489780394647128e-06, "loss": 0.4456, "step": 33630 }, { "epoch": 0.6381122197351948, "grad_norm": 1.3327851770218293, "learning_rate": 3.486624652237023e-06, "loss": 0.4491, "step": 33640 }, { "epoch": 0.6383019082666263, "grad_norm": 1.4779448770328976, "learning_rate": 3.4834695733364633e-06, "loss": 0.4331, "step": 33650 }, { "epoch": 0.6384915967980576, "grad_norm": 1.404517615165058, "learning_rate": 3.480315159328733e-06, "loss": 0.4445, "step": 33660 }, { "epoch": 0.638681285329489, "grad_norm": 1.500836169896743, "learning_rate": 3.477161411596821e-06, "loss": 0.4215, "step": 33670 }, { "epoch": 0.6388709738609204, "grad_norm": 0.9486442848912741, "learning_rate": 3.4740083315234252e-06, "loss": 0.4377, "step": 33680 }, { "epoch": 0.6390606623923517, "grad_norm": 1.2098781266438186, "learning_rate": 3.470855920490954e-06, "loss": 0.4293, "step": 33690 }, { "epoch": 0.6392503509237831, "grad_norm": 1.6351399163012346, "learning_rate": 3.4677041798815176e-06, "loss": 0.4233, "step": 33700 }, { "epoch": 0.6394400394552145, "grad_norm": 1.4338647529496265, "learning_rate": 3.464553111076935e-06, "loss": 0.4623, "step": 33710 }, { "epoch": 0.639629727986646, "grad_norm": 1.3012094291237672, "learning_rate": 3.4614027154587317e-06, "loss": 0.4192, "step": 33720 }, { "epoch": 0.6398194165180773, "grad_norm": 1.5273816027641547, "learning_rate": 3.458252994408132e-06, "loss": 0.4621, "step": 33730 }, { "epoch": 0.6400091050495087, "grad_norm": 1.523352649979313, "learning_rate": 3.4551039493060745e-06, "loss": 0.4264, "step": 33740 }, { "epoch": 0.6401987935809401, "grad_norm": 1.5510613230181034, "learning_rate": 3.4519555815331927e-06, "loss": 0.4691, "step": 33750 }, { "epoch": 0.6403884821123715, "grad_norm": 1.736439466768527, "learning_rate": 3.448807892469825e-06, "loss": 0.4614, "step": 33760 }, { "epoch": 0.6405781706438028, "grad_norm": 1.4942593775942419, "learning_rate": 3.445660883496019e-06, "loss": 0.4473, "step": 33770 }, { "epoch": 0.6407678591752343, "grad_norm": 1.2779084071503415, "learning_rate": 3.442514555991512e-06, "loss": 0.4558, "step": 33780 }, { "epoch": 0.6409575477066657, "grad_norm": 1.5184426122122898, "learning_rate": 3.4393689113357533e-06, "loss": 0.4436, "step": 33790 }, { "epoch": 0.641147236238097, "grad_norm": 1.525673056808638, "learning_rate": 3.4362239509078885e-06, "loss": 0.4456, "step": 33800 }, { "epoch": 0.6413369247695284, "grad_norm": 1.5919490052496548, "learning_rate": 3.4330796760867613e-06, "loss": 0.4386, "step": 33810 }, { "epoch": 0.6415266133009598, "grad_norm": 1.4087584916657385, "learning_rate": 3.429936088250919e-06, "loss": 0.4301, "step": 33820 }, { "epoch": 0.6417163018323913, "grad_norm": 1.544864774221889, "learning_rate": 3.4267931887786048e-06, "loss": 0.4542, "step": 33830 }, { "epoch": 0.6419059903638226, "grad_norm": 1.390556035967671, "learning_rate": 3.4236509790477605e-06, "loss": 0.4715, "step": 33840 }, { "epoch": 0.642095678895254, "grad_norm": 1.556210640769598, "learning_rate": 3.4205094604360277e-06, "loss": 0.4355, "step": 33850 }, { "epoch": 0.6422853674266854, "grad_norm": 1.2669497510187708, "learning_rate": 3.4173686343207396e-06, "loss": 0.433, "step": 33860 }, { "epoch": 0.6424750559581168, "grad_norm": 1.3559006732091237, "learning_rate": 3.414228502078933e-06, "loss": 0.4593, "step": 33870 }, { "epoch": 0.6426647444895481, "grad_norm": 1.4284311767573903, "learning_rate": 3.411089065087336e-06, "loss": 0.4559, "step": 33880 }, { "epoch": 0.6428544330209796, "grad_norm": 1.2461212887876505, "learning_rate": 3.407950324722371e-06, "loss": 0.4273, "step": 33890 }, { "epoch": 0.643044121552411, "grad_norm": 1.4269848999644688, "learning_rate": 3.4048122823601617e-06, "loss": 0.41, "step": 33900 }, { "epoch": 0.6432338100838423, "grad_norm": 1.6053899632636304, "learning_rate": 3.401674939376513e-06, "loss": 0.4546, "step": 33910 }, { "epoch": 0.6434234986152737, "grad_norm": 1.5461991263754407, "learning_rate": 3.3985382971469366e-06, "loss": 0.4542, "step": 33920 }, { "epoch": 0.6436131871467051, "grad_norm": 1.4775078329252729, "learning_rate": 3.395402357046631e-06, "loss": 0.4258, "step": 33930 }, { "epoch": 0.6438028756781365, "grad_norm": 1.2820537715338878, "learning_rate": 3.3922671204504846e-06, "loss": 0.4235, "step": 33940 }, { "epoch": 0.6439925642095679, "grad_norm": 1.7748726701705275, "learning_rate": 3.389132588733084e-06, "loss": 0.4407, "step": 33950 }, { "epoch": 0.6441822527409993, "grad_norm": 1.3534600454418786, "learning_rate": 3.3859987632686985e-06, "loss": 0.4455, "step": 33960 }, { "epoch": 0.6443719412724307, "grad_norm": 1.613708090343375, "learning_rate": 3.382865645431295e-06, "loss": 0.4605, "step": 33970 }, { "epoch": 0.6445616298038621, "grad_norm": 1.7748387447171647, "learning_rate": 3.3797332365945264e-06, "loss": 0.444, "step": 33980 }, { "epoch": 0.6447513183352934, "grad_norm": 1.4757882821012012, "learning_rate": 3.376601538131733e-06, "loss": 0.4633, "step": 33990 }, { "epoch": 0.6449410068667248, "grad_norm": 1.230660262237669, "learning_rate": 3.37347055141595e-06, "loss": 0.4312, "step": 34000 }, { "epoch": 0.6451306953981563, "grad_norm": 1.1468625716065488, "learning_rate": 3.3703402778198957e-06, "loss": 0.4499, "step": 34010 }, { "epoch": 0.6453203839295876, "grad_norm": 1.3779163602236184, "learning_rate": 3.367210718715975e-06, "loss": 0.4261, "step": 34020 }, { "epoch": 0.645510072461019, "grad_norm": 1.4454074900156597, "learning_rate": 3.3640818754762838e-06, "loss": 0.454, "step": 34030 }, { "epoch": 0.6456997609924504, "grad_norm": 1.4653801640748894, "learning_rate": 3.3609537494725986e-06, "loss": 0.4224, "step": 34040 }, { "epoch": 0.6458894495238818, "grad_norm": 1.4519661597046274, "learning_rate": 3.3578263420763866e-06, "loss": 0.4566, "step": 34050 }, { "epoch": 0.6460791380553131, "grad_norm": 1.3477336846478465, "learning_rate": 3.354699654658799e-06, "loss": 0.4453, "step": 34060 }, { "epoch": 0.6462688265867446, "grad_norm": 1.3620826310136265, "learning_rate": 3.351573688590666e-06, "loss": 0.422, "step": 34070 }, { "epoch": 0.646458515118176, "grad_norm": 1.3443543382509424, "learning_rate": 3.3484484452425096e-06, "loss": 0.4349, "step": 34080 }, { "epoch": 0.6466482036496073, "grad_norm": 1.7841342216838638, "learning_rate": 3.3453239259845316e-06, "loss": 0.4512, "step": 34090 }, { "epoch": 0.6468378921810387, "grad_norm": 1.5352030312032985, "learning_rate": 3.3422001321866115e-06, "loss": 0.4288, "step": 34100 }, { "epoch": 0.6470275807124701, "grad_norm": 1.3205345094964196, "learning_rate": 3.3390770652183203e-06, "loss": 0.4326, "step": 34110 }, { "epoch": 0.6472172692439015, "grad_norm": 1.386186493434873, "learning_rate": 3.335954726448899e-06, "loss": 0.4781, "step": 34120 }, { "epoch": 0.6474069577753329, "grad_norm": 1.2921080087313082, "learning_rate": 3.3328331172472792e-06, "loss": 0.4263, "step": 34130 }, { "epoch": 0.6475966463067643, "grad_norm": 1.512252408990307, "learning_rate": 3.32971223898207e-06, "loss": 0.4195, "step": 34140 }, { "epoch": 0.6477863348381957, "grad_norm": 1.5235904681572858, "learning_rate": 3.3265920930215566e-06, "loss": 0.4444, "step": 34150 }, { "epoch": 0.6479760233696271, "grad_norm": 1.273574489828384, "learning_rate": 3.3234726807337046e-06, "loss": 0.4221, "step": 34160 }, { "epoch": 0.6481657119010584, "grad_norm": 1.1243962217602055, "learning_rate": 3.3203540034861627e-06, "loss": 0.4173, "step": 34170 }, { "epoch": 0.6483554004324898, "grad_norm": 1.088573422419614, "learning_rate": 3.3172360626462484e-06, "loss": 0.4109, "step": 34180 }, { "epoch": 0.6485450889639213, "grad_norm": 1.1648348176507932, "learning_rate": 3.314118859580966e-06, "loss": 0.4091, "step": 34190 }, { "epoch": 0.6487347774953526, "grad_norm": 1.6838158329182444, "learning_rate": 3.311002395656987e-06, "loss": 0.4578, "step": 34200 }, { "epoch": 0.648924466026784, "grad_norm": 1.2871439809658425, "learning_rate": 3.3078866722406667e-06, "loss": 0.4396, "step": 34210 }, { "epoch": 0.6491141545582154, "grad_norm": 1.2959729653543932, "learning_rate": 3.304771690698032e-06, "loss": 0.4295, "step": 34220 }, { "epoch": 0.6493038430896468, "grad_norm": 1.4098323044963184, "learning_rate": 3.3016574523947846e-06, "loss": 0.4548, "step": 34230 }, { "epoch": 0.6494935316210781, "grad_norm": 1.257560153703771, "learning_rate": 3.298543958696302e-06, "loss": 0.4211, "step": 34240 }, { "epoch": 0.6496832201525096, "grad_norm": 11.885158248154571, "learning_rate": 3.2954312109676303e-06, "loss": 0.4421, "step": 34250 }, { "epoch": 0.649872908683941, "grad_norm": 1.5298561540213822, "learning_rate": 3.292319210573497e-06, "loss": 0.447, "step": 34260 }, { "epoch": 0.6500625972153724, "grad_norm": 1.376484238684937, "learning_rate": 3.2892079588782953e-06, "loss": 0.4368, "step": 34270 }, { "epoch": 0.6502522857468037, "grad_norm": 1.647269682948111, "learning_rate": 3.286097457246091e-06, "loss": 0.4494, "step": 34280 }, { "epoch": 0.6504419742782351, "grad_norm": 1.1915513064848033, "learning_rate": 3.2829877070406226e-06, "loss": 0.429, "step": 34290 }, { "epoch": 0.6506316628096666, "grad_norm": 1.3746551847552457, "learning_rate": 3.2798787096253008e-06, "loss": 0.4449, "step": 34300 }, { "epoch": 0.6508213513410979, "grad_norm": 1.3732385330360894, "learning_rate": 3.2767704663632017e-06, "loss": 0.4828, "step": 34310 }, { "epoch": 0.6510110398725293, "grad_norm": 1.3711635855246982, "learning_rate": 3.273662978617075e-06, "loss": 0.4408, "step": 34320 }, { "epoch": 0.6512007284039607, "grad_norm": 1.515444308604836, "learning_rate": 3.2705562477493356e-06, "loss": 0.4389, "step": 34330 }, { "epoch": 0.6513904169353921, "grad_norm": 1.385787844678283, "learning_rate": 3.2674502751220682e-06, "loss": 0.4328, "step": 34340 }, { "epoch": 0.6515801054668234, "grad_norm": 1.5691337288773095, "learning_rate": 3.2643450620970285e-06, "loss": 0.4709, "step": 34350 }, { "epoch": 0.6517697939982549, "grad_norm": 1.281018178388677, "learning_rate": 3.261240610035632e-06, "loss": 0.4481, "step": 34360 }, { "epoch": 0.6519594825296863, "grad_norm": 1.4144412475749646, "learning_rate": 3.2581369202989657e-06, "loss": 0.4291, "step": 34370 }, { "epoch": 0.6521491710611177, "grad_norm": 1.9461253429168093, "learning_rate": 3.255033994247782e-06, "loss": 0.4013, "step": 34380 }, { "epoch": 0.652338859592549, "grad_norm": 1.5164942199824003, "learning_rate": 3.2519318332424965e-06, "loss": 0.452, "step": 34390 }, { "epoch": 0.6525285481239804, "grad_norm": 1.3978199288498308, "learning_rate": 3.2488304386431917e-06, "loss": 0.4322, "step": 34400 }, { "epoch": 0.6527182366554118, "grad_norm": 1.4663904062779016, "learning_rate": 3.2457298118096114e-06, "loss": 0.4246, "step": 34410 }, { "epoch": 0.6529079251868432, "grad_norm": 1.467723899582653, "learning_rate": 3.242629954101163e-06, "loss": 0.4213, "step": 34420 }, { "epoch": 0.6530976137182746, "grad_norm": 1.632245090527201, "learning_rate": 3.2395308668769226e-06, "loss": 0.4503, "step": 34430 }, { "epoch": 0.653287302249706, "grad_norm": 1.588305105003479, "learning_rate": 3.23643255149562e-06, "loss": 0.4687, "step": 34440 }, { "epoch": 0.6534769907811374, "grad_norm": 1.5755104790035503, "learning_rate": 3.2333350093156497e-06, "loss": 0.4499, "step": 34450 }, { "epoch": 0.6536666793125687, "grad_norm": 1.4108930012919414, "learning_rate": 3.230238241695073e-06, "loss": 0.3943, "step": 34460 }, { "epoch": 0.6538563678440001, "grad_norm": 1.4536593490335032, "learning_rate": 3.2271422499915993e-06, "loss": 0.4419, "step": 34470 }, { "epoch": 0.6540460563754316, "grad_norm": 1.6472573508975992, "learning_rate": 3.224047035562612e-06, "loss": 0.4489, "step": 34480 }, { "epoch": 0.654235744906863, "grad_norm": 1.9218280970780124, "learning_rate": 3.2209525997651412e-06, "loss": 0.4739, "step": 34490 }, { "epoch": 0.6544254334382943, "grad_norm": 1.5466491242417548, "learning_rate": 3.217858943955884e-06, "loss": 0.4548, "step": 34500 }, { "epoch": 0.6546151219697257, "grad_norm": 1.2222154519817685, "learning_rate": 3.214766069491194e-06, "loss": 0.4361, "step": 34510 }, { "epoch": 0.6548048105011571, "grad_norm": 1.6440689511456956, "learning_rate": 3.211673977727078e-06, "loss": 0.455, "step": 34520 }, { "epoch": 0.6549944990325884, "grad_norm": 1.248316098010481, "learning_rate": 3.2085826700192036e-06, "loss": 0.4576, "step": 34530 }, { "epoch": 0.6551841875640199, "grad_norm": 1.4164586438445255, "learning_rate": 3.2054921477228957e-06, "loss": 0.4347, "step": 34540 }, { "epoch": 0.6553738760954513, "grad_norm": 1.3852808362192282, "learning_rate": 3.2024024121931285e-06, "loss": 0.4443, "step": 34550 }, { "epoch": 0.6555635646268827, "grad_norm": 1.4171962700468452, "learning_rate": 3.1993134647845403e-06, "loss": 0.4373, "step": 34560 }, { "epoch": 0.655753253158314, "grad_norm": 2.2719663629037385, "learning_rate": 3.196225306851416e-06, "loss": 0.4478, "step": 34570 }, { "epoch": 0.6559429416897454, "grad_norm": 1.732644350201887, "learning_rate": 3.193137939747697e-06, "loss": 0.4077, "step": 34580 }, { "epoch": 0.6561326302211768, "grad_norm": 1.3142354538587206, "learning_rate": 3.1900513648269828e-06, "loss": 0.4212, "step": 34590 }, { "epoch": 0.6563223187526083, "grad_norm": 1.4112674581912086, "learning_rate": 3.186965583442515e-06, "loss": 0.4538, "step": 34600 }, { "epoch": 0.6565120072840396, "grad_norm": 1.4891910519171374, "learning_rate": 3.1838805969471993e-06, "loss": 0.4254, "step": 34610 }, { "epoch": 0.656701695815471, "grad_norm": 1.2830846362270028, "learning_rate": 3.1807964066935836e-06, "loss": 0.4557, "step": 34620 }, { "epoch": 0.6568913843469024, "grad_norm": 1.5755836700508934, "learning_rate": 3.1777130140338695e-06, "loss": 0.4468, "step": 34630 }, { "epoch": 0.6570810728783337, "grad_norm": 1.4090460557549485, "learning_rate": 3.1746304203199153e-06, "loss": 0.4413, "step": 34640 }, { "epoch": 0.6572707614097651, "grad_norm": 1.513976021480606, "learning_rate": 3.171548626903216e-06, "loss": 0.4271, "step": 34650 }, { "epoch": 0.6574604499411966, "grad_norm": 1.4778463098449903, "learning_rate": 3.1684676351349275e-06, "loss": 0.4315, "step": 34660 }, { "epoch": 0.657650138472628, "grad_norm": 1.3690444759389668, "learning_rate": 3.1653874463658507e-06, "loss": 0.4722, "step": 34670 }, { "epoch": 0.6578398270040593, "grad_norm": 1.5718223741993886, "learning_rate": 3.1623080619464286e-06, "loss": 0.4331, "step": 34680 }, { "epoch": 0.6580295155354907, "grad_norm": 1.3736120359125097, "learning_rate": 3.1592294832267622e-06, "loss": 0.4607, "step": 34690 }, { "epoch": 0.6582192040669221, "grad_norm": 1.5973403405753632, "learning_rate": 3.15615171155659e-06, "loss": 0.4356, "step": 34700 }, { "epoch": 0.6584088925983536, "grad_norm": 1.4900588062741917, "learning_rate": 3.153074748285301e-06, "loss": 0.4436, "step": 34710 }, { "epoch": 0.6585985811297849, "grad_norm": 1.5372641412626031, "learning_rate": 3.1499985947619304e-06, "loss": 0.439, "step": 34720 }, { "epoch": 0.6587882696612163, "grad_norm": 1.8088796993647487, "learning_rate": 3.146923252335154e-06, "loss": 0.4688, "step": 34730 }, { "epoch": 0.6589779581926477, "grad_norm": 1.2731653719288567, "learning_rate": 3.1438487223532977e-06, "loss": 0.4235, "step": 34740 }, { "epoch": 0.659167646724079, "grad_norm": 1.7875890673167147, "learning_rate": 3.1407750061643284e-06, "loss": 0.4265, "step": 34750 }, { "epoch": 0.6593573352555104, "grad_norm": 1.298939033585238, "learning_rate": 3.1377021051158533e-06, "loss": 0.4378, "step": 34760 }, { "epoch": 0.6595470237869419, "grad_norm": 1.5289361986995658, "learning_rate": 3.1346300205551306e-06, "loss": 0.4387, "step": 34770 }, { "epoch": 0.6597367123183733, "grad_norm": 1.675549001317194, "learning_rate": 3.131558753829049e-06, "loss": 0.442, "step": 34780 }, { "epoch": 0.6599264008498046, "grad_norm": 1.6130458818793239, "learning_rate": 3.1284883062841486e-06, "loss": 0.4478, "step": 34790 }, { "epoch": 0.660116089381236, "grad_norm": 1.4495622909861963, "learning_rate": 3.125418679266607e-06, "loss": 0.4184, "step": 34800 }, { "epoch": 0.6603057779126674, "grad_norm": 1.2527302488603256, "learning_rate": 3.122349874122238e-06, "loss": 0.4193, "step": 34810 }, { "epoch": 0.6604954664440988, "grad_norm": 1.4029567591029923, "learning_rate": 3.1192818921965014e-06, "loss": 0.4391, "step": 34820 }, { "epoch": 0.6606851549755302, "grad_norm": 1.3180960659252674, "learning_rate": 3.116214734834494e-06, "loss": 0.4466, "step": 34830 }, { "epoch": 0.6608748435069616, "grad_norm": 1.153361218952275, "learning_rate": 3.113148403380948e-06, "loss": 0.4038, "step": 34840 }, { "epoch": 0.661064532038393, "grad_norm": 1.506454960057358, "learning_rate": 3.110082899180238e-06, "loss": 0.4344, "step": 34850 }, { "epoch": 0.6612542205698243, "grad_norm": 1.2372763977868564, "learning_rate": 3.10701822357637e-06, "loss": 0.4372, "step": 34860 }, { "epoch": 0.6614439091012557, "grad_norm": 1.3670395583495254, "learning_rate": 3.1039543779129955e-06, "loss": 0.4665, "step": 34870 }, { "epoch": 0.6616335976326871, "grad_norm": 1.4253600630548149, "learning_rate": 3.1008913635333952e-06, "loss": 0.435, "step": 34880 }, { "epoch": 0.6618232861641186, "grad_norm": 1.449064498085023, "learning_rate": 3.0978291817804855e-06, "loss": 0.4249, "step": 34890 }, { "epoch": 0.6620129746955499, "grad_norm": 1.5628859532142088, "learning_rate": 3.094767833996824e-06, "loss": 0.4431, "step": 34900 }, { "epoch": 0.6622026632269813, "grad_norm": 1.302947339640956, "learning_rate": 3.0917073215245922e-06, "loss": 0.4362, "step": 34910 }, { "epoch": 0.6623923517584127, "grad_norm": 1.2981029248429106, "learning_rate": 3.088647645705616e-06, "loss": 0.4445, "step": 34920 }, { "epoch": 0.662582040289844, "grad_norm": 1.6058333655387234, "learning_rate": 3.085588807881349e-06, "loss": 0.4517, "step": 34930 }, { "epoch": 0.6627717288212754, "grad_norm": 1.49945365329138, "learning_rate": 3.082530809392875e-06, "loss": 0.4561, "step": 34940 }, { "epoch": 0.6629614173527069, "grad_norm": 1.548440156658072, "learning_rate": 3.079473651580917e-06, "loss": 0.4372, "step": 34950 }, { "epoch": 0.6631511058841383, "grad_norm": 1.4291918317537773, "learning_rate": 3.0764173357858253e-06, "loss": 0.4131, "step": 34960 }, { "epoch": 0.6633407944155696, "grad_norm": 1.4218507838037862, "learning_rate": 3.073361863347579e-06, "loss": 0.4265, "step": 34970 }, { "epoch": 0.663530482947001, "grad_norm": 1.4325857775814141, "learning_rate": 3.070307235605792e-06, "loss": 0.4299, "step": 34980 }, { "epoch": 0.6637201714784324, "grad_norm": 1.9349812645551, "learning_rate": 3.067253453899702e-06, "loss": 0.4555, "step": 34990 }, { "epoch": 0.6639098600098639, "grad_norm": 1.6046743878545926, "learning_rate": 3.0642005195681835e-06, "loss": 0.439, "step": 35000 }, { "epoch": 0.6640995485412952, "grad_norm": 1.597682185426783, "learning_rate": 3.061148433949733e-06, "loss": 0.446, "step": 35010 }, { "epoch": 0.6642892370727266, "grad_norm": 1.6762483913494848, "learning_rate": 3.0580971983824775e-06, "loss": 0.4383, "step": 35020 }, { "epoch": 0.664478925604158, "grad_norm": 1.3629875693867286, "learning_rate": 3.05504681420417e-06, "loss": 0.4399, "step": 35030 }, { "epoch": 0.6646686141355893, "grad_norm": 1.287387999234761, "learning_rate": 3.051997282752194e-06, "loss": 0.4404, "step": 35040 }, { "epoch": 0.6648583026670207, "grad_norm": 1.5638475206964066, "learning_rate": 3.0489486053635535e-06, "loss": 0.4491, "step": 35050 }, { "epoch": 0.6650479911984521, "grad_norm": 1.4299797390131068, "learning_rate": 3.045900783374883e-06, "loss": 0.4319, "step": 35060 }, { "epoch": 0.6652376797298836, "grad_norm": 1.6910354458138728, "learning_rate": 3.0428538181224374e-06, "loss": 0.4348, "step": 35070 }, { "epoch": 0.6654273682613149, "grad_norm": 1.4323700488903106, "learning_rate": 3.0398077109421013e-06, "loss": 0.4332, "step": 35080 }, { "epoch": 0.6656170567927463, "grad_norm": 1.225719083600582, "learning_rate": 3.0367624631693797e-06, "loss": 0.4246, "step": 35090 }, { "epoch": 0.6658067453241777, "grad_norm": 1.3948433155000524, "learning_rate": 3.0337180761394002e-06, "loss": 0.4387, "step": 35100 }, { "epoch": 0.6659964338556091, "grad_norm": 1.2706627393810612, "learning_rate": 3.030674551186913e-06, "loss": 0.4157, "step": 35110 }, { "epoch": 0.6661861223870404, "grad_norm": 1.577888137708138, "learning_rate": 3.0276318896462954e-06, "loss": 0.428, "step": 35120 }, { "epoch": 0.6663758109184719, "grad_norm": 1.2238370095425923, "learning_rate": 3.0245900928515395e-06, "loss": 0.43, "step": 35130 }, { "epoch": 0.6665654994499033, "grad_norm": 1.4974042598141444, "learning_rate": 3.0215491621362614e-06, "loss": 0.4666, "step": 35140 }, { "epoch": 0.6667551879813346, "grad_norm": 1.555376921873687, "learning_rate": 3.0185090988336974e-06, "loss": 0.4557, "step": 35150 }, { "epoch": 0.666944876512766, "grad_norm": 1.3359311206186224, "learning_rate": 3.0154699042767022e-06, "loss": 0.4372, "step": 35160 }, { "epoch": 0.6671345650441974, "grad_norm": 1.5242752213803819, "learning_rate": 3.012431579797753e-06, "loss": 0.4336, "step": 35170 }, { "epoch": 0.6673242535756289, "grad_norm": 1.2958567352316877, "learning_rate": 3.0093941267289417e-06, "loss": 0.4102, "step": 35180 }, { "epoch": 0.6675139421070602, "grad_norm": 1.5089626907550635, "learning_rate": 3.006357546401979e-06, "loss": 0.4491, "step": 35190 }, { "epoch": 0.6677036306384916, "grad_norm": 1.4856685721429428, "learning_rate": 3.0033218401481958e-06, "loss": 0.428, "step": 35200 }, { "epoch": 0.667893319169923, "grad_norm": 1.753394941144932, "learning_rate": 3.000287009298535e-06, "loss": 0.437, "step": 35210 }, { "epoch": 0.6680830077013544, "grad_norm": 1.3513270555543353, "learning_rate": 2.997253055183561e-06, "loss": 0.4379, "step": 35220 }, { "epoch": 0.6682726962327857, "grad_norm": 1.2724676179535594, "learning_rate": 2.9942199791334486e-06, "loss": 0.4311, "step": 35230 }, { "epoch": 0.6684623847642172, "grad_norm": 1.469821618599109, "learning_rate": 2.99118778247799e-06, "loss": 0.4398, "step": 35240 }, { "epoch": 0.6686520732956486, "grad_norm": 2.0086608548430784, "learning_rate": 2.988156466546595e-06, "loss": 0.4152, "step": 35250 }, { "epoch": 0.6688417618270799, "grad_norm": 1.5531251902411871, "learning_rate": 2.9851260326682815e-06, "loss": 0.4457, "step": 35260 }, { "epoch": 0.6690314503585113, "grad_norm": 1.3882399243272123, "learning_rate": 2.9820964821716846e-06, "loss": 0.4438, "step": 35270 }, { "epoch": 0.6692211388899427, "grad_norm": 1.3810179512139804, "learning_rate": 2.97906781638505e-06, "loss": 0.4356, "step": 35280 }, { "epoch": 0.6694108274213741, "grad_norm": 1.3802819018318286, "learning_rate": 2.9760400366362356e-06, "loss": 0.4272, "step": 35290 }, { "epoch": 0.6696005159528055, "grad_norm": 1.637921785744158, "learning_rate": 2.9730131442527157e-06, "loss": 0.4218, "step": 35300 }, { "epoch": 0.6697902044842369, "grad_norm": 1.4320714737086182, "learning_rate": 2.969987140561567e-06, "loss": 0.4276, "step": 35310 }, { "epoch": 0.6699798930156683, "grad_norm": 1.4700873513160595, "learning_rate": 2.9669620268894822e-06, "loss": 0.434, "step": 35320 }, { "epoch": 0.6701695815470997, "grad_norm": 1.7378260368512697, "learning_rate": 2.9639378045627666e-06, "loss": 0.4249, "step": 35330 }, { "epoch": 0.670359270078531, "grad_norm": 1.3783401345194852, "learning_rate": 2.9609144749073236e-06, "loss": 0.4582, "step": 35340 }, { "epoch": 0.6705489586099624, "grad_norm": 1.4852530671691573, "learning_rate": 2.9578920392486797e-06, "loss": 0.4418, "step": 35350 }, { "epoch": 0.6707386471413939, "grad_norm": 1.3169673573094693, "learning_rate": 2.954870498911957e-06, "loss": 0.4517, "step": 35360 }, { "epoch": 0.6709283356728252, "grad_norm": 1.535116229614185, "learning_rate": 2.9518498552218912e-06, "loss": 0.4335, "step": 35370 }, { "epoch": 0.6711180242042566, "grad_norm": 1.408957664319235, "learning_rate": 2.9488301095028266e-06, "loss": 0.4508, "step": 35380 }, { "epoch": 0.671307712735688, "grad_norm": 1.5007456833520791, "learning_rate": 2.9458112630787085e-06, "loss": 0.4425, "step": 35390 }, { "epoch": 0.6714974012671194, "grad_norm": 1.4985788471675217, "learning_rate": 2.9427933172730904e-06, "loss": 0.429, "step": 35400 }, { "epoch": 0.6716870897985507, "grad_norm": 1.64227722650052, "learning_rate": 2.939776273409134e-06, "loss": 0.4298, "step": 35410 }, { "epoch": 0.6718767783299822, "grad_norm": 1.4181629824866602, "learning_rate": 2.936760132809598e-06, "loss": 0.4639, "step": 35420 }, { "epoch": 0.6720664668614136, "grad_norm": 1.5202335707754264, "learning_rate": 2.933744896796854e-06, "loss": 0.4509, "step": 35430 }, { "epoch": 0.672256155392845, "grad_norm": 1.3758934209178366, "learning_rate": 2.93073056669287e-06, "loss": 0.445, "step": 35440 }, { "epoch": 0.6724458439242763, "grad_norm": 1.4597349981505872, "learning_rate": 2.9277171438192197e-06, "loss": 0.4576, "step": 35450 }, { "epoch": 0.6726355324557077, "grad_norm": 1.3273347229148889, "learning_rate": 2.924704629497083e-06, "loss": 0.4345, "step": 35460 }, { "epoch": 0.6728252209871391, "grad_norm": 1.576305087738042, "learning_rate": 2.9216930250472296e-06, "loss": 0.4528, "step": 35470 }, { "epoch": 0.6730149095185705, "grad_norm": 1.469482644006584, "learning_rate": 2.918682331790044e-06, "loss": 0.4431, "step": 35480 }, { "epoch": 0.6732045980500019, "grad_norm": 1.4255311565187097, "learning_rate": 2.9156725510455047e-06, "loss": 0.4432, "step": 35490 }, { "epoch": 0.6733942865814333, "grad_norm": 1.643581339312168, "learning_rate": 2.91266368413319e-06, "loss": 0.4332, "step": 35500 }, { "epoch": 0.6735839751128647, "grad_norm": 1.344477835377183, "learning_rate": 2.9096557323722775e-06, "loss": 0.4242, "step": 35510 }, { "epoch": 0.673773663644296, "grad_norm": 1.3962696157434913, "learning_rate": 2.9066486970815424e-06, "loss": 0.4106, "step": 35520 }, { "epoch": 0.6739633521757274, "grad_norm": 1.4394085724103296, "learning_rate": 2.903642579579362e-06, "loss": 0.4197, "step": 35530 }, { "epoch": 0.6741530407071589, "grad_norm": 1.418702475025988, "learning_rate": 2.900637381183712e-06, "loss": 0.4502, "step": 35540 }, { "epoch": 0.6743427292385903, "grad_norm": 1.6544494596534576, "learning_rate": 2.8976331032121584e-06, "loss": 0.4282, "step": 35550 }, { "epoch": 0.6745324177700216, "grad_norm": 1.556777863591104, "learning_rate": 2.8946297469818695e-06, "loss": 0.4264, "step": 35560 }, { "epoch": 0.674722106301453, "grad_norm": 1.3266689044622073, "learning_rate": 2.891627313809605e-06, "loss": 0.4212, "step": 35570 }, { "epoch": 0.6749117948328844, "grad_norm": 1.5743850322927617, "learning_rate": 2.8886258050117243e-06, "loss": 0.444, "step": 35580 }, { "epoch": 0.6751014833643157, "grad_norm": 1.6397144117278517, "learning_rate": 2.8856252219041813e-06, "loss": 0.4332, "step": 35590 }, { "epoch": 0.6752911718957472, "grad_norm": 1.4432868542428474, "learning_rate": 2.8826255658025205e-06, "loss": 0.4463, "step": 35600 }, { "epoch": 0.6754808604271786, "grad_norm": 1.340907194963398, "learning_rate": 2.8796268380218806e-06, "loss": 0.419, "step": 35610 }, { "epoch": 0.67567054895861, "grad_norm": 1.3022033016211567, "learning_rate": 2.876629039876998e-06, "loss": 0.4292, "step": 35620 }, { "epoch": 0.6758602374900413, "grad_norm": 1.604631509160611, "learning_rate": 2.8736321726821945e-06, "loss": 0.4294, "step": 35630 }, { "epoch": 0.6760499260214727, "grad_norm": 1.4131102669003377, "learning_rate": 2.870636237751391e-06, "loss": 0.4304, "step": 35640 }, { "epoch": 0.6762396145529042, "grad_norm": 1.7941649840552905, "learning_rate": 2.8676412363980944e-06, "loss": 0.4292, "step": 35650 }, { "epoch": 0.6764293030843356, "grad_norm": 1.2128848707459408, "learning_rate": 2.8646471699354017e-06, "loss": 0.4454, "step": 35660 }, { "epoch": 0.6766189916157669, "grad_norm": 2.1362305450584325, "learning_rate": 2.8616540396760063e-06, "loss": 0.4659, "step": 35670 }, { "epoch": 0.6768086801471983, "grad_norm": 1.4185202146785447, "learning_rate": 2.8586618469321823e-06, "loss": 0.4371, "step": 35680 }, { "epoch": 0.6769983686786297, "grad_norm": 1.3069773763636439, "learning_rate": 2.8556705930158005e-06, "loss": 0.4177, "step": 35690 }, { "epoch": 0.677188057210061, "grad_norm": 1.3237671638592516, "learning_rate": 2.852680279238319e-06, "loss": 0.4406, "step": 35700 }, { "epoch": 0.6773777457414925, "grad_norm": 1.4712146242235018, "learning_rate": 2.8496909069107805e-06, "loss": 0.4538, "step": 35710 }, { "epoch": 0.6775674342729239, "grad_norm": 1.433557623541551, "learning_rate": 2.8467024773438147e-06, "loss": 0.4298, "step": 35720 }, { "epoch": 0.6777571228043553, "grad_norm": 1.3701317086601688, "learning_rate": 2.843714991847638e-06, "loss": 0.42, "step": 35730 }, { "epoch": 0.6779468113357866, "grad_norm": 1.3444139730474034, "learning_rate": 2.8407284517320577e-06, "loss": 0.4403, "step": 35740 }, { "epoch": 0.678136499867218, "grad_norm": 1.548478619146086, "learning_rate": 2.8377428583064643e-06, "loss": 0.4535, "step": 35750 }, { "epoch": 0.6783261883986494, "grad_norm": 1.369392970036666, "learning_rate": 2.83475821287983e-06, "loss": 0.4463, "step": 35760 }, { "epoch": 0.6785158769300808, "grad_norm": 1.4956196163723692, "learning_rate": 2.8317745167607123e-06, "loss": 0.4443, "step": 35770 }, { "epoch": 0.6787055654615122, "grad_norm": 1.304207090280284, "learning_rate": 2.828791771257258e-06, "loss": 0.4233, "step": 35780 }, { "epoch": 0.6788952539929436, "grad_norm": 1.5675875745214969, "learning_rate": 2.8258099776771886e-06, "loss": 0.4323, "step": 35790 }, { "epoch": 0.679084942524375, "grad_norm": 1.5968288164253777, "learning_rate": 2.822829137327817e-06, "loss": 0.4478, "step": 35800 }, { "epoch": 0.6792746310558063, "grad_norm": 1.7342650514954145, "learning_rate": 2.819849251516031e-06, "loss": 0.4405, "step": 35810 }, { "epoch": 0.6794643195872377, "grad_norm": 1.6113754642492486, "learning_rate": 2.8168703215483013e-06, "loss": 0.4242, "step": 35820 }, { "epoch": 0.6796540081186692, "grad_norm": 1.7207772860448238, "learning_rate": 2.8138923487306846e-06, "loss": 0.428, "step": 35830 }, { "epoch": 0.6798436966501006, "grad_norm": 1.3038053449666733, "learning_rate": 2.8109153343688113e-06, "loss": 0.4393, "step": 35840 }, { "epoch": 0.6800333851815319, "grad_norm": 1.6870114494425752, "learning_rate": 2.8079392797678973e-06, "loss": 0.4406, "step": 35850 }, { "epoch": 0.6802230737129633, "grad_norm": 1.4728670417916971, "learning_rate": 2.804964186232734e-06, "loss": 0.4605, "step": 35860 }, { "epoch": 0.6804127622443947, "grad_norm": 1.536609365377838, "learning_rate": 2.8019900550676894e-06, "loss": 0.4282, "step": 35870 }, { "epoch": 0.680602450775826, "grad_norm": 1.3858109141574229, "learning_rate": 2.799016887576718e-06, "loss": 0.4511, "step": 35880 }, { "epoch": 0.6807921393072575, "grad_norm": 1.273065078248984, "learning_rate": 2.7960446850633405e-06, "loss": 0.4204, "step": 35890 }, { "epoch": 0.6809818278386889, "grad_norm": 1.6033988796930898, "learning_rate": 2.793073448830666e-06, "loss": 0.458, "step": 35900 }, { "epoch": 0.6811715163701203, "grad_norm": 1.3619186777919028, "learning_rate": 2.7901031801813716e-06, "loss": 0.4266, "step": 35910 }, { "epoch": 0.6813612049015516, "grad_norm": 1.3502530032667124, "learning_rate": 2.787133880417712e-06, "loss": 0.4571, "step": 35920 }, { "epoch": 0.681550893432983, "grad_norm": 1.4905787840201536, "learning_rate": 2.7841655508415197e-06, "loss": 0.4276, "step": 35930 }, { "epoch": 0.6817405819644144, "grad_norm": 1.5383993498131558, "learning_rate": 2.7811981927541998e-06, "loss": 0.4249, "step": 35940 }, { "epoch": 0.6819302704958459, "grad_norm": 1.4421251400979882, "learning_rate": 2.7782318074567305e-06, "loss": 0.4388, "step": 35950 }, { "epoch": 0.6821199590272772, "grad_norm": 1.5697230354729168, "learning_rate": 2.7752663962496706e-06, "loss": 0.4516, "step": 35960 }, { "epoch": 0.6823096475587086, "grad_norm": 1.5058885623970342, "learning_rate": 2.772301960433138e-06, "loss": 0.4297, "step": 35970 }, { "epoch": 0.68249933609014, "grad_norm": 1.5338085062695637, "learning_rate": 2.769338501306834e-06, "loss": 0.4873, "step": 35980 }, { "epoch": 0.6826890246215713, "grad_norm": 1.4225298010253709, "learning_rate": 2.766376020170032e-06, "loss": 0.4488, "step": 35990 }, { "epoch": 0.6828787131530027, "grad_norm": 1.5801122934346623, "learning_rate": 2.763414518321569e-06, "loss": 0.4263, "step": 36000 }, { "epoch": 0.6830684016844342, "grad_norm": 1.4667462034327357, "learning_rate": 2.760453997059861e-06, "loss": 0.4573, "step": 36010 }, { "epoch": 0.6832580902158656, "grad_norm": 1.4145039029813762, "learning_rate": 2.7574944576828877e-06, "loss": 0.4361, "step": 36020 }, { "epoch": 0.6834477787472969, "grad_norm": 1.4343654252878044, "learning_rate": 2.7545359014882e-06, "loss": 0.4622, "step": 36030 }, { "epoch": 0.6836374672787283, "grad_norm": 2.1147096217206247, "learning_rate": 2.75157832977292e-06, "loss": 0.4513, "step": 36040 }, { "epoch": 0.6838271558101597, "grad_norm": 1.6075783826468817, "learning_rate": 2.748621743833736e-06, "loss": 0.4427, "step": 36050 }, { "epoch": 0.6840168443415912, "grad_norm": 1.4275971295701195, "learning_rate": 2.7456661449669053e-06, "loss": 0.4325, "step": 36060 }, { "epoch": 0.6842065328730225, "grad_norm": 1.551980277931896, "learning_rate": 2.742711534468252e-06, "loss": 0.438, "step": 36070 }, { "epoch": 0.6843962214044539, "grad_norm": 1.389238719431952, "learning_rate": 2.739757913633165e-06, "loss": 0.4288, "step": 36080 }, { "epoch": 0.6845859099358853, "grad_norm": 1.4515041343552242, "learning_rate": 2.7368052837566033e-06, "loss": 0.4206, "step": 36090 }, { "epoch": 0.6847755984673166, "grad_norm": 1.4206259034630326, "learning_rate": 2.7338536461330866e-06, "loss": 0.4491, "step": 36100 }, { "epoch": 0.684965286998748, "grad_norm": 1.5509580579363422, "learning_rate": 2.7309030020567055e-06, "loss": 0.4518, "step": 36110 }, { "epoch": 0.6851549755301795, "grad_norm": 1.2111059301835962, "learning_rate": 2.72795335282111e-06, "loss": 0.421, "step": 36120 }, { "epoch": 0.6853446640616109, "grad_norm": 1.1700226820937374, "learning_rate": 2.7250046997195134e-06, "loss": 0.4346, "step": 36130 }, { "epoch": 0.6855343525930422, "grad_norm": 1.3665398829619912, "learning_rate": 2.722057044044697e-06, "loss": 0.4361, "step": 36140 }, { "epoch": 0.6857240411244736, "grad_norm": 1.4796657295062094, "learning_rate": 2.719110387089003e-06, "loss": 0.4627, "step": 36150 }, { "epoch": 0.685913729655905, "grad_norm": 1.4969256578888233, "learning_rate": 2.716164730144334e-06, "loss": 0.464, "step": 36160 }, { "epoch": 0.6861034181873364, "grad_norm": 1.1656513805738864, "learning_rate": 2.713220074502154e-06, "loss": 0.4457, "step": 36170 }, { "epoch": 0.6862931067187678, "grad_norm": 1.2916690069980918, "learning_rate": 2.710276421453489e-06, "loss": 0.4312, "step": 36180 }, { "epoch": 0.6864827952501992, "grad_norm": 1.2565812012919957, "learning_rate": 2.7073337722889263e-06, "loss": 0.4376, "step": 36190 }, { "epoch": 0.6866724837816306, "grad_norm": 1.579205119767377, "learning_rate": 2.704392128298615e-06, "loss": 0.4189, "step": 36200 }, { "epoch": 0.6868621723130619, "grad_norm": 1.2981237362229505, "learning_rate": 2.7014514907722576e-06, "loss": 0.4216, "step": 36210 }, { "epoch": 0.6870518608444933, "grad_norm": 1.2879383003598701, "learning_rate": 2.69851186099912e-06, "loss": 0.4338, "step": 36220 }, { "epoch": 0.6872415493759247, "grad_norm": 1.4578528679141853, "learning_rate": 2.695573240268022e-06, "loss": 0.4342, "step": 36230 }, { "epoch": 0.6874312379073562, "grad_norm": 1.2872687195560353, "learning_rate": 2.6926356298673463e-06, "loss": 0.4177, "step": 36240 }, { "epoch": 0.6876209264387875, "grad_norm": 2.196317073610982, "learning_rate": 2.6896990310850313e-06, "loss": 0.4287, "step": 36250 }, { "epoch": 0.6878106149702189, "grad_norm": 1.1715195633426567, "learning_rate": 2.686763445208568e-06, "loss": 0.4066, "step": 36260 }, { "epoch": 0.6880003035016503, "grad_norm": 1.386061191141058, "learning_rate": 2.6838288735250097e-06, "loss": 0.3992, "step": 36270 }, { "epoch": 0.6881899920330817, "grad_norm": 1.3297002041764194, "learning_rate": 2.6808953173209584e-06, "loss": 0.4345, "step": 36280 }, { "epoch": 0.688379680564513, "grad_norm": 1.3504484038156868, "learning_rate": 2.6779627778825736e-06, "loss": 0.4151, "step": 36290 }, { "epoch": 0.6885693690959445, "grad_norm": 1.4983595710857553, "learning_rate": 2.675031256495572e-06, "loss": 0.4188, "step": 36300 }, { "epoch": 0.6887590576273759, "grad_norm": 1.2461702324896287, "learning_rate": 2.672100754445218e-06, "loss": 0.4348, "step": 36310 }, { "epoch": 0.6889487461588072, "grad_norm": 1.9935566782244596, "learning_rate": 2.669171273016336e-06, "loss": 0.4129, "step": 36320 }, { "epoch": 0.6891384346902386, "grad_norm": 1.4895346702144079, "learning_rate": 2.6662428134932978e-06, "loss": 0.4357, "step": 36330 }, { "epoch": 0.68932812322167, "grad_norm": 1.2369624027449235, "learning_rate": 2.6633153771600263e-06, "loss": 0.4315, "step": 36340 }, { "epoch": 0.6895178117531015, "grad_norm": 1.491845613597137, "learning_rate": 2.6603889652999992e-06, "loss": 0.424, "step": 36350 }, { "epoch": 0.6897075002845328, "grad_norm": 1.816938418094634, "learning_rate": 2.657463579196248e-06, "loss": 0.4032, "step": 36360 }, { "epoch": 0.6898971888159642, "grad_norm": 1.7522513464283505, "learning_rate": 2.654539220131347e-06, "loss": 0.3976, "step": 36370 }, { "epoch": 0.6900868773473956, "grad_norm": 1.3780893276234925, "learning_rate": 2.6516158893874245e-06, "loss": 0.4341, "step": 36380 }, { "epoch": 0.690276565878827, "grad_norm": 1.5577414867490285, "learning_rate": 2.6486935882461543e-06, "loss": 0.4253, "step": 36390 }, { "epoch": 0.6904662544102583, "grad_norm": 1.3867554328171434, "learning_rate": 2.6457723179887653e-06, "loss": 0.4404, "step": 36400 }, { "epoch": 0.6906559429416897, "grad_norm": 1.239201103259099, "learning_rate": 2.6428520798960305e-06, "loss": 0.4386, "step": 36410 }, { "epoch": 0.6908456314731212, "grad_norm": 1.3882889496345336, "learning_rate": 2.63993287524827e-06, "loss": 0.4516, "step": 36420 }, { "epoch": 0.6910353200045525, "grad_norm": 1.441756387788487, "learning_rate": 2.6370147053253494e-06, "loss": 0.4271, "step": 36430 }, { "epoch": 0.6912250085359839, "grad_norm": 1.1426067680717482, "learning_rate": 2.634097571406687e-06, "loss": 0.4253, "step": 36440 }, { "epoch": 0.6914146970674153, "grad_norm": 1.535975467481061, "learning_rate": 2.6311814747712373e-06, "loss": 0.4326, "step": 36450 }, { "epoch": 0.6916043855988467, "grad_norm": 1.2074240486446033, "learning_rate": 2.628266416697511e-06, "loss": 0.4261, "step": 36460 }, { "epoch": 0.691794074130278, "grad_norm": 1.5820880535575867, "learning_rate": 2.6253523984635542e-06, "loss": 0.4579, "step": 36470 }, { "epoch": 0.6919837626617095, "grad_norm": 1.2180544264271969, "learning_rate": 2.6224394213469593e-06, "loss": 0.4339, "step": 36480 }, { "epoch": 0.6921734511931409, "grad_norm": 1.3193911653525108, "learning_rate": 2.619527486624869e-06, "loss": 0.4182, "step": 36490 }, { "epoch": 0.6923631397245723, "grad_norm": 1.6466239420871798, "learning_rate": 2.616616595573957e-06, "loss": 0.4718, "step": 36500 }, { "epoch": 0.6925528282560036, "grad_norm": 1.6508654656195656, "learning_rate": 2.6137067494704517e-06, "loss": 0.4298, "step": 36510 }, { "epoch": 0.692742516787435, "grad_norm": 1.6402226686454255, "learning_rate": 2.610797949590115e-06, "loss": 0.4357, "step": 36520 }, { "epoch": 0.6929322053188665, "grad_norm": 1.4808542696649716, "learning_rate": 2.6078901972082515e-06, "loss": 0.4443, "step": 36530 }, { "epoch": 0.6931218938502978, "grad_norm": 1.6689414612345739, "learning_rate": 2.604983493599711e-06, "loss": 0.4763, "step": 36540 }, { "epoch": 0.6933115823817292, "grad_norm": 1.6279320924445382, "learning_rate": 2.602077840038877e-06, "loss": 0.4577, "step": 36550 }, { "epoch": 0.6935012709131606, "grad_norm": 1.3974858021345091, "learning_rate": 2.5991732377996777e-06, "loss": 0.4208, "step": 36560 }, { "epoch": 0.693690959444592, "grad_norm": 1.3722174829679807, "learning_rate": 2.5962696881555807e-06, "loss": 0.4441, "step": 36570 }, { "epoch": 0.6938806479760233, "grad_norm": 1.3167387537024935, "learning_rate": 2.5933671923795872e-06, "loss": 0.4126, "step": 36580 }, { "epoch": 0.6940703365074548, "grad_norm": 1.2869039524972494, "learning_rate": 2.59046575174424e-06, "loss": 0.4267, "step": 36590 }, { "epoch": 0.6942600250388862, "grad_norm": 1.098933649599547, "learning_rate": 2.587565367521617e-06, "loss": 0.4253, "step": 36600 }, { "epoch": 0.6944497135703175, "grad_norm": 1.2470058156009765, "learning_rate": 2.584666040983336e-06, "loss": 0.434, "step": 36610 }, { "epoch": 0.6946394021017489, "grad_norm": 1.5577979873533023, "learning_rate": 2.58176777340055e-06, "loss": 0.4133, "step": 36620 }, { "epoch": 0.6948290906331803, "grad_norm": 1.5248456357228257, "learning_rate": 2.578870566043947e-06, "loss": 0.4049, "step": 36630 }, { "epoch": 0.6950187791646117, "grad_norm": 1.410335810655973, "learning_rate": 2.5759744201837476e-06, "loss": 0.4296, "step": 36640 }, { "epoch": 0.695208467696043, "grad_norm": 1.4351665773123403, "learning_rate": 2.5730793370897146e-06, "loss": 0.4295, "step": 36650 }, { "epoch": 0.6953981562274745, "grad_norm": 1.1638086682154178, "learning_rate": 2.5701853180311355e-06, "loss": 0.4322, "step": 36660 }, { "epoch": 0.6955878447589059, "grad_norm": 1.8661911079786508, "learning_rate": 2.5672923642768394e-06, "loss": 0.4495, "step": 36670 }, { "epoch": 0.6957775332903373, "grad_norm": 1.3189011908155148, "learning_rate": 2.564400477095183e-06, "loss": 0.4412, "step": 36680 }, { "epoch": 0.6959672218217686, "grad_norm": 1.5212741558810225, "learning_rate": 2.5615096577540556e-06, "loss": 0.4379, "step": 36690 }, { "epoch": 0.6961569103532, "grad_norm": 1.3933674875238573, "learning_rate": 2.5586199075208828e-06, "loss": 0.454, "step": 36700 }, { "epoch": 0.6963465988846315, "grad_norm": 1.5567278877052164, "learning_rate": 2.5557312276626163e-06, "loss": 0.4429, "step": 36710 }, { "epoch": 0.6965362874160628, "grad_norm": 1.214276010876328, "learning_rate": 2.552843619445743e-06, "loss": 0.449, "step": 36720 }, { "epoch": 0.6967259759474942, "grad_norm": 1.5746960794332927, "learning_rate": 2.549957084136277e-06, "loss": 0.3882, "step": 36730 }, { "epoch": 0.6969156644789256, "grad_norm": 1.4149377299648214, "learning_rate": 2.54707162299976e-06, "loss": 0.4426, "step": 36740 }, { "epoch": 0.697105353010357, "grad_norm": 1.6488134834591068, "learning_rate": 2.54418723730127e-06, "loss": 0.4462, "step": 36750 }, { "epoch": 0.6972950415417883, "grad_norm": 1.6203240804772445, "learning_rate": 2.541303928305404e-06, "loss": 0.4557, "step": 36760 }, { "epoch": 0.6974847300732198, "grad_norm": 1.3978046688757773, "learning_rate": 2.5384216972762966e-06, "loss": 0.4584, "step": 36770 }, { "epoch": 0.6976744186046512, "grad_norm": 1.2143544214301323, "learning_rate": 2.5355405454776035e-06, "loss": 0.4302, "step": 36780 }, { "epoch": 0.6978641071360826, "grad_norm": 1.2028634709237134, "learning_rate": 2.5326604741725058e-06, "loss": 0.4535, "step": 36790 }, { "epoch": 0.6980537956675139, "grad_norm": 1.354071446490671, "learning_rate": 2.529781484623719e-06, "loss": 0.4171, "step": 36800 }, { "epoch": 0.6982434841989453, "grad_norm": 1.5788529784050112, "learning_rate": 2.5269035780934747e-06, "loss": 0.4356, "step": 36810 }, { "epoch": 0.6984331727303768, "grad_norm": 1.6035257979700979, "learning_rate": 2.524026755843536e-06, "loss": 0.4456, "step": 36820 }, { "epoch": 0.6986228612618081, "grad_norm": 1.666977572922073, "learning_rate": 2.521151019135193e-06, "loss": 0.4804, "step": 36830 }, { "epoch": 0.6988125497932395, "grad_norm": 1.4411200613781168, "learning_rate": 2.5182763692292476e-06, "loss": 0.4301, "step": 36840 }, { "epoch": 0.6990022383246709, "grad_norm": 1.3917511919085517, "learning_rate": 2.515402807386038e-06, "loss": 0.4379, "step": 36850 }, { "epoch": 0.6991919268561023, "grad_norm": 1.5829030178959438, "learning_rate": 2.512530334865422e-06, "loss": 0.4289, "step": 36860 }, { "epoch": 0.6993816153875336, "grad_norm": 1.4836166136099296, "learning_rate": 2.5096589529267744e-06, "loss": 0.4392, "step": 36870 }, { "epoch": 0.699571303918965, "grad_norm": 1.662925079659535, "learning_rate": 2.506788662829e-06, "loss": 0.432, "step": 36880 }, { "epoch": 0.6997609924503965, "grad_norm": 1.389000782577621, "learning_rate": 2.503919465830519e-06, "loss": 0.4365, "step": 36890 }, { "epoch": 0.6999506809818279, "grad_norm": 1.364956104704819, "learning_rate": 2.5010513631892724e-06, "loss": 0.4206, "step": 36900 }, { "epoch": 0.7001403695132592, "grad_norm": 1.5146727254276902, "learning_rate": 2.4981843561627265e-06, "loss": 0.4191, "step": 36910 }, { "epoch": 0.7003300580446906, "grad_norm": 1.4398531556086367, "learning_rate": 2.495318446007861e-06, "loss": 0.4081, "step": 36920 }, { "epoch": 0.700519746576122, "grad_norm": 1.6705833950066782, "learning_rate": 2.4924536339811806e-06, "loss": 0.4405, "step": 36930 }, { "epoch": 0.7007094351075533, "grad_norm": 1.1958104491201635, "learning_rate": 2.4895899213387035e-06, "loss": 0.4487, "step": 36940 }, { "epoch": 0.7008991236389848, "grad_norm": 1.3068814128606756, "learning_rate": 2.4867273093359673e-06, "loss": 0.4327, "step": 36950 }, { "epoch": 0.7010888121704162, "grad_norm": 1.4065453952203408, "learning_rate": 2.4838657992280303e-06, "loss": 0.4347, "step": 36960 }, { "epoch": 0.7012785007018476, "grad_norm": 1.3909655701741157, "learning_rate": 2.4810053922694616e-06, "loss": 0.416, "step": 36970 }, { "epoch": 0.7014681892332789, "grad_norm": 1.3657532585767498, "learning_rate": 2.4781460897143544e-06, "loss": 0.4273, "step": 36980 }, { "epoch": 0.7016578777647103, "grad_norm": 1.3648998518983506, "learning_rate": 2.4752878928163114e-06, "loss": 0.4635, "step": 36990 }, { "epoch": 0.7018475662961418, "grad_norm": 1.4924103511258817, "learning_rate": 2.4724308028284507e-06, "loss": 0.4376, "step": 37000 }, { "epoch": 0.7020372548275732, "grad_norm": 1.372268314324241, "learning_rate": 2.4695748210034085e-06, "loss": 0.447, "step": 37010 }, { "epoch": 0.7022269433590045, "grad_norm": 1.6210273184744426, "learning_rate": 2.4667199485933364e-06, "loss": 0.4084, "step": 37020 }, { "epoch": 0.7024166318904359, "grad_norm": 1.317600845507823, "learning_rate": 2.4638661868498936e-06, "loss": 0.428, "step": 37030 }, { "epoch": 0.7026063204218673, "grad_norm": 1.285519256171338, "learning_rate": 2.461013537024257e-06, "loss": 0.4207, "step": 37040 }, { "epoch": 0.7027960089532986, "grad_norm": 1.4056770071149534, "learning_rate": 2.458162000367112e-06, "loss": 0.4424, "step": 37050 }, { "epoch": 0.70298569748473, "grad_norm": 1.460905291737663, "learning_rate": 2.455311578128661e-06, "loss": 0.4093, "step": 37060 }, { "epoch": 0.7031753860161615, "grad_norm": 1.1583399224335524, "learning_rate": 2.452462271558616e-06, "loss": 0.4427, "step": 37070 }, { "epoch": 0.7033650745475929, "grad_norm": 1.329437995658874, "learning_rate": 2.4496140819061986e-06, "loss": 0.4447, "step": 37080 }, { "epoch": 0.7035547630790242, "grad_norm": 1.6051004894424996, "learning_rate": 2.446767010420139e-06, "loss": 0.4583, "step": 37090 }, { "epoch": 0.7037444516104556, "grad_norm": 1.3764169231035506, "learning_rate": 2.443921058348683e-06, "loss": 0.436, "step": 37100 }, { "epoch": 0.703934140141887, "grad_norm": 1.4011762638558432, "learning_rate": 2.4410762269395795e-06, "loss": 0.4556, "step": 37110 }, { "epoch": 0.7041238286733185, "grad_norm": 1.5304967672085605, "learning_rate": 2.4382325174400907e-06, "loss": 0.4284, "step": 37120 }, { "epoch": 0.7043135172047498, "grad_norm": 1.591616536183991, "learning_rate": 2.435389931096982e-06, "loss": 0.4454, "step": 37130 }, { "epoch": 0.7045032057361812, "grad_norm": 1.246599079781713, "learning_rate": 2.4325484691565333e-06, "loss": 0.4268, "step": 37140 }, { "epoch": 0.7046928942676126, "grad_norm": 1.4181317421212343, "learning_rate": 2.429708132864525e-06, "loss": 0.4519, "step": 37150 }, { "epoch": 0.7048825827990439, "grad_norm": 1.4947246736508024, "learning_rate": 2.426868923466245e-06, "loss": 0.3938, "step": 37160 }, { "epoch": 0.7050722713304753, "grad_norm": 1.7414971537178086, "learning_rate": 2.424030842206493e-06, "loss": 0.4607, "step": 37170 }, { "epoch": 0.7052619598619068, "grad_norm": 1.3681074233397443, "learning_rate": 2.4211938903295647e-06, "loss": 0.4279, "step": 37180 }, { "epoch": 0.7054516483933382, "grad_norm": 1.5923976593814295, "learning_rate": 2.4183580690792694e-06, "loss": 0.4539, "step": 37190 }, { "epoch": 0.7056413369247695, "grad_norm": 1.5471516606393116, "learning_rate": 2.4155233796989163e-06, "loss": 0.4478, "step": 37200 }, { "epoch": 0.7058310254562009, "grad_norm": 1.6822776669538777, "learning_rate": 2.412689823431316e-06, "loss": 0.4278, "step": 37210 }, { "epoch": 0.7060207139876323, "grad_norm": 1.9318867519072318, "learning_rate": 2.409857401518788e-06, "loss": 0.4299, "step": 37220 }, { "epoch": 0.7062104025190638, "grad_norm": 1.3606716680005835, "learning_rate": 2.407026115203153e-06, "loss": 0.4537, "step": 37230 }, { "epoch": 0.7064000910504951, "grad_norm": 1.568649729222492, "learning_rate": 2.4041959657257313e-06, "loss": 0.4116, "step": 37240 }, { "epoch": 0.7065897795819265, "grad_norm": 2.431693215762096, "learning_rate": 2.4013669543273454e-06, "loss": 0.4296, "step": 37250 }, { "epoch": 0.7067794681133579, "grad_norm": 1.4984215536120957, "learning_rate": 2.398539082248318e-06, "loss": 0.4345, "step": 37260 }, { "epoch": 0.7069691566447892, "grad_norm": 1.3899335108170152, "learning_rate": 2.3957123507284758e-06, "loss": 0.4238, "step": 37270 }, { "epoch": 0.7071588451762206, "grad_norm": 1.6025329186359374, "learning_rate": 2.392886761007145e-06, "loss": 0.4674, "step": 37280 }, { "epoch": 0.707348533707652, "grad_norm": 1.1534634142024414, "learning_rate": 2.3900623143231482e-06, "loss": 0.4249, "step": 37290 }, { "epoch": 0.7075382222390835, "grad_norm": 1.585884508218133, "learning_rate": 2.3872390119148057e-06, "loss": 0.4627, "step": 37300 }, { "epoch": 0.7077279107705148, "grad_norm": 1.4495836566236224, "learning_rate": 2.3844168550199432e-06, "loss": 0.4352, "step": 37310 }, { "epoch": 0.7079175993019462, "grad_norm": 1.5370709004068963, "learning_rate": 2.3815958448758756e-06, "loss": 0.4095, "step": 37320 }, { "epoch": 0.7081072878333776, "grad_norm": 1.5864741838242358, "learning_rate": 2.378775982719423e-06, "loss": 0.4231, "step": 37330 }, { "epoch": 0.708296976364809, "grad_norm": 1.4206856234227694, "learning_rate": 2.375957269786896e-06, "loss": 0.4234, "step": 37340 }, { "epoch": 0.7084866648962403, "grad_norm": 1.5021480106470015, "learning_rate": 2.373139707314102e-06, "loss": 0.439, "step": 37350 }, { "epoch": 0.7086763534276718, "grad_norm": 1.413676169267906, "learning_rate": 2.37032329653635e-06, "loss": 0.4351, "step": 37360 }, { "epoch": 0.7088660419591032, "grad_norm": 1.2489767684761122, "learning_rate": 2.367508038688435e-06, "loss": 0.417, "step": 37370 }, { "epoch": 0.7090557304905345, "grad_norm": 1.3127694830224608, "learning_rate": 2.3646939350046533e-06, "loss": 0.4257, "step": 37380 }, { "epoch": 0.7092454190219659, "grad_norm": 1.2935832036630415, "learning_rate": 2.3618809867187977e-06, "loss": 0.447, "step": 37390 }, { "epoch": 0.7094351075533973, "grad_norm": 1.5690680490256634, "learning_rate": 2.359069195064141e-06, "loss": 0.4325, "step": 37400 }, { "epoch": 0.7096247960848288, "grad_norm": 1.3890338836723009, "learning_rate": 2.3562585612734647e-06, "loss": 0.3901, "step": 37410 }, { "epoch": 0.7098144846162601, "grad_norm": 1.3815761016268722, "learning_rate": 2.353449086579031e-06, "loss": 0.4216, "step": 37420 }, { "epoch": 0.7100041731476915, "grad_norm": 1.6624375450595898, "learning_rate": 2.3506407722126007e-06, "loss": 0.4332, "step": 37430 }, { "epoch": 0.7101938616791229, "grad_norm": 1.2533675291654807, "learning_rate": 2.3478336194054263e-06, "loss": 0.427, "step": 37440 }, { "epoch": 0.7103835502105542, "grad_norm": 1.4687370289200263, "learning_rate": 2.3450276293882458e-06, "loss": 0.409, "step": 37450 }, { "epoch": 0.7105732387419856, "grad_norm": 1.321690808512031, "learning_rate": 2.3422228033912907e-06, "loss": 0.4456, "step": 37460 }, { "epoch": 0.7107629272734171, "grad_norm": 1.1290651165564256, "learning_rate": 2.3394191426442793e-06, "loss": 0.4562, "step": 37470 }, { "epoch": 0.7109526158048485, "grad_norm": 1.5133411863704025, "learning_rate": 2.3366166483764236e-06, "loss": 0.4396, "step": 37480 }, { "epoch": 0.7111423043362798, "grad_norm": 1.448007149686687, "learning_rate": 2.3338153218164233e-06, "loss": 0.453, "step": 37490 }, { "epoch": 0.7113319928677112, "grad_norm": 1.43650712052939, "learning_rate": 2.3310151641924634e-06, "loss": 0.4445, "step": 37500 }, { "epoch": 0.7115216813991426, "grad_norm": 1.5758179113491542, "learning_rate": 2.3282161767322157e-06, "loss": 0.4396, "step": 37510 }, { "epoch": 0.711711369930574, "grad_norm": 1.354857162830624, "learning_rate": 2.3254183606628443e-06, "loss": 0.4077, "step": 37520 }, { "epoch": 0.7119010584620054, "grad_norm": 1.2613947911023395, "learning_rate": 2.3226217172109933e-06, "loss": 0.4199, "step": 37530 }, { "epoch": 0.7120907469934368, "grad_norm": 1.4001465708821816, "learning_rate": 2.319826247602799e-06, "loss": 0.4212, "step": 37540 }, { "epoch": 0.7122804355248682, "grad_norm": 1.5059056132097677, "learning_rate": 2.3170319530638783e-06, "loss": 0.4181, "step": 37550 }, { "epoch": 0.7124701240562995, "grad_norm": 1.4281195071897876, "learning_rate": 2.3142388348193327e-06, "loss": 0.4114, "step": 37560 }, { "epoch": 0.7126598125877309, "grad_norm": 1.6501916624577424, "learning_rate": 2.3114468940937535e-06, "loss": 0.4429, "step": 37570 }, { "epoch": 0.7128495011191623, "grad_norm": 1.396318442669214, "learning_rate": 2.308656132111208e-06, "loss": 0.4205, "step": 37580 }, { "epoch": 0.7130391896505938, "grad_norm": 1.4253263805171967, "learning_rate": 2.305866550095254e-06, "loss": 0.408, "step": 37590 }, { "epoch": 0.7132288781820251, "grad_norm": 1.5402476979162019, "learning_rate": 2.303078149268928e-06, "loss": 0.449, "step": 37600 }, { "epoch": 0.7134185667134565, "grad_norm": 1.196467525291415, "learning_rate": 2.3002909308547466e-06, "loss": 0.4268, "step": 37610 }, { "epoch": 0.7136082552448879, "grad_norm": 1.3657597784039417, "learning_rate": 2.2975048960747147e-06, "loss": 0.4422, "step": 37620 }, { "epoch": 0.7137979437763193, "grad_norm": 1.2883812939661874, "learning_rate": 2.29472004615031e-06, "loss": 0.4369, "step": 37630 }, { "epoch": 0.7139876323077506, "grad_norm": 1.4747943066316465, "learning_rate": 2.2919363823024976e-06, "loss": 0.4206, "step": 37640 }, { "epoch": 0.7141773208391821, "grad_norm": 1.7008418120723372, "learning_rate": 2.2891539057517235e-06, "loss": 0.4519, "step": 37650 }, { "epoch": 0.7143670093706135, "grad_norm": 1.4754209557042273, "learning_rate": 2.286372617717902e-06, "loss": 0.431, "step": 37660 }, { "epoch": 0.7145566979020448, "grad_norm": 1.6050451627823161, "learning_rate": 2.2835925194204376e-06, "loss": 0.4618, "step": 37670 }, { "epoch": 0.7147463864334762, "grad_norm": 1.4878393163288617, "learning_rate": 2.2808136120782117e-06, "loss": 0.4284, "step": 37680 }, { "epoch": 0.7149360749649076, "grad_norm": 1.3757005643152558, "learning_rate": 2.278035896909577e-06, "loss": 0.4279, "step": 37690 }, { "epoch": 0.715125763496339, "grad_norm": 1.5463831518553806, "learning_rate": 2.2752593751323736e-06, "loss": 0.4256, "step": 37700 }, { "epoch": 0.7153154520277704, "grad_norm": 1.5244190367715642, "learning_rate": 2.272484047963906e-06, "loss": 0.4226, "step": 37710 }, { "epoch": 0.7155051405592018, "grad_norm": 1.2476217534090615, "learning_rate": 2.2697099166209654e-06, "loss": 0.4062, "step": 37720 }, { "epoch": 0.7156948290906332, "grad_norm": 1.7687012357603737, "learning_rate": 2.266936982319815e-06, "loss": 0.4183, "step": 37730 }, { "epoch": 0.7158845176220646, "grad_norm": 1.2451397689523525, "learning_rate": 2.2641652462761914e-06, "loss": 0.4553, "step": 37740 }, { "epoch": 0.7160742061534959, "grad_norm": 1.5222579006178762, "learning_rate": 2.26139470970531e-06, "loss": 0.4225, "step": 37750 }, { "epoch": 0.7162638946849273, "grad_norm": 1.542811441699248, "learning_rate": 2.2586253738218565e-06, "loss": 0.4255, "step": 37760 }, { "epoch": 0.7164535832163588, "grad_norm": 1.3274284987519112, "learning_rate": 2.25585723983999e-06, "loss": 0.418, "step": 37770 }, { "epoch": 0.7166432717477901, "grad_norm": 1.3084725329636775, "learning_rate": 2.2530903089733477e-06, "loss": 0.4173, "step": 37780 }, { "epoch": 0.7168329602792215, "grad_norm": 1.6148651652491344, "learning_rate": 2.2503245824350318e-06, "loss": 0.455, "step": 37790 }, { "epoch": 0.7170226488106529, "grad_norm": 1.5059681102204847, "learning_rate": 2.247560061437624e-06, "loss": 0.4316, "step": 37800 }, { "epoch": 0.7172123373420843, "grad_norm": 1.4434582821238988, "learning_rate": 2.244796747193173e-06, "loss": 0.4424, "step": 37810 }, { "epoch": 0.7174020258735156, "grad_norm": 1.556336348794247, "learning_rate": 2.242034640913197e-06, "loss": 0.4639, "step": 37820 }, { "epoch": 0.7175917144049471, "grad_norm": 1.5770258613529082, "learning_rate": 2.2392737438086903e-06, "loss": 0.4191, "step": 37830 }, { "epoch": 0.7177814029363785, "grad_norm": 1.2930171124933436, "learning_rate": 2.2365140570901105e-06, "loss": 0.4285, "step": 37840 }, { "epoch": 0.7179710914678099, "grad_norm": 1.8181587324801716, "learning_rate": 2.233755581967392e-06, "loss": 0.4448, "step": 37850 }, { "epoch": 0.7181607799992412, "grad_norm": 1.2085854344662503, "learning_rate": 2.2309983196499308e-06, "loss": 0.4597, "step": 37860 }, { "epoch": 0.7183504685306726, "grad_norm": 1.4287613626027826, "learning_rate": 2.228242271346593e-06, "loss": 0.4397, "step": 37870 }, { "epoch": 0.7185401570621041, "grad_norm": 1.97335538280673, "learning_rate": 2.2254874382657156e-06, "loss": 0.4397, "step": 37880 }, { "epoch": 0.7187298455935354, "grad_norm": 1.4834877292971458, "learning_rate": 2.222733821615101e-06, "loss": 0.4271, "step": 37890 }, { "epoch": 0.7189195341249668, "grad_norm": 1.7844818125028838, "learning_rate": 2.219981422602019e-06, "loss": 0.4051, "step": 37900 }, { "epoch": 0.7191092226563982, "grad_norm": 1.5548474352335133, "learning_rate": 2.2172302424332022e-06, "loss": 0.421, "step": 37910 }, { "epoch": 0.7192989111878296, "grad_norm": 1.3100649767439305, "learning_rate": 2.21448028231485e-06, "loss": 0.4346, "step": 37920 }, { "epoch": 0.7194885997192609, "grad_norm": 1.4511116446539232, "learning_rate": 2.211731543452631e-06, "loss": 0.4388, "step": 37930 }, { "epoch": 0.7196782882506924, "grad_norm": 1.6053083379891588, "learning_rate": 2.2089840270516768e-06, "loss": 0.4222, "step": 37940 }, { "epoch": 0.7198679767821238, "grad_norm": 1.4894830828224346, "learning_rate": 2.206237734316577e-06, "loss": 0.4333, "step": 37950 }, { "epoch": 0.7200576653135552, "grad_norm": 1.2525211447814748, "learning_rate": 2.203492666451394e-06, "loss": 0.453, "step": 37960 }, { "epoch": 0.7202473538449865, "grad_norm": 1.5522137717422237, "learning_rate": 2.200748824659647e-06, "loss": 0.4365, "step": 37970 }, { "epoch": 0.7204370423764179, "grad_norm": 1.375385443438512, "learning_rate": 2.1980062101443173e-06, "loss": 0.4443, "step": 37980 }, { "epoch": 0.7206267309078493, "grad_norm": 1.4893309470797778, "learning_rate": 2.1952648241078534e-06, "loss": 0.4641, "step": 37990 }, { "epoch": 0.7208164194392807, "grad_norm": 1.5079409760168305, "learning_rate": 2.1925246677521585e-06, "loss": 0.4402, "step": 38000 }, { "epoch": 0.7210061079707121, "grad_norm": 1.4173879461994852, "learning_rate": 2.1897857422786035e-06, "loss": 0.4412, "step": 38010 }, { "epoch": 0.7211957965021435, "grad_norm": 1.2507983343500382, "learning_rate": 2.1870480488880147e-06, "loss": 0.4437, "step": 38020 }, { "epoch": 0.7213854850335749, "grad_norm": 1.3728842139394857, "learning_rate": 2.1843115887806778e-06, "loss": 0.4093, "step": 38030 }, { "epoch": 0.7215751735650062, "grad_norm": 1.4000768741371459, "learning_rate": 2.1815763631563414e-06, "loss": 0.3943, "step": 38040 }, { "epoch": 0.7217648620964376, "grad_norm": 1.6061174218768226, "learning_rate": 2.1788423732142126e-06, "loss": 0.4449, "step": 38050 }, { "epoch": 0.7219545506278691, "grad_norm": 1.434732700625126, "learning_rate": 2.1761096201529546e-06, "loss": 0.4589, "step": 38060 }, { "epoch": 0.7221442391593005, "grad_norm": 1.367122977415537, "learning_rate": 2.173378105170688e-06, "loss": 0.4458, "step": 38070 }, { "epoch": 0.7223339276907318, "grad_norm": 1.276508561679422, "learning_rate": 2.170647829464991e-06, "loss": 0.4318, "step": 38080 }, { "epoch": 0.7225236162221632, "grad_norm": 1.147490658213073, "learning_rate": 2.1679187942328996e-06, "loss": 0.413, "step": 38090 }, { "epoch": 0.7227133047535946, "grad_norm": 1.4093348628103741, "learning_rate": 2.1651910006709077e-06, "loss": 0.4401, "step": 38100 }, { "epoch": 0.7229029932850259, "grad_norm": 1.4098434103791508, "learning_rate": 2.162464449974961e-06, "loss": 0.4549, "step": 38110 }, { "epoch": 0.7230926818164574, "grad_norm": 1.5712579690541264, "learning_rate": 2.159739143340461e-06, "loss": 0.4308, "step": 38120 }, { "epoch": 0.7232823703478888, "grad_norm": 1.601908769769666, "learning_rate": 2.1570150819622635e-06, "loss": 0.4041, "step": 38130 }, { "epoch": 0.7234720588793202, "grad_norm": 1.535880846644856, "learning_rate": 2.154292267034682e-06, "loss": 0.4432, "step": 38140 }, { "epoch": 0.7236617474107515, "grad_norm": 1.4734707636853865, "learning_rate": 2.1515706997514813e-06, "loss": 0.4336, "step": 38150 }, { "epoch": 0.7238514359421829, "grad_norm": 1.3795154959816296, "learning_rate": 2.148850381305878e-06, "loss": 0.4297, "step": 38160 }, { "epoch": 0.7240411244736144, "grad_norm": 1.5619449113623851, "learning_rate": 2.1461313128905393e-06, "loss": 0.4123, "step": 38170 }, { "epoch": 0.7242308130050458, "grad_norm": 1.6863524532620484, "learning_rate": 2.143413495697592e-06, "loss": 0.4342, "step": 38180 }, { "epoch": 0.7244205015364771, "grad_norm": 1.7541006977728186, "learning_rate": 2.1406969309186045e-06, "loss": 0.4679, "step": 38190 }, { "epoch": 0.7246101900679085, "grad_norm": 1.4273727365361288, "learning_rate": 2.137981619744605e-06, "loss": 0.4141, "step": 38200 }, { "epoch": 0.7247998785993399, "grad_norm": 1.385116213092878, "learning_rate": 2.1352675633660668e-06, "loss": 0.4386, "step": 38210 }, { "epoch": 0.7249895671307712, "grad_norm": 1.1609804743483485, "learning_rate": 2.132554762972912e-06, "loss": 0.4139, "step": 38220 }, { "epoch": 0.7251792556622026, "grad_norm": 1.2103021460879033, "learning_rate": 2.1298432197545184e-06, "loss": 0.4339, "step": 38230 }, { "epoch": 0.7253689441936341, "grad_norm": 1.44904989380221, "learning_rate": 2.1271329348997046e-06, "loss": 0.4412, "step": 38240 }, { "epoch": 0.7255586327250655, "grad_norm": 7.748902632291573, "learning_rate": 2.124423909596743e-06, "loss": 0.4484, "step": 38250 }, { "epoch": 0.7257483212564968, "grad_norm": 1.4247425079914158, "learning_rate": 2.1217161450333544e-06, "loss": 0.4433, "step": 38260 }, { "epoch": 0.7259380097879282, "grad_norm": 1.693265410788799, "learning_rate": 2.1190096423967036e-06, "loss": 0.4398, "step": 38270 }, { "epoch": 0.7261276983193596, "grad_norm": 1.334632615713144, "learning_rate": 2.1163044028734024e-06, "loss": 0.4107, "step": 38280 }, { "epoch": 0.726317386850791, "grad_norm": 1.4996819782368855, "learning_rate": 2.1136004276495086e-06, "loss": 0.4274, "step": 38290 }, { "epoch": 0.7265070753822224, "grad_norm": 1.569596122058885, "learning_rate": 2.110897717910528e-06, "loss": 0.4538, "step": 38300 }, { "epoch": 0.7266967639136538, "grad_norm": 1.592150718731856, "learning_rate": 2.108196274841412e-06, "loss": 0.4274, "step": 38310 }, { "epoch": 0.7268864524450852, "grad_norm": 1.7481915757895514, "learning_rate": 2.105496099626553e-06, "loss": 0.4561, "step": 38320 }, { "epoch": 0.7270761409765165, "grad_norm": 1.3030807631393317, "learning_rate": 2.1027971934497888e-06, "loss": 0.4275, "step": 38330 }, { "epoch": 0.7272658295079479, "grad_norm": 1.5505979499582676, "learning_rate": 2.100099557494404e-06, "loss": 0.4238, "step": 38340 }, { "epoch": 0.7274555180393794, "grad_norm": 1.3669103686360382, "learning_rate": 2.097403192943121e-06, "loss": 0.4366, "step": 38350 }, { "epoch": 0.7276452065708108, "grad_norm": 1.4145276439410759, "learning_rate": 2.0947081009781106e-06, "loss": 0.4066, "step": 38360 }, { "epoch": 0.7278348951022421, "grad_norm": 1.3577861767253108, "learning_rate": 2.0920142827809813e-06, "loss": 0.4179, "step": 38370 }, { "epoch": 0.7280245836336735, "grad_norm": 1.5483106208078234, "learning_rate": 2.089321739532783e-06, "loss": 0.4421, "step": 38380 }, { "epoch": 0.7282142721651049, "grad_norm": 1.311053768065097, "learning_rate": 2.086630472414011e-06, "loss": 0.4339, "step": 38390 }, { "epoch": 0.7284039606965362, "grad_norm": 1.6133817037827636, "learning_rate": 2.083940482604595e-06, "loss": 0.4398, "step": 38400 }, { "epoch": 0.7285936492279677, "grad_norm": 1.0959419333389064, "learning_rate": 2.0812517712839114e-06, "loss": 0.4263, "step": 38410 }, { "epoch": 0.7287833377593991, "grad_norm": 1.7703875957168116, "learning_rate": 2.078564339630771e-06, "loss": 0.4347, "step": 38420 }, { "epoch": 0.7289730262908305, "grad_norm": 1.3465466291802155, "learning_rate": 2.075878188823423e-06, "loss": 0.4349, "step": 38430 }, { "epoch": 0.7291627148222618, "grad_norm": 1.3377953269564427, "learning_rate": 2.0731933200395613e-06, "loss": 0.4359, "step": 38440 }, { "epoch": 0.7293524033536932, "grad_norm": 1.547130634905378, "learning_rate": 2.07050973445631e-06, "loss": 0.4385, "step": 38450 }, { "epoch": 0.7295420918851246, "grad_norm": 1.4271895208216239, "learning_rate": 2.0678274332502375e-06, "loss": 0.4312, "step": 38460 }, { "epoch": 0.7297317804165561, "grad_norm": 1.6584176675617865, "learning_rate": 2.0651464175973442e-06, "loss": 0.4391, "step": 38470 }, { "epoch": 0.7299214689479874, "grad_norm": 1.2640248657227426, "learning_rate": 2.062466688673066e-06, "loss": 0.4434, "step": 38480 }, { "epoch": 0.7301111574794188, "grad_norm": 1.6398718183077268, "learning_rate": 2.059788247652282e-06, "loss": 0.4209, "step": 38490 }, { "epoch": 0.7303008460108502, "grad_norm": 1.515278920934435, "learning_rate": 2.0571110957092972e-06, "loss": 0.4346, "step": 38500 }, { "epoch": 0.7304905345422815, "grad_norm": 1.6640621086616512, "learning_rate": 2.054435234017858e-06, "loss": 0.4307, "step": 38510 }, { "epoch": 0.7306802230737129, "grad_norm": 1.2530399553980178, "learning_rate": 2.0517606637511472e-06, "loss": 0.4231, "step": 38520 }, { "epoch": 0.7308699116051444, "grad_norm": 1.4294475564211335, "learning_rate": 2.0490873860817694e-06, "loss": 0.4224, "step": 38530 }, { "epoch": 0.7310596001365758, "grad_norm": 1.2541983205098175, "learning_rate": 2.046415402181775e-06, "loss": 0.4246, "step": 38540 }, { "epoch": 0.7312492886680071, "grad_norm": 1.4298027268419398, "learning_rate": 2.0437447132226434e-06, "loss": 0.4265, "step": 38550 }, { "epoch": 0.7314389771994385, "grad_norm": 1.4361100210387439, "learning_rate": 2.0410753203752827e-06, "loss": 0.3983, "step": 38560 }, { "epoch": 0.7316286657308699, "grad_norm": 1.296156304405761, "learning_rate": 2.0384072248100386e-06, "loss": 0.4314, "step": 38570 }, { "epoch": 0.7318183542623014, "grad_norm": 1.3624804178636578, "learning_rate": 2.035740427696683e-06, "loss": 0.4396, "step": 38580 }, { "epoch": 0.7320080427937327, "grad_norm": 1.3672786528296779, "learning_rate": 2.033074930204419e-06, "loss": 0.4482, "step": 38590 }, { "epoch": 0.7321977313251641, "grad_norm": 1.721308159762764, "learning_rate": 2.0304107335018847e-06, "loss": 0.4388, "step": 38600 }, { "epoch": 0.7323874198565955, "grad_norm": 1.3126957417233818, "learning_rate": 2.027747838757141e-06, "loss": 0.4382, "step": 38610 }, { "epoch": 0.7325771083880268, "grad_norm": 1.1804705073243935, "learning_rate": 2.0250862471376853e-06, "loss": 0.426, "step": 38620 }, { "epoch": 0.7327667969194582, "grad_norm": 1.3187522415401427, "learning_rate": 2.0224259598104383e-06, "loss": 0.4666, "step": 38630 }, { "epoch": 0.7329564854508896, "grad_norm": 1.3011272948461108, "learning_rate": 2.0197669779417483e-06, "loss": 0.4431, "step": 38640 }, { "epoch": 0.7331461739823211, "grad_norm": 1.411010247184846, "learning_rate": 2.017109302697397e-06, "loss": 0.4571, "step": 38650 }, { "epoch": 0.7333358625137524, "grad_norm": 1.373641655086551, "learning_rate": 2.0144529352425864e-06, "loss": 0.397, "step": 38660 }, { "epoch": 0.7335255510451838, "grad_norm": 1.5444085048300065, "learning_rate": 2.011797876741951e-06, "loss": 0.4313, "step": 38670 }, { "epoch": 0.7337152395766152, "grad_norm": 1.4294238601651905, "learning_rate": 2.0091441283595485e-06, "loss": 0.4343, "step": 38680 }, { "epoch": 0.7339049281080466, "grad_norm": 1.8136034938812013, "learning_rate": 2.0064916912588594e-06, "loss": 0.4209, "step": 38690 }, { "epoch": 0.734094616639478, "grad_norm": 1.6168683948744815, "learning_rate": 2.003840566602795e-06, "loss": 0.422, "step": 38700 }, { "epoch": 0.7342843051709094, "grad_norm": 1.7701687029973836, "learning_rate": 2.00119075555369e-06, "loss": 0.4237, "step": 38710 }, { "epoch": 0.7344739937023408, "grad_norm": 1.473947279594919, "learning_rate": 1.9985422592732995e-06, "loss": 0.4311, "step": 38720 }, { "epoch": 0.7346636822337721, "grad_norm": 1.458260572223085, "learning_rate": 1.9958950789228055e-06, "loss": 0.4178, "step": 38730 }, { "epoch": 0.7348533707652035, "grad_norm": 1.359509585786513, "learning_rate": 1.9932492156628094e-06, "loss": 0.4317, "step": 38740 }, { "epoch": 0.7350430592966349, "grad_norm": 1.4863434174216075, "learning_rate": 1.9906046706533392e-06, "loss": 0.4429, "step": 38750 }, { "epoch": 0.7352327478280664, "grad_norm": 1.5017490649676697, "learning_rate": 1.9879614450538464e-06, "loss": 0.4149, "step": 38760 }, { "epoch": 0.7354224363594977, "grad_norm": 1.358939343964874, "learning_rate": 1.9853195400231986e-06, "loss": 0.437, "step": 38770 }, { "epoch": 0.7356121248909291, "grad_norm": 1.3372541228708292, "learning_rate": 1.982678956719687e-06, "loss": 0.4399, "step": 38780 }, { "epoch": 0.7358018134223605, "grad_norm": 1.4603027983665344, "learning_rate": 1.980039696301021e-06, "loss": 0.4275, "step": 38790 }, { "epoch": 0.7359915019537919, "grad_norm": 1.6140396565157553, "learning_rate": 1.9774017599243345e-06, "loss": 0.4328, "step": 38800 }, { "epoch": 0.7361811904852232, "grad_norm": 1.7916001361041787, "learning_rate": 1.9747651487461805e-06, "loss": 0.4645, "step": 38810 }, { "epoch": 0.7363708790166547, "grad_norm": 1.3925122019529894, "learning_rate": 1.972129863922525e-06, "loss": 0.4129, "step": 38820 }, { "epoch": 0.7365605675480861, "grad_norm": 1.316469103715336, "learning_rate": 1.9694959066087606e-06, "loss": 0.4244, "step": 38830 }, { "epoch": 0.7367502560795174, "grad_norm": 1.5836088097248706, "learning_rate": 1.9668632779596924e-06, "loss": 0.4465, "step": 38840 }, { "epoch": 0.7369399446109488, "grad_norm": 1.3483852651476211, "learning_rate": 1.9642319791295417e-06, "loss": 0.4287, "step": 38850 }, { "epoch": 0.7371296331423802, "grad_norm": 1.4326402571018102, "learning_rate": 1.961602011271954e-06, "loss": 0.4487, "step": 38860 }, { "epoch": 0.7373193216738116, "grad_norm": 1.4392781735590319, "learning_rate": 1.958973375539983e-06, "loss": 0.4625, "step": 38870 }, { "epoch": 0.737509010205243, "grad_norm": 1.9469248900679008, "learning_rate": 1.956346073086106e-06, "loss": 0.4465, "step": 38880 }, { "epoch": 0.7376986987366744, "grad_norm": 1.4829091796777147, "learning_rate": 1.9537201050622095e-06, "loss": 0.4328, "step": 38890 }, { "epoch": 0.7378883872681058, "grad_norm": 1.5488491338358894, "learning_rate": 1.9510954726195956e-06, "loss": 0.4243, "step": 38900 }, { "epoch": 0.7380780757995372, "grad_norm": 1.1307040301751718, "learning_rate": 1.948472176908985e-06, "loss": 0.428, "step": 38910 }, { "epoch": 0.7382677643309685, "grad_norm": 1.2314475273760492, "learning_rate": 1.945850219080511e-06, "loss": 0.4311, "step": 38920 }, { "epoch": 0.7384574528623999, "grad_norm": 1.3711404455723422, "learning_rate": 1.9432296002837176e-06, "loss": 0.412, "step": 38930 }, { "epoch": 0.7386471413938314, "grad_norm": 1.1180335295473731, "learning_rate": 1.9406103216675638e-06, "loss": 0.407, "step": 38940 }, { "epoch": 0.7388368299252627, "grad_norm": 1.428161803642926, "learning_rate": 1.937992384380419e-06, "loss": 0.4144, "step": 38950 }, { "epoch": 0.7390265184566941, "grad_norm": 1.386275910213571, "learning_rate": 1.9353757895700666e-06, "loss": 0.4123, "step": 38960 }, { "epoch": 0.7392162069881255, "grad_norm": 1.8480465124410057, "learning_rate": 1.932760538383704e-06, "loss": 0.4594, "step": 38970 }, { "epoch": 0.7394058955195569, "grad_norm": 1.3064934260947263, "learning_rate": 1.930146631967934e-06, "loss": 0.4425, "step": 38980 }, { "epoch": 0.7395955840509882, "grad_norm": 1.2692290853247703, "learning_rate": 1.927534071468771e-06, "loss": 0.4239, "step": 38990 }, { "epoch": 0.7397852725824197, "grad_norm": 1.3947227644476679, "learning_rate": 1.9249228580316444e-06, "loss": 0.4347, "step": 39000 }, { "epoch": 0.7399749611138511, "grad_norm": 1.5397624366060103, "learning_rate": 1.922312992801384e-06, "loss": 0.4468, "step": 39010 }, { "epoch": 0.7401646496452824, "grad_norm": 0.9433871181318709, "learning_rate": 1.919704476922239e-06, "loss": 0.3954, "step": 39020 }, { "epoch": 0.7403543381767138, "grad_norm": 1.4302993557850938, "learning_rate": 1.9170973115378595e-06, "loss": 0.4386, "step": 39030 }, { "epoch": 0.7405440267081452, "grad_norm": 1.5409987739719313, "learning_rate": 1.9144914977913033e-06, "loss": 0.427, "step": 39040 }, { "epoch": 0.7407337152395767, "grad_norm": 1.4949861925150407, "learning_rate": 1.911887036825042e-06, "loss": 0.4532, "step": 39050 }, { "epoch": 0.740923403771008, "grad_norm": 1.4902794770798073, "learning_rate": 1.9092839297809463e-06, "loss": 0.4446, "step": 39060 }, { "epoch": 0.7411130923024394, "grad_norm": 1.6736625065247588, "learning_rate": 1.9066821778003014e-06, "loss": 0.4528, "step": 39070 }, { "epoch": 0.7413027808338708, "grad_norm": 1.4846163449932395, "learning_rate": 1.9040817820237911e-06, "loss": 0.4352, "step": 39080 }, { "epoch": 0.7414924693653022, "grad_norm": 1.4972745183746399, "learning_rate": 1.9014827435915067e-06, "loss": 0.4266, "step": 39090 }, { "epoch": 0.7416821578967335, "grad_norm": 1.506202727846336, "learning_rate": 1.898885063642949e-06, "loss": 0.4181, "step": 39100 }, { "epoch": 0.741871846428165, "grad_norm": 1.496658000942081, "learning_rate": 1.896288743317015e-06, "loss": 0.4213, "step": 39110 }, { "epoch": 0.7420615349595964, "grad_norm": 1.2669256785955478, "learning_rate": 1.893693783752013e-06, "loss": 0.4323, "step": 39120 }, { "epoch": 0.7422512234910277, "grad_norm": 1.5816529140998357, "learning_rate": 1.8911001860856526e-06, "loss": 0.4079, "step": 39130 }, { "epoch": 0.7424409120224591, "grad_norm": 1.5541014689588462, "learning_rate": 1.8885079514550443e-06, "loss": 0.4309, "step": 39140 }, { "epoch": 0.7426306005538905, "grad_norm": 1.5521448387285297, "learning_rate": 1.885917080996702e-06, "loss": 0.4484, "step": 39150 }, { "epoch": 0.7428202890853219, "grad_norm": 1.3833426193284575, "learning_rate": 1.8833275758465402e-06, "loss": 0.4254, "step": 39160 }, { "epoch": 0.7430099776167532, "grad_norm": 1.4374155062885028, "learning_rate": 1.8807394371398774e-06, "loss": 0.4409, "step": 39170 }, { "epoch": 0.7431996661481847, "grad_norm": 1.5254233327015136, "learning_rate": 1.878152666011434e-06, "loss": 0.4123, "step": 39180 }, { "epoch": 0.7433893546796161, "grad_norm": 1.5264497911108728, "learning_rate": 1.8755672635953276e-06, "loss": 0.4279, "step": 39190 }, { "epoch": 0.7435790432110475, "grad_norm": 1.6187036401743586, "learning_rate": 1.8729832310250745e-06, "loss": 0.4577, "step": 39200 }, { "epoch": 0.7437687317424788, "grad_norm": 1.8256135574784758, "learning_rate": 1.8704005694335963e-06, "loss": 0.4049, "step": 39210 }, { "epoch": 0.7439584202739102, "grad_norm": 1.3719785421522208, "learning_rate": 1.867819279953207e-06, "loss": 0.4172, "step": 39220 }, { "epoch": 0.7441481088053417, "grad_norm": 1.3436480965211524, "learning_rate": 1.8652393637156246e-06, "loss": 0.4364, "step": 39230 }, { "epoch": 0.744337797336773, "grad_norm": 1.3348767412815103, "learning_rate": 1.862660821851961e-06, "loss": 0.4223, "step": 39240 }, { "epoch": 0.7445274858682044, "grad_norm": 1.6537391978793528, "learning_rate": 1.860083655492726e-06, "loss": 0.4454, "step": 39250 }, { "epoch": 0.7447171743996358, "grad_norm": 1.671065397902563, "learning_rate": 1.8575078657678297e-06, "loss": 0.4138, "step": 39260 }, { "epoch": 0.7449068629310672, "grad_norm": 1.338391925302713, "learning_rate": 1.854933453806574e-06, "loss": 0.4543, "step": 39270 }, { "epoch": 0.7450965514624985, "grad_norm": 1.3122771426297888, "learning_rate": 1.8523604207376611e-06, "loss": 0.4451, "step": 39280 }, { "epoch": 0.74528623999393, "grad_norm": 1.4646867042317977, "learning_rate": 1.8497887676891857e-06, "loss": 0.3996, "step": 39290 }, { "epoch": 0.7454759285253614, "grad_norm": 1.3991380929769344, "learning_rate": 1.8472184957886363e-06, "loss": 0.4543, "step": 39300 }, { "epoch": 0.7456656170567928, "grad_norm": 1.6393820206621739, "learning_rate": 1.8446496061629016e-06, "loss": 0.424, "step": 39310 }, { "epoch": 0.7458553055882241, "grad_norm": 1.0924231634524295, "learning_rate": 1.8420820999382566e-06, "loss": 0.43, "step": 39320 }, { "epoch": 0.7460449941196555, "grad_norm": 1.3154176897248084, "learning_rate": 1.839515978240376e-06, "loss": 0.4483, "step": 39330 }, { "epoch": 0.746234682651087, "grad_norm": 1.6205925887257784, "learning_rate": 1.8369512421943287e-06, "loss": 0.4406, "step": 39340 }, { "epoch": 0.7464243711825183, "grad_norm": 2.079027144876206, "learning_rate": 1.8343878929245651e-06, "loss": 0.4442, "step": 39350 }, { "epoch": 0.7466140597139497, "grad_norm": 1.3646124481722026, "learning_rate": 1.8318259315549403e-06, "loss": 0.4219, "step": 39360 }, { "epoch": 0.7468037482453811, "grad_norm": 1.2085635147388387, "learning_rate": 1.8292653592086928e-06, "loss": 0.4025, "step": 39370 }, { "epoch": 0.7469934367768125, "grad_norm": 1.4426198560470365, "learning_rate": 1.826706177008456e-06, "loss": 0.4146, "step": 39380 }, { "epoch": 0.7471831253082438, "grad_norm": 1.3295939948280946, "learning_rate": 1.8241483860762571e-06, "loss": 0.4127, "step": 39390 }, { "epoch": 0.7473728138396752, "grad_norm": 1.3898505192452604, "learning_rate": 1.821591987533502e-06, "loss": 0.4175, "step": 39400 }, { "epoch": 0.7475625023711067, "grad_norm": 1.5863275999625392, "learning_rate": 1.8190369825009956e-06, "loss": 0.4025, "step": 39410 }, { "epoch": 0.7477521909025381, "grad_norm": 1.5098445851650688, "learning_rate": 1.8164833720989323e-06, "loss": 0.4421, "step": 39420 }, { "epoch": 0.7479418794339694, "grad_norm": 1.584349046130134, "learning_rate": 1.8139311574468888e-06, "loss": 0.4269, "step": 39430 }, { "epoch": 0.7481315679654008, "grad_norm": 1.508151853896329, "learning_rate": 1.8113803396638353e-06, "loss": 0.4395, "step": 39440 }, { "epoch": 0.7483212564968322, "grad_norm": 1.543931557757243, "learning_rate": 1.808830919868127e-06, "loss": 0.4363, "step": 39450 }, { "epoch": 0.7485109450282635, "grad_norm": 1.766436462691068, "learning_rate": 1.806282899177505e-06, "loss": 0.4333, "step": 39460 }, { "epoch": 0.748700633559695, "grad_norm": 1.0872293522923255, "learning_rate": 1.8037362787091011e-06, "loss": 0.4244, "step": 39470 }, { "epoch": 0.7488903220911264, "grad_norm": 1.5803086103842374, "learning_rate": 1.8011910595794284e-06, "loss": 0.4364, "step": 39480 }, { "epoch": 0.7490800106225578, "grad_norm": 1.4665821157042596, "learning_rate": 1.7986472429043911e-06, "loss": 0.4257, "step": 39490 }, { "epoch": 0.7492696991539891, "grad_norm": 1.459664958357735, "learning_rate": 1.7961048297992733e-06, "loss": 0.3981, "step": 39500 }, { "epoch": 0.7494593876854205, "grad_norm": 1.2987999271500317, "learning_rate": 1.7935638213787449e-06, "loss": 0.4369, "step": 39510 }, { "epoch": 0.749649076216852, "grad_norm": 1.2908329446848876, "learning_rate": 1.7910242187568632e-06, "loss": 0.3991, "step": 39520 }, { "epoch": 0.7498387647482834, "grad_norm": 1.3092594829730875, "learning_rate": 1.788486023047064e-06, "loss": 0.439, "step": 39530 }, { "epoch": 0.7500284532797147, "grad_norm": 1.4058902588583062, "learning_rate": 1.7859492353621732e-06, "loss": 0.4097, "step": 39540 }, { "epoch": 0.7502181418111461, "grad_norm": 1.5491807818247483, "learning_rate": 1.7834138568143928e-06, "loss": 0.395, "step": 39550 }, { "epoch": 0.7504078303425775, "grad_norm": 1.4679468374115463, "learning_rate": 1.7808798885153072e-06, "loss": 0.432, "step": 39560 }, { "epoch": 0.7505975188740088, "grad_norm": 1.6663156795496354, "learning_rate": 1.7783473315758882e-06, "loss": 0.4547, "step": 39570 }, { "epoch": 0.7507872074054402, "grad_norm": 1.3600532328022297, "learning_rate": 1.7758161871064856e-06, "loss": 0.4426, "step": 39580 }, { "epoch": 0.7509768959368717, "grad_norm": 1.655190837327343, "learning_rate": 1.7732864562168284e-06, "loss": 0.4788, "step": 39590 }, { "epoch": 0.7511665844683031, "grad_norm": 1.6659128021010359, "learning_rate": 1.7707581400160274e-06, "loss": 0.4425, "step": 39600 }, { "epoch": 0.7513562729997344, "grad_norm": 1.4088277696958058, "learning_rate": 1.7682312396125706e-06, "loss": 0.4255, "step": 39610 }, { "epoch": 0.7515459615311658, "grad_norm": 1.5672840060420052, "learning_rate": 1.7657057561143304e-06, "loss": 0.4517, "step": 39620 }, { "epoch": 0.7517356500625972, "grad_norm": 1.2855486165981416, "learning_rate": 1.763181690628556e-06, "loss": 0.417, "step": 39630 }, { "epoch": 0.7519253385940287, "grad_norm": 1.171903638291201, "learning_rate": 1.7606590442618715e-06, "loss": 0.3957, "step": 39640 }, { "epoch": 0.75211502712546, "grad_norm": 1.4997892701404232, "learning_rate": 1.7581378181202835e-06, "loss": 0.4468, "step": 39650 }, { "epoch": 0.7523047156568914, "grad_norm": 1.38453059014203, "learning_rate": 1.7556180133091732e-06, "loss": 0.4177, "step": 39660 }, { "epoch": 0.7524944041883228, "grad_norm": 1.391556470591728, "learning_rate": 1.7530996309332977e-06, "loss": 0.4266, "step": 39670 }, { "epoch": 0.7526840927197541, "grad_norm": 1.2380397422054576, "learning_rate": 1.7505826720967945e-06, "loss": 0.4451, "step": 39680 }, { "epoch": 0.7528737812511855, "grad_norm": 1.340279621591674, "learning_rate": 1.7480671379031721e-06, "loss": 0.412, "step": 39690 }, { "epoch": 0.753063469782617, "grad_norm": 1.3787311998205731, "learning_rate": 1.7455530294553192e-06, "loss": 0.4349, "step": 39700 }, { "epoch": 0.7532531583140484, "grad_norm": 1.5057803331523207, "learning_rate": 1.7430403478554963e-06, "loss": 0.4337, "step": 39710 }, { "epoch": 0.7534428468454797, "grad_norm": 1.5565831076603505, "learning_rate": 1.7405290942053366e-06, "loss": 0.4213, "step": 39720 }, { "epoch": 0.7536325353769111, "grad_norm": 1.4657545170874877, "learning_rate": 1.738019269605854e-06, "loss": 0.4639, "step": 39730 }, { "epoch": 0.7538222239083425, "grad_norm": 1.499148266884659, "learning_rate": 1.7355108751574267e-06, "loss": 0.4471, "step": 39740 }, { "epoch": 0.754011912439774, "grad_norm": 1.9203988361621567, "learning_rate": 1.7330039119598157e-06, "loss": 0.4228, "step": 39750 }, { "epoch": 0.7542016009712053, "grad_norm": 1.45436551210172, "learning_rate": 1.7304983811121467e-06, "loss": 0.4155, "step": 39760 }, { "epoch": 0.7543912895026367, "grad_norm": 1.5315537575512594, "learning_rate": 1.7279942837129188e-06, "loss": 0.4298, "step": 39770 }, { "epoch": 0.7545809780340681, "grad_norm": 1.9299559492250988, "learning_rate": 1.7254916208600064e-06, "loss": 0.4201, "step": 39780 }, { "epoch": 0.7547706665654994, "grad_norm": 1.9243640457333937, "learning_rate": 1.7229903936506537e-06, "loss": 0.4547, "step": 39790 }, { "epoch": 0.7549603550969308, "grad_norm": 1.6586368936317823, "learning_rate": 1.7204906031814728e-06, "loss": 0.4462, "step": 39800 }, { "epoch": 0.7551500436283622, "grad_norm": 1.4230048822012062, "learning_rate": 1.7179922505484476e-06, "loss": 0.4366, "step": 39810 }, { "epoch": 0.7553397321597937, "grad_norm": 1.6993088127468514, "learning_rate": 1.7154953368469291e-06, "loss": 0.4416, "step": 39820 }, { "epoch": 0.755529420691225, "grad_norm": 1.3433235537333843, "learning_rate": 1.7129998631716427e-06, "loss": 0.4435, "step": 39830 }, { "epoch": 0.7557191092226564, "grad_norm": 2.24329144707166, "learning_rate": 1.7105058306166806e-06, "loss": 0.4311, "step": 39840 }, { "epoch": 0.7559087977540878, "grad_norm": 1.7052434884634626, "learning_rate": 1.7080132402755006e-06, "loss": 0.4241, "step": 39850 }, { "epoch": 0.7560984862855191, "grad_norm": 1.4171483239238893, "learning_rate": 1.7055220932409277e-06, "loss": 0.4227, "step": 39860 }, { "epoch": 0.7562881748169505, "grad_norm": 1.3254786358213817, "learning_rate": 1.7030323906051604e-06, "loss": 0.4314, "step": 39870 }, { "epoch": 0.756477863348382, "grad_norm": 1.4696535291873336, "learning_rate": 1.7005441334597555e-06, "loss": 0.4308, "step": 39880 }, { "epoch": 0.7566675518798134, "grad_norm": 1.3272121313777399, "learning_rate": 1.6980573228956438e-06, "loss": 0.4215, "step": 39890 }, { "epoch": 0.7568572404112447, "grad_norm": 1.5273839405510394, "learning_rate": 1.6955719600031163e-06, "loss": 0.4147, "step": 39900 }, { "epoch": 0.7570469289426761, "grad_norm": 1.4920669315397008, "learning_rate": 1.6930880458718301e-06, "loss": 0.4272, "step": 39910 }, { "epoch": 0.7572366174741075, "grad_norm": 1.559462709045001, "learning_rate": 1.6906055815908112e-06, "loss": 0.4616, "step": 39920 }, { "epoch": 0.757426306005539, "grad_norm": 1.4043355553701689, "learning_rate": 1.6881245682484442e-06, "loss": 0.3961, "step": 39930 }, { "epoch": 0.7576159945369703, "grad_norm": 1.483679860943232, "learning_rate": 1.685645006932482e-06, "loss": 0.4143, "step": 39940 }, { "epoch": 0.7578056830684017, "grad_norm": 1.2937465381347906, "learning_rate": 1.6831668987300414e-06, "loss": 0.417, "step": 39950 }, { "epoch": 0.7579953715998331, "grad_norm": 1.6465946682714343, "learning_rate": 1.6806902447275969e-06, "loss": 0.4254, "step": 39960 }, { "epoch": 0.7581850601312644, "grad_norm": 1.6402935444345825, "learning_rate": 1.6782150460109903e-06, "loss": 0.4168, "step": 39970 }, { "epoch": 0.7583747486626958, "grad_norm": 1.3635764941486204, "learning_rate": 1.6757413036654207e-06, "loss": 0.428, "step": 39980 }, { "epoch": 0.7585644371941273, "grad_norm": 1.6373342058366533, "learning_rate": 1.6732690187754542e-06, "loss": 0.4463, "step": 39990 }, { "epoch": 0.7587541257255587, "grad_norm": 1.8173651351375681, "learning_rate": 1.670798192425016e-06, "loss": 0.4771, "step": 40000 }, { "epoch": 0.75894381425699, "grad_norm": 1.3369341823770984, "learning_rate": 1.66832882569739e-06, "loss": 0.4182, "step": 40010 }, { "epoch": 0.7591335027884214, "grad_norm": 1.42029773539435, "learning_rate": 1.6658609196752212e-06, "loss": 0.4473, "step": 40020 }, { "epoch": 0.7593231913198528, "grad_norm": 1.581827436707298, "learning_rate": 1.6633944754405128e-06, "loss": 0.4339, "step": 40030 }, { "epoch": 0.7595128798512842, "grad_norm": 1.439041661807006, "learning_rate": 1.66092949407463e-06, "loss": 0.4394, "step": 40040 }, { "epoch": 0.7597025683827155, "grad_norm": 1.4640799055909919, "learning_rate": 1.6584659766582972e-06, "loss": 0.4563, "step": 40050 }, { "epoch": 0.759892256914147, "grad_norm": 1.211443131110081, "learning_rate": 1.656003924271593e-06, "loss": 0.4277, "step": 40060 }, { "epoch": 0.7600819454455784, "grad_norm": 1.1641769648961413, "learning_rate": 1.653543337993954e-06, "loss": 0.4426, "step": 40070 }, { "epoch": 0.7602716339770097, "grad_norm": 2.1083770097128243, "learning_rate": 1.6510842189041792e-06, "loss": 0.4229, "step": 40080 }, { "epoch": 0.7604613225084411, "grad_norm": 1.262018336759582, "learning_rate": 1.6486265680804175e-06, "loss": 0.4562, "step": 40090 }, { "epoch": 0.7606510110398725, "grad_norm": 1.4248117046450424, "learning_rate": 1.646170386600181e-06, "loss": 0.4224, "step": 40100 }, { "epoch": 0.760840699571304, "grad_norm": 1.532641617068622, "learning_rate": 1.6437156755403317e-06, "loss": 0.4094, "step": 40110 }, { "epoch": 0.7610303881027353, "grad_norm": 1.2834650019763822, "learning_rate": 1.6412624359770873e-06, "loss": 0.4291, "step": 40120 }, { "epoch": 0.7612200766341667, "grad_norm": 1.4777051820874718, "learning_rate": 1.6388106689860266e-06, "loss": 0.4431, "step": 40130 }, { "epoch": 0.7614097651655981, "grad_norm": 1.4349185970795217, "learning_rate": 1.6363603756420743e-06, "loss": 0.4232, "step": 40140 }, { "epoch": 0.7615994536970295, "grad_norm": 1.4428110844232696, "learning_rate": 1.633911557019517e-06, "loss": 0.4362, "step": 40150 }, { "epoch": 0.7617891422284608, "grad_norm": 4.902289530734491, "learning_rate": 1.6314642141919889e-06, "loss": 0.4438, "step": 40160 }, { "epoch": 0.7619788307598923, "grad_norm": 1.5670028234040292, "learning_rate": 1.6290183482324778e-06, "loss": 0.4453, "step": 40170 }, { "epoch": 0.7621685192913237, "grad_norm": 1.4121646943478539, "learning_rate": 1.626573960213328e-06, "loss": 0.415, "step": 40180 }, { "epoch": 0.762358207822755, "grad_norm": 1.7437764814581656, "learning_rate": 1.6241310512062308e-06, "loss": 0.4232, "step": 40190 }, { "epoch": 0.7625478963541864, "grad_norm": 1.5805788321407164, "learning_rate": 1.6216896222822321e-06, "loss": 0.4277, "step": 40200 }, { "epoch": 0.7627375848856178, "grad_norm": 1.4581473942914422, "learning_rate": 1.6192496745117325e-06, "loss": 0.4079, "step": 40210 }, { "epoch": 0.7629272734170492, "grad_norm": 1.7960715190203387, "learning_rate": 1.6168112089644717e-06, "loss": 0.4148, "step": 40220 }, { "epoch": 0.7631169619484806, "grad_norm": 1.3481346958497573, "learning_rate": 1.6143742267095496e-06, "loss": 0.4383, "step": 40230 }, { "epoch": 0.763306650479912, "grad_norm": 1.2763537582832283, "learning_rate": 1.6119387288154158e-06, "loss": 0.4117, "step": 40240 }, { "epoch": 0.7634963390113434, "grad_norm": 1.4140873254434696, "learning_rate": 1.609504716349863e-06, "loss": 0.4145, "step": 40250 }, { "epoch": 0.7636860275427748, "grad_norm": 1.3825709326068845, "learning_rate": 1.6070721903800374e-06, "loss": 0.4142, "step": 40260 }, { "epoch": 0.7638757160742061, "grad_norm": 1.3408244699279122, "learning_rate": 1.6046411519724325e-06, "loss": 0.4307, "step": 40270 }, { "epoch": 0.7640654046056375, "grad_norm": 1.5304273900913865, "learning_rate": 1.6022116021928857e-06, "loss": 0.4287, "step": 40280 }, { "epoch": 0.764255093137069, "grad_norm": 1.689275281264622, "learning_rate": 1.59978354210659e-06, "loss": 0.4352, "step": 40290 }, { "epoch": 0.7644447816685003, "grad_norm": 1.4851653533544207, "learning_rate": 1.5973569727780763e-06, "loss": 0.4043, "step": 40300 }, { "epoch": 0.7646344701999317, "grad_norm": 1.5609594990107565, "learning_rate": 1.594931895271229e-06, "loss": 0.4291, "step": 40310 }, { "epoch": 0.7648241587313631, "grad_norm": 1.7563277323295177, "learning_rate": 1.5925083106492745e-06, "loss": 0.4614, "step": 40320 }, { "epoch": 0.7650138472627945, "grad_norm": 1.3646863853900508, "learning_rate": 1.5900862199747834e-06, "loss": 0.4359, "step": 40330 }, { "epoch": 0.7652035357942258, "grad_norm": 1.517200143254079, "learning_rate": 1.5876656243096772e-06, "loss": 0.4366, "step": 40340 }, { "epoch": 0.7653932243256573, "grad_norm": 1.5646671060381374, "learning_rate": 1.5852465247152148e-06, "loss": 0.4589, "step": 40350 }, { "epoch": 0.7655829128570887, "grad_norm": 1.486063032441489, "learning_rate": 1.5828289222520065e-06, "loss": 0.4015, "step": 40360 }, { "epoch": 0.7657726013885201, "grad_norm": 1.5630264697898086, "learning_rate": 1.58041281798e-06, "loss": 0.4485, "step": 40370 }, { "epoch": 0.7659622899199514, "grad_norm": 1.4309511646413977, "learning_rate": 1.5779982129584875e-06, "loss": 0.4233, "step": 40380 }, { "epoch": 0.7661519784513828, "grad_norm": 1.317588730274247, "learning_rate": 1.5755851082461077e-06, "loss": 0.4525, "step": 40390 }, { "epoch": 0.7663416669828143, "grad_norm": 1.4178603011892827, "learning_rate": 1.5731735049008357e-06, "loss": 0.4151, "step": 40400 }, { "epoch": 0.7665313555142456, "grad_norm": 1.412021967719018, "learning_rate": 1.570763403979994e-06, "loss": 0.4133, "step": 40410 }, { "epoch": 0.766721044045677, "grad_norm": 1.2712011666057397, "learning_rate": 1.5683548065402432e-06, "loss": 0.4309, "step": 40420 }, { "epoch": 0.7669107325771084, "grad_norm": 1.3795209478121033, "learning_rate": 1.5659477136375834e-06, "loss": 0.4205, "step": 40430 }, { "epoch": 0.7671004211085398, "grad_norm": 1.4499386747730028, "learning_rate": 1.5635421263273576e-06, "loss": 0.4314, "step": 40440 }, { "epoch": 0.7672901096399711, "grad_norm": 1.469402526327863, "learning_rate": 1.561138045664251e-06, "loss": 0.4407, "step": 40450 }, { "epoch": 0.7674797981714025, "grad_norm": 1.5397975988216719, "learning_rate": 1.5587354727022836e-06, "loss": 0.4381, "step": 40460 }, { "epoch": 0.767669486702834, "grad_norm": 1.2222259181003081, "learning_rate": 1.5563344084948152e-06, "loss": 0.4076, "step": 40470 }, { "epoch": 0.7678591752342654, "grad_norm": 1.460805656782849, "learning_rate": 1.5539348540945431e-06, "loss": 0.4226, "step": 40480 }, { "epoch": 0.7680488637656967, "grad_norm": 1.7669760762802769, "learning_rate": 1.5515368105535079e-06, "loss": 0.437, "step": 40490 }, { "epoch": 0.7682385522971281, "grad_norm": 1.7231886292765766, "learning_rate": 1.5491402789230848e-06, "loss": 0.4462, "step": 40500 }, { "epoch": 0.7684282408285595, "grad_norm": 1.492254134765918, "learning_rate": 1.5467452602539828e-06, "loss": 0.4047, "step": 40510 }, { "epoch": 0.7686179293599908, "grad_norm": 1.51410365430511, "learning_rate": 1.5443517555962534e-06, "loss": 0.4302, "step": 40520 }, { "epoch": 0.7688076178914223, "grad_norm": 1.2143606966093219, "learning_rate": 1.5419597659992808e-06, "loss": 0.4051, "step": 40530 }, { "epoch": 0.7689973064228537, "grad_norm": 1.4064295915300444, "learning_rate": 1.5395692925117833e-06, "loss": 0.4346, "step": 40540 }, { "epoch": 0.7691869949542851, "grad_norm": 1.6644596321859337, "learning_rate": 1.53718033618182e-06, "loss": 0.457, "step": 40550 }, { "epoch": 0.7693766834857164, "grad_norm": 1.561881674273252, "learning_rate": 1.534792898056779e-06, "loss": 0.4309, "step": 40560 }, { "epoch": 0.7695663720171478, "grad_norm": 1.4355097752404289, "learning_rate": 1.5324069791833879e-06, "loss": 0.4347, "step": 40570 }, { "epoch": 0.7697560605485793, "grad_norm": 1.3711541532536744, "learning_rate": 1.5300225806077046e-06, "loss": 0.4258, "step": 40580 }, { "epoch": 0.7699457490800107, "grad_norm": 1.7233386353892284, "learning_rate": 1.52763970337512e-06, "loss": 0.4538, "step": 40590 }, { "epoch": 0.770135437611442, "grad_norm": 1.4707921558031551, "learning_rate": 1.5252583485303613e-06, "loss": 0.4277, "step": 40600 }, { "epoch": 0.7703251261428734, "grad_norm": 1.3547642566043947, "learning_rate": 1.5228785171174877e-06, "loss": 0.4227, "step": 40610 }, { "epoch": 0.7705148146743048, "grad_norm": 1.5657088474345855, "learning_rate": 1.5205002101798872e-06, "loss": 0.4167, "step": 40620 }, { "epoch": 0.7707045032057361, "grad_norm": 1.256835478240665, "learning_rate": 1.518123428760283e-06, "loss": 0.4006, "step": 40630 }, { "epoch": 0.7708941917371676, "grad_norm": 1.5638579321976678, "learning_rate": 1.5157481739007252e-06, "loss": 0.4498, "step": 40640 }, { "epoch": 0.771083880268599, "grad_norm": 1.4587484528764545, "learning_rate": 1.5133744466425993e-06, "loss": 0.4257, "step": 40650 }, { "epoch": 0.7712735688000304, "grad_norm": 1.2108567630266271, "learning_rate": 1.5110022480266213e-06, "loss": 0.4127, "step": 40660 }, { "epoch": 0.7714632573314617, "grad_norm": 1.3716850184755882, "learning_rate": 1.5086315790928324e-06, "loss": 0.4344, "step": 40670 }, { "epoch": 0.7716529458628931, "grad_norm": 1.2637819074726913, "learning_rate": 1.5062624408806064e-06, "loss": 0.4087, "step": 40680 }, { "epoch": 0.7718426343943245, "grad_norm": 1.5145879169455339, "learning_rate": 1.503894834428643e-06, "loss": 0.4232, "step": 40690 }, { "epoch": 0.7720323229257559, "grad_norm": 1.7184043720486128, "learning_rate": 1.5015287607749747e-06, "loss": 0.4507, "step": 40700 }, { "epoch": 0.7722220114571873, "grad_norm": 1.2279979170902926, "learning_rate": 1.49916422095696e-06, "loss": 0.4158, "step": 40710 }, { "epoch": 0.7724116999886187, "grad_norm": 1.3848117110926585, "learning_rate": 1.496801216011285e-06, "loss": 0.4323, "step": 40720 }, { "epoch": 0.7726013885200501, "grad_norm": 1.3556688704323576, "learning_rate": 1.4944397469739596e-06, "loss": 0.4082, "step": 40730 }, { "epoch": 0.7727910770514814, "grad_norm": 1.8208078743782201, "learning_rate": 1.4920798148803272e-06, "loss": 0.4136, "step": 40740 }, { "epoch": 0.7729807655829128, "grad_norm": 1.7530737251329611, "learning_rate": 1.4897214207650496e-06, "loss": 0.4275, "step": 40750 }, { "epoch": 0.7731704541143443, "grad_norm": 1.7345850770934284, "learning_rate": 1.487364565662122e-06, "loss": 0.4486, "step": 40760 }, { "epoch": 0.7733601426457757, "grad_norm": 1.8044016498058675, "learning_rate": 1.485009250604859e-06, "loss": 0.4629, "step": 40770 }, { "epoch": 0.773549831177207, "grad_norm": 1.2680604835810867, "learning_rate": 1.482655476625901e-06, "loss": 0.4448, "step": 40780 }, { "epoch": 0.7737395197086384, "grad_norm": 1.6851237435430102, "learning_rate": 1.4803032447572163e-06, "loss": 0.4373, "step": 40790 }, { "epoch": 0.7739292082400698, "grad_norm": 1.4221626423778175, "learning_rate": 1.4779525560300912e-06, "loss": 0.455, "step": 40800 }, { "epoch": 0.7741188967715011, "grad_norm": 1.4326748059451855, "learning_rate": 1.4756034114751405e-06, "loss": 0.4244, "step": 40810 }, { "epoch": 0.7743085853029326, "grad_norm": 1.4555031877829154, "learning_rate": 1.4732558121223022e-06, "loss": 0.4354, "step": 40820 }, { "epoch": 0.774498273834364, "grad_norm": 1.341093616535932, "learning_rate": 1.4709097590008336e-06, "loss": 0.41, "step": 40830 }, { "epoch": 0.7746879623657954, "grad_norm": 1.2933373603922174, "learning_rate": 1.4685652531393141e-06, "loss": 0.4248, "step": 40840 }, { "epoch": 0.7748776508972267, "grad_norm": 1.6887621976583227, "learning_rate": 1.4662222955656458e-06, "loss": 0.4264, "step": 40850 }, { "epoch": 0.7750673394286581, "grad_norm": 1.486231335337743, "learning_rate": 1.4638808873070531e-06, "loss": 0.4369, "step": 40860 }, { "epoch": 0.7752570279600896, "grad_norm": 1.4358998759384662, "learning_rate": 1.4615410293900817e-06, "loss": 0.3937, "step": 40870 }, { "epoch": 0.775446716491521, "grad_norm": 1.309475788345335, "learning_rate": 1.4592027228405947e-06, "loss": 0.4293, "step": 40880 }, { "epoch": 0.7756364050229523, "grad_norm": 1.3376799577207592, "learning_rate": 1.456865968683775e-06, "loss": 0.4412, "step": 40890 }, { "epoch": 0.7758260935543837, "grad_norm": 1.4628484885715682, "learning_rate": 1.4545307679441295e-06, "loss": 0.4309, "step": 40900 }, { "epoch": 0.7760157820858151, "grad_norm": 1.5458318131635065, "learning_rate": 1.4521971216454773e-06, "loss": 0.451, "step": 40910 }, { "epoch": 0.7762054706172464, "grad_norm": 1.7223928268004003, "learning_rate": 1.449865030810963e-06, "loss": 0.4248, "step": 40920 }, { "epoch": 0.7763951591486778, "grad_norm": 1.4006892029957074, "learning_rate": 1.4475344964630444e-06, "loss": 0.433, "step": 40930 }, { "epoch": 0.7765848476801093, "grad_norm": 1.539552495241117, "learning_rate": 1.4452055196234965e-06, "loss": 0.4434, "step": 40940 }, { "epoch": 0.7767745362115407, "grad_norm": 1.367848643337001, "learning_rate": 1.4428781013134162e-06, "loss": 0.4228, "step": 40950 }, { "epoch": 0.776964224742972, "grad_norm": 1.4343346998231656, "learning_rate": 1.4405522425532114e-06, "loss": 0.4266, "step": 40960 }, { "epoch": 0.7771539132744034, "grad_norm": 1.3396176749611384, "learning_rate": 1.4382279443626125e-06, "loss": 0.4245, "step": 40970 }, { "epoch": 0.7773436018058348, "grad_norm": 1.4294158555701013, "learning_rate": 1.435905207760659e-06, "loss": 0.4326, "step": 40980 }, { "epoch": 0.7775332903372663, "grad_norm": 1.466869154987824, "learning_rate": 1.4335840337657086e-06, "loss": 0.4377, "step": 40990 }, { "epoch": 0.7777229788686976, "grad_norm": 1.4146793485373055, "learning_rate": 1.431264423395437e-06, "loss": 0.4313, "step": 41000 }, { "epoch": 0.777912667400129, "grad_norm": 1.3124570354548901, "learning_rate": 1.4289463776668284e-06, "loss": 0.4204, "step": 41010 }, { "epoch": 0.7781023559315604, "grad_norm": 1.0847896221990267, "learning_rate": 1.4266298975961873e-06, "loss": 0.4138, "step": 41020 }, { "epoch": 0.7782920444629917, "grad_norm": 1.1631442975174369, "learning_rate": 1.4243149841991277e-06, "loss": 0.4026, "step": 41030 }, { "epoch": 0.7784817329944231, "grad_norm": 1.4439309866092855, "learning_rate": 1.4220016384905756e-06, "loss": 0.4496, "step": 41040 }, { "epoch": 0.7786714215258546, "grad_norm": 1.5897980623661185, "learning_rate": 1.4196898614847742e-06, "loss": 0.4218, "step": 41050 }, { "epoch": 0.778861110057286, "grad_norm": 1.3780596085106647, "learning_rate": 1.4173796541952744e-06, "loss": 0.4336, "step": 41060 }, { "epoch": 0.7790507985887173, "grad_norm": 1.9463115772542718, "learning_rate": 1.4150710176349425e-06, "loss": 0.4491, "step": 41070 }, { "epoch": 0.7792404871201487, "grad_norm": 2.2447845200828223, "learning_rate": 1.4127639528159576e-06, "loss": 0.4273, "step": 41080 }, { "epoch": 0.7794301756515801, "grad_norm": 1.6974105675161464, "learning_rate": 1.4104584607498e-06, "loss": 0.4268, "step": 41090 }, { "epoch": 0.7796198641830115, "grad_norm": 1.5908469369871252, "learning_rate": 1.4081545424472704e-06, "loss": 0.4404, "step": 41100 }, { "epoch": 0.7798095527144429, "grad_norm": 1.429303627106933, "learning_rate": 1.4058521989184787e-06, "loss": 0.4305, "step": 41110 }, { "epoch": 0.7799992412458743, "grad_norm": 1.2421474402695751, "learning_rate": 1.4035514311728376e-06, "loss": 0.4065, "step": 41120 }, { "epoch": 0.7801889297773057, "grad_norm": 1.5670266953376326, "learning_rate": 1.4012522402190777e-06, "loss": 0.4379, "step": 41130 }, { "epoch": 0.780378618308737, "grad_norm": 1.4777884060023716, "learning_rate": 1.3989546270652316e-06, "loss": 0.4466, "step": 41140 }, { "epoch": 0.7805683068401684, "grad_norm": 1.6450826300270227, "learning_rate": 1.3966585927186404e-06, "loss": 0.4123, "step": 41150 }, { "epoch": 0.7807579953715998, "grad_norm": 1.6118771102561045, "learning_rate": 1.3943641381859596e-06, "loss": 0.408, "step": 41160 }, { "epoch": 0.7809476839030313, "grad_norm": 1.4120067265397993, "learning_rate": 1.3920712644731432e-06, "loss": 0.4275, "step": 41170 }, { "epoch": 0.7811373724344626, "grad_norm": 1.362091503184185, "learning_rate": 1.3897799725854594e-06, "loss": 0.4479, "step": 41180 }, { "epoch": 0.781327060965894, "grad_norm": 1.4703445373584445, "learning_rate": 1.3874902635274796e-06, "loss": 0.4227, "step": 41190 }, { "epoch": 0.7815167494973254, "grad_norm": 1.5399342925020816, "learning_rate": 1.3852021383030784e-06, "loss": 0.4465, "step": 41200 }, { "epoch": 0.7817064380287568, "grad_norm": 1.262913875317832, "learning_rate": 1.3829155979154434e-06, "loss": 0.398, "step": 41210 }, { "epoch": 0.7818961265601881, "grad_norm": 1.30239456694778, "learning_rate": 1.3806306433670597e-06, "loss": 0.445, "step": 41220 }, { "epoch": 0.7820858150916196, "grad_norm": 1.4046326404290548, "learning_rate": 1.3783472756597233e-06, "loss": 0.4387, "step": 41230 }, { "epoch": 0.782275503623051, "grad_norm": 1.5174897245336152, "learning_rate": 1.3760654957945302e-06, "loss": 0.4225, "step": 41240 }, { "epoch": 0.7824651921544823, "grad_norm": 1.3648100158895715, "learning_rate": 1.3737853047718803e-06, "loss": 0.4279, "step": 41250 }, { "epoch": 0.7826548806859137, "grad_norm": 1.544144688025679, "learning_rate": 1.371506703591482e-06, "loss": 0.4467, "step": 41260 }, { "epoch": 0.7828445692173451, "grad_norm": 1.5484286234938565, "learning_rate": 1.3692296932523391e-06, "loss": 0.4123, "step": 41270 }, { "epoch": 0.7830342577487766, "grad_norm": 1.6423368623111445, "learning_rate": 1.3669542747527653e-06, "loss": 0.4541, "step": 41280 }, { "epoch": 0.7832239462802079, "grad_norm": 1.592979805682502, "learning_rate": 1.3646804490903714e-06, "loss": 0.4371, "step": 41290 }, { "epoch": 0.7834136348116393, "grad_norm": 1.4824459956046587, "learning_rate": 1.3624082172620696e-06, "loss": 0.4522, "step": 41300 }, { "epoch": 0.7836033233430707, "grad_norm": 1.6264431139707587, "learning_rate": 1.3601375802640766e-06, "loss": 0.4193, "step": 41310 }, { "epoch": 0.7837930118745021, "grad_norm": 1.4627760022093312, "learning_rate": 1.3578685390919106e-06, "loss": 0.43, "step": 41320 }, { "epoch": 0.7839827004059334, "grad_norm": 1.4813406656904096, "learning_rate": 1.3556010947403853e-06, "loss": 0.4228, "step": 41330 }, { "epoch": 0.7841723889373649, "grad_norm": 1.58648455610345, "learning_rate": 1.3533352482036178e-06, "loss": 0.4373, "step": 41340 }, { "epoch": 0.7843620774687963, "grad_norm": 1.559944866200032, "learning_rate": 1.3510710004750217e-06, "loss": 0.4497, "step": 41350 }, { "epoch": 0.7845517660002276, "grad_norm": 1.34912292262135, "learning_rate": 1.3488083525473134e-06, "loss": 0.4117, "step": 41360 }, { "epoch": 0.784741454531659, "grad_norm": 1.2860276438605829, "learning_rate": 1.3465473054125077e-06, "loss": 0.4129, "step": 41370 }, { "epoch": 0.7849311430630904, "grad_norm": 1.3823586078497874, "learning_rate": 1.3442878600619137e-06, "loss": 0.4219, "step": 41380 }, { "epoch": 0.7851208315945218, "grad_norm": 1.674738783466442, "learning_rate": 1.3420300174861429e-06, "loss": 0.4403, "step": 41390 }, { "epoch": 0.7853105201259531, "grad_norm": 1.5800028957047763, "learning_rate": 1.3397737786751003e-06, "loss": 0.438, "step": 41400 }, { "epoch": 0.7855002086573846, "grad_norm": 1.835907881732873, "learning_rate": 1.3375191446179881e-06, "loss": 0.4194, "step": 41410 }, { "epoch": 0.785689897188816, "grad_norm": 1.2090353466079946, "learning_rate": 1.3352661163033088e-06, "loss": 0.4457, "step": 41420 }, { "epoch": 0.7858795857202474, "grad_norm": 1.445874668764661, "learning_rate": 1.3330146947188554e-06, "loss": 0.4344, "step": 41430 }, { "epoch": 0.7860692742516787, "grad_norm": 1.5342897677708374, "learning_rate": 1.3307648808517227e-06, "loss": 0.4223, "step": 41440 }, { "epoch": 0.7862589627831101, "grad_norm": 1.458277678576853, "learning_rate": 1.3285166756882944e-06, "loss": 0.4282, "step": 41450 }, { "epoch": 0.7864486513145416, "grad_norm": 1.434178430169671, "learning_rate": 1.3262700802142515e-06, "loss": 0.4209, "step": 41460 }, { "epoch": 0.7866383398459729, "grad_norm": 1.4798879950136292, "learning_rate": 1.32402509541457e-06, "loss": 0.4302, "step": 41470 }, { "epoch": 0.7868280283774043, "grad_norm": 1.17927705830392, "learning_rate": 1.3217817222735207e-06, "loss": 0.4157, "step": 41480 }, { "epoch": 0.7870177169088357, "grad_norm": 1.4061164585035644, "learning_rate": 1.3195399617746657e-06, "loss": 0.4182, "step": 41490 }, { "epoch": 0.7872074054402671, "grad_norm": 1.6043420266826616, "learning_rate": 1.3172998149008598e-06, "loss": 0.455, "step": 41500 }, { "epoch": 0.7873970939716984, "grad_norm": 1.3315608740039115, "learning_rate": 1.31506128263425e-06, "loss": 0.4354, "step": 41510 }, { "epoch": 0.7875867825031299, "grad_norm": 1.4192915819579919, "learning_rate": 1.3128243659562783e-06, "loss": 0.4227, "step": 41520 }, { "epoch": 0.7877764710345613, "grad_norm": 1.189691497017128, "learning_rate": 1.3105890658476778e-06, "loss": 0.4153, "step": 41530 }, { "epoch": 0.7879661595659926, "grad_norm": 1.3536403664238699, "learning_rate": 1.3083553832884699e-06, "loss": 0.4079, "step": 41540 }, { "epoch": 0.788155848097424, "grad_norm": 1.6674724318275196, "learning_rate": 1.3061233192579676e-06, "loss": 0.4194, "step": 41550 }, { "epoch": 0.7883455366288554, "grad_norm": 1.3087568715307705, "learning_rate": 1.3038928747347785e-06, "loss": 0.4125, "step": 41560 }, { "epoch": 0.7885352251602868, "grad_norm": 1.4408968199830663, "learning_rate": 1.3016640506967932e-06, "loss": 0.422, "step": 41570 }, { "epoch": 0.7887249136917182, "grad_norm": 1.5795598267740585, "learning_rate": 1.2994368481211995e-06, "loss": 0.4387, "step": 41580 }, { "epoch": 0.7889146022231496, "grad_norm": 1.9276224922698033, "learning_rate": 1.2972112679844678e-06, "loss": 0.4161, "step": 41590 }, { "epoch": 0.789104290754581, "grad_norm": 1.6561922035272196, "learning_rate": 1.2949873112623595e-06, "loss": 0.42, "step": 41600 }, { "epoch": 0.7892939792860124, "grad_norm": 1.4471616310860953, "learning_rate": 1.2927649789299273e-06, "loss": 0.4381, "step": 41610 }, { "epoch": 0.7894836678174437, "grad_norm": 1.3028475249420328, "learning_rate": 1.2905442719615052e-06, "loss": 0.4232, "step": 41620 }, { "epoch": 0.7896733563488751, "grad_norm": 1.4920087383179355, "learning_rate": 1.2883251913307215e-06, "loss": 0.4235, "step": 41630 }, { "epoch": 0.7898630448803066, "grad_norm": 1.4651967307051164, "learning_rate": 1.2861077380104875e-06, "loss": 0.4251, "step": 41640 }, { "epoch": 0.7900527334117379, "grad_norm": 1.6096300230187102, "learning_rate": 1.2838919129729999e-06, "loss": 0.4241, "step": 41650 }, { "epoch": 0.7902424219431693, "grad_norm": 1.680599408602637, "learning_rate": 1.281677717189746e-06, "loss": 0.4479, "step": 41660 }, { "epoch": 0.7904321104746007, "grad_norm": 1.3121807785624862, "learning_rate": 1.2794651516314943e-06, "loss": 0.4041, "step": 41670 }, { "epoch": 0.7906217990060321, "grad_norm": 1.278113881939806, "learning_rate": 1.2772542172683005e-06, "loss": 0.4402, "step": 41680 }, { "epoch": 0.7908114875374634, "grad_norm": 1.5049860776327177, "learning_rate": 1.2750449150695082e-06, "loss": 0.4241, "step": 41690 }, { "epoch": 0.7910011760688949, "grad_norm": 1.7484205373725126, "learning_rate": 1.2728372460037398e-06, "loss": 0.4313, "step": 41700 }, { "epoch": 0.7911908646003263, "grad_norm": 1.5464438179548117, "learning_rate": 1.2706312110389046e-06, "loss": 0.4459, "step": 41710 }, { "epoch": 0.7913805531317577, "grad_norm": 1.4583475928829754, "learning_rate": 1.2684268111421937e-06, "loss": 0.42, "step": 41720 }, { "epoch": 0.791570241663189, "grad_norm": 1.2767977819332017, "learning_rate": 1.2662240472800834e-06, "loss": 0.4159, "step": 41730 }, { "epoch": 0.7917599301946204, "grad_norm": 2.4413485904743357, "learning_rate": 1.2640229204183352e-06, "loss": 0.426, "step": 41740 }, { "epoch": 0.7919496187260519, "grad_norm": 1.406278314312972, "learning_rate": 1.2618234315219863e-06, "loss": 0.4311, "step": 41750 }, { "epoch": 0.7921393072574832, "grad_norm": 1.6843304762642755, "learning_rate": 1.2596255815553594e-06, "loss": 0.4123, "step": 41760 }, { "epoch": 0.7923289957889146, "grad_norm": 1.3596107608430525, "learning_rate": 1.2574293714820595e-06, "loss": 0.4063, "step": 41770 }, { "epoch": 0.792518684320346, "grad_norm": 1.4591026113232217, "learning_rate": 1.2552348022649706e-06, "loss": 0.42, "step": 41780 }, { "epoch": 0.7927083728517774, "grad_norm": 1.3028909553792969, "learning_rate": 1.25304187486626e-06, "loss": 0.425, "step": 41790 }, { "epoch": 0.7928980613832087, "grad_norm": 1.602135809725502, "learning_rate": 1.2508505902473722e-06, "loss": 0.4369, "step": 41800 }, { "epoch": 0.7930877499146401, "grad_norm": 1.5254682618635254, "learning_rate": 1.248660949369031e-06, "loss": 0.43, "step": 41810 }, { "epoch": 0.7932774384460716, "grad_norm": 1.6688620391638727, "learning_rate": 1.246472953191245e-06, "loss": 0.4028, "step": 41820 }, { "epoch": 0.793467126977503, "grad_norm": 1.5033828406782588, "learning_rate": 1.2442866026732942e-06, "loss": 0.4126, "step": 41830 }, { "epoch": 0.7936568155089343, "grad_norm": 1.463735920691093, "learning_rate": 1.2421018987737437e-06, "loss": 0.4546, "step": 41840 }, { "epoch": 0.7938465040403657, "grad_norm": 1.5779668670026579, "learning_rate": 1.2399188424504328e-06, "loss": 0.4482, "step": 41850 }, { "epoch": 0.7940361925717971, "grad_norm": 1.403525193626625, "learning_rate": 1.2377374346604776e-06, "loss": 0.4213, "step": 41860 }, { "epoch": 0.7942258811032284, "grad_norm": 1.2730812895932933, "learning_rate": 1.2355576763602766e-06, "loss": 0.4112, "step": 41870 }, { "epoch": 0.7944155696346599, "grad_norm": 1.3856331336477312, "learning_rate": 1.2333795685054984e-06, "loss": 0.4168, "step": 41880 }, { "epoch": 0.7946052581660913, "grad_norm": 1.3827349144128331, "learning_rate": 1.2312031120510937e-06, "loss": 0.4462, "step": 41890 }, { "epoch": 0.7947949466975227, "grad_norm": 1.5538259997466608, "learning_rate": 1.2290283079512888e-06, "loss": 0.4513, "step": 41900 }, { "epoch": 0.794984635228954, "grad_norm": 1.5508298888363727, "learning_rate": 1.2268551571595782e-06, "loss": 0.4407, "step": 41910 }, { "epoch": 0.7951743237603854, "grad_norm": 1.4229752048294948, "learning_rate": 1.224683660628741e-06, "loss": 0.4215, "step": 41920 }, { "epoch": 0.7953640122918169, "grad_norm": 1.3961469170140848, "learning_rate": 1.2225138193108249e-06, "loss": 0.4297, "step": 41930 }, { "epoch": 0.7955537008232483, "grad_norm": 1.8375104719563051, "learning_rate": 1.2203456341571546e-06, "loss": 0.4299, "step": 41940 }, { "epoch": 0.7957433893546796, "grad_norm": 1.721584583446236, "learning_rate": 1.2181791061183318e-06, "loss": 0.3989, "step": 41950 }, { "epoch": 0.795933077886111, "grad_norm": 1.2219392413426116, "learning_rate": 1.2160142361442212e-06, "loss": 0.4174, "step": 41960 }, { "epoch": 0.7961227664175424, "grad_norm": 1.7175123062840023, "learning_rate": 1.2138510251839714e-06, "loss": 0.4181, "step": 41970 }, { "epoch": 0.7963124549489737, "grad_norm": 1.4639365713423285, "learning_rate": 1.2116894741859997e-06, "loss": 0.4502, "step": 41980 }, { "epoch": 0.7965021434804052, "grad_norm": 1.2746456058206344, "learning_rate": 1.2095295840979932e-06, "loss": 0.408, "step": 41990 }, { "epoch": 0.7966918320118366, "grad_norm": 1.5259901445651052, "learning_rate": 1.2073713558669158e-06, "loss": 0.427, "step": 42000 }, { "epoch": 0.796881520543268, "grad_norm": 1.5347765347106637, "learning_rate": 1.2052147904389987e-06, "loss": 0.4474, "step": 42010 }, { "epoch": 0.7970712090746993, "grad_norm": 1.4367270766696032, "learning_rate": 1.2030598887597444e-06, "loss": 0.4376, "step": 42020 }, { "epoch": 0.7972608976061307, "grad_norm": 1.4157637976466089, "learning_rate": 1.2009066517739292e-06, "loss": 0.4372, "step": 42030 }, { "epoch": 0.7974505861375621, "grad_norm": 1.569992310361327, "learning_rate": 1.1987550804255955e-06, "loss": 0.4325, "step": 42040 }, { "epoch": 0.7976402746689936, "grad_norm": 1.5016462355201114, "learning_rate": 1.1966051756580588e-06, "loss": 0.4144, "step": 42050 }, { "epoch": 0.7978299632004249, "grad_norm": 1.5621972595959701, "learning_rate": 1.1944569384139016e-06, "loss": 0.3959, "step": 42060 }, { "epoch": 0.7980196517318563, "grad_norm": 1.7137108854773788, "learning_rate": 1.1923103696349747e-06, "loss": 0.4498, "step": 42070 }, { "epoch": 0.7982093402632877, "grad_norm": 1.4497968034807907, "learning_rate": 1.190165470262402e-06, "loss": 0.4326, "step": 42080 }, { "epoch": 0.798399028794719, "grad_norm": 1.3752304476850958, "learning_rate": 1.1880222412365689e-06, "loss": 0.4463, "step": 42090 }, { "epoch": 0.7985887173261504, "grad_norm": 1.3070898389085805, "learning_rate": 1.1858806834971347e-06, "loss": 0.4098, "step": 42100 }, { "epoch": 0.7987784058575819, "grad_norm": 1.6829885766683466, "learning_rate": 1.1837407979830223e-06, "loss": 0.4294, "step": 42110 }, { "epoch": 0.7989680943890133, "grad_norm": 1.532455728352068, "learning_rate": 1.1816025856324198e-06, "loss": 0.4139, "step": 42120 }, { "epoch": 0.7991577829204446, "grad_norm": 1.397966931316021, "learning_rate": 1.1794660473827868e-06, "loss": 0.4263, "step": 42130 }, { "epoch": 0.799347471451876, "grad_norm": 1.5454403666789176, "learning_rate": 1.1773311841708467e-06, "loss": 0.4491, "step": 42140 }, { "epoch": 0.7995371599833074, "grad_norm": 1.2141432201240996, "learning_rate": 1.1751979969325876e-06, "loss": 0.4126, "step": 42150 }, { "epoch": 0.7997268485147389, "grad_norm": 1.2643329303366309, "learning_rate": 1.1730664866032632e-06, "loss": 0.4007, "step": 42160 }, { "epoch": 0.7999165370461702, "grad_norm": 1.3574159925462363, "learning_rate": 1.17093665411739e-06, "loss": 0.4097, "step": 42170 }, { "epoch": 0.8001062255776016, "grad_norm": 1.455440685492418, "learning_rate": 1.168808500408753e-06, "loss": 0.422, "step": 42180 }, { "epoch": 0.800295914109033, "grad_norm": 1.4030680410608072, "learning_rate": 1.1666820264104006e-06, "loss": 0.4081, "step": 42190 }, { "epoch": 0.8004856026404643, "grad_norm": 1.2890398446453224, "learning_rate": 1.16455723305464e-06, "loss": 0.4223, "step": 42200 }, { "epoch": 0.8006752911718957, "grad_norm": 1.6832612493623582, "learning_rate": 1.162434121273049e-06, "loss": 0.4523, "step": 42210 }, { "epoch": 0.8008649797033272, "grad_norm": 1.4579463298856343, "learning_rate": 1.1603126919964607e-06, "loss": 0.4411, "step": 42220 }, { "epoch": 0.8010546682347586, "grad_norm": 1.4196670196946999, "learning_rate": 1.158192946154974e-06, "loss": 0.4521, "step": 42230 }, { "epoch": 0.8012443567661899, "grad_norm": 1.3934294202578286, "learning_rate": 1.1560748846779518e-06, "loss": 0.4079, "step": 42240 }, { "epoch": 0.8014340452976213, "grad_norm": 1.2317782943586202, "learning_rate": 1.153958508494014e-06, "loss": 0.4273, "step": 42250 }, { "epoch": 0.8016237338290527, "grad_norm": 1.544952354700476, "learning_rate": 1.1518438185310465e-06, "loss": 0.4327, "step": 42260 }, { "epoch": 0.8018134223604841, "grad_norm": 1.535794364945714, "learning_rate": 1.1497308157161918e-06, "loss": 0.4128, "step": 42270 }, { "epoch": 0.8020031108919154, "grad_norm": 1.5267799754549494, "learning_rate": 1.147619500975853e-06, "loss": 0.4226, "step": 42280 }, { "epoch": 0.8021927994233469, "grad_norm": 1.3071878457846127, "learning_rate": 1.1455098752356969e-06, "loss": 0.4432, "step": 42290 }, { "epoch": 0.8023824879547783, "grad_norm": 1.4018722874101737, "learning_rate": 1.1434019394206447e-06, "loss": 0.4395, "step": 42300 }, { "epoch": 0.8025721764862096, "grad_norm": 1.71130835829576, "learning_rate": 1.1412956944548826e-06, "loss": 0.4791, "step": 42310 }, { "epoch": 0.802761865017641, "grad_norm": 1.5525866831976456, "learning_rate": 1.13919114126185e-06, "loss": 0.4192, "step": 42320 }, { "epoch": 0.8029515535490724, "grad_norm": 1.2992814674308066, "learning_rate": 1.1370882807642454e-06, "loss": 0.4217, "step": 42330 }, { "epoch": 0.8031412420805039, "grad_norm": 1.3610776852595572, "learning_rate": 1.1349871138840278e-06, "loss": 0.43, "step": 42340 }, { "epoch": 0.8033309306119352, "grad_norm": 1.5497819141441547, "learning_rate": 1.1328876415424144e-06, "loss": 0.4211, "step": 42350 }, { "epoch": 0.8035206191433666, "grad_norm": 1.792464788123695, "learning_rate": 1.1307898646598752e-06, "loss": 0.4425, "step": 42360 }, { "epoch": 0.803710307674798, "grad_norm": 1.3963406174242765, "learning_rate": 1.1286937841561397e-06, "loss": 0.4404, "step": 42370 }, { "epoch": 0.8038999962062293, "grad_norm": 1.579468343133648, "learning_rate": 1.1265994009501906e-06, "loss": 0.4407, "step": 42380 }, { "epoch": 0.8040896847376607, "grad_norm": 1.5091220120333966, "learning_rate": 1.1245067159602718e-06, "loss": 0.4038, "step": 42390 }, { "epoch": 0.8042793732690922, "grad_norm": 1.2636193795583242, "learning_rate": 1.1224157301038796e-06, "loss": 0.4187, "step": 42400 }, { "epoch": 0.8044690618005236, "grad_norm": 1.4512071485409037, "learning_rate": 1.1203264442977652e-06, "loss": 0.4391, "step": 42410 }, { "epoch": 0.8046587503319549, "grad_norm": 1.2556193846704549, "learning_rate": 1.1182388594579329e-06, "loss": 0.4264, "step": 42420 }, { "epoch": 0.8048484388633863, "grad_norm": 1.4849429090503554, "learning_rate": 1.116152976499646e-06, "loss": 0.4387, "step": 42430 }, { "epoch": 0.8050381273948177, "grad_norm": 1.5197533646242034, "learning_rate": 1.1140687963374153e-06, "loss": 0.4226, "step": 42440 }, { "epoch": 0.8052278159262491, "grad_norm": 1.4628493895698769, "learning_rate": 1.1119863198850124e-06, "loss": 0.4326, "step": 42450 }, { "epoch": 0.8054175044576805, "grad_norm": 1.3910826188164844, "learning_rate": 1.109905548055455e-06, "loss": 0.4208, "step": 42460 }, { "epoch": 0.8056071929891119, "grad_norm": 1.4508668244338325, "learning_rate": 1.1078264817610164e-06, "loss": 0.4305, "step": 42470 }, { "epoch": 0.8057968815205433, "grad_norm": 1.6326174560019342, "learning_rate": 1.1057491219132244e-06, "loss": 0.4408, "step": 42480 }, { "epoch": 0.8059865700519746, "grad_norm": 1.357058421781271, "learning_rate": 1.103673469422854e-06, "loss": 0.41, "step": 42490 }, { "epoch": 0.806176258583406, "grad_norm": 1.660339962422901, "learning_rate": 1.101599525199935e-06, "loss": 0.449, "step": 42500 }, { "epoch": 0.8063659471148374, "grad_norm": 1.48684587051325, "learning_rate": 1.099527290153749e-06, "loss": 0.4278, "step": 42510 }, { "epoch": 0.8065556356462689, "grad_norm": 1.4142465597518314, "learning_rate": 1.0974567651928248e-06, "loss": 0.4427, "step": 42520 }, { "epoch": 0.8067453241777002, "grad_norm": 1.7500639486518255, "learning_rate": 1.0953879512249437e-06, "loss": 0.4218, "step": 42530 }, { "epoch": 0.8069350127091316, "grad_norm": 1.2553375193441263, "learning_rate": 1.0933208491571345e-06, "loss": 0.4045, "step": 42540 }, { "epoch": 0.807124701240563, "grad_norm": 1.4424325430985414, "learning_rate": 1.0912554598956793e-06, "loss": 0.4338, "step": 42550 }, { "epoch": 0.8073143897719944, "grad_norm": 1.460064810114554, "learning_rate": 1.0891917843461075e-06, "loss": 0.4218, "step": 42560 }, { "epoch": 0.8075040783034257, "grad_norm": 1.5816036678161574, "learning_rate": 1.0871298234131967e-06, "loss": 0.4151, "step": 42570 }, { "epoch": 0.8076937668348572, "grad_norm": 1.4120966191787636, "learning_rate": 1.0850695780009724e-06, "loss": 0.4059, "step": 42580 }, { "epoch": 0.8078834553662886, "grad_norm": 1.4311913363143078, "learning_rate": 1.0830110490127072e-06, "loss": 0.4135, "step": 42590 }, { "epoch": 0.8080731438977199, "grad_norm": 1.5607420849511062, "learning_rate": 1.080954237350924e-06, "loss": 0.4482, "step": 42600 }, { "epoch": 0.8082628324291513, "grad_norm": 1.50073848045592, "learning_rate": 1.078899143917393e-06, "loss": 0.4546, "step": 42610 }, { "epoch": 0.8084525209605827, "grad_norm": 1.3023039799422549, "learning_rate": 1.0768457696131268e-06, "loss": 0.4311, "step": 42620 }, { "epoch": 0.8086422094920142, "grad_norm": 1.4554048789876979, "learning_rate": 1.0747941153383867e-06, "loss": 0.4269, "step": 42630 }, { "epoch": 0.8088318980234455, "grad_norm": 1.3745942049367303, "learning_rate": 1.072744181992682e-06, "loss": 0.4337, "step": 42640 }, { "epoch": 0.8090215865548769, "grad_norm": 1.3913836253092418, "learning_rate": 1.0706959704747632e-06, "loss": 0.4314, "step": 42650 }, { "epoch": 0.8092112750863083, "grad_norm": 1.6747860227396878, "learning_rate": 1.0686494816826305e-06, "loss": 0.4449, "step": 42660 }, { "epoch": 0.8094009636177397, "grad_norm": 1.7015457848468927, "learning_rate": 1.0666047165135256e-06, "loss": 0.4463, "step": 42670 }, { "epoch": 0.809590652149171, "grad_norm": 1.5187339526460846, "learning_rate": 1.0645616758639337e-06, "loss": 0.4247, "step": 42680 }, { "epoch": 0.8097803406806025, "grad_norm": 1.5834818448067305, "learning_rate": 1.0625203606295892e-06, "loss": 0.416, "step": 42690 }, { "epoch": 0.8099700292120339, "grad_norm": 1.440105094765439, "learning_rate": 1.060480771705462e-06, "loss": 0.4255, "step": 42700 }, { "epoch": 0.8101597177434652, "grad_norm": 1.298063395415752, "learning_rate": 1.0584429099857746e-06, "loss": 0.4254, "step": 42710 }, { "epoch": 0.8103494062748966, "grad_norm": 1.6352978254427928, "learning_rate": 1.056406776363984e-06, "loss": 0.4301, "step": 42720 }, { "epoch": 0.810539094806328, "grad_norm": 1.8031809118062019, "learning_rate": 1.054372371732792e-06, "loss": 0.4175, "step": 42730 }, { "epoch": 0.8107287833377594, "grad_norm": 1.2211848620134538, "learning_rate": 1.0523396969841465e-06, "loss": 0.4102, "step": 42740 }, { "epoch": 0.8109184718691907, "grad_norm": 1.4938840400817865, "learning_rate": 1.05030875300923e-06, "loss": 0.4087, "step": 42750 }, { "epoch": 0.8111081604006222, "grad_norm": 1.4982162503430554, "learning_rate": 1.048279540698472e-06, "loss": 0.4165, "step": 42760 }, { "epoch": 0.8112978489320536, "grad_norm": 1.418335627155998, "learning_rate": 1.0462520609415421e-06, "loss": 0.4294, "step": 42770 }, { "epoch": 0.811487537463485, "grad_norm": 1.4513464277549455, "learning_rate": 1.0442263146273446e-06, "loss": 0.4015, "step": 42780 }, { "epoch": 0.8116772259949163, "grad_norm": 1.2339243708378649, "learning_rate": 1.0422023026440303e-06, "loss": 0.4201, "step": 42790 }, { "epoch": 0.8118669145263477, "grad_norm": 1.468230505617086, "learning_rate": 1.0401800258789884e-06, "loss": 0.4466, "step": 42800 }, { "epoch": 0.8120566030577792, "grad_norm": 1.726366884341001, "learning_rate": 1.0381594852188436e-06, "loss": 0.4452, "step": 42810 }, { "epoch": 0.8122462915892105, "grad_norm": 1.4416512690175631, "learning_rate": 1.0361406815494652e-06, "loss": 0.4096, "step": 42820 }, { "epoch": 0.8124359801206419, "grad_norm": 1.4895573354048242, "learning_rate": 1.0341236157559553e-06, "loss": 0.4254, "step": 42830 }, { "epoch": 0.8126256686520733, "grad_norm": 1.4510505493279016, "learning_rate": 1.0321082887226564e-06, "loss": 0.4411, "step": 42840 }, { "epoch": 0.8128153571835047, "grad_norm": 1.3712331828045636, "learning_rate": 1.0300947013331514e-06, "loss": 0.4114, "step": 42850 }, { "epoch": 0.813005045714936, "grad_norm": 1.3660957848778474, "learning_rate": 1.0280828544702547e-06, "loss": 0.4177, "step": 42860 }, { "epoch": 0.8131947342463675, "grad_norm": 1.7259712235992113, "learning_rate": 1.0260727490160243e-06, "loss": 0.4246, "step": 42870 }, { "epoch": 0.8133844227777989, "grad_norm": 1.7760758262316736, "learning_rate": 1.024064385851749e-06, "loss": 0.4297, "step": 42880 }, { "epoch": 0.8135741113092303, "grad_norm": 1.3760813624158375, "learning_rate": 1.0220577658579556e-06, "loss": 0.4344, "step": 42890 }, { "epoch": 0.8137637998406616, "grad_norm": 1.5468999890636, "learning_rate": 1.0200528899144086e-06, "loss": 0.4136, "step": 42900 }, { "epoch": 0.813953488372093, "grad_norm": 1.5066530494684593, "learning_rate": 1.0180497589001048e-06, "loss": 0.4301, "step": 42910 }, { "epoch": 0.8141431769035244, "grad_norm": 3.2021010195957422, "learning_rate": 1.0160483736932792e-06, "loss": 0.4021, "step": 42920 }, { "epoch": 0.8143328654349558, "grad_norm": 1.604260873093354, "learning_rate": 1.0140487351713984e-06, "loss": 0.413, "step": 42930 }, { "epoch": 0.8145225539663872, "grad_norm": 1.307726996858775, "learning_rate": 1.0120508442111638e-06, "loss": 0.4284, "step": 42940 }, { "epoch": 0.8147122424978186, "grad_norm": 1.3681272091556058, "learning_rate": 1.0100547016885137e-06, "loss": 0.4048, "step": 42950 }, { "epoch": 0.81490193102925, "grad_norm": 1.2245316654789544, "learning_rate": 1.008060308478614e-06, "loss": 0.4389, "step": 42960 }, { "epoch": 0.8150916195606813, "grad_norm": 1.2755741248352257, "learning_rate": 1.0060676654558705e-06, "loss": 0.4287, "step": 42970 }, { "epoch": 0.8152813080921127, "grad_norm": 1.6125465998697608, "learning_rate": 1.004076773493916e-06, "loss": 0.4418, "step": 42980 }, { "epoch": 0.8154709966235442, "grad_norm": 1.3747082826740427, "learning_rate": 1.002087633465617e-06, "loss": 0.4151, "step": 42990 }, { "epoch": 0.8156606851549756, "grad_norm": 1.7485389220973668, "learning_rate": 1.0001002462430747e-06, "loss": 0.429, "step": 43000 }, { "epoch": 0.8158503736864069, "grad_norm": 1.2291166902305362, "learning_rate": 9.981146126976198e-07, "loss": 0.4157, "step": 43010 }, { "epoch": 0.8160400622178383, "grad_norm": 1.8209102802985166, "learning_rate": 9.961307336998138e-07, "loss": 0.4309, "step": 43020 }, { "epoch": 0.8162297507492697, "grad_norm": 1.4455626792668903, "learning_rate": 9.941486101194487e-07, "loss": 0.4397, "step": 43030 }, { "epoch": 0.816419439280701, "grad_norm": 1.9876279216805741, "learning_rate": 9.921682428255463e-07, "loss": 0.4434, "step": 43040 }, { "epoch": 0.8166091278121325, "grad_norm": 1.7685509736566805, "learning_rate": 9.901896326863613e-07, "loss": 0.4165, "step": 43050 }, { "epoch": 0.8167988163435639, "grad_norm": 1.1351523788624682, "learning_rate": 9.88212780569377e-07, "loss": 0.4131, "step": 43060 }, { "epoch": 0.8169885048749953, "grad_norm": 1.780122344083268, "learning_rate": 9.862376873413026e-07, "loss": 0.4038, "step": 43070 }, { "epoch": 0.8171781934064266, "grad_norm": 1.4226022673419583, "learning_rate": 9.842643538680813e-07, "loss": 0.4343, "step": 43080 }, { "epoch": 0.817367881937858, "grad_norm": 1.2403922697430627, "learning_rate": 9.822927810148814e-07, "loss": 0.4117, "step": 43090 }, { "epoch": 0.8175575704692895, "grad_norm": 1.5055301487257549, "learning_rate": 9.803229696460987e-07, "loss": 0.4311, "step": 43100 }, { "epoch": 0.8177472590007209, "grad_norm": 1.5295114788271618, "learning_rate": 9.783549206253602e-07, "loss": 0.4312, "step": 43110 }, { "epoch": 0.8179369475321522, "grad_norm": 1.6378625643650961, "learning_rate": 9.76388634815516e-07, "loss": 0.4434, "step": 43120 }, { "epoch": 0.8181266360635836, "grad_norm": 1.5707767850633676, "learning_rate": 9.744241130786475e-07, "loss": 0.422, "step": 43130 }, { "epoch": 0.818316324595015, "grad_norm": 1.3630657672143796, "learning_rate": 9.724613562760593e-07, "loss": 0.4328, "step": 43140 }, { "epoch": 0.8185060131264463, "grad_norm": 1.5245035599149086, "learning_rate": 9.705003652682827e-07, "loss": 0.4367, "step": 43150 }, { "epoch": 0.8186957016578778, "grad_norm": 1.4345075079161869, "learning_rate": 9.685411409150762e-07, "loss": 0.4119, "step": 43160 }, { "epoch": 0.8188853901893092, "grad_norm": 1.397673994249349, "learning_rate": 9.66583684075424e-07, "loss": 0.4161, "step": 43170 }, { "epoch": 0.8190750787207406, "grad_norm": 1.7323055216738201, "learning_rate": 9.646279956075338e-07, "loss": 0.4116, "step": 43180 }, { "epoch": 0.8192647672521719, "grad_norm": 1.3837331143902465, "learning_rate": 9.626740763688385e-07, "loss": 0.4146, "step": 43190 }, { "epoch": 0.8194544557836033, "grad_norm": 1.4804163000751096, "learning_rate": 9.60721927215994e-07, "loss": 0.4516, "step": 43200 }, { "epoch": 0.8196441443150347, "grad_norm": 1.5493407006759476, "learning_rate": 9.587715490048826e-07, "loss": 0.4264, "step": 43210 }, { "epoch": 0.819833832846466, "grad_norm": 1.5151993147041425, "learning_rate": 9.568229425906105e-07, "loss": 0.4205, "step": 43220 }, { "epoch": 0.8200235213778975, "grad_norm": 1.574861164665697, "learning_rate": 9.54876108827505e-07, "loss": 0.4157, "step": 43230 }, { "epoch": 0.8202132099093289, "grad_norm": 1.3690239572586649, "learning_rate": 9.529310485691162e-07, "loss": 0.4322, "step": 43240 }, { "epoch": 0.8204028984407603, "grad_norm": 1.4830915902020103, "learning_rate": 9.509877626682162e-07, "loss": 0.447, "step": 43250 }, { "epoch": 0.8205925869721916, "grad_norm": 1.3115134650699016, "learning_rate": 9.490462519768023e-07, "loss": 0.415, "step": 43260 }, { "epoch": 0.820782275503623, "grad_norm": 1.4547958065335929, "learning_rate": 9.47106517346092e-07, "loss": 0.4036, "step": 43270 }, { "epoch": 0.8209719640350545, "grad_norm": 1.4440157434542615, "learning_rate": 9.451685596265231e-07, "loss": 0.4471, "step": 43280 }, { "epoch": 0.8211616525664859, "grad_norm": 1.133774218371234, "learning_rate": 9.432323796677529e-07, "loss": 0.4343, "step": 43290 }, { "epoch": 0.8213513410979172, "grad_norm": 1.5923779087496372, "learning_rate": 9.412979783186638e-07, "loss": 0.435, "step": 43300 }, { "epoch": 0.8215410296293486, "grad_norm": 1.360586445568396, "learning_rate": 9.393653564273541e-07, "loss": 0.4081, "step": 43310 }, { "epoch": 0.82173071816078, "grad_norm": 1.6193032129216027, "learning_rate": 9.374345148411462e-07, "loss": 0.4091, "step": 43320 }, { "epoch": 0.8219204066922113, "grad_norm": 1.3506105677220992, "learning_rate": 9.355054544065773e-07, "loss": 0.4189, "step": 43330 }, { "epoch": 0.8221100952236428, "grad_norm": 1.448469248257159, "learning_rate": 9.335781759694057e-07, "loss": 0.4245, "step": 43340 }, { "epoch": 0.8222997837550742, "grad_norm": 1.6782314496859587, "learning_rate": 9.3165268037461e-07, "loss": 0.4375, "step": 43350 }, { "epoch": 0.8224894722865056, "grad_norm": 1.5703799679714785, "learning_rate": 9.297289684663841e-07, "loss": 0.4189, "step": 43360 }, { "epoch": 0.8226791608179369, "grad_norm": 1.4086492908384407, "learning_rate": 9.278070410881423e-07, "loss": 0.4501, "step": 43370 }, { "epoch": 0.8228688493493683, "grad_norm": 1.4654416341944787, "learning_rate": 9.258868990825176e-07, "loss": 0.4511, "step": 43380 }, { "epoch": 0.8230585378807997, "grad_norm": 1.3937835840033868, "learning_rate": 9.239685432913565e-07, "loss": 0.4192, "step": 43390 }, { "epoch": 0.8232482264122312, "grad_norm": 1.4075068430153888, "learning_rate": 9.220519745557238e-07, "loss": 0.4228, "step": 43400 }, { "epoch": 0.8234379149436625, "grad_norm": 1.1609170395072312, "learning_rate": 9.201371937159009e-07, "loss": 0.3857, "step": 43410 }, { "epoch": 0.8236276034750939, "grad_norm": 1.8394334849034384, "learning_rate": 9.182242016113869e-07, "loss": 0.4529, "step": 43420 }, { "epoch": 0.8238172920065253, "grad_norm": 1.576529821200015, "learning_rate": 9.163129990808962e-07, "loss": 0.426, "step": 43430 }, { "epoch": 0.8240069805379566, "grad_norm": 0.928395583223825, "learning_rate": 9.144035869623563e-07, "loss": 0.4444, "step": 43440 }, { "epoch": 0.824196669069388, "grad_norm": 1.6666686802163158, "learning_rate": 9.124959660929105e-07, "loss": 0.4443, "step": 43450 }, { "epoch": 0.8243863576008195, "grad_norm": 1.3692879963306612, "learning_rate": 9.105901373089199e-07, "loss": 0.4106, "step": 43460 }, { "epoch": 0.8245760461322509, "grad_norm": 2.6765585161002616, "learning_rate": 9.086861014459553e-07, "loss": 0.4357, "step": 43470 }, { "epoch": 0.8247657346636822, "grad_norm": 1.4476061161880633, "learning_rate": 9.067838593388056e-07, "loss": 0.4176, "step": 43480 }, { "epoch": 0.8249554231951136, "grad_norm": 1.3661984836504533, "learning_rate": 9.048834118214705e-07, "loss": 0.4088, "step": 43490 }, { "epoch": 0.825145111726545, "grad_norm": 1.4113730523115797, "learning_rate": 9.029847597271624e-07, "loss": 0.4223, "step": 43500 }, { "epoch": 0.8253348002579765, "grad_norm": 1.239720949285354, "learning_rate": 9.010879038883102e-07, "loss": 0.4242, "step": 43510 }, { "epoch": 0.8255244887894078, "grad_norm": 1.1545532013180921, "learning_rate": 8.991928451365501e-07, "loss": 0.4146, "step": 43520 }, { "epoch": 0.8257141773208392, "grad_norm": 1.4437134830923357, "learning_rate": 8.97299584302736e-07, "loss": 0.4406, "step": 43530 }, { "epoch": 0.8259038658522706, "grad_norm": 1.4519541684608288, "learning_rate": 8.954081222169298e-07, "loss": 0.3863, "step": 43540 }, { "epoch": 0.8260935543837019, "grad_norm": 1.3395594224352505, "learning_rate": 8.93518459708404e-07, "loss": 0.4274, "step": 43550 }, { "epoch": 0.8262832429151333, "grad_norm": 1.5101229661425941, "learning_rate": 8.916305976056467e-07, "loss": 0.4201, "step": 43560 }, { "epoch": 0.8264729314465648, "grad_norm": 1.6590214937661782, "learning_rate": 8.897445367363505e-07, "loss": 0.4515, "step": 43570 }, { "epoch": 0.8266626199779962, "grad_norm": 1.3224676516834786, "learning_rate": 8.878602779274247e-07, "loss": 0.4261, "step": 43580 }, { "epoch": 0.8268523085094275, "grad_norm": 1.8485522943623824, "learning_rate": 8.859778220049841e-07, "loss": 0.4562, "step": 43590 }, { "epoch": 0.8270419970408589, "grad_norm": 1.5299289272828247, "learning_rate": 8.840971697943529e-07, "loss": 0.4339, "step": 43600 }, { "epoch": 0.8272316855722903, "grad_norm": 1.4491462760189184, "learning_rate": 8.822183221200692e-07, "loss": 0.4459, "step": 43610 }, { "epoch": 0.8274213741037217, "grad_norm": 1.520104063310853, "learning_rate": 8.803412798058731e-07, "loss": 0.4215, "step": 43620 }, { "epoch": 0.827611062635153, "grad_norm": 1.4289556865021846, "learning_rate": 8.784660436747183e-07, "loss": 0.4134, "step": 43630 }, { "epoch": 0.8278007511665845, "grad_norm": 1.2034694617017758, "learning_rate": 8.765926145487674e-07, "loss": 0.4148, "step": 43640 }, { "epoch": 0.8279904396980159, "grad_norm": 1.772962020172949, "learning_rate": 8.747209932493839e-07, "loss": 0.4193, "step": 43650 }, { "epoch": 0.8281801282294472, "grad_norm": 1.4924217426102564, "learning_rate": 8.728511805971457e-07, "loss": 0.4264, "step": 43660 }, { "epoch": 0.8283698167608786, "grad_norm": 1.5203268571562019, "learning_rate": 8.709831774118366e-07, "loss": 0.4421, "step": 43670 }, { "epoch": 0.82855950529231, "grad_norm": 1.7969549203381199, "learning_rate": 8.691169845124425e-07, "loss": 0.4318, "step": 43680 }, { "epoch": 0.8287491938237415, "grad_norm": 2.522324299907255, "learning_rate": 8.672526027171618e-07, "loss": 0.4586, "step": 43690 }, { "epoch": 0.8289388823551728, "grad_norm": 1.3880362019224497, "learning_rate": 8.653900328433945e-07, "loss": 0.422, "step": 43700 }, { "epoch": 0.8291285708866042, "grad_norm": 1.7041630229782605, "learning_rate": 8.635292757077468e-07, "loss": 0.4073, "step": 43710 }, { "epoch": 0.8293182594180356, "grad_norm": 1.4639609804388038, "learning_rate": 8.616703321260328e-07, "loss": 0.4226, "step": 43720 }, { "epoch": 0.829507947949467, "grad_norm": 1.531152398561569, "learning_rate": 8.598132029132678e-07, "loss": 0.4165, "step": 43730 }, { "epoch": 0.8296976364808983, "grad_norm": 1.5325794904360572, "learning_rate": 8.579578888836749e-07, "loss": 0.4374, "step": 43740 }, { "epoch": 0.8298873250123298, "grad_norm": 1.5730865563809198, "learning_rate": 8.561043908506799e-07, "loss": 0.4038, "step": 43750 }, { "epoch": 0.8300770135437612, "grad_norm": 1.3157324146949516, "learning_rate": 8.542527096269105e-07, "loss": 0.4133, "step": 43760 }, { "epoch": 0.8302667020751925, "grad_norm": 1.4774858431135012, "learning_rate": 8.524028460242018e-07, "loss": 0.4207, "step": 43770 }, { "epoch": 0.8304563906066239, "grad_norm": 1.3497073046936146, "learning_rate": 8.505548008535886e-07, "loss": 0.4256, "step": 43780 }, { "epoch": 0.8306460791380553, "grad_norm": 1.3915802730633884, "learning_rate": 8.487085749253116e-07, "loss": 0.3998, "step": 43790 }, { "epoch": 0.8308357676694867, "grad_norm": 1.4975954423072477, "learning_rate": 8.46864169048811e-07, "loss": 0.4348, "step": 43800 }, { "epoch": 0.8310254562009181, "grad_norm": 1.5998047691736914, "learning_rate": 8.450215840327291e-07, "loss": 0.4424, "step": 43810 }, { "epoch": 0.8312151447323495, "grad_norm": 1.416759939665774, "learning_rate": 8.431808206849129e-07, "loss": 0.4058, "step": 43820 }, { "epoch": 0.8314048332637809, "grad_norm": 1.59874076321181, "learning_rate": 8.413418798124067e-07, "loss": 0.4219, "step": 43830 }, { "epoch": 0.8315945217952123, "grad_norm": 1.2782837443898039, "learning_rate": 8.395047622214602e-07, "loss": 0.4233, "step": 43840 }, { "epoch": 0.8317842103266436, "grad_norm": 1.7144356243333783, "learning_rate": 8.376694687175208e-07, "loss": 0.4468, "step": 43850 }, { "epoch": 0.831973898858075, "grad_norm": 1.371330300383349, "learning_rate": 8.35836000105234e-07, "loss": 0.4354, "step": 43860 }, { "epoch": 0.8321635873895065, "grad_norm": 1.555102943645896, "learning_rate": 8.340043571884498e-07, "loss": 0.4073, "step": 43870 }, { "epoch": 0.8323532759209378, "grad_norm": 1.4128018314755568, "learning_rate": 8.321745407702181e-07, "loss": 0.4375, "step": 43880 }, { "epoch": 0.8325429644523692, "grad_norm": 1.6983097145779067, "learning_rate": 8.303465516527831e-07, "loss": 0.3991, "step": 43890 }, { "epoch": 0.8327326529838006, "grad_norm": 1.5014603241211701, "learning_rate": 8.285203906375911e-07, "loss": 0.4144, "step": 43900 }, { "epoch": 0.832922341515232, "grad_norm": 1.8310365361118777, "learning_rate": 8.266960585252853e-07, "loss": 0.4244, "step": 43910 }, { "epoch": 0.8331120300466633, "grad_norm": 1.4889811207686603, "learning_rate": 8.248735561157095e-07, "loss": 0.4625, "step": 43920 }, { "epoch": 0.8333017185780948, "grad_norm": 1.5478874412355095, "learning_rate": 8.230528842079044e-07, "loss": 0.4361, "step": 43930 }, { "epoch": 0.8334914071095262, "grad_norm": 1.4061727105423802, "learning_rate": 8.212340436001059e-07, "loss": 0.4156, "step": 43940 }, { "epoch": 0.8336810956409575, "grad_norm": 1.4810664768497415, "learning_rate": 8.194170350897512e-07, "loss": 0.4401, "step": 43950 }, { "epoch": 0.8338707841723889, "grad_norm": 1.3566563282866462, "learning_rate": 8.176018594734697e-07, "loss": 0.3973, "step": 43960 }, { "epoch": 0.8340604727038203, "grad_norm": 1.8110584107665173, "learning_rate": 8.157885175470892e-07, "loss": 0.4367, "step": 43970 }, { "epoch": 0.8342501612352518, "grad_norm": 1.362779263625032, "learning_rate": 8.139770101056355e-07, "loss": 0.4329, "step": 43980 }, { "epoch": 0.8344398497666831, "grad_norm": 1.1360879290501298, "learning_rate": 8.121673379433259e-07, "loss": 0.3911, "step": 43990 }, { "epoch": 0.8346295382981145, "grad_norm": 1.4626446932817498, "learning_rate": 8.103595018535781e-07, "loss": 0.4287, "step": 44000 }, { "epoch": 0.8348192268295459, "grad_norm": 1.7959503864854713, "learning_rate": 8.085535026290009e-07, "loss": 0.4257, "step": 44010 }, { "epoch": 0.8350089153609773, "grad_norm": 1.4701249797740703, "learning_rate": 8.067493410613975e-07, "loss": 0.423, "step": 44020 }, { "epoch": 0.8351986038924086, "grad_norm": 1.4187635381917587, "learning_rate": 8.049470179417679e-07, "loss": 0.4152, "step": 44030 }, { "epoch": 0.83538829242384, "grad_norm": 1.6922307751709345, "learning_rate": 8.031465340603073e-07, "loss": 0.4341, "step": 44040 }, { "epoch": 0.8355779809552715, "grad_norm": 1.456784908488898, "learning_rate": 8.013478902064004e-07, "loss": 0.4406, "step": 44050 }, { "epoch": 0.8357676694867028, "grad_norm": 1.2755242075358957, "learning_rate": 7.995510871686269e-07, "loss": 0.4437, "step": 44060 }, { "epoch": 0.8359573580181342, "grad_norm": 1.3732769445736621, "learning_rate": 7.977561257347594e-07, "loss": 0.4124, "step": 44070 }, { "epoch": 0.8361470465495656, "grad_norm": 1.6633148412521854, "learning_rate": 7.959630066917634e-07, "loss": 0.4065, "step": 44080 }, { "epoch": 0.836336735080997, "grad_norm": 1.3286790990918471, "learning_rate": 7.941717308257979e-07, "loss": 0.4388, "step": 44090 }, { "epoch": 0.8365264236124283, "grad_norm": 1.7149243222051274, "learning_rate": 7.923822989222119e-07, "loss": 0.426, "step": 44100 }, { "epoch": 0.8367161121438598, "grad_norm": 1.4596713444467913, "learning_rate": 7.90594711765545e-07, "loss": 0.4208, "step": 44110 }, { "epoch": 0.8369058006752912, "grad_norm": 1.3162826520601916, "learning_rate": 7.888089701395318e-07, "loss": 0.4174, "step": 44120 }, { "epoch": 0.8370954892067226, "grad_norm": 1.3909166120253451, "learning_rate": 7.87025074827093e-07, "loss": 0.4193, "step": 44130 }, { "epoch": 0.8372851777381539, "grad_norm": 1.6536265750506012, "learning_rate": 7.852430266103444e-07, "loss": 0.4343, "step": 44140 }, { "epoch": 0.8374748662695853, "grad_norm": 1.6347038248301664, "learning_rate": 7.834628262705896e-07, "loss": 0.4285, "step": 44150 }, { "epoch": 0.8376645548010168, "grad_norm": 1.5279213546691637, "learning_rate": 7.8168447458832e-07, "loss": 0.4307, "step": 44160 }, { "epoch": 0.8378542433324481, "grad_norm": 1.5435829557171261, "learning_rate": 7.799079723432224e-07, "loss": 0.4183, "step": 44170 }, { "epoch": 0.8380439318638795, "grad_norm": 1.6773054023275065, "learning_rate": 7.781333203141655e-07, "loss": 0.4137, "step": 44180 }, { "epoch": 0.8382336203953109, "grad_norm": 1.674974608826804, "learning_rate": 7.76360519279214e-07, "loss": 0.4394, "step": 44190 }, { "epoch": 0.8384233089267423, "grad_norm": 1.4908113899840691, "learning_rate": 7.745895700156153e-07, "loss": 0.4025, "step": 44200 }, { "epoch": 0.8386129974581736, "grad_norm": 1.4596520887943838, "learning_rate": 7.728204732998062e-07, "loss": 0.4255, "step": 44210 }, { "epoch": 0.8388026859896051, "grad_norm": 1.4055584825727874, "learning_rate": 7.710532299074153e-07, "loss": 0.4171, "step": 44220 }, { "epoch": 0.8389923745210365, "grad_norm": 1.7257814818409998, "learning_rate": 7.692878406132525e-07, "loss": 0.4268, "step": 44230 }, { "epoch": 0.8391820630524679, "grad_norm": 1.4288400575613895, "learning_rate": 7.67524306191319e-07, "loss": 0.3993, "step": 44240 }, { "epoch": 0.8393717515838992, "grad_norm": 1.635392572689795, "learning_rate": 7.657626274148034e-07, "loss": 0.4216, "step": 44250 }, { "epoch": 0.8395614401153306, "grad_norm": 1.365679752370959, "learning_rate": 7.640028050560772e-07, "loss": 0.4077, "step": 44260 }, { "epoch": 0.839751128646762, "grad_norm": 1.676899835376375, "learning_rate": 7.622448398867005e-07, "loss": 0.4642, "step": 44270 }, { "epoch": 0.8399408171781934, "grad_norm": 1.6955512110460622, "learning_rate": 7.604887326774169e-07, "loss": 0.4449, "step": 44280 }, { "epoch": 0.8401305057096248, "grad_norm": 1.1669810532257927, "learning_rate": 7.587344841981575e-07, "loss": 0.3967, "step": 44290 }, { "epoch": 0.8403201942410562, "grad_norm": 1.5103570493170646, "learning_rate": 7.569820952180401e-07, "loss": 0.442, "step": 44300 }, { "epoch": 0.8405098827724876, "grad_norm": 1.3080329222425042, "learning_rate": 7.552315665053633e-07, "loss": 0.4058, "step": 44310 }, { "epoch": 0.8406995713039189, "grad_norm": 1.0525223311461194, "learning_rate": 7.534828988276105e-07, "loss": 0.4154, "step": 44320 }, { "epoch": 0.8408892598353503, "grad_norm": 1.3904430429447237, "learning_rate": 7.517360929514539e-07, "loss": 0.4394, "step": 44330 }, { "epoch": 0.8410789483667818, "grad_norm": 1.3892926998477435, "learning_rate": 7.499911496427426e-07, "loss": 0.4473, "step": 44340 }, { "epoch": 0.8412686368982132, "grad_norm": 1.587951243713772, "learning_rate": 7.482480696665151e-07, "loss": 0.4419, "step": 44350 }, { "epoch": 0.8414583254296445, "grad_norm": 1.5740395402461398, "learning_rate": 7.465068537869891e-07, "loss": 0.4021, "step": 44360 }, { "epoch": 0.8416480139610759, "grad_norm": 1.3365120173257652, "learning_rate": 7.44767502767566e-07, "loss": 0.426, "step": 44370 }, { "epoch": 0.8418377024925073, "grad_norm": 1.4106935986308462, "learning_rate": 7.430300173708316e-07, "loss": 0.4162, "step": 44380 }, { "epoch": 0.8420273910239386, "grad_norm": 1.586407904763393, "learning_rate": 7.412943983585502e-07, "loss": 0.4246, "step": 44390 }, { "epoch": 0.8422170795553701, "grad_norm": 1.3864747049927137, "learning_rate": 7.39560646491671e-07, "loss": 0.424, "step": 44400 }, { "epoch": 0.8424067680868015, "grad_norm": 1.5673940957096169, "learning_rate": 7.378287625303232e-07, "loss": 0.4412, "step": 44410 }, { "epoch": 0.8425964566182329, "grad_norm": 1.49111005572538, "learning_rate": 7.360987472338154e-07, "loss": 0.4165, "step": 44420 }, { "epoch": 0.8427861451496642, "grad_norm": 1.352709754716588, "learning_rate": 7.343706013606416e-07, "loss": 0.4003, "step": 44430 }, { "epoch": 0.8429758336810956, "grad_norm": 1.2731335799239059, "learning_rate": 7.326443256684701e-07, "loss": 0.4579, "step": 44440 }, { "epoch": 0.843165522212527, "grad_norm": 1.498537776368755, "learning_rate": 7.309199209141538e-07, "loss": 0.4063, "step": 44450 }, { "epoch": 0.8433552107439585, "grad_norm": 1.2671273325277381, "learning_rate": 7.291973878537267e-07, "loss": 0.3978, "step": 44460 }, { "epoch": 0.8435448992753898, "grad_norm": 1.5151798507132992, "learning_rate": 7.274767272423943e-07, "loss": 0.3981, "step": 44470 }, { "epoch": 0.8437345878068212, "grad_norm": 1.3168495629191486, "learning_rate": 7.257579398345499e-07, "loss": 0.4296, "step": 44480 }, { "epoch": 0.8439242763382526, "grad_norm": 1.3073632116669636, "learning_rate": 7.2404102638376e-07, "loss": 0.4095, "step": 44490 }, { "epoch": 0.8441139648696839, "grad_norm": 1.601175891591172, "learning_rate": 7.223259876427719e-07, "loss": 0.4476, "step": 44500 }, { "epoch": 0.8443036534011154, "grad_norm": 1.365400405588277, "learning_rate": 7.206128243635129e-07, "loss": 0.4437, "step": 44510 }, { "epoch": 0.8444933419325468, "grad_norm": 1.9815937321979116, "learning_rate": 7.189015372970814e-07, "loss": 0.4302, "step": 44520 }, { "epoch": 0.8446830304639782, "grad_norm": 1.3600208063541823, "learning_rate": 7.171921271937593e-07, "loss": 0.4302, "step": 44530 }, { "epoch": 0.8448727189954095, "grad_norm": 1.2113438279483526, "learning_rate": 7.154845948030048e-07, "loss": 0.4204, "step": 44540 }, { "epoch": 0.8450624075268409, "grad_norm": 1.4457016613945322, "learning_rate": 7.137789408734502e-07, "loss": 0.392, "step": 44550 }, { "epoch": 0.8452520960582723, "grad_norm": 1.8953088917560341, "learning_rate": 7.120751661529074e-07, "loss": 0.3992, "step": 44560 }, { "epoch": 0.8454417845897038, "grad_norm": 1.1577842782522014, "learning_rate": 7.103732713883621e-07, "loss": 0.4037, "step": 44570 }, { "epoch": 0.8456314731211351, "grad_norm": 1.5158064370238382, "learning_rate": 7.086732573259753e-07, "loss": 0.4242, "step": 44580 }, { "epoch": 0.8458211616525665, "grad_norm": 1.4484495101237644, "learning_rate": 7.069751247110862e-07, "loss": 0.4115, "step": 44590 }, { "epoch": 0.8460108501839979, "grad_norm": 1.596048459736807, "learning_rate": 7.052788742882061e-07, "loss": 0.4546, "step": 44600 }, { "epoch": 0.8462005387154292, "grad_norm": 1.6296359508002036, "learning_rate": 7.035845068010244e-07, "loss": 0.4166, "step": 44610 }, { "epoch": 0.8463902272468606, "grad_norm": 1.388788842854816, "learning_rate": 7.018920229924015e-07, "loss": 0.4379, "step": 44620 }, { "epoch": 0.8465799157782921, "grad_norm": 1.515891951506198, "learning_rate": 7.00201423604373e-07, "loss": 0.4379, "step": 44630 }, { "epoch": 0.8467696043097235, "grad_norm": 1.3805036747393318, "learning_rate": 6.985127093781513e-07, "loss": 0.4438, "step": 44640 }, { "epoch": 0.8469592928411548, "grad_norm": 1.481347187082709, "learning_rate": 6.968258810541173e-07, "loss": 0.4489, "step": 44650 }, { "epoch": 0.8471489813725862, "grad_norm": 1.3057952694811417, "learning_rate": 6.951409393718289e-07, "loss": 0.4089, "step": 44660 }, { "epoch": 0.8473386699040176, "grad_norm": 1.541015198612865, "learning_rate": 6.934578850700158e-07, "loss": 0.4235, "step": 44670 }, { "epoch": 0.847528358435449, "grad_norm": 1.2706509754023416, "learning_rate": 6.917767188865782e-07, "loss": 0.4372, "step": 44680 }, { "epoch": 0.8477180469668804, "grad_norm": 1.401224892725216, "learning_rate": 6.900974415585914e-07, "loss": 0.4159, "step": 44690 }, { "epoch": 0.8479077354983118, "grad_norm": 1.782173093190325, "learning_rate": 6.884200538223024e-07, "loss": 0.4377, "step": 44700 }, { "epoch": 0.8480974240297432, "grad_norm": 1.5213720992569415, "learning_rate": 6.867445564131281e-07, "loss": 0.4128, "step": 44710 }, { "epoch": 0.8482871125611745, "grad_norm": 1.3025031762615675, "learning_rate": 6.850709500656566e-07, "loss": 0.397, "step": 44720 }, { "epoch": 0.8484768010926059, "grad_norm": 1.2385419829872784, "learning_rate": 6.833992355136465e-07, "loss": 0.4313, "step": 44730 }, { "epoch": 0.8486664896240373, "grad_norm": 0.9712780565205537, "learning_rate": 6.817294134900293e-07, "loss": 0.4267, "step": 44740 }, { "epoch": 0.8488561781554688, "grad_norm": 1.653906771986044, "learning_rate": 6.800614847269072e-07, "loss": 0.4301, "step": 44750 }, { "epoch": 0.8490458666869001, "grad_norm": 1.385712714399491, "learning_rate": 6.783954499555479e-07, "loss": 0.4035, "step": 44760 }, { "epoch": 0.8492355552183315, "grad_norm": 1.4068302403780806, "learning_rate": 6.767313099063944e-07, "loss": 0.4257, "step": 44770 }, { "epoch": 0.8494252437497629, "grad_norm": 1.364355170243881, "learning_rate": 6.75069065309052e-07, "loss": 0.4205, "step": 44780 }, { "epoch": 0.8496149322811942, "grad_norm": 1.1850727309799312, "learning_rate": 6.734087168923015e-07, "loss": 0.4262, "step": 44790 }, { "epoch": 0.8498046208126256, "grad_norm": 1.6031366194995793, "learning_rate": 6.717502653840908e-07, "loss": 0.4346, "step": 44800 }, { "epoch": 0.8499943093440571, "grad_norm": 1.3106590594631229, "learning_rate": 6.70093711511533e-07, "loss": 0.4283, "step": 44810 }, { "epoch": 0.8501839978754885, "grad_norm": 1.6035216945173048, "learning_rate": 6.684390560009136e-07, "loss": 0.4317, "step": 44820 }, { "epoch": 0.8503736864069198, "grad_norm": 1.7611152022799226, "learning_rate": 6.667862995776825e-07, "loss": 0.4177, "step": 44830 }, { "epoch": 0.8505633749383512, "grad_norm": 1.32958851536201, "learning_rate": 6.651354429664575e-07, "loss": 0.4457, "step": 44840 }, { "epoch": 0.8507530634697826, "grad_norm": 1.2682494625781378, "learning_rate": 6.634864868910252e-07, "loss": 0.4092, "step": 44850 }, { "epoch": 0.850942752001214, "grad_norm": 1.571228923802189, "learning_rate": 6.618394320743365e-07, "loss": 0.3971, "step": 44860 }, { "epoch": 0.8511324405326454, "grad_norm": 1.3701117234214362, "learning_rate": 6.601942792385125e-07, "loss": 0.4022, "step": 44870 }, { "epoch": 0.8513221290640768, "grad_norm": 1.3690304906688835, "learning_rate": 6.585510291048364e-07, "loss": 0.424, "step": 44880 }, { "epoch": 0.8515118175955082, "grad_norm": 1.2200080492909424, "learning_rate": 6.569096823937576e-07, "loss": 0.4259, "step": 44890 }, { "epoch": 0.8517015061269395, "grad_norm": 1.5089924500850345, "learning_rate": 6.552702398248933e-07, "loss": 0.4264, "step": 44900 }, { "epoch": 0.8518911946583709, "grad_norm": 1.426286163655415, "learning_rate": 6.536327021170263e-07, "loss": 0.4317, "step": 44910 }, { "epoch": 0.8520808831898024, "grad_norm": 1.4100099716708747, "learning_rate": 6.519970699881012e-07, "loss": 0.409, "step": 44920 }, { "epoch": 0.8522705717212338, "grad_norm": 1.4432672229349917, "learning_rate": 6.503633441552282e-07, "loss": 0.4248, "step": 44930 }, { "epoch": 0.8524602602526651, "grad_norm": 1.8455005382878562, "learning_rate": 6.487315253346821e-07, "loss": 0.4236, "step": 44940 }, { "epoch": 0.8526499487840965, "grad_norm": 1.7769074505817242, "learning_rate": 6.471016142419018e-07, "loss": 0.4395, "step": 44950 }, { "epoch": 0.8528396373155279, "grad_norm": 1.3714707419740158, "learning_rate": 6.454736115914911e-07, "loss": 0.4151, "step": 44960 }, { "epoch": 0.8530293258469593, "grad_norm": 1.6107946743137622, "learning_rate": 6.438475180972148e-07, "loss": 0.4424, "step": 44970 }, { "epoch": 0.8532190143783906, "grad_norm": 2.427414573044939, "learning_rate": 6.422233344719997e-07, "loss": 0.4371, "step": 44980 }, { "epoch": 0.8534087029098221, "grad_norm": 1.4219073231193813, "learning_rate": 6.406010614279401e-07, "loss": 0.4288, "step": 44990 }, { "epoch": 0.8535983914412535, "grad_norm": 1.5491281027541615, "learning_rate": 6.389806996762865e-07, "loss": 0.4531, "step": 45000 }, { "epoch": 0.8537880799726848, "grad_norm": 1.3333857630254937, "learning_rate": 6.373622499274579e-07, "loss": 0.419, "step": 45010 }, { "epoch": 0.8539777685041162, "grad_norm": 1.1726891148795566, "learning_rate": 6.357457128910293e-07, "loss": 0.4067, "step": 45020 }, { "epoch": 0.8541674570355476, "grad_norm": 1.4720249174802298, "learning_rate": 6.341310892757391e-07, "loss": 0.4177, "step": 45030 }, { "epoch": 0.8543571455669791, "grad_norm": 1.543499001962245, "learning_rate": 6.325183797894884e-07, "loss": 0.4171, "step": 45040 }, { "epoch": 0.8545468340984104, "grad_norm": 1.5028372048452094, "learning_rate": 6.309075851393371e-07, "loss": 0.4178, "step": 45050 }, { "epoch": 0.8547365226298418, "grad_norm": 1.2561268694666976, "learning_rate": 6.292987060315065e-07, "loss": 0.4358, "step": 45060 }, { "epoch": 0.8549262111612732, "grad_norm": 1.469895252638069, "learning_rate": 6.276917431713792e-07, "loss": 0.4275, "step": 45070 }, { "epoch": 0.8551158996927046, "grad_norm": 2.0958220386955184, "learning_rate": 6.260866972634955e-07, "loss": 0.4424, "step": 45080 }, { "epoch": 0.8553055882241359, "grad_norm": 1.8068329816183837, "learning_rate": 6.244835690115552e-07, "loss": 0.4343, "step": 45090 }, { "epoch": 0.8554952767555674, "grad_norm": 1.5346399094153444, "learning_rate": 6.22882359118418e-07, "loss": 0.4167, "step": 45100 }, { "epoch": 0.8556849652869988, "grad_norm": 1.4489185130438886, "learning_rate": 6.212830682861038e-07, "loss": 0.4285, "step": 45110 }, { "epoch": 0.8558746538184301, "grad_norm": 1.450458744112663, "learning_rate": 6.19685697215791e-07, "loss": 0.4238, "step": 45120 }, { "epoch": 0.8560643423498615, "grad_norm": 1.4255448734443121, "learning_rate": 6.180902466078137e-07, "loss": 0.4196, "step": 45130 }, { "epoch": 0.8562540308812929, "grad_norm": 1.5590013899338993, "learning_rate": 6.16496717161667e-07, "loss": 0.4358, "step": 45140 }, { "epoch": 0.8564437194127243, "grad_norm": 1.671030084340425, "learning_rate": 6.149051095760005e-07, "loss": 0.4257, "step": 45150 }, { "epoch": 0.8566334079441557, "grad_norm": 1.6395802432495536, "learning_rate": 6.133154245486245e-07, "loss": 0.4255, "step": 45160 }, { "epoch": 0.8568230964755871, "grad_norm": 2.9267108714332433, "learning_rate": 6.117276627765057e-07, "loss": 0.4415, "step": 45170 }, { "epoch": 0.8570127850070185, "grad_norm": 1.6262084848804101, "learning_rate": 6.101418249557661e-07, "loss": 0.4155, "step": 45180 }, { "epoch": 0.8572024735384499, "grad_norm": 1.3499902682410712, "learning_rate": 6.085579117816842e-07, "loss": 0.412, "step": 45190 }, { "epoch": 0.8573921620698812, "grad_norm": 1.4852134228419387, "learning_rate": 6.069759239486972e-07, "loss": 0.4019, "step": 45200 }, { "epoch": 0.8575818506013126, "grad_norm": 1.6601085305629903, "learning_rate": 6.053958621503947e-07, "loss": 0.4045, "step": 45210 }, { "epoch": 0.8577715391327441, "grad_norm": 1.6472429904424388, "learning_rate": 6.038177270795248e-07, "loss": 0.4441, "step": 45220 }, { "epoch": 0.8579612276641754, "grad_norm": 1.5400270488296803, "learning_rate": 6.022415194279902e-07, "loss": 0.4319, "step": 45230 }, { "epoch": 0.8581509161956068, "grad_norm": 1.5103145539669502, "learning_rate": 6.006672398868452e-07, "loss": 0.4395, "step": 45240 }, { "epoch": 0.8583406047270382, "grad_norm": 1.7016270223156367, "learning_rate": 5.990948891463049e-07, "loss": 0.43, "step": 45250 }, { "epoch": 0.8585302932584696, "grad_norm": 1.5061225948150871, "learning_rate": 5.975244678957331e-07, "loss": 0.4474, "step": 45260 }, { "epoch": 0.8587199817899009, "grad_norm": 1.4908239261255902, "learning_rate": 5.959559768236517e-07, "loss": 0.4297, "step": 45270 }, { "epoch": 0.8589096703213324, "grad_norm": 1.3743162266846558, "learning_rate": 5.943894166177344e-07, "loss": 0.4158, "step": 45280 }, { "epoch": 0.8590993588527638, "grad_norm": 1.5113697378466067, "learning_rate": 5.928247879648064e-07, "loss": 0.4512, "step": 45290 }, { "epoch": 0.8592890473841952, "grad_norm": 1.5873650887634185, "learning_rate": 5.912620915508515e-07, "loss": 0.4339, "step": 45300 }, { "epoch": 0.8594787359156265, "grad_norm": 1.332962397201876, "learning_rate": 5.897013280610003e-07, "loss": 0.4325, "step": 45310 }, { "epoch": 0.8596684244470579, "grad_norm": 1.4648444453532465, "learning_rate": 5.881424981795397e-07, "loss": 0.407, "step": 45320 }, { "epoch": 0.8598581129784894, "grad_norm": 1.4936734398177798, "learning_rate": 5.865856025899103e-07, "loss": 0.4033, "step": 45330 }, { "epoch": 0.8600478015099207, "grad_norm": 1.5222813569337779, "learning_rate": 5.850306419746982e-07, "loss": 0.443, "step": 45340 }, { "epoch": 0.8602374900413521, "grad_norm": 1.3154344964647318, "learning_rate": 5.834776170156469e-07, "loss": 0.427, "step": 45350 }, { "epoch": 0.8604271785727835, "grad_norm": 1.4263356637849762, "learning_rate": 5.819265283936509e-07, "loss": 0.4479, "step": 45360 }, { "epoch": 0.8606168671042149, "grad_norm": 1.6032844187684248, "learning_rate": 5.803773767887522e-07, "loss": 0.4225, "step": 45370 }, { "epoch": 0.8608065556356462, "grad_norm": 1.7493754833884783, "learning_rate": 5.788301628801479e-07, "loss": 0.4172, "step": 45380 }, { "epoch": 0.8609962441670777, "grad_norm": 1.1748506505476037, "learning_rate": 5.772848873461817e-07, "loss": 0.4102, "step": 45390 }, { "epoch": 0.8611859326985091, "grad_norm": 1.4919323015387322, "learning_rate": 5.757415508643494e-07, "loss": 0.4533, "step": 45400 }, { "epoch": 0.8613756212299405, "grad_norm": 1.2135902008419772, "learning_rate": 5.742001541112973e-07, "loss": 0.4332, "step": 45410 }, { "epoch": 0.8615653097613718, "grad_norm": 1.2481924035126148, "learning_rate": 5.726606977628185e-07, "loss": 0.4365, "step": 45420 }, { "epoch": 0.8617549982928032, "grad_norm": 1.3727604391210269, "learning_rate": 5.711231824938601e-07, "loss": 0.4262, "step": 45430 }, { "epoch": 0.8619446868242346, "grad_norm": 1.2923444333452, "learning_rate": 5.69587608978513e-07, "loss": 0.421, "step": 45440 }, { "epoch": 0.862134375355666, "grad_norm": 1.424839860682033, "learning_rate": 5.680539778900196e-07, "loss": 0.401, "step": 45450 }, { "epoch": 0.8623240638870974, "grad_norm": 1.6205764509941551, "learning_rate": 5.665222899007711e-07, "loss": 0.4358, "step": 45460 }, { "epoch": 0.8625137524185288, "grad_norm": 1.6636369563366948, "learning_rate": 5.649925456823041e-07, "loss": 0.4336, "step": 45470 }, { "epoch": 0.8627034409499602, "grad_norm": 1.3115959712316732, "learning_rate": 5.634647459053067e-07, "loss": 0.4313, "step": 45480 }, { "epoch": 0.8628931294813915, "grad_norm": 1.3866012260173513, "learning_rate": 5.619388912396118e-07, "loss": 0.4194, "step": 45490 }, { "epoch": 0.8630828180128229, "grad_norm": 1.6035868404269071, "learning_rate": 5.604149823541993e-07, "loss": 0.4146, "step": 45500 }, { "epoch": 0.8632725065442544, "grad_norm": 1.3532467545323343, "learning_rate": 5.588930199171988e-07, "loss": 0.4303, "step": 45510 }, { "epoch": 0.8634621950756858, "grad_norm": 1.9740148043127532, "learning_rate": 5.573730045958826e-07, "loss": 0.44, "step": 45520 }, { "epoch": 0.8636518836071171, "grad_norm": 1.1785725502719426, "learning_rate": 5.558549370566734e-07, "loss": 0.4033, "step": 45530 }, { "epoch": 0.8638415721385485, "grad_norm": 1.518314847062348, "learning_rate": 5.543388179651371e-07, "loss": 0.3944, "step": 45540 }, { "epoch": 0.8640312606699799, "grad_norm": 1.6785678188687334, "learning_rate": 5.52824647985985e-07, "loss": 0.4423, "step": 45550 }, { "epoch": 0.8642209492014112, "grad_norm": 1.4736332467067728, "learning_rate": 5.513124277830767e-07, "loss": 0.4186, "step": 45560 }, { "epoch": 0.8644106377328427, "grad_norm": 5.3500407441431905, "learning_rate": 5.498021580194152e-07, "loss": 0.4042, "step": 45570 }, { "epoch": 0.8646003262642741, "grad_norm": 1.247022584450899, "learning_rate": 5.482938393571485e-07, "loss": 0.4358, "step": 45580 }, { "epoch": 0.8647900147957055, "grad_norm": 1.4473428243064148, "learning_rate": 5.467874724575683e-07, "loss": 0.4126, "step": 45590 }, { "epoch": 0.8649797033271368, "grad_norm": 1.6740138288639677, "learning_rate": 5.452830579811108e-07, "loss": 0.4348, "step": 45600 }, { "epoch": 0.8651693918585682, "grad_norm": 1.3392067869697142, "learning_rate": 5.437805965873583e-07, "loss": 0.4366, "step": 45610 }, { "epoch": 0.8653590803899996, "grad_norm": 1.3461310473479637, "learning_rate": 5.422800889350355e-07, "loss": 0.4115, "step": 45620 }, { "epoch": 0.865548768921431, "grad_norm": 1.6846925154604162, "learning_rate": 5.407815356820084e-07, "loss": 0.4262, "step": 45630 }, { "epoch": 0.8657384574528624, "grad_norm": 1.4623260115613466, "learning_rate": 5.39284937485291e-07, "loss": 0.4141, "step": 45640 }, { "epoch": 0.8659281459842938, "grad_norm": 1.37072060929974, "learning_rate": 5.377902950010355e-07, "loss": 0.431, "step": 45650 }, { "epoch": 0.8661178345157252, "grad_norm": 1.5632703048927705, "learning_rate": 5.362976088845373e-07, "loss": 0.4081, "step": 45660 }, { "epoch": 0.8663075230471565, "grad_norm": 1.7341942066852467, "learning_rate": 5.348068797902378e-07, "loss": 0.4314, "step": 45670 }, { "epoch": 0.866497211578588, "grad_norm": 1.7696742516433361, "learning_rate": 5.333181083717154e-07, "loss": 0.4266, "step": 45680 }, { "epoch": 0.8666869001100194, "grad_norm": 1.282878087974263, "learning_rate": 5.31831295281694e-07, "loss": 0.3994, "step": 45690 }, { "epoch": 0.8668765886414508, "grad_norm": 1.4609411529195409, "learning_rate": 5.303464411720372e-07, "loss": 0.4277, "step": 45700 }, { "epoch": 0.8670662771728821, "grad_norm": 1.7641047299540247, "learning_rate": 5.288635466937486e-07, "loss": 0.4387, "step": 45710 }, { "epoch": 0.8672559657043135, "grad_norm": 1.1326716595344777, "learning_rate": 5.273826124969756e-07, "loss": 0.4475, "step": 45720 }, { "epoch": 0.8674456542357449, "grad_norm": 1.3517340347771742, "learning_rate": 5.259036392310046e-07, "loss": 0.4476, "step": 45730 }, { "epoch": 0.8676353427671762, "grad_norm": 1.2324463395730474, "learning_rate": 5.244266275442617e-07, "loss": 0.4193, "step": 45740 }, { "epoch": 0.8678250312986077, "grad_norm": 1.59111902363142, "learning_rate": 5.229515780843142e-07, "loss": 0.4572, "step": 45750 }, { "epoch": 0.8680147198300391, "grad_norm": 1.3995607911492731, "learning_rate": 5.21478491497866e-07, "loss": 0.4224, "step": 45760 }, { "epoch": 0.8682044083614705, "grad_norm": 1.049941301037072, "learning_rate": 5.200073684307649e-07, "loss": 0.4131, "step": 45770 }, { "epoch": 0.8683940968929018, "grad_norm": 1.3695700017960244, "learning_rate": 5.185382095279962e-07, "loss": 0.4213, "step": 45780 }, { "epoch": 0.8685837854243332, "grad_norm": 1.1228112738139415, "learning_rate": 5.170710154336833e-07, "loss": 0.4348, "step": 45790 }, { "epoch": 0.8687734739557647, "grad_norm": 1.5945090220613602, "learning_rate": 5.15605786791088e-07, "loss": 0.4385, "step": 45800 }, { "epoch": 0.8689631624871961, "grad_norm": 1.3931743535731185, "learning_rate": 5.141425242426096e-07, "loss": 0.4019, "step": 45810 }, { "epoch": 0.8691528510186274, "grad_norm": 1.7148307442725756, "learning_rate": 5.126812284297883e-07, "loss": 0.4402, "step": 45820 }, { "epoch": 0.8693425395500588, "grad_norm": 1.3890847274548446, "learning_rate": 5.11221899993301e-07, "loss": 0.4009, "step": 45830 }, { "epoch": 0.8695322280814902, "grad_norm": 1.4265214650555251, "learning_rate": 5.097645395729606e-07, "loss": 0.4322, "step": 45840 }, { "epoch": 0.8697219166129215, "grad_norm": 1.6996944523950548, "learning_rate": 5.083091478077174e-07, "loss": 0.4637, "step": 45850 }, { "epoch": 0.869911605144353, "grad_norm": 1.5723554809002536, "learning_rate": 5.068557253356604e-07, "loss": 0.422, "step": 45860 }, { "epoch": 0.8701012936757844, "grad_norm": 1.4136924550256171, "learning_rate": 5.054042727940123e-07, "loss": 0.4491, "step": 45870 }, { "epoch": 0.8702909822072158, "grad_norm": 1.3815729935277028, "learning_rate": 5.039547908191356e-07, "loss": 0.4188, "step": 45880 }, { "epoch": 0.8704806707386471, "grad_norm": 1.5193635575100364, "learning_rate": 5.02507280046527e-07, "loss": 0.4404, "step": 45890 }, { "epoch": 0.8706703592700785, "grad_norm": 1.536217865246224, "learning_rate": 5.010617411108166e-07, "loss": 0.4249, "step": 45900 }, { "epoch": 0.8708600478015099, "grad_norm": 1.5381906728594141, "learning_rate": 4.996181746457757e-07, "loss": 0.4186, "step": 45910 }, { "epoch": 0.8710497363329414, "grad_norm": 1.7117679052764647, "learning_rate": 4.981765812843048e-07, "loss": 0.4211, "step": 45920 }, { "epoch": 0.8712394248643727, "grad_norm": 1.331047700776078, "learning_rate": 4.967369616584434e-07, "loss": 0.4273, "step": 45930 }, { "epoch": 0.8714291133958041, "grad_norm": 1.4539617501670008, "learning_rate": 4.952993163993647e-07, "loss": 0.4399, "step": 45940 }, { "epoch": 0.8716188019272355, "grad_norm": 1.3905808593772646, "learning_rate": 4.938636461373758e-07, "loss": 0.4325, "step": 45950 }, { "epoch": 0.8718084904586668, "grad_norm": 1.4717887808412946, "learning_rate": 4.924299515019171e-07, "loss": 0.4346, "step": 45960 }, { "epoch": 0.8719981789900982, "grad_norm": 1.2600556089656718, "learning_rate": 4.909982331215635e-07, "loss": 0.4036, "step": 45970 }, { "epoch": 0.8721878675215297, "grad_norm": 1.2927424370803067, "learning_rate": 4.89568491624024e-07, "loss": 0.4368, "step": 45980 }, { "epoch": 0.8723775560529611, "grad_norm": 1.5115654697113579, "learning_rate": 4.881407276361421e-07, "loss": 0.4357, "step": 45990 }, { "epoch": 0.8725672445843924, "grad_norm": 1.6979166160888197, "learning_rate": 4.867149417838912e-07, "loss": 0.4343, "step": 46000 }, { "epoch": 0.8727569331158238, "grad_norm": 1.5490364427665568, "learning_rate": 4.852911346923783e-07, "loss": 0.4289, "step": 46010 }, { "epoch": 0.8729466216472552, "grad_norm": 1.6743175772972259, "learning_rate": 4.838693069858457e-07, "loss": 0.4511, "step": 46020 }, { "epoch": 0.8731363101786866, "grad_norm": 1.338122485299529, "learning_rate": 4.824494592876627e-07, "loss": 0.3979, "step": 46030 }, { "epoch": 0.873325998710118, "grad_norm": 1.4590076622716053, "learning_rate": 4.810315922203368e-07, "loss": 0.44, "step": 46040 }, { "epoch": 0.8735156872415494, "grad_norm": 1.5574935899470428, "learning_rate": 4.796157064055023e-07, "loss": 0.4432, "step": 46050 }, { "epoch": 0.8737053757729808, "grad_norm": 1.447461569420758, "learning_rate": 4.782018024639251e-07, "loss": 0.4109, "step": 46060 }, { "epoch": 0.8738950643044121, "grad_norm": 1.2736399065350015, "learning_rate": 4.7678988101550585e-07, "loss": 0.4269, "step": 46070 }, { "epoch": 0.8740847528358435, "grad_norm": 1.2035870542971723, "learning_rate": 4.75379942679271e-07, "loss": 0.401, "step": 46080 }, { "epoch": 0.874274441367275, "grad_norm": 1.0913107107067883, "learning_rate": 4.7397198807338287e-07, "loss": 0.416, "step": 46090 }, { "epoch": 0.8744641298987064, "grad_norm": 1.5271741810154127, "learning_rate": 4.7256601781513036e-07, "loss": 0.4467, "step": 46100 }, { "epoch": 0.8746538184301377, "grad_norm": 1.3865521168862915, "learning_rate": 4.7116203252093204e-07, "loss": 0.3906, "step": 46110 }, { "epoch": 0.8748435069615691, "grad_norm": 1.399573295994359, "learning_rate": 4.6976003280634006e-07, "loss": 0.4084, "step": 46120 }, { "epoch": 0.8750331954930005, "grad_norm": 1.5325330658734801, "learning_rate": 4.683600192860305e-07, "loss": 0.4275, "step": 46130 }, { "epoch": 0.8752228840244319, "grad_norm": 1.4985476357597571, "learning_rate": 4.66961992573815e-07, "loss": 0.432, "step": 46140 }, { "epoch": 0.8754125725558632, "grad_norm": 1.484640027658279, "learning_rate": 4.655659532826284e-07, "loss": 0.4321, "step": 46150 }, { "epoch": 0.8756022610872947, "grad_norm": 1.3853997697564824, "learning_rate": 4.6417190202453666e-07, "loss": 0.4279, "step": 46160 }, { "epoch": 0.8757919496187261, "grad_norm": 1.4461733466118651, "learning_rate": 4.6277983941073524e-07, "loss": 0.4385, "step": 46170 }, { "epoch": 0.8759816381501574, "grad_norm": 1.6677750412535302, "learning_rate": 4.6138976605154484e-07, "loss": 0.408, "step": 46180 }, { "epoch": 0.8761713266815888, "grad_norm": 1.166653356849602, "learning_rate": 4.6000168255641743e-07, "loss": 0.3928, "step": 46190 }, { "epoch": 0.8763610152130202, "grad_norm": 1.490025885346682, "learning_rate": 4.5861558953393114e-07, "loss": 0.4036, "step": 46200 }, { "epoch": 0.8765507037444517, "grad_norm": 1.6340332145143892, "learning_rate": 4.572314875917888e-07, "loss": 0.4101, "step": 46210 }, { "epoch": 0.876740392275883, "grad_norm": 1.5139410907071615, "learning_rate": 4.5584937733682335e-07, "loss": 0.4497, "step": 46220 }, { "epoch": 0.8769300808073144, "grad_norm": 1.2933532392410507, "learning_rate": 4.5446925937499555e-07, "loss": 0.404, "step": 46230 }, { "epoch": 0.8771197693387458, "grad_norm": 1.6802483910931418, "learning_rate": 4.530911343113886e-07, "loss": 0.4154, "step": 46240 }, { "epoch": 0.8773094578701772, "grad_norm": 1.4472245448868606, "learning_rate": 4.5171500275021597e-07, "loss": 0.4701, "step": 46250 }, { "epoch": 0.8774991464016085, "grad_norm": 1.7236801641226744, "learning_rate": 4.503408652948149e-07, "loss": 0.4473, "step": 46260 }, { "epoch": 0.87768883493304, "grad_norm": 3.460915431501275, "learning_rate": 4.4896872254764746e-07, "loss": 0.409, "step": 46270 }, { "epoch": 0.8778785234644714, "grad_norm": 1.4957033844535654, "learning_rate": 4.4759857511030405e-07, "loss": 0.419, "step": 46280 }, { "epoch": 0.8780682119959027, "grad_norm": 1.5570113991005647, "learning_rate": 4.46230423583498e-07, "loss": 0.4133, "step": 46290 }, { "epoch": 0.8782579005273341, "grad_norm": 1.5082065609443036, "learning_rate": 4.4486426856706955e-07, "loss": 0.4285, "step": 46300 }, { "epoch": 0.8784475890587655, "grad_norm": 1.8735910263488, "learning_rate": 4.4350011065998123e-07, "loss": 0.4582, "step": 46310 }, { "epoch": 0.8786372775901969, "grad_norm": 1.3555293064361937, "learning_rate": 4.4213795046032073e-07, "loss": 0.431, "step": 46320 }, { "epoch": 0.8788269661216283, "grad_norm": 1.470521025518004, "learning_rate": 4.4077778856530193e-07, "loss": 0.4219, "step": 46330 }, { "epoch": 0.8790166546530597, "grad_norm": 1.43791137618709, "learning_rate": 4.3941962557125905e-07, "loss": 0.4396, "step": 46340 }, { "epoch": 0.8792063431844911, "grad_norm": 1.3260848116890007, "learning_rate": 4.38063462073654e-07, "loss": 0.4096, "step": 46350 }, { "epoch": 0.8793960317159225, "grad_norm": 2.2370403213554124, "learning_rate": 4.3670929866706845e-07, "loss": 0.395, "step": 46360 }, { "epoch": 0.8795857202473538, "grad_norm": 1.3308631940362479, "learning_rate": 4.35357135945208e-07, "loss": 0.4292, "step": 46370 }, { "epoch": 0.8797754087787852, "grad_norm": 1.5488761588846447, "learning_rate": 4.3400697450090346e-07, "loss": 0.4432, "step": 46380 }, { "epoch": 0.8799650973102167, "grad_norm": 1.1766502398467975, "learning_rate": 4.3265881492610516e-07, "loss": 0.3948, "step": 46390 }, { "epoch": 0.880154785841648, "grad_norm": 1.33104379424067, "learning_rate": 4.3131265781188814e-07, "loss": 0.4156, "step": 46400 }, { "epoch": 0.8803444743730794, "grad_norm": 1.5757273907069473, "learning_rate": 4.2996850374844747e-07, "loss": 0.4333, "step": 46410 }, { "epoch": 0.8805341629045108, "grad_norm": 1.6120312069117304, "learning_rate": 4.286263533251006e-07, "loss": 0.4416, "step": 46420 }, { "epoch": 0.8807238514359422, "grad_norm": 1.4261757094533172, "learning_rate": 4.2728620713028745e-07, "loss": 0.4217, "step": 46430 }, { "epoch": 0.8809135399673735, "grad_norm": 1.816481622415623, "learning_rate": 4.259480657515697e-07, "loss": 0.4287, "step": 46440 }, { "epoch": 0.881103228498805, "grad_norm": 1.9149176161724093, "learning_rate": 4.246119297756285e-07, "loss": 0.4088, "step": 46450 }, { "epoch": 0.8812929170302364, "grad_norm": 1.5608174650884346, "learning_rate": 4.2327779978826535e-07, "loss": 0.4319, "step": 46460 }, { "epoch": 0.8814826055616677, "grad_norm": 1.6548428715838763, "learning_rate": 4.219456763744029e-07, "loss": 0.4298, "step": 46470 }, { "epoch": 0.8816722940930991, "grad_norm": 1.788630372420694, "learning_rate": 4.2061556011808556e-07, "loss": 0.4421, "step": 46480 }, { "epoch": 0.8818619826245305, "grad_norm": 1.6873566433712746, "learning_rate": 4.1928745160247644e-07, "loss": 0.4078, "step": 46490 }, { "epoch": 0.882051671155962, "grad_norm": 1.5481990946789597, "learning_rate": 4.1796135140985796e-07, "loss": 0.4391, "step": 46500 }, { "epoch": 0.8822413596873933, "grad_norm": 1.3731804226321345, "learning_rate": 4.166372601216334e-07, "loss": 0.4277, "step": 46510 }, { "epoch": 0.8824310482188247, "grad_norm": 1.591388987420211, "learning_rate": 4.15315178318324e-07, "loss": 0.4256, "step": 46520 }, { "epoch": 0.8826207367502561, "grad_norm": 1.4566559209671537, "learning_rate": 4.1399510657956944e-07, "loss": 0.411, "step": 46530 }, { "epoch": 0.8828104252816875, "grad_norm": 1.4922572720146898, "learning_rate": 4.1267704548413115e-07, "loss": 0.4143, "step": 46540 }, { "epoch": 0.8830001138131188, "grad_norm": 1.6059938855591254, "learning_rate": 4.1136099560988464e-07, "loss": 0.4116, "step": 46550 }, { "epoch": 0.8831898023445502, "grad_norm": 1.330829695212197, "learning_rate": 4.100469575338278e-07, "loss": 0.3991, "step": 46560 }, { "epoch": 0.8833794908759817, "grad_norm": 1.6992217708761894, "learning_rate": 4.087349318320749e-07, "loss": 0.4188, "step": 46570 }, { "epoch": 0.883569179407413, "grad_norm": 1.255544177652832, "learning_rate": 4.074249190798557e-07, "loss": 0.3875, "step": 46580 }, { "epoch": 0.8837588679388444, "grad_norm": 1.5116242041712629, "learning_rate": 4.0611691985152024e-07, "loss": 0.4296, "step": 46590 }, { "epoch": 0.8839485564702758, "grad_norm": 1.7547371775953662, "learning_rate": 4.0481093472053645e-07, "loss": 0.4172, "step": 46600 }, { "epoch": 0.8841382450017072, "grad_norm": 1.4778464758168859, "learning_rate": 4.035069642594869e-07, "loss": 0.4158, "step": 46610 }, { "epoch": 0.8843279335331385, "grad_norm": 1.9794947601986557, "learning_rate": 4.0220500904007143e-07, "loss": 0.4254, "step": 46620 }, { "epoch": 0.88451762206457, "grad_norm": 1.4285055477825066, "learning_rate": 4.0090506963310574e-07, "loss": 0.3886, "step": 46630 }, { "epoch": 0.8847073105960014, "grad_norm": 1.4732768437476376, "learning_rate": 3.996071466085233e-07, "loss": 0.4192, "step": 46640 }, { "epoch": 0.8848969991274328, "grad_norm": 1.7755685100587604, "learning_rate": 3.983112405353734e-07, "loss": 0.4569, "step": 46650 }, { "epoch": 0.8850866876588641, "grad_norm": 1.6644008914205715, "learning_rate": 3.97017351981821e-07, "loss": 0.4065, "step": 46660 }, { "epoch": 0.8852763761902955, "grad_norm": 1.3031419216150608, "learning_rate": 3.9572548151514344e-07, "loss": 0.4154, "step": 46670 }, { "epoch": 0.885466064721727, "grad_norm": 1.4078964231225544, "learning_rate": 3.9443562970173874e-07, "loss": 0.4234, "step": 46680 }, { "epoch": 0.8856557532531583, "grad_norm": 1.2156802800924393, "learning_rate": 3.9314779710711457e-07, "loss": 0.4069, "step": 46690 }, { "epoch": 0.8858454417845897, "grad_norm": 1.3962858444807025, "learning_rate": 3.9186198429589816e-07, "loss": 0.4061, "step": 46700 }, { "epoch": 0.8860351303160211, "grad_norm": 1.5014795183213054, "learning_rate": 3.905781918318269e-07, "loss": 0.4213, "step": 46710 }, { "epoch": 0.8862248188474525, "grad_norm": 1.429518604038442, "learning_rate": 3.892964202777544e-07, "loss": 0.4273, "step": 46720 }, { "epoch": 0.8864145073788838, "grad_norm": 1.604696649831656, "learning_rate": 3.8801667019564893e-07, "loss": 0.4296, "step": 46730 }, { "epoch": 0.8866041959103153, "grad_norm": 1.673091097966965, "learning_rate": 3.867389421465906e-07, "loss": 0.4268, "step": 46740 }, { "epoch": 0.8867938844417467, "grad_norm": 1.4019390932571956, "learning_rate": 3.854632366907751e-07, "loss": 0.4112, "step": 46750 }, { "epoch": 0.8869835729731781, "grad_norm": 1.7241100059964127, "learning_rate": 3.8418955438751013e-07, "loss": 0.4322, "step": 46760 }, { "epoch": 0.8871732615046094, "grad_norm": 1.8231076321338544, "learning_rate": 3.8291789579521563e-07, "loss": 0.4277, "step": 46770 }, { "epoch": 0.8873629500360408, "grad_norm": 1.584866907902207, "learning_rate": 3.816482614714262e-07, "loss": 0.37, "step": 46780 }, { "epoch": 0.8875526385674722, "grad_norm": 1.2963363418779148, "learning_rate": 3.8038065197278706e-07, "loss": 0.4166, "step": 46790 }, { "epoch": 0.8877423270989035, "grad_norm": 1.6741849274009604, "learning_rate": 3.79115067855057e-07, "loss": 0.4504, "step": 46800 }, { "epoch": 0.887932015630335, "grad_norm": 1.5413373540212978, "learning_rate": 3.778515096731078e-07, "loss": 0.4154, "step": 46810 }, { "epoch": 0.8881217041617664, "grad_norm": 1.5358034130769362, "learning_rate": 3.7658997798091956e-07, "loss": 0.4482, "step": 46820 }, { "epoch": 0.8883113926931978, "grad_norm": 1.4305913268930581, "learning_rate": 3.753304733315871e-07, "loss": 0.3983, "step": 46830 }, { "epoch": 0.8885010812246291, "grad_norm": 1.4114420979575844, "learning_rate": 3.7407299627731375e-07, "loss": 0.4474, "step": 46840 }, { "epoch": 0.8886907697560605, "grad_norm": 1.5235944102323684, "learning_rate": 3.7281754736941735e-07, "loss": 0.4252, "step": 46850 }, { "epoch": 0.888880458287492, "grad_norm": 1.4721374790748167, "learning_rate": 3.7156412715832424e-07, "loss": 0.4285, "step": 46860 }, { "epoch": 0.8890701468189234, "grad_norm": 1.4894235276348704, "learning_rate": 3.7031273619357213e-07, "loss": 0.4304, "step": 46870 }, { "epoch": 0.8892598353503547, "grad_norm": 1.522271952931535, "learning_rate": 3.690633750238065e-07, "loss": 0.4218, "step": 46880 }, { "epoch": 0.8894495238817861, "grad_norm": 1.4108509477698927, "learning_rate": 3.6781604419678816e-07, "loss": 0.4411, "step": 46890 }, { "epoch": 0.8896392124132175, "grad_norm": 1.5793536913666626, "learning_rate": 3.6657074425938243e-07, "loss": 0.4304, "step": 46900 }, { "epoch": 0.8898289009446488, "grad_norm": 1.4347504390373675, "learning_rate": 3.653274757575681e-07, "loss": 0.4063, "step": 46910 }, { "epoch": 0.8900185894760803, "grad_norm": 1.4458697383950774, "learning_rate": 3.640862392364308e-07, "loss": 0.4041, "step": 46920 }, { "epoch": 0.8902082780075117, "grad_norm": 1.571499418349213, "learning_rate": 3.628470352401653e-07, "loss": 0.4202, "step": 46930 }, { "epoch": 0.8903979665389431, "grad_norm": 1.5042122417851937, "learning_rate": 3.61609864312078e-07, "loss": 0.4173, "step": 46940 }, { "epoch": 0.8905876550703744, "grad_norm": 1.4799036284089198, "learning_rate": 3.6037472699458067e-07, "loss": 0.4292, "step": 46950 }, { "epoch": 0.8907773436018058, "grad_norm": 1.4969911867912014, "learning_rate": 3.5914162382919616e-07, "loss": 0.4033, "step": 46960 }, { "epoch": 0.8909670321332372, "grad_norm": 1.2572221746805523, "learning_rate": 3.579105553565532e-07, "loss": 0.4262, "step": 46970 }, { "epoch": 0.8911567206646687, "grad_norm": 1.43755880141844, "learning_rate": 3.5668152211638884e-07, "loss": 0.4313, "step": 46980 }, { "epoch": 0.8913464091961, "grad_norm": 1.349236674095922, "learning_rate": 3.5545452464755046e-07, "loss": 0.4346, "step": 46990 }, { "epoch": 0.8915360977275314, "grad_norm": 1.5704397655707107, "learning_rate": 3.542295634879883e-07, "loss": 0.4241, "step": 47000 }, { "epoch": 0.8917257862589628, "grad_norm": 1.4014040548915343, "learning_rate": 3.530066391747644e-07, "loss": 0.4303, "step": 47010 }, { "epoch": 0.8919154747903941, "grad_norm": 2.379361583318835, "learning_rate": 3.5178575224404655e-07, "loss": 0.4242, "step": 47020 }, { "epoch": 0.8921051633218255, "grad_norm": 1.450509449759415, "learning_rate": 3.50566903231106e-07, "loss": 0.4367, "step": 47030 }, { "epoch": 0.892294851853257, "grad_norm": 1.354742059122219, "learning_rate": 3.493500926703247e-07, "loss": 0.4142, "step": 47040 }, { "epoch": 0.8924845403846884, "grad_norm": 1.3559000031070223, "learning_rate": 3.4813532109518767e-07, "loss": 0.4254, "step": 47050 }, { "epoch": 0.8926742289161197, "grad_norm": 1.450252513732408, "learning_rate": 3.4692258903828934e-07, "loss": 0.415, "step": 47060 }, { "epoch": 0.8928639174475511, "grad_norm": 1.3397001079617386, "learning_rate": 3.4571189703132835e-07, "loss": 0.4424, "step": 47070 }, { "epoch": 0.8930536059789825, "grad_norm": 1.3696644958000306, "learning_rate": 3.4450324560510616e-07, "loss": 0.443, "step": 47080 }, { "epoch": 0.893243294510414, "grad_norm": 1.4586349742294853, "learning_rate": 3.4329663528953395e-07, "loss": 0.4416, "step": 47090 }, { "epoch": 0.8934329830418453, "grad_norm": 1.4993938955620434, "learning_rate": 3.420920666136268e-07, "loss": 0.4522, "step": 47100 }, { "epoch": 0.8936226715732767, "grad_norm": 1.1840238096969977, "learning_rate": 3.4088954010550235e-07, "loss": 0.4151, "step": 47110 }, { "epoch": 0.8938123601047081, "grad_norm": 1.2319058178697158, "learning_rate": 3.3968905629238657e-07, "loss": 0.4469, "step": 47120 }, { "epoch": 0.8940020486361394, "grad_norm": 1.4761784469444679, "learning_rate": 3.384906157006068e-07, "loss": 0.4199, "step": 47130 }, { "epoch": 0.8941917371675708, "grad_norm": 1.7031506915394994, "learning_rate": 3.372942188555944e-07, "loss": 0.405, "step": 47140 }, { "epoch": 0.8943814256990023, "grad_norm": 1.4010305629438278, "learning_rate": 3.3609986628188874e-07, "loss": 0.4203, "step": 47150 }, { "epoch": 0.8945711142304337, "grad_norm": 1.3430062579230588, "learning_rate": 3.3490755850312694e-07, "loss": 0.4079, "step": 47160 }, { "epoch": 0.894760802761865, "grad_norm": 1.2302166812093207, "learning_rate": 3.337172960420559e-07, "loss": 0.3856, "step": 47170 }, { "epoch": 0.8949504912932964, "grad_norm": 1.4860146571101316, "learning_rate": 3.32529079420521e-07, "loss": 0.4391, "step": 47180 }, { "epoch": 0.8951401798247278, "grad_norm": 1.7697377750183028, "learning_rate": 3.313429091594722e-07, "loss": 0.4214, "step": 47190 }, { "epoch": 0.8953298683561592, "grad_norm": 1.6003243711733, "learning_rate": 3.3015878577896345e-07, "loss": 0.441, "step": 47200 }, { "epoch": 0.8955195568875906, "grad_norm": 1.4705806228821252, "learning_rate": 3.289767097981489e-07, "loss": 0.4158, "step": 47210 }, { "epoch": 0.895709245419022, "grad_norm": 1.4524354348833755, "learning_rate": 3.277966817352879e-07, "loss": 0.4294, "step": 47220 }, { "epoch": 0.8958989339504534, "grad_norm": 1.4232271010581736, "learning_rate": 3.2661870210773994e-07, "loss": 0.412, "step": 47230 }, { "epoch": 0.8960886224818847, "grad_norm": 1.3772347583768019, "learning_rate": 3.2544277143196634e-07, "loss": 0.423, "step": 47240 }, { "epoch": 0.8962783110133161, "grad_norm": 1.3931204640460029, "learning_rate": 3.242688902235308e-07, "loss": 0.4315, "step": 47250 }, { "epoch": 0.8964679995447475, "grad_norm": 1.495865255202473, "learning_rate": 3.2309705899709944e-07, "loss": 0.4009, "step": 47260 }, { "epoch": 0.896657688076179, "grad_norm": 1.3051189511299626, "learning_rate": 3.2192727826643786e-07, "loss": 0.4155, "step": 47270 }, { "epoch": 0.8968473766076103, "grad_norm": 1.3667665396454396, "learning_rate": 3.2075954854441314e-07, "loss": 0.4197, "step": 47280 }, { "epoch": 0.8970370651390417, "grad_norm": 1.5616754644354636, "learning_rate": 3.1959387034299294e-07, "loss": 0.449, "step": 47290 }, { "epoch": 0.8972267536704731, "grad_norm": 1.674881888513668, "learning_rate": 3.184302441732456e-07, "loss": 0.426, "step": 47300 }, { "epoch": 0.8974164422019044, "grad_norm": 1.2642098498428755, "learning_rate": 3.172686705453426e-07, "loss": 0.4179, "step": 47310 }, { "epoch": 0.8976061307333358, "grad_norm": 1.440527369457717, "learning_rate": 3.161091499685498e-07, "loss": 0.4172, "step": 47320 }, { "epoch": 0.8977958192647673, "grad_norm": 1.3679485552156396, "learning_rate": 3.149516829512389e-07, "loss": 0.4041, "step": 47330 }, { "epoch": 0.8979855077961987, "grad_norm": 1.5003859488619267, "learning_rate": 3.137962700008762e-07, "loss": 0.4116, "step": 47340 }, { "epoch": 0.89817519632763, "grad_norm": 1.6559476159638822, "learning_rate": 3.126429116240298e-07, "loss": 0.4345, "step": 47350 }, { "epoch": 0.8983648848590614, "grad_norm": 1.4872730035871937, "learning_rate": 3.1149160832636915e-07, "loss": 0.4301, "step": 47360 }, { "epoch": 0.8985545733904928, "grad_norm": 1.2727966061537974, "learning_rate": 3.1034236061265766e-07, "loss": 0.4021, "step": 47370 }, { "epoch": 0.8987442619219242, "grad_norm": 1.3104173581374716, "learning_rate": 3.091951689867623e-07, "loss": 0.4056, "step": 47380 }, { "epoch": 0.8989339504533556, "grad_norm": 1.4762910374897602, "learning_rate": 3.0805003395164625e-07, "loss": 0.4253, "step": 47390 }, { "epoch": 0.899123638984787, "grad_norm": 1.520825795590658, "learning_rate": 3.0690695600937015e-07, "loss": 0.4393, "step": 47400 }, { "epoch": 0.8993133275162184, "grad_norm": 1.742217978753529, "learning_rate": 3.0576593566109533e-07, "loss": 0.4204, "step": 47410 }, { "epoch": 0.8995030160476497, "grad_norm": 1.5790586240797377, "learning_rate": 3.0462697340707834e-07, "loss": 0.4067, "step": 47420 }, { "epoch": 0.8996927045790811, "grad_norm": 1.3081555650189092, "learning_rate": 3.034900697466764e-07, "loss": 0.4211, "step": 47430 }, { "epoch": 0.8998823931105125, "grad_norm": 1.7063669385441225, "learning_rate": 3.0235522517834184e-07, "loss": 0.4353, "step": 47440 }, { "epoch": 0.900072081641944, "grad_norm": 1.6317384112073121, "learning_rate": 3.0122244019962333e-07, "loss": 0.4463, "step": 47450 }, { "epoch": 0.9002617701733753, "grad_norm": 1.5025412045688202, "learning_rate": 3.0009171530716973e-07, "loss": 0.4387, "step": 47460 }, { "epoch": 0.9004514587048067, "grad_norm": 1.7916934133514208, "learning_rate": 2.989630509967251e-07, "loss": 0.4222, "step": 47470 }, { "epoch": 0.9006411472362381, "grad_norm": 1.387603930647901, "learning_rate": 2.9783644776312957e-07, "loss": 0.3962, "step": 47480 }, { "epoch": 0.9008308357676695, "grad_norm": 7.312040607743128, "learning_rate": 2.967119061003204e-07, "loss": 0.4279, "step": 47490 }, { "epoch": 0.9010205242991008, "grad_norm": 1.4352882073652766, "learning_rate": 2.9558942650132926e-07, "loss": 0.4283, "step": 47500 }, { "epoch": 0.9012102128305323, "grad_norm": 1.278612729210031, "learning_rate": 2.944690094582864e-07, "loss": 0.4158, "step": 47510 }, { "epoch": 0.9013999013619637, "grad_norm": 1.6809724381673639, "learning_rate": 2.9335065546241716e-07, "loss": 0.4324, "step": 47520 }, { "epoch": 0.901589589893395, "grad_norm": 1.6400516879067952, "learning_rate": 2.92234365004041e-07, "loss": 0.4286, "step": 47530 }, { "epoch": 0.9017792784248264, "grad_norm": 1.1526279018153147, "learning_rate": 2.911201385725726e-07, "loss": 0.4123, "step": 47540 }, { "epoch": 0.9019689669562578, "grad_norm": 2.6941010096990006, "learning_rate": 2.900079766565245e-07, "loss": 0.4274, "step": 47550 }, { "epoch": 0.9021586554876893, "grad_norm": 1.3716774404940861, "learning_rate": 2.8889787974350046e-07, "loss": 0.4273, "step": 47560 }, { "epoch": 0.9023483440191206, "grad_norm": 2.0313380219372212, "learning_rate": 2.877898483202013e-07, "loss": 0.4289, "step": 47570 }, { "epoch": 0.902538032550552, "grad_norm": 1.4795098778747284, "learning_rate": 2.866838828724222e-07, "loss": 0.4294, "step": 47580 }, { "epoch": 0.9027277210819834, "grad_norm": 1.368649402157932, "learning_rate": 2.8557998388504984e-07, "loss": 0.3978, "step": 47590 }, { "epoch": 0.9029174096134148, "grad_norm": 1.4557327388002181, "learning_rate": 2.844781518420686e-07, "loss": 0.4247, "step": 47600 }, { "epoch": 0.9031070981448461, "grad_norm": 1.4751990004992857, "learning_rate": 2.8337838722655443e-07, "loss": 0.4384, "step": 47610 }, { "epoch": 0.9032967866762776, "grad_norm": 1.5512968877222664, "learning_rate": 2.822806905206771e-07, "loss": 0.419, "step": 47620 }, { "epoch": 0.903486475207709, "grad_norm": 1.4839251096311241, "learning_rate": 2.811850622057011e-07, "loss": 0.4322, "step": 47630 }, { "epoch": 0.9036761637391403, "grad_norm": 1.3983538249725476, "learning_rate": 2.800915027619827e-07, "loss": 0.4235, "step": 47640 }, { "epoch": 0.9038658522705717, "grad_norm": 1.4553681204606705, "learning_rate": 2.7900001266897126e-07, "loss": 0.4269, "step": 47650 }, { "epoch": 0.9040555408020031, "grad_norm": 1.8501040434086748, "learning_rate": 2.7791059240520834e-07, "loss": 0.3994, "step": 47660 }, { "epoch": 0.9042452293334345, "grad_norm": 1.5630592143255007, "learning_rate": 2.7682324244832916e-07, "loss": 0.4044, "step": 47670 }, { "epoch": 0.9044349178648659, "grad_norm": 1.4639473919613013, "learning_rate": 2.7573796327506175e-07, "loss": 0.4126, "step": 47680 }, { "epoch": 0.9046246063962973, "grad_norm": 1.5850074047496943, "learning_rate": 2.74654755361225e-07, "loss": 0.431, "step": 47690 }, { "epoch": 0.9048142949277287, "grad_norm": 1.5713829303828595, "learning_rate": 2.735736191817295e-07, "loss": 0.4122, "step": 47700 }, { "epoch": 0.9050039834591601, "grad_norm": 1.324039863240786, "learning_rate": 2.724945552105779e-07, "loss": 0.4251, "step": 47710 }, { "epoch": 0.9051936719905914, "grad_norm": 1.136551226500308, "learning_rate": 2.714175639208644e-07, "loss": 0.4295, "step": 47720 }, { "epoch": 0.9053833605220228, "grad_norm": 1.3905422108671115, "learning_rate": 2.7034264578477574e-07, "loss": 0.4473, "step": 47730 }, { "epoch": 0.9055730490534543, "grad_norm": 1.6036093002161158, "learning_rate": 2.6926980127358835e-07, "loss": 0.4115, "step": 47740 }, { "epoch": 0.9057627375848856, "grad_norm": 1.6386885851659667, "learning_rate": 2.681990308576682e-07, "loss": 0.4447, "step": 47750 }, { "epoch": 0.905952426116317, "grad_norm": 1.4765315560354209, "learning_rate": 2.6713033500647523e-07, "loss": 0.4254, "step": 47760 }, { "epoch": 0.9061421146477484, "grad_norm": 1.90137666147405, "learning_rate": 2.660637141885564e-07, "loss": 0.4444, "step": 47770 }, { "epoch": 0.9063318031791798, "grad_norm": 1.3658287988809619, "learning_rate": 2.6499916887155254e-07, "loss": 0.4218, "step": 47780 }, { "epoch": 0.9065214917106111, "grad_norm": 1.5339355309475367, "learning_rate": 2.639366995221909e-07, "loss": 0.4161, "step": 47790 }, { "epoch": 0.9067111802420426, "grad_norm": 1.526741494143643, "learning_rate": 2.6287630660629007e-07, "loss": 0.4242, "step": 47800 }, { "epoch": 0.906900868773474, "grad_norm": 1.3309385620037608, "learning_rate": 2.618179905887602e-07, "loss": 0.4323, "step": 47810 }, { "epoch": 0.9070905573049054, "grad_norm": 1.6429093857986712, "learning_rate": 2.60761751933597e-07, "loss": 0.4228, "step": 47820 }, { "epoch": 0.9072802458363367, "grad_norm": 1.5600346829973586, "learning_rate": 2.597075911038888e-07, "loss": 0.423, "step": 47830 }, { "epoch": 0.9074699343677681, "grad_norm": 1.4210052410206322, "learning_rate": 2.586555085618114e-07, "loss": 0.4462, "step": 47840 }, { "epoch": 0.9076596228991995, "grad_norm": 1.3957861607787176, "learning_rate": 2.5760550476862876e-07, "loss": 0.4105, "step": 47850 }, { "epoch": 0.9078493114306309, "grad_norm": 1.4777668597510447, "learning_rate": 2.565575801846959e-07, "loss": 0.4189, "step": 47860 }, { "epoch": 0.9080389999620623, "grad_norm": 1.5126544331856662, "learning_rate": 2.555117352694525e-07, "loss": 0.4197, "step": 47870 }, { "epoch": 0.9082286884934937, "grad_norm": 1.5463349068261898, "learning_rate": 2.5446797048143015e-07, "loss": 0.447, "step": 47880 }, { "epoch": 0.9084183770249251, "grad_norm": 1.5819231038549668, "learning_rate": 2.534262862782477e-07, "loss": 0.4338, "step": 47890 }, { "epoch": 0.9086080655563564, "grad_norm": 1.6090260262230442, "learning_rate": 2.523866831166083e-07, "loss": 0.4345, "step": 47900 }, { "epoch": 0.9087977540877878, "grad_norm": 1.6310804848140474, "learning_rate": 2.513491614523078e-07, "loss": 0.42, "step": 47910 }, { "epoch": 0.9089874426192193, "grad_norm": 1.1292645172058755, "learning_rate": 2.503137217402263e-07, "loss": 0.4286, "step": 47920 }, { "epoch": 0.9091771311506507, "grad_norm": 1.8520045523466577, "learning_rate": 2.4928036443433167e-07, "loss": 0.4406, "step": 47930 }, { "epoch": 0.909366819682082, "grad_norm": 1.3599434181601564, "learning_rate": 2.4824908998767996e-07, "loss": 0.4377, "step": 47940 }, { "epoch": 0.9095565082135134, "grad_norm": 1.5527450152920326, "learning_rate": 2.4721989885241215e-07, "loss": 0.4159, "step": 47950 }, { "epoch": 0.9097461967449448, "grad_norm": 1.5908926655469562, "learning_rate": 2.4619279147975684e-07, "loss": 0.4338, "step": 47960 }, { "epoch": 0.9099358852763761, "grad_norm": 1.4027760694327722, "learning_rate": 2.451677683200293e-07, "loss": 0.4304, "step": 47970 }, { "epoch": 0.9101255738078076, "grad_norm": 1.522034357925995, "learning_rate": 2.441448298226307e-07, "loss": 0.4373, "step": 47980 }, { "epoch": 0.910315262339239, "grad_norm": 1.466143139236492, "learning_rate": 2.431239764360482e-07, "loss": 0.4298, "step": 47990 }, { "epoch": 0.9105049508706704, "grad_norm": 1.504096485773935, "learning_rate": 2.421052086078546e-07, "loss": 0.4329, "step": 48000 }, { "epoch": 0.9106946394021017, "grad_norm": 1.5472448489157167, "learning_rate": 2.410885267847085e-07, "loss": 0.422, "step": 48010 }, { "epoch": 0.9108843279335331, "grad_norm": 1.5410670941663698, "learning_rate": 2.4007393141235456e-07, "loss": 0.4522, "step": 48020 }, { "epoch": 0.9110740164649646, "grad_norm": 1.4557632339988789, "learning_rate": 2.390614229356214e-07, "loss": 0.4281, "step": 48030 }, { "epoch": 0.9112637049963959, "grad_norm": 1.588024969681179, "learning_rate": 2.380510017984239e-07, "loss": 0.4176, "step": 48040 }, { "epoch": 0.9114533935278273, "grad_norm": 1.6440089203509494, "learning_rate": 2.37042668443761e-07, "loss": 0.4412, "step": 48050 }, { "epoch": 0.9116430820592587, "grad_norm": 1.6257067531915563, "learning_rate": 2.3603642331371634e-07, "loss": 0.427, "step": 48060 }, { "epoch": 0.9118327705906901, "grad_norm": 1.7235584216141848, "learning_rate": 2.3503226684945869e-07, "loss": 0.4307, "step": 48070 }, { "epoch": 0.9120224591221214, "grad_norm": 1.5031941165034415, "learning_rate": 2.3403019949123974e-07, "loss": 0.4073, "step": 48080 }, { "epoch": 0.9122121476535529, "grad_norm": 1.2210800486789353, "learning_rate": 2.3303022167839805e-07, "loss": 0.4113, "step": 48090 }, { "epoch": 0.9124018361849843, "grad_norm": 1.352061246890154, "learning_rate": 2.3203233384935243e-07, "loss": 0.4062, "step": 48100 }, { "epoch": 0.9125915247164157, "grad_norm": 1.3408959214037885, "learning_rate": 2.3103653644160673e-07, "loss": 0.4228, "step": 48110 }, { "epoch": 0.912781213247847, "grad_norm": 1.6069367556647727, "learning_rate": 2.300428298917501e-07, "loss": 0.4038, "step": 48120 }, { "epoch": 0.9129709017792784, "grad_norm": 1.6968125456941225, "learning_rate": 2.2905121463545342e-07, "loss": 0.4266, "step": 48130 }, { "epoch": 0.9131605903107098, "grad_norm": 1.5431371106410854, "learning_rate": 2.2806169110747012e-07, "loss": 0.4279, "step": 48140 }, { "epoch": 0.9133502788421412, "grad_norm": 1.4863545137993672, "learning_rate": 2.270742597416381e-07, "loss": 0.4244, "step": 48150 }, { "epoch": 0.9135399673735726, "grad_norm": 1.6132535312309961, "learning_rate": 2.2608892097087552e-07, "loss": 0.3974, "step": 48160 }, { "epoch": 0.913729655905004, "grad_norm": 1.3189386676781445, "learning_rate": 2.2510567522718574e-07, "loss": 0.4344, "step": 48170 }, { "epoch": 0.9139193444364354, "grad_norm": 1.3121257896342788, "learning_rate": 2.2412452294165443e-07, "loss": 0.42, "step": 48180 }, { "epoch": 0.9141090329678667, "grad_norm": 1.7258291708700586, "learning_rate": 2.231454645444464e-07, "loss": 0.4224, "step": 48190 }, { "epoch": 0.9142987214992981, "grad_norm": 1.515873083900082, "learning_rate": 2.2216850046481277e-07, "loss": 0.445, "step": 48200 }, { "epoch": 0.9144884100307296, "grad_norm": 1.311998623076959, "learning_rate": 2.211936311310825e-07, "loss": 0.4211, "step": 48210 }, { "epoch": 0.914678098562161, "grad_norm": 1.512260439845532, "learning_rate": 2.2022085697066763e-07, "loss": 0.4212, "step": 48220 }, { "epoch": 0.9148677870935923, "grad_norm": 1.3052108513289344, "learning_rate": 2.1925017841006312e-07, "loss": 0.4229, "step": 48230 }, { "epoch": 0.9150574756250237, "grad_norm": 1.2479028564277055, "learning_rate": 2.1828159587484242e-07, "loss": 0.4329, "step": 48240 }, { "epoch": 0.9152471641564551, "grad_norm": 1.521319606801902, "learning_rate": 2.1731510978966307e-07, "loss": 0.4442, "step": 48250 }, { "epoch": 0.9154368526878864, "grad_norm": 1.5541977820019055, "learning_rate": 2.1635072057826057e-07, "loss": 0.4107, "step": 48260 }, { "epoch": 0.9156265412193179, "grad_norm": 1.7338729538993956, "learning_rate": 2.1538842866345223e-07, "loss": 0.4122, "step": 48270 }, { "epoch": 0.9158162297507493, "grad_norm": 1.498123793957498, "learning_rate": 2.1442823446713612e-07, "loss": 0.4471, "step": 48280 }, { "epoch": 0.9160059182821807, "grad_norm": 1.6458558703907602, "learning_rate": 2.1347013841029108e-07, "loss": 0.419, "step": 48290 }, { "epoch": 0.916195606813612, "grad_norm": 1.4708981485053498, "learning_rate": 2.1251414091297552e-07, "loss": 0.406, "step": 48300 }, { "epoch": 0.9163852953450434, "grad_norm": 1.41202494372756, "learning_rate": 2.1156024239432637e-07, "loss": 0.4188, "step": 48310 }, { "epoch": 0.9165749838764748, "grad_norm": 1.4600127286800975, "learning_rate": 2.1060844327256247e-07, "loss": 0.4146, "step": 48320 }, { "epoch": 0.9167646724079063, "grad_norm": 1.496316324120932, "learning_rate": 2.096587439649811e-07, "loss": 0.4316, "step": 48330 }, { "epoch": 0.9169543609393376, "grad_norm": 1.4119463935678598, "learning_rate": 2.0871114488795974e-07, "loss": 0.4322, "step": 48340 }, { "epoch": 0.917144049470769, "grad_norm": 1.5548950333906704, "learning_rate": 2.0776564645695442e-07, "loss": 0.4424, "step": 48350 }, { "epoch": 0.9173337380022004, "grad_norm": 1.5282446104346896, "learning_rate": 2.0682224908649961e-07, "loss": 0.4112, "step": 48360 }, { "epoch": 0.9175234265336317, "grad_norm": 1.5315292116049006, "learning_rate": 2.0588095319020895e-07, "loss": 0.4261, "step": 48370 }, { "epoch": 0.9177131150650631, "grad_norm": 1.4977361417712303, "learning_rate": 2.0494175918077563e-07, "loss": 0.4187, "step": 48380 }, { "epoch": 0.9179028035964946, "grad_norm": 1.5834896327686214, "learning_rate": 2.0400466746997138e-07, "loss": 0.4056, "step": 48390 }, { "epoch": 0.918092492127926, "grad_norm": 1.5329184980069994, "learning_rate": 2.0306967846864533e-07, "loss": 0.4264, "step": 48400 }, { "epoch": 0.9182821806593573, "grad_norm": 1.4066826335332545, "learning_rate": 2.0213679258672346e-07, "loss": 0.4142, "step": 48410 }, { "epoch": 0.9184718691907887, "grad_norm": 1.8684109296674292, "learning_rate": 2.0120601023321363e-07, "loss": 0.4032, "step": 48420 }, { "epoch": 0.9186615577222201, "grad_norm": 1.6268928912296647, "learning_rate": 2.0027733181619658e-07, "loss": 0.431, "step": 48430 }, { "epoch": 0.9188512462536516, "grad_norm": 1.3587505203343238, "learning_rate": 1.993507577428344e-07, "loss": 0.4219, "step": 48440 }, { "epoch": 0.9190409347850829, "grad_norm": 1.5492802244155799, "learning_rate": 1.9842628841936495e-07, "loss": 0.4278, "step": 48450 }, { "epoch": 0.9192306233165143, "grad_norm": 1.4667480945779494, "learning_rate": 1.9750392425110343e-07, "loss": 0.4285, "step": 48460 }, { "epoch": 0.9194203118479457, "grad_norm": 1.402662427568354, "learning_rate": 1.9658366564244247e-07, "loss": 0.4187, "step": 48470 }, { "epoch": 0.919610000379377, "grad_norm": 1.7031681506717165, "learning_rate": 1.9566551299685045e-07, "loss": 0.437, "step": 48480 }, { "epoch": 0.9197996889108084, "grad_norm": 1.5174405228400891, "learning_rate": 1.9474946671687423e-07, "loss": 0.4071, "step": 48490 }, { "epoch": 0.9199893774422399, "grad_norm": 1.6705298964250168, "learning_rate": 1.938355272041359e-07, "loss": 0.4242, "step": 48500 }, { "epoch": 0.9201790659736713, "grad_norm": 1.4272961070661507, "learning_rate": 1.9292369485933493e-07, "loss": 0.4179, "step": 48510 }, { "epoch": 0.9203687545051026, "grad_norm": 1.6098205631521125, "learning_rate": 1.9201397008224487e-07, "loss": 0.4284, "step": 48520 }, { "epoch": 0.920558443036534, "grad_norm": 1.542292902511836, "learning_rate": 1.9110635327171724e-07, "loss": 0.4434, "step": 48530 }, { "epoch": 0.9207481315679654, "grad_norm": 1.3817127374960303, "learning_rate": 1.9020084482567812e-07, "loss": 0.4355, "step": 48540 }, { "epoch": 0.9209378200993968, "grad_norm": 1.535501571252213, "learning_rate": 1.8929744514113112e-07, "loss": 0.4371, "step": 48550 }, { "epoch": 0.9211275086308282, "grad_norm": 1.498740256223204, "learning_rate": 1.8839615461415327e-07, "loss": 0.4459, "step": 48560 }, { "epoch": 0.9213171971622596, "grad_norm": 2.4171812362288443, "learning_rate": 1.8749697363989682e-07, "loss": 0.4437, "step": 48570 }, { "epoch": 0.921506885693691, "grad_norm": 1.6007464519952104, "learning_rate": 1.865999026125914e-07, "loss": 0.431, "step": 48580 }, { "epoch": 0.9216965742251223, "grad_norm": 1.4674481251679892, "learning_rate": 1.8570494192553912e-07, "loss": 0.4437, "step": 48590 }, { "epoch": 0.9218862627565537, "grad_norm": 1.5949448546239913, "learning_rate": 1.8481209197111826e-07, "loss": 0.4338, "step": 48600 }, { "epoch": 0.9220759512879851, "grad_norm": 1.4047383391951458, "learning_rate": 1.839213531407813e-07, "loss": 0.4545, "step": 48610 }, { "epoch": 0.9222656398194166, "grad_norm": 1.7191866247006051, "learning_rate": 1.8303272582505417e-07, "loss": 0.3894, "step": 48620 }, { "epoch": 0.9224553283508479, "grad_norm": 1.687609651028876, "learning_rate": 1.8214621041353908e-07, "loss": 0.4275, "step": 48630 }, { "epoch": 0.9226450168822793, "grad_norm": 1.547753518666105, "learning_rate": 1.8126180729491015e-07, "loss": 0.4442, "step": 48640 }, { "epoch": 0.9228347054137107, "grad_norm": 1.534703331713256, "learning_rate": 1.8037951685691835e-07, "loss": 0.4447, "step": 48650 }, { "epoch": 0.9230243939451421, "grad_norm": 1.395560162990009, "learning_rate": 1.7949933948638476e-07, "loss": 0.3991, "step": 48660 }, { "epoch": 0.9232140824765734, "grad_norm": 1.6679991675304044, "learning_rate": 1.7862127556920573e-07, "loss": 0.4433, "step": 48670 }, { "epoch": 0.9234037710080049, "grad_norm": 1.4624245960082451, "learning_rate": 1.7774532549035216e-07, "loss": 0.407, "step": 48680 }, { "epoch": 0.9235934595394363, "grad_norm": 0.9936405759955769, "learning_rate": 1.7687148963386636e-07, "loss": 0.4063, "step": 48690 }, { "epoch": 0.9237831480708676, "grad_norm": 1.7302417207518341, "learning_rate": 1.7599976838286402e-07, "loss": 0.4268, "step": 48700 }, { "epoch": 0.923972836602299, "grad_norm": 1.6222347296451884, "learning_rate": 1.7513016211953616e-07, "loss": 0.4564, "step": 48710 }, { "epoch": 0.9241625251337304, "grad_norm": 2.4547514552731675, "learning_rate": 1.7426267122514218e-07, "loss": 0.4468, "step": 48720 }, { "epoch": 0.9243522136651618, "grad_norm": 1.3463586777110996, "learning_rate": 1.733972960800173e-07, "loss": 0.4246, "step": 48730 }, { "epoch": 0.9245419021965932, "grad_norm": 1.6418852570097944, "learning_rate": 1.7253403706356752e-07, "loss": 0.4279, "step": 48740 }, { "epoch": 0.9247315907280246, "grad_norm": 1.454140069619216, "learning_rate": 1.7167289455427228e-07, "loss": 0.4402, "step": 48750 }, { "epoch": 0.924921279259456, "grad_norm": 1.1905791551341356, "learning_rate": 1.7081386892968344e-07, "loss": 0.4104, "step": 48760 }, { "epoch": 0.9251109677908874, "grad_norm": 1.3141279198914233, "learning_rate": 1.6995696056642142e-07, "loss": 0.4508, "step": 48770 }, { "epoch": 0.9253006563223187, "grad_norm": 1.446811844114713, "learning_rate": 1.6910216984018292e-07, "loss": 0.4279, "step": 48780 }, { "epoch": 0.9254903448537501, "grad_norm": 1.2185532567162798, "learning_rate": 1.6824949712573314e-07, "loss": 0.4334, "step": 48790 }, { "epoch": 0.9256800333851816, "grad_norm": 1.4041358718153296, "learning_rate": 1.6739894279690972e-07, "loss": 0.402, "step": 48800 }, { "epoch": 0.9258697219166129, "grad_norm": 1.408492516832766, "learning_rate": 1.6655050722662214e-07, "loss": 0.415, "step": 48810 }, { "epoch": 0.9260594104480443, "grad_norm": 1.3938315590697217, "learning_rate": 1.6570419078684897e-07, "loss": 0.416, "step": 48820 }, { "epoch": 0.9262490989794757, "grad_norm": 1.4414105193561126, "learning_rate": 1.648599938486417e-07, "loss": 0.4222, "step": 48830 }, { "epoch": 0.9264387875109071, "grad_norm": 1.277368235752878, "learning_rate": 1.6401791678212152e-07, "loss": 0.4305, "step": 48840 }, { "epoch": 0.9266284760423384, "grad_norm": 1.502865036294863, "learning_rate": 1.631779599564809e-07, "loss": 0.4495, "step": 48850 }, { "epoch": 0.9268181645737699, "grad_norm": 1.3692634575938025, "learning_rate": 1.6234012373998186e-07, "loss": 0.4068, "step": 48860 }, { "epoch": 0.9270078531052013, "grad_norm": 1.6541167580738085, "learning_rate": 1.6150440849995785e-07, "loss": 0.4323, "step": 48870 }, { "epoch": 0.9271975416366326, "grad_norm": 1.3067061612562019, "learning_rate": 1.606708146028102e-07, "loss": 0.4085, "step": 48880 }, { "epoch": 0.927387230168064, "grad_norm": 1.382418319973301, "learning_rate": 1.5983934241401377e-07, "loss": 0.4102, "step": 48890 }, { "epoch": 0.9275769186994954, "grad_norm": 1.5018684833196703, "learning_rate": 1.590099922981092e-07, "loss": 0.4192, "step": 48900 }, { "epoch": 0.9277666072309269, "grad_norm": 1.3023344064975177, "learning_rate": 1.5818276461871064e-07, "loss": 0.4278, "step": 48910 }, { "epoch": 0.9279562957623582, "grad_norm": 1.6387997173094035, "learning_rate": 1.5735765973849794e-07, "loss": 0.4258, "step": 48920 }, { "epoch": 0.9281459842937896, "grad_norm": 3.24649147426023, "learning_rate": 1.5653467801922285e-07, "loss": 0.4137, "step": 48930 }, { "epoch": 0.928335672825221, "grad_norm": 1.3777334789041424, "learning_rate": 1.5571381982170565e-07, "loss": 0.4215, "step": 48940 }, { "epoch": 0.9285253613566524, "grad_norm": 1.4321216264842147, "learning_rate": 1.5489508550583455e-07, "loss": 0.4261, "step": 48950 }, { "epoch": 0.9287150498880837, "grad_norm": 1.3436090486902819, "learning_rate": 1.5407847543056854e-07, "loss": 0.4081, "step": 48960 }, { "epoch": 0.9289047384195152, "grad_norm": 1.4568956514629428, "learning_rate": 1.5326398995393343e-07, "loss": 0.4069, "step": 48970 }, { "epoch": 0.9290944269509466, "grad_norm": 1.4851719440935998, "learning_rate": 1.5245162943302415e-07, "loss": 0.4305, "step": 48980 }, { "epoch": 0.9292841154823779, "grad_norm": 1.5266136373258592, "learning_rate": 1.516413942240047e-07, "loss": 0.4236, "step": 48990 }, { "epoch": 0.9294738040138093, "grad_norm": 1.4484425839625192, "learning_rate": 1.50833284682107e-07, "loss": 0.4104, "step": 49000 }, { "epoch": 0.9296634925452407, "grad_norm": 1.7189973256868654, "learning_rate": 1.5002730116162988e-07, "loss": 0.438, "step": 49010 }, { "epoch": 0.9298531810766721, "grad_norm": 1.4675483373744198, "learning_rate": 1.4922344401594236e-07, "loss": 0.3929, "step": 49020 }, { "epoch": 0.9300428696081035, "grad_norm": 1.4121804383265977, "learning_rate": 1.4842171359747747e-07, "loss": 0.4241, "step": 49030 }, { "epoch": 0.9302325581395349, "grad_norm": 1.4530359530383095, "learning_rate": 1.476221102577391e-07, "loss": 0.4416, "step": 49040 }, { "epoch": 0.9304222466709663, "grad_norm": 1.7145863374961918, "learning_rate": 1.4682463434729898e-07, "loss": 0.4437, "step": 49050 }, { "epoch": 0.9306119352023977, "grad_norm": 1.2415890049954879, "learning_rate": 1.4602928621579194e-07, "loss": 0.4168, "step": 49060 }, { "epoch": 0.930801623733829, "grad_norm": 1.2108037064893935, "learning_rate": 1.4523606621192565e-07, "loss": 0.3951, "step": 49070 }, { "epoch": 0.9309913122652604, "grad_norm": 1.5908178886670141, "learning_rate": 1.4444497468346975e-07, "loss": 0.4353, "step": 49080 }, { "epoch": 0.9311810007966919, "grad_norm": 1.150876816009947, "learning_rate": 1.4365601197726232e-07, "loss": 0.4181, "step": 49090 }, { "epoch": 0.9313706893281232, "grad_norm": 1.418425593966501, "learning_rate": 1.4286917843921056e-07, "loss": 0.4215, "step": 49100 }, { "epoch": 0.9315603778595546, "grad_norm": 1.7113715397120244, "learning_rate": 1.4208447441428407e-07, "loss": 0.4265, "step": 49110 }, { "epoch": 0.931750066390986, "grad_norm": 1.7125436256662956, "learning_rate": 1.4130190024652268e-07, "loss": 0.4276, "step": 49120 }, { "epoch": 0.9319397549224174, "grad_norm": 1.6516218918890055, "learning_rate": 1.4052145627902912e-07, "loss": 0.4241, "step": 49130 }, { "epoch": 0.9321294434538487, "grad_norm": 1.3315315857673697, "learning_rate": 1.397431428539736e-07, "loss": 0.4205, "step": 49140 }, { "epoch": 0.9323191319852802, "grad_norm": 1.5890003766662497, "learning_rate": 1.3896696031259316e-07, "loss": 0.4305, "step": 49150 }, { "epoch": 0.9325088205167116, "grad_norm": 1.5955869564001375, "learning_rate": 1.3819290899519e-07, "loss": 0.4239, "step": 49160 }, { "epoch": 0.932698509048143, "grad_norm": 1.554879000502247, "learning_rate": 1.3742098924113046e-07, "loss": 0.4429, "step": 49170 }, { "epoch": 0.9328881975795743, "grad_norm": 1.3195657054059682, "learning_rate": 1.3665120138884825e-07, "loss": 0.4037, "step": 49180 }, { "epoch": 0.9330778861110057, "grad_norm": 1.5857837324347666, "learning_rate": 1.3588354577584117e-07, "loss": 0.4007, "step": 49190 }, { "epoch": 0.9332675746424371, "grad_norm": 1.5579528380951158, "learning_rate": 1.351180227386728e-07, "loss": 0.4159, "step": 49200 }, { "epoch": 0.9334572631738685, "grad_norm": 1.4017999846695535, "learning_rate": 1.3435463261297244e-07, "loss": 0.4361, "step": 49210 }, { "epoch": 0.9336469517052999, "grad_norm": 1.447071870211625, "learning_rate": 1.3359337573343234e-07, "loss": 0.4127, "step": 49220 }, { "epoch": 0.9338366402367313, "grad_norm": 1.7835856449447218, "learning_rate": 1.328342524338111e-07, "loss": 0.432, "step": 49230 }, { "epoch": 0.9340263287681627, "grad_norm": 1.6809267443940146, "learning_rate": 1.320772630469308e-07, "loss": 0.3886, "step": 49240 }, { "epoch": 0.934216017299594, "grad_norm": 1.801055025426548, "learning_rate": 1.3132240790467932e-07, "loss": 0.4216, "step": 49250 }, { "epoch": 0.9344057058310254, "grad_norm": 1.5605429057919382, "learning_rate": 1.3056968733800745e-07, "loss": 0.4076, "step": 49260 }, { "epoch": 0.9345953943624569, "grad_norm": 1.4794373191337187, "learning_rate": 1.2981910167693124e-07, "loss": 0.4397, "step": 49270 }, { "epoch": 0.9347850828938883, "grad_norm": 5.954171515617968, "learning_rate": 1.290706512505291e-07, "loss": 0.4136, "step": 49280 }, { "epoch": 0.9349747714253196, "grad_norm": 1.5058629851696719, "learning_rate": 1.283243363869452e-07, "loss": 0.4186, "step": 49290 }, { "epoch": 0.935164459956751, "grad_norm": 1.4289882826488418, "learning_rate": 1.2758015741338614e-07, "loss": 0.429, "step": 49300 }, { "epoch": 0.9353541484881824, "grad_norm": 10.006034309135229, "learning_rate": 1.268381146561226e-07, "loss": 0.4299, "step": 49310 }, { "epoch": 0.9355438370196137, "grad_norm": 1.3139854974017555, "learning_rate": 1.260982084404888e-07, "loss": 0.4286, "step": 49320 }, { "epoch": 0.9357335255510452, "grad_norm": 1.3969843410543028, "learning_rate": 1.253604390908819e-07, "loss": 0.4205, "step": 49330 }, { "epoch": 0.9359232140824766, "grad_norm": 1.1645883320417263, "learning_rate": 1.246248069307626e-07, "loss": 0.4411, "step": 49340 }, { "epoch": 0.936112902613908, "grad_norm": 1.427932475967878, "learning_rate": 1.238913122826535e-07, "loss": 0.4105, "step": 49350 }, { "epoch": 0.9363025911453393, "grad_norm": 1.7714637075268629, "learning_rate": 1.2315995546814118e-07, "loss": 0.4201, "step": 49360 }, { "epoch": 0.9364922796767707, "grad_norm": 1.3051583832606697, "learning_rate": 1.2243073680787532e-07, "loss": 0.4268, "step": 49370 }, { "epoch": 0.9366819682082022, "grad_norm": 1.5916373730124391, "learning_rate": 1.217036566215668e-07, "loss": 0.4271, "step": 49380 }, { "epoch": 0.9368716567396336, "grad_norm": 1.4565730039831966, "learning_rate": 1.2097871522798954e-07, "loss": 0.4383, "step": 49390 }, { "epoch": 0.9370613452710649, "grad_norm": 1.5092483019911929, "learning_rate": 1.202559129449793e-07, "loss": 0.4494, "step": 49400 }, { "epoch": 0.9372510338024963, "grad_norm": 1.2877314259082004, "learning_rate": 1.1953525008943535e-07, "loss": 0.4215, "step": 49410 }, { "epoch": 0.9374407223339277, "grad_norm": 1.4403574616512917, "learning_rate": 1.1881672697731827e-07, "loss": 0.433, "step": 49420 }, { "epoch": 0.937630410865359, "grad_norm": 1.5201232259008426, "learning_rate": 1.1810034392364944e-07, "loss": 0.3867, "step": 49430 }, { "epoch": 0.9378200993967905, "grad_norm": 1.214558333579652, "learning_rate": 1.1738610124251315e-07, "loss": 0.4263, "step": 49440 }, { "epoch": 0.9380097879282219, "grad_norm": 1.5457362432487172, "learning_rate": 1.1667399924705558e-07, "loss": 0.4475, "step": 49450 }, { "epoch": 0.9381994764596533, "grad_norm": 1.1639730603715044, "learning_rate": 1.159640382494831e-07, "loss": 0.4059, "step": 49460 }, { "epoch": 0.9383891649910846, "grad_norm": 1.453837371039802, "learning_rate": 1.152562185610645e-07, "loss": 0.4279, "step": 49470 }, { "epoch": 0.938578853522516, "grad_norm": 1.3938319805152848, "learning_rate": 1.1455054049212932e-07, "loss": 0.4143, "step": 49480 }, { "epoch": 0.9387685420539474, "grad_norm": 1.300199786194343, "learning_rate": 1.138470043520673e-07, "loss": 0.4286, "step": 49490 }, { "epoch": 0.9389582305853789, "grad_norm": 1.4928614720261355, "learning_rate": 1.131456104493317e-07, "loss": 0.4218, "step": 49500 }, { "epoch": 0.9391479191168102, "grad_norm": 1.257057797948008, "learning_rate": 1.124463590914332e-07, "loss": 0.4669, "step": 49510 }, { "epoch": 0.9393376076482416, "grad_norm": 1.5002872752933647, "learning_rate": 1.1174925058494601e-07, "loss": 0.449, "step": 49520 }, { "epoch": 0.939527296179673, "grad_norm": 1.4202758764083483, "learning_rate": 1.1105428523550233e-07, "loss": 0.4439, "step": 49530 }, { "epoch": 0.9397169847111043, "grad_norm": 1.6322715886249664, "learning_rate": 1.1036146334779673e-07, "loss": 0.4142, "step": 49540 }, { "epoch": 0.9399066732425357, "grad_norm": 1.5114865269124014, "learning_rate": 1.0967078522558406e-07, "loss": 0.431, "step": 49550 }, { "epoch": 0.9400963617739672, "grad_norm": 1.4561293804383773, "learning_rate": 1.0898225117167649e-07, "loss": 0.4295, "step": 49560 }, { "epoch": 0.9402860503053986, "grad_norm": 1.724012986249405, "learning_rate": 1.082958614879498e-07, "loss": 0.4194, "step": 49570 }, { "epoch": 0.9404757388368299, "grad_norm": 1.4068060387721038, "learning_rate": 1.0761161647533825e-07, "loss": 0.4037, "step": 49580 }, { "epoch": 0.9406654273682613, "grad_norm": 1.505403836224482, "learning_rate": 1.0692951643383409e-07, "loss": 0.4019, "step": 49590 }, { "epoch": 0.9408551158996927, "grad_norm": 1.5352600351809609, "learning_rate": 1.0624956166249201e-07, "loss": 0.4226, "step": 49600 }, { "epoch": 0.9410448044311241, "grad_norm": 1.6641478086366421, "learning_rate": 1.0557175245942352e-07, "loss": 0.4187, "step": 49610 }, { "epoch": 0.9412344929625555, "grad_norm": 1.6797428917262915, "learning_rate": 1.0489608912180149e-07, "loss": 0.436, "step": 49620 }, { "epoch": 0.9414241814939869, "grad_norm": 1.333105004643222, "learning_rate": 1.0422257194585727e-07, "loss": 0.4102, "step": 49630 }, { "epoch": 0.9416138700254183, "grad_norm": 1.6006747188727317, "learning_rate": 1.0355120122688078e-07, "loss": 0.431, "step": 49640 }, { "epoch": 0.9418035585568496, "grad_norm": 1.6018832270389067, "learning_rate": 1.02881977259221e-07, "loss": 0.4393, "step": 49650 }, { "epoch": 0.941993247088281, "grad_norm": 1.1516459528672722, "learning_rate": 1.0221490033628712e-07, "loss": 0.4071, "step": 49660 }, { "epoch": 0.9421829356197124, "grad_norm": 1.5223742025428906, "learning_rate": 1.0154997075054407e-07, "loss": 0.4182, "step": 49670 }, { "epoch": 0.9423726241511439, "grad_norm": 1.6241777687830483, "learning_rate": 1.0088718879351922e-07, "loss": 0.4261, "step": 49680 }, { "epoch": 0.9425623126825752, "grad_norm": 1.5161645839216966, "learning_rate": 1.0022655475579457e-07, "loss": 0.4242, "step": 49690 }, { "epoch": 0.9427520012140066, "grad_norm": 1.6094479227920369, "learning_rate": 9.956806892701232e-08, "loss": 0.4556, "step": 49700 }, { "epoch": 0.942941689745438, "grad_norm": 1.6977403237014355, "learning_rate": 9.89117315958732e-08, "loss": 0.4325, "step": 49710 }, { "epoch": 0.9431313782768693, "grad_norm": 1.8735462885148666, "learning_rate": 9.82575430501348e-08, "loss": 0.4456, "step": 49720 }, { "epoch": 0.9433210668083007, "grad_norm": 1.635336544953809, "learning_rate": 9.76055035766138e-08, "loss": 0.4504, "step": 49730 }, { "epoch": 0.9435107553397322, "grad_norm": 1.3711200076570023, "learning_rate": 9.695561346118376e-08, "loss": 0.4196, "step": 49740 }, { "epoch": 0.9437004438711636, "grad_norm": 1.8676887255240118, "learning_rate": 9.630787298877564e-08, "loss": 0.4113, "step": 49750 }, { "epoch": 0.9438901324025949, "grad_norm": 1.3948165464853262, "learning_rate": 9.566228244337894e-08, "loss": 0.3947, "step": 49760 }, { "epoch": 0.9440798209340263, "grad_norm": 1.5901672981626744, "learning_rate": 9.501884210804003e-08, "loss": 0.4354, "step": 49770 }, { "epoch": 0.9442695094654577, "grad_norm": 1.5954062557304545, "learning_rate": 9.437755226486267e-08, "loss": 0.4367, "step": 49780 }, { "epoch": 0.9444591979968892, "grad_norm": 1.5000101693916252, "learning_rate": 9.37384131950081e-08, "loss": 0.4164, "step": 49790 }, { "epoch": 0.9446488865283205, "grad_norm": 1.5548558881039292, "learning_rate": 9.310142517869325e-08, "loss": 0.4341, "step": 49800 }, { "epoch": 0.9448385750597519, "grad_norm": 1.2902345562816213, "learning_rate": 9.246658849519364e-08, "loss": 0.3925, "step": 49810 }, { "epoch": 0.9450282635911833, "grad_norm": 1.3853641606891358, "learning_rate": 9.183390342284104e-08, "loss": 0.4194, "step": 49820 }, { "epoch": 0.9452179521226146, "grad_norm": 1.4692848506515872, "learning_rate": 9.120337023902304e-08, "loss": 0.4344, "step": 49830 }, { "epoch": 0.945407640654046, "grad_norm": 1.678564665535788, "learning_rate": 9.057498922018514e-08, "loss": 0.4318, "step": 49840 }, { "epoch": 0.9455973291854775, "grad_norm": 1.6966906810686933, "learning_rate": 8.994876064182811e-08, "loss": 0.4517, "step": 49850 }, { "epoch": 0.9457870177169089, "grad_norm": 1.4841902974005263, "learning_rate": 8.932468477850952e-08, "loss": 0.4213, "step": 49860 }, { "epoch": 0.9459767062483402, "grad_norm": 1.5940420140460743, "learning_rate": 8.870276190384386e-08, "loss": 0.4236, "step": 49870 }, { "epoch": 0.9461663947797716, "grad_norm": 1.6150760620872935, "learning_rate": 8.808299229050021e-08, "loss": 0.4379, "step": 49880 }, { "epoch": 0.946356083311203, "grad_norm": 1.578668682759706, "learning_rate": 8.746537621020512e-08, "loss": 0.42, "step": 49890 }, { "epoch": 0.9465457718426344, "grad_norm": 1.6278601400614083, "learning_rate": 8.684991393373864e-08, "loss": 0.44, "step": 49900 }, { "epoch": 0.9467354603740658, "grad_norm": 1.274681495357069, "learning_rate": 8.623660573093884e-08, "loss": 0.3944, "step": 49910 }, { "epoch": 0.9469251489054972, "grad_norm": 1.205803211835994, "learning_rate": 8.562545187069948e-08, "loss": 0.4041, "step": 49920 }, { "epoch": 0.9471148374369286, "grad_norm": 1.6175144092749316, "learning_rate": 8.501645262096735e-08, "loss": 0.4335, "step": 49930 }, { "epoch": 0.9473045259683599, "grad_norm": 1.9199390473812632, "learning_rate": 8.440960824874722e-08, "loss": 0.4378, "step": 49940 }, { "epoch": 0.9474942144997913, "grad_norm": 1.5608484967015632, "learning_rate": 8.380491902009846e-08, "loss": 0.4521, "step": 49950 }, { "epoch": 0.9476839030312227, "grad_norm": 1.5237748951864134, "learning_rate": 8.320238520013346e-08, "loss": 0.4289, "step": 49960 }, { "epoch": 0.9478735915626542, "grad_norm": 1.8149068828085075, "learning_rate": 8.260200705302257e-08, "loss": 0.4248, "step": 49970 }, { "epoch": 0.9480632800940855, "grad_norm": 1.4620615959869703, "learning_rate": 8.200378484198968e-08, "loss": 0.4296, "step": 49980 }, { "epoch": 0.9482529686255169, "grad_norm": 1.5286575805298368, "learning_rate": 8.140771882931276e-08, "loss": 0.4389, "step": 49990 }, { "epoch": 0.9484426571569483, "grad_norm": 1.4748522164242688, "learning_rate": 8.08138092763261e-08, "loss": 0.4118, "step": 50000 }, { "epoch": 0.9486323456883797, "grad_norm": 1.852942368390438, "learning_rate": 8.022205644341696e-08, "loss": 0.4317, "step": 50010 }, { "epoch": 0.948822034219811, "grad_norm": 1.4037198279743806, "learning_rate": 7.963246059002782e-08, "loss": 0.4488, "step": 50020 }, { "epoch": 0.9490117227512425, "grad_norm": 1.3971826613095508, "learning_rate": 7.904502197465524e-08, "loss": 0.4092, "step": 50030 }, { "epoch": 0.9492014112826739, "grad_norm": 1.5652739100365118, "learning_rate": 7.845974085485097e-08, "loss": 0.4394, "step": 50040 }, { "epoch": 0.9493910998141052, "grad_norm": 1.4681990012909, "learning_rate": 7.787661748721919e-08, "loss": 0.4394, "step": 50050 }, { "epoch": 0.9495807883455366, "grad_norm": 1.366926911168463, "learning_rate": 7.729565212741819e-08, "loss": 0.415, "step": 50060 }, { "epoch": 0.949770476876968, "grad_norm": 1.4060040349182161, "learning_rate": 7.671684503016142e-08, "loss": 0.4317, "step": 50070 }, { "epoch": 0.9499601654083994, "grad_norm": 1.3877810222539164, "learning_rate": 7.614019644921645e-08, "loss": 0.416, "step": 50080 }, { "epoch": 0.9501498539398308, "grad_norm": 1.4510330190053398, "learning_rate": 7.556570663740215e-08, "loss": 0.442, "step": 50090 }, { "epoch": 0.9503395424712622, "grad_norm": 1.5098427667955099, "learning_rate": 7.499337584659205e-08, "loss": 0.418, "step": 50100 }, { "epoch": 0.9505292310026936, "grad_norm": 1.699488718026681, "learning_rate": 7.442320432771432e-08, "loss": 0.4072, "step": 50110 }, { "epoch": 0.950718919534125, "grad_norm": 1.7342831129145508, "learning_rate": 7.385519233074845e-08, "loss": 0.4355, "step": 50120 }, { "epoch": 0.9509086080655563, "grad_norm": 1.649340218495518, "learning_rate": 7.32893401047291e-08, "loss": 0.4217, "step": 50130 }, { "epoch": 0.9510982965969877, "grad_norm": 1.4031238662304866, "learning_rate": 7.272564789774228e-08, "loss": 0.4188, "step": 50140 }, { "epoch": 0.9512879851284192, "grad_norm": 1.4683113560852619, "learning_rate": 7.216411595692808e-08, "loss": 0.4196, "step": 50150 }, { "epoch": 0.9514776736598505, "grad_norm": 1.5427019278134655, "learning_rate": 7.160474452847844e-08, "loss": 0.4319, "step": 50160 }, { "epoch": 0.9516673621912819, "grad_norm": 1.4521103468646275, "learning_rate": 7.10475338576394e-08, "loss": 0.4394, "step": 50170 }, { "epoch": 0.9518570507227133, "grad_norm": 1.3245456477945516, "learning_rate": 7.049248418870835e-08, "loss": 0.4343, "step": 50180 }, { "epoch": 0.9520467392541447, "grad_norm": 1.6103593269389131, "learning_rate": 6.993959576503729e-08, "loss": 0.4162, "step": 50190 }, { "epoch": 0.952236427785576, "grad_norm": 1.6525670379864, "learning_rate": 6.938886882902729e-08, "loss": 0.448, "step": 50200 }, { "epoch": 0.9524261163170075, "grad_norm": 1.4045126118892604, "learning_rate": 6.884030362213523e-08, "loss": 0.4324, "step": 50210 }, { "epoch": 0.9526158048484389, "grad_norm": 1.4898045901071337, "learning_rate": 6.82939003848676e-08, "loss": 0.4249, "step": 50220 }, { "epoch": 0.9528054933798703, "grad_norm": 1.4233729758981577, "learning_rate": 6.774965935678501e-08, "loss": 0.425, "step": 50230 }, { "epoch": 0.9529951819113016, "grad_norm": 1.5367685651071084, "learning_rate": 6.720758077649881e-08, "loss": 0.4261, "step": 50240 }, { "epoch": 0.953184870442733, "grad_norm": 1.3597416400628772, "learning_rate": 6.66676648816722e-08, "loss": 0.4366, "step": 50250 }, { "epoch": 0.9533745589741645, "grad_norm": 1.6958308443483394, "learning_rate": 6.61299119090214e-08, "loss": 0.4359, "step": 50260 }, { "epoch": 0.9535642475055958, "grad_norm": 2.0010046382632978, "learning_rate": 6.559432209431337e-08, "loss": 0.4418, "step": 50270 }, { "epoch": 0.9537539360370272, "grad_norm": 1.5093804018415642, "learning_rate": 6.50608956723664e-08, "loss": 0.4331, "step": 50280 }, { "epoch": 0.9539436245684586, "grad_norm": 1.6265316226475912, "learning_rate": 6.452963287705171e-08, "loss": 0.4253, "step": 50290 }, { "epoch": 0.95413331309989, "grad_norm": 1.5108995515580605, "learning_rate": 6.400053394129025e-08, "loss": 0.429, "step": 50300 }, { "epoch": 0.9543230016313213, "grad_norm": 1.5424341628023406, "learning_rate": 6.347359909705475e-08, "loss": 0.4478, "step": 50310 }, { "epoch": 0.9545126901627528, "grad_norm": 1.353229539408088, "learning_rate": 6.294882857537044e-08, "loss": 0.4031, "step": 50320 }, { "epoch": 0.9547023786941842, "grad_norm": 1.3804009101223695, "learning_rate": 6.242622260631103e-08, "loss": 0.4238, "step": 50330 }, { "epoch": 0.9548920672256156, "grad_norm": 1.501982386372731, "learning_rate": 6.190578141900494e-08, "loss": 0.4481, "step": 50340 }, { "epoch": 0.9550817557570469, "grad_norm": 1.449847949768673, "learning_rate": 6.138750524162795e-08, "loss": 0.4328, "step": 50350 }, { "epoch": 0.9552714442884783, "grad_norm": 1.5031200271949614, "learning_rate": 6.08713943014072e-08, "loss": 0.4329, "step": 50360 }, { "epoch": 0.9554611328199097, "grad_norm": 1.8809913544468055, "learning_rate": 6.035744882462335e-08, "loss": 0.435, "step": 50370 }, { "epoch": 0.955650821351341, "grad_norm": 1.544138624304727, "learning_rate": 5.98456690366045e-08, "loss": 0.4203, "step": 50380 }, { "epoch": 0.9558405098827725, "grad_norm": 1.264728272469624, "learning_rate": 5.9336055161730575e-08, "loss": 0.4509, "step": 50390 }, { "epoch": 0.9560301984142039, "grad_norm": 1.5897151069098032, "learning_rate": 5.8828607423431215e-08, "loss": 0.4401, "step": 50400 }, { "epoch": 0.9562198869456353, "grad_norm": 1.2773176013299592, "learning_rate": 5.8323326044187886e-08, "loss": 0.4258, "step": 50410 }, { "epoch": 0.9564095754770666, "grad_norm": 1.5392805417618638, "learning_rate": 5.7820211245530614e-08, "loss": 0.4139, "step": 50420 }, { "epoch": 0.956599264008498, "grad_norm": 1.616063291767939, "learning_rate": 5.73192632480396e-08, "loss": 0.4262, "step": 50430 }, { "epoch": 0.9567889525399295, "grad_norm": 1.2421612443661134, "learning_rate": 5.68204822713464e-08, "loss": 0.4205, "step": 50440 }, { "epoch": 0.9569786410713609, "grad_norm": 1.3364052354815683, "learning_rate": 5.632386853413274e-08, "loss": 0.436, "step": 50450 }, { "epoch": 0.9571683296027922, "grad_norm": 2.087554150957356, "learning_rate": 5.5829422254126685e-08, "loss": 0.415, "step": 50460 }, { "epoch": 0.9573580181342236, "grad_norm": 1.1590557140057234, "learning_rate": 5.5337143648109824e-08, "loss": 0.4133, "step": 50470 }, { "epoch": 0.957547706665655, "grad_norm": 1.4784116925897928, "learning_rate": 5.4847032931911735e-08, "loss": 0.4285, "step": 50480 }, { "epoch": 0.9577373951970863, "grad_norm": 1.3855124988411416, "learning_rate": 5.43590903204122e-08, "loss": 0.4318, "step": 50490 }, { "epoch": 0.9579270837285178, "grad_norm": 1.4138530182185867, "learning_rate": 5.387331602753953e-08, "loss": 0.4269, "step": 50500 }, { "epoch": 0.9581167722599492, "grad_norm": 1.5182007188488793, "learning_rate": 5.338971026627227e-08, "loss": 0.4141, "step": 50510 }, { "epoch": 0.9583064607913806, "grad_norm": 1.5448598624751249, "learning_rate": 5.290827324863745e-08, "loss": 0.436, "step": 50520 }, { "epoch": 0.9584961493228119, "grad_norm": 1.5625202099756892, "learning_rate": 5.242900518571181e-08, "loss": 0.414, "step": 50530 }, { "epoch": 0.9586858378542433, "grad_norm": 1.4937296860144031, "learning_rate": 5.195190628762059e-08, "loss": 0.4074, "step": 50540 }, { "epoch": 0.9588755263856747, "grad_norm": 1.5740079458858984, "learning_rate": 5.1476976763539246e-08, "loss": 0.4291, "step": 50550 }, { "epoch": 0.9590652149171061, "grad_norm": 1.6135256769616237, "learning_rate": 5.1004216821690655e-08, "loss": 0.4187, "step": 50560 }, { "epoch": 0.9592549034485375, "grad_norm": 1.4538875088920387, "learning_rate": 5.0533626669346804e-08, "loss": 0.4167, "step": 50570 }, { "epoch": 0.9594445919799689, "grad_norm": 1.47685448036831, "learning_rate": 5.006520651282931e-08, "loss": 0.407, "step": 50580 }, { "epoch": 0.9596342805114003, "grad_norm": 1.2978771166358556, "learning_rate": 4.959895655750724e-08, "loss": 0.4188, "step": 50590 }, { "epoch": 0.9598239690428316, "grad_norm": 1.4628926552264403, "learning_rate": 4.913487700779873e-08, "loss": 0.4147, "step": 50600 }, { "epoch": 0.960013657574263, "grad_norm": 1.5193614760707517, "learning_rate": 4.8672968067171034e-08, "loss": 0.4155, "step": 50610 }, { "epoch": 0.9602033461056945, "grad_norm": 1.3253993425413457, "learning_rate": 4.8213229938137707e-08, "loss": 0.4285, "step": 50620 }, { "epoch": 0.9603930346371259, "grad_norm": 1.3542026041405542, "learning_rate": 4.775566282226307e-08, "loss": 0.4142, "step": 50630 }, { "epoch": 0.9605827231685572, "grad_norm": 1.596217862172447, "learning_rate": 4.7300266920157764e-08, "loss": 0.4111, "step": 50640 }, { "epoch": 0.9607724116999886, "grad_norm": 1.653090362947965, "learning_rate": 4.684704243148097e-08, "loss": 0.4364, "step": 50650 }, { "epoch": 0.96096210023142, "grad_norm": 1.3596080919031495, "learning_rate": 4.6395989554940955e-08, "loss": 0.4459, "step": 50660 }, { "epoch": 0.9611517887628513, "grad_norm": 1.4403092889847777, "learning_rate": 4.594710848829176e-08, "loss": 0.4591, "step": 50670 }, { "epoch": 0.9613414772942828, "grad_norm": 1.161784512046351, "learning_rate": 4.550039942833706e-08, "loss": 0.4045, "step": 50680 }, { "epoch": 0.9615311658257142, "grad_norm": 1.6152540799996553, "learning_rate": 4.505586257092742e-08, "loss": 0.4148, "step": 50690 }, { "epoch": 0.9617208543571456, "grad_norm": 1.4947213739598955, "learning_rate": 4.461349811096194e-08, "loss": 0.4327, "step": 50700 }, { "epoch": 0.9619105428885769, "grad_norm": 1.339989229134176, "learning_rate": 4.417330624238603e-08, "loss": 0.4252, "step": 50710 }, { "epoch": 0.9621002314200083, "grad_norm": 1.4631141834010903, "learning_rate": 4.373528715819253e-08, "loss": 0.4296, "step": 50720 }, { "epoch": 0.9622899199514398, "grad_norm": 1.3399121517469987, "learning_rate": 4.3299441050422833e-08, "loss": 0.424, "step": 50730 }, { "epoch": 0.9624796084828712, "grad_norm": 1.3567477625205264, "learning_rate": 4.2865768110165185e-08, "loss": 0.4158, "step": 50740 }, { "epoch": 0.9626692970143025, "grad_norm": 1.630569362386526, "learning_rate": 4.2434268527554725e-08, "loss": 0.4401, "step": 50750 }, { "epoch": 0.9628589855457339, "grad_norm": 1.5284709225967736, "learning_rate": 4.200494249177456e-08, "loss": 0.4257, "step": 50760 }, { "epoch": 0.9630486740771653, "grad_norm": 1.5547984932370018, "learning_rate": 4.1577790191053015e-08, "loss": 0.4105, "step": 50770 }, { "epoch": 0.9632383626085966, "grad_norm": 1.429473517254782, "learning_rate": 4.1152811812667504e-08, "loss": 0.3976, "step": 50780 }, { "epoch": 0.963428051140028, "grad_norm": 1.606546307406173, "learning_rate": 4.073000754294121e-08, "loss": 0.4205, "step": 50790 }, { "epoch": 0.9636177396714595, "grad_norm": 1.6522961910589005, "learning_rate": 4.030937756724418e-08, "loss": 0.4686, "step": 50800 }, { "epoch": 0.9638074282028909, "grad_norm": 1.3702633613020496, "learning_rate": 3.989092206999334e-08, "loss": 0.3933, "step": 50810 }, { "epoch": 0.9639971167343222, "grad_norm": 1.7383083422539394, "learning_rate": 3.94746412346525e-08, "loss": 0.4347, "step": 50820 }, { "epoch": 0.9641868052657536, "grad_norm": 1.4176182499499626, "learning_rate": 3.906053524373121e-08, "loss": 0.422, "step": 50830 }, { "epoch": 0.964376493797185, "grad_norm": 1.9630507260635983, "learning_rate": 3.864860427878647e-08, "loss": 0.4181, "step": 50840 }, { "epoch": 0.9645661823286165, "grad_norm": 1.4794403611099598, "learning_rate": 3.823884852042159e-08, "loss": 0.4244, "step": 50850 }, { "epoch": 0.9647558708600478, "grad_norm": 1.5813934414282, "learning_rate": 3.7831268148285084e-08, "loss": 0.4219, "step": 50860 }, { "epoch": 0.9649455593914792, "grad_norm": 1.6360549296988909, "learning_rate": 3.742586334107345e-08, "loss": 0.4414, "step": 50870 }, { "epoch": 0.9651352479229106, "grad_norm": 1.5998596836759726, "learning_rate": 3.70226342765273e-08, "loss": 0.4193, "step": 50880 }, { "epoch": 0.9653249364543419, "grad_norm": 1.5659138830558494, "learning_rate": 3.662158113143577e-08, "loss": 0.4251, "step": 50890 }, { "epoch": 0.9655146249857733, "grad_norm": 1.4242798911223282, "learning_rate": 3.6222704081631534e-08, "loss": 0.4209, "step": 50900 }, { "epoch": 0.9657043135172048, "grad_norm": 1.351906755271376, "learning_rate": 3.582600330199526e-08, "loss": 0.4187, "step": 50910 }, { "epoch": 0.9658940020486362, "grad_norm": 1.217702698378508, "learning_rate": 3.543147896645227e-08, "loss": 0.3859, "step": 50920 }, { "epoch": 0.9660836905800675, "grad_norm": 1.4591795959642944, "learning_rate": 3.5039131247973646e-08, "loss": 0.4407, "step": 50930 }, { "epoch": 0.9662733791114989, "grad_norm": 1.5434753520280993, "learning_rate": 3.464896031857623e-08, "loss": 0.4343, "step": 50940 }, { "epoch": 0.9664630676429303, "grad_norm": 2.28364895774402, "learning_rate": 3.426096634932374e-08, "loss": 0.4315, "step": 50950 }, { "epoch": 0.9666527561743617, "grad_norm": 2.0156948613066605, "learning_rate": 3.3875149510324e-08, "loss": 0.4028, "step": 50960 }, { "epoch": 0.9668424447057931, "grad_norm": 1.7680687189693887, "learning_rate": 3.349150997073059e-08, "loss": 0.4112, "step": 50970 }, { "epoch": 0.9670321332372245, "grad_norm": 1.5679583390898149, "learning_rate": 3.311004789874339e-08, "loss": 0.4028, "step": 50980 }, { "epoch": 0.9672218217686559, "grad_norm": 1.223168917972058, "learning_rate": 3.273076346160586e-08, "loss": 0.4219, "step": 50990 }, { "epoch": 0.9674115103000872, "grad_norm": 1.801712311195421, "learning_rate": 3.235365682560832e-08, "loss": 0.4291, "step": 51000 }, { "epoch": 0.9676011988315186, "grad_norm": 1.5361506106443878, "learning_rate": 3.197872815608627e-08, "loss": 0.3907, "step": 51010 }, { "epoch": 0.96779088736295, "grad_norm": 1.7239820382429965, "learning_rate": 3.160597761741824e-08, "loss": 0.4438, "step": 51020 }, { "epoch": 0.9679805758943815, "grad_norm": 1.0626668297163366, "learning_rate": 3.1235405373031267e-08, "loss": 0.4288, "step": 51030 }, { "epoch": 0.9681702644258128, "grad_norm": 1.8562539701958631, "learning_rate": 3.086701158539429e-08, "loss": 0.4186, "step": 51040 }, { "epoch": 0.9683599529572442, "grad_norm": 1.5699037357725492, "learning_rate": 3.0500796416021996e-08, "loss": 0.4303, "step": 51050 }, { "epoch": 0.9685496414886756, "grad_norm": 1.3564565830244177, "learning_rate": 3.0136760025475386e-08, "loss": 0.3912, "step": 51060 }, { "epoch": 0.968739330020107, "grad_norm": 1.6212900015278442, "learning_rate": 2.977490257335791e-08, "loss": 0.4259, "step": 51070 }, { "epoch": 0.9689290185515383, "grad_norm": 1.417026349876405, "learning_rate": 2.941522421831988e-08, "loss": 0.4502, "step": 51080 }, { "epoch": 0.9691187070829698, "grad_norm": 1.2996344024299096, "learning_rate": 2.905772511805405e-08, "loss": 0.416, "step": 51090 }, { "epoch": 0.9693083956144012, "grad_norm": 1.5425538074207374, "learning_rate": 2.8702405429299473e-08, "loss": 0.4344, "step": 51100 }, { "epoch": 0.9694980841458325, "grad_norm": 1.331652757599783, "learning_rate": 2.834926530783877e-08, "loss": 0.438, "step": 51110 }, { "epoch": 0.9696877726772639, "grad_norm": 1.5796601867505766, "learning_rate": 2.799830490849975e-08, "loss": 0.4527, "step": 51120 }, { "epoch": 0.9698774612086953, "grad_norm": 1.7187355650092144, "learning_rate": 2.7649524385153782e-08, "loss": 0.4401, "step": 51130 }, { "epoch": 0.9700671497401268, "grad_norm": 1.3905043098033927, "learning_rate": 2.7302923890716314e-08, "loss": 0.4242, "step": 51140 }, { "epoch": 0.9702568382715581, "grad_norm": 1.7234447091074816, "learning_rate": 2.6958503577148577e-08, "loss": 0.4149, "step": 51150 }, { "epoch": 0.9704465268029895, "grad_norm": 1.6377943904761612, "learning_rate": 2.6616263595453663e-08, "loss": 0.4497, "step": 51160 }, { "epoch": 0.9706362153344209, "grad_norm": 1.2805283842498765, "learning_rate": 2.627620409568099e-08, "loss": 0.4105, "step": 51170 }, { "epoch": 0.9708259038658523, "grad_norm": 1.5330977554381906, "learning_rate": 2.5938325226921855e-08, "loss": 0.4223, "step": 51180 }, { "epoch": 0.9710155923972836, "grad_norm": 1.661384451843816, "learning_rate": 2.5602627137313872e-08, "loss": 0.4457, "step": 51190 }, { "epoch": 0.971205280928715, "grad_norm": 1.5464692200172712, "learning_rate": 2.5269109974035977e-08, "loss": 0.43, "step": 51200 }, { "epoch": 0.9713949694601465, "grad_norm": 1.1998035167398133, "learning_rate": 2.4937773883312866e-08, "loss": 0.3968, "step": 51210 }, { "epoch": 0.9715846579915778, "grad_norm": 1.7180685722900653, "learning_rate": 2.460861901041223e-08, "loss": 0.421, "step": 51220 }, { "epoch": 0.9717743465230092, "grad_norm": 1.385522672446319, "learning_rate": 2.428164549964529e-08, "loss": 0.4222, "step": 51230 }, { "epoch": 0.9719640350544406, "grad_norm": 1.6476616796962293, "learning_rate": 2.3956853494367938e-08, "loss": 0.4404, "step": 51240 }, { "epoch": 0.972153723585872, "grad_norm": 1.4859448764091734, "learning_rate": 2.3634243136977374e-08, "loss": 0.4268, "step": 51250 }, { "epoch": 0.9723434121173034, "grad_norm": 1.6803994906713748, "learning_rate": 2.3313814568917127e-08, "loss": 0.4046, "step": 51260 }, { "epoch": 0.9725331006487348, "grad_norm": 1.4169063973824647, "learning_rate": 2.2995567930672035e-08, "loss": 0.4186, "step": 51270 }, { "epoch": 0.9727227891801662, "grad_norm": 1.3395789805131444, "learning_rate": 2.267950336177105e-08, "loss": 0.4335, "step": 51280 }, { "epoch": 0.9729124777115976, "grad_norm": 1.5020729783485132, "learning_rate": 2.2365621000787206e-08, "loss": 0.4406, "step": 51290 }, { "epoch": 0.9731021662430289, "grad_norm": 1.5556698996504394, "learning_rate": 2.2053920985334877e-08, "loss": 0.444, "step": 51300 }, { "epoch": 0.9732918547744603, "grad_norm": 1.6132943319155624, "learning_rate": 2.1744403452073626e-08, "loss": 0.3907, "step": 51310 }, { "epoch": 0.9734815433058918, "grad_norm": 1.6521842094337649, "learning_rate": 2.1437068536704908e-08, "loss": 0.4284, "step": 51320 }, { "epoch": 0.9736712318373231, "grad_norm": 1.5521550787480016, "learning_rate": 2.1131916373973715e-08, "loss": 0.4388, "step": 51330 }, { "epoch": 0.9738609203687545, "grad_norm": 1.5511616433073, "learning_rate": 2.0828947097668028e-08, "loss": 0.3931, "step": 51340 }, { "epoch": 0.9740506089001859, "grad_norm": 1.5811999205021763, "learning_rate": 2.0528160840618815e-08, "loss": 0.4428, "step": 51350 }, { "epoch": 0.9742402974316173, "grad_norm": 1.5853784628651577, "learning_rate": 2.0229557734700034e-08, "loss": 0.4064, "step": 51360 }, { "epoch": 0.9744299859630486, "grad_norm": 1.4048221924399458, "learning_rate": 1.9933137910828072e-08, "loss": 0.4348, "step": 51370 }, { "epoch": 0.9746196744944801, "grad_norm": 1.4474317873179938, "learning_rate": 1.9638901498962303e-08, "loss": 0.4399, "step": 51380 }, { "epoch": 0.9748093630259115, "grad_norm": 1.4544354850016092, "learning_rate": 1.9346848628104542e-08, "loss": 0.4212, "step": 51390 }, { "epoch": 0.9749990515573428, "grad_norm": 1.6975958881988717, "learning_rate": 1.9056979426300694e-08, "loss": 0.4302, "step": 51400 }, { "epoch": 0.9751887400887742, "grad_norm": 1.4598472777587301, "learning_rate": 1.8769294020636874e-08, "loss": 0.4331, "step": 51410 }, { "epoch": 0.9753784286202056, "grad_norm": 1.4671672999743246, "learning_rate": 1.8483792537244416e-08, "loss": 0.4203, "step": 51420 }, { "epoch": 0.975568117151637, "grad_norm": 1.4545783074077367, "learning_rate": 1.8200475101294857e-08, "loss": 0.4064, "step": 51430 }, { "epoch": 0.9757578056830684, "grad_norm": 2.5391153992582396, "learning_rate": 1.791934183700328e-08, "loss": 0.4391, "step": 51440 }, { "epoch": 0.9759474942144998, "grad_norm": 1.6131820643379506, "learning_rate": 1.7640392867627197e-08, "loss": 0.436, "step": 51450 }, { "epoch": 0.9761371827459312, "grad_norm": 1.6974300218176213, "learning_rate": 1.736362831546601e-08, "loss": 0.4019, "step": 51460 }, { "epoch": 0.9763268712773626, "grad_norm": 1.4202629127349642, "learning_rate": 1.7089048301861532e-08, "loss": 0.4583, "step": 51470 }, { "epoch": 0.9765165598087939, "grad_norm": 1.3182779758701202, "learning_rate": 1.6816652947199142e-08, "loss": 0.4084, "step": 51480 }, { "epoch": 0.9767062483402253, "grad_norm": 1.7087113148415158, "learning_rate": 1.654644237090386e-08, "loss": 0.4189, "step": 51490 }, { "epoch": 0.9768959368716568, "grad_norm": 1.5912413533096885, "learning_rate": 1.627841669144481e-08, "loss": 0.4527, "step": 51500 }, { "epoch": 0.9770856254030881, "grad_norm": 1.2192693540184145, "learning_rate": 1.6012576026331882e-08, "loss": 0.4253, "step": 51510 }, { "epoch": 0.9772753139345195, "grad_norm": 1.7667976323841887, "learning_rate": 1.574892049211907e-08, "loss": 0.403, "step": 51520 }, { "epoch": 0.9774650024659509, "grad_norm": 1.575861787794697, "learning_rate": 1.5487450204399456e-08, "loss": 0.4192, "step": 51530 }, { "epoch": 0.9776546909973823, "grad_norm": 1.2469803309893683, "learning_rate": 1.5228165277810235e-08, "loss": 0.4169, "step": 51540 }, { "epoch": 0.9778443795288136, "grad_norm": 1.7095234194468725, "learning_rate": 1.4971065826029362e-08, "loss": 0.4438, "step": 51550 }, { "epoch": 0.9780340680602451, "grad_norm": 1.3897392946214409, "learning_rate": 1.471615196177778e-08, "loss": 0.4205, "step": 51560 }, { "epoch": 0.9782237565916765, "grad_norm": 1.2584755986569096, "learning_rate": 1.4463423796816645e-08, "loss": 0.4202, "step": 51570 }, { "epoch": 0.9784134451231079, "grad_norm": 1.7278277333419574, "learning_rate": 1.4212881441950655e-08, "loss": 0.419, "step": 51580 }, { "epoch": 0.9786031336545392, "grad_norm": 1.7262817774291692, "learning_rate": 1.3964525007024165e-08, "loss": 0.4416, "step": 51590 }, { "epoch": 0.9787928221859706, "grad_norm": 1.7248302039854784, "learning_rate": 1.3718354600923967e-08, "loss": 0.4438, "step": 51600 }, { "epoch": 0.978982510717402, "grad_norm": 1.4144214143132483, "learning_rate": 1.3474370331579279e-08, "loss": 0.4151, "step": 51610 }, { "epoch": 0.9791721992488334, "grad_norm": 1.6467189259990858, "learning_rate": 1.3232572305960089e-08, "loss": 0.403, "step": 51620 }, { "epoch": 0.9793618877802648, "grad_norm": 1.4483209076371184, "learning_rate": 1.2992960630078266e-08, "loss": 0.4227, "step": 51630 }, { "epoch": 0.9795515763116962, "grad_norm": 1.4511869620889157, "learning_rate": 1.2755535408986442e-08, "loss": 0.4406, "step": 51640 }, { "epoch": 0.9797412648431276, "grad_norm": 1.4579239013384535, "learning_rate": 1.252029674677857e-08, "loss": 0.4068, "step": 51650 }, { "epoch": 0.9799309533745589, "grad_norm": 1.1848569758691898, "learning_rate": 1.2287244746591042e-08, "loss": 0.4042, "step": 51660 }, { "epoch": 0.9801206419059904, "grad_norm": 1.5146673153265788, "learning_rate": 1.2056379510600458e-08, "loss": 0.4255, "step": 51670 }, { "epoch": 0.9803103304374218, "grad_norm": 1.197061077313418, "learning_rate": 1.182770114002585e-08, "loss": 0.4129, "step": 51680 }, { "epoch": 0.9805000189688532, "grad_norm": 1.7113232140837242, "learning_rate": 1.1601209735125907e-08, "loss": 0.4127, "step": 51690 }, { "epoch": 0.9806897075002845, "grad_norm": 1.5326143386616988, "learning_rate": 1.1376905395201753e-08, "loss": 0.4401, "step": 51700 }, { "epoch": 0.9808793960317159, "grad_norm": 1.5510116154961187, "learning_rate": 1.115478821859528e-08, "loss": 0.4271, "step": 51710 }, { "epoch": 0.9810690845631473, "grad_norm": 1.6892646586416602, "learning_rate": 1.0934858302689144e-08, "loss": 0.4295, "step": 51720 }, { "epoch": 0.9812587730945787, "grad_norm": 1.3528328309748985, "learning_rate": 1.0717115743907324e-08, "loss": 0.4061, "step": 51730 }, { "epoch": 0.9814484616260101, "grad_norm": 1.3373451367319142, "learning_rate": 1.050156063771457e-08, "loss": 0.4271, "step": 51740 }, { "epoch": 0.9816381501574415, "grad_norm": 1.6360355361700614, "learning_rate": 1.0288193078616949e-08, "loss": 0.4425, "step": 51750 }, { "epoch": 0.9818278386888729, "grad_norm": 1.2515431543584778, "learning_rate": 1.0077013160161298e-08, "loss": 0.4144, "step": 51760 }, { "epoch": 0.9820175272203042, "grad_norm": 1.721030582285527, "learning_rate": 9.868020974935777e-09, "loss": 0.4254, "step": 51770 }, { "epoch": 0.9822072157517356, "grad_norm": 1.5089716314315818, "learning_rate": 9.661216614568203e-09, "loss": 0.419, "step": 51780 }, { "epoch": 0.9823969042831671, "grad_norm": 1.3481176409973123, "learning_rate": 9.456600169727714e-09, "loss": 0.4199, "step": 51790 }, { "epoch": 0.9825865928145985, "grad_norm": 1.6576990649982042, "learning_rate": 9.254171730124773e-09, "loss": 0.422, "step": 51800 }, { "epoch": 0.9827762813460298, "grad_norm": 2.1020512540417053, "learning_rate": 9.053931384510051e-09, "loss": 0.3989, "step": 51810 }, { "epoch": 0.9829659698774612, "grad_norm": 1.5104399355996017, "learning_rate": 8.85587922067499e-09, "loss": 0.3988, "step": 51820 }, { "epoch": 0.9831556584088926, "grad_norm": 1.56707706920002, "learning_rate": 8.660015325451243e-09, "loss": 0.4204, "step": 51830 }, { "epoch": 0.9833453469403239, "grad_norm": 1.5307606612540954, "learning_rate": 8.466339784712341e-09, "loss": 0.4103, "step": 51840 }, { "epoch": 0.9835350354717554, "grad_norm": 2.0029088209380266, "learning_rate": 8.274852683370916e-09, "loss": 0.4231, "step": 51850 }, { "epoch": 0.9837247240031868, "grad_norm": 1.627388743223124, "learning_rate": 8.085554105380367e-09, "loss": 0.4191, "step": 51860 }, { "epoch": 0.9839144125346182, "grad_norm": 1.269038547003959, "learning_rate": 7.898444133735416e-09, "loss": 0.4345, "step": 51870 }, { "epoch": 0.9841041010660495, "grad_norm": 1.525324005303102, "learning_rate": 7.713522850471e-09, "loss": 0.4144, "step": 51880 }, { "epoch": 0.9842937895974809, "grad_norm": 1.31572409121765, "learning_rate": 7.53079033666171e-09, "loss": 0.4314, "step": 51890 }, { "epoch": 0.9844834781289123, "grad_norm": 1.4543480306986587, "learning_rate": 7.350246672423456e-09, "loss": 0.3816, "step": 51900 }, { "epoch": 0.9846731666603438, "grad_norm": 1.5632306807302339, "learning_rate": 7.171891936911257e-09, "loss": 0.408, "step": 51910 }, { "epoch": 0.9848628551917751, "grad_norm": 1.7349521649781172, "learning_rate": 6.995726208322007e-09, "loss": 0.4364, "step": 51920 }, { "epoch": 0.9850525437232065, "grad_norm": 1.5440397728661446, "learning_rate": 6.8217495638916995e-09, "loss": 0.4117, "step": 51930 }, { "epoch": 0.9852422322546379, "grad_norm": 1.7419974379869978, "learning_rate": 6.6499620798970985e-09, "loss": 0.4373, "step": 51940 }, { "epoch": 0.9854319207860692, "grad_norm": 1.5983177736669558, "learning_rate": 6.480363831655179e-09, "loss": 0.4282, "step": 51950 }, { "epoch": 0.9856216093175006, "grad_norm": 1.6806595474896724, "learning_rate": 6.312954893522572e-09, "loss": 0.4192, "step": 51960 }, { "epoch": 0.9858112978489321, "grad_norm": 2.3321083401974527, "learning_rate": 6.147735338896122e-09, "loss": 0.4406, "step": 51970 }, { "epoch": 0.9860009863803635, "grad_norm": 1.555836744978688, "learning_rate": 5.98470524021455e-09, "loss": 0.4249, "step": 51980 }, { "epoch": 0.9861906749117948, "grad_norm": 5.473739694849766, "learning_rate": 5.82386466895346e-09, "loss": 0.4178, "step": 51990 }, { "epoch": 0.9863803634432262, "grad_norm": 1.6089560490714352, "learning_rate": 5.665213695631444e-09, "loss": 0.4261, "step": 52000 }, { "epoch": 0.9865700519746576, "grad_norm": 1.5492215654131798, "learning_rate": 5.508752389805083e-09, "loss": 0.43, "step": 52010 }, { "epoch": 0.9867597405060891, "grad_norm": 1.3291167124076295, "learning_rate": 5.3544808200722834e-09, "loss": 0.4295, "step": 52020 }, { "epoch": 0.9869494290375204, "grad_norm": 1.5061052505998425, "learning_rate": 5.202399054070051e-09, "loss": 0.4092, "step": 52030 }, { "epoch": 0.9871391175689518, "grad_norm": 1.3164408707467483, "learning_rate": 5.052507158476161e-09, "loss": 0.4102, "step": 52040 }, { "epoch": 0.9873288061003832, "grad_norm": 1.4639601455514204, "learning_rate": 4.904805199006935e-09, "loss": 0.428, "step": 52050 }, { "epoch": 0.9875184946318145, "grad_norm": 1.4583251497812268, "learning_rate": 4.75929324041946e-09, "loss": 0.4178, "step": 52060 }, { "epoch": 0.9877081831632459, "grad_norm": 1.3434080209791817, "learning_rate": 4.615971346511594e-09, "loss": 0.4169, "step": 52070 }, { "epoch": 0.9878978716946774, "grad_norm": 1.6184156532519223, "learning_rate": 4.4748395801191835e-09, "loss": 0.4066, "step": 52080 }, { "epoch": 0.9880875602261088, "grad_norm": 1.4813207205851553, "learning_rate": 4.335898003118289e-09, "loss": 0.4114, "step": 52090 }, { "epoch": 0.9882772487575401, "grad_norm": 1.6184363788787866, "learning_rate": 4.199146676426291e-09, "loss": 0.3917, "step": 52100 }, { "epoch": 0.9884669372889715, "grad_norm": 1.3130827020880604, "learning_rate": 4.06458565999801e-09, "loss": 0.4163, "step": 52110 }, { "epoch": 0.9886566258204029, "grad_norm": 3.0151630483679734, "learning_rate": 3.932215012829588e-09, "loss": 0.43, "step": 52120 }, { "epoch": 0.9888463143518343, "grad_norm": 1.4754479394121596, "learning_rate": 3.802034792956266e-09, "loss": 0.4349, "step": 52130 }, { "epoch": 0.9890360028832657, "grad_norm": 1.5852407682140357, "learning_rate": 3.6740450574529463e-09, "loss": 0.4259, "step": 52140 }, { "epoch": 0.9892256914146971, "grad_norm": 1.6825705790234868, "learning_rate": 3.5482458624341855e-09, "loss": 0.4267, "step": 52150 }, { "epoch": 0.9894153799461285, "grad_norm": 1.2140538735924453, "learning_rate": 3.424637263054198e-09, "loss": 0.395, "step": 52160 }, { "epoch": 0.9896050684775598, "grad_norm": 1.320671637409867, "learning_rate": 3.303219313506856e-09, "loss": 0.4191, "step": 52170 }, { "epoch": 0.9897947570089912, "grad_norm": 1.4774027619123071, "learning_rate": 3.1839920670245774e-09, "loss": 0.4283, "step": 52180 }, { "epoch": 0.9899844455404226, "grad_norm": 1.7382221721647049, "learning_rate": 3.066955575881658e-09, "loss": 0.4396, "step": 52190 }, { "epoch": 0.9901741340718541, "grad_norm": 1.3910108301849464, "learning_rate": 2.9521098913898315e-09, "loss": 0.4356, "step": 52200 }, { "epoch": 0.9903638226032854, "grad_norm": 1.4769960551905783, "learning_rate": 2.839455063900487e-09, "loss": 0.4304, "step": 52210 }, { "epoch": 0.9905535111347168, "grad_norm": 1.3873194357616399, "learning_rate": 2.728991142805226e-09, "loss": 0.4184, "step": 52220 }, { "epoch": 0.9907431996661482, "grad_norm": 1.1200837502377994, "learning_rate": 2.6207181765347532e-09, "loss": 0.4116, "step": 52230 }, { "epoch": 0.9909328881975795, "grad_norm": 1.4052930382907096, "learning_rate": 2.5146362125594294e-09, "loss": 0.3886, "step": 52240 }, { "epoch": 0.9911225767290109, "grad_norm": 1.7810243648741448, "learning_rate": 2.410745297388162e-09, "loss": 0.436, "step": 52250 }, { "epoch": 0.9913122652604424, "grad_norm": 1.467687090893613, "learning_rate": 2.309045476570626e-09, "loss": 0.4271, "step": 52260 }, { "epoch": 0.9915019537918738, "grad_norm": 1.5174690303382081, "learning_rate": 2.2095367946950442e-09, "loss": 0.4173, "step": 52270 }, { "epoch": 0.9916916423233051, "grad_norm": 1.5311984198695263, "learning_rate": 2.112219295388185e-09, "loss": 0.4186, "step": 52280 }, { "epoch": 0.9918813308547365, "grad_norm": 1.6262010286877546, "learning_rate": 2.0170930213181395e-09, "loss": 0.433, "step": 52290 }, { "epoch": 0.9920710193861679, "grad_norm": 1.6087081585020782, "learning_rate": 1.924158014190436e-09, "loss": 0.431, "step": 52300 }, { "epoch": 0.9922607079175993, "grad_norm": 1.4516604658835754, "learning_rate": 1.8334143147502591e-09, "loss": 0.4133, "step": 52310 }, { "epoch": 0.9924503964490307, "grad_norm": 1.6699964052720013, "learning_rate": 1.7448619627835618e-09, "loss": 0.4257, "step": 52320 }, { "epoch": 0.9926400849804621, "grad_norm": 1.8440873286987478, "learning_rate": 1.6585009971131772e-09, "loss": 0.4157, "step": 52330 }, { "epoch": 0.9928297735118935, "grad_norm": 1.3296096994415039, "learning_rate": 1.5743314556032618e-09, "loss": 0.3976, "step": 52340 }, { "epoch": 0.9930194620433248, "grad_norm": 1.760593953433829, "learning_rate": 1.4923533751554086e-09, "loss": 0.4417, "step": 52350 }, { "epoch": 0.9932091505747562, "grad_norm": 1.403494216712482, "learning_rate": 1.4125667917119779e-09, "loss": 0.4079, "step": 52360 }, { "epoch": 0.9933988391061876, "grad_norm": 1.369586848654867, "learning_rate": 1.3349717402538765e-09, "loss": 0.424, "step": 52370 }, { "epoch": 0.9935885276376191, "grad_norm": 1.3074397314170658, "learning_rate": 1.2595682548005583e-09, "loss": 0.4123, "step": 52380 }, { "epoch": 0.9937782161690504, "grad_norm": 1.5485323246886613, "learning_rate": 1.1863563684116898e-09, "loss": 0.4272, "step": 52390 }, { "epoch": 0.9939679047004818, "grad_norm": 1.4393079935520041, "learning_rate": 1.1153361131854833e-09, "loss": 0.4324, "step": 52400 }, { "epoch": 0.9941575932319132, "grad_norm": 1.4288302227548348, "learning_rate": 1.0465075202592546e-09, "loss": 0.3902, "step": 52410 }, { "epoch": 0.9943472817633446, "grad_norm": 1.5019994586696768, "learning_rate": 9.7987061980942e-10, "loss": 0.4298, "step": 52420 }, { "epoch": 0.994536970294776, "grad_norm": 1.6368015948198638, "learning_rate": 9.154254410514984e-10, "loss": 0.4266, "step": 52430 }, { "epoch": 0.9947266588262074, "grad_norm": 1.5900511055017237, "learning_rate": 8.531720122412213e-10, "loss": 0.4545, "step": 52440 }, { "epoch": 0.9949163473576388, "grad_norm": 1.554616295491599, "learning_rate": 7.931103606712009e-10, "loss": 0.4679, "step": 52450 }, { "epoch": 0.9951060358890701, "grad_norm": 1.6680149762229195, "learning_rate": 7.352405126742623e-10, "loss": 0.4442, "step": 52460 }, { "epoch": 0.9952957244205015, "grad_norm": 1.3350926390509974, "learning_rate": 6.795624936234424e-10, "loss": 0.3947, "step": 52470 }, { "epoch": 0.9954854129519329, "grad_norm": 1.3283435081246884, "learning_rate": 6.260763279286597e-10, "loss": 0.3999, "step": 52480 }, { "epoch": 0.9956751014833644, "grad_norm": 1.3339676794671376, "learning_rate": 5.74782039040045e-10, "loss": 0.3996, "step": 52490 }, { "epoch": 0.9958647900147957, "grad_norm": 1.7481995382045596, "learning_rate": 5.256796494468308e-10, "loss": 0.4402, "step": 52500 }, { "epoch": 0.9960544785462271, "grad_norm": 1.3649251630515429, "learning_rate": 4.787691806767969e-10, "loss": 0.4208, "step": 52510 }, { "epoch": 0.9962441670776585, "grad_norm": 1.1996881393922871, "learning_rate": 4.3405065329737986e-10, "loss": 0.4215, "step": 52520 }, { "epoch": 0.9964338556090899, "grad_norm": 1.4062694616474671, "learning_rate": 3.915240869134529e-10, "loss": 0.4093, "step": 52530 }, { "epoch": 0.9966235441405212, "grad_norm": 1.3217211748599376, "learning_rate": 3.511895001712118e-10, "loss": 0.4389, "step": 52540 }, { "epoch": 0.9968132326719527, "grad_norm": 1.9675847796853387, "learning_rate": 3.130469107537337e-10, "loss": 0.3902, "step": 52550 }, { "epoch": 0.9970029212033841, "grad_norm": 1.4961180656406452, "learning_rate": 2.7709633538375304e-10, "loss": 0.4249, "step": 52560 }, { "epoch": 0.9971926097348154, "grad_norm": 1.1969261675993386, "learning_rate": 2.4333778982366106e-10, "loss": 0.4255, "step": 52570 }, { "epoch": 0.9973822982662468, "grad_norm": 1.61573263065483, "learning_rate": 2.1177128887439614e-10, "loss": 0.4139, "step": 52580 }, { "epoch": 0.9975719867976782, "grad_norm": 1.1601848302417441, "learning_rate": 1.823968463748882e-10, "loss": 0.3949, "step": 52590 }, { "epoch": 0.9977616753291096, "grad_norm": 1.3884517556400113, "learning_rate": 1.5521447520427947e-10, "loss": 0.4138, "step": 52600 }, { "epoch": 0.997951363860541, "grad_norm": 1.613291302983719, "learning_rate": 1.3022418728025899e-10, "loss": 0.419, "step": 52610 }, { "epoch": 0.9981410523919724, "grad_norm": 1.3257294977362508, "learning_rate": 1.0742599355906269e-10, "loss": 0.4313, "step": 52620 }, { "epoch": 0.9983307409234038, "grad_norm": 1.530015642984255, "learning_rate": 8.681990403602847e-11, "loss": 0.4202, "step": 52630 }, { "epoch": 0.9985204294548352, "grad_norm": 1.3422690420824723, "learning_rate": 6.840592774559618e-11, "loss": 0.3993, "step": 52640 }, { "epoch": 0.9987101179862665, "grad_norm": 1.4897970191144463, "learning_rate": 5.218407276130766e-11, "loss": 0.407, "step": 52650 }, { "epoch": 0.9988998065176979, "grad_norm": 1.34463146065185, "learning_rate": 3.815434619469649e-11, "loss": 0.4054, "step": 52660 }, { "epoch": 0.9990894950491294, "grad_norm": 1.9700863409461191, "learning_rate": 2.6316754197508454e-11, "loss": 0.4428, "step": 52670 }, { "epoch": 0.9992791835805607, "grad_norm": 1.373285698405177, "learning_rate": 1.6671301958925966e-11, "loss": 0.4215, "step": 52680 }, { "epoch": 0.9994688721119921, "grad_norm": 1.7391026335580941, "learning_rate": 9.217993708898754e-12, "loss": 0.4388, "step": 52690 }, { "epoch": 0.9996585606434235, "grad_norm": 1.44433006180619, "learning_rate": 3.956832713702952e-12, "loss": 0.4151, "step": 52700 }, { "epoch": 0.9998482491748549, "grad_norm": 1.4471940885802677, "learning_rate": 8.878212814922293e-13, "loss": 0.4286, "step": 52710 }, { "epoch": 1.0, "step": 52718, "total_flos": 5545793594392576.0, "train_loss": 0.4667590878757732, "train_runtime": 75687.1773, "train_samples_per_second": 5.572, "train_steps_per_second": 0.697 } ], "logging_steps": 10, "max_steps": 52718, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5545793594392576.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }