{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 8094, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00024709661477637757, "grad_norm": 1.4643443380306504, "learning_rate": 0.0, "loss": 1.4364773035049438, "step": 1 }, { "epoch": 0.0004941932295527551, "grad_norm": 1.386695105475926, "learning_rate": 4.938271604938272e-08, "loss": 1.664607048034668, "step": 2 }, { "epoch": 0.0007412898443291327, "grad_norm": 1.4060948063288128, "learning_rate": 9.876543209876543e-08, "loss": 1.4737625122070312, "step": 3 }, { "epoch": 0.0009883864591055103, "grad_norm": 1.41736389016719, "learning_rate": 1.4814814814814817e-07, "loss": 1.4625980854034424, "step": 4 }, { "epoch": 0.001235483073881888, "grad_norm": 1.4186779283330626, "learning_rate": 1.9753086419753087e-07, "loss": 1.3868944644927979, "step": 5 }, { "epoch": 0.0014825796886582653, "grad_norm": 1.3911534434629733, "learning_rate": 2.469135802469136e-07, "loss": 1.532092809677124, "step": 6 }, { "epoch": 0.001729676303434643, "grad_norm": 1.3806824098171089, "learning_rate": 2.9629629629629634e-07, "loss": 1.428198218345642, "step": 7 }, { "epoch": 0.0019767729182110206, "grad_norm": 1.4215495285260444, "learning_rate": 3.45679012345679e-07, "loss": 1.5378371477127075, "step": 8 }, { "epoch": 0.002223869532987398, "grad_norm": 1.3055298036100815, "learning_rate": 3.9506172839506174e-07, "loss": 1.5235862731933594, "step": 9 }, { "epoch": 0.002470966147763776, "grad_norm": 1.3797544112799773, "learning_rate": 4.444444444444445e-07, "loss": 1.5244669914245605, "step": 10 }, { "epoch": 0.0027180627625401532, "grad_norm": 1.3708745577732409, "learning_rate": 4.938271604938272e-07, "loss": 1.4571117162704468, "step": 11 }, { "epoch": 0.0029651593773165306, "grad_norm": 1.441092577013532, "learning_rate": 5.432098765432099e-07, "loss": 1.5923757553100586, "step": 12 }, { "epoch": 0.0032122559920929085, "grad_norm": 1.399727465049909, "learning_rate": 5.925925925925927e-07, "loss": 1.477729082107544, "step": 13 }, { "epoch": 0.003459352606869286, "grad_norm": 1.429905971151134, "learning_rate": 6.419753086419754e-07, "loss": 1.4599099159240723, "step": 14 }, { "epoch": 0.0037064492216456633, "grad_norm": 1.3437817715329607, "learning_rate": 6.91358024691358e-07, "loss": 1.4626681804656982, "step": 15 }, { "epoch": 0.003953545836422041, "grad_norm": 1.505206066666944, "learning_rate": 7.407407407407407e-07, "loss": 1.481959581375122, "step": 16 }, { "epoch": 0.004200642451198419, "grad_norm": 1.4704260633574284, "learning_rate": 7.901234567901235e-07, "loss": 1.441692590713501, "step": 17 }, { "epoch": 0.004447739065974796, "grad_norm": 1.3995166728845299, "learning_rate": 8.395061728395062e-07, "loss": 1.3803867101669312, "step": 18 }, { "epoch": 0.004694835680751174, "grad_norm": 1.4081591912166018, "learning_rate": 8.88888888888889e-07, "loss": 1.4750735759735107, "step": 19 }, { "epoch": 0.004941932295527552, "grad_norm": 1.3691798310171084, "learning_rate": 9.382716049382717e-07, "loss": 1.451157808303833, "step": 20 }, { "epoch": 0.005189028910303929, "grad_norm": 1.3548251662432254, "learning_rate": 9.876543209876544e-07, "loss": 1.526191234588623, "step": 21 }, { "epoch": 0.0054361255250803065, "grad_norm": 1.4945138823474449, "learning_rate": 1.0370370370370371e-06, "loss": 1.641974687576294, "step": 22 }, { "epoch": 0.005683222139856684, "grad_norm": 1.3630042217687293, "learning_rate": 1.0864197530864199e-06, "loss": 1.425872564315796, "step": 23 }, { "epoch": 0.005930318754633061, "grad_norm": 1.3677812609703563, "learning_rate": 1.1358024691358026e-06, "loss": 1.4519203901290894, "step": 24 }, { "epoch": 0.006177415369409439, "grad_norm": 1.6005050186212453, "learning_rate": 1.1851851851851854e-06, "loss": 1.5306754112243652, "step": 25 }, { "epoch": 0.006424511984185817, "grad_norm": 1.3367356257535692, "learning_rate": 1.234567901234568e-06, "loss": 1.501684308052063, "step": 26 }, { "epoch": 0.006671608598962194, "grad_norm": 1.3904451902767807, "learning_rate": 1.2839506172839509e-06, "loss": 1.3720037937164307, "step": 27 }, { "epoch": 0.006918705213738572, "grad_norm": 1.354234124652224, "learning_rate": 1.3333333333333334e-06, "loss": 1.5924553871154785, "step": 28 }, { "epoch": 0.00716580182851495, "grad_norm": 1.387632297547068, "learning_rate": 1.382716049382716e-06, "loss": 1.4633831977844238, "step": 29 }, { "epoch": 0.007412898443291327, "grad_norm": 1.3339402982373787, "learning_rate": 1.432098765432099e-06, "loss": 1.4821045398712158, "step": 30 }, { "epoch": 0.0076599950580677045, "grad_norm": 1.3954765350820162, "learning_rate": 1.4814814814814815e-06, "loss": 1.422290563583374, "step": 31 }, { "epoch": 0.007907091672844082, "grad_norm": 1.491745257488567, "learning_rate": 1.5308641975308644e-06, "loss": 1.4756203889846802, "step": 32 }, { "epoch": 0.00815418828762046, "grad_norm": 1.3336482601345965, "learning_rate": 1.580246913580247e-06, "loss": 1.498488426208496, "step": 33 }, { "epoch": 0.008401284902396838, "grad_norm": 1.4434452163426554, "learning_rate": 1.62962962962963e-06, "loss": 1.4829633235931396, "step": 34 }, { "epoch": 0.008648381517173214, "grad_norm": 1.5531272523652766, "learning_rate": 1.6790123456790125e-06, "loss": 1.5297141075134277, "step": 35 }, { "epoch": 0.008895478131949592, "grad_norm": 1.3980033205291873, "learning_rate": 1.7283950617283952e-06, "loss": 1.3992717266082764, "step": 36 }, { "epoch": 0.00914257474672597, "grad_norm": 1.3738454407957574, "learning_rate": 1.777777777777778e-06, "loss": 1.5806591510772705, "step": 37 }, { "epoch": 0.009389671361502348, "grad_norm": 1.3928176816745712, "learning_rate": 1.8271604938271605e-06, "loss": 1.4124950170516968, "step": 38 }, { "epoch": 0.009636767976278725, "grad_norm": 1.3860447325153011, "learning_rate": 1.8765432098765435e-06, "loss": 1.3540472984313965, "step": 39 }, { "epoch": 0.009883864591055103, "grad_norm": 1.4529768514321395, "learning_rate": 1.925925925925926e-06, "loss": 1.3466284275054932, "step": 40 }, { "epoch": 0.01013096120583148, "grad_norm": 1.3438212763665185, "learning_rate": 1.9753086419753087e-06, "loss": 1.3990169763565063, "step": 41 }, { "epoch": 0.010378057820607857, "grad_norm": 1.3488368390610463, "learning_rate": 2.0246913580246915e-06, "loss": 1.4854390621185303, "step": 42 }, { "epoch": 0.010625154435384235, "grad_norm": 1.4755353682574464, "learning_rate": 2.0740740740740742e-06, "loss": 1.4375433921813965, "step": 43 }, { "epoch": 0.010872251050160613, "grad_norm": 1.3212659692766533, "learning_rate": 2.123456790123457e-06, "loss": 1.352947473526001, "step": 44 }, { "epoch": 0.01111934766493699, "grad_norm": 1.3187689137922611, "learning_rate": 2.1728395061728397e-06, "loss": 1.3757442235946655, "step": 45 }, { "epoch": 0.011366444279713369, "grad_norm": 1.3907588388010033, "learning_rate": 2.222222222222222e-06, "loss": 1.3733090162277222, "step": 46 }, { "epoch": 0.011613540894489745, "grad_norm": 1.4053871390288764, "learning_rate": 2.2716049382716052e-06, "loss": 1.323127269744873, "step": 47 }, { "epoch": 0.011860637509266123, "grad_norm": 1.281044098410328, "learning_rate": 2.3209876543209876e-06, "loss": 1.3643927574157715, "step": 48 }, { "epoch": 0.0121077341240425, "grad_norm": 1.321911995624781, "learning_rate": 2.3703703703703707e-06, "loss": 1.3495442867279053, "step": 49 }, { "epoch": 0.012354830738818878, "grad_norm": 1.3999034642036314, "learning_rate": 2.419753086419753e-06, "loss": 1.2933852672576904, "step": 50 }, { "epoch": 0.012601927353595256, "grad_norm": 1.3017288484950829, "learning_rate": 2.469135802469136e-06, "loss": 1.3145182132720947, "step": 51 }, { "epoch": 0.012849023968371634, "grad_norm": 1.325997733319123, "learning_rate": 2.5185185185185186e-06, "loss": 1.428991436958313, "step": 52 }, { "epoch": 0.01309612058314801, "grad_norm": 1.2496711744280906, "learning_rate": 2.5679012345679018e-06, "loss": 1.3120423555374146, "step": 53 }, { "epoch": 0.013343217197924388, "grad_norm": 1.3061844930931834, "learning_rate": 2.617283950617284e-06, "loss": 1.402439832687378, "step": 54 }, { "epoch": 0.013590313812700766, "grad_norm": 1.330284245570945, "learning_rate": 2.666666666666667e-06, "loss": 1.2928203344345093, "step": 55 }, { "epoch": 0.013837410427477144, "grad_norm": 1.2469301586484145, "learning_rate": 2.7160493827160496e-06, "loss": 1.2114064693450928, "step": 56 }, { "epoch": 0.014084507042253521, "grad_norm": 1.1417329012224289, "learning_rate": 2.765432098765432e-06, "loss": 1.2668697834014893, "step": 57 }, { "epoch": 0.0143316036570299, "grad_norm": 1.2324817789069191, "learning_rate": 2.814814814814815e-06, "loss": 1.2026606798171997, "step": 58 }, { "epoch": 0.014578700271806275, "grad_norm": 1.250774971382677, "learning_rate": 2.864197530864198e-06, "loss": 1.2564818859100342, "step": 59 }, { "epoch": 0.014825796886582653, "grad_norm": 1.0669912236634602, "learning_rate": 2.9135802469135806e-06, "loss": 1.1671714782714844, "step": 60 }, { "epoch": 0.015072893501359031, "grad_norm": 1.1094162687403122, "learning_rate": 2.962962962962963e-06, "loss": 1.0962650775909424, "step": 61 }, { "epoch": 0.015319990116135409, "grad_norm": 1.123016633250808, "learning_rate": 3.012345679012346e-06, "loss": 1.1710751056671143, "step": 62 }, { "epoch": 0.015567086730911787, "grad_norm": 1.0544698723133406, "learning_rate": 3.061728395061729e-06, "loss": 1.0912549495697021, "step": 63 }, { "epoch": 0.015814183345688165, "grad_norm": 1.092904579636327, "learning_rate": 3.1111111111111116e-06, "loss": 1.1574137210845947, "step": 64 }, { "epoch": 0.016061279960464542, "grad_norm": 1.2122734229910381, "learning_rate": 3.160493827160494e-06, "loss": 1.2438185214996338, "step": 65 }, { "epoch": 0.01630837657524092, "grad_norm": 1.0458694551987333, "learning_rate": 3.2098765432098767e-06, "loss": 1.147150993347168, "step": 66 }, { "epoch": 0.016555473190017298, "grad_norm": 1.1129916459924174, "learning_rate": 3.25925925925926e-06, "loss": 1.113739013671875, "step": 67 }, { "epoch": 0.016802569804793676, "grad_norm": 1.0948594888965977, "learning_rate": 3.3086419753086426e-06, "loss": 1.0685616731643677, "step": 68 }, { "epoch": 0.01704966641957005, "grad_norm": 1.1675866177257395, "learning_rate": 3.358024691358025e-06, "loss": 1.1489982604980469, "step": 69 }, { "epoch": 0.017296763034346428, "grad_norm": 1.079860144536559, "learning_rate": 3.4074074074074077e-06, "loss": 1.016658067703247, "step": 70 }, { "epoch": 0.017543859649122806, "grad_norm": 1.0625172801813134, "learning_rate": 3.4567901234567904e-06, "loss": 0.9935876131057739, "step": 71 }, { "epoch": 0.017790956263899184, "grad_norm": 1.076005723058914, "learning_rate": 3.5061728395061736e-06, "loss": 0.9441846609115601, "step": 72 }, { "epoch": 0.01803805287867556, "grad_norm": 1.0907918784451691, "learning_rate": 3.555555555555556e-06, "loss": 0.9665080308914185, "step": 73 }, { "epoch": 0.01828514949345194, "grad_norm": 1.0177765073646727, "learning_rate": 3.6049382716049387e-06, "loss": 0.9302685260772705, "step": 74 }, { "epoch": 0.018532246108228317, "grad_norm": 1.3666119029215889, "learning_rate": 3.654320987654321e-06, "loss": 1.140694260597229, "step": 75 }, { "epoch": 0.018779342723004695, "grad_norm": 1.2996657628316388, "learning_rate": 3.7037037037037037e-06, "loss": 1.1337695121765137, "step": 76 }, { "epoch": 0.019026439337781073, "grad_norm": 1.0525558913228734, "learning_rate": 3.753086419753087e-06, "loss": 0.9686880111694336, "step": 77 }, { "epoch": 0.01927353595255745, "grad_norm": 1.0540969400491795, "learning_rate": 3.8024691358024697e-06, "loss": 0.9219396114349365, "step": 78 }, { "epoch": 0.01952063256733383, "grad_norm": 1.0202247232886252, "learning_rate": 3.851851851851852e-06, "loss": 0.9488000273704529, "step": 79 }, { "epoch": 0.019767729182110207, "grad_norm": 1.068099804667322, "learning_rate": 3.901234567901235e-06, "loss": 0.9181085824966431, "step": 80 }, { "epoch": 0.02001482579688658, "grad_norm": 1.0606050595157595, "learning_rate": 3.9506172839506175e-06, "loss": 0.9652681350708008, "step": 81 }, { "epoch": 0.02026192241166296, "grad_norm": 1.0363787988213347, "learning_rate": 4.000000000000001e-06, "loss": 0.8510067462921143, "step": 82 }, { "epoch": 0.020509019026439337, "grad_norm": 1.016111257880361, "learning_rate": 4.049382716049383e-06, "loss": 0.8592743873596191, "step": 83 }, { "epoch": 0.020756115641215715, "grad_norm": 0.9895531008744093, "learning_rate": 4.098765432098766e-06, "loss": 0.7843612432479858, "step": 84 }, { "epoch": 0.021003212255992092, "grad_norm": 1.099306039811947, "learning_rate": 4.1481481481481485e-06, "loss": 0.869019627571106, "step": 85 }, { "epoch": 0.02125030887076847, "grad_norm": 1.0460155862695348, "learning_rate": 4.197530864197531e-06, "loss": 0.8462990522384644, "step": 86 }, { "epoch": 0.021497405485544848, "grad_norm": 0.9354489740531178, "learning_rate": 4.246913580246914e-06, "loss": 0.8183064460754395, "step": 87 }, { "epoch": 0.021744502100321226, "grad_norm": 0.9510579237176574, "learning_rate": 4.296296296296296e-06, "loss": 0.7919366359710693, "step": 88 }, { "epoch": 0.021991598715097604, "grad_norm": 0.9676636416535894, "learning_rate": 4.3456790123456795e-06, "loss": 0.7651770114898682, "step": 89 }, { "epoch": 0.02223869532987398, "grad_norm": 1.0176364835519882, "learning_rate": 4.395061728395062e-06, "loss": 0.7388014793395996, "step": 90 }, { "epoch": 0.02248579194465036, "grad_norm": 1.0233554077762281, "learning_rate": 4.444444444444444e-06, "loss": 0.7859802842140198, "step": 91 }, { "epoch": 0.022732888559426737, "grad_norm": 0.899901941349005, "learning_rate": 4.493827160493827e-06, "loss": 0.7034074068069458, "step": 92 }, { "epoch": 0.022979985174203115, "grad_norm": 0.8754443076474531, "learning_rate": 4.5432098765432105e-06, "loss": 0.6753864288330078, "step": 93 }, { "epoch": 0.02322708178897949, "grad_norm": 0.8598309932690128, "learning_rate": 4.592592592592593e-06, "loss": 0.7619961500167847, "step": 94 }, { "epoch": 0.023474178403755867, "grad_norm": 1.0217196517288047, "learning_rate": 4.641975308641975e-06, "loss": 0.8130659461021423, "step": 95 }, { "epoch": 0.023721275018532245, "grad_norm": 0.7961625645746726, "learning_rate": 4.691358024691358e-06, "loss": 0.6868627071380615, "step": 96 }, { "epoch": 0.023968371633308623, "grad_norm": 0.9531387479399893, "learning_rate": 4.7407407407407415e-06, "loss": 0.7546731233596802, "step": 97 }, { "epoch": 0.024215468248085, "grad_norm": 0.9659398123536449, "learning_rate": 4.790123456790124e-06, "loss": 0.7605322599411011, "step": 98 }, { "epoch": 0.02446256486286138, "grad_norm": 0.8457172122370771, "learning_rate": 4.839506172839506e-06, "loss": 0.6537407636642456, "step": 99 }, { "epoch": 0.024709661477637757, "grad_norm": 1.0139177530820396, "learning_rate": 4.888888888888889e-06, "loss": 0.6869999766349792, "step": 100 }, { "epoch": 0.024956758092414134, "grad_norm": 1.0702122679953225, "learning_rate": 4.938271604938272e-06, "loss": 0.6936236619949341, "step": 101 }, { "epoch": 0.025203854707190512, "grad_norm": 0.8643001526722436, "learning_rate": 4.987654320987655e-06, "loss": 0.6856365203857422, "step": 102 }, { "epoch": 0.02545095132196689, "grad_norm": 0.883024730105403, "learning_rate": 5.037037037037037e-06, "loss": 0.6475402116775513, "step": 103 }, { "epoch": 0.025698047936743268, "grad_norm": 0.8140160394304098, "learning_rate": 5.08641975308642e-06, "loss": 0.6429651975631714, "step": 104 }, { "epoch": 0.025945144551519646, "grad_norm": 0.9179333835754738, "learning_rate": 5.1358024691358035e-06, "loss": 0.7357193231582642, "step": 105 }, { "epoch": 0.02619224116629602, "grad_norm": 0.9575345769714584, "learning_rate": 5.185185185185185e-06, "loss": 0.7301129102706909, "step": 106 }, { "epoch": 0.026439337781072398, "grad_norm": 0.948637988996886, "learning_rate": 5.234567901234568e-06, "loss": 0.6746642589569092, "step": 107 }, { "epoch": 0.026686434395848776, "grad_norm": 0.8370092987000868, "learning_rate": 5.2839506172839505e-06, "loss": 0.6570262908935547, "step": 108 }, { "epoch": 0.026933531010625154, "grad_norm": 0.8839770024472888, "learning_rate": 5.333333333333334e-06, "loss": 0.6510944366455078, "step": 109 }, { "epoch": 0.02718062762540153, "grad_norm": 0.8164008218204178, "learning_rate": 5.382716049382717e-06, "loss": 0.7273664474487305, "step": 110 }, { "epoch": 0.02742772424017791, "grad_norm": 0.8760162462794042, "learning_rate": 5.432098765432099e-06, "loss": 0.7023192048072815, "step": 111 }, { "epoch": 0.027674820854954287, "grad_norm": 0.8773116793852297, "learning_rate": 5.481481481481482e-06, "loss": 0.68325275182724, "step": 112 }, { "epoch": 0.027921917469730665, "grad_norm": 0.9094034630265362, "learning_rate": 5.530864197530864e-06, "loss": 0.6808160543441772, "step": 113 }, { "epoch": 0.028169014084507043, "grad_norm": 0.9954193555377551, "learning_rate": 5.580246913580247e-06, "loss": 0.7337883710861206, "step": 114 }, { "epoch": 0.02841611069928342, "grad_norm": 0.8556962049743794, "learning_rate": 5.62962962962963e-06, "loss": 0.6368393301963806, "step": 115 }, { "epoch": 0.0286632073140598, "grad_norm": 0.9666272287607607, "learning_rate": 5.6790123456790125e-06, "loss": 0.7178615927696228, "step": 116 }, { "epoch": 0.028910303928836176, "grad_norm": 0.937286661732415, "learning_rate": 5.728395061728396e-06, "loss": 0.6408815979957581, "step": 117 }, { "epoch": 0.02915740054361255, "grad_norm": 0.9938365946679664, "learning_rate": 5.777777777777778e-06, "loss": 0.6572318077087402, "step": 118 }, { "epoch": 0.02940449715838893, "grad_norm": 0.9737184490277444, "learning_rate": 5.827160493827161e-06, "loss": 0.5940371751785278, "step": 119 }, { "epoch": 0.029651593773165306, "grad_norm": 0.9763074210349386, "learning_rate": 5.876543209876544e-06, "loss": 0.5854802131652832, "step": 120 }, { "epoch": 0.029898690387941684, "grad_norm": 0.9061513790303929, "learning_rate": 5.925925925925926e-06, "loss": 0.6016863584518433, "step": 121 }, { "epoch": 0.030145787002718062, "grad_norm": 0.8475935458406584, "learning_rate": 5.975308641975309e-06, "loss": 0.6246448159217834, "step": 122 }, { "epoch": 0.03039288361749444, "grad_norm": 0.9226887260223712, "learning_rate": 6.024691358024692e-06, "loss": 0.6973131895065308, "step": 123 }, { "epoch": 0.030639980232270818, "grad_norm": 0.9317823678240809, "learning_rate": 6.0740740740740745e-06, "loss": 0.5797619819641113, "step": 124 }, { "epoch": 0.030887076847047196, "grad_norm": 0.9837402884180654, "learning_rate": 6.123456790123458e-06, "loss": 0.5747758746147156, "step": 125 }, { "epoch": 0.031134173461823574, "grad_norm": 0.9072254076912137, "learning_rate": 6.17283950617284e-06, "loss": 0.6678116321563721, "step": 126 }, { "epoch": 0.03138127007659995, "grad_norm": 0.9044016875781312, "learning_rate": 6.222222222222223e-06, "loss": 0.668877363204956, "step": 127 }, { "epoch": 0.03162836669137633, "grad_norm": 0.8815369300835252, "learning_rate": 6.271604938271606e-06, "loss": 0.6345216035842896, "step": 128 }, { "epoch": 0.03187546330615271, "grad_norm": 0.9059369078547715, "learning_rate": 6.320987654320988e-06, "loss": 0.6855568885803223, "step": 129 }, { "epoch": 0.032122559920929085, "grad_norm": 0.9810734172442992, "learning_rate": 6.370370370370371e-06, "loss": 0.7296284437179565, "step": 130 }, { "epoch": 0.03236965653570546, "grad_norm": 0.8779294761863116, "learning_rate": 6.419753086419753e-06, "loss": 0.6802117824554443, "step": 131 }, { "epoch": 0.03261675315048184, "grad_norm": 0.9525932891738919, "learning_rate": 6.4691358024691365e-06, "loss": 0.6531357765197754, "step": 132 }, { "epoch": 0.03286384976525822, "grad_norm": 0.8722550194614532, "learning_rate": 6.51851851851852e-06, "loss": 0.5698685646057129, "step": 133 }, { "epoch": 0.033110946380034596, "grad_norm": 0.9220801516998056, "learning_rate": 6.567901234567902e-06, "loss": 0.6400830149650574, "step": 134 }, { "epoch": 0.033358042994810974, "grad_norm": 0.9944511685975926, "learning_rate": 6.617283950617285e-06, "loss": 0.5911290645599365, "step": 135 }, { "epoch": 0.03360513960958735, "grad_norm": 1.0186447911016905, "learning_rate": 6.666666666666667e-06, "loss": 0.6523990631103516, "step": 136 }, { "epoch": 0.03385223622436372, "grad_norm": 0.9726853302046142, "learning_rate": 6.71604938271605e-06, "loss": 0.5578332543373108, "step": 137 }, { "epoch": 0.0340993328391401, "grad_norm": 0.8674902481037055, "learning_rate": 6.765432098765433e-06, "loss": 0.6422985792160034, "step": 138 }, { "epoch": 0.03434642945391648, "grad_norm": 0.8899582766611167, "learning_rate": 6.814814814814815e-06, "loss": 0.6542617082595825, "step": 139 }, { "epoch": 0.034593526068692856, "grad_norm": 0.9002119668901902, "learning_rate": 6.8641975308641985e-06, "loss": 0.5770111083984375, "step": 140 }, { "epoch": 0.034840622683469234, "grad_norm": 0.9389168329006045, "learning_rate": 6.913580246913581e-06, "loss": 0.6005039215087891, "step": 141 }, { "epoch": 0.03508771929824561, "grad_norm": 0.9299297315973508, "learning_rate": 6.962962962962964e-06, "loss": 0.6668676137924194, "step": 142 }, { "epoch": 0.03533481591302199, "grad_norm": 0.8748310523205618, "learning_rate": 7.012345679012347e-06, "loss": 0.5114421844482422, "step": 143 }, { "epoch": 0.03558191252779837, "grad_norm": 0.8948846086483841, "learning_rate": 7.061728395061729e-06, "loss": 0.5919623970985413, "step": 144 }, { "epoch": 0.035829009142574746, "grad_norm": 0.951762223832205, "learning_rate": 7.111111111111112e-06, "loss": 0.6001653671264648, "step": 145 }, { "epoch": 0.03607610575735112, "grad_norm": 0.9715866294454967, "learning_rate": 7.160493827160494e-06, "loss": 0.6762633323669434, "step": 146 }, { "epoch": 0.0363232023721275, "grad_norm": 0.9170164173749982, "learning_rate": 7.209876543209877e-06, "loss": 0.643061637878418, "step": 147 }, { "epoch": 0.03657029898690388, "grad_norm": 0.8787751572069907, "learning_rate": 7.2592592592592605e-06, "loss": 0.5902014970779419, "step": 148 }, { "epoch": 0.03681739560168026, "grad_norm": 0.9100584323145736, "learning_rate": 7.308641975308642e-06, "loss": 0.5946463346481323, "step": 149 }, { "epoch": 0.037064492216456635, "grad_norm": 0.9484816420800458, "learning_rate": 7.358024691358025e-06, "loss": 0.6049221158027649, "step": 150 }, { "epoch": 0.03731158883123301, "grad_norm": 0.9838557455793265, "learning_rate": 7.4074074074074075e-06, "loss": 0.6057566404342651, "step": 151 }, { "epoch": 0.03755868544600939, "grad_norm": 0.9758016699343552, "learning_rate": 7.456790123456791e-06, "loss": 0.6963274478912354, "step": 152 }, { "epoch": 0.03780578206078577, "grad_norm": 0.8529906383613668, "learning_rate": 7.506172839506174e-06, "loss": 0.5097123384475708, "step": 153 }, { "epoch": 0.038052878675562146, "grad_norm": 0.930205232611475, "learning_rate": 7.555555555555556e-06, "loss": 0.616525411605835, "step": 154 }, { "epoch": 0.038299975290338524, "grad_norm": 0.9370821762996969, "learning_rate": 7.604938271604939e-06, "loss": 0.6053072810173035, "step": 155 }, { "epoch": 0.0385470719051149, "grad_norm": 0.8862865941122092, "learning_rate": 7.654320987654322e-06, "loss": 0.573745608329773, "step": 156 }, { "epoch": 0.03879416851989128, "grad_norm": 0.9787870087736464, "learning_rate": 7.703703703703704e-06, "loss": 0.5216530561447144, "step": 157 }, { "epoch": 0.03904126513466766, "grad_norm": 0.9262382887623407, "learning_rate": 7.753086419753088e-06, "loss": 0.6163315773010254, "step": 158 }, { "epoch": 0.039288361749444035, "grad_norm": 0.8649790658898739, "learning_rate": 7.80246913580247e-06, "loss": 0.5856317281723022, "step": 159 }, { "epoch": 0.03953545836422041, "grad_norm": 1.0278361521244668, "learning_rate": 7.851851851851853e-06, "loss": 0.591067910194397, "step": 160 }, { "epoch": 0.03978255497899679, "grad_norm": 0.8891857649799672, "learning_rate": 7.901234567901235e-06, "loss": 0.5951709747314453, "step": 161 }, { "epoch": 0.04002965159377316, "grad_norm": 0.944249591753084, "learning_rate": 7.950617283950617e-06, "loss": 0.5870499014854431, "step": 162 }, { "epoch": 0.04027674820854954, "grad_norm": 0.9306238883907105, "learning_rate": 8.000000000000001e-06, "loss": 0.5965971946716309, "step": 163 }, { "epoch": 0.04052384482332592, "grad_norm": 1.0026158460416414, "learning_rate": 8.049382716049384e-06, "loss": 0.6549112796783447, "step": 164 }, { "epoch": 0.040770941438102296, "grad_norm": 1.0272324927328607, "learning_rate": 8.098765432098766e-06, "loss": 0.6974608898162842, "step": 165 }, { "epoch": 0.04101803805287867, "grad_norm": 1.0544576000337995, "learning_rate": 8.148148148148148e-06, "loss": 0.6175243258476257, "step": 166 }, { "epoch": 0.04126513466765505, "grad_norm": 0.9617025678672692, "learning_rate": 8.197530864197532e-06, "loss": 0.5323407649993896, "step": 167 }, { "epoch": 0.04151223128243143, "grad_norm": 0.9051929417009832, "learning_rate": 8.246913580246915e-06, "loss": 0.6238798499107361, "step": 168 }, { "epoch": 0.04175932789720781, "grad_norm": 0.8810879322484243, "learning_rate": 8.296296296296297e-06, "loss": 0.5835152864456177, "step": 169 }, { "epoch": 0.042006424511984185, "grad_norm": 0.8757579685136776, "learning_rate": 8.34567901234568e-06, "loss": 0.5789281725883484, "step": 170 }, { "epoch": 0.04225352112676056, "grad_norm": 0.8706365717354804, "learning_rate": 8.395061728395062e-06, "loss": 0.5749459266662598, "step": 171 }, { "epoch": 0.04250061774153694, "grad_norm": 0.9153118124639958, "learning_rate": 8.444444444444446e-06, "loss": 0.6430952548980713, "step": 172 }, { "epoch": 0.04274771435631332, "grad_norm": 0.9735834071728342, "learning_rate": 8.493827160493828e-06, "loss": 0.5635827779769897, "step": 173 }, { "epoch": 0.042994810971089696, "grad_norm": 0.9478206343795988, "learning_rate": 8.54320987654321e-06, "loss": 0.6374496221542358, "step": 174 }, { "epoch": 0.043241907585866074, "grad_norm": 1.051671808868491, "learning_rate": 8.592592592592593e-06, "loss": 0.6350709199905396, "step": 175 }, { "epoch": 0.04348900420064245, "grad_norm": 0.9251991009669414, "learning_rate": 8.641975308641975e-06, "loss": 0.6192104816436768, "step": 176 }, { "epoch": 0.04373610081541883, "grad_norm": 1.0170492586901672, "learning_rate": 8.691358024691359e-06, "loss": 0.5435853004455566, "step": 177 }, { "epoch": 0.04398319743019521, "grad_norm": 0.9402932385536863, "learning_rate": 8.740740740740741e-06, "loss": 0.5739273428916931, "step": 178 }, { "epoch": 0.044230294044971585, "grad_norm": 0.9634067817447504, "learning_rate": 8.790123456790124e-06, "loss": 0.6093436479568481, "step": 179 }, { "epoch": 0.04447739065974796, "grad_norm": 0.9818459631993479, "learning_rate": 8.839506172839508e-06, "loss": 0.6480109691619873, "step": 180 }, { "epoch": 0.04472448727452434, "grad_norm": 1.0643732236594015, "learning_rate": 8.888888888888888e-06, "loss": 0.6183211207389832, "step": 181 }, { "epoch": 0.04497158388930072, "grad_norm": 0.9154357714919052, "learning_rate": 8.938271604938272e-06, "loss": 0.5551972389221191, "step": 182 }, { "epoch": 0.0452186805040771, "grad_norm": 1.0534106480543919, "learning_rate": 8.987654320987655e-06, "loss": 0.57023686170578, "step": 183 }, { "epoch": 0.045465777118853475, "grad_norm": 0.9508986705035122, "learning_rate": 9.037037037037037e-06, "loss": 0.5303903222084045, "step": 184 }, { "epoch": 0.04571287373362985, "grad_norm": 0.9873535113979167, "learning_rate": 9.086419753086421e-06, "loss": 0.5358332991600037, "step": 185 }, { "epoch": 0.04595997034840623, "grad_norm": 0.9205423434725697, "learning_rate": 9.135802469135803e-06, "loss": 0.5776067972183228, "step": 186 }, { "epoch": 0.0462070669631826, "grad_norm": 1.039491965642533, "learning_rate": 9.185185185185186e-06, "loss": 0.5497357845306396, "step": 187 }, { "epoch": 0.04645416357795898, "grad_norm": 0.8859213588270116, "learning_rate": 9.23456790123457e-06, "loss": 0.5311161875724792, "step": 188 }, { "epoch": 0.04670126019273536, "grad_norm": 1.0873006746677174, "learning_rate": 9.28395061728395e-06, "loss": 0.6142665147781372, "step": 189 }, { "epoch": 0.046948356807511735, "grad_norm": 1.080768646778126, "learning_rate": 9.333333333333334e-06, "loss": 0.5745618343353271, "step": 190 }, { "epoch": 0.04719545342228811, "grad_norm": 0.9900333312404502, "learning_rate": 9.382716049382717e-06, "loss": 0.6353927850723267, "step": 191 }, { "epoch": 0.04744255003706449, "grad_norm": 0.9223938732878414, "learning_rate": 9.432098765432099e-06, "loss": 0.5660842657089233, "step": 192 }, { "epoch": 0.04768964665184087, "grad_norm": 0.9747088599771772, "learning_rate": 9.481481481481483e-06, "loss": 0.5560893416404724, "step": 193 }, { "epoch": 0.047936743266617246, "grad_norm": 0.903787279973877, "learning_rate": 9.530864197530865e-06, "loss": 0.5770151615142822, "step": 194 }, { "epoch": 0.048183839881393624, "grad_norm": 0.8867875652081866, "learning_rate": 9.580246913580248e-06, "loss": 0.5216684341430664, "step": 195 }, { "epoch": 0.04843093649617, "grad_norm": 1.0660995507418705, "learning_rate": 9.62962962962963e-06, "loss": 0.5383985042572021, "step": 196 }, { "epoch": 0.04867803311094638, "grad_norm": 0.8868323185754534, "learning_rate": 9.679012345679012e-06, "loss": 0.5051260590553284, "step": 197 }, { "epoch": 0.04892512972572276, "grad_norm": 0.9906136873632133, "learning_rate": 9.728395061728396e-06, "loss": 0.5884324312210083, "step": 198 }, { "epoch": 0.049172226340499135, "grad_norm": 0.9671100302183704, "learning_rate": 9.777777777777779e-06, "loss": 0.5635128021240234, "step": 199 }, { "epoch": 0.04941932295527551, "grad_norm": 0.9541070208342587, "learning_rate": 9.827160493827161e-06, "loss": 0.612021803855896, "step": 200 }, { "epoch": 0.04966641957005189, "grad_norm": 0.9715087163599602, "learning_rate": 9.876543209876543e-06, "loss": 0.6051968336105347, "step": 201 }, { "epoch": 0.04991351618482827, "grad_norm": 0.9589177901393033, "learning_rate": 9.925925925925927e-06, "loss": 0.5375806093215942, "step": 202 }, { "epoch": 0.05016061279960465, "grad_norm": 0.9217957381158395, "learning_rate": 9.97530864197531e-06, "loss": 0.5795732140541077, "step": 203 }, { "epoch": 0.050407709414381024, "grad_norm": 1.0008718389389264, "learning_rate": 1.0024691358024692e-05, "loss": 0.5463623404502869, "step": 204 }, { "epoch": 0.0506548060291574, "grad_norm": 1.0803113496776309, "learning_rate": 1.0074074074074074e-05, "loss": 0.6643832325935364, "step": 205 }, { "epoch": 0.05090190264393378, "grad_norm": 1.02872489694968, "learning_rate": 1.0123456790123458e-05, "loss": 0.6277819871902466, "step": 206 }, { "epoch": 0.05114899925871016, "grad_norm": 1.0617374348033735, "learning_rate": 1.017283950617284e-05, "loss": 0.5468822717666626, "step": 207 }, { "epoch": 0.051396095873486536, "grad_norm": 1.0401306154814145, "learning_rate": 1.0222222222222223e-05, "loss": 0.5603351593017578, "step": 208 }, { "epoch": 0.051643192488262914, "grad_norm": 0.9431348135800443, "learning_rate": 1.0271604938271607e-05, "loss": 0.6369042992591858, "step": 209 }, { "epoch": 0.05189028910303929, "grad_norm": 0.9777001151349541, "learning_rate": 1.032098765432099e-05, "loss": 0.5154774188995361, "step": 210 }, { "epoch": 0.05213738571781567, "grad_norm": 1.1391741593147116, "learning_rate": 1.037037037037037e-05, "loss": 0.6575545072555542, "step": 211 }, { "epoch": 0.05238448233259204, "grad_norm": 1.0702258069769526, "learning_rate": 1.0419753086419756e-05, "loss": 0.5928763151168823, "step": 212 }, { "epoch": 0.05263157894736842, "grad_norm": 1.0053181564103184, "learning_rate": 1.0469135802469136e-05, "loss": 0.5870167016983032, "step": 213 }, { "epoch": 0.052878675562144796, "grad_norm": 1.0002195600317372, "learning_rate": 1.0518518518518519e-05, "loss": 0.5895674228668213, "step": 214 }, { "epoch": 0.053125772176921174, "grad_norm": 1.0136104532852512, "learning_rate": 1.0567901234567901e-05, "loss": 0.6909312009811401, "step": 215 }, { "epoch": 0.05337286879169755, "grad_norm": 0.930335375026708, "learning_rate": 1.0617283950617285e-05, "loss": 0.5051767230033875, "step": 216 }, { "epoch": 0.05361996540647393, "grad_norm": 1.074946416681427, "learning_rate": 1.0666666666666667e-05, "loss": 0.5044318437576294, "step": 217 }, { "epoch": 0.05386706202125031, "grad_norm": 0.9262209042458449, "learning_rate": 1.071604938271605e-05, "loss": 0.5659178495407104, "step": 218 }, { "epoch": 0.054114158636026685, "grad_norm": 1.0581438282974387, "learning_rate": 1.0765432098765434e-05, "loss": 0.567792534828186, "step": 219 }, { "epoch": 0.05436125525080306, "grad_norm": 0.9347709643280552, "learning_rate": 1.0814814814814816e-05, "loss": 0.45745712518692017, "step": 220 }, { "epoch": 0.05460835186557944, "grad_norm": 0.9948157558551322, "learning_rate": 1.0864197530864198e-05, "loss": 0.5178786516189575, "step": 221 }, { "epoch": 0.05485544848035582, "grad_norm": 1.0000416797825837, "learning_rate": 1.0913580246913582e-05, "loss": 0.6133204698562622, "step": 222 }, { "epoch": 0.0551025450951322, "grad_norm": 0.9814640902879799, "learning_rate": 1.0962962962962965e-05, "loss": 0.5544575452804565, "step": 223 }, { "epoch": 0.055349641709908574, "grad_norm": 0.9763357475667112, "learning_rate": 1.1012345679012347e-05, "loss": 0.5373265147209167, "step": 224 }, { "epoch": 0.05559673832468495, "grad_norm": 0.9045066775223725, "learning_rate": 1.1061728395061728e-05, "loss": 0.5613001585006714, "step": 225 }, { "epoch": 0.05584383493946133, "grad_norm": 1.253187910186798, "learning_rate": 1.1111111111111113e-05, "loss": 0.7091293931007385, "step": 226 }, { "epoch": 0.05609093155423771, "grad_norm": 1.005835016175944, "learning_rate": 1.1160493827160494e-05, "loss": 0.5629034042358398, "step": 227 }, { "epoch": 0.056338028169014086, "grad_norm": 0.9784222615863195, "learning_rate": 1.1209876543209876e-05, "loss": 0.4538424015045166, "step": 228 }, { "epoch": 0.056585124783790464, "grad_norm": 1.0582669282231736, "learning_rate": 1.125925925925926e-05, "loss": 0.6091480255126953, "step": 229 }, { "epoch": 0.05683222139856684, "grad_norm": 1.0231703419654132, "learning_rate": 1.1308641975308643e-05, "loss": 0.607703685760498, "step": 230 }, { "epoch": 0.05707931801334322, "grad_norm": 0.9946794048146541, "learning_rate": 1.1358024691358025e-05, "loss": 0.5426801443099976, "step": 231 }, { "epoch": 0.0573264146281196, "grad_norm": 1.0192010650395187, "learning_rate": 1.1407407407407409e-05, "loss": 0.5612596273422241, "step": 232 }, { "epoch": 0.057573511242895975, "grad_norm": 1.0658552905913024, "learning_rate": 1.1456790123456791e-05, "loss": 0.5382722616195679, "step": 233 }, { "epoch": 0.05782060785767235, "grad_norm": 1.0993345423433305, "learning_rate": 1.1506172839506174e-05, "loss": 0.4816238284111023, "step": 234 }, { "epoch": 0.05806770447244873, "grad_norm": 0.9599279535240665, "learning_rate": 1.1555555555555556e-05, "loss": 0.5848016738891602, "step": 235 }, { "epoch": 0.0583148010872251, "grad_norm": 1.0877603292417624, "learning_rate": 1.160493827160494e-05, "loss": 0.5480165481567383, "step": 236 }, { "epoch": 0.05856189770200148, "grad_norm": 0.9928421761756023, "learning_rate": 1.1654320987654322e-05, "loss": 0.4714718759059906, "step": 237 }, { "epoch": 0.05880899431677786, "grad_norm": 1.0223142156775624, "learning_rate": 1.1703703703703703e-05, "loss": 0.5909614562988281, "step": 238 }, { "epoch": 0.059056090931554235, "grad_norm": 1.1342045400983893, "learning_rate": 1.1753086419753089e-05, "loss": 0.6149535179138184, "step": 239 }, { "epoch": 0.05930318754633061, "grad_norm": 1.0933578881416777, "learning_rate": 1.180246913580247e-05, "loss": 0.48445355892181396, "step": 240 }, { "epoch": 0.05955028416110699, "grad_norm": 1.060190418883379, "learning_rate": 1.1851851851851852e-05, "loss": 0.6130196452140808, "step": 241 }, { "epoch": 0.05979738077588337, "grad_norm": 1.0189282282743901, "learning_rate": 1.1901234567901236e-05, "loss": 0.5338863730430603, "step": 242 }, { "epoch": 0.060044477390659746, "grad_norm": 1.0364381258312625, "learning_rate": 1.1950617283950618e-05, "loss": 0.5970376133918762, "step": 243 }, { "epoch": 0.060291574005436124, "grad_norm": 0.9665628227255916, "learning_rate": 1.2e-05, "loss": 0.48992547392845154, "step": 244 }, { "epoch": 0.0605386706202125, "grad_norm": 0.9313420188164415, "learning_rate": 1.2049382716049384e-05, "loss": 0.487262487411499, "step": 245 }, { "epoch": 0.06078576723498888, "grad_norm": 1.0333481600499863, "learning_rate": 1.2098765432098767e-05, "loss": 0.4913530647754669, "step": 246 }, { "epoch": 0.06103286384976526, "grad_norm": 1.0554368254056516, "learning_rate": 1.2148148148148149e-05, "loss": 0.5688247680664062, "step": 247 }, { "epoch": 0.061279960464541636, "grad_norm": 1.1054683346253, "learning_rate": 1.2197530864197531e-05, "loss": 0.5455566644668579, "step": 248 }, { "epoch": 0.061527057079318014, "grad_norm": 1.0608907377111594, "learning_rate": 1.2246913580246915e-05, "loss": 0.6471099853515625, "step": 249 }, { "epoch": 0.06177415369409439, "grad_norm": 1.1133218669140246, "learning_rate": 1.2296296296296298e-05, "loss": 0.5842370986938477, "step": 250 }, { "epoch": 0.06202125030887077, "grad_norm": 1.0927657062010299, "learning_rate": 1.234567901234568e-05, "loss": 0.5827319025993347, "step": 251 }, { "epoch": 0.06226834692364715, "grad_norm": 1.0556666416988971, "learning_rate": 1.2395061728395064e-05, "loss": 0.5209188461303711, "step": 252 }, { "epoch": 0.06251544353842352, "grad_norm": 0.9820177667353921, "learning_rate": 1.2444444444444446e-05, "loss": 0.5262765884399414, "step": 253 }, { "epoch": 0.0627625401531999, "grad_norm": 1.141986515147913, "learning_rate": 1.2493827160493827e-05, "loss": 0.5364784002304077, "step": 254 }, { "epoch": 0.06300963676797627, "grad_norm": 1.1063985062419912, "learning_rate": 1.2543209876543213e-05, "loss": 0.6207551956176758, "step": 255 }, { "epoch": 0.06325673338275266, "grad_norm": 0.9796631424671087, "learning_rate": 1.2592592592592593e-05, "loss": 0.5617996454238892, "step": 256 }, { "epoch": 0.06350382999752903, "grad_norm": 1.1784347346569521, "learning_rate": 1.2641975308641976e-05, "loss": 0.4974507689476013, "step": 257 }, { "epoch": 0.06375092661230541, "grad_norm": 0.9496121168920871, "learning_rate": 1.2691358024691358e-05, "loss": 0.5331505537033081, "step": 258 }, { "epoch": 0.06399802322708179, "grad_norm": 1.0053968480172677, "learning_rate": 1.2740740740740742e-05, "loss": 0.5849424600601196, "step": 259 }, { "epoch": 0.06424511984185817, "grad_norm": 1.1592196476536512, "learning_rate": 1.2790123456790124e-05, "loss": 0.6232506036758423, "step": 260 }, { "epoch": 0.06449221645663454, "grad_norm": 0.9072633692999097, "learning_rate": 1.2839506172839507e-05, "loss": 0.5269280672073364, "step": 261 }, { "epoch": 0.06473931307141093, "grad_norm": 1.064613530292812, "learning_rate": 1.288888888888889e-05, "loss": 0.5982104539871216, "step": 262 }, { "epoch": 0.0649864096861873, "grad_norm": 1.0091140897580826, "learning_rate": 1.2938271604938273e-05, "loss": 0.544775128364563, "step": 263 }, { "epoch": 0.06523350630096368, "grad_norm": 1.1614945625082864, "learning_rate": 1.2987654320987655e-05, "loss": 0.5395841002464294, "step": 264 }, { "epoch": 0.06548060291574005, "grad_norm": 0.9866358859643434, "learning_rate": 1.303703703703704e-05, "loss": 0.5090043544769287, "step": 265 }, { "epoch": 0.06572769953051644, "grad_norm": 1.061923424881116, "learning_rate": 1.3086419753086422e-05, "loss": 0.5056370496749878, "step": 266 }, { "epoch": 0.06597479614529281, "grad_norm": 1.0422310967332822, "learning_rate": 1.3135802469135804e-05, "loss": 0.49812713265419006, "step": 267 }, { "epoch": 0.06622189276006919, "grad_norm": 1.0920899256630412, "learning_rate": 1.3185185185185185e-05, "loss": 0.550094723701477, "step": 268 }, { "epoch": 0.06646898937484556, "grad_norm": 0.9814940644089625, "learning_rate": 1.323456790123457e-05, "loss": 0.4708777070045471, "step": 269 }, { "epoch": 0.06671608598962195, "grad_norm": 1.060845804160528, "learning_rate": 1.3283950617283951e-05, "loss": 0.5835709571838379, "step": 270 }, { "epoch": 0.06696318260439832, "grad_norm": 0.9577018489188089, "learning_rate": 1.3333333333333333e-05, "loss": 0.5495673418045044, "step": 271 }, { "epoch": 0.0672102792191747, "grad_norm": 1.0833927215848396, "learning_rate": 1.3382716049382717e-05, "loss": 0.5394706130027771, "step": 272 }, { "epoch": 0.06745737583395107, "grad_norm": 1.1776814732536218, "learning_rate": 1.34320987654321e-05, "loss": 0.5697737336158752, "step": 273 }, { "epoch": 0.06770447244872745, "grad_norm": 1.2056399015024164, "learning_rate": 1.3481481481481482e-05, "loss": 0.5370118021965027, "step": 274 }, { "epoch": 0.06795156906350383, "grad_norm": 1.2432097048718378, "learning_rate": 1.3530864197530866e-05, "loss": 0.5425596237182617, "step": 275 }, { "epoch": 0.0681986656782802, "grad_norm": 1.195631347789536, "learning_rate": 1.3580246913580248e-05, "loss": 0.594305694103241, "step": 276 }, { "epoch": 0.06844576229305659, "grad_norm": 0.925076000917221, "learning_rate": 1.362962962962963e-05, "loss": 0.5343754291534424, "step": 277 }, { "epoch": 0.06869285890783296, "grad_norm": 1.0325181411887239, "learning_rate": 1.3679012345679013e-05, "loss": 0.5809817910194397, "step": 278 }, { "epoch": 0.06893995552260934, "grad_norm": 1.1666532642949279, "learning_rate": 1.3728395061728397e-05, "loss": 0.5979821681976318, "step": 279 }, { "epoch": 0.06918705213738571, "grad_norm": 0.9711323196431999, "learning_rate": 1.377777777777778e-05, "loss": 0.49529585242271423, "step": 280 }, { "epoch": 0.0694341487521621, "grad_norm": 1.0167852333138874, "learning_rate": 1.3827160493827162e-05, "loss": 0.5383665561676025, "step": 281 }, { "epoch": 0.06968124536693847, "grad_norm": 1.0985131454082944, "learning_rate": 1.3876543209876546e-05, "loss": 0.530430793762207, "step": 282 }, { "epoch": 0.06992834198171485, "grad_norm": 1.06361975829084, "learning_rate": 1.3925925925925928e-05, "loss": 0.4958809018135071, "step": 283 }, { "epoch": 0.07017543859649122, "grad_norm": 1.0930178387647074, "learning_rate": 1.3975308641975309e-05, "loss": 0.5688295364379883, "step": 284 }, { "epoch": 0.07042253521126761, "grad_norm": 1.0864887928498075, "learning_rate": 1.4024691358024694e-05, "loss": 0.5363324284553528, "step": 285 }, { "epoch": 0.07066963182604398, "grad_norm": 1.2774794479165792, "learning_rate": 1.4074074074074075e-05, "loss": 0.5861618518829346, "step": 286 }, { "epoch": 0.07091672844082036, "grad_norm": 0.8932967396698341, "learning_rate": 1.4123456790123457e-05, "loss": 0.5277290344238281, "step": 287 }, { "epoch": 0.07116382505559674, "grad_norm": 0.9751522798466952, "learning_rate": 1.417283950617284e-05, "loss": 0.5501270294189453, "step": 288 }, { "epoch": 0.07141092167037312, "grad_norm": 1.0116892172892722, "learning_rate": 1.4222222222222224e-05, "loss": 0.5959082841873169, "step": 289 }, { "epoch": 0.07165801828514949, "grad_norm": 1.101464653953447, "learning_rate": 1.4271604938271606e-05, "loss": 0.5755316019058228, "step": 290 }, { "epoch": 0.07190511489992588, "grad_norm": 0.9055879586120578, "learning_rate": 1.4320987654320988e-05, "loss": 0.4982122778892517, "step": 291 }, { "epoch": 0.07215221151470225, "grad_norm": 1.039890562405945, "learning_rate": 1.4370370370370372e-05, "loss": 0.5149146318435669, "step": 292 }, { "epoch": 0.07239930812947863, "grad_norm": 0.9242666097612203, "learning_rate": 1.4419753086419755e-05, "loss": 0.5075903534889221, "step": 293 }, { "epoch": 0.072646404744255, "grad_norm": 1.0079993681008785, "learning_rate": 1.4469135802469137e-05, "loss": 0.536419153213501, "step": 294 }, { "epoch": 0.07289350135903139, "grad_norm": 1.0532592990939078, "learning_rate": 1.4518518518518521e-05, "loss": 0.48714208602905273, "step": 295 }, { "epoch": 0.07314059797380776, "grad_norm": 1.0639316183096355, "learning_rate": 1.4567901234567903e-05, "loss": 0.5458095669746399, "step": 296 }, { "epoch": 0.07338769458858414, "grad_norm": 1.00084579443787, "learning_rate": 1.4617283950617284e-05, "loss": 0.5439126491546631, "step": 297 }, { "epoch": 0.07363479120336051, "grad_norm": 0.9335668345820834, "learning_rate": 1.4666666666666666e-05, "loss": 0.5003355741500854, "step": 298 }, { "epoch": 0.07388188781813688, "grad_norm": 1.126929340893151, "learning_rate": 1.471604938271605e-05, "loss": 0.5035877823829651, "step": 299 }, { "epoch": 0.07412898443291327, "grad_norm": 1.1613336370842076, "learning_rate": 1.4765432098765433e-05, "loss": 0.5500510931015015, "step": 300 }, { "epoch": 0.07437608104768964, "grad_norm": 1.0657901165849089, "learning_rate": 1.4814814814814815e-05, "loss": 0.5283631086349487, "step": 301 }, { "epoch": 0.07462317766246603, "grad_norm": 1.1007876256163636, "learning_rate": 1.4864197530864199e-05, "loss": 0.5242631435394287, "step": 302 }, { "epoch": 0.0748702742772424, "grad_norm": 1.1589539240786184, "learning_rate": 1.4913580246913581e-05, "loss": 0.5547994375228882, "step": 303 }, { "epoch": 0.07511737089201878, "grad_norm": 1.2018106767773566, "learning_rate": 1.4962962962962964e-05, "loss": 0.5425190925598145, "step": 304 }, { "epoch": 0.07536446750679515, "grad_norm": 1.0957538962947484, "learning_rate": 1.5012345679012348e-05, "loss": 0.5694150328636169, "step": 305 }, { "epoch": 0.07561156412157154, "grad_norm": 0.9988882438530783, "learning_rate": 1.506172839506173e-05, "loss": 0.5454326868057251, "step": 306 }, { "epoch": 0.07585866073634791, "grad_norm": 1.0693492077010474, "learning_rate": 1.5111111111111112e-05, "loss": 0.48871302604675293, "step": 307 }, { "epoch": 0.07610575735112429, "grad_norm": 1.2164559930457106, "learning_rate": 1.5160493827160495e-05, "loss": 0.5476065278053284, "step": 308 }, { "epoch": 0.07635285396590066, "grad_norm": 0.9997266616813411, "learning_rate": 1.5209876543209879e-05, "loss": 0.4554753303527832, "step": 309 }, { "epoch": 0.07659995058067705, "grad_norm": 1.1450224530676312, "learning_rate": 1.525925925925926e-05, "loss": 0.5501112341880798, "step": 310 }, { "epoch": 0.07684704719545342, "grad_norm": 0.9482967475014519, "learning_rate": 1.5308641975308643e-05, "loss": 0.5474594831466675, "step": 311 }, { "epoch": 0.0770941438102298, "grad_norm": 1.0934495692469457, "learning_rate": 1.5358024691358026e-05, "loss": 0.5288143157958984, "step": 312 }, { "epoch": 0.07734124042500617, "grad_norm": 1.0806113834625313, "learning_rate": 1.5407407407407408e-05, "loss": 0.5477120280265808, "step": 313 }, { "epoch": 0.07758833703978256, "grad_norm": 0.9614700559787005, "learning_rate": 1.545679012345679e-05, "loss": 0.5099549293518066, "step": 314 }, { "epoch": 0.07783543365455893, "grad_norm": 1.0857329607208566, "learning_rate": 1.5506172839506176e-05, "loss": 0.5259889960289001, "step": 315 }, { "epoch": 0.07808253026933532, "grad_norm": 1.0919610812602087, "learning_rate": 1.555555555555556e-05, "loss": 0.5508008003234863, "step": 316 }, { "epoch": 0.07832962688411169, "grad_norm": 0.8916809695032462, "learning_rate": 1.560493827160494e-05, "loss": 0.5371847152709961, "step": 317 }, { "epoch": 0.07857672349888807, "grad_norm": 1.039778531013135, "learning_rate": 1.565432098765432e-05, "loss": 0.5109792947769165, "step": 318 }, { "epoch": 0.07882382011366444, "grad_norm": 1.0301842742490757, "learning_rate": 1.5703703703703705e-05, "loss": 0.4768526256084442, "step": 319 }, { "epoch": 0.07907091672844083, "grad_norm": 1.0412786642128746, "learning_rate": 1.5753086419753088e-05, "loss": 0.5516396760940552, "step": 320 }, { "epoch": 0.0793180133432172, "grad_norm": 1.0990429330215108, "learning_rate": 1.580246913580247e-05, "loss": 0.5379005074501038, "step": 321 }, { "epoch": 0.07956510995799358, "grad_norm": 1.0843476927514255, "learning_rate": 1.5851851851851852e-05, "loss": 0.539249062538147, "step": 322 }, { "epoch": 0.07981220657276995, "grad_norm": 1.0534985292381318, "learning_rate": 1.5901234567901235e-05, "loss": 0.4889170527458191, "step": 323 }, { "epoch": 0.08005930318754632, "grad_norm": 1.0046204322384473, "learning_rate": 1.5950617283950617e-05, "loss": 0.5275892615318298, "step": 324 }, { "epoch": 0.08030639980232271, "grad_norm": 0.9832821358203281, "learning_rate": 1.6000000000000003e-05, "loss": 0.5069926977157593, "step": 325 }, { "epoch": 0.08055349641709908, "grad_norm": 1.0195152252853281, "learning_rate": 1.6049382716049385e-05, "loss": 0.5306823253631592, "step": 326 }, { "epoch": 0.08080059303187546, "grad_norm": 1.1615566330727145, "learning_rate": 1.6098765432098767e-05, "loss": 0.5229095816612244, "step": 327 }, { "epoch": 0.08104768964665184, "grad_norm": 0.9795093972479023, "learning_rate": 1.614814814814815e-05, "loss": 0.5310205221176147, "step": 328 }, { "epoch": 0.08129478626142822, "grad_norm": 1.215452270430382, "learning_rate": 1.6197530864197532e-05, "loss": 0.5061969757080078, "step": 329 }, { "epoch": 0.08154188287620459, "grad_norm": 1.0630385097468673, "learning_rate": 1.6246913580246914e-05, "loss": 0.5064999461174011, "step": 330 }, { "epoch": 0.08178897949098098, "grad_norm": 1.0610828855729162, "learning_rate": 1.6296296296296297e-05, "loss": 0.5022560358047485, "step": 331 }, { "epoch": 0.08203607610575735, "grad_norm": 1.2713503564535336, "learning_rate": 1.6345679012345682e-05, "loss": 0.5792784094810486, "step": 332 }, { "epoch": 0.08228317272053373, "grad_norm": 0.9829790136776136, "learning_rate": 1.6395061728395065e-05, "loss": 0.5531458854675293, "step": 333 }, { "epoch": 0.0825302693353101, "grad_norm": 1.433554213171085, "learning_rate": 1.6444444444444444e-05, "loss": 0.5679094791412354, "step": 334 }, { "epoch": 0.08277736595008649, "grad_norm": 1.0888828925897693, "learning_rate": 1.649382716049383e-05, "loss": 0.5267306566238403, "step": 335 }, { "epoch": 0.08302446256486286, "grad_norm": 1.0649927012128755, "learning_rate": 1.654320987654321e-05, "loss": 0.5345470905303955, "step": 336 }, { "epoch": 0.08327155917963924, "grad_norm": 1.2178086229863971, "learning_rate": 1.6592592592592594e-05, "loss": 0.5519043207168579, "step": 337 }, { "epoch": 0.08351865579441561, "grad_norm": 1.0658478902321726, "learning_rate": 1.6641975308641976e-05, "loss": 0.5037249326705933, "step": 338 }, { "epoch": 0.083765752409192, "grad_norm": 1.0203057482597888, "learning_rate": 1.669135802469136e-05, "loss": 0.4947260618209839, "step": 339 }, { "epoch": 0.08401284902396837, "grad_norm": 1.0352753549253584, "learning_rate": 1.674074074074074e-05, "loss": 0.5189676284790039, "step": 340 }, { "epoch": 0.08425994563874475, "grad_norm": 1.0343886684524963, "learning_rate": 1.6790123456790123e-05, "loss": 0.5149326324462891, "step": 341 }, { "epoch": 0.08450704225352113, "grad_norm": 0.9954734206413627, "learning_rate": 1.683950617283951e-05, "loss": 0.4647838771343231, "step": 342 }, { "epoch": 0.08475413886829751, "grad_norm": 1.0116387635564563, "learning_rate": 1.688888888888889e-05, "loss": 0.47919929027557373, "step": 343 }, { "epoch": 0.08500123548307388, "grad_norm": 1.081164688155172, "learning_rate": 1.6938271604938274e-05, "loss": 0.4911682903766632, "step": 344 }, { "epoch": 0.08524833209785027, "grad_norm": 1.0279880819016252, "learning_rate": 1.6987654320987656e-05, "loss": 0.5485577583312988, "step": 345 }, { "epoch": 0.08549542871262664, "grad_norm": 1.1293038364581547, "learning_rate": 1.7037037037037038e-05, "loss": 0.5524568557739258, "step": 346 }, { "epoch": 0.08574252532740302, "grad_norm": 1.1021668308727768, "learning_rate": 1.708641975308642e-05, "loss": 0.5272347927093506, "step": 347 }, { "epoch": 0.08598962194217939, "grad_norm": 1.015092742356707, "learning_rate": 1.7135802469135806e-05, "loss": 0.4862084984779358, "step": 348 }, { "epoch": 0.08623671855695576, "grad_norm": 1.0485124139703235, "learning_rate": 1.7185185185185185e-05, "loss": 0.555648922920227, "step": 349 }, { "epoch": 0.08648381517173215, "grad_norm": 1.0557084291045822, "learning_rate": 1.7234567901234568e-05, "loss": 0.4812682867050171, "step": 350 }, { "epoch": 0.08673091178650852, "grad_norm": 1.1419212508364274, "learning_rate": 1.728395061728395e-05, "loss": 0.5421712398529053, "step": 351 }, { "epoch": 0.0869780084012849, "grad_norm": 1.0956297285311984, "learning_rate": 1.7333333333333336e-05, "loss": 0.520918607711792, "step": 352 }, { "epoch": 0.08722510501606127, "grad_norm": 1.0346531746460406, "learning_rate": 1.7382716049382718e-05, "loss": 0.5074133276939392, "step": 353 }, { "epoch": 0.08747220163083766, "grad_norm": 1.244324728805935, "learning_rate": 1.74320987654321e-05, "loss": 0.5179446935653687, "step": 354 }, { "epoch": 0.08771929824561403, "grad_norm": 0.9874185588404132, "learning_rate": 1.7481481481481483e-05, "loss": 0.5183665156364441, "step": 355 }, { "epoch": 0.08796639486039042, "grad_norm": 1.0421588156970327, "learning_rate": 1.7530864197530865e-05, "loss": 0.5478167533874512, "step": 356 }, { "epoch": 0.08821349147516679, "grad_norm": 1.2391635991909453, "learning_rate": 1.7580246913580247e-05, "loss": 0.5758296251296997, "step": 357 }, { "epoch": 0.08846058808994317, "grad_norm": 1.033445101897872, "learning_rate": 1.7629629629629633e-05, "loss": 0.4267658591270447, "step": 358 }, { "epoch": 0.08870768470471954, "grad_norm": 1.0725378989437746, "learning_rate": 1.7679012345679015e-05, "loss": 0.4927263855934143, "step": 359 }, { "epoch": 0.08895478131949593, "grad_norm": 1.1099537635266747, "learning_rate": 1.7728395061728398e-05, "loss": 0.5328878164291382, "step": 360 }, { "epoch": 0.0892018779342723, "grad_norm": 1.113846511340859, "learning_rate": 1.7777777777777777e-05, "loss": 0.4401858448982239, "step": 361 }, { "epoch": 0.08944897454904868, "grad_norm": 1.3155797739029114, "learning_rate": 1.7827160493827162e-05, "loss": 0.4649726152420044, "step": 362 }, { "epoch": 0.08969607116382505, "grad_norm": 1.0535274149594813, "learning_rate": 1.7876543209876545e-05, "loss": 0.49358803033828735, "step": 363 }, { "epoch": 0.08994316777860144, "grad_norm": 1.0533034442420026, "learning_rate": 1.7925925925925927e-05, "loss": 0.4953600764274597, "step": 364 }, { "epoch": 0.09019026439337781, "grad_norm": 1.177959265098368, "learning_rate": 1.797530864197531e-05, "loss": 0.5177078247070312, "step": 365 }, { "epoch": 0.0904373610081542, "grad_norm": 1.1956812117445408, "learning_rate": 1.802469135802469e-05, "loss": 0.5127096772193909, "step": 366 }, { "epoch": 0.09068445762293056, "grad_norm": 1.069754108641862, "learning_rate": 1.8074074074074074e-05, "loss": 0.5269686579704285, "step": 367 }, { "epoch": 0.09093155423770695, "grad_norm": 1.2367748425097809, "learning_rate": 1.812345679012346e-05, "loss": 0.49229711294174194, "step": 368 }, { "epoch": 0.09117865085248332, "grad_norm": 1.1206875167584736, "learning_rate": 1.8172839506172842e-05, "loss": 0.5404107570648193, "step": 369 }, { "epoch": 0.0914257474672597, "grad_norm": 1.137156090637887, "learning_rate": 1.8222222222222224e-05, "loss": 0.5653643012046814, "step": 370 }, { "epoch": 0.09167284408203608, "grad_norm": 1.0098380469595098, "learning_rate": 1.8271604938271607e-05, "loss": 0.5045976638793945, "step": 371 }, { "epoch": 0.09191994069681246, "grad_norm": 1.1215585141449251, "learning_rate": 1.832098765432099e-05, "loss": 0.5433730483055115, "step": 372 }, { "epoch": 0.09216703731158883, "grad_norm": 1.0415473729382556, "learning_rate": 1.837037037037037e-05, "loss": 0.4979349374771118, "step": 373 }, { "epoch": 0.0924141339263652, "grad_norm": 1.3594647465572518, "learning_rate": 1.8419753086419754e-05, "loss": 0.5344895124435425, "step": 374 }, { "epoch": 0.09266123054114159, "grad_norm": 1.1697287806886059, "learning_rate": 1.846913580246914e-05, "loss": 0.5099678039550781, "step": 375 }, { "epoch": 0.09290832715591796, "grad_norm": 1.0330958861040818, "learning_rate": 1.851851851851852e-05, "loss": 0.5077311992645264, "step": 376 }, { "epoch": 0.09315542377069434, "grad_norm": 1.1527652965482256, "learning_rate": 1.85679012345679e-05, "loss": 0.5584789514541626, "step": 377 }, { "epoch": 0.09340252038547071, "grad_norm": 1.0705301870286492, "learning_rate": 1.8617283950617286e-05, "loss": 0.5117822885513306, "step": 378 }, { "epoch": 0.0936496170002471, "grad_norm": 0.9227571899672546, "learning_rate": 1.866666666666667e-05, "loss": 0.44964107871055603, "step": 379 }, { "epoch": 0.09389671361502347, "grad_norm": 1.0713296176316975, "learning_rate": 1.871604938271605e-05, "loss": 0.49468857049942017, "step": 380 }, { "epoch": 0.09414381022979985, "grad_norm": 1.0065275465630912, "learning_rate": 1.8765432098765433e-05, "loss": 0.500532329082489, "step": 381 }, { "epoch": 0.09439090684457623, "grad_norm": 1.0600742465499062, "learning_rate": 1.8814814814814816e-05, "loss": 0.47912701964378357, "step": 382 }, { "epoch": 0.09463800345935261, "grad_norm": 1.0468958607452166, "learning_rate": 1.8864197530864198e-05, "loss": 0.4717734158039093, "step": 383 }, { "epoch": 0.09488510007412898, "grad_norm": 1.1154465589493041, "learning_rate": 1.891358024691358e-05, "loss": 0.4566038250923157, "step": 384 }, { "epoch": 0.09513219668890537, "grad_norm": 1.097299106870104, "learning_rate": 1.8962962962962966e-05, "loss": 0.5006905198097229, "step": 385 }, { "epoch": 0.09537929330368174, "grad_norm": 1.0400913258882958, "learning_rate": 1.901234567901235e-05, "loss": 0.5213012099266052, "step": 386 }, { "epoch": 0.09562638991845812, "grad_norm": 1.054246598781912, "learning_rate": 1.906172839506173e-05, "loss": 0.4608762264251709, "step": 387 }, { "epoch": 0.09587348653323449, "grad_norm": 1.2367558885418999, "learning_rate": 1.9111111111111113e-05, "loss": 0.5379590392112732, "step": 388 }, { "epoch": 0.09612058314801088, "grad_norm": 1.000661109125773, "learning_rate": 1.9160493827160495e-05, "loss": 0.4928660988807678, "step": 389 }, { "epoch": 0.09636767976278725, "grad_norm": 1.016039222197859, "learning_rate": 1.9209876543209878e-05, "loss": 0.5103530287742615, "step": 390 }, { "epoch": 0.09661477637756363, "grad_norm": 1.107266535409496, "learning_rate": 1.925925925925926e-05, "loss": 0.5545858144760132, "step": 391 }, { "epoch": 0.09686187299234, "grad_norm": 1.0196671641715527, "learning_rate": 1.9308641975308646e-05, "loss": 0.44689422845840454, "step": 392 }, { "epoch": 0.09710896960711639, "grad_norm": 1.049840562333487, "learning_rate": 1.9358024691358025e-05, "loss": 0.5178558230400085, "step": 393 }, { "epoch": 0.09735606622189276, "grad_norm": 1.0697334375054999, "learning_rate": 1.9407407407407407e-05, "loss": 0.5121500492095947, "step": 394 }, { "epoch": 0.09760316283666914, "grad_norm": 0.9748719021238385, "learning_rate": 1.9456790123456793e-05, "loss": 0.469825804233551, "step": 395 }, { "epoch": 0.09785025945144551, "grad_norm": 1.0186797876193012, "learning_rate": 1.9506172839506175e-05, "loss": 0.531976580619812, "step": 396 }, { "epoch": 0.0980973560662219, "grad_norm": 1.089604129407877, "learning_rate": 1.9555555555555557e-05, "loss": 0.484782338142395, "step": 397 }, { "epoch": 0.09834445268099827, "grad_norm": 1.0831682975470796, "learning_rate": 1.960493827160494e-05, "loss": 0.4766656160354614, "step": 398 }, { "epoch": 0.09859154929577464, "grad_norm": 0.9938007419381598, "learning_rate": 1.9654320987654322e-05, "loss": 0.45987552404403687, "step": 399 }, { "epoch": 0.09883864591055103, "grad_norm": 1.1251999182434502, "learning_rate": 1.9703703703703704e-05, "loss": 0.5646716952323914, "step": 400 }, { "epoch": 0.0990857425253274, "grad_norm": 1.1442197258685192, "learning_rate": 1.9753086419753087e-05, "loss": 0.4849340617656708, "step": 401 }, { "epoch": 0.09933283914010378, "grad_norm": 1.0679650129624267, "learning_rate": 1.9802469135802472e-05, "loss": 0.5126779079437256, "step": 402 }, { "epoch": 0.09957993575488015, "grad_norm": 1.0290619435518447, "learning_rate": 1.9851851851851855e-05, "loss": 0.4912632703781128, "step": 403 }, { "epoch": 0.09982703236965654, "grad_norm": 0.9762354930853929, "learning_rate": 1.9901234567901237e-05, "loss": 0.49874866008758545, "step": 404 }, { "epoch": 0.10007412898443291, "grad_norm": 1.229409625785988, "learning_rate": 1.995061728395062e-05, "loss": 0.5540339350700378, "step": 405 }, { "epoch": 0.1003212255992093, "grad_norm": 1.141349986595276, "learning_rate": 2e-05, "loss": 0.5228179693222046, "step": 406 }, { "epoch": 0.10056832221398566, "grad_norm": 1.0552313409180318, "learning_rate": 1.9999999165300755e-05, "loss": 0.4817062020301819, "step": 407 }, { "epoch": 0.10081541882876205, "grad_norm": 1.1385479350851413, "learning_rate": 1.9999996661203152e-05, "loss": 0.5632885098457336, "step": 408 }, { "epoch": 0.10106251544353842, "grad_norm": 1.041551190174628, "learning_rate": 1.9999992487707615e-05, "loss": 0.468686044216156, "step": 409 }, { "epoch": 0.1013096120583148, "grad_norm": 1.12127310183764, "learning_rate": 1.999998664481483e-05, "loss": 0.5300260782241821, "step": 410 }, { "epoch": 0.10155670867309118, "grad_norm": 1.0902306839145983, "learning_rate": 1.9999979132525786e-05, "loss": 0.4767228662967682, "step": 411 }, { "epoch": 0.10180380528786756, "grad_norm": 1.0926801734033333, "learning_rate": 1.9999969950841728e-05, "loss": 0.5804744958877563, "step": 412 }, { "epoch": 0.10205090190264393, "grad_norm": 1.133630338890004, "learning_rate": 1.9999959099764194e-05, "loss": 0.545983612537384, "step": 413 }, { "epoch": 0.10229799851742032, "grad_norm": 0.9646702379108719, "learning_rate": 1.9999946579294995e-05, "loss": 0.4503708481788635, "step": 414 }, { "epoch": 0.10254509513219669, "grad_norm": 1.0230123271856908, "learning_rate": 1.9999932389436217e-05, "loss": 0.49539583921432495, "step": 415 }, { "epoch": 0.10279219174697307, "grad_norm": 0.9818417471953134, "learning_rate": 1.999991653019023e-05, "loss": 0.4858836531639099, "step": 416 }, { "epoch": 0.10303928836174944, "grad_norm": 1.0707112034429487, "learning_rate": 1.9999899001559684e-05, "loss": 0.5566180944442749, "step": 417 }, { "epoch": 0.10328638497652583, "grad_norm": 1.074712175543496, "learning_rate": 1.9999879803547503e-05, "loss": 0.4605311453342438, "step": 418 }, { "epoch": 0.1035334815913022, "grad_norm": 1.0753974698876894, "learning_rate": 1.9999858936156897e-05, "loss": 0.5417741537094116, "step": 419 }, { "epoch": 0.10378057820607858, "grad_norm": 1.1776607393488026, "learning_rate": 1.9999836399391342e-05, "loss": 0.5219302177429199, "step": 420 }, { "epoch": 0.10402767482085495, "grad_norm": 0.9894492339380352, "learning_rate": 1.9999812193254603e-05, "loss": 0.47810202836990356, "step": 421 }, { "epoch": 0.10427477143563134, "grad_norm": 1.1169424310945892, "learning_rate": 1.9999786317750725e-05, "loss": 0.46822577714920044, "step": 422 }, { "epoch": 0.10452186805040771, "grad_norm": 0.9684679161954045, "learning_rate": 1.999975877288402e-05, "loss": 0.49114882946014404, "step": 423 }, { "epoch": 0.10476896466518408, "grad_norm": 1.1668748667057134, "learning_rate": 1.9999729558659097e-05, "loss": 0.5159168243408203, "step": 424 }, { "epoch": 0.10501606127996047, "grad_norm": 0.9836052912752233, "learning_rate": 1.9999698675080823e-05, "loss": 0.4388241767883301, "step": 425 }, { "epoch": 0.10526315789473684, "grad_norm": 1.052373365861282, "learning_rate": 1.9999666122154356e-05, "loss": 0.4836425483226776, "step": 426 }, { "epoch": 0.10551025450951322, "grad_norm": 1.2031474221630778, "learning_rate": 1.9999631899885136e-05, "loss": 0.5598310232162476, "step": 427 }, { "epoch": 0.10575735112428959, "grad_norm": 1.0555884120907617, "learning_rate": 1.999959600827887e-05, "loss": 0.47416234016418457, "step": 428 }, { "epoch": 0.10600444773906598, "grad_norm": 0.9770836248336973, "learning_rate": 1.999955844734155e-05, "loss": 0.4066373109817505, "step": 429 }, { "epoch": 0.10625154435384235, "grad_norm": 1.0514309276430163, "learning_rate": 1.999951921707945e-05, "loss": 0.4814978837966919, "step": 430 }, { "epoch": 0.10649864096861873, "grad_norm": 1.0249508897158077, "learning_rate": 1.999947831749912e-05, "loss": 0.48588675260543823, "step": 431 }, { "epoch": 0.1067457375833951, "grad_norm": 1.1462260087077107, "learning_rate": 1.999943574860738e-05, "loss": 0.5432606935501099, "step": 432 }, { "epoch": 0.10699283419817149, "grad_norm": 1.041761611495914, "learning_rate": 1.9999391510411343e-05, "loss": 0.48392224311828613, "step": 433 }, { "epoch": 0.10723993081294786, "grad_norm": 1.0035075345607165, "learning_rate": 1.9999345602918394e-05, "loss": 0.442148894071579, "step": 434 }, { "epoch": 0.10748702742772424, "grad_norm": 1.1231944198960093, "learning_rate": 1.9999298026136192e-05, "loss": 0.5204436182975769, "step": 435 }, { "epoch": 0.10773412404250061, "grad_norm": 1.0367896272432786, "learning_rate": 1.9999248780072687e-05, "loss": 0.486219584941864, "step": 436 }, { "epoch": 0.107981220657277, "grad_norm": 1.008351806326509, "learning_rate": 1.9999197864736093e-05, "loss": 0.5150943994522095, "step": 437 }, { "epoch": 0.10822831727205337, "grad_norm": 0.9700163394460888, "learning_rate": 1.9999145280134913e-05, "loss": 0.42161089181900024, "step": 438 }, { "epoch": 0.10847541388682976, "grad_norm": 1.0584729938044735, "learning_rate": 1.999909102627793e-05, "loss": 0.5130641460418701, "step": 439 }, { "epoch": 0.10872251050160613, "grad_norm": 0.8783097595433069, "learning_rate": 1.999903510317419e-05, "loss": 0.44228750467300415, "step": 440 }, { "epoch": 0.10896960711638251, "grad_norm": 0.9831273944335245, "learning_rate": 1.9998977510833034e-05, "loss": 0.48472708463668823, "step": 441 }, { "epoch": 0.10921670373115888, "grad_norm": 1.0196723464048418, "learning_rate": 1.999891824926408e-05, "loss": 0.4891436994075775, "step": 442 }, { "epoch": 0.10946380034593527, "grad_norm": 1.1116654264904047, "learning_rate": 1.9998857318477224e-05, "loss": 0.50822913646698, "step": 443 }, { "epoch": 0.10971089696071164, "grad_norm": 1.1556754773015627, "learning_rate": 1.999879471848263e-05, "loss": 0.5111104249954224, "step": 444 }, { "epoch": 0.10995799357548802, "grad_norm": 1.090328870755421, "learning_rate": 1.9998730449290745e-05, "loss": 0.45199960470199585, "step": 445 }, { "epoch": 0.1102050901902644, "grad_norm": 0.9782241633776745, "learning_rate": 1.999866451091231e-05, "loss": 0.503210186958313, "step": 446 }, { "epoch": 0.11045218680504076, "grad_norm": 1.0434058674336173, "learning_rate": 1.9998596903358322e-05, "loss": 0.47840777039527893, "step": 447 }, { "epoch": 0.11069928341981715, "grad_norm": 1.0524535608803076, "learning_rate": 1.9998527626640076e-05, "loss": 0.4915250837802887, "step": 448 }, { "epoch": 0.11094638003459352, "grad_norm": 1.1264440399310043, "learning_rate": 1.9998456680769133e-05, "loss": 0.5042505264282227, "step": 449 }, { "epoch": 0.1111934766493699, "grad_norm": 1.1950537365488816, "learning_rate": 1.9998384065757334e-05, "loss": 0.47561168670654297, "step": 450 }, { "epoch": 0.11144057326414628, "grad_norm": 1.1887934460586855, "learning_rate": 1.9998309781616805e-05, "loss": 0.5295384526252747, "step": 451 }, { "epoch": 0.11168766987892266, "grad_norm": 1.1466071118710557, "learning_rate": 1.9998233828359948e-05, "loss": 0.5513627529144287, "step": 452 }, { "epoch": 0.11193476649369903, "grad_norm": 1.0617659998942, "learning_rate": 1.999815620599944e-05, "loss": 0.4189717471599579, "step": 453 }, { "epoch": 0.11218186310847542, "grad_norm": 1.1152917789670491, "learning_rate": 1.999807691454824e-05, "loss": 0.547242283821106, "step": 454 }, { "epoch": 0.11242895972325179, "grad_norm": 1.1597197708290392, "learning_rate": 1.9997995954019585e-05, "loss": 0.5338085889816284, "step": 455 }, { "epoch": 0.11267605633802817, "grad_norm": 1.0730142308146267, "learning_rate": 1.999791332442699e-05, "loss": 0.49785560369491577, "step": 456 }, { "epoch": 0.11292315295280454, "grad_norm": 0.954511577542885, "learning_rate": 1.999782902578425e-05, "loss": 0.41862568259239197, "step": 457 }, { "epoch": 0.11317024956758093, "grad_norm": 1.035741754284345, "learning_rate": 1.999774305810544e-05, "loss": 0.47863584756851196, "step": 458 }, { "epoch": 0.1134173461823573, "grad_norm": 1.0597191352743207, "learning_rate": 1.9997655421404905e-05, "loss": 0.5222382545471191, "step": 459 }, { "epoch": 0.11366444279713368, "grad_norm": 0.9714066544362729, "learning_rate": 1.999756611569728e-05, "loss": 0.46164125204086304, "step": 460 }, { "epoch": 0.11391153941191005, "grad_norm": 0.9330113172350255, "learning_rate": 1.9997475140997475e-05, "loss": 0.4786023497581482, "step": 461 }, { "epoch": 0.11415863602668644, "grad_norm": 0.9919630545146628, "learning_rate": 1.9997382497320674e-05, "loss": 0.4166843295097351, "step": 462 }, { "epoch": 0.11440573264146281, "grad_norm": 1.093942556447136, "learning_rate": 1.9997288184682344e-05, "loss": 0.5545295476913452, "step": 463 }, { "epoch": 0.1146528292562392, "grad_norm": 1.069479952711971, "learning_rate": 1.9997192203098227e-05, "loss": 0.47677403688430786, "step": 464 }, { "epoch": 0.11489992587101557, "grad_norm": 0.9535700700736695, "learning_rate": 1.9997094552584355e-05, "loss": 0.45836275815963745, "step": 465 }, { "epoch": 0.11514702248579195, "grad_norm": 1.1198834677100429, "learning_rate": 1.9996995233157015e-05, "loss": 0.550153374671936, "step": 466 }, { "epoch": 0.11539411910056832, "grad_norm": 1.0207542744651232, "learning_rate": 1.99968942448328e-05, "loss": 0.4831583499908447, "step": 467 }, { "epoch": 0.1156412157153447, "grad_norm": 1.2347639725967303, "learning_rate": 1.9996791587628562e-05, "loss": 0.5719254016876221, "step": 468 }, { "epoch": 0.11588831233012108, "grad_norm": 1.2111972009575018, "learning_rate": 1.9996687261561445e-05, "loss": 0.5355052947998047, "step": 469 }, { "epoch": 0.11613540894489746, "grad_norm": 1.1392201049346018, "learning_rate": 1.999658126664886e-05, "loss": 0.5095142722129822, "step": 470 }, { "epoch": 0.11638250555967383, "grad_norm": 1.166779381603166, "learning_rate": 1.99964736029085e-05, "loss": 0.4741051197052002, "step": 471 }, { "epoch": 0.1166296021744502, "grad_norm": 1.061287083699801, "learning_rate": 1.9996364270358346e-05, "loss": 0.5238326787948608, "step": 472 }, { "epoch": 0.11687669878922659, "grad_norm": 1.1262912322561949, "learning_rate": 1.9996253269016646e-05, "loss": 0.5077928900718689, "step": 473 }, { "epoch": 0.11712379540400296, "grad_norm": 1.0987358322769634, "learning_rate": 1.999614059890193e-05, "loss": 0.5772190093994141, "step": 474 }, { "epoch": 0.11737089201877934, "grad_norm": 1.1010345578884173, "learning_rate": 1.9996026260033003e-05, "loss": 0.4601318836212158, "step": 475 }, { "epoch": 0.11761798863355571, "grad_norm": 0.910123150867915, "learning_rate": 1.999591025242896e-05, "loss": 0.40992727875709534, "step": 476 }, { "epoch": 0.1178650852483321, "grad_norm": 1.0355406431091116, "learning_rate": 1.999579257610916e-05, "loss": 0.4758719205856323, "step": 477 }, { "epoch": 0.11811218186310847, "grad_norm": 0.9675052116488352, "learning_rate": 1.9995673231093256e-05, "loss": 0.492972195148468, "step": 478 }, { "epoch": 0.11835927847788486, "grad_norm": 1.1273904707669307, "learning_rate": 1.999555221740117e-05, "loss": 0.577843427658081, "step": 479 }, { "epoch": 0.11860637509266123, "grad_norm": 1.0715563653048785, "learning_rate": 1.99954295350531e-05, "loss": 0.5024944543838501, "step": 480 }, { "epoch": 0.11885347170743761, "grad_norm": 1.0851661246729962, "learning_rate": 1.9995305184069524e-05, "loss": 0.5504424571990967, "step": 481 }, { "epoch": 0.11910056832221398, "grad_norm": 1.1238478145696469, "learning_rate": 1.999517916447121e-05, "loss": 0.5179471969604492, "step": 482 }, { "epoch": 0.11934766493699037, "grad_norm": 1.055580225188001, "learning_rate": 1.999505147627919e-05, "loss": 0.4516528844833374, "step": 483 }, { "epoch": 0.11959476155176674, "grad_norm": 1.1356515476472988, "learning_rate": 1.9994922119514778e-05, "loss": 0.46201926469802856, "step": 484 }, { "epoch": 0.11984185816654312, "grad_norm": 1.0324215566956763, "learning_rate": 1.9994791094199573e-05, "loss": 0.43873298168182373, "step": 485 }, { "epoch": 0.12008895478131949, "grad_norm": 1.0281622101897212, "learning_rate": 1.999465840035545e-05, "loss": 0.4095543324947357, "step": 486 }, { "epoch": 0.12033605139609588, "grad_norm": 1.0204377350241398, "learning_rate": 1.9994524038004558e-05, "loss": 0.48423832654953003, "step": 487 }, { "epoch": 0.12058314801087225, "grad_norm": 1.001181018479807, "learning_rate": 1.9994388007169324e-05, "loss": 0.43119919300079346, "step": 488 }, { "epoch": 0.12083024462564863, "grad_norm": 1.127761384719825, "learning_rate": 1.9994250307872464e-05, "loss": 0.5356267690658569, "step": 489 }, { "epoch": 0.121077341240425, "grad_norm": 1.2110168156930345, "learning_rate": 1.9994110940136962e-05, "loss": 0.5319631099700928, "step": 490 }, { "epoch": 0.12132443785520139, "grad_norm": 1.333592908142206, "learning_rate": 1.999396990398608e-05, "loss": 0.5872631072998047, "step": 491 }, { "epoch": 0.12157153446997776, "grad_norm": 1.2749006354810406, "learning_rate": 1.9993827199443373e-05, "loss": 0.5761593580245972, "step": 492 }, { "epoch": 0.12181863108475414, "grad_norm": 1.1762305846955392, "learning_rate": 1.9993682826532655e-05, "loss": 0.5200316905975342, "step": 493 }, { "epoch": 0.12206572769953052, "grad_norm": 1.2225167430868127, "learning_rate": 1.999353678527803e-05, "loss": 0.5364953279495239, "step": 494 }, { "epoch": 0.1223128243143069, "grad_norm": 1.0847873099148033, "learning_rate": 1.999338907570388e-05, "loss": 0.48197197914123535, "step": 495 }, { "epoch": 0.12255992092908327, "grad_norm": 1.019725357441386, "learning_rate": 1.999323969783486e-05, "loss": 0.46169546246528625, "step": 496 }, { "epoch": 0.12280701754385964, "grad_norm": 1.0458126451998584, "learning_rate": 1.999308865169591e-05, "loss": 0.5414837598800659, "step": 497 }, { "epoch": 0.12305411415863603, "grad_norm": 1.1053052868810498, "learning_rate": 1.9992935937312245e-05, "loss": 0.5195422172546387, "step": 498 }, { "epoch": 0.1233012107734124, "grad_norm": 1.0597834647171185, "learning_rate": 1.999278155470936e-05, "loss": 0.44647932052612305, "step": 499 }, { "epoch": 0.12354830738818878, "grad_norm": 1.0527979627286612, "learning_rate": 1.9992625503913023e-05, "loss": 0.4696844518184662, "step": 500 }, { "epoch": 0.12379540400296515, "grad_norm": 1.0732078341648368, "learning_rate": 1.999246778494929e-05, "loss": 0.4754623770713806, "step": 501 }, { "epoch": 0.12404250061774154, "grad_norm": 1.037786256711579, "learning_rate": 1.999230839784449e-05, "loss": 0.49167436361312866, "step": 502 }, { "epoch": 0.12428959723251791, "grad_norm": 1.196854710974382, "learning_rate": 1.9992147342625234e-05, "loss": 0.5055179595947266, "step": 503 }, { "epoch": 0.1245366938472943, "grad_norm": 1.1122844149797975, "learning_rate": 1.99919846193184e-05, "loss": 0.4443962872028351, "step": 504 }, { "epoch": 0.12478379046207067, "grad_norm": 1.0788807915085714, "learning_rate": 1.999182022795116e-05, "loss": 0.4347198009490967, "step": 505 }, { "epoch": 0.12503088707684704, "grad_norm": 1.0259538486568998, "learning_rate": 1.9991654168550952e-05, "loss": 0.47081810235977173, "step": 506 }, { "epoch": 0.12527798369162343, "grad_norm": 1.1620524222669417, "learning_rate": 1.9991486441145504e-05, "loss": 0.5457199811935425, "step": 507 }, { "epoch": 0.1255250803063998, "grad_norm": 1.0266659603122599, "learning_rate": 1.999131704576281e-05, "loss": 0.470820814371109, "step": 508 }, { "epoch": 0.12577217692117618, "grad_norm": 1.0854010948407382, "learning_rate": 1.999114598243116e-05, "loss": 0.43739932775497437, "step": 509 }, { "epoch": 0.12601927353595255, "grad_norm": 1.07730211303325, "learning_rate": 1.99909732511791e-05, "loss": 0.4653388261795044, "step": 510 }, { "epoch": 0.12626637015072895, "grad_norm": 1.1530846039239377, "learning_rate": 1.9990798852035466e-05, "loss": 0.4778762459754944, "step": 511 }, { "epoch": 0.12651346676550532, "grad_norm": 1.12915028062565, "learning_rate": 1.999062278502938e-05, "loss": 0.407817006111145, "step": 512 }, { "epoch": 0.1267605633802817, "grad_norm": 1.1626993948889701, "learning_rate": 1.9990445050190226e-05, "loss": 0.5060060620307922, "step": 513 }, { "epoch": 0.12700765999505806, "grad_norm": 1.1011321104198744, "learning_rate": 1.9990265647547688e-05, "loss": 0.43834173679351807, "step": 514 }, { "epoch": 0.12725475660983446, "grad_norm": 1.1381282117161087, "learning_rate": 1.99900845771317e-05, "loss": 0.49693048000335693, "step": 515 }, { "epoch": 0.12750185322461083, "grad_norm": 1.0587588186623744, "learning_rate": 1.9989901838972496e-05, "loss": 0.4720430374145508, "step": 516 }, { "epoch": 0.1277489498393872, "grad_norm": 1.0880627802110252, "learning_rate": 1.9989717433100586e-05, "loss": 0.4663942754268646, "step": 517 }, { "epoch": 0.12799604645416357, "grad_norm": 1.0649305815106866, "learning_rate": 1.998953135954675e-05, "loss": 0.446818083524704, "step": 518 }, { "epoch": 0.12824314306893997, "grad_norm": 1.0530421378607289, "learning_rate": 1.998934361834205e-05, "loss": 0.45269161462783813, "step": 519 }, { "epoch": 0.12849023968371634, "grad_norm": 1.0648284099871075, "learning_rate": 1.9989154209517834e-05, "loss": 0.5068754553794861, "step": 520 }, { "epoch": 0.1287373362984927, "grad_norm": 1.2206077993048075, "learning_rate": 1.9988963133105715e-05, "loss": 0.5556261539459229, "step": 521 }, { "epoch": 0.12898443291326908, "grad_norm": 1.0974678241371487, "learning_rate": 1.9988770389137597e-05, "loss": 0.4654293656349182, "step": 522 }, { "epoch": 0.12923152952804545, "grad_norm": 1.11972446958289, "learning_rate": 1.9988575977645653e-05, "loss": 0.5125724077224731, "step": 523 }, { "epoch": 0.12947862614282185, "grad_norm": 1.1881445512642432, "learning_rate": 1.998837989866234e-05, "loss": 0.44100093841552734, "step": 524 }, { "epoch": 0.12972572275759822, "grad_norm": 1.2288563832181794, "learning_rate": 1.998818215222039e-05, "loss": 0.5220111012458801, "step": 525 }, { "epoch": 0.1299728193723746, "grad_norm": 1.080989548524319, "learning_rate": 1.9987982738352813e-05, "loss": 0.46042129397392273, "step": 526 }, { "epoch": 0.13021991598715096, "grad_norm": 1.1305148213479737, "learning_rate": 1.99877816570929e-05, "loss": 0.45012545585632324, "step": 527 }, { "epoch": 0.13046701260192736, "grad_norm": 1.1112950213125927, "learning_rate": 1.9987578908474225e-05, "loss": 0.49068599939346313, "step": 528 }, { "epoch": 0.13071410921670373, "grad_norm": 0.9659109349593109, "learning_rate": 1.9987374492530628e-05, "loss": 0.4363171458244324, "step": 529 }, { "epoch": 0.1309612058314801, "grad_norm": 1.0595968962230535, "learning_rate": 1.9987168409296237e-05, "loss": 0.4545901417732239, "step": 530 }, { "epoch": 0.13120830244625648, "grad_norm": 1.0881116061197642, "learning_rate": 1.998696065880546e-05, "loss": 0.5261813402175903, "step": 531 }, { "epoch": 0.13145539906103287, "grad_norm": 1.1031920687425, "learning_rate": 1.998675124109297e-05, "loss": 0.4957846999168396, "step": 532 }, { "epoch": 0.13170249567580924, "grad_norm": 1.036989245964102, "learning_rate": 1.9986540156193728e-05, "loss": 0.510922908782959, "step": 533 }, { "epoch": 0.13194959229058562, "grad_norm": 1.1011089096906366, "learning_rate": 1.998632740414298e-05, "loss": 0.5636661052703857, "step": 534 }, { "epoch": 0.132196688905362, "grad_norm": 1.1852940598136732, "learning_rate": 1.9986112984976235e-05, "loss": 0.554114580154419, "step": 535 }, { "epoch": 0.13244378552013839, "grad_norm": 1.0853892480099743, "learning_rate": 1.9985896898729296e-05, "loss": 0.46407046914100647, "step": 536 }, { "epoch": 0.13269088213491476, "grad_norm": 1.132235036229156, "learning_rate": 1.9985679145438227e-05, "loss": 0.5688060522079468, "step": 537 }, { "epoch": 0.13293797874969113, "grad_norm": 1.0852666060741216, "learning_rate": 1.998545972513939e-05, "loss": 0.4494878053665161, "step": 538 }, { "epoch": 0.1331850753644675, "grad_norm": 1.141049565982104, "learning_rate": 1.9985238637869406e-05, "loss": 0.5112329721450806, "step": 539 }, { "epoch": 0.1334321719792439, "grad_norm": 0.9407208953504241, "learning_rate": 1.9985015883665188e-05, "loss": 0.4410082697868347, "step": 540 }, { "epoch": 0.13367926859402027, "grad_norm": 1.0745267882097769, "learning_rate": 1.998479146256392e-05, "loss": 0.5041527152061462, "step": 541 }, { "epoch": 0.13392636520879664, "grad_norm": 1.1227454308850031, "learning_rate": 1.998456537460307e-05, "loss": 0.4779517352581024, "step": 542 }, { "epoch": 0.134173461823573, "grad_norm": 1.0629541189360112, "learning_rate": 1.998433761982038e-05, "loss": 0.46570539474487305, "step": 543 }, { "epoch": 0.1344205584383494, "grad_norm": 1.0244182068452359, "learning_rate": 1.998410819825387e-05, "loss": 0.4759644567966461, "step": 544 }, { "epoch": 0.13466765505312578, "grad_norm": 1.0393616180882026, "learning_rate": 1.998387710994184e-05, "loss": 0.46182847023010254, "step": 545 }, { "epoch": 0.13491475166790215, "grad_norm": 1.1055443137079604, "learning_rate": 1.9983644354922868e-05, "loss": 0.4841572046279907, "step": 546 }, { "epoch": 0.13516184828267852, "grad_norm": 0.9938698319971244, "learning_rate": 1.9983409933235813e-05, "loss": 0.47097617387771606, "step": 547 }, { "epoch": 0.1354089448974549, "grad_norm": 0.9922431134421534, "learning_rate": 1.99831738449198e-05, "loss": 0.44293659925460815, "step": 548 }, { "epoch": 0.1356560415122313, "grad_norm": 1.044777094502227, "learning_rate": 1.998293609001426e-05, "loss": 0.4548563063144684, "step": 549 }, { "epoch": 0.13590313812700766, "grad_norm": 1.0994222035346661, "learning_rate": 1.9982696668558863e-05, "loss": 0.45547324419021606, "step": 550 }, { "epoch": 0.13615023474178403, "grad_norm": 1.174825130447801, "learning_rate": 1.9982455580593588e-05, "loss": 0.4361599087715149, "step": 551 }, { "epoch": 0.1363973313565604, "grad_norm": 1.129938030337342, "learning_rate": 1.9982212826158684e-05, "loss": 0.44356220960617065, "step": 552 }, { "epoch": 0.1366444279713368, "grad_norm": 0.9778781424046799, "learning_rate": 1.998196840529467e-05, "loss": 0.4237848222255707, "step": 553 }, { "epoch": 0.13689152458611317, "grad_norm": 1.0504676498945869, "learning_rate": 1.9981722318042355e-05, "loss": 0.47808757424354553, "step": 554 }, { "epoch": 0.13713862120088954, "grad_norm": 1.1743121192138661, "learning_rate": 1.9981474564442814e-05, "loss": 0.5386635661125183, "step": 555 }, { "epoch": 0.13738571781566591, "grad_norm": 0.997793403491087, "learning_rate": 1.998122514453742e-05, "loss": 0.4651044011116028, "step": 556 }, { "epoch": 0.1376328144304423, "grad_norm": 1.1077823975977288, "learning_rate": 1.99809740583678e-05, "loss": 0.49219876527786255, "step": 557 }, { "epoch": 0.13787991104521868, "grad_norm": 1.1785326124737532, "learning_rate": 1.998072130597587e-05, "loss": 0.552130937576294, "step": 558 }, { "epoch": 0.13812700765999505, "grad_norm": 1.1320897585460605, "learning_rate": 1.998046688740383e-05, "loss": 0.47432219982147217, "step": 559 }, { "epoch": 0.13837410427477143, "grad_norm": 1.182533544784216, "learning_rate": 1.998021080269415e-05, "loss": 0.5184386968612671, "step": 560 }, { "epoch": 0.13862120088954782, "grad_norm": 1.13406505579654, "learning_rate": 1.9979953051889584e-05, "loss": 0.4579479694366455, "step": 561 }, { "epoch": 0.1388682975043242, "grad_norm": 1.108645941966503, "learning_rate": 1.9979693635033152e-05, "loss": 0.5106009244918823, "step": 562 }, { "epoch": 0.13911539411910057, "grad_norm": 1.0213584612600024, "learning_rate": 1.997943255216817e-05, "loss": 0.43657386302948, "step": 563 }, { "epoch": 0.13936249073387694, "grad_norm": 1.0230905839942557, "learning_rate": 1.997916980333822e-05, "loss": 0.4243133068084717, "step": 564 }, { "epoch": 0.13960958734865334, "grad_norm": 0.9788068778067263, "learning_rate": 1.9978905388587166e-05, "loss": 0.4371919631958008, "step": 565 }, { "epoch": 0.1398566839634297, "grad_norm": 0.9774932158076189, "learning_rate": 1.997863930795915e-05, "loss": 0.4957963228225708, "step": 566 }, { "epoch": 0.14010378057820608, "grad_norm": 1.0224142967410106, "learning_rate": 1.9978371561498587e-05, "loss": 0.45241352915763855, "step": 567 }, { "epoch": 0.14035087719298245, "grad_norm": 1.0449408498912758, "learning_rate": 1.9978102149250177e-05, "loss": 0.4324636459350586, "step": 568 }, { "epoch": 0.14059797380775885, "grad_norm": 1.143655411347235, "learning_rate": 1.9977831071258898e-05, "loss": 0.4601697325706482, "step": 569 }, { "epoch": 0.14084507042253522, "grad_norm": 1.0689165668085352, "learning_rate": 1.997755832757e-05, "loss": 0.4533436894416809, "step": 570 }, { "epoch": 0.1410921670373116, "grad_norm": 1.0015713867991587, "learning_rate": 1.9977283918229024e-05, "loss": 0.49387651681900024, "step": 571 }, { "epoch": 0.14133926365208796, "grad_norm": 1.0874944823326895, "learning_rate": 1.9977007843281765e-05, "loss": 0.45525437593460083, "step": 572 }, { "epoch": 0.14158636026686433, "grad_norm": 1.2328676056841783, "learning_rate": 1.9976730102774323e-05, "loss": 0.5048923492431641, "step": 573 }, { "epoch": 0.14183345688164073, "grad_norm": 1.0551254399686063, "learning_rate": 1.9976450696753057e-05, "loss": 0.45439931750297546, "step": 574 }, { "epoch": 0.1420805534964171, "grad_norm": 1.0809930290087726, "learning_rate": 1.9976169625264613e-05, "loss": 0.48859214782714844, "step": 575 }, { "epoch": 0.14232765011119347, "grad_norm": 1.2215319966579625, "learning_rate": 1.9975886888355912e-05, "loss": 0.49050503969192505, "step": 576 }, { "epoch": 0.14257474672596984, "grad_norm": 1.0427947337903924, "learning_rate": 1.997560248607416e-05, "loss": 0.5035057067871094, "step": 577 }, { "epoch": 0.14282184334074624, "grad_norm": 1.2906489115604287, "learning_rate": 1.997531641846683e-05, "loss": 0.5385046005249023, "step": 578 }, { "epoch": 0.1430689399555226, "grad_norm": 1.0613713046274518, "learning_rate": 1.9975028685581675e-05, "loss": 0.46604543924331665, "step": 579 }, { "epoch": 0.14331603657029898, "grad_norm": 1.0938700754802078, "learning_rate": 1.9974739287466737e-05, "loss": 0.4897286593914032, "step": 580 }, { "epoch": 0.14356313318507535, "grad_norm": 1.0147564275465009, "learning_rate": 1.9974448224170323e-05, "loss": 0.40777525305747986, "step": 581 }, { "epoch": 0.14381022979985175, "grad_norm": 1.1292479210761226, "learning_rate": 1.9974155495741024e-05, "loss": 0.45265626907348633, "step": 582 }, { "epoch": 0.14405732641462812, "grad_norm": 1.0080603477458618, "learning_rate": 1.9973861102227707e-05, "loss": 0.3913687765598297, "step": 583 }, { "epoch": 0.1443044230294045, "grad_norm": 1.0511420004518688, "learning_rate": 1.997356504367952e-05, "loss": 0.4702889025211334, "step": 584 }, { "epoch": 0.14455151964418086, "grad_norm": 1.0372825514523172, "learning_rate": 1.9973267320145884e-05, "loss": 0.4232579469680786, "step": 585 }, { "epoch": 0.14479861625895726, "grad_norm": 1.0924456962037192, "learning_rate": 1.9972967931676506e-05, "loss": 0.43464887142181396, "step": 586 }, { "epoch": 0.14504571287373363, "grad_norm": 1.051366879090323, "learning_rate": 1.997266687832136e-05, "loss": 0.5302625298500061, "step": 587 }, { "epoch": 0.14529280948851, "grad_norm": 3.0578117087084933, "learning_rate": 1.9972364160130708e-05, "loss": 0.47669896483421326, "step": 588 }, { "epoch": 0.14553990610328638, "grad_norm": 1.0500069434018504, "learning_rate": 1.9972059777155083e-05, "loss": 0.4627160429954529, "step": 589 }, { "epoch": 0.14578700271806277, "grad_norm": 1.081761976695199, "learning_rate": 1.9971753729445305e-05, "loss": 0.4751037359237671, "step": 590 }, { "epoch": 0.14603409933283915, "grad_norm": 1.2494436645262408, "learning_rate": 1.9971446017052453e-05, "loss": 0.4398380517959595, "step": 591 }, { "epoch": 0.14628119594761552, "grad_norm": 1.1207629565919428, "learning_rate": 1.9971136640027908e-05, "loss": 0.5191912651062012, "step": 592 }, { "epoch": 0.1465282925623919, "grad_norm": 1.0809845775007096, "learning_rate": 1.9970825598423316e-05, "loss": 0.5355327129364014, "step": 593 }, { "epoch": 0.1467753891771683, "grad_norm": 1.0759480144745288, "learning_rate": 1.9970512892290596e-05, "loss": 0.5114356875419617, "step": 594 }, { "epoch": 0.14702248579194466, "grad_norm": 1.0021204059493238, "learning_rate": 1.9970198521681956e-05, "loss": 0.38980215787887573, "step": 595 }, { "epoch": 0.14726958240672103, "grad_norm": 1.0059303894623006, "learning_rate": 1.9969882486649875e-05, "loss": 0.435428261756897, "step": 596 }, { "epoch": 0.1475166790214974, "grad_norm": 1.038069544509696, "learning_rate": 1.996956478724711e-05, "loss": 0.42825981974601746, "step": 597 }, { "epoch": 0.14776377563627377, "grad_norm": 1.2223931293108523, "learning_rate": 1.9969245423526704e-05, "loss": 0.4987899661064148, "step": 598 }, { "epoch": 0.14801087225105017, "grad_norm": 1.1128434597029657, "learning_rate": 1.9968924395541967e-05, "loss": 0.46352750062942505, "step": 599 }, { "epoch": 0.14825796886582654, "grad_norm": 1.0958360723358873, "learning_rate": 1.996860170334649e-05, "loss": 0.4235289692878723, "step": 600 }, { "epoch": 0.1485050654806029, "grad_norm": 1.1055348854030527, "learning_rate": 1.996827734699415e-05, "loss": 0.43008846044540405, "step": 601 }, { "epoch": 0.14875216209537928, "grad_norm": 1.210106481768799, "learning_rate": 1.9967951326539087e-05, "loss": 0.5112056732177734, "step": 602 }, { "epoch": 0.14899925871015568, "grad_norm": 1.1305029628171228, "learning_rate": 1.996762364203573e-05, "loss": 0.4774465262889862, "step": 603 }, { "epoch": 0.14924635532493205, "grad_norm": 1.1442733944980532, "learning_rate": 1.996729429353878e-05, "loss": 0.4793345332145691, "step": 604 }, { "epoch": 0.14949345193970842, "grad_norm": 1.0060151176687855, "learning_rate": 1.9966963281103228e-05, "loss": 0.4427064061164856, "step": 605 }, { "epoch": 0.1497405485544848, "grad_norm": 0.958758939684755, "learning_rate": 1.996663060478432e-05, "loss": 0.45375797152519226, "step": 606 }, { "epoch": 0.1499876451692612, "grad_norm": 1.0651562867128952, "learning_rate": 1.9966296264637604e-05, "loss": 0.46983861923217773, "step": 607 }, { "epoch": 0.15023474178403756, "grad_norm": 1.1673338153802375, "learning_rate": 1.9965960260718885e-05, "loss": 0.5680628418922424, "step": 608 }, { "epoch": 0.15048183839881393, "grad_norm": 0.9832074041484219, "learning_rate": 1.9965622593084265e-05, "loss": 0.4407661557197571, "step": 609 }, { "epoch": 0.1507289350135903, "grad_norm": 1.1167241084754382, "learning_rate": 1.996528326179011e-05, "loss": 0.46595025062561035, "step": 610 }, { "epoch": 0.1509760316283667, "grad_norm": 1.172838682745028, "learning_rate": 1.9964942266893062e-05, "loss": 0.4443111717700958, "step": 611 }, { "epoch": 0.15122312824314307, "grad_norm": 1.1112506312955268, "learning_rate": 1.9964599608450057e-05, "loss": 0.4355314075946808, "step": 612 }, { "epoch": 0.15147022485791944, "grad_norm": 1.1391759253138674, "learning_rate": 1.996425528651829e-05, "loss": 0.47237423062324524, "step": 613 }, { "epoch": 0.15171732147269582, "grad_norm": 1.0270268566762237, "learning_rate": 1.9963909301155248e-05, "loss": 0.4673294425010681, "step": 614 }, { "epoch": 0.15196441808747221, "grad_norm": 1.1343689412196374, "learning_rate": 1.9963561652418684e-05, "loss": 0.4563443958759308, "step": 615 }, { "epoch": 0.15221151470224858, "grad_norm": 0.9682557936962443, "learning_rate": 1.996321234036664e-05, "loss": 0.3841860294342041, "step": 616 }, { "epoch": 0.15245861131702496, "grad_norm": 1.2695351484046065, "learning_rate": 1.996286136505743e-05, "loss": 0.542529821395874, "step": 617 }, { "epoch": 0.15270570793180133, "grad_norm": 1.0247841104781439, "learning_rate": 1.996250872654964e-05, "loss": 0.40408849716186523, "step": 618 }, { "epoch": 0.15295280454657773, "grad_norm": 1.055930995706144, "learning_rate": 1.9962154424902143e-05, "loss": 0.3936700224876404, "step": 619 }, { "epoch": 0.1531999011613541, "grad_norm": 1.1340285079895505, "learning_rate": 1.9961798460174088e-05, "loss": 0.493546724319458, "step": 620 }, { "epoch": 0.15344699777613047, "grad_norm": 1.252059172750092, "learning_rate": 1.9961440832424896e-05, "loss": 0.5207912921905518, "step": 621 }, { "epoch": 0.15369409439090684, "grad_norm": 1.0974460733001552, "learning_rate": 1.9961081541714272e-05, "loss": 0.4634722173213959, "step": 622 }, { "epoch": 0.1539411910056832, "grad_norm": 1.08179401110742, "learning_rate": 1.9960720588102194e-05, "loss": 0.39883142709732056, "step": 623 }, { "epoch": 0.1541882876204596, "grad_norm": 1.2520113233974002, "learning_rate": 1.996035797164892e-05, "loss": 0.48931506276130676, "step": 624 }, { "epoch": 0.15443538423523598, "grad_norm": 1.0405017968877923, "learning_rate": 1.9959993692414987e-05, "loss": 0.49390241503715515, "step": 625 }, { "epoch": 0.15468248085001235, "grad_norm": 1.242073044508943, "learning_rate": 1.9959627750461208e-05, "loss": 0.4771464169025421, "step": 626 }, { "epoch": 0.15492957746478872, "grad_norm": 1.1951755895985665, "learning_rate": 1.995926014584867e-05, "loss": 0.46340513229370117, "step": 627 }, { "epoch": 0.15517667407956512, "grad_norm": 1.1458174241507557, "learning_rate": 1.995889087863874e-05, "loss": 0.47516775131225586, "step": 628 }, { "epoch": 0.1554237706943415, "grad_norm": 1.0785388456204472, "learning_rate": 1.9958519948893068e-05, "loss": 0.3684823215007782, "step": 629 }, { "epoch": 0.15567086730911786, "grad_norm": 1.1355792611501363, "learning_rate": 1.9958147356673576e-05, "loss": 0.4173637628555298, "step": 630 }, { "epoch": 0.15591796392389423, "grad_norm": 1.0716991956287327, "learning_rate": 1.9957773102042462e-05, "loss": 0.4043159484863281, "step": 631 }, { "epoch": 0.15616506053867063, "grad_norm": 1.053045637509359, "learning_rate": 1.995739718506221e-05, "loss": 0.48693156242370605, "step": 632 }, { "epoch": 0.156412157153447, "grad_norm": 1.1746064288118019, "learning_rate": 1.9957019605795566e-05, "loss": 0.4784833788871765, "step": 633 }, { "epoch": 0.15665925376822337, "grad_norm": 1.1408880167932025, "learning_rate": 1.9956640364305565e-05, "loss": 0.4900963008403778, "step": 634 }, { "epoch": 0.15690635038299974, "grad_norm": 1.1383412994271263, "learning_rate": 1.9956259460655526e-05, "loss": 0.45669662952423096, "step": 635 }, { "epoch": 0.15715344699777614, "grad_norm": 0.9908532878898776, "learning_rate": 1.995587689490903e-05, "loss": 0.41405218839645386, "step": 636 }, { "epoch": 0.1574005436125525, "grad_norm": 0.9915647955590853, "learning_rate": 1.995549266712994e-05, "loss": 0.46773868799209595, "step": 637 }, { "epoch": 0.15764764022732888, "grad_norm": 0.9880336554083371, "learning_rate": 1.995510677738241e-05, "loss": 0.40959930419921875, "step": 638 }, { "epoch": 0.15789473684210525, "grad_norm": 1.1146343832943324, "learning_rate": 1.9954719225730847e-05, "loss": 0.5236585140228271, "step": 639 }, { "epoch": 0.15814183345688165, "grad_norm": 1.2497871904542013, "learning_rate": 1.995433001223996e-05, "loss": 0.5434281826019287, "step": 640 }, { "epoch": 0.15838893007165802, "grad_norm": 0.9806788068524025, "learning_rate": 1.9953939136974717e-05, "loss": 0.41729819774627686, "step": 641 }, { "epoch": 0.1586360266864344, "grad_norm": 1.0231353997342423, "learning_rate": 1.995354660000037e-05, "loss": 0.4188227653503418, "step": 642 }, { "epoch": 0.15888312330121077, "grad_norm": 1.0847377355274022, "learning_rate": 1.9953152401382455e-05, "loss": 0.5073963403701782, "step": 643 }, { "epoch": 0.15913021991598716, "grad_norm": 1.0006837586857493, "learning_rate": 1.9952756541186775e-05, "loss": 0.43540483713150024, "step": 644 }, { "epoch": 0.15937731653076354, "grad_norm": 1.1965742188634878, "learning_rate": 1.995235901947942e-05, "loss": 0.5219881534576416, "step": 645 }, { "epoch": 0.1596244131455399, "grad_norm": 1.01276714182936, "learning_rate": 1.9951959836326745e-05, "loss": 0.41628819704055786, "step": 646 }, { "epoch": 0.15987150976031628, "grad_norm": 2.1454105187732075, "learning_rate": 1.9951558991795393e-05, "loss": 0.5214790105819702, "step": 647 }, { "epoch": 0.16011860637509265, "grad_norm": 1.25677838015058, "learning_rate": 1.995115648595228e-05, "loss": 0.44513189792633057, "step": 648 }, { "epoch": 0.16036570298986905, "grad_norm": 1.2379959879096587, "learning_rate": 1.9950752318864605e-05, "loss": 0.48923802375793457, "step": 649 }, { "epoch": 0.16061279960464542, "grad_norm": 1.1079473238537183, "learning_rate": 1.9950346490599833e-05, "loss": 0.46642380952835083, "step": 650 }, { "epoch": 0.1608598962194218, "grad_norm": 1.2651803550289393, "learning_rate": 1.9949939001225718e-05, "loss": 0.5254008173942566, "step": 651 }, { "epoch": 0.16110699283419816, "grad_norm": 1.1295061686843344, "learning_rate": 1.9949529850810286e-05, "loss": 0.406915545463562, "step": 652 }, { "epoch": 0.16135408944897456, "grad_norm": 1.0548874821406473, "learning_rate": 1.9949119039421832e-05, "loss": 0.43084076046943665, "step": 653 }, { "epoch": 0.16160118606375093, "grad_norm": 1.0721807743398373, "learning_rate": 1.994870656712895e-05, "loss": 0.45882344245910645, "step": 654 }, { "epoch": 0.1618482826785273, "grad_norm": 1.1744791022493068, "learning_rate": 1.9948292434000483e-05, "loss": 0.41792190074920654, "step": 655 }, { "epoch": 0.16209537929330367, "grad_norm": 1.098962667138706, "learning_rate": 1.9947876640105584e-05, "loss": 0.41426384449005127, "step": 656 }, { "epoch": 0.16234247590808007, "grad_norm": 1.2353264419059518, "learning_rate": 1.994745918551365e-05, "loss": 0.4496003985404968, "step": 657 }, { "epoch": 0.16258957252285644, "grad_norm": 1.0092831067486656, "learning_rate": 1.994704007029438e-05, "loss": 0.422840416431427, "step": 658 }, { "epoch": 0.1628366691376328, "grad_norm": 1.016102945423512, "learning_rate": 1.994661929451774e-05, "loss": 0.4078865349292755, "step": 659 }, { "epoch": 0.16308376575240918, "grad_norm": 1.0867452789501082, "learning_rate": 1.9946196858253967e-05, "loss": 0.47404003143310547, "step": 660 }, { "epoch": 0.16333086236718558, "grad_norm": 1.1176848765617138, "learning_rate": 1.994577276157359e-05, "loss": 0.4478823244571686, "step": 661 }, { "epoch": 0.16357795898196195, "grad_norm": 0.9452043264736303, "learning_rate": 1.9945347004547404e-05, "loss": 0.4613984227180481, "step": 662 }, { "epoch": 0.16382505559673832, "grad_norm": 1.2780661845601042, "learning_rate": 1.9944919587246487e-05, "loss": 0.4416124224662781, "step": 663 }, { "epoch": 0.1640721522115147, "grad_norm": 1.0137346192335583, "learning_rate": 1.9944490509742193e-05, "loss": 0.4084761142730713, "step": 664 }, { "epoch": 0.1643192488262911, "grad_norm": 1.0878601901294151, "learning_rate": 1.9944059772106146e-05, "loss": 0.39347660541534424, "step": 665 }, { "epoch": 0.16456634544106746, "grad_norm": 1.1759339319658757, "learning_rate": 1.9943627374410264e-05, "loss": 0.49036574363708496, "step": 666 }, { "epoch": 0.16481344205584383, "grad_norm": 1.1650294347469978, "learning_rate": 1.994319331672672e-05, "loss": 0.49207693338394165, "step": 667 }, { "epoch": 0.1650605386706202, "grad_norm": 1.034353937443376, "learning_rate": 1.9942757599127983e-05, "loss": 0.46865779161453247, "step": 668 }, { "epoch": 0.1653076352853966, "grad_norm": 1.1242023252327997, "learning_rate": 1.994232022168679e-05, "loss": 0.50669926404953, "step": 669 }, { "epoch": 0.16555473190017297, "grad_norm": 1.009559047971051, "learning_rate": 1.9941881184476154e-05, "loss": 0.4654829502105713, "step": 670 }, { "epoch": 0.16580182851494935, "grad_norm": 1.1170120988242263, "learning_rate": 1.994144048756937e-05, "loss": 0.4863721430301666, "step": 671 }, { "epoch": 0.16604892512972572, "grad_norm": 1.038450154218255, "learning_rate": 1.994099813104001e-05, "loss": 0.48259639739990234, "step": 672 }, { "epoch": 0.1662960217445021, "grad_norm": 1.066651947539703, "learning_rate": 1.9940554114961915e-05, "loss": 0.4430471658706665, "step": 673 }, { "epoch": 0.16654311835927849, "grad_norm": 1.0310266402355595, "learning_rate": 1.9940108439409215e-05, "loss": 0.45683035254478455, "step": 674 }, { "epoch": 0.16679021497405486, "grad_norm": 1.1718895595588343, "learning_rate": 1.9939661104456304e-05, "loss": 0.45702648162841797, "step": 675 }, { "epoch": 0.16703731158883123, "grad_norm": 1.1874955872948982, "learning_rate": 1.993921211017787e-05, "loss": 0.4904472231864929, "step": 676 }, { "epoch": 0.1672844082036076, "grad_norm": 1.0853800710525774, "learning_rate": 1.9938761456648858e-05, "loss": 0.45684680342674255, "step": 677 }, { "epoch": 0.167531504818384, "grad_norm": 1.1022479117847055, "learning_rate": 1.993830914394451e-05, "loss": 0.5073447227478027, "step": 678 }, { "epoch": 0.16777860143316037, "grad_norm": 1.1757069765891848, "learning_rate": 1.9937855172140325e-05, "loss": 0.43263787031173706, "step": 679 }, { "epoch": 0.16802569804793674, "grad_norm": 1.0228872869946721, "learning_rate": 1.9937399541312096e-05, "loss": 0.43177151679992676, "step": 680 }, { "epoch": 0.1682727946627131, "grad_norm": 1.2456370867210604, "learning_rate": 1.9936942251535884e-05, "loss": 0.4962512254714966, "step": 681 }, { "epoch": 0.1685198912774895, "grad_norm": 1.0329348737429556, "learning_rate": 1.9936483302888026e-05, "loss": 0.4003651738166809, "step": 682 }, { "epoch": 0.16876698789226588, "grad_norm": 1.2262661959046943, "learning_rate": 1.993602269544514e-05, "loss": 0.5384105443954468, "step": 683 }, { "epoch": 0.16901408450704225, "grad_norm": 1.0954645970756587, "learning_rate": 1.9935560429284124e-05, "loss": 0.4459867477416992, "step": 684 }, { "epoch": 0.16926118112181862, "grad_norm": 1.1262825227776758, "learning_rate": 1.9935096504482145e-05, "loss": 0.42043232917785645, "step": 685 }, { "epoch": 0.16950827773659502, "grad_norm": 1.1077527856929301, "learning_rate": 1.9934630921116654e-05, "loss": 0.5041652917861938, "step": 686 }, { "epoch": 0.1697553743513714, "grad_norm": 1.1029804543904165, "learning_rate": 1.993416367926537e-05, "loss": 0.49432212114334106, "step": 687 }, { "epoch": 0.17000247096614776, "grad_norm": 1.1093933955496695, "learning_rate": 1.9933694779006294e-05, "loss": 0.48123860359191895, "step": 688 }, { "epoch": 0.17024956758092413, "grad_norm": 1.1661907226717445, "learning_rate": 1.9933224220417712e-05, "loss": 0.4450361728668213, "step": 689 }, { "epoch": 0.17049666419570053, "grad_norm": 1.1647136480485198, "learning_rate": 1.9932752003578167e-05, "loss": 0.44985535740852356, "step": 690 }, { "epoch": 0.1707437608104769, "grad_norm": 1.2466198878414008, "learning_rate": 1.993227812856651e-05, "loss": 0.4793316125869751, "step": 691 }, { "epoch": 0.17099085742525327, "grad_norm": 1.0955542505103772, "learning_rate": 1.9931802595461827e-05, "loss": 0.46049070358276367, "step": 692 }, { "epoch": 0.17123795404002964, "grad_norm": 1.1490412423688283, "learning_rate": 1.9931325404343512e-05, "loss": 0.5009739995002747, "step": 693 }, { "epoch": 0.17148505065480604, "grad_norm": 0.9678568978534556, "learning_rate": 1.9930846555291237e-05, "loss": 0.40847426652908325, "step": 694 }, { "epoch": 0.1717321472695824, "grad_norm": 1.0636189703629917, "learning_rate": 1.993036604838493e-05, "loss": 0.39640307426452637, "step": 695 }, { "epoch": 0.17197924388435878, "grad_norm": 1.2284547548698026, "learning_rate": 1.992988388370481e-05, "loss": 0.5195721387863159, "step": 696 }, { "epoch": 0.17222634049913516, "grad_norm": 1.1154442326334342, "learning_rate": 1.9929400061331368e-05, "loss": 0.42455747723579407, "step": 697 }, { "epoch": 0.17247343711391153, "grad_norm": 1.228540123399055, "learning_rate": 1.9928914581345373e-05, "loss": 0.5310759544372559, "step": 698 }, { "epoch": 0.17272053372868792, "grad_norm": 1.3101233672577335, "learning_rate": 1.9928427443827874e-05, "loss": 0.5211117267608643, "step": 699 }, { "epoch": 0.1729676303434643, "grad_norm": 1.0496051082917535, "learning_rate": 1.9927938648860194e-05, "loss": 0.42815059423446655, "step": 700 }, { "epoch": 0.17321472695824067, "grad_norm": 1.139270977312398, "learning_rate": 1.992744819652393e-05, "loss": 0.44333451986312866, "step": 701 }, { "epoch": 0.17346182357301704, "grad_norm": 1.1677542553201028, "learning_rate": 1.9926956086900956e-05, "loss": 0.4527941346168518, "step": 702 }, { "epoch": 0.17370892018779344, "grad_norm": 1.1632109728993427, "learning_rate": 1.992646232007343e-05, "loss": 0.5283389687538147, "step": 703 }, { "epoch": 0.1739560168025698, "grad_norm": 1.1932478575285963, "learning_rate": 1.9925966896123777e-05, "loss": 0.4606601893901825, "step": 704 }, { "epoch": 0.17420311341734618, "grad_norm": 1.0284139818892921, "learning_rate": 1.9925469815134706e-05, "loss": 0.4274876117706299, "step": 705 }, { "epoch": 0.17445021003212255, "grad_norm": 1.0634019948290414, "learning_rate": 1.9924971077189198e-05, "loss": 0.40352851152420044, "step": 706 }, { "epoch": 0.17469730664689895, "grad_norm": 0.9991561915397781, "learning_rate": 1.992447068237051e-05, "loss": 0.4232434630393982, "step": 707 }, { "epoch": 0.17494440326167532, "grad_norm": 1.3440912330250778, "learning_rate": 1.9923968630762187e-05, "loss": 0.4985716938972473, "step": 708 }, { "epoch": 0.1751914998764517, "grad_norm": 1.0684551494380707, "learning_rate": 1.992346492244803e-05, "loss": 0.4157037138938904, "step": 709 }, { "epoch": 0.17543859649122806, "grad_norm": 1.0583709476159482, "learning_rate": 1.992295955751213e-05, "loss": 0.426014244556427, "step": 710 }, { "epoch": 0.17568569310600446, "grad_norm": 1.110919678718669, "learning_rate": 1.9922452536038858e-05, "loss": 0.4393619894981384, "step": 711 }, { "epoch": 0.17593278972078083, "grad_norm": 1.0202309975629549, "learning_rate": 1.9921943858112854e-05, "loss": 0.4325813353061676, "step": 712 }, { "epoch": 0.1761798863355572, "grad_norm": 1.0054320980828328, "learning_rate": 1.9921433523819034e-05, "loss": 0.4240115284919739, "step": 713 }, { "epoch": 0.17642698295033357, "grad_norm": 1.165510779833044, "learning_rate": 1.9920921533242597e-05, "loss": 0.47524338960647583, "step": 714 }, { "epoch": 0.17667407956510997, "grad_norm": 1.0610698035667216, "learning_rate": 1.9920407886469015e-05, "loss": 0.4419434368610382, "step": 715 }, { "epoch": 0.17692117617988634, "grad_norm": 1.0146948380034673, "learning_rate": 1.991989258358403e-05, "loss": 0.4686160385608673, "step": 716 }, { "epoch": 0.1771682727946627, "grad_norm": 1.153018987014404, "learning_rate": 1.9919375624673672e-05, "loss": 0.5155465602874756, "step": 717 }, { "epoch": 0.17741536940943908, "grad_norm": 1.1428711504533424, "learning_rate": 1.9918857009824236e-05, "loss": 0.4975202679634094, "step": 718 }, { "epoch": 0.17766246602421548, "grad_norm": 1.0497682157326875, "learning_rate": 1.991833673912231e-05, "loss": 0.4594228267669678, "step": 719 }, { "epoch": 0.17790956263899185, "grad_norm": 1.1302896630354895, "learning_rate": 1.991781481265474e-05, "loss": 0.41559308767318726, "step": 720 }, { "epoch": 0.17815665925376822, "grad_norm": 1.100158016780944, "learning_rate": 1.9917291230508658e-05, "loss": 0.40448373556137085, "step": 721 }, { "epoch": 0.1784037558685446, "grad_norm": 1.0896674456368898, "learning_rate": 1.991676599277147e-05, "loss": 0.4834950566291809, "step": 722 }, { "epoch": 0.17865085248332097, "grad_norm": 0.9923676860759691, "learning_rate": 1.991623909953086e-05, "loss": 0.4314972162246704, "step": 723 }, { "epoch": 0.17889794909809736, "grad_norm": 1.2708915867137343, "learning_rate": 1.991571055087479e-05, "loss": 0.5583851337432861, "step": 724 }, { "epoch": 0.17914504571287374, "grad_norm": 1.146241443319418, "learning_rate": 1.9915180346891492e-05, "loss": 0.41516679525375366, "step": 725 }, { "epoch": 0.1793921423276501, "grad_norm": 0.9881611578959247, "learning_rate": 1.991464848766948e-05, "loss": 0.43462270498275757, "step": 726 }, { "epoch": 0.17963923894242648, "grad_norm": 1.0839889390288064, "learning_rate": 1.991411497329754e-05, "loss": 0.45366328954696655, "step": 727 }, { "epoch": 0.17988633555720288, "grad_norm": 1.1940727527291082, "learning_rate": 1.991357980386474e-05, "loss": 0.4669267237186432, "step": 728 }, { "epoch": 0.18013343217197925, "grad_norm": 1.0652670068169656, "learning_rate": 1.9913042979460418e-05, "loss": 0.36056089401245117, "step": 729 }, { "epoch": 0.18038052878675562, "grad_norm": 1.1255088668175957, "learning_rate": 1.9912504500174196e-05, "loss": 0.485487163066864, "step": 730 }, { "epoch": 0.180627625401532, "grad_norm": 1.174286301463361, "learning_rate": 1.9911964366095964e-05, "loss": 0.4995037317276001, "step": 731 }, { "epoch": 0.1808747220163084, "grad_norm": 1.126424274998357, "learning_rate": 1.9911422577315896e-05, "loss": 0.45353496074676514, "step": 732 }, { "epoch": 0.18112181863108476, "grad_norm": 1.1985474381950272, "learning_rate": 1.991087913392443e-05, "loss": 0.5178018808364868, "step": 733 }, { "epoch": 0.18136891524586113, "grad_norm": 1.0313399988686807, "learning_rate": 1.9910334036012295e-05, "loss": 0.39615413546562195, "step": 734 }, { "epoch": 0.1816160118606375, "grad_norm": 1.3327730689183768, "learning_rate": 1.990978728367049e-05, "loss": 0.41027510166168213, "step": 735 }, { "epoch": 0.1818631084754139, "grad_norm": 1.2768422556313785, "learning_rate": 1.9909238876990283e-05, "loss": 0.47907936573028564, "step": 736 }, { "epoch": 0.18211020509019027, "grad_norm": 1.1709271255066402, "learning_rate": 1.9908688816063236e-05, "loss": 0.4795265793800354, "step": 737 }, { "epoch": 0.18235730170496664, "grad_norm": 1.0936154319472815, "learning_rate": 1.9908137100981165e-05, "loss": 0.4463469386100769, "step": 738 }, { "epoch": 0.182604398319743, "grad_norm": 1.1479612031997324, "learning_rate": 1.990758373183618e-05, "loss": 0.5228809714317322, "step": 739 }, { "epoch": 0.1828514949345194, "grad_norm": 1.1364928364393851, "learning_rate": 1.9907028708720654e-05, "loss": 0.49452242255210876, "step": 740 }, { "epoch": 0.18309859154929578, "grad_norm": 1.212564894374083, "learning_rate": 1.9906472031727252e-05, "loss": 0.35554537177085876, "step": 741 }, { "epoch": 0.18334568816407215, "grad_norm": 1.0360348619592512, "learning_rate": 1.99059137009489e-05, "loss": 0.4287320375442505, "step": 742 }, { "epoch": 0.18359278477884852, "grad_norm": 1.041373326267412, "learning_rate": 1.9905353716478803e-05, "loss": 0.45353853702545166, "step": 743 }, { "epoch": 0.18383988139362492, "grad_norm": 1.1331408626678203, "learning_rate": 1.990479207841045e-05, "loss": 0.46967875957489014, "step": 744 }, { "epoch": 0.1840869780084013, "grad_norm": 1.0923211803753994, "learning_rate": 1.9904228786837597e-05, "loss": 0.446040540933609, "step": 745 }, { "epoch": 0.18433407462317766, "grad_norm": 1.0714100816442467, "learning_rate": 1.9903663841854285e-05, "loss": 0.4944271147251129, "step": 746 }, { "epoch": 0.18458117123795403, "grad_norm": 1.1612374069166373, "learning_rate": 1.9903097243554818e-05, "loss": 0.4619523882865906, "step": 747 }, { "epoch": 0.1848282678527304, "grad_norm": 1.1795756842623382, "learning_rate": 1.9902528992033788e-05, "loss": 0.430846631526947, "step": 748 }, { "epoch": 0.1850753644675068, "grad_norm": 1.1186681614395233, "learning_rate": 1.990195908738606e-05, "loss": 0.4583691358566284, "step": 749 }, { "epoch": 0.18532246108228317, "grad_norm": 1.0868668857354093, "learning_rate": 1.990138752970677e-05, "loss": 0.4540676772594452, "step": 750 }, { "epoch": 0.18556955769705955, "grad_norm": 1.0631768332468228, "learning_rate": 1.990081431909134e-05, "loss": 0.4625493586063385, "step": 751 }, { "epoch": 0.18581665431183592, "grad_norm": 1.055671014092039, "learning_rate": 1.9900239455635458e-05, "loss": 0.42315685749053955, "step": 752 }, { "epoch": 0.18606375092661231, "grad_norm": 1.056032578387629, "learning_rate": 1.989966293943509e-05, "loss": 0.479211688041687, "step": 753 }, { "epoch": 0.18631084754138869, "grad_norm": 1.136706234752538, "learning_rate": 1.9899084770586482e-05, "loss": 0.4723115563392639, "step": 754 }, { "epoch": 0.18655794415616506, "grad_norm": 1.0832733720174479, "learning_rate": 1.989850494918615e-05, "loss": 0.4715535640716553, "step": 755 }, { "epoch": 0.18680504077094143, "grad_norm": 1.0599338888512577, "learning_rate": 1.9897923475330896e-05, "loss": 0.473336786031723, "step": 756 }, { "epoch": 0.18705213738571783, "grad_norm": 1.0234477326746745, "learning_rate": 1.9897340349117785e-05, "loss": 0.39215952157974243, "step": 757 }, { "epoch": 0.1872992340004942, "grad_norm": 1.0071725582389899, "learning_rate": 1.9896755570644165e-05, "loss": 0.4249839782714844, "step": 758 }, { "epoch": 0.18754633061527057, "grad_norm": 1.1417759077154357, "learning_rate": 1.989616914000766e-05, "loss": 0.44383561611175537, "step": 759 }, { "epoch": 0.18779342723004694, "grad_norm": 1.173792316971768, "learning_rate": 1.989558105730617e-05, "loss": 0.43983834981918335, "step": 760 }, { "epoch": 0.18804052384482334, "grad_norm": 1.1527130293100296, "learning_rate": 1.9894991322637872e-05, "loss": 0.5298507213592529, "step": 761 }, { "epoch": 0.1882876204595997, "grad_norm": 1.057217620055211, "learning_rate": 1.9894399936101204e-05, "loss": 0.40196529030799866, "step": 762 }, { "epoch": 0.18853471707437608, "grad_norm": 0.9630259286161239, "learning_rate": 1.9893806897794906e-05, "loss": 0.38431107997894287, "step": 763 }, { "epoch": 0.18878181368915245, "grad_norm": 1.1436451198335684, "learning_rate": 1.9893212207817972e-05, "loss": 0.4147067070007324, "step": 764 }, { "epoch": 0.18902891030392885, "grad_norm": 1.0982107141377175, "learning_rate": 1.989261586626968e-05, "loss": 0.403231143951416, "step": 765 }, { "epoch": 0.18927600691870522, "grad_norm": 1.10636151752875, "learning_rate": 1.9892017873249588e-05, "loss": 0.4397987723350525, "step": 766 }, { "epoch": 0.1895231035334816, "grad_norm": 1.1226540289767597, "learning_rate": 1.9891418228857518e-05, "loss": 0.46170419454574585, "step": 767 }, { "epoch": 0.18977020014825796, "grad_norm": 1.193836102627106, "learning_rate": 1.989081693319358e-05, "loss": 0.4826369285583496, "step": 768 }, { "epoch": 0.19001729676303436, "grad_norm": 1.1964171681740532, "learning_rate": 1.9890213986358148e-05, "loss": 0.428552508354187, "step": 769 }, { "epoch": 0.19026439337781073, "grad_norm": 1.204097587333347, "learning_rate": 1.9889609388451886e-05, "loss": 0.4499293565750122, "step": 770 }, { "epoch": 0.1905114899925871, "grad_norm": 1.128806160070485, "learning_rate": 1.988900313957572e-05, "loss": 0.48148253560066223, "step": 771 }, { "epoch": 0.19075858660736347, "grad_norm": 1.1880037183345005, "learning_rate": 1.988839523983086e-05, "loss": 0.49272531270980835, "step": 772 }, { "epoch": 0.19100568322213984, "grad_norm": 1.2526028817229218, "learning_rate": 1.9887785689318786e-05, "loss": 0.4537902772426605, "step": 773 }, { "epoch": 0.19125277983691624, "grad_norm": 1.137600830934191, "learning_rate": 1.988717448814126e-05, "loss": 0.40033307671546936, "step": 774 }, { "epoch": 0.1914998764516926, "grad_norm": 1.1457764011628353, "learning_rate": 1.988656163640031e-05, "loss": 0.4247094690799713, "step": 775 }, { "epoch": 0.19174697306646898, "grad_norm": 1.2749881672641417, "learning_rate": 1.9885947134198246e-05, "loss": 0.493020236492157, "step": 776 }, { "epoch": 0.19199406968124536, "grad_norm": 1.1695601181903847, "learning_rate": 1.988533098163766e-05, "loss": 0.46678227186203003, "step": 777 }, { "epoch": 0.19224116629602175, "grad_norm": 1.1044167137317416, "learning_rate": 1.988471317882141e-05, "loss": 0.3980989158153534, "step": 778 }, { "epoch": 0.19248826291079812, "grad_norm": 1.2574428461649707, "learning_rate": 1.988409372585263e-05, "loss": 0.4914180040359497, "step": 779 }, { "epoch": 0.1927353595255745, "grad_norm": 1.0705910292776408, "learning_rate": 1.9883472622834724e-05, "loss": 0.3984605073928833, "step": 780 }, { "epoch": 0.19298245614035087, "grad_norm": 1.0835584799471303, "learning_rate": 1.988284986987139e-05, "loss": 0.4409230351448059, "step": 781 }, { "epoch": 0.19322955275512727, "grad_norm": 1.0309477300525542, "learning_rate": 1.988222546706659e-05, "loss": 0.4493047297000885, "step": 782 }, { "epoch": 0.19347664936990364, "grad_norm": 1.017862954281258, "learning_rate": 1.988159941452456e-05, "loss": 0.45687177777290344, "step": 783 }, { "epoch": 0.19372374598468, "grad_norm": 1.16445949262716, "learning_rate": 1.9880971712349805e-05, "loss": 0.5048147439956665, "step": 784 }, { "epoch": 0.19397084259945638, "grad_norm": 1.1071198367942612, "learning_rate": 1.9880342360647122e-05, "loss": 0.4079487919807434, "step": 785 }, { "epoch": 0.19421793921423278, "grad_norm": 1.2279561932105587, "learning_rate": 1.987971135952157e-05, "loss": 0.5033586621284485, "step": 786 }, { "epoch": 0.19446503582900915, "grad_norm": 1.1081112197316116, "learning_rate": 1.9879078709078494e-05, "loss": 0.4194066524505615, "step": 787 }, { "epoch": 0.19471213244378552, "grad_norm": 1.0871112362083646, "learning_rate": 1.9878444409423505e-05, "loss": 0.424580842256546, "step": 788 }, { "epoch": 0.1949592290585619, "grad_norm": 1.1142835092786934, "learning_rate": 1.9877808460662494e-05, "loss": 0.44802242517471313, "step": 789 }, { "epoch": 0.1952063256733383, "grad_norm": 1.1534666877271973, "learning_rate": 1.9877170862901623e-05, "loss": 0.4184938669204712, "step": 790 }, { "epoch": 0.19545342228811466, "grad_norm": 1.1876078082548722, "learning_rate": 1.9876531616247335e-05, "loss": 0.4538682997226715, "step": 791 }, { "epoch": 0.19570051890289103, "grad_norm": 1.0886193183829587, "learning_rate": 1.987589072080635e-05, "loss": 0.40222442150115967, "step": 792 }, { "epoch": 0.1959476155176674, "grad_norm": 1.112459036661285, "learning_rate": 1.9875248176685652e-05, "loss": 0.4680640995502472, "step": 793 }, { "epoch": 0.1961947121324438, "grad_norm": 1.0092020247550284, "learning_rate": 1.9874603983992513e-05, "loss": 0.36210349202156067, "step": 794 }, { "epoch": 0.19644180874722017, "grad_norm": 1.047426082867141, "learning_rate": 1.987395814283447e-05, "loss": 0.4174862205982208, "step": 795 }, { "epoch": 0.19668890536199654, "grad_norm": 1.0347786274244153, "learning_rate": 1.9873310653319337e-05, "loss": 0.39218389987945557, "step": 796 }, { "epoch": 0.1969360019767729, "grad_norm": 1.154700665527125, "learning_rate": 1.9872661515555213e-05, "loss": 0.46048712730407715, "step": 797 }, { "epoch": 0.19718309859154928, "grad_norm": 1.1035333195424741, "learning_rate": 1.987201072965046e-05, "loss": 0.3974722921848297, "step": 798 }, { "epoch": 0.19743019520632568, "grad_norm": 1.2573667825278512, "learning_rate": 1.9871358295713726e-05, "loss": 0.5071564316749573, "step": 799 }, { "epoch": 0.19767729182110205, "grad_norm": 1.2056339989169698, "learning_rate": 1.9870704213853922e-05, "loss": 0.5213316082954407, "step": 800 }, { "epoch": 0.19792438843587842, "grad_norm": 1.2011170574896173, "learning_rate": 1.987004848418024e-05, "loss": 0.47286832332611084, "step": 801 }, { "epoch": 0.1981714850506548, "grad_norm": 1.0669337321520058, "learning_rate": 1.9869391106802152e-05, "loss": 0.46117568016052246, "step": 802 }, { "epoch": 0.1984185816654312, "grad_norm": 1.1499843341364637, "learning_rate": 1.98687320818294e-05, "loss": 0.44226139783859253, "step": 803 }, { "epoch": 0.19866567828020756, "grad_norm": 1.0996753924172085, "learning_rate": 1.9868071409371997e-05, "loss": 0.4229026436805725, "step": 804 }, { "epoch": 0.19891277489498393, "grad_norm": 1.245190608990723, "learning_rate": 1.986740908954024e-05, "loss": 0.47559359669685364, "step": 805 }, { "epoch": 0.1991598715097603, "grad_norm": 1.081556174052948, "learning_rate": 1.9866745122444695e-05, "loss": 0.4453134536743164, "step": 806 }, { "epoch": 0.1994069681245367, "grad_norm": 1.0976592664332583, "learning_rate": 1.9866079508196203e-05, "loss": 0.3938356637954712, "step": 807 }, { "epoch": 0.19965406473931308, "grad_norm": 1.0360338443018773, "learning_rate": 1.9865412246905883e-05, "loss": 0.44556254148483276, "step": 808 }, { "epoch": 0.19990116135408945, "grad_norm": 1.1484124543935958, "learning_rate": 1.9864743338685132e-05, "loss": 0.41235411167144775, "step": 809 }, { "epoch": 0.20014825796886582, "grad_norm": 1.0140999127659223, "learning_rate": 1.9864072783645608e-05, "loss": 0.4642380475997925, "step": 810 }, { "epoch": 0.20039535458364222, "grad_norm": 1.0690667916483283, "learning_rate": 1.986340058189926e-05, "loss": 0.3592289090156555, "step": 811 }, { "epoch": 0.2006424511984186, "grad_norm": 1.1185973100752487, "learning_rate": 1.9862726733558305e-05, "loss": 0.4772152304649353, "step": 812 }, { "epoch": 0.20088954781319496, "grad_norm": 1.2185579038075558, "learning_rate": 1.9862051238735233e-05, "loss": 0.49502289295196533, "step": 813 }, { "epoch": 0.20113664442797133, "grad_norm": 1.1975375887318747, "learning_rate": 1.986137409754281e-05, "loss": 0.5039775371551514, "step": 814 }, { "epoch": 0.20138374104274773, "grad_norm": 0.9516012758542114, "learning_rate": 1.9860695310094084e-05, "loss": 0.3731744885444641, "step": 815 }, { "epoch": 0.2016308376575241, "grad_norm": 1.2658351377607509, "learning_rate": 1.9860014876502363e-05, "loss": 0.5002920031547546, "step": 816 }, { "epoch": 0.20187793427230047, "grad_norm": 1.2055878441922978, "learning_rate": 1.9859332796881247e-05, "loss": 0.48933857679367065, "step": 817 }, { "epoch": 0.20212503088707684, "grad_norm": 1.0840894496160187, "learning_rate": 1.9858649071344596e-05, "loss": 0.3761183023452759, "step": 818 }, { "epoch": 0.20237212750185324, "grad_norm": 1.138234291544235, "learning_rate": 1.985796370000655e-05, "loss": 0.4086572229862213, "step": 819 }, { "epoch": 0.2026192241166296, "grad_norm": 0.9672360236273894, "learning_rate": 1.9857276682981532e-05, "loss": 0.39252805709838867, "step": 820 }, { "epoch": 0.20286632073140598, "grad_norm": 1.1840617769790864, "learning_rate": 1.985658802038423e-05, "loss": 0.4208684265613556, "step": 821 }, { "epoch": 0.20311341734618235, "grad_norm": 1.0519504523225072, "learning_rate": 1.9855897712329605e-05, "loss": 0.4187632203102112, "step": 822 }, { "epoch": 0.20336051396095872, "grad_norm": 1.0389314852671254, "learning_rate": 1.9855205758932898e-05, "loss": 0.43199169635772705, "step": 823 }, { "epoch": 0.20360761057573512, "grad_norm": 1.2230972053460143, "learning_rate": 1.9854512160309626e-05, "loss": 0.43565845489501953, "step": 824 }, { "epoch": 0.2038547071905115, "grad_norm": 1.030340535378089, "learning_rate": 1.985381691657558e-05, "loss": 0.37656503915786743, "step": 825 }, { "epoch": 0.20410180380528786, "grad_norm": 1.1041338684706876, "learning_rate": 1.985312002784682e-05, "loss": 0.3866596221923828, "step": 826 }, { "epoch": 0.20434890042006423, "grad_norm": 1.225806021105272, "learning_rate": 1.9852421494239683e-05, "loss": 0.47959548234939575, "step": 827 }, { "epoch": 0.20459599703484063, "grad_norm": 1.0668809765792466, "learning_rate": 1.985172131587079e-05, "loss": 0.39571720361709595, "step": 828 }, { "epoch": 0.204843093649617, "grad_norm": 1.068299407731187, "learning_rate": 1.985101949285702e-05, "loss": 0.3746510148048401, "step": 829 }, { "epoch": 0.20509019026439337, "grad_norm": 1.170908435647061, "learning_rate": 1.985031602531554e-05, "loss": 0.4736773371696472, "step": 830 }, { "epoch": 0.20533728687916974, "grad_norm": 1.198239775993358, "learning_rate": 1.9849610913363783e-05, "loss": 0.4349817633628845, "step": 831 }, { "epoch": 0.20558438349394614, "grad_norm": 1.180370055613535, "learning_rate": 1.9848904157119465e-05, "loss": 0.4213574230670929, "step": 832 }, { "epoch": 0.20583148010872251, "grad_norm": 1.0278810703291925, "learning_rate": 1.984819575670057e-05, "loss": 0.4303247928619385, "step": 833 }, { "epoch": 0.20607857672349889, "grad_norm": 1.202425472471468, "learning_rate": 1.9847485712225358e-05, "loss": 0.483460396528244, "step": 834 }, { "epoch": 0.20632567333827526, "grad_norm": 1.1917412979561246, "learning_rate": 1.9846774023812366e-05, "loss": 0.46379464864730835, "step": 835 }, { "epoch": 0.20657276995305165, "grad_norm": 1.2083074450058011, "learning_rate": 1.98460606915804e-05, "loss": 0.5064494609832764, "step": 836 }, { "epoch": 0.20681986656782803, "grad_norm": 1.154295534680605, "learning_rate": 1.984534571564854e-05, "loss": 0.46369293332099915, "step": 837 }, { "epoch": 0.2070669631826044, "grad_norm": 1.1300275772339379, "learning_rate": 1.984462909613615e-05, "loss": 0.4572177231311798, "step": 838 }, { "epoch": 0.20731405979738077, "grad_norm": 1.051476476863679, "learning_rate": 1.984391083316286e-05, "loss": 0.40104061365127563, "step": 839 }, { "epoch": 0.20756115641215717, "grad_norm": 1.1488166526811252, "learning_rate": 1.984319092684858e-05, "loss": 0.5601277947425842, "step": 840 }, { "epoch": 0.20780825302693354, "grad_norm": 0.9224106084879157, "learning_rate": 1.9842469377313486e-05, "loss": 0.3732836842536926, "step": 841 }, { "epoch": 0.2080553496417099, "grad_norm": 0.9883257984368421, "learning_rate": 1.984174618467804e-05, "loss": 0.34942126274108887, "step": 842 }, { "epoch": 0.20830244625648628, "grad_norm": 1.3470319905369827, "learning_rate": 1.9841021349062965e-05, "loss": 0.4080442190170288, "step": 843 }, { "epoch": 0.20854954287126268, "grad_norm": 0.9534236639738287, "learning_rate": 1.9840294870589266e-05, "loss": 0.3888307213783264, "step": 844 }, { "epoch": 0.20879663948603905, "grad_norm": 1.0099357541851268, "learning_rate": 1.9839566749378223e-05, "loss": 0.3959799110889435, "step": 845 }, { "epoch": 0.20904373610081542, "grad_norm": 1.1826236173912945, "learning_rate": 1.9838836985551388e-05, "loss": 0.4786267876625061, "step": 846 }, { "epoch": 0.2092908327155918, "grad_norm": 1.0689244493367347, "learning_rate": 1.9838105579230592e-05, "loss": 0.4108949303627014, "step": 847 }, { "epoch": 0.20953792933036816, "grad_norm": 1.0868125177490278, "learning_rate": 1.9837372530537925e-05, "loss": 0.44886505603790283, "step": 848 }, { "epoch": 0.20978502594514456, "grad_norm": 1.2427969200319946, "learning_rate": 1.9836637839595775e-05, "loss": 0.46155431866645813, "step": 849 }, { "epoch": 0.21003212255992093, "grad_norm": 1.1030187762810795, "learning_rate": 1.983590150652678e-05, "loss": 0.3370492160320282, "step": 850 }, { "epoch": 0.2102792191746973, "grad_norm": 1.377692837414968, "learning_rate": 1.983516353145387e-05, "loss": 0.4872419238090515, "step": 851 }, { "epoch": 0.21052631578947367, "grad_norm": 1.1782115933290023, "learning_rate": 1.983442391450024e-05, "loss": 0.45917218923568726, "step": 852 }, { "epoch": 0.21077341240425007, "grad_norm": 1.0139616983811004, "learning_rate": 1.9833682655789364e-05, "loss": 0.3656752109527588, "step": 853 }, { "epoch": 0.21102050901902644, "grad_norm": 1.1747822014699134, "learning_rate": 1.9832939755444983e-05, "loss": 0.3905158042907715, "step": 854 }, { "epoch": 0.2112676056338028, "grad_norm": 1.295428505765818, "learning_rate": 1.983219521359112e-05, "loss": 0.47880038619041443, "step": 855 }, { "epoch": 0.21151470224857918, "grad_norm": 1.223728672600716, "learning_rate": 1.983144903035207e-05, "loss": 0.45634448528289795, "step": 856 }, { "epoch": 0.21176179886335558, "grad_norm": 1.1351538796089693, "learning_rate": 1.9830701205852397e-05, "loss": 0.4098327159881592, "step": 857 }, { "epoch": 0.21200889547813195, "grad_norm": 1.2010492993603372, "learning_rate": 1.9829951740216947e-05, "loss": 0.4307943880558014, "step": 858 }, { "epoch": 0.21225599209290832, "grad_norm": 1.1315668673530748, "learning_rate": 1.9829200633570832e-05, "loss": 0.40949738025665283, "step": 859 }, { "epoch": 0.2125030887076847, "grad_norm": 1.1097249773149271, "learning_rate": 1.982844788603944e-05, "loss": 0.4210088849067688, "step": 860 }, { "epoch": 0.2127501853224611, "grad_norm": 1.1474628077779887, "learning_rate": 1.9827693497748443e-05, "loss": 0.4282582402229309, "step": 861 }, { "epoch": 0.21299728193723746, "grad_norm": 1.175062455374597, "learning_rate": 1.982693746882377e-05, "loss": 0.4423362612724304, "step": 862 }, { "epoch": 0.21324437855201384, "grad_norm": 1.0799160646436212, "learning_rate": 1.9826179799391636e-05, "loss": 0.4121166467666626, "step": 863 }, { "epoch": 0.2134914751667902, "grad_norm": 1.116949542687322, "learning_rate": 1.9825420489578528e-05, "loss": 0.5015184283256531, "step": 864 }, { "epoch": 0.2137385717815666, "grad_norm": 1.1901853505505882, "learning_rate": 1.9824659539511196e-05, "loss": 0.4162989854812622, "step": 865 }, { "epoch": 0.21398566839634298, "grad_norm": 1.0228482995962467, "learning_rate": 1.9823896949316683e-05, "loss": 0.39073967933654785, "step": 866 }, { "epoch": 0.21423276501111935, "grad_norm": 1.2098547130714716, "learning_rate": 1.9823132719122293e-05, "loss": 0.46378013491630554, "step": 867 }, { "epoch": 0.21447986162589572, "grad_norm": 1.1108591723155568, "learning_rate": 1.9822366849055603e-05, "loss": 0.3737980127334595, "step": 868 }, { "epoch": 0.2147269582406721, "grad_norm": 1.107812220671532, "learning_rate": 1.9821599339244472e-05, "loss": 0.4155998229980469, "step": 869 }, { "epoch": 0.2149740548554485, "grad_norm": 1.1551100545110615, "learning_rate": 1.9820830189817028e-05, "loss": 0.44715601205825806, "step": 870 }, { "epoch": 0.21522115147022486, "grad_norm": 1.0393917955557987, "learning_rate": 1.9820059400901668e-05, "loss": 0.4123138189315796, "step": 871 }, { "epoch": 0.21546824808500123, "grad_norm": 1.1235788493966297, "learning_rate": 1.9819286972627066e-05, "loss": 0.33557364344596863, "step": 872 }, { "epoch": 0.2157153446997776, "grad_norm": 1.1781569019065925, "learning_rate": 1.9818512905122182e-05, "loss": 0.43855005502700806, "step": 873 }, { "epoch": 0.215962441314554, "grad_norm": 1.0490010438638369, "learning_rate": 1.9817737198516228e-05, "loss": 0.3527181148529053, "step": 874 }, { "epoch": 0.21620953792933037, "grad_norm": 1.2767690709495338, "learning_rate": 1.9816959852938703e-05, "loss": 0.5275216698646545, "step": 875 }, { "epoch": 0.21645663454410674, "grad_norm": 1.0543445000638805, "learning_rate": 1.9816180868519382e-05, "loss": 0.39621591567993164, "step": 876 }, { "epoch": 0.2167037311588831, "grad_norm": 1.05695170272311, "learning_rate": 1.98154002453883e-05, "loss": 0.39567887783050537, "step": 877 }, { "epoch": 0.2169508277736595, "grad_norm": 1.23547377712593, "learning_rate": 1.9814617983675782e-05, "loss": 0.4710097908973694, "step": 878 }, { "epoch": 0.21719792438843588, "grad_norm": 1.2125056871681321, "learning_rate": 1.9813834083512417e-05, "loss": 0.4333864748477936, "step": 879 }, { "epoch": 0.21744502100321225, "grad_norm": 1.1746704651115691, "learning_rate": 1.9813048545029062e-05, "loss": 0.44243377447128296, "step": 880 }, { "epoch": 0.21769211761798862, "grad_norm": 1.1952324829728123, "learning_rate": 1.9812261368356865e-05, "loss": 0.426582396030426, "step": 881 }, { "epoch": 0.21793921423276502, "grad_norm": 1.179542271219341, "learning_rate": 1.9811472553627228e-05, "loss": 0.429448664188385, "step": 882 }, { "epoch": 0.2181863108475414, "grad_norm": 1.1897842033025465, "learning_rate": 1.9810682100971844e-05, "loss": 0.4633028507232666, "step": 883 }, { "epoch": 0.21843340746231776, "grad_norm": 1.164937849205093, "learning_rate": 1.980989001052266e-05, "loss": 0.4270232915878296, "step": 884 }, { "epoch": 0.21868050407709413, "grad_norm": 1.0835663713004768, "learning_rate": 1.980909628241192e-05, "loss": 0.3612966239452362, "step": 885 }, { "epoch": 0.21892760069187053, "grad_norm": 4.873807649759417, "learning_rate": 1.9808300916772122e-05, "loss": 0.4387701749801636, "step": 886 }, { "epoch": 0.2191746973066469, "grad_norm": 1.065900032304899, "learning_rate": 1.9807503913736048e-05, "loss": 0.39141416549682617, "step": 887 }, { "epoch": 0.21942179392142327, "grad_norm": 0.9909666785973752, "learning_rate": 1.9806705273436745e-05, "loss": 0.446305513381958, "step": 888 }, { "epoch": 0.21966889053619965, "grad_norm": 1.0170690542379286, "learning_rate": 1.980590499600754e-05, "loss": 0.4153711199760437, "step": 889 }, { "epoch": 0.21991598715097604, "grad_norm": 1.24781718241342, "learning_rate": 1.9805103081582032e-05, "loss": 0.49810218811035156, "step": 890 }, { "epoch": 0.22016308376575242, "grad_norm": 1.2204724913074672, "learning_rate": 1.9804299530294087e-05, "loss": 0.39662298560142517, "step": 891 }, { "epoch": 0.2204101803805288, "grad_norm": 1.0183406353604663, "learning_rate": 1.9803494342277863e-05, "loss": 0.38749244809150696, "step": 892 }, { "epoch": 0.22065727699530516, "grad_norm": 1.1113969334291343, "learning_rate": 1.9802687517667764e-05, "loss": 0.4343857765197754, "step": 893 }, { "epoch": 0.22090437361008153, "grad_norm": 0.9745278822029796, "learning_rate": 1.9801879056598484e-05, "loss": 0.3820934593677521, "step": 894 }, { "epoch": 0.22115147022485793, "grad_norm": 1.1355188345583445, "learning_rate": 1.9801068959204994e-05, "loss": 0.3905976414680481, "step": 895 }, { "epoch": 0.2213985668396343, "grad_norm": 1.1631914671800272, "learning_rate": 1.9800257225622526e-05, "loss": 0.4388422966003418, "step": 896 }, { "epoch": 0.22164566345441067, "grad_norm": 1.2966362539087068, "learning_rate": 1.979944385598659e-05, "loss": 0.4061671495437622, "step": 897 }, { "epoch": 0.22189276006918704, "grad_norm": 0.9844198720024753, "learning_rate": 1.979862885043298e-05, "loss": 0.43253248929977417, "step": 898 }, { "epoch": 0.22213985668396344, "grad_norm": 1.0616019467631577, "learning_rate": 1.9797812209097735e-05, "loss": 0.4516317844390869, "step": 899 }, { "epoch": 0.2223869532987398, "grad_norm": 1.1556527993756085, "learning_rate": 1.9796993932117198e-05, "loss": 0.4270211458206177, "step": 900 }, { "epoch": 0.22263404991351618, "grad_norm": 1.0926331984591804, "learning_rate": 1.979617401962797e-05, "loss": 0.39187294244766235, "step": 901 }, { "epoch": 0.22288114652829255, "grad_norm": 1.1373556527772881, "learning_rate": 1.9795352471766925e-05, "loss": 0.5192693471908569, "step": 902 }, { "epoch": 0.22312824314306895, "grad_norm": 1.141466587323349, "learning_rate": 1.9794529288671215e-05, "loss": 0.3860773742198944, "step": 903 }, { "epoch": 0.22337533975784532, "grad_norm": 1.1525381410376065, "learning_rate": 1.9793704470478258e-05, "loss": 0.43874815106391907, "step": 904 }, { "epoch": 0.2236224363726217, "grad_norm": 1.0392203236709436, "learning_rate": 1.979287801732575e-05, "loss": 0.44389843940734863, "step": 905 }, { "epoch": 0.22386953298739806, "grad_norm": 1.197988249635099, "learning_rate": 1.979204992935166e-05, "loss": 0.42189332842826843, "step": 906 }, { "epoch": 0.22411662960217446, "grad_norm": 1.2271897553026985, "learning_rate": 1.979122020669423e-05, "loss": 0.4262953996658325, "step": 907 }, { "epoch": 0.22436372621695083, "grad_norm": 1.1378039512257312, "learning_rate": 1.9790388849491972e-05, "loss": 0.3709666132926941, "step": 908 }, { "epoch": 0.2246108228317272, "grad_norm": 1.281366268776768, "learning_rate": 1.9789555857883674e-05, "loss": 0.459900826215744, "step": 909 }, { "epoch": 0.22485791944650357, "grad_norm": 1.1547721394670298, "learning_rate": 1.9788721232008396e-05, "loss": 0.4507795572280884, "step": 910 }, { "epoch": 0.22510501606127997, "grad_norm": 1.0419751777119297, "learning_rate": 1.978788497200547e-05, "loss": 0.38431257009506226, "step": 911 }, { "epoch": 0.22535211267605634, "grad_norm": 1.1831351867780673, "learning_rate": 1.9787047078014496e-05, "loss": 0.47240760922431946, "step": 912 }, { "epoch": 0.22559920929083271, "grad_norm": 1.0578969690464102, "learning_rate": 1.978620755017536e-05, "loss": 0.41096770763397217, "step": 913 }, { "epoch": 0.22584630590560908, "grad_norm": 1.1663581672151955, "learning_rate": 1.9785366388628205e-05, "loss": 0.46030735969543457, "step": 914 }, { "epoch": 0.22609340252038548, "grad_norm": 1.1213510820579178, "learning_rate": 1.978452359351346e-05, "loss": 0.43776339292526245, "step": 915 }, { "epoch": 0.22634049913516185, "grad_norm": 1.2640880120758267, "learning_rate": 1.978367916497182e-05, "loss": 0.5045658349990845, "step": 916 }, { "epoch": 0.22658759574993823, "grad_norm": 1.1197353475049903, "learning_rate": 1.9782833103144252e-05, "loss": 0.41927409172058105, "step": 917 }, { "epoch": 0.2268346923647146, "grad_norm": 1.0798726804304006, "learning_rate": 1.9781985408172e-05, "loss": 0.4073624610900879, "step": 918 }, { "epoch": 0.22708178897949097, "grad_norm": 1.2237052092962255, "learning_rate": 1.9781136080196576e-05, "loss": 0.4203270673751831, "step": 919 }, { "epoch": 0.22732888559426737, "grad_norm": 1.1781005030790432, "learning_rate": 1.9780285119359766e-05, "loss": 0.4441659450531006, "step": 920 }, { "epoch": 0.22757598220904374, "grad_norm": 1.1871631130582472, "learning_rate": 1.9779432525803634e-05, "loss": 0.4455307424068451, "step": 921 }, { "epoch": 0.2278230788238201, "grad_norm": 1.3062516098946984, "learning_rate": 1.9778578299670503e-05, "loss": 0.5188876986503601, "step": 922 }, { "epoch": 0.22807017543859648, "grad_norm": 1.2051180581919754, "learning_rate": 1.9777722441102987e-05, "loss": 0.43373000621795654, "step": 923 }, { "epoch": 0.22831727205337288, "grad_norm": 1.0847244302334935, "learning_rate": 1.9776864950243954e-05, "loss": 0.32963740825653076, "step": 924 }, { "epoch": 0.22856436866814925, "grad_norm": 1.1464574457577874, "learning_rate": 1.9776005827236563e-05, "loss": 0.443924218416214, "step": 925 }, { "epoch": 0.22881146528292562, "grad_norm": 1.1712424472038026, "learning_rate": 1.9775145072224226e-05, "loss": 0.4477020502090454, "step": 926 }, { "epoch": 0.229058561897702, "grad_norm": 1.1447914655362637, "learning_rate": 1.9774282685350647e-05, "loss": 0.44976383447647095, "step": 927 }, { "epoch": 0.2293056585124784, "grad_norm": 1.0820079666358249, "learning_rate": 1.9773418666759784e-05, "loss": 0.41851282119750977, "step": 928 }, { "epoch": 0.22955275512725476, "grad_norm": 1.0698064618250505, "learning_rate": 1.9772553016595882e-05, "loss": 0.33433353900909424, "step": 929 }, { "epoch": 0.22979985174203113, "grad_norm": 1.2106785932376274, "learning_rate": 1.9771685735003453e-05, "loss": 0.446154922246933, "step": 930 }, { "epoch": 0.2300469483568075, "grad_norm": 1.2768877900281226, "learning_rate": 1.9770816822127276e-05, "loss": 0.473537802696228, "step": 931 }, { "epoch": 0.2302940449715839, "grad_norm": 1.0632441495681915, "learning_rate": 1.9769946278112407e-05, "loss": 0.42566007375717163, "step": 932 }, { "epoch": 0.23054114158636027, "grad_norm": 1.1725339609360996, "learning_rate": 1.976907410310418e-05, "loss": 0.4781855046749115, "step": 933 }, { "epoch": 0.23078823820113664, "grad_norm": 1.018863673496545, "learning_rate": 1.9768200297248193e-05, "loss": 0.3831608295440674, "step": 934 }, { "epoch": 0.231035334815913, "grad_norm": 1.2910064315547616, "learning_rate": 1.976732486069032e-05, "loss": 0.47399985790252686, "step": 935 }, { "epoch": 0.2312824314306894, "grad_norm": 1.1194787930354824, "learning_rate": 1.97664477935767e-05, "loss": 0.4876324236392975, "step": 936 }, { "epoch": 0.23152952804546578, "grad_norm": 1.1917383149276963, "learning_rate": 1.976556909605376e-05, "loss": 0.5112042427062988, "step": 937 }, { "epoch": 0.23177662466024215, "grad_norm": 1.15431766971889, "learning_rate": 1.9764688768268185e-05, "loss": 0.4082554578781128, "step": 938 }, { "epoch": 0.23202372127501852, "grad_norm": 1.0471766594193856, "learning_rate": 1.9763806810366936e-05, "loss": 0.39087826013565063, "step": 939 }, { "epoch": 0.23227081788979492, "grad_norm": 1.2287024008760532, "learning_rate": 1.976292322249725e-05, "loss": 0.5170259475708008, "step": 940 }, { "epoch": 0.2325179145045713, "grad_norm": 1.0733431628754473, "learning_rate": 1.9762038004806626e-05, "loss": 0.38876664638519287, "step": 941 }, { "epoch": 0.23276501111934766, "grad_norm": 1.1142797943645342, "learning_rate": 1.976115115744285e-05, "loss": 0.39602982997894287, "step": 942 }, { "epoch": 0.23301210773412404, "grad_norm": 1.0703957003442015, "learning_rate": 1.976026268055397e-05, "loss": 0.3542947769165039, "step": 943 }, { "epoch": 0.2332592043489004, "grad_norm": 1.1215297423551163, "learning_rate": 1.9759372574288304e-05, "loss": 0.44698208570480347, "step": 944 }, { "epoch": 0.2335063009636768, "grad_norm": 1.1949425771776032, "learning_rate": 1.9758480838794453e-05, "loss": 0.47489792108535767, "step": 945 }, { "epoch": 0.23375339757845318, "grad_norm": 1.154599058369444, "learning_rate": 1.9757587474221282e-05, "loss": 0.488805890083313, "step": 946 }, { "epoch": 0.23400049419322955, "grad_norm": 1.1390554198279077, "learning_rate": 1.975669248071792e-05, "loss": 0.47275498509407043, "step": 947 }, { "epoch": 0.23424759080800592, "grad_norm": 0.9923061750560411, "learning_rate": 1.975579585843379e-05, "loss": 0.371770977973938, "step": 948 }, { "epoch": 0.23449468742278232, "grad_norm": 1.0947800063088344, "learning_rate": 1.9754897607518567e-05, "loss": 0.4177151918411255, "step": 949 }, { "epoch": 0.2347417840375587, "grad_norm": 1.305833214418974, "learning_rate": 1.9753997728122206e-05, "loss": 0.4322340488433838, "step": 950 }, { "epoch": 0.23498888065233506, "grad_norm": 1.2489015762808102, "learning_rate": 1.975309622039493e-05, "loss": 0.4439646601676941, "step": 951 }, { "epoch": 0.23523597726711143, "grad_norm": 1.1574322658707696, "learning_rate": 1.975219308448724e-05, "loss": 0.4425225853919983, "step": 952 }, { "epoch": 0.23548307388188783, "grad_norm": 1.1188336019704697, "learning_rate": 1.9751288320549907e-05, "loss": 0.47763651609420776, "step": 953 }, { "epoch": 0.2357301704966642, "grad_norm": 1.0645221207273532, "learning_rate": 1.9750381928733965e-05, "loss": 0.39440521597862244, "step": 954 }, { "epoch": 0.23597726711144057, "grad_norm": 1.2828607104600704, "learning_rate": 1.9749473909190736e-05, "loss": 0.4643334746360779, "step": 955 }, { "epoch": 0.23622436372621694, "grad_norm": 1.3094504134263802, "learning_rate": 1.9748564262071798e-05, "loss": 0.47076213359832764, "step": 956 }, { "epoch": 0.23647146034099334, "grad_norm": 1.2805930833715788, "learning_rate": 1.974765298752901e-05, "loss": 0.37634938955307007, "step": 957 }, { "epoch": 0.2367185569557697, "grad_norm": 1.1679656983718503, "learning_rate": 1.97467400857145e-05, "loss": 0.4317474067211151, "step": 958 }, { "epoch": 0.23696565357054608, "grad_norm": 1.204818353900437, "learning_rate": 1.974582555678067e-05, "loss": 0.45420774817466736, "step": 959 }, { "epoch": 0.23721275018532245, "grad_norm": 1.1175029375599337, "learning_rate": 1.9744909400880185e-05, "loss": 0.40167996287345886, "step": 960 }, { "epoch": 0.23745984680009885, "grad_norm": 1.122910419348738, "learning_rate": 1.974399161816599e-05, "loss": 0.4025406837463379, "step": 961 }, { "epoch": 0.23770694341487522, "grad_norm": 1.0671040564828522, "learning_rate": 1.9743072208791308e-05, "loss": 0.456535279750824, "step": 962 }, { "epoch": 0.2379540400296516, "grad_norm": 1.0511638279807305, "learning_rate": 1.974215117290961e-05, "loss": 0.4253663122653961, "step": 963 }, { "epoch": 0.23820113664442796, "grad_norm": 1.0621617508953056, "learning_rate": 1.974122851067467e-05, "loss": 0.4700334966182709, "step": 964 }, { "epoch": 0.23844823325920436, "grad_norm": 1.1807385741706133, "learning_rate": 1.9740304222240502e-05, "loss": 0.5127362012863159, "step": 965 }, { "epoch": 0.23869532987398073, "grad_norm": 1.0935611047772291, "learning_rate": 1.9739378307761418e-05, "loss": 0.4163603186607361, "step": 966 }, { "epoch": 0.2389424264887571, "grad_norm": 1.1307307257182448, "learning_rate": 1.973845076739198e-05, "loss": 0.46414682269096375, "step": 967 }, { "epoch": 0.23918952310353347, "grad_norm": 1.2320345135249213, "learning_rate": 1.9737521601287045e-05, "loss": 0.45461881160736084, "step": 968 }, { "epoch": 0.23943661971830985, "grad_norm": 1.0034021390242835, "learning_rate": 1.9736590809601714e-05, "loss": 0.3990170955657959, "step": 969 }, { "epoch": 0.23968371633308624, "grad_norm": 1.0914923427035677, "learning_rate": 1.9735658392491378e-05, "loss": 0.4701874256134033, "step": 970 }, { "epoch": 0.23993081294786262, "grad_norm": 1.1856628823191253, "learning_rate": 1.97347243501117e-05, "loss": 0.4762086868286133, "step": 971 }, { "epoch": 0.24017790956263899, "grad_norm": 1.0672832974718982, "learning_rate": 1.9733788682618602e-05, "loss": 0.3683438301086426, "step": 972 }, { "epoch": 0.24042500617741536, "grad_norm": 1.174215161505029, "learning_rate": 1.9732851390168286e-05, "loss": 0.4920666813850403, "step": 973 }, { "epoch": 0.24067210279219176, "grad_norm": 1.2704917191621152, "learning_rate": 1.9731912472917223e-05, "loss": 0.41409969329833984, "step": 974 }, { "epoch": 0.24091919940696813, "grad_norm": 1.1635529262674285, "learning_rate": 1.973097193102216e-05, "loss": 0.49085360765457153, "step": 975 }, { "epoch": 0.2411662960217445, "grad_norm": 1.1589675231385745, "learning_rate": 1.973002976464011e-05, "loss": 0.44926607608795166, "step": 976 }, { "epoch": 0.24141339263652087, "grad_norm": 1.210694384673921, "learning_rate": 1.972908597392835e-05, "loss": 0.405539870262146, "step": 977 }, { "epoch": 0.24166048925129727, "grad_norm": 1.1789394788496121, "learning_rate": 1.9728140559044443e-05, "loss": 0.3717843294143677, "step": 978 }, { "epoch": 0.24190758586607364, "grad_norm": 1.0867291754548596, "learning_rate": 1.9727193520146217e-05, "loss": 0.44104743003845215, "step": 979 }, { "epoch": 0.24215468248085, "grad_norm": 1.3253205864826723, "learning_rate": 1.972624485739177e-05, "loss": 0.45477569103240967, "step": 980 }, { "epoch": 0.24240177909562638, "grad_norm": 1.1072798194101994, "learning_rate": 1.972529457093947e-05, "loss": 0.4285319745540619, "step": 981 }, { "epoch": 0.24264887571040278, "grad_norm": 1.0660555448693234, "learning_rate": 1.9724342660947956e-05, "loss": 0.44698792695999146, "step": 982 }, { "epoch": 0.24289597232517915, "grad_norm": 1.0614411331881624, "learning_rate": 1.9723389127576147e-05, "loss": 0.3679320216178894, "step": 983 }, { "epoch": 0.24314306893995552, "grad_norm": 1.1211083791428682, "learning_rate": 1.972243397098322e-05, "loss": 0.42675966024398804, "step": 984 }, { "epoch": 0.2433901655547319, "grad_norm": 1.205558628190739, "learning_rate": 1.9721477191328623e-05, "loss": 0.45698660612106323, "step": 985 }, { "epoch": 0.2436372621695083, "grad_norm": 1.1790274719641112, "learning_rate": 1.972051878877209e-05, "loss": 0.4590452015399933, "step": 986 }, { "epoch": 0.24388435878428466, "grad_norm": 1.0324268596598767, "learning_rate": 1.971955876347362e-05, "loss": 0.43149232864379883, "step": 987 }, { "epoch": 0.24413145539906103, "grad_norm": 1.1541734574318636, "learning_rate": 1.9718597115593465e-05, "loss": 0.404529333114624, "step": 988 }, { "epoch": 0.2443785520138374, "grad_norm": 1.3132622245166774, "learning_rate": 1.9717633845292175e-05, "loss": 0.4787481427192688, "step": 989 }, { "epoch": 0.2446256486286138, "grad_norm": 1.1543019094825522, "learning_rate": 1.9716668952730553e-05, "loss": 0.43453770875930786, "step": 990 }, { "epoch": 0.24487274524339017, "grad_norm": 0.992918254730936, "learning_rate": 1.971570243806968e-05, "loss": 0.3369629383087158, "step": 991 }, { "epoch": 0.24511984185816654, "grad_norm": 1.226717199208219, "learning_rate": 1.97147343014709e-05, "loss": 0.4168316721916199, "step": 992 }, { "epoch": 0.2453669384729429, "grad_norm": 1.2104455827152432, "learning_rate": 1.971376454309584e-05, "loss": 0.4762820601463318, "step": 993 }, { "epoch": 0.24561403508771928, "grad_norm": 1.217659495261335, "learning_rate": 1.9712793163106392e-05, "loss": 0.41546016931533813, "step": 994 }, { "epoch": 0.24586113170249568, "grad_norm": 1.512695004944571, "learning_rate": 1.9711820161664716e-05, "loss": 0.4667038917541504, "step": 995 }, { "epoch": 0.24610822831727205, "grad_norm": 1.1050966797597737, "learning_rate": 1.971084553893324e-05, "loss": 0.391311377286911, "step": 996 }, { "epoch": 0.24635532493204843, "grad_norm": 1.0527838864641372, "learning_rate": 1.9709869295074675e-05, "loss": 0.3558835983276367, "step": 997 }, { "epoch": 0.2466024215468248, "grad_norm": 1.2096610730890511, "learning_rate": 1.970889143025199e-05, "loss": 0.41551581025123596, "step": 998 }, { "epoch": 0.2468495181616012, "grad_norm": 1.207232834564333, "learning_rate": 1.9707911944628434e-05, "loss": 0.4226401448249817, "step": 999 }, { "epoch": 0.24709661477637757, "grad_norm": 1.053143882992, "learning_rate": 1.9706930838367517e-05, "loss": 0.41790759563446045, "step": 1000 }, { "epoch": 0.24734371139115394, "grad_norm": 1.2593434180732483, "learning_rate": 1.9705948111633025e-05, "loss": 0.47771555185317993, "step": 1001 }, { "epoch": 0.2475908080059303, "grad_norm": 1.1920719936457078, "learning_rate": 1.970496376458902e-05, "loss": 0.4430827498435974, "step": 1002 }, { "epoch": 0.2478379046207067, "grad_norm": 1.2454311495215706, "learning_rate": 1.9703977797399822e-05, "loss": 0.4329947829246521, "step": 1003 }, { "epoch": 0.24808500123548308, "grad_norm": 1.300851760259806, "learning_rate": 1.9702990210230034e-05, "loss": 0.44114357233047485, "step": 1004 }, { "epoch": 0.24833209785025945, "grad_norm": 1.1533470411977607, "learning_rate": 1.970200100324452e-05, "loss": 0.428692102432251, "step": 1005 }, { "epoch": 0.24857919446503582, "grad_norm": 1.2083331330812708, "learning_rate": 1.970101017660842e-05, "loss": 0.4130925238132477, "step": 1006 }, { "epoch": 0.24882629107981222, "grad_norm": 1.1315036276502317, "learning_rate": 1.970001773048714e-05, "loss": 0.4228384494781494, "step": 1007 }, { "epoch": 0.2490733876945886, "grad_norm": 1.2344250170872542, "learning_rate": 1.9699023665046362e-05, "loss": 0.4015180468559265, "step": 1008 }, { "epoch": 0.24932048430936496, "grad_norm": 1.0768474336192737, "learning_rate": 1.969802798045203e-05, "loss": 0.4077056050300598, "step": 1009 }, { "epoch": 0.24956758092414133, "grad_norm": 1.19531692115959, "learning_rate": 1.969703067687037e-05, "loss": 0.45599764585494995, "step": 1010 }, { "epoch": 0.24981467753891773, "grad_norm": 1.0750820252472169, "learning_rate": 1.969603175446787e-05, "loss": 0.3956388533115387, "step": 1011 }, { "epoch": 0.25006177415369407, "grad_norm": 1.0997011452669914, "learning_rate": 1.9695031213411284e-05, "loss": 0.3915627598762512, "step": 1012 }, { "epoch": 0.25030887076847047, "grad_norm": 1.1848137326081876, "learning_rate": 1.969402905386765e-05, "loss": 0.4549153447151184, "step": 1013 }, { "epoch": 0.25055596738324687, "grad_norm": 1.146038150787251, "learning_rate": 1.9693025276004264e-05, "loss": 0.43965649604797363, "step": 1014 }, { "epoch": 0.2508030639980232, "grad_norm": 2.32719308539165, "learning_rate": 1.9692019879988698e-05, "loss": 0.37172946333885193, "step": 1015 }, { "epoch": 0.2510501606127996, "grad_norm": 1.070245729342349, "learning_rate": 1.9691012865988794e-05, "loss": 0.37616419792175293, "step": 1016 }, { "epoch": 0.251297257227576, "grad_norm": 1.1015028878481745, "learning_rate": 1.9690004234172657e-05, "loss": 0.3987337052822113, "step": 1017 }, { "epoch": 0.25154435384235235, "grad_norm": 1.1148558142948668, "learning_rate": 1.9688993984708674e-05, "loss": 0.4052703380584717, "step": 1018 }, { "epoch": 0.25179145045712875, "grad_norm": 1.0705930398696635, "learning_rate": 1.9687982117765494e-05, "loss": 0.4213217496871948, "step": 1019 }, { "epoch": 0.2520385470719051, "grad_norm": 1.2498952003735855, "learning_rate": 1.9686968633512037e-05, "loss": 0.3847372233867645, "step": 1020 }, { "epoch": 0.2522856436866815, "grad_norm": 1.0809832023256574, "learning_rate": 1.9685953532117494e-05, "loss": 0.43566691875457764, "step": 1021 }, { "epoch": 0.2525327403014579, "grad_norm": 1.2498820818576843, "learning_rate": 1.968493681375133e-05, "loss": 0.43988925218582153, "step": 1022 }, { "epoch": 0.25277983691623424, "grad_norm": 1.1184126486134316, "learning_rate": 1.9683918478583267e-05, "loss": 0.4012066721916199, "step": 1023 }, { "epoch": 0.25302693353101063, "grad_norm": 1.6455231563563462, "learning_rate": 1.9682898526783314e-05, "loss": 0.4017634987831116, "step": 1024 }, { "epoch": 0.253274030145787, "grad_norm": 1.1363306693190451, "learning_rate": 1.9681876958521738e-05, "loss": 0.4113745093345642, "step": 1025 }, { "epoch": 0.2535211267605634, "grad_norm": 1.4090518420247589, "learning_rate": 1.968085377396908e-05, "loss": 0.38025155663490295, "step": 1026 }, { "epoch": 0.2537682233753398, "grad_norm": 1.1898472971118412, "learning_rate": 1.967982897329615e-05, "loss": 0.4221080541610718, "step": 1027 }, { "epoch": 0.2540153199901161, "grad_norm": 1.12896613982399, "learning_rate": 1.967880255667403e-05, "loss": 0.407889723777771, "step": 1028 }, { "epoch": 0.2542624166048925, "grad_norm": 1.302473614358338, "learning_rate": 1.9677774524274067e-05, "loss": 0.5056171417236328, "step": 1029 }, { "epoch": 0.2545095132196689, "grad_norm": 1.1497301525966577, "learning_rate": 1.967674487626788e-05, "loss": 0.40268170833587646, "step": 1030 }, { "epoch": 0.25475660983444526, "grad_norm": 1.2384005641935592, "learning_rate": 1.9675713612827362e-05, "loss": 0.45338404178619385, "step": 1031 }, { "epoch": 0.25500370644922166, "grad_norm": 1.1336871179983712, "learning_rate": 1.967468073412467e-05, "loss": 0.4489847421646118, "step": 1032 }, { "epoch": 0.255250803063998, "grad_norm": 1.1657696826823596, "learning_rate": 1.9673646240332233e-05, "loss": 0.45837199687957764, "step": 1033 }, { "epoch": 0.2554978996787744, "grad_norm": 1.0830432238921832, "learning_rate": 1.9672610131622753e-05, "loss": 0.38600340485572815, "step": 1034 }, { "epoch": 0.2557449962935508, "grad_norm": 1.2134582638697315, "learning_rate": 1.9671572408169185e-05, "loss": 0.42043641209602356, "step": 1035 }, { "epoch": 0.25599209290832714, "grad_norm": 1.2132774749373725, "learning_rate": 1.9670533070144778e-05, "loss": 0.4076109528541565, "step": 1036 }, { "epoch": 0.25623918952310354, "grad_norm": 0.9673503096518458, "learning_rate": 1.9669492117723038e-05, "loss": 0.36809027194976807, "step": 1037 }, { "epoch": 0.25648628613787994, "grad_norm": 1.1612103913142757, "learning_rate": 1.9668449551077742e-05, "loss": 0.38775867223739624, "step": 1038 }, { "epoch": 0.2567333827526563, "grad_norm": 1.116691815257804, "learning_rate": 1.966740537038293e-05, "loss": 0.45195621252059937, "step": 1039 }, { "epoch": 0.2569804793674327, "grad_norm": 1.063299877252186, "learning_rate": 1.966635957581292e-05, "loss": 0.3665730059146881, "step": 1040 }, { "epoch": 0.257227575982209, "grad_norm": 1.1755301195899068, "learning_rate": 1.96653121675423e-05, "loss": 0.41702568531036377, "step": 1041 }, { "epoch": 0.2574746725969854, "grad_norm": 1.0592527396166367, "learning_rate": 1.966426314574592e-05, "loss": 0.35754960775375366, "step": 1042 }, { "epoch": 0.2577217692117618, "grad_norm": 1.0341750725063807, "learning_rate": 1.9663212510598906e-05, "loss": 0.3479679822921753, "step": 1043 }, { "epoch": 0.25796886582653816, "grad_norm": 1.129427062906733, "learning_rate": 1.966216026227665e-05, "loss": 0.4788052439689636, "step": 1044 }, { "epoch": 0.25821596244131456, "grad_norm": 1.2589643467875398, "learning_rate": 1.9661106400954816e-05, "loss": 0.45772790908813477, "step": 1045 }, { "epoch": 0.2584630590560909, "grad_norm": 1.1266755288921477, "learning_rate": 1.966005092680933e-05, "loss": 0.44863569736480713, "step": 1046 }, { "epoch": 0.2587101556708673, "grad_norm": 1.145890247258432, "learning_rate": 1.9658993840016398e-05, "loss": 0.3910033404827118, "step": 1047 }, { "epoch": 0.2589572522856437, "grad_norm": 1.085989992936848, "learning_rate": 1.965793514075249e-05, "loss": 0.3927766680717468, "step": 1048 }, { "epoch": 0.25920434890042005, "grad_norm": 1.0721735623883588, "learning_rate": 1.9656874829194342e-05, "loss": 0.4093847870826721, "step": 1049 }, { "epoch": 0.25945144551519644, "grad_norm": 1.0915430265330988, "learning_rate": 1.9655812905518965e-05, "loss": 0.413923978805542, "step": 1050 }, { "epoch": 0.25969854212997284, "grad_norm": 1.141818483178012, "learning_rate": 1.9654749369903636e-05, "loss": 0.3930321931838989, "step": 1051 }, { "epoch": 0.2599456387447492, "grad_norm": 1.1312448588788007, "learning_rate": 1.9653684222525897e-05, "loss": 0.47252947092056274, "step": 1052 }, { "epoch": 0.2601927353595256, "grad_norm": 1.2119375581618692, "learning_rate": 1.965261746356357e-05, "loss": 0.46641138195991516, "step": 1053 }, { "epoch": 0.2604398319743019, "grad_norm": 1.0724477209232561, "learning_rate": 1.9651549093194736e-05, "loss": 0.3770906925201416, "step": 1054 }, { "epoch": 0.2606869285890783, "grad_norm": 1.1742089263871507, "learning_rate": 1.965047911159775e-05, "loss": 0.4273623526096344, "step": 1055 }, { "epoch": 0.2609340252038547, "grad_norm": 1.1480806028668662, "learning_rate": 1.964940751895123e-05, "loss": 0.4691994786262512, "step": 1056 }, { "epoch": 0.26118112181863107, "grad_norm": 1.2634199854048576, "learning_rate": 1.9648334315434076e-05, "loss": 0.4207971692085266, "step": 1057 }, { "epoch": 0.26142821843340747, "grad_norm": 1.225748943572212, "learning_rate": 1.9647259501225443e-05, "loss": 0.4818498194217682, "step": 1058 }, { "epoch": 0.26167531504818387, "grad_norm": 1.182759151109803, "learning_rate": 1.964618307650476e-05, "loss": 0.44026970863342285, "step": 1059 }, { "epoch": 0.2619224116629602, "grad_norm": 1.1127323823581863, "learning_rate": 1.9645105041451725e-05, "loss": 0.42247194051742554, "step": 1060 }, { "epoch": 0.2621695082777366, "grad_norm": 1.3458378493506677, "learning_rate": 1.964402539624631e-05, "loss": 0.42947694659233093, "step": 1061 }, { "epoch": 0.26241660489251295, "grad_norm": 1.1468376900168966, "learning_rate": 1.9642944141068745e-05, "loss": 0.40158694982528687, "step": 1062 }, { "epoch": 0.26266370150728935, "grad_norm": 1.2299832196738776, "learning_rate": 1.9641861276099536e-05, "loss": 0.4724552035331726, "step": 1063 }, { "epoch": 0.26291079812206575, "grad_norm": 1.1941748287661544, "learning_rate": 1.9640776801519463e-05, "loss": 0.44411540031433105, "step": 1064 }, { "epoch": 0.2631578947368421, "grad_norm": 1.2593162448817306, "learning_rate": 1.9639690717509555e-05, "loss": 0.45720821619033813, "step": 1065 }, { "epoch": 0.2634049913516185, "grad_norm": 1.2147822149011416, "learning_rate": 1.963860302425113e-05, "loss": 0.42398035526275635, "step": 1066 }, { "epoch": 0.2636520879663949, "grad_norm": 1.2630109304478787, "learning_rate": 1.963751372192577e-05, "loss": 0.4267384707927704, "step": 1067 }, { "epoch": 0.26389918458117123, "grad_norm": 1.0372066672031, "learning_rate": 1.963642281071532e-05, "loss": 0.4228876829147339, "step": 1068 }, { "epoch": 0.26414628119594763, "grad_norm": 1.2201178867059765, "learning_rate": 1.9635330290801895e-05, "loss": 0.40443867444992065, "step": 1069 }, { "epoch": 0.264393377810724, "grad_norm": 1.0015524940208618, "learning_rate": 1.9634236162367882e-05, "loss": 0.38136938214302063, "step": 1070 }, { "epoch": 0.26464047442550037, "grad_norm": 1.162187853864899, "learning_rate": 1.9633140425595932e-05, "loss": 0.387037992477417, "step": 1071 }, { "epoch": 0.26488757104027677, "grad_norm": 1.2148274768590455, "learning_rate": 1.9632043080668977e-05, "loss": 0.41726934909820557, "step": 1072 }, { "epoch": 0.2651346676550531, "grad_norm": 1.0949981145083374, "learning_rate": 1.9630944127770193e-05, "loss": 0.43455564975738525, "step": 1073 }, { "epoch": 0.2653817642698295, "grad_norm": 1.216611910965769, "learning_rate": 1.962984356708305e-05, "loss": 0.40780362486839294, "step": 1074 }, { "epoch": 0.26562886088460586, "grad_norm": 1.068263010461909, "learning_rate": 1.962874139879127e-05, "loss": 0.3727540373802185, "step": 1075 }, { "epoch": 0.26587595749938225, "grad_norm": 1.1421225547062688, "learning_rate": 1.9627637623078852e-05, "loss": 0.48276495933532715, "step": 1076 }, { "epoch": 0.26612305411415865, "grad_norm": 1.055533065901247, "learning_rate": 1.9626532240130056e-05, "loss": 0.36030906438827515, "step": 1077 }, { "epoch": 0.266370150728935, "grad_norm": 1.0905974286125435, "learning_rate": 1.9625425250129417e-05, "loss": 0.4476609230041504, "step": 1078 }, { "epoch": 0.2666172473437114, "grad_norm": 1.0978823294364306, "learning_rate": 1.9624316653261738e-05, "loss": 0.38726726174354553, "step": 1079 }, { "epoch": 0.2668643439584878, "grad_norm": 1.0126039468104497, "learning_rate": 1.962320644971208e-05, "loss": 0.38394591212272644, "step": 1080 }, { "epoch": 0.26711144057326414, "grad_norm": 1.1746225668960981, "learning_rate": 1.9622094639665794e-05, "loss": 0.4316257834434509, "step": 1081 }, { "epoch": 0.26735853718804053, "grad_norm": 1.187388950358814, "learning_rate": 1.9620981223308475e-05, "loss": 0.4428377151489258, "step": 1082 }, { "epoch": 0.2676056338028169, "grad_norm": 1.19102079716194, "learning_rate": 1.9619866200825998e-05, "loss": 0.4580989480018616, "step": 1083 }, { "epoch": 0.2678527304175933, "grad_norm": 1.1761911712775839, "learning_rate": 1.9618749572404506e-05, "loss": 0.4658115804195404, "step": 1084 }, { "epoch": 0.2680998270323697, "grad_norm": 1.1155203268065963, "learning_rate": 1.9617631338230405e-05, "loss": 0.42886897921562195, "step": 1085 }, { "epoch": 0.268346923647146, "grad_norm": 1.133088986859219, "learning_rate": 1.961651149849038e-05, "loss": 0.40341806411743164, "step": 1086 }, { "epoch": 0.2685940202619224, "grad_norm": 1.1931007421709041, "learning_rate": 1.961539005337137e-05, "loss": 0.3772978186607361, "step": 1087 }, { "epoch": 0.2688411168766988, "grad_norm": 1.0432580745287123, "learning_rate": 1.9614267003060594e-05, "loss": 0.43759065866470337, "step": 1088 }, { "epoch": 0.26908821349147516, "grad_norm": 1.1227795255834072, "learning_rate": 1.9613142347745533e-05, "loss": 0.3803812265396118, "step": 1089 }, { "epoch": 0.26933531010625156, "grad_norm": 1.098339451576046, "learning_rate": 1.9612016087613935e-05, "loss": 0.3681487739086151, "step": 1090 }, { "epoch": 0.2695824067210279, "grad_norm": 1.2090016605474279, "learning_rate": 1.961088822285382e-05, "loss": 0.39879825711250305, "step": 1091 }, { "epoch": 0.2698295033358043, "grad_norm": 1.302671002120151, "learning_rate": 1.960975875365347e-05, "loss": 0.44657009840011597, "step": 1092 }, { "epoch": 0.2700765999505807, "grad_norm": 1.2194095587304947, "learning_rate": 1.9608627680201437e-05, "loss": 0.4198206663131714, "step": 1093 }, { "epoch": 0.27032369656535704, "grad_norm": 1.0982436913167477, "learning_rate": 1.960749500268655e-05, "loss": 0.43388062715530396, "step": 1094 }, { "epoch": 0.27057079318013344, "grad_norm": 1.1916818840549708, "learning_rate": 1.960636072129789e-05, "loss": 0.4535834789276123, "step": 1095 }, { "epoch": 0.2708178897949098, "grad_norm": 1.1899139238648484, "learning_rate": 1.960522483622482e-05, "loss": 0.44916436076164246, "step": 1096 }, { "epoch": 0.2710649864096862, "grad_norm": 1.1753519443199127, "learning_rate": 1.960408734765696e-05, "loss": 0.3849030137062073, "step": 1097 }, { "epoch": 0.2713120830244626, "grad_norm": 1.2183354283358467, "learning_rate": 1.9602948255784207e-05, "loss": 0.5510818958282471, "step": 1098 }, { "epoch": 0.2715591796392389, "grad_norm": 1.1519623681844822, "learning_rate": 1.9601807560796715e-05, "loss": 0.4526265859603882, "step": 1099 }, { "epoch": 0.2718062762540153, "grad_norm": 1.145473670264848, "learning_rate": 1.9600665262884916e-05, "loss": 0.4420124888420105, "step": 1100 }, { "epoch": 0.2720533728687917, "grad_norm": 1.0891115520777481, "learning_rate": 1.9599521362239503e-05, "loss": 0.4289267659187317, "step": 1101 }, { "epoch": 0.27230046948356806, "grad_norm": 1.2619901145608674, "learning_rate": 1.9598375859051437e-05, "loss": 0.4513486325740814, "step": 1102 }, { "epoch": 0.27254756609834446, "grad_norm": 1.2201897885830226, "learning_rate": 1.959722875351195e-05, "loss": 0.49863025546073914, "step": 1103 }, { "epoch": 0.2727946627131208, "grad_norm": 1.0687197049392159, "learning_rate": 1.9596080045812545e-05, "loss": 0.37932586669921875, "step": 1104 }, { "epoch": 0.2730417593278972, "grad_norm": 1.103738571484632, "learning_rate": 1.9594929736144978e-05, "loss": 0.38865241408348083, "step": 1105 }, { "epoch": 0.2732888559426736, "grad_norm": 1.2580521978567973, "learning_rate": 1.9593777824701282e-05, "loss": 0.504294216632843, "step": 1106 }, { "epoch": 0.27353595255744995, "grad_norm": 1.1473136234488501, "learning_rate": 1.9592624311673764e-05, "loss": 0.3855142593383789, "step": 1107 }, { "epoch": 0.27378304917222634, "grad_norm": 1.0845790101562436, "learning_rate": 1.9591469197254985e-05, "loss": 0.43764185905456543, "step": 1108 }, { "epoch": 0.27403014578700274, "grad_norm": 1.0840482593981988, "learning_rate": 1.9590312481637785e-05, "loss": 0.3898318409919739, "step": 1109 }, { "epoch": 0.2742772424017791, "grad_norm": 1.1702631664506093, "learning_rate": 1.9589154165015262e-05, "loss": 0.3883647322654724, "step": 1110 }, { "epoch": 0.2745243390165555, "grad_norm": 1.0602090355970961, "learning_rate": 1.9587994247580785e-05, "loss": 0.421792596578598, "step": 1111 }, { "epoch": 0.27477143563133183, "grad_norm": 1.108642389062736, "learning_rate": 1.9586832729527994e-05, "loss": 0.36725443601608276, "step": 1112 }, { "epoch": 0.2750185322461082, "grad_norm": 1.307815986345502, "learning_rate": 1.9585669611050788e-05, "loss": 0.4647235870361328, "step": 1113 }, { "epoch": 0.2752656288608846, "grad_norm": 1.0922545599252051, "learning_rate": 1.958450489234334e-05, "loss": 0.41483286023139954, "step": 1114 }, { "epoch": 0.27551272547566097, "grad_norm": 1.097978350717029, "learning_rate": 1.958333857360009e-05, "loss": 0.3694912791252136, "step": 1115 }, { "epoch": 0.27575982209043737, "grad_norm": 1.1481673891025834, "learning_rate": 1.9582170655015743e-05, "loss": 0.4500781297683716, "step": 1116 }, { "epoch": 0.27600691870521377, "grad_norm": 1.1180482515765835, "learning_rate": 1.9581001136785264e-05, "loss": 0.4185076951980591, "step": 1117 }, { "epoch": 0.2762540153199901, "grad_norm": 1.1524668397309048, "learning_rate": 1.9579830019103904e-05, "loss": 0.4212998151779175, "step": 1118 }, { "epoch": 0.2765011119347665, "grad_norm": 1.2086908434261399, "learning_rate": 1.9578657302167157e-05, "loss": 0.45007413625717163, "step": 1119 }, { "epoch": 0.27674820854954285, "grad_norm": 1.3123138077615584, "learning_rate": 1.9577482986170805e-05, "loss": 0.45555973052978516, "step": 1120 }, { "epoch": 0.27699530516431925, "grad_norm": 1.1595518692489872, "learning_rate": 1.9576307071310884e-05, "loss": 0.42661070823669434, "step": 1121 }, { "epoch": 0.27724240177909565, "grad_norm": 1.0880847045162068, "learning_rate": 1.9575129557783702e-05, "loss": 0.434402734041214, "step": 1122 }, { "epoch": 0.277489498393872, "grad_norm": 1.0073118351552415, "learning_rate": 1.9573950445785833e-05, "loss": 0.36976200342178345, "step": 1123 }, { "epoch": 0.2777365950086484, "grad_norm": 1.1327659810331738, "learning_rate": 1.9572769735514116e-05, "loss": 0.41784340143203735, "step": 1124 }, { "epoch": 0.27798369162342473, "grad_norm": 1.242357679093136, "learning_rate": 1.9571587427165663e-05, "loss": 0.4082183837890625, "step": 1125 }, { "epoch": 0.27823078823820113, "grad_norm": 1.187224002755718, "learning_rate": 1.9570403520937846e-05, "loss": 0.44903090596199036, "step": 1126 }, { "epoch": 0.27847788485297753, "grad_norm": 1.1246760348924034, "learning_rate": 1.9569218017028305e-05, "loss": 0.36271363496780396, "step": 1127 }, { "epoch": 0.2787249814677539, "grad_norm": 1.0102918221512127, "learning_rate": 1.956803091563495e-05, "loss": 0.35767561197280884, "step": 1128 }, { "epoch": 0.2789720780825303, "grad_norm": 1.1716483280514, "learning_rate": 1.956684221695595e-05, "loss": 0.43618667125701904, "step": 1129 }, { "epoch": 0.27921917469730667, "grad_norm": 1.1982490723498258, "learning_rate": 1.9565651921189758e-05, "loss": 0.4836835265159607, "step": 1130 }, { "epoch": 0.279466271312083, "grad_norm": 1.1561700019865484, "learning_rate": 1.956446002853507e-05, "loss": 0.4346729516983032, "step": 1131 }, { "epoch": 0.2797133679268594, "grad_norm": 1.2681713251266042, "learning_rate": 1.9563266539190864e-05, "loss": 0.47835981845855713, "step": 1132 }, { "epoch": 0.27996046454163576, "grad_norm": 1.210202989116714, "learning_rate": 1.956207145335638e-05, "loss": 0.4546605348587036, "step": 1133 }, { "epoch": 0.28020756115641215, "grad_norm": 1.0276223274247591, "learning_rate": 1.9560874771231132e-05, "loss": 0.3879781663417816, "step": 1134 }, { "epoch": 0.28045465777118855, "grad_norm": 1.1400021496076613, "learning_rate": 1.9559676493014886e-05, "loss": 0.4300438463687897, "step": 1135 }, { "epoch": 0.2807017543859649, "grad_norm": 1.0428256212319675, "learning_rate": 1.9558476618907686e-05, "loss": 0.41103535890579224, "step": 1136 }, { "epoch": 0.2809488510007413, "grad_norm": 1.2260685269457194, "learning_rate": 1.955727514910984e-05, "loss": 0.3822963833808899, "step": 1137 }, { "epoch": 0.2811959476155177, "grad_norm": 1.170595369151599, "learning_rate": 1.9556072083821914e-05, "loss": 0.4965004324913025, "step": 1138 }, { "epoch": 0.28144304423029404, "grad_norm": 1.1398700366176988, "learning_rate": 1.9554867423244753e-05, "loss": 0.4063853919506073, "step": 1139 }, { "epoch": 0.28169014084507044, "grad_norm": 1.2498763074333163, "learning_rate": 1.9553661167579467e-05, "loss": 0.4003523290157318, "step": 1140 }, { "epoch": 0.2819372374598468, "grad_norm": 1.1393508252490188, "learning_rate": 1.9552453317027425e-05, "loss": 0.3730405569076538, "step": 1141 }, { "epoch": 0.2821843340746232, "grad_norm": 1.1719718591004828, "learning_rate": 1.955124387179026e-05, "loss": 0.36652734875679016, "step": 1142 }, { "epoch": 0.2824314306893996, "grad_norm": 1.0616433316635283, "learning_rate": 1.955003283206988e-05, "loss": 0.4108664393424988, "step": 1143 }, { "epoch": 0.2826785273041759, "grad_norm": 1.2417431922407323, "learning_rate": 1.9548820198068463e-05, "loss": 0.48156750202178955, "step": 1144 }, { "epoch": 0.2829256239189523, "grad_norm": 1.0661668013059635, "learning_rate": 1.9547605969988436e-05, "loss": 0.40041542053222656, "step": 1145 }, { "epoch": 0.28317272053372866, "grad_norm": 1.240994620811322, "learning_rate": 1.9546390148032505e-05, "loss": 0.4305451512336731, "step": 1146 }, { "epoch": 0.28341981714850506, "grad_norm": 1.1024184595194808, "learning_rate": 1.9545172732403644e-05, "loss": 0.41726523637771606, "step": 1147 }, { "epoch": 0.28366691376328146, "grad_norm": 1.1387767226124088, "learning_rate": 1.954395372330508e-05, "loss": 0.42850542068481445, "step": 1148 }, { "epoch": 0.2839140103780578, "grad_norm": 1.0661047359382763, "learning_rate": 1.954273312094032e-05, "loss": 0.37864530086517334, "step": 1149 }, { "epoch": 0.2841611069928342, "grad_norm": 1.2299850375423251, "learning_rate": 1.9541510925513125e-05, "loss": 0.4089665412902832, "step": 1150 }, { "epoch": 0.2844082036076106, "grad_norm": 1.077510176721587, "learning_rate": 1.9540287137227534e-05, "loss": 0.38735830783843994, "step": 1151 }, { "epoch": 0.28465530022238694, "grad_norm": 1.2789962634612737, "learning_rate": 1.953906175628785e-05, "loss": 0.46308791637420654, "step": 1152 }, { "epoch": 0.28490239683716334, "grad_norm": 1.1885857964937263, "learning_rate": 1.9537834782898625e-05, "loss": 0.46716228127479553, "step": 1153 }, { "epoch": 0.2851494934519397, "grad_norm": 1.1727818824477254, "learning_rate": 1.95366062172647e-05, "loss": 0.42700499296188354, "step": 1154 }, { "epoch": 0.2853965900667161, "grad_norm": 1.1203247901136202, "learning_rate": 1.953537605959117e-05, "loss": 0.35816341638565063, "step": 1155 }, { "epoch": 0.2856436866814925, "grad_norm": 1.3060857299189115, "learning_rate": 1.9534144310083397e-05, "loss": 0.4427608847618103, "step": 1156 }, { "epoch": 0.2858907832962688, "grad_norm": 1.1733454636253304, "learning_rate": 1.9532910968947004e-05, "loss": 0.40745091438293457, "step": 1157 }, { "epoch": 0.2861378799110452, "grad_norm": 1.1195999722965229, "learning_rate": 1.9531676036387894e-05, "loss": 0.3646082878112793, "step": 1158 }, { "epoch": 0.2863849765258216, "grad_norm": 1.0233611269531278, "learning_rate": 1.9530439512612216e-05, "loss": 0.3498198986053467, "step": 1159 }, { "epoch": 0.28663207314059796, "grad_norm": 1.3748482986547785, "learning_rate": 1.9529201397826403e-05, "loss": 0.45190685987472534, "step": 1160 }, { "epoch": 0.28687916975537436, "grad_norm": 1.165952368965837, "learning_rate": 1.9527961692237144e-05, "loss": 0.37750476598739624, "step": 1161 }, { "epoch": 0.2871262663701507, "grad_norm": 1.1376075324865775, "learning_rate": 1.9526720396051395e-05, "loss": 0.3906645178794861, "step": 1162 }, { "epoch": 0.2873733629849271, "grad_norm": 1.08072938663004, "learning_rate": 1.9525477509476377e-05, "loss": 0.4033709168434143, "step": 1163 }, { "epoch": 0.2876204595997035, "grad_norm": 1.2676517677239156, "learning_rate": 1.9524233032719574e-05, "loss": 0.47305670380592346, "step": 1164 }, { "epoch": 0.28786755621447985, "grad_norm": 1.142059411392094, "learning_rate": 1.9522986965988748e-05, "loss": 0.45038479566574097, "step": 1165 }, { "epoch": 0.28811465282925625, "grad_norm": 1.191144894448803, "learning_rate": 1.9521739309491903e-05, "loss": 0.40078991651535034, "step": 1166 }, { "epoch": 0.2883617494440326, "grad_norm": 1.1935665268005233, "learning_rate": 1.952049006343734e-05, "loss": 0.3717430830001831, "step": 1167 }, { "epoch": 0.288608846058809, "grad_norm": 1.2804343170196097, "learning_rate": 1.9519239228033597e-05, "loss": 0.4845566749572754, "step": 1168 }, { "epoch": 0.2888559426735854, "grad_norm": 1.187221039055872, "learning_rate": 1.9517986803489487e-05, "loss": 0.4677293300628662, "step": 1169 }, { "epoch": 0.28910303928836173, "grad_norm": 1.1208586844406427, "learning_rate": 1.951673279001409e-05, "loss": 0.41443827748298645, "step": 1170 }, { "epoch": 0.28935013590313813, "grad_norm": 1.1673507029885226, "learning_rate": 1.9515477187816763e-05, "loss": 0.4347304105758667, "step": 1171 }, { "epoch": 0.2895972325179145, "grad_norm": 1.0471135782326713, "learning_rate": 1.9514219997107103e-05, "loss": 0.36381128430366516, "step": 1172 }, { "epoch": 0.28984432913269087, "grad_norm": 1.2671703701981414, "learning_rate": 1.9512961218094986e-05, "loss": 0.394267737865448, "step": 1173 }, { "epoch": 0.29009142574746727, "grad_norm": 1.0932904918749669, "learning_rate": 1.9511700850990558e-05, "loss": 0.4060139060020447, "step": 1174 }, { "epoch": 0.2903385223622436, "grad_norm": 1.2023721067754891, "learning_rate": 1.951043889600422e-05, "loss": 0.38101571798324585, "step": 1175 }, { "epoch": 0.29058561897702, "grad_norm": 1.0999337721342868, "learning_rate": 1.9509175353346646e-05, "loss": 0.36324477195739746, "step": 1176 }, { "epoch": 0.2908327155917964, "grad_norm": 1.2212222878245942, "learning_rate": 1.9507910223228768e-05, "loss": 0.4693678021430969, "step": 1177 }, { "epoch": 0.29107981220657275, "grad_norm": 1.112090236731795, "learning_rate": 1.950664350586179e-05, "loss": 0.4199824929237366, "step": 1178 }, { "epoch": 0.29132690882134915, "grad_norm": 1.0657876125070391, "learning_rate": 1.9505375201457174e-05, "loss": 0.39823243021965027, "step": 1179 }, { "epoch": 0.29157400543612555, "grad_norm": 1.0070970644479933, "learning_rate": 1.9504105310226656e-05, "loss": 0.33092349767684937, "step": 1180 }, { "epoch": 0.2918211020509019, "grad_norm": 1.2539131871079794, "learning_rate": 1.9502833832382228e-05, "loss": 0.4562669098377228, "step": 1181 }, { "epoch": 0.2920681986656783, "grad_norm": 1.1131886866902323, "learning_rate": 1.950156076813615e-05, "loss": 0.42253831028938293, "step": 1182 }, { "epoch": 0.29231529528045463, "grad_norm": 1.2827658541003075, "learning_rate": 1.950028611770095e-05, "loss": 0.42372846603393555, "step": 1183 }, { "epoch": 0.29256239189523103, "grad_norm": 1.2037966114792613, "learning_rate": 1.949900988128941e-05, "loss": 0.39087194204330444, "step": 1184 }, { "epoch": 0.29280948851000743, "grad_norm": 1.055913599657149, "learning_rate": 1.9497732059114594e-05, "loss": 0.41675662994384766, "step": 1185 }, { "epoch": 0.2930565851247838, "grad_norm": 1.106987526517386, "learning_rate": 1.9496452651389818e-05, "loss": 0.40591907501220703, "step": 1186 }, { "epoch": 0.2933036817395602, "grad_norm": 1.1080450184541448, "learning_rate": 1.9495171658328664e-05, "loss": 0.38056516647338867, "step": 1187 }, { "epoch": 0.2935507783543366, "grad_norm": 1.183809223453014, "learning_rate": 1.9493889080144987e-05, "loss": 0.4040225148200989, "step": 1188 }, { "epoch": 0.2937978749691129, "grad_norm": 1.0682580746612558, "learning_rate": 1.9492604917052893e-05, "loss": 0.3550708293914795, "step": 1189 }, { "epoch": 0.2940449715838893, "grad_norm": 1.0993572485263208, "learning_rate": 1.9491319169266764e-05, "loss": 0.36353617906570435, "step": 1190 }, { "epoch": 0.29429206819866566, "grad_norm": 1.1496688838767053, "learning_rate": 1.949003183700124e-05, "loss": 0.4518004059791565, "step": 1191 }, { "epoch": 0.29453916481344206, "grad_norm": 1.242796103237116, "learning_rate": 1.9488742920471232e-05, "loss": 0.426822304725647, "step": 1192 }, { "epoch": 0.29478626142821845, "grad_norm": 1.1053576425527603, "learning_rate": 1.948745241989191e-05, "loss": 0.418363481760025, "step": 1193 }, { "epoch": 0.2950333580429948, "grad_norm": 1.4302891652174246, "learning_rate": 1.9486160335478702e-05, "loss": 0.41525551676750183, "step": 1194 }, { "epoch": 0.2952804546577712, "grad_norm": 1.0969817502318595, "learning_rate": 1.9484866667447323e-05, "loss": 0.4020170569419861, "step": 1195 }, { "epoch": 0.29552755127254754, "grad_norm": 1.075345933287582, "learning_rate": 1.948357141601373e-05, "loss": 0.4532582759857178, "step": 1196 }, { "epoch": 0.29577464788732394, "grad_norm": 1.2031637870217653, "learning_rate": 1.948227458139415e-05, "loss": 0.39338892698287964, "step": 1197 }, { "epoch": 0.29602174450210034, "grad_norm": 1.2185096845389172, "learning_rate": 1.948097616380508e-05, "loss": 0.37253761291503906, "step": 1198 }, { "epoch": 0.2962688411168767, "grad_norm": 1.1784481469209604, "learning_rate": 1.9479676163463278e-05, "loss": 0.4466940760612488, "step": 1199 }, { "epoch": 0.2965159377316531, "grad_norm": 1.1816836595229558, "learning_rate": 1.947837458058576e-05, "loss": 0.440700888633728, "step": 1200 }, { "epoch": 0.2967630343464295, "grad_norm": 1.1214779358714109, "learning_rate": 1.947707141538982e-05, "loss": 0.3676428198814392, "step": 1201 }, { "epoch": 0.2970101309612058, "grad_norm": 1.253279347205707, "learning_rate": 1.9475766668093e-05, "loss": 0.45390772819519043, "step": 1202 }, { "epoch": 0.2972572275759822, "grad_norm": 1.2833489238630738, "learning_rate": 1.9474460338913124e-05, "loss": 0.4828430414199829, "step": 1203 }, { "epoch": 0.29750432419075856, "grad_norm": 1.223024906180215, "learning_rate": 1.9473152428068263e-05, "loss": 0.40068966150283813, "step": 1204 }, { "epoch": 0.29775142080553496, "grad_norm": 1.2167343353084177, "learning_rate": 1.9471842935776762e-05, "loss": 0.4122784733772278, "step": 1205 }, { "epoch": 0.29799851742031136, "grad_norm": 1.1683557184032358, "learning_rate": 1.947053186225723e-05, "loss": 0.4172208905220032, "step": 1206 }, { "epoch": 0.2982456140350877, "grad_norm": 1.0724119048347789, "learning_rate": 1.946921920772853e-05, "loss": 0.38613271713256836, "step": 1207 }, { "epoch": 0.2984927106498641, "grad_norm": 1.2135876668922454, "learning_rate": 1.9467904972409807e-05, "loss": 0.4013817310333252, "step": 1208 }, { "epoch": 0.2987398072646405, "grad_norm": 1.6212787239261224, "learning_rate": 1.9466589156520448e-05, "loss": 0.44677239656448364, "step": 1209 }, { "epoch": 0.29898690387941684, "grad_norm": 1.1429281907646578, "learning_rate": 1.946527176028012e-05, "loss": 0.3827959895133972, "step": 1210 }, { "epoch": 0.29923400049419324, "grad_norm": 1.3102236139108383, "learning_rate": 1.9463952783908753e-05, "loss": 0.501110315322876, "step": 1211 }, { "epoch": 0.2994810971089696, "grad_norm": 1.2141272726545234, "learning_rate": 1.946263222762653e-05, "loss": 0.43033653497695923, "step": 1212 }, { "epoch": 0.299728193723746, "grad_norm": 1.2206687824888465, "learning_rate": 1.9461310091653905e-05, "loss": 0.41931864619255066, "step": 1213 }, { "epoch": 0.2999752903385224, "grad_norm": 1.0632677846755594, "learning_rate": 1.9459986376211603e-05, "loss": 0.309340238571167, "step": 1214 }, { "epoch": 0.3002223869532987, "grad_norm": 1.1956886141677745, "learning_rate": 1.94586610815206e-05, "loss": 0.4052238166332245, "step": 1215 }, { "epoch": 0.3004694835680751, "grad_norm": 1.1795704516285437, "learning_rate": 1.9457334207802133e-05, "loss": 0.4367956221103668, "step": 1216 }, { "epoch": 0.30071658018285147, "grad_norm": 1.072349157320875, "learning_rate": 1.9456005755277722e-05, "loss": 0.3862387537956238, "step": 1217 }, { "epoch": 0.30096367679762787, "grad_norm": 1.105283282441381, "learning_rate": 1.9454675724169135e-05, "loss": 0.3554226756095886, "step": 1218 }, { "epoch": 0.30121077341240426, "grad_norm": 1.1568317046404446, "learning_rate": 1.94533441146984e-05, "loss": 0.4445829689502716, "step": 1219 }, { "epoch": 0.3014578700271806, "grad_norm": 1.179277893901391, "learning_rate": 1.9452010927087827e-05, "loss": 0.44013941287994385, "step": 1220 }, { "epoch": 0.301704966641957, "grad_norm": 1.0743227693940718, "learning_rate": 1.9450676161559975e-05, "loss": 0.38955482840538025, "step": 1221 }, { "epoch": 0.3019520632567334, "grad_norm": 1.2784751940154766, "learning_rate": 1.944933981833766e-05, "loss": 0.42275530099868774, "step": 1222 }, { "epoch": 0.30219915987150975, "grad_norm": 1.2362964293487821, "learning_rate": 1.9448001897643983e-05, "loss": 0.4532148241996765, "step": 1223 }, { "epoch": 0.30244625648628615, "grad_norm": 1.128328639948804, "learning_rate": 1.944666239970229e-05, "loss": 0.3931865096092224, "step": 1224 }, { "epoch": 0.3026933531010625, "grad_norm": 1.1231734433185983, "learning_rate": 1.94453213247362e-05, "loss": 0.35000723600387573, "step": 1225 }, { "epoch": 0.3029404497158389, "grad_norm": 1.205714270929866, "learning_rate": 1.9443978672969586e-05, "loss": 0.4711715579032898, "step": 1226 }, { "epoch": 0.3031875463306153, "grad_norm": 1.2158288932847272, "learning_rate": 1.9442634444626597e-05, "loss": 0.44770294427871704, "step": 1227 }, { "epoch": 0.30343464294539163, "grad_norm": 1.1096955580899635, "learning_rate": 1.9441288639931633e-05, "loss": 0.42675530910491943, "step": 1228 }, { "epoch": 0.30368173956016803, "grad_norm": 1.236322400576574, "learning_rate": 1.9439941259109366e-05, "loss": 0.4628218710422516, "step": 1229 }, { "epoch": 0.30392883617494443, "grad_norm": 1.0814665226081253, "learning_rate": 1.9438592302384727e-05, "loss": 0.42145827412605286, "step": 1230 }, { "epoch": 0.30417593278972077, "grad_norm": 1.080171747662659, "learning_rate": 1.943724176998291e-05, "loss": 0.3744428753852844, "step": 1231 }, { "epoch": 0.30442302940449717, "grad_norm": 1.1222068523835917, "learning_rate": 1.943588966212937e-05, "loss": 0.4499506950378418, "step": 1232 }, { "epoch": 0.3046701260192735, "grad_norm": 1.1378494474806269, "learning_rate": 1.9434535979049833e-05, "loss": 0.4353228807449341, "step": 1233 }, { "epoch": 0.3049172226340499, "grad_norm": 1.1444656211350057, "learning_rate": 1.9433180720970278e-05, "loss": 0.38267481327056885, "step": 1234 }, { "epoch": 0.3051643192488263, "grad_norm": 1.1638397334265163, "learning_rate": 1.9431823888116958e-05, "loss": 0.3689883351325989, "step": 1235 }, { "epoch": 0.30541141586360265, "grad_norm": 1.018946555987404, "learning_rate": 1.9430465480716375e-05, "loss": 0.34568721055984497, "step": 1236 }, { "epoch": 0.30565851247837905, "grad_norm": 1.1335275243827927, "learning_rate": 1.9429105498995302e-05, "loss": 0.40021929144859314, "step": 1237 }, { "epoch": 0.30590560909315545, "grad_norm": 1.3316778980486936, "learning_rate": 1.942774394318078e-05, "loss": 0.4299350082874298, "step": 1238 }, { "epoch": 0.3061527057079318, "grad_norm": 1.4908969051707976, "learning_rate": 1.94263808135001e-05, "loss": 0.4366927742958069, "step": 1239 }, { "epoch": 0.3063998023227082, "grad_norm": 1.1184527046515493, "learning_rate": 1.9425016110180828e-05, "loss": 0.3806101381778717, "step": 1240 }, { "epoch": 0.30664689893748454, "grad_norm": 1.1690047127600716, "learning_rate": 1.9423649833450783e-05, "loss": 0.41701608896255493, "step": 1241 }, { "epoch": 0.30689399555226093, "grad_norm": 1.1419406026804062, "learning_rate": 1.9422281983538056e-05, "loss": 0.3948105573654175, "step": 1242 }, { "epoch": 0.30714109216703733, "grad_norm": 1.2406900021520895, "learning_rate": 1.9420912560670995e-05, "loss": 0.43637433648109436, "step": 1243 }, { "epoch": 0.3073881887818137, "grad_norm": 1.2462582226509686, "learning_rate": 1.9419541565078205e-05, "loss": 0.4081137776374817, "step": 1244 }, { "epoch": 0.3076352853965901, "grad_norm": 1.198163055146096, "learning_rate": 1.9418168996988566e-05, "loss": 0.3968390226364136, "step": 1245 }, { "epoch": 0.3078823820113664, "grad_norm": 1.1965505091449224, "learning_rate": 1.9416794856631213e-05, "loss": 0.3930845856666565, "step": 1246 }, { "epoch": 0.3081294786261428, "grad_norm": 1.2888252922762655, "learning_rate": 1.9415419144235544e-05, "loss": 0.4332362413406372, "step": 1247 }, { "epoch": 0.3083765752409192, "grad_norm": 1.214393748663038, "learning_rate": 1.941404186003122e-05, "loss": 0.4235694408416748, "step": 1248 }, { "epoch": 0.30862367185569556, "grad_norm": 1.1348621172548323, "learning_rate": 1.9412663004248165e-05, "loss": 0.4202786087989807, "step": 1249 }, { "epoch": 0.30887076847047196, "grad_norm": 1.1558250002073267, "learning_rate": 1.9411282577116567e-05, "loss": 0.4610140025615692, "step": 1250 }, { "epoch": 0.30911786508524836, "grad_norm": 1.1337543763215951, "learning_rate": 1.940990057886687e-05, "loss": 0.39903825521469116, "step": 1251 }, { "epoch": 0.3093649617000247, "grad_norm": 1.2236473588731325, "learning_rate": 1.9408517009729788e-05, "loss": 0.43579286336898804, "step": 1252 }, { "epoch": 0.3096120583148011, "grad_norm": 1.0645398071873908, "learning_rate": 1.9407131869936292e-05, "loss": 0.3979259133338928, "step": 1253 }, { "epoch": 0.30985915492957744, "grad_norm": 1.1140750324014927, "learning_rate": 1.9405745159717616e-05, "loss": 0.4198010563850403, "step": 1254 }, { "epoch": 0.31010625154435384, "grad_norm": 1.0405545582972542, "learning_rate": 1.9404356879305265e-05, "loss": 0.3808962106704712, "step": 1255 }, { "epoch": 0.31035334815913024, "grad_norm": 1.0553700456496824, "learning_rate": 1.9402967028930986e-05, "loss": 0.35449889302253723, "step": 1256 }, { "epoch": 0.3106004447739066, "grad_norm": 1.0611002493922819, "learning_rate": 1.940157560882681e-05, "loss": 0.36339256167411804, "step": 1257 }, { "epoch": 0.310847541388683, "grad_norm": 1.0941462255177343, "learning_rate": 1.9400182619225017e-05, "loss": 0.3776073455810547, "step": 1258 }, { "epoch": 0.3110946380034594, "grad_norm": 1.113945475603046, "learning_rate": 1.939878806035815e-05, "loss": 0.3992674946784973, "step": 1259 }, { "epoch": 0.3113417346182357, "grad_norm": 1.2546455824274663, "learning_rate": 1.9397391932459022e-05, "loss": 0.40253719687461853, "step": 1260 }, { "epoch": 0.3115888312330121, "grad_norm": 1.0307010899899383, "learning_rate": 1.9395994235760696e-05, "loss": 0.3335898518562317, "step": 1261 }, { "epoch": 0.31183592784778846, "grad_norm": 1.3981965438122002, "learning_rate": 1.9394594970496513e-05, "loss": 0.41016891598701477, "step": 1262 }, { "epoch": 0.31208302446256486, "grad_norm": 1.1426072037502553, "learning_rate": 1.9393194136900056e-05, "loss": 0.4375283122062683, "step": 1263 }, { "epoch": 0.31233012107734126, "grad_norm": 1.1234371197654252, "learning_rate": 1.9391791735205182e-05, "loss": 0.3982020616531372, "step": 1264 }, { "epoch": 0.3125772176921176, "grad_norm": 1.1599306950234998, "learning_rate": 1.939038776564601e-05, "loss": 0.43773987889289856, "step": 1265 }, { "epoch": 0.312824314306894, "grad_norm": 1.1334415692627262, "learning_rate": 1.9388982228456923e-05, "loss": 0.425442099571228, "step": 1266 }, { "epoch": 0.31307141092167035, "grad_norm": 1.2318447479579084, "learning_rate": 1.9387575123872553e-05, "loss": 0.40527039766311646, "step": 1267 }, { "epoch": 0.31331850753644674, "grad_norm": 1.2262011168793794, "learning_rate": 1.9386166452127804e-05, "loss": 0.41595059633255005, "step": 1268 }, { "epoch": 0.31356560415122314, "grad_norm": 1.0979903781667777, "learning_rate": 1.938475621345784e-05, "loss": 0.3808109164237976, "step": 1269 }, { "epoch": 0.3138127007659995, "grad_norm": 1.1064221418931914, "learning_rate": 1.9383344408098087e-05, "loss": 0.40943944454193115, "step": 1270 }, { "epoch": 0.3140597973807759, "grad_norm": 1.2834080757942865, "learning_rate": 1.938193103628423e-05, "loss": 0.4620349407196045, "step": 1271 }, { "epoch": 0.3143068939955523, "grad_norm": 1.2467137632238856, "learning_rate": 1.9380516098252223e-05, "loss": 0.4282250702381134, "step": 1272 }, { "epoch": 0.3145539906103286, "grad_norm": 1.1366799514528523, "learning_rate": 1.9379099594238268e-05, "loss": 0.41597503423690796, "step": 1273 }, { "epoch": 0.314801087225105, "grad_norm": 1.107357743613898, "learning_rate": 1.9377681524478834e-05, "loss": 0.39593085646629333, "step": 1274 }, { "epoch": 0.31504818383988137, "grad_norm": 1.131237941468289, "learning_rate": 1.9376261889210664e-05, "loss": 0.4239940643310547, "step": 1275 }, { "epoch": 0.31529528045465777, "grad_norm": 1.231056959967337, "learning_rate": 1.9374840688670745e-05, "loss": 0.3849679231643677, "step": 1276 }, { "epoch": 0.31554237706943417, "grad_norm": 1.2351287553889985, "learning_rate": 1.9373417923096333e-05, "loss": 0.4202159643173218, "step": 1277 }, { "epoch": 0.3157894736842105, "grad_norm": 1.0739432049856044, "learning_rate": 1.9371993592724943e-05, "loss": 0.38143718242645264, "step": 1278 }, { "epoch": 0.3160365702989869, "grad_norm": 1.2315349815159637, "learning_rate": 1.937056769779435e-05, "loss": 0.40038740634918213, "step": 1279 }, { "epoch": 0.3162836669137633, "grad_norm": 1.1969318206989576, "learning_rate": 1.9369140238542598e-05, "loss": 0.4629591703414917, "step": 1280 }, { "epoch": 0.31653076352853965, "grad_norm": 1.2341379349729786, "learning_rate": 1.9367711215207986e-05, "loss": 0.4642338156700134, "step": 1281 }, { "epoch": 0.31677786014331605, "grad_norm": 0.9970072790890062, "learning_rate": 1.936628062802908e-05, "loss": 0.3368876576423645, "step": 1282 }, { "epoch": 0.3170249567580924, "grad_norm": 1.0574798960077654, "learning_rate": 1.9364848477244688e-05, "loss": 0.36556679010391235, "step": 1283 }, { "epoch": 0.3172720533728688, "grad_norm": 1.0519250788303043, "learning_rate": 1.9363414763093903e-05, "loss": 0.4139278531074524, "step": 1284 }, { "epoch": 0.3175191499876452, "grad_norm": 1.1824340450278161, "learning_rate": 1.9361979485816067e-05, "loss": 0.3896678686141968, "step": 1285 }, { "epoch": 0.31776624660242153, "grad_norm": 1.1760530442515278, "learning_rate": 1.9360542645650783e-05, "loss": 0.41566866636276245, "step": 1286 }, { "epoch": 0.31801334321719793, "grad_norm": 1.174551555543629, "learning_rate": 1.9359104242837922e-05, "loss": 0.4386754631996155, "step": 1287 }, { "epoch": 0.31826043983197433, "grad_norm": 1.2455010870954044, "learning_rate": 1.9357664277617604e-05, "loss": 0.3524169623851776, "step": 1288 }, { "epoch": 0.31850753644675067, "grad_norm": 1.097059442328505, "learning_rate": 1.9356222750230224e-05, "loss": 0.3711431622505188, "step": 1289 }, { "epoch": 0.31875463306152707, "grad_norm": 1.2363791318061899, "learning_rate": 1.9354779660916426e-05, "loss": 0.38976648449897766, "step": 1290 }, { "epoch": 0.3190017296763034, "grad_norm": 1.1584326977031698, "learning_rate": 1.9353335009917118e-05, "loss": 0.37312063574790955, "step": 1291 }, { "epoch": 0.3192488262910798, "grad_norm": 1.2159465563082725, "learning_rate": 1.9351888797473474e-05, "loss": 0.37401890754699707, "step": 1292 }, { "epoch": 0.3194959229058562, "grad_norm": 1.1572449749362665, "learning_rate": 1.9350441023826917e-05, "loss": 0.37817302346229553, "step": 1293 }, { "epoch": 0.31974301952063255, "grad_norm": 1.2236640600701365, "learning_rate": 1.9348991689219148e-05, "loss": 0.4333312511444092, "step": 1294 }, { "epoch": 0.31999011613540895, "grad_norm": 1.0566289443616965, "learning_rate": 1.9347540793892113e-05, "loss": 0.3835028409957886, "step": 1295 }, { "epoch": 0.3202372127501853, "grad_norm": 1.30425893850789, "learning_rate": 1.9346088338088025e-05, "loss": 0.39854130148887634, "step": 1296 }, { "epoch": 0.3204843093649617, "grad_norm": 1.1937870763473941, "learning_rate": 1.9344634322049357e-05, "loss": 0.42679572105407715, "step": 1297 }, { "epoch": 0.3207314059797381, "grad_norm": 1.0808151190540678, "learning_rate": 1.9343178746018842e-05, "loss": 0.33267128467559814, "step": 1298 }, { "epoch": 0.32097850259451444, "grad_norm": 1.4199941378648997, "learning_rate": 1.934172161023947e-05, "loss": 0.5378856062889099, "step": 1299 }, { "epoch": 0.32122559920929084, "grad_norm": 1.3122984272083769, "learning_rate": 1.93402629149545e-05, "loss": 0.4956590533256531, "step": 1300 }, { "epoch": 0.32147269582406723, "grad_norm": 1.1808460235031597, "learning_rate": 1.9338802660407446e-05, "loss": 0.3825092911720276, "step": 1301 }, { "epoch": 0.3217197924388436, "grad_norm": 1.2419996427295674, "learning_rate": 1.9337340846842085e-05, "loss": 0.43212223052978516, "step": 1302 }, { "epoch": 0.32196688905362, "grad_norm": 1.2651810744526386, "learning_rate": 1.9335877474502446e-05, "loss": 0.38999754190444946, "step": 1303 }, { "epoch": 0.3222139856683963, "grad_norm": 1.0974223907585416, "learning_rate": 1.9334412543632825e-05, "loss": 0.38367122411727905, "step": 1304 }, { "epoch": 0.3224610822831727, "grad_norm": 1.084196322724643, "learning_rate": 1.9332946054477783e-05, "loss": 0.34876322746276855, "step": 1305 }, { "epoch": 0.3227081788979491, "grad_norm": 1.3315073918589653, "learning_rate": 1.9331478007282128e-05, "loss": 0.47826600074768066, "step": 1306 }, { "epoch": 0.32295527551272546, "grad_norm": 1.2738690284737362, "learning_rate": 1.933000840229094e-05, "loss": 0.40360623598098755, "step": 1307 }, { "epoch": 0.32320237212750186, "grad_norm": 1.1614791013911254, "learning_rate": 1.9328537239749553e-05, "loss": 0.42666095495224, "step": 1308 }, { "epoch": 0.32344946874227826, "grad_norm": 1.1251015251665857, "learning_rate": 1.9327064519903566e-05, "loss": 0.4073406755924225, "step": 1309 }, { "epoch": 0.3236965653570546, "grad_norm": 1.2177142837277435, "learning_rate": 1.932559024299883e-05, "loss": 0.4119945466518402, "step": 1310 }, { "epoch": 0.323943661971831, "grad_norm": 1.0754607222331505, "learning_rate": 1.9324114409281467e-05, "loss": 0.41716960072517395, "step": 1311 }, { "epoch": 0.32419075858660734, "grad_norm": 1.0957319137595223, "learning_rate": 1.9322637018997846e-05, "loss": 0.4098065197467804, "step": 1312 }, { "epoch": 0.32443785520138374, "grad_norm": 1.1281268921579508, "learning_rate": 1.93211580723946e-05, "loss": 0.4469154477119446, "step": 1313 }, { "epoch": 0.32468495181616014, "grad_norm": 1.2201803421067894, "learning_rate": 1.9319677569718634e-05, "loss": 0.46589764952659607, "step": 1314 }, { "epoch": 0.3249320484309365, "grad_norm": 1.1770005991186063, "learning_rate": 1.9318195511217097e-05, "loss": 0.48429936170578003, "step": 1315 }, { "epoch": 0.3251791450457129, "grad_norm": 1.086749493741622, "learning_rate": 1.93167118971374e-05, "loss": 0.4358164668083191, "step": 1316 }, { "epoch": 0.3254262416604892, "grad_norm": 1.2314562548091832, "learning_rate": 1.9315226727727224e-05, "loss": 0.42245858907699585, "step": 1317 }, { "epoch": 0.3256733382752656, "grad_norm": 1.0565206060743215, "learning_rate": 1.93137400032345e-05, "loss": 0.41989874839782715, "step": 1318 }, { "epoch": 0.325920434890042, "grad_norm": 1.06759205292836, "learning_rate": 1.9312251723907422e-05, "loss": 0.3598628342151642, "step": 1319 }, { "epoch": 0.32616753150481836, "grad_norm": 1.2281202369904975, "learning_rate": 1.9310761889994443e-05, "loss": 0.42987844347953796, "step": 1320 }, { "epoch": 0.32641462811959476, "grad_norm": 1.004276395576963, "learning_rate": 1.9309270501744276e-05, "loss": 0.3893887996673584, "step": 1321 }, { "epoch": 0.32666172473437116, "grad_norm": 1.242117581536976, "learning_rate": 1.9307777559405894e-05, "loss": 0.5083735585212708, "step": 1322 }, { "epoch": 0.3269088213491475, "grad_norm": 1.1414366403504626, "learning_rate": 1.9306283063228525e-05, "loss": 0.4021914601325989, "step": 1323 }, { "epoch": 0.3271559179639239, "grad_norm": 1.1345995804835143, "learning_rate": 1.9304787013461664e-05, "loss": 0.4279201030731201, "step": 1324 }, { "epoch": 0.32740301457870025, "grad_norm": 0.9819188407459083, "learning_rate": 1.930328941035506e-05, "loss": 0.3621779680252075, "step": 1325 }, { "epoch": 0.32765011119347665, "grad_norm": 1.1292744234873768, "learning_rate": 1.9301790254158722e-05, "loss": 0.4052876830101013, "step": 1326 }, { "epoch": 0.32789720780825304, "grad_norm": 1.2117562730479745, "learning_rate": 1.9300289545122922e-05, "loss": 0.45996612310409546, "step": 1327 }, { "epoch": 0.3281443044230294, "grad_norm": 1.1733216037629393, "learning_rate": 1.929878728349818e-05, "loss": 0.432341068983078, "step": 1328 }, { "epoch": 0.3283914010378058, "grad_norm": 1.1912407303609687, "learning_rate": 1.9297283469535292e-05, "loss": 0.37831324338912964, "step": 1329 }, { "epoch": 0.3286384976525822, "grad_norm": 1.3191454971754637, "learning_rate": 1.9295778103485297e-05, "loss": 0.432273805141449, "step": 1330 }, { "epoch": 0.3288855942673585, "grad_norm": 1.1139970671756065, "learning_rate": 1.9294271185599508e-05, "loss": 0.44858017563819885, "step": 1331 }, { "epoch": 0.3291326908821349, "grad_norm": 1.2927861903204183, "learning_rate": 1.929276271612949e-05, "loss": 0.4262391924858093, "step": 1332 }, { "epoch": 0.32937978749691127, "grad_norm": 1.2384172542122038, "learning_rate": 1.9291252695327058e-05, "loss": 0.384127140045166, "step": 1333 }, { "epoch": 0.32962688411168767, "grad_norm": 1.0716675523814314, "learning_rate": 1.92897411234443e-05, "loss": 0.42200517654418945, "step": 1334 }, { "epoch": 0.32987398072646407, "grad_norm": 1.1048628546868564, "learning_rate": 1.928822800073356e-05, "loss": 0.33658790588378906, "step": 1335 }, { "epoch": 0.3301210773412404, "grad_norm": 1.0831733788220104, "learning_rate": 1.9286713327447435e-05, "loss": 0.37310123443603516, "step": 1336 }, { "epoch": 0.3303681739560168, "grad_norm": 1.1064292633341952, "learning_rate": 1.9285197103838787e-05, "loss": 0.40716591477394104, "step": 1337 }, { "epoch": 0.3306152705707932, "grad_norm": 1.2005471443188327, "learning_rate": 1.9283679330160726e-05, "loss": 0.4064866304397583, "step": 1338 }, { "epoch": 0.33086236718556955, "grad_norm": 1.0959267599260727, "learning_rate": 1.928216000666664e-05, "loss": 0.34905189275741577, "step": 1339 }, { "epoch": 0.33110946380034595, "grad_norm": 1.1994612433771612, "learning_rate": 1.928063913361016e-05, "loss": 0.40754538774490356, "step": 1340 }, { "epoch": 0.3313565604151223, "grad_norm": 1.0141420503286314, "learning_rate": 1.9279116711245177e-05, "loss": 0.3519004285335541, "step": 1341 }, { "epoch": 0.3316036570298987, "grad_norm": 1.1789920877342126, "learning_rate": 1.9277592739825852e-05, "loss": 0.4234257936477661, "step": 1342 }, { "epoch": 0.3318507536446751, "grad_norm": 1.147152702015548, "learning_rate": 1.927606721960659e-05, "loss": 0.42649802565574646, "step": 1343 }, { "epoch": 0.33209785025945143, "grad_norm": 1.1383428047439879, "learning_rate": 1.927454015084206e-05, "loss": 0.3900797367095947, "step": 1344 }, { "epoch": 0.33234494687422783, "grad_norm": 1.1325734607872744, "learning_rate": 1.9273011533787194e-05, "loss": 0.38394808769226074, "step": 1345 }, { "epoch": 0.3325920434890042, "grad_norm": 1.0811049012636895, "learning_rate": 1.9271481368697177e-05, "loss": 0.3559752404689789, "step": 1346 }, { "epoch": 0.3328391401037806, "grad_norm": 1.1661886260092007, "learning_rate": 1.926994965582746e-05, "loss": 0.4234912693500519, "step": 1347 }, { "epoch": 0.33308623671855697, "grad_norm": 1.4009494382187604, "learning_rate": 1.9268416395433744e-05, "loss": 0.41531428694725037, "step": 1348 }, { "epoch": 0.3333333333333333, "grad_norm": 1.0390200903865268, "learning_rate": 1.9266881587771986e-05, "loss": 0.34764355421066284, "step": 1349 }, { "epoch": 0.3335804299481097, "grad_norm": 1.0465048260585024, "learning_rate": 1.9265345233098412e-05, "loss": 0.395413875579834, "step": 1350 }, { "epoch": 0.3338275265628861, "grad_norm": 1.130401435370305, "learning_rate": 1.92638073316695e-05, "loss": 0.4146196246147156, "step": 1351 }, { "epoch": 0.33407462317766246, "grad_norm": 1.1604109532063591, "learning_rate": 1.9262267883741986e-05, "loss": 0.3687879145145416, "step": 1352 }, { "epoch": 0.33432171979243885, "grad_norm": 1.0101423112368728, "learning_rate": 1.9260726889572864e-05, "loss": 0.3750845789909363, "step": 1353 }, { "epoch": 0.3345688164072152, "grad_norm": 1.1973086872586405, "learning_rate": 1.9259184349419393e-05, "loss": 0.398102343082428, "step": 1354 }, { "epoch": 0.3348159130219916, "grad_norm": 0.9959854966086708, "learning_rate": 1.925764026353908e-05, "loss": 0.31968194246292114, "step": 1355 }, { "epoch": 0.335063009636768, "grad_norm": 1.143220802309691, "learning_rate": 1.925609463218969e-05, "loss": 0.37622782588005066, "step": 1356 }, { "epoch": 0.33531010625154434, "grad_norm": 1.0764322645787174, "learning_rate": 1.9254547455629263e-05, "loss": 0.36129292845726013, "step": 1357 }, { "epoch": 0.33555720286632074, "grad_norm": 1.0773964764492652, "learning_rate": 1.9252998734116074e-05, "loss": 0.33526837825775146, "step": 1358 }, { "epoch": 0.33580429948109713, "grad_norm": 1.1832794587220696, "learning_rate": 1.925144846790867e-05, "loss": 0.3749843239784241, "step": 1359 }, { "epoch": 0.3360513960958735, "grad_norm": 1.1837079104123995, "learning_rate": 1.924989665726585e-05, "loss": 0.394500732421875, "step": 1360 }, { "epoch": 0.3362984927106499, "grad_norm": 1.3222824009586973, "learning_rate": 1.9248343302446674e-05, "loss": 0.43256038427352905, "step": 1361 }, { "epoch": 0.3365455893254262, "grad_norm": 1.2691618786208088, "learning_rate": 1.924678840371046e-05, "loss": 0.36961615085601807, "step": 1362 }, { "epoch": 0.3367926859402026, "grad_norm": 1.0924735463435706, "learning_rate": 1.9245231961316782e-05, "loss": 0.37214231491088867, "step": 1363 }, { "epoch": 0.337039782554979, "grad_norm": 1.1783312996422952, "learning_rate": 1.9243673975525473e-05, "loss": 0.35319459438323975, "step": 1364 }, { "epoch": 0.33728687916975536, "grad_norm": 1.0619123567899427, "learning_rate": 1.924211444659662e-05, "loss": 0.3698628544807434, "step": 1365 }, { "epoch": 0.33753397578453176, "grad_norm": 1.1905449172095737, "learning_rate": 1.9240553374790573e-05, "loss": 0.3791910409927368, "step": 1366 }, { "epoch": 0.3377810723993081, "grad_norm": 1.1342471962123055, "learning_rate": 1.923899076036794e-05, "loss": 0.3362424671649933, "step": 1367 }, { "epoch": 0.3380281690140845, "grad_norm": 1.1654442859650282, "learning_rate": 1.9237426603589577e-05, "loss": 0.36194583773612976, "step": 1368 }, { "epoch": 0.3382752656288609, "grad_norm": 1.1690228156245064, "learning_rate": 1.9235860904716606e-05, "loss": 0.4044646918773651, "step": 1369 }, { "epoch": 0.33852236224363724, "grad_norm": 1.205070898991891, "learning_rate": 1.923429366401041e-05, "loss": 0.42672398686408997, "step": 1370 }, { "epoch": 0.33876945885841364, "grad_norm": 1.278445071847927, "learning_rate": 1.9232724881732616e-05, "loss": 0.4403476119041443, "step": 1371 }, { "epoch": 0.33901655547319004, "grad_norm": 1.1779237433254102, "learning_rate": 1.923115455814512e-05, "loss": 0.37911349534988403, "step": 1372 }, { "epoch": 0.3392636520879664, "grad_norm": 1.1950503855187935, "learning_rate": 1.9229582693510076e-05, "loss": 0.3505610227584839, "step": 1373 }, { "epoch": 0.3395107487027428, "grad_norm": 1.1818070115826431, "learning_rate": 1.9228009288089886e-05, "loss": 0.43870532512664795, "step": 1374 }, { "epoch": 0.3397578453175191, "grad_norm": 1.1443847428100724, "learning_rate": 1.922643434214721e-05, "loss": 0.40638983249664307, "step": 1375 }, { "epoch": 0.3400049419322955, "grad_norm": 1.3663701566182889, "learning_rate": 1.922485785594498e-05, "loss": 0.40868088603019714, "step": 1376 }, { "epoch": 0.3402520385470719, "grad_norm": 1.1270176241910772, "learning_rate": 1.9223279829746362e-05, "loss": 0.38167351484298706, "step": 1377 }, { "epoch": 0.34049913516184827, "grad_norm": 1.210951454643504, "learning_rate": 1.92217002638148e-05, "loss": 0.42704635858535767, "step": 1378 }, { "epoch": 0.34074623177662466, "grad_norm": 1.1637598713866586, "learning_rate": 1.9220119158413987e-05, "loss": 0.43786364793777466, "step": 1379 }, { "epoch": 0.34099332839140106, "grad_norm": 1.158801899477003, "learning_rate": 1.921853651380787e-05, "loss": 0.36254727840423584, "step": 1380 }, { "epoch": 0.3412404250061774, "grad_norm": 1.0995976738417101, "learning_rate": 1.921695233026065e-05, "loss": 0.42491912841796875, "step": 1381 }, { "epoch": 0.3414875216209538, "grad_norm": 1.222261085966986, "learning_rate": 1.92153666080368e-05, "loss": 0.4104219675064087, "step": 1382 }, { "epoch": 0.34173461823573015, "grad_norm": 1.151422527923624, "learning_rate": 1.921377934740103e-05, "loss": 0.39984261989593506, "step": 1383 }, { "epoch": 0.34198171485050655, "grad_norm": 1.3192091734558051, "learning_rate": 1.921219054861833e-05, "loss": 0.423667848110199, "step": 1384 }, { "epoch": 0.34222881146528294, "grad_norm": 1.2668113766741524, "learning_rate": 1.921060021195392e-05, "loss": 0.48099374771118164, "step": 1385 }, { "epoch": 0.3424759080800593, "grad_norm": 1.1468128868052487, "learning_rate": 1.92090083376733e-05, "loss": 0.42100539803504944, "step": 1386 }, { "epoch": 0.3427230046948357, "grad_norm": 1.2222135193972345, "learning_rate": 1.9207414926042216e-05, "loss": 0.4127594828605652, "step": 1387 }, { "epoch": 0.3429701013096121, "grad_norm": 1.0576499289833898, "learning_rate": 1.920581997732667e-05, "loss": 0.328377366065979, "step": 1388 }, { "epoch": 0.34321719792438843, "grad_norm": 1.2337561780684903, "learning_rate": 1.920422349179292e-05, "loss": 0.4988155961036682, "step": 1389 }, { "epoch": 0.3434642945391648, "grad_norm": 1.1318666199397287, "learning_rate": 1.920262546970749e-05, "loss": 0.4637540578842163, "step": 1390 }, { "epoch": 0.34371139115394117, "grad_norm": 1.0128004480968238, "learning_rate": 1.9201025911337147e-05, "loss": 0.3267706036567688, "step": 1391 }, { "epoch": 0.34395848776871757, "grad_norm": 1.0156338326998742, "learning_rate": 1.9199424816948922e-05, "loss": 0.3281017541885376, "step": 1392 }, { "epoch": 0.34420558438349397, "grad_norm": 1.1918247346407058, "learning_rate": 1.9197822186810106e-05, "loss": 0.42334991693496704, "step": 1393 }, { "epoch": 0.3444526809982703, "grad_norm": 1.1677086685840739, "learning_rate": 1.9196218021188238e-05, "loss": 0.35889172554016113, "step": 1394 }, { "epoch": 0.3446997776130467, "grad_norm": 1.2147162266444533, "learning_rate": 1.9194612320351117e-05, "loss": 0.3676144480705261, "step": 1395 }, { "epoch": 0.34494687422782305, "grad_norm": 1.2008176728995543, "learning_rate": 1.9193005084566798e-05, "loss": 0.41254138946533203, "step": 1396 }, { "epoch": 0.34519397084259945, "grad_norm": 1.3485017863455055, "learning_rate": 1.9191396314103598e-05, "loss": 0.4040246605873108, "step": 1397 }, { "epoch": 0.34544106745737585, "grad_norm": 1.2525262971531907, "learning_rate": 1.918978600923008e-05, "loss": 0.4444252848625183, "step": 1398 }, { "epoch": 0.3456881640721522, "grad_norm": 1.103759308523341, "learning_rate": 1.9188174170215065e-05, "loss": 0.38165533542633057, "step": 1399 }, { "epoch": 0.3459352606869286, "grad_norm": 1.285387388810823, "learning_rate": 1.9186560797327642e-05, "loss": 0.40359580516815186, "step": 1400 }, { "epoch": 0.346182357301705, "grad_norm": 1.1928536719083829, "learning_rate": 1.9184945890837137e-05, "loss": 0.4284363389015198, "step": 1401 }, { "epoch": 0.34642945391648133, "grad_norm": 1.1840131685988975, "learning_rate": 1.9183329451013154e-05, "loss": 0.4277694523334503, "step": 1402 }, { "epoch": 0.34667655053125773, "grad_norm": 1.0405859090461662, "learning_rate": 1.918171147812553e-05, "loss": 0.36005571484565735, "step": 1403 }, { "epoch": 0.3469236471460341, "grad_norm": 1.1930484160276145, "learning_rate": 1.9180091972444376e-05, "loss": 0.4098218083381653, "step": 1404 }, { "epoch": 0.3471707437608105, "grad_norm": 1.1182941152414347, "learning_rate": 1.9178470934240052e-05, "loss": 0.39660143852233887, "step": 1405 }, { "epoch": 0.3474178403755869, "grad_norm": 1.0563872789144093, "learning_rate": 1.917684836378317e-05, "loss": 0.4101113975048065, "step": 1406 }, { "epoch": 0.3476649369903632, "grad_norm": 1.1984874643438737, "learning_rate": 1.9175224261344604e-05, "loss": 0.46679818630218506, "step": 1407 }, { "epoch": 0.3479120336051396, "grad_norm": 1.1875636280748723, "learning_rate": 1.917359862719548e-05, "loss": 0.3918529748916626, "step": 1408 }, { "epoch": 0.348159130219916, "grad_norm": 1.0917169307865398, "learning_rate": 1.9171971461607182e-05, "loss": 0.3406611680984497, "step": 1409 }, { "epoch": 0.34840622683469236, "grad_norm": 1.2843412171632773, "learning_rate": 1.917034276485135e-05, "loss": 0.5084093809127808, "step": 1410 }, { "epoch": 0.34865332344946875, "grad_norm": 1.226716305740022, "learning_rate": 1.9168712537199877e-05, "loss": 0.39405715465545654, "step": 1411 }, { "epoch": 0.3489004200642451, "grad_norm": 1.1007724209664778, "learning_rate": 1.9167080778924915e-05, "loss": 0.3939303159713745, "step": 1412 }, { "epoch": 0.3491475166790215, "grad_norm": 1.0248862292244125, "learning_rate": 1.9165447490298865e-05, "loss": 0.3852020800113678, "step": 1413 }, { "epoch": 0.3493946132937979, "grad_norm": 1.0955788376423032, "learning_rate": 1.916381267159439e-05, "loss": 0.34468457102775574, "step": 1414 }, { "epoch": 0.34964170990857424, "grad_norm": 1.1102302752078361, "learning_rate": 1.9162176323084413e-05, "loss": 0.35529112815856934, "step": 1415 }, { "epoch": 0.34988880652335064, "grad_norm": 1.2676310927712846, "learning_rate": 1.9160538445042093e-05, "loss": 0.3825176954269409, "step": 1416 }, { "epoch": 0.350135903138127, "grad_norm": 1.1878634200652471, "learning_rate": 1.915889903774087e-05, "loss": 0.4136885404586792, "step": 1417 }, { "epoch": 0.3503829997529034, "grad_norm": 1.3695169246333114, "learning_rate": 1.9157258101454417e-05, "loss": 0.39820796251296997, "step": 1418 }, { "epoch": 0.3506300963676798, "grad_norm": 1.1414909419451251, "learning_rate": 1.9155615636456675e-05, "loss": 0.42884767055511475, "step": 1419 }, { "epoch": 0.3508771929824561, "grad_norm": 1.1439868461950955, "learning_rate": 1.9153971643021837e-05, "loss": 0.3886333405971527, "step": 1420 }, { "epoch": 0.3511242895972325, "grad_norm": 1.1466372483854148, "learning_rate": 1.9152326121424352e-05, "loss": 0.3842110335826874, "step": 1421 }, { "epoch": 0.3513713862120089, "grad_norm": 1.059435329446415, "learning_rate": 1.915067907193892e-05, "loss": 0.3979138135910034, "step": 1422 }, { "epoch": 0.35161848282678526, "grad_norm": 1.3441365240057912, "learning_rate": 1.91490304948405e-05, "loss": 0.4557384252548218, "step": 1423 }, { "epoch": 0.35186557944156166, "grad_norm": 1.0927851018204566, "learning_rate": 1.9147380390404313e-05, "loss": 0.40824615955352783, "step": 1424 }, { "epoch": 0.352112676056338, "grad_norm": 1.2008968367687758, "learning_rate": 1.9145728758905815e-05, "loss": 0.3906661868095398, "step": 1425 }, { "epoch": 0.3523597726711144, "grad_norm": 1.073554894057951, "learning_rate": 1.9144075600620736e-05, "loss": 0.3736230432987213, "step": 1426 }, { "epoch": 0.3526068692858908, "grad_norm": 1.1016096004437268, "learning_rate": 1.9142420915825056e-05, "loss": 0.3574768304824829, "step": 1427 }, { "epoch": 0.35285396590066714, "grad_norm": 1.2458950607238974, "learning_rate": 1.9140764704795e-05, "loss": 0.43808430433273315, "step": 1428 }, { "epoch": 0.35310106251544354, "grad_norm": 1.2182829870641985, "learning_rate": 1.9139106967807063e-05, "loss": 0.4264410138130188, "step": 1429 }, { "epoch": 0.35334815913021994, "grad_norm": 1.075863071212204, "learning_rate": 1.913744770513798e-05, "loss": 0.3621140718460083, "step": 1430 }, { "epoch": 0.3535952557449963, "grad_norm": 1.1209409833611566, "learning_rate": 1.913578691706476e-05, "loss": 0.40407395362854004, "step": 1431 }, { "epoch": 0.3538423523597727, "grad_norm": 1.2199285254433476, "learning_rate": 1.9134124603864644e-05, "loss": 0.4125858545303345, "step": 1432 }, { "epoch": 0.354089448974549, "grad_norm": 1.215421110873965, "learning_rate": 1.913246076581514e-05, "loss": 0.3997756540775299, "step": 1433 }, { "epoch": 0.3543365455893254, "grad_norm": 1.1590569367055237, "learning_rate": 1.913079540319401e-05, "loss": 0.38958805799484253, "step": 1434 }, { "epoch": 0.3545836422041018, "grad_norm": 1.1277647846986982, "learning_rate": 1.9129128516279273e-05, "loss": 0.39731961488723755, "step": 1435 }, { "epoch": 0.35483073881887817, "grad_norm": 1.1230778401949262, "learning_rate": 1.9127460105349194e-05, "loss": 0.3766719102859497, "step": 1436 }, { "epoch": 0.35507783543365457, "grad_norm": 1.2377122053325347, "learning_rate": 1.91257901706823e-05, "loss": 0.41563624143600464, "step": 1437 }, { "epoch": 0.35532493204843096, "grad_norm": 1.2748465453926405, "learning_rate": 1.9124118712557368e-05, "loss": 0.4282197952270508, "step": 1438 }, { "epoch": 0.3555720286632073, "grad_norm": 1.3180368436945629, "learning_rate": 1.912244573125343e-05, "loss": 0.4565618634223938, "step": 1439 }, { "epoch": 0.3558191252779837, "grad_norm": 1.3215342221923152, "learning_rate": 1.912077122704978e-05, "loss": 0.3920452892780304, "step": 1440 }, { "epoch": 0.35606622189276005, "grad_norm": 1.1148948868385633, "learning_rate": 1.911909520022595e-05, "loss": 0.341638445854187, "step": 1441 }, { "epoch": 0.35631331850753645, "grad_norm": 1.0630703777069332, "learning_rate": 1.9117417651061742e-05, "loss": 0.36930036544799805, "step": 1442 }, { "epoch": 0.35656041512231285, "grad_norm": 1.2051464620676502, "learning_rate": 1.9115738579837205e-05, "loss": 0.4005590081214905, "step": 1443 }, { "epoch": 0.3568075117370892, "grad_norm": 1.1723739254965555, "learning_rate": 1.9114057986832638e-05, "loss": 0.40446311235427856, "step": 1444 }, { "epoch": 0.3570546083518656, "grad_norm": 1.134778218756457, "learning_rate": 1.9112375872328605e-05, "loss": 0.41264545917510986, "step": 1445 }, { "epoch": 0.35730170496664193, "grad_norm": 1.1133986501861521, "learning_rate": 1.9110692236605916e-05, "loss": 0.399455189704895, "step": 1446 }, { "epoch": 0.35754880158141833, "grad_norm": 1.2146460986823069, "learning_rate": 1.9109007079945635e-05, "loss": 0.45837825536727905, "step": 1447 }, { "epoch": 0.35779589819619473, "grad_norm": 1.1845791424309988, "learning_rate": 1.9107320402629086e-05, "loss": 0.4104687571525574, "step": 1448 }, { "epoch": 0.35804299481097107, "grad_norm": 1.170802712110562, "learning_rate": 1.9105632204937836e-05, "loss": 0.39842531085014343, "step": 1449 }, { "epoch": 0.35829009142574747, "grad_norm": 1.1166760284637027, "learning_rate": 1.910394248715372e-05, "loss": 0.36485806107521057, "step": 1450 }, { "epoch": 0.35853718804052387, "grad_norm": 1.1644439749322033, "learning_rate": 1.9102251249558814e-05, "loss": 0.4105347990989685, "step": 1451 }, { "epoch": 0.3587842846553002, "grad_norm": 1.0747812026204115, "learning_rate": 1.9100558492435455e-05, "loss": 0.3329727053642273, "step": 1452 }, { "epoch": 0.3590313812700766, "grad_norm": 1.1293099019335064, "learning_rate": 1.9098864216066232e-05, "loss": 0.3878389000892639, "step": 1453 }, { "epoch": 0.35927847788485295, "grad_norm": 1.0333587218591103, "learning_rate": 1.9097168420733984e-05, "loss": 0.34744566679000854, "step": 1454 }, { "epoch": 0.35952557449962935, "grad_norm": 1.1982555004439708, "learning_rate": 1.9095471106721812e-05, "loss": 0.42564964294433594, "step": 1455 }, { "epoch": 0.35977267111440575, "grad_norm": 1.1509721773572121, "learning_rate": 1.909377227431306e-05, "loss": 0.36358100175857544, "step": 1456 }, { "epoch": 0.3600197677291821, "grad_norm": 1.253638567782353, "learning_rate": 1.9092071923791334e-05, "loss": 0.41182947158813477, "step": 1457 }, { "epoch": 0.3602668643439585, "grad_norm": 1.1070710535217476, "learning_rate": 1.9090370055440496e-05, "loss": 0.3677186071872711, "step": 1458 }, { "epoch": 0.3605139609587349, "grad_norm": 1.200755473927808, "learning_rate": 1.908866666954464e-05, "loss": 0.3932059705257416, "step": 1459 }, { "epoch": 0.36076105757351123, "grad_norm": 1.026532226978109, "learning_rate": 1.9086961766388142e-05, "loss": 0.311340868473053, "step": 1460 }, { "epoch": 0.36100815418828763, "grad_norm": 1.197129181544811, "learning_rate": 1.908525534625562e-05, "loss": 0.41643190383911133, "step": 1461 }, { "epoch": 0.361255250803064, "grad_norm": 1.1687776142587143, "learning_rate": 1.908354740943193e-05, "loss": 0.38362008333206177, "step": 1462 }, { "epoch": 0.3615023474178404, "grad_norm": 1.2566877052768766, "learning_rate": 1.9081837956202208e-05, "loss": 0.41060173511505127, "step": 1463 }, { "epoch": 0.3617494440326168, "grad_norm": 1.0510721056521524, "learning_rate": 1.908012698685182e-05, "loss": 0.3861108422279358, "step": 1464 }, { "epoch": 0.3619965406473931, "grad_norm": 0.9964907926587012, "learning_rate": 1.9078414501666405e-05, "loss": 0.3323323130607605, "step": 1465 }, { "epoch": 0.3622436372621695, "grad_norm": 1.2036420031110675, "learning_rate": 1.9076700500931836e-05, "loss": 0.360675573348999, "step": 1466 }, { "epoch": 0.36249073387694586, "grad_norm": 1.3930662718926743, "learning_rate": 1.9074984984934254e-05, "loss": 0.4250057339668274, "step": 1467 }, { "epoch": 0.36273783049172226, "grad_norm": 1.3551592087107376, "learning_rate": 1.9073267953960048e-05, "loss": 0.4473647475242615, "step": 1468 }, { "epoch": 0.36298492710649866, "grad_norm": 1.1842323345276022, "learning_rate": 1.907154940829585e-05, "loss": 0.4104827642440796, "step": 1469 }, { "epoch": 0.363232023721275, "grad_norm": 1.1808252361151421, "learning_rate": 1.9069829348228558e-05, "loss": 0.42549461126327515, "step": 1470 }, { "epoch": 0.3634791203360514, "grad_norm": 1.1108442854244969, "learning_rate": 1.9068107774045325e-05, "loss": 0.3928564786911011, "step": 1471 }, { "epoch": 0.3637262169508278, "grad_norm": 1.2794768148565407, "learning_rate": 1.9066384686033547e-05, "loss": 0.4489070177078247, "step": 1472 }, { "epoch": 0.36397331356560414, "grad_norm": 1.1691050881105107, "learning_rate": 1.906466008448087e-05, "loss": 0.4522506594657898, "step": 1473 }, { "epoch": 0.36422041018038054, "grad_norm": 1.1661360080824188, "learning_rate": 1.90629339696752e-05, "loss": 0.4347689151763916, "step": 1474 }, { "epoch": 0.3644675067951569, "grad_norm": 1.2484748569612918, "learning_rate": 1.9061206341904703e-05, "loss": 0.4096407890319824, "step": 1475 }, { "epoch": 0.3647146034099333, "grad_norm": 1.0838163960241134, "learning_rate": 1.905947720145778e-05, "loss": 0.35124990344047546, "step": 1476 }, { "epoch": 0.3649617000247097, "grad_norm": 1.124751891724469, "learning_rate": 1.90577465486231e-05, "loss": 0.4030390977859497, "step": 1477 }, { "epoch": 0.365208796639486, "grad_norm": 1.0847518107928955, "learning_rate": 1.905601438368957e-05, "loss": 0.3754006028175354, "step": 1478 }, { "epoch": 0.3654558932542624, "grad_norm": 1.1573135432820636, "learning_rate": 1.905428070694637e-05, "loss": 0.41065096855163574, "step": 1479 }, { "epoch": 0.3657029898690388, "grad_norm": 1.1360792466316258, "learning_rate": 1.9052545518682906e-05, "loss": 0.42499688267707825, "step": 1480 }, { "epoch": 0.36595008648381516, "grad_norm": 1.1528175714089561, "learning_rate": 1.9050808819188856e-05, "loss": 0.4408928155899048, "step": 1481 }, { "epoch": 0.36619718309859156, "grad_norm": 1.1458565824839204, "learning_rate": 1.9049070608754144e-05, "loss": 0.4279026985168457, "step": 1482 }, { "epoch": 0.3664442797133679, "grad_norm": 1.1402578030940105, "learning_rate": 1.904733088766895e-05, "loss": 0.40492093563079834, "step": 1483 }, { "epoch": 0.3666913763281443, "grad_norm": 1.1266225932422695, "learning_rate": 1.9045589656223698e-05, "loss": 0.39029568433761597, "step": 1484 }, { "epoch": 0.3669384729429207, "grad_norm": 1.1319659862453366, "learning_rate": 1.904384691470907e-05, "loss": 0.39455389976501465, "step": 1485 }, { "epoch": 0.36718556955769704, "grad_norm": 1.2240250325226951, "learning_rate": 1.9042102663416e-05, "loss": 0.3628894090652466, "step": 1486 }, { "epoch": 0.36743266617247344, "grad_norm": 1.0251376054401873, "learning_rate": 1.904035690263567e-05, "loss": 0.35108768939971924, "step": 1487 }, { "epoch": 0.36767976278724984, "grad_norm": 1.118559999504077, "learning_rate": 1.9038609632659522e-05, "loss": 0.35090988874435425, "step": 1488 }, { "epoch": 0.3679268594020262, "grad_norm": 1.1886674482835486, "learning_rate": 1.9036860853779244e-05, "loss": 0.41309303045272827, "step": 1489 }, { "epoch": 0.3681739560168026, "grad_norm": 1.0719802630301178, "learning_rate": 1.903511056628677e-05, "loss": 0.4200592041015625, "step": 1490 }, { "epoch": 0.3684210526315789, "grad_norm": 1.150232903771007, "learning_rate": 1.9033358770474306e-05, "loss": 0.37882453203201294, "step": 1491 }, { "epoch": 0.3686681492463553, "grad_norm": 1.1721245094613413, "learning_rate": 1.9031605466634282e-05, "loss": 0.37584584951400757, "step": 1492 }, { "epoch": 0.3689152458611317, "grad_norm": 1.1171416421464948, "learning_rate": 1.9029850655059407e-05, "loss": 0.4018614888191223, "step": 1493 }, { "epoch": 0.36916234247590807, "grad_norm": 1.0825890382065868, "learning_rate": 1.9028094336042617e-05, "loss": 0.3560650646686554, "step": 1494 }, { "epoch": 0.36940943909068447, "grad_norm": 1.290648646654249, "learning_rate": 1.902633650987712e-05, "loss": 0.4266859292984009, "step": 1495 }, { "epoch": 0.3696565357054608, "grad_norm": 1.2493774147221022, "learning_rate": 1.9024577176856364e-05, "loss": 0.41664132475852966, "step": 1496 }, { "epoch": 0.3699036323202372, "grad_norm": 1.0645740107038375, "learning_rate": 1.9022816337274056e-05, "loss": 0.3433448076248169, "step": 1497 }, { "epoch": 0.3701507289350136, "grad_norm": 1.0874388391525878, "learning_rate": 1.9021053991424145e-05, "loss": 0.35276710987091064, "step": 1498 }, { "epoch": 0.37039782554978995, "grad_norm": 1.1413508204185678, "learning_rate": 1.901929013960084e-05, "loss": 0.4026232361793518, "step": 1499 }, { "epoch": 0.37064492216456635, "grad_norm": 1.1953264317128431, "learning_rate": 1.9017524782098593e-05, "loss": 0.41153502464294434, "step": 1500 }, { "epoch": 0.37089201877934275, "grad_norm": 1.274949007979414, "learning_rate": 1.901575791921212e-05, "loss": 0.4522094130516052, "step": 1501 }, { "epoch": 0.3711391153941191, "grad_norm": 1.1331848734731702, "learning_rate": 1.9013989551236375e-05, "loss": 0.41233426332473755, "step": 1502 }, { "epoch": 0.3713862120088955, "grad_norm": 1.1333752972050677, "learning_rate": 1.9012219678466573e-05, "loss": 0.4016250967979431, "step": 1503 }, { "epoch": 0.37163330862367183, "grad_norm": 1.3430473962228682, "learning_rate": 1.9010448301198172e-05, "loss": 0.436124712228775, "step": 1504 }, { "epoch": 0.37188040523844823, "grad_norm": 1.13559323033194, "learning_rate": 1.900867541972689e-05, "loss": 0.3958176374435425, "step": 1505 }, { "epoch": 0.37212750185322463, "grad_norm": 1.0950072733679352, "learning_rate": 1.900690103434869e-05, "loss": 0.3331385850906372, "step": 1506 }, { "epoch": 0.372374598468001, "grad_norm": 1.1625618803696416, "learning_rate": 1.9005125145359785e-05, "loss": 0.3436441421508789, "step": 1507 }, { "epoch": 0.37262169508277737, "grad_norm": 1.1507342595329548, "learning_rate": 1.9003347753056648e-05, "loss": 0.4155968427658081, "step": 1508 }, { "epoch": 0.37286879169755377, "grad_norm": 1.2288775237306162, "learning_rate": 1.9001568857735985e-05, "loss": 0.41455531120300293, "step": 1509 }, { "epoch": 0.3731158883123301, "grad_norm": 1.2731167246632948, "learning_rate": 1.899978845969478e-05, "loss": 0.39693138003349304, "step": 1510 }, { "epoch": 0.3733629849271065, "grad_norm": 1.1280424393527444, "learning_rate": 1.899800655923024e-05, "loss": 0.36949843168258667, "step": 1511 }, { "epoch": 0.37361008154188285, "grad_norm": 1.1096024918888205, "learning_rate": 1.899622315663984e-05, "loss": 0.38546162843704224, "step": 1512 }, { "epoch": 0.37385717815665925, "grad_norm": 1.0278560351519537, "learning_rate": 1.8994438252221302e-05, "loss": 0.3680773377418518, "step": 1513 }, { "epoch": 0.37410427477143565, "grad_norm": 1.0917327892071458, "learning_rate": 1.8992651846272598e-05, "loss": 0.4131346344947815, "step": 1514 }, { "epoch": 0.374351371386212, "grad_norm": 1.197202414936878, "learning_rate": 1.8990863939091944e-05, "loss": 0.3284909129142761, "step": 1515 }, { "epoch": 0.3745984680009884, "grad_norm": 1.0679350170443038, "learning_rate": 1.898907453097782e-05, "loss": 0.34370654821395874, "step": 1516 }, { "epoch": 0.37484556461576474, "grad_norm": 1.145916921050455, "learning_rate": 1.8987283622228943e-05, "loss": 0.31086304783821106, "step": 1517 }, { "epoch": 0.37509266123054114, "grad_norm": 1.1720944298752767, "learning_rate": 1.8985491213144296e-05, "loss": 0.39110732078552246, "step": 1518 }, { "epoch": 0.37533975784531753, "grad_norm": 1.2041577331841613, "learning_rate": 1.8983697304023097e-05, "loss": 0.38466984033584595, "step": 1519 }, { "epoch": 0.3755868544600939, "grad_norm": 1.059601584338426, "learning_rate": 1.8981901895164823e-05, "loss": 0.3784255385398865, "step": 1520 }, { "epoch": 0.3758339510748703, "grad_norm": 1.1778170427222197, "learning_rate": 1.8980104986869195e-05, "loss": 0.4024115204811096, "step": 1521 }, { "epoch": 0.3760810476896467, "grad_norm": 1.119922814904556, "learning_rate": 1.8978306579436195e-05, "loss": 0.32716307044029236, "step": 1522 }, { "epoch": 0.376328144304423, "grad_norm": 1.2207391862935695, "learning_rate": 1.897650667316605e-05, "loss": 0.4273037016391754, "step": 1523 }, { "epoch": 0.3765752409191994, "grad_norm": 1.2390662459900226, "learning_rate": 1.8974705268359228e-05, "loss": 0.3810078203678131, "step": 1524 }, { "epoch": 0.37682233753397576, "grad_norm": 1.1390546077132337, "learning_rate": 1.897290236531646e-05, "loss": 0.39779865741729736, "step": 1525 }, { "epoch": 0.37706943414875216, "grad_norm": 1.13923386508921, "learning_rate": 1.8971097964338724e-05, "loss": 0.34493112564086914, "step": 1526 }, { "epoch": 0.37731653076352856, "grad_norm": 1.1896693408144896, "learning_rate": 1.896929206572724e-05, "loss": 0.38748621940612793, "step": 1527 }, { "epoch": 0.3775636273783049, "grad_norm": 1.295803141680112, "learning_rate": 1.8967484669783494e-05, "loss": 0.44285741448402405, "step": 1528 }, { "epoch": 0.3778107239930813, "grad_norm": 1.1887783590812528, "learning_rate": 1.89656757768092e-05, "loss": 0.4352962076663971, "step": 1529 }, { "epoch": 0.3780578206078577, "grad_norm": 1.10077099376148, "learning_rate": 1.896386538710635e-05, "loss": 0.3294663429260254, "step": 1530 }, { "epoch": 0.37830491722263404, "grad_norm": 1.0365950724424964, "learning_rate": 1.8962053500977156e-05, "loss": 0.35187989473342896, "step": 1531 }, { "epoch": 0.37855201383741044, "grad_norm": 1.1931314409941844, "learning_rate": 1.8960240118724103e-05, "loss": 0.43076291680336, "step": 1532 }, { "epoch": 0.3787991104521868, "grad_norm": 1.1597950927443794, "learning_rate": 1.895842524064991e-05, "loss": 0.3856182396411896, "step": 1533 }, { "epoch": 0.3790462070669632, "grad_norm": 1.240449113086955, "learning_rate": 1.895660886705756e-05, "loss": 0.4714832305908203, "step": 1534 }, { "epoch": 0.3792933036817396, "grad_norm": 1.0512288513646875, "learning_rate": 1.8954790998250273e-05, "loss": 0.360932856798172, "step": 1535 }, { "epoch": 0.3795404002965159, "grad_norm": 1.1597095071035934, "learning_rate": 1.8952971634531524e-05, "loss": 0.38451826572418213, "step": 1536 }, { "epoch": 0.3797874969112923, "grad_norm": 1.1012964591472054, "learning_rate": 1.8951150776205037e-05, "loss": 0.3931354284286499, "step": 1537 }, { "epoch": 0.3800345935260687, "grad_norm": 1.26695128004117, "learning_rate": 1.894932842357479e-05, "loss": 0.45152321457862854, "step": 1538 }, { "epoch": 0.38028169014084506, "grad_norm": 1.1045419442056363, "learning_rate": 1.8947504576944998e-05, "loss": 0.3506332337856293, "step": 1539 }, { "epoch": 0.38052878675562146, "grad_norm": 1.0884911318670207, "learning_rate": 1.894567923662014e-05, "loss": 0.3946336507797241, "step": 1540 }, { "epoch": 0.3807758833703978, "grad_norm": 1.201933485880634, "learning_rate": 1.8943852402904942e-05, "loss": 0.41248074173927307, "step": 1541 }, { "epoch": 0.3810229799851742, "grad_norm": 1.0911262977886613, "learning_rate": 1.8942024076104367e-05, "loss": 0.395054429769516, "step": 1542 }, { "epoch": 0.3812700765999506, "grad_norm": 1.2622946556681958, "learning_rate": 1.894019425652364e-05, "loss": 0.37507274746894836, "step": 1543 }, { "epoch": 0.38151717321472695, "grad_norm": 0.996958109548805, "learning_rate": 1.8938362944468225e-05, "loss": 0.397294282913208, "step": 1544 }, { "epoch": 0.38176426982950334, "grad_norm": 1.2456557040647565, "learning_rate": 1.893653014024385e-05, "loss": 0.4889829456806183, "step": 1545 }, { "epoch": 0.3820113664442797, "grad_norm": 1.1150902879576836, "learning_rate": 1.8934695844156477e-05, "loss": 0.3114156723022461, "step": 1546 }, { "epoch": 0.3822584630590561, "grad_norm": 1.103408855845868, "learning_rate": 1.8932860056512327e-05, "loss": 0.3677099943161011, "step": 1547 }, { "epoch": 0.3825055596738325, "grad_norm": 1.0900540942317845, "learning_rate": 1.893102277761786e-05, "loss": 0.35595694184303284, "step": 1548 }, { "epoch": 0.38275265628860883, "grad_norm": 1.361633808902931, "learning_rate": 1.89291840077798e-05, "loss": 0.4247480630874634, "step": 1549 }, { "epoch": 0.3829997529033852, "grad_norm": 1.0877733120281785, "learning_rate": 1.8927343747305103e-05, "loss": 0.3317810297012329, "step": 1550 }, { "epoch": 0.3832468495181616, "grad_norm": 1.1485000598562471, "learning_rate": 1.8925501996500987e-05, "loss": 0.42724156379699707, "step": 1551 }, { "epoch": 0.38349394613293797, "grad_norm": 1.1300125830422503, "learning_rate": 1.8923658755674906e-05, "loss": 0.3469632863998413, "step": 1552 }, { "epoch": 0.38374104274771437, "grad_norm": 1.1641765186207504, "learning_rate": 1.8921814025134582e-05, "loss": 0.4511302411556244, "step": 1553 }, { "epoch": 0.3839881393624907, "grad_norm": 1.2240757502295976, "learning_rate": 1.8919967805187963e-05, "loss": 0.3950282335281372, "step": 1554 }, { "epoch": 0.3842352359772671, "grad_norm": 1.2222474619953418, "learning_rate": 1.8918120096143263e-05, "loss": 0.4263366758823395, "step": 1555 }, { "epoch": 0.3844823325920435, "grad_norm": 1.0775975386682148, "learning_rate": 1.8916270898308936e-05, "loss": 0.3664762079715729, "step": 1556 }, { "epoch": 0.38472942920681985, "grad_norm": 1.1764187752288675, "learning_rate": 1.8914420211993693e-05, "loss": 0.3752383589744568, "step": 1557 }, { "epoch": 0.38497652582159625, "grad_norm": 1.136794222264414, "learning_rate": 1.8912568037506475e-05, "loss": 0.342409610748291, "step": 1558 }, { "epoch": 0.38522362243637265, "grad_norm": 1.15205476312802, "learning_rate": 1.8910714375156493e-05, "loss": 0.39239826798439026, "step": 1559 }, { "epoch": 0.385470719051149, "grad_norm": 1.2245241158576345, "learning_rate": 1.8908859225253192e-05, "loss": 0.40703046321868896, "step": 1560 }, { "epoch": 0.3857178156659254, "grad_norm": 1.4567293321085233, "learning_rate": 1.8907002588106277e-05, "loss": 0.47666555643081665, "step": 1561 }, { "epoch": 0.38596491228070173, "grad_norm": 1.2728332139132523, "learning_rate": 1.890514446402569e-05, "loss": 0.422232985496521, "step": 1562 }, { "epoch": 0.38621200889547813, "grad_norm": 1.2022555907914547, "learning_rate": 1.8903284853321623e-05, "loss": 0.4155529737472534, "step": 1563 }, { "epoch": 0.38645910551025453, "grad_norm": 1.2720172501121578, "learning_rate": 1.8901423756304525e-05, "loss": 0.4613437056541443, "step": 1564 }, { "epoch": 0.3867062021250309, "grad_norm": 1.0955374697481886, "learning_rate": 1.889956117328509e-05, "loss": 0.3787885904312134, "step": 1565 }, { "epoch": 0.38695329873980727, "grad_norm": 1.050537568498713, "learning_rate": 1.889769710457425e-05, "loss": 0.32947367429733276, "step": 1566 }, { "epoch": 0.3872003953545836, "grad_norm": 1.1723349341778324, "learning_rate": 1.889583155048319e-05, "loss": 0.4250631332397461, "step": 1567 }, { "epoch": 0.38744749196936, "grad_norm": 1.1524430918148094, "learning_rate": 1.8893964511323352e-05, "loss": 0.40169522166252136, "step": 1568 }, { "epoch": 0.3876945885841364, "grad_norm": 1.0997421547117205, "learning_rate": 1.889209598740642e-05, "loss": 0.4265097975730896, "step": 1569 }, { "epoch": 0.38794168519891276, "grad_norm": 1.2111333147864645, "learning_rate": 1.8890225979044322e-05, "loss": 0.4867493510246277, "step": 1570 }, { "epoch": 0.38818878181368915, "grad_norm": 1.3466315361880858, "learning_rate": 1.8888354486549238e-05, "loss": 0.46409308910369873, "step": 1571 }, { "epoch": 0.38843587842846555, "grad_norm": 1.1621086345794565, "learning_rate": 1.8886481510233593e-05, "loss": 0.406304270029068, "step": 1572 }, { "epoch": 0.3886829750432419, "grad_norm": 1.2680135603813814, "learning_rate": 1.888460705041006e-05, "loss": 0.4090372323989868, "step": 1573 }, { "epoch": 0.3889300716580183, "grad_norm": 1.1085152739968727, "learning_rate": 1.8882731107391566e-05, "loss": 0.4106670618057251, "step": 1574 }, { "epoch": 0.38917716827279464, "grad_norm": 1.123336515778048, "learning_rate": 1.8880853681491277e-05, "loss": 0.34381359815597534, "step": 1575 }, { "epoch": 0.38942426488757104, "grad_norm": 1.0957911889926328, "learning_rate": 1.8878974773022612e-05, "loss": 0.37359991669654846, "step": 1576 }, { "epoch": 0.38967136150234744, "grad_norm": 1.180031960997362, "learning_rate": 1.8877094382299233e-05, "loss": 0.44227737188339233, "step": 1577 }, { "epoch": 0.3899184581171238, "grad_norm": 1.1715002769328235, "learning_rate": 1.8875212509635057e-05, "loss": 0.38448846340179443, "step": 1578 }, { "epoch": 0.3901655547319002, "grad_norm": 1.0531574431887907, "learning_rate": 1.8873329155344244e-05, "loss": 0.38803741335868835, "step": 1579 }, { "epoch": 0.3904126513466766, "grad_norm": 1.3558869106857188, "learning_rate": 1.887144431974119e-05, "loss": 0.38429880142211914, "step": 1580 }, { "epoch": 0.3906597479614529, "grad_norm": 1.1769285065509603, "learning_rate": 1.886955800314056e-05, "loss": 0.4438413977622986, "step": 1581 }, { "epoch": 0.3909068445762293, "grad_norm": 1.1223530196016474, "learning_rate": 1.8867670205857253e-05, "loss": 0.36521703004837036, "step": 1582 }, { "epoch": 0.39115394119100566, "grad_norm": 1.1699787094272334, "learning_rate": 1.8865780928206415e-05, "loss": 0.3674086332321167, "step": 1583 }, { "epoch": 0.39140103780578206, "grad_norm": 3.0809836941604445, "learning_rate": 1.8863890170503446e-05, "loss": 0.40341484546661377, "step": 1584 }, { "epoch": 0.39164813442055846, "grad_norm": 1.1609437577598931, "learning_rate": 1.8861997933063985e-05, "loss": 0.3825533390045166, "step": 1585 }, { "epoch": 0.3918952310353348, "grad_norm": 1.2337619144824927, "learning_rate": 1.8860104216203923e-05, "loss": 0.4539698362350464, "step": 1586 }, { "epoch": 0.3921423276501112, "grad_norm": 1.2306168824558106, "learning_rate": 1.8858209020239395e-05, "loss": 0.40392422676086426, "step": 1587 }, { "epoch": 0.3923894242648876, "grad_norm": 1.19426389962561, "learning_rate": 1.885631234548679e-05, "loss": 0.3889050781726837, "step": 1588 }, { "epoch": 0.39263652087966394, "grad_norm": 1.266619285465504, "learning_rate": 1.8854414192262735e-05, "loss": 0.38949382305145264, "step": 1589 }, { "epoch": 0.39288361749444034, "grad_norm": 1.282569596170833, "learning_rate": 1.8852514560884104e-05, "loss": 0.4721469283103943, "step": 1590 }, { "epoch": 0.3931307141092167, "grad_norm": 1.281302711529973, "learning_rate": 1.8850613451668028e-05, "loss": 0.3978145718574524, "step": 1591 }, { "epoch": 0.3933778107239931, "grad_norm": 1.11961351476766, "learning_rate": 1.8848710864931874e-05, "loss": 0.39345109462738037, "step": 1592 }, { "epoch": 0.3936249073387695, "grad_norm": 1.158349216696049, "learning_rate": 1.884680680099326e-05, "loss": 0.3936307430267334, "step": 1593 }, { "epoch": 0.3938720039535458, "grad_norm": 1.013076456285439, "learning_rate": 1.8844901260170052e-05, "loss": 0.3925994634628296, "step": 1594 }, { "epoch": 0.3941191005683222, "grad_norm": 1.1022436207877653, "learning_rate": 1.884299424278036e-05, "loss": 0.3868270516395569, "step": 1595 }, { "epoch": 0.39436619718309857, "grad_norm": 1.098623915190641, "learning_rate": 1.8841085749142536e-05, "loss": 0.3518510162830353, "step": 1596 }, { "epoch": 0.39461329379787496, "grad_norm": 1.0553140838634214, "learning_rate": 1.883917577957519e-05, "loss": 0.3413812816143036, "step": 1597 }, { "epoch": 0.39486039041265136, "grad_norm": 1.131128866317899, "learning_rate": 1.883726433439717e-05, "loss": 0.4198228418827057, "step": 1598 }, { "epoch": 0.3951074870274277, "grad_norm": 1.090656134756794, "learning_rate": 1.8835351413927575e-05, "loss": 0.316087543964386, "step": 1599 }, { "epoch": 0.3953545836422041, "grad_norm": 1.2302704077731306, "learning_rate": 1.883343701848574e-05, "loss": 0.40642768144607544, "step": 1600 }, { "epoch": 0.3956016802569805, "grad_norm": 1.0862266901650917, "learning_rate": 1.8831521148391263e-05, "loss": 0.3555774390697479, "step": 1601 }, { "epoch": 0.39584877687175685, "grad_norm": 1.0563271710075322, "learning_rate": 1.8829603803963973e-05, "loss": 0.3623272776603699, "step": 1602 }, { "epoch": 0.39609587348653325, "grad_norm": 1.2079397083568806, "learning_rate": 1.8827684985523954e-05, "loss": 0.4059014618396759, "step": 1603 }, { "epoch": 0.3963429701013096, "grad_norm": 1.2922083630518209, "learning_rate": 1.882576469339153e-05, "loss": 0.4216366708278656, "step": 1604 }, { "epoch": 0.396590066716086, "grad_norm": 1.2722305979305166, "learning_rate": 1.8823842927887283e-05, "loss": 0.42369523644447327, "step": 1605 }, { "epoch": 0.3968371633308624, "grad_norm": 1.2495426521387285, "learning_rate": 1.882191968933202e-05, "loss": 0.4389491677284241, "step": 1606 }, { "epoch": 0.39708425994563873, "grad_norm": 1.1943360788617587, "learning_rate": 1.8819994978046813e-05, "loss": 0.436074435710907, "step": 1607 }, { "epoch": 0.3973313565604151, "grad_norm": 1.1370561047914454, "learning_rate": 1.8818068794352974e-05, "loss": 0.3979055881500244, "step": 1608 }, { "epoch": 0.3975784531751915, "grad_norm": 1.1685160049252, "learning_rate": 1.8816141138572056e-05, "loss": 0.43505045771598816, "step": 1609 }, { "epoch": 0.39782554978996787, "grad_norm": 1.39576300398208, "learning_rate": 1.8814212011025865e-05, "loss": 0.4314740300178528, "step": 1610 }, { "epoch": 0.39807264640474427, "grad_norm": 1.2010072556260512, "learning_rate": 1.8812281412036447e-05, "loss": 0.38461506366729736, "step": 1611 }, { "epoch": 0.3983197430195206, "grad_norm": 1.2165687711629198, "learning_rate": 1.88103493419261e-05, "loss": 0.3663494884967804, "step": 1612 }, { "epoch": 0.398566839634297, "grad_norm": 1.137911556488604, "learning_rate": 1.8808415801017356e-05, "loss": 0.41438448429107666, "step": 1613 }, { "epoch": 0.3988139362490734, "grad_norm": 1.109712459486909, "learning_rate": 1.8806480789633003e-05, "loss": 0.37480443716049194, "step": 1614 }, { "epoch": 0.39906103286384975, "grad_norm": 1.2110398588602762, "learning_rate": 1.8804544308096074e-05, "loss": 0.42121079564094543, "step": 1615 }, { "epoch": 0.39930812947862615, "grad_norm": 1.1163308719513518, "learning_rate": 1.8802606356729845e-05, "loss": 0.4332238733768463, "step": 1616 }, { "epoch": 0.3995552260934025, "grad_norm": 1.209456167285793, "learning_rate": 1.8800666935857838e-05, "loss": 0.3815411329269409, "step": 1617 }, { "epoch": 0.3998023227081789, "grad_norm": 1.277262822270364, "learning_rate": 1.8798726045803813e-05, "loss": 0.4131830930709839, "step": 1618 }, { "epoch": 0.4000494193229553, "grad_norm": 1.1548606223653533, "learning_rate": 1.879678368689179e-05, "loss": 0.3829866945743561, "step": 1619 }, { "epoch": 0.40029651593773163, "grad_norm": 0.9672021171184639, "learning_rate": 1.879483985944602e-05, "loss": 0.34650295972824097, "step": 1620 }, { "epoch": 0.40054361255250803, "grad_norm": 1.2363684749658415, "learning_rate": 1.879289456379101e-05, "loss": 0.37224477529525757, "step": 1621 }, { "epoch": 0.40079070916728443, "grad_norm": 1.310519129990068, "learning_rate": 1.8790947800251502e-05, "loss": 0.4224206209182739, "step": 1622 }, { "epoch": 0.4010378057820608, "grad_norm": 1.1313084156438231, "learning_rate": 1.8788999569152496e-05, "loss": 0.3542625904083252, "step": 1623 }, { "epoch": 0.4012849023968372, "grad_norm": 1.2512675148186267, "learning_rate": 1.878704987081922e-05, "loss": 0.4009738564491272, "step": 1624 }, { "epoch": 0.4015319990116135, "grad_norm": 1.0934230531972078, "learning_rate": 1.8785098705577166e-05, "loss": 0.3888946771621704, "step": 1625 }, { "epoch": 0.4017790956263899, "grad_norm": 1.106964574265083, "learning_rate": 1.8783146073752056e-05, "loss": 0.3330422639846802, "step": 1626 }, { "epoch": 0.4020261922411663, "grad_norm": 1.2273631940951983, "learning_rate": 1.878119197566986e-05, "loss": 0.44296377897262573, "step": 1627 }, { "epoch": 0.40227328885594266, "grad_norm": 1.152523723528084, "learning_rate": 1.87792364116568e-05, "loss": 0.40542900562286377, "step": 1628 }, { "epoch": 0.40252038547071906, "grad_norm": 1.0089514404188187, "learning_rate": 1.877727938203933e-05, "loss": 0.3599333167076111, "step": 1629 }, { "epoch": 0.40276748208549545, "grad_norm": 1.0565295193376258, "learning_rate": 1.877532088714417e-05, "loss": 0.38161569833755493, "step": 1630 }, { "epoch": 0.4030145787002718, "grad_norm": 1.1598329613307359, "learning_rate": 1.8773360927298253e-05, "loss": 0.3564172387123108, "step": 1631 }, { "epoch": 0.4032616753150482, "grad_norm": 1.2161246757107027, "learning_rate": 1.877139950282879e-05, "loss": 0.4254184365272522, "step": 1632 }, { "epoch": 0.40350877192982454, "grad_norm": 1.1661715127204584, "learning_rate": 1.8769436614063212e-05, "loss": 0.4340718984603882, "step": 1633 }, { "epoch": 0.40375586854460094, "grad_norm": 1.1500981967239814, "learning_rate": 1.8767472261329207e-05, "loss": 0.36860191822052, "step": 1634 }, { "epoch": 0.40400296515937734, "grad_norm": 1.2592942760085215, "learning_rate": 1.87655064449547e-05, "loss": 0.37728017568588257, "step": 1635 }, { "epoch": 0.4042500617741537, "grad_norm": 1.0974387065560012, "learning_rate": 1.876353916526787e-05, "loss": 0.39082467555999756, "step": 1636 }, { "epoch": 0.4044971583889301, "grad_norm": 1.1998036794993518, "learning_rate": 1.8761570422597126e-05, "loss": 0.3648565709590912, "step": 1637 }, { "epoch": 0.4047442550037065, "grad_norm": 1.1128718039120342, "learning_rate": 1.875960021727114e-05, "loss": 0.3490803837776184, "step": 1638 }, { "epoch": 0.4049913516184828, "grad_norm": 1.3043747870437719, "learning_rate": 1.875762854961881e-05, "loss": 0.4443337917327881, "step": 1639 }, { "epoch": 0.4052384482332592, "grad_norm": 1.2022167053795803, "learning_rate": 1.8755655419969287e-05, "loss": 0.39787524938583374, "step": 1640 }, { "epoch": 0.40548554484803556, "grad_norm": 1.2230829737423299, "learning_rate": 1.875368082865197e-05, "loss": 0.3987809419631958, "step": 1641 }, { "epoch": 0.40573264146281196, "grad_norm": 1.1856924397643873, "learning_rate": 1.8751704775996488e-05, "loss": 0.4511677622795105, "step": 1642 }, { "epoch": 0.40597973807758836, "grad_norm": 1.237455027346202, "learning_rate": 1.8749727262332733e-05, "loss": 0.44283732771873474, "step": 1643 }, { "epoch": 0.4062268346923647, "grad_norm": 1.1792017775283163, "learning_rate": 1.8747748287990824e-05, "loss": 0.43991488218307495, "step": 1644 }, { "epoch": 0.4064739313071411, "grad_norm": 1.198055195008244, "learning_rate": 1.8745767853301132e-05, "loss": 0.3846742510795593, "step": 1645 }, { "epoch": 0.40672102792191744, "grad_norm": 1.228719945146029, "learning_rate": 1.874378595859427e-05, "loss": 0.4036152958869934, "step": 1646 }, { "epoch": 0.40696812453669384, "grad_norm": 1.257706692391684, "learning_rate": 1.87418026042011e-05, "loss": 0.39479780197143555, "step": 1647 }, { "epoch": 0.40721522115147024, "grad_norm": 1.1368881350309437, "learning_rate": 1.8739817790452718e-05, "loss": 0.3697044849395752, "step": 1648 }, { "epoch": 0.4074623177662466, "grad_norm": 1.1488198438827366, "learning_rate": 1.873783151768047e-05, "loss": 0.37912046909332275, "step": 1649 }, { "epoch": 0.407709414381023, "grad_norm": 1.0862897404224299, "learning_rate": 1.8735843786215943e-05, "loss": 0.3854413628578186, "step": 1650 }, { "epoch": 0.4079565109957994, "grad_norm": 1.2273677882013707, "learning_rate": 1.873385459639097e-05, "loss": 0.377662718296051, "step": 1651 }, { "epoch": 0.4082036076105757, "grad_norm": 1.2619069279707118, "learning_rate": 1.8731863948537623e-05, "loss": 0.43176180124282837, "step": 1652 }, { "epoch": 0.4084507042253521, "grad_norm": 1.203224504605149, "learning_rate": 1.8729871842988225e-05, "loss": 0.38309767842292786, "step": 1653 }, { "epoch": 0.40869780084012847, "grad_norm": 1.136157080185302, "learning_rate": 1.8727878280075334e-05, "loss": 0.42385315895080566, "step": 1654 }, { "epoch": 0.40894489745490487, "grad_norm": 1.145197039563039, "learning_rate": 1.872588326013176e-05, "loss": 0.3667531907558441, "step": 1655 }, { "epoch": 0.40919199406968126, "grad_norm": 1.0711754679256171, "learning_rate": 1.8723886783490545e-05, "loss": 0.40643373131752014, "step": 1656 }, { "epoch": 0.4094390906844576, "grad_norm": 1.1600007364910743, "learning_rate": 1.8721888850484988e-05, "loss": 0.37301284074783325, "step": 1657 }, { "epoch": 0.409686187299234, "grad_norm": 1.0859790595358736, "learning_rate": 1.8719889461448615e-05, "loss": 0.35094553232192993, "step": 1658 }, { "epoch": 0.4099332839140104, "grad_norm": 1.1563081422722938, "learning_rate": 1.871788861671521e-05, "loss": 0.3956511616706848, "step": 1659 }, { "epoch": 0.41018038052878675, "grad_norm": 1.1530567437151815, "learning_rate": 1.871588631661879e-05, "loss": 0.4151986837387085, "step": 1660 }, { "epoch": 0.41042747714356315, "grad_norm": 1.7921450591376187, "learning_rate": 1.8713882561493622e-05, "loss": 0.45754146575927734, "step": 1661 }, { "epoch": 0.4106745737583395, "grad_norm": 1.1558185549316335, "learning_rate": 1.871187735167421e-05, "loss": 0.39246684312820435, "step": 1662 }, { "epoch": 0.4109216703731159, "grad_norm": 1.312124320097337, "learning_rate": 1.8709870687495308e-05, "loss": 0.43857258558273315, "step": 1663 }, { "epoch": 0.4111687669878923, "grad_norm": 1.288959279585105, "learning_rate": 1.8707862569291898e-05, "loss": 0.4192362129688263, "step": 1664 }, { "epoch": 0.41141586360266863, "grad_norm": 1.2923514961477363, "learning_rate": 1.8705852997399224e-05, "loss": 0.40449175238609314, "step": 1665 }, { "epoch": 0.41166296021744503, "grad_norm": 1.390401332447418, "learning_rate": 1.8703841972152762e-05, "loss": 0.4040095806121826, "step": 1666 }, { "epoch": 0.41191005683222137, "grad_norm": 1.167278519853101, "learning_rate": 1.8701829493888232e-05, "loss": 0.43751823902130127, "step": 1667 }, { "epoch": 0.41215715344699777, "grad_norm": 1.2280809754618716, "learning_rate": 1.8699815562941598e-05, "loss": 0.4298381507396698, "step": 1668 }, { "epoch": 0.41240425006177417, "grad_norm": 1.2074683719067427, "learning_rate": 1.869780017964906e-05, "loss": 0.3833162486553192, "step": 1669 }, { "epoch": 0.4126513466765505, "grad_norm": 1.2766244391228634, "learning_rate": 1.869578334434707e-05, "loss": 0.4440433084964752, "step": 1670 }, { "epoch": 0.4128984432913269, "grad_norm": 1.145686538430457, "learning_rate": 1.8693765057372318e-05, "loss": 0.412402868270874, "step": 1671 }, { "epoch": 0.4131455399061033, "grad_norm": 1.1048560210304696, "learning_rate": 1.869174531906174e-05, "loss": 0.3764578104019165, "step": 1672 }, { "epoch": 0.41339263652087965, "grad_norm": 1.2045054402219277, "learning_rate": 1.86897241297525e-05, "loss": 0.36073029041290283, "step": 1673 }, { "epoch": 0.41363973313565605, "grad_norm": 1.1350273160121676, "learning_rate": 1.8687701489782026e-05, "loss": 0.3537546396255493, "step": 1674 }, { "epoch": 0.4138868297504324, "grad_norm": 1.2037440961863797, "learning_rate": 1.8685677399487972e-05, "loss": 0.39370983839035034, "step": 1675 }, { "epoch": 0.4141339263652088, "grad_norm": 1.2880794493800838, "learning_rate": 1.868365185920824e-05, "loss": 0.42726558446884155, "step": 1676 }, { "epoch": 0.4143810229799852, "grad_norm": 1.1990670601272664, "learning_rate": 1.8681624869280976e-05, "loss": 0.38502368330955505, "step": 1677 }, { "epoch": 0.41462811959476154, "grad_norm": 1.0858318786311922, "learning_rate": 1.8679596430044562e-05, "loss": 0.4112018942832947, "step": 1678 }, { "epoch": 0.41487521620953793, "grad_norm": 1.1482888528006727, "learning_rate": 1.8677566541837628e-05, "loss": 0.4254053235054016, "step": 1679 }, { "epoch": 0.41512231282431433, "grad_norm": 1.1306828854271478, "learning_rate": 1.867553520499904e-05, "loss": 0.43351924419403076, "step": 1680 }, { "epoch": 0.4153694094390907, "grad_norm": 1.1688382481004065, "learning_rate": 1.8673502419867912e-05, "loss": 0.41072142124176025, "step": 1681 }, { "epoch": 0.4156165060538671, "grad_norm": 1.0666163695928876, "learning_rate": 1.8671468186783594e-05, "loss": 0.3395834267139435, "step": 1682 }, { "epoch": 0.4158636026686434, "grad_norm": 1.2068466703011287, "learning_rate": 1.8669432506085684e-05, "loss": 0.4159921407699585, "step": 1683 }, { "epoch": 0.4161106992834198, "grad_norm": 1.235186860800881, "learning_rate": 1.8667395378114013e-05, "loss": 0.3951745629310608, "step": 1684 }, { "epoch": 0.4163577958981962, "grad_norm": 1.18123937032509, "learning_rate": 1.8665356803208668e-05, "loss": 0.40199580788612366, "step": 1685 }, { "epoch": 0.41660489251297256, "grad_norm": 1.0719266842830353, "learning_rate": 1.8663316781709958e-05, "loss": 0.35418790578842163, "step": 1686 }, { "epoch": 0.41685198912774896, "grad_norm": 1.1679860203431376, "learning_rate": 1.866127531395845e-05, "loss": 0.3994465470314026, "step": 1687 }, { "epoch": 0.41709908574252536, "grad_norm": 1.2080821931353036, "learning_rate": 1.8659232400294947e-05, "loss": 0.40807044506073, "step": 1688 }, { "epoch": 0.4173461823573017, "grad_norm": 1.133948704873941, "learning_rate": 1.8657188041060487e-05, "loss": 0.42539680004119873, "step": 1689 }, { "epoch": 0.4175932789720781, "grad_norm": 1.2452702232964947, "learning_rate": 1.865514223659636e-05, "loss": 0.37428683042526245, "step": 1690 }, { "epoch": 0.41784037558685444, "grad_norm": 1.1399487997779345, "learning_rate": 1.8653094987244093e-05, "loss": 0.3884751796722412, "step": 1691 }, { "epoch": 0.41808747220163084, "grad_norm": 1.1913061028462608, "learning_rate": 1.865104629334545e-05, "loss": 0.3977842926979065, "step": 1692 }, { "epoch": 0.41833456881640724, "grad_norm": 1.235624545176243, "learning_rate": 1.864899615524244e-05, "loss": 0.40000832080841064, "step": 1693 }, { "epoch": 0.4185816654311836, "grad_norm": 1.134166315915228, "learning_rate": 1.8646944573277316e-05, "loss": 0.40565475821495056, "step": 1694 }, { "epoch": 0.41882876204596, "grad_norm": 1.1523853354654963, "learning_rate": 1.8644891547792565e-05, "loss": 0.36939337849617004, "step": 1695 }, { "epoch": 0.4190758586607363, "grad_norm": 1.2392549931661876, "learning_rate": 1.864283707913092e-05, "loss": 0.46776366233825684, "step": 1696 }, { "epoch": 0.4193229552755127, "grad_norm": 1.1565539558019242, "learning_rate": 1.8640781167635356e-05, "loss": 0.35708391666412354, "step": 1697 }, { "epoch": 0.4195700518902891, "grad_norm": 1.1669741617243128, "learning_rate": 1.8638723813649083e-05, "loss": 0.4221571683883667, "step": 1698 }, { "epoch": 0.41981714850506546, "grad_norm": 1.0775385082558857, "learning_rate": 1.863666501751556e-05, "loss": 0.38862907886505127, "step": 1699 }, { "epoch": 0.42006424511984186, "grad_norm": 1.1905389332458938, "learning_rate": 1.8634604779578476e-05, "loss": 0.4252963066101074, "step": 1700 }, { "epoch": 0.42031134173461826, "grad_norm": 1.1664794573598785, "learning_rate": 1.863254310018177e-05, "loss": 0.4012037515640259, "step": 1701 }, { "epoch": 0.4205584383493946, "grad_norm": 1.165685529488528, "learning_rate": 1.863047997966962e-05, "loss": 0.3769227862358093, "step": 1702 }, { "epoch": 0.420805534964171, "grad_norm": 1.0560381515819628, "learning_rate": 1.8628415418386443e-05, "loss": 0.34694722294807434, "step": 1703 }, { "epoch": 0.42105263157894735, "grad_norm": 1.2193873160862285, "learning_rate": 1.862634941667689e-05, "loss": 0.39068669080734253, "step": 1704 }, { "epoch": 0.42129972819372374, "grad_norm": 1.2339558966491897, "learning_rate": 1.862428197488587e-05, "loss": 0.4871863126754761, "step": 1705 }, { "epoch": 0.42154682480850014, "grad_norm": 1.2283051507863652, "learning_rate": 1.862221309335851e-05, "loss": 0.413157194852829, "step": 1706 }, { "epoch": 0.4217939214232765, "grad_norm": 1.1245925593827324, "learning_rate": 1.8620142772440197e-05, "loss": 0.3703014850616455, "step": 1707 }, { "epoch": 0.4220410180380529, "grad_norm": 1.1264943790731843, "learning_rate": 1.8618071012476546e-05, "loss": 0.3837234377861023, "step": 1708 }, { "epoch": 0.4222881146528293, "grad_norm": 1.3047054932544806, "learning_rate": 1.8615997813813417e-05, "loss": 0.41123712062835693, "step": 1709 }, { "epoch": 0.4225352112676056, "grad_norm": 1.1029200207593146, "learning_rate": 1.8613923176796912e-05, "loss": 0.3685697317123413, "step": 1710 }, { "epoch": 0.422782307882382, "grad_norm": 1.1084338311795472, "learning_rate": 1.861184710177337e-05, "loss": 0.36001652479171753, "step": 1711 }, { "epoch": 0.42302940449715837, "grad_norm": 2.3812412253277837, "learning_rate": 1.8609769589089363e-05, "loss": 0.3842385411262512, "step": 1712 }, { "epoch": 0.42327650111193477, "grad_norm": 1.1987937217717493, "learning_rate": 1.8607690639091723e-05, "loss": 0.3881821632385254, "step": 1713 }, { "epoch": 0.42352359772671117, "grad_norm": 1.2320948031921137, "learning_rate": 1.86056102521275e-05, "loss": 0.408403217792511, "step": 1714 }, { "epoch": 0.4237706943414875, "grad_norm": 1.1819626038302342, "learning_rate": 1.8603528428544e-05, "loss": 0.4222238063812256, "step": 1715 }, { "epoch": 0.4240177909562639, "grad_norm": 1.0814159760769715, "learning_rate": 1.8601445168688757e-05, "loss": 0.3697231709957123, "step": 1716 }, { "epoch": 0.42426488757104025, "grad_norm": 1.1832146724305646, "learning_rate": 1.859936047290955e-05, "loss": 0.3791312575340271, "step": 1717 }, { "epoch": 0.42451198418581665, "grad_norm": 1.1301269271597238, "learning_rate": 1.8597274341554406e-05, "loss": 0.41344374418258667, "step": 1718 }, { "epoch": 0.42475908080059305, "grad_norm": 1.0534296154711043, "learning_rate": 1.859518677497157e-05, "loss": 0.3596171438694, "step": 1719 }, { "epoch": 0.4250061774153694, "grad_norm": 1.2263554213455343, "learning_rate": 1.8593097773509557e-05, "loss": 0.40756022930145264, "step": 1720 }, { "epoch": 0.4252532740301458, "grad_norm": 1.1897171559059407, "learning_rate": 1.859100733751709e-05, "loss": 0.39225006103515625, "step": 1721 }, { "epoch": 0.4255003706449222, "grad_norm": 1.1795939562515347, "learning_rate": 1.8588915467343153e-05, "loss": 0.3467811346054077, "step": 1722 }, { "epoch": 0.42574746725969853, "grad_norm": 1.1712164590751846, "learning_rate": 1.8586822163336958e-05, "loss": 0.3841426968574524, "step": 1723 }, { "epoch": 0.42599456387447493, "grad_norm": 1.0876012071850174, "learning_rate": 1.8584727425847968e-05, "loss": 0.3134393095970154, "step": 1724 }, { "epoch": 0.4262416604892513, "grad_norm": 1.1692895844529714, "learning_rate": 1.8582631255225873e-05, "loss": 0.4121391773223877, "step": 1725 }, { "epoch": 0.42648875710402767, "grad_norm": 1.1981252701523457, "learning_rate": 1.85805336518206e-05, "loss": 0.38527095317840576, "step": 1726 }, { "epoch": 0.42673585371880407, "grad_norm": 1.1472386338252043, "learning_rate": 1.857843461598234e-05, "loss": 0.35646048188209534, "step": 1727 }, { "epoch": 0.4269829503335804, "grad_norm": 1.2203718623984403, "learning_rate": 1.8576334148061495e-05, "loss": 0.3851707875728607, "step": 1728 }, { "epoch": 0.4272300469483568, "grad_norm": 1.2852730276262152, "learning_rate": 1.8574232248408715e-05, "loss": 0.4052029848098755, "step": 1729 }, { "epoch": 0.4274771435631332, "grad_norm": 0.9955875967689474, "learning_rate": 1.8572128917374898e-05, "loss": 0.3328041434288025, "step": 1730 }, { "epoch": 0.42772424017790955, "grad_norm": 1.07372636567073, "learning_rate": 1.857002415531117e-05, "loss": 0.3061659038066864, "step": 1731 }, { "epoch": 0.42797133679268595, "grad_norm": 1.203783420801546, "learning_rate": 1.8567917962568897e-05, "loss": 0.3507237732410431, "step": 1732 }, { "epoch": 0.4282184334074623, "grad_norm": 1.2293566473358521, "learning_rate": 1.8565810339499694e-05, "loss": 0.3459499478340149, "step": 1733 }, { "epoch": 0.4284655300222387, "grad_norm": 1.36678511827485, "learning_rate": 1.85637012864554e-05, "loss": 0.451356440782547, "step": 1734 }, { "epoch": 0.4287126266370151, "grad_norm": 1.1213984240033767, "learning_rate": 1.8561590803788098e-05, "loss": 0.39018842577934265, "step": 1735 }, { "epoch": 0.42895972325179144, "grad_norm": 1.2348903588996363, "learning_rate": 1.8559478891850122e-05, "loss": 0.4670308828353882, "step": 1736 }, { "epoch": 0.42920681986656783, "grad_norm": 1.296243019993213, "learning_rate": 1.8557365550994025e-05, "loss": 0.3775119185447693, "step": 1737 }, { "epoch": 0.4294539164813442, "grad_norm": 1.1376359286279003, "learning_rate": 1.855525078157261e-05, "loss": 0.3638717830181122, "step": 1738 }, { "epoch": 0.4297010130961206, "grad_norm": 1.2890940495892758, "learning_rate": 1.855313458393892e-05, "loss": 0.3993796706199646, "step": 1739 }, { "epoch": 0.429948109710897, "grad_norm": 1.0748922408120694, "learning_rate": 1.855101695844623e-05, "loss": 0.3217151165008545, "step": 1740 }, { "epoch": 0.4301952063256733, "grad_norm": 1.217806944234736, "learning_rate": 1.8548897905448056e-05, "loss": 0.39489588141441345, "step": 1741 }, { "epoch": 0.4304423029404497, "grad_norm": 1.3001587791056046, "learning_rate": 1.8546777425298154e-05, "loss": 0.3999299108982086, "step": 1742 }, { "epoch": 0.4306893995552261, "grad_norm": 1.262161666004398, "learning_rate": 1.854465551835051e-05, "loss": 0.43013450503349304, "step": 1743 }, { "epoch": 0.43093649617000246, "grad_norm": 1.1851853529558862, "learning_rate": 1.8542532184959366e-05, "loss": 0.33886104822158813, "step": 1744 }, { "epoch": 0.43118359278477886, "grad_norm": 1.2089187383969466, "learning_rate": 1.854040742547918e-05, "loss": 0.4062986373901367, "step": 1745 }, { "epoch": 0.4314306893995552, "grad_norm": 1.1209122177152793, "learning_rate": 1.853828124026467e-05, "loss": 0.37855738401412964, "step": 1746 }, { "epoch": 0.4316777860143316, "grad_norm": 1.2601117438819065, "learning_rate": 1.853615362967077e-05, "loss": 0.4199727177619934, "step": 1747 }, { "epoch": 0.431924882629108, "grad_norm": 1.2942568190506176, "learning_rate": 1.8534024594052668e-05, "loss": 0.4541405737400055, "step": 1748 }, { "epoch": 0.43217197924388434, "grad_norm": 1.2151115182976155, "learning_rate": 1.853189413376579e-05, "loss": 0.4016333222389221, "step": 1749 }, { "epoch": 0.43241907585866074, "grad_norm": 1.167029060653222, "learning_rate": 1.8529762249165783e-05, "loss": 0.4280731678009033, "step": 1750 }, { "epoch": 0.43266617247343714, "grad_norm": 1.0952538231012667, "learning_rate": 1.8527628940608555e-05, "loss": 0.3099188208580017, "step": 1751 }, { "epoch": 0.4329132690882135, "grad_norm": 1.172868057825325, "learning_rate": 1.8525494208450235e-05, "loss": 0.39680683612823486, "step": 1752 }, { "epoch": 0.4331603657029899, "grad_norm": 1.107617765741365, "learning_rate": 1.8523358053047195e-05, "loss": 0.34064793586730957, "step": 1753 }, { "epoch": 0.4334074623177662, "grad_norm": 1.1830041115443615, "learning_rate": 1.8521220474756044e-05, "loss": 0.38072189688682556, "step": 1754 }, { "epoch": 0.4336545589325426, "grad_norm": 1.2670676934430067, "learning_rate": 1.8519081473933628e-05, "loss": 0.42038214206695557, "step": 1755 }, { "epoch": 0.433901655547319, "grad_norm": 1.1351627677152158, "learning_rate": 1.8516941050937035e-05, "loss": 0.4489743709564209, "step": 1756 }, { "epoch": 0.43414875216209536, "grad_norm": 1.0520789208594734, "learning_rate": 1.8514799206123582e-05, "loss": 0.34373366832733154, "step": 1757 }, { "epoch": 0.43439584877687176, "grad_norm": 1.2355564668941355, "learning_rate": 1.8512655939850838e-05, "loss": 0.3871777653694153, "step": 1758 }, { "epoch": 0.43464294539164816, "grad_norm": 1.1761175121156777, "learning_rate": 1.851051125247659e-05, "loss": 0.43252885341644287, "step": 1759 }, { "epoch": 0.4348900420064245, "grad_norm": 1.1727424506805995, "learning_rate": 1.8508365144358875e-05, "loss": 0.4065181016921997, "step": 1760 }, { "epoch": 0.4351371386212009, "grad_norm": 1.2769574292767616, "learning_rate": 1.850621761585596e-05, "loss": 0.3935182988643646, "step": 1761 }, { "epoch": 0.43538423523597725, "grad_norm": 1.1314401135476762, "learning_rate": 1.850406866732636e-05, "loss": 0.4234010577201843, "step": 1762 }, { "epoch": 0.43563133185075364, "grad_norm": 1.1100627255985025, "learning_rate": 1.8501918299128814e-05, "loss": 0.3946491479873657, "step": 1763 }, { "epoch": 0.43587842846553004, "grad_norm": 1.3924192400778104, "learning_rate": 1.8499766511622312e-05, "loss": 0.3729901909828186, "step": 1764 }, { "epoch": 0.4361255250803064, "grad_norm": 1.3841957470200166, "learning_rate": 1.8497613305166065e-05, "loss": 0.4389552175998688, "step": 1765 }, { "epoch": 0.4363726216950828, "grad_norm": 1.1705337188982725, "learning_rate": 1.8495458680119533e-05, "loss": 0.41853874921798706, "step": 1766 }, { "epoch": 0.43661971830985913, "grad_norm": 1.0928696384284229, "learning_rate": 1.8493302636842406e-05, "loss": 0.33199799060821533, "step": 1767 }, { "epoch": 0.4368668149246355, "grad_norm": 1.0206123943713137, "learning_rate": 1.8491145175694616e-05, "loss": 0.3505648374557495, "step": 1768 }, { "epoch": 0.4371139115394119, "grad_norm": 1.144835287195326, "learning_rate": 1.8488986297036332e-05, "loss": 0.35090172290802, "step": 1769 }, { "epoch": 0.43736100815418827, "grad_norm": 1.0623189342867303, "learning_rate": 1.848682600122795e-05, "loss": 0.3591504693031311, "step": 1770 }, { "epoch": 0.43760810476896467, "grad_norm": 1.162560582257692, "learning_rate": 1.8484664288630113e-05, "loss": 0.41143256425857544, "step": 1771 }, { "epoch": 0.43785520138374107, "grad_norm": 1.095839114113142, "learning_rate": 1.8482501159603695e-05, "loss": 0.3729006052017212, "step": 1772 }, { "epoch": 0.4381022979985174, "grad_norm": 1.2402545025657645, "learning_rate": 1.848033661450981e-05, "loss": 0.3678221106529236, "step": 1773 }, { "epoch": 0.4383493946132938, "grad_norm": 1.1346564689769827, "learning_rate": 1.8478170653709813e-05, "loss": 0.3700699508190155, "step": 1774 }, { "epoch": 0.43859649122807015, "grad_norm": 1.1738044863941766, "learning_rate": 1.8476003277565282e-05, "loss": 0.360930860042572, "step": 1775 }, { "epoch": 0.43884358784284655, "grad_norm": 1.2060060829192942, "learning_rate": 1.8473834486438037e-05, "loss": 0.36445021629333496, "step": 1776 }, { "epoch": 0.43909068445762295, "grad_norm": 1.1405349287923077, "learning_rate": 1.847166428069014e-05, "loss": 0.39872556924819946, "step": 1777 }, { "epoch": 0.4393377810723993, "grad_norm": 1.244238298272981, "learning_rate": 1.8469492660683884e-05, "loss": 0.39756572246551514, "step": 1778 }, { "epoch": 0.4395848776871757, "grad_norm": 1.203652732726797, "learning_rate": 1.8467319626781796e-05, "loss": 0.4037621021270752, "step": 1779 }, { "epoch": 0.4398319743019521, "grad_norm": 1.2957322112014364, "learning_rate": 1.8465145179346647e-05, "loss": 0.4075017273426056, "step": 1780 }, { "epoch": 0.44007907091672843, "grad_norm": 1.3095889193744317, "learning_rate": 1.8462969318741433e-05, "loss": 0.3921661972999573, "step": 1781 }, { "epoch": 0.44032616753150483, "grad_norm": 0.9857843639369627, "learning_rate": 1.8460792045329398e-05, "loss": 0.2942311763763428, "step": 1782 }, { "epoch": 0.4405732641462812, "grad_norm": 1.0592043368787838, "learning_rate": 1.845861335947401e-05, "loss": 0.3681548237800598, "step": 1783 }, { "epoch": 0.4408203607610576, "grad_norm": 1.0819553625310572, "learning_rate": 1.8456433261538982e-05, "loss": 0.3173884153366089, "step": 1784 }, { "epoch": 0.44106745737583397, "grad_norm": 1.1829046751891255, "learning_rate": 1.845425175188826e-05, "loss": 0.3641677498817444, "step": 1785 }, { "epoch": 0.4413145539906103, "grad_norm": 1.185600614955508, "learning_rate": 1.845206883088602e-05, "loss": 0.3580746650695801, "step": 1786 }, { "epoch": 0.4415616506053867, "grad_norm": 1.1370437695127582, "learning_rate": 1.844988449889668e-05, "loss": 0.3682413101196289, "step": 1787 }, { "epoch": 0.44180874722016306, "grad_norm": 1.1138565300831584, "learning_rate": 1.84476987562849e-05, "loss": 0.36649730801582336, "step": 1788 }, { "epoch": 0.44205584383493945, "grad_norm": 1.0628499950960364, "learning_rate": 1.8445511603415557e-05, "loss": 0.3356555700302124, "step": 1789 }, { "epoch": 0.44230294044971585, "grad_norm": 1.1576057206481642, "learning_rate": 1.8443323040653783e-05, "loss": 0.40255075693130493, "step": 1790 }, { "epoch": 0.4425500370644922, "grad_norm": 1.2489864077219648, "learning_rate": 1.844113306836493e-05, "loss": 0.4231100082397461, "step": 1791 }, { "epoch": 0.4427971336792686, "grad_norm": 1.1639553166116237, "learning_rate": 1.843894168691459e-05, "loss": 0.37110739946365356, "step": 1792 }, { "epoch": 0.443044230294045, "grad_norm": 1.1247079411702945, "learning_rate": 1.8436748896668594e-05, "loss": 0.4046688675880432, "step": 1793 }, { "epoch": 0.44329132690882134, "grad_norm": 1.2383674319593667, "learning_rate": 1.8434554697993013e-05, "loss": 0.4063264727592468, "step": 1794 }, { "epoch": 0.44353842352359774, "grad_norm": 1.2571470144738106, "learning_rate": 1.8432359091254137e-05, "loss": 0.33532416820526123, "step": 1795 }, { "epoch": 0.4437855201383741, "grad_norm": 1.1715890018086386, "learning_rate": 1.8430162076818507e-05, "loss": 0.3942539691925049, "step": 1796 }, { "epoch": 0.4440326167531505, "grad_norm": 1.2432878746487799, "learning_rate": 1.842796365505288e-05, "loss": 0.3950515687465668, "step": 1797 }, { "epoch": 0.4442797133679269, "grad_norm": 1.129818564540944, "learning_rate": 1.842576382632428e-05, "loss": 0.32610857486724854, "step": 1798 }, { "epoch": 0.4445268099827032, "grad_norm": 1.1267796591172703, "learning_rate": 1.842356259099993e-05, "loss": 0.34566745162010193, "step": 1799 }, { "epoch": 0.4447739065974796, "grad_norm": 1.243155212479976, "learning_rate": 1.8421359949447312e-05, "loss": 0.3785676956176758, "step": 1800 }, { "epoch": 0.445021003212256, "grad_norm": 1.248705620525549, "learning_rate": 1.841915590203413e-05, "loss": 0.39301079511642456, "step": 1801 }, { "epoch": 0.44526809982703236, "grad_norm": 1.298994461450318, "learning_rate": 1.8416950449128327e-05, "loss": 0.40900808572769165, "step": 1802 }, { "epoch": 0.44551519644180876, "grad_norm": 1.1771917991656773, "learning_rate": 1.8414743591098086e-05, "loss": 0.43811002373695374, "step": 1803 }, { "epoch": 0.4457622930565851, "grad_norm": 1.1702741662654668, "learning_rate": 1.8412535328311813e-05, "loss": 0.3508942723274231, "step": 1804 }, { "epoch": 0.4460093896713615, "grad_norm": 1.1673206088327883, "learning_rate": 1.841032566113816e-05, "loss": 0.3592892289161682, "step": 1805 }, { "epoch": 0.4462564862861379, "grad_norm": 1.0499121618420102, "learning_rate": 1.8408114589946012e-05, "loss": 0.34994858503341675, "step": 1806 }, { "epoch": 0.44650358290091424, "grad_norm": 1.146313826634462, "learning_rate": 1.840590211510447e-05, "loss": 0.3730677664279938, "step": 1807 }, { "epoch": 0.44675067951569064, "grad_norm": 1.1755258474228583, "learning_rate": 1.8403688236982904e-05, "loss": 0.4182075262069702, "step": 1808 }, { "epoch": 0.44699777613046704, "grad_norm": 1.2330055612205704, "learning_rate": 1.8401472955950884e-05, "loss": 0.39240318536758423, "step": 1809 }, { "epoch": 0.4472448727452434, "grad_norm": 1.2804203430760672, "learning_rate": 1.8399256272378235e-05, "loss": 0.4253978133201599, "step": 1810 }, { "epoch": 0.4474919693600198, "grad_norm": 1.1137585577727838, "learning_rate": 1.8397038186635013e-05, "loss": 0.36131590604782104, "step": 1811 }, { "epoch": 0.4477390659747961, "grad_norm": 1.3074189402516945, "learning_rate": 1.8394818699091495e-05, "loss": 0.47745636105537415, "step": 1812 }, { "epoch": 0.4479861625895725, "grad_norm": 1.1922151566751387, "learning_rate": 1.8392597810118207e-05, "loss": 0.38868242502212524, "step": 1813 }, { "epoch": 0.4482332592043489, "grad_norm": 1.129007131488862, "learning_rate": 1.8390375520085905e-05, "loss": 0.36301571130752563, "step": 1814 }, { "epoch": 0.44848035581912526, "grad_norm": 1.1831399852764977, "learning_rate": 1.8388151829365575e-05, "loss": 0.38781702518463135, "step": 1815 }, { "epoch": 0.44872745243390166, "grad_norm": 1.1910872120343272, "learning_rate": 1.8385926738328443e-05, "loss": 0.4009385108947754, "step": 1816 }, { "epoch": 0.448974549048678, "grad_norm": 1.2080824448285414, "learning_rate": 1.8383700247345965e-05, "loss": 0.39104413986206055, "step": 1817 }, { "epoch": 0.4492216456634544, "grad_norm": 1.1333402954726324, "learning_rate": 1.8381472356789827e-05, "loss": 0.38540053367614746, "step": 1818 }, { "epoch": 0.4494687422782308, "grad_norm": 1.3061048790715766, "learning_rate": 1.8379243067031957e-05, "loss": 0.44660472869873047, "step": 1819 }, { "epoch": 0.44971583889300715, "grad_norm": 1.0751474590205279, "learning_rate": 1.837701237844451e-05, "loss": 0.3536609411239624, "step": 1820 }, { "epoch": 0.44996293550778355, "grad_norm": 1.0644320931537756, "learning_rate": 1.8374780291399877e-05, "loss": 0.33608323335647583, "step": 1821 }, { "epoch": 0.45021003212255994, "grad_norm": 1.1775938707102767, "learning_rate": 1.8372546806270686e-05, "loss": 0.4056907892227173, "step": 1822 }, { "epoch": 0.4504571287373363, "grad_norm": 1.2432558720183715, "learning_rate": 1.837031192342979e-05, "loss": 0.3686826229095459, "step": 1823 }, { "epoch": 0.4507042253521127, "grad_norm": 1.1121349463336228, "learning_rate": 1.836807564325028e-05, "loss": 0.341969758272171, "step": 1824 }, { "epoch": 0.45095132196688903, "grad_norm": 1.0768765243805982, "learning_rate": 1.8365837966105486e-05, "loss": 0.3463117778301239, "step": 1825 }, { "epoch": 0.45119841858166543, "grad_norm": 1.1048144074680917, "learning_rate": 1.8363598892368955e-05, "loss": 0.342210590839386, "step": 1826 }, { "epoch": 0.4514455151964418, "grad_norm": 1.0370657598707398, "learning_rate": 1.836135842241449e-05, "loss": 0.3116580843925476, "step": 1827 }, { "epoch": 0.45169261181121817, "grad_norm": 1.046057072703626, "learning_rate": 1.8359116556616103e-05, "loss": 0.357785165309906, "step": 1828 }, { "epoch": 0.45193970842599457, "grad_norm": 1.1630165678027538, "learning_rate": 1.835687329534806e-05, "loss": 0.37428444623947144, "step": 1829 }, { "epoch": 0.45218680504077097, "grad_norm": 1.0886578201186776, "learning_rate": 1.8354628638984846e-05, "loss": 0.3680313229560852, "step": 1830 }, { "epoch": 0.4524339016555473, "grad_norm": 1.2492167281705446, "learning_rate": 1.8352382587901186e-05, "loss": 0.411393404006958, "step": 1831 }, { "epoch": 0.4526809982703237, "grad_norm": 1.0086300166798239, "learning_rate": 1.835013514247203e-05, "loss": 0.3144400715827942, "step": 1832 }, { "epoch": 0.45292809488510005, "grad_norm": 1.1319433894629272, "learning_rate": 1.834788630307258e-05, "loss": 0.41399556398391724, "step": 1833 }, { "epoch": 0.45317519149987645, "grad_norm": 1.0662559836769745, "learning_rate": 1.834563607007824e-05, "loss": 0.32497504353523254, "step": 1834 }, { "epoch": 0.45342228811465285, "grad_norm": 1.2287777539702542, "learning_rate": 1.8343384443864672e-05, "loss": 0.4114522337913513, "step": 1835 }, { "epoch": 0.4536693847294292, "grad_norm": 1.198813353892091, "learning_rate": 1.834113142480776e-05, "loss": 0.36219334602355957, "step": 1836 }, { "epoch": 0.4539164813442056, "grad_norm": 1.0479243982979098, "learning_rate": 1.8338877013283623e-05, "loss": 0.32422730326652527, "step": 1837 }, { "epoch": 0.45416357795898193, "grad_norm": 1.1803099861592163, "learning_rate": 1.8336621209668614e-05, "loss": 0.3765912652015686, "step": 1838 }, { "epoch": 0.45441067457375833, "grad_norm": 1.1164695406083383, "learning_rate": 1.8334364014339316e-05, "loss": 0.4034706950187683, "step": 1839 }, { "epoch": 0.45465777118853473, "grad_norm": 1.217775390425771, "learning_rate": 1.8332105427672543e-05, "loss": 0.44991642236709595, "step": 1840 }, { "epoch": 0.4549048678033111, "grad_norm": 1.1893285820667097, "learning_rate": 1.8329845450045344e-05, "loss": 0.3855817914009094, "step": 1841 }, { "epoch": 0.4551519644180875, "grad_norm": 1.1348777739034257, "learning_rate": 1.8327584081835e-05, "loss": 0.40558964014053345, "step": 1842 }, { "epoch": 0.45539906103286387, "grad_norm": 1.1636921441811459, "learning_rate": 1.8325321323419022e-05, "loss": 0.4076579213142395, "step": 1843 }, { "epoch": 0.4556461576476402, "grad_norm": 1.1685808128603838, "learning_rate": 1.8323057175175155e-05, "loss": 0.38466909527778625, "step": 1844 }, { "epoch": 0.4558932542624166, "grad_norm": 1.2812010849966688, "learning_rate": 1.832079163748138e-05, "loss": 0.40721800923347473, "step": 1845 }, { "epoch": 0.45614035087719296, "grad_norm": 1.186754578211296, "learning_rate": 1.83185247107159e-05, "loss": 0.3755691647529602, "step": 1846 }, { "epoch": 0.45638744749196936, "grad_norm": 1.2781205607986958, "learning_rate": 1.8316256395257156e-05, "loss": 0.44984978437423706, "step": 1847 }, { "epoch": 0.45663454410674575, "grad_norm": 1.218020402163014, "learning_rate": 1.8313986691483823e-05, "loss": 0.3843593895435333, "step": 1848 }, { "epoch": 0.4568816407215221, "grad_norm": 1.1676686211716536, "learning_rate": 1.83117155997748e-05, "loss": 0.40070974826812744, "step": 1849 }, { "epoch": 0.4571287373362985, "grad_norm": 1.1915705972952726, "learning_rate": 1.8309443120509226e-05, "loss": 0.34405118227005005, "step": 1850 }, { "epoch": 0.4573758339510749, "grad_norm": 1.2155033152033263, "learning_rate": 1.8307169254066474e-05, "loss": 0.4004865288734436, "step": 1851 }, { "epoch": 0.45762293056585124, "grad_norm": 1.1032493613222025, "learning_rate": 1.8304894000826133e-05, "loss": 0.3686661124229431, "step": 1852 }, { "epoch": 0.45787002718062764, "grad_norm": 1.2159117041829162, "learning_rate": 1.8302617361168038e-05, "loss": 0.42856645584106445, "step": 1853 }, { "epoch": 0.458117123795404, "grad_norm": 1.2090435230878411, "learning_rate": 1.830033933547225e-05, "loss": 0.404352068901062, "step": 1854 }, { "epoch": 0.4583642204101804, "grad_norm": 1.1455405910146248, "learning_rate": 1.8298059924119066e-05, "loss": 0.3800197243690491, "step": 1855 }, { "epoch": 0.4586113170249568, "grad_norm": 1.116735851065093, "learning_rate": 1.8295779127489e-05, "loss": 0.3440534770488739, "step": 1856 }, { "epoch": 0.4588584136397331, "grad_norm": 1.200749054869804, "learning_rate": 1.8293496945962824e-05, "loss": 0.36578911542892456, "step": 1857 }, { "epoch": 0.4591055102545095, "grad_norm": 1.065824494431092, "learning_rate": 1.829121337992151e-05, "loss": 0.37044957280158997, "step": 1858 }, { "epoch": 0.4593526068692859, "grad_norm": 1.2892567798943195, "learning_rate": 1.828892842974629e-05, "loss": 0.45479556918144226, "step": 1859 }, { "epoch": 0.45959970348406226, "grad_norm": 1.2134267451722636, "learning_rate": 1.82866420958186e-05, "loss": 0.3658345341682434, "step": 1860 }, { "epoch": 0.45984680009883866, "grad_norm": 1.199612186187777, "learning_rate": 1.8284354378520127e-05, "loss": 0.3572714924812317, "step": 1861 }, { "epoch": 0.460093896713615, "grad_norm": 1.2043507457647928, "learning_rate": 1.828206527823278e-05, "loss": 0.41433995962142944, "step": 1862 }, { "epoch": 0.4603409933283914, "grad_norm": 1.1327818115979735, "learning_rate": 1.8279774795338702e-05, "loss": 0.36114782094955444, "step": 1863 }, { "epoch": 0.4605880899431678, "grad_norm": 1.220479369588926, "learning_rate": 1.827748293022027e-05, "loss": 0.3809395730495453, "step": 1864 }, { "epoch": 0.46083518655794414, "grad_norm": 1.197558453049441, "learning_rate": 1.8275189683260083e-05, "loss": 0.41907504200935364, "step": 1865 }, { "epoch": 0.46108228317272054, "grad_norm": 1.1094166445952036, "learning_rate": 1.8272895054840974e-05, "loss": 0.353055477142334, "step": 1866 }, { "epoch": 0.4613293797874969, "grad_norm": 1.2961828512737958, "learning_rate": 1.827059904534601e-05, "loss": 0.4056320786476135, "step": 1867 }, { "epoch": 0.4615764764022733, "grad_norm": 1.109366483354244, "learning_rate": 1.8268301655158486e-05, "loss": 0.3439268469810486, "step": 1868 }, { "epoch": 0.4618235730170497, "grad_norm": 1.1550424176137977, "learning_rate": 1.8266002884661928e-05, "loss": 0.3799242377281189, "step": 1869 }, { "epoch": 0.462070669631826, "grad_norm": 1.1854487208359026, "learning_rate": 1.8263702734240093e-05, "loss": 0.37509775161743164, "step": 1870 }, { "epoch": 0.4623177662466024, "grad_norm": 1.2562467122160943, "learning_rate": 1.8261401204276968e-05, "loss": 0.4096122086048126, "step": 1871 }, { "epoch": 0.4625648628613788, "grad_norm": 1.3831080035955141, "learning_rate": 1.8259098295156773e-05, "loss": 0.43684685230255127, "step": 1872 }, { "epoch": 0.46281195947615517, "grad_norm": 1.2851559644276302, "learning_rate": 1.8256794007263946e-05, "loss": 0.352447509765625, "step": 1873 }, { "epoch": 0.46305905609093156, "grad_norm": 1.1413797750997947, "learning_rate": 1.8254488340983176e-05, "loss": 0.3423295021057129, "step": 1874 }, { "epoch": 0.4633061527057079, "grad_norm": 1.2011849045554743, "learning_rate": 1.825218129669936e-05, "loss": 0.3977935314178467, "step": 1875 }, { "epoch": 0.4635532493204843, "grad_norm": 1.2801236495413326, "learning_rate": 1.8249872874797643e-05, "loss": 0.39860475063323975, "step": 1876 }, { "epoch": 0.4638003459352607, "grad_norm": 1.1089783159962037, "learning_rate": 1.824756307566339e-05, "loss": 0.36551350355148315, "step": 1877 }, { "epoch": 0.46404744255003705, "grad_norm": 1.213480381830983, "learning_rate": 1.82452518996822e-05, "loss": 0.37153416872024536, "step": 1878 }, { "epoch": 0.46429453916481345, "grad_norm": 1.1296738636557293, "learning_rate": 1.8242939347239897e-05, "loss": 0.36113864183425903, "step": 1879 }, { "epoch": 0.46454163577958985, "grad_norm": 1.1753370751595225, "learning_rate": 1.8240625418722543e-05, "loss": 0.35717591643333435, "step": 1880 }, { "epoch": 0.4647887323943662, "grad_norm": 1.1482880793033619, "learning_rate": 1.8238310114516417e-05, "loss": 0.3502758741378784, "step": 1881 }, { "epoch": 0.4650358290091426, "grad_norm": 1.2015365841779895, "learning_rate": 1.8235993435008045e-05, "loss": 0.42684006690979004, "step": 1882 }, { "epoch": 0.46528292562391893, "grad_norm": 1.2419341811999793, "learning_rate": 1.8233675380584168e-05, "loss": 0.36604875326156616, "step": 1883 }, { "epoch": 0.46553002223869533, "grad_norm": 1.2392373497129374, "learning_rate": 1.8231355951631765e-05, "loss": 0.41317498683929443, "step": 1884 }, { "epoch": 0.4657771188534717, "grad_norm": 1.228417367427705, "learning_rate": 1.8229035148538036e-05, "loss": 0.37803584337234497, "step": 1885 }, { "epoch": 0.46602421546824807, "grad_norm": 1.0277109941341855, "learning_rate": 1.8226712971690416e-05, "loss": 0.31939494609832764, "step": 1886 }, { "epoch": 0.46627131208302447, "grad_norm": 1.0785764947424097, "learning_rate": 1.8224389421476572e-05, "loss": 0.3431977927684784, "step": 1887 }, { "epoch": 0.4665184086978008, "grad_norm": 1.1983720719755144, "learning_rate": 1.8222064498284398e-05, "loss": 0.4022766947746277, "step": 1888 }, { "epoch": 0.4667655053125772, "grad_norm": 1.0918994063286975, "learning_rate": 1.821973820250201e-05, "loss": 0.37010300159454346, "step": 1889 }, { "epoch": 0.4670126019273536, "grad_norm": 1.1390466166091973, "learning_rate": 1.8217410534517766e-05, "loss": 0.3009410798549652, "step": 1890 }, { "epoch": 0.46725969854212995, "grad_norm": 1.2491815799964354, "learning_rate": 1.8215081494720248e-05, "loss": 0.3856649696826935, "step": 1891 }, { "epoch": 0.46750679515690635, "grad_norm": 1.1069833876109019, "learning_rate": 1.821275108349826e-05, "loss": 0.35011857748031616, "step": 1892 }, { "epoch": 0.46775389177168275, "grad_norm": 1.2478555143782886, "learning_rate": 1.8210419301240843e-05, "loss": 0.3705919682979584, "step": 1893 }, { "epoch": 0.4680009883864591, "grad_norm": 1.1402036542935088, "learning_rate": 1.8208086148337263e-05, "loss": 0.4017578959465027, "step": 1894 }, { "epoch": 0.4682480850012355, "grad_norm": 1.1163635038576825, "learning_rate": 1.8205751625177014e-05, "loss": 0.3578161895275116, "step": 1895 }, { "epoch": 0.46849518161601184, "grad_norm": 1.141256435096399, "learning_rate": 1.8203415732149828e-05, "loss": 0.353283554315567, "step": 1896 }, { "epoch": 0.46874227823078823, "grad_norm": 0.9927433396702533, "learning_rate": 1.8201078469645655e-05, "loss": 0.33836519718170166, "step": 1897 }, { "epoch": 0.46898937484556463, "grad_norm": 1.1434329275447188, "learning_rate": 1.819873983805467e-05, "loss": 0.3466319441795349, "step": 1898 }, { "epoch": 0.469236471460341, "grad_norm": 1.1815796101288165, "learning_rate": 1.81963998377673e-05, "loss": 0.3817142844200134, "step": 1899 }, { "epoch": 0.4694835680751174, "grad_norm": 1.1851049545139438, "learning_rate": 1.819405846917417e-05, "loss": 0.40476393699645996, "step": 1900 }, { "epoch": 0.4697306646898938, "grad_norm": 1.2717185139710299, "learning_rate": 1.8191715732666154e-05, "loss": 0.37846434116363525, "step": 1901 }, { "epoch": 0.4699777613046701, "grad_norm": 1.2195078465461624, "learning_rate": 1.8189371628634348e-05, "loss": 0.4088021516799927, "step": 1902 }, { "epoch": 0.4702248579194465, "grad_norm": 1.3645043670961612, "learning_rate": 1.8187026157470074e-05, "loss": 0.3403722047805786, "step": 1903 }, { "epoch": 0.47047195453422286, "grad_norm": 1.148503250090614, "learning_rate": 1.8184679319564885e-05, "loss": 0.3706257939338684, "step": 1904 }, { "epoch": 0.47071905114899926, "grad_norm": 1.064799647489829, "learning_rate": 1.818233111531056e-05, "loss": 0.3705982565879822, "step": 1905 }, { "epoch": 0.47096614776377566, "grad_norm": 1.120078671988388, "learning_rate": 1.8179981545099115e-05, "loss": 0.34644371271133423, "step": 1906 }, { "epoch": 0.471213244378552, "grad_norm": 1.2874153032196964, "learning_rate": 1.817763060932278e-05, "loss": 0.39708566665649414, "step": 1907 }, { "epoch": 0.4714603409933284, "grad_norm": 1.1533396721628022, "learning_rate": 1.8175278308374022e-05, "loss": 0.4116443395614624, "step": 1908 }, { "epoch": 0.4717074376081048, "grad_norm": 1.3484309555322926, "learning_rate": 1.8172924642645533e-05, "loss": 0.3253818154335022, "step": 1909 }, { "epoch": 0.47195453422288114, "grad_norm": 1.1647464542388077, "learning_rate": 1.8170569612530236e-05, "loss": 0.3646380603313446, "step": 1910 }, { "epoch": 0.47220163083765754, "grad_norm": 1.1363950515902344, "learning_rate": 1.8168213218421278e-05, "loss": 0.379983127117157, "step": 1911 }, { "epoch": 0.4724487274524339, "grad_norm": 1.084202401686777, "learning_rate": 1.8165855460712033e-05, "loss": 0.3907693326473236, "step": 1912 }, { "epoch": 0.4726958240672103, "grad_norm": 1.2574472675618051, "learning_rate": 1.816349633979611e-05, "loss": 0.47507649660110474, "step": 1913 }, { "epoch": 0.4729429206819867, "grad_norm": 1.09029482749333, "learning_rate": 1.8161135856067337e-05, "loss": 0.34705257415771484, "step": 1914 }, { "epoch": 0.473190017296763, "grad_norm": 1.064936996004779, "learning_rate": 1.815877400991977e-05, "loss": 0.3171876072883606, "step": 1915 }, { "epoch": 0.4734371139115394, "grad_norm": 1.0434536038654718, "learning_rate": 1.8156410801747698e-05, "loss": 0.3733982443809509, "step": 1916 }, { "epoch": 0.47368421052631576, "grad_norm": 1.1819636956316548, "learning_rate": 1.8154046231945636e-05, "loss": 0.36578255891799927, "step": 1917 }, { "epoch": 0.47393130714109216, "grad_norm": 1.1427711433438812, "learning_rate": 1.8151680300908325e-05, "loss": 0.409831702709198, "step": 1918 }, { "epoch": 0.47417840375586856, "grad_norm": 1.1247533864385166, "learning_rate": 1.8149313009030728e-05, "loss": 0.4254469871520996, "step": 1919 }, { "epoch": 0.4744255003706449, "grad_norm": 1.1762134104953992, "learning_rate": 1.814694435670805e-05, "loss": 0.35801035165786743, "step": 1920 }, { "epoch": 0.4746725969854213, "grad_norm": 1.1657765612394164, "learning_rate": 1.8144574344335703e-05, "loss": 0.4006906747817993, "step": 1921 }, { "epoch": 0.4749196936001977, "grad_norm": 1.2386230955863888, "learning_rate": 1.814220297230934e-05, "loss": 0.43167251348495483, "step": 1922 }, { "epoch": 0.47516679021497404, "grad_norm": 1.0679707535806988, "learning_rate": 1.813983024102484e-05, "loss": 0.3613626956939697, "step": 1923 }, { "epoch": 0.47541388682975044, "grad_norm": 1.1902639080900406, "learning_rate": 1.8137456150878306e-05, "loss": 0.38388168811798096, "step": 1924 }, { "epoch": 0.4756609834445268, "grad_norm": 1.1142317108453905, "learning_rate": 1.8135080702266062e-05, "loss": 0.37210190296173096, "step": 1925 }, { "epoch": 0.4759080800593032, "grad_norm": 1.075081482937002, "learning_rate": 1.8132703895584674e-05, "loss": 0.3599686920642853, "step": 1926 }, { "epoch": 0.4761551766740796, "grad_norm": 1.2009979310805683, "learning_rate": 1.8130325731230926e-05, "loss": 0.40868672728538513, "step": 1927 }, { "epoch": 0.4764022732888559, "grad_norm": 1.1986815911621471, "learning_rate": 1.812794620960182e-05, "loss": 0.41109615564346313, "step": 1928 }, { "epoch": 0.4766493699036323, "grad_norm": 1.095453388137244, "learning_rate": 1.8125565331094597e-05, "loss": 0.3752101957798004, "step": 1929 }, { "epoch": 0.4768964665184087, "grad_norm": 1.0857020300128815, "learning_rate": 1.8123183096106723e-05, "loss": 0.4177641272544861, "step": 1930 }, { "epoch": 0.47714356313318507, "grad_norm": 1.1753282021839548, "learning_rate": 1.812079950503588e-05, "loss": 0.4431336522102356, "step": 1931 }, { "epoch": 0.47739065974796147, "grad_norm": 1.1187634969270281, "learning_rate": 1.811841455828e-05, "loss": 0.3981505334377289, "step": 1932 }, { "epoch": 0.4776377563627378, "grad_norm": 1.1673983735826365, "learning_rate": 1.8116028256237208e-05, "loss": 0.39343780279159546, "step": 1933 }, { "epoch": 0.4778848529775142, "grad_norm": 1.1055341132648917, "learning_rate": 1.8113640599305885e-05, "loss": 0.41950756311416626, "step": 1934 }, { "epoch": 0.4781319495922906, "grad_norm": 1.189894690495912, "learning_rate": 1.8111251587884616e-05, "loss": 0.4041418135166168, "step": 1935 }, { "epoch": 0.47837904620706695, "grad_norm": 1.158832949148395, "learning_rate": 1.8108861222372236e-05, "loss": 0.3914214074611664, "step": 1936 }, { "epoch": 0.47862614282184335, "grad_norm": 1.2597772666439673, "learning_rate": 1.8106469503167778e-05, "loss": 0.41457387804985046, "step": 1937 }, { "epoch": 0.4788732394366197, "grad_norm": 1.18915630173791, "learning_rate": 1.8104076430670523e-05, "loss": 0.3749004006385803, "step": 1938 }, { "epoch": 0.4791203360513961, "grad_norm": 1.138233183430653, "learning_rate": 1.810168200527997e-05, "loss": 0.3703514337539673, "step": 1939 }, { "epoch": 0.4793674326661725, "grad_norm": 1.2492594424821366, "learning_rate": 1.8099286227395844e-05, "loss": 0.40864092111587524, "step": 1940 }, { "epoch": 0.47961452928094883, "grad_norm": 1.1148121856990676, "learning_rate": 1.8096889097418092e-05, "loss": 0.36331599950790405, "step": 1941 }, { "epoch": 0.47986162589572523, "grad_norm": 1.2642831216374129, "learning_rate": 1.809449061574689e-05, "loss": 0.33624976873397827, "step": 1942 }, { "epoch": 0.48010872251050163, "grad_norm": 1.077679929159655, "learning_rate": 1.809209078278265e-05, "loss": 0.3186646103858948, "step": 1943 }, { "epoch": 0.48035581912527797, "grad_norm": 1.0808379006698, "learning_rate": 1.808968959892599e-05, "loss": 0.35957348346710205, "step": 1944 }, { "epoch": 0.48060291574005437, "grad_norm": 1.404348043206445, "learning_rate": 1.8087287064577765e-05, "loss": 0.36666029691696167, "step": 1945 }, { "epoch": 0.4808500123548307, "grad_norm": 1.1412383606298055, "learning_rate": 1.8084883180139053e-05, "loss": 0.35959601402282715, "step": 1946 }, { "epoch": 0.4810971089696071, "grad_norm": 1.2120656328954762, "learning_rate": 1.8082477946011162e-05, "loss": 0.37561723589897156, "step": 1947 }, { "epoch": 0.4813442055843835, "grad_norm": 1.1925199269207098, "learning_rate": 1.8080071362595622e-05, "loss": 0.3785945177078247, "step": 1948 }, { "epoch": 0.48159130219915985, "grad_norm": 1.2451626939547766, "learning_rate": 1.8077663430294184e-05, "loss": 0.43941694498062134, "step": 1949 }, { "epoch": 0.48183839881393625, "grad_norm": 1.1945801520884933, "learning_rate": 1.8075254149508827e-05, "loss": 0.4133284091949463, "step": 1950 }, { "epoch": 0.48208549542871265, "grad_norm": 1.1249070550383704, "learning_rate": 1.807284352064176e-05, "loss": 0.3785862326622009, "step": 1951 }, { "epoch": 0.482332592043489, "grad_norm": 1.0672269420779252, "learning_rate": 1.807043154409541e-05, "loss": 0.3391239643096924, "step": 1952 }, { "epoch": 0.4825796886582654, "grad_norm": 1.348493251518922, "learning_rate": 1.806801822027243e-05, "loss": 0.4442442059516907, "step": 1953 }, { "epoch": 0.48282678527304174, "grad_norm": 1.2419044254422338, "learning_rate": 1.806560354957571e-05, "loss": 0.4156813621520996, "step": 1954 }, { "epoch": 0.48307388188781814, "grad_norm": 1.1346019958017908, "learning_rate": 1.806318753240834e-05, "loss": 0.4028030037879944, "step": 1955 }, { "epoch": 0.48332097850259453, "grad_norm": 1.0172952888864737, "learning_rate": 1.8060770169173662e-05, "loss": 0.32820945978164673, "step": 1956 }, { "epoch": 0.4835680751173709, "grad_norm": 1.041732339629997, "learning_rate": 1.8058351460275222e-05, "loss": 0.39460110664367676, "step": 1957 }, { "epoch": 0.4838151717321473, "grad_norm": 1.1685129205539355, "learning_rate": 1.8055931406116803e-05, "loss": 0.369279682636261, "step": 1958 }, { "epoch": 0.4840622683469237, "grad_norm": 1.1131092537982796, "learning_rate": 1.805351000710241e-05, "loss": 0.36890703439712524, "step": 1959 }, { "epoch": 0.4843093649617, "grad_norm": 1.1148165505379637, "learning_rate": 1.8051087263636266e-05, "loss": 0.32366544008255005, "step": 1960 }, { "epoch": 0.4845564615764764, "grad_norm": 1.0419813252152001, "learning_rate": 1.804866317612283e-05, "loss": 0.3127521276473999, "step": 1961 }, { "epoch": 0.48480355819125276, "grad_norm": 1.1525758801525192, "learning_rate": 1.804623774496677e-05, "loss": 0.34155988693237305, "step": 1962 }, { "epoch": 0.48505065480602916, "grad_norm": 1.2878415847990112, "learning_rate": 1.8043810970572996e-05, "loss": 0.4463993310928345, "step": 1963 }, { "epoch": 0.48529775142080556, "grad_norm": 1.2340576128141783, "learning_rate": 1.8041382853346626e-05, "loss": 0.3859240412712097, "step": 1964 }, { "epoch": 0.4855448480355819, "grad_norm": 1.2977798602371808, "learning_rate": 1.803895339369302e-05, "loss": 0.3607219159603119, "step": 1965 }, { "epoch": 0.4857919446503583, "grad_norm": 1.083983100440678, "learning_rate": 1.8036522592017737e-05, "loss": 0.3477560579776764, "step": 1966 }, { "epoch": 0.48603904126513464, "grad_norm": 1.1387127751953325, "learning_rate": 1.8034090448726586e-05, "loss": 0.36663252115249634, "step": 1967 }, { "epoch": 0.48628613787991104, "grad_norm": 1.303244902438972, "learning_rate": 1.8031656964225585e-05, "loss": 0.4207229018211365, "step": 1968 }, { "epoch": 0.48653323449468744, "grad_norm": 1.184867522422636, "learning_rate": 1.802922213892098e-05, "loss": 0.4083021283149719, "step": 1969 }, { "epoch": 0.4867803311094638, "grad_norm": 1.1576245445334923, "learning_rate": 1.8026785973219237e-05, "loss": 0.37794744968414307, "step": 1970 }, { "epoch": 0.4870274277242402, "grad_norm": 1.1081989729443145, "learning_rate": 1.8024348467527053e-05, "loss": 0.35029202699661255, "step": 1971 }, { "epoch": 0.4872745243390166, "grad_norm": 1.3259415218376422, "learning_rate": 1.8021909622251344e-05, "loss": 0.41762471199035645, "step": 1972 }, { "epoch": 0.4875216209537929, "grad_norm": 1.0241689010133852, "learning_rate": 1.801946943779925e-05, "loss": 0.2998301684856415, "step": 1973 }, { "epoch": 0.4877687175685693, "grad_norm": 1.2842046729988128, "learning_rate": 1.8017027914578137e-05, "loss": 0.40348464250564575, "step": 1974 }, { "epoch": 0.48801581418334566, "grad_norm": 1.0404845886353355, "learning_rate": 1.801458505299559e-05, "loss": 0.3277153968811035, "step": 1975 }, { "epoch": 0.48826291079812206, "grad_norm": 1.1992551402287206, "learning_rate": 1.8012140853459423e-05, "loss": 0.4162501096725464, "step": 1976 }, { "epoch": 0.48851000741289846, "grad_norm": 1.2242339934367608, "learning_rate": 1.8009695316377662e-05, "loss": 0.375619113445282, "step": 1977 }, { "epoch": 0.4887571040276748, "grad_norm": 1.2775729963531453, "learning_rate": 1.8007248442158575e-05, "loss": 0.4347098767757416, "step": 1978 }, { "epoch": 0.4890042006424512, "grad_norm": 1.2392560289117722, "learning_rate": 1.8004800231210637e-05, "loss": 0.41482383012771606, "step": 1979 }, { "epoch": 0.4892512972572276, "grad_norm": 1.2718724418894172, "learning_rate": 1.8002350683942556e-05, "loss": 0.41577574610710144, "step": 1980 }, { "epoch": 0.48949839387200395, "grad_norm": 1.2579576801614298, "learning_rate": 1.7999899800763255e-05, "loss": 0.36690646409988403, "step": 1981 }, { "epoch": 0.48974549048678034, "grad_norm": 1.1553024518341393, "learning_rate": 1.7997447582081885e-05, "loss": 0.3513144254684448, "step": 1982 }, { "epoch": 0.4899925871015567, "grad_norm": 1.2472033351443153, "learning_rate": 1.799499402830782e-05, "loss": 0.4396058917045593, "step": 1983 }, { "epoch": 0.4902396837163331, "grad_norm": 1.0804332949184807, "learning_rate": 1.7992539139850653e-05, "loss": 0.36145055294036865, "step": 1984 }, { "epoch": 0.4904867803311095, "grad_norm": 1.039241364255451, "learning_rate": 1.799008291712021e-05, "loss": 0.320328950881958, "step": 1985 }, { "epoch": 0.4907338769458858, "grad_norm": 1.18201848804438, "learning_rate": 1.7987625360526525e-05, "loss": 0.43112581968307495, "step": 1986 }, { "epoch": 0.4909809735606622, "grad_norm": 1.0936826302038964, "learning_rate": 1.7985166470479862e-05, "loss": 0.37015360593795776, "step": 1987 }, { "epoch": 0.49122807017543857, "grad_norm": 1.109753815870175, "learning_rate": 1.7982706247390717e-05, "loss": 0.3506610095500946, "step": 1988 }, { "epoch": 0.49147516679021497, "grad_norm": 1.272610805534782, "learning_rate": 1.7980244691669786e-05, "loss": 0.31391745805740356, "step": 1989 }, { "epoch": 0.49172226340499137, "grad_norm": 1.2184863124919192, "learning_rate": 1.7977781803728012e-05, "loss": 0.3936711847782135, "step": 1990 }, { "epoch": 0.4919693600197677, "grad_norm": 1.238022883009819, "learning_rate": 1.7975317583976542e-05, "loss": 0.41545090079307556, "step": 1991 }, { "epoch": 0.4922164566345441, "grad_norm": 1.038020074078235, "learning_rate": 1.797285203282676e-05, "loss": 0.35457178950309753, "step": 1992 }, { "epoch": 0.4924635532493205, "grad_norm": 1.1425353560752138, "learning_rate": 1.7970385150690254e-05, "loss": 0.4145331382751465, "step": 1993 }, { "epoch": 0.49271064986409685, "grad_norm": 1.0655863504215701, "learning_rate": 1.7967916937978853e-05, "loss": 0.36403003334999084, "step": 1994 }, { "epoch": 0.49295774647887325, "grad_norm": 1.1457388672480016, "learning_rate": 1.7965447395104598e-05, "loss": 0.40297073125839233, "step": 1995 }, { "epoch": 0.4932048430936496, "grad_norm": 1.0316586940089607, "learning_rate": 1.7962976522479753e-05, "loss": 0.3527231812477112, "step": 1996 }, { "epoch": 0.493451939708426, "grad_norm": 1.0514893256670796, "learning_rate": 1.7960504320516806e-05, "loss": 0.3484930396080017, "step": 1997 }, { "epoch": 0.4936990363232024, "grad_norm": 1.2627534611871867, "learning_rate": 1.795803078962847e-05, "loss": 0.44060570001602173, "step": 1998 }, { "epoch": 0.49394613293797873, "grad_norm": 1.2437362980730986, "learning_rate": 1.7955555930227668e-05, "loss": 0.4134582281112671, "step": 1999 }, { "epoch": 0.49419322955275513, "grad_norm": 1.270598043231495, "learning_rate": 1.7953079742727557e-05, "loss": 0.44167500734329224, "step": 2000 }, { "epoch": 0.49444032616753153, "grad_norm": 1.1121183112988877, "learning_rate": 1.7950602227541513e-05, "loss": 0.3927081823348999, "step": 2001 }, { "epoch": 0.4946874227823079, "grad_norm": 1.1399886585757146, "learning_rate": 1.7948123385083128e-05, "loss": 0.3759424686431885, "step": 2002 }, { "epoch": 0.49493451939708427, "grad_norm": 1.088432697386439, "learning_rate": 1.794564321576622e-05, "loss": 0.34202897548675537, "step": 2003 }, { "epoch": 0.4951816160118606, "grad_norm": 1.240653481331048, "learning_rate": 1.794316172000483e-05, "loss": 0.4489574432373047, "step": 2004 }, { "epoch": 0.495428712626637, "grad_norm": 1.3051789162902465, "learning_rate": 1.794067889821322e-05, "loss": 0.3951481580734253, "step": 2005 }, { "epoch": 0.4956758092414134, "grad_norm": 1.0719459299947072, "learning_rate": 1.793819475080587e-05, "loss": 0.38737332820892334, "step": 2006 }, { "epoch": 0.49592290585618976, "grad_norm": 1.1923289580067666, "learning_rate": 1.7935709278197485e-05, "loss": 0.3600251078605652, "step": 2007 }, { "epoch": 0.49617000247096615, "grad_norm": 1.1400291016841202, "learning_rate": 1.793322248080299e-05, "loss": 0.38639312982559204, "step": 2008 }, { "epoch": 0.49641709908574255, "grad_norm": 1.0799613765124143, "learning_rate": 1.793073435903752e-05, "loss": 0.3659078776836395, "step": 2009 }, { "epoch": 0.4966641957005189, "grad_norm": 1.12366786224527, "learning_rate": 1.7928244913316456e-05, "loss": 0.38216251134872437, "step": 2010 }, { "epoch": 0.4969112923152953, "grad_norm": 1.2465423544361487, "learning_rate": 1.792575414405538e-05, "loss": 0.42766469717025757, "step": 2011 }, { "epoch": 0.49715838893007164, "grad_norm": 1.1866929778006854, "learning_rate": 1.79232620516701e-05, "loss": 0.3695152997970581, "step": 2012 }, { "epoch": 0.49740548554484804, "grad_norm": 1.0946775223160383, "learning_rate": 1.7920768636576643e-05, "loss": 0.32959499955177307, "step": 2013 }, { "epoch": 0.49765258215962443, "grad_norm": 1.0925720637217164, "learning_rate": 1.7918273899191265e-05, "loss": 0.3367137908935547, "step": 2014 }, { "epoch": 0.4978996787744008, "grad_norm": 1.1665830721685466, "learning_rate": 1.791577783993043e-05, "loss": 0.3737567663192749, "step": 2015 }, { "epoch": 0.4981467753891772, "grad_norm": 1.1176660669590308, "learning_rate": 1.7913280459210837e-05, "loss": 0.35991400480270386, "step": 2016 }, { "epoch": 0.4983938720039535, "grad_norm": 1.1121074693455368, "learning_rate": 1.7910781757449397e-05, "loss": 0.3772552013397217, "step": 2017 }, { "epoch": 0.4986409686187299, "grad_norm": 1.25244824431555, "learning_rate": 1.790828173506324e-05, "loss": 0.4047377407550812, "step": 2018 }, { "epoch": 0.4988880652335063, "grad_norm": 1.1135104446806494, "learning_rate": 1.7905780392469717e-05, "loss": 0.3503093719482422, "step": 2019 }, { "epoch": 0.49913516184828266, "grad_norm": 1.0664734390382624, "learning_rate": 1.7903277730086407e-05, "loss": 0.3122912645339966, "step": 2020 }, { "epoch": 0.49938225846305906, "grad_norm": 1.2836058296199073, "learning_rate": 1.7900773748331108e-05, "loss": 0.447368860244751, "step": 2021 }, { "epoch": 0.49962935507783546, "grad_norm": 1.1876833098938673, "learning_rate": 1.7898268447621825e-05, "loss": 0.33288174867630005, "step": 2022 }, { "epoch": 0.4998764516926118, "grad_norm": 1.227597309375793, "learning_rate": 1.7895761828376795e-05, "loss": 0.3692878484725952, "step": 2023 }, { "epoch": 0.5001235483073881, "grad_norm": 1.0337004857266145, "learning_rate": 1.7893253891014474e-05, "loss": 0.33521768450737, "step": 2024 }, { "epoch": 0.5003706449221645, "grad_norm": 1.220064444549077, "learning_rate": 1.7890744635953538e-05, "loss": 0.3833588659763336, "step": 2025 }, { "epoch": 0.5006177415369409, "grad_norm": 1.3306893478224362, "learning_rate": 1.788823406361288e-05, "loss": 0.4389491081237793, "step": 2026 }, { "epoch": 0.5008648381517173, "grad_norm": 1.160388195261502, "learning_rate": 1.7885722174411616e-05, "loss": 0.39499199390411377, "step": 2027 }, { "epoch": 0.5011119347664937, "grad_norm": 1.1846311629544413, "learning_rate": 1.788320896876908e-05, "loss": 0.34773755073547363, "step": 2028 }, { "epoch": 0.50135903138127, "grad_norm": 1.2798565897616638, "learning_rate": 1.7880694447104823e-05, "loss": 0.4055519700050354, "step": 2029 }, { "epoch": 0.5016061279960464, "grad_norm": 1.2208687053124418, "learning_rate": 1.787817860983862e-05, "loss": 0.40657782554626465, "step": 2030 }, { "epoch": 0.5018532246108228, "grad_norm": 1.21038237775142, "learning_rate": 1.7875661457390472e-05, "loss": 0.36542803049087524, "step": 2031 }, { "epoch": 0.5021003212255992, "grad_norm": 1.2298012382567025, "learning_rate": 1.787314299018058e-05, "loss": 0.40999865531921387, "step": 2032 }, { "epoch": 0.5023474178403756, "grad_norm": 1.1791448333948191, "learning_rate": 1.7870623208629386e-05, "loss": 0.38649237155914307, "step": 2033 }, { "epoch": 0.502594514455152, "grad_norm": 1.427727419223536, "learning_rate": 1.7868102113157535e-05, "loss": 0.4746246933937073, "step": 2034 }, { "epoch": 0.5028416110699283, "grad_norm": 1.2188973941352852, "learning_rate": 1.7865579704185906e-05, "loss": 0.39671486616134644, "step": 2035 }, { "epoch": 0.5030887076847047, "grad_norm": 1.153382273056188, "learning_rate": 1.7863055982135582e-05, "loss": 0.3853709101676941, "step": 2036 }, { "epoch": 0.5033358042994811, "grad_norm": 1.139531225075307, "learning_rate": 1.7860530947427878e-05, "loss": 0.4100465178489685, "step": 2037 }, { "epoch": 0.5035829009142575, "grad_norm": 1.0702094949953938, "learning_rate": 1.785800460048432e-05, "loss": 0.3527708649635315, "step": 2038 }, { "epoch": 0.5038299975290339, "grad_norm": 1.2777718760170604, "learning_rate": 1.7855476941726657e-05, "loss": 0.4239378571510315, "step": 2039 }, { "epoch": 0.5040770941438102, "grad_norm": 1.0975183038384146, "learning_rate": 1.7852947971576852e-05, "loss": 0.3897266089916229, "step": 2040 }, { "epoch": 0.5043241907585866, "grad_norm": 1.2126264777538904, "learning_rate": 1.78504176904571e-05, "loss": 0.3582034111022949, "step": 2041 }, { "epoch": 0.504571287373363, "grad_norm": 1.1237684956419673, "learning_rate": 1.78478860987898e-05, "loss": 0.3206598162651062, "step": 2042 }, { "epoch": 0.5048183839881394, "grad_norm": 1.1165297767210964, "learning_rate": 1.7845353196997575e-05, "loss": 0.3746933937072754, "step": 2043 }, { "epoch": 0.5050654806029158, "grad_norm": 1.1064248143408024, "learning_rate": 1.7842818985503268e-05, "loss": 0.33831894397735596, "step": 2044 }, { "epoch": 0.5053125772176921, "grad_norm": 1.0794028626707108, "learning_rate": 1.7840283464729942e-05, "loss": 0.35371848940849304, "step": 2045 }, { "epoch": 0.5055596738324685, "grad_norm": 1.1339173615576434, "learning_rate": 1.783774663510087e-05, "loss": 0.368713915348053, "step": 2046 }, { "epoch": 0.5058067704472449, "grad_norm": 1.0964416969518012, "learning_rate": 1.783520849703956e-05, "loss": 0.29665014147758484, "step": 2047 }, { "epoch": 0.5060538670620213, "grad_norm": 1.2124511676778886, "learning_rate": 1.783266905096972e-05, "loss": 0.42840880155563354, "step": 2048 }, { "epoch": 0.5063009636767977, "grad_norm": 1.1440136655351585, "learning_rate": 1.7830128297315287e-05, "loss": 0.33314788341522217, "step": 2049 }, { "epoch": 0.506548060291574, "grad_norm": 1.1610423162911576, "learning_rate": 1.782758623650042e-05, "loss": 0.353060781955719, "step": 2050 }, { "epoch": 0.5067951569063504, "grad_norm": 1.2518997987355278, "learning_rate": 1.782504286894948e-05, "loss": 0.3659330904483795, "step": 2051 }, { "epoch": 0.5070422535211268, "grad_norm": 1.2969744216777077, "learning_rate": 1.7822498195087062e-05, "loss": 0.42240476608276367, "step": 2052 }, { "epoch": 0.5072893501359031, "grad_norm": 1.3433827418214273, "learning_rate": 1.7819952215337975e-05, "loss": 0.4377967119216919, "step": 2053 }, { "epoch": 0.5075364467506795, "grad_norm": 1.278697743364474, "learning_rate": 1.781740493012724e-05, "loss": 0.3804337978363037, "step": 2054 }, { "epoch": 0.507783543365456, "grad_norm": 1.1855658991401554, "learning_rate": 1.7814856339880106e-05, "loss": 0.3446415960788727, "step": 2055 }, { "epoch": 0.5080306399802322, "grad_norm": 1.145571972561493, "learning_rate": 1.781230644502203e-05, "loss": 0.303702175617218, "step": 2056 }, { "epoch": 0.5082777365950086, "grad_norm": 1.1432835510676154, "learning_rate": 1.7809755245978687e-05, "loss": 0.33951282501220703, "step": 2057 }, { "epoch": 0.508524833209785, "grad_norm": 1.2171898984047693, "learning_rate": 1.7807202743175984e-05, "loss": 0.367460161447525, "step": 2058 }, { "epoch": 0.5087719298245614, "grad_norm": 1.0668026677597073, "learning_rate": 1.780464893704003e-05, "loss": 0.3762957453727722, "step": 2059 }, { "epoch": 0.5090190264393378, "grad_norm": 1.1428570120974755, "learning_rate": 1.7802093827997157e-05, "loss": 0.372580349445343, "step": 2060 }, { "epoch": 0.5092661230541141, "grad_norm": 1.2443305020884052, "learning_rate": 1.7799537416473916e-05, "loss": 0.37895137071609497, "step": 2061 }, { "epoch": 0.5095132196688905, "grad_norm": 1.1443584768423976, "learning_rate": 1.779697970289707e-05, "loss": 0.343389630317688, "step": 2062 }, { "epoch": 0.5097603162836669, "grad_norm": 1.346930742973459, "learning_rate": 1.7794420687693607e-05, "loss": 0.4092646837234497, "step": 2063 }, { "epoch": 0.5100074128984433, "grad_norm": 1.0984130508925445, "learning_rate": 1.7791860371290728e-05, "loss": 0.33088427782058716, "step": 2064 }, { "epoch": 0.5102545095132197, "grad_norm": 1.238546592209221, "learning_rate": 1.7789298754115853e-05, "loss": 0.43380820751190186, "step": 2065 }, { "epoch": 0.510501606127996, "grad_norm": 1.1404295790368753, "learning_rate": 1.7786735836596616e-05, "loss": 0.39250731468200684, "step": 2066 }, { "epoch": 0.5107487027427724, "grad_norm": 1.1638681100538046, "learning_rate": 1.778417161916087e-05, "loss": 0.3492637574672699, "step": 2067 }, { "epoch": 0.5109957993575488, "grad_norm": 1.1504648246970495, "learning_rate": 1.7781606102236683e-05, "loss": 0.43543827533721924, "step": 2068 }, { "epoch": 0.5112428959723252, "grad_norm": 1.1878917910831732, "learning_rate": 1.777903928625235e-05, "loss": 0.3936561346054077, "step": 2069 }, { "epoch": 0.5114899925871016, "grad_norm": 1.114773857621844, "learning_rate": 1.7776471171636365e-05, "loss": 0.34499579668045044, "step": 2070 }, { "epoch": 0.5117370892018779, "grad_norm": 1.1094279743470483, "learning_rate": 1.7773901758817456e-05, "loss": 0.372422993183136, "step": 2071 }, { "epoch": 0.5119841858166543, "grad_norm": 1.172368456479668, "learning_rate": 1.777133104822456e-05, "loss": 0.408812552690506, "step": 2072 }, { "epoch": 0.5122312824314307, "grad_norm": 1.1513490430553002, "learning_rate": 1.776875904028682e-05, "loss": 0.34246963262557983, "step": 2073 }, { "epoch": 0.5124783790462071, "grad_norm": 1.1040357071189897, "learning_rate": 1.7766185735433624e-05, "loss": 0.3422088623046875, "step": 2074 }, { "epoch": 0.5127254756609835, "grad_norm": 1.2794954823648204, "learning_rate": 1.776361113409455e-05, "loss": 0.4349989891052246, "step": 2075 }, { "epoch": 0.5129725722757599, "grad_norm": 1.3180012430458294, "learning_rate": 1.7761035236699397e-05, "loss": 0.28739404678344727, "step": 2076 }, { "epoch": 0.5132196688905362, "grad_norm": 1.1976720104742808, "learning_rate": 1.775845804367819e-05, "loss": 0.42372816801071167, "step": 2077 }, { "epoch": 0.5134667655053126, "grad_norm": 1.1983237040654393, "learning_rate": 1.775587955546117e-05, "loss": 0.37903571128845215, "step": 2078 }, { "epoch": 0.513713862120089, "grad_norm": 1.1365668099297714, "learning_rate": 1.7753299772478783e-05, "loss": 0.35548925399780273, "step": 2079 }, { "epoch": 0.5139609587348654, "grad_norm": 1.131259024507948, "learning_rate": 1.7750718695161697e-05, "loss": 0.34033268690109253, "step": 2080 }, { "epoch": 0.5142080553496418, "grad_norm": 1.0927990834996764, "learning_rate": 1.77481363239408e-05, "loss": 0.37244439125061035, "step": 2081 }, { "epoch": 0.514455151964418, "grad_norm": 1.188361796516872, "learning_rate": 1.774555265924719e-05, "loss": 0.36448967456817627, "step": 2082 }, { "epoch": 0.5147022485791944, "grad_norm": 1.1697516293654542, "learning_rate": 1.774296770151219e-05, "loss": 0.4111456274986267, "step": 2083 }, { "epoch": 0.5149493451939708, "grad_norm": 1.1801710670349175, "learning_rate": 1.7740381451167324e-05, "loss": 0.3767312169075012, "step": 2084 }, { "epoch": 0.5151964418087472, "grad_norm": 1.0894987961174656, "learning_rate": 1.7737793908644344e-05, "loss": 0.36536043882369995, "step": 2085 }, { "epoch": 0.5154435384235236, "grad_norm": 1.4201355517989684, "learning_rate": 1.7735205074375215e-05, "loss": 0.3980242908000946, "step": 2086 }, { "epoch": 0.5156906350382999, "grad_norm": 1.131803670286396, "learning_rate": 1.7732614948792113e-05, "loss": 0.3535422682762146, "step": 2087 }, { "epoch": 0.5159377316530763, "grad_norm": 1.149330125943751, "learning_rate": 1.7730023532327435e-05, "loss": 0.35584938526153564, "step": 2088 }, { "epoch": 0.5161848282678527, "grad_norm": 1.1913047081748531, "learning_rate": 1.7727430825413794e-05, "loss": 0.33369895815849304, "step": 2089 }, { "epoch": 0.5164319248826291, "grad_norm": 1.2205231599603708, "learning_rate": 1.7724836828484012e-05, "loss": 0.39413630962371826, "step": 2090 }, { "epoch": 0.5166790214974055, "grad_norm": 1.1962089605872934, "learning_rate": 1.7722241541971138e-05, "loss": 0.36113035678863525, "step": 2091 }, { "epoch": 0.5169261181121818, "grad_norm": 1.3024721060557165, "learning_rate": 1.771964496630842e-05, "loss": 0.37217801809310913, "step": 2092 }, { "epoch": 0.5171732147269582, "grad_norm": 1.2821888667999313, "learning_rate": 1.771704710192933e-05, "loss": 0.4033240079879761, "step": 2093 }, { "epoch": 0.5174203113417346, "grad_norm": 1.1785196029850218, "learning_rate": 1.7714447949267564e-05, "loss": 0.3397243320941925, "step": 2094 }, { "epoch": 0.517667407956511, "grad_norm": 1.0465268821746718, "learning_rate": 1.7711847508757015e-05, "loss": 0.362011194229126, "step": 2095 }, { "epoch": 0.5179145045712874, "grad_norm": 1.1288593558643454, "learning_rate": 1.770924578083181e-05, "loss": 0.3603997230529785, "step": 2096 }, { "epoch": 0.5181616011860638, "grad_norm": 1.2785632121664432, "learning_rate": 1.770664276592627e-05, "loss": 0.37770459055900574, "step": 2097 }, { "epoch": 0.5184086978008401, "grad_norm": 1.2819439660073506, "learning_rate": 1.7704038464474947e-05, "loss": 0.39486902952194214, "step": 2098 }, { "epoch": 0.5186557944156165, "grad_norm": 1.2016735966490546, "learning_rate": 1.7701432876912604e-05, "loss": 0.3591235876083374, "step": 2099 }, { "epoch": 0.5189028910303929, "grad_norm": 1.1314493643704087, "learning_rate": 1.769882600367421e-05, "loss": 0.3934895694255829, "step": 2100 }, { "epoch": 0.5191499876451693, "grad_norm": 1.3053533777102366, "learning_rate": 1.769621784519497e-05, "loss": 0.37158143520355225, "step": 2101 }, { "epoch": 0.5193970842599457, "grad_norm": 1.2130526623013622, "learning_rate": 1.7693608401910277e-05, "loss": 0.3659983277320862, "step": 2102 }, { "epoch": 0.519644180874722, "grad_norm": 1.0999677856837669, "learning_rate": 1.7690997674255755e-05, "loss": 0.39478427171707153, "step": 2103 }, { "epoch": 0.5198912774894984, "grad_norm": 1.4184702407105185, "learning_rate": 1.7688385662667245e-05, "loss": 0.34318965673446655, "step": 2104 }, { "epoch": 0.5201383741042748, "grad_norm": 1.0852673201389846, "learning_rate": 1.7685772367580785e-05, "loss": 0.36343905329704285, "step": 2105 }, { "epoch": 0.5203854707190512, "grad_norm": 1.0533822152279597, "learning_rate": 1.768315778943264e-05, "loss": 0.30149319767951965, "step": 2106 }, { "epoch": 0.5206325673338276, "grad_norm": 1.1576957833274693, "learning_rate": 1.7680541928659297e-05, "loss": 0.3519359230995178, "step": 2107 }, { "epoch": 0.5208796639486039, "grad_norm": 1.1557453989464361, "learning_rate": 1.7677924785697436e-05, "loss": 0.35615837574005127, "step": 2108 }, { "epoch": 0.5211267605633803, "grad_norm": 1.1470503502116427, "learning_rate": 1.767530636098397e-05, "loss": 0.3962283134460449, "step": 2109 }, { "epoch": 0.5213738571781567, "grad_norm": 1.1347071495699588, "learning_rate": 1.7672686654956015e-05, "loss": 0.36800989508628845, "step": 2110 }, { "epoch": 0.521620953792933, "grad_norm": 1.2660218119740743, "learning_rate": 1.76700656680509e-05, "loss": 0.4535592198371887, "step": 2111 }, { "epoch": 0.5218680504077094, "grad_norm": 1.1896649385737297, "learning_rate": 1.7667443400706182e-05, "loss": 0.38420504331588745, "step": 2112 }, { "epoch": 0.5221151470224857, "grad_norm": 1.2666481292446865, "learning_rate": 1.7664819853359615e-05, "loss": 0.43741634488105774, "step": 2113 }, { "epoch": 0.5223622436372621, "grad_norm": 1.190097863224127, "learning_rate": 1.7662195026449178e-05, "loss": 0.38773760199546814, "step": 2114 }, { "epoch": 0.5226093402520385, "grad_norm": 1.1233438169462107, "learning_rate": 1.7659568920413053e-05, "loss": 0.37211868166923523, "step": 2115 }, { "epoch": 0.5228564368668149, "grad_norm": 1.0403149413849344, "learning_rate": 1.7656941535689652e-05, "loss": 0.3657495081424713, "step": 2116 }, { "epoch": 0.5231035334815913, "grad_norm": 1.2428383604929176, "learning_rate": 1.765431287271758e-05, "loss": 0.3626473546028137, "step": 2117 }, { "epoch": 0.5233506300963677, "grad_norm": 1.1130692305993821, "learning_rate": 1.7651682931935664e-05, "loss": 0.3632292151451111, "step": 2118 }, { "epoch": 0.523597726711144, "grad_norm": 1.5066144402838046, "learning_rate": 1.7649051713782958e-05, "loss": 0.36127910017967224, "step": 2119 }, { "epoch": 0.5238448233259204, "grad_norm": 1.2538240315221645, "learning_rate": 1.764641921869871e-05, "loss": 0.3668014407157898, "step": 2120 }, { "epoch": 0.5240919199406968, "grad_norm": 1.2881837966925773, "learning_rate": 1.7643785447122385e-05, "loss": 0.3971572816371918, "step": 2121 }, { "epoch": 0.5243390165554732, "grad_norm": 1.1861355477676543, "learning_rate": 1.764115039949367e-05, "loss": 0.3735707402229309, "step": 2122 }, { "epoch": 0.5245861131702496, "grad_norm": 1.0787865591661217, "learning_rate": 1.763851407625246e-05, "loss": 0.30229341983795166, "step": 2123 }, { "epoch": 0.5248332097850259, "grad_norm": 1.1428532328868588, "learning_rate": 1.7635876477838857e-05, "loss": 0.3525081276893616, "step": 2124 }, { "epoch": 0.5250803063998023, "grad_norm": 1.2493146451018393, "learning_rate": 1.7633237604693186e-05, "loss": 0.39479905366897583, "step": 2125 }, { "epoch": 0.5253274030145787, "grad_norm": 1.1283834486368443, "learning_rate": 1.7630597457255977e-05, "loss": 0.40696704387664795, "step": 2126 }, { "epoch": 0.5255744996293551, "grad_norm": 1.2932683052164646, "learning_rate": 1.7627956035967978e-05, "loss": 0.41573938727378845, "step": 2127 }, { "epoch": 0.5258215962441315, "grad_norm": 1.1840623784016175, "learning_rate": 1.7625313341270145e-05, "loss": 0.4164816737174988, "step": 2128 }, { "epoch": 0.5260686928589078, "grad_norm": 1.0774825368087382, "learning_rate": 1.7622669373603653e-05, "loss": 0.30780839920043945, "step": 2129 }, { "epoch": 0.5263157894736842, "grad_norm": 1.177137361934297, "learning_rate": 1.762002413340988e-05, "loss": 0.35493505001068115, "step": 2130 }, { "epoch": 0.5265628860884606, "grad_norm": 1.2251392909012697, "learning_rate": 1.7617377621130428e-05, "loss": 0.3792756497859955, "step": 2131 }, { "epoch": 0.526809982703237, "grad_norm": 1.2342088394023734, "learning_rate": 1.7614729837207103e-05, "loss": 0.38740795850753784, "step": 2132 }, { "epoch": 0.5270570793180134, "grad_norm": 1.2167401412521353, "learning_rate": 1.7612080782081924e-05, "loss": 0.4179290235042572, "step": 2133 }, { "epoch": 0.5273041759327898, "grad_norm": 1.2567121809498598, "learning_rate": 1.7609430456197126e-05, "loss": 0.44177836179733276, "step": 2134 }, { "epoch": 0.5275512725475661, "grad_norm": 1.1427803718390899, "learning_rate": 1.7606778859995152e-05, "loss": 0.37991687655448914, "step": 2135 }, { "epoch": 0.5277983691623425, "grad_norm": 0.9501657543761336, "learning_rate": 1.760412599391866e-05, "loss": 0.32108670473098755, "step": 2136 }, { "epoch": 0.5280454657771189, "grad_norm": 1.025302294761227, "learning_rate": 1.760147185841052e-05, "loss": 0.33661949634552, "step": 2137 }, { "epoch": 0.5282925623918953, "grad_norm": 1.2606491640025772, "learning_rate": 1.7598816453913815e-05, "loss": 0.39161473512649536, "step": 2138 }, { "epoch": 0.5285396590066717, "grad_norm": 1.1165922558348869, "learning_rate": 1.759615978087183e-05, "loss": 0.34843534231185913, "step": 2139 }, { "epoch": 0.528786755621448, "grad_norm": 1.2410895704776062, "learning_rate": 1.7593501839728077e-05, "loss": 0.4099137485027313, "step": 2140 }, { "epoch": 0.5290338522362243, "grad_norm": 1.2509253049635582, "learning_rate": 1.759084263092627e-05, "loss": 0.3528222441673279, "step": 2141 }, { "epoch": 0.5292809488510007, "grad_norm": 1.3276256791817818, "learning_rate": 1.758818215491034e-05, "loss": 0.37148481607437134, "step": 2142 }, { "epoch": 0.5295280454657771, "grad_norm": 1.1968132688911086, "learning_rate": 1.758552041212442e-05, "loss": 0.42845970392227173, "step": 2143 }, { "epoch": 0.5297751420805535, "grad_norm": 1.165001505079869, "learning_rate": 1.7582857403012865e-05, "loss": 0.3630355894565582, "step": 2144 }, { "epoch": 0.5300222386953298, "grad_norm": 1.1603604330536168, "learning_rate": 1.7580193128020237e-05, "loss": 0.41970860958099365, "step": 2145 }, { "epoch": 0.5302693353101062, "grad_norm": 1.093204033151228, "learning_rate": 1.757752758759131e-05, "loss": 0.338795006275177, "step": 2146 }, { "epoch": 0.5305164319248826, "grad_norm": 1.1356525406230602, "learning_rate": 1.7574860782171065e-05, "loss": 0.38345813751220703, "step": 2147 }, { "epoch": 0.530763528539659, "grad_norm": 1.1138446182035584, "learning_rate": 1.75721927122047e-05, "loss": 0.3629835844039917, "step": 2148 }, { "epoch": 0.5310106251544354, "grad_norm": 1.2291712592301975, "learning_rate": 1.756952337813763e-05, "loss": 0.425129234790802, "step": 2149 }, { "epoch": 0.5312577217692117, "grad_norm": 1.2588468862579218, "learning_rate": 1.7566852780415464e-05, "loss": 0.4185973107814789, "step": 2150 }, { "epoch": 0.5315048183839881, "grad_norm": 1.1351293627615664, "learning_rate": 1.756418091948403e-05, "loss": 0.30661505460739136, "step": 2151 }, { "epoch": 0.5317519149987645, "grad_norm": 1.2045288329287214, "learning_rate": 1.756150779578938e-05, "loss": 0.35646405816078186, "step": 2152 }, { "epoch": 0.5319990116135409, "grad_norm": 1.1298249330243242, "learning_rate": 1.755883340977775e-05, "loss": 0.3414093255996704, "step": 2153 }, { "epoch": 0.5322461082283173, "grad_norm": 1.083349477964281, "learning_rate": 1.7556157761895615e-05, "loss": 0.33801209926605225, "step": 2154 }, { "epoch": 0.5324932048430937, "grad_norm": 1.137645723536166, "learning_rate": 1.7553480852589635e-05, "loss": 0.40392154455184937, "step": 2155 }, { "epoch": 0.53274030145787, "grad_norm": 1.133003571931362, "learning_rate": 1.75508026823067e-05, "loss": 0.39980459213256836, "step": 2156 }, { "epoch": 0.5329873980726464, "grad_norm": 1.3557308092555986, "learning_rate": 1.7548123251493903e-05, "loss": 0.4570770263671875, "step": 2157 }, { "epoch": 0.5332344946874228, "grad_norm": 1.19157049242974, "learning_rate": 1.7545442560598546e-05, "loss": 0.37592461705207825, "step": 2158 }, { "epoch": 0.5334815913021992, "grad_norm": 1.1731520072829476, "learning_rate": 1.7542760610068143e-05, "loss": 0.3434872627258301, "step": 2159 }, { "epoch": 0.5337286879169756, "grad_norm": 1.0330463431155688, "learning_rate": 1.754007740035042e-05, "loss": 0.32128721475601196, "step": 2160 }, { "epoch": 0.5339757845317519, "grad_norm": 1.2053334845612211, "learning_rate": 1.753739293189331e-05, "loss": 0.34150081872940063, "step": 2161 }, { "epoch": 0.5342228811465283, "grad_norm": 1.1466363405422542, "learning_rate": 1.7534707205144962e-05, "loss": 0.34147369861602783, "step": 2162 }, { "epoch": 0.5344699777613047, "grad_norm": 1.212960171255103, "learning_rate": 1.7532020220553724e-05, "loss": 0.3723154067993164, "step": 2163 }, { "epoch": 0.5347170743760811, "grad_norm": 1.1066433097252464, "learning_rate": 1.7529331978568165e-05, "loss": 0.38070517778396606, "step": 2164 }, { "epoch": 0.5349641709908575, "grad_norm": 1.2642332058545118, "learning_rate": 1.752664247963706e-05, "loss": 0.4080457091331482, "step": 2165 }, { "epoch": 0.5352112676056338, "grad_norm": 1.1790515540758149, "learning_rate": 1.7523951724209388e-05, "loss": 0.37743067741394043, "step": 2166 }, { "epoch": 0.5354583642204102, "grad_norm": 1.087611924281375, "learning_rate": 1.7521259712734355e-05, "loss": 0.3228120803833008, "step": 2167 }, { "epoch": 0.5357054608351866, "grad_norm": 1.0609603256071418, "learning_rate": 1.7518566445661353e-05, "loss": 0.3430929183959961, "step": 2168 }, { "epoch": 0.535952557449963, "grad_norm": 1.2536087013715929, "learning_rate": 1.751587192344e-05, "loss": 0.3676208257675171, "step": 2169 }, { "epoch": 0.5361996540647394, "grad_norm": 1.0975136690896816, "learning_rate": 1.751317614652012e-05, "loss": 0.3182121813297272, "step": 2170 }, { "epoch": 0.5364467506795156, "grad_norm": 1.278489801237829, "learning_rate": 1.751047911535175e-05, "loss": 0.4117313325405121, "step": 2171 }, { "epoch": 0.536693847294292, "grad_norm": 1.2062421078568693, "learning_rate": 1.7507780830385122e-05, "loss": 0.4573667049407959, "step": 2172 }, { "epoch": 0.5369409439090684, "grad_norm": 1.2181944910905131, "learning_rate": 1.7505081292070693e-05, "loss": 0.34178289771080017, "step": 2173 }, { "epoch": 0.5371880405238448, "grad_norm": 1.0465280629110587, "learning_rate": 1.7502380500859126e-05, "loss": 0.276217520236969, "step": 2174 }, { "epoch": 0.5374351371386212, "grad_norm": 1.2767951014745975, "learning_rate": 1.749967845720128e-05, "loss": 0.3365969657897949, "step": 2175 }, { "epoch": 0.5376822337533976, "grad_norm": 1.3267650702961487, "learning_rate": 1.749697516154825e-05, "loss": 0.4072115123271942, "step": 2176 }, { "epoch": 0.5379293303681739, "grad_norm": 1.2611066691510917, "learning_rate": 1.7494270614351314e-05, "loss": 0.40029627084732056, "step": 2177 }, { "epoch": 0.5381764269829503, "grad_norm": 1.156269174042538, "learning_rate": 1.7491564816061965e-05, "loss": 0.3185693323612213, "step": 2178 }, { "epoch": 0.5384235235977267, "grad_norm": 1.2736978296382624, "learning_rate": 1.7488857767131916e-05, "loss": 0.40542471408843994, "step": 2179 }, { "epoch": 0.5386706202125031, "grad_norm": 1.3419015412777375, "learning_rate": 1.7486149468013075e-05, "loss": 0.42670226097106934, "step": 2180 }, { "epoch": 0.5389177168272795, "grad_norm": 1.2323595547090596, "learning_rate": 1.7483439919157575e-05, "loss": 0.34207725524902344, "step": 2181 }, { "epoch": 0.5391648134420558, "grad_norm": 1.1960950616524912, "learning_rate": 1.7480729121017738e-05, "loss": 0.41732528805732727, "step": 2182 }, { "epoch": 0.5394119100568322, "grad_norm": 1.2135911777403772, "learning_rate": 1.747801707404611e-05, "loss": 0.39810502529144287, "step": 2183 }, { "epoch": 0.5396590066716086, "grad_norm": 1.2383166160597154, "learning_rate": 1.7475303778695437e-05, "loss": 0.35228395462036133, "step": 2184 }, { "epoch": 0.539906103286385, "grad_norm": 1.1683492416034196, "learning_rate": 1.747258923541867e-05, "loss": 0.35973551869392395, "step": 2185 }, { "epoch": 0.5401531999011614, "grad_norm": 1.1870175824202533, "learning_rate": 1.746987344466899e-05, "loss": 0.4115106463432312, "step": 2186 }, { "epoch": 0.5404002965159377, "grad_norm": 1.089257824569551, "learning_rate": 1.7467156406899758e-05, "loss": 0.3447074890136719, "step": 2187 }, { "epoch": 0.5406473931307141, "grad_norm": 1.158352247432864, "learning_rate": 1.746443812256456e-05, "loss": 0.35106217861175537, "step": 2188 }, { "epoch": 0.5408944897454905, "grad_norm": 1.1195735953831847, "learning_rate": 1.7461718592117184e-05, "loss": 0.36367446184158325, "step": 2189 }, { "epoch": 0.5411415863602669, "grad_norm": 1.2115352020888224, "learning_rate": 1.745899781601163e-05, "loss": 0.3798319101333618, "step": 2190 }, { "epoch": 0.5413886829750433, "grad_norm": 1.1918224888689215, "learning_rate": 1.7456275794702106e-05, "loss": 0.36402273178100586, "step": 2191 }, { "epoch": 0.5416357795898196, "grad_norm": 1.0973771852991319, "learning_rate": 1.745355252864302e-05, "loss": 0.33361321687698364, "step": 2192 }, { "epoch": 0.541882876204596, "grad_norm": 1.2826431218888839, "learning_rate": 1.7450828018289e-05, "loss": 0.4253445863723755, "step": 2193 }, { "epoch": 0.5421299728193724, "grad_norm": 1.1679288282330371, "learning_rate": 1.744810226409487e-05, "loss": 0.37337034940719604, "step": 2194 }, { "epoch": 0.5423770694341488, "grad_norm": 1.2224852870043736, "learning_rate": 1.7445375266515667e-05, "loss": 0.38859257102012634, "step": 2195 }, { "epoch": 0.5426241660489252, "grad_norm": 1.2067125708888842, "learning_rate": 1.7442647026006642e-05, "loss": 0.4170328974723816, "step": 2196 }, { "epoch": 0.5428712626637016, "grad_norm": 1.167410731885532, "learning_rate": 1.7439917543023243e-05, "loss": 0.4297490119934082, "step": 2197 }, { "epoch": 0.5431183592784778, "grad_norm": 1.2838828956782988, "learning_rate": 1.7437186818021125e-05, "loss": 0.37706923484802246, "step": 2198 }, { "epoch": 0.5433654558932542, "grad_norm": 1.0693522286550325, "learning_rate": 1.743445485145616e-05, "loss": 0.3382512927055359, "step": 2199 }, { "epoch": 0.5436125525080306, "grad_norm": 1.0621527203002847, "learning_rate": 1.7431721643784426e-05, "loss": 0.3156704902648926, "step": 2200 }, { "epoch": 0.543859649122807, "grad_norm": 1.2867468023129507, "learning_rate": 1.7428987195462193e-05, "loss": 0.3905448317527771, "step": 2201 }, { "epoch": 0.5441067457375834, "grad_norm": 1.0716149863727635, "learning_rate": 1.7426251506945965e-05, "loss": 0.36368274688720703, "step": 2202 }, { "epoch": 0.5443538423523597, "grad_norm": 1.2729629237012603, "learning_rate": 1.742351457869242e-05, "loss": 0.3599029779434204, "step": 2203 }, { "epoch": 0.5446009389671361, "grad_norm": 1.1291650356757463, "learning_rate": 1.742077641115847e-05, "loss": 0.3761764168739319, "step": 2204 }, { "epoch": 0.5448480355819125, "grad_norm": 1.1993003567592317, "learning_rate": 1.7418037004801226e-05, "loss": 0.44329333305358887, "step": 2205 }, { "epoch": 0.5450951321966889, "grad_norm": 1.1564373452578813, "learning_rate": 1.7415296360078004e-05, "loss": 0.36443576216697693, "step": 2206 }, { "epoch": 0.5453422288114653, "grad_norm": 1.146526280689644, "learning_rate": 1.7412554477446317e-05, "loss": 0.33868300914764404, "step": 2207 }, { "epoch": 0.5455893254262416, "grad_norm": 1.2877702265824245, "learning_rate": 1.7409811357363902e-05, "loss": 0.4148460626602173, "step": 2208 }, { "epoch": 0.545836422041018, "grad_norm": 1.152869860069075, "learning_rate": 1.74070670002887e-05, "loss": 0.34155750274658203, "step": 2209 }, { "epoch": 0.5460835186557944, "grad_norm": 1.211923894916243, "learning_rate": 1.7404321406678844e-05, "loss": 0.3482329845428467, "step": 2210 }, { "epoch": 0.5463306152705708, "grad_norm": 1.3677639502139787, "learning_rate": 1.740157457699269e-05, "loss": 0.45982199907302856, "step": 2211 }, { "epoch": 0.5465777118853472, "grad_norm": 1.1146565198238827, "learning_rate": 1.7398826511688786e-05, "loss": 0.3192886710166931, "step": 2212 }, { "epoch": 0.5468248085001235, "grad_norm": 1.237270551310495, "learning_rate": 1.7396077211225902e-05, "loss": 0.38831397891044617, "step": 2213 }, { "epoch": 0.5470719051148999, "grad_norm": 1.1561636035723613, "learning_rate": 1.7393326676063e-05, "loss": 0.3876377046108246, "step": 2214 }, { "epoch": 0.5473190017296763, "grad_norm": 1.1867046955760208, "learning_rate": 1.7390574906659254e-05, "loss": 0.3777622580528259, "step": 2215 }, { "epoch": 0.5475660983444527, "grad_norm": 1.2641496675363753, "learning_rate": 1.7387821903474046e-05, "loss": 0.41086438298225403, "step": 2216 }, { "epoch": 0.5478131949592291, "grad_norm": 1.1908473767987133, "learning_rate": 1.7385067666966964e-05, "loss": 0.42281997203826904, "step": 2217 }, { "epoch": 0.5480602915740055, "grad_norm": 1.2543098416482963, "learning_rate": 1.7382312197597798e-05, "loss": 0.37166133522987366, "step": 2218 }, { "epoch": 0.5483073881887818, "grad_norm": 1.0885676958638153, "learning_rate": 1.737955549582654e-05, "loss": 0.3661435544490814, "step": 2219 }, { "epoch": 0.5485544848035582, "grad_norm": 1.1784437674912642, "learning_rate": 1.7376797562113403e-05, "loss": 0.362498939037323, "step": 2220 }, { "epoch": 0.5488015814183346, "grad_norm": 1.1601901486161048, "learning_rate": 1.737403839691879e-05, "loss": 0.37103399634361267, "step": 2221 }, { "epoch": 0.549048678033111, "grad_norm": 1.1218541087059466, "learning_rate": 1.7371278000703316e-05, "loss": 0.3718504011631012, "step": 2222 }, { "epoch": 0.5492957746478874, "grad_norm": 1.242128298326736, "learning_rate": 1.7368516373927802e-05, "loss": 0.3710981607437134, "step": 2223 }, { "epoch": 0.5495428712626637, "grad_norm": 1.2432541814469809, "learning_rate": 1.736575351705328e-05, "loss": 0.3496847152709961, "step": 2224 }, { "epoch": 0.5497899678774401, "grad_norm": 1.1835635367274493, "learning_rate": 1.7362989430540968e-05, "loss": 0.3790493607521057, "step": 2225 }, { "epoch": 0.5500370644922165, "grad_norm": 1.16456156168733, "learning_rate": 1.736022411485231e-05, "loss": 0.3816348910331726, "step": 2226 }, { "epoch": 0.5502841611069929, "grad_norm": 1.1816857579161866, "learning_rate": 1.7357457570448945e-05, "loss": 0.39548152685165405, "step": 2227 }, { "epoch": 0.5505312577217693, "grad_norm": 1.112679010643085, "learning_rate": 1.7354689797792725e-05, "loss": 0.33803653717041016, "step": 2228 }, { "epoch": 0.5507783543365455, "grad_norm": 1.3433157871601038, "learning_rate": 1.7351920797345693e-05, "loss": 0.4463246464729309, "step": 2229 }, { "epoch": 0.5510254509513219, "grad_norm": 1.2484537923014711, "learning_rate": 1.7349150569570114e-05, "loss": 0.3911808133125305, "step": 2230 }, { "epoch": 0.5512725475660983, "grad_norm": 1.1586446121728187, "learning_rate": 1.7346379114928442e-05, "loss": 0.3483375906944275, "step": 2231 }, { "epoch": 0.5515196441808747, "grad_norm": 1.160273461489651, "learning_rate": 1.7343606433883348e-05, "loss": 0.4094994366168976, "step": 2232 }, { "epoch": 0.5517667407956511, "grad_norm": 1.1522903552736181, "learning_rate": 1.73408325268977e-05, "loss": 0.40174588561058044, "step": 2233 }, { "epoch": 0.5520138374104275, "grad_norm": 1.3183597942031435, "learning_rate": 1.7338057394434578e-05, "loss": 0.4713344871997833, "step": 2234 }, { "epoch": 0.5522609340252038, "grad_norm": 1.1591488186980712, "learning_rate": 1.7335281036957258e-05, "loss": 0.3643238842487335, "step": 2235 }, { "epoch": 0.5525080306399802, "grad_norm": 1.0715743737382384, "learning_rate": 1.7332503454929224e-05, "loss": 0.3567599058151245, "step": 2236 }, { "epoch": 0.5527551272547566, "grad_norm": 1.0851824303212119, "learning_rate": 1.7329724648814166e-05, "loss": 0.4110870957374573, "step": 2237 }, { "epoch": 0.553002223869533, "grad_norm": 1.2173142331352569, "learning_rate": 1.7326944619075983e-05, "loss": 0.4023841917514801, "step": 2238 }, { "epoch": 0.5532493204843094, "grad_norm": 1.0762486204092174, "learning_rate": 1.732416336617877e-05, "loss": 0.3774207830429077, "step": 2239 }, { "epoch": 0.5534964170990857, "grad_norm": 1.082149555032959, "learning_rate": 1.7321380890586824e-05, "loss": 0.32517725229263306, "step": 2240 }, { "epoch": 0.5537435137138621, "grad_norm": 2.672350306331573, "learning_rate": 1.7318597192764654e-05, "loss": 0.37224817276000977, "step": 2241 }, { "epoch": 0.5539906103286385, "grad_norm": 1.1350377084987051, "learning_rate": 1.7315812273176966e-05, "loss": 0.3839457035064697, "step": 2242 }, { "epoch": 0.5542377069434149, "grad_norm": 1.0798728368533903, "learning_rate": 1.7313026132288684e-05, "loss": 0.32244306802749634, "step": 2243 }, { "epoch": 0.5544848035581913, "grad_norm": 1.2766294910713551, "learning_rate": 1.7310238770564916e-05, "loss": 0.4318862557411194, "step": 2244 }, { "epoch": 0.5547319001729676, "grad_norm": 1.1800618890785342, "learning_rate": 1.7307450188470994e-05, "loss": 0.38151299953460693, "step": 2245 }, { "epoch": 0.554978996787744, "grad_norm": 1.19311759765675, "learning_rate": 1.7304660386472434e-05, "loss": 0.3448258638381958, "step": 2246 }, { "epoch": 0.5552260934025204, "grad_norm": 1.1560369534196817, "learning_rate": 1.7301869365034967e-05, "loss": 0.3416752517223358, "step": 2247 }, { "epoch": 0.5554731900172968, "grad_norm": 1.146881439574146, "learning_rate": 1.7299077124624532e-05, "loss": 0.3780331611633301, "step": 2248 }, { "epoch": 0.5557202866320732, "grad_norm": 1.0744134611316656, "learning_rate": 1.729628366570726e-05, "loss": 0.3427378535270691, "step": 2249 }, { "epoch": 0.5559673832468495, "grad_norm": 1.0640232764438267, "learning_rate": 1.7293488988749488e-05, "loss": 0.3504236936569214, "step": 2250 }, { "epoch": 0.5562144798616259, "grad_norm": 1.173197092776816, "learning_rate": 1.7290693094217763e-05, "loss": 0.37860241532325745, "step": 2251 }, { "epoch": 0.5564615764764023, "grad_norm": 1.1540248967835742, "learning_rate": 1.7287895982578833e-05, "loss": 0.3553634285926819, "step": 2252 }, { "epoch": 0.5567086730911787, "grad_norm": 1.329712430566946, "learning_rate": 1.7285097654299643e-05, "loss": 0.41442906856536865, "step": 2253 }, { "epoch": 0.5569557697059551, "grad_norm": 1.0983832676140595, "learning_rate": 1.7282298109847346e-05, "loss": 0.33992043137550354, "step": 2254 }, { "epoch": 0.5572028663207315, "grad_norm": 1.2018887211072304, "learning_rate": 1.72794973496893e-05, "loss": 0.35734331607818604, "step": 2255 }, { "epoch": 0.5574499629355077, "grad_norm": 1.1116525591225317, "learning_rate": 1.727669537429306e-05, "loss": 0.3096155524253845, "step": 2256 }, { "epoch": 0.5576970595502841, "grad_norm": 1.2694290581960335, "learning_rate": 1.727389218412639e-05, "loss": 0.38954657316207886, "step": 2257 }, { "epoch": 0.5579441561650605, "grad_norm": 1.2546773663098396, "learning_rate": 1.7271087779657262e-05, "loss": 0.36275917291641235, "step": 2258 }, { "epoch": 0.5581912527798369, "grad_norm": 2.8135647743915526, "learning_rate": 1.7268282161353826e-05, "loss": 0.34618544578552246, "step": 2259 }, { "epoch": 0.5584383493946133, "grad_norm": 1.2775602090080918, "learning_rate": 1.7265475329684462e-05, "loss": 0.36514878273010254, "step": 2260 }, { "epoch": 0.5586854460093896, "grad_norm": 1.2076283015068254, "learning_rate": 1.726266728511774e-05, "loss": 0.36206626892089844, "step": 2261 }, { "epoch": 0.558932542624166, "grad_norm": 1.2055160519448629, "learning_rate": 1.7259858028122435e-05, "loss": 0.351290225982666, "step": 2262 }, { "epoch": 0.5591796392389424, "grad_norm": 1.1561968581902293, "learning_rate": 1.7257047559167526e-05, "loss": 0.3465690612792969, "step": 2263 }, { "epoch": 0.5594267358537188, "grad_norm": 1.0976489437574415, "learning_rate": 1.725423587872219e-05, "loss": 0.37913429737091064, "step": 2264 }, { "epoch": 0.5596738324684952, "grad_norm": 1.2242904110288289, "learning_rate": 1.7251422987255805e-05, "loss": 0.3642905652523041, "step": 2265 }, { "epoch": 0.5599209290832715, "grad_norm": 1.11997474070485, "learning_rate": 1.724860888523796e-05, "loss": 0.37449929118156433, "step": 2266 }, { "epoch": 0.5601680256980479, "grad_norm": 1.1592256099685132, "learning_rate": 1.7245793573138438e-05, "loss": 0.3488929569721222, "step": 2267 }, { "epoch": 0.5604151223128243, "grad_norm": 1.220356684446437, "learning_rate": 1.7242977051427227e-05, "loss": 0.39283594489097595, "step": 2268 }, { "epoch": 0.5606622189276007, "grad_norm": 1.1660531828081215, "learning_rate": 1.724015932057452e-05, "loss": 0.35591334104537964, "step": 2269 }, { "epoch": 0.5609093155423771, "grad_norm": 1.2037525086079366, "learning_rate": 1.72373403810507e-05, "loss": 0.38374876976013184, "step": 2270 }, { "epoch": 0.5611564121571534, "grad_norm": 1.2223255756186753, "learning_rate": 1.7234520233326373e-05, "loss": 0.3522288203239441, "step": 2271 }, { "epoch": 0.5614035087719298, "grad_norm": 1.082159590218156, "learning_rate": 1.7231698877872327e-05, "loss": 0.3919287919998169, "step": 2272 }, { "epoch": 0.5616506053867062, "grad_norm": 1.1162757558865202, "learning_rate": 1.7228876315159558e-05, "loss": 0.33658725023269653, "step": 2273 }, { "epoch": 0.5618977020014826, "grad_norm": 1.0964681971356083, "learning_rate": 1.7226052545659265e-05, "loss": 0.34262579679489136, "step": 2274 }, { "epoch": 0.562144798616259, "grad_norm": 1.2887432461240762, "learning_rate": 1.7223227569842844e-05, "loss": 0.4460541009902954, "step": 2275 }, { "epoch": 0.5623918952310354, "grad_norm": 1.1652540547776444, "learning_rate": 1.7220401388181903e-05, "loss": 0.33422696590423584, "step": 2276 }, { "epoch": 0.5626389918458117, "grad_norm": 1.1965700026961235, "learning_rate": 1.7217574001148243e-05, "loss": 0.38274967670440674, "step": 2277 }, { "epoch": 0.5628860884605881, "grad_norm": 1.1537756925674072, "learning_rate": 1.721474540921386e-05, "loss": 0.39069175720214844, "step": 2278 }, { "epoch": 0.5631331850753645, "grad_norm": 1.1764201692109857, "learning_rate": 1.7211915612850968e-05, "loss": 0.3750770092010498, "step": 2279 }, { "epoch": 0.5633802816901409, "grad_norm": 1.0981952799076324, "learning_rate": 1.720908461253197e-05, "loss": 0.3571769893169403, "step": 2280 }, { "epoch": 0.5636273783049173, "grad_norm": 1.2288384739124647, "learning_rate": 1.7206252408729474e-05, "loss": 0.37398791313171387, "step": 2281 }, { "epoch": 0.5638744749196936, "grad_norm": 1.1791025610868568, "learning_rate": 1.7203419001916284e-05, "loss": 0.3925120532512665, "step": 2282 }, { "epoch": 0.56412157153447, "grad_norm": 1.2517452829449114, "learning_rate": 1.720058439256541e-05, "loss": 0.38295778632164, "step": 2283 }, { "epoch": 0.5643686681492464, "grad_norm": 1.2194103092649629, "learning_rate": 1.7197748581150058e-05, "loss": 0.40938568115234375, "step": 2284 }, { "epoch": 0.5646157647640228, "grad_norm": 1.1479791982571463, "learning_rate": 1.719491156814365e-05, "loss": 0.3768290877342224, "step": 2285 }, { "epoch": 0.5648628613787992, "grad_norm": 1.0704078344035515, "learning_rate": 1.719207335401978e-05, "loss": 0.3614666759967804, "step": 2286 }, { "epoch": 0.5651099579935754, "grad_norm": 1.2311709003186224, "learning_rate": 1.718923393925227e-05, "loss": 0.41920825839042664, "step": 2287 }, { "epoch": 0.5653570546083518, "grad_norm": 1.0643329171670421, "learning_rate": 1.718639332431513e-05, "loss": 0.3724663555622101, "step": 2288 }, { "epoch": 0.5656041512231282, "grad_norm": 1.1562331390473144, "learning_rate": 1.7183551509682566e-05, "loss": 0.36202847957611084, "step": 2289 }, { "epoch": 0.5658512478379046, "grad_norm": 1.132190663005746, "learning_rate": 1.7180708495828996e-05, "loss": 0.39053064584732056, "step": 2290 }, { "epoch": 0.566098344452681, "grad_norm": 1.143390981598221, "learning_rate": 1.717786428322903e-05, "loss": 0.3594382703304291, "step": 2291 }, { "epoch": 0.5663454410674573, "grad_norm": 1.078733781571771, "learning_rate": 1.7175018872357484e-05, "loss": 0.3513562083244324, "step": 2292 }, { "epoch": 0.5665925376822337, "grad_norm": 1.2229810728979995, "learning_rate": 1.7172172263689364e-05, "loss": 0.3607688546180725, "step": 2293 }, { "epoch": 0.5668396342970101, "grad_norm": 1.2427212246629904, "learning_rate": 1.7169324457699888e-05, "loss": 0.384593665599823, "step": 2294 }, { "epoch": 0.5670867309117865, "grad_norm": 1.2148789746788988, "learning_rate": 1.7166475454864466e-05, "loss": 0.3847053050994873, "step": 2295 }, { "epoch": 0.5673338275265629, "grad_norm": 1.1277826344988808, "learning_rate": 1.7163625255658712e-05, "loss": 0.4161651134490967, "step": 2296 }, { "epoch": 0.5675809241413393, "grad_norm": 1.0193954035760266, "learning_rate": 1.7160773860558433e-05, "loss": 0.3054035007953644, "step": 2297 }, { "epoch": 0.5678280207561156, "grad_norm": 1.245762863833713, "learning_rate": 1.7157921270039644e-05, "loss": 0.3447827100753784, "step": 2298 }, { "epoch": 0.568075117370892, "grad_norm": 1.172427299692085, "learning_rate": 1.7155067484578558e-05, "loss": 0.3610656261444092, "step": 2299 }, { "epoch": 0.5683222139856684, "grad_norm": 1.1097199460635978, "learning_rate": 1.7152212504651582e-05, "loss": 0.35636669397354126, "step": 2300 }, { "epoch": 0.5685693106004448, "grad_norm": 1.3146344506294507, "learning_rate": 1.714935633073533e-05, "loss": 0.41976362466812134, "step": 2301 }, { "epoch": 0.5688164072152212, "grad_norm": 1.0535547966863041, "learning_rate": 1.7146498963306607e-05, "loss": 0.35777074098587036, "step": 2302 }, { "epoch": 0.5690635038299975, "grad_norm": 1.2646097249167174, "learning_rate": 1.7143640402842418e-05, "loss": 0.38767024874687195, "step": 2303 }, { "epoch": 0.5693106004447739, "grad_norm": 1.1834536697997833, "learning_rate": 1.714078064981998e-05, "loss": 0.33666062355041504, "step": 2304 }, { "epoch": 0.5695576970595503, "grad_norm": 1.2020809944543496, "learning_rate": 1.7137919704716693e-05, "loss": 0.3453451097011566, "step": 2305 }, { "epoch": 0.5698047936743267, "grad_norm": 2.0030319441616777, "learning_rate": 1.713505756801017e-05, "loss": 0.4495145380496979, "step": 2306 }, { "epoch": 0.5700518902891031, "grad_norm": 1.3357451466715486, "learning_rate": 1.7132194240178207e-05, "loss": 0.40793660283088684, "step": 2307 }, { "epoch": 0.5702989869038794, "grad_norm": 1.3467510001306837, "learning_rate": 1.712932972169881e-05, "loss": 0.3893353343009949, "step": 2308 }, { "epoch": 0.5705460835186558, "grad_norm": 1.141768742433526, "learning_rate": 1.7126464013050187e-05, "loss": 0.3360096216201782, "step": 2309 }, { "epoch": 0.5707931801334322, "grad_norm": 1.251238604205216, "learning_rate": 1.712359711471073e-05, "loss": 0.4386434555053711, "step": 2310 }, { "epoch": 0.5710402767482086, "grad_norm": 1.3346448689233603, "learning_rate": 1.7120729027159044e-05, "loss": 0.3963259160518646, "step": 2311 }, { "epoch": 0.571287373362985, "grad_norm": 1.2660063013025664, "learning_rate": 1.7117859750873927e-05, "loss": 0.3940209150314331, "step": 2312 }, { "epoch": 0.5715344699777613, "grad_norm": 1.232934926727149, "learning_rate": 1.7114989286334376e-05, "loss": 0.33443087339401245, "step": 2313 }, { "epoch": 0.5717815665925376, "grad_norm": 1.175160705141853, "learning_rate": 1.7112117634019582e-05, "loss": 0.37766212224960327, "step": 2314 }, { "epoch": 0.572028663207314, "grad_norm": 1.4762258801880181, "learning_rate": 1.710924479440894e-05, "loss": 0.3871889114379883, "step": 2315 }, { "epoch": 0.5722757598220904, "grad_norm": 1.1942798776973667, "learning_rate": 1.7106370767982046e-05, "loss": 0.3214360475540161, "step": 2316 }, { "epoch": 0.5725228564368668, "grad_norm": 1.1692307333331842, "learning_rate": 1.7103495555218682e-05, "loss": 0.37184715270996094, "step": 2317 }, { "epoch": 0.5727699530516432, "grad_norm": 1.2024094357793411, "learning_rate": 1.710061915659884e-05, "loss": 0.41286998987197876, "step": 2318 }, { "epoch": 0.5730170496664195, "grad_norm": 1.1705047503748942, "learning_rate": 1.709774157260271e-05, "loss": 0.3355306386947632, "step": 2319 }, { "epoch": 0.5732641462811959, "grad_norm": 1.2944519711651645, "learning_rate": 1.7094862803710665e-05, "loss": 0.3905545473098755, "step": 2320 }, { "epoch": 0.5735112428959723, "grad_norm": 1.1691493329541518, "learning_rate": 1.7091982850403294e-05, "loss": 0.3817211985588074, "step": 2321 }, { "epoch": 0.5737583395107487, "grad_norm": 1.1724830222809113, "learning_rate": 1.7089101713161374e-05, "loss": 0.35238149762153625, "step": 2322 }, { "epoch": 0.5740054361255251, "grad_norm": 1.1287607095953465, "learning_rate": 1.7086219392465877e-05, "loss": 0.3704782724380493, "step": 2323 }, { "epoch": 0.5742525327403014, "grad_norm": 1.2413462386302836, "learning_rate": 1.7083335888797986e-05, "loss": 0.3760336637496948, "step": 2324 }, { "epoch": 0.5744996293550778, "grad_norm": 1.3627116753908197, "learning_rate": 1.7080451202639066e-05, "loss": 0.38401710987091064, "step": 2325 }, { "epoch": 0.5747467259698542, "grad_norm": 1.167764432569415, "learning_rate": 1.7077565334470686e-05, "loss": 0.35704630613327026, "step": 2326 }, { "epoch": 0.5749938225846306, "grad_norm": 1.2111537382016822, "learning_rate": 1.7074678284774616e-05, "loss": 0.41331297159194946, "step": 2327 }, { "epoch": 0.575240919199407, "grad_norm": 1.1022019790795359, "learning_rate": 1.7071790054032825e-05, "loss": 0.35165777802467346, "step": 2328 }, { "epoch": 0.5754880158141833, "grad_norm": 1.204104387188851, "learning_rate": 1.706890064272746e-05, "loss": 0.3628162443637848, "step": 2329 }, { "epoch": 0.5757351124289597, "grad_norm": 1.2635750175292149, "learning_rate": 1.7066010051340886e-05, "loss": 0.36394256353378296, "step": 2330 }, { "epoch": 0.5759822090437361, "grad_norm": 1.125829946333893, "learning_rate": 1.7063118280355656e-05, "loss": 0.34179574251174927, "step": 2331 }, { "epoch": 0.5762293056585125, "grad_norm": 1.2522118521032963, "learning_rate": 1.7060225330254526e-05, "loss": 0.40710997581481934, "step": 2332 }, { "epoch": 0.5764764022732889, "grad_norm": 1.1296976000841212, "learning_rate": 1.7057331201520443e-05, "loss": 0.39779332280158997, "step": 2333 }, { "epoch": 0.5767234988880652, "grad_norm": 1.1747418094487079, "learning_rate": 1.705443589463655e-05, "loss": 0.37292659282684326, "step": 2334 }, { "epoch": 0.5769705955028416, "grad_norm": 1.024625819019286, "learning_rate": 1.7051539410086195e-05, "loss": 0.32032373547554016, "step": 2335 }, { "epoch": 0.577217692117618, "grad_norm": 1.1483104392605117, "learning_rate": 1.7048641748352908e-05, "loss": 0.3196156620979309, "step": 2336 }, { "epoch": 0.5774647887323944, "grad_norm": 1.1429345559383255, "learning_rate": 1.704574290992043e-05, "loss": 0.3959848880767822, "step": 2337 }, { "epoch": 0.5777118853471708, "grad_norm": 1.2837764333030925, "learning_rate": 1.704284289527269e-05, "loss": 0.40730157494544983, "step": 2338 }, { "epoch": 0.5779589819619472, "grad_norm": 1.1328744658746117, "learning_rate": 1.703994170489382e-05, "loss": 0.3485880196094513, "step": 2339 }, { "epoch": 0.5782060785767235, "grad_norm": 1.243199262831143, "learning_rate": 1.7037039339268137e-05, "loss": 0.376227468252182, "step": 2340 }, { "epoch": 0.5784531751914999, "grad_norm": 1.0497504590143425, "learning_rate": 1.703413579888017e-05, "loss": 0.3608759045600891, "step": 2341 }, { "epoch": 0.5787002718062763, "grad_norm": 1.1974426643742349, "learning_rate": 1.7031231084214633e-05, "loss": 0.34377044439315796, "step": 2342 }, { "epoch": 0.5789473684210527, "grad_norm": 1.132965367009995, "learning_rate": 1.7028325195756435e-05, "loss": 0.3879219889640808, "step": 2343 }, { "epoch": 0.579194465035829, "grad_norm": 1.077156147494961, "learning_rate": 1.7025418133990686e-05, "loss": 0.3454912602901459, "step": 2344 }, { "epoch": 0.5794415616506053, "grad_norm": 1.1311755180117742, "learning_rate": 1.702250989940269e-05, "loss": 0.3324621915817261, "step": 2345 }, { "epoch": 0.5796886582653817, "grad_norm": 1.1914526133172303, "learning_rate": 1.701960049247795e-05, "loss": 0.3922058939933777, "step": 2346 }, { "epoch": 0.5799357548801581, "grad_norm": 1.089751193477827, "learning_rate": 1.701668991370216e-05, "loss": 0.3374705910682678, "step": 2347 }, { "epoch": 0.5801828514949345, "grad_norm": 1.2443548003184561, "learning_rate": 1.7013778163561218e-05, "loss": 0.35062628984451294, "step": 2348 }, { "epoch": 0.5804299481097109, "grad_norm": 1.2481981158408315, "learning_rate": 1.70108652425412e-05, "loss": 0.4068008065223694, "step": 2349 }, { "epoch": 0.5806770447244872, "grad_norm": 1.3551592411130648, "learning_rate": 1.700795115112839e-05, "loss": 0.4351794123649597, "step": 2350 }, { "epoch": 0.5809241413392636, "grad_norm": 1.1481116852764317, "learning_rate": 1.7005035889809276e-05, "loss": 0.35279303789138794, "step": 2351 }, { "epoch": 0.58117123795404, "grad_norm": 1.1947852116596913, "learning_rate": 1.7002119459070522e-05, "loss": 0.38611119985580444, "step": 2352 }, { "epoch": 0.5814183345688164, "grad_norm": 1.1939736987149447, "learning_rate": 1.6999201859399002e-05, "loss": 0.4146774709224701, "step": 2353 }, { "epoch": 0.5816654311835928, "grad_norm": 1.0645363660851255, "learning_rate": 1.6996283091281775e-05, "loss": 0.33486878871917725, "step": 2354 }, { "epoch": 0.5819125277983692, "grad_norm": 1.1474076894065715, "learning_rate": 1.6993363155206104e-05, "loss": 0.3357577919960022, "step": 2355 }, { "epoch": 0.5821596244131455, "grad_norm": 1.1816708077388567, "learning_rate": 1.699044205165944e-05, "loss": 0.3675197660923004, "step": 2356 }, { "epoch": 0.5824067210279219, "grad_norm": 1.1260200595640164, "learning_rate": 1.698751978112943e-05, "loss": 0.32056674361228943, "step": 2357 }, { "epoch": 0.5826538176426983, "grad_norm": 1.1900041044266856, "learning_rate": 1.6984596344103922e-05, "loss": 0.3801029622554779, "step": 2358 }, { "epoch": 0.5829009142574747, "grad_norm": 1.1758108918746946, "learning_rate": 1.6981671741070953e-05, "loss": 0.3833216428756714, "step": 2359 }, { "epoch": 0.5831480108722511, "grad_norm": 1.1010815073982287, "learning_rate": 1.697874597251875e-05, "loss": 0.3436390161514282, "step": 2360 }, { "epoch": 0.5833951074870274, "grad_norm": 1.1176990775326459, "learning_rate": 1.697581903893575e-05, "loss": 0.3394116461277008, "step": 2361 }, { "epoch": 0.5836422041018038, "grad_norm": 1.122034424216045, "learning_rate": 1.6972890940810568e-05, "loss": 0.3356654942035675, "step": 2362 }, { "epoch": 0.5838893007165802, "grad_norm": 1.3098441240488543, "learning_rate": 1.696996167863202e-05, "loss": 0.3655951917171478, "step": 2363 }, { "epoch": 0.5841363973313566, "grad_norm": 1.2076988818455658, "learning_rate": 1.696703125288912e-05, "loss": 0.36265021562576294, "step": 2364 }, { "epoch": 0.584383493946133, "grad_norm": 1.131424577007748, "learning_rate": 1.6964099664071072e-05, "loss": 0.3278941214084625, "step": 2365 }, { "epoch": 0.5846305905609093, "grad_norm": 1.3510720509136755, "learning_rate": 1.6961166912667274e-05, "loss": 0.36086326837539673, "step": 2366 }, { "epoch": 0.5848776871756857, "grad_norm": 1.070403802444096, "learning_rate": 1.695823299916732e-05, "loss": 0.3582749366760254, "step": 2367 }, { "epoch": 0.5851247837904621, "grad_norm": 1.2233970107712004, "learning_rate": 1.6955297924060997e-05, "loss": 0.4102901816368103, "step": 2368 }, { "epoch": 0.5853718804052385, "grad_norm": 1.2193436168898464, "learning_rate": 1.6952361687838282e-05, "loss": 0.34034329652786255, "step": 2369 }, { "epoch": 0.5856189770200149, "grad_norm": 1.1646181451959499, "learning_rate": 1.6949424290989356e-05, "loss": 0.31324025988578796, "step": 2370 }, { "epoch": 0.5858660736347912, "grad_norm": 1.202525829103735, "learning_rate": 1.6946485734004583e-05, "loss": 0.36421990394592285, "step": 2371 }, { "epoch": 0.5861131702495675, "grad_norm": 1.1714998459723767, "learning_rate": 1.6943546017374533e-05, "loss": 0.3916762173175812, "step": 2372 }, { "epoch": 0.586360266864344, "grad_norm": 1.2880091876729354, "learning_rate": 1.6940605141589948e-05, "loss": 0.4140579104423523, "step": 2373 }, { "epoch": 0.5866073634791203, "grad_norm": 1.2320172337996231, "learning_rate": 1.693766310714179e-05, "loss": 0.3733428120613098, "step": 2374 }, { "epoch": 0.5868544600938967, "grad_norm": 1.2256917733852504, "learning_rate": 1.69347199145212e-05, "loss": 0.36918002367019653, "step": 2375 }, { "epoch": 0.5871015567086731, "grad_norm": 1.1247567237437144, "learning_rate": 1.693177556421951e-05, "loss": 0.344479501247406, "step": 2376 }, { "epoch": 0.5873486533234494, "grad_norm": 1.2379127793895113, "learning_rate": 1.6928830056728248e-05, "loss": 0.38070735335350037, "step": 2377 }, { "epoch": 0.5875957499382258, "grad_norm": 1.0873184389559896, "learning_rate": 1.692588339253914e-05, "loss": 0.36695659160614014, "step": 2378 }, { "epoch": 0.5878428465530022, "grad_norm": 1.2031026271363254, "learning_rate": 1.6922935572144104e-05, "loss": 0.3387022912502289, "step": 2379 }, { "epoch": 0.5880899431677786, "grad_norm": 1.0794471810852917, "learning_rate": 1.6919986596035244e-05, "loss": 0.3504478931427002, "step": 2380 }, { "epoch": 0.588337039782555, "grad_norm": 1.1196507525232096, "learning_rate": 1.6917036464704863e-05, "loss": 0.37896841764450073, "step": 2381 }, { "epoch": 0.5885841363973313, "grad_norm": 1.156819367596473, "learning_rate": 1.6914085178645456e-05, "loss": 0.33982011675834656, "step": 2382 }, { "epoch": 0.5888312330121077, "grad_norm": 1.132096284158785, "learning_rate": 1.6911132738349708e-05, "loss": 0.38047030568122864, "step": 2383 }, { "epoch": 0.5890783296268841, "grad_norm": 1.2969717473450983, "learning_rate": 1.6908179144310505e-05, "loss": 0.4626803398132324, "step": 2384 }, { "epoch": 0.5893254262416605, "grad_norm": 1.2108978830866133, "learning_rate": 1.690522439702091e-05, "loss": 0.33546745777130127, "step": 2385 }, { "epoch": 0.5895725228564369, "grad_norm": 1.096356282460716, "learning_rate": 1.69022684969742e-05, "loss": 0.3767315149307251, "step": 2386 }, { "epoch": 0.5898196194712132, "grad_norm": 1.3801178090504893, "learning_rate": 1.6899311444663823e-05, "loss": 0.40554893016815186, "step": 2387 }, { "epoch": 0.5900667160859896, "grad_norm": 1.161218511917818, "learning_rate": 1.6896353240583433e-05, "loss": 0.370114803314209, "step": 2388 }, { "epoch": 0.590313812700766, "grad_norm": 1.190394137295463, "learning_rate": 1.6893393885226873e-05, "loss": 0.36675912141799927, "step": 2389 }, { "epoch": 0.5905609093155424, "grad_norm": 1.091887722597731, "learning_rate": 1.6890433379088173e-05, "loss": 0.37551039457321167, "step": 2390 }, { "epoch": 0.5908080059303188, "grad_norm": 1.2019847216778357, "learning_rate": 1.688747172266156e-05, "loss": 0.39565151929855347, "step": 2391 }, { "epoch": 0.5910551025450951, "grad_norm": 1.2049766022756927, "learning_rate": 1.688450891644146e-05, "loss": 0.348932683467865, "step": 2392 }, { "epoch": 0.5913021991598715, "grad_norm": 1.2560193438173664, "learning_rate": 1.6881544960922474e-05, "loss": 0.37328505516052246, "step": 2393 }, { "epoch": 0.5915492957746479, "grad_norm": 1.1534386537355732, "learning_rate": 1.6878579856599406e-05, "loss": 0.37391921877861023, "step": 2394 }, { "epoch": 0.5917963923894243, "grad_norm": 1.186194146651258, "learning_rate": 1.6875613603967255e-05, "loss": 0.4701418876647949, "step": 2395 }, { "epoch": 0.5920434890042007, "grad_norm": 1.0829725671310553, "learning_rate": 1.6872646203521205e-05, "loss": 0.3510931134223938, "step": 2396 }, { "epoch": 0.5922905856189771, "grad_norm": 1.1541464005873265, "learning_rate": 1.686967765575663e-05, "loss": 0.3573458194732666, "step": 2397 }, { "epoch": 0.5925376822337534, "grad_norm": 1.0851749479567423, "learning_rate": 1.6866707961169104e-05, "loss": 0.33705276250839233, "step": 2398 }, { "epoch": 0.5927847788485298, "grad_norm": 1.0544228557624642, "learning_rate": 1.6863737120254383e-05, "loss": 0.33868542313575745, "step": 2399 }, { "epoch": 0.5930318754633062, "grad_norm": 1.240596966169715, "learning_rate": 1.6860765133508422e-05, "loss": 0.38583511114120483, "step": 2400 }, { "epoch": 0.5932789720780826, "grad_norm": 1.166223476193178, "learning_rate": 1.685779200142736e-05, "loss": 0.38331425189971924, "step": 2401 }, { "epoch": 0.593526068692859, "grad_norm": 1.1621450326841105, "learning_rate": 1.6854817724507534e-05, "loss": 0.3276634216308594, "step": 2402 }, { "epoch": 0.5937731653076352, "grad_norm": 1.267372511750744, "learning_rate": 1.685184230324547e-05, "loss": 0.4331716299057007, "step": 2403 }, { "epoch": 0.5940202619224116, "grad_norm": 1.2447788717972954, "learning_rate": 1.684886573813788e-05, "loss": 0.36472249031066895, "step": 2404 }, { "epoch": 0.594267358537188, "grad_norm": 1.3370874820163898, "learning_rate": 1.6845888029681682e-05, "loss": 0.44278398156166077, "step": 2405 }, { "epoch": 0.5945144551519644, "grad_norm": 1.191466013920373, "learning_rate": 1.6842909178373964e-05, "loss": 0.3557663559913635, "step": 2406 }, { "epoch": 0.5947615517667408, "grad_norm": 1.1775868521765116, "learning_rate": 1.6839929184712015e-05, "loss": 0.34036147594451904, "step": 2407 }, { "epoch": 0.5950086483815171, "grad_norm": 1.060600979228856, "learning_rate": 1.6836948049193318e-05, "loss": 0.3185519576072693, "step": 2408 }, { "epoch": 0.5952557449962935, "grad_norm": 1.199349334482163, "learning_rate": 1.6833965772315544e-05, "loss": 0.3690495491027832, "step": 2409 }, { "epoch": 0.5955028416110699, "grad_norm": 1.1991626080492617, "learning_rate": 1.6830982354576556e-05, "loss": 0.3524100184440613, "step": 2410 }, { "epoch": 0.5957499382258463, "grad_norm": 1.1829460511989192, "learning_rate": 1.6827997796474398e-05, "loss": 0.38231557607650757, "step": 2411 }, { "epoch": 0.5959970348406227, "grad_norm": 1.266898391021993, "learning_rate": 1.6825012098507315e-05, "loss": 0.39778590202331543, "step": 2412 }, { "epoch": 0.596244131455399, "grad_norm": 1.1958345654369775, "learning_rate": 1.6822025261173743e-05, "loss": 0.35540634393692017, "step": 2413 }, { "epoch": 0.5964912280701754, "grad_norm": 1.0376611616052356, "learning_rate": 1.68190372849723e-05, "loss": 0.32300135493278503, "step": 2414 }, { "epoch": 0.5967383246849518, "grad_norm": 1.2139119669387595, "learning_rate": 1.6816048170401796e-05, "loss": 0.37650012969970703, "step": 2415 }, { "epoch": 0.5969854212997282, "grad_norm": 1.1465965707072752, "learning_rate": 1.681305791796124e-05, "loss": 0.3523452877998352, "step": 2416 }, { "epoch": 0.5972325179145046, "grad_norm": 1.16134130869985, "learning_rate": 1.681006652814982e-05, "loss": 0.3501370847225189, "step": 2417 }, { "epoch": 0.597479614529281, "grad_norm": 1.2920169364864302, "learning_rate": 1.6807074001466918e-05, "loss": 0.4097038507461548, "step": 2418 }, { "epoch": 0.5977267111440573, "grad_norm": 1.1430838510716699, "learning_rate": 1.6804080338412108e-05, "loss": 0.35407108068466187, "step": 2419 }, { "epoch": 0.5979738077588337, "grad_norm": 1.2103055282361797, "learning_rate": 1.680108553948515e-05, "loss": 0.41356319189071655, "step": 2420 }, { "epoch": 0.5982209043736101, "grad_norm": 1.3310753326097315, "learning_rate": 1.6798089605185994e-05, "loss": 0.30788910388946533, "step": 2421 }, { "epoch": 0.5984680009883865, "grad_norm": 1.2080698379486268, "learning_rate": 1.6795092536014784e-05, "loss": 0.3723036050796509, "step": 2422 }, { "epoch": 0.5987150976031629, "grad_norm": 1.1537676391113278, "learning_rate": 1.679209433247185e-05, "loss": 0.3783816695213318, "step": 2423 }, { "epoch": 0.5989621942179392, "grad_norm": 1.2252611119335053, "learning_rate": 1.6789094995057713e-05, "loss": 0.3377401828765869, "step": 2424 }, { "epoch": 0.5992092908327156, "grad_norm": 1.0277573306542591, "learning_rate": 1.6786094524273074e-05, "loss": 0.3020663261413574, "step": 2425 }, { "epoch": 0.599456387447492, "grad_norm": 1.25425451485354, "learning_rate": 1.6783092920618843e-05, "loss": 0.3875995874404907, "step": 2426 }, { "epoch": 0.5997034840622684, "grad_norm": 1.0993588420083005, "learning_rate": 1.6780090184596097e-05, "loss": 0.3897002935409546, "step": 2427 }, { "epoch": 0.5999505806770448, "grad_norm": 1.115105861588339, "learning_rate": 1.6777086316706114e-05, "loss": 0.3183092474937439, "step": 2428 }, { "epoch": 0.600197677291821, "grad_norm": 1.1459262693880763, "learning_rate": 1.6774081317450365e-05, "loss": 0.3702682852745056, "step": 2429 }, { "epoch": 0.6004447739065975, "grad_norm": 1.2987746063589913, "learning_rate": 1.67710751873305e-05, "loss": 0.361902117729187, "step": 2430 }, { "epoch": 0.6006918705213738, "grad_norm": 1.108655457763827, "learning_rate": 1.6768067926848363e-05, "loss": 0.3821706771850586, "step": 2431 }, { "epoch": 0.6009389671361502, "grad_norm": 1.1203991938250157, "learning_rate": 1.6765059536505984e-05, "loss": 0.364109069108963, "step": 2432 }, { "epoch": 0.6011860637509266, "grad_norm": 1.1041704935579588, "learning_rate": 1.6762050016805584e-05, "loss": 0.3738134503364563, "step": 2433 }, { "epoch": 0.6014331603657029, "grad_norm": 1.0983580896993437, "learning_rate": 1.6759039368249576e-05, "loss": 0.3408236801624298, "step": 2434 }, { "epoch": 0.6016802569804793, "grad_norm": 1.268129058466345, "learning_rate": 1.675602759134055e-05, "loss": 0.37823358178138733, "step": 2435 }, { "epoch": 0.6019273535952557, "grad_norm": 1.078212149438958, "learning_rate": 1.6753014686581294e-05, "loss": 0.37693706154823303, "step": 2436 }, { "epoch": 0.6021744502100321, "grad_norm": 1.2332506147608504, "learning_rate": 1.6750000654474786e-05, "loss": 0.3646615445613861, "step": 2437 }, { "epoch": 0.6024215468248085, "grad_norm": 1.3011186509498263, "learning_rate": 1.674698549552418e-05, "loss": 0.3988953232765198, "step": 2438 }, { "epoch": 0.6026686434395849, "grad_norm": 1.2615473768065388, "learning_rate": 1.6743969210232835e-05, "loss": 0.3762795329093933, "step": 2439 }, { "epoch": 0.6029157400543612, "grad_norm": 1.1526159012854025, "learning_rate": 1.6740951799104283e-05, "loss": 0.35894960165023804, "step": 2440 }, { "epoch": 0.6031628366691376, "grad_norm": 1.0259379461995144, "learning_rate": 1.673793326264226e-05, "loss": 0.3200739026069641, "step": 2441 }, { "epoch": 0.603409933283914, "grad_norm": 1.1684474058969514, "learning_rate": 1.673491360135066e-05, "loss": 0.3100735545158386, "step": 2442 }, { "epoch": 0.6036570298986904, "grad_norm": 0.9752349445728862, "learning_rate": 1.6731892815733606e-05, "loss": 0.34490907192230225, "step": 2443 }, { "epoch": 0.6039041265134668, "grad_norm": 1.1850135818852277, "learning_rate": 1.6728870906295377e-05, "loss": 0.3459533751010895, "step": 2444 }, { "epoch": 0.6041512231282431, "grad_norm": 1.9024083120723765, "learning_rate": 1.672584787354045e-05, "loss": 0.3356567621231079, "step": 2445 }, { "epoch": 0.6043983197430195, "grad_norm": 1.1831592367951929, "learning_rate": 1.6722823717973493e-05, "loss": 0.3317191004753113, "step": 2446 }, { "epoch": 0.6046454163577959, "grad_norm": 1.3966314607745534, "learning_rate": 1.6719798440099356e-05, "loss": 0.4304991066455841, "step": 2447 }, { "epoch": 0.6048925129725723, "grad_norm": 1.154696069326959, "learning_rate": 1.6716772040423078e-05, "loss": 0.37190455198287964, "step": 2448 }, { "epoch": 0.6051396095873487, "grad_norm": 1.1116828692910992, "learning_rate": 1.671374451944989e-05, "loss": 0.35607171058654785, "step": 2449 }, { "epoch": 0.605386706202125, "grad_norm": 1.2273143404284168, "learning_rate": 1.67107158776852e-05, "loss": 0.3873000741004944, "step": 2450 }, { "epoch": 0.6056338028169014, "grad_norm": 1.378210729763527, "learning_rate": 1.670768611563461e-05, "loss": 0.40146103501319885, "step": 2451 }, { "epoch": 0.6058808994316778, "grad_norm": 1.090139719063189, "learning_rate": 1.6704655233803912e-05, "loss": 0.374654620885849, "step": 2452 }, { "epoch": 0.6061279960464542, "grad_norm": 1.1367424692407757, "learning_rate": 1.670162323269908e-05, "loss": 0.3319157361984253, "step": 2453 }, { "epoch": 0.6063750926612306, "grad_norm": 1.1922673163116573, "learning_rate": 1.669859011282627e-05, "loss": 0.3626279830932617, "step": 2454 }, { "epoch": 0.606622189276007, "grad_norm": 1.2458135105548998, "learning_rate": 1.669555587469184e-05, "loss": 0.4090430438518524, "step": 2455 }, { "epoch": 0.6068692858907833, "grad_norm": 1.2112404069754832, "learning_rate": 1.669252051880232e-05, "loss": 0.3588320016860962, "step": 2456 }, { "epoch": 0.6071163825055597, "grad_norm": 1.2158625578251867, "learning_rate": 1.6689484045664427e-05, "loss": 0.33628177642822266, "step": 2457 }, { "epoch": 0.6073634791203361, "grad_norm": 1.105710433749845, "learning_rate": 1.668644645578508e-05, "loss": 0.32207971811294556, "step": 2458 }, { "epoch": 0.6076105757351125, "grad_norm": 1.197098845977042, "learning_rate": 1.6683407749671365e-05, "loss": 0.41264426708221436, "step": 2459 }, { "epoch": 0.6078576723498889, "grad_norm": 1.2950555955490064, "learning_rate": 1.6680367927830566e-05, "loss": 0.3984009325504303, "step": 2460 }, { "epoch": 0.6081047689646651, "grad_norm": 1.185864834588595, "learning_rate": 1.667732699077015e-05, "loss": 0.38207757472991943, "step": 2461 }, { "epoch": 0.6083518655794415, "grad_norm": 1.1107242024202952, "learning_rate": 1.6674284938997774e-05, "loss": 0.3356426954269409, "step": 2462 }, { "epoch": 0.6085989621942179, "grad_norm": 1.171492533757222, "learning_rate": 1.6671241773021274e-05, "loss": 0.399681031703949, "step": 2463 }, { "epoch": 0.6088460588089943, "grad_norm": 1.2154395607883284, "learning_rate": 1.6668197493348678e-05, "loss": 0.32974809408187866, "step": 2464 }, { "epoch": 0.6090931554237707, "grad_norm": 1.1650914979205385, "learning_rate": 1.6665152100488197e-05, "loss": 0.39567649364471436, "step": 2465 }, { "epoch": 0.609340252038547, "grad_norm": 1.2814291136167444, "learning_rate": 1.6662105594948225e-05, "loss": 0.402656614780426, "step": 2466 }, { "epoch": 0.6095873486533234, "grad_norm": 1.3100617653451434, "learning_rate": 1.6659057977237348e-05, "loss": 0.3681592345237732, "step": 2467 }, { "epoch": 0.6098344452680998, "grad_norm": 1.0328586971068077, "learning_rate": 1.6656009247864333e-05, "loss": 0.3138682246208191, "step": 2468 }, { "epoch": 0.6100815418828762, "grad_norm": 1.2554020473356893, "learning_rate": 1.665295940733814e-05, "loss": 0.40620097517967224, "step": 2469 }, { "epoch": 0.6103286384976526, "grad_norm": 1.1584858670613238, "learning_rate": 1.6649908456167906e-05, "loss": 0.38231393694877625, "step": 2470 }, { "epoch": 0.6105757351124289, "grad_norm": 1.135096150972597, "learning_rate": 1.6646856394862953e-05, "loss": 0.3998323976993561, "step": 2471 }, { "epoch": 0.6108228317272053, "grad_norm": 1.1527903338359085, "learning_rate": 1.6643803223932793e-05, "loss": 0.3661813735961914, "step": 2472 }, { "epoch": 0.6110699283419817, "grad_norm": 1.2060725807022337, "learning_rate": 1.6640748943887126e-05, "loss": 0.36683154106140137, "step": 2473 }, { "epoch": 0.6113170249567581, "grad_norm": 1.2147359799391713, "learning_rate": 1.6637693555235825e-05, "loss": 0.40502142906188965, "step": 2474 }, { "epoch": 0.6115641215715345, "grad_norm": 1.0826964715979701, "learning_rate": 1.6634637058488966e-05, "loss": 0.3513891100883484, "step": 2475 }, { "epoch": 0.6118112181863109, "grad_norm": 1.1466826165672872, "learning_rate": 1.6631579454156794e-05, "loss": 0.3901464343070984, "step": 2476 }, { "epoch": 0.6120583148010872, "grad_norm": 1.0958607975406558, "learning_rate": 1.6628520742749744e-05, "loss": 0.33006125688552856, "step": 2477 }, { "epoch": 0.6123054114158636, "grad_norm": 1.1960012649213265, "learning_rate": 1.662546092477844e-05, "loss": 0.34521400928497314, "step": 2478 }, { "epoch": 0.61255250803064, "grad_norm": 1.047501920165884, "learning_rate": 1.662240000075369e-05, "loss": 0.3385639190673828, "step": 2479 }, { "epoch": 0.6127996046454164, "grad_norm": 1.0837904652386174, "learning_rate": 1.6619337971186476e-05, "loss": 0.33112767338752747, "step": 2480 }, { "epoch": 0.6130467012601928, "grad_norm": 1.3725618797519312, "learning_rate": 1.661627483658798e-05, "loss": 0.3788571357727051, "step": 2481 }, { "epoch": 0.6132937978749691, "grad_norm": 1.1601112052534561, "learning_rate": 1.661321059746956e-05, "loss": 0.33211565017700195, "step": 2482 }, { "epoch": 0.6135408944897455, "grad_norm": 1.071531932586248, "learning_rate": 1.6610145254342756e-05, "loss": 0.34540295600891113, "step": 2483 }, { "epoch": 0.6137879911045219, "grad_norm": 1.1955203037814244, "learning_rate": 1.6607078807719298e-05, "loss": 0.36844027042388916, "step": 2484 }, { "epoch": 0.6140350877192983, "grad_norm": 1.0765425822490167, "learning_rate": 1.6604011258111097e-05, "loss": 0.31019771099090576, "step": 2485 }, { "epoch": 0.6142821843340747, "grad_norm": 1.1978159432645579, "learning_rate": 1.6600942606030258e-05, "loss": 0.3402952551841736, "step": 2486 }, { "epoch": 0.614529280948851, "grad_norm": 1.29930682328531, "learning_rate": 1.6597872851989048e-05, "loss": 0.40251249074935913, "step": 2487 }, { "epoch": 0.6147763775636274, "grad_norm": 1.299375931538419, "learning_rate": 1.659480199649994e-05, "loss": 0.4136693477630615, "step": 2488 }, { "epoch": 0.6150234741784038, "grad_norm": 1.1889553799887411, "learning_rate": 1.6591730040075576e-05, "loss": 0.40193265676498413, "step": 2489 }, { "epoch": 0.6152705707931801, "grad_norm": 1.1824087553925127, "learning_rate": 1.6588656983228793e-05, "loss": 0.3963184952735901, "step": 2490 }, { "epoch": 0.6155176674079565, "grad_norm": 1.1727426149479125, "learning_rate": 1.6585582826472605e-05, "loss": 0.3269711136817932, "step": 2491 }, { "epoch": 0.6157647640227328, "grad_norm": 1.0798978918427011, "learning_rate": 1.6582507570320214e-05, "loss": 0.3257615864276886, "step": 2492 }, { "epoch": 0.6160118606375092, "grad_norm": 1.2194544295547625, "learning_rate": 1.6579431215284998e-05, "loss": 0.36055755615234375, "step": 2493 }, { "epoch": 0.6162589572522856, "grad_norm": 1.138589385917832, "learning_rate": 1.6576353761880526e-05, "loss": 0.3554808497428894, "step": 2494 }, { "epoch": 0.616506053867062, "grad_norm": 1.1358403641464014, "learning_rate": 1.6573275210620544e-05, "loss": 0.3043570816516876, "step": 2495 }, { "epoch": 0.6167531504818384, "grad_norm": 1.0409620842783764, "learning_rate": 1.6570195562018994e-05, "loss": 0.3224186301231384, "step": 2496 }, { "epoch": 0.6170002470966148, "grad_norm": 1.2365660354962802, "learning_rate": 1.656711481658998e-05, "loss": 0.3871905207633972, "step": 2497 }, { "epoch": 0.6172473437113911, "grad_norm": 1.2124503913155822, "learning_rate": 1.656403297484781e-05, "loss": 0.3621279001235962, "step": 2498 }, { "epoch": 0.6174944403261675, "grad_norm": 1.309938741899681, "learning_rate": 1.6560950037306962e-05, "loss": 0.354715496301651, "step": 2499 }, { "epoch": 0.6177415369409439, "grad_norm": 1.0753074445226969, "learning_rate": 1.6557866004482105e-05, "loss": 0.3021288514137268, "step": 2500 }, { "epoch": 0.6179886335557203, "grad_norm": 1.2253672084134595, "learning_rate": 1.6554780876888085e-05, "loss": 0.43510276079177856, "step": 2501 }, { "epoch": 0.6182357301704967, "grad_norm": 1.2967478468356628, "learning_rate": 1.6551694655039926e-05, "loss": 0.3729448914527893, "step": 2502 }, { "epoch": 0.618482826785273, "grad_norm": 1.2148938282169681, "learning_rate": 1.6548607339452853e-05, "loss": 0.35247790813446045, "step": 2503 }, { "epoch": 0.6187299234000494, "grad_norm": 1.2079406521924303, "learning_rate": 1.6545518930642253e-05, "loss": 0.3848533034324646, "step": 2504 }, { "epoch": 0.6189770200148258, "grad_norm": 1.2096083680764511, "learning_rate": 1.654242942912371e-05, "loss": 0.345691442489624, "step": 2505 }, { "epoch": 0.6192241166296022, "grad_norm": 1.4375456941125946, "learning_rate": 1.6539338835412983e-05, "loss": 0.4041849374771118, "step": 2506 }, { "epoch": 0.6194712132443786, "grad_norm": 1.181030718916289, "learning_rate": 1.6536247150026016e-05, "loss": 0.34141361713409424, "step": 2507 }, { "epoch": 0.6197183098591549, "grad_norm": 1.1551971262784073, "learning_rate": 1.653315437347893e-05, "loss": 0.35654884576797485, "step": 2508 }, { "epoch": 0.6199654064739313, "grad_norm": 1.0653621127209558, "learning_rate": 1.6530060506288042e-05, "loss": 0.2987525463104248, "step": 2509 }, { "epoch": 0.6202125030887077, "grad_norm": 1.1887735692420645, "learning_rate": 1.6526965548969833e-05, "loss": 0.32144540548324585, "step": 2510 }, { "epoch": 0.6204595997034841, "grad_norm": 1.1384432556680293, "learning_rate": 1.652386950204098e-05, "loss": 0.31348997354507446, "step": 2511 }, { "epoch": 0.6207066963182605, "grad_norm": 1.193685046768494, "learning_rate": 1.652077236601833e-05, "loss": 0.37065866589546204, "step": 2512 }, { "epoch": 0.6209537929330368, "grad_norm": 1.2584823650784276, "learning_rate": 1.651767414141893e-05, "loss": 0.3758401870727539, "step": 2513 }, { "epoch": 0.6212008895478132, "grad_norm": 1.3109896925448197, "learning_rate": 1.6514574828759985e-05, "loss": 0.3583495020866394, "step": 2514 }, { "epoch": 0.6214479861625896, "grad_norm": 1.1814993546994441, "learning_rate": 1.6511474428558896e-05, "loss": 0.3937252461910248, "step": 2515 }, { "epoch": 0.621695082777366, "grad_norm": 1.565270694816824, "learning_rate": 1.650837294133325e-05, "loss": 0.35718029737472534, "step": 2516 }, { "epoch": 0.6219421793921424, "grad_norm": 1.1048737926624053, "learning_rate": 1.6505270367600804e-05, "loss": 0.3682991862297058, "step": 2517 }, { "epoch": 0.6221892760069188, "grad_norm": 1.075820634929744, "learning_rate": 1.6502166707879505e-05, "loss": 0.34530654549598694, "step": 2518 }, { "epoch": 0.622436372621695, "grad_norm": 1.179789321509275, "learning_rate": 1.649906196268747e-05, "loss": 0.40729960799217224, "step": 2519 }, { "epoch": 0.6226834692364714, "grad_norm": 1.2515136709292827, "learning_rate": 1.6495956132543013e-05, "loss": 0.4218754172325134, "step": 2520 }, { "epoch": 0.6229305658512478, "grad_norm": 1.3112177071563915, "learning_rate": 1.6492849217964616e-05, "loss": 0.37504351139068604, "step": 2521 }, { "epoch": 0.6231776624660242, "grad_norm": 1.156854741928561, "learning_rate": 1.6489741219470947e-05, "loss": 0.36261653900146484, "step": 2522 }, { "epoch": 0.6234247590808006, "grad_norm": 1.3292819481665696, "learning_rate": 1.6486632137580854e-05, "loss": 0.40260717272758484, "step": 2523 }, { "epoch": 0.6236718556955769, "grad_norm": 1.1029087795075179, "learning_rate": 1.6483521972813372e-05, "loss": 0.2937977910041809, "step": 2524 }, { "epoch": 0.6239189523103533, "grad_norm": 1.1217202597578324, "learning_rate": 1.6480410725687708e-05, "loss": 0.3647119402885437, "step": 2525 }, { "epoch": 0.6241660489251297, "grad_norm": 1.236720404329141, "learning_rate": 1.6477298396723253e-05, "loss": 0.371451735496521, "step": 2526 }, { "epoch": 0.6244131455399061, "grad_norm": 1.2327536328270619, "learning_rate": 1.6474184986439576e-05, "loss": 0.3740915060043335, "step": 2527 }, { "epoch": 0.6246602421546825, "grad_norm": 1.170756394822542, "learning_rate": 1.6471070495356435e-05, "loss": 0.36188334226608276, "step": 2528 }, { "epoch": 0.6249073387694588, "grad_norm": 1.1148864840964192, "learning_rate": 1.6467954923993757e-05, "loss": 0.32752686738967896, "step": 2529 }, { "epoch": 0.6251544353842352, "grad_norm": 1.1760782532835259, "learning_rate": 1.6464838272871656e-05, "loss": 0.3064236342906952, "step": 2530 }, { "epoch": 0.6254015319990116, "grad_norm": 1.1497967998020893, "learning_rate": 1.6461720542510434e-05, "loss": 0.3937004804611206, "step": 2531 }, { "epoch": 0.625648628613788, "grad_norm": 1.2589760831811314, "learning_rate": 1.645860173343055e-05, "loss": 0.4212881922721863, "step": 2532 }, { "epoch": 0.6258957252285644, "grad_norm": 1.1490421986489596, "learning_rate": 1.6455481846152667e-05, "loss": 0.32843148708343506, "step": 2533 }, { "epoch": 0.6261428218433407, "grad_norm": 1.255522462086548, "learning_rate": 1.645236088119762e-05, "loss": 0.3616219162940979, "step": 2534 }, { "epoch": 0.6263899184581171, "grad_norm": 1.1616000933440314, "learning_rate": 1.6449238839086415e-05, "loss": 0.3685963451862335, "step": 2535 }, { "epoch": 0.6266370150728935, "grad_norm": 1.2799744606613948, "learning_rate": 1.644611572034025e-05, "loss": 0.4159002900123596, "step": 2536 }, { "epoch": 0.6268841116876699, "grad_norm": 1.2846203369192974, "learning_rate": 1.6442991525480498e-05, "loss": 0.3859599828720093, "step": 2537 }, { "epoch": 0.6271312083024463, "grad_norm": 1.1003103649437886, "learning_rate": 1.643986625502871e-05, "loss": 0.3498181104660034, "step": 2538 }, { "epoch": 0.6273783049172227, "grad_norm": 1.1008760832732898, "learning_rate": 1.643673990950662e-05, "loss": 0.31489914655685425, "step": 2539 }, { "epoch": 0.627625401531999, "grad_norm": 2.608925185722187, "learning_rate": 1.6433612489436136e-05, "loss": 0.3049967288970947, "step": 2540 }, { "epoch": 0.6278724981467754, "grad_norm": 1.177411220871684, "learning_rate": 1.643048399533935e-05, "loss": 0.328687459230423, "step": 2541 }, { "epoch": 0.6281195947615518, "grad_norm": 1.1121170520907586, "learning_rate": 1.642735442773854e-05, "loss": 0.35735705494880676, "step": 2542 }, { "epoch": 0.6283666913763282, "grad_norm": 1.2208031608371435, "learning_rate": 1.6424223787156145e-05, "loss": 0.33231571316719055, "step": 2543 }, { "epoch": 0.6286137879911046, "grad_norm": 1.2990120373536098, "learning_rate": 1.6421092074114796e-05, "loss": 0.46404165029525757, "step": 2544 }, { "epoch": 0.6288608846058809, "grad_norm": 1.2784928922292147, "learning_rate": 1.6417959289137308e-05, "loss": 0.408075749874115, "step": 2545 }, { "epoch": 0.6291079812206573, "grad_norm": 1.2598491501273743, "learning_rate": 1.6414825432746662e-05, "loss": 0.3955145478248596, "step": 2546 }, { "epoch": 0.6293550778354337, "grad_norm": 1.3889665960466793, "learning_rate": 1.641169050546602e-05, "loss": 0.4054705500602722, "step": 2547 }, { "epoch": 0.62960217445021, "grad_norm": 1.2014131073266443, "learning_rate": 1.6408554507818734e-05, "loss": 0.354611873626709, "step": 2548 }, { "epoch": 0.6298492710649864, "grad_norm": 1.2181331474563917, "learning_rate": 1.640541744032832e-05, "loss": 0.3250390291213989, "step": 2549 }, { "epoch": 0.6300963676797627, "grad_norm": 1.2115593891209764, "learning_rate": 1.6402279303518485e-05, "loss": 0.3816150426864624, "step": 2550 }, { "epoch": 0.6303434642945391, "grad_norm": 1.2789613753413454, "learning_rate": 1.6399140097913105e-05, "loss": 0.3687470555305481, "step": 2551 }, { "epoch": 0.6305905609093155, "grad_norm": 1.1728724138021969, "learning_rate": 1.6395999824036244e-05, "loss": 0.35885533690452576, "step": 2552 }, { "epoch": 0.6308376575240919, "grad_norm": 1.1900227718205498, "learning_rate": 1.639285848241213e-05, "loss": 0.34945303201675415, "step": 2553 }, { "epoch": 0.6310847541388683, "grad_norm": 1.2888262789053178, "learning_rate": 1.6389716073565188e-05, "loss": 0.436992883682251, "step": 2554 }, { "epoch": 0.6313318507536447, "grad_norm": 1.2123920856075234, "learning_rate": 1.6386572598020003e-05, "loss": 0.39669865369796753, "step": 2555 }, { "epoch": 0.631578947368421, "grad_norm": 1.1415088265432853, "learning_rate": 1.6383428056301355e-05, "loss": 0.3901795744895935, "step": 2556 }, { "epoch": 0.6318260439831974, "grad_norm": 1.2203183999373133, "learning_rate": 1.638028244893418e-05, "loss": 0.3607789874076843, "step": 2557 }, { "epoch": 0.6320731405979738, "grad_norm": 1.0732774964031242, "learning_rate": 1.637713577644362e-05, "loss": 0.33372437953948975, "step": 2558 }, { "epoch": 0.6323202372127502, "grad_norm": 1.0114235493584465, "learning_rate": 1.637398803935497e-05, "loss": 0.2744535803794861, "step": 2559 }, { "epoch": 0.6325673338275266, "grad_norm": 1.0861591117662395, "learning_rate": 1.6370839238193718e-05, "loss": 0.3561391234397888, "step": 2560 }, { "epoch": 0.6328144304423029, "grad_norm": 1.1674277626995309, "learning_rate": 1.6367689373485523e-05, "loss": 0.4221188426017761, "step": 2561 }, { "epoch": 0.6330615270570793, "grad_norm": 1.129191927767666, "learning_rate": 1.6364538445756224e-05, "loss": 0.3672782778739929, "step": 2562 }, { "epoch": 0.6333086236718557, "grad_norm": 1.1852476066641595, "learning_rate": 1.6361386455531833e-05, "loss": 0.3333624601364136, "step": 2563 }, { "epoch": 0.6335557202866321, "grad_norm": 1.3643070416653114, "learning_rate": 1.6358233403338545e-05, "loss": 0.39074811339378357, "step": 2564 }, { "epoch": 0.6338028169014085, "grad_norm": 1.0589031105374802, "learning_rate": 1.635507928970273e-05, "loss": 0.2663320004940033, "step": 2565 }, { "epoch": 0.6340499135161848, "grad_norm": 1.1222183030932482, "learning_rate": 1.635192411515094e-05, "loss": 0.3362261652946472, "step": 2566 }, { "epoch": 0.6342970101309612, "grad_norm": 1.2418979826601426, "learning_rate": 1.6348767880209884e-05, "loss": 0.36379820108413696, "step": 2567 }, { "epoch": 0.6345441067457376, "grad_norm": 1.1324824590787046, "learning_rate": 1.6345610585406478e-05, "loss": 0.3181542158126831, "step": 2568 }, { "epoch": 0.634791203360514, "grad_norm": 1.102495239978865, "learning_rate": 1.6342452231267798e-05, "loss": 0.34601956605911255, "step": 2569 }, { "epoch": 0.6350382999752904, "grad_norm": 1.1158975265417506, "learning_rate": 1.6339292818321095e-05, "loss": 0.35763031244277954, "step": 2570 }, { "epoch": 0.6352853965900667, "grad_norm": 1.2548518299019777, "learning_rate": 1.6336132347093805e-05, "loss": 0.38356631994247437, "step": 2571 }, { "epoch": 0.6355324932048431, "grad_norm": 1.4103197076374638, "learning_rate": 1.6332970818113537e-05, "loss": 0.39906248450279236, "step": 2572 }, { "epoch": 0.6357795898196195, "grad_norm": 1.2758850491323839, "learning_rate": 1.6329808231908072e-05, "loss": 0.3356160521507263, "step": 2573 }, { "epoch": 0.6360266864343959, "grad_norm": 1.1382247987834666, "learning_rate": 1.632664458900537e-05, "loss": 0.3574286103248596, "step": 2574 }, { "epoch": 0.6362737830491723, "grad_norm": 1.168204901107447, "learning_rate": 1.6323479889933577e-05, "loss": 0.31020134687423706, "step": 2575 }, { "epoch": 0.6365208796639487, "grad_norm": 1.2398527708027902, "learning_rate": 1.6320314135221002e-05, "loss": 0.4068431854248047, "step": 2576 }, { "epoch": 0.636767976278725, "grad_norm": 1.124167353051839, "learning_rate": 1.6317147325396135e-05, "loss": 0.30577996373176575, "step": 2577 }, { "epoch": 0.6370150728935013, "grad_norm": 1.199606254006815, "learning_rate": 1.631397946098765e-05, "loss": 0.3319805860519409, "step": 2578 }, { "epoch": 0.6372621695082777, "grad_norm": 1.2909961199856312, "learning_rate": 1.631081054252438e-05, "loss": 0.38729050755500793, "step": 2579 }, { "epoch": 0.6375092661230541, "grad_norm": 1.1135327284440446, "learning_rate": 1.6307640570535348e-05, "loss": 0.36555367708206177, "step": 2580 }, { "epoch": 0.6377563627378305, "grad_norm": 1.2104016599850431, "learning_rate": 1.630446954554975e-05, "loss": 0.3662099838256836, "step": 2581 }, { "epoch": 0.6380034593526068, "grad_norm": 1.1419724189786629, "learning_rate": 1.6301297468096954e-05, "loss": 0.3345777988433838, "step": 2582 }, { "epoch": 0.6382505559673832, "grad_norm": 1.1421115184344839, "learning_rate": 1.629812433870651e-05, "loss": 0.3274189233779907, "step": 2583 }, { "epoch": 0.6384976525821596, "grad_norm": 1.2908588635551828, "learning_rate": 1.6294950157908133e-05, "loss": 0.4378684163093567, "step": 2584 }, { "epoch": 0.638744749196936, "grad_norm": 1.20209843280696, "learning_rate": 1.6291774926231725e-05, "loss": 0.35490667819976807, "step": 2585 }, { "epoch": 0.6389918458117124, "grad_norm": 1.3653619488431454, "learning_rate": 1.6288598644207357e-05, "loss": 0.4417075514793396, "step": 2586 }, { "epoch": 0.6392389424264887, "grad_norm": 1.200470014277598, "learning_rate": 1.628542131236528e-05, "loss": 0.3528425097465515, "step": 2587 }, { "epoch": 0.6394860390412651, "grad_norm": 1.2958369217727554, "learning_rate": 1.6282242931235916e-05, "loss": 0.3821691870689392, "step": 2588 }, { "epoch": 0.6397331356560415, "grad_norm": 1.0598037836543104, "learning_rate": 1.6279063501349864e-05, "loss": 0.3436042368412018, "step": 2589 }, { "epoch": 0.6399802322708179, "grad_norm": 1.1989091857114587, "learning_rate": 1.6275883023237893e-05, "loss": 0.35232964158058167, "step": 2590 }, { "epoch": 0.6402273288855943, "grad_norm": 1.1477374225654762, "learning_rate": 1.6272701497430958e-05, "loss": 0.3185686469078064, "step": 2591 }, { "epoch": 0.6404744255003706, "grad_norm": 1.067442687983464, "learning_rate": 1.6269518924460175e-05, "loss": 0.31560495495796204, "step": 2592 }, { "epoch": 0.640721522115147, "grad_norm": 1.2045627117232107, "learning_rate": 1.6266335304856847e-05, "loss": 0.3895453214645386, "step": 2593 }, { "epoch": 0.6409686187299234, "grad_norm": 1.21312356106001, "learning_rate": 1.626315063915245e-05, "loss": 0.38376280665397644, "step": 2594 }, { "epoch": 0.6412157153446998, "grad_norm": 1.1750789798674721, "learning_rate": 1.6259964927878625e-05, "loss": 0.39059415459632874, "step": 2595 }, { "epoch": 0.6414628119594762, "grad_norm": 1.2431256048024382, "learning_rate": 1.62567781715672e-05, "loss": 0.3531108796596527, "step": 2596 }, { "epoch": 0.6417099085742526, "grad_norm": 1.4208965775958098, "learning_rate": 1.625359037075017e-05, "loss": 0.4441503882408142, "step": 2597 }, { "epoch": 0.6419570051890289, "grad_norm": 1.2298875165143652, "learning_rate": 1.62504015259597e-05, "loss": 0.429848313331604, "step": 2598 }, { "epoch": 0.6422041018038053, "grad_norm": 1.1837902006677747, "learning_rate": 1.6247211637728144e-05, "loss": 0.34643006324768066, "step": 2599 }, { "epoch": 0.6424511984185817, "grad_norm": 1.0638718801575473, "learning_rate": 1.6244020706588017e-05, "loss": 0.3359282314777374, "step": 2600 }, { "epoch": 0.6426982950333581, "grad_norm": 1.3415298324254872, "learning_rate": 1.6240828733072015e-05, "loss": 0.4130958318710327, "step": 2601 }, { "epoch": 0.6429453916481345, "grad_norm": 1.2882458170195585, "learning_rate": 1.6237635717713e-05, "loss": 0.40380075573921204, "step": 2602 }, { "epoch": 0.6431924882629108, "grad_norm": 1.1758008304949954, "learning_rate": 1.623444166104402e-05, "loss": 0.31244391202926636, "step": 2603 }, { "epoch": 0.6434395848776872, "grad_norm": 1.284410192630127, "learning_rate": 1.6231246563598285e-05, "loss": 0.37321561574935913, "step": 2604 }, { "epoch": 0.6436866814924636, "grad_norm": 1.178526451949405, "learning_rate": 1.622805042590919e-05, "loss": 0.3753158450126648, "step": 2605 }, { "epoch": 0.64393377810724, "grad_norm": 1.0795179487789903, "learning_rate": 1.622485324851029e-05, "loss": 0.31961315870285034, "step": 2606 }, { "epoch": 0.6441808747220164, "grad_norm": 1.2714287561237778, "learning_rate": 1.6221655031935332e-05, "loss": 0.4001765847206116, "step": 2607 }, { "epoch": 0.6444279713367926, "grad_norm": 1.1203289226355868, "learning_rate": 1.6218455776718212e-05, "loss": 0.33968907594680786, "step": 2608 }, { "epoch": 0.644675067951569, "grad_norm": 1.1846659485416673, "learning_rate": 1.6215255483393027e-05, "loss": 0.3690234422683716, "step": 2609 }, { "epoch": 0.6449221645663454, "grad_norm": 1.3049806075999817, "learning_rate": 1.6212054152494023e-05, "loss": 0.39496228098869324, "step": 2610 }, { "epoch": 0.6451692611811218, "grad_norm": 1.1430445451735227, "learning_rate": 1.6208851784555637e-05, "loss": 0.2983558475971222, "step": 2611 }, { "epoch": 0.6454163577958982, "grad_norm": 1.2011029660023833, "learning_rate": 1.6205648380112467e-05, "loss": 0.33037564158439636, "step": 2612 }, { "epoch": 0.6456634544106745, "grad_norm": 1.1922193257913554, "learning_rate": 1.6202443939699294e-05, "loss": 0.3601425886154175, "step": 2613 }, { "epoch": 0.6459105510254509, "grad_norm": 1.0939898861628328, "learning_rate": 1.619923846385106e-05, "loss": 0.3399641215801239, "step": 2614 }, { "epoch": 0.6461576476402273, "grad_norm": 1.0744607507158823, "learning_rate": 1.6196031953102892e-05, "loss": 0.3114602565765381, "step": 2615 }, { "epoch": 0.6464047442550037, "grad_norm": 1.1208114692084592, "learning_rate": 1.6192824407990076e-05, "loss": 0.3326723575592041, "step": 2616 }, { "epoch": 0.6466518408697801, "grad_norm": 1.0465245751896448, "learning_rate": 1.6189615829048096e-05, "loss": 0.29725098609924316, "step": 2617 }, { "epoch": 0.6468989374845565, "grad_norm": 1.1459140674500408, "learning_rate": 1.6186406216812575e-05, "loss": 0.37144559621810913, "step": 2618 }, { "epoch": 0.6471460340993328, "grad_norm": 1.0824555739937094, "learning_rate": 1.6183195571819333e-05, "loss": 0.3299696445465088, "step": 2619 }, { "epoch": 0.6473931307141092, "grad_norm": 1.109800763349884, "learning_rate": 1.617998389460435e-05, "loss": 0.36478114128112793, "step": 2620 }, { "epoch": 0.6476402273288856, "grad_norm": 1.2512378805918944, "learning_rate": 1.617677118570379e-05, "loss": 0.31911802291870117, "step": 2621 }, { "epoch": 0.647887323943662, "grad_norm": 1.1717285140972076, "learning_rate": 1.6173557445653977e-05, "loss": 0.377310574054718, "step": 2622 }, { "epoch": 0.6481344205584384, "grad_norm": 1.2212169130887807, "learning_rate": 1.6170342674991413e-05, "loss": 0.4152644872665405, "step": 2623 }, { "epoch": 0.6483815171732147, "grad_norm": 1.1242001933314916, "learning_rate": 1.6167126874252775e-05, "loss": 0.32064497470855713, "step": 2624 }, { "epoch": 0.6486286137879911, "grad_norm": 1.2480853112281916, "learning_rate": 1.6163910043974898e-05, "loss": 0.3905348777770996, "step": 2625 }, { "epoch": 0.6488757104027675, "grad_norm": 1.1933402262598198, "learning_rate": 1.6160692184694813e-05, "loss": 0.3947891294956207, "step": 2626 }, { "epoch": 0.6491228070175439, "grad_norm": 1.0708856160291602, "learning_rate": 1.61574732969497e-05, "loss": 0.3083345890045166, "step": 2627 }, { "epoch": 0.6493699036323203, "grad_norm": 1.1926715929936789, "learning_rate": 1.615425338127692e-05, "loss": 0.3793172240257263, "step": 2628 }, { "epoch": 0.6496170002470966, "grad_norm": 1.1414758886543157, "learning_rate": 1.6151032438214006e-05, "loss": 0.3708112835884094, "step": 2629 }, { "epoch": 0.649864096861873, "grad_norm": 1.0408431094170925, "learning_rate": 1.6147810468298667e-05, "loss": 0.32473278045654297, "step": 2630 }, { "epoch": 0.6501111934766494, "grad_norm": 1.0799893594466115, "learning_rate": 1.614458747206877e-05, "loss": 0.3187539577484131, "step": 2631 }, { "epoch": 0.6503582900914258, "grad_norm": 1.1910315422512046, "learning_rate": 1.6141363450062367e-05, "loss": 0.32765311002731323, "step": 2632 }, { "epoch": 0.6506053867062022, "grad_norm": 1.3088912207862566, "learning_rate": 1.6138138402817673e-05, "loss": 0.4175297021865845, "step": 2633 }, { "epoch": 0.6508524833209784, "grad_norm": 1.145940061450207, "learning_rate": 1.613491233087308e-05, "loss": 0.33705437183380127, "step": 2634 }, { "epoch": 0.6510995799357548, "grad_norm": 1.340649977961218, "learning_rate": 1.6131685234767144e-05, "loss": 0.37990492582321167, "step": 2635 }, { "epoch": 0.6513466765505312, "grad_norm": 1.1253699369330186, "learning_rate": 1.61284571150386e-05, "loss": 0.3377375602722168, "step": 2636 }, { "epoch": 0.6515937731653076, "grad_norm": 1.058335881013121, "learning_rate": 1.6125227972226348e-05, "loss": 0.3018220067024231, "step": 2637 }, { "epoch": 0.651840869780084, "grad_norm": 1.1983807292060193, "learning_rate": 1.6121997806869458e-05, "loss": 0.38538140058517456, "step": 2638 }, { "epoch": 0.6520879663948604, "grad_norm": 0.992116015490234, "learning_rate": 1.6118766619507174e-05, "loss": 0.3255753517150879, "step": 2639 }, { "epoch": 0.6523350630096367, "grad_norm": 1.2774980820838053, "learning_rate": 1.6115534410678918e-05, "loss": 0.33561843633651733, "step": 2640 }, { "epoch": 0.6525821596244131, "grad_norm": 1.1658553955702755, "learning_rate": 1.6112301180924267e-05, "loss": 0.3639029860496521, "step": 2641 }, { "epoch": 0.6528292562391895, "grad_norm": 1.1610819292026389, "learning_rate": 1.6109066930782972e-05, "loss": 0.3744019865989685, "step": 2642 }, { "epoch": 0.6530763528539659, "grad_norm": 1.1905512858881835, "learning_rate": 1.6105831660794965e-05, "loss": 0.3617699146270752, "step": 2643 }, { "epoch": 0.6533234494687423, "grad_norm": 1.0829239093836425, "learning_rate": 1.6102595371500343e-05, "loss": 0.3343011140823364, "step": 2644 }, { "epoch": 0.6535705460835186, "grad_norm": 1.0738407430310808, "learning_rate": 1.6099358063439365e-05, "loss": 0.3076052665710449, "step": 2645 }, { "epoch": 0.653817642698295, "grad_norm": 1.3036003153252191, "learning_rate": 1.609611973715247e-05, "loss": 0.34789371490478516, "step": 2646 }, { "epoch": 0.6540647393130714, "grad_norm": 1.1490846441719733, "learning_rate": 1.6092880393180265e-05, "loss": 0.34595751762390137, "step": 2647 }, { "epoch": 0.6543118359278478, "grad_norm": 1.2923314809551687, "learning_rate": 1.6089640032063526e-05, "loss": 0.3939742147922516, "step": 2648 }, { "epoch": 0.6545589325426242, "grad_norm": 1.3306662775813671, "learning_rate": 1.6086398654343198e-05, "loss": 0.4133138060569763, "step": 2649 }, { "epoch": 0.6548060291574005, "grad_norm": 1.3202513103951166, "learning_rate": 1.6083156260560387e-05, "loss": 0.3768444061279297, "step": 2650 }, { "epoch": 0.6550531257721769, "grad_norm": 1.2531505411789767, "learning_rate": 1.6079912851256394e-05, "loss": 0.32666462659835815, "step": 2651 }, { "epoch": 0.6553002223869533, "grad_norm": 1.325697120898294, "learning_rate": 1.607666842697266e-05, "loss": 0.43866202235221863, "step": 2652 }, { "epoch": 0.6555473190017297, "grad_norm": 1.1871094709846746, "learning_rate": 1.6073422988250817e-05, "loss": 0.32061508297920227, "step": 2653 }, { "epoch": 0.6557944156165061, "grad_norm": 1.184395917854677, "learning_rate": 1.607017653563265e-05, "loss": 0.40022599697113037, "step": 2654 }, { "epoch": 0.6560415122312825, "grad_norm": 1.153902162012801, "learning_rate": 1.6066929069660128e-05, "loss": 0.371804416179657, "step": 2655 }, { "epoch": 0.6562886088460588, "grad_norm": 1.157915313496482, "learning_rate": 1.6063680590875383e-05, "loss": 0.3813178539276123, "step": 2656 }, { "epoch": 0.6565357054608352, "grad_norm": 1.261178344337702, "learning_rate": 1.606043109982071e-05, "loss": 0.44476762413978577, "step": 2657 }, { "epoch": 0.6567828020756116, "grad_norm": 1.121906279788871, "learning_rate": 1.6057180597038584e-05, "loss": 0.3571542799472809, "step": 2658 }, { "epoch": 0.657029898690388, "grad_norm": 1.1767092768529028, "learning_rate": 1.605392908307164e-05, "loss": 0.3610125184059143, "step": 2659 }, { "epoch": 0.6572769953051644, "grad_norm": 1.062074439076396, "learning_rate": 1.6050676558462683e-05, "loss": 0.299304723739624, "step": 2660 }, { "epoch": 0.6575240919199407, "grad_norm": 1.1915723982235462, "learning_rate": 1.6047423023754695e-05, "loss": 0.3413258194923401, "step": 2661 }, { "epoch": 0.657771188534717, "grad_norm": 1.1657080798881108, "learning_rate": 1.6044168479490817e-05, "loss": 0.3086547553539276, "step": 2662 }, { "epoch": 0.6580182851494935, "grad_norm": 1.2195462217349495, "learning_rate": 1.6040912926214363e-05, "loss": 0.4002707600593567, "step": 2663 }, { "epoch": 0.6582653817642699, "grad_norm": 1.125222840470997, "learning_rate": 1.6037656364468818e-05, "loss": 0.3297268748283386, "step": 2664 }, { "epoch": 0.6585124783790463, "grad_norm": 1.1910597987604947, "learning_rate": 1.6034398794797826e-05, "loss": 0.3622686564922333, "step": 2665 }, { "epoch": 0.6587595749938225, "grad_norm": 1.170916118013525, "learning_rate": 1.6031140217745207e-05, "loss": 0.3409222960472107, "step": 2666 }, { "epoch": 0.6590066716085989, "grad_norm": 1.0246062582219304, "learning_rate": 1.6027880633854946e-05, "loss": 0.29276978969573975, "step": 2667 }, { "epoch": 0.6592537682233753, "grad_norm": 1.1845263538770185, "learning_rate": 1.6024620043671203e-05, "loss": 0.38767918944358826, "step": 2668 }, { "epoch": 0.6595008648381517, "grad_norm": 1.136021357559886, "learning_rate": 1.6021358447738296e-05, "loss": 0.33611541986465454, "step": 2669 }, { "epoch": 0.6597479614529281, "grad_norm": 1.0466398057145043, "learning_rate": 1.6018095846600714e-05, "loss": 0.3267543315887451, "step": 2670 }, { "epoch": 0.6599950580677044, "grad_norm": 1.2904049315880097, "learning_rate": 1.6014832240803117e-05, "loss": 0.42552533745765686, "step": 2671 }, { "epoch": 0.6602421546824808, "grad_norm": 1.1621504045570235, "learning_rate": 1.6011567630890336e-05, "loss": 0.30980998277664185, "step": 2672 }, { "epoch": 0.6604892512972572, "grad_norm": 1.1631854283306, "learning_rate": 1.6008302017407356e-05, "loss": 0.22779181599617004, "step": 2673 }, { "epoch": 0.6607363479120336, "grad_norm": 1.1717899311291389, "learning_rate": 1.600503540089934e-05, "loss": 0.35192787647247314, "step": 2674 }, { "epoch": 0.66098344452681, "grad_norm": 1.2110721204796415, "learning_rate": 1.6001767781911624e-05, "loss": 0.2967046797275543, "step": 2675 }, { "epoch": 0.6612305411415864, "grad_norm": 1.2606729226355946, "learning_rate": 1.5998499160989694e-05, "loss": 0.3575725853443146, "step": 2676 }, { "epoch": 0.6614776377563627, "grad_norm": 1.3426237937671066, "learning_rate": 1.5995229538679217e-05, "loss": 0.45177412033081055, "step": 2677 }, { "epoch": 0.6617247343711391, "grad_norm": 1.154788861303922, "learning_rate": 1.5991958915526024e-05, "loss": 0.33931297063827515, "step": 2678 }, { "epoch": 0.6619718309859155, "grad_norm": 1.2725496371221532, "learning_rate": 1.5988687292076113e-05, "loss": 0.390535831451416, "step": 2679 }, { "epoch": 0.6622189276006919, "grad_norm": 1.1776707969059343, "learning_rate": 1.5985414668875646e-05, "loss": 0.38157790899276733, "step": 2680 }, { "epoch": 0.6624660242154683, "grad_norm": 1.1288147595165792, "learning_rate": 1.5982141046470954e-05, "loss": 0.3099637031555176, "step": 2681 }, { "epoch": 0.6627131208302446, "grad_norm": 1.2140115561400944, "learning_rate": 1.5978866425408536e-05, "loss": 0.36112672090530396, "step": 2682 }, { "epoch": 0.662960217445021, "grad_norm": 1.170555333243545, "learning_rate": 1.5975590806235056e-05, "loss": 0.34363406896591187, "step": 2683 }, { "epoch": 0.6632073140597974, "grad_norm": 1.3057557406138678, "learning_rate": 1.597231418949735e-05, "loss": 0.40670883655548096, "step": 2684 }, { "epoch": 0.6634544106745738, "grad_norm": 1.1671707601655001, "learning_rate": 1.596903657574241e-05, "loss": 0.39568400382995605, "step": 2685 }, { "epoch": 0.6637015072893502, "grad_norm": 1.0749777015726982, "learning_rate": 1.5965757965517405e-05, "loss": 0.3064078092575073, "step": 2686 }, { "epoch": 0.6639486039041265, "grad_norm": 1.3868823065343703, "learning_rate": 1.596247835936966e-05, "loss": 0.3790194094181061, "step": 2687 }, { "epoch": 0.6641957005189029, "grad_norm": 1.028801027224893, "learning_rate": 1.595919775784668e-05, "loss": 0.31655555963516235, "step": 2688 }, { "epoch": 0.6644427971336793, "grad_norm": 1.2521905886834075, "learning_rate": 1.595591616149612e-05, "loss": 0.3615870475769043, "step": 2689 }, { "epoch": 0.6646898937484557, "grad_norm": 1.1603716861895668, "learning_rate": 1.595263357086581e-05, "loss": 0.3878035545349121, "step": 2690 }, { "epoch": 0.6649369903632321, "grad_norm": 1.1560624517326568, "learning_rate": 1.5949349986503753e-05, "loss": 0.2976042330265045, "step": 2691 }, { "epoch": 0.6651840869780083, "grad_norm": 1.2679391290424202, "learning_rate": 1.5946065408958102e-05, "loss": 0.3683386445045471, "step": 2692 }, { "epoch": 0.6654311835927847, "grad_norm": 1.1946637273991523, "learning_rate": 1.594277983877719e-05, "loss": 0.37289175391197205, "step": 2693 }, { "epoch": 0.6656782802075611, "grad_norm": 1.2080295401928087, "learning_rate": 1.59394932765095e-05, "loss": 0.334316611289978, "step": 2694 }, { "epoch": 0.6659253768223375, "grad_norm": 1.200455114770924, "learning_rate": 1.5936205722703698e-05, "loss": 0.3836359977722168, "step": 2695 }, { "epoch": 0.6661724734371139, "grad_norm": 1.1204032067206369, "learning_rate": 1.5932917177908613e-05, "loss": 0.36138054728507996, "step": 2696 }, { "epoch": 0.6664195700518903, "grad_norm": 1.3961450137384324, "learning_rate": 1.592962764267322e-05, "loss": 0.35262519121170044, "step": 2697 }, { "epoch": 0.6666666666666666, "grad_norm": 1.2378024644301806, "learning_rate": 1.5926337117546684e-05, "loss": 0.36186471581459045, "step": 2698 }, { "epoch": 0.666913763281443, "grad_norm": 1.056967690255054, "learning_rate": 1.5923045603078324e-05, "loss": 0.3162550926208496, "step": 2699 }, { "epoch": 0.6671608598962194, "grad_norm": 1.1135954385487632, "learning_rate": 1.591975309981762e-05, "loss": 0.3191254734992981, "step": 2700 }, { "epoch": 0.6674079565109958, "grad_norm": 1.2824078591196293, "learning_rate": 1.5916459608314225e-05, "loss": 0.4282833933830261, "step": 2701 }, { "epoch": 0.6676550531257722, "grad_norm": 1.0464964356334208, "learning_rate": 1.5913165129117955e-05, "loss": 0.27677279710769653, "step": 2702 }, { "epoch": 0.6679021497405485, "grad_norm": 1.1819852412720178, "learning_rate": 1.5909869662778782e-05, "loss": 0.3492734134197235, "step": 2703 }, { "epoch": 0.6681492463553249, "grad_norm": 1.2483271806769896, "learning_rate": 1.5906573209846864e-05, "loss": 0.37727785110473633, "step": 2704 }, { "epoch": 0.6683963429701013, "grad_norm": 1.1457338558398065, "learning_rate": 1.59032757708725e-05, "loss": 0.3639194071292877, "step": 2705 }, { "epoch": 0.6686434395848777, "grad_norm": 1.1355053536673305, "learning_rate": 1.589997734640617e-05, "loss": 0.34458857774734497, "step": 2706 }, { "epoch": 0.6688905361996541, "grad_norm": 1.1190031741284887, "learning_rate": 1.5896677936998512e-05, "loss": 0.3622613549232483, "step": 2707 }, { "epoch": 0.6691376328144304, "grad_norm": 1.0869760831682702, "learning_rate": 1.5893377543200325e-05, "loss": 0.33273473381996155, "step": 2708 }, { "epoch": 0.6693847294292068, "grad_norm": 1.1400303264647211, "learning_rate": 1.5890076165562574e-05, "loss": 0.2923426330089569, "step": 2709 }, { "epoch": 0.6696318260439832, "grad_norm": 1.0910133867830791, "learning_rate": 1.5886773804636398e-05, "loss": 0.3257511258125305, "step": 2710 }, { "epoch": 0.6698789226587596, "grad_norm": 1.0416012620944797, "learning_rate": 1.588347046097309e-05, "loss": 0.33148127794265747, "step": 2711 }, { "epoch": 0.670126019273536, "grad_norm": 1.1780293941769102, "learning_rate": 1.5880166135124108e-05, "loss": 0.3264511823654175, "step": 2712 }, { "epoch": 0.6703731158883123, "grad_norm": 1.130361960127368, "learning_rate": 1.5876860827641074e-05, "loss": 0.35057783126831055, "step": 2713 }, { "epoch": 0.6706202125030887, "grad_norm": 1.0909405449601461, "learning_rate": 1.5873554539075784e-05, "loss": 0.36532485485076904, "step": 2714 }, { "epoch": 0.6708673091178651, "grad_norm": 1.1603980004822911, "learning_rate": 1.587024726998018e-05, "loss": 0.34014052152633667, "step": 2715 }, { "epoch": 0.6711144057326415, "grad_norm": 1.0495932293270793, "learning_rate": 1.5866939020906378e-05, "loss": 0.34859323501586914, "step": 2716 }, { "epoch": 0.6713615023474179, "grad_norm": 1.1358298866481131, "learning_rate": 1.5863629792406662e-05, "loss": 0.35175758600234985, "step": 2717 }, { "epoch": 0.6716085989621943, "grad_norm": 1.1410603364708456, "learning_rate": 1.5860319585033473e-05, "loss": 0.340697705745697, "step": 2718 }, { "epoch": 0.6718556955769706, "grad_norm": 1.0966116226899858, "learning_rate": 1.585700839933941e-05, "loss": 0.3212571144104004, "step": 2719 }, { "epoch": 0.672102792191747, "grad_norm": 1.155466343392251, "learning_rate": 1.5853696235877252e-05, "loss": 0.3289589285850525, "step": 2720 }, { "epoch": 0.6723498888065234, "grad_norm": 1.244235317657618, "learning_rate": 1.585038309519992e-05, "loss": 0.40821573138237, "step": 2721 }, { "epoch": 0.6725969854212998, "grad_norm": 1.2480791100146524, "learning_rate": 1.584706897786052e-05, "loss": 0.3739045262336731, "step": 2722 }, { "epoch": 0.6728440820360762, "grad_norm": 1.179861331923736, "learning_rate": 1.58437538844123e-05, "loss": 0.3662647604942322, "step": 2723 }, { "epoch": 0.6730911786508524, "grad_norm": 1.039960042971236, "learning_rate": 1.584043781540869e-05, "loss": 0.31968510150909424, "step": 2724 }, { "epoch": 0.6733382752656288, "grad_norm": 1.2633633428706068, "learning_rate": 1.583712077140327e-05, "loss": 0.382120281457901, "step": 2725 }, { "epoch": 0.6735853718804052, "grad_norm": 1.1617485476799874, "learning_rate": 1.5833802752949785e-05, "loss": 0.30885183811187744, "step": 2726 }, { "epoch": 0.6738324684951816, "grad_norm": 1.1747596991768874, "learning_rate": 1.583048376060215e-05, "loss": 0.3522378206253052, "step": 2727 }, { "epoch": 0.674079565109958, "grad_norm": 1.3277482822095048, "learning_rate": 1.582716379491443e-05, "loss": 0.40693262219429016, "step": 2728 }, { "epoch": 0.6743266617247343, "grad_norm": 1.1395910950872004, "learning_rate": 1.582384285644086e-05, "loss": 0.3037918210029602, "step": 2729 }, { "epoch": 0.6745737583395107, "grad_norm": 1.0892388496167125, "learning_rate": 1.5820520945735847e-05, "loss": 0.3190951943397522, "step": 2730 }, { "epoch": 0.6748208549542871, "grad_norm": 1.147796055924, "learning_rate": 1.5817198063353944e-05, "loss": 0.36734676361083984, "step": 2731 }, { "epoch": 0.6750679515690635, "grad_norm": 1.2922067976219245, "learning_rate": 1.5813874209849868e-05, "loss": 0.4402363896369934, "step": 2732 }, { "epoch": 0.6753150481838399, "grad_norm": 1.2104337444175055, "learning_rate": 1.5810549385778508e-05, "loss": 0.3124302327632904, "step": 2733 }, { "epoch": 0.6755621447986162, "grad_norm": 1.2302807486189509, "learning_rate": 1.580722359169491e-05, "loss": 0.348437637090683, "step": 2734 }, { "epoch": 0.6758092414133926, "grad_norm": 1.053959371778045, "learning_rate": 1.5803896828154276e-05, "loss": 0.29827192425727844, "step": 2735 }, { "epoch": 0.676056338028169, "grad_norm": 1.1558726180453585, "learning_rate": 1.5800569095711983e-05, "loss": 0.352424681186676, "step": 2736 }, { "epoch": 0.6763034346429454, "grad_norm": 1.1764403174686966, "learning_rate": 1.5797240394923556e-05, "loss": 0.383107453584671, "step": 2737 }, { "epoch": 0.6765505312577218, "grad_norm": 1.116580984375724, "learning_rate": 1.5793910726344693e-05, "loss": 0.3736529052257538, "step": 2738 }, { "epoch": 0.6767976278724982, "grad_norm": 1.1326262901331023, "learning_rate": 1.5790580090531243e-05, "loss": 0.37637168169021606, "step": 2739 }, { "epoch": 0.6770447244872745, "grad_norm": 1.1446644682288445, "learning_rate": 1.5787248488039223e-05, "loss": 0.39927011728286743, "step": 2740 }, { "epoch": 0.6772918211020509, "grad_norm": 1.1888192348419613, "learning_rate": 1.5783915919424815e-05, "loss": 0.36119696497917175, "step": 2741 }, { "epoch": 0.6775389177168273, "grad_norm": 1.1515288894511582, "learning_rate": 1.5780582385244354e-05, "loss": 0.3188479542732239, "step": 2742 }, { "epoch": 0.6777860143316037, "grad_norm": 1.2604823217495849, "learning_rate": 1.5777247886054336e-05, "loss": 0.39290928840637207, "step": 2743 }, { "epoch": 0.6780331109463801, "grad_norm": 1.3310142239363318, "learning_rate": 1.577391242241143e-05, "loss": 0.38815242052078247, "step": 2744 }, { "epoch": 0.6782802075611564, "grad_norm": 1.1736826713978539, "learning_rate": 1.577057599487245e-05, "loss": 0.3751429617404938, "step": 2745 }, { "epoch": 0.6785273041759328, "grad_norm": 1.1894497683170286, "learning_rate": 1.5767238603994384e-05, "loss": 0.3715296983718872, "step": 2746 }, { "epoch": 0.6787744007907092, "grad_norm": 1.1630342174402752, "learning_rate": 1.5763900250334374e-05, "loss": 0.3379248380661011, "step": 2747 }, { "epoch": 0.6790214974054856, "grad_norm": 1.1779161513391583, "learning_rate": 1.5760560934449722e-05, "loss": 0.35934409499168396, "step": 2748 }, { "epoch": 0.679268594020262, "grad_norm": 1.1993191017734053, "learning_rate": 1.5757220656897896e-05, "loss": 0.3245087265968323, "step": 2749 }, { "epoch": 0.6795156906350382, "grad_norm": 1.134073985369892, "learning_rate": 1.5753879418236522e-05, "loss": 0.346071720123291, "step": 2750 }, { "epoch": 0.6797627872498146, "grad_norm": 1.1306625008818871, "learning_rate": 1.575053721902338e-05, "loss": 0.3198941946029663, "step": 2751 }, { "epoch": 0.680009883864591, "grad_norm": 1.2035676319935589, "learning_rate": 1.5747194059816424e-05, "loss": 0.39590585231781006, "step": 2752 }, { "epoch": 0.6802569804793674, "grad_norm": 1.190283061341973, "learning_rate": 1.5743849941173755e-05, "loss": 0.3100394010543823, "step": 2753 }, { "epoch": 0.6805040770941438, "grad_norm": 1.3102174538717868, "learning_rate": 1.574050486365364e-05, "loss": 0.3943645656108856, "step": 2754 }, { "epoch": 0.6807511737089202, "grad_norm": 1.191389074452793, "learning_rate": 1.5737158827814513e-05, "loss": 0.40348976850509644, "step": 2755 }, { "epoch": 0.6809982703236965, "grad_norm": 1.148072972179473, "learning_rate": 1.573381183421495e-05, "loss": 0.37756234407424927, "step": 2756 }, { "epoch": 0.6812453669384729, "grad_norm": 1.1803662637726544, "learning_rate": 1.5730463883413704e-05, "loss": 0.37032681703567505, "step": 2757 }, { "epoch": 0.6814924635532493, "grad_norm": 1.0900804447634755, "learning_rate": 1.572711497596968e-05, "loss": 0.3445177674293518, "step": 2758 }, { "epoch": 0.6817395601680257, "grad_norm": 1.0949331913417377, "learning_rate": 1.5723765112441944e-05, "loss": 0.33038175106048584, "step": 2759 }, { "epoch": 0.6819866567828021, "grad_norm": 1.1893210173987434, "learning_rate": 1.572041429338972e-05, "loss": 0.40618985891342163, "step": 2760 }, { "epoch": 0.6822337533975784, "grad_norm": 1.1164050109582713, "learning_rate": 1.5717062519372395e-05, "loss": 0.4216606020927429, "step": 2761 }, { "epoch": 0.6824808500123548, "grad_norm": 1.2570098003539016, "learning_rate": 1.571370979094952e-05, "loss": 0.41365882754325867, "step": 2762 }, { "epoch": 0.6827279466271312, "grad_norm": 1.2302506017366157, "learning_rate": 1.5710356108680786e-05, "loss": 0.38679903745651245, "step": 2763 }, { "epoch": 0.6829750432419076, "grad_norm": 1.1415634291408467, "learning_rate": 1.570700147312606e-05, "loss": 0.35266363620758057, "step": 2764 }, { "epoch": 0.683222139856684, "grad_norm": 1.2350438580854752, "learning_rate": 1.5703645884845375e-05, "loss": 0.35472244024276733, "step": 2765 }, { "epoch": 0.6834692364714603, "grad_norm": 1.184138431374236, "learning_rate": 1.5700289344398903e-05, "loss": 0.3848971128463745, "step": 2766 }, { "epoch": 0.6837163330862367, "grad_norm": 1.0580644107384218, "learning_rate": 1.5696931852346985e-05, "loss": 0.3397386074066162, "step": 2767 }, { "epoch": 0.6839634297010131, "grad_norm": 1.1227194865096721, "learning_rate": 1.569357340925012e-05, "loss": 0.29931938648223877, "step": 2768 }, { "epoch": 0.6842105263157895, "grad_norm": 1.254769696356338, "learning_rate": 1.5690214015668967e-05, "loss": 0.36536934971809387, "step": 2769 }, { "epoch": 0.6844576229305659, "grad_norm": 1.1962568709244832, "learning_rate": 1.5686853672164342e-05, "loss": 0.3919636309146881, "step": 2770 }, { "epoch": 0.6847047195453422, "grad_norm": 1.2766163591624082, "learning_rate": 1.568349237929722e-05, "loss": 0.3982052206993103, "step": 2771 }, { "epoch": 0.6849518161601186, "grad_norm": 1.1459802230655272, "learning_rate": 1.568013013762874e-05, "loss": 0.34028786420822144, "step": 2772 }, { "epoch": 0.685198912774895, "grad_norm": 1.2100010801526866, "learning_rate": 1.567676694772019e-05, "loss": 0.40161991119384766, "step": 2773 }, { "epoch": 0.6854460093896714, "grad_norm": 1.1710069722993512, "learning_rate": 1.5673402810133018e-05, "loss": 0.3895764648914337, "step": 2774 }, { "epoch": 0.6856931060044478, "grad_norm": 1.1199458667520972, "learning_rate": 1.5670037725428834e-05, "loss": 0.3630504608154297, "step": 2775 }, { "epoch": 0.6859402026192242, "grad_norm": 1.0229667008390406, "learning_rate": 1.5666671694169407e-05, "loss": 0.32804736495018005, "step": 2776 }, { "epoch": 0.6861872992340005, "grad_norm": 1.2991195045559678, "learning_rate": 1.5663304716916663e-05, "loss": 0.3938347101211548, "step": 2777 }, { "epoch": 0.6864343958487769, "grad_norm": 1.1244691393228883, "learning_rate": 1.5659936794232678e-05, "loss": 0.34852689504623413, "step": 2778 }, { "epoch": 0.6866814924635533, "grad_norm": 1.1450950124701766, "learning_rate": 1.56565679266797e-05, "loss": 0.3500472605228424, "step": 2779 }, { "epoch": 0.6869285890783297, "grad_norm": 1.194871741678119, "learning_rate": 1.565319811482012e-05, "loss": 0.3569410741329193, "step": 2780 }, { "epoch": 0.687175685693106, "grad_norm": 1.1893835106353443, "learning_rate": 1.56498273592165e-05, "loss": 0.38345450162887573, "step": 2781 }, { "epoch": 0.6874227823078823, "grad_norm": 1.063594672778598, "learning_rate": 1.5646455660431552e-05, "loss": 0.2923434376716614, "step": 2782 }, { "epoch": 0.6876698789226587, "grad_norm": 1.130824912062733, "learning_rate": 1.5643083019028147e-05, "loss": 0.3038540780544281, "step": 2783 }, { "epoch": 0.6879169755374351, "grad_norm": 1.1272126279973225, "learning_rate": 1.563970943556931e-05, "loss": 0.38635361194610596, "step": 2784 }, { "epoch": 0.6881640721522115, "grad_norm": 1.306322811435139, "learning_rate": 1.5636334910618228e-05, "loss": 0.3604605197906494, "step": 2785 }, { "epoch": 0.6884111687669879, "grad_norm": 1.3044257825791798, "learning_rate": 1.5632959444738246e-05, "loss": 0.36752623319625854, "step": 2786 }, { "epoch": 0.6886582653817642, "grad_norm": 1.1004029566218165, "learning_rate": 1.5629583038492864e-05, "loss": 0.3357987403869629, "step": 2787 }, { "epoch": 0.6889053619965406, "grad_norm": 1.238508200407615, "learning_rate": 1.5626205692445734e-05, "loss": 0.3807338774204254, "step": 2788 }, { "epoch": 0.689152458611317, "grad_norm": 1.2411851168276848, "learning_rate": 1.5622827407160673e-05, "loss": 0.35518592596054077, "step": 2789 }, { "epoch": 0.6893995552260934, "grad_norm": 1.1776634590829704, "learning_rate": 1.5619448183201652e-05, "loss": 0.36552631855010986, "step": 2790 }, { "epoch": 0.6896466518408698, "grad_norm": 1.2879381394516662, "learning_rate": 1.5616068021132798e-05, "loss": 0.3738049268722534, "step": 2791 }, { "epoch": 0.6898937484556461, "grad_norm": 1.0496236454412127, "learning_rate": 1.561268692151839e-05, "loss": 0.27886876463890076, "step": 2792 }, { "epoch": 0.6901408450704225, "grad_norm": 1.2100240772820707, "learning_rate": 1.5609304884922876e-05, "loss": 0.37677234411239624, "step": 2793 }, { "epoch": 0.6903879416851989, "grad_norm": 1.1093902959562945, "learning_rate": 1.560592191191085e-05, "loss": 0.3447059690952301, "step": 2794 }, { "epoch": 0.6906350382999753, "grad_norm": 1.2117833877695066, "learning_rate": 1.560253800304706e-05, "loss": 0.3676905035972595, "step": 2795 }, { "epoch": 0.6908821349147517, "grad_norm": 1.1142240529838754, "learning_rate": 1.5599153158896424e-05, "loss": 0.3797990083694458, "step": 2796 }, { "epoch": 0.6911292315295281, "grad_norm": 1.2496306293700639, "learning_rate": 1.5595767380024002e-05, "loss": 0.35978612303733826, "step": 2797 }, { "epoch": 0.6913763281443044, "grad_norm": 0.9318080594643927, "learning_rate": 1.5592380666995012e-05, "loss": 0.2648482024669647, "step": 2798 }, { "epoch": 0.6916234247590808, "grad_norm": 1.055883381994195, "learning_rate": 1.5588993020374836e-05, "loss": 0.3477737605571747, "step": 2799 }, { "epoch": 0.6918705213738572, "grad_norm": 1.218845376892678, "learning_rate": 1.5585604440729006e-05, "loss": 0.34016022086143494, "step": 2800 }, { "epoch": 0.6921176179886336, "grad_norm": 1.1952292823021928, "learning_rate": 1.5582214928623213e-05, "loss": 0.34445124864578247, "step": 2801 }, { "epoch": 0.69236471460341, "grad_norm": 1.0964769372823824, "learning_rate": 1.5578824484623304e-05, "loss": 0.3212506175041199, "step": 2802 }, { "epoch": 0.6926118112181863, "grad_norm": 1.2077470301419337, "learning_rate": 1.557543310929527e-05, "loss": 0.3881685137748718, "step": 2803 }, { "epoch": 0.6928589078329627, "grad_norm": 1.1918011827894057, "learning_rate": 1.5572040803205276e-05, "loss": 0.3346017897129059, "step": 2804 }, { "epoch": 0.6931060044477391, "grad_norm": 1.2826582416010603, "learning_rate": 1.5568647566919626e-05, "loss": 0.40590381622314453, "step": 2805 }, { "epoch": 0.6933531010625155, "grad_norm": 1.1632089836687816, "learning_rate": 1.556525340100479e-05, "loss": 0.3971637487411499, "step": 2806 }, { "epoch": 0.6936001976772919, "grad_norm": 1.2930434035020064, "learning_rate": 1.556185830602739e-05, "loss": 0.3800658583641052, "step": 2807 }, { "epoch": 0.6938472942920682, "grad_norm": 1.0400510099621012, "learning_rate": 1.5558462282554202e-05, "loss": 0.29768747091293335, "step": 2808 }, { "epoch": 0.6940943909068445, "grad_norm": 1.1150577536043802, "learning_rate": 1.5555065331152156e-05, "loss": 0.25977712869644165, "step": 2809 }, { "epoch": 0.694341487521621, "grad_norm": 1.2626354057348494, "learning_rate": 1.555166745238834e-05, "loss": 0.369299054145813, "step": 2810 }, { "epoch": 0.6945885841363973, "grad_norm": 1.1297584991729854, "learning_rate": 1.5548268646829992e-05, "loss": 0.3593330979347229, "step": 2811 }, { "epoch": 0.6948356807511737, "grad_norm": 1.191392612748544, "learning_rate": 1.554486891504452e-05, "loss": 0.4194416403770447, "step": 2812 }, { "epoch": 0.69508277736595, "grad_norm": 1.1700792630581085, "learning_rate": 1.5541468257599456e-05, "loss": 0.3079301416873932, "step": 2813 }, { "epoch": 0.6953298739807264, "grad_norm": 1.1717527405263224, "learning_rate": 1.5538066675062524e-05, "loss": 0.3509373664855957, "step": 2814 }, { "epoch": 0.6955769705955028, "grad_norm": 1.049931892345867, "learning_rate": 1.553466416800157e-05, "loss": 0.35472577810287476, "step": 2815 }, { "epoch": 0.6958240672102792, "grad_norm": 1.0979072254909898, "learning_rate": 1.5531260736984616e-05, "loss": 0.35514527559280396, "step": 2816 }, { "epoch": 0.6960711638250556, "grad_norm": 1.3053987479162354, "learning_rate": 1.5527856382579822e-05, "loss": 0.3859570026397705, "step": 2817 }, { "epoch": 0.696318260439832, "grad_norm": 1.1052126914265636, "learning_rate": 1.552445110535552e-05, "loss": 0.3329761326313019, "step": 2818 }, { "epoch": 0.6965653570546083, "grad_norm": 1.097821836543976, "learning_rate": 1.552104490588018e-05, "loss": 0.2980571389198303, "step": 2819 }, { "epoch": 0.6968124536693847, "grad_norm": 1.1308850550033454, "learning_rate": 1.5517637784722435e-05, "loss": 0.31491875648498535, "step": 2820 }, { "epoch": 0.6970595502841611, "grad_norm": 1.1933949607299954, "learning_rate": 1.5514229742451074e-05, "loss": 0.3624289035797119, "step": 2821 }, { "epoch": 0.6973066468989375, "grad_norm": 1.2312864650549789, "learning_rate": 1.5510820779635022e-05, "loss": 0.39977312088012695, "step": 2822 }, { "epoch": 0.6975537435137139, "grad_norm": 1.0931821413906797, "learning_rate": 1.550741089684338e-05, "loss": 0.30806171894073486, "step": 2823 }, { "epoch": 0.6978008401284902, "grad_norm": 1.2527054007463463, "learning_rate": 1.5504000094645397e-05, "loss": 0.3775728940963745, "step": 2824 }, { "epoch": 0.6980479367432666, "grad_norm": 1.2853629487140203, "learning_rate": 1.5500588373610464e-05, "loss": 0.42775672674179077, "step": 2825 }, { "epoch": 0.698295033358043, "grad_norm": 1.2742653546427642, "learning_rate": 1.5497175734308135e-05, "loss": 0.4064474403858185, "step": 2826 }, { "epoch": 0.6985421299728194, "grad_norm": 1.2491445653760513, "learning_rate": 1.549376217730812e-05, "loss": 0.3784012198448181, "step": 2827 }, { "epoch": 0.6987892265875958, "grad_norm": 1.2806703056203352, "learning_rate": 1.549034770318027e-05, "loss": 0.34835028648376465, "step": 2828 }, { "epoch": 0.6990363232023721, "grad_norm": 1.1187936051038272, "learning_rate": 1.54869323124946e-05, "loss": 0.30872130393981934, "step": 2829 }, { "epoch": 0.6992834198171485, "grad_norm": 1.2154066313264353, "learning_rate": 1.548351600582128e-05, "loss": 0.34146320819854736, "step": 2830 }, { "epoch": 0.6995305164319249, "grad_norm": 1.3682463930909283, "learning_rate": 1.548009878373062e-05, "loss": 0.3861428499221802, "step": 2831 }, { "epoch": 0.6997776130467013, "grad_norm": 1.1827520745861362, "learning_rate": 1.5476680646793094e-05, "loss": 0.3001266121864319, "step": 2832 }, { "epoch": 0.7000247096614777, "grad_norm": 1.2654112391771304, "learning_rate": 1.5473261595579328e-05, "loss": 0.35385239124298096, "step": 2833 }, { "epoch": 0.700271806276254, "grad_norm": 1.2120669688104768, "learning_rate": 1.5469841630660093e-05, "loss": 0.30350446701049805, "step": 2834 }, { "epoch": 0.7005189028910304, "grad_norm": 1.3993713890277917, "learning_rate": 1.5466420752606324e-05, "loss": 0.4429166615009308, "step": 2835 }, { "epoch": 0.7007659995058068, "grad_norm": 1.2114845738111446, "learning_rate": 1.5462998961989093e-05, "loss": 0.3583378195762634, "step": 2836 }, { "epoch": 0.7010130961205832, "grad_norm": 1.2050019996178942, "learning_rate": 1.545957625937964e-05, "loss": 0.39391911029815674, "step": 2837 }, { "epoch": 0.7012601927353596, "grad_norm": 1.1679466964631697, "learning_rate": 1.5456152645349346e-05, "loss": 0.30182430148124695, "step": 2838 }, { "epoch": 0.701507289350136, "grad_norm": 1.1715564187482232, "learning_rate": 1.5452728120469747e-05, "loss": 0.30723726749420166, "step": 2839 }, { "epoch": 0.7017543859649122, "grad_norm": 1.1482235106101946, "learning_rate": 1.544930268531254e-05, "loss": 0.3711516261100769, "step": 2840 }, { "epoch": 0.7020014825796886, "grad_norm": 1.3145899041505216, "learning_rate": 1.5445876340449567e-05, "loss": 0.33330637216567993, "step": 2841 }, { "epoch": 0.702248579194465, "grad_norm": 1.031494671207003, "learning_rate": 1.5442449086452815e-05, "loss": 0.3268534541130066, "step": 2842 }, { "epoch": 0.7024956758092414, "grad_norm": 1.3073858890492032, "learning_rate": 1.5439020923894432e-05, "loss": 0.3702878952026367, "step": 2843 }, { "epoch": 0.7027427724240178, "grad_norm": 1.268081027787257, "learning_rate": 1.5435591853346714e-05, "loss": 0.32094863057136536, "step": 2844 }, { "epoch": 0.7029898690387941, "grad_norm": 1.2699267697087313, "learning_rate": 1.543216187538211e-05, "loss": 0.35776638984680176, "step": 2845 }, { "epoch": 0.7032369656535705, "grad_norm": 1.14086384734702, "learning_rate": 1.5428730990573223e-05, "loss": 0.34912192821502686, "step": 2846 }, { "epoch": 0.7034840622683469, "grad_norm": 1.1930064282909896, "learning_rate": 1.5425299199492797e-05, "loss": 0.3144100308418274, "step": 2847 }, { "epoch": 0.7037311588831233, "grad_norm": 1.0543528107409943, "learning_rate": 1.542186650271374e-05, "loss": 0.2927097976207733, "step": 2848 }, { "epoch": 0.7039782554978997, "grad_norm": 1.232316172792104, "learning_rate": 1.541843290080911e-05, "loss": 0.37727147340774536, "step": 2849 }, { "epoch": 0.704225352112676, "grad_norm": 1.23195575114384, "learning_rate": 1.5414998394352105e-05, "loss": 0.3103561997413635, "step": 2850 }, { "epoch": 0.7044724487274524, "grad_norm": 1.2770047752503615, "learning_rate": 1.541156298391608e-05, "loss": 0.41665372252464294, "step": 2851 }, { "epoch": 0.7047195453422288, "grad_norm": 1.184265161421334, "learning_rate": 1.5408126670074548e-05, "loss": 0.3631194829940796, "step": 2852 }, { "epoch": 0.7049666419570052, "grad_norm": 1.084708867195712, "learning_rate": 1.5404689453401163e-05, "loss": 0.2936696410179138, "step": 2853 }, { "epoch": 0.7052137385717816, "grad_norm": 1.1442777813869842, "learning_rate": 1.5401251334469733e-05, "loss": 0.3700244426727295, "step": 2854 }, { "epoch": 0.705460835186558, "grad_norm": 1.2003476815287548, "learning_rate": 1.539781231385422e-05, "loss": 0.3177962899208069, "step": 2855 }, { "epoch": 0.7057079318013343, "grad_norm": 1.2946900962153756, "learning_rate": 1.5394372392128732e-05, "loss": 0.3400028347969055, "step": 2856 }, { "epoch": 0.7059550284161107, "grad_norm": 1.190698987448686, "learning_rate": 1.5390931569867528e-05, "loss": 0.34319812059402466, "step": 2857 }, { "epoch": 0.7062021250308871, "grad_norm": 1.1263700658369815, "learning_rate": 1.538748984764502e-05, "loss": 0.32456088066101074, "step": 2858 }, { "epoch": 0.7064492216456635, "grad_norm": 1.2154120082395625, "learning_rate": 1.538404722603577e-05, "loss": 0.3442002236843109, "step": 2859 }, { "epoch": 0.7066963182604399, "grad_norm": 1.233768734014814, "learning_rate": 1.5380603705614486e-05, "loss": 0.37653806805610657, "step": 2860 }, { "epoch": 0.7069434148752162, "grad_norm": 1.206644209108322, "learning_rate": 1.5377159286956026e-05, "loss": 0.393071711063385, "step": 2861 }, { "epoch": 0.7071905114899926, "grad_norm": 1.2766316060157816, "learning_rate": 1.5373713970635407e-05, "loss": 0.402737021446228, "step": 2862 }, { "epoch": 0.707437608104769, "grad_norm": 1.2814091057583699, "learning_rate": 1.5370267757227787e-05, "loss": 0.43892452120780945, "step": 2863 }, { "epoch": 0.7076847047195454, "grad_norm": 1.2448410061600648, "learning_rate": 1.5366820647308475e-05, "loss": 0.37757977843284607, "step": 2864 }, { "epoch": 0.7079318013343218, "grad_norm": 1.086571336768437, "learning_rate": 1.536337264145293e-05, "loss": 0.31933465600013733, "step": 2865 }, { "epoch": 0.708178897949098, "grad_norm": 1.265338063045137, "learning_rate": 1.5359923740236765e-05, "loss": 0.41716665029525757, "step": 2866 }, { "epoch": 0.7084259945638745, "grad_norm": 1.1556239919543851, "learning_rate": 1.535647394423574e-05, "loss": 0.3493465781211853, "step": 2867 }, { "epoch": 0.7086730911786508, "grad_norm": 1.1552313133085863, "learning_rate": 1.5353023254025753e-05, "loss": 0.2852872610092163, "step": 2868 }, { "epoch": 0.7089201877934272, "grad_norm": 1.1913513598799406, "learning_rate": 1.534957167018288e-05, "loss": 0.3813187777996063, "step": 2869 }, { "epoch": 0.7091672844082036, "grad_norm": 1.1977154860801646, "learning_rate": 1.5346119193283314e-05, "loss": 0.4046867787837982, "step": 2870 }, { "epoch": 0.7094143810229799, "grad_norm": 1.2042150536001865, "learning_rate": 1.5342665823903414e-05, "loss": 0.42741701006889343, "step": 2871 }, { "epoch": 0.7096614776377563, "grad_norm": 1.1279666884322734, "learning_rate": 1.5339211562619687e-05, "loss": 0.37808412313461304, "step": 2872 }, { "epoch": 0.7099085742525327, "grad_norm": 1.0899522828833215, "learning_rate": 1.533575641000878e-05, "loss": 0.3536792993545532, "step": 2873 }, { "epoch": 0.7101556708673091, "grad_norm": 1.1058677011088025, "learning_rate": 1.5332300366647507e-05, "loss": 0.3444337248802185, "step": 2874 }, { "epoch": 0.7104027674820855, "grad_norm": 1.176018117600925, "learning_rate": 1.532884343311281e-05, "loss": 0.3536263704299927, "step": 2875 }, { "epoch": 0.7106498640968619, "grad_norm": 1.045087071192406, "learning_rate": 1.5325385609981798e-05, "loss": 0.3013978898525238, "step": 2876 }, { "epoch": 0.7108969607116382, "grad_norm": 1.1500865167108978, "learning_rate": 1.532192689783171e-05, "loss": 0.3441227674484253, "step": 2877 }, { "epoch": 0.7111440573264146, "grad_norm": 1.0877680793394553, "learning_rate": 1.5318467297239947e-05, "loss": 0.36121052503585815, "step": 2878 }, { "epoch": 0.711391153941191, "grad_norm": 1.2110045768058262, "learning_rate": 1.5315006808784058e-05, "loss": 0.32612884044647217, "step": 2879 }, { "epoch": 0.7116382505559674, "grad_norm": 1.207838872878121, "learning_rate": 1.5311545433041722e-05, "loss": 0.385373055934906, "step": 2880 }, { "epoch": 0.7118853471707438, "grad_norm": 1.0977958174329387, "learning_rate": 1.53080831705908e-05, "loss": 0.3405875265598297, "step": 2881 }, { "epoch": 0.7121324437855201, "grad_norm": 1.1851104688955867, "learning_rate": 1.530462002200927e-05, "loss": 0.34368377923965454, "step": 2882 }, { "epoch": 0.7123795404002965, "grad_norm": 1.0286524638632026, "learning_rate": 1.5301155987875277e-05, "loss": 0.32639893889427185, "step": 2883 }, { "epoch": 0.7126266370150729, "grad_norm": 1.236741450619732, "learning_rate": 1.5297691068767095e-05, "loss": 0.3720354437828064, "step": 2884 }, { "epoch": 0.7128737336298493, "grad_norm": 1.1936328776651783, "learning_rate": 1.5294225265263162e-05, "loss": 0.3575345575809479, "step": 2885 }, { "epoch": 0.7131208302446257, "grad_norm": 1.1115469811071679, "learning_rate": 1.5290758577942067e-05, "loss": 0.32989388704299927, "step": 2886 }, { "epoch": 0.713367926859402, "grad_norm": 1.1809595408444837, "learning_rate": 1.5287291007382526e-05, "loss": 0.410249799489975, "step": 2887 }, { "epoch": 0.7136150234741784, "grad_norm": 1.2281202332494658, "learning_rate": 1.5283822554163423e-05, "loss": 0.3597467541694641, "step": 2888 }, { "epoch": 0.7138621200889548, "grad_norm": 1.022610052559893, "learning_rate": 1.5280353218863778e-05, "loss": 0.29504963755607605, "step": 2889 }, { "epoch": 0.7141092167037312, "grad_norm": 1.1265632750141945, "learning_rate": 1.5276883002062762e-05, "loss": 0.29801732301712036, "step": 2890 }, { "epoch": 0.7143563133185076, "grad_norm": 1.1393096943968388, "learning_rate": 1.5273411904339692e-05, "loss": 0.3185853958129883, "step": 2891 }, { "epoch": 0.7146034099332839, "grad_norm": 1.370426541546549, "learning_rate": 1.526993992627403e-05, "loss": 0.348358154296875, "step": 2892 }, { "epoch": 0.7148505065480603, "grad_norm": 1.0703982793553606, "learning_rate": 1.526646706844539e-05, "loss": 0.3058416247367859, "step": 2893 }, { "epoch": 0.7150976031628367, "grad_norm": 1.215953516932583, "learning_rate": 1.5262993331433533e-05, "loss": 0.3397080898284912, "step": 2894 }, { "epoch": 0.7153446997776131, "grad_norm": 1.1935954168858927, "learning_rate": 1.5259518715818356e-05, "loss": 0.3431982398033142, "step": 2895 }, { "epoch": 0.7155917963923895, "grad_norm": 1.1799772183502513, "learning_rate": 1.5256043222179924e-05, "loss": 0.32888364791870117, "step": 2896 }, { "epoch": 0.7158388930071659, "grad_norm": 1.1807472796242855, "learning_rate": 1.5252566851098425e-05, "loss": 0.29149967432022095, "step": 2897 }, { "epoch": 0.7160859896219421, "grad_norm": 1.2905075446593717, "learning_rate": 1.5249089603154206e-05, "loss": 0.33298182487487793, "step": 2898 }, { "epoch": 0.7163330862367185, "grad_norm": 1.130594416204695, "learning_rate": 1.5245611478927758e-05, "loss": 0.3410637378692627, "step": 2899 }, { "epoch": 0.7165801828514949, "grad_norm": 1.2703771108544415, "learning_rate": 1.5242132478999722e-05, "loss": 0.3732583820819855, "step": 2900 }, { "epoch": 0.7168272794662713, "grad_norm": 1.1448812153101313, "learning_rate": 1.5238652603950877e-05, "loss": 0.3473847508430481, "step": 2901 }, { "epoch": 0.7170743760810477, "grad_norm": 1.1648844901664481, "learning_rate": 1.5235171854362154e-05, "loss": 0.34788191318511963, "step": 2902 }, { "epoch": 0.717321472695824, "grad_norm": 1.2015398263436656, "learning_rate": 1.5231690230814633e-05, "loss": 0.3636695146560669, "step": 2903 }, { "epoch": 0.7175685693106004, "grad_norm": 1.2544424991920744, "learning_rate": 1.522820773388953e-05, "loss": 0.3119182288646698, "step": 2904 }, { "epoch": 0.7178156659253768, "grad_norm": 1.2929550278062927, "learning_rate": 1.5224724364168215e-05, "loss": 0.3664253354072571, "step": 2905 }, { "epoch": 0.7180627625401532, "grad_norm": 1.1568024917590984, "learning_rate": 1.5221240122232201e-05, "loss": 0.3471584618091583, "step": 2906 }, { "epoch": 0.7183098591549296, "grad_norm": 1.2878976178542194, "learning_rate": 1.5217755008663149e-05, "loss": 0.394331693649292, "step": 2907 }, { "epoch": 0.7185569557697059, "grad_norm": 1.1133286894192533, "learning_rate": 1.521426902404286e-05, "loss": 0.3574444651603699, "step": 2908 }, { "epoch": 0.7188040523844823, "grad_norm": 1.1305189291938185, "learning_rate": 1.5210782168953284e-05, "loss": 0.36834877729415894, "step": 2909 }, { "epoch": 0.7190511489992587, "grad_norm": 1.1579379151239368, "learning_rate": 1.5207294443976518e-05, "loss": 0.26288101077079773, "step": 2910 }, { "epoch": 0.7192982456140351, "grad_norm": 1.2148550531199898, "learning_rate": 1.5203805849694805e-05, "loss": 0.3458217978477478, "step": 2911 }, { "epoch": 0.7195453422288115, "grad_norm": 1.1251269988687065, "learning_rate": 1.520031638669052e-05, "loss": 0.35527265071868896, "step": 2912 }, { "epoch": 0.7197924388435878, "grad_norm": 1.238258201047935, "learning_rate": 1.5196826055546203e-05, "loss": 0.3602432608604431, "step": 2913 }, { "epoch": 0.7200395354583642, "grad_norm": 1.2001180730319445, "learning_rate": 1.5193334856844528e-05, "loss": 0.3806783854961395, "step": 2914 }, { "epoch": 0.7202866320731406, "grad_norm": 1.185860159867311, "learning_rate": 1.5189842791168314e-05, "loss": 0.3539336025714874, "step": 2915 }, { "epoch": 0.720533728687917, "grad_norm": 1.2028374531767, "learning_rate": 1.5186349859100524e-05, "loss": 0.33268076181411743, "step": 2916 }, { "epoch": 0.7207808253026934, "grad_norm": 1.2314994825971306, "learning_rate": 1.5182856061224271e-05, "loss": 0.38219448924064636, "step": 2917 }, { "epoch": 0.7210279219174698, "grad_norm": 1.1561338901138791, "learning_rate": 1.5179361398122808e-05, "loss": 0.334580659866333, "step": 2918 }, { "epoch": 0.7212750185322461, "grad_norm": 1.1920599856460465, "learning_rate": 1.5175865870379529e-05, "loss": 0.3335151672363281, "step": 2919 }, { "epoch": 0.7215221151470225, "grad_norm": 1.236236549168554, "learning_rate": 1.5172369478577982e-05, "loss": 0.36183151602745056, "step": 2920 }, { "epoch": 0.7217692117617989, "grad_norm": 1.299225036499463, "learning_rate": 1.5168872223301856e-05, "loss": 0.4153488576412201, "step": 2921 }, { "epoch": 0.7220163083765753, "grad_norm": 1.1103765705694757, "learning_rate": 1.5165374105134977e-05, "loss": 0.2619406580924988, "step": 2922 }, { "epoch": 0.7222634049913517, "grad_norm": 1.106946207170238, "learning_rate": 1.5161875124661323e-05, "loss": 0.29545527696609497, "step": 2923 }, { "epoch": 0.722510501606128, "grad_norm": 1.2107871252577163, "learning_rate": 1.5158375282465012e-05, "loss": 0.3217892050743103, "step": 2924 }, { "epoch": 0.7227575982209044, "grad_norm": 1.0842989747503458, "learning_rate": 1.5154874579130309e-05, "loss": 0.2885577082633972, "step": 2925 }, { "epoch": 0.7230046948356808, "grad_norm": 1.2670140420410878, "learning_rate": 1.5151373015241617e-05, "loss": 0.353529155254364, "step": 2926 }, { "epoch": 0.7232517914504571, "grad_norm": 1.316504020103273, "learning_rate": 1.5147870591383492e-05, "loss": 0.3768170475959778, "step": 2927 }, { "epoch": 0.7234988880652335, "grad_norm": 1.2101787701711124, "learning_rate": 1.5144367308140627e-05, "loss": 0.34705209732055664, "step": 2928 }, { "epoch": 0.7237459846800098, "grad_norm": 1.3132671220919943, "learning_rate": 1.5140863166097855e-05, "loss": 0.3677259683609009, "step": 2929 }, { "epoch": 0.7239930812947862, "grad_norm": 1.3866031569765043, "learning_rate": 1.5137358165840159e-05, "loss": 0.3985472321510315, "step": 2930 }, { "epoch": 0.7242401779095626, "grad_norm": 1.2495221124542903, "learning_rate": 1.5133852307952666e-05, "loss": 0.39597827196121216, "step": 2931 }, { "epoch": 0.724487274524339, "grad_norm": 1.1125999341107355, "learning_rate": 1.513034559302064e-05, "loss": 0.36262422800064087, "step": 2932 }, { "epoch": 0.7247343711391154, "grad_norm": 1.2198416576896025, "learning_rate": 1.5126838021629496e-05, "loss": 0.36834466457366943, "step": 2933 }, { "epoch": 0.7249814677538917, "grad_norm": 1.1997264399135814, "learning_rate": 1.5123329594364787e-05, "loss": 0.36133843660354614, "step": 2934 }, { "epoch": 0.7252285643686681, "grad_norm": 1.1612765289892393, "learning_rate": 1.51198203118122e-05, "loss": 0.3454573452472687, "step": 2935 }, { "epoch": 0.7254756609834445, "grad_norm": 1.161790623836711, "learning_rate": 1.5116310174557582e-05, "loss": 0.30950725078582764, "step": 2936 }, { "epoch": 0.7257227575982209, "grad_norm": 1.126994372171724, "learning_rate": 1.5112799183186915e-05, "loss": 0.3134285509586334, "step": 2937 }, { "epoch": 0.7259698542129973, "grad_norm": 1.2289485458954477, "learning_rate": 1.5109287338286325e-05, "loss": 0.33443155884742737, "step": 2938 }, { "epoch": 0.7262169508277737, "grad_norm": 1.1546809555624218, "learning_rate": 1.5105774640442073e-05, "loss": 0.3478814363479614, "step": 2939 }, { "epoch": 0.72646404744255, "grad_norm": 1.1828393399616548, "learning_rate": 1.5102261090240569e-05, "loss": 0.35717102885246277, "step": 2940 }, { "epoch": 0.7267111440573264, "grad_norm": 1.2653575146193945, "learning_rate": 1.509874668826837e-05, "loss": 0.3716880679130554, "step": 2941 }, { "epoch": 0.7269582406721028, "grad_norm": 1.207008685499738, "learning_rate": 1.5095231435112165e-05, "loss": 0.3466199040412903, "step": 2942 }, { "epoch": 0.7272053372868792, "grad_norm": 1.2192772726668437, "learning_rate": 1.5091715331358792e-05, "loss": 0.351062536239624, "step": 2943 }, { "epoch": 0.7274524339016556, "grad_norm": 1.240895060257169, "learning_rate": 1.5088198377595226e-05, "loss": 0.3346995413303375, "step": 2944 }, { "epoch": 0.7276995305164319, "grad_norm": 1.130972232068014, "learning_rate": 1.5084680574408595e-05, "loss": 0.31152331829071045, "step": 2945 }, { "epoch": 0.7279466271312083, "grad_norm": 1.1220792952606287, "learning_rate": 1.5081161922386147e-05, "loss": 0.351888507604599, "step": 2946 }, { "epoch": 0.7281937237459847, "grad_norm": 1.3026229567819263, "learning_rate": 1.5077642422115296e-05, "loss": 0.33895063400268555, "step": 2947 }, { "epoch": 0.7284408203607611, "grad_norm": 1.3723608550240969, "learning_rate": 1.5074122074183583e-05, "loss": 0.3704664707183838, "step": 2948 }, { "epoch": 0.7286879169755375, "grad_norm": 1.1994189529519028, "learning_rate": 1.5070600879178694e-05, "loss": 0.3651512861251831, "step": 2949 }, { "epoch": 0.7289350135903138, "grad_norm": 1.059185367173951, "learning_rate": 1.5067078837688458e-05, "loss": 0.30728334188461304, "step": 2950 }, { "epoch": 0.7291821102050902, "grad_norm": 1.2108174647959398, "learning_rate": 1.5063555950300844e-05, "loss": 0.332084059715271, "step": 2951 }, { "epoch": 0.7294292068198666, "grad_norm": 1.2953215258969348, "learning_rate": 1.5060032217603961e-05, "loss": 0.36325857043266296, "step": 2952 }, { "epoch": 0.729676303434643, "grad_norm": 1.1645698812490104, "learning_rate": 1.5056507640186065e-05, "loss": 0.3632184565067291, "step": 2953 }, { "epoch": 0.7299234000494194, "grad_norm": 1.4194433603498513, "learning_rate": 1.5052982218635542e-05, "loss": 0.35636967420578003, "step": 2954 }, { "epoch": 0.7301704966641956, "grad_norm": 1.1492245610593714, "learning_rate": 1.5049455953540926e-05, "loss": 0.3140638768672943, "step": 2955 }, { "epoch": 0.730417593278972, "grad_norm": 2.8941506657568885, "learning_rate": 1.5045928845490894e-05, "loss": 0.3361854553222656, "step": 2956 }, { "epoch": 0.7306646898937484, "grad_norm": 1.292395580284491, "learning_rate": 1.5042400895074262e-05, "loss": 0.3780355453491211, "step": 2957 }, { "epoch": 0.7309117865085248, "grad_norm": 1.3553434673284455, "learning_rate": 1.5038872102879982e-05, "loss": 0.3888869285583496, "step": 2958 }, { "epoch": 0.7311588831233012, "grad_norm": 1.178053647007653, "learning_rate": 1.5035342469497155e-05, "loss": 0.3572891056537628, "step": 2959 }, { "epoch": 0.7314059797380776, "grad_norm": 1.0768392221708885, "learning_rate": 1.5031811995515012e-05, "loss": 0.32120829820632935, "step": 2960 }, { "epoch": 0.7316530763528539, "grad_norm": 0.9828104814093173, "learning_rate": 1.5028280681522931e-05, "loss": 0.3085906505584717, "step": 2961 }, { "epoch": 0.7319001729676303, "grad_norm": 1.3234612355311794, "learning_rate": 1.5024748528110431e-05, "loss": 0.3929934799671173, "step": 2962 }, { "epoch": 0.7321472695824067, "grad_norm": 1.2020322139315953, "learning_rate": 1.5021215535867166e-05, "loss": 0.36437875032424927, "step": 2963 }, { "epoch": 0.7323943661971831, "grad_norm": 1.2217513830267077, "learning_rate": 1.5017681705382938e-05, "loss": 0.345183789730072, "step": 2964 }, { "epoch": 0.7326414628119595, "grad_norm": 1.2172414819878001, "learning_rate": 1.5014147037247682e-05, "loss": 0.32615193724632263, "step": 2965 }, { "epoch": 0.7328885594267358, "grad_norm": 1.20623687878146, "learning_rate": 1.5010611532051474e-05, "loss": 0.3366418182849884, "step": 2966 }, { "epoch": 0.7331356560415122, "grad_norm": 1.421480886156197, "learning_rate": 1.5007075190384528e-05, "loss": 0.3098849654197693, "step": 2967 }, { "epoch": 0.7333827526562886, "grad_norm": 1.2997359138999505, "learning_rate": 1.5003538012837206e-05, "loss": 0.3586621880531311, "step": 2968 }, { "epoch": 0.733629849271065, "grad_norm": 1.2405496893039034, "learning_rate": 1.5000000000000002e-05, "loss": 0.368030309677124, "step": 2969 }, { "epoch": 0.7338769458858414, "grad_norm": 1.0622570021285491, "learning_rate": 1.4996461152463548e-05, "loss": 0.2773520350456238, "step": 2970 }, { "epoch": 0.7341240425006177, "grad_norm": 1.1929570419608764, "learning_rate": 1.4992921470818625e-05, "loss": 0.35108238458633423, "step": 2971 }, { "epoch": 0.7343711391153941, "grad_norm": 1.1602463900361193, "learning_rate": 1.498938095565614e-05, "loss": 0.28860265016555786, "step": 2972 }, { "epoch": 0.7346182357301705, "grad_norm": 1.1806233394993015, "learning_rate": 1.4985839607567151e-05, "loss": 0.32598596811294556, "step": 2973 }, { "epoch": 0.7348653323449469, "grad_norm": 1.4300622106597412, "learning_rate": 1.498229742714285e-05, "loss": 0.36551401019096375, "step": 2974 }, { "epoch": 0.7351124289597233, "grad_norm": 1.2588670674134883, "learning_rate": 1.4978754414974564e-05, "loss": 0.34101903438568115, "step": 2975 }, { "epoch": 0.7353595255744997, "grad_norm": 1.4029415512751593, "learning_rate": 1.4975210571653767e-05, "loss": 0.4341028928756714, "step": 2976 }, { "epoch": 0.735606622189276, "grad_norm": 1.351341223296773, "learning_rate": 1.4971665897772062e-05, "loss": 0.4601435959339142, "step": 2977 }, { "epoch": 0.7358537188040524, "grad_norm": 1.296411683096495, "learning_rate": 1.4968120393921206e-05, "loss": 0.34837204217910767, "step": 2978 }, { "epoch": 0.7361008154188288, "grad_norm": 1.4629893295332073, "learning_rate": 1.4964574060693078e-05, "loss": 0.3802284002304077, "step": 2979 }, { "epoch": 0.7363479120336052, "grad_norm": 1.0966425305787273, "learning_rate": 1.4961026898679703e-05, "loss": 0.28037264943122864, "step": 2980 }, { "epoch": 0.7365950086483816, "grad_norm": 1.077625526209257, "learning_rate": 1.4957478908473241e-05, "loss": 0.32766425609588623, "step": 2981 }, { "epoch": 0.7368421052631579, "grad_norm": 1.2656120258247077, "learning_rate": 1.4953930090666003e-05, "loss": 0.3291098475456238, "step": 2982 }, { "epoch": 0.7370892018779343, "grad_norm": 1.158728736665764, "learning_rate": 1.4950380445850417e-05, "loss": 0.3069840669631958, "step": 2983 }, { "epoch": 0.7373362984927107, "grad_norm": 1.1155899413283508, "learning_rate": 1.4946829974619065e-05, "loss": 0.3191441297531128, "step": 2984 }, { "epoch": 0.737583395107487, "grad_norm": 1.194083660798679, "learning_rate": 1.4943278677564663e-05, "loss": 0.3771979808807373, "step": 2985 }, { "epoch": 0.7378304917222634, "grad_norm": 1.1821514460523177, "learning_rate": 1.4939726555280061e-05, "loss": 0.34255191683769226, "step": 2986 }, { "epoch": 0.7380775883370397, "grad_norm": 1.2776321145356055, "learning_rate": 1.4936173608358254e-05, "loss": 0.3950074315071106, "step": 2987 }, { "epoch": 0.7383246849518161, "grad_norm": 1.1320817730851898, "learning_rate": 1.4932619837392366e-05, "loss": 0.35276150703430176, "step": 2988 }, { "epoch": 0.7385717815665925, "grad_norm": 1.1218240054458328, "learning_rate": 1.4929065242975665e-05, "loss": 0.3562018871307373, "step": 2989 }, { "epoch": 0.7388188781813689, "grad_norm": 1.2335661951090253, "learning_rate": 1.4925509825701557e-05, "loss": 0.36089086532592773, "step": 2990 }, { "epoch": 0.7390659747961453, "grad_norm": 1.3025820278546578, "learning_rate": 1.4921953586163578e-05, "loss": 0.3667137920856476, "step": 2991 }, { "epoch": 0.7393130714109216, "grad_norm": 1.1265734864683132, "learning_rate": 1.4918396524955405e-05, "loss": 0.33688706159591675, "step": 2992 }, { "epoch": 0.739560168025698, "grad_norm": 1.0132318875117974, "learning_rate": 1.4914838642670862e-05, "loss": 0.28503331542015076, "step": 2993 }, { "epoch": 0.7398072646404744, "grad_norm": 1.3791277828052244, "learning_rate": 1.4911279939903893e-05, "loss": 0.3797142505645752, "step": 2994 }, { "epoch": 0.7400543612552508, "grad_norm": 1.0421709198067126, "learning_rate": 1.4907720417248588e-05, "loss": 0.29266923666000366, "step": 2995 }, { "epoch": 0.7403014578700272, "grad_norm": 1.1492859150278594, "learning_rate": 1.4904160075299178e-05, "loss": 0.33739277720451355, "step": 2996 }, { "epoch": 0.7405485544848036, "grad_norm": 1.1676847130348371, "learning_rate": 1.4900598914650024e-05, "loss": 0.33820247650146484, "step": 2997 }, { "epoch": 0.7407956510995799, "grad_norm": 1.1790750001582169, "learning_rate": 1.4897036935895626e-05, "loss": 0.3428526520729065, "step": 2998 }, { "epoch": 0.7410427477143563, "grad_norm": 1.2546735703608762, "learning_rate": 1.4893474139630614e-05, "loss": 0.35213521122932434, "step": 2999 }, { "epoch": 0.7412898443291327, "grad_norm": 1.1592821934318114, "learning_rate": 1.4889910526449771e-05, "loss": 0.31846022605895996, "step": 3000 }, { "epoch": 0.7415369409439091, "grad_norm": 1.230109077405123, "learning_rate": 1.4886346096947999e-05, "loss": 0.3641698360443115, "step": 3001 }, { "epoch": 0.7417840375586855, "grad_norm": 1.2574372228502098, "learning_rate": 1.4882780851720344e-05, "loss": 0.30185359716415405, "step": 3002 }, { "epoch": 0.7420311341734618, "grad_norm": 1.261428837386932, "learning_rate": 1.4879214791361993e-05, "loss": 0.34517529606819153, "step": 3003 }, { "epoch": 0.7422782307882382, "grad_norm": 1.100250941853395, "learning_rate": 1.4875647916468255e-05, "loss": 0.2941664159297943, "step": 3004 }, { "epoch": 0.7425253274030146, "grad_norm": 1.1075135469834656, "learning_rate": 1.4872080227634588e-05, "loss": 0.3378065824508667, "step": 3005 }, { "epoch": 0.742772424017791, "grad_norm": 1.1150814889973508, "learning_rate": 1.486851172545658e-05, "loss": 0.3089839518070221, "step": 3006 }, { "epoch": 0.7430195206325674, "grad_norm": 1.3295878085511688, "learning_rate": 1.4864942410529961e-05, "loss": 0.3885310888290405, "step": 3007 }, { "epoch": 0.7432666172473437, "grad_norm": 1.1845895197166438, "learning_rate": 1.4861372283450589e-05, "loss": 0.3384432792663574, "step": 3008 }, { "epoch": 0.7435137138621201, "grad_norm": 1.292548704144376, "learning_rate": 1.4857801344814456e-05, "loss": 0.33969974517822266, "step": 3009 }, { "epoch": 0.7437608104768965, "grad_norm": 1.310520468221305, "learning_rate": 1.48542295952177e-05, "loss": 0.3511858284473419, "step": 3010 }, { "epoch": 0.7440079070916729, "grad_norm": 1.158383169105221, "learning_rate": 1.4850657035256583e-05, "loss": 0.331304669380188, "step": 3011 }, { "epoch": 0.7442550037064493, "grad_norm": 1.3108613341002981, "learning_rate": 1.4847083665527516e-05, "loss": 0.4151220917701721, "step": 3012 }, { "epoch": 0.7445021003212255, "grad_norm": 1.2710168441489627, "learning_rate": 1.4843509486627027e-05, "loss": 0.3359023332595825, "step": 3013 }, { "epoch": 0.744749196936002, "grad_norm": 1.0956619857744683, "learning_rate": 1.4839934499151796e-05, "loss": 0.32031798362731934, "step": 3014 }, { "epoch": 0.7449962935507783, "grad_norm": 1.1606266979504278, "learning_rate": 1.4836358703698627e-05, "loss": 0.3142626881599426, "step": 3015 }, { "epoch": 0.7452433901655547, "grad_norm": 1.080590073620996, "learning_rate": 1.4832782100864465e-05, "loss": 0.3082689642906189, "step": 3016 }, { "epoch": 0.7454904867803311, "grad_norm": 1.147330983709119, "learning_rate": 1.4829204691246388e-05, "loss": 0.36272507905960083, "step": 3017 }, { "epoch": 0.7457375833951075, "grad_norm": 1.3422662276446335, "learning_rate": 1.4825626475441603e-05, "loss": 0.339526504278183, "step": 3018 }, { "epoch": 0.7459846800098838, "grad_norm": 1.1015038474168142, "learning_rate": 1.4822047454047463e-05, "loss": 0.3607363700866699, "step": 3019 }, { "epoch": 0.7462317766246602, "grad_norm": 1.1999075979072344, "learning_rate": 1.4818467627661446e-05, "loss": 0.3478200435638428, "step": 3020 }, { "epoch": 0.7464788732394366, "grad_norm": 1.2462270382296678, "learning_rate": 1.4814886996881171e-05, "loss": 0.37197035551071167, "step": 3021 }, { "epoch": 0.746725969854213, "grad_norm": 1.0018998564488621, "learning_rate": 1.4811305562304383e-05, "loss": 0.31594884395599365, "step": 3022 }, { "epoch": 0.7469730664689894, "grad_norm": 1.2678399512194005, "learning_rate": 1.4807723324528968e-05, "loss": 0.3363671898841858, "step": 3023 }, { "epoch": 0.7472201630837657, "grad_norm": 1.3443542365997865, "learning_rate": 1.480414028415295e-05, "loss": 0.33778661489486694, "step": 3024 }, { "epoch": 0.7474672596985421, "grad_norm": 1.128414201200269, "learning_rate": 1.4800556441774472e-05, "loss": 0.3170180320739746, "step": 3025 }, { "epoch": 0.7477143563133185, "grad_norm": 1.0876067048889098, "learning_rate": 1.4796971797991826e-05, "loss": 0.33891546726226807, "step": 3026 }, { "epoch": 0.7479614529280949, "grad_norm": 1.116863655394385, "learning_rate": 1.4793386353403424e-05, "loss": 0.3872910737991333, "step": 3027 }, { "epoch": 0.7482085495428713, "grad_norm": 1.1403254553033986, "learning_rate": 1.4789800108607835e-05, "loss": 0.3490116000175476, "step": 3028 }, { "epoch": 0.7484556461576476, "grad_norm": 1.23967887406427, "learning_rate": 1.4786213064203732e-05, "loss": 0.3419932723045349, "step": 3029 }, { "epoch": 0.748702742772424, "grad_norm": 1.1213896935349223, "learning_rate": 1.4782625220789939e-05, "loss": 0.3422098755836487, "step": 3030 }, { "epoch": 0.7489498393872004, "grad_norm": 1.1163168651737017, "learning_rate": 1.4779036578965417e-05, "loss": 0.3260084390640259, "step": 3031 }, { "epoch": 0.7491969360019768, "grad_norm": 1.093909449418695, "learning_rate": 1.4775447139329243e-05, "loss": 0.3346729874610901, "step": 3032 }, { "epoch": 0.7494440326167532, "grad_norm": 1.235723387182409, "learning_rate": 1.4771856902480645e-05, "loss": 0.4074220657348633, "step": 3033 }, { "epoch": 0.7496911292315295, "grad_norm": 1.099400187694459, "learning_rate": 1.4768265869018975e-05, "loss": 0.32242143154144287, "step": 3034 }, { "epoch": 0.7499382258463059, "grad_norm": 1.136772400795546, "learning_rate": 1.4764674039543718e-05, "loss": 0.29337742924690247, "step": 3035 }, { "epoch": 0.7501853224610823, "grad_norm": 1.2138106170917298, "learning_rate": 1.4761081414654493e-05, "loss": 0.3638605773448944, "step": 3036 }, { "epoch": 0.7504324190758587, "grad_norm": 1.1837368157595067, "learning_rate": 1.4757487994951051e-05, "loss": 0.400645911693573, "step": 3037 }, { "epoch": 0.7506795156906351, "grad_norm": 1.1946180740188588, "learning_rate": 1.4753893781033284e-05, "loss": 0.37379005551338196, "step": 3038 }, { "epoch": 0.7509266123054115, "grad_norm": 1.0854898306263228, "learning_rate": 1.4750298773501202e-05, "loss": 0.3791964054107666, "step": 3039 }, { "epoch": 0.7511737089201878, "grad_norm": 1.1903920720538084, "learning_rate": 1.4746702972954958e-05, "loss": 0.37493079900741577, "step": 3040 }, { "epoch": 0.7514208055349642, "grad_norm": 1.3375300643928072, "learning_rate": 1.4743106379994836e-05, "loss": 0.3868149518966675, "step": 3041 }, { "epoch": 0.7516679021497406, "grad_norm": 1.0877707112854917, "learning_rate": 1.4739508995221248e-05, "loss": 0.35830602049827576, "step": 3042 }, { "epoch": 0.751914998764517, "grad_norm": 1.2289300051534258, "learning_rate": 1.473591081923474e-05, "loss": 0.3487023711204529, "step": 3043 }, { "epoch": 0.7521620953792933, "grad_norm": 1.0154154963565027, "learning_rate": 1.4732311852635994e-05, "loss": 0.28325292468070984, "step": 3044 }, { "epoch": 0.7524091919940696, "grad_norm": 1.1966738399500356, "learning_rate": 1.4728712096025821e-05, "loss": 0.3167914152145386, "step": 3045 }, { "epoch": 0.752656288608846, "grad_norm": 1.1026090184427115, "learning_rate": 1.472511155000516e-05, "loss": 0.35487499833106995, "step": 3046 }, { "epoch": 0.7529033852236224, "grad_norm": 1.2190595374515292, "learning_rate": 1.472151021517509e-05, "loss": 0.3707568347454071, "step": 3047 }, { "epoch": 0.7531504818383988, "grad_norm": 1.5810868156367683, "learning_rate": 1.4717908092136817e-05, "loss": 0.35922771692276, "step": 3048 }, { "epoch": 0.7533975784531752, "grad_norm": 1.142078211084381, "learning_rate": 1.4714305181491675e-05, "loss": 0.3233216404914856, "step": 3049 }, { "epoch": 0.7536446750679515, "grad_norm": 1.1297837191687028, "learning_rate": 1.4710701483841134e-05, "loss": 0.329418420791626, "step": 3050 }, { "epoch": 0.7538917716827279, "grad_norm": 1.5490056812549227, "learning_rate": 1.47070969997868e-05, "loss": 0.31919771432876587, "step": 3051 }, { "epoch": 0.7541388682975043, "grad_norm": 1.1221721723617095, "learning_rate": 1.4703491729930401e-05, "loss": 0.2857627272605896, "step": 3052 }, { "epoch": 0.7543859649122807, "grad_norm": 1.025175124637147, "learning_rate": 1.4699885674873797e-05, "loss": 0.2764459252357483, "step": 3053 }, { "epoch": 0.7546330615270571, "grad_norm": 1.1403399425821328, "learning_rate": 1.469627883521899e-05, "loss": 0.307733952999115, "step": 3054 }, { "epoch": 0.7548801581418334, "grad_norm": 1.1853155498634904, "learning_rate": 1.4692671211568101e-05, "loss": 0.35906338691711426, "step": 3055 }, { "epoch": 0.7551272547566098, "grad_norm": 1.1320581919690702, "learning_rate": 1.4689062804523387e-05, "loss": 0.3328098654747009, "step": 3056 }, { "epoch": 0.7553743513713862, "grad_norm": 1.1336851013370652, "learning_rate": 1.468545361468723e-05, "loss": 0.3009733557701111, "step": 3057 }, { "epoch": 0.7556214479861626, "grad_norm": 1.087476276228649, "learning_rate": 1.4681843642662154e-05, "loss": 0.30993086099624634, "step": 3058 }, { "epoch": 0.755868544600939, "grad_norm": 1.2983018057949502, "learning_rate": 1.4678232889050808e-05, "loss": 0.3852180242538452, "step": 3059 }, { "epoch": 0.7561156412157154, "grad_norm": 1.413072249598852, "learning_rate": 1.4674621354455962e-05, "loss": 0.3718680143356323, "step": 3060 }, { "epoch": 0.7563627378304917, "grad_norm": 1.1369066038692208, "learning_rate": 1.4671009039480535e-05, "loss": 0.3070686459541321, "step": 3061 }, { "epoch": 0.7566098344452681, "grad_norm": 1.173924242898298, "learning_rate": 1.4667395944727563e-05, "loss": 0.3172175884246826, "step": 3062 }, { "epoch": 0.7568569310600445, "grad_norm": 1.1328800175736458, "learning_rate": 1.4663782070800212e-05, "loss": 0.30395275354385376, "step": 3063 }, { "epoch": 0.7571040276748209, "grad_norm": 1.2345910795835306, "learning_rate": 1.4660167418301785e-05, "loss": 0.33544689416885376, "step": 3064 }, { "epoch": 0.7573511242895973, "grad_norm": 1.4419973819062903, "learning_rate": 1.465655198783571e-05, "loss": 0.4350890815258026, "step": 3065 }, { "epoch": 0.7575982209043736, "grad_norm": 1.2117125896612129, "learning_rate": 1.465293578000555e-05, "loss": 0.36704370379447937, "step": 3066 }, { "epoch": 0.75784531751915, "grad_norm": 1.1145148325100267, "learning_rate": 1.4649318795414986e-05, "loss": 0.3023114502429962, "step": 3067 }, { "epoch": 0.7580924141339264, "grad_norm": 1.1782975926466404, "learning_rate": 1.4645701034667848e-05, "loss": 0.3179556727409363, "step": 3068 }, { "epoch": 0.7583395107487028, "grad_norm": 1.1775552563758176, "learning_rate": 1.4642082498368075e-05, "loss": 0.39309558272361755, "step": 3069 }, { "epoch": 0.7585866073634792, "grad_norm": 1.0337186226788584, "learning_rate": 1.4638463187119752e-05, "loss": 0.2928007245063782, "step": 3070 }, { "epoch": 0.7588337039782554, "grad_norm": 1.1700905601481157, "learning_rate": 1.4634843101527079e-05, "loss": 0.3399061858654022, "step": 3071 }, { "epoch": 0.7590808005930318, "grad_norm": 1.0878608630067297, "learning_rate": 1.4631222242194399e-05, "loss": 0.3322938084602356, "step": 3072 }, { "epoch": 0.7593278972078082, "grad_norm": 1.2265244495749434, "learning_rate": 1.4627600609726171e-05, "loss": 0.34749066829681396, "step": 3073 }, { "epoch": 0.7595749938225846, "grad_norm": 1.1254200552415823, "learning_rate": 1.4623978204726997e-05, "loss": 0.32805436849594116, "step": 3074 }, { "epoch": 0.759822090437361, "grad_norm": 1.2210364097685773, "learning_rate": 1.4620355027801598e-05, "loss": 0.37652724981307983, "step": 3075 }, { "epoch": 0.7600691870521374, "grad_norm": 2.2663295468769347, "learning_rate": 1.4616731079554827e-05, "loss": 0.4228748679161072, "step": 3076 }, { "epoch": 0.7603162836669137, "grad_norm": 1.191594740415274, "learning_rate": 1.4613106360591662e-05, "loss": 0.36913347244262695, "step": 3077 }, { "epoch": 0.7605633802816901, "grad_norm": 1.1459980483375127, "learning_rate": 1.4609480871517217e-05, "loss": 0.32842010259628296, "step": 3078 }, { "epoch": 0.7608104768964665, "grad_norm": 1.2381357911050286, "learning_rate": 1.460585461293673e-05, "loss": 0.30114179849624634, "step": 3079 }, { "epoch": 0.7610575735112429, "grad_norm": 1.1432236834765228, "learning_rate": 1.4602227585455564e-05, "loss": 0.36561325192451477, "step": 3080 }, { "epoch": 0.7613046701260193, "grad_norm": 0.9874855736041074, "learning_rate": 1.4598599789679222e-05, "loss": 0.26331132650375366, "step": 3081 }, { "epoch": 0.7615517667407956, "grad_norm": 1.2273655934278809, "learning_rate": 1.4594971226213319e-05, "loss": 0.3540372848510742, "step": 3082 }, { "epoch": 0.761798863355572, "grad_norm": 1.3090672018963538, "learning_rate": 1.4591341895663613e-05, "loss": 0.4377685487270355, "step": 3083 }, { "epoch": 0.7620459599703484, "grad_norm": 1.1920541122579744, "learning_rate": 1.4587711798635984e-05, "loss": 0.32607388496398926, "step": 3084 }, { "epoch": 0.7622930565851248, "grad_norm": 1.3147364373811257, "learning_rate": 1.4584080935736435e-05, "loss": 0.3659108877182007, "step": 3085 }, { "epoch": 0.7625401531999012, "grad_norm": 1.1905345136225574, "learning_rate": 1.4580449307571108e-05, "loss": 0.3728267550468445, "step": 3086 }, { "epoch": 0.7627872498146775, "grad_norm": 1.2907174041219178, "learning_rate": 1.4576816914746262e-05, "loss": 0.38742685317993164, "step": 3087 }, { "epoch": 0.7630343464294539, "grad_norm": 1.2555183642093595, "learning_rate": 1.4573183757868287e-05, "loss": 0.3786296844482422, "step": 3088 }, { "epoch": 0.7632814430442303, "grad_norm": 1.1393632102512898, "learning_rate": 1.4569549837543704e-05, "loss": 0.3576888144016266, "step": 3089 }, { "epoch": 0.7635285396590067, "grad_norm": 1.1592775424875408, "learning_rate": 1.4565915154379163e-05, "loss": 0.3480481207370758, "step": 3090 }, { "epoch": 0.7637756362737831, "grad_norm": 1.2131683283483745, "learning_rate": 1.456227970898143e-05, "loss": 0.3448007106781006, "step": 3091 }, { "epoch": 0.7640227328885594, "grad_norm": 1.2032641512119426, "learning_rate": 1.4558643501957407e-05, "loss": 0.3451518416404724, "step": 3092 }, { "epoch": 0.7642698295033358, "grad_norm": 1.1396776218381148, "learning_rate": 1.4555006533914132e-05, "loss": 0.3157140612602234, "step": 3093 }, { "epoch": 0.7645169261181122, "grad_norm": 1.104549424400532, "learning_rate": 1.4551368805458746e-05, "loss": 0.338657408952713, "step": 3094 }, { "epoch": 0.7647640227328886, "grad_norm": 1.0684315167537917, "learning_rate": 1.4547730317198538e-05, "loss": 0.31079164147377014, "step": 3095 }, { "epoch": 0.765011119347665, "grad_norm": 1.1322003106694738, "learning_rate": 1.4544091069740914e-05, "loss": 0.3453037142753601, "step": 3096 }, { "epoch": 0.7652582159624414, "grad_norm": 1.1021093568744957, "learning_rate": 1.4540451063693413e-05, "loss": 0.3318374752998352, "step": 3097 }, { "epoch": 0.7655053125772177, "grad_norm": 1.2701321231151679, "learning_rate": 1.4536810299663696e-05, "loss": 0.44052833318710327, "step": 3098 }, { "epoch": 0.765752409191994, "grad_norm": 1.2507096311283254, "learning_rate": 1.4533168778259546e-05, "loss": 0.33810585737228394, "step": 3099 }, { "epoch": 0.7659995058067705, "grad_norm": 1.1871916072025999, "learning_rate": 1.4529526500088885e-05, "loss": 0.3836267590522766, "step": 3100 }, { "epoch": 0.7662466024215469, "grad_norm": 1.3337499871159872, "learning_rate": 1.4525883465759751e-05, "loss": 0.39637088775634766, "step": 3101 }, { "epoch": 0.7664936990363233, "grad_norm": 1.2528276711449078, "learning_rate": 1.4522239675880316e-05, "loss": 0.3227333426475525, "step": 3102 }, { "epoch": 0.7667407956510995, "grad_norm": 1.0306662186454831, "learning_rate": 1.4518595131058865e-05, "loss": 0.3101767897605896, "step": 3103 }, { "epoch": 0.7669878922658759, "grad_norm": 1.1290172015519833, "learning_rate": 1.4514949831903829e-05, "loss": 0.3454323709011078, "step": 3104 }, { "epoch": 0.7672349888806523, "grad_norm": 1.1848453187382437, "learning_rate": 1.4511303779023745e-05, "loss": 0.34395813941955566, "step": 3105 }, { "epoch": 0.7674820854954287, "grad_norm": 1.2942355976770694, "learning_rate": 1.4507656973027283e-05, "loss": 0.4113420844078064, "step": 3106 }, { "epoch": 0.7677291821102051, "grad_norm": 1.2324882707973173, "learning_rate": 1.4504009414523252e-05, "loss": 0.35573118925094604, "step": 3107 }, { "epoch": 0.7679762787249814, "grad_norm": 1.1049746401898959, "learning_rate": 1.4500361104120564e-05, "loss": 0.30965209007263184, "step": 3108 }, { "epoch": 0.7682233753397578, "grad_norm": 1.1403779349083214, "learning_rate": 1.449671204242827e-05, "loss": 0.3254874050617218, "step": 3109 }, { "epoch": 0.7684704719545342, "grad_norm": 1.202006436040822, "learning_rate": 1.4493062230055546e-05, "loss": 0.3534168601036072, "step": 3110 }, { "epoch": 0.7687175685693106, "grad_norm": 1.1807519087267164, "learning_rate": 1.4489411667611688e-05, "loss": 0.3358272910118103, "step": 3111 }, { "epoch": 0.768964665184087, "grad_norm": 1.2039903069675524, "learning_rate": 1.4485760355706125e-05, "loss": 0.34377139806747437, "step": 3112 }, { "epoch": 0.7692117617988633, "grad_norm": 1.1498981281335567, "learning_rate": 1.4482108294948399e-05, "loss": 0.35530179738998413, "step": 3113 }, { "epoch": 0.7694588584136397, "grad_norm": 1.1937895999266672, "learning_rate": 1.4478455485948193e-05, "loss": 0.36264729499816895, "step": 3114 }, { "epoch": 0.7697059550284161, "grad_norm": 1.2652421505375664, "learning_rate": 1.44748019293153e-05, "loss": 0.32908445596694946, "step": 3115 }, { "epoch": 0.7699530516431925, "grad_norm": 1.1166072917933583, "learning_rate": 1.4471147625659648e-05, "loss": 0.3389297127723694, "step": 3116 }, { "epoch": 0.7702001482579689, "grad_norm": 1.3205796151527598, "learning_rate": 1.4467492575591285e-05, "loss": 0.4015289545059204, "step": 3117 }, { "epoch": 0.7704472448727453, "grad_norm": 1.1224926906289046, "learning_rate": 1.4463836779720381e-05, "loss": 0.3006135821342468, "step": 3118 }, { "epoch": 0.7706943414875216, "grad_norm": 1.2713811433578388, "learning_rate": 1.4460180238657238e-05, "loss": 0.37171339988708496, "step": 3119 }, { "epoch": 0.770941438102298, "grad_norm": 1.2913664910641955, "learning_rate": 1.4456522953012278e-05, "loss": 0.38774728775024414, "step": 3120 }, { "epoch": 0.7711885347170744, "grad_norm": 1.3000591405884627, "learning_rate": 1.445286492339605e-05, "loss": 0.37644341588020325, "step": 3121 }, { "epoch": 0.7714356313318508, "grad_norm": 1.1915994750295391, "learning_rate": 1.4449206150419218e-05, "loss": 0.3484254479408264, "step": 3122 }, { "epoch": 0.7716827279466272, "grad_norm": 1.1051447186977283, "learning_rate": 1.4445546634692582e-05, "loss": 0.3504984378814697, "step": 3123 }, { "epoch": 0.7719298245614035, "grad_norm": 1.3219757436040689, "learning_rate": 1.4441886376827062e-05, "loss": 0.3977007269859314, "step": 3124 }, { "epoch": 0.7721769211761799, "grad_norm": 1.1911793886251494, "learning_rate": 1.4438225377433697e-05, "loss": 0.3378638029098511, "step": 3125 }, { "epoch": 0.7724240177909563, "grad_norm": 1.1981139468041586, "learning_rate": 1.4434563637123656e-05, "loss": 0.33427345752716064, "step": 3126 }, { "epoch": 0.7726711144057327, "grad_norm": 1.117340911532168, "learning_rate": 1.4430901156508228e-05, "loss": 0.34988242387771606, "step": 3127 }, { "epoch": 0.7729182110205091, "grad_norm": 1.0957275742393877, "learning_rate": 1.4427237936198828e-05, "loss": 0.31143659353256226, "step": 3128 }, { "epoch": 0.7731653076352853, "grad_norm": 1.2700441269923548, "learning_rate": 1.4423573976806995e-05, "loss": 0.34576451778411865, "step": 3129 }, { "epoch": 0.7734124042500617, "grad_norm": 1.192726455623568, "learning_rate": 1.4419909278944386e-05, "loss": 0.35769835114479065, "step": 3130 }, { "epoch": 0.7736595008648381, "grad_norm": 1.286469928795132, "learning_rate": 1.441624384322279e-05, "loss": 0.35770052671432495, "step": 3131 }, { "epoch": 0.7739065974796145, "grad_norm": 1.132497922407292, "learning_rate": 1.441257767025411e-05, "loss": 0.30375996232032776, "step": 3132 }, { "epoch": 0.7741536940943909, "grad_norm": 1.0709662558493225, "learning_rate": 1.4408910760650378e-05, "loss": 0.29565924406051636, "step": 3133 }, { "epoch": 0.7744007907091672, "grad_norm": 1.3553993375316635, "learning_rate": 1.4405243115023749e-05, "loss": 0.41733601689338684, "step": 3134 }, { "epoch": 0.7746478873239436, "grad_norm": 1.1570396346585143, "learning_rate": 1.4401574733986497e-05, "loss": 0.28815948963165283, "step": 3135 }, { "epoch": 0.77489498393872, "grad_norm": 1.268015799901413, "learning_rate": 1.4397905618151018e-05, "loss": 0.3949039578437805, "step": 3136 }, { "epoch": 0.7751420805534964, "grad_norm": 1.2294400544776138, "learning_rate": 1.439423576812984e-05, "loss": 0.3555927574634552, "step": 3137 }, { "epoch": 0.7753891771682728, "grad_norm": 1.20625438464114, "learning_rate": 1.4390565184535606e-05, "loss": 0.3189433813095093, "step": 3138 }, { "epoch": 0.7756362737830492, "grad_norm": 1.3104182418275796, "learning_rate": 1.438689386798108e-05, "loss": 0.3248079717159271, "step": 3139 }, { "epoch": 0.7758833703978255, "grad_norm": 1.3290148599119944, "learning_rate": 1.438322181907915e-05, "loss": 0.3355429768562317, "step": 3140 }, { "epoch": 0.7761304670126019, "grad_norm": 1.1888754384373283, "learning_rate": 1.437954903844283e-05, "loss": 0.3413013517856598, "step": 3141 }, { "epoch": 0.7763775636273783, "grad_norm": 1.1782810852848753, "learning_rate": 1.4375875526685255e-05, "loss": 0.3058377504348755, "step": 3142 }, { "epoch": 0.7766246602421547, "grad_norm": 1.2481804301022654, "learning_rate": 1.4372201284419676e-05, "loss": 0.3792092800140381, "step": 3143 }, { "epoch": 0.7768717568569311, "grad_norm": 1.228151682847367, "learning_rate": 1.4368526312259474e-05, "loss": 0.4049273133277893, "step": 3144 }, { "epoch": 0.7771188534717074, "grad_norm": 1.4618481230371219, "learning_rate": 1.4364850610818147e-05, "loss": 0.3817770481109619, "step": 3145 }, { "epoch": 0.7773659500864838, "grad_norm": 1.1496462200802056, "learning_rate": 1.4361174180709317e-05, "loss": 0.3512932062149048, "step": 3146 }, { "epoch": 0.7776130467012602, "grad_norm": 1.325376083533565, "learning_rate": 1.4357497022546722e-05, "loss": 0.37997952103614807, "step": 3147 }, { "epoch": 0.7778601433160366, "grad_norm": 1.1039767090962826, "learning_rate": 1.4353819136944233e-05, "loss": 0.3352244794368744, "step": 3148 }, { "epoch": 0.778107239930813, "grad_norm": 1.1420427268181295, "learning_rate": 1.4350140524515835e-05, "loss": 0.3423023521900177, "step": 3149 }, { "epoch": 0.7783543365455893, "grad_norm": 1.08559084364155, "learning_rate": 1.4346461185875628e-05, "loss": 0.3398503065109253, "step": 3150 }, { "epoch": 0.7786014331603657, "grad_norm": 1.1189617153730547, "learning_rate": 1.4342781121637847e-05, "loss": 0.27491098642349243, "step": 3151 }, { "epoch": 0.7788485297751421, "grad_norm": 1.1715761972336753, "learning_rate": 1.433910033241684e-05, "loss": 0.3563489317893982, "step": 3152 }, { "epoch": 0.7790956263899185, "grad_norm": 1.0881609113202706, "learning_rate": 1.4335418818827076e-05, "loss": 0.33830633759498596, "step": 3153 }, { "epoch": 0.7793427230046949, "grad_norm": 1.1627680716975042, "learning_rate": 1.4331736581483147e-05, "loss": 0.3807460069656372, "step": 3154 }, { "epoch": 0.7795898196194712, "grad_norm": 1.2526791961222998, "learning_rate": 1.4328053620999765e-05, "loss": 0.2934725880622864, "step": 3155 }, { "epoch": 0.7798369162342476, "grad_norm": 1.2300403576441568, "learning_rate": 1.4324369937991765e-05, "loss": 0.33869677782058716, "step": 3156 }, { "epoch": 0.780084012849024, "grad_norm": 1.2445762201873114, "learning_rate": 1.4320685533074094e-05, "loss": 0.3577091693878174, "step": 3157 }, { "epoch": 0.7803311094638004, "grad_norm": 1.1118064322977064, "learning_rate": 1.4317000406861834e-05, "loss": 0.35584115982055664, "step": 3158 }, { "epoch": 0.7805782060785768, "grad_norm": 1.1023076754609205, "learning_rate": 1.4313314559970177e-05, "loss": 0.3344600796699524, "step": 3159 }, { "epoch": 0.7808253026933532, "grad_norm": 1.2064309818035337, "learning_rate": 1.4309627993014434e-05, "loss": 0.3470122814178467, "step": 3160 }, { "epoch": 0.7810723993081294, "grad_norm": 1.1749812929620025, "learning_rate": 1.4305940706610042e-05, "loss": 0.3442881107330322, "step": 3161 }, { "epoch": 0.7813194959229058, "grad_norm": 1.0849818084037526, "learning_rate": 1.430225270137256e-05, "loss": 0.3276846408843994, "step": 3162 }, { "epoch": 0.7815665925376822, "grad_norm": 1.1939532093273284, "learning_rate": 1.4298563977917658e-05, "loss": 0.3555203974246979, "step": 3163 }, { "epoch": 0.7818136891524586, "grad_norm": 1.1557724378813332, "learning_rate": 1.4294874536861132e-05, "loss": 0.37312597036361694, "step": 3164 }, { "epoch": 0.782060785767235, "grad_norm": 1.0929054025641773, "learning_rate": 1.4291184378818898e-05, "loss": 0.3356674611568451, "step": 3165 }, { "epoch": 0.7823078823820113, "grad_norm": 1.1456384708535368, "learning_rate": 1.4287493504406991e-05, "loss": 0.36598896980285645, "step": 3166 }, { "epoch": 0.7825549789967877, "grad_norm": 1.3645919685962427, "learning_rate": 1.4283801914241562e-05, "loss": 0.3835144639015198, "step": 3167 }, { "epoch": 0.7828020756115641, "grad_norm": 1.2385318266041192, "learning_rate": 1.4280109608938886e-05, "loss": 0.3800857663154602, "step": 3168 }, { "epoch": 0.7830491722263405, "grad_norm": 1.3945667154051902, "learning_rate": 1.4276416589115358e-05, "loss": 0.38003379106521606, "step": 3169 }, { "epoch": 0.7832962688411169, "grad_norm": 1.1973970766573774, "learning_rate": 1.4272722855387486e-05, "loss": 0.3169389069080353, "step": 3170 }, { "epoch": 0.7835433654558932, "grad_norm": 1.2050779071614894, "learning_rate": 1.4269028408371907e-05, "loss": 0.3776249587535858, "step": 3171 }, { "epoch": 0.7837904620706696, "grad_norm": 1.3962058632505998, "learning_rate": 1.4265333248685364e-05, "loss": 0.30552372336387634, "step": 3172 }, { "epoch": 0.784037558685446, "grad_norm": 1.1042371265845834, "learning_rate": 1.4261637376944734e-05, "loss": 0.3148173391819, "step": 3173 }, { "epoch": 0.7842846553002224, "grad_norm": 1.1663715036087972, "learning_rate": 1.4257940793767001e-05, "loss": 0.3210235834121704, "step": 3174 }, { "epoch": 0.7845317519149988, "grad_norm": 1.3856691161603532, "learning_rate": 1.4254243499769272e-05, "loss": 0.40083205699920654, "step": 3175 }, { "epoch": 0.7847788485297752, "grad_norm": 1.1075016242011522, "learning_rate": 1.4250545495568777e-05, "loss": 0.3220617175102234, "step": 3176 }, { "epoch": 0.7850259451445515, "grad_norm": 1.4660318136295813, "learning_rate": 1.4246846781782853e-05, "loss": 0.29668498039245605, "step": 3177 }, { "epoch": 0.7852730417593279, "grad_norm": 1.266033648022012, "learning_rate": 1.4243147359028968e-05, "loss": 0.3884502053260803, "step": 3178 }, { "epoch": 0.7855201383741043, "grad_norm": 1.2156142506903138, "learning_rate": 1.42394472279247e-05, "loss": 0.3221966028213501, "step": 3179 }, { "epoch": 0.7857672349888807, "grad_norm": 1.079527199508195, "learning_rate": 1.423574638908775e-05, "loss": 0.2842956781387329, "step": 3180 }, { "epoch": 0.7860143316036571, "grad_norm": 1.1391563947976, "learning_rate": 1.4232044843135937e-05, "loss": 0.3404695391654968, "step": 3181 }, { "epoch": 0.7862614282184334, "grad_norm": 1.1107340004237445, "learning_rate": 1.4228342590687195e-05, "loss": 0.3518146872520447, "step": 3182 }, { "epoch": 0.7865085248332098, "grad_norm": 1.1809378874329242, "learning_rate": 1.4224639632359578e-05, "loss": 0.3774825930595398, "step": 3183 }, { "epoch": 0.7867556214479862, "grad_norm": 1.173625473793059, "learning_rate": 1.4220935968771254e-05, "loss": 0.33792728185653687, "step": 3184 }, { "epoch": 0.7870027180627626, "grad_norm": 1.2506741687589293, "learning_rate": 1.4217231600540518e-05, "loss": 0.33839768171310425, "step": 3185 }, { "epoch": 0.787249814677539, "grad_norm": 1.2380201802311441, "learning_rate": 1.4213526528285772e-05, "loss": 0.4289829134941101, "step": 3186 }, { "epoch": 0.7874969112923152, "grad_norm": 1.2167612279001516, "learning_rate": 1.420982075262554e-05, "loss": 0.29876279830932617, "step": 3187 }, { "epoch": 0.7877440079070916, "grad_norm": 1.1150850719186718, "learning_rate": 1.4206114274178469e-05, "loss": 0.33120882511138916, "step": 3188 }, { "epoch": 0.787991104521868, "grad_norm": 1.3909878111650467, "learning_rate": 1.420240709356331e-05, "loss": 0.508197009563446, "step": 3189 }, { "epoch": 0.7882382011366444, "grad_norm": 1.1243859923031692, "learning_rate": 1.419869921139895e-05, "loss": 0.3328986167907715, "step": 3190 }, { "epoch": 0.7884852977514208, "grad_norm": 1.2100612998972073, "learning_rate": 1.4194990628304368e-05, "loss": 0.3546978235244751, "step": 3191 }, { "epoch": 0.7887323943661971, "grad_norm": 1.4200376293671049, "learning_rate": 1.4191281344898686e-05, "loss": 0.3604191243648529, "step": 3192 }, { "epoch": 0.7889794909809735, "grad_norm": 1.1628760503495887, "learning_rate": 1.418757136180113e-05, "loss": 0.350461483001709, "step": 3193 }, { "epoch": 0.7892265875957499, "grad_norm": 1.3856891237599482, "learning_rate": 1.4183860679631034e-05, "loss": 0.3805266320705414, "step": 3194 }, { "epoch": 0.7894736842105263, "grad_norm": 1.236651656301321, "learning_rate": 1.418014929900787e-05, "loss": 0.3769413232803345, "step": 3195 }, { "epoch": 0.7897207808253027, "grad_norm": 1.0167632594260982, "learning_rate": 1.4176437220551211e-05, "loss": 0.31618887186050415, "step": 3196 }, { "epoch": 0.7899678774400791, "grad_norm": 1.2010299988929787, "learning_rate": 1.4172724444880755e-05, "loss": 0.3236001133918762, "step": 3197 }, { "epoch": 0.7902149740548554, "grad_norm": 1.0559677617769914, "learning_rate": 1.4169010972616302e-05, "loss": 0.3410758376121521, "step": 3198 }, { "epoch": 0.7904620706696318, "grad_norm": 1.1077504580435868, "learning_rate": 1.4165296804377788e-05, "loss": 0.31573379039764404, "step": 3199 }, { "epoch": 0.7907091672844082, "grad_norm": 1.0900999757187526, "learning_rate": 1.4161581940785252e-05, "loss": 0.2906264662742615, "step": 3200 }, { "epoch": 0.7909562638991846, "grad_norm": 1.1001676283357094, "learning_rate": 1.4157866382458851e-05, "loss": 0.3111318349838257, "step": 3201 }, { "epoch": 0.791203360513961, "grad_norm": 1.080055005250102, "learning_rate": 1.4154150130018867e-05, "loss": 0.32743144035339355, "step": 3202 }, { "epoch": 0.7914504571287373, "grad_norm": 1.010585693843783, "learning_rate": 1.4150433184085678e-05, "loss": 0.27987971901893616, "step": 3203 }, { "epoch": 0.7916975537435137, "grad_norm": 1.2278545211865275, "learning_rate": 1.4146715545279808e-05, "loss": 0.38096874952316284, "step": 3204 }, { "epoch": 0.7919446503582901, "grad_norm": 1.2444722564203334, "learning_rate": 1.4142997214221863e-05, "loss": 0.38387730717658997, "step": 3205 }, { "epoch": 0.7921917469730665, "grad_norm": 1.1068403018677073, "learning_rate": 1.4139278191532588e-05, "loss": 0.31514331698417664, "step": 3206 }, { "epoch": 0.7924388435878429, "grad_norm": 1.1996406315278487, "learning_rate": 1.4135558477832833e-05, "loss": 0.37790417671203613, "step": 3207 }, { "epoch": 0.7926859402026192, "grad_norm": 1.1766501375282075, "learning_rate": 1.413183807374357e-05, "loss": 0.35165390372276306, "step": 3208 }, { "epoch": 0.7929330368173956, "grad_norm": 1.1629068764874, "learning_rate": 1.4128116979885882e-05, "loss": 0.3439006805419922, "step": 3209 }, { "epoch": 0.793180133432172, "grad_norm": 1.1924059897307089, "learning_rate": 1.4124395196880965e-05, "loss": 0.35688379406929016, "step": 3210 }, { "epoch": 0.7934272300469484, "grad_norm": 1.25675154259365, "learning_rate": 1.4120672725350137e-05, "loss": 0.3553430438041687, "step": 3211 }, { "epoch": 0.7936743266617248, "grad_norm": 1.362077340787289, "learning_rate": 1.4116949565914822e-05, "loss": 0.4146290421485901, "step": 3212 }, { "epoch": 0.7939214232765011, "grad_norm": 1.1874463569244027, "learning_rate": 1.4113225719196569e-05, "loss": 0.3338252902030945, "step": 3213 }, { "epoch": 0.7941685198912775, "grad_norm": 1.0538592260050879, "learning_rate": 1.4109501185817032e-05, "loss": 0.2925771176815033, "step": 3214 }, { "epoch": 0.7944156165060539, "grad_norm": 1.0798219664801088, "learning_rate": 1.4105775966397985e-05, "loss": 0.37022218108177185, "step": 3215 }, { "epoch": 0.7946627131208303, "grad_norm": 1.0927748171510827, "learning_rate": 1.4102050061561316e-05, "loss": 0.32423537969589233, "step": 3216 }, { "epoch": 0.7949098097356067, "grad_norm": 1.2024428207421332, "learning_rate": 1.4098323471929028e-05, "loss": 0.32879745960235596, "step": 3217 }, { "epoch": 0.795156906350383, "grad_norm": 1.0897516861292118, "learning_rate": 1.4094596198123239e-05, "loss": 0.3459974527359009, "step": 3218 }, { "epoch": 0.7954040029651593, "grad_norm": 1.0822462482003334, "learning_rate": 1.4090868240766171e-05, "loss": 0.3023231625556946, "step": 3219 }, { "epoch": 0.7956510995799357, "grad_norm": 1.1164621495199611, "learning_rate": 1.408713960048018e-05, "loss": 0.3368402123451233, "step": 3220 }, { "epoch": 0.7958981961947121, "grad_norm": 1.6045890439686035, "learning_rate": 1.4083410277887717e-05, "loss": 0.3264089822769165, "step": 3221 }, { "epoch": 0.7961452928094885, "grad_norm": 1.0926740677401412, "learning_rate": 1.4079680273611358e-05, "loss": 0.28467857837677, "step": 3222 }, { "epoch": 0.7963923894242649, "grad_norm": 1.0580157184850596, "learning_rate": 1.4075949588273787e-05, "loss": 0.28369712829589844, "step": 3223 }, { "epoch": 0.7966394860390412, "grad_norm": 1.1644780241459824, "learning_rate": 1.4072218222497806e-05, "loss": 0.38255971670150757, "step": 3224 }, { "epoch": 0.7968865826538176, "grad_norm": 1.0683547333080488, "learning_rate": 1.406848617690633e-05, "loss": 0.3275947868824005, "step": 3225 }, { "epoch": 0.797133679268594, "grad_norm": 1.1665724731843974, "learning_rate": 1.406475345212238e-05, "loss": 0.34732410311698914, "step": 3226 }, { "epoch": 0.7973807758833704, "grad_norm": 1.2126596446269018, "learning_rate": 1.4061020048769102e-05, "loss": 0.36260494589805603, "step": 3227 }, { "epoch": 0.7976278724981468, "grad_norm": 1.1978912452335355, "learning_rate": 1.4057285967469752e-05, "loss": 0.3210209012031555, "step": 3228 }, { "epoch": 0.7978749691129231, "grad_norm": 1.4261395101157648, "learning_rate": 1.405355120884769e-05, "loss": 0.3652443587779999, "step": 3229 }, { "epoch": 0.7981220657276995, "grad_norm": 1.3390297260865054, "learning_rate": 1.4049815773526401e-05, "loss": 0.37029266357421875, "step": 3230 }, { "epoch": 0.7983691623424759, "grad_norm": 1.1344645124887334, "learning_rate": 1.4046079662129475e-05, "loss": 0.36476144194602966, "step": 3231 }, { "epoch": 0.7986162589572523, "grad_norm": 1.1616868134729028, "learning_rate": 1.4042342875280621e-05, "loss": 0.31709468364715576, "step": 3232 }, { "epoch": 0.7988633555720287, "grad_norm": 1.9706136088149238, "learning_rate": 1.4038605413603652e-05, "loss": 0.32435518503189087, "step": 3233 }, { "epoch": 0.799110452186805, "grad_norm": 1.0949967964092229, "learning_rate": 1.4034867277722506e-05, "loss": 0.3325822353363037, "step": 3234 }, { "epoch": 0.7993575488015814, "grad_norm": 1.1486320310040818, "learning_rate": 1.4031128468261226e-05, "loss": 0.26599597930908203, "step": 3235 }, { "epoch": 0.7996046454163578, "grad_norm": 1.0664790874105485, "learning_rate": 1.4027388985843966e-05, "loss": 0.28445810079574585, "step": 3236 }, { "epoch": 0.7998517420311342, "grad_norm": 1.1529618435730633, "learning_rate": 1.4023648831094991e-05, "loss": 0.26147061586380005, "step": 3237 }, { "epoch": 0.8000988386459106, "grad_norm": 1.1694693764790682, "learning_rate": 1.4019908004638687e-05, "loss": 0.32841598987579346, "step": 3238 }, { "epoch": 0.800345935260687, "grad_norm": 1.3121688671576226, "learning_rate": 1.4016166507099549e-05, "loss": 0.3797103762626648, "step": 3239 }, { "epoch": 0.8005930318754633, "grad_norm": 1.1913820904717423, "learning_rate": 1.4012424339102175e-05, "loss": 0.2947796583175659, "step": 3240 }, { "epoch": 0.8008401284902397, "grad_norm": 1.2492126789650537, "learning_rate": 1.4008681501271288e-05, "loss": 0.3060375452041626, "step": 3241 }, { "epoch": 0.8010872251050161, "grad_norm": 1.2613169401735134, "learning_rate": 1.4004937994231715e-05, "loss": 0.31543660163879395, "step": 3242 }, { "epoch": 0.8013343217197925, "grad_norm": 1.3044160610366864, "learning_rate": 1.4001193818608397e-05, "loss": 0.34669405221939087, "step": 3243 }, { "epoch": 0.8015814183345689, "grad_norm": 1.2518684801981916, "learning_rate": 1.3997448975026382e-05, "loss": 0.350368857383728, "step": 3244 }, { "epoch": 0.8018285149493452, "grad_norm": 1.2864527682821523, "learning_rate": 1.3993703464110839e-05, "loss": 0.33794188499450684, "step": 3245 }, { "epoch": 0.8020756115641215, "grad_norm": 1.1728421748750069, "learning_rate": 1.3989957286487042e-05, "loss": 0.2660486102104187, "step": 3246 }, { "epoch": 0.802322708178898, "grad_norm": 1.43842585225429, "learning_rate": 1.3986210442780372e-05, "loss": 0.3708547353744507, "step": 3247 }, { "epoch": 0.8025698047936743, "grad_norm": 1.3360473237829111, "learning_rate": 1.3982462933616336e-05, "loss": 0.40523749589920044, "step": 3248 }, { "epoch": 0.8028169014084507, "grad_norm": 1.3200255682949393, "learning_rate": 1.3978714759620535e-05, "loss": 0.3849186301231384, "step": 3249 }, { "epoch": 0.803063998023227, "grad_norm": 1.3377814192650508, "learning_rate": 1.3974965921418693e-05, "loss": 0.3914942443370819, "step": 3250 }, { "epoch": 0.8033110946380034, "grad_norm": 1.169415195447727, "learning_rate": 1.3971216419636632e-05, "loss": 0.3114597499370575, "step": 3251 }, { "epoch": 0.8035581912527798, "grad_norm": 1.1445478070508148, "learning_rate": 1.3967466254900307e-05, "loss": 0.33770060539245605, "step": 3252 }, { "epoch": 0.8038052878675562, "grad_norm": 1.2241463793011598, "learning_rate": 1.396371542783576e-05, "loss": 0.3192178010940552, "step": 3253 }, { "epoch": 0.8040523844823326, "grad_norm": 1.2755838229960887, "learning_rate": 1.3959963939069154e-05, "loss": 0.43228596448898315, "step": 3254 }, { "epoch": 0.8042994810971089, "grad_norm": 1.3954273111090478, "learning_rate": 1.395621178922677e-05, "loss": 0.41870948672294617, "step": 3255 }, { "epoch": 0.8045465777118853, "grad_norm": 1.3499631019519212, "learning_rate": 1.395245897893498e-05, "loss": 0.337137907743454, "step": 3256 }, { "epoch": 0.8047936743266617, "grad_norm": 1.1893788595026702, "learning_rate": 1.3948705508820284e-05, "loss": 0.35449016094207764, "step": 3257 }, { "epoch": 0.8050407709414381, "grad_norm": 1.3010488386562629, "learning_rate": 1.3944951379509284e-05, "loss": 0.33836841583251953, "step": 3258 }, { "epoch": 0.8052878675562145, "grad_norm": 1.2476666632197457, "learning_rate": 1.3941196591628699e-05, "loss": 0.3853386640548706, "step": 3259 }, { "epoch": 0.8055349641709909, "grad_norm": 1.1156524820030957, "learning_rate": 1.3937441145805343e-05, "loss": 0.31394562125205994, "step": 3260 }, { "epoch": 0.8057820607857672, "grad_norm": 1.1197518756006268, "learning_rate": 1.3933685042666158e-05, "loss": 0.3625726103782654, "step": 3261 }, { "epoch": 0.8060291574005436, "grad_norm": 1.216327510450451, "learning_rate": 1.3929928282838186e-05, "loss": 0.33932843804359436, "step": 3262 }, { "epoch": 0.80627625401532, "grad_norm": 1.0939373090093603, "learning_rate": 1.3926170866948572e-05, "loss": 0.34609854221343994, "step": 3263 }, { "epoch": 0.8065233506300964, "grad_norm": 1.2171075035810943, "learning_rate": 1.3922412795624588e-05, "loss": 0.34346944093704224, "step": 3264 }, { "epoch": 0.8067704472448728, "grad_norm": 1.2247329258379247, "learning_rate": 1.3918654069493602e-05, "loss": 0.39765632152557373, "step": 3265 }, { "epoch": 0.8070175438596491, "grad_norm": 1.1519651889561635, "learning_rate": 1.3914894689183099e-05, "loss": 0.3478424549102783, "step": 3266 }, { "epoch": 0.8072646404744255, "grad_norm": 1.23308445076053, "learning_rate": 1.3911134655320661e-05, "loss": 0.3741133511066437, "step": 3267 }, { "epoch": 0.8075117370892019, "grad_norm": 1.2022917226377314, "learning_rate": 1.3907373968533995e-05, "loss": 0.3457132577896118, "step": 3268 }, { "epoch": 0.8077588337039783, "grad_norm": 1.3517356438529096, "learning_rate": 1.3903612629450906e-05, "loss": 0.36934876441955566, "step": 3269 }, { "epoch": 0.8080059303187547, "grad_norm": 1.260962595751049, "learning_rate": 1.389985063869931e-05, "loss": 0.39524298906326294, "step": 3270 }, { "epoch": 0.808253026933531, "grad_norm": 1.1945848576518303, "learning_rate": 1.389608799690724e-05, "loss": 0.34383928775787354, "step": 3271 }, { "epoch": 0.8085001235483074, "grad_norm": 1.0200943519804837, "learning_rate": 1.3892324704702823e-05, "loss": 0.26877570152282715, "step": 3272 }, { "epoch": 0.8087472201630838, "grad_norm": 1.1359350673418387, "learning_rate": 1.3888560762714309e-05, "loss": 0.30432334542274475, "step": 3273 }, { "epoch": 0.8089943167778602, "grad_norm": 1.149257908682948, "learning_rate": 1.3884796171570042e-05, "loss": 0.30137312412261963, "step": 3274 }, { "epoch": 0.8092414133926366, "grad_norm": 1.2986969981117036, "learning_rate": 1.3881030931898491e-05, "loss": 0.40676793456077576, "step": 3275 }, { "epoch": 0.809488510007413, "grad_norm": 1.0976728884243794, "learning_rate": 1.3877265044328218e-05, "loss": 0.2987005114555359, "step": 3276 }, { "epoch": 0.8097356066221892, "grad_norm": 1.144533841421807, "learning_rate": 1.3873498509487902e-05, "loss": 0.313514769077301, "step": 3277 }, { "epoch": 0.8099827032369656, "grad_norm": 1.1371037524876373, "learning_rate": 1.3869731328006331e-05, "loss": 0.3251796364784241, "step": 3278 }, { "epoch": 0.810229799851742, "grad_norm": 1.2481394269774857, "learning_rate": 1.3865963500512391e-05, "loss": 0.34241414070129395, "step": 3279 }, { "epoch": 0.8104768964665184, "grad_norm": 1.178453505431991, "learning_rate": 1.386219502763509e-05, "loss": 0.2975517213344574, "step": 3280 }, { "epoch": 0.8107239930812948, "grad_norm": 1.3546603335108707, "learning_rate": 1.385842591000353e-05, "loss": 0.39890533685684204, "step": 3281 }, { "epoch": 0.8109710896960711, "grad_norm": 1.2611337199248582, "learning_rate": 1.385465614824693e-05, "loss": 0.3471081852912903, "step": 3282 }, { "epoch": 0.8112181863108475, "grad_norm": 1.2615316510434351, "learning_rate": 1.3850885742994613e-05, "loss": 0.3794558048248291, "step": 3283 }, { "epoch": 0.8114652829256239, "grad_norm": 1.199167182168048, "learning_rate": 1.3847114694876008e-05, "loss": 0.32044774293899536, "step": 3284 }, { "epoch": 0.8117123795404003, "grad_norm": 1.3773787668417783, "learning_rate": 1.3843343004520657e-05, "loss": 0.37890201807022095, "step": 3285 }, { "epoch": 0.8119594761551767, "grad_norm": 1.188301720459594, "learning_rate": 1.3839570672558202e-05, "loss": 0.3696851134300232, "step": 3286 }, { "epoch": 0.812206572769953, "grad_norm": 1.3391146061036865, "learning_rate": 1.3835797699618398e-05, "loss": 0.29502981901168823, "step": 3287 }, { "epoch": 0.8124536693847294, "grad_norm": 1.234549814434533, "learning_rate": 1.3832024086331104e-05, "loss": 0.3564421832561493, "step": 3288 }, { "epoch": 0.8127007659995058, "grad_norm": 1.2311602829661898, "learning_rate": 1.3828249833326285e-05, "loss": 0.3254801034927368, "step": 3289 }, { "epoch": 0.8129478626142822, "grad_norm": 1.1755824216561641, "learning_rate": 1.3824474941234016e-05, "loss": 0.34147047996520996, "step": 3290 }, { "epoch": 0.8131949592290586, "grad_norm": 1.169269239629065, "learning_rate": 1.3820699410684472e-05, "loss": 0.2656553387641907, "step": 3291 }, { "epoch": 0.8134420558438349, "grad_norm": 1.1893088427661838, "learning_rate": 1.381692324230795e-05, "loss": 0.36659595370292664, "step": 3292 }, { "epoch": 0.8136891524586113, "grad_norm": 1.2405423396841337, "learning_rate": 1.3813146436734832e-05, "loss": 0.34119510650634766, "step": 3293 }, { "epoch": 0.8139362490733877, "grad_norm": 1.327994923463999, "learning_rate": 1.3809368994595624e-05, "loss": 0.3312588930130005, "step": 3294 }, { "epoch": 0.8141833456881641, "grad_norm": 1.1244779705415573, "learning_rate": 1.3805590916520928e-05, "loss": 0.3420473337173462, "step": 3295 }, { "epoch": 0.8144304423029405, "grad_norm": 1.105259918554501, "learning_rate": 1.3801812203141459e-05, "loss": 0.32385534048080444, "step": 3296 }, { "epoch": 0.8146775389177169, "grad_norm": 1.1473931939130504, "learning_rate": 1.3798032855088032e-05, "loss": 0.3556511402130127, "step": 3297 }, { "epoch": 0.8149246355324932, "grad_norm": 1.2398729525267185, "learning_rate": 1.3794252872991571e-05, "loss": 0.3441116213798523, "step": 3298 }, { "epoch": 0.8151717321472696, "grad_norm": 1.2054189039338867, "learning_rate": 1.3790472257483108e-05, "loss": 0.3391241431236267, "step": 3299 }, { "epoch": 0.815418828762046, "grad_norm": 1.1422893681645498, "learning_rate": 1.3786691009193777e-05, "loss": 0.31642135977745056, "step": 3300 }, { "epoch": 0.8156659253768224, "grad_norm": 1.3654243869728924, "learning_rate": 1.378290912875482e-05, "loss": 0.4234481155872345, "step": 3301 }, { "epoch": 0.8159130219915988, "grad_norm": 1.3291677380022608, "learning_rate": 1.377912661679758e-05, "loss": 0.351007878780365, "step": 3302 }, { "epoch": 0.816160118606375, "grad_norm": 1.146509762839147, "learning_rate": 1.3775343473953512e-05, "loss": 0.33489176630973816, "step": 3303 }, { "epoch": 0.8164072152211514, "grad_norm": 1.4809289368395702, "learning_rate": 1.3771559700854173e-05, "loss": 0.40146389603614807, "step": 3304 }, { "epoch": 0.8166543118359278, "grad_norm": 1.2458421866510327, "learning_rate": 1.3767775298131227e-05, "loss": 0.3390381336212158, "step": 3305 }, { "epoch": 0.8169014084507042, "grad_norm": 1.2015928658268435, "learning_rate": 1.3763990266416438e-05, "loss": 0.347028911113739, "step": 3306 }, { "epoch": 0.8171485050654806, "grad_norm": 1.2199160006249206, "learning_rate": 1.3760204606341683e-05, "loss": 0.3225880265235901, "step": 3307 }, { "epoch": 0.8173956016802569, "grad_norm": 1.1354355955058102, "learning_rate": 1.3756418318538934e-05, "loss": 0.31068873405456543, "step": 3308 }, { "epoch": 0.8176426982950333, "grad_norm": 1.4427805927380608, "learning_rate": 1.3752631403640276e-05, "loss": 0.4078211784362793, "step": 3309 }, { "epoch": 0.8178897949098097, "grad_norm": 1.1859676225509854, "learning_rate": 1.37488438622779e-05, "loss": 0.3344365954399109, "step": 3310 }, { "epoch": 0.8181368915245861, "grad_norm": 1.1784315535771859, "learning_rate": 1.3745055695084092e-05, "loss": 0.3738686442375183, "step": 3311 }, { "epoch": 0.8183839881393625, "grad_norm": 1.274423553228582, "learning_rate": 1.374126690269125e-05, "loss": 0.39451807737350464, "step": 3312 }, { "epoch": 0.8186310847541388, "grad_norm": 1.3712585135575643, "learning_rate": 1.3737477485731874e-05, "loss": 0.4055447578430176, "step": 3313 }, { "epoch": 0.8188781813689152, "grad_norm": 1.1349913189893674, "learning_rate": 1.3733687444838571e-05, "loss": 0.31298184394836426, "step": 3314 }, { "epoch": 0.8191252779836916, "grad_norm": 1.1354862257539342, "learning_rate": 1.3729896780644046e-05, "loss": 0.33283817768096924, "step": 3315 }, { "epoch": 0.819372374598468, "grad_norm": 1.2531760907425549, "learning_rate": 1.3726105493781112e-05, "loss": 0.36692386865615845, "step": 3316 }, { "epoch": 0.8196194712132444, "grad_norm": 1.1802457148467558, "learning_rate": 1.3722313584882694e-05, "loss": 0.30892282724380493, "step": 3317 }, { "epoch": 0.8198665678280208, "grad_norm": 1.2286164868787557, "learning_rate": 1.3718521054581802e-05, "loss": 0.3548890948295593, "step": 3318 }, { "epoch": 0.8201136644427971, "grad_norm": 1.0820959271342423, "learning_rate": 1.3714727903511564e-05, "loss": 0.30882933735847473, "step": 3319 }, { "epoch": 0.8203607610575735, "grad_norm": 1.8531884719452343, "learning_rate": 1.3710934132305212e-05, "loss": 0.33949658274650574, "step": 3320 }, { "epoch": 0.8206078576723499, "grad_norm": 1.1776648228481308, "learning_rate": 1.3707139741596071e-05, "loss": 0.3428702652454376, "step": 3321 }, { "epoch": 0.8208549542871263, "grad_norm": 1.1797302811873887, "learning_rate": 1.3703344732017584e-05, "loss": 0.36383411288261414, "step": 3322 }, { "epoch": 0.8211020509019027, "grad_norm": 1.0118279923380535, "learning_rate": 1.3699549104203283e-05, "loss": 0.2999538779258728, "step": 3323 }, { "epoch": 0.821349147516679, "grad_norm": 1.0803618002392121, "learning_rate": 1.3695752858786812e-05, "loss": 0.3149568438529968, "step": 3324 }, { "epoch": 0.8215962441314554, "grad_norm": 1.055693397493552, "learning_rate": 1.3691955996401914e-05, "loss": 0.3200797140598297, "step": 3325 }, { "epoch": 0.8218433407462318, "grad_norm": 1.1795283969362884, "learning_rate": 1.368815851768244e-05, "loss": 0.3246101438999176, "step": 3326 }, { "epoch": 0.8220904373610082, "grad_norm": 1.2405507603235228, "learning_rate": 1.3684360423262333e-05, "loss": 0.3766840696334839, "step": 3327 }, { "epoch": 0.8223375339757846, "grad_norm": 1.216707001594924, "learning_rate": 1.3680561713775658e-05, "loss": 0.33339744806289673, "step": 3328 }, { "epoch": 0.8225846305905609, "grad_norm": 1.3275506260751908, "learning_rate": 1.3676762389856562e-05, "loss": 0.34022679924964905, "step": 3329 }, { "epoch": 0.8228317272053373, "grad_norm": 1.1224276274611138, "learning_rate": 1.3672962452139303e-05, "loss": 0.35712724924087524, "step": 3330 }, { "epoch": 0.8230788238201137, "grad_norm": 1.2696432595998812, "learning_rate": 1.3669161901258253e-05, "loss": 0.34596002101898193, "step": 3331 }, { "epoch": 0.8233259204348901, "grad_norm": 1.3522175206157598, "learning_rate": 1.366536073784786e-05, "loss": 0.3531634211540222, "step": 3332 }, { "epoch": 0.8235730170496665, "grad_norm": 1.1362864897976286, "learning_rate": 1.36615589625427e-05, "loss": 0.32708221673965454, "step": 3333 }, { "epoch": 0.8238201136644427, "grad_norm": 1.1835699501754695, "learning_rate": 1.3657756575977437e-05, "loss": 0.30552220344543457, "step": 3334 }, { "epoch": 0.8240672102792191, "grad_norm": 1.1834059286870373, "learning_rate": 1.365395357878684e-05, "loss": 0.3218202590942383, "step": 3335 }, { "epoch": 0.8243143068939955, "grad_norm": 1.2281205550928802, "learning_rate": 1.3650149971605785e-05, "loss": 0.3711199164390564, "step": 3336 }, { "epoch": 0.8245614035087719, "grad_norm": 1.3671168112350378, "learning_rate": 1.3646345755069245e-05, "loss": 0.37297213077545166, "step": 3337 }, { "epoch": 0.8248085001235483, "grad_norm": 1.2639485055475157, "learning_rate": 1.3642540929812293e-05, "loss": 0.380947470664978, "step": 3338 }, { "epoch": 0.8250555967383247, "grad_norm": 1.2355741311008095, "learning_rate": 1.3638735496470106e-05, "loss": 0.31494414806365967, "step": 3339 }, { "epoch": 0.825302693353101, "grad_norm": 1.3153002131371947, "learning_rate": 1.363492945567796e-05, "loss": 0.35993772745132446, "step": 3340 }, { "epoch": 0.8255497899678774, "grad_norm": 1.157370583095398, "learning_rate": 1.363112280807124e-05, "loss": 0.3443572521209717, "step": 3341 }, { "epoch": 0.8257968865826538, "grad_norm": 1.2008070841976977, "learning_rate": 1.3627315554285427e-05, "loss": 0.29549434781074524, "step": 3342 }, { "epoch": 0.8260439831974302, "grad_norm": 1.1177809695800298, "learning_rate": 1.3623507694956101e-05, "loss": 0.30938005447387695, "step": 3343 }, { "epoch": 0.8262910798122066, "grad_norm": 1.276295382424882, "learning_rate": 1.3619699230718944e-05, "loss": 0.3994385302066803, "step": 3344 }, { "epoch": 0.8265381764269829, "grad_norm": 1.0953842302254473, "learning_rate": 1.3615890162209744e-05, "loss": 0.33732330799102783, "step": 3345 }, { "epoch": 0.8267852730417593, "grad_norm": 1.2221314718102283, "learning_rate": 1.3612080490064383e-05, "loss": 0.3503376841545105, "step": 3346 }, { "epoch": 0.8270323696565357, "grad_norm": 1.1722281970274393, "learning_rate": 1.3608270214918848e-05, "loss": 0.33414024114608765, "step": 3347 }, { "epoch": 0.8272794662713121, "grad_norm": 1.2633399044499127, "learning_rate": 1.3604459337409228e-05, "loss": 0.3781949281692505, "step": 3348 }, { "epoch": 0.8275265628860885, "grad_norm": 1.217890804641655, "learning_rate": 1.3600647858171712e-05, "loss": 0.36161649227142334, "step": 3349 }, { "epoch": 0.8277736595008648, "grad_norm": 1.2728746834409217, "learning_rate": 1.3596835777842579e-05, "loss": 0.4320034384727478, "step": 3350 }, { "epoch": 0.8280207561156412, "grad_norm": 1.1136736922067771, "learning_rate": 1.3593023097058227e-05, "loss": 0.318107545375824, "step": 3351 }, { "epoch": 0.8282678527304176, "grad_norm": 1.248501511811275, "learning_rate": 1.3589209816455137e-05, "loss": 0.33454567193984985, "step": 3352 }, { "epoch": 0.828514949345194, "grad_norm": 1.3252749152738459, "learning_rate": 1.35853959366699e-05, "loss": 0.40991145372390747, "step": 3353 }, { "epoch": 0.8287620459599704, "grad_norm": 1.2389948999828524, "learning_rate": 1.3581581458339208e-05, "loss": 0.3732573986053467, "step": 3354 }, { "epoch": 0.8290091425747467, "grad_norm": 1.1452059165342885, "learning_rate": 1.3577766382099847e-05, "loss": 0.34273484349250793, "step": 3355 }, { "epoch": 0.8292562391895231, "grad_norm": 1.0480478418948893, "learning_rate": 1.3573950708588703e-05, "loss": 0.29449865221977234, "step": 3356 }, { "epoch": 0.8295033358042995, "grad_norm": 1.2302158661416753, "learning_rate": 1.3570134438442763e-05, "loss": 0.3522438406944275, "step": 3357 }, { "epoch": 0.8297504324190759, "grad_norm": 1.1036828129227136, "learning_rate": 1.3566317572299121e-05, "loss": 0.30218952894210815, "step": 3358 }, { "epoch": 0.8299975290338523, "grad_norm": 1.3374589507579946, "learning_rate": 1.356250011079496e-05, "loss": 0.38182541728019714, "step": 3359 }, { "epoch": 0.8302446256486287, "grad_norm": 1.118741024357403, "learning_rate": 1.3558682054567565e-05, "loss": 0.3278653621673584, "step": 3360 }, { "epoch": 0.830491722263405, "grad_norm": 1.2819230111335025, "learning_rate": 1.3554863404254326e-05, "loss": 0.38894879817962646, "step": 3361 }, { "epoch": 0.8307388188781814, "grad_norm": 1.2047293740934457, "learning_rate": 1.3551044160492723e-05, "loss": 0.3584107458591461, "step": 3362 }, { "epoch": 0.8309859154929577, "grad_norm": 1.1993662204446134, "learning_rate": 1.3547224323920344e-05, "loss": 0.372204065322876, "step": 3363 }, { "epoch": 0.8312330121077341, "grad_norm": 1.1868603343217976, "learning_rate": 1.354340389517487e-05, "loss": 0.3738929331302643, "step": 3364 }, { "epoch": 0.8314801087225105, "grad_norm": 1.3166577092433598, "learning_rate": 1.3539582874894084e-05, "loss": 0.34596705436706543, "step": 3365 }, { "epoch": 0.8317272053372868, "grad_norm": 1.2247346835183615, "learning_rate": 1.3535761263715865e-05, "loss": 0.3762887418270111, "step": 3366 }, { "epoch": 0.8319743019520632, "grad_norm": 1.2370101182561752, "learning_rate": 1.3531939062278192e-05, "loss": 0.37280914187431335, "step": 3367 }, { "epoch": 0.8322213985668396, "grad_norm": 1.2494345563424183, "learning_rate": 1.3528116271219146e-05, "loss": 0.31195443868637085, "step": 3368 }, { "epoch": 0.832468495181616, "grad_norm": 1.2161888294047218, "learning_rate": 1.3524292891176898e-05, "loss": 0.3825802803039551, "step": 3369 }, { "epoch": 0.8327155917963924, "grad_norm": 1.290724351495923, "learning_rate": 1.3520468922789727e-05, "loss": 0.3413546085357666, "step": 3370 }, { "epoch": 0.8329626884111687, "grad_norm": 1.1731740695317878, "learning_rate": 1.3516644366696002e-05, "loss": 0.3390570878982544, "step": 3371 }, { "epoch": 0.8332097850259451, "grad_norm": 1.2842944743309965, "learning_rate": 1.3512819223534198e-05, "loss": 0.3508989214897156, "step": 3372 }, { "epoch": 0.8334568816407215, "grad_norm": 1.1813605972854138, "learning_rate": 1.3508993493942882e-05, "loss": 0.35083919763565063, "step": 3373 }, { "epoch": 0.8337039782554979, "grad_norm": 1.2703244113169796, "learning_rate": 1.3505167178560716e-05, "loss": 0.3700406551361084, "step": 3374 }, { "epoch": 0.8339510748702743, "grad_norm": 1.1879906203542923, "learning_rate": 1.3501340278026473e-05, "loss": 0.36115050315856934, "step": 3375 }, { "epoch": 0.8341981714850507, "grad_norm": 1.1957472454261173, "learning_rate": 1.3497512792979013e-05, "loss": 0.2780151069164276, "step": 3376 }, { "epoch": 0.834445268099827, "grad_norm": 1.1817000403250646, "learning_rate": 1.349368472405729e-05, "loss": 0.297199547290802, "step": 3377 }, { "epoch": 0.8346923647146034, "grad_norm": 1.1750029202756294, "learning_rate": 1.3489856071900365e-05, "loss": 0.32794445753097534, "step": 3378 }, { "epoch": 0.8349394613293798, "grad_norm": 1.521538623732885, "learning_rate": 1.3486026837147394e-05, "loss": 0.3298035264015198, "step": 3379 }, { "epoch": 0.8351865579441562, "grad_norm": 1.486197813747087, "learning_rate": 1.3482197020437629e-05, "loss": 0.29770511388778687, "step": 3380 }, { "epoch": 0.8354336545589326, "grad_norm": 1.3835889034945517, "learning_rate": 1.3478366622410414e-05, "loss": 0.41158032417297363, "step": 3381 }, { "epoch": 0.8356807511737089, "grad_norm": 1.2759940749972274, "learning_rate": 1.3474535643705199e-05, "loss": 0.3419932723045349, "step": 3382 }, { "epoch": 0.8359278477884853, "grad_norm": 1.3472963880521103, "learning_rate": 1.347070408496153e-05, "loss": 0.3454444110393524, "step": 3383 }, { "epoch": 0.8361749444032617, "grad_norm": 1.3554871785953824, "learning_rate": 1.3466871946819045e-05, "loss": 0.3527870178222656, "step": 3384 }, { "epoch": 0.8364220410180381, "grad_norm": 1.0725982571519732, "learning_rate": 1.3463039229917474e-05, "loss": 0.2967306077480316, "step": 3385 }, { "epoch": 0.8366691376328145, "grad_norm": 1.159348128746516, "learning_rate": 1.3459205934896658e-05, "loss": 0.2923041582107544, "step": 3386 }, { "epoch": 0.8369162342475908, "grad_norm": 1.2192739987200862, "learning_rate": 1.3455372062396523e-05, "loss": 0.36327192187309265, "step": 3387 }, { "epoch": 0.8371633308623672, "grad_norm": 1.240580761169996, "learning_rate": 1.3451537613057095e-05, "loss": 0.3349381685256958, "step": 3388 }, { "epoch": 0.8374104274771436, "grad_norm": 1.19123763921538, "learning_rate": 1.3447702587518499e-05, "loss": 0.3400348126888275, "step": 3389 }, { "epoch": 0.83765752409192, "grad_norm": 1.250170643999687, "learning_rate": 1.3443866986420952e-05, "loss": 0.3615231513977051, "step": 3390 }, { "epoch": 0.8379046207066964, "grad_norm": 1.1898390746035656, "learning_rate": 1.344003081040477e-05, "loss": 0.3170925974845886, "step": 3391 }, { "epoch": 0.8381517173214726, "grad_norm": 1.313917153489311, "learning_rate": 1.343619406011036e-05, "loss": 0.3982849717140198, "step": 3392 }, { "epoch": 0.838398813936249, "grad_norm": 1.3488603795093965, "learning_rate": 1.3432356736178234e-05, "loss": 0.3683122992515564, "step": 3393 }, { "epoch": 0.8386459105510254, "grad_norm": 1.4844670460987113, "learning_rate": 1.3428518839248987e-05, "loss": 0.4230365753173828, "step": 3394 }, { "epoch": 0.8388930071658018, "grad_norm": 1.3448506275325094, "learning_rate": 1.3424680369963326e-05, "loss": 0.3634493350982666, "step": 3395 }, { "epoch": 0.8391401037805782, "grad_norm": 1.037659516622907, "learning_rate": 1.3420841328962032e-05, "loss": 0.2827875018119812, "step": 3396 }, { "epoch": 0.8393872003953546, "grad_norm": 1.1613953765505203, "learning_rate": 1.341700171688601e-05, "loss": 0.3437548279762268, "step": 3397 }, { "epoch": 0.8396342970101309, "grad_norm": 1.3237134500204826, "learning_rate": 1.3413161534376231e-05, "loss": 0.3887184262275696, "step": 3398 }, { "epoch": 0.8398813936249073, "grad_norm": 1.1554656263312673, "learning_rate": 1.340932078207378e-05, "loss": 0.3376701772212982, "step": 3399 }, { "epoch": 0.8401284902396837, "grad_norm": 1.2732846876561952, "learning_rate": 1.3405479460619832e-05, "loss": 0.3556373119354248, "step": 3400 }, { "epoch": 0.8403755868544601, "grad_norm": 1.0608602576745463, "learning_rate": 1.3401637570655655e-05, "loss": 0.2776482105255127, "step": 3401 }, { "epoch": 0.8406226834692365, "grad_norm": 1.2324931094848792, "learning_rate": 1.3397795112822614e-05, "loss": 0.3427850008010864, "step": 3402 }, { "epoch": 0.8408697800840128, "grad_norm": 1.2252513016597577, "learning_rate": 1.3393952087762166e-05, "loss": 0.34862032532691956, "step": 3403 }, { "epoch": 0.8411168766987892, "grad_norm": 1.2015475477730908, "learning_rate": 1.339010849611587e-05, "loss": 0.3252263069152832, "step": 3404 }, { "epoch": 0.8413639733135656, "grad_norm": 1.1561855004616945, "learning_rate": 1.3386264338525371e-05, "loss": 0.3245554566383362, "step": 3405 }, { "epoch": 0.841611069928342, "grad_norm": 1.142906322334995, "learning_rate": 1.3382419615632414e-05, "loss": 0.35264623165130615, "step": 3406 }, { "epoch": 0.8418581665431184, "grad_norm": 1.2204967583029829, "learning_rate": 1.3378574328078837e-05, "loss": 0.4158707559108734, "step": 3407 }, { "epoch": 0.8421052631578947, "grad_norm": 1.0148612447240504, "learning_rate": 1.3374728476506566e-05, "loss": 0.328230082988739, "step": 3408 }, { "epoch": 0.8423523597726711, "grad_norm": 1.124652990780783, "learning_rate": 1.3370882061557635e-05, "loss": 0.2690085768699646, "step": 3409 }, { "epoch": 0.8425994563874475, "grad_norm": 1.0797002929418555, "learning_rate": 1.3367035083874158e-05, "loss": 0.30266398191452026, "step": 3410 }, { "epoch": 0.8428465530022239, "grad_norm": 1.168029374518004, "learning_rate": 1.336318754409835e-05, "loss": 0.37302184104919434, "step": 3411 }, { "epoch": 0.8430936496170003, "grad_norm": 1.2104565802160792, "learning_rate": 1.3359339442872523e-05, "loss": 0.32380062341690063, "step": 3412 }, { "epoch": 0.8433407462317766, "grad_norm": 1.3640099768764322, "learning_rate": 1.3355490780839073e-05, "loss": 0.3893391489982605, "step": 3413 }, { "epoch": 0.843587842846553, "grad_norm": 1.295457680741646, "learning_rate": 1.33516415586405e-05, "loss": 0.40815144777297974, "step": 3414 }, { "epoch": 0.8438349394613294, "grad_norm": 1.1129221062517074, "learning_rate": 1.3347791776919385e-05, "loss": 0.31374263763427734, "step": 3415 }, { "epoch": 0.8440820360761058, "grad_norm": 1.1670219280886467, "learning_rate": 1.3343941436318418e-05, "loss": 0.38097697496414185, "step": 3416 }, { "epoch": 0.8443291326908822, "grad_norm": 1.1897051621419474, "learning_rate": 1.3340090537480368e-05, "loss": 0.33216768503189087, "step": 3417 }, { "epoch": 0.8445762293056586, "grad_norm": 1.1725517573724566, "learning_rate": 1.333623908104811e-05, "loss": 0.3655902147293091, "step": 3418 }, { "epoch": 0.8448233259204349, "grad_norm": 1.2057786100640921, "learning_rate": 1.3332387067664601e-05, "loss": 0.3518047332763672, "step": 3419 }, { "epoch": 0.8450704225352113, "grad_norm": 1.2961361851205662, "learning_rate": 1.3328534497972893e-05, "loss": 0.38942283391952515, "step": 3420 }, { "epoch": 0.8453175191499877, "grad_norm": 1.2040696989527209, "learning_rate": 1.3324681372616144e-05, "loss": 0.3768773674964905, "step": 3421 }, { "epoch": 0.845564615764764, "grad_norm": 1.0718976456198572, "learning_rate": 1.3320827692237581e-05, "loss": 0.2627354562282562, "step": 3422 }, { "epoch": 0.8458117123795404, "grad_norm": 1.0765539647337747, "learning_rate": 1.3316973457480546e-05, "loss": 0.2948263883590698, "step": 3423 }, { "epoch": 0.8460588089943167, "grad_norm": 1.2063866194169102, "learning_rate": 1.331311866898846e-05, "loss": 0.3052886724472046, "step": 3424 }, { "epoch": 0.8463059056090931, "grad_norm": 1.162112184280599, "learning_rate": 1.3309263327404845e-05, "loss": 0.3385840654373169, "step": 3425 }, { "epoch": 0.8465530022238695, "grad_norm": 1.2371766686681773, "learning_rate": 1.3305407433373305e-05, "loss": 0.39505696296691895, "step": 3426 }, { "epoch": 0.8468000988386459, "grad_norm": 1.1294972775319656, "learning_rate": 1.3301550987537546e-05, "loss": 0.3192881941795349, "step": 3427 }, { "epoch": 0.8470471954534223, "grad_norm": 1.0961403190185253, "learning_rate": 1.3297693990541364e-05, "loss": 0.3065216839313507, "step": 3428 }, { "epoch": 0.8472942920681986, "grad_norm": 1.1233820477131242, "learning_rate": 1.329383644302864e-05, "loss": 0.35411128401756287, "step": 3429 }, { "epoch": 0.847541388682975, "grad_norm": 1.1752707137345955, "learning_rate": 1.3289978345643363e-05, "loss": 0.336148738861084, "step": 3430 }, { "epoch": 0.8477884852977514, "grad_norm": 1.1374665733184528, "learning_rate": 1.328611969902959e-05, "loss": 0.3326353430747986, "step": 3431 }, { "epoch": 0.8480355819125278, "grad_norm": 1.346139158404085, "learning_rate": 1.328226050383149e-05, "loss": 0.36043286323547363, "step": 3432 }, { "epoch": 0.8482826785273042, "grad_norm": 1.1951476771357505, "learning_rate": 1.3278400760693317e-05, "loss": 0.3340897858142853, "step": 3433 }, { "epoch": 0.8485297751420805, "grad_norm": 1.1974685200002366, "learning_rate": 1.3274540470259412e-05, "loss": 0.3757131099700928, "step": 3434 }, { "epoch": 0.8487768717568569, "grad_norm": 1.2780781115225555, "learning_rate": 1.3270679633174219e-05, "loss": 0.40231093764305115, "step": 3435 }, { "epoch": 0.8490239683716333, "grad_norm": 1.2086684989898953, "learning_rate": 1.3266818250082257e-05, "loss": 0.3656245470046997, "step": 3436 }, { "epoch": 0.8492710649864097, "grad_norm": 1.1432408809553527, "learning_rate": 1.3262956321628147e-05, "loss": 0.2895788550376892, "step": 3437 }, { "epoch": 0.8495181616011861, "grad_norm": 1.2486416321330303, "learning_rate": 1.3259093848456604e-05, "loss": 0.3662123680114746, "step": 3438 }, { "epoch": 0.8497652582159625, "grad_norm": 1.1407674352005428, "learning_rate": 1.325523083121242e-05, "loss": 0.29628777503967285, "step": 3439 }, { "epoch": 0.8500123548307388, "grad_norm": 1.325568007355236, "learning_rate": 1.3251367270540492e-05, "loss": 0.4065803289413452, "step": 3440 }, { "epoch": 0.8502594514455152, "grad_norm": 1.2762035226761437, "learning_rate": 1.3247503167085802e-05, "loss": 0.3838003873825073, "step": 3441 }, { "epoch": 0.8505065480602916, "grad_norm": 1.1789638206661572, "learning_rate": 1.3243638521493425e-05, "loss": 0.2998434901237488, "step": 3442 }, { "epoch": 0.850753644675068, "grad_norm": 1.2113391025234301, "learning_rate": 1.3239773334408517e-05, "loss": 0.3537833094596863, "step": 3443 }, { "epoch": 0.8510007412898444, "grad_norm": 1.179771360725232, "learning_rate": 1.323590760647634e-05, "loss": 0.3378438949584961, "step": 3444 }, { "epoch": 0.8512478379046207, "grad_norm": 1.2799703301599217, "learning_rate": 1.3232041338342232e-05, "loss": 0.344633013010025, "step": 3445 }, { "epoch": 0.8514949345193971, "grad_norm": 1.0702452282216197, "learning_rate": 1.3228174530651629e-05, "loss": 0.2897040843963623, "step": 3446 }, { "epoch": 0.8517420311341735, "grad_norm": 1.0709965408090114, "learning_rate": 1.3224307184050057e-05, "loss": 0.28739747405052185, "step": 3447 }, { "epoch": 0.8519891277489499, "grad_norm": 1.129139368483565, "learning_rate": 1.322043929918313e-05, "loss": 0.3039382994174957, "step": 3448 }, { "epoch": 0.8522362243637263, "grad_norm": 1.1328553543126536, "learning_rate": 1.321657087669655e-05, "loss": 0.30427122116088867, "step": 3449 }, { "epoch": 0.8524833209785025, "grad_norm": 1.2453176406244981, "learning_rate": 1.3212701917236111e-05, "loss": 0.35306888818740845, "step": 3450 }, { "epoch": 0.8527304175932789, "grad_norm": 1.1877715836879315, "learning_rate": 1.32088324214477e-05, "loss": 0.328693151473999, "step": 3451 }, { "epoch": 0.8529775142080553, "grad_norm": 1.2205750692214279, "learning_rate": 1.3204962389977285e-05, "loss": 0.3264922797679901, "step": 3452 }, { "epoch": 0.8532246108228317, "grad_norm": 1.1756825301386058, "learning_rate": 1.3201091823470935e-05, "loss": 0.30600792169570923, "step": 3453 }, { "epoch": 0.8534717074376081, "grad_norm": 1.2320639231109325, "learning_rate": 1.3197220722574792e-05, "loss": 0.33257168531417847, "step": 3454 }, { "epoch": 0.8537188040523844, "grad_norm": 1.3701713557027506, "learning_rate": 1.3193349087935109e-05, "loss": 0.41226133704185486, "step": 3455 }, { "epoch": 0.8539659006671608, "grad_norm": 1.1458082783043302, "learning_rate": 1.3189476920198207e-05, "loss": 0.30328577756881714, "step": 3456 }, { "epoch": 0.8542129972819372, "grad_norm": 1.338503026247504, "learning_rate": 1.3185604220010507e-05, "loss": 0.43094444274902344, "step": 3457 }, { "epoch": 0.8544600938967136, "grad_norm": 1.2296394443569778, "learning_rate": 1.318173098801852e-05, "loss": 0.3461928963661194, "step": 3458 }, { "epoch": 0.85470719051149, "grad_norm": 1.1786596785847725, "learning_rate": 1.317785722486884e-05, "loss": 0.378301739692688, "step": 3459 }, { "epoch": 0.8549542871262664, "grad_norm": 1.243318322260699, "learning_rate": 1.3173982931208153e-05, "loss": 0.36773067712783813, "step": 3460 }, { "epoch": 0.8552013837410427, "grad_norm": 1.2085429373129528, "learning_rate": 1.3170108107683232e-05, "loss": 0.36792370676994324, "step": 3461 }, { "epoch": 0.8554484803558191, "grad_norm": 1.3929556788461477, "learning_rate": 1.3166232754940944e-05, "loss": 0.4642177224159241, "step": 3462 }, { "epoch": 0.8556955769705955, "grad_norm": 1.1319314023582812, "learning_rate": 1.3162356873628234e-05, "loss": 0.2861367464065552, "step": 3463 }, { "epoch": 0.8559426735853719, "grad_norm": 1.025355403331264, "learning_rate": 1.3158480464392146e-05, "loss": 0.2898791432380676, "step": 3464 }, { "epoch": 0.8561897702001483, "grad_norm": 1.1495675555137048, "learning_rate": 1.31546035278798e-05, "loss": 0.3028770089149475, "step": 3465 }, { "epoch": 0.8564368668149246, "grad_norm": 1.218411457197498, "learning_rate": 1.3150726064738422e-05, "loss": 0.3664201498031616, "step": 3466 }, { "epoch": 0.856683963429701, "grad_norm": 1.2026744883422924, "learning_rate": 1.3146848075615308e-05, "loss": 0.3151472210884094, "step": 3467 }, { "epoch": 0.8569310600444774, "grad_norm": 1.17289601151156, "learning_rate": 1.3142969561157845e-05, "loss": 0.3285972774028778, "step": 3468 }, { "epoch": 0.8571781566592538, "grad_norm": 1.3074091392514675, "learning_rate": 1.313909052201352e-05, "loss": 0.3823508024215698, "step": 3469 }, { "epoch": 0.8574252532740302, "grad_norm": 1.2426997111431162, "learning_rate": 1.3135210958829899e-05, "loss": 0.3656393885612488, "step": 3470 }, { "epoch": 0.8576723498888065, "grad_norm": 1.1785238331569865, "learning_rate": 1.3131330872254627e-05, "loss": 0.34823697805404663, "step": 3471 }, { "epoch": 0.8579194465035829, "grad_norm": 1.1237788365471901, "learning_rate": 1.3127450262935453e-05, "loss": 0.30740976333618164, "step": 3472 }, { "epoch": 0.8581665431183593, "grad_norm": 1.1441136155823646, "learning_rate": 1.3123569131520201e-05, "loss": 0.3158029317855835, "step": 3473 }, { "epoch": 0.8584136397331357, "grad_norm": 1.1843375377644674, "learning_rate": 1.3119687478656792e-05, "loss": 0.2703063189983368, "step": 3474 }, { "epoch": 0.8586607363479121, "grad_norm": 1.161388035915393, "learning_rate": 1.3115805304993221e-05, "loss": 0.3493794798851013, "step": 3475 }, { "epoch": 0.8589078329626884, "grad_norm": 1.6324619258205153, "learning_rate": 1.3111922611177586e-05, "loss": 0.32634803652763367, "step": 3476 }, { "epoch": 0.8591549295774648, "grad_norm": 1.1950012502276166, "learning_rate": 1.3108039397858057e-05, "loss": 0.3336178660392761, "step": 3477 }, { "epoch": 0.8594020261922412, "grad_norm": 1.2664877861455646, "learning_rate": 1.3104155665682896e-05, "loss": 0.31726503372192383, "step": 3478 }, { "epoch": 0.8596491228070176, "grad_norm": 1.3018688984406241, "learning_rate": 1.3100271415300459e-05, "loss": 0.38620102405548096, "step": 3479 }, { "epoch": 0.859896219421794, "grad_norm": 1.2753189912472513, "learning_rate": 1.3096386647359176e-05, "loss": 0.37462079524993896, "step": 3480 }, { "epoch": 0.8601433160365703, "grad_norm": 1.116362238482088, "learning_rate": 1.3092501362507575e-05, "loss": 0.32553631067276, "step": 3481 }, { "epoch": 0.8603904126513466, "grad_norm": 1.1296223915540802, "learning_rate": 1.3088615561394259e-05, "loss": 0.2957601249217987, "step": 3482 }, { "epoch": 0.860637509266123, "grad_norm": 1.1900907798194418, "learning_rate": 1.3084729244667928e-05, "loss": 0.33414191007614136, "step": 3483 }, { "epoch": 0.8608846058808994, "grad_norm": 1.1786829333754478, "learning_rate": 1.3080842412977361e-05, "loss": 0.29204797744750977, "step": 3484 }, { "epoch": 0.8611317024956758, "grad_norm": 1.1951520793627204, "learning_rate": 1.3076955066971425e-05, "loss": 0.33675557374954224, "step": 3485 }, { "epoch": 0.8613787991104522, "grad_norm": 1.095957523659315, "learning_rate": 1.3073067207299072e-05, "loss": 0.3044857978820801, "step": 3486 }, { "epoch": 0.8616258957252285, "grad_norm": 1.3353044347400553, "learning_rate": 1.3069178834609344e-05, "loss": 0.31871533393859863, "step": 3487 }, { "epoch": 0.8618729923400049, "grad_norm": 1.302429990469893, "learning_rate": 1.3065289949551362e-05, "loss": 0.35563668608665466, "step": 3488 }, { "epoch": 0.8621200889547813, "grad_norm": 1.074140589178515, "learning_rate": 1.306140055277434e-05, "loss": 0.29035258293151855, "step": 3489 }, { "epoch": 0.8623671855695577, "grad_norm": 1.288873802055245, "learning_rate": 1.305751064492757e-05, "loss": 0.34214258193969727, "step": 3490 }, { "epoch": 0.8626142821843341, "grad_norm": 1.1849114128834948, "learning_rate": 1.3053620226660431e-05, "loss": 0.33715975284576416, "step": 3491 }, { "epoch": 0.8628613787991104, "grad_norm": 1.3022180221928683, "learning_rate": 1.3049729298622391e-05, "loss": 0.36550116539001465, "step": 3492 }, { "epoch": 0.8631084754138868, "grad_norm": 1.2829460997112772, "learning_rate": 1.3045837861463005e-05, "loss": 0.35234329104423523, "step": 3493 }, { "epoch": 0.8633555720286632, "grad_norm": 1.369536318192634, "learning_rate": 1.30419459158319e-05, "loss": 0.3510131239891052, "step": 3494 }, { "epoch": 0.8636026686434396, "grad_norm": 1.3629498036864696, "learning_rate": 1.3038053462378804e-05, "loss": 0.4098070561885834, "step": 3495 }, { "epoch": 0.863849765258216, "grad_norm": 1.161512923512945, "learning_rate": 1.3034160501753519e-05, "loss": 0.31593701243400574, "step": 3496 }, { "epoch": 0.8640968618729924, "grad_norm": 1.1823757609571486, "learning_rate": 1.3030267034605939e-05, "loss": 0.3265953063964844, "step": 3497 }, { "epoch": 0.8643439584877687, "grad_norm": 1.2353321891496074, "learning_rate": 1.3026373061586033e-05, "loss": 0.33649399876594543, "step": 3498 }, { "epoch": 0.8645910551025451, "grad_norm": 1.4440143035950763, "learning_rate": 1.3022478583343867e-05, "loss": 0.3421892821788788, "step": 3499 }, { "epoch": 0.8648381517173215, "grad_norm": 1.3654315387150564, "learning_rate": 1.301858360052958e-05, "loss": 0.37005165219306946, "step": 3500 }, { "epoch": 0.8650852483320979, "grad_norm": 1.3913742075952011, "learning_rate": 1.3014688113793401e-05, "loss": 0.36589139699935913, "step": 3501 }, { "epoch": 0.8653323449468743, "grad_norm": 1.2185419343706547, "learning_rate": 1.3010792123785643e-05, "loss": 0.3631768822669983, "step": 3502 }, { "epoch": 0.8655794415616506, "grad_norm": 1.1485775909506386, "learning_rate": 1.3006895631156698e-05, "loss": 0.33111006021499634, "step": 3503 }, { "epoch": 0.865826538176427, "grad_norm": 1.2851688197765594, "learning_rate": 1.3002998636557054e-05, "loss": 0.35503077507019043, "step": 3504 }, { "epoch": 0.8660736347912034, "grad_norm": 1.16248376088054, "learning_rate": 1.2999101140637264e-05, "loss": 0.317313015460968, "step": 3505 }, { "epoch": 0.8663207314059798, "grad_norm": 1.1964137724924186, "learning_rate": 1.2995203144047985e-05, "loss": 0.361757755279541, "step": 3506 }, { "epoch": 0.8665678280207562, "grad_norm": 1.1996833560917974, "learning_rate": 1.2991304647439945e-05, "loss": 0.38865387439727783, "step": 3507 }, { "epoch": 0.8668149246355324, "grad_norm": 1.240992329070292, "learning_rate": 1.2987405651463954e-05, "loss": 0.42205435037612915, "step": 3508 }, { "epoch": 0.8670620212503088, "grad_norm": 1.1911472076571827, "learning_rate": 1.2983506156770915e-05, "loss": 0.2874034643173218, "step": 3509 }, { "epoch": 0.8673091178650852, "grad_norm": 1.224356195237412, "learning_rate": 1.2979606164011806e-05, "loss": 0.3139120638370514, "step": 3510 }, { "epoch": 0.8675562144798616, "grad_norm": 1.136149368245873, "learning_rate": 1.2975705673837697e-05, "loss": 0.290192186832428, "step": 3511 }, { "epoch": 0.867803311094638, "grad_norm": 1.1992736080951047, "learning_rate": 1.2971804686899725e-05, "loss": 0.36294928193092346, "step": 3512 }, { "epoch": 0.8680504077094143, "grad_norm": 1.0210962810114155, "learning_rate": 1.2967903203849131e-05, "loss": 0.27359139919281006, "step": 3513 }, { "epoch": 0.8682975043241907, "grad_norm": 1.3976437082480415, "learning_rate": 1.296400122533722e-05, "loss": 0.3691669702529907, "step": 3514 }, { "epoch": 0.8685446009389671, "grad_norm": 1.219602171646425, "learning_rate": 1.296009875201539e-05, "loss": 0.3335564136505127, "step": 3515 }, { "epoch": 0.8687916975537435, "grad_norm": 1.2620652600207487, "learning_rate": 1.2956195784535119e-05, "loss": 0.3637705445289612, "step": 3516 }, { "epoch": 0.8690387941685199, "grad_norm": 1.1901971626362813, "learning_rate": 1.2952292323547971e-05, "loss": 0.30707210302352905, "step": 3517 }, { "epoch": 0.8692858907832963, "grad_norm": 1.1910846721162254, "learning_rate": 1.2948388369705589e-05, "loss": 0.32290786504745483, "step": 3518 }, { "epoch": 0.8695329873980726, "grad_norm": 1.2243934380425223, "learning_rate": 1.2944483923659693e-05, "loss": 0.2912779450416565, "step": 3519 }, { "epoch": 0.869780084012849, "grad_norm": 1.1474581467584721, "learning_rate": 1.2940578986062095e-05, "loss": 0.34447264671325684, "step": 3520 }, { "epoch": 0.8700271806276254, "grad_norm": 1.1248895054897725, "learning_rate": 1.2936673557564685e-05, "loss": 0.2992897629737854, "step": 3521 }, { "epoch": 0.8702742772424018, "grad_norm": 1.2038276704010151, "learning_rate": 1.293276763881943e-05, "loss": 0.3057147264480591, "step": 3522 }, { "epoch": 0.8705213738571782, "grad_norm": 1.1539943402658588, "learning_rate": 1.2928861230478389e-05, "loss": 0.3305913507938385, "step": 3523 }, { "epoch": 0.8707684704719545, "grad_norm": 1.2528972077781244, "learning_rate": 1.2924954333193693e-05, "loss": 0.38025009632110596, "step": 3524 }, { "epoch": 0.8710155670867309, "grad_norm": 1.3103796537377874, "learning_rate": 1.2921046947617566e-05, "loss": 0.36866623163223267, "step": 3525 }, { "epoch": 0.8712626637015073, "grad_norm": 1.1891176807975017, "learning_rate": 1.2917139074402299e-05, "loss": 0.3462403416633606, "step": 3526 }, { "epoch": 0.8715097603162837, "grad_norm": 1.2754369929379743, "learning_rate": 1.291323071420027e-05, "loss": 0.34569448232650757, "step": 3527 }, { "epoch": 0.8717568569310601, "grad_norm": 1.1703877008880172, "learning_rate": 1.290932186766395e-05, "loss": 0.3591170608997345, "step": 3528 }, { "epoch": 0.8720039535458364, "grad_norm": 1.3278174739975888, "learning_rate": 1.2905412535445873e-05, "loss": 0.3823601007461548, "step": 3529 }, { "epoch": 0.8722510501606128, "grad_norm": 1.1302211620899691, "learning_rate": 1.2901502718198664e-05, "loss": 0.30075305700302124, "step": 3530 }, { "epoch": 0.8724981467753892, "grad_norm": 1.2802568739237272, "learning_rate": 1.2897592416575028e-05, "loss": 0.340226948261261, "step": 3531 }, { "epoch": 0.8727452433901656, "grad_norm": 1.3300789430811326, "learning_rate": 1.2893681631227753e-05, "loss": 0.37292999029159546, "step": 3532 }, { "epoch": 0.872992340004942, "grad_norm": 1.124450458011191, "learning_rate": 1.28897703628097e-05, "loss": 0.32987719774246216, "step": 3533 }, { "epoch": 0.8732394366197183, "grad_norm": 1.2523807261014626, "learning_rate": 1.2885858611973816e-05, "loss": 0.41332826018333435, "step": 3534 }, { "epoch": 0.8734865332344947, "grad_norm": 1.2715803515724058, "learning_rate": 1.2881946379373132e-05, "loss": 0.35703399777412415, "step": 3535 }, { "epoch": 0.873733629849271, "grad_norm": 1.2019400676202876, "learning_rate": 1.2878033665660753e-05, "loss": 0.38901400566101074, "step": 3536 }, { "epoch": 0.8739807264640475, "grad_norm": 1.2221489257484401, "learning_rate": 1.2874120471489866e-05, "loss": 0.38007766008377075, "step": 3537 }, { "epoch": 0.8742278230788239, "grad_norm": 1.2232731205708531, "learning_rate": 1.287020679751374e-05, "loss": 0.29751676321029663, "step": 3538 }, { "epoch": 0.8744749196936002, "grad_norm": 1.34891855640638, "learning_rate": 1.2866292644385724e-05, "loss": 0.383672833442688, "step": 3539 }, { "epoch": 0.8747220163083765, "grad_norm": 1.2152575472861515, "learning_rate": 1.2862378012759247e-05, "loss": 0.3170778155326843, "step": 3540 }, { "epoch": 0.8749691129231529, "grad_norm": 1.3389663960507865, "learning_rate": 1.2858462903287814e-05, "loss": 0.32633960247039795, "step": 3541 }, { "epoch": 0.8752162095379293, "grad_norm": 1.2258539192611306, "learning_rate": 1.2854547316625014e-05, "loss": 0.32256484031677246, "step": 3542 }, { "epoch": 0.8754633061527057, "grad_norm": 1.0476028474227659, "learning_rate": 1.2850631253424518e-05, "loss": 0.26508772373199463, "step": 3543 }, { "epoch": 0.8757104027674821, "grad_norm": 1.282525169210649, "learning_rate": 1.2846714714340063e-05, "loss": 0.35933375358581543, "step": 3544 }, { "epoch": 0.8759574993822584, "grad_norm": 1.154087734854075, "learning_rate": 1.2842797700025487e-05, "loss": 0.33146703243255615, "step": 3545 }, { "epoch": 0.8762045959970348, "grad_norm": 1.2059775854324173, "learning_rate": 1.283888021113469e-05, "loss": 0.39827466011047363, "step": 3546 }, { "epoch": 0.8764516926118112, "grad_norm": 1.1576388794018364, "learning_rate": 1.2834962248321657e-05, "loss": 0.2813495397567749, "step": 3547 }, { "epoch": 0.8766987892265876, "grad_norm": 1.1376107803541347, "learning_rate": 1.2831043812240453e-05, "loss": 0.3219439387321472, "step": 3548 }, { "epoch": 0.876945885841364, "grad_norm": 1.1708799834375585, "learning_rate": 1.2827124903545222e-05, "loss": 0.333126962184906, "step": 3549 }, { "epoch": 0.8771929824561403, "grad_norm": 1.1409256321735806, "learning_rate": 1.2823205522890186e-05, "loss": 0.31084516644477844, "step": 3550 }, { "epoch": 0.8774400790709167, "grad_norm": 1.122215357171003, "learning_rate": 1.2819285670929643e-05, "loss": 0.27925044298171997, "step": 3551 }, { "epoch": 0.8776871756856931, "grad_norm": 1.1726716008792835, "learning_rate": 1.2815365348317976e-05, "loss": 0.38101816177368164, "step": 3552 }, { "epoch": 0.8779342723004695, "grad_norm": 1.2595105293719138, "learning_rate": 1.281144455570964e-05, "loss": 0.31277966499328613, "step": 3553 }, { "epoch": 0.8781813689152459, "grad_norm": 1.1302915613798612, "learning_rate": 1.2807523293759175e-05, "loss": 0.31452327966690063, "step": 3554 }, { "epoch": 0.8784284655300222, "grad_norm": 1.175903213035885, "learning_rate": 1.2803601563121193e-05, "loss": 0.34581702947616577, "step": 3555 }, { "epoch": 0.8786755621447986, "grad_norm": 1.153422604746568, "learning_rate": 1.2799679364450387e-05, "loss": 0.29943397641181946, "step": 3556 }, { "epoch": 0.878922658759575, "grad_norm": 1.1660237831116895, "learning_rate": 1.2795756698401533e-05, "loss": 0.31146901845932007, "step": 3557 }, { "epoch": 0.8791697553743514, "grad_norm": 1.383887095895938, "learning_rate": 1.2791833565629473e-05, "loss": 0.3953278362751007, "step": 3558 }, { "epoch": 0.8794168519891278, "grad_norm": 1.1019493435690733, "learning_rate": 1.278790996678914e-05, "loss": 0.3090173602104187, "step": 3559 }, { "epoch": 0.8796639486039042, "grad_norm": 1.276971861346414, "learning_rate": 1.2783985902535536e-05, "loss": 0.3687390089035034, "step": 3560 }, { "epoch": 0.8799110452186805, "grad_norm": 1.2527076141375144, "learning_rate": 1.2780061373523742e-05, "loss": 0.311953067779541, "step": 3561 }, { "epoch": 0.8801581418334569, "grad_norm": 1.155007675715581, "learning_rate": 1.2776136380408924e-05, "loss": 0.3169524073600769, "step": 3562 }, { "epoch": 0.8804052384482333, "grad_norm": 1.3781685402748562, "learning_rate": 1.2772210923846318e-05, "loss": 0.3799681067466736, "step": 3563 }, { "epoch": 0.8806523350630097, "grad_norm": 1.3423852680604973, "learning_rate": 1.2768285004491235e-05, "loss": 0.3863564133644104, "step": 3564 }, { "epoch": 0.8808994316777861, "grad_norm": 1.160288088810567, "learning_rate": 1.276435862299907e-05, "loss": 0.2870144844055176, "step": 3565 }, { "epoch": 0.8811465282925623, "grad_norm": 1.2430829844496363, "learning_rate": 1.2760431780025293e-05, "loss": 0.27700430154800415, "step": 3566 }, { "epoch": 0.8813936249073387, "grad_norm": 1.3507274863729368, "learning_rate": 1.2756504476225452e-05, "loss": 0.44148150086402893, "step": 3567 }, { "epoch": 0.8816407215221151, "grad_norm": 1.1842884802413456, "learning_rate": 1.2752576712255165e-05, "loss": 0.3099422752857208, "step": 3568 }, { "epoch": 0.8818878181368915, "grad_norm": 1.186227614141087, "learning_rate": 1.2748648488770142e-05, "loss": 0.3376999497413635, "step": 3569 }, { "epoch": 0.8821349147516679, "grad_norm": 1.4422532229651817, "learning_rate": 1.2744719806426148e-05, "loss": 0.38421717286109924, "step": 3570 }, { "epoch": 0.8823820113664442, "grad_norm": 1.1932518082912666, "learning_rate": 1.2740790665879045e-05, "loss": 0.3252173066139221, "step": 3571 }, { "epoch": 0.8826291079812206, "grad_norm": 1.1721033289194522, "learning_rate": 1.2736861067784759e-05, "loss": 0.3451635539531708, "step": 3572 }, { "epoch": 0.882876204595997, "grad_norm": 1.1977148753935822, "learning_rate": 1.2732931012799303e-05, "loss": 0.3354026675224304, "step": 3573 }, { "epoch": 0.8831233012107734, "grad_norm": 1.228153539224069, "learning_rate": 1.272900050157875e-05, "loss": 0.37154728174209595, "step": 3574 }, { "epoch": 0.8833703978255498, "grad_norm": 1.2650194995851898, "learning_rate": 1.2725069534779269e-05, "loss": 0.3460458517074585, "step": 3575 }, { "epoch": 0.8836174944403261, "grad_norm": 1.5792963874884973, "learning_rate": 1.2721138113057086e-05, "loss": 0.3992457985877991, "step": 3576 }, { "epoch": 0.8838645910551025, "grad_norm": 1.033488532981272, "learning_rate": 1.2717206237068515e-05, "loss": 0.287028431892395, "step": 3577 }, { "epoch": 0.8841116876698789, "grad_norm": 1.055368935374127, "learning_rate": 1.2713273907469948e-05, "loss": 0.3296516537666321, "step": 3578 }, { "epoch": 0.8843587842846553, "grad_norm": 1.1912841060690027, "learning_rate": 1.270934112491784e-05, "loss": 0.38152655959129333, "step": 3579 }, { "epoch": 0.8846058808994317, "grad_norm": 0.9914394849399428, "learning_rate": 1.2705407890068738e-05, "loss": 0.24185124039649963, "step": 3580 }, { "epoch": 0.8848529775142081, "grad_norm": 1.1154145780915052, "learning_rate": 1.2701474203579243e-05, "loss": 0.3428599238395691, "step": 3581 }, { "epoch": 0.8851000741289844, "grad_norm": 1.3257317576740024, "learning_rate": 1.2697540066106056e-05, "loss": 0.4389877915382385, "step": 3582 }, { "epoch": 0.8853471707437608, "grad_norm": 1.1703231343114997, "learning_rate": 1.2693605478305934e-05, "loss": 0.3360682725906372, "step": 3583 }, { "epoch": 0.8855942673585372, "grad_norm": 1.1805049814402429, "learning_rate": 1.268967044083572e-05, "loss": 0.30817514657974243, "step": 3584 }, { "epoch": 0.8858413639733136, "grad_norm": 1.1935459122384195, "learning_rate": 1.2685734954352327e-05, "loss": 0.36248740553855896, "step": 3585 }, { "epoch": 0.88608846058809, "grad_norm": 1.1713311366256423, "learning_rate": 1.2681799019512744e-05, "loss": 0.31660008430480957, "step": 3586 }, { "epoch": 0.8863355572028663, "grad_norm": 1.1901334045990148, "learning_rate": 1.267786263697404e-05, "loss": 0.3522111177444458, "step": 3587 }, { "epoch": 0.8865826538176427, "grad_norm": 1.1552102405317028, "learning_rate": 1.2673925807393346e-05, "loss": 0.3245996832847595, "step": 3588 }, { "epoch": 0.8868297504324191, "grad_norm": 1.2126929417490273, "learning_rate": 1.2669988531427884e-05, "loss": 0.3806597590446472, "step": 3589 }, { "epoch": 0.8870768470471955, "grad_norm": 1.4199727991199007, "learning_rate": 1.2666050809734938e-05, "loss": 0.4367002546787262, "step": 3590 }, { "epoch": 0.8873239436619719, "grad_norm": 1.2769626011362551, "learning_rate": 1.2662112642971866e-05, "loss": 0.3797420859336853, "step": 3591 }, { "epoch": 0.8875710402767482, "grad_norm": 1.1322621800876067, "learning_rate": 1.2658174031796118e-05, "loss": 0.31077274680137634, "step": 3592 }, { "epoch": 0.8878181368915246, "grad_norm": 1.1278628923029321, "learning_rate": 1.265423497686519e-05, "loss": 0.3281831741333008, "step": 3593 }, { "epoch": 0.888065233506301, "grad_norm": 1.1524653080990706, "learning_rate": 1.2650295478836681e-05, "loss": 0.3200218081474304, "step": 3594 }, { "epoch": 0.8883123301210774, "grad_norm": 1.4116642554397962, "learning_rate": 1.264635553836824e-05, "loss": 0.4297561049461365, "step": 3595 }, { "epoch": 0.8885594267358538, "grad_norm": 1.2885699943713504, "learning_rate": 1.2642415156117605e-05, "loss": 0.3873130977153778, "step": 3596 }, { "epoch": 0.8888065233506302, "grad_norm": 1.2423855499991836, "learning_rate": 1.263847433274258e-05, "loss": 0.32737499475479126, "step": 3597 }, { "epoch": 0.8890536199654064, "grad_norm": 1.130677431761847, "learning_rate": 1.2634533068901047e-05, "loss": 0.31997108459472656, "step": 3598 }, { "epoch": 0.8893007165801828, "grad_norm": 1.1684873726415852, "learning_rate": 1.2630591365250964e-05, "loss": 0.3711482584476471, "step": 3599 }, { "epoch": 0.8895478131949592, "grad_norm": 1.2228386509118447, "learning_rate": 1.2626649222450352e-05, "loss": 0.3635514974594116, "step": 3600 }, { "epoch": 0.8897949098097356, "grad_norm": 1.1602400299199067, "learning_rate": 1.2622706641157314e-05, "loss": 0.37536895275115967, "step": 3601 }, { "epoch": 0.890042006424512, "grad_norm": 1.2060336445093451, "learning_rate": 1.2618763622030023e-05, "loss": 0.31997382640838623, "step": 3602 }, { "epoch": 0.8902891030392883, "grad_norm": 1.2359018690574222, "learning_rate": 1.2614820165726729e-05, "loss": 0.33101409673690796, "step": 3603 }, { "epoch": 0.8905361996540647, "grad_norm": 1.1967022530242224, "learning_rate": 1.261087627290575e-05, "loss": 0.3170337677001953, "step": 3604 }, { "epoch": 0.8907832962688411, "grad_norm": 1.1870857433424324, "learning_rate": 1.2606931944225478e-05, "loss": 0.3415236175060272, "step": 3605 }, { "epoch": 0.8910303928836175, "grad_norm": 1.1309526201768383, "learning_rate": 1.2602987180344379e-05, "loss": 0.3021295666694641, "step": 3606 }, { "epoch": 0.8912774894983939, "grad_norm": 1.2064244239457367, "learning_rate": 1.2599041981920994e-05, "loss": 0.3252032995223999, "step": 3607 }, { "epoch": 0.8915245861131702, "grad_norm": 1.2196765907889424, "learning_rate": 1.2595096349613933e-05, "loss": 0.3357781767845154, "step": 3608 }, { "epoch": 0.8917716827279466, "grad_norm": 1.2443290931452884, "learning_rate": 1.2591150284081875e-05, "loss": 0.36292028427124023, "step": 3609 }, { "epoch": 0.892018779342723, "grad_norm": 1.3059640743313496, "learning_rate": 1.258720378598358e-05, "loss": 0.356090784072876, "step": 3610 }, { "epoch": 0.8922658759574994, "grad_norm": 1.0726296459612843, "learning_rate": 1.2583256855977873e-05, "loss": 0.31168729066848755, "step": 3611 }, { "epoch": 0.8925129725722758, "grad_norm": 1.092216633335083, "learning_rate": 1.2579309494723655e-05, "loss": 0.31017351150512695, "step": 3612 }, { "epoch": 0.8927600691870521, "grad_norm": 1.209437018893141, "learning_rate": 1.25753617028799e-05, "loss": 0.3725413382053375, "step": 3613 }, { "epoch": 0.8930071658018285, "grad_norm": 1.2394894549552835, "learning_rate": 1.2571413481105653e-05, "loss": 0.3749920427799225, "step": 3614 }, { "epoch": 0.8932542624166049, "grad_norm": 1.3449611731806588, "learning_rate": 1.256746483006002e-05, "loss": 0.38582050800323486, "step": 3615 }, { "epoch": 0.8935013590313813, "grad_norm": 1.4133783551736752, "learning_rate": 1.2563515750402199e-05, "loss": 0.3018103837966919, "step": 3616 }, { "epoch": 0.8937484556461577, "grad_norm": 1.1061838216427995, "learning_rate": 1.2559566242791443e-05, "loss": 0.2925422489643097, "step": 3617 }, { "epoch": 0.8939955522609341, "grad_norm": 1.1502774346158946, "learning_rate": 1.2555616307887087e-05, "loss": 0.32994353771209717, "step": 3618 }, { "epoch": 0.8942426488757104, "grad_norm": 1.1320932303187226, "learning_rate": 1.2551665946348526e-05, "loss": 0.28004351258277893, "step": 3619 }, { "epoch": 0.8944897454904868, "grad_norm": 1.1956360731946707, "learning_rate": 1.2547715158835235e-05, "loss": 0.36194664239883423, "step": 3620 }, { "epoch": 0.8947368421052632, "grad_norm": 1.20729793830962, "learning_rate": 1.2543763946006762e-05, "loss": 0.3135375380516052, "step": 3621 }, { "epoch": 0.8949839387200396, "grad_norm": 1.2588232259346046, "learning_rate": 1.2539812308522716e-05, "loss": 0.34981662034988403, "step": 3622 }, { "epoch": 0.895231035334816, "grad_norm": 1.2013223399931006, "learning_rate": 1.2535860247042785e-05, "loss": 0.34542161226272583, "step": 3623 }, { "epoch": 0.8954781319495922, "grad_norm": 1.3470947673424953, "learning_rate": 1.253190776222673e-05, "loss": 0.3833804130554199, "step": 3624 }, { "epoch": 0.8957252285643686, "grad_norm": 1.3568310723153287, "learning_rate": 1.2527954854734374e-05, "loss": 0.38970091938972473, "step": 3625 }, { "epoch": 0.895972325179145, "grad_norm": 1.1927887603487939, "learning_rate": 1.2524001525225613e-05, "loss": 0.3334750533103943, "step": 3626 }, { "epoch": 0.8962194217939214, "grad_norm": 1.0911020029644898, "learning_rate": 1.2520047774360418e-05, "loss": 0.3305051326751709, "step": 3627 }, { "epoch": 0.8964665184086978, "grad_norm": 1.153547637007759, "learning_rate": 1.2516093602798826e-05, "loss": 0.3183302879333496, "step": 3628 }, { "epoch": 0.8967136150234741, "grad_norm": 1.2098015499542254, "learning_rate": 1.2512139011200949e-05, "loss": 0.3419644236564636, "step": 3629 }, { "epoch": 0.8969607116382505, "grad_norm": 1.1310870683539491, "learning_rate": 1.250818400022696e-05, "loss": 0.288560688495636, "step": 3630 }, { "epoch": 0.8972078082530269, "grad_norm": 1.4316560815931303, "learning_rate": 1.2504228570537113e-05, "loss": 0.3352498412132263, "step": 3631 }, { "epoch": 0.8974549048678033, "grad_norm": 1.2633456981594058, "learning_rate": 1.2500272722791726e-05, "loss": 0.3608311712741852, "step": 3632 }, { "epoch": 0.8977020014825797, "grad_norm": 1.2358827247773396, "learning_rate": 1.2496316457651188e-05, "loss": 0.3586863875389099, "step": 3633 }, { "epoch": 0.897949098097356, "grad_norm": 1.1285952335606604, "learning_rate": 1.2492359775775955e-05, "loss": 0.2872292995452881, "step": 3634 }, { "epoch": 0.8981961947121324, "grad_norm": 1.1630748893197076, "learning_rate": 1.2488402677826557e-05, "loss": 0.35734689235687256, "step": 3635 }, { "epoch": 0.8984432913269088, "grad_norm": 1.121973659726436, "learning_rate": 1.248444516446359e-05, "loss": 0.3105732202529907, "step": 3636 }, { "epoch": 0.8986903879416852, "grad_norm": 1.170525902003127, "learning_rate": 1.248048723634772e-05, "loss": 0.2776848375797272, "step": 3637 }, { "epoch": 0.8989374845564616, "grad_norm": 1.2196665212788929, "learning_rate": 1.247652889413969e-05, "loss": 0.3181226849555969, "step": 3638 }, { "epoch": 0.899184581171238, "grad_norm": 1.2822467776341553, "learning_rate": 1.2472570138500293e-05, "loss": 0.33771851658821106, "step": 3639 }, { "epoch": 0.8994316777860143, "grad_norm": 1.2025569358613102, "learning_rate": 1.246861097009041e-05, "loss": 0.3098910450935364, "step": 3640 }, { "epoch": 0.8996787744007907, "grad_norm": 1.402616285165144, "learning_rate": 1.2464651389570985e-05, "loss": 0.36509615182876587, "step": 3641 }, { "epoch": 0.8999258710155671, "grad_norm": 1.2339485967952852, "learning_rate": 1.2460691397603026e-05, "loss": 0.31802475452423096, "step": 3642 }, { "epoch": 0.9001729676303435, "grad_norm": 1.220829077940566, "learning_rate": 1.245673099484762e-05, "loss": 0.3374265134334564, "step": 3643 }, { "epoch": 0.9004200642451199, "grad_norm": 1.2751584432184377, "learning_rate": 1.2452770181965904e-05, "loss": 0.33824020624160767, "step": 3644 }, { "epoch": 0.9006671608598962, "grad_norm": 1.2814850802729747, "learning_rate": 1.2448808959619112e-05, "loss": 0.3244684934616089, "step": 3645 }, { "epoch": 0.9009142574746726, "grad_norm": 1.3820663777543705, "learning_rate": 1.2444847328468515e-05, "loss": 0.4240858554840088, "step": 3646 }, { "epoch": 0.901161354089449, "grad_norm": 1.3201599736637257, "learning_rate": 1.2440885289175475e-05, "loss": 0.30047303438186646, "step": 3647 }, { "epoch": 0.9014084507042254, "grad_norm": 1.1625466376209275, "learning_rate": 1.2436922842401411e-05, "loss": 0.3043980598449707, "step": 3648 }, { "epoch": 0.9016555473190018, "grad_norm": 1.3393092438634244, "learning_rate": 1.2432959988807815e-05, "loss": 0.3452988862991333, "step": 3649 }, { "epoch": 0.9019026439337781, "grad_norm": 1.2684809412491058, "learning_rate": 1.2428996729056247e-05, "loss": 0.3615972399711609, "step": 3650 }, { "epoch": 0.9021497405485545, "grad_norm": 1.2699104249103126, "learning_rate": 1.2425033063808328e-05, "loss": 0.35673046112060547, "step": 3651 }, { "epoch": 0.9023968371633309, "grad_norm": 1.0893772451810158, "learning_rate": 1.2421068993725758e-05, "loss": 0.3247358500957489, "step": 3652 }, { "epoch": 0.9026439337781073, "grad_norm": 1.2514926725743334, "learning_rate": 1.2417104519470288e-05, "loss": 0.28484800457954407, "step": 3653 }, { "epoch": 0.9028910303928837, "grad_norm": 1.2770718498601856, "learning_rate": 1.241313964170376e-05, "loss": 0.35940131545066833, "step": 3654 }, { "epoch": 0.9031381270076599, "grad_norm": 1.363884367745901, "learning_rate": 1.2409174361088059e-05, "loss": 0.3661508858203888, "step": 3655 }, { "epoch": 0.9033852236224363, "grad_norm": 1.1684597151214882, "learning_rate": 1.2405208678285154e-05, "loss": 0.289843887090683, "step": 3656 }, { "epoch": 0.9036323202372127, "grad_norm": 1.1171983452152983, "learning_rate": 1.2401242593957078e-05, "loss": 0.2956012487411499, "step": 3657 }, { "epoch": 0.9038794168519891, "grad_norm": 1.1523064585928102, "learning_rate": 1.2397276108765919e-05, "loss": 0.33966565132141113, "step": 3658 }, { "epoch": 0.9041265134667655, "grad_norm": 1.1511540697421827, "learning_rate": 1.2393309223373852e-05, "loss": 0.2839515209197998, "step": 3659 }, { "epoch": 0.9043736100815419, "grad_norm": 1.233511340528254, "learning_rate": 1.23893419384431e-05, "loss": 0.35332798957824707, "step": 3660 }, { "epoch": 0.9046207066963182, "grad_norm": 1.2315998801378858, "learning_rate": 1.2385374254635967e-05, "loss": 0.3091416358947754, "step": 3661 }, { "epoch": 0.9048678033110946, "grad_norm": 1.193488977541786, "learning_rate": 1.2381406172614811e-05, "loss": 0.33291342854499817, "step": 3662 }, { "epoch": 0.905114899925871, "grad_norm": 1.0975220301339943, "learning_rate": 1.2377437693042074e-05, "loss": 0.31686246395111084, "step": 3663 }, { "epoch": 0.9053619965406474, "grad_norm": 1.2673070733057028, "learning_rate": 1.2373468816580241e-05, "loss": 0.3229641616344452, "step": 3664 }, { "epoch": 0.9056090931554238, "grad_norm": 1.0554564365051866, "learning_rate": 1.2369499543891883e-05, "loss": 0.28962454199790955, "step": 3665 }, { "epoch": 0.9058561897702001, "grad_norm": 1.3449755474004474, "learning_rate": 1.2365529875639628e-05, "loss": 0.35090649127960205, "step": 3666 }, { "epoch": 0.9061032863849765, "grad_norm": 1.1550286697519503, "learning_rate": 1.236155981248617e-05, "loss": 0.3282148241996765, "step": 3667 }, { "epoch": 0.9063503829997529, "grad_norm": 1.1258450134462954, "learning_rate": 1.2357589355094275e-05, "loss": 0.2951314449310303, "step": 3668 }, { "epoch": 0.9065974796145293, "grad_norm": 1.3277516212364342, "learning_rate": 1.2353618504126765e-05, "loss": 0.39808017015457153, "step": 3669 }, { "epoch": 0.9068445762293057, "grad_norm": 1.1270563503195286, "learning_rate": 1.234964726024654e-05, "loss": 0.34043288230895996, "step": 3670 }, { "epoch": 0.907091672844082, "grad_norm": 1.3776250867874462, "learning_rate": 1.2345675624116552e-05, "loss": 0.3595518171787262, "step": 3671 }, { "epoch": 0.9073387694588584, "grad_norm": 1.2113488773251389, "learning_rate": 1.2341703596399827e-05, "loss": 0.31059694290161133, "step": 3672 }, { "epoch": 0.9075858660736348, "grad_norm": 1.285410307944403, "learning_rate": 1.233773117775946e-05, "loss": 0.33943262696266174, "step": 3673 }, { "epoch": 0.9078329626884112, "grad_norm": 1.3560000233997433, "learning_rate": 1.23337583688586e-05, "loss": 0.41346046328544617, "step": 3674 }, { "epoch": 0.9080800593031876, "grad_norm": 1.2543642388164884, "learning_rate": 1.232978517036047e-05, "loss": 0.3389570713043213, "step": 3675 }, { "epoch": 0.9083271559179639, "grad_norm": 1.4337160384117906, "learning_rate": 1.2325811582928355e-05, "loss": 0.35013604164123535, "step": 3676 }, { "epoch": 0.9085742525327403, "grad_norm": 1.3511078705182775, "learning_rate": 1.2321837607225605e-05, "loss": 0.3060823082923889, "step": 3677 }, { "epoch": 0.9088213491475167, "grad_norm": 1.2632679080006919, "learning_rate": 1.231786324391563e-05, "loss": 0.3257600665092468, "step": 3678 }, { "epoch": 0.9090684457622931, "grad_norm": 1.1316218131707254, "learning_rate": 1.2313888493661916e-05, "loss": 0.3219009041786194, "step": 3679 }, { "epoch": 0.9093155423770695, "grad_norm": 1.246340664531562, "learning_rate": 1.2309913357128008e-05, "loss": 0.3405412435531616, "step": 3680 }, { "epoch": 0.9095626389918459, "grad_norm": 1.1979122930308013, "learning_rate": 1.2305937834977508e-05, "loss": 0.3349835276603699, "step": 3681 }, { "epoch": 0.9098097356066221, "grad_norm": 1.0967294459225148, "learning_rate": 1.2301961927874095e-05, "loss": 0.2749952971935272, "step": 3682 }, { "epoch": 0.9100568322213985, "grad_norm": 1.236227005665629, "learning_rate": 1.2297985636481506e-05, "loss": 0.3287535607814789, "step": 3683 }, { "epoch": 0.910303928836175, "grad_norm": 1.0952738394128887, "learning_rate": 1.2294008961463539e-05, "loss": 0.299751341342926, "step": 3684 }, { "epoch": 0.9105510254509513, "grad_norm": 1.0881597730251795, "learning_rate": 1.229003190348406e-05, "loss": 0.28702598810195923, "step": 3685 }, { "epoch": 0.9107981220657277, "grad_norm": 1.7133123698669206, "learning_rate": 1.2286054463207003e-05, "loss": 0.38280922174453735, "step": 3686 }, { "epoch": 0.911045218680504, "grad_norm": 1.18728520721035, "learning_rate": 1.2282076641296357e-05, "loss": 0.3347788453102112, "step": 3687 }, { "epoch": 0.9112923152952804, "grad_norm": 1.3684935576841324, "learning_rate": 1.2278098438416178e-05, "loss": 0.39789628982543945, "step": 3688 }, { "epoch": 0.9115394119100568, "grad_norm": 1.231985036030357, "learning_rate": 1.2274119855230588e-05, "loss": 0.33016011118888855, "step": 3689 }, { "epoch": 0.9117865085248332, "grad_norm": 1.439254465834004, "learning_rate": 1.2270140892403775e-05, "loss": 0.3821644186973572, "step": 3690 }, { "epoch": 0.9120336051396096, "grad_norm": 1.0531002310718773, "learning_rate": 1.2266161550599982e-05, "loss": 0.2891497015953064, "step": 3691 }, { "epoch": 0.9122807017543859, "grad_norm": 1.0212200779968201, "learning_rate": 1.226218183048352e-05, "loss": 0.32432490587234497, "step": 3692 }, { "epoch": 0.9125277983691623, "grad_norm": 1.2524511621222232, "learning_rate": 1.2258201732718765e-05, "loss": 0.35889336466789246, "step": 3693 }, { "epoch": 0.9127748949839387, "grad_norm": 1.1666987998078961, "learning_rate": 1.2254221257970152e-05, "loss": 0.35733485221862793, "step": 3694 }, { "epoch": 0.9130219915987151, "grad_norm": 1.8673090772351364, "learning_rate": 1.2250240406902181e-05, "loss": 0.39192306995391846, "step": 3695 }, { "epoch": 0.9132690882134915, "grad_norm": 1.2137501587150687, "learning_rate": 1.2246259180179417e-05, "loss": 0.35948753356933594, "step": 3696 }, { "epoch": 0.9135161848282679, "grad_norm": 1.2906618957431684, "learning_rate": 1.224227757846648e-05, "loss": 0.38102012872695923, "step": 3697 }, { "epoch": 0.9137632814430442, "grad_norm": 1.0352336109399602, "learning_rate": 1.2238295602428066e-05, "loss": 0.289156973361969, "step": 3698 }, { "epoch": 0.9140103780578206, "grad_norm": 1.2157810734336116, "learning_rate": 1.2234313252728917e-05, "loss": 0.3480681777000427, "step": 3699 }, { "epoch": 0.914257474672597, "grad_norm": 1.3112407739737941, "learning_rate": 1.2230330530033854e-05, "loss": 0.3541749119758606, "step": 3700 }, { "epoch": 0.9145045712873734, "grad_norm": 1.0796074786804875, "learning_rate": 1.2226347435007746e-05, "loss": 0.3184356093406677, "step": 3701 }, { "epoch": 0.9147516679021498, "grad_norm": 1.2438136149074799, "learning_rate": 1.2222363968315531e-05, "loss": 0.3808220326900482, "step": 3702 }, { "epoch": 0.9149987645169261, "grad_norm": 1.2442095558014044, "learning_rate": 1.2218380130622211e-05, "loss": 0.3549204468727112, "step": 3703 }, { "epoch": 0.9152458611317025, "grad_norm": 1.1062225071546425, "learning_rate": 1.2214395922592847e-05, "loss": 0.3200744092464447, "step": 3704 }, { "epoch": 0.9154929577464789, "grad_norm": 1.127377754137139, "learning_rate": 1.221041134489256e-05, "loss": 0.33606964349746704, "step": 3705 }, { "epoch": 0.9157400543612553, "grad_norm": 1.2629047238730728, "learning_rate": 1.2206426398186534e-05, "loss": 0.29794546961784363, "step": 3706 }, { "epoch": 0.9159871509760317, "grad_norm": 1.364051353661464, "learning_rate": 1.2202441083140023e-05, "loss": 0.3225128650665283, "step": 3707 }, { "epoch": 0.916234247590808, "grad_norm": 1.1458473179730522, "learning_rate": 1.2198455400418326e-05, "loss": 0.3073359727859497, "step": 3708 }, { "epoch": 0.9164813442055844, "grad_norm": 1.1024336492477929, "learning_rate": 1.2194469350686812e-05, "loss": 0.2950625419616699, "step": 3709 }, { "epoch": 0.9167284408203608, "grad_norm": 2.6858237858768566, "learning_rate": 1.2190482934610918e-05, "loss": 0.3449091911315918, "step": 3710 }, { "epoch": 0.9169755374351372, "grad_norm": 1.1592835794336556, "learning_rate": 1.2186496152856133e-05, "loss": 0.2983091175556183, "step": 3711 }, { "epoch": 0.9172226340499136, "grad_norm": 1.2446415083217475, "learning_rate": 1.2182509006088007e-05, "loss": 0.3428889811038971, "step": 3712 }, { "epoch": 0.9174697306646898, "grad_norm": 1.3404724616691959, "learning_rate": 1.2178521494972157e-05, "loss": 0.3859858214855194, "step": 3713 }, { "epoch": 0.9177168272794662, "grad_norm": 1.294363075246305, "learning_rate": 1.2174533620174263e-05, "loss": 0.32191142439842224, "step": 3714 }, { "epoch": 0.9179639238942426, "grad_norm": 1.2679898790023785, "learning_rate": 1.2170545382360043e-05, "loss": 0.34799063205718994, "step": 3715 }, { "epoch": 0.918211020509019, "grad_norm": 1.3758078060081556, "learning_rate": 1.216655678219531e-05, "loss": 0.41071924567222595, "step": 3716 }, { "epoch": 0.9184581171237954, "grad_norm": 1.3427418572255252, "learning_rate": 1.2162567820345911e-05, "loss": 0.3611854910850525, "step": 3717 }, { "epoch": 0.9187052137385718, "grad_norm": 1.2674997441686437, "learning_rate": 1.2158578497477768e-05, "loss": 0.28782030940055847, "step": 3718 }, { "epoch": 0.9189523103533481, "grad_norm": 1.1681760086827409, "learning_rate": 1.2154588814256858e-05, "loss": 0.3134756088256836, "step": 3719 }, { "epoch": 0.9191994069681245, "grad_norm": 1.1586682066237188, "learning_rate": 1.2150598771349211e-05, "loss": 0.2691023349761963, "step": 3720 }, { "epoch": 0.9194465035829009, "grad_norm": 1.2109371724879268, "learning_rate": 1.2146608369420933e-05, "loss": 0.3143400549888611, "step": 3721 }, { "epoch": 0.9196936001976773, "grad_norm": 1.2225533690048358, "learning_rate": 1.2142617609138172e-05, "loss": 0.37519556283950806, "step": 3722 }, { "epoch": 0.9199406968124537, "grad_norm": 1.2331440134254001, "learning_rate": 1.2138626491167154e-05, "loss": 0.2967190742492676, "step": 3723 }, { "epoch": 0.92018779342723, "grad_norm": 1.249401813082749, "learning_rate": 1.2134635016174148e-05, "loss": 0.3814612030982971, "step": 3724 }, { "epoch": 0.9204348900420064, "grad_norm": 1.1412667365931053, "learning_rate": 1.2130643184825497e-05, "loss": 0.29822400212287903, "step": 3725 }, { "epoch": 0.9206819866567828, "grad_norm": 1.217093930098712, "learning_rate": 1.2126650997787592e-05, "loss": 0.40993404388427734, "step": 3726 }, { "epoch": 0.9209290832715592, "grad_norm": 1.183380293672236, "learning_rate": 1.2122658455726887e-05, "loss": 0.3073222041130066, "step": 3727 }, { "epoch": 0.9211761798863356, "grad_norm": 1.2525256240146574, "learning_rate": 1.2118665559309906e-05, "loss": 0.3509777784347534, "step": 3728 }, { "epoch": 0.9214232765011119, "grad_norm": 1.156780931417003, "learning_rate": 1.2114672309203209e-05, "loss": 0.3265327215194702, "step": 3729 }, { "epoch": 0.9216703731158883, "grad_norm": 1.1761956944932408, "learning_rate": 1.2110678706073438e-05, "loss": 0.3511629104614258, "step": 3730 }, { "epoch": 0.9219174697306647, "grad_norm": 1.288636217590383, "learning_rate": 1.2106684750587279e-05, "loss": 0.39430028200149536, "step": 3731 }, { "epoch": 0.9221645663454411, "grad_norm": 1.0890000946472647, "learning_rate": 1.2102690443411486e-05, "loss": 0.304395854473114, "step": 3732 }, { "epoch": 0.9224116629602175, "grad_norm": 1.1607710527748831, "learning_rate": 1.2098695785212869e-05, "loss": 0.3466271460056305, "step": 3733 }, { "epoch": 0.9226587595749938, "grad_norm": 1.1306727359177944, "learning_rate": 1.209470077665829e-05, "loss": 0.31518030166625977, "step": 3734 }, { "epoch": 0.9229058561897702, "grad_norm": 1.1952933176842133, "learning_rate": 1.2090705418414684e-05, "loss": 0.3662646412849426, "step": 3735 }, { "epoch": 0.9231529528045466, "grad_norm": 1.2625852833356468, "learning_rate": 1.2086709711149025e-05, "loss": 0.3568115234375, "step": 3736 }, { "epoch": 0.923400049419323, "grad_norm": 1.2013152573051715, "learning_rate": 1.2082713655528362e-05, "loss": 0.3018645644187927, "step": 3737 }, { "epoch": 0.9236471460340994, "grad_norm": 1.260046315807709, "learning_rate": 1.2078717252219794e-05, "loss": 0.4177415370941162, "step": 3738 }, { "epoch": 0.9238942426488758, "grad_norm": 2.005622001461962, "learning_rate": 1.2074720501890485e-05, "loss": 0.3750794231891632, "step": 3739 }, { "epoch": 0.924141339263652, "grad_norm": 1.430089878241282, "learning_rate": 1.2070723405207645e-05, "loss": 0.35817158222198486, "step": 3740 }, { "epoch": 0.9243884358784284, "grad_norm": 1.1708894047013276, "learning_rate": 1.206672596283855e-05, "loss": 0.3173895478248596, "step": 3741 }, { "epoch": 0.9246355324932048, "grad_norm": 1.2529149397133164, "learning_rate": 1.2062728175450538e-05, "loss": 0.3849390745162964, "step": 3742 }, { "epoch": 0.9248826291079812, "grad_norm": 1.1833694761876408, "learning_rate": 1.2058730043710992e-05, "loss": 0.307401567697525, "step": 3743 }, { "epoch": 0.9251297257227576, "grad_norm": 1.2742510875961677, "learning_rate": 1.2054731568287364e-05, "loss": 0.37684494256973267, "step": 3744 }, { "epoch": 0.9253768223375339, "grad_norm": 1.3013480072207306, "learning_rate": 1.2050732749847156e-05, "loss": 0.2769356966018677, "step": 3745 }, { "epoch": 0.9256239189523103, "grad_norm": 1.2580385263615008, "learning_rate": 1.2046733589057931e-05, "loss": 0.3173646926879883, "step": 3746 }, { "epoch": 0.9258710155670867, "grad_norm": 1.2086582024123687, "learning_rate": 1.2042734086587308e-05, "loss": 0.29963868856430054, "step": 3747 }, { "epoch": 0.9261181121818631, "grad_norm": 1.2197668735848306, "learning_rate": 1.2038734243102966e-05, "loss": 0.32454538345336914, "step": 3748 }, { "epoch": 0.9263652087966395, "grad_norm": 1.1289064171205085, "learning_rate": 1.2034734059272636e-05, "loss": 0.2863626480102539, "step": 3749 }, { "epoch": 0.9266123054114158, "grad_norm": 1.158868468568761, "learning_rate": 1.2030733535764108e-05, "loss": 0.3518866300582886, "step": 3750 }, { "epoch": 0.9268594020261922, "grad_norm": 1.1270000383996008, "learning_rate": 1.202673267324523e-05, "loss": 0.29522398114204407, "step": 3751 }, { "epoch": 0.9271064986409686, "grad_norm": 1.3809589303108782, "learning_rate": 1.2022731472383906e-05, "loss": 0.3446257412433624, "step": 3752 }, { "epoch": 0.927353595255745, "grad_norm": 1.210692308063054, "learning_rate": 1.2018729933848092e-05, "loss": 0.34447556734085083, "step": 3753 }, { "epoch": 0.9276006918705214, "grad_norm": 1.1838643019531148, "learning_rate": 1.2014728058305806e-05, "loss": 0.2615487575531006, "step": 3754 }, { "epoch": 0.9278477884852977, "grad_norm": 1.2383341634816718, "learning_rate": 1.2010725846425122e-05, "loss": 0.3557744026184082, "step": 3755 }, { "epoch": 0.9280948851000741, "grad_norm": 1.2999983993723692, "learning_rate": 1.200672329887417e-05, "loss": 0.40539389848709106, "step": 3756 }, { "epoch": 0.9283419817148505, "grad_norm": 1.1981773133171367, "learning_rate": 1.200272041632113e-05, "loss": 0.29218757152557373, "step": 3757 }, { "epoch": 0.9285890783296269, "grad_norm": 1.1603720918765064, "learning_rate": 1.1998717199434247e-05, "loss": 0.3367372751235962, "step": 3758 }, { "epoch": 0.9288361749444033, "grad_norm": 1.306958372478371, "learning_rate": 1.1994713648881815e-05, "loss": 0.40453875064849854, "step": 3759 }, { "epoch": 0.9290832715591797, "grad_norm": 1.1988963768471095, "learning_rate": 1.199070976533219e-05, "loss": 0.26370811462402344, "step": 3760 }, { "epoch": 0.929330368173956, "grad_norm": 1.1554813090666665, "learning_rate": 1.198670554945377e-05, "loss": 0.2816561460494995, "step": 3761 }, { "epoch": 0.9295774647887324, "grad_norm": 1.4382842372885805, "learning_rate": 1.1982701001915029e-05, "loss": 0.3425465226173401, "step": 3762 }, { "epoch": 0.9298245614035088, "grad_norm": 1.2049828040673871, "learning_rate": 1.1978696123384479e-05, "loss": 0.3451976180076599, "step": 3763 }, { "epoch": 0.9300716580182852, "grad_norm": 1.2267403330607476, "learning_rate": 1.1974690914530696e-05, "loss": 0.32489532232284546, "step": 3764 }, { "epoch": 0.9303187546330616, "grad_norm": 1.0336532387928838, "learning_rate": 1.197068537602231e-05, "loss": 0.2547299563884735, "step": 3765 }, { "epoch": 0.9305658512478379, "grad_norm": 1.3396468575429759, "learning_rate": 1.1966679508528005e-05, "loss": 0.38253462314605713, "step": 3766 }, { "epoch": 0.9308129478626143, "grad_norm": 1.3771942334194398, "learning_rate": 1.196267331271652e-05, "loss": 0.3592126965522766, "step": 3767 }, { "epoch": 0.9310600444773907, "grad_norm": 1.255015048569514, "learning_rate": 1.1958666789256643e-05, "loss": 0.33111336827278137, "step": 3768 }, { "epoch": 0.9313071410921671, "grad_norm": 1.191942884803152, "learning_rate": 1.195465993881723e-05, "loss": 0.34046387672424316, "step": 3769 }, { "epoch": 0.9315542377069435, "grad_norm": 1.1104552496964395, "learning_rate": 1.195065276206718e-05, "loss": 0.33740848302841187, "step": 3770 }, { "epoch": 0.9318013343217197, "grad_norm": 1.1238201838572117, "learning_rate": 1.1946645259675453e-05, "loss": 0.3149111568927765, "step": 3771 }, { "epoch": 0.9320484309364961, "grad_norm": 1.1789053912826069, "learning_rate": 1.194263743231106e-05, "loss": 0.3600301146507263, "step": 3772 }, { "epoch": 0.9322955275512725, "grad_norm": 1.2395561199088951, "learning_rate": 1.1938629280643063e-05, "loss": 0.3089178800582886, "step": 3773 }, { "epoch": 0.9325426241660489, "grad_norm": 1.0697099199978823, "learning_rate": 1.193462080534059e-05, "loss": 0.2970927655696869, "step": 3774 }, { "epoch": 0.9327897207808253, "grad_norm": 1.1165368944249738, "learning_rate": 1.1930612007072808e-05, "loss": 0.3637526035308838, "step": 3775 }, { "epoch": 0.9330368173956016, "grad_norm": 1.1419092281219845, "learning_rate": 1.1926602886508949e-05, "loss": 0.33412879705429077, "step": 3776 }, { "epoch": 0.933283914010378, "grad_norm": 1.11215800654605, "learning_rate": 1.1922593444318296e-05, "loss": 0.3289265036582947, "step": 3777 }, { "epoch": 0.9335310106251544, "grad_norm": 1.1407431654699964, "learning_rate": 1.1918583681170177e-05, "loss": 0.300341933965683, "step": 3778 }, { "epoch": 0.9337781072399308, "grad_norm": 1.3306041209943895, "learning_rate": 1.1914573597733994e-05, "loss": 0.3777143657207489, "step": 3779 }, { "epoch": 0.9340252038547072, "grad_norm": 1.2136160446114088, "learning_rate": 1.1910563194679179e-05, "loss": 0.33408495783805847, "step": 3780 }, { "epoch": 0.9342723004694836, "grad_norm": 1.0823526131343135, "learning_rate": 1.1906552472675234e-05, "loss": 0.28222206234931946, "step": 3781 }, { "epoch": 0.9345193970842599, "grad_norm": 1.1979179084271223, "learning_rate": 1.1902541432391704e-05, "loss": 0.3295533359050751, "step": 3782 }, { "epoch": 0.9347664936990363, "grad_norm": 1.2187387908527805, "learning_rate": 1.1898530074498193e-05, "loss": 0.24803391098976135, "step": 3783 }, { "epoch": 0.9350135903138127, "grad_norm": 1.145901477147886, "learning_rate": 1.1894518399664362e-05, "loss": 0.33399108052253723, "step": 3784 }, { "epoch": 0.9352606869285891, "grad_norm": 1.2630832444158142, "learning_rate": 1.189050640855991e-05, "loss": 0.35997384786605835, "step": 3785 }, { "epoch": 0.9355077835433655, "grad_norm": 1.4883646964646642, "learning_rate": 1.1886494101854605e-05, "loss": 0.3655410408973694, "step": 3786 }, { "epoch": 0.9357548801581418, "grad_norm": 1.2200502362267753, "learning_rate": 1.1882481480218258e-05, "loss": 0.3390260934829712, "step": 3787 }, { "epoch": 0.9360019767729182, "grad_norm": 1.1393522286478692, "learning_rate": 1.1878468544320737e-05, "loss": 0.31767019629478455, "step": 3788 }, { "epoch": 0.9362490733876946, "grad_norm": 1.1581159772850038, "learning_rate": 1.1874455294831955e-05, "loss": 0.3292267918586731, "step": 3789 }, { "epoch": 0.936496170002471, "grad_norm": 1.2340653800723878, "learning_rate": 1.1870441732421893e-05, "loss": 0.323108434677124, "step": 3790 }, { "epoch": 0.9367432666172474, "grad_norm": 1.1675172196030863, "learning_rate": 1.186642785776057e-05, "loss": 0.2898913025856018, "step": 3791 }, { "epoch": 0.9369903632320237, "grad_norm": 1.1176069734079583, "learning_rate": 1.1862413671518058e-05, "loss": 0.28297489881515503, "step": 3792 }, { "epoch": 0.9372374598468001, "grad_norm": 1.1939621163988277, "learning_rate": 1.1858399174364492e-05, "loss": 0.3400951027870178, "step": 3793 }, { "epoch": 0.9374845564615765, "grad_norm": 1.0925408131094794, "learning_rate": 1.1854384366970046e-05, "loss": 0.25717413425445557, "step": 3794 }, { "epoch": 0.9377316530763529, "grad_norm": 1.1595220682282914, "learning_rate": 1.1850369250004954e-05, "loss": 0.3109794557094574, "step": 3795 }, { "epoch": 0.9379787496911293, "grad_norm": 1.5981320713856288, "learning_rate": 1.1846353824139495e-05, "loss": 0.31589406728744507, "step": 3796 }, { "epoch": 0.9382258463059057, "grad_norm": 1.2410716756585152, "learning_rate": 1.1842338090044013e-05, "loss": 0.2976938486099243, "step": 3797 }, { "epoch": 0.938472942920682, "grad_norm": 1.2071289562908325, "learning_rate": 1.183832204838888e-05, "loss": 0.2800828814506531, "step": 3798 }, { "epoch": 0.9387200395354584, "grad_norm": 1.2874438688585679, "learning_rate": 1.1834305699844542e-05, "loss": 0.35211676359176636, "step": 3799 }, { "epoch": 0.9389671361502347, "grad_norm": 1.1407940847133433, "learning_rate": 1.183028904508149e-05, "loss": 0.30528536438941956, "step": 3800 }, { "epoch": 0.9392142327650111, "grad_norm": 1.4467180920698417, "learning_rate": 1.1826272084770258e-05, "loss": 0.32324379682540894, "step": 3801 }, { "epoch": 0.9394613293797875, "grad_norm": 1.2760760696293565, "learning_rate": 1.182225481958144e-05, "loss": 0.3887273073196411, "step": 3802 }, { "epoch": 0.9397084259945638, "grad_norm": 1.2286043098285435, "learning_rate": 1.1818237250185676e-05, "loss": 0.339563250541687, "step": 3803 }, { "epoch": 0.9399555226093402, "grad_norm": 1.275666909706126, "learning_rate": 1.1814219377253663e-05, "loss": 0.367714524269104, "step": 3804 }, { "epoch": 0.9402026192241166, "grad_norm": 1.1778969736186624, "learning_rate": 1.1810201201456134e-05, "loss": 0.3117882013320923, "step": 3805 }, { "epoch": 0.940449715838893, "grad_norm": 1.2273909985659637, "learning_rate": 1.1806182723463894e-05, "loss": 0.2957302927970886, "step": 3806 }, { "epoch": 0.9406968124536694, "grad_norm": 1.1058380036327888, "learning_rate": 1.180216394394778e-05, "loss": 0.24788296222686768, "step": 3807 }, { "epoch": 0.9409439090684457, "grad_norm": 1.1977176499146425, "learning_rate": 1.1798144863578689e-05, "loss": 0.32918912172317505, "step": 3808 }, { "epoch": 0.9411910056832221, "grad_norm": 1.190285959007944, "learning_rate": 1.1794125483027565e-05, "loss": 0.302317351102829, "step": 3809 }, { "epoch": 0.9414381022979985, "grad_norm": 1.2526944102668496, "learning_rate": 1.1790105802965402e-05, "loss": 0.37019503116607666, "step": 3810 }, { "epoch": 0.9416851989127749, "grad_norm": 1.2770529360427278, "learning_rate": 1.1786085824063246e-05, "loss": 0.33188125491142273, "step": 3811 }, { "epoch": 0.9419322955275513, "grad_norm": 1.1227735793589506, "learning_rate": 1.1782065546992191e-05, "loss": 0.2800873816013336, "step": 3812 }, { "epoch": 0.9421793921423276, "grad_norm": 1.2051260231823278, "learning_rate": 1.1778044972423383e-05, "loss": 0.3582535982131958, "step": 3813 }, { "epoch": 0.942426488757104, "grad_norm": 1.3908432995620892, "learning_rate": 1.1774024101028017e-05, "loss": 0.3849889039993286, "step": 3814 }, { "epoch": 0.9426735853718804, "grad_norm": 1.2463568931645252, "learning_rate": 1.1770002933477331e-05, "loss": 0.35835856199264526, "step": 3815 }, { "epoch": 0.9429206819866568, "grad_norm": 1.1243577895526284, "learning_rate": 1.1765981470442624e-05, "loss": 0.2982429563999176, "step": 3816 }, { "epoch": 0.9431677786014332, "grad_norm": 1.241846437736232, "learning_rate": 1.1761959712595232e-05, "loss": 0.3477679193019867, "step": 3817 }, { "epoch": 0.9434148752162096, "grad_norm": 1.2448106226024107, "learning_rate": 1.1757937660606558e-05, "loss": 0.35679692029953003, "step": 3818 }, { "epoch": 0.9436619718309859, "grad_norm": 1.2376694892024265, "learning_rate": 1.1753915315148031e-05, "loss": 0.3290979266166687, "step": 3819 }, { "epoch": 0.9439090684457623, "grad_norm": 1.1789285450655247, "learning_rate": 1.1749892676891146e-05, "loss": 0.3114325702190399, "step": 3820 }, { "epoch": 0.9441561650605387, "grad_norm": 1.3106706925561833, "learning_rate": 1.1745869746507442e-05, "loss": 0.3481232225894928, "step": 3821 }, { "epoch": 0.9444032616753151, "grad_norm": 1.225796966277612, "learning_rate": 1.17418465246685e-05, "loss": 0.39074450731277466, "step": 3822 }, { "epoch": 0.9446503582900915, "grad_norm": 1.2077042876295543, "learning_rate": 1.1737823012045968e-05, "loss": 0.33580201864242554, "step": 3823 }, { "epoch": 0.9448974549048678, "grad_norm": 1.1319733243818402, "learning_rate": 1.1733799209311521e-05, "loss": 0.3232335150241852, "step": 3824 }, { "epoch": 0.9451445515196442, "grad_norm": 1.203675252507145, "learning_rate": 1.1729775117136897e-05, "loss": 0.35783296823501587, "step": 3825 }, { "epoch": 0.9453916481344206, "grad_norm": 1.1211976551804443, "learning_rate": 1.1725750736193872e-05, "loss": 0.307128369808197, "step": 3826 }, { "epoch": 0.945638744749197, "grad_norm": 1.2114165910710057, "learning_rate": 1.1721726067154283e-05, "loss": 0.31978029012680054, "step": 3827 }, { "epoch": 0.9458858413639734, "grad_norm": 1.4971164267334156, "learning_rate": 1.171770111069e-05, "loss": 0.37799012660980225, "step": 3828 }, { "epoch": 0.9461329379787496, "grad_norm": 1.287744679564952, "learning_rate": 1.1713675867472953e-05, "loss": 0.39457032084465027, "step": 3829 }, { "epoch": 0.946380034593526, "grad_norm": 1.1090757195065175, "learning_rate": 1.1709650338175116e-05, "loss": 0.2695297300815582, "step": 3830 }, { "epoch": 0.9466271312083024, "grad_norm": 1.1041486581916842, "learning_rate": 1.1705624523468507e-05, "loss": 0.31485646963119507, "step": 3831 }, { "epoch": 0.9468742278230788, "grad_norm": 1.1286049856031755, "learning_rate": 1.17015984240252e-05, "loss": 0.32207101583480835, "step": 3832 }, { "epoch": 0.9471213244378552, "grad_norm": 1.208116127777529, "learning_rate": 1.1697572040517304e-05, "loss": 0.36283522844314575, "step": 3833 }, { "epoch": 0.9473684210526315, "grad_norm": 1.2544499311141017, "learning_rate": 1.169354537361699e-05, "loss": 0.33361294865608215, "step": 3834 }, { "epoch": 0.9476155176674079, "grad_norm": 1.140657737221942, "learning_rate": 1.1689518423996467e-05, "loss": 0.33139774203300476, "step": 3835 }, { "epoch": 0.9478626142821843, "grad_norm": 1.0798289696102379, "learning_rate": 1.168549119232799e-05, "loss": 0.2940194010734558, "step": 3836 }, { "epoch": 0.9481097108969607, "grad_norm": 1.1861283606586346, "learning_rate": 1.168146367928387e-05, "loss": 0.2889232635498047, "step": 3837 }, { "epoch": 0.9483568075117371, "grad_norm": 1.3768023561513474, "learning_rate": 1.1677435885536453e-05, "loss": 0.3865111470222473, "step": 3838 }, { "epoch": 0.9486039041265135, "grad_norm": 1.2565392188787288, "learning_rate": 1.1673407811758147e-05, "loss": 0.3074338734149933, "step": 3839 }, { "epoch": 0.9488510007412898, "grad_norm": 1.321817350025218, "learning_rate": 1.1669379458621386e-05, "loss": 0.339347243309021, "step": 3840 }, { "epoch": 0.9490980973560662, "grad_norm": 1.1831559478213118, "learning_rate": 1.1665350826798674e-05, "loss": 0.28959640860557556, "step": 3841 }, { "epoch": 0.9493451939708426, "grad_norm": 1.2032820159245865, "learning_rate": 1.1661321916962544e-05, "loss": 0.31439298391342163, "step": 3842 }, { "epoch": 0.949592290585619, "grad_norm": 1.1662860871441336, "learning_rate": 1.1657292729785585e-05, "loss": 0.27429795265197754, "step": 3843 }, { "epoch": 0.9498393872003954, "grad_norm": 1.1970981319405953, "learning_rate": 1.1653263265940425e-05, "loss": 0.3665488064289093, "step": 3844 }, { "epoch": 0.9500864838151717, "grad_norm": 1.3290739786642463, "learning_rate": 1.1649233526099745e-05, "loss": 0.3551849126815796, "step": 3845 }, { "epoch": 0.9503335804299481, "grad_norm": 1.1420239683127498, "learning_rate": 1.1645203510936269e-05, "loss": 0.3039240837097168, "step": 3846 }, { "epoch": 0.9505806770447245, "grad_norm": 1.3902139376471874, "learning_rate": 1.1641173221122766e-05, "loss": 0.3636402487754822, "step": 3847 }, { "epoch": 0.9508277736595009, "grad_norm": 1.212616228408358, "learning_rate": 1.1637142657332052e-05, "loss": 0.34598904848098755, "step": 3848 }, { "epoch": 0.9510748702742773, "grad_norm": 1.1664094655734094, "learning_rate": 1.1633111820236991e-05, "loss": 0.32456111907958984, "step": 3849 }, { "epoch": 0.9513219668890536, "grad_norm": 1.263927730151989, "learning_rate": 1.162908071051049e-05, "loss": 0.3580492436885834, "step": 3850 }, { "epoch": 0.95156906350383, "grad_norm": 1.3991745189220153, "learning_rate": 1.1625049328825495e-05, "loss": 0.3390027582645416, "step": 3851 }, { "epoch": 0.9518161601186064, "grad_norm": 1.227551457736682, "learning_rate": 1.1621017675855013e-05, "loss": 0.3690676689147949, "step": 3852 }, { "epoch": 0.9520632567333828, "grad_norm": 1.1597785778444563, "learning_rate": 1.1616985752272085e-05, "loss": 0.3416624069213867, "step": 3853 }, { "epoch": 0.9523103533481592, "grad_norm": 1.2152019316580982, "learning_rate": 1.1612953558749795e-05, "loss": 0.3164159953594208, "step": 3854 }, { "epoch": 0.9525574499629355, "grad_norm": 1.239774438241345, "learning_rate": 1.1608921095961285e-05, "loss": 0.3734780251979828, "step": 3855 }, { "epoch": 0.9528045465777119, "grad_norm": 1.2383938044574336, "learning_rate": 1.160488836457973e-05, "loss": 0.31714802980422974, "step": 3856 }, { "epoch": 0.9530516431924883, "grad_norm": 1.4285009153170525, "learning_rate": 1.160085536527835e-05, "loss": 0.4097982943058014, "step": 3857 }, { "epoch": 0.9532987398072647, "grad_norm": 1.1320137994746724, "learning_rate": 1.1596822098730415e-05, "loss": 0.30937421321868896, "step": 3858 }, { "epoch": 0.953545836422041, "grad_norm": 1.2844306954411913, "learning_rate": 1.1592788565609243e-05, "loss": 0.35887083411216736, "step": 3859 }, { "epoch": 0.9537929330368174, "grad_norm": 1.1633628843882087, "learning_rate": 1.1588754766588188e-05, "loss": 0.31650862097740173, "step": 3860 }, { "epoch": 0.9540400296515937, "grad_norm": 1.1722743340663833, "learning_rate": 1.1584720702340645e-05, "loss": 0.3287088871002197, "step": 3861 }, { "epoch": 0.9542871262663701, "grad_norm": 1.180514446345165, "learning_rate": 1.1580686373540074e-05, "loss": 0.30861735343933105, "step": 3862 }, { "epoch": 0.9545342228811465, "grad_norm": 1.3095787596703659, "learning_rate": 1.1576651780859955e-05, "loss": 0.33823317289352417, "step": 3863 }, { "epoch": 0.9547813194959229, "grad_norm": 1.1161957113943672, "learning_rate": 1.1572616924973824e-05, "loss": 0.3111230134963989, "step": 3864 }, { "epoch": 0.9550284161106993, "grad_norm": 1.176281092347007, "learning_rate": 1.1568581806555261e-05, "loss": 0.33526766300201416, "step": 3865 }, { "epoch": 0.9552755127254756, "grad_norm": 1.394852719169932, "learning_rate": 1.1564546426277889e-05, "loss": 0.36391839385032654, "step": 3866 }, { "epoch": 0.955522609340252, "grad_norm": 1.1419755137600072, "learning_rate": 1.1560510784815372e-05, "loss": 0.32552021741867065, "step": 3867 }, { "epoch": 0.9557697059550284, "grad_norm": 1.3526016202045208, "learning_rate": 1.1556474882841415e-05, "loss": 0.349439799785614, "step": 3868 }, { "epoch": 0.9560168025698048, "grad_norm": 1.2025365886343669, "learning_rate": 1.155243872102978e-05, "loss": 0.33423805236816406, "step": 3869 }, { "epoch": 0.9562638991845812, "grad_norm": 1.2802828455048239, "learning_rate": 1.1548402300054257e-05, "loss": 0.37513267993927, "step": 3870 }, { "epoch": 0.9565109957993575, "grad_norm": 1.1197561745932756, "learning_rate": 1.1544365620588689e-05, "loss": 0.3010951280593872, "step": 3871 }, { "epoch": 0.9567580924141339, "grad_norm": 1.3429344900845042, "learning_rate": 1.1540328683306954e-05, "loss": 0.3552629351615906, "step": 3872 }, { "epoch": 0.9570051890289103, "grad_norm": 1.276639254056281, "learning_rate": 1.1536291488882982e-05, "loss": 0.3411285877227783, "step": 3873 }, { "epoch": 0.9572522856436867, "grad_norm": 1.1523086696284155, "learning_rate": 1.153225403799074e-05, "loss": 0.33199241757392883, "step": 3874 }, { "epoch": 0.9574993822584631, "grad_norm": 1.0827221255631436, "learning_rate": 1.1528216331304237e-05, "loss": 0.28349971771240234, "step": 3875 }, { "epoch": 0.9577464788732394, "grad_norm": 1.2278642602058178, "learning_rate": 1.1524178369497534e-05, "loss": 0.364462673664093, "step": 3876 }, { "epoch": 0.9579935754880158, "grad_norm": 1.1963277501217136, "learning_rate": 1.152014015324472e-05, "loss": 0.2940783202648163, "step": 3877 }, { "epoch": 0.9582406721027922, "grad_norm": 1.1003098964518976, "learning_rate": 1.151610168321994e-05, "loss": 0.3389877378940582, "step": 3878 }, { "epoch": 0.9584877687175686, "grad_norm": 1.1321456213650032, "learning_rate": 1.1512062960097372e-05, "loss": 0.27249622344970703, "step": 3879 }, { "epoch": 0.958734865332345, "grad_norm": 1.2901665004997829, "learning_rate": 1.1508023984551242e-05, "loss": 0.3111211061477661, "step": 3880 }, { "epoch": 0.9589819619471214, "grad_norm": 1.2571932372127443, "learning_rate": 1.1503984757255815e-05, "loss": 0.3611052632331848, "step": 3881 }, { "epoch": 0.9592290585618977, "grad_norm": 1.2114277541669178, "learning_rate": 1.1499945278885396e-05, "loss": 0.2996877133846283, "step": 3882 }, { "epoch": 0.9594761551766741, "grad_norm": 1.3529590316741702, "learning_rate": 1.1495905550114343e-05, "loss": 0.3730393052101135, "step": 3883 }, { "epoch": 0.9597232517914505, "grad_norm": 1.2450408867338973, "learning_rate": 1.1491865571617039e-05, "loss": 0.35350072383880615, "step": 3884 }, { "epoch": 0.9599703484062269, "grad_norm": 1.1098413271059713, "learning_rate": 1.1487825344067923e-05, "loss": 0.3162643313407898, "step": 3885 }, { "epoch": 0.9602174450210033, "grad_norm": 1.314226670950009, "learning_rate": 1.1483784868141465e-05, "loss": 0.3315642476081848, "step": 3886 }, { "epoch": 0.9604645416357795, "grad_norm": 1.257793396359256, "learning_rate": 1.1479744144512188e-05, "loss": 0.33387285470962524, "step": 3887 }, { "epoch": 0.9607116382505559, "grad_norm": 1.2591718788011712, "learning_rate": 1.1475703173854645e-05, "loss": 0.309065580368042, "step": 3888 }, { "epoch": 0.9609587348653323, "grad_norm": 1.4070829138783671, "learning_rate": 1.1471661956843434e-05, "loss": 0.3714446723461151, "step": 3889 }, { "epoch": 0.9612058314801087, "grad_norm": 1.1296871523925298, "learning_rate": 1.1467620494153202e-05, "loss": 0.3389378786087036, "step": 3890 }, { "epoch": 0.9614529280948851, "grad_norm": 1.1736323440751222, "learning_rate": 1.1463578786458622e-05, "loss": 0.34718453884124756, "step": 3891 }, { "epoch": 0.9617000247096614, "grad_norm": 1.2332261005446996, "learning_rate": 1.1459536834434421e-05, "loss": 0.32354509830474854, "step": 3892 }, { "epoch": 0.9619471213244378, "grad_norm": 1.2219264819161875, "learning_rate": 1.145549463875536e-05, "loss": 0.3305700719356537, "step": 3893 }, { "epoch": 0.9621942179392142, "grad_norm": 1.251597586241959, "learning_rate": 1.1451452200096247e-05, "loss": 0.3186013996601105, "step": 3894 }, { "epoch": 0.9624413145539906, "grad_norm": 1.283940357263449, "learning_rate": 1.1447409519131915e-05, "loss": 0.3356017470359802, "step": 3895 }, { "epoch": 0.962688411168767, "grad_norm": 1.3287993293468419, "learning_rate": 1.1443366596537263e-05, "loss": 0.36882898211479187, "step": 3896 }, { "epoch": 0.9629355077835434, "grad_norm": 1.203492072170627, "learning_rate": 1.1439323432987205e-05, "loss": 0.32157227396965027, "step": 3897 }, { "epoch": 0.9631826043983197, "grad_norm": 1.1200352773011284, "learning_rate": 1.1435280029156708e-05, "loss": 0.2915501594543457, "step": 3898 }, { "epoch": 0.9634297010130961, "grad_norm": 1.1968020707916354, "learning_rate": 1.1431236385720781e-05, "loss": 0.3305101692676544, "step": 3899 }, { "epoch": 0.9636767976278725, "grad_norm": 1.1815235354594869, "learning_rate": 1.1427192503354468e-05, "loss": 0.33204036951065063, "step": 3900 }, { "epoch": 0.9639238942426489, "grad_norm": 1.202358525377515, "learning_rate": 1.1423148382732854e-05, "loss": 0.3103053569793701, "step": 3901 }, { "epoch": 0.9641709908574253, "grad_norm": 1.13545385618269, "learning_rate": 1.141910402453106e-05, "loss": 0.30652767419815063, "step": 3902 }, { "epoch": 0.9644180874722016, "grad_norm": 1.2125160174279193, "learning_rate": 1.1415059429424257e-05, "loss": 0.2986427843570709, "step": 3903 }, { "epoch": 0.964665184086978, "grad_norm": 1.2084318402977616, "learning_rate": 1.1411014598087645e-05, "loss": 0.35321950912475586, "step": 3904 }, { "epoch": 0.9649122807017544, "grad_norm": 1.1493428127792236, "learning_rate": 1.1406969531196465e-05, "loss": 0.31997859477996826, "step": 3905 }, { "epoch": 0.9651593773165308, "grad_norm": 1.2332669341929305, "learning_rate": 1.1402924229426009e-05, "loss": 0.3385886549949646, "step": 3906 }, { "epoch": 0.9654064739313072, "grad_norm": 1.278941244679384, "learning_rate": 1.1398878693451588e-05, "loss": 0.3345489203929901, "step": 3907 }, { "epoch": 0.9656535705460835, "grad_norm": 1.3231881693219434, "learning_rate": 1.1394832923948573e-05, "loss": 0.38594895601272583, "step": 3908 }, { "epoch": 0.9659006671608599, "grad_norm": 1.1507557179299694, "learning_rate": 1.1390786921592356e-05, "loss": 0.2773873507976532, "step": 3909 }, { "epoch": 0.9661477637756363, "grad_norm": 1.2526020190012979, "learning_rate": 1.1386740687058385e-05, "loss": 0.3829364478588104, "step": 3910 }, { "epoch": 0.9663948603904127, "grad_norm": 1.2523663751418406, "learning_rate": 1.1382694221022128e-05, "loss": 0.3342653512954712, "step": 3911 }, { "epoch": 0.9666419570051891, "grad_norm": 1.3073635637993328, "learning_rate": 1.1378647524159106e-05, "loss": 0.3545466661453247, "step": 3912 }, { "epoch": 0.9668890536199654, "grad_norm": 1.2939752076978774, "learning_rate": 1.1374600597144873e-05, "loss": 0.34807997941970825, "step": 3913 }, { "epoch": 0.9671361502347418, "grad_norm": 1.1354054499385526, "learning_rate": 1.1370553440655024e-05, "loss": 0.3090675473213196, "step": 3914 }, { "epoch": 0.9673832468495182, "grad_norm": 1.377762084095442, "learning_rate": 1.1366506055365194e-05, "loss": 0.32683584094047546, "step": 3915 }, { "epoch": 0.9676303434642946, "grad_norm": 1.3410019384448602, "learning_rate": 1.1362458441951043e-05, "loss": 0.31281763315200806, "step": 3916 }, { "epoch": 0.967877440079071, "grad_norm": 1.4520803287572916, "learning_rate": 1.1358410601088288e-05, "loss": 0.3116236627101898, "step": 3917 }, { "epoch": 0.9681245366938473, "grad_norm": 1.1174633779908374, "learning_rate": 1.1354362533452672e-05, "loss": 0.30779072642326355, "step": 3918 }, { "epoch": 0.9683716333086236, "grad_norm": 1.3066426950722487, "learning_rate": 1.1350314239719977e-05, "loss": 0.33097517490386963, "step": 3919 }, { "epoch": 0.9686187299234, "grad_norm": 1.2048300552351783, "learning_rate": 1.1346265720566028e-05, "loss": 0.2904425263404846, "step": 3920 }, { "epoch": 0.9688658265381764, "grad_norm": 1.4146661008943007, "learning_rate": 1.134221697666668e-05, "loss": 0.38399964570999146, "step": 3921 }, { "epoch": 0.9691129231529528, "grad_norm": 1.4544487759411566, "learning_rate": 1.1338168008697834e-05, "loss": 0.3609963059425354, "step": 3922 }, { "epoch": 0.9693600197677292, "grad_norm": 1.3249681073931858, "learning_rate": 1.133411881733542e-05, "loss": 0.38513946533203125, "step": 3923 }, { "epoch": 0.9696071163825055, "grad_norm": 1.2455505902315853, "learning_rate": 1.1330069403255412e-05, "loss": 0.3467027246952057, "step": 3924 }, { "epoch": 0.9698542129972819, "grad_norm": 1.190732043244231, "learning_rate": 1.132601976713382e-05, "loss": 0.33203238248825073, "step": 3925 }, { "epoch": 0.9701013096120583, "grad_norm": 1.1930867096996438, "learning_rate": 1.1321969909646684e-05, "loss": 0.31893742084503174, "step": 3926 }, { "epoch": 0.9703484062268347, "grad_norm": 1.2361083977440195, "learning_rate": 1.1317919831470095e-05, "loss": 0.3285403549671173, "step": 3927 }, { "epoch": 0.9705955028416111, "grad_norm": 1.3258003573433417, "learning_rate": 1.1313869533280163e-05, "loss": 0.2975560426712036, "step": 3928 }, { "epoch": 0.9708425994563874, "grad_norm": 1.1524474451067774, "learning_rate": 1.1309819015753054e-05, "loss": 0.33577772974967957, "step": 3929 }, { "epoch": 0.9710896960711638, "grad_norm": 1.2695047565528186, "learning_rate": 1.130576827956495e-05, "loss": 0.3464440107345581, "step": 3930 }, { "epoch": 0.9713367926859402, "grad_norm": 1.2064284642764092, "learning_rate": 1.130171732539209e-05, "loss": 0.29224318265914917, "step": 3931 }, { "epoch": 0.9715838893007166, "grad_norm": 1.1859790070998975, "learning_rate": 1.1297666153910736e-05, "loss": 0.34427887201309204, "step": 3932 }, { "epoch": 0.971830985915493, "grad_norm": 1.199557736618654, "learning_rate": 1.1293614765797187e-05, "loss": 0.3049808740615845, "step": 3933 }, { "epoch": 0.9720780825302693, "grad_norm": 1.1713021321003394, "learning_rate": 1.1289563161727785e-05, "loss": 0.3344089090824127, "step": 3934 }, { "epoch": 0.9723251791450457, "grad_norm": 1.304131251680634, "learning_rate": 1.1285511342378904e-05, "loss": 0.38919597864151, "step": 3935 }, { "epoch": 0.9725722757598221, "grad_norm": 1.2560043601188418, "learning_rate": 1.1281459308426953e-05, "loss": 0.3179766535758972, "step": 3936 }, { "epoch": 0.9728193723745985, "grad_norm": 1.2473904306581773, "learning_rate": 1.1277407060548374e-05, "loss": 0.3447064459323883, "step": 3937 }, { "epoch": 0.9730664689893749, "grad_norm": 1.1398588905772458, "learning_rate": 1.1273354599419657e-05, "loss": 0.32425743341445923, "step": 3938 }, { "epoch": 0.9733135656041513, "grad_norm": 1.232263615977031, "learning_rate": 1.1269301925717312e-05, "loss": 0.3326599895954132, "step": 3939 }, { "epoch": 0.9735606622189276, "grad_norm": 1.2998359406829716, "learning_rate": 1.1265249040117899e-05, "loss": 0.3548278212547302, "step": 3940 }, { "epoch": 0.973807758833704, "grad_norm": 1.2138597844007308, "learning_rate": 1.1261195943297996e-05, "loss": 0.3762994110584259, "step": 3941 }, { "epoch": 0.9740548554484804, "grad_norm": 1.3980981414031024, "learning_rate": 1.1257142635934235e-05, "loss": 0.36962878704071045, "step": 3942 }, { "epoch": 0.9743019520632568, "grad_norm": 1.2403526132693299, "learning_rate": 1.125308911870327e-05, "loss": 0.3740100860595703, "step": 3943 }, { "epoch": 0.9745490486780332, "grad_norm": 1.0373727248167361, "learning_rate": 1.1249035392281798e-05, "loss": 0.2765166461467743, "step": 3944 }, { "epoch": 0.9747961452928094, "grad_norm": 1.2259621312373212, "learning_rate": 1.1244981457346544e-05, "loss": 0.30374252796173096, "step": 3945 }, { "epoch": 0.9750432419075858, "grad_norm": 1.1875014265817847, "learning_rate": 1.1240927314574274e-05, "loss": 0.3045923113822937, "step": 3946 }, { "epoch": 0.9752903385223622, "grad_norm": 1.1618924739694643, "learning_rate": 1.1236872964641787e-05, "loss": 0.33591675758361816, "step": 3947 }, { "epoch": 0.9755374351371386, "grad_norm": 1.154770042832367, "learning_rate": 1.1232818408225909e-05, "loss": 0.26714903116226196, "step": 3948 }, { "epoch": 0.975784531751915, "grad_norm": 1.1355075056195976, "learning_rate": 1.1228763646003515e-05, "loss": 0.2987487316131592, "step": 3949 }, { "epoch": 0.9760316283666913, "grad_norm": 1.1423659299438114, "learning_rate": 1.1224708678651503e-05, "loss": 0.3265039622783661, "step": 3950 }, { "epoch": 0.9762787249814677, "grad_norm": 1.1654051858671672, "learning_rate": 1.1220653506846806e-05, "loss": 0.3199790120124817, "step": 3951 }, { "epoch": 0.9765258215962441, "grad_norm": 1.3329762518421562, "learning_rate": 1.12165981312664e-05, "loss": 0.3717731535434723, "step": 3952 }, { "epoch": 0.9767729182110205, "grad_norm": 1.1172172269129625, "learning_rate": 1.1212542552587283e-05, "loss": 0.28908807039260864, "step": 3953 }, { "epoch": 0.9770200148257969, "grad_norm": 1.2936486298248213, "learning_rate": 1.1208486771486493e-05, "loss": 0.3505653142929077, "step": 3954 }, { "epoch": 0.9772671114405732, "grad_norm": 1.2658249064771923, "learning_rate": 1.1204430788641106e-05, "loss": 0.302974671125412, "step": 3955 }, { "epoch": 0.9775142080553496, "grad_norm": 1.2158012959792681, "learning_rate": 1.1200374604728221e-05, "loss": 0.29976651072502136, "step": 3956 }, { "epoch": 0.977761304670126, "grad_norm": 1.3057984818238912, "learning_rate": 1.1196318220424984e-05, "loss": 0.3935564160346985, "step": 3957 }, { "epoch": 0.9780084012849024, "grad_norm": 1.1067593843127288, "learning_rate": 1.1192261636408562e-05, "loss": 0.27002447843551636, "step": 3958 }, { "epoch": 0.9782554978996788, "grad_norm": 1.2738573148939665, "learning_rate": 1.1188204853356163e-05, "loss": 0.34624573588371277, "step": 3959 }, { "epoch": 0.9785025945144552, "grad_norm": 1.3646118177141233, "learning_rate": 1.1184147871945022e-05, "loss": 0.40429580211639404, "step": 3960 }, { "epoch": 0.9787496911292315, "grad_norm": 1.145652532184938, "learning_rate": 1.1180090692852416e-05, "loss": 0.2597329318523407, "step": 3961 }, { "epoch": 0.9789967877440079, "grad_norm": 1.2870524282518254, "learning_rate": 1.1176033316755646e-05, "loss": 0.3348539471626282, "step": 3962 }, { "epoch": 0.9792438843587843, "grad_norm": 1.1235485704073114, "learning_rate": 1.1171975744332051e-05, "loss": 0.31032806634902954, "step": 3963 }, { "epoch": 0.9794909809735607, "grad_norm": 1.2601238126739986, "learning_rate": 1.1167917976259003e-05, "loss": 0.32162606716156006, "step": 3964 }, { "epoch": 0.9797380775883371, "grad_norm": 1.2913542688303232, "learning_rate": 1.1163860013213901e-05, "loss": 0.41513532400131226, "step": 3965 }, { "epoch": 0.9799851742031134, "grad_norm": 1.220794870614503, "learning_rate": 1.1159801855874188e-05, "loss": 0.3267216086387634, "step": 3966 }, { "epoch": 0.9802322708178898, "grad_norm": 1.0544924540301026, "learning_rate": 1.1155743504917324e-05, "loss": 0.25455397367477417, "step": 3967 }, { "epoch": 0.9804793674326662, "grad_norm": 1.0535345173781463, "learning_rate": 1.1151684961020817e-05, "loss": 0.26402607560157776, "step": 3968 }, { "epoch": 0.9807264640474426, "grad_norm": 1.081830462067809, "learning_rate": 1.1147626224862192e-05, "loss": 0.2831836938858032, "step": 3969 }, { "epoch": 0.980973560662219, "grad_norm": 1.4611058193611384, "learning_rate": 1.114356729711902e-05, "loss": 0.3271453380584717, "step": 3970 }, { "epoch": 0.9812206572769953, "grad_norm": 1.2554237902187182, "learning_rate": 1.1139508178468894e-05, "loss": 0.3916594088077545, "step": 3971 }, { "epoch": 0.9814677538917717, "grad_norm": 1.2362723323869362, "learning_rate": 1.1135448869589445e-05, "loss": 0.347976416349411, "step": 3972 }, { "epoch": 0.981714850506548, "grad_norm": 1.5317857223532412, "learning_rate": 1.1131389371158335e-05, "loss": 0.31693148612976074, "step": 3973 }, { "epoch": 0.9819619471213245, "grad_norm": 1.2578217929835356, "learning_rate": 1.112732968385325e-05, "loss": 0.3424047529697418, "step": 3974 }, { "epoch": 0.9822090437361009, "grad_norm": 1.2345509481855024, "learning_rate": 1.112326980835192e-05, "loss": 0.3279359042644501, "step": 3975 }, { "epoch": 0.9824561403508771, "grad_norm": 1.2547752311712626, "learning_rate": 1.1119209745332093e-05, "loss": 0.36020559072494507, "step": 3976 }, { "epoch": 0.9827032369656535, "grad_norm": 1.4658201301542948, "learning_rate": 1.1115149495471564e-05, "loss": 0.3504541516304016, "step": 3977 }, { "epoch": 0.9829503335804299, "grad_norm": 1.3253418392402763, "learning_rate": 1.1111089059448146e-05, "loss": 0.3473663926124573, "step": 3978 }, { "epoch": 0.9831974301952063, "grad_norm": 1.2631069762785008, "learning_rate": 1.1107028437939683e-05, "loss": 0.3101407289505005, "step": 3979 }, { "epoch": 0.9834445268099827, "grad_norm": 1.1706364133664202, "learning_rate": 1.1102967631624066e-05, "loss": 0.2908611297607422, "step": 3980 }, { "epoch": 0.9836916234247591, "grad_norm": 1.335031069242088, "learning_rate": 1.1098906641179195e-05, "loss": 0.33790794014930725, "step": 3981 }, { "epoch": 0.9839387200395354, "grad_norm": 1.0985305319720744, "learning_rate": 1.1094845467283016e-05, "loss": 0.2865736484527588, "step": 3982 }, { "epoch": 0.9841858166543118, "grad_norm": 1.4721618373524321, "learning_rate": 1.1090784110613497e-05, "loss": 0.2979542016983032, "step": 3983 }, { "epoch": 0.9844329132690882, "grad_norm": 1.314575075526728, "learning_rate": 1.1086722571848646e-05, "loss": 0.3691321611404419, "step": 3984 }, { "epoch": 0.9846800098838646, "grad_norm": 1.2076378694730885, "learning_rate": 1.108266085166649e-05, "loss": 0.37300169467926025, "step": 3985 }, { "epoch": 0.984927106498641, "grad_norm": 1.1986994527322048, "learning_rate": 1.1078598950745097e-05, "loss": 0.3327566385269165, "step": 3986 }, { "epoch": 0.9851742031134173, "grad_norm": 1.2190095185502252, "learning_rate": 1.1074536869762555e-05, "loss": 0.30951038002967834, "step": 3987 }, { "epoch": 0.9854212997281937, "grad_norm": 1.2872758595207363, "learning_rate": 1.107047460939699e-05, "loss": 0.37123095989227295, "step": 3988 }, { "epoch": 0.9856683963429701, "grad_norm": 1.421623197898731, "learning_rate": 1.1066412170326558e-05, "loss": 0.3765207529067993, "step": 3989 }, { "epoch": 0.9859154929577465, "grad_norm": 1.2319709275290285, "learning_rate": 1.1062349553229434e-05, "loss": 0.3898829221725464, "step": 3990 }, { "epoch": 0.9861625895725229, "grad_norm": 1.2263201961919752, "learning_rate": 1.1058286758783838e-05, "loss": 0.3383947014808655, "step": 3991 }, { "epoch": 0.9864096861872992, "grad_norm": 1.4557140902351535, "learning_rate": 1.1054223787668007e-05, "loss": 0.37864699959754944, "step": 3992 }, { "epoch": 0.9866567828020756, "grad_norm": 1.091986449203374, "learning_rate": 1.1050160640560222e-05, "loss": 0.28514522314071655, "step": 3993 }, { "epoch": 0.986903879416852, "grad_norm": 1.221827834192313, "learning_rate": 1.1046097318138773e-05, "loss": 0.35539209842681885, "step": 3994 }, { "epoch": 0.9871509760316284, "grad_norm": 1.623448048939218, "learning_rate": 1.1042033821081995e-05, "loss": 0.27589160203933716, "step": 3995 }, { "epoch": 0.9873980726464048, "grad_norm": 1.2057069563835938, "learning_rate": 1.103797015006825e-05, "loss": 0.2957473397254944, "step": 3996 }, { "epoch": 0.9876451692611812, "grad_norm": 1.1579250392063445, "learning_rate": 1.1033906305775924e-05, "loss": 0.2643703818321228, "step": 3997 }, { "epoch": 0.9878922658759575, "grad_norm": 1.1690596421420159, "learning_rate": 1.1029842288883435e-05, "loss": 0.24999931454658508, "step": 3998 }, { "epoch": 0.9881393624907339, "grad_norm": 1.3034694492480636, "learning_rate": 1.1025778100069226e-05, "loss": 0.3552974462509155, "step": 3999 }, { "epoch": 0.9883864591055103, "grad_norm": 1.1628107604934461, "learning_rate": 1.102171374001178e-05, "loss": 0.3394199311733246, "step": 4000 }, { "epoch": 0.9886335557202867, "grad_norm": 1.2398607905315233, "learning_rate": 1.1017649209389594e-05, "loss": 0.33880096673965454, "step": 4001 }, { "epoch": 0.9888806523350631, "grad_norm": 1.2779858763205127, "learning_rate": 1.1013584508881198e-05, "loss": 0.3876814544200897, "step": 4002 }, { "epoch": 0.9891277489498393, "grad_norm": 1.1709825840707049, "learning_rate": 1.1009519639165162e-05, "loss": 0.3129230737686157, "step": 4003 }, { "epoch": 0.9893748455646157, "grad_norm": 1.3264152882115294, "learning_rate": 1.100545460092007e-05, "loss": 0.3527703881263733, "step": 4004 }, { "epoch": 0.9896219421793921, "grad_norm": 1.1069219285538157, "learning_rate": 1.1001389394824538e-05, "loss": 0.35136109590530396, "step": 4005 }, { "epoch": 0.9898690387941685, "grad_norm": 1.1663816946561774, "learning_rate": 1.0997324021557208e-05, "loss": 0.2841987907886505, "step": 4006 }, { "epoch": 0.9901161354089449, "grad_norm": 1.248446202368719, "learning_rate": 1.0993258481796757e-05, "loss": 0.36249929666519165, "step": 4007 }, { "epoch": 0.9903632320237212, "grad_norm": 1.2822074220793913, "learning_rate": 1.0989192776221887e-05, "loss": 0.35946041345596313, "step": 4008 }, { "epoch": 0.9906103286384976, "grad_norm": 1.1932880745843273, "learning_rate": 1.0985126905511318e-05, "loss": 0.32646918296813965, "step": 4009 }, { "epoch": 0.990857425253274, "grad_norm": 1.3229084538006937, "learning_rate": 1.0981060870343819e-05, "loss": 0.3362703323364258, "step": 4010 }, { "epoch": 0.9911045218680504, "grad_norm": 1.1026713804351596, "learning_rate": 1.0976994671398162e-05, "loss": 0.30710941553115845, "step": 4011 }, { "epoch": 0.9913516184828268, "grad_norm": 1.3925250786484837, "learning_rate": 1.0972928309353164e-05, "loss": 0.35197365283966064, "step": 4012 }, { "epoch": 0.9915987150976031, "grad_norm": 1.2929480855766888, "learning_rate": 1.0968861784887657e-05, "loss": 0.33448025584220886, "step": 4013 }, { "epoch": 0.9918458117123795, "grad_norm": 1.397163486112265, "learning_rate": 1.0964795098680512e-05, "loss": 0.3821316361427307, "step": 4014 }, { "epoch": 0.9920929083271559, "grad_norm": 1.204085987248414, "learning_rate": 1.0960728251410617e-05, "loss": 0.31331905722618103, "step": 4015 }, { "epoch": 0.9923400049419323, "grad_norm": 1.175269383425586, "learning_rate": 1.0956661243756891e-05, "loss": 0.3224795460700989, "step": 4016 }, { "epoch": 0.9925871015567087, "grad_norm": 1.1253062545663917, "learning_rate": 1.0952594076398287e-05, "loss": 0.3187828063964844, "step": 4017 }, { "epoch": 0.9928341981714851, "grad_norm": 1.240870405926541, "learning_rate": 1.0948526750013766e-05, "loss": 0.3199620842933655, "step": 4018 }, { "epoch": 0.9930812947862614, "grad_norm": 1.266763039361652, "learning_rate": 1.0944459265282335e-05, "loss": 0.324149489402771, "step": 4019 }, { "epoch": 0.9933283914010378, "grad_norm": 1.2874063538297944, "learning_rate": 1.0940391622883013e-05, "loss": 0.2871295213699341, "step": 4020 }, { "epoch": 0.9935754880158142, "grad_norm": 1.10754779028805, "learning_rate": 1.0936323823494859e-05, "loss": 0.25213533639907837, "step": 4021 }, { "epoch": 0.9938225846305906, "grad_norm": 1.206008628268267, "learning_rate": 1.0932255867796944e-05, "loss": 0.27351123094558716, "step": 4022 }, { "epoch": 0.994069681245367, "grad_norm": 1.311500739367776, "learning_rate": 1.0928187756468374e-05, "loss": 0.33972203731536865, "step": 4023 }, { "epoch": 0.9943167778601433, "grad_norm": 1.5282006927050762, "learning_rate": 1.0924119490188283e-05, "loss": 0.3148316442966461, "step": 4024 }, { "epoch": 0.9945638744749197, "grad_norm": 1.5220502055061074, "learning_rate": 1.0920051069635822e-05, "loss": 0.359328031539917, "step": 4025 }, { "epoch": 0.9948109710896961, "grad_norm": 1.1719729070343317, "learning_rate": 1.0915982495490174e-05, "loss": 0.3267749547958374, "step": 4026 }, { "epoch": 0.9950580677044725, "grad_norm": 1.1486304791420636, "learning_rate": 1.0911913768430545e-05, "loss": 0.32137441635131836, "step": 4027 }, { "epoch": 0.9953051643192489, "grad_norm": 1.2617002011751703, "learning_rate": 1.0907844889136173e-05, "loss": 0.35062074661254883, "step": 4028 }, { "epoch": 0.9955522609340252, "grad_norm": 1.224547858499858, "learning_rate": 1.0903775858286306e-05, "loss": 0.3119811415672302, "step": 4029 }, { "epoch": 0.9957993575488016, "grad_norm": 1.3318312463215096, "learning_rate": 1.0899706676560236e-05, "loss": 0.3440973162651062, "step": 4030 }, { "epoch": 0.996046454163578, "grad_norm": 1.2888065114992795, "learning_rate": 1.0895637344637266e-05, "loss": 0.36158573627471924, "step": 4031 }, { "epoch": 0.9962935507783544, "grad_norm": 1.3243575435036141, "learning_rate": 1.0891567863196737e-05, "loss": 0.31485500931739807, "step": 4032 }, { "epoch": 0.9965406473931308, "grad_norm": 1.2727356121987858, "learning_rate": 1.0887498232918e-05, "loss": 0.3243595063686371, "step": 4033 }, { "epoch": 0.996787744007907, "grad_norm": 1.205327989897942, "learning_rate": 1.0883428454480442e-05, "loss": 0.2940463423728943, "step": 4034 }, { "epoch": 0.9970348406226834, "grad_norm": 1.2774545805140982, "learning_rate": 1.0879358528563475e-05, "loss": 0.38998135924339294, "step": 4035 }, { "epoch": 0.9972819372374598, "grad_norm": 1.3223068560535713, "learning_rate": 1.0875288455846521e-05, "loss": 0.3719131052494049, "step": 4036 }, { "epoch": 0.9975290338522362, "grad_norm": 1.093208845050974, "learning_rate": 1.0871218237009049e-05, "loss": 0.2894655764102936, "step": 4037 }, { "epoch": 0.9977761304670126, "grad_norm": 1.0831062928696378, "learning_rate": 1.0867147872730533e-05, "loss": 0.3205283582210541, "step": 4038 }, { "epoch": 0.998023227081789, "grad_norm": 1.2770213788579223, "learning_rate": 1.0863077363690482e-05, "loss": 0.329891562461853, "step": 4039 }, { "epoch": 0.9982703236965653, "grad_norm": 1.2584643878753956, "learning_rate": 1.0859006710568427e-05, "loss": 0.32659947872161865, "step": 4040 }, { "epoch": 0.9985174203113417, "grad_norm": 1.1153499297967917, "learning_rate": 1.085493591404392e-05, "loss": 0.2793160080909729, "step": 4041 }, { "epoch": 0.9987645169261181, "grad_norm": 1.322465910512341, "learning_rate": 1.0850864974796542e-05, "loss": 0.35708633065223694, "step": 4042 }, { "epoch": 0.9990116135408945, "grad_norm": 1.2707988157098042, "learning_rate": 1.0846793893505891e-05, "loss": 0.35838577151298523, "step": 4043 }, { "epoch": 0.9992587101556709, "grad_norm": 1.175309846224862, "learning_rate": 1.0842722670851597e-05, "loss": 0.3284147083759308, "step": 4044 }, { "epoch": 0.9995058067704472, "grad_norm": 1.3151260209659834, "learning_rate": 1.0838651307513303e-05, "loss": 0.37710028886795044, "step": 4045 }, { "epoch": 0.9997529033852236, "grad_norm": 1.239227749018031, "learning_rate": 1.083457980417069e-05, "loss": 0.36644643545150757, "step": 4046 }, { "epoch": 1.0, "grad_norm": 1.3719630392396145, "learning_rate": 1.083050816150345e-05, "loss": 0.3538367748260498, "step": 4047 }, { "epoch": 1.0002470966147763, "grad_norm": 1.1347810672180374, "learning_rate": 1.0826436380191301e-05, "loss": 0.29043352603912354, "step": 4048 }, { "epoch": 1.0004941932295528, "grad_norm": 1.0271342999950555, "learning_rate": 1.082236446091399e-05, "loss": 0.2611636519432068, "step": 4049 }, { "epoch": 1.000741289844329, "grad_norm": 1.0684232719541342, "learning_rate": 1.0818292404351277e-05, "loss": 0.271432489156723, "step": 4050 }, { "epoch": 1.0009883864591056, "grad_norm": 1.173332601396744, "learning_rate": 1.0814220211182953e-05, "loss": 0.30375462770462036, "step": 4051 }, { "epoch": 1.0012354830738819, "grad_norm": 1.1663677544717093, "learning_rate": 1.0810147882088829e-05, "loss": 0.30110645294189453, "step": 4052 }, { "epoch": 1.0014825796886582, "grad_norm": 1.2406721569363115, "learning_rate": 1.080607541774874e-05, "loss": 0.2987271845340729, "step": 4053 }, { "epoch": 1.0017296763034347, "grad_norm": 1.204038777866507, "learning_rate": 1.0802002818842544e-05, "loss": 0.3086698055267334, "step": 4054 }, { "epoch": 1.001976772918211, "grad_norm": 1.1491155985614194, "learning_rate": 1.0797930086050115e-05, "loss": 0.2391413301229477, "step": 4055 }, { "epoch": 1.0022238695329875, "grad_norm": 1.26741990636463, "learning_rate": 1.0793857220051363e-05, "loss": 0.36877375841140747, "step": 4056 }, { "epoch": 1.0024709661477638, "grad_norm": 1.176934363658048, "learning_rate": 1.0789784221526201e-05, "loss": 0.2910531461238861, "step": 4057 }, { "epoch": 1.00271806276254, "grad_norm": 1.1368494482731297, "learning_rate": 1.078571109115458e-05, "loss": 0.26296329498291016, "step": 4058 }, { "epoch": 1.0029651593773166, "grad_norm": 1.0997581022312484, "learning_rate": 1.078163782961647e-05, "loss": 0.28440940380096436, "step": 4059 }, { "epoch": 1.0032122559920928, "grad_norm": 1.23958532292874, "learning_rate": 1.0777564437591857e-05, "loss": 0.265206515789032, "step": 4060 }, { "epoch": 1.0034593526068694, "grad_norm": 1.2734572343329187, "learning_rate": 1.0773490915760755e-05, "loss": 0.2968865931034088, "step": 4061 }, { "epoch": 1.0037064492216456, "grad_norm": 1.3153184093563572, "learning_rate": 1.0769417264803194e-05, "loss": 0.29968464374542236, "step": 4062 }, { "epoch": 1.003953545836422, "grad_norm": 1.136503414271327, "learning_rate": 1.0765343485399235e-05, "loss": 0.2925775647163391, "step": 4063 }, { "epoch": 1.0042006424511984, "grad_norm": 1.2068330676972443, "learning_rate": 1.0761269578228946e-05, "loss": 0.27242910861968994, "step": 4064 }, { "epoch": 1.0044477390659747, "grad_norm": 1.268253310321308, "learning_rate": 1.075719554397243e-05, "loss": 0.3207578659057617, "step": 4065 }, { "epoch": 1.0046948356807512, "grad_norm": 1.1845160374199022, "learning_rate": 1.0753121383309801e-05, "loss": 0.2801809310913086, "step": 4066 }, { "epoch": 1.0049419322955275, "grad_norm": 1.2820768939322658, "learning_rate": 1.0749047096921203e-05, "loss": 0.29608216881752014, "step": 4067 }, { "epoch": 1.005189028910304, "grad_norm": 1.324599530739921, "learning_rate": 1.0744972685486796e-05, "loss": 0.28215715289115906, "step": 4068 }, { "epoch": 1.0054361255250803, "grad_norm": 1.2788825568541013, "learning_rate": 1.074089814968676e-05, "loss": 0.26702409982681274, "step": 4069 }, { "epoch": 1.0056832221398566, "grad_norm": 1.310969972690704, "learning_rate": 1.0736823490201302e-05, "loss": 0.30063194036483765, "step": 4070 }, { "epoch": 1.0059303187546331, "grad_norm": 1.417424880299807, "learning_rate": 1.0732748707710638e-05, "loss": 0.32967424392700195, "step": 4071 }, { "epoch": 1.0061774153694094, "grad_norm": 1.3606303853645785, "learning_rate": 1.0728673802895015e-05, "loss": 0.3263871371746063, "step": 4072 }, { "epoch": 1.006424511984186, "grad_norm": 1.518663186598886, "learning_rate": 1.0724598776434701e-05, "loss": 0.3508729934692383, "step": 4073 }, { "epoch": 1.0066716085989622, "grad_norm": 1.2411959589432369, "learning_rate": 1.0720523629009973e-05, "loss": 0.2914789617061615, "step": 4074 }, { "epoch": 1.0069187052137385, "grad_norm": 1.2555812564155993, "learning_rate": 1.0716448361301137e-05, "loss": 0.2759629786014557, "step": 4075 }, { "epoch": 1.007165801828515, "grad_norm": 1.5430644135813005, "learning_rate": 1.0712372973988523e-05, "loss": 0.3436177372932434, "step": 4076 }, { "epoch": 1.0074128984432913, "grad_norm": 1.18324404708875, "learning_rate": 1.0708297467752472e-05, "loss": 0.29922473430633545, "step": 4077 }, { "epoch": 1.0076599950580678, "grad_norm": 1.2089428326475395, "learning_rate": 1.0704221843273343e-05, "loss": 0.2855772376060486, "step": 4078 }, { "epoch": 1.007907091672844, "grad_norm": 1.3813481857575731, "learning_rate": 1.0700146101231532e-05, "loss": 0.33085668087005615, "step": 4079 }, { "epoch": 1.0081541882876204, "grad_norm": 1.2212057848933273, "learning_rate": 1.0696070242307433e-05, "loss": 0.30894672870635986, "step": 4080 }, { "epoch": 1.0084012849023969, "grad_norm": 1.3379530236718962, "learning_rate": 1.0691994267181473e-05, "loss": 0.33939871191978455, "step": 4081 }, { "epoch": 1.0086483815171732, "grad_norm": 1.2753120793949162, "learning_rate": 1.0687918176534093e-05, "loss": 0.3173312544822693, "step": 4082 }, { "epoch": 1.0088954781319497, "grad_norm": 1.2235755744284136, "learning_rate": 1.0683841971045758e-05, "loss": 0.2684073746204376, "step": 4083 }, { "epoch": 1.009142574746726, "grad_norm": 1.2826085396617957, "learning_rate": 1.0679765651396949e-05, "loss": 0.3546433448791504, "step": 4084 }, { "epoch": 1.0093896713615023, "grad_norm": 1.273071492593048, "learning_rate": 1.0675689218268161e-05, "loss": 0.29095786809921265, "step": 4085 }, { "epoch": 1.0096367679762788, "grad_norm": 1.9073946170341423, "learning_rate": 1.067161267233992e-05, "loss": 0.2233777642250061, "step": 4086 }, { "epoch": 1.009883864591055, "grad_norm": 1.1449637564628197, "learning_rate": 1.0667536014292757e-05, "loss": 0.25797542929649353, "step": 4087 }, { "epoch": 1.0101309612058316, "grad_norm": 1.2797338931716729, "learning_rate": 1.0663459244807235e-05, "loss": 0.28379929065704346, "step": 4088 }, { "epoch": 1.0103780578206079, "grad_norm": 1.1621723313827974, "learning_rate": 1.0659382364563923e-05, "loss": 0.258899062871933, "step": 4089 }, { "epoch": 1.0106251544353841, "grad_norm": 1.1425402406877037, "learning_rate": 1.0655305374243423e-05, "loss": 0.272784948348999, "step": 4090 }, { "epoch": 1.0108722510501607, "grad_norm": 1.2284108735006773, "learning_rate": 1.065122827452634e-05, "loss": 0.33683836460113525, "step": 4091 }, { "epoch": 1.011119347664937, "grad_norm": 1.2086753389253935, "learning_rate": 1.0647151066093305e-05, "loss": 0.24539761245250702, "step": 4092 }, { "epoch": 1.0113664442797135, "grad_norm": 1.2711747417655326, "learning_rate": 1.064307374962497e-05, "loss": 0.2871803641319275, "step": 4093 }, { "epoch": 1.0116135408944897, "grad_norm": 1.2266005778249962, "learning_rate": 1.0638996325802002e-05, "loss": 0.29178285598754883, "step": 4094 }, { "epoch": 1.011860637509266, "grad_norm": 1.23649298991426, "learning_rate": 1.0634918795305082e-05, "loss": 0.2869154214859009, "step": 4095 }, { "epoch": 1.0121077341240425, "grad_norm": 1.1532579048472555, "learning_rate": 1.0630841158814912e-05, "loss": 0.2958219051361084, "step": 4096 }, { "epoch": 1.0123548307388188, "grad_norm": 1.4397848151394645, "learning_rate": 1.0626763417012218e-05, "loss": 0.257508248090744, "step": 4097 }, { "epoch": 1.0126019273535953, "grad_norm": 1.1930648280104346, "learning_rate": 1.0622685570577734e-05, "loss": 0.26257243752479553, "step": 4098 }, { "epoch": 1.0128490239683716, "grad_norm": 1.1844119243709157, "learning_rate": 1.0618607620192209e-05, "loss": 0.27242395281791687, "step": 4099 }, { "epoch": 1.013096120583148, "grad_norm": 1.2828649897042306, "learning_rate": 1.0614529566536424e-05, "loss": 0.3026998043060303, "step": 4100 }, { "epoch": 1.0133432171979244, "grad_norm": 1.1805354324622195, "learning_rate": 1.0610451410291168e-05, "loss": 0.32565486431121826, "step": 4101 }, { "epoch": 1.0135903138127007, "grad_norm": 1.2921254810213951, "learning_rate": 1.0606373152137241e-05, "loss": 0.3039121627807617, "step": 4102 }, { "epoch": 1.0138374104274772, "grad_norm": 1.2072447942169937, "learning_rate": 1.0602294792755474e-05, "loss": 0.3086193799972534, "step": 4103 }, { "epoch": 1.0140845070422535, "grad_norm": 1.1295575931038875, "learning_rate": 1.0598216332826706e-05, "loss": 0.2156725972890854, "step": 4104 }, { "epoch": 1.0143316036570298, "grad_norm": 1.2852780352194098, "learning_rate": 1.0594137773031792e-05, "loss": 0.29398417472839355, "step": 4105 }, { "epoch": 1.0145787002718063, "grad_norm": 1.2568731786686642, "learning_rate": 1.0590059114051606e-05, "loss": 0.3756153881549835, "step": 4106 }, { "epoch": 1.0148257968865826, "grad_norm": 1.1986037991926013, "learning_rate": 1.058598035656704e-05, "loss": 0.2550080418586731, "step": 4107 }, { "epoch": 1.015072893501359, "grad_norm": 1.30581343716077, "learning_rate": 1.0581901501259005e-05, "loss": 0.29595232009887695, "step": 4108 }, { "epoch": 1.0153199901161354, "grad_norm": 1.4142203515304415, "learning_rate": 1.057782254880842e-05, "loss": 0.2985351085662842, "step": 4109 }, { "epoch": 1.015567086730912, "grad_norm": 1.2287307692862321, "learning_rate": 1.0573743499896222e-05, "loss": 0.253154456615448, "step": 4110 }, { "epoch": 1.0158141833456882, "grad_norm": 2.7811212550646274, "learning_rate": 1.0569664355203374e-05, "loss": 0.2771666944026947, "step": 4111 }, { "epoch": 1.0160612799604645, "grad_norm": 1.2480555027525428, "learning_rate": 1.0565585115410846e-05, "loss": 0.3149116039276123, "step": 4112 }, { "epoch": 1.016308376575241, "grad_norm": 1.2174817054927294, "learning_rate": 1.056150578119962e-05, "loss": 0.3081607520580292, "step": 4113 }, { "epoch": 1.0165554731900173, "grad_norm": 1.3776274967598063, "learning_rate": 1.0557426353250706e-05, "loss": 0.30965283513069153, "step": 4114 }, { "epoch": 1.0168025698047938, "grad_norm": 1.3575122489605074, "learning_rate": 1.055334683224512e-05, "loss": 0.29432880878448486, "step": 4115 }, { "epoch": 1.01704966641957, "grad_norm": 1.277104453539826, "learning_rate": 1.0549267218863897e-05, "loss": 0.30340576171875, "step": 4116 }, { "epoch": 1.0172967630343464, "grad_norm": 1.336672479302372, "learning_rate": 1.0545187513788085e-05, "loss": 0.2685879170894623, "step": 4117 }, { "epoch": 1.0175438596491229, "grad_norm": 1.3797415119117478, "learning_rate": 1.0541107717698757e-05, "loss": 0.33723777532577515, "step": 4118 }, { "epoch": 1.0177909562638991, "grad_norm": 1.5713948990291677, "learning_rate": 1.0537027831276982e-05, "loss": 0.350455105304718, "step": 4119 }, { "epoch": 1.0180380528786757, "grad_norm": 1.2790058265327389, "learning_rate": 1.0532947855203865e-05, "loss": 0.3005748391151428, "step": 4120 }, { "epoch": 1.018285149493452, "grad_norm": 1.1707461819782472, "learning_rate": 1.052886779016051e-05, "loss": 0.25461453199386597, "step": 4121 }, { "epoch": 1.0185322461082282, "grad_norm": 1.3873254942042208, "learning_rate": 1.0524787636828051e-05, "loss": 0.32968321442604065, "step": 4122 }, { "epoch": 1.0187793427230047, "grad_norm": 1.2283604900174085, "learning_rate": 1.0520707395887619e-05, "loss": 0.2688283324241638, "step": 4123 }, { "epoch": 1.019026439337781, "grad_norm": 1.2339105712573677, "learning_rate": 1.0516627068020373e-05, "loss": 0.3125203847885132, "step": 4124 }, { "epoch": 1.0192735359525575, "grad_norm": 1.1881486934780565, "learning_rate": 1.0512546653907484e-05, "loss": 0.28190019726753235, "step": 4125 }, { "epoch": 1.0195206325673338, "grad_norm": 1.2763003008509899, "learning_rate": 1.0508466154230132e-05, "loss": 0.27111250162124634, "step": 4126 }, { "epoch": 1.0197677291821101, "grad_norm": 1.2871758123908696, "learning_rate": 1.050438556966952e-05, "loss": 0.3188580870628357, "step": 4127 }, { "epoch": 1.0200148257968866, "grad_norm": 1.4454459731099345, "learning_rate": 1.0500304900906854e-05, "loss": 0.3301946520805359, "step": 4128 }, { "epoch": 1.020261922411663, "grad_norm": 1.1172593150884853, "learning_rate": 1.0496224148623362e-05, "loss": 0.26768580079078674, "step": 4129 }, { "epoch": 1.0205090190264394, "grad_norm": 1.1998403759165042, "learning_rate": 1.0492143313500287e-05, "loss": 0.28552815318107605, "step": 4130 }, { "epoch": 1.0207561156412157, "grad_norm": 1.3332158909923977, "learning_rate": 1.0488062396218878e-05, "loss": 0.30123966932296753, "step": 4131 }, { "epoch": 1.021003212255992, "grad_norm": 1.5198269684998684, "learning_rate": 1.0483981397460415e-05, "loss": 0.3254877030849457, "step": 4132 }, { "epoch": 1.0212503088707685, "grad_norm": 1.0701503375129566, "learning_rate": 1.0479900317906162e-05, "loss": 0.27772021293640137, "step": 4133 }, { "epoch": 1.0214974054855448, "grad_norm": 1.2789511927062052, "learning_rate": 1.0475819158237426e-05, "loss": 0.25833356380462646, "step": 4134 }, { "epoch": 1.0217445021003213, "grad_norm": 1.2129813054387928, "learning_rate": 1.047173791913551e-05, "loss": 0.25414952635765076, "step": 4135 }, { "epoch": 1.0219915987150976, "grad_norm": 1.2104977217276418, "learning_rate": 1.0467656601281734e-05, "loss": 0.26539668440818787, "step": 4136 }, { "epoch": 1.0222386953298739, "grad_norm": 1.1910860967497805, "learning_rate": 1.0463575205357438e-05, "loss": 0.3103201687335968, "step": 4137 }, { "epoch": 1.0224857919446504, "grad_norm": 1.154684983065823, "learning_rate": 1.0459493732043969e-05, "loss": 0.26367104053497314, "step": 4138 }, { "epoch": 1.0227328885594267, "grad_norm": 1.2406240979008436, "learning_rate": 1.0455412182022684e-05, "loss": 0.28925323486328125, "step": 4139 }, { "epoch": 1.0229799851742032, "grad_norm": 1.2888035894063952, "learning_rate": 1.0451330555974957e-05, "loss": 0.32344287633895874, "step": 4140 }, { "epoch": 1.0232270817889795, "grad_norm": 1.1827740535608802, "learning_rate": 1.0447248854582178e-05, "loss": 0.23885661363601685, "step": 4141 }, { "epoch": 1.0234741784037558, "grad_norm": 1.336370430018257, "learning_rate": 1.044316707852574e-05, "loss": 0.2854805886745453, "step": 4142 }, { "epoch": 1.0237212750185323, "grad_norm": 1.2936914644525517, "learning_rate": 1.0439085228487053e-05, "loss": 0.3394486904144287, "step": 4143 }, { "epoch": 1.0239683716333086, "grad_norm": 1.591362059742949, "learning_rate": 1.043500330514755e-05, "loss": 0.25614669919013977, "step": 4144 }, { "epoch": 1.024215468248085, "grad_norm": 1.2483438391940505, "learning_rate": 1.0430921309188657e-05, "loss": 0.3293806314468384, "step": 4145 }, { "epoch": 1.0244625648628614, "grad_norm": 1.302996094075559, "learning_rate": 1.0426839241291828e-05, "loss": 0.31600773334503174, "step": 4146 }, { "epoch": 1.0247096614776376, "grad_norm": 1.3613492951963997, "learning_rate": 1.042275710213852e-05, "loss": 0.2835710048675537, "step": 4147 }, { "epoch": 1.0249567580924142, "grad_norm": 1.2734759568603862, "learning_rate": 1.0418674892410206e-05, "loss": 0.24214130640029907, "step": 4148 }, { "epoch": 1.0252038547071904, "grad_norm": 1.3480850854387838, "learning_rate": 1.0414592612788366e-05, "loss": 0.3293347954750061, "step": 4149 }, { "epoch": 1.025450951321967, "grad_norm": 1.3669135301528004, "learning_rate": 1.0410510263954496e-05, "loss": 0.30973291397094727, "step": 4150 }, { "epoch": 1.0256980479367432, "grad_norm": 1.2657612589862002, "learning_rate": 1.0406427846590108e-05, "loss": 0.32697001099586487, "step": 4151 }, { "epoch": 1.0259451445515197, "grad_norm": 1.228687697047223, "learning_rate": 1.0402345361376714e-05, "loss": 0.293239027261734, "step": 4152 }, { "epoch": 1.026192241166296, "grad_norm": 1.202035496405135, "learning_rate": 1.039826280899585e-05, "loss": 0.2404642254114151, "step": 4153 }, { "epoch": 1.0264393377810723, "grad_norm": 1.2097128590840942, "learning_rate": 1.0394180190129049e-05, "loss": 0.2813189625740051, "step": 4154 }, { "epoch": 1.0266864343958488, "grad_norm": 1.287943504521935, "learning_rate": 1.0390097505457867e-05, "loss": 0.2555246949195862, "step": 4155 }, { "epoch": 1.0269335310106251, "grad_norm": 1.2595546646766427, "learning_rate": 1.0386014755663868e-05, "loss": 0.2895625829696655, "step": 4156 }, { "epoch": 1.0271806276254016, "grad_norm": 1.2901169122705751, "learning_rate": 1.0381931941428621e-05, "loss": 0.3093705177307129, "step": 4157 }, { "epoch": 1.027427724240178, "grad_norm": 1.3433551841292184, "learning_rate": 1.0377849063433715e-05, "loss": 0.2754271626472473, "step": 4158 }, { "epoch": 1.0276748208549542, "grad_norm": 1.3851232908139777, "learning_rate": 1.037376612236074e-05, "loss": 0.3235260248184204, "step": 4159 }, { "epoch": 1.0279219174697307, "grad_norm": 1.1878531707073017, "learning_rate": 1.036968311889131e-05, "loss": 0.2716686725616455, "step": 4160 }, { "epoch": 1.028169014084507, "grad_norm": 1.170362235661429, "learning_rate": 1.0365600053707032e-05, "loss": 0.26035720109939575, "step": 4161 }, { "epoch": 1.0284161106992835, "grad_norm": 1.2814159675542716, "learning_rate": 1.0361516927489538e-05, "loss": 0.288452684879303, "step": 4162 }, { "epoch": 1.0286632073140598, "grad_norm": 1.2601411852287228, "learning_rate": 1.0357433740920462e-05, "loss": 0.26522210240364075, "step": 4163 }, { "epoch": 1.028910303928836, "grad_norm": 1.130354453865836, "learning_rate": 1.035335049468145e-05, "loss": 0.22359678149223328, "step": 4164 }, { "epoch": 1.0291574005436126, "grad_norm": 1.2516504715078702, "learning_rate": 1.0349267189454163e-05, "loss": 0.3106039762496948, "step": 4165 }, { "epoch": 1.0294044971583889, "grad_norm": 1.38678589818182, "learning_rate": 1.034518382592026e-05, "loss": 0.32709789276123047, "step": 4166 }, { "epoch": 1.0296515937731654, "grad_norm": 1.385284276383417, "learning_rate": 1.0341100404761424e-05, "loss": 0.27700209617614746, "step": 4167 }, { "epoch": 1.0298986903879417, "grad_norm": 1.2079020591531426, "learning_rate": 1.0337016926659333e-05, "loss": 0.2872557044029236, "step": 4168 }, { "epoch": 1.030145787002718, "grad_norm": 1.2875188338833854, "learning_rate": 1.0332933392295692e-05, "loss": 0.27102434635162354, "step": 4169 }, { "epoch": 1.0303928836174945, "grad_norm": 1.177570993986488, "learning_rate": 1.03288498023522e-05, "loss": 0.28640303015708923, "step": 4170 }, { "epoch": 1.0306399802322708, "grad_norm": 1.3533603405150463, "learning_rate": 1.0324766157510571e-05, "loss": 0.27824926376342773, "step": 4171 }, { "epoch": 1.0308870768470473, "grad_norm": 1.2894559579413227, "learning_rate": 1.0320682458452527e-05, "loss": 0.2643100321292877, "step": 4172 }, { "epoch": 1.0311341734618236, "grad_norm": 1.3431963655622587, "learning_rate": 1.0316598705859804e-05, "loss": 0.3038950264453888, "step": 4173 }, { "epoch": 1.0313812700765999, "grad_norm": 2.139696795136523, "learning_rate": 1.031251490041414e-05, "loss": 0.3456757664680481, "step": 4174 }, { "epoch": 1.0316283666913764, "grad_norm": 1.3639839325949175, "learning_rate": 1.0308431042797282e-05, "loss": 0.35667604207992554, "step": 4175 }, { "epoch": 1.0318754633061527, "grad_norm": 1.2376440313049695, "learning_rate": 1.0304347133690995e-05, "loss": 0.2825711965560913, "step": 4176 }, { "epoch": 1.0321225599209292, "grad_norm": 1.1825625918706728, "learning_rate": 1.0300263173777045e-05, "loss": 0.3154117465019226, "step": 4177 }, { "epoch": 1.0323696565357054, "grad_norm": 1.2782704624460988, "learning_rate": 1.0296179163737205e-05, "loss": 0.3025550842285156, "step": 4178 }, { "epoch": 1.0326167531504817, "grad_norm": 1.2569561510754115, "learning_rate": 1.029209510425326e-05, "loss": 0.27442601323127747, "step": 4179 }, { "epoch": 1.0328638497652582, "grad_norm": 1.4141765166570484, "learning_rate": 1.0288010996007e-05, "loss": 0.2958926558494568, "step": 4180 }, { "epoch": 1.0331109463800345, "grad_norm": 1.283907235246623, "learning_rate": 1.028392683968023e-05, "loss": 0.3018121123313904, "step": 4181 }, { "epoch": 1.033358042994811, "grad_norm": 1.2950513759416789, "learning_rate": 1.0279842635954754e-05, "loss": 0.3200306296348572, "step": 4182 }, { "epoch": 1.0336051396095873, "grad_norm": 1.3756860092864829, "learning_rate": 1.0275758385512393e-05, "loss": 0.29950037598609924, "step": 4183 }, { "epoch": 1.0338522362243636, "grad_norm": 1.1833957178323808, "learning_rate": 1.0271674089034968e-05, "loss": 0.2572745680809021, "step": 4184 }, { "epoch": 1.0340993328391401, "grad_norm": 1.2084206162403266, "learning_rate": 1.0267589747204312e-05, "loss": 0.2765328884124756, "step": 4185 }, { "epoch": 1.0343464294539164, "grad_norm": 1.3606812596634708, "learning_rate": 1.0263505360702261e-05, "loss": 0.2864071726799011, "step": 4186 }, { "epoch": 1.034593526068693, "grad_norm": 1.245397715070295, "learning_rate": 1.0259420930210668e-05, "loss": 0.2830865979194641, "step": 4187 }, { "epoch": 1.0348406226834692, "grad_norm": 1.2524735919998922, "learning_rate": 1.0255336456411383e-05, "loss": 0.27265042066574097, "step": 4188 }, { "epoch": 1.0350877192982457, "grad_norm": 1.1990126785193758, "learning_rate": 1.0251251939986267e-05, "loss": 0.293043315410614, "step": 4189 }, { "epoch": 1.035334815913022, "grad_norm": 1.3645989291155232, "learning_rate": 1.0247167381617191e-05, "loss": 0.26486217975616455, "step": 4190 }, { "epoch": 1.0355819125277983, "grad_norm": 1.215125418514255, "learning_rate": 1.024308278198603e-05, "loss": 0.27005454897880554, "step": 4191 }, { "epoch": 1.0358290091425748, "grad_norm": 1.2351386165001963, "learning_rate": 1.0238998141774665e-05, "loss": 0.29701048135757446, "step": 4192 }, { "epoch": 1.036076105757351, "grad_norm": 1.2483464551015657, "learning_rate": 1.0234913461664986e-05, "loss": 0.25698062777519226, "step": 4193 }, { "epoch": 1.0363232023721276, "grad_norm": 1.3727032777628114, "learning_rate": 1.0230828742338893e-05, "loss": 0.3167983889579773, "step": 4194 }, { "epoch": 1.036570298986904, "grad_norm": 1.3817473204002841, "learning_rate": 1.022674398447828e-05, "loss": 0.30530691146850586, "step": 4195 }, { "epoch": 1.0368173956016802, "grad_norm": 1.4600113448554524, "learning_rate": 1.022265918876506e-05, "loss": 0.3476266860961914, "step": 4196 }, { "epoch": 1.0370644922164567, "grad_norm": 1.221462603002313, "learning_rate": 1.021857435588115e-05, "loss": 0.31702911853790283, "step": 4197 }, { "epoch": 1.037311588831233, "grad_norm": 1.261066043467126, "learning_rate": 1.0214489486508473e-05, "loss": 0.3356882333755493, "step": 4198 }, { "epoch": 1.0375586854460095, "grad_norm": 1.1980515871503574, "learning_rate": 1.021040458132895e-05, "loss": 0.2777348756790161, "step": 4199 }, { "epoch": 1.0378057820607858, "grad_norm": 1.2118174231299221, "learning_rate": 1.0206319641024515e-05, "loss": 0.2742098271846771, "step": 4200 }, { "epoch": 1.038052878675562, "grad_norm": 1.252581861887969, "learning_rate": 1.0202234666277116e-05, "loss": 0.3006320595741272, "step": 4201 }, { "epoch": 1.0382999752903386, "grad_norm": 1.316423275602201, "learning_rate": 1.019814965776869e-05, "loss": 0.2980492413043976, "step": 4202 }, { "epoch": 1.0385470719051149, "grad_norm": 1.1122230720131885, "learning_rate": 1.019406461618119e-05, "loss": 0.2946438193321228, "step": 4203 }, { "epoch": 1.0387941685198914, "grad_norm": 1.2624051263757148, "learning_rate": 1.0189979542196574e-05, "loss": 0.2751654088497162, "step": 4204 }, { "epoch": 1.0390412651346677, "grad_norm": 1.281316675362828, "learning_rate": 1.0185894436496797e-05, "loss": 0.3339443802833557, "step": 4205 }, { "epoch": 1.039288361749444, "grad_norm": 1.424916297956678, "learning_rate": 1.0181809299763834e-05, "loss": 0.34868159890174866, "step": 4206 }, { "epoch": 1.0395354583642205, "grad_norm": 1.2452976390346087, "learning_rate": 1.0177724132679652e-05, "loss": 0.30712276697158813, "step": 4207 }, { "epoch": 1.0397825549789967, "grad_norm": 1.3880520278896704, "learning_rate": 1.0173638935926235e-05, "loss": 0.3078790605068207, "step": 4208 }, { "epoch": 1.0400296515937733, "grad_norm": 1.2917161476407535, "learning_rate": 1.0169553710185555e-05, "loss": 0.2930396795272827, "step": 4209 }, { "epoch": 1.0402767482085495, "grad_norm": 1.376679886048136, "learning_rate": 1.0165468456139607e-05, "loss": 0.3156263828277588, "step": 4210 }, { "epoch": 1.0405238448233258, "grad_norm": 1.3564374480708465, "learning_rate": 1.0161383174470379e-05, "loss": 0.270297646522522, "step": 4211 }, { "epoch": 1.0407709414381023, "grad_norm": 1.3389762198679929, "learning_rate": 1.0157297865859866e-05, "loss": 0.3044115900993347, "step": 4212 }, { "epoch": 1.0410180380528786, "grad_norm": 1.2374810096340643, "learning_rate": 1.0153212530990073e-05, "loss": 0.2599133551120758, "step": 4213 }, { "epoch": 1.0412651346676551, "grad_norm": 1.3586551283715342, "learning_rate": 1.0149127170543002e-05, "loss": 0.2923559248447418, "step": 4214 }, { "epoch": 1.0415122312824314, "grad_norm": 1.3846633241076922, "learning_rate": 1.0145041785200666e-05, "loss": 0.308468222618103, "step": 4215 }, { "epoch": 1.0417593278972077, "grad_norm": 1.3624162724212652, "learning_rate": 1.0140956375645075e-05, "loss": 0.31835243105888367, "step": 4216 }, { "epoch": 1.0420064245119842, "grad_norm": 1.2010940262821477, "learning_rate": 1.0136870942558247e-05, "loss": 0.24329771101474762, "step": 4217 }, { "epoch": 1.0422535211267605, "grad_norm": 1.2891706026158327, "learning_rate": 1.0132785486622205e-05, "loss": 0.25624874234199524, "step": 4218 }, { "epoch": 1.042500617741537, "grad_norm": 1.2620372739964183, "learning_rate": 1.0128700008518975e-05, "loss": 0.2693812847137451, "step": 4219 }, { "epoch": 1.0427477143563133, "grad_norm": 1.3150525316466837, "learning_rate": 1.0124614508930584e-05, "loss": 0.34898853302001953, "step": 4220 }, { "epoch": 1.0429948109710896, "grad_norm": 1.2033082388148166, "learning_rate": 1.0120528988539062e-05, "loss": 0.2735292315483093, "step": 4221 }, { "epoch": 1.043241907585866, "grad_norm": 1.1886342163866697, "learning_rate": 1.0116443448026456e-05, "loss": 0.30620837211608887, "step": 4222 }, { "epoch": 1.0434890042006424, "grad_norm": 1.583103059967921, "learning_rate": 1.0112357888074793e-05, "loss": 0.3399518132209778, "step": 4223 }, { "epoch": 1.043736100815419, "grad_norm": 1.3505663357001223, "learning_rate": 1.0108272309366122e-05, "loss": 0.2931860685348511, "step": 4224 }, { "epoch": 1.0439831974301952, "grad_norm": 1.1962370045042465, "learning_rate": 1.0104186712582492e-05, "loss": 0.26773521304130554, "step": 4225 }, { "epoch": 1.0442302940449717, "grad_norm": 1.0725070389645694, "learning_rate": 1.0100101098405941e-05, "loss": 0.22111813724040985, "step": 4226 }, { "epoch": 1.044477390659748, "grad_norm": 1.3861050821598482, "learning_rate": 1.0096015467518531e-05, "loss": 0.3209957480430603, "step": 4227 }, { "epoch": 1.0447244872745243, "grad_norm": 1.3091697888281824, "learning_rate": 1.0091929820602312e-05, "loss": 0.31739237904548645, "step": 4228 }, { "epoch": 1.0449715838893008, "grad_norm": 1.2298326754882891, "learning_rate": 1.0087844158339349e-05, "loss": 0.30194610357284546, "step": 4229 }, { "epoch": 1.045218680504077, "grad_norm": 1.405866007031962, "learning_rate": 1.0083758481411687e-05, "loss": 0.28248685598373413, "step": 4230 }, { "epoch": 1.0454657771188536, "grad_norm": 1.3467275290311151, "learning_rate": 1.0079672790501402e-05, "loss": 0.33452650904655457, "step": 4231 }, { "epoch": 1.0457128737336299, "grad_norm": 1.5618440368267874, "learning_rate": 1.0075587086290553e-05, "loss": 0.2673431634902954, "step": 4232 }, { "epoch": 1.0459599703484062, "grad_norm": 1.3780604669062126, "learning_rate": 1.0071501369461204e-05, "loss": 0.32353442907333374, "step": 4233 }, { "epoch": 1.0462070669631827, "grad_norm": 1.235105370356971, "learning_rate": 1.006741564069543e-05, "loss": 0.3201674818992615, "step": 4234 }, { "epoch": 1.046454163577959, "grad_norm": 1.1238986790846137, "learning_rate": 1.0063329900675298e-05, "loss": 0.2814602553844452, "step": 4235 }, { "epoch": 1.0467012601927355, "grad_norm": 1.445089462965961, "learning_rate": 1.0059244150082883e-05, "loss": 0.3063255548477173, "step": 4236 }, { "epoch": 1.0469483568075117, "grad_norm": 1.2577796832549004, "learning_rate": 1.0055158389600255e-05, "loss": 0.33205315470695496, "step": 4237 }, { "epoch": 1.047195453422288, "grad_norm": 1.2225937893690297, "learning_rate": 1.0051072619909497e-05, "loss": 0.28063732385635376, "step": 4238 }, { "epoch": 1.0474425500370645, "grad_norm": 1.2267172523292813, "learning_rate": 1.0046986841692683e-05, "loss": 0.257932186126709, "step": 4239 }, { "epoch": 1.0476896466518408, "grad_norm": 1.1050642050411559, "learning_rate": 1.0042901055631893e-05, "loss": 0.2570189833641052, "step": 4240 }, { "epoch": 1.0479367432666173, "grad_norm": 1.3535556050242237, "learning_rate": 1.0038815262409207e-05, "loss": 0.32025301456451416, "step": 4241 }, { "epoch": 1.0481838398813936, "grad_norm": 1.2917275521726974, "learning_rate": 1.0034729462706708e-05, "loss": 0.30214473605155945, "step": 4242 }, { "epoch": 1.04843093649617, "grad_norm": 1.2965013978359234, "learning_rate": 1.0030643657206477e-05, "loss": 0.2826789617538452, "step": 4243 }, { "epoch": 1.0486780331109464, "grad_norm": 1.1779769652295904, "learning_rate": 1.0026557846590595e-05, "loss": 0.2572106420993805, "step": 4244 }, { "epoch": 1.0489251297257227, "grad_norm": 1.295030537477376, "learning_rate": 1.0022472031541153e-05, "loss": 0.28337109088897705, "step": 4245 }, { "epoch": 1.0491722263404992, "grad_norm": 1.3286271908636118, "learning_rate": 1.0018386212740235e-05, "loss": 0.34359210729599, "step": 4246 }, { "epoch": 1.0494193229552755, "grad_norm": 1.2762992335013974, "learning_rate": 1.0014300390869921e-05, "loss": 0.2917653024196625, "step": 4247 }, { "epoch": 1.0496664195700518, "grad_norm": 1.1870767942610252, "learning_rate": 1.0010214566612307e-05, "loss": 0.26691433787345886, "step": 4248 }, { "epoch": 1.0499135161848283, "grad_norm": 1.2741899107100596, "learning_rate": 1.0006128740649472e-05, "loss": 0.2736703157424927, "step": 4249 }, { "epoch": 1.0501606127996046, "grad_norm": 1.534436547963255, "learning_rate": 1.0002042913663508e-05, "loss": 0.3422115445137024, "step": 4250 }, { "epoch": 1.050407709414381, "grad_norm": 1.3262869363461902, "learning_rate": 9.997957086336497e-06, "loss": 0.2767578661441803, "step": 4251 }, { "epoch": 1.0506548060291574, "grad_norm": 1.2334524450020952, "learning_rate": 9.993871259350531e-06, "loss": 0.3042724132537842, "step": 4252 }, { "epoch": 1.0509019026439337, "grad_norm": 1.1661015064123312, "learning_rate": 9.989785433387695e-06, "loss": 0.3063472807407379, "step": 4253 }, { "epoch": 1.0511489992587102, "grad_norm": 1.2997711651961015, "learning_rate": 9.985699609130082e-06, "loss": 0.3223435878753662, "step": 4254 }, { "epoch": 1.0513960958734865, "grad_norm": 1.2779094054807165, "learning_rate": 9.98161378725977e-06, "loss": 0.28486528992652893, "step": 4255 }, { "epoch": 1.051643192488263, "grad_norm": 1.085050248991967, "learning_rate": 9.97752796845885e-06, "loss": 0.24512335658073425, "step": 4256 }, { "epoch": 1.0518902891030393, "grad_norm": 2.1680766160987943, "learning_rate": 9.973442153409408e-06, "loss": 0.34929805994033813, "step": 4257 }, { "epoch": 1.0521373857178156, "grad_norm": 1.2761925132282625, "learning_rate": 9.969356342793527e-06, "loss": 0.2992975115776062, "step": 4258 }, { "epoch": 1.052384482332592, "grad_norm": 1.1864209537883796, "learning_rate": 9.965270537293297e-06, "loss": 0.2898727059364319, "step": 4259 }, { "epoch": 1.0526315789473684, "grad_norm": 1.34735892200114, "learning_rate": 9.961184737590795e-06, "loss": 0.3085440397262573, "step": 4260 }, { "epoch": 1.0528786755621449, "grad_norm": 1.3300831023545516, "learning_rate": 9.957098944368112e-06, "loss": 0.304319828748703, "step": 4261 }, { "epoch": 1.0531257721769212, "grad_norm": 1.2282412782121466, "learning_rate": 9.95301315830732e-06, "loss": 0.29477131366729736, "step": 4262 }, { "epoch": 1.0533728687916974, "grad_norm": 1.252549194070891, "learning_rate": 9.948927380090505e-06, "loss": 0.3194958567619324, "step": 4263 }, { "epoch": 1.053619965406474, "grad_norm": 1.447444994821961, "learning_rate": 9.944841610399747e-06, "loss": 0.29009509086608887, "step": 4264 }, { "epoch": 1.0538670620212502, "grad_norm": 1.3438819319283097, "learning_rate": 9.94075584991712e-06, "loss": 0.3086555600166321, "step": 4265 }, { "epoch": 1.0541141586360268, "grad_norm": 1.237603342034634, "learning_rate": 9.936670099324705e-06, "loss": 0.3105946183204651, "step": 4266 }, { "epoch": 1.054361255250803, "grad_norm": 1.28216446401302, "learning_rate": 9.932584359304571e-06, "loss": 0.342193067073822, "step": 4267 }, { "epoch": 1.0546083518655793, "grad_norm": 1.387211534068963, "learning_rate": 9.928498630538798e-06, "loss": 0.3023512065410614, "step": 4268 }, { "epoch": 1.0548554484803558, "grad_norm": 1.3748407310404613, "learning_rate": 9.924412913709452e-06, "loss": 0.3338707387447357, "step": 4269 }, { "epoch": 1.0551025450951321, "grad_norm": 1.239050248014379, "learning_rate": 9.9203272094986e-06, "loss": 0.2757207751274109, "step": 4270 }, { "epoch": 1.0553496417099086, "grad_norm": 1.3288879912422231, "learning_rate": 9.916241518588314e-06, "loss": 0.325366348028183, "step": 4271 }, { "epoch": 1.055596738324685, "grad_norm": 1.3208446307640325, "learning_rate": 9.912155841660654e-06, "loss": 0.33360356092453003, "step": 4272 }, { "epoch": 1.0558438349394614, "grad_norm": 1.226798371061687, "learning_rate": 9.908070179397687e-06, "loss": 0.2854851484298706, "step": 4273 }, { "epoch": 1.0560909315542377, "grad_norm": 1.2138065731560297, "learning_rate": 9.903984532481467e-06, "loss": 0.28481248021125793, "step": 4274 }, { "epoch": 1.056338028169014, "grad_norm": 1.2924595709830198, "learning_rate": 9.899898901594062e-06, "loss": 0.2614288926124573, "step": 4275 }, { "epoch": 1.0565851247837905, "grad_norm": 1.2813326815558772, "learning_rate": 9.895813287417513e-06, "loss": 0.2789626717567444, "step": 4276 }, { "epoch": 1.0568322213985668, "grad_norm": 1.3109339703136655, "learning_rate": 9.89172769063388e-06, "loss": 0.2926616370677948, "step": 4277 }, { "epoch": 1.0570793180133433, "grad_norm": 1.3797215156488756, "learning_rate": 9.887642111925209e-06, "loss": 0.34271448850631714, "step": 4278 }, { "epoch": 1.0573264146281196, "grad_norm": 1.36038014553377, "learning_rate": 9.883556551973546e-06, "loss": 0.33151668310165405, "step": 4279 }, { "epoch": 1.057573511242896, "grad_norm": 1.246169406850972, "learning_rate": 9.879471011460938e-06, "loss": 0.2892879545688629, "step": 4280 }, { "epoch": 1.0578206078576724, "grad_norm": 1.8662060544007897, "learning_rate": 9.875385491069417e-06, "loss": 0.3215042054653168, "step": 4281 }, { "epoch": 1.0580677044724487, "grad_norm": 1.355616561354529, "learning_rate": 9.871299991481029e-06, "loss": 0.2641793489456177, "step": 4282 }, { "epoch": 1.0583148010872252, "grad_norm": 1.3873105123155514, "learning_rate": 9.867214513377798e-06, "loss": 0.28216713666915894, "step": 4283 }, { "epoch": 1.0585618977020015, "grad_norm": 1.376120349818481, "learning_rate": 9.863129057441754e-06, "loss": 0.287814199924469, "step": 4284 }, { "epoch": 1.0588089943167778, "grad_norm": 1.3951987411300135, "learning_rate": 9.859043624354929e-06, "loss": 0.30385690927505493, "step": 4285 }, { "epoch": 1.0590560909315543, "grad_norm": 1.2724875036342866, "learning_rate": 9.854958214799336e-06, "loss": 0.3168829083442688, "step": 4286 }, { "epoch": 1.0593031875463306, "grad_norm": 1.3002291418307157, "learning_rate": 9.850872829457e-06, "loss": 0.3051708936691284, "step": 4287 }, { "epoch": 1.059550284161107, "grad_norm": 1.5105640269495157, "learning_rate": 9.846787469009927e-06, "loss": 0.3372875452041626, "step": 4288 }, { "epoch": 1.0597973807758834, "grad_norm": 1.2884766799254121, "learning_rate": 9.842702134140136e-06, "loss": 0.30326932668685913, "step": 4289 }, { "epoch": 1.0600444773906597, "grad_norm": 1.2710675452356703, "learning_rate": 9.838616825529626e-06, "loss": 0.2866660952568054, "step": 4290 }, { "epoch": 1.0602915740054362, "grad_norm": 1.2550999695608722, "learning_rate": 9.834531543860396e-06, "loss": 0.25052744150161743, "step": 4291 }, { "epoch": 1.0605386706202125, "grad_norm": 1.2444219405891557, "learning_rate": 9.830446289814448e-06, "loss": 0.2643533945083618, "step": 4292 }, { "epoch": 1.060785767234989, "grad_norm": 1.20591398779569, "learning_rate": 9.826361064073768e-06, "loss": 0.26032936573028564, "step": 4293 }, { "epoch": 1.0610328638497653, "grad_norm": 1.3077548980410274, "learning_rate": 9.822275867320348e-06, "loss": 0.27021899819374084, "step": 4294 }, { "epoch": 1.0612799604645415, "grad_norm": 1.2951031156975428, "learning_rate": 9.818190700236166e-06, "loss": 0.29036834836006165, "step": 4295 }, { "epoch": 1.061527057079318, "grad_norm": 1.369422506987385, "learning_rate": 9.814105563503208e-06, "loss": 0.3017193078994751, "step": 4296 }, { "epoch": 1.0617741536940943, "grad_norm": 1.3366243492427967, "learning_rate": 9.810020457803431e-06, "loss": 0.28401798009872437, "step": 4297 }, { "epoch": 1.0620212503088708, "grad_norm": 1.2566732539690164, "learning_rate": 9.805935383818814e-06, "loss": 0.2604984641075134, "step": 4298 }, { "epoch": 1.0622683469236471, "grad_norm": 1.212417857154702, "learning_rate": 9.801850342231313e-06, "loss": 0.24249279499053955, "step": 4299 }, { "epoch": 1.0625154435384234, "grad_norm": 1.5339716060917594, "learning_rate": 9.797765333722888e-06, "loss": 0.28820884227752686, "step": 4300 }, { "epoch": 1.0627625401532, "grad_norm": 1.3041096294834786, "learning_rate": 9.793680358975483e-06, "loss": 0.31085336208343506, "step": 4301 }, { "epoch": 1.0630096367679762, "grad_norm": 1.8100979466280465, "learning_rate": 9.789595418671052e-06, "loss": 0.25277256965637207, "step": 4302 }, { "epoch": 1.0632567333827527, "grad_norm": 1.4333612060530365, "learning_rate": 9.785510513491534e-06, "loss": 0.31256765127182007, "step": 4303 }, { "epoch": 1.063503829997529, "grad_norm": 1.2161907074778495, "learning_rate": 9.781425644118853e-06, "loss": 0.25745463371276855, "step": 4304 }, { "epoch": 1.0637509266123053, "grad_norm": 1.4203574428545422, "learning_rate": 9.777340811234943e-06, "loss": 0.37386417388916016, "step": 4305 }, { "epoch": 1.0639980232270818, "grad_norm": 1.38621306384823, "learning_rate": 9.773256015521723e-06, "loss": 0.3310262858867645, "step": 4306 }, { "epoch": 1.064245119841858, "grad_norm": 1.3620263302671054, "learning_rate": 9.769171257661112e-06, "loss": 0.34394630789756775, "step": 4307 }, { "epoch": 1.0644922164566346, "grad_norm": 1.2683952460362238, "learning_rate": 9.765086538335014e-06, "loss": 0.3012593984603882, "step": 4308 }, { "epoch": 1.064739313071411, "grad_norm": 1.3865868153099916, "learning_rate": 9.761001858225337e-06, "loss": 0.2992353141307831, "step": 4309 }, { "epoch": 1.0649864096861874, "grad_norm": 1.4454976206252887, "learning_rate": 9.756917218013974e-06, "loss": 0.23278513550758362, "step": 4310 }, { "epoch": 1.0652335063009637, "grad_norm": 1.388310873946684, "learning_rate": 9.752832618382812e-06, "loss": 0.35981470346450806, "step": 4311 }, { "epoch": 1.06548060291574, "grad_norm": 1.4056730913313031, "learning_rate": 9.748748060013736e-06, "loss": 0.327356219291687, "step": 4312 }, { "epoch": 1.0657276995305165, "grad_norm": 1.276494755841463, "learning_rate": 9.74466354358862e-06, "loss": 0.2938128411769867, "step": 4313 }, { "epoch": 1.0659747961452928, "grad_norm": 1.2679934285116534, "learning_rate": 9.740579069789335e-06, "loss": 0.2788102924823761, "step": 4314 }, { "epoch": 1.0662218927600693, "grad_norm": 1.6410558143247531, "learning_rate": 9.73649463929774e-06, "loss": 0.27242350578308105, "step": 4315 }, { "epoch": 1.0664689893748456, "grad_norm": 1.3736518065298369, "learning_rate": 9.732410252795691e-06, "loss": 0.2973044812679291, "step": 4316 }, { "epoch": 1.0667160859896219, "grad_norm": 1.2866074623783652, "learning_rate": 9.728325910965037e-06, "loss": 0.29437530040740967, "step": 4317 }, { "epoch": 1.0669631826043984, "grad_norm": 1.3864971333078513, "learning_rate": 9.72424161448761e-06, "loss": 0.28702622652053833, "step": 4318 }, { "epoch": 1.0672102792191747, "grad_norm": 1.3650870659837286, "learning_rate": 9.720157364045248e-06, "loss": 0.29504695534706116, "step": 4319 }, { "epoch": 1.0674573758339512, "grad_norm": 1.4331305132953716, "learning_rate": 9.716073160319772e-06, "loss": 0.37290114164352417, "step": 4320 }, { "epoch": 1.0677044724487275, "grad_norm": 1.3794383586641459, "learning_rate": 9.711989003993003e-06, "loss": 0.3077162802219391, "step": 4321 }, { "epoch": 1.0679515690635037, "grad_norm": 1.129313110990442, "learning_rate": 9.707904895746742e-06, "loss": 0.27111339569091797, "step": 4322 }, { "epoch": 1.0681986656782803, "grad_norm": 1.4295331239595568, "learning_rate": 9.7038208362628e-06, "loss": 0.3322319984436035, "step": 4323 }, { "epoch": 1.0684457622930565, "grad_norm": 1.2637982287373193, "learning_rate": 9.69973682622296e-06, "loss": 0.25538474321365356, "step": 4324 }, { "epoch": 1.068692858907833, "grad_norm": 1.2005829165960882, "learning_rate": 9.695652866309006e-06, "loss": 0.28731656074523926, "step": 4325 }, { "epoch": 1.0689399555226093, "grad_norm": 1.29778401405901, "learning_rate": 9.691568957202721e-06, "loss": 0.28316015005111694, "step": 4326 }, { "epoch": 1.0691870521373856, "grad_norm": 1.188589459986461, "learning_rate": 9.687485099585864e-06, "loss": 0.24597208201885223, "step": 4327 }, { "epoch": 1.0694341487521621, "grad_norm": 1.3117612688031586, "learning_rate": 9.6834012941402e-06, "loss": 0.2547098398208618, "step": 4328 }, { "epoch": 1.0696812453669384, "grad_norm": 1.2372312607515863, "learning_rate": 9.679317541547474e-06, "loss": 0.2784842550754547, "step": 4329 }, { "epoch": 1.069928341981715, "grad_norm": 1.2815182156646612, "learning_rate": 9.675233842489434e-06, "loss": 0.29805880784988403, "step": 4330 }, { "epoch": 1.0701754385964912, "grad_norm": 1.2114439143550502, "learning_rate": 9.671150197647804e-06, "loss": 0.27341926097869873, "step": 4331 }, { "epoch": 1.0704225352112675, "grad_norm": 1.3844320059936184, "learning_rate": 9.667066607704311e-06, "loss": 0.33536118268966675, "step": 4332 }, { "epoch": 1.070669631826044, "grad_norm": 1.1794704149519954, "learning_rate": 9.662983073340669e-06, "loss": 0.2678295969963074, "step": 4333 }, { "epoch": 1.0709167284408203, "grad_norm": 1.2447691967212677, "learning_rate": 9.658899595238579e-06, "loss": 0.2655584216117859, "step": 4334 }, { "epoch": 1.0711638250555968, "grad_norm": 1.3219067144647696, "learning_rate": 9.654816174079743e-06, "loss": 0.2961287498474121, "step": 4335 }, { "epoch": 1.071410921670373, "grad_norm": 1.3394095740985643, "learning_rate": 9.650732810545839e-06, "loss": 0.33446547389030457, "step": 4336 }, { "epoch": 1.0716580182851494, "grad_norm": 1.246965906881577, "learning_rate": 9.646649505318553e-06, "loss": 0.270771861076355, "step": 4337 }, { "epoch": 1.071905114899926, "grad_norm": 1.377118487257213, "learning_rate": 9.642566259079542e-06, "loss": 0.2883526086807251, "step": 4338 }, { "epoch": 1.0721522115147022, "grad_norm": 2.338308954491441, "learning_rate": 9.638483072510465e-06, "loss": 0.2653293013572693, "step": 4339 }, { "epoch": 1.0723993081294787, "grad_norm": 1.3346168794954556, "learning_rate": 9.63439994629297e-06, "loss": 0.3482469618320465, "step": 4340 }, { "epoch": 1.072646404744255, "grad_norm": 1.288962824488991, "learning_rate": 9.630316881108692e-06, "loss": 0.27825629711151123, "step": 4341 }, { "epoch": 1.0728935013590313, "grad_norm": 1.4234639980198567, "learning_rate": 9.626233877639261e-06, "loss": 0.30296599864959717, "step": 4342 }, { "epoch": 1.0731405979738078, "grad_norm": 1.263148526653294, "learning_rate": 9.622150936566287e-06, "loss": 0.30158549547195435, "step": 4343 }, { "epoch": 1.073387694588584, "grad_norm": 1.3126986549205326, "learning_rate": 9.618068058571382e-06, "loss": 0.3111845850944519, "step": 4344 }, { "epoch": 1.0736347912033606, "grad_norm": 1.4152329383771418, "learning_rate": 9.613985244336137e-06, "loss": 0.32182547450065613, "step": 4345 }, { "epoch": 1.0738818878181369, "grad_norm": 1.3293744961869585, "learning_rate": 9.609902494542136e-06, "loss": 0.3063158392906189, "step": 4346 }, { "epoch": 1.0741289844329134, "grad_norm": 1.35973664518208, "learning_rate": 9.605819809870955e-06, "loss": 0.3478289842605591, "step": 4347 }, { "epoch": 1.0743760810476897, "grad_norm": 1.2886301290038724, "learning_rate": 9.601737191004152e-06, "loss": 0.3380894660949707, "step": 4348 }, { "epoch": 1.074623177662466, "grad_norm": 1.2675933391851524, "learning_rate": 9.597654638623286e-06, "loss": 0.25362828373908997, "step": 4349 }, { "epoch": 1.0748702742772425, "grad_norm": 1.229719508167844, "learning_rate": 9.593572153409892e-06, "loss": 0.3059309124946594, "step": 4350 }, { "epoch": 1.0751173708920188, "grad_norm": 1.347832540055524, "learning_rate": 9.589489736045507e-06, "loss": 0.35063207149505615, "step": 4351 }, { "epoch": 1.075364467506795, "grad_norm": 1.4217265548447586, "learning_rate": 9.58540738721164e-06, "loss": 0.2935321033000946, "step": 4352 }, { "epoch": 1.0756115641215716, "grad_norm": 1.2153815160177908, "learning_rate": 9.581325107589799e-06, "loss": 0.2537657916545868, "step": 4353 }, { "epoch": 1.0758586607363478, "grad_norm": 1.4277013807174828, "learning_rate": 9.577242897861484e-06, "loss": 0.32990700006484985, "step": 4354 }, { "epoch": 1.0761057573511243, "grad_norm": 1.4416053885329565, "learning_rate": 9.573160758708174e-06, "loss": 0.34095877408981323, "step": 4355 }, { "epoch": 1.0763528539659006, "grad_norm": 1.3061946759041676, "learning_rate": 9.569078690811345e-06, "loss": 0.2686913311481476, "step": 4356 }, { "epoch": 1.0765999505806771, "grad_norm": 1.231381124018221, "learning_rate": 9.564996694852451e-06, "loss": 0.27283287048339844, "step": 4357 }, { "epoch": 1.0768470471954534, "grad_norm": 1.3649740547523401, "learning_rate": 9.560914771512948e-06, "loss": 0.3046985864639282, "step": 4358 }, { "epoch": 1.0770941438102297, "grad_norm": 1.2769386299607095, "learning_rate": 9.556832921474266e-06, "loss": 0.3284863829612732, "step": 4359 }, { "epoch": 1.0773412404250062, "grad_norm": 1.3902494490521375, "learning_rate": 9.552751145417827e-06, "loss": 0.2509782910346985, "step": 4360 }, { "epoch": 1.0775883370397825, "grad_norm": 1.190833131655178, "learning_rate": 9.548669444025046e-06, "loss": 0.24258893728256226, "step": 4361 }, { "epoch": 1.077835433654559, "grad_norm": 1.1504412613765809, "learning_rate": 9.544587817977318e-06, "loss": 0.25451406836509705, "step": 4362 }, { "epoch": 1.0780825302693353, "grad_norm": 1.1490861627060553, "learning_rate": 9.540506267956033e-06, "loss": 0.24409708380699158, "step": 4363 }, { "epoch": 1.0783296268841116, "grad_norm": 1.2594121226906008, "learning_rate": 9.536424794642562e-06, "loss": 0.35256335139274597, "step": 4364 }, { "epoch": 1.0785767234988881, "grad_norm": 1.3924489264411006, "learning_rate": 9.532343398718267e-06, "loss": 0.35716062784194946, "step": 4365 }, { "epoch": 1.0788238201136644, "grad_norm": 1.3711517688809995, "learning_rate": 9.528262080864495e-06, "loss": 0.2893790602684021, "step": 4366 }, { "epoch": 1.079070916728441, "grad_norm": 1.337906078776972, "learning_rate": 9.524180841762577e-06, "loss": 0.34182220697402954, "step": 4367 }, { "epoch": 1.0793180133432172, "grad_norm": 1.4334877870071452, "learning_rate": 9.520099682093841e-06, "loss": 0.2913774251937866, "step": 4368 }, { "epoch": 1.0795651099579935, "grad_norm": 1.1990798204892716, "learning_rate": 9.51601860253959e-06, "loss": 0.2496957778930664, "step": 4369 }, { "epoch": 1.07981220657277, "grad_norm": 1.3097325673605906, "learning_rate": 9.51193760378112e-06, "loss": 0.3010215759277344, "step": 4370 }, { "epoch": 1.0800593031875463, "grad_norm": 1.3496840065852873, "learning_rate": 9.507856686499713e-06, "loss": 0.2813163995742798, "step": 4371 }, { "epoch": 1.0803063998023228, "grad_norm": 1.3226929367582312, "learning_rate": 9.503775851376641e-06, "loss": 0.2741542458534241, "step": 4372 }, { "epoch": 1.080553496417099, "grad_norm": 1.3466261357768883, "learning_rate": 9.49969509909315e-06, "loss": 0.31583309173583984, "step": 4373 }, { "epoch": 1.0808005930318754, "grad_norm": 1.4177197207521584, "learning_rate": 9.495614430330484e-06, "loss": 0.37418797612190247, "step": 4374 }, { "epoch": 1.0810476896466519, "grad_norm": 1.4329737848922446, "learning_rate": 9.49153384576987e-06, "loss": 0.23577037453651428, "step": 4375 }, { "epoch": 1.0812947862614282, "grad_norm": 1.409821084445419, "learning_rate": 9.487453346092517e-06, "loss": 0.2738940119743347, "step": 4376 }, { "epoch": 1.0815418828762047, "grad_norm": 1.4094328686307098, "learning_rate": 9.483372931979628e-06, "loss": 0.2913071811199188, "step": 4377 }, { "epoch": 1.081788979490981, "grad_norm": 1.3835302182433746, "learning_rate": 9.47929260411238e-06, "loss": 0.2851962447166443, "step": 4378 }, { "epoch": 1.0820360761057572, "grad_norm": 1.4081353893976005, "learning_rate": 9.475212363171952e-06, "loss": 0.27682679891586304, "step": 4379 }, { "epoch": 1.0822831727205338, "grad_norm": 1.3463605403404209, "learning_rate": 9.471132209839493e-06, "loss": 0.2845798134803772, "step": 4380 }, { "epoch": 1.08253026933531, "grad_norm": 1.2620376428303919, "learning_rate": 9.467052144796137e-06, "loss": 0.2854655385017395, "step": 4381 }, { "epoch": 1.0827773659500866, "grad_norm": 1.4089797190482383, "learning_rate": 9.462972168723021e-06, "loss": 0.3112269937992096, "step": 4382 }, { "epoch": 1.0830244625648628, "grad_norm": 1.4552561776203081, "learning_rate": 9.458892282301246e-06, "loss": 0.31400173902511597, "step": 4383 }, { "epoch": 1.0832715591796394, "grad_norm": 1.3907482284463375, "learning_rate": 9.454812486211915e-06, "loss": 0.27049845457077026, "step": 4384 }, { "epoch": 1.0835186557944156, "grad_norm": 1.3168501052363886, "learning_rate": 9.450732781136103e-06, "loss": 0.29073524475097656, "step": 4385 }, { "epoch": 1.083765752409192, "grad_norm": 1.3936320926074057, "learning_rate": 9.446653167754886e-06, "loss": 0.3189418315887451, "step": 4386 }, { "epoch": 1.0840128490239684, "grad_norm": 1.3146097980994835, "learning_rate": 9.442573646749298e-06, "loss": 0.2774944007396698, "step": 4387 }, { "epoch": 1.0842599456387447, "grad_norm": 1.3466273001572244, "learning_rate": 9.438494218800384e-06, "loss": 0.3311017155647278, "step": 4388 }, { "epoch": 1.084507042253521, "grad_norm": 1.4580767829228867, "learning_rate": 9.434414884589157e-06, "loss": 0.24582895636558533, "step": 4389 }, { "epoch": 1.0847541388682975, "grad_norm": 1.252019923595984, "learning_rate": 9.430335644796627e-06, "loss": 0.27944809198379517, "step": 4390 }, { "epoch": 1.0850012354830738, "grad_norm": 1.1782924042791367, "learning_rate": 9.426256500103778e-06, "loss": 0.27960655093193054, "step": 4391 }, { "epoch": 1.0852483320978503, "grad_norm": 1.2719875754316579, "learning_rate": 9.422177451191586e-06, "loss": 0.2876070737838745, "step": 4392 }, { "epoch": 1.0854954287126266, "grad_norm": 1.3021437731725676, "learning_rate": 9.418098498740999e-06, "loss": 0.3057039976119995, "step": 4393 }, { "epoch": 1.0857425253274031, "grad_norm": 1.4870434656642157, "learning_rate": 9.414019643432963e-06, "loss": 0.31728821992874146, "step": 4394 }, { "epoch": 1.0859896219421794, "grad_norm": 1.2783479911961708, "learning_rate": 9.4099408859484e-06, "loss": 0.27324000000953674, "step": 4395 }, { "epoch": 1.0862367185569557, "grad_norm": 1.2099520184344374, "learning_rate": 9.405862226968213e-06, "loss": 0.2584097385406494, "step": 4396 }, { "epoch": 1.0864838151717322, "grad_norm": 1.4495049532092403, "learning_rate": 9.4017836671733e-06, "loss": 0.37044820189476013, "step": 4397 }, { "epoch": 1.0867309117865085, "grad_norm": 1.3601519213496858, "learning_rate": 9.397705207244527e-06, "loss": 0.34626704454421997, "step": 4398 }, { "epoch": 1.086978008401285, "grad_norm": 1.2638445272081347, "learning_rate": 9.393626847862762e-06, "loss": 0.27759724855422974, "step": 4399 }, { "epoch": 1.0872251050160613, "grad_norm": 1.6086065039218815, "learning_rate": 9.389548589708837e-06, "loss": 0.31825685501098633, "step": 4400 }, { "epoch": 1.0874722016308376, "grad_norm": 1.323436387695426, "learning_rate": 9.385470433463578e-06, "loss": 0.3040592074394226, "step": 4401 }, { "epoch": 1.087719298245614, "grad_norm": 1.2066283861977658, "learning_rate": 9.381392379807794e-06, "loss": 0.2769585847854614, "step": 4402 }, { "epoch": 1.0879663948603904, "grad_norm": 1.4853813285068922, "learning_rate": 9.377314429422271e-06, "loss": 0.3292359411716461, "step": 4403 }, { "epoch": 1.0882134914751669, "grad_norm": 1.3051006516353567, "learning_rate": 9.373236582987786e-06, "loss": 0.2552199363708496, "step": 4404 }, { "epoch": 1.0884605880899432, "grad_norm": 1.4024338474318443, "learning_rate": 9.369158841185086e-06, "loss": 0.28164994716644287, "step": 4405 }, { "epoch": 1.0887076847047195, "grad_norm": 1.3943550474158535, "learning_rate": 9.365081204694921e-06, "loss": 0.3459336757659912, "step": 4406 }, { "epoch": 1.088954781319496, "grad_norm": 1.3991913836190948, "learning_rate": 9.361003674198003e-06, "loss": 0.2612719237804413, "step": 4407 }, { "epoch": 1.0892018779342723, "grad_norm": 1.4309380847851931, "learning_rate": 9.356926250375031e-06, "loss": 0.32950669527053833, "step": 4408 }, { "epoch": 1.0894489745490488, "grad_norm": 1.3777358623673848, "learning_rate": 9.352848933906697e-06, "loss": 0.34218883514404297, "step": 4409 }, { "epoch": 1.089696071163825, "grad_norm": 1.3416188357592047, "learning_rate": 9.348771725473663e-06, "loss": 0.31146442890167236, "step": 4410 }, { "epoch": 1.0899431677786013, "grad_norm": 1.2590932912162223, "learning_rate": 9.34469462575658e-06, "loss": 0.2287217080593109, "step": 4411 }, { "epoch": 1.0901902643933779, "grad_norm": 1.2887363319133045, "learning_rate": 9.340617635436077e-06, "loss": 0.26171043515205383, "step": 4412 }, { "epoch": 1.0904373610081541, "grad_norm": 1.393430479787377, "learning_rate": 9.33654075519277e-06, "loss": 0.3583689332008362, "step": 4413 }, { "epoch": 1.0906844576229306, "grad_norm": 1.2900418105342115, "learning_rate": 9.332463985707248e-06, "loss": 0.3190559148788452, "step": 4414 }, { "epoch": 1.090931554237707, "grad_norm": 1.3126773003174093, "learning_rate": 9.328387327660084e-06, "loss": 0.24664011597633362, "step": 4415 }, { "epoch": 1.0911786508524832, "grad_norm": 1.3152190518762765, "learning_rate": 9.324310781731842e-06, "loss": 0.2706741988658905, "step": 4416 }, { "epoch": 1.0914257474672597, "grad_norm": 1.3359265405430374, "learning_rate": 9.320234348603054e-06, "loss": 0.26909399032592773, "step": 4417 }, { "epoch": 1.091672844082036, "grad_norm": 1.2882463888363183, "learning_rate": 9.316158028954243e-06, "loss": 0.24828597903251648, "step": 4418 }, { "epoch": 1.0919199406968125, "grad_norm": 1.3029277240137527, "learning_rate": 9.312081823465905e-06, "loss": 0.3109065294265747, "step": 4419 }, { "epoch": 1.0921670373115888, "grad_norm": 1.4925108490535177, "learning_rate": 9.30800573281853e-06, "loss": 0.33928972482681274, "step": 4420 }, { "epoch": 1.092414133926365, "grad_norm": 1.3022265771739248, "learning_rate": 9.30392975769257e-06, "loss": 0.2647826671600342, "step": 4421 }, { "epoch": 1.0926612305411416, "grad_norm": 1.2437510770349085, "learning_rate": 9.299853898768471e-06, "loss": 0.300252228975296, "step": 4422 }, { "epoch": 1.092908327155918, "grad_norm": 1.4711110860490053, "learning_rate": 9.295778156726659e-06, "loss": 0.3309367895126343, "step": 4423 }, { "epoch": 1.0931554237706944, "grad_norm": 1.3717235293779675, "learning_rate": 9.291702532247532e-06, "loss": 0.3125533163547516, "step": 4424 }, { "epoch": 1.0934025203854707, "grad_norm": 1.3121068187149996, "learning_rate": 9.28762702601148e-06, "loss": 0.33179065585136414, "step": 4425 }, { "epoch": 1.093649617000247, "grad_norm": 1.2347425468005078, "learning_rate": 9.283551638698863e-06, "loss": 0.2958395481109619, "step": 4426 }, { "epoch": 1.0938967136150235, "grad_norm": 1.3669000702397862, "learning_rate": 9.279476370990032e-06, "loss": 0.3210675120353699, "step": 4427 }, { "epoch": 1.0941438102297998, "grad_norm": 1.4576597714842312, "learning_rate": 9.275401223565304e-06, "loss": 0.3411126732826233, "step": 4428 }, { "epoch": 1.0943909068445763, "grad_norm": 1.2177398081141007, "learning_rate": 9.271326197104987e-06, "loss": 0.25167280435562134, "step": 4429 }, { "epoch": 1.0946380034593526, "grad_norm": 1.3677355260888715, "learning_rate": 9.267251292289365e-06, "loss": 0.2808977961540222, "step": 4430 }, { "epoch": 1.094885100074129, "grad_norm": 1.3912819896125683, "learning_rate": 9.263176509798701e-06, "loss": 0.33022549748420715, "step": 4431 }, { "epoch": 1.0951321966889054, "grad_norm": 1.3455972843649537, "learning_rate": 9.25910185031324e-06, "loss": 0.2720234990119934, "step": 4432 }, { "epoch": 1.0953792933036817, "grad_norm": 1.277226921327839, "learning_rate": 9.255027314513204e-06, "loss": 0.3406318426132202, "step": 4433 }, { "epoch": 1.0956263899184582, "grad_norm": 1.242264789661118, "learning_rate": 9.250952903078799e-06, "loss": 0.24882301688194275, "step": 4434 }, { "epoch": 1.0958734865332345, "grad_norm": 1.2672524781377528, "learning_rate": 9.246878616690202e-06, "loss": 0.2848135828971863, "step": 4435 }, { "epoch": 1.096120583148011, "grad_norm": 1.3905755493280982, "learning_rate": 9.242804456027575e-06, "loss": 0.3211725056171417, "step": 4436 }, { "epoch": 1.0963676797627873, "grad_norm": 1.4647241080147198, "learning_rate": 9.238730421771058e-06, "loss": 0.2913506031036377, "step": 4437 }, { "epoch": 1.0966147763775635, "grad_norm": 1.3345854574855438, "learning_rate": 9.234656514600769e-06, "loss": 0.2854933738708496, "step": 4438 }, { "epoch": 1.09686187299234, "grad_norm": 1.3179300839337245, "learning_rate": 9.230582735196807e-06, "loss": 0.30920708179473877, "step": 4439 }, { "epoch": 1.0971089696071163, "grad_norm": 1.2515213909253347, "learning_rate": 9.226509084239246e-06, "loss": 0.292854368686676, "step": 4440 }, { "epoch": 1.0973560662218929, "grad_norm": 1.289030164679738, "learning_rate": 9.222435562408146e-06, "loss": 0.29383212327957153, "step": 4441 }, { "epoch": 1.0976031628366691, "grad_norm": 1.2809682908066458, "learning_rate": 9.218362170383534e-06, "loss": 0.25690212845802307, "step": 4442 }, { "epoch": 1.0978502594514454, "grad_norm": 1.3343244949033155, "learning_rate": 9.214288908845422e-06, "loss": 0.2699431777000427, "step": 4443 }, { "epoch": 1.098097356066222, "grad_norm": 1.2252622480571314, "learning_rate": 9.210215778473804e-06, "loss": 0.28770914673805237, "step": 4444 }, { "epoch": 1.0983444526809982, "grad_norm": 1.238193684283698, "learning_rate": 9.20614277994864e-06, "loss": 0.27511435747146606, "step": 4445 }, { "epoch": 1.0985915492957747, "grad_norm": 1.2790963738117433, "learning_rate": 9.202069913949886e-06, "loss": 0.2676970362663269, "step": 4446 }, { "epoch": 1.098838645910551, "grad_norm": 1.2900519422909684, "learning_rate": 9.197997181157458e-06, "loss": 0.27906113862991333, "step": 4447 }, { "epoch": 1.0990857425253273, "grad_norm": 1.263976242848393, "learning_rate": 9.193924582251262e-06, "loss": 0.30983737111091614, "step": 4448 }, { "epoch": 1.0993328391401038, "grad_norm": 1.3789777554969196, "learning_rate": 9.189852117911176e-06, "loss": 0.32147377729415894, "step": 4449 }, { "epoch": 1.0995799357548801, "grad_norm": 1.4369366033225373, "learning_rate": 9.18577978881705e-06, "loss": 0.31631213426589966, "step": 4450 }, { "epoch": 1.0998270323696566, "grad_norm": 1.56315710159792, "learning_rate": 9.181707595648728e-06, "loss": 0.3253578543663025, "step": 4451 }, { "epoch": 1.100074128984433, "grad_norm": 1.2541716208986062, "learning_rate": 9.177635539086014e-06, "loss": 0.24791595339775085, "step": 4452 }, { "epoch": 1.1003212255992092, "grad_norm": 1.3719539264827973, "learning_rate": 9.1735636198087e-06, "loss": 0.2834380269050598, "step": 4453 }, { "epoch": 1.1005683222139857, "grad_norm": 1.209779694761579, "learning_rate": 9.16949183849655e-06, "loss": 0.2762293815612793, "step": 4454 }, { "epoch": 1.100815418828762, "grad_norm": 1.218734664015824, "learning_rate": 9.165420195829311e-06, "loss": 0.2700618505477905, "step": 4455 }, { "epoch": 1.1010625154435385, "grad_norm": 1.2711791375687775, "learning_rate": 9.161348692486698e-06, "loss": 0.27447348833084106, "step": 4456 }, { "epoch": 1.1013096120583148, "grad_norm": 1.333731186908436, "learning_rate": 9.157277329148406e-06, "loss": 0.3402518928050995, "step": 4457 }, { "epoch": 1.101556708673091, "grad_norm": 1.3850231995690498, "learning_rate": 9.15320610649411e-06, "loss": 0.34531843662261963, "step": 4458 }, { "epoch": 1.1018038052878676, "grad_norm": 1.4413420469008944, "learning_rate": 9.14913502520346e-06, "loss": 0.30006444454193115, "step": 4459 }, { "epoch": 1.1020509019026439, "grad_norm": 1.2742292507447308, "learning_rate": 9.145064085956082e-06, "loss": 0.24041420221328735, "step": 4460 }, { "epoch": 1.1022979985174204, "grad_norm": 1.3305831415178646, "learning_rate": 9.140993289431572e-06, "loss": 0.28050220012664795, "step": 4461 }, { "epoch": 1.1025450951321967, "grad_norm": 1.280697141501411, "learning_rate": 9.13692263630952e-06, "loss": 0.34074336290359497, "step": 4462 }, { "epoch": 1.102792191746973, "grad_norm": 1.3305235803886606, "learning_rate": 9.13285212726947e-06, "loss": 0.304501473903656, "step": 4463 }, { "epoch": 1.1030392883617495, "grad_norm": 1.4308868726057546, "learning_rate": 9.128781762990954e-06, "loss": 0.30231165885925293, "step": 4464 }, { "epoch": 1.1032863849765258, "grad_norm": 1.362514662514629, "learning_rate": 9.12471154415348e-06, "loss": 0.23612380027770996, "step": 4465 }, { "epoch": 1.1035334815913023, "grad_norm": 1.3511174701134925, "learning_rate": 9.120641471436528e-06, "loss": 0.29765647649765015, "step": 4466 }, { "epoch": 1.1037805782060786, "grad_norm": 1.3196768733056743, "learning_rate": 9.116571545519558e-06, "loss": 0.305322527885437, "step": 4467 }, { "epoch": 1.104027674820855, "grad_norm": 1.2299229157019556, "learning_rate": 9.112501767082004e-06, "loss": 0.26815855503082275, "step": 4468 }, { "epoch": 1.1042747714356314, "grad_norm": 1.4338011903530847, "learning_rate": 9.108432136803266e-06, "loss": 0.3264486491680145, "step": 4469 }, { "epoch": 1.1045218680504076, "grad_norm": 1.4370458581652472, "learning_rate": 9.104362655362737e-06, "loss": 0.2922375202178955, "step": 4470 }, { "epoch": 1.1047689646651841, "grad_norm": 1.239363523415366, "learning_rate": 9.100293323439767e-06, "loss": 0.2924337089061737, "step": 4471 }, { "epoch": 1.1050160612799604, "grad_norm": 1.3575565899972037, "learning_rate": 9.096224141713698e-06, "loss": 0.29353490471839905, "step": 4472 }, { "epoch": 1.1052631578947367, "grad_norm": 1.3233801718319254, "learning_rate": 9.092155110863832e-06, "loss": 0.3043777346611023, "step": 4473 }, { "epoch": 1.1055102545095132, "grad_norm": 1.2785429818177307, "learning_rate": 9.088086231569457e-06, "loss": 0.27240699529647827, "step": 4474 }, { "epoch": 1.1057573511242895, "grad_norm": 1.4697101574487434, "learning_rate": 9.084017504509831e-06, "loss": 0.2987288236618042, "step": 4475 }, { "epoch": 1.106004447739066, "grad_norm": 1.6269051311691214, "learning_rate": 9.079948930364183e-06, "loss": 0.35536500811576843, "step": 4476 }, { "epoch": 1.1062515443538423, "grad_norm": 1.503624833310778, "learning_rate": 9.07588050981172e-06, "loss": 0.3068338930606842, "step": 4477 }, { "epoch": 1.1064986409686188, "grad_norm": 1.3219298458575994, "learning_rate": 9.071812243531628e-06, "loss": 0.27957749366760254, "step": 4478 }, { "epoch": 1.1067457375833951, "grad_norm": 1.37400385273276, "learning_rate": 9.06774413220306e-06, "loss": 0.3291800618171692, "step": 4479 }, { "epoch": 1.1069928341981714, "grad_norm": 1.284562014659041, "learning_rate": 9.063676176505144e-06, "loss": 0.31187179684638977, "step": 4480 }, { "epoch": 1.107239930812948, "grad_norm": 1.3804606419009655, "learning_rate": 9.059608377116987e-06, "loss": 0.2935185432434082, "step": 4481 }, { "epoch": 1.1074870274277242, "grad_norm": 1.2366002940076892, "learning_rate": 9.05554073471767e-06, "loss": 0.26990067958831787, "step": 4482 }, { "epoch": 1.1077341240425007, "grad_norm": 1.2642160793832902, "learning_rate": 9.051473249986239e-06, "loss": 0.26653462648391724, "step": 4483 }, { "epoch": 1.107981220657277, "grad_norm": 1.3726996435179528, "learning_rate": 9.047405923601718e-06, "loss": 0.3244425058364868, "step": 4484 }, { "epoch": 1.1082283172720533, "grad_norm": 1.3518698979144155, "learning_rate": 9.04333875624311e-06, "loss": 0.246873676776886, "step": 4485 }, { "epoch": 1.1084754138868298, "grad_norm": 1.2806124912777272, "learning_rate": 9.039271748589385e-06, "loss": 0.2776726484298706, "step": 4486 }, { "epoch": 1.108722510501606, "grad_norm": 1.2906722024516502, "learning_rate": 9.035204901319493e-06, "loss": 0.3333932161331177, "step": 4487 }, { "epoch": 1.1089696071163826, "grad_norm": 1.368568120350007, "learning_rate": 9.031138215112345e-06, "loss": 0.328985333442688, "step": 4488 }, { "epoch": 1.1092167037311589, "grad_norm": 1.2343506818682448, "learning_rate": 9.027071690646843e-06, "loss": 0.25990957021713257, "step": 4489 }, { "epoch": 1.1094638003459352, "grad_norm": 1.3774213588093935, "learning_rate": 9.023005328601843e-06, "loss": 0.29191964864730835, "step": 4490 }, { "epoch": 1.1097108969607117, "grad_norm": 1.410957511024718, "learning_rate": 9.018939129656186e-06, "loss": 0.3351482152938843, "step": 4491 }, { "epoch": 1.109957993575488, "grad_norm": 1.4006327016878808, "learning_rate": 9.014873094488684e-06, "loss": 0.3516804277896881, "step": 4492 }, { "epoch": 1.1102050901902645, "grad_norm": 1.3948022460672893, "learning_rate": 9.010807223778118e-06, "loss": 0.3501877188682556, "step": 4493 }, { "epoch": 1.1104521868050408, "grad_norm": 1.4128687570746208, "learning_rate": 9.006741518203246e-06, "loss": 0.32521283626556396, "step": 4494 }, { "epoch": 1.110699283419817, "grad_norm": 1.313558797041853, "learning_rate": 9.002675978442793e-06, "loss": 0.27217498421669006, "step": 4495 }, { "epoch": 1.1109463800345936, "grad_norm": 1.5992739945236663, "learning_rate": 8.998610605175469e-06, "loss": 0.3302215337753296, "step": 4496 }, { "epoch": 1.1111934766493698, "grad_norm": 1.5167606101210385, "learning_rate": 8.994545399079935e-06, "loss": 0.3038281500339508, "step": 4497 }, { "epoch": 1.1114405732641464, "grad_norm": 1.6175148389956722, "learning_rate": 8.99048036083484e-06, "loss": 0.38074707984924316, "step": 4498 }, { "epoch": 1.1116876698789226, "grad_norm": 1.2947738313828698, "learning_rate": 8.986415491118804e-06, "loss": 0.2842617332935333, "step": 4499 }, { "epoch": 1.111934766493699, "grad_norm": 1.270024169345871, "learning_rate": 8.98235079061041e-06, "loss": 0.28247833251953125, "step": 4500 }, { "epoch": 1.1121818631084754, "grad_norm": 1.3375151257460725, "learning_rate": 8.978286259988225e-06, "loss": 0.25447553396224976, "step": 4501 }, { "epoch": 1.1124289597232517, "grad_norm": 1.2583469614821876, "learning_rate": 8.974221899930775e-06, "loss": 0.2535570561885834, "step": 4502 }, { "epoch": 1.1126760563380282, "grad_norm": 1.3624873090855765, "learning_rate": 8.970157711116572e-06, "loss": 0.2843824625015259, "step": 4503 }, { "epoch": 1.1129231529528045, "grad_norm": 1.2169705985873764, "learning_rate": 8.966093694224082e-06, "loss": 0.26383399963378906, "step": 4504 }, { "epoch": 1.113170249567581, "grad_norm": 1.267459973435082, "learning_rate": 8.962029849931754e-06, "loss": 0.2641472816467285, "step": 4505 }, { "epoch": 1.1134173461823573, "grad_norm": 1.3365908011962369, "learning_rate": 8.957966178918009e-06, "loss": 0.30141329765319824, "step": 4506 }, { "epoch": 1.1136644427971336, "grad_norm": 1.3504554577121028, "learning_rate": 8.953902681861229e-06, "loss": 0.3043310046195984, "step": 4507 }, { "epoch": 1.1139115394119101, "grad_norm": 1.1997638197730047, "learning_rate": 8.949839359439783e-06, "loss": 0.2806432843208313, "step": 4508 }, { "epoch": 1.1141586360266864, "grad_norm": 1.2747837001835631, "learning_rate": 8.94577621233199e-06, "loss": 0.3080585300922394, "step": 4509 }, { "epoch": 1.1144057326414627, "grad_norm": 1.2436511688425755, "learning_rate": 8.941713241216166e-06, "loss": 0.2963334918022156, "step": 4510 }, { "epoch": 1.1146528292562392, "grad_norm": 1.2923212781761906, "learning_rate": 8.937650446770571e-06, "loss": 0.2866866886615753, "step": 4511 }, { "epoch": 1.1148999258710155, "grad_norm": 1.2923386287691783, "learning_rate": 8.933587829673447e-06, "loss": 0.2680962085723877, "step": 4512 }, { "epoch": 1.115147022485792, "grad_norm": 1.4050258543193028, "learning_rate": 8.929525390603013e-06, "loss": 0.28943753242492676, "step": 4513 }, { "epoch": 1.1153941191005683, "grad_norm": 1.4523417819869078, "learning_rate": 8.925463130237446e-06, "loss": 0.32529157400131226, "step": 4514 }, { "epoch": 1.1156412157153448, "grad_norm": 1.262844555398674, "learning_rate": 8.921401049254907e-06, "loss": 0.2535421550273895, "step": 4515 }, { "epoch": 1.115888312330121, "grad_norm": 1.198533699399106, "learning_rate": 8.917339148333511e-06, "loss": 0.2901889383792877, "step": 4516 }, { "epoch": 1.1161354089448974, "grad_norm": 1.5515281617736014, "learning_rate": 8.913277428151359e-06, "loss": 0.29002895951271057, "step": 4517 }, { "epoch": 1.1163825055596739, "grad_norm": 1.463223266289364, "learning_rate": 8.909215889386506e-06, "loss": 0.3372204005718231, "step": 4518 }, { "epoch": 1.1166296021744502, "grad_norm": 1.3743204872150416, "learning_rate": 8.905154532716986e-06, "loss": 0.25997477769851685, "step": 4519 }, { "epoch": 1.1168766987892267, "grad_norm": 1.4174003536987074, "learning_rate": 8.901093358820806e-06, "loss": 0.2637225389480591, "step": 4520 }, { "epoch": 1.117123795404003, "grad_norm": 1.2099576069929239, "learning_rate": 8.897032368375935e-06, "loss": 0.277826189994812, "step": 4521 }, { "epoch": 1.1173708920187793, "grad_norm": 1.4207347380781927, "learning_rate": 8.892971562060315e-06, "loss": 0.33670806884765625, "step": 4522 }, { "epoch": 1.1176179886335558, "grad_norm": 1.4844007733019502, "learning_rate": 8.888910940551856e-06, "loss": 0.3435863256454468, "step": 4523 }, { "epoch": 1.117865085248332, "grad_norm": 1.2762092571825265, "learning_rate": 8.884850504528438e-06, "loss": 0.2820467948913574, "step": 4524 }, { "epoch": 1.1181121818631086, "grad_norm": 1.220128886138057, "learning_rate": 8.880790254667909e-06, "loss": 0.25946366786956787, "step": 4525 }, { "epoch": 1.1183592784778849, "grad_norm": 1.3780044623202214, "learning_rate": 8.876730191648084e-06, "loss": 0.31732553243637085, "step": 4526 }, { "epoch": 1.1186063750926611, "grad_norm": 1.3506109182221253, "learning_rate": 8.872670316146754e-06, "loss": 0.29105329513549805, "step": 4527 }, { "epoch": 1.1188534717074377, "grad_norm": 1.3148403150170667, "learning_rate": 8.868610628841667e-06, "loss": 0.30334049463272095, "step": 4528 }, { "epoch": 1.119100568322214, "grad_norm": 1.2735645725492069, "learning_rate": 8.864551130410557e-06, "loss": 0.2644045948982239, "step": 4529 }, { "epoch": 1.1193476649369904, "grad_norm": 1.323870720382439, "learning_rate": 8.860491821531106e-06, "loss": 0.24534180760383606, "step": 4530 }, { "epoch": 1.1195947615517667, "grad_norm": 1.367962455347342, "learning_rate": 8.856432702880984e-06, "loss": 0.2885446846485138, "step": 4531 }, { "epoch": 1.119841858166543, "grad_norm": 1.1852831792349554, "learning_rate": 8.852373775137812e-06, "loss": 0.25780466198921204, "step": 4532 }, { "epoch": 1.1200889547813195, "grad_norm": 1.3492729283384477, "learning_rate": 8.848315038979186e-06, "loss": 0.32679617404937744, "step": 4533 }, { "epoch": 1.1203360513960958, "grad_norm": 1.3411962552365673, "learning_rate": 8.844256495082678e-06, "loss": 0.32663607597351074, "step": 4534 }, { "epoch": 1.1205831480108723, "grad_norm": 1.3233133184306352, "learning_rate": 8.840198144125814e-06, "loss": 0.2718954086303711, "step": 4535 }, { "epoch": 1.1208302446256486, "grad_norm": 1.2638282117806574, "learning_rate": 8.836139986786099e-06, "loss": 0.3002903461456299, "step": 4536 }, { "epoch": 1.121077341240425, "grad_norm": 1.4249427468374987, "learning_rate": 8.832082023741002e-06, "loss": 0.3102460503578186, "step": 4537 }, { "epoch": 1.1213244378552014, "grad_norm": 1.400817116584848, "learning_rate": 8.828024255667952e-06, "loss": 0.3362698554992676, "step": 4538 }, { "epoch": 1.1215715344699777, "grad_norm": 1.2402921640423705, "learning_rate": 8.823966683244359e-06, "loss": 0.2779204249382019, "step": 4539 }, { "epoch": 1.1218186310847542, "grad_norm": 1.2707076554355665, "learning_rate": 8.819909307147587e-06, "loss": 0.2570480704307556, "step": 4540 }, { "epoch": 1.1220657276995305, "grad_norm": 1.3545926092009268, "learning_rate": 8.81585212805498e-06, "loss": 0.3205685019493103, "step": 4541 }, { "epoch": 1.122312824314307, "grad_norm": 1.2317052023599497, "learning_rate": 8.81179514664384e-06, "loss": 0.26328209042549133, "step": 4542 }, { "epoch": 1.1225599209290833, "grad_norm": 1.4075363915129115, "learning_rate": 8.80773836359144e-06, "loss": 0.31726300716400146, "step": 4543 }, { "epoch": 1.1228070175438596, "grad_norm": 1.3152303393928082, "learning_rate": 8.803681779575021e-06, "loss": 0.29082340002059937, "step": 4544 }, { "epoch": 1.123054114158636, "grad_norm": 1.2377158411795788, "learning_rate": 8.79962539527178e-06, "loss": 0.2634829878807068, "step": 4545 }, { "epoch": 1.1233012107734124, "grad_norm": 1.4947884633399444, "learning_rate": 8.7955692113589e-06, "loss": 0.38168567419052124, "step": 4546 }, { "epoch": 1.1235483073881887, "grad_norm": 1.2258063901775855, "learning_rate": 8.791513228513508e-06, "loss": 0.30484890937805176, "step": 4547 }, { "epoch": 1.1237954040029652, "grad_norm": 1.3806797926750805, "learning_rate": 8.787457447412722e-06, "loss": 0.3209848403930664, "step": 4548 }, { "epoch": 1.1240425006177415, "grad_norm": 1.31299670698741, "learning_rate": 8.783401868733602e-06, "loss": 0.2783809304237366, "step": 4549 }, { "epoch": 1.124289597232518, "grad_norm": 1.6640465008350926, "learning_rate": 8.779346493153195e-06, "loss": 0.24763809144496918, "step": 4550 }, { "epoch": 1.1245366938472943, "grad_norm": 1.3385509946917247, "learning_rate": 8.775291321348502e-06, "loss": 0.30387231707572937, "step": 4551 }, { "epoch": 1.1247837904620708, "grad_norm": 1.3305953064420053, "learning_rate": 8.771236353996487e-06, "loss": 0.2950614094734192, "step": 4552 }, { "epoch": 1.125030887076847, "grad_norm": 1.4287400295950394, "learning_rate": 8.767181591774093e-06, "loss": 0.32362988591194153, "step": 4553 }, { "epoch": 1.1252779836916234, "grad_norm": 1.63043641298063, "learning_rate": 8.763127035358216e-06, "loss": 0.3477396070957184, "step": 4554 }, { "epoch": 1.1255250803063999, "grad_norm": 1.333534253549201, "learning_rate": 8.759072685425728e-06, "loss": 0.3281717300415039, "step": 4555 }, { "epoch": 1.1257721769211761, "grad_norm": 1.2044111336874723, "learning_rate": 8.755018542653454e-06, "loss": 0.2556273937225342, "step": 4556 }, { "epoch": 1.1260192735359524, "grad_norm": 1.6015154495090616, "learning_rate": 8.750964607718202e-06, "loss": 0.27183812856674194, "step": 4557 }, { "epoch": 1.126266370150729, "grad_norm": 1.3303162676986695, "learning_rate": 8.746910881296733e-06, "loss": 0.27940651774406433, "step": 4558 }, { "epoch": 1.1265134667655052, "grad_norm": 1.3942106781530734, "learning_rate": 8.742857364065768e-06, "loss": 0.2877708673477173, "step": 4559 }, { "epoch": 1.1267605633802817, "grad_norm": 1.2454427509166293, "learning_rate": 8.738804056702008e-06, "loss": 0.2515244483947754, "step": 4560 }, { "epoch": 1.127007659995058, "grad_norm": 1.506064421809111, "learning_rate": 8.734750959882106e-06, "loss": 0.28658363223075867, "step": 4561 }, { "epoch": 1.1272547566098345, "grad_norm": 1.4509571415084281, "learning_rate": 8.73069807428269e-06, "loss": 0.31977516412734985, "step": 4562 }, { "epoch": 1.1275018532246108, "grad_norm": 1.3501272257237669, "learning_rate": 8.726645400580345e-06, "loss": 0.3556531071662903, "step": 4563 }, { "epoch": 1.1277489498393871, "grad_norm": 3.448286739237353, "learning_rate": 8.722592939451626e-06, "loss": 0.2523960471153259, "step": 4564 }, { "epoch": 1.1279960464541636, "grad_norm": 1.4316549423239429, "learning_rate": 8.718540691573052e-06, "loss": 0.2756030559539795, "step": 4565 }, { "epoch": 1.12824314306894, "grad_norm": 1.2890073122689654, "learning_rate": 8.714488657621101e-06, "loss": 0.31792375445365906, "step": 4566 }, { "epoch": 1.1284902396837164, "grad_norm": 1.2761527515889992, "learning_rate": 8.710436838272218e-06, "loss": 0.29891520738601685, "step": 4567 }, { "epoch": 1.1287373362984927, "grad_norm": 1.3573273763973126, "learning_rate": 8.706385234202817e-06, "loss": 0.28141358494758606, "step": 4568 }, { "epoch": 1.128984432913269, "grad_norm": 1.2661043658292483, "learning_rate": 8.702333846089268e-06, "loss": 0.2641100287437439, "step": 4569 }, { "epoch": 1.1292315295280455, "grad_norm": 1.3601760268675656, "learning_rate": 8.69828267460791e-06, "loss": 0.3504883646965027, "step": 4570 }, { "epoch": 1.1294786261428218, "grad_norm": 1.2850659054641729, "learning_rate": 8.69423172043505e-06, "loss": 0.279161274433136, "step": 4571 }, { "epoch": 1.1297257227575983, "grad_norm": 1.4431461006850457, "learning_rate": 8.69018098424695e-06, "loss": 0.3353132903575897, "step": 4572 }, { "epoch": 1.1299728193723746, "grad_norm": 1.380937174136925, "learning_rate": 8.68613046671984e-06, "loss": 0.27346187829971313, "step": 4573 }, { "epoch": 1.1302199159871509, "grad_norm": 1.262474540841938, "learning_rate": 8.68208016852991e-06, "loss": 0.28805842995643616, "step": 4574 }, { "epoch": 1.1304670126019274, "grad_norm": 1.4517134785116323, "learning_rate": 8.678030090353317e-06, "loss": 0.2757834196090698, "step": 4575 }, { "epoch": 1.1307141092167037, "grad_norm": 1.2867292590329007, "learning_rate": 8.673980232866183e-06, "loss": 0.2967497706413269, "step": 4576 }, { "epoch": 1.1309612058314802, "grad_norm": 1.3350241752254643, "learning_rate": 8.66993059674459e-06, "loss": 0.2990427017211914, "step": 4577 }, { "epoch": 1.1312083024462565, "grad_norm": 1.450593866910197, "learning_rate": 8.665881182664582e-06, "loss": 0.2584417164325714, "step": 4578 }, { "epoch": 1.131455399061033, "grad_norm": 1.4439428430525774, "learning_rate": 8.661831991302171e-06, "loss": 0.3234679102897644, "step": 4579 }, { "epoch": 1.1317024956758093, "grad_norm": 1.1549121078049933, "learning_rate": 8.657783023333326e-06, "loss": 0.24963854253292084, "step": 4580 }, { "epoch": 1.1319495922905856, "grad_norm": 1.3940895119890646, "learning_rate": 8.653734279433977e-06, "loss": 0.3144843280315399, "step": 4581 }, { "epoch": 1.132196688905362, "grad_norm": 1.354077249712017, "learning_rate": 8.649685760280026e-06, "loss": 0.3207384943962097, "step": 4582 }, { "epoch": 1.1324437855201384, "grad_norm": 1.4030665989420985, "learning_rate": 8.645637466547332e-06, "loss": 0.29313912987709045, "step": 4583 }, { "epoch": 1.1326908821349146, "grad_norm": 1.3919208902179248, "learning_rate": 8.641589398911714e-06, "loss": 0.27320539951324463, "step": 4584 }, { "epoch": 1.1329379787496912, "grad_norm": 1.26950691017593, "learning_rate": 8.637541558048957e-06, "loss": 0.29351139068603516, "step": 4585 }, { "epoch": 1.1331850753644674, "grad_norm": 1.1806036031288092, "learning_rate": 8.633493944634813e-06, "loss": 0.24547207355499268, "step": 4586 }, { "epoch": 1.133432171979244, "grad_norm": 1.3068541247072372, "learning_rate": 8.629446559344978e-06, "loss": 0.27768561244010925, "step": 4587 }, { "epoch": 1.1336792685940202, "grad_norm": 1.3347378217219465, "learning_rate": 8.625399402855129e-06, "loss": 0.28572580218315125, "step": 4588 }, { "epoch": 1.1339263652087967, "grad_norm": 1.2768651243448133, "learning_rate": 8.621352475840898e-06, "loss": 0.28334715962409973, "step": 4589 }, { "epoch": 1.134173461823573, "grad_norm": 1.2073365332408101, "learning_rate": 8.617305778977875e-06, "loss": 0.23837479948997498, "step": 4590 }, { "epoch": 1.1344205584383493, "grad_norm": 1.3583513839067531, "learning_rate": 8.61325931294162e-06, "loss": 0.2959737479686737, "step": 4591 }, { "epoch": 1.1346676550531258, "grad_norm": 1.3348138921289114, "learning_rate": 8.609213078407644e-06, "loss": 0.33210963010787964, "step": 4592 }, { "epoch": 1.1349147516679021, "grad_norm": 1.2681246683146552, "learning_rate": 8.60516707605143e-06, "loss": 0.26880934834480286, "step": 4593 }, { "epoch": 1.1351618482826784, "grad_norm": 1.2711321121365713, "learning_rate": 8.601121306548414e-06, "loss": 0.3034414052963257, "step": 4594 }, { "epoch": 1.135408944897455, "grad_norm": 1.4190727047664178, "learning_rate": 8.597075770573995e-06, "loss": 0.35004931688308716, "step": 4595 }, { "epoch": 1.1356560415122312, "grad_norm": 1.4156619427992545, "learning_rate": 8.593030468803537e-06, "loss": 0.33412420749664307, "step": 4596 }, { "epoch": 1.1359031381270077, "grad_norm": 1.3463882618729202, "learning_rate": 8.588985401912357e-06, "loss": 0.28855404257774353, "step": 4597 }, { "epoch": 1.136150234741784, "grad_norm": 1.6185590161907228, "learning_rate": 8.584940570575746e-06, "loss": 0.3685518503189087, "step": 4598 }, { "epoch": 1.1363973313565605, "grad_norm": 1.5060123672655592, "learning_rate": 8.58089597546894e-06, "loss": 0.34315282106399536, "step": 4599 }, { "epoch": 1.1366444279713368, "grad_norm": 1.1702852803349324, "learning_rate": 8.576851617267151e-06, "loss": 0.2593347430229187, "step": 4600 }, { "epoch": 1.136891524586113, "grad_norm": 1.2304091519693656, "learning_rate": 8.572807496645537e-06, "loss": 0.2715400755405426, "step": 4601 }, { "epoch": 1.1371386212008896, "grad_norm": 1.3336678646756706, "learning_rate": 8.56876361427922e-06, "loss": 0.2371324598789215, "step": 4602 }, { "epoch": 1.1373857178156659, "grad_norm": 1.3244398246600082, "learning_rate": 8.564719970843296e-06, "loss": 0.2906763255596161, "step": 4603 }, { "epoch": 1.1376328144304424, "grad_norm": 1.2829101087679948, "learning_rate": 8.560676567012798e-06, "loss": 0.2571669816970825, "step": 4604 }, { "epoch": 1.1378799110452187, "grad_norm": 1.3715044281264839, "learning_rate": 8.55663340346274e-06, "loss": 0.24712082743644714, "step": 4605 }, { "epoch": 1.138127007659995, "grad_norm": 1.3829233490334494, "learning_rate": 8.552590480868088e-06, "loss": 0.3258591592311859, "step": 4606 }, { "epoch": 1.1383741042747715, "grad_norm": 1.4282333442013984, "learning_rate": 8.54854779990376e-06, "loss": 0.31242918968200684, "step": 4607 }, { "epoch": 1.1386212008895478, "grad_norm": 1.6067271399028542, "learning_rate": 8.544505361244642e-06, "loss": 0.31170523166656494, "step": 4608 }, { "epoch": 1.1388682975043243, "grad_norm": 1.3369615082563358, "learning_rate": 8.54046316556558e-06, "loss": 0.27067336440086365, "step": 4609 }, { "epoch": 1.1391153941191006, "grad_norm": 1.2768325503253422, "learning_rate": 8.536421213541381e-06, "loss": 0.2987285256385803, "step": 4610 }, { "epoch": 1.1393624907338769, "grad_norm": 1.4032308086138412, "learning_rate": 8.532379505846801e-06, "loss": 0.32451093196868896, "step": 4611 }, { "epoch": 1.1396095873486534, "grad_norm": 1.1720237654335355, "learning_rate": 8.528338043156566e-06, "loss": 0.23721349239349365, "step": 4612 }, { "epoch": 1.1398566839634297, "grad_norm": 1.283658744115973, "learning_rate": 8.52429682614536e-06, "loss": 0.24856558442115784, "step": 4613 }, { "epoch": 1.1401037805782062, "grad_norm": 1.6165808079354123, "learning_rate": 8.520255855487816e-06, "loss": 0.27857521176338196, "step": 4614 }, { "epoch": 1.1403508771929824, "grad_norm": 1.3346925867965251, "learning_rate": 8.516215131858538e-06, "loss": 0.30572760105133057, "step": 4615 }, { "epoch": 1.140597973807759, "grad_norm": 1.3441012631929137, "learning_rate": 8.51217465593208e-06, "loss": 0.24496766924858093, "step": 4616 }, { "epoch": 1.1408450704225352, "grad_norm": 1.14425471846876, "learning_rate": 8.508134428382963e-06, "loss": 0.2581389546394348, "step": 4617 }, { "epoch": 1.1410921670373115, "grad_norm": 1.3456547968773056, "learning_rate": 8.50409444988566e-06, "loss": 0.318815141916275, "step": 4618 }, { "epoch": 1.141339263652088, "grad_norm": 1.4654784363151006, "learning_rate": 8.500054721114604e-06, "loss": 0.3464733362197876, "step": 4619 }, { "epoch": 1.1415863602668643, "grad_norm": 1.3434409915395036, "learning_rate": 8.496015242744192e-06, "loss": 0.2958184480667114, "step": 4620 }, { "epoch": 1.1418334568816406, "grad_norm": 1.3097637109443323, "learning_rate": 8.491976015448762e-06, "loss": 0.24784274399280548, "step": 4621 }, { "epoch": 1.1420805534964171, "grad_norm": 1.3107703577255632, "learning_rate": 8.487937039902631e-06, "loss": 0.27525290846824646, "step": 4622 }, { "epoch": 1.1423276501111934, "grad_norm": 1.3736265647767323, "learning_rate": 8.483898316780063e-06, "loss": 0.27332866191864014, "step": 4623 }, { "epoch": 1.14257474672597, "grad_norm": 1.3135247613344596, "learning_rate": 8.479859846755282e-06, "loss": 0.2584916353225708, "step": 4624 }, { "epoch": 1.1428218433407462, "grad_norm": 1.3798075386556679, "learning_rate": 8.475821630502468e-06, "loss": 0.29220402240753174, "step": 4625 }, { "epoch": 1.1430689399555227, "grad_norm": 1.2914919299897853, "learning_rate": 8.471783668695763e-06, "loss": 0.25044703483581543, "step": 4626 }, { "epoch": 1.143316036570299, "grad_norm": 1.2500964765546765, "learning_rate": 8.467745962009266e-06, "loss": 0.2448897659778595, "step": 4627 }, { "epoch": 1.1435631331850753, "grad_norm": 1.290613208048796, "learning_rate": 8.463708511117021e-06, "loss": 0.2815669775009155, "step": 4628 }, { "epoch": 1.1438102297998518, "grad_norm": 1.373314170833153, "learning_rate": 8.45967131669305e-06, "loss": 0.2886815667152405, "step": 4629 }, { "epoch": 1.144057326414628, "grad_norm": 1.4375585397426829, "learning_rate": 8.455634379411314e-06, "loss": 0.27133458852767944, "step": 4630 }, { "epoch": 1.1443044230294044, "grad_norm": 1.4873931933839666, "learning_rate": 8.451597699945744e-06, "loss": 0.34071919322013855, "step": 4631 }, { "epoch": 1.144551519644181, "grad_norm": 1.2605440843935358, "learning_rate": 8.447561278970222e-06, "loss": 0.25312426686286926, "step": 4632 }, { "epoch": 1.1447986162589572, "grad_norm": 1.4311326924529906, "learning_rate": 8.443525117158585e-06, "loss": 0.33479928970336914, "step": 4633 }, { "epoch": 1.1450457128737337, "grad_norm": 1.2496701427450934, "learning_rate": 8.439489215184633e-06, "loss": 0.261515736579895, "step": 4634 }, { "epoch": 1.14529280948851, "grad_norm": 1.5900754776412, "learning_rate": 8.435453573722114e-06, "loss": 0.3301064074039459, "step": 4635 }, { "epoch": 1.1455399061032865, "grad_norm": 1.3754608190622422, "learning_rate": 8.431418193444742e-06, "loss": 0.3411005437374115, "step": 4636 }, { "epoch": 1.1457870027180628, "grad_norm": 1.4236894961903868, "learning_rate": 8.427383075026177e-06, "loss": 0.2759628891944885, "step": 4637 }, { "epoch": 1.146034099332839, "grad_norm": 1.3220969860985632, "learning_rate": 8.423348219140047e-06, "loss": 0.27976077795028687, "step": 4638 }, { "epoch": 1.1462811959476156, "grad_norm": 1.4056679912555736, "learning_rate": 8.419313626459926e-06, "loss": 0.2762782573699951, "step": 4639 }, { "epoch": 1.1465282925623919, "grad_norm": 1.4940812965512984, "learning_rate": 8.415279297659353e-06, "loss": 0.28689610958099365, "step": 4640 }, { "epoch": 1.1467753891771684, "grad_norm": 1.4486881646316006, "learning_rate": 8.411245233411817e-06, "loss": 0.3254487216472626, "step": 4641 }, { "epoch": 1.1470224857919447, "grad_norm": 1.2453927231575823, "learning_rate": 8.40721143439076e-06, "loss": 0.26948264241218567, "step": 4642 }, { "epoch": 1.147269582406721, "grad_norm": 1.490046890295476, "learning_rate": 8.403177901269587e-06, "loss": 0.29963377118110657, "step": 4643 }, { "epoch": 1.1475166790214975, "grad_norm": 1.3720282862197501, "learning_rate": 8.399144634721651e-06, "loss": 0.28522396087646484, "step": 4644 }, { "epoch": 1.1477637756362737, "grad_norm": 1.2473813180664228, "learning_rate": 8.395111635420275e-06, "loss": 0.2834509611129761, "step": 4645 }, { "epoch": 1.1480108722510503, "grad_norm": 1.264427434462576, "learning_rate": 8.391078904038715e-06, "loss": 0.3220484256744385, "step": 4646 }, { "epoch": 1.1482579688658265, "grad_norm": 1.3073532642926158, "learning_rate": 8.387046441250205e-06, "loss": 0.30027222633361816, "step": 4647 }, { "epoch": 1.1485050654806028, "grad_norm": 1.2766342464096765, "learning_rate": 8.38301424772792e-06, "loss": 0.2507430911064148, "step": 4648 }, { "epoch": 1.1487521620953793, "grad_norm": 1.3311248954858739, "learning_rate": 8.37898232414499e-06, "loss": 0.29685157537460327, "step": 4649 }, { "epoch": 1.1489992587101556, "grad_norm": 1.2878331528611784, "learning_rate": 8.374950671174508e-06, "loss": 0.33977407217025757, "step": 4650 }, { "epoch": 1.1492463553249321, "grad_norm": 1.2604915247317336, "learning_rate": 8.370919289489514e-06, "loss": 0.2559809982776642, "step": 4651 }, { "epoch": 1.1494934519397084, "grad_norm": 1.261680746568318, "learning_rate": 8.366888179763012e-06, "loss": 0.2804662585258484, "step": 4652 }, { "epoch": 1.1497405485544847, "grad_norm": 1.3307275572817465, "learning_rate": 8.362857342667948e-06, "loss": 0.28439390659332275, "step": 4653 }, { "epoch": 1.1499876451692612, "grad_norm": 1.3584280216446654, "learning_rate": 8.358826778877235e-06, "loss": 0.33359432220458984, "step": 4654 }, { "epoch": 1.1502347417840375, "grad_norm": 1.3301555377267844, "learning_rate": 8.354796489063736e-06, "loss": 0.28967511653900146, "step": 4655 }, { "epoch": 1.150481838398814, "grad_norm": 1.4129397928868503, "learning_rate": 8.35076647390026e-06, "loss": 0.3178577423095703, "step": 4656 }, { "epoch": 1.1507289350135903, "grad_norm": 1.3822352672443112, "learning_rate": 8.346736734059578e-06, "loss": 0.29032307863235474, "step": 4657 }, { "epoch": 1.1509760316283666, "grad_norm": 1.231288308628217, "learning_rate": 8.342707270214418e-06, "loss": 0.25629952549934387, "step": 4658 }, { "epoch": 1.151223128243143, "grad_norm": 1.2527689198722012, "learning_rate": 8.338678083037459e-06, "loss": 0.24084064364433289, "step": 4659 }, { "epoch": 1.1514702248579194, "grad_norm": 1.4809268473711983, "learning_rate": 8.334649173201328e-06, "loss": 0.32818901538848877, "step": 4660 }, { "epoch": 1.151717321472696, "grad_norm": 1.3869687678657847, "learning_rate": 8.330620541378614e-06, "loss": 0.2841421961784363, "step": 4661 }, { "epoch": 1.1519644180874722, "grad_norm": 1.4209906127949479, "learning_rate": 8.32659218824186e-06, "loss": 0.31349584460258484, "step": 4662 }, { "epoch": 1.1522115147022487, "grad_norm": 1.8472655253761778, "learning_rate": 8.322564114463552e-06, "loss": 0.28718096017837524, "step": 4663 }, { "epoch": 1.152458611317025, "grad_norm": 1.3195196466586914, "learning_rate": 8.318536320716134e-06, "loss": 0.32445693016052246, "step": 4664 }, { "epoch": 1.1527057079318013, "grad_norm": 1.281526432155879, "learning_rate": 8.314508807672012e-06, "loss": 0.30113768577575684, "step": 4665 }, { "epoch": 1.1529528045465778, "grad_norm": 1.3988314798006913, "learning_rate": 8.310481576003537e-06, "loss": 0.33560335636138916, "step": 4666 }, { "epoch": 1.153199901161354, "grad_norm": 1.3766950878828543, "learning_rate": 8.306454626383011e-06, "loss": 0.3117712736129761, "step": 4667 }, { "epoch": 1.1534469977761304, "grad_norm": 1.4206886372354852, "learning_rate": 8.302427959482696e-06, "loss": 0.3356424868106842, "step": 4668 }, { "epoch": 1.1536940943909069, "grad_norm": 1.2146705082469849, "learning_rate": 8.298401575974806e-06, "loss": 0.29219964146614075, "step": 4669 }, { "epoch": 1.1539411910056832, "grad_norm": 1.5725445440850474, "learning_rate": 8.294375476531496e-06, "loss": 0.3206666111946106, "step": 4670 }, { "epoch": 1.1541882876204597, "grad_norm": 1.1892399344883886, "learning_rate": 8.290349661824887e-06, "loss": 0.2536889910697937, "step": 4671 }, { "epoch": 1.154435384235236, "grad_norm": 1.304522652420871, "learning_rate": 8.286324132527049e-06, "loss": 0.2707456946372986, "step": 4672 }, { "epoch": 1.1546824808500125, "grad_norm": 1.2480163656424639, "learning_rate": 8.282298889310001e-06, "loss": 0.28059032559394836, "step": 4673 }, { "epoch": 1.1549295774647887, "grad_norm": 1.3635335740212289, "learning_rate": 8.27827393284572e-06, "loss": 0.2745121419429779, "step": 4674 }, { "epoch": 1.155176674079565, "grad_norm": 1.26155578237076, "learning_rate": 8.274249263806131e-06, "loss": 0.24337460100650787, "step": 4675 }, { "epoch": 1.1554237706943415, "grad_norm": 1.4399710690675742, "learning_rate": 8.270224882863107e-06, "loss": 0.2614983916282654, "step": 4676 }, { "epoch": 1.1556708673091178, "grad_norm": 1.4915356742965216, "learning_rate": 8.266200790688484e-06, "loss": 0.3623155355453491, "step": 4677 }, { "epoch": 1.1559179639238941, "grad_norm": 1.4096315812697766, "learning_rate": 8.262176987954035e-06, "loss": 0.3291287124156952, "step": 4678 }, { "epoch": 1.1561650605386706, "grad_norm": 1.2612937064307426, "learning_rate": 8.258153475331501e-06, "loss": 0.29306021332740784, "step": 4679 }, { "epoch": 1.156412157153447, "grad_norm": 1.2256628634137934, "learning_rate": 8.254130253492561e-06, "loss": 0.2746864855289459, "step": 4680 }, { "epoch": 1.1566592537682234, "grad_norm": 1.3633804090802144, "learning_rate": 8.250107323108858e-06, "loss": 0.33513766527175903, "step": 4681 }, { "epoch": 1.1569063503829997, "grad_norm": 1.3011749381409585, "learning_rate": 8.246084684851974e-06, "loss": 0.330264687538147, "step": 4682 }, { "epoch": 1.1571534469977762, "grad_norm": 1.317961691679311, "learning_rate": 8.242062339393447e-06, "loss": 0.3168051242828369, "step": 4683 }, { "epoch": 1.1574005436125525, "grad_norm": 1.303876126215111, "learning_rate": 8.23804028740477e-06, "loss": 0.28179287910461426, "step": 4684 }, { "epoch": 1.1576476402273288, "grad_norm": 1.2744532030180333, "learning_rate": 8.234018529557379e-06, "loss": 0.25839120149612427, "step": 4685 }, { "epoch": 1.1578947368421053, "grad_norm": 1.480439256337295, "learning_rate": 8.229997066522672e-06, "loss": 0.28390803933143616, "step": 4686 }, { "epoch": 1.1581418334568816, "grad_norm": 1.4327795942594397, "learning_rate": 8.225975898971987e-06, "loss": 0.31976574659347534, "step": 4687 }, { "epoch": 1.158388930071658, "grad_norm": 1.217786593033675, "learning_rate": 8.221955027576618e-06, "loss": 0.3077399730682373, "step": 4688 }, { "epoch": 1.1586360266864344, "grad_norm": 1.186360168026747, "learning_rate": 8.217934453007814e-06, "loss": 0.26516613364219666, "step": 4689 }, { "epoch": 1.1588831233012107, "grad_norm": 1.2165095260283436, "learning_rate": 8.213914175936757e-06, "loss": 0.250136137008667, "step": 4690 }, { "epoch": 1.1591302199159872, "grad_norm": 1.3327316894157466, "learning_rate": 8.209894197034603e-06, "loss": 0.2622452676296234, "step": 4691 }, { "epoch": 1.1593773165307635, "grad_norm": 1.4184081638114592, "learning_rate": 8.205874516972439e-06, "loss": 0.2827901840209961, "step": 4692 }, { "epoch": 1.15962441314554, "grad_norm": 1.26915786810235, "learning_rate": 8.201855136421314e-06, "loss": 0.26389962434768677, "step": 4693 }, { "epoch": 1.1598715097603163, "grad_norm": 1.2439542290977796, "learning_rate": 8.197836056052222e-06, "loss": 0.2530316710472107, "step": 4694 }, { "epoch": 1.1601186063750926, "grad_norm": 1.3516255956909495, "learning_rate": 8.193817276536109e-06, "loss": 0.3238978981971741, "step": 4695 }, { "epoch": 1.160365702989869, "grad_norm": 1.4005794034107175, "learning_rate": 8.18979879854387e-06, "loss": 0.3330468535423279, "step": 4696 }, { "epoch": 1.1606127996046454, "grad_norm": 1.9221155223467141, "learning_rate": 8.185780622746342e-06, "loss": 0.26934003829956055, "step": 4697 }, { "epoch": 1.1608598962194219, "grad_norm": 1.3226537656844402, "learning_rate": 8.181762749814325e-06, "loss": 0.2925993502140045, "step": 4698 }, { "epoch": 1.1611069928341982, "grad_norm": 1.3507007547422127, "learning_rate": 8.177745180418561e-06, "loss": 0.3199980854988098, "step": 4699 }, { "epoch": 1.1613540894489747, "grad_norm": 1.340910875848976, "learning_rate": 8.173727915229745e-06, "loss": 0.3120495676994324, "step": 4700 }, { "epoch": 1.161601186063751, "grad_norm": 1.2699126193568075, "learning_rate": 8.169710954918512e-06, "loss": 0.220820814371109, "step": 4701 }, { "epoch": 1.1618482826785272, "grad_norm": 1.254207511905937, "learning_rate": 8.165694300155458e-06, "loss": 0.28125888109207153, "step": 4702 }, { "epoch": 1.1620953792933038, "grad_norm": 1.2549568934344133, "learning_rate": 8.161677951611125e-06, "loss": 0.24166953563690186, "step": 4703 }, { "epoch": 1.16234247590808, "grad_norm": 1.5340833684381345, "learning_rate": 8.157661909955994e-06, "loss": 0.348810613155365, "step": 4704 }, { "epoch": 1.1625895725228563, "grad_norm": 1.2818164199125097, "learning_rate": 8.153646175860508e-06, "loss": 0.3322192430496216, "step": 4705 }, { "epoch": 1.1628366691376328, "grad_norm": 1.165499133364504, "learning_rate": 8.149630749995048e-06, "loss": 0.28918057680130005, "step": 4706 }, { "epoch": 1.1630837657524091, "grad_norm": 1.3915019781114464, "learning_rate": 8.145615633029956e-06, "loss": 0.28593650460243225, "step": 4707 }, { "epoch": 1.1633308623671856, "grad_norm": 1.3843427973161935, "learning_rate": 8.14160082563551e-06, "loss": 0.2831883430480957, "step": 4708 }, { "epoch": 1.163577958981962, "grad_norm": 1.2486168536286224, "learning_rate": 8.137586328481942e-06, "loss": 0.2571483254432678, "step": 4709 }, { "epoch": 1.1638250555967384, "grad_norm": 1.3305323010456958, "learning_rate": 8.133572142239434e-06, "loss": 0.265206515789032, "step": 4710 }, { "epoch": 1.1640721522115147, "grad_norm": 1.3567305570482884, "learning_rate": 8.129558267578109e-06, "loss": 0.25898832082748413, "step": 4711 }, { "epoch": 1.164319248826291, "grad_norm": 1.3687671658760248, "learning_rate": 8.125544705168046e-06, "loss": 0.2506851851940155, "step": 4712 }, { "epoch": 1.1645663454410675, "grad_norm": 1.325912361603354, "learning_rate": 8.121531455679267e-06, "loss": 0.29522445797920227, "step": 4713 }, { "epoch": 1.1648134420558438, "grad_norm": 1.4736425285782333, "learning_rate": 8.117518519781747e-06, "loss": 0.2705400586128235, "step": 4714 }, { "epoch": 1.16506053867062, "grad_norm": 1.3913556788976524, "learning_rate": 8.113505898145397e-06, "loss": 0.28444069623947144, "step": 4715 }, { "epoch": 1.1653076352853966, "grad_norm": 1.3276068664555183, "learning_rate": 8.109493591440091e-06, "loss": 0.2667485475540161, "step": 4716 }, { "epoch": 1.165554731900173, "grad_norm": 1.4891728412262255, "learning_rate": 8.105481600335643e-06, "loss": 0.3084794878959656, "step": 4717 }, { "epoch": 1.1658018285149494, "grad_norm": 1.5129429799246104, "learning_rate": 8.101469925501809e-06, "loss": 0.29004010558128357, "step": 4718 }, { "epoch": 1.1660489251297257, "grad_norm": 1.1958250781073132, "learning_rate": 8.0974585676083e-06, "loss": 0.22150641679763794, "step": 4719 }, { "epoch": 1.1662960217445022, "grad_norm": 1.5374278573962885, "learning_rate": 8.093447527324769e-06, "loss": 0.3274838924407959, "step": 4720 }, { "epoch": 1.1665431183592785, "grad_norm": 1.2801730697534832, "learning_rate": 8.089436805320823e-06, "loss": 0.3087577223777771, "step": 4721 }, { "epoch": 1.1667902149740548, "grad_norm": 1.2989354271199325, "learning_rate": 8.085426402266008e-06, "loss": 0.2979123592376709, "step": 4722 }, { "epoch": 1.1670373115888313, "grad_norm": 1.49917691084595, "learning_rate": 8.081416318829822e-06, "loss": 0.27845585346221924, "step": 4723 }, { "epoch": 1.1672844082036076, "grad_norm": 1.360181813426066, "learning_rate": 8.077406555681711e-06, "loss": 0.32995352149009705, "step": 4724 }, { "epoch": 1.167531504818384, "grad_norm": 1.4516121937891102, "learning_rate": 8.073397113491054e-06, "loss": 0.2758169174194336, "step": 4725 }, { "epoch": 1.1677786014331604, "grad_norm": 1.3782429054617127, "learning_rate": 8.069387992927195e-06, "loss": 0.28765639662742615, "step": 4726 }, { "epoch": 1.1680256980479367, "grad_norm": 1.2923194281758055, "learning_rate": 8.065379194659413e-06, "loss": 0.25613412261009216, "step": 4727 }, { "epoch": 1.1682727946627132, "grad_norm": 1.6323284069565356, "learning_rate": 8.061370719356939e-06, "loss": 0.28207632899284363, "step": 4728 }, { "epoch": 1.1685198912774895, "grad_norm": 1.3046242274091289, "learning_rate": 8.057362567688941e-06, "loss": 0.3108273148536682, "step": 4729 }, { "epoch": 1.168766987892266, "grad_norm": 1.2250477532277873, "learning_rate": 8.053354740324549e-06, "loss": 0.24482038617134094, "step": 4730 }, { "epoch": 1.1690140845070423, "grad_norm": 1.4430435554774277, "learning_rate": 8.049347237932823e-06, "loss": 0.3332192301750183, "step": 4731 }, { "epoch": 1.1692611811218185, "grad_norm": 1.4278033577857299, "learning_rate": 8.045340061182772e-06, "loss": 0.3043026328086853, "step": 4732 }, { "epoch": 1.169508277736595, "grad_norm": 1.1815111477068707, "learning_rate": 8.04133321074336e-06, "loss": 0.28242647647857666, "step": 4733 }, { "epoch": 1.1697553743513713, "grad_norm": 1.3560235077251959, "learning_rate": 8.037326687283483e-06, "loss": 0.3256124258041382, "step": 4734 }, { "epoch": 1.1700024709661478, "grad_norm": 1.3608746073065976, "learning_rate": 8.033320491471997e-06, "loss": 0.3102571964263916, "step": 4735 }, { "epoch": 1.1702495675809241, "grad_norm": 1.2755167096169027, "learning_rate": 8.02931462397769e-06, "loss": 0.2487679421901703, "step": 4736 }, { "epoch": 1.1704966641957006, "grad_norm": 1.2669999690534093, "learning_rate": 8.025309085469304e-06, "loss": 0.2677372097969055, "step": 4737 }, { "epoch": 1.170743760810477, "grad_norm": 1.3415764018491747, "learning_rate": 8.021303876615524e-06, "loss": 0.27933454513549805, "step": 4738 }, { "epoch": 1.1709908574252532, "grad_norm": 1.2191493920954821, "learning_rate": 8.017298998084975e-06, "loss": 0.24838873744010925, "step": 4739 }, { "epoch": 1.1712379540400297, "grad_norm": 1.3190411326796405, "learning_rate": 8.013294450546232e-06, "loss": 0.27867820858955383, "step": 4740 }, { "epoch": 1.171485050654806, "grad_norm": 1.331557535570468, "learning_rate": 8.009290234667814e-06, "loss": 0.3182458281517029, "step": 4741 }, { "epoch": 1.1717321472695823, "grad_norm": 1.2270961572401744, "learning_rate": 8.005286351118187e-06, "loss": 0.22746916115283966, "step": 4742 }, { "epoch": 1.1719792438843588, "grad_norm": 1.4994439269027802, "learning_rate": 8.001282800565753e-06, "loss": 0.32700830698013306, "step": 4743 }, { "epoch": 1.172226340499135, "grad_norm": 1.368131481125418, "learning_rate": 7.99727958367887e-06, "loss": 0.3102335035800934, "step": 4744 }, { "epoch": 1.1724734371139116, "grad_norm": 1.3369744671758168, "learning_rate": 7.993276701125835e-06, "loss": 0.33393195271492004, "step": 4745 }, { "epoch": 1.172720533728688, "grad_norm": 1.3994508072452267, "learning_rate": 7.989274153574882e-06, "loss": 0.2791900634765625, "step": 4746 }, { "epoch": 1.1729676303434644, "grad_norm": 1.4004583551296883, "learning_rate": 7.985271941694195e-06, "loss": 0.2720668315887451, "step": 4747 }, { "epoch": 1.1732147269582407, "grad_norm": 1.2730238017549065, "learning_rate": 7.98127006615191e-06, "loss": 0.2711752951145172, "step": 4748 }, { "epoch": 1.173461823573017, "grad_norm": 1.334169958277765, "learning_rate": 7.977268527616098e-06, "loss": 0.29770737886428833, "step": 4749 }, { "epoch": 1.1737089201877935, "grad_norm": 1.3363403569511574, "learning_rate": 7.97326732675477e-06, "loss": 0.3106342554092407, "step": 4750 }, { "epoch": 1.1739560168025698, "grad_norm": 1.2112156591234118, "learning_rate": 7.969266464235896e-06, "loss": 0.25681546330451965, "step": 4751 }, { "epoch": 1.174203113417346, "grad_norm": 1.2264727008695913, "learning_rate": 7.965265940727367e-06, "loss": 0.26430466771125793, "step": 4752 }, { "epoch": 1.1744502100321226, "grad_norm": 1.2565010612778644, "learning_rate": 7.96126575689704e-06, "loss": 0.2617204487323761, "step": 4753 }, { "epoch": 1.1746973066468989, "grad_norm": 1.384930938750976, "learning_rate": 7.957265913412693e-06, "loss": 0.3027562201023102, "step": 4754 }, { "epoch": 1.1749444032616754, "grad_norm": 1.3954455636311451, "learning_rate": 7.953266410942074e-06, "loss": 0.3571445941925049, "step": 4755 }, { "epoch": 1.1751914998764517, "grad_norm": 1.4638168291341682, "learning_rate": 7.949267250152846e-06, "loss": 0.3158910870552063, "step": 4756 }, { "epoch": 1.1754385964912282, "grad_norm": 1.4028014379880818, "learning_rate": 7.945268431712641e-06, "loss": 0.30793270468711853, "step": 4757 }, { "epoch": 1.1756856931060045, "grad_norm": 1.4972118331577153, "learning_rate": 7.941269956289013e-06, "loss": 0.2657829821109772, "step": 4758 }, { "epoch": 1.1759327897207807, "grad_norm": 1.4590717535149356, "learning_rate": 7.937271824549466e-06, "loss": 0.32944726943969727, "step": 4759 }, { "epoch": 1.1761798863355573, "grad_norm": 1.3752769778732512, "learning_rate": 7.933274037161453e-06, "loss": 0.27707794308662415, "step": 4760 }, { "epoch": 1.1764269829503335, "grad_norm": 1.432439078690917, "learning_rate": 7.929276594792358e-06, "loss": 0.301121324300766, "step": 4761 }, { "epoch": 1.17667407956511, "grad_norm": 1.3602436363139705, "learning_rate": 7.92527949810952e-06, "loss": 0.27349773049354553, "step": 4762 }, { "epoch": 1.1769211761798863, "grad_norm": 1.305960207628808, "learning_rate": 7.921282747780206e-06, "loss": 0.2906264066696167, "step": 4763 }, { "epoch": 1.1771682727946626, "grad_norm": 1.2548785413180248, "learning_rate": 7.91728634447164e-06, "loss": 0.2730150520801544, "step": 4764 }, { "epoch": 1.1774153694094391, "grad_norm": 1.178286112091912, "learning_rate": 7.913290288850981e-06, "loss": 0.251379132270813, "step": 4765 }, { "epoch": 1.1776624660242154, "grad_norm": 1.302866338273795, "learning_rate": 7.909294581585321e-06, "loss": 0.27267181873321533, "step": 4766 }, { "epoch": 1.177909562638992, "grad_norm": 1.3601633672833442, "learning_rate": 7.905299223341713e-06, "loss": 0.2530267536640167, "step": 4767 }, { "epoch": 1.1781566592537682, "grad_norm": 1.272403180650887, "learning_rate": 7.901304214787134e-06, "loss": 0.2570621073246002, "step": 4768 }, { "epoch": 1.1784037558685445, "grad_norm": 1.5009888417001354, "learning_rate": 7.897309556588517e-06, "loss": 0.29854267835617065, "step": 4769 }, { "epoch": 1.178650852483321, "grad_norm": 1.358976829068841, "learning_rate": 7.893315249412723e-06, "loss": 0.30347731709480286, "step": 4770 }, { "epoch": 1.1788979490980973, "grad_norm": 1.484844208858917, "learning_rate": 7.889321293926567e-06, "loss": 0.3417311906814575, "step": 4771 }, { "epoch": 1.1791450457128738, "grad_norm": 1.2678795251003623, "learning_rate": 7.885327690796796e-06, "loss": 0.23978564143180847, "step": 4772 }, { "epoch": 1.17939214232765, "grad_norm": 1.2562250887790747, "learning_rate": 7.8813344406901e-06, "loss": 0.25511634349823, "step": 4773 }, { "epoch": 1.1796392389424264, "grad_norm": 1.313615038850844, "learning_rate": 7.877341544273115e-06, "loss": 0.31105750799179077, "step": 4774 }, { "epoch": 1.179886335557203, "grad_norm": 1.2397283991849166, "learning_rate": 7.873349002212411e-06, "loss": 0.2656004726886749, "step": 4775 }, { "epoch": 1.1801334321719792, "grad_norm": 1.8537085117417125, "learning_rate": 7.869356815174508e-06, "loss": 0.27055448293685913, "step": 4776 }, { "epoch": 1.1803805287867557, "grad_norm": 1.4083569593455127, "learning_rate": 7.865364983825854e-06, "loss": 0.299424946308136, "step": 4777 }, { "epoch": 1.180627625401532, "grad_norm": 1.4525899314731092, "learning_rate": 7.86137350883285e-06, "loss": 0.3445091247558594, "step": 4778 }, { "epoch": 1.1808747220163083, "grad_norm": 1.5230806195786608, "learning_rate": 7.857382390861832e-06, "loss": 0.3126199543476105, "step": 4779 }, { "epoch": 1.1811218186310848, "grad_norm": 1.458870216533124, "learning_rate": 7.853391630579072e-06, "loss": 0.3388117849826813, "step": 4780 }, { "epoch": 1.181368915245861, "grad_norm": 1.2767374546388277, "learning_rate": 7.849401228650792e-06, "loss": 0.29297029972076416, "step": 4781 }, { "epoch": 1.1816160118606376, "grad_norm": 1.2541739290746476, "learning_rate": 7.845411185743146e-06, "loss": 0.23404166102409363, "step": 4782 }, { "epoch": 1.1818631084754139, "grad_norm": 1.2093633564352335, "learning_rate": 7.841421502522233e-06, "loss": 0.26581335067749023, "step": 4783 }, { "epoch": 1.1821102050901904, "grad_norm": 1.3711266772116428, "learning_rate": 7.837432179654087e-06, "loss": 0.3825363516807556, "step": 4784 }, { "epoch": 1.1823573017049667, "grad_norm": 1.3114530805908795, "learning_rate": 7.833443217804691e-06, "loss": 0.30486592650413513, "step": 4785 }, { "epoch": 1.182604398319743, "grad_norm": 1.2933949195637486, "learning_rate": 7.82945461763996e-06, "loss": 0.3234826326370239, "step": 4786 }, { "epoch": 1.1828514949345195, "grad_norm": 1.2634904072646207, "learning_rate": 7.825466379825744e-06, "loss": 0.28605759143829346, "step": 4787 }, { "epoch": 1.1830985915492958, "grad_norm": 1.2704879747966988, "learning_rate": 7.821478505027844e-06, "loss": 0.30562153458595276, "step": 4788 }, { "epoch": 1.183345688164072, "grad_norm": 1.4797049268468547, "learning_rate": 7.817490993911994e-06, "loss": 0.3201177716255188, "step": 4789 }, { "epoch": 1.1835927847788486, "grad_norm": 1.2286273080953394, "learning_rate": 7.81350384714387e-06, "loss": 0.2865544855594635, "step": 4790 }, { "epoch": 1.1838398813936248, "grad_norm": 1.350454703352465, "learning_rate": 7.809517065389084e-06, "loss": 0.2824997901916504, "step": 4791 }, { "epoch": 1.1840869780084013, "grad_norm": 1.422062188726198, "learning_rate": 7.80553064931319e-06, "loss": 0.2893778681755066, "step": 4792 }, { "epoch": 1.1843340746231776, "grad_norm": 1.4510899746496053, "learning_rate": 7.80154459958168e-06, "loss": 0.32281237840652466, "step": 4793 }, { "epoch": 1.1845811712379541, "grad_norm": 1.3293190755208568, "learning_rate": 7.797558916859982e-06, "loss": 0.27551382780075073, "step": 4794 }, { "epoch": 1.1848282678527304, "grad_norm": 1.390478622983903, "learning_rate": 7.793573601813467e-06, "loss": 0.24962444603443146, "step": 4795 }, { "epoch": 1.1850753644675067, "grad_norm": 1.2903533024999327, "learning_rate": 7.789588655107441e-06, "loss": 0.3114962875843048, "step": 4796 }, { "epoch": 1.1853224610822832, "grad_norm": 1.296144544989258, "learning_rate": 7.785604077407154e-06, "loss": 0.2679755687713623, "step": 4797 }, { "epoch": 1.1855695576970595, "grad_norm": 1.2286007867333668, "learning_rate": 7.781619869377789e-06, "loss": 0.26899176836013794, "step": 4798 }, { "epoch": 1.1858166543118358, "grad_norm": 1.3132742383184468, "learning_rate": 7.77763603168447e-06, "loss": 0.2613127827644348, "step": 4799 }, { "epoch": 1.1860637509266123, "grad_norm": 1.3903891812163856, "learning_rate": 7.773652564992259e-06, "loss": 0.34271612763404846, "step": 4800 }, { "epoch": 1.1863108475413886, "grad_norm": 1.4566472119587752, "learning_rate": 7.76966946996615e-06, "loss": 0.27463600039482117, "step": 4801 }, { "epoch": 1.1865579441561651, "grad_norm": 1.3926668129230457, "learning_rate": 7.765686747271085e-06, "loss": 0.25725680589675903, "step": 4802 }, { "epoch": 1.1868050407709414, "grad_norm": 1.79365285308468, "learning_rate": 7.761704397571936e-06, "loss": 0.33389025926589966, "step": 4803 }, { "epoch": 1.187052137385718, "grad_norm": 1.4485258629347124, "learning_rate": 7.75772242153352e-06, "loss": 0.31070676445961, "step": 4804 }, { "epoch": 1.1872992340004942, "grad_norm": 1.2418823155105014, "learning_rate": 7.753740819820585e-06, "loss": 0.19545264542102814, "step": 4805 }, { "epoch": 1.1875463306152705, "grad_norm": 1.7862171010048022, "learning_rate": 7.74975959309782e-06, "loss": 0.2867152690887451, "step": 4806 }, { "epoch": 1.187793427230047, "grad_norm": 1.4065159397197204, "learning_rate": 7.745778742029852e-06, "loss": 0.28626635670661926, "step": 4807 }, { "epoch": 1.1880405238448233, "grad_norm": 1.3423995885431388, "learning_rate": 7.741798267281239e-06, "loss": 0.25966259837150574, "step": 4808 }, { "epoch": 1.1882876204595998, "grad_norm": 1.4850929770154888, "learning_rate": 7.737818169516483e-06, "loss": 0.28266826272010803, "step": 4809 }, { "epoch": 1.188534717074376, "grad_norm": 1.2533562948055523, "learning_rate": 7.733838449400021e-06, "loss": 0.2714747488498688, "step": 4810 }, { "epoch": 1.1887818136891524, "grad_norm": 1.701278772028037, "learning_rate": 7.729859107596228e-06, "loss": 0.30970245599746704, "step": 4811 }, { "epoch": 1.1890289103039289, "grad_norm": 1.4751769322775692, "learning_rate": 7.725880144769412e-06, "loss": 0.32125383615493774, "step": 4812 }, { "epoch": 1.1892760069187052, "grad_norm": 1.228401084679429, "learning_rate": 7.721901561583824e-06, "loss": 0.2374611794948578, "step": 4813 }, { "epoch": 1.1895231035334817, "grad_norm": 1.4552153184182535, "learning_rate": 7.71792335870365e-06, "loss": 0.2808373272418976, "step": 4814 }, { "epoch": 1.189770200148258, "grad_norm": 1.1226977711698383, "learning_rate": 7.713945536793e-06, "loss": 0.2830045819282532, "step": 4815 }, { "epoch": 1.1900172967630342, "grad_norm": 1.1944289998395456, "learning_rate": 7.709968096515943e-06, "loss": 0.2470424324274063, "step": 4816 }, { "epoch": 1.1902643933778108, "grad_norm": 1.4226203325849618, "learning_rate": 7.705991038536463e-06, "loss": 0.30680108070373535, "step": 4817 }, { "epoch": 1.190511489992587, "grad_norm": 1.3499358324522017, "learning_rate": 7.702014363518497e-06, "loss": 0.3191997706890106, "step": 4818 }, { "epoch": 1.1907585866073636, "grad_norm": 1.4835882424044073, "learning_rate": 7.698038072125905e-06, "loss": 0.31779298186302185, "step": 4819 }, { "epoch": 1.1910056832221398, "grad_norm": 1.4806890555497003, "learning_rate": 7.694062165022495e-06, "loss": 0.29767757654190063, "step": 4820 }, { "epoch": 1.1912527798369164, "grad_norm": 1.2824577648358386, "learning_rate": 7.690086642871997e-06, "loss": 0.27588194608688354, "step": 4821 }, { "epoch": 1.1914998764516926, "grad_norm": 1.4311839867308178, "learning_rate": 7.686111506338086e-06, "loss": 0.2951726019382477, "step": 4822 }, { "epoch": 1.191746973066469, "grad_norm": 1.241223509885725, "learning_rate": 7.682136756084373e-06, "loss": 0.29899895191192627, "step": 4823 }, { "epoch": 1.1919940696812454, "grad_norm": 1.415785258893351, "learning_rate": 7.6781623927744e-06, "loss": 0.28333041071891785, "step": 4824 }, { "epoch": 1.1922411662960217, "grad_norm": 1.2812995535055374, "learning_rate": 7.674188417071648e-06, "loss": 0.24764159321784973, "step": 4825 }, { "epoch": 1.192488262910798, "grad_norm": 1.2848110921364657, "learning_rate": 7.67021482963953e-06, "loss": 0.27131929993629456, "step": 4826 }, { "epoch": 1.1927353595255745, "grad_norm": 1.3514306745574198, "learning_rate": 7.666241631141403e-06, "loss": 0.25572237372398376, "step": 4827 }, { "epoch": 1.1929824561403508, "grad_norm": 1.1968265558093825, "learning_rate": 7.662268822240546e-06, "loss": 0.24383148550987244, "step": 4828 }, { "epoch": 1.1932295527551273, "grad_norm": 1.3610211466590891, "learning_rate": 7.658296403600174e-06, "loss": 0.3067585825920105, "step": 4829 }, { "epoch": 1.1934766493699036, "grad_norm": 1.3155603914101386, "learning_rate": 7.654324375883453e-06, "loss": 0.266247034072876, "step": 4830 }, { "epoch": 1.1937237459846801, "grad_norm": 1.37794142950794, "learning_rate": 7.650352739753464e-06, "loss": 0.26283636689186096, "step": 4831 }, { "epoch": 1.1939708425994564, "grad_norm": 1.2940565726863928, "learning_rate": 7.646381495873236e-06, "loss": 0.24273662269115448, "step": 4832 }, { "epoch": 1.1942179392142327, "grad_norm": 1.4528594764003557, "learning_rate": 7.642410644905726e-06, "loss": 0.30069828033447266, "step": 4833 }, { "epoch": 1.1944650358290092, "grad_norm": 1.374046088839809, "learning_rate": 7.638440187513835e-06, "loss": 0.34082043170928955, "step": 4834 }, { "epoch": 1.1947121324437855, "grad_norm": 1.4873546819613008, "learning_rate": 7.634470124360377e-06, "loss": 0.3165174424648285, "step": 4835 }, { "epoch": 1.1949592290585618, "grad_norm": 1.5071400171392089, "learning_rate": 7.63050045610812e-06, "loss": 0.3145681917667389, "step": 4836 }, { "epoch": 1.1952063256733383, "grad_norm": 1.3118059375340352, "learning_rate": 7.626531183419762e-06, "loss": 0.32901132106781006, "step": 4837 }, { "epoch": 1.1954534222881146, "grad_norm": 1.3919824717255975, "learning_rate": 7.622562306957929e-06, "loss": 0.28842610120773315, "step": 4838 }, { "epoch": 1.195700518902891, "grad_norm": 2.118057661488321, "learning_rate": 7.618593827385188e-06, "loss": 0.33843904733657837, "step": 4839 }, { "epoch": 1.1959476155176674, "grad_norm": 1.2873740623132834, "learning_rate": 7.614625745364034e-06, "loss": 0.27839604020118713, "step": 4840 }, { "epoch": 1.1961947121324439, "grad_norm": 1.4040049790809879, "learning_rate": 7.610658061556905e-06, "loss": 0.3619431257247925, "step": 4841 }, { "epoch": 1.1964418087472202, "grad_norm": 1.315878496889071, "learning_rate": 7.606690776626153e-06, "loss": 0.2698574960231781, "step": 4842 }, { "epoch": 1.1966889053619965, "grad_norm": 1.357513122869558, "learning_rate": 7.602723891234085e-06, "loss": 0.2592017352581024, "step": 4843 }, { "epoch": 1.196936001976773, "grad_norm": 1.3314618613024078, "learning_rate": 7.598757406042927e-06, "loss": 0.31920337677001953, "step": 4844 }, { "epoch": 1.1971830985915493, "grad_norm": 1.2669107146565382, "learning_rate": 7.594791321714848e-06, "loss": 0.2845652401447296, "step": 4845 }, { "epoch": 1.1974301952063258, "grad_norm": 1.3616523191623862, "learning_rate": 7.590825638911943e-06, "loss": 0.2668483257293701, "step": 4846 }, { "epoch": 1.197677291821102, "grad_norm": 1.42733625371057, "learning_rate": 7.586860358296244e-06, "loss": 0.3241117596626282, "step": 4847 }, { "epoch": 1.1979243884358783, "grad_norm": 1.2673125180937963, "learning_rate": 7.582895480529716e-06, "loss": 0.2962621748447418, "step": 4848 }, { "epoch": 1.1981714850506548, "grad_norm": 1.3378342889007688, "learning_rate": 7.5789310062742485e-06, "loss": 0.32176172733306885, "step": 4849 }, { "epoch": 1.1984185816654311, "grad_norm": 1.3357995801375389, "learning_rate": 7.574966936191676e-06, "loss": 0.33725982904434204, "step": 4850 }, { "epoch": 1.1986656782802076, "grad_norm": 1.403783221517383, "learning_rate": 7.571003270943757e-06, "loss": 0.33502820134162903, "step": 4851 }, { "epoch": 1.198912774894984, "grad_norm": 1.3025606560058796, "learning_rate": 7.567040011192187e-06, "loss": 0.2812633812427521, "step": 4852 }, { "epoch": 1.1991598715097602, "grad_norm": 1.4610905127075724, "learning_rate": 7.563077157598589e-06, "loss": 0.3261832296848297, "step": 4853 }, { "epoch": 1.1994069681245367, "grad_norm": 1.2942072390025943, "learning_rate": 7.559114710824527e-06, "loss": 0.23347899317741394, "step": 4854 }, { "epoch": 1.199654064739313, "grad_norm": 1.4429686313674197, "learning_rate": 7.5551526715314895e-06, "loss": 0.33082422614097595, "step": 4855 }, { "epoch": 1.1999011613540895, "grad_norm": 1.2385790938020278, "learning_rate": 7.551191040380893e-06, "loss": 0.2507101893424988, "step": 4856 }, { "epoch": 1.2001482579688658, "grad_norm": 1.2934837000529327, "learning_rate": 7.547229818034096e-06, "loss": 0.2759505808353424, "step": 4857 }, { "epoch": 1.2003953545836423, "grad_norm": 1.3607845005666923, "learning_rate": 7.543269005152384e-06, "loss": 0.2961346507072449, "step": 4858 }, { "epoch": 1.2006424511984186, "grad_norm": 1.3736097454710137, "learning_rate": 7.539308602396975e-06, "loss": 0.33590254187583923, "step": 4859 }, { "epoch": 1.200889547813195, "grad_norm": 1.2429275199564376, "learning_rate": 7.535348610429017e-06, "loss": 0.27577677369117737, "step": 4860 }, { "epoch": 1.2011366444279714, "grad_norm": 1.407260622716278, "learning_rate": 7.531389029909592e-06, "loss": 0.2562680244445801, "step": 4861 }, { "epoch": 1.2013837410427477, "grad_norm": 1.2865799683894141, "learning_rate": 7.527429861499712e-06, "loss": 0.2932591140270233, "step": 4862 }, { "epoch": 1.201630837657524, "grad_norm": 1.4106616447906877, "learning_rate": 7.523471105860316e-06, "loss": 0.29666420817375183, "step": 4863 }, { "epoch": 1.2018779342723005, "grad_norm": 1.3469286299283443, "learning_rate": 7.519512763652283e-06, "loss": 0.3392697274684906, "step": 4864 }, { "epoch": 1.2021250308870768, "grad_norm": 1.4079833994714068, "learning_rate": 7.515554835536412e-06, "loss": 0.2757902443408966, "step": 4865 }, { "epoch": 1.2023721275018533, "grad_norm": 1.4360435717523978, "learning_rate": 7.5115973221734465e-06, "loss": 0.31084680557250977, "step": 4866 }, { "epoch": 1.2026192241166296, "grad_norm": 1.2321231571090727, "learning_rate": 7.507640224224046e-06, "loss": 0.29205018281936646, "step": 4867 }, { "epoch": 1.202866320731406, "grad_norm": 1.3035047589637958, "learning_rate": 7.503683542348814e-06, "loss": 0.25825461745262146, "step": 4868 }, { "epoch": 1.2031134173461824, "grad_norm": 1.3056673276003439, "learning_rate": 7.499727277208277e-06, "loss": 0.2849982678890228, "step": 4869 }, { "epoch": 1.2033605139609587, "grad_norm": 1.3404189820460992, "learning_rate": 7.495771429462891e-06, "loss": 0.30369722843170166, "step": 4870 }, { "epoch": 1.2036076105757352, "grad_norm": 1.3685171098061877, "learning_rate": 7.491815999773045e-06, "loss": 0.28333038091659546, "step": 4871 }, { "epoch": 1.2038547071905115, "grad_norm": 1.3894776653229672, "learning_rate": 7.487860988799055e-06, "loss": 0.3009853959083557, "step": 4872 }, { "epoch": 1.2041018038052878, "grad_norm": 1.2317922610840242, "learning_rate": 7.483906397201177e-06, "loss": 0.25865116715431213, "step": 4873 }, { "epoch": 1.2043489004200643, "grad_norm": 1.3864496982494703, "learning_rate": 7.479952225639584e-06, "loss": 0.317643940448761, "step": 4874 }, { "epoch": 1.2045959970348405, "grad_norm": 1.226644433473494, "learning_rate": 7.47599847477439e-06, "loss": 0.23953208327293396, "step": 4875 }, { "epoch": 1.204843093649617, "grad_norm": 1.3640671346508002, "learning_rate": 7.472045145265632e-06, "loss": 0.31687355041503906, "step": 4876 }, { "epoch": 1.2050901902643933, "grad_norm": 1.573066284323048, "learning_rate": 7.468092237773273e-06, "loss": 0.2919963002204895, "step": 4877 }, { "epoch": 1.2053372868791699, "grad_norm": 1.2840152064605117, "learning_rate": 7.4641397529572154e-06, "loss": 0.27525925636291504, "step": 4878 }, { "epoch": 1.2055843834939461, "grad_norm": 1.3348266552602486, "learning_rate": 7.460187691477285e-06, "loss": 0.267106294631958, "step": 4879 }, { "epoch": 1.2058314801087224, "grad_norm": 1.2693550266678757, "learning_rate": 7.456236053993242e-06, "loss": 0.25351494550704956, "step": 4880 }, { "epoch": 1.206078576723499, "grad_norm": 1.3008561256646056, "learning_rate": 7.452284841164765e-06, "loss": 0.28489381074905396, "step": 4881 }, { "epoch": 1.2063256733382752, "grad_norm": 1.3892699421373234, "learning_rate": 7.448334053651477e-06, "loss": 0.34444910287857056, "step": 4882 }, { "epoch": 1.2065727699530517, "grad_norm": 1.3451136560318977, "learning_rate": 7.44438369211292e-06, "loss": 0.2648202180862427, "step": 4883 }, { "epoch": 1.206819866567828, "grad_norm": 1.5141222715250717, "learning_rate": 7.4404337572085585e-06, "loss": 0.33877670764923096, "step": 4884 }, { "epoch": 1.2070669631826043, "grad_norm": 1.3475912168210022, "learning_rate": 7.436484249597804e-06, "loss": 0.28045517206192017, "step": 4885 }, { "epoch": 1.2073140597973808, "grad_norm": 1.2359987346990258, "learning_rate": 7.432535169939981e-06, "loss": 0.23117738962173462, "step": 4886 }, { "epoch": 1.207561156412157, "grad_norm": 1.383819499089396, "learning_rate": 7.4285865188943515e-06, "loss": 0.28900206089019775, "step": 4887 }, { "epoch": 1.2078082530269336, "grad_norm": 1.5063399269584052, "learning_rate": 7.4246382971200995e-06, "loss": 0.3158647119998932, "step": 4888 }, { "epoch": 1.20805534964171, "grad_norm": 1.2732615941243002, "learning_rate": 7.420690505276347e-06, "loss": 0.31934815645217896, "step": 4889 }, { "epoch": 1.2083024462564862, "grad_norm": 1.2541056666720007, "learning_rate": 7.416743144022132e-06, "loss": 0.2776501774787903, "step": 4890 }, { "epoch": 1.2085495428712627, "grad_norm": 1.2024922642052933, "learning_rate": 7.412796214016424e-06, "loss": 0.23460879921913147, "step": 4891 }, { "epoch": 1.208796639486039, "grad_norm": 1.259008892525466, "learning_rate": 7.40884971591813e-06, "loss": 0.26549986004829407, "step": 4892 }, { "epoch": 1.2090437361008155, "grad_norm": 1.3270004652139695, "learning_rate": 7.40490365038607e-06, "loss": 0.26390233635902405, "step": 4893 }, { "epoch": 1.2092908327155918, "grad_norm": 1.2806220065222476, "learning_rate": 7.400958018079009e-06, "loss": 0.27875053882598877, "step": 4894 }, { "epoch": 1.209537929330368, "grad_norm": 1.281705743709981, "learning_rate": 7.397012819655621e-06, "loss": 0.23966623842716217, "step": 4895 }, { "epoch": 1.2097850259451446, "grad_norm": 1.3161317646043564, "learning_rate": 7.393068055774526e-06, "loss": 0.33449965715408325, "step": 4896 }, { "epoch": 1.2100321225599209, "grad_norm": 1.3743456054995433, "learning_rate": 7.389123727094255e-06, "loss": 0.2718139886856079, "step": 4897 }, { "epoch": 1.2102792191746974, "grad_norm": 1.5985017572673452, "learning_rate": 7.3851798342732735e-06, "loss": 0.29125821590423584, "step": 4898 }, { "epoch": 1.2105263157894737, "grad_norm": 1.3725204384571215, "learning_rate": 7.3812363779699805e-06, "loss": 0.2976685166358948, "step": 4899 }, { "epoch": 1.21077341240425, "grad_norm": 1.3790554776471207, "learning_rate": 7.377293358842689e-06, "loss": 0.2828121483325958, "step": 4900 }, { "epoch": 1.2110205090190265, "grad_norm": 1.4581417268020942, "learning_rate": 7.373350777549652e-06, "loss": 0.3484206795692444, "step": 4901 }, { "epoch": 1.2112676056338028, "grad_norm": 1.39174630507616, "learning_rate": 7.369408634749038e-06, "loss": 0.3319321870803833, "step": 4902 }, { "epoch": 1.2115147022485793, "grad_norm": 1.3865490642070726, "learning_rate": 7.365466931098954e-06, "loss": 0.2653076648712158, "step": 4903 }, { "epoch": 1.2117617988633556, "grad_norm": 1.4170394040240601, "learning_rate": 7.361525667257423e-06, "loss": 0.26360809803009033, "step": 4904 }, { "epoch": 1.212008895478132, "grad_norm": 1.2563642783936366, "learning_rate": 7.357584843882398e-06, "loss": 0.23149514198303223, "step": 4905 }, { "epoch": 1.2122559920929084, "grad_norm": 1.3771244531862978, "learning_rate": 7.353644461631763e-06, "loss": 0.3016318082809448, "step": 4906 }, { "epoch": 1.2125030887076846, "grad_norm": 1.3616237532290107, "learning_rate": 7.349704521163321e-06, "loss": 0.2947442829608917, "step": 4907 }, { "epoch": 1.2127501853224611, "grad_norm": 1.2851313835062297, "learning_rate": 7.34576502313481e-06, "loss": 0.29860609769821167, "step": 4908 }, { "epoch": 1.2129972819372374, "grad_norm": 1.3680253513839256, "learning_rate": 7.341825968203884e-06, "loss": 0.2766990661621094, "step": 4909 }, { "epoch": 1.2132443785520137, "grad_norm": 1.3125828955718488, "learning_rate": 7.337887357028135e-06, "loss": 0.25856783986091614, "step": 4910 }, { "epoch": 1.2134914751667902, "grad_norm": 1.4409560173751839, "learning_rate": 7.333949190265068e-06, "loss": 0.3013250529766083, "step": 4911 }, { "epoch": 1.2137385717815665, "grad_norm": 1.3912183387632027, "learning_rate": 7.330011468572119e-06, "loss": 0.3176058232784271, "step": 4912 }, { "epoch": 1.213985668396343, "grad_norm": 1.1461580456497318, "learning_rate": 7.326074192606656e-06, "loss": 0.18244390189647675, "step": 4913 }, { "epoch": 1.2142327650111193, "grad_norm": 1.3713046842586776, "learning_rate": 7.322137363025963e-06, "loss": 0.280860960483551, "step": 4914 }, { "epoch": 1.2144798616258958, "grad_norm": 1.3960460766686578, "learning_rate": 7.318200980487257e-06, "loss": 0.2900065779685974, "step": 4915 }, { "epoch": 1.2147269582406721, "grad_norm": 1.330970216458164, "learning_rate": 7.314265045647674e-06, "loss": 0.27642494440078735, "step": 4916 }, { "epoch": 1.2149740548554484, "grad_norm": 1.5161793010430753, "learning_rate": 7.310329559164284e-06, "loss": 0.286338210105896, "step": 4917 }, { "epoch": 1.215221151470225, "grad_norm": 1.4783684270344524, "learning_rate": 7.306394521694069e-06, "loss": 0.3451773524284363, "step": 4918 }, { "epoch": 1.2154682480850012, "grad_norm": 1.373528276156633, "learning_rate": 7.302459933893947e-06, "loss": 0.3283950984477997, "step": 4919 }, { "epoch": 1.2157153446997775, "grad_norm": 1.4141937968888407, "learning_rate": 7.298525796420759e-06, "loss": 0.28688400983810425, "step": 4920 }, { "epoch": 1.215962441314554, "grad_norm": 1.4414014539630178, "learning_rate": 7.294592109931266e-06, "loss": 0.3117695152759552, "step": 4921 }, { "epoch": 1.2162095379293303, "grad_norm": 1.52348385798253, "learning_rate": 7.290658875082161e-06, "loss": 0.3213585615158081, "step": 4922 }, { "epoch": 1.2164566345441068, "grad_norm": 1.542514145109561, "learning_rate": 7.286726092530052e-06, "loss": 0.329140305519104, "step": 4923 }, { "epoch": 1.216703731158883, "grad_norm": 1.6204672581405237, "learning_rate": 7.282793762931488e-06, "loss": 0.2912466526031494, "step": 4924 }, { "epoch": 1.2169508277736596, "grad_norm": 1.6020971026574222, "learning_rate": 7.278861886942919e-06, "loss": 0.2752729058265686, "step": 4925 }, { "epoch": 1.2171979243884359, "grad_norm": 1.345419387802123, "learning_rate": 7.274930465220735e-06, "loss": 0.3019883632659912, "step": 4926 }, { "epoch": 1.2174450210032122, "grad_norm": 1.3016568108698694, "learning_rate": 7.2709994984212515e-06, "loss": 0.295068621635437, "step": 4927 }, { "epoch": 1.2176921176179887, "grad_norm": 1.3578922854628277, "learning_rate": 7.267068987200701e-06, "loss": 0.3096845746040344, "step": 4928 }, { "epoch": 1.217939214232765, "grad_norm": 1.2464045201678762, "learning_rate": 7.263138932215241e-06, "loss": 0.30301257967948914, "step": 4929 }, { "epoch": 1.2181863108475415, "grad_norm": 1.2823861735879274, "learning_rate": 7.2592093341209555e-06, "loss": 0.3136274814605713, "step": 4930 }, { "epoch": 1.2184334074623178, "grad_norm": 1.4202980287391669, "learning_rate": 7.255280193573857e-06, "loss": 0.3031310439109802, "step": 4931 }, { "epoch": 1.218680504077094, "grad_norm": 1.2255166638942436, "learning_rate": 7.251351511229865e-06, "loss": 0.23536676168441772, "step": 4932 }, { "epoch": 1.2189276006918706, "grad_norm": 1.2269060969450996, "learning_rate": 7.247423287744838e-06, "loss": 0.2556694746017456, "step": 4933 }, { "epoch": 1.2191746973066468, "grad_norm": 1.422681645604443, "learning_rate": 7.243495523774552e-06, "loss": 0.2924000322818756, "step": 4934 }, { "epoch": 1.2194217939214234, "grad_norm": 1.4285793387578372, "learning_rate": 7.23956821997471e-06, "loss": 0.2716752290725708, "step": 4935 }, { "epoch": 1.2196688905361996, "grad_norm": 1.3369099865976473, "learning_rate": 7.235641377000931e-06, "loss": 0.25690072774887085, "step": 4936 }, { "epoch": 1.219915987150976, "grad_norm": 1.3297353322608825, "learning_rate": 7.231714995508767e-06, "loss": 0.3041984438896179, "step": 4937 }, { "epoch": 1.2201630837657524, "grad_norm": 1.404262855020856, "learning_rate": 7.2277890761536885e-06, "loss": 0.29016292095184326, "step": 4938 }, { "epoch": 1.2204101803805287, "grad_norm": 1.2798703541229315, "learning_rate": 7.223863619591078e-06, "loss": 0.27034884691238403, "step": 4939 }, { "epoch": 1.2206572769953052, "grad_norm": 1.3244895733165405, "learning_rate": 7.21993862647626e-06, "loss": 0.2960745096206665, "step": 4940 }, { "epoch": 1.2209043736100815, "grad_norm": 1.4185822442111988, "learning_rate": 7.216014097464468e-06, "loss": 0.29493197798728943, "step": 4941 }, { "epoch": 1.221151470224858, "grad_norm": 1.3076898436377915, "learning_rate": 7.2120900332108635e-06, "loss": 0.3066205382347107, "step": 4942 }, { "epoch": 1.2213985668396343, "grad_norm": 1.2984047358401054, "learning_rate": 7.2081664343705285e-06, "loss": 0.26176637411117554, "step": 4943 }, { "epoch": 1.2216456634544106, "grad_norm": 1.2777568070933065, "learning_rate": 7.20424330159847e-06, "loss": 0.25697070360183716, "step": 4944 }, { "epoch": 1.2218927600691871, "grad_norm": 1.4224490188468324, "learning_rate": 7.200320635549616e-06, "loss": 0.32366782426834106, "step": 4945 }, { "epoch": 1.2221398566839634, "grad_norm": 1.3498792109624203, "learning_rate": 7.1963984368788095e-06, "loss": 0.2948508858680725, "step": 4946 }, { "epoch": 1.2223869532987397, "grad_norm": 1.253166503000689, "learning_rate": 7.192476706240829e-06, "loss": 0.3066119849681854, "step": 4947 }, { "epoch": 1.2226340499135162, "grad_norm": 1.3938580756484211, "learning_rate": 7.188555444290362e-06, "loss": 0.3095788359642029, "step": 4948 }, { "epoch": 1.2228811465282925, "grad_norm": 1.2960115941563353, "learning_rate": 7.184634651682028e-06, "loss": 0.27999189496040344, "step": 4949 }, { "epoch": 1.223128243143069, "grad_norm": 1.3441278627615123, "learning_rate": 7.1807143290703574e-06, "loss": 0.27354973554611206, "step": 4950 }, { "epoch": 1.2233753397578453, "grad_norm": 1.2297385913739995, "learning_rate": 7.176794477109816e-06, "loss": 0.2797207236289978, "step": 4951 }, { "epoch": 1.2236224363726218, "grad_norm": 1.474644467739412, "learning_rate": 7.172875096454782e-06, "loss": 0.3189114034175873, "step": 4952 }, { "epoch": 1.223869532987398, "grad_norm": 1.32198628000499, "learning_rate": 7.16895618775955e-06, "loss": 0.2805273234844208, "step": 4953 }, { "epoch": 1.2241166296021744, "grad_norm": 1.3997736526381672, "learning_rate": 7.1650377516783474e-06, "loss": 0.35476499795913696, "step": 4954 }, { "epoch": 1.2243637262169509, "grad_norm": 1.362283445201965, "learning_rate": 7.161119788865313e-06, "loss": 0.31385117769241333, "step": 4955 }, { "epoch": 1.2246108228317272, "grad_norm": 1.31428598912498, "learning_rate": 7.157202299974516e-06, "loss": 0.292145699262619, "step": 4956 }, { "epoch": 1.2248579194465035, "grad_norm": 1.3489823960022616, "learning_rate": 7.153285285659937e-06, "loss": 0.2881055474281311, "step": 4957 }, { "epoch": 1.22510501606128, "grad_norm": 1.3799540198710274, "learning_rate": 7.149368746575489e-06, "loss": 0.3212391436100006, "step": 4958 }, { "epoch": 1.2253521126760563, "grad_norm": 1.343868009785695, "learning_rate": 7.145452683374991e-06, "loss": 0.30598917603492737, "step": 4959 }, { "epoch": 1.2255992092908328, "grad_norm": 1.3809627298583635, "learning_rate": 7.1415370967121896e-06, "loss": 0.3107519745826721, "step": 4960 }, { "epoch": 1.225846305905609, "grad_norm": 1.3550114603894712, "learning_rate": 7.137621987240757e-06, "loss": 0.31050562858581543, "step": 4961 }, { "epoch": 1.2260934025203856, "grad_norm": 1.394218065440288, "learning_rate": 7.1337073556142764e-06, "loss": 0.3681497573852539, "step": 4962 }, { "epoch": 1.2263404991351619, "grad_norm": 1.5262740559280212, "learning_rate": 7.129793202486262e-06, "loss": 0.36354732513427734, "step": 4963 }, { "epoch": 1.2265875957499381, "grad_norm": 1.270406282011628, "learning_rate": 7.125879528510136e-06, "loss": 0.2834344208240509, "step": 4964 }, { "epoch": 1.2268346923647147, "grad_norm": 1.3032326910500416, "learning_rate": 7.121966334339252e-06, "loss": 0.26151469349861145, "step": 4965 }, { "epoch": 1.227081788979491, "grad_norm": 1.311555686540799, "learning_rate": 7.1180536206268725e-06, "loss": 0.2645493447780609, "step": 4966 }, { "epoch": 1.2273288855942674, "grad_norm": 1.2453401077689645, "learning_rate": 7.114141388026185e-06, "loss": 0.26544591784477234, "step": 4967 }, { "epoch": 1.2275759822090437, "grad_norm": 1.2684149043321369, "learning_rate": 7.1102296371903045e-06, "loss": 0.3448801338672638, "step": 4968 }, { "epoch": 1.22782307882382, "grad_norm": 1.3139479213941019, "learning_rate": 7.106318368772249e-06, "loss": 0.2782391607761383, "step": 4969 }, { "epoch": 1.2280701754385965, "grad_norm": 1.3532581072862275, "learning_rate": 7.1024075834249726e-06, "loss": 0.29311686754226685, "step": 4970 }, { "epoch": 1.2283172720533728, "grad_norm": 1.2427956750845799, "learning_rate": 7.098497281801337e-06, "loss": 0.30949628353118896, "step": 4971 }, { "epoch": 1.2285643686681493, "grad_norm": 1.2848875444604326, "learning_rate": 7.09458746455413e-06, "loss": 0.2912791967391968, "step": 4972 }, { "epoch": 1.2288114652829256, "grad_norm": 1.4176922680844193, "learning_rate": 7.090678132336054e-06, "loss": 0.2762000560760498, "step": 4973 }, { "epoch": 1.229058561897702, "grad_norm": 1.4198159629770855, "learning_rate": 7.08676928579973e-06, "loss": 0.28134822845458984, "step": 4974 }, { "epoch": 1.2293056585124784, "grad_norm": 1.3228065445409634, "learning_rate": 7.082860925597705e-06, "loss": 0.2683870494365692, "step": 4975 }, { "epoch": 1.2295527551272547, "grad_norm": 1.3541109220510155, "learning_rate": 7.078953052382436e-06, "loss": 0.2870028614997864, "step": 4976 }, { "epoch": 1.2297998517420312, "grad_norm": 1.2620511083797474, "learning_rate": 7.075045666806306e-06, "loss": 0.2641921043395996, "step": 4977 }, { "epoch": 1.2300469483568075, "grad_norm": 1.39181083853633, "learning_rate": 7.071138769521612e-06, "loss": 0.3075256943702698, "step": 4978 }, { "epoch": 1.230294044971584, "grad_norm": 1.2711863439834135, "learning_rate": 7.067232361180574e-06, "loss": 0.21849912405014038, "step": 4979 }, { "epoch": 1.2305411415863603, "grad_norm": 1.2324407895613976, "learning_rate": 7.063326442435321e-06, "loss": 0.2497217208147049, "step": 4980 }, { "epoch": 1.2307882382011366, "grad_norm": 1.4169989936051164, "learning_rate": 7.059421013937908e-06, "loss": 0.2961541712284088, "step": 4981 }, { "epoch": 1.231035334815913, "grad_norm": 1.2868161774947562, "learning_rate": 7.055516076340311e-06, "loss": 0.25293976068496704, "step": 4982 }, { "epoch": 1.2312824314306894, "grad_norm": 1.4405024964876718, "learning_rate": 7.051611630294415e-06, "loss": 0.3455355167388916, "step": 4983 }, { "epoch": 1.2315295280454657, "grad_norm": 1.5541252171954012, "learning_rate": 7.047707676452032e-06, "loss": 0.314517617225647, "step": 4984 }, { "epoch": 1.2317766246602422, "grad_norm": 1.3866066950473552, "learning_rate": 7.0438042154648804e-06, "loss": 0.2477831244468689, "step": 4985 }, { "epoch": 1.2320237212750185, "grad_norm": 1.5186354748957283, "learning_rate": 7.039901247984614e-06, "loss": 0.29368001222610474, "step": 4986 }, { "epoch": 1.232270817889795, "grad_norm": 1.1964814358825777, "learning_rate": 7.035998774662786e-06, "loss": 0.2532249689102173, "step": 4987 }, { "epoch": 1.2325179145045713, "grad_norm": 1.4506323546451332, "learning_rate": 7.032096796150873e-06, "loss": 0.29023247957229614, "step": 4988 }, { "epoch": 1.2327650111193478, "grad_norm": 1.4809604321920442, "learning_rate": 7.028195313100278e-06, "loss": 0.2790895998477936, "step": 4989 }, { "epoch": 1.233012107734124, "grad_norm": 1.3604164415196105, "learning_rate": 7.0242943261623065e-06, "loss": 0.24723225831985474, "step": 4990 }, { "epoch": 1.2332592043489004, "grad_norm": 1.2686232275706149, "learning_rate": 7.020393835988195e-06, "loss": 0.2323659062385559, "step": 4991 }, { "epoch": 1.2335063009636769, "grad_norm": 1.488598115259188, "learning_rate": 7.0164938432290854e-06, "loss": 0.3079083263874054, "step": 4992 }, { "epoch": 1.2337533975784531, "grad_norm": 1.4338831498031366, "learning_rate": 7.012594348536049e-06, "loss": 0.2798060178756714, "step": 4993 }, { "epoch": 1.2340004941932294, "grad_norm": 1.5728292481906783, "learning_rate": 7.0086953525600594e-06, "loss": 0.2720135450363159, "step": 4994 }, { "epoch": 1.234247590808006, "grad_norm": 1.542936462058883, "learning_rate": 7.004796855952016e-06, "loss": 0.2893051505088806, "step": 4995 }, { "epoch": 1.2344946874227822, "grad_norm": 1.4203782707552408, "learning_rate": 7.000898859362737e-06, "loss": 0.2840315103530884, "step": 4996 }, { "epoch": 1.2347417840375587, "grad_norm": 1.372973045368787, "learning_rate": 6.997001363442949e-06, "loss": 0.30373457074165344, "step": 4997 }, { "epoch": 1.234988880652335, "grad_norm": 1.3154553958335657, "learning_rate": 6.993104368843302e-06, "loss": 0.2978993058204651, "step": 4998 }, { "epoch": 1.2352359772671115, "grad_norm": 1.4604931595204622, "learning_rate": 6.989207876214359e-06, "loss": 0.26782718300819397, "step": 4999 }, { "epoch": 1.2354830738818878, "grad_norm": 1.455413584675688, "learning_rate": 6.985311886206602e-06, "loss": 0.33245524764060974, "step": 5000 }, { "epoch": 1.2357301704966641, "grad_norm": 1.3967176616830161, "learning_rate": 6.981416399470424e-06, "loss": 0.32060831785202026, "step": 5001 }, { "epoch": 1.2359772671114406, "grad_norm": 1.4160127342887288, "learning_rate": 6.977521416656136e-06, "loss": 0.29824334383010864, "step": 5002 }, { "epoch": 1.236224363726217, "grad_norm": 1.280887225213811, "learning_rate": 6.973626938413968e-06, "loss": 0.2702087461948395, "step": 5003 }, { "epoch": 1.2364714603409934, "grad_norm": 1.2766816692433889, "learning_rate": 6.969732965394064e-06, "loss": 0.27337586879730225, "step": 5004 }, { "epoch": 1.2367185569557697, "grad_norm": 1.2676978220793154, "learning_rate": 6.9658394982464826e-06, "loss": 0.2237911820411682, "step": 5005 }, { "epoch": 1.236965653570546, "grad_norm": 1.4393447815672062, "learning_rate": 6.961946537621198e-06, "loss": 0.30445045232772827, "step": 5006 }, { "epoch": 1.2372127501853225, "grad_norm": 1.4046447277892409, "learning_rate": 6.958054084168104e-06, "loss": 0.27830755710601807, "step": 5007 }, { "epoch": 1.2374598468000988, "grad_norm": 1.304071210970327, "learning_rate": 6.954162138537002e-06, "loss": 0.2507534623146057, "step": 5008 }, { "epoch": 1.2377069434148753, "grad_norm": 1.5501076514464862, "learning_rate": 6.95027070137761e-06, "loss": 0.33532586693763733, "step": 5009 }, { "epoch": 1.2379540400296516, "grad_norm": 1.4617852962207285, "learning_rate": 6.9463797733395724e-06, "loss": 0.3175557255744934, "step": 5010 }, { "epoch": 1.2382011366444279, "grad_norm": 1.34302240991728, "learning_rate": 6.942489355072433e-06, "loss": 0.29826539754867554, "step": 5011 }, { "epoch": 1.2384482332592044, "grad_norm": 1.530116382595265, "learning_rate": 6.9385994472256624e-06, "loss": 0.32051241397857666, "step": 5012 }, { "epoch": 1.2386953298739807, "grad_norm": 1.5079482706951206, "learning_rate": 6.934710050448637e-06, "loss": 0.31556904315948486, "step": 5013 }, { "epoch": 1.2389424264887572, "grad_norm": 1.2889853907495994, "learning_rate": 6.930821165390658e-06, "loss": 0.2913089394569397, "step": 5014 }, { "epoch": 1.2391895231035335, "grad_norm": 1.4810532362415592, "learning_rate": 6.926932792700931e-06, "loss": 0.2848961651325226, "step": 5015 }, { "epoch": 1.2394366197183098, "grad_norm": 1.5064904424821342, "learning_rate": 6.923044933028578e-06, "loss": 0.30682122707366943, "step": 5016 }, { "epoch": 1.2396837163330863, "grad_norm": 1.32958255551153, "learning_rate": 6.919157587022642e-06, "loss": 0.28720515966415405, "step": 5017 }, { "epoch": 1.2399308129478626, "grad_norm": 1.4810323561205792, "learning_rate": 6.915270755332073e-06, "loss": 0.29244130849838257, "step": 5018 }, { "epoch": 1.240177909562639, "grad_norm": 1.2723419093689914, "learning_rate": 6.911384438605743e-06, "loss": 0.2669260501861572, "step": 5019 }, { "epoch": 1.2404250061774154, "grad_norm": 1.3330434736124794, "learning_rate": 6.907498637492427e-06, "loss": 0.26983416080474854, "step": 5020 }, { "epoch": 1.2406721027921916, "grad_norm": 1.481606056884966, "learning_rate": 6.903613352640829e-06, "loss": 0.3626735508441925, "step": 5021 }, { "epoch": 1.2409191994069682, "grad_norm": 1.4981774850424316, "learning_rate": 6.8997285846995456e-06, "loss": 0.28572171926498413, "step": 5022 }, { "epoch": 1.2411662960217444, "grad_norm": 1.243659810860543, "learning_rate": 6.895844334317108e-06, "loss": 0.2510794997215271, "step": 5023 }, { "epoch": 1.241413392636521, "grad_norm": 1.2518870158179565, "learning_rate": 6.891960602141947e-06, "loss": 0.21684464812278748, "step": 5024 }, { "epoch": 1.2416604892512972, "grad_norm": 1.2814069064429119, "learning_rate": 6.888077388822418e-06, "loss": 0.27301621437072754, "step": 5025 }, { "epoch": 1.2419075858660737, "grad_norm": 1.359609317693083, "learning_rate": 6.884194695006779e-06, "loss": 0.29495707154273987, "step": 5026 }, { "epoch": 1.24215468248085, "grad_norm": 1.628284912876705, "learning_rate": 6.880312521343212e-06, "loss": 0.3182600736618042, "step": 5027 }, { "epoch": 1.2424017790956263, "grad_norm": 1.1973693732619368, "learning_rate": 6.876430868479802e-06, "loss": 0.26752787828445435, "step": 5028 }, { "epoch": 1.2426488757104028, "grad_norm": 1.3601638423633364, "learning_rate": 6.87254973706455e-06, "loss": 0.2965892553329468, "step": 5029 }, { "epoch": 1.2428959723251791, "grad_norm": 1.2002826231967907, "learning_rate": 6.868669127745377e-06, "loss": 0.2246188223361969, "step": 5030 }, { "epoch": 1.2431430689399554, "grad_norm": 1.8434564369397999, "learning_rate": 6.864789041170106e-06, "loss": 0.2733284831047058, "step": 5031 }, { "epoch": 1.243390165554732, "grad_norm": 1.5024971394963162, "learning_rate": 6.860909477986482e-06, "loss": 0.3010786175727844, "step": 5032 }, { "epoch": 1.2436372621695082, "grad_norm": 1.1638051325082241, "learning_rate": 6.857030438842155e-06, "loss": 0.24698364734649658, "step": 5033 }, { "epoch": 1.2438843587842847, "grad_norm": 1.3166537510302407, "learning_rate": 6.853151924384698e-06, "loss": 0.2563791275024414, "step": 5034 }, { "epoch": 1.244131455399061, "grad_norm": 1.498810204910615, "learning_rate": 6.849273935261582e-06, "loss": 0.28869447112083435, "step": 5035 }, { "epoch": 1.2443785520138375, "grad_norm": 1.4143964271314118, "learning_rate": 6.8453964721202e-06, "loss": 0.3092704713344574, "step": 5036 }, { "epoch": 1.2446256486286138, "grad_norm": 1.3318860969856043, "learning_rate": 6.841519535607859e-06, "loss": 0.30502068996429443, "step": 5037 }, { "epoch": 1.24487274524339, "grad_norm": 1.2707631509352455, "learning_rate": 6.8376431263717665e-06, "loss": 0.2683471143245697, "step": 5038 }, { "epoch": 1.2451198418581666, "grad_norm": 1.2930551482443007, "learning_rate": 6.833767245059059e-06, "loss": 0.2965959310531616, "step": 5039 }, { "epoch": 1.2453669384729429, "grad_norm": 1.4051140143172558, "learning_rate": 6.829891892316768e-06, "loss": 0.28359076380729675, "step": 5040 }, { "epoch": 1.2456140350877192, "grad_norm": 1.3611744517750706, "learning_rate": 6.826017068791851e-06, "loss": 0.2727327346801758, "step": 5041 }, { "epoch": 1.2458611317024957, "grad_norm": 1.4566396804729145, "learning_rate": 6.822142775131165e-06, "loss": 0.2842440605163574, "step": 5042 }, { "epoch": 1.246108228317272, "grad_norm": 1.2128258165980998, "learning_rate": 6.818269011981484e-06, "loss": 0.2034844309091568, "step": 5043 }, { "epoch": 1.2463553249320485, "grad_norm": 1.3716938976457085, "learning_rate": 6.814395779989497e-06, "loss": 0.26875045895576477, "step": 5044 }, { "epoch": 1.2466024215468248, "grad_norm": 1.4029948334050115, "learning_rate": 6.810523079801796e-06, "loss": 0.3560952842235565, "step": 5045 }, { "epoch": 1.2468495181616013, "grad_norm": 1.522168229535765, "learning_rate": 6.806650912064896e-06, "loss": 0.2645185589790344, "step": 5046 }, { "epoch": 1.2470966147763776, "grad_norm": 1.280956899086925, "learning_rate": 6.8027792774252084e-06, "loss": 0.28297778964042664, "step": 5047 }, { "epoch": 1.2473437113911539, "grad_norm": 1.445546443544655, "learning_rate": 6.798908176529071e-06, "loss": 0.3063560724258423, "step": 5048 }, { "epoch": 1.2475908080059304, "grad_norm": 1.2292183829848002, "learning_rate": 6.795037610022719e-06, "loss": 0.2566746175289154, "step": 5049 }, { "epoch": 1.2478379046207067, "grad_norm": 1.5421350586044094, "learning_rate": 6.791167578552305e-06, "loss": 0.3490370213985443, "step": 5050 }, { "epoch": 1.2480850012354832, "grad_norm": 1.3885964251108076, "learning_rate": 6.787298082763892e-06, "loss": 0.31812191009521484, "step": 5051 }, { "epoch": 1.2483320978502594, "grad_norm": 1.5125436347108316, "learning_rate": 6.783429123303453e-06, "loss": 0.274639368057251, "step": 5052 }, { "epoch": 1.2485791944650357, "grad_norm": 1.3543147397545907, "learning_rate": 6.779560700816874e-06, "loss": 0.28108108043670654, "step": 5053 }, { "epoch": 1.2488262910798122, "grad_norm": 1.3704785346143742, "learning_rate": 6.775692815949943e-06, "loss": 0.2904103994369507, "step": 5054 }, { "epoch": 1.2490733876945885, "grad_norm": 1.4684830351730067, "learning_rate": 6.7718254693483746e-06, "loss": 0.3171617388725281, "step": 5055 }, { "epoch": 1.249320484309365, "grad_norm": 1.4780898634121247, "learning_rate": 6.767958661657773e-06, "loss": 0.2793939709663391, "step": 5056 }, { "epoch": 1.2495675809241413, "grad_norm": 1.301140141948521, "learning_rate": 6.7640923935236646e-06, "loss": 0.24576382339000702, "step": 5057 }, { "epoch": 1.2498146775389176, "grad_norm": 1.5911260625281398, "learning_rate": 6.760226665591486e-06, "loss": 0.3415530323982239, "step": 5058 }, { "epoch": 1.2500617741536941, "grad_norm": 1.4960226052875572, "learning_rate": 6.756361478506579e-06, "loss": 0.2994074821472168, "step": 5059 }, { "epoch": 1.2503088707684704, "grad_norm": 1.3386861917526958, "learning_rate": 6.752496832914199e-06, "loss": 0.30535322427749634, "step": 5060 }, { "epoch": 1.250555967383247, "grad_norm": 1.5413825955190013, "learning_rate": 6.7486327294595074e-06, "loss": 0.33492252230644226, "step": 5061 }, { "epoch": 1.2508030639980232, "grad_norm": 1.3801240837341635, "learning_rate": 6.744769168787584e-06, "loss": 0.29791539907455444, "step": 5062 }, { "epoch": 1.2510501606127997, "grad_norm": 1.3204085587854968, "learning_rate": 6.740906151543403e-06, "loss": 0.25344064831733704, "step": 5063 }, { "epoch": 1.251297257227576, "grad_norm": 1.2717595081177338, "learning_rate": 6.737043678371855e-06, "loss": 0.2722280025482178, "step": 5064 }, { "epoch": 1.2515443538423523, "grad_norm": 1.272590423638181, "learning_rate": 6.7331817499177474e-06, "loss": 0.29152941703796387, "step": 5065 }, { "epoch": 1.2517914504571288, "grad_norm": 1.4290831873300611, "learning_rate": 6.729320366825785e-06, "loss": 0.2814473509788513, "step": 5066 }, { "epoch": 1.252038547071905, "grad_norm": 1.3830050241881844, "learning_rate": 6.7254595297405874e-06, "loss": 0.24306853115558624, "step": 5067 }, { "epoch": 1.2522856436866814, "grad_norm": 1.5859885689303108, "learning_rate": 6.721599239306684e-06, "loss": 0.30577531456947327, "step": 5068 }, { "epoch": 1.252532740301458, "grad_norm": 1.4147376429921092, "learning_rate": 6.717739496168514e-06, "loss": 0.28472229838371277, "step": 5069 }, { "epoch": 1.2527798369162342, "grad_norm": 1.4121346523094755, "learning_rate": 6.713880300970415e-06, "loss": 0.27326348423957825, "step": 5070 }, { "epoch": 1.2530269335310107, "grad_norm": 1.3192031181327077, "learning_rate": 6.7100216543566425e-06, "loss": 0.2976391911506653, "step": 5071 }, { "epoch": 1.253274030145787, "grad_norm": 1.4296846800204857, "learning_rate": 6.706163556971361e-06, "loss": 0.32897013425827026, "step": 5072 }, { "epoch": 1.2535211267605635, "grad_norm": 1.3213904275153785, "learning_rate": 6.702306009458639e-06, "loss": 0.3270535469055176, "step": 5073 }, { "epoch": 1.2537682233753398, "grad_norm": 1.4236043101255542, "learning_rate": 6.698449012462456e-06, "loss": 0.30959030985832214, "step": 5074 }, { "epoch": 1.254015319990116, "grad_norm": 1.3429820250534654, "learning_rate": 6.6945925666266966e-06, "loss": 0.2756541073322296, "step": 5075 }, { "epoch": 1.2542624166048926, "grad_norm": 1.5634152767764269, "learning_rate": 6.6907366725951594e-06, "loss": 0.36882907152175903, "step": 5076 }, { "epoch": 1.2545095132196689, "grad_norm": 1.165394514523816, "learning_rate": 6.686881331011543e-06, "loss": 0.22967004776000977, "step": 5077 }, { "epoch": 1.2547566098344451, "grad_norm": 1.3409784382820786, "learning_rate": 6.683026542519457e-06, "loss": 0.2576739192008972, "step": 5078 }, { "epoch": 1.2550037064492217, "grad_norm": 1.292938252776913, "learning_rate": 6.67917230776242e-06, "loss": 0.24974486231803894, "step": 5079 }, { "epoch": 1.255250803063998, "grad_norm": 1.4133218342661857, "learning_rate": 6.675318627383859e-06, "loss": 0.26643192768096924, "step": 5080 }, { "epoch": 1.2554978996787745, "grad_norm": 1.421612311638289, "learning_rate": 6.671465502027106e-06, "loss": 0.28375452756881714, "step": 5081 }, { "epoch": 1.2557449962935507, "grad_norm": 1.67771707461243, "learning_rate": 6.6676129323354e-06, "loss": 0.3576657176017761, "step": 5082 }, { "epoch": 1.2559920929083273, "grad_norm": 1.388480891084436, "learning_rate": 6.6637609189518935e-06, "loss": 0.31698575615882874, "step": 5083 }, { "epoch": 1.2562391895231035, "grad_norm": 1.5748605082552034, "learning_rate": 6.659909462519635e-06, "loss": 0.3051706552505493, "step": 5084 }, { "epoch": 1.2564862861378798, "grad_norm": 1.3460509430745107, "learning_rate": 6.656058563681585e-06, "loss": 0.2492760419845581, "step": 5085 }, { "epoch": 1.2567333827526563, "grad_norm": 1.2866389749199332, "learning_rate": 6.652208223080618e-06, "loss": 0.27138379216194153, "step": 5086 }, { "epoch": 1.2569804793674326, "grad_norm": 1.2560823345410763, "learning_rate": 6.648358441359504e-06, "loss": 0.30059105157852173, "step": 5087 }, { "epoch": 1.257227575982209, "grad_norm": 1.2895273058278318, "learning_rate": 6.644509219160929e-06, "loss": 0.26251205801963806, "step": 5088 }, { "epoch": 1.2574746725969854, "grad_norm": 1.2141490684363063, "learning_rate": 6.640660557127477e-06, "loss": 0.22481286525726318, "step": 5089 }, { "epoch": 1.257721769211762, "grad_norm": 1.4814683549197867, "learning_rate": 6.636812455901651e-06, "loss": 0.29093238711357117, "step": 5090 }, { "epoch": 1.2579688658265382, "grad_norm": 1.4471108201291403, "learning_rate": 6.6329649161258454e-06, "loss": 0.2946845293045044, "step": 5091 }, { "epoch": 1.2582159624413145, "grad_norm": 1.3412064184146621, "learning_rate": 6.6291179384423674e-06, "loss": 0.28408098220825195, "step": 5092 }, { "epoch": 1.258463059056091, "grad_norm": 1.4114938138083635, "learning_rate": 6.6252715234934366e-06, "loss": 0.3160575032234192, "step": 5093 }, { "epoch": 1.2587101556708673, "grad_norm": 1.1922716680256402, "learning_rate": 6.6214256719211656e-06, "loss": 0.24755612015724182, "step": 5094 }, { "epoch": 1.2589572522856436, "grad_norm": 1.3729248752660455, "learning_rate": 6.617580384367588e-06, "loss": 0.2867547273635864, "step": 5095 }, { "epoch": 1.25920434890042, "grad_norm": 1.418085886549511, "learning_rate": 6.6137356614746285e-06, "loss": 0.34371858835220337, "step": 5096 }, { "epoch": 1.2594514455151964, "grad_norm": 1.330408157457295, "learning_rate": 6.609891503884132e-06, "loss": 0.27984410524368286, "step": 5097 }, { "epoch": 1.259698542129973, "grad_norm": 1.2920013441246985, "learning_rate": 6.606047912237836e-06, "loss": 0.29624298214912415, "step": 5098 }, { "epoch": 1.2599456387447492, "grad_norm": 1.292015010120746, "learning_rate": 6.6022048871773896e-06, "loss": 0.3159397840499878, "step": 5099 }, { "epoch": 1.2601927353595257, "grad_norm": 1.3384272428833324, "learning_rate": 6.59836242934435e-06, "loss": 0.2856951057910919, "step": 5100 }, { "epoch": 1.260439831974302, "grad_norm": 1.2702449424649696, "learning_rate": 6.594520539380169e-06, "loss": 0.3115721344947815, "step": 5101 }, { "epoch": 1.2606869285890783, "grad_norm": 1.2758760252167993, "learning_rate": 6.590679217926221e-06, "loss": 0.25886568427085876, "step": 5102 }, { "epoch": 1.2609340252038548, "grad_norm": 1.3122831978137803, "learning_rate": 6.586838465623773e-06, "loss": 0.24718928337097168, "step": 5103 }, { "epoch": 1.261181121818631, "grad_norm": 1.3770807203334636, "learning_rate": 6.582998283113994e-06, "loss": 0.2996169924736023, "step": 5104 }, { "epoch": 1.2614282184334074, "grad_norm": 1.3341118917301111, "learning_rate": 6.579158671037968e-06, "loss": 0.2451840341091156, "step": 5105 }, { "epoch": 1.2616753150481839, "grad_norm": 1.1934093115646194, "learning_rate": 6.575319630036678e-06, "loss": 0.24229127168655396, "step": 5106 }, { "epoch": 1.2619224116629602, "grad_norm": 1.2510785451050153, "learning_rate": 6.571481160751014e-06, "loss": 0.2551286518573761, "step": 5107 }, { "epoch": 1.2621695082777367, "grad_norm": 1.4309343222125142, "learning_rate": 6.567643263821768e-06, "loss": 0.2811517119407654, "step": 5108 }, { "epoch": 1.262416604892513, "grad_norm": 1.4330771219850837, "learning_rate": 6.563805939889641e-06, "loss": 0.30958718061447144, "step": 5109 }, { "epoch": 1.2626637015072895, "grad_norm": 1.494417957263626, "learning_rate": 6.5599691895952344e-06, "loss": 0.25776952505111694, "step": 5110 }, { "epoch": 1.2629107981220657, "grad_norm": 1.3470279363517361, "learning_rate": 6.556133013579051e-06, "loss": 0.2703093886375427, "step": 5111 }, { "epoch": 1.263157894736842, "grad_norm": 1.387445480979556, "learning_rate": 6.552297412481503e-06, "loss": 0.2614898085594177, "step": 5112 }, { "epoch": 1.2634049913516185, "grad_norm": 1.4502598613241746, "learning_rate": 6.5484623869429075e-06, "loss": 0.2884119153022766, "step": 5113 }, { "epoch": 1.2636520879663948, "grad_norm": 1.496076347320101, "learning_rate": 6.54462793760348e-06, "loss": 0.3150678277015686, "step": 5114 }, { "epoch": 1.2638991845811711, "grad_norm": 1.3294224771872514, "learning_rate": 6.540794065103345e-06, "loss": 0.2559055685997009, "step": 5115 }, { "epoch": 1.2641462811959476, "grad_norm": 1.5029503163988993, "learning_rate": 6.536960770082527e-06, "loss": 0.2951451539993286, "step": 5116 }, { "epoch": 1.264393377810724, "grad_norm": 1.4773827235220456, "learning_rate": 6.533128053180961e-06, "loss": 0.2984808683395386, "step": 5117 }, { "epoch": 1.2646404744255004, "grad_norm": 1.3220573275492116, "learning_rate": 6.529295915038474e-06, "loss": 0.28205573558807373, "step": 5118 }, { "epoch": 1.2648875710402767, "grad_norm": 1.3368278098412043, "learning_rate": 6.525464356294802e-06, "loss": 0.3024718761444092, "step": 5119 }, { "epoch": 1.2651346676550532, "grad_norm": 1.2410944136809063, "learning_rate": 6.521633377589589e-06, "loss": 0.2652056813240051, "step": 5120 }, { "epoch": 1.2653817642698295, "grad_norm": 1.565286863202259, "learning_rate": 6.517802979562374e-06, "loss": 0.3392990827560425, "step": 5121 }, { "epoch": 1.2656288608846058, "grad_norm": 1.3381447554312684, "learning_rate": 6.513973162852608e-06, "loss": 0.24616482853889465, "step": 5122 }, { "epoch": 1.2658759574993823, "grad_norm": 1.2483100494118382, "learning_rate": 6.510143928099636e-06, "loss": 0.2586262822151184, "step": 5123 }, { "epoch": 1.2661230541141586, "grad_norm": 1.314824073071928, "learning_rate": 6.506315275942714e-06, "loss": 0.23504719138145447, "step": 5124 }, { "epoch": 1.2663701507289349, "grad_norm": 1.3978689875282118, "learning_rate": 6.5024872070209936e-06, "loss": 0.3049047589302063, "step": 5125 }, { "epoch": 1.2666172473437114, "grad_norm": 1.4317420107996541, "learning_rate": 6.498659721973528e-06, "loss": 0.3363094925880432, "step": 5126 }, { "epoch": 1.266864343958488, "grad_norm": 1.2846050471551382, "learning_rate": 6.494832821439285e-06, "loss": 0.24919366836547852, "step": 5127 }, { "epoch": 1.2671114405732642, "grad_norm": 2.8798609911144286, "learning_rate": 6.491006506057121e-06, "loss": 0.24545541405677795, "step": 5128 }, { "epoch": 1.2673585371880405, "grad_norm": 1.4459191056923533, "learning_rate": 6.487180776465805e-06, "loss": 0.2848317623138428, "step": 5129 }, { "epoch": 1.267605633802817, "grad_norm": 1.4976669655925612, "learning_rate": 6.483355633303998e-06, "loss": 0.345126748085022, "step": 5130 }, { "epoch": 1.2678527304175933, "grad_norm": 1.4561031366023065, "learning_rate": 6.479531077210277e-06, "loss": 0.319659560918808, "step": 5131 }, { "epoch": 1.2680998270323696, "grad_norm": 1.453835605470687, "learning_rate": 6.4757071088231064e-06, "loss": 0.324850857257843, "step": 5132 }, { "epoch": 1.268346923647146, "grad_norm": 1.4525815086927336, "learning_rate": 6.471883728780859e-06, "loss": 0.3215321898460388, "step": 5133 }, { "epoch": 1.2685940202619224, "grad_norm": 1.3256379831875416, "learning_rate": 6.468060937721812e-06, "loss": 0.26367616653442383, "step": 5134 }, { "epoch": 1.2688411168766989, "grad_norm": 1.3782233624951434, "learning_rate": 6.464238736284138e-06, "loss": 0.2669733464717865, "step": 5135 }, { "epoch": 1.2690882134914752, "grad_norm": 1.3568673427791407, "learning_rate": 6.460417125105919e-06, "loss": 0.2694673538208008, "step": 5136 }, { "epoch": 1.2693353101062517, "grad_norm": 1.3891590830348712, "learning_rate": 6.45659610482513e-06, "loss": 0.3054199516773224, "step": 5137 }, { "epoch": 1.269582406721028, "grad_norm": 1.2058742236267392, "learning_rate": 6.452775676079659e-06, "loss": 0.26638638973236084, "step": 5138 }, { "epoch": 1.2698295033358042, "grad_norm": 1.2487502827271793, "learning_rate": 6.44895583950728e-06, "loss": 0.27116119861602783, "step": 5139 }, { "epoch": 1.2700765999505808, "grad_norm": 1.3718382022730242, "learning_rate": 6.445136595745677e-06, "loss": 0.27424222230911255, "step": 5140 }, { "epoch": 1.270323696565357, "grad_norm": 1.4039720371427145, "learning_rate": 6.441317945432437e-06, "loss": 0.3535342514514923, "step": 5141 }, { "epoch": 1.2705707931801333, "grad_norm": 1.3031845914563478, "learning_rate": 6.437499889205042e-06, "loss": 0.2885025143623352, "step": 5142 }, { "epoch": 1.2708178897949098, "grad_norm": 1.2062289432965088, "learning_rate": 6.4336824277008814e-06, "loss": 0.251611590385437, "step": 5143 }, { "epoch": 1.2710649864096861, "grad_norm": 1.4378635132386943, "learning_rate": 6.429865561557237e-06, "loss": 0.32021504640579224, "step": 5144 }, { "epoch": 1.2713120830244626, "grad_norm": 1.2595583595488558, "learning_rate": 6.426049291411302e-06, "loss": 0.26914551854133606, "step": 5145 }, { "epoch": 1.271559179639239, "grad_norm": 1.271178214542162, "learning_rate": 6.422233617900158e-06, "loss": 0.29364103078842163, "step": 5146 }, { "epoch": 1.2718062762540154, "grad_norm": 1.3709538458156605, "learning_rate": 6.418418541660795e-06, "loss": 0.27369529008865356, "step": 5147 }, { "epoch": 1.2720533728687917, "grad_norm": 1.488225509524443, "learning_rate": 6.414604063330102e-06, "loss": 0.3314514756202698, "step": 5148 }, { "epoch": 1.272300469483568, "grad_norm": 1.3977529558536168, "learning_rate": 6.410790183544865e-06, "loss": 0.32261115312576294, "step": 5149 }, { "epoch": 1.2725475660983445, "grad_norm": 1.4735462446113774, "learning_rate": 6.406976902941777e-06, "loss": 0.31486132740974426, "step": 5150 }, { "epoch": 1.2727946627131208, "grad_norm": 1.5820983737835124, "learning_rate": 6.403164222157421e-06, "loss": 0.2643354535102844, "step": 5151 }, { "epoch": 1.273041759327897, "grad_norm": 1.3565525802712681, "learning_rate": 6.399352141828293e-06, "loss": 0.31483903527259827, "step": 5152 }, { "epoch": 1.2732888559426736, "grad_norm": 1.3192100800205557, "learning_rate": 6.3955406625907745e-06, "loss": 0.2870510220527649, "step": 5153 }, { "epoch": 1.27353595255745, "grad_norm": 1.5418280420302592, "learning_rate": 6.3917297850811535e-06, "loss": 0.2878255248069763, "step": 5154 }, { "epoch": 1.2737830491722264, "grad_norm": 1.396730736292871, "learning_rate": 6.3879195099356205e-06, "loss": 0.308079332113266, "step": 5155 }, { "epoch": 1.2740301457870027, "grad_norm": 1.338360942938848, "learning_rate": 6.384109837790258e-06, "loss": 0.28426632285118103, "step": 5156 }, { "epoch": 1.2742772424017792, "grad_norm": 1.3277122895975977, "learning_rate": 6.3803007692810585e-06, "loss": 0.25629135966300964, "step": 5157 }, { "epoch": 1.2745243390165555, "grad_norm": 1.3270023766778651, "learning_rate": 6.3764923050439e-06, "loss": 0.2816929817199707, "step": 5158 }, { "epoch": 1.2747714356313318, "grad_norm": 1.2972718211850498, "learning_rate": 6.372684445714577e-06, "loss": 0.25049296021461487, "step": 5159 }, { "epoch": 1.2750185322461083, "grad_norm": 1.4941722898760132, "learning_rate": 6.3688771919287615e-06, "loss": 0.29294830560684204, "step": 5160 }, { "epoch": 1.2752656288608846, "grad_norm": 1.5298617077893617, "learning_rate": 6.36507054432204e-06, "loss": 0.3029410243034363, "step": 5161 }, { "epoch": 1.2755127254756609, "grad_norm": 1.568693572000342, "learning_rate": 6.361264503529898e-06, "loss": 0.3430333733558655, "step": 5162 }, { "epoch": 1.2757598220904374, "grad_norm": 1.24305354779566, "learning_rate": 6.357459070187708e-06, "loss": 0.22300365567207336, "step": 5163 }, { "epoch": 1.2760069187052139, "grad_norm": 1.4211154683842333, "learning_rate": 6.353654244930756e-06, "loss": 0.3553996682167053, "step": 5164 }, { "epoch": 1.2762540153199902, "grad_norm": 1.4331886168221983, "learning_rate": 6.349850028394213e-06, "loss": 0.24587881565093994, "step": 5165 }, { "epoch": 1.2765011119347665, "grad_norm": 1.4243449957639134, "learning_rate": 6.346046421213161e-06, "loss": 0.30397114157676697, "step": 5166 }, { "epoch": 1.276748208549543, "grad_norm": 1.3872338169546923, "learning_rate": 6.3422434240225675e-06, "loss": 0.2829134166240692, "step": 5167 }, { "epoch": 1.2769953051643192, "grad_norm": 1.3239070606429075, "learning_rate": 6.338441037457304e-06, "loss": 0.2473117709159851, "step": 5168 }, { "epoch": 1.2772424017790955, "grad_norm": 1.2943093495665816, "learning_rate": 6.334639262152145e-06, "loss": 0.2388172149658203, "step": 5169 }, { "epoch": 1.277489498393872, "grad_norm": 1.4903920231234746, "learning_rate": 6.330838098741751e-06, "loss": 0.29762572050094604, "step": 5170 }, { "epoch": 1.2777365950086483, "grad_norm": 1.3324132508940514, "learning_rate": 6.327037547860697e-06, "loss": 0.26598021388053894, "step": 5171 }, { "epoch": 1.2779836916234246, "grad_norm": 1.5773814316709152, "learning_rate": 6.323237610143445e-06, "loss": 0.3065432608127594, "step": 5172 }, { "epoch": 1.2782307882382011, "grad_norm": 1.2951801388276376, "learning_rate": 6.319438286224346e-06, "loss": 0.2473432719707489, "step": 5173 }, { "epoch": 1.2784778848529776, "grad_norm": 1.3735425358987445, "learning_rate": 6.315639576737669e-06, "loss": 0.28777092695236206, "step": 5174 }, { "epoch": 1.278724981467754, "grad_norm": 1.4684897036415594, "learning_rate": 6.311841482317564e-06, "loss": 0.3439573049545288, "step": 5175 }, { "epoch": 1.2789720780825302, "grad_norm": 1.3121472598725006, "learning_rate": 6.308044003598088e-06, "loss": 0.29013729095458984, "step": 5176 }, { "epoch": 1.2792191746973067, "grad_norm": 2.6304904870155816, "learning_rate": 6.30424714121319e-06, "loss": 0.24418270587921143, "step": 5177 }, { "epoch": 1.279466271312083, "grad_norm": 1.4064041753590242, "learning_rate": 6.3004508957967195e-06, "loss": 0.2857142984867096, "step": 5178 }, { "epoch": 1.2797133679268593, "grad_norm": 1.3071309523681311, "learning_rate": 6.296655267982421e-06, "loss": 0.29760339856147766, "step": 5179 }, { "epoch": 1.2799604645416358, "grad_norm": 1.1861591090601666, "learning_rate": 6.29286025840393e-06, "loss": 0.28186243772506714, "step": 5180 }, { "epoch": 1.280207561156412, "grad_norm": 1.4329302587767387, "learning_rate": 6.289065867694793e-06, "loss": 0.3343771696090698, "step": 5181 }, { "epoch": 1.2804546577711886, "grad_norm": 1.5021204012063545, "learning_rate": 6.2852720964884375e-06, "loss": 0.3142203092575073, "step": 5182 }, { "epoch": 1.280701754385965, "grad_norm": 1.3334551466586555, "learning_rate": 6.281478945418202e-06, "loss": 0.3111079931259155, "step": 5183 }, { "epoch": 1.2809488510007414, "grad_norm": 1.3072897691104994, "learning_rate": 6.2776864151173086e-06, "loss": 0.29595059156417847, "step": 5184 }, { "epoch": 1.2811959476155177, "grad_norm": 1.4043377244761852, "learning_rate": 6.273894506218888e-06, "loss": 0.291840523481369, "step": 5185 }, { "epoch": 1.281443044230294, "grad_norm": 1.3438013932284008, "learning_rate": 6.270103219355958e-06, "loss": 0.2883923649787903, "step": 5186 }, { "epoch": 1.2816901408450705, "grad_norm": 1.3018242029838711, "learning_rate": 6.266312555161434e-06, "loss": 0.2627318203449249, "step": 5187 }, { "epoch": 1.2819372374598468, "grad_norm": 1.4183178142486004, "learning_rate": 6.26252251426813e-06, "loss": 0.2643488049507141, "step": 5188 }, { "epoch": 1.282184334074623, "grad_norm": 3.5156643926412743, "learning_rate": 6.258733097308754e-06, "loss": 0.293042927980423, "step": 5189 }, { "epoch": 1.2824314306893996, "grad_norm": 1.3886717657011882, "learning_rate": 6.254944304915911e-06, "loss": 0.2900661826133728, "step": 5190 }, { "epoch": 1.2826785273041759, "grad_norm": 1.2762926395064098, "learning_rate": 6.251156137722102e-06, "loss": 0.24224036931991577, "step": 5191 }, { "epoch": 1.2829256239189524, "grad_norm": 1.3175522641087953, "learning_rate": 6.247368596359724e-06, "loss": 0.2823917269706726, "step": 5192 }, { "epoch": 1.2831727205337287, "grad_norm": 1.2472810230181925, "learning_rate": 6.24358168146107e-06, "loss": 0.24493879079818726, "step": 5193 }, { "epoch": 1.2834198171485052, "grad_norm": 1.2742387044072272, "learning_rate": 6.239795393658321e-06, "loss": 0.29645276069641113, "step": 5194 }, { "epoch": 1.2836669137632815, "grad_norm": 1.3097859433552401, "learning_rate": 6.236009733583566e-06, "loss": 0.32054299116134644, "step": 5195 }, { "epoch": 1.2839140103780577, "grad_norm": 1.370843010329523, "learning_rate": 6.232224701868775e-06, "loss": 0.30203568935394287, "step": 5196 }, { "epoch": 1.2841611069928343, "grad_norm": 1.4542173813859856, "learning_rate": 6.228440299145828e-06, "loss": 0.2781713902950287, "step": 5197 }, { "epoch": 1.2844082036076105, "grad_norm": 1.4372325527170158, "learning_rate": 6.2246565260464885e-06, "loss": 0.32265186309814453, "step": 5198 }, { "epoch": 1.2846553002223868, "grad_norm": 1.3557781234049617, "learning_rate": 6.220873383202421e-06, "loss": 0.2560783326625824, "step": 5199 }, { "epoch": 1.2849023968371633, "grad_norm": 1.3138415103138539, "learning_rate": 6.217090871245185e-06, "loss": 0.3331412672996521, "step": 5200 }, { "epoch": 1.2851494934519396, "grad_norm": 1.2538431001911714, "learning_rate": 6.213308990806227e-06, "loss": 0.2858453691005707, "step": 5201 }, { "epoch": 1.2853965900667161, "grad_norm": 1.519066457644177, "learning_rate": 6.209527742516894e-06, "loss": 0.2573016285896301, "step": 5202 }, { "epoch": 1.2856436866814924, "grad_norm": 1.3046016701698566, "learning_rate": 6.205747127008432e-06, "loss": 0.2516292929649353, "step": 5203 }, { "epoch": 1.285890783296269, "grad_norm": 1.4128876415242204, "learning_rate": 6.20196714491197e-06, "loss": 0.2803135812282562, "step": 5204 }, { "epoch": 1.2861378799110452, "grad_norm": 1.2845447467491542, "learning_rate": 6.198187796858544e-06, "loss": 0.2408480942249298, "step": 5205 }, { "epoch": 1.2863849765258215, "grad_norm": 1.4276795688374762, "learning_rate": 6.194409083479073e-06, "loss": 0.2957145571708679, "step": 5206 }, { "epoch": 1.286632073140598, "grad_norm": 1.3373605778909692, "learning_rate": 6.19063100540438e-06, "loss": 0.28084874153137207, "step": 5207 }, { "epoch": 1.2868791697553743, "grad_norm": 1.3880802950525093, "learning_rate": 6.1868535632651715e-06, "loss": 0.3040139973163605, "step": 5208 }, { "epoch": 1.2871262663701506, "grad_norm": 1.2647131400385554, "learning_rate": 6.1830767576920535e-06, "loss": 0.26831167936325073, "step": 5209 }, { "epoch": 1.287373362984927, "grad_norm": 1.6153407392695724, "learning_rate": 6.179300589315529e-06, "loss": 0.29411810636520386, "step": 5210 }, { "epoch": 1.2876204595997036, "grad_norm": 1.23854314912956, "learning_rate": 6.175525058765988e-06, "loss": 0.25099068880081177, "step": 5211 }, { "epoch": 1.28786755621448, "grad_norm": 1.4265235002750514, "learning_rate": 6.171750166673718e-06, "loss": 0.2542867660522461, "step": 5212 }, { "epoch": 1.2881146528292562, "grad_norm": 1.314563269708105, "learning_rate": 6.167975913668898e-06, "loss": 0.298580527305603, "step": 5213 }, { "epoch": 1.2883617494440327, "grad_norm": 1.2989438789823606, "learning_rate": 6.164202300381606e-06, "loss": 0.28697526454925537, "step": 5214 }, { "epoch": 1.288608846058809, "grad_norm": 1.2606279835854772, "learning_rate": 6.160429327441801e-06, "loss": 0.2513843774795532, "step": 5215 }, { "epoch": 1.2888559426735853, "grad_norm": 1.480006838039208, "learning_rate": 6.156656995479347e-06, "loss": 0.32752755284309387, "step": 5216 }, { "epoch": 1.2891030392883618, "grad_norm": 1.5004919810860573, "learning_rate": 6.152885305123994e-06, "loss": 0.3334256410598755, "step": 5217 }, { "epoch": 1.289350135903138, "grad_norm": 1.4519227364430358, "learning_rate": 6.149114257005391e-06, "loss": 0.3293655514717102, "step": 5218 }, { "epoch": 1.2895972325179146, "grad_norm": 1.408833059036662, "learning_rate": 6.145343851753074e-06, "loss": 0.24722251296043396, "step": 5219 }, { "epoch": 1.2898443291326909, "grad_norm": 1.3692923014637983, "learning_rate": 6.141574089996472e-06, "loss": 0.2891196310520172, "step": 5220 }, { "epoch": 1.2900914257474674, "grad_norm": 1.4142153174662215, "learning_rate": 6.137804972364915e-06, "loss": 0.268345445394516, "step": 5221 }, { "epoch": 1.2903385223622437, "grad_norm": 1.4164355560409059, "learning_rate": 6.134036499487612e-06, "loss": 0.2814798653125763, "step": 5222 }, { "epoch": 1.29058561897702, "grad_norm": 1.319381666388738, "learning_rate": 6.130268671993672e-06, "loss": 0.2723105549812317, "step": 5223 }, { "epoch": 1.2908327155917965, "grad_norm": 1.368459626260126, "learning_rate": 6.1265014905121e-06, "loss": 0.24068154394626617, "step": 5224 }, { "epoch": 1.2910798122065728, "grad_norm": 1.4458590274418983, "learning_rate": 6.122734955671783e-06, "loss": 0.29632899165153503, "step": 5225 }, { "epoch": 1.291326908821349, "grad_norm": 1.5511090783627297, "learning_rate": 6.118969068101512e-06, "loss": 0.31104397773742676, "step": 5226 }, { "epoch": 1.2915740054361255, "grad_norm": 1.2640470222966897, "learning_rate": 6.115203828429958e-06, "loss": 0.23458042740821838, "step": 5227 }, { "epoch": 1.2918211020509018, "grad_norm": 1.3533459656258904, "learning_rate": 6.111439237285697e-06, "loss": 0.27192482352256775, "step": 5228 }, { "epoch": 1.2920681986656783, "grad_norm": 1.38217790337319, "learning_rate": 6.1076752952971805e-06, "loss": 0.2760159373283386, "step": 5229 }, { "epoch": 1.2923152952804546, "grad_norm": 1.2239536953847614, "learning_rate": 6.103912003092763e-06, "loss": 0.23987004160881042, "step": 5230 }, { "epoch": 1.2925623918952311, "grad_norm": 1.6575302411661093, "learning_rate": 6.100149361300691e-06, "loss": 0.3356543481349945, "step": 5231 }, { "epoch": 1.2928094885100074, "grad_norm": 1.4871570640144065, "learning_rate": 6.096387370549097e-06, "loss": 0.2994697093963623, "step": 5232 }, { "epoch": 1.2930565851247837, "grad_norm": 1.4311476097543772, "learning_rate": 6.092626031466009e-06, "loss": 0.2619640827178955, "step": 5233 }, { "epoch": 1.2933036817395602, "grad_norm": 1.571108396814911, "learning_rate": 6.08886534467934e-06, "loss": 0.32854902744293213, "step": 5234 }, { "epoch": 1.2935507783543365, "grad_norm": 1.2252642352949195, "learning_rate": 6.085105310816905e-06, "loss": 0.20131613314151764, "step": 5235 }, { "epoch": 1.2937978749691128, "grad_norm": 1.7059333717633258, "learning_rate": 6.081345930506401e-06, "loss": 0.2547762393951416, "step": 5236 }, { "epoch": 1.2940449715838893, "grad_norm": 1.4981580971133708, "learning_rate": 6.077587204375413e-06, "loss": 0.30294013023376465, "step": 5237 }, { "epoch": 1.2942920681986656, "grad_norm": 1.391711367057114, "learning_rate": 6.07382913305143e-06, "loss": 0.29321181774139404, "step": 5238 }, { "epoch": 1.2945391648134421, "grad_norm": 1.5203792136326904, "learning_rate": 6.070071717161818e-06, "loss": 0.3192882537841797, "step": 5239 }, { "epoch": 1.2947862614282184, "grad_norm": 1.4155418252064167, "learning_rate": 6.066314957333844e-06, "loss": 0.2943974733352661, "step": 5240 }, { "epoch": 1.295033358042995, "grad_norm": 1.4433085477017693, "learning_rate": 6.06255885419466e-06, "loss": 0.2611016035079956, "step": 5241 }, { "epoch": 1.2952804546577712, "grad_norm": 1.3525942330244838, "learning_rate": 6.058803408371305e-06, "loss": 0.2784605622291565, "step": 5242 }, { "epoch": 1.2955275512725475, "grad_norm": 1.2752227958116915, "learning_rate": 6.055048620490718e-06, "loss": 0.2780715227127075, "step": 5243 }, { "epoch": 1.295774647887324, "grad_norm": 1.6918079220889521, "learning_rate": 6.051294491179718e-06, "loss": 0.2713841199874878, "step": 5244 }, { "epoch": 1.2960217445021003, "grad_norm": 1.3586508274485125, "learning_rate": 6.047541021065024e-06, "loss": 0.2517490088939667, "step": 5245 }, { "epoch": 1.2962688411168766, "grad_norm": 1.4500680217556041, "learning_rate": 6.043788210773233e-06, "loss": 0.324454128742218, "step": 5246 }, { "epoch": 1.296515937731653, "grad_norm": 1.4186211700470854, "learning_rate": 6.0400360609308466e-06, "loss": 0.2746371030807495, "step": 5247 }, { "epoch": 1.2967630343464296, "grad_norm": 1.381285854078908, "learning_rate": 6.036284572164245e-06, "loss": 0.2938871383666992, "step": 5248 }, { "epoch": 1.2970101309612059, "grad_norm": 1.4567559722942747, "learning_rate": 6.032533745099696e-06, "loss": 0.2915937900543213, "step": 5249 }, { "epoch": 1.2972572275759822, "grad_norm": 1.3823537320020953, "learning_rate": 6.028783580363369e-06, "loss": 0.2845075726509094, "step": 5250 }, { "epoch": 1.2975043241907587, "grad_norm": 1.186562165131949, "learning_rate": 6.025034078581311e-06, "loss": 0.2572725713253021, "step": 5251 }, { "epoch": 1.297751420805535, "grad_norm": 1.4501925968176566, "learning_rate": 6.0212852403794675e-06, "loss": 0.24432674050331116, "step": 5252 }, { "epoch": 1.2979985174203112, "grad_norm": 1.4663619465026652, "learning_rate": 6.017537066383665e-06, "loss": 0.28731769323349, "step": 5253 }, { "epoch": 1.2982456140350878, "grad_norm": 1.3936243785693672, "learning_rate": 6.013789557219627e-06, "loss": 0.26722967624664307, "step": 5254 }, { "epoch": 1.298492710649864, "grad_norm": 1.4403400089268867, "learning_rate": 6.010042713512963e-06, "loss": 0.31001389026641846, "step": 5255 }, { "epoch": 1.2987398072646406, "grad_norm": 1.5316701879627441, "learning_rate": 6.006296535889162e-06, "loss": 0.32783663272857666, "step": 5256 }, { "epoch": 1.2989869038794168, "grad_norm": 1.255408913824229, "learning_rate": 6.00255102497362e-06, "loss": 0.293109655380249, "step": 5257 }, { "epoch": 1.2992340004941934, "grad_norm": 1.3260720484855353, "learning_rate": 5.998806181391606e-06, "loss": 0.31017374992370605, "step": 5258 }, { "epoch": 1.2994810971089696, "grad_norm": 1.330286846278817, "learning_rate": 5.9950620057682865e-06, "loss": 0.2656733989715576, "step": 5259 }, { "epoch": 1.299728193723746, "grad_norm": 1.3483534062560618, "learning_rate": 5.991318498728712e-06, "loss": 0.2620123028755188, "step": 5260 }, { "epoch": 1.2999752903385224, "grad_norm": 1.3284527294987944, "learning_rate": 5.9875756608978256e-06, "loss": 0.28670448064804077, "step": 5261 }, { "epoch": 1.3002223869532987, "grad_norm": 1.323476685480727, "learning_rate": 5.983833492900455e-06, "loss": 0.30729496479034424, "step": 5262 }, { "epoch": 1.300469483568075, "grad_norm": 1.2846232296436255, "learning_rate": 5.980091995361314e-06, "loss": 0.2795347571372986, "step": 5263 }, { "epoch": 1.3007165801828515, "grad_norm": 1.3540191976207518, "learning_rate": 5.976351168905012e-06, "loss": 0.24315482378005981, "step": 5264 }, { "epoch": 1.3009636767976278, "grad_norm": 1.3590427192329022, "learning_rate": 5.972611014156038e-06, "loss": 0.2912958562374115, "step": 5265 }, { "epoch": 1.3012107734124043, "grad_norm": 1.2487382684612973, "learning_rate": 5.968871531738777e-06, "loss": 0.24254709482192993, "step": 5266 }, { "epoch": 1.3014578700271806, "grad_norm": 1.5639852235593694, "learning_rate": 5.965132722277494e-06, "loss": 0.3129711151123047, "step": 5267 }, { "epoch": 1.3017049666419571, "grad_norm": 1.3307915505772574, "learning_rate": 5.961394586396348e-06, "loss": 0.23365119099617004, "step": 5268 }, { "epoch": 1.3019520632567334, "grad_norm": 1.1504671059413512, "learning_rate": 5.957657124719385e-06, "loss": 0.22603382170200348, "step": 5269 }, { "epoch": 1.3021991598715097, "grad_norm": 1.3880632622072586, "learning_rate": 5.953920337870528e-06, "loss": 0.2920875549316406, "step": 5270 }, { "epoch": 1.3024462564862862, "grad_norm": 1.4169194893452748, "learning_rate": 5.950184226473603e-06, "loss": 0.307807594537735, "step": 5271 }, { "epoch": 1.3026933531010625, "grad_norm": 1.2065865954350412, "learning_rate": 5.946448791152312e-06, "loss": 0.2658208906650543, "step": 5272 }, { "epoch": 1.3029404497158388, "grad_norm": 1.363128313607036, "learning_rate": 5.94271403253025e-06, "loss": 0.3074694275856018, "step": 5273 }, { "epoch": 1.3031875463306153, "grad_norm": 1.4919395359465113, "learning_rate": 5.938979951230896e-06, "loss": 0.345661997795105, "step": 5274 }, { "epoch": 1.3034346429453916, "grad_norm": 1.3556021260716318, "learning_rate": 5.93524654787762e-06, "loss": 0.28022927045822144, "step": 5275 }, { "epoch": 1.303681739560168, "grad_norm": 1.493817461677632, "learning_rate": 5.931513823093676e-06, "loss": 0.327042818069458, "step": 5276 }, { "epoch": 1.3039288361749444, "grad_norm": 1.5166025691049965, "learning_rate": 5.927781777502195e-06, "loss": 0.2911815941333771, "step": 5277 }, { "epoch": 1.3041759327897209, "grad_norm": 1.3690327739460317, "learning_rate": 5.924050411726215e-06, "loss": 0.2914287745952606, "step": 5278 }, { "epoch": 1.3044230294044972, "grad_norm": 1.529912122145041, "learning_rate": 5.920319726388644e-06, "loss": 0.2926628291606903, "step": 5279 }, { "epoch": 1.3046701260192735, "grad_norm": 1.3371035558118838, "learning_rate": 5.916589722112284e-06, "loss": 0.2570241391658783, "step": 5280 }, { "epoch": 1.30491722263405, "grad_norm": 1.3772311284851495, "learning_rate": 5.912860399519822e-06, "loss": 0.25069811940193176, "step": 5281 }, { "epoch": 1.3051643192488263, "grad_norm": 1.3277578341919118, "learning_rate": 5.909131759233829e-06, "loss": 0.27413448691368103, "step": 5282 }, { "epoch": 1.3054114158636025, "grad_norm": 1.5520964470561749, "learning_rate": 5.905403801876768e-06, "loss": 0.3266153633594513, "step": 5283 }, { "epoch": 1.305658512478379, "grad_norm": 1.3987988944125516, "learning_rate": 5.901676528070974e-06, "loss": 0.3016653060913086, "step": 5284 }, { "epoch": 1.3059056090931556, "grad_norm": 1.422929700102426, "learning_rate": 5.897949938438687e-06, "loss": 0.2555871605873108, "step": 5285 }, { "epoch": 1.3061527057079318, "grad_norm": 1.3531861800429645, "learning_rate": 5.894224033602018e-06, "loss": 0.30721384286880493, "step": 5286 }, { "epoch": 1.3063998023227081, "grad_norm": 1.4465771981209599, "learning_rate": 5.8904988141829726e-06, "loss": 0.2911462187767029, "step": 5287 }, { "epoch": 1.3066468989374846, "grad_norm": 1.3146566576970722, "learning_rate": 5.886774280803434e-06, "loss": 0.29867100715637207, "step": 5288 }, { "epoch": 1.306893995552261, "grad_norm": 1.3427536928219703, "learning_rate": 5.883050434085179e-06, "loss": 0.2505491375923157, "step": 5289 }, { "epoch": 1.3071410921670372, "grad_norm": 1.343820091240789, "learning_rate": 5.879327274649868e-06, "loss": 0.26677215099334717, "step": 5290 }, { "epoch": 1.3073881887818137, "grad_norm": 1.4497789300622572, "learning_rate": 5.875604803119039e-06, "loss": 0.28487056493759155, "step": 5291 }, { "epoch": 1.30763528539659, "grad_norm": 1.4418965612322743, "learning_rate": 5.871883020114121e-06, "loss": 0.2862715721130371, "step": 5292 }, { "epoch": 1.3078823820113663, "grad_norm": 1.3996464102423627, "learning_rate": 5.868161926256433e-06, "loss": 0.29136186838150024, "step": 5293 }, { "epoch": 1.3081294786261428, "grad_norm": 1.4543229808169067, "learning_rate": 5.864441522167168e-06, "loss": 0.30573201179504395, "step": 5294 }, { "epoch": 1.3083765752409193, "grad_norm": 1.540307757355147, "learning_rate": 5.860721808467416e-06, "loss": 0.2686055898666382, "step": 5295 }, { "epoch": 1.3086236718556956, "grad_norm": 1.3865453946710702, "learning_rate": 5.857002785778139e-06, "loss": 0.24403464794158936, "step": 5296 }, { "epoch": 1.308870768470472, "grad_norm": 1.2375243971885714, "learning_rate": 5.8532844547201975e-06, "loss": 0.22884806990623474, "step": 5297 }, { "epoch": 1.3091178650852484, "grad_norm": 1.3530640358098744, "learning_rate": 5.849566815914322e-06, "loss": 0.3302457332611084, "step": 5298 }, { "epoch": 1.3093649617000247, "grad_norm": 1.5591685867091156, "learning_rate": 5.845849869981137e-06, "loss": 0.29122239351272583, "step": 5299 }, { "epoch": 1.309612058314801, "grad_norm": 1.5372258030880122, "learning_rate": 5.8421336175411505e-06, "loss": 0.3194583058357239, "step": 5300 }, { "epoch": 1.3098591549295775, "grad_norm": 1.4524843572115889, "learning_rate": 5.838418059214751e-06, "loss": 0.26929181814193726, "step": 5301 }, { "epoch": 1.3101062515443538, "grad_norm": 1.2590153716301418, "learning_rate": 5.8347031956222135e-06, "loss": 0.23526468873023987, "step": 5302 }, { "epoch": 1.3103533481591303, "grad_norm": 1.4058660262951472, "learning_rate": 5.830989027383701e-06, "loss": 0.30905404686927795, "step": 5303 }, { "epoch": 1.3106004447739066, "grad_norm": 1.3547828163525206, "learning_rate": 5.827275555119254e-06, "loss": 0.27085644006729126, "step": 5304 }, { "epoch": 1.310847541388683, "grad_norm": 1.401493833779058, "learning_rate": 5.823562779448792e-06, "loss": 0.2431168556213379, "step": 5305 }, { "epoch": 1.3110946380034594, "grad_norm": 1.5231712873045944, "learning_rate": 5.8198507009921325e-06, "loss": 0.285494863986969, "step": 5306 }, { "epoch": 1.3113417346182357, "grad_norm": 1.4720353258504886, "learning_rate": 5.816139320368966e-06, "loss": 0.28202158212661743, "step": 5307 }, { "epoch": 1.3115888312330122, "grad_norm": 1.6726825625762767, "learning_rate": 5.812428638198877e-06, "loss": 0.3025549054145813, "step": 5308 }, { "epoch": 1.3118359278477885, "grad_norm": 1.4758837288496547, "learning_rate": 5.8087186551013166e-06, "loss": 0.24956506490707397, "step": 5309 }, { "epoch": 1.3120830244625648, "grad_norm": 1.3392866000670374, "learning_rate": 5.805009371695636e-06, "loss": 0.2782644033432007, "step": 5310 }, { "epoch": 1.3123301210773413, "grad_norm": 1.4332795281414479, "learning_rate": 5.801300788601056e-06, "loss": 0.278215229511261, "step": 5311 }, { "epoch": 1.3125772176921175, "grad_norm": 1.4965589676775153, "learning_rate": 5.797592906436691e-06, "loss": 0.2735884189605713, "step": 5312 }, { "epoch": 1.312824314306894, "grad_norm": 1.2411151135356564, "learning_rate": 5.793885725821537e-06, "loss": 0.25108838081359863, "step": 5313 }, { "epoch": 1.3130714109216703, "grad_norm": 1.3010698078468521, "learning_rate": 5.790179247374461e-06, "loss": 0.2799219489097595, "step": 5314 }, { "epoch": 1.3133185075364469, "grad_norm": 1.4361705787922916, "learning_rate": 5.7864734717142305e-06, "loss": 0.31815648078918457, "step": 5315 }, { "epoch": 1.3135656041512231, "grad_norm": 1.3799973859878507, "learning_rate": 5.782768399459482e-06, "loss": 0.271809846162796, "step": 5316 }, { "epoch": 1.3138127007659994, "grad_norm": 1.410069913599167, "learning_rate": 5.779064031228747e-06, "loss": 0.3394823968410492, "step": 5317 }, { "epoch": 1.314059797380776, "grad_norm": 1.3353246825872815, "learning_rate": 5.775360367640428e-06, "loss": 0.2971060872077942, "step": 5318 }, { "epoch": 1.3143068939955522, "grad_norm": 1.5467169045300393, "learning_rate": 5.7716574093128086e-06, "loss": 0.34420841932296753, "step": 5319 }, { "epoch": 1.3145539906103285, "grad_norm": 1.4692313853189776, "learning_rate": 5.767955156864064e-06, "loss": 0.2593657076358795, "step": 5320 }, { "epoch": 1.314801087225105, "grad_norm": 1.4418349457756952, "learning_rate": 5.7642536109122495e-06, "loss": 0.29342082142829895, "step": 5321 }, { "epoch": 1.3150481838398813, "grad_norm": 1.417312721337836, "learning_rate": 5.760552772075304e-06, "loss": 0.3095685839653015, "step": 5322 }, { "epoch": 1.3152952804546578, "grad_norm": 1.2290141684650142, "learning_rate": 5.756852640971036e-06, "loss": 0.28143933415412903, "step": 5323 }, { "epoch": 1.315542377069434, "grad_norm": 1.4883047933846736, "learning_rate": 5.7531532182171536e-06, "loss": 0.28146469593048096, "step": 5324 }, { "epoch": 1.3157894736842106, "grad_norm": 1.4571749373853666, "learning_rate": 5.749454504431229e-06, "loss": 0.2581382393836975, "step": 5325 }, { "epoch": 1.316036570298987, "grad_norm": 1.3754951698652405, "learning_rate": 5.7457565002307295e-06, "loss": 0.26752033829689026, "step": 5326 }, { "epoch": 1.3162836669137632, "grad_norm": 1.3157091592871109, "learning_rate": 5.742059206233004e-06, "loss": 0.29940682649612427, "step": 5327 }, { "epoch": 1.3165307635285397, "grad_norm": 1.3282028978134008, "learning_rate": 5.738362623055269e-06, "loss": 0.27930647134780884, "step": 5328 }, { "epoch": 1.316777860143316, "grad_norm": 1.3472644636778528, "learning_rate": 5.734666751314636e-06, "loss": 0.2697816789150238, "step": 5329 }, { "epoch": 1.3170249567580923, "grad_norm": 1.465158758776522, "learning_rate": 5.730971591628094e-06, "loss": 0.2859431207180023, "step": 5330 }, { "epoch": 1.3172720533728688, "grad_norm": 1.2811329036461934, "learning_rate": 5.727277144612515e-06, "loss": 0.2766760289669037, "step": 5331 }, { "epoch": 1.3175191499876453, "grad_norm": 1.4979756602077345, "learning_rate": 5.7235834108846474e-06, "loss": 0.2697315216064453, "step": 5332 }, { "epoch": 1.3177662466024216, "grad_norm": 1.337934688957454, "learning_rate": 5.719890391061117e-06, "loss": 0.25444990396499634, "step": 5333 }, { "epoch": 1.3180133432171979, "grad_norm": 1.4460099305756893, "learning_rate": 5.716198085758441e-06, "loss": 0.33496347069740295, "step": 5334 }, { "epoch": 1.3182604398319744, "grad_norm": 1.1759876965623226, "learning_rate": 5.712506495593011e-06, "loss": 0.22816796600818634, "step": 5335 }, { "epoch": 1.3185075364467507, "grad_norm": 1.334645286725464, "learning_rate": 5.708815621181105e-06, "loss": 0.27799755334854126, "step": 5336 }, { "epoch": 1.318754633061527, "grad_norm": 1.6925520125353748, "learning_rate": 5.70512546313887e-06, "loss": 0.2532825171947479, "step": 5337 }, { "epoch": 1.3190017296763035, "grad_norm": 1.1895468178884847, "learning_rate": 5.7014360220823475e-06, "loss": 0.25825443863868713, "step": 5338 }, { "epoch": 1.3192488262910798, "grad_norm": 1.550061825293135, "learning_rate": 5.697747298627443e-06, "loss": 0.31399065256118774, "step": 5339 }, { "epoch": 1.3194959229058563, "grad_norm": 1.2262429883767663, "learning_rate": 5.694059293389958e-06, "loss": 0.2497171312570572, "step": 5340 }, { "epoch": 1.3197430195206326, "grad_norm": 1.4099491547705303, "learning_rate": 5.6903720069855695e-06, "loss": 0.3139837980270386, "step": 5341 }, { "epoch": 1.319990116135409, "grad_norm": 1.4628995627730086, "learning_rate": 5.686685440029826e-06, "loss": 0.33979135751724243, "step": 5342 }, { "epoch": 1.3202372127501854, "grad_norm": 1.3911575410334733, "learning_rate": 5.6829995931381665e-06, "loss": 0.27873024344444275, "step": 5343 }, { "epoch": 1.3204843093649616, "grad_norm": 1.369250194168039, "learning_rate": 5.679314466925905e-06, "loss": 0.27139416337013245, "step": 5344 }, { "epoch": 1.3207314059797381, "grad_norm": 1.3771009991650465, "learning_rate": 5.6756300620082394e-06, "loss": 0.27974164485931396, "step": 5345 }, { "epoch": 1.3209785025945144, "grad_norm": 1.3937428803814607, "learning_rate": 5.671946379000239e-06, "loss": 0.2854241728782654, "step": 5346 }, { "epoch": 1.3212255992092907, "grad_norm": 1.2938474856621478, "learning_rate": 5.6682634185168575e-06, "loss": 0.24283045530319214, "step": 5347 }, { "epoch": 1.3214726958240672, "grad_norm": 1.2718753529211102, "learning_rate": 5.664581181172925e-06, "loss": 0.2666167616844177, "step": 5348 }, { "epoch": 1.3217197924388435, "grad_norm": 1.2592745315674239, "learning_rate": 5.660899667583161e-06, "loss": 0.2870130240917206, "step": 5349 }, { "epoch": 1.32196688905362, "grad_norm": 1.2917932270323254, "learning_rate": 5.6572188783621565e-06, "loss": 0.2419617772102356, "step": 5350 }, { "epoch": 1.3222139856683963, "grad_norm": 1.6273663615092373, "learning_rate": 5.653538814124373e-06, "loss": 0.32821041345596313, "step": 5351 }, { "epoch": 1.3224610822831728, "grad_norm": 1.4028288885932185, "learning_rate": 5.64985947548417e-06, "loss": 0.29547086358070374, "step": 5352 }, { "epoch": 1.3227081788979491, "grad_norm": 1.279786948123848, "learning_rate": 5.6461808630557695e-06, "loss": 0.2521221339702606, "step": 5353 }, { "epoch": 1.3229552755127254, "grad_norm": 1.3043305194555774, "learning_rate": 5.642502977453278e-06, "loss": 0.29376402497291565, "step": 5354 }, { "epoch": 1.323202372127502, "grad_norm": 1.3674318959039369, "learning_rate": 5.638825819290687e-06, "loss": 0.331889808177948, "step": 5355 }, { "epoch": 1.3234494687422782, "grad_norm": 1.469246277263081, "learning_rate": 5.635149389181855e-06, "loss": 0.3556157946586609, "step": 5356 }, { "epoch": 1.3236965653570545, "grad_norm": 1.5279212683182182, "learning_rate": 5.631473687740527e-06, "loss": 0.3234179615974426, "step": 5357 }, { "epoch": 1.323943661971831, "grad_norm": 1.4220952327030274, "learning_rate": 5.627798715580324e-06, "loss": 0.31314152479171753, "step": 5358 }, { "epoch": 1.3241907585866073, "grad_norm": 1.1700797021720442, "learning_rate": 5.6241244733147475e-06, "loss": 0.2051088809967041, "step": 5359 }, { "epoch": 1.3244378552013838, "grad_norm": 1.4895544197432984, "learning_rate": 5.620450961557173e-06, "loss": 0.26663053035736084, "step": 5360 }, { "epoch": 1.32468495181616, "grad_norm": 1.366336279590068, "learning_rate": 5.616778180920853e-06, "loss": 0.2788234353065491, "step": 5361 }, { "epoch": 1.3249320484309366, "grad_norm": 1.40667501629188, "learning_rate": 5.613106132018924e-06, "loss": 0.3024321496486664, "step": 5362 }, { "epoch": 1.3251791450457129, "grad_norm": 1.5755872484631268, "learning_rate": 5.609434815464395e-06, "loss": 0.3324972689151764, "step": 5363 }, { "epoch": 1.3254262416604892, "grad_norm": 1.3995149004912173, "learning_rate": 5.605764231870162e-06, "loss": 0.2451857477426529, "step": 5364 }, { "epoch": 1.3256733382752657, "grad_norm": 1.466414656469143, "learning_rate": 5.602094381848983e-06, "loss": 0.30463147163391113, "step": 5365 }, { "epoch": 1.325920434890042, "grad_norm": 1.360998207577422, "learning_rate": 5.598425266013509e-06, "loss": 0.30644139647483826, "step": 5366 }, { "epoch": 1.3261675315048183, "grad_norm": 1.565429933012678, "learning_rate": 5.594756884976255e-06, "loss": 0.26824861764907837, "step": 5367 }, { "epoch": 1.3264146281195948, "grad_norm": 1.9085808785232348, "learning_rate": 5.591089239349623e-06, "loss": 0.2774592638015747, "step": 5368 }, { "epoch": 1.3266617247343713, "grad_norm": 1.593168432762873, "learning_rate": 5.587422329745894e-06, "loss": 0.29380369186401367, "step": 5369 }, { "epoch": 1.3269088213491476, "grad_norm": 1.209695903756024, "learning_rate": 5.583756156777213e-06, "loss": 0.2729087173938751, "step": 5370 }, { "epoch": 1.3271559179639238, "grad_norm": 1.3828928806601373, "learning_rate": 5.580090721055614e-06, "loss": 0.29280415177345276, "step": 5371 }, { "epoch": 1.3274030145787004, "grad_norm": 1.261113956097883, "learning_rate": 5.576426023193005e-06, "loss": 0.235487163066864, "step": 5372 }, { "epoch": 1.3276501111934766, "grad_norm": 1.3437576441474093, "learning_rate": 5.572762063801173e-06, "loss": 0.29954254627227783, "step": 5373 }, { "epoch": 1.327897207808253, "grad_norm": 1.3788694915882374, "learning_rate": 5.569098843491777e-06, "loss": 0.29520678520202637, "step": 5374 }, { "epoch": 1.3281443044230294, "grad_norm": 1.2971404969387479, "learning_rate": 5.565436362876348e-06, "loss": 0.2752380073070526, "step": 5375 }, { "epoch": 1.3283914010378057, "grad_norm": 1.2684099894577396, "learning_rate": 5.561774622566306e-06, "loss": 0.26432445645332336, "step": 5376 }, { "epoch": 1.3286384976525822, "grad_norm": 1.463180500944386, "learning_rate": 5.558113623172941e-06, "loss": 0.2032167911529541, "step": 5377 }, { "epoch": 1.3288855942673585, "grad_norm": 1.3815964125524625, "learning_rate": 5.5544533653074205e-06, "loss": 0.29602500796318054, "step": 5378 }, { "epoch": 1.329132690882135, "grad_norm": 1.2563556971826098, "learning_rate": 5.550793849580784e-06, "loss": 0.23392269015312195, "step": 5379 }, { "epoch": 1.3293797874969113, "grad_norm": 1.4664439547046684, "learning_rate": 5.547135076603955e-06, "loss": 0.3303859829902649, "step": 5380 }, { "epoch": 1.3296268841116876, "grad_norm": 1.523759121748342, "learning_rate": 5.543477046987723e-06, "loss": 0.30972838401794434, "step": 5381 }, { "epoch": 1.3298739807264641, "grad_norm": 1.3968563785021784, "learning_rate": 5.539819761342766e-06, "loss": 0.30717331171035767, "step": 5382 }, { "epoch": 1.3301210773412404, "grad_norm": 1.516456966622148, "learning_rate": 5.536163220279622e-06, "loss": 0.32240843772888184, "step": 5383 }, { "epoch": 1.3303681739560167, "grad_norm": 1.414985421299412, "learning_rate": 5.532507424408719e-06, "loss": 0.26544344425201416, "step": 5384 }, { "epoch": 1.3306152705707932, "grad_norm": 1.4753099612730745, "learning_rate": 5.528852374340352e-06, "loss": 0.2816266417503357, "step": 5385 }, { "epoch": 1.3308623671855695, "grad_norm": 1.3368708488206769, "learning_rate": 5.525198070684703e-06, "loss": 0.28293299674987793, "step": 5386 }, { "epoch": 1.331109463800346, "grad_norm": 1.4055893389841996, "learning_rate": 5.521544514051813e-06, "loss": 0.3107130229473114, "step": 5387 }, { "epoch": 1.3313565604151223, "grad_norm": 1.4009850359514882, "learning_rate": 5.517891705051604e-06, "loss": 0.26315945386886597, "step": 5388 }, { "epoch": 1.3316036570298988, "grad_norm": 1.5708016291904028, "learning_rate": 5.51423964429388e-06, "loss": 0.3007359802722931, "step": 5389 }, { "epoch": 1.331850753644675, "grad_norm": 1.432287437158351, "learning_rate": 5.510588332388313e-06, "loss": 0.28493446111679077, "step": 5390 }, { "epoch": 1.3320978502594514, "grad_norm": 1.5273293699090478, "learning_rate": 5.5069377699444585e-06, "loss": 0.31306061148643494, "step": 5391 }, { "epoch": 1.3323449468742279, "grad_norm": 1.184636451725919, "learning_rate": 5.5032879575717326e-06, "loss": 0.21492698788642883, "step": 5392 }, { "epoch": 1.3325920434890042, "grad_norm": 1.369138294813533, "learning_rate": 5.499638895879441e-06, "loss": 0.29012531042099, "step": 5393 }, { "epoch": 1.3328391401037805, "grad_norm": 1.354709594928187, "learning_rate": 5.495990585476751e-06, "loss": 0.24134881794452667, "step": 5394 }, { "epoch": 1.333086236718557, "grad_norm": 1.4028652809986324, "learning_rate": 5.492343026972715e-06, "loss": 0.2600637674331665, "step": 5395 }, { "epoch": 1.3333333333333333, "grad_norm": 1.434808317948657, "learning_rate": 5.4886962209762604e-06, "loss": 0.2645324170589447, "step": 5396 }, { "epoch": 1.3335804299481098, "grad_norm": 1.426616729923069, "learning_rate": 5.485050168096175e-06, "loss": 0.25983935594558716, "step": 5397 }, { "epoch": 1.333827526562886, "grad_norm": 1.5047133835548279, "learning_rate": 5.481404868941133e-06, "loss": 0.2735251784324646, "step": 5398 }, { "epoch": 1.3340746231776626, "grad_norm": 1.304255666400554, "learning_rate": 5.477760324119685e-06, "loss": 0.27205711603164673, "step": 5399 }, { "epoch": 1.3343217197924389, "grad_norm": 1.3840845838746798, "learning_rate": 5.474116534240251e-06, "loss": 0.2615079879760742, "step": 5400 }, { "epoch": 1.3345688164072151, "grad_norm": 1.4442805638185916, "learning_rate": 5.4704734999111195e-06, "loss": 0.3047043979167938, "step": 5401 }, { "epoch": 1.3348159130219917, "grad_norm": 1.2820189903627879, "learning_rate": 5.466831221740459e-06, "loss": 0.2436104714870453, "step": 5402 }, { "epoch": 1.335063009636768, "grad_norm": 1.3367596381191273, "learning_rate": 5.4631897003363085e-06, "loss": 0.28784453868865967, "step": 5403 }, { "epoch": 1.3353101062515442, "grad_norm": 1.458801030038268, "learning_rate": 5.459548936306589e-06, "loss": 0.2750442624092102, "step": 5404 }, { "epoch": 1.3355572028663207, "grad_norm": 1.5546270254855667, "learning_rate": 5.455908930259089e-06, "loss": 0.34218767285346985, "step": 5405 }, { "epoch": 1.3358042994810972, "grad_norm": 1.3331846079099172, "learning_rate": 5.452269682801465e-06, "loss": 0.29300910234451294, "step": 5406 }, { "epoch": 1.3360513960958735, "grad_norm": 1.3634573843757827, "learning_rate": 5.448631194541259e-06, "loss": 0.30820298194885254, "step": 5407 }, { "epoch": 1.3362984927106498, "grad_norm": 1.3828031397369778, "learning_rate": 5.444993466085872e-06, "loss": 0.29116761684417725, "step": 5408 }, { "epoch": 1.3365455893254263, "grad_norm": 1.3896261868627515, "learning_rate": 5.441356498042592e-06, "loss": 0.30034440755844116, "step": 5409 }, { "epoch": 1.3367926859402026, "grad_norm": 1.3017900747738544, "learning_rate": 5.437720291018576e-06, "loss": 0.2830389440059662, "step": 5410 }, { "epoch": 1.337039782554979, "grad_norm": 1.3905810553108413, "learning_rate": 5.4340848456208414e-06, "loss": 0.32898449897766113, "step": 5411 }, { "epoch": 1.3372868791697554, "grad_norm": 1.1323488805458284, "learning_rate": 5.430450162456295e-06, "loss": 0.18759958446025848, "step": 5412 }, { "epoch": 1.3375339757845317, "grad_norm": 1.5029683046230937, "learning_rate": 5.426816242131714e-06, "loss": 0.2971324920654297, "step": 5413 }, { "epoch": 1.337781072399308, "grad_norm": 1.4859545891359855, "learning_rate": 5.423183085253743e-06, "loss": 0.29727908968925476, "step": 5414 }, { "epoch": 1.3380281690140845, "grad_norm": 1.4640683721614458, "learning_rate": 5.4195506924288985e-06, "loss": 0.32837551832199097, "step": 5415 }, { "epoch": 1.338275265628861, "grad_norm": 1.2877273046867526, "learning_rate": 5.4159190642635685e-06, "loss": 0.24845987558364868, "step": 5416 }, { "epoch": 1.3385223622436373, "grad_norm": 1.4526704281721932, "learning_rate": 5.41228820136402e-06, "loss": 0.29502347111701965, "step": 5417 }, { "epoch": 1.3387694588584136, "grad_norm": 1.303057017767407, "learning_rate": 5.408658104336387e-06, "loss": 0.24848084151744843, "step": 5418 }, { "epoch": 1.33901655547319, "grad_norm": 1.3599294948738412, "learning_rate": 5.405028773786684e-06, "loss": 0.2862665057182312, "step": 5419 }, { "epoch": 1.3392636520879664, "grad_norm": 1.6420921673742923, "learning_rate": 5.401400210320782e-06, "loss": 0.3573988080024719, "step": 5420 }, { "epoch": 1.3395107487027427, "grad_norm": 1.2932451877467372, "learning_rate": 5.39777241454444e-06, "loss": 0.2432803511619568, "step": 5421 }, { "epoch": 1.3397578453175192, "grad_norm": 1.5085871858744173, "learning_rate": 5.394145387063274e-06, "loss": 0.3082028329372406, "step": 5422 }, { "epoch": 1.3400049419322955, "grad_norm": 1.2788792729678085, "learning_rate": 5.390519128482785e-06, "loss": 0.25965481996536255, "step": 5423 }, { "epoch": 1.340252038547072, "grad_norm": 1.2373268657618501, "learning_rate": 5.386893639408342e-06, "loss": 0.2510172724723816, "step": 5424 }, { "epoch": 1.3404991351618483, "grad_norm": 1.350166468289286, "learning_rate": 5.383268920445177e-06, "loss": 0.2740471363067627, "step": 5425 }, { "epoch": 1.3407462317766248, "grad_norm": 1.3837894249276494, "learning_rate": 5.379644972198403e-06, "loss": 0.3111949861049652, "step": 5426 }, { "epoch": 1.340993328391401, "grad_norm": 1.3429839303731033, "learning_rate": 5.376021795273002e-06, "loss": 0.25410300493240356, "step": 5427 }, { "epoch": 1.3412404250061774, "grad_norm": 1.5598380379406995, "learning_rate": 5.372399390273831e-06, "loss": 0.3154035210609436, "step": 5428 }, { "epoch": 1.3414875216209539, "grad_norm": 1.3043759851770245, "learning_rate": 5.368777757805608e-06, "loss": 0.2742840647697449, "step": 5429 }, { "epoch": 1.3417346182357301, "grad_norm": 1.5776089022221067, "learning_rate": 5.365156898472925e-06, "loss": 0.29192814230918884, "step": 5430 }, { "epoch": 1.3419817148505064, "grad_norm": 1.3286775690629937, "learning_rate": 5.361536812880252e-06, "loss": 0.3138309121131897, "step": 5431 }, { "epoch": 1.342228811465283, "grad_norm": 1.3970428567194217, "learning_rate": 5.357917501631925e-06, "loss": 0.27954286336898804, "step": 5432 }, { "epoch": 1.3424759080800592, "grad_norm": 1.4324416430014353, "learning_rate": 5.3542989653321564e-06, "loss": 0.28409045934677124, "step": 5433 }, { "epoch": 1.3427230046948357, "grad_norm": 1.3598509446943359, "learning_rate": 5.350681204585013e-06, "loss": 0.2938222289085388, "step": 5434 }, { "epoch": 1.342970101309612, "grad_norm": 1.4191060656765613, "learning_rate": 5.3470642199944556e-06, "loss": 0.2924020290374756, "step": 5435 }, { "epoch": 1.3432171979243885, "grad_norm": 1.3667596083825617, "learning_rate": 5.343448012164292e-06, "loss": 0.2824581265449524, "step": 5436 }, { "epoch": 1.3434642945391648, "grad_norm": 1.4933867315978226, "learning_rate": 5.3398325816982165e-06, "loss": 0.3229672908782959, "step": 5437 }, { "epoch": 1.3437113911539411, "grad_norm": 2.0345289725160955, "learning_rate": 5.336217929199792e-06, "loss": 0.3348033130168915, "step": 5438 }, { "epoch": 1.3439584877687176, "grad_norm": 1.6543617878921264, "learning_rate": 5.33260405527244e-06, "loss": 0.289614737033844, "step": 5439 }, { "epoch": 1.344205584383494, "grad_norm": 1.2698321671621888, "learning_rate": 5.3289909605194645e-06, "loss": 0.26399680972099304, "step": 5440 }, { "epoch": 1.3444526809982702, "grad_norm": 1.2877508584931012, "learning_rate": 5.325378645544036e-06, "loss": 0.29450005292892456, "step": 5441 }, { "epoch": 1.3446997776130467, "grad_norm": 1.3545743352165371, "learning_rate": 5.321767110949196e-06, "loss": 0.2949198782444, "step": 5442 }, { "epoch": 1.344946874227823, "grad_norm": 1.558267842203426, "learning_rate": 5.318156357337849e-06, "loss": 0.3530550003051758, "step": 5443 }, { "epoch": 1.3451939708425995, "grad_norm": 1.3306625462966641, "learning_rate": 5.314546385312773e-06, "loss": 0.2883753776550293, "step": 5444 }, { "epoch": 1.3454410674573758, "grad_norm": 1.366614832269312, "learning_rate": 5.310937195476618e-06, "loss": 0.24902969598770142, "step": 5445 }, { "epoch": 1.3456881640721523, "grad_norm": 2.3683045837627534, "learning_rate": 5.3073287884319e-06, "loss": 0.27745795249938965, "step": 5446 }, { "epoch": 1.3459352606869286, "grad_norm": 1.4625088351050413, "learning_rate": 5.3037211647810125e-06, "loss": 0.32824429869651794, "step": 5447 }, { "epoch": 1.3461823573017049, "grad_norm": 1.3615523992005452, "learning_rate": 5.3001143251262025e-06, "loss": 0.25964102149009705, "step": 5448 }, { "epoch": 1.3464294539164814, "grad_norm": 1.3283744928081471, "learning_rate": 5.296508270069604e-06, "loss": 0.2784181833267212, "step": 5449 }, { "epoch": 1.3466765505312577, "grad_norm": 1.2118056765954732, "learning_rate": 5.292903000213203e-06, "loss": 0.23070375621318817, "step": 5450 }, { "epoch": 1.346923647146034, "grad_norm": 1.6890822864589203, "learning_rate": 5.289298516158865e-06, "loss": 0.29599231481552124, "step": 5451 }, { "epoch": 1.3471707437608105, "grad_norm": 1.2587167199477645, "learning_rate": 5.285694818508329e-06, "loss": 0.2677992284297943, "step": 5452 }, { "epoch": 1.347417840375587, "grad_norm": 1.308561285337355, "learning_rate": 5.282091907863187e-06, "loss": 0.236444890499115, "step": 5453 }, { "epoch": 1.3476649369903633, "grad_norm": 1.2979029460424305, "learning_rate": 5.27848978482491e-06, "loss": 0.26413726806640625, "step": 5454 }, { "epoch": 1.3479120336051396, "grad_norm": 1.310322701481011, "learning_rate": 5.274888449994843e-06, "loss": 0.24636343121528625, "step": 5455 }, { "epoch": 1.348159130219916, "grad_norm": 1.2825966813762497, "learning_rate": 5.271287903974182e-06, "loss": 0.26796311140060425, "step": 5456 }, { "epoch": 1.3484062268346924, "grad_norm": 1.8322535173631544, "learning_rate": 5.26768814736401e-06, "loss": 0.3122444748878479, "step": 5457 }, { "epoch": 1.3486533234494686, "grad_norm": 1.2749877247083308, "learning_rate": 5.264089180765264e-06, "loss": 0.289396196603775, "step": 5458 }, { "epoch": 1.3489004200642452, "grad_norm": 1.4212818662994189, "learning_rate": 5.260491004778755e-06, "loss": 0.25959187746047974, "step": 5459 }, { "epoch": 1.3491475166790214, "grad_norm": 1.5810041635814671, "learning_rate": 5.256893620005166e-06, "loss": 0.3057370185852051, "step": 5460 }, { "epoch": 1.349394613293798, "grad_norm": 1.354269006896947, "learning_rate": 5.253297027045045e-06, "loss": 0.24550572037696838, "step": 5461 }, { "epoch": 1.3496417099085742, "grad_norm": 1.4604550634590128, "learning_rate": 5.249701226498805e-06, "loss": 0.3407202959060669, "step": 5462 }, { "epoch": 1.3498888065233507, "grad_norm": 1.442423125072844, "learning_rate": 5.246106218966721e-06, "loss": 0.2948229908943176, "step": 5463 }, { "epoch": 1.350135903138127, "grad_norm": 1.5437939080987957, "learning_rate": 5.24251200504895e-06, "loss": 0.25238272547721863, "step": 5464 }, { "epoch": 1.3503829997529033, "grad_norm": 1.4549225077302121, "learning_rate": 5.23891858534551e-06, "loss": 0.2741934061050415, "step": 5465 }, { "epoch": 1.3506300963676798, "grad_norm": 1.3508749236787922, "learning_rate": 5.235325960456288e-06, "loss": 0.23304711282253265, "step": 5466 }, { "epoch": 1.3508771929824561, "grad_norm": 2.9976376681264414, "learning_rate": 5.231734130981028e-06, "loss": 0.3053337335586548, "step": 5467 }, { "epoch": 1.3511242895972324, "grad_norm": 1.3097542706401757, "learning_rate": 5.228143097519355e-06, "loss": 0.24156346917152405, "step": 5468 }, { "epoch": 1.351371386212009, "grad_norm": 1.2250532213859906, "learning_rate": 5.224552860670758e-06, "loss": 0.21257561445236206, "step": 5469 }, { "epoch": 1.3516184828267852, "grad_norm": 1.2451889082778496, "learning_rate": 5.2209634210345884e-06, "loss": 0.27114933729171753, "step": 5470 }, { "epoch": 1.3518655794415617, "grad_norm": 1.2653008950729716, "learning_rate": 5.217374779210063e-06, "loss": 0.24346405267715454, "step": 5471 }, { "epoch": 1.352112676056338, "grad_norm": 1.3849455361328862, "learning_rate": 5.21378693579627e-06, "loss": 0.27570247650146484, "step": 5472 }, { "epoch": 1.3523597726711145, "grad_norm": 1.453879405416296, "learning_rate": 5.210199891392167e-06, "loss": 0.25557854771614075, "step": 5473 }, { "epoch": 1.3526068692858908, "grad_norm": 1.2879279790832268, "learning_rate": 5.206613646596572e-06, "loss": 0.2566272020339966, "step": 5474 }, { "epoch": 1.352853965900667, "grad_norm": 1.3340758517413087, "learning_rate": 5.203028202008178e-06, "loss": 0.2739184498786926, "step": 5475 }, { "epoch": 1.3531010625154436, "grad_norm": 1.3900377713557401, "learning_rate": 5.199443558225534e-06, "loss": 0.31345200538635254, "step": 5476 }, { "epoch": 1.3533481591302199, "grad_norm": 1.3042461936707397, "learning_rate": 5.195859715847055e-06, "loss": 0.23973116278648376, "step": 5477 }, { "epoch": 1.3535952557449962, "grad_norm": 1.3309411944042338, "learning_rate": 5.192276675471032e-06, "loss": 0.30090510845184326, "step": 5478 }, { "epoch": 1.3538423523597727, "grad_norm": 1.491096111797678, "learning_rate": 5.188694437695621e-06, "loss": 0.2676406502723694, "step": 5479 }, { "epoch": 1.354089448974549, "grad_norm": 1.313499892027791, "learning_rate": 5.185113003118832e-06, "loss": 0.30290427803993225, "step": 5480 }, { "epoch": 1.3543365455893255, "grad_norm": 1.4032727858347187, "learning_rate": 5.1815323723385535e-06, "loss": 0.27894464135169983, "step": 5481 }, { "epoch": 1.3545836422041018, "grad_norm": 1.3355076002562414, "learning_rate": 5.177952545952536e-06, "loss": 0.2835364043712616, "step": 5482 }, { "epoch": 1.3548307388188783, "grad_norm": 1.6505604895025976, "learning_rate": 5.174373524558398e-06, "loss": 0.32583242654800415, "step": 5483 }, { "epoch": 1.3550778354336546, "grad_norm": 1.4342412804561737, "learning_rate": 5.170795308753618e-06, "loss": 0.3068428635597229, "step": 5484 }, { "epoch": 1.3553249320484309, "grad_norm": 1.464180528969786, "learning_rate": 5.167217899135538e-06, "loss": 0.28183451294898987, "step": 5485 }, { "epoch": 1.3555720286632074, "grad_norm": 1.633294063523194, "learning_rate": 5.163641296301374e-06, "loss": 0.2995855212211609, "step": 5486 }, { "epoch": 1.3558191252779836, "grad_norm": 1.6091310576189317, "learning_rate": 5.160065500848205e-06, "loss": 0.31822001934051514, "step": 5487 }, { "epoch": 1.35606622189276, "grad_norm": 1.3796431806017186, "learning_rate": 5.156490513372976e-06, "loss": 0.28150320053100586, "step": 5488 }, { "epoch": 1.3563133185075364, "grad_norm": 1.3879355526515675, "learning_rate": 5.152916334472486e-06, "loss": 0.2699538767337799, "step": 5489 }, { "epoch": 1.356560415122313, "grad_norm": 1.4678693119850925, "learning_rate": 5.14934296474342e-06, "loss": 0.2629011273384094, "step": 5490 }, { "epoch": 1.3568075117370892, "grad_norm": 1.3759811174945549, "learning_rate": 5.145770404782304e-06, "loss": 0.25882837176322937, "step": 5491 }, { "epoch": 1.3570546083518655, "grad_norm": 1.3613419398638413, "learning_rate": 5.142198655185546e-06, "loss": 0.27880680561065674, "step": 5492 }, { "epoch": 1.357301704966642, "grad_norm": 1.2689830647992986, "learning_rate": 5.138627716549417e-06, "loss": 0.25271210074424744, "step": 5493 }, { "epoch": 1.3575488015814183, "grad_norm": 1.1929900228493349, "learning_rate": 5.135057589470042e-06, "loss": 0.2366165816783905, "step": 5494 }, { "epoch": 1.3577958981961946, "grad_norm": 1.3355696288342918, "learning_rate": 5.1314882745434194e-06, "loss": 0.2912319600582123, "step": 5495 }, { "epoch": 1.3580429948109711, "grad_norm": 1.4849855423381, "learning_rate": 5.127919772365413e-06, "loss": 0.28778570890426636, "step": 5496 }, { "epoch": 1.3582900914257474, "grad_norm": 1.2806066198645063, "learning_rate": 5.124352083531748e-06, "loss": 0.27020761370658875, "step": 5497 }, { "epoch": 1.358537188040524, "grad_norm": 1.4336127266009335, "learning_rate": 5.120785208638014e-06, "loss": 0.28275591135025024, "step": 5498 }, { "epoch": 1.3587842846553002, "grad_norm": 1.316912677977561, "learning_rate": 5.117219148279658e-06, "loss": 0.2928071618080139, "step": 5499 }, { "epoch": 1.3590313812700767, "grad_norm": 1.4543935106109822, "learning_rate": 5.113653903052003e-06, "loss": 0.31219297647476196, "step": 5500 }, { "epoch": 1.359278477884853, "grad_norm": 1.464300448779194, "learning_rate": 5.110089473550229e-06, "loss": 0.34141018986701965, "step": 5501 }, { "epoch": 1.3595255744996293, "grad_norm": 1.4728220530177185, "learning_rate": 5.106525860369387e-06, "loss": 0.2768690586090088, "step": 5502 }, { "epoch": 1.3597726711144058, "grad_norm": 1.5246081416594015, "learning_rate": 5.102963064104377e-06, "loss": 0.3419702649116516, "step": 5503 }, { "epoch": 1.360019767729182, "grad_norm": 1.3663773875725387, "learning_rate": 5.09940108534998e-06, "loss": 0.29158511757850647, "step": 5504 }, { "epoch": 1.3602668643439584, "grad_norm": 1.1900637762135666, "learning_rate": 5.095839924700824e-06, "loss": 0.24320699274539948, "step": 5505 }, { "epoch": 1.360513960958735, "grad_norm": 1.4342225895734435, "learning_rate": 5.092279582751412e-06, "loss": 0.26785808801651, "step": 5506 }, { "epoch": 1.3607610575735112, "grad_norm": 1.4284743799811594, "learning_rate": 5.088720060096113e-06, "loss": 0.2428790032863617, "step": 5507 }, { "epoch": 1.3610081541882877, "grad_norm": 1.4290986308937366, "learning_rate": 5.085161357329142e-06, "loss": 0.3638089895248413, "step": 5508 }, { "epoch": 1.361255250803064, "grad_norm": 1.5082393394631348, "learning_rate": 5.081603475044596e-06, "loss": 0.34457868337631226, "step": 5509 }, { "epoch": 1.3615023474178405, "grad_norm": 1.1955532261807205, "learning_rate": 5.078046413836424e-06, "loss": 0.27298837900161743, "step": 5510 }, { "epoch": 1.3617494440326168, "grad_norm": 1.4760789764380489, "learning_rate": 5.074490174298447e-06, "loss": 0.30031782388687134, "step": 5511 }, { "epoch": 1.361996540647393, "grad_norm": 1.5386008001673832, "learning_rate": 5.070934757024339e-06, "loss": 0.3485082685947418, "step": 5512 }, { "epoch": 1.3622436372621696, "grad_norm": 1.4689085783947908, "learning_rate": 5.067380162607638e-06, "loss": 0.2949199378490448, "step": 5513 }, { "epoch": 1.3624907338769459, "grad_norm": 1.6104425218307408, "learning_rate": 5.063826391641749e-06, "loss": 0.32778429985046387, "step": 5514 }, { "epoch": 1.3627378304917221, "grad_norm": 1.4028984658578374, "learning_rate": 5.0602734447199385e-06, "loss": 0.29844212532043457, "step": 5515 }, { "epoch": 1.3629849271064987, "grad_norm": 1.4136616114355953, "learning_rate": 5.0567213224353405e-06, "loss": 0.2671540379524231, "step": 5516 }, { "epoch": 1.363232023721275, "grad_norm": 1.4340440473734688, "learning_rate": 5.0531700253809355e-06, "loss": 0.3137114644050598, "step": 5517 }, { "epoch": 1.3634791203360515, "grad_norm": 1.397760122486354, "learning_rate": 5.049619554149587e-06, "loss": 0.2850627303123474, "step": 5518 }, { "epoch": 1.3637262169508277, "grad_norm": 1.315414128361916, "learning_rate": 5.046069909334e-06, "loss": 0.26300764083862305, "step": 5519 }, { "epoch": 1.3639733135656043, "grad_norm": 1.3577897320115615, "learning_rate": 5.042521091526758e-06, "loss": 0.2899051010608673, "step": 5520 }, { "epoch": 1.3642204101803805, "grad_norm": 1.4274861975876225, "learning_rate": 5.038973101320301e-06, "loss": 0.2796486020088196, "step": 5521 }, { "epoch": 1.3644675067951568, "grad_norm": 1.465023947976104, "learning_rate": 5.035425939306925e-06, "loss": 0.28520089387893677, "step": 5522 }, { "epoch": 1.3647146034099333, "grad_norm": 1.4581249141316486, "learning_rate": 5.031879606078794e-06, "loss": 0.3197394013404846, "step": 5523 }, { "epoch": 1.3649617000247096, "grad_norm": 1.2305869064607193, "learning_rate": 5.028334102227938e-06, "loss": 0.2173580825328827, "step": 5524 }, { "epoch": 1.365208796639486, "grad_norm": 1.383795253527279, "learning_rate": 5.0247894283462365e-06, "loss": 0.2548328638076782, "step": 5525 }, { "epoch": 1.3654558932542624, "grad_norm": 1.3205276477310695, "learning_rate": 5.021245585025441e-06, "loss": 0.22043368220329285, "step": 5526 }, { "epoch": 1.365702989869039, "grad_norm": 1.4458299633788458, "learning_rate": 5.017702572857155e-06, "loss": 0.26533812284469604, "step": 5527 }, { "epoch": 1.3659500864838152, "grad_norm": 1.4666671672275264, "learning_rate": 5.01416039243285e-06, "loss": 0.3150405287742615, "step": 5528 }, { "epoch": 1.3661971830985915, "grad_norm": 1.427716861136369, "learning_rate": 5.01061904434386e-06, "loss": 0.2837211787700653, "step": 5529 }, { "epoch": 1.366444279713368, "grad_norm": 1.3844084775235084, "learning_rate": 5.007078529181379e-06, "loss": 0.24394088983535767, "step": 5530 }, { "epoch": 1.3666913763281443, "grad_norm": 1.4814805103850703, "learning_rate": 5.003538847536455e-06, "loss": 0.3019683361053467, "step": 5531 }, { "epoch": 1.3669384729429206, "grad_norm": 1.5401149159691079, "learning_rate": 5.000000000000003e-06, "loss": 0.26341915130615234, "step": 5532 }, { "epoch": 1.367185569557697, "grad_norm": 1.8053755846898816, "learning_rate": 4.996461987162796e-06, "loss": 0.25376948714256287, "step": 5533 }, { "epoch": 1.3674326661724734, "grad_norm": 1.4087492275741547, "learning_rate": 4.9929248096154725e-06, "loss": 0.29710525274276733, "step": 5534 }, { "epoch": 1.36767976278725, "grad_norm": 1.5024273321313448, "learning_rate": 4.989388467948532e-06, "loss": 0.349750816822052, "step": 5535 }, { "epoch": 1.3679268594020262, "grad_norm": 1.3842140624103345, "learning_rate": 4.985852962752321e-06, "loss": 0.27066999673843384, "step": 5536 }, { "epoch": 1.3681739560168027, "grad_norm": 1.4816378507401824, "learning_rate": 4.982318294617062e-06, "loss": 0.2509426176548004, "step": 5537 }, { "epoch": 1.368421052631579, "grad_norm": 1.4425956858182405, "learning_rate": 4.978784464132837e-06, "loss": 0.2990427613258362, "step": 5538 }, { "epoch": 1.3686681492463553, "grad_norm": 1.4487975727836493, "learning_rate": 4.975251471889572e-06, "loss": 0.2857864797115326, "step": 5539 }, { "epoch": 1.3689152458611318, "grad_norm": 1.414536645811529, "learning_rate": 4.971719318477074e-06, "loss": 0.2644099295139313, "step": 5540 }, { "epoch": 1.369162342475908, "grad_norm": 1.4862759376085424, "learning_rate": 4.968188004484992e-06, "loss": 0.2757548689842224, "step": 5541 }, { "epoch": 1.3694094390906844, "grad_norm": 1.370217893462336, "learning_rate": 4.964657530502848e-06, "loss": 0.2505994141101837, "step": 5542 }, { "epoch": 1.3696565357054609, "grad_norm": 1.5329408280818733, "learning_rate": 4.9611278971200175e-06, "loss": 0.3291051983833313, "step": 5543 }, { "epoch": 1.3699036323202372, "grad_norm": 1.4860157311040914, "learning_rate": 4.95759910492574e-06, "loss": 0.3194906711578369, "step": 5544 }, { "epoch": 1.3701507289350137, "grad_norm": 1.2999250532263062, "learning_rate": 4.95407115450911e-06, "loss": 0.25156354904174805, "step": 5545 }, { "epoch": 1.37039782554979, "grad_norm": 1.3465190326368008, "learning_rate": 4.9505440464590785e-06, "loss": 0.29031187295913696, "step": 5546 }, { "epoch": 1.3706449221645665, "grad_norm": 1.3475686674983518, "learning_rate": 4.9470177813644624e-06, "loss": 0.282174676656723, "step": 5547 }, { "epoch": 1.3708920187793427, "grad_norm": 1.5065864015832324, "learning_rate": 4.943492359813939e-06, "loss": 0.2506793141365051, "step": 5548 }, { "epoch": 1.371139115394119, "grad_norm": 1.5874975383586236, "learning_rate": 4.939967782396042e-06, "loss": 0.2654000520706177, "step": 5549 }, { "epoch": 1.3713862120088955, "grad_norm": 1.5604740226450518, "learning_rate": 4.936444049699159e-06, "loss": 0.31017544865608215, "step": 5550 }, { "epoch": 1.3716333086236718, "grad_norm": 1.5698803515415065, "learning_rate": 4.932921162311542e-06, "loss": 0.2987102270126343, "step": 5551 }, { "epoch": 1.3718804052384481, "grad_norm": 1.4831617391728482, "learning_rate": 4.9293991208213085e-06, "loss": 0.31127309799194336, "step": 5552 }, { "epoch": 1.3721275018532246, "grad_norm": 1.4990142104340403, "learning_rate": 4.9258779258164195e-06, "loss": 0.31229424476623535, "step": 5553 }, { "epoch": 1.372374598468001, "grad_norm": 1.389769791610021, "learning_rate": 4.922357577884709e-06, "loss": 0.25448739528656006, "step": 5554 }, { "epoch": 1.3726216950827774, "grad_norm": 1.4892292396623728, "learning_rate": 4.918838077613856e-06, "loss": 0.34360384941101074, "step": 5555 }, { "epoch": 1.3728687916975537, "grad_norm": 1.3360076472110558, "learning_rate": 4.915319425591409e-06, "loss": 0.2584726810455322, "step": 5556 }, { "epoch": 1.3731158883123302, "grad_norm": 1.6366262226380126, "learning_rate": 4.911801622404773e-06, "loss": 0.3079580068588257, "step": 5557 }, { "epoch": 1.3733629849271065, "grad_norm": 1.5104054158460143, "learning_rate": 4.9082846686412115e-06, "loss": 0.2948318123817444, "step": 5558 }, { "epoch": 1.3736100815418828, "grad_norm": 1.4333337167718125, "learning_rate": 4.9047685648878395e-06, "loss": 0.2544296979904175, "step": 5559 }, { "epoch": 1.3738571781566593, "grad_norm": 1.4278501752310735, "learning_rate": 4.901253311731634e-06, "loss": 0.2862567901611328, "step": 5560 }, { "epoch": 1.3741042747714356, "grad_norm": 1.3590447024028958, "learning_rate": 4.897738909759432e-06, "loss": 0.2402319610118866, "step": 5561 }, { "epoch": 1.3743513713862119, "grad_norm": 1.3523344452532544, "learning_rate": 4.8942253595579294e-06, "loss": 0.28329288959503174, "step": 5562 }, { "epoch": 1.3745984680009884, "grad_norm": 1.448726947161025, "learning_rate": 4.89071266171368e-06, "loss": 0.3008284270763397, "step": 5563 }, { "epoch": 1.3748455646157647, "grad_norm": 1.341097906359036, "learning_rate": 4.8872008168130855e-06, "loss": 0.2550903856754303, "step": 5564 }, { "epoch": 1.3750926612305412, "grad_norm": 1.4335542151169258, "learning_rate": 4.883689825442418e-06, "loss": 0.3104734420776367, "step": 5565 }, { "epoch": 1.3753397578453175, "grad_norm": 1.3691483902165376, "learning_rate": 4.880179688187804e-06, "loss": 0.2713145315647125, "step": 5566 }, { "epoch": 1.375586854460094, "grad_norm": 1.3891297823087128, "learning_rate": 4.876670405635222e-06, "loss": 0.24762916564941406, "step": 5567 }, { "epoch": 1.3758339510748703, "grad_norm": 1.4840405307753668, "learning_rate": 4.873161978370508e-06, "loss": 0.30404770374298096, "step": 5568 }, { "epoch": 1.3760810476896466, "grad_norm": 1.4182713716372322, "learning_rate": 4.869654406979361e-06, "loss": 0.2862776815891266, "step": 5569 }, { "epoch": 1.376328144304423, "grad_norm": 1.6102549741198007, "learning_rate": 4.866147692047335e-06, "loss": 0.31110885739326477, "step": 5570 }, { "epoch": 1.3765752409191994, "grad_norm": 1.2701171296116467, "learning_rate": 4.862641834159843e-06, "loss": 0.24411442875862122, "step": 5571 }, { "epoch": 1.3768223375339756, "grad_norm": 1.4076317333042598, "learning_rate": 4.859136833902148e-06, "loss": 0.30501359701156616, "step": 5572 }, { "epoch": 1.3770694341487522, "grad_norm": 1.1994600228171712, "learning_rate": 4.855632691859379e-06, "loss": 0.2275085300207138, "step": 5573 }, { "epoch": 1.3773165307635287, "grad_norm": 1.497370154183904, "learning_rate": 4.852129408616511e-06, "loss": 0.320848286151886, "step": 5574 }, { "epoch": 1.377563627378305, "grad_norm": 1.407822769070962, "learning_rate": 4.8486269847583834e-06, "loss": 0.23108887672424316, "step": 5575 }, { "epoch": 1.3778107239930812, "grad_norm": 1.3824358147604416, "learning_rate": 4.845125420869696e-06, "loss": 0.2830336093902588, "step": 5576 }, { "epoch": 1.3780578206078578, "grad_norm": 1.3589230568310793, "learning_rate": 4.84162471753499e-06, "loss": 0.28985297679901123, "step": 5577 }, { "epoch": 1.378304917222634, "grad_norm": 1.2676414087462688, "learning_rate": 4.8381248753386785e-06, "loss": 0.2999952435493469, "step": 5578 }, { "epoch": 1.3785520138374103, "grad_norm": 1.304892103479985, "learning_rate": 4.8346258948650235e-06, "loss": 0.27113276720046997, "step": 5579 }, { "epoch": 1.3787991104521868, "grad_norm": 1.545068606082019, "learning_rate": 4.831127776698147e-06, "loss": 0.2980796694755554, "step": 5580 }, { "epoch": 1.3790462070669631, "grad_norm": 1.7834026849716464, "learning_rate": 4.827630521422021e-06, "loss": 0.3128986358642578, "step": 5581 }, { "epoch": 1.3792933036817396, "grad_norm": 1.2803509662996297, "learning_rate": 4.824134129620473e-06, "loss": 0.2364516705274582, "step": 5582 }, { "epoch": 1.379540400296516, "grad_norm": 1.3328867772723219, "learning_rate": 4.820638601877196e-06, "loss": 0.26742231845855713, "step": 5583 }, { "epoch": 1.3797874969112924, "grad_norm": 1.6291921625338746, "learning_rate": 4.8171439387757305e-06, "loss": 0.29250603914260864, "step": 5584 }, { "epoch": 1.3800345935260687, "grad_norm": 1.4880446950839574, "learning_rate": 4.813650140899479e-06, "loss": 0.31677132844924927, "step": 5585 }, { "epoch": 1.380281690140845, "grad_norm": 1.3810591416156455, "learning_rate": 4.810157208831688e-06, "loss": 0.24304574728012085, "step": 5586 }, { "epoch": 1.3805287867556215, "grad_norm": 1.2486269136494188, "learning_rate": 4.806665143155476e-06, "loss": 0.25259870290756226, "step": 5587 }, { "epoch": 1.3807758833703978, "grad_norm": 1.4977706583784187, "learning_rate": 4.803173944453798e-06, "loss": 0.26441264152526855, "step": 5588 }, { "epoch": 1.381022979985174, "grad_norm": 1.4641466359465996, "learning_rate": 4.79968361330948e-06, "loss": 0.30839109420776367, "step": 5589 }, { "epoch": 1.3812700765999506, "grad_norm": 1.5490910245766316, "learning_rate": 4.796194150305201e-06, "loss": 0.26386529207229614, "step": 5590 }, { "epoch": 1.381517173214727, "grad_norm": 1.4519182699275968, "learning_rate": 4.792705556023483e-06, "loss": 0.31081029772758484, "step": 5591 }, { "epoch": 1.3817642698295034, "grad_norm": 1.4210406350627538, "learning_rate": 4.789217831046716e-06, "loss": 0.30137956142425537, "step": 5592 }, { "epoch": 1.3820113664442797, "grad_norm": 1.3640194514442021, "learning_rate": 4.785730975957145e-06, "loss": 0.30015185475349426, "step": 5593 }, { "epoch": 1.3822584630590562, "grad_norm": 1.366863447596964, "learning_rate": 4.782244991336854e-06, "loss": 0.2701331377029419, "step": 5594 }, { "epoch": 1.3825055596738325, "grad_norm": 1.3869354610535662, "learning_rate": 4.778759877767802e-06, "loss": 0.277566522359848, "step": 5595 }, { "epoch": 1.3827526562886088, "grad_norm": 1.3811850838254744, "learning_rate": 4.775275635831788e-06, "loss": 0.27339422702789307, "step": 5596 }, { "epoch": 1.3829997529033853, "grad_norm": 1.614470105427969, "learning_rate": 4.771792266110472e-06, "loss": 0.33805525302886963, "step": 5597 }, { "epoch": 1.3832468495181616, "grad_norm": 1.2420624968362755, "learning_rate": 4.768309769185369e-06, "loss": 0.21788612008094788, "step": 5598 }, { "epoch": 1.3834939461329379, "grad_norm": 1.3525994671053316, "learning_rate": 4.764828145637848e-06, "loss": 0.235982283949852, "step": 5599 }, { "epoch": 1.3837410427477144, "grad_norm": 1.432336578272803, "learning_rate": 4.761347396049129e-06, "loss": 0.2968560457229614, "step": 5600 }, { "epoch": 1.3839881393624907, "grad_norm": 1.4589263218240276, "learning_rate": 4.757867521000282e-06, "loss": 0.30884677171707153, "step": 5601 }, { "epoch": 1.3842352359772672, "grad_norm": 1.428403637103086, "learning_rate": 4.754388521072244e-06, "loss": 0.2937111258506775, "step": 5602 }, { "epoch": 1.3844823325920435, "grad_norm": 1.3552577256698668, "learning_rate": 4.750910396845795e-06, "loss": 0.2661725878715515, "step": 5603 }, { "epoch": 1.38472942920682, "grad_norm": 1.5500027711275195, "learning_rate": 4.747433148901579e-06, "loss": 0.2890235185623169, "step": 5604 }, { "epoch": 1.3849765258215962, "grad_norm": 1.360591329755056, "learning_rate": 4.743956777820078e-06, "loss": 0.2616117596626282, "step": 5605 }, { "epoch": 1.3852236224363725, "grad_norm": 1.382361998699974, "learning_rate": 4.740481284181642e-06, "loss": 0.27646130323410034, "step": 5606 }, { "epoch": 1.385470719051149, "grad_norm": 1.4329859884641611, "learning_rate": 4.737006668566472e-06, "loss": 0.29284924268722534, "step": 5607 }, { "epoch": 1.3857178156659253, "grad_norm": 1.4654819429361645, "learning_rate": 4.7335329315546116e-06, "loss": 0.3511132001876831, "step": 5608 }, { "epoch": 1.3859649122807016, "grad_norm": 1.3204178781171172, "learning_rate": 4.730060073725976e-06, "loss": 0.28303062915802, "step": 5609 }, { "epoch": 1.3862120088954781, "grad_norm": 1.4290573730196992, "learning_rate": 4.726588095660314e-06, "loss": 0.2582564353942871, "step": 5610 }, { "epoch": 1.3864591055102546, "grad_norm": 1.4942989115947787, "learning_rate": 4.723116997937241e-06, "loss": 0.3391924500465393, "step": 5611 }, { "epoch": 1.386706202125031, "grad_norm": 1.3444187667288083, "learning_rate": 4.719646781136223e-06, "loss": 0.2847146987915039, "step": 5612 }, { "epoch": 1.3869532987398072, "grad_norm": 1.4779159459066313, "learning_rate": 4.716177445836581e-06, "loss": 0.2426198273897171, "step": 5613 }, { "epoch": 1.3872003953545837, "grad_norm": 1.2296783300072613, "learning_rate": 4.712708992617479e-06, "loss": 0.27778688073158264, "step": 5614 }, { "epoch": 1.38744749196936, "grad_norm": 1.4741994032088699, "learning_rate": 4.709241422057938e-06, "loss": 0.3068539500236511, "step": 5615 }, { "epoch": 1.3876945885841363, "grad_norm": 1.3375352392565585, "learning_rate": 4.705774734736839e-06, "loss": 0.2711596190929413, "step": 5616 }, { "epoch": 1.3879416851989128, "grad_norm": 1.5275466981274863, "learning_rate": 4.702308931232907e-06, "loss": 0.31513017416000366, "step": 5617 }, { "epoch": 1.388188781813689, "grad_norm": 1.4052433084517593, "learning_rate": 4.6988440121247305e-06, "loss": 0.2739836871623993, "step": 5618 }, { "epoch": 1.3884358784284656, "grad_norm": 1.4965128019023952, "learning_rate": 4.695379977990731e-06, "loss": 0.30636686086654663, "step": 5619 }, { "epoch": 1.388682975043242, "grad_norm": 1.6265018874709332, "learning_rate": 4.6919168294092e-06, "loss": 0.30188265442848206, "step": 5620 }, { "epoch": 1.3889300716580184, "grad_norm": 1.4461634587765115, "learning_rate": 4.688454566958278e-06, "loss": 0.3263731300830841, "step": 5621 }, { "epoch": 1.3891771682727947, "grad_norm": 1.2157861204080729, "learning_rate": 4.684993191215947e-06, "loss": 0.2927563190460205, "step": 5622 }, { "epoch": 1.389424264887571, "grad_norm": 1.5520595235812946, "learning_rate": 4.681532702760057e-06, "loss": 0.318422794342041, "step": 5623 }, { "epoch": 1.3896713615023475, "grad_norm": 1.3780439113958398, "learning_rate": 4.678073102168294e-06, "loss": 0.2871999740600586, "step": 5624 }, { "epoch": 1.3899184581171238, "grad_norm": 1.212511647165871, "learning_rate": 4.674614390018204e-06, "loss": 0.26066815853118896, "step": 5625 }, { "epoch": 1.3901655547319, "grad_norm": 1.3635640214242468, "learning_rate": 4.671156566887187e-06, "loss": 0.2959553003311157, "step": 5626 }, { "epoch": 1.3904126513466766, "grad_norm": 1.6255410735744025, "learning_rate": 4.6676996333524945e-06, "loss": 0.30495214462280273, "step": 5627 }, { "epoch": 1.3906597479614529, "grad_norm": 1.5122578646123603, "learning_rate": 4.664243589991223e-06, "loss": 0.32993173599243164, "step": 5628 }, { "epoch": 1.3909068445762294, "grad_norm": 1.3544271773109289, "learning_rate": 4.660788437380319e-06, "loss": 0.2778966426849365, "step": 5629 }, { "epoch": 1.3911539411910057, "grad_norm": 1.5047169461217784, "learning_rate": 4.6573341760965895e-06, "loss": 0.34633857011795044, "step": 5630 }, { "epoch": 1.3914010378057822, "grad_norm": 1.4325131522976324, "learning_rate": 4.653880806716689e-06, "loss": 0.2995157837867737, "step": 5631 }, { "epoch": 1.3916481344205585, "grad_norm": 1.469391137037054, "learning_rate": 4.650428329817125e-06, "loss": 0.36625558137893677, "step": 5632 }, { "epoch": 1.3918952310353347, "grad_norm": 1.6228659925962763, "learning_rate": 4.646976745974247e-06, "loss": 0.2698277235031128, "step": 5633 }, { "epoch": 1.3921423276501113, "grad_norm": 1.6387907218734041, "learning_rate": 4.643526055764263e-06, "loss": 0.24821436405181885, "step": 5634 }, { "epoch": 1.3923894242648875, "grad_norm": 1.537893921633406, "learning_rate": 4.640076259763239e-06, "loss": 0.27672678232192993, "step": 5635 }, { "epoch": 1.3926365208796638, "grad_norm": 1.2356078495549916, "learning_rate": 4.636627358547073e-06, "loss": 0.2735321521759033, "step": 5636 }, { "epoch": 1.3928836174944403, "grad_norm": 1.3800303438040884, "learning_rate": 4.6331793526915325e-06, "loss": 0.27399688959121704, "step": 5637 }, { "epoch": 1.3931307141092166, "grad_norm": 1.3491007211845871, "learning_rate": 4.629732242772218e-06, "loss": 0.30588018894195557, "step": 5638 }, { "epoch": 1.3933778107239931, "grad_norm": 1.3009833553853074, "learning_rate": 4.626286029364596e-06, "loss": 0.30402815341949463, "step": 5639 }, { "epoch": 1.3936249073387694, "grad_norm": 1.5758229047586982, "learning_rate": 4.622840713043974e-06, "loss": 0.25988250970840454, "step": 5640 }, { "epoch": 1.393872003953546, "grad_norm": 1.326317387663609, "learning_rate": 4.619396294385518e-06, "loss": 0.26588496565818787, "step": 5641 }, { "epoch": 1.3941191005683222, "grad_norm": 1.2426696787995455, "learning_rate": 4.615952773964235e-06, "loss": 0.27540481090545654, "step": 5642 }, { "epoch": 1.3943661971830985, "grad_norm": 1.3642962073059537, "learning_rate": 4.612510152354982e-06, "loss": 0.2773352563381195, "step": 5643 }, { "epoch": 1.394613293797875, "grad_norm": 1.4771395768335869, "learning_rate": 4.609068430132471e-06, "loss": 0.30581313371658325, "step": 5644 }, { "epoch": 1.3948603904126513, "grad_norm": 1.4152089982349538, "learning_rate": 4.605627607871268e-06, "loss": 0.28592535853385925, "step": 5645 }, { "epoch": 1.3951074870274276, "grad_norm": 1.2905745689624064, "learning_rate": 4.602187686145781e-06, "loss": 0.25712156295776367, "step": 5646 }, { "epoch": 1.395354583642204, "grad_norm": 1.3519682852237065, "learning_rate": 4.598748665530267e-06, "loss": 0.3011855483055115, "step": 5647 }, { "epoch": 1.3956016802569806, "grad_norm": 1.4317611305183744, "learning_rate": 4.595310546598838e-06, "loss": 0.32254934310913086, "step": 5648 }, { "epoch": 1.395848776871757, "grad_norm": 1.3585811673029473, "learning_rate": 4.591873329925455e-06, "loss": 0.2983465790748596, "step": 5649 }, { "epoch": 1.3960958734865332, "grad_norm": 1.4036126218920295, "learning_rate": 4.588437016083925e-06, "loss": 0.3128153681755066, "step": 5650 }, { "epoch": 1.3963429701013097, "grad_norm": 1.4908405697914386, "learning_rate": 4.5850016056479005e-06, "loss": 0.3680287301540375, "step": 5651 }, { "epoch": 1.396590066716086, "grad_norm": 1.478600452492642, "learning_rate": 4.581567099190894e-06, "loss": 0.4049033522605896, "step": 5652 }, { "epoch": 1.3968371633308623, "grad_norm": 1.2979125100612543, "learning_rate": 4.578133497286259e-06, "loss": 0.24591834843158722, "step": 5653 }, { "epoch": 1.3970842599456388, "grad_norm": 1.514476096585314, "learning_rate": 4.5747008005072024e-06, "loss": 0.31339919567108154, "step": 5654 }, { "epoch": 1.397331356560415, "grad_norm": 1.4633671792439626, "learning_rate": 4.571269009426781e-06, "loss": 0.2637629508972168, "step": 5655 }, { "epoch": 1.3975784531751916, "grad_norm": 2.2133964137097077, "learning_rate": 4.567838124617894e-06, "loss": 0.32134342193603516, "step": 5656 }, { "epoch": 1.3978255497899679, "grad_norm": 1.8254066865170264, "learning_rate": 4.56440814665329e-06, "loss": 0.29042303562164307, "step": 5657 }, { "epoch": 1.3980726464047444, "grad_norm": 1.5959719153789536, "learning_rate": 4.560979076105571e-06, "loss": 0.32806527614593506, "step": 5658 }, { "epoch": 1.3983197430195207, "grad_norm": 1.5236898545224207, "learning_rate": 4.557550913547189e-06, "loss": 0.3159918189048767, "step": 5659 }, { "epoch": 1.398566839634297, "grad_norm": 1.3828301273649612, "learning_rate": 4.554123659550435e-06, "loss": 0.26570624113082886, "step": 5660 }, { "epoch": 1.3988139362490735, "grad_norm": 1.6344863226203987, "learning_rate": 4.5506973146874576e-06, "loss": 0.3187710642814636, "step": 5661 }, { "epoch": 1.3990610328638498, "grad_norm": 1.2658832112312586, "learning_rate": 4.547271879530254e-06, "loss": 0.2727266252040863, "step": 5662 }, { "epoch": 1.399308129478626, "grad_norm": 1.4569330938500291, "learning_rate": 4.543847354650658e-06, "loss": 0.3087548017501831, "step": 5663 }, { "epoch": 1.3995552260934025, "grad_norm": 1.364901150950452, "learning_rate": 4.540423740620368e-06, "loss": 0.3147335946559906, "step": 5664 }, { "epoch": 1.3998023227081788, "grad_norm": 1.3038178794208968, "learning_rate": 4.537001038010912e-06, "loss": 0.2786884903907776, "step": 5665 }, { "epoch": 1.4000494193229553, "grad_norm": 1.5162057541451552, "learning_rate": 4.5335792473936805e-06, "loss": 0.3089520037174225, "step": 5666 }, { "epoch": 1.4002965159377316, "grad_norm": 1.4813326938087414, "learning_rate": 4.530158369339906e-06, "loss": 0.289673388004303, "step": 5667 }, { "epoch": 1.4005436125525081, "grad_norm": 1.3718566949465483, "learning_rate": 4.526738404420675e-06, "loss": 0.2535317540168762, "step": 5668 }, { "epoch": 1.4007907091672844, "grad_norm": 1.3635760546373397, "learning_rate": 4.5233193532069096e-06, "loss": 0.2505193054676056, "step": 5669 }, { "epoch": 1.4010378057820607, "grad_norm": 1.4128707371548266, "learning_rate": 4.519901216269384e-06, "loss": 0.2878134846687317, "step": 5670 }, { "epoch": 1.4012849023968372, "grad_norm": 1.3922909516290156, "learning_rate": 4.516483994178724e-06, "loss": 0.26626425981521606, "step": 5671 }, { "epoch": 1.4015319990116135, "grad_norm": 1.4434074253954114, "learning_rate": 4.5130676875054e-06, "loss": 0.3176928758621216, "step": 5672 }, { "epoch": 1.4017790956263898, "grad_norm": 1.466628113928041, "learning_rate": 4.5096522968197356e-06, "loss": 0.2914029359817505, "step": 5673 }, { "epoch": 1.4020261922411663, "grad_norm": 1.4248916346526361, "learning_rate": 4.506237822691886e-06, "loss": 0.2901673913002014, "step": 5674 }, { "epoch": 1.4022732888559426, "grad_norm": 1.3119410051207503, "learning_rate": 4.502824265691866e-06, "loss": 0.23718124628067017, "step": 5675 }, { "epoch": 1.402520385470719, "grad_norm": 1.3496954416522216, "learning_rate": 4.49941162638954e-06, "loss": 0.2845849394798279, "step": 5676 }, { "epoch": 1.4027674820854954, "grad_norm": 1.3571992035801836, "learning_rate": 4.495999905354606e-06, "loss": 0.2572188377380371, "step": 5677 }, { "epoch": 1.403014578700272, "grad_norm": 1.296638385305134, "learning_rate": 4.492589103156621e-06, "loss": 0.2733513116836548, "step": 5678 }, { "epoch": 1.4032616753150482, "grad_norm": 1.3332197352754462, "learning_rate": 4.489179220364981e-06, "loss": 0.273296058177948, "step": 5679 }, { "epoch": 1.4035087719298245, "grad_norm": 1.473756215666345, "learning_rate": 4.48577025754893e-06, "loss": 0.31466931104660034, "step": 5680 }, { "epoch": 1.403755868544601, "grad_norm": 1.4784858082353662, "learning_rate": 4.482362215277564e-06, "loss": 0.30545249581336975, "step": 5681 }, { "epoch": 1.4040029651593773, "grad_norm": 1.3739627733620698, "learning_rate": 4.478955094119823e-06, "loss": 0.24065518379211426, "step": 5682 }, { "epoch": 1.4042500617741536, "grad_norm": 1.442237749245965, "learning_rate": 4.475548894644486e-06, "loss": 0.24320706725120544, "step": 5683 }, { "epoch": 1.40449715838893, "grad_norm": 1.3186377247036742, "learning_rate": 4.47214361742018e-06, "loss": 0.2559276223182678, "step": 5684 }, { "epoch": 1.4047442550037066, "grad_norm": 1.5398035733004982, "learning_rate": 4.468739263015389e-06, "loss": 0.2900058925151825, "step": 5685 }, { "epoch": 1.4049913516184829, "grad_norm": 1.4474468496301203, "learning_rate": 4.465335831998432e-06, "loss": 0.2653312683105469, "step": 5686 }, { "epoch": 1.4052384482332592, "grad_norm": 1.1373090625779056, "learning_rate": 4.461933324937481e-06, "loss": 0.21884702146053314, "step": 5687 }, { "epoch": 1.4054855448480357, "grad_norm": 1.3925667249090687, "learning_rate": 4.458531742400544e-06, "loss": 0.3136178255081177, "step": 5688 }, { "epoch": 1.405732641462812, "grad_norm": 1.6071359145520987, "learning_rate": 4.455131084955484e-06, "loss": 0.29926323890686035, "step": 5689 }, { "epoch": 1.4059797380775882, "grad_norm": 1.3029205302940656, "learning_rate": 4.4517313531700095e-06, "loss": 0.2319496124982834, "step": 5690 }, { "epoch": 1.4062268346923648, "grad_norm": 2.9179334788921487, "learning_rate": 4.4483325476116635e-06, "loss": 0.2527484893798828, "step": 5691 }, { "epoch": 1.406473931307141, "grad_norm": 1.3544716271248145, "learning_rate": 4.44493466884785e-06, "loss": 0.2793767750263214, "step": 5692 }, { "epoch": 1.4067210279219173, "grad_norm": 1.5406583932097548, "learning_rate": 4.441537717445803e-06, "loss": 0.3054860830307007, "step": 5693 }, { "epoch": 1.4069681245366938, "grad_norm": 1.517539544448497, "learning_rate": 4.438141693972613e-06, "loss": 0.27051281929016113, "step": 5694 }, { "epoch": 1.4072152211514704, "grad_norm": 1.571415827036815, "learning_rate": 4.434746598995211e-06, "loss": 0.261457622051239, "step": 5695 }, { "epoch": 1.4074623177662466, "grad_norm": 1.4193775757558795, "learning_rate": 4.431352433080377e-06, "loss": 0.3235654830932617, "step": 5696 }, { "epoch": 1.407709414381023, "grad_norm": 1.444834125632444, "learning_rate": 4.427959196794731e-06, "loss": 0.31234437227249146, "step": 5697 }, { "epoch": 1.4079565109957994, "grad_norm": 1.3957363648678744, "learning_rate": 4.424566890704733e-06, "loss": 0.2595450282096863, "step": 5698 }, { "epoch": 1.4082036076105757, "grad_norm": 1.327261711182374, "learning_rate": 4.4211755153767e-06, "loss": 0.28026363253593445, "step": 5699 }, { "epoch": 1.408450704225352, "grad_norm": 1.4494359854688206, "learning_rate": 4.417785071376786e-06, "loss": 0.2650151550769806, "step": 5700 }, { "epoch": 1.4086978008401285, "grad_norm": 1.2745949696706327, "learning_rate": 4.414395559270996e-06, "loss": 0.2536328434944153, "step": 5701 }, { "epoch": 1.4089448974549048, "grad_norm": 1.3882041033401658, "learning_rate": 4.411006979625165e-06, "loss": 0.2655603587627411, "step": 5702 }, { "epoch": 1.4091919940696813, "grad_norm": 1.2841358973602024, "learning_rate": 4.4076193330049895e-06, "loss": 0.2662198543548584, "step": 5703 }, { "epoch": 1.4094390906844576, "grad_norm": 1.5520174384996759, "learning_rate": 4.404232619976003e-06, "loss": 0.3245921730995178, "step": 5704 }, { "epoch": 1.4096861872992341, "grad_norm": 1.6374081704227434, "learning_rate": 4.400846841103579e-06, "loss": 0.3026338219642639, "step": 5705 }, { "epoch": 1.4099332839140104, "grad_norm": 1.315772652393784, "learning_rate": 4.397461996952942e-06, "loss": 0.26027750968933105, "step": 5706 }, { "epoch": 1.4101803805287867, "grad_norm": 1.4832704921533155, "learning_rate": 4.394078088089153e-06, "loss": 0.3074774742126465, "step": 5707 }, { "epoch": 1.4104274771435632, "grad_norm": 1.4129880880038557, "learning_rate": 4.3906951150771235e-06, "loss": 0.2758801579475403, "step": 5708 }, { "epoch": 1.4106745737583395, "grad_norm": 1.4553509705885237, "learning_rate": 4.387313078481607e-06, "loss": 0.21250006556510925, "step": 5709 }, { "epoch": 1.4109216703731158, "grad_norm": 1.4890826277136258, "learning_rate": 4.383931978867205e-06, "loss": 0.3275023102760315, "step": 5710 }, { "epoch": 1.4111687669878923, "grad_norm": 1.3632609696020284, "learning_rate": 4.380551816798353e-06, "loss": 0.2627863585948944, "step": 5711 }, { "epoch": 1.4114158636026686, "grad_norm": 1.4056959286025963, "learning_rate": 4.377172592839331e-06, "loss": 0.24680721759796143, "step": 5712 }, { "epoch": 1.411662960217445, "grad_norm": 1.4150936412957473, "learning_rate": 4.373794307554269e-06, "loss": 0.3308267593383789, "step": 5713 }, { "epoch": 1.4119100568322214, "grad_norm": 1.4729652568921476, "learning_rate": 4.370416961507138e-06, "loss": 0.3414689004421234, "step": 5714 }, { "epoch": 1.4121571534469979, "grad_norm": 1.4652616702079844, "learning_rate": 4.367040555261757e-06, "loss": 0.2997836470603943, "step": 5715 }, { "epoch": 1.4124042500617742, "grad_norm": 1.843158405485513, "learning_rate": 4.363665089381773e-06, "loss": 0.2614784836769104, "step": 5716 }, { "epoch": 1.4126513466765505, "grad_norm": 1.4769227515035555, "learning_rate": 4.360290564430691e-06, "loss": 0.3152223825454712, "step": 5717 }, { "epoch": 1.412898443291327, "grad_norm": 1.5379996806367424, "learning_rate": 4.356916980971857e-06, "loss": 0.27817070484161377, "step": 5718 }, { "epoch": 1.4131455399061033, "grad_norm": 1.3191970395952541, "learning_rate": 4.353544339568449e-06, "loss": 0.29398488998413086, "step": 5719 }, { "epoch": 1.4133926365208795, "grad_norm": 1.2666291212210936, "learning_rate": 4.350172640783502e-06, "loss": 0.27016958594322205, "step": 5720 }, { "epoch": 1.413639733135656, "grad_norm": 1.3756171621200826, "learning_rate": 4.346801885179881e-06, "loss": 0.26616087555885315, "step": 5721 }, { "epoch": 1.4138868297504323, "grad_norm": 1.4307491103016738, "learning_rate": 4.343432073320302e-06, "loss": 0.3623931407928467, "step": 5722 }, { "epoch": 1.4141339263652088, "grad_norm": 1.4598247516376937, "learning_rate": 4.340063205767321e-06, "loss": 0.28635522723197937, "step": 5723 }, { "epoch": 1.4143810229799851, "grad_norm": 1.4929627682584043, "learning_rate": 4.33669528308334e-06, "loss": 0.3209306299686432, "step": 5724 }, { "epoch": 1.4146281195947616, "grad_norm": 1.6314170091670448, "learning_rate": 4.3333283058305964e-06, "loss": 0.27321749925613403, "step": 5725 }, { "epoch": 1.414875216209538, "grad_norm": 1.292752463289266, "learning_rate": 4.329962274571168e-06, "loss": 0.27215081453323364, "step": 5726 }, { "epoch": 1.4151223128243142, "grad_norm": 1.465022779174766, "learning_rate": 4.326597189866985e-06, "loss": 0.2893774211406708, "step": 5727 }, { "epoch": 1.4153694094390907, "grad_norm": 1.4403680047865532, "learning_rate": 4.323233052279812e-06, "loss": 0.30212074518203735, "step": 5728 }, { "epoch": 1.415616506053867, "grad_norm": 1.4357763798532106, "learning_rate": 4.319869862371262e-06, "loss": 0.3047332763671875, "step": 5729 }, { "epoch": 1.4158636026686433, "grad_norm": 1.391866735580844, "learning_rate": 4.316507620702779e-06, "loss": 0.25011682510375977, "step": 5730 }, { "epoch": 1.4161106992834198, "grad_norm": 1.3380598422033216, "learning_rate": 4.3131463278356576e-06, "loss": 0.2627030909061432, "step": 5731 }, { "epoch": 1.4163577958981963, "grad_norm": 1.272961207794537, "learning_rate": 4.309785984331037e-06, "loss": 0.23090097308158875, "step": 5732 }, { "epoch": 1.4166048925129726, "grad_norm": 1.428094880742116, "learning_rate": 4.3064265907498834e-06, "loss": 0.2802146077156067, "step": 5733 }, { "epoch": 1.416851989127749, "grad_norm": 1.4264982626753537, "learning_rate": 4.303068147653021e-06, "loss": 0.34011322259902954, "step": 5734 }, { "epoch": 1.4170990857425254, "grad_norm": 1.4549166610840152, "learning_rate": 4.299710655601101e-06, "loss": 0.27138620615005493, "step": 5735 }, { "epoch": 1.4173461823573017, "grad_norm": 1.4197164687434827, "learning_rate": 4.2963541151546265e-06, "loss": 0.3156616687774658, "step": 5736 }, { "epoch": 1.417593278972078, "grad_norm": 1.3925225170771913, "learning_rate": 4.2929985268739374e-06, "loss": 0.26977241039276123, "step": 5737 }, { "epoch": 1.4178403755868545, "grad_norm": 1.3586124924285794, "learning_rate": 4.289643891319222e-06, "loss": 0.2708892822265625, "step": 5738 }, { "epoch": 1.4180874722016308, "grad_norm": 1.4095554310154954, "learning_rate": 4.286290209050488e-06, "loss": 0.2723577618598938, "step": 5739 }, { "epoch": 1.4183345688164073, "grad_norm": 1.2755187333517803, "learning_rate": 4.282937480627607e-06, "loss": 0.2415124475955963, "step": 5740 }, { "epoch": 1.4185816654311836, "grad_norm": 1.450610487515135, "learning_rate": 4.279585706610282e-06, "loss": 0.2675311267375946, "step": 5741 }, { "epoch": 1.41882876204596, "grad_norm": 1.5920908581007662, "learning_rate": 4.276234887558058e-06, "loss": 0.22922523319721222, "step": 5742 }, { "epoch": 1.4190758586607364, "grad_norm": 1.493786542875879, "learning_rate": 4.272885024030325e-06, "loss": 0.3029124140739441, "step": 5743 }, { "epoch": 1.4193229552755127, "grad_norm": 1.658224689801782, "learning_rate": 4.269536116586298e-06, "loss": 0.31736379861831665, "step": 5744 }, { "epoch": 1.4195700518902892, "grad_norm": 1.2263885641816012, "learning_rate": 4.266188165785055e-06, "loss": 0.26801663637161255, "step": 5745 }, { "epoch": 1.4198171485050655, "grad_norm": 1.3247633054956762, "learning_rate": 4.262841172185491e-06, "loss": 0.25471240282058716, "step": 5746 }, { "epoch": 1.4200642451198418, "grad_norm": 1.4295471420995247, "learning_rate": 4.2594951363463625e-06, "loss": 0.2789599299430847, "step": 5747 }, { "epoch": 1.4203113417346183, "grad_norm": 1.3204683495846745, "learning_rate": 4.256150058826248e-06, "loss": 0.27394118905067444, "step": 5748 }, { "epoch": 1.4205584383493945, "grad_norm": 1.692152256282663, "learning_rate": 4.252805940183578e-06, "loss": 0.34184134006500244, "step": 5749 }, { "epoch": 1.420805534964171, "grad_norm": 1.2597059816500982, "learning_rate": 4.2494627809766195e-06, "loss": 0.2287602722644806, "step": 5750 }, { "epoch": 1.4210526315789473, "grad_norm": 1.1619130871896348, "learning_rate": 4.246120581763482e-06, "loss": 0.251348614692688, "step": 5751 }, { "epoch": 1.4212997281937239, "grad_norm": 1.4170647349293997, "learning_rate": 4.242779343102108e-06, "loss": 0.27727681398391724, "step": 5752 }, { "epoch": 1.4215468248085001, "grad_norm": 1.3015645419134474, "learning_rate": 4.239439065550282e-06, "loss": 0.2839982509613037, "step": 5753 }, { "epoch": 1.4217939214232764, "grad_norm": 1.2512572540910583, "learning_rate": 4.236099749665629e-06, "loss": 0.23724713921546936, "step": 5754 }, { "epoch": 1.422041018038053, "grad_norm": 1.2400571446997055, "learning_rate": 4.232761396005617e-06, "loss": 0.2353123128414154, "step": 5755 }, { "epoch": 1.4222881146528292, "grad_norm": 1.5113097760373233, "learning_rate": 4.229424005127552e-06, "loss": 0.26370489597320557, "step": 5756 }, { "epoch": 1.4225352112676055, "grad_norm": 1.4076427884695302, "learning_rate": 4.226087577588574e-06, "loss": 0.30376213788986206, "step": 5757 }, { "epoch": 1.422782307882382, "grad_norm": 1.31425984774864, "learning_rate": 4.222752113945664e-06, "loss": 0.2797717750072479, "step": 5758 }, { "epoch": 1.4230294044971583, "grad_norm": 1.4811818000498467, "learning_rate": 4.219417614755651e-06, "loss": 0.31186598539352417, "step": 5759 }, { "epoch": 1.4232765011119348, "grad_norm": 1.413138630324643, "learning_rate": 4.216084080575187e-06, "loss": 0.2915351986885071, "step": 5760 }, { "epoch": 1.423523597726711, "grad_norm": 1.264908082275154, "learning_rate": 4.21275151196078e-06, "loss": 0.22808599472045898, "step": 5761 }, { "epoch": 1.4237706943414876, "grad_norm": 1.466976236834535, "learning_rate": 4.209419909468761e-06, "loss": 0.30987119674682617, "step": 5762 }, { "epoch": 1.424017790956264, "grad_norm": 1.3287093807416608, "learning_rate": 4.206089273655309e-06, "loss": 0.2417653203010559, "step": 5763 }, { "epoch": 1.4242648875710402, "grad_norm": 1.5307444466588749, "learning_rate": 4.202759605076443e-06, "loss": 0.2838457226753235, "step": 5764 }, { "epoch": 1.4245119841858167, "grad_norm": 1.4933008609969385, "learning_rate": 4.19943090428802e-06, "loss": 0.30919015407562256, "step": 5765 }, { "epoch": 1.424759080800593, "grad_norm": 1.2795824818240542, "learning_rate": 4.196103171845728e-06, "loss": 0.2204051911830902, "step": 5766 }, { "epoch": 1.4250061774153693, "grad_norm": 1.3709423270432766, "learning_rate": 4.192776408305095e-06, "loss": 0.3140795826911926, "step": 5767 }, { "epoch": 1.4252532740301458, "grad_norm": 1.2466128330969948, "learning_rate": 4.189450614221493e-06, "loss": 0.252294659614563, "step": 5768 }, { "epoch": 1.4255003706449223, "grad_norm": 1.3250591618770597, "learning_rate": 4.1861257901501326e-06, "loss": 0.24213501811027527, "step": 5769 }, { "epoch": 1.4257474672596986, "grad_norm": 1.3956239917342643, "learning_rate": 4.18280193664606e-06, "loss": 0.2770158052444458, "step": 5770 }, { "epoch": 1.4259945638744749, "grad_norm": 1.3092053203552174, "learning_rate": 4.179479054264154e-06, "loss": 0.23280349373817444, "step": 5771 }, { "epoch": 1.4262416604892514, "grad_norm": 1.3514573293764456, "learning_rate": 4.1761571435591375e-06, "loss": 0.2820494472980499, "step": 5772 }, { "epoch": 1.4264887571040277, "grad_norm": 1.4734879685771203, "learning_rate": 4.172836205085574e-06, "loss": 0.32101112604141235, "step": 5773 }, { "epoch": 1.426735853718804, "grad_norm": 1.336468343947969, "learning_rate": 4.169516239397855e-06, "loss": 0.28528696298599243, "step": 5774 }, { "epoch": 1.4269829503335805, "grad_norm": 1.3859043224628866, "learning_rate": 4.16619724705022e-06, "loss": 0.24994602799415588, "step": 5775 }, { "epoch": 1.4272300469483568, "grad_norm": 1.3632618112586117, "learning_rate": 4.162879228596735e-06, "loss": 0.225361168384552, "step": 5776 }, { "epoch": 1.4274771435631333, "grad_norm": 1.4661166463455708, "learning_rate": 4.159562184591313e-06, "loss": 0.33846884965896606, "step": 5777 }, { "epoch": 1.4277242401779096, "grad_norm": 1.296165264218147, "learning_rate": 4.1562461155877e-06, "loss": 0.2651720643043518, "step": 5778 }, { "epoch": 1.427971336792686, "grad_norm": 1.312513329834539, "learning_rate": 4.152931022139483e-06, "loss": 0.2276879847049713, "step": 5779 }, { "epoch": 1.4282184334074624, "grad_norm": 1.551929546311431, "learning_rate": 4.149616904800083e-06, "loss": 0.35321056842803955, "step": 5780 }, { "epoch": 1.4284655300222386, "grad_norm": 1.3192102914815786, "learning_rate": 4.146303764122753e-06, "loss": 0.2422892153263092, "step": 5781 }, { "epoch": 1.4287126266370151, "grad_norm": 1.2775513928282107, "learning_rate": 4.142991600660591e-06, "loss": 0.27345073223114014, "step": 5782 }, { "epoch": 1.4289597232517914, "grad_norm": 1.4670776647091381, "learning_rate": 4.13968041496653e-06, "loss": 0.2910134196281433, "step": 5783 }, { "epoch": 1.4292068198665677, "grad_norm": 1.574399837607013, "learning_rate": 4.13637020759334e-06, "loss": 0.3120974004268646, "step": 5784 }, { "epoch": 1.4294539164813442, "grad_norm": 1.3364010873506187, "learning_rate": 4.133060979093623e-06, "loss": 0.258769690990448, "step": 5785 }, { "epoch": 1.4297010130961205, "grad_norm": 1.4164093733053877, "learning_rate": 4.1297527300198215e-06, "loss": 0.2953139841556549, "step": 5786 }, { "epoch": 1.429948109710897, "grad_norm": 1.463950610805358, "learning_rate": 4.12644546092422e-06, "loss": 0.2646041810512543, "step": 5787 }, { "epoch": 1.4301952063256733, "grad_norm": 1.7056181449116672, "learning_rate": 4.123139172358926e-06, "loss": 0.30165570974349976, "step": 5788 }, { "epoch": 1.4304423029404498, "grad_norm": 1.4128969778, "learning_rate": 4.119833864875896e-06, "loss": 0.32132086157798767, "step": 5789 }, { "epoch": 1.4306893995552261, "grad_norm": 1.222802466523605, "learning_rate": 4.116529539026914e-06, "loss": 0.2476349025964737, "step": 5790 }, { "epoch": 1.4309364961700024, "grad_norm": 1.3499064214658527, "learning_rate": 4.113226195363603e-06, "loss": 0.2658666670322418, "step": 5791 }, { "epoch": 1.431183592784779, "grad_norm": 1.2396674614130312, "learning_rate": 4.109923834437425e-06, "loss": 0.22252479195594788, "step": 5792 }, { "epoch": 1.4314306893995552, "grad_norm": 1.317946078820189, "learning_rate": 4.1066224567996795e-06, "loss": 0.26297998428344727, "step": 5793 }, { "epoch": 1.4316777860143315, "grad_norm": 1.423164820709172, "learning_rate": 4.103322063001494e-06, "loss": 0.2500692903995514, "step": 5794 }, { "epoch": 1.431924882629108, "grad_norm": 1.3721787191791925, "learning_rate": 4.1000226535938315e-06, "loss": 0.2912214398384094, "step": 5795 }, { "epoch": 1.4321719792438843, "grad_norm": 1.4679903460797825, "learning_rate": 4.0967242291275e-06, "loss": 0.2619250416755676, "step": 5796 }, { "epoch": 1.4324190758586608, "grad_norm": 1.4863938285018192, "learning_rate": 4.093426790153136e-06, "loss": 0.272019624710083, "step": 5797 }, { "epoch": 1.432666172473437, "grad_norm": 1.3132485746901075, "learning_rate": 4.090130337221219e-06, "loss": 0.2516288757324219, "step": 5798 }, { "epoch": 1.4329132690882136, "grad_norm": 1.5266081900310358, "learning_rate": 4.086834870882049e-06, "loss": 0.2850741446018219, "step": 5799 }, { "epoch": 1.4331603657029899, "grad_norm": 1.4053313505935399, "learning_rate": 4.083540391685778e-06, "loss": 0.2906177043914795, "step": 5800 }, { "epoch": 1.4334074623177662, "grad_norm": 1.8139144677806, "learning_rate": 4.080246900182384e-06, "loss": 0.33509013056755066, "step": 5801 }, { "epoch": 1.4336545589325427, "grad_norm": 1.6757713042747744, "learning_rate": 4.07695439692168e-06, "loss": 0.39450234174728394, "step": 5802 }, { "epoch": 1.433901655547319, "grad_norm": 1.3165579941503074, "learning_rate": 4.073662882453319e-06, "loss": 0.21810272336006165, "step": 5803 }, { "epoch": 1.4341487521620953, "grad_norm": 1.5312572782777305, "learning_rate": 4.0703723573267815e-06, "loss": 0.27837949991226196, "step": 5804 }, { "epoch": 1.4343958487768718, "grad_norm": 1.3777679762553665, "learning_rate": 4.067082822091391e-06, "loss": 0.25325024127960205, "step": 5805 }, { "epoch": 1.4346429453916483, "grad_norm": 1.4483064139235882, "learning_rate": 4.063794277296299e-06, "loss": 0.27663323283195496, "step": 5806 }, { "epoch": 1.4348900420064246, "grad_norm": 1.2750863444862874, "learning_rate": 4.060506723490502e-06, "loss": 0.26832571625709534, "step": 5807 }, { "epoch": 1.4351371386212008, "grad_norm": 1.325562901921405, "learning_rate": 4.057220161222818e-06, "loss": 0.2790565490722656, "step": 5808 }, { "epoch": 1.4353842352359774, "grad_norm": 1.5379452550406039, "learning_rate": 4.053934591041901e-06, "loss": 0.30312150716781616, "step": 5809 }, { "epoch": 1.4356313318507536, "grad_norm": 1.4074000459375922, "learning_rate": 4.05065001349625e-06, "loss": 0.2527567446231842, "step": 5810 }, { "epoch": 1.43587842846553, "grad_norm": 1.5658134540959536, "learning_rate": 4.04736642913419e-06, "loss": 0.3049619197845459, "step": 5811 }, { "epoch": 1.4361255250803064, "grad_norm": 1.554298594853757, "learning_rate": 4.044083838503884e-06, "loss": 0.2793841063976288, "step": 5812 }, { "epoch": 1.4363726216950827, "grad_norm": 1.5838886231948335, "learning_rate": 4.040802242153323e-06, "loss": 0.31440770626068115, "step": 5813 }, { "epoch": 1.436619718309859, "grad_norm": 1.4319766014372581, "learning_rate": 4.037521640630343e-06, "loss": 0.28678959608078003, "step": 5814 }, { "epoch": 1.4368668149246355, "grad_norm": 1.349120924844113, "learning_rate": 4.034242034482598e-06, "loss": 0.2557834982872009, "step": 5815 }, { "epoch": 1.437113911539412, "grad_norm": 1.3096629223759502, "learning_rate": 4.03096342425759e-06, "loss": 0.2174355387687683, "step": 5816 }, { "epoch": 1.4373610081541883, "grad_norm": 1.3840563692351124, "learning_rate": 4.027685810502653e-06, "loss": 0.23059137165546417, "step": 5817 }, { "epoch": 1.4376081047689646, "grad_norm": 1.3115110592610832, "learning_rate": 4.024409193764944e-06, "loss": 0.2185364067554474, "step": 5818 }, { "epoch": 1.4378552013837411, "grad_norm": 1.4052981806368194, "learning_rate": 4.021133574591465e-06, "loss": 0.28695914149284363, "step": 5819 }, { "epoch": 1.4381022979985174, "grad_norm": 1.3946191348926762, "learning_rate": 4.017858953529047e-06, "loss": 0.2653994858264923, "step": 5820 }, { "epoch": 1.4383493946132937, "grad_norm": 1.4097412046816917, "learning_rate": 4.014585331124358e-06, "loss": 0.2202613353729248, "step": 5821 }, { "epoch": 1.4385964912280702, "grad_norm": 1.3339107153712721, "learning_rate": 4.011312707923892e-06, "loss": 0.24617847800254822, "step": 5822 }, { "epoch": 1.4388435878428465, "grad_norm": 1.4660753120970913, "learning_rate": 4.008041084473979e-06, "loss": 0.3258855938911438, "step": 5823 }, { "epoch": 1.439090684457623, "grad_norm": 1.4518380958214934, "learning_rate": 4.004770461320785e-06, "loss": 0.3097621500492096, "step": 5824 }, { "epoch": 1.4393377810723993, "grad_norm": 1.3871175022958682, "learning_rate": 4.0015008390103074e-06, "loss": 0.22349485754966736, "step": 5825 }, { "epoch": 1.4395848776871758, "grad_norm": 1.2873189523318664, "learning_rate": 3.998232218088379e-06, "loss": 0.2347477376461029, "step": 5826 }, { "epoch": 1.439831974301952, "grad_norm": 1.6063458413253817, "learning_rate": 3.994964599100659e-06, "loss": 0.28933006525039673, "step": 5827 }, { "epoch": 1.4400790709167284, "grad_norm": 1.3195713672513565, "learning_rate": 3.991697982592647e-06, "loss": 0.2583603858947754, "step": 5828 }, { "epoch": 1.4403261675315049, "grad_norm": 1.3588061479480598, "learning_rate": 3.988432369109667e-06, "loss": 0.2903006672859192, "step": 5829 }, { "epoch": 1.4405732641462812, "grad_norm": 1.4640350400556392, "learning_rate": 3.985167759196882e-06, "loss": 0.3160715103149414, "step": 5830 }, { "epoch": 1.4408203607610575, "grad_norm": 1.2934570262220872, "learning_rate": 3.981904153399289e-06, "loss": 0.275003045797348, "step": 5831 }, { "epoch": 1.441067457375834, "grad_norm": 1.425784451214977, "learning_rate": 3.978641552261707e-06, "loss": 0.258331298828125, "step": 5832 }, { "epoch": 1.4413145539906103, "grad_norm": 1.3855052018948535, "learning_rate": 3.9753799563287985e-06, "loss": 0.2755250334739685, "step": 5833 }, { "epoch": 1.4415616506053868, "grad_norm": 1.2993206605339869, "learning_rate": 3.972119366145053e-06, "loss": 0.27234286069869995, "step": 5834 }, { "epoch": 1.441808747220163, "grad_norm": 1.5071233464455063, "learning_rate": 3.968859782254799e-06, "loss": 0.24241715669631958, "step": 5835 }, { "epoch": 1.4420558438349396, "grad_norm": 1.5172119473640457, "learning_rate": 3.96560120520218e-06, "loss": 0.3312796652317047, "step": 5836 }, { "epoch": 1.4423029404497159, "grad_norm": 1.3719022929859754, "learning_rate": 3.962343635531185e-06, "loss": 0.24224835634231567, "step": 5837 }, { "epoch": 1.4425500370644921, "grad_norm": 1.4197829494691172, "learning_rate": 3.959087073785636e-06, "loss": 0.32169926166534424, "step": 5838 }, { "epoch": 1.4427971336792687, "grad_norm": 1.3349735019122781, "learning_rate": 3.955831520509185e-06, "loss": 0.25858402252197266, "step": 5839 }, { "epoch": 1.443044230294045, "grad_norm": 1.4066893369870217, "learning_rate": 3.952576976245307e-06, "loss": 0.28816547989845276, "step": 5840 }, { "epoch": 1.4432913269088212, "grad_norm": 1.3984990373997643, "learning_rate": 3.949323441537317e-06, "loss": 0.2787526845932007, "step": 5841 }, { "epoch": 1.4435384235235977, "grad_norm": 1.4346205399801357, "learning_rate": 3.946070916928365e-06, "loss": 0.27234506607055664, "step": 5842 }, { "epoch": 1.443785520138374, "grad_norm": 1.5243516633310652, "learning_rate": 3.94281940296142e-06, "loss": 0.2720213532447815, "step": 5843 }, { "epoch": 1.4440326167531505, "grad_norm": 1.5231737953235311, "learning_rate": 3.939568900179294e-06, "loss": 0.3353867530822754, "step": 5844 }, { "epoch": 1.4442797133679268, "grad_norm": 1.3782400450926067, "learning_rate": 3.93631940912462e-06, "loss": 0.2637089490890503, "step": 5845 }, { "epoch": 1.4445268099827033, "grad_norm": 1.5616721329933057, "learning_rate": 3.933070930339873e-06, "loss": 0.26234954595565796, "step": 5846 }, { "epoch": 1.4447739065974796, "grad_norm": 1.6415254161496347, "learning_rate": 3.929823464367349e-06, "loss": 0.250333309173584, "step": 5847 }, { "epoch": 1.445021003212256, "grad_norm": 1.4606916803486736, "learning_rate": 3.9265770117491875e-06, "loss": 0.24614891409873962, "step": 5848 }, { "epoch": 1.4452680998270324, "grad_norm": 1.4761542029166566, "learning_rate": 3.923331573027345e-06, "loss": 0.26064354181289673, "step": 5849 }, { "epoch": 1.4455151964418087, "grad_norm": 1.5898930735401071, "learning_rate": 3.9200871487436106e-06, "loss": 0.35895586013793945, "step": 5850 }, { "epoch": 1.445762293056585, "grad_norm": 1.34414014468407, "learning_rate": 3.916843739439614e-06, "loss": 0.2681278884410858, "step": 5851 }, { "epoch": 1.4460093896713615, "grad_norm": 1.2402285097328807, "learning_rate": 3.9136013456568065e-06, "loss": 0.24638453125953674, "step": 5852 }, { "epoch": 1.446256486286138, "grad_norm": 1.3866225939948933, "learning_rate": 3.910359967936478e-06, "loss": 0.23552566766738892, "step": 5853 }, { "epoch": 1.4465035829009143, "grad_norm": 1.6463623859806134, "learning_rate": 3.907119606819736e-06, "loss": 0.30972063541412354, "step": 5854 }, { "epoch": 1.4467506795156906, "grad_norm": 1.4105702062705023, "learning_rate": 3.903880262847529e-06, "loss": 0.2934907376766205, "step": 5855 }, { "epoch": 1.446997776130467, "grad_norm": 1.3439192209083215, "learning_rate": 3.900641936560638e-06, "loss": 0.2758045196533203, "step": 5856 }, { "epoch": 1.4472448727452434, "grad_norm": 1.4006746993516104, "learning_rate": 3.897404628499659e-06, "loss": 0.3074626624584198, "step": 5857 }, { "epoch": 1.4474919693600197, "grad_norm": 1.4202900693560294, "learning_rate": 3.894168339205037e-06, "loss": 0.27833834290504456, "step": 5858 }, { "epoch": 1.4477390659747962, "grad_norm": 1.4164509771599743, "learning_rate": 3.89093306921703e-06, "loss": 0.2690703868865967, "step": 5859 }, { "epoch": 1.4479861625895725, "grad_norm": 1.3582303332950998, "learning_rate": 3.887698819075737e-06, "loss": 0.2752254605293274, "step": 5860 }, { "epoch": 1.448233259204349, "grad_norm": 1.4582695519063051, "learning_rate": 3.8844655893210825e-06, "loss": 0.27971503138542175, "step": 5861 }, { "epoch": 1.4484803558191253, "grad_norm": 1.3246859574627885, "learning_rate": 3.8812333804928255e-06, "loss": 0.23504409193992615, "step": 5862 }, { "epoch": 1.4487274524339018, "grad_norm": 1.455137295784467, "learning_rate": 3.878002193130547e-06, "loss": 0.2830316424369812, "step": 5863 }, { "epoch": 1.448974549048678, "grad_norm": 1.4914691392907988, "learning_rate": 3.874772027773657e-06, "loss": 0.31346243619918823, "step": 5864 }, { "epoch": 1.4492216456634543, "grad_norm": 1.4074294076173055, "learning_rate": 3.871542884961402e-06, "loss": 0.3059028387069702, "step": 5865 }, { "epoch": 1.4494687422782309, "grad_norm": 1.3439181660287929, "learning_rate": 3.868314765232857e-06, "loss": 0.2712469696998596, "step": 5866 }, { "epoch": 1.4497158388930071, "grad_norm": 1.5427681163356912, "learning_rate": 3.865087669126923e-06, "loss": 0.28411945700645447, "step": 5867 }, { "epoch": 1.4499629355077834, "grad_norm": 1.404008231058656, "learning_rate": 3.861861597182328e-06, "loss": 0.24039751291275024, "step": 5868 }, { "epoch": 1.45021003212256, "grad_norm": 1.4857861615192898, "learning_rate": 3.858636549937634e-06, "loss": 0.3264157176017761, "step": 5869 }, { "epoch": 1.4504571287373362, "grad_norm": 1.493264595059371, "learning_rate": 3.855412527931234e-06, "loss": 0.26363304257392883, "step": 5870 }, { "epoch": 1.4507042253521127, "grad_norm": 1.5595966909004542, "learning_rate": 3.852189531701337e-06, "loss": 0.27628299593925476, "step": 5871 }, { "epoch": 1.450951321966889, "grad_norm": 1.6170852738440795, "learning_rate": 3.848967561785998e-06, "loss": 0.3382456600666046, "step": 5872 }, { "epoch": 1.4511984185816655, "grad_norm": 1.5857175864229847, "learning_rate": 3.845746618723084e-06, "loss": 0.27477505803108215, "step": 5873 }, { "epoch": 1.4514455151964418, "grad_norm": 1.3779180128687916, "learning_rate": 3.8425267030503045e-06, "loss": 0.2634170651435852, "step": 5874 }, { "epoch": 1.4516926118112181, "grad_norm": 1.3122919852825314, "learning_rate": 3.839307815305188e-06, "loss": 0.2464590221643448, "step": 5875 }, { "epoch": 1.4519397084259946, "grad_norm": 1.3412988687993463, "learning_rate": 3.836089956025103e-06, "loss": 0.2578600347042084, "step": 5876 }, { "epoch": 1.452186805040771, "grad_norm": 1.4138959330894174, "learning_rate": 3.832873125747232e-06, "loss": 0.2548627257347107, "step": 5877 }, { "epoch": 1.4524339016555472, "grad_norm": 1.4830536685786155, "learning_rate": 3.82965732500859e-06, "loss": 0.2331889271736145, "step": 5878 }, { "epoch": 1.4526809982703237, "grad_norm": 1.5047856574192566, "learning_rate": 3.826442554346025e-06, "loss": 0.25476065278053284, "step": 5879 }, { "epoch": 1.4529280948851, "grad_norm": 1.6164274261764853, "learning_rate": 3.823228814296211e-06, "loss": 0.27451246976852417, "step": 5880 }, { "epoch": 1.4531751914998765, "grad_norm": 1.5168578151547014, "learning_rate": 3.820016105395651e-06, "loss": 0.3428909480571747, "step": 5881 }, { "epoch": 1.4534222881146528, "grad_norm": 1.5557961195891974, "learning_rate": 3.81680442818067e-06, "loss": 0.36362987756729126, "step": 5882 }, { "epoch": 1.4536693847294293, "grad_norm": 1.2928204084183175, "learning_rate": 3.813593783187429e-06, "loss": 0.2675231397151947, "step": 5883 }, { "epoch": 1.4539164813442056, "grad_norm": 1.344863725622379, "learning_rate": 3.8103841709519087e-06, "loss": 0.30795273184776306, "step": 5884 }, { "epoch": 1.4541635779589819, "grad_norm": 1.477289192879923, "learning_rate": 3.807175592009922e-06, "loss": 0.27531516551971436, "step": 5885 }, { "epoch": 1.4544106745737584, "grad_norm": 1.5012791546736124, "learning_rate": 3.803968046897114e-06, "loss": 0.3162134289741516, "step": 5886 }, { "epoch": 1.4546577711885347, "grad_norm": 1.3082614203409333, "learning_rate": 3.8007615361489435e-06, "loss": 0.2231564074754715, "step": 5887 }, { "epoch": 1.454904867803311, "grad_norm": 1.3001647365853886, "learning_rate": 3.7975560603007087e-06, "loss": 0.27627646923065186, "step": 5888 }, { "epoch": 1.4551519644180875, "grad_norm": 2.1899427248447463, "learning_rate": 3.7943516198875317e-06, "loss": 0.2933298349380493, "step": 5889 }, { "epoch": 1.455399061032864, "grad_norm": 1.551277094025885, "learning_rate": 3.7911482154443646e-06, "loss": 0.2800203561782837, "step": 5890 }, { "epoch": 1.4556461576476403, "grad_norm": 1.3915507602583759, "learning_rate": 3.7879458475059804e-06, "loss": 0.2807115316390991, "step": 5891 }, { "epoch": 1.4558932542624166, "grad_norm": 1.48456045681902, "learning_rate": 3.7847445166069775e-06, "loss": 0.30217093229293823, "step": 5892 }, { "epoch": 1.456140350877193, "grad_norm": 1.363124827007313, "learning_rate": 3.781544223281789e-06, "loss": 0.26767852902412415, "step": 5893 }, { "epoch": 1.4563874474919694, "grad_norm": 1.5081687600998577, "learning_rate": 3.778344968064671e-06, "loss": 0.28778398036956787, "step": 5894 }, { "epoch": 1.4566345441067456, "grad_norm": 1.776610264615791, "learning_rate": 3.775146751489712e-06, "loss": 0.3370727300643921, "step": 5895 }, { "epoch": 1.4568816407215222, "grad_norm": 1.3591607241879615, "learning_rate": 3.771949574090814e-06, "loss": 0.30448734760284424, "step": 5896 }, { "epoch": 1.4571287373362984, "grad_norm": 1.532625025865275, "learning_rate": 3.768753436401719e-06, "loss": 0.3123081624507904, "step": 5897 }, { "epoch": 1.457375833951075, "grad_norm": 1.4023768385891588, "learning_rate": 3.7655583389559845e-06, "loss": 0.2573903203010559, "step": 5898 }, { "epoch": 1.4576229305658512, "grad_norm": 1.2771143433042056, "learning_rate": 3.7623642822870023e-06, "loss": 0.23109176754951477, "step": 5899 }, { "epoch": 1.4578700271806277, "grad_norm": 1.3511385480902571, "learning_rate": 3.759171266927991e-06, "loss": 0.22161485254764557, "step": 5900 }, { "epoch": 1.458117123795404, "grad_norm": 1.4058298486731855, "learning_rate": 3.7559792934119853e-06, "loss": 0.25470277667045593, "step": 5901 }, { "epoch": 1.4583642204101803, "grad_norm": 1.6065282937900578, "learning_rate": 3.7527883622718563e-06, "loss": 0.34363335371017456, "step": 5902 }, { "epoch": 1.4586113170249568, "grad_norm": 1.4592078688830932, "learning_rate": 3.749598474040299e-06, "loss": 0.331041157245636, "step": 5903 }, { "epoch": 1.4588584136397331, "grad_norm": 1.3976926505725686, "learning_rate": 3.746409629249833e-06, "loss": 0.27229759097099304, "step": 5904 }, { "epoch": 1.4591055102545094, "grad_norm": 1.393185420318567, "learning_rate": 3.7432218284328038e-06, "loss": 0.23330825567245483, "step": 5905 }, { "epoch": 1.459352606869286, "grad_norm": 1.5803450950617477, "learning_rate": 3.7400350721213764e-06, "loss": 0.2961187958717346, "step": 5906 }, { "epoch": 1.4595997034840622, "grad_norm": 1.415427244280751, "learning_rate": 3.736849360847552e-06, "loss": 0.2815698981285095, "step": 5907 }, { "epoch": 1.4598468000988387, "grad_norm": 1.3381177708052499, "learning_rate": 3.7336646951431522e-06, "loss": 0.24251079559326172, "step": 5908 }, { "epoch": 1.460093896713615, "grad_norm": 1.2880455756598448, "learning_rate": 3.730481075539828e-06, "loss": 0.2638334333896637, "step": 5909 }, { "epoch": 1.4603409933283915, "grad_norm": 1.3213762248250096, "learning_rate": 3.7272985025690466e-06, "loss": 0.24870353937149048, "step": 5910 }, { "epoch": 1.4605880899431678, "grad_norm": 1.3440273906548845, "learning_rate": 3.724116976762112e-06, "loss": 0.3043404519557953, "step": 5911 }, { "epoch": 1.460835186557944, "grad_norm": 1.3275789353969567, "learning_rate": 3.7209364986501404e-06, "loss": 0.27079886198043823, "step": 5912 }, { "epoch": 1.4610822831727206, "grad_norm": 1.4634709718887104, "learning_rate": 3.717757068764085e-06, "loss": 0.30211615562438965, "step": 5913 }, { "epoch": 1.4613293797874969, "grad_norm": 1.3305591829658425, "learning_rate": 3.7145786876347236e-06, "loss": 0.23367293179035187, "step": 5914 }, { "epoch": 1.4615764764022732, "grad_norm": 1.2960212991215638, "learning_rate": 3.7114013557926442e-06, "loss": 0.2882780432701111, "step": 5915 }, { "epoch": 1.4618235730170497, "grad_norm": 1.4766908264326686, "learning_rate": 3.708225073768277e-06, "loss": 0.29725170135498047, "step": 5916 }, { "epoch": 1.462070669631826, "grad_norm": 1.4074514510593417, "learning_rate": 3.705049842091869e-06, "loss": 0.3043029308319092, "step": 5917 }, { "epoch": 1.4623177662466025, "grad_norm": 1.281236614009293, "learning_rate": 3.701875661293496e-06, "loss": 0.25540268421173096, "step": 5918 }, { "epoch": 1.4625648628613788, "grad_norm": 1.4292257778411661, "learning_rate": 3.6987025319030513e-06, "loss": 0.2617146372795105, "step": 5919 }, { "epoch": 1.4628119594761553, "grad_norm": 1.320431404375973, "learning_rate": 3.6955304544502545e-06, "loss": 0.22314566373825073, "step": 5920 }, { "epoch": 1.4630590560909316, "grad_norm": 1.2899764804975598, "learning_rate": 3.692359429464655e-06, "loss": 0.24359628558158875, "step": 5921 }, { "epoch": 1.4633061527057079, "grad_norm": 1.3776381273374145, "learning_rate": 3.6891894574756217e-06, "loss": 0.23674935102462769, "step": 5922 }, { "epoch": 1.4635532493204844, "grad_norm": 1.5303007258671661, "learning_rate": 3.686020539012354e-06, "loss": 0.3398163318634033, "step": 5923 }, { "epoch": 1.4638003459352606, "grad_norm": 1.5906238194299345, "learning_rate": 3.682852674603864e-06, "loss": 0.30259525775909424, "step": 5924 }, { "epoch": 1.464047442550037, "grad_norm": 1.3682941666096327, "learning_rate": 3.6796858647790015e-06, "loss": 0.25005701184272766, "step": 5925 }, { "epoch": 1.4642945391648134, "grad_norm": 1.306714364082235, "learning_rate": 3.676520110066425e-06, "loss": 0.27369093894958496, "step": 5926 }, { "epoch": 1.46454163577959, "grad_norm": 1.525153402402807, "learning_rate": 3.6733554109946323e-06, "loss": 0.29861289262771606, "step": 5927 }, { "epoch": 1.4647887323943662, "grad_norm": 1.3698080787079392, "learning_rate": 3.670191768091933e-06, "loss": 0.290998637676239, "step": 5928 }, { "epoch": 1.4650358290091425, "grad_norm": 1.4693857878553058, "learning_rate": 3.667029181886466e-06, "loss": 0.29078221321105957, "step": 5929 }, { "epoch": 1.465282925623919, "grad_norm": 1.5945325112517499, "learning_rate": 3.6638676529061945e-06, "loss": 0.31677675247192383, "step": 5930 }, { "epoch": 1.4655300222386953, "grad_norm": 1.5445107819193675, "learning_rate": 3.6607071816789063e-06, "loss": 0.3605170249938965, "step": 5931 }, { "epoch": 1.4657771188534716, "grad_norm": 1.3021279724114563, "learning_rate": 3.657547768732207e-06, "loss": 0.2679571807384491, "step": 5932 }, { "epoch": 1.4660242154682481, "grad_norm": 1.3861396167400846, "learning_rate": 3.6543894145935245e-06, "loss": 0.28317663073539734, "step": 5933 }, { "epoch": 1.4662713120830244, "grad_norm": 1.5516060803055776, "learning_rate": 3.651232119790119e-06, "loss": 0.2617766261100769, "step": 5934 }, { "epoch": 1.4665184086978007, "grad_norm": 1.2973481525673545, "learning_rate": 3.648075884849066e-06, "loss": 0.3023183345794678, "step": 5935 }, { "epoch": 1.4667655053125772, "grad_norm": 1.3247711187251097, "learning_rate": 3.644920710297274e-06, "loss": 0.2632812559604645, "step": 5936 }, { "epoch": 1.4670126019273537, "grad_norm": 1.332557482420884, "learning_rate": 3.641766596661457e-06, "loss": 0.23801979422569275, "step": 5937 }, { "epoch": 1.46725969854213, "grad_norm": 1.3390258002363153, "learning_rate": 3.638613544468168e-06, "loss": 0.27700960636138916, "step": 5938 }, { "epoch": 1.4675067951569063, "grad_norm": 1.4033225656590589, "learning_rate": 3.635461554243779e-06, "loss": 0.2910563349723816, "step": 5939 }, { "epoch": 1.4677538917716828, "grad_norm": 1.461150268998964, "learning_rate": 3.632310626514477e-06, "loss": 0.275145947933197, "step": 5940 }, { "epoch": 1.468000988386459, "grad_norm": 1.3588336014851092, "learning_rate": 3.629160761806284e-06, "loss": 0.2766415774822235, "step": 5941 }, { "epoch": 1.4682480850012354, "grad_norm": 1.3499211222906378, "learning_rate": 3.6260119606450307e-06, "loss": 0.2587231397628784, "step": 5942 }, { "epoch": 1.468495181616012, "grad_norm": 1.518715407896947, "learning_rate": 3.6228642235563816e-06, "loss": 0.2825568616390228, "step": 5943 }, { "epoch": 1.4687422782307882, "grad_norm": 1.4524071313956604, "learning_rate": 3.6197175510658188e-06, "loss": 0.3317160904407501, "step": 5944 }, { "epoch": 1.4689893748455647, "grad_norm": 1.5537157653390306, "learning_rate": 3.6165719436986503e-06, "loss": 0.2432132363319397, "step": 5945 }, { "epoch": 1.469236471460341, "grad_norm": 1.590300233116037, "learning_rate": 3.6134274019800008e-06, "loss": 0.31550806760787964, "step": 5946 }, { "epoch": 1.4694835680751175, "grad_norm": 1.3291635874063388, "learning_rate": 3.6102839264348166e-06, "loss": 0.2510342299938202, "step": 5947 }, { "epoch": 1.4697306646898938, "grad_norm": 1.3972044235134469, "learning_rate": 3.607141517587871e-06, "loss": 0.26868024468421936, "step": 5948 }, { "epoch": 1.46997776130467, "grad_norm": 1.320429765038505, "learning_rate": 3.6040001759637588e-06, "loss": 0.281266450881958, "step": 5949 }, { "epoch": 1.4702248579194466, "grad_norm": 1.3511737197700682, "learning_rate": 3.6008599020868985e-06, "loss": 0.24375084042549133, "step": 5950 }, { "epoch": 1.4704719545342229, "grad_norm": 1.591770889317738, "learning_rate": 3.5977206964815183e-06, "loss": 0.2763451337814331, "step": 5951 }, { "epoch": 1.4707190511489991, "grad_norm": 1.4549556521305402, "learning_rate": 3.5945825596716842e-06, "loss": 0.24411270022392273, "step": 5952 }, { "epoch": 1.4709661477637757, "grad_norm": 1.5341590493745263, "learning_rate": 3.5914454921812704e-06, "loss": 0.2574108839035034, "step": 5953 }, { "epoch": 1.471213244378552, "grad_norm": 1.8540655849860561, "learning_rate": 3.5883094945339814e-06, "loss": 0.3326282501220703, "step": 5954 }, { "epoch": 1.4714603409933285, "grad_norm": 1.4260136881606953, "learning_rate": 3.5851745672533443e-06, "loss": 0.2554875910282135, "step": 5955 }, { "epoch": 1.4717074376081047, "grad_norm": 1.460683830698309, "learning_rate": 3.5820407108626953e-06, "loss": 0.30710548162460327, "step": 5956 }, { "epoch": 1.4719545342228813, "grad_norm": 1.5242499314252236, "learning_rate": 3.5789079258852032e-06, "loss": 0.3413594961166382, "step": 5957 }, { "epoch": 1.4722016308376575, "grad_norm": 1.488428993304736, "learning_rate": 3.575776212843857e-06, "loss": 0.23413066565990448, "step": 5958 }, { "epoch": 1.4724487274524338, "grad_norm": 1.4354468032560443, "learning_rate": 3.572645572261465e-06, "loss": 0.32728707790374756, "step": 5959 }, { "epoch": 1.4726958240672103, "grad_norm": 1.3183323735681436, "learning_rate": 3.5695160046606534e-06, "loss": 0.26079249382019043, "step": 5960 }, { "epoch": 1.4729429206819866, "grad_norm": 1.3329985875487362, "learning_rate": 3.566387510563869e-06, "loss": 0.24430572986602783, "step": 5961 }, { "epoch": 1.473190017296763, "grad_norm": 1.343486657160148, "learning_rate": 3.5632600904933845e-06, "loss": 0.3039717674255371, "step": 5962 }, { "epoch": 1.4734371139115394, "grad_norm": 1.4795809944621119, "learning_rate": 3.5601337449712923e-06, "loss": 0.24351128935813904, "step": 5963 }, { "epoch": 1.4736842105263157, "grad_norm": 1.425744755069471, "learning_rate": 3.5570084745195065e-06, "loss": 0.3006853461265564, "step": 5964 }, { "epoch": 1.4739313071410922, "grad_norm": 1.364094470789403, "learning_rate": 3.553884279659753e-06, "loss": 0.248569056391716, "step": 5965 }, { "epoch": 1.4741784037558685, "grad_norm": 1.4067364606779833, "learning_rate": 3.5507611609135895e-06, "loss": 0.2557205557823181, "step": 5966 }, { "epoch": 1.474425500370645, "grad_norm": 1.3693219878136798, "learning_rate": 3.547639118802384e-06, "loss": 0.2865861654281616, "step": 5967 }, { "epoch": 1.4746725969854213, "grad_norm": 1.3837947256136713, "learning_rate": 3.544518153847334e-06, "loss": 0.2633664906024933, "step": 5968 }, { "epoch": 1.4749196936001976, "grad_norm": 1.2500612728853255, "learning_rate": 3.5413982665694534e-06, "loss": 0.23815765976905823, "step": 5969 }, { "epoch": 1.475166790214974, "grad_norm": 1.4766679084691077, "learning_rate": 3.538279457489572e-06, "loss": 0.2900773882865906, "step": 5970 }, { "epoch": 1.4754138868297504, "grad_norm": 1.44404128648761, "learning_rate": 3.5351617271283435e-06, "loss": 0.33737272024154663, "step": 5971 }, { "epoch": 1.4756609834445267, "grad_norm": 1.5632701380279659, "learning_rate": 3.532045076006244e-06, "loss": 0.31340277194976807, "step": 5972 }, { "epoch": 1.4759080800593032, "grad_norm": 1.437971267775676, "learning_rate": 3.528929504643569e-06, "loss": 0.32776984572410583, "step": 5973 }, { "epoch": 1.4761551766740797, "grad_norm": 1.459044466051955, "learning_rate": 3.5258150135604287e-06, "loss": 0.24939781427383423, "step": 5974 }, { "epoch": 1.476402273288856, "grad_norm": 1.5039974190613863, "learning_rate": 3.5227016032767525e-06, "loss": 0.31614968180656433, "step": 5975 }, { "epoch": 1.4766493699036323, "grad_norm": 1.3005280079284463, "learning_rate": 3.519589274312295e-06, "loss": 0.2460014671087265, "step": 5976 }, { "epoch": 1.4768964665184088, "grad_norm": 1.483372656896113, "learning_rate": 3.516478027186628e-06, "loss": 0.2642587423324585, "step": 5977 }, { "epoch": 1.477143563133185, "grad_norm": 1.2383347614324631, "learning_rate": 3.513367862419147e-06, "loss": 0.2191745936870575, "step": 5978 }, { "epoch": 1.4773906597479614, "grad_norm": 1.4735303583400172, "learning_rate": 3.5102587805290543e-06, "loss": 0.2999717593193054, "step": 5979 }, { "epoch": 1.4776377563627379, "grad_norm": 1.3586989025688427, "learning_rate": 3.507150782035389e-06, "loss": 0.28078368306159973, "step": 5980 }, { "epoch": 1.4778848529775142, "grad_norm": 1.5614955695245283, "learning_rate": 3.5040438674569898e-06, "loss": 0.3010762929916382, "step": 5981 }, { "epoch": 1.4781319495922907, "grad_norm": 1.194651931599025, "learning_rate": 3.50093803731253e-06, "loss": 0.1941126435995102, "step": 5982 }, { "epoch": 1.478379046207067, "grad_norm": 1.3672117015675407, "learning_rate": 3.497833292120499e-06, "loss": 0.24737755954265594, "step": 5983 }, { "epoch": 1.4786261428218435, "grad_norm": 1.5521133574561157, "learning_rate": 3.4947296323991966e-06, "loss": 0.30811449885368347, "step": 5984 }, { "epoch": 1.4788732394366197, "grad_norm": 1.4048143613213924, "learning_rate": 3.49162705866675e-06, "loss": 0.2684635519981384, "step": 5985 }, { "epoch": 1.479120336051396, "grad_norm": 1.3975826696158595, "learning_rate": 3.4885255714411024e-06, "loss": 0.2564844489097595, "step": 5986 }, { "epoch": 1.4793674326661725, "grad_norm": 1.3030081050864555, "learning_rate": 3.4854251712400187e-06, "loss": 0.2713758647441864, "step": 5987 }, { "epoch": 1.4796145292809488, "grad_norm": 1.2645835199199746, "learning_rate": 3.482325858581076e-06, "loss": 0.2367832362651825, "step": 5988 }, { "epoch": 1.4798616258957251, "grad_norm": 1.375059438307684, "learning_rate": 3.4792276339816712e-06, "loss": 0.29103827476501465, "step": 5989 }, { "epoch": 1.4801087225105016, "grad_norm": 1.3859212610242215, "learning_rate": 3.4761304979590226e-06, "loss": 0.26751673221588135, "step": 5990 }, { "epoch": 1.480355819125278, "grad_norm": 1.5225833587693545, "learning_rate": 3.473034451030166e-06, "loss": 0.29648202657699585, "step": 5991 }, { "epoch": 1.4806029157400544, "grad_norm": 1.2946918929769602, "learning_rate": 3.46993949371196e-06, "loss": 0.25766807794570923, "step": 5992 }, { "epoch": 1.4808500123548307, "grad_norm": 1.4594466066302523, "learning_rate": 3.466845626521068e-06, "loss": 0.273468554019928, "step": 5993 }, { "epoch": 1.4810971089696072, "grad_norm": 1.3631599633001061, "learning_rate": 3.463752849973987e-06, "loss": 0.30262017250061035, "step": 5994 }, { "epoch": 1.4813442055843835, "grad_norm": 1.4227556154075855, "learning_rate": 3.460661164587018e-06, "loss": 0.2831883430480957, "step": 5995 }, { "epoch": 1.4815913021991598, "grad_norm": 1.2883326325239888, "learning_rate": 3.45757057087629e-06, "loss": 0.2578440308570862, "step": 5996 }, { "epoch": 1.4818383988139363, "grad_norm": 1.3312774877681965, "learning_rate": 3.454481069357749e-06, "loss": 0.2176646888256073, "step": 5997 }, { "epoch": 1.4820854954287126, "grad_norm": 1.3021334448277702, "learning_rate": 3.4513926605471504e-06, "loss": 0.24736976623535156, "step": 5998 }, { "epoch": 1.4823325920434889, "grad_norm": 1.3180159207104198, "learning_rate": 3.4483053449600746e-06, "loss": 0.26540982723236084, "step": 5999 }, { "epoch": 1.4825796886582654, "grad_norm": 1.364287281578343, "learning_rate": 3.445219123111918e-06, "loss": 0.22307245433330536, "step": 6000 }, { "epoch": 1.4828267852730417, "grad_norm": 1.4575265095260095, "learning_rate": 3.4421339955178978e-06, "loss": 0.29959046840667725, "step": 6001 }, { "epoch": 1.4830738818878182, "grad_norm": 1.4894974748141738, "learning_rate": 3.439049962693041e-06, "loss": 0.30591875314712524, "step": 6002 }, { "epoch": 1.4833209785025945, "grad_norm": 1.3675863181136545, "learning_rate": 3.4359670251521927e-06, "loss": 0.26432621479034424, "step": 6003 }, { "epoch": 1.483568075117371, "grad_norm": 1.568240448325911, "learning_rate": 3.4328851834100206e-06, "loss": 0.3049052357673645, "step": 6004 }, { "epoch": 1.4838151717321473, "grad_norm": 1.539658252822098, "learning_rate": 3.4298044379810082e-06, "loss": 0.28066495060920715, "step": 6005 }, { "epoch": 1.4840622683469236, "grad_norm": 1.2939305888589376, "learning_rate": 3.4267247893794565e-06, "loss": 0.22042647004127502, "step": 6006 }, { "epoch": 1.4843093649617, "grad_norm": 1.2785546002775858, "learning_rate": 3.4236462381194756e-06, "loss": 0.2558554410934448, "step": 6007 }, { "epoch": 1.4845564615764764, "grad_norm": 1.3679578551642808, "learning_rate": 3.420568784715005e-06, "loss": 0.336177796125412, "step": 6008 }, { "epoch": 1.4848035581912526, "grad_norm": 1.446006122500129, "learning_rate": 3.4174924296797883e-06, "loss": 0.28035634756088257, "step": 6009 }, { "epoch": 1.4850506548060292, "grad_norm": 1.4907704623093003, "learning_rate": 3.4144171735273947e-06, "loss": 0.2581688463687897, "step": 6010 }, { "epoch": 1.4852977514208057, "grad_norm": 1.3872833221296788, "learning_rate": 3.411343016771209e-06, "loss": 0.2526673674583435, "step": 6011 }, { "epoch": 1.485544848035582, "grad_norm": 1.3360712576303708, "learning_rate": 3.408269959924425e-06, "loss": 0.26524531841278076, "step": 6012 }, { "epoch": 1.4857919446503582, "grad_norm": 1.476508978270188, "learning_rate": 3.405198003500062e-06, "loss": 0.2276826947927475, "step": 6013 }, { "epoch": 1.4860390412651348, "grad_norm": 1.5213418899937532, "learning_rate": 3.402127148010952e-06, "loss": 0.30998340249061584, "step": 6014 }, { "epoch": 1.486286137879911, "grad_norm": 1.4169732892942122, "learning_rate": 3.399057393969749e-06, "loss": 0.30273324251174927, "step": 6015 }, { "epoch": 1.4865332344946873, "grad_norm": 1.4967797012360657, "learning_rate": 3.395988741888904e-06, "loss": 0.3274173140525818, "step": 6016 }, { "epoch": 1.4867803311094638, "grad_norm": 1.3184971564506112, "learning_rate": 3.392921192280705e-06, "loss": 0.25392088294029236, "step": 6017 }, { "epoch": 1.4870274277242401, "grad_norm": 1.6760906984572066, "learning_rate": 3.3898547456572463e-06, "loss": 0.32833927869796753, "step": 6018 }, { "epoch": 1.4872745243390166, "grad_norm": 1.552263090695341, "learning_rate": 3.386789402530445e-06, "loss": 0.29019424319267273, "step": 6019 }, { "epoch": 1.487521620953793, "grad_norm": 1.4530546955771644, "learning_rate": 3.383725163412023e-06, "loss": 0.2539193034172058, "step": 6020 }, { "epoch": 1.4877687175685694, "grad_norm": 1.3391113651300823, "learning_rate": 3.3806620288135285e-06, "loss": 0.2843550741672516, "step": 6021 }, { "epoch": 1.4880158141833457, "grad_norm": 1.4782403434948976, "learning_rate": 3.377599999246315e-06, "loss": 0.2963540554046631, "step": 6022 }, { "epoch": 1.488262910798122, "grad_norm": 1.424625022156926, "learning_rate": 3.3745390752215614e-06, "loss": 0.27422818541526794, "step": 6023 }, { "epoch": 1.4885100074128985, "grad_norm": 1.5317014422923565, "learning_rate": 3.3714792572502596e-06, "loss": 0.3589957654476166, "step": 6024 }, { "epoch": 1.4887571040276748, "grad_norm": 1.3300955471816893, "learning_rate": 3.368420545843211e-06, "loss": 0.26989322900772095, "step": 6025 }, { "epoch": 1.489004200642451, "grad_norm": 1.4210863304136854, "learning_rate": 3.3653629415110367e-06, "loss": 0.2655814290046692, "step": 6026 }, { "epoch": 1.4892512972572276, "grad_norm": 1.6528241455502526, "learning_rate": 3.362306444764175e-06, "loss": 0.3049144744873047, "step": 6027 }, { "epoch": 1.489498393872004, "grad_norm": 1.4433302479081824, "learning_rate": 3.3592510561128787e-06, "loss": 0.26315104961395264, "step": 6028 }, { "epoch": 1.4897454904867804, "grad_norm": 1.3931521328255634, "learning_rate": 3.356196776067212e-06, "loss": 0.31428343057632446, "step": 6029 }, { "epoch": 1.4899925871015567, "grad_norm": 1.37313020112738, "learning_rate": 3.3531436051370514e-06, "loss": 0.2960740923881531, "step": 6030 }, { "epoch": 1.4902396837163332, "grad_norm": 1.5002366609236126, "learning_rate": 3.350091543832098e-06, "loss": 0.24860626459121704, "step": 6031 }, { "epoch": 1.4904867803311095, "grad_norm": 1.3878806756759867, "learning_rate": 3.34704059266186e-06, "loss": 0.2947801947593689, "step": 6032 }, { "epoch": 1.4907338769458858, "grad_norm": 1.3673915983389047, "learning_rate": 3.3439907521356684e-06, "loss": 0.22935286164283752, "step": 6033 }, { "epoch": 1.4909809735606623, "grad_norm": 1.3702521005580264, "learning_rate": 3.3409420227626543e-06, "loss": 0.23545974493026733, "step": 6034 }, { "epoch": 1.4912280701754386, "grad_norm": 1.4321132308654083, "learning_rate": 3.33789440505178e-06, "loss": 0.26648420095443726, "step": 6035 }, { "epoch": 1.4914751667902149, "grad_norm": 1.3966710785889507, "learning_rate": 3.3348478995118074e-06, "loss": 0.27798280119895935, "step": 6036 }, { "epoch": 1.4917222634049914, "grad_norm": 1.3599334510207233, "learning_rate": 3.331802506651324e-06, "loss": 0.3106918931007385, "step": 6037 }, { "epoch": 1.4919693600197677, "grad_norm": 1.424084267636313, "learning_rate": 3.328758226978729e-06, "loss": 0.29745352268218994, "step": 6038 }, { "epoch": 1.4922164566345442, "grad_norm": 1.4245475992206993, "learning_rate": 3.325715061002228e-06, "loss": 0.22529232501983643, "step": 6039 }, { "epoch": 1.4924635532493205, "grad_norm": 1.5870536167868603, "learning_rate": 3.32267300922985e-06, "loss": 0.2865554988384247, "step": 6040 }, { "epoch": 1.492710649864097, "grad_norm": 1.3634732992209255, "learning_rate": 3.3196320721694344e-06, "loss": 0.2301805317401886, "step": 6041 }, { "epoch": 1.4929577464788732, "grad_norm": 1.4536708743352482, "learning_rate": 3.3165922503286384e-06, "loss": 0.2778843939304352, "step": 6042 }, { "epoch": 1.4932048430936495, "grad_norm": 1.5566043281185926, "learning_rate": 3.3135535442149257e-06, "loss": 0.28691136837005615, "step": 6043 }, { "epoch": 1.493451939708426, "grad_norm": 1.3671532447974466, "learning_rate": 3.3105159543355745e-06, "loss": 0.25838321447372437, "step": 6044 }, { "epoch": 1.4936990363232023, "grad_norm": 1.3237996652222364, "learning_rate": 3.307479481197684e-06, "loss": 0.30557483434677124, "step": 6045 }, { "epoch": 1.4939461329379786, "grad_norm": 1.4219130940083433, "learning_rate": 3.3044441253081607e-06, "loss": 0.32305729389190674, "step": 6046 }, { "epoch": 1.4941932295527551, "grad_norm": 1.3891108697661496, "learning_rate": 3.30140988717373e-06, "loss": 0.27501583099365234, "step": 6047 }, { "epoch": 1.4944403261675316, "grad_norm": 1.4021957797720777, "learning_rate": 3.2983767673009213e-06, "loss": 0.24690833687782288, "step": 6048 }, { "epoch": 1.494687422782308, "grad_norm": 1.4353280672082784, "learning_rate": 3.2953447661960902e-06, "loss": 0.25817450881004333, "step": 6049 }, { "epoch": 1.4949345193970842, "grad_norm": 1.5224675955588167, "learning_rate": 3.292313884365391e-06, "loss": 0.300004243850708, "step": 6050 }, { "epoch": 1.4951816160118607, "grad_norm": 1.4011354947445118, "learning_rate": 3.289284122314802e-06, "loss": 0.2512633800506592, "step": 6051 }, { "epoch": 1.495428712626637, "grad_norm": 1.3480371713914787, "learning_rate": 3.2862554805501145e-06, "loss": 0.288047730922699, "step": 6052 }, { "epoch": 1.4956758092414133, "grad_norm": 1.4753835052967001, "learning_rate": 3.2832279595769235e-06, "loss": 0.291284441947937, "step": 6053 }, { "epoch": 1.4959229058561898, "grad_norm": 1.459816897656679, "learning_rate": 3.2802015599006453e-06, "loss": 0.24810534715652466, "step": 6054 }, { "epoch": 1.496170002470966, "grad_norm": 1.3145233549948168, "learning_rate": 3.2771762820265073e-06, "loss": 0.2616848945617676, "step": 6055 }, { "epoch": 1.4964170990857426, "grad_norm": 1.487671856004232, "learning_rate": 3.2741521264595523e-06, "loss": 0.3066267967224121, "step": 6056 }, { "epoch": 1.496664195700519, "grad_norm": 1.2386508001110301, "learning_rate": 3.271129093704628e-06, "loss": 0.2621117830276489, "step": 6057 }, { "epoch": 1.4969112923152954, "grad_norm": 1.4034052809763742, "learning_rate": 3.268107184266397e-06, "loss": 0.26274827122688293, "step": 6058 }, { "epoch": 1.4971583889300717, "grad_norm": 1.51648934774367, "learning_rate": 3.2650863986493396e-06, "loss": 0.3179272413253784, "step": 6059 }, { "epoch": 1.497405485544848, "grad_norm": 1.4277460605522307, "learning_rate": 3.262066737357744e-06, "loss": 0.2681933343410492, "step": 6060 }, { "epoch": 1.4976525821596245, "grad_norm": 1.7187337548921635, "learning_rate": 3.259048200895718e-06, "loss": 0.3343358635902405, "step": 6061 }, { "epoch": 1.4978996787744008, "grad_norm": 1.3720352578858432, "learning_rate": 3.2560307897671662e-06, "loss": 0.31233739852905273, "step": 6062 }, { "epoch": 1.498146775389177, "grad_norm": 1.630760539278832, "learning_rate": 3.253014504475822e-06, "loss": 0.3051246702671051, "step": 6063 }, { "epoch": 1.4983938720039536, "grad_norm": 1.5441048823521075, "learning_rate": 3.249999345525218e-06, "loss": 0.3241877257823944, "step": 6064 }, { "epoch": 1.4986409686187299, "grad_norm": 1.4855889847283843, "learning_rate": 3.246985313418707e-06, "loss": 0.3070005774497986, "step": 6065 }, { "epoch": 1.4988880652335064, "grad_norm": 1.2737940315020453, "learning_rate": 3.2439724086594537e-06, "loss": 0.22833402454853058, "step": 6066 }, { "epoch": 1.4991351618482827, "grad_norm": 1.3939627731075077, "learning_rate": 3.240960631750427e-06, "loss": 0.3132503032684326, "step": 6067 }, { "epoch": 1.4993822584630592, "grad_norm": 1.3814645548320539, "learning_rate": 3.237949983194415e-06, "loss": 0.24766702950000763, "step": 6068 }, { "epoch": 1.4996293550778355, "grad_norm": 1.3589995640827952, "learning_rate": 3.234940463494015e-06, "loss": 0.2564871609210968, "step": 6069 }, { "epoch": 1.4998764516926117, "grad_norm": 1.3686677201052266, "learning_rate": 3.231932073151639e-06, "loss": 0.2673593759536743, "step": 6070 }, { "epoch": 1.5001235483073883, "grad_norm": 1.5525903673267893, "learning_rate": 3.228924812669503e-06, "loss": 0.34801724553108215, "step": 6071 }, { "epoch": 1.5003706449221645, "grad_norm": 1.3703893893524055, "learning_rate": 3.225918682549637e-06, "loss": 0.22791773080825806, "step": 6072 }, { "epoch": 1.5006177415369408, "grad_norm": 1.3898456376923796, "learning_rate": 3.2229136832938856e-06, "loss": 0.2873150706291199, "step": 6073 }, { "epoch": 1.5008648381517173, "grad_norm": 1.3451831072389173, "learning_rate": 3.2199098154039043e-06, "loss": 0.2628369927406311, "step": 6074 }, { "epoch": 1.5011119347664938, "grad_norm": 1.3289249421149922, "learning_rate": 3.2169070793811606e-06, "loss": 0.2277056872844696, "step": 6075 }, { "epoch": 1.50135903138127, "grad_norm": 1.3678700492403584, "learning_rate": 3.213905475726925e-06, "loss": 0.25322425365448, "step": 6076 }, { "epoch": 1.5016061279960464, "grad_norm": 1.4857875817001285, "learning_rate": 3.210905004942292e-06, "loss": 0.23954874277114868, "step": 6077 }, { "epoch": 1.501853224610823, "grad_norm": 1.4113344764211184, "learning_rate": 3.2079056675281506e-06, "loss": 0.2542542815208435, "step": 6078 }, { "epoch": 1.5021003212255992, "grad_norm": 1.481351631545876, "learning_rate": 3.204907463985215e-06, "loss": 0.33083921670913696, "step": 6079 }, { "epoch": 1.5023474178403755, "grad_norm": 1.3534705214847877, "learning_rate": 3.201910394814007e-06, "loss": 0.27491581439971924, "step": 6080 }, { "epoch": 1.502594514455152, "grad_norm": 1.4502477845151085, "learning_rate": 3.1989144605148524e-06, "loss": 0.32881635427474976, "step": 6081 }, { "epoch": 1.5028416110699283, "grad_norm": 1.3832344399783592, "learning_rate": 3.195919661587894e-06, "loss": 0.2702956795692444, "step": 6082 }, { "epoch": 1.5030887076847046, "grad_norm": 1.4152085282053268, "learning_rate": 3.192925998533082e-06, "loss": 0.30878087878227234, "step": 6083 }, { "epoch": 1.503335804299481, "grad_norm": 1.3560038674392974, "learning_rate": 3.1899334718501828e-06, "loss": 0.23302242159843445, "step": 6084 }, { "epoch": 1.5035829009142576, "grad_norm": 1.4498036216138441, "learning_rate": 3.1869420820387644e-06, "loss": 0.3394007086753845, "step": 6085 }, { "epoch": 1.503829997529034, "grad_norm": 1.389567962686666, "learning_rate": 3.1839518295982064e-06, "loss": 0.24384504556655884, "step": 6086 }, { "epoch": 1.5040770941438102, "grad_norm": 1.3920675564065852, "learning_rate": 3.1809627150277036e-06, "loss": 0.3042013943195343, "step": 6087 }, { "epoch": 1.5043241907585867, "grad_norm": 1.5772052932876977, "learning_rate": 3.177974738826258e-06, "loss": 0.32236242294311523, "step": 6088 }, { "epoch": 1.504571287373363, "grad_norm": 1.4126724492585012, "learning_rate": 3.1749879014926866e-06, "loss": 0.3256637454032898, "step": 6089 }, { "epoch": 1.5048183839881393, "grad_norm": 1.233361532824351, "learning_rate": 3.172002203525607e-06, "loss": 0.24429166316986084, "step": 6090 }, { "epoch": 1.5050654806029158, "grad_norm": 1.2874002512317106, "learning_rate": 3.1690176454234487e-06, "loss": 0.25098150968551636, "step": 6091 }, { "epoch": 1.505312577217692, "grad_norm": 1.2905185307871592, "learning_rate": 3.166034227684457e-06, "loss": 0.19899070262908936, "step": 6092 }, { "epoch": 1.5055596738324684, "grad_norm": 1.4078019910675925, "learning_rate": 3.1630519508066814e-06, "loss": 0.2721710205078125, "step": 6093 }, { "epoch": 1.5058067704472449, "grad_norm": 1.436351484852135, "learning_rate": 3.160070815287988e-06, "loss": 0.3125091791152954, "step": 6094 }, { "epoch": 1.5060538670620214, "grad_norm": 1.4263588596639936, "learning_rate": 3.15709082162604e-06, "loss": 0.302049845457077, "step": 6095 }, { "epoch": 1.5063009636767977, "grad_norm": 1.3500466199230017, "learning_rate": 3.154111970318319e-06, "loss": 0.291707307100296, "step": 6096 }, { "epoch": 1.506548060291574, "grad_norm": 1.4083829452527252, "learning_rate": 3.151134261862119e-06, "loss": 0.28326690196990967, "step": 6097 }, { "epoch": 1.5067951569063505, "grad_norm": 1.699494446174151, "learning_rate": 3.1481576967545315e-06, "loss": 0.29275423288345337, "step": 6098 }, { "epoch": 1.5070422535211268, "grad_norm": 1.3183202899541768, "learning_rate": 3.145182275492469e-06, "loss": 0.3075295388698578, "step": 6099 }, { "epoch": 1.507289350135903, "grad_norm": 1.523680361263389, "learning_rate": 3.1422079985726427e-06, "loss": 0.2893218994140625, "step": 6100 }, { "epoch": 1.5075364467506795, "grad_norm": 2.5396681224836537, "learning_rate": 3.1392348664915807e-06, "loss": 0.30809134244918823, "step": 6101 }, { "epoch": 1.507783543365456, "grad_norm": 1.4777448359292575, "learning_rate": 3.1362628797456173e-06, "loss": 0.28470245003700256, "step": 6102 }, { "epoch": 1.5080306399802321, "grad_norm": 1.7046675960610826, "learning_rate": 3.1332920388308985e-06, "loss": 0.2890945374965668, "step": 6103 }, { "epoch": 1.5082777365950086, "grad_norm": 1.4325469448261516, "learning_rate": 3.130322344243373e-06, "loss": 0.26292771100997925, "step": 6104 }, { "epoch": 1.5085248332097851, "grad_norm": 1.382288440028343, "learning_rate": 3.127353796478798e-06, "loss": 0.26368898153305054, "step": 6105 }, { "epoch": 1.5087719298245614, "grad_norm": 1.3841466300129606, "learning_rate": 3.1243863960327458e-06, "loss": 0.22711654007434845, "step": 6106 }, { "epoch": 1.5090190264393377, "grad_norm": 1.4172734272288825, "learning_rate": 3.1214201434005977e-06, "loss": 0.2557278275489807, "step": 6107 }, { "epoch": 1.5092661230541142, "grad_norm": 1.4928516625887458, "learning_rate": 3.11845503907753e-06, "loss": 0.24943067133426666, "step": 6108 }, { "epoch": 1.5095132196688905, "grad_norm": 1.530020074393932, "learning_rate": 3.1154910835585437e-06, "loss": 0.3127819895744324, "step": 6109 }, { "epoch": 1.5097603162836668, "grad_norm": 1.409318692543758, "learning_rate": 3.1125282773384403e-06, "loss": 0.29275599122047424, "step": 6110 }, { "epoch": 1.5100074128984433, "grad_norm": 1.651834441212565, "learning_rate": 3.1095666209118313e-06, "loss": 0.2955138683319092, "step": 6111 }, { "epoch": 1.5102545095132198, "grad_norm": 1.3605175749071792, "learning_rate": 3.106606114773133e-06, "loss": 0.28039678931236267, "step": 6112 }, { "epoch": 1.5105016061279959, "grad_norm": 1.3707129356521346, "learning_rate": 3.1036467594165697e-06, "loss": 0.24388909339904785, "step": 6113 }, { "epoch": 1.5107487027427724, "grad_norm": 1.2886244697216678, "learning_rate": 3.100688555336179e-06, "loss": 0.25972437858581543, "step": 6114 }, { "epoch": 1.510995799357549, "grad_norm": 1.561195742079156, "learning_rate": 3.0977315030258002e-06, "loss": 0.33979225158691406, "step": 6115 }, { "epoch": 1.5112428959723252, "grad_norm": 1.3764683586571953, "learning_rate": 3.09477560297909e-06, "loss": 0.26666781306266785, "step": 6116 }, { "epoch": 1.5114899925871015, "grad_norm": 1.5507741203645966, "learning_rate": 3.091820855689498e-06, "loss": 0.2755489945411682, "step": 6117 }, { "epoch": 1.511737089201878, "grad_norm": 1.8886826533428194, "learning_rate": 3.0888672616502946e-06, "loss": 0.26152628660202026, "step": 6118 }, { "epoch": 1.5119841858166543, "grad_norm": 1.4114814283040134, "learning_rate": 3.0859148213545475e-06, "loss": 0.308028906583786, "step": 6119 }, { "epoch": 1.5122312824314306, "grad_norm": 1.383046704549125, "learning_rate": 3.0829635352951392e-06, "loss": 0.3075484037399292, "step": 6120 }, { "epoch": 1.512478379046207, "grad_norm": 1.2150620229246485, "learning_rate": 3.080013403964761e-06, "loss": 0.2412686049938202, "step": 6121 }, { "epoch": 1.5127254756609836, "grad_norm": 1.619562427620177, "learning_rate": 3.0770644278558992e-06, "loss": 0.30777230858802795, "step": 6122 }, { "epoch": 1.5129725722757599, "grad_norm": 1.52954491407926, "learning_rate": 3.0741166074608597e-06, "loss": 0.3077544867992401, "step": 6123 }, { "epoch": 1.5132196688905362, "grad_norm": 1.3955813925541551, "learning_rate": 3.071169943271752e-06, "loss": 0.22742073237895966, "step": 6124 }, { "epoch": 1.5134667655053127, "grad_norm": 1.4540803917444385, "learning_rate": 3.0682244357804933e-06, "loss": 0.27498525381088257, "step": 6125 }, { "epoch": 1.513713862120089, "grad_norm": 1.5935261055909897, "learning_rate": 3.0652800854788046e-06, "loss": 0.3227786719799042, "step": 6126 }, { "epoch": 1.5139609587348652, "grad_norm": 1.4744406987754215, "learning_rate": 3.06233689285821e-06, "loss": 0.2656373977661133, "step": 6127 }, { "epoch": 1.5142080553496418, "grad_norm": 1.3180197126498963, "learning_rate": 3.059394858410051e-06, "loss": 0.21412163972854614, "step": 6128 }, { "epoch": 1.514455151964418, "grad_norm": 1.3759608065225921, "learning_rate": 3.056453982625469e-06, "loss": 0.298877477645874, "step": 6129 }, { "epoch": 1.5147022485791943, "grad_norm": 1.5232217356076267, "learning_rate": 3.0535142659954175e-06, "loss": 0.33103418350219727, "step": 6130 }, { "epoch": 1.5149493451939708, "grad_norm": 1.5853842859591214, "learning_rate": 3.050575709010646e-06, "loss": 0.34476539492607117, "step": 6131 }, { "epoch": 1.5151964418087474, "grad_norm": 1.379901206638567, "learning_rate": 3.0476383121617214e-06, "loss": 0.25131458044052124, "step": 6132 }, { "epoch": 1.5154435384235236, "grad_norm": 1.4045292011989716, "learning_rate": 3.0447020759390087e-06, "loss": 0.2986428439617157, "step": 6133 }, { "epoch": 1.5156906350383, "grad_norm": 1.532290449498702, "learning_rate": 3.0417670008326838e-06, "loss": 0.29001685976982117, "step": 6134 }, { "epoch": 1.5159377316530764, "grad_norm": 1.576003082928229, "learning_rate": 3.038833087332731e-06, "loss": 0.3290081024169922, "step": 6135 }, { "epoch": 1.5161848282678527, "grad_norm": 1.4388634036930081, "learning_rate": 3.0359003359289318e-06, "loss": 0.2938876748085022, "step": 6136 }, { "epoch": 1.516431924882629, "grad_norm": 1.4155473841148483, "learning_rate": 3.0329687471108815e-06, "loss": 0.26949191093444824, "step": 6137 }, { "epoch": 1.5166790214974055, "grad_norm": 1.3442728331536506, "learning_rate": 3.0300383213679805e-06, "loss": 0.2653823494911194, "step": 6138 }, { "epoch": 1.5169261181121818, "grad_norm": 1.33400262811178, "learning_rate": 3.0271090591894357e-06, "loss": 0.2848450243473053, "step": 6139 }, { "epoch": 1.517173214726958, "grad_norm": 1.3056284583247364, "learning_rate": 3.024180961064256e-06, "loss": 0.26252472400665283, "step": 6140 }, { "epoch": 1.5174203113417346, "grad_norm": 1.5486133958591335, "learning_rate": 3.021254027481252e-06, "loss": 0.25964096188545227, "step": 6141 }, { "epoch": 1.5176674079565111, "grad_norm": 1.3702513346196632, "learning_rate": 3.018328258929051e-06, "loss": 0.22437793016433716, "step": 6142 }, { "epoch": 1.5179145045712874, "grad_norm": 1.7017909157370739, "learning_rate": 3.0154036558960785e-06, "loss": 0.3511222004890442, "step": 6143 }, { "epoch": 1.5181616011860637, "grad_norm": 1.5667343453160407, "learning_rate": 3.0124802188705716e-06, "loss": 0.2777724266052246, "step": 6144 }, { "epoch": 1.5184086978008402, "grad_norm": 1.2555090758678964, "learning_rate": 3.009557948340562e-06, "loss": 0.22850504517555237, "step": 6145 }, { "epoch": 1.5186557944156165, "grad_norm": 1.5255471843322466, "learning_rate": 3.0066368447939e-06, "loss": 0.2557099461555481, "step": 6146 }, { "epoch": 1.5189028910303928, "grad_norm": 1.6586607470407773, "learning_rate": 3.0037169087182273e-06, "loss": 0.3170607089996338, "step": 6147 }, { "epoch": 1.5191499876451693, "grad_norm": 1.362006760592046, "learning_rate": 3.000798140600999e-06, "loss": 0.24059568345546722, "step": 6148 }, { "epoch": 1.5193970842599458, "grad_norm": 1.3509484750438419, "learning_rate": 2.9978805409294796e-06, "loss": 0.27487683296203613, "step": 6149 }, { "epoch": 1.5196441808747219, "grad_norm": 1.2757351283206844, "learning_rate": 2.9949641101907257e-06, "loss": 0.23748129606246948, "step": 6150 }, { "epoch": 1.5198912774894984, "grad_norm": 1.3462933742605223, "learning_rate": 2.9920488488716092e-06, "loss": 0.2949928939342499, "step": 6151 }, { "epoch": 1.5201383741042749, "grad_norm": 1.5965560754236248, "learning_rate": 2.9891347574588014e-06, "loss": 0.3098791241645813, "step": 6152 }, { "epoch": 1.5203854707190512, "grad_norm": 1.452235518028201, "learning_rate": 2.9862218364387863e-06, "loss": 0.2841379940509796, "step": 6153 }, { "epoch": 1.5206325673338275, "grad_norm": 1.3470897572259952, "learning_rate": 2.9833100862978416e-06, "loss": 0.2707095444202423, "step": 6154 }, { "epoch": 1.520879663948604, "grad_norm": 1.3722059765642327, "learning_rate": 2.9803995075220517e-06, "loss": 0.3303743600845337, "step": 6155 }, { "epoch": 1.5211267605633803, "grad_norm": 1.5209612546217384, "learning_rate": 2.977490100597311e-06, "loss": 0.2760206460952759, "step": 6156 }, { "epoch": 1.5213738571781565, "grad_norm": 1.2881730139524403, "learning_rate": 2.974581866009316e-06, "loss": 0.22873643040657043, "step": 6157 }, { "epoch": 1.521620953792933, "grad_norm": 1.3433446192064358, "learning_rate": 2.9716748042435693e-06, "loss": 0.27002227306365967, "step": 6158 }, { "epoch": 1.5218680504077096, "grad_norm": 1.3483796175897031, "learning_rate": 2.9687689157853726e-06, "loss": 0.25131654739379883, "step": 6159 }, { "epoch": 1.5221151470224856, "grad_norm": 1.3613986300110297, "learning_rate": 2.9658642011198326e-06, "loss": 0.26978516578674316, "step": 6160 }, { "epoch": 1.5223622436372621, "grad_norm": 1.5070857910231503, "learning_rate": 2.962960660731864e-06, "loss": 0.2925878167152405, "step": 6161 }, { "epoch": 1.5226093402520386, "grad_norm": 1.653707495330176, "learning_rate": 2.9600582951061818e-06, "loss": 0.3232056796550751, "step": 6162 }, { "epoch": 1.522856436866815, "grad_norm": 2.8447595684766673, "learning_rate": 2.957157104727313e-06, "loss": 0.28797030448913574, "step": 6163 }, { "epoch": 1.5231035334815912, "grad_norm": 1.4501507547943764, "learning_rate": 2.9542570900795718e-06, "loss": 0.24508538842201233, "step": 6164 }, { "epoch": 1.5233506300963677, "grad_norm": 1.3946487274066497, "learning_rate": 2.951358251647093e-06, "loss": 0.24834275245666504, "step": 6165 }, { "epoch": 1.523597726711144, "grad_norm": 1.2579564546793174, "learning_rate": 2.9484605899138087e-06, "loss": 0.22293275594711304, "step": 6166 }, { "epoch": 1.5238448233259203, "grad_norm": 1.5294967929960748, "learning_rate": 2.94556410536345e-06, "loss": 0.33615124225616455, "step": 6167 }, { "epoch": 1.5240919199406968, "grad_norm": 1.404369420825594, "learning_rate": 2.94266879847956e-06, "loss": 0.3050142228603363, "step": 6168 }, { "epoch": 1.5243390165554733, "grad_norm": 1.313954541386078, "learning_rate": 2.9397746697454755e-06, "loss": 0.26987218856811523, "step": 6169 }, { "epoch": 1.5245861131702496, "grad_norm": 1.3998782709190267, "learning_rate": 2.9368817196443444e-06, "loss": 0.23620600998401642, "step": 6170 }, { "epoch": 1.524833209785026, "grad_norm": 1.4535811362500324, "learning_rate": 2.933989948659115e-06, "loss": 0.24018964171409607, "step": 6171 }, { "epoch": 1.5250803063998024, "grad_norm": 1.4111515971367556, "learning_rate": 2.931099357272544e-06, "loss": 0.2739161550998688, "step": 6172 }, { "epoch": 1.5253274030145787, "grad_norm": 1.4024952760141771, "learning_rate": 2.928209945967182e-06, "loss": 0.25811439752578735, "step": 6173 }, { "epoch": 1.525574499629355, "grad_norm": 2.026039935470012, "learning_rate": 2.9253217152253843e-06, "loss": 0.2779083251953125, "step": 6174 }, { "epoch": 1.5258215962441315, "grad_norm": 1.6100215290388107, "learning_rate": 2.9224346655293147e-06, "loss": 0.33154186606407166, "step": 6175 }, { "epoch": 1.5260686928589078, "grad_norm": 1.4152854341861938, "learning_rate": 2.9195487973609348e-06, "loss": 0.2586267292499542, "step": 6176 }, { "epoch": 1.526315789473684, "grad_norm": 1.3358251708390116, "learning_rate": 2.9166641112020177e-06, "loss": 0.27795177698135376, "step": 6177 }, { "epoch": 1.5265628860884606, "grad_norm": 1.4537414974254366, "learning_rate": 2.9137806075341233e-06, "loss": 0.2860119342803955, "step": 6178 }, { "epoch": 1.526809982703237, "grad_norm": 1.4100763258853468, "learning_rate": 2.910898286838628e-06, "loss": 0.2851087749004364, "step": 6179 }, { "epoch": 1.5270570793180134, "grad_norm": 1.312070489697358, "learning_rate": 2.908017149596709e-06, "loss": 0.2933919429779053, "step": 6180 }, { "epoch": 1.5273041759327897, "grad_norm": 1.5002970685732142, "learning_rate": 2.905137196289336e-06, "loss": 0.2700810730457306, "step": 6181 }, { "epoch": 1.5275512725475662, "grad_norm": 1.3576931726727688, "learning_rate": 2.9022584273972955e-06, "loss": 0.2509341537952423, "step": 6182 }, { "epoch": 1.5277983691623425, "grad_norm": 1.4680072580937062, "learning_rate": 2.8993808434011593e-06, "loss": 0.267325758934021, "step": 6183 }, { "epoch": 1.5280454657771187, "grad_norm": 1.4911973819621125, "learning_rate": 2.8965044447813186e-06, "loss": 0.2821406126022339, "step": 6184 }, { "epoch": 1.5282925623918953, "grad_norm": 2.562599545308258, "learning_rate": 2.893629232017955e-06, "loss": 0.2703527808189392, "step": 6185 }, { "epoch": 1.5285396590066718, "grad_norm": 1.3933876807064638, "learning_rate": 2.8907552055910617e-06, "loss": 0.23189550638198853, "step": 6186 }, { "epoch": 1.5287867556214478, "grad_norm": 1.3401049348975904, "learning_rate": 2.8878823659804235e-06, "loss": 0.2113986313343048, "step": 6187 }, { "epoch": 1.5290338522362243, "grad_norm": 1.4826639976720113, "learning_rate": 2.885010713665629e-06, "loss": 0.2394515722990036, "step": 6188 }, { "epoch": 1.5292809488510009, "grad_norm": 1.2721032397077054, "learning_rate": 2.8821402491260755e-06, "loss": 0.23915019631385803, "step": 6189 }, { "epoch": 1.5295280454657771, "grad_norm": 1.6508213117379797, "learning_rate": 2.879270972840956e-06, "loss": 0.3061826229095459, "step": 6190 }, { "epoch": 1.5297751420805534, "grad_norm": 1.4194724519130464, "learning_rate": 2.8764028852892735e-06, "loss": 0.28964197635650635, "step": 6191 }, { "epoch": 1.53002223869533, "grad_norm": 1.3467141613780258, "learning_rate": 2.873535986949817e-06, "loss": 0.27790188789367676, "step": 6192 }, { "epoch": 1.5302693353101062, "grad_norm": 1.3722026672861225, "learning_rate": 2.8706702783011895e-06, "loss": 0.26375478506088257, "step": 6193 }, { "epoch": 1.5305164319248825, "grad_norm": 1.528044081682513, "learning_rate": 2.8678057598217966e-06, "loss": 0.25176340341567993, "step": 6194 }, { "epoch": 1.530763528539659, "grad_norm": 1.5289950028324908, "learning_rate": 2.8649424319898357e-06, "loss": 0.2829694151878357, "step": 6195 }, { "epoch": 1.5310106251544355, "grad_norm": 1.3424566666547502, "learning_rate": 2.8620802952833082e-06, "loss": 0.21928060054779053, "step": 6196 }, { "epoch": 1.5312577217692116, "grad_norm": 1.4628365760362336, "learning_rate": 2.8592193501800214e-06, "loss": 0.27144360542297363, "step": 6197 }, { "epoch": 1.531504818383988, "grad_norm": 1.4927556325636635, "learning_rate": 2.8563595971575818e-06, "loss": 0.2709523141384125, "step": 6198 }, { "epoch": 1.5317519149987646, "grad_norm": 1.6631883490038366, "learning_rate": 2.853501036693396e-06, "loss": 0.3001652956008911, "step": 6199 }, { "epoch": 1.531999011613541, "grad_norm": 1.6542900800404194, "learning_rate": 2.8506436692646734e-06, "loss": 0.3281595706939697, "step": 6200 }, { "epoch": 1.5322461082283172, "grad_norm": 1.457249628139068, "learning_rate": 2.8477874953484208e-06, "loss": 0.30502021312713623, "step": 6201 }, { "epoch": 1.5324932048430937, "grad_norm": 1.5308057911131023, "learning_rate": 2.8449325154214446e-06, "loss": 0.23615595698356628, "step": 6202 }, { "epoch": 1.53274030145787, "grad_norm": 1.364036297991014, "learning_rate": 2.842078729960356e-06, "loss": 0.21679362654685974, "step": 6203 }, { "epoch": 1.5329873980726463, "grad_norm": 1.3483830517144852, "learning_rate": 2.839226139441571e-06, "loss": 0.23113468289375305, "step": 6204 }, { "epoch": 1.5332344946874228, "grad_norm": 1.5632125104840935, "learning_rate": 2.8363747443412924e-06, "loss": 0.31791990995407104, "step": 6205 }, { "epoch": 1.5334815913021993, "grad_norm": 1.51764882299438, "learning_rate": 2.8335245451355355e-06, "loss": 0.31865009665489197, "step": 6206 }, { "epoch": 1.5337286879169756, "grad_norm": 1.4333110517875518, "learning_rate": 2.830675542300113e-06, "loss": 0.2776098847389221, "step": 6207 }, { "epoch": 1.5339757845317519, "grad_norm": 1.4555105886771291, "learning_rate": 2.827827736310639e-06, "loss": 0.2626730799674988, "step": 6208 }, { "epoch": 1.5342228811465284, "grad_norm": 1.4430835985815749, "learning_rate": 2.8249811276425222e-06, "loss": 0.30390769243240356, "step": 6209 }, { "epoch": 1.5344699777613047, "grad_norm": 1.2583621472968323, "learning_rate": 2.822135716770973e-06, "loss": 0.23292410373687744, "step": 6210 }, { "epoch": 1.534717074376081, "grad_norm": 1.3687301545330655, "learning_rate": 2.819291504171008e-06, "loss": 0.26787519454956055, "step": 6211 }, { "epoch": 1.5349641709908575, "grad_norm": 1.4617552987223366, "learning_rate": 2.8164484903174372e-06, "loss": 0.2970978617668152, "step": 6212 }, { "epoch": 1.5352112676056338, "grad_norm": 1.5332336562501978, "learning_rate": 2.8136066756848768e-06, "loss": 0.28444361686706543, "step": 6213 }, { "epoch": 1.53545836422041, "grad_norm": 1.263652779658431, "learning_rate": 2.810766060747734e-06, "loss": 0.2173776626586914, "step": 6214 }, { "epoch": 1.5357054608351866, "grad_norm": 1.3074588318222495, "learning_rate": 2.8079266459802245e-06, "loss": 0.2668275833129883, "step": 6215 }, { "epoch": 1.535952557449963, "grad_norm": 1.2951651782512008, "learning_rate": 2.8050884318563564e-06, "loss": 0.25275272130966187, "step": 6216 }, { "epoch": 1.5361996540647394, "grad_norm": 1.3432337523315487, "learning_rate": 2.8022514188499417e-06, "loss": 0.2741992473602295, "step": 6217 }, { "epoch": 1.5364467506795156, "grad_norm": 1.262563608256979, "learning_rate": 2.7994156074345946e-06, "loss": 0.2525462508201599, "step": 6218 }, { "epoch": 1.5366938472942921, "grad_norm": 1.5542651642794385, "learning_rate": 2.796580998083719e-06, "loss": 0.30782681703567505, "step": 6219 }, { "epoch": 1.5369409439090684, "grad_norm": 1.3772162945403374, "learning_rate": 2.793747591270527e-06, "loss": 0.2735882103443146, "step": 6220 }, { "epoch": 1.5371880405238447, "grad_norm": 1.3701532764187754, "learning_rate": 2.7909153874680283e-06, "loss": 0.2381698042154312, "step": 6221 }, { "epoch": 1.5374351371386212, "grad_norm": 1.5467339116597174, "learning_rate": 2.7880843871490315e-06, "loss": 0.27886897325515747, "step": 6222 }, { "epoch": 1.5376822337533977, "grad_norm": 1.434638044557498, "learning_rate": 2.7852545907861415e-06, "loss": 0.2588282823562622, "step": 6223 }, { "epoch": 1.5379293303681738, "grad_norm": 1.4685343685165702, "learning_rate": 2.7824259988517623e-06, "loss": 0.28003647923469543, "step": 6224 }, { "epoch": 1.5381764269829503, "grad_norm": 1.3925134259189325, "learning_rate": 2.7795986118180985e-06, "loss": 0.2724742293357849, "step": 6225 }, { "epoch": 1.5384235235977268, "grad_norm": 1.2522208154593615, "learning_rate": 2.7767724301571564e-06, "loss": 0.24792909622192383, "step": 6226 }, { "epoch": 1.5386706202125031, "grad_norm": 1.4086506648524193, "learning_rate": 2.773947454340741e-06, "loss": 0.2776173949241638, "step": 6227 }, { "epoch": 1.5389177168272794, "grad_norm": 1.591677181595469, "learning_rate": 2.771123684840449e-06, "loss": 0.30490976572036743, "step": 6228 }, { "epoch": 1.539164813442056, "grad_norm": 1.3008082732709003, "learning_rate": 2.768301122127678e-06, "loss": 0.23122426867485046, "step": 6229 }, { "epoch": 1.5394119100568322, "grad_norm": 1.3870023121858797, "learning_rate": 2.765479766673629e-06, "loss": 0.25653618574142456, "step": 6230 }, { "epoch": 1.5396590066716085, "grad_norm": 1.432082114321476, "learning_rate": 2.7626596189492983e-06, "loss": 0.29369062185287476, "step": 6231 }, { "epoch": 1.539906103286385, "grad_norm": 1.5882940288364569, "learning_rate": 2.759840679425485e-06, "loss": 0.27527666091918945, "step": 6232 }, { "epoch": 1.5401531999011615, "grad_norm": 1.4378692625542464, "learning_rate": 2.757022948572775e-06, "loss": 0.25596022605895996, "step": 6233 }, { "epoch": 1.5404002965159376, "grad_norm": 1.3397518672636832, "learning_rate": 2.7542064268615643e-06, "loss": 0.24233949184417725, "step": 6234 }, { "epoch": 1.540647393130714, "grad_norm": 1.4171985106935925, "learning_rate": 2.7513911147620444e-06, "loss": 0.27690792083740234, "step": 6235 }, { "epoch": 1.5408944897454906, "grad_norm": 1.5052964670756832, "learning_rate": 2.748577012744198e-06, "loss": 0.26885586977005005, "step": 6236 }, { "epoch": 1.5411415863602669, "grad_norm": 1.382685311341152, "learning_rate": 2.7457641212778164e-06, "loss": 0.2362508475780487, "step": 6237 }, { "epoch": 1.5413886829750432, "grad_norm": 1.5092207218417362, "learning_rate": 2.742952440832477e-06, "loss": 0.26730483770370483, "step": 6238 }, { "epoch": 1.5416357795898197, "grad_norm": 1.477714253433679, "learning_rate": 2.7401419718775657e-06, "loss": 0.2667822241783142, "step": 6239 }, { "epoch": 1.541882876204596, "grad_norm": 1.3986928736203275, "learning_rate": 2.7373327148822593e-06, "loss": 0.2801828980445862, "step": 6240 }, { "epoch": 1.5421299728193723, "grad_norm": 1.410620348427508, "learning_rate": 2.7345246703155403e-06, "loss": 0.2526535987854004, "step": 6241 }, { "epoch": 1.5423770694341488, "grad_norm": 1.5445310663947587, "learning_rate": 2.7317178386461783e-06, "loss": 0.2975243330001831, "step": 6242 }, { "epoch": 1.5426241660489253, "grad_norm": 1.5860106654811092, "learning_rate": 2.7289122203427444e-06, "loss": 0.2969404458999634, "step": 6243 }, { "epoch": 1.5428712626637016, "grad_norm": 1.2685540616391142, "learning_rate": 2.7261078158736087e-06, "loss": 0.2261502742767334, "step": 6244 }, { "epoch": 1.5431183592784778, "grad_norm": 1.6904303856782235, "learning_rate": 2.7233046257069396e-06, "loss": 0.28962457180023193, "step": 6245 }, { "epoch": 1.5433654558932544, "grad_norm": 1.6244656401496576, "learning_rate": 2.7205026503107037e-06, "loss": 0.23159131407737732, "step": 6246 }, { "epoch": 1.5436125525080306, "grad_norm": 1.5028366327750817, "learning_rate": 2.7177018901526562e-06, "loss": 0.2913188338279724, "step": 6247 }, { "epoch": 1.543859649122807, "grad_norm": 1.3711277616906454, "learning_rate": 2.71490234570036e-06, "loss": 0.2419424057006836, "step": 6248 }, { "epoch": 1.5441067457375834, "grad_norm": 1.3115004800636805, "learning_rate": 2.712104017421171e-06, "loss": 0.21959349513053894, "step": 6249 }, { "epoch": 1.5443538423523597, "grad_norm": 1.2834330851461018, "learning_rate": 2.7093069057822387e-06, "loss": 0.23496052622795105, "step": 6250 }, { "epoch": 1.544600938967136, "grad_norm": 1.4280538550358535, "learning_rate": 2.7065110112505156e-06, "loss": 0.2936282157897949, "step": 6251 }, { "epoch": 1.5448480355819125, "grad_norm": 1.3825728483913782, "learning_rate": 2.703716334292744e-06, "loss": 0.22375720739364624, "step": 6252 }, { "epoch": 1.545095132196689, "grad_norm": 1.562431025133957, "learning_rate": 2.70092287537547e-06, "loss": 0.2784293293952942, "step": 6253 }, { "epoch": 1.5453422288114653, "grad_norm": 1.4627795095880736, "learning_rate": 2.6981306349650314e-06, "loss": 0.2635642886161804, "step": 6254 }, { "epoch": 1.5455893254262416, "grad_norm": 1.406084040853421, "learning_rate": 2.695339613527569e-06, "loss": 0.21696841716766357, "step": 6255 }, { "epoch": 1.5458364220410181, "grad_norm": 1.36976300121376, "learning_rate": 2.6925498115290115e-06, "loss": 0.2775059938430786, "step": 6256 }, { "epoch": 1.5460835186557944, "grad_norm": 1.6077250239322118, "learning_rate": 2.689761229435085e-06, "loss": 0.30393218994140625, "step": 6257 }, { "epoch": 1.5463306152705707, "grad_norm": 1.5046141222297664, "learning_rate": 2.686973867711319e-06, "loss": 0.27139484882354736, "step": 6258 }, { "epoch": 1.5465777118853472, "grad_norm": 1.5524533736908883, "learning_rate": 2.684187726823034e-06, "loss": 0.2787517011165619, "step": 6259 }, { "epoch": 1.5468248085001235, "grad_norm": 1.31515067410041, "learning_rate": 2.6814028072353527e-06, "loss": 0.2597910165786743, "step": 6260 }, { "epoch": 1.5470719051148998, "grad_norm": 1.5675370952308707, "learning_rate": 2.678619109413181e-06, "loss": 0.2906094193458557, "step": 6261 }, { "epoch": 1.5473190017296763, "grad_norm": 1.4486323225769024, "learning_rate": 2.675836633821233e-06, "loss": 0.30648577213287354, "step": 6262 }, { "epoch": 1.5475660983444528, "grad_norm": 1.5739032557999673, "learning_rate": 2.673055380924018e-06, "loss": 0.3170620799064636, "step": 6263 }, { "epoch": 1.547813194959229, "grad_norm": 1.555446694683114, "learning_rate": 2.6702753511858324e-06, "loss": 0.2768535017967224, "step": 6264 }, { "epoch": 1.5480602915740054, "grad_norm": 1.7145087458739876, "learning_rate": 2.667496545070779e-06, "loss": 0.259835809469223, "step": 6265 }, { "epoch": 1.5483073881887819, "grad_norm": 1.5196442288548286, "learning_rate": 2.664718963042745e-06, "loss": 0.2752074897289276, "step": 6266 }, { "epoch": 1.5485544848035582, "grad_norm": 1.5201146404614192, "learning_rate": 2.661942605565423e-06, "loss": 0.2947227954864502, "step": 6267 }, { "epoch": 1.5488015814183345, "grad_norm": 1.3716361446190062, "learning_rate": 2.6591674731022987e-06, "loss": 0.26643529534339905, "step": 6268 }, { "epoch": 1.549048678033111, "grad_norm": 1.3453432954406688, "learning_rate": 2.656393566116653e-06, "loss": 0.3025282621383667, "step": 6269 }, { "epoch": 1.5492957746478875, "grad_norm": 1.5419025231588825, "learning_rate": 2.6536208850715605e-06, "loss": 0.25248461961746216, "step": 6270 }, { "epoch": 1.5495428712626635, "grad_norm": 1.2842015270782836, "learning_rate": 2.650849430429889e-06, "loss": 0.262253999710083, "step": 6271 }, { "epoch": 1.54978996787744, "grad_norm": 1.4332531722419313, "learning_rate": 2.6480792026543065e-06, "loss": 0.228349506855011, "step": 6272 }, { "epoch": 1.5500370644922166, "grad_norm": 1.5520056431843623, "learning_rate": 2.6453102022072764e-06, "loss": 0.30008596181869507, "step": 6273 }, { "epoch": 1.5502841611069929, "grad_norm": 1.5805718122159937, "learning_rate": 2.642542429551056e-06, "loss": 0.33009862899780273, "step": 6274 }, { "epoch": 1.5505312577217691, "grad_norm": 1.5080505108691369, "learning_rate": 2.639775885147692e-06, "loss": 0.3164129853248596, "step": 6275 }, { "epoch": 1.5507783543365457, "grad_norm": 1.4881583434464258, "learning_rate": 2.637010569459033e-06, "loss": 0.28144699335098267, "step": 6276 }, { "epoch": 1.551025450951322, "grad_norm": 1.351083138112272, "learning_rate": 2.6342464829467253e-06, "loss": 0.255598783493042, "step": 6277 }, { "epoch": 1.5512725475660982, "grad_norm": 1.589356844809622, "learning_rate": 2.6314836260721975e-06, "loss": 0.3124978542327881, "step": 6278 }, { "epoch": 1.5515196441808747, "grad_norm": 1.289856638069178, "learning_rate": 2.628721999296687e-06, "loss": 0.26384320855140686, "step": 6279 }, { "epoch": 1.5517667407956512, "grad_norm": 1.4628109805182803, "learning_rate": 2.6259616030812128e-06, "loss": 0.28951743245124817, "step": 6280 }, { "epoch": 1.5520138374104275, "grad_norm": 1.4619960783747525, "learning_rate": 2.6232024378865995e-06, "loss": 0.2608969807624817, "step": 6281 }, { "epoch": 1.5522609340252038, "grad_norm": 1.5469971808103786, "learning_rate": 2.6204445041734592e-06, "loss": 0.30494630336761475, "step": 6282 }, { "epoch": 1.5525080306399803, "grad_norm": 1.414933086845767, "learning_rate": 2.6176878024022068e-06, "loss": 0.2678859531879425, "step": 6283 }, { "epoch": 1.5527551272547566, "grad_norm": 1.539871465568839, "learning_rate": 2.614932333033041e-06, "loss": 0.3066767752170563, "step": 6284 }, { "epoch": 1.553002223869533, "grad_norm": 1.4639381029528518, "learning_rate": 2.612178096525957e-06, "loss": 0.3484329581260681, "step": 6285 }, { "epoch": 1.5532493204843094, "grad_norm": 1.2457050851687954, "learning_rate": 2.6094250933407485e-06, "loss": 0.20926474034786224, "step": 6286 }, { "epoch": 1.5534964170990857, "grad_norm": 1.288754819756762, "learning_rate": 2.6066733239370055e-06, "loss": 0.28136980533599854, "step": 6287 }, { "epoch": 1.553743513713862, "grad_norm": 1.236682017548464, "learning_rate": 2.6039227887741024e-06, "loss": 0.26323747634887695, "step": 6288 }, { "epoch": 1.5539906103286385, "grad_norm": 1.4107312296225374, "learning_rate": 2.601173488311216e-06, "loss": 0.27883267402648926, "step": 6289 }, { "epoch": 1.554237706943415, "grad_norm": 1.402814558771105, "learning_rate": 2.5984254230073124e-06, "loss": 0.27320152521133423, "step": 6290 }, { "epoch": 1.5544848035581913, "grad_norm": 1.3573432984277605, "learning_rate": 2.595678593321158e-06, "loss": 0.23408040404319763, "step": 6291 }, { "epoch": 1.5547319001729676, "grad_norm": 1.3146365013594807, "learning_rate": 2.592932999711305e-06, "loss": 0.24299223721027374, "step": 6292 }, { "epoch": 1.554978996787744, "grad_norm": 1.261419844936671, "learning_rate": 2.5901886426360987e-06, "loss": 0.20783114433288574, "step": 6293 }, { "epoch": 1.5552260934025204, "grad_norm": 1.517992232971901, "learning_rate": 2.5874455225536853e-06, "loss": 0.2681623697280884, "step": 6294 }, { "epoch": 1.5554731900172967, "grad_norm": 1.5877693927969097, "learning_rate": 2.5847036399220006e-06, "loss": 0.3040698170661926, "step": 6295 }, { "epoch": 1.5557202866320732, "grad_norm": 1.4735872115352862, "learning_rate": 2.581962995198776e-06, "loss": 0.2605357766151428, "step": 6296 }, { "epoch": 1.5559673832468495, "grad_norm": 1.4267586454477608, "learning_rate": 2.579223588841533e-06, "loss": 0.24233195185661316, "step": 6297 }, { "epoch": 1.5562144798616258, "grad_norm": 1.5325517331315366, "learning_rate": 2.576485421307583e-06, "loss": 0.33598029613494873, "step": 6298 }, { "epoch": 1.5564615764764023, "grad_norm": 1.3743334717116416, "learning_rate": 2.5737484930540403e-06, "loss": 0.28075143694877625, "step": 6299 }, { "epoch": 1.5567086730911788, "grad_norm": 1.3333784438797056, "learning_rate": 2.5710128045378057e-06, "loss": 0.2378661036491394, "step": 6300 }, { "epoch": 1.556955769705955, "grad_norm": 1.226354984948749, "learning_rate": 2.5682783562155787e-06, "loss": 0.20757737755775452, "step": 6301 }, { "epoch": 1.5572028663207313, "grad_norm": 1.4161371804598106, "learning_rate": 2.565545148543841e-06, "loss": 0.24962250888347626, "step": 6302 }, { "epoch": 1.5574499629355079, "grad_norm": 1.3904269462155852, "learning_rate": 2.562813181978876e-06, "loss": 0.26189109683036804, "step": 6303 }, { "epoch": 1.5576970595502841, "grad_norm": 1.5398207297485254, "learning_rate": 2.5600824569767634e-06, "loss": 0.31048083305358887, "step": 6304 }, { "epoch": 1.5579441561650604, "grad_norm": 1.2576766166662743, "learning_rate": 2.557352973993361e-06, "loss": 0.24017907679080963, "step": 6305 }, { "epoch": 1.558191252779837, "grad_norm": 1.5735746150939847, "learning_rate": 2.5546247334843367e-06, "loss": 0.2624661922454834, "step": 6306 }, { "epoch": 1.5584383493946135, "grad_norm": 1.482454010025739, "learning_rate": 2.5518977359051356e-06, "loss": 0.2606604993343353, "step": 6307 }, { "epoch": 1.5586854460093895, "grad_norm": 1.3738682904530082, "learning_rate": 2.549171981711005e-06, "loss": 0.3055276870727539, "step": 6308 }, { "epoch": 1.558932542624166, "grad_norm": 1.3634454039507253, "learning_rate": 2.546447471356982e-06, "loss": 0.27419960498809814, "step": 6309 }, { "epoch": 1.5591796392389425, "grad_norm": 1.4136896250551114, "learning_rate": 2.5437242052978984e-06, "loss": 0.2818364202976227, "step": 6310 }, { "epoch": 1.5594267358537188, "grad_norm": 1.4945689558306594, "learning_rate": 2.5410021839883747e-06, "loss": 0.31712132692337036, "step": 6311 }, { "epoch": 1.5596738324684951, "grad_norm": 1.1997896724456587, "learning_rate": 2.53828140788282e-06, "loss": 0.22155746817588806, "step": 6312 }, { "epoch": 1.5599209290832716, "grad_norm": 1.2592076962475394, "learning_rate": 2.535561877435444e-06, "loss": 0.2444954365491867, "step": 6313 }, { "epoch": 1.560168025698048, "grad_norm": 1.4536891804625882, "learning_rate": 2.5328435931002436e-06, "loss": 0.293079674243927, "step": 6314 }, { "epoch": 1.5604151223128242, "grad_norm": 1.5310658385127676, "learning_rate": 2.530126555331014e-06, "loss": 0.3122302293777466, "step": 6315 }, { "epoch": 1.5606622189276007, "grad_norm": 1.6351690553917835, "learning_rate": 2.5274107645813295e-06, "loss": 0.3105490803718567, "step": 6316 }, { "epoch": 1.5609093155423772, "grad_norm": 1.3056643933242125, "learning_rate": 2.5246962213045667e-06, "loss": 0.28407812118530273, "step": 6317 }, { "epoch": 1.5611564121571533, "grad_norm": 1.3575385587940958, "learning_rate": 2.521982925953894e-06, "loss": 0.27475500106811523, "step": 6318 }, { "epoch": 1.5614035087719298, "grad_norm": 1.3777975604688162, "learning_rate": 2.519270878982264e-06, "loss": 0.255479633808136, "step": 6319 }, { "epoch": 1.5616506053867063, "grad_norm": 1.2692326973038865, "learning_rate": 2.5165600808424295e-06, "loss": 0.26638081669807434, "step": 6320 }, { "epoch": 1.5618977020014826, "grad_norm": 1.3740030387409083, "learning_rate": 2.5138505319869255e-06, "loss": 0.2184123992919922, "step": 6321 }, { "epoch": 1.5621447986162589, "grad_norm": 1.401955941034587, "learning_rate": 2.5111422328680867e-06, "loss": 0.25613293051719666, "step": 6322 }, { "epoch": 1.5623918952310354, "grad_norm": 1.465214533944185, "learning_rate": 2.508435183938037e-06, "loss": 0.28867602348327637, "step": 6323 }, { "epoch": 1.5626389918458117, "grad_norm": 1.4172531232311045, "learning_rate": 2.505729385648692e-06, "loss": 0.30672988295555115, "step": 6324 }, { "epoch": 1.562886088460588, "grad_norm": 1.4422289575879412, "learning_rate": 2.5030248384517554e-06, "loss": 0.2876136600971222, "step": 6325 }, { "epoch": 1.5631331850753645, "grad_norm": 1.2991303853781955, "learning_rate": 2.5003215427987216e-06, "loss": 0.24937397241592407, "step": 6326 }, { "epoch": 1.563380281690141, "grad_norm": 1.5676500698571092, "learning_rate": 2.4976194991408787e-06, "loss": 0.3620862364768982, "step": 6327 }, { "epoch": 1.5636273783049173, "grad_norm": 1.4692265281418198, "learning_rate": 2.4949187079293093e-06, "loss": 0.2765630781650543, "step": 6328 }, { "epoch": 1.5638744749196936, "grad_norm": 1.4504841290710278, "learning_rate": 2.4922191696148824e-06, "loss": 0.25005802512168884, "step": 6329 }, { "epoch": 1.56412157153447, "grad_norm": 1.570091252766157, "learning_rate": 2.4895208846482542e-06, "loss": 0.27981048822402954, "step": 6330 }, { "epoch": 1.5643686681492464, "grad_norm": 1.388824028612584, "learning_rate": 2.4868238534798794e-06, "loss": 0.2944352626800537, "step": 6331 }, { "epoch": 1.5646157647640226, "grad_norm": 1.3842367001377196, "learning_rate": 2.484128076560003e-06, "loss": 0.26110532879829407, "step": 6332 }, { "epoch": 1.5648628613787992, "grad_norm": 1.7301276000069665, "learning_rate": 2.48143355433865e-06, "loss": 0.3003332316875458, "step": 6333 }, { "epoch": 1.5651099579935754, "grad_norm": 1.4916761674844659, "learning_rate": 2.478740287265651e-06, "loss": 0.2790129482746124, "step": 6334 }, { "epoch": 1.5653570546083517, "grad_norm": 1.3037122714144231, "learning_rate": 2.4760482757906133e-06, "loss": 0.29684412479400635, "step": 6335 }, { "epoch": 1.5656041512231282, "grad_norm": 1.5760161882154886, "learning_rate": 2.4733575203629434e-06, "loss": 0.2929166555404663, "step": 6336 }, { "epoch": 1.5658512478379047, "grad_norm": 1.3242819613556196, "learning_rate": 2.4706680214318367e-06, "loss": 0.3072383999824524, "step": 6337 }, { "epoch": 1.566098344452681, "grad_norm": 1.5615529779334736, "learning_rate": 2.46797977944628e-06, "loss": 0.3108351528644562, "step": 6338 }, { "epoch": 1.5663454410674573, "grad_norm": 1.3739049046016234, "learning_rate": 2.4652927948550444e-06, "loss": 0.24834004044532776, "step": 6339 }, { "epoch": 1.5665925376822338, "grad_norm": 1.2175730340113462, "learning_rate": 2.462607068106693e-06, "loss": 0.26040148735046387, "step": 6340 }, { "epoch": 1.5668396342970101, "grad_norm": 1.5404361723586426, "learning_rate": 2.4599225996495822e-06, "loss": 0.301957905292511, "step": 6341 }, { "epoch": 1.5670867309117864, "grad_norm": 1.43486531570179, "learning_rate": 2.4572393899318582e-06, "loss": 0.2638666033744812, "step": 6342 }, { "epoch": 1.567333827526563, "grad_norm": 1.7361371785808248, "learning_rate": 2.454557439401458e-06, "loss": 0.23872563242912292, "step": 6343 }, { "epoch": 1.5675809241413394, "grad_norm": 1.299233345266724, "learning_rate": 2.4518767485061e-06, "loss": 0.2263941764831543, "step": 6344 }, { "epoch": 1.5678280207561155, "grad_norm": 1.5468241896582795, "learning_rate": 2.4491973176933004e-06, "loss": 0.2616816461086273, "step": 6345 }, { "epoch": 1.568075117370892, "grad_norm": 1.3906825998094117, "learning_rate": 2.446519147410368e-06, "loss": 0.25752711296081543, "step": 6346 }, { "epoch": 1.5683222139856685, "grad_norm": 1.4497821863952418, "learning_rate": 2.443842238104389e-06, "loss": 0.2646070718765259, "step": 6347 }, { "epoch": 1.5685693106004448, "grad_norm": 1.747523121146768, "learning_rate": 2.4411665902222514e-06, "loss": 0.2674972414970398, "step": 6348 }, { "epoch": 1.568816407215221, "grad_norm": 1.7283763957362694, "learning_rate": 2.438492204210623e-06, "loss": 0.36113306879997253, "step": 6349 }, { "epoch": 1.5690635038299976, "grad_norm": 1.4120533520411334, "learning_rate": 2.435819080515968e-06, "loss": 0.27863237261772156, "step": 6350 }, { "epoch": 1.5693106004447739, "grad_norm": 1.475525719630659, "learning_rate": 2.433147219584536e-06, "loss": 0.25594794750213623, "step": 6351 }, { "epoch": 1.5695576970595502, "grad_norm": 1.353722724522722, "learning_rate": 2.430476621862372e-06, "loss": 0.30562108755111694, "step": 6352 }, { "epoch": 1.5698047936743267, "grad_norm": 1.5517469662159467, "learning_rate": 2.4278072877953006e-06, "loss": 0.23867860436439514, "step": 6353 }, { "epoch": 1.5700518902891032, "grad_norm": 1.723045431195111, "learning_rate": 2.425139217828938e-06, "loss": 0.3339960277080536, "step": 6354 }, { "epoch": 1.5702989869038793, "grad_norm": 1.3833982745310522, "learning_rate": 2.4224724124086942e-06, "loss": 0.2718137502670288, "step": 6355 }, { "epoch": 1.5705460835186558, "grad_norm": 1.440301184361409, "learning_rate": 2.4198068719797653e-06, "loss": 0.25645333528518677, "step": 6356 }, { "epoch": 1.5707931801334323, "grad_norm": 1.6679505742124163, "learning_rate": 2.4171425969871386e-06, "loss": 0.32324379682540894, "step": 6357 }, { "epoch": 1.5710402767482086, "grad_norm": 1.2354292992089861, "learning_rate": 2.4144795878755823e-06, "loss": 0.2465921938419342, "step": 6358 }, { "epoch": 1.5712873733629849, "grad_norm": 1.4596444430342808, "learning_rate": 2.411817845089661e-06, "loss": 0.2541583180427551, "step": 6359 }, { "epoch": 1.5715344699777614, "grad_norm": 1.4009450173353053, "learning_rate": 2.4091573690737312e-06, "loss": 0.28118494153022766, "step": 6360 }, { "epoch": 1.5717815665925376, "grad_norm": 1.3763148240890641, "learning_rate": 2.406498160271923e-06, "loss": 0.2755041718482971, "step": 6361 }, { "epoch": 1.572028663207314, "grad_norm": 1.3559011648860106, "learning_rate": 2.403840219128172e-06, "loss": 0.32802802324295044, "step": 6362 }, { "epoch": 1.5722757598220904, "grad_norm": 1.3974795038818888, "learning_rate": 2.401183546086189e-06, "loss": 0.31055521965026855, "step": 6363 }, { "epoch": 1.572522856436867, "grad_norm": 1.4633901425346922, "learning_rate": 2.39852814158948e-06, "loss": 0.2724856734275818, "step": 6364 }, { "epoch": 1.5727699530516432, "grad_norm": 1.386277387483835, "learning_rate": 2.395874006081339e-06, "loss": 0.2651706337928772, "step": 6365 }, { "epoch": 1.5730170496664195, "grad_norm": 1.3128058642339968, "learning_rate": 2.39322114000485e-06, "loss": 0.26365089416503906, "step": 6366 }, { "epoch": 1.573264146281196, "grad_norm": 1.4194664183309371, "learning_rate": 2.3905695438028786e-06, "loss": 0.27443695068359375, "step": 6367 }, { "epoch": 1.5735112428959723, "grad_norm": 1.3715513613863417, "learning_rate": 2.387919217918079e-06, "loss": 0.2606213092803955, "step": 6368 }, { "epoch": 1.5737583395107486, "grad_norm": 1.469345582545053, "learning_rate": 2.3852701627928997e-06, "loss": 0.30497509241104126, "step": 6369 }, { "epoch": 1.5740054361255251, "grad_norm": 1.4830397883716275, "learning_rate": 2.3826223788695724e-06, "loss": 0.36065423488616943, "step": 6370 }, { "epoch": 1.5742525327403014, "grad_norm": 1.4640655416641222, "learning_rate": 2.379975866590122e-06, "loss": 0.3487567901611328, "step": 6371 }, { "epoch": 1.5744996293550777, "grad_norm": 1.3023472163993581, "learning_rate": 2.3773306263963504e-06, "loss": 0.23572978377342224, "step": 6372 }, { "epoch": 1.5747467259698542, "grad_norm": 1.7003527276580304, "learning_rate": 2.374686658729859e-06, "loss": 0.3693557381629944, "step": 6373 }, { "epoch": 1.5749938225846307, "grad_norm": 1.3473367937460565, "learning_rate": 2.372043964032026e-06, "loss": 0.2323111593723297, "step": 6374 }, { "epoch": 1.575240919199407, "grad_norm": 1.4885059393017255, "learning_rate": 2.369402542744028e-06, "loss": 0.2607381343841553, "step": 6375 }, { "epoch": 1.5754880158141833, "grad_norm": 1.3060269130273325, "learning_rate": 2.3667623953068186e-06, "loss": 0.2511374056339264, "step": 6376 }, { "epoch": 1.5757351124289598, "grad_norm": 1.5751028385273933, "learning_rate": 2.364123522161146e-06, "loss": 0.31904658675193787, "step": 6377 }, { "epoch": 1.575982209043736, "grad_norm": 1.3646099537621168, "learning_rate": 2.361485923747543e-06, "loss": 0.2642503082752228, "step": 6378 }, { "epoch": 1.5762293056585124, "grad_norm": 1.5867754520512598, "learning_rate": 2.358849600506329e-06, "loss": 0.315982460975647, "step": 6379 }, { "epoch": 1.576476402273289, "grad_norm": 1.6999108606030175, "learning_rate": 2.356214552877619e-06, "loss": 0.2862110733985901, "step": 6380 }, { "epoch": 1.5767234988880652, "grad_norm": 1.4292966957169353, "learning_rate": 2.353580781301296e-06, "loss": 0.33718612790107727, "step": 6381 }, { "epoch": 1.5769705955028415, "grad_norm": 1.4269222311076568, "learning_rate": 2.350948286217044e-06, "loss": 0.30242305994033813, "step": 6382 }, { "epoch": 1.577217692117618, "grad_norm": 1.2526602598811742, "learning_rate": 2.3483170680643352e-06, "loss": 0.22067955136299133, "step": 6383 }, { "epoch": 1.5774647887323945, "grad_norm": 1.411006523089846, "learning_rate": 2.3456871272824256e-06, "loss": 0.24024127423763275, "step": 6384 }, { "epoch": 1.5777118853471708, "grad_norm": 1.498981261866197, "learning_rate": 2.3430584643103518e-06, "loss": 0.25845351815223694, "step": 6385 }, { "epoch": 1.577958981961947, "grad_norm": 1.4143858000428682, "learning_rate": 2.3404310795869454e-06, "loss": 0.2831456661224365, "step": 6386 }, { "epoch": 1.5782060785767236, "grad_norm": 1.4007029513963616, "learning_rate": 2.3378049735508247e-06, "loss": 0.2812865376472473, "step": 6387 }, { "epoch": 1.5784531751914999, "grad_norm": 1.3831896575945337, "learning_rate": 2.3351801466403856e-06, "loss": 0.2740539312362671, "step": 6388 }, { "epoch": 1.5787002718062761, "grad_norm": 1.4376065310648172, "learning_rate": 2.33255659929382e-06, "loss": 0.3118540048599243, "step": 6389 }, { "epoch": 1.5789473684210527, "grad_norm": 1.3882044141013417, "learning_rate": 2.329934331949101e-06, "loss": 0.3016921579837799, "step": 6390 }, { "epoch": 1.5791944650358292, "grad_norm": 1.4430994861106667, "learning_rate": 2.3273133450439887e-06, "loss": 0.2857571542263031, "step": 6391 }, { "epoch": 1.5794415616506052, "grad_norm": 1.5249066454055265, "learning_rate": 2.3246936390160325e-06, "loss": 0.2671259641647339, "step": 6392 }, { "epoch": 1.5796886582653817, "grad_norm": 1.4264602910845647, "learning_rate": 2.322075214302566e-06, "loss": 0.320911169052124, "step": 6393 }, { "epoch": 1.5799357548801582, "grad_norm": 1.3832554201000775, "learning_rate": 2.319458071340709e-06, "loss": 0.25547271966934204, "step": 6394 }, { "epoch": 1.5801828514949345, "grad_norm": 1.3823959813157247, "learning_rate": 2.3168422105673617e-06, "loss": 0.23341378569602966, "step": 6395 }, { "epoch": 1.5804299481097108, "grad_norm": 1.5185737395201182, "learning_rate": 2.314227632419219e-06, "loss": 0.2753772735595703, "step": 6396 }, { "epoch": 1.5806770447244873, "grad_norm": 1.4452045110742215, "learning_rate": 2.311614337332758e-06, "loss": 0.2486419379711151, "step": 6397 }, { "epoch": 1.5809241413392636, "grad_norm": 1.5890439582571672, "learning_rate": 2.309002325744246e-06, "loss": 0.3623042702674866, "step": 6398 }, { "epoch": 1.58117123795404, "grad_norm": 1.4774431950725195, "learning_rate": 2.3063915980897244e-06, "loss": 0.2710610032081604, "step": 6399 }, { "epoch": 1.5814183345688164, "grad_norm": 1.375932658870025, "learning_rate": 2.303782154805032e-06, "loss": 0.24317589402198792, "step": 6400 }, { "epoch": 1.581665431183593, "grad_norm": 1.5727385451300728, "learning_rate": 2.3011739963257907e-06, "loss": 0.2812860608100891, "step": 6401 }, { "epoch": 1.5819125277983692, "grad_norm": 1.4211758898360203, "learning_rate": 2.2985671230873996e-06, "loss": 0.261976957321167, "step": 6402 }, { "epoch": 1.5821596244131455, "grad_norm": 1.4709776341245757, "learning_rate": 2.2959615355250575e-06, "loss": 0.31692492961883545, "step": 6403 }, { "epoch": 1.582406721027922, "grad_norm": 1.385874459321723, "learning_rate": 2.2933572340737342e-06, "loss": 0.2784668505191803, "step": 6404 }, { "epoch": 1.5826538176426983, "grad_norm": 1.3970804670807322, "learning_rate": 2.290754219168193e-06, "loss": 0.212208092212677, "step": 6405 }, { "epoch": 1.5829009142574746, "grad_norm": 1.4745914294203806, "learning_rate": 2.288152491242983e-06, "loss": 0.25711745023727417, "step": 6406 }, { "epoch": 1.583148010872251, "grad_norm": 1.6317982099575523, "learning_rate": 2.285552050732438e-06, "loss": 0.2681235671043396, "step": 6407 }, { "epoch": 1.5833951074870274, "grad_norm": 1.5818127342040251, "learning_rate": 2.282952898070672e-06, "loss": 0.28961485624313354, "step": 6408 }, { "epoch": 1.5836422041018037, "grad_norm": 1.4151728443761107, "learning_rate": 2.2803550336915847e-06, "loss": 0.2739851474761963, "step": 6409 }, { "epoch": 1.5838893007165802, "grad_norm": 1.4681280631622091, "learning_rate": 2.277758458028866e-06, "loss": 0.24961639940738678, "step": 6410 }, { "epoch": 1.5841363973313567, "grad_norm": 1.4097337418688647, "learning_rate": 2.2751631715159872e-06, "loss": 0.24209710955619812, "step": 6411 }, { "epoch": 1.584383493946133, "grad_norm": 1.5158208464967013, "learning_rate": 2.272569174586209e-06, "loss": 0.281747043132782, "step": 6412 }, { "epoch": 1.5846305905609093, "grad_norm": 1.3544230621212143, "learning_rate": 2.2699764676725668e-06, "loss": 0.24960598349571228, "step": 6413 }, { "epoch": 1.5848776871756858, "grad_norm": 1.3698177160373355, "learning_rate": 2.2673850512078887e-06, "loss": 0.2914819121360779, "step": 6414 }, { "epoch": 1.585124783790462, "grad_norm": 1.34313274311893, "learning_rate": 2.26479492562479e-06, "loss": 0.27487847208976746, "step": 6415 }, { "epoch": 1.5853718804052384, "grad_norm": 1.7503378361351771, "learning_rate": 2.262206091355659e-06, "loss": 0.3622722029685974, "step": 6416 }, { "epoch": 1.5856189770200149, "grad_norm": 1.4077585921577325, "learning_rate": 2.2596185488326807e-06, "loss": 0.2813589572906494, "step": 6417 }, { "epoch": 1.5858660736347912, "grad_norm": 1.3271179096228016, "learning_rate": 2.2570322984878133e-06, "loss": 0.2415200173854828, "step": 6418 }, { "epoch": 1.5861131702495674, "grad_norm": 1.4154322149067031, "learning_rate": 2.2544473407528087e-06, "loss": 0.26209497451782227, "step": 6419 }, { "epoch": 1.586360266864344, "grad_norm": 1.4086578684261086, "learning_rate": 2.2518636760591995e-06, "loss": 0.2428497076034546, "step": 6420 }, { "epoch": 1.5866073634791205, "grad_norm": 1.2452475513638304, "learning_rate": 2.2492813048383044e-06, "loss": 0.24523058533668518, "step": 6421 }, { "epoch": 1.5868544600938967, "grad_norm": 1.4464313223750311, "learning_rate": 2.2467002275212214e-06, "loss": 0.3104270100593567, "step": 6422 }, { "epoch": 1.587101556708673, "grad_norm": 1.4011579439680018, "learning_rate": 2.244120444538833e-06, "loss": 0.2814447283744812, "step": 6423 }, { "epoch": 1.5873486533234495, "grad_norm": 1.7413012404816566, "learning_rate": 2.24154195632181e-06, "loss": 0.30354776978492737, "step": 6424 }, { "epoch": 1.5875957499382258, "grad_norm": 1.6721321110927783, "learning_rate": 2.2389647633006038e-06, "loss": 0.2773374915122986, "step": 6425 }, { "epoch": 1.5878428465530021, "grad_norm": 1.3289906876568136, "learning_rate": 2.2363888659054554e-06, "loss": 0.24012555181980133, "step": 6426 }, { "epoch": 1.5880899431677786, "grad_norm": 1.5017032209030043, "learning_rate": 2.2338142645663772e-06, "loss": 0.30193760991096497, "step": 6427 }, { "epoch": 1.5883370397825551, "grad_norm": 1.4608012786956421, "learning_rate": 2.231240959713178e-06, "loss": 0.28922945261001587, "step": 6428 }, { "epoch": 1.5885841363973312, "grad_norm": 1.3959464988836852, "learning_rate": 2.2286689517754456e-06, "loss": 0.27911505103111267, "step": 6429 }, { "epoch": 1.5888312330121077, "grad_norm": 1.5885771056801905, "learning_rate": 2.226098241182545e-06, "loss": 0.30394741892814636, "step": 6430 }, { "epoch": 1.5890783296268842, "grad_norm": 1.5931491733485323, "learning_rate": 2.223528828363638e-06, "loss": 0.30210545659065247, "step": 6431 }, { "epoch": 1.5893254262416605, "grad_norm": 1.492009934907038, "learning_rate": 2.220960713747654e-06, "loss": 0.2806845009326935, "step": 6432 }, { "epoch": 1.5895725228564368, "grad_norm": 1.3204252035184083, "learning_rate": 2.218393897763317e-06, "loss": 0.27165424823760986, "step": 6433 }, { "epoch": 1.5898196194712133, "grad_norm": 1.416083477968369, "learning_rate": 2.2158283808391323e-06, "loss": 0.24043706059455872, "step": 6434 }, { "epoch": 1.5900667160859896, "grad_norm": 1.3448457635585755, "learning_rate": 2.213264163403388e-06, "loss": 0.2717371881008148, "step": 6435 }, { "epoch": 1.5903138127007659, "grad_norm": 1.388359915229728, "learning_rate": 2.2107012458841525e-06, "loss": 0.2250942438840866, "step": 6436 }, { "epoch": 1.5905609093155424, "grad_norm": 1.3294612233404404, "learning_rate": 2.2081396287092747e-06, "loss": 0.25903522968292236, "step": 6437 }, { "epoch": 1.590808005930319, "grad_norm": 1.3633374491908952, "learning_rate": 2.205579312306395e-06, "loss": 0.23400601744651794, "step": 6438 }, { "epoch": 1.591055102545095, "grad_norm": 1.5481651093054882, "learning_rate": 2.2030202971029325e-06, "loss": 0.2503378093242645, "step": 6439 }, { "epoch": 1.5913021991598715, "grad_norm": 1.3686178768550379, "learning_rate": 2.2004625835260895e-06, "loss": 0.24202027916908264, "step": 6440 }, { "epoch": 1.591549295774648, "grad_norm": 1.4168013904933927, "learning_rate": 2.197906172002846e-06, "loss": 0.3097783327102661, "step": 6441 }, { "epoch": 1.5917963923894243, "grad_norm": 1.3738982661839658, "learning_rate": 2.195351062959974e-06, "loss": 0.25998836755752563, "step": 6442 }, { "epoch": 1.5920434890042006, "grad_norm": 1.5203314917438056, "learning_rate": 2.1927972568240184e-06, "loss": 0.22770127654075623, "step": 6443 }, { "epoch": 1.592290585618977, "grad_norm": 1.4697770890210813, "learning_rate": 2.1902447540213136e-06, "loss": 0.2769808769226074, "step": 6444 }, { "epoch": 1.5925376822337534, "grad_norm": 1.3288101187376478, "learning_rate": 2.1876935549779766e-06, "loss": 0.23989246785640717, "step": 6445 }, { "epoch": 1.5927847788485296, "grad_norm": 1.3961611988115759, "learning_rate": 2.1851436601198984e-06, "loss": 0.24333930015563965, "step": 6446 }, { "epoch": 1.5930318754633062, "grad_norm": 1.489501549065077, "learning_rate": 2.1825950698727614e-06, "loss": 0.23678991198539734, "step": 6447 }, { "epoch": 1.5932789720780827, "grad_norm": 3.459151708591774, "learning_rate": 2.180047784662026e-06, "loss": 0.27030453085899353, "step": 6448 }, { "epoch": 1.593526068692859, "grad_norm": 1.4970434497115699, "learning_rate": 2.1775018049129403e-06, "loss": 0.2838571071624756, "step": 6449 }, { "epoch": 1.5937731653076352, "grad_norm": 1.333349614029663, "learning_rate": 2.1749571310505246e-06, "loss": 0.2537711262702942, "step": 6450 }, { "epoch": 1.5940202619224118, "grad_norm": 1.4822756500662932, "learning_rate": 2.1724137634995845e-06, "loss": 0.26549196243286133, "step": 6451 }, { "epoch": 1.594267358537188, "grad_norm": 1.4503478858325958, "learning_rate": 2.1698717026847137e-06, "loss": 0.25726795196533203, "step": 6452 }, { "epoch": 1.5945144551519643, "grad_norm": 1.4008120017656045, "learning_rate": 2.167330949030281e-06, "loss": 0.20633675158023834, "step": 6453 }, { "epoch": 1.5947615517667408, "grad_norm": 1.4620280471523575, "learning_rate": 2.1647915029604438e-06, "loss": 0.30083245038986206, "step": 6454 }, { "epoch": 1.5950086483815171, "grad_norm": 1.4747245660814194, "learning_rate": 2.162253364899131e-06, "loss": 0.316803514957428, "step": 6455 }, { "epoch": 1.5952557449962934, "grad_norm": 1.4416529690755067, "learning_rate": 2.159716535270063e-06, "loss": 0.2379676103591919, "step": 6456 }, { "epoch": 1.59550284161107, "grad_norm": 1.5167600995015136, "learning_rate": 2.1571810144967355e-06, "loss": 0.2962314486503601, "step": 6457 }, { "epoch": 1.5957499382258464, "grad_norm": 1.405229552466495, "learning_rate": 2.154646803002427e-06, "loss": 0.2692069709300995, "step": 6458 }, { "epoch": 1.5959970348406227, "grad_norm": 1.5224979106493257, "learning_rate": 2.1521139012102044e-06, "loss": 0.26088768243789673, "step": 6459 }, { "epoch": 1.596244131455399, "grad_norm": 1.4025121336322943, "learning_rate": 2.1495823095429026e-06, "loss": 0.23170945048332214, "step": 6460 }, { "epoch": 1.5964912280701755, "grad_norm": 1.5111728960864028, "learning_rate": 2.147052028423148e-06, "loss": 0.27177688479423523, "step": 6461 }, { "epoch": 1.5967383246849518, "grad_norm": 1.58102643943238, "learning_rate": 2.1445230582733455e-06, "loss": 0.32438960671424866, "step": 6462 }, { "epoch": 1.596985421299728, "grad_norm": 1.7566138398597306, "learning_rate": 2.1419953995156863e-06, "loss": 0.3322969079017639, "step": 6463 }, { "epoch": 1.5972325179145046, "grad_norm": 1.3317071471601831, "learning_rate": 2.1394690525721275e-06, "loss": 0.2893427014350891, "step": 6464 }, { "epoch": 1.5974796145292811, "grad_norm": 1.4626176131991397, "learning_rate": 2.1369440178644206e-06, "loss": 0.29498809576034546, "step": 6465 }, { "epoch": 1.5977267111440572, "grad_norm": 1.2703627728914761, "learning_rate": 2.134420295814097e-06, "loss": 0.21654012799263, "step": 6466 }, { "epoch": 1.5979738077588337, "grad_norm": 1.4153651330958192, "learning_rate": 2.1318978868424645e-06, "loss": 0.27990907430648804, "step": 6467 }, { "epoch": 1.5982209043736102, "grad_norm": 1.7325420035563517, "learning_rate": 2.1293767913706177e-06, "loss": 0.2677476704120636, "step": 6468 }, { "epoch": 1.5984680009883865, "grad_norm": 1.4415470989737103, "learning_rate": 2.1268570098194207e-06, "loss": 0.2868029773235321, "step": 6469 }, { "epoch": 1.5987150976031628, "grad_norm": 1.3608910272038117, "learning_rate": 2.124338542609533e-06, "loss": 0.24949055910110474, "step": 6470 }, { "epoch": 1.5989621942179393, "grad_norm": 1.4288612927143456, "learning_rate": 2.1218213901613793e-06, "loss": 0.2767549157142639, "step": 6471 }, { "epoch": 1.5992092908327156, "grad_norm": 1.4154199467474653, "learning_rate": 2.1193055528951813e-06, "loss": 0.26748552918434143, "step": 6472 }, { "epoch": 1.5994563874474919, "grad_norm": 1.4277015519271676, "learning_rate": 2.1167910312309236e-06, "loss": 0.33442071080207825, "step": 6473 }, { "epoch": 1.5997034840622684, "grad_norm": 1.4111067785162776, "learning_rate": 2.1142778255883857e-06, "loss": 0.28170841932296753, "step": 6474 }, { "epoch": 1.5999505806770449, "grad_norm": 1.9277637694585994, "learning_rate": 2.111765936387119e-06, "loss": 0.25292566418647766, "step": 6475 }, { "epoch": 1.600197677291821, "grad_norm": 1.5541372731989591, "learning_rate": 2.109255364046463e-06, "loss": 0.25823062658309937, "step": 6476 }, { "epoch": 1.6004447739065975, "grad_norm": 1.4367244711623608, "learning_rate": 2.106746108985529e-06, "loss": 0.2706814408302307, "step": 6477 }, { "epoch": 1.600691870521374, "grad_norm": 1.6245581966753873, "learning_rate": 2.1042381716232085e-06, "loss": 0.34122440218925476, "step": 6478 }, { "epoch": 1.6009389671361502, "grad_norm": 1.6944668545910855, "learning_rate": 2.101731552378179e-06, "loss": 0.3619228005409241, "step": 6479 }, { "epoch": 1.6011860637509265, "grad_norm": 1.7014695994011035, "learning_rate": 2.0992262516688945e-06, "loss": 0.3085925579071045, "step": 6480 }, { "epoch": 1.601433160365703, "grad_norm": 1.660123138285704, "learning_rate": 2.0967222699135936e-06, "loss": 0.2821919322013855, "step": 6481 }, { "epoch": 1.6016802569804793, "grad_norm": 1.4834010092788708, "learning_rate": 2.0942196075302844e-06, "loss": 0.272402822971344, "step": 6482 }, { "epoch": 1.6019273535952556, "grad_norm": 1.5353703113802453, "learning_rate": 2.091718264936763e-06, "loss": 0.3059583902359009, "step": 6483 }, { "epoch": 1.6021744502100321, "grad_norm": 1.294307620600676, "learning_rate": 2.089218242550608e-06, "loss": 0.268892377614975, "step": 6484 }, { "epoch": 1.6024215468248086, "grad_norm": 1.6223160024422563, "learning_rate": 2.086719540789165e-06, "loss": 0.32269710302352905, "step": 6485 }, { "epoch": 1.602668643439585, "grad_norm": 1.4577028257095288, "learning_rate": 2.0842221600695734e-06, "loss": 0.26803573966026306, "step": 6486 }, { "epoch": 1.6029157400543612, "grad_norm": 1.3415453044509253, "learning_rate": 2.081726100808741e-06, "loss": 0.26632314920425415, "step": 6487 }, { "epoch": 1.6031628366691377, "grad_norm": 1.5630675036481556, "learning_rate": 2.0792313634233597e-06, "loss": 0.25524458289146423, "step": 6488 }, { "epoch": 1.603409933283914, "grad_norm": 1.5712852279334721, "learning_rate": 2.076737948329903e-06, "loss": 0.3043654263019562, "step": 6489 }, { "epoch": 1.6036570298986903, "grad_norm": 1.517250490139551, "learning_rate": 2.0742458559446234e-06, "loss": 0.30794286727905273, "step": 6490 }, { "epoch": 1.6039041265134668, "grad_norm": 1.4862232093134657, "learning_rate": 2.0717550866835477e-06, "loss": 0.3065077066421509, "step": 6491 }, { "epoch": 1.604151223128243, "grad_norm": 1.4242181576871522, "learning_rate": 2.069265640962481e-06, "loss": 0.25233227014541626, "step": 6492 }, { "epoch": 1.6043983197430194, "grad_norm": 1.3368060294659068, "learning_rate": 2.066777519197014e-06, "loss": 0.22380852699279785, "step": 6493 }, { "epoch": 1.604645416357796, "grad_norm": 1.495465250242139, "learning_rate": 2.064290721802514e-06, "loss": 0.29689133167266846, "step": 6494 }, { "epoch": 1.6048925129725724, "grad_norm": 1.3258807020148495, "learning_rate": 2.06180524919413e-06, "loss": 0.2624014914035797, "step": 6495 }, { "epoch": 1.6051396095873487, "grad_norm": 1.3849204781207387, "learning_rate": 2.0593211017867785e-06, "loss": 0.28743571043014526, "step": 6496 }, { "epoch": 1.605386706202125, "grad_norm": 1.3698563009324476, "learning_rate": 2.056838279995168e-06, "loss": 0.24122612178325653, "step": 6497 }, { "epoch": 1.6056338028169015, "grad_norm": 1.1654836510353868, "learning_rate": 2.054356784233782e-06, "loss": 0.21594922244548798, "step": 6498 }, { "epoch": 1.6058808994316778, "grad_norm": 1.3444019366598414, "learning_rate": 2.0518766149168758e-06, "loss": 0.2305155098438263, "step": 6499 }, { "epoch": 1.606127996046454, "grad_norm": 1.573648302908243, "learning_rate": 2.0493977724584924e-06, "loss": 0.33105146884918213, "step": 6500 }, { "epoch": 1.6063750926612306, "grad_norm": 1.5736834856294641, "learning_rate": 2.0469202572724455e-06, "loss": 0.2916497588157654, "step": 6501 }, { "epoch": 1.606622189276007, "grad_norm": 1.4475793728361013, "learning_rate": 2.0444440697723343e-06, "loss": 0.27512484788894653, "step": 6502 }, { "epoch": 1.6068692858907831, "grad_norm": 1.2286676785395818, "learning_rate": 2.041969210371533e-06, "loss": 0.22575177252292633, "step": 6503 }, { "epoch": 1.6071163825055597, "grad_norm": 1.3996049549259986, "learning_rate": 2.0394956794831957e-06, "loss": 0.23822075128555298, "step": 6504 }, { "epoch": 1.6073634791203362, "grad_norm": 1.4211028580539489, "learning_rate": 2.037023477520251e-06, "loss": 0.25485754013061523, "step": 6505 }, { "epoch": 1.6076105757351125, "grad_norm": 1.5183757334895998, "learning_rate": 2.0345526048954057e-06, "loss": 0.28780508041381836, "step": 6506 }, { "epoch": 1.6078576723498887, "grad_norm": 1.3129866756889963, "learning_rate": 2.0320830620211497e-06, "loss": 0.2872549593448639, "step": 6507 }, { "epoch": 1.6081047689646653, "grad_norm": 1.4742716983871649, "learning_rate": 2.0296148493097477e-06, "loss": 0.2468990981578827, "step": 6508 }, { "epoch": 1.6083518655794415, "grad_norm": 1.5400018944216534, "learning_rate": 2.0271479671732454e-06, "loss": 0.31128764152526855, "step": 6509 }, { "epoch": 1.6085989621942178, "grad_norm": 1.3066618031965505, "learning_rate": 2.024682416023459e-06, "loss": 0.25697392225265503, "step": 6510 }, { "epoch": 1.6088460588089943, "grad_norm": 1.625226108762277, "learning_rate": 2.0222181962719923e-06, "loss": 0.31382960081100464, "step": 6511 }, { "epoch": 1.6090931554237708, "grad_norm": 1.3914102136675168, "learning_rate": 2.0197553083302156e-06, "loss": 0.2713547945022583, "step": 6512 }, { "epoch": 1.609340252038547, "grad_norm": 1.4379097277276527, "learning_rate": 2.017293752609286e-06, "loss": 0.22214742004871368, "step": 6513 }, { "epoch": 1.6095873486533234, "grad_norm": 1.425017125702767, "learning_rate": 2.0148335295201394e-06, "loss": 0.2539535164833069, "step": 6514 }, { "epoch": 1.6098344452681, "grad_norm": 1.3820036481622888, "learning_rate": 2.012374639473478e-06, "loss": 0.2882193326950073, "step": 6515 }, { "epoch": 1.6100815418828762, "grad_norm": 1.4120751958945879, "learning_rate": 2.009917082879792e-06, "loss": 0.28444594144821167, "step": 6516 }, { "epoch": 1.6103286384976525, "grad_norm": 1.4690066701348228, "learning_rate": 2.007460860149346e-06, "loss": 0.26681968569755554, "step": 6517 }, { "epoch": 1.610575735112429, "grad_norm": 1.4661208071013827, "learning_rate": 2.005005971692183e-06, "loss": 0.2829250395298004, "step": 6518 }, { "epoch": 1.6108228317272053, "grad_norm": 1.427871130028293, "learning_rate": 2.0025524179181187e-06, "loss": 0.24601203203201294, "step": 6519 }, { "epoch": 1.6110699283419816, "grad_norm": 1.4030494151132997, "learning_rate": 2.0001001992367488e-06, "loss": 0.277673602104187, "step": 6520 }, { "epoch": 1.611317024956758, "grad_norm": 1.5534990808058604, "learning_rate": 1.997649316057446e-06, "loss": 0.31984928250312805, "step": 6521 }, { "epoch": 1.6115641215715346, "grad_norm": 1.7082410145060902, "learning_rate": 1.9951997687893623e-06, "loss": 0.3070909380912781, "step": 6522 }, { "epoch": 1.611811218186311, "grad_norm": 1.5276187771025096, "learning_rate": 1.992751557841427e-06, "loss": 0.2530117630958557, "step": 6523 }, { "epoch": 1.6120583148010872, "grad_norm": 1.5358239852316662, "learning_rate": 1.9903046836223384e-06, "loss": 0.29023194313049316, "step": 6524 }, { "epoch": 1.6123054114158637, "grad_norm": 1.3949431835036363, "learning_rate": 1.987859146540582e-06, "loss": 0.26951661705970764, "step": 6525 }, { "epoch": 1.61255250803064, "grad_norm": 1.5845260026060803, "learning_rate": 1.9854149470044115e-06, "loss": 0.2945307791233063, "step": 6526 }, { "epoch": 1.6127996046454163, "grad_norm": 1.4835401953254805, "learning_rate": 1.9829720854218637e-06, "loss": 0.27633148431777954, "step": 6527 }, { "epoch": 1.6130467012601928, "grad_norm": 1.6072942918055233, "learning_rate": 1.980530562200751e-06, "loss": 0.251801073551178, "step": 6528 }, { "epoch": 1.613293797874969, "grad_norm": 1.531497352724578, "learning_rate": 1.978090377748657e-06, "loss": 0.32922083139419556, "step": 6529 }, { "epoch": 1.6135408944897454, "grad_norm": 1.3713790994438633, "learning_rate": 1.9756515324729476e-06, "loss": 0.23429276049137115, "step": 6530 }, { "epoch": 1.6137879911045219, "grad_norm": 1.5888687426483574, "learning_rate": 1.973214026780763e-06, "loss": 0.2858818769454956, "step": 6531 }, { "epoch": 1.6140350877192984, "grad_norm": 1.4956317668430368, "learning_rate": 1.970777861079023e-06, "loss": 0.26449963450431824, "step": 6532 }, { "epoch": 1.6142821843340747, "grad_norm": 1.3120650640822906, "learning_rate": 1.9683430357744183e-06, "loss": 0.2380850613117218, "step": 6533 }, { "epoch": 1.614529280948851, "grad_norm": 1.5994973795005936, "learning_rate": 1.965909551273416e-06, "loss": 0.3179759979248047, "step": 6534 }, { "epoch": 1.6147763775636275, "grad_norm": 1.534774097523024, "learning_rate": 1.9634774079822637e-06, "loss": 0.278603732585907, "step": 6535 }, { "epoch": 1.6150234741784038, "grad_norm": 1.5708796378338439, "learning_rate": 1.9610466063069823e-06, "loss": 0.2814180254936218, "step": 6536 }, { "epoch": 1.61527057079318, "grad_norm": 1.428671508326518, "learning_rate": 1.958617146653373e-06, "loss": 0.28727954626083374, "step": 6537 }, { "epoch": 1.6155176674079565, "grad_norm": 1.4310461400726502, "learning_rate": 1.956189029427005e-06, "loss": 0.2753446698188782, "step": 6538 }, { "epoch": 1.6157647640227328, "grad_norm": 1.6409151391190004, "learning_rate": 1.9537622550332315e-06, "loss": 0.27215224504470825, "step": 6539 }, { "epoch": 1.6160118606375091, "grad_norm": 1.553928322422929, "learning_rate": 1.9513368238771735e-06, "loss": 0.30792146921157837, "step": 6540 }, { "epoch": 1.6162589572522856, "grad_norm": 1.401239365875428, "learning_rate": 1.948912736363735e-06, "loss": 0.2863994836807251, "step": 6541 }, { "epoch": 1.6165060538670621, "grad_norm": 1.2487303977291024, "learning_rate": 1.9464899928975945e-06, "loss": 0.22626164555549622, "step": 6542 }, { "epoch": 1.6167531504818384, "grad_norm": 1.3905334349214178, "learning_rate": 1.944068593883199e-06, "loss": 0.262933611869812, "step": 6543 }, { "epoch": 1.6170002470966147, "grad_norm": 1.4824446968024352, "learning_rate": 1.9416485397247796e-06, "loss": 0.29619479179382324, "step": 6544 }, { "epoch": 1.6172473437113912, "grad_norm": 1.378012763231584, "learning_rate": 1.939229830826341e-06, "loss": 0.28060412406921387, "step": 6545 }, { "epoch": 1.6174944403261675, "grad_norm": 1.4784331810697342, "learning_rate": 1.9368124675916634e-06, "loss": 0.23109468817710876, "step": 6546 }, { "epoch": 1.6177415369409438, "grad_norm": 1.5941636365591914, "learning_rate": 1.9343964504242975e-06, "loss": 0.2755095958709717, "step": 6547 }, { "epoch": 1.6179886335557203, "grad_norm": 1.5842705873809237, "learning_rate": 1.931981779727572e-06, "loss": 0.30538588762283325, "step": 6548 }, { "epoch": 1.6182357301704968, "grad_norm": 1.513811409801431, "learning_rate": 1.9295684559045936e-06, "loss": 0.31098341941833496, "step": 6549 }, { "epoch": 1.6184828267852729, "grad_norm": 1.3036441693340444, "learning_rate": 1.927156479358243e-06, "loss": 0.22807320952415466, "step": 6550 }, { "epoch": 1.6187299234000494, "grad_norm": 1.4913111989289618, "learning_rate": 1.9247458504911763e-06, "loss": 0.2582327723503113, "step": 6551 }, { "epoch": 1.618977020014826, "grad_norm": 1.338323830939367, "learning_rate": 1.922336569705819e-06, "loss": 0.2335895597934723, "step": 6552 }, { "epoch": 1.6192241166296022, "grad_norm": 1.3777271540336518, "learning_rate": 1.919928637404381e-06, "loss": 0.2598675489425659, "step": 6553 }, { "epoch": 1.6194712132443785, "grad_norm": 1.3677572668920235, "learning_rate": 1.9175220539888382e-06, "loss": 0.2773209512233734, "step": 6554 }, { "epoch": 1.619718309859155, "grad_norm": 1.554696247506235, "learning_rate": 1.915116819860947e-06, "loss": 0.2851722538471222, "step": 6555 }, { "epoch": 1.6199654064739313, "grad_norm": 1.37553987502697, "learning_rate": 1.9127129354222395e-06, "loss": 0.22590592503547668, "step": 6556 }, { "epoch": 1.6202125030887076, "grad_norm": 1.3931313119984108, "learning_rate": 1.910310401074014e-06, "loss": 0.24422025680541992, "step": 6557 }, { "epoch": 1.620459599703484, "grad_norm": 1.3041582581713336, "learning_rate": 1.907909217217352e-06, "loss": 0.28209006786346436, "step": 6558 }, { "epoch": 1.6207066963182606, "grad_norm": 1.5058058337587934, "learning_rate": 1.905509384253108e-06, "loss": 0.2919612526893616, "step": 6559 }, { "epoch": 1.6209537929330367, "grad_norm": 1.477606747648033, "learning_rate": 1.903110902581914e-06, "loss": 0.29372668266296387, "step": 6560 }, { "epoch": 1.6212008895478132, "grad_norm": 1.352580141704699, "learning_rate": 1.9007137726041613e-06, "loss": 0.24014410376548767, "step": 6561 }, { "epoch": 1.6214479861625897, "grad_norm": 1.453400210771556, "learning_rate": 1.8983179947200315e-06, "loss": 0.2918820381164551, "step": 6562 }, { "epoch": 1.621695082777366, "grad_norm": 1.3862012699799333, "learning_rate": 1.8959235693294776e-06, "loss": 0.26108068227767944, "step": 6563 }, { "epoch": 1.6219421793921422, "grad_norm": 1.5431503796927848, "learning_rate": 1.8935304968322243e-06, "loss": 0.29771244525909424, "step": 6564 }, { "epoch": 1.6221892760069188, "grad_norm": 1.6739004341457515, "learning_rate": 1.8911387776277678e-06, "loss": 0.2775457501411438, "step": 6565 }, { "epoch": 1.622436372621695, "grad_norm": 1.492553093015716, "learning_rate": 1.8887484121153831e-06, "loss": 0.3125488758087158, "step": 6566 }, { "epoch": 1.6226834692364713, "grad_norm": 1.477860207077621, "learning_rate": 1.8863594006941189e-06, "loss": 0.23791933059692383, "step": 6567 }, { "epoch": 1.6229305658512478, "grad_norm": 1.372557314163091, "learning_rate": 1.8839717437627936e-06, "loss": 0.24541346728801727, "step": 6568 }, { "epoch": 1.6231776624660244, "grad_norm": 1.244878968999493, "learning_rate": 1.8815854417200053e-06, "loss": 0.26980623602867126, "step": 6569 }, { "epoch": 1.6234247590808006, "grad_norm": 1.3266216576097434, "learning_rate": 1.8792004949641196e-06, "loss": 0.26177266240119934, "step": 6570 }, { "epoch": 1.623671855695577, "grad_norm": 1.4103888893514722, "learning_rate": 1.8768169038932805e-06, "loss": 0.30366250872612, "step": 6571 }, { "epoch": 1.6239189523103534, "grad_norm": 1.420824866253558, "learning_rate": 1.8744346689054039e-06, "loss": 0.2353609800338745, "step": 6572 }, { "epoch": 1.6241660489251297, "grad_norm": 1.3220242565031457, "learning_rate": 1.872053790398184e-06, "loss": 0.25101229548454285, "step": 6573 }, { "epoch": 1.624413145539906, "grad_norm": 1.4367531633728532, "learning_rate": 1.8696742687690794e-06, "loss": 0.27877312898635864, "step": 6574 }, { "epoch": 1.6246602421546825, "grad_norm": 1.3209146818187854, "learning_rate": 1.867296104415327e-06, "loss": 0.25255221128463745, "step": 6575 }, { "epoch": 1.6249073387694588, "grad_norm": 1.5342089331165232, "learning_rate": 1.8649192977339392e-06, "loss": 0.24622312188148499, "step": 6576 }, { "epoch": 1.625154435384235, "grad_norm": 1.315211719812343, "learning_rate": 1.8625438491216973e-06, "loss": 0.2372256964445114, "step": 6577 }, { "epoch": 1.6254015319990116, "grad_norm": 1.521888018442947, "learning_rate": 1.8601697589751645e-06, "loss": 0.23801341652870178, "step": 6578 }, { "epoch": 1.6256486286137881, "grad_norm": 1.4272382070805676, "learning_rate": 1.8577970276906632e-06, "loss": 0.2779761254787445, "step": 6579 }, { "epoch": 1.6258957252285644, "grad_norm": 1.430213522659817, "learning_rate": 1.8554256556643036e-06, "loss": 0.2680327296257019, "step": 6580 }, { "epoch": 1.6261428218433407, "grad_norm": 1.4517276978247673, "learning_rate": 1.8530556432919566e-06, "loss": 0.3024403750896454, "step": 6581 }, { "epoch": 1.6263899184581172, "grad_norm": 1.6109458984203466, "learning_rate": 1.8506869909692737e-06, "loss": 0.35917913913726807, "step": 6582 }, { "epoch": 1.6266370150728935, "grad_norm": 1.4823722081871764, "learning_rate": 1.8483196990916796e-06, "loss": 0.32293325662612915, "step": 6583 }, { "epoch": 1.6268841116876698, "grad_norm": 1.3496764741321745, "learning_rate": 1.845953768054366e-06, "loss": 0.27688145637512207, "step": 6584 }, { "epoch": 1.6271312083024463, "grad_norm": 1.3789936039079833, "learning_rate": 1.8435891982523023e-06, "loss": 0.2708415985107422, "step": 6585 }, { "epoch": 1.6273783049172228, "grad_norm": 1.3750294544866812, "learning_rate": 1.841225990080231e-06, "loss": 0.2498873770236969, "step": 6586 }, { "epoch": 1.6276254015319989, "grad_norm": 1.6371611202705416, "learning_rate": 1.838864143932666e-06, "loss": 0.30118146538734436, "step": 6587 }, { "epoch": 1.6278724981467754, "grad_norm": 1.5022449095854995, "learning_rate": 1.8365036602038932e-06, "loss": 0.2909722328186035, "step": 6588 }, { "epoch": 1.6281195947615519, "grad_norm": 1.491590509095353, "learning_rate": 1.8341445392879675e-06, "loss": 0.27574050426483154, "step": 6589 }, { "epoch": 1.6283666913763282, "grad_norm": 1.748724628394524, "learning_rate": 1.8317867815787238e-06, "loss": 0.29620450735092163, "step": 6590 }, { "epoch": 1.6286137879911045, "grad_norm": 1.4111275456458114, "learning_rate": 1.8294303874697639e-06, "loss": 0.24081623554229736, "step": 6591 }, { "epoch": 1.628860884605881, "grad_norm": 1.3463792965288373, "learning_rate": 1.8270753573544686e-06, "loss": 0.271303653717041, "step": 6592 }, { "epoch": 1.6291079812206573, "grad_norm": 1.4207454967269975, "learning_rate": 1.8247216916259803e-06, "loss": 0.27417677640914917, "step": 6593 }, { "epoch": 1.6293550778354335, "grad_norm": 1.3741527381194016, "learning_rate": 1.8223693906772245e-06, "loss": 0.24818921089172363, "step": 6594 }, { "epoch": 1.62960217445021, "grad_norm": 1.3304208797803911, "learning_rate": 1.8200184549008881e-06, "loss": 0.3158223032951355, "step": 6595 }, { "epoch": 1.6298492710649866, "grad_norm": 1.4747609496992964, "learning_rate": 1.8176688846894407e-06, "loss": 0.29374584555625916, "step": 6596 }, { "epoch": 1.6300963676797626, "grad_norm": 1.4745802433501132, "learning_rate": 1.8153206804351198e-06, "loss": 0.2662881910800934, "step": 6597 }, { "epoch": 1.6303434642945391, "grad_norm": 1.339288382710693, "learning_rate": 1.8129738425299303e-06, "loss": 0.29801174998283386, "step": 6598 }, { "epoch": 1.6305905609093156, "grad_norm": 1.4109717254799399, "learning_rate": 1.8106283713656548e-06, "loss": 0.23620285093784332, "step": 6599 }, { "epoch": 1.630837657524092, "grad_norm": 1.446014162725477, "learning_rate": 1.8082842673338463e-06, "loss": 0.28271186351776123, "step": 6600 }, { "epoch": 1.6310847541388682, "grad_norm": 1.5371522320068558, "learning_rate": 1.805941530825831e-06, "loss": 0.24748548865318298, "step": 6601 }, { "epoch": 1.6313318507536447, "grad_norm": 1.562660874725878, "learning_rate": 1.8036001622327038e-06, "loss": 0.31751197576522827, "step": 6602 }, { "epoch": 1.631578947368421, "grad_norm": 1.5167899202933361, "learning_rate": 1.8012601619453284e-06, "loss": 0.3048483729362488, "step": 6603 }, { "epoch": 1.6318260439831973, "grad_norm": 1.4248362348102597, "learning_rate": 1.7989215303543484e-06, "loss": 0.2646392583847046, "step": 6604 }, { "epoch": 1.6320731405979738, "grad_norm": 1.2540319239463498, "learning_rate": 1.7965842678501721e-06, "loss": 0.2224578857421875, "step": 6605 }, { "epoch": 1.6323202372127503, "grad_norm": 1.56179237355773, "learning_rate": 1.7942483748229866e-06, "loss": 0.33754903078079224, "step": 6606 }, { "epoch": 1.6325673338275266, "grad_norm": 1.3604562639371378, "learning_rate": 1.7919138516627399e-06, "loss": 0.2863595187664032, "step": 6607 }, { "epoch": 1.632814430442303, "grad_norm": 1.6289123706541149, "learning_rate": 1.7895806987591614e-06, "loss": 0.2538081407546997, "step": 6608 }, { "epoch": 1.6330615270570794, "grad_norm": 1.5753931051301737, "learning_rate": 1.7872489165017425e-06, "loss": 0.3351441025733948, "step": 6609 }, { "epoch": 1.6333086236718557, "grad_norm": 1.3039256419656422, "learning_rate": 1.7849185052797525e-06, "loss": 0.25156646966934204, "step": 6610 }, { "epoch": 1.633555720286632, "grad_norm": 1.5017903225335796, "learning_rate": 1.782589465482234e-06, "loss": 0.29842686653137207, "step": 6611 }, { "epoch": 1.6338028169014085, "grad_norm": 1.3911941256831786, "learning_rate": 1.7802617974979907e-06, "loss": 0.2692302465438843, "step": 6612 }, { "epoch": 1.6340499135161848, "grad_norm": 1.5275511034017166, "learning_rate": 1.7779355017156042e-06, "loss": 0.33134615421295166, "step": 6613 }, { "epoch": 1.634297010130961, "grad_norm": 1.2558755330069677, "learning_rate": 1.775610578523429e-06, "loss": 0.2258089780807495, "step": 6614 }, { "epoch": 1.6345441067457376, "grad_norm": 1.5497585591884688, "learning_rate": 1.7732870283095871e-06, "loss": 0.2879782021045685, "step": 6615 }, { "epoch": 1.634791203360514, "grad_norm": 1.3523957783214262, "learning_rate": 1.7709648514619704e-06, "loss": 0.2295450121164322, "step": 6616 }, { "epoch": 1.6350382999752904, "grad_norm": 1.3444728781777993, "learning_rate": 1.76864404836824e-06, "loss": 0.2674567997455597, "step": 6617 }, { "epoch": 1.6352853965900667, "grad_norm": 1.36849426510551, "learning_rate": 1.7663246194158334e-06, "loss": 0.2699081301689148, "step": 6618 }, { "epoch": 1.6355324932048432, "grad_norm": 1.3581929711829228, "learning_rate": 1.7640065649919557e-06, "loss": 0.25131186842918396, "step": 6619 }, { "epoch": 1.6357795898196195, "grad_norm": 1.4773530750411847, "learning_rate": 1.761689885483584e-06, "loss": 0.3125247359275818, "step": 6620 }, { "epoch": 1.6360266864343957, "grad_norm": 1.3502669418725208, "learning_rate": 1.7593745812774599e-06, "loss": 0.23329192399978638, "step": 6621 }, { "epoch": 1.6362737830491723, "grad_norm": 1.4675296551311503, "learning_rate": 1.7570606527601065e-06, "loss": 0.24586525559425354, "step": 6622 }, { "epoch": 1.6365208796639488, "grad_norm": 1.5405792887884573, "learning_rate": 1.7547481003178035e-06, "loss": 0.29117369651794434, "step": 6623 }, { "epoch": 1.6367679762787248, "grad_norm": 1.4047158074826405, "learning_rate": 1.7524369243366114e-06, "loss": 0.28000640869140625, "step": 6624 }, { "epoch": 1.6370150728935013, "grad_norm": 1.5171089222043608, "learning_rate": 1.75012712520236e-06, "loss": 0.2785894274711609, "step": 6625 }, { "epoch": 1.6372621695082779, "grad_norm": 1.4348221923929638, "learning_rate": 1.747818703300641e-06, "loss": 0.24484461545944214, "step": 6626 }, { "epoch": 1.6375092661230541, "grad_norm": 2.1311494298884197, "learning_rate": 1.7455116590168275e-06, "loss": 0.3499123454093933, "step": 6627 }, { "epoch": 1.6377563627378304, "grad_norm": 1.5588194096475028, "learning_rate": 1.7432059927360534e-06, "loss": 0.29063183069229126, "step": 6628 }, { "epoch": 1.638003459352607, "grad_norm": 1.372318196090721, "learning_rate": 1.7409017048432308e-06, "loss": 0.2590647339820862, "step": 6629 }, { "epoch": 1.6382505559673832, "grad_norm": 1.4784708177767552, "learning_rate": 1.7385987957230345e-06, "loss": 0.2929338812828064, "step": 6630 }, { "epoch": 1.6384976525821595, "grad_norm": 1.337720072436462, "learning_rate": 1.7362972657599085e-06, "loss": 0.2757628858089447, "step": 6631 }, { "epoch": 1.638744749196936, "grad_norm": 1.316298773955998, "learning_rate": 1.7339971153380742e-06, "loss": 0.21920952200889587, "step": 6632 }, { "epoch": 1.6389918458117125, "grad_norm": 1.3699310427683968, "learning_rate": 1.7316983448415158e-06, "loss": 0.24640268087387085, "step": 6633 }, { "epoch": 1.6392389424264886, "grad_norm": 1.2885527210494243, "learning_rate": 1.7294009546539937e-06, "loss": 0.2887398302555084, "step": 6634 }, { "epoch": 1.639486039041265, "grad_norm": 1.4312964374802004, "learning_rate": 1.7271049451590294e-06, "loss": 0.27500978112220764, "step": 6635 }, { "epoch": 1.6397331356560416, "grad_norm": 1.7898807034380875, "learning_rate": 1.7248103167399222e-06, "loss": 0.30597418546676636, "step": 6636 }, { "epoch": 1.639980232270818, "grad_norm": 1.3969073311362619, "learning_rate": 1.7225170697797322e-06, "loss": 0.26258155703544617, "step": 6637 }, { "epoch": 1.6402273288855942, "grad_norm": 1.46913358785806, "learning_rate": 1.7202252046612977e-06, "loss": 0.31898653507232666, "step": 6638 }, { "epoch": 1.6404744255003707, "grad_norm": 1.353711731671139, "learning_rate": 1.7179347217672227e-06, "loss": 0.28424131870269775, "step": 6639 }, { "epoch": 1.640721522115147, "grad_norm": 1.4121380361231004, "learning_rate": 1.7156456214798756e-06, "loss": 0.28068768978118896, "step": 6640 }, { "epoch": 1.6409686187299233, "grad_norm": 1.646823691377173, "learning_rate": 1.713357904181402e-06, "loss": 0.3644464612007141, "step": 6641 }, { "epoch": 1.6412157153446998, "grad_norm": 1.625228160036195, "learning_rate": 1.7110715702537118e-06, "loss": 0.3048831820487976, "step": 6642 }, { "epoch": 1.6414628119594763, "grad_norm": 1.5346959403782714, "learning_rate": 1.708786620078492e-06, "loss": 0.29321742057800293, "step": 6643 }, { "epoch": 1.6417099085742526, "grad_norm": 1.4326089304929666, "learning_rate": 1.7065030540371796e-06, "loss": 0.2748802900314331, "step": 6644 }, { "epoch": 1.6419570051890289, "grad_norm": 1.4377195247907626, "learning_rate": 1.7042208725109987e-06, "loss": 0.29010915756225586, "step": 6645 }, { "epoch": 1.6422041018038054, "grad_norm": 1.4347789615377322, "learning_rate": 1.701940075880938e-06, "loss": 0.2966383993625641, "step": 6646 }, { "epoch": 1.6424511984185817, "grad_norm": 1.4788626171737191, "learning_rate": 1.6996606645277503e-06, "loss": 0.2693951725959778, "step": 6647 }, { "epoch": 1.642698295033358, "grad_norm": 1.450769848933113, "learning_rate": 1.6973826388319648e-06, "loss": 0.2888922691345215, "step": 6648 }, { "epoch": 1.6429453916481345, "grad_norm": 1.3869855033921397, "learning_rate": 1.6951059991738717e-06, "loss": 0.27976685762405396, "step": 6649 }, { "epoch": 1.6431924882629108, "grad_norm": 1.4695617022757927, "learning_rate": 1.6928307459335302e-06, "loss": 0.26571953296661377, "step": 6650 }, { "epoch": 1.643439584877687, "grad_norm": 1.4546603646181648, "learning_rate": 1.6905568794907734e-06, "loss": 0.2721739709377289, "step": 6651 }, { "epoch": 1.6436866814924636, "grad_norm": 1.480074866469991, "learning_rate": 1.688284400225203e-06, "loss": 0.3117493987083435, "step": 6652 }, { "epoch": 1.64393377810724, "grad_norm": 1.6921019289172188, "learning_rate": 1.6860133085161813e-06, "loss": 0.3662850260734558, "step": 6653 }, { "epoch": 1.6441808747220164, "grad_norm": 1.3168035164098186, "learning_rate": 1.683743604742847e-06, "loss": 0.21026667952537537, "step": 6654 }, { "epoch": 1.6444279713367926, "grad_norm": 1.622991276525265, "learning_rate": 1.6814752892841014e-06, "loss": 0.33329063653945923, "step": 6655 }, { "epoch": 1.6446750679515691, "grad_norm": 1.369147454587579, "learning_rate": 1.6792083625186228e-06, "loss": 0.2656227946281433, "step": 6656 }, { "epoch": 1.6449221645663454, "grad_norm": 1.3118870791437844, "learning_rate": 1.6769428248248465e-06, "loss": 0.2287682592868805, "step": 6657 }, { "epoch": 1.6451692611811217, "grad_norm": 1.5789803608958415, "learning_rate": 1.6746786765809807e-06, "loss": 0.35094189643859863, "step": 6658 }, { "epoch": 1.6454163577958982, "grad_norm": 1.443303483484172, "learning_rate": 1.6724159181650024e-06, "loss": 0.2787724733352661, "step": 6659 }, { "epoch": 1.6456634544106745, "grad_norm": 1.6144704686712488, "learning_rate": 1.670154549954658e-06, "loss": 0.31566303968429565, "step": 6660 }, { "epoch": 1.6459105510254508, "grad_norm": 1.3232801171727775, "learning_rate": 1.667894572327461e-06, "loss": 0.2371065616607666, "step": 6661 }, { "epoch": 1.6461576476402273, "grad_norm": 1.3953456897346288, "learning_rate": 1.6656359856606874e-06, "loss": 0.2584149241447449, "step": 6662 }, { "epoch": 1.6464047442550038, "grad_norm": 1.368614518303612, "learning_rate": 1.6633787903313902e-06, "loss": 0.2819042205810547, "step": 6663 }, { "epoch": 1.6466518408697801, "grad_norm": 1.4908655575942704, "learning_rate": 1.6611229867163803e-06, "loss": 0.30839332938194275, "step": 6664 }, { "epoch": 1.6468989374845564, "grad_norm": 1.6410735660017681, "learning_rate": 1.658868575192244e-06, "loss": 0.3119989335536957, "step": 6665 }, { "epoch": 1.647146034099333, "grad_norm": 1.3057120903328105, "learning_rate": 1.6566155561353337e-06, "loss": 0.2854255139827728, "step": 6666 }, { "epoch": 1.6473931307141092, "grad_norm": 1.363170116205318, "learning_rate": 1.654363929921765e-06, "loss": 0.25662127137184143, "step": 6667 }, { "epoch": 1.6476402273288855, "grad_norm": 1.5023690864239267, "learning_rate": 1.6521136969274244e-06, "loss": 0.3195817172527313, "step": 6668 }, { "epoch": 1.647887323943662, "grad_norm": 1.3738360920564257, "learning_rate": 1.6498648575279675e-06, "loss": 0.24143803119659424, "step": 6669 }, { "epoch": 1.6481344205584385, "grad_norm": 1.3830261032147255, "learning_rate": 1.6476174120988165e-06, "loss": 0.2338714599609375, "step": 6670 }, { "epoch": 1.6483815171732146, "grad_norm": 1.4675210703960182, "learning_rate": 1.6453713610151567e-06, "loss": 0.30682456493377686, "step": 6671 }, { "epoch": 1.648628613787991, "grad_norm": 1.5262388604863653, "learning_rate": 1.6431267046519427e-06, "loss": 0.31199711561203003, "step": 6672 }, { "epoch": 1.6488757104027676, "grad_norm": 1.2654188221381912, "learning_rate": 1.6408834433838982e-06, "loss": 0.25643715262413025, "step": 6673 }, { "epoch": 1.6491228070175439, "grad_norm": 1.4302311326783423, "learning_rate": 1.6386415775855124e-06, "loss": 0.28184905648231506, "step": 6674 }, { "epoch": 1.6493699036323202, "grad_norm": 1.5711225260635642, "learning_rate": 1.6364011076310472e-06, "loss": 0.33535611629486084, "step": 6675 }, { "epoch": 1.6496170002470967, "grad_norm": 1.50014590553258, "learning_rate": 1.6341620338945187e-06, "loss": 0.25444716215133667, "step": 6676 }, { "epoch": 1.649864096861873, "grad_norm": 1.5177124963168458, "learning_rate": 1.6319243567497222e-06, "loss": 0.24313285946846008, "step": 6677 }, { "epoch": 1.6501111934766493, "grad_norm": 1.4708757833612187, "learning_rate": 1.6296880765702128e-06, "loss": 0.2648705244064331, "step": 6678 }, { "epoch": 1.6503582900914258, "grad_norm": 1.5662259004729102, "learning_rate": 1.627453193729316e-06, "loss": 0.30857181549072266, "step": 6679 }, { "epoch": 1.6506053867062023, "grad_norm": 1.2702928769530184, "learning_rate": 1.6252197086001242e-06, "loss": 0.2488110363483429, "step": 6680 }, { "epoch": 1.6508524833209783, "grad_norm": 1.3813944945707115, "learning_rate": 1.6229876215554919e-06, "loss": 0.25042489171028137, "step": 6681 }, { "epoch": 1.6510995799357548, "grad_norm": 1.3054691457115333, "learning_rate": 1.620756932968044e-06, "loss": 0.2040480077266693, "step": 6682 }, { "epoch": 1.6513466765505314, "grad_norm": 1.5232755427992501, "learning_rate": 1.6185276432101737e-06, "loss": 0.31110599637031555, "step": 6683 }, { "epoch": 1.6515937731653076, "grad_norm": 1.5597996895024948, "learning_rate": 1.616299752654038e-06, "loss": 0.3393228054046631, "step": 6684 }, { "epoch": 1.651840869780084, "grad_norm": 1.4174871677210805, "learning_rate": 1.6140732616715604e-06, "loss": 0.2751806378364563, "step": 6685 }, { "epoch": 1.6520879663948604, "grad_norm": 1.4628954737445492, "learning_rate": 1.6118481706344268e-06, "loss": 0.2676849663257599, "step": 6686 }, { "epoch": 1.6523350630096367, "grad_norm": 1.3536690905138145, "learning_rate": 1.609624479914098e-06, "loss": 0.26077619194984436, "step": 6687 }, { "epoch": 1.652582159624413, "grad_norm": 1.2550924422295242, "learning_rate": 1.6074021898817948e-06, "loss": 0.2079162299633026, "step": 6688 }, { "epoch": 1.6528292562391895, "grad_norm": 1.3434264090590449, "learning_rate": 1.6051813009085094e-06, "loss": 0.2678411304950714, "step": 6689 }, { "epoch": 1.653076352853966, "grad_norm": 1.223156991113494, "learning_rate": 1.6029618133649905e-06, "loss": 0.18293213844299316, "step": 6690 }, { "epoch": 1.6533234494687423, "grad_norm": 1.4794033783274378, "learning_rate": 1.6007437276217653e-06, "loss": 0.28844594955444336, "step": 6691 }, { "epoch": 1.6535705460835186, "grad_norm": 1.3106335023620292, "learning_rate": 1.5985270440491153e-06, "loss": 0.24604874849319458, "step": 6692 }, { "epoch": 1.6538176426982951, "grad_norm": 1.5884047822229324, "learning_rate": 1.5963117630170966e-06, "loss": 0.28576457500457764, "step": 6693 }, { "epoch": 1.6540647393130714, "grad_norm": 1.2809753833039907, "learning_rate": 1.594097884895529e-06, "loss": 0.2585046887397766, "step": 6694 }, { "epoch": 1.6543118359278477, "grad_norm": 1.3671652925007907, "learning_rate": 1.5918854100539926e-06, "loss": 0.26112839579582214, "step": 6695 }, { "epoch": 1.6545589325426242, "grad_norm": 1.3873282972485426, "learning_rate": 1.5896743388618396e-06, "loss": 0.24332180619239807, "step": 6696 }, { "epoch": 1.6548060291574005, "grad_norm": 1.3816955540021911, "learning_rate": 1.587464671688187e-06, "loss": 0.25648921728134155, "step": 6697 }, { "epoch": 1.6550531257721768, "grad_norm": 1.4653369765720896, "learning_rate": 1.5852564089019185e-06, "loss": 0.287777841091156, "step": 6698 }, { "epoch": 1.6553002223869533, "grad_norm": 1.4719542622661788, "learning_rate": 1.583049550871678e-06, "loss": 0.2562859058380127, "step": 6699 }, { "epoch": 1.6555473190017298, "grad_norm": 1.4240002175690298, "learning_rate": 1.5808440979658756e-06, "loss": 0.24682483077049255, "step": 6700 }, { "epoch": 1.655794415616506, "grad_norm": 1.4212273889290532, "learning_rate": 1.5786400505526922e-06, "loss": 0.2974008023738861, "step": 6701 }, { "epoch": 1.6560415122312824, "grad_norm": 1.2987561561834866, "learning_rate": 1.576437409000071e-06, "loss": 0.21028482913970947, "step": 6702 }, { "epoch": 1.6562886088460589, "grad_norm": 1.3039313283908547, "learning_rate": 1.5742361736757239e-06, "loss": 0.21836325526237488, "step": 6703 }, { "epoch": 1.6565357054608352, "grad_norm": 1.403290522103587, "learning_rate": 1.5720363449471176e-06, "loss": 0.2706948220729828, "step": 6704 }, { "epoch": 1.6567828020756115, "grad_norm": 1.289649752441213, "learning_rate": 1.5698379231814986e-06, "loss": 0.2671509087085724, "step": 6705 }, { "epoch": 1.657029898690388, "grad_norm": 1.4514499074640128, "learning_rate": 1.5676409087458654e-06, "loss": 0.2901592254638672, "step": 6706 }, { "epoch": 1.6572769953051645, "grad_norm": 1.493460568566915, "learning_rate": 1.5654453020069894e-06, "loss": 0.2747884690761566, "step": 6707 }, { "epoch": 1.6575240919199405, "grad_norm": 1.5380044512212077, "learning_rate": 1.5632511033314068e-06, "loss": 0.27654993534088135, "step": 6708 }, { "epoch": 1.657771188534717, "grad_norm": 1.3518609763600637, "learning_rate": 1.561058313085413e-06, "loss": 0.23665234446525574, "step": 6709 }, { "epoch": 1.6580182851494936, "grad_norm": 1.3261416886659614, "learning_rate": 1.5588669316350736e-06, "loss": 0.23639348149299622, "step": 6710 }, { "epoch": 1.6582653817642699, "grad_norm": 1.378631286856978, "learning_rate": 1.5566769593462184e-06, "loss": 0.2941332459449768, "step": 6711 }, { "epoch": 1.6585124783790461, "grad_norm": 1.5364588989961443, "learning_rate": 1.5544883965844425e-06, "loss": 0.29002705216407776, "step": 6712 }, { "epoch": 1.6587595749938226, "grad_norm": 1.490934990665403, "learning_rate": 1.552301243715103e-06, "loss": 0.2696673274040222, "step": 6713 }, { "epoch": 1.659006671608599, "grad_norm": 1.3716320675507574, "learning_rate": 1.5501155011033198e-06, "loss": 0.2316264808177948, "step": 6714 }, { "epoch": 1.6592537682233752, "grad_norm": 1.3731359817241757, "learning_rate": 1.547931169113982e-06, "loss": 0.26859840750694275, "step": 6715 }, { "epoch": 1.6595008648381517, "grad_norm": 1.3685390364709487, "learning_rate": 1.5457482481117425e-06, "loss": 0.26423361897468567, "step": 6716 }, { "epoch": 1.6597479614529282, "grad_norm": 1.395484722041058, "learning_rate": 1.543566738461021e-06, "loss": 0.293008029460907, "step": 6717 }, { "epoch": 1.6599950580677043, "grad_norm": 1.5516513086296602, "learning_rate": 1.5413866405259924e-06, "loss": 0.3074914216995239, "step": 6718 }, { "epoch": 1.6602421546824808, "grad_norm": 1.41373397876506, "learning_rate": 1.5392079546706062e-06, "loss": 0.3160524368286133, "step": 6719 }, { "epoch": 1.6604892512972573, "grad_norm": 1.2849004299094713, "learning_rate": 1.537030681258569e-06, "loss": 0.22909310460090637, "step": 6720 }, { "epoch": 1.6607363479120336, "grad_norm": 1.5110202309154122, "learning_rate": 1.5348548206533554e-06, "loss": 0.25828447937965393, "step": 6721 }, { "epoch": 1.66098344452681, "grad_norm": 1.4427540268115553, "learning_rate": 1.532680373218206e-06, "loss": 0.2750115990638733, "step": 6722 }, { "epoch": 1.6612305411415864, "grad_norm": 1.4073727429020777, "learning_rate": 1.5305073393161185e-06, "loss": 0.2533017694950104, "step": 6723 }, { "epoch": 1.6614776377563627, "grad_norm": 1.5985996138621348, "learning_rate": 1.5283357193098613e-06, "loss": 0.28905051946640015, "step": 6724 }, { "epoch": 1.661724734371139, "grad_norm": 1.456793531590081, "learning_rate": 1.5261655135619657e-06, "loss": 0.27673032879829407, "step": 6725 }, { "epoch": 1.6619718309859155, "grad_norm": 1.397222293868998, "learning_rate": 1.5239967224347207e-06, "loss": 0.2730797231197357, "step": 6726 }, { "epoch": 1.662218927600692, "grad_norm": 1.3075742126695673, "learning_rate": 1.521829346290189e-06, "loss": 0.24674327671527863, "step": 6727 }, { "epoch": 1.6624660242154683, "grad_norm": 1.38182657770084, "learning_rate": 1.5196633854901889e-06, "loss": 0.24527078866958618, "step": 6728 }, { "epoch": 1.6627131208302446, "grad_norm": 1.4465870181260427, "learning_rate": 1.5174988403963054e-06, "loss": 0.2903830111026764, "step": 6729 }, { "epoch": 1.662960217445021, "grad_norm": 1.376738195480861, "learning_rate": 1.5153357113698897e-06, "loss": 0.2559332847595215, "step": 6730 }, { "epoch": 1.6632073140597974, "grad_norm": 1.6894365453819546, "learning_rate": 1.513173998772054e-06, "loss": 0.31917864084243774, "step": 6731 }, { "epoch": 1.6634544106745737, "grad_norm": 1.4074842466719033, "learning_rate": 1.511013702963674e-06, "loss": 0.23089225590229034, "step": 6732 }, { "epoch": 1.6637015072893502, "grad_norm": 1.668646819527422, "learning_rate": 1.5088548243053868e-06, "loss": 0.3295859098434448, "step": 6733 }, { "epoch": 1.6639486039041265, "grad_norm": 1.450635631114892, "learning_rate": 1.5066973631575966e-06, "loss": 0.3021670877933502, "step": 6734 }, { "epoch": 1.6641957005189028, "grad_norm": 1.4704312630183372, "learning_rate": 1.5045413198804692e-06, "loss": 0.3114663362503052, "step": 6735 }, { "epoch": 1.6644427971336793, "grad_norm": 1.4558253943516537, "learning_rate": 1.5023866948339383e-06, "loss": 0.2993495464324951, "step": 6736 }, { "epoch": 1.6646898937484558, "grad_norm": 1.3751855709053027, "learning_rate": 1.5002334883776915e-06, "loss": 0.277102530002594, "step": 6737 }, { "epoch": 1.664936990363232, "grad_norm": 1.4976568373152308, "learning_rate": 1.4980817008711857e-06, "loss": 0.26431238651275635, "step": 6738 }, { "epoch": 1.6651840869780083, "grad_norm": 1.7115534329133972, "learning_rate": 1.4959313326736424e-06, "loss": 0.31521689891815186, "step": 6739 }, { "epoch": 1.6654311835927849, "grad_norm": 1.5530105570049522, "learning_rate": 1.4937823841440435e-06, "loss": 0.27824482321739197, "step": 6740 }, { "epoch": 1.6656782802075611, "grad_norm": 1.373035446261348, "learning_rate": 1.4916348556411308e-06, "loss": 0.3021219074726105, "step": 6741 }, { "epoch": 1.6659253768223374, "grad_norm": 1.5467222533170064, "learning_rate": 1.4894887475234132e-06, "loss": 0.30941155552864075, "step": 6742 }, { "epoch": 1.666172473437114, "grad_norm": 1.4443547246757904, "learning_rate": 1.487344060149164e-06, "loss": 0.28298401832580566, "step": 6743 }, { "epoch": 1.6664195700518905, "grad_norm": 1.488860462831069, "learning_rate": 1.485200793876418e-06, "loss": 0.3131595849990845, "step": 6744 }, { "epoch": 1.6666666666666665, "grad_norm": 1.601869375312531, "learning_rate": 1.4830589490629666e-06, "loss": 0.2980513870716095, "step": 6745 }, { "epoch": 1.666913763281443, "grad_norm": 1.4274214489638828, "learning_rate": 1.480918526066375e-06, "loss": 0.28926882147789, "step": 6746 }, { "epoch": 1.6671608598962195, "grad_norm": 1.3618357425874115, "learning_rate": 1.4787795252439596e-06, "loss": 0.25761595368385315, "step": 6747 }, { "epoch": 1.6674079565109958, "grad_norm": 1.4686243954957763, "learning_rate": 1.4766419469528082e-06, "loss": 0.25054603815078735, "step": 6748 }, { "epoch": 1.6676550531257721, "grad_norm": 1.3778599805798133, "learning_rate": 1.4745057915497697e-06, "loss": 0.2514180839061737, "step": 6749 }, { "epoch": 1.6679021497405486, "grad_norm": 1.4217987974357598, "learning_rate": 1.4723710593914476e-06, "loss": 0.27757707238197327, "step": 6750 }, { "epoch": 1.668149246355325, "grad_norm": 1.6298337297353478, "learning_rate": 1.470237750834217e-06, "loss": 0.3296423554420471, "step": 6751 }, { "epoch": 1.6683963429701012, "grad_norm": 1.3881822745313055, "learning_rate": 1.468105866234213e-06, "loss": 0.3139492869377136, "step": 6752 }, { "epoch": 1.6686434395848777, "grad_norm": 1.5530159793742193, "learning_rate": 1.465975405947333e-06, "loss": 0.3137839138507843, "step": 6753 }, { "epoch": 1.6688905361996542, "grad_norm": 1.4956772655366102, "learning_rate": 1.4638463703292338e-06, "loss": 0.29346203804016113, "step": 6754 }, { "epoch": 1.6691376328144303, "grad_norm": 1.3896880932562046, "learning_rate": 1.4617187597353354e-06, "loss": 0.25937706232070923, "step": 6755 }, { "epoch": 1.6693847294292068, "grad_norm": 1.9759969591400321, "learning_rate": 1.4595925745208206e-06, "loss": 0.2748869061470032, "step": 6756 }, { "epoch": 1.6696318260439833, "grad_norm": 1.4433214289631773, "learning_rate": 1.4574678150406362e-06, "loss": 0.31039315462112427, "step": 6757 }, { "epoch": 1.6698789226587596, "grad_norm": 1.383214024956362, "learning_rate": 1.4553444816494921e-06, "loss": 0.24858254194259644, "step": 6758 }, { "epoch": 1.6701260192735359, "grad_norm": 1.5904875497116564, "learning_rate": 1.4532225747018503e-06, "loss": 0.30396923422813416, "step": 6759 }, { "epoch": 1.6703731158883124, "grad_norm": 1.7800481502553311, "learning_rate": 1.4511020945519482e-06, "loss": 0.24832972884178162, "step": 6760 }, { "epoch": 1.6706202125030887, "grad_norm": 1.4107125198287809, "learning_rate": 1.4489830415537732e-06, "loss": 0.28364062309265137, "step": 6761 }, { "epoch": 1.670867309117865, "grad_norm": 1.615642361620193, "learning_rate": 1.4468654160610819e-06, "loss": 0.28614646196365356, "step": 6762 }, { "epoch": 1.6711144057326415, "grad_norm": 1.53368223524525, "learning_rate": 1.4447492184273926e-06, "loss": 0.25964659452438354, "step": 6763 }, { "epoch": 1.671361502347418, "grad_norm": 1.420682166463921, "learning_rate": 1.4426344490059785e-06, "loss": 0.3139677047729492, "step": 6764 }, { "epoch": 1.6716085989621943, "grad_norm": 1.5620442980439335, "learning_rate": 1.4405211081498816e-06, "loss": 0.28331616520881653, "step": 6765 }, { "epoch": 1.6718556955769706, "grad_norm": 1.5606682372551202, "learning_rate": 1.4384091962119018e-06, "loss": 0.3062552809715271, "step": 6766 }, { "epoch": 1.672102792191747, "grad_norm": 1.368215443262436, "learning_rate": 1.436298713544605e-06, "loss": 0.2616499662399292, "step": 6767 }, { "epoch": 1.6723498888065234, "grad_norm": 1.5889471542682791, "learning_rate": 1.4341896605003114e-06, "loss": 0.2748693823814392, "step": 6768 }, { "epoch": 1.6725969854212996, "grad_norm": 1.3359943669901606, "learning_rate": 1.432082037431104e-06, "loss": 0.24336282908916473, "step": 6769 }, { "epoch": 1.6728440820360762, "grad_norm": 1.4984804191742922, "learning_rate": 1.4299758446888312e-06, "loss": 0.2789947986602783, "step": 6770 }, { "epoch": 1.6730911786508524, "grad_norm": 1.439448399304725, "learning_rate": 1.4278710826251008e-06, "loss": 0.28275081515312195, "step": 6771 }, { "epoch": 1.6733382752656287, "grad_norm": 1.428348247706026, "learning_rate": 1.4257677515912849e-06, "loss": 0.24826860427856445, "step": 6772 }, { "epoch": 1.6735853718804052, "grad_norm": 1.4205587639870711, "learning_rate": 1.423665851938506e-06, "loss": 0.27263253927230835, "step": 6773 }, { "epoch": 1.6738324684951817, "grad_norm": 1.5142894561791256, "learning_rate": 1.4215653840176624e-06, "loss": 0.3005787134170532, "step": 6774 }, { "epoch": 1.674079565109958, "grad_norm": 1.433632920033978, "learning_rate": 1.419466348179399e-06, "loss": 0.28340086340904236, "step": 6775 }, { "epoch": 1.6743266617247343, "grad_norm": 1.50894776733457, "learning_rate": 1.4173687447741312e-06, "loss": 0.2819329798221588, "step": 6776 }, { "epoch": 1.6745737583395108, "grad_norm": 1.392668742163239, "learning_rate": 1.4152725741520368e-06, "loss": 0.26226353645324707, "step": 6777 }, { "epoch": 1.6748208549542871, "grad_norm": 1.4661440951763132, "learning_rate": 1.4131778366630434e-06, "loss": 0.343256413936615, "step": 6778 }, { "epoch": 1.6750679515690634, "grad_norm": 1.5044132027079293, "learning_rate": 1.411084532656849e-06, "loss": 0.284795343875885, "step": 6779 }, { "epoch": 1.67531504818384, "grad_norm": 1.7800303639006794, "learning_rate": 1.4089926624829109e-06, "loss": 0.3264075219631195, "step": 6780 }, { "epoch": 1.6755621447986162, "grad_norm": 1.4787399459660127, "learning_rate": 1.4069022264904463e-06, "loss": 0.3416551947593689, "step": 6781 }, { "epoch": 1.6758092414133925, "grad_norm": 1.2948407990648385, "learning_rate": 1.4048132250284297e-06, "loss": 0.21448442339897156, "step": 6782 }, { "epoch": 1.676056338028169, "grad_norm": 1.4800407221513177, "learning_rate": 1.4027256584455972e-06, "loss": 0.30033770203590393, "step": 6783 }, { "epoch": 1.6763034346429455, "grad_norm": 1.4023584560607283, "learning_rate": 1.4006395270904505e-06, "loss": 0.2543056905269623, "step": 6784 }, { "epoch": 1.6765505312577218, "grad_norm": 1.55669855242776, "learning_rate": 1.3985548313112452e-06, "loss": 0.3248399794101715, "step": 6785 }, { "epoch": 1.676797627872498, "grad_norm": 1.4687685546865838, "learning_rate": 1.396471571456004e-06, "loss": 0.2693411111831665, "step": 6786 }, { "epoch": 1.6770447244872746, "grad_norm": 1.5581395318428704, "learning_rate": 1.394389747872501e-06, "loss": 0.31503772735595703, "step": 6787 }, { "epoch": 1.6772918211020509, "grad_norm": 1.4840300221531622, "learning_rate": 1.3923093609082815e-06, "loss": 0.277890682220459, "step": 6788 }, { "epoch": 1.6775389177168272, "grad_norm": 1.4417746406147935, "learning_rate": 1.390230410910638e-06, "loss": 0.2905416488647461, "step": 6789 }, { "epoch": 1.6777860143316037, "grad_norm": 1.5966308316214548, "learning_rate": 1.3881528982266346e-06, "loss": 0.3012067675590515, "step": 6790 }, { "epoch": 1.6780331109463802, "grad_norm": 1.4807309667892177, "learning_rate": 1.3860768232030908e-06, "loss": 0.29237353801727295, "step": 6791 }, { "epoch": 1.6782802075611563, "grad_norm": 1.3111415188472195, "learning_rate": 1.3840021861865837e-06, "loss": 0.26656967401504517, "step": 6792 }, { "epoch": 1.6785273041759328, "grad_norm": 1.5155091046174884, "learning_rate": 1.3819289875234554e-06, "loss": 0.28012120723724365, "step": 6793 }, { "epoch": 1.6787744007907093, "grad_norm": 1.325551069842971, "learning_rate": 1.3798572275598065e-06, "loss": 0.27780941128730774, "step": 6794 }, { "epoch": 1.6790214974054856, "grad_norm": 1.536876422866875, "learning_rate": 1.3777869066414918e-06, "loss": 0.28360849618911743, "step": 6795 }, { "epoch": 1.6792685940202619, "grad_norm": 1.3422051114412281, "learning_rate": 1.3757180251141356e-06, "loss": 0.24598261713981628, "step": 6796 }, { "epoch": 1.6795156906350384, "grad_norm": 1.3609167300943488, "learning_rate": 1.3736505833231105e-06, "loss": 0.26466742157936096, "step": 6797 }, { "epoch": 1.6797627872498146, "grad_norm": 1.4863796311328554, "learning_rate": 1.3715845816135597e-06, "loss": 0.2652093768119812, "step": 6798 }, { "epoch": 1.680009883864591, "grad_norm": 1.3246418543573542, "learning_rate": 1.3695200203303792e-06, "loss": 0.2786561846733093, "step": 6799 }, { "epoch": 1.6802569804793674, "grad_norm": 1.26539475996731, "learning_rate": 1.3674568998182303e-06, "loss": 0.2276887446641922, "step": 6800 }, { "epoch": 1.680504077094144, "grad_norm": 1.4950610359488512, "learning_rate": 1.3653952204215271e-06, "loss": 0.32595402002334595, "step": 6801 }, { "epoch": 1.6807511737089202, "grad_norm": 1.6555169103678642, "learning_rate": 1.3633349824844433e-06, "loss": 0.31117522716522217, "step": 6802 }, { "epoch": 1.6809982703236965, "grad_norm": 1.451457488971979, "learning_rate": 1.3612761863509173e-06, "loss": 0.2417975217103958, "step": 6803 }, { "epoch": 1.681245366938473, "grad_norm": 1.3088705973166819, "learning_rate": 1.3592188323646438e-06, "loss": 0.24257805943489075, "step": 6804 }, { "epoch": 1.6814924635532493, "grad_norm": 1.5332572267421174, "learning_rate": 1.3571629208690806e-06, "loss": 0.26978129148483276, "step": 6805 }, { "epoch": 1.6817395601680256, "grad_norm": 1.325585718053376, "learning_rate": 1.3551084522074353e-06, "loss": 0.24391987919807434, "step": 6806 }, { "epoch": 1.6819866567828021, "grad_norm": 1.4416930418997205, "learning_rate": 1.3530554267226848e-06, "loss": 0.2207891047000885, "step": 6807 }, { "epoch": 1.6822337533975784, "grad_norm": 1.3716636022250461, "learning_rate": 1.3510038447575613e-06, "loss": 0.2480926811695099, "step": 6808 }, { "epoch": 1.6824808500123547, "grad_norm": 1.3815589368804242, "learning_rate": 1.3489537066545522e-06, "loss": 0.2831500768661499, "step": 6809 }, { "epoch": 1.6827279466271312, "grad_norm": 1.5569748059342812, "learning_rate": 1.3469050127559101e-06, "loss": 0.26877349615097046, "step": 6810 }, { "epoch": 1.6829750432419077, "grad_norm": 1.4062827028156002, "learning_rate": 1.3448577634036409e-06, "loss": 0.21874278783798218, "step": 6811 }, { "epoch": 1.683222139856684, "grad_norm": 1.3626845446535565, "learning_rate": 1.342811958939514e-06, "loss": 0.2525160312652588, "step": 6812 }, { "epoch": 1.6834692364714603, "grad_norm": 1.4513882487871503, "learning_rate": 1.340767599705055e-06, "loss": 0.3276200294494629, "step": 6813 }, { "epoch": 1.6837163330862368, "grad_norm": 1.3271798926336884, "learning_rate": 1.3387246860415514e-06, "loss": 0.22969838976860046, "step": 6814 }, { "epoch": 1.683963429701013, "grad_norm": 1.3232595654245294, "learning_rate": 1.3366832182900458e-06, "loss": 0.21597617864608765, "step": 6815 }, { "epoch": 1.6842105263157894, "grad_norm": 2.811986362910175, "learning_rate": 1.3346431967913364e-06, "loss": 0.2954041063785553, "step": 6816 }, { "epoch": 1.684457622930566, "grad_norm": 1.3860475987308727, "learning_rate": 1.3326046218859879e-06, "loss": 0.26150429248809814, "step": 6817 }, { "epoch": 1.6847047195453422, "grad_norm": 1.5031646811806667, "learning_rate": 1.3305674939143186e-06, "loss": 0.2998274862766266, "step": 6818 }, { "epoch": 1.6849518161601185, "grad_norm": 1.3220767301661325, "learning_rate": 1.3285318132164093e-06, "loss": 0.2644274830818176, "step": 6819 }, { "epoch": 1.685198912774895, "grad_norm": 1.3812390157914631, "learning_rate": 1.3264975801320912e-06, "loss": 0.31760042905807495, "step": 6820 }, { "epoch": 1.6854460093896715, "grad_norm": 1.353263366893938, "learning_rate": 1.3244647950009615e-06, "loss": 0.25446754693984985, "step": 6821 }, { "epoch": 1.6856931060044478, "grad_norm": 1.4747349516484856, "learning_rate": 1.3224334581623754e-06, "loss": 0.2593305706977844, "step": 6822 }, { "epoch": 1.685940202619224, "grad_norm": 1.4405147803596274, "learning_rate": 1.3204035699554396e-06, "loss": 0.24727123975753784, "step": 6823 }, { "epoch": 1.6861872992340006, "grad_norm": 1.7597824563675217, "learning_rate": 1.3183751307190263e-06, "loss": 0.27266228199005127, "step": 6824 }, { "epoch": 1.6864343958487769, "grad_norm": 1.3399122629485007, "learning_rate": 1.31634814079176e-06, "loss": 0.3053780198097229, "step": 6825 }, { "epoch": 1.6866814924635531, "grad_norm": 1.3561730715327276, "learning_rate": 1.314322600512028e-06, "loss": 0.2374938726425171, "step": 6826 }, { "epoch": 1.6869285890783297, "grad_norm": 1.360387483154895, "learning_rate": 1.3122985102179742e-06, "loss": 0.2542724609375, "step": 6827 }, { "epoch": 1.6871756856931062, "grad_norm": 1.4278538015540536, "learning_rate": 1.3102758702475004e-06, "loss": 0.2777867615222931, "step": 6828 }, { "epoch": 1.6874227823078822, "grad_norm": 1.4950078575271097, "learning_rate": 1.3082546809382658e-06, "loss": 0.28697115182876587, "step": 6829 }, { "epoch": 1.6876698789226587, "grad_norm": 1.4089613079642012, "learning_rate": 1.3062349426276832e-06, "loss": 0.28541281819343567, "step": 6830 }, { "epoch": 1.6879169755374352, "grad_norm": 1.3451354015059407, "learning_rate": 1.3042166556529302e-06, "loss": 0.23219479620456696, "step": 6831 }, { "epoch": 1.6881640721522115, "grad_norm": 1.5115805008639496, "learning_rate": 1.3021998203509423e-06, "loss": 0.3147561550140381, "step": 6832 }, { "epoch": 1.6884111687669878, "grad_norm": 1.3681197575391548, "learning_rate": 1.3001844370584049e-06, "loss": 0.2885611653327942, "step": 6833 }, { "epoch": 1.6886582653817643, "grad_norm": 1.4914224160960763, "learning_rate": 1.2981705061117688e-06, "loss": 0.28092193603515625, "step": 6834 }, { "epoch": 1.6889053619965406, "grad_norm": 1.3024570891641085, "learning_rate": 1.2961580278472374e-06, "loss": 0.24296852946281433, "step": 6835 }, { "epoch": 1.689152458611317, "grad_norm": 1.4674164498001003, "learning_rate": 1.2941470026007764e-06, "loss": 0.2582273483276367, "step": 6836 }, { "epoch": 1.6893995552260934, "grad_norm": 1.484219364984869, "learning_rate": 1.2921374307081046e-06, "loss": 0.28809621930122375, "step": 6837 }, { "epoch": 1.68964665184087, "grad_norm": 1.5205311728923783, "learning_rate": 1.2901293125046976e-06, "loss": 0.3091210126876831, "step": 6838 }, { "epoch": 1.689893748455646, "grad_norm": 1.3810487209248983, "learning_rate": 1.2881226483257913e-06, "loss": 0.250121146440506, "step": 6839 }, { "epoch": 1.6901408450704225, "grad_norm": 1.3951322271642952, "learning_rate": 1.286117438506379e-06, "loss": 0.25239643454551697, "step": 6840 }, { "epoch": 1.690387941685199, "grad_norm": 1.330256882446873, "learning_rate": 1.2841136833812117e-06, "loss": 0.2443442940711975, "step": 6841 }, { "epoch": 1.6906350382999753, "grad_norm": 1.3936891995779688, "learning_rate": 1.282111383284793e-06, "loss": 0.26234039664268494, "step": 6842 }, { "epoch": 1.6908821349147516, "grad_norm": 1.5294290139642543, "learning_rate": 1.2801105385513879e-06, "loss": 0.31126946210861206, "step": 6843 }, { "epoch": 1.691129231529528, "grad_norm": 1.5346088374235338, "learning_rate": 1.2781111495150155e-06, "loss": 0.26726317405700684, "step": 6844 }, { "epoch": 1.6913763281443044, "grad_norm": 1.7384243662131857, "learning_rate": 1.2761132165094558e-06, "loss": 0.3266991972923279, "step": 6845 }, { "epoch": 1.6916234247590807, "grad_norm": 1.6474280087161297, "learning_rate": 1.274116739868243e-06, "loss": 0.27063998579978943, "step": 6846 }, { "epoch": 1.6918705213738572, "grad_norm": 1.4670609765749558, "learning_rate": 1.272121719924667e-06, "loss": 0.26040685176849365, "step": 6847 }, { "epoch": 1.6921176179886337, "grad_norm": 1.3306865555396976, "learning_rate": 1.2701281570117762e-06, "loss": 0.2427264004945755, "step": 6848 }, { "epoch": 1.69236471460341, "grad_norm": 1.5240645861170523, "learning_rate": 1.2681360514623775e-06, "loss": 0.294477641582489, "step": 6849 }, { "epoch": 1.6926118112181863, "grad_norm": 1.4337317825377507, "learning_rate": 1.2661454036090337e-06, "loss": 0.2663026452064514, "step": 6850 }, { "epoch": 1.6928589078329628, "grad_norm": 1.2958662753489116, "learning_rate": 1.2641562137840613e-06, "loss": 0.2352847307920456, "step": 6851 }, { "epoch": 1.693106004447739, "grad_norm": 1.487748061020772, "learning_rate": 1.2621684823195334e-06, "loss": 0.2702494263648987, "step": 6852 }, { "epoch": 1.6933531010625154, "grad_norm": 1.465643780626576, "learning_rate": 1.2601822095472849e-06, "loss": 0.2448142170906067, "step": 6853 }, { "epoch": 1.6936001976772919, "grad_norm": 1.708321457982202, "learning_rate": 1.2581973957989014e-06, "loss": 0.33964014053344727, "step": 6854 }, { "epoch": 1.6938472942920682, "grad_norm": 1.5746228099424942, "learning_rate": 1.256214041405731e-06, "loss": 0.30736011266708374, "step": 6855 }, { "epoch": 1.6940943909068444, "grad_norm": 1.530461472957817, "learning_rate": 1.2542321466988694e-06, "loss": 0.3191152513027191, "step": 6856 }, { "epoch": 1.694341487521621, "grad_norm": 1.5261240363137634, "learning_rate": 1.2522517120091804e-06, "loss": 0.2705995738506317, "step": 6857 }, { "epoch": 1.6945885841363975, "grad_norm": 1.2275952982824492, "learning_rate": 1.2502727376672708e-06, "loss": 0.19873923063278198, "step": 6858 }, { "epoch": 1.6948356807511737, "grad_norm": 1.7128991239460616, "learning_rate": 1.2482952240035129e-06, "loss": 0.27387624979019165, "step": 6859 }, { "epoch": 1.69508277736595, "grad_norm": 1.4471009030177862, "learning_rate": 1.2463191713480348e-06, "loss": 0.28333571553230286, "step": 6860 }, { "epoch": 1.6953298739807265, "grad_norm": 1.4023009655366645, "learning_rate": 1.2443445800307151e-06, "loss": 0.26677578687667847, "step": 6861 }, { "epoch": 1.6955769705955028, "grad_norm": 1.355995576225978, "learning_rate": 1.2423714503811923e-06, "loss": 0.24434450268745422, "step": 6862 }, { "epoch": 1.6958240672102791, "grad_norm": 1.495819402003307, "learning_rate": 1.2403997827288639e-06, "loss": 0.2689891755580902, "step": 6863 }, { "epoch": 1.6960711638250556, "grad_norm": 1.4998538187028223, "learning_rate": 1.2384295774028755e-06, "loss": 0.2572968006134033, "step": 6864 }, { "epoch": 1.6963182604398321, "grad_norm": 1.3240415021416774, "learning_rate": 1.236460834732135e-06, "loss": 0.24279406666755676, "step": 6865 }, { "epoch": 1.6965653570546082, "grad_norm": 1.4430614297124238, "learning_rate": 1.234493555045303e-06, "loss": 0.29468774795532227, "step": 6866 }, { "epoch": 1.6968124536693847, "grad_norm": 1.3975227183022383, "learning_rate": 1.2325277386707968e-06, "loss": 0.2759101390838623, "step": 6867 }, { "epoch": 1.6970595502841612, "grad_norm": 1.296107539602407, "learning_rate": 1.2305633859367894e-06, "loss": 0.21888861060142517, "step": 6868 }, { "epoch": 1.6973066468989375, "grad_norm": 1.493586625191896, "learning_rate": 1.2286004971712129e-06, "loss": 0.28482499718666077, "step": 6869 }, { "epoch": 1.6975537435137138, "grad_norm": 1.4115647575942374, "learning_rate": 1.2266390727017497e-06, "loss": 0.24363768100738525, "step": 6870 }, { "epoch": 1.6978008401284903, "grad_norm": 1.5586958948438916, "learning_rate": 1.2246791128558355e-06, "loss": 0.3167664408683777, "step": 6871 }, { "epoch": 1.6980479367432666, "grad_norm": 1.265749301767719, "learning_rate": 1.22272061796067e-06, "loss": 0.2270520031452179, "step": 6872 }, { "epoch": 1.6982950333580429, "grad_norm": 1.4021924859284671, "learning_rate": 1.2207635883432034e-06, "loss": 0.2527700960636139, "step": 6873 }, { "epoch": 1.6985421299728194, "grad_norm": 1.415858737449542, "learning_rate": 1.2188080243301438e-06, "loss": 0.25335556268692017, "step": 6874 }, { "epoch": 1.698789226587596, "grad_norm": 1.5082517108298237, "learning_rate": 1.2168539262479485e-06, "loss": 0.2594073712825775, "step": 6875 }, { "epoch": 1.699036323202372, "grad_norm": 1.6938266091013943, "learning_rate": 1.2149012944228355e-06, "loss": 0.29272621870040894, "step": 6876 }, { "epoch": 1.6992834198171485, "grad_norm": 1.5543414057620257, "learning_rate": 1.212950129180781e-06, "loss": 0.30222105979919434, "step": 6877 }, { "epoch": 1.699530516431925, "grad_norm": 1.3537449088745375, "learning_rate": 1.2110004308475064e-06, "loss": 0.2414250671863556, "step": 6878 }, { "epoch": 1.6997776130467013, "grad_norm": 1.802092157856936, "learning_rate": 1.2090521997485005e-06, "loss": 0.317679762840271, "step": 6879 }, { "epoch": 1.7000247096614776, "grad_norm": 1.3198599037602263, "learning_rate": 1.207105436208993e-06, "loss": 0.23050935566425323, "step": 6880 }, { "epoch": 1.700271806276254, "grad_norm": 1.5328363201259716, "learning_rate": 1.2051601405539815e-06, "loss": 0.2857234477996826, "step": 6881 }, { "epoch": 1.7005189028910304, "grad_norm": 1.4217674279140973, "learning_rate": 1.203216313108212e-06, "loss": 0.21618789434432983, "step": 6882 }, { "epoch": 1.7007659995058066, "grad_norm": 1.457672489145162, "learning_rate": 1.2012739541961883e-06, "loss": 0.25724685192108154, "step": 6883 }, { "epoch": 1.7010130961205832, "grad_norm": 1.6710573522170418, "learning_rate": 1.1993330641421675e-06, "loss": 0.2997399568557739, "step": 6884 }, { "epoch": 1.7012601927353597, "grad_norm": 1.3752041610419643, "learning_rate": 1.1973936432701572e-06, "loss": 0.2729799449443817, "step": 6885 }, { "epoch": 1.701507289350136, "grad_norm": 1.4002201479101197, "learning_rate": 1.1954556919039262e-06, "loss": 0.2472270131111145, "step": 6886 }, { "epoch": 1.7017543859649122, "grad_norm": 1.4931684403435088, "learning_rate": 1.1935192103669979e-06, "loss": 0.2600235939025879, "step": 6887 }, { "epoch": 1.7020014825796888, "grad_norm": 1.5727649482566988, "learning_rate": 1.1915841989826482e-06, "loss": 0.3002133369445801, "step": 6888 }, { "epoch": 1.702248579194465, "grad_norm": 1.3797620159143478, "learning_rate": 1.1896506580739042e-06, "loss": 0.21831703186035156, "step": 6889 }, { "epoch": 1.7024956758092413, "grad_norm": 1.4277506963400057, "learning_rate": 1.1877185879635534e-06, "loss": 0.3050020933151245, "step": 6890 }, { "epoch": 1.7027427724240178, "grad_norm": 1.2506838362936232, "learning_rate": 1.1857879889741374e-06, "loss": 0.2614010274410248, "step": 6891 }, { "epoch": 1.7029898690387941, "grad_norm": 1.3991011437481828, "learning_rate": 1.1838588614279445e-06, "loss": 0.23937636613845825, "step": 6892 }, { "epoch": 1.7032369656535704, "grad_norm": 1.493376014261966, "learning_rate": 1.1819312056470289e-06, "loss": 0.26206231117248535, "step": 6893 }, { "epoch": 1.703484062268347, "grad_norm": 1.5850111487070349, "learning_rate": 1.1800050219531878e-06, "loss": 0.3199000954627991, "step": 6894 }, { "epoch": 1.7037311588831234, "grad_norm": 1.4375319812914806, "learning_rate": 1.1780803106679818e-06, "loss": 0.2650502920150757, "step": 6895 }, { "epoch": 1.7039782554978997, "grad_norm": 1.618115325414671, "learning_rate": 1.1761570721127191e-06, "loss": 0.3229297399520874, "step": 6896 }, { "epoch": 1.704225352112676, "grad_norm": 1.4120887724951068, "learning_rate": 1.17423530660847e-06, "loss": 0.2818106412887573, "step": 6897 }, { "epoch": 1.7044724487274525, "grad_norm": 1.3046763995199264, "learning_rate": 1.1723150144760486e-06, "loss": 0.2746695280075073, "step": 6898 }, { "epoch": 1.7047195453422288, "grad_norm": 1.5659371217184117, "learning_rate": 1.1703961960360288e-06, "loss": 0.30414631962776184, "step": 6899 }, { "epoch": 1.704966641957005, "grad_norm": 1.5378583514088782, "learning_rate": 1.1684788516087386e-06, "loss": 0.32269996404647827, "step": 6900 }, { "epoch": 1.7052137385717816, "grad_norm": 1.4082705811113505, "learning_rate": 1.1665629815142598e-06, "loss": 0.2104026973247528, "step": 6901 }, { "epoch": 1.705460835186558, "grad_norm": 1.6219627694079075, "learning_rate": 1.1646485860724288e-06, "loss": 0.3193133473396301, "step": 6902 }, { "epoch": 1.7057079318013342, "grad_norm": 1.3420269536856633, "learning_rate": 1.16273566560283e-06, "loss": 0.2493172287940979, "step": 6903 }, { "epoch": 1.7059550284161107, "grad_norm": 1.4820644394027007, "learning_rate": 1.1608242204248098e-06, "loss": 0.25193074345588684, "step": 6904 }, { "epoch": 1.7062021250308872, "grad_norm": 1.6327680859366072, "learning_rate": 1.1589142508574657e-06, "loss": 0.32295727729797363, "step": 6905 }, { "epoch": 1.7064492216456635, "grad_norm": 1.4255928273511158, "learning_rate": 1.1570057572196436e-06, "loss": 0.26510387659072876, "step": 6906 }, { "epoch": 1.7066963182604398, "grad_norm": 1.6010010517609528, "learning_rate": 1.1550987398299508e-06, "loss": 0.3316687345504761, "step": 6907 }, { "epoch": 1.7069434148752163, "grad_norm": 1.2644277331935125, "learning_rate": 1.1531931990067403e-06, "loss": 0.21197021007537842, "step": 6908 }, { "epoch": 1.7071905114899926, "grad_norm": 1.4098338470840053, "learning_rate": 1.1512891350681266e-06, "loss": 0.29664772748947144, "step": 6909 }, { "epoch": 1.7074376081047689, "grad_norm": 1.3608110536449907, "learning_rate": 1.1493865483319711e-06, "loss": 0.2076333463191986, "step": 6910 }, { "epoch": 1.7076847047195454, "grad_norm": 1.5065793652880255, "learning_rate": 1.1474854391158962e-06, "loss": 0.2752513885498047, "step": 6911 }, { "epoch": 1.7079318013343219, "grad_norm": 1.4597480524791906, "learning_rate": 1.1455858077372695e-06, "loss": 0.28399693965911865, "step": 6912 }, { "epoch": 1.708178897949098, "grad_norm": 1.413722778137099, "learning_rate": 1.1436876545132125e-06, "loss": 0.2696390450000763, "step": 6913 }, { "epoch": 1.7084259945638745, "grad_norm": 1.3209587959089402, "learning_rate": 1.1417909797606053e-06, "loss": 0.2735617458820343, "step": 6914 }, { "epoch": 1.708673091178651, "grad_norm": 1.5010922768775954, "learning_rate": 1.139895783796079e-06, "loss": 0.2632818818092346, "step": 6915 }, { "epoch": 1.7089201877934272, "grad_norm": 1.5865474646503996, "learning_rate": 1.138002066936018e-06, "loss": 0.3133384585380554, "step": 6916 }, { "epoch": 1.7091672844082035, "grad_norm": 1.2938404874555267, "learning_rate": 1.1361098294965555e-06, "loss": 0.2478538453578949, "step": 6917 }, { "epoch": 1.70941438102298, "grad_norm": 1.4677610619263723, "learning_rate": 1.1342190717935853e-06, "loss": 0.2901017665863037, "step": 6918 }, { "epoch": 1.7096614776377563, "grad_norm": 1.5122407689348394, "learning_rate": 1.1323297941427492e-06, "loss": 0.316777765750885, "step": 6919 }, { "epoch": 1.7099085742525326, "grad_norm": 1.5621812820422327, "learning_rate": 1.1304419968594427e-06, "loss": 0.21682852506637573, "step": 6920 }, { "epoch": 1.7101556708673091, "grad_norm": 1.334171886806009, "learning_rate": 1.1285556802588127e-06, "loss": 0.22904005646705627, "step": 6921 }, { "epoch": 1.7104027674820856, "grad_norm": 1.4519004381672185, "learning_rate": 1.1266708446557607e-06, "loss": 0.2498546540737152, "step": 6922 }, { "epoch": 1.710649864096862, "grad_norm": 1.3673603282112732, "learning_rate": 1.1247874903649437e-06, "loss": 0.2652742862701416, "step": 6923 }, { "epoch": 1.7108969607116382, "grad_norm": 1.3608733755328362, "learning_rate": 1.122905617700768e-06, "loss": 0.26589280366897583, "step": 6924 }, { "epoch": 1.7111440573264147, "grad_norm": 1.5537897473956168, "learning_rate": 1.1210252269773902e-06, "loss": 0.3565906286239624, "step": 6925 }, { "epoch": 1.711391153941191, "grad_norm": 1.4126636976880609, "learning_rate": 1.119146318508726e-06, "loss": 0.29084935784339905, "step": 6926 }, { "epoch": 1.7116382505559673, "grad_norm": 1.4770020982347287, "learning_rate": 1.1172688926084374e-06, "loss": 0.25140485167503357, "step": 6927 }, { "epoch": 1.7118853471707438, "grad_norm": 1.5655169996052407, "learning_rate": 1.1153929495899418e-06, "loss": 0.24697132408618927, "step": 6928 }, { "epoch": 1.71213244378552, "grad_norm": 1.50595074144522, "learning_rate": 1.113518489766412e-06, "loss": 0.23932281136512756, "step": 6929 }, { "epoch": 1.7123795404002964, "grad_norm": 1.6681447663343663, "learning_rate": 1.1116455134507665e-06, "loss": 0.33571571111679077, "step": 6930 }, { "epoch": 1.712626637015073, "grad_norm": 1.661239351080029, "learning_rate": 1.1097740209556795e-06, "loss": 0.2667762339115143, "step": 6931 }, { "epoch": 1.7128737336298494, "grad_norm": 1.5177769557246836, "learning_rate": 1.1079040125935824e-06, "loss": 0.25087958574295044, "step": 6932 }, { "epoch": 1.7131208302446257, "grad_norm": 1.4082874166712949, "learning_rate": 1.1060354886766477e-06, "loss": 0.29704880714416504, "step": 6933 }, { "epoch": 1.713367926859402, "grad_norm": 1.759886543917882, "learning_rate": 1.1041684495168127e-06, "loss": 0.3309004306793213, "step": 6934 }, { "epoch": 1.7136150234741785, "grad_norm": 1.572850823189218, "learning_rate": 1.1023028954257552e-06, "loss": 0.32410120964050293, "step": 6935 }, { "epoch": 1.7138621200889548, "grad_norm": 1.3438278223834208, "learning_rate": 1.1004388267149124e-06, "loss": 0.21922260522842407, "step": 6936 }, { "epoch": 1.714109216703731, "grad_norm": 1.3580177963116635, "learning_rate": 1.0985762436954728e-06, "loss": 0.25854989886283875, "step": 6937 }, { "epoch": 1.7143563133185076, "grad_norm": 1.2747361619247584, "learning_rate": 1.0967151466783776e-06, "loss": 0.21877816319465637, "step": 6938 }, { "epoch": 1.7146034099332839, "grad_norm": 1.32886675714792, "learning_rate": 1.094855535974314e-06, "loss": 0.2777125835418701, "step": 6939 }, { "epoch": 1.7148505065480601, "grad_norm": 1.4396684295597924, "learning_rate": 1.0929974118937258e-06, "loss": 0.3097342252731323, "step": 6940 }, { "epoch": 1.7150976031628367, "grad_norm": 1.6373926704654094, "learning_rate": 1.0911407747468083e-06, "loss": 0.3343542814254761, "step": 6941 }, { "epoch": 1.7153446997776132, "grad_norm": 1.5123452089609726, "learning_rate": 1.0892856248435091e-06, "loss": 0.291355699300766, "step": 6942 }, { "epoch": 1.7155917963923895, "grad_norm": 1.3799993192335567, "learning_rate": 1.087431962493528e-06, "loss": 0.29196006059646606, "step": 6943 }, { "epoch": 1.7158388930071657, "grad_norm": 1.606402735971245, "learning_rate": 1.0855797880063112e-06, "loss": 0.3447888493537903, "step": 6944 }, { "epoch": 1.7160859896219423, "grad_norm": 1.4103662422953764, "learning_rate": 1.0837291016910634e-06, "loss": 0.2927798926830292, "step": 6945 }, { "epoch": 1.7163330862367185, "grad_norm": 1.4715789742317622, "learning_rate": 1.0818799038567385e-06, "loss": 0.2860984802246094, "step": 6946 }, { "epoch": 1.7165801828514948, "grad_norm": 1.5549713664718046, "learning_rate": 1.0800321948120386e-06, "loss": 0.3077791929244995, "step": 6947 }, { "epoch": 1.7168272794662713, "grad_norm": 1.4607799858115833, "learning_rate": 1.0781859748654223e-06, "loss": 0.28150859475135803, "step": 6948 }, { "epoch": 1.7170743760810478, "grad_norm": 1.466817496906268, "learning_rate": 1.0763412443250943e-06, "loss": 0.269476056098938, "step": 6949 }, { "epoch": 1.717321472695824, "grad_norm": 1.4958319709629981, "learning_rate": 1.074498003499016e-06, "loss": 0.26868608593940735, "step": 6950 }, { "epoch": 1.7175685693106004, "grad_norm": 1.4404200111025247, "learning_rate": 1.072656252694898e-06, "loss": 0.2720656394958496, "step": 6951 }, { "epoch": 1.717815665925377, "grad_norm": 1.406297718904533, "learning_rate": 1.070815992220202e-06, "loss": 0.32924777269363403, "step": 6952 }, { "epoch": 1.7180627625401532, "grad_norm": 1.3959984006521573, "learning_rate": 1.0689772223821415e-06, "loss": 0.25644123554229736, "step": 6953 }, { "epoch": 1.7183098591549295, "grad_norm": 1.5517591152936758, "learning_rate": 1.0671399434876761e-06, "loss": 0.3178409934043884, "step": 6954 }, { "epoch": 1.718556955769706, "grad_norm": 1.4394416463323174, "learning_rate": 1.065304155843524e-06, "loss": 0.2855723798274994, "step": 6955 }, { "epoch": 1.7188040523844823, "grad_norm": 1.4548685741669058, "learning_rate": 1.063469859756151e-06, "loss": 0.2781597971916199, "step": 6956 }, { "epoch": 1.7190511489992586, "grad_norm": 1.608025317230358, "learning_rate": 1.0616370555317768e-06, "loss": 0.3190341293811798, "step": 6957 }, { "epoch": 1.719298245614035, "grad_norm": 1.6732462267762904, "learning_rate": 1.0598057434763643e-06, "loss": 0.25591954588890076, "step": 6958 }, { "epoch": 1.7195453422288116, "grad_norm": 1.647611734253988, "learning_rate": 1.0579759238956356e-06, "loss": 0.33622097969055176, "step": 6959 }, { "epoch": 1.7197924388435877, "grad_norm": 1.5142405560248908, "learning_rate": 1.0561475970950618e-06, "loss": 0.32076331973075867, "step": 6960 }, { "epoch": 1.7200395354583642, "grad_norm": 1.472252458511376, "learning_rate": 1.0543207633798592e-06, "loss": 0.2792752981185913, "step": 6961 }, { "epoch": 1.7202866320731407, "grad_norm": 1.5785666279809538, "learning_rate": 1.0524954230550043e-06, "loss": 0.27414587140083313, "step": 6962 }, { "epoch": 1.720533728687917, "grad_norm": 1.481340431786107, "learning_rate": 1.0506715764252152e-06, "loss": 0.2650974988937378, "step": 6963 }, { "epoch": 1.7207808253026933, "grad_norm": 1.5518285264701297, "learning_rate": 1.0488492237949643e-06, "loss": 0.2953152656555176, "step": 6964 }, { "epoch": 1.7210279219174698, "grad_norm": 1.393497803342969, "learning_rate": 1.0470283654684776e-06, "loss": 0.32170912623405457, "step": 6965 }, { "epoch": 1.721275018532246, "grad_norm": 1.5471835505535891, "learning_rate": 1.0452090017497295e-06, "loss": 0.26303738355636597, "step": 6966 }, { "epoch": 1.7215221151470224, "grad_norm": 1.5244202912605704, "learning_rate": 1.0433911329424428e-06, "loss": 0.29423612356185913, "step": 6967 }, { "epoch": 1.7217692117617989, "grad_norm": 1.5780529030643438, "learning_rate": 1.0415747593500903e-06, "loss": 0.2848469018936157, "step": 6968 }, { "epoch": 1.7220163083765754, "grad_norm": 1.3260457007215738, "learning_rate": 1.0397598812758992e-06, "loss": 0.2309911847114563, "step": 6969 }, { "epoch": 1.7222634049913517, "grad_norm": 1.7627531517067583, "learning_rate": 1.037946499022844e-06, "loss": 0.2655542492866516, "step": 6970 }, { "epoch": 1.722510501606128, "grad_norm": 1.5640980827973998, "learning_rate": 1.0361346128936523e-06, "loss": 0.24579942226409912, "step": 6971 }, { "epoch": 1.7227575982209045, "grad_norm": 1.3845189026358675, "learning_rate": 1.0343242231907979e-06, "loss": 0.2895998954772949, "step": 6972 }, { "epoch": 1.7230046948356808, "grad_norm": 1.267780915157656, "learning_rate": 1.0325153302165082e-06, "loss": 0.23904335498809814, "step": 6973 }, { "epoch": 1.723251791450457, "grad_norm": 1.3650637041068387, "learning_rate": 1.030707934272761e-06, "loss": 0.2510824501514435, "step": 6974 }, { "epoch": 1.7234988880652335, "grad_norm": 1.5441897088029162, "learning_rate": 1.028902035661279e-06, "loss": 0.3003690838813782, "step": 6975 }, { "epoch": 1.7237459846800098, "grad_norm": 1.5031739848929433, "learning_rate": 1.0270976346835426e-06, "loss": 0.32358765602111816, "step": 6976 }, { "epoch": 1.7239930812947861, "grad_norm": 1.3369864913275986, "learning_rate": 1.0252947316407747e-06, "loss": 0.2627412974834442, "step": 6977 }, { "epoch": 1.7242401779095626, "grad_norm": 1.4303167706040412, "learning_rate": 1.023493326833952e-06, "loss": 0.2783803939819336, "step": 6978 }, { "epoch": 1.7244872745243391, "grad_norm": 1.2791306347389837, "learning_rate": 1.0216934205638029e-06, "loss": 0.22408980131149292, "step": 6979 }, { "epoch": 1.7247343711391154, "grad_norm": 1.3761574717873768, "learning_rate": 1.019895013130805e-06, "loss": 0.24802619218826294, "step": 6980 }, { "epoch": 1.7249814677538917, "grad_norm": 1.6954886303583372, "learning_rate": 1.0180981048351812e-06, "loss": 0.26768097281455994, "step": 6981 }, { "epoch": 1.7252285643686682, "grad_norm": 1.4868976039901665, "learning_rate": 1.0163026959769062e-06, "loss": 0.28046077489852905, "step": 6982 }, { "epoch": 1.7254756609834445, "grad_norm": 1.5450372318477759, "learning_rate": 1.0145087868557058e-06, "loss": 0.2919427752494812, "step": 6983 }, { "epoch": 1.7257227575982208, "grad_norm": 1.4557142826921958, "learning_rate": 1.012716377771057e-06, "loss": 0.2533411979675293, "step": 6984 }, { "epoch": 1.7259698542129973, "grad_norm": 1.3470696062638452, "learning_rate": 1.0109254690221847e-06, "loss": 0.26078110933303833, "step": 6985 }, { "epoch": 1.7262169508277738, "grad_norm": 1.563647233874269, "learning_rate": 1.009136060908059e-06, "loss": 0.27244311571121216, "step": 6986 }, { "epoch": 1.7264640474425499, "grad_norm": 1.5940858888143363, "learning_rate": 1.0073481537274066e-06, "loss": 0.3001652956008911, "step": 6987 }, { "epoch": 1.7267111440573264, "grad_norm": 1.5606567138113037, "learning_rate": 1.0055617477787005e-06, "loss": 0.2813352346420288, "step": 6988 }, { "epoch": 1.726958240672103, "grad_norm": 1.5102599538819839, "learning_rate": 1.0037768433601614e-06, "loss": 0.2524513900279999, "step": 6989 }, { "epoch": 1.7272053372868792, "grad_norm": 1.374850068716971, "learning_rate": 1.0019934407697628e-06, "loss": 0.23375120759010315, "step": 6990 }, { "epoch": 1.7274524339016555, "grad_norm": 1.5127997763343946, "learning_rate": 1.0002115403052238e-06, "loss": 0.24856358766555786, "step": 6991 }, { "epoch": 1.727699530516432, "grad_norm": 1.4154427295773537, "learning_rate": 9.984311422640148e-07, "loss": 0.22540590167045593, "step": 6992 }, { "epoch": 1.7279466271312083, "grad_norm": 1.5405646839836176, "learning_rate": 9.96652246943356e-07, "loss": 0.3064621686935425, "step": 6993 }, { "epoch": 1.7281937237459846, "grad_norm": 1.5116089992985797, "learning_rate": 9.948748546402166e-07, "loss": 0.2746814489364624, "step": 6994 }, { "epoch": 1.728440820360761, "grad_norm": 1.395668028126461, "learning_rate": 9.930989656513146e-07, "loss": 0.2855615019798279, "step": 6995 }, { "epoch": 1.7286879169755376, "grad_norm": 1.4353910022678276, "learning_rate": 9.91324580273113e-07, "loss": 0.2697239816188812, "step": 6996 }, { "epoch": 1.7289350135903137, "grad_norm": 1.3253817941443313, "learning_rate": 9.895516988018305e-07, "loss": 0.2517755627632141, "step": 6997 }, { "epoch": 1.7291821102050902, "grad_norm": 1.5545203624843567, "learning_rate": 9.877803215334302e-07, "loss": 0.2806609869003296, "step": 6998 }, { "epoch": 1.7294292068198667, "grad_norm": 1.5287442937138296, "learning_rate": 9.86010448763628e-07, "loss": 0.26868969202041626, "step": 6999 }, { "epoch": 1.729676303434643, "grad_norm": 1.679722956796068, "learning_rate": 9.842420807878838e-07, "loss": 0.3168179988861084, "step": 7000 }, { "epoch": 1.7299234000494192, "grad_norm": 1.2655346978825488, "learning_rate": 9.824752179014086e-07, "loss": 0.25359728932380676, "step": 7001 }, { "epoch": 1.7301704966641958, "grad_norm": 1.5579294773648318, "learning_rate": 9.807098603991649e-07, "loss": 0.3333853483200073, "step": 7002 }, { "epoch": 1.730417593278972, "grad_norm": 1.338733160218028, "learning_rate": 9.789460085758573e-07, "loss": 0.25967302918434143, "step": 7003 }, { "epoch": 1.7306646898937483, "grad_norm": 1.3763454044244634, "learning_rate": 9.771836627259479e-07, "loss": 0.23997409641742706, "step": 7004 }, { "epoch": 1.7309117865085248, "grad_norm": 1.4641931068564011, "learning_rate": 9.75422823143637e-07, "loss": 0.26985466480255127, "step": 7005 }, { "epoch": 1.7311588831233014, "grad_norm": 1.4627117928042936, "learning_rate": 9.736634901228814e-07, "loss": 0.2465956211090088, "step": 7006 }, { "epoch": 1.7314059797380776, "grad_norm": 1.422332047079842, "learning_rate": 9.719056639573832e-07, "loss": 0.23249295353889465, "step": 7007 }, { "epoch": 1.731653076352854, "grad_norm": 1.5379050745916085, "learning_rate": 9.701493449405997e-07, "loss": 0.26134249567985535, "step": 7008 }, { "epoch": 1.7319001729676304, "grad_norm": 1.415142093653771, "learning_rate": 9.683945333657197e-07, "loss": 0.2527291178703308, "step": 7009 }, { "epoch": 1.7321472695824067, "grad_norm": 1.5668196746967507, "learning_rate": 9.666412295256978e-07, "loss": 0.2653007507324219, "step": 7010 }, { "epoch": 1.732394366197183, "grad_norm": 1.4747102525143454, "learning_rate": 9.648894337132297e-07, "loss": 0.2691558003425598, "step": 7011 }, { "epoch": 1.7326414628119595, "grad_norm": 1.4845746225238374, "learning_rate": 9.631391462207606e-07, "loss": 0.32042160630226135, "step": 7012 }, { "epoch": 1.7328885594267358, "grad_norm": 1.3015403260970282, "learning_rate": 9.6139036734048e-07, "loss": 0.2467983067035675, "step": 7013 }, { "epoch": 1.733135656041512, "grad_norm": 1.3891652343790746, "learning_rate": 9.596430973643312e-07, "loss": 0.2669774293899536, "step": 7014 }, { "epoch": 1.7333827526562886, "grad_norm": 1.2969782496403273, "learning_rate": 9.578973365840039e-07, "loss": 0.23802441358566284, "step": 7015 }, { "epoch": 1.7336298492710651, "grad_norm": 1.2954208063295727, "learning_rate": 9.56153085290933e-07, "loss": 0.23575998842716217, "step": 7016 }, { "epoch": 1.7338769458858414, "grad_norm": 1.529832866610951, "learning_rate": 9.544103437763064e-07, "loss": 0.24177321791648865, "step": 7017 }, { "epoch": 1.7341240425006177, "grad_norm": 1.4373102534890123, "learning_rate": 9.526691123310527e-07, "loss": 0.2990899682044983, "step": 7018 }, { "epoch": 1.7343711391153942, "grad_norm": 1.5345693460395042, "learning_rate": 9.509293912458562e-07, "loss": 0.23006945848464966, "step": 7019 }, { "epoch": 1.7346182357301705, "grad_norm": 1.4369978195462998, "learning_rate": 9.491911808111443e-07, "loss": 0.21100321412086487, "step": 7020 }, { "epoch": 1.7348653323449468, "grad_norm": 1.4451035729634072, "learning_rate": 9.474544813170971e-07, "loss": 0.29261428117752075, "step": 7021 }, { "epoch": 1.7351124289597233, "grad_norm": 1.4114440072948686, "learning_rate": 9.457192930536352e-07, "loss": 0.27242210507392883, "step": 7022 }, { "epoch": 1.7353595255744998, "grad_norm": 1.5435992671532344, "learning_rate": 9.4398561631043e-07, "loss": 0.31334203481674194, "step": 7023 }, { "epoch": 1.7356066221892759, "grad_norm": 1.4252156401182203, "learning_rate": 9.422534513769021e-07, "loss": 0.25711700320243835, "step": 7024 }, { "epoch": 1.7358537188040524, "grad_norm": 1.469723823635226, "learning_rate": 9.405227985422194e-07, "loss": 0.30853039026260376, "step": 7025 }, { "epoch": 1.7361008154188289, "grad_norm": 1.3652964241630063, "learning_rate": 9.387936580952995e-07, "loss": 0.24680203199386597, "step": 7026 }, { "epoch": 1.7363479120336052, "grad_norm": 1.5931251340969923, "learning_rate": 9.370660303248003e-07, "loss": 0.2646064758300781, "step": 7027 }, { "epoch": 1.7365950086483815, "grad_norm": 1.6456876687488031, "learning_rate": 9.353399155191334e-07, "loss": 0.30191540718078613, "step": 7028 }, { "epoch": 1.736842105263158, "grad_norm": 1.661117128637455, "learning_rate": 9.336153139664584e-07, "loss": 0.3063533902168274, "step": 7029 }, { "epoch": 1.7370892018779343, "grad_norm": 1.5784343449681126, "learning_rate": 9.318922259546759e-07, "loss": 0.31526827812194824, "step": 7030 }, { "epoch": 1.7373362984927105, "grad_norm": 1.4666304622115653, "learning_rate": 9.301706517714426e-07, "loss": 0.22025175392627716, "step": 7031 }, { "epoch": 1.737583395107487, "grad_norm": 1.7568016672388271, "learning_rate": 9.284505917041531e-07, "loss": 0.30093643069267273, "step": 7032 }, { "epoch": 1.7378304917222636, "grad_norm": 1.7105699432968355, "learning_rate": 9.267320460399565e-07, "loss": 0.31051915884017944, "step": 7033 }, { "epoch": 1.7380775883370396, "grad_norm": 1.4041564363682209, "learning_rate": 9.250150150657455e-07, "loss": 0.2892853617668152, "step": 7034 }, { "epoch": 1.7383246849518161, "grad_norm": 1.6308907116372882, "learning_rate": 9.232994990681643e-07, "loss": 0.2948622405529022, "step": 7035 }, { "epoch": 1.7385717815665926, "grad_norm": 1.3113306631848127, "learning_rate": 9.215854983335981e-07, "loss": 0.26532214879989624, "step": 7036 }, { "epoch": 1.738818878181369, "grad_norm": 1.6482964359293502, "learning_rate": 9.198730131481804e-07, "loss": 0.33185243606567383, "step": 7037 }, { "epoch": 1.7390659747961452, "grad_norm": 1.5801300810387569, "learning_rate": 9.181620437977945e-07, "loss": 0.3021954894065857, "step": 7038 }, { "epoch": 1.7393130714109217, "grad_norm": 1.319757548115974, "learning_rate": 9.16452590568071e-07, "loss": 0.23251134157180786, "step": 7039 }, { "epoch": 1.739560168025698, "grad_norm": 1.4902755129966716, "learning_rate": 9.147446537443849e-07, "loss": 0.2658090591430664, "step": 7040 }, { "epoch": 1.7398072646404743, "grad_norm": 1.4198905792647194, "learning_rate": 9.130382336118581e-07, "loss": 0.2616504430770874, "step": 7041 }, { "epoch": 1.7400543612552508, "grad_norm": 1.3747331047347595, "learning_rate": 9.113333304553596e-07, "loss": 0.23132388293743134, "step": 7042 }, { "epoch": 1.7403014578700273, "grad_norm": 1.4538992629125596, "learning_rate": 9.096299445595091e-07, "loss": 0.2680266499519348, "step": 7043 }, { "epoch": 1.7405485544848036, "grad_norm": 1.3977357361771228, "learning_rate": 9.079280762086662e-07, "loss": 0.2617233395576477, "step": 7044 }, { "epoch": 1.74079565109958, "grad_norm": 1.4002213724165724, "learning_rate": 9.062277256869423e-07, "loss": 0.27207720279693604, "step": 7045 }, { "epoch": 1.7410427477143564, "grad_norm": 1.4597707742499282, "learning_rate": 9.045288932781915e-07, "loss": 0.26453283429145813, "step": 7046 }, { "epoch": 1.7412898443291327, "grad_norm": 1.4350805891760696, "learning_rate": 9.028315792660169e-07, "loss": 0.24556319415569305, "step": 7047 }, { "epoch": 1.741536940943909, "grad_norm": 1.5335325963346629, "learning_rate": 9.011357839337698e-07, "loss": 0.32802218198776245, "step": 7048 }, { "epoch": 1.7417840375586855, "grad_norm": 1.5145795775050326, "learning_rate": 8.994415075645479e-07, "loss": 0.2628435492515564, "step": 7049 }, { "epoch": 1.7420311341734618, "grad_norm": 1.440021471590695, "learning_rate": 8.977487504411897e-07, "loss": 0.24684709310531616, "step": 7050 }, { "epoch": 1.742278230788238, "grad_norm": 1.46936282259678, "learning_rate": 8.960575128462833e-07, "loss": 0.28604769706726074, "step": 7051 }, { "epoch": 1.7425253274030146, "grad_norm": 1.5211224963619552, "learning_rate": 8.943677950621655e-07, "loss": 0.28017276525497437, "step": 7052 }, { "epoch": 1.742772424017791, "grad_norm": 1.3429059278170834, "learning_rate": 8.926795973709168e-07, "loss": 0.25432080030441284, "step": 7053 }, { "epoch": 1.7430195206325674, "grad_norm": 1.2701401937724157, "learning_rate": 8.909929200543666e-07, "loss": 0.2741658389568329, "step": 7054 }, { "epoch": 1.7432666172473437, "grad_norm": 1.618236667612114, "learning_rate": 8.893077633940861e-07, "loss": 0.30927833914756775, "step": 7055 }, { "epoch": 1.7435137138621202, "grad_norm": 1.544430986669348, "learning_rate": 8.876241276713959e-07, "loss": 0.25953003764152527, "step": 7056 }, { "epoch": 1.7437608104768965, "grad_norm": 1.3329908772495975, "learning_rate": 8.859420131673635e-07, "loss": 0.2608126401901245, "step": 7057 }, { "epoch": 1.7440079070916727, "grad_norm": 1.4637750201629471, "learning_rate": 8.842614201627975e-07, "loss": 0.2970116138458252, "step": 7058 }, { "epoch": 1.7442550037064493, "grad_norm": 1.440942833554796, "learning_rate": 8.825823489382601e-07, "loss": 0.23722106218338013, "step": 7059 }, { "epoch": 1.7445021003212255, "grad_norm": 1.4281309827561293, "learning_rate": 8.809047997740505e-07, "loss": 0.26075464487075806, "step": 7060 }, { "epoch": 1.7447491969360018, "grad_norm": 1.4235529461482381, "learning_rate": 8.792287729502213e-07, "loss": 0.2897312343120575, "step": 7061 }, { "epoch": 1.7449962935507783, "grad_norm": 1.3826954933230404, "learning_rate": 8.775542687465677e-07, "loss": 0.2589264512062073, "step": 7062 }, { "epoch": 1.7452433901655549, "grad_norm": 1.5226173977731219, "learning_rate": 8.758812874426326e-07, "loss": 0.338820219039917, "step": 7063 }, { "epoch": 1.7454904867803311, "grad_norm": 1.6523518787643756, "learning_rate": 8.742098293177026e-07, "loss": 0.28614282608032227, "step": 7064 }, { "epoch": 1.7457375833951074, "grad_norm": 1.2969321472556816, "learning_rate": 8.725398946508067e-07, "loss": 0.2583371698856354, "step": 7065 }, { "epoch": 1.745984680009884, "grad_norm": 1.2798358201505078, "learning_rate": 8.708714837207288e-07, "loss": 0.2552720010280609, "step": 7066 }, { "epoch": 1.7462317766246602, "grad_norm": 1.5466747740103526, "learning_rate": 8.692045968059892e-07, "loss": 0.3296077251434326, "step": 7067 }, { "epoch": 1.7464788732394365, "grad_norm": 1.4641144419092245, "learning_rate": 8.675392341848621e-07, "loss": 0.26922154426574707, "step": 7068 }, { "epoch": 1.746725969854213, "grad_norm": 1.391807020121466, "learning_rate": 8.658753961353594e-07, "loss": 0.24568891525268555, "step": 7069 }, { "epoch": 1.7469730664689895, "grad_norm": 1.3563477847493801, "learning_rate": 8.642130829352425e-07, "loss": 0.2520400583744049, "step": 7070 }, { "epoch": 1.7472201630837656, "grad_norm": 1.462046227493438, "learning_rate": 8.625522948620202e-07, "loss": 0.25285691022872925, "step": 7071 }, { "epoch": 1.747467259698542, "grad_norm": 1.5402617929653286, "learning_rate": 8.608930321929398e-07, "loss": 0.2646533250808716, "step": 7072 }, { "epoch": 1.7477143563133186, "grad_norm": 1.468233936392616, "learning_rate": 8.592352952050031e-07, "loss": 0.2825888991355896, "step": 7073 }, { "epoch": 1.747961452928095, "grad_norm": 1.456145752335527, "learning_rate": 8.575790841749477e-07, "loss": 0.26237475872039795, "step": 7074 }, { "epoch": 1.7482085495428712, "grad_norm": 1.5821034978602715, "learning_rate": 8.559243993792643e-07, "loss": 0.30398720502853394, "step": 7075 }, { "epoch": 1.7484556461576477, "grad_norm": 1.6053316295855664, "learning_rate": 8.542712410941856e-07, "loss": 0.3040617108345032, "step": 7076 }, { "epoch": 1.748702742772424, "grad_norm": 1.4742831640126333, "learning_rate": 8.526196095956907e-07, "loss": 0.3187906742095947, "step": 7077 }, { "epoch": 1.7489498393872003, "grad_norm": 1.7973605066286398, "learning_rate": 8.509695051595001e-07, "loss": 0.2908865213394165, "step": 7078 }, { "epoch": 1.7491969360019768, "grad_norm": 1.5487874765397522, "learning_rate": 8.493209280610826e-07, "loss": 0.3274089992046356, "step": 7079 }, { "epoch": 1.7494440326167533, "grad_norm": 1.5460680256333583, "learning_rate": 8.476738785756511e-07, "loss": 0.2925601005554199, "step": 7080 }, { "epoch": 1.7496911292315294, "grad_norm": 1.5425174979966447, "learning_rate": 8.460283569781646e-07, "loss": 0.2616004943847656, "step": 7081 }, { "epoch": 1.7499382258463059, "grad_norm": 1.5496626805738036, "learning_rate": 8.44384363543328e-07, "loss": 0.294275164604187, "step": 7082 }, { "epoch": 1.7501853224610824, "grad_norm": 1.6296360541232544, "learning_rate": 8.427418985455859e-07, "loss": 0.34085613489151, "step": 7083 }, { "epoch": 1.7504324190758587, "grad_norm": 1.4903823476347908, "learning_rate": 8.411009622591349e-07, "loss": 0.2518981397151947, "step": 7084 }, { "epoch": 1.750679515690635, "grad_norm": 1.4005037665000715, "learning_rate": 8.394615549579077e-07, "loss": 0.30500859022140503, "step": 7085 }, { "epoch": 1.7509266123054115, "grad_norm": 1.3995603370954992, "learning_rate": 8.378236769155901e-07, "loss": 0.23278474807739258, "step": 7086 }, { "epoch": 1.7511737089201878, "grad_norm": 1.4119283633940665, "learning_rate": 8.361873284056099e-07, "loss": 0.2834307551383972, "step": 7087 }, { "epoch": 1.751420805534964, "grad_norm": 1.5076506782779475, "learning_rate": 8.345525097011364e-07, "loss": 0.305387020111084, "step": 7088 }, { "epoch": 1.7516679021497406, "grad_norm": 1.3345766416018168, "learning_rate": 8.32919221075087e-07, "loss": 0.259630024433136, "step": 7089 }, { "epoch": 1.751914998764517, "grad_norm": 1.4090175664440674, "learning_rate": 8.312874628001233e-07, "loss": 0.2619383931159973, "step": 7090 }, { "epoch": 1.7521620953792933, "grad_norm": 1.4854870102766309, "learning_rate": 8.29657235148651e-07, "loss": 0.3048352599143982, "step": 7091 }, { "epoch": 1.7524091919940696, "grad_norm": 1.5800797493263932, "learning_rate": 8.280285383928199e-07, "loss": 0.3170607089996338, "step": 7092 }, { "epoch": 1.7526562886088461, "grad_norm": 1.2899396872069344, "learning_rate": 8.264013728045228e-07, "loss": 0.19046300649642944, "step": 7093 }, { "epoch": 1.7529033852236224, "grad_norm": 1.5660317863010729, "learning_rate": 8.247757386553989e-07, "loss": 0.31598973274230957, "step": 7094 }, { "epoch": 1.7531504818383987, "grad_norm": 1.4813224011370194, "learning_rate": 8.231516362168312e-07, "loss": 0.25487303733825684, "step": 7095 }, { "epoch": 1.7533975784531752, "grad_norm": 1.5253417754552727, "learning_rate": 8.215290657599506e-07, "loss": 0.2732323408126831, "step": 7096 }, { "epoch": 1.7536446750679515, "grad_norm": 1.4083051234250845, "learning_rate": 8.199080275556248e-07, "loss": 0.25464436411857605, "step": 7097 }, { "epoch": 1.7538917716827278, "grad_norm": 1.5043167080022275, "learning_rate": 8.182885218744719e-07, "loss": 0.30438733100891113, "step": 7098 }, { "epoch": 1.7541388682975043, "grad_norm": 1.3560153608452377, "learning_rate": 8.166705489868487e-07, "loss": 0.2742306590080261, "step": 7099 }, { "epoch": 1.7543859649122808, "grad_norm": 1.3389914013039628, "learning_rate": 8.150541091628639e-07, "loss": 0.26967939734458923, "step": 7100 }, { "epoch": 1.7546330615270571, "grad_norm": 1.3658612370589058, "learning_rate": 8.134392026723614e-07, "loss": 0.27098801732063293, "step": 7101 }, { "epoch": 1.7548801581418334, "grad_norm": 1.2435650189706546, "learning_rate": 8.118258297849369e-07, "loss": 0.20139552652835846, "step": 7102 }, { "epoch": 1.75512725475661, "grad_norm": 1.5492167171351696, "learning_rate": 8.102139907699235e-07, "loss": 0.3295280635356903, "step": 7103 }, { "epoch": 1.7553743513713862, "grad_norm": 1.5007113103411616, "learning_rate": 8.086036858964042e-07, "loss": 0.27312228083610535, "step": 7104 }, { "epoch": 1.7556214479861625, "grad_norm": 1.3174776232326355, "learning_rate": 8.069949154332047e-07, "loss": 0.25528717041015625, "step": 7105 }, { "epoch": 1.755868544600939, "grad_norm": 1.5145225027983595, "learning_rate": 8.053876796488869e-07, "loss": 0.2790910303592682, "step": 7106 }, { "epoch": 1.7561156412157155, "grad_norm": 1.4923205283486278, "learning_rate": 8.037819788117651e-07, "loss": 0.28049054741859436, "step": 7107 }, { "epoch": 1.7563627378304916, "grad_norm": 1.5979149739565874, "learning_rate": 8.021778131898961e-07, "loss": 0.28509998321533203, "step": 7108 }, { "epoch": 1.756609834445268, "grad_norm": 1.5934484886975524, "learning_rate": 8.0057518305108e-07, "loss": 0.3462734520435333, "step": 7109 }, { "epoch": 1.7568569310600446, "grad_norm": 1.4254829154393929, "learning_rate": 7.989740886628561e-07, "loss": 0.3157024383544922, "step": 7110 }, { "epoch": 1.7571040276748209, "grad_norm": 1.288150973318181, "learning_rate": 7.973745302925117e-07, "loss": 0.2728003263473511, "step": 7111 }, { "epoch": 1.7573511242895972, "grad_norm": 1.4394653430073954, "learning_rate": 7.957765082070812e-07, "loss": 0.28185635805130005, "step": 7112 }, { "epoch": 1.7575982209043737, "grad_norm": 1.3202399533053197, "learning_rate": 7.94180022673332e-07, "loss": 0.21164394915103912, "step": 7113 }, { "epoch": 1.75784531751915, "grad_norm": 1.529803833068833, "learning_rate": 7.925850739577867e-07, "loss": 0.2789773941040039, "step": 7114 }, { "epoch": 1.7580924141339263, "grad_norm": 1.1736252065922843, "learning_rate": 7.909916623267011e-07, "loss": 0.22488023340702057, "step": 7115 }, { "epoch": 1.7583395107487028, "grad_norm": 1.6469152541665275, "learning_rate": 7.893997880460802e-07, "loss": 0.322873592376709, "step": 7116 }, { "epoch": 1.7585866073634793, "grad_norm": 1.4998210174957802, "learning_rate": 7.878094513816737e-07, "loss": 0.290252685546875, "step": 7117 }, { "epoch": 1.7588337039782553, "grad_norm": 1.3701166645498506, "learning_rate": 7.862206525989712e-07, "loss": 0.2859611511230469, "step": 7118 }, { "epoch": 1.7590808005930318, "grad_norm": 1.358404105938211, "learning_rate": 7.846333919632054e-07, "loss": 0.27862387895584106, "step": 7119 }, { "epoch": 1.7593278972078084, "grad_norm": 1.3059743858097992, "learning_rate": 7.83047669739353e-07, "loss": 0.21874859929084778, "step": 7120 }, { "epoch": 1.7595749938225846, "grad_norm": 1.500863233791307, "learning_rate": 7.814634861921355e-07, "loss": 0.2748371958732605, "step": 7121 }, { "epoch": 1.759822090437361, "grad_norm": 1.435678857069854, "learning_rate": 7.798808415860149e-07, "loss": 0.3138872981071472, "step": 7122 }, { "epoch": 1.7600691870521374, "grad_norm": 1.3355212128428136, "learning_rate": 7.782997361852007e-07, "loss": 0.24284949898719788, "step": 7123 }, { "epoch": 1.7603162836669137, "grad_norm": 1.3492760260926602, "learning_rate": 7.767201702536386e-07, "loss": 0.24189189076423645, "step": 7124 }, { "epoch": 1.76056338028169, "grad_norm": 1.4998545541016455, "learning_rate": 7.751421440550234e-07, "loss": 0.27547532320022583, "step": 7125 }, { "epoch": 1.7608104768964665, "grad_norm": 1.3145225570083579, "learning_rate": 7.735656578527906e-07, "loss": 0.23785662651062012, "step": 7126 }, { "epoch": 1.761057573511243, "grad_norm": 1.3769729918291063, "learning_rate": 7.719907119101178e-07, "loss": 0.20646131038665771, "step": 7127 }, { "epoch": 1.7613046701260193, "grad_norm": 1.4647288481960021, "learning_rate": 7.704173064899268e-07, "loss": 0.2895504832267761, "step": 7128 }, { "epoch": 1.7615517667407956, "grad_norm": 1.5160912482968014, "learning_rate": 7.688454418548797e-07, "loss": 0.2884242534637451, "step": 7129 }, { "epoch": 1.7617988633555721, "grad_norm": 1.2310383309178483, "learning_rate": 7.672751182673854e-07, "loss": 0.2108556479215622, "step": 7130 }, { "epoch": 1.7620459599703484, "grad_norm": 1.5101635117935828, "learning_rate": 7.657063359895922e-07, "loss": 0.2732413709163666, "step": 7131 }, { "epoch": 1.7622930565851247, "grad_norm": 1.3434618464809152, "learning_rate": 7.64139095283395e-07, "loss": 0.23516914248466492, "step": 7132 }, { "epoch": 1.7625401531999012, "grad_norm": 1.5792431738086465, "learning_rate": 7.625733964104276e-07, "loss": 0.27065509557724, "step": 7133 }, { "epoch": 1.7627872498146775, "grad_norm": 1.4669020089055755, "learning_rate": 7.610092396320645e-07, "loss": 0.28431886434555054, "step": 7134 }, { "epoch": 1.7630343464294538, "grad_norm": 1.265617576392688, "learning_rate": 7.594466252094279e-07, "loss": 0.20756399631500244, "step": 7135 }, { "epoch": 1.7632814430442303, "grad_norm": 1.5820660570225569, "learning_rate": 7.578855534033813e-07, "loss": 0.3014175593852997, "step": 7136 }, { "epoch": 1.7635285396590068, "grad_norm": 1.5304785387425452, "learning_rate": 7.563260244745308e-07, "loss": 0.2523095905780792, "step": 7137 }, { "epoch": 1.763775636273783, "grad_norm": 1.4694301967701884, "learning_rate": 7.547680386832201e-07, "loss": 0.2535361647605896, "step": 7138 }, { "epoch": 1.7640227328885594, "grad_norm": 1.445441287210037, "learning_rate": 7.532115962895415e-07, "loss": 0.2782607078552246, "step": 7139 }, { "epoch": 1.7642698295033359, "grad_norm": 1.2767942919712658, "learning_rate": 7.51656697553329e-07, "loss": 0.1820635050535202, "step": 7140 }, { "epoch": 1.7645169261181122, "grad_norm": 1.342553165874373, "learning_rate": 7.501033427341531e-07, "loss": 0.2670515775680542, "step": 7141 }, { "epoch": 1.7647640227328885, "grad_norm": 1.3059032169002032, "learning_rate": 7.485515320913339e-07, "loss": 0.22665804624557495, "step": 7142 }, { "epoch": 1.765011119347665, "grad_norm": 1.7433748631858386, "learning_rate": 7.470012658839276e-07, "loss": 0.33565184473991394, "step": 7143 }, { "epoch": 1.7652582159624415, "grad_norm": 1.4360309777675748, "learning_rate": 7.454525443707383e-07, "loss": 0.26592159271240234, "step": 7144 }, { "epoch": 1.7655053125772175, "grad_norm": 1.3655058351084515, "learning_rate": 7.439053678103069e-07, "loss": 0.2281472533941269, "step": 7145 }, { "epoch": 1.765752409191994, "grad_norm": 1.4639456976029757, "learning_rate": 7.423597364609225e-07, "loss": 0.2890973687171936, "step": 7146 }, { "epoch": 1.7659995058067706, "grad_norm": 1.764457509237002, "learning_rate": 7.408156505806096e-07, "loss": 0.31241118907928467, "step": 7147 }, { "epoch": 1.7662466024215469, "grad_norm": 1.3674779464561428, "learning_rate": 7.392731104271356e-07, "loss": 0.22801880538463593, "step": 7148 }, { "epoch": 1.7664936990363231, "grad_norm": 1.4840475148822196, "learning_rate": 7.377321162580153e-07, "loss": 0.24410486221313477, "step": 7149 }, { "epoch": 1.7667407956510996, "grad_norm": 1.396095899302204, "learning_rate": 7.361926683305021e-07, "loss": 0.2730371952056885, "step": 7150 }, { "epoch": 1.766987892265876, "grad_norm": 1.5186261498275668, "learning_rate": 7.346547669015902e-07, "loss": 0.29948514699935913, "step": 7151 }, { "epoch": 1.7672349888806522, "grad_norm": 1.3418658841596236, "learning_rate": 7.331184122280166e-07, "loss": 0.23251518607139587, "step": 7152 }, { "epoch": 1.7674820854954287, "grad_norm": 1.2908376091245466, "learning_rate": 7.315836045662605e-07, "loss": 0.25712716579437256, "step": 7153 }, { "epoch": 1.7677291821102052, "grad_norm": 1.3913837480421916, "learning_rate": 7.300503441725415e-07, "loss": 0.23282870650291443, "step": 7154 }, { "epoch": 1.7679762787249813, "grad_norm": 1.3293153871832908, "learning_rate": 7.285186313028226e-07, "loss": 0.265070378780365, "step": 7155 }, { "epoch": 1.7682233753397578, "grad_norm": 1.38144711411244, "learning_rate": 7.269884662128091e-07, "loss": 0.2854062020778656, "step": 7156 }, { "epoch": 1.7684704719545343, "grad_norm": 1.4018565917811854, "learning_rate": 7.254598491579434e-07, "loss": 0.2164420634508133, "step": 7157 }, { "epoch": 1.7687175685693106, "grad_norm": 1.4128078906791213, "learning_rate": 7.239327803934137e-07, "loss": 0.2732732892036438, "step": 7158 }, { "epoch": 1.768964665184087, "grad_norm": 1.3089984081670094, "learning_rate": 7.224072601741505e-07, "loss": 0.24620801210403442, "step": 7159 }, { "epoch": 1.7692117617988634, "grad_norm": 1.3698388271462807, "learning_rate": 7.208832887548233e-07, "loss": 0.25515544414520264, "step": 7160 }, { "epoch": 1.7694588584136397, "grad_norm": 1.3789140106921418, "learning_rate": 7.193608663898433e-07, "loss": 0.2717010974884033, "step": 7161 }, { "epoch": 1.769705955028416, "grad_norm": 1.464077141273708, "learning_rate": 7.178399933333613e-07, "loss": 0.2622634768486023, "step": 7162 }, { "epoch": 1.7699530516431925, "grad_norm": 1.5160494626933427, "learning_rate": 7.163206698392744e-07, "loss": 0.32119572162628174, "step": 7163 }, { "epoch": 1.770200148257969, "grad_norm": 1.471995983531457, "learning_rate": 7.148028961612164e-07, "loss": 0.2797324061393738, "step": 7164 }, { "epoch": 1.7704472448727453, "grad_norm": 1.482144552983674, "learning_rate": 7.132866725525667e-07, "loss": 0.2870755195617676, "step": 7165 }, { "epoch": 1.7706943414875216, "grad_norm": 1.4834098471031925, "learning_rate": 7.117719992664406e-07, "loss": 0.24676188826560974, "step": 7166 }, { "epoch": 1.770941438102298, "grad_norm": 1.211745880588277, "learning_rate": 7.102588765557006e-07, "loss": 0.22622588276863098, "step": 7167 }, { "epoch": 1.7711885347170744, "grad_norm": 1.5389956247102343, "learning_rate": 7.087473046729432e-07, "loss": 0.342568039894104, "step": 7168 }, { "epoch": 1.7714356313318507, "grad_norm": 1.4484460499656664, "learning_rate": 7.072372838705122e-07, "loss": 0.3136149048805237, "step": 7169 }, { "epoch": 1.7716827279466272, "grad_norm": 1.3540338942742534, "learning_rate": 7.057288144004925e-07, "loss": 0.19889281690120697, "step": 7170 }, { "epoch": 1.7719298245614035, "grad_norm": 1.38448841102094, "learning_rate": 7.042218965147029e-07, "loss": 0.26753664016723633, "step": 7171 }, { "epoch": 1.7721769211761798, "grad_norm": 1.3633167955317587, "learning_rate": 7.027165304647099e-07, "loss": 0.25234413146972656, "step": 7172 }, { "epoch": 1.7724240177909563, "grad_norm": 1.3728387306929406, "learning_rate": 7.012127165018212e-07, "loss": 0.2557174861431122, "step": 7173 }, { "epoch": 1.7726711144057328, "grad_norm": 1.4331198853096012, "learning_rate": 6.997104548770828e-07, "loss": 0.23397429287433624, "step": 7174 }, { "epoch": 1.772918211020509, "grad_norm": 1.3367486584566703, "learning_rate": 6.982097458412807e-07, "loss": 0.22663122415542603, "step": 7175 }, { "epoch": 1.7731653076352853, "grad_norm": 1.3981431129461954, "learning_rate": 6.967105896449411e-07, "loss": 0.24172544479370117, "step": 7176 }, { "epoch": 1.7734124042500619, "grad_norm": 1.519786938449349, "learning_rate": 6.952129865383362e-07, "loss": 0.28087010979652405, "step": 7177 }, { "epoch": 1.7736595008648381, "grad_norm": 1.4208490280153339, "learning_rate": 6.937169367714747e-07, "loss": 0.2415667176246643, "step": 7178 }, { "epoch": 1.7739065974796144, "grad_norm": 1.4114902261057194, "learning_rate": 6.922224405941081e-07, "loss": 0.232968270778656, "step": 7179 }, { "epoch": 1.774153694094391, "grad_norm": 1.3633815342241735, "learning_rate": 6.907294982557244e-07, "loss": 0.2758219540119171, "step": 7180 }, { "epoch": 1.7744007907091672, "grad_norm": 1.381863103677937, "learning_rate": 6.892381100055589e-07, "loss": 0.2638804018497467, "step": 7181 }, { "epoch": 1.7746478873239435, "grad_norm": 1.4649649057627616, "learning_rate": 6.877482760925791e-07, "loss": 0.2918225824832916, "step": 7182 }, { "epoch": 1.77489498393872, "grad_norm": 1.3464028764517262, "learning_rate": 6.862599967655015e-07, "loss": 0.26540204882621765, "step": 7183 }, { "epoch": 1.7751420805534965, "grad_norm": 1.3761022905017513, "learning_rate": 6.847732722727784e-07, "loss": 0.27511006593704224, "step": 7184 }, { "epoch": 1.7753891771682728, "grad_norm": 1.4778747597598925, "learning_rate": 6.832881028626015e-07, "loss": 0.29165714979171753, "step": 7185 }, { "epoch": 1.7756362737830491, "grad_norm": 1.4966056025317973, "learning_rate": 6.818044887829067e-07, "loss": 0.2823261022567749, "step": 7186 }, { "epoch": 1.7758833703978256, "grad_norm": 1.4012143649582895, "learning_rate": 6.803224302813683e-07, "loss": 0.22102628648281097, "step": 7187 }, { "epoch": 1.776130467012602, "grad_norm": 1.4193545503843576, "learning_rate": 6.788419276054026e-07, "loss": 0.25942301750183105, "step": 7188 }, { "epoch": 1.7763775636273782, "grad_norm": 1.8698584369219242, "learning_rate": 6.773629810021587e-07, "loss": 0.31074655055999756, "step": 7189 }, { "epoch": 1.7766246602421547, "grad_norm": 1.602595409361056, "learning_rate": 6.758855907185369e-07, "loss": 0.27555811405181885, "step": 7190 }, { "epoch": 1.7768717568569312, "grad_norm": 1.4816907838706264, "learning_rate": 6.744097570011698e-07, "loss": 0.2587791979312897, "step": 7191 }, { "epoch": 1.7771188534717073, "grad_norm": 1.3442359675736313, "learning_rate": 6.729354800964339e-07, "loss": 0.22833478450775146, "step": 7192 }, { "epoch": 1.7773659500864838, "grad_norm": 1.389650181669367, "learning_rate": 6.714627602504476e-07, "loss": 0.24377813935279846, "step": 7193 }, { "epoch": 1.7776130467012603, "grad_norm": 1.629227127973072, "learning_rate": 6.69991597709061e-07, "loss": 0.2919546365737915, "step": 7194 }, { "epoch": 1.7778601433160366, "grad_norm": 1.3870678370033365, "learning_rate": 6.685219927178743e-07, "loss": 0.2699032425880432, "step": 7195 }, { "epoch": 1.7781072399308129, "grad_norm": 1.6274704212020337, "learning_rate": 6.670539455222202e-07, "loss": 0.3083862066268921, "step": 7196 }, { "epoch": 1.7783543365455894, "grad_norm": 1.3048463662915395, "learning_rate": 6.655874563671772e-07, "loss": 0.22485804557800293, "step": 7197 }, { "epoch": 1.7786014331603657, "grad_norm": 1.5464147307640834, "learning_rate": 6.641225254975581e-07, "loss": 0.27501198649406433, "step": 7198 }, { "epoch": 1.778848529775142, "grad_norm": 1.356717642218768, "learning_rate": 6.626591531579174e-07, "loss": 0.2565937638282776, "step": 7199 }, { "epoch": 1.7790956263899185, "grad_norm": 1.6176662671905535, "learning_rate": 6.61197339592553e-07, "loss": 0.3039495646953583, "step": 7200 }, { "epoch": 1.779342723004695, "grad_norm": 1.4455119583934593, "learning_rate": 6.59737085045501e-07, "loss": 0.29775452613830566, "step": 7201 }, { "epoch": 1.779589819619471, "grad_norm": 1.5064886501289485, "learning_rate": 6.582783897605327e-07, "loss": 0.2822996973991394, "step": 7202 }, { "epoch": 1.7798369162342476, "grad_norm": 1.5227741941533308, "learning_rate": 6.568212539811636e-07, "loss": 0.3127521872520447, "step": 7203 }, { "epoch": 1.780084012849024, "grad_norm": 1.5865166364470293, "learning_rate": 6.553656779506468e-07, "loss": 0.28981882333755493, "step": 7204 }, { "epoch": 1.7803311094638004, "grad_norm": 1.531876009452829, "learning_rate": 6.539116619119767e-07, "loss": 0.3223360478878021, "step": 7205 }, { "epoch": 1.7805782060785766, "grad_norm": 1.7156776646236838, "learning_rate": 6.524592061078894e-07, "loss": 0.3163338303565979, "step": 7206 }, { "epoch": 1.7808253026933532, "grad_norm": 1.6995259093766273, "learning_rate": 6.51008310780853e-07, "loss": 0.26631632447242737, "step": 7207 }, { "epoch": 1.7810723993081294, "grad_norm": 1.4455963086301784, "learning_rate": 6.495589761730825e-07, "loss": 0.24073761701583862, "step": 7208 }, { "epoch": 1.7813194959229057, "grad_norm": 1.5377659467904867, "learning_rate": 6.48111202526529e-07, "loss": 0.25546783208847046, "step": 7209 }, { "epoch": 1.7815665925376822, "grad_norm": 1.555423424553127, "learning_rate": 6.466649900828836e-07, "loss": 0.3248618245124817, "step": 7210 }, { "epoch": 1.7818136891524587, "grad_norm": 1.4643103261253512, "learning_rate": 6.452203390835765e-07, "loss": 0.2894611060619354, "step": 7211 }, { "epoch": 1.782060785767235, "grad_norm": 1.576796631780869, "learning_rate": 6.437772497697781e-07, "loss": 0.32627683877944946, "step": 7212 }, { "epoch": 1.7823078823820113, "grad_norm": 1.5101730498938364, "learning_rate": 6.423357223823956e-07, "loss": 0.331137478351593, "step": 7213 }, { "epoch": 1.7825549789967878, "grad_norm": 1.5885259343701341, "learning_rate": 6.4089575716208e-07, "loss": 0.3031613230705261, "step": 7214 }, { "epoch": 1.7828020756115641, "grad_norm": 1.4310056229406702, "learning_rate": 6.394573543492188e-07, "loss": 0.27067601680755615, "step": 7215 }, { "epoch": 1.7830491722263404, "grad_norm": 1.472797150612319, "learning_rate": 6.380205141839379e-07, "loss": 0.2506481111049652, "step": 7216 }, { "epoch": 1.783296268841117, "grad_norm": 1.7267340053025355, "learning_rate": 6.36585236906101e-07, "loss": 0.30580270290374756, "step": 7217 }, { "epoch": 1.7835433654558932, "grad_norm": 1.476606965449291, "learning_rate": 6.351515227553151e-07, "loss": 0.2784492075443268, "step": 7218 }, { "epoch": 1.7837904620706695, "grad_norm": 1.3444454118460465, "learning_rate": 6.337193719709256e-07, "loss": 0.27145448327064514, "step": 7219 }, { "epoch": 1.784037558685446, "grad_norm": 1.4167057817638036, "learning_rate": 6.322887847920145e-07, "loss": 0.2269367277622223, "step": 7220 }, { "epoch": 1.7842846553002225, "grad_norm": 1.3259430120380455, "learning_rate": 6.308597614574019e-07, "loss": 0.2463807910680771, "step": 7221 }, { "epoch": 1.7845317519149988, "grad_norm": 1.2143522290183328, "learning_rate": 6.294323022056525e-07, "loss": 0.17996427416801453, "step": 7222 }, { "epoch": 1.784778848529775, "grad_norm": 1.408396557270233, "learning_rate": 6.280064072750614e-07, "loss": 0.23633220791816711, "step": 7223 }, { "epoch": 1.7850259451445516, "grad_norm": 1.687394418349513, "learning_rate": 6.265820769036701e-07, "loss": 0.3535362482070923, "step": 7224 }, { "epoch": 1.7852730417593279, "grad_norm": 1.3253427991344315, "learning_rate": 6.251593113292575e-07, "loss": 0.2802887558937073, "step": 7225 }, { "epoch": 1.7855201383741042, "grad_norm": 1.3735109130008127, "learning_rate": 6.23738110789337e-07, "loss": 0.2711634337902069, "step": 7226 }, { "epoch": 1.7857672349888807, "grad_norm": 1.338567954251616, "learning_rate": 6.223184755211653e-07, "loss": 0.2208421528339386, "step": 7227 }, { "epoch": 1.7860143316036572, "grad_norm": 1.3888720417743292, "learning_rate": 6.20900405761734e-07, "loss": 0.2594459056854248, "step": 7228 }, { "epoch": 1.7862614282184333, "grad_norm": 1.5413882490087178, "learning_rate": 6.194839017477805e-07, "loss": 0.30657631158828735, "step": 7229 }, { "epoch": 1.7865085248332098, "grad_norm": 2.141837810768508, "learning_rate": 6.18068963715771e-07, "loss": 0.2846859097480774, "step": 7230 }, { "epoch": 1.7867556214479863, "grad_norm": 1.356225537403792, "learning_rate": 6.166555919019146e-07, "loss": 0.3149331212043762, "step": 7231 }, { "epoch": 1.7870027180627626, "grad_norm": 1.353716322207209, "learning_rate": 6.152437865421624e-07, "loss": 0.25160181522369385, "step": 7232 }, { "epoch": 1.7872498146775389, "grad_norm": 1.4601085763363446, "learning_rate": 6.13833547872199e-07, "loss": 0.2505767345428467, "step": 7233 }, { "epoch": 1.7874969112923154, "grad_norm": 1.6010551997265619, "learning_rate": 6.124248761274509e-07, "loss": 0.3229687213897705, "step": 7234 }, { "epoch": 1.7877440079070916, "grad_norm": 1.7111265664626507, "learning_rate": 6.110177715430799e-07, "loss": 0.29173845052719116, "step": 7235 }, { "epoch": 1.787991104521868, "grad_norm": 1.5261697141711175, "learning_rate": 6.096122343539912e-07, "loss": 0.33328086137771606, "step": 7236 }, { "epoch": 1.7882382011366444, "grad_norm": 1.4599681725209712, "learning_rate": 6.082082647948196e-07, "loss": 0.2824099659919739, "step": 7237 }, { "epoch": 1.788485297751421, "grad_norm": 1.441119767475417, "learning_rate": 6.068058630999474e-07, "loss": 0.2945958375930786, "step": 7238 }, { "epoch": 1.788732394366197, "grad_norm": 1.368293843578477, "learning_rate": 6.054050295034908e-07, "loss": 0.2544347941875458, "step": 7239 }, { "epoch": 1.7889794909809735, "grad_norm": 1.5614959763714582, "learning_rate": 6.040057642393038e-07, "loss": 0.2967540919780731, "step": 7240 }, { "epoch": 1.78922658759575, "grad_norm": 1.493817898813897, "learning_rate": 6.026080675409795e-07, "loss": 0.2685915231704712, "step": 7241 }, { "epoch": 1.7894736842105263, "grad_norm": 1.4094061262066908, "learning_rate": 6.012119396418492e-07, "loss": 0.2804909944534302, "step": 7242 }, { "epoch": 1.7897207808253026, "grad_norm": 1.8414711568325084, "learning_rate": 5.998173807749852e-07, "loss": 0.3089802861213684, "step": 7243 }, { "epoch": 1.7899678774400791, "grad_norm": 1.356483731520831, "learning_rate": 5.984243911731924e-07, "loss": 0.22996735572814941, "step": 7244 }, { "epoch": 1.7902149740548554, "grad_norm": 1.5403894187118712, "learning_rate": 5.970329710690148e-07, "loss": 0.2739906311035156, "step": 7245 }, { "epoch": 1.7904620706696317, "grad_norm": 1.531046502196378, "learning_rate": 5.956431206947377e-07, "loss": 0.22544237971305847, "step": 7246 }, { "epoch": 1.7907091672844082, "grad_norm": 1.4557955648014242, "learning_rate": 5.942548402823822e-07, "loss": 0.2988264262676239, "step": 7247 }, { "epoch": 1.7909562638991847, "grad_norm": 1.8234534762423016, "learning_rate": 5.928681300637096e-07, "loss": 0.3074156641960144, "step": 7248 }, { "epoch": 1.791203360513961, "grad_norm": 1.4706666460404154, "learning_rate": 5.914829902702135e-07, "loss": 0.31191158294677734, "step": 7249 }, { "epoch": 1.7914504571287373, "grad_norm": 7.118825417738696, "learning_rate": 5.900994211331324e-07, "loss": 0.28490743041038513, "step": 7250 }, { "epoch": 1.7916975537435138, "grad_norm": 1.3830525885732325, "learning_rate": 5.887174228834358e-07, "loss": 0.296026349067688, "step": 7251 }, { "epoch": 1.79194465035829, "grad_norm": 1.4054612313805661, "learning_rate": 5.873369957518349e-07, "loss": 0.2819541096687317, "step": 7252 }, { "epoch": 1.7921917469730664, "grad_norm": 1.3102463181843655, "learning_rate": 5.859581399687819e-07, "loss": 0.20203489065170288, "step": 7253 }, { "epoch": 1.792438843587843, "grad_norm": 1.4646690626145809, "learning_rate": 5.845808557644572e-07, "loss": 0.25468602776527405, "step": 7254 }, { "epoch": 1.7926859402026192, "grad_norm": 1.530437680541516, "learning_rate": 5.832051433687868e-07, "loss": 0.29824602603912354, "step": 7255 }, { "epoch": 1.7929330368173955, "grad_norm": 1.4175995820401097, "learning_rate": 5.818310030114338e-07, "loss": 0.2767382264137268, "step": 7256 }, { "epoch": 1.793180133432172, "grad_norm": 1.4324999014029636, "learning_rate": 5.804584349217957e-07, "loss": 0.20991533994674683, "step": 7257 }, { "epoch": 1.7934272300469485, "grad_norm": 1.628503534389833, "learning_rate": 5.79087439329008e-07, "loss": 0.28211501240730286, "step": 7258 }, { "epoch": 1.7936743266617248, "grad_norm": 1.4425291710532453, "learning_rate": 5.777180164619445e-07, "loss": 0.271312415599823, "step": 7259 }, { "epoch": 1.793921423276501, "grad_norm": 1.4264650580335092, "learning_rate": 5.763501665492167e-07, "loss": 0.23194241523742676, "step": 7260 }, { "epoch": 1.7941685198912776, "grad_norm": 1.3917519433355028, "learning_rate": 5.749838898191729e-07, "loss": 0.24541789293289185, "step": 7261 }, { "epoch": 1.7944156165060539, "grad_norm": 1.6606933602822012, "learning_rate": 5.736191864999019e-07, "loss": 0.26822203397750854, "step": 7262 }, { "epoch": 1.7946627131208301, "grad_norm": 1.9321244626743588, "learning_rate": 5.722560568192236e-07, "loss": 0.24706357717514038, "step": 7263 }, { "epoch": 1.7949098097356067, "grad_norm": 1.2819364941403981, "learning_rate": 5.708945010047007e-07, "loss": 0.24731212854385376, "step": 7264 }, { "epoch": 1.7951569063503832, "grad_norm": 1.4779396217497334, "learning_rate": 5.695345192836287e-07, "loss": 0.2853783071041107, "step": 7265 }, { "epoch": 1.7954040029651592, "grad_norm": 1.5292916032591275, "learning_rate": 5.681761118830454e-07, "loss": 0.24818530678749084, "step": 7266 }, { "epoch": 1.7956510995799357, "grad_norm": 1.5135209794436515, "learning_rate": 5.668192790297222e-07, "loss": 0.2565939128398895, "step": 7267 }, { "epoch": 1.7958981961947122, "grad_norm": 1.505036746372105, "learning_rate": 5.654640209501683e-07, "loss": 0.26240694522857666, "step": 7268 }, { "epoch": 1.7961452928094885, "grad_norm": 1.3552910825339715, "learning_rate": 5.641103378706303e-07, "loss": 0.24094629287719727, "step": 7269 }, { "epoch": 1.7963923894242648, "grad_norm": 1.335100873105719, "learning_rate": 5.627582300170908e-07, "loss": 0.24034711718559265, "step": 7270 }, { "epoch": 1.7966394860390413, "grad_norm": 1.5446300443465497, "learning_rate": 5.614076976152749e-07, "loss": 0.2682379484176636, "step": 7271 }, { "epoch": 1.7968865826538176, "grad_norm": 1.3710605200181913, "learning_rate": 5.600587408906366e-07, "loss": 0.23432423174381256, "step": 7272 }, { "epoch": 1.797133679268594, "grad_norm": 1.3106875651659122, "learning_rate": 5.587113600683691e-07, "loss": 0.2602561414241791, "step": 7273 }, { "epoch": 1.7973807758833704, "grad_norm": 1.3168806768420536, "learning_rate": 5.573655553734058e-07, "loss": 0.2353653907775879, "step": 7274 }, { "epoch": 1.797627872498147, "grad_norm": 1.4182841523589471, "learning_rate": 5.560213270304149e-07, "loss": 0.25219330191612244, "step": 7275 }, { "epoch": 1.797874969112923, "grad_norm": 1.5090071567540047, "learning_rate": 5.546786752638045e-07, "loss": 0.2839057743549347, "step": 7276 }, { "epoch": 1.7981220657276995, "grad_norm": 1.353610985876479, "learning_rate": 5.533376002977119e-07, "loss": 0.19550396502017975, "step": 7277 }, { "epoch": 1.798369162342476, "grad_norm": 1.5117452920987953, "learning_rate": 5.519981023560206e-07, "loss": 0.3089959919452667, "step": 7278 }, { "epoch": 1.7986162589572523, "grad_norm": 1.4209840793460724, "learning_rate": 5.506601816623414e-07, "loss": 0.23167935013771057, "step": 7279 }, { "epoch": 1.7988633555720286, "grad_norm": 1.5727687835747672, "learning_rate": 5.493238384400302e-07, "loss": 0.2809705138206482, "step": 7280 }, { "epoch": 1.799110452186805, "grad_norm": 1.412365799012861, "learning_rate": 5.47989072912174e-07, "loss": 0.2570524215698242, "step": 7281 }, { "epoch": 1.7993575488015814, "grad_norm": 1.3119962745652605, "learning_rate": 5.46655885301599e-07, "loss": 0.22960269451141357, "step": 7282 }, { "epoch": 1.7996046454163577, "grad_norm": 1.5376211758024965, "learning_rate": 5.453242758308675e-07, "loss": 0.3015490770339966, "step": 7283 }, { "epoch": 1.7998517420311342, "grad_norm": 1.319384097849337, "learning_rate": 5.439942447222791e-07, "loss": 0.2659420967102051, "step": 7284 }, { "epoch": 1.8000988386459107, "grad_norm": 1.4106750366687169, "learning_rate": 5.426657921978684e-07, "loss": 0.2807409167289734, "step": 7285 }, { "epoch": 1.800345935260687, "grad_norm": 1.4782248449019921, "learning_rate": 5.413389184794049e-07, "loss": 0.27009767293930054, "step": 7286 }, { "epoch": 1.8005930318754633, "grad_norm": 1.3828293210618476, "learning_rate": 5.400136237883991e-07, "loss": 0.2635558545589447, "step": 7287 }, { "epoch": 1.8008401284902398, "grad_norm": 1.2460058658946873, "learning_rate": 5.386899083460939e-07, "loss": 0.23288318514823914, "step": 7288 }, { "epoch": 1.801087225105016, "grad_norm": 1.5080916259388732, "learning_rate": 5.373677723734727e-07, "loss": 0.2637953758239746, "step": 7289 }, { "epoch": 1.8013343217197924, "grad_norm": 1.4567063301592085, "learning_rate": 5.3604721609125e-07, "loss": 0.2690514326095581, "step": 7290 }, { "epoch": 1.8015814183345689, "grad_norm": 1.3929366865711943, "learning_rate": 5.347282397198816e-07, "loss": 0.2515403926372528, "step": 7291 }, { "epoch": 1.8018285149493452, "grad_norm": 1.8678033544333168, "learning_rate": 5.334108434795549e-07, "loss": 0.2828708291053772, "step": 7292 }, { "epoch": 1.8020756115641214, "grad_norm": 1.39877516263123, "learning_rate": 5.32095027590197e-07, "loss": 0.2521718740463257, "step": 7293 }, { "epoch": 1.802322708178898, "grad_norm": 1.4988202160709407, "learning_rate": 5.307807922714714e-07, "loss": 0.2840971052646637, "step": 7294 }, { "epoch": 1.8025698047936745, "grad_norm": 1.4253794653740304, "learning_rate": 5.294681377427724e-07, "loss": 0.26122337579727173, "step": 7295 }, { "epoch": 1.8028169014084507, "grad_norm": 1.446237460318862, "learning_rate": 5.28157064223238e-07, "loss": 0.2944942116737366, "step": 7296 }, { "epoch": 1.803063998023227, "grad_norm": 1.2653897587074723, "learning_rate": 5.268475719317367e-07, "loss": 0.23189491033554077, "step": 7297 }, { "epoch": 1.8033110946380035, "grad_norm": 1.4818743932843554, "learning_rate": 5.255396610868769e-07, "loss": 0.3049568235874176, "step": 7298 }, { "epoch": 1.8035581912527798, "grad_norm": 1.6069320526975004, "learning_rate": 5.242333319070003e-07, "loss": 0.38215363025665283, "step": 7299 }, { "epoch": 1.8038052878675561, "grad_norm": 1.5517136654530848, "learning_rate": 5.229285846101839e-07, "loss": 0.2704274654388428, "step": 7300 }, { "epoch": 1.8040523844823326, "grad_norm": 1.4803415431776223, "learning_rate": 5.216254194142423e-07, "loss": 0.29951012134552, "step": 7301 }, { "epoch": 1.804299481097109, "grad_norm": 1.376130978992421, "learning_rate": 5.203238365367258e-07, "loss": 0.23084229230880737, "step": 7302 }, { "epoch": 1.8045465777118852, "grad_norm": 1.4615032954291158, "learning_rate": 5.190238361949229e-07, "loss": 0.27129247784614563, "step": 7303 }, { "epoch": 1.8047936743266617, "grad_norm": 1.4733161269391861, "learning_rate": 5.177254186058522e-07, "loss": 0.2563117444515228, "step": 7304 }, { "epoch": 1.8050407709414382, "grad_norm": 1.45002834968118, "learning_rate": 5.164285839862737e-07, "loss": 0.25866812467575073, "step": 7305 }, { "epoch": 1.8052878675562145, "grad_norm": 1.5158296358978594, "learning_rate": 5.151333325526786e-07, "loss": 0.2935503125190735, "step": 7306 }, { "epoch": 1.8055349641709908, "grad_norm": 1.549359424467405, "learning_rate": 5.138396645212973e-07, "loss": 0.3161199688911438, "step": 7307 }, { "epoch": 1.8057820607857673, "grad_norm": 1.2910605799354844, "learning_rate": 5.125475801080959e-07, "loss": 0.21641212701797485, "step": 7308 }, { "epoch": 1.8060291574005436, "grad_norm": 1.5986849779807328, "learning_rate": 5.112570795287708e-07, "loss": 0.24174445867538452, "step": 7309 }, { "epoch": 1.8062762540153199, "grad_norm": 1.5046699383001707, "learning_rate": 5.099681629987619e-07, "loss": 0.25441858172416687, "step": 7310 }, { "epoch": 1.8065233506300964, "grad_norm": 1.4561200210103902, "learning_rate": 5.08680830733238e-07, "loss": 0.2622832655906677, "step": 7311 }, { "epoch": 1.806770447244873, "grad_norm": 1.6077123642930486, "learning_rate": 5.073950829471097e-07, "loss": 0.2748345136642456, "step": 7312 }, { "epoch": 1.807017543859649, "grad_norm": 1.4516650933321313, "learning_rate": 5.061109198550163e-07, "loss": 0.25945520401000977, "step": 7313 }, { "epoch": 1.8072646404744255, "grad_norm": 1.3625502666518936, "learning_rate": 5.048283416713362e-07, "loss": 0.27959904074668884, "step": 7314 }, { "epoch": 1.807511737089202, "grad_norm": 1.6840112681236372, "learning_rate": 5.035473486101838e-07, "loss": 0.32973650097846985, "step": 7315 }, { "epoch": 1.8077588337039783, "grad_norm": 1.4447498283013729, "learning_rate": 5.022679408854059e-07, "loss": 0.26934248208999634, "step": 7316 }, { "epoch": 1.8080059303187546, "grad_norm": 1.2918173040598522, "learning_rate": 5.009901187105903e-07, "loss": 0.21633785963058472, "step": 7317 }, { "epoch": 1.808253026933531, "grad_norm": 1.448707125389005, "learning_rate": 4.997138822990533e-07, "loss": 0.2631053924560547, "step": 7318 }, { "epoch": 1.8085001235483074, "grad_norm": 1.7287325402009122, "learning_rate": 4.98439231863852e-07, "loss": 0.23177805542945862, "step": 7319 }, { "epoch": 1.8087472201630836, "grad_norm": 1.5735655621393512, "learning_rate": 4.971661676177731e-07, "loss": 0.2751893401145935, "step": 7320 }, { "epoch": 1.8089943167778602, "grad_norm": 1.639130198874289, "learning_rate": 4.958946897733441e-07, "loss": 0.3065826892852783, "step": 7321 }, { "epoch": 1.8092414133926367, "grad_norm": 1.2842028431575383, "learning_rate": 4.946247985428255e-07, "loss": 0.2425326108932495, "step": 7322 }, { "epoch": 1.809488510007413, "grad_norm": 1.4637014860535933, "learning_rate": 4.933564941382118e-07, "loss": 0.22478346526622772, "step": 7323 }, { "epoch": 1.8097356066221892, "grad_norm": 1.419014391629851, "learning_rate": 4.920897767712329e-07, "loss": 0.23483631014823914, "step": 7324 }, { "epoch": 1.8099827032369658, "grad_norm": 1.4003394651532708, "learning_rate": 4.908246466533562e-07, "loss": 0.2255045473575592, "step": 7325 }, { "epoch": 1.810229799851742, "grad_norm": 1.4844126829896518, "learning_rate": 4.895611039957826e-07, "loss": 0.27059584856033325, "step": 7326 }, { "epoch": 1.8104768964665183, "grad_norm": 1.4861756294813286, "learning_rate": 4.882991490094457e-07, "loss": 0.28097474575042725, "step": 7327 }, { "epoch": 1.8107239930812948, "grad_norm": 1.5000080590804339, "learning_rate": 4.87038781905017e-07, "loss": 0.2507167458534241, "step": 7328 }, { "epoch": 1.8109710896960711, "grad_norm": 1.605194790587171, "learning_rate": 4.857800028929005e-07, "loss": 0.23943284153938293, "step": 7329 }, { "epoch": 1.8112181863108474, "grad_norm": 1.394973712279901, "learning_rate": 4.84522812183239e-07, "loss": 0.29888916015625, "step": 7330 }, { "epoch": 1.811465282925624, "grad_norm": 1.5030670920869431, "learning_rate": 4.832672099859082e-07, "loss": 0.3168069124221802, "step": 7331 }, { "epoch": 1.8117123795404004, "grad_norm": 1.3751006853583416, "learning_rate": 4.820131965105157e-07, "loss": 0.27480363845825195, "step": 7332 }, { "epoch": 1.8119594761551767, "grad_norm": 1.3556039345897035, "learning_rate": 4.807607719664075e-07, "loss": 0.23337337374687195, "step": 7333 }, { "epoch": 1.812206572769953, "grad_norm": 1.2121421107153616, "learning_rate": 4.795099365626621e-07, "loss": 0.23153315484523773, "step": 7334 }, { "epoch": 1.8124536693847295, "grad_norm": 1.5821354439492818, "learning_rate": 4.782606905080955e-07, "loss": 0.24356475472450256, "step": 7335 }, { "epoch": 1.8127007659995058, "grad_norm": 1.565674761246089, "learning_rate": 4.770130340112566e-07, "loss": 0.22933894395828247, "step": 7336 }, { "epoch": 1.812947862614282, "grad_norm": 1.517382926585638, "learning_rate": 4.757669672804266e-07, "loss": 0.2550325393676758, "step": 7337 }, { "epoch": 1.8131949592290586, "grad_norm": 1.4543426583395722, "learning_rate": 4.745224905236256e-07, "loss": 0.27826038002967834, "step": 7338 }, { "epoch": 1.8134420558438349, "grad_norm": 1.4329604825092905, "learning_rate": 4.732796039486065e-07, "loss": 0.29096654057502747, "step": 7339 }, { "epoch": 1.8136891524586112, "grad_norm": 1.5311375104343798, "learning_rate": 4.7203830776285654e-07, "loss": 0.26911401748657227, "step": 7340 }, { "epoch": 1.8139362490733877, "grad_norm": 1.4818557132279766, "learning_rate": 4.7079860217359773e-07, "loss": 0.28571221232414246, "step": 7341 }, { "epoch": 1.8141833456881642, "grad_norm": 1.411995639939248, "learning_rate": 4.695604873877846e-07, "loss": 0.25222593545913696, "step": 7342 }, { "epoch": 1.8144304423029405, "grad_norm": 1.508825703274442, "learning_rate": 4.683239636121095e-07, "loss": 0.2652495205402374, "step": 7343 }, { "epoch": 1.8146775389177168, "grad_norm": 1.5534711877415375, "learning_rate": 4.670890310529963e-07, "loss": 0.2833341360092163, "step": 7344 }, { "epoch": 1.8149246355324933, "grad_norm": 1.3307786440050133, "learning_rate": 4.658556899166067e-07, "loss": 0.2589421272277832, "step": 7345 }, { "epoch": 1.8151717321472696, "grad_norm": 1.32636131069222, "learning_rate": 4.646239404088315e-07, "loss": 0.22725394368171692, "step": 7346 }, { "epoch": 1.8154188287620459, "grad_norm": 1.3387327710357066, "learning_rate": 4.6339378273530076e-07, "loss": 0.2104244828224182, "step": 7347 }, { "epoch": 1.8156659253768224, "grad_norm": 1.4421682963637605, "learning_rate": 4.6216521710137574e-07, "loss": 0.26888370513916016, "step": 7348 }, { "epoch": 1.8159130219915989, "grad_norm": 1.5583654315207722, "learning_rate": 4.6093824371215346e-07, "loss": 0.30123838782310486, "step": 7349 }, { "epoch": 1.816160118606375, "grad_norm": 1.444499824636036, "learning_rate": 4.597128627724667e-07, "loss": 0.2565641701221466, "step": 7350 }, { "epoch": 1.8164072152211514, "grad_norm": 1.4499788162840437, "learning_rate": 4.584890744868764e-07, "loss": 0.2513290047645569, "step": 7351 }, { "epoch": 1.816654311835928, "grad_norm": 1.3635120906294413, "learning_rate": 4.572668790596846e-07, "loss": 0.2523595988750458, "step": 7352 }, { "epoch": 1.8169014084507042, "grad_norm": 1.5802661026964773, "learning_rate": 4.560462766949225e-07, "loss": 0.32372456789016724, "step": 7353 }, { "epoch": 1.8171485050654805, "grad_norm": 1.7511272310918535, "learning_rate": 4.548272675963594e-07, "loss": 0.2772981524467468, "step": 7354 }, { "epoch": 1.817395601680257, "grad_norm": 1.6371197824922201, "learning_rate": 4.5360985196749584e-07, "loss": 0.28623470664024353, "step": 7355 }, { "epoch": 1.8176426982950333, "grad_norm": 1.5100884020913892, "learning_rate": 4.523940300115659e-07, "loss": 0.2446061670780182, "step": 7356 }, { "epoch": 1.8178897949098096, "grad_norm": 1.2531602102613635, "learning_rate": 4.511798019315383e-07, "loss": 0.2008104771375656, "step": 7357 }, { "epoch": 1.8181368915245861, "grad_norm": 1.3902101158073428, "learning_rate": 4.499671679301176e-07, "loss": 0.2209494411945343, "step": 7358 }, { "epoch": 1.8183839881393626, "grad_norm": 1.2911557597970782, "learning_rate": 4.487561282097408e-07, "loss": 0.23360934853553772, "step": 7359 }, { "epoch": 1.8186310847541387, "grad_norm": 1.4518551703775475, "learning_rate": 4.475466829725783e-07, "loss": 0.2574084997177124, "step": 7360 }, { "epoch": 1.8188781813689152, "grad_norm": 1.3545975697309567, "learning_rate": 4.4633883242053424e-07, "loss": 0.26705771684646606, "step": 7361 }, { "epoch": 1.8191252779836917, "grad_norm": 1.4741068279840515, "learning_rate": 4.451325767552461e-07, "loss": 0.27262699604034424, "step": 7362 }, { "epoch": 1.819372374598468, "grad_norm": 1.3757972007411132, "learning_rate": 4.439279161780874e-07, "loss": 0.21912652254104614, "step": 7363 }, { "epoch": 1.8196194712132443, "grad_norm": 1.546531373563033, "learning_rate": 4.4272485089016604e-07, "loss": 0.25081464648246765, "step": 7364 }, { "epoch": 1.8198665678280208, "grad_norm": 1.5292138580250094, "learning_rate": 4.4152338109231694e-07, "loss": 0.3021796941757202, "step": 7365 }, { "epoch": 1.820113664442797, "grad_norm": 2.009266160271486, "learning_rate": 4.403235069851164e-07, "loss": 0.2566278576850891, "step": 7366 }, { "epoch": 1.8203607610575734, "grad_norm": 1.5247400744125656, "learning_rate": 4.3912522876887186e-07, "loss": 0.26249027252197266, "step": 7367 }, { "epoch": 1.82060785767235, "grad_norm": 1.7598469279741387, "learning_rate": 4.379285466436212e-07, "loss": 0.3256896138191223, "step": 7368 }, { "epoch": 1.8208549542871264, "grad_norm": 1.3306512766121616, "learning_rate": 4.3673346080913894e-07, "loss": 0.23144933581352234, "step": 7369 }, { "epoch": 1.8211020509019027, "grad_norm": 1.613946250509877, "learning_rate": 4.355399714649333e-07, "loss": 0.2828035056591034, "step": 7370 }, { "epoch": 1.821349147516679, "grad_norm": 1.437645682948781, "learning_rate": 4.3434807881024497e-07, "loss": 0.2400469183921814, "step": 7371 }, { "epoch": 1.8215962441314555, "grad_norm": 1.6103321254317389, "learning_rate": 4.331577830440481e-07, "loss": 0.26464977860450745, "step": 7372 }, { "epoch": 1.8218433407462318, "grad_norm": 1.5935724658110153, "learning_rate": 4.319690843650526e-07, "loss": 0.2618095278739929, "step": 7373 }, { "epoch": 1.822090437361008, "grad_norm": 1.4857245422874776, "learning_rate": 4.307819829716975e-07, "loss": 0.27414003014564514, "step": 7374 }, { "epoch": 1.8223375339757846, "grad_norm": 1.5160776238616687, "learning_rate": 4.295964790621565e-07, "loss": 0.3121229112148285, "step": 7375 }, { "epoch": 1.8225846305905609, "grad_norm": 1.4330010562703157, "learning_rate": 4.28412572834338e-07, "loss": 0.2478843331336975, "step": 7376 }, { "epoch": 1.8228317272053371, "grad_norm": 1.4629125948636956, "learning_rate": 4.27230264485885e-07, "loss": 0.28256574273109436, "step": 7377 }, { "epoch": 1.8230788238201137, "grad_norm": 1.6132640758631227, "learning_rate": 4.260495542141696e-07, "loss": 0.30007314682006836, "step": 7378 }, { "epoch": 1.8233259204348902, "grad_norm": 1.4953506559005405, "learning_rate": 4.248704422163008e-07, "loss": 0.28648531436920166, "step": 7379 }, { "epoch": 1.8235730170496665, "grad_norm": 1.4852855933954132, "learning_rate": 4.2369292868911894e-07, "loss": 0.27941471338272095, "step": 7380 }, { "epoch": 1.8238201136644427, "grad_norm": 1.457087804260546, "learning_rate": 4.2251701382919895e-07, "loss": 0.27098044753074646, "step": 7381 }, { "epoch": 1.8240672102792193, "grad_norm": 1.5068548900283072, "learning_rate": 4.213426978328461e-07, "loss": 0.3192778527736664, "step": 7382 }, { "epoch": 1.8243143068939955, "grad_norm": 1.5816217278960516, "learning_rate": 4.2016998089610126e-07, "loss": 0.3250621557235718, "step": 7383 }, { "epoch": 1.8245614035087718, "grad_norm": 1.777936795953273, "learning_rate": 4.189988632147368e-07, "loss": 0.3616752326488495, "step": 7384 }, { "epoch": 1.8248085001235483, "grad_norm": 1.4777294277440696, "learning_rate": 4.178293449842596e-07, "loss": 0.27332088351249695, "step": 7385 }, { "epoch": 1.8250555967383248, "grad_norm": 1.6178504458476632, "learning_rate": 4.1666142639991137e-07, "loss": 0.2842009365558624, "step": 7386 }, { "epoch": 1.825302693353101, "grad_norm": 1.3061980712623302, "learning_rate": 4.1549510765666046e-07, "loss": 0.24376952648162842, "step": 7387 }, { "epoch": 1.8255497899678774, "grad_norm": 1.4618170511462512, "learning_rate": 4.143303889492145e-07, "loss": 0.27553778886795044, "step": 7388 }, { "epoch": 1.825796886582654, "grad_norm": 1.3815574187248152, "learning_rate": 4.131672704720091e-07, "loss": 0.254711389541626, "step": 7389 }, { "epoch": 1.8260439831974302, "grad_norm": 1.375530122798949, "learning_rate": 4.120057524192156e-07, "loss": 0.2691386342048645, "step": 7390 }, { "epoch": 1.8262910798122065, "grad_norm": 1.4106168427402117, "learning_rate": 4.1084583498474105e-07, "loss": 0.2572598457336426, "step": 7391 }, { "epoch": 1.826538176426983, "grad_norm": 1.3523195851155283, "learning_rate": 4.0968751836221734e-07, "loss": 0.2621334195137024, "step": 7392 }, { "epoch": 1.8267852730417593, "grad_norm": 1.5656722846917337, "learning_rate": 4.085308027450152e-07, "loss": 0.2820843458175659, "step": 7393 }, { "epoch": 1.8270323696565356, "grad_norm": 1.4648492625515088, "learning_rate": 4.07375688326237e-07, "loss": 0.2826763987541199, "step": 7394 }, { "epoch": 1.827279466271312, "grad_norm": 1.4197306781873742, "learning_rate": 4.0622217529871945e-07, "loss": 0.2765527665615082, "step": 7395 }, { "epoch": 1.8275265628860886, "grad_norm": 1.4319540397361894, "learning_rate": 4.0507026385502747e-07, "loss": 0.23999950289726257, "step": 7396 }, { "epoch": 1.8277736595008647, "grad_norm": 1.2988125454026056, "learning_rate": 4.0391995418745943e-07, "loss": 0.2464100420475006, "step": 7397 }, { "epoch": 1.8280207561156412, "grad_norm": 1.3530737524542165, "learning_rate": 4.0277124648804955e-07, "loss": 0.2389240264892578, "step": 7398 }, { "epoch": 1.8282678527304177, "grad_norm": 1.519578427332818, "learning_rate": 4.016241409485644e-07, "loss": 0.27958518266677856, "step": 7399 }, { "epoch": 1.828514949345194, "grad_norm": 1.382575232847148, "learning_rate": 4.0047863776049967e-07, "loss": 0.24659433960914612, "step": 7400 }, { "epoch": 1.8287620459599703, "grad_norm": 1.437172845790845, "learning_rate": 3.993347371150857e-07, "loss": 0.27832919359207153, "step": 7401 }, { "epoch": 1.8290091425747468, "grad_norm": 1.618892112485944, "learning_rate": 3.9819243920328764e-07, "loss": 0.3324907422065735, "step": 7402 }, { "epoch": 1.829256239189523, "grad_norm": 1.4479151228513356, "learning_rate": 3.970517442157962e-07, "loss": 0.2910301685333252, "step": 7403 }, { "epoch": 1.8295033358042994, "grad_norm": 1.566693464223357, "learning_rate": 3.959126523430401e-07, "loss": 0.3182874619960785, "step": 7404 }, { "epoch": 1.8297504324190759, "grad_norm": 1.4984398539939052, "learning_rate": 3.9477516377518287e-07, "loss": 0.2627241909503937, "step": 7405 }, { "epoch": 1.8299975290338524, "grad_norm": 1.4101908722090464, "learning_rate": 3.936392787021115e-07, "loss": 0.2514804005622864, "step": 7406 }, { "epoch": 1.8302446256486287, "grad_norm": 1.566984091399706, "learning_rate": 3.9250499731345314e-07, "loss": 0.3081667423248291, "step": 7407 }, { "epoch": 1.830491722263405, "grad_norm": 1.3373485658763153, "learning_rate": 3.9137231979856417e-07, "loss": 0.22018052637577057, "step": 7408 }, { "epoch": 1.8307388188781815, "grad_norm": 1.4681934498072058, "learning_rate": 3.9024124634653436e-07, "loss": 0.25727900862693787, "step": 7409 }, { "epoch": 1.8309859154929577, "grad_norm": 1.3748134463374788, "learning_rate": 3.89111777146185e-07, "loss": 0.2725503444671631, "step": 7410 }, { "epoch": 1.831233012107734, "grad_norm": 1.5767082872932279, "learning_rate": 3.8798391238606736e-07, "loss": 0.26968055963516235, "step": 7411 }, { "epoch": 1.8314801087225105, "grad_norm": 1.4639542083770487, "learning_rate": 3.8685765225446867e-07, "loss": 0.2620260417461395, "step": 7412 }, { "epoch": 1.8317272053372868, "grad_norm": 1.2680207885590236, "learning_rate": 3.8573299693940635e-07, "loss": 0.22086164355278015, "step": 7413 }, { "epoch": 1.8319743019520631, "grad_norm": 1.5663183150905744, "learning_rate": 3.846099466286313e-07, "loss": 0.30363720655441284, "step": 7414 }, { "epoch": 1.8322213985668396, "grad_norm": 1.3290970504500323, "learning_rate": 3.8348850150962237e-07, "loss": 0.27210986614227295, "step": 7415 }, { "epoch": 1.8324684951816161, "grad_norm": 1.3942323658762183, "learning_rate": 3.8236866176959763e-07, "loss": 0.2584017515182495, "step": 7416 }, { "epoch": 1.8327155917963924, "grad_norm": 1.530192247301346, "learning_rate": 3.812504275954987e-07, "loss": 0.29506564140319824, "step": 7417 }, { "epoch": 1.8329626884111687, "grad_norm": 1.4154591507039758, "learning_rate": 3.8013379917400505e-07, "loss": 0.24102783203125, "step": 7418 }, { "epoch": 1.8332097850259452, "grad_norm": 1.4367502709689626, "learning_rate": 3.7901877669152873e-07, "loss": 0.2861971855163574, "step": 7419 }, { "epoch": 1.8334568816407215, "grad_norm": 1.5645325061542565, "learning_rate": 3.779053603342087e-07, "loss": 0.2614057660102844, "step": 7420 }, { "epoch": 1.8337039782554978, "grad_norm": 1.5520483211327893, "learning_rate": 3.767935502879183e-07, "loss": 0.2737712562084198, "step": 7421 }, { "epoch": 1.8339510748702743, "grad_norm": 1.600916701150999, "learning_rate": 3.756833467382648e-07, "loss": 0.2772451639175415, "step": 7422 }, { "epoch": 1.8341981714850508, "grad_norm": 1.4151984885430369, "learning_rate": 3.745747498705854e-07, "loss": 0.26693516969680786, "step": 7423 }, { "epoch": 1.8344452680998269, "grad_norm": 1.4431218302245594, "learning_rate": 3.7346775986994767e-07, "loss": 0.2774654030799866, "step": 7424 }, { "epoch": 1.8346923647146034, "grad_norm": 1.5858226222085674, "learning_rate": 3.7236237692115264e-07, "loss": 0.28055962920188904, "step": 7425 }, { "epoch": 1.83493946132938, "grad_norm": 1.4011038917667298, "learning_rate": 3.712586012087327e-07, "loss": 0.2642943859100342, "step": 7426 }, { "epoch": 1.8351865579441562, "grad_norm": 1.4962577354984972, "learning_rate": 3.701564329169527e-07, "loss": 0.23825642466545105, "step": 7427 }, { "epoch": 1.8354336545589325, "grad_norm": 1.5033831247313054, "learning_rate": 3.690558722298088e-07, "loss": 0.2887841761112213, "step": 7428 }, { "epoch": 1.835680751173709, "grad_norm": 1.5443376795742916, "learning_rate": 3.6795691933102727e-07, "loss": 0.27292701601982117, "step": 7429 }, { "epoch": 1.8359278477884853, "grad_norm": 1.3323972826079704, "learning_rate": 3.6685957440406815e-07, "loss": 0.25949692726135254, "step": 7430 }, { "epoch": 1.8361749444032616, "grad_norm": 1.4810602446673622, "learning_rate": 3.657638376321204e-07, "loss": 0.27110713720321655, "step": 7431 }, { "epoch": 1.836422041018038, "grad_norm": 1.7455994760620377, "learning_rate": 3.646697091981066e-07, "loss": 0.32695215940475464, "step": 7432 }, { "epoch": 1.8366691376328146, "grad_norm": 1.3484654715642899, "learning_rate": 3.63577189284684e-07, "loss": 0.24168819189071655, "step": 7433 }, { "epoch": 1.8369162342475907, "grad_norm": 1.348760355499514, "learning_rate": 3.624862780742322e-07, "loss": 0.22911709547042847, "step": 7434 }, { "epoch": 1.8371633308623672, "grad_norm": 1.3325632079145548, "learning_rate": 3.613969757488711e-07, "loss": 0.22298628091812134, "step": 7435 }, { "epoch": 1.8374104274771437, "grad_norm": 1.6877406969278959, "learning_rate": 3.603092824904486e-07, "loss": 0.2606860399246216, "step": 7436 }, { "epoch": 1.83765752409192, "grad_norm": 1.3322424429148756, "learning_rate": 3.5922319848054166e-07, "loss": 0.24093477427959442, "step": 7437 }, { "epoch": 1.8379046207066962, "grad_norm": 1.3344832651725542, "learning_rate": 3.5813872390046524e-07, "loss": 0.23417755961418152, "step": 7438 }, { "epoch": 1.8381517173214728, "grad_norm": 1.7377310715913055, "learning_rate": 3.570558589312567e-07, "loss": 0.3329102396965027, "step": 7439 }, { "epoch": 1.838398813936249, "grad_norm": 1.5493122895841476, "learning_rate": 3.559746037536915e-07, "loss": 0.2511061429977417, "step": 7440 }, { "epoch": 1.8386459105510253, "grad_norm": 1.5879215442087877, "learning_rate": 3.5489495854827417e-07, "loss": 0.31843453645706177, "step": 7441 }, { "epoch": 1.8388930071658018, "grad_norm": 1.4290308183190428, "learning_rate": 3.5381692349524157e-07, "loss": 0.2299453467130661, "step": 7442 }, { "epoch": 1.8391401037805784, "grad_norm": 1.5295754301557016, "learning_rate": 3.527404987745597e-07, "loss": 0.30195170640945435, "step": 7443 }, { "epoch": 1.8393872003953546, "grad_norm": 1.3500237086068925, "learning_rate": 3.516656845659261e-07, "loss": 0.26613834500312805, "step": 7444 }, { "epoch": 1.839634297010131, "grad_norm": 1.5026237252526686, "learning_rate": 3.505924810487693e-07, "loss": 0.3277893662452698, "step": 7445 }, { "epoch": 1.8398813936249074, "grad_norm": 1.393591198778198, "learning_rate": 3.4952088840225273e-07, "loss": 0.25427448749542236, "step": 7446 }, { "epoch": 1.8401284902396837, "grad_norm": 1.6204574262145306, "learning_rate": 3.4845090680526663e-07, "loss": 0.2855403423309326, "step": 7447 }, { "epoch": 1.84037558685446, "grad_norm": 1.3787929435036634, "learning_rate": 3.4738253643643136e-07, "loss": 0.2655267119407654, "step": 7448 }, { "epoch": 1.8406226834692365, "grad_norm": 1.4196761064538554, "learning_rate": 3.4631577747410327e-07, "loss": 0.22764861583709717, "step": 7449 }, { "epoch": 1.8408697800840128, "grad_norm": 1.5496101361870653, "learning_rate": 3.4525063009636763e-07, "loss": 0.2990570366382599, "step": 7450 }, { "epoch": 1.841116876698789, "grad_norm": 1.3860621243559341, "learning_rate": 3.441870944810366e-07, "loss": 0.29463449120521545, "step": 7451 }, { "epoch": 1.8413639733135656, "grad_norm": 1.5879903999543734, "learning_rate": 3.4312517080565934e-07, "loss": 0.2889271676540375, "step": 7452 }, { "epoch": 1.8416110699283421, "grad_norm": 1.3592097592429746, "learning_rate": 3.4206485924751174e-07, "loss": 0.2640455961227417, "step": 7453 }, { "epoch": 1.8418581665431184, "grad_norm": 1.3784372789197734, "learning_rate": 3.410061599836023e-07, "loss": 0.25641512870788574, "step": 7454 }, { "epoch": 1.8421052631578947, "grad_norm": 1.3645188835586803, "learning_rate": 3.399490731906707e-07, "loss": 0.2587471604347229, "step": 7455 }, { "epoch": 1.8423523597726712, "grad_norm": 1.430784880881471, "learning_rate": 3.3889359904518695e-07, "loss": 0.2842384874820709, "step": 7456 }, { "epoch": 1.8425994563874475, "grad_norm": 1.6960136355342985, "learning_rate": 3.378397377233522e-07, "loss": 0.265235960483551, "step": 7457 }, { "epoch": 1.8428465530022238, "grad_norm": 1.5064164857175215, "learning_rate": 3.3678748940109587e-07, "loss": 0.25897538661956787, "step": 7458 }, { "epoch": 1.8430936496170003, "grad_norm": 1.595793813334338, "learning_rate": 3.3573685425408176e-07, "loss": 0.28945690393447876, "step": 7459 }, { "epoch": 1.8433407462317766, "grad_norm": 1.4590907107739866, "learning_rate": 3.346878324577019e-07, "loss": 0.29884397983551025, "step": 7460 }, { "epoch": 1.8435878428465529, "grad_norm": 1.4111023931462043, "learning_rate": 3.3364042418708053e-07, "loss": 0.2647027373313904, "step": 7461 }, { "epoch": 1.8438349394613294, "grad_norm": 1.6150425643124222, "learning_rate": 3.3259462961707234e-07, "loss": 0.2774980664253235, "step": 7462 }, { "epoch": 1.8440820360761059, "grad_norm": 1.522600428589784, "learning_rate": 3.3155044892225986e-07, "loss": 0.28330302238464355, "step": 7463 }, { "epoch": 1.8443291326908822, "grad_norm": 3.9066330642556912, "learning_rate": 3.305078822769614e-07, "loss": 0.2674205005168915, "step": 7464 }, { "epoch": 1.8445762293056585, "grad_norm": 1.452775536294623, "learning_rate": 3.294669298552222e-07, "loss": 0.24205216765403748, "step": 7465 }, { "epoch": 1.844823325920435, "grad_norm": 1.6498222767347226, "learning_rate": 3.2842759183081553e-07, "loss": 0.29402002692222595, "step": 7466 }, { "epoch": 1.8450704225352113, "grad_norm": 1.563125802422105, "learning_rate": 3.273898683772514e-07, "loss": 0.32920411229133606, "step": 7467 }, { "epoch": 1.8453175191499875, "grad_norm": 1.4857924941092862, "learning_rate": 3.263537596677668e-07, "loss": 0.3011128902435303, "step": 7468 }, { "epoch": 1.845564615764764, "grad_norm": 1.4474317713303635, "learning_rate": 3.2531926587533013e-07, "loss": 0.2719927728176117, "step": 7469 }, { "epoch": 1.8458117123795406, "grad_norm": 1.6555522242178324, "learning_rate": 3.242863871726376e-07, "loss": 0.3149169683456421, "step": 7470 }, { "epoch": 1.8460588089943166, "grad_norm": 1.537534510957421, "learning_rate": 3.232551237321213e-07, "loss": 0.3185245394706726, "step": 7471 }, { "epoch": 1.8463059056090931, "grad_norm": 1.348196058484984, "learning_rate": 3.2222547572593576e-07, "loss": 0.23788708448410034, "step": 7472 }, { "epoch": 1.8465530022238696, "grad_norm": 1.5199218581457883, "learning_rate": 3.2119744332597235e-07, "loss": 0.30474650859832764, "step": 7473 }, { "epoch": 1.846800098838646, "grad_norm": 1.4717569483235249, "learning_rate": 3.2017102670385156e-07, "loss": 0.29205742478370667, "step": 7474 }, { "epoch": 1.8470471954534222, "grad_norm": 1.5847472891890984, "learning_rate": 3.1914622603092196e-07, "loss": 0.2848324775695801, "step": 7475 }, { "epoch": 1.8472942920681987, "grad_norm": 1.3170810579988455, "learning_rate": 3.1812304147826324e-07, "loss": 0.21635335683822632, "step": 7476 }, { "epoch": 1.847541388682975, "grad_norm": 1.5781750045698, "learning_rate": 3.171014732166877e-07, "loss": 0.3094272315502167, "step": 7477 }, { "epoch": 1.8477884852977513, "grad_norm": 1.4671391216736325, "learning_rate": 3.160815214167345e-07, "loss": 0.28406256437301636, "step": 7478 }, { "epoch": 1.8480355819125278, "grad_norm": 1.493323596688216, "learning_rate": 3.1506318624867525e-07, "loss": 0.26511648297309875, "step": 7479 }, { "epoch": 1.8482826785273043, "grad_norm": 1.6143122686683753, "learning_rate": 3.1404646788250836e-07, "loss": 0.24862152338027954, "step": 7480 }, { "epoch": 1.8485297751420804, "grad_norm": 1.4621998874267257, "learning_rate": 3.1303136648796586e-07, "loss": 0.292457640171051, "step": 7481 }, { "epoch": 1.848776871756857, "grad_norm": 1.3499771499187025, "learning_rate": 3.120178822345088e-07, "loss": 0.2773153781890869, "step": 7482 }, { "epoch": 1.8490239683716334, "grad_norm": 1.3873142321197254, "learning_rate": 3.110060152913286e-07, "loss": 0.26657938957214355, "step": 7483 }, { "epoch": 1.8492710649864097, "grad_norm": 1.3237947941195012, "learning_rate": 3.0999576582734556e-07, "loss": 0.24720758199691772, "step": 7484 }, { "epoch": 1.849518161601186, "grad_norm": 1.4188831252590477, "learning_rate": 3.089871340112105e-07, "loss": 0.2564849853515625, "step": 7485 }, { "epoch": 1.8497652582159625, "grad_norm": 1.3838843611394072, "learning_rate": 3.079801200113042e-07, "loss": 0.27097901701927185, "step": 7486 }, { "epoch": 1.8500123548307388, "grad_norm": 1.436111829254444, "learning_rate": 3.0697472399573766e-07, "loss": 0.2905351519584656, "step": 7487 }, { "epoch": 1.850259451445515, "grad_norm": 1.6580232771917025, "learning_rate": 3.0597094613235236e-07, "loss": 0.29865795373916626, "step": 7488 }, { "epoch": 1.8505065480602916, "grad_norm": 1.321415165064949, "learning_rate": 3.0496878658871744e-07, "loss": 0.2075897455215454, "step": 7489 }, { "epoch": 1.850753644675068, "grad_norm": 1.6964840527138674, "learning_rate": 3.0396824553213244e-07, "loss": 0.3175673484802246, "step": 7490 }, { "epoch": 1.8510007412898444, "grad_norm": 1.608812908321754, "learning_rate": 3.0296932312963045e-07, "loss": 0.3065110445022583, "step": 7491 }, { "epoch": 1.8512478379046207, "grad_norm": 1.4678334751049358, "learning_rate": 3.019720195479703e-07, "loss": 0.2600344717502594, "step": 7492 }, { "epoch": 1.8514949345193972, "grad_norm": 1.3373661245807844, "learning_rate": 3.0097633495364097e-07, "loss": 0.23082825541496277, "step": 7493 }, { "epoch": 1.8517420311341735, "grad_norm": 1.383260563150785, "learning_rate": 2.999822695128618e-07, "loss": 0.2540821433067322, "step": 7494 }, { "epoch": 1.8519891277489497, "grad_norm": 1.537635145382784, "learning_rate": 2.989898233915822e-07, "loss": 0.2793530821800232, "step": 7495 }, { "epoch": 1.8522362243637263, "grad_norm": 1.2958845969253288, "learning_rate": 2.979989967554808e-07, "loss": 0.17904067039489746, "step": 7496 }, { "epoch": 1.8524833209785025, "grad_norm": 1.4745191077830762, "learning_rate": 2.970097897699675e-07, "loss": 0.24182000756263733, "step": 7497 }, { "epoch": 1.8527304175932788, "grad_norm": 1.5190581147099662, "learning_rate": 2.960222026001791e-07, "loss": 0.2669864594936371, "step": 7498 }, { "epoch": 1.8529775142080553, "grad_norm": 1.5490911244196228, "learning_rate": 2.950362354109826e-07, "loss": 0.25418779253959656, "step": 7499 }, { "epoch": 1.8532246108228319, "grad_norm": 1.6242377153445442, "learning_rate": 2.9405188836697515e-07, "loss": 0.31334826350212097, "step": 7500 }, { "epoch": 1.8534717074376081, "grad_norm": 1.6583175203663374, "learning_rate": 2.930691616324854e-07, "loss": 0.29721999168395996, "step": 7501 }, { "epoch": 1.8537188040523844, "grad_norm": 1.3852093878833311, "learning_rate": 2.920880553715699e-07, "loss": 0.26790541410446167, "step": 7502 }, { "epoch": 1.853965900667161, "grad_norm": 1.4186448179986015, "learning_rate": 2.9110856974801095e-07, "loss": 0.22842714190483093, "step": 7503 }, { "epoch": 1.8542129972819372, "grad_norm": 1.4303316918216862, "learning_rate": 2.9013070492532656e-07, "loss": 0.28604960441589355, "step": 7504 }, { "epoch": 1.8544600938967135, "grad_norm": 1.2596441773437796, "learning_rate": 2.891544610667607e-07, "loss": 0.2625197172164917, "step": 7505 }, { "epoch": 1.85470719051149, "grad_norm": 1.523671891152255, "learning_rate": 2.8817983833528626e-07, "loss": 0.25765472650527954, "step": 7506 }, { "epoch": 1.8549542871262665, "grad_norm": 1.430328115821207, "learning_rate": 2.8720683689360986e-07, "loss": 0.28379976749420166, "step": 7507 }, { "epoch": 1.8552013837410426, "grad_norm": 1.4727915672258411, "learning_rate": 2.862354569041603e-07, "loss": 0.28145819902420044, "step": 7508 }, { "epoch": 1.855448480355819, "grad_norm": 1.3329387512896913, "learning_rate": 2.8526569852910027e-07, "loss": 0.2876074016094208, "step": 7509 }, { "epoch": 1.8556955769705956, "grad_norm": 1.5231497903691333, "learning_rate": 2.842975619303234e-07, "loss": 0.31938695907592773, "step": 7510 }, { "epoch": 1.855942673585372, "grad_norm": 1.506961514985738, "learning_rate": 2.833310472694495e-07, "loss": 0.27701082825660706, "step": 7511 }, { "epoch": 1.8561897702001482, "grad_norm": 1.3915101549371276, "learning_rate": 2.8236615470782825e-07, "loss": 0.26446330547332764, "step": 7512 }, { "epoch": 1.8564368668149247, "grad_norm": 1.2660903274011834, "learning_rate": 2.814028844065364e-07, "loss": 0.22602255642414093, "step": 7513 }, { "epoch": 1.856683963429701, "grad_norm": 1.8092862648054795, "learning_rate": 2.804412365263842e-07, "loss": 0.21488907933235168, "step": 7514 }, { "epoch": 1.8569310600444773, "grad_norm": 1.5424980852014694, "learning_rate": 2.7948121122790995e-07, "loss": 0.28941652178764343, "step": 7515 }, { "epoch": 1.8571781566592538, "grad_norm": 1.5273787877931893, "learning_rate": 2.785228086713787e-07, "loss": 0.2795920670032501, "step": 7516 }, { "epoch": 1.8574252532740303, "grad_norm": 1.42668131840218, "learning_rate": 2.775660290167859e-07, "loss": 0.29538002610206604, "step": 7517 }, { "epoch": 1.8576723498888064, "grad_norm": 1.4392585757257728, "learning_rate": 2.766108724238559e-07, "loss": 0.2919466495513916, "step": 7518 }, { "epoch": 1.8579194465035829, "grad_norm": 1.6018516739521063, "learning_rate": 2.756573390520445e-07, "loss": 0.32244205474853516, "step": 7519 }, { "epoch": 1.8581665431183594, "grad_norm": 1.4208993587614518, "learning_rate": 2.747054290605322e-07, "loss": 0.24253326654434204, "step": 7520 }, { "epoch": 1.8584136397331357, "grad_norm": 1.5611009393392115, "learning_rate": 2.73755142608233e-07, "loss": 0.3085668087005615, "step": 7521 }, { "epoch": 1.858660736347912, "grad_norm": 2.505507980716684, "learning_rate": 2.728064798537844e-07, "loss": 0.27205953001976013, "step": 7522 }, { "epoch": 1.8589078329626885, "grad_norm": 1.4598063193319117, "learning_rate": 2.718594409555575e-07, "loss": 0.29534250497817993, "step": 7523 }, { "epoch": 1.8591549295774648, "grad_norm": 1.4343765312725323, "learning_rate": 2.709140260716525e-07, "loss": 0.27850672602653503, "step": 7524 }, { "epoch": 1.859402026192241, "grad_norm": 1.3823012463165776, "learning_rate": 2.6997023535989544e-07, "loss": 0.23831512033939362, "step": 7525 }, { "epoch": 1.8596491228070176, "grad_norm": 1.5040674680893091, "learning_rate": 2.690280689778424e-07, "loss": 0.3296036422252655, "step": 7526 }, { "epoch": 1.859896219421794, "grad_norm": 1.4436387280986456, "learning_rate": 2.680875270827776e-07, "loss": 0.2888143062591553, "step": 7527 }, { "epoch": 1.8601433160365703, "grad_norm": 1.4833545673863096, "learning_rate": 2.671486098317155e-07, "loss": 0.275798499584198, "step": 7528 }, { "epoch": 1.8603904126513466, "grad_norm": 1.442640664428109, "learning_rate": 2.6621131738140074e-07, "loss": 0.2740054726600647, "step": 7529 }, { "epoch": 1.8606375092661231, "grad_norm": 1.3064599807642685, "learning_rate": 2.6527564988830357e-07, "loss": 0.24403059482574463, "step": 7530 }, { "epoch": 1.8608846058808994, "grad_norm": 1.4917307176944912, "learning_rate": 2.643416075086225e-07, "loss": 0.22545981407165527, "step": 7531 }, { "epoch": 1.8611317024956757, "grad_norm": 1.5646599371703123, "learning_rate": 2.6340919039828826e-07, "loss": 0.3072686195373535, "step": 7532 }, { "epoch": 1.8613787991104522, "grad_norm": 1.533101375484395, "learning_rate": 2.6247839871295977e-07, "loss": 0.272394061088562, "step": 7533 }, { "epoch": 1.8616258957252285, "grad_norm": 1.5445862007782531, "learning_rate": 2.6154923260801934e-07, "loss": 0.2913052439689636, "step": 7534 }, { "epoch": 1.8618729923400048, "grad_norm": 1.3266147487669364, "learning_rate": 2.6062169223858516e-07, "loss": 0.2550148367881775, "step": 7535 }, { "epoch": 1.8621200889547813, "grad_norm": 1.5057876208945131, "learning_rate": 2.596957777594988e-07, "loss": 0.2662245035171509, "step": 7536 }, { "epoch": 1.8623671855695578, "grad_norm": 1.3913132104651154, "learning_rate": 2.587714893253335e-07, "loss": 0.24315503239631653, "step": 7537 }, { "epoch": 1.8626142821843341, "grad_norm": 1.263872223423778, "learning_rate": 2.5784882709038893e-07, "loss": 0.22198417782783508, "step": 7538 }, { "epoch": 1.8628613787991104, "grad_norm": 1.5907459953237475, "learning_rate": 2.5692779120869536e-07, "loss": 0.2932811975479126, "step": 7539 }, { "epoch": 1.863108475413887, "grad_norm": 1.540305712575099, "learning_rate": 2.560083818340098e-07, "loss": 0.25172707438468933, "step": 7540 }, { "epoch": 1.8633555720286632, "grad_norm": 1.401435522602949, "learning_rate": 2.5509059911981714e-07, "loss": 0.2195184975862503, "step": 7541 }, { "epoch": 1.8636026686434395, "grad_norm": 1.5768584848571907, "learning_rate": 2.541744432193338e-07, "loss": 0.33802706003189087, "step": 7542 }, { "epoch": 1.863849765258216, "grad_norm": 1.4920038051272144, "learning_rate": 2.532599142855008e-07, "loss": 0.24914000928401947, "step": 7543 }, { "epoch": 1.8640968618729925, "grad_norm": 1.3883777099845256, "learning_rate": 2.5234701247099147e-07, "loss": 0.331355482339859, "step": 7544 }, { "epoch": 1.8643439584877686, "grad_norm": 1.5894651557112878, "learning_rate": 2.5143573792820287e-07, "loss": 0.2692689299583435, "step": 7545 }, { "epoch": 1.864591055102545, "grad_norm": 1.521239950178576, "learning_rate": 2.505260908092655e-07, "loss": 0.26282799243927, "step": 7546 }, { "epoch": 1.8648381517173216, "grad_norm": 1.6464010393646127, "learning_rate": 2.4961807126603566e-07, "loss": 0.3759229779243469, "step": 7547 }, { "epoch": 1.8650852483320979, "grad_norm": 1.336850431370865, "learning_rate": 2.4871167945009654e-07, "loss": 0.26555266976356506, "step": 7548 }, { "epoch": 1.8653323449468742, "grad_norm": 1.6801875194654774, "learning_rate": 2.478069155127627e-07, "loss": 0.3102770447731018, "step": 7549 }, { "epoch": 1.8655794415616507, "grad_norm": 1.4478967633217719, "learning_rate": 2.469037796050733e-07, "loss": 0.22406771779060364, "step": 7550 }, { "epoch": 1.865826538176427, "grad_norm": 1.3258235551258848, "learning_rate": 2.460022718777977e-07, "loss": 0.2506429851055145, "step": 7551 }, { "epoch": 1.8660736347912033, "grad_norm": 1.4232662196961932, "learning_rate": 2.451023924814355e-07, "loss": 0.26525992155075073, "step": 7552 }, { "epoch": 1.8663207314059798, "grad_norm": 1.5249557348145775, "learning_rate": 2.442041415662122e-07, "loss": 0.3024711310863495, "step": 7553 }, { "epoch": 1.8665678280207563, "grad_norm": 1.25107536036197, "learning_rate": 2.4330751928208106e-07, "loss": 0.2265186905860901, "step": 7554 }, { "epoch": 1.8668149246355323, "grad_norm": 1.6018164175682883, "learning_rate": 2.4241252577872245e-07, "loss": 0.3078402280807495, "step": 7555 }, { "epoch": 1.8670620212503088, "grad_norm": 1.447248897967084, "learning_rate": 2.415191612055479e-07, "loss": 0.2484641671180725, "step": 7556 }, { "epoch": 1.8673091178650854, "grad_norm": 1.3746918675750333, "learning_rate": 2.4062742571169697e-07, "loss": 0.28974127769470215, "step": 7557 }, { "epoch": 1.8675562144798616, "grad_norm": 1.5355586004686557, "learning_rate": 2.3973731944603285e-07, "loss": 0.2618272304534912, "step": 7558 }, { "epoch": 1.867803311094638, "grad_norm": 1.3735073645747022, "learning_rate": 2.3884884255715115e-07, "loss": 0.24171504378318787, "step": 7559 }, { "epoch": 1.8680504077094144, "grad_norm": 1.48822975465262, "learning_rate": 2.3796199519337538e-07, "loss": 0.2643190026283264, "step": 7560 }, { "epoch": 1.8682975043241907, "grad_norm": 1.3540985771294238, "learning_rate": 2.3707677750275493e-07, "loss": 0.2423945516347885, "step": 7561 }, { "epoch": 1.868544600938967, "grad_norm": 1.3395705223921466, "learning_rate": 2.3619318963306713e-07, "loss": 0.24498698115348816, "step": 7562 }, { "epoch": 1.8687916975537435, "grad_norm": 1.4881247894267313, "learning_rate": 2.3531123173181847e-07, "loss": 0.2708097994327545, "step": 7563 }, { "epoch": 1.86903879416852, "grad_norm": 1.4956442467456312, "learning_rate": 2.3443090394624225e-07, "loss": 0.2427428960800171, "step": 7564 }, { "epoch": 1.8692858907832963, "grad_norm": 1.4923842293259477, "learning_rate": 2.335522064233009e-07, "loss": 0.31430166959762573, "step": 7565 }, { "epoch": 1.8695329873980726, "grad_norm": 1.630203259833823, "learning_rate": 2.3267513930968378e-07, "loss": 0.2939034104347229, "step": 7566 }, { "epoch": 1.8697800840128491, "grad_norm": 1.5793854760043364, "learning_rate": 2.317997027518104e-07, "loss": 0.29203733801841736, "step": 7567 }, { "epoch": 1.8700271806276254, "grad_norm": 1.6229019359008348, "learning_rate": 2.3092589689582168e-07, "loss": 0.33349281549453735, "step": 7568 }, { "epoch": 1.8702742772424017, "grad_norm": 1.3279325794087962, "learning_rate": 2.3005372188759423e-07, "loss": 0.26640552282333374, "step": 7569 }, { "epoch": 1.8705213738571782, "grad_norm": 1.3018028872531802, "learning_rate": 2.2918317787272714e-07, "loss": 0.2733840346336365, "step": 7570 }, { "epoch": 1.8707684704719545, "grad_norm": 1.4153316483511624, "learning_rate": 2.2831426499655085e-07, "loss": 0.27534592151641846, "step": 7571 }, { "epoch": 1.8710155670867308, "grad_norm": 1.636560263838681, "learning_rate": 2.274469834041182e-07, "loss": 0.26986801624298096, "step": 7572 }, { "epoch": 1.8712626637015073, "grad_norm": 1.4087779276510195, "learning_rate": 2.265813332402167e-07, "loss": 0.2560684084892273, "step": 7573 }, { "epoch": 1.8715097603162838, "grad_norm": 1.3515602895202248, "learning_rate": 2.2571731464935521e-07, "loss": 0.25309091806411743, "step": 7574 }, { "epoch": 1.87175685693106, "grad_norm": 1.4852361385184814, "learning_rate": 2.248549277757739e-07, "loss": 0.28303176164627075, "step": 7575 }, { "epoch": 1.8720039535458364, "grad_norm": 1.4141466145988704, "learning_rate": 2.2399417276344094e-07, "loss": 0.23862817883491516, "step": 7576 }, { "epoch": 1.8722510501606129, "grad_norm": 1.6452076608056339, "learning_rate": 2.2313504975604693e-07, "loss": 0.2778530418872833, "step": 7577 }, { "epoch": 1.8724981467753892, "grad_norm": 1.3084759996136828, "learning_rate": 2.2227755889701608e-07, "loss": 0.2554117441177368, "step": 7578 }, { "epoch": 1.8727452433901655, "grad_norm": 1.4897054210098755, "learning_rate": 2.2142170032949828e-07, "loss": 0.2738204598426819, "step": 7579 }, { "epoch": 1.872992340004942, "grad_norm": 1.6404976869824595, "learning_rate": 2.2056747419637037e-07, "loss": 0.24137690663337708, "step": 7580 }, { "epoch": 1.8732394366197183, "grad_norm": 1.7416067704167488, "learning_rate": 2.1971488064023604e-07, "loss": 0.34793373942375183, "step": 7581 }, { "epoch": 1.8734865332344945, "grad_norm": 1.447611023412462, "learning_rate": 2.188639198034259e-07, "loss": 0.260681688785553, "step": 7582 }, { "epoch": 1.873733629849271, "grad_norm": 1.3882769099752745, "learning_rate": 2.180145918280019e-07, "loss": 0.25454896688461304, "step": 7583 }, { "epoch": 1.8739807264640476, "grad_norm": 1.4940940063383643, "learning_rate": 2.1716689685574833e-07, "loss": 0.30735334753990173, "step": 7584 }, { "epoch": 1.8742278230788239, "grad_norm": 1.311391906868163, "learning_rate": 2.163208350281809e-07, "loss": 0.2512474060058594, "step": 7585 }, { "epoch": 1.8744749196936001, "grad_norm": 1.4348197714443818, "learning_rate": 2.1547640648654e-07, "loss": 0.30640408396720886, "step": 7586 }, { "epoch": 1.8747220163083766, "grad_norm": 1.3743874995483316, "learning_rate": 2.1463361137179506e-07, "loss": 0.23887354135513306, "step": 7587 }, { "epoch": 1.874969112923153, "grad_norm": 1.63752204659565, "learning_rate": 2.1379244982464243e-07, "loss": 0.31478166580200195, "step": 7588 }, { "epoch": 1.8752162095379292, "grad_norm": 1.663454660817295, "learning_rate": 2.1295292198550533e-07, "loss": 0.28974002599716187, "step": 7589 }, { "epoch": 1.8754633061527057, "grad_norm": 1.6787571169713376, "learning_rate": 2.1211502799453387e-07, "loss": 0.3279544711112976, "step": 7590 }, { "epoch": 1.8757104027674822, "grad_norm": 1.5975013901094512, "learning_rate": 2.1127876799160508e-07, "loss": 0.2569606304168701, "step": 7591 }, { "epoch": 1.8759574993822583, "grad_norm": 1.685655436134548, "learning_rate": 2.1044414211632615e-07, "loss": 0.26044926047325134, "step": 7592 }, { "epoch": 1.8762045959970348, "grad_norm": 1.3947907083651465, "learning_rate": 2.0961115050802783e-07, "loss": 0.2555093765258789, "step": 7593 }, { "epoch": 1.8764516926118113, "grad_norm": 1.2692300426709655, "learning_rate": 2.0877979330577115e-07, "loss": 0.23901231586933136, "step": 7594 }, { "epoch": 1.8766987892265876, "grad_norm": 1.2977889725050855, "learning_rate": 2.0795007064834172e-07, "loss": 0.2573810815811157, "step": 7595 }, { "epoch": 1.876945885841364, "grad_norm": 1.46735586875998, "learning_rate": 2.071219826742521e-07, "loss": 0.2871108055114746, "step": 7596 }, { "epoch": 1.8771929824561404, "grad_norm": 1.5017592844149201, "learning_rate": 2.0629552952174503e-07, "loss": 0.2664934992790222, "step": 7597 }, { "epoch": 1.8774400790709167, "grad_norm": 1.4420792750719893, "learning_rate": 2.0547071132878792e-07, "loss": 0.2523168921470642, "step": 7598 }, { "epoch": 1.877687175685693, "grad_norm": 1.5297808500478374, "learning_rate": 2.0464752823307732e-07, "loss": 0.30794185400009155, "step": 7599 }, { "epoch": 1.8779342723004695, "grad_norm": 1.5039485647378255, "learning_rate": 2.0382598037203217e-07, "loss": 0.29153141379356384, "step": 7600 }, { "epoch": 1.878181368915246, "grad_norm": 1.485884329024612, "learning_rate": 2.0300606788280274e-07, "loss": 0.25254178047180176, "step": 7601 }, { "epoch": 1.878428465530022, "grad_norm": 1.6719391482613906, "learning_rate": 2.0218779090226737e-07, "loss": 0.34788978099823, "step": 7602 }, { "epoch": 1.8786755621447986, "grad_norm": 1.2906094536708497, "learning_rate": 2.0137114956702563e-07, "loss": 0.22955027222633362, "step": 7603 }, { "epoch": 1.878922658759575, "grad_norm": 1.634506295841494, "learning_rate": 2.0055614401340962e-07, "loss": 0.3211899399757385, "step": 7604 }, { "epoch": 1.8791697553743514, "grad_norm": 1.4393345841109255, "learning_rate": 1.9974277437747602e-07, "loss": 0.2714894413948059, "step": 7605 }, { "epoch": 1.8794168519891277, "grad_norm": 1.390464663367239, "learning_rate": 1.9893104079500735e-07, "loss": 0.2639182507991791, "step": 7606 }, { "epoch": 1.8796639486039042, "grad_norm": 1.4593114279744437, "learning_rate": 1.9812094340151634e-07, "loss": 0.25715750455856323, "step": 7607 }, { "epoch": 1.8799110452186805, "grad_norm": 1.4399291731707147, "learning_rate": 1.973124823322392e-07, "loss": 0.2693710923194885, "step": 7608 }, { "epoch": 1.8801581418334568, "grad_norm": 1.362175534838936, "learning_rate": 1.9650565772214135e-07, "loss": 0.26250672340393066, "step": 7609 }, { "epoch": 1.8804052384482333, "grad_norm": 1.426813045014591, "learning_rate": 1.9570046970591172e-07, "loss": 0.2866879105567932, "step": 7610 }, { "epoch": 1.8806523350630098, "grad_norm": 1.3590795753821578, "learning_rate": 1.9489691841797055e-07, "loss": 0.24722671508789062, "step": 7611 }, { "epoch": 1.880899431677786, "grad_norm": 1.2432905478060616, "learning_rate": 1.9409500399246163e-07, "loss": 0.2169295847415924, "step": 7612 }, { "epoch": 1.8811465282925623, "grad_norm": 1.369748311549114, "learning_rate": 1.9329472656325676e-07, "loss": 0.29316917061805725, "step": 7613 }, { "epoch": 1.8813936249073389, "grad_norm": 1.5410133833319701, "learning_rate": 1.9249608626395354e-07, "loss": 0.30604851245880127, "step": 7614 }, { "epoch": 1.8816407215221151, "grad_norm": 1.5715919723349112, "learning_rate": 1.9169908322787755e-07, "loss": 0.29759442806243896, "step": 7615 }, { "epoch": 1.8818878181368914, "grad_norm": 1.507465058272572, "learning_rate": 1.9090371758808013e-07, "loss": 0.28366321325302124, "step": 7616 }, { "epoch": 1.882134914751668, "grad_norm": 1.3017201061835046, "learning_rate": 1.901099894773395e-07, "loss": 0.27468961477279663, "step": 7617 }, { "epoch": 1.8823820113664442, "grad_norm": 1.3552500561429246, "learning_rate": 1.8931789902816078e-07, "loss": 0.23208259046077728, "step": 7618 }, { "epoch": 1.8826291079812205, "grad_norm": 1.5208181934337768, "learning_rate": 1.8852744637277376e-07, "loss": 0.280447781085968, "step": 7619 }, { "epoch": 1.882876204595997, "grad_norm": 1.6181594042505738, "learning_rate": 1.8773863164313843e-07, "loss": 0.24268481135368347, "step": 7620 }, { "epoch": 1.8831233012107735, "grad_norm": 1.5286931044586654, "learning_rate": 1.8695145497093946e-07, "loss": 0.32167255878448486, "step": 7621 }, { "epoch": 1.8833703978255498, "grad_norm": 1.6480669305737374, "learning_rate": 1.861659164875873e-07, "loss": 0.2550937235355377, "step": 7622 }, { "epoch": 1.8836174944403261, "grad_norm": 1.516476832502718, "learning_rate": 1.8538201632422038e-07, "loss": 0.2751178443431854, "step": 7623 }, { "epoch": 1.8838645910551026, "grad_norm": 1.5197525623909576, "learning_rate": 1.8459975461170065e-07, "loss": 0.27797698974609375, "step": 7624 }, { "epoch": 1.884111687669879, "grad_norm": 1.4603386914171463, "learning_rate": 1.8381913148062035e-07, "loss": 0.27856698632240295, "step": 7625 }, { "epoch": 1.8843587842846552, "grad_norm": 1.4198176502513618, "learning_rate": 1.8304014706129637e-07, "loss": 0.2943823039531708, "step": 7626 }, { "epoch": 1.8846058808994317, "grad_norm": 1.402925957005897, "learning_rate": 1.822628014837735e-07, "loss": 0.2684933543205261, "step": 7627 }, { "epoch": 1.8848529775142082, "grad_norm": 1.5254544087506798, "learning_rate": 1.8148709487782023e-07, "loss": 0.27386415004730225, "step": 7628 }, { "epoch": 1.8851000741289843, "grad_norm": 1.5506759337477862, "learning_rate": 1.8071302737293294e-07, "loss": 0.3056357204914093, "step": 7629 }, { "epoch": 1.8853471707437608, "grad_norm": 1.393287008578136, "learning_rate": 1.7994059909833495e-07, "loss": 0.28482580184936523, "step": 7630 }, { "epoch": 1.8855942673585373, "grad_norm": 1.535700432693499, "learning_rate": 1.791698101829753e-07, "loss": 0.28105953335762024, "step": 7631 }, { "epoch": 1.8858413639733136, "grad_norm": 1.323090346052398, "learning_rate": 1.7840066075552887e-07, "loss": 0.2666962742805481, "step": 7632 }, { "epoch": 1.8860884605880899, "grad_norm": 1.59641010416777, "learning_rate": 1.7763315094439737e-07, "loss": 0.32034486532211304, "step": 7633 }, { "epoch": 1.8863355572028664, "grad_norm": 1.2511861451067, "learning_rate": 1.7686728087770944e-07, "loss": 0.23532992601394653, "step": 7634 }, { "epoch": 1.8865826538176427, "grad_norm": 1.4004845371588126, "learning_rate": 1.7610305068331833e-07, "loss": 0.23702482879161835, "step": 7635 }, { "epoch": 1.886829750432419, "grad_norm": 1.418637152741255, "learning_rate": 1.7534046048880538e-07, "loss": 0.2532722055912018, "step": 7636 }, { "epoch": 1.8870768470471955, "grad_norm": 1.7762218485336854, "learning_rate": 1.7457951042147757e-07, "loss": 0.3023557662963867, "step": 7637 }, { "epoch": 1.887323943661972, "grad_norm": 1.6363054894306586, "learning_rate": 1.7382020060836669e-07, "loss": 0.31595277786254883, "step": 7638 }, { "epoch": 1.887571040276748, "grad_norm": 1.6032745002791196, "learning_rate": 1.7306253117623128e-07, "loss": 0.3030865490436554, "step": 7639 }, { "epoch": 1.8878181368915246, "grad_norm": 1.4895779870171368, "learning_rate": 1.72306502251558e-07, "loss": 0.3272688090801239, "step": 7640 }, { "epoch": 1.888065233506301, "grad_norm": 1.3626729580984291, "learning_rate": 1.7155211396055914e-07, "loss": 0.24254970252513885, "step": 7641 }, { "epoch": 1.8883123301210774, "grad_norm": 1.460412483922241, "learning_rate": 1.7079936642916962e-07, "loss": 0.2681581377983093, "step": 7642 }, { "epoch": 1.8885594267358536, "grad_norm": 1.4100541394898891, "learning_rate": 1.700482597830555e-07, "loss": 0.25675734877586365, "step": 7643 }, { "epoch": 1.8888065233506302, "grad_norm": 1.6006312066707373, "learning_rate": 1.692987941476032e-07, "loss": 0.34730154275894165, "step": 7644 }, { "epoch": 1.8890536199654064, "grad_norm": 1.5800945504131425, "learning_rate": 1.6855096964793262e-07, "loss": 0.27404356002807617, "step": 7645 }, { "epoch": 1.8893007165801827, "grad_norm": 1.3859545551299952, "learning_rate": 1.6780478640888054e-07, "loss": 0.27143245935440063, "step": 7646 }, { "epoch": 1.8895478131949592, "grad_norm": 1.4441071003637789, "learning_rate": 1.6706024455501847e-07, "loss": 0.2455027997493744, "step": 7647 }, { "epoch": 1.8897949098097357, "grad_norm": 1.5612289493539315, "learning_rate": 1.6631734421063917e-07, "loss": 0.2888326048851013, "step": 7648 }, { "epoch": 1.890042006424512, "grad_norm": 1.381738366594722, "learning_rate": 1.6557608549976235e-07, "loss": 0.27781763672828674, "step": 7649 }, { "epoch": 1.8902891030392883, "grad_norm": 1.438861680018603, "learning_rate": 1.6483646854613345e-07, "loss": 0.29460573196411133, "step": 7650 }, { "epoch": 1.8905361996540648, "grad_norm": 1.4721242785225548, "learning_rate": 1.640984934732226e-07, "loss": 0.27441781759262085, "step": 7651 }, { "epoch": 1.8907832962688411, "grad_norm": 1.5249358753779252, "learning_rate": 1.6336216040422903e-07, "loss": 0.26756349205970764, "step": 7652 }, { "epoch": 1.8910303928836174, "grad_norm": 1.5494877707381063, "learning_rate": 1.6262746946207553e-07, "loss": 0.2824179232120514, "step": 7653 }, { "epoch": 1.891277489498394, "grad_norm": 1.4894483538011982, "learning_rate": 1.6189442076941285e-07, "loss": 0.29889774322509766, "step": 7654 }, { "epoch": 1.8915245861131702, "grad_norm": 1.2697433458808762, "learning_rate": 1.6116301444861316e-07, "loss": 0.19671396911144257, "step": 7655 }, { "epoch": 1.8917716827279465, "grad_norm": 1.3017372462914878, "learning_rate": 1.6043325062177872e-07, "loss": 0.23567475378513336, "step": 7656 }, { "epoch": 1.892018779342723, "grad_norm": 1.6064507352420734, "learning_rate": 1.5970512941073658e-07, "loss": 0.2798911929130554, "step": 7657 }, { "epoch": 1.8922658759574995, "grad_norm": 1.2291823662484236, "learning_rate": 1.5897865093703834e-07, "loss": 0.2111741006374359, "step": 7658 }, { "epoch": 1.8925129725722758, "grad_norm": 1.5134635434858599, "learning_rate": 1.582538153219637e-07, "loss": 0.26684117317199707, "step": 7659 }, { "epoch": 1.892760069187052, "grad_norm": 1.4170501232622637, "learning_rate": 1.5753062268651477e-07, "loss": 0.2202518731355667, "step": 7660 }, { "epoch": 1.8930071658018286, "grad_norm": 1.6286532675876546, "learning_rate": 1.5680907315142158e-07, "loss": 0.22600692510604858, "step": 7661 }, { "epoch": 1.8932542624166049, "grad_norm": 1.2622862620854467, "learning_rate": 1.5608916683714003e-07, "loss": 0.2500426769256592, "step": 7662 }, { "epoch": 1.8935013590313812, "grad_norm": 1.5951103734518044, "learning_rate": 1.553709038638529e-07, "loss": 0.3063618540763855, "step": 7663 }, { "epoch": 1.8937484556461577, "grad_norm": 1.4137159655227767, "learning_rate": 1.5465428435146311e-07, "loss": 0.24457594752311707, "step": 7664 }, { "epoch": 1.8939955522609342, "grad_norm": 1.5268287836195242, "learning_rate": 1.5393930841960614e-07, "loss": 0.23455733060836792, "step": 7665 }, { "epoch": 1.8942426488757103, "grad_norm": 1.5162629508791725, "learning_rate": 1.5322597618763756e-07, "loss": 0.24433188140392303, "step": 7666 }, { "epoch": 1.8944897454904868, "grad_norm": 1.4162859500698135, "learning_rate": 1.5251428777464328e-07, "loss": 0.26576757431030273, "step": 7667 }, { "epoch": 1.8947368421052633, "grad_norm": 1.4694757801010327, "learning_rate": 1.5180424329943044e-07, "loss": 0.23924419283866882, "step": 7668 }, { "epoch": 1.8949839387200396, "grad_norm": 1.355038707381716, "learning_rate": 1.5109584288053535e-07, "loss": 0.26823824644088745, "step": 7669 }, { "epoch": 1.8952310353348159, "grad_norm": 1.5534609372962143, "learning_rate": 1.5038908663621789e-07, "loss": 0.26500600576400757, "step": 7670 }, { "epoch": 1.8954781319495924, "grad_norm": 2.1328694402367385, "learning_rate": 1.496839746844636e-07, "loss": 0.2793307602405548, "step": 7671 }, { "epoch": 1.8957252285643686, "grad_norm": 1.5347620458488063, "learning_rate": 1.4898050714298285e-07, "loss": 0.26065772771835327, "step": 7672 }, { "epoch": 1.895972325179145, "grad_norm": 1.723367605148988, "learning_rate": 1.4827868412921386e-07, "loss": 0.2815890908241272, "step": 7673 }, { "epoch": 1.8962194217939214, "grad_norm": 1.4508245114319804, "learning_rate": 1.475785057603174e-07, "loss": 0.2824169993400574, "step": 7674 }, { "epoch": 1.896466518408698, "grad_norm": 1.6168467738263481, "learning_rate": 1.4687997215318327e-07, "loss": 0.30082452297210693, "step": 7675 }, { "epoch": 1.896713615023474, "grad_norm": 1.3530254481525406, "learning_rate": 1.4618308342442266e-07, "loss": 0.2570479214191437, "step": 7676 }, { "epoch": 1.8969607116382505, "grad_norm": 1.4893296354732222, "learning_rate": 1.4548783969037472e-07, "loss": 0.2676984667778015, "step": 7677 }, { "epoch": 1.897207808253027, "grad_norm": 1.3270057068622592, "learning_rate": 1.4479424106710438e-07, "loss": 0.23858293890953064, "step": 7678 }, { "epoch": 1.8974549048678033, "grad_norm": 1.4444272051908227, "learning_rate": 1.4410228767039792e-07, "loss": 0.27756792306900024, "step": 7679 }, { "epoch": 1.8977020014825796, "grad_norm": 1.704316710023323, "learning_rate": 1.434119796157729e-07, "loss": 0.3143061399459839, "step": 7680 }, { "epoch": 1.8979490980973561, "grad_norm": 1.290428529478605, "learning_rate": 1.4272331701846832e-07, "loss": 0.21527254581451416, "step": 7681 }, { "epoch": 1.8981961947121324, "grad_norm": 1.4282044866804482, "learning_rate": 1.420362999934499e-07, "loss": 0.2911001741886139, "step": 7682 }, { "epoch": 1.8984432913269087, "grad_norm": 1.4387726485702457, "learning_rate": 1.41350928655406e-07, "loss": 0.2048448920249939, "step": 7683 }, { "epoch": 1.8986903879416852, "grad_norm": 1.7519646997958116, "learning_rate": 1.4066720311875505e-07, "loss": 0.28883934020996094, "step": 7684 }, { "epoch": 1.8989374845564617, "grad_norm": 1.65128264690683, "learning_rate": 1.3998512349763682e-07, "loss": 0.28184974193573, "step": 7685 }, { "epoch": 1.899184581171238, "grad_norm": 1.5949068312753847, "learning_rate": 1.3930468990591696e-07, "loss": 0.3341290056705475, "step": 7686 }, { "epoch": 1.8994316777860143, "grad_norm": 1.7164944632025159, "learning_rate": 1.38625902457189e-07, "loss": 0.2670179605484009, "step": 7687 }, { "epoch": 1.8996787744007908, "grad_norm": 1.334135084582248, "learning_rate": 1.3794876126476785e-07, "loss": 0.2461448311805725, "step": 7688 }, { "epoch": 1.899925871015567, "grad_norm": 1.469458964662296, "learning_rate": 1.3727326644169536e-07, "loss": 0.21560928225517273, "step": 7689 }, { "epoch": 1.9001729676303434, "grad_norm": 1.3624482385225172, "learning_rate": 1.3659941810073906e-07, "loss": 0.25830841064453125, "step": 7690 }, { "epoch": 1.9004200642451199, "grad_norm": 1.448440476643941, "learning_rate": 1.3592721635439232e-07, "loss": 0.2328825294971466, "step": 7691 }, { "epoch": 1.9006671608598962, "grad_norm": 1.9026744618758449, "learning_rate": 1.352566613148709e-07, "loss": 0.26328667998313904, "step": 7692 }, { "epoch": 1.9009142574746725, "grad_norm": 1.4964254850112995, "learning_rate": 1.3458775309411642e-07, "loss": 0.26456156373023987, "step": 7693 }, { "epoch": 1.901161354089449, "grad_norm": 1.436666784209382, "learning_rate": 1.339204918037984e-07, "loss": 0.22698408365249634, "step": 7694 }, { "epoch": 1.9014084507042255, "grad_norm": 1.551078863227508, "learning_rate": 1.3325487755530664e-07, "loss": 0.26984280347824097, "step": 7695 }, { "epoch": 1.9016555473190018, "grad_norm": 1.5101390353384228, "learning_rate": 1.3259091045976225e-07, "loss": 0.271884024143219, "step": 7696 }, { "epoch": 1.901902643933778, "grad_norm": 1.494283520551065, "learning_rate": 1.3192859062800435e-07, "loss": 0.26775413751602173, "step": 7697 }, { "epoch": 1.9021497405485546, "grad_norm": 1.3795216686702876, "learning_rate": 1.3126791817060226e-07, "loss": 0.23127196729183197, "step": 7698 }, { "epoch": 1.9023968371633309, "grad_norm": 1.310150102939296, "learning_rate": 1.3060889319784885e-07, "loss": 0.2541719973087311, "step": 7699 }, { "epoch": 1.9026439337781071, "grad_norm": 1.4630124074369777, "learning_rate": 1.2995151581976062e-07, "loss": 0.24506641924381256, "step": 7700 }, { "epoch": 1.9028910303928837, "grad_norm": 1.4604353547075493, "learning_rate": 1.2929578614608196e-07, "loss": 0.25785043835639954, "step": 7701 }, { "epoch": 1.90313812700766, "grad_norm": 1.918257101346139, "learning_rate": 1.2864170428627642e-07, "loss": 0.2934967279434204, "step": 7702 }, { "epoch": 1.9033852236224362, "grad_norm": 1.697946359892805, "learning_rate": 1.2798927034954e-07, "loss": 0.30473843216896057, "step": 7703 }, { "epoch": 1.9036323202372127, "grad_norm": 1.655231602162212, "learning_rate": 1.2733848444478892e-07, "loss": 0.3299630284309387, "step": 7704 }, { "epoch": 1.9038794168519892, "grad_norm": 1.566665302994192, "learning_rate": 1.2668934668066513e-07, "loss": 0.2537187933921814, "step": 7705 }, { "epoch": 1.9041265134667655, "grad_norm": 1.3658811943248785, "learning_rate": 1.260418571655353e-07, "loss": 0.25339165329933167, "step": 7706 }, { "epoch": 1.9043736100815418, "grad_norm": 1.4361105916816705, "learning_rate": 1.2539601600749073e-07, "loss": 0.2596866488456726, "step": 7707 }, { "epoch": 1.9046207066963183, "grad_norm": 1.4691783138874666, "learning_rate": 1.2475182331434854e-07, "loss": 0.31841233372688293, "step": 7708 }, { "epoch": 1.9048678033110946, "grad_norm": 1.4005034349911885, "learning_rate": 1.2410927919365155e-07, "loss": 0.30916574597358704, "step": 7709 }, { "epoch": 1.905114899925871, "grad_norm": 1.523270782776925, "learning_rate": 1.2346838375266402e-07, "loss": 0.2809891104698181, "step": 7710 }, { "epoch": 1.9053619965406474, "grad_norm": 1.4739907860211416, "learning_rate": 1.2282913709837807e-07, "loss": 0.2603498697280884, "step": 7711 }, { "epoch": 1.905609093155424, "grad_norm": 1.5425745333284093, "learning_rate": 1.221915393375095e-07, "loss": 0.2889847755432129, "step": 7712 }, { "epoch": 1.9058561897702, "grad_norm": 1.3392223574844233, "learning_rate": 1.2155559057649756e-07, "loss": 0.23351556062698364, "step": 7713 }, { "epoch": 1.9061032863849765, "grad_norm": 1.390186599182692, "learning_rate": 1.209212909215074e-07, "loss": 0.24853403866291046, "step": 7714 }, { "epoch": 1.906350382999753, "grad_norm": 1.5888202725926626, "learning_rate": 1.2028864047843202e-07, "loss": 0.3164714574813843, "step": 7715 }, { "epoch": 1.9065974796145293, "grad_norm": 1.5525373405211866, "learning_rate": 1.1965763935288254e-07, "loss": 0.24787533283233643, "step": 7716 }, { "epoch": 1.9068445762293056, "grad_norm": 1.3821088121947442, "learning_rate": 1.1902828765019913e-07, "loss": 0.24618417024612427, "step": 7717 }, { "epoch": 1.907091672844082, "grad_norm": 1.41969179074221, "learning_rate": 1.1840058547544553e-07, "loss": 0.21116000413894653, "step": 7718 }, { "epoch": 1.9073387694588584, "grad_norm": 1.3587123654404483, "learning_rate": 1.1777453293341124e-07, "loss": 0.29300999641418457, "step": 7719 }, { "epoch": 1.9075858660736347, "grad_norm": 1.4206760895066861, "learning_rate": 1.1715013012860932e-07, "loss": 0.22250211238861084, "step": 7720 }, { "epoch": 1.9078329626884112, "grad_norm": 1.487119501653057, "learning_rate": 1.165273771652764e-07, "loss": 0.28351694345474243, "step": 7721 }, { "epoch": 1.9080800593031877, "grad_norm": 1.4367626264309337, "learning_rate": 1.1590627414737487e-07, "loss": 0.3200058341026306, "step": 7722 }, { "epoch": 1.9083271559179638, "grad_norm": 1.3686030694277909, "learning_rate": 1.152868211785918e-07, "loss": 0.259122371673584, "step": 7723 }, { "epoch": 1.9085742525327403, "grad_norm": 1.4523809213787837, "learning_rate": 1.1466901836234001e-07, "loss": 0.2886766493320465, "step": 7724 }, { "epoch": 1.9088213491475168, "grad_norm": 1.5071249028737532, "learning_rate": 1.1405286580175367e-07, "loss": 0.21896377205848694, "step": 7725 }, { "epoch": 1.909068445762293, "grad_norm": 1.5471770076417506, "learning_rate": 1.1343836359969384e-07, "loss": 0.252688467502594, "step": 7726 }, { "epoch": 1.9093155423770694, "grad_norm": 1.4337307917775675, "learning_rate": 1.1282551185874513e-07, "loss": 0.27547067403793335, "step": 7727 }, { "epoch": 1.9095626389918459, "grad_norm": 1.5420013991827923, "learning_rate": 1.1221431068121569e-07, "loss": 0.3126521110534668, "step": 7728 }, { "epoch": 1.9098097356066221, "grad_norm": 1.415642127477257, "learning_rate": 1.116047601691428e-07, "loss": 0.256824791431427, "step": 7729 }, { "epoch": 1.9100568322213984, "grad_norm": 1.53941321703307, "learning_rate": 1.1099686042428171e-07, "loss": 0.2823127210140228, "step": 7730 }, { "epoch": 1.910303928836175, "grad_norm": 1.2970589232497378, "learning_rate": 1.103906115481157e-07, "loss": 0.24313317239284515, "step": 7731 }, { "epoch": 1.9105510254509515, "grad_norm": 1.3623411880110567, "learning_rate": 1.0978601364185271e-07, "loss": 0.2892705202102661, "step": 7732 }, { "epoch": 1.9107981220657277, "grad_norm": 1.3779875036262852, "learning_rate": 1.0918306680642532e-07, "loss": 0.23576711118221283, "step": 7733 }, { "epoch": 1.911045218680504, "grad_norm": 1.4452368625749719, "learning_rate": 1.0858177114248525e-07, "loss": 0.2757891118526459, "step": 7734 }, { "epoch": 1.9112923152952805, "grad_norm": 1.3132526192399852, "learning_rate": 1.0798212675041664e-07, "loss": 0.2512792944908142, "step": 7735 }, { "epoch": 1.9115394119100568, "grad_norm": 1.5499906262016168, "learning_rate": 1.0738413373032164e-07, "loss": 0.28341764211654663, "step": 7736 }, { "epoch": 1.9117865085248331, "grad_norm": 1.3307209813416152, "learning_rate": 1.067877921820315e-07, "loss": 0.23103034496307373, "step": 7737 }, { "epoch": 1.9120336051396096, "grad_norm": 1.4926881205576585, "learning_rate": 1.061931022050966e-07, "loss": 0.2874508202075958, "step": 7738 }, { "epoch": 1.912280701754386, "grad_norm": 1.3739133011511409, "learning_rate": 1.056000638987964e-07, "loss": 0.2519743740558624, "step": 7739 }, { "epoch": 1.9125277983691622, "grad_norm": 1.5368465632223145, "learning_rate": 1.0500867736213283e-07, "loss": 0.3061490058898926, "step": 7740 }, { "epoch": 1.9127748949839387, "grad_norm": 1.4594169450286134, "learning_rate": 1.0441894269383024e-07, "loss": 0.27577659487724304, "step": 7741 }, { "epoch": 1.9130219915987152, "grad_norm": 1.3805747085492244, "learning_rate": 1.0383085999233988e-07, "loss": 0.23076091706752777, "step": 7742 }, { "epoch": 1.9132690882134915, "grad_norm": 1.3593998173535593, "learning_rate": 1.0324442935583545e-07, "loss": 0.24031765758991241, "step": 7743 }, { "epoch": 1.9135161848282678, "grad_norm": 1.476277186444128, "learning_rate": 1.0265965088221641e-07, "loss": 0.27189213037490845, "step": 7744 }, { "epoch": 1.9137632814430443, "grad_norm": 1.5241568886846775, "learning_rate": 1.0207652466910578e-07, "loss": 0.27810391783714294, "step": 7745 }, { "epoch": 1.9140103780578206, "grad_norm": 1.3710694591282429, "learning_rate": 1.0149505081385013e-07, "loss": 0.30604439973831177, "step": 7746 }, { "epoch": 1.9142574746725969, "grad_norm": 1.5528937782932146, "learning_rate": 1.0091522941352071e-07, "loss": 0.2554594874382019, "step": 7747 }, { "epoch": 1.9145045712873734, "grad_norm": 1.626916773191831, "learning_rate": 1.0033706056491233e-07, "loss": 0.31094247102737427, "step": 7748 }, { "epoch": 1.91475166790215, "grad_norm": 1.4039754909042315, "learning_rate": 9.976054436454441e-08, "loss": 0.23021727800369263, "step": 7749 }, { "epoch": 1.914998764516926, "grad_norm": 1.7502653145166727, "learning_rate": 9.91856809086611e-08, "loss": 0.2991485893726349, "step": 7750 }, { "epoch": 1.9152458611317025, "grad_norm": 1.462131854357723, "learning_rate": 9.861247029323007e-08, "loss": 0.221137136220932, "step": 7751 }, { "epoch": 1.915492957746479, "grad_norm": 1.6181288610083213, "learning_rate": 9.804091261394255e-08, "loss": 0.3208332657814026, "step": 7752 }, { "epoch": 1.9157400543612553, "grad_norm": 1.5116796397776608, "learning_rate": 9.747100796621445e-08, "loss": 0.3046235144138336, "step": 7753 }, { "epoch": 1.9159871509760316, "grad_norm": 1.6632367650731321, "learning_rate": 9.690275644518521e-08, "loss": 0.2635752558708191, "step": 7754 }, { "epoch": 1.916234247590808, "grad_norm": 1.6417017085521588, "learning_rate": 9.633615814572005e-08, "loss": 0.25113505125045776, "step": 7755 }, { "epoch": 1.9164813442055844, "grad_norm": 1.4252403234863311, "learning_rate": 9.577121316240446e-08, "loss": 0.25529932975769043, "step": 7756 }, { "epoch": 1.9167284408203606, "grad_norm": 1.3639632019302625, "learning_rate": 9.520792158955294e-08, "loss": 0.24919679760932922, "step": 7757 }, { "epoch": 1.9169755374351372, "grad_norm": 1.4865555585776302, "learning_rate": 9.464628352119809e-08, "loss": 0.2937927842140198, "step": 7758 }, { "epoch": 1.9172226340499137, "grad_norm": 1.592976985698777, "learning_rate": 9.408629905110267e-08, "loss": 0.3363581597805023, "step": 7759 }, { "epoch": 1.9174697306646897, "grad_norm": 1.4062921823093302, "learning_rate": 9.352796827274968e-08, "loss": 0.2576819360256195, "step": 7760 }, { "epoch": 1.9177168272794662, "grad_norm": 1.5682293170065367, "learning_rate": 9.297129127934567e-08, "loss": 0.27789443731307983, "step": 7761 }, { "epoch": 1.9179639238942428, "grad_norm": 1.4254838437912725, "learning_rate": 9.241626816382299e-08, "loss": 0.25183984637260437, "step": 7762 }, { "epoch": 1.918211020509019, "grad_norm": 1.6088317145722417, "learning_rate": 9.18628990188375e-08, "loss": 0.27802661061286926, "step": 7763 }, { "epoch": 1.9184581171237953, "grad_norm": 1.4334069613438887, "learning_rate": 9.131118393676752e-08, "loss": 0.2804318964481354, "step": 7764 }, { "epoch": 1.9187052137385718, "grad_norm": 1.3445035762876603, "learning_rate": 9.076112300971717e-08, "loss": 0.25562331080436707, "step": 7765 }, { "epoch": 1.9189523103533481, "grad_norm": 1.4072685717596811, "learning_rate": 9.0212716329513e-08, "loss": 0.2983361482620239, "step": 7766 }, { "epoch": 1.9191994069681244, "grad_norm": 1.2960239080666178, "learning_rate": 8.966596398770622e-08, "loss": 0.25989118218421936, "step": 7767 }, { "epoch": 1.919446503582901, "grad_norm": 1.4007358624980515, "learning_rate": 8.912086607557158e-08, "loss": 0.23955652117729187, "step": 7768 }, { "epoch": 1.9196936001976774, "grad_norm": 1.3668938969416637, "learning_rate": 8.857742268410741e-08, "loss": 0.22929230332374573, "step": 7769 }, { "epoch": 1.9199406968124537, "grad_norm": 1.8420164259327314, "learning_rate": 8.803563390403669e-08, "loss": 0.31644803285598755, "step": 7770 }, { "epoch": 1.92018779342723, "grad_norm": 1.4262115323917788, "learning_rate": 8.749549982580485e-08, "loss": 0.2440105825662613, "step": 7771 }, { "epoch": 1.9204348900420065, "grad_norm": 1.4106345320415898, "learning_rate": 8.695702053958199e-08, "loss": 0.2746068835258484, "step": 7772 }, { "epoch": 1.9206819866567828, "grad_norm": 1.9532834430177803, "learning_rate": 8.642019613526176e-08, "loss": 0.25225287675857544, "step": 7773 }, { "epoch": 1.920929083271559, "grad_norm": 1.4878307368627133, "learning_rate": 8.588502670246246e-08, "loss": 0.2628903090953827, "step": 7774 }, { "epoch": 1.9211761798863356, "grad_norm": 1.2527914727315568, "learning_rate": 8.535151233052374e-08, "loss": 0.25426509976387024, "step": 7775 }, { "epoch": 1.9214232765011119, "grad_norm": 1.353469152144085, "learning_rate": 8.481965310851103e-08, "loss": 0.23332767188549042, "step": 7776 }, { "epoch": 1.9216703731158882, "grad_norm": 1.4706811613030664, "learning_rate": 8.428944912521219e-08, "loss": 0.270473450422287, "step": 7777 }, { "epoch": 1.9219174697306647, "grad_norm": 1.5053427178982917, "learning_rate": 8.376090046913976e-08, "loss": 0.32594621181488037, "step": 7778 }, { "epoch": 1.9221645663454412, "grad_norm": 1.3675561391757827, "learning_rate": 8.323400722853092e-08, "loss": 0.26674655079841614, "step": 7779 }, { "epoch": 1.9224116629602175, "grad_norm": 1.3516349898197293, "learning_rate": 8.270876949134421e-08, "loss": 0.2343416064977646, "step": 7780 }, { "epoch": 1.9226587595749938, "grad_norm": 1.4189161700827655, "learning_rate": 8.21851873452617e-08, "loss": 0.2720703184604645, "step": 7781 }, { "epoch": 1.9229058561897703, "grad_norm": 1.661050847744562, "learning_rate": 8.166326087769128e-08, "loss": 0.2903425693511963, "step": 7782 }, { "epoch": 1.9231529528045466, "grad_norm": 1.3871510940287268, "learning_rate": 8.114299017576322e-08, "loss": 0.30629628896713257, "step": 7783 }, { "epoch": 1.9234000494193229, "grad_norm": 1.4678539585542512, "learning_rate": 8.062437532633139e-08, "loss": 0.2648102045059204, "step": 7784 }, { "epoch": 1.9236471460340994, "grad_norm": 1.4253172976066328, "learning_rate": 8.01074164159732e-08, "loss": 0.2820221185684204, "step": 7785 }, { "epoch": 1.9238942426488759, "grad_norm": 1.347398420631201, "learning_rate": 7.95921135309885e-08, "loss": 0.24675020575523376, "step": 7786 }, { "epoch": 1.924141339263652, "grad_norm": 1.3959983647184426, "learning_rate": 7.907846675740294e-08, "loss": 0.28079456090927124, "step": 7787 }, { "epoch": 1.9243884358784284, "grad_norm": 1.3744225901971607, "learning_rate": 7.856647618096569e-08, "loss": 0.2491040974855423, "step": 7788 }, { "epoch": 1.924635532493205, "grad_norm": 1.412102909847545, "learning_rate": 7.805614188714839e-08, "loss": 0.24432820081710815, "step": 7789 }, { "epoch": 1.9248826291079812, "grad_norm": 1.2827888047666214, "learning_rate": 7.754746396114288e-08, "loss": 0.24350860714912415, "step": 7790 }, { "epoch": 1.9251297257227575, "grad_norm": 1.4224271742436096, "learning_rate": 7.704044248787124e-08, "loss": 0.2755407392978668, "step": 7791 }, { "epoch": 1.925376822337534, "grad_norm": 1.652653132753339, "learning_rate": 7.653507755197354e-08, "loss": 0.3104887902736664, "step": 7792 }, { "epoch": 1.9256239189523103, "grad_norm": 1.4467345495359543, "learning_rate": 7.603136923781673e-08, "loss": 0.2377701699733734, "step": 7793 }, { "epoch": 1.9258710155670866, "grad_norm": 1.3814594452909525, "learning_rate": 7.552931762948912e-08, "loss": 0.24816060066223145, "step": 7794 }, { "epoch": 1.9261181121818631, "grad_norm": 1.4667897254166695, "learning_rate": 7.502892281080365e-08, "loss": 0.27101773023605347, "step": 7795 }, { "epoch": 1.9263652087966396, "grad_norm": 1.529639794136781, "learning_rate": 7.453018486529462e-08, "loss": 0.25877299904823303, "step": 7796 }, { "epoch": 1.9266123054114157, "grad_norm": 1.5505249498889926, "learning_rate": 7.40331038762232e-08, "loss": 0.32220566272735596, "step": 7797 }, { "epoch": 1.9268594020261922, "grad_norm": 1.5688644711071256, "learning_rate": 7.353767992657079e-08, "loss": 0.27811938524246216, "step": 7798 }, { "epoch": 1.9271064986409687, "grad_norm": 1.550575224869526, "learning_rate": 7.304391309904346e-08, "loss": 0.33142364025115967, "step": 7799 }, { "epoch": 1.927353595255745, "grad_norm": 1.4438937750057212, "learning_rate": 7.255180347607193e-08, "loss": 0.2535867393016815, "step": 7800 }, { "epoch": 1.9276006918705213, "grad_norm": 1.322577589079344, "learning_rate": 7.206135113980717e-08, "loss": 0.23115751147270203, "step": 7801 }, { "epoch": 1.9278477884852978, "grad_norm": 2.0100774157590715, "learning_rate": 7.157255617212589e-08, "loss": 0.258023202419281, "step": 7802 }, { "epoch": 1.928094885100074, "grad_norm": 1.479232773630159, "learning_rate": 7.108541865462837e-08, "loss": 0.27874550223350525, "step": 7803 }, { "epoch": 1.9283419817148504, "grad_norm": 1.336278024463435, "learning_rate": 7.059993866863513e-08, "loss": 0.25622034072875977, "step": 7804 }, { "epoch": 1.928589078329627, "grad_norm": 1.3499919184910896, "learning_rate": 7.011611629519355e-08, "loss": 0.24436530470848083, "step": 7805 }, { "epoch": 1.9288361749444034, "grad_norm": 1.4663915179466323, "learning_rate": 6.963395161507236e-08, "loss": 0.26853710412979126, "step": 7806 }, { "epoch": 1.9290832715591797, "grad_norm": 1.600019719799541, "learning_rate": 6.915344470876606e-08, "loss": 0.2737353444099426, "step": 7807 }, { "epoch": 1.929330368173956, "grad_norm": 1.3266583777747212, "learning_rate": 6.867459565648715e-08, "loss": 0.26351940631866455, "step": 7808 }, { "epoch": 1.9295774647887325, "grad_norm": 1.3734211765856952, "learning_rate": 6.819740453817725e-08, "loss": 0.26008710265159607, "step": 7809 }, { "epoch": 1.9298245614035088, "grad_norm": 1.59007714854448, "learning_rate": 6.772187143349706e-08, "loss": 0.3093194365501404, "step": 7810 }, { "epoch": 1.930071658018285, "grad_norm": 1.3523136322074134, "learning_rate": 6.7247996421832e-08, "loss": 0.21088388562202454, "step": 7811 }, { "epoch": 1.9303187546330616, "grad_norm": 1.6354270131704984, "learning_rate": 6.677577958229209e-08, "loss": 0.2970008850097656, "step": 7812 }, { "epoch": 1.9305658512478379, "grad_norm": 1.4141792213439406, "learning_rate": 6.630522099370651e-08, "loss": 0.26661932468414307, "step": 7813 }, { "epoch": 1.9308129478626141, "grad_norm": 1.3802011227265094, "learning_rate": 6.583632073463353e-08, "loss": 0.2579200267791748, "step": 7814 }, { "epoch": 1.9310600444773907, "grad_norm": 1.6290428623985103, "learning_rate": 6.536907888334832e-08, "loss": 0.3139927387237549, "step": 7815 }, { "epoch": 1.9313071410921672, "grad_norm": 1.631030330503979, "learning_rate": 6.490349551785513e-08, "loss": 0.32980480790138245, "step": 7816 }, { "epoch": 1.9315542377069435, "grad_norm": 1.5123050469776864, "learning_rate": 6.443957071587625e-08, "loss": 0.2759385108947754, "step": 7817 }, { "epoch": 1.9318013343217197, "grad_norm": 1.3177467950746553, "learning_rate": 6.397730455485973e-08, "loss": 0.2420787513256073, "step": 7818 }, { "epoch": 1.9320484309364963, "grad_norm": 1.5976736028286933, "learning_rate": 6.351669711197606e-08, "loss": 0.25080668926239014, "step": 7819 }, { "epoch": 1.9322955275512725, "grad_norm": 1.3299340676988696, "learning_rate": 6.305774846411927e-08, "loss": 0.24442285299301147, "step": 7820 }, { "epoch": 1.9325426241660488, "grad_norm": 1.5188637218001388, "learning_rate": 6.260045868790587e-08, "loss": 0.28317105770111084, "step": 7821 }, { "epoch": 1.9327897207808253, "grad_norm": 1.455584975910145, "learning_rate": 6.21448278596759e-08, "loss": 0.2804880142211914, "step": 7822 }, { "epoch": 1.9330368173956016, "grad_norm": 1.6734349555938377, "learning_rate": 6.169085605549297e-08, "loss": 0.30023956298828125, "step": 7823 }, { "epoch": 1.933283914010378, "grad_norm": 1.421071765427321, "learning_rate": 6.123854335114199e-08, "loss": 0.2823246121406555, "step": 7824 }, { "epoch": 1.9335310106251544, "grad_norm": 1.3223694702145135, "learning_rate": 6.078788982213257e-08, "loss": 0.27201324701309204, "step": 7825 }, { "epoch": 1.933778107239931, "grad_norm": 1.5337429481728568, "learning_rate": 6.033889554369565e-08, "loss": 0.2761976718902588, "step": 7826 }, { "epoch": 1.9340252038547072, "grad_norm": 1.2242343645396074, "learning_rate": 5.989156059078793e-08, "loss": 0.2552175521850586, "step": 7827 }, { "epoch": 1.9342723004694835, "grad_norm": 1.7792586113593487, "learning_rate": 5.944588503808635e-08, "loss": 0.2659943997859955, "step": 7828 }, { "epoch": 1.93451939708426, "grad_norm": 1.6094140740697578, "learning_rate": 5.900186895999249e-08, "loss": 0.3214871883392334, "step": 7829 }, { "epoch": 1.9347664936990363, "grad_norm": 1.5217016044783063, "learning_rate": 5.855951243063152e-08, "loss": 0.2966882586479187, "step": 7830 }, { "epoch": 1.9350135903138126, "grad_norm": 1.6705517121452207, "learning_rate": 5.811881552384768e-08, "loss": 0.31019526720046997, "step": 7831 }, { "epoch": 1.935260686928589, "grad_norm": 1.555864212263361, "learning_rate": 5.7679778313212144e-08, "loss": 0.3230799436569214, "step": 7832 }, { "epoch": 1.9355077835433656, "grad_norm": 1.4247282785266044, "learning_rate": 5.724240087201738e-08, "loss": 0.23703116178512573, "step": 7833 }, { "epoch": 1.9357548801581417, "grad_norm": 1.4765910889042486, "learning_rate": 5.6806683273279426e-08, "loss": 0.3245628774166107, "step": 7834 }, { "epoch": 1.9360019767729182, "grad_norm": 1.6981487241069384, "learning_rate": 5.637262558973788e-08, "loss": 0.29794830083847046, "step": 7835 }, { "epoch": 1.9362490733876947, "grad_norm": 1.5693948997171434, "learning_rate": 5.594022789385256e-08, "loss": 0.28641802072525024, "step": 7836 }, { "epoch": 1.936496170002471, "grad_norm": 1.534296913568113, "learning_rate": 5.550949025780905e-08, "loss": 0.24636678397655487, "step": 7837 }, { "epoch": 1.9367432666172473, "grad_norm": 1.482632382067674, "learning_rate": 5.508041275351317e-08, "loss": 0.25926271080970764, "step": 7838 }, { "epoch": 1.9369903632320238, "grad_norm": 1.4223651877325905, "learning_rate": 5.465299545259761e-08, "loss": 0.2797187566757202, "step": 7839 }, { "epoch": 1.9372374598468, "grad_norm": 1.3554570942670345, "learning_rate": 5.4227238426413084e-08, "loss": 0.2563769817352295, "step": 7840 }, { "epoch": 1.9374845564615764, "grad_norm": 1.3673446511515859, "learning_rate": 5.380314174603607e-08, "loss": 0.21719899773597717, "step": 7841 }, { "epoch": 1.9377316530763529, "grad_norm": 1.4709352884822144, "learning_rate": 5.338070548226548e-08, "loss": 0.2791734039783478, "step": 7842 }, { "epoch": 1.9379787496911294, "grad_norm": 1.6872448683638301, "learning_rate": 5.2959929705622696e-08, "loss": 0.2786208689212799, "step": 7843 }, { "epoch": 1.9382258463059057, "grad_norm": 1.504803899076616, "learning_rate": 5.254081448635151e-08, "loss": 0.27790164947509766, "step": 7844 }, { "epoch": 1.938472942920682, "grad_norm": 1.413107520238673, "learning_rate": 5.21233598944193e-08, "loss": 0.29043692350387573, "step": 7845 }, { "epoch": 1.9387200395354585, "grad_norm": 1.615939046218213, "learning_rate": 5.170756599951588e-08, "loss": 0.25807279348373413, "step": 7846 }, { "epoch": 1.9389671361502347, "grad_norm": 1.3221784991799288, "learning_rate": 5.1293432871053485e-08, "loss": 0.23634067177772522, "step": 7847 }, { "epoch": 1.939214232765011, "grad_norm": 1.3688515045977592, "learning_rate": 5.088096057816905e-08, "loss": 0.2846907377243042, "step": 7848 }, { "epoch": 1.9394613293797875, "grad_norm": 1.4063572178373045, "learning_rate": 5.0470149189717486e-08, "loss": 0.26835280656814575, "step": 7849 }, { "epoch": 1.9397084259945638, "grad_norm": 1.3982235115856103, "learning_rate": 5.006099877428283e-08, "loss": 0.2539755403995514, "step": 7850 }, { "epoch": 1.9399555226093401, "grad_norm": 1.6159529789622433, "learning_rate": 4.965350940016711e-08, "loss": 0.2773399353027344, "step": 7851 }, { "epoch": 1.9402026192241166, "grad_norm": 1.4424101966974079, "learning_rate": 4.924768113539591e-08, "loss": 0.2973312735557556, "step": 7852 }, { "epoch": 1.9404497158388931, "grad_norm": 1.4838480340478903, "learning_rate": 4.8843514047719475e-08, "loss": 0.2648480236530304, "step": 7853 }, { "epoch": 1.9406968124536694, "grad_norm": 1.5050624004389466, "learning_rate": 4.8441008204608285e-08, "loss": 0.269407719373703, "step": 7854 }, { "epoch": 1.9409439090684457, "grad_norm": 1.482023176211345, "learning_rate": 4.8040163673257475e-08, "loss": 0.2848457098007202, "step": 7855 }, { "epoch": 1.9411910056832222, "grad_norm": 1.5438635337258588, "learning_rate": 4.7640980520582416e-08, "loss": 0.25780341029167175, "step": 7856 }, { "epoch": 1.9414381022979985, "grad_norm": 1.5075647750242527, "learning_rate": 4.724345881322534e-08, "loss": 0.28355342149734497, "step": 7857 }, { "epoch": 1.9416851989127748, "grad_norm": 1.5869338979315297, "learning_rate": 4.684759861754651e-08, "loss": 0.29004257917404175, "step": 7858 }, { "epoch": 1.9419322955275513, "grad_norm": 1.5458903758681777, "learning_rate": 4.6453399999630834e-08, "loss": 0.2661609649658203, "step": 7859 }, { "epoch": 1.9421793921423276, "grad_norm": 1.636758976278049, "learning_rate": 4.606086302528678e-08, "loss": 0.33790868520736694, "step": 7860 }, { "epoch": 1.9424264887571039, "grad_norm": 1.567708095594168, "learning_rate": 4.566998776004306e-08, "loss": 0.30695733428001404, "step": 7861 }, { "epoch": 1.9426735853718804, "grad_norm": 1.4811740880429451, "learning_rate": 4.528077426915412e-08, "loss": 0.2859492301940918, "step": 7862 }, { "epoch": 1.942920681986657, "grad_norm": 1.4751152904274412, "learning_rate": 4.489322261759355e-08, "loss": 0.2855610251426697, "step": 7863 }, { "epoch": 1.9431677786014332, "grad_norm": 1.7591730916625536, "learning_rate": 4.4507332870059594e-08, "loss": 0.2456335425376892, "step": 7864 }, { "epoch": 1.9434148752162095, "grad_norm": 1.4384862222962758, "learning_rate": 4.412310509097295e-08, "loss": 0.2613995373249054, "step": 7865 }, { "epoch": 1.943661971830986, "grad_norm": 1.6721643100387031, "learning_rate": 4.374053934447675e-08, "loss": 0.2699396014213562, "step": 7866 }, { "epoch": 1.9439090684457623, "grad_norm": 1.5317116509508049, "learning_rate": 4.335963569443547e-08, "loss": 0.2699063718318939, "step": 7867 }, { "epoch": 1.9441561650605386, "grad_norm": 1.5827590188676939, "learning_rate": 4.298039420443822e-08, "loss": 0.35934561491012573, "step": 7868 }, { "epoch": 1.944403261675315, "grad_norm": 1.5278176727436157, "learning_rate": 4.260281493779439e-08, "loss": 0.29757583141326904, "step": 7869 }, { "epoch": 1.9446503582900916, "grad_norm": 1.5557132937962723, "learning_rate": 4.222689795753798e-08, "loss": 0.3125258684158325, "step": 7870 }, { "epoch": 1.9448974549048677, "grad_norm": 1.44849756434825, "learning_rate": 4.185264332642547e-08, "loss": 0.2679850459098816, "step": 7871 }, { "epoch": 1.9451445515196442, "grad_norm": 1.3797787552764373, "learning_rate": 4.148005110693243e-08, "loss": 0.27784186601638794, "step": 7872 }, { "epoch": 1.9453916481344207, "grad_norm": 1.46595642719156, "learning_rate": 4.110912136126022e-08, "loss": 0.2678932249546051, "step": 7873 }, { "epoch": 1.945638744749197, "grad_norm": 1.4559858058536648, "learning_rate": 4.073985415133264e-08, "loss": 0.22867846488952637, "step": 7874 }, { "epoch": 1.9458858413639732, "grad_norm": 1.6679478131822134, "learning_rate": 4.037224953879371e-08, "loss": 0.2770915627479553, "step": 7875 }, { "epoch": 1.9461329379787498, "grad_norm": 1.3679110914558892, "learning_rate": 4.000630758501323e-08, "loss": 0.24637486040592194, "step": 7876 }, { "epoch": 1.946380034593526, "grad_norm": 1.3662129076950933, "learning_rate": 3.964202835108011e-08, "loss": 0.22564464807510376, "step": 7877 }, { "epoch": 1.9466271312083023, "grad_norm": 1.501749778693604, "learning_rate": 3.9279411897807926e-08, "loss": 0.3198566436767578, "step": 7878 }, { "epoch": 1.9468742278230788, "grad_norm": 1.3140747188142623, "learning_rate": 3.891845828573049e-08, "loss": 0.23436716198921204, "step": 7879 }, { "epoch": 1.9471213244378553, "grad_norm": 1.4099366368799202, "learning_rate": 3.855916757510514e-08, "loss": 0.27676254510879517, "step": 7880 }, { "epoch": 1.9473684210526314, "grad_norm": 1.4597165015953648, "learning_rate": 3.8201539825915015e-08, "loss": 0.26475197076797485, "step": 7881 }, { "epoch": 1.947615517667408, "grad_norm": 1.5533922070628252, "learning_rate": 3.784557509785791e-08, "loss": 0.32035553455352783, "step": 7882 }, { "epoch": 1.9478626142821844, "grad_norm": 1.3677403150901435, "learning_rate": 3.749127345036185e-08, "loss": 0.2214643657207489, "step": 7883 }, { "epoch": 1.9481097108969607, "grad_norm": 1.441479958683605, "learning_rate": 3.713863494257286e-08, "loss": 0.29552847146987915, "step": 7884 }, { "epoch": 1.948356807511737, "grad_norm": 1.4666555599237425, "learning_rate": 3.678765963336051e-08, "loss": 0.22339282929897308, "step": 7885 }, { "epoch": 1.9486039041265135, "grad_norm": 1.3573016084363527, "learning_rate": 3.643834758131681e-08, "loss": 0.2670540511608124, "step": 7886 }, { "epoch": 1.9488510007412898, "grad_norm": 1.5654399070534912, "learning_rate": 3.6090698844755135e-08, "loss": 0.3080429434776306, "step": 7887 }, { "epoch": 1.949098097356066, "grad_norm": 1.29492087385377, "learning_rate": 3.574471348171127e-08, "loss": 0.24609088897705078, "step": 7888 }, { "epoch": 1.9493451939708426, "grad_norm": 1.3133036141243988, "learning_rate": 3.540039154994568e-08, "loss": 0.2665814161300659, "step": 7889 }, { "epoch": 1.9495922905856191, "grad_norm": 1.4826142686148485, "learning_rate": 3.5057733106939054e-08, "loss": 0.28203755617141724, "step": 7890 }, { "epoch": 1.9498393872003954, "grad_norm": 1.5669917374867426, "learning_rate": 3.471673820989341e-08, "loss": 0.3396734595298767, "step": 7891 }, { "epoch": 1.9500864838151717, "grad_norm": 1.3418425821745485, "learning_rate": 3.4377406915736545e-08, "loss": 0.24235616624355316, "step": 7892 }, { "epoch": 1.9503335804299482, "grad_norm": 1.5206778569243509, "learning_rate": 3.403973928111426e-08, "loss": 0.28257787227630615, "step": 7893 }, { "epoch": 1.9505806770447245, "grad_norm": 1.4532364854384607, "learning_rate": 3.370373536239813e-08, "loss": 0.28782299160957336, "step": 7894 }, { "epoch": 1.9508277736595008, "grad_norm": 1.3151823968015635, "learning_rate": 3.336939521567994e-08, "loss": 0.20705510675907135, "step": 7895 }, { "epoch": 1.9510748702742773, "grad_norm": 1.5409084536299642, "learning_rate": 3.3036718896773954e-08, "loss": 0.2783995270729065, "step": 7896 }, { "epoch": 1.9513219668890536, "grad_norm": 1.4916831384470945, "learning_rate": 3.270570646121907e-08, "loss": 0.270700067281723, "step": 7897 }, { "epoch": 1.9515690635038299, "grad_norm": 1.4109737623152747, "learning_rate": 3.237635796427108e-08, "loss": 0.2835002839565277, "step": 7898 }, { "epoch": 1.9518161601186064, "grad_norm": 1.5030398357756467, "learning_rate": 3.2048673460914894e-08, "loss": 0.298717737197876, "step": 7899 }, { "epoch": 1.9520632567333829, "grad_norm": 1.509138121334666, "learning_rate": 3.1722653005852314e-08, "loss": 0.26501500606536865, "step": 7900 }, { "epoch": 1.9523103533481592, "grad_norm": 1.542452551103809, "learning_rate": 3.13982966535098e-08, "loss": 0.24959328770637512, "step": 7901 }, { "epoch": 1.9525574499629355, "grad_norm": 1.390546183904877, "learning_rate": 3.107560445803404e-08, "loss": 0.29377517104148865, "step": 7902 }, { "epoch": 1.952804546577712, "grad_norm": 1.598179837195273, "learning_rate": 3.0754576473296385e-08, "loss": 0.24900753796100616, "step": 7903 }, { "epoch": 1.9530516431924883, "grad_norm": 1.4448142490816505, "learning_rate": 3.043521275288952e-08, "loss": 0.27990755438804626, "step": 7904 }, { "epoch": 1.9532987398072645, "grad_norm": 1.4407853648736597, "learning_rate": 3.011751335012747e-08, "loss": 0.3207048773765564, "step": 7905 }, { "epoch": 1.953545836422041, "grad_norm": 1.3916522480213764, "learning_rate": 2.9801478318046694e-08, "loss": 0.25653791427612305, "step": 7906 }, { "epoch": 1.9537929330368176, "grad_norm": 1.4836668383155311, "learning_rate": 2.9487107709406103e-08, "loss": 0.2926613390445709, "step": 7907 }, { "epoch": 1.9540400296515936, "grad_norm": 1.5118877724289572, "learning_rate": 2.917440157668705e-08, "loss": 0.24817460775375366, "step": 7908 }, { "epoch": 1.9542871262663701, "grad_norm": 1.556676506890191, "learning_rate": 2.886335997209222e-08, "loss": 0.3097233772277832, "step": 7909 }, { "epoch": 1.9545342228811466, "grad_norm": 1.5053218487219364, "learning_rate": 2.8553982947546744e-08, "loss": 0.2897550165653229, "step": 7910 }, { "epoch": 1.954781319495923, "grad_norm": 1.7481219261661902, "learning_rate": 2.8246270554698197e-08, "loss": 0.33404403924942017, "step": 7911 }, { "epoch": 1.9550284161106992, "grad_norm": 1.4317812827351286, "learning_rate": 2.7940222844916598e-08, "loss": 0.262379914522171, "step": 7912 }, { "epoch": 1.9552755127254757, "grad_norm": 1.8036251266995875, "learning_rate": 2.7635839869293302e-08, "loss": 0.3036887049674988, "step": 7913 }, { "epoch": 1.955522609340252, "grad_norm": 1.5106810132722317, "learning_rate": 2.7333121678640994e-08, "loss": 0.25551608204841614, "step": 7914 }, { "epoch": 1.9557697059550283, "grad_norm": 1.666865786261454, "learning_rate": 2.703206832349592e-08, "loss": 0.2599042057991028, "step": 7915 }, { "epoch": 1.9560168025698048, "grad_norm": 1.530231802724445, "learning_rate": 2.6732679854116762e-08, "loss": 0.21844980120658875, "step": 7916 }, { "epoch": 1.9562638991845813, "grad_norm": 1.5757637090925147, "learning_rate": 2.643495632048243e-08, "loss": 0.27233800292015076, "step": 7917 }, { "epoch": 1.9565109957993574, "grad_norm": 1.5940968559213207, "learning_rate": 2.613889777229539e-08, "loss": 0.25454506278038025, "step": 7918 }, { "epoch": 1.956758092414134, "grad_norm": 1.4996860013970161, "learning_rate": 2.584450425897833e-08, "loss": 0.2770470976829529, "step": 7919 }, { "epoch": 1.9570051890289104, "grad_norm": 1.5623921196697126, "learning_rate": 2.5551775829678606e-08, "loss": 0.3564360439777374, "step": 7920 }, { "epoch": 1.9572522856436867, "grad_norm": 1.5573155906127658, "learning_rate": 2.5260712533264896e-08, "loss": 0.3243398070335388, "step": 7921 }, { "epoch": 1.957499382258463, "grad_norm": 1.5285159824746162, "learning_rate": 2.497131441832501e-08, "loss": 0.2554134428501129, "step": 7922 }, { "epoch": 1.9577464788732395, "grad_norm": 1.5592861720874565, "learning_rate": 2.468358153317252e-08, "loss": 0.28641557693481445, "step": 7923 }, { "epoch": 1.9579935754880158, "grad_norm": 1.387273329568091, "learning_rate": 2.439751392584122e-08, "loss": 0.22688129544258118, "step": 7924 }, { "epoch": 1.958240672102792, "grad_norm": 1.4371364290914372, "learning_rate": 2.411311164408736e-08, "loss": 0.2841240167617798, "step": 7925 }, { "epoch": 1.9584877687175686, "grad_norm": 1.248638787702613, "learning_rate": 2.3830374735389626e-08, "loss": 0.24638184905052185, "step": 7926 }, { "epoch": 1.958734865332345, "grad_norm": 1.4198046350947116, "learning_rate": 2.3549303246946932e-08, "loss": 0.24552735686302185, "step": 7927 }, { "epoch": 1.9589819619471214, "grad_norm": 1.6876264382824189, "learning_rate": 2.3269897225681737e-08, "loss": 0.33167192339897156, "step": 7928 }, { "epoch": 1.9592290585618977, "grad_norm": 1.458129100363711, "learning_rate": 2.2992156718236735e-08, "loss": 0.2672352194786072, "step": 7929 }, { "epoch": 1.9594761551766742, "grad_norm": 1.4386510887409212, "learning_rate": 2.2716081770980392e-08, "loss": 0.26915597915649414, "step": 7930 }, { "epoch": 1.9597232517914505, "grad_norm": 1.5477229040468052, "learning_rate": 2.2441672429999172e-08, "loss": 0.33818918466567993, "step": 7931 }, { "epoch": 1.9599703484062267, "grad_norm": 1.4738159273844516, "learning_rate": 2.21689287411031e-08, "loss": 0.25748780369758606, "step": 7932 }, { "epoch": 1.9602174450210033, "grad_norm": 1.4026608049969256, "learning_rate": 2.1897850749824644e-08, "loss": 0.2987068295478821, "step": 7933 }, { "epoch": 1.9604645416357795, "grad_norm": 1.6054114111715212, "learning_rate": 2.1628438501416493e-08, "loss": 0.2721863389015198, "step": 7934 }, { "epoch": 1.9607116382505558, "grad_norm": 1.4695586175995505, "learning_rate": 2.1360692040853782e-08, "loss": 0.27753254771232605, "step": 7935 }, { "epoch": 1.9609587348653323, "grad_norm": 1.6975718135336528, "learning_rate": 2.1094611412836308e-08, "loss": 0.2532614469528198, "step": 7936 }, { "epoch": 1.9612058314801089, "grad_norm": 1.5384428604619118, "learning_rate": 2.0830196661780765e-08, "loss": 0.2663947641849518, "step": 7937 }, { "epoch": 1.9614529280948851, "grad_norm": 1.593372451612407, "learning_rate": 2.0567447831830732e-08, "loss": 0.27296990156173706, "step": 7938 }, { "epoch": 1.9617000247096614, "grad_norm": 1.506851291054344, "learning_rate": 2.03063649668489e-08, "loss": 0.2682268023490906, "step": 7939 }, { "epoch": 1.961947121324438, "grad_norm": 1.5222309129545983, "learning_rate": 2.0046948110420407e-08, "loss": 0.28577038645744324, "step": 7940 }, { "epoch": 1.9621942179392142, "grad_norm": 1.617723446055653, "learning_rate": 1.9789197305851715e-08, "loss": 0.30279427766799927, "step": 7941 }, { "epoch": 1.9624413145539905, "grad_norm": 1.4050714017190882, "learning_rate": 1.9533112596171745e-08, "loss": 0.26876944303512573, "step": 7942 }, { "epoch": 1.962688411168767, "grad_norm": 1.5750849350210752, "learning_rate": 1.927869402413185e-08, "loss": 0.27724307775497437, "step": 7943 }, { "epoch": 1.9629355077835435, "grad_norm": 1.5198834550651343, "learning_rate": 1.9025941632203616e-08, "loss": 0.29759126901626587, "step": 7944 }, { "epoch": 1.9631826043983196, "grad_norm": 1.3066187547575274, "learning_rate": 1.8774855462583285e-08, "loss": 0.2645805776119232, "step": 7945 }, { "epoch": 1.963429701013096, "grad_norm": 1.3155442577991796, "learning_rate": 1.852543555718511e-08, "loss": 0.22525417804718018, "step": 7946 }, { "epoch": 1.9636767976278726, "grad_norm": 1.4905861019302804, "learning_rate": 1.8277681957648006e-08, "loss": 0.33339208364486694, "step": 7947 }, { "epoch": 1.963923894242649, "grad_norm": 1.534460536702177, "learning_rate": 1.8031594705332222e-08, "loss": 0.28421714901924133, "step": 7948 }, { "epoch": 1.9641709908574252, "grad_norm": 1.222174798554784, "learning_rate": 1.778717384132045e-08, "loss": 0.20461347699165344, "step": 7949 }, { "epoch": 1.9644180874722017, "grad_norm": 1.4174627698562152, "learning_rate": 1.7544419406414494e-08, "loss": 0.25597479939460754, "step": 7950 }, { "epoch": 1.964665184086978, "grad_norm": 1.3174398409528774, "learning_rate": 1.7303331441139715e-08, "loss": 0.23595456779003143, "step": 7951 }, { "epoch": 1.9649122807017543, "grad_norm": 1.4710998546131755, "learning_rate": 1.706390998574392e-08, "loss": 0.27474790811538696, "step": 7952 }, { "epoch": 1.9651593773165308, "grad_norm": 1.4108203406825837, "learning_rate": 1.682615508019736e-08, "loss": 0.287238746881485, "step": 7953 }, { "epoch": 1.9654064739313073, "grad_norm": 2.0372858877617435, "learning_rate": 1.6590066764188284e-08, "loss": 0.32453709840774536, "step": 7954 }, { "epoch": 1.9656535705460834, "grad_norm": 1.5815507853463928, "learning_rate": 1.6355645077131833e-08, "loss": 0.2727739214897156, "step": 7955 }, { "epoch": 1.9659006671608599, "grad_norm": 1.28832267760107, "learning_rate": 1.6122890058160035e-08, "loss": 0.22829793393611908, "step": 7956 }, { "epoch": 1.9661477637756364, "grad_norm": 1.5837552075067378, "learning_rate": 1.5891801746130697e-08, "loss": 0.254392147064209, "step": 7957 }, { "epoch": 1.9663948603904127, "grad_norm": 1.5451802517291462, "learning_rate": 1.5662380179620738e-08, "loss": 0.3255796432495117, "step": 7958 }, { "epoch": 1.966641957005189, "grad_norm": 1.5457490457283736, "learning_rate": 1.5434625396930636e-08, "loss": 0.28645384311676025, "step": 7959 }, { "epoch": 1.9668890536199655, "grad_norm": 1.730692062965085, "learning_rate": 1.5208537436079973e-08, "loss": 0.27549999952316284, "step": 7960 }, { "epoch": 1.9671361502347418, "grad_norm": 1.453798020007439, "learning_rate": 1.4984116334814112e-08, "loss": 0.24558016657829285, "step": 7961 }, { "epoch": 1.967383246849518, "grad_norm": 1.6358532811006763, "learning_rate": 1.4761362130595313e-08, "loss": 0.2980736196041107, "step": 7962 }, { "epoch": 1.9676303434642946, "grad_norm": 1.3225828665179609, "learning_rate": 1.4540274860612713e-08, "loss": 0.23629876971244812, "step": 7963 }, { "epoch": 1.967877440079071, "grad_norm": 1.6018387008656645, "learning_rate": 1.4320854561773456e-08, "loss": 0.3334949314594269, "step": 7964 }, { "epoch": 1.9681245366938473, "grad_norm": 1.6173790673452266, "learning_rate": 1.410310127070713e-08, "loss": 0.3015991747379303, "step": 7965 }, { "epoch": 1.9683716333086236, "grad_norm": 1.563861094672763, "learning_rate": 1.3887015023765771e-08, "loss": 0.324150025844574, "step": 7966 }, { "epoch": 1.9686187299234001, "grad_norm": 1.4719371423570085, "learning_rate": 1.367259585702274e-08, "loss": 0.24738946557044983, "step": 7967 }, { "epoch": 1.9688658265381764, "grad_norm": 1.780060385310957, "learning_rate": 1.3459843806273853e-08, "loss": 0.2464209496974945, "step": 7968 }, { "epoch": 1.9691129231529527, "grad_norm": 1.649609690753151, "learning_rate": 1.3248758907035142e-08, "loss": 0.22506624460220337, "step": 7969 }, { "epoch": 1.9693600197677292, "grad_norm": 1.387933186744131, "learning_rate": 1.3039341194543976e-08, "loss": 0.3074156641960144, "step": 7970 }, { "epoch": 1.9696071163825055, "grad_norm": 1.35228798667804, "learning_rate": 1.2831590703763497e-08, "loss": 0.2703031599521637, "step": 7971 }, { "epoch": 1.9698542129972818, "grad_norm": 1.4530790535525644, "learning_rate": 1.2625507469372634e-08, "loss": 0.2675512433052063, "step": 7972 }, { "epoch": 1.9701013096120583, "grad_norm": 1.3815763764134004, "learning_rate": 1.2421091525776086e-08, "loss": 0.2595589756965637, "step": 7973 }, { "epoch": 1.9703484062268348, "grad_norm": 1.3684928512490953, "learning_rate": 1.2218342907099889e-08, "loss": 0.24315416812896729, "step": 7974 }, { "epoch": 1.9705955028416111, "grad_norm": 1.6191609209775093, "learning_rate": 1.2017261647189193e-08, "loss": 0.29712533950805664, "step": 7975 }, { "epoch": 1.9708425994563874, "grad_norm": 1.4715166529371144, "learning_rate": 1.1817847779613811e-08, "loss": 0.2709028422832489, "step": 7976 }, { "epoch": 1.971089696071164, "grad_norm": 1.441393648364009, "learning_rate": 1.162010133766267e-08, "loss": 0.28016117215156555, "step": 7977 }, { "epoch": 1.9713367926859402, "grad_norm": 1.426035714236178, "learning_rate": 1.1424022354348252e-08, "loss": 0.3024396002292633, "step": 7978 }, { "epoch": 1.9715838893007165, "grad_norm": 1.3195541637295107, "learning_rate": 1.1229610862404372e-08, "loss": 0.23781289160251617, "step": 7979 }, { "epoch": 1.971830985915493, "grad_norm": 1.5499973009484895, "learning_rate": 1.1036866894285069e-08, "loss": 0.29552721977233887, "step": 7980 }, { "epoch": 1.9720780825302693, "grad_norm": 1.4337674311806359, "learning_rate": 1.0845790482167939e-08, "loss": 0.2668747901916504, "step": 7981 }, { "epoch": 1.9723251791450456, "grad_norm": 1.4536479747048967, "learning_rate": 1.0656381657950798e-08, "loss": 0.28678011894226074, "step": 7982 }, { "epoch": 1.972572275759822, "grad_norm": 1.5834609460454205, "learning_rate": 1.0468640453253908e-08, "loss": 0.27014654874801636, "step": 7983 }, { "epoch": 1.9728193723745986, "grad_norm": 1.4284078894435976, "learning_rate": 1.0282566899417757e-08, "loss": 0.24147099256515503, "step": 7984 }, { "epoch": 1.9730664689893749, "grad_norm": 1.4208548075472542, "learning_rate": 1.0098161027505271e-08, "loss": 0.24963702261447906, "step": 7985 }, { "epoch": 1.9733135656041512, "grad_norm": 1.6488605146557955, "learning_rate": 9.915422868302937e-09, "loss": 0.32210037112236023, "step": 7986 }, { "epoch": 1.9735606622189277, "grad_norm": 1.50459494981197, "learning_rate": 9.734352452316354e-09, "loss": 0.3003718852996826, "step": 7987 }, { "epoch": 1.973807758833704, "grad_norm": 1.402271787262003, "learning_rate": 9.554949809772451e-09, "loss": 0.2317749261856079, "step": 7988 }, { "epoch": 1.9740548554484803, "grad_norm": 1.473066747148905, "learning_rate": 9.377214970620607e-09, "loss": 0.2673388123512268, "step": 7989 }, { "epoch": 1.9743019520632568, "grad_norm": 1.5053096470965517, "learning_rate": 9.201147964533753e-09, "loss": 0.2570013701915741, "step": 7990 }, { "epoch": 1.9745490486780333, "grad_norm": 1.5615285145866526, "learning_rate": 9.026748820902819e-09, "loss": 0.27092882990837097, "step": 7991 }, { "epoch": 1.9747961452928093, "grad_norm": 1.6959856183358804, "learning_rate": 8.854017568842298e-09, "loss": 0.2905053198337555, "step": 7992 }, { "epoch": 1.9750432419075858, "grad_norm": 1.4635766692570766, "learning_rate": 8.682954237188013e-09, "loss": 0.2950124740600586, "step": 7993 }, { "epoch": 1.9752903385223624, "grad_norm": 1.4720180666670946, "learning_rate": 8.513558854497116e-09, "loss": 0.25874966382980347, "step": 7994 }, { "epoch": 1.9755374351371386, "grad_norm": 1.361604148647233, "learning_rate": 8.345831449049213e-09, "loss": 0.25113505125045776, "step": 7995 }, { "epoch": 1.975784531751915, "grad_norm": 2.270898789056229, "learning_rate": 8.179772048843016e-09, "loss": 0.28341948986053467, "step": 7996 }, { "epoch": 1.9760316283666914, "grad_norm": 1.450299461811985, "learning_rate": 8.015380681603013e-09, "loss": 0.24227899312973022, "step": 7997 }, { "epoch": 1.9762787249814677, "grad_norm": 1.4856513935757758, "learning_rate": 7.852657374769478e-09, "loss": 0.2545000910758972, "step": 7998 }, { "epoch": 1.976525821596244, "grad_norm": 1.4039904563375794, "learning_rate": 7.69160215551068e-09, "loss": 0.26074135303497314, "step": 7999 }, { "epoch": 1.9767729182110205, "grad_norm": 1.4922195838220844, "learning_rate": 7.532215050710668e-09, "loss": 0.26654526591300964, "step": 8000 }, { "epoch": 1.977020014825797, "grad_norm": 1.3918181990801255, "learning_rate": 7.374496086978156e-09, "loss": 0.22909963130950928, "step": 8001 }, { "epoch": 1.977267111440573, "grad_norm": 1.3724870959835824, "learning_rate": 7.2184452906443046e-09, "loss": 0.25392961502075195, "step": 8002 }, { "epoch": 1.9775142080553496, "grad_norm": 1.400006654921131, "learning_rate": 7.0640626877571675e-09, "loss": 0.22159984707832336, "step": 8003 }, { "epoch": 1.9777613046701261, "grad_norm": 1.4035472104561912, "learning_rate": 6.911348304092791e-09, "loss": 0.2775552272796631, "step": 8004 }, { "epoch": 1.9780084012849024, "grad_norm": 1.4061857457599671, "learning_rate": 6.760302165141896e-09, "loss": 0.27451491355895996, "step": 8005 }, { "epoch": 1.9782554978996787, "grad_norm": 1.580146540289606, "learning_rate": 6.6109242961232e-09, "loss": 0.30019664764404297, "step": 8006 }, { "epoch": 1.9785025945144552, "grad_norm": 1.4321563217051683, "learning_rate": 6.4632147219712e-09, "loss": 0.25570327043533325, "step": 8007 }, { "epoch": 1.9787496911292315, "grad_norm": 1.3683296190661425, "learning_rate": 6.317173467347281e-09, "loss": 0.2569030821323395, "step": 8008 }, { "epoch": 1.9789967877440078, "grad_norm": 1.353641521069287, "learning_rate": 6.172800556628611e-09, "loss": 0.23953771591186523, "step": 8009 }, { "epoch": 1.9792438843587843, "grad_norm": 1.5124449898877839, "learning_rate": 6.0300960139192395e-09, "loss": 0.35285213589668274, "step": 8010 }, { "epoch": 1.9794909809735608, "grad_norm": 1.4962032039112718, "learning_rate": 5.889059863040114e-09, "loss": 0.2984076738357544, "step": 8011 }, { "epoch": 1.979738077588337, "grad_norm": 1.4448688670136058, "learning_rate": 5.749692127536843e-09, "loss": 0.2143530249595642, "step": 8012 }, { "epoch": 1.9799851742031134, "grad_norm": 1.3712342533425597, "learning_rate": 5.611992830676371e-09, "loss": 0.2513769567012787, "step": 8013 }, { "epoch": 1.9802322708178899, "grad_norm": 1.5368501524924745, "learning_rate": 5.475961995444756e-09, "loss": 0.2615547776222229, "step": 8014 }, { "epoch": 1.9804793674326662, "grad_norm": 1.6044486648395875, "learning_rate": 5.341599644551609e-09, "loss": 0.28103697299957275, "step": 8015 }, { "epoch": 1.9807264640474425, "grad_norm": 1.4256744015444673, "learning_rate": 5.208905800426767e-09, "loss": 0.27784186601638794, "step": 8016 }, { "epoch": 1.980973560662219, "grad_norm": 1.1716153864456478, "learning_rate": 5.077880485223619e-09, "loss": 0.18183189630508423, "step": 8017 }, { "epoch": 1.9812206572769953, "grad_norm": 1.2276917008910133, "learning_rate": 4.948523720813558e-09, "loss": 0.24521878361701965, "step": 8018 }, { "epoch": 1.9814677538917715, "grad_norm": 1.5497772965050467, "learning_rate": 4.8208355287926445e-09, "loss": 0.2322080135345459, "step": 8019 }, { "epoch": 1.981714850506548, "grad_norm": 1.5473038399827586, "learning_rate": 4.694815930477159e-09, "loss": 0.27557528018951416, "step": 8020 }, { "epoch": 1.9819619471213246, "grad_norm": 1.4370788016457454, "learning_rate": 4.570464946903608e-09, "loss": 0.26616597175598145, "step": 8021 }, { "epoch": 1.9822090437361009, "grad_norm": 1.7161987990318857, "learning_rate": 4.447782598832051e-09, "loss": 0.2297043800354004, "step": 8022 }, { "epoch": 1.9824561403508771, "grad_norm": 1.564540623349149, "learning_rate": 4.326768906743883e-09, "loss": 0.31720849871635437, "step": 8023 }, { "epoch": 1.9827032369656536, "grad_norm": 1.2098598258532263, "learning_rate": 4.207423890839613e-09, "loss": 0.24053113162517548, "step": 8024 }, { "epoch": 1.98295033358043, "grad_norm": 1.587213275387704, "learning_rate": 4.089747571043301e-09, "loss": 0.2541620135307312, "step": 8025 }, { "epoch": 1.9831974301952062, "grad_norm": 1.4848009166939162, "learning_rate": 3.973739966999235e-09, "loss": 0.2942860722541809, "step": 8026 }, { "epoch": 1.9834445268099827, "grad_norm": 1.4117993264275588, "learning_rate": 3.8594010980741445e-09, "loss": 0.25571203231811523, "step": 8027 }, { "epoch": 1.9836916234247592, "grad_norm": 1.5381097203814607, "learning_rate": 3.746730983356095e-09, "loss": 0.2972831726074219, "step": 8028 }, { "epoch": 1.9839387200395353, "grad_norm": 1.562106420047618, "learning_rate": 3.635729641654484e-09, "loss": 0.2675492763519287, "step": 8029 }, { "epoch": 1.9841858166543118, "grad_norm": 1.6335488069240243, "learning_rate": 3.526397091498934e-09, "loss": 0.33105140924453735, "step": 8030 }, { "epoch": 1.9844329132690883, "grad_norm": 1.718693247973088, "learning_rate": 3.4187333511415122e-09, "loss": 0.2673768699169159, "step": 8031 }, { "epoch": 1.9846800098838646, "grad_norm": 1.4364304548578242, "learning_rate": 3.3127384385567286e-09, "loss": 0.29284512996673584, "step": 8032 }, { "epoch": 1.984927106498641, "grad_norm": 1.5678029140941339, "learning_rate": 3.2084123714382075e-09, "loss": 0.3030710220336914, "step": 8033 }, { "epoch": 1.9851742031134174, "grad_norm": 1.586668035968233, "learning_rate": 3.105755167202018e-09, "loss": 0.279501736164093, "step": 8034 }, { "epoch": 1.9854212997281937, "grad_norm": 1.540471539248565, "learning_rate": 3.0047668429866726e-09, "loss": 0.30257225036621094, "step": 8035 }, { "epoch": 1.98566839634297, "grad_norm": 1.511127549081555, "learning_rate": 2.9054474156497982e-09, "loss": 0.2541095018386841, "step": 8036 }, { "epoch": 1.9859154929577465, "grad_norm": 1.3766690583094512, "learning_rate": 2.807796901773685e-09, "loss": 0.24543049931526184, "step": 8037 }, { "epoch": 1.986162589572523, "grad_norm": 1.6030819426853329, "learning_rate": 2.711815317657518e-09, "loss": 0.3193759322166443, "step": 8038 }, { "epoch": 1.986409686187299, "grad_norm": 1.6777194412034973, "learning_rate": 2.617502679327366e-09, "loss": 0.2709386348724365, "step": 8039 }, { "epoch": 1.9866567828020756, "grad_norm": 1.5562408024559151, "learning_rate": 2.5248590025250817e-09, "loss": 0.2760156989097595, "step": 8040 }, { "epoch": 1.986903879416852, "grad_norm": 1.340036725096054, "learning_rate": 2.4338843027194026e-09, "loss": 0.22154588997364044, "step": 8041 }, { "epoch": 1.9871509760316284, "grad_norm": 1.4411622285072552, "learning_rate": 2.3445785950948485e-09, "loss": 0.267788827419281, "step": 8042 }, { "epoch": 1.9873980726464047, "grad_norm": 1.519749498027436, "learning_rate": 2.256941894561715e-09, "loss": 0.3175414502620697, "step": 8043 }, { "epoch": 1.9876451692611812, "grad_norm": 1.4697046169672257, "learning_rate": 2.1709742157494106e-09, "loss": 0.2755863070487976, "step": 8044 }, { "epoch": 1.9878922658759575, "grad_norm": 1.33815748969451, "learning_rate": 2.086675573009789e-09, "loss": 0.22214514017105103, "step": 8045 }, { "epoch": 1.9881393624907338, "grad_norm": 1.627191553150876, "learning_rate": 2.004045980414926e-09, "loss": 0.2782999873161316, "step": 8046 }, { "epoch": 1.9883864591055103, "grad_norm": 1.3325986778171743, "learning_rate": 1.9230854517604538e-09, "loss": 0.22053858637809753, "step": 8047 }, { "epoch": 1.9886335557202868, "grad_norm": 1.3387843877581684, "learning_rate": 1.8437940005600064e-09, "loss": 0.24533918499946594, "step": 8048 }, { "epoch": 1.988880652335063, "grad_norm": 1.4425741823223608, "learning_rate": 1.7661716400529937e-09, "loss": 0.26433098316192627, "step": 8049 }, { "epoch": 1.9891277489498393, "grad_norm": 1.3655377391427426, "learning_rate": 1.6902183831946083e-09, "loss": 0.25804004073143005, "step": 8050 }, { "epoch": 1.9893748455646159, "grad_norm": 1.3741116792947177, "learning_rate": 1.6159342426669278e-09, "loss": 0.3083675503730774, "step": 8051 }, { "epoch": 1.9896219421793921, "grad_norm": 1.5375136540941308, "learning_rate": 1.5433192308700328e-09, "loss": 0.28533628582954407, "step": 8052 }, { "epoch": 1.9898690387941684, "grad_norm": 1.7400690325392116, "learning_rate": 1.4723733599253387e-09, "loss": 0.3058621883392334, "step": 8053 }, { "epoch": 1.990116135408945, "grad_norm": 1.5954737910451482, "learning_rate": 1.403096641678925e-09, "loss": 0.3004618287086487, "step": 8054 }, { "epoch": 1.9903632320237212, "grad_norm": 1.363823376014711, "learning_rate": 1.3354890876926541e-09, "loss": 0.2708754241466522, "step": 8055 }, { "epoch": 1.9906103286384975, "grad_norm": 1.5047340104998952, "learning_rate": 1.2695507092563842e-09, "loss": 0.30226433277130127, "step": 8056 }, { "epoch": 1.990857425253274, "grad_norm": 1.5080647172812043, "learning_rate": 1.2052815173746457e-09, "loss": 0.30025437474250793, "step": 8057 }, { "epoch": 1.9911045218680505, "grad_norm": 1.6324224691596445, "learning_rate": 1.1426815227788545e-09, "loss": 0.29597020149230957, "step": 8058 }, { "epoch": 1.9913516184828268, "grad_norm": 1.547682739258915, "learning_rate": 1.0817507359184298e-09, "loss": 0.28100261092185974, "step": 8059 }, { "epoch": 1.9915987150976031, "grad_norm": 1.4972080534023324, "learning_rate": 1.0224891669652349e-09, "loss": 0.33411070704460144, "step": 8060 }, { "epoch": 1.9918458117123796, "grad_norm": 1.418622490080817, "learning_rate": 9.64896825812467e-10, "loss": 0.2373616099357605, "step": 8061 }, { "epoch": 1.992092908327156, "grad_norm": 1.313640227991526, "learning_rate": 9.089737220746575e-10, "loss": 0.23761537671089172, "step": 8062 }, { "epoch": 1.9923400049419322, "grad_norm": 1.2147075466712045, "learning_rate": 8.54719865087672e-10, "loss": 0.21936210989952087, "step": 8063 }, { "epoch": 1.9925871015567087, "grad_norm": 1.4063858196783683, "learning_rate": 8.021352639087099e-10, "loss": 0.2691470980644226, "step": 8064 }, { "epoch": 1.9928341981714852, "grad_norm": 1.5856438236093773, "learning_rate": 7.512199273151944e-10, "loss": 0.27449604868888855, "step": 8065 }, { "epoch": 1.9930812947862613, "grad_norm": 1.4690212805482037, "learning_rate": 7.019738638081031e-10, "loss": 0.3127593994140625, "step": 8066 }, { "epoch": 1.9933283914010378, "grad_norm": 1.3594915679655684, "learning_rate": 6.543970816086375e-10, "loss": 0.2745647728443146, "step": 8067 }, { "epoch": 1.9935754880158143, "grad_norm": 1.5273250933728693, "learning_rate": 6.084895886582232e-10, "loss": 0.3042905926704407, "step": 8068 }, { "epoch": 1.9938225846305906, "grad_norm": 1.5415190622832735, "learning_rate": 5.642513926218396e-10, "loss": 0.2993146777153015, "step": 8069 }, { "epoch": 1.9940696812453669, "grad_norm": 1.544279031721406, "learning_rate": 5.216825008835802e-10, "loss": 0.2590772807598114, "step": 8070 }, { "epoch": 1.9943167778601434, "grad_norm": 1.436409592349238, "learning_rate": 4.807829205510928e-10, "loss": 0.26329097151756287, "step": 8071 }, { "epoch": 1.9945638744749197, "grad_norm": 1.7757690069982397, "learning_rate": 4.415526584511387e-10, "loss": 0.3018074631690979, "step": 8072 }, { "epoch": 1.994810971089696, "grad_norm": 1.4157882128758106, "learning_rate": 4.0399172113181337e-10, "loss": 0.26647812128067017, "step": 8073 }, { "epoch": 1.9950580677044725, "grad_norm": 1.3481124031716027, "learning_rate": 3.6810011486587695e-10, "loss": 0.2034432291984558, "step": 8074 }, { "epoch": 1.995305164319249, "grad_norm": 1.5760148166439651, "learning_rate": 3.3387784564409277e-10, "loss": 0.28893035650253296, "step": 8075 }, { "epoch": 1.995552260934025, "grad_norm": 1.368913965767856, "learning_rate": 3.013249191785583e-10, "loss": 0.28027522563934326, "step": 8076 }, { "epoch": 1.9957993575488016, "grad_norm": 1.4058504449429317, "learning_rate": 2.704413409060358e-10, "loss": 0.2542475759983063, "step": 8077 }, { "epoch": 1.996046454163578, "grad_norm": 1.4662639267972812, "learning_rate": 2.4122711597907023e-10, "loss": 0.288585901260376, "step": 8078 }, { "epoch": 1.9962935507783544, "grad_norm": 1.4237672727622148, "learning_rate": 2.136822492770918e-10, "loss": 0.2817254662513733, "step": 8079 }, { "epoch": 1.9965406473931306, "grad_norm": 1.4805580592992973, "learning_rate": 1.8780674539753408e-10, "loss": 0.27881044149398804, "step": 8080 }, { "epoch": 1.9967877440079072, "grad_norm": 1.4204938942471632, "learning_rate": 1.6360060866027482e-10, "loss": 0.2684822678565979, "step": 8081 }, { "epoch": 1.9970348406226834, "grad_norm": 1.2878603500792816, "learning_rate": 1.4106384310541564e-10, "loss": 0.2202989161014557, "step": 8082 }, { "epoch": 1.9972819372374597, "grad_norm": 1.350407606937241, "learning_rate": 1.201964524966126e-10, "loss": 0.21419286727905273, "step": 8083 }, { "epoch": 1.9975290338522362, "grad_norm": 1.409127295743063, "learning_rate": 1.0099844031774553e-10, "loss": 0.28738734126091003, "step": 8084 }, { "epoch": 1.9977761304670127, "grad_norm": 1.5456751656687073, "learning_rate": 8.346980977180785e-11, "loss": 0.3084934949874878, "step": 8085 }, { "epoch": 1.998023227081789, "grad_norm": 1.4319474707107034, "learning_rate": 6.761056378534747e-11, "loss": 0.22943732142448425, "step": 8086 }, { "epoch": 1.9982703236965653, "grad_norm": 1.548569244991153, "learning_rate": 5.342070500735652e-11, "loss": 0.27088379859924316, "step": 8087 }, { "epoch": 1.9985174203113418, "grad_norm": 1.3647149582716351, "learning_rate": 4.0900235807050935e-11, "loss": 0.2914436459541321, "step": 8088 }, { "epoch": 1.9987645169261181, "grad_norm": 1.337412958878818, "learning_rate": 3.0049158271650004e-11, "loss": 0.2794247567653656, "step": 8089 }, { "epoch": 1.9990116135408944, "grad_norm": 1.5302278192835908, "learning_rate": 2.0867474215258142e-11, "loss": 0.2694860100746155, "step": 8090 }, { "epoch": 1.999258710155671, "grad_norm": 1.5697122365021043, "learning_rate": 1.3355185169983132e-11, "loss": 0.2611023485660553, "step": 8091 }, { "epoch": 1.9995058067704472, "grad_norm": 1.5077668614012993, "learning_rate": 7.512292389266762e-12, "loss": 0.2517150640487671, "step": 8092 }, { "epoch": 1.9997529033852235, "grad_norm": 1.4021573005056178, "learning_rate": 3.338796850105297e-12, "loss": 0.2565285563468933, "step": 8093 }, { "epoch": 2.0, "grad_norm": 1.5246106368726418, "learning_rate": 8.346992474983495e-13, "loss": 0.2685915231704712, "step": 8094 }, { "epoch": 2.0, "step": 8094, "total_flos": 2355221372633088.0, "train_loss": 0.3477905158193753, "train_runtime": 42762.3455, "train_samples_per_second": 1.514, "train_steps_per_second": 0.189 } ], "logging_steps": 1, "max_steps": 8094, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2355221372633088.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }