{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 7532, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002655689815429558, "grad_norm": 1.8881195832990014, "learning_rate": 0.0, "loss": 1.1502833366394043, "step": 1 }, { "epoch": 0.0005311379630859116, "grad_norm": 1.77718785062999, "learning_rate": 5.3050397877984086e-08, "loss": 1.1698756217956543, "step": 2 }, { "epoch": 0.0007967069446288673, "grad_norm": 1.6766718507101437, "learning_rate": 1.0610079575596817e-07, "loss": 1.1060130596160889, "step": 3 }, { "epoch": 0.0010622759261718232, "grad_norm": 1.876053682165919, "learning_rate": 1.5915119363395226e-07, "loss": 1.1075276136398315, "step": 4 }, { "epoch": 0.001327844907714779, "grad_norm": 1.88228417845019, "learning_rate": 2.1220159151193635e-07, "loss": 1.2153511047363281, "step": 5 }, { "epoch": 0.0015934138892577346, "grad_norm": 1.9273368394845023, "learning_rate": 2.6525198938992043e-07, "loss": 1.1400426626205444, "step": 6 }, { "epoch": 0.0018589828708006906, "grad_norm": 1.904814034912833, "learning_rate": 3.183023872679045e-07, "loss": 1.2070660591125488, "step": 7 }, { "epoch": 0.0021245518523436463, "grad_norm": 1.7346381008587795, "learning_rate": 3.713527851458886e-07, "loss": 1.1614588499069214, "step": 8 }, { "epoch": 0.002390120833886602, "grad_norm": 1.817032704311048, "learning_rate": 4.244031830238727e-07, "loss": 1.1739476919174194, "step": 9 }, { "epoch": 0.002655689815429558, "grad_norm": 1.8291974144657501, "learning_rate": 4.774535809018568e-07, "loss": 1.1559171676635742, "step": 10 }, { "epoch": 0.0029212587969725135, "grad_norm": 2.0039010539208744, "learning_rate": 5.305039787798409e-07, "loss": 1.2086225748062134, "step": 11 }, { "epoch": 0.0031868277785154693, "grad_norm": 1.876026657216244, "learning_rate": 5.83554376657825e-07, "loss": 1.227709174156189, "step": 12 }, { "epoch": 0.003452396760058425, "grad_norm": 2.0245192813139825, "learning_rate": 6.36604774535809e-07, "loss": 1.255577564239502, "step": 13 }, { "epoch": 0.003717965741601381, "grad_norm": 1.8641260357218605, "learning_rate": 6.896551724137931e-07, "loss": 1.1953760385513306, "step": 14 }, { "epoch": 0.0039835347231443365, "grad_norm": 1.9079733249323254, "learning_rate": 7.427055702917772e-07, "loss": 1.1325336694717407, "step": 15 }, { "epoch": 0.004249103704687293, "grad_norm": 1.8230190567516942, "learning_rate": 7.957559681697613e-07, "loss": 1.232974648475647, "step": 16 }, { "epoch": 0.004514672686230248, "grad_norm": 1.8532380418447003, "learning_rate": 8.488063660477454e-07, "loss": 1.1527395248413086, "step": 17 }, { "epoch": 0.004780241667773204, "grad_norm": 1.986294801704247, "learning_rate": 9.018567639257295e-07, "loss": 1.151026964187622, "step": 18 }, { "epoch": 0.00504581064931616, "grad_norm": 1.8048967405226255, "learning_rate": 9.549071618037136e-07, "loss": 1.155288815498352, "step": 19 }, { "epoch": 0.005311379630859116, "grad_norm": 2.1631450267380767, "learning_rate": 1.0079575596816979e-06, "loss": 1.183434009552002, "step": 20 }, { "epoch": 0.005576948612402072, "grad_norm": 1.88758019498484, "learning_rate": 1.0610079575596817e-06, "loss": 1.161030650138855, "step": 21 }, { "epoch": 0.005842517593945027, "grad_norm": 1.9605989446426395, "learning_rate": 1.1140583554376658e-06, "loss": 1.123382806777954, "step": 22 }, { "epoch": 0.006108086575487983, "grad_norm": 2.2042020560619306, "learning_rate": 1.16710875331565e-06, "loss": 1.238707423210144, "step": 23 }, { "epoch": 0.0063736555570309385, "grad_norm": 2.289866056000848, "learning_rate": 1.220159151193634e-06, "loss": 1.2058464288711548, "step": 24 }, { "epoch": 0.006639224538573895, "grad_norm": 2.724214643619529, "learning_rate": 1.273209549071618e-06, "loss": 1.2351092100143433, "step": 25 }, { "epoch": 0.00690479352011685, "grad_norm": 2.5088520951326028, "learning_rate": 1.3262599469496024e-06, "loss": 1.1739860773086548, "step": 26 }, { "epoch": 0.007170362501659806, "grad_norm": 2.3243798435890155, "learning_rate": 1.3793103448275862e-06, "loss": 1.1407617330551147, "step": 27 }, { "epoch": 0.007435931483202762, "grad_norm": 2.533007430657115, "learning_rate": 1.4323607427055705e-06, "loss": 1.1844531297683716, "step": 28 }, { "epoch": 0.007701500464745718, "grad_norm": 2.4702075978733804, "learning_rate": 1.4854111405835544e-06, "loss": 1.1293678283691406, "step": 29 }, { "epoch": 0.007967069446288673, "grad_norm": 3.0873404038783963, "learning_rate": 1.5384615384615387e-06, "loss": 1.1310899257659912, "step": 30 }, { "epoch": 0.00823263842783163, "grad_norm": 2.7098364862500013, "learning_rate": 1.5915119363395226e-06, "loss": 1.1015795469284058, "step": 31 }, { "epoch": 0.008498207409374585, "grad_norm": 2.8074949689582476, "learning_rate": 1.6445623342175069e-06, "loss": 1.0756056308746338, "step": 32 }, { "epoch": 0.00876377639091754, "grad_norm": 3.1563034348975676, "learning_rate": 1.6976127320954908e-06, "loss": 1.1496126651763916, "step": 33 }, { "epoch": 0.009029345372460496, "grad_norm": 2.842390896608423, "learning_rate": 1.750663129973475e-06, "loss": 1.203465461730957, "step": 34 }, { "epoch": 0.009294914354003453, "grad_norm": 2.6747271223349753, "learning_rate": 1.803713527851459e-06, "loss": 1.0613923072814941, "step": 35 }, { "epoch": 0.009560483335546408, "grad_norm": 2.146709655536541, "learning_rate": 1.8567639257294432e-06, "loss": 1.06027090549469, "step": 36 }, { "epoch": 0.009826052317089363, "grad_norm": 1.9942495143394863, "learning_rate": 1.909814323607427e-06, "loss": 1.0508522987365723, "step": 37 }, { "epoch": 0.01009162129863232, "grad_norm": 2.1704927298148107, "learning_rate": 1.9628647214854114e-06, "loss": 1.0353929996490479, "step": 38 }, { "epoch": 0.010357190280175276, "grad_norm": 1.8252380884349957, "learning_rate": 2.0159151193633957e-06, "loss": 0.9974027276039124, "step": 39 }, { "epoch": 0.010622759261718231, "grad_norm": 1.7188806752497834, "learning_rate": 2.0689655172413796e-06, "loss": 1.0849467515945435, "step": 40 }, { "epoch": 0.010888328243261186, "grad_norm": 1.3692667089198218, "learning_rate": 2.1220159151193635e-06, "loss": 1.005434274673462, "step": 41 }, { "epoch": 0.011153897224804143, "grad_norm": 1.3465343019370317, "learning_rate": 2.1750663129973478e-06, "loss": 1.052631139755249, "step": 42 }, { "epoch": 0.011419466206347099, "grad_norm": 1.352421126005469, "learning_rate": 2.2281167108753316e-06, "loss": 0.9470957517623901, "step": 43 }, { "epoch": 0.011685035187890054, "grad_norm": 1.2219308328594767, "learning_rate": 2.281167108753316e-06, "loss": 0.9865130186080933, "step": 44 }, { "epoch": 0.01195060416943301, "grad_norm": 1.19161259271228, "learning_rate": 2.3342175066313e-06, "loss": 0.9405577778816223, "step": 45 }, { "epoch": 0.012216173150975966, "grad_norm": 1.1603073869733838, "learning_rate": 2.387267904509284e-06, "loss": 0.9418795108795166, "step": 46 }, { "epoch": 0.012481742132518922, "grad_norm": 1.1897328813812988, "learning_rate": 2.440318302387268e-06, "loss": 0.9841142892837524, "step": 47 }, { "epoch": 0.012747311114061877, "grad_norm": 1.159720101499262, "learning_rate": 2.4933687002652523e-06, "loss": 0.9412609338760376, "step": 48 }, { "epoch": 0.013012880095604834, "grad_norm": 1.1421347262548374, "learning_rate": 2.546419098143236e-06, "loss": 0.9239889979362488, "step": 49 }, { "epoch": 0.01327844907714779, "grad_norm": 1.144363453746544, "learning_rate": 2.59946949602122e-06, "loss": 0.9212941527366638, "step": 50 }, { "epoch": 0.013544018058690745, "grad_norm": 0.9916816911141796, "learning_rate": 2.6525198938992047e-06, "loss": 0.8863773345947266, "step": 51 }, { "epoch": 0.0138095870402337, "grad_norm": 0.9890613082667745, "learning_rate": 2.7055702917771886e-06, "loss": 0.8990404009819031, "step": 52 }, { "epoch": 0.014075156021776657, "grad_norm": 1.1123466462737277, "learning_rate": 2.7586206896551725e-06, "loss": 0.9257171154022217, "step": 53 }, { "epoch": 0.014340725003319612, "grad_norm": 0.8689931750055545, "learning_rate": 2.8116710875331564e-06, "loss": 0.8239601254463196, "step": 54 }, { "epoch": 0.014606293984862568, "grad_norm": 0.9936229603029793, "learning_rate": 2.864721485411141e-06, "loss": 0.8656830787658691, "step": 55 }, { "epoch": 0.014871862966405525, "grad_norm": 1.0202371081091262, "learning_rate": 2.917771883289125e-06, "loss": 0.9470342397689819, "step": 56 }, { "epoch": 0.01513743194794848, "grad_norm": 0.9663900963956384, "learning_rate": 2.970822281167109e-06, "loss": 0.8699859976768494, "step": 57 }, { "epoch": 0.015403000929491435, "grad_norm": 0.940263545207204, "learning_rate": 3.0238726790450927e-06, "loss": 0.8668704628944397, "step": 58 }, { "epoch": 0.01566856991103439, "grad_norm": 0.9865381848251076, "learning_rate": 3.0769230769230774e-06, "loss": 0.841624915599823, "step": 59 }, { "epoch": 0.015934138892577346, "grad_norm": 0.8909972421095332, "learning_rate": 3.1299734748010613e-06, "loss": 0.8412661552429199, "step": 60 }, { "epoch": 0.0161997078741203, "grad_norm": 0.8771283277278942, "learning_rate": 3.183023872679045e-06, "loss": 0.818957507610321, "step": 61 }, { "epoch": 0.01646527685566326, "grad_norm": 0.9190140482494583, "learning_rate": 3.23607427055703e-06, "loss": 0.8030763268470764, "step": 62 }, { "epoch": 0.016730845837206215, "grad_norm": 0.8839367067386452, "learning_rate": 3.2891246684350138e-06, "loss": 0.7869359850883484, "step": 63 }, { "epoch": 0.01699641481874917, "grad_norm": 0.8058255896640879, "learning_rate": 3.3421750663129977e-06, "loss": 0.7912170886993408, "step": 64 }, { "epoch": 0.017261983800292126, "grad_norm": 0.8538938403853334, "learning_rate": 3.3952254641909815e-06, "loss": 0.7736695408821106, "step": 65 }, { "epoch": 0.01752755278183508, "grad_norm": 0.8652625375848492, "learning_rate": 3.448275862068966e-06, "loss": 0.768275260925293, "step": 66 }, { "epoch": 0.017793121763378036, "grad_norm": 0.8691478661970735, "learning_rate": 3.50132625994695e-06, "loss": 0.7210639119148254, "step": 67 }, { "epoch": 0.01805869074492099, "grad_norm": 0.8378031795839386, "learning_rate": 3.554376657824934e-06, "loss": 0.7488028407096863, "step": 68 }, { "epoch": 0.01832425972646395, "grad_norm": 0.8943989597273122, "learning_rate": 3.607427055702918e-06, "loss": 0.7329621911048889, "step": 69 }, { "epoch": 0.018589828708006906, "grad_norm": 0.92104620358882, "learning_rate": 3.660477453580902e-06, "loss": 0.7270619869232178, "step": 70 }, { "epoch": 0.01885539768954986, "grad_norm": 0.9782498013554233, "learning_rate": 3.7135278514588865e-06, "loss": 0.7271254658699036, "step": 71 }, { "epoch": 0.019120966671092816, "grad_norm": 0.9115603845811348, "learning_rate": 3.7665782493368703e-06, "loss": 0.787033200263977, "step": 72 }, { "epoch": 0.01938653565263577, "grad_norm": 0.8604692726067453, "learning_rate": 3.819628647214854e-06, "loss": 0.7049479484558105, "step": 73 }, { "epoch": 0.019652104634178727, "grad_norm": 0.8610577281688413, "learning_rate": 3.8726790450928385e-06, "loss": 0.7146892547607422, "step": 74 }, { "epoch": 0.019917673615721682, "grad_norm": 0.7602187567662452, "learning_rate": 3.925729442970823e-06, "loss": 0.7212516069412231, "step": 75 }, { "epoch": 0.02018324259726464, "grad_norm": 0.6842508042039768, "learning_rate": 3.978779840848806e-06, "loss": 0.6612375378608704, "step": 76 }, { "epoch": 0.020448811578807596, "grad_norm": 0.7781006919053841, "learning_rate": 4.031830238726791e-06, "loss": 0.7038244605064392, "step": 77 }, { "epoch": 0.02071438056035055, "grad_norm": 0.7186592057129139, "learning_rate": 4.084880636604775e-06, "loss": 0.7081903219223022, "step": 78 }, { "epoch": 0.020979949541893507, "grad_norm": 0.7655954113403886, "learning_rate": 4.137931034482759e-06, "loss": 0.7079841494560242, "step": 79 }, { "epoch": 0.021245518523436462, "grad_norm": 0.7149787673446053, "learning_rate": 4.190981432360743e-06, "loss": 0.7090641260147095, "step": 80 }, { "epoch": 0.021511087504979418, "grad_norm": 0.6657837070384769, "learning_rate": 4.244031830238727e-06, "loss": 0.6632575988769531, "step": 81 }, { "epoch": 0.021776656486522373, "grad_norm": 0.6666401713606211, "learning_rate": 4.297082228116711e-06, "loss": 0.7231097221374512, "step": 82 }, { "epoch": 0.02204222546806533, "grad_norm": 0.6804476609839887, "learning_rate": 4.3501326259946955e-06, "loss": 0.6696034669876099, "step": 83 }, { "epoch": 0.022307794449608287, "grad_norm": 0.7073638927991296, "learning_rate": 4.403183023872679e-06, "loss": 0.7550696134567261, "step": 84 }, { "epoch": 0.022573363431151242, "grad_norm": 0.7064770122504733, "learning_rate": 4.456233421750663e-06, "loss": 0.671328067779541, "step": 85 }, { "epoch": 0.022838932412694198, "grad_norm": 0.6506139330803743, "learning_rate": 4.5092838196286476e-06, "loss": 0.6864410638809204, "step": 86 }, { "epoch": 0.023104501394237153, "grad_norm": 0.6642837777732639, "learning_rate": 4.562334217506632e-06, "loss": 0.6870769262313843, "step": 87 }, { "epoch": 0.023370070375780108, "grad_norm": 0.6947506894199804, "learning_rate": 4.615384615384616e-06, "loss": 0.6539690494537354, "step": 88 }, { "epoch": 0.023635639357323063, "grad_norm": 0.6446743321890098, "learning_rate": 4.6684350132626e-06, "loss": 0.6946991086006165, "step": 89 }, { "epoch": 0.02390120833886602, "grad_norm": 0.6384512383480915, "learning_rate": 4.721485411140584e-06, "loss": 0.6177583932876587, "step": 90 }, { "epoch": 0.024166777320408978, "grad_norm": 0.7150510018442997, "learning_rate": 4.774535809018568e-06, "loss": 0.6890037059783936, "step": 91 }, { "epoch": 0.024432346301951933, "grad_norm": 0.6592991709316253, "learning_rate": 4.8275862068965525e-06, "loss": 0.6563063263893127, "step": 92 }, { "epoch": 0.024697915283494888, "grad_norm": 0.6897740926797078, "learning_rate": 4.880636604774536e-06, "loss": 0.714318573474884, "step": 93 }, { "epoch": 0.024963484265037843, "grad_norm": 0.6433596226177777, "learning_rate": 4.93368700265252e-06, "loss": 0.6720882654190063, "step": 94 }, { "epoch": 0.0252290532465808, "grad_norm": 0.5910528348002435, "learning_rate": 4.9867374005305045e-06, "loss": 0.602899968624115, "step": 95 }, { "epoch": 0.025494622228123754, "grad_norm": 0.6635651676723159, "learning_rate": 5.039787798408489e-06, "loss": 0.6628841161727905, "step": 96 }, { "epoch": 0.02576019120966671, "grad_norm": 0.6070065577903714, "learning_rate": 5.092838196286472e-06, "loss": 0.6486932635307312, "step": 97 }, { "epoch": 0.026025760191209668, "grad_norm": 0.6484848126679549, "learning_rate": 5.145888594164457e-06, "loss": 0.6719033122062683, "step": 98 }, { "epoch": 0.026291329172752623, "grad_norm": 0.6856934201881044, "learning_rate": 5.19893899204244e-06, "loss": 0.6818530559539795, "step": 99 }, { "epoch": 0.02655689815429558, "grad_norm": 0.6204811558305167, "learning_rate": 5.251989389920424e-06, "loss": 0.6306912899017334, "step": 100 }, { "epoch": 0.026822467135838534, "grad_norm": 0.7820574736690976, "learning_rate": 5.3050397877984095e-06, "loss": 0.5952945351600647, "step": 101 }, { "epoch": 0.02708803611738149, "grad_norm": 0.6546243503849497, "learning_rate": 5.358090185676394e-06, "loss": 0.6566107273101807, "step": 102 }, { "epoch": 0.027353605098924445, "grad_norm": 0.707921645301647, "learning_rate": 5.411140583554377e-06, "loss": 0.6981694102287292, "step": 103 }, { "epoch": 0.0276191740804674, "grad_norm": 0.6375441067969543, "learning_rate": 5.4641909814323615e-06, "loss": 0.6231328248977661, "step": 104 }, { "epoch": 0.02788474306201036, "grad_norm": 0.6964560869475424, "learning_rate": 5.517241379310345e-06, "loss": 0.6414977312088013, "step": 105 }, { "epoch": 0.028150312043553314, "grad_norm": 0.6835502446580011, "learning_rate": 5.570291777188329e-06, "loss": 0.6335234642028809, "step": 106 }, { "epoch": 0.02841588102509627, "grad_norm": 0.6248033284508979, "learning_rate": 5.623342175066313e-06, "loss": 0.6040852665901184, "step": 107 }, { "epoch": 0.028681450006639225, "grad_norm": 0.6645474785171195, "learning_rate": 5.676392572944297e-06, "loss": 0.6011114716529846, "step": 108 }, { "epoch": 0.02894701898818218, "grad_norm": 0.655106623405533, "learning_rate": 5.729442970822282e-06, "loss": 0.6042627096176147, "step": 109 }, { "epoch": 0.029212587969725135, "grad_norm": 0.720208539355598, "learning_rate": 5.782493368700266e-06, "loss": 0.6183412671089172, "step": 110 }, { "epoch": 0.02947815695126809, "grad_norm": 0.6666287454908232, "learning_rate": 5.83554376657825e-06, "loss": 0.6150818467140198, "step": 111 }, { "epoch": 0.02974372593281105, "grad_norm": 0.6840692324124527, "learning_rate": 5.888594164456234e-06, "loss": 0.6202039122581482, "step": 112 }, { "epoch": 0.030009294914354005, "grad_norm": 0.6626407253242022, "learning_rate": 5.941644562334218e-06, "loss": 0.6334809064865112, "step": 113 }, { "epoch": 0.03027486389589696, "grad_norm": 0.6319419097399773, "learning_rate": 5.994694960212202e-06, "loss": 0.5728089809417725, "step": 114 }, { "epoch": 0.030540432877439915, "grad_norm": 0.6988175213443283, "learning_rate": 6.0477453580901854e-06, "loss": 0.6884603500366211, "step": 115 }, { "epoch": 0.03080600185898287, "grad_norm": 0.6618120552387852, "learning_rate": 6.1007957559681706e-06, "loss": 0.5619829893112183, "step": 116 }, { "epoch": 0.031071570840525826, "grad_norm": 0.6756012639437595, "learning_rate": 6.153846153846155e-06, "loss": 0.6224710941314697, "step": 117 }, { "epoch": 0.03133713982206878, "grad_norm": 0.7208355833756769, "learning_rate": 6.206896551724138e-06, "loss": 0.6119496822357178, "step": 118 }, { "epoch": 0.03160270880361174, "grad_norm": 0.6917782946677038, "learning_rate": 6.259946949602123e-06, "loss": 0.6190857887268066, "step": 119 }, { "epoch": 0.03186827778515469, "grad_norm": 0.6704531181022263, "learning_rate": 6.312997347480107e-06, "loss": 0.6460769176483154, "step": 120 }, { "epoch": 0.03213384676669765, "grad_norm": 0.7493511248909543, "learning_rate": 6.36604774535809e-06, "loss": 0.6148796677589417, "step": 121 }, { "epoch": 0.0323994157482406, "grad_norm": 0.6359613412994526, "learning_rate": 6.419098143236075e-06, "loss": 0.558960497379303, "step": 122 }, { "epoch": 0.03266498472978356, "grad_norm": 0.6785691051694177, "learning_rate": 6.47214854111406e-06, "loss": 0.5844984650611877, "step": 123 }, { "epoch": 0.03293055371132652, "grad_norm": 0.6692815537253501, "learning_rate": 6.525198938992043e-06, "loss": 0.5343623161315918, "step": 124 }, { "epoch": 0.03319612269286947, "grad_norm": 0.6705726789318588, "learning_rate": 6.5782493368700276e-06, "loss": 0.5834348797798157, "step": 125 }, { "epoch": 0.03346169167441243, "grad_norm": 0.7626576562771024, "learning_rate": 6.631299734748011e-06, "loss": 0.5997360944747925, "step": 126 }, { "epoch": 0.03372726065595538, "grad_norm": 0.7117893752859364, "learning_rate": 6.684350132625995e-06, "loss": 0.5991666316986084, "step": 127 }, { "epoch": 0.03399282963749834, "grad_norm": 0.7060406683837459, "learning_rate": 6.737400530503979e-06, "loss": 0.581120491027832, "step": 128 }, { "epoch": 0.03425839861904129, "grad_norm": 0.6869761252397286, "learning_rate": 6.790450928381963e-06, "loss": 0.6219569444656372, "step": 129 }, { "epoch": 0.03452396760058425, "grad_norm": 0.6916173566260286, "learning_rate": 6.843501326259947e-06, "loss": 0.5950608253479004, "step": 130 }, { "epoch": 0.03478953658212721, "grad_norm": 0.6136480902733893, "learning_rate": 6.896551724137932e-06, "loss": 0.5762747526168823, "step": 131 }, { "epoch": 0.03505510556367016, "grad_norm": 0.670368708945713, "learning_rate": 6.949602122015916e-06, "loss": 0.6003131866455078, "step": 132 }, { "epoch": 0.03532067454521312, "grad_norm": 0.6439028776339482, "learning_rate": 7.0026525198939e-06, "loss": 0.5866605043411255, "step": 133 }, { "epoch": 0.03558624352675607, "grad_norm": 0.8324202287699098, "learning_rate": 7.055702917771884e-06, "loss": 0.6668443083763123, "step": 134 }, { "epoch": 0.03585181250829903, "grad_norm": 0.7064456856515898, "learning_rate": 7.108753315649868e-06, "loss": 0.5738306045532227, "step": 135 }, { "epoch": 0.03611738148984198, "grad_norm": 0.6941604370641007, "learning_rate": 7.1618037135278515e-06, "loss": 0.5774663686752319, "step": 136 }, { "epoch": 0.03638295047138494, "grad_norm": 0.7648336305672251, "learning_rate": 7.214854111405836e-06, "loss": 0.5721150636672974, "step": 137 }, { "epoch": 0.0366485194529279, "grad_norm": 0.7394576462203543, "learning_rate": 7.267904509283821e-06, "loss": 0.6350122690200806, "step": 138 }, { "epoch": 0.03691408843447085, "grad_norm": 0.6540602529440619, "learning_rate": 7.320954907161804e-06, "loss": 0.5435039401054382, "step": 139 }, { "epoch": 0.03717965741601381, "grad_norm": 0.6965351191908165, "learning_rate": 7.374005305039789e-06, "loss": 0.5869162678718567, "step": 140 }, { "epoch": 0.03744522639755676, "grad_norm": 0.6664228073022063, "learning_rate": 7.427055702917773e-06, "loss": 0.5645807981491089, "step": 141 }, { "epoch": 0.03771079537909972, "grad_norm": 0.6503771775205762, "learning_rate": 7.480106100795756e-06, "loss": 0.5502692461013794, "step": 142 }, { "epoch": 0.037976364360642674, "grad_norm": 0.6223645459397411, "learning_rate": 7.533156498673741e-06, "loss": 0.5602732300758362, "step": 143 }, { "epoch": 0.03824193334218563, "grad_norm": 0.8638951879324807, "learning_rate": 7.586206896551724e-06, "loss": 0.6011391282081604, "step": 144 }, { "epoch": 0.03850750232372859, "grad_norm": 0.6930636234613441, "learning_rate": 7.639257294429708e-06, "loss": 0.5482327938079834, "step": 145 }, { "epoch": 0.03877307130527154, "grad_norm": 0.6693652199128735, "learning_rate": 7.692307692307694e-06, "loss": 0.5926344394683838, "step": 146 }, { "epoch": 0.0390386402868145, "grad_norm": 0.8434991800954339, "learning_rate": 7.745358090185677e-06, "loss": 0.6558316946029663, "step": 147 }, { "epoch": 0.039304209268357454, "grad_norm": 0.6845819362079449, "learning_rate": 7.79840848806366e-06, "loss": 0.572425365447998, "step": 148 }, { "epoch": 0.03956977824990041, "grad_norm": 0.696296152543372, "learning_rate": 7.851458885941646e-06, "loss": 0.5684784650802612, "step": 149 }, { "epoch": 0.039835347231443365, "grad_norm": 0.6779490529346879, "learning_rate": 7.904509283819629e-06, "loss": 0.5843643546104431, "step": 150 }, { "epoch": 0.04010091621298632, "grad_norm": 0.6894842979231472, "learning_rate": 7.957559681697613e-06, "loss": 0.5471494793891907, "step": 151 }, { "epoch": 0.04036648519452928, "grad_norm": 0.7583250211136208, "learning_rate": 8.010610079575598e-06, "loss": 0.595018744468689, "step": 152 }, { "epoch": 0.040632054176072234, "grad_norm": 0.6904128122756304, "learning_rate": 8.063660477453583e-06, "loss": 0.5431865453720093, "step": 153 }, { "epoch": 0.04089762315761519, "grad_norm": 0.7943246581886504, "learning_rate": 8.116710875331566e-06, "loss": 0.5622385740280151, "step": 154 }, { "epoch": 0.041163192139158145, "grad_norm": 0.7792002007338675, "learning_rate": 8.16976127320955e-06, "loss": 0.5795880556106567, "step": 155 }, { "epoch": 0.0414287611207011, "grad_norm": 0.7432143976693507, "learning_rate": 8.222811671087533e-06, "loss": 0.5854965448379517, "step": 156 }, { "epoch": 0.041694330102244055, "grad_norm": 0.8104825185442435, "learning_rate": 8.275862068965518e-06, "loss": 0.5374501943588257, "step": 157 }, { "epoch": 0.041959899083787014, "grad_norm": 0.7598674115735401, "learning_rate": 8.328912466843502e-06, "loss": 0.5779006481170654, "step": 158 }, { "epoch": 0.04222546806532997, "grad_norm": 0.7033741631796787, "learning_rate": 8.381962864721485e-06, "loss": 0.550236701965332, "step": 159 }, { "epoch": 0.042491037046872925, "grad_norm": 0.7285453499901458, "learning_rate": 8.43501326259947e-06, "loss": 0.557443380355835, "step": 160 }, { "epoch": 0.04275660602841588, "grad_norm": 0.7050753960524794, "learning_rate": 8.488063660477454e-06, "loss": 0.5875238180160522, "step": 161 }, { "epoch": 0.043022175009958835, "grad_norm": 0.7215582793376403, "learning_rate": 8.541114058355439e-06, "loss": 0.510900616645813, "step": 162 }, { "epoch": 0.043287743991501794, "grad_norm": 0.7559114001900116, "learning_rate": 8.594164456233422e-06, "loss": 0.5465859174728394, "step": 163 }, { "epoch": 0.043553312973044746, "grad_norm": 0.7494489908601825, "learning_rate": 8.647214854111406e-06, "loss": 0.5508615970611572, "step": 164 }, { "epoch": 0.043818881954587705, "grad_norm": 0.7714387963397975, "learning_rate": 8.700265251989391e-06, "loss": 0.5437714457511902, "step": 165 }, { "epoch": 0.04408445093613066, "grad_norm": 0.7480600693956645, "learning_rate": 8.753315649867374e-06, "loss": 0.542698323726654, "step": 166 }, { "epoch": 0.044350019917673615, "grad_norm": 0.7339141407878966, "learning_rate": 8.806366047745358e-06, "loss": 0.5169371962547302, "step": 167 }, { "epoch": 0.044615588899216574, "grad_norm": 0.725595419270195, "learning_rate": 8.859416445623343e-06, "loss": 0.5436176061630249, "step": 168 }, { "epoch": 0.044881157880759526, "grad_norm": 0.8205411933516983, "learning_rate": 8.912466843501327e-06, "loss": 0.568030834197998, "step": 169 }, { "epoch": 0.045146726862302484, "grad_norm": 0.7544356200090666, "learning_rate": 8.965517241379312e-06, "loss": 0.5218889713287354, "step": 170 }, { "epoch": 0.045412295843845436, "grad_norm": 0.7860957525035722, "learning_rate": 9.018567639257295e-06, "loss": 0.5275779962539673, "step": 171 }, { "epoch": 0.045677864825388395, "grad_norm": 0.6938225497373272, "learning_rate": 9.071618037135279e-06, "loss": 0.5263184905052185, "step": 172 }, { "epoch": 0.045943433806931354, "grad_norm": 0.7549069812662602, "learning_rate": 9.124668435013264e-06, "loss": 0.563044548034668, "step": 173 }, { "epoch": 0.046209002788474306, "grad_norm": 0.9364041083837341, "learning_rate": 9.177718832891247e-06, "loss": 0.5896912217140198, "step": 174 }, { "epoch": 0.046474571770017264, "grad_norm": 0.7219752548557496, "learning_rate": 9.230769230769232e-06, "loss": 0.5163949131965637, "step": 175 }, { "epoch": 0.046740140751560216, "grad_norm": 0.8391633255974319, "learning_rate": 9.283819628647216e-06, "loss": 0.6203320026397705, "step": 176 }, { "epoch": 0.047005709733103175, "grad_norm": 0.9119997852547688, "learning_rate": 9.3368700265252e-06, "loss": 0.5528024435043335, "step": 177 }, { "epoch": 0.04727127871464613, "grad_norm": 0.8828541610102935, "learning_rate": 9.389920424403184e-06, "loss": 0.5657555460929871, "step": 178 }, { "epoch": 0.047536847696189086, "grad_norm": 0.7671789386737649, "learning_rate": 9.442970822281168e-06, "loss": 0.5301925539970398, "step": 179 }, { "epoch": 0.04780241667773204, "grad_norm": 0.8675940797859782, "learning_rate": 9.496021220159151e-06, "loss": 0.5388369560241699, "step": 180 }, { "epoch": 0.048067985659274996, "grad_norm": 0.7966332028310692, "learning_rate": 9.549071618037136e-06, "loss": 0.5549717545509338, "step": 181 }, { "epoch": 0.048333554640817955, "grad_norm": 0.8814678011939608, "learning_rate": 9.60212201591512e-06, "loss": 0.5959764719009399, "step": 182 }, { "epoch": 0.04859912362236091, "grad_norm": 0.7841222204736121, "learning_rate": 9.655172413793105e-06, "loss": 0.5461844205856323, "step": 183 }, { "epoch": 0.048864692603903866, "grad_norm": 0.7620084886447284, "learning_rate": 9.708222811671088e-06, "loss": 0.5428494811058044, "step": 184 }, { "epoch": 0.04913026158544682, "grad_norm": 0.7918991595575344, "learning_rate": 9.761273209549072e-06, "loss": 0.552198052406311, "step": 185 }, { "epoch": 0.049395830566989776, "grad_norm": 0.6896394660507362, "learning_rate": 9.814323607427057e-06, "loss": 0.49992549419403076, "step": 186 }, { "epoch": 0.04966139954853273, "grad_norm": 0.7875507527713166, "learning_rate": 9.86737400530504e-06, "loss": 0.557820200920105, "step": 187 }, { "epoch": 0.04992696853007569, "grad_norm": 0.8883719893129148, "learning_rate": 9.920424403183024e-06, "loss": 0.5238749384880066, "step": 188 }, { "epoch": 0.050192537511618646, "grad_norm": 0.988465476825029, "learning_rate": 9.973474801061009e-06, "loss": 0.5346978902816772, "step": 189 }, { "epoch": 0.0504581064931616, "grad_norm": 0.8024883433630577, "learning_rate": 1.0026525198938993e-05, "loss": 0.5256577730178833, "step": 190 }, { "epoch": 0.050723675474704556, "grad_norm": 0.8026852335394901, "learning_rate": 1.0079575596816978e-05, "loss": 0.5235393047332764, "step": 191 }, { "epoch": 0.05098924445624751, "grad_norm": 0.6835673591276205, "learning_rate": 1.013262599469496e-05, "loss": 0.4984837472438812, "step": 192 }, { "epoch": 0.05125481343779047, "grad_norm": 0.7829913352817355, "learning_rate": 1.0185676392572945e-05, "loss": 0.5209602117538452, "step": 193 }, { "epoch": 0.05152038241933342, "grad_norm": 0.8334733472253096, "learning_rate": 1.023872679045093e-05, "loss": 0.5468267202377319, "step": 194 }, { "epoch": 0.05178595140087638, "grad_norm": 0.8107908645155819, "learning_rate": 1.0291777188328913e-05, "loss": 0.5531667470932007, "step": 195 }, { "epoch": 0.052051520382419336, "grad_norm": 0.8437904919697584, "learning_rate": 1.0344827586206898e-05, "loss": 0.5741526484489441, "step": 196 }, { "epoch": 0.05231708936396229, "grad_norm": 0.6830882515315945, "learning_rate": 1.039787798408488e-05, "loss": 0.46132561564445496, "step": 197 }, { "epoch": 0.05258265834550525, "grad_norm": 0.8402230890409916, "learning_rate": 1.0450928381962865e-05, "loss": 0.5074198842048645, "step": 198 }, { "epoch": 0.0528482273270482, "grad_norm": 0.7476727742688456, "learning_rate": 1.0503978779840849e-05, "loss": 0.5193089842796326, "step": 199 }, { "epoch": 0.05311379630859116, "grad_norm": 0.7814745235248249, "learning_rate": 1.0557029177718834e-05, "loss": 0.5209243297576904, "step": 200 }, { "epoch": 0.05337936529013411, "grad_norm": 0.8844918483638834, "learning_rate": 1.0610079575596819e-05, "loss": 0.5607191920280457, "step": 201 }, { "epoch": 0.05364493427167707, "grad_norm": 0.7926104097207243, "learning_rate": 1.0663129973474802e-05, "loss": 0.5482805371284485, "step": 202 }, { "epoch": 0.05391050325322003, "grad_norm": 0.8109463956858287, "learning_rate": 1.0716180371352788e-05, "loss": 0.5579961538314819, "step": 203 }, { "epoch": 0.05417607223476298, "grad_norm": 0.8246893162942163, "learning_rate": 1.076923076923077e-05, "loss": 0.5119072794914246, "step": 204 }, { "epoch": 0.05444164121630594, "grad_norm": 0.8293246958439139, "learning_rate": 1.0822281167108754e-05, "loss": 0.5129292607307434, "step": 205 }, { "epoch": 0.05470721019784889, "grad_norm": 0.6895550242199711, "learning_rate": 1.0875331564986738e-05, "loss": 0.500032901763916, "step": 206 }, { "epoch": 0.05497277917939185, "grad_norm": 0.8385731092525408, "learning_rate": 1.0928381962864723e-05, "loss": 0.5264571309089661, "step": 207 }, { "epoch": 0.0552383481609348, "grad_norm": 0.7915802802090326, "learning_rate": 1.0981432360742708e-05, "loss": 0.5569590330123901, "step": 208 }, { "epoch": 0.05550391714247776, "grad_norm": 0.8546725938844908, "learning_rate": 1.103448275862069e-05, "loss": 0.5429908037185669, "step": 209 }, { "epoch": 0.05576948612402072, "grad_norm": 0.8175642333393268, "learning_rate": 1.1087533156498675e-05, "loss": 0.5073692202568054, "step": 210 }, { "epoch": 0.05603505510556367, "grad_norm": 0.9551222157670755, "learning_rate": 1.1140583554376659e-05, "loss": 0.5613659620285034, "step": 211 }, { "epoch": 0.05630062408710663, "grad_norm": 1.8348970874488084, "learning_rate": 1.1193633952254644e-05, "loss": 0.5197691917419434, "step": 212 }, { "epoch": 0.05656619306864958, "grad_norm": 0.9173115658326468, "learning_rate": 1.1246684350132625e-05, "loss": 0.5410990715026855, "step": 213 }, { "epoch": 0.05683176205019254, "grad_norm": 0.8562107533946397, "learning_rate": 1.129973474801061e-05, "loss": 0.5852477550506592, "step": 214 }, { "epoch": 0.05709733103173549, "grad_norm": 0.8483195878163089, "learning_rate": 1.1352785145888594e-05, "loss": 0.5312488079071045, "step": 215 }, { "epoch": 0.05736290001327845, "grad_norm": 0.8817111257753456, "learning_rate": 1.140583554376658e-05, "loss": 0.5075235366821289, "step": 216 }, { "epoch": 0.05762846899482141, "grad_norm": 0.8014885700994473, "learning_rate": 1.1458885941644564e-05, "loss": 0.5213298797607422, "step": 217 }, { "epoch": 0.05789403797636436, "grad_norm": 0.8852582070340804, "learning_rate": 1.1511936339522548e-05, "loss": 0.5564183592796326, "step": 218 }, { "epoch": 0.05815960695790732, "grad_norm": 1.0148412469588788, "learning_rate": 1.1564986737400531e-05, "loss": 0.5328387022018433, "step": 219 }, { "epoch": 0.05842517593945027, "grad_norm": 0.7824132338865165, "learning_rate": 1.1618037135278515e-05, "loss": 0.5010273456573486, "step": 220 }, { "epoch": 0.05869074492099323, "grad_norm": 0.8493817546068081, "learning_rate": 1.16710875331565e-05, "loss": 0.5473708510398865, "step": 221 }, { "epoch": 0.05895631390253618, "grad_norm": 1.1554913959885298, "learning_rate": 1.1724137931034483e-05, "loss": 0.5359818339347839, "step": 222 }, { "epoch": 0.05922188288407914, "grad_norm": 0.9663065987200732, "learning_rate": 1.1777188328912468e-05, "loss": 0.5274665951728821, "step": 223 }, { "epoch": 0.0594874518656221, "grad_norm": 0.8158672021913522, "learning_rate": 1.1830238726790454e-05, "loss": 0.5463781952857971, "step": 224 }, { "epoch": 0.05975302084716505, "grad_norm": 0.7817235200046289, "learning_rate": 1.1883289124668435e-05, "loss": 0.553212583065033, "step": 225 }, { "epoch": 0.06001858982870801, "grad_norm": 0.8540074681170072, "learning_rate": 1.193633952254642e-05, "loss": 0.47144171595573425, "step": 226 }, { "epoch": 0.06028415881025096, "grad_norm": 0.9191106803002166, "learning_rate": 1.1989389920424404e-05, "loss": 0.506844162940979, "step": 227 }, { "epoch": 0.06054972779179392, "grad_norm": 0.794192267301098, "learning_rate": 1.2042440318302389e-05, "loss": 0.4965322017669678, "step": 228 }, { "epoch": 0.06081529677333687, "grad_norm": 0.8421546110465796, "learning_rate": 1.2095490716180371e-05, "loss": 0.4815751612186432, "step": 229 }, { "epoch": 0.06108086575487983, "grad_norm": 0.8107361719185122, "learning_rate": 1.2148541114058356e-05, "loss": 0.5245312452316284, "step": 230 }, { "epoch": 0.06134643473642279, "grad_norm": 0.8749447967552209, "learning_rate": 1.2201591511936341e-05, "loss": 0.5215133428573608, "step": 231 }, { "epoch": 0.06161200371796574, "grad_norm": 0.8315635530714504, "learning_rate": 1.2254641909814325e-05, "loss": 0.5039419531822205, "step": 232 }, { "epoch": 0.0618775726995087, "grad_norm": 1.0583546039713638, "learning_rate": 1.230769230769231e-05, "loss": 0.5562925338745117, "step": 233 }, { "epoch": 0.06214314168105165, "grad_norm": 1.069780059811152, "learning_rate": 1.2360742705570291e-05, "loss": 0.5372984409332275, "step": 234 }, { "epoch": 0.06240871066259461, "grad_norm": 0.8766841361731121, "learning_rate": 1.2413793103448277e-05, "loss": 0.44987717270851135, "step": 235 }, { "epoch": 0.06267427964413756, "grad_norm": 0.9229136432445015, "learning_rate": 1.246684350132626e-05, "loss": 0.537068247795105, "step": 236 }, { "epoch": 0.06293984862568051, "grad_norm": 0.9828329951785308, "learning_rate": 1.2519893899204245e-05, "loss": 0.504779577255249, "step": 237 }, { "epoch": 0.06320541760722348, "grad_norm": 1.0061858451025696, "learning_rate": 1.257294429708223e-05, "loss": 0.5524113774299622, "step": 238 }, { "epoch": 0.06347098658876643, "grad_norm": 0.9888885225244529, "learning_rate": 1.2625994694960214e-05, "loss": 0.5089439153671265, "step": 239 }, { "epoch": 0.06373655557030938, "grad_norm": 0.8394940482178029, "learning_rate": 1.2679045092838197e-05, "loss": 0.4501679837703705, "step": 240 }, { "epoch": 0.06400212455185235, "grad_norm": 0.8117693384854435, "learning_rate": 1.273209549071618e-05, "loss": 0.5360216498374939, "step": 241 }, { "epoch": 0.0642676935333953, "grad_norm": 0.876954304053235, "learning_rate": 1.2785145888594166e-05, "loss": 0.5595712661743164, "step": 242 }, { "epoch": 0.06453326251493825, "grad_norm": 1.080992038181853, "learning_rate": 1.283819628647215e-05, "loss": 0.5010904669761658, "step": 243 }, { "epoch": 0.0647988314964812, "grad_norm": 1.0446842005075034, "learning_rate": 1.2891246684350134e-05, "loss": 0.5053697228431702, "step": 244 }, { "epoch": 0.06506440047802417, "grad_norm": 0.803002193385922, "learning_rate": 1.294429708222812e-05, "loss": 0.5045514106750488, "step": 245 }, { "epoch": 0.06532996945956712, "grad_norm": 0.7912163744531999, "learning_rate": 1.2997347480106101e-05, "loss": 0.5546073913574219, "step": 246 }, { "epoch": 0.06559553844111007, "grad_norm": 0.9572908035308383, "learning_rate": 1.3050397877984087e-05, "loss": 0.47276046872138977, "step": 247 }, { "epoch": 0.06586110742265304, "grad_norm": 0.8233476091470914, "learning_rate": 1.310344827586207e-05, "loss": 0.4757889211177826, "step": 248 }, { "epoch": 0.06612667640419599, "grad_norm": 0.8415305337388579, "learning_rate": 1.3156498673740055e-05, "loss": 0.5078848600387573, "step": 249 }, { "epoch": 0.06639224538573894, "grad_norm": 0.8437984625649567, "learning_rate": 1.3209549071618037e-05, "loss": 0.4890335202217102, "step": 250 }, { "epoch": 0.0666578143672819, "grad_norm": 0.8299999132068526, "learning_rate": 1.3262599469496022e-05, "loss": 0.5406580567359924, "step": 251 }, { "epoch": 0.06692338334882486, "grad_norm": 0.9307594142144101, "learning_rate": 1.3315649867374005e-05, "loss": 0.5236875414848328, "step": 252 }, { "epoch": 0.06718895233036781, "grad_norm": 1.0602580439454288, "learning_rate": 1.336870026525199e-05, "loss": 0.4991317391395569, "step": 253 }, { "epoch": 0.06745452131191076, "grad_norm": 0.8277603880683132, "learning_rate": 1.3421750663129976e-05, "loss": 0.4234679639339447, "step": 254 }, { "epoch": 0.06772009029345373, "grad_norm": 0.9984839302922622, "learning_rate": 1.3474801061007958e-05, "loss": 0.49749234318733215, "step": 255 }, { "epoch": 0.06798565927499668, "grad_norm": 0.9543855303701088, "learning_rate": 1.3527851458885943e-05, "loss": 0.5049105286598206, "step": 256 }, { "epoch": 0.06825122825653963, "grad_norm": 0.8443711840757044, "learning_rate": 1.3580901856763926e-05, "loss": 0.5355304479598999, "step": 257 }, { "epoch": 0.06851679723808259, "grad_norm": 0.9255144140027944, "learning_rate": 1.3633952254641911e-05, "loss": 0.46302929520606995, "step": 258 }, { "epoch": 0.06878236621962555, "grad_norm": 0.953877794861965, "learning_rate": 1.3687002652519895e-05, "loss": 0.5054173469543457, "step": 259 }, { "epoch": 0.0690479352011685, "grad_norm": 0.8214682466537866, "learning_rate": 1.374005305039788e-05, "loss": 0.5018566846847534, "step": 260 }, { "epoch": 0.06931350418271146, "grad_norm": 0.878430758752321, "learning_rate": 1.3793103448275863e-05, "loss": 0.4938735365867615, "step": 261 }, { "epoch": 0.06957907316425442, "grad_norm": 0.8343439459008911, "learning_rate": 1.3846153846153847e-05, "loss": 0.4605029225349426, "step": 262 }, { "epoch": 0.06984464214579737, "grad_norm": 0.8260329604526515, "learning_rate": 1.3899204244031832e-05, "loss": 0.5056782960891724, "step": 263 }, { "epoch": 0.07011021112734032, "grad_norm": 0.860551370737139, "learning_rate": 1.3952254641909815e-05, "loss": 0.5017784833908081, "step": 264 }, { "epoch": 0.07037578010888328, "grad_norm": 0.8353804409772935, "learning_rate": 1.40053050397878e-05, "loss": 0.5132012367248535, "step": 265 }, { "epoch": 0.07064134909042624, "grad_norm": 0.8151795113028358, "learning_rate": 1.4058355437665782e-05, "loss": 0.531212329864502, "step": 266 }, { "epoch": 0.0709069180719692, "grad_norm": 0.8086605566204427, "learning_rate": 1.4111405835543767e-05, "loss": 0.4900968074798584, "step": 267 }, { "epoch": 0.07117248705351215, "grad_norm": 0.8735731145360269, "learning_rate": 1.4164456233421753e-05, "loss": 0.45277124643325806, "step": 268 }, { "epoch": 0.07143805603505511, "grad_norm": 0.8760293380808535, "learning_rate": 1.4217506631299736e-05, "loss": 0.48026078939437866, "step": 269 }, { "epoch": 0.07170362501659806, "grad_norm": 0.9019281227597356, "learning_rate": 1.4270557029177721e-05, "loss": 0.5111234784126282, "step": 270 }, { "epoch": 0.07196919399814102, "grad_norm": 0.9120608197487232, "learning_rate": 1.4323607427055703e-05, "loss": 0.5448082685470581, "step": 271 }, { "epoch": 0.07223476297968397, "grad_norm": 0.9400729117423203, "learning_rate": 1.4376657824933688e-05, "loss": 0.5242921113967896, "step": 272 }, { "epoch": 0.07250033196122693, "grad_norm": 0.9404952891335322, "learning_rate": 1.4429708222811672e-05, "loss": 0.5194095373153687, "step": 273 }, { "epoch": 0.07276590094276988, "grad_norm": 0.8893776382848525, "learning_rate": 1.4482758620689657e-05, "loss": 0.4620330333709717, "step": 274 }, { "epoch": 0.07303146992431284, "grad_norm": 0.886983687866706, "learning_rate": 1.4535809018567642e-05, "loss": 0.4654063582420349, "step": 275 }, { "epoch": 0.0732970389058558, "grad_norm": 0.7984003718276244, "learning_rate": 1.4588859416445624e-05, "loss": 0.4637746810913086, "step": 276 }, { "epoch": 0.07356260788739875, "grad_norm": 0.8288882522584324, "learning_rate": 1.4641909814323609e-05, "loss": 0.47949421405792236, "step": 277 }, { "epoch": 0.0738281768689417, "grad_norm": 1.0041804846004008, "learning_rate": 1.4694960212201592e-05, "loss": 0.49565935134887695, "step": 278 }, { "epoch": 0.07409374585048466, "grad_norm": 0.9214786055945364, "learning_rate": 1.4748010610079577e-05, "loss": 0.5057941675186157, "step": 279 }, { "epoch": 0.07435931483202762, "grad_norm": 0.9073397896109812, "learning_rate": 1.480106100795756e-05, "loss": 0.5495956540107727, "step": 280 }, { "epoch": 0.07462488381357057, "grad_norm": 0.8743353741776648, "learning_rate": 1.4854111405835546e-05, "loss": 0.4502897560596466, "step": 281 }, { "epoch": 0.07489045279511353, "grad_norm": 0.8694785116368758, "learning_rate": 1.490716180371353e-05, "loss": 0.4799070954322815, "step": 282 }, { "epoch": 0.07515602177665649, "grad_norm": 0.886176954457428, "learning_rate": 1.4960212201591513e-05, "loss": 0.45640307664871216, "step": 283 }, { "epoch": 0.07542159075819944, "grad_norm": 0.8937725285994821, "learning_rate": 1.5013262599469498e-05, "loss": 0.47862207889556885, "step": 284 }, { "epoch": 0.0756871597397424, "grad_norm": 0.8717898339198907, "learning_rate": 1.5066312997347481e-05, "loss": 0.48195987939834595, "step": 285 }, { "epoch": 0.07595272872128535, "grad_norm": 0.9124586645482137, "learning_rate": 1.5119363395225467e-05, "loss": 0.518566370010376, "step": 286 }, { "epoch": 0.07621829770282831, "grad_norm": 0.9766882853479317, "learning_rate": 1.5172413793103448e-05, "loss": 0.5034162402153015, "step": 287 }, { "epoch": 0.07648386668437127, "grad_norm": 0.8995114639723897, "learning_rate": 1.5225464190981433e-05, "loss": 0.497822642326355, "step": 288 }, { "epoch": 0.07674943566591422, "grad_norm": 0.8484786603983125, "learning_rate": 1.5278514588859417e-05, "loss": 0.510530412197113, "step": 289 }, { "epoch": 0.07701500464745718, "grad_norm": 0.9406440408252492, "learning_rate": 1.53315649867374e-05, "loss": 0.5163881778717041, "step": 290 }, { "epoch": 0.07728057362900013, "grad_norm": 0.9825958938719339, "learning_rate": 1.5384615384615387e-05, "loss": 0.5161621570587158, "step": 291 }, { "epoch": 0.07754614261054309, "grad_norm": 0.8680267479326179, "learning_rate": 1.543766578249337e-05, "loss": 0.5260482430458069, "step": 292 }, { "epoch": 0.07781171159208604, "grad_norm": 0.8791995274446183, "learning_rate": 1.5490716180371354e-05, "loss": 0.4946279227733612, "step": 293 }, { "epoch": 0.078077280573629, "grad_norm": 0.9734620967906259, "learning_rate": 1.5543766578249338e-05, "loss": 0.5030514001846313, "step": 294 }, { "epoch": 0.07834284955517196, "grad_norm": 0.899295097408943, "learning_rate": 1.559681697612732e-05, "loss": 0.48864102363586426, "step": 295 }, { "epoch": 0.07860841853671491, "grad_norm": 0.8710376092284174, "learning_rate": 1.5649867374005304e-05, "loss": 0.48310425877571106, "step": 296 }, { "epoch": 0.07887398751825787, "grad_norm": 1.0094258392730318, "learning_rate": 1.570291777188329e-05, "loss": 0.4451446533203125, "step": 297 }, { "epoch": 0.07913955649980083, "grad_norm": 0.9863170561942101, "learning_rate": 1.5755968169761275e-05, "loss": 0.4884604811668396, "step": 298 }, { "epoch": 0.07940512548134378, "grad_norm": 0.8355693003184833, "learning_rate": 1.5809018567639258e-05, "loss": 0.5047659873962402, "step": 299 }, { "epoch": 0.07967069446288673, "grad_norm": 0.8879040718748079, "learning_rate": 1.586206896551724e-05, "loss": 0.49124205112457275, "step": 300 }, { "epoch": 0.0799362634444297, "grad_norm": 0.9411885452551192, "learning_rate": 1.5915119363395225e-05, "loss": 0.5113086700439453, "step": 301 }, { "epoch": 0.08020183242597265, "grad_norm": 0.9345380756850689, "learning_rate": 1.5968169761273212e-05, "loss": 0.5298338532447815, "step": 302 }, { "epoch": 0.0804674014075156, "grad_norm": 0.9050429706274331, "learning_rate": 1.6021220159151195e-05, "loss": 0.4673181176185608, "step": 303 }, { "epoch": 0.08073297038905856, "grad_norm": 0.8972864762330055, "learning_rate": 1.607427055702918e-05, "loss": 0.45361828804016113, "step": 304 }, { "epoch": 0.08099853937060152, "grad_norm": 0.8848533583648175, "learning_rate": 1.6127320954907166e-05, "loss": 0.5144034624099731, "step": 305 }, { "epoch": 0.08126410835214447, "grad_norm": 0.9263690972931414, "learning_rate": 1.6180371352785146e-05, "loss": 0.5027451515197754, "step": 306 }, { "epoch": 0.08152967733368742, "grad_norm": 0.8575377500476566, "learning_rate": 1.6233421750663133e-05, "loss": 0.4987551271915436, "step": 307 }, { "epoch": 0.08179524631523039, "grad_norm": 1.0121964253373468, "learning_rate": 1.6286472148541116e-05, "loss": 0.5433062314987183, "step": 308 }, { "epoch": 0.08206081529677334, "grad_norm": 0.8973695218716041, "learning_rate": 1.63395225464191e-05, "loss": 0.49603772163391113, "step": 309 }, { "epoch": 0.08232638427831629, "grad_norm": 0.9033181815462389, "learning_rate": 1.6392572944297083e-05, "loss": 0.47990959882736206, "step": 310 }, { "epoch": 0.08259195325985925, "grad_norm": 0.9843185449650845, "learning_rate": 1.6445623342175066e-05, "loss": 0.5196831226348877, "step": 311 }, { "epoch": 0.0828575222414022, "grad_norm": 0.8589822510995361, "learning_rate": 1.6498673740053053e-05, "loss": 0.4664091467857361, "step": 312 }, { "epoch": 0.08312309122294516, "grad_norm": 0.9077443936761218, "learning_rate": 1.6551724137931037e-05, "loss": 0.4405553936958313, "step": 313 }, { "epoch": 0.08338866020448811, "grad_norm": 0.8561334135462362, "learning_rate": 1.660477453580902e-05, "loss": 0.46172815561294556, "step": 314 }, { "epoch": 0.08365422918603108, "grad_norm": 0.8835708894071636, "learning_rate": 1.6657824933687004e-05, "loss": 0.5004327297210693, "step": 315 }, { "epoch": 0.08391979816757403, "grad_norm": 0.8452618593185571, "learning_rate": 1.6710875331564987e-05, "loss": 0.4727814197540283, "step": 316 }, { "epoch": 0.08418536714911698, "grad_norm": 0.7631381381409372, "learning_rate": 1.676392572944297e-05, "loss": 0.43602120876312256, "step": 317 }, { "epoch": 0.08445093613065995, "grad_norm": 0.9092168864142193, "learning_rate": 1.6816976127320957e-05, "loss": 0.5110410451889038, "step": 318 }, { "epoch": 0.0847165051122029, "grad_norm": 0.9902301773407237, "learning_rate": 1.687002652519894e-05, "loss": 0.4798283278942108, "step": 319 }, { "epoch": 0.08498207409374585, "grad_norm": 0.8572923551208312, "learning_rate": 1.6923076923076924e-05, "loss": 0.45690029859542847, "step": 320 }, { "epoch": 0.0852476430752888, "grad_norm": 0.8864718165003516, "learning_rate": 1.6976127320954908e-05, "loss": 0.4770117998123169, "step": 321 }, { "epoch": 0.08551321205683177, "grad_norm": 0.888032985544436, "learning_rate": 1.702917771883289e-05, "loss": 0.512240469455719, "step": 322 }, { "epoch": 0.08577878103837472, "grad_norm": 0.8665270088700595, "learning_rate": 1.7082228116710878e-05, "loss": 0.4696195423603058, "step": 323 }, { "epoch": 0.08604435001991767, "grad_norm": 0.8876364903970222, "learning_rate": 1.713527851458886e-05, "loss": 0.4779578149318695, "step": 324 }, { "epoch": 0.08630991900146064, "grad_norm": 0.9604080935445363, "learning_rate": 1.7188328912466845e-05, "loss": 0.48670440912246704, "step": 325 }, { "epoch": 0.08657548798300359, "grad_norm": 0.9813156772782552, "learning_rate": 1.7241379310344828e-05, "loss": 0.5285798907279968, "step": 326 }, { "epoch": 0.08684105696454654, "grad_norm": 0.9264252564283505, "learning_rate": 1.7294429708222812e-05, "loss": 0.46095865964889526, "step": 327 }, { "epoch": 0.08710662594608949, "grad_norm": 0.8953179311501671, "learning_rate": 1.73474801061008e-05, "loss": 0.44342565536499023, "step": 328 }, { "epoch": 0.08737219492763246, "grad_norm": 0.9640917124230414, "learning_rate": 1.7400530503978782e-05, "loss": 0.48974257707595825, "step": 329 }, { "epoch": 0.08763776390917541, "grad_norm": 1.3568266957703046, "learning_rate": 1.7453580901856765e-05, "loss": 0.4763977527618408, "step": 330 }, { "epoch": 0.08790333289071836, "grad_norm": 1.0231360729141987, "learning_rate": 1.750663129973475e-05, "loss": 0.5390856266021729, "step": 331 }, { "epoch": 0.08816890187226133, "grad_norm": 0.9254788253309115, "learning_rate": 1.7559681697612732e-05, "loss": 0.4833192825317383, "step": 332 }, { "epoch": 0.08843447085380428, "grad_norm": 0.9106057248503829, "learning_rate": 1.7612732095490716e-05, "loss": 0.47842955589294434, "step": 333 }, { "epoch": 0.08870003983534723, "grad_norm": 0.8653538374375338, "learning_rate": 1.7665782493368703e-05, "loss": 0.4543060064315796, "step": 334 }, { "epoch": 0.08896560881689018, "grad_norm": 0.9024795887264612, "learning_rate": 1.7718832891246686e-05, "loss": 0.4492039978504181, "step": 335 }, { "epoch": 0.08923117779843315, "grad_norm": 0.9660730803540603, "learning_rate": 1.777188328912467e-05, "loss": 0.4930066466331482, "step": 336 }, { "epoch": 0.0894967467799761, "grad_norm": 0.9494811659806174, "learning_rate": 1.7824933687002653e-05, "loss": 0.46343356370925903, "step": 337 }, { "epoch": 0.08976231576151905, "grad_norm": 0.98824099461907, "learning_rate": 1.7877984084880636e-05, "loss": 0.5118839740753174, "step": 338 }, { "epoch": 0.09002788474306202, "grad_norm": 0.9759312233085756, "learning_rate": 1.7931034482758623e-05, "loss": 0.4659194350242615, "step": 339 }, { "epoch": 0.09029345372460497, "grad_norm": 0.868792760549277, "learning_rate": 1.7984084880636607e-05, "loss": 0.45929303765296936, "step": 340 }, { "epoch": 0.09055902270614792, "grad_norm": 0.9774857416777888, "learning_rate": 1.803713527851459e-05, "loss": 0.5072556734085083, "step": 341 }, { "epoch": 0.09082459168769087, "grad_norm": 0.8722377179138728, "learning_rate": 1.8090185676392577e-05, "loss": 0.42370402812957764, "step": 342 }, { "epoch": 0.09109016066923384, "grad_norm": 0.9404121189660462, "learning_rate": 1.8143236074270557e-05, "loss": 0.5017818212509155, "step": 343 }, { "epoch": 0.09135572965077679, "grad_norm": 1.0279846493738434, "learning_rate": 1.8196286472148544e-05, "loss": 0.4746384620666504, "step": 344 }, { "epoch": 0.09162129863231974, "grad_norm": 1.0016746569872437, "learning_rate": 1.8249336870026527e-05, "loss": 0.49020540714263916, "step": 345 }, { "epoch": 0.09188686761386271, "grad_norm": 0.8521475505102624, "learning_rate": 1.830238726790451e-05, "loss": 0.4569393992424011, "step": 346 }, { "epoch": 0.09215243659540566, "grad_norm": 0.9587089968564823, "learning_rate": 1.8355437665782494e-05, "loss": 0.46831727027893066, "step": 347 }, { "epoch": 0.09241800557694861, "grad_norm": 0.909230845841239, "learning_rate": 1.8408488063660478e-05, "loss": 0.4795265197753906, "step": 348 }, { "epoch": 0.09268357455849156, "grad_norm": 0.9641043081337674, "learning_rate": 1.8461538461538465e-05, "loss": 0.5122503042221069, "step": 349 }, { "epoch": 0.09294914354003453, "grad_norm": 0.8617611974669258, "learning_rate": 1.8514588859416448e-05, "loss": 0.4190404713153839, "step": 350 }, { "epoch": 0.09321471252157748, "grad_norm": 0.9061006884991066, "learning_rate": 1.856763925729443e-05, "loss": 0.47778886556625366, "step": 351 }, { "epoch": 0.09348028150312043, "grad_norm": 0.9208451846579827, "learning_rate": 1.8620689655172415e-05, "loss": 0.45851507782936096, "step": 352 }, { "epoch": 0.09374585048466338, "grad_norm": 1.0050481975496854, "learning_rate": 1.86737400530504e-05, "loss": 0.4888782501220703, "step": 353 }, { "epoch": 0.09401141946620635, "grad_norm": 0.9454138173982718, "learning_rate": 1.8726790450928382e-05, "loss": 0.5032983422279358, "step": 354 }, { "epoch": 0.0942769884477493, "grad_norm": 0.9130362696106749, "learning_rate": 1.877984084880637e-05, "loss": 0.4754604697227478, "step": 355 }, { "epoch": 0.09454255742929225, "grad_norm": 0.9970889038933597, "learning_rate": 1.8832891246684352e-05, "loss": 0.488397479057312, "step": 356 }, { "epoch": 0.09480812641083522, "grad_norm": 1.222649143916529, "learning_rate": 1.8885941644562336e-05, "loss": 0.4775403141975403, "step": 357 }, { "epoch": 0.09507369539237817, "grad_norm": 0.9872263151320333, "learning_rate": 1.893899204244032e-05, "loss": 0.47063153982162476, "step": 358 }, { "epoch": 0.09533926437392112, "grad_norm": 1.0222144168199743, "learning_rate": 1.8992042440318303e-05, "loss": 0.4856908321380615, "step": 359 }, { "epoch": 0.09560483335546408, "grad_norm": 0.9195037496858368, "learning_rate": 1.904509283819629e-05, "loss": 0.440033495426178, "step": 360 }, { "epoch": 0.09587040233700704, "grad_norm": 0.9961899484684762, "learning_rate": 1.9098143236074273e-05, "loss": 0.4825770854949951, "step": 361 }, { "epoch": 0.09613597131854999, "grad_norm": 0.9443841189655576, "learning_rate": 1.9151193633952256e-05, "loss": 0.48192232847213745, "step": 362 }, { "epoch": 0.09640154030009294, "grad_norm": 0.9065595450317342, "learning_rate": 1.920424403183024e-05, "loss": 0.4689444899559021, "step": 363 }, { "epoch": 0.09666710928163591, "grad_norm": 0.9970961253516039, "learning_rate": 1.9257294429708223e-05, "loss": 0.47120895981788635, "step": 364 }, { "epoch": 0.09693267826317886, "grad_norm": 1.0106028234477955, "learning_rate": 1.931034482758621e-05, "loss": 0.4968941807746887, "step": 365 }, { "epoch": 0.09719824724472181, "grad_norm": 1.115125675989656, "learning_rate": 1.9363395225464193e-05, "loss": 0.46982288360595703, "step": 366 }, { "epoch": 0.09746381622626477, "grad_norm": 0.9408972278578609, "learning_rate": 1.9416445623342177e-05, "loss": 0.4541531205177307, "step": 367 }, { "epoch": 0.09772938520780773, "grad_norm": 0.9760564476186651, "learning_rate": 1.946949602122016e-05, "loss": 0.45576703548431396, "step": 368 }, { "epoch": 0.09799495418935068, "grad_norm": 0.9893999168346334, "learning_rate": 1.9522546419098144e-05, "loss": 0.48060357570648193, "step": 369 }, { "epoch": 0.09826052317089363, "grad_norm": 0.9675810264832774, "learning_rate": 1.9575596816976127e-05, "loss": 0.47536781430244446, "step": 370 }, { "epoch": 0.0985260921524366, "grad_norm": 0.9516181191759193, "learning_rate": 1.9628647214854114e-05, "loss": 0.46463894844055176, "step": 371 }, { "epoch": 0.09879166113397955, "grad_norm": 1.0082712913027811, "learning_rate": 1.9681697612732098e-05, "loss": 0.49570178985595703, "step": 372 }, { "epoch": 0.0990572301155225, "grad_norm": 1.0327922438955468, "learning_rate": 1.973474801061008e-05, "loss": 0.4764043390750885, "step": 373 }, { "epoch": 0.09932279909706546, "grad_norm": 0.9227866290107449, "learning_rate": 1.9787798408488064e-05, "loss": 0.43582671880722046, "step": 374 }, { "epoch": 0.09958836807860842, "grad_norm": 0.9360238854832598, "learning_rate": 1.9840848806366048e-05, "loss": 0.46077725291252136, "step": 375 }, { "epoch": 0.09985393706015137, "grad_norm": 0.9607682273492437, "learning_rate": 1.9893899204244035e-05, "loss": 0.4794929027557373, "step": 376 }, { "epoch": 0.10011950604169433, "grad_norm": 0.9619848398175739, "learning_rate": 1.9946949602122018e-05, "loss": 0.43174588680267334, "step": 377 }, { "epoch": 0.10038507502323729, "grad_norm": 0.90095462919728, "learning_rate": 2e-05, "loss": 0.44885915517807007, "step": 378 }, { "epoch": 0.10065064400478024, "grad_norm": 1.0789787198205218, "learning_rate": 1.9999999036058974e-05, "loss": 0.520150899887085, "step": 379 }, { "epoch": 0.1009162129863232, "grad_norm": 0.9699182604374589, "learning_rate": 1.9999996144236068e-05, "loss": 0.5139277577400208, "step": 380 }, { "epoch": 0.10118178196786615, "grad_norm": 1.0077278580199993, "learning_rate": 1.999999132453184e-05, "loss": 0.48935171961784363, "step": 381 }, { "epoch": 0.10144735094940911, "grad_norm": 0.9095465340361383, "learning_rate": 1.999998457694723e-05, "loss": 0.4805561304092407, "step": 382 }, { "epoch": 0.10171291993095206, "grad_norm": 0.9209321398292457, "learning_rate": 1.9999975901483532e-05, "loss": 0.4340912997722626, "step": 383 }, { "epoch": 0.10197848891249502, "grad_norm": 1.0414639039942946, "learning_rate": 1.999996529814242e-05, "loss": 0.48282474279403687, "step": 384 }, { "epoch": 0.10224405789403798, "grad_norm": 0.9753320144694753, "learning_rate": 1.999995276692593e-05, "loss": 0.4653206169605255, "step": 385 }, { "epoch": 0.10250962687558093, "grad_norm": 0.919281113033857, "learning_rate": 1.999993830783649e-05, "loss": 0.48501014709472656, "step": 386 }, { "epoch": 0.10277519585712389, "grad_norm": 1.0711296444042975, "learning_rate": 1.9999921920876882e-05, "loss": 0.48260143399238586, "step": 387 }, { "epoch": 0.10304076483866684, "grad_norm": 0.9590085896328235, "learning_rate": 1.9999903606050267e-05, "loss": 0.44557270407676697, "step": 388 }, { "epoch": 0.1033063338202098, "grad_norm": 1.111282066618818, "learning_rate": 1.9999883363360175e-05, "loss": 0.4843652546405792, "step": 389 }, { "epoch": 0.10357190280175275, "grad_norm": 0.9708048507544866, "learning_rate": 1.9999861192810508e-05, "loss": 0.4536727964878082, "step": 390 }, { "epoch": 0.1038374717832957, "grad_norm": 1.0216212958759847, "learning_rate": 1.9999837094405538e-05, "loss": 0.49557366967201233, "step": 391 }, { "epoch": 0.10410304076483867, "grad_norm": 1.0254795167373827, "learning_rate": 1.9999811068149917e-05, "loss": 0.45077240467071533, "step": 392 }, { "epoch": 0.10436860974638162, "grad_norm": 0.9857255709196505, "learning_rate": 1.9999783114048658e-05, "loss": 0.4554041624069214, "step": 393 }, { "epoch": 0.10463417872792458, "grad_norm": 0.8770920920154472, "learning_rate": 1.999975323210715e-05, "loss": 0.43526744842529297, "step": 394 }, { "epoch": 0.10489974770946753, "grad_norm": 0.9824982196768539, "learning_rate": 1.9999721422331154e-05, "loss": 0.4097936749458313, "step": 395 }, { "epoch": 0.1051653166910105, "grad_norm": 1.013432449022695, "learning_rate": 1.9999687684726803e-05, "loss": 0.4740130305290222, "step": 396 }, { "epoch": 0.10543088567255345, "grad_norm": 0.9786752992542405, "learning_rate": 1.9999652019300604e-05, "loss": 0.43374374508857727, "step": 397 }, { "epoch": 0.1056964546540964, "grad_norm": 0.9323415402935509, "learning_rate": 1.999961442605943e-05, "loss": 0.4423784911632538, "step": 398 }, { "epoch": 0.10596202363563936, "grad_norm": 1.0497518439124596, "learning_rate": 1.999957490501053e-05, "loss": 0.4660544693470001, "step": 399 }, { "epoch": 0.10622759261718231, "grad_norm": 1.11742327964835, "learning_rate": 1.999953345616152e-05, "loss": 0.4579896628856659, "step": 400 }, { "epoch": 0.10649316159872527, "grad_norm": 1.0653029752390735, "learning_rate": 1.9999490079520395e-05, "loss": 0.4634096920490265, "step": 401 }, { "epoch": 0.10675873058026822, "grad_norm": 0.9969566988589958, "learning_rate": 1.9999444775095517e-05, "loss": 0.45374077558517456, "step": 402 }, { "epoch": 0.10702429956181118, "grad_norm": 1.1298291912896017, "learning_rate": 1.9999397542895615e-05, "loss": 0.49752670526504517, "step": 403 }, { "epoch": 0.10728986854335414, "grad_norm": 1.049244919494092, "learning_rate": 1.99993483829298e-05, "loss": 0.4539335370063782, "step": 404 }, { "epoch": 0.10755543752489709, "grad_norm": 1.0017841795942442, "learning_rate": 1.999929729520755e-05, "loss": 0.4665772616863251, "step": 405 }, { "epoch": 0.10782100650644005, "grad_norm": 1.023688686658119, "learning_rate": 1.9999244279738713e-05, "loss": 0.4850832223892212, "step": 406 }, { "epoch": 0.108086575487983, "grad_norm": 0.9960763191436038, "learning_rate": 1.9999189336533508e-05, "loss": 0.43974876403808594, "step": 407 }, { "epoch": 0.10835214446952596, "grad_norm": 1.0378626233602128, "learning_rate": 1.9999132465602526e-05, "loss": 0.46823856234550476, "step": 408 }, { "epoch": 0.10861771345106891, "grad_norm": 1.0461372802003532, "learning_rate": 1.9999073666956734e-05, "loss": 0.49704545736312866, "step": 409 }, { "epoch": 0.10888328243261187, "grad_norm": 1.03380477635781, "learning_rate": 1.999901294060747e-05, "loss": 0.3863454759120941, "step": 410 }, { "epoch": 0.10914885141415483, "grad_norm": 1.1280569204620268, "learning_rate": 1.9998950286566438e-05, "loss": 0.4903780221939087, "step": 411 }, { "epoch": 0.10941442039569778, "grad_norm": 0.9546134462956446, "learning_rate": 1.9998885704845716e-05, "loss": 0.4312375485897064, "step": 412 }, { "epoch": 0.10967998937724074, "grad_norm": 0.9382591225300354, "learning_rate": 1.9998819195457756e-05, "loss": 0.4350954294204712, "step": 413 }, { "epoch": 0.1099455583587837, "grad_norm": 0.9201016144754837, "learning_rate": 1.999875075841538e-05, "loss": 0.4364873766899109, "step": 414 }, { "epoch": 0.11021112734032665, "grad_norm": 0.9578414566062486, "learning_rate": 1.999868039373178e-05, "loss": 0.42079728841781616, "step": 415 }, { "epoch": 0.1104766963218696, "grad_norm": 1.0011321946551845, "learning_rate": 1.9998608101420527e-05, "loss": 0.4396737515926361, "step": 416 }, { "epoch": 0.11074226530341257, "grad_norm": 0.9922478693245596, "learning_rate": 1.9998533881495552e-05, "loss": 0.44765806198120117, "step": 417 }, { "epoch": 0.11100783428495552, "grad_norm": 1.0219437952159112, "learning_rate": 1.999845773397117e-05, "loss": 0.46199291944503784, "step": 418 }, { "epoch": 0.11127340326649847, "grad_norm": 0.9510961467421052, "learning_rate": 1.9998379658862058e-05, "loss": 0.44561129808425903, "step": 419 }, { "epoch": 0.11153897224804143, "grad_norm": 1.0559368690309399, "learning_rate": 1.9998299656183263e-05, "loss": 0.46025681495666504, "step": 420 }, { "epoch": 0.11180454122958439, "grad_norm": 0.9881679042322009, "learning_rate": 1.999821772595022e-05, "loss": 0.4408613443374634, "step": 421 }, { "epoch": 0.11207011021112734, "grad_norm": 0.9620122842513851, "learning_rate": 1.999813386817871e-05, "loss": 0.4846842586994171, "step": 422 }, { "epoch": 0.11233567919267029, "grad_norm": 0.9697081207450757, "learning_rate": 1.999804808288491e-05, "loss": 0.44503283500671387, "step": 423 }, { "epoch": 0.11260124817421326, "grad_norm": 0.9687765160951803, "learning_rate": 1.9997960370085355e-05, "loss": 0.4090060293674469, "step": 424 }, { "epoch": 0.11286681715575621, "grad_norm": 0.9575575943579401, "learning_rate": 1.999787072979696e-05, "loss": 0.43246471881866455, "step": 425 }, { "epoch": 0.11313238613729916, "grad_norm": 1.001604978030575, "learning_rate": 1.9997779162036996e-05, "loss": 0.46283262968063354, "step": 426 }, { "epoch": 0.11339795511884213, "grad_norm": 0.9108113962903395, "learning_rate": 1.999768566682313e-05, "loss": 0.3866165578365326, "step": 427 }, { "epoch": 0.11366352410038508, "grad_norm": 0.9595506331685858, "learning_rate": 1.9997590244173374e-05, "loss": 0.4501144289970398, "step": 428 }, { "epoch": 0.11392909308192803, "grad_norm": 0.9153639565172541, "learning_rate": 1.9997492894106127e-05, "loss": 0.43005290627479553, "step": 429 }, { "epoch": 0.11419466206347098, "grad_norm": 0.9635360081712412, "learning_rate": 1.9997393616640165e-05, "loss": 0.4427964985370636, "step": 430 }, { "epoch": 0.11446023104501395, "grad_norm": 1.0560533392763956, "learning_rate": 1.999729241179462e-05, "loss": 0.4690951108932495, "step": 431 }, { "epoch": 0.1147258000265569, "grad_norm": 0.9559285214931015, "learning_rate": 1.9997189279589003e-05, "loss": 0.456949919462204, "step": 432 }, { "epoch": 0.11499136900809985, "grad_norm": 0.9851459681291062, "learning_rate": 1.99970842200432e-05, "loss": 0.456052303314209, "step": 433 }, { "epoch": 0.11525693798964282, "grad_norm": 0.9609923633405658, "learning_rate": 1.9996977233177466e-05, "loss": 0.43220120668411255, "step": 434 }, { "epoch": 0.11552250697118577, "grad_norm": 0.9022181145862976, "learning_rate": 1.9996868319012422e-05, "loss": 0.4237494170665741, "step": 435 }, { "epoch": 0.11578807595272872, "grad_norm": 1.1387519975876466, "learning_rate": 1.9996757477569072e-05, "loss": 0.4713878631591797, "step": 436 }, { "epoch": 0.11605364493427167, "grad_norm": 1.026114633188765, "learning_rate": 1.9996644708868776e-05, "loss": 0.4561111330986023, "step": 437 }, { "epoch": 0.11631921391581464, "grad_norm": 1.0425252904592188, "learning_rate": 1.9996530012933285e-05, "loss": 0.468253493309021, "step": 438 }, { "epoch": 0.11658478289735759, "grad_norm": 0.9323050726416767, "learning_rate": 1.9996413389784704e-05, "loss": 0.4815019369125366, "step": 439 }, { "epoch": 0.11685035187890054, "grad_norm": 0.9369313249225236, "learning_rate": 1.9996294839445518e-05, "loss": 0.4235987663269043, "step": 440 }, { "epoch": 0.1171159208604435, "grad_norm": 0.9217309559918773, "learning_rate": 1.999617436193858e-05, "loss": 0.40562817454338074, "step": 441 }, { "epoch": 0.11738148984198646, "grad_norm": 1.1384168500780398, "learning_rate": 1.999605195728712e-05, "loss": 0.424539715051651, "step": 442 }, { "epoch": 0.11764705882352941, "grad_norm": 0.9616123874834243, "learning_rate": 1.9995927625514736e-05, "loss": 0.43677473068237305, "step": 443 }, { "epoch": 0.11791262780507236, "grad_norm": 0.9761533315060044, "learning_rate": 1.9995801366645396e-05, "loss": 0.47325971722602844, "step": 444 }, { "epoch": 0.11817819678661533, "grad_norm": 0.9447069768738408, "learning_rate": 1.9995673180703443e-05, "loss": 0.4206562638282776, "step": 445 }, { "epoch": 0.11844376576815828, "grad_norm": 0.9743544240614231, "learning_rate": 1.999554306771359e-05, "loss": 0.4492834210395813, "step": 446 }, { "epoch": 0.11870933474970123, "grad_norm": 1.0629000505790311, "learning_rate": 1.9995411027700917e-05, "loss": 0.4445284605026245, "step": 447 }, { "epoch": 0.1189749037312442, "grad_norm": 0.9911650776890225, "learning_rate": 1.9995277060690885e-05, "loss": 0.4038352370262146, "step": 448 }, { "epoch": 0.11924047271278715, "grad_norm": 0.9418518804089067, "learning_rate": 1.9995141166709318e-05, "loss": 0.4261324405670166, "step": 449 }, { "epoch": 0.1195060416943301, "grad_norm": 1.067611227425969, "learning_rate": 1.9995003345782416e-05, "loss": 0.44187062978744507, "step": 450 }, { "epoch": 0.11977161067587305, "grad_norm": 0.9191915914869351, "learning_rate": 1.9994863597936752e-05, "loss": 0.44672587513923645, "step": 451 }, { "epoch": 0.12003717965741602, "grad_norm": 0.9882052007755191, "learning_rate": 1.999472192319926e-05, "loss": 0.44322314858436584, "step": 452 }, { "epoch": 0.12030274863895897, "grad_norm": 0.9882289435866314, "learning_rate": 1.9994578321597258e-05, "loss": 0.4396611154079437, "step": 453 }, { "epoch": 0.12056831762050192, "grad_norm": 0.9831868773412876, "learning_rate": 1.9994432793158433e-05, "loss": 0.4487733542919159, "step": 454 }, { "epoch": 0.12083388660204489, "grad_norm": 0.9360753951175719, "learning_rate": 1.999428533791084e-05, "loss": 0.3969653248786926, "step": 455 }, { "epoch": 0.12109945558358784, "grad_norm": 0.9662346637828156, "learning_rate": 1.9994135955882906e-05, "loss": 0.39312344789505005, "step": 456 }, { "epoch": 0.12136502456513079, "grad_norm": 0.9019524086641805, "learning_rate": 1.9993984647103425e-05, "loss": 0.3979804217815399, "step": 457 }, { "epoch": 0.12163059354667374, "grad_norm": 1.0970468981958466, "learning_rate": 1.9993831411601573e-05, "loss": 0.4430229365825653, "step": 458 }, { "epoch": 0.12189616252821671, "grad_norm": 0.994492352252997, "learning_rate": 1.9993676249406895e-05, "loss": 0.4511718451976776, "step": 459 }, { "epoch": 0.12216173150975966, "grad_norm": 1.091979336298699, "learning_rate": 1.9993519160549298e-05, "loss": 0.4686455726623535, "step": 460 }, { "epoch": 0.12242730049130261, "grad_norm": 1.0158374042593608, "learning_rate": 1.9993360145059073e-05, "loss": 0.4501730501651764, "step": 461 }, { "epoch": 0.12269286947284558, "grad_norm": 0.8530053413909426, "learning_rate": 1.999319920296687e-05, "loss": 0.40718767046928406, "step": 462 }, { "epoch": 0.12295843845438853, "grad_norm": 1.1181007301257784, "learning_rate": 1.9993036334303716e-05, "loss": 0.47313761711120605, "step": 463 }, { "epoch": 0.12322400743593148, "grad_norm": 0.9710975932515886, "learning_rate": 1.9992871539101018e-05, "loss": 0.47417378425598145, "step": 464 }, { "epoch": 0.12348957641747443, "grad_norm": 0.9297582414898758, "learning_rate": 1.999270481739054e-05, "loss": 0.44206154346466064, "step": 465 }, { "epoch": 0.1237551453990174, "grad_norm": 0.8745553533375581, "learning_rate": 1.9992536169204427e-05, "loss": 0.3800848722457886, "step": 466 }, { "epoch": 0.12402071438056035, "grad_norm": 0.9337162704530373, "learning_rate": 1.9992365594575194e-05, "loss": 0.40339407324790955, "step": 467 }, { "epoch": 0.1242862833621033, "grad_norm": 0.945328490567385, "learning_rate": 1.999219309353572e-05, "loss": 0.45280492305755615, "step": 468 }, { "epoch": 0.12455185234364627, "grad_norm": 1.0911195899085697, "learning_rate": 1.9992018666119266e-05, "loss": 0.4600910544395447, "step": 469 }, { "epoch": 0.12481742132518922, "grad_norm": 0.9649890056306747, "learning_rate": 1.9991842312359458e-05, "loss": 0.4475003480911255, "step": 470 }, { "epoch": 0.12508299030673217, "grad_norm": 1.0493048741226816, "learning_rate": 1.9991664032290297e-05, "loss": 0.45377033948898315, "step": 471 }, { "epoch": 0.12534855928827512, "grad_norm": 0.9964208438270044, "learning_rate": 1.9991483825946147e-05, "loss": 0.4397522509098053, "step": 472 }, { "epoch": 0.12561412826981808, "grad_norm": 0.9309535511597795, "learning_rate": 1.9991301693361756e-05, "loss": 0.4258221387863159, "step": 473 }, { "epoch": 0.12587969725136103, "grad_norm": 0.9120842027423138, "learning_rate": 1.9991117634572234e-05, "loss": 0.40272068977355957, "step": 474 }, { "epoch": 0.126145266232904, "grad_norm": 0.8761120829975514, "learning_rate": 1.9990931649613067e-05, "loss": 0.3721206784248352, "step": 475 }, { "epoch": 0.12641083521444696, "grad_norm": 0.9997105907953329, "learning_rate": 1.9990743738520115e-05, "loss": 0.4530203938484192, "step": 476 }, { "epoch": 0.1266764041959899, "grad_norm": 0.999446109489731, "learning_rate": 1.999055390132959e-05, "loss": 0.4281614422798157, "step": 477 }, { "epoch": 0.12694197317753286, "grad_norm": 1.3617327829527315, "learning_rate": 1.999036213807811e-05, "loss": 0.41965895891189575, "step": 478 }, { "epoch": 0.12720754215907581, "grad_norm": 0.9525189428273744, "learning_rate": 1.9990168448802633e-05, "loss": 0.40055203437805176, "step": 479 }, { "epoch": 0.12747311114061877, "grad_norm": 1.0868137290392272, "learning_rate": 1.99899728335405e-05, "loss": 0.4266522526741028, "step": 480 }, { "epoch": 0.12773868012216172, "grad_norm": 1.028316280940819, "learning_rate": 1.9989775292329425e-05, "loss": 0.42291250824928284, "step": 481 }, { "epoch": 0.1280042491037047, "grad_norm": 1.0319881226067493, "learning_rate": 1.9989575825207494e-05, "loss": 0.41346436738967896, "step": 482 }, { "epoch": 0.12826981808524765, "grad_norm": 1.0162482863207583, "learning_rate": 1.998937443221316e-05, "loss": 0.4092825651168823, "step": 483 }, { "epoch": 0.1285353870667906, "grad_norm": 0.9789070022917183, "learning_rate": 1.998917111338525e-05, "loss": 0.39763280749320984, "step": 484 }, { "epoch": 0.12880095604833355, "grad_norm": 1.1639998102533433, "learning_rate": 1.9988965868762956e-05, "loss": 0.45523273944854736, "step": 485 }, { "epoch": 0.1290665250298765, "grad_norm": 0.9737102573843942, "learning_rate": 1.9988758698385854e-05, "loss": 0.40181300044059753, "step": 486 }, { "epoch": 0.12933209401141946, "grad_norm": 1.0269411713354706, "learning_rate": 1.9988549602293884e-05, "loss": 0.42487743496894836, "step": 487 }, { "epoch": 0.1295976629929624, "grad_norm": 0.9805378587174307, "learning_rate": 1.998833858052735e-05, "loss": 0.41672298312187195, "step": 488 }, { "epoch": 0.1298632319745054, "grad_norm": 0.9804335652831319, "learning_rate": 1.998812563312694e-05, "loss": 0.36750108003616333, "step": 489 }, { "epoch": 0.13012880095604834, "grad_norm": 1.0991024476796578, "learning_rate": 1.9987910760133712e-05, "loss": 0.49290573596954346, "step": 490 }, { "epoch": 0.1303943699375913, "grad_norm": 0.9956647709409898, "learning_rate": 1.9987693961589084e-05, "loss": 0.460039347410202, "step": 491 }, { "epoch": 0.13065993891913424, "grad_norm": 1.269757897267166, "learning_rate": 1.998747523753485e-05, "loss": 0.4471668303012848, "step": 492 }, { "epoch": 0.1309255079006772, "grad_norm": 0.9411513149719377, "learning_rate": 1.9987254588013184e-05, "loss": 0.395844966173172, "step": 493 }, { "epoch": 0.13119107688222015, "grad_norm": 0.9546844808839872, "learning_rate": 1.9987032013066623e-05, "loss": 0.4465745985507965, "step": 494 }, { "epoch": 0.1314566458637631, "grad_norm": 1.0929917252775374, "learning_rate": 1.9986807512738075e-05, "loss": 0.43123912811279297, "step": 495 }, { "epoch": 0.13172221484530608, "grad_norm": 0.9741124155963404, "learning_rate": 1.9986581087070824e-05, "loss": 0.40066564083099365, "step": 496 }, { "epoch": 0.13198778382684903, "grad_norm": 0.9421948045046618, "learning_rate": 1.9986352736108515e-05, "loss": 0.38514643907546997, "step": 497 }, { "epoch": 0.13225335280839198, "grad_norm": 0.9713567699891517, "learning_rate": 1.9986122459895182e-05, "loss": 0.37397241592407227, "step": 498 }, { "epoch": 0.13251892178993493, "grad_norm": 0.9697777712481016, "learning_rate": 1.9985890258475215e-05, "loss": 0.44865745306015015, "step": 499 }, { "epoch": 0.1327844907714779, "grad_norm": 1.000823551239605, "learning_rate": 1.9985656131893374e-05, "loss": 0.4161406457424164, "step": 500 }, { "epoch": 0.13305005975302084, "grad_norm": 1.049045844462056, "learning_rate": 1.9985420080194804e-05, "loss": 0.41364359855651855, "step": 501 }, { "epoch": 0.1333156287345638, "grad_norm": 0.9766347522178017, "learning_rate": 1.9985182103425007e-05, "loss": 0.38466009497642517, "step": 502 }, { "epoch": 0.13358119771610677, "grad_norm": 0.9820108788569575, "learning_rate": 1.9984942201629868e-05, "loss": 0.4189472794532776, "step": 503 }, { "epoch": 0.13384676669764972, "grad_norm": 1.0124943582595707, "learning_rate": 1.998470037485563e-05, "loss": 0.4088754653930664, "step": 504 }, { "epoch": 0.13411233567919267, "grad_norm": 0.9404621165531668, "learning_rate": 1.9984456623148923e-05, "loss": 0.4197084307670593, "step": 505 }, { "epoch": 0.13437790466073563, "grad_norm": 1.022677047132229, "learning_rate": 1.998421094655673e-05, "loss": 0.4318644404411316, "step": 506 }, { "epoch": 0.13464347364227858, "grad_norm": 0.9443470782499029, "learning_rate": 1.9983963345126423e-05, "loss": 0.38180238008499146, "step": 507 }, { "epoch": 0.13490904262382153, "grad_norm": 0.9655473739081939, "learning_rate": 1.9983713818905733e-05, "loss": 0.38704103231430054, "step": 508 }, { "epoch": 0.13517461160536448, "grad_norm": 1.050357567916831, "learning_rate": 1.998346236794276e-05, "loss": 0.4206693768501282, "step": 509 }, { "epoch": 0.13544018058690746, "grad_norm": 1.1108901361228778, "learning_rate": 1.9983208992285993e-05, "loss": 0.42818987369537354, "step": 510 }, { "epoch": 0.1357057495684504, "grad_norm": 1.0771548955106338, "learning_rate": 1.9982953691984274e-05, "loss": 0.44592660665512085, "step": 511 }, { "epoch": 0.13597131854999336, "grad_norm": 1.006125968429414, "learning_rate": 1.9982696467086815e-05, "loss": 0.4272580146789551, "step": 512 }, { "epoch": 0.13623688753153632, "grad_norm": 1.084212872761102, "learning_rate": 1.9982437317643218e-05, "loss": 0.4416295289993286, "step": 513 }, { "epoch": 0.13650245651307927, "grad_norm": 1.1040865905907058, "learning_rate": 1.998217624370343e-05, "loss": 0.45108669996261597, "step": 514 }, { "epoch": 0.13676802549462222, "grad_norm": 0.9866796372680723, "learning_rate": 1.9981913245317802e-05, "loss": 0.40311864018440247, "step": 515 }, { "epoch": 0.13703359447616517, "grad_norm": 1.041531014011416, "learning_rate": 1.9981648322537017e-05, "loss": 0.4388020932674408, "step": 516 }, { "epoch": 0.13729916345770815, "grad_norm": 1.069295153220874, "learning_rate": 1.9981381475412162e-05, "loss": 0.42741361260414124, "step": 517 }, { "epoch": 0.1375647324392511, "grad_norm": 0.8562984414004653, "learning_rate": 1.9981112703994677e-05, "loss": 0.3766555190086365, "step": 518 }, { "epoch": 0.13783030142079405, "grad_norm": 0.9297024970383198, "learning_rate": 1.998084200833638e-05, "loss": 0.38618308305740356, "step": 519 }, { "epoch": 0.138095870402337, "grad_norm": 1.0033450202172107, "learning_rate": 1.9980569388489457e-05, "loss": 0.4553264379501343, "step": 520 }, { "epoch": 0.13836143938387996, "grad_norm": 1.024202819723292, "learning_rate": 1.9980294844506468e-05, "loss": 0.44632673263549805, "step": 521 }, { "epoch": 0.1386270083654229, "grad_norm": 1.0907023510727254, "learning_rate": 1.998001837644033e-05, "loss": 0.4285067617893219, "step": 522 }, { "epoch": 0.13889257734696586, "grad_norm": 0.9721672428790065, "learning_rate": 1.9979739984344365e-05, "loss": 0.39360538125038147, "step": 523 }, { "epoch": 0.13915814632850884, "grad_norm": 0.9475835393492287, "learning_rate": 1.9979459668272226e-05, "loss": 0.4007593095302582, "step": 524 }, { "epoch": 0.1394237153100518, "grad_norm": 1.028990364637073, "learning_rate": 1.9979177428277955e-05, "loss": 0.40176767110824585, "step": 525 }, { "epoch": 0.13968928429159475, "grad_norm": 1.0167293750004343, "learning_rate": 1.9978893264415978e-05, "loss": 0.4190528392791748, "step": 526 }, { "epoch": 0.1399548532731377, "grad_norm": 0.9871913820335487, "learning_rate": 1.9978607176741063e-05, "loss": 0.4139288067817688, "step": 527 }, { "epoch": 0.14022042225468065, "grad_norm": 0.8610694360554231, "learning_rate": 1.9978319165308373e-05, "loss": 0.3666151463985443, "step": 528 }, { "epoch": 0.1404859912362236, "grad_norm": 1.016794526359022, "learning_rate": 1.997802923017343e-05, "loss": 0.44621142745018005, "step": 529 }, { "epoch": 0.14075156021776655, "grad_norm": 0.9742602007181285, "learning_rate": 1.9977737371392134e-05, "loss": 0.4162977635860443, "step": 530 }, { "epoch": 0.14101712919930953, "grad_norm": 1.0386051117102446, "learning_rate": 1.997744358902075e-05, "loss": 0.438882052898407, "step": 531 }, { "epoch": 0.14128269818085248, "grad_norm": 0.9131334625730753, "learning_rate": 1.997714788311591e-05, "loss": 0.43381333351135254, "step": 532 }, { "epoch": 0.14154826716239544, "grad_norm": 1.0341262373297713, "learning_rate": 1.9976850253734633e-05, "loss": 0.41925039887428284, "step": 533 }, { "epoch": 0.1418138361439384, "grad_norm": 1.0366031704059997, "learning_rate": 1.997655070093429e-05, "loss": 0.40469998121261597, "step": 534 }, { "epoch": 0.14207940512548134, "grad_norm": 1.069653848503876, "learning_rate": 1.9976249224772638e-05, "loss": 0.4252749979496002, "step": 535 }, { "epoch": 0.1423449741070243, "grad_norm": 0.9131599330211423, "learning_rate": 1.9975945825307788e-05, "loss": 0.42437341809272766, "step": 536 }, { "epoch": 0.14261054308856724, "grad_norm": 0.9295944144104017, "learning_rate": 1.9975640502598243e-05, "loss": 0.3435184955596924, "step": 537 }, { "epoch": 0.14287611207011022, "grad_norm": 1.135805935036872, "learning_rate": 1.9975333256702864e-05, "loss": 0.4677535593509674, "step": 538 }, { "epoch": 0.14314168105165317, "grad_norm": 0.9857610455714647, "learning_rate": 1.9975024087680873e-05, "loss": 0.3860551118850708, "step": 539 }, { "epoch": 0.14340725003319613, "grad_norm": 1.0260051612127887, "learning_rate": 1.9974712995591887e-05, "loss": 0.4067271649837494, "step": 540 }, { "epoch": 0.14367281901473908, "grad_norm": 1.0673102525592195, "learning_rate": 1.9974399980495877e-05, "loss": 0.42236536741256714, "step": 541 }, { "epoch": 0.14393838799628203, "grad_norm": 0.9825710114440017, "learning_rate": 1.9974085042453188e-05, "loss": 0.45230624079704285, "step": 542 }, { "epoch": 0.14420395697782498, "grad_norm": 1.0223761508252163, "learning_rate": 1.997376818152453e-05, "loss": 0.428194522857666, "step": 543 }, { "epoch": 0.14446952595936793, "grad_norm": 1.0337438279048081, "learning_rate": 1.9973449397771004e-05, "loss": 0.40774789452552795, "step": 544 }, { "epoch": 0.1447350949409109, "grad_norm": 0.9168779980285519, "learning_rate": 1.9973128691254054e-05, "loss": 0.4086815118789673, "step": 545 }, { "epoch": 0.14500066392245387, "grad_norm": 0.9934439062572693, "learning_rate": 1.997280606203552e-05, "loss": 0.4045162796974182, "step": 546 }, { "epoch": 0.14526623290399682, "grad_norm": 1.0110955437735047, "learning_rate": 1.9972481510177594e-05, "loss": 0.40463268756866455, "step": 547 }, { "epoch": 0.14553180188553977, "grad_norm": 1.0029896014566093, "learning_rate": 1.9972155035742847e-05, "loss": 0.46733587980270386, "step": 548 }, { "epoch": 0.14579737086708272, "grad_norm": 0.9683751197048177, "learning_rate": 1.997182663879422e-05, "loss": 0.45210930705070496, "step": 549 }, { "epoch": 0.14606293984862567, "grad_norm": 0.9559484778346481, "learning_rate": 1.9971496319395022e-05, "loss": 0.39798587560653687, "step": 550 }, { "epoch": 0.14632850883016862, "grad_norm": 1.0582410708312875, "learning_rate": 1.9971164077608937e-05, "loss": 0.4166080057621002, "step": 551 }, { "epoch": 0.1465940778117116, "grad_norm": 0.99705391441119, "learning_rate": 1.9970829913500017e-05, "loss": 0.3995435833930969, "step": 552 }, { "epoch": 0.14685964679325456, "grad_norm": 0.9693599664680953, "learning_rate": 1.9970493827132686e-05, "loss": 0.39335039258003235, "step": 553 }, { "epoch": 0.1471252157747975, "grad_norm": 1.0653128556742777, "learning_rate": 1.9970155818571733e-05, "loss": 0.3923008441925049, "step": 554 }, { "epoch": 0.14739078475634046, "grad_norm": 1.1000528384874784, "learning_rate": 1.996981588788233e-05, "loss": 0.42148759961128235, "step": 555 }, { "epoch": 0.1476563537378834, "grad_norm": 0.9532704289154984, "learning_rate": 1.9969474035130005e-05, "loss": 0.36099517345428467, "step": 556 }, { "epoch": 0.14792192271942636, "grad_norm": 0.9498609858415961, "learning_rate": 1.9969130260380663e-05, "loss": 0.39650559425354004, "step": 557 }, { "epoch": 0.14818749170096931, "grad_norm": 0.9667452630427784, "learning_rate": 1.9968784563700586e-05, "loss": 0.36410078406333923, "step": 558 }, { "epoch": 0.1484530606825123, "grad_norm": 1.002419821858965, "learning_rate": 1.996843694515641e-05, "loss": 0.41312888264656067, "step": 559 }, { "epoch": 0.14871862966405525, "grad_norm": 1.1088153047335336, "learning_rate": 1.9968087404815162e-05, "loss": 0.3895263373851776, "step": 560 }, { "epoch": 0.1489841986455982, "grad_norm": 1.2422388501205763, "learning_rate": 1.9967735942744226e-05, "loss": 0.4400597810745239, "step": 561 }, { "epoch": 0.14924976762714115, "grad_norm": 1.1300700300497077, "learning_rate": 1.9967382559011356e-05, "loss": 0.36712852120399475, "step": 562 }, { "epoch": 0.1495153366086841, "grad_norm": 1.0425502358891738, "learning_rate": 1.9967027253684685e-05, "loss": 0.4043564200401306, "step": 563 }, { "epoch": 0.14978090559022705, "grad_norm": 1.101160625764444, "learning_rate": 1.9966670026832707e-05, "loss": 0.45233044028282166, "step": 564 }, { "epoch": 0.15004647457177, "grad_norm": 1.3277254520379258, "learning_rate": 1.9966310878524297e-05, "loss": 0.441600501537323, "step": 565 }, { "epoch": 0.15031204355331299, "grad_norm": 1.0833095900878238, "learning_rate": 1.9965949808828687e-05, "loss": 0.4268038868904114, "step": 566 }, { "epoch": 0.15057761253485594, "grad_norm": 1.1492448156590855, "learning_rate": 1.9965586817815494e-05, "loss": 0.41927874088287354, "step": 567 }, { "epoch": 0.1508431815163989, "grad_norm": 1.026170307581087, "learning_rate": 1.9965221905554695e-05, "loss": 0.41488781571388245, "step": 568 }, { "epoch": 0.15110875049794184, "grad_norm": 0.9559142330236491, "learning_rate": 1.9964855072116642e-05, "loss": 0.3624749779701233, "step": 569 }, { "epoch": 0.1513743194794848, "grad_norm": 1.254830306735622, "learning_rate": 1.996448631757206e-05, "loss": 0.45119866728782654, "step": 570 }, { "epoch": 0.15163988846102774, "grad_norm": 1.095837461898702, "learning_rate": 1.996411564199203e-05, "loss": 0.41389739513397217, "step": 571 }, { "epoch": 0.1519054574425707, "grad_norm": 0.9684460814064966, "learning_rate": 1.996374304544802e-05, "loss": 0.3640916347503662, "step": 572 }, { "epoch": 0.15217102642411368, "grad_norm": 1.0711015344753547, "learning_rate": 1.9963368528011867e-05, "loss": 0.45648565888404846, "step": 573 }, { "epoch": 0.15243659540565663, "grad_norm": 0.9722794055909949, "learning_rate": 1.9962992089755765e-05, "loss": 0.4335980713367462, "step": 574 }, { "epoch": 0.15270216438719958, "grad_norm": 1.158400874054287, "learning_rate": 1.996261373075229e-05, "loss": 0.3908158540725708, "step": 575 }, { "epoch": 0.15296773336874253, "grad_norm": 0.9311953954584888, "learning_rate": 1.996223345107439e-05, "loss": 0.36533305048942566, "step": 576 }, { "epoch": 0.15323330235028548, "grad_norm": 0.9771467412652409, "learning_rate": 1.9961851250795372e-05, "loss": 0.407212495803833, "step": 577 }, { "epoch": 0.15349887133182843, "grad_norm": 0.9988499065644934, "learning_rate": 1.996146712998892e-05, "loss": 0.4266315698623657, "step": 578 }, { "epoch": 0.1537644403133714, "grad_norm": 0.9843108485081927, "learning_rate": 1.9961081088729092e-05, "loss": 0.3806581199169159, "step": 579 }, { "epoch": 0.15403000929491437, "grad_norm": 0.9497423806639163, "learning_rate": 1.9960693127090312e-05, "loss": 0.40962716937065125, "step": 580 }, { "epoch": 0.15429557827645732, "grad_norm": 0.94680923059909, "learning_rate": 1.996030324514737e-05, "loss": 0.4195394515991211, "step": 581 }, { "epoch": 0.15456114725800027, "grad_norm": 1.0211843119224446, "learning_rate": 1.995991144297543e-05, "loss": 0.4366803765296936, "step": 582 }, { "epoch": 0.15482671623954322, "grad_norm": 1.1779341722116263, "learning_rate": 1.995951772065004e-05, "loss": 0.44951680302619934, "step": 583 }, { "epoch": 0.15509228522108617, "grad_norm": 1.1165714790353467, "learning_rate": 1.9959122078247088e-05, "loss": 0.42920851707458496, "step": 584 }, { "epoch": 0.15535785420262913, "grad_norm": 1.3260467831670406, "learning_rate": 1.9958724515842856e-05, "loss": 0.3805098533630371, "step": 585 }, { "epoch": 0.15562342318417208, "grad_norm": 1.1544212798945541, "learning_rate": 1.995832503351399e-05, "loss": 0.439333438873291, "step": 586 }, { "epoch": 0.15588899216571506, "grad_norm": 0.9414235863159184, "learning_rate": 1.9957923631337505e-05, "loss": 0.38338547945022583, "step": 587 }, { "epoch": 0.156154561147258, "grad_norm": 0.9711288321476074, "learning_rate": 1.9957520309390786e-05, "loss": 0.40603697299957275, "step": 588 }, { "epoch": 0.15642013012880096, "grad_norm": 0.9468286962292546, "learning_rate": 1.9957115067751594e-05, "loss": 0.42816999554634094, "step": 589 }, { "epoch": 0.1566856991103439, "grad_norm": 0.979497417166178, "learning_rate": 1.9956707906498046e-05, "loss": 0.42367884516716003, "step": 590 }, { "epoch": 0.15695126809188686, "grad_norm": 1.1158588594509518, "learning_rate": 1.995629882570864e-05, "loss": 0.4349297881126404, "step": 591 }, { "epoch": 0.15721683707342982, "grad_norm": 0.9762108745852242, "learning_rate": 1.995588782546225e-05, "loss": 0.37990960478782654, "step": 592 }, { "epoch": 0.15748240605497277, "grad_norm": 0.9495653219493333, "learning_rate": 1.9955474905838102e-05, "loss": 0.4085468649864197, "step": 593 }, { "epoch": 0.15774797503651575, "grad_norm": 0.9419429879365407, "learning_rate": 1.995506006691581e-05, "loss": 0.41362464427948, "step": 594 }, { "epoch": 0.1580135440180587, "grad_norm": 1.002559702640921, "learning_rate": 1.9954643308775342e-05, "loss": 0.3830018937587738, "step": 595 }, { "epoch": 0.15827911299960165, "grad_norm": 1.1505182326275074, "learning_rate": 1.995422463149705e-05, "loss": 0.48350822925567627, "step": 596 }, { "epoch": 0.1585446819811446, "grad_norm": 0.9889824166630486, "learning_rate": 1.995380403516165e-05, "loss": 0.4215185344219208, "step": 597 }, { "epoch": 0.15881025096268755, "grad_norm": 1.06826056700577, "learning_rate": 1.9953381519850224e-05, "loss": 0.42061948776245117, "step": 598 }, { "epoch": 0.1590758199442305, "grad_norm": 1.032451381790901, "learning_rate": 1.995295708564423e-05, "loss": 0.38956254720687866, "step": 599 }, { "epoch": 0.15934138892577346, "grad_norm": 1.0492553607775368, "learning_rate": 1.9952530732625492e-05, "loss": 0.3864685893058777, "step": 600 }, { "epoch": 0.15960695790731644, "grad_norm": 0.9770856461072062, "learning_rate": 1.9952102460876214e-05, "loss": 0.395724356174469, "step": 601 }, { "epoch": 0.1598725268888594, "grad_norm": 1.04245602393598, "learning_rate": 1.995167227047895e-05, "loss": 0.4220300316810608, "step": 602 }, { "epoch": 0.16013809587040234, "grad_norm": 1.1406615370546667, "learning_rate": 1.9951240161516643e-05, "loss": 0.4129142165184021, "step": 603 }, { "epoch": 0.1604036648519453, "grad_norm": 0.983753356740355, "learning_rate": 1.9950806134072595e-05, "loss": 0.3951375484466553, "step": 604 }, { "epoch": 0.16066923383348825, "grad_norm": 1.0214548083454909, "learning_rate": 1.9950370188230486e-05, "loss": 0.4117582142353058, "step": 605 }, { "epoch": 0.1609348028150312, "grad_norm": 1.0340746201961049, "learning_rate": 1.994993232407436e-05, "loss": 0.3920668363571167, "step": 606 }, { "epoch": 0.16120037179657415, "grad_norm": 0.9768399206450091, "learning_rate": 1.9949492541688626e-05, "loss": 0.3756999373435974, "step": 607 }, { "epoch": 0.16146594077811713, "grad_norm": 1.0034054922110034, "learning_rate": 1.9949050841158078e-05, "loss": 0.41009610891342163, "step": 608 }, { "epoch": 0.16173150975966008, "grad_norm": 0.9847346075479474, "learning_rate": 1.994860722256786e-05, "loss": 0.3986571729183197, "step": 609 }, { "epoch": 0.16199707874120303, "grad_norm": 0.9978440495541314, "learning_rate": 1.994816168600351e-05, "loss": 0.3903341591358185, "step": 610 }, { "epoch": 0.16226264772274598, "grad_norm": 0.9992231775305654, "learning_rate": 1.994771423155091e-05, "loss": 0.39725261926651, "step": 611 }, { "epoch": 0.16252821670428894, "grad_norm": 0.9446936558476315, "learning_rate": 1.994726485929633e-05, "loss": 0.39461129903793335, "step": 612 }, { "epoch": 0.1627937856858319, "grad_norm": 1.0162077284831286, "learning_rate": 1.99468135693264e-05, "loss": 0.41346144676208496, "step": 613 }, { "epoch": 0.16305935466737484, "grad_norm": 1.0305116850266922, "learning_rate": 1.9946360361728127e-05, "loss": 0.41148197650909424, "step": 614 }, { "epoch": 0.16332492364891782, "grad_norm": 0.9678436330540818, "learning_rate": 1.9945905236588884e-05, "loss": 0.38204139471054077, "step": 615 }, { "epoch": 0.16359049263046077, "grad_norm": 0.9830320911733957, "learning_rate": 1.9945448193996412e-05, "loss": 0.41496896743774414, "step": 616 }, { "epoch": 0.16385606161200372, "grad_norm": 0.9327494941136337, "learning_rate": 1.994498923403882e-05, "loss": 0.38998982310295105, "step": 617 }, { "epoch": 0.16412163059354667, "grad_norm": 1.0310759290486786, "learning_rate": 1.99445283568046e-05, "loss": 0.39018991589546204, "step": 618 }, { "epoch": 0.16438719957508963, "grad_norm": 1.1133251353738367, "learning_rate": 1.9944065562382594e-05, "loss": 0.41579991579055786, "step": 619 }, { "epoch": 0.16465276855663258, "grad_norm": 1.1413714641323347, "learning_rate": 1.9943600850862027e-05, "loss": 0.426283061504364, "step": 620 }, { "epoch": 0.16491833753817553, "grad_norm": 1.0537239280428552, "learning_rate": 1.9943134222332493e-05, "loss": 0.418672651052475, "step": 621 }, { "epoch": 0.1651839065197185, "grad_norm": 1.0177048245128393, "learning_rate": 1.9942665676883946e-05, "loss": 0.4014776349067688, "step": 622 }, { "epoch": 0.16544947550126146, "grad_norm": 0.9703989792649265, "learning_rate": 1.994219521460672e-05, "loss": 0.3714776933193207, "step": 623 }, { "epoch": 0.1657150444828044, "grad_norm": 1.005321267739283, "learning_rate": 1.9941722835591514e-05, "loss": 0.39415785670280457, "step": 624 }, { "epoch": 0.16598061346434737, "grad_norm": 1.739817458909074, "learning_rate": 1.9941248539929395e-05, "loss": 0.3706223964691162, "step": 625 }, { "epoch": 0.16624618244589032, "grad_norm": 0.9887487099192142, "learning_rate": 1.9940772327711807e-05, "loss": 0.4167429208755493, "step": 626 }, { "epoch": 0.16651175142743327, "grad_norm": 1.0502993213264278, "learning_rate": 1.9940294199030553e-05, "loss": 0.38234227895736694, "step": 627 }, { "epoch": 0.16677732040897622, "grad_norm": 0.9929957655695576, "learning_rate": 1.9939814153977813e-05, "loss": 0.4139519929885864, "step": 628 }, { "epoch": 0.1670428893905192, "grad_norm": 1.0428716869119874, "learning_rate": 1.9939332192646136e-05, "loss": 0.44490402936935425, "step": 629 }, { "epoch": 0.16730845837206215, "grad_norm": 0.9723220719956404, "learning_rate": 1.993884831512843e-05, "loss": 0.3870658278465271, "step": 630 }, { "epoch": 0.1675740273536051, "grad_norm": 0.9337218443909966, "learning_rate": 1.993836252151799e-05, "loss": 0.3308948278427124, "step": 631 }, { "epoch": 0.16783959633514806, "grad_norm": 1.1119638169858157, "learning_rate": 1.993787481190847e-05, "loss": 0.3727487623691559, "step": 632 }, { "epoch": 0.168105165316691, "grad_norm": 1.0025380900585623, "learning_rate": 1.9937385186393888e-05, "loss": 0.4277465343475342, "step": 633 }, { "epoch": 0.16837073429823396, "grad_norm": 1.2120120873899203, "learning_rate": 1.9936893645068647e-05, "loss": 0.4276485741138458, "step": 634 }, { "epoch": 0.1686363032797769, "grad_norm": 1.000070161461063, "learning_rate": 1.9936400188027502e-05, "loss": 0.374578058719635, "step": 635 }, { "epoch": 0.1689018722613199, "grad_norm": 1.113556890943216, "learning_rate": 1.993590481536559e-05, "loss": 0.4583400785923004, "step": 636 }, { "epoch": 0.16916744124286284, "grad_norm": 0.9731147624235688, "learning_rate": 1.9935407527178417e-05, "loss": 0.3734489679336548, "step": 637 }, { "epoch": 0.1694330102244058, "grad_norm": 1.0110441212525507, "learning_rate": 1.9934908323561846e-05, "loss": 0.39524513483047485, "step": 638 }, { "epoch": 0.16969857920594875, "grad_norm": 1.0264447655460065, "learning_rate": 1.9934407204612124e-05, "loss": 0.42300352454185486, "step": 639 }, { "epoch": 0.1699641481874917, "grad_norm": 0.9950374891978715, "learning_rate": 1.9933904170425858e-05, "loss": 0.4152276813983917, "step": 640 }, { "epoch": 0.17022971716903465, "grad_norm": 1.230783330329369, "learning_rate": 1.9933399221100026e-05, "loss": 0.43046653270721436, "step": 641 }, { "epoch": 0.1704952861505776, "grad_norm": 1.0095783418631343, "learning_rate": 1.993289235673198e-05, "loss": 0.4134339392185211, "step": 642 }, { "epoch": 0.17076085513212058, "grad_norm": 1.0051407398693462, "learning_rate": 1.9932383577419432e-05, "loss": 0.44028693437576294, "step": 643 }, { "epoch": 0.17102642411366353, "grad_norm": 1.0208746920457954, "learning_rate": 1.9931872883260473e-05, "loss": 0.3790222704410553, "step": 644 }, { "epoch": 0.17129199309520649, "grad_norm": 1.041462978505965, "learning_rate": 1.9931360274353556e-05, "loss": 0.3683086633682251, "step": 645 }, { "epoch": 0.17155756207674944, "grad_norm": 1.0400069352454702, "learning_rate": 1.993084575079751e-05, "loss": 0.3630594313144684, "step": 646 }, { "epoch": 0.1718231310582924, "grad_norm": 1.0694046561659416, "learning_rate": 1.993032931269153e-05, "loss": 0.4398641884326935, "step": 647 }, { "epoch": 0.17208870003983534, "grad_norm": 1.107156801944608, "learning_rate": 1.992981096013517e-05, "loss": 0.42222845554351807, "step": 648 }, { "epoch": 0.1723542690213783, "grad_norm": 1.043160064840446, "learning_rate": 1.992929069322837e-05, "loss": 0.38966643810272217, "step": 649 }, { "epoch": 0.17261983800292127, "grad_norm": 1.0607803195691352, "learning_rate": 1.992876851207143e-05, "loss": 0.4394804835319519, "step": 650 }, { "epoch": 0.17288540698446422, "grad_norm": 0.9714467718451273, "learning_rate": 1.9928244416765022e-05, "loss": 0.3475287854671478, "step": 651 }, { "epoch": 0.17315097596600718, "grad_norm": 0.9848879046616053, "learning_rate": 1.992771840741018e-05, "loss": 0.40047168731689453, "step": 652 }, { "epoch": 0.17341654494755013, "grad_norm": 1.0744593937096147, "learning_rate": 1.9927190484108315e-05, "loss": 0.4028981328010559, "step": 653 }, { "epoch": 0.17368211392909308, "grad_norm": 1.010491020672817, "learning_rate": 1.9926660646961208e-05, "loss": 0.3891482949256897, "step": 654 }, { "epoch": 0.17394768291063603, "grad_norm": 1.1163232689680433, "learning_rate": 1.9926128896071e-05, "loss": 0.4570680856704712, "step": 655 }, { "epoch": 0.17421325189217898, "grad_norm": 0.9509061944047602, "learning_rate": 1.992559523154021e-05, "loss": 0.392758309841156, "step": 656 }, { "epoch": 0.17447882087372196, "grad_norm": 0.9648168194829144, "learning_rate": 1.992505965347172e-05, "loss": 0.39552047848701477, "step": 657 }, { "epoch": 0.17474438985526491, "grad_norm": 1.045434666464082, "learning_rate": 1.992452216196879e-05, "loss": 0.4412619173526764, "step": 658 }, { "epoch": 0.17500995883680787, "grad_norm": 1.033655605856329, "learning_rate": 1.9923982757135028e-05, "loss": 0.4075942635536194, "step": 659 }, { "epoch": 0.17527552781835082, "grad_norm": 1.0660210414475448, "learning_rate": 1.9923441439074434e-05, "loss": 0.44615018367767334, "step": 660 }, { "epoch": 0.17554109679989377, "grad_norm": 0.9504988883268379, "learning_rate": 1.992289820789137e-05, "loss": 0.3957441449165344, "step": 661 }, { "epoch": 0.17580666578143672, "grad_norm": 0.9513339400965243, "learning_rate": 1.992235306369056e-05, "loss": 0.4014820158481598, "step": 662 }, { "epoch": 0.17607223476297967, "grad_norm": 0.9988043316582222, "learning_rate": 1.9921806006577102e-05, "loss": 0.39478158950805664, "step": 663 }, { "epoch": 0.17633780374452265, "grad_norm": 1.0278124558587338, "learning_rate": 1.9921257036656463e-05, "loss": 0.45742082595825195, "step": 664 }, { "epoch": 0.1766033727260656, "grad_norm": 0.9674516471555401, "learning_rate": 1.9920706154034477e-05, "loss": 0.36519041657447815, "step": 665 }, { "epoch": 0.17686894170760856, "grad_norm": 1.0086354363577679, "learning_rate": 1.992015335881735e-05, "loss": 0.40599358081817627, "step": 666 }, { "epoch": 0.1771345106891515, "grad_norm": 0.958585892866014, "learning_rate": 1.991959865111165e-05, "loss": 0.4064781367778778, "step": 667 }, { "epoch": 0.17740007967069446, "grad_norm": 0.9430583774727941, "learning_rate": 1.991904203102432e-05, "loss": 0.4076484143733978, "step": 668 }, { "epoch": 0.1776656486522374, "grad_norm": 1.1044553051326549, "learning_rate": 1.9918483498662678e-05, "loss": 0.42157143354415894, "step": 669 }, { "epoch": 0.17793121763378036, "grad_norm": 1.005923050768092, "learning_rate": 1.9917923054134388e-05, "loss": 0.3814900517463684, "step": 670 }, { "epoch": 0.17819678661532334, "grad_norm": 1.0156953904207233, "learning_rate": 1.9917360697547506e-05, "loss": 0.4211175739765167, "step": 671 }, { "epoch": 0.1784623555968663, "grad_norm": 1.0530805044024834, "learning_rate": 1.991679642901045e-05, "loss": 0.3975893259048462, "step": 672 }, { "epoch": 0.17872792457840925, "grad_norm": 0.9633270935214763, "learning_rate": 1.9916230248631993e-05, "loss": 0.36090826988220215, "step": 673 }, { "epoch": 0.1789934935599522, "grad_norm": 0.9408638333666679, "learning_rate": 1.99156621565213e-05, "loss": 0.36511334776878357, "step": 674 }, { "epoch": 0.17925906254149515, "grad_norm": 1.0839117569759185, "learning_rate": 1.9915092152787888e-05, "loss": 0.4131924510002136, "step": 675 }, { "epoch": 0.1795246315230381, "grad_norm": 1.1407281463751517, "learning_rate": 1.9914520237541644e-05, "loss": 0.4283728301525116, "step": 676 }, { "epoch": 0.17979020050458105, "grad_norm": 0.9751873028047018, "learning_rate": 1.991394641089283e-05, "loss": 0.3855544924736023, "step": 677 }, { "epoch": 0.18005576948612403, "grad_norm": 1.3517309919327671, "learning_rate": 1.9913370672952074e-05, "loss": 0.41288501024246216, "step": 678 }, { "epoch": 0.180321338467667, "grad_norm": 1.1127679640996702, "learning_rate": 1.9912793023830365e-05, "loss": 0.3824073076248169, "step": 679 }, { "epoch": 0.18058690744920994, "grad_norm": 1.0055812841256684, "learning_rate": 1.9912213463639077e-05, "loss": 0.39005106687545776, "step": 680 }, { "epoch": 0.1808524764307529, "grad_norm": 1.0115332151563563, "learning_rate": 1.9911631992489933e-05, "loss": 0.3521374464035034, "step": 681 }, { "epoch": 0.18111804541229584, "grad_norm": 0.983790464571211, "learning_rate": 1.9911048610495037e-05, "loss": 0.337347149848938, "step": 682 }, { "epoch": 0.1813836143938388, "grad_norm": 1.1534370397304132, "learning_rate": 1.9910463317766864e-05, "loss": 0.4349983334541321, "step": 683 }, { "epoch": 0.18164918337538175, "grad_norm": 1.059114838428009, "learning_rate": 1.9909876114418242e-05, "loss": 0.3783540427684784, "step": 684 }, { "epoch": 0.18191475235692472, "grad_norm": 1.0050293498117582, "learning_rate": 1.9909287000562383e-05, "loss": 0.4065130054950714, "step": 685 }, { "epoch": 0.18218032133846768, "grad_norm": 1.0122618604087057, "learning_rate": 1.990869597631286e-05, "loss": 0.3876315653324127, "step": 686 }, { "epoch": 0.18244589032001063, "grad_norm": 0.9622962910168786, "learning_rate": 1.9908103041783615e-05, "loss": 0.3716024160385132, "step": 687 }, { "epoch": 0.18271145930155358, "grad_norm": 1.086778230300176, "learning_rate": 1.990750819708896e-05, "loss": 0.4096733331680298, "step": 688 }, { "epoch": 0.18297702828309653, "grad_norm": 1.131269280292305, "learning_rate": 1.9906911442343567e-05, "loss": 0.41432395577430725, "step": 689 }, { "epoch": 0.18324259726463948, "grad_norm": 1.1182736792418642, "learning_rate": 1.9906312777662493e-05, "loss": 0.3934200406074524, "step": 690 }, { "epoch": 0.18350816624618244, "grad_norm": 1.0493015785833109, "learning_rate": 1.9905712203161148e-05, "loss": 0.4246784746646881, "step": 691 }, { "epoch": 0.18377373522772542, "grad_norm": 1.1362836227785695, "learning_rate": 1.9905109718955323e-05, "loss": 0.40027567744255066, "step": 692 }, { "epoch": 0.18403930420926837, "grad_norm": 1.056262242708622, "learning_rate": 1.990450532516116e-05, "loss": 0.4162583351135254, "step": 693 }, { "epoch": 0.18430487319081132, "grad_norm": 1.05760814074371, "learning_rate": 1.990389902189518e-05, "loss": 0.4133074879646301, "step": 694 }, { "epoch": 0.18457044217235427, "grad_norm": 1.0438921885629904, "learning_rate": 1.9903290809274277e-05, "loss": 0.333192378282547, "step": 695 }, { "epoch": 0.18483601115389722, "grad_norm": 0.9814281867123515, "learning_rate": 1.9902680687415704e-05, "loss": 0.39349496364593506, "step": 696 }, { "epoch": 0.18510158013544017, "grad_norm": 1.0366332083029342, "learning_rate": 1.9902068656437086e-05, "loss": 0.39678412675857544, "step": 697 }, { "epoch": 0.18536714911698313, "grad_norm": 1.0003960978434148, "learning_rate": 1.9901454716456415e-05, "loss": 0.3553932011127472, "step": 698 }, { "epoch": 0.18563271809852608, "grad_norm": 1.0876315802223169, "learning_rate": 1.990083886759205e-05, "loss": 0.4264630079269409, "step": 699 }, { "epoch": 0.18589828708006906, "grad_norm": 1.0135520655053032, "learning_rate": 1.9900221109962726e-05, "loss": 0.3883950412273407, "step": 700 }, { "epoch": 0.186163856061612, "grad_norm": 1.0408639715408188, "learning_rate": 1.989960144368753e-05, "loss": 0.38465407490730286, "step": 701 }, { "epoch": 0.18642942504315496, "grad_norm": 2.2198594223984065, "learning_rate": 1.9898979868885933e-05, "loss": 0.39897871017456055, "step": 702 }, { "epoch": 0.1866949940246979, "grad_norm": 1.120873004114704, "learning_rate": 1.9898356385677762e-05, "loss": 0.4386023283004761, "step": 703 }, { "epoch": 0.18696056300624087, "grad_norm": 1.0254606123190075, "learning_rate": 1.989773099418322e-05, "loss": 0.42621874809265137, "step": 704 }, { "epoch": 0.18722613198778382, "grad_norm": 1.0153284696458207, "learning_rate": 1.9897103694522877e-05, "loss": 0.3811546266078949, "step": 705 }, { "epoch": 0.18749170096932677, "grad_norm": 1.0634877610237485, "learning_rate": 1.989647448681767e-05, "loss": 0.4018982946872711, "step": 706 }, { "epoch": 0.18775726995086975, "grad_norm": 1.0316038713106725, "learning_rate": 1.9895843371188897e-05, "loss": 0.3920126259326935, "step": 707 }, { "epoch": 0.1880228389324127, "grad_norm": 0.9767495366810068, "learning_rate": 1.9895210347758233e-05, "loss": 0.3598487973213196, "step": 708 }, { "epoch": 0.18828840791395565, "grad_norm": 1.0286682270198635, "learning_rate": 1.9894575416647717e-05, "loss": 0.4204316735267639, "step": 709 }, { "epoch": 0.1885539768954986, "grad_norm": 0.9653709480495668, "learning_rate": 1.9893938577979755e-05, "loss": 0.33814263343811035, "step": 710 }, { "epoch": 0.18881954587704156, "grad_norm": 0.9588770367914977, "learning_rate": 1.9893299831877124e-05, "loss": 0.3788227140903473, "step": 711 }, { "epoch": 0.1890851148585845, "grad_norm": 0.9974371582936609, "learning_rate": 1.989265917846297e-05, "loss": 0.38141176104545593, "step": 712 }, { "epoch": 0.18935068384012746, "grad_norm": 1.0051109402301954, "learning_rate": 1.9892016617860793e-05, "loss": 0.3757280707359314, "step": 713 }, { "epoch": 0.18961625282167044, "grad_norm": 0.9863956856856875, "learning_rate": 1.989137215019448e-05, "loss": 0.37819087505340576, "step": 714 }, { "epoch": 0.1898818218032134, "grad_norm": 1.1797000402703188, "learning_rate": 1.9890725775588277e-05, "loss": 0.46046000719070435, "step": 715 }, { "epoch": 0.19014739078475634, "grad_norm": 0.9967163493181064, "learning_rate": 1.9890077494166792e-05, "loss": 0.33967363834381104, "step": 716 }, { "epoch": 0.1904129597662993, "grad_norm": 0.9620841339155507, "learning_rate": 1.988942730605501e-05, "loss": 0.36672675609588623, "step": 717 }, { "epoch": 0.19067852874784225, "grad_norm": 1.0666183498740949, "learning_rate": 1.9888775211378278e-05, "loss": 0.38705015182495117, "step": 718 }, { "epoch": 0.1909440977293852, "grad_norm": 1.0696051052523068, "learning_rate": 1.9888121210262313e-05, "loss": 0.35257095098495483, "step": 719 }, { "epoch": 0.19120966671092815, "grad_norm": 1.0337108803934987, "learning_rate": 1.9887465302833194e-05, "loss": 0.3803965449333191, "step": 720 }, { "epoch": 0.19147523569247113, "grad_norm": 1.0097965015220993, "learning_rate": 1.988680748921738e-05, "loss": 0.38166487216949463, "step": 721 }, { "epoch": 0.19174080467401408, "grad_norm": 0.971159209120872, "learning_rate": 1.988614776954169e-05, "loss": 0.4017483592033386, "step": 722 }, { "epoch": 0.19200637365555703, "grad_norm": 1.0651840937747212, "learning_rate": 1.98854861439333e-05, "loss": 0.4343035817146301, "step": 723 }, { "epoch": 0.19227194263709999, "grad_norm": 1.0527178531986199, "learning_rate": 1.9884822612519773e-05, "loss": 0.4017031192779541, "step": 724 }, { "epoch": 0.19253751161864294, "grad_norm": 0.9558335625340557, "learning_rate": 1.988415717542903e-05, "loss": 0.32294636964797974, "step": 725 }, { "epoch": 0.1928030806001859, "grad_norm": 1.018550638071552, "learning_rate": 1.988348983278935e-05, "loss": 0.34661561250686646, "step": 726 }, { "epoch": 0.19306864958172884, "grad_norm": 1.1264464061553692, "learning_rate": 1.98828205847294e-05, "loss": 0.3588724434375763, "step": 727 }, { "epoch": 0.19333421856327182, "grad_norm": 1.151476031768393, "learning_rate": 1.9882149431378194e-05, "loss": 0.45439180731773376, "step": 728 }, { "epoch": 0.19359978754481477, "grad_norm": 1.092854672146059, "learning_rate": 1.988147637286513e-05, "loss": 0.3916742205619812, "step": 729 }, { "epoch": 0.19386535652635772, "grad_norm": 1.1073017625666908, "learning_rate": 1.988080140931996e-05, "loss": 0.3838115334510803, "step": 730 }, { "epoch": 0.19413092550790068, "grad_norm": 1.0305888563782257, "learning_rate": 1.9880124540872813e-05, "loss": 0.3803096413612366, "step": 731 }, { "epoch": 0.19439649448944363, "grad_norm": 1.0697488639709387, "learning_rate": 1.987944576765418e-05, "loss": 0.4180675446987152, "step": 732 }, { "epoch": 0.19466206347098658, "grad_norm": 0.968492149308095, "learning_rate": 1.987876508979492e-05, "loss": 0.34485924243927, "step": 733 }, { "epoch": 0.19492763245252953, "grad_norm": 1.0301319893667387, "learning_rate": 1.987808250742626e-05, "loss": 0.3696223795413971, "step": 734 }, { "epoch": 0.1951932014340725, "grad_norm": 1.0070871597151176, "learning_rate": 1.9877398020679796e-05, "loss": 0.39920324087142944, "step": 735 }, { "epoch": 0.19545877041561546, "grad_norm": 0.9772548764362861, "learning_rate": 1.987671162968748e-05, "loss": 0.33534419536590576, "step": 736 }, { "epoch": 0.19572433939715841, "grad_norm": 0.955184588375953, "learning_rate": 1.9876023334581657e-05, "loss": 0.3698185682296753, "step": 737 }, { "epoch": 0.19598990837870137, "grad_norm": 1.0108475553340988, "learning_rate": 1.9875333135495e-05, "loss": 0.37388375401496887, "step": 738 }, { "epoch": 0.19625547736024432, "grad_norm": 0.9685434293396273, "learning_rate": 1.9874641032560594e-05, "loss": 0.3285469114780426, "step": 739 }, { "epoch": 0.19652104634178727, "grad_norm": 1.01794140535256, "learning_rate": 1.9873947025911854e-05, "loss": 0.3539549708366394, "step": 740 }, { "epoch": 0.19678661532333022, "grad_norm": 1.0943847325994938, "learning_rate": 1.9873251115682577e-05, "loss": 0.4707021117210388, "step": 741 }, { "epoch": 0.1970521843048732, "grad_norm": 0.9783865509799976, "learning_rate": 1.987255330200693e-05, "loss": 0.3871781826019287, "step": 742 }, { "epoch": 0.19731775328641615, "grad_norm": 1.0462206197157178, "learning_rate": 1.9871853585019446e-05, "loss": 0.3890243172645569, "step": 743 }, { "epoch": 0.1975833222679591, "grad_norm": 0.9914096392216383, "learning_rate": 1.9871151964855013e-05, "loss": 0.34914374351501465, "step": 744 }, { "epoch": 0.19784889124950206, "grad_norm": 1.0157439665946277, "learning_rate": 1.9870448441648905e-05, "loss": 0.41009777784347534, "step": 745 }, { "epoch": 0.198114460231045, "grad_norm": 1.0725931773033663, "learning_rate": 1.9869743015536747e-05, "loss": 0.39449363946914673, "step": 746 }, { "epoch": 0.19838002921258796, "grad_norm": 1.081644116196219, "learning_rate": 1.9869035686654538e-05, "loss": 0.3530065417289734, "step": 747 }, { "epoch": 0.1986455981941309, "grad_norm": 1.1338420898560146, "learning_rate": 1.986832645513864e-05, "loss": 0.4255196154117584, "step": 748 }, { "epoch": 0.1989111671756739, "grad_norm": 1.0625457917520444, "learning_rate": 1.9867615321125796e-05, "loss": 0.3921143114566803, "step": 749 }, { "epoch": 0.19917673615721684, "grad_norm": 1.1076371778966394, "learning_rate": 1.986690228475309e-05, "loss": 0.4157381057739258, "step": 750 }, { "epoch": 0.1994423051387598, "grad_norm": 0.9887260401437288, "learning_rate": 1.986618734615799e-05, "loss": 0.3922047019004822, "step": 751 }, { "epoch": 0.19970787412030275, "grad_norm": 1.2477225666156357, "learning_rate": 1.9865470505478335e-05, "loss": 0.4378710985183716, "step": 752 }, { "epoch": 0.1999734431018457, "grad_norm": 0.9960415180367619, "learning_rate": 1.986475176285232e-05, "loss": 0.3636753261089325, "step": 753 }, { "epoch": 0.20023901208338865, "grad_norm": 1.0691751577172293, "learning_rate": 1.986403111841851e-05, "loss": 0.3509834408760071, "step": 754 }, { "epoch": 0.2005045810649316, "grad_norm": 0.9490438891131449, "learning_rate": 1.986330857231583e-05, "loss": 0.3539624512195587, "step": 755 }, { "epoch": 0.20077015004647458, "grad_norm": 1.002849163142055, "learning_rate": 1.9862584124683587e-05, "loss": 0.417904257774353, "step": 756 }, { "epoch": 0.20103571902801753, "grad_norm": 0.9438738740406134, "learning_rate": 1.9861857775661442e-05, "loss": 0.3602277636528015, "step": 757 }, { "epoch": 0.2013012880095605, "grad_norm": 1.0703002408877305, "learning_rate": 1.986112952538943e-05, "loss": 0.41064661741256714, "step": 758 }, { "epoch": 0.20156685699110344, "grad_norm": 0.9789269746167363, "learning_rate": 1.9860399374007944e-05, "loss": 0.36313754320144653, "step": 759 }, { "epoch": 0.2018324259726464, "grad_norm": 1.0711706181502203, "learning_rate": 1.9859667321657755e-05, "loss": 0.39497628808021545, "step": 760 }, { "epoch": 0.20209799495418934, "grad_norm": 1.0173001682725575, "learning_rate": 1.9858933368479987e-05, "loss": 0.405613511800766, "step": 761 }, { "epoch": 0.2023635639357323, "grad_norm": 0.9881458101524105, "learning_rate": 1.9858197514616142e-05, "loss": 0.39093440771102905, "step": 762 }, { "epoch": 0.20262913291727527, "grad_norm": 1.0330584509521943, "learning_rate": 1.9857459760208084e-05, "loss": 0.39908382296562195, "step": 763 }, { "epoch": 0.20289470189881822, "grad_norm": 0.9416263868211369, "learning_rate": 1.9856720105398038e-05, "loss": 0.36787620186805725, "step": 764 }, { "epoch": 0.20316027088036118, "grad_norm": 1.0128388377672763, "learning_rate": 1.985597855032861e-05, "loss": 0.390550822019577, "step": 765 }, { "epoch": 0.20342583986190413, "grad_norm": 1.115759431869763, "learning_rate": 1.9855235095142754e-05, "loss": 0.4191611409187317, "step": 766 }, { "epoch": 0.20369140884344708, "grad_norm": 1.1288935622655036, "learning_rate": 1.985448973998381e-05, "loss": 0.4060766100883484, "step": 767 }, { "epoch": 0.20395697782499003, "grad_norm": 1.055264696895727, "learning_rate": 1.985374248499546e-05, "loss": 0.3906163275241852, "step": 768 }, { "epoch": 0.20422254680653298, "grad_norm": 1.0101644212894914, "learning_rate": 1.9852993330321774e-05, "loss": 0.3926839828491211, "step": 769 }, { "epoch": 0.20448811578807596, "grad_norm": 1.0474151984911524, "learning_rate": 1.9852242276107182e-05, "loss": 0.37276068329811096, "step": 770 }, { "epoch": 0.20475368476961892, "grad_norm": 0.9531396793135881, "learning_rate": 1.9851489322496476e-05, "loss": 0.3765360414981842, "step": 771 }, { "epoch": 0.20501925375116187, "grad_norm": 1.0017274873228423, "learning_rate": 1.9850734469634815e-05, "loss": 0.35091257095336914, "step": 772 }, { "epoch": 0.20528482273270482, "grad_norm": 1.1164065944268338, "learning_rate": 1.9849977717667725e-05, "loss": 0.4259791076183319, "step": 773 }, { "epoch": 0.20555039171424777, "grad_norm": 0.9939508272565134, "learning_rate": 1.9849219066741102e-05, "loss": 0.3563114404678345, "step": 774 }, { "epoch": 0.20581596069579072, "grad_norm": 1.0814350606971046, "learning_rate": 1.9848458517001203e-05, "loss": 0.4148223102092743, "step": 775 }, { "epoch": 0.20608152967733367, "grad_norm": 1.0296405515766518, "learning_rate": 1.9847696068594655e-05, "loss": 0.3817785382270813, "step": 776 }, { "epoch": 0.20634709865887665, "grad_norm": 1.115875170640065, "learning_rate": 1.984693172166845e-05, "loss": 0.41741886734962463, "step": 777 }, { "epoch": 0.2066126676404196, "grad_norm": 1.0479957521256793, "learning_rate": 1.9846165476369938e-05, "loss": 0.34800025820732117, "step": 778 }, { "epoch": 0.20687823662196256, "grad_norm": 1.0122784392492805, "learning_rate": 1.9845397332846848e-05, "loss": 0.38093405961990356, "step": 779 }, { "epoch": 0.2071438056035055, "grad_norm": 1.0953515150858002, "learning_rate": 1.9844627291247268e-05, "loss": 0.40733009576797485, "step": 780 }, { "epoch": 0.20740937458504846, "grad_norm": 1.1011295166986532, "learning_rate": 1.9843855351719655e-05, "loss": 0.3829066753387451, "step": 781 }, { "epoch": 0.2076749435665914, "grad_norm": 1.0316161170996605, "learning_rate": 1.9843081514412827e-05, "loss": 0.3574868440628052, "step": 782 }, { "epoch": 0.20794051254813437, "grad_norm": 1.071531696766489, "learning_rate": 1.984230577947597e-05, "loss": 0.3675144612789154, "step": 783 }, { "epoch": 0.20820608152967734, "grad_norm": 0.9982781618225591, "learning_rate": 1.9841528147058638e-05, "loss": 0.36120525002479553, "step": 784 }, { "epoch": 0.2084716505112203, "grad_norm": 1.0016427535647234, "learning_rate": 1.984074861731075e-05, "loss": 0.3651392459869385, "step": 785 }, { "epoch": 0.20873721949276325, "grad_norm": 1.1254815799645344, "learning_rate": 1.983996719038259e-05, "loss": 0.4204651117324829, "step": 786 }, { "epoch": 0.2090027884743062, "grad_norm": 1.0600310007301286, "learning_rate": 1.9839183866424806e-05, "loss": 0.4452149271965027, "step": 787 }, { "epoch": 0.20926835745584915, "grad_norm": 1.000047138771705, "learning_rate": 1.9838398645588418e-05, "loss": 0.3931270241737366, "step": 788 }, { "epoch": 0.2095339264373921, "grad_norm": 1.0009892054118905, "learning_rate": 1.98376115280248e-05, "loss": 0.3680538535118103, "step": 789 }, { "epoch": 0.20979949541893506, "grad_norm": 0.9848864128393906, "learning_rate": 1.9836822513885704e-05, "loss": 0.3766820728778839, "step": 790 }, { "epoch": 0.21006506440047804, "grad_norm": 1.0494510099931045, "learning_rate": 1.9836031603323245e-05, "loss": 0.3602439761161804, "step": 791 }, { "epoch": 0.210330633382021, "grad_norm": 0.9790632198207762, "learning_rate": 1.98352387964899e-05, "loss": 0.38925549387931824, "step": 792 }, { "epoch": 0.21059620236356394, "grad_norm": 1.0121548586068807, "learning_rate": 1.9834444093538504e-05, "loss": 0.3569640517234802, "step": 793 }, { "epoch": 0.2108617713451069, "grad_norm": 1.0171085592107372, "learning_rate": 1.9833647494622275e-05, "loss": 0.3543340265750885, "step": 794 }, { "epoch": 0.21112734032664984, "grad_norm": 1.0426744340585967, "learning_rate": 1.983284899989479e-05, "loss": 0.37313222885131836, "step": 795 }, { "epoch": 0.2113929093081928, "grad_norm": 1.0940501026222131, "learning_rate": 1.983204860950998e-05, "loss": 0.3874257802963257, "step": 796 }, { "epoch": 0.21165847828973575, "grad_norm": 1.005805069630653, "learning_rate": 1.983124632362216e-05, "loss": 0.3815164864063263, "step": 797 }, { "epoch": 0.21192404727127873, "grad_norm": 1.0879143214156584, "learning_rate": 1.9830442142386e-05, "loss": 0.39476731419563293, "step": 798 }, { "epoch": 0.21218961625282168, "grad_norm": 1.0888281701524323, "learning_rate": 1.9829636065956527e-05, "loss": 0.399338036775589, "step": 799 }, { "epoch": 0.21245518523436463, "grad_norm": 1.0679987938098825, "learning_rate": 1.9828828094489157e-05, "loss": 0.3940344452857971, "step": 800 }, { "epoch": 0.21272075421590758, "grad_norm": 1.0124680733329086, "learning_rate": 1.9828018228139647e-05, "loss": 0.35597044229507446, "step": 801 }, { "epoch": 0.21298632319745053, "grad_norm": 1.197291261672491, "learning_rate": 1.9827206467064133e-05, "loss": 0.4309435784816742, "step": 802 }, { "epoch": 0.21325189217899349, "grad_norm": 1.0158009285134544, "learning_rate": 1.9826392811419113e-05, "loss": 0.37327438592910767, "step": 803 }, { "epoch": 0.21351746116053644, "grad_norm": 0.9944187944281718, "learning_rate": 1.9825577261361454e-05, "loss": 0.35214242339134216, "step": 804 }, { "epoch": 0.21378303014207942, "grad_norm": 1.1575422458756877, "learning_rate": 1.982475981704838e-05, "loss": 0.41114968061447144, "step": 805 }, { "epoch": 0.21404859912362237, "grad_norm": 0.9719994027948292, "learning_rate": 1.9823940478637486e-05, "loss": 0.3632299304008484, "step": 806 }, { "epoch": 0.21431416810516532, "grad_norm": 1.1699036102992622, "learning_rate": 1.9823119246286727e-05, "loss": 0.39640772342681885, "step": 807 }, { "epoch": 0.21457973708670827, "grad_norm": 1.002397111320771, "learning_rate": 1.9822296120154433e-05, "loss": 0.39356929063796997, "step": 808 }, { "epoch": 0.21484530606825122, "grad_norm": 1.061754718166072, "learning_rate": 1.9821471100399294e-05, "loss": 0.3710761070251465, "step": 809 }, { "epoch": 0.21511087504979418, "grad_norm": 0.9713246248834058, "learning_rate": 1.9820644187180354e-05, "loss": 0.35515087842941284, "step": 810 }, { "epoch": 0.21537644403133713, "grad_norm": 1.0166244205196049, "learning_rate": 1.981981538065704e-05, "loss": 0.3803205192089081, "step": 811 }, { "epoch": 0.2156420130128801, "grad_norm": 1.0421456761704733, "learning_rate": 1.9818984680989134e-05, "loss": 0.40275394916534424, "step": 812 }, { "epoch": 0.21590758199442306, "grad_norm": 1.0872785008811605, "learning_rate": 1.9818152088336786e-05, "loss": 0.3711051344871521, "step": 813 }, { "epoch": 0.216173150975966, "grad_norm": 1.0872190904032264, "learning_rate": 1.9817317602860512e-05, "loss": 0.4198985695838928, "step": 814 }, { "epoch": 0.21643871995750896, "grad_norm": 0.9931448766878032, "learning_rate": 1.9816481224721185e-05, "loss": 0.38333773612976074, "step": 815 }, { "epoch": 0.21670428893905191, "grad_norm": 1.1679000778390602, "learning_rate": 1.9815642954080055e-05, "loss": 0.3959774971008301, "step": 816 }, { "epoch": 0.21696985792059487, "grad_norm": 1.1013876458182361, "learning_rate": 1.9814802791098728e-05, "loss": 0.3475337326526642, "step": 817 }, { "epoch": 0.21723542690213782, "grad_norm": 1.06867842878894, "learning_rate": 1.981396073593918e-05, "loss": 0.369370698928833, "step": 818 }, { "epoch": 0.2175009958836808, "grad_norm": 1.085763343280496, "learning_rate": 1.9813116788763744e-05, "loss": 0.3515776991844177, "step": 819 }, { "epoch": 0.21776656486522375, "grad_norm": 1.0780206278908893, "learning_rate": 1.9812270949735124e-05, "loss": 0.3637402355670929, "step": 820 }, { "epoch": 0.2180321338467667, "grad_norm": 1.0342672695189807, "learning_rate": 1.9811423219016395e-05, "loss": 0.3930947780609131, "step": 821 }, { "epoch": 0.21829770282830965, "grad_norm": 1.102521832922822, "learning_rate": 1.981057359677098e-05, "loss": 0.40081048011779785, "step": 822 }, { "epoch": 0.2185632718098526, "grad_norm": 1.0386373096164698, "learning_rate": 1.9809722083162682e-05, "loss": 0.3831724226474762, "step": 823 }, { "epoch": 0.21882884079139556, "grad_norm": 1.0516274934858763, "learning_rate": 1.9808868678355662e-05, "loss": 0.3919270932674408, "step": 824 }, { "epoch": 0.2190944097729385, "grad_norm": 1.0623138704484363, "learning_rate": 1.9808013382514448e-05, "loss": 0.41782522201538086, "step": 825 }, { "epoch": 0.2193599787544815, "grad_norm": 1.0570337251212087, "learning_rate": 1.9807156195803926e-05, "loss": 0.3751329779624939, "step": 826 }, { "epoch": 0.21962554773602444, "grad_norm": 1.0009279652164118, "learning_rate": 1.9806297118389353e-05, "loss": 0.36451685428619385, "step": 827 }, { "epoch": 0.2198911167175674, "grad_norm": 1.1911804759546862, "learning_rate": 1.9805436150436352e-05, "loss": 0.3924056887626648, "step": 828 }, { "epoch": 0.22015668569911034, "grad_norm": 0.9887238598202497, "learning_rate": 1.9804573292110906e-05, "loss": 0.34744757413864136, "step": 829 }, { "epoch": 0.2204222546806533, "grad_norm": 1.1506637434477502, "learning_rate": 1.980370854357936e-05, "loss": 0.4162982702255249, "step": 830 }, { "epoch": 0.22068782366219625, "grad_norm": 1.103994708633239, "learning_rate": 1.9802841905008434e-05, "loss": 0.36572596430778503, "step": 831 }, { "epoch": 0.2209533926437392, "grad_norm": 1.0028116020560682, "learning_rate": 1.98019733765652e-05, "loss": 0.3535170555114746, "step": 832 }, { "epoch": 0.22121896162528218, "grad_norm": 1.061392974987333, "learning_rate": 1.9801102958417107e-05, "loss": 0.3906480073928833, "step": 833 }, { "epoch": 0.22148453060682513, "grad_norm": 1.0646039703833918, "learning_rate": 1.980023065073195e-05, "loss": 0.34185755252838135, "step": 834 }, { "epoch": 0.22175009958836808, "grad_norm": 1.1983506875652454, "learning_rate": 1.9799356453677913e-05, "loss": 0.4216359853744507, "step": 835 }, { "epoch": 0.22201566856991103, "grad_norm": 1.038756499639493, "learning_rate": 1.979848036742352e-05, "loss": 0.365469366312027, "step": 836 }, { "epoch": 0.222281237551454, "grad_norm": 1.0128951338762324, "learning_rate": 1.9797602392137678e-05, "loss": 0.3570204973220825, "step": 837 }, { "epoch": 0.22254680653299694, "grad_norm": 1.0221196075964396, "learning_rate": 1.9796722527989646e-05, "loss": 0.3929975926876068, "step": 838 }, { "epoch": 0.2228123755145399, "grad_norm": 1.1512146064832047, "learning_rate": 1.979584077514905e-05, "loss": 0.39064258337020874, "step": 839 }, { "epoch": 0.22307794449608287, "grad_norm": 1.0559333522375243, "learning_rate": 1.9794957133785884e-05, "loss": 0.3626471757888794, "step": 840 }, { "epoch": 0.22334351347762582, "grad_norm": 1.0867316997584564, "learning_rate": 1.9794071604070506e-05, "loss": 0.4337238371372223, "step": 841 }, { "epoch": 0.22360908245916877, "grad_norm": 0.9358033183445809, "learning_rate": 1.9793184186173632e-05, "loss": 0.3361967206001282, "step": 842 }, { "epoch": 0.22387465144071172, "grad_norm": 0.961043072021178, "learning_rate": 1.9792294880266346e-05, "loss": 0.3429332971572876, "step": 843 }, { "epoch": 0.22414022042225468, "grad_norm": 1.012773989217256, "learning_rate": 1.97914036865201e-05, "loss": 0.39196616411209106, "step": 844 }, { "epoch": 0.22440578940379763, "grad_norm": 1.1250916546708978, "learning_rate": 1.9790510605106697e-05, "loss": 0.3763045072555542, "step": 845 }, { "epoch": 0.22467135838534058, "grad_norm": 1.1139610172600873, "learning_rate": 1.978961563619832e-05, "loss": 0.41614070534706116, "step": 846 }, { "epoch": 0.22493692736688356, "grad_norm": 1.065347693165354, "learning_rate": 1.9788718779967506e-05, "loss": 0.3834165334701538, "step": 847 }, { "epoch": 0.2252024963484265, "grad_norm": 0.9834992911039661, "learning_rate": 1.978782003658716e-05, "loss": 0.3552364110946655, "step": 848 }, { "epoch": 0.22546806532996946, "grad_norm": 1.0365749744504318, "learning_rate": 1.9786919406230544e-05, "loss": 0.3857925534248352, "step": 849 }, { "epoch": 0.22573363431151242, "grad_norm": 1.0779836727772776, "learning_rate": 1.9786016889071294e-05, "loss": 0.3501393795013428, "step": 850 }, { "epoch": 0.22599920329305537, "grad_norm": 1.1363104904390704, "learning_rate": 1.9785112485283404e-05, "loss": 0.36280643939971924, "step": 851 }, { "epoch": 0.22626477227459832, "grad_norm": 1.1791591930929934, "learning_rate": 1.978420619504123e-05, "loss": 0.3713894486427307, "step": 852 }, { "epoch": 0.22653034125614127, "grad_norm": 1.0682718312185442, "learning_rate": 1.97832980185195e-05, "loss": 0.3668733537197113, "step": 853 }, { "epoch": 0.22679591023768425, "grad_norm": 1.06232834606136, "learning_rate": 1.978238795589329e-05, "loss": 0.4054701626300812, "step": 854 }, { "epoch": 0.2270614792192272, "grad_norm": 1.1024819375758403, "learning_rate": 1.9781476007338058e-05, "loss": 0.3824681043624878, "step": 855 }, { "epoch": 0.22732704820077015, "grad_norm": 1.0604830101195206, "learning_rate": 1.978056217302961e-05, "loss": 0.4009544253349304, "step": 856 }, { "epoch": 0.2275926171823131, "grad_norm": 1.0150812264671392, "learning_rate": 1.9779646453144133e-05, "loss": 0.34773316979408264, "step": 857 }, { "epoch": 0.22785818616385606, "grad_norm": 1.0737509474924387, "learning_rate": 1.977872884785815e-05, "loss": 0.4067278206348419, "step": 858 }, { "epoch": 0.228123755145399, "grad_norm": 1.0566398666110703, "learning_rate": 1.9777809357348584e-05, "loss": 0.3843458890914917, "step": 859 }, { "epoch": 0.22838932412694196, "grad_norm": 1.083451143522079, "learning_rate": 1.977688798179269e-05, "loss": 0.4261704683303833, "step": 860 }, { "epoch": 0.22865489310848494, "grad_norm": 1.0145015740681522, "learning_rate": 1.9775964721368098e-05, "loss": 0.39109086990356445, "step": 861 }, { "epoch": 0.2289204620900279, "grad_norm": 1.1472642326588585, "learning_rate": 1.9775039576252807e-05, "loss": 0.39436954259872437, "step": 862 }, { "epoch": 0.22918603107157084, "grad_norm": 0.9770870267905873, "learning_rate": 1.9774112546625168e-05, "loss": 0.3787967562675476, "step": 863 }, { "epoch": 0.2294516000531138, "grad_norm": 1.5071435779935147, "learning_rate": 1.9773183632663907e-05, "loss": 0.3729320466518402, "step": 864 }, { "epoch": 0.22971716903465675, "grad_norm": 1.0048578103437809, "learning_rate": 1.9772252834548108e-05, "loss": 0.3817081153392792, "step": 865 }, { "epoch": 0.2299827380161997, "grad_norm": 0.9709592169890221, "learning_rate": 1.9771320152457212e-05, "loss": 0.3362218737602234, "step": 866 }, { "epoch": 0.23024830699774265, "grad_norm": 1.0194192402395448, "learning_rate": 1.9770385586571033e-05, "loss": 0.37274059653282166, "step": 867 }, { "epoch": 0.23051387597928563, "grad_norm": 1.058710969457703, "learning_rate": 1.9769449137069746e-05, "loss": 0.3832330107688904, "step": 868 }, { "epoch": 0.23077944496082858, "grad_norm": 0.9857605594513371, "learning_rate": 1.9768510804133886e-05, "loss": 0.37420010566711426, "step": 869 }, { "epoch": 0.23104501394237154, "grad_norm": 1.0333482020677847, "learning_rate": 1.976757058794435e-05, "loss": 0.35314565896987915, "step": 870 }, { "epoch": 0.2313105829239145, "grad_norm": 1.0404097802666386, "learning_rate": 1.97666284886824e-05, "loss": 0.34667372703552246, "step": 871 }, { "epoch": 0.23157615190545744, "grad_norm": 1.1826768759617956, "learning_rate": 1.976568450652967e-05, "loss": 0.3465980589389801, "step": 872 }, { "epoch": 0.2318417208870004, "grad_norm": 1.6479387485919323, "learning_rate": 1.9764738641668137e-05, "loss": 0.40539389848709106, "step": 873 }, { "epoch": 0.23210728986854334, "grad_norm": 1.090454596374008, "learning_rate": 1.976379089428016e-05, "loss": 0.35154545307159424, "step": 874 }, { "epoch": 0.23237285885008632, "grad_norm": 1.1033163387519414, "learning_rate": 1.9762841264548453e-05, "loss": 0.39748087525367737, "step": 875 }, { "epoch": 0.23263842783162927, "grad_norm": 1.0600221119400453, "learning_rate": 1.976188975265609e-05, "loss": 0.41628387570381165, "step": 876 }, { "epoch": 0.23290399681317223, "grad_norm": 1.0805125037340586, "learning_rate": 1.976093635878652e-05, "loss": 0.4076233208179474, "step": 877 }, { "epoch": 0.23316956579471518, "grad_norm": 0.9221839355888705, "learning_rate": 1.9759981083123533e-05, "loss": 0.3262259364128113, "step": 878 }, { "epoch": 0.23343513477625813, "grad_norm": 1.1690018828805817, "learning_rate": 1.9759023925851302e-05, "loss": 0.36561673879623413, "step": 879 }, { "epoch": 0.23370070375780108, "grad_norm": 1.083829918240926, "learning_rate": 1.9758064887154358e-05, "loss": 0.36661773920059204, "step": 880 }, { "epoch": 0.23396627273934403, "grad_norm": 1.0655263771494812, "learning_rate": 1.9757103967217587e-05, "loss": 0.34671685099601746, "step": 881 }, { "epoch": 0.234231841720887, "grad_norm": 1.0056372913167473, "learning_rate": 1.9756141166226246e-05, "loss": 0.3486331105232239, "step": 882 }, { "epoch": 0.23449741070242996, "grad_norm": 1.1177836982205323, "learning_rate": 1.9755176484365953e-05, "loss": 0.3883505165576935, "step": 883 }, { "epoch": 0.23476297968397292, "grad_norm": 1.0548520245203914, "learning_rate": 1.9754209921822683e-05, "loss": 0.3832106590270996, "step": 884 }, { "epoch": 0.23502854866551587, "grad_norm": 1.078830112662993, "learning_rate": 1.975324147878278e-05, "loss": 0.37876033782958984, "step": 885 }, { "epoch": 0.23529411764705882, "grad_norm": 1.0689289829128008, "learning_rate": 1.975227115543295e-05, "loss": 0.38931846618652344, "step": 886 }, { "epoch": 0.23555968662860177, "grad_norm": 0.956721500767322, "learning_rate": 1.9751298951960258e-05, "loss": 0.3581021726131439, "step": 887 }, { "epoch": 0.23582525561014472, "grad_norm": 1.0206944172292924, "learning_rate": 1.9750324868552133e-05, "loss": 0.35196465253829956, "step": 888 }, { "epoch": 0.2360908245916877, "grad_norm": 0.9996206423870837, "learning_rate": 1.974934890539637e-05, "loss": 0.3635658025741577, "step": 889 }, { "epoch": 0.23635639357323066, "grad_norm": 0.9523927655707425, "learning_rate": 1.9748371062681122e-05, "loss": 0.345594197511673, "step": 890 }, { "epoch": 0.2366219625547736, "grad_norm": 1.0443032231121456, "learning_rate": 1.97473913405949e-05, "loss": 0.357181191444397, "step": 891 }, { "epoch": 0.23688753153631656, "grad_norm": 1.0008000126392016, "learning_rate": 1.974640973932659e-05, "loss": 0.3264622986316681, "step": 892 }, { "epoch": 0.2371531005178595, "grad_norm": 0.9731630083329554, "learning_rate": 1.9745426259065434e-05, "loss": 0.37950894236564636, "step": 893 }, { "epoch": 0.23741866949940246, "grad_norm": 1.1493289415276364, "learning_rate": 1.9744440900001027e-05, "loss": 0.37400782108306885, "step": 894 }, { "epoch": 0.23768423848094541, "grad_norm": 1.0325785235739895, "learning_rate": 1.974345366232334e-05, "loss": 0.3455463945865631, "step": 895 }, { "epoch": 0.2379498074624884, "grad_norm": 1.1059511993758653, "learning_rate": 1.9742464546222702e-05, "loss": 0.3605351150035858, "step": 896 }, { "epoch": 0.23821537644403135, "grad_norm": 0.9763906212855142, "learning_rate": 1.97414735518898e-05, "loss": 0.3839051127433777, "step": 897 }, { "epoch": 0.2384809454255743, "grad_norm": 1.0304758127284366, "learning_rate": 1.974048067951569e-05, "loss": 0.34562867879867554, "step": 898 }, { "epoch": 0.23874651440711725, "grad_norm": 1.1332867443652592, "learning_rate": 1.9739485929291778e-05, "loss": 0.3986506760120392, "step": 899 }, { "epoch": 0.2390120833886602, "grad_norm": 1.1598961775072092, "learning_rate": 1.9738489301409848e-05, "loss": 0.3955162465572357, "step": 900 }, { "epoch": 0.23927765237020315, "grad_norm": 1.080226447361195, "learning_rate": 1.9737490796062036e-05, "loss": 0.370066374540329, "step": 901 }, { "epoch": 0.2395432213517461, "grad_norm": 1.0637004733407822, "learning_rate": 1.973649041344084e-05, "loss": 0.3777826726436615, "step": 902 }, { "epoch": 0.23980879033328908, "grad_norm": 1.1358293788080334, "learning_rate": 1.9735488153739128e-05, "loss": 0.327572226524353, "step": 903 }, { "epoch": 0.24007435931483204, "grad_norm": 1.071729158749965, "learning_rate": 1.973448401715011e-05, "loss": 0.3921743929386139, "step": 904 }, { "epoch": 0.240339928296375, "grad_norm": 1.0635179670685195, "learning_rate": 1.973347800386739e-05, "loss": 0.3683379888534546, "step": 905 }, { "epoch": 0.24060549727791794, "grad_norm": 1.023832589054702, "learning_rate": 1.9732470114084905e-05, "loss": 0.390872597694397, "step": 906 }, { "epoch": 0.2408710662594609, "grad_norm": 1.0814023137489452, "learning_rate": 1.9731460347996964e-05, "loss": 0.3772459626197815, "step": 907 }, { "epoch": 0.24113663524100384, "grad_norm": 1.0280982913686894, "learning_rate": 1.973044870579824e-05, "loss": 0.37990954518318176, "step": 908 }, { "epoch": 0.2414022042225468, "grad_norm": 1.0035238419205756, "learning_rate": 1.972943518768377e-05, "loss": 0.3380817770957947, "step": 909 }, { "epoch": 0.24166777320408978, "grad_norm": 0.9879847056007396, "learning_rate": 1.9728419793848935e-05, "loss": 0.3348115384578705, "step": 910 }, { "epoch": 0.24193334218563273, "grad_norm": 1.0561235323428824, "learning_rate": 1.9727402524489505e-05, "loss": 0.36936551332473755, "step": 911 }, { "epoch": 0.24219891116717568, "grad_norm": 1.0744513063457712, "learning_rate": 1.9726383379801593e-05, "loss": 0.3871539235115051, "step": 912 }, { "epoch": 0.24246448014871863, "grad_norm": 1.0904556770971818, "learning_rate": 1.9725362359981676e-05, "loss": 0.37087059020996094, "step": 913 }, { "epoch": 0.24273004913026158, "grad_norm": 0.9802916629421812, "learning_rate": 1.9724339465226595e-05, "loss": 0.35582688450813293, "step": 914 }, { "epoch": 0.24299561811180453, "grad_norm": 1.0947021466091125, "learning_rate": 1.9723314695733557e-05, "loss": 0.38500669598579407, "step": 915 }, { "epoch": 0.2432611870933475, "grad_norm": 0.9834121517145057, "learning_rate": 1.9722288051700116e-05, "loss": 0.32470762729644775, "step": 916 }, { "epoch": 0.24352675607489047, "grad_norm": 1.0805011919993295, "learning_rate": 1.9721259533324207e-05, "loss": 0.3822774589061737, "step": 917 }, { "epoch": 0.24379232505643342, "grad_norm": 0.9937398719966192, "learning_rate": 1.972022914080411e-05, "loss": 0.38374873995780945, "step": 918 }, { "epoch": 0.24405789403797637, "grad_norm": 1.0550770033370775, "learning_rate": 1.9719196874338472e-05, "loss": 0.3419352173805237, "step": 919 }, { "epoch": 0.24432346301951932, "grad_norm": 1.0164630853495407, "learning_rate": 1.9718162734126308e-05, "loss": 0.3294275403022766, "step": 920 }, { "epoch": 0.24458903200106227, "grad_norm": 1.0668295499881337, "learning_rate": 1.9717126720366982e-05, "loss": 0.3585365414619446, "step": 921 }, { "epoch": 0.24485460098260522, "grad_norm": 1.0609325079201495, "learning_rate": 1.9716088833260225e-05, "loss": 0.38130316138267517, "step": 922 }, { "epoch": 0.24512016996414818, "grad_norm": 1.0577067392982809, "learning_rate": 1.9715049073006133e-05, "loss": 0.3745136260986328, "step": 923 }, { "epoch": 0.24538573894569116, "grad_norm": 1.0457228779122651, "learning_rate": 1.971400743980516e-05, "loss": 0.3771660327911377, "step": 924 }, { "epoch": 0.2456513079272341, "grad_norm": 1.0133861698501567, "learning_rate": 1.971296393385812e-05, "loss": 0.29661691188812256, "step": 925 }, { "epoch": 0.24591687690877706, "grad_norm": 0.9516714902458889, "learning_rate": 1.9711918555366184e-05, "loss": 0.33783960342407227, "step": 926 }, { "epoch": 0.24618244589032, "grad_norm": 1.2469460687001952, "learning_rate": 1.971087130453089e-05, "loss": 0.42983683943748474, "step": 927 }, { "epoch": 0.24644801487186296, "grad_norm": 0.9725914261438413, "learning_rate": 1.9709822181554142e-05, "loss": 0.32242363691329956, "step": 928 }, { "epoch": 0.24671358385340592, "grad_norm": 1.0989308968162201, "learning_rate": 1.970877118663819e-05, "loss": 0.3576955795288086, "step": 929 }, { "epoch": 0.24697915283494887, "grad_norm": 1.116595385391156, "learning_rate": 1.9707718319985663e-05, "loss": 0.4185359477996826, "step": 930 }, { "epoch": 0.24724472181649185, "grad_norm": 1.1178442474909813, "learning_rate": 1.970666358179953e-05, "loss": 0.35377705097198486, "step": 931 }, { "epoch": 0.2475102907980348, "grad_norm": 1.1350743092525455, "learning_rate": 1.9705606972283143e-05, "loss": 0.3860151171684265, "step": 932 }, { "epoch": 0.24777585977957775, "grad_norm": 1.1915035264404457, "learning_rate": 1.9704548491640195e-05, "loss": 0.39463168382644653, "step": 933 }, { "epoch": 0.2480414287611207, "grad_norm": 1.0462444044755623, "learning_rate": 1.9703488140074752e-05, "loss": 0.3670084774494171, "step": 934 }, { "epoch": 0.24830699774266365, "grad_norm": 1.2914788702644175, "learning_rate": 1.9702425917791242e-05, "loss": 0.388730525970459, "step": 935 }, { "epoch": 0.2485725667242066, "grad_norm": 1.128517931307855, "learning_rate": 1.970136182499444e-05, "loss": 0.38767656683921814, "step": 936 }, { "epoch": 0.24883813570574956, "grad_norm": 1.0771582387425684, "learning_rate": 1.9700295861889497e-05, "loss": 0.35394930839538574, "step": 937 }, { "epoch": 0.24910370468729254, "grad_norm": 1.0639329095738126, "learning_rate": 1.9699228028681917e-05, "loss": 0.3360324501991272, "step": 938 }, { "epoch": 0.2493692736688355, "grad_norm": 1.116621384383513, "learning_rate": 1.9698158325577563e-05, "loss": 0.390169233083725, "step": 939 }, { "epoch": 0.24963484265037844, "grad_norm": 1.108635788765439, "learning_rate": 1.9697086752782666e-05, "loss": 0.3921571671962738, "step": 940 }, { "epoch": 0.2499004116319214, "grad_norm": 1.0665933445619122, "learning_rate": 1.9696013310503808e-05, "loss": 0.3795739710330963, "step": 941 }, { "epoch": 0.25016598061346434, "grad_norm": 1.2202319167117164, "learning_rate": 1.9694937998947935e-05, "loss": 0.3891025185585022, "step": 942 }, { "epoch": 0.2504315495950073, "grad_norm": 0.9751921056908068, "learning_rate": 1.9693860818322357e-05, "loss": 0.3548225164413452, "step": 943 }, { "epoch": 0.25069711857655025, "grad_norm": 1.0555900207888067, "learning_rate": 1.9692781768834747e-05, "loss": 0.3696819543838501, "step": 944 }, { "epoch": 0.2509626875580932, "grad_norm": 1.1322184210541604, "learning_rate": 1.9691700850693126e-05, "loss": 0.3906037211418152, "step": 945 }, { "epoch": 0.25122825653963615, "grad_norm": 1.072434154806742, "learning_rate": 1.9690618064105883e-05, "loss": 0.38181206583976746, "step": 946 }, { "epoch": 0.2514938255211791, "grad_norm": 1.0644124497842522, "learning_rate": 1.9689533409281765e-05, "loss": 0.36904582381248474, "step": 947 }, { "epoch": 0.25175939450272206, "grad_norm": 1.097105891991116, "learning_rate": 1.9688446886429885e-05, "loss": 0.3635823130607605, "step": 948 }, { "epoch": 0.25202496348426506, "grad_norm": 0.9954310874837226, "learning_rate": 1.9687358495759713e-05, "loss": 0.3527260422706604, "step": 949 }, { "epoch": 0.252290532465808, "grad_norm": 1.1902017812011518, "learning_rate": 1.968626823748107e-05, "loss": 0.3781110346317291, "step": 950 }, { "epoch": 0.25255610144735097, "grad_norm": 1.0346217070487125, "learning_rate": 1.968517611180415e-05, "loss": 0.3931560814380646, "step": 951 }, { "epoch": 0.2528216704288939, "grad_norm": 1.0783245371828571, "learning_rate": 1.9684082118939503e-05, "loss": 0.39111074805259705, "step": 952 }, { "epoch": 0.25308723941043687, "grad_norm": 1.2090013193363973, "learning_rate": 1.9682986259098037e-05, "loss": 0.385967880487442, "step": 953 }, { "epoch": 0.2533528083919798, "grad_norm": 1.0103878099057118, "learning_rate": 1.9681888532491022e-05, "loss": 0.34006553888320923, "step": 954 }, { "epoch": 0.2536183773735228, "grad_norm": 1.0077784550534965, "learning_rate": 1.9680788939330086e-05, "loss": 0.36069998145103455, "step": 955 }, { "epoch": 0.2538839463550657, "grad_norm": 1.090649670414093, "learning_rate": 1.9679687479827212e-05, "loss": 0.3354898691177368, "step": 956 }, { "epoch": 0.2541495153366087, "grad_norm": 1.0691933766101984, "learning_rate": 1.9678584154194756e-05, "loss": 0.35667335987091064, "step": 957 }, { "epoch": 0.25441508431815163, "grad_norm": 1.2652121820599898, "learning_rate": 1.9677478962645422e-05, "loss": 0.4003029465675354, "step": 958 }, { "epoch": 0.2546806532996946, "grad_norm": 1.0313200756086844, "learning_rate": 1.9676371905392278e-05, "loss": 0.34397056698799133, "step": 959 }, { "epoch": 0.25494622228123753, "grad_norm": 1.0544706314753822, "learning_rate": 1.9675262982648757e-05, "loss": 0.35319578647613525, "step": 960 }, { "epoch": 0.2552117912627805, "grad_norm": 1.0179000224070893, "learning_rate": 1.967415219462864e-05, "loss": 0.34840327501296997, "step": 961 }, { "epoch": 0.25547736024432344, "grad_norm": 0.9360325612494472, "learning_rate": 1.9673039541546076e-05, "loss": 0.3298989534378052, "step": 962 }, { "epoch": 0.25574292922586644, "grad_norm": 1.0904225305922717, "learning_rate": 1.9671925023615572e-05, "loss": 0.38438719511032104, "step": 963 }, { "epoch": 0.2560084982074094, "grad_norm": 1.128608711014793, "learning_rate": 1.9670808641051994e-05, "loss": 0.3834493160247803, "step": 964 }, { "epoch": 0.25627406718895235, "grad_norm": 1.0456501331264114, "learning_rate": 1.9669690394070564e-05, "loss": 0.3713288903236389, "step": 965 }, { "epoch": 0.2565396361704953, "grad_norm": 1.0864184401996346, "learning_rate": 1.966857028288687e-05, "loss": 0.37564241886138916, "step": 966 }, { "epoch": 0.25680520515203825, "grad_norm": 1.0329676619050974, "learning_rate": 1.9667448307716857e-05, "loss": 0.30162689089775085, "step": 967 }, { "epoch": 0.2570707741335812, "grad_norm": 1.0948768995323135, "learning_rate": 1.9666324468776826e-05, "loss": 0.35969680547714233, "step": 968 }, { "epoch": 0.25733634311512416, "grad_norm": 1.206651724690857, "learning_rate": 1.9665198766283444e-05, "loss": 0.40947285294532776, "step": 969 }, { "epoch": 0.2576019120966671, "grad_norm": 1.0651964473806064, "learning_rate": 1.9664071200453726e-05, "loss": 0.35868343710899353, "step": 970 }, { "epoch": 0.25786748107821006, "grad_norm": 1.1330033214419297, "learning_rate": 1.966294177150506e-05, "loss": 0.3569234311580658, "step": 971 }, { "epoch": 0.258133050059753, "grad_norm": 1.1641224987322216, "learning_rate": 1.9661810479655184e-05, "loss": 0.3381764888763428, "step": 972 }, { "epoch": 0.25839861904129596, "grad_norm": 1.535927577191984, "learning_rate": 1.9660677325122196e-05, "loss": 0.39847785234451294, "step": 973 }, { "epoch": 0.2586641880228389, "grad_norm": 0.9608622914302752, "learning_rate": 1.965954230812456e-05, "loss": 0.33162468671798706, "step": 974 }, { "epoch": 0.25892975700438187, "grad_norm": 1.0421688584245348, "learning_rate": 1.9658405428881087e-05, "loss": 0.3627605438232422, "step": 975 }, { "epoch": 0.2591953259859248, "grad_norm": 1.0501672081861986, "learning_rate": 1.9657266687610965e-05, "loss": 0.3253796100616455, "step": 976 }, { "epoch": 0.2594608949674678, "grad_norm": 1.0198628618780734, "learning_rate": 1.9656126084533716e-05, "loss": 0.3341265916824341, "step": 977 }, { "epoch": 0.2597264639490108, "grad_norm": 1.0202967346949672, "learning_rate": 1.9654983619869242e-05, "loss": 0.3714970052242279, "step": 978 }, { "epoch": 0.25999203293055373, "grad_norm": 1.0333982958482495, "learning_rate": 1.9653839293837798e-05, "loss": 0.3360912501811981, "step": 979 }, { "epoch": 0.2602576019120967, "grad_norm": 1.0322459892827835, "learning_rate": 1.9652693106659995e-05, "loss": 0.3780854642391205, "step": 980 }, { "epoch": 0.26052317089363963, "grad_norm": 1.1062219940451128, "learning_rate": 1.9651545058556803e-05, "loss": 0.33595478534698486, "step": 981 }, { "epoch": 0.2607887398751826, "grad_norm": 1.111464982167328, "learning_rate": 1.965039514974955e-05, "loss": 0.3608357012271881, "step": 982 }, { "epoch": 0.26105430885672554, "grad_norm": 1.0024532391943957, "learning_rate": 1.964924338045993e-05, "loss": 0.3807666599750519, "step": 983 }, { "epoch": 0.2613198778382685, "grad_norm": 1.0213030373156555, "learning_rate": 1.964808975090999e-05, "loss": 0.3551647663116455, "step": 984 }, { "epoch": 0.26158544681981144, "grad_norm": 1.0761922389740786, "learning_rate": 1.9646934261322135e-05, "loss": 0.3771904706954956, "step": 985 }, { "epoch": 0.2618510158013544, "grad_norm": 1.1925998045571422, "learning_rate": 1.964577691191913e-05, "loss": 0.41103222966194153, "step": 986 }, { "epoch": 0.26211658478289734, "grad_norm": 1.0270282722515527, "learning_rate": 1.9644617702924093e-05, "loss": 0.34439292550086975, "step": 987 }, { "epoch": 0.2623821537644403, "grad_norm": 1.1578988390038234, "learning_rate": 1.9643456634560515e-05, "loss": 0.41214391589164734, "step": 988 }, { "epoch": 0.26264772274598325, "grad_norm": 0.9879567855265076, "learning_rate": 1.9642293707052232e-05, "loss": 0.3186502754688263, "step": 989 }, { "epoch": 0.2629132917275262, "grad_norm": 1.039224300824638, "learning_rate": 1.9641128920623438e-05, "loss": 0.3534559905529022, "step": 990 }, { "epoch": 0.2631788607090692, "grad_norm": 1.0867820667103292, "learning_rate": 1.96399622754987e-05, "loss": 0.35217320919036865, "step": 991 }, { "epoch": 0.26344442969061216, "grad_norm": 0.954421559413849, "learning_rate": 1.9638793771902924e-05, "loss": 0.31661587953567505, "step": 992 }, { "epoch": 0.2637099986721551, "grad_norm": 0.9881195075112362, "learning_rate": 1.9637623410061392e-05, "loss": 0.32468482851982117, "step": 993 }, { "epoch": 0.26397556765369806, "grad_norm": 1.0355017939200293, "learning_rate": 1.9636451190199727e-05, "loss": 0.346771776676178, "step": 994 }, { "epoch": 0.264241136635241, "grad_norm": 1.0997948902450267, "learning_rate": 1.9635277112543928e-05, "loss": 0.36409270763397217, "step": 995 }, { "epoch": 0.26450670561678397, "grad_norm": 1.2132528670947562, "learning_rate": 1.963410117732034e-05, "loss": 0.404967725276947, "step": 996 }, { "epoch": 0.2647722745983269, "grad_norm": 1.1962964423617835, "learning_rate": 1.9632923384755666e-05, "loss": 0.39506661891937256, "step": 997 }, { "epoch": 0.26503784357986987, "grad_norm": 1.1967751692769375, "learning_rate": 1.9631743735076972e-05, "loss": 0.3833203911781311, "step": 998 }, { "epoch": 0.2653034125614128, "grad_norm": 1.083140773107417, "learning_rate": 1.9630562228511682e-05, "loss": 0.34522518515586853, "step": 999 }, { "epoch": 0.2655689815429558, "grad_norm": 1.1367328076589556, "learning_rate": 1.962937886528758e-05, "loss": 0.3818400800228119, "step": 1000 }, { "epoch": 0.2658345505244987, "grad_norm": 1.2496699132911573, "learning_rate": 1.9628193645632796e-05, "loss": 0.40827828645706177, "step": 1001 }, { "epoch": 0.2661001195060417, "grad_norm": 1.0406728708542907, "learning_rate": 1.962700656977583e-05, "loss": 0.3448852002620697, "step": 1002 }, { "epoch": 0.26636568848758463, "grad_norm": 1.1035895986897222, "learning_rate": 1.9625817637945542e-05, "loss": 0.36560773849487305, "step": 1003 }, { "epoch": 0.2666312574691276, "grad_norm": 1.1637977684704512, "learning_rate": 1.962462685037114e-05, "loss": 0.38305893540382385, "step": 1004 }, { "epoch": 0.2668968264506706, "grad_norm": 1.0320363555261158, "learning_rate": 1.962343420728219e-05, "loss": 0.3562568426132202, "step": 1005 }, { "epoch": 0.26716239543221354, "grad_norm": 1.18312934129538, "learning_rate": 1.9622239708908626e-05, "loss": 0.37458860874176025, "step": 1006 }, { "epoch": 0.2674279644137565, "grad_norm": 1.058042672523148, "learning_rate": 1.9621043355480726e-05, "loss": 0.35852503776550293, "step": 1007 }, { "epoch": 0.26769353339529944, "grad_norm": 1.0975239398171568, "learning_rate": 1.961984514722914e-05, "loss": 0.4056578278541565, "step": 1008 }, { "epoch": 0.2679591023768424, "grad_norm": 1.1773057151207822, "learning_rate": 1.9618645084384863e-05, "loss": 0.4531296491622925, "step": 1009 }, { "epoch": 0.26822467135838535, "grad_norm": 0.9095840908563808, "learning_rate": 1.9617443167179256e-05, "loss": 0.3356376886367798, "step": 1010 }, { "epoch": 0.2684902403399283, "grad_norm": 1.09880831555839, "learning_rate": 1.9616239395844033e-05, "loss": 0.38045161962509155, "step": 1011 }, { "epoch": 0.26875580932147125, "grad_norm": 1.028451509847456, "learning_rate": 1.9615033770611268e-05, "loss": 0.3549511730670929, "step": 1012 }, { "epoch": 0.2690213783030142, "grad_norm": 1.0546213631772847, "learning_rate": 1.9613826291713393e-05, "loss": 0.33363252878189087, "step": 1013 }, { "epoch": 0.26928694728455715, "grad_norm": 0.9539256345754278, "learning_rate": 1.961261695938319e-05, "loss": 0.3443339467048645, "step": 1014 }, { "epoch": 0.2695525162661001, "grad_norm": 0.9897755385014708, "learning_rate": 1.9611405773853807e-05, "loss": 0.3258364796638489, "step": 1015 }, { "epoch": 0.26981808524764306, "grad_norm": 1.0357196980681809, "learning_rate": 1.961019273535875e-05, "loss": 0.357122540473938, "step": 1016 }, { "epoch": 0.270083654229186, "grad_norm": 0.9668495504097999, "learning_rate": 1.9608977844131875e-05, "loss": 0.32092082500457764, "step": 1017 }, { "epoch": 0.27034922321072896, "grad_norm": 1.0067299219043435, "learning_rate": 1.96077611004074e-05, "loss": 0.36354511976242065, "step": 1018 }, { "epoch": 0.27061479219227197, "grad_norm": 1.0982243281899924, "learning_rate": 1.9606542504419895e-05, "loss": 0.37128758430480957, "step": 1019 }, { "epoch": 0.2708803611738149, "grad_norm": 1.1112959838703056, "learning_rate": 1.9605322056404294e-05, "loss": 0.3732859790325165, "step": 1020 }, { "epoch": 0.2711459301553579, "grad_norm": 1.0058814849372155, "learning_rate": 1.9604099756595885e-05, "loss": 0.32642674446105957, "step": 1021 }, { "epoch": 0.2714114991369008, "grad_norm": 1.10371255398192, "learning_rate": 1.9602875605230313e-05, "loss": 0.376791775226593, "step": 1022 }, { "epoch": 0.2716770681184438, "grad_norm": 1.0603007725295257, "learning_rate": 1.960164960254358e-05, "loss": 0.34514784812927246, "step": 1023 }, { "epoch": 0.27194263709998673, "grad_norm": 1.225533197470795, "learning_rate": 1.9600421748772044e-05, "loss": 0.3752189576625824, "step": 1024 }, { "epoch": 0.2722082060815297, "grad_norm": 1.0783483670765837, "learning_rate": 1.959919204415242e-05, "loss": 0.33100831508636475, "step": 1025 }, { "epoch": 0.27247377506307263, "grad_norm": 1.1910668751599112, "learning_rate": 1.9597960488921785e-05, "loss": 0.42713654041290283, "step": 1026 }, { "epoch": 0.2727393440446156, "grad_norm": 1.110777223027095, "learning_rate": 1.9596727083317565e-05, "loss": 0.3746519684791565, "step": 1027 }, { "epoch": 0.27300491302615854, "grad_norm": 1.1133725792972708, "learning_rate": 1.9595491827577543e-05, "loss": 0.39962098002433777, "step": 1028 }, { "epoch": 0.2732704820077015, "grad_norm": 1.0544310192284179, "learning_rate": 1.9594254721939866e-05, "loss": 0.35112401843070984, "step": 1029 }, { "epoch": 0.27353605098924444, "grad_norm": 1.0749153592990304, "learning_rate": 1.9593015766643037e-05, "loss": 0.3648139238357544, "step": 1030 }, { "epoch": 0.2738016199707874, "grad_norm": 1.0268996180520502, "learning_rate": 1.9591774961925902e-05, "loss": 0.31544098258018494, "step": 1031 }, { "epoch": 0.27406718895233034, "grad_norm": 1.1260952074052377, "learning_rate": 1.959053230802768e-05, "loss": 0.3593738079071045, "step": 1032 }, { "epoch": 0.27433275793387335, "grad_norm": 1.1009303195981317, "learning_rate": 1.958928780518794e-05, "loss": 0.39784368872642517, "step": 1033 }, { "epoch": 0.2745983269154163, "grad_norm": 1.1304731324804922, "learning_rate": 1.9588041453646606e-05, "loss": 0.3869936168193817, "step": 1034 }, { "epoch": 0.27486389589695925, "grad_norm": 0.9803124730292929, "learning_rate": 1.958679325364396e-05, "loss": 0.31108593940734863, "step": 1035 }, { "epoch": 0.2751294648785022, "grad_norm": 1.098791994520666, "learning_rate": 1.958554320542064e-05, "loss": 0.3917708098888397, "step": 1036 }, { "epoch": 0.27539503386004516, "grad_norm": 0.9969159455112034, "learning_rate": 1.958429130921764e-05, "loss": 0.36782944202423096, "step": 1037 }, { "epoch": 0.2756606028415881, "grad_norm": 0.9381100088398062, "learning_rate": 1.9583037565276314e-05, "loss": 0.36196422576904297, "step": 1038 }, { "epoch": 0.27592617182313106, "grad_norm": 1.0783473143219733, "learning_rate": 1.9581781973838368e-05, "loss": 0.32208555936813354, "step": 1039 }, { "epoch": 0.276191740804674, "grad_norm": 0.9653316626874986, "learning_rate": 1.958052453514586e-05, "loss": 0.33451759815216064, "step": 1040 }, { "epoch": 0.27645730978621696, "grad_norm": 1.0328342572912144, "learning_rate": 1.9579265249441216e-05, "loss": 0.3228047788143158, "step": 1041 }, { "epoch": 0.2767228787677599, "grad_norm": 1.0944658380016739, "learning_rate": 1.957800411696721e-05, "loss": 0.36992791295051575, "step": 1042 }, { "epoch": 0.27698844774930287, "grad_norm": 0.9799580951396849, "learning_rate": 1.9576741137966967e-05, "loss": 0.3072342276573181, "step": 1043 }, { "epoch": 0.2772540167308458, "grad_norm": 1.0637046756594408, "learning_rate": 1.9575476312683985e-05, "loss": 0.3372080326080322, "step": 1044 }, { "epoch": 0.27751958571238877, "grad_norm": 1.0509701364189301, "learning_rate": 1.95742096413621e-05, "loss": 0.34725332260131836, "step": 1045 }, { "epoch": 0.2777851546939317, "grad_norm": 1.1053591471100805, "learning_rate": 1.9572941124245516e-05, "loss": 0.36714982986450195, "step": 1046 }, { "epoch": 0.27805072367547473, "grad_norm": 1.208127444221669, "learning_rate": 1.957167076157878e-05, "loss": 0.4163498282432556, "step": 1047 }, { "epoch": 0.2783162926570177, "grad_norm": 1.1861975128714084, "learning_rate": 1.9570398553606815e-05, "loss": 0.40059348940849304, "step": 1048 }, { "epoch": 0.27858186163856063, "grad_norm": 1.085993120538819, "learning_rate": 1.956912450057488e-05, "loss": 0.3622320294380188, "step": 1049 }, { "epoch": 0.2788474306201036, "grad_norm": 1.1326017870689584, "learning_rate": 1.9567848602728595e-05, "loss": 0.35159534215927124, "step": 1050 }, { "epoch": 0.27911299960164654, "grad_norm": 0.9516936878211085, "learning_rate": 1.9566570860313944e-05, "loss": 0.3093762993812561, "step": 1051 }, { "epoch": 0.2793785685831895, "grad_norm": 1.040326152894859, "learning_rate": 1.9565291273577255e-05, "loss": 0.341474324464798, "step": 1052 }, { "epoch": 0.27964413756473244, "grad_norm": 1.0885626452470811, "learning_rate": 1.9564009842765225e-05, "loss": 0.35376566648483276, "step": 1053 }, { "epoch": 0.2799097065462754, "grad_norm": 1.09154548256864, "learning_rate": 1.9562726568124892e-05, "loss": 0.3487662374973297, "step": 1054 }, { "epoch": 0.28017527552781835, "grad_norm": 1.014222924008021, "learning_rate": 1.956144144990366e-05, "loss": 0.3610745370388031, "step": 1055 }, { "epoch": 0.2804408445093613, "grad_norm": 0.9789890869027496, "learning_rate": 1.9560154488349284e-05, "loss": 0.33230137825012207, "step": 1056 }, { "epoch": 0.28070641349090425, "grad_norm": 1.0104241821081763, "learning_rate": 1.9558865683709875e-05, "loss": 0.310351699590683, "step": 1057 }, { "epoch": 0.2809719824724472, "grad_norm": 1.1188708821966176, "learning_rate": 1.9557575036233897e-05, "loss": 0.39930224418640137, "step": 1058 }, { "epoch": 0.28123755145399015, "grad_norm": 1.0498907782820184, "learning_rate": 1.955628254617017e-05, "loss": 0.3345295488834381, "step": 1059 }, { "epoch": 0.2815031204355331, "grad_norm": 1.1059864789744056, "learning_rate": 1.9554988213767875e-05, "loss": 0.37963107228279114, "step": 1060 }, { "epoch": 0.2817686894170761, "grad_norm": 1.0825219178132603, "learning_rate": 1.9553692039276545e-05, "loss": 0.3923654854297638, "step": 1061 }, { "epoch": 0.28203425839861906, "grad_norm": 1.0736283126776336, "learning_rate": 1.9552394022946068e-05, "loss": 0.363646924495697, "step": 1062 }, { "epoch": 0.282299827380162, "grad_norm": 1.1051684289136041, "learning_rate": 1.9551094165026677e-05, "loss": 0.35486382246017456, "step": 1063 }, { "epoch": 0.28256539636170497, "grad_norm": 1.0845117937449689, "learning_rate": 1.954979246576898e-05, "loss": 0.35215455293655396, "step": 1064 }, { "epoch": 0.2828309653432479, "grad_norm": 1.1587243435425785, "learning_rate": 1.9548488925423924e-05, "loss": 0.3936809003353119, "step": 1065 }, { "epoch": 0.28309653432479087, "grad_norm": 1.0399965264634783, "learning_rate": 1.9547183544242817e-05, "loss": 0.36852866411209106, "step": 1066 }, { "epoch": 0.2833621033063338, "grad_norm": 1.0679817467710029, "learning_rate": 1.954587632247732e-05, "loss": 0.3552001714706421, "step": 1067 }, { "epoch": 0.2836276722878768, "grad_norm": 1.1330169189394568, "learning_rate": 1.9544567260379455e-05, "loss": 0.3684498965740204, "step": 1068 }, { "epoch": 0.2838932412694197, "grad_norm": 0.9857931835351914, "learning_rate": 1.9543256358201586e-05, "loss": 0.3367026448249817, "step": 1069 }, { "epoch": 0.2841588102509627, "grad_norm": 1.0677692738667734, "learning_rate": 1.9541943616196443e-05, "loss": 0.3702335059642792, "step": 1070 }, { "epoch": 0.28442437923250563, "grad_norm": 1.1114119189633371, "learning_rate": 1.9540629034617108e-05, "loss": 0.3430984318256378, "step": 1071 }, { "epoch": 0.2846899482140486, "grad_norm": 1.1406170357402363, "learning_rate": 1.953931261371702e-05, "loss": 0.36514735221862793, "step": 1072 }, { "epoch": 0.28495551719559153, "grad_norm": 1.0428104806049732, "learning_rate": 1.9537994353749963e-05, "loss": 0.3524945080280304, "step": 1073 }, { "epoch": 0.2852210861771345, "grad_norm": 1.0283973360981475, "learning_rate": 1.9536674254970088e-05, "loss": 0.32405683398246765, "step": 1074 }, { "epoch": 0.2854866551586775, "grad_norm": 1.0649875575316718, "learning_rate": 1.9535352317631888e-05, "loss": 0.30863165855407715, "step": 1075 }, { "epoch": 0.28575222414022045, "grad_norm": 1.0647565002745494, "learning_rate": 1.953402854199022e-05, "loss": 0.34343889355659485, "step": 1076 }, { "epoch": 0.2860177931217634, "grad_norm": 1.2339349330872973, "learning_rate": 1.9532702928300292e-05, "loss": 0.3639434576034546, "step": 1077 }, { "epoch": 0.28628336210330635, "grad_norm": 1.0888261251069975, "learning_rate": 1.9531375476817667e-05, "loss": 0.3380300998687744, "step": 1078 }, { "epoch": 0.2865489310848493, "grad_norm": 1.1078839119175599, "learning_rate": 1.9530046187798267e-05, "loss": 0.3323265016078949, "step": 1079 }, { "epoch": 0.28681450006639225, "grad_norm": 1.0529271541493659, "learning_rate": 1.9528715061498355e-05, "loss": 0.3439220190048218, "step": 1080 }, { "epoch": 0.2870800690479352, "grad_norm": 1.088357435010649, "learning_rate": 1.952738209817456e-05, "loss": 0.36376965045928955, "step": 1081 }, { "epoch": 0.28734563802947816, "grad_norm": 1.0188116446188513, "learning_rate": 1.952604729808386e-05, "loss": 0.3281211853027344, "step": 1082 }, { "epoch": 0.2876112070110211, "grad_norm": 1.0999135645201878, "learning_rate": 1.9524710661483594e-05, "loss": 0.3538089990615845, "step": 1083 }, { "epoch": 0.28787677599256406, "grad_norm": 1.1475903462769852, "learning_rate": 1.9523372188631442e-05, "loss": 0.3982803225517273, "step": 1084 }, { "epoch": 0.288142344974107, "grad_norm": 1.11408923860859, "learning_rate": 1.9522031879785453e-05, "loss": 0.3958810567855835, "step": 1085 }, { "epoch": 0.28840791395564996, "grad_norm": 1.191451776763126, "learning_rate": 1.9520689735204016e-05, "loss": 0.40133988857269287, "step": 1086 }, { "epoch": 0.2886734829371929, "grad_norm": 1.048862195613205, "learning_rate": 1.9519345755145886e-05, "loss": 0.32411646842956543, "step": 1087 }, { "epoch": 0.28893905191873587, "grad_norm": 1.210003646730205, "learning_rate": 1.9517999939870166e-05, "loss": 0.38678207993507385, "step": 1088 }, { "epoch": 0.2892046209002789, "grad_norm": 1.0663258874668164, "learning_rate": 1.951665228963631e-05, "loss": 0.36829686164855957, "step": 1089 }, { "epoch": 0.2894701898818218, "grad_norm": 0.9884592653808488, "learning_rate": 1.9515302804704134e-05, "loss": 0.38631704449653625, "step": 1090 }, { "epoch": 0.2897357588633648, "grad_norm": 1.1934503112083867, "learning_rate": 1.9513951485333798e-05, "loss": 0.39288902282714844, "step": 1091 }, { "epoch": 0.29000132784490773, "grad_norm": 1.0804742457342014, "learning_rate": 1.9512598331785822e-05, "loss": 0.3655658960342407, "step": 1092 }, { "epoch": 0.2902668968264507, "grad_norm": 0.9929300268939649, "learning_rate": 1.9511243344321076e-05, "loss": 0.3263852596282959, "step": 1093 }, { "epoch": 0.29053246580799363, "grad_norm": 1.1166275426043832, "learning_rate": 1.9509886523200792e-05, "loss": 0.37939125299453735, "step": 1094 }, { "epoch": 0.2907980347895366, "grad_norm": 1.074761796186792, "learning_rate": 1.9508527868686543e-05, "loss": 0.34218865633010864, "step": 1095 }, { "epoch": 0.29106360377107954, "grad_norm": 1.036633851483027, "learning_rate": 1.9507167381040263e-05, "loss": 0.368261456489563, "step": 1096 }, { "epoch": 0.2913291727526225, "grad_norm": 1.083724731335207, "learning_rate": 1.950580506052424e-05, "loss": 0.36133286356925964, "step": 1097 }, { "epoch": 0.29159474173416544, "grad_norm": 1.0542758401630365, "learning_rate": 1.9504440907401113e-05, "loss": 0.3667418658733368, "step": 1098 }, { "epoch": 0.2918603107157084, "grad_norm": 0.9961595646698646, "learning_rate": 1.950307492193387e-05, "loss": 0.34444570541381836, "step": 1099 }, { "epoch": 0.29212587969725134, "grad_norm": 1.1203470867439278, "learning_rate": 1.9501707104385863e-05, "loss": 0.41261589527130127, "step": 1100 }, { "epoch": 0.2923914486787943, "grad_norm": 1.0847270622391922, "learning_rate": 1.9500337455020788e-05, "loss": 0.3762981593608856, "step": 1101 }, { "epoch": 0.29265701766033725, "grad_norm": 1.108635996430537, "learning_rate": 1.9498965974102697e-05, "loss": 0.3527417480945587, "step": 1102 }, { "epoch": 0.29292258664188026, "grad_norm": 1.1555485155020386, "learning_rate": 1.9497592661895996e-05, "loss": 0.34812286496162415, "step": 1103 }, { "epoch": 0.2931881556234232, "grad_norm": 0.9844968948580171, "learning_rate": 1.9496217518665444e-05, "loss": 0.33663398027420044, "step": 1104 }, { "epoch": 0.29345372460496616, "grad_norm": 0.997090208380272, "learning_rate": 1.9494840544676156e-05, "loss": 0.3632991313934326, "step": 1105 }, { "epoch": 0.2937192935865091, "grad_norm": 1.3515018592791732, "learning_rate": 1.9493461740193587e-05, "loss": 0.37389490008354187, "step": 1106 }, { "epoch": 0.29398486256805206, "grad_norm": 1.204356467911551, "learning_rate": 1.949208110548356e-05, "loss": 0.3634020686149597, "step": 1107 }, { "epoch": 0.294250431549595, "grad_norm": 1.0778805299295515, "learning_rate": 1.9490698640812247e-05, "loss": 0.36032742261886597, "step": 1108 }, { "epoch": 0.29451600053113797, "grad_norm": 1.1504972318858309, "learning_rate": 1.9489314346446164e-05, "loss": 0.3385765552520752, "step": 1109 }, { "epoch": 0.2947815695126809, "grad_norm": 1.0946200184976398, "learning_rate": 1.9487928222652195e-05, "loss": 0.3751915991306305, "step": 1110 }, { "epoch": 0.29504713849422387, "grad_norm": 1.0903856446796527, "learning_rate": 1.9486540269697564e-05, "loss": 0.36069825291633606, "step": 1111 }, { "epoch": 0.2953127074757668, "grad_norm": 1.009573568422265, "learning_rate": 1.948515048784985e-05, "loss": 0.32703787088394165, "step": 1112 }, { "epoch": 0.2955782764573098, "grad_norm": 0.9196963642088989, "learning_rate": 1.948375887737699e-05, "loss": 0.312494158744812, "step": 1113 }, { "epoch": 0.2958438454388527, "grad_norm": 0.9880564768480579, "learning_rate": 1.9482365438547272e-05, "loss": 0.30626165866851807, "step": 1114 }, { "epoch": 0.2961094144203957, "grad_norm": 1.07827456569524, "learning_rate": 1.948097017162933e-05, "loss": 0.3625817894935608, "step": 1115 }, { "epoch": 0.29637498340193863, "grad_norm": 1.1789711489550672, "learning_rate": 1.9479573076892152e-05, "loss": 0.38403773307800293, "step": 1116 }, { "epoch": 0.2966405523834816, "grad_norm": 1.0638061154391991, "learning_rate": 1.9478174154605093e-05, "loss": 0.3645164966583252, "step": 1117 }, { "epoch": 0.2969061213650246, "grad_norm": 1.0428170431433939, "learning_rate": 1.9476773405037836e-05, "loss": 0.3714389503002167, "step": 1118 }, { "epoch": 0.29717169034656754, "grad_norm": 1.1488169814057956, "learning_rate": 1.9475370828460436e-05, "loss": 0.39809900522232056, "step": 1119 }, { "epoch": 0.2974372593281105, "grad_norm": 1.0702503358715294, "learning_rate": 1.9473966425143292e-05, "loss": 0.3698490262031555, "step": 1120 }, { "epoch": 0.29770282830965344, "grad_norm": 1.0166542138266799, "learning_rate": 1.947256019535716e-05, "loss": 0.3072658181190491, "step": 1121 }, { "epoch": 0.2979683972911964, "grad_norm": 1.0479599499698302, "learning_rate": 1.947115213937314e-05, "loss": 0.3294365406036377, "step": 1122 }, { "epoch": 0.29823396627273935, "grad_norm": 1.007749929257712, "learning_rate": 1.9469742257462684e-05, "loss": 0.34933674335479736, "step": 1123 }, { "epoch": 0.2984995352542823, "grad_norm": 1.133473784296847, "learning_rate": 1.946833054989761e-05, "loss": 0.34586772322654724, "step": 1124 }, { "epoch": 0.29876510423582525, "grad_norm": 1.0225090189343862, "learning_rate": 1.9466917016950076e-05, "loss": 0.33158159255981445, "step": 1125 }, { "epoch": 0.2990306732173682, "grad_norm": 1.0162208348084125, "learning_rate": 1.946550165889259e-05, "loss": 0.32665887475013733, "step": 1126 }, { "epoch": 0.29929624219891116, "grad_norm": 1.1065475895733048, "learning_rate": 1.946408447599802e-05, "loss": 0.3333032429218292, "step": 1127 }, { "epoch": 0.2995618111804541, "grad_norm": 1.0958997421479173, "learning_rate": 1.9462665468539582e-05, "loss": 0.3747228980064392, "step": 1128 }, { "epoch": 0.29982738016199706, "grad_norm": 0.9447906277138843, "learning_rate": 1.9461244636790845e-05, "loss": 0.34040436148643494, "step": 1129 }, { "epoch": 0.30009294914354, "grad_norm": 1.0062775259583612, "learning_rate": 1.9459821981025723e-05, "loss": 0.3279584050178528, "step": 1130 }, { "epoch": 0.30035851812508296, "grad_norm": 1.136819731097147, "learning_rate": 1.9458397501518496e-05, "loss": 0.33507707715034485, "step": 1131 }, { "epoch": 0.30062408710662597, "grad_norm": 0.9978141677663763, "learning_rate": 1.945697119854378e-05, "loss": 0.3511529862880707, "step": 1132 }, { "epoch": 0.3008896560881689, "grad_norm": 1.1038696900269844, "learning_rate": 1.945554307237655e-05, "loss": 0.33260345458984375, "step": 1133 }, { "epoch": 0.3011552250697119, "grad_norm": 1.1267244347055163, "learning_rate": 1.9454113123292133e-05, "loss": 0.37698423862457275, "step": 1134 }, { "epoch": 0.3014207940512548, "grad_norm": 1.0482054605062838, "learning_rate": 1.945268135156621e-05, "loss": 0.34843316674232483, "step": 1135 }, { "epoch": 0.3016863630327978, "grad_norm": 1.1518938911568848, "learning_rate": 1.9451247757474805e-05, "loss": 0.38723987340927124, "step": 1136 }, { "epoch": 0.30195193201434073, "grad_norm": 1.0597410032778982, "learning_rate": 1.9449812341294302e-05, "loss": 0.3836795389652252, "step": 1137 }, { "epoch": 0.3022175009958837, "grad_norm": 0.9828275773453091, "learning_rate": 1.9448375103301424e-05, "loss": 0.3362433612346649, "step": 1138 }, { "epoch": 0.30248306997742663, "grad_norm": 1.0750556057741842, "learning_rate": 1.9446936043773264e-05, "loss": 0.3615792393684387, "step": 1139 }, { "epoch": 0.3027486389589696, "grad_norm": 1.0233339727957385, "learning_rate": 1.944549516298725e-05, "loss": 0.33693915605545044, "step": 1140 }, { "epoch": 0.30301420794051254, "grad_norm": 1.0074205515838075, "learning_rate": 1.9444052461221167e-05, "loss": 0.32611170411109924, "step": 1141 }, { "epoch": 0.3032797769220555, "grad_norm": 1.0257687736898828, "learning_rate": 1.9442607938753153e-05, "loss": 0.3504132032394409, "step": 1142 }, { "epoch": 0.30354534590359844, "grad_norm": 1.081217851264946, "learning_rate": 1.944116159586169e-05, "loss": 0.3598168194293976, "step": 1143 }, { "epoch": 0.3038109148851414, "grad_norm": 1.025673115447757, "learning_rate": 1.9439713432825625e-05, "loss": 0.33447909355163574, "step": 1144 }, { "epoch": 0.30407648386668434, "grad_norm": 0.9795127759513904, "learning_rate": 1.943826344992414e-05, "loss": 0.34026333689689636, "step": 1145 }, { "epoch": 0.30434205284822735, "grad_norm": 1.070042442644686, "learning_rate": 1.9436811647436772e-05, "loss": 0.323203980922699, "step": 1146 }, { "epoch": 0.3046076218297703, "grad_norm": 1.0588861737680213, "learning_rate": 1.943535802564342e-05, "loss": 0.332398921251297, "step": 1147 }, { "epoch": 0.30487319081131325, "grad_norm": 1.175168490214782, "learning_rate": 1.9433902584824316e-05, "loss": 0.3882995545864105, "step": 1148 }, { "epoch": 0.3051387597928562, "grad_norm": 1.093435738226519, "learning_rate": 1.943244532526006e-05, "loss": 0.35262739658355713, "step": 1149 }, { "epoch": 0.30540432877439916, "grad_norm": 1.1043029209432185, "learning_rate": 1.9430986247231586e-05, "loss": 0.39694511890411377, "step": 1150 }, { "epoch": 0.3056698977559421, "grad_norm": 1.1276348856512544, "learning_rate": 1.9429525351020197e-05, "loss": 0.3692580759525299, "step": 1151 }, { "epoch": 0.30593546673748506, "grad_norm": 1.1284903074468042, "learning_rate": 1.9428062636907526e-05, "loss": 0.3685402572154999, "step": 1152 }, { "epoch": 0.306201035719028, "grad_norm": 1.1120189967723886, "learning_rate": 1.9426598105175575e-05, "loss": 0.37557253241539, "step": 1153 }, { "epoch": 0.30646660470057097, "grad_norm": 0.9544414078231065, "learning_rate": 1.9425131756106687e-05, "loss": 0.3323203921318054, "step": 1154 }, { "epoch": 0.3067321736821139, "grad_norm": 1.085159318227953, "learning_rate": 1.9423663589983554e-05, "loss": 0.37262290716171265, "step": 1155 }, { "epoch": 0.30699774266365687, "grad_norm": 1.138203326668225, "learning_rate": 1.9422193607089224e-05, "loss": 0.36621618270874023, "step": 1156 }, { "epoch": 0.3072633116451998, "grad_norm": 1.0326975743253168, "learning_rate": 1.942072180770709e-05, "loss": 0.3844982385635376, "step": 1157 }, { "epoch": 0.3075288806267428, "grad_norm": 0.9983252957319158, "learning_rate": 1.94192481921209e-05, "loss": 0.3229531943798065, "step": 1158 }, { "epoch": 0.3077944496082857, "grad_norm": 1.0805327657153956, "learning_rate": 1.9417772760614745e-05, "loss": 0.34862661361694336, "step": 1159 }, { "epoch": 0.30806001858982873, "grad_norm": 1.0329581193958253, "learning_rate": 1.941629551347308e-05, "loss": 0.35496509075164795, "step": 1160 }, { "epoch": 0.3083255875713717, "grad_norm": 1.051163133463375, "learning_rate": 1.9414816450980686e-05, "loss": 0.3695065975189209, "step": 1161 }, { "epoch": 0.30859115655291464, "grad_norm": 1.0254769076684076, "learning_rate": 1.9413335573422723e-05, "loss": 0.3472525179386139, "step": 1162 }, { "epoch": 0.3088567255344576, "grad_norm": 1.008969123299064, "learning_rate": 1.9411852881084683e-05, "loss": 0.3447483479976654, "step": 1163 }, { "epoch": 0.30912229451600054, "grad_norm": 0.9333424416365893, "learning_rate": 1.941036837425241e-05, "loss": 0.31047824025154114, "step": 1164 }, { "epoch": 0.3093878634975435, "grad_norm": 1.0570471012152007, "learning_rate": 1.9408882053212094e-05, "loss": 0.34502410888671875, "step": 1165 }, { "epoch": 0.30965343247908644, "grad_norm": 1.1849442151759089, "learning_rate": 1.940739391825029e-05, "loss": 0.3663109540939331, "step": 1166 }, { "epoch": 0.3099190014606294, "grad_norm": 1.1136723468346887, "learning_rate": 1.9405903969653887e-05, "loss": 0.3635792136192322, "step": 1167 }, { "epoch": 0.31018457044217235, "grad_norm": 1.0769441486287206, "learning_rate": 1.940441220771013e-05, "loss": 0.359528124332428, "step": 1168 }, { "epoch": 0.3104501394237153, "grad_norm": 1.043185528474707, "learning_rate": 1.9402918632706618e-05, "loss": 0.32566630840301514, "step": 1169 }, { "epoch": 0.31071570840525825, "grad_norm": 1.0286897614370414, "learning_rate": 1.940142324493129e-05, "loss": 0.34758460521698, "step": 1170 }, { "epoch": 0.3109812773868012, "grad_norm": 1.0148570847451444, "learning_rate": 1.9399926044672438e-05, "loss": 0.3484055995941162, "step": 1171 }, { "epoch": 0.31124684636834415, "grad_norm": 1.1806099587394492, "learning_rate": 1.93984270322187e-05, "loss": 0.41958773136138916, "step": 1172 }, { "epoch": 0.3115124153498871, "grad_norm": 1.085314216258339, "learning_rate": 1.9396926207859085e-05, "loss": 0.3578398525714874, "step": 1173 }, { "epoch": 0.3117779843314301, "grad_norm": 1.0721505496116728, "learning_rate": 1.9395423571882917e-05, "loss": 0.38140422105789185, "step": 1174 }, { "epoch": 0.31204355331297307, "grad_norm": 1.1224661464468277, "learning_rate": 1.9393919124579898e-05, "loss": 0.3782861828804016, "step": 1175 }, { "epoch": 0.312309122294516, "grad_norm": 1.0482874367837718, "learning_rate": 1.939241286624006e-05, "loss": 0.3211040496826172, "step": 1176 }, { "epoch": 0.31257469127605897, "grad_norm": 0.9909015391020882, "learning_rate": 1.9390904797153795e-05, "loss": 0.3090783953666687, "step": 1177 }, { "epoch": 0.3128402602576019, "grad_norm": 1.0203166402095418, "learning_rate": 1.938939491761184e-05, "loss": 0.3542889654636383, "step": 1178 }, { "epoch": 0.3131058292391449, "grad_norm": 1.016567110972503, "learning_rate": 1.9387883227905285e-05, "loss": 0.369164377450943, "step": 1179 }, { "epoch": 0.3133713982206878, "grad_norm": 1.1492868354113897, "learning_rate": 1.9386369728325562e-05, "loss": 0.35200801491737366, "step": 1180 }, { "epoch": 0.3136369672022308, "grad_norm": 1.1332626811675575, "learning_rate": 1.9384854419164454e-05, "loss": 0.3696276843547821, "step": 1181 }, { "epoch": 0.31390253618377373, "grad_norm": 0.9856387823657043, "learning_rate": 1.9383337300714104e-05, "loss": 0.3403652012348175, "step": 1182 }, { "epoch": 0.3141681051653167, "grad_norm": 0.9608300998441986, "learning_rate": 1.9381818373266987e-05, "loss": 0.3307063579559326, "step": 1183 }, { "epoch": 0.31443367414685963, "grad_norm": 1.002604353314113, "learning_rate": 1.9380297637115933e-05, "loss": 0.3223465085029602, "step": 1184 }, { "epoch": 0.3146992431284026, "grad_norm": 1.1668926481270334, "learning_rate": 1.9378775092554124e-05, "loss": 0.4013838768005371, "step": 1185 }, { "epoch": 0.31496481210994554, "grad_norm": 1.2376602965184098, "learning_rate": 1.9377250739875095e-05, "loss": 0.3596574664115906, "step": 1186 }, { "epoch": 0.3152303810914885, "grad_norm": 1.0683740579575798, "learning_rate": 1.937572457937271e-05, "loss": 0.41639968752861023, "step": 1187 }, { "epoch": 0.3154959500730315, "grad_norm": 0.950341293536979, "learning_rate": 1.9374196611341212e-05, "loss": 0.3001318573951721, "step": 1188 }, { "epoch": 0.31576151905457445, "grad_norm": 1.0390515723802394, "learning_rate": 1.937266683607516e-05, "loss": 0.33238667249679565, "step": 1189 }, { "epoch": 0.3160270880361174, "grad_norm": 1.0559788990716998, "learning_rate": 1.9371135253869483e-05, "loss": 0.33638086915016174, "step": 1190 }, { "epoch": 0.31629265701766035, "grad_norm": 1.0736881782093415, "learning_rate": 1.9369601865019452e-05, "loss": 0.34445878863334656, "step": 1191 }, { "epoch": 0.3165582259992033, "grad_norm": 1.116672373820781, "learning_rate": 1.9368066669820684e-05, "loss": 0.33554553985595703, "step": 1192 }, { "epoch": 0.31682379498074625, "grad_norm": 1.2940820576034424, "learning_rate": 1.936652966856915e-05, "loss": 0.3668493628501892, "step": 1193 }, { "epoch": 0.3170893639622892, "grad_norm": 1.1460266164336763, "learning_rate": 1.9364990861561163e-05, "loss": 0.3813396990299225, "step": 1194 }, { "epoch": 0.31735493294383216, "grad_norm": 1.048871056336621, "learning_rate": 1.936345024909339e-05, "loss": 0.33625900745391846, "step": 1195 }, { "epoch": 0.3176205019253751, "grad_norm": 1.0238786804477913, "learning_rate": 1.9361907831462836e-05, "loss": 0.31131428480148315, "step": 1196 }, { "epoch": 0.31788607090691806, "grad_norm": 0.9751456398999766, "learning_rate": 1.936036360896687e-05, "loss": 0.32571589946746826, "step": 1197 }, { "epoch": 0.318151639888461, "grad_norm": 1.1296061558872548, "learning_rate": 1.9358817581903193e-05, "loss": 0.36207717657089233, "step": 1198 }, { "epoch": 0.31841720887000396, "grad_norm": 1.062344543153862, "learning_rate": 1.9357269750569864e-05, "loss": 0.3743855059146881, "step": 1199 }, { "epoch": 0.3186827778515469, "grad_norm": 1.1254060799620074, "learning_rate": 1.9355720115265283e-05, "loss": 0.3862137794494629, "step": 1200 }, { "epoch": 0.31894834683308987, "grad_norm": 1.1135871061204583, "learning_rate": 1.935416867628821e-05, "loss": 0.33353424072265625, "step": 1201 }, { "epoch": 0.3192139158146329, "grad_norm": 9.759113022509682, "learning_rate": 1.9352615433937733e-05, "loss": 0.3277953267097473, "step": 1202 }, { "epoch": 0.3194794847961758, "grad_norm": 1.104737565124737, "learning_rate": 1.9351060388513304e-05, "loss": 0.38247692584991455, "step": 1203 }, { "epoch": 0.3197450537777188, "grad_norm": 1.0645482624060865, "learning_rate": 1.9349503540314724e-05, "loss": 0.3330709934234619, "step": 1204 }, { "epoch": 0.32001062275926173, "grad_norm": 1.1382102351287038, "learning_rate": 1.9347944889642125e-05, "loss": 0.3809449076652527, "step": 1205 }, { "epoch": 0.3202761917408047, "grad_norm": 0.9591245399492223, "learning_rate": 1.9346384436796e-05, "loss": 0.33623188734054565, "step": 1206 }, { "epoch": 0.32054176072234764, "grad_norm": 1.0414583731283242, "learning_rate": 1.9344822182077184e-05, "loss": 0.35465264320373535, "step": 1207 }, { "epoch": 0.3208073297038906, "grad_norm": 1.0419539507532576, "learning_rate": 1.9343258125786866e-05, "loss": 0.3532233238220215, "step": 1208 }, { "epoch": 0.32107289868543354, "grad_norm": 0.972348986123494, "learning_rate": 1.9341692268226572e-05, "loss": 0.3498903512954712, "step": 1209 }, { "epoch": 0.3213384676669765, "grad_norm": 1.057700016356479, "learning_rate": 1.9340124609698185e-05, "loss": 0.36124879121780396, "step": 1210 }, { "epoch": 0.32160403664851944, "grad_norm": 1.1891126233384992, "learning_rate": 1.933855515050393e-05, "loss": 0.38535434007644653, "step": 1211 }, { "epoch": 0.3218696056300624, "grad_norm": 1.1201736183139164, "learning_rate": 1.9336983890946383e-05, "loss": 0.39999911189079285, "step": 1212 }, { "epoch": 0.32213517461160535, "grad_norm": 1.1396977359685507, "learning_rate": 1.9335410831328457e-05, "loss": 0.3519791066646576, "step": 1213 }, { "epoch": 0.3224007435931483, "grad_norm": 1.1624196201646915, "learning_rate": 1.9333835971953424e-05, "loss": 0.35882368683815, "step": 1214 }, { "epoch": 0.32266631257469125, "grad_norm": 1.2089532713833613, "learning_rate": 1.93322593131249e-05, "loss": 0.36132001876831055, "step": 1215 }, { "epoch": 0.32293188155623426, "grad_norm": 1.0741169297687752, "learning_rate": 1.9330680855146845e-05, "loss": 0.36840832233428955, "step": 1216 }, { "epoch": 0.3231974505377772, "grad_norm": 1.1553079333487188, "learning_rate": 1.9329100598323563e-05, "loss": 0.3755963444709778, "step": 1217 }, { "epoch": 0.32346301951932016, "grad_norm": 1.1792888887437214, "learning_rate": 1.9327518542959717e-05, "loss": 0.400601863861084, "step": 1218 }, { "epoch": 0.3237285885008631, "grad_norm": 1.0342294479515497, "learning_rate": 1.93259346893603e-05, "loss": 0.3100128769874573, "step": 1219 }, { "epoch": 0.32399415748240606, "grad_norm": 1.0633052239431813, "learning_rate": 1.9324349037830665e-05, "loss": 0.3439880609512329, "step": 1220 }, { "epoch": 0.324259726463949, "grad_norm": 1.1634088151631976, "learning_rate": 1.9322761588676505e-05, "loss": 0.3612631559371948, "step": 1221 }, { "epoch": 0.32452529544549197, "grad_norm": 1.1292400605185824, "learning_rate": 1.9321172342203863e-05, "loss": 0.38202327489852905, "step": 1222 }, { "epoch": 0.3247908644270349, "grad_norm": 1.0253004653890312, "learning_rate": 1.9319581298719127e-05, "loss": 0.3405265808105469, "step": 1223 }, { "epoch": 0.32505643340857787, "grad_norm": 1.1499639639111883, "learning_rate": 1.931798845852903e-05, "loss": 0.4110907018184662, "step": 1224 }, { "epoch": 0.3253220023901208, "grad_norm": 1.2758168253168263, "learning_rate": 1.9316393821940654e-05, "loss": 0.3007548451423645, "step": 1225 }, { "epoch": 0.3255875713716638, "grad_norm": 2.5438383009304673, "learning_rate": 1.9314797389261426e-05, "loss": 0.32769858837127686, "step": 1226 }, { "epoch": 0.3258531403532067, "grad_norm": 1.0370704182885782, "learning_rate": 1.931319916079912e-05, "loss": 0.3619830310344696, "step": 1227 }, { "epoch": 0.3261187093347497, "grad_norm": 1.2983573666738066, "learning_rate": 1.9311599136861853e-05, "loss": 0.3470210134983063, "step": 1228 }, { "epoch": 0.32638427831629263, "grad_norm": 1.145435126731274, "learning_rate": 1.9309997317758093e-05, "loss": 0.3471665382385254, "step": 1229 }, { "epoch": 0.32664984729783564, "grad_norm": 1.0757592201920594, "learning_rate": 1.930839370379665e-05, "loss": 0.3717760443687439, "step": 1230 }, { "epoch": 0.3269154162793786, "grad_norm": 1.1173068015382108, "learning_rate": 1.9306788295286687e-05, "loss": 0.37279975414276123, "step": 1231 }, { "epoch": 0.32718098526092154, "grad_norm": 1.1523781527891401, "learning_rate": 1.93051810925377e-05, "loss": 0.3884522020816803, "step": 1232 }, { "epoch": 0.3274465542424645, "grad_norm": 1.1200431222189422, "learning_rate": 1.9303572095859545e-05, "loss": 0.4277604818344116, "step": 1233 }, { "epoch": 0.32771212322400745, "grad_norm": 1.1197023145386935, "learning_rate": 1.9301961305562415e-05, "loss": 0.2888818681240082, "step": 1234 }, { "epoch": 0.3279776922055504, "grad_norm": 1.0271311895282893, "learning_rate": 1.9300348721956854e-05, "loss": 0.3134511709213257, "step": 1235 }, { "epoch": 0.32824326118709335, "grad_norm": 1.0800984792046815, "learning_rate": 1.9298734345353745e-05, "loss": 0.38525280356407166, "step": 1236 }, { "epoch": 0.3285088301686363, "grad_norm": 1.134011749036063, "learning_rate": 1.9297118176064324e-05, "loss": 0.3692918121814728, "step": 1237 }, { "epoch": 0.32877439915017925, "grad_norm": 1.0348260315377988, "learning_rate": 1.9295500214400165e-05, "loss": 0.3443421721458435, "step": 1238 }, { "epoch": 0.3290399681317222, "grad_norm": 1.0129455663017488, "learning_rate": 1.9293880460673197e-05, "loss": 0.3228621184825897, "step": 1239 }, { "epoch": 0.32930553711326516, "grad_norm": 1.0116024279908165, "learning_rate": 1.9292258915195688e-05, "loss": 0.330943763256073, "step": 1240 }, { "epoch": 0.3295711060948081, "grad_norm": 1.1814587344422625, "learning_rate": 1.929063557828025e-05, "loss": 0.356637567281723, "step": 1241 }, { "epoch": 0.32983667507635106, "grad_norm": 0.9888159780201056, "learning_rate": 1.9289010450239843e-05, "loss": 0.3481113910675049, "step": 1242 }, { "epoch": 0.330102244057894, "grad_norm": 1.1876931030431213, "learning_rate": 1.928738353138778e-05, "loss": 0.36579906940460205, "step": 1243 }, { "epoch": 0.330367813039437, "grad_norm": 1.0281454378567854, "learning_rate": 1.9285754822037705e-05, "loss": 0.33025234937667847, "step": 1244 }, { "epoch": 0.33063338202097997, "grad_norm": 1.0936673160473642, "learning_rate": 1.9284124322503613e-05, "loss": 0.34848469495773315, "step": 1245 }, { "epoch": 0.3308989510025229, "grad_norm": 1.1232405017277023, "learning_rate": 1.928249203309985e-05, "loss": 0.3523876368999481, "step": 1246 }, { "epoch": 0.3311645199840659, "grad_norm": 1.140153458583263, "learning_rate": 1.92808579541411e-05, "loss": 0.3695565462112427, "step": 1247 }, { "epoch": 0.3314300889656088, "grad_norm": 1.0267337296320096, "learning_rate": 1.9279222085942396e-05, "loss": 0.3557945191860199, "step": 1248 }, { "epoch": 0.3316956579471518, "grad_norm": 1.0261133198060035, "learning_rate": 1.9277584428819113e-05, "loss": 0.3015502989292145, "step": 1249 }, { "epoch": 0.33196122692869473, "grad_norm": 0.9384869314897972, "learning_rate": 1.9275944983086964e-05, "loss": 0.31333664059638977, "step": 1250 }, { "epoch": 0.3322267959102377, "grad_norm": 1.103154580638619, "learning_rate": 1.9274303749062028e-05, "loss": 0.36595287919044495, "step": 1251 }, { "epoch": 0.33249236489178063, "grad_norm": 1.0573816777840739, "learning_rate": 1.9272660727060705e-05, "loss": 0.3400266170501709, "step": 1252 }, { "epoch": 0.3327579338733236, "grad_norm": 1.0994664368429343, "learning_rate": 1.927101591739976e-05, "loss": 0.3642529547214508, "step": 1253 }, { "epoch": 0.33302350285486654, "grad_norm": 1.08059410662081, "learning_rate": 1.926936932039628e-05, "loss": 0.3418777287006378, "step": 1254 }, { "epoch": 0.3332890718364095, "grad_norm": 1.0881678177934593, "learning_rate": 1.9267720936367723e-05, "loss": 0.33382388949394226, "step": 1255 }, { "epoch": 0.33355464081795244, "grad_norm": 1.1227567600503816, "learning_rate": 1.926607076563187e-05, "loss": 0.36257779598236084, "step": 1256 }, { "epoch": 0.3338202097994954, "grad_norm": 1.5546101865012443, "learning_rate": 1.926441880850686e-05, "loss": 0.3018002510070801, "step": 1257 }, { "epoch": 0.3340857787810384, "grad_norm": 1.0263747105982135, "learning_rate": 1.9262765065311165e-05, "loss": 0.3373662233352661, "step": 1258 }, { "epoch": 0.33435134776258135, "grad_norm": 1.0001644182280367, "learning_rate": 1.9261109536363613e-05, "loss": 0.3555397391319275, "step": 1259 }, { "epoch": 0.3346169167441243, "grad_norm": 1.1519069907937776, "learning_rate": 1.925945222198336e-05, "loss": 0.3004256784915924, "step": 1260 }, { "epoch": 0.33488248572566726, "grad_norm": 2.328412351070072, "learning_rate": 1.925779312248993e-05, "loss": 0.33299940824508667, "step": 1261 }, { "epoch": 0.3351480547072102, "grad_norm": 1.0617967738999583, "learning_rate": 1.9256132238203166e-05, "loss": 0.3715725541114807, "step": 1262 }, { "epoch": 0.33541362368875316, "grad_norm": 1.0140049717249513, "learning_rate": 1.9254469569443274e-05, "loss": 0.35133951902389526, "step": 1263 }, { "epoch": 0.3356791926702961, "grad_norm": 0.9980129680534503, "learning_rate": 1.92528051165308e-05, "loss": 0.3328818380832672, "step": 1264 }, { "epoch": 0.33594476165183906, "grad_norm": 1.0764552464682182, "learning_rate": 1.925113887978662e-05, "loss": 0.3665468692779541, "step": 1265 }, { "epoch": 0.336210330633382, "grad_norm": 1.0446302802374996, "learning_rate": 1.9249470859531976e-05, "loss": 0.3489571511745453, "step": 1266 }, { "epoch": 0.33647589961492497, "grad_norm": 1.0629721705272823, "learning_rate": 1.9247801056088433e-05, "loss": 0.30038982629776, "step": 1267 }, { "epoch": 0.3367414685964679, "grad_norm": 1.1798569183028156, "learning_rate": 1.9246129469777918e-05, "loss": 0.4163355827331543, "step": 1268 }, { "epoch": 0.33700703757801087, "grad_norm": 1.0428552063046848, "learning_rate": 1.924445610092269e-05, "loss": 0.33687612414360046, "step": 1269 }, { "epoch": 0.3372726065595538, "grad_norm": 1.0466869124167506, "learning_rate": 1.924278094984535e-05, "loss": 0.3448297679424286, "step": 1270 }, { "epoch": 0.3375381755410968, "grad_norm": 1.0979384797680924, "learning_rate": 1.9241104016868853e-05, "loss": 0.35257208347320557, "step": 1271 }, { "epoch": 0.3378037445226398, "grad_norm": 1.0794393535441016, "learning_rate": 1.9239425302316487e-05, "loss": 0.34880566596984863, "step": 1272 }, { "epoch": 0.33806931350418273, "grad_norm": 1.1081978913885613, "learning_rate": 1.9237744806511895e-05, "loss": 0.33643782138824463, "step": 1273 }, { "epoch": 0.3383348824857257, "grad_norm": 1.0185962864877929, "learning_rate": 1.9236062529779057e-05, "loss": 0.32345050573349, "step": 1274 }, { "epoch": 0.33860045146726864, "grad_norm": 1.0547576972102612, "learning_rate": 1.9234378472442286e-05, "loss": 0.33983978629112244, "step": 1275 }, { "epoch": 0.3388660204488116, "grad_norm": 1.0305326470674594, "learning_rate": 1.923269263482626e-05, "loss": 0.32825571298599243, "step": 1276 }, { "epoch": 0.33913158943035454, "grad_norm": 1.0836151603415423, "learning_rate": 1.923100501725598e-05, "loss": 0.3434044122695923, "step": 1277 }, { "epoch": 0.3393971584118975, "grad_norm": 1.1293248576076373, "learning_rate": 1.9229315620056805e-05, "loss": 0.3463204503059387, "step": 1278 }, { "epoch": 0.33966272739344044, "grad_norm": 1.0476463818396518, "learning_rate": 1.9227624443554425e-05, "loss": 0.3608240485191345, "step": 1279 }, { "epoch": 0.3399282963749834, "grad_norm": 1.111712780266586, "learning_rate": 1.9225931488074882e-05, "loss": 0.36131763458251953, "step": 1280 }, { "epoch": 0.34019386535652635, "grad_norm": 0.9948222919660873, "learning_rate": 1.922423675394456e-05, "loss": 0.3270101547241211, "step": 1281 }, { "epoch": 0.3404594343380693, "grad_norm": 1.1047356141038558, "learning_rate": 1.922254024149018e-05, "loss": 0.3551778495311737, "step": 1282 }, { "epoch": 0.34072500331961225, "grad_norm": 1.1057498393465535, "learning_rate": 1.9220841951038815e-05, "loss": 0.3686622381210327, "step": 1283 }, { "epoch": 0.3409905723011552, "grad_norm": 1.0810198379819234, "learning_rate": 1.921914188291787e-05, "loss": 0.35161536931991577, "step": 1284 }, { "epoch": 0.34125614128269816, "grad_norm": 1.1489267376414198, "learning_rate": 1.92174400374551e-05, "loss": 0.3549870550632477, "step": 1285 }, { "epoch": 0.34152171026424116, "grad_norm": 1.0904860537070935, "learning_rate": 1.9215736414978593e-05, "loss": 0.36780738830566406, "step": 1286 }, { "epoch": 0.3417872792457841, "grad_norm": 1.132171748367688, "learning_rate": 1.9214031015816803e-05, "loss": 0.36060047149658203, "step": 1287 }, { "epoch": 0.34205284822732707, "grad_norm": 1.0753334155968608, "learning_rate": 1.9212323840298502e-05, "loss": 0.32578715682029724, "step": 1288 }, { "epoch": 0.34231841720887, "grad_norm": 1.0380534929488934, "learning_rate": 1.9210614888752813e-05, "loss": 0.3505493402481079, "step": 1289 }, { "epoch": 0.34258398619041297, "grad_norm": 1.0227959332298084, "learning_rate": 1.9208904161509203e-05, "loss": 0.32681795954704285, "step": 1290 }, { "epoch": 0.3428495551719559, "grad_norm": 1.0227973616384467, "learning_rate": 1.9207191658897473e-05, "loss": 0.34808459877967834, "step": 1291 }, { "epoch": 0.3431151241534989, "grad_norm": 1.0810974703490968, "learning_rate": 1.920547738124779e-05, "loss": 0.3588678240776062, "step": 1292 }, { "epoch": 0.3433806931350418, "grad_norm": 1.2030053357742059, "learning_rate": 1.9203761328890626e-05, "loss": 0.3528832495212555, "step": 1293 }, { "epoch": 0.3436462621165848, "grad_norm": 1.35729757891191, "learning_rate": 1.9202043502156833e-05, "loss": 0.33549001812934875, "step": 1294 }, { "epoch": 0.34391183109812773, "grad_norm": 1.0986147605525078, "learning_rate": 1.920032390137758e-05, "loss": 0.3466021418571472, "step": 1295 }, { "epoch": 0.3441774000796707, "grad_norm": 1.0492164389172054, "learning_rate": 1.9198602526884388e-05, "loss": 0.35646146535873413, "step": 1296 }, { "epoch": 0.34444296906121363, "grad_norm": 1.0348991752364494, "learning_rate": 1.9196879379009112e-05, "loss": 0.3442128300666809, "step": 1297 }, { "epoch": 0.3447085380427566, "grad_norm": 1.083291442034964, "learning_rate": 1.9195154458083962e-05, "loss": 0.3854391872882843, "step": 1298 }, { "epoch": 0.34497410702429954, "grad_norm": 1.202325074766952, "learning_rate": 1.9193427764441477e-05, "loss": 0.376137375831604, "step": 1299 }, { "epoch": 0.34523967600584254, "grad_norm": 1.1591691335477168, "learning_rate": 1.9191699298414547e-05, "loss": 0.3115769028663635, "step": 1300 }, { "epoch": 0.3455052449873855, "grad_norm": 1.125127529667975, "learning_rate": 1.9189969060336396e-05, "loss": 0.32553282380104065, "step": 1301 }, { "epoch": 0.34577081396892845, "grad_norm": 1.2442677252107, "learning_rate": 1.9188237050540597e-05, "loss": 0.39529356360435486, "step": 1302 }, { "epoch": 0.3460363829504714, "grad_norm": 1.016155926476122, "learning_rate": 1.9186503269361063e-05, "loss": 0.3027458190917969, "step": 1303 }, { "epoch": 0.34630195193201435, "grad_norm": 1.2178145504108082, "learning_rate": 1.918476771713204e-05, "loss": 0.39317795634269714, "step": 1304 }, { "epoch": 0.3465675209135573, "grad_norm": 1.1358253756284789, "learning_rate": 1.918303039418813e-05, "loss": 0.3730325698852539, "step": 1305 }, { "epoch": 0.34683308989510025, "grad_norm": 1.0835224567793253, "learning_rate": 1.918129130086426e-05, "loss": 0.34862780570983887, "step": 1306 }, { "epoch": 0.3470986588766432, "grad_norm": 1.106131252801308, "learning_rate": 1.9179550437495707e-05, "loss": 0.32139018177986145, "step": 1307 }, { "epoch": 0.34736422785818616, "grad_norm": 1.118754726003564, "learning_rate": 1.91778078044181e-05, "loss": 0.37246090173721313, "step": 1308 }, { "epoch": 0.3476297968397291, "grad_norm": 1.035507147337034, "learning_rate": 1.9176063401967386e-05, "loss": 0.30985957384109497, "step": 1309 }, { "epoch": 0.34789536582127206, "grad_norm": 1.1303664709170593, "learning_rate": 1.917431723047987e-05, "loss": 0.3713758587837219, "step": 1310 }, { "epoch": 0.348160934802815, "grad_norm": 1.076206973404712, "learning_rate": 1.9172569290292193e-05, "loss": 0.3465833067893982, "step": 1311 }, { "epoch": 0.34842650378435797, "grad_norm": 1.1789932919731194, "learning_rate": 1.917081958174134e-05, "loss": 0.34807220101356506, "step": 1312 }, { "epoch": 0.3486920727659009, "grad_norm": 1.0178456651378849, "learning_rate": 1.9169068105164627e-05, "loss": 0.3369640111923218, "step": 1313 }, { "epoch": 0.3489576417474439, "grad_norm": 1.1714339652663717, "learning_rate": 1.9167314860899724e-05, "loss": 0.3521544337272644, "step": 1314 }, { "epoch": 0.3492232107289869, "grad_norm": 0.9756562815370131, "learning_rate": 1.9165559849284635e-05, "loss": 0.3256300687789917, "step": 1315 }, { "epoch": 0.34948877971052983, "grad_norm": 1.1173269078403432, "learning_rate": 1.9163803070657706e-05, "loss": 0.32401931285858154, "step": 1316 }, { "epoch": 0.3497543486920728, "grad_norm": 1.104564951170044, "learning_rate": 1.916204452535762e-05, "loss": 0.372749924659729, "step": 1317 }, { "epoch": 0.35001991767361573, "grad_norm": 1.053240444697934, "learning_rate": 1.9160284213723407e-05, "loss": 0.35853224992752075, "step": 1318 }, { "epoch": 0.3502854866551587, "grad_norm": 1.048325144857422, "learning_rate": 1.9158522136094433e-05, "loss": 0.32850801944732666, "step": 1319 }, { "epoch": 0.35055105563670164, "grad_norm": 1.1274703494911789, "learning_rate": 1.9156758292810404e-05, "loss": 0.3548474907875061, "step": 1320 }, { "epoch": 0.3508166246182446, "grad_norm": 1.10371779317482, "learning_rate": 1.9154992684211372e-05, "loss": 0.38709041476249695, "step": 1321 }, { "epoch": 0.35108219359978754, "grad_norm": 1.1369910570736041, "learning_rate": 1.9153225310637726e-05, "loss": 0.40369266271591187, "step": 1322 }, { "epoch": 0.3513477625813305, "grad_norm": 1.179710362637603, "learning_rate": 1.9151456172430186e-05, "loss": 0.3570155203342438, "step": 1323 }, { "epoch": 0.35161333156287344, "grad_norm": 1.0315056954444073, "learning_rate": 1.9149685269929833e-05, "loss": 0.34426411986351013, "step": 1324 }, { "epoch": 0.3518789005444164, "grad_norm": 1.0980268876500368, "learning_rate": 1.9147912603478066e-05, "loss": 0.35666006803512573, "step": 1325 }, { "epoch": 0.35214446952595935, "grad_norm": 1.0320732816254274, "learning_rate": 1.9146138173416643e-05, "loss": 0.36225512623786926, "step": 1326 }, { "epoch": 0.3524100385075023, "grad_norm": 1.0499655117353668, "learning_rate": 1.9144361980087643e-05, "loss": 0.3312349319458008, "step": 1327 }, { "epoch": 0.3526756074890453, "grad_norm": 1.0828461821707789, "learning_rate": 1.9142584023833506e-05, "loss": 0.3590523302555084, "step": 1328 }, { "epoch": 0.35294117647058826, "grad_norm": 1.2432343198034153, "learning_rate": 1.9140804304996997e-05, "loss": 0.341480016708374, "step": 1329 }, { "epoch": 0.3532067454521312, "grad_norm": 1.0165353851066345, "learning_rate": 1.913902282392122e-05, "loss": 0.37246501445770264, "step": 1330 }, { "epoch": 0.35347231443367416, "grad_norm": 1.0959834963108057, "learning_rate": 1.913723958094963e-05, "loss": 0.33834031224250793, "step": 1331 }, { "epoch": 0.3537378834152171, "grad_norm": 1.0066884605687934, "learning_rate": 1.913545457642601e-05, "loss": 0.29285067319869995, "step": 1332 }, { "epoch": 0.35400345239676007, "grad_norm": 1.0768479974972798, "learning_rate": 1.913366781069449e-05, "loss": 0.2903720736503601, "step": 1333 }, { "epoch": 0.354269021378303, "grad_norm": 1.1311334028851072, "learning_rate": 1.913187928409954e-05, "loss": 0.36428314447402954, "step": 1334 }, { "epoch": 0.35453459035984597, "grad_norm": 1.0473346547130091, "learning_rate": 1.9130088996985967e-05, "loss": 0.3379477560520172, "step": 1335 }, { "epoch": 0.3548001593413889, "grad_norm": 1.0963924260325884, "learning_rate": 1.912829694969891e-05, "loss": 0.35286659002304077, "step": 1336 }, { "epoch": 0.3550657283229319, "grad_norm": 1.1930831242867357, "learning_rate": 1.9126503142583864e-05, "loss": 0.3670174479484558, "step": 1337 }, { "epoch": 0.3553312973044748, "grad_norm": 1.1294601866875984, "learning_rate": 1.9124707575986642e-05, "loss": 0.3422902226448059, "step": 1338 }, { "epoch": 0.3555968662860178, "grad_norm": 0.9984746022499613, "learning_rate": 1.912291025025342e-05, "loss": 0.29778385162353516, "step": 1339 }, { "epoch": 0.35586243526756073, "grad_norm": 1.1907673127670892, "learning_rate": 1.91211111657307e-05, "loss": 0.36249661445617676, "step": 1340 }, { "epoch": 0.3561280042491037, "grad_norm": 1.1054946723600563, "learning_rate": 1.9119310322765315e-05, "loss": 0.340925395488739, "step": 1341 }, { "epoch": 0.3563935732306467, "grad_norm": 1.1964466720866056, "learning_rate": 1.9117507721704455e-05, "loss": 0.35674089193344116, "step": 1342 }, { "epoch": 0.35665914221218964, "grad_norm": 1.1077144979302902, "learning_rate": 1.9115703362895636e-05, "loss": 0.3602067828178406, "step": 1343 }, { "epoch": 0.3569247111937326, "grad_norm": 1.1669501112510636, "learning_rate": 1.9113897246686716e-05, "loss": 0.35211697220802307, "step": 1344 }, { "epoch": 0.35719028017527554, "grad_norm": 1.1098565168791754, "learning_rate": 1.91120893734259e-05, "loss": 0.3706115484237671, "step": 1345 }, { "epoch": 0.3574558491568185, "grad_norm": 0.955637908965499, "learning_rate": 1.9110279743461717e-05, "loss": 0.3365110754966736, "step": 1346 }, { "epoch": 0.35772141813836145, "grad_norm": 1.2071736385011052, "learning_rate": 1.9108468357143047e-05, "loss": 0.40012121200561523, "step": 1347 }, { "epoch": 0.3579869871199044, "grad_norm": 1.1409634140225444, "learning_rate": 1.91066552148191e-05, "loss": 0.4003351926803589, "step": 1348 }, { "epoch": 0.35825255610144735, "grad_norm": 1.0613274196364288, "learning_rate": 1.910484031683943e-05, "loss": 0.3574616014957428, "step": 1349 }, { "epoch": 0.3585181250829903, "grad_norm": 1.0904662824068834, "learning_rate": 1.910302366355393e-05, "loss": 0.3345073461532593, "step": 1350 }, { "epoch": 0.35878369406453325, "grad_norm": 1.0532412802136695, "learning_rate": 1.910120525531283e-05, "loss": 0.3467676341533661, "step": 1351 }, { "epoch": 0.3590492630460762, "grad_norm": 1.0529131768701299, "learning_rate": 1.9099385092466695e-05, "loss": 0.32433655858039856, "step": 1352 }, { "epoch": 0.35931483202761916, "grad_norm": 1.0442908892383016, "learning_rate": 1.909756317536643e-05, "loss": 0.3366447985172272, "step": 1353 }, { "epoch": 0.3595804010091621, "grad_norm": 1.0770054348386777, "learning_rate": 1.909573950436328e-05, "loss": 0.310118168592453, "step": 1354 }, { "epoch": 0.35984596999070506, "grad_norm": 1.4782002462322321, "learning_rate": 1.909391407980883e-05, "loss": 0.3503451943397522, "step": 1355 }, { "epoch": 0.36011153897224807, "grad_norm": 1.0889726916887852, "learning_rate": 1.9092086902054996e-05, "loss": 0.3375343978404999, "step": 1356 }, { "epoch": 0.360377107953791, "grad_norm": 0.9368081121032712, "learning_rate": 1.909025797145404e-05, "loss": 0.3056451082229614, "step": 1357 }, { "epoch": 0.360642676935334, "grad_norm": 0.9554491579006472, "learning_rate": 1.9088427288358556e-05, "loss": 0.3063391447067261, "step": 1358 }, { "epoch": 0.3609082459168769, "grad_norm": 0.9358824747825566, "learning_rate": 1.908659485312148e-05, "loss": 0.3055405616760254, "step": 1359 }, { "epoch": 0.3611738148984199, "grad_norm": 1.1828231629690173, "learning_rate": 1.908476066609608e-05, "loss": 0.38323235511779785, "step": 1360 }, { "epoch": 0.36143938387996283, "grad_norm": 1.0971994038941366, "learning_rate": 1.908292472763597e-05, "loss": 0.33526092767715454, "step": 1361 }, { "epoch": 0.3617049528615058, "grad_norm": 1.0449346093027478, "learning_rate": 1.9081087038095094e-05, "loss": 0.34485238790512085, "step": 1362 }, { "epoch": 0.36197052184304873, "grad_norm": 1.0943982229718532, "learning_rate": 1.907924759782774e-05, "loss": 0.2963239252567291, "step": 1363 }, { "epoch": 0.3622360908245917, "grad_norm": 1.2033822452903298, "learning_rate": 1.9077406407188532e-05, "loss": 0.3536864221096039, "step": 1364 }, { "epoch": 0.36250165980613464, "grad_norm": 1.1739216512613182, "learning_rate": 1.907556346653242e-05, "loss": 0.3724798858165741, "step": 1365 }, { "epoch": 0.3627672287876776, "grad_norm": 1.2035474175290464, "learning_rate": 1.9073718776214717e-05, "loss": 0.36241161823272705, "step": 1366 }, { "epoch": 0.36303279776922054, "grad_norm": 1.2262905723198394, "learning_rate": 1.9071872336591042e-05, "loss": 0.3484225273132324, "step": 1367 }, { "epoch": 0.3632983667507635, "grad_norm": 1.11285184075262, "learning_rate": 1.9070024148017375e-05, "loss": 0.33606311678886414, "step": 1368 }, { "epoch": 0.36356393573230644, "grad_norm": 1.076908267109863, "learning_rate": 1.906817421085002e-05, "loss": 0.3263503909111023, "step": 1369 }, { "epoch": 0.36382950471384945, "grad_norm": 1.126388175466026, "learning_rate": 1.906632252544563e-05, "loss": 0.33454492688179016, "step": 1370 }, { "epoch": 0.3640950736953924, "grad_norm": 1.1264022314316273, "learning_rate": 1.9064469092161185e-05, "loss": 0.34858438372612, "step": 1371 }, { "epoch": 0.36436064267693535, "grad_norm": 1.0527021112264499, "learning_rate": 1.9062613911354005e-05, "loss": 0.3466234505176544, "step": 1372 }, { "epoch": 0.3646262116584783, "grad_norm": 1.0325760706581486, "learning_rate": 1.9060756983381743e-05, "loss": 0.33574312925338745, "step": 1373 }, { "epoch": 0.36489178064002126, "grad_norm": 1.0321788657369535, "learning_rate": 1.90588983086024e-05, "loss": 0.3012363016605377, "step": 1374 }, { "epoch": 0.3651573496215642, "grad_norm": 1.0033389586223882, "learning_rate": 1.90570378873743e-05, "loss": 0.3050191402435303, "step": 1375 }, { "epoch": 0.36542291860310716, "grad_norm": 1.0078763869776561, "learning_rate": 1.905517572005611e-05, "loss": 0.35090070962905884, "step": 1376 }, { "epoch": 0.3656884875846501, "grad_norm": 1.011051809727729, "learning_rate": 1.9053311807006845e-05, "loss": 0.3276262581348419, "step": 1377 }, { "epoch": 0.36595405656619306, "grad_norm": 1.300904148134606, "learning_rate": 1.9051446148585833e-05, "loss": 0.3303500711917877, "step": 1378 }, { "epoch": 0.366219625547736, "grad_norm": 1.113413634877815, "learning_rate": 1.9049578745152754e-05, "loss": 0.3748486042022705, "step": 1379 }, { "epoch": 0.36648519452927897, "grad_norm": 0.8707302355459249, "learning_rate": 1.9047709597067628e-05, "loss": 0.30339744687080383, "step": 1380 }, { "epoch": 0.3667507635108219, "grad_norm": 1.0245709544347914, "learning_rate": 1.9045838704690796e-05, "loss": 0.31811147928237915, "step": 1381 }, { "epoch": 0.36701633249236487, "grad_norm": 1.1759156162745943, "learning_rate": 1.9043966068382945e-05, "loss": 0.3541119694709778, "step": 1382 }, { "epoch": 0.3672819014739078, "grad_norm": 1.0874467494483675, "learning_rate": 1.9042091688505104e-05, "loss": 0.36639657616615295, "step": 1383 }, { "epoch": 0.36754747045545083, "grad_norm": 1.0242460437241268, "learning_rate": 1.9040215565418628e-05, "loss": 0.35859787464141846, "step": 1384 }, { "epoch": 0.3678130394369938, "grad_norm": 1.017105790679022, "learning_rate": 1.9038337699485207e-05, "loss": 0.3210521340370178, "step": 1385 }, { "epoch": 0.36807860841853673, "grad_norm": 1.0362268895966902, "learning_rate": 1.9036458091066875e-05, "loss": 0.3207433819770813, "step": 1386 }, { "epoch": 0.3683441774000797, "grad_norm": 0.9948382455278952, "learning_rate": 1.9034576740526e-05, "loss": 0.3475082218647003, "step": 1387 }, { "epoch": 0.36860974638162264, "grad_norm": 1.167057707852143, "learning_rate": 1.903269364822528e-05, "loss": 0.33252987265586853, "step": 1388 }, { "epoch": 0.3688753153631656, "grad_norm": 1.0281516525035093, "learning_rate": 1.903080881452776e-05, "loss": 0.32200103998184204, "step": 1389 }, { "epoch": 0.36914088434470854, "grad_norm": 1.0752934055327636, "learning_rate": 1.9028922239796803e-05, "loss": 0.34780022501945496, "step": 1390 }, { "epoch": 0.3694064533262515, "grad_norm": 1.1028643639363398, "learning_rate": 1.902703392439613e-05, "loss": 0.35411912202835083, "step": 1391 }, { "epoch": 0.36967202230779445, "grad_norm": 1.6627965093255739, "learning_rate": 1.9025143868689773e-05, "loss": 0.35232803225517273, "step": 1392 }, { "epoch": 0.3699375912893374, "grad_norm": 1.168292115519334, "learning_rate": 1.9023252073042128e-05, "loss": 0.38561391830444336, "step": 1393 }, { "epoch": 0.37020316027088035, "grad_norm": 0.9982322437598163, "learning_rate": 1.9021358537817897e-05, "loss": 0.3184170126914978, "step": 1394 }, { "epoch": 0.3704687292524233, "grad_norm": 1.0557333187102689, "learning_rate": 1.9019463263382142e-05, "loss": 0.32455068826675415, "step": 1395 }, { "epoch": 0.37073429823396625, "grad_norm": 1.0862364532602506, "learning_rate": 1.901756625010024e-05, "loss": 0.32998934388160706, "step": 1396 }, { "epoch": 0.3709998672155092, "grad_norm": 1.1350071137219766, "learning_rate": 1.901566749833792e-05, "loss": 0.3361780643463135, "step": 1397 }, { "epoch": 0.37126543619705216, "grad_norm": 1.1483051699341575, "learning_rate": 1.9013767008461236e-05, "loss": 0.3618711829185486, "step": 1398 }, { "epoch": 0.37153100517859516, "grad_norm": 1.1250978483748488, "learning_rate": 1.901186478083658e-05, "loss": 0.3904131054878235, "step": 1399 }, { "epoch": 0.3717965741601381, "grad_norm": 1.0885741580509858, "learning_rate": 1.9009960815830676e-05, "loss": 0.35742759704589844, "step": 1400 }, { "epoch": 0.37206214314168107, "grad_norm": 1.073570835222054, "learning_rate": 1.9008055113810595e-05, "loss": 0.32880812883377075, "step": 1401 }, { "epoch": 0.372327712123224, "grad_norm": 1.0645240727318732, "learning_rate": 1.9006147675143724e-05, "loss": 0.3379839360713959, "step": 1402 }, { "epoch": 0.37259328110476697, "grad_norm": 1.1363528922504198, "learning_rate": 1.90042385001978e-05, "loss": 0.3635789453983307, "step": 1403 }, { "epoch": 0.3728588500863099, "grad_norm": 1.1103620354136925, "learning_rate": 1.900232758934089e-05, "loss": 0.3462461233139038, "step": 1404 }, { "epoch": 0.3731244190678529, "grad_norm": 1.1087128591527484, "learning_rate": 1.900041494294139e-05, "loss": 0.34578579664230347, "step": 1405 }, { "epoch": 0.3733899880493958, "grad_norm": 1.1067984269435176, "learning_rate": 1.899850056136804e-05, "loss": 0.36266931891441345, "step": 1406 }, { "epoch": 0.3736555570309388, "grad_norm": 1.089685836132972, "learning_rate": 1.899658444498991e-05, "loss": 0.34019365906715393, "step": 1407 }, { "epoch": 0.37392112601248173, "grad_norm": 1.0009475991478056, "learning_rate": 1.8994666594176404e-05, "loss": 0.3057953119277954, "step": 1408 }, { "epoch": 0.3741866949940247, "grad_norm": 1.1008245937613312, "learning_rate": 1.8992747009297265e-05, "loss": 0.3663131892681122, "step": 1409 }, { "epoch": 0.37445226397556763, "grad_norm": 1.0696938984110862, "learning_rate": 1.8990825690722557e-05, "loss": 0.3402065634727478, "step": 1410 }, { "epoch": 0.3747178329571106, "grad_norm": 1.017664192724319, "learning_rate": 1.8988902638822693e-05, "loss": 0.3437868654727936, "step": 1411 }, { "epoch": 0.37498340193865354, "grad_norm": 1.2246388577961873, "learning_rate": 1.8986977853968416e-05, "loss": 0.40972524881362915, "step": 1412 }, { "epoch": 0.37524897092019655, "grad_norm": 1.0293557658064552, "learning_rate": 1.89850513365308e-05, "loss": 0.3237977921962738, "step": 1413 }, { "epoch": 0.3755145399017395, "grad_norm": 0.9581631299919097, "learning_rate": 1.8983123086881254e-05, "loss": 0.3146173357963562, "step": 1414 }, { "epoch": 0.37578010888328245, "grad_norm": 0.9942979474502576, "learning_rate": 1.8981193105391524e-05, "loss": 0.33485543727874756, "step": 1415 }, { "epoch": 0.3760456778648254, "grad_norm": 1.0963696340494955, "learning_rate": 1.8979261392433685e-05, "loss": 0.36379897594451904, "step": 1416 }, { "epoch": 0.37631124684636835, "grad_norm": 0.902828061805848, "learning_rate": 1.8977327948380154e-05, "loss": 0.2737882137298584, "step": 1417 }, { "epoch": 0.3765768158279113, "grad_norm": 1.1168765744666191, "learning_rate": 1.897539277360367e-05, "loss": 0.3554575443267822, "step": 1418 }, { "epoch": 0.37684238480945426, "grad_norm": 1.0021058464909711, "learning_rate": 1.897345586847731e-05, "loss": 0.3297621011734009, "step": 1419 }, { "epoch": 0.3771079537909972, "grad_norm": 1.1638469907551372, "learning_rate": 1.8971517233374497e-05, "loss": 0.32272985577583313, "step": 1420 }, { "epoch": 0.37737352277254016, "grad_norm": 1.0280583772355378, "learning_rate": 1.8969576868668967e-05, "loss": 0.32175642251968384, "step": 1421 }, { "epoch": 0.3776390917540831, "grad_norm": 1.1136468557030246, "learning_rate": 1.8967634774734807e-05, "loss": 0.35973137617111206, "step": 1422 }, { "epoch": 0.37790466073562606, "grad_norm": 1.1892680335343753, "learning_rate": 1.8965690951946424e-05, "loss": 0.3385169506072998, "step": 1423 }, { "epoch": 0.378170229717169, "grad_norm": 1.1245023779822048, "learning_rate": 1.8963745400678564e-05, "loss": 0.3683067560195923, "step": 1424 }, { "epoch": 0.37843579869871197, "grad_norm": 1.1630069521478075, "learning_rate": 1.896179812130631e-05, "loss": 0.3711622357368469, "step": 1425 }, { "epoch": 0.3787013676802549, "grad_norm": 1.015020556732164, "learning_rate": 1.895984911420507e-05, "loss": 0.30416572093963623, "step": 1426 }, { "epoch": 0.3789669366617979, "grad_norm": 1.079958708031102, "learning_rate": 1.8957898379750598e-05, "loss": 0.3439522385597229, "step": 1427 }, { "epoch": 0.3792325056433409, "grad_norm": 1.1382084488728177, "learning_rate": 1.895594591831896e-05, "loss": 0.3663806617259979, "step": 1428 }, { "epoch": 0.37949807462488383, "grad_norm": 1.0501527452156108, "learning_rate": 1.895399173028658e-05, "loss": 0.32132354378700256, "step": 1429 }, { "epoch": 0.3797636436064268, "grad_norm": 0.9916462964383544, "learning_rate": 1.8952035816030196e-05, "loss": 0.3040635585784912, "step": 1430 }, { "epoch": 0.38002921258796973, "grad_norm": 1.1155299107557486, "learning_rate": 1.8950078175926886e-05, "loss": 0.3548869788646698, "step": 1431 }, { "epoch": 0.3802947815695127, "grad_norm": 1.1280933582225339, "learning_rate": 1.894811881035406e-05, "loss": 0.3114319443702698, "step": 1432 }, { "epoch": 0.38056035055105564, "grad_norm": 1.151174980739505, "learning_rate": 1.894615771968946e-05, "loss": 0.3589673936367035, "step": 1433 }, { "epoch": 0.3808259195325986, "grad_norm": 1.1074661491088642, "learning_rate": 1.894419490431116e-05, "loss": 0.3073863983154297, "step": 1434 }, { "epoch": 0.38109148851414154, "grad_norm": 1.0689323921068359, "learning_rate": 1.8942230364597572e-05, "loss": 0.32474076747894287, "step": 1435 }, { "epoch": 0.3813570574956845, "grad_norm": 2.6127931856999314, "learning_rate": 1.8940264100927432e-05, "loss": 0.3363546133041382, "step": 1436 }, { "epoch": 0.38162262647722744, "grad_norm": 0.9995665434586938, "learning_rate": 1.8938296113679814e-05, "loss": 0.33679312467575073, "step": 1437 }, { "epoch": 0.3818881954587704, "grad_norm": 1.0113319573344832, "learning_rate": 1.8936326403234125e-05, "loss": 0.33171382546424866, "step": 1438 }, { "epoch": 0.38215376444031335, "grad_norm": 1.0880785150495547, "learning_rate": 1.8934354969970097e-05, "loss": 0.3717402219772339, "step": 1439 }, { "epoch": 0.3824193334218563, "grad_norm": 1.1102375952968466, "learning_rate": 1.8932381814267802e-05, "loss": 0.335337370634079, "step": 1440 }, { "epoch": 0.3826849024033993, "grad_norm": 1.010201255539417, "learning_rate": 1.893040693650764e-05, "loss": 0.32745444774627686, "step": 1441 }, { "epoch": 0.38295047138494226, "grad_norm": 1.045820108792802, "learning_rate": 1.892843033707035e-05, "loss": 0.34863507747650146, "step": 1442 }, { "epoch": 0.3832160403664852, "grad_norm": 1.0344465763282014, "learning_rate": 1.8926452016336987e-05, "loss": 0.3428313732147217, "step": 1443 }, { "epoch": 0.38348160934802816, "grad_norm": 0.9882681324904586, "learning_rate": 1.8924471974688956e-05, "loss": 0.3223801851272583, "step": 1444 }, { "epoch": 0.3837471783295711, "grad_norm": 1.2003387152989082, "learning_rate": 1.8922490212507983e-05, "loss": 0.33248746395111084, "step": 1445 }, { "epoch": 0.38401274731111407, "grad_norm": 1.0404747226700646, "learning_rate": 1.8920506730176125e-05, "loss": 0.3472076654434204, "step": 1446 }, { "epoch": 0.384278316292657, "grad_norm": 1.229166058737197, "learning_rate": 1.891852152807578e-05, "loss": 0.4385136365890503, "step": 1447 }, { "epoch": 0.38454388527419997, "grad_norm": 1.0444838405880497, "learning_rate": 1.8916534606589666e-05, "loss": 0.36871540546417236, "step": 1448 }, { "epoch": 0.3848094542557429, "grad_norm": 1.0803859921763799, "learning_rate": 1.8914545966100843e-05, "loss": 0.3136710524559021, "step": 1449 }, { "epoch": 0.3850750232372859, "grad_norm": 1.0902031451870209, "learning_rate": 1.891255560699269e-05, "loss": 0.3236457109451294, "step": 1450 }, { "epoch": 0.3853405922188288, "grad_norm": 0.9936714818929803, "learning_rate": 1.8910563529648933e-05, "loss": 0.3176822066307068, "step": 1451 }, { "epoch": 0.3856061612003718, "grad_norm": 1.0635659473367998, "learning_rate": 1.890856973445362e-05, "loss": 0.3531719744205475, "step": 1452 }, { "epoch": 0.38587173018191473, "grad_norm": 0.9470574553293423, "learning_rate": 1.8906574221791127e-05, "loss": 0.2911416292190552, "step": 1453 }, { "epoch": 0.3861372991634577, "grad_norm": 1.0992858203425024, "learning_rate": 1.890457699204617e-05, "loss": 0.3522392511367798, "step": 1454 }, { "epoch": 0.3864028681450007, "grad_norm": 1.1706910837372075, "learning_rate": 1.8902578045603787e-05, "loss": 0.3724471628665924, "step": 1455 }, { "epoch": 0.38666843712654364, "grad_norm": 1.1807687078274312, "learning_rate": 1.890057738284935e-05, "loss": 0.2935449481010437, "step": 1456 }, { "epoch": 0.3869340061080866, "grad_norm": 1.1181603604376231, "learning_rate": 1.8898575004168568e-05, "loss": 0.3413137197494507, "step": 1457 }, { "epoch": 0.38719957508962954, "grad_norm": 1.1002740783107277, "learning_rate": 1.8896570909947477e-05, "loss": 0.32282277941703796, "step": 1458 }, { "epoch": 0.3874651440711725, "grad_norm": 1.0071931608273124, "learning_rate": 1.8894565100572435e-05, "loss": 0.3285476565361023, "step": 1459 }, { "epoch": 0.38773071305271545, "grad_norm": 1.010871057653593, "learning_rate": 1.8892557576430147e-05, "loss": 0.29517480731010437, "step": 1460 }, { "epoch": 0.3879962820342584, "grad_norm": 0.9710184588467288, "learning_rate": 1.8890548337907636e-05, "loss": 0.2913149297237396, "step": 1461 }, { "epoch": 0.38826185101580135, "grad_norm": 1.096024980027641, "learning_rate": 1.8888537385392258e-05, "loss": 0.32154160737991333, "step": 1462 }, { "epoch": 0.3885274199973443, "grad_norm": 1.157775550745099, "learning_rate": 1.88865247192717e-05, "loss": 0.30677905678749084, "step": 1463 }, { "epoch": 0.38879298897888726, "grad_norm": 1.1509749466488566, "learning_rate": 1.888451033993399e-05, "loss": 0.37568169832229614, "step": 1464 }, { "epoch": 0.3890585579604302, "grad_norm": 1.0554287268781006, "learning_rate": 1.8882494247767465e-05, "loss": 0.34972083568573, "step": 1465 }, { "epoch": 0.38932412694197316, "grad_norm": 1.1253148629548142, "learning_rate": 1.888047644316081e-05, "loss": 0.3198736906051636, "step": 1466 }, { "epoch": 0.3895896959235161, "grad_norm": 1.0268445477998984, "learning_rate": 1.887845692650303e-05, "loss": 0.3405846953392029, "step": 1467 }, { "epoch": 0.38985526490505906, "grad_norm": 1.1800981831391237, "learning_rate": 1.8876435698183465e-05, "loss": 0.3600257337093353, "step": 1468 }, { "epoch": 0.39012083388660207, "grad_norm": 1.042232512137109, "learning_rate": 1.887441275859179e-05, "loss": 0.32415103912353516, "step": 1469 }, { "epoch": 0.390386402868145, "grad_norm": 1.1736259107415346, "learning_rate": 1.8872388108117995e-05, "loss": 0.3450891673564911, "step": 1470 }, { "epoch": 0.390651971849688, "grad_norm": 1.0534871304087963, "learning_rate": 1.8870361747152416e-05, "loss": 0.3210057318210602, "step": 1471 }, { "epoch": 0.3909175408312309, "grad_norm": 1.1749127166764717, "learning_rate": 1.8868333676085707e-05, "loss": 0.3615706264972687, "step": 1472 }, { "epoch": 0.3911831098127739, "grad_norm": 1.0750237065987462, "learning_rate": 1.8866303895308856e-05, "loss": 0.34149813652038574, "step": 1473 }, { "epoch": 0.39144867879431683, "grad_norm": 0.91786674858188, "learning_rate": 1.8864272405213188e-05, "loss": 0.2795295715332031, "step": 1474 }, { "epoch": 0.3917142477758598, "grad_norm": 1.1110559595870293, "learning_rate": 1.8862239206190337e-05, "loss": 0.3459053933620453, "step": 1475 }, { "epoch": 0.39197981675740273, "grad_norm": 1.1048084354602663, "learning_rate": 1.8860204298632294e-05, "loss": 0.3531072735786438, "step": 1476 }, { "epoch": 0.3922453857389457, "grad_norm": 1.128095083544478, "learning_rate": 1.8858167682931357e-05, "loss": 0.3788977265357971, "step": 1477 }, { "epoch": 0.39251095472048864, "grad_norm": 1.3263027090109385, "learning_rate": 1.8856129359480163e-05, "loss": 0.3210671544075012, "step": 1478 }, { "epoch": 0.3927765237020316, "grad_norm": 1.0773816671223826, "learning_rate": 1.8854089328671673e-05, "loss": 0.3442102074623108, "step": 1479 }, { "epoch": 0.39304209268357454, "grad_norm": 1.0501956367137624, "learning_rate": 1.885204759089919e-05, "loss": 0.29128211736679077, "step": 1480 }, { "epoch": 0.3933076616651175, "grad_norm": 1.1403330671915806, "learning_rate": 1.885000414655633e-05, "loss": 0.3601154088973999, "step": 1481 }, { "epoch": 0.39357323064666044, "grad_norm": 1.032058056545269, "learning_rate": 1.8847958996037042e-05, "loss": 0.3173052668571472, "step": 1482 }, { "epoch": 0.39383879962820345, "grad_norm": 1.0840123249628424, "learning_rate": 1.8845912139735616e-05, "loss": 0.32759106159210205, "step": 1483 }, { "epoch": 0.3941043686097464, "grad_norm": 1.0868479290241493, "learning_rate": 1.8843863578046657e-05, "loss": 0.3213586211204529, "step": 1484 }, { "epoch": 0.39436993759128935, "grad_norm": 1.0263834848721582, "learning_rate": 1.8841813311365105e-05, "loss": 0.342970073223114, "step": 1485 }, { "epoch": 0.3946355065728323, "grad_norm": 1.1467746465148738, "learning_rate": 1.883976134008622e-05, "loss": 0.3852401375770569, "step": 1486 }, { "epoch": 0.39490107555437526, "grad_norm": 1.0974253808771965, "learning_rate": 1.883770766460561e-05, "loss": 0.2965390682220459, "step": 1487 }, { "epoch": 0.3951666445359182, "grad_norm": 1.1655078685340161, "learning_rate": 1.883565228531919e-05, "loss": 0.3899655044078827, "step": 1488 }, { "epoch": 0.39543221351746116, "grad_norm": 1.1086105484757183, "learning_rate": 1.8833595202623222e-05, "loss": 0.339199423789978, "step": 1489 }, { "epoch": 0.3956977824990041, "grad_norm": 1.049526058190211, "learning_rate": 1.8831536416914278e-05, "loss": 0.3121682405471802, "step": 1490 }, { "epoch": 0.39596335148054707, "grad_norm": 1.073417591294797, "learning_rate": 1.8829475928589272e-05, "loss": 0.31947991251945496, "step": 1491 }, { "epoch": 0.39622892046209, "grad_norm": 1.1660176936819076, "learning_rate": 1.882741373804544e-05, "loss": 0.3569333553314209, "step": 1492 }, { "epoch": 0.39649448944363297, "grad_norm": 1.1521030930761056, "learning_rate": 1.882534984568035e-05, "loss": 0.3739020526409149, "step": 1493 }, { "epoch": 0.3967600584251759, "grad_norm": 1.0930221251915908, "learning_rate": 1.882328425189189e-05, "loss": 0.34350353479385376, "step": 1494 }, { "epoch": 0.3970256274067189, "grad_norm": 1.0780622136577362, "learning_rate": 1.882121695707829e-05, "loss": 0.3103981614112854, "step": 1495 }, { "epoch": 0.3972911963882618, "grad_norm": 1.066229649085828, "learning_rate": 1.8819147961638104e-05, "loss": 0.33847716450691223, "step": 1496 }, { "epoch": 0.39755676536980483, "grad_norm": 0.943119049120047, "learning_rate": 1.8817077265970196e-05, "loss": 0.3080996870994568, "step": 1497 }, { "epoch": 0.3978223343513478, "grad_norm": 0.9758181744675688, "learning_rate": 1.8815004870473777e-05, "loss": 0.3247831463813782, "step": 1498 }, { "epoch": 0.39808790333289074, "grad_norm": 0.9965389459031595, "learning_rate": 1.8812930775548387e-05, "loss": 0.2919698655605316, "step": 1499 }, { "epoch": 0.3983534723144337, "grad_norm": 1.1815639690812958, "learning_rate": 1.8810854981593883e-05, "loss": 0.3627319931983948, "step": 1500 }, { "epoch": 0.39861904129597664, "grad_norm": 1.0245222516327634, "learning_rate": 1.880877748901045e-05, "loss": 0.3619319796562195, "step": 1501 }, { "epoch": 0.3988846102775196, "grad_norm": 1.0294076265521692, "learning_rate": 1.8806698298198608e-05, "loss": 0.3393789827823639, "step": 1502 }, { "epoch": 0.39915017925906254, "grad_norm": 1.1375999694611314, "learning_rate": 1.88046174095592e-05, "loss": 0.3736116886138916, "step": 1503 }, { "epoch": 0.3994157482406055, "grad_norm": 0.9615847393601772, "learning_rate": 1.8802534823493395e-05, "loss": 0.32829388976097107, "step": 1504 }, { "epoch": 0.39968131722214845, "grad_norm": 1.004520084683698, "learning_rate": 1.8800450540402694e-05, "loss": 0.340041846036911, "step": 1505 }, { "epoch": 0.3999468862036914, "grad_norm": 1.6423190284198783, "learning_rate": 1.8798364560688917e-05, "loss": 0.2830736041069031, "step": 1506 }, { "epoch": 0.40021245518523435, "grad_norm": 1.126838308447994, "learning_rate": 1.8796276884754224e-05, "loss": 0.33011579513549805, "step": 1507 }, { "epoch": 0.4004780241667773, "grad_norm": 1.0024833819275993, "learning_rate": 1.8794187513001088e-05, "loss": 0.2893834114074707, "step": 1508 }, { "epoch": 0.40074359314832025, "grad_norm": 1.0682148927963429, "learning_rate": 1.8792096445832317e-05, "loss": 0.3590015172958374, "step": 1509 }, { "epoch": 0.4010091621298632, "grad_norm": 1.1883404603513603, "learning_rate": 1.8790003683651045e-05, "loss": 0.3968508541584015, "step": 1510 }, { "epoch": 0.4012747311114062, "grad_norm": 1.1506641785596874, "learning_rate": 1.878790922686073e-05, "loss": 0.324398934841156, "step": 1511 }, { "epoch": 0.40154030009294917, "grad_norm": 1.0455658872732225, "learning_rate": 1.8785813075865164e-05, "loss": 0.35111895203590393, "step": 1512 }, { "epoch": 0.4018058690744921, "grad_norm": 1.055231257150353, "learning_rate": 1.8783715231068452e-05, "loss": 0.28124356269836426, "step": 1513 }, { "epoch": 0.40207143805603507, "grad_norm": 1.0070468428923411, "learning_rate": 1.878161569287504e-05, "loss": 0.28962311148643494, "step": 1514 }, { "epoch": 0.402337007037578, "grad_norm": 1.0934983041480315, "learning_rate": 1.877951446168969e-05, "loss": 0.3646606206893921, "step": 1515 }, { "epoch": 0.402602576019121, "grad_norm": 1.1065863254454682, "learning_rate": 1.8777411537917497e-05, "loss": 0.2815355360507965, "step": 1516 }, { "epoch": 0.4028681450006639, "grad_norm": 1.1372178900816394, "learning_rate": 1.877530692196388e-05, "loss": 0.33208370208740234, "step": 1517 }, { "epoch": 0.4031337139822069, "grad_norm": 1.0968319662456871, "learning_rate": 1.8773200614234587e-05, "loss": 0.33741289377212524, "step": 1518 }, { "epoch": 0.40339928296374983, "grad_norm": 1.1178822197952292, "learning_rate": 1.877109261513568e-05, "loss": 0.31304073333740234, "step": 1519 }, { "epoch": 0.4036648519452928, "grad_norm": 1.264796618244999, "learning_rate": 1.8768982925073566e-05, "loss": 0.32556387782096863, "step": 1520 }, { "epoch": 0.40393042092683573, "grad_norm": 1.1057344226732335, "learning_rate": 1.8766871544454963e-05, "loss": 0.3584224581718445, "step": 1521 }, { "epoch": 0.4041959899083787, "grad_norm": 1.0109621512685618, "learning_rate": 1.8764758473686918e-05, "loss": 0.2864416837692261, "step": 1522 }, { "epoch": 0.40446155888992164, "grad_norm": 1.0390539229722413, "learning_rate": 1.8762643713176815e-05, "loss": 0.28925320506095886, "step": 1523 }, { "epoch": 0.4047271278714646, "grad_norm": 1.022628245189221, "learning_rate": 1.876052726333235e-05, "loss": 0.30940550565719604, "step": 1524 }, { "epoch": 0.4049926968530076, "grad_norm": 1.1648500528958037, "learning_rate": 1.875840912456155e-05, "loss": 0.3463154733181, "step": 1525 }, { "epoch": 0.40525826583455055, "grad_norm": 1.1823420506345301, "learning_rate": 1.8756289297272764e-05, "loss": 0.3349658250808716, "step": 1526 }, { "epoch": 0.4055238348160935, "grad_norm": 1.0511817500052025, "learning_rate": 1.8754167781874674e-05, "loss": 0.32588714361190796, "step": 1527 }, { "epoch": 0.40578940379763645, "grad_norm": 1.0750045197041278, "learning_rate": 1.875204457877628e-05, "loss": 0.33787310123443604, "step": 1528 }, { "epoch": 0.4060549727791794, "grad_norm": 1.0444881434472735, "learning_rate": 1.8749919688386912e-05, "loss": 0.3223261833190918, "step": 1529 }, { "epoch": 0.40632054176072235, "grad_norm": 1.2251483540500576, "learning_rate": 1.8747793111116226e-05, "loss": 0.38505882024765015, "step": 1530 }, { "epoch": 0.4065861107422653, "grad_norm": 1.077913563059366, "learning_rate": 1.8745664847374197e-05, "loss": 0.33071833848953247, "step": 1531 }, { "epoch": 0.40685167972380826, "grad_norm": 1.2405893427169952, "learning_rate": 1.874353489757113e-05, "loss": 0.36603987216949463, "step": 1532 }, { "epoch": 0.4071172487053512, "grad_norm": 0.9982674001932202, "learning_rate": 1.874140326211766e-05, "loss": 0.3103085160255432, "step": 1533 }, { "epoch": 0.40738281768689416, "grad_norm": 1.1470515997968143, "learning_rate": 1.873926994142473e-05, "loss": 0.3471127152442932, "step": 1534 }, { "epoch": 0.4076483866684371, "grad_norm": 1.0759117431352352, "learning_rate": 1.873713493590363e-05, "loss": 0.33152899146080017, "step": 1535 }, { "epoch": 0.40791395564998006, "grad_norm": 1.0887192073538825, "learning_rate": 1.8734998245965958e-05, "loss": 0.340177059173584, "step": 1536 }, { "epoch": 0.408179524631523, "grad_norm": 1.175803638176176, "learning_rate": 1.8732859872023644e-05, "loss": 0.3331618010997772, "step": 1537 }, { "epoch": 0.40844509361306597, "grad_norm": 1.0971311272588662, "learning_rate": 1.8730719814488937e-05, "loss": 0.3911997675895691, "step": 1538 }, { "epoch": 0.408710662594609, "grad_norm": 1.0986179012488992, "learning_rate": 1.8728578073774427e-05, "loss": 0.3699817955493927, "step": 1539 }, { "epoch": 0.4089762315761519, "grad_norm": 1.086312859301249, "learning_rate": 1.8726434650293e-05, "loss": 0.31567275524139404, "step": 1540 }, { "epoch": 0.4092418005576949, "grad_norm": 1.1099279461258769, "learning_rate": 1.8724289544457897e-05, "loss": 0.3387305438518524, "step": 1541 }, { "epoch": 0.40950736953923783, "grad_norm": 1.6366665349052443, "learning_rate": 1.8722142756682663e-05, "loss": 0.3460234999656677, "step": 1542 }, { "epoch": 0.4097729385207808, "grad_norm": 1.1109783591024025, "learning_rate": 1.8719994287381173e-05, "loss": 0.35653382539749146, "step": 1543 }, { "epoch": 0.41003850750232373, "grad_norm": 1.1054235252004945, "learning_rate": 1.8717844136967626e-05, "loss": 0.3828277885913849, "step": 1544 }, { "epoch": 0.4103040764838667, "grad_norm": 1.0929819002464054, "learning_rate": 1.871569230585655e-05, "loss": 0.35883858799934387, "step": 1545 }, { "epoch": 0.41056964546540964, "grad_norm": 0.988264800308937, "learning_rate": 1.8713538794462783e-05, "loss": 0.27414464950561523, "step": 1546 }, { "epoch": 0.4108352144469526, "grad_norm": 1.0216234157414708, "learning_rate": 1.871138360320151e-05, "loss": 0.2924337387084961, "step": 1547 }, { "epoch": 0.41110078342849554, "grad_norm": 1.1264719097344291, "learning_rate": 1.8709226732488216e-05, "loss": 0.34270918369293213, "step": 1548 }, { "epoch": 0.4113663524100385, "grad_norm": 1.056133674601812, "learning_rate": 1.870706818273872e-05, "loss": 0.33866482973098755, "step": 1549 }, { "epoch": 0.41163192139158145, "grad_norm": 1.0578429496037574, "learning_rate": 1.8704907954369176e-05, "loss": 0.3350633382797241, "step": 1550 }, { "epoch": 0.4118974903731244, "grad_norm": 1.0981882806330738, "learning_rate": 1.870274604779604e-05, "loss": 0.32763785123825073, "step": 1551 }, { "epoch": 0.41216305935466735, "grad_norm": 1.1235534336905566, "learning_rate": 1.8700582463436102e-05, "loss": 0.3130378723144531, "step": 1552 }, { "epoch": 0.41242862833621036, "grad_norm": 1.1311593123986747, "learning_rate": 1.8698417201706484e-05, "loss": 0.34318777918815613, "step": 1553 }, { "epoch": 0.4126941973177533, "grad_norm": 1.038517953287962, "learning_rate": 1.8696250263024617e-05, "loss": 0.3250104784965515, "step": 1554 }, { "epoch": 0.41295976629929626, "grad_norm": 1.1047081419569766, "learning_rate": 1.869408164780826e-05, "loss": 0.3409217298030853, "step": 1555 }, { "epoch": 0.4132253352808392, "grad_norm": 0.9892429720688775, "learning_rate": 1.86919113564755e-05, "loss": 0.2885017395019531, "step": 1556 }, { "epoch": 0.41349090426238216, "grad_norm": 0.9861078966083267, "learning_rate": 1.8689739389444744e-05, "loss": 0.31912562251091003, "step": 1557 }, { "epoch": 0.4137564732439251, "grad_norm": 1.0037060940033242, "learning_rate": 1.8687565747134716e-05, "loss": 0.29874011874198914, "step": 1558 }, { "epoch": 0.41402204222546807, "grad_norm": 1.0308167425812278, "learning_rate": 1.8685390429964473e-05, "loss": 0.3132701516151428, "step": 1559 }, { "epoch": 0.414287611207011, "grad_norm": 1.0029824533275895, "learning_rate": 1.868321343835339e-05, "loss": 0.31158843636512756, "step": 1560 }, { "epoch": 0.41455318018855397, "grad_norm": 0.959841401113078, "learning_rate": 1.8681034772721167e-05, "loss": 0.30490344762802124, "step": 1561 }, { "epoch": 0.4148187491700969, "grad_norm": 1.1053356359227535, "learning_rate": 1.867885443348782e-05, "loss": 0.3150998055934906, "step": 1562 }, { "epoch": 0.4150843181516399, "grad_norm": 1.0578010897773087, "learning_rate": 1.86766724210737e-05, "loss": 0.3391645550727844, "step": 1563 }, { "epoch": 0.4153498871331828, "grad_norm": 1.1317933031731224, "learning_rate": 1.8674488735899466e-05, "loss": 0.35013002157211304, "step": 1564 }, { "epoch": 0.4156154561147258, "grad_norm": 1.1514144052665038, "learning_rate": 1.867230337838611e-05, "loss": 0.3455789387226105, "step": 1565 }, { "epoch": 0.41588102509626873, "grad_norm": 1.0985743755307058, "learning_rate": 1.8670116348954945e-05, "loss": 0.3179319500923157, "step": 1566 }, { "epoch": 0.41614659407781174, "grad_norm": 1.046997092909125, "learning_rate": 1.8667927648027596e-05, "loss": 0.3628920018672943, "step": 1567 }, { "epoch": 0.4164121630593547, "grad_norm": 1.1175553372657145, "learning_rate": 1.8665737276026033e-05, "loss": 0.33599400520324707, "step": 1568 }, { "epoch": 0.41667773204089764, "grad_norm": 1.0741100001694928, "learning_rate": 1.8663545233372524e-05, "loss": 0.31519144773483276, "step": 1569 }, { "epoch": 0.4169433010224406, "grad_norm": 1.0564388001425704, "learning_rate": 1.8661351520489667e-05, "loss": 0.3326237201690674, "step": 1570 }, { "epoch": 0.41720887000398355, "grad_norm": 1.0506499046982631, "learning_rate": 1.865915613780039e-05, "loss": 0.35254499316215515, "step": 1571 }, { "epoch": 0.4174744389855265, "grad_norm": 1.134962500533026, "learning_rate": 1.8656959085727936e-05, "loss": 0.36689436435699463, "step": 1572 }, { "epoch": 0.41774000796706945, "grad_norm": 1.104702895545828, "learning_rate": 1.8654760364695873e-05, "loss": 0.3113600015640259, "step": 1573 }, { "epoch": 0.4180055769486124, "grad_norm": 1.0072243279377031, "learning_rate": 1.865255997512808e-05, "loss": 0.3336432874202728, "step": 1574 }, { "epoch": 0.41827114593015535, "grad_norm": 1.1762721663897004, "learning_rate": 1.8650357917448774e-05, "loss": 0.3657492995262146, "step": 1575 }, { "epoch": 0.4185367149116983, "grad_norm": 1.1286123264778107, "learning_rate": 1.864815419208248e-05, "loss": 0.3087846338748932, "step": 1576 }, { "epoch": 0.41880228389324126, "grad_norm": 1.059893684126419, "learning_rate": 1.8645948799454058e-05, "loss": 0.31422343850135803, "step": 1577 }, { "epoch": 0.4190678528747842, "grad_norm": 1.0232345658393134, "learning_rate": 1.8643741739988672e-05, "loss": 0.3172760009765625, "step": 1578 }, { "epoch": 0.41933342185632716, "grad_norm": 1.131569038679809, "learning_rate": 1.8641533014111824e-05, "loss": 0.36819136142730713, "step": 1579 }, { "epoch": 0.4195989908378701, "grad_norm": 1.0215370560204735, "learning_rate": 1.863932262224933e-05, "loss": 0.29081088304519653, "step": 1580 }, { "epoch": 0.4198645598194131, "grad_norm": 1.0406040134422527, "learning_rate": 1.8637110564827325e-05, "loss": 0.3209632635116577, "step": 1581 }, { "epoch": 0.42013012880095607, "grad_norm": 1.9161132832998955, "learning_rate": 1.863489684227227e-05, "loss": 0.3357914686203003, "step": 1582 }, { "epoch": 0.420395697782499, "grad_norm": 1.0469990353974015, "learning_rate": 1.8632681455010937e-05, "loss": 0.285677969455719, "step": 1583 }, { "epoch": 0.420661266764042, "grad_norm": 1.1491447855439996, "learning_rate": 1.8630464403470435e-05, "loss": 0.377876341342926, "step": 1584 }, { "epoch": 0.4209268357455849, "grad_norm": 1.0642007656116979, "learning_rate": 1.8628245688078187e-05, "loss": 0.3141768276691437, "step": 1585 }, { "epoch": 0.4211924047271279, "grad_norm": 1.078787810404599, "learning_rate": 1.8626025309261927e-05, "loss": 0.34249693155288696, "step": 1586 }, { "epoch": 0.42145797370867083, "grad_norm": 1.1583509747022063, "learning_rate": 1.8623803267449722e-05, "loss": 0.32564717531204224, "step": 1587 }, { "epoch": 0.4217235426902138, "grad_norm": 1.0623179841052965, "learning_rate": 1.8621579563069957e-05, "loss": 0.3425004184246063, "step": 1588 }, { "epoch": 0.42198911167175673, "grad_norm": 1.05392590229203, "learning_rate": 1.8619354196551333e-05, "loss": 0.3676222562789917, "step": 1589 }, { "epoch": 0.4222546806532997, "grad_norm": 0.9612536546184688, "learning_rate": 1.8617127168322877e-05, "loss": 0.28915971517562866, "step": 1590 }, { "epoch": 0.42252024963484264, "grad_norm": 1.1293248025877465, "learning_rate": 1.8614898478813933e-05, "loss": 0.3387221097946167, "step": 1591 }, { "epoch": 0.4227858186163856, "grad_norm": 1.0804518757125117, "learning_rate": 1.8612668128454164e-05, "loss": 0.33886784315109253, "step": 1592 }, { "epoch": 0.42305138759792854, "grad_norm": 1.0780507904890781, "learning_rate": 1.8610436117673557e-05, "loss": 0.3364121913909912, "step": 1593 }, { "epoch": 0.4233169565794715, "grad_norm": 1.0590527240631433, "learning_rate": 1.8608202446902418e-05, "loss": 0.3661370873451233, "step": 1594 }, { "epoch": 0.4235825255610145, "grad_norm": 1.254416564930449, "learning_rate": 1.8605967116571372e-05, "loss": 0.2980557680130005, "step": 1595 }, { "epoch": 0.42384809454255745, "grad_norm": 1.180518248335952, "learning_rate": 1.8603730127111363e-05, "loss": 0.36112043261528015, "step": 1596 }, { "epoch": 0.4241136635241004, "grad_norm": 0.9967676484164163, "learning_rate": 1.860149147895366e-05, "loss": 0.30641958117485046, "step": 1597 }, { "epoch": 0.42437923250564336, "grad_norm": 1.06006138769355, "learning_rate": 1.8599251172529836e-05, "loss": 0.3312561511993408, "step": 1598 }, { "epoch": 0.4246448014871863, "grad_norm": 1.070580032885208, "learning_rate": 1.859700920827181e-05, "loss": 0.3757131099700928, "step": 1599 }, { "epoch": 0.42491037046872926, "grad_norm": 1.0514692584176801, "learning_rate": 1.8594765586611805e-05, "loss": 0.3225080370903015, "step": 1600 }, { "epoch": 0.4251759394502722, "grad_norm": 1.0857454483782787, "learning_rate": 1.859252030798236e-05, "loss": 0.35943928360939026, "step": 1601 }, { "epoch": 0.42544150843181516, "grad_norm": 0.9907794348406631, "learning_rate": 1.859027337281633e-05, "loss": 0.29319390654563904, "step": 1602 }, { "epoch": 0.4257070774133581, "grad_norm": 1.1441852776057728, "learning_rate": 1.8588024781546914e-05, "loss": 0.32320237159729004, "step": 1603 }, { "epoch": 0.42597264639490107, "grad_norm": 1.1070076098385897, "learning_rate": 1.8585774534607606e-05, "loss": 0.3381520211696625, "step": 1604 }, { "epoch": 0.426238215376444, "grad_norm": 0.9826840529093485, "learning_rate": 1.858352263243223e-05, "loss": 0.30010825395584106, "step": 1605 }, { "epoch": 0.42650378435798697, "grad_norm": 0.9805553200940528, "learning_rate": 1.8581269075454918e-05, "loss": 0.26282748579978943, "step": 1606 }, { "epoch": 0.4267693533395299, "grad_norm": 1.0395702570014627, "learning_rate": 1.857901386411014e-05, "loss": 0.33613401651382446, "step": 1607 }, { "epoch": 0.4270349223210729, "grad_norm": 1.1625768546626036, "learning_rate": 1.8576756998832667e-05, "loss": 0.34522315859794617, "step": 1608 }, { "epoch": 0.4273004913026159, "grad_norm": 1.0776480516530333, "learning_rate": 1.8574498480057598e-05, "loss": 0.3253153860569, "step": 1609 }, { "epoch": 0.42756606028415883, "grad_norm": 1.177683979502923, "learning_rate": 1.8572238308220347e-05, "loss": 0.32180655002593994, "step": 1610 }, { "epoch": 0.4278316292657018, "grad_norm": 1.2444289754345055, "learning_rate": 1.856997648375665e-05, "loss": 0.3274008333683014, "step": 1611 }, { "epoch": 0.42809719824724474, "grad_norm": 1.006782047196068, "learning_rate": 1.8567713007102565e-05, "loss": 0.3196510374546051, "step": 1612 }, { "epoch": 0.4283627672287877, "grad_norm": 1.0069133029708661, "learning_rate": 1.8565447878694455e-05, "loss": 0.2759617567062378, "step": 1613 }, { "epoch": 0.42862833621033064, "grad_norm": 1.1572573238869637, "learning_rate": 1.8563181098969017e-05, "loss": 0.35069289803504944, "step": 1614 }, { "epoch": 0.4288939051918736, "grad_norm": 1.1400434606874466, "learning_rate": 1.8560912668363253e-05, "loss": 0.3388484716415405, "step": 1615 }, { "epoch": 0.42915947417341654, "grad_norm": 1.0338736294243014, "learning_rate": 1.8558642587314496e-05, "loss": 0.34116029739379883, "step": 1616 }, { "epoch": 0.4294250431549595, "grad_norm": 1.0487376701262667, "learning_rate": 1.8556370856260387e-05, "loss": 0.30212706327438354, "step": 1617 }, { "epoch": 0.42969061213650245, "grad_norm": 1.0633174136084793, "learning_rate": 1.855409747563889e-05, "loss": 0.32250338792800903, "step": 1618 }, { "epoch": 0.4299561811180454, "grad_norm": 1.132237618998821, "learning_rate": 1.8551822445888285e-05, "loss": 0.35972943902015686, "step": 1619 }, { "epoch": 0.43022175009958835, "grad_norm": 0.9921112897877987, "learning_rate": 1.8549545767447174e-05, "loss": 0.3112533390522003, "step": 1620 }, { "epoch": 0.4304873190811313, "grad_norm": 1.0331176116114555, "learning_rate": 1.854726744075447e-05, "loss": 0.3044458031654358, "step": 1621 }, { "epoch": 0.43075288806267426, "grad_norm": 1.0421498129424722, "learning_rate": 1.8544987466249412e-05, "loss": 0.3261772096157074, "step": 1622 }, { "epoch": 0.43101845704421726, "grad_norm": 1.3249821498842442, "learning_rate": 1.8542705844371544e-05, "loss": 0.3485907018184662, "step": 1623 }, { "epoch": 0.4312840260257602, "grad_norm": 2.6643478315387576, "learning_rate": 1.8540422575560747e-05, "loss": 0.3016113340854645, "step": 1624 }, { "epoch": 0.43154959500730317, "grad_norm": 1.021133157663628, "learning_rate": 1.8538137660257198e-05, "loss": 0.35383081436157227, "step": 1625 }, { "epoch": 0.4318151639888461, "grad_norm": 1.170997891522692, "learning_rate": 1.8535851098901406e-05, "loss": 0.32015109062194824, "step": 1626 }, { "epoch": 0.43208073297038907, "grad_norm": 1.1526156179794622, "learning_rate": 1.8533562891934195e-05, "loss": 0.3801743984222412, "step": 1627 }, { "epoch": 0.432346301951932, "grad_norm": 1.0686097183664227, "learning_rate": 1.85312730397967e-05, "loss": 0.33140939474105835, "step": 1628 }, { "epoch": 0.432611870933475, "grad_norm": 1.232101025230023, "learning_rate": 1.8528981542930382e-05, "loss": 0.4052904546260834, "step": 1629 }, { "epoch": 0.4328774399150179, "grad_norm": 1.0850305465298753, "learning_rate": 1.8526688401777014e-05, "loss": 0.3661607801914215, "step": 1630 }, { "epoch": 0.4331430088965609, "grad_norm": 1.0520968780833948, "learning_rate": 1.852439361677868e-05, "loss": 0.33260756731033325, "step": 1631 }, { "epoch": 0.43340857787810383, "grad_norm": 1.0137607762513057, "learning_rate": 1.85220971883778e-05, "loss": 0.30222776532173157, "step": 1632 }, { "epoch": 0.4336741468596468, "grad_norm": 1.1138822281677037, "learning_rate": 1.8519799117017086e-05, "loss": 0.3444751799106598, "step": 1633 }, { "epoch": 0.43393971584118973, "grad_norm": 1.0896517914007275, "learning_rate": 1.8517499403139586e-05, "loss": 0.33887404203414917, "step": 1634 }, { "epoch": 0.4342052848227327, "grad_norm": 0.9260010903737679, "learning_rate": 1.8515198047188652e-05, "loss": 0.287893146276474, "step": 1635 }, { "epoch": 0.43447085380427564, "grad_norm": 1.0080783350179279, "learning_rate": 1.8512895049607965e-05, "loss": 0.32236215472221375, "step": 1636 }, { "epoch": 0.43473642278581864, "grad_norm": 1.0861808896793093, "learning_rate": 1.8510590410841515e-05, "loss": 0.30670079588890076, "step": 1637 }, { "epoch": 0.4350019917673616, "grad_norm": 1.045996826542631, "learning_rate": 1.8508284131333604e-05, "loss": 0.34104713797569275, "step": 1638 }, { "epoch": 0.43526756074890455, "grad_norm": 1.13616869746559, "learning_rate": 1.8505976211528857e-05, "loss": 0.3402378559112549, "step": 1639 }, { "epoch": 0.4355331297304475, "grad_norm": 1.1414650328718847, "learning_rate": 1.8503666651872217e-05, "loss": 0.35236096382141113, "step": 1640 }, { "epoch": 0.43579869871199045, "grad_norm": 1.1137846416322885, "learning_rate": 1.850135545280894e-05, "loss": 0.3385634422302246, "step": 1641 }, { "epoch": 0.4360642676935334, "grad_norm": 1.0049349552180111, "learning_rate": 1.849904261478459e-05, "loss": 0.32222414016723633, "step": 1642 }, { "epoch": 0.43632983667507635, "grad_norm": 1.1246487142505726, "learning_rate": 1.8496728138245062e-05, "loss": 0.3251120448112488, "step": 1643 }, { "epoch": 0.4365954056566193, "grad_norm": 1.3230672810485753, "learning_rate": 1.8494412023636563e-05, "loss": 0.3199063837528229, "step": 1644 }, { "epoch": 0.43686097463816226, "grad_norm": 1.031106173264746, "learning_rate": 1.8492094271405605e-05, "loss": 0.3470883071422577, "step": 1645 }, { "epoch": 0.4371265436197052, "grad_norm": 1.1420067933967792, "learning_rate": 1.848977488199903e-05, "loss": 0.319596529006958, "step": 1646 }, { "epoch": 0.43739211260124816, "grad_norm": 1.172387725238046, "learning_rate": 1.848745385586398e-05, "loss": 0.3445591628551483, "step": 1647 }, { "epoch": 0.4376576815827911, "grad_norm": 1.0622512502557289, "learning_rate": 1.848513119344793e-05, "loss": 0.35861149430274963, "step": 1648 }, { "epoch": 0.43792325056433407, "grad_norm": 1.3423176489021205, "learning_rate": 1.8482806895198658e-05, "loss": 0.36727622151374817, "step": 1649 }, { "epoch": 0.438188819545877, "grad_norm": 1.0985203266462633, "learning_rate": 1.848048096156426e-05, "loss": 0.3505704402923584, "step": 1650 }, { "epoch": 0.43845438852742, "grad_norm": 1.050005044594017, "learning_rate": 1.8478153392993154e-05, "loss": 0.3508742153644562, "step": 1651 }, { "epoch": 0.438719957508963, "grad_norm": 1.0688095584032915, "learning_rate": 1.8475824189934063e-05, "loss": 0.32757264375686646, "step": 1652 }, { "epoch": 0.43898552649050593, "grad_norm": 1.0768843323365103, "learning_rate": 1.8473493352836032e-05, "loss": 0.3117530643939972, "step": 1653 }, { "epoch": 0.4392510954720489, "grad_norm": 1.1751248406507369, "learning_rate": 1.8471160882148417e-05, "loss": 0.3506043553352356, "step": 1654 }, { "epoch": 0.43951666445359183, "grad_norm": 1.1247697965204402, "learning_rate": 1.8468826778320892e-05, "loss": 0.33997148275375366, "step": 1655 }, { "epoch": 0.4397822334351348, "grad_norm": 1.007133328419329, "learning_rate": 1.8466491041803446e-05, "loss": 0.30060335993766785, "step": 1656 }, { "epoch": 0.44004780241667774, "grad_norm": 0.9546594059496064, "learning_rate": 1.846415367304638e-05, "loss": 0.3057805597782135, "step": 1657 }, { "epoch": 0.4403133713982207, "grad_norm": 1.006954520739026, "learning_rate": 1.846181467250031e-05, "loss": 0.30772098898887634, "step": 1658 }, { "epoch": 0.44057894037976364, "grad_norm": 1.043209753174748, "learning_rate": 1.845947404061617e-05, "loss": 0.3183813989162445, "step": 1659 }, { "epoch": 0.4408445093613066, "grad_norm": 1.0413807475941115, "learning_rate": 1.8457131777845204e-05, "loss": 0.2986184358596802, "step": 1660 }, { "epoch": 0.44111007834284954, "grad_norm": 1.0330249735438937, "learning_rate": 1.8454787884638973e-05, "loss": 0.33342432975769043, "step": 1661 }, { "epoch": 0.4413756473243925, "grad_norm": 1.6337494282252796, "learning_rate": 1.8452442361449353e-05, "loss": 0.33435192704200745, "step": 1662 }, { "epoch": 0.44164121630593545, "grad_norm": 1.1084487395338765, "learning_rate": 1.8450095208728537e-05, "loss": 0.31596100330352783, "step": 1663 }, { "epoch": 0.4419067852874784, "grad_norm": 1.0372033094770008, "learning_rate": 1.8447746426929022e-05, "loss": 0.29850512742996216, "step": 1664 }, { "epoch": 0.4421723542690214, "grad_norm": 1.1891933812209383, "learning_rate": 1.8445396016503628e-05, "loss": 0.34898555278778076, "step": 1665 }, { "epoch": 0.44243792325056436, "grad_norm": 1.0486597661615855, "learning_rate": 1.8443043977905484e-05, "loss": 0.283272385597229, "step": 1666 }, { "epoch": 0.4427034922321073, "grad_norm": 1.041766578180328, "learning_rate": 1.844069031158804e-05, "loss": 0.32765433192253113, "step": 1667 }, { "epoch": 0.44296906121365026, "grad_norm": 1.1465241668847563, "learning_rate": 1.8438335018005052e-05, "loss": 0.347957044839859, "step": 1668 }, { "epoch": 0.4432346301951932, "grad_norm": 1.1330493919292772, "learning_rate": 1.8435978097610594e-05, "loss": 0.36188018321990967, "step": 1669 }, { "epoch": 0.44350019917673617, "grad_norm": 1.1541714860130494, "learning_rate": 1.843361955085905e-05, "loss": 0.35944315791130066, "step": 1670 }, { "epoch": 0.4437657681582791, "grad_norm": 1.0564596521414393, "learning_rate": 1.8431259378205122e-05, "loss": 0.33441367745399475, "step": 1671 }, { "epoch": 0.44403133713982207, "grad_norm": 1.1043363461383413, "learning_rate": 1.8428897580103827e-05, "loss": 0.3157849907875061, "step": 1672 }, { "epoch": 0.444296906121365, "grad_norm": 1.0760645254646117, "learning_rate": 1.8426534157010486e-05, "loss": 0.33416497707366943, "step": 1673 }, { "epoch": 0.444562475102908, "grad_norm": 1.1629646905519946, "learning_rate": 1.842416910938074e-05, "loss": 0.3611617684364319, "step": 1674 }, { "epoch": 0.4448280440844509, "grad_norm": 1.079831089952362, "learning_rate": 1.8421802437670546e-05, "loss": 0.3030395805835724, "step": 1675 }, { "epoch": 0.4450936130659939, "grad_norm": 0.9867988845558019, "learning_rate": 1.8419434142336167e-05, "loss": 0.30281510949134827, "step": 1676 }, { "epoch": 0.44535918204753683, "grad_norm": 1.2041533085675928, "learning_rate": 1.8417064223834184e-05, "loss": 0.3489738404750824, "step": 1677 }, { "epoch": 0.4456247510290798, "grad_norm": 1.0320394434428715, "learning_rate": 1.8414692682621487e-05, "loss": 0.30453425645828247, "step": 1678 }, { "epoch": 0.44589032001062273, "grad_norm": 0.9586890082829097, "learning_rate": 1.841231951915528e-05, "loss": 0.28717339038848877, "step": 1679 }, { "epoch": 0.44615588899216574, "grad_norm": 1.0685350052372018, "learning_rate": 1.840994473389309e-05, "loss": 0.3227912187576294, "step": 1680 }, { "epoch": 0.4464214579737087, "grad_norm": 1.0774879432227336, "learning_rate": 1.8407568327292737e-05, "loss": 0.3575928807258606, "step": 1681 }, { "epoch": 0.44668702695525164, "grad_norm": 1.0240612597420884, "learning_rate": 1.840519029981237e-05, "loss": 0.35601454973220825, "step": 1682 }, { "epoch": 0.4469525959367946, "grad_norm": 1.1829639598617365, "learning_rate": 1.8402810651910444e-05, "loss": 0.34867429733276367, "step": 1683 }, { "epoch": 0.44721816491833755, "grad_norm": 1.0185115495756123, "learning_rate": 1.8400429384045724e-05, "loss": 0.3333359360694885, "step": 1684 }, { "epoch": 0.4474837338998805, "grad_norm": 1.1658514468774803, "learning_rate": 1.8398046496677296e-05, "loss": 0.3269057273864746, "step": 1685 }, { "epoch": 0.44774930288142345, "grad_norm": 1.0186865264151983, "learning_rate": 1.839566199026455e-05, "loss": 0.3507213890552521, "step": 1686 }, { "epoch": 0.4480148718629664, "grad_norm": 1.0962029873559684, "learning_rate": 1.8393275865267185e-05, "loss": 0.32935822010040283, "step": 1687 }, { "epoch": 0.44828044084450935, "grad_norm": 1.168811125319112, "learning_rate": 1.8390888122145225e-05, "loss": 0.3780096769332886, "step": 1688 }, { "epoch": 0.4485460098260523, "grad_norm": 1.08432540630583, "learning_rate": 1.8388498761358997e-05, "loss": 0.3412250578403473, "step": 1689 }, { "epoch": 0.44881157880759526, "grad_norm": 1.0725143861051711, "learning_rate": 1.838610778336914e-05, "loss": 0.33751022815704346, "step": 1690 }, { "epoch": 0.4490771477891382, "grad_norm": 1.113628501747759, "learning_rate": 1.8383715188636608e-05, "loss": 0.35736170411109924, "step": 1691 }, { "epoch": 0.44934271677068116, "grad_norm": 1.0608679340591776, "learning_rate": 1.8381320977622664e-05, "loss": 0.3133913278579712, "step": 1692 }, { "epoch": 0.4496082857522241, "grad_norm": 1.0696112323301112, "learning_rate": 1.8378925150788886e-05, "loss": 0.2890821099281311, "step": 1693 }, { "epoch": 0.4498738547337671, "grad_norm": 1.0759892831738864, "learning_rate": 1.8376527708597155e-05, "loss": 0.34016966819763184, "step": 1694 }, { "epoch": 0.45013942371531007, "grad_norm": 1.0933611032669988, "learning_rate": 1.8374128651509676e-05, "loss": 0.3502900302410126, "step": 1695 }, { "epoch": 0.450404992696853, "grad_norm": 1.1956521483077693, "learning_rate": 1.8371727979988957e-05, "loss": 0.31828251481056213, "step": 1696 }, { "epoch": 0.450670561678396, "grad_norm": 1.1739995891800665, "learning_rate": 1.836932569449782e-05, "loss": 0.33322471380233765, "step": 1697 }, { "epoch": 0.4509361306599389, "grad_norm": 0.977715581129718, "learning_rate": 1.8366921795499394e-05, "loss": 0.28489458560943604, "step": 1698 }, { "epoch": 0.4512016996414819, "grad_norm": 1.0351592490047028, "learning_rate": 1.8364516283457127e-05, "loss": 0.3125787079334259, "step": 1699 }, { "epoch": 0.45146726862302483, "grad_norm": 1.6801930060854708, "learning_rate": 1.8362109158834767e-05, "loss": 0.3352596163749695, "step": 1700 }, { "epoch": 0.4517328376045678, "grad_norm": 1.0152758212914303, "learning_rate": 1.8359700422096385e-05, "loss": 0.2986747622489929, "step": 1701 }, { "epoch": 0.45199840658611073, "grad_norm": 1.0704573865215896, "learning_rate": 1.8357290073706355e-05, "loss": 0.3276829123497009, "step": 1702 }, { "epoch": 0.4522639755676537, "grad_norm": 1.05119725558451, "learning_rate": 1.8354878114129368e-05, "loss": 0.3183029890060425, "step": 1703 }, { "epoch": 0.45252954454919664, "grad_norm": 1.0595099003295023, "learning_rate": 1.835246454383041e-05, "loss": 0.32149460911750793, "step": 1704 }, { "epoch": 0.4527951135307396, "grad_norm": 1.0365725372264356, "learning_rate": 1.8350049363274802e-05, "loss": 0.2963859438896179, "step": 1705 }, { "epoch": 0.45306068251228254, "grad_norm": 1.132218144997021, "learning_rate": 1.8347632572928154e-05, "loss": 0.35251080989837646, "step": 1706 }, { "epoch": 0.4533262514938255, "grad_norm": 1.1840188868504486, "learning_rate": 1.8345214173256395e-05, "loss": 0.3585474491119385, "step": 1707 }, { "epoch": 0.4535918204753685, "grad_norm": 1.1792148584627284, "learning_rate": 1.834279416472577e-05, "loss": 0.32339078187942505, "step": 1708 }, { "epoch": 0.45385738945691145, "grad_norm": 1.030916532610971, "learning_rate": 1.8340372547802822e-05, "loss": 0.3473295569419861, "step": 1709 }, { "epoch": 0.4541229584384544, "grad_norm": 1.149162033618886, "learning_rate": 1.833794932295441e-05, "loss": 0.35146117210388184, "step": 1710 }, { "epoch": 0.45438852741999736, "grad_norm": 1.080751163824508, "learning_rate": 1.833552449064771e-05, "loss": 0.29697534441947937, "step": 1711 }, { "epoch": 0.4546540964015403, "grad_norm": 1.0590764839143914, "learning_rate": 1.8333098051350197e-05, "loss": 0.30980685353279114, "step": 1712 }, { "epoch": 0.45491966538308326, "grad_norm": 1.2023264217964575, "learning_rate": 1.8330670005529657e-05, "loss": 0.3271983861923218, "step": 1713 }, { "epoch": 0.4551852343646262, "grad_norm": 1.061456665590969, "learning_rate": 1.8328240353654193e-05, "loss": 0.3421804904937744, "step": 1714 }, { "epoch": 0.45545080334616916, "grad_norm": 0.988281834877126, "learning_rate": 1.8325809096192207e-05, "loss": 0.2949771285057068, "step": 1715 }, { "epoch": 0.4557163723277121, "grad_norm": 1.1467541005281106, "learning_rate": 1.832337623361242e-05, "loss": 0.35578668117523193, "step": 1716 }, { "epoch": 0.45598194130925507, "grad_norm": 1.099618839558401, "learning_rate": 1.832094176638387e-05, "loss": 0.3714647889137268, "step": 1717 }, { "epoch": 0.456247510290798, "grad_norm": 1.116087725713372, "learning_rate": 1.8318505694975877e-05, "loss": 0.36253875494003296, "step": 1718 }, { "epoch": 0.45651307927234097, "grad_norm": 1.0310426822464949, "learning_rate": 1.8316068019858093e-05, "loss": 0.3148016035556793, "step": 1719 }, { "epoch": 0.4567786482538839, "grad_norm": 1.0869949789046671, "learning_rate": 1.8313628741500476e-05, "loss": 0.3420512080192566, "step": 1720 }, { "epoch": 0.4570442172354269, "grad_norm": 1.0955610437646774, "learning_rate": 1.831118786037329e-05, "loss": 0.2941698431968689, "step": 1721 }, { "epoch": 0.4573097862169699, "grad_norm": 0.9987507632564111, "learning_rate": 1.83087453769471e-05, "loss": 0.3033481240272522, "step": 1722 }, { "epoch": 0.45757535519851283, "grad_norm": 1.0508818993675257, "learning_rate": 1.8306301291692798e-05, "loss": 0.3405943810939789, "step": 1723 }, { "epoch": 0.4578409241800558, "grad_norm": 1.0291343903638976, "learning_rate": 1.8303855605081567e-05, "loss": 0.32217931747436523, "step": 1724 }, { "epoch": 0.45810649316159874, "grad_norm": 1.1797464113481113, "learning_rate": 1.8301408317584913e-05, "loss": 0.3627573847770691, "step": 1725 }, { "epoch": 0.4583720621431417, "grad_norm": 1.1425882725361838, "learning_rate": 1.829895942967464e-05, "loss": 0.3512224853038788, "step": 1726 }, { "epoch": 0.45863763112468464, "grad_norm": 1.1358093316461328, "learning_rate": 1.8296508941822868e-05, "loss": 0.35433265566825867, "step": 1727 }, { "epoch": 0.4589032001062276, "grad_norm": 1.1217406683513973, "learning_rate": 1.829405685450202e-05, "loss": 0.33105185627937317, "step": 1728 }, { "epoch": 0.45916876908777055, "grad_norm": 1.0087946676492725, "learning_rate": 1.829160316818483e-05, "loss": 0.31765925884246826, "step": 1729 }, { "epoch": 0.4594343380693135, "grad_norm": 1.0268902541251206, "learning_rate": 1.8289147883344338e-05, "loss": 0.3276101350784302, "step": 1730 }, { "epoch": 0.45969990705085645, "grad_norm": 2.1185922480389676, "learning_rate": 1.8286691000453895e-05, "loss": 0.2921130061149597, "step": 1731 }, { "epoch": 0.4599654760323994, "grad_norm": 0.9680106013727008, "learning_rate": 1.828423251998716e-05, "loss": 0.3025062382221222, "step": 1732 }, { "epoch": 0.46023104501394235, "grad_norm": 1.0299077884479195, "learning_rate": 1.82817724424181e-05, "loss": 0.3128702640533447, "step": 1733 }, { "epoch": 0.4604966139954853, "grad_norm": 0.9957682350134235, "learning_rate": 1.8279310768220987e-05, "loss": 0.31156033277511597, "step": 1734 }, { "epoch": 0.46076218297702826, "grad_norm": 1.0327514294429654, "learning_rate": 1.82768474978704e-05, "loss": 0.30409976840019226, "step": 1735 }, { "epoch": 0.46102775195857126, "grad_norm": 1.0533664417585449, "learning_rate": 1.827438263184124e-05, "loss": 0.305557519197464, "step": 1736 }, { "epoch": 0.4612933209401142, "grad_norm": 1.1216722893854725, "learning_rate": 1.827191617060869e-05, "loss": 0.36079999804496765, "step": 1737 }, { "epoch": 0.46155888992165717, "grad_norm": 1.0546022345807051, "learning_rate": 1.8269448114648264e-05, "loss": 0.3341830372810364, "step": 1738 }, { "epoch": 0.4618244589032001, "grad_norm": 1.0085785444907966, "learning_rate": 1.8266978464435764e-05, "loss": 0.3222450017929077, "step": 1739 }, { "epoch": 0.46209002788474307, "grad_norm": 1.112818872130856, "learning_rate": 1.826450722044732e-05, "loss": 0.34665441513061523, "step": 1740 }, { "epoch": 0.462355596866286, "grad_norm": 1.1112300040840664, "learning_rate": 1.8262034383159357e-05, "loss": 0.31024169921875, "step": 1741 }, { "epoch": 0.462621165847829, "grad_norm": 1.2322752248386413, "learning_rate": 1.8259559953048606e-05, "loss": 0.2950369119644165, "step": 1742 }, { "epoch": 0.4628867348293719, "grad_norm": 1.109045795536776, "learning_rate": 1.8257083930592102e-05, "loss": 0.3378523886203766, "step": 1743 }, { "epoch": 0.4631523038109149, "grad_norm": 0.9899845397184047, "learning_rate": 1.8254606316267204e-05, "loss": 0.2930060923099518, "step": 1744 }, { "epoch": 0.46341787279245783, "grad_norm": 1.079619676645024, "learning_rate": 1.8252127110551564e-05, "loss": 0.3236517012119293, "step": 1745 }, { "epoch": 0.4636834417740008, "grad_norm": 0.9852877201201444, "learning_rate": 1.824964631392314e-05, "loss": 0.3010406196117401, "step": 1746 }, { "epoch": 0.46394901075554373, "grad_norm": 1.0095585954453505, "learning_rate": 1.8247163926860204e-05, "loss": 0.3269607424736023, "step": 1747 }, { "epoch": 0.4642145797370867, "grad_norm": 1.0474961373680607, "learning_rate": 1.8244679949841328e-05, "loss": 0.3437904715538025, "step": 1748 }, { "epoch": 0.46448014871862964, "grad_norm": 1.1512723462780612, "learning_rate": 1.8242194383345394e-05, "loss": 0.37820738554000854, "step": 1749 }, { "epoch": 0.46474571770017264, "grad_norm": 1.0989334641357904, "learning_rate": 1.8239707227851592e-05, "loss": 0.3365899920463562, "step": 1750 }, { "epoch": 0.4650112866817156, "grad_norm": 0.9943228703349263, "learning_rate": 1.8237218483839414e-05, "loss": 0.30418774485588074, "step": 1751 }, { "epoch": 0.46527685566325855, "grad_norm": 0.9379554406122236, "learning_rate": 1.823472815178866e-05, "loss": 0.2923222780227661, "step": 1752 }, { "epoch": 0.4655424246448015, "grad_norm": 1.1096787188742467, "learning_rate": 1.823223623217944e-05, "loss": 0.3358995020389557, "step": 1753 }, { "epoch": 0.46580799362634445, "grad_norm": 1.0997620749237405, "learning_rate": 1.822974272549216e-05, "loss": 0.3413343131542206, "step": 1754 }, { "epoch": 0.4660735626078874, "grad_norm": 1.0873990469892099, "learning_rate": 1.822724763220755e-05, "loss": 0.33553364872932434, "step": 1755 }, { "epoch": 0.46633913158943036, "grad_norm": 1.0957210856960815, "learning_rate": 1.8224750952806626e-05, "loss": 0.35896626114845276, "step": 1756 }, { "epoch": 0.4666047005709733, "grad_norm": 1.1032076691430248, "learning_rate": 1.8222252687770718e-05, "loss": 0.35345566272735596, "step": 1757 }, { "epoch": 0.46687026955251626, "grad_norm": 1.0034635235769087, "learning_rate": 1.8219752837581466e-05, "loss": 0.3146013617515564, "step": 1758 }, { "epoch": 0.4671358385340592, "grad_norm": 1.0191336075935247, "learning_rate": 1.8217251402720807e-05, "loss": 0.33270642161369324, "step": 1759 }, { "epoch": 0.46740140751560216, "grad_norm": 1.030475428136688, "learning_rate": 1.821474838367099e-05, "loss": 0.3172033727169037, "step": 1760 }, { "epoch": 0.4676669764971451, "grad_norm": 1.6535016363051902, "learning_rate": 1.8212243780914578e-05, "loss": 0.3277033567428589, "step": 1761 }, { "epoch": 0.46793254547868807, "grad_norm": 1.1570228647748637, "learning_rate": 1.820973759493441e-05, "loss": 0.3523799777030945, "step": 1762 }, { "epoch": 0.468198114460231, "grad_norm": 1.0907259849913267, "learning_rate": 1.8207229826213664e-05, "loss": 0.32437676191329956, "step": 1763 }, { "epoch": 0.468463683441774, "grad_norm": 1.1347618214788342, "learning_rate": 1.82047204752358e-05, "loss": 0.34185051918029785, "step": 1764 }, { "epoch": 0.468729252423317, "grad_norm": 1.0561382700570243, "learning_rate": 1.8202209542484594e-05, "loss": 0.32034197449684143, "step": 1765 }, { "epoch": 0.46899482140485993, "grad_norm": 1.097207173265362, "learning_rate": 1.8199697028444125e-05, "loss": 0.30969515442848206, "step": 1766 }, { "epoch": 0.4692603903864029, "grad_norm": 0.9320632629292236, "learning_rate": 1.8197182933598776e-05, "loss": 0.24751389026641846, "step": 1767 }, { "epoch": 0.46952595936794583, "grad_norm": 1.2001835130139573, "learning_rate": 1.8194667258433235e-05, "loss": 0.3859948217868805, "step": 1768 }, { "epoch": 0.4697915283494888, "grad_norm": 1.0989779617923678, "learning_rate": 1.819215000343249e-05, "loss": 0.29364967346191406, "step": 1769 }, { "epoch": 0.47005709733103174, "grad_norm": 1.1161641657952082, "learning_rate": 1.8189631169081845e-05, "loss": 0.3560323715209961, "step": 1770 }, { "epoch": 0.4703226663125747, "grad_norm": 1.6505675097600017, "learning_rate": 1.8187110755866898e-05, "loss": 0.3458098769187927, "step": 1771 }, { "epoch": 0.47058823529411764, "grad_norm": 1.0148526914708587, "learning_rate": 1.8184588764273555e-05, "loss": 0.32131001353263855, "step": 1772 }, { "epoch": 0.4708538042756606, "grad_norm": 1.0453234866463608, "learning_rate": 1.8182065194788024e-05, "loss": 0.3011054992675781, "step": 1773 }, { "epoch": 0.47111937325720354, "grad_norm": 1.1076832582073854, "learning_rate": 1.8179540047896827e-05, "loss": 0.3314674496650696, "step": 1774 }, { "epoch": 0.4713849422387465, "grad_norm": 1.0853788387965118, "learning_rate": 1.8177013324086774e-05, "loss": 0.3437536060810089, "step": 1775 }, { "epoch": 0.47165051122028945, "grad_norm": 1.166112048160084, "learning_rate": 1.8174485023844993e-05, "loss": 0.36137935519218445, "step": 1776 }, { "epoch": 0.4719160802018324, "grad_norm": 1.0726359370167762, "learning_rate": 1.8171955147658905e-05, "loss": 0.34018874168395996, "step": 1777 }, { "epoch": 0.4721816491833754, "grad_norm": 1.0596665602066746, "learning_rate": 1.8169423696016245e-05, "loss": 0.33298587799072266, "step": 1778 }, { "epoch": 0.47244721816491836, "grad_norm": 1.1107712039752602, "learning_rate": 1.816689066940505e-05, "loss": 0.3649418354034424, "step": 1779 }, { "epoch": 0.4727127871464613, "grad_norm": 1.0148859742506888, "learning_rate": 1.8164356068313646e-05, "loss": 0.32419171929359436, "step": 1780 }, { "epoch": 0.47297835612800426, "grad_norm": 1.047167823612948, "learning_rate": 1.8161819893230688e-05, "loss": 0.288555383682251, "step": 1781 }, { "epoch": 0.4732439251095472, "grad_norm": 1.005455205363293, "learning_rate": 1.815928214464511e-05, "loss": 0.3231011629104614, "step": 1782 }, { "epoch": 0.47350949409109017, "grad_norm": 1.0470674131364166, "learning_rate": 1.815674282304617e-05, "loss": 0.29310134053230286, "step": 1783 }, { "epoch": 0.4737750630726331, "grad_norm": 1.0390137248114197, "learning_rate": 1.815420192892341e-05, "loss": 0.32683852314949036, "step": 1784 }, { "epoch": 0.47404063205417607, "grad_norm": 1.0353379429668699, "learning_rate": 1.8151659462766685e-05, "loss": 0.3200969099998474, "step": 1785 }, { "epoch": 0.474306201035719, "grad_norm": 1.051359679014311, "learning_rate": 1.814911542506616e-05, "loss": 0.3091360032558441, "step": 1786 }, { "epoch": 0.474571770017262, "grad_norm": 1.1630088603070372, "learning_rate": 1.814656981631229e-05, "loss": 0.3679049611091614, "step": 1787 }, { "epoch": 0.4748373389988049, "grad_norm": 1.1065634125772459, "learning_rate": 1.814402263699584e-05, "loss": 0.290119469165802, "step": 1788 }, { "epoch": 0.4751029079803479, "grad_norm": 1.0987492456650414, "learning_rate": 1.8141473887607874e-05, "loss": 0.31878861784935, "step": 1789 }, { "epoch": 0.47536847696189083, "grad_norm": 1.1254389921885528, "learning_rate": 1.8138923568639763e-05, "loss": 0.35820287466049194, "step": 1790 }, { "epoch": 0.4756340459434338, "grad_norm": 1.0046454439717083, "learning_rate": 1.8136371680583176e-05, "loss": 0.2924647629261017, "step": 1791 }, { "epoch": 0.4758996149249768, "grad_norm": 1.2202907606610718, "learning_rate": 1.8133818223930092e-05, "loss": 0.3799927234649658, "step": 1792 }, { "epoch": 0.47616518390651974, "grad_norm": 1.1097316301591598, "learning_rate": 1.8131263199172783e-05, "loss": 0.3505420386791229, "step": 1793 }, { "epoch": 0.4764307528880627, "grad_norm": 1.1021438648339534, "learning_rate": 1.8128706606803823e-05, "loss": 0.3291688859462738, "step": 1794 }, { "epoch": 0.47669632186960564, "grad_norm": 1.0814065231113215, "learning_rate": 1.8126148447316104e-05, "loss": 0.34079697728157043, "step": 1795 }, { "epoch": 0.4769618908511486, "grad_norm": 1.2185578909639558, "learning_rate": 1.8123588721202802e-05, "loss": 0.2898064851760864, "step": 1796 }, { "epoch": 0.47722745983269155, "grad_norm": 1.0448194415877836, "learning_rate": 1.8121027428957402e-05, "loss": 0.32089224457740784, "step": 1797 }, { "epoch": 0.4774930288142345, "grad_norm": 1.903396083379018, "learning_rate": 1.8118464571073697e-05, "loss": 0.3402039408683777, "step": 1798 }, { "epoch": 0.47775859779577745, "grad_norm": 1.1693256768707747, "learning_rate": 1.8115900148045767e-05, "loss": 0.29904159903526306, "step": 1799 }, { "epoch": 0.4780241667773204, "grad_norm": 1.0688058843932313, "learning_rate": 1.8113334160368007e-05, "loss": 0.34074240922927856, "step": 1800 }, { "epoch": 0.47828973575886335, "grad_norm": 1.0404364284009804, "learning_rate": 1.811076660853511e-05, "loss": 0.28566253185272217, "step": 1801 }, { "epoch": 0.4785553047404063, "grad_norm": 1.0267154270839738, "learning_rate": 1.8108197493042065e-05, "loss": 0.34523358941078186, "step": 1802 }, { "epoch": 0.47882087372194926, "grad_norm": 1.0082361251695107, "learning_rate": 1.8105626814384173e-05, "loss": 0.3261171281337738, "step": 1803 }, { "epoch": 0.4790864427034922, "grad_norm": 1.0353580811121572, "learning_rate": 1.8103054573057027e-05, "loss": 0.2915942966938019, "step": 1804 }, { "epoch": 0.47935201168503516, "grad_norm": 1.117140176261941, "learning_rate": 1.810048076955653e-05, "loss": 0.2999255657196045, "step": 1805 }, { "epoch": 0.47961758066657817, "grad_norm": 1.0967176640726466, "learning_rate": 1.8097905404378874e-05, "loss": 0.3294594883918762, "step": 1806 }, { "epoch": 0.4798831496481211, "grad_norm": 1.025641731681811, "learning_rate": 1.8095328478020563e-05, "loss": 0.30720093846321106, "step": 1807 }, { "epoch": 0.4801487186296641, "grad_norm": 1.0583824100775536, "learning_rate": 1.8092749990978395e-05, "loss": 0.31076985597610474, "step": 1808 }, { "epoch": 0.480414287611207, "grad_norm": 1.0650372083327142, "learning_rate": 1.8090169943749477e-05, "loss": 0.3182013928890228, "step": 1809 }, { "epoch": 0.48067985659275, "grad_norm": 1.1560421045272382, "learning_rate": 1.8087588336831206e-05, "loss": 0.325716108083725, "step": 1810 }, { "epoch": 0.48094542557429293, "grad_norm": 1.034822212222003, "learning_rate": 1.8085005170721287e-05, "loss": 0.3148769736289978, "step": 1811 }, { "epoch": 0.4812109945558359, "grad_norm": 0.9998987744353804, "learning_rate": 1.8082420445917727e-05, "loss": 0.30645644664764404, "step": 1812 }, { "epoch": 0.48147656353737883, "grad_norm": 0.9765412034449941, "learning_rate": 1.807983416291883e-05, "loss": 0.2978900969028473, "step": 1813 }, { "epoch": 0.4817421325189218, "grad_norm": 1.1281577444413164, "learning_rate": 1.8077246322223194e-05, "loss": 0.34340181946754456, "step": 1814 }, { "epoch": 0.48200770150046474, "grad_norm": 1.0940690010095575, "learning_rate": 1.8074656924329733e-05, "loss": 0.3272106349468231, "step": 1815 }, { "epoch": 0.4822732704820077, "grad_norm": 1.0823130111098402, "learning_rate": 1.807206596973765e-05, "loss": 0.31061962246894836, "step": 1816 }, { "epoch": 0.48253883946355064, "grad_norm": 1.1134329507970786, "learning_rate": 1.8069473458946445e-05, "loss": 0.28947243094444275, "step": 1817 }, { "epoch": 0.4828044084450936, "grad_norm": 1.066867737773279, "learning_rate": 1.8066879392455932e-05, "loss": 0.35057532787323, "step": 1818 }, { "epoch": 0.48306997742663654, "grad_norm": 1.5202577425125505, "learning_rate": 1.8064283770766212e-05, "loss": 0.31032001972198486, "step": 1819 }, { "epoch": 0.48333554640817955, "grad_norm": 1.1166414917810035, "learning_rate": 1.8061686594377685e-05, "loss": 0.3802293539047241, "step": 1820 }, { "epoch": 0.4836011153897225, "grad_norm": 1.122052528401037, "learning_rate": 1.8059087863791066e-05, "loss": 0.3306402564048767, "step": 1821 }, { "epoch": 0.48386668437126545, "grad_norm": 1.051177925612534, "learning_rate": 1.8056487579507352e-05, "loss": 0.32170724868774414, "step": 1822 }, { "epoch": 0.4841322533528084, "grad_norm": 1.0182895505748566, "learning_rate": 1.8053885742027854e-05, "loss": 0.35058924555778503, "step": 1823 }, { "epoch": 0.48439782233435136, "grad_norm": 1.079491665486815, "learning_rate": 1.8051282351854168e-05, "loss": 0.3796595335006714, "step": 1824 }, { "epoch": 0.4846633913158943, "grad_norm": 1.0882057457557335, "learning_rate": 1.8048677409488205e-05, "loss": 0.28997284173965454, "step": 1825 }, { "epoch": 0.48492896029743726, "grad_norm": 1.7307038017833063, "learning_rate": 1.804607091543216e-05, "loss": 0.35110151767730713, "step": 1826 }, { "epoch": 0.4851945292789802, "grad_norm": 1.1036882170711018, "learning_rate": 1.8043462870188535e-05, "loss": 0.3194088637828827, "step": 1827 }, { "epoch": 0.48546009826052317, "grad_norm": 1.0664676604065728, "learning_rate": 1.8040853274260137e-05, "loss": 0.28777945041656494, "step": 1828 }, { "epoch": 0.4857256672420661, "grad_norm": 1.0702584286398438, "learning_rate": 1.803824212815006e-05, "loss": 0.3642069697380066, "step": 1829 }, { "epoch": 0.48599123622360907, "grad_norm": 1.0626897024145745, "learning_rate": 1.80356294323617e-05, "loss": 0.32396575808525085, "step": 1830 }, { "epoch": 0.486256805205152, "grad_norm": 1.205959051296984, "learning_rate": 1.8033015187398758e-05, "loss": 0.36421436071395874, "step": 1831 }, { "epoch": 0.486522374186695, "grad_norm": 1.0011906322370974, "learning_rate": 1.8030399393765227e-05, "loss": 0.3170832395553589, "step": 1832 }, { "epoch": 0.4867879431682379, "grad_norm": 0.9739220394650455, "learning_rate": 1.8027782051965408e-05, "loss": 0.3003416359424591, "step": 1833 }, { "epoch": 0.48705351214978093, "grad_norm": 1.0701369618567955, "learning_rate": 1.802516316250388e-05, "loss": 0.30362898111343384, "step": 1834 }, { "epoch": 0.4873190811313239, "grad_norm": 1.0466563888798912, "learning_rate": 1.802254272588555e-05, "loss": 0.32721444964408875, "step": 1835 }, { "epoch": 0.48758465011286684, "grad_norm": 1.345049864677536, "learning_rate": 1.8019920742615596e-05, "loss": 0.317483514547348, "step": 1836 }, { "epoch": 0.4878502190944098, "grad_norm": 1.0589953518283157, "learning_rate": 1.801729721319951e-05, "loss": 0.2928479015827179, "step": 1837 }, { "epoch": 0.48811578807595274, "grad_norm": 1.1098495840377043, "learning_rate": 1.8014672138143073e-05, "loss": 0.3425772190093994, "step": 1838 }, { "epoch": 0.4883813570574957, "grad_norm": 1.0286414092040284, "learning_rate": 1.801204551795238e-05, "loss": 0.334087997674942, "step": 1839 }, { "epoch": 0.48864692603903864, "grad_norm": 1.0797374159140127, "learning_rate": 1.80094173531338e-05, "loss": 0.3186641335487366, "step": 1840 }, { "epoch": 0.4889124950205816, "grad_norm": 1.0361897985848911, "learning_rate": 1.800678764419401e-05, "loss": 0.3153733015060425, "step": 1841 }, { "epoch": 0.48917806400212455, "grad_norm": 1.070217807683518, "learning_rate": 1.8004156391640004e-05, "loss": 0.3323214054107666, "step": 1842 }, { "epoch": 0.4894436329836675, "grad_norm": 0.9455521865874897, "learning_rate": 1.8001523595979043e-05, "loss": 0.2856762409210205, "step": 1843 }, { "epoch": 0.48970920196521045, "grad_norm": 1.0256135363684138, "learning_rate": 1.79988892577187e-05, "loss": 0.32493725419044495, "step": 1844 }, { "epoch": 0.4899747709467534, "grad_norm": 1.1082860888483268, "learning_rate": 1.7996253377366846e-05, "loss": 0.350448876619339, "step": 1845 }, { "epoch": 0.49024033992829635, "grad_norm": 1.096249407467401, "learning_rate": 1.7993615955431648e-05, "loss": 0.32246965169906616, "step": 1846 }, { "epoch": 0.4905059089098393, "grad_norm": 0.9715072313794847, "learning_rate": 1.799097699242157e-05, "loss": 0.302636057138443, "step": 1847 }, { "epoch": 0.4907714778913823, "grad_norm": 1.1573319310132777, "learning_rate": 1.7988336488845374e-05, "loss": 0.34280693531036377, "step": 1848 }, { "epoch": 0.49103704687292526, "grad_norm": 1.1205814585182334, "learning_rate": 1.7985694445212118e-05, "loss": 0.3650673031806946, "step": 1849 }, { "epoch": 0.4913026158544682, "grad_norm": 1.1348057531260405, "learning_rate": 1.798305086203115e-05, "loss": 0.33800822496414185, "step": 1850 }, { "epoch": 0.49156818483601117, "grad_norm": 1.0428655272942455, "learning_rate": 1.7980405739812134e-05, "loss": 0.31522083282470703, "step": 1851 }, { "epoch": 0.4918337538175541, "grad_norm": 1.177464907100392, "learning_rate": 1.7977759079065003e-05, "loss": 0.3374335765838623, "step": 1852 }, { "epoch": 0.49209932279909707, "grad_norm": 1.060278247692231, "learning_rate": 1.7975110880300018e-05, "loss": 0.33803191781044006, "step": 1853 }, { "epoch": 0.49236489178064, "grad_norm": 1.0982376140773644, "learning_rate": 1.797246114402771e-05, "loss": 0.37764933705329895, "step": 1854 }, { "epoch": 0.492630460762183, "grad_norm": 0.9654297547716862, "learning_rate": 1.796980987075892e-05, "loss": 0.3075840473175049, "step": 1855 }, { "epoch": 0.4928960297437259, "grad_norm": 0.9768928030686648, "learning_rate": 1.7967157061004782e-05, "loss": 0.306305855512619, "step": 1856 }, { "epoch": 0.4931615987252689, "grad_norm": 1.0225684543938522, "learning_rate": 1.796450271527673e-05, "loss": 0.3474302291870117, "step": 1857 }, { "epoch": 0.49342716770681183, "grad_norm": 1.0243106870487633, "learning_rate": 1.7961846834086483e-05, "loss": 0.31059685349464417, "step": 1858 }, { "epoch": 0.4936927366883548, "grad_norm": 1.0236396527349367, "learning_rate": 1.795918941794607e-05, "loss": 0.346218079328537, "step": 1859 }, { "epoch": 0.49395830566989773, "grad_norm": 0.9969229384493907, "learning_rate": 1.7956530467367805e-05, "loss": 0.28371214866638184, "step": 1860 }, { "epoch": 0.4942238746514407, "grad_norm": 0.8979156608776232, "learning_rate": 1.7953869982864306e-05, "loss": 0.27775150537490845, "step": 1861 }, { "epoch": 0.4944894436329837, "grad_norm": 1.279703247293047, "learning_rate": 1.795120796494848e-05, "loss": 0.328782856464386, "step": 1862 }, { "epoch": 0.49475501261452665, "grad_norm": 1.0950381369417217, "learning_rate": 1.7948544414133534e-05, "loss": 0.33220064640045166, "step": 1863 }, { "epoch": 0.4950205815960696, "grad_norm": 1.0528449584388764, "learning_rate": 1.794587933093297e-05, "loss": 0.32681554555892944, "step": 1864 }, { "epoch": 0.49528615057761255, "grad_norm": 1.1023465974826758, "learning_rate": 1.7943212715860586e-05, "loss": 0.32202866673469543, "step": 1865 }, { "epoch": 0.4955517195591555, "grad_norm": 2.266456857585339, "learning_rate": 1.7940544569430468e-05, "loss": 0.3051350712776184, "step": 1866 }, { "epoch": 0.49581728854069845, "grad_norm": 1.1617568134775966, "learning_rate": 1.793787489215701e-05, "loss": 0.3924705386161804, "step": 1867 }, { "epoch": 0.4960828575222414, "grad_norm": 1.018817969430421, "learning_rate": 1.793520368455489e-05, "loss": 0.30267882347106934, "step": 1868 }, { "epoch": 0.49634842650378436, "grad_norm": 1.0585020042998596, "learning_rate": 1.793253094713909e-05, "loss": 0.3150729238986969, "step": 1869 }, { "epoch": 0.4966139954853273, "grad_norm": 1.314679145900761, "learning_rate": 1.7929856680424872e-05, "loss": 0.33814147114753723, "step": 1870 }, { "epoch": 0.49687956446687026, "grad_norm": 1.010460021909887, "learning_rate": 1.7927180884927814e-05, "loss": 0.31929856538772583, "step": 1871 }, { "epoch": 0.4971451334484132, "grad_norm": 1.1376790681693039, "learning_rate": 1.7924503561163775e-05, "loss": 0.3797461688518524, "step": 1872 }, { "epoch": 0.49741070242995616, "grad_norm": 1.057594588942085, "learning_rate": 1.792182470964891e-05, "loss": 0.3056377172470093, "step": 1873 }, { "epoch": 0.4976762714114991, "grad_norm": 1.1254473942016883, "learning_rate": 1.7919144330899668e-05, "loss": 0.3526398539543152, "step": 1874 }, { "epoch": 0.49794184039304207, "grad_norm": 1.0289140670533532, "learning_rate": 1.79164624254328e-05, "loss": 0.3183595538139343, "step": 1875 }, { "epoch": 0.4982074093745851, "grad_norm": 1.1908370019011798, "learning_rate": 1.791377899376534e-05, "loss": 0.3604113459587097, "step": 1876 }, { "epoch": 0.498472978356128, "grad_norm": 1.1651856770093412, "learning_rate": 1.7911094036414623e-05, "loss": 0.3219848573207855, "step": 1877 }, { "epoch": 0.498738547337671, "grad_norm": 1.0586801467718077, "learning_rate": 1.7908407553898282e-05, "loss": 0.28773394227027893, "step": 1878 }, { "epoch": 0.49900411631921393, "grad_norm": 1.0649509880321448, "learning_rate": 1.7905719546734233e-05, "loss": 0.31453996896743774, "step": 1879 }, { "epoch": 0.4992696853007569, "grad_norm": 0.9878415524405192, "learning_rate": 1.7903030015440696e-05, "loss": 0.2947153151035309, "step": 1880 }, { "epoch": 0.49953525428229983, "grad_norm": 1.0652111521233423, "learning_rate": 1.7900338960536178e-05, "loss": 0.313723087310791, "step": 1881 }, { "epoch": 0.4998008232638428, "grad_norm": 1.0853994840945123, "learning_rate": 1.7897646382539485e-05, "loss": 0.3385108709335327, "step": 1882 }, { "epoch": 0.5000663922453857, "grad_norm": 1.0993457819479324, "learning_rate": 1.7894952281969712e-05, "loss": 0.31417039036750793, "step": 1883 }, { "epoch": 0.5003319612269287, "grad_norm": 1.1452192213941934, "learning_rate": 1.7892256659346253e-05, "loss": 0.3555717468261719, "step": 1884 }, { "epoch": 0.5005975302084716, "grad_norm": 1.1989261836629121, "learning_rate": 1.7889559515188793e-05, "loss": 0.3724518120288849, "step": 1885 }, { "epoch": 0.5008630991900146, "grad_norm": 1.0516015708006068, "learning_rate": 1.7886860850017306e-05, "loss": 0.32646167278289795, "step": 1886 }, { "epoch": 0.5011286681715575, "grad_norm": 1.079300223054909, "learning_rate": 1.7884160664352062e-05, "loss": 0.31072959303855896, "step": 1887 }, { "epoch": 0.5013942371531005, "grad_norm": 0.9518526173941219, "learning_rate": 1.7881458958713628e-05, "loss": 0.26987242698669434, "step": 1888 }, { "epoch": 0.5016598061346434, "grad_norm": 0.9908294117764815, "learning_rate": 1.787875573362286e-05, "loss": 0.30105817317962646, "step": 1889 }, { "epoch": 0.5019253751161864, "grad_norm": 1.0444226583374554, "learning_rate": 1.7876050989600908e-05, "loss": 0.31277188658714294, "step": 1890 }, { "epoch": 0.5021909440977294, "grad_norm": 1.0192470233304842, "learning_rate": 1.7873344727169214e-05, "loss": 0.31068161129951477, "step": 1891 }, { "epoch": 0.5024565130792723, "grad_norm": 1.0797105219167356, "learning_rate": 1.7870636946849512e-05, "loss": 0.3491121530532837, "step": 1892 }, { "epoch": 0.5027220820608153, "grad_norm": 1.0753654491775293, "learning_rate": 1.7867927649163838e-05, "loss": 0.3223581612110138, "step": 1893 }, { "epoch": 0.5029876510423582, "grad_norm": 1.1295999155195493, "learning_rate": 1.7865216834634506e-05, "loss": 0.345224529504776, "step": 1894 }, { "epoch": 0.5032532200239012, "grad_norm": 1.1419032071310418, "learning_rate": 1.7862504503784123e-05, "loss": 0.3408205211162567, "step": 1895 }, { "epoch": 0.5035187890054441, "grad_norm": 0.9713066472066385, "learning_rate": 1.7859790657135608e-05, "loss": 0.2680068016052246, "step": 1896 }, { "epoch": 0.5037843579869872, "grad_norm": 0.9186813995364894, "learning_rate": 1.7857075295212148e-05, "loss": 0.29733535647392273, "step": 1897 }, { "epoch": 0.5040499269685301, "grad_norm": 1.1196248802118025, "learning_rate": 1.785435841853724e-05, "loss": 0.34820133447647095, "step": 1898 }, { "epoch": 0.5043154959500731, "grad_norm": 1.134445876132798, "learning_rate": 1.785164002763466e-05, "loss": 0.3306594491004944, "step": 1899 }, { "epoch": 0.504581064931616, "grad_norm": 1.0579272410020724, "learning_rate": 1.7848920123028482e-05, "loss": 0.3166846036911011, "step": 1900 }, { "epoch": 0.504846633913159, "grad_norm": 1.2213509498849395, "learning_rate": 1.784619870524308e-05, "loss": 0.3406408727169037, "step": 1901 }, { "epoch": 0.5051122028947019, "grad_norm": 1.0410168562106317, "learning_rate": 1.78434757748031e-05, "loss": 0.36358171701431274, "step": 1902 }, { "epoch": 0.5053777718762449, "grad_norm": 1.0510382236040618, "learning_rate": 1.7840751332233498e-05, "loss": 0.34045761823654175, "step": 1903 }, { "epoch": 0.5056433408577878, "grad_norm": 1.0566120463915532, "learning_rate": 1.783802537805951e-05, "loss": 0.3442475199699402, "step": 1904 }, { "epoch": 0.5059089098393308, "grad_norm": 1.1632822330113848, "learning_rate": 1.7835297912806675e-05, "loss": 0.3488585650920868, "step": 1905 }, { "epoch": 0.5061744788208737, "grad_norm": 1.098650773563784, "learning_rate": 1.7832568937000808e-05, "loss": 0.3340107500553131, "step": 1906 }, { "epoch": 0.5064400478024167, "grad_norm": 1.0195614065654457, "learning_rate": 1.7829838451168027e-05, "loss": 0.3206177353858948, "step": 1907 }, { "epoch": 0.5067056167839596, "grad_norm": 1.0219563874782234, "learning_rate": 1.782710645583473e-05, "loss": 0.2851010262966156, "step": 1908 }, { "epoch": 0.5069711857655026, "grad_norm": 1.0249326570563306, "learning_rate": 1.782437295152763e-05, "loss": 0.31850844621658325, "step": 1909 }, { "epoch": 0.5072367547470455, "grad_norm": 1.0890541355083159, "learning_rate": 1.7821637938773704e-05, "loss": 0.3343108892440796, "step": 1910 }, { "epoch": 0.5075023237285885, "grad_norm": 1.1131994842325255, "learning_rate": 1.781890141810023e-05, "loss": 0.3423745930194855, "step": 1911 }, { "epoch": 0.5077678927101315, "grad_norm": 1.057536319451762, "learning_rate": 1.7816163390034775e-05, "loss": 0.30980780720710754, "step": 1912 }, { "epoch": 0.5080334616916744, "grad_norm": 1.0099692843485935, "learning_rate": 1.7813423855105203e-05, "loss": 0.31217479705810547, "step": 1913 }, { "epoch": 0.5082990306732174, "grad_norm": 1.0721675523916532, "learning_rate": 1.7810682813839664e-05, "loss": 0.34741947054862976, "step": 1914 }, { "epoch": 0.5085645996547603, "grad_norm": 1.1098427332228447, "learning_rate": 1.7807940266766595e-05, "loss": 0.32275527715682983, "step": 1915 }, { "epoch": 0.5088301686363033, "grad_norm": 1.1130434711054393, "learning_rate": 1.7805196214414728e-05, "loss": 0.32760411500930786, "step": 1916 }, { "epoch": 0.5090957376178462, "grad_norm": 1.1445787919507704, "learning_rate": 1.7802450657313086e-05, "loss": 0.3877720832824707, "step": 1917 }, { "epoch": 0.5093613065993892, "grad_norm": 1.1135916509560913, "learning_rate": 1.779970359599098e-05, "loss": 0.33458876609802246, "step": 1918 }, { "epoch": 0.5096268755809321, "grad_norm": 0.9826034605244246, "learning_rate": 1.7796955030978007e-05, "loss": 0.30603206157684326, "step": 1919 }, { "epoch": 0.5098924445624751, "grad_norm": 0.9902684589377142, "learning_rate": 1.7794204962804063e-05, "loss": 0.2920286953449249, "step": 1920 }, { "epoch": 0.510158013544018, "grad_norm": 1.1034173597508874, "learning_rate": 1.7791453391999325e-05, "loss": 0.32407981157302856, "step": 1921 }, { "epoch": 0.510423582525561, "grad_norm": 1.3200648964540613, "learning_rate": 1.7788700319094263e-05, "loss": 0.30423563718795776, "step": 1922 }, { "epoch": 0.5106891515071039, "grad_norm": 1.1213502448496324, "learning_rate": 1.7785945744619642e-05, "loss": 0.34691399335861206, "step": 1923 }, { "epoch": 0.5109547204886469, "grad_norm": 1.0498801582672959, "learning_rate": 1.7783189669106503e-05, "loss": 0.3217603266239166, "step": 1924 }, { "epoch": 0.5112202894701899, "grad_norm": 1.1943957961346587, "learning_rate": 1.7780432093086198e-05, "loss": 0.365132212638855, "step": 1925 }, { "epoch": 0.5114858584517329, "grad_norm": 0.9783494867108459, "learning_rate": 1.7777673017090344e-05, "loss": 0.29662930965423584, "step": 1926 }, { "epoch": 0.5117514274332758, "grad_norm": 1.0707541061431447, "learning_rate": 1.7774912441650857e-05, "loss": 0.3324819803237915, "step": 1927 }, { "epoch": 0.5120169964148188, "grad_norm": 1.0040789031204058, "learning_rate": 1.7772150367299953e-05, "loss": 0.29331067204475403, "step": 1928 }, { "epoch": 0.5122825653963617, "grad_norm": 1.064062495235822, "learning_rate": 1.7769386794570117e-05, "loss": 0.3158259987831116, "step": 1929 }, { "epoch": 0.5125481343779047, "grad_norm": 1.020159871349018, "learning_rate": 1.7766621723994145e-05, "loss": 0.2824791967868805, "step": 1930 }, { "epoch": 0.5128137033594476, "grad_norm": 1.0493215169042918, "learning_rate": 1.7763855156105097e-05, "loss": 0.2690732777118683, "step": 1931 }, { "epoch": 0.5130792723409906, "grad_norm": 1.043157004637876, "learning_rate": 1.7761087091436346e-05, "loss": 0.31360942125320435, "step": 1932 }, { "epoch": 0.5133448413225336, "grad_norm": 0.9858891902519169, "learning_rate": 1.7758317530521535e-05, "loss": 0.28334349393844604, "step": 1933 }, { "epoch": 0.5136104103040765, "grad_norm": 1.1739380172138798, "learning_rate": 1.7755546473894604e-05, "loss": 0.3857404589653015, "step": 1934 }, { "epoch": 0.5138759792856195, "grad_norm": 1.0280582546011092, "learning_rate": 1.7752773922089784e-05, "loss": 0.2852492332458496, "step": 1935 }, { "epoch": 0.5141415482671624, "grad_norm": 1.003050995152578, "learning_rate": 1.7749999875641585e-05, "loss": 0.2959831953048706, "step": 1936 }, { "epoch": 0.5144071172487054, "grad_norm": 1.100974201889633, "learning_rate": 1.7747224335084815e-05, "loss": 0.3129635453224182, "step": 1937 }, { "epoch": 0.5146726862302483, "grad_norm": 1.0336946735940622, "learning_rate": 1.774444730095456e-05, "loss": 0.31391531229019165, "step": 1938 }, { "epoch": 0.5149382552117913, "grad_norm": 1.0155253897885985, "learning_rate": 1.7741668773786202e-05, "loss": 0.30274757742881775, "step": 1939 }, { "epoch": 0.5152038241933342, "grad_norm": 1.026561688701391, "learning_rate": 1.7738888754115413e-05, "loss": 0.29162222146987915, "step": 1940 }, { "epoch": 0.5154693931748772, "grad_norm": 1.045931473256506, "learning_rate": 1.7736107242478143e-05, "loss": 0.30358970165252686, "step": 1941 }, { "epoch": 0.5157349621564201, "grad_norm": 1.11915386227621, "learning_rate": 1.7733324239410634e-05, "loss": 0.32268065214157104, "step": 1942 }, { "epoch": 0.5160005311379631, "grad_norm": 1.0626040245012975, "learning_rate": 1.7730539745449417e-05, "loss": 0.31925222277641296, "step": 1943 }, { "epoch": 0.516266100119506, "grad_norm": 1.1170224886553113, "learning_rate": 1.7727753761131312e-05, "loss": 0.32883748412132263, "step": 1944 }, { "epoch": 0.516531669101049, "grad_norm": 1.101510406621582, "learning_rate": 1.7724966286993425e-05, "loss": 0.3212829530239105, "step": 1945 }, { "epoch": 0.5167972380825919, "grad_norm": 1.1477333753851342, "learning_rate": 1.772217732357314e-05, "loss": 0.32909759879112244, "step": 1946 }, { "epoch": 0.5170628070641349, "grad_norm": 33.3722959000957, "learning_rate": 1.7719386871408147e-05, "loss": 0.3451213538646698, "step": 1947 }, { "epoch": 0.5173283760456778, "grad_norm": 1.0792459943819739, "learning_rate": 1.7716594931036402e-05, "loss": 0.318422794342041, "step": 1948 }, { "epoch": 0.5175939450272208, "grad_norm": 1.1243494025490273, "learning_rate": 1.7713801502996166e-05, "loss": 0.3165292739868164, "step": 1949 }, { "epoch": 0.5178595140087637, "grad_norm": 1.1353818628503742, "learning_rate": 1.7711006587825975e-05, "loss": 0.3116700351238251, "step": 1950 }, { "epoch": 0.5181250829903067, "grad_norm": 1.2005138291757869, "learning_rate": 1.7708210186064656e-05, "loss": 0.32102686166763306, "step": 1951 }, { "epoch": 0.5183906519718496, "grad_norm": 1.079523368082095, "learning_rate": 1.7705412298251323e-05, "loss": 0.33025500178337097, "step": 1952 }, { "epoch": 0.5186562209533926, "grad_norm": 1.2087703844513067, "learning_rate": 1.7702612924925377e-05, "loss": 0.36113062500953674, "step": 1953 }, { "epoch": 0.5189217899349357, "grad_norm": 1.1242566727618883, "learning_rate": 1.7699812066626503e-05, "loss": 0.3092479109764099, "step": 1954 }, { "epoch": 0.5191873589164786, "grad_norm": 1.117146005158035, "learning_rate": 1.769700972389467e-05, "loss": 0.3389117419719696, "step": 1955 }, { "epoch": 0.5194529278980216, "grad_norm": 1.1525168535902064, "learning_rate": 1.7694205897270147e-05, "loss": 0.3225803077220917, "step": 1956 }, { "epoch": 0.5197184968795645, "grad_norm": 1.0237361691251219, "learning_rate": 1.7691400587293467e-05, "loss": 0.3226786255836487, "step": 1957 }, { "epoch": 0.5199840658611075, "grad_norm": 1.0060672564491426, "learning_rate": 1.7688593794505466e-05, "loss": 0.27708399295806885, "step": 1958 }, { "epoch": 0.5202496348426504, "grad_norm": 1.0763214880079806, "learning_rate": 1.768578551944726e-05, "loss": 0.36100950837135315, "step": 1959 }, { "epoch": 0.5205152038241934, "grad_norm": 1.043549985204807, "learning_rate": 1.768297576266025e-05, "loss": 0.3138211965560913, "step": 1960 }, { "epoch": 0.5207807728057363, "grad_norm": 1.0618046264640966, "learning_rate": 1.7680164524686128e-05, "loss": 0.33959656953811646, "step": 1961 }, { "epoch": 0.5210463417872793, "grad_norm": 0.9826913420332539, "learning_rate": 1.7677351806066863e-05, "loss": 0.3093605637550354, "step": 1962 }, { "epoch": 0.5213119107688222, "grad_norm": 1.13307401094871, "learning_rate": 1.7674537607344717e-05, "loss": 0.3098641633987427, "step": 1963 }, { "epoch": 0.5215774797503652, "grad_norm": 1.0810255128706003, "learning_rate": 1.767172192906223e-05, "loss": 0.35172683000564575, "step": 1964 }, { "epoch": 0.5218430487319081, "grad_norm": 1.0729896509671073, "learning_rate": 1.7668904771762242e-05, "loss": 0.3535798192024231, "step": 1965 }, { "epoch": 0.5221086177134511, "grad_norm": 1.2521081937006913, "learning_rate": 1.766608613598785e-05, "loss": 0.36183854937553406, "step": 1966 }, { "epoch": 0.522374186694994, "grad_norm": 1.0735439944400962, "learning_rate": 1.7663266022282473e-05, "loss": 0.35995131731033325, "step": 1967 }, { "epoch": 0.522639755676537, "grad_norm": 1.117054454049305, "learning_rate": 1.766044443118978e-05, "loss": 0.38672733306884766, "step": 1968 }, { "epoch": 0.5229053246580799, "grad_norm": 1.0862044019422723, "learning_rate": 1.765762136325375e-05, "loss": 0.3389524221420288, "step": 1969 }, { "epoch": 0.5231708936396229, "grad_norm": 0.9847521483407152, "learning_rate": 1.7654796819018635e-05, "loss": 0.3325779139995575, "step": 1970 }, { "epoch": 0.5234364626211658, "grad_norm": 1.014607581135561, "learning_rate": 1.7651970799028976e-05, "loss": 0.328407347202301, "step": 1971 }, { "epoch": 0.5237020316027088, "grad_norm": 0.9793310107257689, "learning_rate": 1.764914330382959e-05, "loss": 0.3050537705421448, "step": 1972 }, { "epoch": 0.5239676005842517, "grad_norm": 1.1408686145630131, "learning_rate": 1.7646314333965588e-05, "loss": 0.35500285029411316, "step": 1973 }, { "epoch": 0.5242331695657947, "grad_norm": 1.1035893819341516, "learning_rate": 1.7643483889982364e-05, "loss": 0.30319780111312866, "step": 1974 }, { "epoch": 0.5244987385473376, "grad_norm": 1.0161223434375823, "learning_rate": 1.7640651972425592e-05, "loss": 0.315757691860199, "step": 1975 }, { "epoch": 0.5247643075288806, "grad_norm": 1.0278713767432786, "learning_rate": 1.7637818581841234e-05, "loss": 0.28562331199645996, "step": 1976 }, { "epoch": 0.5250298765104235, "grad_norm": 1.017204404946826, "learning_rate": 1.763498371877553e-05, "loss": 0.29798296093940735, "step": 1977 }, { "epoch": 0.5252954454919665, "grad_norm": 1.1245986087835715, "learning_rate": 1.763214738377501e-05, "loss": 0.2923639416694641, "step": 1978 }, { "epoch": 0.5255610144735094, "grad_norm": 1.0282257211254215, "learning_rate": 1.7629309577386492e-05, "loss": 0.2858009934425354, "step": 1979 }, { "epoch": 0.5258265834550524, "grad_norm": 1.1185725636940211, "learning_rate": 1.7626470300157064e-05, "loss": 0.3615952134132385, "step": 1980 }, { "epoch": 0.5260921524365954, "grad_norm": 1.1357118701340632, "learning_rate": 1.762362955263411e-05, "loss": 0.36142098903656006, "step": 1981 }, { "epoch": 0.5263577214181384, "grad_norm": 1.1305105783283786, "learning_rate": 1.762078733536529e-05, "loss": 0.3335961699485779, "step": 1982 }, { "epoch": 0.5266232903996814, "grad_norm": 1.2367655641806865, "learning_rate": 1.761794364889855e-05, "loss": 0.34549272060394287, "step": 1983 }, { "epoch": 0.5268888593812243, "grad_norm": 1.1166612317693478, "learning_rate": 1.761509849378212e-05, "loss": 0.3177812993526459, "step": 1984 }, { "epoch": 0.5271544283627673, "grad_norm": 1.1485560676920734, "learning_rate": 1.7612251870564515e-05, "loss": 0.33191388845443726, "step": 1985 }, { "epoch": 0.5274199973443102, "grad_norm": 1.0807821541967428, "learning_rate": 1.7609403779794523e-05, "loss": 0.30732038617134094, "step": 1986 }, { "epoch": 0.5276855663258532, "grad_norm": 1.1038043700347457, "learning_rate": 1.7606554222021226e-05, "loss": 0.33012068271636963, "step": 1987 }, { "epoch": 0.5279511353073961, "grad_norm": 1.2233212729045404, "learning_rate": 1.760370319779399e-05, "loss": 0.3396066427230835, "step": 1988 }, { "epoch": 0.5282167042889391, "grad_norm": 1.0755028443639627, "learning_rate": 1.7600850707662454e-05, "loss": 0.29053401947021484, "step": 1989 }, { "epoch": 0.528482273270482, "grad_norm": 1.0859289781343007, "learning_rate": 1.7597996752176545e-05, "loss": 0.32927206158638, "step": 1990 }, { "epoch": 0.528747842252025, "grad_norm": 1.0494460781018915, "learning_rate": 1.759514133188647e-05, "loss": 0.309224933385849, "step": 1991 }, { "epoch": 0.5290134112335679, "grad_norm": 1.0870307368096292, "learning_rate": 1.7592284447342725e-05, "loss": 0.31973862648010254, "step": 1992 }, { "epoch": 0.5292789802151109, "grad_norm": 1.0491029702582455, "learning_rate": 1.758942609909608e-05, "loss": 0.3331080377101898, "step": 1993 }, { "epoch": 0.5295445491966538, "grad_norm": 1.0710245753206995, "learning_rate": 1.7586566287697592e-05, "loss": 0.32755160331726074, "step": 1994 }, { "epoch": 0.5298101181781968, "grad_norm": 1.0377451052992368, "learning_rate": 1.7583705013698602e-05, "loss": 0.31942498683929443, "step": 1995 }, { "epoch": 0.5300756871597397, "grad_norm": 1.1665695354682926, "learning_rate": 1.7580842277650723e-05, "loss": 0.3199199438095093, "step": 1996 }, { "epoch": 0.5303412561412827, "grad_norm": 0.9680761404148592, "learning_rate": 1.7577978080105864e-05, "loss": 0.28153708577156067, "step": 1997 }, { "epoch": 0.5306068251228256, "grad_norm": 1.0336529884327843, "learning_rate": 1.7575112421616203e-05, "loss": 0.3050921559333801, "step": 1998 }, { "epoch": 0.5308723941043686, "grad_norm": 1.0836881519572394, "learning_rate": 1.7572245302734208e-05, "loss": 0.3242149353027344, "step": 1999 }, { "epoch": 0.5311379630859115, "grad_norm": 0.9889139549595165, "learning_rate": 1.7569376724012622e-05, "loss": 0.29947227239608765, "step": 2000 }, { "epoch": 0.5314035320674545, "grad_norm": 1.132976441688301, "learning_rate": 1.756650668600448e-05, "loss": 0.3229755163192749, "step": 2001 }, { "epoch": 0.5316691010489975, "grad_norm": 1.0802391073518836, "learning_rate": 1.7563635189263086e-05, "loss": 0.3544544577598572, "step": 2002 }, { "epoch": 0.5319346700305404, "grad_norm": 1.0996284853033707, "learning_rate": 1.756076223434203e-05, "loss": 0.32807621359825134, "step": 2003 }, { "epoch": 0.5322002390120834, "grad_norm": 0.9920629294688551, "learning_rate": 1.7557887821795192e-05, "loss": 0.3057190477848053, "step": 2004 }, { "epoch": 0.5324658079936263, "grad_norm": 1.0234244423063892, "learning_rate": 1.7555011952176716e-05, "loss": 0.29419198632240295, "step": 2005 }, { "epoch": 0.5327313769751693, "grad_norm": 0.9799120327217228, "learning_rate": 1.755213462604104e-05, "loss": 0.3232089877128601, "step": 2006 }, { "epoch": 0.5329969459567122, "grad_norm": 1.0186576745896931, "learning_rate": 1.7549255843942875e-05, "loss": 0.29784274101257324, "step": 2007 }, { "epoch": 0.5332625149382552, "grad_norm": 1.0470325382276877, "learning_rate": 1.7546375606437216e-05, "loss": 0.31421899795532227, "step": 2008 }, { "epoch": 0.5335280839197981, "grad_norm": 1.0641694414781755, "learning_rate": 1.7543493914079345e-05, "loss": 0.30681121349334717, "step": 2009 }, { "epoch": 0.5337936529013412, "grad_norm": 1.0092085906510277, "learning_rate": 1.7540610767424813e-05, "loss": 0.3114027976989746, "step": 2010 }, { "epoch": 0.5340592218828841, "grad_norm": 1.0064230726553411, "learning_rate": 1.753772616702946e-05, "loss": 0.3030378520488739, "step": 2011 }, { "epoch": 0.5343247908644271, "grad_norm": 1.1096181297712675, "learning_rate": 1.75348401134494e-05, "loss": 0.30272024869918823, "step": 2012 }, { "epoch": 0.53459035984597, "grad_norm": 1.049795668852804, "learning_rate": 1.7531952607241033e-05, "loss": 0.35117241740226746, "step": 2013 }, { "epoch": 0.534855928827513, "grad_norm": 1.2552056089457548, "learning_rate": 1.7529063648961035e-05, "loss": 0.297889769077301, "step": 2014 }, { "epoch": 0.5351214978090559, "grad_norm": 1.1238332501182418, "learning_rate": 1.752617323916636e-05, "loss": 0.32858210802078247, "step": 2015 }, { "epoch": 0.5353870667905989, "grad_norm": 1.117582559290418, "learning_rate": 1.7523281378414246e-05, "loss": 0.3095484673976898, "step": 2016 }, { "epoch": 0.5356526357721418, "grad_norm": 1.1072331793921826, "learning_rate": 1.752038806726222e-05, "loss": 0.34490731358528137, "step": 2017 }, { "epoch": 0.5359182047536848, "grad_norm": 1.1427367564985542, "learning_rate": 1.751749330626806e-05, "loss": 0.35144859552383423, "step": 2018 }, { "epoch": 0.5361837737352277, "grad_norm": 1.0337528414474293, "learning_rate": 1.751459709598985e-05, "loss": 0.26337549090385437, "step": 2019 }, { "epoch": 0.5364493427167707, "grad_norm": 1.0719958558069054, "learning_rate": 1.7511699436985952e-05, "loss": 0.3235297203063965, "step": 2020 }, { "epoch": 0.5367149116983136, "grad_norm": 1.1655117185465573, "learning_rate": 1.7508800329814993e-05, "loss": 0.35195302963256836, "step": 2021 }, { "epoch": 0.5369804806798566, "grad_norm": 1.0547432431007058, "learning_rate": 1.7505899775035887e-05, "loss": 0.3226467967033386, "step": 2022 }, { "epoch": 0.5372460496613995, "grad_norm": 1.0406958245289468, "learning_rate": 1.750299777320783e-05, "loss": 0.30616605281829834, "step": 2023 }, { "epoch": 0.5375116186429425, "grad_norm": 1.074902411593199, "learning_rate": 1.7500094324890294e-05, "loss": 0.3007400333881378, "step": 2024 }, { "epoch": 0.5377771876244855, "grad_norm": 1.1883491645763606, "learning_rate": 1.7497189430643025e-05, "loss": 0.35409432649612427, "step": 2025 }, { "epoch": 0.5380427566060284, "grad_norm": 1.6951314154408594, "learning_rate": 1.7494283091026053e-05, "loss": 0.33718281984329224, "step": 2026 }, { "epoch": 0.5383083255875714, "grad_norm": 1.0940933435725269, "learning_rate": 1.749137530659969e-05, "loss": 0.3589650094509125, "step": 2027 }, { "epoch": 0.5385738945691143, "grad_norm": 1.1114345705753812, "learning_rate": 1.7488466077924525e-05, "loss": 0.35314273834228516, "step": 2028 }, { "epoch": 0.5388394635506573, "grad_norm": 1.017869922891923, "learning_rate": 1.7485555405561412e-05, "loss": 0.28393587470054626, "step": 2029 }, { "epoch": 0.5391050325322002, "grad_norm": 1.0276825009259218, "learning_rate": 1.7482643290071503e-05, "loss": 0.3262496292591095, "step": 2030 }, { "epoch": 0.5393706015137432, "grad_norm": 1.122887144479208, "learning_rate": 1.7479729732016218e-05, "loss": 0.3549670875072479, "step": 2031 }, { "epoch": 0.5396361704952861, "grad_norm": 1.0211791251004596, "learning_rate": 1.7476814731957253e-05, "loss": 0.30668947100639343, "step": 2032 }, { "epoch": 0.5399017394768291, "grad_norm": 0.9278865240006526, "learning_rate": 1.747389829045659e-05, "loss": 0.2942228317260742, "step": 2033 }, { "epoch": 0.540167308458372, "grad_norm": 1.023956047651912, "learning_rate": 1.7470980408076484e-05, "loss": 0.3166583478450775, "step": 2034 }, { "epoch": 0.540432877439915, "grad_norm": 1.1503051826481139, "learning_rate": 1.7468061085379467e-05, "loss": 0.35149675607681274, "step": 2035 }, { "epoch": 0.5406984464214579, "grad_norm": 1.1081467050264138, "learning_rate": 1.7465140322928353e-05, "loss": 0.32645004987716675, "step": 2036 }, { "epoch": 0.5409640154030009, "grad_norm": 1.1656339653416823, "learning_rate": 1.7462218121286224e-05, "loss": 0.3078027367591858, "step": 2037 }, { "epoch": 0.5412295843845439, "grad_norm": 1.0310810248927436, "learning_rate": 1.7459294481016452e-05, "loss": 0.28726300597190857, "step": 2038 }, { "epoch": 0.5414951533660869, "grad_norm": 1.028103971871598, "learning_rate": 1.7456369402682675e-05, "loss": 0.29330572485923767, "step": 2039 }, { "epoch": 0.5417607223476298, "grad_norm": 1.176742297493161, "learning_rate": 1.7453442886848818e-05, "loss": 0.3151019215583801, "step": 2040 }, { "epoch": 0.5420262913291728, "grad_norm": 1.0830810759861134, "learning_rate": 1.745051493407908e-05, "loss": 0.3267561197280884, "step": 2041 }, { "epoch": 0.5422918603107157, "grad_norm": 1.0462822233377385, "learning_rate": 1.7447585544937933e-05, "loss": 0.2834410071372986, "step": 2042 }, { "epoch": 0.5425574292922587, "grad_norm": 0.9922210453154783, "learning_rate": 1.7444654719990128e-05, "loss": 0.29896080493927, "step": 2043 }, { "epoch": 0.5428229982738016, "grad_norm": 1.0716195406510356, "learning_rate": 1.7441722459800695e-05, "loss": 0.3084600865840912, "step": 2044 }, { "epoch": 0.5430885672553446, "grad_norm": 1.100381998832612, "learning_rate": 1.743878876493494e-05, "loss": 0.3178163170814514, "step": 2045 }, { "epoch": 0.5433541362368876, "grad_norm": 1.1512124937535644, "learning_rate": 1.743585363595844e-05, "loss": 0.32886385917663574, "step": 2046 }, { "epoch": 0.5436197052184305, "grad_norm": 1.0499932799675828, "learning_rate": 1.743291707343706e-05, "loss": 0.31810784339904785, "step": 2047 }, { "epoch": 0.5438852741999735, "grad_norm": 0.994229574171737, "learning_rate": 1.7429979077936928e-05, "loss": 0.3003198504447937, "step": 2048 }, { "epoch": 0.5441508431815164, "grad_norm": 1.1622503660754158, "learning_rate": 1.7427039650024462e-05, "loss": 0.33889323472976685, "step": 2049 }, { "epoch": 0.5444164121630594, "grad_norm": 1.062972427778211, "learning_rate": 1.7424098790266343e-05, "loss": 0.3238763213157654, "step": 2050 }, { "epoch": 0.5446819811446023, "grad_norm": 1.3651581380225686, "learning_rate": 1.742115649922954e-05, "loss": 0.34304776787757874, "step": 2051 }, { "epoch": 0.5449475501261453, "grad_norm": 1.1192647204238841, "learning_rate": 1.741821277748128e-05, "loss": 0.31528347730636597, "step": 2052 }, { "epoch": 0.5452131191076882, "grad_norm": 1.0728286121769783, "learning_rate": 1.7415267625589094e-05, "loss": 0.2992726266384125, "step": 2053 }, { "epoch": 0.5454786880892312, "grad_norm": 1.0217638219637288, "learning_rate": 1.741232104412076e-05, "loss": 0.31706419587135315, "step": 2054 }, { "epoch": 0.5457442570707741, "grad_norm": 1.8373163603702176, "learning_rate": 1.7409373033644355e-05, "loss": 0.2887676954269409, "step": 2055 }, { "epoch": 0.5460098260523171, "grad_norm": 1.1434290988558236, "learning_rate": 1.740642359472821e-05, "loss": 0.3410964906215668, "step": 2056 }, { "epoch": 0.54627539503386, "grad_norm": 1.0501323660770627, "learning_rate": 1.740347272794095e-05, "loss": 0.3711693286895752, "step": 2057 }, { "epoch": 0.546540964015403, "grad_norm": 1.10922453334831, "learning_rate": 1.7400520433851457e-05, "loss": 0.3512499928474426, "step": 2058 }, { "epoch": 0.5468065329969459, "grad_norm": 1.0790222544341648, "learning_rate": 1.739756671302891e-05, "loss": 0.3136678636074066, "step": 2059 }, { "epoch": 0.5470721019784889, "grad_norm": 1.0417668658369865, "learning_rate": 1.7394611566042748e-05, "loss": 0.2983730435371399, "step": 2060 }, { "epoch": 0.5473376709600318, "grad_norm": 1.1233530419836393, "learning_rate": 1.7391654993462686e-05, "loss": 0.36603933572769165, "step": 2061 }, { "epoch": 0.5476032399415748, "grad_norm": 1.1758952832381078, "learning_rate": 1.7388696995858717e-05, "loss": 0.3651789128780365, "step": 2062 }, { "epoch": 0.5478688089231177, "grad_norm": 1.2065493864331982, "learning_rate": 1.7385737573801108e-05, "loss": 0.30580615997314453, "step": 2063 }, { "epoch": 0.5481343779046607, "grad_norm": 0.981372496476623, "learning_rate": 1.7382776727860406e-05, "loss": 0.2630755305290222, "step": 2064 }, { "epoch": 0.5483999468862036, "grad_norm": 1.0020540486713174, "learning_rate": 1.7379814458607416e-05, "loss": 0.2947537899017334, "step": 2065 }, { "epoch": 0.5486655158677467, "grad_norm": 1.034048631807644, "learning_rate": 1.737685076661324e-05, "loss": 0.3119455873966217, "step": 2066 }, { "epoch": 0.5489310848492897, "grad_norm": 1.052273536899897, "learning_rate": 1.7373885652449237e-05, "loss": 0.3162347972393036, "step": 2067 }, { "epoch": 0.5491966538308326, "grad_norm": 1.2320011234530202, "learning_rate": 1.7370919116687047e-05, "loss": 0.34120452404022217, "step": 2068 }, { "epoch": 0.5494622228123756, "grad_norm": 1.095244169583748, "learning_rate": 1.7367951159898583e-05, "loss": 0.3126780092716217, "step": 2069 }, { "epoch": 0.5497277917939185, "grad_norm": 0.9591128480333501, "learning_rate": 1.7364981782656033e-05, "loss": 0.2833349406719208, "step": 2070 }, { "epoch": 0.5499933607754615, "grad_norm": 1.0921809927618633, "learning_rate": 1.7362010985531855e-05, "loss": 0.31617453694343567, "step": 2071 }, { "epoch": 0.5502589297570044, "grad_norm": 1.0809700153666713, "learning_rate": 1.735903876909879e-05, "loss": 0.31372442841529846, "step": 2072 }, { "epoch": 0.5505244987385474, "grad_norm": 1.1616077591637106, "learning_rate": 1.735606513392984e-05, "loss": 0.3500489592552185, "step": 2073 }, { "epoch": 0.5507900677200903, "grad_norm": 1.0373404262028456, "learning_rate": 1.735309008059829e-05, "loss": 0.3219031095504761, "step": 2074 }, { "epoch": 0.5510556367016333, "grad_norm": 1.0701365395287485, "learning_rate": 1.7350113609677694e-05, "loss": 0.32419610023498535, "step": 2075 }, { "epoch": 0.5513212056831762, "grad_norm": 1.1054492395059694, "learning_rate": 1.7347135721741874e-05, "loss": 0.34804612398147583, "step": 2076 }, { "epoch": 0.5515867746647192, "grad_norm": 1.09814942010155, "learning_rate": 1.7344156417364946e-05, "loss": 0.33105939626693726, "step": 2077 }, { "epoch": 0.5518523436462621, "grad_norm": 1.0139790776190714, "learning_rate": 1.7341175697121273e-05, "loss": 0.3426011800765991, "step": 2078 }, { "epoch": 0.5521179126278051, "grad_norm": 1.1120942872149455, "learning_rate": 1.7338193561585507e-05, "loss": 0.33207643032073975, "step": 2079 }, { "epoch": 0.552383481609348, "grad_norm": 0.9807946500665143, "learning_rate": 1.7335210011332573e-05, "loss": 0.31849467754364014, "step": 2080 }, { "epoch": 0.552649050590891, "grad_norm": 1.081622565959563, "learning_rate": 1.7332225046937655e-05, "loss": 0.3549337685108185, "step": 2081 }, { "epoch": 0.5529146195724339, "grad_norm": 0.9652343930669623, "learning_rate": 1.7329238668976224e-05, "loss": 0.2850857377052307, "step": 2082 }, { "epoch": 0.5531801885539769, "grad_norm": 1.1370461672740964, "learning_rate": 1.732625087802402e-05, "loss": 0.3277609348297119, "step": 2083 }, { "epoch": 0.5534457575355198, "grad_norm": 1.0712095451099939, "learning_rate": 1.732326167465705e-05, "loss": 0.2951444983482361, "step": 2084 }, { "epoch": 0.5537113265170628, "grad_norm": 1.0893938459197319, "learning_rate": 1.7320271059451597e-05, "loss": 0.36634138226509094, "step": 2085 }, { "epoch": 0.5539768954986057, "grad_norm": 1.060256238160636, "learning_rate": 1.7317279032984222e-05, "loss": 0.3407907783985138, "step": 2086 }, { "epoch": 0.5542424644801487, "grad_norm": 1.0563310141876696, "learning_rate": 1.7314285595831747e-05, "loss": 0.34038978815078735, "step": 2087 }, { "epoch": 0.5545080334616916, "grad_norm": 1.0558109709205228, "learning_rate": 1.7311290748571273e-05, "loss": 0.337898313999176, "step": 2088 }, { "epoch": 0.5547736024432346, "grad_norm": 1.1543867929059073, "learning_rate": 1.7308294491780175e-05, "loss": 0.3250765800476074, "step": 2089 }, { "epoch": 0.5550391714247775, "grad_norm": 1.101568217376945, "learning_rate": 1.730529682603609e-05, "loss": 0.31562721729278564, "step": 2090 }, { "epoch": 0.5553047404063205, "grad_norm": 1.2678079753749867, "learning_rate": 1.730229775191693e-05, "loss": 0.32757896184921265, "step": 2091 }, { "epoch": 0.5555703093878634, "grad_norm": 1.1010819086774664, "learning_rate": 1.7299297270000894e-05, "loss": 0.35861605405807495, "step": 2092 }, { "epoch": 0.5558358783694064, "grad_norm": 1.0999873688088635, "learning_rate": 1.7296295380866425e-05, "loss": 0.3383220434188843, "step": 2093 }, { "epoch": 0.5561014473509495, "grad_norm": 1.1431134206724336, "learning_rate": 1.7293292085092263e-05, "loss": 0.30144187808036804, "step": 2094 }, { "epoch": 0.5563670163324924, "grad_norm": 1.0354659821546437, "learning_rate": 1.72902873832574e-05, "loss": 0.2626546323299408, "step": 2095 }, { "epoch": 0.5566325853140354, "grad_norm": 1.0939710377386638, "learning_rate": 1.7287281275941112e-05, "loss": 0.3289363980293274, "step": 2096 }, { "epoch": 0.5568981542955783, "grad_norm": 0.9797533003070389, "learning_rate": 1.7284273763722943e-05, "loss": 0.26631784439086914, "step": 2097 }, { "epoch": 0.5571637232771213, "grad_norm": 1.0035421194069876, "learning_rate": 1.7281264847182697e-05, "loss": 0.3051939606666565, "step": 2098 }, { "epoch": 0.5574292922586642, "grad_norm": 1.0515034870910809, "learning_rate": 1.7278254526900468e-05, "loss": 0.34456121921539307, "step": 2099 }, { "epoch": 0.5576948612402072, "grad_norm": 1.2038994359149542, "learning_rate": 1.72752428034566e-05, "loss": 0.2747807502746582, "step": 2100 }, { "epoch": 0.5579604302217501, "grad_norm": 2.186270123050143, "learning_rate": 1.7272229677431723e-05, "loss": 0.31111812591552734, "step": 2101 }, { "epoch": 0.5582259992032931, "grad_norm": 1.0150701360001215, "learning_rate": 1.7269215149406737e-05, "loss": 0.29648226499557495, "step": 2102 }, { "epoch": 0.558491568184836, "grad_norm": 0.9846402594569152, "learning_rate": 1.72661992199628e-05, "loss": 0.28303876519203186, "step": 2103 }, { "epoch": 0.558757137166379, "grad_norm": 1.1069492435421613, "learning_rate": 1.726318188968135e-05, "loss": 0.30540165305137634, "step": 2104 }, { "epoch": 0.5590227061479219, "grad_norm": 1.2177152582591586, "learning_rate": 1.726016315914409e-05, "loss": 0.31810393929481506, "step": 2105 }, { "epoch": 0.5592882751294649, "grad_norm": 1.134577587954556, "learning_rate": 1.7257143028933004e-05, "loss": 0.33605068922042847, "step": 2106 }, { "epoch": 0.5595538441110078, "grad_norm": 1.089019585879268, "learning_rate": 1.725412149963033e-05, "loss": 0.3340590298175812, "step": 2107 }, { "epoch": 0.5598194130925508, "grad_norm": 0.9872121137775324, "learning_rate": 1.7251098571818586e-05, "loss": 0.29560500383377075, "step": 2108 }, { "epoch": 0.5600849820740937, "grad_norm": 1.0964006197085026, "learning_rate": 1.7248074246080555e-05, "loss": 0.30100107192993164, "step": 2109 }, { "epoch": 0.5603505510556367, "grad_norm": 1.1506338140671328, "learning_rate": 1.7245048522999294e-05, "loss": 0.35551172494888306, "step": 2110 }, { "epoch": 0.5606161200371796, "grad_norm": 1.0513397818607815, "learning_rate": 1.724202140315812e-05, "loss": 0.3182663023471832, "step": 2111 }, { "epoch": 0.5608816890187226, "grad_norm": 1.092960095111009, "learning_rate": 1.723899288714064e-05, "loss": 0.3160201609134674, "step": 2112 }, { "epoch": 0.5611472580002655, "grad_norm": 1.0656744789709975, "learning_rate": 1.72359629755307e-05, "loss": 0.3126063942909241, "step": 2113 }, { "epoch": 0.5614128269818085, "grad_norm": 1.0376603045942787, "learning_rate": 1.723293166891244e-05, "loss": 0.3222552239894867, "step": 2114 }, { "epoch": 0.5616783959633515, "grad_norm": 1.1154320347150413, "learning_rate": 1.722989896787026e-05, "loss": 0.33601805567741394, "step": 2115 }, { "epoch": 0.5619439649448944, "grad_norm": 1.0241046952841495, "learning_rate": 1.722686487298883e-05, "loss": 0.28679755330085754, "step": 2116 }, { "epoch": 0.5622095339264374, "grad_norm": 0.9498185678215705, "learning_rate": 1.722382938485308e-05, "loss": 0.2895340323448181, "step": 2117 }, { "epoch": 0.5624751029079803, "grad_norm": 1.3753225282493697, "learning_rate": 1.7220792504048227e-05, "loss": 0.310183048248291, "step": 2118 }, { "epoch": 0.5627406718895233, "grad_norm": 0.9776305745351022, "learning_rate": 1.7217754231159737e-05, "loss": 0.2768586277961731, "step": 2119 }, { "epoch": 0.5630062408710662, "grad_norm": 0.9838874956474448, "learning_rate": 1.7214714566773358e-05, "loss": 0.2785574793815613, "step": 2120 }, { "epoch": 0.5632718098526092, "grad_norm": 1.1815363465765012, "learning_rate": 1.72116735114751e-05, "loss": 0.30544358491897583, "step": 2121 }, { "epoch": 0.5635373788341522, "grad_norm": 1.0704755380783626, "learning_rate": 1.7208631065851243e-05, "loss": 0.31662559509277344, "step": 2122 }, { "epoch": 0.5638029478156952, "grad_norm": 0.9893085866675072, "learning_rate": 1.7205587230488335e-05, "loss": 0.31466105580329895, "step": 2123 }, { "epoch": 0.5640685167972381, "grad_norm": 1.1520731756820097, "learning_rate": 1.720254200597319e-05, "loss": 0.3471367359161377, "step": 2124 }, { "epoch": 0.5643340857787811, "grad_norm": 1.056530578075146, "learning_rate": 1.7199495392892892e-05, "loss": 0.3325269818305969, "step": 2125 }, { "epoch": 0.564599654760324, "grad_norm": 1.1040662937900534, "learning_rate": 1.7196447391834797e-05, "loss": 0.32423460483551025, "step": 2126 }, { "epoch": 0.564865223741867, "grad_norm": 1.0403895710374138, "learning_rate": 1.7193398003386514e-05, "loss": 0.3083527088165283, "step": 2127 }, { "epoch": 0.5651307927234099, "grad_norm": 1.1794029606730059, "learning_rate": 1.7190347228135933e-05, "loss": 0.3418716490268707, "step": 2128 }, { "epoch": 0.5653963617049529, "grad_norm": 1.0509473075306943, "learning_rate": 1.7187295066671214e-05, "loss": 0.33037957549095154, "step": 2129 }, { "epoch": 0.5656619306864958, "grad_norm": 1.229094630243538, "learning_rate": 1.7184241519580767e-05, "loss": 0.3383673131465912, "step": 2130 }, { "epoch": 0.5659274996680388, "grad_norm": 0.9364933789266218, "learning_rate": 1.718118658745329e-05, "loss": 0.27756133675575256, "step": 2131 }, { "epoch": 0.5661930686495817, "grad_norm": 1.1307081535546069, "learning_rate": 1.717813027087773e-05, "loss": 0.2987852692604065, "step": 2132 }, { "epoch": 0.5664586376311247, "grad_norm": 1.0924971268375117, "learning_rate": 1.717507257044331e-05, "loss": 0.30016621947288513, "step": 2133 }, { "epoch": 0.5667242066126676, "grad_norm": 1.0923612277165435, "learning_rate": 1.7172013486739528e-05, "loss": 0.31592345237731934, "step": 2134 }, { "epoch": 0.5669897755942106, "grad_norm": 1.0932899901018698, "learning_rate": 1.716895302035613e-05, "loss": 0.3500048816204071, "step": 2135 }, { "epoch": 0.5672553445757536, "grad_norm": 1.0529476139624208, "learning_rate": 1.7165891171883134e-05, "loss": 0.32069307565689087, "step": 2136 }, { "epoch": 0.5675209135572965, "grad_norm": 1.10329279559138, "learning_rate": 1.7162827941910837e-05, "loss": 0.3100130558013916, "step": 2137 }, { "epoch": 0.5677864825388395, "grad_norm": 1.080836142172887, "learning_rate": 1.715976333102979e-05, "loss": 0.3205985128879547, "step": 2138 }, { "epoch": 0.5680520515203824, "grad_norm": 1.0861679281182697, "learning_rate": 1.715669733983081e-05, "loss": 0.3243224024772644, "step": 2139 }, { "epoch": 0.5683176205019254, "grad_norm": 1.0818895017967487, "learning_rate": 1.7153629968904997e-05, "loss": 0.3278832733631134, "step": 2140 }, { "epoch": 0.5685831894834683, "grad_norm": 0.9949896264020713, "learning_rate": 1.7150561218843693e-05, "loss": 0.29137033224105835, "step": 2141 }, { "epoch": 0.5688487584650113, "grad_norm": 1.0470808838345107, "learning_rate": 1.7147491090238516e-05, "loss": 0.3065168857574463, "step": 2142 }, { "epoch": 0.5691143274465542, "grad_norm": 1.0368441449557109, "learning_rate": 1.7144419583681354e-05, "loss": 0.3367912173271179, "step": 2143 }, { "epoch": 0.5693798964280972, "grad_norm": 1.086220090850542, "learning_rate": 1.7141346699764357e-05, "loss": 0.32278239727020264, "step": 2144 }, { "epoch": 0.5696454654096401, "grad_norm": 1.080765529331453, "learning_rate": 1.713827243907994e-05, "loss": 0.2887166440486908, "step": 2145 }, { "epoch": 0.5699110343911831, "grad_norm": 1.1353258061614586, "learning_rate": 1.713519680222079e-05, "loss": 0.33214619755744934, "step": 2146 }, { "epoch": 0.570176603372726, "grad_norm": 1.1145274058321384, "learning_rate": 1.7132119789779846e-05, "loss": 0.2865470051765442, "step": 2147 }, { "epoch": 0.570442172354269, "grad_norm": 1.1145678631141913, "learning_rate": 1.7129041402350317e-05, "loss": 0.32746967673301697, "step": 2148 }, { "epoch": 0.5707077413358119, "grad_norm": 1.0454330804264187, "learning_rate": 1.712596164052569e-05, "loss": 0.3029513359069824, "step": 2149 }, { "epoch": 0.570973310317355, "grad_norm": 0.9779058393705973, "learning_rate": 1.7122880504899698e-05, "loss": 0.3052698075771332, "step": 2150 }, { "epoch": 0.5712388792988979, "grad_norm": 1.055591157713499, "learning_rate": 1.7119797996066355e-05, "loss": 0.29221272468566895, "step": 2151 }, { "epoch": 0.5715044482804409, "grad_norm": 1.0014263274293047, "learning_rate": 1.711671411461993e-05, "loss": 0.3165368139743805, "step": 2152 }, { "epoch": 0.5717700172619838, "grad_norm": 1.0763149059705845, "learning_rate": 1.7113628861154953e-05, "loss": 0.30877187848091125, "step": 2153 }, { "epoch": 0.5720355862435268, "grad_norm": 1.0826550246568385, "learning_rate": 1.711054223626623e-05, "loss": 0.2985781729221344, "step": 2154 }, { "epoch": 0.5723011552250697, "grad_norm": 1.1063225967671673, "learning_rate": 1.7107454240548825e-05, "loss": 0.3449699878692627, "step": 2155 }, { "epoch": 0.5725667242066127, "grad_norm": 1.0430022801820942, "learning_rate": 1.7104364874598066e-05, "loss": 0.3219606578350067, "step": 2156 }, { "epoch": 0.5728322931881557, "grad_norm": 1.0017795464639185, "learning_rate": 1.710127413900955e-05, "loss": 0.3059350550174713, "step": 2157 }, { "epoch": 0.5730978621696986, "grad_norm": 1.0027463566346577, "learning_rate": 1.7098182034379132e-05, "loss": 0.29461371898651123, "step": 2158 }, { "epoch": 0.5733634311512416, "grad_norm": 1.0159484116581767, "learning_rate": 1.709508856130293e-05, "loss": 0.2998795509338379, "step": 2159 }, { "epoch": 0.5736290001327845, "grad_norm": 1.0092216110834475, "learning_rate": 1.7091993720377336e-05, "loss": 0.28214582800865173, "step": 2160 }, { "epoch": 0.5738945691143275, "grad_norm": 1.2106483053766084, "learning_rate": 1.708889751219899e-05, "loss": 0.3036864697933197, "step": 2161 }, { "epoch": 0.5741601380958704, "grad_norm": 1.1139097359759478, "learning_rate": 1.7085799937364815e-05, "loss": 0.34146320819854736, "step": 2162 }, { "epoch": 0.5744257070774134, "grad_norm": 1.0631963944232283, "learning_rate": 1.708270099647198e-05, "loss": 0.33996909856796265, "step": 2163 }, { "epoch": 0.5746912760589563, "grad_norm": 1.0779467399705778, "learning_rate": 1.7079600690117924e-05, "loss": 0.3308744728565216, "step": 2164 }, { "epoch": 0.5749568450404993, "grad_norm": 1.0447240453690412, "learning_rate": 1.707649901890035e-05, "loss": 0.2945587933063507, "step": 2165 }, { "epoch": 0.5752224140220422, "grad_norm": 1.0321317558144223, "learning_rate": 1.7073395983417227e-05, "loss": 0.30348697304725647, "step": 2166 }, { "epoch": 0.5754879830035852, "grad_norm": 1.025806147580304, "learning_rate": 1.707029158426678e-05, "loss": 0.28789055347442627, "step": 2167 }, { "epoch": 0.5757535519851281, "grad_norm": 1.168965754707192, "learning_rate": 1.7067185822047502e-05, "loss": 0.3026643693447113, "step": 2168 }, { "epoch": 0.5760191209666711, "grad_norm": 1.1108861255752682, "learning_rate": 1.7064078697358147e-05, "loss": 0.34021061658859253, "step": 2169 }, { "epoch": 0.576284689948214, "grad_norm": 1.1062563353075296, "learning_rate": 1.7060970210797735e-05, "loss": 0.32793867588043213, "step": 2170 }, { "epoch": 0.576550258929757, "grad_norm": 1.1692826638365306, "learning_rate": 1.705786036296554e-05, "loss": 0.36144691705703735, "step": 2171 }, { "epoch": 0.5768158279112999, "grad_norm": 1.1177501875227254, "learning_rate": 1.7054749154461105e-05, "loss": 0.3630291223526001, "step": 2172 }, { "epoch": 0.5770813968928429, "grad_norm": 1.144365708172633, "learning_rate": 1.705163658588424e-05, "loss": 0.34964969754219055, "step": 2173 }, { "epoch": 0.5773469658743858, "grad_norm": 1.0298961015626151, "learning_rate": 1.7048522657835004e-05, "loss": 0.2877815067768097, "step": 2174 }, { "epoch": 0.5776125348559288, "grad_norm": 1.1148926749607628, "learning_rate": 1.7045407370913732e-05, "loss": 0.3185664713382721, "step": 2175 }, { "epoch": 0.5778781038374717, "grad_norm": 1.0393243287048395, "learning_rate": 1.704229072572101e-05, "loss": 0.3035257160663605, "step": 2176 }, { "epoch": 0.5781436728190147, "grad_norm": 1.048139429574759, "learning_rate": 1.7039172722857695e-05, "loss": 0.325702965259552, "step": 2177 }, { "epoch": 0.5784092418005577, "grad_norm": 1.1046410504333486, "learning_rate": 1.7036053362924896e-05, "loss": 0.32837462425231934, "step": 2178 }, { "epoch": 0.5786748107821007, "grad_norm": 1.066094854816524, "learning_rate": 1.703293264652399e-05, "loss": 0.3430028259754181, "step": 2179 }, { "epoch": 0.5789403797636437, "grad_norm": 1.1007701198247044, "learning_rate": 1.702981057425662e-05, "loss": 0.32792964577674866, "step": 2180 }, { "epoch": 0.5792059487451866, "grad_norm": 0.9964902607677808, "learning_rate": 1.7026687146724675e-05, "loss": 0.3037140965461731, "step": 2181 }, { "epoch": 0.5794715177267296, "grad_norm": 0.9962684392556416, "learning_rate": 1.7023562364530322e-05, "loss": 0.33083540201187134, "step": 2182 }, { "epoch": 0.5797370867082725, "grad_norm": 0.9979777099745417, "learning_rate": 1.702043622827598e-05, "loss": 0.3108663260936737, "step": 2183 }, { "epoch": 0.5800026556898155, "grad_norm": 0.9618495492417584, "learning_rate": 1.7017308738564336e-05, "loss": 0.2939792573451996, "step": 2184 }, { "epoch": 0.5802682246713584, "grad_norm": 1.1315656989934186, "learning_rate": 1.7014179895998322e-05, "loss": 0.3686106503009796, "step": 2185 }, { "epoch": 0.5805337936529014, "grad_norm": 1.0524191997810952, "learning_rate": 1.7011049701181152e-05, "loss": 0.3497159779071808, "step": 2186 }, { "epoch": 0.5807993626344443, "grad_norm": 1.0989364128809138, "learning_rate": 1.7007918154716286e-05, "loss": 0.31730401515960693, "step": 2187 }, { "epoch": 0.5810649316159873, "grad_norm": 1.0000330799865447, "learning_rate": 1.7004785257207456e-05, "loss": 0.3064701557159424, "step": 2188 }, { "epoch": 0.5813305005975302, "grad_norm": 1.1111458283716926, "learning_rate": 1.7001651009258635e-05, "loss": 0.37174129486083984, "step": 2189 }, { "epoch": 0.5815960695790732, "grad_norm": 1.068050904458805, "learning_rate": 1.699851541147408e-05, "loss": 0.3548140823841095, "step": 2190 }, { "epoch": 0.5818616385606161, "grad_norm": 1.2340650081251097, "learning_rate": 1.6995378464458292e-05, "loss": 0.3486049473285675, "step": 2191 }, { "epoch": 0.5821272075421591, "grad_norm": 1.996025853729682, "learning_rate": 1.6992240168816037e-05, "loss": 0.3083210587501526, "step": 2192 }, { "epoch": 0.582392776523702, "grad_norm": 1.0284637251594817, "learning_rate": 1.6989100525152346e-05, "loss": 0.3006829619407654, "step": 2193 }, { "epoch": 0.582658345505245, "grad_norm": 1.103386023825705, "learning_rate": 1.6985959534072502e-05, "loss": 0.32856425642967224, "step": 2194 }, { "epoch": 0.5829239144867879, "grad_norm": 1.1293873964177752, "learning_rate": 1.6982817196182052e-05, "loss": 0.3382526934146881, "step": 2195 }, { "epoch": 0.5831894834683309, "grad_norm": 1.0326113865244562, "learning_rate": 1.69796735120868e-05, "loss": 0.3311583399772644, "step": 2196 }, { "epoch": 0.5834550524498738, "grad_norm": 1.0267321140886136, "learning_rate": 1.6976528482392815e-05, "loss": 0.312778115272522, "step": 2197 }, { "epoch": 0.5837206214314168, "grad_norm": 1.0148067463802801, "learning_rate": 1.697338210770642e-05, "loss": 0.2996736466884613, "step": 2198 }, { "epoch": 0.5839861904129597, "grad_norm": 1.1885772355333009, "learning_rate": 1.6970234388634192e-05, "loss": 0.344571590423584, "step": 2199 }, { "epoch": 0.5842517593945027, "grad_norm": 0.9183671512098872, "learning_rate": 1.6967085325782984e-05, "loss": 0.25299468636512756, "step": 2200 }, { "epoch": 0.5845173283760456, "grad_norm": 1.042142544774348, "learning_rate": 1.6963934919759896e-05, "loss": 0.3080691695213318, "step": 2201 }, { "epoch": 0.5847828973575886, "grad_norm": 1.0216299822000434, "learning_rate": 1.6960783171172286e-05, "loss": 0.27491697669029236, "step": 2202 }, { "epoch": 0.5850484663391315, "grad_norm": 1.1629234714983534, "learning_rate": 1.6957630080627772e-05, "loss": 0.3422500193119049, "step": 2203 }, { "epoch": 0.5853140353206745, "grad_norm": 1.0832524871656921, "learning_rate": 1.695447564873424e-05, "loss": 0.27703234553337097, "step": 2204 }, { "epoch": 0.5855796043022174, "grad_norm": 1.0275000328668338, "learning_rate": 1.6951319876099825e-05, "loss": 0.3088543117046356, "step": 2205 }, { "epoch": 0.5858451732837605, "grad_norm": 1.0671359142705343, "learning_rate": 1.694816276333292e-05, "loss": 0.29875609278678894, "step": 2206 }, { "epoch": 0.5861107422653035, "grad_norm": 1.0185982306074886, "learning_rate": 1.6945004311042176e-05, "loss": 0.30804386734962463, "step": 2207 }, { "epoch": 0.5863763112468464, "grad_norm": 1.081134235929082, "learning_rate": 1.694184451983651e-05, "loss": 0.3324572741985321, "step": 2208 }, { "epoch": 0.5866418802283894, "grad_norm": 1.0822730402391103, "learning_rate": 1.6938683390325096e-05, "loss": 0.30302488803863525, "step": 2209 }, { "epoch": 0.5869074492099323, "grad_norm": 1.1499037543983048, "learning_rate": 1.6935520923117355e-05, "loss": 0.3264358341693878, "step": 2210 }, { "epoch": 0.5871730181914753, "grad_norm": 1.1305858167915457, "learning_rate": 1.693235711882298e-05, "loss": 0.3172164261341095, "step": 2211 }, { "epoch": 0.5874385871730182, "grad_norm": 0.9910314790510931, "learning_rate": 1.6929191978051908e-05, "loss": 0.300851047039032, "step": 2212 }, { "epoch": 0.5877041561545612, "grad_norm": 1.1122516205102002, "learning_rate": 1.6926025501414352e-05, "loss": 0.2887764871120453, "step": 2213 }, { "epoch": 0.5879697251361041, "grad_norm": 1.0991421920944897, "learning_rate": 1.692285768952076e-05, "loss": 0.3246796727180481, "step": 2214 }, { "epoch": 0.5882352941176471, "grad_norm": 1.1069795382063548, "learning_rate": 1.6919688542981852e-05, "loss": 0.30595412850379944, "step": 2215 }, { "epoch": 0.58850086309919, "grad_norm": 1.068918741300791, "learning_rate": 1.6916518062408604e-05, "loss": 0.2885501980781555, "step": 2216 }, { "epoch": 0.588766432080733, "grad_norm": 1.066918066226772, "learning_rate": 1.6913346248412245e-05, "loss": 0.34449082612991333, "step": 2217 }, { "epoch": 0.5890320010622759, "grad_norm": 1.0585511422631098, "learning_rate": 1.6910173101604267e-05, "loss": 0.29410409927368164, "step": 2218 }, { "epoch": 0.5892975700438189, "grad_norm": 1.1710793080996782, "learning_rate": 1.690699862259641e-05, "loss": 0.3250378370285034, "step": 2219 }, { "epoch": 0.5895631390253618, "grad_norm": 1.3327292763951073, "learning_rate": 1.690382281200068e-05, "loss": 0.34420648217201233, "step": 2220 }, { "epoch": 0.5898287080069048, "grad_norm": 1.1196949637967406, "learning_rate": 1.6900645670429338e-05, "loss": 0.33951860666275024, "step": 2221 }, { "epoch": 0.5900942769884477, "grad_norm": 1.064177847952839, "learning_rate": 1.6897467198494892e-05, "loss": 0.35045644640922546, "step": 2222 }, { "epoch": 0.5903598459699907, "grad_norm": 1.0378256375427404, "learning_rate": 1.689428739681012e-05, "loss": 0.3262789845466614, "step": 2223 }, { "epoch": 0.5906254149515336, "grad_norm": 1.0662878016953237, "learning_rate": 1.689110626598805e-05, "loss": 0.2959234118461609, "step": 2224 }, { "epoch": 0.5908909839330766, "grad_norm": 1.040953230887288, "learning_rate": 1.6887923806641965e-05, "loss": 0.3185187876224518, "step": 2225 }, { "epoch": 0.5911565529146195, "grad_norm": 0.9754385668000993, "learning_rate": 1.6884740019385403e-05, "loss": 0.2861860692501068, "step": 2226 }, { "epoch": 0.5914221218961625, "grad_norm": 1.0067160421449919, "learning_rate": 1.6881554904832163e-05, "loss": 0.28718897700309753, "step": 2227 }, { "epoch": 0.5916876908777055, "grad_norm": 1.0412433017248806, "learning_rate": 1.68783684635963e-05, "loss": 0.2919235825538635, "step": 2228 }, { "epoch": 0.5919532598592484, "grad_norm": 0.9981457951279066, "learning_rate": 1.687518069629212e-05, "loss": 0.29265689849853516, "step": 2229 }, { "epoch": 0.5922188288407914, "grad_norm": 1.105624159979672, "learning_rate": 1.6871991603534183e-05, "loss": 0.3257937431335449, "step": 2230 }, { "epoch": 0.5924843978223343, "grad_norm": 0.9776528734928177, "learning_rate": 1.6868801185937318e-05, "loss": 0.30709922313690186, "step": 2231 }, { "epoch": 0.5927499668038773, "grad_norm": 1.0470693079191735, "learning_rate": 1.6865609444116594e-05, "loss": 0.34016695618629456, "step": 2232 }, { "epoch": 0.5930155357854202, "grad_norm": 3.119158292180646, "learning_rate": 1.686241637868734e-05, "loss": 0.27988332509994507, "step": 2233 }, { "epoch": 0.5932811047669632, "grad_norm": 1.0478488923431404, "learning_rate": 1.685922199026514e-05, "loss": 0.33241748809814453, "step": 2234 }, { "epoch": 0.5935466737485062, "grad_norm": 1.131470783603603, "learning_rate": 1.685602627946584e-05, "loss": 0.29636645317077637, "step": 2235 }, { "epoch": 0.5938122427300492, "grad_norm": 1.0270882549188534, "learning_rate": 1.6852829246905532e-05, "loss": 0.32173705101013184, "step": 2236 }, { "epoch": 0.5940778117115921, "grad_norm": 1.0825392737706068, "learning_rate": 1.6849630893200567e-05, "loss": 0.318726122379303, "step": 2237 }, { "epoch": 0.5943433806931351, "grad_norm": 1.0382165285294276, "learning_rate": 1.684643121896755e-05, "loss": 0.3085494339466095, "step": 2238 }, { "epoch": 0.594608949674678, "grad_norm": 1.0527313536489507, "learning_rate": 1.684323022482334e-05, "loss": 0.3402160406112671, "step": 2239 }, { "epoch": 0.594874518656221, "grad_norm": 1.0380085019224927, "learning_rate": 1.684002791138505e-05, "loss": 0.28099578619003296, "step": 2240 }, { "epoch": 0.5951400876377639, "grad_norm": 1.0821564922133853, "learning_rate": 1.6836824279270053e-05, "loss": 0.3049670159816742, "step": 2241 }, { "epoch": 0.5954056566193069, "grad_norm": 1.0644252940512267, "learning_rate": 1.6833619329095966e-05, "loss": 0.2999834716320038, "step": 2242 }, { "epoch": 0.5956712256008498, "grad_norm": 1.0828247808996563, "learning_rate": 1.6830413061480663e-05, "loss": 0.2976648509502411, "step": 2243 }, { "epoch": 0.5959367945823928, "grad_norm": 0.9516700397999099, "learning_rate": 1.6827205477042282e-05, "loss": 0.2937200963497162, "step": 2244 }, { "epoch": 0.5962023635639357, "grad_norm": 0.9800041770842799, "learning_rate": 1.6823996576399208e-05, "loss": 0.27944231033325195, "step": 2245 }, { "epoch": 0.5964679325454787, "grad_norm": 1.2497901059935828, "learning_rate": 1.6820786360170073e-05, "loss": 0.37821248173713684, "step": 2246 }, { "epoch": 0.5967335015270216, "grad_norm": 1.0764913922139379, "learning_rate": 1.681757482897377e-05, "loss": 0.31929296255111694, "step": 2247 }, { "epoch": 0.5969990705085646, "grad_norm": 1.0997353700477965, "learning_rate": 1.6814361983429446e-05, "loss": 0.29905542731285095, "step": 2248 }, { "epoch": 0.5972646394901076, "grad_norm": 1.1012066663218303, "learning_rate": 1.6811147824156503e-05, "loss": 0.31056714057922363, "step": 2249 }, { "epoch": 0.5975302084716505, "grad_norm": 1.0740873036211436, "learning_rate": 1.6807932351774585e-05, "loss": 0.3311445415019989, "step": 2250 }, { "epoch": 0.5977957774531935, "grad_norm": 0.9539008733822649, "learning_rate": 1.6804715566903603e-05, "loss": 0.28413334488868713, "step": 2251 }, { "epoch": 0.5980613464347364, "grad_norm": 1.068533794622215, "learning_rate": 1.6801497470163717e-05, "loss": 0.27681154012680054, "step": 2252 }, { "epoch": 0.5983269154162794, "grad_norm": 1.0654200190327086, "learning_rate": 1.679827806217533e-05, "loss": 0.290216863155365, "step": 2253 }, { "epoch": 0.5985924843978223, "grad_norm": 1.1041469834048565, "learning_rate": 1.6795057343559115e-05, "loss": 0.31263259053230286, "step": 2254 }, { "epoch": 0.5988580533793653, "grad_norm": 1.126601485756597, "learning_rate": 1.6791835314935984e-05, "loss": 0.31527474522590637, "step": 2255 }, { "epoch": 0.5991236223609082, "grad_norm": 1.078203294441185, "learning_rate": 1.6788611976927104e-05, "loss": 0.308803915977478, "step": 2256 }, { "epoch": 0.5993891913424512, "grad_norm": 1.0503773076355036, "learning_rate": 1.6785387330153898e-05, "loss": 0.3038686215877533, "step": 2257 }, { "epoch": 0.5996547603239941, "grad_norm": 1.0216209005739547, "learning_rate": 1.6782161375238045e-05, "loss": 0.32485973834991455, "step": 2258 }, { "epoch": 0.5999203293055371, "grad_norm": 1.182450532742011, "learning_rate": 1.6778934112801467e-05, "loss": 0.32350587844848633, "step": 2259 }, { "epoch": 0.60018589828708, "grad_norm": 1.0888151703509321, "learning_rate": 1.6775705543466337e-05, "loss": 0.31593745946884155, "step": 2260 }, { "epoch": 0.600451467268623, "grad_norm": 1.0882766479814592, "learning_rate": 1.6772475667855098e-05, "loss": 0.3266843855381012, "step": 2261 }, { "epoch": 0.6007170362501659, "grad_norm": 1.1815872316974045, "learning_rate": 1.676924448659042e-05, "loss": 0.3334394693374634, "step": 2262 }, { "epoch": 0.600982605231709, "grad_norm": 1.1019346354795203, "learning_rate": 1.676601200029524e-05, "loss": 0.29688704013824463, "step": 2263 }, { "epoch": 0.6012481742132519, "grad_norm": 1.0675092497220116, "learning_rate": 1.6762778209592744e-05, "loss": 0.3163599967956543, "step": 2264 }, { "epoch": 0.6015137431947949, "grad_norm": 3.310146638883422, "learning_rate": 1.675954311510637e-05, "loss": 0.3001909554004669, "step": 2265 }, { "epoch": 0.6017793121763378, "grad_norm": 1.052342150287052, "learning_rate": 1.6756306717459804e-05, "loss": 0.306442528963089, "step": 2266 }, { "epoch": 0.6020448811578808, "grad_norm": 1.0462245388504205, "learning_rate": 1.6753069017276988e-05, "loss": 0.32714736461639404, "step": 2267 }, { "epoch": 0.6023104501394237, "grad_norm": 1.1462408299032063, "learning_rate": 1.6749830015182106e-05, "loss": 0.3276352286338806, "step": 2268 }, { "epoch": 0.6025760191209667, "grad_norm": 1.196238497855594, "learning_rate": 1.6746589711799607e-05, "loss": 0.3151017427444458, "step": 2269 }, { "epoch": 0.6028415881025097, "grad_norm": 1.0342963680315473, "learning_rate": 1.674334810775418e-05, "loss": 0.30252715945243835, "step": 2270 }, { "epoch": 0.6031071570840526, "grad_norm": 1.013150034994447, "learning_rate": 1.674010520367077e-05, "loss": 0.28994205594062805, "step": 2271 }, { "epoch": 0.6033727260655956, "grad_norm": 1.060884408167446, "learning_rate": 1.6736861000174566e-05, "loss": 0.31821542978286743, "step": 2272 }, { "epoch": 0.6036382950471385, "grad_norm": 1.0745731746159097, "learning_rate": 1.6733615497891018e-05, "loss": 0.33488404750823975, "step": 2273 }, { "epoch": 0.6039038640286815, "grad_norm": 1.1687722013665731, "learning_rate": 1.6730368697445815e-05, "loss": 0.32545825839042664, "step": 2274 }, { "epoch": 0.6041694330102244, "grad_norm": 1.0959659967153625, "learning_rate": 1.6727120599464904e-05, "loss": 0.3229105770587921, "step": 2275 }, { "epoch": 0.6044350019917674, "grad_norm": 1.0190980223229251, "learning_rate": 1.672387120457448e-05, "loss": 0.29090648889541626, "step": 2276 }, { "epoch": 0.6047005709733103, "grad_norm": 1.0135966931724694, "learning_rate": 1.6720620513400993e-05, "loss": 0.3102695345878601, "step": 2277 }, { "epoch": 0.6049661399548533, "grad_norm": 0.9853472262099896, "learning_rate": 1.6717368526571133e-05, "loss": 0.3104533851146698, "step": 2278 }, { "epoch": 0.6052317089363962, "grad_norm": 1.0624907138843722, "learning_rate": 1.671411524471184e-05, "loss": 0.3340798616409302, "step": 2279 }, { "epoch": 0.6054972779179392, "grad_norm": 0.9362556276145145, "learning_rate": 1.6710860668450318e-05, "loss": 0.2807982563972473, "step": 2280 }, { "epoch": 0.6057628468994821, "grad_norm": 1.0604829312359818, "learning_rate": 1.6707604798414005e-05, "loss": 0.28892064094543457, "step": 2281 }, { "epoch": 0.6060284158810251, "grad_norm": 1.1005771261022437, "learning_rate": 1.6704347635230594e-05, "loss": 0.29660698771476746, "step": 2282 }, { "epoch": 0.606293984862568, "grad_norm": 1.0826898129560842, "learning_rate": 1.6701089179528032e-05, "loss": 0.32079893350601196, "step": 2283 }, { "epoch": 0.606559553844111, "grad_norm": 1.0711524337358722, "learning_rate": 1.6697829431934508e-05, "loss": 0.3464012145996094, "step": 2284 }, { "epoch": 0.6068251228256539, "grad_norm": 1.113831391037599, "learning_rate": 1.669456839307846e-05, "loss": 0.3378494381904602, "step": 2285 }, { "epoch": 0.6070906918071969, "grad_norm": 1.1314381443012484, "learning_rate": 1.6691306063588583e-05, "loss": 0.2856704294681549, "step": 2286 }, { "epoch": 0.6073562607887398, "grad_norm": 1.117095467957477, "learning_rate": 1.6688042444093816e-05, "loss": 0.317970871925354, "step": 2287 }, { "epoch": 0.6076218297702828, "grad_norm": 0.9765740214705895, "learning_rate": 1.6684777535223338e-05, "loss": 0.3067381978034973, "step": 2288 }, { "epoch": 0.6078873987518257, "grad_norm": 0.9795122588790717, "learning_rate": 1.6681511337606594e-05, "loss": 0.28682243824005127, "step": 2289 }, { "epoch": 0.6081529677333687, "grad_norm": 1.0967806384391572, "learning_rate": 1.667824385187327e-05, "loss": 0.30516478419303894, "step": 2290 }, { "epoch": 0.6084185367149118, "grad_norm": 1.2090889717256932, "learning_rate": 1.6674975078653284e-05, "loss": 0.3114034831523895, "step": 2291 }, { "epoch": 0.6086841056964547, "grad_norm": 1.045779035897072, "learning_rate": 1.6671705018576837e-05, "loss": 0.3119916617870331, "step": 2292 }, { "epoch": 0.6089496746779977, "grad_norm": 1.0110290976394836, "learning_rate": 1.666843367227434e-05, "loss": 0.2695278823375702, "step": 2293 }, { "epoch": 0.6092152436595406, "grad_norm": 1.1042693591067085, "learning_rate": 1.6665161040376483e-05, "loss": 0.32162508368492126, "step": 2294 }, { "epoch": 0.6094808126410836, "grad_norm": 1.1533266295102853, "learning_rate": 1.6661887123514183e-05, "loss": 0.3115222752094269, "step": 2295 }, { "epoch": 0.6097463816226265, "grad_norm": 1.1903173397636237, "learning_rate": 1.6658611922318618e-05, "loss": 0.3239362835884094, "step": 2296 }, { "epoch": 0.6100119506041695, "grad_norm": 1.0224008240467277, "learning_rate": 1.66553354374212e-05, "loss": 0.29716256260871887, "step": 2297 }, { "epoch": 0.6102775195857124, "grad_norm": 1.1579823586849616, "learning_rate": 1.6652057669453606e-05, "loss": 0.3337557911872864, "step": 2298 }, { "epoch": 0.6105430885672554, "grad_norm": 1.0726602627394455, "learning_rate": 1.6648778619047747e-05, "loss": 0.30258649587631226, "step": 2299 }, { "epoch": 0.6108086575487983, "grad_norm": 1.0836532202857172, "learning_rate": 1.6645498286835784e-05, "loss": 0.3151426315307617, "step": 2300 }, { "epoch": 0.6110742265303413, "grad_norm": 0.9639622977001232, "learning_rate": 1.664221667345013e-05, "loss": 0.274954617023468, "step": 2301 }, { "epoch": 0.6113397955118842, "grad_norm": 1.0454921478368049, "learning_rate": 1.6638933779523437e-05, "loss": 0.3055363893508911, "step": 2302 }, { "epoch": 0.6116053644934272, "grad_norm": 1.0132221767482874, "learning_rate": 1.663564960568861e-05, "loss": 0.30296921730041504, "step": 2303 }, { "epoch": 0.6118709334749701, "grad_norm": 1.0766188111034134, "learning_rate": 1.66323641525788e-05, "loss": 0.3118343651294708, "step": 2304 }, { "epoch": 0.6121365024565131, "grad_norm": 1.164685781665666, "learning_rate": 1.6629077420827405e-05, "loss": 0.3277447819709778, "step": 2305 }, { "epoch": 0.612402071438056, "grad_norm": 1.11996036014055, "learning_rate": 1.6625789411068063e-05, "loss": 0.307643860578537, "step": 2306 }, { "epoch": 0.612667640419599, "grad_norm": 1.0752891079202938, "learning_rate": 1.6622500123934665e-05, "loss": 0.3043777346611023, "step": 2307 }, { "epoch": 0.6129332094011419, "grad_norm": 1.1229566611504027, "learning_rate": 1.6619209560061352e-05, "loss": 0.28634852170944214, "step": 2308 }, { "epoch": 0.6131987783826849, "grad_norm": 1.1746890844036781, "learning_rate": 1.6615917720082503e-05, "loss": 0.33200016617774963, "step": 2309 }, { "epoch": 0.6134643473642278, "grad_norm": 1.0620493011215435, "learning_rate": 1.661262460463274e-05, "loss": 0.26568055152893066, "step": 2310 }, { "epoch": 0.6137299163457708, "grad_norm": 1.0408157138123326, "learning_rate": 1.6609330214346945e-05, "loss": 0.2772855758666992, "step": 2311 }, { "epoch": 0.6139954853273137, "grad_norm": 1.2060076126932109, "learning_rate": 1.6606034549860236e-05, "loss": 0.3330409824848175, "step": 2312 }, { "epoch": 0.6142610543088567, "grad_norm": 1.0235644562455184, "learning_rate": 1.6602737611807975e-05, "loss": 0.27702978253364563, "step": 2313 }, { "epoch": 0.6145266232903996, "grad_norm": 1.1266755606893777, "learning_rate": 1.6599439400825775e-05, "loss": 0.29985183477401733, "step": 2314 }, { "epoch": 0.6147921922719426, "grad_norm": 1.0266522277907775, "learning_rate": 1.659613991754949e-05, "loss": 0.2666100859642029, "step": 2315 }, { "epoch": 0.6150577612534855, "grad_norm": 1.0676553477298287, "learning_rate": 1.6592839162615223e-05, "loss": 0.2968613803386688, "step": 2316 }, { "epoch": 0.6153233302350285, "grad_norm": 1.26155090118547, "learning_rate": 1.6589537136659326e-05, "loss": 0.2693714499473572, "step": 2317 }, { "epoch": 0.6155888992165715, "grad_norm": 1.1411779960646509, "learning_rate": 1.658623384031838e-05, "loss": 0.3192713260650635, "step": 2318 }, { "epoch": 0.6158544681981145, "grad_norm": 1.099028639770974, "learning_rate": 1.658292927422923e-05, "loss": 0.2958469092845917, "step": 2319 }, { "epoch": 0.6161200371796575, "grad_norm": 1.0613129939040433, "learning_rate": 1.657962343902895e-05, "loss": 0.28580743074417114, "step": 2320 }, { "epoch": 0.6163856061612004, "grad_norm": 1.2105545865052383, "learning_rate": 1.6576316335354875e-05, "loss": 0.34325680136680603, "step": 2321 }, { "epoch": 0.6166511751427434, "grad_norm": 1.076014963599046, "learning_rate": 1.657300796384457e-05, "loss": 0.3220894932746887, "step": 2322 }, { "epoch": 0.6169167441242863, "grad_norm": 1.003861259990267, "learning_rate": 1.656969832513585e-05, "loss": 0.2934642434120178, "step": 2323 }, { "epoch": 0.6171823131058293, "grad_norm": 1.0182182491222724, "learning_rate": 1.656638741986677e-05, "loss": 0.3066999912261963, "step": 2324 }, { "epoch": 0.6174478820873722, "grad_norm": 1.0780285957414313, "learning_rate": 1.6563075248675645e-05, "loss": 0.2947896122932434, "step": 2325 }, { "epoch": 0.6177134510689152, "grad_norm": 1.1567241875430703, "learning_rate": 1.6559761812201018e-05, "loss": 0.33616161346435547, "step": 2326 }, { "epoch": 0.6179790200504581, "grad_norm": 1.0754490235924812, "learning_rate": 1.6556447111081678e-05, "loss": 0.29555875062942505, "step": 2327 }, { "epoch": 0.6182445890320011, "grad_norm": 1.0070791342344025, "learning_rate": 1.655313114595666e-05, "loss": 0.276498019695282, "step": 2328 }, { "epoch": 0.618510158013544, "grad_norm": 1.0894248364537533, "learning_rate": 1.6549813917465242e-05, "loss": 0.3081165552139282, "step": 2329 }, { "epoch": 0.618775726995087, "grad_norm": 1.2153046006588315, "learning_rate": 1.654649542624695e-05, "loss": 0.3610053062438965, "step": 2330 }, { "epoch": 0.6190412959766299, "grad_norm": 1.0676492266011808, "learning_rate": 1.654317567294155e-05, "loss": 0.2775106430053711, "step": 2331 }, { "epoch": 0.6193068649581729, "grad_norm": 4.371469554540211, "learning_rate": 1.653985465818905e-05, "loss": 0.2915893793106079, "step": 2332 }, { "epoch": 0.6195724339397158, "grad_norm": 1.0032536414224313, "learning_rate": 1.6536532382629696e-05, "loss": 0.30868977308273315, "step": 2333 }, { "epoch": 0.6198380029212588, "grad_norm": 1.1011191125099704, "learning_rate": 1.6533208846903996e-05, "loss": 0.3083038330078125, "step": 2334 }, { "epoch": 0.6201035719028017, "grad_norm": 0.9895882037041855, "learning_rate": 1.652988405165268e-05, "loss": 0.25192466378211975, "step": 2335 }, { "epoch": 0.6203691408843447, "grad_norm": 1.1020677364796136, "learning_rate": 1.6526557997516737e-05, "loss": 0.32154130935668945, "step": 2336 }, { "epoch": 0.6206347098658876, "grad_norm": 1.1174587266065723, "learning_rate": 1.6523230685137382e-05, "loss": 0.2860945165157318, "step": 2337 }, { "epoch": 0.6209002788474306, "grad_norm": 1.1647384960602913, "learning_rate": 1.6519902115156084e-05, "loss": 0.3279789984226227, "step": 2338 }, { "epoch": 0.6211658478289735, "grad_norm": 1.062678685453679, "learning_rate": 1.6516572288214555e-05, "loss": 0.3082200884819031, "step": 2339 }, { "epoch": 0.6214314168105165, "grad_norm": 1.1253285275737313, "learning_rate": 1.6513241204954745e-05, "loss": 0.29032304883003235, "step": 2340 }, { "epoch": 0.6216969857920595, "grad_norm": 1.004918906125766, "learning_rate": 1.6509908866018843e-05, "loss": 0.3096848130226135, "step": 2341 }, { "epoch": 0.6219625547736024, "grad_norm": 1.021047856460921, "learning_rate": 1.6506575272049294e-05, "loss": 0.309989333152771, "step": 2342 }, { "epoch": 0.6222281237551454, "grad_norm": 1.119097166323709, "learning_rate": 1.6503240423688768e-05, "loss": 0.311350554227829, "step": 2343 }, { "epoch": 0.6224936927366883, "grad_norm": 1.0659510240862446, "learning_rate": 1.6499904321580187e-05, "loss": 0.3313952386379242, "step": 2344 }, { "epoch": 0.6227592617182313, "grad_norm": 1.0702797293760455, "learning_rate": 1.649656696636671e-05, "loss": 0.2984781265258789, "step": 2345 }, { "epoch": 0.6230248306997742, "grad_norm": 1.0312282361562104, "learning_rate": 1.6493228358691748e-05, "loss": 0.3058238625526428, "step": 2346 }, { "epoch": 0.6232903996813173, "grad_norm": 1.0462474005488736, "learning_rate": 1.6489888499198935e-05, "loss": 0.33439138531684875, "step": 2347 }, { "epoch": 0.6235559686628602, "grad_norm": 1.0386002000588619, "learning_rate": 1.6486547388532157e-05, "loss": 0.2883133292198181, "step": 2348 }, { "epoch": 0.6238215376444032, "grad_norm": 0.9997410916606129, "learning_rate": 1.648320502733555e-05, "loss": 0.30258435010910034, "step": 2349 }, { "epoch": 0.6240871066259461, "grad_norm": 1.0226158069339855, "learning_rate": 1.6479861416253476e-05, "loss": 0.316353440284729, "step": 2350 }, { "epoch": 0.6243526756074891, "grad_norm": 1.0638089423798769, "learning_rate": 1.647651655593054e-05, "loss": 0.3230556547641754, "step": 2351 }, { "epoch": 0.624618244589032, "grad_norm": 1.2043111611037318, "learning_rate": 1.6473170447011593e-05, "loss": 0.3327128291130066, "step": 2352 }, { "epoch": 0.624883813570575, "grad_norm": 1.081123131766037, "learning_rate": 1.6469823090141733e-05, "loss": 0.3152993619441986, "step": 2353 }, { "epoch": 0.6251493825521179, "grad_norm": 1.0655193061859811, "learning_rate": 1.6466474485966286e-05, "loss": 0.26792511343955994, "step": 2354 }, { "epoch": 0.6254149515336609, "grad_norm": 1.121022507517606, "learning_rate": 1.6463124635130824e-05, "loss": 0.31665652990341187, "step": 2355 }, { "epoch": 0.6256805205152038, "grad_norm": 1.0108098757868682, "learning_rate": 1.645977353828115e-05, "loss": 0.29573655128479004, "step": 2356 }, { "epoch": 0.6259460894967468, "grad_norm": 1.0973823257435635, "learning_rate": 1.6456421196063334e-05, "loss": 0.3210436999797821, "step": 2357 }, { "epoch": 0.6262116584782897, "grad_norm": 1.2424369194288305, "learning_rate": 1.6453067609123656e-05, "loss": 0.2837316691875458, "step": 2358 }, { "epoch": 0.6264772274598327, "grad_norm": 1.0217734190114693, "learning_rate": 1.6449712778108645e-05, "loss": 0.2885812520980835, "step": 2359 }, { "epoch": 0.6267427964413756, "grad_norm": 1.1369177274860889, "learning_rate": 1.6446356703665078e-05, "loss": 0.34908249974250793, "step": 2360 }, { "epoch": 0.6270083654229186, "grad_norm": 0.9942151080492051, "learning_rate": 1.6442999386439967e-05, "loss": 0.30398470163345337, "step": 2361 }, { "epoch": 0.6272739344044616, "grad_norm": 0.9838105681310805, "learning_rate": 1.6439640827080565e-05, "loss": 0.2780487537384033, "step": 2362 }, { "epoch": 0.6275395033860045, "grad_norm": 0.956534505955689, "learning_rate": 1.6436281026234357e-05, "loss": 0.2575770616531372, "step": 2363 }, { "epoch": 0.6278050723675475, "grad_norm": 0.9675911826739493, "learning_rate": 1.6432919984549077e-05, "loss": 0.2888547480106354, "step": 2364 }, { "epoch": 0.6280706413490904, "grad_norm": 1.2303845977564731, "learning_rate": 1.6429557702672694e-05, "loss": 0.3259009122848511, "step": 2365 }, { "epoch": 0.6283362103306334, "grad_norm": 1.3923197622537806, "learning_rate": 1.6426194181253415e-05, "loss": 0.2899959683418274, "step": 2366 }, { "epoch": 0.6286017793121763, "grad_norm": 1.058685915432802, "learning_rate": 1.6422829420939688e-05, "loss": 0.28471851348876953, "step": 2367 }, { "epoch": 0.6288673482937193, "grad_norm": 1.0822140266216713, "learning_rate": 1.64194634223802e-05, "loss": 0.2958947420120239, "step": 2368 }, { "epoch": 0.6291329172752622, "grad_norm": 1.1251439755337522, "learning_rate": 1.6416096186223872e-05, "loss": 0.3089750111103058, "step": 2369 }, { "epoch": 0.6293984862568052, "grad_norm": 1.0517657351777636, "learning_rate": 1.641272771311987e-05, "loss": 0.31597089767456055, "step": 2370 }, { "epoch": 0.6296640552383481, "grad_norm": 1.237586073778816, "learning_rate": 1.6409358003717598e-05, "loss": 0.2968488931655884, "step": 2371 }, { "epoch": 0.6299296242198911, "grad_norm": 1.0062603647307793, "learning_rate": 1.6405987058666694e-05, "loss": 0.27532660961151123, "step": 2372 }, { "epoch": 0.630195193201434, "grad_norm": 1.0061271713511417, "learning_rate": 1.6402614878617037e-05, "loss": 0.2800731956958771, "step": 2373 }, { "epoch": 0.630460762182977, "grad_norm": 1.0867786948587836, "learning_rate": 1.6399241464218744e-05, "loss": 0.31728652119636536, "step": 2374 }, { "epoch": 0.63072633116452, "grad_norm": 1.0634834793994077, "learning_rate": 1.6395866816122167e-05, "loss": 0.2776367664337158, "step": 2375 }, { "epoch": 0.630991900146063, "grad_norm": 1.2696308030410766, "learning_rate": 1.63924909349779e-05, "loss": 0.3308418095111847, "step": 2376 }, { "epoch": 0.6312574691276059, "grad_norm": 1.027144235831433, "learning_rate": 1.6389113821436775e-05, "loss": 0.31589487195014954, "step": 2377 }, { "epoch": 0.6315230381091489, "grad_norm": 0.9983142729953255, "learning_rate": 1.6385735476149855e-05, "loss": 0.27181899547576904, "step": 2378 }, { "epoch": 0.6317886070906918, "grad_norm": 1.0656862561919935, "learning_rate": 1.638235589976845e-05, "loss": 0.2603747546672821, "step": 2379 }, { "epoch": 0.6320541760722348, "grad_norm": 1.0543823342651422, "learning_rate": 1.63789750929441e-05, "loss": 0.29050707817077637, "step": 2380 }, { "epoch": 0.6323197450537777, "grad_norm": 1.0310549396867945, "learning_rate": 1.6375593056328586e-05, "loss": 0.2979413866996765, "step": 2381 }, { "epoch": 0.6325853140353207, "grad_norm": 1.0460005843129836, "learning_rate": 1.6372209790573926e-05, "loss": 0.30875420570373535, "step": 2382 }, { "epoch": 0.6328508830168637, "grad_norm": 0.9698416111844145, "learning_rate": 1.6368825296332366e-05, "loss": 0.2755935788154602, "step": 2383 }, { "epoch": 0.6331164519984066, "grad_norm": 1.1336778567410772, "learning_rate": 1.6365439574256406e-05, "loss": 0.3459136486053467, "step": 2384 }, { "epoch": 0.6333820209799496, "grad_norm": 1.116018329054477, "learning_rate": 1.6362052624998767e-05, "loss": 0.29043829441070557, "step": 2385 }, { "epoch": 0.6336475899614925, "grad_norm": 1.123039696178655, "learning_rate": 1.635866444921242e-05, "loss": 0.321551114320755, "step": 2386 }, { "epoch": 0.6339131589430355, "grad_norm": 1.0451682936950502, "learning_rate": 1.6355275047550553e-05, "loss": 0.28478139638900757, "step": 2387 }, { "epoch": 0.6341787279245784, "grad_norm": 1.060617338056141, "learning_rate": 1.6351884420666616e-05, "loss": 0.30913087725639343, "step": 2388 }, { "epoch": 0.6344442969061214, "grad_norm": 1.0996519301974148, "learning_rate": 1.6348492569214275e-05, "loss": 0.328342467546463, "step": 2389 }, { "epoch": 0.6347098658876643, "grad_norm": 1.0657562962668374, "learning_rate": 1.634509949384744e-05, "loss": 0.3291119933128357, "step": 2390 }, { "epoch": 0.6349754348692073, "grad_norm": 1.0805286951038287, "learning_rate": 1.6341705195220257e-05, "loss": 0.3542378544807434, "step": 2391 }, { "epoch": 0.6352410038507502, "grad_norm": 1.1387422668526126, "learning_rate": 1.63383096739871e-05, "loss": 0.3167935609817505, "step": 2392 }, { "epoch": 0.6355065728322932, "grad_norm": 0.9614211236141011, "learning_rate": 1.63349129308026e-05, "loss": 0.27623263001441956, "step": 2393 }, { "epoch": 0.6357721418138361, "grad_norm": 1.1351525352268206, "learning_rate": 1.6331514966321596e-05, "loss": 0.3615761399269104, "step": 2394 }, { "epoch": 0.6360377107953791, "grad_norm": 1.1430561223010627, "learning_rate": 1.632811578119918e-05, "loss": 0.3503292500972748, "step": 2395 }, { "epoch": 0.636303279776922, "grad_norm": 1.0400637290516392, "learning_rate": 1.6324715376090673e-05, "loss": 0.2994767129421234, "step": 2396 }, { "epoch": 0.636568848758465, "grad_norm": 1.2836743734514182, "learning_rate": 1.6321313751651638e-05, "loss": 0.29903143644332886, "step": 2397 }, { "epoch": 0.6368344177400079, "grad_norm": 1.0273086079776361, "learning_rate": 1.6317910908537865e-05, "loss": 0.310536652803421, "step": 2398 }, { "epoch": 0.6370999867215509, "grad_norm": 1.2820707601171073, "learning_rate": 1.6314506847405382e-05, "loss": 0.32584354281425476, "step": 2399 }, { "epoch": 0.6373655557030938, "grad_norm": 1.186095937719991, "learning_rate": 1.6311101568910448e-05, "loss": 0.3536352217197418, "step": 2400 }, { "epoch": 0.6376311246846368, "grad_norm": 1.0361661707144088, "learning_rate": 1.6307695073709565e-05, "loss": 0.3198434114456177, "step": 2401 }, { "epoch": 0.6378966936661797, "grad_norm": 0.8809138916670839, "learning_rate": 1.6304287362459462e-05, "loss": 0.264182448387146, "step": 2402 }, { "epoch": 0.6381622626477228, "grad_norm": 1.0526335869529386, "learning_rate": 1.6300878435817115e-05, "loss": 0.31182044744491577, "step": 2403 }, { "epoch": 0.6384278316292658, "grad_norm": 1.0495886453587215, "learning_rate": 1.6297468294439708e-05, "loss": 0.28221404552459717, "step": 2404 }, { "epoch": 0.6386934006108087, "grad_norm": 1.0211141314743026, "learning_rate": 1.6294056938984693e-05, "loss": 0.27788785099983215, "step": 2405 }, { "epoch": 0.6389589695923517, "grad_norm": 1.068610455564362, "learning_rate": 1.6290644370109728e-05, "loss": 0.3300796151161194, "step": 2406 }, { "epoch": 0.6392245385738946, "grad_norm": 1.0949996094795582, "learning_rate": 1.628723058847272e-05, "loss": 0.32170963287353516, "step": 2407 }, { "epoch": 0.6394901075554376, "grad_norm": 1.1320309851276869, "learning_rate": 1.628381559473181e-05, "loss": 0.3243589997291565, "step": 2408 }, { "epoch": 0.6397556765369805, "grad_norm": 1.4458945786524546, "learning_rate": 1.6280399389545358e-05, "loss": 0.311046838760376, "step": 2409 }, { "epoch": 0.6400212455185235, "grad_norm": 1.0237689913585555, "learning_rate": 1.6276981973571973e-05, "loss": 0.2642543911933899, "step": 2410 }, { "epoch": 0.6402868145000664, "grad_norm": 1.1424399755044237, "learning_rate": 1.62735633474705e-05, "loss": 0.3593730926513672, "step": 2411 }, { "epoch": 0.6405523834816094, "grad_norm": 1.1145611429504636, "learning_rate": 1.62701435119e-05, "loss": 0.3147425353527069, "step": 2412 }, { "epoch": 0.6408179524631523, "grad_norm": 1.1400749315540035, "learning_rate": 1.6266722467519783e-05, "loss": 0.32639142870903015, "step": 2413 }, { "epoch": 0.6410835214446953, "grad_norm": 1.1011849489387644, "learning_rate": 1.626330021498938e-05, "loss": 0.32113659381866455, "step": 2414 }, { "epoch": 0.6413490904262382, "grad_norm": 1.0371621680767618, "learning_rate": 1.6259876754968568e-05, "loss": 0.3188290297985077, "step": 2415 }, { "epoch": 0.6416146594077812, "grad_norm": 1.076893351246201, "learning_rate": 1.625645208811734e-05, "loss": 0.3145543932914734, "step": 2416 }, { "epoch": 0.6418802283893241, "grad_norm": 1.1368093372185335, "learning_rate": 1.6253026215095943e-05, "loss": 0.30433323979377747, "step": 2417 }, { "epoch": 0.6421457973708671, "grad_norm": 1.1042321396184265, "learning_rate": 1.6249599136564837e-05, "loss": 0.30946728587150574, "step": 2418 }, { "epoch": 0.64241136635241, "grad_norm": 0.991248414026241, "learning_rate": 1.6246170853184726e-05, "loss": 0.26245906949043274, "step": 2419 }, { "epoch": 0.642676935333953, "grad_norm": 1.1213671588278835, "learning_rate": 1.624274136561654e-05, "loss": 0.31468862295150757, "step": 2420 }, { "epoch": 0.6429425043154959, "grad_norm": 1.0200744973975597, "learning_rate": 1.6239310674521443e-05, "loss": 0.28946155309677124, "step": 2421 }, { "epoch": 0.6432080732970389, "grad_norm": 1.1088143851501708, "learning_rate": 1.6235878780560835e-05, "loss": 0.26272106170654297, "step": 2422 }, { "epoch": 0.6434736422785818, "grad_norm": 1.1185700160494145, "learning_rate": 1.6232445684396347e-05, "loss": 0.3094574213027954, "step": 2423 }, { "epoch": 0.6437392112601248, "grad_norm": 0.9377280048944331, "learning_rate": 1.6229011386689832e-05, "loss": 0.2503833770751953, "step": 2424 }, { "epoch": 0.6440047802416677, "grad_norm": 0.9657663244207705, "learning_rate": 1.6225575888103387e-05, "loss": 0.2655009627342224, "step": 2425 }, { "epoch": 0.6442703492232107, "grad_norm": 1.123117061290067, "learning_rate": 1.6222139189299336e-05, "loss": 0.2819611728191376, "step": 2426 }, { "epoch": 0.6445359182047536, "grad_norm": 1.0859641118248262, "learning_rate": 1.6218701290940232e-05, "loss": 0.2956068217754364, "step": 2427 }, { "epoch": 0.6448014871862966, "grad_norm": 1.2445728810553593, "learning_rate": 1.6215262193688862e-05, "loss": 0.3330997824668884, "step": 2428 }, { "epoch": 0.6450670561678395, "grad_norm": 1.0073602881165937, "learning_rate": 1.6211821898208242e-05, "loss": 0.25897055864334106, "step": 2429 }, { "epoch": 0.6453326251493825, "grad_norm": 1.1228221759016932, "learning_rate": 1.6208380405161623e-05, "loss": 0.3119947016239166, "step": 2430 }, { "epoch": 0.6455981941309256, "grad_norm": 1.143631742936843, "learning_rate": 1.6204937715212482e-05, "loss": 0.30833956599235535, "step": 2431 }, { "epoch": 0.6458637631124685, "grad_norm": 1.1584271404994573, "learning_rate": 1.620149382902453e-05, "loss": 0.2935214638710022, "step": 2432 }, { "epoch": 0.6461293320940115, "grad_norm": 1.6063755788258844, "learning_rate": 1.619804874726171e-05, "loss": 0.24297356605529785, "step": 2433 }, { "epoch": 0.6463949010755544, "grad_norm": 1.14218339304969, "learning_rate": 1.6194602470588186e-05, "loss": 0.319774866104126, "step": 2434 }, { "epoch": 0.6466604700570974, "grad_norm": 1.1751618225153557, "learning_rate": 1.6191154999668368e-05, "loss": 0.29197463393211365, "step": 2435 }, { "epoch": 0.6469260390386403, "grad_norm": 1.1008916130088804, "learning_rate": 1.6187706335166882e-05, "loss": 0.2939727306365967, "step": 2436 }, { "epoch": 0.6471916080201833, "grad_norm": 1.0935449463761302, "learning_rate": 1.6184256477748595e-05, "loss": 0.2941162586212158, "step": 2437 }, { "epoch": 0.6474571770017262, "grad_norm": 1.1336931987797143, "learning_rate": 1.6180805428078593e-05, "loss": 0.2823144197463989, "step": 2438 }, { "epoch": 0.6477227459832692, "grad_norm": 1.0912252779984561, "learning_rate": 1.61773531868222e-05, "loss": 0.30048274993896484, "step": 2439 }, { "epoch": 0.6479883149648121, "grad_norm": 1.183044095349839, "learning_rate": 1.617389975464497e-05, "loss": 0.30927354097366333, "step": 2440 }, { "epoch": 0.6482538839463551, "grad_norm": 1.166570736507726, "learning_rate": 1.6170445132212678e-05, "loss": 0.34835004806518555, "step": 2441 }, { "epoch": 0.648519452927898, "grad_norm": 1.0325781129961564, "learning_rate": 1.616698932019134e-05, "loss": 0.2890225648880005, "step": 2442 }, { "epoch": 0.648785021909441, "grad_norm": 1.1182329319338478, "learning_rate": 1.6163532319247195e-05, "loss": 0.31410521268844604, "step": 2443 }, { "epoch": 0.6490505908909839, "grad_norm": 0.9213656240638256, "learning_rate": 1.616007413004671e-05, "loss": 0.267375111579895, "step": 2444 }, { "epoch": 0.6493161598725269, "grad_norm": 1.1587177777274813, "learning_rate": 1.6156614753256583e-05, "loss": 0.3300023376941681, "step": 2445 }, { "epoch": 0.6495817288540698, "grad_norm": 1.0295072511714587, "learning_rate": 1.615315418954374e-05, "loss": 0.2822847366333008, "step": 2446 }, { "epoch": 0.6498472978356128, "grad_norm": 1.1626615137060834, "learning_rate": 1.6149692439575348e-05, "loss": 0.3093401789665222, "step": 2447 }, { "epoch": 0.6501128668171557, "grad_norm": 1.0475923101386018, "learning_rate": 1.6146229504018777e-05, "loss": 0.2892506718635559, "step": 2448 }, { "epoch": 0.6503784357986987, "grad_norm": 0.9972012319936079, "learning_rate": 1.6142765383541643e-05, "loss": 0.2805558741092682, "step": 2449 }, { "epoch": 0.6506440047802416, "grad_norm": 1.0535842654025462, "learning_rate": 1.6139300078811794e-05, "loss": 0.29852935671806335, "step": 2450 }, { "epoch": 0.6509095737617846, "grad_norm": 1.193949473615032, "learning_rate": 1.6135833590497295e-05, "loss": 0.3567991256713867, "step": 2451 }, { "epoch": 0.6511751427433276, "grad_norm": 1.1265709697559396, "learning_rate": 1.6132365919266442e-05, "loss": 0.29564782977104187, "step": 2452 }, { "epoch": 0.6514407117248705, "grad_norm": 1.011180050217134, "learning_rate": 1.612889706578777e-05, "loss": 0.30027297139167786, "step": 2453 }, { "epoch": 0.6517062807064135, "grad_norm": 1.0908136110597069, "learning_rate": 1.6125427030730027e-05, "loss": 0.3318096697330475, "step": 2454 }, { "epoch": 0.6519718496879564, "grad_norm": 1.0728958387824694, "learning_rate": 1.612195581476219e-05, "loss": 0.30962997674942017, "step": 2455 }, { "epoch": 0.6522374186694994, "grad_norm": 1.2969539714019946, "learning_rate": 1.6118483418553476e-05, "loss": 0.3152836859226227, "step": 2456 }, { "epoch": 0.6525029876510423, "grad_norm": 1.0160215490589632, "learning_rate": 1.6115009842773322e-05, "loss": 0.26117920875549316, "step": 2457 }, { "epoch": 0.6527685566325853, "grad_norm": 0.9780826840488046, "learning_rate": 1.6111535088091388e-05, "loss": 0.2705717384815216, "step": 2458 }, { "epoch": 0.6530341256141283, "grad_norm": 1.112935626593024, "learning_rate": 1.6108059155177568e-05, "loss": 0.3281205892562866, "step": 2459 }, { "epoch": 0.6532996945956713, "grad_norm": 1.0805050021999307, "learning_rate": 1.6104582044701983e-05, "loss": 0.3300125002861023, "step": 2460 }, { "epoch": 0.6535652635772142, "grad_norm": 1.0596352955938992, "learning_rate": 1.6101103757334973e-05, "loss": 0.29286977648735046, "step": 2461 }, { "epoch": 0.6538308325587572, "grad_norm": 1.114611766363321, "learning_rate": 1.6097624293747115e-05, "loss": 0.2920498847961426, "step": 2462 }, { "epoch": 0.6540964015403001, "grad_norm": 1.0455118881549736, "learning_rate": 1.609414365460921e-05, "loss": 0.31018689274787903, "step": 2463 }, { "epoch": 0.6543619705218431, "grad_norm": 1.0028130278859915, "learning_rate": 1.609066184059228e-05, "loss": 0.26806512475013733, "step": 2464 }, { "epoch": 0.654627539503386, "grad_norm": 1.0385768164913443, "learning_rate": 1.608717885236758e-05, "loss": 0.29770639538764954, "step": 2465 }, { "epoch": 0.654893108484929, "grad_norm": 1.0811683391440958, "learning_rate": 1.6083694690606592e-05, "loss": 0.36161965131759644, "step": 2466 }, { "epoch": 0.6551586774664719, "grad_norm": 1.1455214370068598, "learning_rate": 1.6080209355981016e-05, "loss": 0.36114081740379333, "step": 2467 }, { "epoch": 0.6554242464480149, "grad_norm": 0.9911085328884063, "learning_rate": 1.6076722849162786e-05, "loss": 0.28924882411956787, "step": 2468 }, { "epoch": 0.6556898154295578, "grad_norm": 1.1198872767040324, "learning_rate": 1.6073235170824058e-05, "loss": 0.3088049292564392, "step": 2469 }, { "epoch": 0.6559553844111008, "grad_norm": 1.062389027957873, "learning_rate": 1.6069746321637216e-05, "loss": 0.2684907615184784, "step": 2470 }, { "epoch": 0.6562209533926437, "grad_norm": 0.9850175058697045, "learning_rate": 1.6066256302274873e-05, "loss": 0.2674641013145447, "step": 2471 }, { "epoch": 0.6564865223741867, "grad_norm": 1.0658104164235327, "learning_rate": 1.6062765113409854e-05, "loss": 0.2865106165409088, "step": 2472 }, { "epoch": 0.6567520913557297, "grad_norm": 1.1117203943537428, "learning_rate": 1.605927275571523e-05, "loss": 0.33163607120513916, "step": 2473 }, { "epoch": 0.6570176603372726, "grad_norm": 1.1177244627769223, "learning_rate": 1.6055779229864276e-05, "loss": 0.32725927233695984, "step": 2474 }, { "epoch": 0.6572832293188156, "grad_norm": 1.171322314473831, "learning_rate": 1.605228453653051e-05, "loss": 0.31537747383117676, "step": 2475 }, { "epoch": 0.6575487983003585, "grad_norm": 1.0855461390356589, "learning_rate": 1.604878867638767e-05, "loss": 0.29331761598587036, "step": 2476 }, { "epoch": 0.6578143672819015, "grad_norm": 1.0342424424241736, "learning_rate": 1.6045291650109706e-05, "loss": 0.315193772315979, "step": 2477 }, { "epoch": 0.6580799362634444, "grad_norm": 1.2286540067411784, "learning_rate": 1.6041793458370812e-05, "loss": 0.3595796227455139, "step": 2478 }, { "epoch": 0.6583455052449874, "grad_norm": 1.0251892797499218, "learning_rate": 1.6038294101845394e-05, "loss": 0.3069949150085449, "step": 2479 }, { "epoch": 0.6586110742265303, "grad_norm": 1.1576253586981062, "learning_rate": 1.603479358120809e-05, "loss": 0.3154812455177307, "step": 2480 }, { "epoch": 0.6588766432080733, "grad_norm": 1.1008921076459075, "learning_rate": 1.6031291897133756e-05, "loss": 0.3005039691925049, "step": 2481 }, { "epoch": 0.6591422121896162, "grad_norm": 1.1463594149599334, "learning_rate": 1.6027789050297476e-05, "loss": 0.2885095775127411, "step": 2482 }, { "epoch": 0.6594077811711592, "grad_norm": 1.002066881102099, "learning_rate": 1.602428504137456e-05, "loss": 0.291950523853302, "step": 2483 }, { "epoch": 0.6596733501527021, "grad_norm": 1.0919380790727968, "learning_rate": 1.6020779871040538e-05, "loss": 0.31630760431289673, "step": 2484 }, { "epoch": 0.6599389191342451, "grad_norm": 1.0827567425634856, "learning_rate": 1.6017273539971167e-05, "loss": 0.29767507314682007, "step": 2485 }, { "epoch": 0.660204488115788, "grad_norm": 1.036820980968177, "learning_rate": 1.601376604884242e-05, "loss": 0.2882775664329529, "step": 2486 }, { "epoch": 0.6604700570973311, "grad_norm": 1.0885135950320362, "learning_rate": 1.601025739833051e-05, "loss": 0.325736403465271, "step": 2487 }, { "epoch": 0.660735626078874, "grad_norm": 1.048580856774253, "learning_rate": 1.6006747589111854e-05, "loss": 0.3007255792617798, "step": 2488 }, { "epoch": 0.661001195060417, "grad_norm": 1.146836506523448, "learning_rate": 1.6003236621863107e-05, "loss": 0.33199968934059143, "step": 2489 }, { "epoch": 0.6612667640419599, "grad_norm": 1.1430196866694278, "learning_rate": 1.5999724497261138e-05, "loss": 0.3784569799900055, "step": 2490 }, { "epoch": 0.6615323330235029, "grad_norm": 1.0506667031587968, "learning_rate": 1.5996211215983052e-05, "loss": 0.28146931529045105, "step": 2491 }, { "epoch": 0.6617979020050458, "grad_norm": 1.0621415260673002, "learning_rate": 1.599269677870616e-05, "loss": 0.32187730073928833, "step": 2492 }, { "epoch": 0.6620634709865888, "grad_norm": 1.0631524880676668, "learning_rate": 1.5989181186108003e-05, "loss": 0.3021823465824127, "step": 2493 }, { "epoch": 0.6623290399681317, "grad_norm": 1.0248198480240434, "learning_rate": 1.5985664438866354e-05, "loss": 0.3309648334980011, "step": 2494 }, { "epoch": 0.6625946089496747, "grad_norm": 1.0183038789118495, "learning_rate": 1.598214653765919e-05, "loss": 0.2939694821834564, "step": 2495 }, { "epoch": 0.6628601779312177, "grad_norm": 1.0091208408649601, "learning_rate": 1.597862748316473e-05, "loss": 0.31219810247421265, "step": 2496 }, { "epoch": 0.6631257469127606, "grad_norm": 1.3669850946739606, "learning_rate": 1.5975107276061405e-05, "loss": 0.29435622692108154, "step": 2497 }, { "epoch": 0.6633913158943036, "grad_norm": 1.0359724885535866, "learning_rate": 1.5971585917027864e-05, "loss": 0.27167004346847534, "step": 2498 }, { "epoch": 0.6636568848758465, "grad_norm": 1.121619558624798, "learning_rate": 1.5968063406742988e-05, "loss": 0.3360658884048462, "step": 2499 }, { "epoch": 0.6639224538573895, "grad_norm": 1.0767207810238415, "learning_rate": 1.596453974588587e-05, "loss": 0.2994089424610138, "step": 2500 }, { "epoch": 0.6641880228389324, "grad_norm": 1.0997593865705806, "learning_rate": 1.596101493513584e-05, "loss": 0.32302889227867126, "step": 2501 }, { "epoch": 0.6644535918204754, "grad_norm": 1.1249891187970829, "learning_rate": 1.595748897517243e-05, "loss": 0.3122987747192383, "step": 2502 }, { "epoch": 0.6647191608020183, "grad_norm": 1.014108779554691, "learning_rate": 1.5953961866675408e-05, "loss": 0.2746438980102539, "step": 2503 }, { "epoch": 0.6649847297835613, "grad_norm": 1.0758059481680302, "learning_rate": 1.5950433610324758e-05, "loss": 0.3043097257614136, "step": 2504 }, { "epoch": 0.6652502987651042, "grad_norm": 1.2204942135197403, "learning_rate": 1.594690420680069e-05, "loss": 0.3208698332309723, "step": 2505 }, { "epoch": 0.6655158677466472, "grad_norm": 1.1502218188727449, "learning_rate": 1.5943373656783628e-05, "loss": 0.317341148853302, "step": 2506 }, { "epoch": 0.6657814367281901, "grad_norm": 1.1223078751349502, "learning_rate": 1.5939841960954218e-05, "loss": 0.3250347673892975, "step": 2507 }, { "epoch": 0.6660470057097331, "grad_norm": 1.066903715567463, "learning_rate": 1.5936309119993333e-05, "loss": 0.32255828380584717, "step": 2508 }, { "epoch": 0.666312574691276, "grad_norm": 1.0591506680476068, "learning_rate": 1.593277513458206e-05, "loss": 0.3247614800930023, "step": 2509 }, { "epoch": 0.666578143672819, "grad_norm": 1.087253896768941, "learning_rate": 1.5929240005401715e-05, "loss": 0.34171730279922485, "step": 2510 }, { "epoch": 0.6668437126543619, "grad_norm": 1.092874100004657, "learning_rate": 1.5925703733133823e-05, "loss": 0.30671584606170654, "step": 2511 }, { "epoch": 0.6671092816359049, "grad_norm": 1.1250075389065, "learning_rate": 1.5922166318460138e-05, "loss": 0.3387908339500427, "step": 2512 }, { "epoch": 0.6673748506174478, "grad_norm": 1.0272141820522305, "learning_rate": 1.5918627762062635e-05, "loss": 0.2772873044013977, "step": 2513 }, { "epoch": 0.6676404195989908, "grad_norm": 1.0802689739154336, "learning_rate": 1.59150880646235e-05, "loss": 0.31555238366127014, "step": 2514 }, { "epoch": 0.6679059885805337, "grad_norm": 0.9930963010924009, "learning_rate": 1.5911547226825154e-05, "loss": 0.2821594476699829, "step": 2515 }, { "epoch": 0.6681715575620768, "grad_norm": 1.098936156337469, "learning_rate": 1.5908005249350217e-05, "loss": 0.3176054358482361, "step": 2516 }, { "epoch": 0.6684371265436198, "grad_norm": 1.083365844116071, "learning_rate": 1.590446213288155e-05, "loss": 0.28484907746315, "step": 2517 }, { "epoch": 0.6687026955251627, "grad_norm": 1.0028500327966023, "learning_rate": 1.590091787810222e-05, "loss": 0.25227850675582886, "step": 2518 }, { "epoch": 0.6689682645067057, "grad_norm": 0.993931866088294, "learning_rate": 1.5897372485695514e-05, "loss": 0.276819109916687, "step": 2519 }, { "epoch": 0.6692338334882486, "grad_norm": 1.1883846939575156, "learning_rate": 1.589382595634495e-05, "loss": 0.27944183349609375, "step": 2520 }, { "epoch": 0.6694994024697916, "grad_norm": 1.0217591474349375, "learning_rate": 1.589027829073425e-05, "loss": 0.295337975025177, "step": 2521 }, { "epoch": 0.6697649714513345, "grad_norm": 1.0940479681497102, "learning_rate": 1.5886729489547365e-05, "loss": 0.31168580055236816, "step": 2522 }, { "epoch": 0.6700305404328775, "grad_norm": 1.0847233646991081, "learning_rate": 1.5883179553468465e-05, "loss": 0.34520941972732544, "step": 2523 }, { "epoch": 0.6702961094144204, "grad_norm": 1.0941539012056998, "learning_rate": 1.587962848318193e-05, "loss": 0.3121863901615143, "step": 2524 }, { "epoch": 0.6705616783959634, "grad_norm": 1.2414605611463847, "learning_rate": 1.587607627937237e-05, "loss": 0.3450377583503723, "step": 2525 }, { "epoch": 0.6708272473775063, "grad_norm": 1.0575484463097053, "learning_rate": 1.58725229427246e-05, "loss": 0.33431196212768555, "step": 2526 }, { "epoch": 0.6710928163590493, "grad_norm": 2.8101197900274433, "learning_rate": 1.5868968473923675e-05, "loss": 0.2753226161003113, "step": 2527 }, { "epoch": 0.6713583853405922, "grad_norm": 1.1171540013343635, "learning_rate": 1.586541287365484e-05, "loss": 0.31394219398498535, "step": 2528 }, { "epoch": 0.6716239543221352, "grad_norm": 1.0940027543433968, "learning_rate": 1.586185614260358e-05, "loss": 0.352859765291214, "step": 2529 }, { "epoch": 0.6718895233036781, "grad_norm": 1.158790754412002, "learning_rate": 1.5858298281455592e-05, "loss": 0.3182204067707062, "step": 2530 }, { "epoch": 0.6721550922852211, "grad_norm": 1.0901686159979078, "learning_rate": 1.5854739290896785e-05, "loss": 0.3107008934020996, "step": 2531 }, { "epoch": 0.672420661266764, "grad_norm": 1.0367853416177613, "learning_rate": 1.5851179171613294e-05, "loss": 0.2737328112125397, "step": 2532 }, { "epoch": 0.672686230248307, "grad_norm": 1.070700914663809, "learning_rate": 1.5847617924291466e-05, "loss": 0.2744509279727936, "step": 2533 }, { "epoch": 0.6729517992298499, "grad_norm": 1.0763385778363233, "learning_rate": 1.584405554961787e-05, "loss": 0.3149082660675049, "step": 2534 }, { "epoch": 0.6732173682113929, "grad_norm": 1.1199335422347676, "learning_rate": 1.584049204827929e-05, "loss": 0.32643741369247437, "step": 2535 }, { "epoch": 0.6734829371929358, "grad_norm": 1.1153920819002263, "learning_rate": 1.583692742096272e-05, "loss": 0.31901559233665466, "step": 2536 }, { "epoch": 0.6737485061744788, "grad_norm": 1.037012713250851, "learning_rate": 1.583336166835539e-05, "loss": 0.3020802140235901, "step": 2537 }, { "epoch": 0.6740140751560217, "grad_norm": 0.9884255382698084, "learning_rate": 1.5829794791144723e-05, "loss": 0.29683804512023926, "step": 2538 }, { "epoch": 0.6742796441375647, "grad_norm": 1.0549080502640127, "learning_rate": 1.582622679001838e-05, "loss": 0.2898966073989868, "step": 2539 }, { "epoch": 0.6745452131191076, "grad_norm": 1.0628349250468347, "learning_rate": 1.582265766566422e-05, "loss": 0.2665000855922699, "step": 2540 }, { "epoch": 0.6748107821006506, "grad_norm": 1.1059852721256176, "learning_rate": 1.581908741877034e-05, "loss": 0.2987207770347595, "step": 2541 }, { "epoch": 0.6750763510821935, "grad_norm": 1.1051901132495052, "learning_rate": 1.5815516050025032e-05, "loss": 0.32591086626052856, "step": 2542 }, { "epoch": 0.6753419200637365, "grad_norm": 0.9752097662975195, "learning_rate": 1.581194356011682e-05, "loss": 0.28181299567222595, "step": 2543 }, { "epoch": 0.6756074890452796, "grad_norm": 1.0983389872703522, "learning_rate": 1.5808369949734433e-05, "loss": 0.3256041407585144, "step": 2544 }, { "epoch": 0.6758730580268225, "grad_norm": 1.1228012917357884, "learning_rate": 1.5804795219566825e-05, "loss": 0.3079703152179718, "step": 2545 }, { "epoch": 0.6761386270083655, "grad_norm": 1.1504916593616519, "learning_rate": 1.580121937030316e-05, "loss": 0.3364162743091583, "step": 2546 }, { "epoch": 0.6764041959899084, "grad_norm": 1.046870504650359, "learning_rate": 1.5797642402632816e-05, "loss": 0.2774898111820221, "step": 2547 }, { "epoch": 0.6766697649714514, "grad_norm": 1.1108782100380157, "learning_rate": 1.5794064317245396e-05, "loss": 0.33260244131088257, "step": 2548 }, { "epoch": 0.6769353339529943, "grad_norm": 1.16229568793775, "learning_rate": 1.5790485114830708e-05, "loss": 0.3327571153640747, "step": 2549 }, { "epoch": 0.6772009029345373, "grad_norm": 1.1256526679188055, "learning_rate": 1.5786904796078783e-05, "loss": 0.28527912497520447, "step": 2550 }, { "epoch": 0.6774664719160802, "grad_norm": 1.1757868172389025, "learning_rate": 1.5783323361679865e-05, "loss": 0.3100908100605011, "step": 2551 }, { "epoch": 0.6777320408976232, "grad_norm": 1.1187226402475792, "learning_rate": 1.577974081232441e-05, "loss": 0.3434574007987976, "step": 2552 }, { "epoch": 0.6779976098791661, "grad_norm": 1.0691671390255433, "learning_rate": 1.5776157148703094e-05, "loss": 0.3151341676712036, "step": 2553 }, { "epoch": 0.6782631788607091, "grad_norm": 1.1432839314923735, "learning_rate": 1.5772572371506803e-05, "loss": 0.33334124088287354, "step": 2554 }, { "epoch": 0.678528747842252, "grad_norm": 0.9718187941404679, "learning_rate": 1.576898648142664e-05, "loss": 0.26933547854423523, "step": 2555 }, { "epoch": 0.678794316823795, "grad_norm": 1.0146251280063243, "learning_rate": 1.576539947915392e-05, "loss": 0.3087029755115509, "step": 2556 }, { "epoch": 0.6790598858053379, "grad_norm": 2.0746649121309244, "learning_rate": 1.576181136538018e-05, "loss": 0.32620540261268616, "step": 2557 }, { "epoch": 0.6793254547868809, "grad_norm": 1.0462752825892652, "learning_rate": 1.575822214079716e-05, "loss": 0.29112139344215393, "step": 2558 }, { "epoch": 0.6795910237684238, "grad_norm": 1.108770761520566, "learning_rate": 1.5754631806096822e-05, "loss": 0.3394843339920044, "step": 2559 }, { "epoch": 0.6798565927499668, "grad_norm": 1.0789431162979184, "learning_rate": 1.5751040361971342e-05, "loss": 0.32754629850387573, "step": 2560 }, { "epoch": 0.6801221617315097, "grad_norm": 1.055729440740922, "learning_rate": 1.574744780911311e-05, "loss": 0.2829592823982239, "step": 2561 }, { "epoch": 0.6803877307130527, "grad_norm": 3.1916720491195423, "learning_rate": 1.5743854148214724e-05, "loss": 0.2718046307563782, "step": 2562 }, { "epoch": 0.6806532996945956, "grad_norm": 1.0355755791413483, "learning_rate": 1.5740259379969002e-05, "loss": 0.29244256019592285, "step": 2563 }, { "epoch": 0.6809188686761386, "grad_norm": 1.0678189150114252, "learning_rate": 1.5736663505068972e-05, "loss": 0.2925388514995575, "step": 2564 }, { "epoch": 0.6811844376576816, "grad_norm": 1.109826571766002, "learning_rate": 1.5733066524207875e-05, "loss": 0.26742440462112427, "step": 2565 }, { "epoch": 0.6814500066392245, "grad_norm": 1.0365586719986022, "learning_rate": 1.5729468438079167e-05, "loss": 0.33688807487487793, "step": 2566 }, { "epoch": 0.6817155756207675, "grad_norm": 1.0939355325909954, "learning_rate": 1.5725869247376514e-05, "loss": 0.2953096330165863, "step": 2567 }, { "epoch": 0.6819811446023104, "grad_norm": 1.081510188555139, "learning_rate": 1.5722268952793806e-05, "loss": 0.321500301361084, "step": 2568 }, { "epoch": 0.6822467135838534, "grad_norm": 1.1427798210793014, "learning_rate": 1.5718667555025127e-05, "loss": 0.29148590564727783, "step": 2569 }, { "epoch": 0.6825122825653963, "grad_norm": 1.0849106130015975, "learning_rate": 1.5715065054764792e-05, "loss": 0.26887139678001404, "step": 2570 }, { "epoch": 0.6827778515469393, "grad_norm": 0.9118900514894542, "learning_rate": 1.5711461452707316e-05, "loss": 0.2698139250278473, "step": 2571 }, { "epoch": 0.6830434205284823, "grad_norm": 0.9420578172190551, "learning_rate": 1.5707856749547433e-05, "loss": 0.264956533908844, "step": 2572 }, { "epoch": 0.6833089895100253, "grad_norm": 1.0786584040903482, "learning_rate": 1.5704250945980085e-05, "loss": 0.32535314559936523, "step": 2573 }, { "epoch": 0.6835745584915682, "grad_norm": 1.1132312438200667, "learning_rate": 1.5700644042700432e-05, "loss": 0.30529654026031494, "step": 2574 }, { "epoch": 0.6838401274731112, "grad_norm": 0.9518994724553314, "learning_rate": 1.569703604040384e-05, "loss": 0.27253150939941406, "step": 2575 }, { "epoch": 0.6841056964546541, "grad_norm": 1.0559070796873817, "learning_rate": 1.5693426939785886e-05, "loss": 0.27451053261756897, "step": 2576 }, { "epoch": 0.6843712654361971, "grad_norm": 1.1393124405849042, "learning_rate": 1.5689816741542374e-05, "loss": 0.33280283212661743, "step": 2577 }, { "epoch": 0.68463683441774, "grad_norm": 1.1306113061745138, "learning_rate": 1.5686205446369293e-05, "loss": 0.2911887764930725, "step": 2578 }, { "epoch": 0.684902403399283, "grad_norm": 1.0940465986734231, "learning_rate": 1.5682593054962866e-05, "loss": 0.2950279116630554, "step": 2579 }, { "epoch": 0.6851679723808259, "grad_norm": 1.0911163136563768, "learning_rate": 1.5678979568019518e-05, "loss": 0.3267458975315094, "step": 2580 }, { "epoch": 0.6854335413623689, "grad_norm": 1.2739312763430675, "learning_rate": 1.5675364986235887e-05, "loss": 0.3209132254123688, "step": 2581 }, { "epoch": 0.6856991103439118, "grad_norm": 1.1101887519376679, "learning_rate": 1.5671749310308818e-05, "loss": 0.3186662197113037, "step": 2582 }, { "epoch": 0.6859646793254548, "grad_norm": 0.9652854961372175, "learning_rate": 1.566813254093538e-05, "loss": 0.24875827133655548, "step": 2583 }, { "epoch": 0.6862302483069977, "grad_norm": 1.0684425959326884, "learning_rate": 1.5664514678812835e-05, "loss": 0.26657983660697937, "step": 2584 }, { "epoch": 0.6864958172885407, "grad_norm": 1.0670123202559558, "learning_rate": 1.5660895724638666e-05, "loss": 0.2889682650566101, "step": 2585 }, { "epoch": 0.6867613862700837, "grad_norm": 1.2310590689373582, "learning_rate": 1.5657275679110564e-05, "loss": 0.32035061717033386, "step": 2586 }, { "epoch": 0.6870269552516266, "grad_norm": 0.9946580402808185, "learning_rate": 1.5653654542926435e-05, "loss": 0.2844264507293701, "step": 2587 }, { "epoch": 0.6872925242331696, "grad_norm": 1.0738818938413612, "learning_rate": 1.5650032316784388e-05, "loss": 0.27645713090896606, "step": 2588 }, { "epoch": 0.6875580932147125, "grad_norm": 1.0078062598096618, "learning_rate": 1.5646409001382745e-05, "loss": 0.29902809858322144, "step": 2589 }, { "epoch": 0.6878236621962555, "grad_norm": 1.0662439819494403, "learning_rate": 1.564278459742004e-05, "loss": 0.28179824352264404, "step": 2590 }, { "epoch": 0.6880892311777984, "grad_norm": 0.9959782320912598, "learning_rate": 1.563915910559502e-05, "loss": 0.30527305603027344, "step": 2591 }, { "epoch": 0.6883548001593414, "grad_norm": 0.9640464455731136, "learning_rate": 1.5635532526606625e-05, "loss": 0.29411792755126953, "step": 2592 }, { "epoch": 0.6886203691408843, "grad_norm": 1.0659796212639145, "learning_rate": 1.563190486115403e-05, "loss": 0.32294154167175293, "step": 2593 }, { "epoch": 0.6888859381224273, "grad_norm": 1.0983041505312465, "learning_rate": 1.5628276109936594e-05, "loss": 0.31873172521591187, "step": 2594 }, { "epoch": 0.6891515071039702, "grad_norm": 1.2163401358885952, "learning_rate": 1.5624646273653908e-05, "loss": 0.37790048122406006, "step": 2595 }, { "epoch": 0.6894170760855132, "grad_norm": 1.0271206309222516, "learning_rate": 1.5621015353005754e-05, "loss": 0.27596205472946167, "step": 2596 }, { "epoch": 0.6896826450670561, "grad_norm": 1.2915034278595348, "learning_rate": 1.5617383348692135e-05, "loss": 0.30952686071395874, "step": 2597 }, { "epoch": 0.6899482140485991, "grad_norm": 1.089414433310086, "learning_rate": 1.5613750261413256e-05, "loss": 0.2933235764503479, "step": 2598 }, { "epoch": 0.690213783030142, "grad_norm": 1.1151043496896997, "learning_rate": 1.5610116091869538e-05, "loss": 0.2961776554584503, "step": 2599 }, { "epoch": 0.6904793520116851, "grad_norm": 1.0596230408388436, "learning_rate": 1.56064808407616e-05, "loss": 0.2843313217163086, "step": 2600 }, { "epoch": 0.690744920993228, "grad_norm": 1.0545406618996236, "learning_rate": 1.560284450879028e-05, "loss": 0.29366564750671387, "step": 2601 }, { "epoch": 0.691010489974771, "grad_norm": 1.028254286030692, "learning_rate": 1.5599207096656614e-05, "loss": 0.32668614387512207, "step": 2602 }, { "epoch": 0.6912760589563139, "grad_norm": 1.1962201821774399, "learning_rate": 1.5595568605061858e-05, "loss": 0.344367653131485, "step": 2603 }, { "epoch": 0.6915416279378569, "grad_norm": 1.2250839657368426, "learning_rate": 1.5591929034707468e-05, "loss": 0.2875809371471405, "step": 2604 }, { "epoch": 0.6918071969193998, "grad_norm": 0.9717157700868733, "learning_rate": 1.5588288386295113e-05, "loss": 0.2688799202442169, "step": 2605 }, { "epoch": 0.6920727659009428, "grad_norm": 1.2520016236289049, "learning_rate": 1.558464666052667e-05, "loss": 0.28575828671455383, "step": 2606 }, { "epoch": 0.6923383348824858, "grad_norm": 1.0741907315089707, "learning_rate": 1.5581003858104203e-05, "loss": 0.2800632119178772, "step": 2607 }, { "epoch": 0.6926039038640287, "grad_norm": 1.096176752690496, "learning_rate": 1.5577359979730022e-05, "loss": 0.3066416382789612, "step": 2608 }, { "epoch": 0.6928694728455717, "grad_norm": 1.0146792499875503, "learning_rate": 1.5573715026106617e-05, "loss": 0.3164110779762268, "step": 2609 }, { "epoch": 0.6931350418271146, "grad_norm": 1.0292100354922897, "learning_rate": 1.5570068997936686e-05, "loss": 0.2908422350883484, "step": 2610 }, { "epoch": 0.6934006108086576, "grad_norm": 0.9996966110923509, "learning_rate": 1.5566421895923148e-05, "loss": 0.29055240750312805, "step": 2611 }, { "epoch": 0.6936661797902005, "grad_norm": 1.1296077877181152, "learning_rate": 1.556277372076912e-05, "loss": 0.3247227370738983, "step": 2612 }, { "epoch": 0.6939317487717435, "grad_norm": 1.0869397458201258, "learning_rate": 1.555912447317792e-05, "loss": 0.29944315552711487, "step": 2613 }, { "epoch": 0.6941973177532864, "grad_norm": 1.140637727836958, "learning_rate": 1.5555474153853092e-05, "loss": 0.2984931170940399, "step": 2614 }, { "epoch": 0.6944628867348294, "grad_norm": 1.0644561032518303, "learning_rate": 1.5551822763498364e-05, "loss": 0.301285982131958, "step": 2615 }, { "epoch": 0.6947284557163723, "grad_norm": 1.0271314049069311, "learning_rate": 1.5548170302817683e-05, "loss": 0.2862967252731323, "step": 2616 }, { "epoch": 0.6949940246979153, "grad_norm": 1.0216494335731472, "learning_rate": 1.5544516772515207e-05, "loss": 0.3071482181549072, "step": 2617 }, { "epoch": 0.6952595936794582, "grad_norm": 1.153798162838472, "learning_rate": 1.5540862173295285e-05, "loss": 0.33668914437294006, "step": 2618 }, { "epoch": 0.6955251626610012, "grad_norm": 1.0451730984690786, "learning_rate": 1.5537206505862486e-05, "loss": 0.32204627990722656, "step": 2619 }, { "epoch": 0.6957907316425441, "grad_norm": 1.083101648134336, "learning_rate": 1.5533549770921576e-05, "loss": 0.30210041999816895, "step": 2620 }, { "epoch": 0.6960563006240871, "grad_norm": 1.1518417167078652, "learning_rate": 1.5529891969177535e-05, "loss": 0.3116886019706726, "step": 2621 }, { "epoch": 0.69632186960563, "grad_norm": 1.1473344970327815, "learning_rate": 1.5526233101335543e-05, "loss": 0.3460058867931366, "step": 2622 }, { "epoch": 0.696587438587173, "grad_norm": 1.0477810576486106, "learning_rate": 1.552257316810098e-05, "loss": 0.30080512166023254, "step": 2623 }, { "epoch": 0.6968530075687159, "grad_norm": 1.1107090823955428, "learning_rate": 1.5518912170179447e-05, "loss": 0.3381347954273224, "step": 2624 }, { "epoch": 0.6971185765502589, "grad_norm": 1.0737064011248665, "learning_rate": 1.5515250108276733e-05, "loss": 0.30345672369003296, "step": 2625 }, { "epoch": 0.6973841455318018, "grad_norm": 1.1809134250993814, "learning_rate": 1.5511586983098847e-05, "loss": 0.3002641797065735, "step": 2626 }, { "epoch": 0.6976497145133448, "grad_norm": 0.9975793486319376, "learning_rate": 1.5507922795351992e-05, "loss": 0.2848126292228699, "step": 2627 }, { "epoch": 0.6979152834948879, "grad_norm": 1.1203755244922207, "learning_rate": 1.5504257545742585e-05, "loss": 0.32360371947288513, "step": 2628 }, { "epoch": 0.6981808524764308, "grad_norm": 1.0674295201271842, "learning_rate": 1.5500591234977237e-05, "loss": 0.2970595955848694, "step": 2629 }, { "epoch": 0.6984464214579738, "grad_norm": 1.1343972682519483, "learning_rate": 1.5496923863762773e-05, "loss": 0.35431474447250366, "step": 2630 }, { "epoch": 0.6987119904395167, "grad_norm": 1.027377246814574, "learning_rate": 1.549325543280622e-05, "loss": 0.30133551359176636, "step": 2631 }, { "epoch": 0.6989775594210597, "grad_norm": 1.066148832325447, "learning_rate": 1.5489585942814807e-05, "loss": 0.3013160824775696, "step": 2632 }, { "epoch": 0.6992431284026026, "grad_norm": 1.1981871164483473, "learning_rate": 1.5485915394495967e-05, "loss": 0.3291313052177429, "step": 2633 }, { "epoch": 0.6995086973841456, "grad_norm": 1.3083774012082008, "learning_rate": 1.5482243788557336e-05, "loss": 0.32308053970336914, "step": 2634 }, { "epoch": 0.6997742663656885, "grad_norm": 1.0802428984314951, "learning_rate": 1.5478571125706762e-05, "loss": 0.321450412273407, "step": 2635 }, { "epoch": 0.7000398353472315, "grad_norm": 1.1144035500723286, "learning_rate": 1.547489740665229e-05, "loss": 0.30871254205703735, "step": 2636 }, { "epoch": 0.7003054043287744, "grad_norm": 1.1599776854022048, "learning_rate": 1.5471222632102168e-05, "loss": 0.29414835572242737, "step": 2637 }, { "epoch": 0.7005709733103174, "grad_norm": 1.019484878273918, "learning_rate": 1.546754680276485e-05, "loss": 0.2841604948043823, "step": 2638 }, { "epoch": 0.7008365422918603, "grad_norm": 1.039625714192533, "learning_rate": 1.546386991934899e-05, "loss": 0.2895316183567047, "step": 2639 }, { "epoch": 0.7011021112734033, "grad_norm": 1.0418724746200432, "learning_rate": 1.546019198256345e-05, "loss": 0.310278058052063, "step": 2640 }, { "epoch": 0.7013676802549462, "grad_norm": 1.1737622034955963, "learning_rate": 1.5456512993117297e-05, "loss": 0.3000732660293579, "step": 2641 }, { "epoch": 0.7016332492364892, "grad_norm": 1.034060473081883, "learning_rate": 1.545283295171979e-05, "loss": 0.2650133967399597, "step": 2642 }, { "epoch": 0.7018988182180321, "grad_norm": 1.1833814596994714, "learning_rate": 1.5449151859080395e-05, "loss": 0.3414345681667328, "step": 2643 }, { "epoch": 0.7021643871995751, "grad_norm": 0.9407765615747015, "learning_rate": 1.5445469715908793e-05, "loss": 0.26955321431159973, "step": 2644 }, { "epoch": 0.702429956181118, "grad_norm": 1.0775826100815478, "learning_rate": 1.5441786522914855e-05, "loss": 0.3028743863105774, "step": 2645 }, { "epoch": 0.702695525162661, "grad_norm": 1.1630883359211883, "learning_rate": 1.5438102280808653e-05, "loss": 0.28710106015205383, "step": 2646 }, { "epoch": 0.7029610941442039, "grad_norm": 1.0828201415955274, "learning_rate": 1.543441699030047e-05, "loss": 0.33343076705932617, "step": 2647 }, { "epoch": 0.7032266631257469, "grad_norm": 2.8774903725783445, "learning_rate": 1.543073065210078e-05, "loss": 0.27760642766952515, "step": 2648 }, { "epoch": 0.7034922321072898, "grad_norm": 1.0939125975780095, "learning_rate": 1.5427043266920276e-05, "loss": 0.2844334840774536, "step": 2649 }, { "epoch": 0.7037578010888328, "grad_norm": 1.0671776711844796, "learning_rate": 1.542335483546983e-05, "loss": 0.28979432582855225, "step": 2650 }, { "epoch": 0.7040233700703757, "grad_norm": 1.1018820862649594, "learning_rate": 1.5419665358460537e-05, "loss": 0.313267320394516, "step": 2651 }, { "epoch": 0.7042889390519187, "grad_norm": 1.122792570050495, "learning_rate": 1.5415974836603676e-05, "loss": 0.26702141761779785, "step": 2652 }, { "epoch": 0.7045545080334616, "grad_norm": 1.084104909381419, "learning_rate": 1.5412283270610752e-05, "loss": 0.3256012499332428, "step": 2653 }, { "epoch": 0.7048200770150046, "grad_norm": 1.1096374178765924, "learning_rate": 1.540859066119344e-05, "loss": 0.3035642206668854, "step": 2654 }, { "epoch": 0.7050856459965475, "grad_norm": 1.1410920430169775, "learning_rate": 1.5404897009063636e-05, "loss": 0.32206645607948303, "step": 2655 }, { "epoch": 0.7053512149780906, "grad_norm": 0.9596610334229038, "learning_rate": 1.5401202314933436e-05, "loss": 0.3023940920829773, "step": 2656 }, { "epoch": 0.7056167839596336, "grad_norm": 0.9678878502259071, "learning_rate": 1.539750657951513e-05, "loss": 0.2839987277984619, "step": 2657 }, { "epoch": 0.7058823529411765, "grad_norm": 0.9744312269236198, "learning_rate": 1.5393809803521213e-05, "loss": 0.2488149106502533, "step": 2658 }, { "epoch": 0.7061479219227195, "grad_norm": 1.0311988168007409, "learning_rate": 1.539011198766438e-05, "loss": 0.27156201004981995, "step": 2659 }, { "epoch": 0.7064134909042624, "grad_norm": 1.0925039664890526, "learning_rate": 1.5386413132657528e-05, "loss": 0.3038437068462372, "step": 2660 }, { "epoch": 0.7066790598858054, "grad_norm": 0.9713190505037098, "learning_rate": 1.5382713239213746e-05, "loss": 0.27626922726631165, "step": 2661 }, { "epoch": 0.7069446288673483, "grad_norm": 1.9675808121081846, "learning_rate": 1.537901230804634e-05, "loss": 0.27338162064552307, "step": 2662 }, { "epoch": 0.7072101978488913, "grad_norm": 0.9540020890839573, "learning_rate": 1.5375310339868798e-05, "loss": 0.2635098099708557, "step": 2663 }, { "epoch": 0.7074757668304342, "grad_norm": 1.1274430903932144, "learning_rate": 1.537160733539482e-05, "loss": 0.3245551288127899, "step": 2664 }, { "epoch": 0.7077413358119772, "grad_norm": 1.1100804783644485, "learning_rate": 1.53679032953383e-05, "loss": 0.3226238787174225, "step": 2665 }, { "epoch": 0.7080069047935201, "grad_norm": 1.0972084780717322, "learning_rate": 1.536419822041333e-05, "loss": 0.31588318943977356, "step": 2666 }, { "epoch": 0.7082724737750631, "grad_norm": 1.031778059845932, "learning_rate": 1.536049211133421e-05, "loss": 0.2494429647922516, "step": 2667 }, { "epoch": 0.708538042756606, "grad_norm": 1.1110915785079796, "learning_rate": 1.5356784968815436e-05, "loss": 0.30966901779174805, "step": 2668 }, { "epoch": 0.708803611738149, "grad_norm": 1.1803956993815392, "learning_rate": 1.5353076793571692e-05, "loss": 0.29383328557014465, "step": 2669 }, { "epoch": 0.7090691807196919, "grad_norm": 1.086625008831518, "learning_rate": 1.5349367586317875e-05, "loss": 0.30337825417518616, "step": 2670 }, { "epoch": 0.7093347497012349, "grad_norm": 1.0049086741144315, "learning_rate": 1.5345657347769082e-05, "loss": 0.28128665685653687, "step": 2671 }, { "epoch": 0.7096003186827778, "grad_norm": 1.1819105498956106, "learning_rate": 1.5341946078640594e-05, "loss": 0.35167062282562256, "step": 2672 }, { "epoch": 0.7098658876643208, "grad_norm": 1.0441531577784944, "learning_rate": 1.533823377964791e-05, "loss": 0.30409517884254456, "step": 2673 }, { "epoch": 0.7101314566458637, "grad_norm": 1.013441954819978, "learning_rate": 1.5334520451506706e-05, "loss": 0.2667735815048218, "step": 2674 }, { "epoch": 0.7103970256274067, "grad_norm": 1.130854753100919, "learning_rate": 1.5330806094932876e-05, "loss": 0.290219247341156, "step": 2675 }, { "epoch": 0.7106625946089496, "grad_norm": 1.120803532670259, "learning_rate": 1.5327090710642503e-05, "loss": 0.33118927478790283, "step": 2676 }, { "epoch": 0.7109281635904926, "grad_norm": 1.2896959817209073, "learning_rate": 1.5323374299351867e-05, "loss": 0.34287041425704956, "step": 2677 }, { "epoch": 0.7111937325720356, "grad_norm": 1.0183367847991263, "learning_rate": 1.531965686177745e-05, "loss": 0.27093711495399475, "step": 2678 }, { "epoch": 0.7114593015535785, "grad_norm": 1.0913550671130643, "learning_rate": 1.531593839863593e-05, "loss": 0.2987911105155945, "step": 2679 }, { "epoch": 0.7117248705351215, "grad_norm": 1.0145664449432468, "learning_rate": 1.5312218910644185e-05, "loss": 0.2914583086967468, "step": 2680 }, { "epoch": 0.7119904395166644, "grad_norm": 1.0712171950199525, "learning_rate": 1.530849839851928e-05, "loss": 0.34159964323043823, "step": 2681 }, { "epoch": 0.7122560084982074, "grad_norm": 1.0132523095253043, "learning_rate": 1.5304776862978496e-05, "loss": 0.28327372670173645, "step": 2682 }, { "epoch": 0.7125215774797503, "grad_norm": 1.0473430655235008, "learning_rate": 1.5301054304739292e-05, "loss": 0.2902851104736328, "step": 2683 }, { "epoch": 0.7127871464612934, "grad_norm": 1.106440530120003, "learning_rate": 1.5297330724519344e-05, "loss": 0.3192726969718933, "step": 2684 }, { "epoch": 0.7130527154428363, "grad_norm": 1.0682705697817987, "learning_rate": 1.5293606123036508e-05, "loss": 0.30242764949798584, "step": 2685 }, { "epoch": 0.7133182844243793, "grad_norm": 1.0059439200202651, "learning_rate": 1.528988050100884e-05, "loss": 0.2718653082847595, "step": 2686 }, { "epoch": 0.7135838534059222, "grad_norm": 1.019566462631627, "learning_rate": 1.52861538591546e-05, "loss": 0.3014821708202362, "step": 2687 }, { "epoch": 0.7138494223874652, "grad_norm": 1.1473508187880241, "learning_rate": 1.528242619819224e-05, "loss": 0.3378177881240845, "step": 2688 }, { "epoch": 0.7141149913690081, "grad_norm": 1.0632179838195628, "learning_rate": 1.5278697518840415e-05, "loss": 0.29286471009254456, "step": 2689 }, { "epoch": 0.7143805603505511, "grad_norm": 1.1140242619678895, "learning_rate": 1.527496782181796e-05, "loss": 0.3371768593788147, "step": 2690 }, { "epoch": 0.714646129332094, "grad_norm": 1.0421377750374783, "learning_rate": 1.5271237107843925e-05, "loss": 0.30571556091308594, "step": 2691 }, { "epoch": 0.714911698313637, "grad_norm": 1.0650624138184501, "learning_rate": 1.526750537763754e-05, "loss": 0.33064618706703186, "step": 2692 }, { "epoch": 0.7151772672951799, "grad_norm": 1.0787164498543842, "learning_rate": 1.5263772631918242e-05, "loss": 0.3369274139404297, "step": 2693 }, { "epoch": 0.7154428362767229, "grad_norm": 1.079249778019668, "learning_rate": 1.5260038871405663e-05, "loss": 0.2422705739736557, "step": 2694 }, { "epoch": 0.7157084052582658, "grad_norm": 1.3990281605221084, "learning_rate": 1.5256304096819628e-05, "loss": 0.35786008834838867, "step": 2695 }, { "epoch": 0.7159739742398088, "grad_norm": 1.0368618301698236, "learning_rate": 1.5252568308880155e-05, "loss": 0.2853243052959442, "step": 2696 }, { "epoch": 0.7162395432213517, "grad_norm": 1.1300838792843926, "learning_rate": 1.5248831508307459e-05, "loss": 0.2903040051460266, "step": 2697 }, { "epoch": 0.7165051122028947, "grad_norm": 1.0779989148221412, "learning_rate": 1.5245093695821954e-05, "loss": 0.3375359773635864, "step": 2698 }, { "epoch": 0.7167706811844377, "grad_norm": 0.9828776196369989, "learning_rate": 1.5241354872144242e-05, "loss": 0.27855974435806274, "step": 2699 }, { "epoch": 0.7170362501659806, "grad_norm": 1.0672391327565405, "learning_rate": 1.5237615037995129e-05, "loss": 0.32226768136024475, "step": 2700 }, { "epoch": 0.7173018191475236, "grad_norm": 1.1089458515112456, "learning_rate": 1.5233874194095606e-05, "loss": 0.32856303453445435, "step": 2701 }, { "epoch": 0.7175673881290665, "grad_norm": 1.15556869357308, "learning_rate": 1.5230132341166868e-05, "loss": 0.31619006395339966, "step": 2702 }, { "epoch": 0.7178329571106095, "grad_norm": 1.09474796019269, "learning_rate": 1.5226389479930296e-05, "loss": 0.29736411571502686, "step": 2703 }, { "epoch": 0.7180985260921524, "grad_norm": 1.0969127487202406, "learning_rate": 1.5222645611107477e-05, "loss": 0.2767728865146637, "step": 2704 }, { "epoch": 0.7183640950736954, "grad_norm": 1.054074095850648, "learning_rate": 1.5218900735420174e-05, "loss": 0.30994221568107605, "step": 2705 }, { "epoch": 0.7186296640552383, "grad_norm": 1.0931807335310835, "learning_rate": 1.5215154853590362e-05, "loss": 0.3419484496116638, "step": 2706 }, { "epoch": 0.7188952330367813, "grad_norm": 1.0503021732812985, "learning_rate": 1.5211407966340203e-05, "loss": 0.3063664436340332, "step": 2707 }, { "epoch": 0.7191608020183242, "grad_norm": 1.0345938706194526, "learning_rate": 1.520766007439205e-05, "loss": 0.2856604754924774, "step": 2708 }, { "epoch": 0.7194263709998672, "grad_norm": 0.9757823992785323, "learning_rate": 1.5203911178468453e-05, "loss": 0.23257851600646973, "step": 2709 }, { "epoch": 0.7196919399814101, "grad_norm": 1.0292145399058534, "learning_rate": 1.5200161279292154e-05, "loss": 0.31451839208602905, "step": 2710 }, { "epoch": 0.7199575089629531, "grad_norm": 1.1017577588578753, "learning_rate": 1.5196410377586095e-05, "loss": 0.30298277735710144, "step": 2711 }, { "epoch": 0.7202230779444961, "grad_norm": 1.0759590578514124, "learning_rate": 1.5192658474073398e-05, "loss": 0.28654640913009644, "step": 2712 }, { "epoch": 0.7204886469260391, "grad_norm": 1.1189221983197806, "learning_rate": 1.5188905569477391e-05, "loss": 0.3148455023765564, "step": 2713 }, { "epoch": 0.720754215907582, "grad_norm": 1.079970608729249, "learning_rate": 1.5185151664521585e-05, "loss": 0.3004840612411499, "step": 2714 }, { "epoch": 0.721019784889125, "grad_norm": 1.206470642332625, "learning_rate": 1.518139675992969e-05, "loss": 0.3378010392189026, "step": 2715 }, { "epoch": 0.721285353870668, "grad_norm": 1.0802971688897103, "learning_rate": 1.517764085642561e-05, "loss": 0.3084215223789215, "step": 2716 }, { "epoch": 0.7215509228522109, "grad_norm": 1.1196175790564493, "learning_rate": 1.517388395473344e-05, "loss": 0.3434324264526367, "step": 2717 }, { "epoch": 0.7218164918337538, "grad_norm": 1.2084125695848371, "learning_rate": 1.517012605557746e-05, "loss": 0.2862265706062317, "step": 2718 }, { "epoch": 0.7220820608152968, "grad_norm": 0.9574562560549519, "learning_rate": 1.5166367159682156e-05, "loss": 0.2760370671749115, "step": 2719 }, { "epoch": 0.7223476297968398, "grad_norm": 1.0623260792686084, "learning_rate": 1.5162607267772194e-05, "loss": 0.26659202575683594, "step": 2720 }, { "epoch": 0.7226131987783827, "grad_norm": 1.069380288412464, "learning_rate": 1.5158846380572439e-05, "loss": 0.31900978088378906, "step": 2721 }, { "epoch": 0.7228787677599257, "grad_norm": 0.9775730121294547, "learning_rate": 1.5155084498807941e-05, "loss": 0.2983658015727997, "step": 2722 }, { "epoch": 0.7231443367414686, "grad_norm": 1.0202126383266699, "learning_rate": 1.5151321623203953e-05, "loss": 0.3086162805557251, "step": 2723 }, { "epoch": 0.7234099057230116, "grad_norm": 1.2685875339489936, "learning_rate": 1.5147557754485908e-05, "loss": 0.3233461380004883, "step": 2724 }, { "epoch": 0.7236754747045545, "grad_norm": 1.1386667332230644, "learning_rate": 1.5143792893379441e-05, "loss": 0.2979195713996887, "step": 2725 }, { "epoch": 0.7239410436860975, "grad_norm": 0.9598628443474388, "learning_rate": 1.5140027040610367e-05, "loss": 0.27854713797569275, "step": 2726 }, { "epoch": 0.7242066126676404, "grad_norm": 1.0735596908703036, "learning_rate": 1.5136260196904704e-05, "loss": 0.293560266494751, "step": 2727 }, { "epoch": 0.7244721816491834, "grad_norm": 1.1273149809893865, "learning_rate": 1.513249236298865e-05, "loss": 0.3033742308616638, "step": 2728 }, { "epoch": 0.7247377506307263, "grad_norm": 1.1425183002588892, "learning_rate": 1.51287235395886e-05, "loss": 0.27958324551582336, "step": 2729 }, { "epoch": 0.7250033196122693, "grad_norm": 1.022839475112705, "learning_rate": 1.512495372743114e-05, "loss": 0.3063122034072876, "step": 2730 }, { "epoch": 0.7252688885938122, "grad_norm": 1.0524007495354166, "learning_rate": 1.5121182927243043e-05, "loss": 0.29126864671707153, "step": 2731 }, { "epoch": 0.7255344575753552, "grad_norm": 1.0517432179455284, "learning_rate": 1.5117411139751279e-05, "loss": 0.27507084608078003, "step": 2732 }, { "epoch": 0.7258000265568981, "grad_norm": 1.1167955582078537, "learning_rate": 1.5113638365682996e-05, "loss": 0.3432404398918152, "step": 2733 }, { "epoch": 0.7260655955384411, "grad_norm": 1.0687371329401973, "learning_rate": 1.5109864605765552e-05, "loss": 0.27633196115493774, "step": 2734 }, { "epoch": 0.726331164519984, "grad_norm": 1.0811244514830984, "learning_rate": 1.5106089860726474e-05, "loss": 0.274509072303772, "step": 2735 }, { "epoch": 0.726596733501527, "grad_norm": 0.97012581020674, "learning_rate": 1.5102314131293494e-05, "loss": 0.26650723814964294, "step": 2736 }, { "epoch": 0.7268623024830699, "grad_norm": 0.9681782432226156, "learning_rate": 1.5098537418194524e-05, "loss": 0.24476298689842224, "step": 2737 }, { "epoch": 0.7271278714646129, "grad_norm": 1.1154772400244737, "learning_rate": 1.5094759722157671e-05, "loss": 0.3337150812149048, "step": 2738 }, { "epoch": 0.7273934404461558, "grad_norm": 1.0187825093211873, "learning_rate": 1.509098104391123e-05, "loss": 0.3147660195827484, "step": 2739 }, { "epoch": 0.7276590094276989, "grad_norm": 0.969229068573487, "learning_rate": 1.5087201384183687e-05, "loss": 0.2613281309604645, "step": 2740 }, { "epoch": 0.7279245784092419, "grad_norm": 1.0641712204852296, "learning_rate": 1.5083420743703717e-05, "loss": 0.2773926854133606, "step": 2741 }, { "epoch": 0.7281901473907848, "grad_norm": 1.0826759541494775, "learning_rate": 1.5079639123200179e-05, "loss": 0.30515575408935547, "step": 2742 }, { "epoch": 0.7284557163723278, "grad_norm": 1.0619554532285063, "learning_rate": 1.5075856523402128e-05, "loss": 0.3174355626106262, "step": 2743 }, { "epoch": 0.7287212853538707, "grad_norm": 0.9676487172589012, "learning_rate": 1.5072072945038802e-05, "loss": 0.25163760781288147, "step": 2744 }, { "epoch": 0.7289868543354137, "grad_norm": 1.009992458232401, "learning_rate": 1.5068288388839634e-05, "loss": 0.28822118043899536, "step": 2745 }, { "epoch": 0.7292524233169566, "grad_norm": 1.1623698216562623, "learning_rate": 1.5064502855534237e-05, "loss": 0.3129134476184845, "step": 2746 }, { "epoch": 0.7295179922984996, "grad_norm": 1.0993962878508883, "learning_rate": 1.5060716345852423e-05, "loss": 0.332313597202301, "step": 2747 }, { "epoch": 0.7297835612800425, "grad_norm": 1.1989932540466257, "learning_rate": 1.5056928860524181e-05, "loss": 0.3425176739692688, "step": 2748 }, { "epoch": 0.7300491302615855, "grad_norm": 1.006044605592889, "learning_rate": 1.5053140400279693e-05, "loss": 0.2737991511821747, "step": 2749 }, { "epoch": 0.7303146992431284, "grad_norm": 0.963162900300573, "learning_rate": 1.5049350965849337e-05, "loss": 0.27506589889526367, "step": 2750 }, { "epoch": 0.7305802682246714, "grad_norm": 0.9901021314780329, "learning_rate": 1.5045560557963663e-05, "loss": 0.25581830739974976, "step": 2751 }, { "epoch": 0.7308458372062143, "grad_norm": 1.0977147554610498, "learning_rate": 1.5041769177353423e-05, "loss": 0.31746333837509155, "step": 2752 }, { "epoch": 0.7311114061877573, "grad_norm": 1.142455577048558, "learning_rate": 1.5037976824749545e-05, "loss": 0.3119337260723114, "step": 2753 }, { "epoch": 0.7313769751693002, "grad_norm": 1.0824713857839723, "learning_rate": 1.5034183500883153e-05, "loss": 0.3330266773700714, "step": 2754 }, { "epoch": 0.7316425441508432, "grad_norm": 1.1870819737785345, "learning_rate": 1.5030389206485554e-05, "loss": 0.2794867753982544, "step": 2755 }, { "epoch": 0.7319081131323861, "grad_norm": 1.0826714009199063, "learning_rate": 1.5026593942288248e-05, "loss": 0.33273079991340637, "step": 2756 }, { "epoch": 0.7321736821139291, "grad_norm": 1.1000195904608074, "learning_rate": 1.502279770902291e-05, "loss": 0.30673256516456604, "step": 2757 }, { "epoch": 0.732439251095472, "grad_norm": 1.1311236734843304, "learning_rate": 1.5019000507421412e-05, "loss": 0.3126910924911499, "step": 2758 }, { "epoch": 0.732704820077015, "grad_norm": 1.1665747930638253, "learning_rate": 1.5015202338215811e-05, "loss": 0.35423290729522705, "step": 2759 }, { "epoch": 0.7329703890585579, "grad_norm": 1.0691634248957984, "learning_rate": 1.5011403202138346e-05, "loss": 0.31541377305984497, "step": 2760 }, { "epoch": 0.7332359580401009, "grad_norm": 3.4446251175420257, "learning_rate": 1.5007603099921451e-05, "loss": 0.31460440158843994, "step": 2761 }, { "epoch": 0.7335015270216438, "grad_norm": 1.0828016056563536, "learning_rate": 1.5003802032297735e-05, "loss": 0.2786293923854828, "step": 2762 }, { "epoch": 0.7337670960031868, "grad_norm": 1.1025311021139896, "learning_rate": 1.5000000000000002e-05, "loss": 0.27977997064590454, "step": 2763 }, { "epoch": 0.7340326649847297, "grad_norm": 1.1136339551828278, "learning_rate": 1.4996197003761237e-05, "loss": 0.2933383584022522, "step": 2764 }, { "epoch": 0.7342982339662727, "grad_norm": 1.0743056930311463, "learning_rate": 1.4992393044314617e-05, "loss": 0.30623573064804077, "step": 2765 }, { "epoch": 0.7345638029478156, "grad_norm": 1.112681662128017, "learning_rate": 1.4988588122393497e-05, "loss": 0.28665077686309814, "step": 2766 }, { "epoch": 0.7348293719293586, "grad_norm": 1.0268941907147413, "learning_rate": 1.4984782238731422e-05, "loss": 0.3245697021484375, "step": 2767 }, { "epoch": 0.7350949409109017, "grad_norm": 1.118864717612721, "learning_rate": 1.4980975394062122e-05, "loss": 0.29477447271347046, "step": 2768 }, { "epoch": 0.7353605098924446, "grad_norm": 1.009879072463833, "learning_rate": 1.4977167589119508e-05, "loss": 0.29174134135246277, "step": 2769 }, { "epoch": 0.7356260788739876, "grad_norm": 1.010733766191454, "learning_rate": 1.4973358824637687e-05, "loss": 0.29473474621772766, "step": 2770 }, { "epoch": 0.7358916478555305, "grad_norm": 1.3454647120520804, "learning_rate": 1.4969549101350938e-05, "loss": 0.3095156252384186, "step": 2771 }, { "epoch": 0.7361572168370735, "grad_norm": 1.0578448721867733, "learning_rate": 1.4965738419993733e-05, "loss": 0.26295265555381775, "step": 2772 }, { "epoch": 0.7364227858186164, "grad_norm": 1.0590497560307077, "learning_rate": 1.4961926781300723e-05, "loss": 0.2989509701728821, "step": 2773 }, { "epoch": 0.7366883548001594, "grad_norm": 1.0783454816561941, "learning_rate": 1.4958114186006756e-05, "loss": 0.31087079644203186, "step": 2774 }, { "epoch": 0.7369539237817023, "grad_norm": 1.0953647378016445, "learning_rate": 1.4954300634846845e-05, "loss": 0.3063197433948517, "step": 2775 }, { "epoch": 0.7372194927632453, "grad_norm": 1.0858506486148067, "learning_rate": 1.4950486128556208e-05, "loss": 0.3149424195289612, "step": 2776 }, { "epoch": 0.7374850617447882, "grad_norm": 1.0199984929310564, "learning_rate": 1.4946670667870224e-05, "loss": 0.2724878191947937, "step": 2777 }, { "epoch": 0.7377506307263312, "grad_norm": 1.0033150283887489, "learning_rate": 1.4942854253524479e-05, "loss": 0.2556690275669098, "step": 2778 }, { "epoch": 0.7380161997078741, "grad_norm": 1.0594159401263619, "learning_rate": 1.4939036886254727e-05, "loss": 0.2704542875289917, "step": 2779 }, { "epoch": 0.7382817686894171, "grad_norm": 1.052456117640013, "learning_rate": 1.4935218566796918e-05, "loss": 0.26762163639068604, "step": 2780 }, { "epoch": 0.73854733767096, "grad_norm": 1.1328164222449624, "learning_rate": 1.4931399295887172e-05, "loss": 0.3376831114292145, "step": 2781 }, { "epoch": 0.738812906652503, "grad_norm": 1.0695003562166123, "learning_rate": 1.4927579074261803e-05, "loss": 0.2980082631111145, "step": 2782 }, { "epoch": 0.7390784756340459, "grad_norm": 1.0340858480290613, "learning_rate": 1.4923757902657306e-05, "loss": 0.27693796157836914, "step": 2783 }, { "epoch": 0.7393440446155889, "grad_norm": 1.0204290883803, "learning_rate": 1.4919935781810353e-05, "loss": 0.3109282851219177, "step": 2784 }, { "epoch": 0.7396096135971318, "grad_norm": 1.12631585013599, "learning_rate": 1.4916112712457807e-05, "loss": 0.3123949468135834, "step": 2785 }, { "epoch": 0.7398751825786748, "grad_norm": 1.143039341014623, "learning_rate": 1.4912288695336709e-05, "loss": 0.3232062757015228, "step": 2786 }, { "epoch": 0.7401407515602177, "grad_norm": 1.0315778016896975, "learning_rate": 1.4908463731184287e-05, "loss": 0.2685563862323761, "step": 2787 }, { "epoch": 0.7404063205417607, "grad_norm": 1.076569860938466, "learning_rate": 1.4904637820737945e-05, "loss": 0.25752881169319153, "step": 2788 }, { "epoch": 0.7406718895233037, "grad_norm": 1.2236263687690485, "learning_rate": 1.4900810964735279e-05, "loss": 0.2887497544288635, "step": 2789 }, { "epoch": 0.7409374585048466, "grad_norm": 1.126755867019387, "learning_rate": 1.489698316391406e-05, "loss": 0.28804779052734375, "step": 2790 }, { "epoch": 0.7412030274863896, "grad_norm": 1.0931262335064922, "learning_rate": 1.489315441901224e-05, "loss": 0.2684408724308014, "step": 2791 }, { "epoch": 0.7414685964679325, "grad_norm": 1.0509233991385625, "learning_rate": 1.4889324730767959e-05, "loss": 0.31945526599884033, "step": 2792 }, { "epoch": 0.7417341654494755, "grad_norm": 1.3391113530092205, "learning_rate": 1.488549409991953e-05, "loss": 0.34446024894714355, "step": 2793 }, { "epoch": 0.7419997344310184, "grad_norm": 1.094751814978447, "learning_rate": 1.488166252720546e-05, "loss": 0.28849151730537415, "step": 2794 }, { "epoch": 0.7422653034125614, "grad_norm": 1.0431424597135226, "learning_rate": 1.4877830013364429e-05, "loss": 0.2793633043766022, "step": 2795 }, { "epoch": 0.7425308723941043, "grad_norm": 1.1811188011136542, "learning_rate": 1.4873996559135298e-05, "loss": 0.3211687505245209, "step": 2796 }, { "epoch": 0.7427964413756474, "grad_norm": 1.004634818722801, "learning_rate": 1.4870162165257114e-05, "loss": 0.26225876808166504, "step": 2797 }, { "epoch": 0.7430620103571903, "grad_norm": 1.7885293848946355, "learning_rate": 1.4866326832469105e-05, "loss": 0.3100029528141022, "step": 2798 }, { "epoch": 0.7433275793387333, "grad_norm": 1.0428487423040855, "learning_rate": 1.4862490561510675e-05, "loss": 0.29399827122688293, "step": 2799 }, { "epoch": 0.7435931483202762, "grad_norm": 0.9886298200418341, "learning_rate": 1.4858653353121412e-05, "loss": 0.27357399463653564, "step": 2800 }, { "epoch": 0.7438587173018192, "grad_norm": 1.1101962385134683, "learning_rate": 1.4854815208041087e-05, "loss": 0.34575730562210083, "step": 2801 }, { "epoch": 0.7441242862833621, "grad_norm": 1.0351474931606812, "learning_rate": 1.4850976127009644e-05, "loss": 0.28487247228622437, "step": 2802 }, { "epoch": 0.7443898552649051, "grad_norm": 1.0283492066128257, "learning_rate": 1.484713611076722e-05, "loss": 0.264443576335907, "step": 2803 }, { "epoch": 0.744655424246448, "grad_norm": 1.085429543255666, "learning_rate": 1.4843295160054116e-05, "loss": 0.32750973105430603, "step": 2804 }, { "epoch": 0.744920993227991, "grad_norm": 1.0136013055294886, "learning_rate": 1.4839453275610827e-05, "loss": 0.24080191552639008, "step": 2805 }, { "epoch": 0.7451865622095339, "grad_norm": 1.1486643921382949, "learning_rate": 1.4835610458178025e-05, "loss": 0.31667011976242065, "step": 2806 }, { "epoch": 0.7454521311910769, "grad_norm": 1.0103490185384167, "learning_rate": 1.4831766708496553e-05, "loss": 0.2754175066947937, "step": 2807 }, { "epoch": 0.7457177001726198, "grad_norm": 1.0607394107689443, "learning_rate": 1.482792202730745e-05, "loss": 0.2890132963657379, "step": 2808 }, { "epoch": 0.7459832691541628, "grad_norm": 1.049970305589495, "learning_rate": 1.4824076415351918e-05, "loss": 0.3402877748012543, "step": 2809 }, { "epoch": 0.7462488381357057, "grad_norm": 1.0879104018503691, "learning_rate": 1.4820229873371347e-05, "loss": 0.3167210519313812, "step": 2810 }, { "epoch": 0.7465144071172487, "grad_norm": 0.9983910427341833, "learning_rate": 1.4816382402107308e-05, "loss": 0.2653643786907196, "step": 2811 }, { "epoch": 0.7467799760987917, "grad_norm": 1.2191167585139304, "learning_rate": 1.4812534002301547e-05, "loss": 0.3202674984931946, "step": 2812 }, { "epoch": 0.7470455450803346, "grad_norm": 1.0461975743299208, "learning_rate": 1.4808684674695985e-05, "loss": 0.2942724823951721, "step": 2813 }, { "epoch": 0.7473111140618776, "grad_norm": 1.0581736193326858, "learning_rate": 1.480483442003273e-05, "loss": 0.28640663623809814, "step": 2814 }, { "epoch": 0.7475766830434205, "grad_norm": 0.9932743335315769, "learning_rate": 1.4800983239054071e-05, "loss": 0.26214420795440674, "step": 2815 }, { "epoch": 0.7478422520249635, "grad_norm": 1.0324489729554576, "learning_rate": 1.4797131132502464e-05, "loss": 0.3288992643356323, "step": 2816 }, { "epoch": 0.7481078210065064, "grad_norm": 0.9775792939666473, "learning_rate": 1.4793278101120551e-05, "loss": 0.2622208297252655, "step": 2817 }, { "epoch": 0.7483733899880494, "grad_norm": 1.0856486279870832, "learning_rate": 1.4789424145651152e-05, "loss": 0.3223533034324646, "step": 2818 }, { "epoch": 0.7486389589695923, "grad_norm": 0.9640735701611682, "learning_rate": 1.4785569266837264e-05, "loss": 0.25849875807762146, "step": 2819 }, { "epoch": 0.7489045279511353, "grad_norm": 1.20204465384733, "learning_rate": 1.478171346542206e-05, "loss": 0.3477833569049835, "step": 2820 }, { "epoch": 0.7491700969326782, "grad_norm": 1.0577809669167442, "learning_rate": 1.4777856742148897e-05, "loss": 0.2799205780029297, "step": 2821 }, { "epoch": 0.7494356659142212, "grad_norm": 1.624939710599736, "learning_rate": 1.4773999097761304e-05, "loss": 0.2591988444328308, "step": 2822 }, { "epoch": 0.7497012348957641, "grad_norm": 1.2869478314125868, "learning_rate": 1.477014053300299e-05, "loss": 0.30161747336387634, "step": 2823 }, { "epoch": 0.7499668038773071, "grad_norm": 1.0738509532979332, "learning_rate": 1.4766281048617837e-05, "loss": 0.28202176094055176, "step": 2824 }, { "epoch": 0.7502323728588501, "grad_norm": 1.0042946509670743, "learning_rate": 1.4762420645349912e-05, "loss": 0.26074907183647156, "step": 2825 }, { "epoch": 0.7504979418403931, "grad_norm": 1.1385436298617553, "learning_rate": 1.4758559323943455e-05, "loss": 0.2822819948196411, "step": 2826 }, { "epoch": 0.750763510821936, "grad_norm": 1.1069166183989807, "learning_rate": 1.4754697085142879e-05, "loss": 0.2704991102218628, "step": 2827 }, { "epoch": 0.751029079803479, "grad_norm": 1.1005590878466516, "learning_rate": 1.4750833929692785e-05, "loss": 0.2627401053905487, "step": 2828 }, { "epoch": 0.751294648785022, "grad_norm": 1.0886740028659867, "learning_rate": 1.474696985833794e-05, "loss": 0.2898240089416504, "step": 2829 }, { "epoch": 0.7515602177665649, "grad_norm": 1.0291450176805186, "learning_rate": 1.4743104871823291e-05, "loss": 0.30080029368400574, "step": 2830 }, { "epoch": 0.7518257867481078, "grad_norm": 1.0953597523125502, "learning_rate": 1.473923897089396e-05, "loss": 0.2950359284877777, "step": 2831 }, { "epoch": 0.7520913557296508, "grad_norm": 1.1129882579718784, "learning_rate": 1.4735372156295253e-05, "loss": 0.31936827301979065, "step": 2832 }, { "epoch": 0.7523569247111938, "grad_norm": 1.1117484749822675, "learning_rate": 1.4731504428772642e-05, "loss": 0.2771468460559845, "step": 2833 }, { "epoch": 0.7526224936927367, "grad_norm": 1.1332551367729735, "learning_rate": 1.4727635789071779e-05, "loss": 0.3135997951030731, "step": 2834 }, { "epoch": 0.7528880626742797, "grad_norm": 1.1215560189558773, "learning_rate": 1.4723766237938495e-05, "loss": 0.29874372482299805, "step": 2835 }, { "epoch": 0.7531536316558226, "grad_norm": 1.0292177835845961, "learning_rate": 1.4719895776118789e-05, "loss": 0.249681293964386, "step": 2836 }, { "epoch": 0.7534192006373656, "grad_norm": 1.0567186687732057, "learning_rate": 1.4716024404358847e-05, "loss": 0.28544771671295166, "step": 2837 }, { "epoch": 0.7536847696189085, "grad_norm": 1.1290911495331684, "learning_rate": 1.4712152123405018e-05, "loss": 0.32532355189323425, "step": 2838 }, { "epoch": 0.7539503386004515, "grad_norm": 1.1212187873017119, "learning_rate": 1.4708278934003835e-05, "loss": 0.31663140654563904, "step": 2839 }, { "epoch": 0.7542159075819944, "grad_norm": 1.123142254862964, "learning_rate": 1.4704404836902005e-05, "loss": 0.30552318692207336, "step": 2840 }, { "epoch": 0.7544814765635374, "grad_norm": 1.1574657252500693, "learning_rate": 1.47005298328464e-05, "loss": 0.3019601106643677, "step": 2841 }, { "epoch": 0.7547470455450803, "grad_norm": 1.0814580547673966, "learning_rate": 1.4696653922584084e-05, "loss": 0.321606308221817, "step": 2842 }, { "epoch": 0.7550126145266233, "grad_norm": 1.138590953455986, "learning_rate": 1.4692777106862281e-05, "loss": 0.2709462642669678, "step": 2843 }, { "epoch": 0.7552781835081662, "grad_norm": 1.1366302949330385, "learning_rate": 1.46888993864284e-05, "loss": 0.2882609963417053, "step": 2844 }, { "epoch": 0.7555437524897092, "grad_norm": 0.9948609987035232, "learning_rate": 1.4685020762030019e-05, "loss": 0.25843000411987305, "step": 2845 }, { "epoch": 0.7558093214712521, "grad_norm": 1.1002004205654323, "learning_rate": 1.4681141234414889e-05, "loss": 0.30962038040161133, "step": 2846 }, { "epoch": 0.7560748904527951, "grad_norm": 1.2025960097123465, "learning_rate": 1.4677260804330938e-05, "loss": 0.304874062538147, "step": 2847 }, { "epoch": 0.756340459434338, "grad_norm": 1.2287867091921092, "learning_rate": 1.4673379472526268e-05, "loss": 0.3425619602203369, "step": 2848 }, { "epoch": 0.756606028415881, "grad_norm": 1.0701256182117689, "learning_rate": 1.4669497239749153e-05, "loss": 0.3002302050590515, "step": 2849 }, { "epoch": 0.7568715973974239, "grad_norm": 1.1005370830207322, "learning_rate": 1.4665614106748038e-05, "loss": 0.31008803844451904, "step": 2850 }, { "epoch": 0.7571371663789669, "grad_norm": 1.0175712407141912, "learning_rate": 1.4661730074271551e-05, "loss": 0.27829408645629883, "step": 2851 }, { "epoch": 0.7574027353605098, "grad_norm": 1.0501959661073665, "learning_rate": 1.4657845143068488e-05, "loss": 0.25915467739105225, "step": 2852 }, { "epoch": 0.7576683043420529, "grad_norm": 1.0719536636155031, "learning_rate": 1.4653959313887813e-05, "loss": 0.2843416929244995, "step": 2853 }, { "epoch": 0.7579338733235959, "grad_norm": 1.0489373710223147, "learning_rate": 1.465007258747867e-05, "loss": 0.2851647138595581, "step": 2854 }, { "epoch": 0.7581994423051388, "grad_norm": 1.085754694338766, "learning_rate": 1.4646184964590378e-05, "loss": 0.266017884016037, "step": 2855 }, { "epoch": 0.7584650112866818, "grad_norm": 1.0789098348141843, "learning_rate": 1.4642296445972421e-05, "loss": 0.30142179131507874, "step": 2856 }, { "epoch": 0.7587305802682247, "grad_norm": 0.9904299934324251, "learning_rate": 1.463840703237446e-05, "loss": 0.2878327965736389, "step": 2857 }, { "epoch": 0.7589961492497677, "grad_norm": 1.114310168260114, "learning_rate": 1.4634516724546326e-05, "loss": 0.2919169068336487, "step": 2858 }, { "epoch": 0.7592617182313106, "grad_norm": 0.9954308342175644, "learning_rate": 1.4630625523238027e-05, "loss": 0.2530924081802368, "step": 2859 }, { "epoch": 0.7595272872128536, "grad_norm": 1.0858688189416337, "learning_rate": 1.462673342919974e-05, "loss": 0.3009106516838074, "step": 2860 }, { "epoch": 0.7597928561943965, "grad_norm": 1.1572533440881312, "learning_rate": 1.4622840443181817e-05, "loss": 0.3114222288131714, "step": 2861 }, { "epoch": 0.7600584251759395, "grad_norm": 1.2224434370177688, "learning_rate": 1.4618946565934775e-05, "loss": 0.344540536403656, "step": 2862 }, { "epoch": 0.7603239941574824, "grad_norm": 1.0685722656113568, "learning_rate": 1.4615051798209312e-05, "loss": 0.263607919216156, "step": 2863 }, { "epoch": 0.7605895631390254, "grad_norm": 1.018611353798299, "learning_rate": 1.4611156140756293e-05, "loss": 0.2685706317424774, "step": 2864 }, { "epoch": 0.7608551321205683, "grad_norm": 1.1431197890714058, "learning_rate": 1.4607259594326752e-05, "loss": 0.32342326641082764, "step": 2865 }, { "epoch": 0.7611207011021113, "grad_norm": 1.182050624874759, "learning_rate": 1.4603362159671902e-05, "loss": 0.3088849186897278, "step": 2866 }, { "epoch": 0.7613862700836542, "grad_norm": 1.0482348167122462, "learning_rate": 1.4599463837543114e-05, "loss": 0.26718589663505554, "step": 2867 }, { "epoch": 0.7616518390651972, "grad_norm": 1.0051992534296357, "learning_rate": 1.4595564628691944e-05, "loss": 0.29511263966560364, "step": 2868 }, { "epoch": 0.7619174080467401, "grad_norm": 1.0974088254649037, "learning_rate": 1.4591664533870118e-05, "loss": 0.2940484285354614, "step": 2869 }, { "epoch": 0.7621829770282831, "grad_norm": 1.1564456059915547, "learning_rate": 1.4587763553829521e-05, "loss": 0.28167295455932617, "step": 2870 }, { "epoch": 0.762448546009826, "grad_norm": 1.0590804851451585, "learning_rate": 1.4583861689322219e-05, "loss": 0.3362962007522583, "step": 2871 }, { "epoch": 0.762714114991369, "grad_norm": 1.1206777555300773, "learning_rate": 1.4579958941100445e-05, "loss": 0.3003339171409607, "step": 2872 }, { "epoch": 0.7629796839729119, "grad_norm": 1.0572512051509857, "learning_rate": 1.4576055309916602e-05, "loss": 0.3191443979740143, "step": 2873 }, { "epoch": 0.7632452529544549, "grad_norm": 1.0684782615871369, "learning_rate": 1.4572150796523265e-05, "loss": 0.30804574489593506, "step": 2874 }, { "epoch": 0.7635108219359978, "grad_norm": 1.0214046475154577, "learning_rate": 1.4568245401673178e-05, "loss": 0.32462549209594727, "step": 2875 }, { "epoch": 0.7637763909175408, "grad_norm": 1.1357318078490404, "learning_rate": 1.4564339126119254e-05, "loss": 0.27751386165618896, "step": 2876 }, { "epoch": 0.7640419598990837, "grad_norm": 1.0701221152994065, "learning_rate": 1.4560431970614578e-05, "loss": 0.27194011211395264, "step": 2877 }, { "epoch": 0.7643075288806267, "grad_norm": 1.134082938487784, "learning_rate": 1.4556523935912406e-05, "loss": 0.28701072931289673, "step": 2878 }, { "epoch": 0.7645730978621696, "grad_norm": 1.0814539768930527, "learning_rate": 1.4552615022766156e-05, "loss": 0.3278783857822418, "step": 2879 }, { "epoch": 0.7648386668437126, "grad_norm": 1.096499511679905, "learning_rate": 1.4548705231929426e-05, "loss": 0.3292006254196167, "step": 2880 }, { "epoch": 0.7651042358252557, "grad_norm": 1.30563906707581, "learning_rate": 1.4544794564155971e-05, "loss": 0.33038759231567383, "step": 2881 }, { "epoch": 0.7653698048067986, "grad_norm": 1.0799053745016685, "learning_rate": 1.4540883020199725e-05, "loss": 0.29183000326156616, "step": 2882 }, { "epoch": 0.7656353737883416, "grad_norm": 1.049945067498866, "learning_rate": 1.4536970600814789e-05, "loss": 0.28066399693489075, "step": 2883 }, { "epoch": 0.7659009427698845, "grad_norm": 1.0673215015420034, "learning_rate": 1.4533057306755427e-05, "loss": 0.2832046151161194, "step": 2884 }, { "epoch": 0.7661665117514275, "grad_norm": 1.0799218487874103, "learning_rate": 1.4529143138776078e-05, "loss": 0.3006540834903717, "step": 2885 }, { "epoch": 0.7664320807329704, "grad_norm": 0.965945374746046, "learning_rate": 1.4525228097631351e-05, "loss": 0.2793240547180176, "step": 2886 }, { "epoch": 0.7666976497145134, "grad_norm": 1.0791298696355873, "learning_rate": 1.452131218407602e-05, "loss": 0.2895192503929138, "step": 2887 }, { "epoch": 0.7669632186960563, "grad_norm": 1.1085071656285739, "learning_rate": 1.4517395398865022e-05, "loss": 0.27707618474960327, "step": 2888 }, { "epoch": 0.7672287876775993, "grad_norm": 0.9801959170871006, "learning_rate": 1.4513477742753465e-05, "loss": 0.29167065024375916, "step": 2889 }, { "epoch": 0.7674943566591422, "grad_norm": 0.9760628575291594, "learning_rate": 1.4509559216496631e-05, "loss": 0.2670987844467163, "step": 2890 }, { "epoch": 0.7677599256406852, "grad_norm": 1.0541213606202946, "learning_rate": 1.4505639820849968e-05, "loss": 0.3025206923484802, "step": 2891 }, { "epoch": 0.7680254946222281, "grad_norm": 1.0721054101606857, "learning_rate": 1.4501719556569087e-05, "loss": 0.3104705512523651, "step": 2892 }, { "epoch": 0.7682910636037711, "grad_norm": 1.1715745485021363, "learning_rate": 1.4497798424409766e-05, "loss": 0.2972267270088196, "step": 2893 }, { "epoch": 0.768556632585314, "grad_norm": 1.3084992927105763, "learning_rate": 1.4493876425127957e-05, "loss": 0.34956347942352295, "step": 2894 }, { "epoch": 0.768822201566857, "grad_norm": 1.0910589486872886, "learning_rate": 1.4489953559479775e-05, "loss": 0.3122873902320862, "step": 2895 }, { "epoch": 0.7690877705483999, "grad_norm": 1.0070263080445798, "learning_rate": 1.4486029828221497e-05, "loss": 0.29645755887031555, "step": 2896 }, { "epoch": 0.7693533395299429, "grad_norm": 1.1312479199974272, "learning_rate": 1.448210523210958e-05, "loss": 0.33357223868370056, "step": 2897 }, { "epoch": 0.7696189085114858, "grad_norm": 1.0807209302083978, "learning_rate": 1.4478179771900634e-05, "loss": 0.2780191898345947, "step": 2898 }, { "epoch": 0.7698844774930288, "grad_norm": 1.098992372480737, "learning_rate": 1.447425344835144e-05, "loss": 0.31503236293792725, "step": 2899 }, { "epoch": 0.7701500464745717, "grad_norm": 1.0152023365250116, "learning_rate": 1.4470326262218955e-05, "loss": 0.2843332290649414, "step": 2900 }, { "epoch": 0.7704156154561147, "grad_norm": 1.1041753681410225, "learning_rate": 1.4466398214260286e-05, "loss": 0.305475652217865, "step": 2901 }, { "epoch": 0.7706811844376577, "grad_norm": 1.0159008972115877, "learning_rate": 1.446246930523272e-05, "loss": 0.28418007493019104, "step": 2902 }, { "epoch": 0.7709467534192006, "grad_norm": 2.0289726917266027, "learning_rate": 1.44585395358937e-05, "loss": 0.28237032890319824, "step": 2903 }, { "epoch": 0.7712123224007436, "grad_norm": 1.1334683720848762, "learning_rate": 1.4454608907000843e-05, "loss": 0.33727777004241943, "step": 2904 }, { "epoch": 0.7714778913822865, "grad_norm": 1.1393257541232447, "learning_rate": 1.4450677419311925e-05, "loss": 0.2977198660373688, "step": 2905 }, { "epoch": 0.7717434603638295, "grad_norm": 1.0793508547506123, "learning_rate": 1.4446745073584891e-05, "loss": 0.3095981776714325, "step": 2906 }, { "epoch": 0.7720090293453724, "grad_norm": 1.138471500425881, "learning_rate": 1.4442811870577851e-05, "loss": 0.29808440804481506, "step": 2907 }, { "epoch": 0.7722745983269154, "grad_norm": 1.2668271633221484, "learning_rate": 1.4438877811049079e-05, "loss": 0.32444530725479126, "step": 2908 }, { "epoch": 0.7725401673084584, "grad_norm": 1.0229226464155372, "learning_rate": 1.443494289575702e-05, "loss": 0.24782602488994598, "step": 2909 }, { "epoch": 0.7728057362900014, "grad_norm": 1.079755307057506, "learning_rate": 1.4431007125460274e-05, "loss": 0.31289762258529663, "step": 2910 }, { "epoch": 0.7730713052715443, "grad_norm": 1.0928540626872372, "learning_rate": 1.4427070500917615e-05, "loss": 0.31444042921066284, "step": 2911 }, { "epoch": 0.7733368742530873, "grad_norm": 1.1235251868548595, "learning_rate": 1.4423133022887973e-05, "loss": 0.31347882747650146, "step": 2912 }, { "epoch": 0.7736024432346302, "grad_norm": 1.1449169077961199, "learning_rate": 1.4419194692130453e-05, "loss": 0.3025411367416382, "step": 2913 }, { "epoch": 0.7738680122161732, "grad_norm": 0.9734590933720824, "learning_rate": 1.4415255509404316e-05, "loss": 0.2954581081867218, "step": 2914 }, { "epoch": 0.7741335811977161, "grad_norm": 1.051295802747811, "learning_rate": 1.4411315475468988e-05, "loss": 0.2675531506538391, "step": 2915 }, { "epoch": 0.7743991501792591, "grad_norm": 1.0207923958770302, "learning_rate": 1.4407374591084064e-05, "loss": 0.29307854175567627, "step": 2916 }, { "epoch": 0.774664719160802, "grad_norm": 0.9134258889524259, "learning_rate": 1.4403432857009295e-05, "loss": 0.2805953025817871, "step": 2917 }, { "epoch": 0.774930288142345, "grad_norm": 1.1114518211112974, "learning_rate": 1.439949027400461e-05, "loss": 0.30805838108062744, "step": 2918 }, { "epoch": 0.7751958571238879, "grad_norm": 1.063187320260136, "learning_rate": 1.4395546842830085e-05, "loss": 0.31501835584640503, "step": 2919 }, { "epoch": 0.7754614261054309, "grad_norm": 1.025310766436644, "learning_rate": 1.4391602564245975e-05, "loss": 0.2719186246395111, "step": 2920 }, { "epoch": 0.7757269950869738, "grad_norm": 1.0474571998069828, "learning_rate": 1.4387657439012677e-05, "loss": 0.29554325342178345, "step": 2921 }, { "epoch": 0.7759925640685168, "grad_norm": 1.0103166752174864, "learning_rate": 1.4383711467890776e-05, "loss": 0.2993816137313843, "step": 2922 }, { "epoch": 0.7762581330500598, "grad_norm": 1.087143911717871, "learning_rate": 1.4379764651641004e-05, "loss": 0.3412264883518219, "step": 2923 }, { "epoch": 0.7765237020316027, "grad_norm": 1.3163055539647115, "learning_rate": 1.4375816991024263e-05, "loss": 0.3137913942337036, "step": 2924 }, { "epoch": 0.7767892710131457, "grad_norm": 1.0026858390591848, "learning_rate": 1.4371868486801611e-05, "loss": 0.2710151672363281, "step": 2925 }, { "epoch": 0.7770548399946886, "grad_norm": 1.060508746597415, "learning_rate": 1.4367919139734279e-05, "loss": 0.28521692752838135, "step": 2926 }, { "epoch": 0.7773204089762316, "grad_norm": 0.9938687291505847, "learning_rate": 1.4363968950583651e-05, "loss": 0.2889919579029083, "step": 2927 }, { "epoch": 0.7775859779577745, "grad_norm": 1.0641534591195945, "learning_rate": 1.436001792011128e-05, "loss": 0.31562381982803345, "step": 2928 }, { "epoch": 0.7778515469393175, "grad_norm": 0.980719397790632, "learning_rate": 1.4356066049078871e-05, "loss": 0.2747528553009033, "step": 2929 }, { "epoch": 0.7781171159208604, "grad_norm": 1.0890864939874727, "learning_rate": 1.4352113338248303e-05, "loss": 0.2918938398361206, "step": 2930 }, { "epoch": 0.7783826849024034, "grad_norm": 1.1375978489291394, "learning_rate": 1.4348159788381615e-05, "loss": 0.3348507285118103, "step": 2931 }, { "epoch": 0.7786482538839463, "grad_norm": 1.049930284325584, "learning_rate": 1.4344205400241e-05, "loss": 0.27206242084503174, "step": 2932 }, { "epoch": 0.7789138228654893, "grad_norm": 1.0635705360778813, "learning_rate": 1.434025017458882e-05, "loss": 0.28496092557907104, "step": 2933 }, { "epoch": 0.7791793918470322, "grad_norm": 1.1207237235097192, "learning_rate": 1.4336294112187595e-05, "loss": 0.3080131411552429, "step": 2934 }, { "epoch": 0.7794449608285752, "grad_norm": 1.1562549835000784, "learning_rate": 1.4332337213800008e-05, "loss": 0.3116779029369354, "step": 2935 }, { "epoch": 0.7797105298101181, "grad_norm": 1.0230593279992428, "learning_rate": 1.43283794801889e-05, "loss": 0.26526543498039246, "step": 2936 }, { "epoch": 0.7799760987916612, "grad_norm": 1.0768548459396885, "learning_rate": 1.4324420912117274e-05, "loss": 0.2829325497150421, "step": 2937 }, { "epoch": 0.7802416677732041, "grad_norm": 1.197165846783245, "learning_rate": 1.43204615103483e-05, "loss": 0.34146445989608765, "step": 2938 }, { "epoch": 0.7805072367547471, "grad_norm": 1.1418950254878286, "learning_rate": 1.43165012756453e-05, "loss": 0.316609650850296, "step": 2939 }, { "epoch": 0.78077280573629, "grad_norm": 1.119861281862994, "learning_rate": 1.4312540208771766e-05, "loss": 0.3215107321739197, "step": 2940 }, { "epoch": 0.781038374717833, "grad_norm": 1.0591732101512668, "learning_rate": 1.4308578310491342e-05, "loss": 0.2834000587463379, "step": 2941 }, { "epoch": 0.781303943699376, "grad_norm": 1.1186376453102755, "learning_rate": 1.430461558156783e-05, "loss": 0.30184993147850037, "step": 2942 }, { "epoch": 0.7815695126809189, "grad_norm": 1.1319557052801907, "learning_rate": 1.4300652022765207e-05, "loss": 0.3299996256828308, "step": 2943 }, { "epoch": 0.7818350816624619, "grad_norm": 1.1269288601015153, "learning_rate": 1.4296687634847592e-05, "loss": 0.27565228939056396, "step": 2944 }, { "epoch": 0.7821006506440048, "grad_norm": 1.1019395409868211, "learning_rate": 1.4292722418579278e-05, "loss": 0.30347493290901184, "step": 2945 }, { "epoch": 0.7823662196255478, "grad_norm": 1.125677517693181, "learning_rate": 1.4288756374724709e-05, "loss": 0.31469428539276123, "step": 2946 }, { "epoch": 0.7826317886070907, "grad_norm": 1.0500101449680372, "learning_rate": 1.4284789504048493e-05, "loss": 0.27361029386520386, "step": 2947 }, { "epoch": 0.7828973575886337, "grad_norm": 1.057442611584268, "learning_rate": 1.428082180731539e-05, "loss": 0.29180705547332764, "step": 2948 }, { "epoch": 0.7831629265701766, "grad_norm": 1.0218659697209738, "learning_rate": 1.4276853285290334e-05, "loss": 0.281120628118515, "step": 2949 }, { "epoch": 0.7834284955517196, "grad_norm": 1.0029783457826962, "learning_rate": 1.4272883938738406e-05, "loss": 0.26144471764564514, "step": 2950 }, { "epoch": 0.7836940645332625, "grad_norm": 1.0904458839940374, "learning_rate": 1.4268913768424848e-05, "loss": 0.3118991255760193, "step": 2951 }, { "epoch": 0.7839596335148055, "grad_norm": 1.0581869365443632, "learning_rate": 1.4264942775115065e-05, "loss": 0.29352328181266785, "step": 2952 }, { "epoch": 0.7842252024963484, "grad_norm": 1.025234952757571, "learning_rate": 1.426097095957461e-05, "loss": 0.2687748968601227, "step": 2953 }, { "epoch": 0.7844907714778914, "grad_norm": 1.0817782920006436, "learning_rate": 1.4256998322569212e-05, "loss": 0.3106890916824341, "step": 2954 }, { "epoch": 0.7847563404594343, "grad_norm": 1.0039841255701216, "learning_rate": 1.4253024864864742e-05, "loss": 0.2522161304950714, "step": 2955 }, { "epoch": 0.7850219094409773, "grad_norm": 1.031799618380073, "learning_rate": 1.424905058722724e-05, "loss": 0.2994377613067627, "step": 2956 }, { "epoch": 0.7852874784225202, "grad_norm": 1.295564211303899, "learning_rate": 1.4245075490422893e-05, "loss": 0.3753565549850464, "step": 2957 }, { "epoch": 0.7855530474040632, "grad_norm": 1.2386689798654595, "learning_rate": 1.424109957521806e-05, "loss": 0.29544737935066223, "step": 2958 }, { "epoch": 0.7858186163856061, "grad_norm": 1.0381164701705432, "learning_rate": 1.423712284237925e-05, "loss": 0.307847797870636, "step": 2959 }, { "epoch": 0.7860841853671491, "grad_norm": 1.1107576873332587, "learning_rate": 1.4233145292673127e-05, "loss": 0.31758183240890503, "step": 2960 }, { "epoch": 0.786349754348692, "grad_norm": 1.0358601319268448, "learning_rate": 1.4229166926866517e-05, "loss": 0.307254433631897, "step": 2961 }, { "epoch": 0.786615323330235, "grad_norm": 1.2228062733167704, "learning_rate": 1.42251877457264e-05, "loss": 0.3513748049736023, "step": 2962 }, { "epoch": 0.7868808923117779, "grad_norm": 1.1359729522705007, "learning_rate": 1.422120775001992e-05, "loss": 0.3025718629360199, "step": 2963 }, { "epoch": 0.7871464612933209, "grad_norm": 1.076503168390535, "learning_rate": 1.4217226940514367e-05, "loss": 0.2922811508178711, "step": 2964 }, { "epoch": 0.787412030274864, "grad_norm": 1.07297262661661, "learning_rate": 1.42132453179772e-05, "loss": 0.29599297046661377, "step": 2965 }, { "epoch": 0.7876775992564069, "grad_norm": 0.992121967255531, "learning_rate": 1.4209262883176025e-05, "loss": 0.28336548805236816, "step": 2966 }, { "epoch": 0.7879431682379499, "grad_norm": 1.0655541697156172, "learning_rate": 1.4205279636878613e-05, "loss": 0.3100801110267639, "step": 2967 }, { "epoch": 0.7882087372194928, "grad_norm": 1.165527486411767, "learning_rate": 1.4201295579852881e-05, "loss": 0.33067989349365234, "step": 2968 }, { "epoch": 0.7884743062010358, "grad_norm": 1.1896877635723886, "learning_rate": 1.4197310712866909e-05, "loss": 0.282347172498703, "step": 2969 }, { "epoch": 0.7887398751825787, "grad_norm": 1.0769183433483809, "learning_rate": 1.419332503668894e-05, "loss": 0.30585426092147827, "step": 2970 }, { "epoch": 0.7890054441641217, "grad_norm": 1.0616062054836604, "learning_rate": 1.4189338552087351e-05, "loss": 0.3011561632156372, "step": 2971 }, { "epoch": 0.7892710131456646, "grad_norm": 0.9722574451184507, "learning_rate": 1.4185351259830705e-05, "loss": 0.2700524926185608, "step": 2972 }, { "epoch": 0.7895365821272076, "grad_norm": 1.0849811262666431, "learning_rate": 1.4181363160687693e-05, "loss": 0.2963382303714752, "step": 2973 }, { "epoch": 0.7898021511087505, "grad_norm": 1.0388990841328773, "learning_rate": 1.4177374255427183e-05, "loss": 0.27132824063301086, "step": 2974 }, { "epoch": 0.7900677200902935, "grad_norm": 0.9602477794817199, "learning_rate": 1.417338454481818e-05, "loss": 0.2539706826210022, "step": 2975 }, { "epoch": 0.7903332890718364, "grad_norm": 1.0972216427869486, "learning_rate": 1.416939402962986e-05, "loss": 0.28465601801872253, "step": 2976 }, { "epoch": 0.7905988580533794, "grad_norm": 1.1885027397372414, "learning_rate": 1.4165402710631544e-05, "loss": 0.3020748198032379, "step": 2977 }, { "epoch": 0.7908644270349223, "grad_norm": 1.0709231597298363, "learning_rate": 1.416141058859271e-05, "loss": 0.3157690465450287, "step": 2978 }, { "epoch": 0.7911299960164653, "grad_norm": 1.0874979641604023, "learning_rate": 1.4157417664282994e-05, "loss": 0.2720191776752472, "step": 2979 }, { "epoch": 0.7913955649980082, "grad_norm": 1.0670143355557837, "learning_rate": 1.4153423938472185e-05, "loss": 0.2931746542453766, "step": 2980 }, { "epoch": 0.7916611339795512, "grad_norm": 1.0836941185599118, "learning_rate": 1.4149429411930226e-05, "loss": 0.2683875560760498, "step": 2981 }, { "epoch": 0.7919267029610941, "grad_norm": 1.0454189872619364, "learning_rate": 1.4145434085427216e-05, "loss": 0.2559819519519806, "step": 2982 }, { "epoch": 0.7921922719426371, "grad_norm": 1.1028368657772893, "learning_rate": 1.4141437959733404e-05, "loss": 0.2845582365989685, "step": 2983 }, { "epoch": 0.79245784092418, "grad_norm": 1.05827279827959, "learning_rate": 1.4137441035619197e-05, "loss": 0.26766544580459595, "step": 2984 }, { "epoch": 0.792723409905723, "grad_norm": 1.2459472391823172, "learning_rate": 1.4133443313855155e-05, "loss": 0.32089024782180786, "step": 2985 }, { "epoch": 0.7929889788872659, "grad_norm": 1.053106908199776, "learning_rate": 1.4129444795211993e-05, "loss": 0.2756182551383972, "step": 2986 }, { "epoch": 0.7932545478688089, "grad_norm": 1.231241306668284, "learning_rate": 1.4125445480460573e-05, "loss": 0.29487302899360657, "step": 2987 }, { "epoch": 0.7935201168503518, "grad_norm": 1.1738297230948855, "learning_rate": 1.4121445370371922e-05, "loss": 0.3362561762332916, "step": 2988 }, { "epoch": 0.7937856858318948, "grad_norm": 1.1591988507026376, "learning_rate": 1.4117444465717209e-05, "loss": 0.2986692488193512, "step": 2989 }, { "epoch": 0.7940512548134377, "grad_norm": 1.0341012671875776, "learning_rate": 1.4113442767267766e-05, "loss": 0.2725266218185425, "step": 2990 }, { "epoch": 0.7943168237949807, "grad_norm": 1.1125466640148414, "learning_rate": 1.4109440275795071e-05, "loss": 0.29827257990837097, "step": 2991 }, { "epoch": 0.7945823927765236, "grad_norm": 1.0512885973195232, "learning_rate": 1.410543699207076e-05, "loss": 0.2506203055381775, "step": 2992 }, { "epoch": 0.7948479617580667, "grad_norm": 0.9867416114744889, "learning_rate": 1.410143291686661e-05, "loss": 0.2675034701824188, "step": 2993 }, { "epoch": 0.7951135307396097, "grad_norm": 1.1763547306282318, "learning_rate": 1.4097428050954571e-05, "loss": 0.34528690576553345, "step": 2994 }, { "epoch": 0.7953790997211526, "grad_norm": 1.1374135219725177, "learning_rate": 1.4093422395106726e-05, "loss": 0.27551063895225525, "step": 2995 }, { "epoch": 0.7956446687026956, "grad_norm": 1.1195982376159075, "learning_rate": 1.408941595009532e-05, "loss": 0.3176268935203552, "step": 2996 }, { "epoch": 0.7959102376842385, "grad_norm": 1.1804373403956752, "learning_rate": 1.408540871669275e-05, "loss": 0.30056723952293396, "step": 2997 }, { "epoch": 0.7961758066657815, "grad_norm": 1.124570387942151, "learning_rate": 1.4081400695671562e-05, "loss": 0.32109886407852173, "step": 2998 }, { "epoch": 0.7964413756473244, "grad_norm": 1.1262740571855958, "learning_rate": 1.4077391887804457e-05, "loss": 0.33622005581855774, "step": 2999 }, { "epoch": 0.7967069446288674, "grad_norm": 1.1195153536613822, "learning_rate": 1.4073382293864283e-05, "loss": 0.3054961860179901, "step": 3000 }, { "epoch": 0.7969725136104103, "grad_norm": 1.1210721039096916, "learning_rate": 1.4069371914624044e-05, "loss": 0.3022462725639343, "step": 3001 }, { "epoch": 0.7972380825919533, "grad_norm": 1.0116555063320039, "learning_rate": 1.4065360750856891e-05, "loss": 0.2500512897968292, "step": 3002 }, { "epoch": 0.7975036515734962, "grad_norm": 1.233947002119444, "learning_rate": 1.4061348803336135e-05, "loss": 0.2960171699523926, "step": 3003 }, { "epoch": 0.7977692205550392, "grad_norm": 3.53476121579318, "learning_rate": 1.4057336072835228e-05, "loss": 0.2941724359989166, "step": 3004 }, { "epoch": 0.7980347895365821, "grad_norm": 1.0143157952003843, "learning_rate": 1.4053322560127779e-05, "loss": 0.2827858328819275, "step": 3005 }, { "epoch": 0.7983003585181251, "grad_norm": 1.34417890867956, "learning_rate": 1.4049308265987544e-05, "loss": 0.32525116205215454, "step": 3006 }, { "epoch": 0.798565927499668, "grad_norm": 1.1622605286979444, "learning_rate": 1.4045293191188431e-05, "loss": 0.26509979367256165, "step": 3007 }, { "epoch": 0.798831496481211, "grad_norm": 1.1649049829769997, "learning_rate": 1.4041277336504503e-05, "loss": 0.3462742567062378, "step": 3008 }, { "epoch": 0.7990970654627539, "grad_norm": 1.118975693723979, "learning_rate": 1.4037260702709967e-05, "loss": 0.2971092164516449, "step": 3009 }, { "epoch": 0.7993626344442969, "grad_norm": 1.0541078602131526, "learning_rate": 1.4033243290579182e-05, "loss": 0.32359808683395386, "step": 3010 }, { "epoch": 0.7996282034258398, "grad_norm": 0.9819968107477214, "learning_rate": 1.4029225100886657e-05, "loss": 0.2949031591415405, "step": 3011 }, { "epoch": 0.7998937724073828, "grad_norm": 0.9639154080405838, "learning_rate": 1.4025206134407051e-05, "loss": 0.29888901114463806, "step": 3012 }, { "epoch": 0.8001593413889257, "grad_norm": 1.0921369087209054, "learning_rate": 1.4021186391915181e-05, "loss": 0.2999705672264099, "step": 3013 }, { "epoch": 0.8004249103704687, "grad_norm": 1.027092536189555, "learning_rate": 1.4017165874185996e-05, "loss": 0.2725638449192047, "step": 3014 }, { "epoch": 0.8006904793520117, "grad_norm": 1.6251260873819724, "learning_rate": 1.4013144581994609e-05, "loss": 0.2809314727783203, "step": 3015 }, { "epoch": 0.8009560483335546, "grad_norm": 1.194026798460289, "learning_rate": 1.400912251611628e-05, "loss": 0.30335327982902527, "step": 3016 }, { "epoch": 0.8012216173150976, "grad_norm": 1.0526756572542106, "learning_rate": 1.400509967732641e-05, "loss": 0.27780598402023315, "step": 3017 }, { "epoch": 0.8014871862966405, "grad_norm": 1.0036615790617616, "learning_rate": 1.400107606640056e-05, "loss": 0.2865309715270996, "step": 3018 }, { "epoch": 0.8017527552781835, "grad_norm": 1.067182271229665, "learning_rate": 1.3997051684114431e-05, "loss": 0.2691546082496643, "step": 3019 }, { "epoch": 0.8020183242597264, "grad_norm": 1.0174199108878024, "learning_rate": 1.3993026531243876e-05, "loss": 0.30289226770401, "step": 3020 }, { "epoch": 0.8022838932412695, "grad_norm": 1.1180967643802684, "learning_rate": 1.3989000608564905e-05, "loss": 0.2767682671546936, "step": 3021 }, { "epoch": 0.8025494622228124, "grad_norm": 1.1982508587685934, "learning_rate": 1.3984973916853657e-05, "loss": 0.3423742353916168, "step": 3022 }, { "epoch": 0.8028150312043554, "grad_norm": 1.1718790013716964, "learning_rate": 1.3980946456886439e-05, "loss": 0.3000536561012268, "step": 3023 }, { "epoch": 0.8030806001858983, "grad_norm": 1.1431161282459077, "learning_rate": 1.3976918229439698e-05, "loss": 0.3071063756942749, "step": 3024 }, { "epoch": 0.8033461691674413, "grad_norm": 1.6885640285561154, "learning_rate": 1.397288923529002e-05, "loss": 0.31261157989501953, "step": 3025 }, { "epoch": 0.8036117381489842, "grad_norm": 1.0076153318556622, "learning_rate": 1.3968859475214156e-05, "loss": 0.2658939063549042, "step": 3026 }, { "epoch": 0.8038773071305272, "grad_norm": 1.0309089161631302, "learning_rate": 1.3964828949988993e-05, "loss": 0.2772905230522156, "step": 3027 }, { "epoch": 0.8041428761120701, "grad_norm": 1.1271894525974708, "learning_rate": 1.396079766039157e-05, "loss": 0.2903479337692261, "step": 3028 }, { "epoch": 0.8044084450936131, "grad_norm": 1.2165332424367126, "learning_rate": 1.3956765607199069e-05, "loss": 0.35709524154663086, "step": 3029 }, { "epoch": 0.804674014075156, "grad_norm": 1.0863328323430816, "learning_rate": 1.3952732791188828e-05, "loss": 0.2929389774799347, "step": 3030 }, { "epoch": 0.804939583056699, "grad_norm": 0.999480167032172, "learning_rate": 1.3948699213138321e-05, "loss": 0.2609884440898895, "step": 3031 }, { "epoch": 0.805205152038242, "grad_norm": 1.0946442757602284, "learning_rate": 1.394466487382518e-05, "loss": 0.3026544749736786, "step": 3032 }, { "epoch": 0.8054707210197849, "grad_norm": 1.0415601836945267, "learning_rate": 1.394062977402717e-05, "loss": 0.28281137347221375, "step": 3033 }, { "epoch": 0.8057362900013278, "grad_norm": 0.9908513124522437, "learning_rate": 1.3936593914522214e-05, "loss": 0.26189178228378296, "step": 3034 }, { "epoch": 0.8060018589828708, "grad_norm": 1.0541854732158313, "learning_rate": 1.3932557296088383e-05, "loss": 0.27987509965896606, "step": 3035 }, { "epoch": 0.8062674279644138, "grad_norm": 0.9961129101435677, "learning_rate": 1.3928519919503884e-05, "loss": 0.2857724130153656, "step": 3036 }, { "epoch": 0.8065329969459567, "grad_norm": 0.9752377302684325, "learning_rate": 1.3924481785547076e-05, "loss": 0.28102418780326843, "step": 3037 }, { "epoch": 0.8067985659274997, "grad_norm": 1.06882045524996, "learning_rate": 1.3920442894996464e-05, "loss": 0.30250412225723267, "step": 3038 }, { "epoch": 0.8070641349090426, "grad_norm": 0.9854538363943691, "learning_rate": 1.3916403248630703e-05, "loss": 0.28951483964920044, "step": 3039 }, { "epoch": 0.8073297038905856, "grad_norm": 0.990016753911339, "learning_rate": 1.3912362847228585e-05, "loss": 0.28455328941345215, "step": 3040 }, { "epoch": 0.8075952728721285, "grad_norm": 1.0887176497400486, "learning_rate": 1.3908321691569048e-05, "loss": 0.29541105031967163, "step": 3041 }, { "epoch": 0.8078608418536715, "grad_norm": 1.162648796815669, "learning_rate": 1.3904279782431187e-05, "loss": 0.3057629466056824, "step": 3042 }, { "epoch": 0.8081264108352144, "grad_norm": 1.0909846424659564, "learning_rate": 1.3900237120594226e-05, "loss": 0.3204082250595093, "step": 3043 }, { "epoch": 0.8083919798167574, "grad_norm": 0.9793203113476959, "learning_rate": 1.3896193706837551e-05, "loss": 0.28629523515701294, "step": 3044 }, { "epoch": 0.8086575487983003, "grad_norm": 1.1874958252714642, "learning_rate": 1.389214954194068e-05, "loss": 0.298164427280426, "step": 3045 }, { "epoch": 0.8089231177798433, "grad_norm": 1.005892758898695, "learning_rate": 1.3888104626683282e-05, "loss": 0.27309298515319824, "step": 3046 }, { "epoch": 0.8091886867613862, "grad_norm": 0.9950263488620656, "learning_rate": 1.3884058961845166e-05, "loss": 0.25635263323783875, "step": 3047 }, { "epoch": 0.8094542557429292, "grad_norm": 1.002808171969614, "learning_rate": 1.3880012548206292e-05, "loss": 0.29926127195358276, "step": 3048 }, { "epoch": 0.8097198247244722, "grad_norm": 0.9867331912864394, "learning_rate": 1.387596538654676e-05, "loss": 0.26633137464523315, "step": 3049 }, { "epoch": 0.8099853937060152, "grad_norm": 1.0757993931692869, "learning_rate": 1.387191747764681e-05, "loss": 0.28725534677505493, "step": 3050 }, { "epoch": 0.8102509626875581, "grad_norm": 1.4955713597704303, "learning_rate": 1.3867868822286838e-05, "loss": 0.3015314042568207, "step": 3051 }, { "epoch": 0.8105165316691011, "grad_norm": 1.048643971484194, "learning_rate": 1.3863819421247375e-05, "loss": 0.3054691553115845, "step": 3052 }, { "epoch": 0.810782100650644, "grad_norm": 1.1596568650600225, "learning_rate": 1.3859769275309097e-05, "loss": 0.26315444707870483, "step": 3053 }, { "epoch": 0.811047669632187, "grad_norm": 1.024319547072995, "learning_rate": 1.3855718385252824e-05, "loss": 0.2973077595233917, "step": 3054 }, { "epoch": 0.81131323861373, "grad_norm": 1.1845129171721744, "learning_rate": 1.385166675185952e-05, "loss": 0.32824432849884033, "step": 3055 }, { "epoch": 0.8115788075952729, "grad_norm": 1.2351976774044444, "learning_rate": 1.3847614375910292e-05, "loss": 0.3127811849117279, "step": 3056 }, { "epoch": 0.8118443765768159, "grad_norm": 1.0840317870226388, "learning_rate": 1.384356125818639e-05, "loss": 0.2631932497024536, "step": 3057 }, { "epoch": 0.8121099455583588, "grad_norm": 1.0251225163823416, "learning_rate": 1.3839507399469213e-05, "loss": 0.2856106162071228, "step": 3058 }, { "epoch": 0.8123755145399018, "grad_norm": 1.2604810760435325, "learning_rate": 1.3835452800540288e-05, "loss": 0.28986629843711853, "step": 3059 }, { "epoch": 0.8126410835214447, "grad_norm": 1.0804422287227695, "learning_rate": 1.3831397462181298e-05, "loss": 0.28411972522735596, "step": 3060 }, { "epoch": 0.8129066525029877, "grad_norm": 1.117697190248139, "learning_rate": 1.3827341385174063e-05, "loss": 0.3234354853630066, "step": 3061 }, { "epoch": 0.8131722214845306, "grad_norm": 0.9917598533716923, "learning_rate": 1.3823284570300551e-05, "loss": 0.24779736995697021, "step": 3062 }, { "epoch": 0.8134377904660736, "grad_norm": 1.1743500466494587, "learning_rate": 1.3819227018342865e-05, "loss": 0.3306904137134552, "step": 3063 }, { "epoch": 0.8137033594476165, "grad_norm": 1.1120224667451313, "learning_rate": 1.3815168730083254e-05, "loss": 0.31705451011657715, "step": 3064 }, { "epoch": 0.8139689284291595, "grad_norm": 1.1351768868234977, "learning_rate": 1.3811109706304105e-05, "loss": 0.29830047488212585, "step": 3065 }, { "epoch": 0.8142344974107024, "grad_norm": 1.1496885073051233, "learning_rate": 1.3807049947787954e-05, "loss": 0.30605942010879517, "step": 3066 }, { "epoch": 0.8145000663922454, "grad_norm": 1.0745429008877887, "learning_rate": 1.3802989455317475e-05, "loss": 0.3139193058013916, "step": 3067 }, { "epoch": 0.8147656353737883, "grad_norm": 1.0541430221228831, "learning_rate": 1.3798928229675478e-05, "loss": 0.3175879716873169, "step": 3068 }, { "epoch": 0.8150312043553313, "grad_norm": 1.0450888698469754, "learning_rate": 1.3794866271644922e-05, "loss": 0.26391106843948364, "step": 3069 }, { "epoch": 0.8152967733368742, "grad_norm": 0.945534402365018, "learning_rate": 1.3790803582008906e-05, "loss": 0.24128863215446472, "step": 3070 }, { "epoch": 0.8155623423184172, "grad_norm": 1.1627322372772537, "learning_rate": 1.378674016155067e-05, "loss": 0.3249368965625763, "step": 3071 }, { "epoch": 0.8158279112999601, "grad_norm": 1.0060562228451158, "learning_rate": 1.3782676011053592e-05, "loss": 0.2871986925601959, "step": 3072 }, { "epoch": 0.8160934802815031, "grad_norm": 1.1624248444882197, "learning_rate": 1.377861113130119e-05, "loss": 0.29047372937202454, "step": 3073 }, { "epoch": 0.816359049263046, "grad_norm": 1.0925698386610025, "learning_rate": 1.3774545523077122e-05, "loss": 0.3055281341075897, "step": 3074 }, { "epoch": 0.816624618244589, "grad_norm": 0.9197098274775629, "learning_rate": 1.37704791871652e-05, "loss": 0.2565494179725647, "step": 3075 }, { "epoch": 0.8168901872261319, "grad_norm": 1.0377185359248249, "learning_rate": 1.3766412124349358e-05, "loss": 0.3016049861907959, "step": 3076 }, { "epoch": 0.8171557562076749, "grad_norm": 1.0790995041055653, "learning_rate": 1.3762344335413677e-05, "loss": 0.3021200895309448, "step": 3077 }, { "epoch": 0.817421325189218, "grad_norm": 1.0643017770253544, "learning_rate": 1.3758275821142382e-05, "loss": 0.3024774193763733, "step": 3078 }, { "epoch": 0.8176868941707609, "grad_norm": 1.0591328005001268, "learning_rate": 1.3754206582319836e-05, "loss": 0.33114269375801086, "step": 3079 }, { "epoch": 0.8179524631523039, "grad_norm": 1.0815809107319383, "learning_rate": 1.3750136619730534e-05, "loss": 0.27339494228363037, "step": 3080 }, { "epoch": 0.8182180321338468, "grad_norm": 1.170674128986789, "learning_rate": 1.3746065934159123e-05, "loss": 0.2827128767967224, "step": 3081 }, { "epoch": 0.8184836011153898, "grad_norm": 1.1064880736532463, "learning_rate": 1.3741994526390379e-05, "loss": 0.2972746193408966, "step": 3082 }, { "epoch": 0.8187491700969327, "grad_norm": 1.143548636761381, "learning_rate": 1.3737922397209222e-05, "loss": 0.29932117462158203, "step": 3083 }, { "epoch": 0.8190147390784757, "grad_norm": 1.0415876434255473, "learning_rate": 1.3733849547400713e-05, "loss": 0.28307998180389404, "step": 3084 }, { "epoch": 0.8192803080600186, "grad_norm": 1.1070561443231863, "learning_rate": 1.3729775977750048e-05, "loss": 0.2885883152484894, "step": 3085 }, { "epoch": 0.8195458770415616, "grad_norm": 1.1106477390667713, "learning_rate": 1.3725701689042564e-05, "loss": 0.28837913274765015, "step": 3086 }, { "epoch": 0.8198114460231045, "grad_norm": 1.0553526039271008, "learning_rate": 1.3721626682063733e-05, "loss": 0.2775058150291443, "step": 3087 }, { "epoch": 0.8200770150046475, "grad_norm": 1.153176622627066, "learning_rate": 1.3717550957599172e-05, "loss": 0.2813493609428406, "step": 3088 }, { "epoch": 0.8203425839861904, "grad_norm": 1.1477738573738745, "learning_rate": 1.371347451643463e-05, "loss": 0.2677592933177948, "step": 3089 }, { "epoch": 0.8206081529677334, "grad_norm": 1.184705398593534, "learning_rate": 1.3709397359355998e-05, "loss": 0.3104957938194275, "step": 3090 }, { "epoch": 0.8208737219492763, "grad_norm": 1.1714327280441006, "learning_rate": 1.3705319487149303e-05, "loss": 0.29315799474716187, "step": 3091 }, { "epoch": 0.8211392909308193, "grad_norm": 1.1179168081295616, "learning_rate": 1.370124090060071e-05, "loss": 0.3044348657131195, "step": 3092 }, { "epoch": 0.8214048599123622, "grad_norm": 1.1122209585212142, "learning_rate": 1.3697161600496525e-05, "loss": 0.2918691635131836, "step": 3093 }, { "epoch": 0.8216704288939052, "grad_norm": 1.0702091422822353, "learning_rate": 1.3693081587623187e-05, "loss": 0.2887750267982483, "step": 3094 }, { "epoch": 0.8219359978754481, "grad_norm": 1.1155429990394359, "learning_rate": 1.3689000862767274e-05, "loss": 0.3055661916732788, "step": 3095 }, { "epoch": 0.8222015668569911, "grad_norm": 1.0251756704247361, "learning_rate": 1.3684919426715504e-05, "loss": 0.271525114774704, "step": 3096 }, { "epoch": 0.822467135838534, "grad_norm": 1.1269584199088303, "learning_rate": 1.3680837280254726e-05, "loss": 0.3220426142215729, "step": 3097 }, { "epoch": 0.822732704820077, "grad_norm": 1.0149552227204566, "learning_rate": 1.3676754424171935e-05, "loss": 0.29091203212738037, "step": 3098 }, { "epoch": 0.8229982738016199, "grad_norm": 1.051328362150218, "learning_rate": 1.3672670859254252e-05, "loss": 0.2928692102432251, "step": 3099 }, { "epoch": 0.8232638427831629, "grad_norm": 1.0366528987524315, "learning_rate": 1.3668586586288942e-05, "loss": 0.28635919094085693, "step": 3100 }, { "epoch": 0.8235294117647058, "grad_norm": 1.0374876833794577, "learning_rate": 1.3664501606063402e-05, "loss": 0.2912571430206299, "step": 3101 }, { "epoch": 0.8237949807462488, "grad_norm": 1.051516198651511, "learning_rate": 1.3660415919365178e-05, "loss": 0.2783615291118622, "step": 3102 }, { "epoch": 0.8240605497277917, "grad_norm": 1.088921494432588, "learning_rate": 1.365632952698193e-05, "loss": 0.3064395785331726, "step": 3103 }, { "epoch": 0.8243261187093347, "grad_norm": 1.023130230207284, "learning_rate": 1.3652242429701477e-05, "loss": 0.2528907358646393, "step": 3104 }, { "epoch": 0.8245916876908777, "grad_norm": 1.0503421945431453, "learning_rate": 1.3648154628311754e-05, "loss": 0.2648676633834839, "step": 3105 }, { "epoch": 0.8248572566724207, "grad_norm": 1.2732480631249905, "learning_rate": 1.3644066123600846e-05, "loss": 0.33425620198249817, "step": 3106 }, { "epoch": 0.8251228256539637, "grad_norm": 1.0925062122156084, "learning_rate": 1.3639976916356965e-05, "loss": 0.3108072280883789, "step": 3107 }, { "epoch": 0.8253883946355066, "grad_norm": 1.0815679409684162, "learning_rate": 1.3635887007368467e-05, "loss": 0.2860543131828308, "step": 3108 }, { "epoch": 0.8256539636170496, "grad_norm": 1.0711932859903586, "learning_rate": 1.3631796397423833e-05, "loss": 0.25440749526023865, "step": 3109 }, { "epoch": 0.8259195325985925, "grad_norm": 1.1006663978120534, "learning_rate": 1.3627705087311687e-05, "loss": 0.2676115334033966, "step": 3110 }, { "epoch": 0.8261851015801355, "grad_norm": 1.1597529133358384, "learning_rate": 1.3623613077820788e-05, "loss": 0.28977078199386597, "step": 3111 }, { "epoch": 0.8264506705616784, "grad_norm": 1.1046761011596355, "learning_rate": 1.361952036974002e-05, "loss": 0.30161401629447937, "step": 3112 }, { "epoch": 0.8267162395432214, "grad_norm": 1.135120464396266, "learning_rate": 1.3615426963858416e-05, "loss": 0.28676310181617737, "step": 3113 }, { "epoch": 0.8269818085247643, "grad_norm": 1.100109147839879, "learning_rate": 1.361133286096513e-05, "loss": 0.2957243323326111, "step": 3114 }, { "epoch": 0.8272473775063073, "grad_norm": 1.0691905028493969, "learning_rate": 1.3607238061849461e-05, "loss": 0.3036375343799591, "step": 3115 }, { "epoch": 0.8275129464878502, "grad_norm": 1.1142331461612014, "learning_rate": 1.360314256730084e-05, "loss": 0.31175294518470764, "step": 3116 }, { "epoch": 0.8277785154693932, "grad_norm": 1.0665802680669934, "learning_rate": 1.3599046378108825e-05, "loss": 0.30212485790252686, "step": 3117 }, { "epoch": 0.8280440844509361, "grad_norm": 1.1992776426845386, "learning_rate": 1.3594949495063117e-05, "loss": 0.3290692865848541, "step": 3118 }, { "epoch": 0.8283096534324791, "grad_norm": 1.007005509411099, "learning_rate": 1.3590851918953542e-05, "loss": 0.25952839851379395, "step": 3119 }, { "epoch": 0.828575222414022, "grad_norm": 1.0949064818424232, "learning_rate": 1.3586753650570069e-05, "loss": 0.27737247943878174, "step": 3120 }, { "epoch": 0.828840791395565, "grad_norm": 1.0156990629875267, "learning_rate": 1.3582654690702795e-05, "loss": 0.29415374994277954, "step": 3121 }, { "epoch": 0.8291063603771079, "grad_norm": 1.066804105313739, "learning_rate": 1.3578555040141948e-05, "loss": 0.29197627305984497, "step": 3122 }, { "epoch": 0.8293719293586509, "grad_norm": 1.1089730397237387, "learning_rate": 1.3574454699677893e-05, "loss": 0.30318522453308105, "step": 3123 }, { "epoch": 0.8296374983401938, "grad_norm": 1.0916871079120407, "learning_rate": 1.357035367010113e-05, "loss": 0.3184241056442261, "step": 3124 }, { "epoch": 0.8299030673217368, "grad_norm": 1.3286365770942894, "learning_rate": 1.3566251952202288e-05, "loss": 0.30330199003219604, "step": 3125 }, { "epoch": 0.8301686363032797, "grad_norm": 1.1117453782986153, "learning_rate": 1.356214954677213e-05, "loss": 0.25366994738578796, "step": 3126 }, { "epoch": 0.8304342052848227, "grad_norm": 1.109752753436135, "learning_rate": 1.3558046454601552e-05, "loss": 0.3213343918323517, "step": 3127 }, { "epoch": 0.8306997742663657, "grad_norm": 1.0918389418395038, "learning_rate": 1.355394267648158e-05, "loss": 0.3012468218803406, "step": 3128 }, { "epoch": 0.8309653432479086, "grad_norm": 1.1319633441718049, "learning_rate": 1.3549838213203374e-05, "loss": 0.3272971510887146, "step": 3129 }, { "epoch": 0.8312309122294516, "grad_norm": 1.0778057413430624, "learning_rate": 1.354573306555823e-05, "loss": 0.30032482743263245, "step": 3130 }, { "epoch": 0.8314964812109945, "grad_norm": 1.0778331818873157, "learning_rate": 1.3541627234337567e-05, "loss": 0.2820669412612915, "step": 3131 }, { "epoch": 0.8317620501925375, "grad_norm": 1.0187129279356677, "learning_rate": 1.3537520720332943e-05, "loss": 0.2638673782348633, "step": 3132 }, { "epoch": 0.8320276191740804, "grad_norm": 1.0843507637886551, "learning_rate": 1.3533413524336043e-05, "loss": 0.2766842246055603, "step": 3133 }, { "epoch": 0.8322931881556235, "grad_norm": 1.2660530642163288, "learning_rate": 1.3529305647138689e-05, "loss": 0.330536425113678, "step": 3134 }, { "epoch": 0.8325587571371664, "grad_norm": 1.0925834195413107, "learning_rate": 1.3525197089532833e-05, "loss": 0.30375364422798157, "step": 3135 }, { "epoch": 0.8328243261187094, "grad_norm": 1.1657669106128519, "learning_rate": 1.3521087852310555e-05, "loss": 0.3092171549797058, "step": 3136 }, { "epoch": 0.8330898951002523, "grad_norm": 1.1686338102407274, "learning_rate": 1.3516977936264062e-05, "loss": 0.28651195764541626, "step": 3137 }, { "epoch": 0.8333554640817953, "grad_norm": 1.0845327487717817, "learning_rate": 1.3512867342185705e-05, "loss": 0.2882133722305298, "step": 3138 }, { "epoch": 0.8336210330633382, "grad_norm": 1.1325019700739036, "learning_rate": 1.3508756070867955e-05, "loss": 0.30633628368377686, "step": 3139 }, { "epoch": 0.8338866020448812, "grad_norm": 1.090943303162736, "learning_rate": 1.3504644123103415e-05, "loss": 0.2819565236568451, "step": 3140 }, { "epoch": 0.8341521710264241, "grad_norm": 1.0804420637943886, "learning_rate": 1.3500531499684819e-05, "loss": 0.29544374346733093, "step": 3141 }, { "epoch": 0.8344177400079671, "grad_norm": 1.10400689114043, "learning_rate": 1.3496418201405037e-05, "loss": 0.29383376240730286, "step": 3142 }, { "epoch": 0.83468330898951, "grad_norm": 0.9862964562028984, "learning_rate": 1.3492304229057062e-05, "loss": 0.24945983290672302, "step": 3143 }, { "epoch": 0.834948877971053, "grad_norm": 1.2055608503616826, "learning_rate": 1.3488189583434023e-05, "loss": 0.338919997215271, "step": 3144 }, { "epoch": 0.835214446952596, "grad_norm": 1.071166648249549, "learning_rate": 1.348407426532917e-05, "loss": 0.29555821418762207, "step": 3145 }, { "epoch": 0.8354800159341389, "grad_norm": 1.0650010322896095, "learning_rate": 1.3479958275535887e-05, "loss": 0.31038299202919006, "step": 3146 }, { "epoch": 0.8357455849156818, "grad_norm": 1.021351909092412, "learning_rate": 1.347584161484769e-05, "loss": 0.2595089077949524, "step": 3147 }, { "epoch": 0.8360111538972248, "grad_norm": 1.1885926674667484, "learning_rate": 1.3471724284058227e-05, "loss": 0.3287338614463806, "step": 3148 }, { "epoch": 0.8362767228787678, "grad_norm": 1.1997618392346763, "learning_rate": 1.3467606283961268e-05, "loss": 0.3109680414199829, "step": 3149 }, { "epoch": 0.8365422918603107, "grad_norm": 1.0762954067078139, "learning_rate": 1.346348761535071e-05, "loss": 0.2584227919578552, "step": 3150 }, { "epoch": 0.8368078608418537, "grad_norm": 1.137771769139511, "learning_rate": 1.345936827902059e-05, "loss": 0.3038554787635803, "step": 3151 }, { "epoch": 0.8370734298233966, "grad_norm": 1.029659281383911, "learning_rate": 1.3455248275765067e-05, "loss": 0.28267812728881836, "step": 3152 }, { "epoch": 0.8373389988049396, "grad_norm": 1.163661242492436, "learning_rate": 1.3451127606378425e-05, "loss": 0.3328094184398651, "step": 3153 }, { "epoch": 0.8376045677864825, "grad_norm": 1.084045978606854, "learning_rate": 1.3447006271655082e-05, "loss": 0.3235865533351898, "step": 3154 }, { "epoch": 0.8378701367680255, "grad_norm": 1.037100355990568, "learning_rate": 1.3442884272389583e-05, "loss": 0.25394493341445923, "step": 3155 }, { "epoch": 0.8381357057495684, "grad_norm": 1.1250984496593863, "learning_rate": 1.3438761609376604e-05, "loss": 0.29841768741607666, "step": 3156 }, { "epoch": 0.8384012747311114, "grad_norm": 1.1999100818775306, "learning_rate": 1.3434638283410942e-05, "loss": 0.3161924183368683, "step": 3157 }, { "epoch": 0.8386668437126543, "grad_norm": 0.9017579941601053, "learning_rate": 1.3430514295287526e-05, "loss": 0.22781039774417877, "step": 3158 }, { "epoch": 0.8389324126941973, "grad_norm": 1.0534948555265085, "learning_rate": 1.3426389645801415e-05, "loss": 0.2947984039783478, "step": 3159 }, { "epoch": 0.8391979816757402, "grad_norm": 1.0286789238265646, "learning_rate": 1.342226433574779e-05, "loss": 0.2827467918395996, "step": 3160 }, { "epoch": 0.8394635506572832, "grad_norm": 1.0453932660244052, "learning_rate": 1.3418138365921962e-05, "loss": 0.3149232268333435, "step": 3161 }, { "epoch": 0.8397291196388262, "grad_norm": 1.2487567497076437, "learning_rate": 1.3414011737119373e-05, "loss": 0.33154603838920593, "step": 3162 }, { "epoch": 0.8399946886203692, "grad_norm": 1.074983718750332, "learning_rate": 1.3409884450135581e-05, "loss": 0.28532034158706665, "step": 3163 }, { "epoch": 0.8402602576019121, "grad_norm": 1.0695327636228384, "learning_rate": 1.3405756505766286e-05, "loss": 0.2539500892162323, "step": 3164 }, { "epoch": 0.8405258265834551, "grad_norm": 1.0653532722719707, "learning_rate": 1.3401627904807302e-05, "loss": 0.3023888170719147, "step": 3165 }, { "epoch": 0.840791395564998, "grad_norm": 1.0811844194203637, "learning_rate": 1.3397498648054579e-05, "loss": 0.3088506758213043, "step": 3166 }, { "epoch": 0.841056964546541, "grad_norm": 1.2249048833028835, "learning_rate": 1.3393368736304184e-05, "loss": 0.3223467469215393, "step": 3167 }, { "epoch": 0.841322533528084, "grad_norm": 1.0772937869709083, "learning_rate": 1.3389238170352318e-05, "loss": 0.2541419565677643, "step": 3168 }, { "epoch": 0.8415881025096269, "grad_norm": 1.0463826735598363, "learning_rate": 1.3385106950995308e-05, "loss": 0.2915497422218323, "step": 3169 }, { "epoch": 0.8418536714911699, "grad_norm": 1.1726858597591174, "learning_rate": 1.3380975079029598e-05, "loss": 0.2907465994358063, "step": 3170 }, { "epoch": 0.8421192404727128, "grad_norm": 1.0581221380369799, "learning_rate": 1.337684255525177e-05, "loss": 0.2587417960166931, "step": 3171 }, { "epoch": 0.8423848094542558, "grad_norm": 1.1080472137531636, "learning_rate": 1.3372709380458522e-05, "loss": 0.2932469844818115, "step": 3172 }, { "epoch": 0.8426503784357987, "grad_norm": 1.2359417241278925, "learning_rate": 1.3368575555446681e-05, "loss": 0.31451860070228577, "step": 3173 }, { "epoch": 0.8429159474173417, "grad_norm": 1.067745190297883, "learning_rate": 1.3364441081013205e-05, "loss": 0.24513742327690125, "step": 3174 }, { "epoch": 0.8431815163988846, "grad_norm": 1.0795526820997523, "learning_rate": 1.3360305957955166e-05, "loss": 0.29781201481819153, "step": 3175 }, { "epoch": 0.8434470853804276, "grad_norm": 1.3176130252584213, "learning_rate": 1.3356170187069775e-05, "loss": 0.30925726890563965, "step": 3176 }, { "epoch": 0.8437126543619705, "grad_norm": 1.1110632932678028, "learning_rate": 1.3352033769154347e-05, "loss": 0.2822851538658142, "step": 3177 }, { "epoch": 0.8439782233435135, "grad_norm": 1.0033731418220575, "learning_rate": 1.3347896705006344e-05, "loss": 0.2511071264743805, "step": 3178 }, { "epoch": 0.8442437923250564, "grad_norm": 1.1921629041957855, "learning_rate": 1.3343758995423344e-05, "loss": 0.3002505302429199, "step": 3179 }, { "epoch": 0.8445093613065994, "grad_norm": 0.9942107511416755, "learning_rate": 1.3339620641203043e-05, "loss": 0.285504549741745, "step": 3180 }, { "epoch": 0.8447749302881423, "grad_norm": 1.1880306222164103, "learning_rate": 1.3335481643143271e-05, "loss": 0.31988856196403503, "step": 3181 }, { "epoch": 0.8450404992696853, "grad_norm": 1.0905691447057935, "learning_rate": 1.3331342002041973e-05, "loss": 0.29330819845199585, "step": 3182 }, { "epoch": 0.8453060682512282, "grad_norm": 1.049547579497453, "learning_rate": 1.3327201718697232e-05, "loss": 0.28694427013397217, "step": 3183 }, { "epoch": 0.8455716372327712, "grad_norm": 1.0561569710297949, "learning_rate": 1.3323060793907239e-05, "loss": 0.24912211298942566, "step": 3184 }, { "epoch": 0.8458372062143141, "grad_norm": 1.1346018526864223, "learning_rate": 1.3318919228470315e-05, "loss": 0.28117647767066956, "step": 3185 }, { "epoch": 0.8461027751958571, "grad_norm": 1.2524387900920857, "learning_rate": 1.3314777023184907e-05, "loss": 0.3176446557044983, "step": 3186 }, { "epoch": 0.8463683441774, "grad_norm": 1.0728463380702977, "learning_rate": 1.3310634178849583e-05, "loss": 0.31205689907073975, "step": 3187 }, { "epoch": 0.846633913158943, "grad_norm": 1.1500545538779043, "learning_rate": 1.3306490696263034e-05, "loss": 0.29942232370376587, "step": 3188 }, { "epoch": 0.8468994821404859, "grad_norm": 1.161750107962421, "learning_rate": 1.3302346576224077e-05, "loss": 0.3149508833885193, "step": 3189 }, { "epoch": 0.847165051122029, "grad_norm": 1.0924626607758976, "learning_rate": 1.3298201819531646e-05, "loss": 0.2930619418621063, "step": 3190 }, { "epoch": 0.847430620103572, "grad_norm": 1.0958680594537196, "learning_rate": 1.3294056426984804e-05, "loss": 0.3089582920074463, "step": 3191 }, { "epoch": 0.8476961890851149, "grad_norm": 1.2175163313381927, "learning_rate": 1.3289910399382733e-05, "loss": 0.3120991587638855, "step": 3192 }, { "epoch": 0.8479617580666579, "grad_norm": 1.0535688994558223, "learning_rate": 1.3285763737524738e-05, "loss": 0.2728833258152008, "step": 3193 }, { "epoch": 0.8482273270482008, "grad_norm": 1.0457465617551238, "learning_rate": 1.3281616442210246e-05, "loss": 0.2833358347415924, "step": 3194 }, { "epoch": 0.8484928960297438, "grad_norm": 1.0714039101779447, "learning_rate": 1.3277468514238803e-05, "loss": 0.26218950748443604, "step": 3195 }, { "epoch": 0.8487584650112867, "grad_norm": 1.0938436245702892, "learning_rate": 1.3273319954410088e-05, "loss": 0.3120720386505127, "step": 3196 }, { "epoch": 0.8490240339928297, "grad_norm": 1.0412833763909957, "learning_rate": 1.3269170763523892e-05, "loss": 0.2748696208000183, "step": 3197 }, { "epoch": 0.8492896029743726, "grad_norm": 1.0148051769031237, "learning_rate": 1.326502094238013e-05, "loss": 0.2892690598964691, "step": 3198 }, { "epoch": 0.8495551719559156, "grad_norm": 1.068648430192615, "learning_rate": 1.3260870491778835e-05, "loss": 0.26583510637283325, "step": 3199 }, { "epoch": 0.8498207409374585, "grad_norm": 1.105620955007001, "learning_rate": 1.325671941252017e-05, "loss": 0.31602388620376587, "step": 3200 }, { "epoch": 0.8500863099190015, "grad_norm": 1.068517421778971, "learning_rate": 1.3252567705404409e-05, "loss": 0.2980017364025116, "step": 3201 }, { "epoch": 0.8503518789005444, "grad_norm": 1.0740685936810315, "learning_rate": 1.3248415371231957e-05, "loss": 0.27081727981567383, "step": 3202 }, { "epoch": 0.8506174478820874, "grad_norm": 1.2590520587844396, "learning_rate": 1.3244262410803333e-05, "loss": 0.28895002603530884, "step": 3203 }, { "epoch": 0.8508830168636303, "grad_norm": 1.1373552047630993, "learning_rate": 1.3240108824919176e-05, "loss": 0.30804315209388733, "step": 3204 }, { "epoch": 0.8511485858451733, "grad_norm": 1.1074447190812993, "learning_rate": 1.3235954614380253e-05, "loss": 0.28173667192459106, "step": 3205 }, { "epoch": 0.8514141548267162, "grad_norm": 1.097058715769224, "learning_rate": 1.3231799779987445e-05, "loss": 0.3113047778606415, "step": 3206 }, { "epoch": 0.8516797238082592, "grad_norm": 1.0285862677327642, "learning_rate": 1.3227644322541754e-05, "loss": 0.247248113155365, "step": 3207 }, { "epoch": 0.8519452927898021, "grad_norm": 1.1032823581833329, "learning_rate": 1.3223488242844309e-05, "loss": 0.27078187465667725, "step": 3208 }, { "epoch": 0.8522108617713451, "grad_norm": 1.0635139884249352, "learning_rate": 1.321933154169634e-05, "loss": 0.2749357223510742, "step": 3209 }, { "epoch": 0.852476430752888, "grad_norm": 1.0129100217319345, "learning_rate": 1.3215174219899224e-05, "loss": 0.25382956862449646, "step": 3210 }, { "epoch": 0.852741999734431, "grad_norm": 1.0528151094235563, "learning_rate": 1.3211016278254436e-05, "loss": 0.3237685263156891, "step": 3211 }, { "epoch": 0.8530075687159739, "grad_norm": 1.273911241149791, "learning_rate": 1.3206857717563581e-05, "loss": 0.2899032235145569, "step": 3212 }, { "epoch": 0.8532731376975169, "grad_norm": 1.040323856520164, "learning_rate": 1.3202698538628376e-05, "loss": 0.25997933745384216, "step": 3213 }, { "epoch": 0.8535387066790598, "grad_norm": 1.121125084608177, "learning_rate": 1.3198538742250668e-05, "loss": 0.3228183090686798, "step": 3214 }, { "epoch": 0.8538042756606028, "grad_norm": 1.1002230220524851, "learning_rate": 1.3194378329232413e-05, "loss": 0.31993368268013, "step": 3215 }, { "epoch": 0.8540698446421457, "grad_norm": 1.157115702913611, "learning_rate": 1.3190217300375694e-05, "loss": 0.29520007967948914, "step": 3216 }, { "epoch": 0.8543354136236887, "grad_norm": 1.0898926058638614, "learning_rate": 1.3186055656482702e-05, "loss": 0.31073522567749023, "step": 3217 }, { "epoch": 0.8546009826052318, "grad_norm": 1.1465583376043518, "learning_rate": 1.3181893398355752e-05, "loss": 0.34354183077812195, "step": 3218 }, { "epoch": 0.8548665515867747, "grad_norm": 1.179928846812524, "learning_rate": 1.3177730526797286e-05, "loss": 0.27676698565483093, "step": 3219 }, { "epoch": 0.8551321205683177, "grad_norm": 1.0792983255501365, "learning_rate": 1.3173567042609852e-05, "loss": 0.27313530445098877, "step": 3220 }, { "epoch": 0.8553976895498606, "grad_norm": 0.9249374113484707, "learning_rate": 1.3169402946596119e-05, "loss": 0.2517555058002472, "step": 3221 }, { "epoch": 0.8556632585314036, "grad_norm": 1.0684778793194236, "learning_rate": 1.3165238239558878e-05, "loss": 0.29700207710266113, "step": 3222 }, { "epoch": 0.8559288275129465, "grad_norm": 1.1262235464302217, "learning_rate": 1.3161072922301037e-05, "loss": 0.3182620704174042, "step": 3223 }, { "epoch": 0.8561943964944895, "grad_norm": 1.123570804553303, "learning_rate": 1.3156906995625615e-05, "loss": 0.3112961947917938, "step": 3224 }, { "epoch": 0.8564599654760324, "grad_norm": 1.1746597736734636, "learning_rate": 1.3152740460335757e-05, "loss": 0.3080563545227051, "step": 3225 }, { "epoch": 0.8567255344575754, "grad_norm": 1.1646363575237453, "learning_rate": 1.3148573317234726e-05, "loss": 0.31197935342788696, "step": 3226 }, { "epoch": 0.8569911034391183, "grad_norm": 1.0455051980244612, "learning_rate": 1.3144405567125886e-05, "loss": 0.27377086877822876, "step": 3227 }, { "epoch": 0.8572566724206613, "grad_norm": 1.050528412475655, "learning_rate": 1.3140237210812741e-05, "loss": 0.25303182005882263, "step": 3228 }, { "epoch": 0.8575222414022042, "grad_norm": 1.0664458431943622, "learning_rate": 1.3136068249098899e-05, "loss": 0.27949726581573486, "step": 3229 }, { "epoch": 0.8577878103837472, "grad_norm": 1.0907347405782384, "learning_rate": 1.3131898682788082e-05, "loss": 0.278359055519104, "step": 3230 }, { "epoch": 0.8580533793652901, "grad_norm": 1.081462335761227, "learning_rate": 1.312772851268414e-05, "loss": 0.28507643938064575, "step": 3231 }, { "epoch": 0.8583189483468331, "grad_norm": 1.0256133822907842, "learning_rate": 1.3123557739591026e-05, "loss": 0.2689790427684784, "step": 3232 }, { "epoch": 0.858584517328376, "grad_norm": 1.1569049456144243, "learning_rate": 1.3119386364312821e-05, "loss": 0.31956973671913147, "step": 3233 }, { "epoch": 0.858850086309919, "grad_norm": 1.0914807974802394, "learning_rate": 1.3115214387653711e-05, "loss": 0.2837323546409607, "step": 3234 }, { "epoch": 0.8591156552914619, "grad_norm": 1.0015578039784754, "learning_rate": 1.3111041810418011e-05, "loss": 0.2756272554397583, "step": 3235 }, { "epoch": 0.8593812242730049, "grad_norm": 1.0283979772106548, "learning_rate": 1.3106868633410139e-05, "loss": 0.2664923369884491, "step": 3236 }, { "epoch": 0.8596467932545478, "grad_norm": 1.2217960050611696, "learning_rate": 1.3102694857434637e-05, "loss": 0.2842246890068054, "step": 3237 }, { "epoch": 0.8599123622360908, "grad_norm": 1.0632739499737671, "learning_rate": 1.3098520483296159e-05, "loss": 0.3066467344760895, "step": 3238 }, { "epoch": 0.8601779312176338, "grad_norm": 1.148754786147734, "learning_rate": 1.3094345511799478e-05, "loss": 0.3042510151863098, "step": 3239 }, { "epoch": 0.8604435001991767, "grad_norm": 0.9995895975923785, "learning_rate": 1.3090169943749475e-05, "loss": 0.2753696143627167, "step": 3240 }, { "epoch": 0.8607090691807197, "grad_norm": 1.0325788591675433, "learning_rate": 1.3085993779951154e-05, "loss": 0.2561766803264618, "step": 3241 }, { "epoch": 0.8609746381622626, "grad_norm": 1.2136300404308455, "learning_rate": 1.3081817021209626e-05, "loss": 0.297982782125473, "step": 3242 }, { "epoch": 0.8612402071438056, "grad_norm": 1.0615498924909679, "learning_rate": 1.3077639668330124e-05, "loss": 0.2961920499801636, "step": 3243 }, { "epoch": 0.8615057761253485, "grad_norm": 1.1445145037694135, "learning_rate": 1.3073461722117991e-05, "loss": 0.2868857979774475, "step": 3244 }, { "epoch": 0.8617713451068915, "grad_norm": 0.9475657969770804, "learning_rate": 1.3069283183378683e-05, "loss": 0.22930951416492462, "step": 3245 }, { "epoch": 0.8620369140884345, "grad_norm": 1.1416904771862697, "learning_rate": 1.306510405291778e-05, "loss": 0.29737964272499084, "step": 3246 }, { "epoch": 0.8623024830699775, "grad_norm": 1.0401904023883137, "learning_rate": 1.3060924331540964e-05, "loss": 0.2764522433280945, "step": 3247 }, { "epoch": 0.8625680520515204, "grad_norm": 0.9863739655208709, "learning_rate": 1.3056744020054039e-05, "loss": 0.27608832716941833, "step": 3248 }, { "epoch": 0.8628336210330634, "grad_norm": 1.0115944755696356, "learning_rate": 1.3052563119262915e-05, "loss": 0.25667035579681396, "step": 3249 }, { "epoch": 0.8630991900146063, "grad_norm": 1.1289498412687866, "learning_rate": 1.3048381629973622e-05, "loss": 0.3015863597393036, "step": 3250 }, { "epoch": 0.8633647589961493, "grad_norm": 1.123802742380982, "learning_rate": 1.3044199552992307e-05, "loss": 0.2798422873020172, "step": 3251 }, { "epoch": 0.8636303279776922, "grad_norm": 1.1385670465264601, "learning_rate": 1.304001688912522e-05, "loss": 0.2856596112251282, "step": 3252 }, { "epoch": 0.8638958969592352, "grad_norm": 1.2094473565150297, "learning_rate": 1.303583363917873e-05, "loss": 0.30247554183006287, "step": 3253 }, { "epoch": 0.8641614659407781, "grad_norm": 1.1517937069448307, "learning_rate": 1.303164980395932e-05, "loss": 0.26817965507507324, "step": 3254 }, { "epoch": 0.8644270349223211, "grad_norm": 1.197653632931973, "learning_rate": 1.3027465384273579e-05, "loss": 0.26919034123420715, "step": 3255 }, { "epoch": 0.864692603903864, "grad_norm": 1.1206851183742237, "learning_rate": 1.3023280380928223e-05, "loss": 0.29495447874069214, "step": 3256 }, { "epoch": 0.864958172885407, "grad_norm": 1.0428738517831404, "learning_rate": 1.3019094794730063e-05, "loss": 0.26766717433929443, "step": 3257 }, { "epoch": 0.86522374186695, "grad_norm": 0.9998039586765358, "learning_rate": 1.3014908626486032e-05, "loss": 0.2573341131210327, "step": 3258 }, { "epoch": 0.8654893108484929, "grad_norm": 1.226366277313196, "learning_rate": 1.3010721877003177e-05, "loss": 0.32776498794555664, "step": 3259 }, { "epoch": 0.8657548798300359, "grad_norm": 1.1631189448763641, "learning_rate": 1.3006534547088651e-05, "loss": 0.3107950687408447, "step": 3260 }, { "epoch": 0.8660204488115788, "grad_norm": 1.0476224109192296, "learning_rate": 1.3002346637549726e-05, "loss": 0.26143360137939453, "step": 3261 }, { "epoch": 0.8662860177931218, "grad_norm": 1.035123297672666, "learning_rate": 1.2998158149193773e-05, "loss": 0.25666722655296326, "step": 3262 }, { "epoch": 0.8665515867746647, "grad_norm": 1.1492097701405037, "learning_rate": 1.2993969082828296e-05, "loss": 0.2982695698738098, "step": 3263 }, { "epoch": 0.8668171557562077, "grad_norm": 1.0937256102841277, "learning_rate": 1.2989779439260888e-05, "loss": 0.30144304037094116, "step": 3264 }, { "epoch": 0.8670827247377506, "grad_norm": 1.0563159913050848, "learning_rate": 1.2985589219299264e-05, "loss": 0.30421534180641174, "step": 3265 }, { "epoch": 0.8673482937192936, "grad_norm": 1.0698350081311019, "learning_rate": 1.298139842375125e-05, "loss": 0.23653842508792877, "step": 3266 }, { "epoch": 0.8676138627008365, "grad_norm": 1.2059661362441823, "learning_rate": 1.2977207053424781e-05, "loss": 0.284118115901947, "step": 3267 }, { "epoch": 0.8678794316823795, "grad_norm": 1.0387152548948486, "learning_rate": 1.2973015109127907e-05, "loss": 0.30857348442077637, "step": 3268 }, { "epoch": 0.8681450006639224, "grad_norm": 1.0987728632322369, "learning_rate": 1.2968822591668784e-05, "loss": 0.2826589047908783, "step": 3269 }, { "epoch": 0.8684105696454654, "grad_norm": 1.109218087764862, "learning_rate": 1.2964629501855678e-05, "loss": 0.27634552121162415, "step": 3270 }, { "epoch": 0.8686761386270083, "grad_norm": 1.0217259699141916, "learning_rate": 1.296043584049697e-05, "loss": 0.25823545455932617, "step": 3271 }, { "epoch": 0.8689417076085513, "grad_norm": 1.148249635090711, "learning_rate": 1.2956241608401145e-05, "loss": 0.28939294815063477, "step": 3272 }, { "epoch": 0.8692072765900942, "grad_norm": 1.0622455952024017, "learning_rate": 1.2952046806376806e-05, "loss": 0.3042459785938263, "step": 3273 }, { "epoch": 0.8694728455716373, "grad_norm": 1.042505415392428, "learning_rate": 1.2947851435232658e-05, "loss": 0.2834415137767792, "step": 3274 }, { "epoch": 0.8697384145531802, "grad_norm": 1.144903021800522, "learning_rate": 1.2943655495777518e-05, "loss": 0.28226330876350403, "step": 3275 }, { "epoch": 0.8700039835347232, "grad_norm": 1.023547316743189, "learning_rate": 1.2939458988820317e-05, "loss": 0.2796105742454529, "step": 3276 }, { "epoch": 0.8702695525162661, "grad_norm": 0.9903193313068561, "learning_rate": 1.2935261915170091e-05, "loss": 0.24790553748607635, "step": 3277 }, { "epoch": 0.8705351214978091, "grad_norm": 1.0279177898991045, "learning_rate": 1.2931064275635987e-05, "loss": 0.25101587176322937, "step": 3278 }, { "epoch": 0.870800690479352, "grad_norm": 1.1728597267839225, "learning_rate": 1.2926866071027257e-05, "loss": 0.3060816526412964, "step": 3279 }, { "epoch": 0.871066259460895, "grad_norm": 1.1510511467115991, "learning_rate": 1.2922667302153268e-05, "loss": 0.3137212097644806, "step": 3280 }, { "epoch": 0.871331828442438, "grad_norm": 0.9977159840643061, "learning_rate": 1.2918467969823497e-05, "loss": 0.2391548752784729, "step": 3281 }, { "epoch": 0.8715973974239809, "grad_norm": 1.2003880700717509, "learning_rate": 1.2914268074847516e-05, "loss": 0.3219330608844757, "step": 3282 }, { "epoch": 0.8718629664055239, "grad_norm": 1.126134187698585, "learning_rate": 1.2910067618035025e-05, "loss": 0.2934436798095703, "step": 3283 }, { "epoch": 0.8721285353870668, "grad_norm": 1.2016016844780073, "learning_rate": 1.2905866600195815e-05, "loss": 0.2919486165046692, "step": 3284 }, { "epoch": 0.8723941043686098, "grad_norm": 1.1895929482131946, "learning_rate": 1.2901665022139796e-05, "loss": 0.2840641438961029, "step": 3285 }, { "epoch": 0.8726596733501527, "grad_norm": 1.0215741253911979, "learning_rate": 1.2897462884676983e-05, "loss": 0.24151530861854553, "step": 3286 }, { "epoch": 0.8729252423316957, "grad_norm": 1.0040194757671277, "learning_rate": 1.28932601886175e-05, "loss": 0.24515505135059357, "step": 3287 }, { "epoch": 0.8731908113132386, "grad_norm": 1.2173512735867882, "learning_rate": 1.2889056934771577e-05, "loss": 0.2561264634132385, "step": 3288 }, { "epoch": 0.8734563802947816, "grad_norm": 1.1645401251165897, "learning_rate": 1.2884853123949547e-05, "loss": 0.2798641622066498, "step": 3289 }, { "epoch": 0.8737219492763245, "grad_norm": 1.2693161910394721, "learning_rate": 1.288064875696186e-05, "loss": 0.35207298398017883, "step": 3290 }, { "epoch": 0.8739875182578675, "grad_norm": 1.0184365377421387, "learning_rate": 1.2876443834619066e-05, "loss": 0.2778821289539337, "step": 3291 }, { "epoch": 0.8742530872394104, "grad_norm": 1.044209880952949, "learning_rate": 1.2872238357731825e-05, "loss": 0.2691737413406372, "step": 3292 }, { "epoch": 0.8745186562209534, "grad_norm": 1.1392637940929287, "learning_rate": 1.2868032327110904e-05, "loss": 0.25476595759391785, "step": 3293 }, { "epoch": 0.8747842252024963, "grad_norm": 1.012064080488804, "learning_rate": 1.2863825743567174e-05, "loss": 0.258474737405777, "step": 3294 }, { "epoch": 0.8750497941840393, "grad_norm": 1.17733236715245, "learning_rate": 1.285961860791162e-05, "loss": 0.32421568036079407, "step": 3295 }, { "epoch": 0.8753153631655822, "grad_norm": 1.0747747984737868, "learning_rate": 1.2855410920955323e-05, "loss": 0.3090333342552185, "step": 3296 }, { "epoch": 0.8755809321471252, "grad_norm": 1.1729934635240566, "learning_rate": 1.2851202683509476e-05, "loss": 0.26548707485198975, "step": 3297 }, { "epoch": 0.8758465011286681, "grad_norm": 2.497627852681845, "learning_rate": 1.2846993896385378e-05, "loss": 0.3002355098724365, "step": 3298 }, { "epoch": 0.8761120701102111, "grad_norm": 1.1706582997439863, "learning_rate": 1.2842784560394433e-05, "loss": 0.2924933135509491, "step": 3299 }, { "epoch": 0.876377639091754, "grad_norm": 1.1544391256229967, "learning_rate": 1.2838574676348155e-05, "loss": 0.2886514663696289, "step": 3300 }, { "epoch": 0.876643208073297, "grad_norm": 1.1131138367993383, "learning_rate": 1.2834364245058155e-05, "loss": 0.29821154475212097, "step": 3301 }, { "epoch": 0.87690877705484, "grad_norm": 1.0278540671542709, "learning_rate": 1.2830153267336159e-05, "loss": 0.2656530737876892, "step": 3302 }, { "epoch": 0.877174346036383, "grad_norm": 1.2018449655833119, "learning_rate": 1.282594174399399e-05, "loss": 0.3437826633453369, "step": 3303 }, { "epoch": 0.877439915017926, "grad_norm": 1.0564301800372577, "learning_rate": 1.2821729675843581e-05, "loss": 0.29773175716400146, "step": 3304 }, { "epoch": 0.8777054839994689, "grad_norm": 1.0707167209814024, "learning_rate": 1.2817517063696973e-05, "loss": 0.29772818088531494, "step": 3305 }, { "epoch": 0.8779710529810119, "grad_norm": 1.1530012432828134, "learning_rate": 1.2813303908366303e-05, "loss": 0.3266611099243164, "step": 3306 }, { "epoch": 0.8782366219625548, "grad_norm": 1.0044541774243023, "learning_rate": 1.2809090210663818e-05, "loss": 0.26599690318107605, "step": 3307 }, { "epoch": 0.8785021909440978, "grad_norm": 1.0142651525790767, "learning_rate": 1.2804875971401872e-05, "loss": 0.27988117933273315, "step": 3308 }, { "epoch": 0.8787677599256407, "grad_norm": 1.0221522532224918, "learning_rate": 1.2800661191392916e-05, "loss": 0.2630334496498108, "step": 3309 }, { "epoch": 0.8790333289071837, "grad_norm": 1.022950247187023, "learning_rate": 1.2796445871449517e-05, "loss": 0.2628091871738434, "step": 3310 }, { "epoch": 0.8792988978887266, "grad_norm": 1.1994310454875075, "learning_rate": 1.2792230012384333e-05, "loss": 0.3443898558616638, "step": 3311 }, { "epoch": 0.8795644668702696, "grad_norm": 1.0673533832636588, "learning_rate": 1.2788013615010136e-05, "loss": 0.2966022491455078, "step": 3312 }, { "epoch": 0.8798300358518125, "grad_norm": 1.1030087744198647, "learning_rate": 1.2783796680139793e-05, "loss": 0.2995494604110718, "step": 3313 }, { "epoch": 0.8800956048333555, "grad_norm": 1.0504434000468303, "learning_rate": 1.2779579208586283e-05, "loss": 0.2652590870857239, "step": 3314 }, { "epoch": 0.8803611738148984, "grad_norm": 1.1388460976467547, "learning_rate": 1.2775361201162684e-05, "loss": 0.3145690858364105, "step": 3315 }, { "epoch": 0.8806267427964414, "grad_norm": 1.040210802651612, "learning_rate": 1.2771142658682175e-05, "loss": 0.25744086503982544, "step": 3316 }, { "epoch": 0.8808923117779843, "grad_norm": 1.1618029117732733, "learning_rate": 1.2766923581958046e-05, "loss": 0.3129793405532837, "step": 3317 }, { "epoch": 0.8811578807595273, "grad_norm": 1.166975234876197, "learning_rate": 1.2762703971803684e-05, "loss": 0.233384907245636, "step": 3318 }, { "epoch": 0.8814234497410702, "grad_norm": 0.9242808009438505, "learning_rate": 1.2758483829032579e-05, "loss": 0.2422962635755539, "step": 3319 }, { "epoch": 0.8816890187226132, "grad_norm": 1.0844595421589949, "learning_rate": 1.2754263154458328e-05, "loss": 0.2801973819732666, "step": 3320 }, { "epoch": 0.8819545877041561, "grad_norm": 1.294346594070355, "learning_rate": 1.2750041948894621e-05, "loss": 0.30659937858581543, "step": 3321 }, { "epoch": 0.8822201566856991, "grad_norm": 1.0921019252616484, "learning_rate": 1.274582021315526e-05, "loss": 0.28527066111564636, "step": 3322 }, { "epoch": 0.882485725667242, "grad_norm": 1.0598264473011552, "learning_rate": 1.2741597948054146e-05, "loss": 0.23065675795078278, "step": 3323 }, { "epoch": 0.882751294648785, "grad_norm": 1.0918730747592962, "learning_rate": 1.2737375154405283e-05, "loss": 0.2727832794189453, "step": 3324 }, { "epoch": 0.8830168636303279, "grad_norm": 1.0789259788038712, "learning_rate": 1.273315183302277e-05, "loss": 0.26809507608413696, "step": 3325 }, { "epoch": 0.8832824326118709, "grad_norm": 1.1647625824499415, "learning_rate": 1.2728927984720823e-05, "loss": 0.3250407576560974, "step": 3326 }, { "epoch": 0.8835480015934138, "grad_norm": 1.0915300736309757, "learning_rate": 1.2724703610313742e-05, "loss": 0.2651330232620239, "step": 3327 }, { "epoch": 0.8838135705749568, "grad_norm": 1.206298710080754, "learning_rate": 1.2720478710615944e-05, "loss": 0.27337920665740967, "step": 3328 }, { "epoch": 0.8840791395564997, "grad_norm": 1.0282478968996285, "learning_rate": 1.2716253286441935e-05, "loss": 0.2664092183113098, "step": 3329 }, { "epoch": 0.8843447085380428, "grad_norm": 1.1354570950284573, "learning_rate": 1.2712027338606323e-05, "loss": 0.27927765250205994, "step": 3330 }, { "epoch": 0.8846102775195858, "grad_norm": 1.1204979208217445, "learning_rate": 1.270780086792383e-05, "loss": 0.27241113781929016, "step": 3331 }, { "epoch": 0.8848758465011287, "grad_norm": 1.0795162414965664, "learning_rate": 1.2703573875209264e-05, "loss": 0.28279373049736023, "step": 3332 }, { "epoch": 0.8851414154826717, "grad_norm": 1.1634487658284207, "learning_rate": 1.2699346361277538e-05, "loss": 0.3011108934879303, "step": 3333 }, { "epoch": 0.8854069844642146, "grad_norm": 2.772716513531517, "learning_rate": 1.2695118326943671e-05, "loss": 0.3071288764476776, "step": 3334 }, { "epoch": 0.8856725534457576, "grad_norm": 1.0969950934626527, "learning_rate": 1.2690889773022778e-05, "loss": 0.2688761353492737, "step": 3335 }, { "epoch": 0.8859381224273005, "grad_norm": 1.1363327585955358, "learning_rate": 1.2686660700330074e-05, "loss": 0.2788669466972351, "step": 3336 }, { "epoch": 0.8862036914088435, "grad_norm": 1.0884694079711634, "learning_rate": 1.268243110968087e-05, "loss": 0.2801516652107239, "step": 3337 }, { "epoch": 0.8864692603903864, "grad_norm": 1.0414904749451368, "learning_rate": 1.2678201001890587e-05, "loss": 0.2876908779144287, "step": 3338 }, { "epoch": 0.8867348293719294, "grad_norm": 1.1731879069090343, "learning_rate": 1.2673970377774733e-05, "loss": 0.27709734439849854, "step": 3339 }, { "epoch": 0.8870003983534723, "grad_norm": 1.2053408848372587, "learning_rate": 1.266973923814893e-05, "loss": 0.3191622793674469, "step": 3340 }, { "epoch": 0.8872659673350153, "grad_norm": 1.098682297791164, "learning_rate": 1.2665507583828889e-05, "loss": 0.2873385548591614, "step": 3341 }, { "epoch": 0.8875315363165582, "grad_norm": 1.1730973936717166, "learning_rate": 1.2661275415630421e-05, "loss": 0.2922922372817993, "step": 3342 }, { "epoch": 0.8877971052981012, "grad_norm": 1.1127017834272521, "learning_rate": 1.2657042734369443e-05, "loss": 0.305694043636322, "step": 3343 }, { "epoch": 0.8880626742796441, "grad_norm": 1.120364019457983, "learning_rate": 1.2652809540861958e-05, "loss": 0.29108062386512756, "step": 3344 }, { "epoch": 0.8883282432611871, "grad_norm": 1.076655765525218, "learning_rate": 1.2648575835924084e-05, "loss": 0.24170495569705963, "step": 3345 }, { "epoch": 0.88859381224273, "grad_norm": 1.4853370236272063, "learning_rate": 1.2644341620372025e-05, "loss": 0.2987719476222992, "step": 3346 }, { "epoch": 0.888859381224273, "grad_norm": 0.9743774864126274, "learning_rate": 1.2640106895022088e-05, "loss": 0.21037599444389343, "step": 3347 }, { "epoch": 0.889124950205816, "grad_norm": 1.034527053965976, "learning_rate": 1.2635871660690677e-05, "loss": 0.25263655185699463, "step": 3348 }, { "epoch": 0.8893905191873589, "grad_norm": 1.2196740502064325, "learning_rate": 1.2631635918194301e-05, "loss": 0.30169543623924255, "step": 3349 }, { "epoch": 0.8896560881689018, "grad_norm": 1.0624381650731511, "learning_rate": 1.2627399668349554e-05, "loss": 0.26982420682907104, "step": 3350 }, { "epoch": 0.8899216571504448, "grad_norm": 1.1785068724165282, "learning_rate": 1.262316291197314e-05, "loss": 0.3281899690628052, "step": 3351 }, { "epoch": 0.8901872261319878, "grad_norm": 1.1157278400935415, "learning_rate": 1.2618925649881852e-05, "loss": 0.30140435695648193, "step": 3352 }, { "epoch": 0.8904527951135307, "grad_norm": 0.9928732296573972, "learning_rate": 1.261468788289259e-05, "loss": 0.22343885898590088, "step": 3353 }, { "epoch": 0.8907183640950737, "grad_norm": 1.0410264886026745, "learning_rate": 1.261044961182234e-05, "loss": 0.2889901399612427, "step": 3354 }, { "epoch": 0.8909839330766166, "grad_norm": 1.0933214790144683, "learning_rate": 1.260621083748819e-05, "loss": 0.27896153926849365, "step": 3355 }, { "epoch": 0.8912495020581596, "grad_norm": 1.077111437166839, "learning_rate": 1.2601971560707328e-05, "loss": 0.29390811920166016, "step": 3356 }, { "epoch": 0.8915150710397025, "grad_norm": 1.0468332572471015, "learning_rate": 1.2597731782297036e-05, "loss": 0.2872384190559387, "step": 3357 }, { "epoch": 0.8917806400212455, "grad_norm": 1.3094137802442116, "learning_rate": 1.2593491503074698e-05, "loss": 0.29753726720809937, "step": 3358 }, { "epoch": 0.8920462090027885, "grad_norm": 1.1441306843080605, "learning_rate": 1.2589250723857782e-05, "loss": 0.31631946563720703, "step": 3359 }, { "epoch": 0.8923117779843315, "grad_norm": 1.1374138683367387, "learning_rate": 1.2585009445463867e-05, "loss": 0.2932048738002777, "step": 3360 }, { "epoch": 0.8925773469658744, "grad_norm": 1.0483655110874528, "learning_rate": 1.2580767668710614e-05, "loss": 0.2902034521102905, "step": 3361 }, { "epoch": 0.8928429159474174, "grad_norm": 1.0712531988705474, "learning_rate": 1.2576525394415795e-05, "loss": 0.2596299648284912, "step": 3362 }, { "epoch": 0.8931084849289603, "grad_norm": 1.1916540375753872, "learning_rate": 1.2572282623397268e-05, "loss": 0.29102641344070435, "step": 3363 }, { "epoch": 0.8933740539105033, "grad_norm": 1.236954620143465, "learning_rate": 1.2568039356472985e-05, "loss": 0.2970406711101532, "step": 3364 }, { "epoch": 0.8936396228920462, "grad_norm": 1.1384210267422126, "learning_rate": 1.2563795594461003e-05, "loss": 0.2916618585586548, "step": 3365 }, { "epoch": 0.8939051918735892, "grad_norm": 1.1769911575713834, "learning_rate": 1.2559551338179468e-05, "loss": 0.3217374086380005, "step": 3366 }, { "epoch": 0.8941707608551321, "grad_norm": 1.1228623922561494, "learning_rate": 1.255530658844662e-05, "loss": 0.3000059425830841, "step": 3367 }, { "epoch": 0.8944363298366751, "grad_norm": 1.2170346898517979, "learning_rate": 1.2551061346080804e-05, "loss": 0.2848728895187378, "step": 3368 }, { "epoch": 0.894701898818218, "grad_norm": 1.3197542136745113, "learning_rate": 1.2546815611900442e-05, "loss": 0.3328903317451477, "step": 3369 }, { "epoch": 0.894967467799761, "grad_norm": 1.0838958961687528, "learning_rate": 1.2542569386724069e-05, "loss": 0.2920045256614685, "step": 3370 }, { "epoch": 0.895233036781304, "grad_norm": 1.0679716869166582, "learning_rate": 1.2538322671370305e-05, "loss": 0.30370092391967773, "step": 3371 }, { "epoch": 0.8954986057628469, "grad_norm": 1.069215534600395, "learning_rate": 1.2534075466657866e-05, "loss": 0.24454624950885773, "step": 3372 }, { "epoch": 0.8957641747443899, "grad_norm": 1.172481734803523, "learning_rate": 1.2529827773405566e-05, "loss": 0.30908581614494324, "step": 3373 }, { "epoch": 0.8960297437259328, "grad_norm": 1.1095939186212227, "learning_rate": 1.2525579592432304e-05, "loss": 0.2792360782623291, "step": 3374 }, { "epoch": 0.8962953127074758, "grad_norm": 1.0658472517819026, "learning_rate": 1.2521330924557087e-05, "loss": 0.285555362701416, "step": 3375 }, { "epoch": 0.8965608816890187, "grad_norm": 1.1649386203925687, "learning_rate": 1.2517081770599002e-05, "loss": 0.3159451484680176, "step": 3376 }, { "epoch": 0.8968264506705617, "grad_norm": 1.2867424735092035, "learning_rate": 1.2512832131377237e-05, "loss": 0.35929200053215027, "step": 3377 }, { "epoch": 0.8970920196521046, "grad_norm": 1.0781651079446009, "learning_rate": 1.2508582007711074e-05, "loss": 0.28624874353408813, "step": 3378 }, { "epoch": 0.8973575886336476, "grad_norm": 1.0156684050998903, "learning_rate": 1.2504331400419884e-05, "loss": 0.27670109272003174, "step": 3379 }, { "epoch": 0.8976231576151905, "grad_norm": 1.0786636895703534, "learning_rate": 1.2500080310323139e-05, "loss": 0.2894589304924011, "step": 3380 }, { "epoch": 0.8978887265967335, "grad_norm": 1.1385795160382524, "learning_rate": 1.2495828738240396e-05, "loss": 0.31378716230392456, "step": 3381 }, { "epoch": 0.8981542955782764, "grad_norm": 1.3149597134232174, "learning_rate": 1.2491576684991306e-05, "loss": 0.33676713705062866, "step": 3382 }, { "epoch": 0.8984198645598194, "grad_norm": 0.9814689350619926, "learning_rate": 1.2487324151395618e-05, "loss": 0.2875351011753082, "step": 3383 }, { "epoch": 0.8986854335413623, "grad_norm": 1.1646557221945626, "learning_rate": 1.2483071138273168e-05, "loss": 0.29729989171028137, "step": 3384 }, { "epoch": 0.8989510025229053, "grad_norm": 1.0864970585536224, "learning_rate": 1.2478817646443888e-05, "loss": 0.3227398991584778, "step": 3385 }, { "epoch": 0.8992165715044482, "grad_norm": 1.1586445900518523, "learning_rate": 1.2474563676727803e-05, "loss": 0.2664690315723419, "step": 3386 }, { "epoch": 0.8994821404859913, "grad_norm": 1.1748792923054732, "learning_rate": 1.2470309229945021e-05, "loss": 0.29543352127075195, "step": 3387 }, { "epoch": 0.8997477094675342, "grad_norm": 0.9899792334789409, "learning_rate": 1.2466054306915756e-05, "loss": 0.26658856868743896, "step": 3388 }, { "epoch": 0.9000132784490772, "grad_norm": 1.123207894421506, "learning_rate": 1.2461798908460305e-05, "loss": 0.2899627387523651, "step": 3389 }, { "epoch": 0.9002788474306201, "grad_norm": 1.1137567335053833, "learning_rate": 1.245754303539906e-05, "loss": 0.2708336114883423, "step": 3390 }, { "epoch": 0.9005444164121631, "grad_norm": 1.1459655330577214, "learning_rate": 1.2453286688552502e-05, "loss": 0.28124746680259705, "step": 3391 }, { "epoch": 0.900809985393706, "grad_norm": 1.0470005335558448, "learning_rate": 1.2449029868741202e-05, "loss": 0.2599399983882904, "step": 3392 }, { "epoch": 0.901075554375249, "grad_norm": 0.9576026734877732, "learning_rate": 1.2444772576785828e-05, "loss": 0.25035667419433594, "step": 3393 }, { "epoch": 0.901341123356792, "grad_norm": 1.1148471766082222, "learning_rate": 1.2440514813507136e-05, "loss": 0.2772521376609802, "step": 3394 }, { "epoch": 0.9016066923383349, "grad_norm": 1.103787889433512, "learning_rate": 1.2436256579725969e-05, "loss": 0.3282839357852936, "step": 3395 }, { "epoch": 0.9018722613198779, "grad_norm": 1.080988888326222, "learning_rate": 1.2431997876263269e-05, "loss": 0.2507914900779724, "step": 3396 }, { "epoch": 0.9021378303014208, "grad_norm": 1.1123927965933749, "learning_rate": 1.2427738703940055e-05, "loss": 0.2620914876461029, "step": 3397 }, { "epoch": 0.9024033992829638, "grad_norm": 1.0713438905056172, "learning_rate": 1.2423479063577458e-05, "loss": 0.26561641693115234, "step": 3398 }, { "epoch": 0.9026689682645067, "grad_norm": 1.151582271756571, "learning_rate": 1.2419218955996677e-05, "loss": 0.2998678386211395, "step": 3399 }, { "epoch": 0.9029345372460497, "grad_norm": 1.0484454707225395, "learning_rate": 1.2414958382019017e-05, "loss": 0.2368398755788803, "step": 3400 }, { "epoch": 0.9032001062275926, "grad_norm": 1.0429929570241405, "learning_rate": 1.241069734246586e-05, "loss": 0.2623558044433594, "step": 3401 }, { "epoch": 0.9034656752091356, "grad_norm": 1.0283944167565489, "learning_rate": 1.2406435838158686e-05, "loss": 0.2693074941635132, "step": 3402 }, { "epoch": 0.9037312441906785, "grad_norm": 1.1211950634171715, "learning_rate": 1.2402173869919063e-05, "loss": 0.2933652698993683, "step": 3403 }, { "epoch": 0.9039968131722215, "grad_norm": 1.0858313001207585, "learning_rate": 1.2397911438568651e-05, "loss": 0.28515487909317017, "step": 3404 }, { "epoch": 0.9042623821537644, "grad_norm": 1.1243916508543286, "learning_rate": 1.2393648544929193e-05, "loss": 0.282942533493042, "step": 3405 }, { "epoch": 0.9045279511353074, "grad_norm": 1.112018853789466, "learning_rate": 1.2389385189822526e-05, "loss": 0.28300392627716064, "step": 3406 }, { "epoch": 0.9047935201168503, "grad_norm": 1.0490322847853841, "learning_rate": 1.2385121374070577e-05, "loss": 0.25697019696235657, "step": 3407 }, { "epoch": 0.9050590890983933, "grad_norm": 1.15038978087342, "learning_rate": 1.2380857098495355e-05, "loss": 0.31156057119369507, "step": 3408 }, { "epoch": 0.9053246580799362, "grad_norm": 1.1544066045654053, "learning_rate": 1.2376592363918967e-05, "loss": 0.2943422794342041, "step": 3409 }, { "epoch": 0.9055902270614792, "grad_norm": 0.9968457114080438, "learning_rate": 1.2372327171163596e-05, "loss": 0.2792074680328369, "step": 3410 }, { "epoch": 0.9058557960430221, "grad_norm": 1.0328662447203703, "learning_rate": 1.2368061521051526e-05, "loss": 0.2547443211078644, "step": 3411 }, { "epoch": 0.9061213650245651, "grad_norm": 1.068901181257851, "learning_rate": 1.2363795414405125e-05, "loss": 0.25637373328208923, "step": 3412 }, { "epoch": 0.906386934006108, "grad_norm": 1.1660475318941728, "learning_rate": 1.2359528852046844e-05, "loss": 0.3269123435020447, "step": 3413 }, { "epoch": 0.906652502987651, "grad_norm": 1.0197427295072394, "learning_rate": 1.2355261834799232e-05, "loss": 0.28538423776626587, "step": 3414 }, { "epoch": 0.906918071969194, "grad_norm": 1.1343354993973966, "learning_rate": 1.2350994363484915e-05, "loss": 0.2961096167564392, "step": 3415 }, { "epoch": 0.907183640950737, "grad_norm": 1.0930595123597455, "learning_rate": 1.2346726438926613e-05, "loss": 0.3134537935256958, "step": 3416 }, { "epoch": 0.90744920993228, "grad_norm": 1.018679268761631, "learning_rate": 1.2342458061947129e-05, "loss": 0.2614031434059143, "step": 3417 }, { "epoch": 0.9077147789138229, "grad_norm": 1.0403373381004117, "learning_rate": 1.2338189233369357e-05, "loss": 0.27166056632995605, "step": 3418 }, { "epoch": 0.9079803478953659, "grad_norm": 1.0735839504787106, "learning_rate": 1.2333919954016277e-05, "loss": 0.26053497195243835, "step": 3419 }, { "epoch": 0.9082459168769088, "grad_norm": 1.1112591016079632, "learning_rate": 1.2329650224710956e-05, "loss": 0.3109636902809143, "step": 3420 }, { "epoch": 0.9085114858584518, "grad_norm": 1.081828404421451, "learning_rate": 1.232538004627655e-05, "loss": 0.2576507329940796, "step": 3421 }, { "epoch": 0.9087770548399947, "grad_norm": 1.0981308884589311, "learning_rate": 1.2321109419536292e-05, "loss": 0.2525216341018677, "step": 3422 }, { "epoch": 0.9090426238215377, "grad_norm": 1.0732531844020532, "learning_rate": 1.2316838345313517e-05, "loss": 0.2483336180448532, "step": 3423 }, { "epoch": 0.9093081928030806, "grad_norm": 1.1592146270526706, "learning_rate": 1.2312566824431631e-05, "loss": 0.26372796297073364, "step": 3424 }, { "epoch": 0.9095737617846236, "grad_norm": 1.1537675520237485, "learning_rate": 1.2308294857714138e-05, "loss": 0.2933644950389862, "step": 3425 }, { "epoch": 0.9098393307661665, "grad_norm": 1.0330883162146767, "learning_rate": 1.2304022445984618e-05, "loss": 0.2543371915817261, "step": 3426 }, { "epoch": 0.9101048997477095, "grad_norm": 1.1689002717846686, "learning_rate": 1.2299749590066745e-05, "loss": 0.29246431589126587, "step": 3427 }, { "epoch": 0.9103704687292524, "grad_norm": 1.0141798843769114, "learning_rate": 1.2295476290784273e-05, "loss": 0.2475431263446808, "step": 3428 }, { "epoch": 0.9106360377107954, "grad_norm": 1.1845034794986053, "learning_rate": 1.2291202548961042e-05, "loss": 0.3312363624572754, "step": 3429 }, { "epoch": 0.9109016066923383, "grad_norm": 1.0459618447051044, "learning_rate": 1.2286928365420987e-05, "loss": 0.25192639231681824, "step": 3430 }, { "epoch": 0.9111671756738813, "grad_norm": 1.2038671566275931, "learning_rate": 1.2282653740988114e-05, "loss": 0.23189345002174377, "step": 3431 }, { "epoch": 0.9114327446554242, "grad_norm": 1.17767221221897, "learning_rate": 1.2278378676486522e-05, "loss": 0.2888398766517639, "step": 3432 }, { "epoch": 0.9116983136369672, "grad_norm": 1.1295595703903276, "learning_rate": 1.2274103172740387e-05, "loss": 0.2857785224914551, "step": 3433 }, { "epoch": 0.9119638826185101, "grad_norm": 1.039533312390003, "learning_rate": 1.2269827230573986e-05, "loss": 0.23961025476455688, "step": 3434 }, { "epoch": 0.9122294516000531, "grad_norm": 1.1192521835175562, "learning_rate": 1.2265550850811663e-05, "loss": 0.2791004478931427, "step": 3435 }, { "epoch": 0.912495020581596, "grad_norm": 1.052040685054951, "learning_rate": 1.2261274034277858e-05, "loss": 0.2875480651855469, "step": 3436 }, { "epoch": 0.912760589563139, "grad_norm": 1.12188070500717, "learning_rate": 1.2256996781797086e-05, "loss": 0.29422929883003235, "step": 3437 }, { "epoch": 0.9130261585446819, "grad_norm": 1.2976046274469295, "learning_rate": 1.225271909419395e-05, "loss": 0.27114444971084595, "step": 3438 }, { "epoch": 0.9132917275262249, "grad_norm": 1.0684416452719028, "learning_rate": 1.2248440972293146e-05, "loss": 0.3007166385650635, "step": 3439 }, { "epoch": 0.9135572965077678, "grad_norm": 1.1408150577224654, "learning_rate": 1.224416241691944e-05, "loss": 0.28550055623054504, "step": 3440 }, { "epoch": 0.9138228654893108, "grad_norm": 1.1159473328967766, "learning_rate": 1.2239883428897687e-05, "loss": 0.2861761450767517, "step": 3441 }, { "epoch": 0.9140884344708538, "grad_norm": 1.1186358936011263, "learning_rate": 1.2235604009052823e-05, "loss": 0.3288506865501404, "step": 3442 }, { "epoch": 0.9143540034523968, "grad_norm": 1.2101661293343442, "learning_rate": 1.2231324158209876e-05, "loss": 0.33189019560813904, "step": 3443 }, { "epoch": 0.9146195724339398, "grad_norm": 0.9931883995236199, "learning_rate": 1.2227043877193947e-05, "loss": 0.20846885442733765, "step": 3444 }, { "epoch": 0.9148851414154827, "grad_norm": 0.9579263575635046, "learning_rate": 1.2222763166830223e-05, "loss": 0.25184741616249084, "step": 3445 }, { "epoch": 0.9151507103970257, "grad_norm": 1.0775642304955, "learning_rate": 1.2218482027943977e-05, "loss": 0.2954701781272888, "step": 3446 }, { "epoch": 0.9154162793785686, "grad_norm": 1.055908963813806, "learning_rate": 1.221420046136056e-05, "loss": 0.263336718082428, "step": 3447 }, { "epoch": 0.9156818483601116, "grad_norm": 1.2181481624195412, "learning_rate": 1.2209918467905405e-05, "loss": 0.31178128719329834, "step": 3448 }, { "epoch": 0.9159474173416545, "grad_norm": 1.1248939907914326, "learning_rate": 1.2205636048404037e-05, "loss": 0.30373090505599976, "step": 3449 }, { "epoch": 0.9162129863231975, "grad_norm": 1.1316476755108689, "learning_rate": 1.2201353203682052e-05, "loss": 0.31057459115982056, "step": 3450 }, { "epoch": 0.9164785553047404, "grad_norm": 1.0432699213656527, "learning_rate": 1.2197069934565126e-05, "loss": 0.26834744215011597, "step": 3451 }, { "epoch": 0.9167441242862834, "grad_norm": 1.0235490532622333, "learning_rate": 1.2192786241879033e-05, "loss": 0.30224066972732544, "step": 3452 }, { "epoch": 0.9170096932678263, "grad_norm": 1.1136690118430506, "learning_rate": 1.2188502126449616e-05, "loss": 0.28249508142471313, "step": 3453 }, { "epoch": 0.9172752622493693, "grad_norm": 1.0210144972314754, "learning_rate": 1.2184217589102798e-05, "loss": 0.24823793768882751, "step": 3454 }, { "epoch": 0.9175408312309122, "grad_norm": 1.1878687209379464, "learning_rate": 1.2179932630664589e-05, "loss": 0.32556289434432983, "step": 3455 }, { "epoch": 0.9178064002124552, "grad_norm": 1.0899520670240972, "learning_rate": 1.217564725196108e-05, "loss": 0.29420584440231323, "step": 3456 }, { "epoch": 0.9180719691939981, "grad_norm": 1.028247015068141, "learning_rate": 1.2171361453818437e-05, "loss": 0.29294469952583313, "step": 3457 }, { "epoch": 0.9183375381755411, "grad_norm": 1.0399893903415627, "learning_rate": 1.2167075237062918e-05, "loss": 0.3173823952674866, "step": 3458 }, { "epoch": 0.918603107157084, "grad_norm": 1.1571492956528482, "learning_rate": 1.2162788602520851e-05, "loss": 0.32950159907341003, "step": 3459 }, { "epoch": 0.918868676138627, "grad_norm": 1.0478118037587627, "learning_rate": 1.2158501551018647e-05, "loss": 0.3011544942855835, "step": 3460 }, { "epoch": 0.91913424512017, "grad_norm": 1.0135067760604335, "learning_rate": 1.2154214083382802e-05, "loss": 0.25775954127311707, "step": 3461 }, { "epoch": 0.9193998141017129, "grad_norm": 1.0514508898774713, "learning_rate": 1.214992620043989e-05, "loss": 0.286748468875885, "step": 3462 }, { "epoch": 0.9196653830832558, "grad_norm": 1.1050004366949897, "learning_rate": 1.214563790301656e-05, "loss": 0.30588221549987793, "step": 3463 }, { "epoch": 0.9199309520647988, "grad_norm": 1.0079666808538812, "learning_rate": 1.214134919193955e-05, "loss": 0.23506608605384827, "step": 3464 }, { "epoch": 0.9201965210463418, "grad_norm": 1.037364536446331, "learning_rate": 1.2137060068035672e-05, "loss": 0.2612350285053253, "step": 3465 }, { "epoch": 0.9204620900278847, "grad_norm": 1.0810309706979688, "learning_rate": 1.2132770532131815e-05, "loss": 0.3268318772315979, "step": 3466 }, { "epoch": 0.9207276590094277, "grad_norm": 1.0723394192428657, "learning_rate": 1.2128480585054951e-05, "loss": 0.2970179319381714, "step": 3467 }, { "epoch": 0.9209932279909706, "grad_norm": 1.0036147426745694, "learning_rate": 1.2124190227632138e-05, "loss": 0.2910206615924835, "step": 3468 }, { "epoch": 0.9212587969725136, "grad_norm": 1.1089890742219906, "learning_rate": 1.2119899460690496e-05, "loss": 0.3000222444534302, "step": 3469 }, { "epoch": 0.9215243659540565, "grad_norm": 1.1166450826016983, "learning_rate": 1.2115608285057242e-05, "loss": 0.30304765701293945, "step": 3470 }, { "epoch": 0.9217899349355996, "grad_norm": 0.9893826238823328, "learning_rate": 1.2111316701559663e-05, "loss": 0.26393038034439087, "step": 3471 }, { "epoch": 0.9220555039171425, "grad_norm": 1.1384217438340345, "learning_rate": 1.2107024711025128e-05, "loss": 0.3111063838005066, "step": 3472 }, { "epoch": 0.9223210728986855, "grad_norm": 0.9599961450252364, "learning_rate": 1.2102732314281073e-05, "loss": 0.2897321581840515, "step": 3473 }, { "epoch": 0.9225866418802284, "grad_norm": 1.1396280258666305, "learning_rate": 1.2098439512155028e-05, "loss": 0.2835896611213684, "step": 3474 }, { "epoch": 0.9228522108617714, "grad_norm": 1.0165194494005183, "learning_rate": 1.2094146305474596e-05, "loss": 0.27648821473121643, "step": 3475 }, { "epoch": 0.9231177798433143, "grad_norm": 1.1221504506656363, "learning_rate": 1.2089852695067457e-05, "loss": 0.2528097629547119, "step": 3476 }, { "epoch": 0.9233833488248573, "grad_norm": 1.1105562286202324, "learning_rate": 1.2085558681761361e-05, "loss": 0.2750067412853241, "step": 3477 }, { "epoch": 0.9236489178064002, "grad_norm": 1.1199967050670125, "learning_rate": 1.2081264266384148e-05, "loss": 0.3115938901901245, "step": 3478 }, { "epoch": 0.9239144867879432, "grad_norm": 1.1203071431737686, "learning_rate": 1.2076969449763734e-05, "loss": 0.2858419418334961, "step": 3479 }, { "epoch": 0.9241800557694861, "grad_norm": 1.051118385350032, "learning_rate": 1.2072674232728105e-05, "loss": 0.24990032613277435, "step": 3480 }, { "epoch": 0.9244456247510291, "grad_norm": 1.2991104394876676, "learning_rate": 1.206837861610533e-05, "loss": 0.23106999695301056, "step": 3481 }, { "epoch": 0.924711193732572, "grad_norm": 1.0396779513824141, "learning_rate": 1.2064082600723546e-05, "loss": 0.2737967371940613, "step": 3482 }, { "epoch": 0.924976762714115, "grad_norm": 1.1890061925781694, "learning_rate": 1.2059786187410984e-05, "loss": 0.2810317873954773, "step": 3483 }, { "epoch": 0.925242331695658, "grad_norm": 1.1358698893490913, "learning_rate": 1.2055489376995938e-05, "loss": 0.30852559208869934, "step": 3484 }, { "epoch": 0.9255079006772009, "grad_norm": 1.1003932874354148, "learning_rate": 1.2051192170306784e-05, "loss": 0.2956348657608032, "step": 3485 }, { "epoch": 0.9257734696587439, "grad_norm": 1.18261367067389, "learning_rate": 1.204689456817197e-05, "loss": 0.2825953960418701, "step": 3486 }, { "epoch": 0.9260390386402868, "grad_norm": 1.2502616697865143, "learning_rate": 1.2042596571420025e-05, "loss": 0.3351168632507324, "step": 3487 }, { "epoch": 0.9263046076218298, "grad_norm": 1.2354469073344645, "learning_rate": 1.2038298180879548e-05, "loss": 0.2718926668167114, "step": 3488 }, { "epoch": 0.9265701766033727, "grad_norm": 1.1387239259181285, "learning_rate": 1.2033999397379223e-05, "loss": 0.29036587476730347, "step": 3489 }, { "epoch": 0.9268357455849157, "grad_norm": 0.9499049433325992, "learning_rate": 1.2029700221747804e-05, "loss": 0.22917689383029938, "step": 3490 }, { "epoch": 0.9271013145664586, "grad_norm": 1.2322966399012754, "learning_rate": 1.2025400654814119e-05, "loss": 0.2963443398475647, "step": 3491 }, { "epoch": 0.9273668835480016, "grad_norm": 1.100231072465541, "learning_rate": 1.2021100697407075e-05, "loss": 0.2866464853286743, "step": 3492 }, { "epoch": 0.9276324525295445, "grad_norm": 1.1717529025248212, "learning_rate": 1.2016800350355654e-05, "loss": 0.3069216012954712, "step": 3493 }, { "epoch": 0.9278980215110875, "grad_norm": 1.0745448017128252, "learning_rate": 1.2012499614488913e-05, "loss": 0.27206870913505554, "step": 3494 }, { "epoch": 0.9281635904926304, "grad_norm": 1.0995365532444106, "learning_rate": 1.2008198490635978e-05, "loss": 0.32130372524261475, "step": 3495 }, { "epoch": 0.9284291594741734, "grad_norm": 1.151015013814654, "learning_rate": 1.2003896979626061e-05, "loss": 0.30631259083747864, "step": 3496 }, { "epoch": 0.9286947284557163, "grad_norm": 1.125856079122124, "learning_rate": 1.199959508228844e-05, "loss": 0.3005716800689697, "step": 3497 }, { "epoch": 0.9289602974372593, "grad_norm": 0.9983757548693274, "learning_rate": 1.1995292799452472e-05, "loss": 0.2381039410829544, "step": 3498 }, { "epoch": 0.9292258664188023, "grad_norm": 1.1338580261514946, "learning_rate": 1.1990990131947582e-05, "loss": 0.31764286756515503, "step": 3499 }, { "epoch": 0.9294914354003453, "grad_norm": 1.1445030838538803, "learning_rate": 1.1986687080603273e-05, "loss": 0.3029370903968811, "step": 3500 }, { "epoch": 0.9297570043818882, "grad_norm": 1.0814133109661386, "learning_rate": 1.198238364624913e-05, "loss": 0.30967646837234497, "step": 3501 }, { "epoch": 0.9300225733634312, "grad_norm": 1.0376796287878236, "learning_rate": 1.1978079829714799e-05, "loss": 0.24687506258487701, "step": 3502 }, { "epoch": 0.9302881423449741, "grad_norm": 1.0529899744692286, "learning_rate": 1.1973775631830007e-05, "loss": 0.25909408926963806, "step": 3503 }, { "epoch": 0.9305537113265171, "grad_norm": 1.1136411983367804, "learning_rate": 1.196947105342455e-05, "loss": 0.281025230884552, "step": 3504 }, { "epoch": 0.93081928030806, "grad_norm": 1.2858712177395888, "learning_rate": 1.1965166095328302e-05, "loss": 0.33401811122894287, "step": 3505 }, { "epoch": 0.931084849289603, "grad_norm": 0.9732764276792689, "learning_rate": 1.1960860758371208e-05, "loss": 0.25839388370513916, "step": 3506 }, { "epoch": 0.931350418271146, "grad_norm": 0.954364218435113, "learning_rate": 1.1956555043383286e-05, "loss": 0.23343560099601746, "step": 3507 }, { "epoch": 0.9316159872526889, "grad_norm": 1.176408931412559, "learning_rate": 1.1952248951194629e-05, "loss": 0.31106436252593994, "step": 3508 }, { "epoch": 0.9318815562342319, "grad_norm": 1.108418204277134, "learning_rate": 1.1947942482635395e-05, "loss": 0.29152095317840576, "step": 3509 }, { "epoch": 0.9321471252157748, "grad_norm": 1.2651732065185788, "learning_rate": 1.1943635638535827e-05, "loss": 0.31517675518989563, "step": 3510 }, { "epoch": 0.9324126941973178, "grad_norm": 1.2309480505410157, "learning_rate": 1.1939328419726231e-05, "loss": 0.33221137523651123, "step": 3511 }, { "epoch": 0.9326782631788607, "grad_norm": 1.2277892053470791, "learning_rate": 1.193502082703699e-05, "loss": 0.314359575510025, "step": 3512 }, { "epoch": 0.9329438321604037, "grad_norm": 1.129757464324541, "learning_rate": 1.1930712861298553e-05, "loss": 0.2879924178123474, "step": 3513 }, { "epoch": 0.9332094011419466, "grad_norm": 1.1622909402406336, "learning_rate": 1.1926404523341443e-05, "loss": 0.2732955515384674, "step": 3514 }, { "epoch": 0.9334749701234896, "grad_norm": 1.1586501434218468, "learning_rate": 1.1922095813996264e-05, "loss": 0.32156097888946533, "step": 3515 }, { "epoch": 0.9337405391050325, "grad_norm": 1.110486475282156, "learning_rate": 1.1917786734093682e-05, "loss": 0.2694319486618042, "step": 3516 }, { "epoch": 0.9340061080865755, "grad_norm": 1.0871387001943549, "learning_rate": 1.1913477284464434e-05, "loss": 0.3049655258655548, "step": 3517 }, { "epoch": 0.9342716770681184, "grad_norm": 1.0962864613999421, "learning_rate": 1.1909167465939334e-05, "loss": 0.30053725838661194, "step": 3518 }, { "epoch": 0.9345372460496614, "grad_norm": 1.0261517334123498, "learning_rate": 1.1904857279349265e-05, "loss": 0.2611788809299469, "step": 3519 }, { "epoch": 0.9348028150312043, "grad_norm": 1.1400957154071245, "learning_rate": 1.1900546725525175e-05, "loss": 0.28344646096229553, "step": 3520 }, { "epoch": 0.9350683840127473, "grad_norm": 1.067093022484818, "learning_rate": 1.1896235805298093e-05, "loss": 0.2504042685031891, "step": 3521 }, { "epoch": 0.9353339529942902, "grad_norm": 1.0534608212516616, "learning_rate": 1.1891924519499113e-05, "loss": 0.27877938747406006, "step": 3522 }, { "epoch": 0.9355995219758332, "grad_norm": 1.046331705593262, "learning_rate": 1.1887612868959394e-05, "loss": 0.28176525235176086, "step": 3523 }, { "epoch": 0.9358650909573761, "grad_norm": 1.1750063194789062, "learning_rate": 1.1883300854510178e-05, "loss": 0.32376354932785034, "step": 3524 }, { "epoch": 0.9361306599389191, "grad_norm": 1.0908366283033504, "learning_rate": 1.1878988476982772e-05, "loss": 0.2846054434776306, "step": 3525 }, { "epoch": 0.936396228920462, "grad_norm": 1.0507783491664777, "learning_rate": 1.1874675737208546e-05, "loss": 0.25711044669151306, "step": 3526 }, { "epoch": 0.9366617979020051, "grad_norm": 1.078360429057703, "learning_rate": 1.1870362636018946e-05, "loss": 0.2810837924480438, "step": 3527 }, { "epoch": 0.936927366883548, "grad_norm": 1.2088151262046463, "learning_rate": 1.186604917424549e-05, "loss": 0.3090322017669678, "step": 3528 }, { "epoch": 0.937192935865091, "grad_norm": 1.061646146170892, "learning_rate": 1.1861735352719763e-05, "loss": 0.2797972559928894, "step": 3529 }, { "epoch": 0.937458504846634, "grad_norm": 1.3937474116807773, "learning_rate": 1.1857421172273415e-05, "loss": 0.3124893605709076, "step": 3530 }, { "epoch": 0.9377240738281769, "grad_norm": 1.1043040217194096, "learning_rate": 1.1853106633738174e-05, "loss": 0.28317195177078247, "step": 3531 }, { "epoch": 0.9379896428097199, "grad_norm": 1.0483798154842934, "learning_rate": 1.1848791737945823e-05, "loss": 0.27804574370384216, "step": 3532 }, { "epoch": 0.9382552117912628, "grad_norm": 1.1007797171562173, "learning_rate": 1.1844476485728236e-05, "loss": 0.24936731159687042, "step": 3533 }, { "epoch": 0.9385207807728058, "grad_norm": 1.16922301793574, "learning_rate": 1.1840160877917335e-05, "loss": 0.296974778175354, "step": 3534 }, { "epoch": 0.9387863497543487, "grad_norm": 1.1172266681075624, "learning_rate": 1.1835844915345117e-05, "loss": 0.3048890233039856, "step": 3535 }, { "epoch": 0.9390519187358917, "grad_norm": 1.0372698095624082, "learning_rate": 1.1831528598843654e-05, "loss": 0.2703601121902466, "step": 3536 }, { "epoch": 0.9393174877174346, "grad_norm": 1.123009081238491, "learning_rate": 1.1827211929245075e-05, "loss": 0.30738013982772827, "step": 3537 }, { "epoch": 0.9395830566989776, "grad_norm": 1.0660333251952498, "learning_rate": 1.1822894907381589e-05, "loss": 0.26538529992103577, "step": 3538 }, { "epoch": 0.9398486256805205, "grad_norm": 1.1050453871275616, "learning_rate": 1.1818577534085462e-05, "loss": 0.26795464754104614, "step": 3539 }, { "epoch": 0.9401141946620635, "grad_norm": 1.1533311536850575, "learning_rate": 1.1814259810189034e-05, "loss": 0.30891868472099304, "step": 3540 }, { "epoch": 0.9403797636436064, "grad_norm": 1.8167204702159565, "learning_rate": 1.1809941736524713e-05, "loss": 0.29164037108421326, "step": 3541 }, { "epoch": 0.9406453326251494, "grad_norm": 1.0875424396631934, "learning_rate": 1.180562331392497e-05, "loss": 0.30322739481925964, "step": 3542 }, { "epoch": 0.9409109016066923, "grad_norm": 1.0765622649066557, "learning_rate": 1.1801304543222349e-05, "loss": 0.275432288646698, "step": 3543 }, { "epoch": 0.9411764705882353, "grad_norm": 1.1566847425916267, "learning_rate": 1.1796985425249459e-05, "loss": 0.2788141965866089, "step": 3544 }, { "epoch": 0.9414420395697782, "grad_norm": 1.203313197377309, "learning_rate": 1.1792665960838967e-05, "loss": 0.24254676699638367, "step": 3545 }, { "epoch": 0.9417076085513212, "grad_norm": 1.1050026210111878, "learning_rate": 1.1788346150823625e-05, "loss": 0.2803058326244354, "step": 3546 }, { "epoch": 0.9419731775328641, "grad_norm": 1.0993090963339842, "learning_rate": 1.1784025996036232e-05, "loss": 0.3068317174911499, "step": 3547 }, { "epoch": 0.9422387465144071, "grad_norm": 0.9977731134117688, "learning_rate": 1.1779705497309673e-05, "loss": 0.23124024271965027, "step": 3548 }, { "epoch": 0.94250431549595, "grad_norm": 1.080710306089679, "learning_rate": 1.177538465547688e-05, "loss": 0.2815462648868561, "step": 3549 }, { "epoch": 0.942769884477493, "grad_norm": 1.1118952137889662, "learning_rate": 1.1771063471370862e-05, "loss": 0.29448196291923523, "step": 3550 }, { "epoch": 0.9430354534590359, "grad_norm": 1.2691077751501818, "learning_rate": 1.1766741945824698e-05, "loss": 0.3176615834236145, "step": 3551 }, { "epoch": 0.9433010224405789, "grad_norm": 1.1390071879475103, "learning_rate": 1.1762420079671527e-05, "loss": 0.29126274585723877, "step": 3552 }, { "epoch": 0.9435665914221218, "grad_norm": 1.084504171285626, "learning_rate": 1.1758097873744547e-05, "loss": 0.27074337005615234, "step": 3553 }, { "epoch": 0.9438321604036648, "grad_norm": 1.0495499557301764, "learning_rate": 1.175377532887703e-05, "loss": 0.2756083011627197, "step": 3554 }, { "epoch": 0.9440977293852079, "grad_norm": 1.1028881447166687, "learning_rate": 1.1749452445902315e-05, "loss": 0.26918384432792664, "step": 3555 }, { "epoch": 0.9443632983667508, "grad_norm": 1.0856468025535497, "learning_rate": 1.17451292256538e-05, "loss": 0.2550349235534668, "step": 3556 }, { "epoch": 0.9446288673482938, "grad_norm": 1.0791996633460945, "learning_rate": 1.1740805668964954e-05, "loss": 0.2601481080055237, "step": 3557 }, { "epoch": 0.9448944363298367, "grad_norm": 1.1367109564667788, "learning_rate": 1.1736481776669307e-05, "loss": 0.2848352789878845, "step": 3558 }, { "epoch": 0.9451600053113797, "grad_norm": 1.1168278064757895, "learning_rate": 1.173215754960045e-05, "loss": 0.266584575176239, "step": 3559 }, { "epoch": 0.9454255742929226, "grad_norm": 0.9979692557530664, "learning_rate": 1.172783298859205e-05, "loss": 0.25037410855293274, "step": 3560 }, { "epoch": 0.9456911432744656, "grad_norm": 1.1049326363207628, "learning_rate": 1.1723508094477825e-05, "loss": 0.30239278078079224, "step": 3561 }, { "epoch": 0.9459567122560085, "grad_norm": 1.0413977608943958, "learning_rate": 1.1719182868091567e-05, "loss": 0.2893553078174591, "step": 3562 }, { "epoch": 0.9462222812375515, "grad_norm": 1.215187947788902, "learning_rate": 1.1714857310267124e-05, "loss": 0.2840202748775482, "step": 3563 }, { "epoch": 0.9464878502190944, "grad_norm": 1.0615180068139964, "learning_rate": 1.1710531421838422e-05, "loss": 0.2614031732082367, "step": 3564 }, { "epoch": 0.9467534192006374, "grad_norm": 1.0290230331800772, "learning_rate": 1.1706205203639433e-05, "loss": 0.267095148563385, "step": 3565 }, { "epoch": 0.9470189881821803, "grad_norm": 1.2397291626994196, "learning_rate": 1.1701878656504206e-05, "loss": 0.25835227966308594, "step": 3566 }, { "epoch": 0.9472845571637233, "grad_norm": 1.1319162410146095, "learning_rate": 1.1697551781266845e-05, "loss": 0.27547580003738403, "step": 3567 }, { "epoch": 0.9475501261452662, "grad_norm": 1.089656044815204, "learning_rate": 1.169322457876152e-05, "loss": 0.251165509223938, "step": 3568 }, { "epoch": 0.9478156951268092, "grad_norm": 1.2350323802819905, "learning_rate": 1.1688897049822467e-05, "loss": 0.2738516926765442, "step": 3569 }, { "epoch": 0.9480812641083521, "grad_norm": 1.0315369616879289, "learning_rate": 1.1684569195283981e-05, "loss": 0.2745274305343628, "step": 3570 }, { "epoch": 0.9483468330898951, "grad_norm": 1.180099592022995, "learning_rate": 1.1680241015980423e-05, "loss": 0.28586819767951965, "step": 3571 }, { "epoch": 0.948612402071438, "grad_norm": 1.2233918967574897, "learning_rate": 1.167591251274621e-05, "loss": 0.2559577524662018, "step": 3572 }, { "epoch": 0.948877971052981, "grad_norm": 1.155824963337958, "learning_rate": 1.1671583686415833e-05, "loss": 0.26069143414497375, "step": 3573 }, { "epoch": 0.949143540034524, "grad_norm": 1.078529730225554, "learning_rate": 1.1667254537823838e-05, "loss": 0.26866453886032104, "step": 3574 }, { "epoch": 0.9494091090160669, "grad_norm": 1.0772599867154102, "learning_rate": 1.166292506780483e-05, "loss": 0.25285348296165466, "step": 3575 }, { "epoch": 0.9496746779976099, "grad_norm": 1.1335172942215501, "learning_rate": 1.1658595277193479e-05, "loss": 0.3330434262752533, "step": 3576 }, { "epoch": 0.9499402469791528, "grad_norm": 1.076438251163932, "learning_rate": 1.1654265166824522e-05, "loss": 0.2789473533630371, "step": 3577 }, { "epoch": 0.9502058159606958, "grad_norm": 1.2746037306212283, "learning_rate": 1.164993473753275e-05, "loss": 0.30984824895858765, "step": 3578 }, { "epoch": 0.9504713849422387, "grad_norm": 1.0517088315750878, "learning_rate": 1.164560399015302e-05, "loss": 0.23881833255290985, "step": 3579 }, { "epoch": 0.9507369539237817, "grad_norm": 1.1012484750770577, "learning_rate": 1.164127292552025e-05, "loss": 0.3027937114238739, "step": 3580 }, { "epoch": 0.9510025229053246, "grad_norm": 1.1998484228117954, "learning_rate": 1.1636941544469413e-05, "loss": 0.2901906371116638, "step": 3581 }, { "epoch": 0.9512680918868676, "grad_norm": 1.069491787313744, "learning_rate": 1.1632609847835556e-05, "loss": 0.28961148858070374, "step": 3582 }, { "epoch": 0.9515336608684106, "grad_norm": 1.0782542825887276, "learning_rate": 1.1628277836453774e-05, "loss": 0.2730783224105835, "step": 3583 }, { "epoch": 0.9517992298499536, "grad_norm": 1.0952017771476839, "learning_rate": 1.1623945511159232e-05, "loss": 0.3195485770702362, "step": 3584 }, { "epoch": 0.9520647988314965, "grad_norm": 1.1514370971708257, "learning_rate": 1.1619612872787144e-05, "loss": 0.3097516894340515, "step": 3585 }, { "epoch": 0.9523303678130395, "grad_norm": 1.0422990071728377, "learning_rate": 1.1615279922172796e-05, "loss": 0.2716284692287445, "step": 3586 }, { "epoch": 0.9525959367945824, "grad_norm": 0.9669355988334725, "learning_rate": 1.1610946660151531e-05, "loss": 0.2601209878921509, "step": 3587 }, { "epoch": 0.9528615057761254, "grad_norm": 1.1027425019898653, "learning_rate": 1.1606613087558748e-05, "loss": 0.28665289282798767, "step": 3588 }, { "epoch": 0.9531270747576683, "grad_norm": 1.082078861677668, "learning_rate": 1.1602279205229912e-05, "loss": 0.3019893765449524, "step": 3589 }, { "epoch": 0.9533926437392113, "grad_norm": 0.9778282797717269, "learning_rate": 1.1597945014000537e-05, "loss": 0.2635146677494049, "step": 3590 }, { "epoch": 0.9536582127207542, "grad_norm": 1.0527782897227813, "learning_rate": 1.1593610514706217e-05, "loss": 0.2704858183860779, "step": 3591 }, { "epoch": 0.9539237817022972, "grad_norm": 1.2295509988273574, "learning_rate": 1.1589275708182581e-05, "loss": 0.31997931003570557, "step": 3592 }, { "epoch": 0.9541893506838401, "grad_norm": 1.1529907760165448, "learning_rate": 1.1584940595265332e-05, "loss": 0.2308788150548935, "step": 3593 }, { "epoch": 0.9544549196653831, "grad_norm": 1.0980235303762964, "learning_rate": 1.1580605176790229e-05, "loss": 0.28886470198631287, "step": 3594 }, { "epoch": 0.954720488646926, "grad_norm": 1.313883667721807, "learning_rate": 1.157626945359309e-05, "loss": 0.30698686838150024, "step": 3595 }, { "epoch": 0.954986057628469, "grad_norm": 1.1087251273709688, "learning_rate": 1.1571933426509789e-05, "loss": 0.27475905418395996, "step": 3596 }, { "epoch": 0.955251626610012, "grad_norm": 1.1064883207545173, "learning_rate": 1.1567597096376264e-05, "loss": 0.2568071484565735, "step": 3597 }, { "epoch": 0.9555171955915549, "grad_norm": 1.28706485993144, "learning_rate": 1.1563260464028507e-05, "loss": 0.2574060261249542, "step": 3598 }, { "epoch": 0.9557827645730979, "grad_norm": 1.193494963897618, "learning_rate": 1.1558923530302571e-05, "loss": 0.2847997546195984, "step": 3599 }, { "epoch": 0.9560483335546408, "grad_norm": 1.0723094070831873, "learning_rate": 1.155458629603456e-05, "loss": 0.2594734728336334, "step": 3600 }, { "epoch": 0.9563139025361838, "grad_norm": 1.0020160427681732, "learning_rate": 1.155024876206065e-05, "loss": 0.2300589680671692, "step": 3601 }, { "epoch": 0.9565794715177267, "grad_norm": 1.1475438454718678, "learning_rate": 1.1545910929217059e-05, "loss": 0.29174795746803284, "step": 3602 }, { "epoch": 0.9568450404992697, "grad_norm": 1.0425930414114217, "learning_rate": 1.1541572798340076e-05, "loss": 0.2666400074958801, "step": 3603 }, { "epoch": 0.9571106094808126, "grad_norm": 1.0067559469755134, "learning_rate": 1.1537234370266035e-05, "loss": 0.24651308357715607, "step": 3604 }, { "epoch": 0.9573761784623556, "grad_norm": 1.1542471481522265, "learning_rate": 1.1532895645831339e-05, "loss": 0.29991376399993896, "step": 3605 }, { "epoch": 0.9576417474438985, "grad_norm": 1.0631305192934537, "learning_rate": 1.1528556625872443e-05, "loss": 0.27713578939437866, "step": 3606 }, { "epoch": 0.9579073164254415, "grad_norm": 1.0497999275546905, "learning_rate": 1.1524217311225857e-05, "loss": 0.26503294706344604, "step": 3607 }, { "epoch": 0.9581728854069844, "grad_norm": 1.1479000180189152, "learning_rate": 1.1519877702728149e-05, "loss": 0.28627675771713257, "step": 3608 }, { "epoch": 0.9584384543885274, "grad_norm": 1.0333891142616893, "learning_rate": 1.1515537801215944e-05, "loss": 0.26862916350364685, "step": 3609 }, { "epoch": 0.9587040233700703, "grad_norm": 1.2518522451268181, "learning_rate": 1.1511197607525926e-05, "loss": 0.29697147011756897, "step": 3610 }, { "epoch": 0.9589695923516134, "grad_norm": 1.0668919106736792, "learning_rate": 1.1506857122494832e-05, "loss": 0.2980155944824219, "step": 3611 }, { "epoch": 0.9592351613331563, "grad_norm": 1.1016644329026075, "learning_rate": 1.1502516346959458e-05, "loss": 0.2847440838813782, "step": 3612 }, { "epoch": 0.9595007303146993, "grad_norm": 1.1131533712076647, "learning_rate": 1.149817528175665e-05, "loss": 0.2812016010284424, "step": 3613 }, { "epoch": 0.9597662992962422, "grad_norm": 1.0387818826049915, "learning_rate": 1.1493833927723319e-05, "loss": 0.26856982707977295, "step": 3614 }, { "epoch": 0.9600318682777852, "grad_norm": 1.0595715138301371, "learning_rate": 1.1489492285696424e-05, "loss": 0.2651693820953369, "step": 3615 }, { "epoch": 0.9602974372593281, "grad_norm": 1.1384265947297394, "learning_rate": 1.1485150356512986e-05, "loss": 0.29811644554138184, "step": 3616 }, { "epoch": 0.9605630062408711, "grad_norm": 1.0449713925688802, "learning_rate": 1.1480808141010071e-05, "loss": 0.2622855007648468, "step": 3617 }, { "epoch": 0.960828575222414, "grad_norm": 1.1964334046740135, "learning_rate": 1.1476465640024814e-05, "loss": 0.3067246377468109, "step": 3618 }, { "epoch": 0.961094144203957, "grad_norm": 1.0999678942020576, "learning_rate": 1.1472122854394394e-05, "loss": 0.25928011536598206, "step": 3619 }, { "epoch": 0.9613597131855, "grad_norm": 1.0356853160291564, "learning_rate": 1.146777978495605e-05, "loss": 0.2574170231819153, "step": 3620 }, { "epoch": 0.9616252821670429, "grad_norm": 1.1366453776894136, "learning_rate": 1.1463436432547073e-05, "loss": 0.2845388650894165, "step": 3621 }, { "epoch": 0.9618908511485859, "grad_norm": 1.1067131961561003, "learning_rate": 1.145909279800481e-05, "loss": 0.28735876083374023, "step": 3622 }, { "epoch": 0.9621564201301288, "grad_norm": 1.100639151702203, "learning_rate": 1.1454748882166666e-05, "loss": 0.25739723443984985, "step": 3623 }, { "epoch": 0.9624219891116718, "grad_norm": 1.0743852778260963, "learning_rate": 1.1450404685870098e-05, "loss": 0.25144338607788086, "step": 3624 }, { "epoch": 0.9626875580932147, "grad_norm": 1.0451944769292063, "learning_rate": 1.144606020995261e-05, "loss": 0.23981891572475433, "step": 3625 }, { "epoch": 0.9629531270747577, "grad_norm": 1.1215387475511582, "learning_rate": 1.1441715455251764e-05, "loss": 0.30925339460372925, "step": 3626 }, { "epoch": 0.9632186960563006, "grad_norm": 1.1193965021491372, "learning_rate": 1.1437370422605184e-05, "loss": 0.2559184432029724, "step": 3627 }, { "epoch": 0.9634842650378436, "grad_norm": 1.221260182162867, "learning_rate": 1.1433025112850542e-05, "loss": 0.3001229166984558, "step": 3628 }, { "epoch": 0.9637498340193865, "grad_norm": 0.9957913669659347, "learning_rate": 1.1428679526825557e-05, "loss": 0.24304218590259552, "step": 3629 }, { "epoch": 0.9640154030009295, "grad_norm": 1.0405086595778643, "learning_rate": 1.1424333665368011e-05, "loss": 0.25677186250686646, "step": 3630 }, { "epoch": 0.9642809719824724, "grad_norm": 1.0362119568252992, "learning_rate": 1.141998752931573e-05, "loss": 0.2589085102081299, "step": 3631 }, { "epoch": 0.9645465409640154, "grad_norm": 1.1004952842028541, "learning_rate": 1.1415641119506601e-05, "loss": 0.2588059604167938, "step": 3632 }, { "epoch": 0.9648121099455583, "grad_norm": 1.1379378571012249, "learning_rate": 1.1411294436778562e-05, "loss": 0.26097869873046875, "step": 3633 }, { "epoch": 0.9650776789271013, "grad_norm": 1.2218308438631786, "learning_rate": 1.1406947481969598e-05, "loss": 0.26022520661354065, "step": 3634 }, { "epoch": 0.9653432479086442, "grad_norm": 1.0737420773814035, "learning_rate": 1.140260025591775e-05, "loss": 0.26242876052856445, "step": 3635 }, { "epoch": 0.9656088168901872, "grad_norm": 1.1396910340144906, "learning_rate": 1.1398252759461119e-05, "loss": 0.30035555362701416, "step": 3636 }, { "epoch": 0.9658743858717301, "grad_norm": 1.1365210980452296, "learning_rate": 1.1393904993437848e-05, "loss": 0.26388341188430786, "step": 3637 }, { "epoch": 0.9661399548532731, "grad_norm": 1.06242333907382, "learning_rate": 1.1389556958686132e-05, "loss": 0.28116434812545776, "step": 3638 }, { "epoch": 0.966405523834816, "grad_norm": 1.0513966621960738, "learning_rate": 1.1385208656044222e-05, "loss": 0.25372493267059326, "step": 3639 }, { "epoch": 0.9666710928163591, "grad_norm": 1.1171784181414381, "learning_rate": 1.1380860086350422e-05, "loss": 0.2648317813873291, "step": 3640 }, { "epoch": 0.966936661797902, "grad_norm": 1.0508956007113521, "learning_rate": 1.1376511250443082e-05, "loss": 0.26981276273727417, "step": 3641 }, { "epoch": 0.967202230779445, "grad_norm": 1.1513465918880585, "learning_rate": 1.1372162149160608e-05, "loss": 0.2934207618236542, "step": 3642 }, { "epoch": 0.967467799760988, "grad_norm": 0.9705407845284122, "learning_rate": 1.1367812783341454e-05, "loss": 0.24250900745391846, "step": 3643 }, { "epoch": 0.9677333687425309, "grad_norm": 1.0409007473472116, "learning_rate": 1.1363463153824125e-05, "loss": 0.2565772235393524, "step": 3644 }, { "epoch": 0.9679989377240739, "grad_norm": 1.2386980142351325, "learning_rate": 1.1359113261447183e-05, "loss": 0.28407829999923706, "step": 3645 }, { "epoch": 0.9682645067056168, "grad_norm": 1.1134220293120092, "learning_rate": 1.1354763107049234e-05, "loss": 0.2974489629268646, "step": 3646 }, { "epoch": 0.9685300756871598, "grad_norm": 1.1611486704366027, "learning_rate": 1.1350412691468935e-05, "loss": 0.27539899945259094, "step": 3647 }, { "epoch": 0.9687956446687027, "grad_norm": 1.1777496863563888, "learning_rate": 1.1346062015544997e-05, "loss": 0.28256523609161377, "step": 3648 }, { "epoch": 0.9690612136502457, "grad_norm": 1.0910813538672366, "learning_rate": 1.1341711080116176e-05, "loss": 0.27582883834838867, "step": 3649 }, { "epoch": 0.9693267826317886, "grad_norm": 1.2299419127493794, "learning_rate": 1.1337359886021285e-05, "loss": 0.3199389576911926, "step": 3650 }, { "epoch": 0.9695923516133316, "grad_norm": 1.078226808322517, "learning_rate": 1.1333008434099178e-05, "loss": 0.2922326922416687, "step": 3651 }, { "epoch": 0.9698579205948745, "grad_norm": 1.1833154338367669, "learning_rate": 1.1328656725188767e-05, "loss": 0.285635381937027, "step": 3652 }, { "epoch": 0.9701234895764175, "grad_norm": 1.1606724829825772, "learning_rate": 1.1324304760129009e-05, "loss": 0.3347492814064026, "step": 3653 }, { "epoch": 0.9703890585579604, "grad_norm": 1.1079831575977723, "learning_rate": 1.1319952539758912e-05, "loss": 0.27379873394966125, "step": 3654 }, { "epoch": 0.9706546275395034, "grad_norm": 1.2487680540467303, "learning_rate": 1.1315600064917534e-05, "loss": 0.27911311388015747, "step": 3655 }, { "epoch": 0.9709201965210463, "grad_norm": 1.187492816658345, "learning_rate": 1.1311247336443982e-05, "loss": 0.25750118494033813, "step": 3656 }, { "epoch": 0.9711857655025893, "grad_norm": 1.1010343448161526, "learning_rate": 1.1306894355177405e-05, "loss": 0.28723078966140747, "step": 3657 }, { "epoch": 0.9714513344841322, "grad_norm": 1.0378840795289885, "learning_rate": 1.1302541121957008e-05, "loss": 0.25269389152526855, "step": 3658 }, { "epoch": 0.9717169034656752, "grad_norm": 1.1923604766845932, "learning_rate": 1.1298187637622046e-05, "loss": 0.3041607439517975, "step": 3659 }, { "epoch": 0.9719824724472181, "grad_norm": 1.0812687625707742, "learning_rate": 1.1293833903011819e-05, "loss": 0.2826605439186096, "step": 3660 }, { "epoch": 0.9722480414287611, "grad_norm": 1.1010565715724137, "learning_rate": 1.1289479918965675e-05, "loss": 0.2830520570278168, "step": 3661 }, { "epoch": 0.972513610410304, "grad_norm": 1.0160541896764337, "learning_rate": 1.1285125686323011e-05, "loss": 0.24295952916145325, "step": 3662 }, { "epoch": 0.972779179391847, "grad_norm": 1.108181435484162, "learning_rate": 1.1280771205923269e-05, "loss": 0.28775808215141296, "step": 3663 }, { "epoch": 0.97304474837339, "grad_norm": 0.9715417125511246, "learning_rate": 1.127641647860595e-05, "loss": 0.24650296568870544, "step": 3664 }, { "epoch": 0.9733103173549329, "grad_norm": 0.9305293200248026, "learning_rate": 1.1272061505210584e-05, "loss": 0.22344040870666504, "step": 3665 }, { "epoch": 0.9735758863364758, "grad_norm": 1.0859092127038839, "learning_rate": 1.1267706286576759e-05, "loss": 0.26920852065086365, "step": 3666 }, { "epoch": 0.9738414553180188, "grad_norm": 1.1792674236289236, "learning_rate": 1.1263350823544115e-05, "loss": 0.27615875005722046, "step": 3667 }, { "epoch": 0.9741070242995619, "grad_norm": 1.0470064037587914, "learning_rate": 1.1258995116952334e-05, "loss": 0.2768712043762207, "step": 3668 }, { "epoch": 0.9743725932811048, "grad_norm": 1.0568329464095596, "learning_rate": 1.1254639167641141e-05, "loss": 0.27764153480529785, "step": 3669 }, { "epoch": 0.9746381622626478, "grad_norm": 1.139437307258024, "learning_rate": 1.1250282976450316e-05, "loss": 0.27423611283302307, "step": 3670 }, { "epoch": 0.9749037312441907, "grad_norm": 1.1238013222894891, "learning_rate": 1.1245926544219676e-05, "loss": 0.2626228332519531, "step": 3671 }, { "epoch": 0.9751693002257337, "grad_norm": 1.2807555997920204, "learning_rate": 1.1241569871789096e-05, "loss": 0.25524014234542847, "step": 3672 }, { "epoch": 0.9754348692072766, "grad_norm": 1.1042234540757712, "learning_rate": 1.1237212959998485e-05, "loss": 0.30857735872268677, "step": 3673 }, { "epoch": 0.9757004381888196, "grad_norm": 1.0235359310129009, "learning_rate": 1.1232855809687807e-05, "loss": 0.25099021196365356, "step": 3674 }, { "epoch": 0.9759660071703625, "grad_norm": 1.0116202981123898, "learning_rate": 1.1228498421697068e-05, "loss": 0.22664576768875122, "step": 3675 }, { "epoch": 0.9762315761519055, "grad_norm": 1.151038777130998, "learning_rate": 1.1224140796866322e-05, "loss": 0.24727366864681244, "step": 3676 }, { "epoch": 0.9764971451334484, "grad_norm": 1.160849411640656, "learning_rate": 1.121978293603567e-05, "loss": 0.2561935782432556, "step": 3677 }, { "epoch": 0.9767627141149914, "grad_norm": 1.10648815955184, "learning_rate": 1.1215424840045254e-05, "loss": 0.2594214677810669, "step": 3678 }, { "epoch": 0.9770282830965343, "grad_norm": 1.130419852826836, "learning_rate": 1.1211066509735265e-05, "loss": 0.2383778691291809, "step": 3679 }, { "epoch": 0.9772938520780773, "grad_norm": 1.2393377504128167, "learning_rate": 1.1206707945945934e-05, "loss": 0.2864387035369873, "step": 3680 }, { "epoch": 0.9775594210596202, "grad_norm": 1.2012269867709167, "learning_rate": 1.1202349149517541e-05, "loss": 0.30415672063827515, "step": 3681 }, { "epoch": 0.9778249900411632, "grad_norm": 1.1590063847406842, "learning_rate": 1.1197990121290415e-05, "loss": 0.3030807375907898, "step": 3682 }, { "epoch": 0.9780905590227061, "grad_norm": 1.1251124481371277, "learning_rate": 1.1193630862104922e-05, "loss": 0.2518938481807709, "step": 3683 }, { "epoch": 0.9783561280042491, "grad_norm": 1.2096921428918863, "learning_rate": 1.1189271372801474e-05, "loss": 0.25353187322616577, "step": 3684 }, { "epoch": 0.978621696985792, "grad_norm": 1.401372369430627, "learning_rate": 1.1184911654220534e-05, "loss": 0.30639684200286865, "step": 3685 }, { "epoch": 0.978887265967335, "grad_norm": 1.1636733460077495, "learning_rate": 1.1180551707202602e-05, "loss": 0.295099139213562, "step": 3686 }, { "epoch": 0.979152834948878, "grad_norm": 1.0596592048702305, "learning_rate": 1.1176191532588224e-05, "loss": 0.2428167164325714, "step": 3687 }, { "epoch": 0.9794184039304209, "grad_norm": 1.0401088292404943, "learning_rate": 1.1171831131217989e-05, "loss": 0.2716362774372101, "step": 3688 }, { "epoch": 0.9796839729119639, "grad_norm": 1.1130709970940986, "learning_rate": 1.1167470503932534e-05, "loss": 0.28350287675857544, "step": 3689 }, { "epoch": 0.9799495418935068, "grad_norm": 1.0214004744947676, "learning_rate": 1.1163109651572535e-05, "loss": 0.2776945233345032, "step": 3690 }, { "epoch": 0.9802151108750498, "grad_norm": 1.041237294346951, "learning_rate": 1.115874857497871e-05, "loss": 0.2712942063808441, "step": 3691 }, { "epoch": 0.9804806798565927, "grad_norm": 1.058232702389033, "learning_rate": 1.1154387274991829e-05, "loss": 0.2530008852481842, "step": 3692 }, { "epoch": 0.9807462488381357, "grad_norm": 1.0327043619893976, "learning_rate": 1.1150025752452693e-05, "loss": 0.24889500439167023, "step": 3693 }, { "epoch": 0.9810118178196786, "grad_norm": 1.1013842404358833, "learning_rate": 1.1145664008202158e-05, "loss": 0.3051255941390991, "step": 3694 }, { "epoch": 0.9812773868012216, "grad_norm": 1.0503003262830894, "learning_rate": 1.1141302043081112e-05, "loss": 0.24781765043735504, "step": 3695 }, { "epoch": 0.9815429557827646, "grad_norm": 1.2510153019418302, "learning_rate": 1.1136939857930497e-05, "loss": 0.3021858036518097, "step": 3696 }, { "epoch": 0.9818085247643076, "grad_norm": 1.1052947984569603, "learning_rate": 1.1132577453591284e-05, "loss": 0.3026372194290161, "step": 3697 }, { "epoch": 0.9820740937458505, "grad_norm": 1.2367828155450835, "learning_rate": 1.1128214830904494e-05, "loss": 0.31511861085891724, "step": 3698 }, { "epoch": 0.9823396627273935, "grad_norm": 1.076549494496895, "learning_rate": 1.112385199071119e-05, "loss": 0.27885258197784424, "step": 3699 }, { "epoch": 0.9826052317089364, "grad_norm": 1.0546536629749794, "learning_rate": 1.1119488933852477e-05, "loss": 0.2724893391132355, "step": 3700 }, { "epoch": 0.9828708006904794, "grad_norm": 1.0683428715266594, "learning_rate": 1.1115125661169503e-05, "loss": 0.2836218774318695, "step": 3701 }, { "epoch": 0.9831363696720223, "grad_norm": 1.1039385208642913, "learning_rate": 1.111076217350345e-05, "loss": 0.24220457673072815, "step": 3702 }, { "epoch": 0.9834019386535653, "grad_norm": 1.1586770288767172, "learning_rate": 1.1106398471695554e-05, "loss": 0.28599557280540466, "step": 3703 }, { "epoch": 0.9836675076351082, "grad_norm": 1.0806945340822165, "learning_rate": 1.110203455658708e-05, "loss": 0.30559849739074707, "step": 3704 }, { "epoch": 0.9839330766166512, "grad_norm": 1.0573640293446354, "learning_rate": 1.109767042901934e-05, "loss": 0.2763117551803589, "step": 3705 }, { "epoch": 0.9841986455981941, "grad_norm": 0.9563131800944344, "learning_rate": 1.109330608983369e-05, "loss": 0.2028101086616516, "step": 3706 }, { "epoch": 0.9844642145797371, "grad_norm": 0.9787835815750591, "learning_rate": 1.1088941539871515e-05, "loss": 0.25386112928390503, "step": 3707 }, { "epoch": 0.98472978356128, "grad_norm": 1.075996733851366, "learning_rate": 1.1084576779974257e-05, "loss": 0.2588289976119995, "step": 3708 }, { "epoch": 0.984995352542823, "grad_norm": 1.3003014971272602, "learning_rate": 1.1080211810983385e-05, "loss": 0.3201071321964264, "step": 3709 }, { "epoch": 0.985260921524366, "grad_norm": 1.2030478206249715, "learning_rate": 1.107584663374042e-05, "loss": 0.28439003229141235, "step": 3710 }, { "epoch": 0.9855264905059089, "grad_norm": 1.060347062251152, "learning_rate": 1.1071481249086908e-05, "loss": 0.2734091579914093, "step": 3711 }, { "epoch": 0.9857920594874519, "grad_norm": 1.2115603819692051, "learning_rate": 1.1067115657864451e-05, "loss": 0.2917581796646118, "step": 3712 }, { "epoch": 0.9860576284689948, "grad_norm": 1.2063997459644484, "learning_rate": 1.1062749860914681e-05, "loss": 0.3569914996623993, "step": 3713 }, { "epoch": 0.9863231974505378, "grad_norm": 1.127711451799425, "learning_rate": 1.1058383859079271e-05, "loss": 0.2574514150619507, "step": 3714 }, { "epoch": 0.9865887664320807, "grad_norm": 1.119813552337215, "learning_rate": 1.1054017653199936e-05, "loss": 0.3035826086997986, "step": 3715 }, { "epoch": 0.9868543354136237, "grad_norm": 1.5863085854725767, "learning_rate": 1.1049651244118424e-05, "loss": 0.28067824244499207, "step": 3716 }, { "epoch": 0.9871199043951666, "grad_norm": 1.0916600834300794, "learning_rate": 1.1045284632676535e-05, "loss": 0.2511579394340515, "step": 3717 }, { "epoch": 0.9873854733767096, "grad_norm": 1.2657546371764674, "learning_rate": 1.1040917819716097e-05, "loss": 0.3059889078140259, "step": 3718 }, { "epoch": 0.9876510423582525, "grad_norm": 1.1224253435238671, "learning_rate": 1.103655080607898e-05, "loss": 0.2642200291156769, "step": 3719 }, { "epoch": 0.9879166113397955, "grad_norm": 1.0969568004465404, "learning_rate": 1.1032183592607094e-05, "loss": 0.2743483781814575, "step": 3720 }, { "epoch": 0.9881821803213384, "grad_norm": 1.1317768374698567, "learning_rate": 1.1027816180142383e-05, "loss": 0.2597433030605316, "step": 3721 }, { "epoch": 0.9884477493028814, "grad_norm": 1.0759312888673545, "learning_rate": 1.1023448569526834e-05, "loss": 0.24439337849617004, "step": 3722 }, { "epoch": 0.9887133182844243, "grad_norm": 1.0386429343076329, "learning_rate": 1.1019080761602473e-05, "loss": 0.2520195245742798, "step": 3723 }, { "epoch": 0.9889788872659674, "grad_norm": 1.0921837996926786, "learning_rate": 1.1014712757211359e-05, "loss": 0.2904737889766693, "step": 3724 }, { "epoch": 0.9892444562475103, "grad_norm": 1.12008182824954, "learning_rate": 1.1010344557195588e-05, "loss": 0.28096869587898254, "step": 3725 }, { "epoch": 0.9895100252290533, "grad_norm": 1.8392230806075218, "learning_rate": 1.1005976162397309e-05, "loss": 0.317839652299881, "step": 3726 }, { "epoch": 0.9897755942105962, "grad_norm": 1.19381185696067, "learning_rate": 1.100160757365869e-05, "loss": 0.29213201999664307, "step": 3727 }, { "epoch": 0.9900411631921392, "grad_norm": 1.215113877896921, "learning_rate": 1.0997238791821943e-05, "loss": 0.27034991979599, "step": 3728 }, { "epoch": 0.9903067321736821, "grad_norm": 1.2893524723691567, "learning_rate": 1.0992869817729317e-05, "loss": 0.30504971742630005, "step": 3729 }, { "epoch": 0.9905723011552251, "grad_norm": 1.109889585740049, "learning_rate": 1.09885006522231e-05, "loss": 0.30673110485076904, "step": 3730 }, { "epoch": 0.990837870136768, "grad_norm": 1.0963153712692437, "learning_rate": 1.0984131296145616e-05, "loss": 0.27990686893463135, "step": 3731 }, { "epoch": 0.991103439118311, "grad_norm": 1.0228240366531471, "learning_rate": 1.0979761750339225e-05, "loss": 0.24379019439220428, "step": 3732 }, { "epoch": 0.991369008099854, "grad_norm": 1.1055702239918885, "learning_rate": 1.0975392015646323e-05, "loss": 0.30554595589637756, "step": 3733 }, { "epoch": 0.9916345770813969, "grad_norm": 1.062606047652276, "learning_rate": 1.0971022092909342e-05, "loss": 0.245269775390625, "step": 3734 }, { "epoch": 0.9919001460629399, "grad_norm": 1.0977829197687445, "learning_rate": 1.0966651982970757e-05, "loss": 0.2732948064804077, "step": 3735 }, { "epoch": 0.9921657150444828, "grad_norm": 0.992060831416128, "learning_rate": 1.0962281686673071e-05, "loss": 0.25989004969596863, "step": 3736 }, { "epoch": 0.9924312840260258, "grad_norm": 1.1415489224758493, "learning_rate": 1.0957911204858824e-05, "loss": 0.32891198992729187, "step": 3737 }, { "epoch": 0.9926968530075687, "grad_norm": 1.094277657297916, "learning_rate": 1.0953540538370591e-05, "loss": 0.29184675216674805, "step": 3738 }, { "epoch": 0.9929624219891117, "grad_norm": 1.1381026162174743, "learning_rate": 1.094916968805099e-05, "loss": 0.2784018814563751, "step": 3739 }, { "epoch": 0.9932279909706546, "grad_norm": 1.1670677505581852, "learning_rate": 1.094479865474267e-05, "loss": 0.26586195826530457, "step": 3740 }, { "epoch": 0.9934935599521976, "grad_norm": 0.9575913416137994, "learning_rate": 1.094042743928831e-05, "loss": 0.24593298137187958, "step": 3741 }, { "epoch": 0.9937591289337405, "grad_norm": 1.065966707682552, "learning_rate": 1.0936056042530632e-05, "loss": 0.2462792694568634, "step": 3742 }, { "epoch": 0.9940246979152835, "grad_norm": 1.2074020558104472, "learning_rate": 1.0931684465312388e-05, "loss": 0.2688900828361511, "step": 3743 }, { "epoch": 0.9942902668968264, "grad_norm": 1.099682442025033, "learning_rate": 1.0927312708476367e-05, "loss": 0.2842782735824585, "step": 3744 }, { "epoch": 0.9945558358783694, "grad_norm": 1.0548829148077135, "learning_rate": 1.0922940772865393e-05, "loss": 0.249299556016922, "step": 3745 }, { "epoch": 0.9948214048599123, "grad_norm": 1.175705262338143, "learning_rate": 1.0918568659322325e-05, "loss": 0.2765413522720337, "step": 3746 }, { "epoch": 0.9950869738414553, "grad_norm": 1.1414819691892306, "learning_rate": 1.0914196368690049e-05, "loss": 0.29750365018844604, "step": 3747 }, { "epoch": 0.9953525428229982, "grad_norm": 1.153321336461836, "learning_rate": 1.0909823901811496e-05, "loss": 0.25272879004478455, "step": 3748 }, { "epoch": 0.9956181118045412, "grad_norm": 1.1906489486154657, "learning_rate": 1.0905451259529626e-05, "loss": 0.3056861460208893, "step": 3749 }, { "epoch": 0.9958836807860841, "grad_norm": 1.1596775625362263, "learning_rate": 1.090107844268743e-05, "loss": 0.26723814010620117, "step": 3750 }, { "epoch": 0.9961492497676271, "grad_norm": 1.167023454532776, "learning_rate": 1.0896705452127943e-05, "loss": 0.29998716711997986, "step": 3751 }, { "epoch": 0.9964148187491702, "grad_norm": 1.1519689723038142, "learning_rate": 1.0892332288694216e-05, "loss": 0.2690891623497009, "step": 3752 }, { "epoch": 0.9966803877307131, "grad_norm": 1.1385088428140973, "learning_rate": 1.0887958953229349e-05, "loss": 0.25555333495140076, "step": 3753 }, { "epoch": 0.996945956712256, "grad_norm": 1.1617836993376212, "learning_rate": 1.088358544657647e-05, "loss": 0.27788421511650085, "step": 3754 }, { "epoch": 0.997211525693799, "grad_norm": 1.0981105518173184, "learning_rate": 1.0879211769578734e-05, "loss": 0.2566586136817932, "step": 3755 }, { "epoch": 0.997477094675342, "grad_norm": 1.1742409056404244, "learning_rate": 1.0874837923079339e-05, "loss": 0.3028980493545532, "step": 3756 }, { "epoch": 0.9977426636568849, "grad_norm": 1.151070664269376, "learning_rate": 1.0870463907921512e-05, "loss": 0.30244824290275574, "step": 3757 }, { "epoch": 0.9980082326384279, "grad_norm": 1.0175517300218122, "learning_rate": 1.086608972494851e-05, "loss": 0.2610962390899658, "step": 3758 }, { "epoch": 0.9982738016199708, "grad_norm": 1.1587347636182326, "learning_rate": 1.0861715375003623e-05, "loss": 0.2733536660671234, "step": 3759 }, { "epoch": 0.9985393706015138, "grad_norm": 1.094010099730521, "learning_rate": 1.0857340858930175e-05, "loss": 0.2915020287036896, "step": 3760 }, { "epoch": 0.9988049395830567, "grad_norm": 1.1164899423303463, "learning_rate": 1.085296617757152e-05, "loss": 0.2940186560153961, "step": 3761 }, { "epoch": 0.9990705085645997, "grad_norm": 1.1441195343158572, "learning_rate": 1.0848591331771045e-05, "loss": 0.3002738952636719, "step": 3762 }, { "epoch": 0.9993360775461426, "grad_norm": 1.0530840422742196, "learning_rate": 1.0844216322372172e-05, "loss": 0.284588485956192, "step": 3763 }, { "epoch": 0.9996016465276856, "grad_norm": 1.0971261053209735, "learning_rate": 1.0839841150218347e-05, "loss": 0.29395923018455505, "step": 3764 }, { "epoch": 0.9998672155092285, "grad_norm": 1.1355876604442514, "learning_rate": 1.083546581615305e-05, "loss": 0.2574613094329834, "step": 3765 }, { "epoch": 1.0, "grad_norm": 1.535375625820537, "learning_rate": 1.0831090321019801e-05, "loss": 0.177712082862854, "step": 3766 }, { "epoch": 1.000265568981543, "grad_norm": 1.1101315935040728, "learning_rate": 1.0826714665662139e-05, "loss": 0.29758381843566895, "step": 3767 }, { "epoch": 1.000531137963086, "grad_norm": 1.055973006911073, "learning_rate": 1.0822338850923644e-05, "loss": 0.23377545177936554, "step": 3768 }, { "epoch": 1.0007967069446289, "grad_norm": 1.1573191222761028, "learning_rate": 1.0817962877647911e-05, "loss": 0.2505020797252655, "step": 3769 }, { "epoch": 1.0010622759261718, "grad_norm": 1.0395021899779042, "learning_rate": 1.0813586746678584e-05, "loss": 0.26122647523880005, "step": 3770 }, { "epoch": 1.0013278449077148, "grad_norm": 1.1508778318464672, "learning_rate": 1.0809210458859327e-05, "loss": 0.27962177991867065, "step": 3771 }, { "epoch": 1.0015934138892577, "grad_norm": 1.0479777844917506, "learning_rate": 1.080483401503384e-05, "loss": 0.21921640634536743, "step": 3772 }, { "epoch": 1.0018589828708007, "grad_norm": 1.1277812491041006, "learning_rate": 1.0800457416045845e-05, "loss": 0.24623796343803406, "step": 3773 }, { "epoch": 1.0021245518523436, "grad_norm": 1.259401152466985, "learning_rate": 1.0796080662739098e-05, "loss": 0.3130728006362915, "step": 3774 }, { "epoch": 1.0023901208338866, "grad_norm": 1.1209083810179328, "learning_rate": 1.0791703755957392e-05, "loss": 0.2548064589500427, "step": 3775 }, { "epoch": 1.0026556898154295, "grad_norm": 1.1167206534835417, "learning_rate": 1.078732669654454e-05, "loss": 0.20517288148403168, "step": 3776 }, { "epoch": 1.0029212587969725, "grad_norm": 1.1055374385175383, "learning_rate": 1.0782949485344385e-05, "loss": 0.2634897530078888, "step": 3777 }, { "epoch": 1.0031868277785154, "grad_norm": 1.3696848286677328, "learning_rate": 1.0778572123200804e-05, "loss": 0.2743223309516907, "step": 3778 }, { "epoch": 1.0034523967600584, "grad_norm": 0.9930991365195264, "learning_rate": 1.0774194610957695e-05, "loss": 0.24595436453819275, "step": 3779 }, { "epoch": 1.0037179657416013, "grad_norm": 1.0885778480679946, "learning_rate": 1.0769816949459002e-05, "loss": 0.2508128881454468, "step": 3780 }, { "epoch": 1.0039835347231443, "grad_norm": 1.1243431648812525, "learning_rate": 1.0765439139548677e-05, "loss": 0.2326367199420929, "step": 3781 }, { "epoch": 1.0042491037046872, "grad_norm": 1.1514050771182385, "learning_rate": 1.0761061182070716e-05, "loss": 0.2888404130935669, "step": 3782 }, { "epoch": 1.0045146726862302, "grad_norm": 1.1399638718055765, "learning_rate": 1.0756683077869133e-05, "loss": 0.2804296612739563, "step": 3783 }, { "epoch": 1.0047802416677731, "grad_norm": 1.1286027319524963, "learning_rate": 1.0752304827787979e-05, "loss": 0.2644953429698944, "step": 3784 }, { "epoch": 1.005045810649316, "grad_norm": 1.2396532451569051, "learning_rate": 1.0747926432671323e-05, "loss": 0.297788143157959, "step": 3785 }, { "epoch": 1.005311379630859, "grad_norm": 1.065071455363874, "learning_rate": 1.0743547893363276e-05, "loss": 0.2644156515598297, "step": 3786 }, { "epoch": 1.005576948612402, "grad_norm": 1.1640867578019738, "learning_rate": 1.073916921070796e-05, "loss": 0.23818905651569366, "step": 3787 }, { "epoch": 1.005842517593945, "grad_norm": 1.11872081222192, "learning_rate": 1.0734790385549538e-05, "loss": 0.2544933259487152, "step": 3788 }, { "epoch": 1.006108086575488, "grad_norm": 1.0836442452511366, "learning_rate": 1.0730411418732198e-05, "loss": 0.2569275498390198, "step": 3789 }, { "epoch": 1.0063736555570308, "grad_norm": 1.0348585374954582, "learning_rate": 1.0726032311100153e-05, "loss": 0.2248159945011139, "step": 3790 }, { "epoch": 1.0066392245385738, "grad_norm": 1.1242207493876892, "learning_rate": 1.072165306349764e-05, "loss": 0.25541940331459045, "step": 3791 }, { "epoch": 1.0069047935201167, "grad_norm": 9.328291099250833, "learning_rate": 1.0717273676768924e-05, "loss": 0.24429568648338318, "step": 3792 }, { "epoch": 1.0071703625016597, "grad_norm": 1.0574884647737486, "learning_rate": 1.0712894151758306e-05, "loss": 0.2586621344089508, "step": 3793 }, { "epoch": 1.0074359314832027, "grad_norm": 1.165205157800888, "learning_rate": 1.0708514489310103e-05, "loss": 0.28685104846954346, "step": 3794 }, { "epoch": 1.0077015004647458, "grad_norm": 1.1536672746294196, "learning_rate": 1.0704134690268661e-05, "loss": 0.2847924530506134, "step": 3795 }, { "epoch": 1.0079670694462888, "grad_norm": 1.1168453704329862, "learning_rate": 1.0699754755478358e-05, "loss": 0.24646440148353577, "step": 3796 }, { "epoch": 1.0082326384278317, "grad_norm": 1.217438590106057, "learning_rate": 1.0695374685783586e-05, "loss": 0.22286385297775269, "step": 3797 }, { "epoch": 1.0084982074093747, "grad_norm": 1.1352166249232278, "learning_rate": 1.069099448202878e-05, "loss": 0.2524179518222809, "step": 3798 }, { "epoch": 1.0087637763909176, "grad_norm": 1.109981913009372, "learning_rate": 1.0686614145058387e-05, "loss": 0.2625758647918701, "step": 3799 }, { "epoch": 1.0090293453724606, "grad_norm": 1.0622342238121125, "learning_rate": 1.0682233675716884e-05, "loss": 0.25318068265914917, "step": 3800 }, { "epoch": 1.0092949143540035, "grad_norm": 1.073699024276181, "learning_rate": 1.0677853074848774e-05, "loss": 0.24224570393562317, "step": 3801 }, { "epoch": 1.0095604833355465, "grad_norm": 1.1995813349182267, "learning_rate": 1.0673472343298588e-05, "loss": 0.28595417737960815, "step": 3802 }, { "epoch": 1.0098260523170894, "grad_norm": 1.1558738404506108, "learning_rate": 1.0669091481910874e-05, "loss": 0.26894015073776245, "step": 3803 }, { "epoch": 1.0100916212986324, "grad_norm": 1.0901744125075639, "learning_rate": 1.0664710491530214e-05, "loss": 0.2605208158493042, "step": 3804 }, { "epoch": 1.0103571902801753, "grad_norm": 1.082458382717597, "learning_rate": 1.0660329373001212e-05, "loss": 0.2595113515853882, "step": 3805 }, { "epoch": 1.0106227592617183, "grad_norm": 1.2467081294979763, "learning_rate": 1.0655948127168494e-05, "loss": 0.27478674054145813, "step": 3806 }, { "epoch": 1.0108883282432612, "grad_norm": 1.0742167098010935, "learning_rate": 1.0651566754876715e-05, "loss": 0.2587064504623413, "step": 3807 }, { "epoch": 1.0111538972248042, "grad_norm": 1.0593019665426413, "learning_rate": 1.064718525697055e-05, "loss": 0.2420537769794464, "step": 3808 }, { "epoch": 1.0114194662063472, "grad_norm": 1.1660072059036033, "learning_rate": 1.0642803634294699e-05, "loss": 0.29424652457237244, "step": 3809 }, { "epoch": 1.01168503518789, "grad_norm": 1.0902934718743655, "learning_rate": 1.0638421887693887e-05, "loss": 0.25162142515182495, "step": 3810 }, { "epoch": 1.011950604169433, "grad_norm": 1.1456242703963635, "learning_rate": 1.0634040018012865e-05, "loss": 0.25661247968673706, "step": 3811 }, { "epoch": 1.012216173150976, "grad_norm": 1.0060634238068926, "learning_rate": 1.0629658026096408e-05, "loss": 0.2042091339826584, "step": 3812 }, { "epoch": 1.012481742132519, "grad_norm": 1.0129340658577524, "learning_rate": 1.0625275912789307e-05, "loss": 0.22496266663074493, "step": 3813 }, { "epoch": 1.012747311114062, "grad_norm": 1.1382961966722176, "learning_rate": 1.0620893678936385e-05, "loss": 0.23609521985054016, "step": 3814 }, { "epoch": 1.0130128800956049, "grad_norm": 1.2645443214744188, "learning_rate": 1.0616511325382486e-05, "loss": 0.2561722993850708, "step": 3815 }, { "epoch": 1.0132784490771478, "grad_norm": 1.1379816472778304, "learning_rate": 1.0612128852972474e-05, "loss": 0.2617529630661011, "step": 3816 }, { "epoch": 1.0135440180586908, "grad_norm": 1.1862833237483508, "learning_rate": 1.060774626255124e-05, "loss": 0.2633543014526367, "step": 3817 }, { "epoch": 1.0138095870402337, "grad_norm": 1.0263666085354948, "learning_rate": 1.0603363554963693e-05, "loss": 0.19401729106903076, "step": 3818 }, { "epoch": 1.0140751560217767, "grad_norm": 1.0891094169836097, "learning_rate": 1.0598980731054765e-05, "loss": 0.2583369016647339, "step": 3819 }, { "epoch": 1.0143407250033196, "grad_norm": 1.1826598806695992, "learning_rate": 1.0594597791669419e-05, "loss": 0.26138922572135925, "step": 3820 }, { "epoch": 1.0146062939848626, "grad_norm": 1.1580137447688548, "learning_rate": 1.0590214737652632e-05, "loss": 0.2506800591945648, "step": 3821 }, { "epoch": 1.0148718629664055, "grad_norm": 1.032579662550809, "learning_rate": 1.0585831569849405e-05, "loss": 0.21569974720478058, "step": 3822 }, { "epoch": 1.0151374319479485, "grad_norm": 1.37079648056154, "learning_rate": 1.0581448289104759e-05, "loss": 0.2765602767467499, "step": 3823 }, { "epoch": 1.0154030009294914, "grad_norm": 1.2046968903946047, "learning_rate": 1.0577064896263743e-05, "loss": 0.25180384516716003, "step": 3824 }, { "epoch": 1.0156685699110344, "grad_norm": 1.0796182560924539, "learning_rate": 1.0572681392171417e-05, "loss": 0.24164071679115295, "step": 3825 }, { "epoch": 1.0159341388925773, "grad_norm": 1.1523354919316235, "learning_rate": 1.0568297777672875e-05, "loss": 0.24206972122192383, "step": 3826 }, { "epoch": 1.0161997078741203, "grad_norm": 1.115771237946875, "learning_rate": 1.0563914053613227e-05, "loss": 0.24563468992710114, "step": 3827 }, { "epoch": 1.0164652768556632, "grad_norm": 1.121826691352643, "learning_rate": 1.0559530220837593e-05, "loss": 0.23226243257522583, "step": 3828 }, { "epoch": 1.0167308458372062, "grad_norm": 1.4499652400392462, "learning_rate": 1.0555146280191137e-05, "loss": 0.2245083749294281, "step": 3829 }, { "epoch": 1.0169964148187491, "grad_norm": 1.1230707875328865, "learning_rate": 1.0550762232519023e-05, "loss": 0.24455049633979797, "step": 3830 }, { "epoch": 1.017261983800292, "grad_norm": 1.1434011419253403, "learning_rate": 1.0546378078666448e-05, "loss": 0.2540651857852936, "step": 3831 }, { "epoch": 1.017527552781835, "grad_norm": 1.222189193306495, "learning_rate": 1.0541993819478622e-05, "loss": 0.23392565548419952, "step": 3832 }, { "epoch": 1.017793121763378, "grad_norm": 1.239236731837986, "learning_rate": 1.053760945580078e-05, "loss": 0.21601927280426025, "step": 3833 }, { "epoch": 1.018058690744921, "grad_norm": 1.1697918037357793, "learning_rate": 1.0533224988478176e-05, "loss": 0.24622616171836853, "step": 3834 }, { "epoch": 1.018324259726464, "grad_norm": 1.186224891573799, "learning_rate": 1.0528840418356086e-05, "loss": 0.2774650752544403, "step": 3835 }, { "epoch": 1.0185898287080069, "grad_norm": 1.1218094293898884, "learning_rate": 1.0524455746279795e-05, "loss": 0.22323890030384064, "step": 3836 }, { "epoch": 1.0188553976895498, "grad_norm": 1.0569207532138136, "learning_rate": 1.0520070973094622e-05, "loss": 0.21901552379131317, "step": 3837 }, { "epoch": 1.0191209666710928, "grad_norm": 1.1936231752235407, "learning_rate": 1.0515686099645901e-05, "loss": 0.3037784695625305, "step": 3838 }, { "epoch": 1.0193865356526357, "grad_norm": 1.0847362828180318, "learning_rate": 1.0511301126778984e-05, "loss": 0.22658365964889526, "step": 3839 }, { "epoch": 1.0196521046341787, "grad_norm": 1.09040618490447, "learning_rate": 1.0506916055339237e-05, "loss": 0.23144160211086273, "step": 3840 }, { "epoch": 1.0199176736157216, "grad_norm": 1.28339134317777, "learning_rate": 1.0502530886172055e-05, "loss": 0.25658899545669556, "step": 3841 }, { "epoch": 1.0201832425972646, "grad_norm": 0.9689646092731519, "learning_rate": 1.0498145620122845e-05, "loss": 0.19658756256103516, "step": 3842 }, { "epoch": 1.0204488115788075, "grad_norm": 1.0949311372526576, "learning_rate": 1.049376025803703e-05, "loss": 0.19045208394527435, "step": 3843 }, { "epoch": 1.0207143805603505, "grad_norm": 1.1626763108379607, "learning_rate": 1.0489374800760066e-05, "loss": 0.2577810287475586, "step": 3844 }, { "epoch": 1.0209799495418934, "grad_norm": 1.1521055149329589, "learning_rate": 1.048498924913741e-05, "loss": 0.2807403802871704, "step": 3845 }, { "epoch": 1.0212455185234364, "grad_norm": 1.2275557893789377, "learning_rate": 1.0480603604014545e-05, "loss": 0.2710269093513489, "step": 3846 }, { "epoch": 1.0215110875049793, "grad_norm": 1.173604136076929, "learning_rate": 1.0476217866236974e-05, "loss": 0.2560620903968811, "step": 3847 }, { "epoch": 1.0217766564865223, "grad_norm": 1.1571778426612858, "learning_rate": 1.0471832036650217e-05, "loss": 0.2599894404411316, "step": 3848 }, { "epoch": 1.0220422254680652, "grad_norm": 1.1339420848197217, "learning_rate": 1.046744611609981e-05, "loss": 0.2411944717168808, "step": 3849 }, { "epoch": 1.0223077944496084, "grad_norm": 1.1528658942490468, "learning_rate": 1.0463060105431303e-05, "loss": 0.25216251611709595, "step": 3850 }, { "epoch": 1.0225733634311513, "grad_norm": 1.1884423925105638, "learning_rate": 1.0458674005490263e-05, "loss": 0.255629301071167, "step": 3851 }, { "epoch": 1.0228389324126943, "grad_norm": 1.0777718220336832, "learning_rate": 1.0454287817122291e-05, "loss": 0.24032849073410034, "step": 3852 }, { "epoch": 1.0231045013942373, "grad_norm": 1.1154013609024198, "learning_rate": 1.0449901541172983e-05, "loss": 0.23188306391239166, "step": 3853 }, { "epoch": 1.0233700703757802, "grad_norm": 1.149374478972437, "learning_rate": 1.0445515178487965e-05, "loss": 0.2718146741390228, "step": 3854 }, { "epoch": 1.0236356393573232, "grad_norm": 1.460691184866812, "learning_rate": 1.0441128729912876e-05, "loss": 0.30279839038848877, "step": 3855 }, { "epoch": 1.023901208338866, "grad_norm": 1.0711762201816422, "learning_rate": 1.0436742196293368e-05, "loss": 0.2185024917125702, "step": 3856 }, { "epoch": 1.024166777320409, "grad_norm": 1.2737960148140446, "learning_rate": 1.0432355578475118e-05, "loss": 0.2956481873989105, "step": 3857 }, { "epoch": 1.024432346301952, "grad_norm": 1.1913794327080105, "learning_rate": 1.0427968877303809e-05, "loss": 0.28460678458213806, "step": 3858 }, { "epoch": 1.024697915283495, "grad_norm": 1.1716718579119476, "learning_rate": 1.0423582093625146e-05, "loss": 0.24597057700157166, "step": 3859 }, { "epoch": 1.024963484265038, "grad_norm": 0.987642591779768, "learning_rate": 1.0419195228284856e-05, "loss": 0.23986583948135376, "step": 3860 }, { "epoch": 1.0252290532465809, "grad_norm": 1.0867576400643644, "learning_rate": 1.0414808282128668e-05, "loss": 0.2489446997642517, "step": 3861 }, { "epoch": 1.0254946222281238, "grad_norm": 1.1200031637603385, "learning_rate": 1.0410421256002334e-05, "loss": 0.26777884364128113, "step": 3862 }, { "epoch": 1.0257601912096668, "grad_norm": 1.1645962699086565, "learning_rate": 1.0406034150751625e-05, "loss": 0.23506489396095276, "step": 3863 }, { "epoch": 1.0260257601912097, "grad_norm": 1.1861093965134106, "learning_rate": 1.040164696722232e-05, "loss": 0.2526484429836273, "step": 3864 }, { "epoch": 1.0262913291727527, "grad_norm": 1.1320109702434422, "learning_rate": 1.0397259706260216e-05, "loss": 0.2179267853498459, "step": 3865 }, { "epoch": 1.0265568981542956, "grad_norm": 1.0267487594121727, "learning_rate": 1.0392872368711126e-05, "loss": 0.2431088387966156, "step": 3866 }, { "epoch": 1.0268224671358386, "grad_norm": 1.1394336459602463, "learning_rate": 1.0388484955420877e-05, "loss": 0.26101407408714294, "step": 3867 }, { "epoch": 1.0270880361173815, "grad_norm": 1.0741553283028158, "learning_rate": 1.0384097467235308e-05, "loss": 0.23780573904514313, "step": 3868 }, { "epoch": 1.0273536050989245, "grad_norm": 1.467981467949694, "learning_rate": 1.0379709905000278e-05, "loss": 0.2469894289970398, "step": 3869 }, { "epoch": 1.0276191740804674, "grad_norm": 1.074989572738127, "learning_rate": 1.0375322269561658e-05, "loss": 0.21271926164627075, "step": 3870 }, { "epoch": 1.0278847430620104, "grad_norm": 1.1192343716648714, "learning_rate": 1.0370934561765331e-05, "loss": 0.22995726764202118, "step": 3871 }, { "epoch": 1.0281503120435533, "grad_norm": 1.2051770162428763, "learning_rate": 1.0366546782457196e-05, "loss": 0.27448171377182007, "step": 3872 }, { "epoch": 1.0284158810250963, "grad_norm": 1.232887313588547, "learning_rate": 1.0362158932483165e-05, "loss": 0.25459539890289307, "step": 3873 }, { "epoch": 1.0286814500066392, "grad_norm": 1.1436601222318827, "learning_rate": 1.0357771012689162e-05, "loss": 0.23213380575180054, "step": 3874 }, { "epoch": 1.0289470189881822, "grad_norm": 1.107979602389345, "learning_rate": 1.0353383023921127e-05, "loss": 0.2219776064157486, "step": 3875 }, { "epoch": 1.0292125879697251, "grad_norm": 1.2445278934711803, "learning_rate": 1.0348994967025012e-05, "loss": 0.27059125900268555, "step": 3876 }, { "epoch": 1.029478156951268, "grad_norm": 1.2314072238589235, "learning_rate": 1.034460684284678e-05, "loss": 0.26921501755714417, "step": 3877 }, { "epoch": 1.029743725932811, "grad_norm": 1.153389282583655, "learning_rate": 1.0340218652232419e-05, "loss": 0.24727991223335266, "step": 3878 }, { "epoch": 1.030009294914354, "grad_norm": 1.2105369925319034, "learning_rate": 1.0335830396027912e-05, "loss": 0.26276054978370667, "step": 3879 }, { "epoch": 1.030274863895897, "grad_norm": 1.1222835146983237, "learning_rate": 1.0331442075079268e-05, "loss": 0.25906458497047424, "step": 3880 }, { "epoch": 1.03054043287744, "grad_norm": 1.1936099182612667, "learning_rate": 1.0327053690232498e-05, "loss": 0.2708794176578522, "step": 3881 }, { "epoch": 1.0308060018589829, "grad_norm": 1.1283814494585969, "learning_rate": 1.0322665242333634e-05, "loss": 0.24968653917312622, "step": 3882 }, { "epoch": 1.0310715708405258, "grad_norm": 1.1912763351930955, "learning_rate": 1.0318276732228716e-05, "loss": 0.2669135332107544, "step": 3883 }, { "epoch": 1.0313371398220688, "grad_norm": 1.0733368423352447, "learning_rate": 1.0313888160763799e-05, "loss": 0.24173730611801147, "step": 3884 }, { "epoch": 1.0316027088036117, "grad_norm": 1.4084549111395024, "learning_rate": 1.0309499528784948e-05, "loss": 0.27513059973716736, "step": 3885 }, { "epoch": 1.0318682777851547, "grad_norm": 1.163470416419209, "learning_rate": 1.0305110837138235e-05, "loss": 0.2512688934803009, "step": 3886 }, { "epoch": 1.0321338467666976, "grad_norm": 1.100016135139411, "learning_rate": 1.0300722086669753e-05, "loss": 0.2584962844848633, "step": 3887 }, { "epoch": 1.0323994157482406, "grad_norm": 1.1125458904355436, "learning_rate": 1.0296333278225599e-05, "loss": 0.23692303895950317, "step": 3888 }, { "epoch": 1.0326649847297835, "grad_norm": 1.1981051682884363, "learning_rate": 1.0291944412651884e-05, "loss": 0.2570871114730835, "step": 3889 }, { "epoch": 1.0329305537113265, "grad_norm": 1.1839354606788588, "learning_rate": 1.028755549079473e-05, "loss": 0.2896367609500885, "step": 3890 }, { "epoch": 1.0331961226928694, "grad_norm": 0.958593784491898, "learning_rate": 1.0283166513500267e-05, "loss": 0.19990365207195282, "step": 3891 }, { "epoch": 1.0334616916744124, "grad_norm": 1.1157517117826752, "learning_rate": 1.0278777481614639e-05, "loss": 0.25235646963119507, "step": 3892 }, { "epoch": 1.0337272606559553, "grad_norm": 1.1808927381569394, "learning_rate": 1.0274388395984003e-05, "loss": 0.23675012588500977, "step": 3893 }, { "epoch": 1.0339928296374983, "grad_norm": 1.1370597202642294, "learning_rate": 1.026999925745452e-05, "loss": 0.250516414642334, "step": 3894 }, { "epoch": 1.0342583986190412, "grad_norm": 1.0692414219621886, "learning_rate": 1.0265610066872365e-05, "loss": 0.24573490023612976, "step": 3895 }, { "epoch": 1.0345239676005842, "grad_norm": 1.085358990363196, "learning_rate": 1.026122082508372e-05, "loss": 0.2473086714744568, "step": 3896 }, { "epoch": 1.0347895365821271, "grad_norm": 1.162338198859519, "learning_rate": 1.0256831532934783e-05, "loss": 0.26546406745910645, "step": 3897 }, { "epoch": 1.03505510556367, "grad_norm": 1.1034436628854154, "learning_rate": 1.0252442191271754e-05, "loss": 0.2565246522426605, "step": 3898 }, { "epoch": 1.035320674545213, "grad_norm": 1.0272875416109402, "learning_rate": 1.0248052800940846e-05, "loss": 0.24923476576805115, "step": 3899 }, { "epoch": 1.035586243526756, "grad_norm": 1.1519345059696067, "learning_rate": 1.0243663362788286e-05, "loss": 0.3079240322113037, "step": 3900 }, { "epoch": 1.035851812508299, "grad_norm": 1.0586971174066726, "learning_rate": 1.0239273877660302e-05, "loss": 0.2482951581478119, "step": 3901 }, { "epoch": 1.036117381489842, "grad_norm": 1.1495296797401515, "learning_rate": 1.0234884346403138e-05, "loss": 0.2626204192638397, "step": 3902 }, { "epoch": 1.0363829504713848, "grad_norm": 1.0578834148114886, "learning_rate": 1.023049476986304e-05, "loss": 0.23181654512882233, "step": 3903 }, { "epoch": 1.0366485194529278, "grad_norm": 1.2527800012652353, "learning_rate": 1.0226105148886272e-05, "loss": 0.29164040088653564, "step": 3904 }, { "epoch": 1.0369140884344707, "grad_norm": 1.034136654365203, "learning_rate": 1.0221715484319094e-05, "loss": 0.22025801241397858, "step": 3905 }, { "epoch": 1.0371796574160137, "grad_norm": 1.1162047929812215, "learning_rate": 1.021732577700779e-05, "loss": 0.2819385826587677, "step": 3906 }, { "epoch": 1.0374452263975567, "grad_norm": 1.0524498644463125, "learning_rate": 1.0212936027798637e-05, "loss": 0.24709002673625946, "step": 3907 }, { "epoch": 1.0377107953790998, "grad_norm": 0.9984579723832369, "learning_rate": 1.0208546237537928e-05, "loss": 0.22570034861564636, "step": 3908 }, { "epoch": 1.0379763643606428, "grad_norm": 1.1543900299803864, "learning_rate": 1.0204156407071964e-05, "loss": 0.25642865896224976, "step": 3909 }, { "epoch": 1.0382419333421857, "grad_norm": 1.1657404882715603, "learning_rate": 1.0199766537247053e-05, "loss": 0.25970256328582764, "step": 3910 }, { "epoch": 1.0385075023237287, "grad_norm": 1.1347864223586095, "learning_rate": 1.019537662890951e-05, "loss": 0.2560003101825714, "step": 3911 }, { "epoch": 1.0387730713052716, "grad_norm": 1.3160565196765366, "learning_rate": 1.0190986682905656e-05, "loss": 0.28138649463653564, "step": 3912 }, { "epoch": 1.0390386402868146, "grad_norm": 1.4353879235637104, "learning_rate": 1.0186596700081825e-05, "loss": 0.23531222343444824, "step": 3913 }, { "epoch": 1.0393042092683575, "grad_norm": 1.1850676655471586, "learning_rate": 1.018220668128435e-05, "loss": 0.24912862479686737, "step": 3914 }, { "epoch": 1.0395697782499005, "grad_norm": 1.0811585337632708, "learning_rate": 1.0177816627359575e-05, "loss": 0.24188724160194397, "step": 3915 }, { "epoch": 1.0398353472314434, "grad_norm": 1.2093489820950423, "learning_rate": 1.0173426539153853e-05, "loss": 0.2709474563598633, "step": 3916 }, { "epoch": 1.0401009162129864, "grad_norm": 1.1793292324294091, "learning_rate": 1.0169036417513538e-05, "loss": 0.2400204837322235, "step": 3917 }, { "epoch": 1.0403664851945293, "grad_norm": 1.0489256907825586, "learning_rate": 1.0164646263284993e-05, "loss": 0.2687132954597473, "step": 3918 }, { "epoch": 1.0406320541760723, "grad_norm": 1.1628887826217675, "learning_rate": 1.0160256077314592e-05, "loss": 0.25139346718788147, "step": 3919 }, { "epoch": 1.0408976231576152, "grad_norm": 1.1762633281473511, "learning_rate": 1.0155865860448712e-05, "loss": 0.25873464345932007, "step": 3920 }, { "epoch": 1.0411631921391582, "grad_norm": 1.1207165962030725, "learning_rate": 1.0151475613533732e-05, "loss": 0.2510434687137604, "step": 3921 }, { "epoch": 1.0414287611207012, "grad_norm": 1.2260247662339232, "learning_rate": 1.0147085337416036e-05, "loss": 0.24567106366157532, "step": 3922 }, { "epoch": 1.041694330102244, "grad_norm": 1.1642096823951156, "learning_rate": 1.0142695032942024e-05, "loss": 0.25028282403945923, "step": 3923 }, { "epoch": 1.041959899083787, "grad_norm": 1.140963361472911, "learning_rate": 1.0138304700958096e-05, "loss": 0.23542484641075134, "step": 3924 }, { "epoch": 1.04222546806533, "grad_norm": 1.2475887570620718, "learning_rate": 1.0133914342310649e-05, "loss": 0.28974449634552, "step": 3925 }, { "epoch": 1.042491037046873, "grad_norm": 1.0648736453755918, "learning_rate": 1.0129523957846097e-05, "loss": 0.23417247831821442, "step": 3926 }, { "epoch": 1.042756606028416, "grad_norm": 1.1427047582178407, "learning_rate": 1.0125133548410852e-05, "loss": 0.23247018456459045, "step": 3927 }, { "epoch": 1.0430221750099589, "grad_norm": 1.1496713132119072, "learning_rate": 1.0120743114851337e-05, "loss": 0.23860129714012146, "step": 3928 }, { "epoch": 1.0432877439915018, "grad_norm": 1.1567405333157526, "learning_rate": 1.0116352658013973e-05, "loss": 0.2609105706214905, "step": 3929 }, { "epoch": 1.0435533129730448, "grad_norm": 1.2453984448185509, "learning_rate": 1.0111962178745187e-05, "loss": 0.2559507489204407, "step": 3930 }, { "epoch": 1.0438188819545877, "grad_norm": 1.2247288020965454, "learning_rate": 1.0107571677891415e-05, "loss": 0.2708527147769928, "step": 3931 }, { "epoch": 1.0440844509361307, "grad_norm": 1.2373037230453465, "learning_rate": 1.0103181156299091e-05, "loss": 0.25884875655174255, "step": 3932 }, { "epoch": 1.0443500199176736, "grad_norm": 1.3022673165052032, "learning_rate": 1.0098790614814658e-05, "loss": 0.2631877660751343, "step": 3933 }, { "epoch": 1.0446155888992166, "grad_norm": 1.0267097797291302, "learning_rate": 1.0094400054284559e-05, "loss": 0.27179086208343506, "step": 3934 }, { "epoch": 1.0448811578807595, "grad_norm": 2.1081344450494144, "learning_rate": 1.0090009475555245e-05, "loss": 0.21690386533737183, "step": 3935 }, { "epoch": 1.0451467268623025, "grad_norm": 1.0188398651288513, "learning_rate": 1.0085618879473162e-05, "loss": 0.20192815363407135, "step": 3936 }, { "epoch": 1.0454122958438454, "grad_norm": 1.213624997308106, "learning_rate": 1.0081228266884773e-05, "loss": 0.2680777907371521, "step": 3937 }, { "epoch": 1.0456778648253884, "grad_norm": 1.1871222610891168, "learning_rate": 1.007683763863653e-05, "loss": 0.2566579580307007, "step": 3938 }, { "epoch": 1.0459434338069313, "grad_norm": 1.1229802475790265, "learning_rate": 1.0072446995574895e-05, "loss": 0.2508152723312378, "step": 3939 }, { "epoch": 1.0462090027884743, "grad_norm": 1.0850640213400236, "learning_rate": 1.0068056338546335e-05, "loss": 0.2880190908908844, "step": 3940 }, { "epoch": 1.0464745717700172, "grad_norm": 1.1129549761108044, "learning_rate": 1.0063665668397316e-05, "loss": 0.2646787464618683, "step": 3941 }, { "epoch": 1.0467401407515602, "grad_norm": 1.1116528447502043, "learning_rate": 1.0059274985974305e-05, "loss": 0.2327616810798645, "step": 3942 }, { "epoch": 1.0470057097331031, "grad_norm": 1.1644185595792014, "learning_rate": 1.0054884292123778e-05, "loss": 0.24756258726119995, "step": 3943 }, { "epoch": 1.047271278714646, "grad_norm": 1.1010853288322209, "learning_rate": 1.0050493587692207e-05, "loss": 0.23657771944999695, "step": 3944 }, { "epoch": 1.047536847696189, "grad_norm": 1.1386107444709148, "learning_rate": 1.0046102873526068e-05, "loss": 0.2541351616382599, "step": 3945 }, { "epoch": 1.047802416677732, "grad_norm": 1.0912263009271301, "learning_rate": 1.0041712150471839e-05, "loss": 0.2330317348241806, "step": 3946 }, { "epoch": 1.048067985659275, "grad_norm": 1.0696190454357721, "learning_rate": 1.0037321419375997e-05, "loss": 0.23411181569099426, "step": 3947 }, { "epoch": 1.048333554640818, "grad_norm": 1.1223872975815399, "learning_rate": 1.0032930681085028e-05, "loss": 0.2605017125606537, "step": 3948 }, { "epoch": 1.0485991236223609, "grad_norm": 1.1766579775240698, "learning_rate": 1.0028539936445407e-05, "loss": 0.28651514649391174, "step": 3949 }, { "epoch": 1.0488646926039038, "grad_norm": 1.1469362905517786, "learning_rate": 1.0024149186303628e-05, "loss": 0.22912876307964325, "step": 3950 }, { "epoch": 1.0491302615854468, "grad_norm": 1.206814749340921, "learning_rate": 1.001975843150617e-05, "loss": 0.24032847583293915, "step": 3951 }, { "epoch": 1.0493958305669897, "grad_norm": 1.0089656289438405, "learning_rate": 1.0015367672899521e-05, "loss": 0.17826229333877563, "step": 3952 }, { "epoch": 1.0496613995485327, "grad_norm": 1.1440301784208975, "learning_rate": 1.0010976911330163e-05, "loss": 0.2619745433330536, "step": 3953 }, { "epoch": 1.0499269685300756, "grad_norm": 1.1124743886634039, "learning_rate": 1.0006586147644585e-05, "loss": 0.24104374647140503, "step": 3954 }, { "epoch": 1.0501925375116186, "grad_norm": 1.2465051058358483, "learning_rate": 1.0002195382689277e-05, "loss": 0.22913998365402222, "step": 3955 }, { "epoch": 1.0504581064931615, "grad_norm": 1.2288244416278613, "learning_rate": 9.997804617310724e-06, "loss": 0.2625126838684082, "step": 3956 }, { "epoch": 1.0507236754747045, "grad_norm": 1.1016811290492863, "learning_rate": 9.993413852355416e-06, "loss": 0.23098430037498474, "step": 3957 }, { "epoch": 1.0509892444562474, "grad_norm": 1.2581954843436995, "learning_rate": 9.98902308866984e-06, "loss": 0.2866731882095337, "step": 3958 }, { "epoch": 1.0512548134377904, "grad_norm": 1.2595027481112393, "learning_rate": 9.984632327100482e-06, "loss": 0.2520306706428528, "step": 3959 }, { "epoch": 1.0515203824193333, "grad_norm": 1.2731218614589663, "learning_rate": 9.980241568493834e-06, "loss": 0.29688766598701477, "step": 3960 }, { "epoch": 1.0517859514008763, "grad_norm": 1.2865298416208544, "learning_rate": 9.975850813696375e-06, "loss": 0.2876695990562439, "step": 3961 }, { "epoch": 1.0520515203824194, "grad_norm": 1.1190033835182807, "learning_rate": 9.971460063554595e-06, "loss": 0.2402629554271698, "step": 3962 }, { "epoch": 1.0523170893639624, "grad_norm": 1.288030170241207, "learning_rate": 9.967069318914977e-06, "loss": 0.32080164551734924, "step": 3963 }, { "epoch": 1.0525826583455054, "grad_norm": 1.3484684025161604, "learning_rate": 9.962678580624008e-06, "loss": 0.2642936110496521, "step": 3964 }, { "epoch": 1.0528482273270483, "grad_norm": 1.1668064537758471, "learning_rate": 9.958287849528163e-06, "loss": 0.255870521068573, "step": 3965 }, { "epoch": 1.0531137963085913, "grad_norm": 1.1779058124731279, "learning_rate": 9.953897126473933e-06, "loss": 0.2695184350013733, "step": 3966 }, { "epoch": 1.0533793652901342, "grad_norm": 1.1937956388734083, "learning_rate": 9.949506412307795e-06, "loss": 0.24576464295387268, "step": 3967 }, { "epoch": 1.0536449342716772, "grad_norm": 1.210893055599799, "learning_rate": 9.945115707876224e-06, "loss": 0.26517459750175476, "step": 3968 }, { "epoch": 1.05391050325322, "grad_norm": 1.261309936483727, "learning_rate": 9.940725014025696e-06, "loss": 0.30468082427978516, "step": 3969 }, { "epoch": 1.054176072234763, "grad_norm": 1.1007633858966879, "learning_rate": 9.936334331602687e-06, "loss": 0.25299298763275146, "step": 3970 }, { "epoch": 1.054441641216306, "grad_norm": 1.1621642625136148, "learning_rate": 9.931943661453668e-06, "loss": 0.2659488320350647, "step": 3971 }, { "epoch": 1.054707210197849, "grad_norm": 1.129768041847351, "learning_rate": 9.92755300442511e-06, "loss": 0.25957295298576355, "step": 3972 }, { "epoch": 1.054972779179392, "grad_norm": 1.0969185518732962, "learning_rate": 9.923162361363476e-06, "loss": 0.2416645884513855, "step": 3973 }, { "epoch": 1.0552383481609349, "grad_norm": 1.1032067417924427, "learning_rate": 9.91877173311523e-06, "loss": 0.2627662122249603, "step": 3974 }, { "epoch": 1.0555039171424778, "grad_norm": 1.1485553701369502, "learning_rate": 9.91438112052684e-06, "loss": 0.2876631021499634, "step": 3975 }, { "epoch": 1.0557694861240208, "grad_norm": 1.1306607772682384, "learning_rate": 9.90999052444476e-06, "loss": 0.28336596488952637, "step": 3976 }, { "epoch": 1.0560350551055637, "grad_norm": 1.266085815857313, "learning_rate": 9.905599945715443e-06, "loss": 0.2970484495162964, "step": 3977 }, { "epoch": 1.0563006240871067, "grad_norm": 1.188464425479595, "learning_rate": 9.901209385185345e-06, "loss": 0.27202755212783813, "step": 3978 }, { "epoch": 1.0565661930686496, "grad_norm": 1.0823738866829473, "learning_rate": 9.896818843700912e-06, "loss": 0.2702459990978241, "step": 3979 }, { "epoch": 1.0568317620501926, "grad_norm": 1.2166105195755876, "learning_rate": 9.89242832210859e-06, "loss": 0.26057881116867065, "step": 3980 }, { "epoch": 1.0570973310317355, "grad_norm": 1.1526398422075472, "learning_rate": 9.888037821254816e-06, "loss": 0.24006876349449158, "step": 3981 }, { "epoch": 1.0573629000132785, "grad_norm": 1.0864441989704317, "learning_rate": 9.883647341986032e-06, "loss": 0.2437625676393509, "step": 3982 }, { "epoch": 1.0576284689948214, "grad_norm": 1.0572722810626467, "learning_rate": 9.879256885148666e-06, "loss": 0.24256819486618042, "step": 3983 }, { "epoch": 1.0578940379763644, "grad_norm": 1.2008491436753201, "learning_rate": 9.874866451589151e-06, "loss": 0.2714581787586212, "step": 3984 }, { "epoch": 1.0581596069579073, "grad_norm": 1.1859043120388024, "learning_rate": 9.870476042153907e-06, "loss": 0.30309075117111206, "step": 3985 }, { "epoch": 1.0584251759394503, "grad_norm": 1.3001941243887445, "learning_rate": 9.866085657689355e-06, "loss": 0.2938288450241089, "step": 3986 }, { "epoch": 1.0586907449209932, "grad_norm": 1.1041962963159588, "learning_rate": 9.86169529904191e-06, "loss": 0.23748518526554108, "step": 3987 }, { "epoch": 1.0589563139025362, "grad_norm": 1.2345572480055271, "learning_rate": 9.857304967057977e-06, "loss": 0.2883969247341156, "step": 3988 }, { "epoch": 1.0592218828840791, "grad_norm": 1.0871048681541509, "learning_rate": 9.852914662583966e-06, "loss": 0.28301289677619934, "step": 3989 }, { "epoch": 1.059487451865622, "grad_norm": 1.0733060702724175, "learning_rate": 9.848524386466273e-06, "loss": 0.22616548836231232, "step": 3990 }, { "epoch": 1.059753020847165, "grad_norm": 1.06530549901144, "learning_rate": 9.844134139551291e-06, "loss": 0.2282804250717163, "step": 3991 }, { "epoch": 1.060018589828708, "grad_norm": 1.154557745213229, "learning_rate": 9.839743922685408e-06, "loss": 0.2407834678888321, "step": 3992 }, { "epoch": 1.060284158810251, "grad_norm": 1.0504099183304738, "learning_rate": 9.835353736715007e-06, "loss": 0.22690361738204956, "step": 3993 }, { "epoch": 1.060549727791794, "grad_norm": 1.529267187296219, "learning_rate": 9.830963582486465e-06, "loss": 0.23291411995887756, "step": 3994 }, { "epoch": 1.0608152967733369, "grad_norm": 1.0804914844168854, "learning_rate": 9.82657346084615e-06, "loss": 0.24524198472499847, "step": 3995 }, { "epoch": 1.0610808657548798, "grad_norm": 1.130929241291739, "learning_rate": 9.822183372640426e-06, "loss": 0.22087743878364563, "step": 3996 }, { "epoch": 1.0613464347364228, "grad_norm": 1.1374060021264791, "learning_rate": 9.817793318715652e-06, "loss": 0.2459079772233963, "step": 3997 }, { "epoch": 1.0616120037179657, "grad_norm": 1.1393890830478974, "learning_rate": 9.813403299918178e-06, "loss": 0.24429920315742493, "step": 3998 }, { "epoch": 1.0618775726995087, "grad_norm": 1.140499707599593, "learning_rate": 9.809013317094345e-06, "loss": 0.2332335114479065, "step": 3999 }, { "epoch": 1.0621431416810516, "grad_norm": 1.2157908167694267, "learning_rate": 9.804623371090493e-06, "loss": 0.2861659526824951, "step": 4000 }, { "epoch": 1.0624087106625946, "grad_norm": 1.1293440606459217, "learning_rate": 9.800233462752949e-06, "loss": 0.22731532156467438, "step": 4001 }, { "epoch": 1.0626742796441375, "grad_norm": 1.127775309467411, "learning_rate": 9.795843592928036e-06, "loss": 0.245025634765625, "step": 4002 }, { "epoch": 1.0629398486256805, "grad_norm": 1.2380242649872155, "learning_rate": 9.791453762462075e-06, "loss": 0.2826273441314697, "step": 4003 }, { "epoch": 1.0632054176072234, "grad_norm": 1.1330484645300947, "learning_rate": 9.787063972201368e-06, "loss": 0.24737229943275452, "step": 4004 }, { "epoch": 1.0634709865887664, "grad_norm": 1.3814870803010457, "learning_rate": 9.782674222992214e-06, "loss": 0.23368477821350098, "step": 4005 }, { "epoch": 1.0637365555703093, "grad_norm": 1.2631953536046527, "learning_rate": 9.778284515680908e-06, "loss": 0.2754492461681366, "step": 4006 }, { "epoch": 1.0640021245518523, "grad_norm": 1.1906091191722363, "learning_rate": 9.773894851113732e-06, "loss": 0.2814168334007263, "step": 4007 }, { "epoch": 1.0642676935333952, "grad_norm": 1.1594492512554253, "learning_rate": 9.769505230136962e-06, "loss": 0.25388047099113464, "step": 4008 }, { "epoch": 1.0645332625149382, "grad_norm": 1.2618382745485697, "learning_rate": 9.765115653596867e-06, "loss": 0.25435230135917664, "step": 4009 }, { "epoch": 1.0647988314964811, "grad_norm": 1.2251032153283614, "learning_rate": 9.760726122339698e-06, "loss": 0.265840083360672, "step": 4010 }, { "epoch": 1.065064400478024, "grad_norm": 1.1297656349054435, "learning_rate": 9.756336637211716e-06, "loss": 0.2533451020717621, "step": 4011 }, { "epoch": 1.065329969459567, "grad_norm": 1.0890158421111886, "learning_rate": 9.751947199059155e-06, "loss": 0.25214290618896484, "step": 4012 }, { "epoch": 1.06559553844111, "grad_norm": 1.0603532415232781, "learning_rate": 9.74755780872825e-06, "loss": 0.25039419531822205, "step": 4013 }, { "epoch": 1.065861107422653, "grad_norm": 1.0177623632775965, "learning_rate": 9.74316846706522e-06, "loss": 0.21251091361045837, "step": 4014 }, { "epoch": 1.066126676404196, "grad_norm": 1.123294230398497, "learning_rate": 9.738779174916281e-06, "loss": 0.25898969173431396, "step": 4015 }, { "epoch": 1.0663922453857388, "grad_norm": 1.1054663361669936, "learning_rate": 9.734389933127639e-06, "loss": 0.2655499577522278, "step": 4016 }, { "epoch": 1.0666578143672818, "grad_norm": 1.1153507141873742, "learning_rate": 9.730000742545485e-06, "loss": 0.2221338450908661, "step": 4017 }, { "epoch": 1.0669233833488247, "grad_norm": 1.1746716643835395, "learning_rate": 9.725611604016002e-06, "loss": 0.2567589581012726, "step": 4018 }, { "epoch": 1.0671889523303677, "grad_norm": 1.1090772377521565, "learning_rate": 9.721222518385361e-06, "loss": 0.24440976977348328, "step": 4019 }, { "epoch": 1.0674545213119107, "grad_norm": 1.061787642846094, "learning_rate": 9.716833486499735e-06, "loss": 0.2229192852973938, "step": 4020 }, { "epoch": 1.0677200902934538, "grad_norm": 1.1014121727705226, "learning_rate": 9.712444509205273e-06, "loss": 0.26231470704078674, "step": 4021 }, { "epoch": 1.0679856592749968, "grad_norm": 1.2531191320236732, "learning_rate": 9.708055587348119e-06, "loss": 0.25099092721939087, "step": 4022 }, { "epoch": 1.0682512282565397, "grad_norm": 1.1402160070516023, "learning_rate": 9.703666721774403e-06, "loss": 0.22979633510112762, "step": 4023 }, { "epoch": 1.0685167972380827, "grad_norm": 1.09571485621585, "learning_rate": 9.699277913330252e-06, "loss": 0.2361093908548355, "step": 4024 }, { "epoch": 1.0687823662196256, "grad_norm": 1.0765448804717204, "learning_rate": 9.694889162861768e-06, "loss": 0.2390863001346588, "step": 4025 }, { "epoch": 1.0690479352011686, "grad_norm": 1.2569917808844517, "learning_rate": 9.690500471215057e-06, "loss": 0.24917885661125183, "step": 4026 }, { "epoch": 1.0693135041827115, "grad_norm": 1.1387127210628816, "learning_rate": 9.686111839236206e-06, "loss": 0.24215272068977356, "step": 4027 }, { "epoch": 1.0695790731642545, "grad_norm": 1.2809085503832063, "learning_rate": 9.681723267771284e-06, "loss": 0.27874231338500977, "step": 4028 }, { "epoch": 1.0698446421457974, "grad_norm": 1.1707122559783085, "learning_rate": 9.677334757666368e-06, "loss": 0.24076086282730103, "step": 4029 }, { "epoch": 1.0701102111273404, "grad_norm": 1.1092369229920938, "learning_rate": 9.672946309767504e-06, "loss": 0.2444242238998413, "step": 4030 }, { "epoch": 1.0703757801088833, "grad_norm": 1.2086874522857378, "learning_rate": 9.668557924920735e-06, "loss": 0.2737279236316681, "step": 4031 }, { "epoch": 1.0706413490904263, "grad_norm": 1.1006436240463247, "learning_rate": 9.664169603972091e-06, "loss": 0.24105575680732727, "step": 4032 }, { "epoch": 1.0709069180719692, "grad_norm": 1.336482466569566, "learning_rate": 9.659781347767584e-06, "loss": 0.27791836857795715, "step": 4033 }, { "epoch": 1.0711724870535122, "grad_norm": 1.1518461528529822, "learning_rate": 9.655393157153221e-06, "loss": 0.255472868680954, "step": 4034 }, { "epoch": 1.0714380560350552, "grad_norm": 1.371220848551681, "learning_rate": 9.651005032974994e-06, "loss": 0.2523707151412964, "step": 4035 }, { "epoch": 1.071703625016598, "grad_norm": 1.235756547113907, "learning_rate": 9.64661697607888e-06, "loss": 0.24584606289863586, "step": 4036 }, { "epoch": 1.071969193998141, "grad_norm": 1.1497174260677319, "learning_rate": 9.64222898731084e-06, "loss": 0.25182732939720154, "step": 4037 }, { "epoch": 1.072234762979684, "grad_norm": 1.0822892740683951, "learning_rate": 9.637841067516837e-06, "loss": 0.254008412361145, "step": 4038 }, { "epoch": 1.072500331961227, "grad_norm": 1.080204167750926, "learning_rate": 9.633453217542806e-06, "loss": 0.2314324826002121, "step": 4039 }, { "epoch": 1.07276590094277, "grad_norm": 1.1139945732367915, "learning_rate": 9.62906543823467e-06, "loss": 0.2256058305501938, "step": 4040 }, { "epoch": 1.0730314699243129, "grad_norm": 1.283214941862177, "learning_rate": 9.624677730438344e-06, "loss": 0.2577894330024719, "step": 4041 }, { "epoch": 1.0732970389058558, "grad_norm": 1.0911199623079508, "learning_rate": 9.620290094999723e-06, "loss": 0.23520560562610626, "step": 4042 }, { "epoch": 1.0735626078873988, "grad_norm": 1.1791405346126818, "learning_rate": 9.615902532764695e-06, "loss": 0.2472849190235138, "step": 4043 }, { "epoch": 1.0738281768689417, "grad_norm": 1.2195787110249676, "learning_rate": 9.611515044579128e-06, "loss": 0.25053414702415466, "step": 4044 }, { "epoch": 1.0740937458504847, "grad_norm": 1.1090102650773974, "learning_rate": 9.607127631288879e-06, "loss": 0.24229007959365845, "step": 4045 }, { "epoch": 1.0743593148320276, "grad_norm": 1.4628298980675831, "learning_rate": 9.602740293739786e-06, "loss": 0.2793073058128357, "step": 4046 }, { "epoch": 1.0746248838135706, "grad_norm": 1.225079236387791, "learning_rate": 9.598353032777682e-06, "loss": 0.24547399580478668, "step": 4047 }, { "epoch": 1.0748904527951135, "grad_norm": 1.1980997957436126, "learning_rate": 9.593965849248378e-06, "loss": 0.2776937186717987, "step": 4048 }, { "epoch": 1.0751560217766565, "grad_norm": 1.0781858695117066, "learning_rate": 9.589578743997668e-06, "loss": 0.22677727043628693, "step": 4049 }, { "epoch": 1.0754215907581994, "grad_norm": 1.4867723677136682, "learning_rate": 9.585191717871336e-06, "loss": 0.23254704475402832, "step": 4050 }, { "epoch": 1.0756871597397424, "grad_norm": 1.3243435003953368, "learning_rate": 9.580804771715148e-06, "loss": 0.2899828255176544, "step": 4051 }, { "epoch": 1.0759527287212853, "grad_norm": 1.1397018772236696, "learning_rate": 9.576417906374856e-06, "loss": 0.24632850289344788, "step": 4052 }, { "epoch": 1.0762182977028283, "grad_norm": 1.2322214200527608, "learning_rate": 9.572031122696196e-06, "loss": 0.2661561369895935, "step": 4053 }, { "epoch": 1.0764838666843712, "grad_norm": 1.1394013200357536, "learning_rate": 9.567644421524889e-06, "loss": 0.22364279627799988, "step": 4054 }, { "epoch": 1.0767494356659142, "grad_norm": 1.5026366502842776, "learning_rate": 9.563257803706635e-06, "loss": 0.26748427748680115, "step": 4055 }, { "epoch": 1.0770150046474571, "grad_norm": 1.1794922225625246, "learning_rate": 9.55887127008713e-06, "loss": 0.22851283848285675, "step": 4056 }, { "epoch": 1.077280573629, "grad_norm": 1.1340260741391435, "learning_rate": 9.554484821512037e-06, "loss": 0.2456260323524475, "step": 4057 }, { "epoch": 1.077546142610543, "grad_norm": 1.2884657617459025, "learning_rate": 9.55009845882702e-06, "loss": 0.2556169629096985, "step": 4058 }, { "epoch": 1.077811711592086, "grad_norm": 1.274618544457263, "learning_rate": 9.545712182877714e-06, "loss": 0.280727744102478, "step": 4059 }, { "epoch": 1.078077280573629, "grad_norm": 1.1205087247319334, "learning_rate": 9.54132599450974e-06, "loss": 0.25315386056900024, "step": 4060 }, { "epoch": 1.078342849555172, "grad_norm": 1.1990539773915618, "learning_rate": 9.536939894568704e-06, "loss": 0.21985477209091187, "step": 4061 }, { "epoch": 1.0786084185367149, "grad_norm": 1.1575613416248978, "learning_rate": 9.532553883900196e-06, "loss": 0.24329043924808502, "step": 4062 }, { "epoch": 1.0788739875182578, "grad_norm": 1.173950465827748, "learning_rate": 9.528167963349786e-06, "loss": 0.2362256497144699, "step": 4063 }, { "epoch": 1.0791395564998008, "grad_norm": 1.1458704347110154, "learning_rate": 9.523782133763027e-06, "loss": 0.23685476183891296, "step": 4064 }, { "epoch": 1.0794051254813437, "grad_norm": 1.2383774104342302, "learning_rate": 9.519396395985456e-06, "loss": 0.26232481002807617, "step": 4065 }, { "epoch": 1.0796706944628867, "grad_norm": 1.2768574792534622, "learning_rate": 9.515010750862594e-06, "loss": 0.25196313858032227, "step": 4066 }, { "epoch": 1.0799362634444296, "grad_norm": 1.082792256362845, "learning_rate": 9.510625199239939e-06, "loss": 0.22520464658737183, "step": 4067 }, { "epoch": 1.0802018324259726, "grad_norm": 1.190229461562689, "learning_rate": 9.506239741962971e-06, "loss": 0.27422505617141724, "step": 4068 }, { "epoch": 1.0804674014075155, "grad_norm": 1.3120430811123187, "learning_rate": 9.50185437987716e-06, "loss": 0.2646682560443878, "step": 4069 }, { "epoch": 1.0807329703890585, "grad_norm": 1.3425819541318131, "learning_rate": 9.497469113827949e-06, "loss": 0.2661365866661072, "step": 4070 }, { "epoch": 1.0809985393706014, "grad_norm": 1.1101351469883673, "learning_rate": 9.493083944660766e-06, "loss": 0.23156839609146118, "step": 4071 }, { "epoch": 1.0812641083521444, "grad_norm": 1.1805541153651362, "learning_rate": 9.488698873221021e-06, "loss": 0.25353243947029114, "step": 4072 }, { "epoch": 1.0815296773336873, "grad_norm": 1.2862671823918606, "learning_rate": 9.484313900354099e-06, "loss": 0.27488404512405396, "step": 4073 }, { "epoch": 1.0817952463152305, "grad_norm": 1.4041005997261422, "learning_rate": 9.479929026905378e-06, "loss": 0.2580753564834595, "step": 4074 }, { "epoch": 1.0820608152967734, "grad_norm": 1.1405056260482733, "learning_rate": 9.475544253720206e-06, "loss": 0.2425471544265747, "step": 4075 }, { "epoch": 1.0823263842783164, "grad_norm": 1.2040355319488043, "learning_rate": 9.471159581643918e-06, "loss": 0.25268295407295227, "step": 4076 }, { "epoch": 1.0825919532598594, "grad_norm": 1.1573228524057126, "learning_rate": 9.466775011521825e-06, "loss": 0.2683602571487427, "step": 4077 }, { "epoch": 1.0828575222414023, "grad_norm": 1.1300610618916742, "learning_rate": 9.462390544199221e-06, "loss": 0.24945034086704254, "step": 4078 }, { "epoch": 1.0831230912229453, "grad_norm": 1.1698494765527112, "learning_rate": 9.458006180521379e-06, "loss": 0.21784156560897827, "step": 4079 }, { "epoch": 1.0833886602044882, "grad_norm": 1.136268907040887, "learning_rate": 9.453621921333554e-06, "loss": 0.22704020142555237, "step": 4080 }, { "epoch": 1.0836542291860312, "grad_norm": 1.1373990713388034, "learning_rate": 9.449237767480979e-06, "loss": 0.2532106637954712, "step": 4081 }, { "epoch": 1.0839197981675741, "grad_norm": 1.1568862012297532, "learning_rate": 9.444853719808864e-06, "loss": 0.27809134125709534, "step": 4082 }, { "epoch": 1.084185367149117, "grad_norm": 1.2102387789201872, "learning_rate": 9.440469779162407e-06, "loss": 0.25704264640808105, "step": 4083 }, { "epoch": 1.08445093613066, "grad_norm": 1.1827141084910668, "learning_rate": 9.436085946386778e-06, "loss": 0.2656276226043701, "step": 4084 }, { "epoch": 1.084716505112203, "grad_norm": 1.256991317445651, "learning_rate": 9.431702222327126e-06, "loss": 0.277826726436615, "step": 4085 }, { "epoch": 1.084982074093746, "grad_norm": 1.2975495041461134, "learning_rate": 9.427318607828584e-06, "loss": 0.24656976759433746, "step": 4086 }, { "epoch": 1.0852476430752889, "grad_norm": 1.1974770836803283, "learning_rate": 9.42293510373626e-06, "loss": 0.2498110830783844, "step": 4087 }, { "epoch": 1.0855132120568318, "grad_norm": 1.1492935678310237, "learning_rate": 9.418551710895243e-06, "loss": 0.24574093520641327, "step": 4088 }, { "epoch": 1.0857787810383748, "grad_norm": 1.2274895872775384, "learning_rate": 9.414168430150601e-06, "loss": 0.25271761417388916, "step": 4089 }, { "epoch": 1.0860443500199177, "grad_norm": 1.1759358027679858, "learning_rate": 9.409785262347373e-06, "loss": 0.29269370436668396, "step": 4090 }, { "epoch": 1.0863099190014607, "grad_norm": 1.1247973273146177, "learning_rate": 9.405402208330581e-06, "loss": 0.244449645280838, "step": 4091 }, { "epoch": 1.0865754879830036, "grad_norm": 1.186787867713906, "learning_rate": 9.401019268945237e-06, "loss": 0.23785406351089478, "step": 4092 }, { "epoch": 1.0868410569645466, "grad_norm": 1.1479686632621091, "learning_rate": 9.39663644503631e-06, "loss": 0.2493479996919632, "step": 4093 }, { "epoch": 1.0871066259460895, "grad_norm": 1.1474347559215512, "learning_rate": 9.392253737448764e-06, "loss": 0.23758000135421753, "step": 4094 }, { "epoch": 1.0873721949276325, "grad_norm": 1.0946885138749496, "learning_rate": 9.387871147027528e-06, "loss": 0.22560475766658783, "step": 4095 }, { "epoch": 1.0876377639091754, "grad_norm": 1.1552533162715968, "learning_rate": 9.383488674617515e-06, "loss": 0.2558273673057556, "step": 4096 }, { "epoch": 1.0879033328907184, "grad_norm": 1.2619180705972233, "learning_rate": 9.379106321063618e-06, "loss": 0.2822023034095764, "step": 4097 }, { "epoch": 1.0881689018722613, "grad_norm": 1.2076346653444254, "learning_rate": 9.374724087210698e-06, "loss": 0.2596978545188904, "step": 4098 }, { "epoch": 1.0884344708538043, "grad_norm": 1.6785014002913365, "learning_rate": 9.370341973903597e-06, "loss": 0.25353628396987915, "step": 4099 }, { "epoch": 1.0887000398353472, "grad_norm": 1.2184499887942242, "learning_rate": 9.365959981987135e-06, "loss": 0.2547294497489929, "step": 4100 }, { "epoch": 1.0889656088168902, "grad_norm": 1.40658558629773, "learning_rate": 9.361578112306115e-06, "loss": 0.2688470780849457, "step": 4101 }, { "epoch": 1.0892311777984331, "grad_norm": 1.207208011814592, "learning_rate": 9.357196365705303e-06, "loss": 0.25772029161453247, "step": 4102 }, { "epoch": 1.089496746779976, "grad_norm": 1.3552039168974384, "learning_rate": 9.352814743029454e-06, "loss": 0.2875550091266632, "step": 4103 }, { "epoch": 1.089762315761519, "grad_norm": 1.4164869081453233, "learning_rate": 9.34843324512329e-06, "loss": 0.23085735738277435, "step": 4104 }, { "epoch": 1.090027884743062, "grad_norm": 1.2013725541896922, "learning_rate": 9.34405187283151e-06, "loss": 0.2607901096343994, "step": 4105 }, { "epoch": 1.090293453724605, "grad_norm": 1.1738523720935938, "learning_rate": 9.339670626998791e-06, "loss": 0.26165345311164856, "step": 4106 }, { "epoch": 1.090559022706148, "grad_norm": 1.1931234826270498, "learning_rate": 9.335289508469789e-06, "loss": 0.27884238958358765, "step": 4107 }, { "epoch": 1.0908245916876909, "grad_norm": 1.283025870689831, "learning_rate": 9.33090851808913e-06, "loss": 0.2689289152622223, "step": 4108 }, { "epoch": 1.0910901606692338, "grad_norm": 1.2574326426613287, "learning_rate": 9.326527656701414e-06, "loss": 0.2633207440376282, "step": 4109 }, { "epoch": 1.0913557296507768, "grad_norm": 1.1611202948336292, "learning_rate": 9.322146925151226e-06, "loss": 0.26001888513565063, "step": 4110 }, { "epoch": 1.0916212986323197, "grad_norm": 1.1436383156785508, "learning_rate": 9.31776632428312e-06, "loss": 0.2739099860191345, "step": 4111 }, { "epoch": 1.0918868676138627, "grad_norm": 1.1080458686771364, "learning_rate": 9.313385854941616e-06, "loss": 0.24885550141334534, "step": 4112 }, { "epoch": 1.0921524365954056, "grad_norm": 1.1643870148920956, "learning_rate": 9.309005517971222e-06, "loss": 0.2609873414039612, "step": 4113 }, { "epoch": 1.0924180055769486, "grad_norm": 1.427636157796487, "learning_rate": 9.304625314216415e-06, "loss": 0.28853538632392883, "step": 4114 }, { "epoch": 1.0926835745584915, "grad_norm": 1.072833070391428, "learning_rate": 9.300245244521647e-06, "loss": 0.2629924714565277, "step": 4115 }, { "epoch": 1.0929491435400345, "grad_norm": 1.1804644749067619, "learning_rate": 9.295865309731342e-06, "loss": 0.2687820494174957, "step": 4116 }, { "epoch": 1.0932147125215774, "grad_norm": 1.0831905202820669, "learning_rate": 9.2914855106899e-06, "loss": 0.2293676733970642, "step": 4117 }, { "epoch": 1.0934802815031204, "grad_norm": 1.1645005992728827, "learning_rate": 9.287105848241694e-06, "loss": 0.25261443853378296, "step": 4118 }, { "epoch": 1.0937458504846633, "grad_norm": 1.1209341991417805, "learning_rate": 9.282726323231077e-06, "loss": 0.26238197088241577, "step": 4119 }, { "epoch": 1.0940114194662063, "grad_norm": 1.1230838898563178, "learning_rate": 9.278346936502364e-06, "loss": 0.25718310475349426, "step": 4120 }, { "epoch": 1.0942769884477492, "grad_norm": 1.1872711264618019, "learning_rate": 9.273967688899849e-06, "loss": 0.23810459673404694, "step": 4121 }, { "epoch": 1.0945425574292922, "grad_norm": 1.0680734314830214, "learning_rate": 9.269588581267804e-06, "loss": 0.2197081446647644, "step": 4122 }, { "epoch": 1.0948081264108351, "grad_norm": 1.1043223190124707, "learning_rate": 9.265209614450463e-06, "loss": 0.2429335117340088, "step": 4123 }, { "epoch": 1.095073695392378, "grad_norm": 1.1380552272436657, "learning_rate": 9.260830789292043e-06, "loss": 0.23028087615966797, "step": 4124 }, { "epoch": 1.095339264373921, "grad_norm": 1.2203393500716264, "learning_rate": 9.25645210663673e-06, "loss": 0.2783699035644531, "step": 4125 }, { "epoch": 1.095604833355464, "grad_norm": 1.1686978964802806, "learning_rate": 9.25207356732868e-06, "loss": 0.25055867433547974, "step": 4126 }, { "epoch": 1.095870402337007, "grad_norm": 1.2313132067115398, "learning_rate": 9.247695172212026e-06, "loss": 0.28629350662231445, "step": 4127 }, { "epoch": 1.09613597131855, "grad_norm": 1.2403423880097748, "learning_rate": 9.24331692213087e-06, "loss": 0.2626604735851288, "step": 4128 }, { "epoch": 1.0964015403000928, "grad_norm": 1.2478078302425437, "learning_rate": 9.238938817929288e-06, "loss": 0.237881600856781, "step": 4129 }, { "epoch": 1.0966671092816358, "grad_norm": 1.144955023428898, "learning_rate": 9.234560860451325e-06, "loss": 0.2602109909057617, "step": 4130 }, { "epoch": 1.0969326782631788, "grad_norm": 1.1775071297104545, "learning_rate": 9.230183050541001e-06, "loss": 0.2721475064754486, "step": 4131 }, { "epoch": 1.0971982472447217, "grad_norm": 1.7664052681173497, "learning_rate": 9.225805389042307e-06, "loss": 0.25844910740852356, "step": 4132 }, { "epoch": 1.0974638162262647, "grad_norm": 1.1612334633259545, "learning_rate": 9.221427876799201e-06, "loss": 0.26671040058135986, "step": 4133 }, { "epoch": 1.0977293852078078, "grad_norm": 1.3116748641368057, "learning_rate": 9.21705051465562e-06, "loss": 0.2610115706920624, "step": 4134 }, { "epoch": 1.0979949541893508, "grad_norm": 1.1348320206960383, "learning_rate": 9.212673303455464e-06, "loss": 0.2518802881240845, "step": 4135 }, { "epoch": 1.0982605231708937, "grad_norm": 1.2313324732863455, "learning_rate": 9.20829624404261e-06, "loss": 0.28600364923477173, "step": 4136 }, { "epoch": 1.0985260921524367, "grad_norm": 1.0787729379648288, "learning_rate": 9.203919337260903e-06, "loss": 0.2649504840373993, "step": 4137 }, { "epoch": 1.0987916611339796, "grad_norm": 1.0717018301402161, "learning_rate": 9.199542583954159e-06, "loss": 0.22613298892974854, "step": 4138 }, { "epoch": 1.0990572301155226, "grad_norm": 1.1049408193201318, "learning_rate": 9.195165984966163e-06, "loss": 0.22546961903572083, "step": 4139 }, { "epoch": 1.0993227990970655, "grad_norm": 1.1132579479037434, "learning_rate": 9.190789541140675e-06, "loss": 0.20618169009685516, "step": 4140 }, { "epoch": 1.0995883680786085, "grad_norm": 1.1910818165933836, "learning_rate": 9.18641325332142e-06, "loss": 0.2434382289648056, "step": 4141 }, { "epoch": 1.0998539370601514, "grad_norm": 1.0160349259469954, "learning_rate": 9.182037122352092e-06, "loss": 0.19114840030670166, "step": 4142 }, { "epoch": 1.1001195060416944, "grad_norm": 1.371175220167047, "learning_rate": 9.17766114907636e-06, "loss": 0.2793614864349365, "step": 4143 }, { "epoch": 1.1003850750232373, "grad_norm": 1.3230746818872392, "learning_rate": 9.173285334337863e-06, "loss": 0.2908466160297394, "step": 4144 }, { "epoch": 1.1006506440047803, "grad_norm": 1.1707475106499343, "learning_rate": 9.168909678980199e-06, "loss": 0.260933518409729, "step": 4145 }, { "epoch": 1.1009162129863233, "grad_norm": 1.170079737982666, "learning_rate": 9.16453418384695e-06, "loss": 0.2819761037826538, "step": 4146 }, { "epoch": 1.1011817819678662, "grad_norm": 1.251357168283767, "learning_rate": 9.160158849781657e-06, "loss": 0.25290411710739136, "step": 4147 }, { "epoch": 1.1014473509494092, "grad_norm": 1.0782378998536035, "learning_rate": 9.155783677627831e-06, "loss": 0.21255841851234436, "step": 4148 }, { "epoch": 1.101712919930952, "grad_norm": 0.9808101112826028, "learning_rate": 9.151408668228958e-06, "loss": 0.20631751418113708, "step": 4149 }, { "epoch": 1.101978488912495, "grad_norm": 1.0273447794760797, "learning_rate": 9.147033822428484e-06, "loss": 0.20976273715496063, "step": 4150 }, { "epoch": 1.102244057894038, "grad_norm": 1.0193138467531315, "learning_rate": 9.142659141069828e-06, "loss": 0.21464477479457855, "step": 4151 }, { "epoch": 1.102509626875581, "grad_norm": 1.182770191723374, "learning_rate": 9.13828462499638e-06, "loss": 0.2262338101863861, "step": 4152 }, { "epoch": 1.102775195857124, "grad_norm": 1.2057409707570275, "learning_rate": 9.133910275051493e-06, "loss": 0.26331469416618347, "step": 4153 }, { "epoch": 1.1030407648386669, "grad_norm": 1.1729382721759571, "learning_rate": 9.129536092078488e-06, "loss": 0.26280921697616577, "step": 4154 }, { "epoch": 1.1033063338202098, "grad_norm": 1.1474203361843618, "learning_rate": 9.12516207692066e-06, "loss": 0.2527182698249817, "step": 4155 }, { "epoch": 1.1035719028017528, "grad_norm": 1.114868090084267, "learning_rate": 9.120788230421267e-06, "loss": 0.21416455507278442, "step": 4156 }, { "epoch": 1.1038374717832957, "grad_norm": 1.149698502937602, "learning_rate": 9.116414553423535e-06, "loss": 0.25882014632225037, "step": 4157 }, { "epoch": 1.1041030407648387, "grad_norm": 1.1615644224212993, "learning_rate": 9.112041046770653e-06, "loss": 0.20510248839855194, "step": 4158 }, { "epoch": 1.1043686097463816, "grad_norm": 1.372282887646487, "learning_rate": 9.107667711305786e-06, "loss": 0.2348058819770813, "step": 4159 }, { "epoch": 1.1046341787279246, "grad_norm": 1.2389958643414019, "learning_rate": 9.10329454787206e-06, "loss": 0.24561384320259094, "step": 4160 }, { "epoch": 1.1048997477094675, "grad_norm": 1.133562757165387, "learning_rate": 9.098921557312573e-06, "loss": 0.23025226593017578, "step": 4161 }, { "epoch": 1.1051653166910105, "grad_norm": 1.2483870007074676, "learning_rate": 9.094548740470375e-06, "loss": 0.2724589705467224, "step": 4162 }, { "epoch": 1.1054308856725534, "grad_norm": 1.2319217483915181, "learning_rate": 9.090176098188504e-06, "loss": 0.25196704268455505, "step": 4163 }, { "epoch": 1.1056964546540964, "grad_norm": 1.0723466269314343, "learning_rate": 9.085803631309953e-06, "loss": 0.22673696279525757, "step": 4164 }, { "epoch": 1.1059620236356393, "grad_norm": 1.3129015386402236, "learning_rate": 9.081431340677679e-06, "loss": 0.23913519084453583, "step": 4165 }, { "epoch": 1.1062275926171823, "grad_norm": 1.3859005835374885, "learning_rate": 9.07705922713461e-06, "loss": 0.2723861336708069, "step": 4166 }, { "epoch": 1.1064931615987252, "grad_norm": 1.15651219284811, "learning_rate": 9.072687291523636e-06, "loss": 0.262167364358902, "step": 4167 }, { "epoch": 1.1067587305802682, "grad_norm": 1.4186208937810438, "learning_rate": 9.068315534687615e-06, "loss": 0.2394658625125885, "step": 4168 }, { "epoch": 1.1070242995618111, "grad_norm": 1.116555661084851, "learning_rate": 9.063943957469373e-06, "loss": 0.2547619938850403, "step": 4169 }, { "epoch": 1.107289868543354, "grad_norm": 1.1242129377429575, "learning_rate": 9.059572560711697e-06, "loss": 0.24057570099830627, "step": 4170 }, { "epoch": 1.107555437524897, "grad_norm": 1.057297781351654, "learning_rate": 9.055201345257331e-06, "loss": 0.21729445457458496, "step": 4171 }, { "epoch": 1.10782100650644, "grad_norm": 1.2310508574302907, "learning_rate": 9.05083031194901e-06, "loss": 0.26590001583099365, "step": 4172 }, { "epoch": 1.108086575487983, "grad_norm": 1.2932563576951384, "learning_rate": 9.04645946162941e-06, "loss": 0.26114848256111145, "step": 4173 }, { "epoch": 1.108352144469526, "grad_norm": 1.1776684059902396, "learning_rate": 9.04208879514118e-06, "loss": 0.2255469262599945, "step": 4174 }, { "epoch": 1.1086177134510689, "grad_norm": 1.1791871226781019, "learning_rate": 9.037718313326932e-06, "loss": 0.2597671151161194, "step": 4175 }, { "epoch": 1.1088832824326118, "grad_norm": 1.1140795273935102, "learning_rate": 9.033348017029247e-06, "loss": 0.24820469319820404, "step": 4176 }, { "epoch": 1.1091488514141548, "grad_norm": 1.2459789693741423, "learning_rate": 9.028977907090661e-06, "loss": 0.23886600136756897, "step": 4177 }, { "epoch": 1.1094144203956977, "grad_norm": 1.091274384086243, "learning_rate": 9.024607984353682e-06, "loss": 0.24204152822494507, "step": 4178 }, { "epoch": 1.1096799893772407, "grad_norm": 1.0934112812518066, "learning_rate": 9.02023824966078e-06, "loss": 0.23246638476848602, "step": 4179 }, { "epoch": 1.1099455583587836, "grad_norm": 1.124332043141092, "learning_rate": 9.015868703854386e-06, "loss": 0.25057342648506165, "step": 4180 }, { "epoch": 1.1102111273403266, "grad_norm": 1.117105393632997, "learning_rate": 9.011499347776902e-06, "loss": 0.2316257357597351, "step": 4181 }, { "epoch": 1.1104766963218695, "grad_norm": 1.4294765240232425, "learning_rate": 9.007130182270685e-06, "loss": 0.24824783205986023, "step": 4182 }, { "epoch": 1.1107422653034125, "grad_norm": 1.1667528236187257, "learning_rate": 9.002761208178059e-06, "loss": 0.25174480676651, "step": 4183 }, { "epoch": 1.1110078342849554, "grad_norm": 1.0615254217045484, "learning_rate": 8.998392426341313e-06, "loss": 0.22364717721939087, "step": 4184 }, { "epoch": 1.1112734032664984, "grad_norm": 1.0478203412338092, "learning_rate": 8.994023837602694e-06, "loss": 0.2205432504415512, "step": 4185 }, { "epoch": 1.1115389722480415, "grad_norm": 1.4181125559874541, "learning_rate": 8.989655442804413e-06, "loss": 0.23303675651550293, "step": 4186 }, { "epoch": 1.1118045412295845, "grad_norm": 1.2558407878646785, "learning_rate": 8.985287242788646e-06, "loss": 0.3003222644329071, "step": 4187 }, { "epoch": 1.1120701102111274, "grad_norm": 1.146183553652687, "learning_rate": 8.980919238397532e-06, "loss": 0.2734413146972656, "step": 4188 }, { "epoch": 1.1123356791926704, "grad_norm": 1.200748942223162, "learning_rate": 8.976551430473166e-06, "loss": 0.24086692929267883, "step": 4189 }, { "epoch": 1.1126012481742134, "grad_norm": 1.2277073829430902, "learning_rate": 8.972183819857618e-06, "loss": 0.2531188130378723, "step": 4190 }, { "epoch": 1.1128668171557563, "grad_norm": 1.1067327267341682, "learning_rate": 8.96781640739291e-06, "loss": 0.25059640407562256, "step": 4191 }, { "epoch": 1.1131323861372993, "grad_norm": 1.1987793097859372, "learning_rate": 8.963449193921023e-06, "loss": 0.22427335381507874, "step": 4192 }, { "epoch": 1.1133979551188422, "grad_norm": 1.1842662472837817, "learning_rate": 8.959082180283906e-06, "loss": 0.28835898637771606, "step": 4193 }, { "epoch": 1.1136635241003852, "grad_norm": 1.1161865281550452, "learning_rate": 8.954715367323468e-06, "loss": 0.23919034004211426, "step": 4194 }, { "epoch": 1.1139290930819281, "grad_norm": 1.186821665962327, "learning_rate": 8.950348755881578e-06, "loss": 0.24583986401557922, "step": 4195 }, { "epoch": 1.114194662063471, "grad_norm": 1.2519292440490923, "learning_rate": 8.94598234680007e-06, "loss": 0.23869696259498596, "step": 4196 }, { "epoch": 1.114460231045014, "grad_norm": 1.1662462204488522, "learning_rate": 8.941616140920734e-06, "loss": 0.2672434449195862, "step": 4197 }, { "epoch": 1.114725800026557, "grad_norm": 1.2253961517889995, "learning_rate": 8.937250139085322e-06, "loss": 0.2660336494445801, "step": 4198 }, { "epoch": 1.1149913690081, "grad_norm": 1.1608224464613695, "learning_rate": 8.932884342135552e-06, "loss": 0.26461780071258545, "step": 4199 }, { "epoch": 1.1152569379896429, "grad_norm": 1.1632580978978435, "learning_rate": 8.928518750913094e-06, "loss": 0.22947481274604797, "step": 4200 }, { "epoch": 1.1155225069711858, "grad_norm": 1.116659758904741, "learning_rate": 8.924153366259584e-06, "loss": 0.22715970873832703, "step": 4201 }, { "epoch": 1.1157880759527288, "grad_norm": 1.3785482068816968, "learning_rate": 8.919788189016618e-06, "loss": 0.2994215190410614, "step": 4202 }, { "epoch": 1.1160536449342717, "grad_norm": 1.158412598714371, "learning_rate": 8.915423220025747e-06, "loss": 0.2290656566619873, "step": 4203 }, { "epoch": 1.1163192139158147, "grad_norm": 1.093685203516635, "learning_rate": 8.911058460128489e-06, "loss": 0.22284844517707825, "step": 4204 }, { "epoch": 1.1165847828973576, "grad_norm": 1.0534371355750514, "learning_rate": 8.906693910166316e-06, "loss": 0.2095392495393753, "step": 4205 }, { "epoch": 1.1168503518789006, "grad_norm": 1.197609739800315, "learning_rate": 8.902329570980665e-06, "loss": 0.25098133087158203, "step": 4206 }, { "epoch": 1.1171159208604435, "grad_norm": 1.1630125842119448, "learning_rate": 8.897965443412923e-06, "loss": 0.24768148362636566, "step": 4207 }, { "epoch": 1.1173814898419865, "grad_norm": 1.1213395777051767, "learning_rate": 8.89360152830445e-06, "loss": 0.22255480289459229, "step": 4208 }, { "epoch": 1.1176470588235294, "grad_norm": 1.2306365389400118, "learning_rate": 8.889237826496551e-06, "loss": 0.23721200227737427, "step": 4209 }, { "epoch": 1.1179126278050724, "grad_norm": 1.1422779685655824, "learning_rate": 8.8848743388305e-06, "loss": 0.25002530217170715, "step": 4210 }, { "epoch": 1.1181781967866153, "grad_norm": 1.2862841308153614, "learning_rate": 8.880511066147524e-06, "loss": 0.27188029885292053, "step": 4211 }, { "epoch": 1.1184437657681583, "grad_norm": 1.1517061730387759, "learning_rate": 8.876148009288813e-06, "loss": 0.23056066036224365, "step": 4212 }, { "epoch": 1.1187093347497012, "grad_norm": 1.172676602980077, "learning_rate": 8.87178516909551e-06, "loss": 0.2336079478263855, "step": 4213 }, { "epoch": 1.1189749037312442, "grad_norm": 1.1868473876345316, "learning_rate": 8.86742254640872e-06, "loss": 0.27449533343315125, "step": 4214 }, { "epoch": 1.1192404727127871, "grad_norm": 1.1500112066365369, "learning_rate": 8.863060142069508e-06, "loss": 0.24714893102645874, "step": 4215 }, { "epoch": 1.11950604169433, "grad_norm": 1.072070573678295, "learning_rate": 8.858697956918886e-06, "loss": 0.2155439257621765, "step": 4216 }, { "epoch": 1.119771610675873, "grad_norm": 1.1798452175680678, "learning_rate": 8.854335991797842e-06, "loss": 0.23189155757427216, "step": 4217 }, { "epoch": 1.120037179657416, "grad_norm": 1.0773206236657924, "learning_rate": 8.849974247547307e-06, "loss": 0.23413527011871338, "step": 4218 }, { "epoch": 1.120302748638959, "grad_norm": 1.1991513784988423, "learning_rate": 8.845612725008173e-06, "loss": 0.2569039463996887, "step": 4219 }, { "epoch": 1.120568317620502, "grad_norm": 1.1795807532964264, "learning_rate": 8.84125142502129e-06, "loss": 0.2699541449546814, "step": 4220 }, { "epoch": 1.1208338866020449, "grad_norm": 1.1092727759218166, "learning_rate": 8.836890348427468e-06, "loss": 0.27172449231147766, "step": 4221 }, { "epoch": 1.1210994555835878, "grad_norm": 1.2315684717645485, "learning_rate": 8.83252949606747e-06, "loss": 0.2839444875717163, "step": 4222 }, { "epoch": 1.1213650245651308, "grad_norm": 1.1676850588618106, "learning_rate": 8.828168868782013e-06, "loss": 0.22782178223133087, "step": 4223 }, { "epoch": 1.1216305935466737, "grad_norm": 1.132889704492098, "learning_rate": 8.82380846741178e-06, "loss": 0.2567726671695709, "step": 4224 }, { "epoch": 1.1218961625282167, "grad_norm": 1.1872540675130212, "learning_rate": 8.8194482927974e-06, "loss": 0.25879523158073425, "step": 4225 }, { "epoch": 1.1221617315097596, "grad_norm": 1.0193477801534692, "learning_rate": 8.815088345779466e-06, "loss": 0.22109058499336243, "step": 4226 }, { "epoch": 1.1224273004913026, "grad_norm": 1.1414592493281657, "learning_rate": 8.810728627198526e-06, "loss": 0.23615925014019012, "step": 4227 }, { "epoch": 1.1226928694728455, "grad_norm": 1.160290266155045, "learning_rate": 8.806369137895081e-06, "loss": 0.2751353085041046, "step": 4228 }, { "epoch": 1.1229584384543885, "grad_norm": 1.2566953981709197, "learning_rate": 8.802009878709587e-06, "loss": 0.2361963391304016, "step": 4229 }, { "epoch": 1.1232240074359314, "grad_norm": 1.186723455251228, "learning_rate": 8.79765085048246e-06, "loss": 0.22435930371284485, "step": 4230 }, { "epoch": 1.1234895764174744, "grad_norm": 1.1759467333820823, "learning_rate": 8.79329205405407e-06, "loss": 0.2355855256319046, "step": 4231 }, { "epoch": 1.1237551453990173, "grad_norm": 1.1450490838951077, "learning_rate": 8.78893349026474e-06, "loss": 0.24127572774887085, "step": 4232 }, { "epoch": 1.1240207143805603, "grad_norm": 1.222656849347683, "learning_rate": 8.784575159954748e-06, "loss": 0.2677989602088928, "step": 4233 }, { "epoch": 1.1242862833621032, "grad_norm": 1.109384474337522, "learning_rate": 8.78021706396433e-06, "loss": 0.2283135950565338, "step": 4234 }, { "epoch": 1.1245518523436462, "grad_norm": 1.1669732456316693, "learning_rate": 8.775859203133678e-06, "loss": 0.2686103582382202, "step": 4235 }, { "epoch": 1.1248174213251891, "grad_norm": 1.3869789172842044, "learning_rate": 8.771501578302934e-06, "loss": 0.2638726234436035, "step": 4236 }, { "epoch": 1.125082990306732, "grad_norm": 1.0752600847920544, "learning_rate": 8.767144190312196e-06, "loss": 0.2517441511154175, "step": 4237 }, { "epoch": 1.125348559288275, "grad_norm": 1.1903096570499558, "learning_rate": 8.762787040001518e-06, "loss": 0.2593642771244049, "step": 4238 }, { "epoch": 1.125614128269818, "grad_norm": 1.123653942868709, "learning_rate": 8.758430128210908e-06, "loss": 0.23758336901664734, "step": 4239 }, { "epoch": 1.125879697251361, "grad_norm": 1.182033088729647, "learning_rate": 8.754073455780327e-06, "loss": 0.2557980716228485, "step": 4240 }, { "epoch": 1.126145266232904, "grad_norm": 1.1182311632466304, "learning_rate": 8.74971702354969e-06, "loss": 0.2484067678451538, "step": 4241 }, { "epoch": 1.1264108352144468, "grad_norm": 1.121886097833982, "learning_rate": 8.745360832358864e-06, "loss": 0.23103098571300507, "step": 4242 }, { "epoch": 1.1266764041959898, "grad_norm": 1.1856800379472048, "learning_rate": 8.741004883047667e-06, "loss": 0.2630731463432312, "step": 4243 }, { "epoch": 1.1269419731775328, "grad_norm": 1.1814851216743405, "learning_rate": 8.736649176455885e-06, "loss": 0.2413114309310913, "step": 4244 }, { "epoch": 1.1272075421590757, "grad_norm": 1.1465608986560651, "learning_rate": 8.732293713423243e-06, "loss": 0.22463169693946838, "step": 4245 }, { "epoch": 1.1274731111406187, "grad_norm": 1.1943136125759177, "learning_rate": 8.727938494789421e-06, "loss": 0.23641429841518402, "step": 4246 }, { "epoch": 1.1277386801221616, "grad_norm": 1.399290186521162, "learning_rate": 8.723583521394054e-06, "loss": 0.2547767162322998, "step": 4247 }, { "epoch": 1.1280042491037048, "grad_norm": 1.1274578262359225, "learning_rate": 8.719228794076733e-06, "loss": 0.25753074884414673, "step": 4248 }, { "epoch": 1.1282698180852477, "grad_norm": 1.2581544322188265, "learning_rate": 8.714874313676992e-06, "loss": 0.30602240562438965, "step": 4249 }, { "epoch": 1.1285353870667907, "grad_norm": 1.3693509289176364, "learning_rate": 8.710520081034328e-06, "loss": 0.28336623311042786, "step": 4250 }, { "epoch": 1.1288009560483336, "grad_norm": 1.179198933472593, "learning_rate": 8.706166096988185e-06, "loss": 0.24065867066383362, "step": 4251 }, { "epoch": 1.1290665250298766, "grad_norm": 1.1350442144429624, "learning_rate": 8.701812362377954e-06, "loss": 0.25674968957901, "step": 4252 }, { "epoch": 1.1293320940114195, "grad_norm": 1.0526431620404462, "learning_rate": 8.697458878042992e-06, "loss": 0.21502923965454102, "step": 4253 }, { "epoch": 1.1295976629929625, "grad_norm": 1.199807552125115, "learning_rate": 8.693105644822598e-06, "loss": 0.26848286390304565, "step": 4254 }, { "epoch": 1.1298632319745054, "grad_norm": 1.1632395937948599, "learning_rate": 8.688752663556022e-06, "loss": 0.24283824861049652, "step": 4255 }, { "epoch": 1.1301288009560484, "grad_norm": 1.231861138079484, "learning_rate": 8.684399935082468e-06, "loss": 0.2511506974697113, "step": 4256 }, { "epoch": 1.1303943699375913, "grad_norm": 1.1293067099587706, "learning_rate": 8.68004746024109e-06, "loss": 0.23932483792304993, "step": 4257 }, { "epoch": 1.1306599389191343, "grad_norm": 1.229437521917496, "learning_rate": 8.675695239870993e-06, "loss": 0.30030694603919983, "step": 4258 }, { "epoch": 1.1309255079006773, "grad_norm": 1.1154596754627621, "learning_rate": 8.671343274811238e-06, "loss": 0.24699059128761292, "step": 4259 }, { "epoch": 1.1311910768822202, "grad_norm": 1.1288414782501015, "learning_rate": 8.666991565900827e-06, "loss": 0.26828041672706604, "step": 4260 }, { "epoch": 1.1314566458637632, "grad_norm": 1.0765132569205758, "learning_rate": 8.662640113978717e-06, "loss": 0.2372082769870758, "step": 4261 }, { "epoch": 1.131722214845306, "grad_norm": 1.2100447285144145, "learning_rate": 8.658288919883824e-06, "loss": 0.26367881894111633, "step": 4262 }, { "epoch": 1.131987783826849, "grad_norm": 1.1035052537421275, "learning_rate": 8.653937984455007e-06, "loss": 0.2287222146987915, "step": 4263 }, { "epoch": 1.132253352808392, "grad_norm": 1.1417963040520365, "learning_rate": 8.649587308531067e-06, "loss": 0.244521826505661, "step": 4264 }, { "epoch": 1.132518921789935, "grad_norm": 1.2243689126496846, "learning_rate": 8.64523689295077e-06, "loss": 0.26912257075309753, "step": 4265 }, { "epoch": 1.132784490771478, "grad_norm": 1.2384832947619873, "learning_rate": 8.64088673855282e-06, "loss": 0.23002780973911285, "step": 4266 }, { "epoch": 1.1330500597530209, "grad_norm": 1.253742603342847, "learning_rate": 8.636536846175878e-06, "loss": 0.2561958432197571, "step": 4267 }, { "epoch": 1.1333156287345638, "grad_norm": 1.2156026453092519, "learning_rate": 8.63218721665855e-06, "loss": 0.25553008913993835, "step": 4268 }, { "epoch": 1.1335811977161068, "grad_norm": 1.1992385112791626, "learning_rate": 8.627837850839398e-06, "loss": 0.1992083340883255, "step": 4269 }, { "epoch": 1.1338467666976497, "grad_norm": 1.3643398602160783, "learning_rate": 8.62348874955692e-06, "loss": 0.23075388371944427, "step": 4270 }, { "epoch": 1.1341123356791927, "grad_norm": 1.1072751580070286, "learning_rate": 8.619139913649582e-06, "loss": 0.23691913485527039, "step": 4271 }, { "epoch": 1.1343779046607356, "grad_norm": 1.2656689209279672, "learning_rate": 8.61479134395578e-06, "loss": 0.2536017894744873, "step": 4272 }, { "epoch": 1.1346434736422786, "grad_norm": 1.2870409796681632, "learning_rate": 8.61044304131387e-06, "loss": 0.3014161288738251, "step": 4273 }, { "epoch": 1.1349090426238215, "grad_norm": 1.1669055614665604, "learning_rate": 8.606095006562156e-06, "loss": 0.26333582401275635, "step": 4274 }, { "epoch": 1.1351746116053645, "grad_norm": 1.2370251285176135, "learning_rate": 8.601747240538883e-06, "loss": 0.23796264827251434, "step": 4275 }, { "epoch": 1.1354401805869074, "grad_norm": 1.1989417705813543, "learning_rate": 8.597399744082251e-06, "loss": 0.23737141489982605, "step": 4276 }, { "epoch": 1.1357057495684504, "grad_norm": 1.1281376384049915, "learning_rate": 8.593052518030407e-06, "loss": 0.21073032915592194, "step": 4277 }, { "epoch": 1.1359713185499933, "grad_norm": 1.2935455290015059, "learning_rate": 8.588705563221444e-06, "loss": 0.2597163915634155, "step": 4278 }, { "epoch": 1.1362368875315363, "grad_norm": 1.137636804234172, "learning_rate": 8.584358880493402e-06, "loss": 0.24541154503822327, "step": 4279 }, { "epoch": 1.1365024565130792, "grad_norm": 1.1331800338594176, "learning_rate": 8.580012470684273e-06, "loss": 0.19294027984142303, "step": 4280 }, { "epoch": 1.1367680254946222, "grad_norm": 1.2387583554091215, "learning_rate": 8.575666334631994e-06, "loss": 0.26909738779067993, "step": 4281 }, { "epoch": 1.1370335944761651, "grad_norm": 1.2850664046416893, "learning_rate": 8.571320473174444e-06, "loss": 0.2550502121448517, "step": 4282 }, { "epoch": 1.137299163457708, "grad_norm": 1.138070930000495, "learning_rate": 8.566974887149461e-06, "loss": 0.2256634682416916, "step": 4283 }, { "epoch": 1.137564732439251, "grad_norm": 1.3289753418379673, "learning_rate": 8.562629577394817e-06, "loss": 0.26154983043670654, "step": 4284 }, { "epoch": 1.137830301420794, "grad_norm": 1.2426566834274124, "learning_rate": 8.558284544748239e-06, "loss": 0.24685145914554596, "step": 4285 }, { "epoch": 1.138095870402337, "grad_norm": 1.177162412641928, "learning_rate": 8.553939790047396e-06, "loss": 0.2584421932697296, "step": 4286 }, { "epoch": 1.13836143938388, "grad_norm": 1.2486541463378953, "learning_rate": 8.549595314129907e-06, "loss": 0.24582788348197937, "step": 4287 }, { "epoch": 1.1386270083654229, "grad_norm": 1.1978925998644077, "learning_rate": 8.545251117833334e-06, "loss": 0.26023977994918823, "step": 4288 }, { "epoch": 1.1388925773469658, "grad_norm": 1.2566090334130535, "learning_rate": 8.54090720199519e-06, "loss": 0.25575515627861023, "step": 4289 }, { "epoch": 1.1391581463285088, "grad_norm": 1.2234599227483165, "learning_rate": 8.53656356745293e-06, "loss": 0.2784460783004761, "step": 4290 }, { "epoch": 1.1394237153100517, "grad_norm": 1.11922615590049, "learning_rate": 8.532220215043953e-06, "loss": 0.24723297357559204, "step": 4291 }, { "epoch": 1.1396892842915947, "grad_norm": 1.1960822646368614, "learning_rate": 8.52787714560561e-06, "loss": 0.24694418907165527, "step": 4292 }, { "epoch": 1.1399548532731376, "grad_norm": 1.2073723964066632, "learning_rate": 8.52353435997519e-06, "loss": 0.19976040720939636, "step": 4293 }, { "epoch": 1.1402204222546806, "grad_norm": 1.0875644999756633, "learning_rate": 8.519191858989932e-06, "loss": 0.21742458641529083, "step": 4294 }, { "epoch": 1.1404859912362235, "grad_norm": 1.2040315384402727, "learning_rate": 8.514849643487018e-06, "loss": 0.26382917165756226, "step": 4295 }, { "epoch": 1.1407515602177665, "grad_norm": 1.3073789721234685, "learning_rate": 8.510507714303577e-06, "loss": 0.30778488516807556, "step": 4296 }, { "epoch": 1.1410171291993096, "grad_norm": 1.0727267660957265, "learning_rate": 8.506166072276681e-06, "loss": 0.20894449949264526, "step": 4297 }, { "epoch": 1.1412826981808526, "grad_norm": 1.2119089915252295, "learning_rate": 8.50182471824335e-06, "loss": 0.2389567494392395, "step": 4298 }, { "epoch": 1.1415482671623955, "grad_norm": 1.0286533711803312, "learning_rate": 8.497483653040545e-06, "loss": 0.20531126856803894, "step": 4299 }, { "epoch": 1.1418138361439385, "grad_norm": 1.2153067733576255, "learning_rate": 8.49314287750517e-06, "loss": 0.2577363848686218, "step": 4300 }, { "epoch": 1.1420794051254815, "grad_norm": 1.211343687077752, "learning_rate": 8.488802392474076e-06, "loss": 0.24225997924804688, "step": 4301 }, { "epoch": 1.1423449741070244, "grad_norm": 1.2698570110354703, "learning_rate": 8.484462198784058e-06, "loss": 0.26494917273521423, "step": 4302 }, { "epoch": 1.1426105430885674, "grad_norm": 1.2988704892129896, "learning_rate": 8.480122297271855e-06, "loss": 0.24903994798660278, "step": 4303 }, { "epoch": 1.1428761120701103, "grad_norm": 1.1681075442122268, "learning_rate": 8.475782688774147e-06, "loss": 0.25291907787323, "step": 4304 }, { "epoch": 1.1431416810516533, "grad_norm": 1.1301459507046017, "learning_rate": 8.47144337412756e-06, "loss": 0.22958475351333618, "step": 4305 }, { "epoch": 1.1434072500331962, "grad_norm": 1.175766015682232, "learning_rate": 8.46710435416866e-06, "loss": 0.2305452972650528, "step": 4306 }, { "epoch": 1.1436728190147392, "grad_norm": 1.2105790475425935, "learning_rate": 8.462765629733965e-06, "loss": 0.25028055906295776, "step": 4307 }, { "epoch": 1.1439383879962821, "grad_norm": 1.2809924485725674, "learning_rate": 8.458427201659926e-06, "loss": 0.24873222410678864, "step": 4308 }, { "epoch": 1.144203956977825, "grad_norm": 1.2345010944986379, "learning_rate": 8.454089070782943e-06, "loss": 0.23396535217761993, "step": 4309 }, { "epoch": 1.144469525959368, "grad_norm": 1.1955062282547588, "learning_rate": 8.449751237939354e-06, "loss": 0.27120494842529297, "step": 4310 }, { "epoch": 1.144735094940911, "grad_norm": 1.182924840045628, "learning_rate": 8.445413703965441e-06, "loss": 0.2734759449958801, "step": 4311 }, { "epoch": 1.145000663922454, "grad_norm": 1.1584309667252248, "learning_rate": 8.441076469697434e-06, "loss": 0.25353512167930603, "step": 4312 }, { "epoch": 1.1452662329039969, "grad_norm": 1.1913513856414861, "learning_rate": 8.436739535971497e-06, "loss": 0.23851020634174347, "step": 4313 }, { "epoch": 1.1455318018855398, "grad_norm": 1.2006838398252668, "learning_rate": 8.432402903623741e-06, "loss": 0.26320093870162964, "step": 4314 }, { "epoch": 1.1457973708670828, "grad_norm": 1.1065666799118796, "learning_rate": 8.428066573490211e-06, "loss": 0.23859955370426178, "step": 4315 }, { "epoch": 1.1460629398486257, "grad_norm": 1.197716796975668, "learning_rate": 8.423730546406911e-06, "loss": 0.2636772096157074, "step": 4316 }, { "epoch": 1.1463285088301687, "grad_norm": 1.2459962038175347, "learning_rate": 8.419394823209773e-06, "loss": 0.2656415104866028, "step": 4317 }, { "epoch": 1.1465940778117116, "grad_norm": 1.2225993542972535, "learning_rate": 8.41505940473467e-06, "loss": 0.2872830033302307, "step": 4318 }, { "epoch": 1.1468596467932546, "grad_norm": 1.4653362839323858, "learning_rate": 8.410724291817422e-06, "loss": 0.229783833026886, "step": 4319 }, { "epoch": 1.1471252157747975, "grad_norm": 4.273944826146497, "learning_rate": 8.406389485293786e-06, "loss": 0.24418675899505615, "step": 4320 }, { "epoch": 1.1473907847563405, "grad_norm": 1.2385236183806463, "learning_rate": 8.402054985999464e-06, "loss": 0.2535584270954132, "step": 4321 }, { "epoch": 1.1476563537378834, "grad_norm": 1.2116145926695832, "learning_rate": 8.397720794770093e-06, "loss": 0.23207828402519226, "step": 4322 }, { "epoch": 1.1479219227194264, "grad_norm": 1.8129143471218838, "learning_rate": 8.393386912441257e-06, "loss": 0.27990391850471497, "step": 4323 }, { "epoch": 1.1481874917009693, "grad_norm": 1.059877272327032, "learning_rate": 8.38905333984847e-06, "loss": 0.2098318189382553, "step": 4324 }, { "epoch": 1.1484530606825123, "grad_norm": 1.1462464609840002, "learning_rate": 8.384720077827204e-06, "loss": 0.25303804874420166, "step": 4325 }, { "epoch": 1.1487186296640552, "grad_norm": 1.0794728099252306, "learning_rate": 8.380387127212858e-06, "loss": 0.23481838405132294, "step": 4326 }, { "epoch": 1.1489841986455982, "grad_norm": 1.1782142095551065, "learning_rate": 8.376054488840771e-06, "loss": 0.24842356145381927, "step": 4327 }, { "epoch": 1.1492497676271411, "grad_norm": 1.136832039914945, "learning_rate": 8.37172216354623e-06, "loss": 0.23927366733551025, "step": 4328 }, { "epoch": 1.149515336608684, "grad_norm": 1.1577812724546028, "learning_rate": 8.367390152164448e-06, "loss": 0.23836453258991241, "step": 4329 }, { "epoch": 1.149780905590227, "grad_norm": 1.2492179140984832, "learning_rate": 8.36305845553059e-06, "loss": 0.2562161982059479, "step": 4330 }, { "epoch": 1.15004647457177, "grad_norm": 1.120151700121908, "learning_rate": 8.358727074479755e-06, "loss": 0.21255920827388763, "step": 4331 }, { "epoch": 1.150312043553313, "grad_norm": 1.1011600870179878, "learning_rate": 8.354396009846985e-06, "loss": 0.24200043082237244, "step": 4332 }, { "epoch": 1.150577612534856, "grad_norm": 1.1644551235897023, "learning_rate": 8.35006526246725e-06, "loss": 0.23582379519939423, "step": 4333 }, { "epoch": 1.1508431815163989, "grad_norm": 1.093546349726341, "learning_rate": 8.34573483317548e-06, "loss": 0.21554499864578247, "step": 4334 }, { "epoch": 1.1511087504979418, "grad_norm": 1.2460346716976907, "learning_rate": 8.341404722806525e-06, "loss": 0.2789759039878845, "step": 4335 }, { "epoch": 1.1513743194794848, "grad_norm": 1.212813860768853, "learning_rate": 8.337074932195175e-06, "loss": 0.24677832424640656, "step": 4336 }, { "epoch": 1.1516398884610277, "grad_norm": 1.2351497128261646, "learning_rate": 8.332745462176166e-06, "loss": 0.28122392296791077, "step": 4337 }, { "epoch": 1.1519054574425707, "grad_norm": 1.2447069177647443, "learning_rate": 8.328416313584169e-06, "loss": 0.23219403624534607, "step": 4338 }, { "epoch": 1.1521710264241136, "grad_norm": 1.1258797089625292, "learning_rate": 8.324087487253792e-06, "loss": 0.19928379356861115, "step": 4339 }, { "epoch": 1.1524365954056566, "grad_norm": 1.2737910298174706, "learning_rate": 8.31975898401958e-06, "loss": 0.27730467915534973, "step": 4340 }, { "epoch": 1.1527021643871995, "grad_norm": 1.3906235348842741, "learning_rate": 8.315430804716022e-06, "loss": 0.25462737679481506, "step": 4341 }, { "epoch": 1.1529677333687425, "grad_norm": 1.1703737499238527, "learning_rate": 8.311102950177533e-06, "loss": 0.2363007366657257, "step": 4342 }, { "epoch": 1.1532333023502854, "grad_norm": 1.2498285131266695, "learning_rate": 8.306775421238482e-06, "loss": 0.2648352384567261, "step": 4343 }, { "epoch": 1.1534988713318284, "grad_norm": 1.394847110607811, "learning_rate": 8.302448218733158e-06, "loss": 0.25645309686660767, "step": 4344 }, { "epoch": 1.1537644403133713, "grad_norm": 1.2178564426244172, "learning_rate": 8.298121343495797e-06, "loss": 0.22962522506713867, "step": 4345 }, { "epoch": 1.1540300092949143, "grad_norm": 1.132403649349265, "learning_rate": 8.293794796360569e-06, "loss": 0.21269623935222626, "step": 4346 }, { "epoch": 1.1542955782764572, "grad_norm": 1.1646919704485588, "learning_rate": 8.289468578161581e-06, "loss": 0.2518436014652252, "step": 4347 }, { "epoch": 1.1545611472580002, "grad_norm": 1.193830808481187, "learning_rate": 8.285142689732877e-06, "loss": 0.2318439483642578, "step": 4348 }, { "epoch": 1.1548267162395431, "grad_norm": 1.0953821300718658, "learning_rate": 8.280817131908438e-06, "loss": 0.2278512567281723, "step": 4349 }, { "epoch": 1.155092285221086, "grad_norm": 1.3446091578493078, "learning_rate": 8.27649190552218e-06, "loss": 0.2521114945411682, "step": 4350 }, { "epoch": 1.155357854202629, "grad_norm": 1.1722019112748296, "learning_rate": 8.272167011407955e-06, "loss": 0.2565760016441345, "step": 4351 }, { "epoch": 1.155623423184172, "grad_norm": 1.3209067321897832, "learning_rate": 8.267842450399552e-06, "loss": 0.2603546679019928, "step": 4352 }, { "epoch": 1.155888992165715, "grad_norm": 1.1697050726438265, "learning_rate": 8.263518223330698e-06, "loss": 0.2175855189561844, "step": 4353 }, { "epoch": 1.156154561147258, "grad_norm": 1.1937135661774867, "learning_rate": 8.25919433103505e-06, "loss": 0.24521774053573608, "step": 4354 }, { "epoch": 1.1564201301288008, "grad_norm": 1.3267445452853517, "learning_rate": 8.254870774346203e-06, "loss": 0.29673823714256287, "step": 4355 }, { "epoch": 1.1566856991103438, "grad_norm": 1.260162624950344, "learning_rate": 8.25054755409769e-06, "loss": 0.26994144916534424, "step": 4356 }, { "epoch": 1.1569512680918868, "grad_norm": 1.1578908727655277, "learning_rate": 8.246224671122974e-06, "loss": 0.2545935809612274, "step": 4357 }, { "epoch": 1.1572168370734297, "grad_norm": 1.1469888258961152, "learning_rate": 8.241902126255458e-06, "loss": 0.23589034378528595, "step": 4358 }, { "epoch": 1.1574824060549727, "grad_norm": 1.229284708155894, "learning_rate": 8.237579920328478e-06, "loss": 0.2617190480232239, "step": 4359 }, { "epoch": 1.1577479750365158, "grad_norm": 1.2741716320060574, "learning_rate": 8.233258054175302e-06, "loss": 0.3092418313026428, "step": 4360 }, { "epoch": 1.1580135440180588, "grad_norm": 1.1377305602079475, "learning_rate": 8.228936528629138e-06, "loss": 0.22873908281326294, "step": 4361 }, { "epoch": 1.1582791129996017, "grad_norm": 1.0592847205754, "learning_rate": 8.224615344523123e-06, "loss": 0.22549089789390564, "step": 4362 }, { "epoch": 1.1585446819811447, "grad_norm": 1.0288617285826194, "learning_rate": 8.22029450269033e-06, "loss": 0.19141459465026855, "step": 4363 }, { "epoch": 1.1588102509626876, "grad_norm": 1.1679333849265336, "learning_rate": 8.21597400396377e-06, "loss": 0.24277547001838684, "step": 4364 }, { "epoch": 1.1590758199442306, "grad_norm": 1.1463053400858605, "learning_rate": 8.21165384917638e-06, "loss": 0.2429513931274414, "step": 4365 }, { "epoch": 1.1593413889257735, "grad_norm": 1.0775583631999657, "learning_rate": 8.207334039161035e-06, "loss": 0.24710172414779663, "step": 4366 }, { "epoch": 1.1596069579073165, "grad_norm": 1.1226530732908067, "learning_rate": 8.203014574750546e-06, "loss": 0.2553783357143402, "step": 4367 }, { "epoch": 1.1598725268888594, "grad_norm": 1.1664625510577165, "learning_rate": 8.198695456777653e-06, "loss": 0.2558436095714569, "step": 4368 }, { "epoch": 1.1601380958704024, "grad_norm": 1.093371491828669, "learning_rate": 8.19437668607503e-06, "loss": 0.20780377089977264, "step": 4369 }, { "epoch": 1.1604036648519453, "grad_norm": 1.0184271240235683, "learning_rate": 8.190058263475288e-06, "loss": 0.22397254407405853, "step": 4370 }, { "epoch": 1.1606692338334883, "grad_norm": 1.1123966470918765, "learning_rate": 8.185740189810967e-06, "loss": 0.2763773798942566, "step": 4371 }, { "epoch": 1.1609348028150313, "grad_norm": 1.234569017856286, "learning_rate": 8.181422465914541e-06, "loss": 0.2801940441131592, "step": 4372 }, { "epoch": 1.1612003717965742, "grad_norm": 1.3078225086374202, "learning_rate": 8.177105092618413e-06, "loss": 0.20949441194534302, "step": 4373 }, { "epoch": 1.1614659407781172, "grad_norm": 1.020800458401727, "learning_rate": 8.172788070754927e-06, "loss": 0.24503354728221893, "step": 4374 }, { "epoch": 1.16173150975966, "grad_norm": 1.212252624187319, "learning_rate": 8.16847140115635e-06, "loss": 0.256147563457489, "step": 4375 }, { "epoch": 1.161997078741203, "grad_norm": 1.079933692504349, "learning_rate": 8.164155084654886e-06, "loss": 0.2178848683834076, "step": 4376 }, { "epoch": 1.162262647722746, "grad_norm": 1.0121292441974634, "learning_rate": 8.159839122082668e-06, "loss": 0.22624582052230835, "step": 4377 }, { "epoch": 1.162528216704289, "grad_norm": 1.0294597777179986, "learning_rate": 8.155523514271764e-06, "loss": 0.2184191346168518, "step": 4378 }, { "epoch": 1.162793785685832, "grad_norm": 1.2825595051682412, "learning_rate": 8.151208262054175e-06, "loss": 0.2623840868473053, "step": 4379 }, { "epoch": 1.1630593546673749, "grad_norm": 1.2529929341607686, "learning_rate": 8.14689336626183e-06, "loss": 0.27181199193000793, "step": 4380 }, { "epoch": 1.1633249236489178, "grad_norm": 1.282994089786083, "learning_rate": 8.142578827726587e-06, "loss": 0.2791554629802704, "step": 4381 }, { "epoch": 1.1635904926304608, "grad_norm": 1.221608581014812, "learning_rate": 8.13826464728024e-06, "loss": 0.2466641068458557, "step": 4382 }, { "epoch": 1.1638560616120037, "grad_norm": 0.9724735599541757, "learning_rate": 8.133950825754511e-06, "loss": 0.1951724737882614, "step": 4383 }, { "epoch": 1.1641216305935467, "grad_norm": 1.2462068833977051, "learning_rate": 8.129637363981056e-06, "loss": 0.2520062029361725, "step": 4384 }, { "epoch": 1.1643871995750896, "grad_norm": 1.230128345167748, "learning_rate": 8.12532426279146e-06, "loss": 0.24101334810256958, "step": 4385 }, { "epoch": 1.1646527685566326, "grad_norm": 1.244671245504639, "learning_rate": 8.121011523017235e-06, "loss": 0.2741190791130066, "step": 4386 }, { "epoch": 1.1649183375381755, "grad_norm": 1.1570746383559662, "learning_rate": 8.116699145489822e-06, "loss": 0.2575281858444214, "step": 4387 }, { "epoch": 1.1651839065197185, "grad_norm": 1.157233381368316, "learning_rate": 8.112387131040608e-06, "loss": 0.2557298243045807, "step": 4388 }, { "epoch": 1.1654494755012614, "grad_norm": 1.2560692108341776, "learning_rate": 8.108075480500892e-06, "loss": 0.27485036849975586, "step": 4389 }, { "epoch": 1.1657150444828044, "grad_norm": 1.2517544472207511, "learning_rate": 8.103764194701909e-06, "loss": 0.26458340883255005, "step": 4390 }, { "epoch": 1.1659806134643473, "grad_norm": 1.2310585386329624, "learning_rate": 8.099453274474827e-06, "loss": 0.2281840592622757, "step": 4391 }, { "epoch": 1.1662461824458903, "grad_norm": 1.2367230880082285, "learning_rate": 8.095142720650739e-06, "loss": 0.24956555664539337, "step": 4392 }, { "epoch": 1.1665117514274332, "grad_norm": 1.109202461245095, "learning_rate": 8.090832534060671e-06, "loss": 0.22619420289993286, "step": 4393 }, { "epoch": 1.1667773204089762, "grad_norm": 1.2922206575995636, "learning_rate": 8.086522715535571e-06, "loss": 0.2780688405036926, "step": 4394 }, { "epoch": 1.1670428893905191, "grad_norm": 1.2699378735794575, "learning_rate": 8.082213265906323e-06, "loss": 0.2600886821746826, "step": 4395 }, { "epoch": 1.167308458372062, "grad_norm": 1.244234758234162, "learning_rate": 8.077904186003736e-06, "loss": 0.25049078464508057, "step": 4396 }, { "epoch": 1.167574027353605, "grad_norm": 1.2327544821473595, "learning_rate": 8.073595476658558e-06, "loss": 0.27745798230171204, "step": 4397 }, { "epoch": 1.167839596335148, "grad_norm": 1.1682547274263488, "learning_rate": 8.069287138701452e-06, "loss": 0.2191929668188095, "step": 4398 }, { "epoch": 1.168105165316691, "grad_norm": 1.297306908163856, "learning_rate": 8.064979172963014e-06, "loss": 0.24307313561439514, "step": 4399 }, { "epoch": 1.168370734298234, "grad_norm": 1.1837345133145987, "learning_rate": 8.060671580273772e-06, "loss": 0.23036238551139832, "step": 4400 }, { "epoch": 1.1686363032797769, "grad_norm": 1.096627050675377, "learning_rate": 8.056364361464176e-06, "loss": 0.2394433617591858, "step": 4401 }, { "epoch": 1.1689018722613198, "grad_norm": 1.183557399538609, "learning_rate": 8.052057517364608e-06, "loss": 0.24099211394786835, "step": 4402 }, { "epoch": 1.1691674412428628, "grad_norm": 1.1293667282926971, "learning_rate": 8.047751048805376e-06, "loss": 0.22036173939704895, "step": 4403 }, { "epoch": 1.1694330102244057, "grad_norm": 1.185484128157471, "learning_rate": 8.043444956616717e-06, "loss": 0.22400429844856262, "step": 4404 }, { "epoch": 1.1696985792059487, "grad_norm": 1.0594769241160498, "learning_rate": 8.039139241628792e-06, "loss": 0.21649131178855896, "step": 4405 }, { "epoch": 1.1699641481874916, "grad_norm": 1.150957898906185, "learning_rate": 8.034833904671698e-06, "loss": 0.23412205278873444, "step": 4406 }, { "epoch": 1.1702297171690346, "grad_norm": 1.2025485392569255, "learning_rate": 8.030528946575453e-06, "loss": 0.23822304606437683, "step": 4407 }, { "epoch": 1.1704952861505775, "grad_norm": 1.2929661052617345, "learning_rate": 8.026224368169998e-06, "loss": 0.29250186681747437, "step": 4408 }, { "epoch": 1.1707608551321207, "grad_norm": 1.4098437716027425, "learning_rate": 8.021920170285205e-06, "loss": 0.26794207096099854, "step": 4409 }, { "epoch": 1.1710264241136636, "grad_norm": 1.2469013694849018, "learning_rate": 8.017616353750874e-06, "loss": 0.2573787271976471, "step": 4410 }, { "epoch": 1.1712919930952066, "grad_norm": 1.1835378975512396, "learning_rate": 8.01331291939673e-06, "loss": 0.2744356691837311, "step": 4411 }, { "epoch": 1.1715575620767495, "grad_norm": 1.4542599881672131, "learning_rate": 8.009009868052424e-06, "loss": 0.2582886815071106, "step": 4412 }, { "epoch": 1.1718231310582925, "grad_norm": 1.1766031171819216, "learning_rate": 8.004707200547534e-06, "loss": 0.2553568482398987, "step": 4413 }, { "epoch": 1.1720887000398355, "grad_norm": 1.144579662849428, "learning_rate": 8.00040491771156e-06, "loss": 0.2670289874076843, "step": 4414 }, { "epoch": 1.1723542690213784, "grad_norm": 1.1520006084984327, "learning_rate": 7.99610302037394e-06, "loss": 0.215460866689682, "step": 4415 }, { "epoch": 1.1726198380029214, "grad_norm": 1.2764670908026035, "learning_rate": 7.991801509364023e-06, "loss": 0.26481571793556213, "step": 4416 }, { "epoch": 1.1728854069844643, "grad_norm": 1.0239999030663398, "learning_rate": 7.98750038551109e-06, "loss": 0.2060776650905609, "step": 4417 }, { "epoch": 1.1731509759660073, "grad_norm": 1.147707044406535, "learning_rate": 7.983199649644349e-06, "loss": 0.2401561588048935, "step": 4418 }, { "epoch": 1.1734165449475502, "grad_norm": 1.3064882111410037, "learning_rate": 7.978899302592927e-06, "loss": 0.2545842230319977, "step": 4419 }, { "epoch": 1.1736821139290932, "grad_norm": 1.199445262296627, "learning_rate": 7.974599345185884e-06, "loss": 0.29925093054771423, "step": 4420 }, { "epoch": 1.1739476829106361, "grad_norm": 1.7583031900565322, "learning_rate": 7.9702997782522e-06, "loss": 0.23944757878780365, "step": 4421 }, { "epoch": 1.174213251892179, "grad_norm": 1.057746400765015, "learning_rate": 7.96600060262078e-06, "loss": 0.23745761811733246, "step": 4422 }, { "epoch": 1.174478820873722, "grad_norm": 1.1164780002442092, "learning_rate": 7.961701819120453e-06, "loss": 0.22170330584049225, "step": 4423 }, { "epoch": 1.174744389855265, "grad_norm": 1.2607094160663312, "learning_rate": 7.95740342857998e-06, "loss": 0.2645890712738037, "step": 4424 }, { "epoch": 1.175009958836808, "grad_norm": 1.2171129338535713, "learning_rate": 7.953105431828032e-06, "loss": 0.25232207775115967, "step": 4425 }, { "epoch": 1.1752755278183509, "grad_norm": 1.20503293579659, "learning_rate": 7.948807829693219e-06, "loss": 0.2656644880771637, "step": 4426 }, { "epoch": 1.1755410967998938, "grad_norm": 1.069230366230624, "learning_rate": 7.944510623004063e-06, "loss": 0.25290653109550476, "step": 4427 }, { "epoch": 1.1758066657814368, "grad_norm": 1.1825821036814732, "learning_rate": 7.940213812589018e-06, "loss": 0.27464741468429565, "step": 4428 }, { "epoch": 1.1760722347629797, "grad_norm": 1.4910942744639428, "learning_rate": 7.935917399276455e-06, "loss": 0.2562064528465271, "step": 4429 }, { "epoch": 1.1763378037445227, "grad_norm": 1.2720371671465533, "learning_rate": 7.931621383894676e-06, "loss": 0.267793208360672, "step": 4430 }, { "epoch": 1.1766033727260656, "grad_norm": 1.1490167098873316, "learning_rate": 7.9273257672719e-06, "loss": 0.23651085793972015, "step": 4431 }, { "epoch": 1.1768689417076086, "grad_norm": 1.0804412076412697, "learning_rate": 7.923030550236267e-06, "loss": 0.23691008985042572, "step": 4432 }, { "epoch": 1.1771345106891515, "grad_norm": 1.1540873295746452, "learning_rate": 7.918735733615852e-06, "loss": 0.24495704472064972, "step": 4433 }, { "epoch": 1.1774000796706945, "grad_norm": 1.4423069413713672, "learning_rate": 7.91444131823864e-06, "loss": 0.25423017144203186, "step": 4434 }, { "epoch": 1.1776656486522374, "grad_norm": 1.1113893983435537, "learning_rate": 7.910147304932548e-06, "loss": 0.22870390117168427, "step": 4435 }, { "epoch": 1.1779312176337804, "grad_norm": 1.0473620824498977, "learning_rate": 7.905853694525405e-06, "loss": 0.23037508130073547, "step": 4436 }, { "epoch": 1.1781967866153233, "grad_norm": 1.2886040363623328, "learning_rate": 7.901560487844973e-06, "loss": 0.31184864044189453, "step": 4437 }, { "epoch": 1.1784623555968663, "grad_norm": 1.302197101799982, "learning_rate": 7.89726768571893e-06, "loss": 0.24140426516532898, "step": 4438 }, { "epoch": 1.1787279245784092, "grad_norm": 1.2134032336682008, "learning_rate": 7.892975288974877e-06, "loss": 0.25602301955223083, "step": 4439 }, { "epoch": 1.1789934935599522, "grad_norm": 1.1868063067331378, "learning_rate": 7.888683298440339e-06, "loss": 0.2717514932155609, "step": 4440 }, { "epoch": 1.1792590625414952, "grad_norm": 1.1670818939848298, "learning_rate": 7.884391714942757e-06, "loss": 0.252475380897522, "step": 4441 }, { "epoch": 1.179524631523038, "grad_norm": 1.161546405047816, "learning_rate": 7.880100539309506e-06, "loss": 0.24777942895889282, "step": 4442 }, { "epoch": 1.179790200504581, "grad_norm": 1.194146333188245, "learning_rate": 7.875809772367867e-06, "loss": 0.25111010670661926, "step": 4443 }, { "epoch": 1.180055769486124, "grad_norm": 1.163412583383914, "learning_rate": 7.87151941494505e-06, "loss": 0.26183217763900757, "step": 4444 }, { "epoch": 1.180321338467667, "grad_norm": 1.2974065116766642, "learning_rate": 7.867229467868189e-06, "loss": 0.27538490295410156, "step": 4445 }, { "epoch": 1.18058690744921, "grad_norm": 1.078206017492716, "learning_rate": 7.862939931964333e-06, "loss": 0.2192106693983078, "step": 4446 }, { "epoch": 1.1808524764307529, "grad_norm": 1.2415747879020278, "learning_rate": 7.858650808060453e-06, "loss": 0.26506057381629944, "step": 4447 }, { "epoch": 1.1811180454122958, "grad_norm": 1.103375758703505, "learning_rate": 7.854362096983443e-06, "loss": 0.2345719337463379, "step": 4448 }, { "epoch": 1.1813836143938388, "grad_norm": 1.1651284585435833, "learning_rate": 7.850073799560114e-06, "loss": 0.21404311060905457, "step": 4449 }, { "epoch": 1.1816491833753817, "grad_norm": 1.1572235550991925, "learning_rate": 7.8457859166172e-06, "loss": 0.24332138895988464, "step": 4450 }, { "epoch": 1.1819147523569247, "grad_norm": 1.1687901862394692, "learning_rate": 7.841498448981354e-06, "loss": 0.25025150179862976, "step": 4451 }, { "epoch": 1.1821803213384676, "grad_norm": 1.167419454587793, "learning_rate": 7.837211397479152e-06, "loss": 0.21918940544128418, "step": 4452 }, { "epoch": 1.1824458903200106, "grad_norm": 1.1517463754639392, "learning_rate": 7.832924762937083e-06, "loss": 0.24976079165935516, "step": 4453 }, { "epoch": 1.1827114593015535, "grad_norm": 1.1165052000707918, "learning_rate": 7.828638546181565e-06, "loss": 0.21146243810653687, "step": 4454 }, { "epoch": 1.1829770282830965, "grad_norm": 1.1110608449393633, "learning_rate": 7.824352748038924e-06, "loss": 0.22921445965766907, "step": 4455 }, { "epoch": 1.1832425972646394, "grad_norm": 1.1833669908026252, "learning_rate": 7.820067369335413e-06, "loss": 0.24401478469371796, "step": 4456 }, { "epoch": 1.1835081662461824, "grad_norm": 1.2543977272663969, "learning_rate": 7.815782410897209e-06, "loss": 0.2717207074165344, "step": 4457 }, { "epoch": 1.1837737352277253, "grad_norm": 1.0934075655453726, "learning_rate": 7.81149787355039e-06, "loss": 0.20752058923244476, "step": 4458 }, { "epoch": 1.1840393042092683, "grad_norm": 1.3448722481333402, "learning_rate": 7.807213758120965e-06, "loss": 0.31095850467681885, "step": 4459 }, { "epoch": 1.1843048731908112, "grad_norm": 1.1769654791590503, "learning_rate": 7.802930065434874e-06, "loss": 0.23761102557182312, "step": 4460 }, { "epoch": 1.1845704421723542, "grad_norm": 1.3225327364557968, "learning_rate": 7.798646796317952e-06, "loss": 0.2509460151195526, "step": 4461 }, { "epoch": 1.1848360111538971, "grad_norm": 1.472525937697874, "learning_rate": 7.794363951595966e-06, "loss": 0.25903213024139404, "step": 4462 }, { "epoch": 1.18510158013544, "grad_norm": 1.1904413554334654, "learning_rate": 7.790081532094596e-06, "loss": 0.23304736614227295, "step": 4463 }, { "epoch": 1.185367149116983, "grad_norm": 1.311875765456408, "learning_rate": 7.785799538639445e-06, "loss": 0.28707265853881836, "step": 4464 }, { "epoch": 1.185632718098526, "grad_norm": 1.0202920254712324, "learning_rate": 7.781517972056028e-06, "loss": 0.20282745361328125, "step": 4465 }, { "epoch": 1.185898287080069, "grad_norm": 1.2606153791729335, "learning_rate": 7.777236833169782e-06, "loss": 0.24056631326675415, "step": 4466 }, { "epoch": 1.186163856061612, "grad_norm": 1.4946194524955894, "learning_rate": 7.772956122806058e-06, "loss": 0.2677255868911743, "step": 4467 }, { "epoch": 1.1864294250431549, "grad_norm": 1.2681064192856966, "learning_rate": 7.768675841790124e-06, "loss": 0.22032876312732697, "step": 4468 }, { "epoch": 1.1866949940246978, "grad_norm": 1.3138325978828467, "learning_rate": 7.764395990947177e-06, "loss": 0.2980336546897888, "step": 4469 }, { "epoch": 1.1869605630062408, "grad_norm": 1.2624280680532078, "learning_rate": 7.760116571102314e-06, "loss": 0.2562638521194458, "step": 4470 }, { "epoch": 1.1872261319877837, "grad_norm": 1.2207997545500016, "learning_rate": 7.755837583080561e-06, "loss": 0.262576699256897, "step": 4471 }, { "epoch": 1.1874917009693267, "grad_norm": 1.2672893771429377, "learning_rate": 7.751559027706858e-06, "loss": 0.2654029130935669, "step": 4472 }, { "epoch": 1.1877572699508698, "grad_norm": 1.2996444615622489, "learning_rate": 7.747280905806051e-06, "loss": 0.2946662902832031, "step": 4473 }, { "epoch": 1.1880228389324128, "grad_norm": 1.193974235945654, "learning_rate": 7.743003218202921e-06, "loss": 0.25140905380249023, "step": 4474 }, { "epoch": 1.1882884079139557, "grad_norm": 1.2240016583398612, "learning_rate": 7.738725965722149e-06, "loss": 0.2601654529571533, "step": 4475 }, { "epoch": 1.1885539768954987, "grad_norm": 1.9675422662507516, "learning_rate": 7.73444914918834e-06, "loss": 0.2639954090118408, "step": 4476 }, { "epoch": 1.1888195458770416, "grad_norm": 1.174151986382161, "learning_rate": 7.730172769426014e-06, "loss": 0.23391291499137878, "step": 4477 }, { "epoch": 1.1890851148585846, "grad_norm": 2.254589386622623, "learning_rate": 7.725896827259613e-06, "loss": 0.2912144958972931, "step": 4478 }, { "epoch": 1.1893506838401275, "grad_norm": 1.0905445077469016, "learning_rate": 7.72162132351348e-06, "loss": 0.23867549002170563, "step": 4479 }, { "epoch": 1.1896162528216705, "grad_norm": 1.1124853975848743, "learning_rate": 7.717346259011888e-06, "loss": 0.22434742748737335, "step": 4480 }, { "epoch": 1.1898818218032134, "grad_norm": 1.2440839352544732, "learning_rate": 7.713071634579017e-06, "loss": 0.2504398822784424, "step": 4481 }, { "epoch": 1.1901473907847564, "grad_norm": 1.1759629506533034, "learning_rate": 7.70879745103896e-06, "loss": 0.24887195229530334, "step": 4482 }, { "epoch": 1.1904129597662994, "grad_norm": 1.2603454999195398, "learning_rate": 7.704523709215732e-06, "loss": 0.2730141580104828, "step": 4483 }, { "epoch": 1.1906785287478423, "grad_norm": 1.2285382464481551, "learning_rate": 7.70025040993326e-06, "loss": 0.22197315096855164, "step": 4484 }, { "epoch": 1.1909440977293853, "grad_norm": 1.2004564929121084, "learning_rate": 7.695977554015387e-06, "loss": 0.2852731943130493, "step": 4485 }, { "epoch": 1.1912096667109282, "grad_norm": 1.2815387200597224, "learning_rate": 7.691705142285863e-06, "loss": 0.2577238976955414, "step": 4486 }, { "epoch": 1.1914752356924712, "grad_norm": 1.066499567502605, "learning_rate": 7.68743317556837e-06, "loss": 0.23510503768920898, "step": 4487 }, { "epoch": 1.191740804674014, "grad_norm": 1.557745891642732, "learning_rate": 7.683161654686486e-06, "loss": 0.2553985118865967, "step": 4488 }, { "epoch": 1.192006373655557, "grad_norm": 1.1965147913981737, "learning_rate": 7.67889058046371e-06, "loss": 0.2778642475605011, "step": 4489 }, { "epoch": 1.1922719426371, "grad_norm": 1.1622951487110165, "learning_rate": 7.674619953723455e-06, "loss": 0.24740618467330933, "step": 4490 }, { "epoch": 1.192537511618643, "grad_norm": 1.1598996003550786, "learning_rate": 7.670349775289047e-06, "loss": 0.2453901171684265, "step": 4491 }, { "epoch": 1.192803080600186, "grad_norm": 1.1444233008842855, "learning_rate": 7.666080045983726e-06, "loss": 0.2336064875125885, "step": 4492 }, { "epoch": 1.1930686495817289, "grad_norm": 1.18047841753512, "learning_rate": 7.661810766630648e-06, "loss": 0.2375800907611847, "step": 4493 }, { "epoch": 1.1933342185632718, "grad_norm": 1.1241813274405275, "learning_rate": 7.657541938052876e-06, "loss": 0.21272733807563782, "step": 4494 }, { "epoch": 1.1935997875448148, "grad_norm": 1.1531042348696576, "learning_rate": 7.65327356107339e-06, "loss": 0.26597708463668823, "step": 4495 }, { "epoch": 1.1938653565263577, "grad_norm": 1.1715955143508257, "learning_rate": 7.649005636515088e-06, "loss": 0.267806738615036, "step": 4496 }, { "epoch": 1.1941309255079007, "grad_norm": 1.1812545197713797, "learning_rate": 7.64473816520077e-06, "loss": 0.2260194569826126, "step": 4497 }, { "epoch": 1.1943964944894436, "grad_norm": 1.298416110387325, "learning_rate": 7.640471147953157e-06, "loss": 0.24523532390594482, "step": 4498 }, { "epoch": 1.1946620634709866, "grad_norm": 1.1020194586485352, "learning_rate": 7.636204585594879e-06, "loss": 0.23230910301208496, "step": 4499 }, { "epoch": 1.1949276324525295, "grad_norm": 1.1141631171804318, "learning_rate": 7.631938478948478e-06, "loss": 0.23322705924510956, "step": 4500 }, { "epoch": 1.1951932014340725, "grad_norm": 1.3011711597097497, "learning_rate": 7.6276728288364086e-06, "loss": 0.25614386796951294, "step": 4501 }, { "epoch": 1.1954587704156154, "grad_norm": 1.2188058731839337, "learning_rate": 7.62340763608104e-06, "loss": 0.22921821475028992, "step": 4502 }, { "epoch": 1.1957243393971584, "grad_norm": 1.1538976889459698, "learning_rate": 7.619142901504649e-06, "loss": 0.25528913736343384, "step": 4503 }, { "epoch": 1.1959899083787013, "grad_norm": 1.1730292690453887, "learning_rate": 7.614878625929425e-06, "loss": 0.2528502643108368, "step": 4504 }, { "epoch": 1.1962554773602443, "grad_norm": 1.2636827238002009, "learning_rate": 7.610614810177474e-06, "loss": 0.2519027590751648, "step": 4505 }, { "epoch": 1.1965210463417872, "grad_norm": 1.3563109831905724, "learning_rate": 7.606351455070808e-06, "loss": 0.2895655333995819, "step": 4506 }, { "epoch": 1.1967866153233302, "grad_norm": 1.2317858842714817, "learning_rate": 7.6020885614313515e-06, "loss": 0.24588793516159058, "step": 4507 }, { "epoch": 1.1970521843048731, "grad_norm": 1.3148149004868621, "learning_rate": 7.597826130080938e-06, "loss": 0.2996830940246582, "step": 4508 }, { "epoch": 1.197317753286416, "grad_norm": 1.2289139982746875, "learning_rate": 7.593564161841318e-06, "loss": 0.2654343247413635, "step": 4509 }, { "epoch": 1.197583322267959, "grad_norm": 1.2104660234722762, "learning_rate": 7.589302657534144e-06, "loss": 0.24949109554290771, "step": 4510 }, { "epoch": 1.197848891249502, "grad_norm": 1.1785955409512114, "learning_rate": 7.5850416179809886e-06, "loss": 0.23205731809139252, "step": 4511 }, { "epoch": 1.198114460231045, "grad_norm": 3.351023225066079, "learning_rate": 7.580781044003324e-06, "loss": 0.232904314994812, "step": 4512 }, { "epoch": 1.198380029212588, "grad_norm": 1.0569352775404934, "learning_rate": 7.576520936422542e-06, "loss": 0.25071364641189575, "step": 4513 }, { "epoch": 1.1986455981941309, "grad_norm": 1.3613643273685416, "learning_rate": 7.572261296059944e-06, "loss": 0.2574467658996582, "step": 4514 }, { "epoch": 1.1989111671756738, "grad_norm": 1.1866331959407248, "learning_rate": 7.568002123736735e-06, "loss": 0.23134055733680725, "step": 4515 }, { "epoch": 1.1991767361572168, "grad_norm": 1.093870770411857, "learning_rate": 7.5637434202740334e-06, "loss": 0.22163332998752594, "step": 4516 }, { "epoch": 1.1994423051387597, "grad_norm": 1.182308432196374, "learning_rate": 7.559485186492868e-06, "loss": 0.2665749788284302, "step": 4517 }, { "epoch": 1.1997078741203027, "grad_norm": 1.0758759053634162, "learning_rate": 7.555227423214174e-06, "loss": 0.2237103432416916, "step": 4518 }, { "epoch": 1.1999734431018456, "grad_norm": 1.2216323349035507, "learning_rate": 7.550970131258801e-06, "loss": 0.23287461698055267, "step": 4519 }, { "epoch": 1.2002390120833886, "grad_norm": 1.1237156855078405, "learning_rate": 7.5467133114475025e-06, "loss": 0.2296323925256729, "step": 4520 }, { "epoch": 1.2005045810649315, "grad_norm": 1.0900498705064874, "learning_rate": 7.542456964600944e-06, "loss": 0.21358339488506317, "step": 4521 }, { "epoch": 1.2007701500464747, "grad_norm": 1.2516498821908515, "learning_rate": 7.5382010915396954e-06, "loss": 0.2355872094631195, "step": 4522 }, { "epoch": 1.2010357190280176, "grad_norm": 1.2039029354448443, "learning_rate": 7.5339456930842455e-06, "loss": 0.25397661328315735, "step": 4523 }, { "epoch": 1.2013012880095606, "grad_norm": 1.1762399479435963, "learning_rate": 7.52969077005498e-06, "loss": 0.26658257842063904, "step": 4524 }, { "epoch": 1.2015668569911035, "grad_norm": 1.1889790145170218, "learning_rate": 7.525436323272201e-06, "loss": 0.27207136154174805, "step": 4525 }, { "epoch": 1.2018324259726465, "grad_norm": 1.1867510172835751, "learning_rate": 7.521182353556114e-06, "loss": 0.25889313220977783, "step": 4526 }, { "epoch": 1.2020979949541895, "grad_norm": 1.3095753328357655, "learning_rate": 7.516928861726834e-06, "loss": 0.272185742855072, "step": 4527 }, { "epoch": 1.2023635639357324, "grad_norm": 1.156226984644319, "learning_rate": 7.512675848604385e-06, "loss": 0.25371503829956055, "step": 4528 }, { "epoch": 1.2026291329172754, "grad_norm": 1.2028831911106082, "learning_rate": 7.5084233150086964e-06, "loss": 0.2554902732372284, "step": 4529 }, { "epoch": 1.2028947018988183, "grad_norm": 1.1714528701705076, "learning_rate": 7.50417126175961e-06, "loss": 0.22007369995117188, "step": 4530 }, { "epoch": 1.2031602708803613, "grad_norm": 1.2057968317835202, "learning_rate": 7.499919689676861e-06, "loss": 0.27492445707321167, "step": 4531 }, { "epoch": 1.2034258398619042, "grad_norm": 1.1229280499713745, "learning_rate": 7.4956685995801144e-06, "loss": 0.2321021854877472, "step": 4532 }, { "epoch": 1.2036914088434472, "grad_norm": 1.1735641467762012, "learning_rate": 7.491417992288927e-06, "loss": 0.25410759449005127, "step": 4533 }, { "epoch": 1.2039569778249901, "grad_norm": 1.0638924164212193, "learning_rate": 7.487167868622765e-06, "loss": 0.2080576866865158, "step": 4534 }, { "epoch": 1.204222546806533, "grad_norm": 1.115815492341061, "learning_rate": 7.482918229401001e-06, "loss": 0.2333327978849411, "step": 4535 }, { "epoch": 1.204488115788076, "grad_norm": 1.1999209092526242, "learning_rate": 7.478669075442917e-06, "loss": 0.23160479962825775, "step": 4536 }, { "epoch": 1.204753684769619, "grad_norm": 1.2136747509439494, "learning_rate": 7.474420407567699e-06, "loss": 0.2627696394920349, "step": 4537 }, { "epoch": 1.205019253751162, "grad_norm": 1.0694648198090266, "learning_rate": 7.470172226594441e-06, "loss": 0.18656940758228302, "step": 4538 }, { "epoch": 1.2052848227327049, "grad_norm": 1.2245138263513848, "learning_rate": 7.465924533342139e-06, "loss": 0.2749083340167999, "step": 4539 }, { "epoch": 1.2055503917142478, "grad_norm": 1.3944907322006155, "learning_rate": 7.461677328629696e-06, "loss": 0.27484387159347534, "step": 4540 }, { "epoch": 1.2058159606957908, "grad_norm": 1.254197138569937, "learning_rate": 7.457430613275934e-06, "loss": 0.26357588171958923, "step": 4541 }, { "epoch": 1.2060815296773337, "grad_norm": 1.2004336778554112, "learning_rate": 7.453184388099559e-06, "loss": 0.23495343327522278, "step": 4542 }, { "epoch": 1.2063470986588767, "grad_norm": 1.2123259782755003, "learning_rate": 7.4489386539192e-06, "loss": 0.253970205783844, "step": 4543 }, { "epoch": 1.2066126676404196, "grad_norm": 1.1523820852778563, "learning_rate": 7.444693411553383e-06, "loss": 0.24919062852859497, "step": 4544 }, { "epoch": 1.2068782366219626, "grad_norm": 1.2181666045865969, "learning_rate": 7.440448661820536e-06, "loss": 0.24373450875282288, "step": 4545 }, { "epoch": 1.2071438056035055, "grad_norm": 1.3762501451890354, "learning_rate": 7.436204405539002e-06, "loss": 0.24739482998847961, "step": 4546 }, { "epoch": 1.2074093745850485, "grad_norm": 1.2982074074943253, "learning_rate": 7.4319606435270195e-06, "loss": 0.27041494846343994, "step": 4547 }, { "epoch": 1.2076749435665914, "grad_norm": 1.1359942984852744, "learning_rate": 7.427717376602739e-06, "loss": 0.23243938386440277, "step": 4548 }, { "epoch": 1.2079405125481344, "grad_norm": 1.3118758722508392, "learning_rate": 7.423474605584206e-06, "loss": 0.2346343696117401, "step": 4549 }, { "epoch": 1.2082060815296773, "grad_norm": 1.1819354183035133, "learning_rate": 7.419232331289385e-06, "loss": 0.2587367296218872, "step": 4550 }, { "epoch": 1.2084716505112203, "grad_norm": 1.195922174249915, "learning_rate": 7.414990554536134e-06, "loss": 0.2552938461303711, "step": 4551 }, { "epoch": 1.2087372194927632, "grad_norm": 1.2688216449772127, "learning_rate": 7.410749276142221e-06, "loss": 0.2693648040294647, "step": 4552 }, { "epoch": 1.2090027884743062, "grad_norm": 1.1997939452425357, "learning_rate": 7.406508496925307e-06, "loss": 0.21543294191360474, "step": 4553 }, { "epoch": 1.2092683574558492, "grad_norm": 1.2385892147047024, "learning_rate": 7.402268217702966e-06, "loss": 0.2913009524345398, "step": 4554 }, { "epoch": 1.209533926437392, "grad_norm": 1.0671356100150298, "learning_rate": 7.398028439292675e-06, "loss": 0.23279520869255066, "step": 4555 }, { "epoch": 1.209799495418935, "grad_norm": 1.0946575444558022, "learning_rate": 7.393789162511815e-06, "loss": 0.25086939334869385, "step": 4556 }, { "epoch": 1.210065064400478, "grad_norm": 1.0964890001200192, "learning_rate": 7.389550388177662e-06, "loss": 0.21704714000225067, "step": 4557 }, { "epoch": 1.210330633382021, "grad_norm": 1.126699331966135, "learning_rate": 7.3853121171074115e-06, "loss": 0.230219304561615, "step": 4558 }, { "epoch": 1.210596202363564, "grad_norm": 1.1809668678269754, "learning_rate": 7.381074350118149e-06, "loss": 0.26073017716407776, "step": 4559 }, { "epoch": 1.2108617713451069, "grad_norm": 1.2065072762311946, "learning_rate": 7.376837088026863e-06, "loss": 0.25186216831207275, "step": 4560 }, { "epoch": 1.2111273403266498, "grad_norm": 1.3978877577958326, "learning_rate": 7.372600331650449e-06, "loss": 0.28719040751457214, "step": 4561 }, { "epoch": 1.2113929093081928, "grad_norm": 1.16073083909203, "learning_rate": 7.368364081805704e-06, "loss": 0.23972755670547485, "step": 4562 }, { "epoch": 1.2116584782897357, "grad_norm": 1.096919114864748, "learning_rate": 7.364128339309326e-06, "loss": 0.23053769767284393, "step": 4563 }, { "epoch": 1.2119240472712787, "grad_norm": 1.2910615683085556, "learning_rate": 7.359893104977917e-06, "loss": 0.25124189257621765, "step": 4564 }, { "epoch": 1.2121896162528216, "grad_norm": 1.1863697592423188, "learning_rate": 7.355658379627981e-06, "loss": 0.2243686318397522, "step": 4565 }, { "epoch": 1.2124551852343646, "grad_norm": 1.244591161752608, "learning_rate": 7.3514241640759175e-06, "loss": 0.26047343015670776, "step": 4566 }, { "epoch": 1.2127207542159075, "grad_norm": 1.1775978450301259, "learning_rate": 7.3471904591380434e-06, "loss": 0.23603469133377075, "step": 4567 }, { "epoch": 1.2129863231974505, "grad_norm": 1.2261707581126196, "learning_rate": 7.342957265630561e-06, "loss": 0.31320711970329285, "step": 4568 }, { "epoch": 1.2132518921789934, "grad_norm": 1.22464158648852, "learning_rate": 7.338724584369581e-06, "loss": 0.22159788012504578, "step": 4569 }, { "epoch": 1.2135174611605364, "grad_norm": 1.1206153371836056, "learning_rate": 7.334492416171114e-06, "loss": 0.21992239356040955, "step": 4570 }, { "epoch": 1.2137830301420793, "grad_norm": 1.3229661253734524, "learning_rate": 7.330260761851071e-06, "loss": 0.20708827674388885, "step": 4571 }, { "epoch": 1.2140485991236223, "grad_norm": 1.1899658624900848, "learning_rate": 7.326029622225269e-06, "loss": 0.2846507132053375, "step": 4572 }, { "epoch": 1.2143141681051652, "grad_norm": 1.2218224134688922, "learning_rate": 7.321798998109417e-06, "loss": 0.24903801083564758, "step": 4573 }, { "epoch": 1.2145797370867082, "grad_norm": 1.1817295734811926, "learning_rate": 7.317568890319134e-06, "loss": 0.23426681756973267, "step": 4574 }, { "epoch": 1.2148453060682511, "grad_norm": 1.1685993771040228, "learning_rate": 7.31333929966993e-06, "loss": 0.2374490350484848, "step": 4575 }, { "epoch": 1.215110875049794, "grad_norm": 1.13335327598736, "learning_rate": 7.309110226977223e-06, "loss": 0.24035832285881042, "step": 4576 }, { "epoch": 1.215376444031337, "grad_norm": 1.2837405582571324, "learning_rate": 7.30488167305633e-06, "loss": 0.21872258186340332, "step": 4577 }, { "epoch": 1.21564201301288, "grad_norm": 1.3425258296129825, "learning_rate": 7.300653638722463e-06, "loss": 0.2940255403518677, "step": 4578 }, { "epoch": 1.215907581994423, "grad_norm": 1.1158795437619367, "learning_rate": 7.29642612479074e-06, "loss": 0.20970892906188965, "step": 4579 }, { "epoch": 1.216173150975966, "grad_norm": 1.1571301789790744, "learning_rate": 7.292199132076175e-06, "loss": 0.21217449009418488, "step": 4580 }, { "epoch": 1.2164387199575089, "grad_norm": 1.2448503896532135, "learning_rate": 7.28797266139368e-06, "loss": 0.2463359832763672, "step": 4581 }, { "epoch": 1.2167042889390518, "grad_norm": 1.132320428820701, "learning_rate": 7.283746713558071e-06, "loss": 0.21921415627002716, "step": 4582 }, { "epoch": 1.2169698579205948, "grad_norm": 1.2437376760058587, "learning_rate": 7.279521289384059e-06, "loss": 0.2412380576133728, "step": 4583 }, { "epoch": 1.2172354269021377, "grad_norm": 1.180878934188553, "learning_rate": 7.275296389686258e-06, "loss": 0.2558564245700836, "step": 4584 }, { "epoch": 1.2175009958836809, "grad_norm": 1.2566060880081307, "learning_rate": 7.271072015279179e-06, "loss": 0.2548869848251343, "step": 4585 }, { "epoch": 1.2177665648652238, "grad_norm": 1.4407566508510072, "learning_rate": 7.2668481669772304e-06, "loss": 0.22183407843112946, "step": 4586 }, { "epoch": 1.2180321338467668, "grad_norm": 1.20165829214997, "learning_rate": 7.262624845594721e-06, "loss": 0.24722473323345184, "step": 4587 }, { "epoch": 1.2182977028283097, "grad_norm": 1.190564524584547, "learning_rate": 7.258402051945858e-06, "loss": 0.2678988575935364, "step": 4588 }, { "epoch": 1.2185632718098527, "grad_norm": 1.187777405395345, "learning_rate": 7.2541797868447435e-06, "loss": 0.2116469144821167, "step": 4589 }, { "epoch": 1.2188288407913956, "grad_norm": 1.2500071795758152, "learning_rate": 7.249958051105383e-06, "loss": 0.23897933959960938, "step": 4590 }, { "epoch": 1.2190944097729386, "grad_norm": 1.2473885744661077, "learning_rate": 7.245736845541676e-06, "loss": 0.25434061884880066, "step": 4591 }, { "epoch": 1.2193599787544815, "grad_norm": 1.2108382272450464, "learning_rate": 7.2415161709674235e-06, "loss": 0.2602628469467163, "step": 4592 }, { "epoch": 1.2196255477360245, "grad_norm": 3.1633443202169764, "learning_rate": 7.2372960281963165e-06, "loss": 0.2519065737724304, "step": 4593 }, { "epoch": 1.2198911167175674, "grad_norm": 1.550903602515833, "learning_rate": 7.233076418041954e-06, "loss": 0.24404102563858032, "step": 4594 }, { "epoch": 1.2201566856991104, "grad_norm": 1.1561711817096534, "learning_rate": 7.228857341317825e-06, "loss": 0.23633979260921478, "step": 4595 }, { "epoch": 1.2204222546806534, "grad_norm": 1.2128002082313463, "learning_rate": 7.224638798837319e-06, "loss": 0.2513781189918518, "step": 4596 }, { "epoch": 1.2206878236621963, "grad_norm": 1.2409533600026899, "learning_rate": 7.220420791413721e-06, "loss": 0.23270189762115479, "step": 4597 }, { "epoch": 1.2209533926437393, "grad_norm": 1.2503409564498669, "learning_rate": 7.21620331986021e-06, "loss": 0.2770010530948639, "step": 4598 }, { "epoch": 1.2212189616252822, "grad_norm": 1.1284522462719728, "learning_rate": 7.2119863849898684e-06, "loss": 0.2312745451927185, "step": 4599 }, { "epoch": 1.2214845306068252, "grad_norm": 1.2725314186948387, "learning_rate": 7.20776998761567e-06, "loss": 0.231276735663414, "step": 4600 }, { "epoch": 1.221750099588368, "grad_norm": 1.1715742737590393, "learning_rate": 7.203554128550486e-06, "loss": 0.24927708506584167, "step": 4601 }, { "epoch": 1.222015668569911, "grad_norm": 1.1138441718661785, "learning_rate": 7.199338808607084e-06, "loss": 0.23033373057842255, "step": 4602 }, { "epoch": 1.222281237551454, "grad_norm": 1.2545098885673684, "learning_rate": 7.195124028598131e-06, "loss": 0.24003425240516663, "step": 4603 }, { "epoch": 1.222546806532997, "grad_norm": 1.1872708193619057, "learning_rate": 7.190909789336185e-06, "loss": 0.22648809850215912, "step": 4604 }, { "epoch": 1.22281237551454, "grad_norm": 1.2511860493227276, "learning_rate": 7.1866960916337006e-06, "loss": 0.2605816125869751, "step": 4605 }, { "epoch": 1.2230779444960829, "grad_norm": 1.1424629632361756, "learning_rate": 7.1824829363030305e-06, "loss": 0.21549202501773834, "step": 4606 }, { "epoch": 1.2233435134776258, "grad_norm": 1.1532084986944064, "learning_rate": 7.17827032415642e-06, "loss": 0.23113220930099487, "step": 4607 }, { "epoch": 1.2236090824591688, "grad_norm": 1.1649312720163907, "learning_rate": 7.174058256006012e-06, "loss": 0.22736643254756927, "step": 4608 }, { "epoch": 1.2238746514407117, "grad_norm": 1.172011833362534, "learning_rate": 7.169846732663845e-06, "loss": 0.2686663866043091, "step": 4609 }, { "epoch": 1.2241402204222547, "grad_norm": 1.1555217624379808, "learning_rate": 7.1656357549418485e-06, "loss": 0.1980462670326233, "step": 4610 }, { "epoch": 1.2244057894037976, "grad_norm": 1.2401629806715768, "learning_rate": 7.161425323651846e-06, "loss": 0.22997641563415527, "step": 4611 }, { "epoch": 1.2246713583853406, "grad_norm": 1.3367939845671126, "learning_rate": 7.157215439605567e-06, "loss": 0.28781357407569885, "step": 4612 }, { "epoch": 1.2249369273668835, "grad_norm": 1.2895382897388425, "learning_rate": 7.153006103614624e-06, "loss": 0.22558270394802094, "step": 4613 }, { "epoch": 1.2252024963484265, "grad_norm": 1.1860196927831441, "learning_rate": 7.148797316490527e-06, "loss": 0.2435922622680664, "step": 4614 }, { "epoch": 1.2254680653299694, "grad_norm": 1.2828543438888096, "learning_rate": 7.14458907904468e-06, "loss": 0.27840936183929443, "step": 4615 }, { "epoch": 1.2257336343115124, "grad_norm": 1.2350405670943831, "learning_rate": 7.1403813920883825e-06, "loss": 0.2775651812553406, "step": 4616 }, { "epoch": 1.2259992032930553, "grad_norm": 1.2738452228129284, "learning_rate": 7.136174256432828e-06, "loss": 0.2430988848209381, "step": 4617 }, { "epoch": 1.2262647722745983, "grad_norm": 1.0618083363199646, "learning_rate": 7.131967672889101e-06, "loss": 0.2018759697675705, "step": 4618 }, { "epoch": 1.2265303412561412, "grad_norm": 1.2320094058432127, "learning_rate": 7.127761642268179e-06, "loss": 0.25314825773239136, "step": 4619 }, { "epoch": 1.2267959102376842, "grad_norm": 1.409693024729639, "learning_rate": 7.123556165380935e-06, "loss": 0.2542746365070343, "step": 4620 }, { "epoch": 1.2270614792192271, "grad_norm": 1.2571649384815597, "learning_rate": 7.119351243038142e-06, "loss": 0.2912300229072571, "step": 4621 }, { "epoch": 1.22732704820077, "grad_norm": 1.3877507856901592, "learning_rate": 7.115146876050454e-06, "loss": 0.26893284916877747, "step": 4622 }, { "epoch": 1.227592617182313, "grad_norm": 1.3833428208823224, "learning_rate": 7.110943065228425e-06, "loss": 0.2711215317249298, "step": 4623 }, { "epoch": 1.227858186163856, "grad_norm": 1.346165350849743, "learning_rate": 7.106739811382501e-06, "loss": 0.25530266761779785, "step": 4624 }, { "epoch": 1.228123755145399, "grad_norm": 1.268299981159743, "learning_rate": 7.102537115323018e-06, "loss": 0.2547178864479065, "step": 4625 }, { "epoch": 1.228389324126942, "grad_norm": 1.5802606545447795, "learning_rate": 7.0983349778602064e-06, "loss": 0.27973634004592896, "step": 4626 }, { "epoch": 1.2286548931084849, "grad_norm": 1.205257873334912, "learning_rate": 7.0941333998041884e-06, "loss": 0.24066339433193207, "step": 4627 }, { "epoch": 1.2289204620900278, "grad_norm": 1.1798307734371165, "learning_rate": 7.0899323819649816e-06, "loss": 0.24305742979049683, "step": 4628 }, { "epoch": 1.2291860310715708, "grad_norm": 1.163221794708842, "learning_rate": 7.085731925152484e-06, "loss": 0.22478783130645752, "step": 4629 }, { "epoch": 1.2294516000531137, "grad_norm": 1.1812808698189172, "learning_rate": 7.081532030176506e-06, "loss": 0.24995659291744232, "step": 4630 }, { "epoch": 1.2297171690346567, "grad_norm": 1.1575900439946216, "learning_rate": 7.077332697846733e-06, "loss": 0.2579454183578491, "step": 4631 }, { "epoch": 1.2299827380161996, "grad_norm": 1.2378373931288529, "learning_rate": 7.073133928972745e-06, "loss": 0.2513299286365509, "step": 4632 }, { "epoch": 1.2302483069977426, "grad_norm": 1.0751310135047412, "learning_rate": 7.068935724364016e-06, "loss": 0.23344315588474274, "step": 4633 }, { "epoch": 1.2305138759792857, "grad_norm": 1.1882346043976466, "learning_rate": 7.064738084829912e-06, "loss": 0.26750341057777405, "step": 4634 }, { "epoch": 1.2307794449608287, "grad_norm": 1.1622882344241228, "learning_rate": 7.0605410111796855e-06, "loss": 0.22424373030662537, "step": 4635 }, { "epoch": 1.2310450139423716, "grad_norm": 1.0711348851881108, "learning_rate": 7.056344504222485e-06, "loss": 0.24261844158172607, "step": 4636 }, { "epoch": 1.2313105829239146, "grad_norm": 1.1382788327638453, "learning_rate": 7.052148564767347e-06, "loss": 0.22273704409599304, "step": 4637 }, { "epoch": 1.2315761519054576, "grad_norm": 1.217398110209698, "learning_rate": 7.047953193623195e-06, "loss": 0.23726603388786316, "step": 4638 }, { "epoch": 1.2318417208870005, "grad_norm": 1.1961933626954258, "learning_rate": 7.043758391598856e-06, "loss": 0.2612340748310089, "step": 4639 }, { "epoch": 1.2321072898685435, "grad_norm": 1.3828917417203295, "learning_rate": 7.039564159503034e-06, "loss": 0.25722867250442505, "step": 4640 }, { "epoch": 1.2323728588500864, "grad_norm": 1.2106898963951274, "learning_rate": 7.035370498144325e-06, "loss": 0.25940731167793274, "step": 4641 }, { "epoch": 1.2326384278316294, "grad_norm": 1.1431229158704634, "learning_rate": 7.03117740833122e-06, "loss": 0.2328685224056244, "step": 4642 }, { "epoch": 1.2329039968131723, "grad_norm": 1.360549509974518, "learning_rate": 7.0269848908720965e-06, "loss": 0.3019352853298187, "step": 4643 }, { "epoch": 1.2331695657947153, "grad_norm": 1.370123584713732, "learning_rate": 7.022792946575222e-06, "loss": 0.2665002942085266, "step": 4644 }, { "epoch": 1.2334351347762582, "grad_norm": 1.2172549009924116, "learning_rate": 7.018601576248755e-06, "loss": 0.2425101399421692, "step": 4645 }, { "epoch": 1.2337007037578012, "grad_norm": 1.2088470091841177, "learning_rate": 7.014410780700743e-06, "loss": 0.23319771885871887, "step": 4646 }, { "epoch": 1.2339662727393441, "grad_norm": 1.1714631765087196, "learning_rate": 7.010220560739116e-06, "loss": 0.23033195734024048, "step": 4647 }, { "epoch": 1.234231841720887, "grad_norm": 1.211199620492339, "learning_rate": 7.006030917171707e-06, "loss": 0.24682006239891052, "step": 4648 }, { "epoch": 1.23449741070243, "grad_norm": 1.2881207045369418, "learning_rate": 7.001841850806228e-06, "loss": 0.25566285848617554, "step": 4649 }, { "epoch": 1.234762979683973, "grad_norm": 1.32329780476303, "learning_rate": 6.9976533624502784e-06, "loss": 0.2791779339313507, "step": 4650 }, { "epoch": 1.235028548665516, "grad_norm": 1.3093366388831746, "learning_rate": 6.993465452911352e-06, "loss": 0.25597846508026123, "step": 4651 }, { "epoch": 1.2352941176470589, "grad_norm": 1.197170425293823, "learning_rate": 6.9892781229968275e-06, "loss": 0.24034728109836578, "step": 4652 }, { "epoch": 1.2355596866286018, "grad_norm": 1.2583607623295634, "learning_rate": 6.985091373513972e-06, "loss": 0.2209509015083313, "step": 4653 }, { "epoch": 1.2358252556101448, "grad_norm": 1.298261075070858, "learning_rate": 6.980905205269942e-06, "loss": 0.29106947779655457, "step": 4654 }, { "epoch": 1.2360908245916877, "grad_norm": 1.226505577270481, "learning_rate": 6.976719619071782e-06, "loss": 0.24014753103256226, "step": 4655 }, { "epoch": 1.2363563935732307, "grad_norm": 1.2297022971330018, "learning_rate": 6.972534615726422e-06, "loss": 0.27135470509529114, "step": 4656 }, { "epoch": 1.2366219625547736, "grad_norm": 1.2219120714336154, "learning_rate": 6.968350196040683e-06, "loss": 0.23386257886886597, "step": 4657 }, { "epoch": 1.2368875315363166, "grad_norm": 1.1452987159774544, "learning_rate": 6.964166360821271e-06, "loss": 0.23119661211967468, "step": 4658 }, { "epoch": 1.2371531005178595, "grad_norm": 1.1767967288021879, "learning_rate": 6.959983110874782e-06, "loss": 0.2399922013282776, "step": 4659 }, { "epoch": 1.2374186694994025, "grad_norm": 1.0521231856668218, "learning_rate": 6.9558004470076944e-06, "loss": 0.18323534727096558, "step": 4660 }, { "epoch": 1.2376842384809454, "grad_norm": 1.1985431375912965, "learning_rate": 6.951618370026378e-06, "loss": 0.25683268904685974, "step": 4661 }, { "epoch": 1.2379498074624884, "grad_norm": 1.307367140627743, "learning_rate": 6.947436880737089e-06, "loss": 0.2861499786376953, "step": 4662 }, { "epoch": 1.2382153764440313, "grad_norm": 1.3831407282476516, "learning_rate": 6.943255979945965e-06, "loss": 0.28021398186683655, "step": 4663 }, { "epoch": 1.2384809454255743, "grad_norm": 1.2940713851528283, "learning_rate": 6.939075668459039e-06, "loss": 0.2739776074886322, "step": 4664 }, { "epoch": 1.2387465144071172, "grad_norm": 1.3433235944815516, "learning_rate": 6.934895947082221e-06, "loss": 0.26015231013298035, "step": 4665 }, { "epoch": 1.2390120833886602, "grad_norm": 1.3230400884249285, "learning_rate": 6.930716816621317e-06, "loss": 0.2572113871574402, "step": 4666 }, { "epoch": 1.2392776523702032, "grad_norm": 1.266134559335497, "learning_rate": 6.926538277882012e-06, "loss": 0.24094708263874054, "step": 4667 }, { "epoch": 1.239543221351746, "grad_norm": 1.1175335748548278, "learning_rate": 6.92236033166988e-06, "loss": 0.22803835570812225, "step": 4668 }, { "epoch": 1.239808790333289, "grad_norm": 1.1198379137737728, "learning_rate": 6.9181829787903774e-06, "loss": 0.23672322928905487, "step": 4669 }, { "epoch": 1.240074359314832, "grad_norm": 1.3356297624894082, "learning_rate": 6.91400622004885e-06, "loss": 0.2568579912185669, "step": 4670 }, { "epoch": 1.240339928296375, "grad_norm": 1.1768710116388783, "learning_rate": 6.909830056250527e-06, "loss": 0.25267845392227173, "step": 4671 }, { "epoch": 1.240605497277918, "grad_norm": 1.2702969549109802, "learning_rate": 6.905654488200524e-06, "loss": 0.30336999893188477, "step": 4672 }, { "epoch": 1.2408710662594609, "grad_norm": 1.17710991443045, "learning_rate": 6.901479516703842e-06, "loss": 0.2741299867630005, "step": 4673 }, { "epoch": 1.2411366352410038, "grad_norm": 1.276658372251755, "learning_rate": 6.897305142565363e-06, "loss": 0.2896823585033417, "step": 4674 }, { "epoch": 1.2414022042225468, "grad_norm": 1.2718591233587666, "learning_rate": 6.8931313665898625e-06, "loss": 0.23102329671382904, "step": 4675 }, { "epoch": 1.2416677732040897, "grad_norm": 1.3209479857777737, "learning_rate": 6.8889581895819915e-06, "loss": 0.2600775361061096, "step": 4676 }, { "epoch": 1.2419333421856327, "grad_norm": 1.1932453661715805, "learning_rate": 6.884785612346291e-06, "loss": 0.23589132726192474, "step": 4677 }, { "epoch": 1.2421989111671756, "grad_norm": 1.155454248544126, "learning_rate": 6.880613635687184e-06, "loss": 0.24419361352920532, "step": 4678 }, { "epoch": 1.2424644801487186, "grad_norm": 1.1323309321599895, "learning_rate": 6.876442260408977e-06, "loss": 0.23267227411270142, "step": 4679 }, { "epoch": 1.2427300491302615, "grad_norm": 1.2244929254620942, "learning_rate": 6.8722714873158635e-06, "loss": 0.2507064938545227, "step": 4680 }, { "epoch": 1.2429956181118045, "grad_norm": 1.2079227486812785, "learning_rate": 6.868101317211922e-06, "loss": 0.2529929280281067, "step": 4681 }, { "epoch": 1.2432611870933474, "grad_norm": 1.1627205371245832, "learning_rate": 6.863931750901107e-06, "loss": 0.23255379498004913, "step": 4682 }, { "epoch": 1.2435267560748904, "grad_norm": 1.1997195000446994, "learning_rate": 6.859762789187259e-06, "loss": 0.22757332026958466, "step": 4683 }, { "epoch": 1.2437923250564333, "grad_norm": 1.2115398233652928, "learning_rate": 6.8555944328741145e-06, "loss": 0.2578364312648773, "step": 4684 }, { "epoch": 1.2440578940379763, "grad_norm": 1.1854445431935166, "learning_rate": 6.851426682765278e-06, "loss": 0.27568408846855164, "step": 4685 }, { "epoch": 1.2443234630195192, "grad_norm": 1.19754548578965, "learning_rate": 6.847259539664244e-06, "loss": 0.25595831871032715, "step": 4686 }, { "epoch": 1.2445890320010622, "grad_norm": 1.1807617266458326, "learning_rate": 6.843093004374386e-06, "loss": 0.2195426970720291, "step": 4687 }, { "epoch": 1.2448546009826051, "grad_norm": 1.1623631531241645, "learning_rate": 6.838927077698967e-06, "loss": 0.23247741162776947, "step": 4688 }, { "epoch": 1.245120169964148, "grad_norm": 1.2953467781322094, "learning_rate": 6.834761760441127e-06, "loss": 0.26149916648864746, "step": 4689 }, { "epoch": 1.245385738945691, "grad_norm": 1.1310243964126157, "learning_rate": 6.830597053403885e-06, "loss": 0.2521447241306305, "step": 4690 }, { "epoch": 1.245651307927234, "grad_norm": 1.1803812700297758, "learning_rate": 6.826432957390155e-06, "loss": 0.23401981592178345, "step": 4691 }, { "epoch": 1.245916876908777, "grad_norm": 1.3114713754211442, "learning_rate": 6.822269473202714e-06, "loss": 0.25341230630874634, "step": 4692 }, { "epoch": 1.24618244589032, "grad_norm": 1.2025537581570156, "learning_rate": 6.818106601644248e-06, "loss": 0.2513907551765442, "step": 4693 }, { "epoch": 1.2464480148718629, "grad_norm": 1.2263403478965602, "learning_rate": 6.8139443435173005e-06, "loss": 0.2682073414325714, "step": 4694 }, { "epoch": 1.2467135838534058, "grad_norm": 1.1801313342439474, "learning_rate": 6.809782699624308e-06, "loss": 0.22726872563362122, "step": 4695 }, { "epoch": 1.2469791528349488, "grad_norm": 1.3004812874511507, "learning_rate": 6.805621670767588e-06, "loss": 0.24184030294418335, "step": 4696 }, { "epoch": 1.247244721816492, "grad_norm": 1.0395051535883466, "learning_rate": 6.801461257749334e-06, "loss": 0.203639417886734, "step": 4697 }, { "epoch": 1.2475102907980349, "grad_norm": 1.1786557175840897, "learning_rate": 6.797301461371626e-06, "loss": 0.2170606106519699, "step": 4698 }, { "epoch": 1.2477758597795778, "grad_norm": 1.1231113548110434, "learning_rate": 6.7931422824364245e-06, "loss": 0.2225056290626526, "step": 4699 }, { "epoch": 1.2480414287611208, "grad_norm": 1.1702414518259399, "learning_rate": 6.788983721745569e-06, "loss": 0.2388974130153656, "step": 4700 }, { "epoch": 1.2483069977426637, "grad_norm": 1.14649445863332, "learning_rate": 6.784825780100776e-06, "loss": 0.2291644811630249, "step": 4701 }, { "epoch": 1.2485725667242067, "grad_norm": 1.3474164807852358, "learning_rate": 6.7806684583036595e-06, "loss": 0.23793739080429077, "step": 4702 }, { "epoch": 1.2488381357057496, "grad_norm": 1.2839354787463726, "learning_rate": 6.776511757155695e-06, "loss": 0.2756902277469635, "step": 4703 }, { "epoch": 1.2491037046872926, "grad_norm": 1.3039866822855, "learning_rate": 6.772355677458249e-06, "loss": 0.25046268105506897, "step": 4704 }, { "epoch": 1.2493692736688355, "grad_norm": 1.3053078100109528, "learning_rate": 6.7682002200125575e-06, "loss": 0.238486647605896, "step": 4705 }, { "epoch": 1.2496348426503785, "grad_norm": 1.1855651210182463, "learning_rate": 6.764045385619751e-06, "loss": 0.2366628348827362, "step": 4706 }, { "epoch": 1.2499004116319214, "grad_norm": 1.21176387977239, "learning_rate": 6.759891175080827e-06, "loss": 0.24825221300125122, "step": 4707 }, { "epoch": 1.2501659806134644, "grad_norm": 1.2922207381934139, "learning_rate": 6.755737589196673e-06, "loss": 0.2304186224937439, "step": 4708 }, { "epoch": 1.2504315495950074, "grad_norm": 1.200468035859197, "learning_rate": 6.7515846287680476e-06, "loss": 0.2824471592903137, "step": 4709 }, { "epoch": 1.2506971185765503, "grad_norm": 1.1994302764371214, "learning_rate": 6.747432294595591e-06, "loss": 0.23130697011947632, "step": 4710 }, { "epoch": 1.2509626875580933, "grad_norm": 1.3183641444794993, "learning_rate": 6.7432805874798334e-06, "loss": 0.28371602296829224, "step": 4711 }, { "epoch": 1.2512282565396362, "grad_norm": 1.1529924861272876, "learning_rate": 6.739129508221167e-06, "loss": 0.23452092707157135, "step": 4712 }, { "epoch": 1.2514938255211792, "grad_norm": 1.245806995398341, "learning_rate": 6.734979057619873e-06, "loss": 0.22486859560012817, "step": 4713 }, { "epoch": 1.2517593945027221, "grad_norm": 1.3481589110906722, "learning_rate": 6.730829236476111e-06, "loss": 0.2818532884120941, "step": 4714 }, { "epoch": 1.252024963484265, "grad_norm": 1.172531442878329, "learning_rate": 6.7266800455899125e-06, "loss": 0.2060810923576355, "step": 4715 }, { "epoch": 1.252290532465808, "grad_norm": 1.2183128764116598, "learning_rate": 6.722531485761199e-06, "loss": 0.2183244377374649, "step": 4716 }, { "epoch": 1.252556101447351, "grad_norm": 1.2596677279915016, "learning_rate": 6.71838355778976e-06, "loss": 0.24757327139377594, "step": 4717 }, { "epoch": 1.252821670428894, "grad_norm": 1.3267776765958388, "learning_rate": 6.714236262475268e-06, "loss": 0.3058333396911621, "step": 4718 }, { "epoch": 1.2530872394104369, "grad_norm": 1.1893155452841293, "learning_rate": 6.71008960061727e-06, "loss": 0.24095620214939117, "step": 4719 }, { "epoch": 1.2533528083919798, "grad_norm": 1.3050165159615794, "learning_rate": 6.705943573015199e-06, "loss": 0.25614839792251587, "step": 4720 }, { "epoch": 1.2536183773735228, "grad_norm": 1.2537185610498753, "learning_rate": 6.701798180468356e-06, "loss": 0.22295254468917847, "step": 4721 }, { "epoch": 1.2538839463550657, "grad_norm": 1.1724661677534984, "learning_rate": 6.697653423775926e-06, "loss": 0.24783796072006226, "step": 4722 }, { "epoch": 1.2541495153366087, "grad_norm": 1.5676339911360846, "learning_rate": 6.693509303736969e-06, "loss": 0.19702200591564178, "step": 4723 }, { "epoch": 1.2544150843181516, "grad_norm": 1.2713976115459882, "learning_rate": 6.689365821150421e-06, "loss": 0.2539074122905731, "step": 4724 }, { "epoch": 1.2546806532996946, "grad_norm": 1.2015875463338734, "learning_rate": 6.6852229768150976e-06, "loss": 0.2480372041463852, "step": 4725 }, { "epoch": 1.2549462222812375, "grad_norm": 1.1742876462412417, "learning_rate": 6.68108077152969e-06, "loss": 0.2231048047542572, "step": 4726 }, { "epoch": 1.2552117912627805, "grad_norm": 1.1571308721577904, "learning_rate": 6.676939206092766e-06, "loss": 0.260783851146698, "step": 4727 }, { "epoch": 1.2554773602443234, "grad_norm": 1.2569537102203152, "learning_rate": 6.67279828130277e-06, "loss": 0.24069254100322723, "step": 4728 }, { "epoch": 1.2557429292258664, "grad_norm": 1.1732343490674524, "learning_rate": 6.668657997958027e-06, "loss": 0.2578867971897125, "step": 4729 }, { "epoch": 1.2560084982074093, "grad_norm": 1.102080552368197, "learning_rate": 6.664518356856732e-06, "loss": 0.20724457502365112, "step": 4730 }, { "epoch": 1.2562740671889523, "grad_norm": 1.1527224778451435, "learning_rate": 6.6603793587969586e-06, "loss": 0.23107580840587616, "step": 4731 }, { "epoch": 1.2565396361704952, "grad_norm": 1.123633807819834, "learning_rate": 6.656241004576659e-06, "loss": 0.2481832504272461, "step": 4732 }, { "epoch": 1.2568052051520382, "grad_norm": 1.1353422900728998, "learning_rate": 6.652103294993657e-06, "loss": 0.2219698578119278, "step": 4733 }, { "epoch": 1.2570707741335811, "grad_norm": 1.1538807443087884, "learning_rate": 6.647966230845655e-06, "loss": 0.2245863974094391, "step": 4734 }, { "epoch": 1.257336343115124, "grad_norm": 1.1991392114731283, "learning_rate": 6.643829812930231e-06, "loss": 0.2086387574672699, "step": 4735 }, { "epoch": 1.257601912096667, "grad_norm": 1.1702949625685939, "learning_rate": 6.6396940420448355e-06, "loss": 0.23484499752521515, "step": 4736 }, { "epoch": 1.25786748107821, "grad_norm": 1.1449620939429583, "learning_rate": 6.635558918986797e-06, "loss": 0.22011062502861023, "step": 4737 }, { "epoch": 1.258133050059753, "grad_norm": 1.240312422577115, "learning_rate": 6.631424444553319e-06, "loss": 0.2426830381155014, "step": 4738 }, { "epoch": 1.258398619041296, "grad_norm": 1.2472398676845469, "learning_rate": 6.627290619541481e-06, "loss": 0.2702174484729767, "step": 4739 }, { "epoch": 1.2586641880228389, "grad_norm": 1.4005529994015682, "learning_rate": 6.623157444748234e-06, "loss": 0.26594820618629456, "step": 4740 }, { "epoch": 1.2589297570043818, "grad_norm": 1.2550785934224764, "learning_rate": 6.619024920970405e-06, "loss": 0.2546013593673706, "step": 4741 }, { "epoch": 1.2591953259859248, "grad_norm": 1.425429985784882, "learning_rate": 6.614893049004696e-06, "loss": 0.27207985520362854, "step": 4742 }, { "epoch": 1.259460894967468, "grad_norm": 1.4445692953489113, "learning_rate": 6.610761829647685e-06, "loss": 0.2640937566757202, "step": 4743 }, { "epoch": 1.2597264639490109, "grad_norm": 1.4095791296432063, "learning_rate": 6.60663126369582e-06, "loss": 0.2890278697013855, "step": 4744 }, { "epoch": 1.2599920329305538, "grad_norm": 1.1225606468440805, "learning_rate": 6.602501351945425e-06, "loss": 0.24610492587089539, "step": 4745 }, { "epoch": 1.2602576019120968, "grad_norm": 1.5273064552741338, "learning_rate": 6.598372095192699e-06, "loss": 0.24946746230125427, "step": 4746 }, { "epoch": 1.2605231708936397, "grad_norm": 1.0546449518544165, "learning_rate": 6.594243494233717e-06, "loss": 0.2369944453239441, "step": 4747 }, { "epoch": 1.2607887398751827, "grad_norm": 1.180556169492091, "learning_rate": 6.590115549864421e-06, "loss": 0.20980143547058105, "step": 4748 }, { "epoch": 1.2610543088567256, "grad_norm": 1.1524244978042124, "learning_rate": 6.5859882628806315e-06, "loss": 0.22930344939231873, "step": 4749 }, { "epoch": 1.2613198778382686, "grad_norm": 1.1353386909454481, "learning_rate": 6.5818616340780405e-06, "loss": 0.22352416813373566, "step": 4750 }, { "epoch": 1.2615854468198116, "grad_norm": 1.0615225488277533, "learning_rate": 6.577735664252214e-06, "loss": 0.2049327939748764, "step": 4751 }, { "epoch": 1.2618510158013545, "grad_norm": 1.3420243952278277, "learning_rate": 6.573610354198587e-06, "loss": 0.21858355402946472, "step": 4752 }, { "epoch": 1.2621165847828975, "grad_norm": 1.1248247337478985, "learning_rate": 6.5694857047124786e-06, "loss": 0.225118950009346, "step": 4753 }, { "epoch": 1.2623821537644404, "grad_norm": 1.1623337764465298, "learning_rate": 6.565361716589063e-06, "loss": 0.25780409574508667, "step": 4754 }, { "epoch": 1.2626477227459834, "grad_norm": 1.1580907073042885, "learning_rate": 6.5612383906233964e-06, "loss": 0.23507939279079437, "step": 4755 }, { "epoch": 1.2629132917275263, "grad_norm": 1.1733914893757196, "learning_rate": 6.557115727610417e-06, "loss": 0.27884477376937866, "step": 4756 }, { "epoch": 1.2631788607090693, "grad_norm": 1.145599873702901, "learning_rate": 6.552993728344921e-06, "loss": 0.2564120888710022, "step": 4757 }, { "epoch": 1.2634444296906122, "grad_norm": 1.3139857622357067, "learning_rate": 6.548872393621578e-06, "loss": 0.259651243686676, "step": 4758 }, { "epoch": 1.2637099986721552, "grad_norm": 1.2930462493551071, "learning_rate": 6.544751724234937e-06, "loss": 0.23473814129829407, "step": 4759 }, { "epoch": 1.2639755676536981, "grad_norm": 1.4411652435541018, "learning_rate": 6.540631720979411e-06, "loss": 0.2447129189968109, "step": 4760 }, { "epoch": 1.264241136635241, "grad_norm": 1.1968236723875711, "learning_rate": 6.536512384649294e-06, "loss": 0.22695237398147583, "step": 4761 }, { "epoch": 1.264506705616784, "grad_norm": 1.117214929215876, "learning_rate": 6.532393716038738e-06, "loss": 0.24303656816482544, "step": 4762 }, { "epoch": 1.264772274598327, "grad_norm": 1.2106972269991043, "learning_rate": 6.528275715941776e-06, "loss": 0.23911908268928528, "step": 4763 }, { "epoch": 1.26503784357987, "grad_norm": 1.0480584899589354, "learning_rate": 6.524158385152309e-06, "loss": 0.19766747951507568, "step": 4764 }, { "epoch": 1.2653034125614129, "grad_norm": 1.390914844473808, "learning_rate": 6.520041724464114e-06, "loss": 0.24074134230613708, "step": 4765 }, { "epoch": 1.2655689815429558, "grad_norm": 1.3379815630375766, "learning_rate": 6.515925734670834e-06, "loss": 0.27557867765426636, "step": 4766 }, { "epoch": 1.2658345505244988, "grad_norm": 1.3286252957995823, "learning_rate": 6.511810416565979e-06, "loss": 0.24387787282466888, "step": 4767 }, { "epoch": 1.2661001195060417, "grad_norm": 1.4234035593814256, "learning_rate": 6.507695770942939e-06, "loss": 0.27863091230392456, "step": 4768 }, { "epoch": 1.2663656884875847, "grad_norm": 1.1364646133588507, "learning_rate": 6.503581798594965e-06, "loss": 0.23589591681957245, "step": 4769 }, { "epoch": 1.2666312574691276, "grad_norm": 1.1932509985997282, "learning_rate": 6.499468500315185e-06, "loss": 0.22869807481765747, "step": 4770 }, { "epoch": 1.2668968264506706, "grad_norm": 1.2498634762148577, "learning_rate": 6.495355876896592e-06, "loss": 0.2351568192243576, "step": 4771 }, { "epoch": 1.2671623954322135, "grad_norm": 1.1271253337210285, "learning_rate": 6.491243929132052e-06, "loss": 0.2291228175163269, "step": 4772 }, { "epoch": 1.2674279644137565, "grad_norm": 1.2013953219342957, "learning_rate": 6.487132657814297e-06, "loss": 0.23203743994235992, "step": 4773 }, { "epoch": 1.2676935333952994, "grad_norm": 1.0887907712326863, "learning_rate": 6.483022063735938e-06, "loss": 0.22035656869411469, "step": 4774 }, { "epoch": 1.2679591023768424, "grad_norm": 1.1270651148723736, "learning_rate": 6.478912147689448e-06, "loss": 0.21576716005802155, "step": 4775 }, { "epoch": 1.2682246713583853, "grad_norm": 1.3174966546949713, "learning_rate": 6.474802910467171e-06, "loss": 0.27764660120010376, "step": 4776 }, { "epoch": 1.2684902403399283, "grad_norm": 1.2418434137314485, "learning_rate": 6.4706943528613135e-06, "loss": 0.23715822398662567, "step": 4777 }, { "epoch": 1.2687558093214713, "grad_norm": 1.1794293567561218, "learning_rate": 6.4665864756639606e-06, "loss": 0.27764302492141724, "step": 4778 }, { "epoch": 1.2690213783030142, "grad_norm": 1.2157630211554828, "learning_rate": 6.4624792796670624e-06, "loss": 0.21634885668754578, "step": 4779 }, { "epoch": 1.2692869472845572, "grad_norm": 1.2217447541656432, "learning_rate": 6.458372765662438e-06, "loss": 0.27262234687805176, "step": 4780 }, { "epoch": 1.2695525162661, "grad_norm": 1.1716437260315133, "learning_rate": 6.454266934441775e-06, "loss": 0.2219458371400833, "step": 4781 }, { "epoch": 1.269818085247643, "grad_norm": 1.2515340549821425, "learning_rate": 6.450161786796625e-06, "loss": 0.22181497514247894, "step": 4782 }, { "epoch": 1.270083654229186, "grad_norm": 1.1858127036353512, "learning_rate": 6.446057323518422e-06, "loss": 0.22642338275909424, "step": 4783 }, { "epoch": 1.270349223210729, "grad_norm": 1.2243357553110101, "learning_rate": 6.441953545398451e-06, "loss": 0.239711195230484, "step": 4784 }, { "epoch": 1.270614792192272, "grad_norm": 1.29507599792429, "learning_rate": 6.437850453227872e-06, "loss": 0.2422255128622055, "step": 4785 }, { "epoch": 1.2708803611738149, "grad_norm": 1.3013507424737665, "learning_rate": 6.433748047797715e-06, "loss": 0.23184439539909363, "step": 4786 }, { "epoch": 1.2711459301553578, "grad_norm": 1.3032581886502261, "learning_rate": 6.429646329898873e-06, "loss": 0.2737428843975067, "step": 4787 }, { "epoch": 1.2714114991369008, "grad_norm": 1.2565288812855064, "learning_rate": 6.4255453003221115e-06, "loss": 0.23565897345542908, "step": 4788 }, { "epoch": 1.2716770681184437, "grad_norm": 1.3665497750328797, "learning_rate": 6.421444959858059e-06, "loss": 0.24349254369735718, "step": 4789 }, { "epoch": 1.2719426370999867, "grad_norm": 1.2050219186384792, "learning_rate": 6.4173453092972115e-06, "loss": 0.2637769281864166, "step": 4790 }, { "epoch": 1.2722082060815296, "grad_norm": 1.0381858832581394, "learning_rate": 6.413246349429934e-06, "loss": 0.21420228481292725, "step": 4791 }, { "epoch": 1.2724737750630726, "grad_norm": 1.1333618917642097, "learning_rate": 6.409148081046461e-06, "loss": 0.25270405411720276, "step": 4792 }, { "epoch": 1.2727393440446155, "grad_norm": 1.270676964933882, "learning_rate": 6.405050504936887e-06, "loss": 0.2710546851158142, "step": 4793 }, { "epoch": 1.2730049130261585, "grad_norm": 1.1608891040490155, "learning_rate": 6.400953621891178e-06, "loss": 0.2388489842414856, "step": 4794 }, { "epoch": 1.2732704820077014, "grad_norm": 1.1600463634666516, "learning_rate": 6.396857432699164e-06, "loss": 0.24581485986709595, "step": 4795 }, { "epoch": 1.2735360509892444, "grad_norm": 1.18464881130754, "learning_rate": 6.3927619381505404e-06, "loss": 0.24219104647636414, "step": 4796 }, { "epoch": 1.2738016199707873, "grad_norm": 1.0878857914267965, "learning_rate": 6.388667139034873e-06, "loss": 0.22722014784812927, "step": 4797 }, { "epoch": 1.2740671889523303, "grad_norm": 1.275017638940232, "learning_rate": 6.384573036141589e-06, "loss": 0.25177234411239624, "step": 4798 }, { "epoch": 1.2743327579338732, "grad_norm": 1.2824350948041237, "learning_rate": 6.380479630259983e-06, "loss": 0.2291412651538849, "step": 4799 }, { "epoch": 1.2745983269154162, "grad_norm": 1.3215047708165757, "learning_rate": 6.376386922179216e-06, "loss": 0.2528606951236725, "step": 4800 }, { "epoch": 1.2748638958969591, "grad_norm": 1.11001311385955, "learning_rate": 6.372294912688315e-06, "loss": 0.21383032202720642, "step": 4801 }, { "epoch": 1.275129464878502, "grad_norm": 1.2162134010863295, "learning_rate": 6.368203602576168e-06, "loss": 0.2538087069988251, "step": 4802 }, { "epoch": 1.275395033860045, "grad_norm": 1.2127822206191197, "learning_rate": 6.364112992631537e-06, "loss": 0.24437417089939117, "step": 4803 }, { "epoch": 1.275660602841588, "grad_norm": 1.1678428848154245, "learning_rate": 6.360023083643036e-06, "loss": 0.2347753942012787, "step": 4804 }, { "epoch": 1.275926171823131, "grad_norm": 1.226812886332051, "learning_rate": 6.3559338763991576e-06, "loss": 0.271645188331604, "step": 4805 }, { "epoch": 1.276191740804674, "grad_norm": 1.2088165730060163, "learning_rate": 6.35184537168825e-06, "loss": 0.2465275228023529, "step": 4806 }, { "epoch": 1.2764573097862169, "grad_norm": 1.216147524532817, "learning_rate": 6.347757570298527e-06, "loss": 0.26494044065475464, "step": 4807 }, { "epoch": 1.2767228787677598, "grad_norm": 3.360286997098956, "learning_rate": 6.343670473018071e-06, "loss": 0.28292080760002136, "step": 4808 }, { "epoch": 1.2769884477493028, "grad_norm": 1.2160142828428218, "learning_rate": 6.339584080634824e-06, "loss": 0.2525850534439087, "step": 4809 }, { "epoch": 1.2772540167308457, "grad_norm": 1.224576908350391, "learning_rate": 6.335498393936597e-06, "loss": 0.22056345641613007, "step": 4810 }, { "epoch": 1.2775195857123887, "grad_norm": 1.1603347806824698, "learning_rate": 6.331413413711061e-06, "loss": 0.23081058263778687, "step": 4811 }, { "epoch": 1.2777851546939316, "grad_norm": 1.2309265633693007, "learning_rate": 6.327329140745751e-06, "loss": 0.2722470760345459, "step": 4812 }, { "epoch": 1.2780507236754748, "grad_norm": 1.2598117885787161, "learning_rate": 6.32324557582807e-06, "loss": 0.24454641342163086, "step": 4813 }, { "epoch": 1.2783162926570177, "grad_norm": 1.2713820573097572, "learning_rate": 6.319162719745277e-06, "loss": 0.21884413063526154, "step": 4814 }, { "epoch": 1.2785818616385607, "grad_norm": 1.276590514388197, "learning_rate": 6.3150805732845e-06, "loss": 0.2737545669078827, "step": 4815 }, { "epoch": 1.2788474306201036, "grad_norm": 1.1747258996206047, "learning_rate": 6.31099913723273e-06, "loss": 0.2478230595588684, "step": 4816 }, { "epoch": 1.2791129996016466, "grad_norm": 1.2461752717378811, "learning_rate": 6.306918412376817e-06, "loss": 0.2508094310760498, "step": 4817 }, { "epoch": 1.2793785685831895, "grad_norm": 1.267840547546021, "learning_rate": 6.302838399503477e-06, "loss": 0.24666383862495422, "step": 4818 }, { "epoch": 1.2796441375647325, "grad_norm": 1.176059099377582, "learning_rate": 6.298759099399292e-06, "loss": 0.27833491563796997, "step": 4819 }, { "epoch": 1.2799097065462754, "grad_norm": 1.1948595147219725, "learning_rate": 6.294680512850699e-06, "loss": 0.23092475533485413, "step": 4820 }, { "epoch": 1.2801752755278184, "grad_norm": 1.1935160504644853, "learning_rate": 6.290602640644005e-06, "loss": 0.2714667022228241, "step": 4821 }, { "epoch": 1.2804408445093614, "grad_norm": 1.1769422055863235, "learning_rate": 6.286525483565373e-06, "loss": 0.23292411863803864, "step": 4822 }, { "epoch": 1.2807064134909043, "grad_norm": 1.1322856806053188, "learning_rate": 6.282449042400831e-06, "loss": 0.23809143900871277, "step": 4823 }, { "epoch": 1.2809719824724473, "grad_norm": 1.0235534573008647, "learning_rate": 6.278373317936269e-06, "loss": 0.22593267261981964, "step": 4824 }, { "epoch": 1.2812375514539902, "grad_norm": 1.2491300300411192, "learning_rate": 6.274298310957439e-06, "loss": 0.26024624705314636, "step": 4825 }, { "epoch": 1.2815031204355332, "grad_norm": 1.138185007529017, "learning_rate": 6.270224022249957e-06, "loss": 0.22418126463890076, "step": 4826 }, { "epoch": 1.2817686894170761, "grad_norm": 1.2374650134400174, "learning_rate": 6.266150452599288e-06, "loss": 0.26452577114105225, "step": 4827 }, { "epoch": 1.282034258398619, "grad_norm": 1.2453587043668277, "learning_rate": 6.262077602790779e-06, "loss": 0.24412381649017334, "step": 4828 }, { "epoch": 1.282299827380162, "grad_norm": 1.1670875672055734, "learning_rate": 6.258005473609623e-06, "loss": 0.22476118803024292, "step": 4829 }, { "epoch": 1.282565396361705, "grad_norm": 1.1744502576491334, "learning_rate": 6.25393406584088e-06, "loss": 0.2208547294139862, "step": 4830 }, { "epoch": 1.282830965343248, "grad_norm": 1.340282271944368, "learning_rate": 6.249863380269467e-06, "loss": 0.2903650999069214, "step": 4831 }, { "epoch": 1.2830965343247909, "grad_norm": 1.2018727401561922, "learning_rate": 6.245793417680168e-06, "loss": 0.24413639307022095, "step": 4832 }, { "epoch": 1.2833621033063338, "grad_norm": 1.162422850806728, "learning_rate": 6.241724178857621e-06, "loss": 0.2193944752216339, "step": 4833 }, { "epoch": 1.2836276722878768, "grad_norm": 1.2159517583191957, "learning_rate": 6.237655664586326e-06, "loss": 0.22847513854503632, "step": 4834 }, { "epoch": 1.2838932412694197, "grad_norm": 1.4211501406512423, "learning_rate": 6.233587875650648e-06, "loss": 0.269639253616333, "step": 4835 }, { "epoch": 1.2841588102509627, "grad_norm": 1.3153478129856002, "learning_rate": 6.229520812834801e-06, "loss": 0.26329392194747925, "step": 4836 }, { "epoch": 1.2844243792325056, "grad_norm": 1.0811891602166492, "learning_rate": 6.225454476922877e-06, "loss": 0.18800514936447144, "step": 4837 }, { "epoch": 1.2846899482140486, "grad_norm": 1.2987987933289529, "learning_rate": 6.2213888686988125e-06, "loss": 0.2617965340614319, "step": 4838 }, { "epoch": 1.2849555171955915, "grad_norm": 1.2029687476094635, "learning_rate": 6.217323988946411e-06, "loss": 0.22468717396259308, "step": 4839 }, { "epoch": 1.2852210861771345, "grad_norm": 1.2126923104659393, "learning_rate": 6.213259838449333e-06, "loss": 0.22465646266937256, "step": 4840 }, { "epoch": 1.2854866551586774, "grad_norm": 1.243457795287806, "learning_rate": 6.209196417991096e-06, "loss": 0.2655075490474701, "step": 4841 }, { "epoch": 1.2857522241402204, "grad_norm": 1.2818071805394324, "learning_rate": 6.205133728355081e-06, "loss": 0.25313282012939453, "step": 4842 }, { "epoch": 1.2860177931217633, "grad_norm": 1.2136879668034726, "learning_rate": 6.201071770324527e-06, "loss": 0.23176322877407074, "step": 4843 }, { "epoch": 1.2862833621033063, "grad_norm": 1.3628911983979357, "learning_rate": 6.197010544682531e-06, "loss": 0.27396953105926514, "step": 4844 }, { "epoch": 1.2865489310848492, "grad_norm": 1.2333432651370633, "learning_rate": 6.192950052212046e-06, "loss": 0.24966171383857727, "step": 4845 }, { "epoch": 1.2868145000663922, "grad_norm": 1.184789059228899, "learning_rate": 6.188890293695895e-06, "loss": 0.23290866613388062, "step": 4846 }, { "epoch": 1.2870800690479351, "grad_norm": 1.2080105834836115, "learning_rate": 6.184831269916749e-06, "loss": 0.2368975132703781, "step": 4847 }, { "epoch": 1.287345638029478, "grad_norm": 1.35199057217418, "learning_rate": 6.180772981657139e-06, "loss": 0.25305312871932983, "step": 4848 }, { "epoch": 1.287611207011021, "grad_norm": 1.1825950927599171, "learning_rate": 6.176715429699452e-06, "loss": 0.22752982378005981, "step": 4849 }, { "epoch": 1.287876775992564, "grad_norm": 1.152582857494987, "learning_rate": 6.1726586148259395e-06, "loss": 0.22426503896713257, "step": 4850 }, { "epoch": 1.288142344974107, "grad_norm": 1.2203273234703247, "learning_rate": 6.168602537818706e-06, "loss": 0.21261993050575256, "step": 4851 }, { "epoch": 1.28840791395565, "grad_norm": 1.1907151660933317, "learning_rate": 6.1645471994597185e-06, "loss": 0.237461656332016, "step": 4852 }, { "epoch": 1.2886734829371929, "grad_norm": 1.113120156932308, "learning_rate": 6.160492600530794e-06, "loss": 0.1926390826702118, "step": 4853 }, { "epoch": 1.2889390519187358, "grad_norm": 1.6824005161064397, "learning_rate": 6.156438741813608e-06, "loss": 0.22673740983009338, "step": 4854 }, { "epoch": 1.289204620900279, "grad_norm": 1.1453361708789405, "learning_rate": 6.15238562408971e-06, "loss": 0.22148582339286804, "step": 4855 }, { "epoch": 1.289470189881822, "grad_norm": 1.3581323367394031, "learning_rate": 6.148333248140483e-06, "loss": 0.28319716453552246, "step": 4856 }, { "epoch": 1.289735758863365, "grad_norm": 1.4367360633574449, "learning_rate": 6.14428161474718e-06, "loss": 0.23505647480487823, "step": 4857 }, { "epoch": 1.2900013278449078, "grad_norm": 1.2052965186154045, "learning_rate": 6.140230724690908e-06, "loss": 0.24323523044586182, "step": 4858 }, { "epoch": 1.2902668968264508, "grad_norm": 1.2357784405363281, "learning_rate": 6.136180578752629e-06, "loss": 0.22818386554718018, "step": 4859 }, { "epoch": 1.2905324658079937, "grad_norm": 1.2670464740614045, "learning_rate": 6.132131177713165e-06, "loss": 0.24285198748111725, "step": 4860 }, { "epoch": 1.2907980347895367, "grad_norm": 1.1369753370104339, "learning_rate": 6.128082522353194e-06, "loss": 0.24115213751792908, "step": 4861 }, { "epoch": 1.2910636037710796, "grad_norm": 1.2213111344560537, "learning_rate": 6.124034613453247e-06, "loss": 0.21564510464668274, "step": 4862 }, { "epoch": 1.2913291727526226, "grad_norm": 1.299973209896211, "learning_rate": 6.119987451793711e-06, "loss": 0.2329743504524231, "step": 4863 }, { "epoch": 1.2915947417341656, "grad_norm": 1.2218786239106318, "learning_rate": 6.115941038154835e-06, "loss": 0.2161208689212799, "step": 4864 }, { "epoch": 1.2918603107157085, "grad_norm": 1.2078035628631776, "learning_rate": 6.111895373316721e-06, "loss": 0.22765520215034485, "step": 4865 }, { "epoch": 1.2921258796972515, "grad_norm": 1.2199257873933993, "learning_rate": 6.107850458059322e-06, "loss": 0.25506818294525146, "step": 4866 }, { "epoch": 1.2923914486787944, "grad_norm": 1.2014544077782259, "learning_rate": 6.1038062931624505e-06, "loss": 0.22543852031230927, "step": 4867 }, { "epoch": 1.2926570176603374, "grad_norm": 1.282222410309602, "learning_rate": 6.099762879405776e-06, "loss": 0.24295030534267426, "step": 4868 }, { "epoch": 1.2929225866418803, "grad_norm": 1.2221545432256802, "learning_rate": 6.095720217568819e-06, "loss": 0.2385009229183197, "step": 4869 }, { "epoch": 1.2931881556234233, "grad_norm": 1.119514297375773, "learning_rate": 6.091678308430956e-06, "loss": 0.21410472691059113, "step": 4870 }, { "epoch": 1.2934537246049662, "grad_norm": 1.299309717988783, "learning_rate": 6.087637152771422e-06, "loss": 0.25934773683547974, "step": 4871 }, { "epoch": 1.2937192935865092, "grad_norm": 1.1783576597419445, "learning_rate": 6.0835967513693e-06, "loss": 0.24584373831748962, "step": 4872 }, { "epoch": 1.2939848625680521, "grad_norm": 1.3413866916188153, "learning_rate": 6.079557105003537e-06, "loss": 0.2403055876493454, "step": 4873 }, { "epoch": 1.294250431549595, "grad_norm": 1.2348806886655737, "learning_rate": 6.075518214452927e-06, "loss": 0.23861736059188843, "step": 4874 }, { "epoch": 1.294516000531138, "grad_norm": 1.2099712971645404, "learning_rate": 6.071480080496119e-06, "loss": 0.21356427669525146, "step": 4875 }, { "epoch": 1.294781569512681, "grad_norm": 1.314183683224707, "learning_rate": 6.067442703911621e-06, "loss": 0.2835869789123535, "step": 4876 }, { "epoch": 1.295047138494224, "grad_norm": 1.1868362719294436, "learning_rate": 6.063406085477788e-06, "loss": 0.24233242869377136, "step": 4877 }, { "epoch": 1.2953127074757669, "grad_norm": 1.2596980829406919, "learning_rate": 6.059370225972834e-06, "loss": 0.24986369907855988, "step": 4878 }, { "epoch": 1.2955782764573098, "grad_norm": 1.2583930460503605, "learning_rate": 6.055335126174826e-06, "loss": 0.2445756494998932, "step": 4879 }, { "epoch": 1.2958438454388528, "grad_norm": 1.0635663336037695, "learning_rate": 6.0513007868616825e-06, "loss": 0.21331898868083954, "step": 4880 }, { "epoch": 1.2961094144203957, "grad_norm": 1.1578193819974294, "learning_rate": 6.047267208811174e-06, "loss": 0.2782329320907593, "step": 4881 }, { "epoch": 1.2963749834019387, "grad_norm": 2.326385436360766, "learning_rate": 6.043234392800932e-06, "loss": 0.20866765081882477, "step": 4882 }, { "epoch": 1.2966405523834816, "grad_norm": 1.3211750202424803, "learning_rate": 6.039202339608432e-06, "loss": 0.2517815828323364, "step": 4883 }, { "epoch": 1.2969061213650246, "grad_norm": 1.283845753322191, "learning_rate": 6.03517105001101e-06, "loss": 0.2617926597595215, "step": 4884 }, { "epoch": 1.2971716903465675, "grad_norm": 1.3255504140080887, "learning_rate": 6.0311405247858465e-06, "loss": 0.24753305315971375, "step": 4885 }, { "epoch": 1.2974372593281105, "grad_norm": 1.1805849927447047, "learning_rate": 6.027110764709982e-06, "loss": 0.19791719317436218, "step": 4886 }, { "epoch": 1.2977028283096534, "grad_norm": 1.236398594932959, "learning_rate": 6.023081770560307e-06, "loss": 0.243608757853508, "step": 4887 }, { "epoch": 1.2979683972911964, "grad_norm": 1.3652744342035896, "learning_rate": 6.019053543113564e-06, "loss": 0.20469853281974792, "step": 4888 }, { "epoch": 1.2982339662727393, "grad_norm": 1.4682720215540639, "learning_rate": 6.015026083146345e-06, "loss": 0.25613903999328613, "step": 4889 }, { "epoch": 1.2984995352542823, "grad_norm": 1.236223607561111, "learning_rate": 6.010999391435097e-06, "loss": 0.23349006474018097, "step": 4890 }, { "epoch": 1.2987651042358253, "grad_norm": 1.1137410591057113, "learning_rate": 6.006973468756124e-06, "loss": 0.23646268248558044, "step": 4891 }, { "epoch": 1.2990306732173682, "grad_norm": 1.2845979720118916, "learning_rate": 6.002948315885572e-06, "loss": 0.2371794581413269, "step": 4892 }, { "epoch": 1.2992962421989112, "grad_norm": 1.1150236044260142, "learning_rate": 5.998923933599443e-06, "loss": 0.23791949450969696, "step": 4893 }, { "epoch": 1.299561811180454, "grad_norm": 1.2865838186648229, "learning_rate": 5.994900322673593e-06, "loss": 0.26923009753227234, "step": 4894 }, { "epoch": 1.299827380161997, "grad_norm": 1.2724647699376699, "learning_rate": 5.990877483883723e-06, "loss": 0.20164884626865387, "step": 4895 }, { "epoch": 1.30009294914354, "grad_norm": 1.1263986142938482, "learning_rate": 5.986855418005393e-06, "loss": 0.22345462441444397, "step": 4896 }, { "epoch": 1.300358518125083, "grad_norm": 1.2936789930425872, "learning_rate": 5.982834125814007e-06, "loss": 0.26678675413131714, "step": 4897 }, { "epoch": 1.300624087106626, "grad_norm": 1.3112472329084983, "learning_rate": 5.978813608084825e-06, "loss": 0.24674496054649353, "step": 4898 }, { "epoch": 1.3008896560881689, "grad_norm": 1.3746634467420622, "learning_rate": 5.974793865592947e-06, "loss": 0.2804900109767914, "step": 4899 }, { "epoch": 1.3011552250697118, "grad_norm": 1.3113866221822363, "learning_rate": 5.970774899113345e-06, "loss": 0.2413155734539032, "step": 4900 }, { "epoch": 1.3014207940512548, "grad_norm": 1.139036608300987, "learning_rate": 5.96675670942082e-06, "loss": 0.21217301487922668, "step": 4901 }, { "epoch": 1.3016863630327977, "grad_norm": 1.2012277530250777, "learning_rate": 5.962739297290035e-06, "loss": 0.23362940549850464, "step": 4902 }, { "epoch": 1.3019519320143407, "grad_norm": 1.251148135143295, "learning_rate": 5.958722663495499e-06, "loss": 0.2669242322444916, "step": 4903 }, { "epoch": 1.3022175009958836, "grad_norm": 1.2365395348631665, "learning_rate": 5.95470680881157e-06, "loss": 0.2234608232975006, "step": 4904 }, { "epoch": 1.3024830699774266, "grad_norm": 1.2441781101215288, "learning_rate": 5.95069173401246e-06, "loss": 0.25150394439697266, "step": 4905 }, { "epoch": 1.3027486389589695, "grad_norm": 1.127228294882686, "learning_rate": 5.9466774398722264e-06, "loss": 0.2408430427312851, "step": 4906 }, { "epoch": 1.3030142079405125, "grad_norm": 1.1200862415380408, "learning_rate": 5.942663927164776e-06, "loss": 0.2197013795375824, "step": 4907 }, { "epoch": 1.3032797769220554, "grad_norm": 1.1474317141184802, "learning_rate": 5.938651196663865e-06, "loss": 0.2224964201450348, "step": 4908 }, { "epoch": 1.3035453459035984, "grad_norm": 1.313380369558454, "learning_rate": 5.934639249143108e-06, "loss": 0.26466232538223267, "step": 4909 }, { "epoch": 1.3038109148851413, "grad_norm": 1.2910852400248352, "learning_rate": 5.930628085375958e-06, "loss": 0.257996141910553, "step": 4910 }, { "epoch": 1.3040764838666843, "grad_norm": 1.2056479933898356, "learning_rate": 5.92661770613572e-06, "loss": 0.21995162963867188, "step": 4911 }, { "epoch": 1.3043420528482272, "grad_norm": 1.3003100511120855, "learning_rate": 5.922608112195546e-06, "loss": 0.26007258892059326, "step": 4912 }, { "epoch": 1.3046076218297702, "grad_norm": 1.2951583817832037, "learning_rate": 5.918599304328442e-06, "loss": 0.25168827176094055, "step": 4913 }, { "epoch": 1.3048731908113131, "grad_norm": 1.1932184000685677, "learning_rate": 5.9145912833072535e-06, "loss": 0.24686852097511292, "step": 4914 }, { "epoch": 1.305138759792856, "grad_norm": 1.1951264683753895, "learning_rate": 5.910584049904684e-06, "loss": 0.247032031416893, "step": 4915 }, { "epoch": 1.305404328774399, "grad_norm": 1.1517786776797445, "learning_rate": 5.906577604893278e-06, "loss": 0.21644674241542816, "step": 4916 }, { "epoch": 1.305669897755942, "grad_norm": 1.3685662184124912, "learning_rate": 5.9025719490454304e-06, "loss": 0.28093478083610535, "step": 4917 }, { "epoch": 1.305935466737485, "grad_norm": 1.2246452754262638, "learning_rate": 5.898567083133389e-06, "loss": 0.23731757700443268, "step": 4918 }, { "epoch": 1.306201035719028, "grad_norm": 1.1125400405938466, "learning_rate": 5.894563007929243e-06, "loss": 0.20725491642951965, "step": 4919 }, { "epoch": 1.3064666047005709, "grad_norm": 1.3186749566879576, "learning_rate": 5.89055972420493e-06, "loss": 0.2509433329105377, "step": 4920 }, { "epoch": 1.3067321736821138, "grad_norm": 1.2793911736037649, "learning_rate": 5.886557232732235e-06, "loss": 0.2611580491065979, "step": 4921 }, { "epoch": 1.3069977426636568, "grad_norm": 1.1754660821918204, "learning_rate": 5.882555534282792e-06, "loss": 0.20567595958709717, "step": 4922 }, { "epoch": 1.3072633116451997, "grad_norm": 1.2179299933591687, "learning_rate": 5.878554629628081e-06, "loss": 0.22851137816905975, "step": 4923 }, { "epoch": 1.3075288806267427, "grad_norm": 1.2283350051517878, "learning_rate": 5.874554519539431e-06, "loss": 0.24295902252197266, "step": 4924 }, { "epoch": 1.3077944496082856, "grad_norm": 1.4565590371796837, "learning_rate": 5.870555204788013e-06, "loss": 0.29564642906188965, "step": 4925 }, { "epoch": 1.3080600185898288, "grad_norm": 1.1906652754397118, "learning_rate": 5.8665566861448465e-06, "loss": 0.2399739921092987, "step": 4926 }, { "epoch": 1.3083255875713717, "grad_norm": 1.2056826487968673, "learning_rate": 5.862558964380806e-06, "loss": 0.23882555961608887, "step": 4927 }, { "epoch": 1.3085911565529147, "grad_norm": 1.2167231777259742, "learning_rate": 5.858562040266599e-06, "loss": 0.2510842978954315, "step": 4928 }, { "epoch": 1.3088567255344576, "grad_norm": 1.3760419048772665, "learning_rate": 5.854565914572787e-06, "loss": 0.257358193397522, "step": 4929 }, { "epoch": 1.3091222945160006, "grad_norm": 1.1144476904886809, "learning_rate": 5.850570588069775e-06, "loss": 0.23228219151496887, "step": 4930 }, { "epoch": 1.3093878634975435, "grad_norm": 1.2711888334314898, "learning_rate": 5.846576061527818e-06, "loss": 0.2234456092119217, "step": 4931 }, { "epoch": 1.3096534324790865, "grad_norm": 1.1978737759145446, "learning_rate": 5.842582335717009e-06, "loss": 0.2273438423871994, "step": 4932 }, { "epoch": 1.3099190014606295, "grad_norm": 1.2382395020505186, "learning_rate": 5.838589411407294e-06, "loss": 0.2423306405544281, "step": 4933 }, { "epoch": 1.3101845704421724, "grad_norm": 1.2388376015521172, "learning_rate": 5.834597289368463e-06, "loss": 0.266438364982605, "step": 4934 }, { "epoch": 1.3104501394237154, "grad_norm": 1.2553012161793193, "learning_rate": 5.830605970370142e-06, "loss": 0.2469342052936554, "step": 4935 }, { "epoch": 1.3107157084052583, "grad_norm": 1.2077087937137967, "learning_rate": 5.8266154551818225e-06, "loss": 0.2834509611129761, "step": 4936 }, { "epoch": 1.3109812773868013, "grad_norm": 1.3037377411135151, "learning_rate": 5.822625744572821e-06, "loss": 0.2615162134170532, "step": 4937 }, { "epoch": 1.3112468463683442, "grad_norm": 1.1529903033018742, "learning_rate": 5.818636839312309e-06, "loss": 0.2247931957244873, "step": 4938 }, { "epoch": 1.3115124153498872, "grad_norm": 1.162136486746663, "learning_rate": 5.814648740169299e-06, "loss": 0.23759335279464722, "step": 4939 }, { "epoch": 1.3117779843314301, "grad_norm": 1.2647326324758852, "learning_rate": 5.8106614479126515e-06, "loss": 0.23381784558296204, "step": 4940 }, { "epoch": 1.312043553312973, "grad_norm": 1.2132087226777075, "learning_rate": 5.8066749633110675e-06, "loss": 0.2671264410018921, "step": 4941 }, { "epoch": 1.312309122294516, "grad_norm": 1.09997395594631, "learning_rate": 5.8026892871330944e-06, "loss": 0.226065531373024, "step": 4942 }, { "epoch": 1.312574691276059, "grad_norm": 1.3057172624305828, "learning_rate": 5.798704420147124e-06, "loss": 0.2654735743999481, "step": 4943 }, { "epoch": 1.312840260257602, "grad_norm": 1.2538641402604982, "learning_rate": 5.794720363121389e-06, "loss": 0.23757833242416382, "step": 4944 }, { "epoch": 1.3131058292391449, "grad_norm": 1.2131030914710175, "learning_rate": 5.790737116823975e-06, "loss": 0.2561591565608978, "step": 4945 }, { "epoch": 1.3133713982206878, "grad_norm": 1.1698592689009908, "learning_rate": 5.7867546820227995e-06, "loss": 0.22105304896831512, "step": 4946 }, { "epoch": 1.3136369672022308, "grad_norm": 1.190016500907537, "learning_rate": 5.7827730594856325e-06, "loss": 0.2485857605934143, "step": 4947 }, { "epoch": 1.3139025361837737, "grad_norm": 1.2087719424455774, "learning_rate": 5.7787922499800804e-06, "loss": 0.21256676316261292, "step": 4948 }, { "epoch": 1.3141681051653167, "grad_norm": 1.2561271472593831, "learning_rate": 5.774812254273604e-06, "loss": 0.2700715661048889, "step": 4949 }, { "epoch": 1.3144336741468596, "grad_norm": 1.072264118800501, "learning_rate": 5.770833073133488e-06, "loss": 0.22239381074905396, "step": 4950 }, { "epoch": 1.3146992431284026, "grad_norm": 1.2811464089131772, "learning_rate": 5.766854707326878e-06, "loss": 0.22973249852657318, "step": 4951 }, { "epoch": 1.3149648121099455, "grad_norm": 1.3904264621036453, "learning_rate": 5.762877157620751e-06, "loss": 0.27923673391342163, "step": 4952 }, { "epoch": 1.3152303810914885, "grad_norm": 1.1321859486950596, "learning_rate": 5.758900424781939e-06, "loss": 0.23142218589782715, "step": 4953 }, { "epoch": 1.3154959500730314, "grad_norm": 1.2732500147617782, "learning_rate": 5.754924509577107e-06, "loss": 0.23697996139526367, "step": 4954 }, { "epoch": 1.3157615190545744, "grad_norm": 1.2838523265227373, "learning_rate": 5.750949412772764e-06, "loss": 0.27600961923599243, "step": 4955 }, { "epoch": 1.3160270880361173, "grad_norm": 1.1644607269636458, "learning_rate": 5.74697513513526e-06, "loss": 0.2300705760717392, "step": 4956 }, { "epoch": 1.3162926570176603, "grad_norm": 1.2927833273456342, "learning_rate": 5.743001677430791e-06, "loss": 0.2771111726760864, "step": 4957 }, { "epoch": 1.3165582259992032, "grad_norm": 1.2582954956741819, "learning_rate": 5.739029040425391e-06, "loss": 0.2195657342672348, "step": 4958 }, { "epoch": 1.3168237949807462, "grad_norm": 1.3450534906440017, "learning_rate": 5.735057224884939e-06, "loss": 0.2877159118652344, "step": 4959 }, { "epoch": 1.3170893639622892, "grad_norm": 1.2211564124942835, "learning_rate": 5.731086231575154e-06, "loss": 0.264115571975708, "step": 4960 }, { "epoch": 1.317354932943832, "grad_norm": 1.1286607753384608, "learning_rate": 5.727116061261593e-06, "loss": 0.22574637830257416, "step": 4961 }, { "epoch": 1.317620501925375, "grad_norm": 1.3177978069758023, "learning_rate": 5.723146714709664e-06, "loss": 0.26063698530197144, "step": 4962 }, { "epoch": 1.317886070906918, "grad_norm": 1.2211473527893268, "learning_rate": 5.719178192684611e-06, "loss": 0.26272428035736084, "step": 4963 }, { "epoch": 1.318151639888461, "grad_norm": 1.257373941755789, "learning_rate": 5.715210495951513e-06, "loss": 0.27188578248023987, "step": 4964 }, { "epoch": 1.318417208870004, "grad_norm": 1.2786927551317604, "learning_rate": 5.711243625275296e-06, "loss": 0.26374363899230957, "step": 4965 }, { "epoch": 1.3186827778515469, "grad_norm": 1.2469422291735242, "learning_rate": 5.7072775814207275e-06, "loss": 0.24819093942642212, "step": 4966 }, { "epoch": 1.3189483468330898, "grad_norm": 1.3834225319345155, "learning_rate": 5.703312365152412e-06, "loss": 0.24387019872665405, "step": 4967 }, { "epoch": 1.319213915814633, "grad_norm": 1.2919715806670669, "learning_rate": 5.699347977234799e-06, "loss": 0.2198091745376587, "step": 4968 }, { "epoch": 1.319479484796176, "grad_norm": 1.3500197578827224, "learning_rate": 5.695384418432174e-06, "loss": 0.24349649250507355, "step": 4969 }, { "epoch": 1.319745053777719, "grad_norm": 1.238323956307032, "learning_rate": 5.691421689508661e-06, "loss": 0.2330506294965744, "step": 4970 }, { "epoch": 1.3200106227592618, "grad_norm": 1.2015417123740977, "learning_rate": 5.687459791228234e-06, "loss": 0.22821848094463348, "step": 4971 }, { "epoch": 1.3202761917408048, "grad_norm": 1.1813366864368284, "learning_rate": 5.683498724354699e-06, "loss": 0.2342798113822937, "step": 4972 }, { "epoch": 1.3205417607223477, "grad_norm": 1.0659168750954966, "learning_rate": 5.679538489651702e-06, "loss": 0.19689922034740448, "step": 4973 }, { "epoch": 1.3208073297038907, "grad_norm": 1.1808385090527131, "learning_rate": 5.675579087882727e-06, "loss": 0.23910056054592133, "step": 4974 }, { "epoch": 1.3210728986854336, "grad_norm": 1.381638431012013, "learning_rate": 5.671620519811105e-06, "loss": 0.25725993514060974, "step": 4975 }, { "epoch": 1.3213384676669766, "grad_norm": 1.3528699347449313, "learning_rate": 5.667662786199997e-06, "loss": 0.3030434250831604, "step": 4976 }, { "epoch": 1.3216040366485196, "grad_norm": 1.1182092617897728, "learning_rate": 5.6637058878124075e-06, "loss": 0.223737433552742, "step": 4977 }, { "epoch": 1.3218696056300625, "grad_norm": 1.07766141822832, "learning_rate": 5.659749825411183e-06, "loss": 0.21480265259742737, "step": 4978 }, { "epoch": 1.3221351746116055, "grad_norm": 1.2398269968997129, "learning_rate": 5.655794599759001e-06, "loss": 0.23288744688034058, "step": 4979 }, { "epoch": 1.3224007435931484, "grad_norm": 1.3344080514533678, "learning_rate": 5.651840211618387e-06, "loss": 0.23701068758964539, "step": 4980 }, { "epoch": 1.3226663125746914, "grad_norm": 1.2102834630940547, "learning_rate": 5.647886661751698e-06, "loss": 0.22164157032966614, "step": 4981 }, { "epoch": 1.3229318815562343, "grad_norm": 1.2096538262244674, "learning_rate": 5.643933950921132e-06, "loss": 0.23426607251167297, "step": 4982 }, { "epoch": 1.3231974505377773, "grad_norm": 1.1880047089826309, "learning_rate": 5.6399820798887266e-06, "loss": 0.2567834258079529, "step": 4983 }, { "epoch": 1.3234630195193202, "grad_norm": 1.3013809826248692, "learning_rate": 5.6360310494163525e-06, "loss": 0.2713038921356201, "step": 4984 }, { "epoch": 1.3237285885008632, "grad_norm": 1.2908080991459006, "learning_rate": 5.632080860265725e-06, "loss": 0.2548249661922455, "step": 4985 }, { "epoch": 1.3239941574824061, "grad_norm": 1.3471244082770852, "learning_rate": 5.628131513198392e-06, "loss": 0.2442832589149475, "step": 4986 }, { "epoch": 1.324259726463949, "grad_norm": 1.3063670062134878, "learning_rate": 5.6241830089757435e-06, "loss": 0.24654853343963623, "step": 4987 }, { "epoch": 1.324525295445492, "grad_norm": 1.2792033582455469, "learning_rate": 5.620235348358997e-06, "loss": 0.2802797853946686, "step": 4988 }, { "epoch": 1.324790864427035, "grad_norm": 1.0588655062771883, "learning_rate": 5.616288532109225e-06, "loss": 0.18801404535770416, "step": 4989 }, { "epoch": 1.325056433408578, "grad_norm": 1.2235746865490262, "learning_rate": 5.6123425609873235e-06, "loss": 0.2685382068157196, "step": 4990 }, { "epoch": 1.3253220023901209, "grad_norm": 1.1873888072876837, "learning_rate": 5.608397435754029e-06, "loss": 0.23479774594306946, "step": 4991 }, { "epoch": 1.3255875713716638, "grad_norm": 1.2164455244711625, "learning_rate": 5.604453157169914e-06, "loss": 0.24198031425476074, "step": 4992 }, { "epoch": 1.3258531403532068, "grad_norm": 1.3448749532595476, "learning_rate": 5.60050972599539e-06, "loss": 0.25523462891578674, "step": 4993 }, { "epoch": 1.3261187093347497, "grad_norm": 1.1695382845281797, "learning_rate": 5.596567142990703e-06, "loss": 0.23196743428707123, "step": 4994 }, { "epoch": 1.3263842783162927, "grad_norm": 1.3145586744837223, "learning_rate": 5.592625408915939e-06, "loss": 0.29365748167037964, "step": 4995 }, { "epoch": 1.3266498472978356, "grad_norm": 1.1946134760289593, "learning_rate": 5.588684524531014e-06, "loss": 0.24509185552597046, "step": 4996 }, { "epoch": 1.3269154162793786, "grad_norm": 1.3358300509723116, "learning_rate": 5.584744490595687e-06, "loss": 0.27032390236854553, "step": 4997 }, { "epoch": 1.3271809852609215, "grad_norm": 1.1645416268641489, "learning_rate": 5.580805307869549e-06, "loss": 0.24401508271694183, "step": 4998 }, { "epoch": 1.3274465542424645, "grad_norm": 1.1506901325018217, "learning_rate": 5.576866977112028e-06, "loss": 0.2216658741235733, "step": 4999 }, { "epoch": 1.3277121232240074, "grad_norm": 1.1830944265124126, "learning_rate": 5.5729294990823875e-06, "loss": 0.24545373022556305, "step": 5000 }, { "epoch": 1.3279776922055504, "grad_norm": 1.377548009409137, "learning_rate": 5.568992874539728e-06, "loss": 0.260816752910614, "step": 5001 }, { "epoch": 1.3282432611870933, "grad_norm": 1.1392730403811622, "learning_rate": 5.565057104242984e-06, "loss": 0.1850551962852478, "step": 5002 }, { "epoch": 1.3285088301686363, "grad_norm": 2.1232949408605624, "learning_rate": 5.561122188950923e-06, "loss": 0.26854407787323, "step": 5003 }, { "epoch": 1.3287743991501793, "grad_norm": 1.1591208934359583, "learning_rate": 5.557188129422153e-06, "loss": 0.24294906854629517, "step": 5004 }, { "epoch": 1.3290399681317222, "grad_norm": 1.1880501452095942, "learning_rate": 5.553254926415114e-06, "loss": 0.2533603310585022, "step": 5005 }, { "epoch": 1.3293055371132652, "grad_norm": 1.1756183262516449, "learning_rate": 5.549322580688077e-06, "loss": 0.2082313448190689, "step": 5006 }, { "epoch": 1.329571106094808, "grad_norm": 1.1602290025540025, "learning_rate": 5.545391092999158e-06, "loss": 0.24265842139720917, "step": 5007 }, { "epoch": 1.329836675076351, "grad_norm": 1.2321490774961563, "learning_rate": 5.541460464106301e-06, "loss": 0.2483578324317932, "step": 5008 }, { "epoch": 1.330102244057894, "grad_norm": 1.2798509363454456, "learning_rate": 5.537530694767281e-06, "loss": 0.2769540548324585, "step": 5009 }, { "epoch": 1.330367813039437, "grad_norm": 1.1781048091325885, "learning_rate": 5.533601785739714e-06, "loss": 0.2132025957107544, "step": 5010 }, { "epoch": 1.33063338202098, "grad_norm": 1.2726887496075767, "learning_rate": 5.529673737781047e-06, "loss": 0.25223806500434875, "step": 5011 }, { "epoch": 1.3308989510025229, "grad_norm": 1.13329365262538, "learning_rate": 5.52574655164856e-06, "loss": 0.22631296515464783, "step": 5012 }, { "epoch": 1.3311645199840658, "grad_norm": 1.1821255064699665, "learning_rate": 5.5218202280993725e-06, "loss": 0.23756693303585052, "step": 5013 }, { "epoch": 1.3314300889656088, "grad_norm": 1.2775335630974591, "learning_rate": 5.517894767890427e-06, "loss": 0.24746376276016235, "step": 5014 }, { "epoch": 1.3316956579471517, "grad_norm": 1.105165815318004, "learning_rate": 5.513970171778504e-06, "loss": 0.21463070809841156, "step": 5015 }, { "epoch": 1.3319612269286947, "grad_norm": 1.2090979668871258, "learning_rate": 5.510046440520228e-06, "loss": 0.21256107091903687, "step": 5016 }, { "epoch": 1.3322267959102376, "grad_norm": 1.1963664670778913, "learning_rate": 5.506123574872044e-06, "loss": 0.25800254940986633, "step": 5017 }, { "epoch": 1.3324923648917806, "grad_norm": 1.2726257558813519, "learning_rate": 5.502201575590236e-06, "loss": 0.2421891689300537, "step": 5018 }, { "epoch": 1.3327579338733235, "grad_norm": 1.3181283061442692, "learning_rate": 5.498280443430917e-06, "loss": 0.24375903606414795, "step": 5019 }, { "epoch": 1.3330235028548665, "grad_norm": 1.2419078132332353, "learning_rate": 5.494360179150033e-06, "loss": 0.22173303365707397, "step": 5020 }, { "epoch": 1.3332890718364094, "grad_norm": 1.1754676882141941, "learning_rate": 5.49044078350337e-06, "loss": 0.24005022644996643, "step": 5021 }, { "epoch": 1.3335546408179524, "grad_norm": 1.194558748352182, "learning_rate": 5.486522257246538e-06, "loss": 0.2600201964378357, "step": 5022 }, { "epoch": 1.3338202097994953, "grad_norm": 1.2112657273591712, "learning_rate": 5.482604601134984e-06, "loss": 0.22889836132526398, "step": 5023 }, { "epoch": 1.3340857787810383, "grad_norm": 1.151722502872684, "learning_rate": 5.478687815923981e-06, "loss": 0.25045812129974365, "step": 5024 }, { "epoch": 1.3343513477625812, "grad_norm": 1.2499612320902753, "learning_rate": 5.474771902368646e-06, "loss": 0.24649837613105774, "step": 5025 }, { "epoch": 1.3346169167441242, "grad_norm": 1.1975824340507155, "learning_rate": 5.470856861223919e-06, "loss": 0.23994389176368713, "step": 5026 }, { "epoch": 1.3348824857256671, "grad_norm": 1.2488470912807048, "learning_rate": 5.466942693244572e-06, "loss": 0.24381600320339203, "step": 5027 }, { "epoch": 1.33514805470721, "grad_norm": 1.1770895947351019, "learning_rate": 5.463029399185217e-06, "loss": 0.22110486030578613, "step": 5028 }, { "epoch": 1.335413623688753, "grad_norm": 1.2878634690011452, "learning_rate": 5.459116979800281e-06, "loss": 0.25733259320259094, "step": 5029 }, { "epoch": 1.335679192670296, "grad_norm": 1.2598918710105835, "learning_rate": 5.4552054358440355e-06, "loss": 0.22853803634643555, "step": 5030 }, { "epoch": 1.335944761651839, "grad_norm": 1.3118793520277159, "learning_rate": 5.451294768070581e-06, "loss": 0.27503639459609985, "step": 5031 }, { "epoch": 1.336210330633382, "grad_norm": 1.2721314541046291, "learning_rate": 5.447384977233849e-06, "loss": 0.27931997179985046, "step": 5032 }, { "epoch": 1.3364758996149249, "grad_norm": 1.2287817779118972, "learning_rate": 5.443476064087596e-06, "loss": 0.2477954626083374, "step": 5033 }, { "epoch": 1.3367414685964678, "grad_norm": 1.2204002745504476, "learning_rate": 5.439568029385422e-06, "loss": 0.2195623219013214, "step": 5034 }, { "epoch": 1.3370070375780108, "grad_norm": 1.230653492520276, "learning_rate": 5.435660873880747e-06, "loss": 0.22160238027572632, "step": 5035 }, { "epoch": 1.3372726065595537, "grad_norm": 1.6764380815480615, "learning_rate": 5.4317545983268235e-06, "loss": 0.24107405543327332, "step": 5036 }, { "epoch": 1.3375381755410967, "grad_norm": 1.2985203082435115, "learning_rate": 5.427849203476738e-06, "loss": 0.2480086386203766, "step": 5037 }, { "epoch": 1.3378037445226398, "grad_norm": 1.2654518356324462, "learning_rate": 5.4239446900834005e-06, "loss": 0.22476691007614136, "step": 5038 }, { "epoch": 1.3380693135041828, "grad_norm": 1.217906592075979, "learning_rate": 5.420041058899559e-06, "loss": 0.23685473203659058, "step": 5039 }, { "epoch": 1.3383348824857257, "grad_norm": 1.215790635675812, "learning_rate": 5.416138310677784e-06, "loss": 0.27753746509552, "step": 5040 }, { "epoch": 1.3386004514672687, "grad_norm": 1.2682075315501737, "learning_rate": 5.412236446170482e-06, "loss": 0.22446027398109436, "step": 5041 }, { "epoch": 1.3388660204488116, "grad_norm": 1.2214424011593596, "learning_rate": 5.4083354661298816e-06, "loss": 0.2535285949707031, "step": 5042 }, { "epoch": 1.3391315894303546, "grad_norm": 1.2982364680013232, "learning_rate": 5.4044353713080565e-06, "loss": 0.2412964254617691, "step": 5043 }, { "epoch": 1.3393971584118975, "grad_norm": 1.3092797704576777, "learning_rate": 5.4005361624568895e-06, "loss": 0.23863038420677185, "step": 5044 }, { "epoch": 1.3396627273934405, "grad_norm": 1.159506578977356, "learning_rate": 5.396637840328105e-06, "loss": 0.22741727530956268, "step": 5045 }, { "epoch": 1.3399282963749835, "grad_norm": 1.285452356277395, "learning_rate": 5.392740405673251e-06, "loss": 0.2497379630804062, "step": 5046 }, { "epoch": 1.3401938653565264, "grad_norm": 1.2401289485061215, "learning_rate": 5.388843859243712e-06, "loss": 0.19558298587799072, "step": 5047 }, { "epoch": 1.3404594343380694, "grad_norm": 1.2074615239750155, "learning_rate": 5.3849482017906914e-06, "loss": 0.2266748994588852, "step": 5048 }, { "epoch": 1.3407250033196123, "grad_norm": 1.2657162316868396, "learning_rate": 5.381053434065229e-06, "loss": 0.2410028576850891, "step": 5049 }, { "epoch": 1.3409905723011553, "grad_norm": 1.301692886719208, "learning_rate": 5.37715955681819e-06, "loss": 0.23965512216091156, "step": 5050 }, { "epoch": 1.3412561412826982, "grad_norm": 1.1756365557449155, "learning_rate": 5.373266570800262e-06, "loss": 0.22440138459205627, "step": 5051 }, { "epoch": 1.3415217102642412, "grad_norm": 1.2562473271519534, "learning_rate": 5.369374476761975e-06, "loss": 0.2509710192680359, "step": 5052 }, { "epoch": 1.3417872792457841, "grad_norm": 1.3381440207626536, "learning_rate": 5.365483275453677e-06, "loss": 0.26555800437927246, "step": 5053 }, { "epoch": 1.342052848227327, "grad_norm": 1.2240809600669689, "learning_rate": 5.361592967625544e-06, "loss": 0.23089733719825745, "step": 5054 }, { "epoch": 1.34231841720887, "grad_norm": 1.1178692263054482, "learning_rate": 5.357703554027582e-06, "loss": 0.2040700763463974, "step": 5055 }, { "epoch": 1.342583986190413, "grad_norm": 1.309704975193781, "learning_rate": 5.353815035409624e-06, "loss": 0.23539039492607117, "step": 5056 }, { "epoch": 1.342849555171956, "grad_norm": 1.7065922202358847, "learning_rate": 5.3499274125213294e-06, "loss": 0.2190464437007904, "step": 5057 }, { "epoch": 1.3431151241534989, "grad_norm": 1.1478595499251703, "learning_rate": 5.346040686112189e-06, "loss": 0.21557429432868958, "step": 5058 }, { "epoch": 1.3433806931350418, "grad_norm": 1.1934269644730748, "learning_rate": 5.342154856931515e-06, "loss": 0.24398267269134521, "step": 5059 }, { "epoch": 1.3436462621165848, "grad_norm": 1.1089059625649784, "learning_rate": 5.338269925728451e-06, "loss": 0.21652038395404816, "step": 5060 }, { "epoch": 1.3439118310981277, "grad_norm": 1.1937531358219302, "learning_rate": 5.334385893251966e-06, "loss": 0.2031325101852417, "step": 5061 }, { "epoch": 1.3441774000796707, "grad_norm": 1.1621991357090053, "learning_rate": 5.330502760250853e-06, "loss": 0.2484835982322693, "step": 5062 }, { "epoch": 1.3444429690612136, "grad_norm": 1.2657742595884374, "learning_rate": 5.326620527473737e-06, "loss": 0.23698699474334717, "step": 5063 }, { "epoch": 1.3447085380427566, "grad_norm": 1.2000433743668328, "learning_rate": 5.322739195669065e-06, "loss": 0.23928484320640564, "step": 5064 }, { "epoch": 1.3449741070242995, "grad_norm": 1.1828146199314795, "learning_rate": 5.318858765585115e-06, "loss": 0.22679512202739716, "step": 5065 }, { "epoch": 1.3452396760058425, "grad_norm": 1.2334385564497414, "learning_rate": 5.314979237969984e-06, "loss": 0.2115025818347931, "step": 5066 }, { "epoch": 1.3455052449873854, "grad_norm": 1.261129899382787, "learning_rate": 5.311100613571603e-06, "loss": 0.2441834807395935, "step": 5067 }, { "epoch": 1.3457708139689284, "grad_norm": 1.2722125718860966, "learning_rate": 5.307222893137722e-06, "loss": 0.2549205720424652, "step": 5068 }, { "epoch": 1.3460363829504713, "grad_norm": 1.179054242584843, "learning_rate": 5.3033460774159185e-06, "loss": 0.24652990698814392, "step": 5069 }, { "epoch": 1.3463019519320143, "grad_norm": 1.2062419936470874, "learning_rate": 5.299470167153602e-06, "loss": 0.2403775006532669, "step": 5070 }, { "epoch": 1.3465675209135572, "grad_norm": 1.1208895570259512, "learning_rate": 5.295595163097999e-06, "loss": 0.2215663194656372, "step": 5071 }, { "epoch": 1.3468330898951002, "grad_norm": 1.2914937229567889, "learning_rate": 5.291721065996167e-06, "loss": 0.2567424774169922, "step": 5072 }, { "epoch": 1.3470986588766432, "grad_norm": 1.0608079556396839, "learning_rate": 5.287847876594984e-06, "loss": 0.21162359416484833, "step": 5073 }, { "epoch": 1.347364227858186, "grad_norm": 1.221049341797181, "learning_rate": 5.283975595641155e-06, "loss": 0.21851085126399994, "step": 5074 }, { "epoch": 1.347629796839729, "grad_norm": 1.2935501467753354, "learning_rate": 5.280104223881212e-06, "loss": 0.2491171509027481, "step": 5075 }, { "epoch": 1.347895365821272, "grad_norm": 1.2921255335421646, "learning_rate": 5.276233762061507e-06, "loss": 0.22467780113220215, "step": 5076 }, { "epoch": 1.348160934802815, "grad_norm": 1.159790816626821, "learning_rate": 5.272364210928223e-06, "loss": 0.24531611800193787, "step": 5077 }, { "epoch": 1.348426503784358, "grad_norm": 1.2178282841242851, "learning_rate": 5.268495571227361e-06, "loss": 0.2582520544528961, "step": 5078 }, { "epoch": 1.3486920727659009, "grad_norm": 1.2175282778251775, "learning_rate": 5.264627843704749e-06, "loss": 0.21180811524391174, "step": 5079 }, { "epoch": 1.348957641747444, "grad_norm": 1.2942378328530906, "learning_rate": 5.2607610291060406e-06, "loss": 0.27026671171188354, "step": 5080 }, { "epoch": 1.349223210728987, "grad_norm": 1.1721525183169563, "learning_rate": 5.256895128176712e-06, "loss": 0.22954419255256653, "step": 5081 }, { "epoch": 1.34948877971053, "grad_norm": 1.3561853541918854, "learning_rate": 5.253030141662063e-06, "loss": 0.24064484238624573, "step": 5082 }, { "epoch": 1.349754348692073, "grad_norm": 1.1245550279116328, "learning_rate": 5.249166070307218e-06, "loss": 0.1981196105480194, "step": 5083 }, { "epoch": 1.3500199176736158, "grad_norm": 1.0881909699390468, "learning_rate": 5.2453029148571226e-06, "loss": 0.19882233440876007, "step": 5084 }, { "epoch": 1.3502854866551588, "grad_norm": 1.2123536275051694, "learning_rate": 5.24144067605655e-06, "loss": 0.2409907579421997, "step": 5085 }, { "epoch": 1.3505510556367017, "grad_norm": 1.2197874501412473, "learning_rate": 5.237579354650092e-06, "loss": 0.2205093652009964, "step": 5086 }, { "epoch": 1.3508166246182447, "grad_norm": 1.4716074796051495, "learning_rate": 5.233718951382163e-06, "loss": 0.2283058911561966, "step": 5087 }, { "epoch": 1.3510821935997877, "grad_norm": 1.2561007307780203, "learning_rate": 5.229859466997012e-06, "loss": 0.25584474205970764, "step": 5088 }, { "epoch": 1.3513477625813306, "grad_norm": 1.1491167817661179, "learning_rate": 5.226000902238696e-06, "loss": 0.22516845166683197, "step": 5089 }, { "epoch": 1.3516133315628736, "grad_norm": 1.2604818786719383, "learning_rate": 5.222143257851102e-06, "loss": 0.23440764844417572, "step": 5090 }, { "epoch": 1.3518789005444165, "grad_norm": 1.2156754572685655, "learning_rate": 5.218286534577938e-06, "loss": 0.25858962535858154, "step": 5091 }, { "epoch": 1.3521444695259595, "grad_norm": 1.1425154357949754, "learning_rate": 5.214430733162736e-06, "loss": 0.20676326751708984, "step": 5092 }, { "epoch": 1.3524100385075024, "grad_norm": 1.1266241214136956, "learning_rate": 5.210575854348853e-06, "loss": 0.21892425417900085, "step": 5093 }, { "epoch": 1.3526756074890454, "grad_norm": 1.2379350388596377, "learning_rate": 5.206721898879454e-06, "loss": 0.2538335919380188, "step": 5094 }, { "epoch": 1.3529411764705883, "grad_norm": 1.2059035716196298, "learning_rate": 5.202868867497542e-06, "loss": 0.24750448763370514, "step": 5095 }, { "epoch": 1.3532067454521313, "grad_norm": 1.2602608504342458, "learning_rate": 5.199016760945931e-06, "loss": 0.2569364011287689, "step": 5096 }, { "epoch": 1.3534723144336742, "grad_norm": 0.9860855220263709, "learning_rate": 5.19516557996727e-06, "loss": 0.16788914799690247, "step": 5097 }, { "epoch": 1.3537378834152172, "grad_norm": 1.0020852845957948, "learning_rate": 5.191315325304018e-06, "loss": 0.19006651639938354, "step": 5098 }, { "epoch": 1.3540034523967601, "grad_norm": 1.187896658740898, "learning_rate": 5.1874659976984575e-06, "loss": 0.23474551737308502, "step": 5099 }, { "epoch": 1.354269021378303, "grad_norm": 1.2829971661643687, "learning_rate": 5.183617597892694e-06, "loss": 0.26601099967956543, "step": 5100 }, { "epoch": 1.354534590359846, "grad_norm": 1.1758855450162613, "learning_rate": 5.179770126628654e-06, "loss": 0.24207550287246704, "step": 5101 }, { "epoch": 1.354800159341389, "grad_norm": 1.2535446057143411, "learning_rate": 5.175923584648083e-06, "loss": 0.2538307309150696, "step": 5102 }, { "epoch": 1.355065728322932, "grad_norm": 1.1865818667829109, "learning_rate": 5.172077972692553e-06, "loss": 0.23073242604732513, "step": 5103 }, { "epoch": 1.3553312973044749, "grad_norm": 1.348848385270533, "learning_rate": 5.168233291503448e-06, "loss": 0.2634595036506653, "step": 5104 }, { "epoch": 1.3555968662860178, "grad_norm": 1.225057907199874, "learning_rate": 5.1643895418219744e-06, "loss": 0.23282350599765778, "step": 5105 }, { "epoch": 1.3558624352675608, "grad_norm": 1.333152685269679, "learning_rate": 5.160546724389172e-06, "loss": 0.2543700933456421, "step": 5106 }, { "epoch": 1.3561280042491037, "grad_norm": 1.1449256417555271, "learning_rate": 5.1567048399458855e-06, "loss": 0.2005772739648819, "step": 5107 }, { "epoch": 1.3563935732306467, "grad_norm": 1.2429630346358373, "learning_rate": 5.152863889232787e-06, "loss": 0.2367073893547058, "step": 5108 }, { "epoch": 1.3566591422121896, "grad_norm": 1.2839253544945022, "learning_rate": 5.14902387299036e-06, "loss": 0.25600770115852356, "step": 5109 }, { "epoch": 1.3569247111937326, "grad_norm": 1.198566513294344, "learning_rate": 5.145184791958918e-06, "loss": 0.21678754687309265, "step": 5110 }, { "epoch": 1.3571902801752755, "grad_norm": 1.3894724787206996, "learning_rate": 5.141346646878591e-06, "loss": 0.265438973903656, "step": 5111 }, { "epoch": 1.3574558491568185, "grad_norm": 1.1239736089383028, "learning_rate": 5.13750943848933e-06, "loss": 0.24246999621391296, "step": 5112 }, { "epoch": 1.3577214181383614, "grad_norm": 1.299396280421792, "learning_rate": 5.133673167530899e-06, "loss": 0.25401771068573, "step": 5113 }, { "epoch": 1.3579869871199044, "grad_norm": 1.2329813534125698, "learning_rate": 5.129837834742885e-06, "loss": 0.2698017656803131, "step": 5114 }, { "epoch": 1.3582525561014474, "grad_norm": 1.2787210937788358, "learning_rate": 5.126003440864703e-06, "loss": 0.27006995677948, "step": 5115 }, { "epoch": 1.3585181250829903, "grad_norm": 1.2695682196385796, "learning_rate": 5.122169986635575e-06, "loss": 0.2370866984128952, "step": 5116 }, { "epoch": 1.3587836940645333, "grad_norm": 1.3031561376922138, "learning_rate": 5.1183374727945425e-06, "loss": 0.24017807841300964, "step": 5117 }, { "epoch": 1.3590492630460762, "grad_norm": 1.1487956614446662, "learning_rate": 5.114505900080473e-06, "loss": 0.21664533019065857, "step": 5118 }, { "epoch": 1.3593148320276192, "grad_norm": 4.246209132455192, "learning_rate": 5.110675269232046e-06, "loss": 0.24561598896980286, "step": 5119 }, { "epoch": 1.359580401009162, "grad_norm": 1.3902415348604562, "learning_rate": 5.106845580987763e-06, "loss": 0.26678937673568726, "step": 5120 }, { "epoch": 1.359845969990705, "grad_norm": 1.354168350096278, "learning_rate": 5.103016836085943e-06, "loss": 0.21919070184230804, "step": 5121 }, { "epoch": 1.360111538972248, "grad_norm": 1.3057665036353723, "learning_rate": 5.099189035264722e-06, "loss": 0.24887943267822266, "step": 5122 }, { "epoch": 1.360377107953791, "grad_norm": 1.2017875007060346, "learning_rate": 5.0953621792620556e-06, "loss": 0.23597784340381622, "step": 5123 }, { "epoch": 1.360642676935334, "grad_norm": 1.2098630506546966, "learning_rate": 5.091536268815717e-06, "loss": 0.21265193819999695, "step": 5124 }, { "epoch": 1.3609082459168769, "grad_norm": 1.3606980074054404, "learning_rate": 5.0877113046632945e-06, "loss": 0.29837465286254883, "step": 5125 }, { "epoch": 1.3611738148984198, "grad_norm": 1.1915793844006848, "learning_rate": 5.0838872875421975e-06, "loss": 0.2324269413948059, "step": 5126 }, { "epoch": 1.3614393838799628, "grad_norm": 1.0970197687294143, "learning_rate": 5.080064218189652e-06, "loss": 0.19149541854858398, "step": 5127 }, { "epoch": 1.3617049528615057, "grad_norm": 1.1710303609542994, "learning_rate": 5.0762420973427e-06, "loss": 0.247644305229187, "step": 5128 }, { "epoch": 1.3619705218430487, "grad_norm": 1.1403838601028529, "learning_rate": 5.0724209257382006e-06, "loss": 0.2272202968597412, "step": 5129 }, { "epoch": 1.3622360908245916, "grad_norm": 1.2012952880900256, "learning_rate": 5.068600704112832e-06, "loss": 0.25735989212989807, "step": 5130 }, { "epoch": 1.3625016598061346, "grad_norm": 1.1771555574179005, "learning_rate": 5.064781433203086e-06, "loss": 0.19970473647117615, "step": 5131 }, { "epoch": 1.3627672287876775, "grad_norm": 1.2156620394191346, "learning_rate": 5.060963113745272e-06, "loss": 0.24289372563362122, "step": 5132 }, { "epoch": 1.3630327977692205, "grad_norm": 1.2352988713677027, "learning_rate": 5.0571457464755226e-06, "loss": 0.2757350504398346, "step": 5133 }, { "epoch": 1.3632983667507634, "grad_norm": 1.2115447809386193, "learning_rate": 5.053329332129777e-06, "loss": 0.24552851915359497, "step": 5134 }, { "epoch": 1.3635639357323064, "grad_norm": 1.1546263092618338, "learning_rate": 5.049513871443797e-06, "loss": 0.22152797877788544, "step": 5135 }, { "epoch": 1.3638295047138493, "grad_norm": 1.2567398712194906, "learning_rate": 5.045699365153155e-06, "loss": 0.27098602056503296, "step": 5136 }, { "epoch": 1.3640950736953923, "grad_norm": 1.201852433475055, "learning_rate": 5.041885813993246e-06, "loss": 0.21275216341018677, "step": 5137 }, { "epoch": 1.3643606426769352, "grad_norm": 1.3326670101473788, "learning_rate": 5.038073218699275e-06, "loss": 0.2510162591934204, "step": 5138 }, { "epoch": 1.3646262116584782, "grad_norm": 1.2702563681918038, "learning_rate": 5.034261580006269e-06, "loss": 0.23203429579734802, "step": 5139 }, { "epoch": 1.3648917806400211, "grad_norm": 1.137285489869793, "learning_rate": 5.030450898649064e-06, "loss": 0.22178995609283447, "step": 5140 }, { "epoch": 1.365157349621564, "grad_norm": 1.2415754400243457, "learning_rate": 5.026641175362316e-06, "loss": 0.2567412257194519, "step": 5141 }, { "epoch": 1.365422918603107, "grad_norm": 1.232487080143156, "learning_rate": 5.022832410880494e-06, "loss": 0.21939827501773834, "step": 5142 }, { "epoch": 1.36568848758465, "grad_norm": 1.4733425270104286, "learning_rate": 5.019024605937882e-06, "loss": 0.2325637936592102, "step": 5143 }, { "epoch": 1.365954056566193, "grad_norm": 1.266575596941496, "learning_rate": 5.015217761268582e-06, "loss": 0.2416393756866455, "step": 5144 }, { "epoch": 1.366219625547736, "grad_norm": 1.289260413423763, "learning_rate": 5.011411877606507e-06, "loss": 0.2439568042755127, "step": 5145 }, { "epoch": 1.3664851945292789, "grad_norm": 1.1439689034996021, "learning_rate": 5.007606955685387e-06, "loss": 0.2495957612991333, "step": 5146 }, { "epoch": 1.3667507635108218, "grad_norm": 1.1937127912858143, "learning_rate": 5.003802996238766e-06, "loss": 0.23415328562259674, "step": 5147 }, { "epoch": 1.3670163324923648, "grad_norm": 1.26410321081345, "learning_rate": 5.000000000000003e-06, "loss": 0.2637922465801239, "step": 5148 }, { "epoch": 1.3672819014739077, "grad_norm": 1.243307173830296, "learning_rate": 4.9961979677022696e-06, "loss": 0.2319526970386505, "step": 5149 }, { "epoch": 1.3675474704554509, "grad_norm": 1.2115383829826751, "learning_rate": 4.992396900078551e-06, "loss": 0.2338445484638214, "step": 5150 }, { "epoch": 1.3678130394369938, "grad_norm": 1.1683439299091893, "learning_rate": 4.988596797861654e-06, "loss": 0.19041961431503296, "step": 5151 }, { "epoch": 1.3680786084185368, "grad_norm": 1.233073404450011, "learning_rate": 4.984797661784191e-06, "loss": 0.2698138952255249, "step": 5152 }, { "epoch": 1.3683441774000797, "grad_norm": 1.2592426315358647, "learning_rate": 4.980999492578588e-06, "loss": 0.2208167165517807, "step": 5153 }, { "epoch": 1.3686097463816227, "grad_norm": 1.1935159953807641, "learning_rate": 4.9772022909770915e-06, "loss": 0.2515152096748352, "step": 5154 }, { "epoch": 1.3688753153631656, "grad_norm": 1.3110804278343313, "learning_rate": 4.973406057711755e-06, "loss": 0.2393365204334259, "step": 5155 }, { "epoch": 1.3691408843447086, "grad_norm": 1.302037077529998, "learning_rate": 4.969610793514446e-06, "loss": 0.24546492099761963, "step": 5156 }, { "epoch": 1.3694064533262515, "grad_norm": 1.5300417364025873, "learning_rate": 4.965816499116849e-06, "loss": 0.252412348985672, "step": 5157 }, { "epoch": 1.3696720223077945, "grad_norm": 1.1552882128683561, "learning_rate": 4.962023175250461e-06, "loss": 0.22654281556606293, "step": 5158 }, { "epoch": 1.3699375912893375, "grad_norm": 1.2873880265204376, "learning_rate": 4.958230822646581e-06, "loss": 0.2542813718318939, "step": 5159 }, { "epoch": 1.3702031602708804, "grad_norm": 1.2851879635778218, "learning_rate": 4.9544394420363395e-06, "loss": 0.25376224517822266, "step": 5160 }, { "epoch": 1.3704687292524234, "grad_norm": 1.252574665809313, "learning_rate": 4.950649034150666e-06, "loss": 0.21911674737930298, "step": 5161 }, { "epoch": 1.3707342982339663, "grad_norm": 1.3527776455922371, "learning_rate": 4.946859599720308e-06, "loss": 0.2805126905441284, "step": 5162 }, { "epoch": 1.3709998672155093, "grad_norm": 1.1716388954292443, "learning_rate": 4.943071139475824e-06, "loss": 0.2189590483903885, "step": 5163 }, { "epoch": 1.3712654361970522, "grad_norm": 1.2218109142926636, "learning_rate": 4.939283654147582e-06, "loss": 0.21837599575519562, "step": 5164 }, { "epoch": 1.3715310051785952, "grad_norm": 1.2779646624690562, "learning_rate": 4.935497144465766e-06, "loss": 0.25090983510017395, "step": 5165 }, { "epoch": 1.3717965741601381, "grad_norm": 1.1988734011828608, "learning_rate": 4.93171161116037e-06, "loss": 0.22028754651546478, "step": 5166 }, { "epoch": 1.372062143141681, "grad_norm": 1.1554753760684375, "learning_rate": 4.927927054961201e-06, "loss": 0.20097196102142334, "step": 5167 }, { "epoch": 1.372327712123224, "grad_norm": 1.209557738779129, "learning_rate": 4.924143476597872e-06, "loss": 0.230082705616951, "step": 5168 }, { "epoch": 1.372593281104767, "grad_norm": 1.1549715219295726, "learning_rate": 4.920360876799821e-06, "loss": 0.23701804876327515, "step": 5169 }, { "epoch": 1.37285885008631, "grad_norm": 1.2740998730652584, "learning_rate": 4.9165792562962834e-06, "loss": 0.22357231378555298, "step": 5170 }, { "epoch": 1.3731244190678529, "grad_norm": 1.2042473616661704, "learning_rate": 4.912798615816312e-06, "loss": 0.2533026337623596, "step": 5171 }, { "epoch": 1.3733899880493958, "grad_norm": 1.3342025781776312, "learning_rate": 4.90901895608877e-06, "loss": 0.24878138303756714, "step": 5172 }, { "epoch": 1.3736555570309388, "grad_norm": 1.5415419516618216, "learning_rate": 4.905240277842335e-06, "loss": 0.22641420364379883, "step": 5173 }, { "epoch": 1.3739211260124817, "grad_norm": 1.2916997982097302, "learning_rate": 4.901462581805483e-06, "loss": 0.24495793879032135, "step": 5174 }, { "epoch": 1.3741866949940247, "grad_norm": 1.3531795848957913, "learning_rate": 4.897685868706512e-06, "loss": 0.2688868045806885, "step": 5175 }, { "epoch": 1.3744522639755676, "grad_norm": 1.2828126418821555, "learning_rate": 4.893910139273531e-06, "loss": 0.25796642899513245, "step": 5176 }, { "epoch": 1.3747178329571106, "grad_norm": 1.4091718050104127, "learning_rate": 4.890135394234451e-06, "loss": 0.27557405829429626, "step": 5177 }, { "epoch": 1.3749834019386535, "grad_norm": 1.620605499986823, "learning_rate": 4.886361634317004e-06, "loss": 0.23553809523582458, "step": 5178 }, { "epoch": 1.3752489709201965, "grad_norm": 1.2608742989736732, "learning_rate": 4.882588860248725e-06, "loss": 0.2454400360584259, "step": 5179 }, { "epoch": 1.3755145399017394, "grad_norm": 1.1743865548501493, "learning_rate": 4.878817072756959e-06, "loss": 0.19460657238960266, "step": 5180 }, { "epoch": 1.3757801088832824, "grad_norm": 1.2528300475452, "learning_rate": 4.875046272568863e-06, "loss": 0.24833449721336365, "step": 5181 }, { "epoch": 1.3760456778648253, "grad_norm": 1.3263672125712147, "learning_rate": 4.871276460411403e-06, "loss": 0.2774161994457245, "step": 5182 }, { "epoch": 1.3763112468463683, "grad_norm": 2.6268834337513667, "learning_rate": 4.867507637011353e-06, "loss": 0.2277964949607849, "step": 5183 }, { "epoch": 1.3765768158279112, "grad_norm": 1.8924198767245841, "learning_rate": 4.863739803095299e-06, "loss": 0.2176733911037445, "step": 5184 }, { "epoch": 1.3768423848094542, "grad_norm": 1.3153810073025014, "learning_rate": 4.859972959389634e-06, "loss": 0.23529113829135895, "step": 5185 }, { "epoch": 1.3771079537909972, "grad_norm": 1.3909544444662505, "learning_rate": 4.856207106620557e-06, "loss": 0.2646695077419281, "step": 5186 }, { "epoch": 1.37737352277254, "grad_norm": 1.2095108180861869, "learning_rate": 4.852442245514093e-06, "loss": 0.23179873824119568, "step": 5187 }, { "epoch": 1.377639091754083, "grad_norm": 1.1084014698771758, "learning_rate": 4.84867837679605e-06, "loss": 0.2127494066953659, "step": 5188 }, { "epoch": 1.377904660735626, "grad_norm": 1.2275201950569183, "learning_rate": 4.844915501192062e-06, "loss": 0.2204679548740387, "step": 5189 }, { "epoch": 1.378170229717169, "grad_norm": 1.2078653060668294, "learning_rate": 4.841153619427567e-06, "loss": 0.20271794497966766, "step": 5190 }, { "epoch": 1.378435798698712, "grad_norm": 1.4269963155687142, "learning_rate": 4.837392732227811e-06, "loss": 0.2785792052745819, "step": 5191 }, { "epoch": 1.3787013676802549, "grad_norm": 1.2501319487764966, "learning_rate": 4.8336328403178486e-06, "loss": 0.24904468655586243, "step": 5192 }, { "epoch": 1.378966936661798, "grad_norm": 1.1230965332904321, "learning_rate": 4.829873944422544e-06, "loss": 0.20045346021652222, "step": 5193 }, { "epoch": 1.379232505643341, "grad_norm": 1.1339816903135191, "learning_rate": 4.826116045266565e-06, "loss": 0.21814313530921936, "step": 5194 }, { "epoch": 1.379498074624884, "grad_norm": 1.236126479276255, "learning_rate": 4.82235914357439e-06, "loss": 0.2408592253923416, "step": 5195 }, { "epoch": 1.379763643606427, "grad_norm": 1.1229995433845732, "learning_rate": 4.818603240070311e-06, "loss": 0.21453416347503662, "step": 5196 }, { "epoch": 1.3800292125879698, "grad_norm": 1.2915687788203387, "learning_rate": 4.814848335478418e-06, "loss": 0.2578599154949188, "step": 5197 }, { "epoch": 1.3802947815695128, "grad_norm": 1.0696662022967476, "learning_rate": 4.811094430522613e-06, "loss": 0.1980094015598297, "step": 5198 }, { "epoch": 1.3805603505510557, "grad_norm": 1.202740960535961, "learning_rate": 4.807341525926604e-06, "loss": 0.24620960652828217, "step": 5199 }, { "epoch": 1.3808259195325987, "grad_norm": 1.2486655803425535, "learning_rate": 4.803589622413908e-06, "loss": 0.23525282740592957, "step": 5200 }, { "epoch": 1.3810914885141417, "grad_norm": 1.1657735912575689, "learning_rate": 4.799838720707847e-06, "loss": 0.2277744859457016, "step": 5201 }, { "epoch": 1.3813570574956846, "grad_norm": 1.2927728942283212, "learning_rate": 4.796088821531549e-06, "loss": 0.2727074921131134, "step": 5202 }, { "epoch": 1.3816226264772276, "grad_norm": 1.2370931993726209, "learning_rate": 4.7923399256079525e-06, "loss": 0.21686753630638123, "step": 5203 }, { "epoch": 1.3818881954587705, "grad_norm": 1.2572583885252075, "learning_rate": 4.788592033659799e-06, "loss": 0.2841380834579468, "step": 5204 }, { "epoch": 1.3821537644403135, "grad_norm": 1.1157272204593003, "learning_rate": 4.78484514640964e-06, "loss": 0.24577853083610535, "step": 5205 }, { "epoch": 1.3824193334218564, "grad_norm": 1.2077705032221964, "learning_rate": 4.7810992645798285e-06, "loss": 0.22289782762527466, "step": 5206 }, { "epoch": 1.3826849024033994, "grad_norm": 1.1476107334002954, "learning_rate": 4.7773543888925274e-06, "loss": 0.2223999947309494, "step": 5207 }, { "epoch": 1.3829504713849423, "grad_norm": 1.2183085137487102, "learning_rate": 4.773610520069706e-06, "loss": 0.23938870429992676, "step": 5208 }, { "epoch": 1.3832160403664853, "grad_norm": 1.219370193725879, "learning_rate": 4.769867658833136e-06, "loss": 0.260856568813324, "step": 5209 }, { "epoch": 1.3834816093480282, "grad_norm": 1.2333269697463725, "learning_rate": 4.766125805904398e-06, "loss": 0.23602089285850525, "step": 5210 }, { "epoch": 1.3837471783295712, "grad_norm": 1.156747833138865, "learning_rate": 4.762384962004877e-06, "loss": 0.22543978691101074, "step": 5211 }, { "epoch": 1.3840127473111141, "grad_norm": 1.3639051201807257, "learning_rate": 4.758645127855763e-06, "loss": 0.2432224452495575, "step": 5212 }, { "epoch": 1.384278316292657, "grad_norm": 1.3947016936895973, "learning_rate": 4.754906304178049e-06, "loss": 0.22764597833156586, "step": 5213 }, { "epoch": 1.3845438852742, "grad_norm": 1.2064067504011344, "learning_rate": 4.751168491692541e-06, "loss": 0.22503387928009033, "step": 5214 }, { "epoch": 1.384809454255743, "grad_norm": 1.1066861130484609, "learning_rate": 4.747431691119846e-06, "loss": 0.21889932453632355, "step": 5215 }, { "epoch": 1.385075023237286, "grad_norm": 1.3903278318809302, "learning_rate": 4.743695903180372e-06, "loss": 0.2695825695991516, "step": 5216 }, { "epoch": 1.3853405922188289, "grad_norm": 1.2921759622470506, "learning_rate": 4.739961128594336e-06, "loss": 0.265118271112442, "step": 5217 }, { "epoch": 1.3856061612003718, "grad_norm": 1.1349207398090602, "learning_rate": 4.736227368081757e-06, "loss": 0.2050788253545761, "step": 5218 }, { "epoch": 1.3858717301819148, "grad_norm": 1.23951121142384, "learning_rate": 4.7324946223624625e-06, "loss": 0.274588406085968, "step": 5219 }, { "epoch": 1.3861372991634577, "grad_norm": 1.209560473571303, "learning_rate": 4.728762892156079e-06, "loss": 0.2242514044046402, "step": 5220 }, { "epoch": 1.3864028681450007, "grad_norm": 1.1337174836883812, "learning_rate": 4.725032178182042e-06, "loss": 0.19989261031150818, "step": 5221 }, { "epoch": 1.3866684371265436, "grad_norm": 1.1989339880554155, "learning_rate": 4.721302481159588e-06, "loss": 0.24409207701683044, "step": 5222 }, { "epoch": 1.3869340061080866, "grad_norm": 1.2425140627800753, "learning_rate": 4.71757380180776e-06, "loss": 0.25146353244781494, "step": 5223 }, { "epoch": 1.3871995750896295, "grad_norm": 1.245669068902739, "learning_rate": 4.713846140845401e-06, "loss": 0.23076622188091278, "step": 5224 }, { "epoch": 1.3874651440711725, "grad_norm": 1.1122357580396618, "learning_rate": 4.7101194989911635e-06, "loss": 0.2159188687801361, "step": 5225 }, { "epoch": 1.3877307130527154, "grad_norm": 1.433039209205417, "learning_rate": 4.706393876963497e-06, "loss": 0.24891307950019836, "step": 5226 }, { "epoch": 1.3879962820342584, "grad_norm": 1.2167285098476437, "learning_rate": 4.702669275480659e-06, "loss": 0.26254773139953613, "step": 5227 }, { "epoch": 1.3882618510158014, "grad_norm": 1.0872799599118763, "learning_rate": 4.698945695260709e-06, "loss": 0.19589121639728546, "step": 5228 }, { "epoch": 1.3885274199973443, "grad_norm": 1.273899860234835, "learning_rate": 4.695223137021509e-06, "loss": 0.23796147108078003, "step": 5229 }, { "epoch": 1.3887929889788873, "grad_norm": 1.1566738109261303, "learning_rate": 4.6915016014807235e-06, "loss": 0.21211156249046326, "step": 5230 }, { "epoch": 1.3890585579604302, "grad_norm": 1.1477189909918881, "learning_rate": 4.687781089355817e-06, "loss": 0.22418555617332458, "step": 5231 }, { "epoch": 1.3893241269419732, "grad_norm": 1.1999712861158167, "learning_rate": 4.68406160136407e-06, "loss": 0.24140511453151703, "step": 5232 }, { "epoch": 1.389589695923516, "grad_norm": 1.3515422291949701, "learning_rate": 4.68034313822255e-06, "loss": 0.2863473892211914, "step": 5233 }, { "epoch": 1.389855264905059, "grad_norm": 1.1002404477789451, "learning_rate": 4.676625700648133e-06, "loss": 0.21283546090126038, "step": 5234 }, { "epoch": 1.390120833886602, "grad_norm": 1.311958297113244, "learning_rate": 4.672909289357498e-06, "loss": 0.2701990008354187, "step": 5235 }, { "epoch": 1.390386402868145, "grad_norm": 1.1672674472381515, "learning_rate": 4.669193905067124e-06, "loss": 0.23807264864444733, "step": 5236 }, { "epoch": 1.390651971849688, "grad_norm": 1.3282268361230456, "learning_rate": 4.665479548493298e-06, "loss": 0.22204206883907318, "step": 5237 }, { "epoch": 1.3909175408312309, "grad_norm": 1.2590492281878678, "learning_rate": 4.661766220352098e-06, "loss": 0.22389569878578186, "step": 5238 }, { "epoch": 1.3911831098127738, "grad_norm": 1.2844920522393721, "learning_rate": 4.65805392135941e-06, "loss": 0.23752997815608978, "step": 5239 }, { "epoch": 1.3914486787943168, "grad_norm": 1.8677910056359206, "learning_rate": 4.654342652230921e-06, "loss": 0.24055880308151245, "step": 5240 }, { "epoch": 1.3917142477758597, "grad_norm": 1.2030621240735913, "learning_rate": 4.6506324136821255e-06, "loss": 0.22136151790618896, "step": 5241 }, { "epoch": 1.3919798167574027, "grad_norm": 1.299031121789001, "learning_rate": 4.646923206428311e-06, "loss": 0.2616429924964905, "step": 5242 }, { "epoch": 1.3922453857389456, "grad_norm": 1.218734267375269, "learning_rate": 4.643215031184569e-06, "loss": 0.24827662110328674, "step": 5243 }, { "epoch": 1.3925109547204886, "grad_norm": 1.3223478407487963, "learning_rate": 4.639507888665792e-06, "loss": 0.21999669075012207, "step": 5244 }, { "epoch": 1.3927765237020315, "grad_norm": 1.3241857590600639, "learning_rate": 4.6358017795866715e-06, "loss": 0.24511300027370453, "step": 5245 }, { "epoch": 1.3930420926835745, "grad_norm": 1.2459535025826622, "learning_rate": 4.632096704661704e-06, "loss": 0.2410753220319748, "step": 5246 }, { "epoch": 1.3933076616651174, "grad_norm": 1.157173292152249, "learning_rate": 4.628392664605184e-06, "loss": 0.2160021960735321, "step": 5247 }, { "epoch": 1.3935732306466604, "grad_norm": 1.2204303717623475, "learning_rate": 4.624689660131204e-06, "loss": 0.22672782838344574, "step": 5248 }, { "epoch": 1.3938387996282033, "grad_norm": 1.3056904555347544, "learning_rate": 4.620987691953659e-06, "loss": 0.25474926829338074, "step": 5249 }, { "epoch": 1.3941043686097463, "grad_norm": 1.3078938706976893, "learning_rate": 4.617286760786252e-06, "loss": 0.2449323832988739, "step": 5250 }, { "epoch": 1.3943699375912892, "grad_norm": 1.4350253205296164, "learning_rate": 4.613586867342473e-06, "loss": 0.23727643489837646, "step": 5251 }, { "epoch": 1.3946355065728322, "grad_norm": 1.492440797106639, "learning_rate": 4.609888012335624e-06, "loss": 0.23727962374687195, "step": 5252 }, { "epoch": 1.3949010755543751, "grad_norm": 1.1595482332609377, "learning_rate": 4.60619019647879e-06, "loss": 0.21957805752754211, "step": 5253 }, { "epoch": 1.395166644535918, "grad_norm": 1.1972608851584254, "learning_rate": 4.6024934204848745e-06, "loss": 0.24184471368789673, "step": 5254 }, { "epoch": 1.395432213517461, "grad_norm": 1.2654091836286674, "learning_rate": 4.598797685066568e-06, "loss": 0.239216148853302, "step": 5255 }, { "epoch": 1.395697782499004, "grad_norm": 1.1503034311319646, "learning_rate": 4.595102990936367e-06, "loss": 0.17741018533706665, "step": 5256 }, { "epoch": 1.395963351480547, "grad_norm": 1.2669115039567294, "learning_rate": 4.591409338806566e-06, "loss": 0.26139867305755615, "step": 5257 }, { "epoch": 1.39622892046209, "grad_norm": 1.1295627244433792, "learning_rate": 4.587716729389251e-06, "loss": 0.23689255118370056, "step": 5258 }, { "epoch": 1.3964944894436329, "grad_norm": 1.3449494333614898, "learning_rate": 4.584025163396323e-06, "loss": 0.22679267823696136, "step": 5259 }, { "epoch": 1.3967600584251758, "grad_norm": 1.4665032620533849, "learning_rate": 4.580334641539467e-06, "loss": 0.2743435204029083, "step": 5260 }, { "epoch": 1.3970256274067188, "grad_norm": 1.166091966014122, "learning_rate": 4.5766451645301735e-06, "loss": 0.22738990187644958, "step": 5261 }, { "epoch": 1.3972911963882617, "grad_norm": 1.2398512539901747, "learning_rate": 4.57295673307973e-06, "loss": 0.24826082587242126, "step": 5262 }, { "epoch": 1.3975567653698049, "grad_norm": 1.2172880570038314, "learning_rate": 4.569269347899222e-06, "loss": 0.23121042549610138, "step": 5263 }, { "epoch": 1.3978223343513478, "grad_norm": 2.1881918032824443, "learning_rate": 4.5655830096995345e-06, "loss": 0.21382957696914673, "step": 5264 }, { "epoch": 1.3980879033328908, "grad_norm": 1.6700623666107715, "learning_rate": 4.561897719191349e-06, "loss": 0.24439184367656708, "step": 5265 }, { "epoch": 1.3983534723144337, "grad_norm": 1.1734120938371422, "learning_rate": 4.558213477085148e-06, "loss": 0.2106003314256668, "step": 5266 }, { "epoch": 1.3986190412959767, "grad_norm": 1.568387486793487, "learning_rate": 4.554530284091209e-06, "loss": 0.3073291480541229, "step": 5267 }, { "epoch": 1.3988846102775196, "grad_norm": 1.226744359266016, "learning_rate": 4.550848140919606e-06, "loss": 0.2448226660490036, "step": 5268 }, { "epoch": 1.3991501792590626, "grad_norm": 1.4434974870419186, "learning_rate": 4.5471670482802165e-06, "loss": 0.25378671288490295, "step": 5269 }, { "epoch": 1.3994157482406056, "grad_norm": 1.243366792714921, "learning_rate": 4.5434870068827086e-06, "loss": 0.2735089659690857, "step": 5270 }, { "epoch": 1.3996813172221485, "grad_norm": 1.3983115308066707, "learning_rate": 4.539808017436552e-06, "loss": 0.2530548870563507, "step": 5271 }, { "epoch": 1.3999468862036915, "grad_norm": 1.2566722493021396, "learning_rate": 4.536130080651015e-06, "loss": 0.23692254722118378, "step": 5272 }, { "epoch": 1.4002124551852344, "grad_norm": 1.257120121799197, "learning_rate": 4.532453197235155e-06, "loss": 0.24554882943630219, "step": 5273 }, { "epoch": 1.4004780241667774, "grad_norm": 1.2106096425654094, "learning_rate": 4.528777367897837e-06, "loss": 0.20152084529399872, "step": 5274 }, { "epoch": 1.4007435931483203, "grad_norm": 1.207683737630722, "learning_rate": 4.525102593347714e-06, "loss": 0.20908965170383453, "step": 5275 }, { "epoch": 1.4010091621298633, "grad_norm": 1.2398706056963738, "learning_rate": 4.521428874293238e-06, "loss": 0.23158209025859833, "step": 5276 }, { "epoch": 1.4012747311114062, "grad_norm": 1.2494835342931663, "learning_rate": 4.517756211442664e-06, "loss": 0.2483675330877304, "step": 5277 }, { "epoch": 1.4015403000929492, "grad_norm": 1.1662936164598174, "learning_rate": 4.514084605504035e-06, "loss": 0.23435397446155548, "step": 5278 }, { "epoch": 1.4018058690744921, "grad_norm": 1.242534131664269, "learning_rate": 4.510414057185195e-06, "loss": 0.2605316936969757, "step": 5279 }, { "epoch": 1.402071438056035, "grad_norm": 1.148911142729499, "learning_rate": 4.506744567193782e-06, "loss": 0.2279929518699646, "step": 5280 }, { "epoch": 1.402337007037578, "grad_norm": 1.1849060379752767, "learning_rate": 4.503076136237228e-06, "loss": 0.23011639714241028, "step": 5281 }, { "epoch": 1.402602576019121, "grad_norm": 1.1735153050753564, "learning_rate": 4.499408765022765e-06, "loss": 0.213611900806427, "step": 5282 }, { "epoch": 1.402868145000664, "grad_norm": 1.3225078215525052, "learning_rate": 4.495742454257418e-06, "loss": 0.25555503368377686, "step": 5283 }, { "epoch": 1.4031337139822069, "grad_norm": 1.331030123703595, "learning_rate": 4.4920772046480095e-06, "loss": 0.2694614827632904, "step": 5284 }, { "epoch": 1.4033992829637498, "grad_norm": 1.3958578164403037, "learning_rate": 4.4884130169011565e-06, "loss": 0.2160607874393463, "step": 5285 }, { "epoch": 1.4036648519452928, "grad_norm": 1.4996515147203022, "learning_rate": 4.48474989172327e-06, "loss": 0.2556128203868866, "step": 5286 }, { "epoch": 1.4039304209268357, "grad_norm": 1.2506403611380352, "learning_rate": 4.481087829820558e-06, "loss": 0.2251313328742981, "step": 5287 }, { "epoch": 1.4041959899083787, "grad_norm": 1.380992563161254, "learning_rate": 4.477426831899024e-06, "loss": 0.26856666803359985, "step": 5288 }, { "epoch": 1.4044615588899216, "grad_norm": 1.2429158128712894, "learning_rate": 4.473766898664464e-06, "loss": 0.25573840737342834, "step": 5289 }, { "epoch": 1.4047271278714646, "grad_norm": 1.2559748496125192, "learning_rate": 4.4701080308224685e-06, "loss": 0.26519301533699036, "step": 5290 }, { "epoch": 1.4049926968530075, "grad_norm": 1.5959863642176566, "learning_rate": 4.466450229078427e-06, "loss": 0.2329619824886322, "step": 5291 }, { "epoch": 1.4052582658345505, "grad_norm": 1.208485124140325, "learning_rate": 4.4627934941375185e-06, "loss": 0.2243901491165161, "step": 5292 }, { "epoch": 1.4055238348160934, "grad_norm": 1.2042065274178317, "learning_rate": 4.45913782670472e-06, "loss": 0.22516998648643494, "step": 5293 }, { "epoch": 1.4057894037976364, "grad_norm": 1.2427926273641645, "learning_rate": 4.455483227484796e-06, "loss": 0.25573113560676575, "step": 5294 }, { "epoch": 1.4060549727791793, "grad_norm": 1.3935629686917204, "learning_rate": 4.451829697182317e-06, "loss": 0.2568536698818207, "step": 5295 }, { "epoch": 1.4063205417607223, "grad_norm": 1.293797792298673, "learning_rate": 4.448177236501638e-06, "loss": 0.24510663747787476, "step": 5296 }, { "epoch": 1.4065861107422652, "grad_norm": 1.3445763390180965, "learning_rate": 4.444525846146911e-06, "loss": 0.24890470504760742, "step": 5297 }, { "epoch": 1.4068516797238082, "grad_norm": 1.3096169257052843, "learning_rate": 4.440875526822081e-06, "loss": 0.21442994475364685, "step": 5298 }, { "epoch": 1.4071172487053512, "grad_norm": 1.2628911672392604, "learning_rate": 4.437226279230884e-06, "loss": 0.24281370639801025, "step": 5299 }, { "epoch": 1.407382817686894, "grad_norm": 1.2336479145010515, "learning_rate": 4.433578104076853e-06, "loss": 0.19542500376701355, "step": 5300 }, { "epoch": 1.407648386668437, "grad_norm": 1.256359230599367, "learning_rate": 4.429931002063315e-06, "loss": 0.22688990831375122, "step": 5301 }, { "epoch": 1.40791395564998, "grad_norm": 1.3692436485711592, "learning_rate": 4.42628497389339e-06, "loss": 0.2520858347415924, "step": 5302 }, { "epoch": 1.408179524631523, "grad_norm": 1.1723697651028326, "learning_rate": 4.42264002026998e-06, "loss": 0.237991064786911, "step": 5303 }, { "epoch": 1.408445093613066, "grad_norm": 1.1277997255078087, "learning_rate": 4.418996141895797e-06, "loss": 0.20164436101913452, "step": 5304 }, { "epoch": 1.408710662594609, "grad_norm": 1.2657361694815492, "learning_rate": 4.415353339473338e-06, "loss": 0.24009189009666443, "step": 5305 }, { "epoch": 1.408976231576152, "grad_norm": 1.138145945953283, "learning_rate": 4.411711613704889e-06, "loss": 0.23170322179794312, "step": 5306 }, { "epoch": 1.409241800557695, "grad_norm": 1.2244077415708243, "learning_rate": 4.408070965292534e-06, "loss": 0.2280617356300354, "step": 5307 }, { "epoch": 1.409507369539238, "grad_norm": 1.2724409466040383, "learning_rate": 4.404431394938145e-06, "loss": 0.21982887387275696, "step": 5308 }, { "epoch": 1.409772938520781, "grad_norm": 1.265647410959733, "learning_rate": 4.40079290334339e-06, "loss": 0.25295430421829224, "step": 5309 }, { "epoch": 1.4100385075023238, "grad_norm": 1.1099961782761754, "learning_rate": 4.397155491209727e-06, "loss": 0.20109041035175323, "step": 5310 }, { "epoch": 1.4103040764838668, "grad_norm": 1.3436616824827443, "learning_rate": 4.393519159238405e-06, "loss": 0.2487715482711792, "step": 5311 }, { "epoch": 1.4105696454654097, "grad_norm": 1.1475311486694626, "learning_rate": 4.389883908130465e-06, "loss": 0.2031790167093277, "step": 5312 }, { "epoch": 1.4108352144469527, "grad_norm": 1.277969729475343, "learning_rate": 4.386249738586744e-06, "loss": 0.23029211163520813, "step": 5313 }, { "epoch": 1.4111007834284957, "grad_norm": 1.2100830863469687, "learning_rate": 4.382616651307866e-06, "loss": 0.23080995678901672, "step": 5314 }, { "epoch": 1.4113663524100386, "grad_norm": 1.2376227742095711, "learning_rate": 4.378984646994248e-06, "loss": 0.2450534999370575, "step": 5315 }, { "epoch": 1.4116319213915816, "grad_norm": 1.266655148641824, "learning_rate": 4.375353726346094e-06, "loss": 0.24349799752235413, "step": 5316 }, { "epoch": 1.4118974903731245, "grad_norm": 1.2696628766548714, "learning_rate": 4.371723890063411e-06, "loss": 0.2431599199771881, "step": 5317 }, { "epoch": 1.4121630593546675, "grad_norm": 1.3688178233929764, "learning_rate": 4.368095138845978e-06, "loss": 0.2051251232624054, "step": 5318 }, { "epoch": 1.4124286283362104, "grad_norm": 1.1726447102511934, "learning_rate": 4.36446747339338e-06, "loss": 0.21346575021743774, "step": 5319 }, { "epoch": 1.4126941973177534, "grad_norm": 1.2726406383058895, "learning_rate": 4.360840894404989e-06, "loss": 0.22193217277526855, "step": 5320 }, { "epoch": 1.4129597662992963, "grad_norm": 1.2762131056761095, "learning_rate": 4.357215402579961e-06, "loss": 0.2112501859664917, "step": 5321 }, { "epoch": 1.4132253352808393, "grad_norm": 1.1864412536946314, "learning_rate": 4.3535909986172565e-06, "loss": 0.2648766040802002, "step": 5322 }, { "epoch": 1.4134909042623822, "grad_norm": 1.1533413783243194, "learning_rate": 4.349967683215614e-06, "loss": 0.22139690816402435, "step": 5323 }, { "epoch": 1.4137564732439252, "grad_norm": 1.0259028802936685, "learning_rate": 4.346345457073568e-06, "loss": 0.21558481454849243, "step": 5324 }, { "epoch": 1.4140220422254681, "grad_norm": 1.2763949378052617, "learning_rate": 4.342724320889438e-06, "loss": 0.2013886272907257, "step": 5325 }, { "epoch": 1.414287611207011, "grad_norm": 1.2216640015824227, "learning_rate": 4.3391042753613375e-06, "loss": 0.2428729385137558, "step": 5326 }, { "epoch": 1.414553180188554, "grad_norm": 1.2385329501903242, "learning_rate": 4.3354853211871696e-06, "loss": 0.20930354297161102, "step": 5327 }, { "epoch": 1.414818749170097, "grad_norm": 1.1373474530618315, "learning_rate": 4.331867459064623e-06, "loss": 0.18988853693008423, "step": 5328 }, { "epoch": 1.41508431815164, "grad_norm": 1.2833653393491664, "learning_rate": 4.328250689691182e-06, "loss": 0.24618801474571228, "step": 5329 }, { "epoch": 1.4153498871331829, "grad_norm": 1.2635824567099267, "learning_rate": 4.324635013764113e-06, "loss": 0.23857265710830688, "step": 5330 }, { "epoch": 1.4156154561147258, "grad_norm": 1.3200622076177175, "learning_rate": 4.321020431980483e-06, "loss": 0.21869014203548431, "step": 5331 }, { "epoch": 1.4158810250962688, "grad_norm": 1.2317649692424293, "learning_rate": 4.317406945037138e-06, "loss": 0.2508969008922577, "step": 5332 }, { "epoch": 1.4161465940778117, "grad_norm": 1.2114692744130235, "learning_rate": 4.313794553630711e-06, "loss": 0.2406233549118042, "step": 5333 }, { "epoch": 1.4164121630593547, "grad_norm": 1.3314396378070763, "learning_rate": 4.310183258457632e-06, "loss": 0.2376224398612976, "step": 5334 }, { "epoch": 1.4166777320408976, "grad_norm": 1.4802475566731417, "learning_rate": 4.306573060214115e-06, "loss": 0.2818688750267029, "step": 5335 }, { "epoch": 1.4169433010224406, "grad_norm": 1.2248721858463099, "learning_rate": 4.302963959596165e-06, "loss": 0.2279777228832245, "step": 5336 }, { "epoch": 1.4172088700039835, "grad_norm": 1.3681495314955672, "learning_rate": 4.299355957299573e-06, "loss": 0.2652052640914917, "step": 5337 }, { "epoch": 1.4174744389855265, "grad_norm": 1.2814638931564002, "learning_rate": 4.2957490540199185e-06, "loss": 0.24415750801563263, "step": 5338 }, { "epoch": 1.4177400079670694, "grad_norm": 1.2028147011593575, "learning_rate": 4.292143250452569e-06, "loss": 0.2318287044763565, "step": 5339 }, { "epoch": 1.4180055769486124, "grad_norm": 1.1621443407054215, "learning_rate": 4.288538547292685e-06, "loss": 0.19914361834526062, "step": 5340 }, { "epoch": 1.4182711459301554, "grad_norm": 1.2533818722517012, "learning_rate": 4.2849349452352095e-06, "loss": 0.22550678253173828, "step": 5341 }, { "epoch": 1.4185367149116983, "grad_norm": 1.3481328868952585, "learning_rate": 4.281332444974874e-06, "loss": 0.25001436471939087, "step": 5342 }, { "epoch": 1.4188022838932413, "grad_norm": 1.2557895781680242, "learning_rate": 4.277731047206197e-06, "loss": 0.24873407185077667, "step": 5343 }, { "epoch": 1.4190678528747842, "grad_norm": 1.2532145662207181, "learning_rate": 4.274130752623487e-06, "loss": 0.25732600688934326, "step": 5344 }, { "epoch": 1.4193334218563272, "grad_norm": 1.1956499236331526, "learning_rate": 4.270531561920836e-06, "loss": 0.1894054263830185, "step": 5345 }, { "epoch": 1.4195989908378701, "grad_norm": 1.2861805940078326, "learning_rate": 4.2669334757921284e-06, "loss": 0.2632025480270386, "step": 5346 }, { "epoch": 1.419864559819413, "grad_norm": 1.1223708980675566, "learning_rate": 4.2633364949310315e-06, "loss": 0.22106415033340454, "step": 5347 }, { "epoch": 1.420130128800956, "grad_norm": 1.2191554963858982, "learning_rate": 4.259740620031e-06, "loss": 0.2246699184179306, "step": 5348 }, { "epoch": 1.420395697782499, "grad_norm": 1.2377251567235985, "learning_rate": 4.256145851785277e-06, "loss": 0.2335890382528305, "step": 5349 }, { "epoch": 1.420661266764042, "grad_norm": 1.3200881727026734, "learning_rate": 4.252552190886892e-06, "loss": 0.25485220551490784, "step": 5350 }, { "epoch": 1.4209268357455849, "grad_norm": 1.406483107573335, "learning_rate": 4.248959638028659e-06, "loss": 0.26234719157218933, "step": 5351 }, { "epoch": 1.4211924047271278, "grad_norm": 1.1946878328095272, "learning_rate": 4.245368193903181e-06, "loss": 0.22083795070648193, "step": 5352 }, { "epoch": 1.4214579737086708, "grad_norm": 1.288602079194267, "learning_rate": 4.241777859202846e-06, "loss": 0.1886332929134369, "step": 5353 }, { "epoch": 1.4217235426902137, "grad_norm": 1.506700165302322, "learning_rate": 4.238188634619826e-06, "loss": 0.26154160499572754, "step": 5354 }, { "epoch": 1.4219891116717567, "grad_norm": 1.1472960297751262, "learning_rate": 4.234600520846085e-06, "loss": 0.24761158227920532, "step": 5355 }, { "epoch": 1.4222546806532996, "grad_norm": 1.154393443673505, "learning_rate": 4.2310135185733625e-06, "loss": 0.20936736464500427, "step": 5356 }, { "epoch": 1.4225202496348426, "grad_norm": 1.15600424022186, "learning_rate": 4.227427628493198e-06, "loss": 0.2173127979040146, "step": 5357 }, { "epoch": 1.4227858186163855, "grad_norm": 1.217414245555098, "learning_rate": 4.223842851296907e-06, "loss": 0.2598559260368347, "step": 5358 }, { "epoch": 1.4230513875979285, "grad_norm": 1.224021391863692, "learning_rate": 4.22025918767559e-06, "loss": 0.23701196908950806, "step": 5359 }, { "epoch": 1.4233169565794714, "grad_norm": 1.2134140712383175, "learning_rate": 4.216676638320135e-06, "loss": 0.26052403450012207, "step": 5360 }, { "epoch": 1.4235825255610144, "grad_norm": 1.2465682642545985, "learning_rate": 4.213095203921217e-06, "loss": 0.2464584857225418, "step": 5361 }, { "epoch": 1.4238480945425573, "grad_norm": 1.2646547527576821, "learning_rate": 4.209514885169294e-06, "loss": 0.25889426469802856, "step": 5362 }, { "epoch": 1.4241136635241003, "grad_norm": 1.2990812156107416, "learning_rate": 4.2059356827546076e-06, "loss": 0.26529380679130554, "step": 5363 }, { "epoch": 1.4243792325056432, "grad_norm": 1.1509506747022789, "learning_rate": 4.202357597367187e-06, "loss": 0.2284630388021469, "step": 5364 }, { "epoch": 1.4246448014871862, "grad_norm": 1.1509689814009059, "learning_rate": 4.198780629696845e-06, "loss": 0.2361873984336853, "step": 5365 }, { "epoch": 1.4249103704687291, "grad_norm": 1.2489364054166838, "learning_rate": 4.195204780433179e-06, "loss": 0.2473624348640442, "step": 5366 }, { "epoch": 1.425175939450272, "grad_norm": 1.2584581044476912, "learning_rate": 4.19163005026557e-06, "loss": 0.24852773547172546, "step": 5367 }, { "epoch": 1.425441508431815, "grad_norm": 1.413523972125062, "learning_rate": 4.188056439883183e-06, "loss": 0.28409647941589355, "step": 5368 }, { "epoch": 1.425707077413358, "grad_norm": 1.2672381227374172, "learning_rate": 4.18448394997497e-06, "loss": 0.2500985562801361, "step": 5369 }, { "epoch": 1.425972646394901, "grad_norm": 1.2421534737421158, "learning_rate": 4.1809125812296635e-06, "loss": 0.23475977778434753, "step": 5370 }, { "epoch": 1.426238215376444, "grad_norm": 1.3107626948919207, "learning_rate": 4.177342334335782e-06, "loss": 0.22925345599651337, "step": 5371 }, { "epoch": 1.4265037843579869, "grad_norm": 1.1701714137905739, "learning_rate": 4.173773209981627e-06, "loss": 0.24463894963264465, "step": 5372 }, { "epoch": 1.4267693533395298, "grad_norm": 1.2600839330793319, "learning_rate": 4.170205208855281e-06, "loss": 0.2451590746641159, "step": 5373 }, { "epoch": 1.4270349223210728, "grad_norm": 1.192456234510782, "learning_rate": 4.166638331644613e-06, "loss": 0.21078437566757202, "step": 5374 }, { "epoch": 1.427300491302616, "grad_norm": 1.1548728286132999, "learning_rate": 4.163072579037279e-06, "loss": 0.21466529369354248, "step": 5375 }, { "epoch": 1.4275660602841589, "grad_norm": 1.3327200015078104, "learning_rate": 4.159507951720713e-06, "loss": 0.20103147625923157, "step": 5376 }, { "epoch": 1.4278316292657018, "grad_norm": 1.2634022835060015, "learning_rate": 4.15594445038213e-06, "loss": 0.2618871331214905, "step": 5377 }, { "epoch": 1.4280971982472448, "grad_norm": 1.314150540124243, "learning_rate": 4.152382075708534e-06, "loss": 0.2496388852596283, "step": 5378 }, { "epoch": 1.4283627672287877, "grad_norm": 1.2776066314767451, "learning_rate": 4.148820828386707e-06, "loss": 0.2663899064064026, "step": 5379 }, { "epoch": 1.4286283362103307, "grad_norm": 1.223751737565641, "learning_rate": 4.145260709103216e-06, "loss": 0.23617541790008545, "step": 5380 }, { "epoch": 1.4288939051918736, "grad_norm": 1.2184450229688006, "learning_rate": 4.141701718544411e-06, "loss": 0.200006365776062, "step": 5381 }, { "epoch": 1.4291594741734166, "grad_norm": 1.2899877428495155, "learning_rate": 4.138143857396425e-06, "loss": 0.22707203030586243, "step": 5382 }, { "epoch": 1.4294250431549596, "grad_norm": 1.210998695531734, "learning_rate": 4.134587126345162e-06, "loss": 0.23903624713420868, "step": 5383 }, { "epoch": 1.4296906121365025, "grad_norm": 1.56990305006701, "learning_rate": 4.131031526076329e-06, "loss": 0.2308908998966217, "step": 5384 }, { "epoch": 1.4299561811180455, "grad_norm": 1.2125776866133393, "learning_rate": 4.127477057275398e-06, "loss": 0.18762601912021637, "step": 5385 }, { "epoch": 1.4302217500995884, "grad_norm": 1.3670823879917342, "learning_rate": 4.123923720627633e-06, "loss": 0.281406044960022, "step": 5386 }, { "epoch": 1.4304873190811314, "grad_norm": 1.24677960623226, "learning_rate": 4.120371516818071e-06, "loss": 0.24858589470386505, "step": 5387 }, { "epoch": 1.4307528880626743, "grad_norm": 1.2017896897650255, "learning_rate": 4.116820446531538e-06, "loss": 0.22179371118545532, "step": 5388 }, { "epoch": 1.4310184570442173, "grad_norm": 1.1523445225939053, "learning_rate": 4.113270510452636e-06, "loss": 0.22086869180202484, "step": 5389 }, { "epoch": 1.4312840260257602, "grad_norm": 1.295626323300653, "learning_rate": 4.109721709265753e-06, "loss": 0.231503427028656, "step": 5390 }, { "epoch": 1.4315495950073032, "grad_norm": 1.31237620612278, "learning_rate": 4.106174043655054e-06, "loss": 0.255252867937088, "step": 5391 }, { "epoch": 1.4318151639888461, "grad_norm": 1.2773394357808008, "learning_rate": 4.1026275143044854e-06, "loss": 0.23336587846279144, "step": 5392 }, { "epoch": 1.432080732970389, "grad_norm": 1.3267952754600625, "learning_rate": 4.099082121897783e-06, "loss": 0.2468583881855011, "step": 5393 }, { "epoch": 1.432346301951932, "grad_norm": 1.2137255679394872, "learning_rate": 4.095537867118452e-06, "loss": 0.21211153268814087, "step": 5394 }, { "epoch": 1.432611870933475, "grad_norm": 1.2552061461264346, "learning_rate": 4.091994750649783e-06, "loss": 0.23173204064369202, "step": 5395 }, { "epoch": 1.432877439915018, "grad_norm": 1.2420339991667666, "learning_rate": 4.088452773174853e-06, "loss": 0.2606658935546875, "step": 5396 }, { "epoch": 1.4331430088965609, "grad_norm": 1.2141954954044303, "learning_rate": 4.084911935376502e-06, "loss": 0.21198314428329468, "step": 5397 }, { "epoch": 1.4334085778781038, "grad_norm": 1.273859413406427, "learning_rate": 4.08137223793737e-06, "loss": 0.216193288564682, "step": 5398 }, { "epoch": 1.4336741468596468, "grad_norm": 1.3862686522767422, "learning_rate": 4.077833681539866e-06, "loss": 0.27767330408096313, "step": 5399 }, { "epoch": 1.4339397158411897, "grad_norm": 1.193043888736233, "learning_rate": 4.0742962668661826e-06, "loss": 0.21584349870681763, "step": 5400 }, { "epoch": 1.4342052848227327, "grad_norm": 1.2801175216615184, "learning_rate": 4.070759994598288e-06, "loss": 0.220070481300354, "step": 5401 }, { "epoch": 1.4344708538042756, "grad_norm": 1.4276288870785, "learning_rate": 4.067224865417941e-06, "loss": 0.26035353541374207, "step": 5402 }, { "epoch": 1.4347364227858186, "grad_norm": 1.1784144309393945, "learning_rate": 4.063690880006671e-06, "loss": 0.23704876005649567, "step": 5403 }, { "epoch": 1.4350019917673615, "grad_norm": 1.2793709287846655, "learning_rate": 4.060158039045785e-06, "loss": 0.2345760464668274, "step": 5404 }, { "epoch": 1.4352675607489045, "grad_norm": 1.2583985201804126, "learning_rate": 4.056626343216377e-06, "loss": 0.21307331323623657, "step": 5405 }, { "epoch": 1.4355331297304474, "grad_norm": 1.2401804894465362, "learning_rate": 4.053095793199313e-06, "loss": 0.22029465436935425, "step": 5406 }, { "epoch": 1.4357986987119904, "grad_norm": 1.3865770800537958, "learning_rate": 4.049566389675244e-06, "loss": 0.23419252038002014, "step": 5407 }, { "epoch": 1.4360642676935333, "grad_norm": 1.2114754283066453, "learning_rate": 4.046038133324595e-06, "loss": 0.21648669242858887, "step": 5408 }, { "epoch": 1.4363298366750763, "grad_norm": 1.3682353450989566, "learning_rate": 4.042511024827573e-06, "loss": 0.2343464195728302, "step": 5409 }, { "epoch": 1.4365954056566193, "grad_norm": 1.28417678054491, "learning_rate": 4.0389850648641615e-06, "loss": 0.20108605921268463, "step": 5410 }, { "epoch": 1.4368609746381622, "grad_norm": 1.2806759093192033, "learning_rate": 4.0354602541141315e-06, "loss": 0.21885806322097778, "step": 5411 }, { "epoch": 1.4371265436197052, "grad_norm": 1.276580988371958, "learning_rate": 4.031936593257017e-06, "loss": 0.2382376492023468, "step": 5412 }, { "epoch": 1.437392112601248, "grad_norm": 1.1333519329501958, "learning_rate": 4.028414082972141e-06, "loss": 0.21434128284454346, "step": 5413 }, { "epoch": 1.437657681582791, "grad_norm": 1.2161992893188567, "learning_rate": 4.024892723938601e-06, "loss": 0.2345191240310669, "step": 5414 }, { "epoch": 1.437923250564334, "grad_norm": 1.309666461481554, "learning_rate": 4.021372516835273e-06, "loss": 0.2478899210691452, "step": 5415 }, { "epoch": 1.438188819545877, "grad_norm": 1.2593045594203824, "learning_rate": 4.017853462340813e-06, "loss": 0.21356827020645142, "step": 5416 }, { "epoch": 1.4384543885274201, "grad_norm": 1.3891493537034765, "learning_rate": 4.014335561133652e-06, "loss": 0.26329827308654785, "step": 5417 }, { "epoch": 1.438719957508963, "grad_norm": 1.3689872343615141, "learning_rate": 4.010818813892e-06, "loss": 0.25880998373031616, "step": 5418 }, { "epoch": 1.438985526490506, "grad_norm": 1.2738388972586026, "learning_rate": 4.007303221293844e-06, "loss": 0.22749441862106323, "step": 5419 }, { "epoch": 1.439251095472049, "grad_norm": 1.2267331489472144, "learning_rate": 4.00378878401695e-06, "loss": 0.2242615520954132, "step": 5420 }, { "epoch": 1.439516664453592, "grad_norm": 1.168704950265394, "learning_rate": 4.000275502738862e-06, "loss": 0.19751839339733124, "step": 5421 }, { "epoch": 1.439782233435135, "grad_norm": 1.4000090999513362, "learning_rate": 3.996763378136895e-06, "loss": 0.27319905161857605, "step": 5422 }, { "epoch": 1.4400478024166778, "grad_norm": 1.1483039760635705, "learning_rate": 3.993252410888149e-06, "loss": 0.21676769852638245, "step": 5423 }, { "epoch": 1.4403133713982208, "grad_norm": 1.222649759682682, "learning_rate": 3.989742601669494e-06, "loss": 0.22788718342781067, "step": 5424 }, { "epoch": 1.4405789403797638, "grad_norm": 1.1800102666876688, "learning_rate": 3.986233951157581e-06, "loss": 0.23224875330924988, "step": 5425 }, { "epoch": 1.4408445093613067, "grad_norm": 1.3242271211713557, "learning_rate": 3.982726460028836e-06, "loss": 0.23625247180461884, "step": 5426 }, { "epoch": 1.4411100783428497, "grad_norm": 1.237043381628487, "learning_rate": 3.979220128959463e-06, "loss": 0.2092093527317047, "step": 5427 }, { "epoch": 1.4413756473243926, "grad_norm": 1.164989095324882, "learning_rate": 3.975714958625442e-06, "loss": 0.22196070849895477, "step": 5428 }, { "epoch": 1.4416412163059356, "grad_norm": 1.248575755705502, "learning_rate": 3.972210949702525e-06, "loss": 0.21276375651359558, "step": 5429 }, { "epoch": 1.4419067852874785, "grad_norm": 1.2714203744447936, "learning_rate": 3.968708102866247e-06, "loss": 0.22150103747844696, "step": 5430 }, { "epoch": 1.4421723542690215, "grad_norm": 1.2519929176778726, "learning_rate": 3.965206418791914e-06, "loss": 0.24529573321342468, "step": 5431 }, { "epoch": 1.4424379232505644, "grad_norm": 1.3331662749929607, "learning_rate": 3.961705898154609e-06, "loss": 0.24349135160446167, "step": 5432 }, { "epoch": 1.4427034922321074, "grad_norm": 1.3094668545917496, "learning_rate": 3.9582065416291926e-06, "loss": 0.23481428623199463, "step": 5433 }, { "epoch": 1.4429690612136503, "grad_norm": 1.2664431166747565, "learning_rate": 3.954708349890299e-06, "loss": 0.2366936057806015, "step": 5434 }, { "epoch": 1.4432346301951933, "grad_norm": 1.2699903819491114, "learning_rate": 3.951211323612336e-06, "loss": 0.24792322516441345, "step": 5435 }, { "epoch": 1.4435001991767362, "grad_norm": 1.1943208090894295, "learning_rate": 3.947715463469493e-06, "loss": 0.22601652145385742, "step": 5436 }, { "epoch": 1.4437657681582792, "grad_norm": 1.1333130191791405, "learning_rate": 3.9442207701357235e-06, "loss": 0.19603165984153748, "step": 5437 }, { "epoch": 1.4440313371398221, "grad_norm": 1.26512939224431, "learning_rate": 3.940727244284772e-06, "loss": 0.22619353234767914, "step": 5438 }, { "epoch": 1.444296906121365, "grad_norm": 1.3207139711857465, "learning_rate": 3.937234886590146e-06, "loss": 0.24836638569831848, "step": 5439 }, { "epoch": 1.444562475102908, "grad_norm": 1.2114237797025103, "learning_rate": 3.933743697725129e-06, "loss": 0.21585768461227417, "step": 5440 }, { "epoch": 1.444828044084451, "grad_norm": 1.2037953387653635, "learning_rate": 3.930253678362784e-06, "loss": 0.20876167714595795, "step": 5441 }, { "epoch": 1.445093613065994, "grad_norm": 1.2825218153573943, "learning_rate": 3.926764829175943e-06, "loss": 0.24337999522686005, "step": 5442 }, { "epoch": 1.4453591820475369, "grad_norm": 1.2238662957767994, "learning_rate": 3.9232771508372155e-06, "loss": 0.2511219084262848, "step": 5443 }, { "epoch": 1.4456247510290798, "grad_norm": 1.2796769482653771, "learning_rate": 3.919790644018986e-06, "loss": 0.26257213950157166, "step": 5444 }, { "epoch": 1.4458903200106228, "grad_norm": 1.3570371082898334, "learning_rate": 3.91630530939341e-06, "loss": 0.2720959782600403, "step": 5445 }, { "epoch": 1.4461558889921657, "grad_norm": 1.2897968589877258, "learning_rate": 3.912821147632421e-06, "loss": 0.23849177360534668, "step": 5446 }, { "epoch": 1.4464214579737087, "grad_norm": 1.2539273982781811, "learning_rate": 3.909338159407722e-06, "loss": 0.2366214245557785, "step": 5447 }, { "epoch": 1.4466870269552516, "grad_norm": 1.21348130376658, "learning_rate": 3.905856345390793e-06, "loss": 0.21905584633350372, "step": 5448 }, { "epoch": 1.4469525959367946, "grad_norm": 1.3001423574977207, "learning_rate": 3.902375706252887e-06, "loss": 0.23964065313339233, "step": 5449 }, { "epoch": 1.4472181649183375, "grad_norm": 1.2161208716702177, "learning_rate": 3.89889624266503e-06, "loss": 0.22246500849723816, "step": 5450 }, { "epoch": 1.4474837338998805, "grad_norm": 1.2845367508241097, "learning_rate": 3.895417955298022e-06, "loss": 0.22980710864067078, "step": 5451 }, { "epoch": 1.4477493028814234, "grad_norm": 1.4690832477509688, "learning_rate": 3.8919408448224346e-06, "loss": 0.21276253461837769, "step": 5452 }, { "epoch": 1.4480148718629664, "grad_norm": 1.3515036942552143, "learning_rate": 3.888464911908616e-06, "loss": 0.23925542831420898, "step": 5453 }, { "epoch": 1.4482804408445094, "grad_norm": 1.1871457723177183, "learning_rate": 3.884990157226683e-06, "loss": 0.21528369188308716, "step": 5454 }, { "epoch": 1.4485460098260523, "grad_norm": 1.2673056278722348, "learning_rate": 3.8815165814465235e-06, "loss": 0.24563542008399963, "step": 5455 }, { "epoch": 1.4488115788075953, "grad_norm": 1.2561210989748839, "learning_rate": 3.87804418523781e-06, "loss": 0.2721150517463684, "step": 5456 }, { "epoch": 1.4490771477891382, "grad_norm": 1.3721328159682122, "learning_rate": 3.874572969269976e-06, "loss": 0.23716527223587036, "step": 5457 }, { "epoch": 1.4493427167706812, "grad_norm": 1.5185790933002854, "learning_rate": 3.871102934212231e-06, "loss": 0.2182254046201706, "step": 5458 }, { "epoch": 1.4496082857522241, "grad_norm": 1.233204842662738, "learning_rate": 3.867634080733557e-06, "loss": 0.2179020643234253, "step": 5459 }, { "epoch": 1.449873854733767, "grad_norm": 1.2633976965193632, "learning_rate": 3.864166409502706e-06, "loss": 0.22901684045791626, "step": 5460 }, { "epoch": 1.45013942371531, "grad_norm": 1.209132482684757, "learning_rate": 3.860699921188211e-06, "loss": 0.2287352979183197, "step": 5461 }, { "epoch": 1.450404992696853, "grad_norm": 1.214494370780124, "learning_rate": 3.85723461645836e-06, "loss": 0.2448873668909073, "step": 5462 }, { "epoch": 1.450670561678396, "grad_norm": 1.323933009108344, "learning_rate": 3.85377049598123e-06, "loss": 0.2693510055541992, "step": 5463 }, { "epoch": 1.4509361306599389, "grad_norm": 1.1826355120377283, "learning_rate": 3.8503075604246554e-06, "loss": 0.25414884090423584, "step": 5464 }, { "epoch": 1.4512016996414818, "grad_norm": 1.3400776704302024, "learning_rate": 3.846845810456258e-06, "loss": 0.27798837423324585, "step": 5465 }, { "epoch": 1.4514672686230248, "grad_norm": 1.3109571985733361, "learning_rate": 3.8433852467434175e-06, "loss": 0.23348593711853027, "step": 5466 }, { "epoch": 1.4517328376045677, "grad_norm": 1.148921292979252, "learning_rate": 3.839925869953292e-06, "loss": 0.20993635058403015, "step": 5467 }, { "epoch": 1.4519984065861107, "grad_norm": 1.1967150813107374, "learning_rate": 3.836467680752808e-06, "loss": 0.225263774394989, "step": 5468 }, { "epoch": 1.4522639755676536, "grad_norm": 4.549069881323283, "learning_rate": 3.833010679808662e-06, "loss": 0.2481595277786255, "step": 5469 }, { "epoch": 1.4525295445491966, "grad_norm": 1.098861894900169, "learning_rate": 3.829554867787324e-06, "loss": 0.20755310356616974, "step": 5470 }, { "epoch": 1.4527951135307395, "grad_norm": 1.3031978879220207, "learning_rate": 3.826100245355034e-06, "loss": 0.22124455869197845, "step": 5471 }, { "epoch": 1.4530606825122825, "grad_norm": 1.1779333046553406, "learning_rate": 3.822646813177803e-06, "loss": 0.23461398482322693, "step": 5472 }, { "epoch": 1.4533262514938254, "grad_norm": 1.123494857736561, "learning_rate": 3.819194571921407e-06, "loss": 0.22890526056289673, "step": 5473 }, { "epoch": 1.4535918204753684, "grad_norm": 1.1163449125196687, "learning_rate": 3.815743522251406e-06, "loss": 0.23236533999443054, "step": 5474 }, { "epoch": 1.4538573894569113, "grad_norm": 1.204733497516731, "learning_rate": 3.8122936648331164e-06, "loss": 0.2192365825176239, "step": 5475 }, { "epoch": 1.4541229584384543, "grad_norm": 1.3061324350348682, "learning_rate": 3.8088450003316346e-06, "loss": 0.23970162868499756, "step": 5476 }, { "epoch": 1.4543885274199972, "grad_norm": 1.256131451943752, "learning_rate": 3.8053975294118163e-06, "loss": 0.24270984530448914, "step": 5477 }, { "epoch": 1.4546540964015402, "grad_norm": 1.1616491435133687, "learning_rate": 3.801951252738295e-06, "loss": 0.22228944301605225, "step": 5478 }, { "epoch": 1.4549196653830831, "grad_norm": 1.2998939083384287, "learning_rate": 3.7985061709754735e-06, "loss": 0.25029584765434265, "step": 5479 }, { "epoch": 1.455185234364626, "grad_norm": 1.1546196330858232, "learning_rate": 3.795062284787522e-06, "loss": 0.23831725120544434, "step": 5480 }, { "epoch": 1.455450803346169, "grad_norm": 1.2698177511587796, "learning_rate": 3.7916195948383817e-06, "loss": 0.2571605145931244, "step": 5481 }, { "epoch": 1.455716372327712, "grad_norm": 1.4321109332673951, "learning_rate": 3.7881781017917586e-06, "loss": 0.2660857141017914, "step": 5482 }, { "epoch": 1.455981941309255, "grad_norm": 1.3406733437493707, "learning_rate": 3.7847378063111394e-06, "loss": 0.2468302845954895, "step": 5483 }, { "epoch": 1.456247510290798, "grad_norm": 1.363296358111954, "learning_rate": 3.7812987090597696e-06, "loss": 0.2559482753276825, "step": 5484 }, { "epoch": 1.4565130792723409, "grad_norm": 1.2144737578388247, "learning_rate": 3.7778608107006654e-06, "loss": 0.24484393000602722, "step": 5485 }, { "epoch": 1.4567786482538838, "grad_norm": 1.1782087302857855, "learning_rate": 3.774424111896614e-06, "loss": 0.2376541644334793, "step": 5486 }, { "epoch": 1.4570442172354268, "grad_norm": 1.1748479481028287, "learning_rate": 3.770988613310169e-06, "loss": 0.22265875339508057, "step": 5487 }, { "epoch": 1.45730978621697, "grad_norm": 1.2316185421612622, "learning_rate": 3.7675543156036555e-06, "loss": 0.2511552572250366, "step": 5488 }, { "epoch": 1.457575355198513, "grad_norm": 1.2601957381413438, "learning_rate": 3.764121219439165e-06, "loss": 0.2412843108177185, "step": 5489 }, { "epoch": 1.4578409241800558, "grad_norm": 1.2622123015546969, "learning_rate": 3.760689325478559e-06, "loss": 0.26342809200286865, "step": 5490 }, { "epoch": 1.4581064931615988, "grad_norm": 1.2994089172948287, "learning_rate": 3.7572586343834638e-06, "loss": 0.23315641283988953, "step": 5491 }, { "epoch": 1.4583720621431417, "grad_norm": 1.0927170518216454, "learning_rate": 3.753829146815279e-06, "loss": 0.24148929119110107, "step": 5492 }, { "epoch": 1.4586376311246847, "grad_norm": 1.363697618202234, "learning_rate": 3.750400863435166e-06, "loss": 0.22838115692138672, "step": 5493 }, { "epoch": 1.4589032001062276, "grad_norm": 1.2083898158968958, "learning_rate": 3.746973784904061e-06, "loss": 0.21669608354568481, "step": 5494 }, { "epoch": 1.4591687690877706, "grad_norm": 1.4819576271076944, "learning_rate": 3.743547911882662e-06, "loss": 0.25619322061538696, "step": 5495 }, { "epoch": 1.4594343380693136, "grad_norm": 1.2058542987095502, "learning_rate": 3.7401232450314384e-06, "loss": 0.23629480600357056, "step": 5496 }, { "epoch": 1.4596999070508565, "grad_norm": 1.189438722154431, "learning_rate": 3.7366997850106245e-06, "loss": 0.21799582242965698, "step": 5497 }, { "epoch": 1.4599654760323995, "grad_norm": 1.372571579127378, "learning_rate": 3.733277532480223e-06, "loss": 0.2582590579986572, "step": 5498 }, { "epoch": 1.4602310450139424, "grad_norm": 1.1675281771435806, "learning_rate": 3.729856488100003e-06, "loss": 0.23641736805438995, "step": 5499 }, { "epoch": 1.4604966139954854, "grad_norm": 1.3024331747300109, "learning_rate": 3.7264366525295e-06, "loss": 0.24150417745113373, "step": 5500 }, { "epoch": 1.4607621829770283, "grad_norm": 1.2012687985267718, "learning_rate": 3.7230180264280245e-06, "loss": 0.2474009394645691, "step": 5501 }, { "epoch": 1.4610277519585713, "grad_norm": 1.3411668359609863, "learning_rate": 3.7196006104546435e-06, "loss": 0.269604355096817, "step": 5502 }, { "epoch": 1.4612933209401142, "grad_norm": 1.3014753471077654, "learning_rate": 3.716184405268194e-06, "loss": 0.24324679374694824, "step": 5503 }, { "epoch": 1.4615588899216572, "grad_norm": 1.1306865007600708, "learning_rate": 3.7127694115272805e-06, "loss": 0.2249709963798523, "step": 5504 }, { "epoch": 1.4618244589032001, "grad_norm": 1.2915165646779034, "learning_rate": 3.7093556298902734e-06, "loss": 0.2560918629169464, "step": 5505 }, { "epoch": 1.462090027884743, "grad_norm": 1.154084739271703, "learning_rate": 3.705943061015309e-06, "loss": 0.22693020105361938, "step": 5506 }, { "epoch": 1.462355596866286, "grad_norm": 1.2640727525169442, "learning_rate": 3.702531705560292e-06, "loss": 0.2617371678352356, "step": 5507 }, { "epoch": 1.462621165847829, "grad_norm": 1.2561844307954502, "learning_rate": 3.6991215641828903e-06, "loss": 0.2314397394657135, "step": 5508 }, { "epoch": 1.462886734829372, "grad_norm": 1.1063207547372251, "learning_rate": 3.6957126375405383e-06, "loss": 0.23186162114143372, "step": 5509 }, { "epoch": 1.4631523038109149, "grad_norm": 1.2602306615156422, "learning_rate": 3.6923049262904375e-06, "loss": 0.21775083243846893, "step": 5510 }, { "epoch": 1.4634178727924578, "grad_norm": 1.2619669881473867, "learning_rate": 3.688898431089556e-06, "loss": 0.24707889556884766, "step": 5511 }, { "epoch": 1.4636834417740008, "grad_norm": 1.0923805026421214, "learning_rate": 3.6854931525946237e-06, "loss": 0.1941150575876236, "step": 5512 }, { "epoch": 1.4639490107555437, "grad_norm": 1.0123090946182933, "learning_rate": 3.6820890914621376e-06, "loss": 0.17808857560157776, "step": 5513 }, { "epoch": 1.4642145797370867, "grad_norm": 1.2139965705715394, "learning_rate": 3.678686248348363e-06, "loss": 0.2150077074766159, "step": 5514 }, { "epoch": 1.4644801487186296, "grad_norm": 1.4267562521267494, "learning_rate": 3.6752846239093276e-06, "loss": 0.2605292797088623, "step": 5515 }, { "epoch": 1.4647457177001726, "grad_norm": 1.202920213288267, "learning_rate": 3.671884218800822e-06, "loss": 0.22481867671012878, "step": 5516 }, { "epoch": 1.4650112866817155, "grad_norm": 5.588780783186036, "learning_rate": 3.668485033678406e-06, "loss": 0.24453294277191162, "step": 5517 }, { "epoch": 1.4652768556632585, "grad_norm": 1.379432138271627, "learning_rate": 3.6650870691973996e-06, "loss": 0.2672286033630371, "step": 5518 }, { "epoch": 1.4655424246448014, "grad_norm": 1.2625747265975353, "learning_rate": 3.661690326012897e-06, "loss": 0.2514987587928772, "step": 5519 }, { "epoch": 1.4658079936263444, "grad_norm": 1.3337549906693908, "learning_rate": 3.6582948047797438e-06, "loss": 0.25671514868736267, "step": 5520 }, { "epoch": 1.4660735626078873, "grad_norm": 1.3535247420304835, "learning_rate": 3.654900506152561e-06, "loss": 0.25485602021217346, "step": 5521 }, { "epoch": 1.4663391315894303, "grad_norm": 1.1813027271086827, "learning_rate": 3.6515074307857257e-06, "loss": 0.23556292057037354, "step": 5522 }, { "epoch": 1.4666047005709733, "grad_norm": 1.15604598759747, "learning_rate": 3.6481155793333855e-06, "loss": 0.23347696661949158, "step": 5523 }, { "epoch": 1.4668702695525162, "grad_norm": 1.218328581124676, "learning_rate": 3.6447249524494466e-06, "loss": 0.2405884712934494, "step": 5524 }, { "epoch": 1.4671358385340592, "grad_norm": 1.2423110513745568, "learning_rate": 3.6413355507875845e-06, "loss": 0.23668336868286133, "step": 5525 }, { "epoch": 1.467401407515602, "grad_norm": 1.207526661238473, "learning_rate": 3.6379473750012375e-06, "loss": 0.25534945726394653, "step": 5526 }, { "epoch": 1.467666976497145, "grad_norm": 1.267472887202726, "learning_rate": 3.634560425743596e-06, "loss": 0.22227410972118378, "step": 5527 }, { "epoch": 1.467932545478688, "grad_norm": 1.4853214348875312, "learning_rate": 3.631174703667636e-06, "loss": 0.23395927250385284, "step": 5528 }, { "epoch": 1.468198114460231, "grad_norm": 1.2396534638298151, "learning_rate": 3.6277902094260785e-06, "loss": 0.23419208824634552, "step": 5529 }, { "epoch": 1.4684636834417741, "grad_norm": 1.3441597355302621, "learning_rate": 3.6244069436714158e-06, "loss": 0.22185654938220978, "step": 5530 }, { "epoch": 1.468729252423317, "grad_norm": 1.2489989202798994, "learning_rate": 3.621024907055901e-06, "loss": 0.2705134153366089, "step": 5531 }, { "epoch": 1.46899482140486, "grad_norm": 1.23195362246657, "learning_rate": 3.617644100231551e-06, "loss": 0.23426109552383423, "step": 5532 }, { "epoch": 1.469260390386403, "grad_norm": 1.2477206941188708, "learning_rate": 3.6142645238501462e-06, "loss": 0.25527146458625793, "step": 5533 }, { "epoch": 1.469525959367946, "grad_norm": 1.1030456616341389, "learning_rate": 3.610886178563228e-06, "loss": 0.1882668435573578, "step": 5534 }, { "epoch": 1.469791528349489, "grad_norm": 1.2622509171219458, "learning_rate": 3.607509065022101e-06, "loss": 0.24060532450675964, "step": 5535 }, { "epoch": 1.4700570973310318, "grad_norm": 1.2245038712856335, "learning_rate": 3.6041331838778325e-06, "loss": 0.23555803298950195, "step": 5536 }, { "epoch": 1.4703226663125748, "grad_norm": 1.2192798079575136, "learning_rate": 3.6007585357812557e-06, "loss": 0.23126551508903503, "step": 5537 }, { "epoch": 1.4705882352941178, "grad_norm": 1.139497037450913, "learning_rate": 3.597385121382961e-06, "loss": 0.24203836917877197, "step": 5538 }, { "epoch": 1.4708538042756607, "grad_norm": 1.2467383616518404, "learning_rate": 3.5940129413333046e-06, "loss": 0.239767923951149, "step": 5539 }, { "epoch": 1.4711193732572037, "grad_norm": 1.158137574546163, "learning_rate": 3.5906419962824002e-06, "loss": 0.24732957780361176, "step": 5540 }, { "epoch": 1.4713849422387466, "grad_norm": 1.2722296085836442, "learning_rate": 3.587272286880131e-06, "loss": 0.2296421229839325, "step": 5541 }, { "epoch": 1.4716505112202896, "grad_norm": 1.2453973567418024, "learning_rate": 3.583903813776132e-06, "loss": 0.2339775711297989, "step": 5542 }, { "epoch": 1.4719160802018325, "grad_norm": 1.194940832073201, "learning_rate": 3.5805365776198052e-06, "loss": 0.230351984500885, "step": 5543 }, { "epoch": 1.4721816491833755, "grad_norm": 1.2792126719917591, "learning_rate": 3.5771705790603163e-06, "loss": 0.2501414716243744, "step": 5544 }, { "epoch": 1.4724472181649184, "grad_norm": 1.2327284472179139, "learning_rate": 3.5738058187465864e-06, "loss": 0.23387153446674347, "step": 5545 }, { "epoch": 1.4727127871464614, "grad_norm": 1.2921618045206031, "learning_rate": 3.570442297327307e-06, "loss": 0.23874594271183014, "step": 5546 }, { "epoch": 1.4729783561280043, "grad_norm": 1.2841826918754735, "learning_rate": 3.5670800154509245e-06, "loss": 0.21867451071739197, "step": 5547 }, { "epoch": 1.4732439251095473, "grad_norm": 1.2937830650411482, "learning_rate": 3.563718973765644e-06, "loss": 0.24124100804328918, "step": 5548 }, { "epoch": 1.4735094940910902, "grad_norm": 1.2156419794246578, "learning_rate": 3.5603591729194377e-06, "loss": 0.22185327112674713, "step": 5549 }, { "epoch": 1.4737750630726332, "grad_norm": 1.1571779294098303, "learning_rate": 3.5570006135600345e-06, "loss": 0.21193793416023254, "step": 5550 }, { "epoch": 1.4740406320541761, "grad_norm": 1.3939617841899903, "learning_rate": 3.553643296334924e-06, "loss": 0.2615143656730652, "step": 5551 }, { "epoch": 1.474306201035719, "grad_norm": 1.1936451275051074, "learning_rate": 3.5502872218913597e-06, "loss": 0.24937541782855988, "step": 5552 }, { "epoch": 1.474571770017262, "grad_norm": 1.0736225386439564, "learning_rate": 3.5469323908763507e-06, "loss": 0.22849224507808685, "step": 5553 }, { "epoch": 1.474837338998805, "grad_norm": 1.6488166459783042, "learning_rate": 3.5435788039366657e-06, "loss": 0.2209717333316803, "step": 5554 }, { "epoch": 1.475102907980348, "grad_norm": 1.2992665215674652, "learning_rate": 3.5402264617188453e-06, "loss": 0.2529235780239105, "step": 5555 }, { "epoch": 1.4753684769618909, "grad_norm": 1.2133685762997675, "learning_rate": 3.536875364869181e-06, "loss": 0.2045450657606125, "step": 5556 }, { "epoch": 1.4756340459434338, "grad_norm": 1.0591536248970717, "learning_rate": 3.5335255140337167e-06, "loss": 0.1973644196987152, "step": 5557 }, { "epoch": 1.4758996149249768, "grad_norm": 1.3059187006673687, "learning_rate": 3.5301769098582685e-06, "loss": 0.27417299151420593, "step": 5558 }, { "epoch": 1.4761651839065197, "grad_norm": 1.2500382678843112, "learning_rate": 3.5268295529884077e-06, "loss": 0.24541756510734558, "step": 5559 }, { "epoch": 1.4764307528880627, "grad_norm": 1.4461383875060436, "learning_rate": 3.5234834440694655e-06, "loss": 0.25785958766937256, "step": 5560 }, { "epoch": 1.4766963218696056, "grad_norm": 1.1676448271023605, "learning_rate": 3.5201385837465307e-06, "loss": 0.21099212765693665, "step": 5561 }, { "epoch": 1.4769618908511486, "grad_norm": 1.1787333048605453, "learning_rate": 3.5167949726644545e-06, "loss": 0.26023173332214355, "step": 5562 }, { "epoch": 1.4772274598326915, "grad_norm": 1.6670162101301063, "learning_rate": 3.5134526114678426e-06, "loss": 0.22882963716983795, "step": 5563 }, { "epoch": 1.4774930288142345, "grad_norm": 1.312450944331431, "learning_rate": 3.5101115008010677e-06, "loss": 0.21987251937389374, "step": 5564 }, { "epoch": 1.4777585977957775, "grad_norm": 1.163985983495263, "learning_rate": 3.506771641308255e-06, "loss": 0.2169610857963562, "step": 5565 }, { "epoch": 1.4780241667773204, "grad_norm": 4.440133890295746, "learning_rate": 3.50343303363329e-06, "loss": 0.22723034024238586, "step": 5566 }, { "epoch": 1.4782897357588634, "grad_norm": 1.2392064660120468, "learning_rate": 3.5000956784198157e-06, "loss": 0.23738276958465576, "step": 5567 }, { "epoch": 1.4785553047404063, "grad_norm": 1.1818266174210303, "learning_rate": 3.496759576311235e-06, "loss": 0.19922251999378204, "step": 5568 }, { "epoch": 1.4788208737219493, "grad_norm": 1.294067668946831, "learning_rate": 3.4934247279507092e-06, "loss": 0.22529268264770508, "step": 5569 }, { "epoch": 1.4790864427034922, "grad_norm": 1.3551359298814187, "learning_rate": 3.4900911339811583e-06, "loss": 0.26758015155792236, "step": 5570 }, { "epoch": 1.4793520116850352, "grad_norm": 1.2627897957153122, "learning_rate": 3.48675879504526e-06, "loss": 0.24752648174762726, "step": 5571 }, { "epoch": 1.4796175806665781, "grad_norm": 1.3085621441307098, "learning_rate": 3.483427711785449e-06, "loss": 0.25337618589401245, "step": 5572 }, { "epoch": 1.479883149648121, "grad_norm": 1.3543288061594618, "learning_rate": 3.480097884843919e-06, "loss": 0.24504786729812622, "step": 5573 }, { "epoch": 1.480148718629664, "grad_norm": 1.1750849317955903, "learning_rate": 3.4767693148626223e-06, "loss": 0.21255145967006683, "step": 5574 }, { "epoch": 1.480414287611207, "grad_norm": 1.2853041773936769, "learning_rate": 3.473442002483267e-06, "loss": 0.2501891553401947, "step": 5575 }, { "epoch": 1.48067985659275, "grad_norm": 1.195974425335747, "learning_rate": 3.4701159483473202e-06, "loss": 0.25276634097099304, "step": 5576 }, { "epoch": 1.4809454255742929, "grad_norm": 1.427206116406706, "learning_rate": 3.4667911530960052e-06, "loss": 0.2760567367076874, "step": 5577 }, { "epoch": 1.4812109945558358, "grad_norm": 1.2442739080424003, "learning_rate": 3.463467617370305e-06, "loss": 0.22686481475830078, "step": 5578 }, { "epoch": 1.4814765635373788, "grad_norm": 1.2374194002920247, "learning_rate": 3.4601453418109554e-06, "loss": 0.23262599110603333, "step": 5579 }, { "epoch": 1.4817421325189217, "grad_norm": 1.2263890428702933, "learning_rate": 3.4568243270584545e-06, "loss": 0.22231365740299225, "step": 5580 }, { "epoch": 1.4820077015004647, "grad_norm": 1.2193067799394695, "learning_rate": 3.4535045737530504e-06, "loss": 0.22237855195999146, "step": 5581 }, { "epoch": 1.4822732704820076, "grad_norm": 1.208437884817879, "learning_rate": 3.4501860825347587e-06, "loss": 0.2260412871837616, "step": 5582 }, { "epoch": 1.4825388394635506, "grad_norm": 1.3488909026023506, "learning_rate": 3.4468688540433425e-06, "loss": 0.2133496105670929, "step": 5583 }, { "epoch": 1.4828044084450935, "grad_norm": 1.231358912436915, "learning_rate": 3.4435528889183245e-06, "loss": 0.24750375747680664, "step": 5584 }, { "epoch": 1.4830699774266365, "grad_norm": 1.2053641188090713, "learning_rate": 3.440238187798983e-06, "loss": 0.23673412203788757, "step": 5585 }, { "epoch": 1.4833355464081794, "grad_norm": 1.312048381493266, "learning_rate": 3.436924751324354e-06, "loss": 0.2505243420600891, "step": 5586 }, { "epoch": 1.4836011153897224, "grad_norm": 1.2769153596955758, "learning_rate": 3.433612580133229e-06, "loss": 0.276151180267334, "step": 5587 }, { "epoch": 1.4838666843712653, "grad_norm": 1.0245497892529305, "learning_rate": 3.430301674864154e-06, "loss": 0.1756816953420639, "step": 5588 }, { "epoch": 1.4841322533528083, "grad_norm": 1.2667973514811224, "learning_rate": 3.4269920361554342e-06, "loss": 0.25901898741722107, "step": 5589 }, { "epoch": 1.4843978223343512, "grad_norm": 1.2034260428652863, "learning_rate": 3.4236836646451286e-06, "loss": 0.21196085214614868, "step": 5590 }, { "epoch": 1.4846633913158942, "grad_norm": 1.2887221468811698, "learning_rate": 3.4203765609710525e-06, "loss": 0.24153128266334534, "step": 5591 }, { "epoch": 1.4849289602974372, "grad_norm": 1.2285562462634616, "learning_rate": 3.4170707257707757e-06, "loss": 0.25715887546539307, "step": 5592 }, { "epoch": 1.48519452927898, "grad_norm": 1.430212837200284, "learning_rate": 3.413766159681624e-06, "loss": 0.2920379042625427, "step": 5593 }, { "epoch": 1.485460098260523, "grad_norm": 1.2173970332611068, "learning_rate": 3.41046286334068e-06, "loss": 0.22127456963062286, "step": 5594 }, { "epoch": 1.485725667242066, "grad_norm": 1.2534339617557788, "learning_rate": 3.4071608373847786e-06, "loss": 0.23103584349155426, "step": 5595 }, { "epoch": 1.485991236223609, "grad_norm": 1.2999427041349472, "learning_rate": 3.403860082450513e-06, "loss": 0.29068222641944885, "step": 5596 }, { "epoch": 1.486256805205152, "grad_norm": 1.2532608064541852, "learning_rate": 3.4005605991742296e-06, "loss": 0.23703888058662415, "step": 5597 }, { "epoch": 1.4865223741866949, "grad_norm": 1.4039489349034764, "learning_rate": 3.3972623881920296e-06, "loss": 0.23348261415958405, "step": 5598 }, { "epoch": 1.4867879431682378, "grad_norm": 1.1603139615742908, "learning_rate": 3.3939654501397645e-06, "loss": 0.24733223021030426, "step": 5599 }, { "epoch": 1.487053512149781, "grad_norm": 1.1220204153088178, "learning_rate": 3.3906697856530548e-06, "loss": 0.22576835751533508, "step": 5600 }, { "epoch": 1.487319081131324, "grad_norm": 1.1809335952834177, "learning_rate": 3.3873753953672593e-06, "loss": 0.20863527059555054, "step": 5601 }, { "epoch": 1.487584650112867, "grad_norm": 1.1823379745083873, "learning_rate": 3.384082279917499e-06, "loss": 0.2299712598323822, "step": 5602 }, { "epoch": 1.4878502190944098, "grad_norm": 1.1858521746021262, "learning_rate": 3.380790439938648e-06, "loss": 0.23058944940567017, "step": 5603 }, { "epoch": 1.4881157880759528, "grad_norm": 1.1304663814123712, "learning_rate": 3.3774998760653344e-06, "loss": 0.20307201147079468, "step": 5604 }, { "epoch": 1.4883813570574957, "grad_norm": 1.112411027996001, "learning_rate": 3.3742105889319388e-06, "loss": 0.2296266108751297, "step": 5605 }, { "epoch": 1.4886469260390387, "grad_norm": 1.3206442060716181, "learning_rate": 3.370922579172601e-06, "loss": 0.22702309489250183, "step": 5606 }, { "epoch": 1.4889124950205816, "grad_norm": 1.4590848907033545, "learning_rate": 3.3676358474212035e-06, "loss": 0.30432331562042236, "step": 5607 }, { "epoch": 1.4891780640021246, "grad_norm": 1.201356120373459, "learning_rate": 3.3643503943113907e-06, "loss": 0.2488052248954773, "step": 5608 }, { "epoch": 1.4894436329836676, "grad_norm": 1.2096846483257637, "learning_rate": 3.361066220476564e-06, "loss": 0.2221754938364029, "step": 5609 }, { "epoch": 1.4897092019652105, "grad_norm": 1.289556223007011, "learning_rate": 3.3577833265498728e-06, "loss": 0.2547761797904968, "step": 5610 }, { "epoch": 1.4899747709467535, "grad_norm": 1.3306628367975963, "learning_rate": 3.3545017131642164e-06, "loss": 0.21811938285827637, "step": 5611 }, { "epoch": 1.4902403399282964, "grad_norm": 1.4022029015386877, "learning_rate": 3.3512213809522554e-06, "loss": 0.30436158180236816, "step": 5612 }, { "epoch": 1.4905059089098394, "grad_norm": 1.2224150283856856, "learning_rate": 3.3479423305463953e-06, "loss": 0.2053622156381607, "step": 5613 }, { "epoch": 1.4907714778913823, "grad_norm": 1.3026832238379669, "learning_rate": 3.344664562578801e-06, "loss": 0.2017601728439331, "step": 5614 }, { "epoch": 1.4910370468729253, "grad_norm": 1.2856046275416113, "learning_rate": 3.341388077681387e-06, "loss": 0.23668046295642853, "step": 5615 }, { "epoch": 1.4913026158544682, "grad_norm": 1.1460002150937032, "learning_rate": 3.338112876485821e-06, "loss": 0.20016951858997345, "step": 5616 }, { "epoch": 1.4915681848360112, "grad_norm": 1.3606548245166536, "learning_rate": 3.3348389596235177e-06, "loss": 0.25477850437164307, "step": 5617 }, { "epoch": 1.4918337538175541, "grad_norm": 1.2758175160721472, "learning_rate": 3.3315663277256594e-06, "loss": 0.24063366651535034, "step": 5618 }, { "epoch": 1.492099322799097, "grad_norm": 1.2737128535751616, "learning_rate": 3.328294981423165e-06, "loss": 0.23443251848220825, "step": 5619 }, { "epoch": 1.49236489178064, "grad_norm": 1.1580169148577781, "learning_rate": 3.325024921346717e-06, "loss": 0.21191264688968658, "step": 5620 }, { "epoch": 1.492630460762183, "grad_norm": 1.213323558189925, "learning_rate": 3.3217561481267367e-06, "loss": 0.22062326967716217, "step": 5621 }, { "epoch": 1.492896029743726, "grad_norm": 1.1757529457487401, "learning_rate": 3.318488662393409e-06, "loss": 0.2235480695962906, "step": 5622 }, { "epoch": 1.4931615987252689, "grad_norm": 1.2611472240425432, "learning_rate": 3.315222464776665e-06, "loss": 0.26665517687797546, "step": 5623 }, { "epoch": 1.4934271677068118, "grad_norm": 1.270220596773442, "learning_rate": 3.3119575559061902e-06, "loss": 0.24300602078437805, "step": 5624 }, { "epoch": 1.4936927366883548, "grad_norm": 1.2622444254847978, "learning_rate": 3.308693936411421e-06, "loss": 0.25441884994506836, "step": 5625 }, { "epoch": 1.4939583056698977, "grad_norm": 1.2781695234171213, "learning_rate": 3.3054316069215407e-06, "loss": 0.23236152529716492, "step": 5626 }, { "epoch": 1.4942238746514407, "grad_norm": 1.2299113342509724, "learning_rate": 3.3021705680654946e-06, "loss": 0.24535568058490753, "step": 5627 }, { "epoch": 1.4944894436329836, "grad_norm": 1.3635919919461823, "learning_rate": 3.29891082047197e-06, "loss": 0.2542986273765564, "step": 5628 }, { "epoch": 1.4947550126145266, "grad_norm": 1.3442816383357798, "learning_rate": 3.295652364769407e-06, "loss": 0.26490268111228943, "step": 5629 }, { "epoch": 1.4950205815960695, "grad_norm": 1.2455944135633985, "learning_rate": 3.292395201585997e-06, "loss": 0.25576913356781006, "step": 5630 }, { "epoch": 1.4952861505776125, "grad_norm": 1.321982811797117, "learning_rate": 3.2891393315496846e-06, "loss": 0.2930823266506195, "step": 5631 }, { "epoch": 1.4955517195591554, "grad_norm": 1.3029577245101889, "learning_rate": 3.285884755288161e-06, "loss": 0.2426074892282486, "step": 5632 }, { "epoch": 1.4958172885406984, "grad_norm": 1.1912484566122454, "learning_rate": 3.2826314734288713e-06, "loss": 0.24090878665447235, "step": 5633 }, { "epoch": 1.4960828575222413, "grad_norm": 1.291391881665867, "learning_rate": 3.2793794865990092e-06, "loss": 0.26155173778533936, "step": 5634 }, { "epoch": 1.4963484265037843, "grad_norm": 1.2581171617638447, "learning_rate": 3.2761287954255195e-06, "loss": 0.2594009041786194, "step": 5635 }, { "epoch": 1.4966139954853273, "grad_norm": 1.248912763921314, "learning_rate": 3.2728794005350972e-06, "loss": 0.24434763193130493, "step": 5636 }, { "epoch": 1.4968795644668702, "grad_norm": 1.3459414061970596, "learning_rate": 3.269631302554188e-06, "loss": 0.2622208297252655, "step": 5637 }, { "epoch": 1.4971451334484132, "grad_norm": 1.2222057610309294, "learning_rate": 3.266384502108987e-06, "loss": 0.18913154304027557, "step": 5638 }, { "epoch": 1.497410702429956, "grad_norm": 1.260519406868159, "learning_rate": 3.263138999825437e-06, "loss": 0.2610907554626465, "step": 5639 }, { "epoch": 1.497676271411499, "grad_norm": 1.2585537664404678, "learning_rate": 3.2598947963292337e-06, "loss": 0.25841569900512695, "step": 5640 }, { "epoch": 1.497941840393042, "grad_norm": 1.1680179490188496, "learning_rate": 3.256651892245822e-06, "loss": 0.2066381573677063, "step": 5641 }, { "epoch": 1.4982074093745852, "grad_norm": 1.1877407935219242, "learning_rate": 3.253410288200396e-06, "loss": 0.23956719040870667, "step": 5642 }, { "epoch": 1.4984729783561281, "grad_norm": 1.1996406642135662, "learning_rate": 3.250169984817897e-06, "loss": 0.23999394476413727, "step": 5643 }, { "epoch": 1.498738547337671, "grad_norm": 1.4056134439986134, "learning_rate": 3.2469309827230156e-06, "loss": 0.24273940920829773, "step": 5644 }, { "epoch": 1.499004116319214, "grad_norm": 1.193555704549332, "learning_rate": 3.2436932825401977e-06, "loss": 0.2212621569633484, "step": 5645 }, { "epoch": 1.499269685300757, "grad_norm": 1.293874995027958, "learning_rate": 3.2404568848936325e-06, "loss": 0.2487148940563202, "step": 5646 }, { "epoch": 1.4995352542823, "grad_norm": 1.2610121684030642, "learning_rate": 3.237221790407259e-06, "loss": 0.29314422607421875, "step": 5647 }, { "epoch": 1.499800823263843, "grad_norm": 1.1765702458871505, "learning_rate": 3.233987999704763e-06, "loss": 0.22727417945861816, "step": 5648 }, { "epoch": 1.5000663922453858, "grad_norm": 1.1578089091098656, "learning_rate": 3.230755513409585e-06, "loss": 0.18877442181110382, "step": 5649 }, { "epoch": 1.5003319612269288, "grad_norm": 1.2855274132536632, "learning_rate": 3.2275243321449068e-06, "loss": 0.2504552900791168, "step": 5650 }, { "epoch": 1.5005975302084718, "grad_norm": 1.1905373910388852, "learning_rate": 3.224294456533663e-06, "loss": 0.23579174280166626, "step": 5651 }, { "epoch": 1.5008630991900147, "grad_norm": 1.3692203179408873, "learning_rate": 3.221065887198537e-06, "loss": 0.29236793518066406, "step": 5652 }, { "epoch": 1.5011286681715577, "grad_norm": 1.3245217175369617, "learning_rate": 3.2178386247619577e-06, "loss": 0.2735568881034851, "step": 5653 }, { "epoch": 1.5013942371531006, "grad_norm": 1.240462888838021, "learning_rate": 3.214612669846103e-06, "loss": 0.2391616702079773, "step": 5654 }, { "epoch": 1.5016598061346436, "grad_norm": 1.3766117264936455, "learning_rate": 3.2113880230729e-06, "loss": 0.24532485008239746, "step": 5655 }, { "epoch": 1.5019253751161865, "grad_norm": 1.3310069624279295, "learning_rate": 3.2081646850640215e-06, "loss": 0.2605767250061035, "step": 5656 }, { "epoch": 1.5021909440977295, "grad_norm": 1.2109489933208193, "learning_rate": 3.2049426564408893e-06, "loss": 0.2651350200176239, "step": 5657 }, { "epoch": 1.5024565130792724, "grad_norm": 1.3305800775425032, "learning_rate": 3.2017219378246734e-06, "loss": 0.2719389498233795, "step": 5658 }, { "epoch": 1.5027220820608154, "grad_norm": 1.2359239723239188, "learning_rate": 3.198502529836288e-06, "loss": 0.23077815771102905, "step": 5659 }, { "epoch": 1.5029876510423583, "grad_norm": 1.0838054114896152, "learning_rate": 3.1952844330964007e-06, "loss": 0.21954959630966187, "step": 5660 }, { "epoch": 1.5032532200239013, "grad_norm": 1.3480229773492907, "learning_rate": 3.1920676482254186e-06, "loss": 0.28229185938835144, "step": 5661 }, { "epoch": 1.5035187890054442, "grad_norm": 1.2587796771658648, "learning_rate": 3.1888521758435e-06, "loss": 0.24612295627593994, "step": 5662 }, { "epoch": 1.5037843579869872, "grad_norm": 1.2649379995915024, "learning_rate": 3.185638016570555e-06, "loss": 0.24191413819789886, "step": 5663 }, { "epoch": 1.5040499269685301, "grad_norm": 1.225446339219085, "learning_rate": 3.1824251710262323e-06, "loss": 0.2427935004234314, "step": 5664 }, { "epoch": 1.504315495950073, "grad_norm": 1.2595635392757376, "learning_rate": 3.17921363982993e-06, "loss": 0.2600318193435669, "step": 5665 }, { "epoch": 1.504581064931616, "grad_norm": 1.2817020254494476, "learning_rate": 3.1760034236007954e-06, "loss": 0.25215205550193787, "step": 5666 }, { "epoch": 1.504846633913159, "grad_norm": 1.2568573714231897, "learning_rate": 3.1727945229577183e-06, "loss": 0.24460548162460327, "step": 5667 }, { "epoch": 1.505112202894702, "grad_norm": 1.2881955251422392, "learning_rate": 3.169586938519338e-06, "loss": 0.2812577486038208, "step": 5668 }, { "epoch": 1.5053777718762449, "grad_norm": 1.1272225605105841, "learning_rate": 3.166380670904039e-06, "loss": 0.23297616839408875, "step": 5669 }, { "epoch": 1.5056433408577878, "grad_norm": 1.1954331932042688, "learning_rate": 3.163175720729954e-06, "loss": 0.21659572422504425, "step": 5670 }, { "epoch": 1.5059089098393308, "grad_norm": 1.2142230208725098, "learning_rate": 3.1599720886149508e-06, "loss": 0.22246181964874268, "step": 5671 }, { "epoch": 1.5061744788208737, "grad_norm": 1.132636194795227, "learning_rate": 3.1567697751766624e-06, "loss": 0.20020918548107147, "step": 5672 }, { "epoch": 1.5064400478024167, "grad_norm": 1.363041735701654, "learning_rate": 3.1535687810324523e-06, "loss": 0.25693628191947937, "step": 5673 }, { "epoch": 1.5067056167839596, "grad_norm": 1.5250673507385644, "learning_rate": 3.150369106799436e-06, "loss": 0.21841923892498016, "step": 5674 }, { "epoch": 1.5069711857655026, "grad_norm": 1.1710254495806258, "learning_rate": 3.1471707530944707e-06, "loss": 0.18131780624389648, "step": 5675 }, { "epoch": 1.5072367547470455, "grad_norm": 1.180596749481675, "learning_rate": 3.143973720534164e-06, "loss": 0.22510449588298798, "step": 5676 }, { "epoch": 1.5075023237285885, "grad_norm": 1.3952546557365002, "learning_rate": 3.1407780097348627e-06, "loss": 0.23721462488174438, "step": 5677 }, { "epoch": 1.5077678927101315, "grad_norm": 1.2200574848273704, "learning_rate": 3.1375836213126653e-06, "loss": 0.24281899631023407, "step": 5678 }, { "epoch": 1.5080334616916744, "grad_norm": 1.3211068465604292, "learning_rate": 3.134390555883412e-06, "loss": 0.23910081386566162, "step": 5679 }, { "epoch": 1.5082990306732174, "grad_norm": 1.357027881520108, "learning_rate": 3.1311988140626825e-06, "loss": 0.2635132670402527, "step": 5680 }, { "epoch": 1.5085645996547603, "grad_norm": 1.239638674575543, "learning_rate": 3.1280083964658147e-06, "loss": 0.24802634119987488, "step": 5681 }, { "epoch": 1.5088301686363033, "grad_norm": 1.3861680174510138, "learning_rate": 3.1248193037078823e-06, "loss": 0.24081437289714813, "step": 5682 }, { "epoch": 1.5090957376178462, "grad_norm": 1.2124748227090532, "learning_rate": 3.121631536403701e-06, "loss": 0.19550001621246338, "step": 5683 }, { "epoch": 1.5093613065993892, "grad_norm": 1.309177755877421, "learning_rate": 3.118445095167837e-06, "loss": 0.2397807538509369, "step": 5684 }, { "epoch": 1.5096268755809321, "grad_norm": 1.2243819490197418, "learning_rate": 3.115259980614602e-06, "loss": 0.2185651659965515, "step": 5685 }, { "epoch": 1.509892444562475, "grad_norm": 1.2555724014592389, "learning_rate": 3.1120761933580414e-06, "loss": 0.22214055061340332, "step": 5686 }, { "epoch": 1.510158013544018, "grad_norm": 1.4127254863789025, "learning_rate": 3.108893734011955e-06, "loss": 0.23971091210842133, "step": 5687 }, { "epoch": 1.510423582525561, "grad_norm": 1.3331222718828735, "learning_rate": 3.1057126031898843e-06, "loss": 0.26458197832107544, "step": 5688 }, { "epoch": 1.510689151507104, "grad_norm": 1.3487790050882777, "learning_rate": 3.1025328015051093e-06, "loss": 0.23730339109897614, "step": 5689 }, { "epoch": 1.5109547204886469, "grad_norm": 1.2964784198979393, "learning_rate": 3.0993543295706653e-06, "loss": 0.21981677412986755, "step": 5690 }, { "epoch": 1.5112202894701898, "grad_norm": 1.1812817656913812, "learning_rate": 3.0961771879993206e-06, "loss": 0.21984878182411194, "step": 5691 }, { "epoch": 1.5114858584517328, "grad_norm": 1.2732802047873515, "learning_rate": 3.093001377403592e-06, "loss": 0.23086440563201904, "step": 5692 }, { "epoch": 1.5117514274332757, "grad_norm": 2.3681680891314953, "learning_rate": 3.0898268983957368e-06, "loss": 0.2355024814605713, "step": 5693 }, { "epoch": 1.5120169964148187, "grad_norm": 1.3061363772251866, "learning_rate": 3.0866537515877584e-06, "loss": 0.21210229396820068, "step": 5694 }, { "epoch": 1.5122825653963616, "grad_norm": 1.3436771657394675, "learning_rate": 3.0834819375914003e-06, "loss": 0.2387622594833374, "step": 5695 }, { "epoch": 1.5125481343779046, "grad_norm": 1.3482258979232278, "learning_rate": 3.0803114570181527e-06, "loss": 0.23822402954101562, "step": 5696 }, { "epoch": 1.5128137033594475, "grad_norm": 1.3248058910768958, "learning_rate": 3.0771423104792454e-06, "loss": 0.26844173669815063, "step": 5697 }, { "epoch": 1.5130792723409905, "grad_norm": 1.2131778927640824, "learning_rate": 3.07397449858565e-06, "loss": 0.23288767039775848, "step": 5698 }, { "epoch": 1.5133448413225334, "grad_norm": 1.2716046597052009, "learning_rate": 3.0708080219480896e-06, "loss": 0.23273086547851562, "step": 5699 }, { "epoch": 1.5136104103040764, "grad_norm": 1.4240236624695346, "learning_rate": 3.067642881177023e-06, "loss": 0.2505509555339813, "step": 5700 }, { "epoch": 1.5138759792856193, "grad_norm": 1.1441752919653974, "learning_rate": 3.0644790768826473e-06, "loss": 0.22801508009433746, "step": 5701 }, { "epoch": 1.5141415482671623, "grad_norm": 1.1462347465841034, "learning_rate": 3.061316609674908e-06, "loss": 0.2110593169927597, "step": 5702 }, { "epoch": 1.5144071172487052, "grad_norm": 1.2145033288630525, "learning_rate": 3.0581554801634927e-06, "loss": 0.22201795876026154, "step": 5703 }, { "epoch": 1.5146726862302482, "grad_norm": 1.2993896506173446, "learning_rate": 3.054995688957829e-06, "loss": 0.23104460537433624, "step": 5704 }, { "epoch": 1.5149382552117912, "grad_norm": 1.5590161841107484, "learning_rate": 3.0518372366670877e-06, "loss": 0.23373261094093323, "step": 5705 }, { "epoch": 1.515203824193334, "grad_norm": 1.368121139637646, "learning_rate": 3.0486801239001806e-06, "loss": 0.2404957264661789, "step": 5706 }, { "epoch": 1.515469393174877, "grad_norm": 1.2346548477581518, "learning_rate": 3.0455243512657606e-06, "loss": 0.23209382593631744, "step": 5707 }, { "epoch": 1.51573496215642, "grad_norm": 1.156984368318911, "learning_rate": 3.042369919372228e-06, "loss": 0.218237042427063, "step": 5708 }, { "epoch": 1.516000531137963, "grad_norm": 12.380411974697722, "learning_rate": 3.039216828827717e-06, "loss": 0.25025027990341187, "step": 5709 }, { "epoch": 1.516266100119506, "grad_norm": 1.3454644235463973, "learning_rate": 3.036065080240106e-06, "loss": 0.24729448556900024, "step": 5710 }, { "epoch": 1.5165316691010489, "grad_norm": 1.246980236713752, "learning_rate": 3.032914674217017e-06, "loss": 0.23614796996116638, "step": 5711 }, { "epoch": 1.5167972380825918, "grad_norm": 1.1947534591327391, "learning_rate": 3.029765611365808e-06, "loss": 0.2313452661037445, "step": 5712 }, { "epoch": 1.5170628070641348, "grad_norm": 1.2169352172923076, "learning_rate": 3.0266178922935842e-06, "loss": 0.22152003645896912, "step": 5713 }, { "epoch": 1.5173283760456777, "grad_norm": 1.3132034423317465, "learning_rate": 3.0234715176071874e-06, "loss": 0.25942179560661316, "step": 5714 }, { "epoch": 1.5175939450272207, "grad_norm": 1.213532583392701, "learning_rate": 3.0203264879132e-06, "loss": 0.25030237436294556, "step": 5715 }, { "epoch": 1.5178595140087636, "grad_norm": 1.212709044397772, "learning_rate": 3.0171828038179497e-06, "loss": 0.2025807797908783, "step": 5716 }, { "epoch": 1.5181250829903066, "grad_norm": 1.3035190960753136, "learning_rate": 3.014040465927499e-06, "loss": 0.20455190539360046, "step": 5717 }, { "epoch": 1.5183906519718495, "grad_norm": 1.2171025232725439, "learning_rate": 3.010899474847655e-06, "loss": 0.24197113513946533, "step": 5718 }, { "epoch": 1.5186562209533925, "grad_norm": 1.243656057613246, "learning_rate": 3.007759831183964e-06, "loss": 0.22290384769439697, "step": 5719 }, { "epoch": 1.5189217899349357, "grad_norm": 1.133911078511842, "learning_rate": 3.0046215355417117e-06, "loss": 0.23087520897388458, "step": 5720 }, { "epoch": 1.5191873589164786, "grad_norm": 1.3329430419316783, "learning_rate": 3.0014845885259236e-06, "loss": 0.24425405263900757, "step": 5721 }, { "epoch": 1.5194529278980216, "grad_norm": 1.310265396817766, "learning_rate": 2.9983489907413675e-06, "loss": 0.24888862669467926, "step": 5722 }, { "epoch": 1.5197184968795645, "grad_norm": 1.3023172954247402, "learning_rate": 2.9952147427925493e-06, "loss": 0.23556756973266602, "step": 5723 }, { "epoch": 1.5199840658611075, "grad_norm": 1.3924872169111115, "learning_rate": 2.992081845283715e-06, "loss": 0.2532619833946228, "step": 5724 }, { "epoch": 1.5202496348426504, "grad_norm": 1.3351422936737996, "learning_rate": 2.988950298818848e-06, "loss": 0.2574974000453949, "step": 5725 }, { "epoch": 1.5205152038241934, "grad_norm": 1.1244851887087242, "learning_rate": 2.9858201040016775e-06, "loss": 0.21997734904289246, "step": 5726 }, { "epoch": 1.5207807728057363, "grad_norm": 1.3952335702566243, "learning_rate": 2.982691261435666e-06, "loss": 0.2174127697944641, "step": 5727 }, { "epoch": 1.5210463417872793, "grad_norm": 1.4277294646697747, "learning_rate": 2.979563771724019e-06, "loss": 0.22455093264579773, "step": 5728 }, { "epoch": 1.5213119107688222, "grad_norm": 1.2606427849530746, "learning_rate": 2.976437635469678e-06, "loss": 0.270727276802063, "step": 5729 }, { "epoch": 1.5215774797503652, "grad_norm": 1.1901052998095392, "learning_rate": 2.9733128532753254e-06, "loss": 0.2233714610338211, "step": 5730 }, { "epoch": 1.5218430487319081, "grad_norm": 1.364720864117707, "learning_rate": 2.970189425743383e-06, "loss": 0.23599566519260406, "step": 5731 }, { "epoch": 1.522108617713451, "grad_norm": 1.2707197493270106, "learning_rate": 2.967067353476011e-06, "loss": 0.23598654568195343, "step": 5732 }, { "epoch": 1.522374186694994, "grad_norm": 1.1793549120144597, "learning_rate": 2.963946637075107e-06, "loss": 0.205197274684906, "step": 5733 }, { "epoch": 1.522639755676537, "grad_norm": 1.1887492971446227, "learning_rate": 2.9608272771423073e-06, "loss": 0.23581506311893463, "step": 5734 }, { "epoch": 1.52290532465808, "grad_norm": 1.2937911951812968, "learning_rate": 2.9577092742789915e-06, "loss": 0.2088197022676468, "step": 5735 }, { "epoch": 1.5231708936396229, "grad_norm": 1.2943182118738674, "learning_rate": 2.95459262908627e-06, "loss": 0.22607067227363586, "step": 5736 }, { "epoch": 1.5234364626211658, "grad_norm": 1.1748118237242067, "learning_rate": 2.951477342164998e-06, "loss": 0.22242344915866852, "step": 5737 }, { "epoch": 1.5237020316027088, "grad_norm": 1.3280405020263697, "learning_rate": 2.9483634141157636e-06, "loss": 0.25626271963119507, "step": 5738 }, { "epoch": 1.5239676005842517, "grad_norm": 1.2212084732536523, "learning_rate": 2.9452508455388975e-06, "loss": 0.2241421341896057, "step": 5739 }, { "epoch": 1.5242331695657947, "grad_norm": 1.5088982481303157, "learning_rate": 2.9421396370344648e-06, "loss": 0.2191103994846344, "step": 5740 }, { "epoch": 1.5244987385473376, "grad_norm": 1.2411878451658047, "learning_rate": 2.9390297892022703e-06, "loss": 0.26252660155296326, "step": 5741 }, { "epoch": 1.5247643075288806, "grad_norm": 1.3964551352557335, "learning_rate": 2.9359213026418567e-06, "loss": 0.21522507071495056, "step": 5742 }, { "epoch": 1.5250298765104235, "grad_norm": 1.0905013771622027, "learning_rate": 2.932814177952499e-06, "loss": 0.20159044861793518, "step": 5743 }, { "epoch": 1.5252954454919665, "grad_norm": 1.138416177249403, "learning_rate": 2.929708415733221e-06, "loss": 0.22679558396339417, "step": 5744 }, { "epoch": 1.5255610144735094, "grad_norm": 1.199157018703913, "learning_rate": 2.926604016582776e-06, "loss": 0.2315664291381836, "step": 5745 }, { "epoch": 1.5258265834550524, "grad_norm": 1.2568252329386058, "learning_rate": 2.923500981099652e-06, "loss": 0.229634091258049, "step": 5746 }, { "epoch": 1.5260921524365954, "grad_norm": 1.2179751735416722, "learning_rate": 2.9203993098820793e-06, "loss": 0.20657674968242645, "step": 5747 }, { "epoch": 1.5263577214181385, "grad_norm": 1.2447733239425043, "learning_rate": 2.9172990035280237e-06, "loss": 0.2306358814239502, "step": 5748 }, { "epoch": 1.5266232903996815, "grad_norm": 1.2950411042959078, "learning_rate": 2.9142000626351875e-06, "loss": 0.2608031928539276, "step": 5749 }, { "epoch": 1.5268888593812244, "grad_norm": 1.337100599856471, "learning_rate": 2.911102487801013e-06, "loss": 0.24675670266151428, "step": 5750 }, { "epoch": 1.5271544283627674, "grad_norm": 1.3568337572597398, "learning_rate": 2.908006279622667e-06, "loss": 0.22544966638088226, "step": 5751 }, { "epoch": 1.5274199973443103, "grad_norm": 1.3214418017258782, "learning_rate": 2.904911438697071e-06, "loss": 0.2328556478023529, "step": 5752 }, { "epoch": 1.5276855663258533, "grad_norm": 1.25396823790717, "learning_rate": 2.901817965620871e-06, "loss": 0.2316005825996399, "step": 5753 }, { "epoch": 1.5279511353073962, "grad_norm": 1.2976508240318196, "learning_rate": 2.8987258609904522e-06, "loss": 0.2332756370306015, "step": 5754 }, { "epoch": 1.5282167042889392, "grad_norm": 1.3432276903845415, "learning_rate": 2.8956351254019355e-06, "loss": 0.24855142831802368, "step": 5755 }, { "epoch": 1.5284822732704821, "grad_norm": 1.2138875439685706, "learning_rate": 2.8925457594511775e-06, "loss": 0.18745368719100952, "step": 5756 }, { "epoch": 1.528747842252025, "grad_norm": 1.877743895818308, "learning_rate": 2.889457763733774e-06, "loss": 0.22402942180633545, "step": 5757 }, { "epoch": 1.529013411233568, "grad_norm": 1.292567134146249, "learning_rate": 2.886371138845051e-06, "loss": 0.2156108319759369, "step": 5758 }, { "epoch": 1.529278980215111, "grad_norm": 1.2848231417758293, "learning_rate": 2.883285885380076e-06, "loss": 0.22866520285606384, "step": 5759 }, { "epoch": 1.529544549196654, "grad_norm": 1.2907471990668473, "learning_rate": 2.880202003933645e-06, "loss": 0.2486938238143921, "step": 5760 }, { "epoch": 1.529810118178197, "grad_norm": 1.34098643692872, "learning_rate": 2.877119495100301e-06, "loss": 0.2565295696258545, "step": 5761 }, { "epoch": 1.5300756871597399, "grad_norm": 1.1480290388256142, "learning_rate": 2.8740383594743116e-06, "loss": 0.21510455012321472, "step": 5762 }, { "epoch": 1.5303412561412828, "grad_norm": 1.266250058472157, "learning_rate": 2.8709585976496825e-06, "loss": 0.2122025489807129, "step": 5763 }, { "epoch": 1.5306068251228258, "grad_norm": 1.3017513152107745, "learning_rate": 2.8678802102201575e-06, "loss": 0.24274399876594543, "step": 5764 }, { "epoch": 1.5308723941043687, "grad_norm": 1.4573413266326471, "learning_rate": 2.864803197779216e-06, "loss": 0.22325341403484344, "step": 5765 }, { "epoch": 1.5311379630859117, "grad_norm": 1.3303976558080437, "learning_rate": 2.8617275609200625e-06, "loss": 0.25205284357070923, "step": 5766 }, { "epoch": 1.5314035320674546, "grad_norm": 1.2638986714524767, "learning_rate": 2.8586533002356465e-06, "loss": 0.2047557830810547, "step": 5767 }, { "epoch": 1.5316691010489976, "grad_norm": 1.2195584514594966, "learning_rate": 2.8555804163186508e-06, "loss": 0.2166992425918579, "step": 5768 }, { "epoch": 1.5319346700305405, "grad_norm": 1.2333416807696795, "learning_rate": 2.8525089097614867e-06, "loss": 0.26253193616867065, "step": 5769 }, { "epoch": 1.5322002390120835, "grad_norm": 1.2030637435961495, "learning_rate": 2.8494387811563108e-06, "loss": 0.23307687044143677, "step": 5770 }, { "epoch": 1.5324658079936264, "grad_norm": 1.2191481171426857, "learning_rate": 2.8463700310950047e-06, "loss": 0.22128549218177795, "step": 5771 }, { "epoch": 1.5327313769751694, "grad_norm": 1.272136705974986, "learning_rate": 2.8433026601691883e-06, "loss": 0.21966281533241272, "step": 5772 }, { "epoch": 1.5329969459567123, "grad_norm": 1.341088625881783, "learning_rate": 2.840236668970213e-06, "loss": 0.22869305312633514, "step": 5773 }, { "epoch": 1.5332625149382553, "grad_norm": 1.2257027323986465, "learning_rate": 2.837172058089167e-06, "loss": 0.21431279182434082, "step": 5774 }, { "epoch": 1.5335280839197982, "grad_norm": 1.3512853622822856, "learning_rate": 2.8341088281168693e-06, "loss": 0.24610282480716705, "step": 5775 }, { "epoch": 1.5337936529013412, "grad_norm": 1.3400303957635655, "learning_rate": 2.8310469796438767e-06, "loss": 0.24414925277233124, "step": 5776 }, { "epoch": 1.5340592218828841, "grad_norm": 1.3597459613858938, "learning_rate": 2.8279865132604766e-06, "loss": 0.2330513596534729, "step": 5777 }, { "epoch": 1.534324790864427, "grad_norm": 1.2551411616890042, "learning_rate": 2.8249274295566863e-06, "loss": 0.23048308491706848, "step": 5778 }, { "epoch": 1.53459035984597, "grad_norm": 1.2566974883874766, "learning_rate": 2.821869729122273e-06, "loss": 0.2411375492811203, "step": 5779 }, { "epoch": 1.534855928827513, "grad_norm": 1.384873838300398, "learning_rate": 2.818813412546715e-06, "loss": 0.22985543310642242, "step": 5780 }, { "epoch": 1.535121497809056, "grad_norm": 1.320574666083159, "learning_rate": 2.815758480419235e-06, "loss": 0.20867247879505157, "step": 5781 }, { "epoch": 1.5353870667905989, "grad_norm": 2.0414068761810182, "learning_rate": 2.8127049333287913e-06, "loss": 0.26378586888313293, "step": 5782 }, { "epoch": 1.5356526357721418, "grad_norm": 1.552041032509997, "learning_rate": 2.8096527718640687e-06, "loss": 0.2690306305885315, "step": 5783 }, { "epoch": 1.5359182047536848, "grad_norm": 1.1602606034579108, "learning_rate": 2.8066019966134907e-06, "loss": 0.22226165235042572, "step": 5784 }, { "epoch": 1.5361837737352277, "grad_norm": 1.2201060637055436, "learning_rate": 2.803552608165209e-06, "loss": 0.23370322585105896, "step": 5785 }, { "epoch": 1.5364493427167707, "grad_norm": 1.3067141176486328, "learning_rate": 2.8005046071071107e-06, "loss": 0.26137909293174744, "step": 5786 }, { "epoch": 1.5367149116983136, "grad_norm": 1.3588127622676833, "learning_rate": 2.7974579940268096e-06, "loss": 0.22630617022514343, "step": 5787 }, { "epoch": 1.5369804806798566, "grad_norm": 1.2356618590652273, "learning_rate": 2.7944127695116663e-06, "loss": 0.22641140222549438, "step": 5788 }, { "epoch": 1.5372460496613995, "grad_norm": 1.266648551925957, "learning_rate": 2.791368934148757e-06, "loss": 0.19647541642189026, "step": 5789 }, { "epoch": 1.5375116186429425, "grad_norm": 1.212906210017999, "learning_rate": 2.788326488524901e-06, "loss": 0.22399532794952393, "step": 5790 }, { "epoch": 1.5377771876244855, "grad_norm": 1.2862970389756843, "learning_rate": 2.7852854332266434e-06, "loss": 0.22549685835838318, "step": 5791 }, { "epoch": 1.5380427566060284, "grad_norm": 1.168406987557996, "learning_rate": 2.7822457688402637e-06, "loss": 0.2129821628332138, "step": 5792 }, { "epoch": 1.5383083255875714, "grad_norm": 1.2301298306170827, "learning_rate": 2.7792074959517755e-06, "loss": 0.25330638885498047, "step": 5793 }, { "epoch": 1.5385738945691143, "grad_norm": 1.3148661968254225, "learning_rate": 2.7761706151469204e-06, "loss": 0.2413945198059082, "step": 5794 }, { "epoch": 1.5388394635506573, "grad_norm": 1.2551515744231165, "learning_rate": 2.773135127011174e-06, "loss": 0.21930523216724396, "step": 5795 }, { "epoch": 1.5391050325322002, "grad_norm": 1.2506577052831476, "learning_rate": 2.7701010321297416e-06, "loss": 0.25499141216278076, "step": 5796 }, { "epoch": 1.5393706015137432, "grad_norm": 1.1567311669751301, "learning_rate": 2.7670683310875613e-06, "loss": 0.19475680589675903, "step": 5797 }, { "epoch": 1.5396361704952861, "grad_norm": 1.3159422945276043, "learning_rate": 2.7640370244693026e-06, "loss": 0.22155825793743134, "step": 5798 }, { "epoch": 1.539901739476829, "grad_norm": 1.1818601031709017, "learning_rate": 2.761007112859365e-06, "loss": 0.2146138846874237, "step": 5799 }, { "epoch": 1.540167308458372, "grad_norm": 1.146035478957987, "learning_rate": 2.7579785968418804e-06, "loss": 0.22698411345481873, "step": 5800 }, { "epoch": 1.540432877439915, "grad_norm": 1.2904710642906891, "learning_rate": 2.75495147700071e-06, "loss": 0.23889532685279846, "step": 5801 }, { "epoch": 1.540698446421458, "grad_norm": 1.2353012354195356, "learning_rate": 2.7519257539194488e-06, "loss": 0.2514609694480896, "step": 5802 }, { "epoch": 1.5409640154030009, "grad_norm": 1.2405153867334813, "learning_rate": 2.7489014281814185e-06, "loss": 0.22332100570201874, "step": 5803 }, { "epoch": 1.5412295843845438, "grad_norm": 1.1768236369414826, "learning_rate": 2.745878500369673e-06, "loss": 0.21316683292388916, "step": 5804 }, { "epoch": 1.5414951533660868, "grad_norm": 1.2446325297163028, "learning_rate": 2.742856971066996e-06, "loss": 0.2228018194437027, "step": 5805 }, { "epoch": 1.5417607223476297, "grad_norm": 1.3243067869686356, "learning_rate": 2.7398368408559084e-06, "loss": 0.22217239439487457, "step": 5806 }, { "epoch": 1.5420262913291727, "grad_norm": 1.331116794742511, "learning_rate": 2.736818110318652e-06, "loss": 0.21147233247756958, "step": 5807 }, { "epoch": 1.5422918603107156, "grad_norm": 1.2851526092309566, "learning_rate": 2.7338007800372024e-06, "loss": 0.23844698071479797, "step": 5808 }, { "epoch": 1.5425574292922586, "grad_norm": 1.3238454632326748, "learning_rate": 2.7307848505932653e-06, "loss": 0.2361423820257187, "step": 5809 }, { "epoch": 1.5428229982738015, "grad_norm": 1.1977956377916248, "learning_rate": 2.727770322568277e-06, "loss": 0.21585656702518463, "step": 5810 }, { "epoch": 1.5430885672553445, "grad_norm": 1.172295737533699, "learning_rate": 2.724757196543403e-06, "loss": 0.233969584107399, "step": 5811 }, { "epoch": 1.5433541362368874, "grad_norm": 1.3309852612756656, "learning_rate": 2.7217454730995363e-06, "loss": 0.25040164589881897, "step": 5812 }, { "epoch": 1.5436197052184304, "grad_norm": 1.5198455877328005, "learning_rate": 2.7187351528173046e-06, "loss": 0.25848713517189026, "step": 5813 }, { "epoch": 1.5438852741999733, "grad_norm": 1.409976572144199, "learning_rate": 2.715726236277061e-06, "loss": 0.22255051136016846, "step": 5814 }, { "epoch": 1.5441508431815163, "grad_norm": 1.1799889920310853, "learning_rate": 2.7127187240588883e-06, "loss": 0.1882694661617279, "step": 5815 }, { "epoch": 1.5444164121630592, "grad_norm": 1.178741445510241, "learning_rate": 2.7097126167426002e-06, "loss": 0.20070400834083557, "step": 5816 }, { "epoch": 1.5446819811446022, "grad_norm": 1.2959554460073714, "learning_rate": 2.706707914907739e-06, "loss": 0.25316092371940613, "step": 5817 }, { "epoch": 1.5449475501261452, "grad_norm": 1.334925654094324, "learning_rate": 2.703704619133576e-06, "loss": 0.24665585160255432, "step": 5818 }, { "epoch": 1.545213119107688, "grad_norm": 1.290703779819622, "learning_rate": 2.7007027299991095e-06, "loss": 0.24172846972942352, "step": 5819 }, { "epoch": 1.545478688089231, "grad_norm": 1.2781945872260183, "learning_rate": 2.6977022480830708e-06, "loss": 0.2405129075050354, "step": 5820 }, { "epoch": 1.545744257070774, "grad_norm": 1.075296946307477, "learning_rate": 2.694703173963914e-06, "loss": 0.19716276228427887, "step": 5821 }, { "epoch": 1.546009826052317, "grad_norm": 1.1434881656258093, "learning_rate": 2.6917055082198284e-06, "loss": 0.20343703031539917, "step": 5822 }, { "epoch": 1.54627539503386, "grad_norm": 1.5985849963050902, "learning_rate": 2.688709251428725e-06, "loss": 0.24382619559764862, "step": 5823 }, { "epoch": 1.5465409640154029, "grad_norm": 1.7314575476063523, "learning_rate": 2.6857144041682514e-06, "loss": 0.2962399423122406, "step": 5824 }, { "epoch": 1.5468065329969458, "grad_norm": 1.2699118659079873, "learning_rate": 2.6827209670157774e-06, "loss": 0.24034687876701355, "step": 5825 }, { "epoch": 1.5470721019784888, "grad_norm": 1.3757632125147359, "learning_rate": 2.6797289405484016e-06, "loss": 0.2575085163116455, "step": 5826 }, { "epoch": 1.5473376709600317, "grad_norm": 1.556424910652697, "learning_rate": 2.6767383253429515e-06, "loss": 0.2586629092693329, "step": 5827 }, { "epoch": 1.5476032399415747, "grad_norm": 1.096117045688234, "learning_rate": 2.6737491219759815e-06, "loss": 0.18447624146938324, "step": 5828 }, { "epoch": 1.5478688089231176, "grad_norm": 1.3930188378643134, "learning_rate": 2.670761331023779e-06, "loss": 0.244853213429451, "step": 5829 }, { "epoch": 1.5481343779046606, "grad_norm": 1.3163693020327074, "learning_rate": 2.66777495306235e-06, "loss": 0.24641919136047363, "step": 5830 }, { "epoch": 1.5483999468862035, "grad_norm": 1.4086337954424433, "learning_rate": 2.6647899886674323e-06, "loss": 0.2364550232887268, "step": 5831 }, { "epoch": 1.5486655158677467, "grad_norm": 1.1695450852938096, "learning_rate": 2.6618064384144925e-06, "loss": 0.17760278284549713, "step": 5832 }, { "epoch": 1.5489310848492897, "grad_norm": 1.1988872335295608, "learning_rate": 2.6588243028787274e-06, "loss": 0.18571510910987854, "step": 5833 }, { "epoch": 1.5491966538308326, "grad_norm": 1.2537289047953852, "learning_rate": 2.655843582635057e-06, "loss": 0.23693162202835083, "step": 5834 }, { "epoch": 1.5494622228123756, "grad_norm": 1.3552352092705502, "learning_rate": 2.652864278258126e-06, "loss": 0.26481011509895325, "step": 5835 }, { "epoch": 1.5497277917939185, "grad_norm": 1.4182429828127188, "learning_rate": 2.6498863903223115e-06, "loss": 0.23405003547668457, "step": 5836 }, { "epoch": 1.5499933607754615, "grad_norm": 2.5576796684815686, "learning_rate": 2.6469099194017144e-06, "loss": 0.20662814378738403, "step": 5837 }, { "epoch": 1.5502589297570044, "grad_norm": 1.3124069479853646, "learning_rate": 2.6439348660701634e-06, "loss": 0.2722313404083252, "step": 5838 }, { "epoch": 1.5505244987385474, "grad_norm": 1.3906100112719377, "learning_rate": 2.6409612309012134e-06, "loss": 0.2288864552974701, "step": 5839 }, { "epoch": 1.5507900677200903, "grad_norm": 1.322570753297788, "learning_rate": 2.6379890144681464e-06, "loss": 0.2286190539598465, "step": 5840 }, { "epoch": 1.5510556367016333, "grad_norm": 1.2231420705695173, "learning_rate": 2.6350182173439666e-06, "loss": 0.22478938102722168, "step": 5841 }, { "epoch": 1.5513212056831762, "grad_norm": 1.415848841276022, "learning_rate": 2.6320488401014166e-06, "loss": 0.2520615756511688, "step": 5842 }, { "epoch": 1.5515867746647192, "grad_norm": 1.3741284890856262, "learning_rate": 2.629080883312952e-06, "loss": 0.2121289074420929, "step": 5843 }, { "epoch": 1.5518523436462621, "grad_norm": 1.3092311759839703, "learning_rate": 2.6261143475507656e-06, "loss": 0.2252352237701416, "step": 5844 }, { "epoch": 1.552117912627805, "grad_norm": 1.191285245143269, "learning_rate": 2.6231492333867626e-06, "loss": 0.21188892424106598, "step": 5845 }, { "epoch": 1.552383481609348, "grad_norm": 1.1276138403597054, "learning_rate": 2.6201855413925857e-06, "loss": 0.21534699201583862, "step": 5846 }, { "epoch": 1.552649050590891, "grad_norm": 1.2849885490704696, "learning_rate": 2.6172232721395998e-06, "loss": 0.21781614422798157, "step": 5847 }, { "epoch": 1.552914619572434, "grad_norm": 1.3317886914724781, "learning_rate": 2.6142624261988947e-06, "loss": 0.2476508915424347, "step": 5848 }, { "epoch": 1.5531801885539769, "grad_norm": 1.3439658215829489, "learning_rate": 2.611303004141287e-06, "loss": 0.2692151665687561, "step": 5849 }, { "epoch": 1.5534457575355198, "grad_norm": 1.2839746536411722, "learning_rate": 2.6083450065373163e-06, "loss": 0.24868687987327576, "step": 5850 }, { "epoch": 1.5537113265170628, "grad_norm": 1.2704813852574235, "learning_rate": 2.6053884339572543e-06, "loss": 0.24215853214263916, "step": 5851 }, { "epoch": 1.5539768954986057, "grad_norm": 1.2100819665594098, "learning_rate": 2.602433286971091e-06, "loss": 0.2157444804906845, "step": 5852 }, { "epoch": 1.5542424644801487, "grad_norm": 1.369237575424674, "learning_rate": 2.599479566148544e-06, "loss": 0.22152379155158997, "step": 5853 }, { "epoch": 1.5545080334616916, "grad_norm": 1.1930490692336162, "learning_rate": 2.596527272059055e-06, "loss": 0.2278299182653427, "step": 5854 }, { "epoch": 1.5547736024432346, "grad_norm": 1.406485645097326, "learning_rate": 2.593576405271793e-06, "loss": 0.23183950781822205, "step": 5855 }, { "epoch": 1.5550391714247775, "grad_norm": 1.209726796816396, "learning_rate": 2.5906269663556484e-06, "loss": 0.22167566418647766, "step": 5856 }, { "epoch": 1.5553047404063205, "grad_norm": 1.1790986825354977, "learning_rate": 2.5876789558792403e-06, "loss": 0.24111366271972656, "step": 5857 }, { "epoch": 1.5555703093878634, "grad_norm": 1.1706391072024214, "learning_rate": 2.5847323744109087e-06, "loss": 0.2090388983488083, "step": 5858 }, { "epoch": 1.5558358783694064, "grad_norm": 1.2588154614837785, "learning_rate": 2.58178722251872e-06, "loss": 0.2087189108133316, "step": 5859 }, { "epoch": 1.5561014473509496, "grad_norm": 1.300626487965864, "learning_rate": 2.578843500770465e-06, "loss": 0.2277342677116394, "step": 5860 }, { "epoch": 1.5563670163324925, "grad_norm": 1.3517116904487896, "learning_rate": 2.57590120973366e-06, "loss": 0.2204241305589676, "step": 5861 }, { "epoch": 1.5566325853140355, "grad_norm": 1.213807933631201, "learning_rate": 2.5729603499755416e-06, "loss": 0.2138606607913971, "step": 5862 }, { "epoch": 1.5568981542955784, "grad_norm": 1.4669648743657906, "learning_rate": 2.5700209220630733e-06, "loss": 0.21257862448692322, "step": 5863 }, { "epoch": 1.5571637232771214, "grad_norm": 1.2314998246120414, "learning_rate": 2.5670829265629437e-06, "loss": 0.20991909503936768, "step": 5864 }, { "epoch": 1.5574292922586643, "grad_norm": 1.294980658460416, "learning_rate": 2.5641463640415633e-06, "loss": 0.23745422065258026, "step": 5865 }, { "epoch": 1.5576948612402073, "grad_norm": 1.2425796180120088, "learning_rate": 2.561211235065065e-06, "loss": 0.21482989192008972, "step": 5866 }, { "epoch": 1.5579604302217502, "grad_norm": 1.008120888370748, "learning_rate": 2.558277540199309e-06, "loss": 0.17866572737693787, "step": 5867 }, { "epoch": 1.5582259992032932, "grad_norm": 1.2966262005019353, "learning_rate": 2.555345280009872e-06, "loss": 0.223822683095932, "step": 5868 }, { "epoch": 1.5584915681848361, "grad_norm": 1.339606961190666, "learning_rate": 2.552414455062068e-06, "loss": 0.2293519228696823, "step": 5869 }, { "epoch": 1.558757137166379, "grad_norm": 1.3023504432012787, "learning_rate": 2.5494850659209203e-06, "loss": 0.2556726038455963, "step": 5870 }, { "epoch": 1.559022706147922, "grad_norm": 1.255574464472328, "learning_rate": 2.546557113151181e-06, "loss": 0.26891303062438965, "step": 5871 }, { "epoch": 1.559288275129465, "grad_norm": 1.1754509839553133, "learning_rate": 2.5436305973173257e-06, "loss": 0.19510813057422638, "step": 5872 }, { "epoch": 1.559553844111008, "grad_norm": 1.2819966401856495, "learning_rate": 2.5407055189835518e-06, "loss": 0.22906547784805298, "step": 5873 }, { "epoch": 1.559819413092551, "grad_norm": 1.3121165067922245, "learning_rate": 2.5377818787137788e-06, "loss": 0.25452786684036255, "step": 5874 }, { "epoch": 1.5600849820740939, "grad_norm": 1.2743199898597464, "learning_rate": 2.5348596770716503e-06, "loss": 0.205597922205925, "step": 5875 }, { "epoch": 1.5603505510556368, "grad_norm": 1.3020148941868286, "learning_rate": 2.5319389146205344e-06, "loss": 0.24009352922439575, "step": 5876 }, { "epoch": 1.5606161200371798, "grad_norm": 1.433983972963341, "learning_rate": 2.5290195919235173e-06, "loss": 0.23381268978118896, "step": 5877 }, { "epoch": 1.5608816890187227, "grad_norm": 1.1554092234943296, "learning_rate": 2.52610170954341e-06, "loss": 0.2267276644706726, "step": 5878 }, { "epoch": 1.5611472580002657, "grad_norm": 1.2742422977156036, "learning_rate": 2.5231852680427482e-06, "loss": 0.24330289661884308, "step": 5879 }, { "epoch": 1.5614128269818086, "grad_norm": 1.2802855767249914, "learning_rate": 2.5202702679837852e-06, "loss": 0.24877145886421204, "step": 5880 }, { "epoch": 1.5616783959633516, "grad_norm": 1.1377670913842177, "learning_rate": 2.5173567099285e-06, "loss": 0.20410388708114624, "step": 5881 }, { "epoch": 1.5619439649448945, "grad_norm": 1.2268765869469427, "learning_rate": 2.514444594438591e-06, "loss": 0.21524877846240997, "step": 5882 }, { "epoch": 1.5622095339264375, "grad_norm": 1.1986269244208958, "learning_rate": 2.5115339220754796e-06, "loss": 0.18785043060779572, "step": 5883 }, { "epoch": 1.5624751029079804, "grad_norm": 1.3539528047627718, "learning_rate": 2.5086246934003113e-06, "loss": 0.21200208365917206, "step": 5884 }, { "epoch": 1.5627406718895234, "grad_norm": 1.6373531833898813, "learning_rate": 2.5057169089739485e-06, "loss": 0.20752021670341492, "step": 5885 }, { "epoch": 1.5630062408710663, "grad_norm": 1.1717071963534185, "learning_rate": 2.502810569356976e-06, "loss": 0.21395736932754517, "step": 5886 }, { "epoch": 1.5632718098526093, "grad_norm": 1.2664848714228343, "learning_rate": 2.499905675109707e-06, "loss": 0.26949262619018555, "step": 5887 }, { "epoch": 1.5635373788341522, "grad_norm": 1.5283985889023297, "learning_rate": 2.497002226792169e-06, "loss": 0.2309839278459549, "step": 5888 }, { "epoch": 1.5638029478156952, "grad_norm": 1.2596143819163301, "learning_rate": 2.4941002249641123e-06, "loss": 0.24415400624275208, "step": 5889 }, { "epoch": 1.5640685167972381, "grad_norm": 1.3074402223027564, "learning_rate": 2.4911996701850083e-06, "loss": 0.23493322730064392, "step": 5890 }, { "epoch": 1.564334085778781, "grad_norm": 1.260748243658743, "learning_rate": 2.488300563014049e-06, "loss": 0.23824438452720642, "step": 5891 }, { "epoch": 1.564599654760324, "grad_norm": 1.2534870916273309, "learning_rate": 2.4854029040101503e-06, "loss": 0.2523414194583893, "step": 5892 }, { "epoch": 1.564865223741867, "grad_norm": 1.2879106186872462, "learning_rate": 2.482506693731944e-06, "loss": 0.21360887587070465, "step": 5893 }, { "epoch": 1.56513079272341, "grad_norm": 1.1951820042572139, "learning_rate": 2.47961193273779e-06, "loss": 0.21182934939861298, "step": 5894 }, { "epoch": 1.5653963617049529, "grad_norm": 1.4293886797193323, "learning_rate": 2.4767186215857542e-06, "loss": 0.23104771971702576, "step": 5895 }, { "epoch": 1.5656619306864958, "grad_norm": 1.2606491547398977, "learning_rate": 2.473826760833643e-06, "loss": 0.22297397255897522, "step": 5896 }, { "epoch": 1.5659274996680388, "grad_norm": 1.176802218612286, "learning_rate": 2.4709363510389684e-06, "loss": 0.21597865223884583, "step": 5897 }, { "epoch": 1.5661930686495817, "grad_norm": 1.4303555951561693, "learning_rate": 2.468047392758969e-06, "loss": 0.27620527148246765, "step": 5898 }, { "epoch": 1.5664586376311247, "grad_norm": 1.373809252877093, "learning_rate": 2.465159886550601e-06, "loss": 0.25262463092803955, "step": 5899 }, { "epoch": 1.5667242066126676, "grad_norm": 1.376719462816966, "learning_rate": 2.462273832970542e-06, "loss": 0.2729034125804901, "step": 5900 }, { "epoch": 1.5669897755942106, "grad_norm": 1.3637563490895455, "learning_rate": 2.459389232575188e-06, "loss": 0.2313854992389679, "step": 5901 }, { "epoch": 1.5672553445757536, "grad_norm": 1.3202318144066494, "learning_rate": 2.456506085920658e-06, "loss": 0.22513791918754578, "step": 5902 }, { "epoch": 1.5675209135572965, "grad_norm": 1.3152362934287614, "learning_rate": 2.4536243935627856e-06, "loss": 0.2658824026584625, "step": 5903 }, { "epoch": 1.5677864825388395, "grad_norm": 1.1721087348112986, "learning_rate": 2.4507441560571275e-06, "loss": 0.21781010925769806, "step": 5904 }, { "epoch": 1.5680520515203824, "grad_norm": 1.3393030222309363, "learning_rate": 2.4478653739589632e-06, "loss": 0.21047937870025635, "step": 5905 }, { "epoch": 1.5683176205019254, "grad_norm": 1.2196979825563006, "learning_rate": 2.4449880478232858e-06, "loss": 0.21674057841300964, "step": 5906 }, { "epoch": 1.5685831894834683, "grad_norm": 1.200112520021674, "learning_rate": 2.44211217820481e-06, "loss": 0.22062627971172333, "step": 5907 }, { "epoch": 1.5688487584650113, "grad_norm": 1.3158234051142574, "learning_rate": 2.439237765657968e-06, "loss": 0.22440886497497559, "step": 5908 }, { "epoch": 1.5691143274465542, "grad_norm": 1.129873307165861, "learning_rate": 2.4363648107369175e-06, "loss": 0.21888123452663422, "step": 5909 }, { "epoch": 1.5693798964280972, "grad_norm": 1.2586007199788052, "learning_rate": 2.433493313995524e-06, "loss": 0.23104462027549744, "step": 5910 }, { "epoch": 1.5696454654096401, "grad_norm": 1.427902558182486, "learning_rate": 2.4306232759873803e-06, "loss": 0.23032237589359283, "step": 5911 }, { "epoch": 1.569911034391183, "grad_norm": 1.3780752776280365, "learning_rate": 2.4277546972657974e-06, "loss": 0.2588527202606201, "step": 5912 }, { "epoch": 1.570176603372726, "grad_norm": 1.4647042397629928, "learning_rate": 2.424887578383799e-06, "loss": 0.2845698893070221, "step": 5913 }, { "epoch": 1.570442172354269, "grad_norm": 1.338246310760916, "learning_rate": 2.4220219198941384e-06, "loss": 0.23010894656181335, "step": 5914 }, { "epoch": 1.570707741335812, "grad_norm": 1.3783426416349442, "learning_rate": 2.419157722349278e-06, "loss": 0.2623594403266907, "step": 5915 }, { "epoch": 1.5709733103173549, "grad_norm": 1.2349976574308903, "learning_rate": 2.416294986301401e-06, "loss": 0.2107153981924057, "step": 5916 }, { "epoch": 1.5712388792988978, "grad_norm": 1.3633626366853218, "learning_rate": 2.413433712302409e-06, "loss": 0.2115003615617752, "step": 5917 }, { "epoch": 1.5715044482804408, "grad_norm": 1.3738602333573011, "learning_rate": 2.410573900903921e-06, "loss": 0.22406762838363647, "step": 5918 }, { "epoch": 1.5717700172619837, "grad_norm": 1.3017270649216575, "learning_rate": 2.407715552657277e-06, "loss": 0.24878525733947754, "step": 5919 }, { "epoch": 1.5720355862435267, "grad_norm": 1.5003273963811, "learning_rate": 2.404858668113532e-06, "loss": 0.24546805024147034, "step": 5920 }, { "epoch": 1.5723011552250696, "grad_norm": 1.5650848412040055, "learning_rate": 2.402003247823459e-06, "loss": 0.23430263996124268, "step": 5921 }, { "epoch": 1.5725667242066126, "grad_norm": 1.3939131226044492, "learning_rate": 2.399149292337547e-06, "loss": 0.26935267448425293, "step": 5922 }, { "epoch": 1.5728322931881555, "grad_norm": 1.1554138984093538, "learning_rate": 2.3962968022060097e-06, "loss": 0.21104472875595093, "step": 5923 }, { "epoch": 1.5730978621696985, "grad_norm": 1.147816084956367, "learning_rate": 2.3934457779787755e-06, "loss": 0.17162750661373138, "step": 5924 }, { "epoch": 1.5733634311512414, "grad_norm": 1.2036391990293953, "learning_rate": 2.390596220205481e-06, "loss": 0.22233474254608154, "step": 5925 }, { "epoch": 1.5736290001327844, "grad_norm": 1.456348691360017, "learning_rate": 2.387748129435491e-06, "loss": 0.2326992005109787, "step": 5926 }, { "epoch": 1.5738945691143273, "grad_norm": 1.2656294085970974, "learning_rate": 2.3849015062178835e-06, "loss": 0.245779350399971, "step": 5927 }, { "epoch": 1.5741601380958703, "grad_norm": 1.2198185109849795, "learning_rate": 2.382056351101454e-06, "loss": 0.24269379675388336, "step": 5928 }, { "epoch": 1.5744257070774133, "grad_norm": 1.2241918308854736, "learning_rate": 2.3792126646347138e-06, "loss": 0.23644019663333893, "step": 5929 }, { "epoch": 1.5746912760589562, "grad_norm": 1.2680435600362268, "learning_rate": 2.376370447365893e-06, "loss": 0.254330575466156, "step": 5930 }, { "epoch": 1.5749568450404992, "grad_norm": 1.4146409212378834, "learning_rate": 2.373529699842936e-06, "loss": 0.2728506922721863, "step": 5931 }, { "epoch": 1.575222414022042, "grad_norm": 1.3627178065769006, "learning_rate": 2.3706904226135087e-06, "loss": 0.23671439290046692, "step": 5932 }, { "epoch": 1.575487983003585, "grad_norm": 1.409873356618632, "learning_rate": 2.367852616224989e-06, "loss": 0.24205748736858368, "step": 5933 }, { "epoch": 1.575753551985128, "grad_norm": 1.2728197754861583, "learning_rate": 2.3650162812244725e-06, "loss": 0.1915436089038849, "step": 5934 }, { "epoch": 1.576019120966671, "grad_norm": 1.2091326643578577, "learning_rate": 2.3621814181587697e-06, "loss": 0.23453299701213837, "step": 5935 }, { "epoch": 1.576284689948214, "grad_norm": 1.3060415308267561, "learning_rate": 2.3593480275744106e-06, "loss": 0.24066327512264252, "step": 5936 }, { "epoch": 1.5765502589297569, "grad_norm": 1.246429396187596, "learning_rate": 2.356516110017639e-06, "loss": 0.22510530054569244, "step": 5937 }, { "epoch": 1.5768158279112998, "grad_norm": 1.2889494549478113, "learning_rate": 2.3536856660344144e-06, "loss": 0.22967353463172913, "step": 5938 }, { "epoch": 1.5770813968928428, "grad_norm": 1.2404139099674472, "learning_rate": 2.3508566961704127e-06, "loss": 0.2299107313156128, "step": 5939 }, { "epoch": 1.5773469658743857, "grad_norm": 1.2560783974284127, "learning_rate": 2.3480292009710282e-06, "loss": 0.23418918251991272, "step": 5940 }, { "epoch": 1.5776125348559287, "grad_norm": 1.2857056044544095, "learning_rate": 2.3452031809813657e-06, "loss": 0.26528510451316833, "step": 5941 }, { "epoch": 1.5778781038374716, "grad_norm": 1.1247059842406957, "learning_rate": 2.342378636746251e-06, "loss": 0.21878717839717865, "step": 5942 }, { "epoch": 1.5781436728190146, "grad_norm": 1.1637472196421235, "learning_rate": 2.339555568810221e-06, "loss": 0.19697530567646027, "step": 5943 }, { "epoch": 1.5784092418005577, "grad_norm": 1.3422665805434115, "learning_rate": 2.3367339777175313e-06, "loss": 0.24812257289886475, "step": 5944 }, { "epoch": 1.5786748107821007, "grad_norm": 1.3285793357341238, "learning_rate": 2.3339138640121504e-06, "loss": 0.27651745080947876, "step": 5945 }, { "epoch": 1.5789403797636437, "grad_norm": 1.308131821171991, "learning_rate": 2.3310952282377643e-06, "loss": 0.2651634216308594, "step": 5946 }, { "epoch": 1.5792059487451866, "grad_norm": 1.3163549633798883, "learning_rate": 2.328278070937772e-06, "loss": 0.23799028992652893, "step": 5947 }, { "epoch": 1.5794715177267296, "grad_norm": 1.4229706240812914, "learning_rate": 2.3254623926552867e-06, "loss": 0.2528802752494812, "step": 5948 }, { "epoch": 1.5797370867082725, "grad_norm": 1.2071666314804592, "learning_rate": 2.322648193933137e-06, "loss": 0.23819346725940704, "step": 5949 }, { "epoch": 1.5800026556898155, "grad_norm": 1.2694222057013376, "learning_rate": 2.319835475313873e-06, "loss": 0.2510845959186554, "step": 5950 }, { "epoch": 1.5802682246713584, "grad_norm": 1.0731141255180743, "learning_rate": 2.31702423733975e-06, "loss": 0.20156612992286682, "step": 5951 }, { "epoch": 1.5805337936529014, "grad_norm": 1.320010192923148, "learning_rate": 2.3142144805527413e-06, "loss": 0.23375174403190613, "step": 5952 }, { "epoch": 1.5807993626344443, "grad_norm": 1.187058092026163, "learning_rate": 2.311406205494535e-06, "loss": 0.2378280758857727, "step": 5953 }, { "epoch": 1.5810649316159873, "grad_norm": 1.4550533599389408, "learning_rate": 2.308599412706535e-06, "loss": 0.2087683081626892, "step": 5954 }, { "epoch": 1.5813305005975302, "grad_norm": 1.2856302099767283, "learning_rate": 2.3057941027298557e-06, "loss": 0.2228693962097168, "step": 5955 }, { "epoch": 1.5815960695790732, "grad_norm": 1.4738789364963756, "learning_rate": 2.302990276105329e-06, "loss": 0.22694727778434753, "step": 5956 }, { "epoch": 1.5818616385606161, "grad_norm": 1.2486840544551192, "learning_rate": 2.300187933373499e-06, "loss": 0.22996942698955536, "step": 5957 }, { "epoch": 1.582127207542159, "grad_norm": 1.331719034245123, "learning_rate": 2.2973870750746253e-06, "loss": 0.2440253496170044, "step": 5958 }, { "epoch": 1.582392776523702, "grad_norm": 1.3266637203740035, "learning_rate": 2.2945877017486782e-06, "loss": 0.2507309019565582, "step": 5959 }, { "epoch": 1.582658345505245, "grad_norm": 2.8683041985739677, "learning_rate": 2.2917898139353467e-06, "loss": 0.24790918827056885, "step": 5960 }, { "epoch": 1.582923914486788, "grad_norm": 1.4168604850261965, "learning_rate": 2.2889934121740287e-06, "loss": 0.22106975317001343, "step": 5961 }, { "epoch": 1.5831894834683309, "grad_norm": 1.5726662217531726, "learning_rate": 2.2861984970038385e-06, "loss": 0.2410939633846283, "step": 5962 }, { "epoch": 1.5834550524498738, "grad_norm": 1.1559016560001114, "learning_rate": 2.283405068963601e-06, "loss": 0.22821484506130219, "step": 5963 }, { "epoch": 1.5837206214314168, "grad_norm": 1.2324685594628142, "learning_rate": 2.2806131285918588e-06, "loss": 0.21425281465053558, "step": 5964 }, { "epoch": 1.5839861904129597, "grad_norm": 1.2434376170807215, "learning_rate": 2.277822676426863e-06, "loss": 0.22428902983665466, "step": 5965 }, { "epoch": 1.5842517593945027, "grad_norm": 1.4592375031786005, "learning_rate": 2.27503371300658e-06, "loss": 0.2986769676208496, "step": 5966 }, { "epoch": 1.5845173283760456, "grad_norm": 1.4384957681975041, "learning_rate": 2.272246238868687e-06, "loss": 0.24697065353393555, "step": 5967 }, { "epoch": 1.5847828973575886, "grad_norm": 1.3175254870878064, "learning_rate": 2.269460254550583e-06, "loss": 0.23725461959838867, "step": 5968 }, { "epoch": 1.5850484663391315, "grad_norm": 1.5010497616053564, "learning_rate": 2.2666757605893664e-06, "loss": 0.2661248445510864, "step": 5969 }, { "epoch": 1.5853140353206745, "grad_norm": 1.2390278830143426, "learning_rate": 2.263892757521858e-06, "loss": 0.23328733444213867, "step": 5970 }, { "epoch": 1.5855796043022174, "grad_norm": 1.2547818797647754, "learning_rate": 2.2611112458845873e-06, "loss": 0.22886580228805542, "step": 5971 }, { "epoch": 1.5858451732837606, "grad_norm": 1.1882681583888588, "learning_rate": 2.2583312262137966e-06, "loss": 0.25051698088645935, "step": 5972 }, { "epoch": 1.5861107422653036, "grad_norm": 1.2988472953319592, "learning_rate": 2.2555526990454413e-06, "loss": 0.2400815784931183, "step": 5973 }, { "epoch": 1.5863763112468465, "grad_norm": 1.1598677166947555, "learning_rate": 2.2527756649151912e-06, "loss": 0.2212347537279129, "step": 5974 }, { "epoch": 1.5866418802283895, "grad_norm": 1.355013417523964, "learning_rate": 2.2500001243584204e-06, "loss": 0.3002026379108429, "step": 5975 }, { "epoch": 1.5869074492099324, "grad_norm": 1.1899701199057289, "learning_rate": 2.2472260779102185e-06, "loss": 0.19813531637191772, "step": 5976 }, { "epoch": 1.5871730181914754, "grad_norm": 1.2404972223723234, "learning_rate": 2.2444535261053968e-06, "loss": 0.2233983874320984, "step": 5977 }, { "epoch": 1.5874385871730183, "grad_norm": 1.417840431772693, "learning_rate": 2.2416824694784676e-06, "loss": 0.26059988141059875, "step": 5978 }, { "epoch": 1.5877041561545613, "grad_norm": 1.2961846276739968, "learning_rate": 2.2389129085636573e-06, "loss": 0.23058606684207916, "step": 5979 }, { "epoch": 1.5879697251361042, "grad_norm": 1.3397298592095879, "learning_rate": 2.236144843894904e-06, "loss": 0.2414383739233017, "step": 5980 }, { "epoch": 1.5882352941176472, "grad_norm": 1.2013757541083616, "learning_rate": 2.23337827600586e-06, "loss": 0.21688291430473328, "step": 5981 }, { "epoch": 1.5885008630991901, "grad_norm": 1.2977536190104755, "learning_rate": 2.2306132054298847e-06, "loss": 0.24297408759593964, "step": 5982 }, { "epoch": 1.588766432080733, "grad_norm": 1.449081017944755, "learning_rate": 2.227849632700052e-06, "loss": 0.2655821442604065, "step": 5983 }, { "epoch": 1.589032001062276, "grad_norm": 1.2305338711146763, "learning_rate": 2.225087558349146e-06, "loss": 0.20545080304145813, "step": 5984 }, { "epoch": 1.589297570043819, "grad_norm": 1.470607418959754, "learning_rate": 2.2223269829096593e-06, "loss": 0.24151475727558136, "step": 5985 }, { "epoch": 1.589563139025362, "grad_norm": 1.2194062039730535, "learning_rate": 2.2195679069138043e-06, "loss": 0.2294519543647766, "step": 5986 }, { "epoch": 1.589828708006905, "grad_norm": 1.3319096935394759, "learning_rate": 2.2168103308934953e-06, "loss": 0.2041824758052826, "step": 5987 }, { "epoch": 1.5900942769884479, "grad_norm": 1.181577384258167, "learning_rate": 2.21405425538036e-06, "loss": 0.1856188029050827, "step": 5988 }, { "epoch": 1.5903598459699908, "grad_norm": 1.2644853901124522, "learning_rate": 2.2112996809057395e-06, "loss": 0.24337685108184814, "step": 5989 }, { "epoch": 1.5906254149515338, "grad_norm": 1.1714048449744126, "learning_rate": 2.20854660800068e-06, "loss": 0.2201787382364273, "step": 5990 }, { "epoch": 1.5908909839330767, "grad_norm": 1.322531300676563, "learning_rate": 2.2057950371959427e-06, "loss": 0.23505619168281555, "step": 5991 }, { "epoch": 1.5911565529146197, "grad_norm": 1.4085526679551708, "learning_rate": 2.203044969021997e-06, "loss": 0.19528049230575562, "step": 5992 }, { "epoch": 1.5914221218961626, "grad_norm": 1.2299879902160842, "learning_rate": 2.2002964040090256e-06, "loss": 0.22281290590763092, "step": 5993 }, { "epoch": 1.5916876908777056, "grad_norm": 1.310771483519368, "learning_rate": 2.1975493426869155e-06, "loss": 0.19606761634349823, "step": 5994 }, { "epoch": 1.5919532598592485, "grad_norm": 1.2570005315725017, "learning_rate": 2.1948037855852733e-06, "loss": 0.22559323906898499, "step": 5995 }, { "epoch": 1.5922188288407915, "grad_norm": 1.2326545276620708, "learning_rate": 2.192059733233408e-06, "loss": 0.20417393743991852, "step": 5996 }, { "epoch": 1.5924843978223344, "grad_norm": 1.351064737074131, "learning_rate": 2.18931718616034e-06, "loss": 0.2579960525035858, "step": 5997 }, { "epoch": 1.5927499668038774, "grad_norm": 1.2980140620122547, "learning_rate": 2.1865761448948e-06, "loss": 0.23339781165122986, "step": 5998 }, { "epoch": 1.5930155357854203, "grad_norm": 1.2588476812522966, "learning_rate": 2.1838366099652274e-06, "loss": 0.2368197739124298, "step": 5999 }, { "epoch": 1.5932811047669633, "grad_norm": 1.2980274155826699, "learning_rate": 2.1810985818997743e-06, "loss": 0.2225847840309143, "step": 6000 }, { "epoch": 1.5935466737485062, "grad_norm": 1.3094945647641514, "learning_rate": 2.1783620612263e-06, "loss": 0.2426701784133911, "step": 6001 }, { "epoch": 1.5938122427300492, "grad_norm": 1.284834767608695, "learning_rate": 2.175627048472372e-06, "loss": 0.23647268116474152, "step": 6002 }, { "epoch": 1.5940778117115921, "grad_norm": 1.2525920428706867, "learning_rate": 2.1728935441652687e-06, "loss": 0.22843337059020996, "step": 6003 }, { "epoch": 1.594343380693135, "grad_norm": 1.1786632019087344, "learning_rate": 2.1701615488319785e-06, "loss": 0.21524465084075928, "step": 6004 }, { "epoch": 1.594608949674678, "grad_norm": 1.225831889373155, "learning_rate": 2.167431062999197e-06, "loss": 0.2160830795764923, "step": 6005 }, { "epoch": 1.594874518656221, "grad_norm": 1.238709201727011, "learning_rate": 2.1647020871933288e-06, "loss": 0.2321595996618271, "step": 6006 }, { "epoch": 1.595140087637764, "grad_norm": 1.164283210992047, "learning_rate": 2.1619746219404916e-06, "loss": 0.21255026757717133, "step": 6007 }, { "epoch": 1.5954056566193069, "grad_norm": 1.3822319128280973, "learning_rate": 2.1592486677665047e-06, "loss": 0.22851255536079407, "step": 6008 }, { "epoch": 1.5956712256008498, "grad_norm": 1.3982384304626327, "learning_rate": 2.1565242251969022e-06, "loss": 0.23844364285469055, "step": 6009 }, { "epoch": 1.5959367945823928, "grad_norm": 1.3184134341650149, "learning_rate": 2.153801294756924e-06, "loss": 0.2592385411262512, "step": 6010 }, { "epoch": 1.5962023635639357, "grad_norm": 1.221300094567036, "learning_rate": 2.151079876971519e-06, "loss": 0.22163718938827515, "step": 6011 }, { "epoch": 1.5964679325454787, "grad_norm": 1.1840952132259899, "learning_rate": 2.1483599723653415e-06, "loss": 0.1960998773574829, "step": 6012 }, { "epoch": 1.5967335015270216, "grad_norm": 1.1732770789502442, "learning_rate": 2.145641581462762e-06, "loss": 0.20811150968074799, "step": 6013 }, { "epoch": 1.5969990705085646, "grad_norm": 1.2065470685478314, "learning_rate": 2.1429247047878534e-06, "loss": 0.23184621334075928, "step": 6014 }, { "epoch": 1.5972646394901076, "grad_norm": 1.3338850940720004, "learning_rate": 2.1402093428643942e-06, "loss": 0.22043758630752563, "step": 6015 }, { "epoch": 1.5975302084716505, "grad_norm": 1.1736165993383876, "learning_rate": 2.137495496215878e-06, "loss": 0.18621152639389038, "step": 6016 }, { "epoch": 1.5977957774531935, "grad_norm": 1.332636421894691, "learning_rate": 2.1347831653654995e-06, "loss": 0.2422473132610321, "step": 6017 }, { "epoch": 1.5980613464347364, "grad_norm": 1.5933227500597664, "learning_rate": 2.132072350836164e-06, "loss": 0.2147202491760254, "step": 6018 }, { "epoch": 1.5983269154162794, "grad_norm": 1.5455916288717333, "learning_rate": 2.1293630531504873e-06, "loss": 0.23091933131217957, "step": 6019 }, { "epoch": 1.5985924843978223, "grad_norm": 1.290869089573798, "learning_rate": 2.1266552728307876e-06, "loss": 0.220037579536438, "step": 6020 }, { "epoch": 1.5988580533793653, "grad_norm": 1.3343924424387823, "learning_rate": 2.1239490103990946e-06, "loss": 0.25520551204681396, "step": 6021 }, { "epoch": 1.5991236223609082, "grad_norm": 1.412222062207012, "learning_rate": 2.1212442663771427e-06, "loss": 0.23216915130615234, "step": 6022 }, { "epoch": 1.5993891913424512, "grad_norm": 1.381515312381825, "learning_rate": 2.118541041286374e-06, "loss": 0.22098806500434875, "step": 6023 }, { "epoch": 1.5996547603239941, "grad_norm": 1.4609594644715316, "learning_rate": 2.11583933564794e-06, "loss": 0.261300265789032, "step": 6024 }, { "epoch": 1.599920329305537, "grad_norm": 1.2095539498781858, "learning_rate": 2.113139149982698e-06, "loss": 0.20427154004573822, "step": 6025 }, { "epoch": 1.60018589828708, "grad_norm": 1.2158101663646808, "learning_rate": 2.110440484811209e-06, "loss": 0.20700547099113464, "step": 6026 }, { "epoch": 1.600451467268623, "grad_norm": 1.4331467444820847, "learning_rate": 2.1077433406537475e-06, "loss": 0.2789752185344696, "step": 6027 }, { "epoch": 1.600717036250166, "grad_norm": 1.2991321976135584, "learning_rate": 2.1050477180302885e-06, "loss": 0.2205841988325119, "step": 6028 }, { "epoch": 1.6009826052317089, "grad_norm": 1.3197920849647402, "learning_rate": 2.1023536174605184e-06, "loss": 0.24921822547912598, "step": 6029 }, { "epoch": 1.6012481742132518, "grad_norm": 2.014197229906981, "learning_rate": 2.0996610394638228e-06, "loss": 0.2516329288482666, "step": 6030 }, { "epoch": 1.6015137431947948, "grad_norm": 1.2656936665142342, "learning_rate": 2.096969984559306e-06, "loss": 0.21832503378391266, "step": 6031 }, { "epoch": 1.6017793121763377, "grad_norm": 1.530808592055088, "learning_rate": 2.094280453265769e-06, "loss": 0.2499273419380188, "step": 6032 }, { "epoch": 1.6020448811578807, "grad_norm": 1.167125195859278, "learning_rate": 2.09159244610172e-06, "loss": 0.21701282262802124, "step": 6033 }, { "epoch": 1.6023104501394236, "grad_norm": 1.2536801575307182, "learning_rate": 2.0889059635853783e-06, "loss": 0.24446213245391846, "step": 6034 }, { "epoch": 1.6025760191209666, "grad_norm": 1.412317581200794, "learning_rate": 2.0862210062346622e-06, "loss": 0.27299973368644714, "step": 6035 }, { "epoch": 1.6028415881025095, "grad_norm": 1.320945278338079, "learning_rate": 2.0835375745672027e-06, "loss": 0.2384832501411438, "step": 6036 }, { "epoch": 1.6031071570840525, "grad_norm": 1.340788170535406, "learning_rate": 2.0808556691003335e-06, "loss": 0.2563338875770569, "step": 6037 }, { "epoch": 1.6033727260655954, "grad_norm": 1.5240284764155023, "learning_rate": 2.0781752903510954e-06, "loss": 0.29148975014686584, "step": 6038 }, { "epoch": 1.6036382950471384, "grad_norm": 1.1673304070468655, "learning_rate": 2.0754964388362264e-06, "loss": 0.24276503920555115, "step": 6039 }, { "epoch": 1.6039038640286813, "grad_norm": 1.2629655044665746, "learning_rate": 2.0728191150721866e-06, "loss": 0.1863931119441986, "step": 6040 }, { "epoch": 1.6041694330102243, "grad_norm": 1.1731073698012655, "learning_rate": 2.0701433195751286e-06, "loss": 0.21270868182182312, "step": 6041 }, { "epoch": 1.6044350019917673, "grad_norm": 1.2780583308550695, "learning_rate": 2.0674690528609155e-06, "loss": 0.21542516350746155, "step": 6042 }, { "epoch": 1.6047005709733102, "grad_norm": 1.256432235067539, "learning_rate": 2.0647963154451124e-06, "loss": 0.23099860548973083, "step": 6043 }, { "epoch": 1.6049661399548532, "grad_norm": 1.1769565332020941, "learning_rate": 2.062125107842993e-06, "loss": 0.22757291793823242, "step": 6044 }, { "epoch": 1.605231708936396, "grad_norm": 1.317404807729369, "learning_rate": 2.0594554305695346e-06, "loss": 0.2370409518480301, "step": 6045 }, { "epoch": 1.605497277917939, "grad_norm": 1.1803781252235817, "learning_rate": 2.0567872841394186e-06, "loss": 0.21620309352874756, "step": 6046 }, { "epoch": 1.605762846899482, "grad_norm": 1.2191738819977833, "learning_rate": 2.0541206690670324e-06, "loss": 0.22821158170700073, "step": 6047 }, { "epoch": 1.606028415881025, "grad_norm": 1.385940331470305, "learning_rate": 2.0514555858664663e-06, "loss": 0.24930253624916077, "step": 6048 }, { "epoch": 1.606293984862568, "grad_norm": 1.3966922562239508, "learning_rate": 2.048792035051521e-06, "loss": 0.2491561770439148, "step": 6049 }, { "epoch": 1.6065595538441109, "grad_norm": 1.3037697337655914, "learning_rate": 2.046130017135697e-06, "loss": 0.20652002096176147, "step": 6050 }, { "epoch": 1.6068251228256538, "grad_norm": 1.1970911046995705, "learning_rate": 2.0434695326321975e-06, "loss": 0.25670793652534485, "step": 6051 }, { "epoch": 1.6070906918071968, "grad_norm": 1.2469219040368793, "learning_rate": 2.0408105820539328e-06, "loss": 0.2328418493270874, "step": 6052 }, { "epoch": 1.6073562607887397, "grad_norm": 1.2657559287734064, "learning_rate": 2.0381531659135213e-06, "loss": 0.20811162889003754, "step": 6053 }, { "epoch": 1.6076218297702827, "grad_norm": 1.2637409014709644, "learning_rate": 2.0354972847232756e-06, "loss": 0.24068522453308105, "step": 6054 }, { "epoch": 1.6078873987518256, "grad_norm": 1.3537388998191249, "learning_rate": 2.032842938995221e-06, "loss": 0.2519197463989258, "step": 6055 }, { "epoch": 1.6081529677333686, "grad_norm": 1.349413355425799, "learning_rate": 2.030190129241083e-06, "loss": 0.2293267697095871, "step": 6056 }, { "epoch": 1.6084185367149118, "grad_norm": 1.8474927483406436, "learning_rate": 2.027538855972291e-06, "loss": 0.22398510575294495, "step": 6057 }, { "epoch": 1.6086841056964547, "grad_norm": 1.4186878733418118, "learning_rate": 2.0248891196999833e-06, "loss": 0.23074102401733398, "step": 6058 }, { "epoch": 1.6089496746779977, "grad_norm": 1.352152679115686, "learning_rate": 2.0222409209349957e-06, "loss": 0.2618173658847809, "step": 6059 }, { "epoch": 1.6092152436595406, "grad_norm": 1.2898742263880296, "learning_rate": 2.0195942601878703e-06, "loss": 0.25361114740371704, "step": 6060 }, { "epoch": 1.6094808126410836, "grad_norm": 1.2270527625039152, "learning_rate": 2.016949137968851e-06, "loss": 0.2276519238948822, "step": 6061 }, { "epoch": 1.6097463816226265, "grad_norm": 1.3155356069823825, "learning_rate": 2.0143055547878863e-06, "loss": 0.20834363996982574, "step": 6062 }, { "epoch": 1.6100119506041695, "grad_norm": 1.348708703656222, "learning_rate": 2.011663511154628e-06, "loss": 0.2579394578933716, "step": 6063 }, { "epoch": 1.6102775195857124, "grad_norm": 1.2574503425710122, "learning_rate": 2.009023007578431e-06, "loss": 0.22118912637233734, "step": 6064 }, { "epoch": 1.6105430885672554, "grad_norm": 1.1631210187007555, "learning_rate": 2.0063840445683537e-06, "loss": 0.1881515383720398, "step": 6065 }, { "epoch": 1.6108086575487983, "grad_norm": 1.2884662240297928, "learning_rate": 2.003746622633155e-06, "loss": 0.2270805984735489, "step": 6066 }, { "epoch": 1.6110742265303413, "grad_norm": 1.4261065534360056, "learning_rate": 2.0011107422813013e-06, "loss": 0.26356351375579834, "step": 6067 }, { "epoch": 1.6113397955118842, "grad_norm": 1.2506363457624738, "learning_rate": 1.9984764040209615e-06, "loss": 0.22937676310539246, "step": 6068 }, { "epoch": 1.6116053644934272, "grad_norm": 1.329188800311282, "learning_rate": 1.99584360836e-06, "loss": 0.25062739849090576, "step": 6069 }, { "epoch": 1.6118709334749701, "grad_norm": 1.1593663351806502, "learning_rate": 1.993212355805989e-06, "loss": 0.2031324952840805, "step": 6070 }, { "epoch": 1.612136502456513, "grad_norm": 1.3722085699931008, "learning_rate": 1.990582646866206e-06, "loss": 0.25769656896591187, "step": 6071 }, { "epoch": 1.612402071438056, "grad_norm": 1.3184109520906713, "learning_rate": 1.987954482047626e-06, "loss": 0.23856252431869507, "step": 6072 }, { "epoch": 1.612667640419599, "grad_norm": 1.3452730145342116, "learning_rate": 1.9853278618569284e-06, "loss": 0.2336723804473877, "step": 6073 }, { "epoch": 1.612933209401142, "grad_norm": 1.3427497614935235, "learning_rate": 1.9827027868004942e-06, "loss": 0.22327622771263123, "step": 6074 }, { "epoch": 1.6131987783826849, "grad_norm": 1.302817235652594, "learning_rate": 1.980079257384405e-06, "loss": 0.26695019006729126, "step": 6075 }, { "epoch": 1.6134643473642278, "grad_norm": 1.174792834468628, "learning_rate": 1.9774572741144514e-06, "loss": 0.2467387616634369, "step": 6076 }, { "epoch": 1.6137299163457708, "grad_norm": 1.3974546997540778, "learning_rate": 1.9748368374961193e-06, "loss": 0.25473737716674805, "step": 6077 }, { "epoch": 1.6139954853273137, "grad_norm": 1.295354894556923, "learning_rate": 1.972217948034596e-06, "loss": 0.25508594512939453, "step": 6078 }, { "epoch": 1.6142610543088567, "grad_norm": 1.2627621502033493, "learning_rate": 1.969600606234774e-06, "loss": 0.23020131886005402, "step": 6079 }, { "epoch": 1.6145266232903996, "grad_norm": 1.2036992831321345, "learning_rate": 1.9669848126012447e-06, "loss": 0.249805748462677, "step": 6080 }, { "epoch": 1.6147921922719426, "grad_norm": 1.2304217597704168, "learning_rate": 1.964370567638303e-06, "loss": 0.2377707064151764, "step": 6081 }, { "epoch": 1.6150577612534855, "grad_norm": 1.3812388616949685, "learning_rate": 1.9617578718499452e-06, "loss": 0.28656789660453796, "step": 6082 }, { "epoch": 1.6153233302350285, "grad_norm": 1.3083477730508752, "learning_rate": 1.9591467257398668e-06, "loss": 0.22079989314079285, "step": 6083 }, { "epoch": 1.6155888992165715, "grad_norm": 1.048982897357468, "learning_rate": 1.9565371298114666e-06, "loss": 0.1993042230606079, "step": 6084 }, { "epoch": 1.6158544681981146, "grad_norm": 1.1837758778278344, "learning_rate": 1.9539290845678438e-06, "loss": 0.20818357169628143, "step": 6085 }, { "epoch": 1.6161200371796576, "grad_norm": 1.2192677831294998, "learning_rate": 1.9513225905117996e-06, "loss": 0.20531761646270752, "step": 6086 }, { "epoch": 1.6163856061612005, "grad_norm": 1.2499003349392819, "learning_rate": 1.948717648145834e-06, "loss": 0.23414376378059387, "step": 6087 }, { "epoch": 1.6166511751427435, "grad_norm": 1.2073482694002922, "learning_rate": 1.9461142579721493e-06, "loss": 0.2025471031665802, "step": 6088 }, { "epoch": 1.6169167441242864, "grad_norm": 1.4729414889087271, "learning_rate": 1.943512420492649e-06, "loss": 0.19130446016788483, "step": 6089 }, { "epoch": 1.6171823131058294, "grad_norm": 1.1947055473554775, "learning_rate": 1.940912136208938e-06, "loss": 0.21637848019599915, "step": 6090 }, { "epoch": 1.6174478820873723, "grad_norm": 1.301401884532825, "learning_rate": 1.9383134056223176e-06, "loss": 0.26844075322151184, "step": 6091 }, { "epoch": 1.6177134510689153, "grad_norm": 1.1755891449306313, "learning_rate": 1.935716229233794e-06, "loss": 0.19573305547237396, "step": 6092 }, { "epoch": 1.6179790200504582, "grad_norm": 1.2705214543802177, "learning_rate": 1.93312060754407e-06, "loss": 0.22705954313278198, "step": 6093 }, { "epoch": 1.6182445890320012, "grad_norm": 1.279170245457384, "learning_rate": 1.9305265410535545e-06, "loss": 0.2505400478839874, "step": 6094 }, { "epoch": 1.6185101580135441, "grad_norm": 1.2108711177458409, "learning_rate": 1.927934030262353e-06, "loss": 0.2328193187713623, "step": 6095 }, { "epoch": 1.618775726995087, "grad_norm": 1.2588974628750198, "learning_rate": 1.9253430756702674e-06, "loss": 0.23876577615737915, "step": 6096 }, { "epoch": 1.61904129597663, "grad_norm": 1.3685755624123837, "learning_rate": 1.9227536777768063e-06, "loss": 0.2390732318162918, "step": 6097 }, { "epoch": 1.619306864958173, "grad_norm": 1.3858306009370809, "learning_rate": 1.9201658370811736e-06, "loss": 0.25231993198394775, "step": 6098 }, { "epoch": 1.619572433939716, "grad_norm": 1.2520374949609627, "learning_rate": 1.917579554082274e-06, "loss": 0.21527352929115295, "step": 6099 }, { "epoch": 1.619838002921259, "grad_norm": 1.2236250632687489, "learning_rate": 1.9149948292787133e-06, "loss": 0.21394580602645874, "step": 6100 }, { "epoch": 1.6201035719028019, "grad_norm": 1.3465338603905943, "learning_rate": 1.912411663168796e-06, "loss": 0.26093196868896484, "step": 6101 }, { "epoch": 1.6203691408843448, "grad_norm": 1.3518497357465815, "learning_rate": 1.9098300562505266e-06, "loss": 0.2631412744522095, "step": 6102 }, { "epoch": 1.6206347098658878, "grad_norm": 1.3007944720423297, "learning_rate": 1.9072500090216073e-06, "loss": 0.270250141620636, "step": 6103 }, { "epoch": 1.6209002788474307, "grad_norm": 1.3385737712068424, "learning_rate": 1.9046715219794397e-06, "loss": 0.22944031655788422, "step": 6104 }, { "epoch": 1.6211658478289737, "grad_norm": 1.2125488505372424, "learning_rate": 1.902094595621129e-06, "loss": 0.24429070949554443, "step": 6105 }, { "epoch": 1.6214314168105166, "grad_norm": 1.2581532570405378, "learning_rate": 1.8995192304434729e-06, "loss": 0.25656238198280334, "step": 6106 }, { "epoch": 1.6216969857920596, "grad_norm": 1.3466122688772229, "learning_rate": 1.8969454269429743e-06, "loss": 0.2575233280658722, "step": 6107 }, { "epoch": 1.6219625547736025, "grad_norm": 1.245984919504028, "learning_rate": 1.8943731856158299e-06, "loss": 0.24881063401699066, "step": 6108 }, { "epoch": 1.6222281237551455, "grad_norm": 1.2845731125917577, "learning_rate": 1.8918025069579382e-06, "loss": 0.23353847861289978, "step": 6109 }, { "epoch": 1.6224936927366884, "grad_norm": 1.2505489106727152, "learning_rate": 1.8892333914648953e-06, "loss": 0.21085457503795624, "step": 6110 }, { "epoch": 1.6227592617182314, "grad_norm": 1.4134001131082032, "learning_rate": 1.8866658396319947e-06, "loss": 0.28600943088531494, "step": 6111 }, { "epoch": 1.6230248306997743, "grad_norm": 1.1689838110439057, "learning_rate": 1.8840998519542352e-06, "loss": 0.22580507397651672, "step": 6112 }, { "epoch": 1.6232903996813173, "grad_norm": 1.212526750953587, "learning_rate": 1.8815354289263066e-06, "loss": 0.19310800731182098, "step": 6113 }, { "epoch": 1.6235559686628602, "grad_norm": 1.3020905454433194, "learning_rate": 1.8789725710425988e-06, "loss": 0.21633204817771912, "step": 6114 }, { "epoch": 1.6238215376444032, "grad_norm": 1.4315370828946672, "learning_rate": 1.8764112787972e-06, "loss": 0.21346023678779602, "step": 6115 }, { "epoch": 1.6240871066259461, "grad_norm": 1.21392020481053, "learning_rate": 1.8738515526838986e-06, "loss": 0.21206694841384888, "step": 6116 }, { "epoch": 1.624352675607489, "grad_norm": 1.3197096686410696, "learning_rate": 1.8712933931961773e-06, "loss": 0.2135339230298996, "step": 6117 }, { "epoch": 1.624618244589032, "grad_norm": 1.2484635869956482, "learning_rate": 1.8687368008272243e-06, "loss": 0.2168758660554886, "step": 6118 }, { "epoch": 1.624883813570575, "grad_norm": 1.1804251189525716, "learning_rate": 1.866181776069914e-06, "loss": 0.20825617015361786, "step": 6119 }, { "epoch": 1.625149382552118, "grad_norm": 1.291082575518304, "learning_rate": 1.863628319416826e-06, "loss": 0.25367867946624756, "step": 6120 }, { "epoch": 1.625414951533661, "grad_norm": 1.3053498393136334, "learning_rate": 1.8610764313602404e-06, "loss": 0.21604284644126892, "step": 6121 }, { "epoch": 1.6256805205152038, "grad_norm": 1.2871138327885168, "learning_rate": 1.8585261123921283e-06, "loss": 0.2324865758419037, "step": 6122 }, { "epoch": 1.6259460894967468, "grad_norm": 1.2467444217539543, "learning_rate": 1.8559773630041632e-06, "loss": 0.2077629417181015, "step": 6123 }, { "epoch": 1.6262116584782897, "grad_norm": 1.1704936500874914, "learning_rate": 1.8534301836877122e-06, "loss": 0.19919469952583313, "step": 6124 }, { "epoch": 1.6264772274598327, "grad_norm": 1.1998850682672693, "learning_rate": 1.8508845749338412e-06, "loss": 0.21069160103797913, "step": 6125 }, { "epoch": 1.6267427964413756, "grad_norm": 1.218804714337499, "learning_rate": 1.8483405372333152e-06, "loss": 0.2286640703678131, "step": 6126 }, { "epoch": 1.6270083654229186, "grad_norm": 1.33630910648056, "learning_rate": 1.8457980710765932e-06, "loss": 0.2430541068315506, "step": 6127 }, { "epoch": 1.6272739344044616, "grad_norm": 1.3713498598627625, "learning_rate": 1.8432571769538344e-06, "loss": 0.21875709295272827, "step": 6128 }, { "epoch": 1.6275395033860045, "grad_norm": 1.4416966555618131, "learning_rate": 1.8407178553548876e-06, "loss": 0.22591018676757812, "step": 6129 }, { "epoch": 1.6278050723675475, "grad_norm": 1.362917465597037, "learning_rate": 1.8381801067693129e-06, "loss": 0.25429075956344604, "step": 6130 }, { "epoch": 1.6280706413490904, "grad_norm": 1.31452454626215, "learning_rate": 1.8356439316863528e-06, "loss": 0.2437858283519745, "step": 6131 }, { "epoch": 1.6283362103306334, "grad_norm": 1.2489983792436092, "learning_rate": 1.8331093305949532e-06, "loss": 0.24196262657642365, "step": 6132 }, { "epoch": 1.6286017793121763, "grad_norm": 1.3756170241894088, "learning_rate": 1.8305763039837576e-06, "loss": 0.25779271125793457, "step": 6133 }, { "epoch": 1.6288673482937193, "grad_norm": 1.223955710903011, "learning_rate": 1.8280448523410987e-06, "loss": 0.23418015241622925, "step": 6134 }, { "epoch": 1.6291329172752622, "grad_norm": 1.3748973147827792, "learning_rate": 1.8255149761550128e-06, "loss": 0.2670775353908539, "step": 6135 }, { "epoch": 1.6293984862568052, "grad_norm": 1.423176544673552, "learning_rate": 1.822986675913231e-06, "loss": 0.29342639446258545, "step": 6136 }, { "epoch": 1.6296640552383481, "grad_norm": 1.244422511511833, "learning_rate": 1.8204599521031785e-06, "loss": 0.22768062353134155, "step": 6137 }, { "epoch": 1.629929624219891, "grad_norm": 1.6355607569945512, "learning_rate": 1.817934805211976e-06, "loss": 0.23938167095184326, "step": 6138 }, { "epoch": 1.630195193201434, "grad_norm": 1.311916117620117, "learning_rate": 1.8154112357264474e-06, "loss": 0.1982264518737793, "step": 6139 }, { "epoch": 1.630460762182977, "grad_norm": 1.3026965235969699, "learning_rate": 1.8128892441331047e-06, "loss": 0.23591312766075134, "step": 6140 }, { "epoch": 1.63072633116452, "grad_norm": 1.259123916156089, "learning_rate": 1.8103688309181567e-06, "loss": 0.20317673683166504, "step": 6141 }, { "epoch": 1.6309919001460629, "grad_norm": 1.2846300858550195, "learning_rate": 1.8078499965675112e-06, "loss": 0.233676478266716, "step": 6142 }, { "epoch": 1.6312574691276058, "grad_norm": 1.3296785293607047, "learning_rate": 1.8053327415667688e-06, "loss": 0.22850775718688965, "step": 6143 }, { "epoch": 1.6315230381091488, "grad_norm": 1.2850656633806874, "learning_rate": 1.8028170664012268e-06, "loss": 0.2603572607040405, "step": 6144 }, { "epoch": 1.6317886070906917, "grad_norm": 1.3208849168125785, "learning_rate": 1.8003029715558773e-06, "loss": 0.27881523966789246, "step": 6145 }, { "epoch": 1.6320541760722347, "grad_norm": 1.225668329292659, "learning_rate": 1.797790457515406e-06, "loss": 0.21744176745414734, "step": 6146 }, { "epoch": 1.6323197450537776, "grad_norm": 1.2220588910103882, "learning_rate": 1.7952795247642008e-06, "loss": 0.20449542999267578, "step": 6147 }, { "epoch": 1.6325853140353206, "grad_norm": 1.3015735321136237, "learning_rate": 1.7927701737863402e-06, "loss": 0.25641053915023804, "step": 6148 }, { "epoch": 1.6328508830168635, "grad_norm": 1.294201240106412, "learning_rate": 1.7902624050655914e-06, "loss": 0.23583751916885376, "step": 6149 }, { "epoch": 1.6331164519984065, "grad_norm": 1.4310897316272893, "learning_rate": 1.787756219085427e-06, "loss": 0.2709866762161255, "step": 6150 }, { "epoch": 1.6333820209799494, "grad_norm": 1.2536554341378991, "learning_rate": 1.785251616329009e-06, "loss": 0.233103945851326, "step": 6151 }, { "epoch": 1.6336475899614924, "grad_norm": 1.2660813048243769, "learning_rate": 1.7827485972791957e-06, "loss": 0.2665184438228607, "step": 6152 }, { "epoch": 1.6339131589430353, "grad_norm": 1.2551185732946457, "learning_rate": 1.7802471624185392e-06, "loss": 0.20934605598449707, "step": 6153 }, { "epoch": 1.6341787279245783, "grad_norm": 1.2179362426676639, "learning_rate": 1.7777473122292866e-06, "loss": 0.2102464735507965, "step": 6154 }, { "epoch": 1.6344442969061213, "grad_norm": 1.2289784110367914, "learning_rate": 1.7752490471933769e-06, "loss": 0.22889986634254456, "step": 6155 }, { "epoch": 1.6347098658876642, "grad_norm": 1.3627659705359922, "learning_rate": 1.772752367792452e-06, "loss": 0.2261584997177124, "step": 6156 }, { "epoch": 1.6349754348692072, "grad_norm": 1.2186249427048736, "learning_rate": 1.7702572745078395e-06, "loss": 0.21456710994243622, "step": 6157 }, { "epoch": 1.63524100385075, "grad_norm": 1.1535452073956258, "learning_rate": 1.7677637678205627e-06, "loss": 0.22762097418308258, "step": 6158 }, { "epoch": 1.635506572832293, "grad_norm": 1.306484526102534, "learning_rate": 1.7652718482113417e-06, "loss": 0.24772633612155914, "step": 6159 }, { "epoch": 1.635772141813836, "grad_norm": 1.3290630048425123, "learning_rate": 1.7627815161605887e-06, "loss": 0.22980757057666779, "step": 6160 }, { "epoch": 1.636037710795379, "grad_norm": 1.1593602123779645, "learning_rate": 1.760292772148411e-06, "loss": 0.19560125470161438, "step": 6161 }, { "epoch": 1.636303279776922, "grad_norm": 1.388673809129743, "learning_rate": 1.7578056166546086e-06, "loss": 0.23733064532279968, "step": 6162 }, { "epoch": 1.6365688487584649, "grad_norm": 1.2026681813349183, "learning_rate": 1.7553200501586743e-06, "loss": 0.21064560115337372, "step": 6163 }, { "epoch": 1.6368344177400078, "grad_norm": 1.3444341606502546, "learning_rate": 1.7528360731397986e-06, "loss": 0.26709994673728943, "step": 6164 }, { "epoch": 1.6370999867215508, "grad_norm": 1.2755110888757868, "learning_rate": 1.750353686076861e-06, "loss": 0.26555943489074707, "step": 6165 }, { "epoch": 1.6373655557030937, "grad_norm": 1.3299250322981557, "learning_rate": 1.7478728894484375e-06, "loss": 0.24480760097503662, "step": 6166 }, { "epoch": 1.6376311246846367, "grad_norm": 1.2560095314061934, "learning_rate": 1.7453936837327967e-06, "loss": 0.2170884907245636, "step": 6167 }, { "epoch": 1.6378966936661796, "grad_norm": 1.340756013397369, "learning_rate": 1.7429160694078983e-06, "loss": 0.24728982150554657, "step": 6168 }, { "epoch": 1.6381622626477228, "grad_norm": 1.1911402182063675, "learning_rate": 1.7404400469513994e-06, "loss": 0.20886945724487305, "step": 6169 }, { "epoch": 1.6384278316292658, "grad_norm": 1.2150445755778985, "learning_rate": 1.7379656168406467e-06, "loss": 0.1892474740743637, "step": 6170 }, { "epoch": 1.6386934006108087, "grad_norm": 1.3004801024505461, "learning_rate": 1.7354927795526821e-06, "loss": 0.24953782558441162, "step": 6171 }, { "epoch": 1.6389589695923517, "grad_norm": 1.2292705802712374, "learning_rate": 1.7330215355642377e-06, "loss": 0.2311600148677826, "step": 6172 }, { "epoch": 1.6392245385738946, "grad_norm": 1.2596864005467026, "learning_rate": 1.73055188535174e-06, "loss": 0.24018675088882446, "step": 6173 }, { "epoch": 1.6394901075554376, "grad_norm": 1.3394449685829455, "learning_rate": 1.7280838293913116e-06, "loss": 0.22607022523880005, "step": 6174 }, { "epoch": 1.6397556765369805, "grad_norm": 1.2860534255043978, "learning_rate": 1.7256173681587619e-06, "loss": 0.23725482821464539, "step": 6175 }, { "epoch": 1.6400212455185235, "grad_norm": 1.2500709715234832, "learning_rate": 1.723152502129597e-06, "loss": 0.241235613822937, "step": 6176 }, { "epoch": 1.6402868145000664, "grad_norm": 1.2070755501863832, "learning_rate": 1.7206892317790136e-06, "loss": 0.2150690108537674, "step": 6177 }, { "epoch": 1.6405523834816094, "grad_norm": 1.2557873581014805, "learning_rate": 1.7182275575819007e-06, "loss": 0.22133421897888184, "step": 6178 }, { "epoch": 1.6408179524631523, "grad_norm": 1.1297884729403, "learning_rate": 1.7157674800128399e-06, "loss": 0.1937463879585266, "step": 6179 }, { "epoch": 1.6410835214446953, "grad_norm": 1.0851305240668396, "learning_rate": 1.7133089995461062e-06, "loss": 0.18938027322292328, "step": 6180 }, { "epoch": 1.6413490904262382, "grad_norm": 1.2621430482402598, "learning_rate": 1.7108521166556646e-06, "loss": 0.23577997088432312, "step": 6181 }, { "epoch": 1.6416146594077812, "grad_norm": 1.2915526813468403, "learning_rate": 1.7083968318151734e-06, "loss": 0.2712448537349701, "step": 6182 }, { "epoch": 1.6418802283893241, "grad_norm": 1.276409938985324, "learning_rate": 1.7059431454979825e-06, "loss": 0.24242255091667175, "step": 6183 }, { "epoch": 1.642145797370867, "grad_norm": 1.3152058895449834, "learning_rate": 1.7034910581771347e-06, "loss": 0.22521010041236877, "step": 6184 }, { "epoch": 1.64241136635241, "grad_norm": 1.3840145244958133, "learning_rate": 1.7010405703253618e-06, "loss": 0.22026273608207703, "step": 6185 }, { "epoch": 1.642676935333953, "grad_norm": 1.458737402535225, "learning_rate": 1.6985916824150894e-06, "loss": 0.22726528346538544, "step": 6186 }, { "epoch": 1.642942504315496, "grad_norm": 1.3396783040947258, "learning_rate": 1.6961443949184353e-06, "loss": 0.25172409415245056, "step": 6187 }, { "epoch": 1.6432080732970389, "grad_norm": 1.1393591185728944, "learning_rate": 1.6936987083072065e-06, "loss": 0.21173113584518433, "step": 6188 }, { "epoch": 1.6434736422785818, "grad_norm": 1.3589729407555038, "learning_rate": 1.6912546230529036e-06, "loss": 0.22596749663352966, "step": 6189 }, { "epoch": 1.6437392112601248, "grad_norm": 1.3604263454917045, "learning_rate": 1.6888121396267166e-06, "loss": 0.2749077081680298, "step": 6190 }, { "epoch": 1.6440047802416677, "grad_norm": 2.5555069132462283, "learning_rate": 1.6863712584995252e-06, "loss": 0.22150780260562897, "step": 6191 }, { "epoch": 1.6442703492232107, "grad_norm": 1.2838243253096144, "learning_rate": 1.6839319801419073e-06, "loss": 0.23437368869781494, "step": 6192 }, { "epoch": 1.6445359182047536, "grad_norm": 1.3069256977628543, "learning_rate": 1.681494305024125e-06, "loss": 0.22949008643627167, "step": 6193 }, { "epoch": 1.6448014871862966, "grad_norm": 1.2956112975441718, "learning_rate": 1.6790582336161332e-06, "loss": 0.24147525429725647, "step": 6194 }, { "epoch": 1.6450670561678395, "grad_norm": 1.180082798545332, "learning_rate": 1.6766237663875773e-06, "loss": 0.2001456618309021, "step": 6195 }, { "epoch": 1.6453326251493825, "grad_norm": 1.2710753216206616, "learning_rate": 1.674190903807794e-06, "loss": 0.17668186128139496, "step": 6196 }, { "epoch": 1.6455981941309257, "grad_norm": 1.369840319031622, "learning_rate": 1.6717596463458107e-06, "loss": 0.24585255980491638, "step": 6197 }, { "epoch": 1.6458637631124686, "grad_norm": 1.2328642285488454, "learning_rate": 1.6693299944703479e-06, "loss": 0.2234572172164917, "step": 6198 }, { "epoch": 1.6461293320940116, "grad_norm": 1.2369910191993496, "learning_rate": 1.6669019486498083e-06, "loss": 0.2007240653038025, "step": 6199 }, { "epoch": 1.6463949010755545, "grad_norm": 1.317383450933259, "learning_rate": 1.6644755093522913e-06, "loss": 0.21926215291023254, "step": 6200 }, { "epoch": 1.6466604700570975, "grad_norm": 1.3404302006039666, "learning_rate": 1.662050677045589e-06, "loss": 0.24797898530960083, "step": 6201 }, { "epoch": 1.6469260390386404, "grad_norm": 1.285343354391859, "learning_rate": 1.65962745219718e-06, "loss": 0.22087037563323975, "step": 6202 }, { "epoch": 1.6471916080201834, "grad_norm": 1.2765781805195457, "learning_rate": 1.6572058352742327e-06, "loss": 0.23073960840702057, "step": 6203 }, { "epoch": 1.6474571770017263, "grad_norm": 1.3644493807061109, "learning_rate": 1.6547858267436056e-06, "loss": 0.2430298924446106, "step": 6204 }, { "epoch": 1.6477227459832693, "grad_norm": 1.286198443262182, "learning_rate": 1.6523674270718493e-06, "loss": 0.23337247967720032, "step": 6205 }, { "epoch": 1.6479883149648122, "grad_norm": 1.2144238817830517, "learning_rate": 1.6499506367252016e-06, "loss": 0.22141093015670776, "step": 6206 }, { "epoch": 1.6482538839463552, "grad_norm": 1.280282959866893, "learning_rate": 1.647535456169591e-06, "loss": 0.23247988522052765, "step": 6207 }, { "epoch": 1.6485194529278981, "grad_norm": 1.3728921390628253, "learning_rate": 1.6451218858706374e-06, "loss": 0.2659391760826111, "step": 6208 }, { "epoch": 1.648785021909441, "grad_norm": 1.2534645715863684, "learning_rate": 1.642709926293644e-06, "loss": 0.2154998630285263, "step": 6209 }, { "epoch": 1.649050590890984, "grad_norm": 1.322825591754104, "learning_rate": 1.6402995779036146e-06, "loss": 0.20363599061965942, "step": 6210 }, { "epoch": 1.649316159872527, "grad_norm": 1.3775669953664806, "learning_rate": 1.6378908411652328e-06, "loss": 0.23388779163360596, "step": 6211 }, { "epoch": 1.64958172885407, "grad_norm": 1.205059730534318, "learning_rate": 1.6354837165428772e-06, "loss": 0.20465341210365295, "step": 6212 }, { "epoch": 1.649847297835613, "grad_norm": 1.2409004364034002, "learning_rate": 1.6330782045006088e-06, "loss": 0.2233584225177765, "step": 6213 }, { "epoch": 1.6501128668171559, "grad_norm": 1.313264623251788, "learning_rate": 1.6306743055021834e-06, "loss": 0.2880077064037323, "step": 6214 }, { "epoch": 1.6503784357986988, "grad_norm": 1.2769524753658168, "learning_rate": 1.6282720200110458e-06, "loss": 0.23332230746746063, "step": 6215 }, { "epoch": 1.6506440047802418, "grad_norm": 1.2682336609825682, "learning_rate": 1.6258713484903266e-06, "loss": 0.22191204130649567, "step": 6216 }, { "epoch": 1.6509095737617847, "grad_norm": 1.2899982671052521, "learning_rate": 1.6234722914028478e-06, "loss": 0.2403659224510193, "step": 6217 }, { "epoch": 1.6511751427433277, "grad_norm": 1.2823746538865957, "learning_rate": 1.6210748492111161e-06, "loss": 0.2230256348848343, "step": 6218 }, { "epoch": 1.6514407117248706, "grad_norm": 1.233703409456991, "learning_rate": 1.6186790223773375e-06, "loss": 0.2086302787065506, "step": 6219 }, { "epoch": 1.6517062807064136, "grad_norm": 1.2696219439991872, "learning_rate": 1.6162848113633934e-06, "loss": 0.22336703538894653, "step": 6220 }, { "epoch": 1.6519718496879565, "grad_norm": 1.2026474951561137, "learning_rate": 1.6138922166308613e-06, "loss": 0.2354746013879776, "step": 6221 }, { "epoch": 1.6522374186694995, "grad_norm": 1.212799588563382, "learning_rate": 1.6115012386410045e-06, "loss": 0.23983564972877502, "step": 6222 }, { "epoch": 1.6525029876510424, "grad_norm": 1.3394195242071623, "learning_rate": 1.6091118778547765e-06, "loss": 0.25468897819519043, "step": 6223 }, { "epoch": 1.6527685566325854, "grad_norm": 1.2085737685975797, "learning_rate": 1.6067241347328166e-06, "loss": 0.2225346863269806, "step": 6224 }, { "epoch": 1.6530341256141283, "grad_norm": 1.4474708027397767, "learning_rate": 1.6043380097354543e-06, "loss": 0.28801992535591125, "step": 6225 }, { "epoch": 1.6532996945956713, "grad_norm": 1.1308003259460488, "learning_rate": 1.6019535033227063e-06, "loss": 0.1869816929101944, "step": 6226 }, { "epoch": 1.6535652635772142, "grad_norm": 1.3022141110443597, "learning_rate": 1.5995706159542768e-06, "loss": 0.2569049894809723, "step": 6227 }, { "epoch": 1.6538308325587572, "grad_norm": 1.2689496619282572, "learning_rate": 1.5971893480895583e-06, "loss": 0.19138488173484802, "step": 6228 }, { "epoch": 1.6540964015403001, "grad_norm": 1.2583553251304942, "learning_rate": 1.5948097001876318e-06, "loss": 0.23107777535915375, "step": 6229 }, { "epoch": 1.654361970521843, "grad_norm": 1.4140324563807463, "learning_rate": 1.5924316727072652e-06, "loss": 0.21682313084602356, "step": 6230 }, { "epoch": 1.654627539503386, "grad_norm": 1.6445896965406597, "learning_rate": 1.5900552661069135e-06, "loss": 0.27629974484443665, "step": 6231 }, { "epoch": 1.654893108484929, "grad_norm": 1.2060133562172235, "learning_rate": 1.587680480844721e-06, "loss": 0.21919876337051392, "step": 6232 }, { "epoch": 1.655158677466472, "grad_norm": 1.4827934801999716, "learning_rate": 1.5853073173785183e-06, "loss": 0.2556184232234955, "step": 6233 }, { "epoch": 1.655424246448015, "grad_norm": 1.1362954303327644, "learning_rate": 1.5829357761658214e-06, "loss": 0.1904449462890625, "step": 6234 }, { "epoch": 1.6556898154295578, "grad_norm": 1.2410374365127181, "learning_rate": 1.5805658576638372e-06, "loss": 0.1991434246301651, "step": 6235 }, { "epoch": 1.6559553844111008, "grad_norm": 1.4428347821081515, "learning_rate": 1.5781975623294554e-06, "loss": 0.2609177231788635, "step": 6236 }, { "epoch": 1.6562209533926437, "grad_norm": 1.276051044481299, "learning_rate": 1.575830890619261e-06, "loss": 0.2481592893600464, "step": 6237 }, { "epoch": 1.6564865223741867, "grad_norm": 1.2930470444266673, "learning_rate": 1.5734658429895156e-06, "loss": 0.23855090141296387, "step": 6238 }, { "epoch": 1.6567520913557297, "grad_norm": 1.326739898505445, "learning_rate": 1.5711024198961745e-06, "loss": 0.2480623573064804, "step": 6239 }, { "epoch": 1.6570176603372726, "grad_norm": 1.4145385747738486, "learning_rate": 1.5687406217948775e-06, "loss": 0.2504739463329315, "step": 6240 }, { "epoch": 1.6572832293188156, "grad_norm": 1.1843269954841462, "learning_rate": 1.5663804491409506e-06, "loss": 0.2068580538034439, "step": 6241 }, { "epoch": 1.6575487983003585, "grad_norm": 1.45151426190796, "learning_rate": 1.5640219023894077e-06, "loss": 0.2448163628578186, "step": 6242 }, { "epoch": 1.6578143672819015, "grad_norm": 1.3391765527579818, "learning_rate": 1.5616649819949492e-06, "loss": 0.2514716386795044, "step": 6243 }, { "epoch": 1.6580799362634444, "grad_norm": 1.1884099966156902, "learning_rate": 1.559309688411962e-06, "loss": 0.2067629098892212, "step": 6244 }, { "epoch": 1.6583455052449874, "grad_norm": 1.2042735442206352, "learning_rate": 1.5569560220945168e-06, "loss": 0.22909750044345856, "step": 6245 }, { "epoch": 1.6586110742265303, "grad_norm": 1.4646403481954997, "learning_rate": 1.5546039834963745e-06, "loss": 0.203629732131958, "step": 6246 }, { "epoch": 1.6588766432080733, "grad_norm": 1.2050936311763847, "learning_rate": 1.552253573070981e-06, "loss": 0.21919086575508118, "step": 6247 }, { "epoch": 1.6591422121896162, "grad_norm": 1.4379501702554756, "learning_rate": 1.549904791271466e-06, "loss": 0.2535661458969116, "step": 6248 }, { "epoch": 1.6594077811711592, "grad_norm": 1.2609582047884877, "learning_rate": 1.5475576385506475e-06, "loss": 0.224460631608963, "step": 6249 }, { "epoch": 1.6596733501527021, "grad_norm": 1.2625738742925756, "learning_rate": 1.5452121153610288e-06, "loss": 0.21925818920135498, "step": 6250 }, { "epoch": 1.659938919134245, "grad_norm": 1.2787763694898493, "learning_rate": 1.5428682221547997e-06, "loss": 0.2100696563720703, "step": 6251 }, { "epoch": 1.660204488115788, "grad_norm": 1.3484219674096825, "learning_rate": 1.540525959383834e-06, "loss": 0.25982293486595154, "step": 6252 }, { "epoch": 1.660470057097331, "grad_norm": 1.2527966644905648, "learning_rate": 1.538185327499694e-06, "loss": 0.23615162074565887, "step": 6253 }, { "epoch": 1.660735626078874, "grad_norm": 1.2738910414784854, "learning_rate": 1.5358463269536218e-06, "loss": 0.2454022467136383, "step": 6254 }, { "epoch": 1.6610011950604169, "grad_norm": 1.3825181535789863, "learning_rate": 1.5335089581965556e-06, "loss": 0.2330605536699295, "step": 6255 }, { "epoch": 1.6612667640419598, "grad_norm": 1.2169082012465264, "learning_rate": 1.5311732216791087e-06, "loss": 0.23193006217479706, "step": 6256 }, { "epoch": 1.6615323330235028, "grad_norm": 1.2690481284418431, "learning_rate": 1.5288391178515838e-06, "loss": 0.23254770040512085, "step": 6257 }, { "epoch": 1.6617979020050457, "grad_norm": 1.2246821396199268, "learning_rate": 1.5265066471639701e-06, "loss": 0.23240572214126587, "step": 6258 }, { "epoch": 1.6620634709865887, "grad_norm": 1.3414134094293932, "learning_rate": 1.5241758100659386e-06, "loss": 0.2765730619430542, "step": 6259 }, { "epoch": 1.6623290399681316, "grad_norm": 1.2956291225041994, "learning_rate": 1.5218466070068472e-06, "loss": 0.26366496086120605, "step": 6260 }, { "epoch": 1.6625946089496746, "grad_norm": 1.240730160583952, "learning_rate": 1.5195190384357405e-06, "loss": 0.22322653234004974, "step": 6261 }, { "epoch": 1.6628601779312175, "grad_norm": 1.2433877123660553, "learning_rate": 1.5171931048013466e-06, "loss": 0.24144116044044495, "step": 6262 }, { "epoch": 1.6631257469127605, "grad_norm": 1.3783130308299147, "learning_rate": 1.5148688065520734e-06, "loss": 0.24559618532657623, "step": 6263 }, { "epoch": 1.6633913158943034, "grad_norm": 1.3258590224160887, "learning_rate": 1.5125461441360223e-06, "loss": 0.24337056279182434, "step": 6264 }, { "epoch": 1.6636568848758464, "grad_norm": 1.3292875380649603, "learning_rate": 1.5102251180009752e-06, "loss": 0.2733612358570099, "step": 6265 }, { "epoch": 1.6639224538573893, "grad_norm": 1.2329811544038785, "learning_rate": 1.5079057285943976e-06, "loss": 0.2116459757089615, "step": 6266 }, { "epoch": 1.6641880228389323, "grad_norm": 1.2335642813115397, "learning_rate": 1.5055879763634407e-06, "loss": 0.21221664547920227, "step": 6267 }, { "epoch": 1.6644535918204753, "grad_norm": 1.2500150658336624, "learning_rate": 1.503271861754939e-06, "loss": 0.21166589856147766, "step": 6268 }, { "epoch": 1.6647191608020182, "grad_norm": 1.5113123418333367, "learning_rate": 1.5009573852154136e-06, "loss": 0.2652161121368408, "step": 6269 }, { "epoch": 1.6649847297835612, "grad_norm": 1.262834880378694, "learning_rate": 1.4986445471910672e-06, "loss": 0.22142267227172852, "step": 6270 }, { "epoch": 1.665250298765104, "grad_norm": 1.4442965183949772, "learning_rate": 1.4963333481277874e-06, "loss": 0.2307332456111908, "step": 6271 }, { "epoch": 1.665515867746647, "grad_norm": 1.411326986781179, "learning_rate": 1.494023788471144e-06, "loss": 0.2669411897659302, "step": 6272 }, { "epoch": 1.66578143672819, "grad_norm": 1.2823998109594834, "learning_rate": 1.4917158686663992e-06, "loss": 0.2468804121017456, "step": 6273 }, { "epoch": 1.666047005709733, "grad_norm": 1.2639666166307362, "learning_rate": 1.4894095891584882e-06, "loss": 0.24152463674545288, "step": 6274 }, { "epoch": 1.666312574691276, "grad_norm": 1.098201760932299, "learning_rate": 1.4871049503920353e-06, "loss": 0.1966545283794403, "step": 6275 }, { "epoch": 1.6665781436728189, "grad_norm": 1.2773845282560163, "learning_rate": 1.4848019528113477e-06, "loss": 0.24772626161575317, "step": 6276 }, { "epoch": 1.6668437126543618, "grad_norm": 1.3731672204722256, "learning_rate": 1.4825005968604189e-06, "loss": 0.22138851881027222, "step": 6277 }, { "epoch": 1.6671092816359048, "grad_norm": 1.2245583238686863, "learning_rate": 1.4802008829829172e-06, "loss": 0.24345465004444122, "step": 6278 }, { "epoch": 1.6673748506174477, "grad_norm": 1.3209828849983516, "learning_rate": 1.477902811622205e-06, "loss": 0.22862716019153595, "step": 6279 }, { "epoch": 1.6676404195989907, "grad_norm": 1.2914770883474422, "learning_rate": 1.4756063832213207e-06, "loss": 0.2763083577156067, "step": 6280 }, { "epoch": 1.6679059885805336, "grad_norm": 1.3142139937070516, "learning_rate": 1.4733115982229885e-06, "loss": 0.24631357192993164, "step": 6281 }, { "epoch": 1.6681715575620768, "grad_norm": 1.322429969576976, "learning_rate": 1.4710184570696184e-06, "loss": 0.22650030255317688, "step": 6282 }, { "epoch": 1.6684371265436198, "grad_norm": 1.3243342318873437, "learning_rate": 1.4687269602033006e-06, "loss": 0.2455909103155136, "step": 6283 }, { "epoch": 1.6687026955251627, "grad_norm": 1.3711517369784783, "learning_rate": 1.4664371080658079e-06, "loss": 0.25625506043434143, "step": 6284 }, { "epoch": 1.6689682645067057, "grad_norm": 1.1450036681372322, "learning_rate": 1.4641489010985954e-06, "loss": 0.22178369760513306, "step": 6285 }, { "epoch": 1.6692338334882486, "grad_norm": 1.2644620602089436, "learning_rate": 1.4618623397428055e-06, "loss": 0.23936234414577484, "step": 6286 }, { "epoch": 1.6694994024697916, "grad_norm": 1.2667144776178243, "learning_rate": 1.459577424439258e-06, "loss": 0.21629829704761505, "step": 6287 }, { "epoch": 1.6697649714513345, "grad_norm": 1.3486786043134158, "learning_rate": 1.457294155628457e-06, "loss": 0.238427072763443, "step": 6288 }, { "epoch": 1.6700305404328775, "grad_norm": 1.412674472973442, "learning_rate": 1.4550125337505926e-06, "loss": 0.23168250918388367, "step": 6289 }, { "epoch": 1.6702961094144204, "grad_norm": 1.3185872633193214, "learning_rate": 1.45273255924553e-06, "loss": 0.25518402457237244, "step": 6290 }, { "epoch": 1.6705616783959634, "grad_norm": 1.2092220747685465, "learning_rate": 1.450454232552826e-06, "loss": 0.2488553822040558, "step": 6291 }, { "epoch": 1.6708272473775063, "grad_norm": 1.4309048190710245, "learning_rate": 1.448177554111716e-06, "loss": 0.2684085965156555, "step": 6292 }, { "epoch": 1.6710928163590493, "grad_norm": 1.3645105519242562, "learning_rate": 1.4459025243611124e-06, "loss": 0.24627447128295898, "step": 6293 }, { "epoch": 1.6713583853405922, "grad_norm": 1.2960987120962004, "learning_rate": 1.4436291437396156e-06, "loss": 0.24725376069545746, "step": 6294 }, { "epoch": 1.6716239543221352, "grad_norm": 1.2752333210419433, "learning_rate": 1.4413574126855067e-06, "loss": 0.23488914966583252, "step": 6295 }, { "epoch": 1.6718895233036781, "grad_norm": 1.2385365684534737, "learning_rate": 1.4390873316367492e-06, "loss": 0.2031177133321762, "step": 6296 }, { "epoch": 1.672155092285221, "grad_norm": 1.265889760948498, "learning_rate": 1.4368189010309874e-06, "loss": 0.25378018617630005, "step": 6297 }, { "epoch": 1.672420661266764, "grad_norm": 1.2443137764428682, "learning_rate": 1.434552121305548e-06, "loss": 0.21305282413959503, "step": 6298 }, { "epoch": 1.672686230248307, "grad_norm": 1.1925787762252436, "learning_rate": 1.432286992897437e-06, "loss": 0.20908987522125244, "step": 6299 }, { "epoch": 1.67295179922985, "grad_norm": 1.2228377563088515, "learning_rate": 1.4300235162433496e-06, "loss": 0.21945340931415558, "step": 6300 }, { "epoch": 1.6732173682113929, "grad_norm": 1.3659267409445854, "learning_rate": 1.4277616917796544e-06, "loss": 0.22096669673919678, "step": 6301 }, { "epoch": 1.6734829371929358, "grad_norm": 1.2773291306452106, "learning_rate": 1.425501519942406e-06, "loss": 0.2233850657939911, "step": 6302 }, { "epoch": 1.6737485061744788, "grad_norm": 1.2672720076411363, "learning_rate": 1.423243001167337e-06, "loss": 0.21432995796203613, "step": 6303 }, { "epoch": 1.6740140751560217, "grad_norm": 1.3864014459258447, "learning_rate": 1.4209861358898636e-06, "loss": 0.2649557590484619, "step": 6304 }, { "epoch": 1.6742796441375647, "grad_norm": 1.2642836811067808, "learning_rate": 1.418730924545083e-06, "loss": 0.24918347597122192, "step": 6305 }, { "epoch": 1.6745452131191076, "grad_norm": 1.3089175693989048, "learning_rate": 1.4164773675677745e-06, "loss": 0.24121029675006866, "step": 6306 }, { "epoch": 1.6748107821006506, "grad_norm": 1.2569762960026158, "learning_rate": 1.4142254653923949e-06, "loss": 0.24401789903640747, "step": 6307 }, { "epoch": 1.6750763510821935, "grad_norm": 1.3272546708188746, "learning_rate": 1.4119752184530867e-06, "loss": 0.2374853938817978, "step": 6308 }, { "epoch": 1.6753419200637365, "grad_norm": 1.2973848864698938, "learning_rate": 1.4097266271836695e-06, "loss": 0.2351088970899582, "step": 6309 }, { "epoch": 1.6756074890452797, "grad_norm": 1.301417674196528, "learning_rate": 1.407479692017647e-06, "loss": 0.19560754299163818, "step": 6310 }, { "epoch": 1.6758730580268226, "grad_norm": 1.390250023674765, "learning_rate": 1.405234413388199e-06, "loss": 0.24124252796173096, "step": 6311 }, { "epoch": 1.6761386270083656, "grad_norm": 1.3742469305206364, "learning_rate": 1.4029907917281903e-06, "loss": 0.2208215445280075, "step": 6312 }, { "epoch": 1.6764041959899085, "grad_norm": 1.2125662977366807, "learning_rate": 1.4007488274701653e-06, "loss": 0.23888292908668518, "step": 6313 }, { "epoch": 1.6766697649714515, "grad_norm": 1.2936432356109655, "learning_rate": 1.3985085210463479e-06, "loss": 0.24079063534736633, "step": 6314 }, { "epoch": 1.6769353339529944, "grad_norm": 1.2011852751375642, "learning_rate": 1.3962698728886414e-06, "loss": 0.18975606560707092, "step": 6315 }, { "epoch": 1.6772009029345374, "grad_norm": 1.322599968285396, "learning_rate": 1.3940328834286333e-06, "loss": 0.201214998960495, "step": 6316 }, { "epoch": 1.6774664719160803, "grad_norm": 1.2090909210103018, "learning_rate": 1.3917975530975836e-06, "loss": 0.20079322159290314, "step": 6317 }, { "epoch": 1.6777320408976233, "grad_norm": 1.2732868066143843, "learning_rate": 1.3895638823264447e-06, "loss": 0.23593586683273315, "step": 6318 }, { "epoch": 1.6779976098791662, "grad_norm": 1.3931846809533017, "learning_rate": 1.3873318715458383e-06, "loss": 0.26574259996414185, "step": 6319 }, { "epoch": 1.6782631788607092, "grad_norm": 1.252943610173436, "learning_rate": 1.3851015211860696e-06, "loss": 0.20573323965072632, "step": 6320 }, { "epoch": 1.6785287478422521, "grad_norm": 1.4484920974875073, "learning_rate": 1.3828728316771244e-06, "loss": 0.25610506534576416, "step": 6321 }, { "epoch": 1.678794316823795, "grad_norm": 1.330338299337135, "learning_rate": 1.380645803448668e-06, "loss": 0.2138693630695343, "step": 6322 }, { "epoch": 1.679059885805338, "grad_norm": 1.1479105398064924, "learning_rate": 1.3784204369300447e-06, "loss": 0.21522866189479828, "step": 6323 }, { "epoch": 1.679325454786881, "grad_norm": 1.441538971613898, "learning_rate": 1.376196732550279e-06, "loss": 0.25622743368148804, "step": 6324 }, { "epoch": 1.679591023768424, "grad_norm": 1.354050705773023, "learning_rate": 1.3739746907380757e-06, "loss": 0.18025386333465576, "step": 6325 }, { "epoch": 1.679856592749967, "grad_norm": 1.1665775097977176, "learning_rate": 1.3717543119218168e-06, "loss": 0.18785078823566437, "step": 6326 }, { "epoch": 1.6801221617315099, "grad_norm": 1.3771154706722653, "learning_rate": 1.3695355965295653e-06, "loss": 0.24682481586933136, "step": 6327 }, { "epoch": 1.6803877307130528, "grad_norm": 1.2994385931646761, "learning_rate": 1.3673185449890647e-06, "loss": 0.2193487137556076, "step": 6328 }, { "epoch": 1.6806532996945958, "grad_norm": 1.2960131024456552, "learning_rate": 1.3651031577277351e-06, "loss": 0.24963265657424927, "step": 6329 }, { "epoch": 1.6809188686761387, "grad_norm": 1.2714587333981215, "learning_rate": 1.3628894351726785e-06, "loss": 0.21473057568073273, "step": 6330 }, { "epoch": 1.6811844376576817, "grad_norm": 1.4508064568072063, "learning_rate": 1.3606773777506731e-06, "loss": 0.2539534866809845, "step": 6331 }, { "epoch": 1.6814500066392246, "grad_norm": 1.5049767699399101, "learning_rate": 1.3584669858881771e-06, "loss": 0.2671799659729004, "step": 6332 }, { "epoch": 1.6817155756207676, "grad_norm": 1.211295376852026, "learning_rate": 1.3562582600113295e-06, "loss": 0.24291013181209564, "step": 6333 }, { "epoch": 1.6819811446023105, "grad_norm": 1.3672105989135315, "learning_rate": 1.354051200545946e-06, "loss": 0.24249233305454254, "step": 6334 }, { "epoch": 1.6822467135838535, "grad_norm": 1.2855842039831968, "learning_rate": 1.351845807917519e-06, "loss": 0.21647261083126068, "step": 6335 }, { "epoch": 1.6825122825653964, "grad_norm": 1.2764605035604815, "learning_rate": 1.349642082551227e-06, "loss": 0.2348332703113556, "step": 6336 }, { "epoch": 1.6827778515469394, "grad_norm": 1.3049495455341118, "learning_rate": 1.34744002487192e-06, "loss": 0.22503259778022766, "step": 6337 }, { "epoch": 1.6830434205284823, "grad_norm": 1.3236190891705721, "learning_rate": 1.3452396353041286e-06, "loss": 0.2397763580083847, "step": 6338 }, { "epoch": 1.6833089895100253, "grad_norm": 1.156426557066381, "learning_rate": 1.3430409142720624e-06, "loss": 0.23345956206321716, "step": 6339 }, { "epoch": 1.6835745584915682, "grad_norm": 1.1932341696009043, "learning_rate": 1.3408438621996088e-06, "loss": 0.19660598039627075, "step": 6340 }, { "epoch": 1.6838401274731112, "grad_norm": 1.262928020262074, "learning_rate": 1.3386484795103327e-06, "loss": 0.19148695468902588, "step": 6341 }, { "epoch": 1.6841056964546541, "grad_norm": 1.2112774084067142, "learning_rate": 1.3364547666274819e-06, "loss": 0.2078169733285904, "step": 6342 }, { "epoch": 1.684371265436197, "grad_norm": 1.3703852622718744, "learning_rate": 1.3342627239739715e-06, "loss": 0.23122575879096985, "step": 6343 }, { "epoch": 1.68463683441774, "grad_norm": 1.350523705417422, "learning_rate": 1.3320723519724032e-06, "loss": 0.2744083106517792, "step": 6344 }, { "epoch": 1.684902403399283, "grad_norm": 1.3462449472678248, "learning_rate": 1.3298836510450597e-06, "loss": 0.26361098885536194, "step": 6345 }, { "epoch": 1.685167972380826, "grad_norm": 1.2550654654863131, "learning_rate": 1.3276966216138932e-06, "loss": 0.21833205223083496, "step": 6346 }, { "epoch": 1.685433541362369, "grad_norm": 1.306325021058624, "learning_rate": 1.3255112641005374e-06, "loss": 0.22075100243091583, "step": 6347 }, { "epoch": 1.6856991103439118, "grad_norm": 1.4286786068270776, "learning_rate": 1.3233275789263034e-06, "loss": 0.24352343380451202, "step": 6348 }, { "epoch": 1.6859646793254548, "grad_norm": 1.5476580340833483, "learning_rate": 1.3211455665121808e-06, "loss": 0.2331303060054779, "step": 6349 }, { "epoch": 1.6862302483069977, "grad_norm": 1.398559395598541, "learning_rate": 1.3189652272788356e-06, "loss": 0.2511689066886902, "step": 6350 }, { "epoch": 1.6864958172885407, "grad_norm": 1.1704691076383393, "learning_rate": 1.3167865616466113e-06, "loss": 0.18535873293876648, "step": 6351 }, { "epoch": 1.6867613862700837, "grad_norm": 1.3097469055952822, "learning_rate": 1.3146095700355289e-06, "loss": 0.23924914002418518, "step": 6352 }, { "epoch": 1.6870269552516266, "grad_norm": 1.1591649275755667, "learning_rate": 1.3124342528652845e-06, "loss": 0.19710025191307068, "step": 6353 }, { "epoch": 1.6872925242331696, "grad_norm": 1.393629731020981, "learning_rate": 1.3102606105552585e-06, "loss": 0.21439281105995178, "step": 6354 }, { "epoch": 1.6875580932147125, "grad_norm": 1.3051512833867451, "learning_rate": 1.3080886435245e-06, "loss": 0.2647722363471985, "step": 6355 }, { "epoch": 1.6878236621962555, "grad_norm": 2.6038516980586355, "learning_rate": 1.3059183521917396e-06, "loss": 0.2202019840478897, "step": 6356 }, { "epoch": 1.6880892311777984, "grad_norm": 1.3022104210295473, "learning_rate": 1.3037497369753871e-06, "loss": 0.25833001732826233, "step": 6357 }, { "epoch": 1.6883548001593414, "grad_norm": 1.1906464618269579, "learning_rate": 1.3015827982935192e-06, "loss": 0.19984321296215057, "step": 6358 }, { "epoch": 1.6886203691408843, "grad_norm": 1.3347301103088016, "learning_rate": 1.2994175365638996e-06, "loss": 0.2190552055835724, "step": 6359 }, { "epoch": 1.6888859381224273, "grad_norm": 1.265894337049371, "learning_rate": 1.2972539522039652e-06, "loss": 0.26262593269348145, "step": 6360 }, { "epoch": 1.6891515071039702, "grad_norm": 1.285416913994909, "learning_rate": 1.2950920456308292e-06, "loss": 0.2665651738643646, "step": 6361 }, { "epoch": 1.6894170760855132, "grad_norm": 1.213162722605336, "learning_rate": 1.2929318172612803e-06, "loss": 0.22369208931922913, "step": 6362 }, { "epoch": 1.6896826450670561, "grad_norm": 1.2234073567984471, "learning_rate": 1.2907732675117878e-06, "loss": 0.21063543856143951, "step": 6363 }, { "epoch": 1.689948214048599, "grad_norm": 1.3608426715056905, "learning_rate": 1.2886163967984944e-06, "loss": 0.2303045690059662, "step": 6364 }, { "epoch": 1.690213783030142, "grad_norm": 1.1473656525455074, "learning_rate": 1.2864612055372182e-06, "loss": 0.20185884833335876, "step": 6365 }, { "epoch": 1.690479352011685, "grad_norm": 1.2673026097919315, "learning_rate": 1.284307694143455e-06, "loss": 0.22900527715682983, "step": 6366 }, { "epoch": 1.690744920993228, "grad_norm": 1.2373147270640896, "learning_rate": 1.282155863032377e-06, "loss": 0.21405862271785736, "step": 6367 }, { "epoch": 1.6910104899747709, "grad_norm": 1.3139606008654157, "learning_rate": 1.2800057126188304e-06, "loss": 0.26143258810043335, "step": 6368 }, { "epoch": 1.6912760589563138, "grad_norm": 1.319330305112879, "learning_rate": 1.2778572433173397e-06, "loss": 0.24437926709651947, "step": 6369 }, { "epoch": 1.6915416279378568, "grad_norm": 1.1954155676954614, "learning_rate": 1.275710455542104e-06, "loss": 0.24862337112426758, "step": 6370 }, { "epoch": 1.6918071969193997, "grad_norm": 1.2264107157331223, "learning_rate": 1.2735653497069978e-06, "loss": 0.2146604359149933, "step": 6371 }, { "epoch": 1.6920727659009427, "grad_norm": 1.3217815480091177, "learning_rate": 1.2714219262255777e-06, "loss": 0.2525256872177124, "step": 6372 }, { "epoch": 1.6923383348824856, "grad_norm": 1.289957068010404, "learning_rate": 1.2692801855110638e-06, "loss": 0.23462912440299988, "step": 6373 }, { "epoch": 1.6926039038640286, "grad_norm": 1.3468375801476438, "learning_rate": 1.2671401279763595e-06, "loss": 0.21551170945167542, "step": 6374 }, { "epoch": 1.6928694728455715, "grad_norm": 1.4457180200872415, "learning_rate": 1.2650017540340454e-06, "loss": 0.24094407260417938, "step": 6375 }, { "epoch": 1.6931350418271145, "grad_norm": 1.2168123169553724, "learning_rate": 1.2628650640963736e-06, "loss": 0.23101133108139038, "step": 6376 }, { "epoch": 1.6934006108086574, "grad_norm": 1.4830646801660192, "learning_rate": 1.2607300585752724e-06, "loss": 0.2513899803161621, "step": 6377 }, { "epoch": 1.6936661797902004, "grad_norm": 1.417144859782869, "learning_rate": 1.258596737882345e-06, "loss": 0.2490600198507309, "step": 6378 }, { "epoch": 1.6939317487717434, "grad_norm": 1.3403225341914131, "learning_rate": 1.256465102428872e-06, "loss": 0.25767675042152405, "step": 6379 }, { "epoch": 1.6941973177532863, "grad_norm": 1.2775246675329248, "learning_rate": 1.254335152625804e-06, "loss": 0.2231348305940628, "step": 6380 }, { "epoch": 1.6944628867348293, "grad_norm": 1.4410136520558763, "learning_rate": 1.2522068888837758e-06, "loss": 0.25873979926109314, "step": 6381 }, { "epoch": 1.6947284557163722, "grad_norm": 1.4111151195923193, "learning_rate": 1.2500803116130887e-06, "loss": 0.2848423421382904, "step": 6382 }, { "epoch": 1.6949940246979152, "grad_norm": 1.1110125207312456, "learning_rate": 1.247955421223721e-06, "loss": 0.21343804895877838, "step": 6383 }, { "epoch": 1.695259593679458, "grad_norm": 1.3025436504976033, "learning_rate": 1.245832218125328e-06, "loss": 0.23080062866210938, "step": 6384 }, { "epoch": 1.695525162661001, "grad_norm": 1.3020267493975237, "learning_rate": 1.2437107027272376e-06, "loss": 0.2397225797176361, "step": 6385 }, { "epoch": 1.695790731642544, "grad_norm": 1.3120966348534624, "learning_rate": 1.2415908754384532e-06, "loss": 0.22798654437065125, "step": 6386 }, { "epoch": 1.696056300624087, "grad_norm": 1.3399304326822938, "learning_rate": 1.2394727366676518e-06, "loss": 0.2534061074256897, "step": 6387 }, { "epoch": 1.69632186960563, "grad_norm": 1.2269756633197797, "learning_rate": 1.2373562868231858e-06, "loss": 0.2127036452293396, "step": 6388 }, { "epoch": 1.6965874385871729, "grad_norm": 1.341525895521795, "learning_rate": 1.2352415263130813e-06, "loss": 0.22341205179691315, "step": 6389 }, { "epoch": 1.6968530075687158, "grad_norm": 1.316572711467383, "learning_rate": 1.2331284555450406e-06, "loss": 0.2435426563024521, "step": 6390 }, { "epoch": 1.6971185765502588, "grad_norm": 1.3203864338710647, "learning_rate": 1.2310170749264383e-06, "loss": 0.24652531743049622, "step": 6391 }, { "epoch": 1.6973841455318017, "grad_norm": 1.251250109623578, "learning_rate": 1.228907384864323e-06, "loss": 0.24172671139240265, "step": 6392 }, { "epoch": 1.6976497145133447, "grad_norm": 1.293405881850453, "learning_rate": 1.2267993857654182e-06, "loss": 0.21534420549869537, "step": 6393 }, { "epoch": 1.6979152834948879, "grad_norm": 2.1259133697182575, "learning_rate": 1.2246930780361221e-06, "loss": 0.2617778182029724, "step": 6394 }, { "epoch": 1.6981808524764308, "grad_norm": 1.1793022391098469, "learning_rate": 1.2225884620825046e-06, "loss": 0.20388583838939667, "step": 6395 }, { "epoch": 1.6984464214579738, "grad_norm": 1.289033320527503, "learning_rate": 1.220485538310312e-06, "loss": 0.23714327812194824, "step": 6396 }, { "epoch": 1.6987119904395167, "grad_norm": 1.3592785135687544, "learning_rate": 1.2183843071249634e-06, "loss": 0.2495463341474533, "step": 6397 }, { "epoch": 1.6989775594210597, "grad_norm": 1.2730498991215184, "learning_rate": 1.2162847689315483e-06, "loss": 0.2419012188911438, "step": 6398 }, { "epoch": 1.6992431284026026, "grad_norm": 1.2226640861076554, "learning_rate": 1.214186924134838e-06, "loss": 0.23392438888549805, "step": 6399 }, { "epoch": 1.6995086973841456, "grad_norm": 1.3210458214149883, "learning_rate": 1.2120907731392695e-06, "loss": 0.22855526208877563, "step": 6400 }, { "epoch": 1.6997742663656885, "grad_norm": 1.2152782326664608, "learning_rate": 1.2099963163489558e-06, "loss": 0.22393949329853058, "step": 6401 }, { "epoch": 1.7000398353472315, "grad_norm": 1.3855673404796554, "learning_rate": 1.2079035541676832e-06, "loss": 0.2539960741996765, "step": 6402 }, { "epoch": 1.7003054043287744, "grad_norm": 1.3330270743987416, "learning_rate": 1.2058124869989129e-06, "loss": 0.23716852068901062, "step": 6403 }, { "epoch": 1.7005709733103174, "grad_norm": 1.347782549245642, "learning_rate": 1.2037231152457773e-06, "loss": 0.24658545851707458, "step": 6404 }, { "epoch": 1.7008365422918603, "grad_norm": 1.2494300647338343, "learning_rate": 1.201635439311083e-06, "loss": 0.2316630333662033, "step": 6405 }, { "epoch": 1.7011021112734033, "grad_norm": 1.0834142572483991, "learning_rate": 1.1995494595973089e-06, "loss": 0.20434345304965973, "step": 6406 }, { "epoch": 1.7013676802549462, "grad_norm": 1.3445140884275912, "learning_rate": 1.197465176506607e-06, "loss": 0.2585931420326233, "step": 6407 }, { "epoch": 1.7016332492364892, "grad_norm": 1.2567668360829787, "learning_rate": 1.1953825904408033e-06, "loss": 0.23007069528102875, "step": 6408 }, { "epoch": 1.7018988182180321, "grad_norm": 1.2770978609777501, "learning_rate": 1.1933017018013948e-06, "loss": 0.21822810173034668, "step": 6409 }, { "epoch": 1.702164387199575, "grad_norm": 1.2875752799081717, "learning_rate": 1.1912225109895526e-06, "loss": 0.241228848695755, "step": 6410 }, { "epoch": 1.702429956181118, "grad_norm": 1.3509759956774154, "learning_rate": 1.1891450184061203e-06, "loss": 0.28803908824920654, "step": 6411 }, { "epoch": 1.702695525162661, "grad_norm": 1.3018941028318989, "learning_rate": 1.1870692244516147e-06, "loss": 0.2387516349554062, "step": 6412 }, { "epoch": 1.702961094144204, "grad_norm": 1.2538051398244094, "learning_rate": 1.1849951295262242e-06, "loss": 0.19774140417575836, "step": 6413 }, { "epoch": 1.7032266631257469, "grad_norm": 1.269953409174644, "learning_rate": 1.1829227340298088e-06, "loss": 0.22842247784137726, "step": 6414 }, { "epoch": 1.7034922321072898, "grad_norm": 1.1987695898844528, "learning_rate": 1.1808520383619015e-06, "loss": 0.21994739770889282, "step": 6415 }, { "epoch": 1.7037578010888328, "grad_norm": 1.2719096074486522, "learning_rate": 1.1787830429217084e-06, "loss": 0.22328051924705505, "step": 6416 }, { "epoch": 1.7040233700703757, "grad_norm": 1.3583279531737376, "learning_rate": 1.1767157481081092e-06, "loss": 0.26704326272010803, "step": 6417 }, { "epoch": 1.7042889390519187, "grad_norm": 1.2796404749500392, "learning_rate": 1.174650154319653e-06, "loss": 0.2148481160402298, "step": 6418 }, { "epoch": 1.7045545080334616, "grad_norm": 1.1912742761204351, "learning_rate": 1.1725862619545625e-06, "loss": 0.21731218695640564, "step": 6419 }, { "epoch": 1.7048200770150046, "grad_norm": 1.3502505047017879, "learning_rate": 1.1705240714107301e-06, "loss": 0.20832043886184692, "step": 6420 }, { "epoch": 1.7050856459965475, "grad_norm": 1.2922565511595965, "learning_rate": 1.1684635830857249e-06, "loss": 0.21739046275615692, "step": 6421 }, { "epoch": 1.7053512149780907, "grad_norm": 1.3041232291639149, "learning_rate": 1.1664047973767811e-06, "loss": 0.23972246050834656, "step": 6422 }, { "epoch": 1.7056167839596337, "grad_norm": 1.2420174603299015, "learning_rate": 1.1643477146808092e-06, "loss": 0.2471289187669754, "step": 6423 }, { "epoch": 1.7058823529411766, "grad_norm": 1.2148999014811244, "learning_rate": 1.1622923353943916e-06, "loss": 0.2014283537864685, "step": 6424 }, { "epoch": 1.7061479219227196, "grad_norm": 1.1799937956162947, "learning_rate": 1.1602386599137782e-06, "loss": 0.21680915355682373, "step": 6425 }, { "epoch": 1.7064134909042625, "grad_norm": 1.2221660563202492, "learning_rate": 1.158186688634898e-06, "loss": 0.2101205736398697, "step": 6426 }, { "epoch": 1.7066790598858055, "grad_norm": 1.2879683442276364, "learning_rate": 1.1561364219533444e-06, "loss": 0.22114071249961853, "step": 6427 }, { "epoch": 1.7069446288673484, "grad_norm": 1.2910925736026095, "learning_rate": 1.1540878602643858e-06, "loss": 0.20608706772327423, "step": 6428 }, { "epoch": 1.7072101978488914, "grad_norm": 1.2486066037383718, "learning_rate": 1.1520410039629593e-06, "loss": 0.2247905433177948, "step": 6429 }, { "epoch": 1.7074757668304343, "grad_norm": 1.1718742986299986, "learning_rate": 1.1499958534436751e-06, "loss": 0.22623226046562195, "step": 6430 }, { "epoch": 1.7077413358119773, "grad_norm": 1.2776253558863635, "learning_rate": 1.1479524091008142e-06, "loss": 0.2063906192779541, "step": 6431 }, { "epoch": 1.7080069047935202, "grad_norm": 1.4035125322254989, "learning_rate": 1.1459106713283286e-06, "loss": 0.2787795960903168, "step": 6432 }, { "epoch": 1.7082724737750632, "grad_norm": 1.2096674582385407, "learning_rate": 1.1438706405198419e-06, "loss": 0.23090440034866333, "step": 6433 }, { "epoch": 1.7085380427566061, "grad_norm": 1.288319877687408, "learning_rate": 1.141832317068645e-06, "loss": 0.23690670728683472, "step": 6434 }, { "epoch": 1.708803611738149, "grad_norm": 1.2499926164056985, "learning_rate": 1.1397957013677064e-06, "loss": 0.209202378988266, "step": 6435 }, { "epoch": 1.709069180719692, "grad_norm": 1.2311768368116, "learning_rate": 1.1377607938096635e-06, "loss": 0.22541575133800507, "step": 6436 }, { "epoch": 1.709334749701235, "grad_norm": 1.3505125458173146, "learning_rate": 1.1357275947868162e-06, "loss": 0.2460884153842926, "step": 6437 }, { "epoch": 1.709600318682778, "grad_norm": 1.195327574575731, "learning_rate": 1.1336961046911443e-06, "loss": 0.21967202425003052, "step": 6438 }, { "epoch": 1.709865887664321, "grad_norm": 1.346022527152768, "learning_rate": 1.1316663239142954e-06, "loss": 0.23619329929351807, "step": 6439 }, { "epoch": 1.7101314566458639, "grad_norm": 1.3033234842407981, "learning_rate": 1.129638252847587e-06, "loss": 0.24563436210155487, "step": 6440 }, { "epoch": 1.7103970256274068, "grad_norm": 1.3840933006905622, "learning_rate": 1.1276118918820068e-06, "loss": 0.25508859753608704, "step": 6441 }, { "epoch": 1.7106625946089498, "grad_norm": 1.3406379279103604, "learning_rate": 1.1255872414082136e-06, "loss": 0.24761545658111572, "step": 6442 }, { "epoch": 1.7109281635904927, "grad_norm": 4.632018568484065, "learning_rate": 1.1235643018165344e-06, "loss": 0.2355962097644806, "step": 6443 }, { "epoch": 1.7111937325720357, "grad_norm": 1.3274457548497118, "learning_rate": 1.1215430734969723e-06, "loss": 0.2534273862838745, "step": 6444 }, { "epoch": 1.7114593015535786, "grad_norm": 1.2846712625276346, "learning_rate": 1.1195235568391938e-06, "loss": 0.2756424844264984, "step": 6445 }, { "epoch": 1.7117248705351216, "grad_norm": 1.2126020570228762, "learning_rate": 1.1175057522325383e-06, "loss": 0.2198309451341629, "step": 6446 }, { "epoch": 1.7119904395166645, "grad_norm": 1.2343738377988847, "learning_rate": 1.1154896600660136e-06, "loss": 0.21767666935920715, "step": 6447 }, { "epoch": 1.7122560084982075, "grad_norm": 1.4965895030859304, "learning_rate": 1.1134752807283e-06, "loss": 0.2679128348827362, "step": 6448 }, { "epoch": 1.7125215774797504, "grad_norm": 1.292131622576057, "learning_rate": 1.1114626146077457e-06, "loss": 0.2268792986869812, "step": 6449 }, { "epoch": 1.7127871464612934, "grad_norm": 1.224637524783582, "learning_rate": 1.109451662092369e-06, "loss": 0.21585378050804138, "step": 6450 }, { "epoch": 1.7130527154428363, "grad_norm": 1.3157463227820392, "learning_rate": 1.1074424235698567e-06, "loss": 0.2258647382259369, "step": 6451 }, { "epoch": 1.7133182844243793, "grad_norm": 1.3742268123946286, "learning_rate": 1.1054348994275677e-06, "loss": 0.2456682175397873, "step": 6452 }, { "epoch": 1.7135838534059222, "grad_norm": 1.4853732102975625, "learning_rate": 1.1034290900525279e-06, "loss": 0.22897745668888092, "step": 6453 }, { "epoch": 1.7138494223874652, "grad_norm": 1.133114987282755, "learning_rate": 1.101424995831435e-06, "loss": 0.1910650134086609, "step": 6454 }, { "epoch": 1.7141149913690081, "grad_norm": 1.2728981818199352, "learning_rate": 1.0994226171506529e-06, "loss": 0.2519158720970154, "step": 6455 }, { "epoch": 1.714380560350551, "grad_norm": 1.259309948081026, "learning_rate": 1.0974219543962184e-06, "loss": 0.24191951751708984, "step": 6456 }, { "epoch": 1.714646129332094, "grad_norm": 1.3159238719963862, "learning_rate": 1.0954230079538352e-06, "loss": 0.2560814619064331, "step": 6457 }, { "epoch": 1.714911698313637, "grad_norm": 1.2640782659289207, "learning_rate": 1.0934257782088763e-06, "loss": 0.22969035804271698, "step": 6458 }, { "epoch": 1.71517726729518, "grad_norm": 1.3584917562872394, "learning_rate": 1.0914302655463837e-06, "loss": 0.26114046573638916, "step": 6459 }, { "epoch": 1.715442836276723, "grad_norm": 1.2235177756044688, "learning_rate": 1.0894364703510685e-06, "loss": 0.21457752585411072, "step": 6460 }, { "epoch": 1.7157084052582658, "grad_norm": 1.164559577491723, "learning_rate": 1.0874443930073098e-06, "loss": 0.19998760521411896, "step": 6461 }, { "epoch": 1.7159739742398088, "grad_norm": 1.2278101157674874, "learning_rate": 1.0854540338991615e-06, "loss": 0.2379671037197113, "step": 6462 }, { "epoch": 1.7162395432213517, "grad_norm": 1.3827652808641404, "learning_rate": 1.0834653934103367e-06, "loss": 0.2236609309911728, "step": 6463 }, { "epoch": 1.7165051122028947, "grad_norm": 1.2673726734268553, "learning_rate": 1.0814784719242234e-06, "loss": 0.22507379949092865, "step": 6464 }, { "epoch": 1.7167706811844377, "grad_norm": 1.3174434539455087, "learning_rate": 1.079493269823877e-06, "loss": 0.22138816118240356, "step": 6465 }, { "epoch": 1.7170362501659806, "grad_norm": 1.3880746036316538, "learning_rate": 1.0775097874920204e-06, "loss": 0.227338969707489, "step": 6466 }, { "epoch": 1.7173018191475236, "grad_norm": 1.2588670866885754, "learning_rate": 1.0755280253110466e-06, "loss": 0.23694375157356262, "step": 6467 }, { "epoch": 1.7175673881290665, "grad_norm": 1.365387614603678, "learning_rate": 1.0735479836630136e-06, "loss": 0.26219409704208374, "step": 6468 }, { "epoch": 1.7178329571106095, "grad_norm": 1.20539748496599, "learning_rate": 1.0715696629296524e-06, "loss": 0.22215887904167175, "step": 6469 }, { "epoch": 1.7180985260921524, "grad_norm": 1.3543481839639284, "learning_rate": 1.0695930634923602e-06, "loss": 0.25434768199920654, "step": 6470 }, { "epoch": 1.7183640950736954, "grad_norm": 1.1809119822759757, "learning_rate": 1.0676181857321998e-06, "loss": 0.2092076987028122, "step": 6471 }, { "epoch": 1.7186296640552383, "grad_norm": 1.330663320526799, "learning_rate": 1.0656450300299048e-06, "loss": 0.2710237503051758, "step": 6472 }, { "epoch": 1.7188952330367813, "grad_norm": 1.2715188060789504, "learning_rate": 1.0636735967658785e-06, "loss": 0.2533886432647705, "step": 6473 }, { "epoch": 1.7191608020183242, "grad_norm": 1.2174102707049457, "learning_rate": 1.0617038863201878e-06, "loss": 0.2545754909515381, "step": 6474 }, { "epoch": 1.7194263709998672, "grad_norm": 1.2560655592374788, "learning_rate": 1.0597358990725703e-06, "loss": 0.26010993123054504, "step": 6475 }, { "epoch": 1.7196919399814101, "grad_norm": 1.2632076366916114, "learning_rate": 1.0577696354024314e-06, "loss": 0.22529907524585724, "step": 6476 }, { "epoch": 1.719957508962953, "grad_norm": 1.157260113755536, "learning_rate": 1.0558050956888433e-06, "loss": 0.1897469311952591, "step": 6477 }, { "epoch": 1.720223077944496, "grad_norm": 1.31651804495616, "learning_rate": 1.0538422803105441e-06, "loss": 0.24663670361042023, "step": 6478 }, { "epoch": 1.720488646926039, "grad_norm": 1.343902959790046, "learning_rate": 1.0518811896459423e-06, "loss": 0.2462892383337021, "step": 6479 }, { "epoch": 1.720754215907582, "grad_norm": 1.117431347891292, "learning_rate": 1.0499218240731157e-06, "loss": 0.18652144074440002, "step": 6480 }, { "epoch": 1.7210197848891249, "grad_norm": 1.2234103731079693, "learning_rate": 1.0479641839698052e-06, "loss": 0.24614468216896057, "step": 6481 }, { "epoch": 1.7212853538706678, "grad_norm": 1.2632894895468527, "learning_rate": 1.046008269713421e-06, "loss": 0.27925312519073486, "step": 6482 }, { "epoch": 1.7215509228522108, "grad_norm": 1.3426272887839532, "learning_rate": 1.0440540816810395e-06, "loss": 0.2626710832118988, "step": 6483 }, { "epoch": 1.7218164918337537, "grad_norm": 1.2982212521269376, "learning_rate": 1.042101620249405e-06, "loss": 0.23039895296096802, "step": 6484 }, { "epoch": 1.7220820608152967, "grad_norm": 1.2564768074123291, "learning_rate": 1.0401508857949295e-06, "loss": 0.19559775292873383, "step": 6485 }, { "epoch": 1.7223476297968396, "grad_norm": 1.222035384596064, "learning_rate": 1.0382018786936943e-06, "loss": 0.24982990324497223, "step": 6486 }, { "epoch": 1.7226131987783826, "grad_norm": 1.356827120814655, "learning_rate": 1.0362545993214402e-06, "loss": 0.26212313771247864, "step": 6487 }, { "epoch": 1.7228787677599255, "grad_norm": 1.2583181328160484, "learning_rate": 1.0343090480535788e-06, "loss": 0.22827446460723877, "step": 6488 }, { "epoch": 1.7231443367414685, "grad_norm": 1.3650470156220376, "learning_rate": 1.032365225265196e-06, "loss": 0.2710435390472412, "step": 6489 }, { "epoch": 1.7234099057230114, "grad_norm": 1.560435811081079, "learning_rate": 1.030423131331033e-06, "loss": 0.25116702914237976, "step": 6490 }, { "epoch": 1.7236754747045544, "grad_norm": 1.2598369270207033, "learning_rate": 1.0284827666255048e-06, "loss": 0.1980481743812561, "step": 6491 }, { "epoch": 1.7239410436860974, "grad_norm": 1.3159445178277585, "learning_rate": 1.0265441315226898e-06, "loss": 0.2777971625328064, "step": 6492 }, { "epoch": 1.7242066126676403, "grad_norm": 1.3290253215924488, "learning_rate": 1.0246072263963336e-06, "loss": 0.23041702806949615, "step": 6493 }, { "epoch": 1.7244721816491833, "grad_norm": 1.2761862568921072, "learning_rate": 1.0226720516198495e-06, "loss": 0.21428728103637695, "step": 6494 }, { "epoch": 1.7247377506307262, "grad_norm": 1.2965072992275601, "learning_rate": 1.020738607566316e-06, "loss": 0.22577518224716187, "step": 6495 }, { "epoch": 1.7250033196122692, "grad_norm": 1.2489154030372867, "learning_rate": 1.0188068946084783e-06, "loss": 0.21080979704856873, "step": 6496 }, { "epoch": 1.7252688885938121, "grad_norm": 1.1941107816051266, "learning_rate": 1.0168769131187472e-06, "loss": 0.21232858300209045, "step": 6497 }, { "epoch": 1.725534457575355, "grad_norm": 1.3035016990745079, "learning_rate": 1.0149486634692019e-06, "loss": 0.25525614619255066, "step": 6498 }, { "epoch": 1.725800026556898, "grad_norm": 1.2742578592858531, "learning_rate": 1.0130221460315858e-06, "loss": 0.26291778683662415, "step": 6499 }, { "epoch": 1.726065595538441, "grad_norm": 1.1747703502148148, "learning_rate": 1.011097361177308e-06, "loss": 0.21314382553100586, "step": 6500 }, { "epoch": 1.726331164519984, "grad_norm": 1.3027182735878766, "learning_rate": 1.0091743092774474e-06, "loss": 0.2106419950723648, "step": 6501 }, { "epoch": 1.7265967335015269, "grad_norm": 1.2753206037657139, "learning_rate": 1.0072529907027407e-06, "loss": 0.22456032037734985, "step": 6502 }, { "epoch": 1.7268623024830698, "grad_norm": 2.1059170179774807, "learning_rate": 1.0053334058235975e-06, "loss": 0.2301097959280014, "step": 6503 }, { "epoch": 1.7271278714646128, "grad_norm": 1.4062353485935484, "learning_rate": 1.0034155550100922e-06, "loss": 0.21207617223262787, "step": 6504 }, { "epoch": 1.7273934404461557, "grad_norm": 1.3379977808716934, "learning_rate": 1.0014994386319621e-06, "loss": 0.24378664791584015, "step": 6505 }, { "epoch": 1.727659009427699, "grad_norm": 1.402146752515372, "learning_rate": 9.995850570586107e-07, "loss": 0.24914023280143738, "step": 6506 }, { "epoch": 1.7279245784092419, "grad_norm": 1.2949159811476645, "learning_rate": 9.976724106591128e-07, "loss": 0.23235921561717987, "step": 6507 }, { "epoch": 1.7281901473907848, "grad_norm": 1.295455173430887, "learning_rate": 9.957614998022015e-07, "loss": 0.22441455721855164, "step": 6508 }, { "epoch": 1.7284557163723278, "grad_norm": 1.4195770964317103, "learning_rate": 9.93852324856278e-07, "loss": 0.2559920847415924, "step": 6509 }, { "epoch": 1.7287212853538707, "grad_norm": 1.2106097617539484, "learning_rate": 9.919448861894088e-07, "loss": 0.21378321945667267, "step": 6510 }, { "epoch": 1.7289868543354137, "grad_norm": 1.223247289196822, "learning_rate": 9.900391841693247e-07, "loss": 0.23622627556324005, "step": 6511 }, { "epoch": 1.7292524233169566, "grad_norm": 1.2354266119490807, "learning_rate": 9.88135219163424e-07, "loss": 0.217013418674469, "step": 6512 }, { "epoch": 1.7295179922984996, "grad_norm": 1.342902376475473, "learning_rate": 9.862329915387669e-07, "loss": 0.2221517264842987, "step": 6513 }, { "epoch": 1.7297835612800425, "grad_norm": 1.3136496001371853, "learning_rate": 9.84332501662083e-07, "loss": 0.24377144873142242, "step": 6514 }, { "epoch": 1.7300491302615855, "grad_norm": 1.2574348774674273, "learning_rate": 9.824337498997593e-07, "loss": 0.23368799686431885, "step": 6515 }, { "epoch": 1.7303146992431284, "grad_norm": 1.1949944292188206, "learning_rate": 9.805367366178608e-07, "loss": 0.23061680793762207, "step": 6516 }, { "epoch": 1.7305802682246714, "grad_norm": 1.2715048223769598, "learning_rate": 9.78641462182104e-07, "loss": 0.24157950282096863, "step": 6517 }, { "epoch": 1.7308458372062143, "grad_norm": 1.3248165077712177, "learning_rate": 9.76747926957875e-07, "loss": 0.2122395783662796, "step": 6518 }, { "epoch": 1.7311114061877573, "grad_norm": 1.320024810941134, "learning_rate": 9.748561313102266e-07, "loss": 0.2351134717464447, "step": 6519 }, { "epoch": 1.7313769751693002, "grad_norm": 1.2421546716744003, "learning_rate": 9.729660756038738e-07, "loss": 0.22462692856788635, "step": 6520 }, { "epoch": 1.7316425441508432, "grad_norm": 1.191887437920794, "learning_rate": 9.710777602031985e-07, "loss": 0.2140806019306183, "step": 6521 }, { "epoch": 1.7319081131323861, "grad_norm": 1.1138928252794336, "learning_rate": 9.691911854722447e-07, "loss": 0.22256694734096527, "step": 6522 }, { "epoch": 1.732173682113929, "grad_norm": 1.3703383963226383, "learning_rate": 9.673063517747216e-07, "loss": 0.26044604182243347, "step": 6523 }, { "epoch": 1.732439251095472, "grad_norm": 1.2598416492801234, "learning_rate": 9.65423259474001e-07, "loss": 0.22553196549415588, "step": 6524 }, { "epoch": 1.732704820077015, "grad_norm": 1.351471142700479, "learning_rate": 9.635419089331255e-07, "loss": 0.2240113914012909, "step": 6525 }, { "epoch": 1.732970389058558, "grad_norm": 1.1814437793767476, "learning_rate": 9.616623005147952e-07, "loss": 0.2239987701177597, "step": 6526 }, { "epoch": 1.7332359580401009, "grad_norm": 1.3385972692968178, "learning_rate": 9.597844345813746e-07, "loss": 0.2779507040977478, "step": 6527 }, { "epoch": 1.7335015270216438, "grad_norm": 1.24243402144453, "learning_rate": 9.57908311494896e-07, "loss": 0.20211297273635864, "step": 6528 }, { "epoch": 1.7337670960031868, "grad_norm": 1.3764658259437736, "learning_rate": 9.560339316170542e-07, "loss": 0.2552817165851593, "step": 6529 }, { "epoch": 1.7340326649847297, "grad_norm": 1.2797541334315956, "learning_rate": 9.54161295309206e-07, "loss": 0.248790442943573, "step": 6530 }, { "epoch": 1.7342982339662727, "grad_norm": 1.2952054804389268, "learning_rate": 9.522904029323754e-07, "loss": 0.22865381836891174, "step": 6531 }, { "epoch": 1.7345638029478156, "grad_norm": 1.2248102039230788, "learning_rate": 9.504212548472458e-07, "loss": 0.212583988904953, "step": 6532 }, { "epoch": 1.7348293719293586, "grad_norm": 1.3834113478738954, "learning_rate": 9.48553851414169e-07, "loss": 0.24632221460342407, "step": 6533 }, { "epoch": 1.7350949409109018, "grad_norm": 1.2843254083507383, "learning_rate": 9.466881929931582e-07, "loss": 0.2264299988746643, "step": 6534 }, { "epoch": 1.7353605098924447, "grad_norm": 1.1969400150248917, "learning_rate": 9.4482427994389e-07, "loss": 0.21560585498809814, "step": 6535 }, { "epoch": 1.7356260788739877, "grad_norm": 1.2133784097522973, "learning_rate": 9.429621126257038e-07, "loss": 0.24358224868774414, "step": 6536 }, { "epoch": 1.7358916478555306, "grad_norm": 1.2714225965713206, "learning_rate": 9.411016913976045e-07, "loss": 0.23307816684246063, "step": 6537 }, { "epoch": 1.7361572168370736, "grad_norm": 1.3040669928143356, "learning_rate": 9.392430166182597e-07, "loss": 0.28001490235328674, "step": 6538 }, { "epoch": 1.7364227858186165, "grad_norm": 1.271471324412232, "learning_rate": 9.373860886459996e-07, "loss": 0.22544093430042267, "step": 6539 }, { "epoch": 1.7366883548001595, "grad_norm": 1.196472605989987, "learning_rate": 9.355309078388186e-07, "loss": 0.2066478282213211, "step": 6540 }, { "epoch": 1.7369539237817024, "grad_norm": 1.3162468805281542, "learning_rate": 9.336774745543697e-07, "loss": 0.21185964345932007, "step": 6541 }, { "epoch": 1.7372194927632454, "grad_norm": 1.2806137892507987, "learning_rate": 9.318257891499793e-07, "loss": 0.2337890863418579, "step": 6542 }, { "epoch": 1.7374850617447883, "grad_norm": 1.3468215205180822, "learning_rate": 9.299758519826274e-07, "loss": 0.2430594563484192, "step": 6543 }, { "epoch": 1.7377506307263313, "grad_norm": 1.4072339591675835, "learning_rate": 9.281276634089609e-07, "loss": 0.24799269437789917, "step": 6544 }, { "epoch": 1.7380161997078742, "grad_norm": 1.3533264573117185, "learning_rate": 9.26281223785287e-07, "loss": 0.24756166338920593, "step": 6545 }, { "epoch": 1.7382817686894172, "grad_norm": 1.281195516970091, "learning_rate": 9.244365334675787e-07, "loss": 0.23465190827846527, "step": 6546 }, { "epoch": 1.7385473376709601, "grad_norm": 1.22953964144765, "learning_rate": 9.225935928114716e-07, "loss": 0.2039640098810196, "step": 6547 }, { "epoch": 1.738812906652503, "grad_norm": 1.3426382286400422, "learning_rate": 9.207524021722602e-07, "loss": 0.22304412722587585, "step": 6548 }, { "epoch": 1.739078475634046, "grad_norm": 1.2253196898929546, "learning_rate": 9.189129619049064e-07, "loss": 0.19985908269882202, "step": 6549 }, { "epoch": 1.739344044615589, "grad_norm": 1.3354963919439176, "learning_rate": 9.17075272364032e-07, "loss": 0.2335432469844818, "step": 6550 }, { "epoch": 1.739609613597132, "grad_norm": 1.6822196536181961, "learning_rate": 9.152393339039223e-07, "loss": 0.2313593327999115, "step": 6551 }, { "epoch": 1.739875182578675, "grad_norm": 1.310977344619443, "learning_rate": 9.134051468785243e-07, "loss": 0.2320600152015686, "step": 6552 }, { "epoch": 1.7401407515602179, "grad_norm": 1.0942022372096942, "learning_rate": 9.115727116414475e-07, "loss": 0.1870848387479782, "step": 6553 }, { "epoch": 1.7404063205417608, "grad_norm": 1.340037469005655, "learning_rate": 9.097420285459635e-07, "loss": 0.22922812402248383, "step": 6554 }, { "epoch": 1.7406718895233038, "grad_norm": 1.3705243227438364, "learning_rate": 9.079130979450068e-07, "loss": 0.2505050301551819, "step": 6555 }, { "epoch": 1.7409374585048467, "grad_norm": 1.3187608464438627, "learning_rate": 9.060859201911732e-07, "loss": 0.20445439219474792, "step": 6556 }, { "epoch": 1.7412030274863897, "grad_norm": 1.1489822386745985, "learning_rate": 9.042604956367218e-07, "loss": 0.22338441014289856, "step": 6557 }, { "epoch": 1.7414685964679326, "grad_norm": 1.2900464387857213, "learning_rate": 9.024368246335735e-07, "loss": 0.24923941493034363, "step": 6558 }, { "epoch": 1.7417341654494756, "grad_norm": 1.3383952744906746, "learning_rate": 9.006149075333071e-07, "loss": 0.22842931747436523, "step": 6559 }, { "epoch": 1.7419997344310185, "grad_norm": 1.391145524863548, "learning_rate": 8.987947446871703e-07, "loss": 0.22451579570770264, "step": 6560 }, { "epoch": 1.7422653034125615, "grad_norm": 1.3218089225892669, "learning_rate": 8.969763364460682e-07, "loss": 0.2521047592163086, "step": 6561 }, { "epoch": 1.7425308723941044, "grad_norm": 1.1675892500249985, "learning_rate": 8.951596831605691e-07, "loss": 0.25001099705696106, "step": 6562 }, { "epoch": 1.7427964413756474, "grad_norm": 1.175521207104519, "learning_rate": 8.933447851809007e-07, "loss": 0.19592508673667908, "step": 6563 }, { "epoch": 1.7430620103571903, "grad_norm": 1.399887131584603, "learning_rate": 8.915316428569554e-07, "loss": 0.2785179018974304, "step": 6564 }, { "epoch": 1.7433275793387333, "grad_norm": 1.1688351316361159, "learning_rate": 8.897202565382845e-07, "loss": 0.20700594782829285, "step": 6565 }, { "epoch": 1.7435931483202762, "grad_norm": 1.2225569857896341, "learning_rate": 8.879106265741044e-07, "loss": 0.253167062997818, "step": 6566 }, { "epoch": 1.7438587173018192, "grad_norm": 1.4278912909015264, "learning_rate": 8.861027533132859e-07, "loss": 0.27672937512397766, "step": 6567 }, { "epoch": 1.7441242862833621, "grad_norm": 1.3136368448280313, "learning_rate": 8.842966371043671e-07, "loss": 0.23050950467586517, "step": 6568 }, { "epoch": 1.744389855264905, "grad_norm": 1.2790658189865058, "learning_rate": 8.824922782955481e-07, "loss": 0.23529425263404846, "step": 6569 }, { "epoch": 1.744655424246448, "grad_norm": 1.2887213562899031, "learning_rate": 8.806896772346873e-07, "loss": 0.21803250908851624, "step": 6570 }, { "epoch": 1.744920993227991, "grad_norm": 1.3669961004756481, "learning_rate": 8.788888342693047e-07, "loss": 0.24237293004989624, "step": 6571 }, { "epoch": 1.745186562209534, "grad_norm": 1.1957319745445254, "learning_rate": 8.770897497465803e-07, "loss": 0.2008107602596283, "step": 6572 }, { "epoch": 1.745452131191077, "grad_norm": 1.2693790937709173, "learning_rate": 8.752924240133587e-07, "loss": 0.23106279969215393, "step": 6573 }, { "epoch": 1.7457177001726198, "grad_norm": 1.377716829660982, "learning_rate": 8.734968574161406e-07, "loss": 0.23726215958595276, "step": 6574 }, { "epoch": 1.7459832691541628, "grad_norm": 1.211024095215965, "learning_rate": 8.717030503010915e-07, "loss": 0.26349812746047974, "step": 6575 }, { "epoch": 1.7462488381357057, "grad_norm": 1.2871963140003055, "learning_rate": 8.699110030140367e-07, "loss": 0.23226451873779297, "step": 6576 }, { "epoch": 1.7465144071172487, "grad_norm": 1.3173524718115384, "learning_rate": 8.68120715900459e-07, "loss": 0.22188402712345123, "step": 6577 }, { "epoch": 1.7467799760987917, "grad_norm": 1.2367242455559135, "learning_rate": 8.663321893055087e-07, "loss": 0.21238234639167786, "step": 6578 }, { "epoch": 1.7470455450803346, "grad_norm": 1.3423960800972676, "learning_rate": 8.645454235739903e-07, "loss": 0.2700675427913666, "step": 6579 }, { "epoch": 1.7473111140618776, "grad_norm": 1.2737029023524005, "learning_rate": 8.627604190503714e-07, "loss": 0.24463894963264465, "step": 6580 }, { "epoch": 1.7475766830434205, "grad_norm": 1.2537801110870739, "learning_rate": 8.609771760787822e-07, "loss": 0.23429079353809357, "step": 6581 }, { "epoch": 1.7478422520249635, "grad_norm": 1.342775712878445, "learning_rate": 8.591956950030067e-07, "loss": 0.21767663955688477, "step": 6582 }, { "epoch": 1.7481078210065064, "grad_norm": 1.3390334282971272, "learning_rate": 8.574159761664957e-07, "loss": 0.2499813735485077, "step": 6583 }, { "epoch": 1.7483733899880494, "grad_norm": 1.471955255689367, "learning_rate": 8.556380199123582e-07, "loss": 0.28065958619117737, "step": 6584 }, { "epoch": 1.7486389589695923, "grad_norm": 1.3012440070718, "learning_rate": 8.538618265833621e-07, "loss": 0.2166985273361206, "step": 6585 }, { "epoch": 1.7489045279511353, "grad_norm": 1.2228700023368582, "learning_rate": 8.520873965219356e-07, "loss": 0.22835782170295715, "step": 6586 }, { "epoch": 1.7491700969326782, "grad_norm": 1.2209097376008975, "learning_rate": 8.503147300701709e-07, "loss": 0.23575961589813232, "step": 6587 }, { "epoch": 1.7494356659142212, "grad_norm": 1.1275514661567778, "learning_rate": 8.485438275698154e-07, "loss": 0.183369442820549, "step": 6588 }, { "epoch": 1.7497012348957641, "grad_norm": 1.519810508178025, "learning_rate": 8.467746893622786e-07, "loss": 0.2731352746486664, "step": 6589 }, { "epoch": 1.749966803877307, "grad_norm": 1.2913957246056922, "learning_rate": 8.450073157886296e-07, "loss": 0.20177578926086426, "step": 6590 }, { "epoch": 1.75023237285885, "grad_norm": 1.2742798574628598, "learning_rate": 8.432417071895982e-07, "loss": 0.21672385931015015, "step": 6591 }, { "epoch": 1.750497941840393, "grad_norm": 1.370933216008306, "learning_rate": 8.414778639055699e-07, "loss": 0.2503831386566162, "step": 6592 }, { "epoch": 1.750763510821936, "grad_norm": 1.2884133202144494, "learning_rate": 8.397157862765959e-07, "loss": 0.2427521049976349, "step": 6593 }, { "epoch": 1.7510290798034789, "grad_norm": 1.3424141731181953, "learning_rate": 8.379554746423824e-07, "loss": 0.23128533363342285, "step": 6594 }, { "epoch": 1.7512946487850218, "grad_norm": 1.2353999110478557, "learning_rate": 8.361969293422967e-07, "loss": 0.2470957189798355, "step": 6595 }, { "epoch": 1.7515602177665648, "grad_norm": 1.3335789710762707, "learning_rate": 8.344401507153665e-07, "loss": 0.29447510838508606, "step": 6596 }, { "epoch": 1.7518257867481077, "grad_norm": 1.197223419032368, "learning_rate": 8.326851391002777e-07, "loss": 0.21585828065872192, "step": 6597 }, { "epoch": 1.7520913557296507, "grad_norm": 1.2653558688292899, "learning_rate": 8.30931894835375e-07, "loss": 0.24081121385097504, "step": 6598 }, { "epoch": 1.7523569247111936, "grad_norm": 1.3408805119391818, "learning_rate": 8.291804182586638e-07, "loss": 0.23052063584327698, "step": 6599 }, { "epoch": 1.7526224936927366, "grad_norm": 1.2126901970374089, "learning_rate": 8.274307097078093e-07, "loss": 0.19008183479309082, "step": 6600 }, { "epoch": 1.7528880626742795, "grad_norm": 1.3285441470167585, "learning_rate": 8.25682769520132e-07, "loss": 0.2632960379123688, "step": 6601 }, { "epoch": 1.7531536316558225, "grad_norm": 1.4350439941988302, "learning_rate": 8.239365980326175e-07, "loss": 0.25958624482154846, "step": 6602 }, { "epoch": 1.7534192006373654, "grad_norm": 1.304275360361708, "learning_rate": 8.221921955819035e-07, "loss": 0.22370605170726776, "step": 6603 }, { "epoch": 1.7536847696189084, "grad_norm": 1.2385957043075924, "learning_rate": 8.204495625042919e-07, "loss": 0.22018703818321228, "step": 6604 }, { "epoch": 1.7539503386004514, "grad_norm": 1.3626754196729718, "learning_rate": 8.187086991357418e-07, "loss": 0.26802191138267517, "step": 6605 }, { "epoch": 1.7542159075819943, "grad_norm": 1.5313825040978437, "learning_rate": 8.169696058118725e-07, "loss": 0.21560518443584442, "step": 6606 }, { "epoch": 1.7544814765635373, "grad_norm": 1.270508998157205, "learning_rate": 8.152322828679593e-07, "loss": 0.23222430050373077, "step": 6607 }, { "epoch": 1.7547470455450802, "grad_norm": 1.1542994886817455, "learning_rate": 8.134967306389374e-07, "loss": 0.17638427019119263, "step": 6608 }, { "epoch": 1.7550126145266232, "grad_norm": 1.3257823658984844, "learning_rate": 8.117629494594015e-07, "loss": 0.21539513766765594, "step": 6609 }, { "epoch": 1.7552781835081661, "grad_norm": 1.3431199934216977, "learning_rate": 8.100309396636031e-07, "loss": 0.2265736162662506, "step": 6610 }, { "epoch": 1.755543752489709, "grad_norm": 1.3478032961337874, "learning_rate": 8.083007015854549e-07, "loss": 0.2688787281513214, "step": 6611 }, { "epoch": 1.755809321471252, "grad_norm": 1.3027271078273857, "learning_rate": 8.065722355585249e-07, "loss": 0.19756367802619934, "step": 6612 }, { "epoch": 1.756074890452795, "grad_norm": 1.3749986253881121, "learning_rate": 8.048455419160405e-07, "loss": 0.19934290647506714, "step": 6613 }, { "epoch": 1.756340459434338, "grad_norm": 1.5756000064179743, "learning_rate": 8.031206209908904e-07, "loss": 0.2523588538169861, "step": 6614 }, { "epoch": 1.7566060284158809, "grad_norm": 1.2988900493114706, "learning_rate": 8.01397473115616e-07, "loss": 0.22825747728347778, "step": 6615 }, { "epoch": 1.7568715973974238, "grad_norm": 1.3238944187902402, "learning_rate": 7.996760986224228e-07, "loss": 0.24525251984596252, "step": 6616 }, { "epoch": 1.7571371663789668, "grad_norm": 1.366323962207031, "learning_rate": 7.979564978431687e-07, "loss": 0.21883559226989746, "step": 6617 }, { "epoch": 1.7574027353605097, "grad_norm": 1.5827948860142422, "learning_rate": 7.96238671109374e-07, "loss": 0.2642098069190979, "step": 6618 }, { "epoch": 1.757668304342053, "grad_norm": 1.3345016667633411, "learning_rate": 7.945226187522159e-07, "loss": 0.24094998836517334, "step": 6619 }, { "epoch": 1.7579338733235959, "grad_norm": 1.2243450261876818, "learning_rate": 7.928083411025278e-07, "loss": 0.2225762903690338, "step": 6620 }, { "epoch": 1.7581994423051388, "grad_norm": 1.2991544127435968, "learning_rate": 7.910958384908041e-07, "loss": 0.26722851395606995, "step": 6621 }, { "epoch": 1.7584650112866818, "grad_norm": 1.3206157533666447, "learning_rate": 7.893851112471907e-07, "loss": 0.2176910787820816, "step": 6622 }, { "epoch": 1.7587305802682247, "grad_norm": 1.3618122023344794, "learning_rate": 7.876761597015003e-07, "loss": 0.20261354744434357, "step": 6623 }, { "epoch": 1.7589961492497677, "grad_norm": 1.1728416456458601, "learning_rate": 7.859689841831975e-07, "loss": 0.23314467072486877, "step": 6624 }, { "epoch": 1.7592617182313106, "grad_norm": 1.3115277523344588, "learning_rate": 7.842635850214054e-07, "loss": 0.19854989647865295, "step": 6625 }, { "epoch": 1.7595272872128536, "grad_norm": 1.2614486006783794, "learning_rate": 7.825599625449043e-07, "loss": 0.2422565519809723, "step": 6626 }, { "epoch": 1.7597928561943965, "grad_norm": 1.342773057026848, "learning_rate": 7.808581170821328e-07, "loss": 0.27029529213905334, "step": 6627 }, { "epoch": 1.7600584251759395, "grad_norm": 1.1918292148332001, "learning_rate": 7.791580489611872e-07, "loss": 0.23596832156181335, "step": 6628 }, { "epoch": 1.7603239941574824, "grad_norm": 1.2062344481848934, "learning_rate": 7.774597585098198e-07, "loss": 0.218271404504776, "step": 6629 }, { "epoch": 1.7605895631390254, "grad_norm": 1.3762692469809215, "learning_rate": 7.75763246055441e-07, "loss": 0.2551255226135254, "step": 6630 }, { "epoch": 1.7608551321205683, "grad_norm": 1.3049962391533094, "learning_rate": 7.740685119251179e-07, "loss": 0.24410653114318848, "step": 6631 }, { "epoch": 1.7611207011021113, "grad_norm": 1.2577276419448338, "learning_rate": 7.723755564455771e-07, "loss": 0.23044872283935547, "step": 6632 }, { "epoch": 1.7613862700836542, "grad_norm": 1.334208934461724, "learning_rate": 7.706843799431985e-07, "loss": 0.24569427967071533, "step": 6633 }, { "epoch": 1.7616518390651972, "grad_norm": 1.1605227177029394, "learning_rate": 7.689949827440224e-07, "loss": 0.200277179479599, "step": 6634 }, { "epoch": 1.7619174080467401, "grad_norm": 1.1742759165978003, "learning_rate": 7.673073651737428e-07, "loss": 0.19217821955680847, "step": 6635 }, { "epoch": 1.762182977028283, "grad_norm": 1.281151649074766, "learning_rate": 7.656215275577151e-07, "loss": 0.227005273103714, "step": 6636 }, { "epoch": 1.762448546009826, "grad_norm": 1.2211778988331632, "learning_rate": 7.639374702209468e-07, "loss": 0.21359863877296448, "step": 6637 }, { "epoch": 1.762714114991369, "grad_norm": 1.267969218396632, "learning_rate": 7.62255193488105e-07, "loss": 0.24056711792945862, "step": 6638 }, { "epoch": 1.762979683972912, "grad_norm": 1.28035138481303, "learning_rate": 7.605746976835127e-07, "loss": 0.20897413790225983, "step": 6639 }, { "epoch": 1.763245252954455, "grad_norm": 1.2567764889990254, "learning_rate": 7.588959831311493e-07, "loss": 0.20395967364311218, "step": 6640 }, { "epoch": 1.7635108219359978, "grad_norm": 1.4827108993688454, "learning_rate": 7.572190501546517e-07, "loss": 0.2334095984697342, "step": 6641 }, { "epoch": 1.7637763909175408, "grad_norm": 1.3358734576215814, "learning_rate": 7.555438990773134e-07, "loss": 0.23892858624458313, "step": 6642 }, { "epoch": 1.7640419598990837, "grad_norm": 1.3063666339869877, "learning_rate": 7.538705302220839e-07, "loss": 0.23515449464321136, "step": 6643 }, { "epoch": 1.7643075288806267, "grad_norm": 1.1919354046726482, "learning_rate": 7.521989439115674e-07, "loss": 0.19728611409664154, "step": 6644 }, { "epoch": 1.7645730978621696, "grad_norm": 1.2609989060636697, "learning_rate": 7.505291404680281e-07, "loss": 0.22277355194091797, "step": 6645 }, { "epoch": 1.7648386668437126, "grad_norm": 1.2129119488866849, "learning_rate": 7.488611202133822e-07, "loss": 0.24117602407932281, "step": 6646 }, { "epoch": 1.7651042358252558, "grad_norm": 1.3643314179100876, "learning_rate": 7.471948834692045e-07, "loss": 0.24675750732421875, "step": 6647 }, { "epoch": 1.7653698048067987, "grad_norm": 1.3261352525807495, "learning_rate": 7.455304305567279e-07, "loss": 0.2413899004459381, "step": 6648 }, { "epoch": 1.7656353737883417, "grad_norm": 1.3357210816225529, "learning_rate": 7.438677617968348e-07, "loss": 0.22125428915023804, "step": 6649 }, { "epoch": 1.7659009427698846, "grad_norm": 1.2099689083776513, "learning_rate": 7.422068775100732e-07, "loss": 0.205051988363266, "step": 6650 }, { "epoch": 1.7661665117514276, "grad_norm": 1.2734255069971199, "learning_rate": 7.405477780166415e-07, "loss": 0.23711715638637543, "step": 6651 }, { "epoch": 1.7664320807329705, "grad_norm": 1.4063590395204508, "learning_rate": 7.388904636363914e-07, "loss": 0.2591046988964081, "step": 6652 }, { "epoch": 1.7666976497145135, "grad_norm": 1.4323150626725398, "learning_rate": 7.372349346888363e-07, "loss": 0.24837243556976318, "step": 6653 }, { "epoch": 1.7669632186960564, "grad_norm": 1.1492996795155954, "learning_rate": 7.35581191493141e-07, "loss": 0.20910412073135376, "step": 6654 }, { "epoch": 1.7672287876775994, "grad_norm": 1.113119722429438, "learning_rate": 7.339292343681282e-07, "loss": 0.2056204229593277, "step": 6655 }, { "epoch": 1.7674943566591423, "grad_norm": 1.2927092177897141, "learning_rate": 7.322790636322764e-07, "loss": 0.2496742308139801, "step": 6656 }, { "epoch": 1.7677599256406853, "grad_norm": 1.3571185149739835, "learning_rate": 7.306306796037188e-07, "loss": 0.24432921409606934, "step": 6657 }, { "epoch": 1.7680254946222282, "grad_norm": 1.3006085174415165, "learning_rate": 7.289840826002414e-07, "loss": 0.2492775321006775, "step": 6658 }, { "epoch": 1.7682910636037712, "grad_norm": 1.3256617876861967, "learning_rate": 7.273392729392936e-07, "loss": 0.22673827409744263, "step": 6659 }, { "epoch": 1.7685566325853141, "grad_norm": 1.3730978211523115, "learning_rate": 7.25696250937975e-07, "loss": 0.2225622981786728, "step": 6660 }, { "epoch": 1.768822201566857, "grad_norm": 1.2296766172450786, "learning_rate": 7.240550169130378e-07, "loss": 0.24896883964538574, "step": 6661 }, { "epoch": 1.7690877705484, "grad_norm": 1.2103035123370711, "learning_rate": 7.224155711808923e-07, "loss": 0.2395302951335907, "step": 6662 }, { "epoch": 1.769353339529943, "grad_norm": 1.2658162555194572, "learning_rate": 7.207779140576066e-07, "loss": 0.2255886197090149, "step": 6663 }, { "epoch": 1.769618908511486, "grad_norm": 1.2518907529925698, "learning_rate": 7.191420458589005e-07, "loss": 0.24029678106307983, "step": 6664 }, { "epoch": 1.769884477493029, "grad_norm": 1.1016484922093457, "learning_rate": 7.175079669001506e-07, "loss": 0.19399142265319824, "step": 6665 }, { "epoch": 1.7701500464745719, "grad_norm": 1.2291425924678119, "learning_rate": 7.158756774963882e-07, "loss": 0.24569162726402283, "step": 6666 }, { "epoch": 1.7704156154561148, "grad_norm": 1.2180012837263907, "learning_rate": 7.142451779622971e-07, "loss": 0.2484329342842102, "step": 6667 }, { "epoch": 1.7706811844376578, "grad_norm": 1.2505833357389051, "learning_rate": 7.126164686122216e-07, "loss": 0.24423512816429138, "step": 6668 }, { "epoch": 1.7709467534192007, "grad_norm": 1.1277554918017485, "learning_rate": 7.109895497601571e-07, "loss": 0.20146678388118744, "step": 6669 }, { "epoch": 1.7712123224007437, "grad_norm": 1.2945002187740315, "learning_rate": 7.093644217197526e-07, "loss": 0.23329001665115356, "step": 6670 }, { "epoch": 1.7714778913822866, "grad_norm": 1.1689758736288713, "learning_rate": 7.077410848043165e-07, "loss": 0.2290019690990448, "step": 6671 }, { "epoch": 1.7717434603638296, "grad_norm": 1.2744441159542537, "learning_rate": 7.061195393268061e-07, "loss": 0.2329377382993698, "step": 6672 }, { "epoch": 1.7720090293453725, "grad_norm": 1.1430677052322078, "learning_rate": 7.04499785599837e-07, "loss": 0.21513575315475464, "step": 6673 }, { "epoch": 1.7722745983269155, "grad_norm": 1.1659646021132744, "learning_rate": 7.028818239356794e-07, "loss": 0.19022463262081146, "step": 6674 }, { "epoch": 1.7725401673084584, "grad_norm": 1.2837523861206293, "learning_rate": 7.012656546462571e-07, "loss": 0.2097887396812439, "step": 6675 }, { "epoch": 1.7728057362900014, "grad_norm": 1.3991640357566577, "learning_rate": 6.996512780431486e-07, "loss": 0.2559792101383209, "step": 6676 }, { "epoch": 1.7730713052715443, "grad_norm": 1.3219531410357084, "learning_rate": 6.980386944375849e-07, "loss": 0.24624274671077728, "step": 6677 }, { "epoch": 1.7733368742530873, "grad_norm": 1.2405076465604956, "learning_rate": 6.964279041404553e-07, "loss": 0.22904372215270996, "step": 6678 }, { "epoch": 1.7736024432346302, "grad_norm": 1.216707646052236, "learning_rate": 6.948189074623002e-07, "loss": 0.20808623731136322, "step": 6679 }, { "epoch": 1.7738680122161732, "grad_norm": 1.229477200185015, "learning_rate": 6.932117047133158e-07, "loss": 0.1931435763835907, "step": 6680 }, { "epoch": 1.7741335811977161, "grad_norm": 1.2962984681963328, "learning_rate": 6.91606296203351e-07, "loss": 0.22938531637191772, "step": 6681 }, { "epoch": 1.774399150179259, "grad_norm": 1.2921857742770726, "learning_rate": 6.900026822419103e-07, "loss": 0.240365132689476, "step": 6682 }, { "epoch": 1.774664719160802, "grad_norm": 1.3560359754116593, "learning_rate": 6.8840086313815e-07, "loss": 0.26665499806404114, "step": 6683 }, { "epoch": 1.774930288142345, "grad_norm": 1.1827095382370005, "learning_rate": 6.86800839200884e-07, "loss": 0.19775834679603577, "step": 6684 }, { "epoch": 1.775195857123888, "grad_norm": 1.2698613362606737, "learning_rate": 6.852026107385756e-07, "loss": 0.20334021747112274, "step": 6685 }, { "epoch": 1.775461426105431, "grad_norm": 1.1845529296493982, "learning_rate": 6.836061780593484e-07, "loss": 0.20670340955257416, "step": 6686 }, { "epoch": 1.7757269950869738, "grad_norm": 1.2940248868651125, "learning_rate": 6.820115414709727e-07, "loss": 0.2033209353685379, "step": 6687 }, { "epoch": 1.7759925640685168, "grad_norm": 1.101442360403221, "learning_rate": 6.804187012808761e-07, "loss": 0.23827815055847168, "step": 6688 }, { "epoch": 1.7762581330500598, "grad_norm": 1.200357834005043, "learning_rate": 6.788276577961394e-07, "loss": 0.2054731547832489, "step": 6689 }, { "epoch": 1.7765237020316027, "grad_norm": 1.3006753644657554, "learning_rate": 6.772384113234987e-07, "loss": 0.25553691387176514, "step": 6690 }, { "epoch": 1.7767892710131457, "grad_norm": 1.2800516387465457, "learning_rate": 6.756509621693385e-07, "loss": 0.23650874197483063, "step": 6691 }, { "epoch": 1.7770548399946886, "grad_norm": 1.2987358367196533, "learning_rate": 6.740653106397033e-07, "loss": 0.2353624701499939, "step": 6692 }, { "epoch": 1.7773204089762316, "grad_norm": 1.3578478166739052, "learning_rate": 6.724814570402871e-07, "loss": 0.26034629344940186, "step": 6693 }, { "epoch": 1.7775859779577745, "grad_norm": 1.2070636800070726, "learning_rate": 6.70899401676438e-07, "loss": 0.2272130399942398, "step": 6694 }, { "epoch": 1.7778515469393175, "grad_norm": 1.353295285146214, "learning_rate": 6.693191448531589e-07, "loss": 0.27940404415130615, "step": 6695 }, { "epoch": 1.7781171159208604, "grad_norm": 1.2726244327901954, "learning_rate": 6.677406868751013e-07, "loss": 0.22997702658176422, "step": 6696 }, { "epoch": 1.7783826849024034, "grad_norm": 1.2569026906720413, "learning_rate": 6.661640280465775e-07, "loss": 0.22918452322483063, "step": 6697 }, { "epoch": 1.7786482538839463, "grad_norm": 1.2456580683228033, "learning_rate": 6.645891686715456e-07, "loss": 0.18456090986728668, "step": 6698 }, { "epoch": 1.7789138228654893, "grad_norm": 1.3290472252808803, "learning_rate": 6.630161090536214e-07, "loss": 0.23256534337997437, "step": 6699 }, { "epoch": 1.7791793918470322, "grad_norm": 1.2224316750050632, "learning_rate": 6.614448494960713e-07, "loss": 0.21171879768371582, "step": 6700 }, { "epoch": 1.7794449608285752, "grad_norm": 1.201224789246079, "learning_rate": 6.598753903018163e-07, "loss": 0.21382400393486023, "step": 6701 }, { "epoch": 1.7797105298101181, "grad_norm": 1.2240177347792593, "learning_rate": 6.583077317734299e-07, "loss": 0.22954748570919037, "step": 6702 }, { "epoch": 1.779976098791661, "grad_norm": 1.519530195710278, "learning_rate": 6.56741874213136e-07, "loss": 0.25691086053848267, "step": 6703 }, { "epoch": 1.780241667773204, "grad_norm": 1.4662002194098382, "learning_rate": 6.551778179228174e-07, "loss": 0.23413901031017303, "step": 6704 }, { "epoch": 1.780507236754747, "grad_norm": 1.2775019242293946, "learning_rate": 6.536155632040031e-07, "loss": 0.2493733912706375, "step": 6705 }, { "epoch": 1.78077280573629, "grad_norm": 1.2512747936457356, "learning_rate": 6.520551103578776e-07, "loss": 0.26094138622283936, "step": 6706 }, { "epoch": 1.7810383747178329, "grad_norm": 1.3016608765448805, "learning_rate": 6.504964596852781e-07, "loss": 0.23509518802165985, "step": 6707 }, { "epoch": 1.7813039436993758, "grad_norm": 1.4726929969063267, "learning_rate": 6.489396114866942e-07, "loss": 0.2471122294664383, "step": 6708 }, { "epoch": 1.7815695126809188, "grad_norm": 1.3034668854019054, "learning_rate": 6.47384566062268e-07, "loss": 0.2363303005695343, "step": 6709 }, { "epoch": 1.7818350816624617, "grad_norm": 1.1801501968168786, "learning_rate": 6.458313237117953e-07, "loss": 0.18868233263492584, "step": 6710 }, { "epoch": 1.7821006506440047, "grad_norm": 1.3437880175802723, "learning_rate": 6.442798847347187e-07, "loss": 0.23380546271800995, "step": 6711 }, { "epoch": 1.7823662196255476, "grad_norm": 1.471740030592424, "learning_rate": 6.42730249430139e-07, "loss": 0.24112167954444885, "step": 6712 }, { "epoch": 1.7826317886070906, "grad_norm": 1.2664184946697812, "learning_rate": 6.411824180968096e-07, "loss": 0.2397521436214447, "step": 6713 }, { "epoch": 1.7828973575886335, "grad_norm": 1.309174308390434, "learning_rate": 6.396363910331338e-07, "loss": 0.23775406181812286, "step": 6714 }, { "epoch": 1.7831629265701765, "grad_norm": 1.4327166340451307, "learning_rate": 6.380921685371655e-07, "loss": 0.23278602957725525, "step": 6715 }, { "epoch": 1.7834284955517195, "grad_norm": 1.1135605228940266, "learning_rate": 6.365497509066143e-07, "loss": 0.20028996467590332, "step": 6716 }, { "epoch": 1.7836940645332624, "grad_norm": 1.146963533940078, "learning_rate": 6.35009138438839e-07, "loss": 0.20862875878810883, "step": 6717 }, { "epoch": 1.7839596335148054, "grad_norm": 1.3257848293601993, "learning_rate": 6.334703314308521e-07, "loss": 0.23522542417049408, "step": 6718 }, { "epoch": 1.7842252024963483, "grad_norm": 1.2172150430538355, "learning_rate": 6.319333301793173e-07, "loss": 0.24633824825286865, "step": 6719 }, { "epoch": 1.7844907714778913, "grad_norm": 1.3131451310460658, "learning_rate": 6.30398134980551e-07, "loss": 0.22141410410404205, "step": 6720 }, { "epoch": 1.7847563404594342, "grad_norm": 1.3593079444355614, "learning_rate": 6.288647461305186e-07, "loss": 0.23313754796981812, "step": 6721 }, { "epoch": 1.7850219094409772, "grad_norm": 1.2751593889081192, "learning_rate": 6.273331639248414e-07, "loss": 0.22015389800071716, "step": 6722 }, { "epoch": 1.7852874784225201, "grad_norm": 1.2716859790694561, "learning_rate": 6.258033886587911e-07, "loss": 0.21154522895812988, "step": 6723 }, { "epoch": 1.785553047404063, "grad_norm": 1.3319130935282857, "learning_rate": 6.242754206272883e-07, "loss": 0.2320503294467926, "step": 6724 }, { "epoch": 1.785818616385606, "grad_norm": 1.2016740259413836, "learning_rate": 6.227492601249097e-07, "loss": 0.21778921782970428, "step": 6725 }, { "epoch": 1.786084185367149, "grad_norm": 1.2321504813505204, "learning_rate": 6.212249074458776e-07, "loss": 0.2368871569633484, "step": 6726 }, { "epoch": 1.786349754348692, "grad_norm": 1.5195368545073897, "learning_rate": 6.197023628840704e-07, "loss": 0.27269479632377625, "step": 6727 }, { "epoch": 1.7866153233302349, "grad_norm": 1.2744130185555103, "learning_rate": 6.181816267330177e-07, "loss": 0.2414151132106781, "step": 6728 }, { "epoch": 1.7868808923117778, "grad_norm": 1.1197825562175172, "learning_rate": 6.166626992858993e-07, "loss": 0.2156972736120224, "step": 6729 }, { "epoch": 1.7871464612933208, "grad_norm": 1.2748992996552195, "learning_rate": 6.151455808355455e-07, "loss": 0.2510441541671753, "step": 6730 }, { "epoch": 1.787412030274864, "grad_norm": 1.2924509412618195, "learning_rate": 6.136302716744402e-07, "loss": 0.20290088653564453, "step": 6731 }, { "epoch": 1.787677599256407, "grad_norm": 1.3705736121123597, "learning_rate": 6.121167720947174e-07, "loss": 0.25088101625442505, "step": 6732 }, { "epoch": 1.7879431682379499, "grad_norm": 1.3723338572382136, "learning_rate": 6.106050823881604e-07, "loss": 0.2566376328468323, "step": 6733 }, { "epoch": 1.7882087372194928, "grad_norm": 1.1043772478174716, "learning_rate": 6.09095202846206e-07, "loss": 0.1882714033126831, "step": 6734 }, { "epoch": 1.7884743062010358, "grad_norm": 1.2323780172305254, "learning_rate": 6.075871337599404e-07, "loss": 0.18705856800079346, "step": 6735 }, { "epoch": 1.7887398751825787, "grad_norm": 1.1976910574931858, "learning_rate": 6.060808754201031e-07, "loss": 0.24756133556365967, "step": 6736 }, { "epoch": 1.7890054441641217, "grad_norm": 1.3197777974144425, "learning_rate": 6.045764281170818e-07, "loss": 0.2537599205970764, "step": 6737 }, { "epoch": 1.7892710131456646, "grad_norm": 1.330362234255321, "learning_rate": 6.030737921409169e-07, "loss": 0.22049202024936676, "step": 6738 }, { "epoch": 1.7895365821272076, "grad_norm": 1.1222347914068396, "learning_rate": 6.015729677812965e-07, "loss": 0.20820394158363342, "step": 6739 }, { "epoch": 1.7898021511087505, "grad_norm": 1.3153590716408405, "learning_rate": 6.00073955327567e-07, "loss": 0.2339879721403122, "step": 6740 }, { "epoch": 1.7900677200902935, "grad_norm": 1.2483259153993207, "learning_rate": 5.98576755068715e-07, "loss": 0.22082161903381348, "step": 6741 }, { "epoch": 1.7903332890718364, "grad_norm": 1.28162605766883, "learning_rate": 5.97081367293385e-07, "loss": 0.21883058547973633, "step": 6742 }, { "epoch": 1.7905988580533794, "grad_norm": 1.1591166092235485, "learning_rate": 5.955877922898712e-07, "loss": 0.214680016040802, "step": 6743 }, { "epoch": 1.7908644270349223, "grad_norm": 1.37628370977899, "learning_rate": 5.940960303461152e-07, "loss": 0.24533744156360626, "step": 6744 }, { "epoch": 1.7911299960164653, "grad_norm": 1.3046535737377691, "learning_rate": 5.926060817497137e-07, "loss": 0.19857585430145264, "step": 6745 }, { "epoch": 1.7913955649980082, "grad_norm": 1.4468975368000232, "learning_rate": 5.911179467879081e-07, "loss": 0.27493876218795776, "step": 6746 }, { "epoch": 1.7916611339795512, "grad_norm": 1.1490145590407708, "learning_rate": 5.896316257475954e-07, "loss": 0.20560544729232788, "step": 6747 }, { "epoch": 1.7919267029610941, "grad_norm": 1.2213631424870741, "learning_rate": 5.881471189153199e-07, "loss": 0.23559418320655823, "step": 6748 }, { "epoch": 1.792192271942637, "grad_norm": 1.3144055462601232, "learning_rate": 5.866644265772769e-07, "loss": 0.23055103421211243, "step": 6749 }, { "epoch": 1.79245784092418, "grad_norm": 1.4747052812755685, "learning_rate": 5.851835490193136e-07, "loss": 0.2780724763870239, "step": 6750 }, { "epoch": 1.792723409905723, "grad_norm": 1.2354333862915858, "learning_rate": 5.837044865269248e-07, "loss": 0.20216618478298187, "step": 6751 }, { "epoch": 1.792988978887266, "grad_norm": 1.308066661539038, "learning_rate": 5.822272393852557e-07, "loss": 0.2289930284023285, "step": 6752 }, { "epoch": 1.793254547868809, "grad_norm": 1.2952454297764495, "learning_rate": 5.80751807879103e-07, "loss": 0.2028929740190506, "step": 6753 }, { "epoch": 1.7935201168503518, "grad_norm": 1.2960791997009702, "learning_rate": 5.792781922929114e-07, "loss": 0.1964842826128006, "step": 6754 }, { "epoch": 1.7937856858318948, "grad_norm": 1.4512315838061285, "learning_rate": 5.77806392910778e-07, "loss": 0.2617039084434509, "step": 6755 }, { "epoch": 1.7940512548134377, "grad_norm": 1.325466585449178, "learning_rate": 5.76336410016447e-07, "loss": 0.2582395374774933, "step": 6756 }, { "epoch": 1.7943168237949807, "grad_norm": 1.2587701407069858, "learning_rate": 5.74868243893314e-07, "loss": 0.23379334807395935, "step": 6757 }, { "epoch": 1.7945823927765236, "grad_norm": 1.2979435124807637, "learning_rate": 5.734018948244247e-07, "loss": 0.2376977801322937, "step": 6758 }, { "epoch": 1.7948479617580668, "grad_norm": 1.414785341098569, "learning_rate": 5.719373630924741e-07, "loss": 0.21816037595272064, "step": 6759 }, { "epoch": 1.7951135307396098, "grad_norm": 1.1404163081963787, "learning_rate": 5.704746489798063e-07, "loss": 0.22156387567520142, "step": 6760 }, { "epoch": 1.7953790997211527, "grad_norm": 1.195358056085369, "learning_rate": 5.690137527684147e-07, "loss": 0.20818129181861877, "step": 6761 }, { "epoch": 1.7956446687026957, "grad_norm": 1.1501993150491747, "learning_rate": 5.67554674739944e-07, "loss": 0.18672943115234375, "step": 6762 }, { "epoch": 1.7959102376842386, "grad_norm": 1.2143392515173568, "learning_rate": 5.66097415175686e-07, "loss": 0.2023036777973175, "step": 6763 }, { "epoch": 1.7961758066657816, "grad_norm": 1.3551091626165586, "learning_rate": 5.646419743565845e-07, "loss": 0.24798424541950226, "step": 6764 }, { "epoch": 1.7964413756473245, "grad_norm": 1.2034553304236573, "learning_rate": 5.631883525632297e-07, "loss": 0.1885790377855301, "step": 6765 }, { "epoch": 1.7967069446288675, "grad_norm": 1.3693229184747842, "learning_rate": 5.617365500758631e-07, "loss": 0.24120381474494934, "step": 6766 }, { "epoch": 1.7969725136104104, "grad_norm": 1.2063823939207, "learning_rate": 5.602865671743763e-07, "loss": 0.24238690733909607, "step": 6767 }, { "epoch": 1.7972380825919534, "grad_norm": 1.2611645650605894, "learning_rate": 5.588384041383089e-07, "loss": 0.22928190231323242, "step": 6768 }, { "epoch": 1.7975036515734963, "grad_norm": 1.3148280979127052, "learning_rate": 5.573920612468486e-07, "loss": 0.2464730143547058, "step": 6769 }, { "epoch": 1.7977692205550393, "grad_norm": 1.149985298163883, "learning_rate": 5.559475387788348e-07, "loss": 0.2167670875787735, "step": 6770 }, { "epoch": 1.7980347895365822, "grad_norm": 1.3365719233561757, "learning_rate": 5.545048370127526e-07, "loss": 0.24080663919448853, "step": 6771 }, { "epoch": 1.7983003585181252, "grad_norm": 1.3571891328346308, "learning_rate": 5.530639562267382e-07, "loss": 0.25481417775154114, "step": 6772 }, { "epoch": 1.7985659274996681, "grad_norm": 1.3525822075957274, "learning_rate": 5.51624896698576e-07, "loss": 0.23328909277915955, "step": 6773 }, { "epoch": 1.798831496481211, "grad_norm": 1.136424514008492, "learning_rate": 5.50187658705702e-07, "loss": 0.18779747188091278, "step": 6774 }, { "epoch": 1.799097065462754, "grad_norm": 1.3089016035676113, "learning_rate": 5.487522425251968e-07, "loss": 0.24840545654296875, "step": 6775 }, { "epoch": 1.799362634444297, "grad_norm": 1.4658187281761286, "learning_rate": 5.473186484337911e-07, "loss": 0.2559642791748047, "step": 6776 }, { "epoch": 1.79962820342584, "grad_norm": 1.3714243263968933, "learning_rate": 5.458868767078673e-07, "loss": 0.2005981206893921, "step": 6777 }, { "epoch": 1.799893772407383, "grad_norm": 1.4085177100377464, "learning_rate": 5.444569276234523e-07, "loss": 0.2480883002281189, "step": 6778 }, { "epoch": 1.8001593413889259, "grad_norm": 1.2203856732153913, "learning_rate": 5.430288014562235e-07, "loss": 0.23043295741081238, "step": 6779 }, { "epoch": 1.8004249103704688, "grad_norm": 1.4245462518797845, "learning_rate": 5.416024984815072e-07, "loss": 0.22702521085739136, "step": 6780 }, { "epoch": 1.8006904793520118, "grad_norm": 1.153610007644359, "learning_rate": 5.401780189742789e-07, "loss": 0.19955751299858093, "step": 6781 }, { "epoch": 1.8009560483335547, "grad_norm": 1.2560139759300732, "learning_rate": 5.387553632091591e-07, "loss": 0.19743162393569946, "step": 6782 }, { "epoch": 1.8012216173150977, "grad_norm": 1.3072968250539403, "learning_rate": 5.373345314604206e-07, "loss": 0.2262525111436844, "step": 6783 }, { "epoch": 1.8014871862966406, "grad_norm": 1.2987858405959638, "learning_rate": 5.359155240019809e-07, "loss": 0.249632328748703, "step": 6784 }, { "epoch": 1.8017527552781836, "grad_norm": 1.1804135507002813, "learning_rate": 5.344983411074111e-07, "loss": 0.19300231337547302, "step": 6785 }, { "epoch": 1.8020183242597265, "grad_norm": 1.293291337799575, "learning_rate": 5.330829830499263e-07, "loss": 0.22256134450435638, "step": 6786 }, { "epoch": 1.8022838932412695, "grad_norm": 1.283065855572867, "learning_rate": 5.316694501023911e-07, "loss": 0.2666356563568115, "step": 6787 }, { "epoch": 1.8025494622228124, "grad_norm": 1.239663996945653, "learning_rate": 5.302577425373156e-07, "loss": 0.223050057888031, "step": 6788 }, { "epoch": 1.8028150312043554, "grad_norm": 1.3011452698852823, "learning_rate": 5.288478606268632e-07, "loss": 0.2298094481229782, "step": 6789 }, { "epoch": 1.8030806001858983, "grad_norm": 1.4761708863150307, "learning_rate": 5.27439804642843e-07, "loss": 0.23596417903900146, "step": 6790 }, { "epoch": 1.8033461691674413, "grad_norm": 1.226229776793909, "learning_rate": 5.26033574856708e-07, "loss": 0.19501623511314392, "step": 6791 }, { "epoch": 1.8036117381489842, "grad_norm": 1.2825838070785722, "learning_rate": 5.246291715395657e-07, "loss": 0.23518472909927368, "step": 6792 }, { "epoch": 1.8038773071305272, "grad_norm": 1.1820374841237484, "learning_rate": 5.232265949621651e-07, "loss": 0.2251899093389511, "step": 6793 }, { "epoch": 1.8041428761120701, "grad_norm": 1.1527654541489951, "learning_rate": 5.218258453949099e-07, "loss": 0.1764119267463684, "step": 6794 }, { "epoch": 1.804408445093613, "grad_norm": 1.2895741356204065, "learning_rate": 5.204269231078484e-07, "loss": 0.20768773555755615, "step": 6795 }, { "epoch": 1.804674014075156, "grad_norm": 1.3841780370828203, "learning_rate": 5.19029828370674e-07, "loss": 0.2115546613931656, "step": 6796 }, { "epoch": 1.804939583056699, "grad_norm": 1.315680847185169, "learning_rate": 5.176345614527312e-07, "loss": 0.2465972602367401, "step": 6797 }, { "epoch": 1.805205152038242, "grad_norm": 1.379203464130328, "learning_rate": 5.162411226230102e-07, "loss": 0.2359803020954132, "step": 6798 }, { "epoch": 1.805470721019785, "grad_norm": 1.4106819634653143, "learning_rate": 5.148495121501506e-07, "loss": 0.27518990635871887, "step": 6799 }, { "epoch": 1.8057362900013278, "grad_norm": 1.3653410113402416, "learning_rate": 5.134597303024391e-07, "loss": 0.23914849758148193, "step": 6800 }, { "epoch": 1.8060018589828708, "grad_norm": 1.256847668479307, "learning_rate": 5.120717773478068e-07, "loss": 0.21771098673343658, "step": 6801 }, { "epoch": 1.8062674279644138, "grad_norm": 1.2716100664289411, "learning_rate": 5.106856535538363e-07, "loss": 0.235421285033226, "step": 6802 }, { "epoch": 1.8065329969459567, "grad_norm": 1.4167241401735549, "learning_rate": 5.093013591877561e-07, "loss": 0.23973548412322998, "step": 6803 }, { "epoch": 1.8067985659274997, "grad_norm": 1.484886222602596, "learning_rate": 5.079188945164426e-07, "loss": 0.24059349298477173, "step": 6804 }, { "epoch": 1.8070641349090426, "grad_norm": 1.3840991454067133, "learning_rate": 5.065382598064161e-07, "loss": 0.25188207626342773, "step": 6805 }, { "epoch": 1.8073297038905856, "grad_norm": 1.1866308474402574, "learning_rate": 5.051594553238482e-07, "loss": 0.20124536752700806, "step": 6806 }, { "epoch": 1.8075952728721285, "grad_norm": 1.2234769875088154, "learning_rate": 5.037824813345571e-07, "loss": 0.2059330940246582, "step": 6807 }, { "epoch": 1.8078608418536715, "grad_norm": 1.2468279665046458, "learning_rate": 5.024073381040052e-07, "loss": 0.2122621238231659, "step": 6808 }, { "epoch": 1.8081264108352144, "grad_norm": 1.2203093249465347, "learning_rate": 5.010340258973046e-07, "loss": 0.20064303278923035, "step": 6809 }, { "epoch": 1.8083919798167574, "grad_norm": 1.3685187895509534, "learning_rate": 4.996625449792147e-07, "loss": 0.24773281812667847, "step": 6810 }, { "epoch": 1.8086575487983003, "grad_norm": 1.149837064877599, "learning_rate": 4.982928956141375e-07, "loss": 0.2111661732196808, "step": 6811 }, { "epoch": 1.8089231177798433, "grad_norm": 1.2721912706796665, "learning_rate": 4.969250780661306e-07, "loss": 0.24823394417762756, "step": 6812 }, { "epoch": 1.8091886867613862, "grad_norm": 1.410632443971984, "learning_rate": 4.955590925988896e-07, "loss": 0.24726605415344238, "step": 6813 }, { "epoch": 1.8094542557429292, "grad_norm": 1.3112520269484638, "learning_rate": 4.941949394757605e-07, "loss": 0.2269962728023529, "step": 6814 }, { "epoch": 1.8097198247244721, "grad_norm": 1.311172380903373, "learning_rate": 4.928326189597377e-07, "loss": 0.2336469292640686, "step": 6815 }, { "epoch": 1.809985393706015, "grad_norm": 1.3372206959113173, "learning_rate": 4.914721313134585e-07, "loss": 0.24872124195098877, "step": 6816 }, { "epoch": 1.810250962687558, "grad_norm": 1.3116570930981006, "learning_rate": 4.901134767992099e-07, "loss": 0.2484157383441925, "step": 6817 }, { "epoch": 1.810516531669101, "grad_norm": 1.5234901533359522, "learning_rate": 4.887566556789247e-07, "loss": 0.24683158099651337, "step": 6818 }, { "epoch": 1.810782100650644, "grad_norm": 1.1959899225802055, "learning_rate": 4.874016682141802e-07, "loss": 0.18717995285987854, "step": 6819 }, { "epoch": 1.8110476696321869, "grad_norm": 1.2862771000886628, "learning_rate": 4.860485146662053e-07, "loss": 0.2220807671546936, "step": 6820 }, { "epoch": 1.8113132386137298, "grad_norm": 1.196369102162481, "learning_rate": 4.84697195295869e-07, "loss": 0.2178400307893753, "step": 6821 }, { "epoch": 1.8115788075952728, "grad_norm": 1.2250082051849178, "learning_rate": 4.833477103636908e-07, "loss": 0.2056645154953003, "step": 6822 }, { "epoch": 1.8118443765768157, "grad_norm": 1.1729075702986809, "learning_rate": 4.820000601298358e-07, "loss": 0.21441905200481415, "step": 6823 }, { "epoch": 1.8121099455583587, "grad_norm": 1.4445497728186703, "learning_rate": 4.806542448541151e-07, "loss": 0.17688237130641937, "step": 6824 }, { "epoch": 1.8123755145399016, "grad_norm": 1.3216659704658935, "learning_rate": 4.793102647959847e-07, "loss": 0.22405505180358887, "step": 6825 }, { "epoch": 1.8126410835214446, "grad_norm": 1.4226735460298432, "learning_rate": 4.779681202145503e-07, "loss": 0.21617908775806427, "step": 6826 }, { "epoch": 1.8129066525029875, "grad_norm": 1.3284639992790963, "learning_rate": 4.766278113685596e-07, "loss": 0.23570871353149414, "step": 6827 }, { "epoch": 1.8131722214845305, "grad_norm": 1.222373726415007, "learning_rate": 4.7528933851641036e-07, "loss": 0.23806743323802948, "step": 6828 }, { "epoch": 1.8134377904660735, "grad_norm": 1.3312930220149763, "learning_rate": 4.739527019161405e-07, "loss": 0.24859179556369781, "step": 6829 }, { "epoch": 1.8137033594476164, "grad_norm": 1.2143252342774762, "learning_rate": 4.726179018254418e-07, "loss": 0.21314260363578796, "step": 6830 }, { "epoch": 1.8139689284291594, "grad_norm": 1.272910058647325, "learning_rate": 4.7128493850164715e-07, "loss": 0.25290659070014954, "step": 6831 }, { "epoch": 1.8142344974107023, "grad_norm": 1.1800117497978073, "learning_rate": 4.699538122017355e-07, "loss": 0.22606703639030457, "step": 6832 }, { "epoch": 1.8145000663922453, "grad_norm": 1.3037958158309495, "learning_rate": 4.6862452318233275e-07, "loss": 0.23973071575164795, "step": 6833 }, { "epoch": 1.8147656353737882, "grad_norm": 1.2341358358957555, "learning_rate": 4.672970716997094e-07, "loss": 0.2225341498851776, "step": 6834 }, { "epoch": 1.8150312043553312, "grad_norm": 1.441833447404081, "learning_rate": 4.6597145800978183e-07, "loss": 0.19153356552124023, "step": 6835 }, { "epoch": 1.8152967733368741, "grad_norm": 1.2010339801105188, "learning_rate": 4.646476823681145e-07, "loss": 0.19694843888282776, "step": 6836 }, { "epoch": 1.815562342318417, "grad_norm": 1.2719437537675773, "learning_rate": 4.6332574502991554e-07, "loss": 0.2353869527578354, "step": 6837 }, { "epoch": 1.81582791129996, "grad_norm": 1.3504470280928214, "learning_rate": 4.6200564625003775e-07, "loss": 0.20919787883758545, "step": 6838 }, { "epoch": 1.816093480281503, "grad_norm": 1.1775336742921327, "learning_rate": 4.6068738628298193e-07, "loss": 0.18352919816970825, "step": 6839 }, { "epoch": 1.816359049263046, "grad_norm": 1.3571378213568392, "learning_rate": 4.5937096538289147e-07, "loss": 0.24711212515830994, "step": 6840 }, { "epoch": 1.8166246182445889, "grad_norm": 1.2216287617055834, "learning_rate": 4.580563838035579e-07, "loss": 0.2350531816482544, "step": 6841 }, { "epoch": 1.8168901872261318, "grad_norm": 1.3731447849726235, "learning_rate": 4.5674364179841614e-07, "loss": 0.26124465465545654, "step": 6842 }, { "epoch": 1.8171557562076748, "grad_norm": 1.3819435677197398, "learning_rate": 4.5543273962054934e-07, "loss": 0.2110440880060196, "step": 6843 }, { "epoch": 1.817421325189218, "grad_norm": 1.425540844923539, "learning_rate": 4.5412367752268094e-07, "loss": 0.2409415990114212, "step": 6844 }, { "epoch": 1.817686894170761, "grad_norm": 1.2827549712815094, "learning_rate": 4.528164557571857e-07, "loss": 0.2280777543783188, "step": 6845 }, { "epoch": 1.8179524631523039, "grad_norm": 1.111661347066374, "learning_rate": 4.515110745760787e-07, "loss": 0.201339989900589, "step": 6846 }, { "epoch": 1.8182180321338468, "grad_norm": 1.2576623337538495, "learning_rate": 4.5020753423102083e-07, "loss": 0.22910752892494202, "step": 6847 }, { "epoch": 1.8184836011153898, "grad_norm": 1.2835742527474332, "learning_rate": 4.4890583497332327e-07, "loss": 0.21736779808998108, "step": 6848 }, { "epoch": 1.8187491700969327, "grad_norm": 1.282796826855034, "learning_rate": 4.476059770539354e-07, "loss": 0.20898449420928955, "step": 6849 }, { "epoch": 1.8190147390784757, "grad_norm": 1.2514312774528749, "learning_rate": 4.463079607234555e-07, "loss": 0.22159051895141602, "step": 6850 }, { "epoch": 1.8192803080600186, "grad_norm": 1.290667660986327, "learning_rate": 4.450117862321246e-07, "loss": 0.24081172049045563, "step": 6851 }, { "epoch": 1.8195458770415616, "grad_norm": 1.2092663587603776, "learning_rate": 4.4371745382983164e-07, "loss": 0.17856758832931519, "step": 6852 }, { "epoch": 1.8198114460231045, "grad_norm": 1.2002967167521004, "learning_rate": 4.424249637661071e-07, "loss": 0.20796868205070496, "step": 6853 }, { "epoch": 1.8200770150046475, "grad_norm": 1.5683273026632796, "learning_rate": 4.4113431629013046e-07, "loss": 0.24277149140834808, "step": 6854 }, { "epoch": 1.8203425839861904, "grad_norm": 1.1767967505464594, "learning_rate": 4.3984551165071944e-07, "loss": 0.19315838813781738, "step": 6855 }, { "epoch": 1.8206081529677334, "grad_norm": 1.2457379727303777, "learning_rate": 4.3855855009634075e-07, "loss": 0.20789340138435364, "step": 6856 }, { "epoch": 1.8208737219492763, "grad_norm": 1.4246348317049922, "learning_rate": 4.372734318751082e-07, "loss": 0.2871186137199402, "step": 6857 }, { "epoch": 1.8211392909308193, "grad_norm": 1.3878283876849893, "learning_rate": 4.359901572347758e-07, "loss": 0.2419736236333847, "step": 6858 }, { "epoch": 1.8214048599123622, "grad_norm": 1.3237602075469659, "learning_rate": 4.3470872642274455e-07, "loss": 0.2190292328596115, "step": 6859 }, { "epoch": 1.8216704288939052, "grad_norm": 1.3879953178475168, "learning_rate": 4.3342913968605903e-07, "loss": 0.2654367685317993, "step": 6860 }, { "epoch": 1.8219359978754481, "grad_norm": 1.3362249609314758, "learning_rate": 4.321513972714075e-07, "loss": 0.2536984086036682, "step": 6861 }, { "epoch": 1.822201566856991, "grad_norm": 1.3804156416489965, "learning_rate": 4.308754994251252e-07, "loss": 0.260431170463562, "step": 6862 }, { "epoch": 1.822467135838534, "grad_norm": 1.1376782237723586, "learning_rate": 4.2960144639318855e-07, "loss": 0.19348303973674774, "step": 6863 }, { "epoch": 1.822732704820077, "grad_norm": 1.3505211109720399, "learning_rate": 4.283292384212201e-07, "loss": 0.2284386157989502, "step": 6864 }, { "epoch": 1.82299827380162, "grad_norm": 1.2449697035186624, "learning_rate": 4.270588757544869e-07, "loss": 0.23439526557922363, "step": 6865 }, { "epoch": 1.823263842783163, "grad_norm": 1.247098399621602, "learning_rate": 4.2579035863790086e-07, "loss": 0.2123441994190216, "step": 6866 }, { "epoch": 1.8235294117647058, "grad_norm": 1.251423525262008, "learning_rate": 4.245236873160163e-07, "loss": 0.24568180739879608, "step": 6867 }, { "epoch": 1.8237949807462488, "grad_norm": 1.4504253184377665, "learning_rate": 4.232588620330325e-07, "loss": 0.24078285694122314, "step": 6868 }, { "epoch": 1.8240605497277917, "grad_norm": 1.157509101798501, "learning_rate": 4.2199588303279414e-07, "loss": 0.2003621608018875, "step": 6869 }, { "epoch": 1.8243261187093347, "grad_norm": 1.3049050095763572, "learning_rate": 4.2073475055878664e-07, "loss": 0.21201889216899872, "step": 6870 }, { "epoch": 1.8245916876908777, "grad_norm": 1.429124542908126, "learning_rate": 4.1947546485414215e-07, "loss": 0.23175427317619324, "step": 6871 }, { "epoch": 1.8248572566724208, "grad_norm": 1.3101487536079581, "learning_rate": 4.182180261616364e-07, "loss": 0.2391383945941925, "step": 6872 }, { "epoch": 1.8251228256539638, "grad_norm": 1.341869026992186, "learning_rate": 4.169624347236878e-07, "loss": 0.23120146989822388, "step": 6873 }, { "epoch": 1.8253883946355067, "grad_norm": 1.1699948636498165, "learning_rate": 4.157086907823604e-07, "loss": 0.22541432082653046, "step": 6874 }, { "epoch": 1.8256539636170497, "grad_norm": 1.3354293669412138, "learning_rate": 4.1445679457936094e-07, "loss": 0.25613510608673096, "step": 6875 }, { "epoch": 1.8259195325985926, "grad_norm": 1.191861909098097, "learning_rate": 4.1320674635604186e-07, "loss": 0.21002547442913055, "step": 6876 }, { "epoch": 1.8261851015801356, "grad_norm": 1.230870532242656, "learning_rate": 4.119585463533959e-07, "loss": 0.2593066692352295, "step": 6877 }, { "epoch": 1.8264506705616785, "grad_norm": 1.4772106156087776, "learning_rate": 4.1071219481206184e-07, "loss": 0.23771531879901886, "step": 6878 }, { "epoch": 1.8267162395432215, "grad_norm": 1.3106459571340912, "learning_rate": 4.094676919723206e-07, "loss": 0.2069541960954666, "step": 6879 }, { "epoch": 1.8269818085247644, "grad_norm": 1.2065450512433227, "learning_rate": 4.082250380740993e-07, "loss": 0.21314311027526855, "step": 6880 }, { "epoch": 1.8272473775063074, "grad_norm": 1.2723957233809677, "learning_rate": 4.069842333569662e-07, "loss": 0.198696106672287, "step": 6881 }, { "epoch": 1.8275129464878503, "grad_norm": 1.2365636263350124, "learning_rate": 4.057452780601334e-07, "loss": 0.22771228849887848, "step": 6882 }, { "epoch": 1.8277785154693933, "grad_norm": 1.3935711018120034, "learning_rate": 4.045081724224564e-07, "loss": 0.24176150560379028, "step": 6883 }, { "epoch": 1.8280440844509362, "grad_norm": 1.1711714123320747, "learning_rate": 4.0327291668243785e-07, "loss": 0.18257084488868713, "step": 6884 }, { "epoch": 1.8283096534324792, "grad_norm": 1.7740145369201021, "learning_rate": 4.02039511078216e-07, "loss": 0.2317531704902649, "step": 6885 }, { "epoch": 1.8285752224140222, "grad_norm": 1.237685133468282, "learning_rate": 4.008079558475797e-07, "loss": 0.22523516416549683, "step": 6886 }, { "epoch": 1.828840791395565, "grad_norm": 1.338469580607285, "learning_rate": 3.995782512279578e-07, "loss": 0.22351330518722534, "step": 6887 }, { "epoch": 1.829106360377108, "grad_norm": 1.3272231861758204, "learning_rate": 3.983503974564229e-07, "loss": 0.22151902318000793, "step": 6888 }, { "epoch": 1.829371929358651, "grad_norm": 1.2483501881623744, "learning_rate": 3.971243947696901e-07, "loss": 0.20800583064556122, "step": 6889 }, { "epoch": 1.829637498340194, "grad_norm": 1.189419989304772, "learning_rate": 3.959002434041181e-07, "loss": 0.21332690119743347, "step": 6890 }, { "epoch": 1.829903067321737, "grad_norm": 1.3040750377284556, "learning_rate": 3.946779435957093e-07, "loss": 0.2561502456665039, "step": 6891 }, { "epoch": 1.8301686363032799, "grad_norm": 1.2150229659643972, "learning_rate": 3.934574955801074e-07, "loss": 0.23636910319328308, "step": 6892 }, { "epoch": 1.8304342052848228, "grad_norm": 1.303931878967275, "learning_rate": 3.922388995926041e-07, "loss": 0.26683998107910156, "step": 6893 }, { "epoch": 1.8306997742663658, "grad_norm": 1.319570373744726, "learning_rate": 3.910221558681271e-07, "loss": 0.2779492735862732, "step": 6894 }, { "epoch": 1.8309653432479087, "grad_norm": 1.473106593059021, "learning_rate": 3.8980726464125095e-07, "loss": 0.20174488425254822, "step": 6895 }, { "epoch": 1.8312309122294517, "grad_norm": 1.3128034885814306, "learning_rate": 3.885942261461928e-07, "loss": 0.21486055850982666, "step": 6896 }, { "epoch": 1.8314964812109946, "grad_norm": 1.2201269476427121, "learning_rate": 3.8738304061681107e-07, "loss": 0.25637733936309814, "step": 6897 }, { "epoch": 1.8317620501925376, "grad_norm": 1.3661274524986262, "learning_rate": 3.8617370828661014e-07, "loss": 0.2518364489078522, "step": 6898 }, { "epoch": 1.8320276191740805, "grad_norm": 1.2902396654446358, "learning_rate": 3.849662293887324e-07, "loss": 0.25752246379852295, "step": 6899 }, { "epoch": 1.8322931881556235, "grad_norm": 1.1514833439027936, "learning_rate": 3.8376060415596826e-07, "loss": 0.20891718566417694, "step": 6900 }, { "epoch": 1.8325587571371664, "grad_norm": 1.378720679176223, "learning_rate": 3.825568328207452e-07, "loss": 0.20491960644721985, "step": 6901 }, { "epoch": 1.8328243261187094, "grad_norm": 1.2540067790590503, "learning_rate": 3.813549156151386e-07, "loss": 0.22183339297771454, "step": 6902 }, { "epoch": 1.8330898951002523, "grad_norm": 1.3321077338345055, "learning_rate": 3.801548527708621e-07, "loss": 0.2476987987756729, "step": 6903 }, { "epoch": 1.8333554640817953, "grad_norm": 1.470629998110282, "learning_rate": 3.7895664451927493e-07, "loss": 0.26486238837242126, "step": 6904 }, { "epoch": 1.8336210330633382, "grad_norm": 1.2524745099106778, "learning_rate": 3.777602910913769e-07, "loss": 0.25922873616218567, "step": 6905 }, { "epoch": 1.8338866020448812, "grad_norm": 1.317563058388092, "learning_rate": 3.7656579271781127e-07, "loss": 0.22682476043701172, "step": 6906 }, { "epoch": 1.8341521710264241, "grad_norm": 1.2391277284536568, "learning_rate": 3.753731496288626e-07, "loss": 0.20371592044830322, "step": 6907 }, { "epoch": 1.834417740007967, "grad_norm": 1.2444383452097851, "learning_rate": 3.7418236205445826e-07, "loss": 0.23857446014881134, "step": 6908 }, { "epoch": 1.83468330898951, "grad_norm": 2.6487436557467645, "learning_rate": 3.729934302241689e-07, "loss": 0.27119290828704834, "step": 6909 }, { "epoch": 1.834948877971053, "grad_norm": 1.254159773595776, "learning_rate": 3.7180635436720567e-07, "loss": 0.2354927361011505, "step": 6910 }, { "epoch": 1.835214446952596, "grad_norm": 1.301136184663389, "learning_rate": 3.706211347124233e-07, "loss": 0.26378512382507324, "step": 6911 }, { "epoch": 1.835480015934139, "grad_norm": 1.3296098934003593, "learning_rate": 3.6943777148831907e-07, "loss": 0.20725026726722717, "step": 6912 }, { "epoch": 1.8357455849156818, "grad_norm": 1.2212362377090786, "learning_rate": 3.682562649230304e-07, "loss": 0.2049856185913086, "step": 6913 }, { "epoch": 1.8360111538972248, "grad_norm": 1.2555620791922353, "learning_rate": 3.6707661524433833e-07, "loss": 0.19303423166275024, "step": 6914 }, { "epoch": 1.8362767228787678, "grad_norm": 1.2395332139010746, "learning_rate": 3.6589882267966445e-07, "loss": 0.21510104835033417, "step": 6915 }, { "epoch": 1.8365422918603107, "grad_norm": 1.1669418633603965, "learning_rate": 3.6472288745607376e-07, "loss": 0.1933138072490692, "step": 6916 }, { "epoch": 1.8368078608418537, "grad_norm": 1.112367559966563, "learning_rate": 3.6354880980027373e-07, "loss": 0.2015206664800644, "step": 6917 }, { "epoch": 1.8370734298233966, "grad_norm": 1.2823070307410491, "learning_rate": 3.6237658993861114e-07, "loss": 0.20550866425037384, "step": 6918 }, { "epoch": 1.8373389988049396, "grad_norm": 1.3067689335737758, "learning_rate": 3.612062280970763e-07, "loss": 0.221620112657547, "step": 6919 }, { "epoch": 1.8376045677864825, "grad_norm": 1.3556317520839982, "learning_rate": 3.6003772450130315e-07, "loss": 0.23098941147327423, "step": 6920 }, { "epoch": 1.8378701367680255, "grad_norm": 1.147765516964157, "learning_rate": 3.588710793765626e-07, "loss": 0.2119837999343872, "step": 6921 }, { "epoch": 1.8381357057495684, "grad_norm": 1.3802709807389941, "learning_rate": 3.5770629294777146e-07, "loss": 0.24879229068756104, "step": 6922 }, { "epoch": 1.8384012747311114, "grad_norm": 1.3060365647669372, "learning_rate": 3.565433654394879e-07, "loss": 0.18895789980888367, "step": 6923 }, { "epoch": 1.8386668437126543, "grad_norm": 1.2553378569117732, "learning_rate": 3.55382297075908e-07, "loss": 0.23148275911808014, "step": 6924 }, { "epoch": 1.8389324126941973, "grad_norm": 1.212120061404488, "learning_rate": 3.542230880808739e-07, "loss": 0.20919913053512573, "step": 6925 }, { "epoch": 1.8391979816757402, "grad_norm": 1.4703495422250146, "learning_rate": 3.53065738677868e-07, "loss": 0.22832845151424408, "step": 6926 }, { "epoch": 1.8394635506572832, "grad_norm": 1.2792392305491092, "learning_rate": 3.519102490900117e-07, "loss": 0.25866004824638367, "step": 6927 }, { "epoch": 1.8397291196388261, "grad_norm": 1.4425441758777668, "learning_rate": 3.507566195400691e-07, "loss": 0.23372048139572144, "step": 6928 }, { "epoch": 1.839994688620369, "grad_norm": 1.3100572186568338, "learning_rate": 3.496048502504501e-07, "loss": 0.2516997158527374, "step": 6929 }, { "epoch": 1.840260257601912, "grad_norm": 1.3352189279547024, "learning_rate": 3.4845494144320036e-07, "loss": 0.21170508861541748, "step": 6930 }, { "epoch": 1.840525826583455, "grad_norm": 1.3970465930645521, "learning_rate": 3.473068933400081e-07, "loss": 0.2642953395843506, "step": 6931 }, { "epoch": 1.840791395564998, "grad_norm": 1.2429277065520816, "learning_rate": 3.461607061622041e-07, "loss": 0.2294994294643402, "step": 6932 }, { "epoch": 1.8410569645465409, "grad_norm": 1.3898674163561502, "learning_rate": 3.450163801307582e-07, "loss": 0.2554621696472168, "step": 6933 }, { "epoch": 1.8413225335280838, "grad_norm": 1.5251200097904765, "learning_rate": 3.4387391546628733e-07, "loss": 0.2291295826435089, "step": 6934 }, { "epoch": 1.8415881025096268, "grad_norm": 1.2253918775229307, "learning_rate": 3.4273331238903974e-07, "loss": 0.1996842920780182, "step": 6935 }, { "epoch": 1.8418536714911697, "grad_norm": 1.3974356568527164, "learning_rate": 3.415945711189128e-07, "loss": 0.248038187623024, "step": 6936 }, { "epoch": 1.8421192404727127, "grad_norm": 1.4224083213114915, "learning_rate": 3.4045769187544096e-07, "loss": 0.232235848903656, "step": 6937 }, { "epoch": 1.8423848094542556, "grad_norm": 1.2811247103872994, "learning_rate": 3.3932267487780333e-07, "loss": 0.2526085376739502, "step": 6938 }, { "epoch": 1.8426503784357986, "grad_norm": 1.324059920588895, "learning_rate": 3.381895203448182e-07, "loss": 0.22401389479637146, "step": 6939 }, { "epoch": 1.8429159474173415, "grad_norm": 1.2904044842651823, "learning_rate": 3.3705822849494195e-07, "loss": 0.2509264647960663, "step": 6940 }, { "epoch": 1.8431815163988845, "grad_norm": 1.2502849304352568, "learning_rate": 3.3592879954627564e-07, "loss": 0.2451169192790985, "step": 6941 }, { "epoch": 1.8434470853804275, "grad_norm": 1.2774613485778883, "learning_rate": 3.3480123371655957e-07, "loss": 0.2361738532781601, "step": 6942 }, { "epoch": 1.8437126543619704, "grad_norm": 1.1823675774441849, "learning_rate": 3.3367553122317544e-07, "loss": 0.22336295247077942, "step": 6943 }, { "epoch": 1.8439782233435134, "grad_norm": 1.4218109729535482, "learning_rate": 3.325516922831451e-07, "loss": 0.22287659347057343, "step": 6944 }, { "epoch": 1.8442437923250563, "grad_norm": 1.2819242467045069, "learning_rate": 3.3142971711312975e-07, "loss": 0.21845945715904236, "step": 6945 }, { "epoch": 1.8445093613065993, "grad_norm": 1.2822597279006254, "learning_rate": 3.303096059294364e-07, "loss": 0.2650350332260132, "step": 6946 }, { "epoch": 1.8447749302881422, "grad_norm": 1.346661503925149, "learning_rate": 3.291913589480078e-07, "loss": 0.21282124519348145, "step": 6947 }, { "epoch": 1.8450404992696852, "grad_norm": 1.1254422779054267, "learning_rate": 3.280749763844293e-07, "loss": 0.17899346351623535, "step": 6948 }, { "epoch": 1.8453060682512281, "grad_norm": 1.3295675928838626, "learning_rate": 3.269604584539254e-07, "loss": 0.23462103307247162, "step": 6949 }, { "epoch": 1.845571637232771, "grad_norm": 1.2573990354862534, "learning_rate": 3.2584780537136206e-07, "loss": 0.20188388228416443, "step": 6950 }, { "epoch": 1.845837206214314, "grad_norm": 1.3823133322277716, "learning_rate": 3.247370173512443e-07, "loss": 0.2760109305381775, "step": 6951 }, { "epoch": 1.846102775195857, "grad_norm": 1.1542508493730164, "learning_rate": 3.236280946077219e-07, "loss": 0.20977352559566498, "step": 6952 }, { "epoch": 1.8463683441774, "grad_norm": 1.299549634983184, "learning_rate": 3.225210373545806e-07, "loss": 0.26468873023986816, "step": 6953 }, { "epoch": 1.8466339131589429, "grad_norm": 1.287524526318513, "learning_rate": 3.214158458052463e-07, "loss": 0.2362184375524521, "step": 6954 }, { "epoch": 1.8468994821404858, "grad_norm": 1.29131597308928, "learning_rate": 3.2031252017278966e-07, "loss": 0.21406327188014984, "step": 6955 }, { "epoch": 1.847165051122029, "grad_norm": 1.4794600314925854, "learning_rate": 3.1921106066991835e-07, "loss": 0.2698758840560913, "step": 6956 }, { "epoch": 1.847430620103572, "grad_norm": 1.3029413719135112, "learning_rate": 3.1811146750898025e-07, "loss": 0.22954389452934265, "step": 6957 }, { "epoch": 1.847696189085115, "grad_norm": 1.149631756175727, "learning_rate": 3.170137409019636e-07, "loss": 0.23005755245685577, "step": 6958 }, { "epoch": 1.8479617580666579, "grad_norm": 1.270561680049171, "learning_rate": 3.159178810604968e-07, "loss": 0.22408893704414368, "step": 6959 }, { "epoch": 1.8482273270482008, "grad_norm": 1.1761716687553918, "learning_rate": 3.14823888195851e-07, "loss": 0.1983698308467865, "step": 6960 }, { "epoch": 1.8484928960297438, "grad_norm": 1.387251984339494, "learning_rate": 3.137317625189329e-07, "loss": 0.24643054604530334, "step": 6961 }, { "epoch": 1.8487584650112867, "grad_norm": 1.3612119090250128, "learning_rate": 3.1264150424029083e-07, "loss": 0.274917870759964, "step": 6962 }, { "epoch": 1.8490240339928297, "grad_norm": 1.2836957141365997, "learning_rate": 3.115531135701155e-07, "loss": 0.2129468023777008, "step": 6963 }, { "epoch": 1.8492896029743726, "grad_norm": 1.3421884287788837, "learning_rate": 3.1046659071823695e-07, "loss": 0.24127928912639618, "step": 6964 }, { "epoch": 1.8495551719559156, "grad_norm": 1.2737231627436634, "learning_rate": 3.093819358941208e-07, "loss": 0.2528054416179657, "step": 6965 }, { "epoch": 1.8498207409374585, "grad_norm": 1.253824703575336, "learning_rate": 3.0829914930687767e-07, "loss": 0.23623798787593842, "step": 6966 }, { "epoch": 1.8500863099190015, "grad_norm": 1.231408637511902, "learning_rate": 3.0721823116525497e-07, "loss": 0.20241659879684448, "step": 6967 }, { "epoch": 1.8503518789005444, "grad_norm": 1.264350645442844, "learning_rate": 3.0613918167764156e-07, "loss": 0.24365916848182678, "step": 6968 }, { "epoch": 1.8506174478820874, "grad_norm": 1.311846273217192, "learning_rate": 3.0506200105206554e-07, "loss": 0.2550637722015381, "step": 6969 }, { "epoch": 1.8508830168636303, "grad_norm": 1.1438212130974086, "learning_rate": 3.0398668949619515e-07, "loss": 0.21531938016414642, "step": 6970 }, { "epoch": 1.8511485858451733, "grad_norm": 1.3468646282560623, "learning_rate": 3.029132472173368e-07, "loss": 0.22749900817871094, "step": 6971 }, { "epoch": 1.8514141548267162, "grad_norm": 1.186404759445675, "learning_rate": 3.018416744224373e-07, "loss": 0.1826775223016739, "step": 6972 }, { "epoch": 1.8516797238082592, "grad_norm": 1.1782373460713542, "learning_rate": 3.0077197131808344e-07, "loss": 0.21982814371585846, "step": 6973 }, { "epoch": 1.8519452927898021, "grad_norm": 1.2874557997839566, "learning_rate": 2.997041381105026e-07, "loss": 0.23515473306179047, "step": 6974 }, { "epoch": 1.852210861771345, "grad_norm": 1.2184369208885015, "learning_rate": 2.9863817500556e-07, "loss": 0.19620616734027863, "step": 6975 }, { "epoch": 1.852476430752888, "grad_norm": 1.208715706835639, "learning_rate": 2.975740822087603e-07, "loss": 0.22158116102218628, "step": 6976 }, { "epoch": 1.852741999734431, "grad_norm": 1.5176127203291871, "learning_rate": 2.96511859925247e-07, "loss": 0.23082244396209717, "step": 6977 }, { "epoch": 1.853007568715974, "grad_norm": 1.286088700644728, "learning_rate": 2.954515083598064e-07, "loss": 0.22743141651153564, "step": 6978 }, { "epoch": 1.853273137697517, "grad_norm": 1.3437900472909596, "learning_rate": 2.943930277168594e-07, "loss": 0.2329188883304596, "step": 6979 }, { "epoch": 1.8535387066790598, "grad_norm": 1.1892741095151198, "learning_rate": 2.9333641820047055e-07, "loss": 0.20360302925109863, "step": 6980 }, { "epoch": 1.8538042756606028, "grad_norm": 1.1771915113483071, "learning_rate": 2.922816800143402e-07, "loss": 0.1903664767742157, "step": 6981 }, { "epoch": 1.8540698446421457, "grad_norm": 1.2252145672801615, "learning_rate": 2.912288133618102e-07, "loss": 0.2247854322195053, "step": 6982 }, { "epoch": 1.8543354136236887, "grad_norm": 1.305215823982529, "learning_rate": 2.9017781844586035e-07, "loss": 0.22693192958831787, "step": 6983 }, { "epoch": 1.8546009826052319, "grad_norm": 1.3213552294005186, "learning_rate": 2.891286954691108e-07, "loss": 0.23769894242286682, "step": 6984 }, { "epoch": 1.8548665515867748, "grad_norm": 1.267542763443237, "learning_rate": 2.880814446338198e-07, "loss": 0.23251450061798096, "step": 6985 }, { "epoch": 1.8551321205683178, "grad_norm": 1.3253334264213772, "learning_rate": 2.870360661418847e-07, "loss": 0.20828741788864136, "step": 6986 }, { "epoch": 1.8553976895498607, "grad_norm": 1.2448815733296377, "learning_rate": 2.859925601948421e-07, "loss": 0.2324519008398056, "step": 6987 }, { "epoch": 1.8556632585314037, "grad_norm": 1.2799176737952995, "learning_rate": 2.8495092699386774e-07, "loss": 0.2166297733783722, "step": 6988 }, { "epoch": 1.8559288275129466, "grad_norm": 1.416567928880924, "learning_rate": 2.839111667397765e-07, "loss": 0.2760158181190491, "step": 6989 }, { "epoch": 1.8561943964944896, "grad_norm": 1.1117414218952344, "learning_rate": 2.8287327963302025e-07, "loss": 0.2263752520084381, "step": 6990 }, { "epoch": 1.8564599654760325, "grad_norm": 1.328135206527719, "learning_rate": 2.8183726587369455e-07, "loss": 0.2490656077861786, "step": 6991 }, { "epoch": 1.8567255344575755, "grad_norm": 1.4860885268210424, "learning_rate": 2.808031256615285e-07, "loss": 0.22495508193969727, "step": 6992 }, { "epoch": 1.8569911034391184, "grad_norm": 1.297235121122649, "learning_rate": 2.7977085919589253e-07, "loss": 0.2671046853065491, "step": 6993 }, { "epoch": 1.8572566724206614, "grad_norm": 1.2050300397617886, "learning_rate": 2.7874046667579535e-07, "loss": 0.19782954454421997, "step": 6994 }, { "epoch": 1.8575222414022043, "grad_norm": 1.3009259795352104, "learning_rate": 2.777119482998847e-07, "loss": 0.24458879232406616, "step": 6995 }, { "epoch": 1.8577878103837473, "grad_norm": 1.203325902936209, "learning_rate": 2.7668530426644637e-07, "loss": 0.23476794362068176, "step": 6996 }, { "epoch": 1.8580533793652902, "grad_norm": 1.3828799415147273, "learning_rate": 2.7566053477340535e-07, "loss": 0.2318287342786789, "step": 6997 }, { "epoch": 1.8583189483468332, "grad_norm": 1.1075382213650395, "learning_rate": 2.746376400183259e-07, "loss": 0.21341973543167114, "step": 6998 }, { "epoch": 1.8585845173283762, "grad_norm": 1.3634634009375282, "learning_rate": 2.7361662019840916e-07, "loss": 0.25269803404808044, "step": 6999 }, { "epoch": 1.858850086309919, "grad_norm": 1.2242004376785176, "learning_rate": 2.7259747551049653e-07, "loss": 0.24590039253234863, "step": 7000 }, { "epoch": 1.859115655291462, "grad_norm": 1.2116643717780577, "learning_rate": 2.715802061510664e-07, "loss": 0.19907096028327942, "step": 7001 }, { "epoch": 1.859381224273005, "grad_norm": 1.319285786592131, "learning_rate": 2.705648123162363e-07, "loss": 0.24304917454719543, "step": 7002 }, { "epoch": 1.859646793254548, "grad_norm": 1.3884525546157216, "learning_rate": 2.6955129420176193e-07, "loss": 0.24846915900707245, "step": 7003 }, { "epoch": 1.859912362236091, "grad_norm": 1.365283429552511, "learning_rate": 2.685396520030381e-07, "loss": 0.21709200739860535, "step": 7004 }, { "epoch": 1.8601779312176339, "grad_norm": 1.3687506828870908, "learning_rate": 2.675298859150977e-07, "loss": 0.28031325340270996, "step": 7005 }, { "epoch": 1.8604435001991768, "grad_norm": 1.1527129171653896, "learning_rate": 2.6652199613261155e-07, "loss": 0.20367707312107086, "step": 7006 }, { "epoch": 1.8607090691807198, "grad_norm": 1.1875101722790007, "learning_rate": 2.6551598284988877e-07, "loss": 0.20737403631210327, "step": 7007 }, { "epoch": 1.8609746381622627, "grad_norm": 1.3375926225189751, "learning_rate": 2.6451184626087646e-07, "loss": 0.2504046559333801, "step": 7008 }, { "epoch": 1.8612402071438057, "grad_norm": 1.3403751507501938, "learning_rate": 2.635095865591608e-07, "loss": 0.26347339153289795, "step": 7009 }, { "epoch": 1.8615057761253486, "grad_norm": 1.1832867553985462, "learning_rate": 2.625092039379662e-07, "loss": 0.2347220480442047, "step": 7010 }, { "epoch": 1.8617713451068916, "grad_norm": 1.2487098903864389, "learning_rate": 2.6151069859015386e-07, "loss": 0.23565630614757538, "step": 7011 }, { "epoch": 1.8620369140884345, "grad_norm": 1.2377624004623402, "learning_rate": 2.605140707082243e-07, "loss": 0.21462437510490417, "step": 7012 }, { "epoch": 1.8623024830699775, "grad_norm": 1.2992774401284823, "learning_rate": 2.595193204843149e-07, "loss": 0.24224728345870972, "step": 7013 }, { "epoch": 1.8625680520515204, "grad_norm": 1.3531530893390702, "learning_rate": 2.5852644811020344e-07, "loss": 0.24200880527496338, "step": 7014 }, { "epoch": 1.8628336210330634, "grad_norm": 1.2331149203562455, "learning_rate": 2.5753545377730227e-07, "loss": 0.23315191268920898, "step": 7015 }, { "epoch": 1.8630991900146063, "grad_norm": 1.4360061023192454, "learning_rate": 2.56546337676663e-07, "loss": 0.31112274527549744, "step": 7016 }, { "epoch": 1.8633647589961493, "grad_norm": 1.1775380155652753, "learning_rate": 2.555590999989754e-07, "loss": 0.2291945070028305, "step": 7017 }, { "epoch": 1.8636303279776922, "grad_norm": 1.3248749602779475, "learning_rate": 2.5457374093457057e-07, "loss": 0.2324746549129486, "step": 7018 }, { "epoch": 1.8638958969592352, "grad_norm": 1.3333311590100283, "learning_rate": 2.5359026067341086e-07, "loss": 0.2585206627845764, "step": 7019 }, { "epoch": 1.8641614659407781, "grad_norm": 1.254813387894953, "learning_rate": 2.5260865940510027e-07, "loss": 0.22986871004104614, "step": 7020 }, { "epoch": 1.864427034922321, "grad_norm": 1.3302473304174876, "learning_rate": 2.5162893731888074e-07, "loss": 0.22615428268909454, "step": 7021 }, { "epoch": 1.864692603903864, "grad_norm": 1.2311139475810073, "learning_rate": 2.5065109460363113e-07, "loss": 0.21324753761291504, "step": 7022 }, { "epoch": 1.864958172885407, "grad_norm": 1.2499721276179248, "learning_rate": 2.4967513144786736e-07, "loss": 0.2247733324766159, "step": 7023 }, { "epoch": 1.86522374186695, "grad_norm": 1.198842298043478, "learning_rate": 2.4870104803974336e-07, "loss": 0.22080597281455994, "step": 7024 }, { "epoch": 1.865489310848493, "grad_norm": 1.3721040923851937, "learning_rate": 2.4772884456705224e-07, "loss": 0.23669888079166412, "step": 7025 }, { "epoch": 1.8657548798300359, "grad_norm": 1.2946969495879501, "learning_rate": 2.4675852121722075e-07, "loss": 0.2320847064256668, "step": 7026 }, { "epoch": 1.8660204488115788, "grad_norm": 1.374404266409337, "learning_rate": 2.4579007817731925e-07, "loss": 0.2595662474632263, "step": 7027 }, { "epoch": 1.8662860177931218, "grad_norm": 1.2351512812852723, "learning_rate": 2.4482351563405174e-07, "loss": 0.22152045369148254, "step": 7028 }, { "epoch": 1.8665515867746647, "grad_norm": 1.270416082371449, "learning_rate": 2.4385883377375683e-07, "loss": 0.2391948401927948, "step": 7029 }, { "epoch": 1.8668171557562077, "grad_norm": 1.3234796115140017, "learning_rate": 2.428960327824159e-07, "loss": 0.23117749392986298, "step": 7030 }, { "epoch": 1.8670827247377506, "grad_norm": 1.313106749776766, "learning_rate": 2.41935112845646e-07, "loss": 0.24019500613212585, "step": 7031 }, { "epoch": 1.8673482937192936, "grad_norm": 1.253088890729472, "learning_rate": 2.4097607414869995e-07, "loss": 0.19560202956199646, "step": 7032 }, { "epoch": 1.8676138627008365, "grad_norm": 1.3625686769003584, "learning_rate": 2.4001891687647103e-07, "loss": 0.23110055923461914, "step": 7033 }, { "epoch": 1.8678794316823795, "grad_norm": 1.3388200482229684, "learning_rate": 2.39063641213485e-07, "loss": 0.2214709371328354, "step": 7034 }, { "epoch": 1.8681450006639224, "grad_norm": 1.2700799842548796, "learning_rate": 2.381102473439101e-07, "loss": 0.22123369574546814, "step": 7035 }, { "epoch": 1.8684105696454654, "grad_norm": 1.4629863869289934, "learning_rate": 2.371587354515481e-07, "loss": 0.23984813690185547, "step": 7036 }, { "epoch": 1.8686761386270083, "grad_norm": 1.4496870886295976, "learning_rate": 2.3620910571984124e-07, "loss": 0.26089030504226685, "step": 7037 }, { "epoch": 1.8689417076085513, "grad_norm": 1.2076380290124689, "learning_rate": 2.3526135833186527e-07, "loss": 0.2344229370355606, "step": 7038 }, { "epoch": 1.8692072765900942, "grad_norm": 1.290620691312973, "learning_rate": 2.34315493470334e-07, "loss": 0.24499498307704926, "step": 7039 }, { "epoch": 1.8694728455716372, "grad_norm": 1.2975050166282813, "learning_rate": 2.333715113176005e-07, "loss": 0.21971477568149567, "step": 7040 }, { "epoch": 1.8697384145531801, "grad_norm": 1.2659856510175163, "learning_rate": 2.3242941205565362e-07, "loss": 0.2594453990459442, "step": 7041 }, { "epoch": 1.870003983534723, "grad_norm": 1.3125676617059407, "learning_rate": 2.3148919586611806e-07, "loss": 0.24689960479736328, "step": 7042 }, { "epoch": 1.870269552516266, "grad_norm": 1.2165345453138858, "learning_rate": 2.3055086293025665e-07, "loss": 0.19972509145736694, "step": 7043 }, { "epoch": 1.870535121497809, "grad_norm": 1.2460782677559714, "learning_rate": 2.2961441342896795e-07, "loss": 0.2139236032962799, "step": 7044 }, { "epoch": 1.870800690479352, "grad_norm": 1.196552292185578, "learning_rate": 2.286798475427898e-07, "loss": 0.2251984179019928, "step": 7045 }, { "epoch": 1.8710662594608949, "grad_norm": 1.2395291577625112, "learning_rate": 2.277471654518959e-07, "loss": 0.24517378211021423, "step": 7046 }, { "epoch": 1.8713318284424378, "grad_norm": 1.3048847468612028, "learning_rate": 2.2681636733609457e-07, "loss": 0.19115275144577026, "step": 7047 }, { "epoch": 1.8715973974239808, "grad_norm": 1.2997607659373802, "learning_rate": 2.2588745337483454e-07, "loss": 0.26092633605003357, "step": 7048 }, { "epoch": 1.8718629664055237, "grad_norm": 1.2646212726473884, "learning_rate": 2.2496042374719807e-07, "loss": 0.18862302601337433, "step": 7049 }, { "epoch": 1.8721285353870667, "grad_norm": 1.1602330038245767, "learning_rate": 2.2403527863190554e-07, "loss": 0.20728996396064758, "step": 7050 }, { "epoch": 1.8723941043686096, "grad_norm": 1.236025812615254, "learning_rate": 2.231120182073143e-07, "loss": 0.24244122207164764, "step": 7051 }, { "epoch": 1.8726596733501526, "grad_norm": 1.205655043915546, "learning_rate": 2.2219064265141866e-07, "loss": 0.18956953287124634, "step": 7052 }, { "epoch": 1.8729252423316956, "grad_norm": 1.1159089015267554, "learning_rate": 2.2127115214184868e-07, "loss": 0.19873176515102386, "step": 7053 }, { "epoch": 1.8731908113132385, "grad_norm": 1.2896839736015335, "learning_rate": 2.203535468558704e-07, "loss": 0.23717360198497772, "step": 7054 }, { "epoch": 1.8734563802947815, "grad_norm": 1.3203924338573048, "learning_rate": 2.1943782697038896e-07, "loss": 0.24051904678344727, "step": 7055 }, { "epoch": 1.8737219492763244, "grad_norm": 1.3193670550613668, "learning_rate": 2.1852399266194312e-07, "loss": 0.23541691899299622, "step": 7056 }, { "epoch": 1.8739875182578674, "grad_norm": 1.3395958296451687, "learning_rate": 2.1761204410671088e-07, "loss": 0.22566163539886475, "step": 7057 }, { "epoch": 1.8742530872394103, "grad_norm": 1.297432294479727, "learning_rate": 2.167019814805027e-07, "loss": 0.25771743059158325, "step": 7058 }, { "epoch": 1.8745186562209533, "grad_norm": 1.1482951648622821, "learning_rate": 2.1579380495876934e-07, "loss": 0.22624637186527252, "step": 7059 }, { "epoch": 1.8747842252024962, "grad_norm": 1.3036126318267591, "learning_rate": 2.148875147165963e-07, "loss": 0.24671627581119537, "step": 7060 }, { "epoch": 1.8750497941840392, "grad_norm": 1.1983704285109544, "learning_rate": 2.1398311092870605e-07, "loss": 0.21607278287410736, "step": 7061 }, { "epoch": 1.8753153631655821, "grad_norm": 1.1102939736369823, "learning_rate": 2.1308059376945689e-07, "loss": 0.1960655301809311, "step": 7062 }, { "epoch": 1.875580932147125, "grad_norm": 1.2816228458436618, "learning_rate": 2.1217996341284297e-07, "loss": 0.22005721926689148, "step": 7063 }, { "epoch": 1.875846501128668, "grad_norm": 1.2746284533707484, "learning_rate": 2.1128122003249541e-07, "loss": 0.21442776918411255, "step": 7064 }, { "epoch": 1.876112070110211, "grad_norm": 1.1849768238897622, "learning_rate": 2.1038436380168114e-07, "loss": 0.23126785457134247, "step": 7065 }, { "epoch": 1.876377639091754, "grad_norm": 1.4246070766583077, "learning_rate": 2.094893948933041e-07, "loss": 0.24286629259586334, "step": 7066 }, { "epoch": 1.8766432080732969, "grad_norm": 1.3706445020134141, "learning_rate": 2.0859631347990406e-07, "loss": 0.25771957635879517, "step": 7067 }, { "epoch": 1.87690877705484, "grad_norm": 1.1754559873110961, "learning_rate": 2.0770511973365436e-07, "loss": 0.19837790727615356, "step": 7068 }, { "epoch": 1.877174346036383, "grad_norm": 1.2372359407501599, "learning_rate": 2.0681581382636984e-07, "loss": 0.21209359169006348, "step": 7069 }, { "epoch": 1.877439915017926, "grad_norm": 1.9178204608286211, "learning_rate": 2.0592839592949554e-07, "loss": 0.26641422510147095, "step": 7070 }, { "epoch": 1.877705483999469, "grad_norm": 1.3604176831947503, "learning_rate": 2.050428662141146e-07, "loss": 0.21609601378440857, "step": 7071 }, { "epoch": 1.8779710529810119, "grad_norm": 1.2861845280896875, "learning_rate": 2.0415922485095051e-07, "loss": 0.23642000555992126, "step": 7072 }, { "epoch": 1.8782366219625548, "grad_norm": 1.3854568667341272, "learning_rate": 2.0327747201035587e-07, "loss": 0.24564675986766815, "step": 7073 }, { "epoch": 1.8785021909440978, "grad_norm": 1.229212126818568, "learning_rate": 2.0239760786232355e-07, "loss": 0.20001479983329773, "step": 7074 }, { "epoch": 1.8787677599256407, "grad_norm": 1.2817747323253132, "learning_rate": 2.015196325764801e-07, "loss": 0.2590208649635315, "step": 7075 }, { "epoch": 1.8790333289071837, "grad_norm": 1.2462050168824985, "learning_rate": 2.0064354632208904e-07, "loss": 0.23298504948616028, "step": 7076 }, { "epoch": 1.8792988978887266, "grad_norm": 1.2573573484068483, "learning_rate": 1.997693492680497e-07, "loss": 0.22409996390342712, "step": 7077 }, { "epoch": 1.8795644668702696, "grad_norm": 1.410723892029772, "learning_rate": 1.9889704158289724e-07, "loss": 0.27316784858703613, "step": 7078 }, { "epoch": 1.8798300358518125, "grad_norm": 1.2924796650338854, "learning_rate": 1.980266234348016e-07, "loss": 0.2271946519613266, "step": 7079 }, { "epoch": 1.8800956048333555, "grad_norm": 1.2438429761767338, "learning_rate": 1.9715809499156858e-07, "loss": 0.20887964963912964, "step": 7080 }, { "epoch": 1.8803611738148984, "grad_norm": 1.2112268618082698, "learning_rate": 1.9629145642064197e-07, "loss": 0.23468685150146484, "step": 7081 }, { "epoch": 1.8806267427964414, "grad_norm": 1.308865144497765, "learning_rate": 1.9542670788909813e-07, "loss": 0.21624556183815002, "step": 7082 }, { "epoch": 1.8808923117779843, "grad_norm": 1.1751415989571612, "learning_rate": 1.9456384956365149e-07, "loss": 0.22328166663646698, "step": 7083 }, { "epoch": 1.8811578807595273, "grad_norm": 1.3508603820961609, "learning_rate": 1.93702881610649e-07, "loss": 0.2526431381702423, "step": 7084 }, { "epoch": 1.8814234497410702, "grad_norm": 1.3562256445660688, "learning_rate": 1.9284380419607784e-07, "loss": 0.23668771982192993, "step": 7085 }, { "epoch": 1.8816890187226132, "grad_norm": 1.2668189225170288, "learning_rate": 1.9198661748555557e-07, "loss": 0.24710845947265625, "step": 7086 }, { "epoch": 1.8819545877041561, "grad_norm": 1.4047256701053605, "learning_rate": 1.911313216443389e-07, "loss": 0.22696900367736816, "step": 7087 }, { "epoch": 1.882220156685699, "grad_norm": 1.3717447863189725, "learning_rate": 1.9027791683731922e-07, "loss": 0.21652163565158844, "step": 7088 }, { "epoch": 1.882485725667242, "grad_norm": 1.3189608691767827, "learning_rate": 1.894264032290205e-07, "loss": 0.2166716307401657, "step": 7089 }, { "epoch": 1.882751294648785, "grad_norm": 1.3746931913110367, "learning_rate": 1.8857678098360698e-07, "loss": 0.26200050115585327, "step": 7090 }, { "epoch": 1.883016863630328, "grad_norm": 1.2945644704190118, "learning_rate": 1.8772905026487654e-07, "loss": 0.2292764037847519, "step": 7091 }, { "epoch": 1.883282432611871, "grad_norm": 1.3106590918741248, "learning_rate": 1.8688321123625842e-07, "loss": 0.23893016576766968, "step": 7092 }, { "epoch": 1.8835480015934138, "grad_norm": 1.2241030970764724, "learning_rate": 1.860392640608244e-07, "loss": 0.2509230673313141, "step": 7093 }, { "epoch": 1.8838135705749568, "grad_norm": 1.2218686374923997, "learning_rate": 1.8519720890127434e-07, "loss": 0.24156486988067627, "step": 7094 }, { "epoch": 1.8840791395564997, "grad_norm": 1.2859122561460798, "learning_rate": 1.843570459199462e-07, "loss": 0.2120019942522049, "step": 7095 }, { "epoch": 1.884344708538043, "grad_norm": 1.6579646138710773, "learning_rate": 1.835187752788159e-07, "loss": 0.23400259017944336, "step": 7096 }, { "epoch": 1.8846102775195859, "grad_norm": 1.281132346942695, "learning_rate": 1.8268239713949087e-07, "loss": 0.20913103222846985, "step": 7097 }, { "epoch": 1.8848758465011288, "grad_norm": 1.3381319381686223, "learning_rate": 1.8184791166321546e-07, "loss": 0.24468877911567688, "step": 7098 }, { "epoch": 1.8851414154826718, "grad_norm": 1.236616212709848, "learning_rate": 1.8101531901086767e-07, "loss": 0.2038918137550354, "step": 7099 }, { "epoch": 1.8854069844642147, "grad_norm": 1.3201086548941574, "learning_rate": 1.8018461934296239e-07, "loss": 0.24191413819789886, "step": 7100 }, { "epoch": 1.8856725534457577, "grad_norm": 1.277539269643606, "learning_rate": 1.793558128196493e-07, "loss": 0.24394474923610687, "step": 7101 }, { "epoch": 1.8859381224273006, "grad_norm": 1.1561225023553612, "learning_rate": 1.7852889960071063e-07, "loss": 0.22630709409713745, "step": 7102 }, { "epoch": 1.8862036914088436, "grad_norm": 1.5472360212555962, "learning_rate": 1.7770387984556768e-07, "loss": 0.23936980962753296, "step": 7103 }, { "epoch": 1.8864692603903865, "grad_norm": 1.275471897769737, "learning_rate": 1.768807537132733e-07, "loss": 0.24808618426322937, "step": 7104 }, { "epoch": 1.8867348293719295, "grad_norm": 1.273035999339445, "learning_rate": 1.7605952136251603e-07, "loss": 0.23934635519981384, "step": 7105 }, { "epoch": 1.8870003983534724, "grad_norm": 1.189686791776393, "learning_rate": 1.7524018295162148e-07, "loss": 0.22107656300067902, "step": 7106 }, { "epoch": 1.8872659673350154, "grad_norm": 1.3496800848037154, "learning_rate": 1.7442273863854553e-07, "loss": 0.23253028094768524, "step": 7107 }, { "epoch": 1.8875315363165583, "grad_norm": 1.3028365552765204, "learning_rate": 1.7360718858088542e-07, "loss": 0.2501102387905121, "step": 7108 }, { "epoch": 1.8877971052981013, "grad_norm": 1.4057988238229884, "learning_rate": 1.7279353293586765e-07, "loss": 0.25537967681884766, "step": 7109 }, { "epoch": 1.8880626742796442, "grad_norm": 2.7876746143917033, "learning_rate": 1.7198177186035447e-07, "loss": 0.25701045989990234, "step": 7110 }, { "epoch": 1.8883282432611872, "grad_norm": 1.1447271563365653, "learning_rate": 1.7117190551084628e-07, "loss": 0.2109440565109253, "step": 7111 }, { "epoch": 1.8885938122427302, "grad_norm": 1.2454061070152636, "learning_rate": 1.7036393404347373e-07, "loss": 0.22767721116542816, "step": 7112 }, { "epoch": 1.888859381224273, "grad_norm": 1.1572937395529788, "learning_rate": 1.6955785761400444e-07, "loss": 0.1976814568042755, "step": 7113 }, { "epoch": 1.889124950205816, "grad_norm": 1.1727224852039306, "learning_rate": 1.687536763778419e-07, "loss": 0.21109873056411743, "step": 7114 }, { "epoch": 1.889390519187359, "grad_norm": 1.1916227822459606, "learning_rate": 1.6795139049002095e-07, "loss": 0.2165786623954773, "step": 7115 }, { "epoch": 1.889656088168902, "grad_norm": 1.2917556149315792, "learning_rate": 1.6715100010521347e-07, "loss": 0.23962441086769104, "step": 7116 }, { "epoch": 1.889921657150445, "grad_norm": 1.2423009900583697, "learning_rate": 1.6635250537772596e-07, "loss": 0.23351140320301056, "step": 7117 }, { "epoch": 1.8901872261319879, "grad_norm": 1.3034348272306633, "learning_rate": 1.6555590646149866e-07, "loss": 0.19999945163726807, "step": 7118 }, { "epoch": 1.8904527951135308, "grad_norm": 1.432201467842623, "learning_rate": 1.647612035101054e-07, "loss": 0.27142196893692017, "step": 7119 }, { "epoch": 1.8907183640950738, "grad_norm": 1.2861780172834696, "learning_rate": 1.6396839667675691e-07, "loss": 0.21525685489177704, "step": 7120 }, { "epoch": 1.8909839330766167, "grad_norm": 3.2062699859400396, "learning_rate": 1.631774861142965e-07, "loss": 0.24305005371570587, "step": 7121 }, { "epoch": 1.8912495020581597, "grad_norm": 1.2019998279555377, "learning_rate": 1.6238847197520113e-07, "loss": 0.23202842473983765, "step": 7122 }, { "epoch": 1.8915150710397026, "grad_norm": 1.4409003412080332, "learning_rate": 1.6160135441158576e-07, "loss": 0.24373790621757507, "step": 7123 }, { "epoch": 1.8917806400212456, "grad_norm": 1.2360359431057044, "learning_rate": 1.6081613357519565e-07, "loss": 0.22774222493171692, "step": 7124 }, { "epoch": 1.8920462090027885, "grad_norm": 1.2064368847282083, "learning_rate": 1.6003280961741196e-07, "loss": 0.20660057663917542, "step": 7125 }, { "epoch": 1.8923117779843315, "grad_norm": 1.3070998228758686, "learning_rate": 1.5925138268925166e-07, "loss": 0.23578912019729614, "step": 7126 }, { "epoch": 1.8925773469658744, "grad_norm": 1.2737250152668298, "learning_rate": 1.5847185294136313e-07, "loss": 0.20852091908454895, "step": 7127 }, { "epoch": 1.8928429159474174, "grad_norm": 1.1465883719364975, "learning_rate": 1.5769422052403172e-07, "loss": 0.17455898225307465, "step": 7128 }, { "epoch": 1.8931084849289603, "grad_norm": 1.5036497092390075, "learning_rate": 1.5691848558717638e-07, "loss": 0.29552748799324036, "step": 7129 }, { "epoch": 1.8933740539105033, "grad_norm": 1.3009458238394367, "learning_rate": 1.5614464828034746e-07, "loss": 0.22972649335861206, "step": 7130 }, { "epoch": 1.8936396228920462, "grad_norm": 1.2296689152648304, "learning_rate": 1.5537270875273348e-07, "loss": 0.2134108692407608, "step": 7131 }, { "epoch": 1.8939051918735892, "grad_norm": 1.4119584533896288, "learning_rate": 1.546026671531542e-07, "loss": 0.24145451188087463, "step": 7132 }, { "epoch": 1.8941707608551321, "grad_norm": 1.355860353407812, "learning_rate": 1.5383452363006534e-07, "loss": 0.2323920726776123, "step": 7133 }, { "epoch": 1.894436329836675, "grad_norm": 1.197617700552455, "learning_rate": 1.5306827833155403e-07, "loss": 0.20091015100479126, "step": 7134 }, { "epoch": 1.894701898818218, "grad_norm": 1.370489911603159, "learning_rate": 1.523039314053465e-07, "loss": 0.2451317310333252, "step": 7135 }, { "epoch": 1.894967467799761, "grad_norm": 1.2946538259097045, "learning_rate": 1.5154148299879822e-07, "loss": 0.22744594514369965, "step": 7136 }, { "epoch": 1.895233036781304, "grad_norm": 1.2046527835430252, "learning_rate": 1.5078093325889943e-07, "loss": 0.2460673749446869, "step": 7137 }, { "epoch": 1.895498605762847, "grad_norm": 1.4172423595206858, "learning_rate": 1.5002228233227722e-07, "loss": 0.2524537444114685, "step": 7138 }, { "epoch": 1.8957641747443899, "grad_norm": 1.1840127480017744, "learning_rate": 1.4926553036518798e-07, "loss": 0.2056279480457306, "step": 7139 }, { "epoch": 1.8960297437259328, "grad_norm": 1.2144930845419581, "learning_rate": 1.485106775035261e-07, "loss": 0.2656184732913971, "step": 7140 }, { "epoch": 1.8962953127074758, "grad_norm": 1.1903286988332102, "learning_rate": 1.477577238928185e-07, "loss": 0.2190116047859192, "step": 7141 }, { "epoch": 1.8965608816890187, "grad_norm": 1.206151177902952, "learning_rate": 1.4700666967822574e-07, "loss": 0.22984017431735992, "step": 7142 }, { "epoch": 1.8968264506705617, "grad_norm": 1.1949819121682481, "learning_rate": 1.462575150045409e-07, "loss": 0.17947378754615784, "step": 7143 }, { "epoch": 1.8970920196521046, "grad_norm": 1.2649423314993642, "learning_rate": 1.4551026001619395e-07, "loss": 0.24965715408325195, "step": 7144 }, { "epoch": 1.8973575886336476, "grad_norm": 1.236302993447548, "learning_rate": 1.4476490485724526e-07, "loss": 0.2337307333946228, "step": 7145 }, { "epoch": 1.8976231576151905, "grad_norm": 1.2205039464348546, "learning_rate": 1.4402144967139098e-07, "loss": 0.22668538987636566, "step": 7146 }, { "epoch": 1.8978887265967335, "grad_norm": 1.350785859399433, "learning_rate": 1.4327989460196091e-07, "loss": 0.21934781968593597, "step": 7147 }, { "epoch": 1.8981542955782764, "grad_norm": 1.2212959594670445, "learning_rate": 1.4254023979191844e-07, "loss": 0.1957930624485016, "step": 7148 }, { "epoch": 1.8984198645598194, "grad_norm": 1.1724780894008597, "learning_rate": 1.4180248538385956e-07, "loss": 0.22351369261741638, "step": 7149 }, { "epoch": 1.8986854335413623, "grad_norm": 1.3930947329130605, "learning_rate": 1.4106663152001487e-07, "loss": 0.2603265047073364, "step": 7150 }, { "epoch": 1.8989510025229053, "grad_norm": 1.260479860356455, "learning_rate": 1.4033267834224873e-07, "loss": 0.2566663324832916, "step": 7151 }, { "epoch": 1.8992165715044482, "grad_norm": 1.2799319314175146, "learning_rate": 1.3960062599205682e-07, "loss": 0.23130206763744354, "step": 7152 }, { "epoch": 1.8994821404859912, "grad_norm": 1.1757231252562024, "learning_rate": 1.3887047461057179e-07, "loss": 0.17946425080299377, "step": 7153 }, { "epoch": 1.8997477094675341, "grad_norm": 1.2434099546308155, "learning_rate": 1.3814222433855884e-07, "loss": 0.23946328461170197, "step": 7154 }, { "epoch": 1.900013278449077, "grad_norm": 1.2249367291717066, "learning_rate": 1.3741587531641566e-07, "loss": 0.21002715826034546, "step": 7155 }, { "epoch": 1.90027884743062, "grad_norm": 1.3062374823275615, "learning_rate": 1.3669142768417242e-07, "loss": 0.2121986746788025, "step": 7156 }, { "epoch": 1.900544416412163, "grad_norm": 1.373871289837254, "learning_rate": 1.3596888158149525e-07, "loss": 0.26400670409202576, "step": 7157 }, { "epoch": 1.900809985393706, "grad_norm": 1.1813353744292436, "learning_rate": 1.3524823714768375e-07, "loss": 0.18764406442642212, "step": 7158 }, { "epoch": 1.9010755543752489, "grad_norm": 1.415975931925435, "learning_rate": 1.3452949452166686e-07, "loss": 0.2550342381000519, "step": 7159 }, { "epoch": 1.9013411233567918, "grad_norm": 1.304366194966887, "learning_rate": 1.3381265384201035e-07, "loss": 0.23188576102256775, "step": 7160 }, { "epoch": 1.9016066923383348, "grad_norm": 1.2473914592639561, "learning_rate": 1.3309771524691372e-07, "loss": 0.23124513030052185, "step": 7161 }, { "epoch": 1.9018722613198777, "grad_norm": 1.2056745011797427, "learning_rate": 1.323846788742078e-07, "loss": 0.19941067695617676, "step": 7162 }, { "epoch": 1.9021378303014207, "grad_norm": 1.4624998875104938, "learning_rate": 1.316735448613593e-07, "loss": 0.22510412335395813, "step": 7163 }, { "epoch": 1.9024033992829636, "grad_norm": 1.2448961229015743, "learning_rate": 1.309643133454641e-07, "loss": 0.19102326035499573, "step": 7164 }, { "epoch": 1.9026689682645066, "grad_norm": 1.2307397875458914, "learning_rate": 1.3025698446325618e-07, "loss": 0.20826731622219086, "step": 7165 }, { "epoch": 1.9029345372460496, "grad_norm": 1.3483240422328144, "learning_rate": 1.2955155835109757e-07, "loss": 0.23238909244537354, "step": 7166 }, { "epoch": 1.9032001062275925, "grad_norm": 1.4338552298496805, "learning_rate": 1.2884803514498833e-07, "loss": 0.2635011374950409, "step": 7167 }, { "epoch": 1.9034656752091355, "grad_norm": 1.1745725675637841, "learning_rate": 1.281464149805578e-07, "loss": 0.2073322981595993, "step": 7168 }, { "epoch": 1.9037312441906784, "grad_norm": 1.2344038568124596, "learning_rate": 1.274466979930711e-07, "loss": 0.22091326117515564, "step": 7169 }, { "epoch": 1.9039968131722214, "grad_norm": 1.114689842836081, "learning_rate": 1.2674888431742472e-07, "loss": 0.18613001704216003, "step": 7170 }, { "epoch": 1.9042623821537643, "grad_norm": 1.2788383965135535, "learning_rate": 1.2605297408814887e-07, "loss": 0.2165849655866623, "step": 7171 }, { "epoch": 1.9045279511353073, "grad_norm": 1.294203512401496, "learning_rate": 1.2535896743940844e-07, "loss": 0.21317794919013977, "step": 7172 }, { "epoch": 1.9047935201168502, "grad_norm": 1.47127212987638, "learning_rate": 1.2466686450499866e-07, "loss": 0.25221073627471924, "step": 7173 }, { "epoch": 1.9050590890983932, "grad_norm": 1.2647474973058104, "learning_rate": 1.239766654183472e-07, "loss": 0.21598559617996216, "step": 7174 }, { "epoch": 1.9053246580799361, "grad_norm": 1.2635227030316536, "learning_rate": 1.232883703125187e-07, "loss": 0.2284495085477829, "step": 7175 }, { "epoch": 1.905590227061479, "grad_norm": 1.1825527167306378, "learning_rate": 1.2260197932020713e-07, "loss": 0.21899332106113434, "step": 7176 }, { "epoch": 1.905855796043022, "grad_norm": 1.3588902485974734, "learning_rate": 1.2191749257374097e-07, "loss": 0.2633277177810669, "step": 7177 }, { "epoch": 1.906121365024565, "grad_norm": 1.2643904365611611, "learning_rate": 1.2123491020508137e-07, "loss": 0.2330140471458435, "step": 7178 }, { "epoch": 1.906386934006108, "grad_norm": 1.2757939155257039, "learning_rate": 1.2055423234582087e-07, "loss": 0.21859750151634216, "step": 7179 }, { "epoch": 1.9066525029876509, "grad_norm": 1.3985563606047093, "learning_rate": 1.198754591271878e-07, "loss": 0.252164363861084, "step": 7180 }, { "epoch": 1.906918071969194, "grad_norm": 1.4365501399575176, "learning_rate": 1.191985906800408e-07, "loss": 0.24968160688877106, "step": 7181 }, { "epoch": 1.907183640950737, "grad_norm": 1.199067091736319, "learning_rate": 1.185236271348722e-07, "loss": 0.2083423137664795, "step": 7182 }, { "epoch": 1.90744920993228, "grad_norm": 1.258208503364781, "learning_rate": 1.1785056862180789e-07, "loss": 0.2468394935131073, "step": 7183 }, { "epoch": 1.907714778913823, "grad_norm": 1.2908738922715033, "learning_rate": 1.1717941527060405e-07, "loss": 0.22417521476745605, "step": 7184 }, { "epoch": 1.9079803478953659, "grad_norm": 1.2789853859840312, "learning_rate": 1.1651016721065167e-07, "loss": 0.2411842793226242, "step": 7185 }, { "epoch": 1.9082459168769088, "grad_norm": 1.311967953603668, "learning_rate": 1.1584282457097417e-07, "loss": 0.24650761485099792, "step": 7186 }, { "epoch": 1.9085114858584518, "grad_norm": 1.3305923315328496, "learning_rate": 1.1517738748022755e-07, "loss": 0.22433717548847198, "step": 7187 }, { "epoch": 1.9087770548399947, "grad_norm": 1.2666444248015347, "learning_rate": 1.145138560667003e-07, "loss": 0.20867910981178284, "step": 7188 }, { "epoch": 1.9090426238215377, "grad_norm": 1.2511449541105855, "learning_rate": 1.138522304583134e-07, "loss": 0.21889618039131165, "step": 7189 }, { "epoch": 1.9093081928030806, "grad_norm": 1.113107479716362, "learning_rate": 1.1319251078261928e-07, "loss": 0.19350749254226685, "step": 7190 }, { "epoch": 1.9095737617846236, "grad_norm": 1.183265546980091, "learning_rate": 1.125346971668051e-07, "loss": 0.19123657047748566, "step": 7191 }, { "epoch": 1.9098393307661665, "grad_norm": 1.2653223306994201, "learning_rate": 1.118787897376905e-07, "loss": 0.21433782577514648, "step": 7192 }, { "epoch": 1.9101048997477095, "grad_norm": 1.474925382041675, "learning_rate": 1.1122478862172437e-07, "loss": 0.2521187663078308, "step": 7193 }, { "epoch": 1.9103704687292524, "grad_norm": 1.2835872924926361, "learning_rate": 1.1057269394499248e-07, "loss": 0.2141486555337906, "step": 7194 }, { "epoch": 1.9106360377107954, "grad_norm": 1.271472683987379, "learning_rate": 1.0992250583320985e-07, "loss": 0.22960343956947327, "step": 7195 }, { "epoch": 1.9109016066923383, "grad_norm": 1.3433609684783299, "learning_rate": 1.092742244117262e-07, "loss": 0.21809744834899902, "step": 7196 }, { "epoch": 1.9111671756738813, "grad_norm": 1.248347973820862, "learning_rate": 1.0862784980552044e-07, "loss": 0.22418212890625, "step": 7197 }, { "epoch": 1.9114327446554242, "grad_norm": 1.2504701200893746, "learning_rate": 1.0798338213920845e-07, "loss": 0.22050701081752777, "step": 7198 }, { "epoch": 1.9116983136369672, "grad_norm": 1.206849931438756, "learning_rate": 1.0734082153703418e-07, "loss": 0.23200345039367676, "step": 7199 }, { "epoch": 1.9119638826185101, "grad_norm": 1.1102825382626649, "learning_rate": 1.0670016812287631e-07, "loss": 0.18366631865501404, "step": 7200 }, { "epoch": 1.912229451600053, "grad_norm": 1.2844567521026582, "learning_rate": 1.0606142202024605e-07, "loss": 0.24362193048000336, "step": 7201 }, { "epoch": 1.912495020581596, "grad_norm": 1.2822631921528913, "learning_rate": 1.0542458335228601e-07, "loss": 0.2216200977563858, "step": 7202 }, { "epoch": 1.912760589563139, "grad_norm": 1.0921875359661608, "learning_rate": 1.0478965224176907e-07, "loss": 0.20216065645217896, "step": 7203 }, { "epoch": 1.913026158544682, "grad_norm": 1.254966671592246, "learning_rate": 1.041566288111051e-07, "loss": 0.22054359316825867, "step": 7204 }, { "epoch": 1.913291727526225, "grad_norm": 1.3532366246655447, "learning_rate": 1.0352551318233206e-07, "loss": 0.21569015085697174, "step": 7205 }, { "epoch": 1.9135572965077678, "grad_norm": 1.2826756039782425, "learning_rate": 1.028963054771226e-07, "loss": 0.22967267036437988, "step": 7206 }, { "epoch": 1.9138228654893108, "grad_norm": 1.3494789006319945, "learning_rate": 1.0226900581677968e-07, "loss": 0.2422460913658142, "step": 7207 }, { "epoch": 1.9140884344708538, "grad_norm": 1.3606228589652338, "learning_rate": 1.0164361432223879e-07, "loss": 0.25891292095184326, "step": 7208 }, { "epoch": 1.914354003452397, "grad_norm": 1.3570561855059022, "learning_rate": 1.0102013111406905e-07, "loss": 0.26915764808654785, "step": 7209 }, { "epoch": 1.9146195724339399, "grad_norm": 1.3889996377213247, "learning_rate": 1.0039855631247097e-07, "loss": 0.2268485426902771, "step": 7210 }, { "epoch": 1.9148851414154828, "grad_norm": 1.254622691077732, "learning_rate": 9.977889003727647e-08, "loss": 0.22551512718200684, "step": 7211 }, { "epoch": 1.9151507103970258, "grad_norm": 1.233084698895248, "learning_rate": 9.91611324079489e-08, "loss": 0.24224743247032166, "step": 7212 }, { "epoch": 1.9154162793785687, "grad_norm": 1.2426176239380708, "learning_rate": 9.854528354358517e-08, "loss": 0.19550879299640656, "step": 7213 }, { "epoch": 1.9156818483601117, "grad_norm": 1.3449782320604147, "learning_rate": 9.793134356291478e-08, "loss": 0.24986523389816284, "step": 7214 }, { "epoch": 1.9159474173416546, "grad_norm": 1.3340583070384961, "learning_rate": 9.731931258429638e-08, "loss": 0.2565170228481293, "step": 7215 }, { "epoch": 1.9162129863231976, "grad_norm": 1.185156912642083, "learning_rate": 9.670919072572449e-08, "loss": 0.2166958749294281, "step": 7216 }, { "epoch": 1.9164785553047405, "grad_norm": 1.2903999319183896, "learning_rate": 9.610097810482166e-08, "loss": 0.2002115249633789, "step": 7217 }, { "epoch": 1.9167441242862835, "grad_norm": 1.1589813054229285, "learning_rate": 9.549467483884412e-08, "loss": 0.209486186504364, "step": 7218 }, { "epoch": 1.9170096932678264, "grad_norm": 1.2748483155423624, "learning_rate": 9.489028104468056e-08, "loss": 0.22061321139335632, "step": 7219 }, { "epoch": 1.9172752622493694, "grad_norm": 1.3916500275624957, "learning_rate": 9.428779683885114e-08, "loss": 0.21880047023296356, "step": 7220 }, { "epoch": 1.9175408312309123, "grad_norm": 1.174801358834737, "learning_rate": 9.368722233750849e-08, "loss": 0.22674325108528137, "step": 7221 }, { "epoch": 1.9178064002124553, "grad_norm": 1.2877078963500264, "learning_rate": 9.308855765643332e-08, "loss": 0.22100718319416046, "step": 7222 }, { "epoch": 1.9180719691939982, "grad_norm": 1.3291196619762962, "learning_rate": 9.249180291104553e-08, "loss": 0.23105254769325256, "step": 7223 }, { "epoch": 1.9183375381755412, "grad_norm": 1.2897395451200044, "learning_rate": 9.189695821638755e-08, "loss": 0.22483405470848083, "step": 7224 }, { "epoch": 1.9186031071570842, "grad_norm": 1.0701399001286365, "learning_rate": 9.130402368714208e-08, "loss": 0.1939004510641098, "step": 7225 }, { "epoch": 1.918868676138627, "grad_norm": 1.2349263677236755, "learning_rate": 9.071299943761769e-08, "loss": 0.21722440421581268, "step": 7226 }, { "epoch": 1.91913424512017, "grad_norm": 1.2911544131515666, "learning_rate": 9.012388558175877e-08, "loss": 0.24213966727256775, "step": 7227 }, { "epoch": 1.919399814101713, "grad_norm": 1.2266941536480729, "learning_rate": 8.953668223313783e-08, "loss": 0.2305546998977661, "step": 7228 }, { "epoch": 1.919665383083256, "grad_norm": 1.3932840646040938, "learning_rate": 8.895138950496207e-08, "loss": 0.2678033709526062, "step": 7229 }, { "epoch": 1.919930952064799, "grad_norm": 1.2449965535251106, "learning_rate": 8.836800751006791e-08, "loss": 0.2491014301776886, "step": 7230 }, { "epoch": 1.9201965210463419, "grad_norm": 1.2551836576043742, "learning_rate": 8.778653636092537e-08, "loss": 0.21837326884269714, "step": 7231 }, { "epoch": 1.9204620900278848, "grad_norm": 1.2745391136427304, "learning_rate": 8.72069761696348e-08, "loss": 0.24149999022483826, "step": 7232 }, { "epoch": 1.9207276590094278, "grad_norm": 1.3444140835580012, "learning_rate": 8.662932704792793e-08, "loss": 0.2124684453010559, "step": 7233 }, { "epoch": 1.9209932279909707, "grad_norm": 1.3660213009765734, "learning_rate": 8.60535891071712e-08, "loss": 0.2452150285243988, "step": 7234 }, { "epoch": 1.9212587969725137, "grad_norm": 1.2005299446152509, "learning_rate": 8.547976245835698e-08, "loss": 0.23598846793174744, "step": 7235 }, { "epoch": 1.9215243659540566, "grad_norm": 1.3152974069295431, "learning_rate": 8.490784721211454e-08, "loss": 0.2105225920677185, "step": 7236 }, { "epoch": 1.9217899349355996, "grad_norm": 1.4424977304862223, "learning_rate": 8.433784347870122e-08, "loss": 0.2585388720035553, "step": 7237 }, { "epoch": 1.9220555039171425, "grad_norm": 1.2300698994172445, "learning_rate": 8.376975136800691e-08, "loss": 0.21703900396823883, "step": 7238 }, { "epoch": 1.9223210728986855, "grad_norm": 1.2580366958382383, "learning_rate": 8.3203570989554e-08, "loss": 0.22771210968494415, "step": 7239 }, { "epoch": 1.9225866418802284, "grad_norm": 1.1645003525207898, "learning_rate": 8.263930245249408e-08, "loss": 0.22535575926303864, "step": 7240 }, { "epoch": 1.9228522108617714, "grad_norm": 1.1822452042500315, "learning_rate": 8.207694586561344e-08, "loss": 0.2052595466375351, "step": 7241 }, { "epoch": 1.9231177798433143, "grad_norm": 1.2683012213528768, "learning_rate": 8.151650133732536e-08, "loss": 0.19611456990242004, "step": 7242 }, { "epoch": 1.9233833488248573, "grad_norm": 1.2762939262923303, "learning_rate": 8.095796897567787e-08, "loss": 0.20256826281547546, "step": 7243 }, { "epoch": 1.9236489178064002, "grad_norm": 1.5444723931343434, "learning_rate": 8.040134888835038e-08, "loss": 0.25462138652801514, "step": 7244 }, { "epoch": 1.9239144867879432, "grad_norm": 1.2813246309729553, "learning_rate": 7.984664118265262e-08, "loss": 0.27362316846847534, "step": 7245 }, { "epoch": 1.9241800557694861, "grad_norm": 1.3526739723939418, "learning_rate": 7.929384596552459e-08, "loss": 0.23749098181724548, "step": 7246 }, { "epoch": 1.924445624751029, "grad_norm": 1.3016147885306604, "learning_rate": 7.874296334353882e-08, "loss": 0.2472018599510193, "step": 7247 }, { "epoch": 1.924711193732572, "grad_norm": 1.3451463766339227, "learning_rate": 7.819399342290034e-08, "loss": 0.23181989789009094, "step": 7248 }, { "epoch": 1.924976762714115, "grad_norm": 1.2415200588572097, "learning_rate": 7.764693630944231e-08, "loss": 0.21363665163516998, "step": 7249 }, { "epoch": 1.925242331695658, "grad_norm": 1.1849821155034532, "learning_rate": 7.710179210863144e-08, "loss": 0.21239221096038818, "step": 7250 }, { "epoch": 1.925507900677201, "grad_norm": 1.4494720585200522, "learning_rate": 7.655856092556591e-08, "loss": 0.2643742263317108, "step": 7251 }, { "epoch": 1.9257734696587439, "grad_norm": 1.251877664981762, "learning_rate": 7.601724286497414e-08, "loss": 0.2232428789138794, "step": 7252 }, { "epoch": 1.9260390386402868, "grad_norm": 1.313277386530887, "learning_rate": 7.547783803121489e-08, "loss": 0.2052377462387085, "step": 7253 }, { "epoch": 1.9263046076218298, "grad_norm": 1.2540878413614547, "learning_rate": 7.494034652827942e-08, "loss": 0.22194740176200867, "step": 7254 }, { "epoch": 1.9265701766033727, "grad_norm": 1.2500554609811554, "learning_rate": 7.440476845979038e-08, "loss": 0.22004084289073944, "step": 7255 }, { "epoch": 1.9268357455849157, "grad_norm": 1.5480704193409933, "learning_rate": 7.387110392899965e-08, "loss": 0.2218078374862671, "step": 7256 }, { "epoch": 1.9271013145664586, "grad_norm": 1.3006193889830067, "learning_rate": 7.33393530387927e-08, "loss": 0.23272839188575745, "step": 7257 }, { "epoch": 1.9273668835480016, "grad_norm": 1.3119971487868216, "learning_rate": 7.280951589168417e-08, "loss": 0.23666653037071228, "step": 7258 }, { "epoch": 1.9276324525295445, "grad_norm": 1.235294099691234, "learning_rate": 7.228159258982126e-08, "loss": 0.21946533024311066, "step": 7259 }, { "epoch": 1.9278980215110875, "grad_norm": 1.252328485116134, "learning_rate": 7.175558323498033e-08, "loss": 0.22158634662628174, "step": 7260 }, { "epoch": 1.9281635904926304, "grad_norm": 1.1330771135999202, "learning_rate": 7.123148792857026e-08, "loss": 0.19978654384613037, "step": 7261 }, { "epoch": 1.9284291594741734, "grad_norm": 1.2859436875650823, "learning_rate": 7.070930677163023e-08, "loss": 0.21197813749313354, "step": 7262 }, { "epoch": 1.9286947284557163, "grad_norm": 1.2611518825786316, "learning_rate": 7.018903986483083e-08, "loss": 0.22650468349456787, "step": 7263 }, { "epoch": 1.9289602974372593, "grad_norm": 1.2701948406662635, "learning_rate": 6.967068730847293e-08, "loss": 0.22257481515407562, "step": 7264 }, { "epoch": 1.9292258664188022, "grad_norm": 1.3219742856760701, "learning_rate": 6.915424920248992e-08, "loss": 0.24899804592132568, "step": 7265 }, { "epoch": 1.9294914354003452, "grad_norm": 1.2996576951077934, "learning_rate": 6.863972564644328e-08, "loss": 0.250610888004303, "step": 7266 }, { "epoch": 1.9297570043818881, "grad_norm": 1.251137163804366, "learning_rate": 6.81271167395292e-08, "loss": 0.22786292433738708, "step": 7267 }, { "epoch": 1.930022573363431, "grad_norm": 1.2890465128808872, "learning_rate": 6.761642258056977e-08, "loss": 0.22816789150238037, "step": 7268 }, { "epoch": 1.930288142344974, "grad_norm": 1.3522601458627446, "learning_rate": 6.7107643268024e-08, "loss": 0.2589687407016754, "step": 7269 }, { "epoch": 1.930553711326517, "grad_norm": 1.1963236616697677, "learning_rate": 6.660077889997673e-08, "loss": 0.2281583547592163, "step": 7270 }, { "epoch": 1.93081928030806, "grad_norm": 1.3347065729182181, "learning_rate": 6.60958295741454e-08, "loss": 0.22833740711212158, "step": 7271 }, { "epoch": 1.931084849289603, "grad_norm": 1.1611313283452582, "learning_rate": 6.559279538787877e-08, "loss": 0.20720313489437103, "step": 7272 }, { "epoch": 1.9313504182711458, "grad_norm": 1.1884544288263172, "learning_rate": 6.509167643815594e-08, "loss": 0.17191773653030396, "step": 7273 }, { "epoch": 1.9316159872526888, "grad_norm": 1.1354230474675757, "learning_rate": 6.459247282158632e-08, "loss": 0.23586943745613098, "step": 7274 }, { "epoch": 1.9318815562342317, "grad_norm": 1.3318856895013969, "learning_rate": 6.409518463441067e-08, "loss": 0.21353168785572052, "step": 7275 }, { "epoch": 1.9321471252157747, "grad_norm": 1.404937308132313, "learning_rate": 6.359981197250009e-08, "loss": 0.23148195445537567, "step": 7276 }, { "epoch": 1.9324126941973176, "grad_norm": 1.3040478141172254, "learning_rate": 6.310635493135709e-08, "loss": 0.2113666534423828, "step": 7277 }, { "epoch": 1.9326782631788606, "grad_norm": 1.3399999009479682, "learning_rate": 6.261481360611332e-08, "loss": 0.27689510583877563, "step": 7278 }, { "epoch": 1.9329438321604036, "grad_norm": 1.2809237898551964, "learning_rate": 6.2125188091533e-08, "loss": 0.23746277391910553, "step": 7279 }, { "epoch": 1.9332094011419465, "grad_norm": 1.4215326252349767, "learning_rate": 6.163747848201062e-08, "loss": 0.23123708367347717, "step": 7280 }, { "epoch": 1.9334749701234895, "grad_norm": 1.3095914464878196, "learning_rate": 6.115168487157097e-08, "loss": 0.23640167713165283, "step": 7281 }, { "epoch": 1.9337405391050324, "grad_norm": 1.3278235730632808, "learning_rate": 6.066780735386801e-08, "loss": 0.2259385585784912, "step": 7282 }, { "epoch": 1.9340061080865754, "grad_norm": 1.230137664492021, "learning_rate": 6.018584602218824e-08, "loss": 0.219761461019516, "step": 7283 }, { "epoch": 1.9342716770681183, "grad_norm": 1.43054331413576, "learning_rate": 5.970580096944733e-08, "loss": 0.24411989748477936, "step": 7284 }, { "epoch": 1.9345372460496613, "grad_norm": 1.196712051616964, "learning_rate": 5.922767228819459e-08, "loss": 0.232415571808815, "step": 7285 }, { "epoch": 1.9348028150312042, "grad_norm": 1.341424963494065, "learning_rate": 5.875146007060517e-08, "loss": 0.25938165187835693, "step": 7286 }, { "epoch": 1.9350683840127472, "grad_norm": 1.253589726996753, "learning_rate": 5.827716440848785e-08, "loss": 0.22138425707817078, "step": 7287 }, { "epoch": 1.9353339529942901, "grad_norm": 1.12038038288381, "learning_rate": 5.7804785393282825e-08, "loss": 0.19724398851394653, "step": 7288 }, { "epoch": 1.935599521975833, "grad_norm": 1.4840167690508577, "learning_rate": 5.7334323116056136e-08, "loss": 0.25307583808898926, "step": 7289 }, { "epoch": 1.935865090957376, "grad_norm": 1.2525903433235852, "learning_rate": 5.686577766751078e-08, "loss": 0.2436421811580658, "step": 7290 }, { "epoch": 1.936130659938919, "grad_norm": 1.2518328182394873, "learning_rate": 5.6399149137973394e-08, "loss": 0.2164984941482544, "step": 7291 }, { "epoch": 1.936396228920462, "grad_norm": 1.2277499731042363, "learning_rate": 5.5934437617407576e-08, "loss": 0.22526800632476807, "step": 7292 }, { "epoch": 1.936661797902005, "grad_norm": 2.195756796154145, "learning_rate": 5.547164319540277e-08, "loss": 0.27787747979164124, "step": 7293 }, { "epoch": 1.936927366883548, "grad_norm": 1.2647979578451993, "learning_rate": 5.5010765961179825e-08, "loss": 0.2188001275062561, "step": 7294 }, { "epoch": 1.937192935865091, "grad_norm": 1.2454775538056309, "learning_rate": 5.4551806003591e-08, "loss": 0.22620335221290588, "step": 7295 }, { "epoch": 1.937458504846634, "grad_norm": 1.186081247005514, "learning_rate": 5.409476341111775e-08, "loss": 0.20357783138751984, "step": 7296 }, { "epoch": 1.937724073828177, "grad_norm": 1.2316030990526627, "learning_rate": 5.3639638271872906e-08, "loss": 0.22717830538749695, "step": 7297 }, { "epoch": 1.9379896428097199, "grad_norm": 1.1600371116406252, "learning_rate": 5.318643067360074e-08, "loss": 0.20139163732528687, "step": 7298 }, { "epoch": 1.9382552117912628, "grad_norm": 1.3377291184643103, "learning_rate": 5.273514070367247e-08, "loss": 0.2620807886123657, "step": 7299 }, { "epoch": 1.9385207807728058, "grad_norm": 1.2240680803779018, "learning_rate": 5.2285768449091834e-08, "loss": 0.2102596014738083, "step": 7300 }, { "epoch": 1.9387863497543487, "grad_norm": 1.3057613284367482, "learning_rate": 5.183831399649175e-08, "loss": 0.2105238288640976, "step": 7301 }, { "epoch": 1.9390519187358917, "grad_norm": 1.2241670740951547, "learning_rate": 5.1392777432138773e-08, "loss": 0.22178848087787628, "step": 7302 }, { "epoch": 1.9393174877174346, "grad_norm": 1.3648564311332518, "learning_rate": 5.094915884192419e-08, "loss": 0.23375345766544342, "step": 7303 }, { "epoch": 1.9395830566989776, "grad_norm": 1.3411332724549108, "learning_rate": 5.050745831137405e-08, "loss": 0.22709332406520844, "step": 7304 }, { "epoch": 1.9398486256805205, "grad_norm": 1.270429998105922, "learning_rate": 5.0067675925642437e-08, "loss": 0.2312362790107727, "step": 7305 }, { "epoch": 1.9401141946620635, "grad_norm": 1.159162680689607, "learning_rate": 4.962981176951376e-08, "loss": 0.2014419138431549, "step": 7306 }, { "epoch": 1.9403797636436064, "grad_norm": 1.4294147842238243, "learning_rate": 4.9193865927404936e-08, "loss": 0.23700466752052307, "step": 7307 }, { "epoch": 1.9406453326251494, "grad_norm": 1.3814639969092575, "learning_rate": 4.8759838483358745e-08, "loss": 0.23362770676612854, "step": 7308 }, { "epoch": 1.9409109016066923, "grad_norm": 1.4217349736822034, "learning_rate": 4.832772952105269e-08, "loss": 0.26057323813438416, "step": 7309 }, { "epoch": 1.9411764705882353, "grad_norm": 1.1693504727058668, "learning_rate": 4.789753912379014e-08, "loss": 0.20954950153827667, "step": 7310 }, { "epoch": 1.9414420395697782, "grad_norm": 1.1532528532836688, "learning_rate": 4.746926737450919e-08, "loss": 0.2100827842950821, "step": 7311 }, { "epoch": 1.9417076085513212, "grad_norm": 1.2509560196931713, "learning_rate": 4.7042914355773795e-08, "loss": 0.216691792011261, "step": 7312 }, { "epoch": 1.9419731775328641, "grad_norm": 1.2086430330598397, "learning_rate": 4.6618480149780434e-08, "loss": 0.22815749049186707, "step": 7313 }, { "epoch": 1.942238746514407, "grad_norm": 1.3440658280324072, "learning_rate": 4.6195964838353646e-08, "loss": 0.23365731537342072, "step": 7314 }, { "epoch": 1.94250431549595, "grad_norm": 1.5301363693806977, "learning_rate": 4.577536850295161e-08, "loss": 0.2112172693014145, "step": 7315 }, { "epoch": 1.942769884477493, "grad_norm": 1.1945701714854287, "learning_rate": 4.5356691224659466e-08, "loss": 0.21821950376033783, "step": 7316 }, { "epoch": 1.943035453459036, "grad_norm": 1.1491339078592526, "learning_rate": 4.4939933084192646e-08, "loss": 0.2374412566423416, "step": 7317 }, { "epoch": 1.943301022440579, "grad_norm": 1.3549046355713708, "learning_rate": 4.4525094161897987e-08, "loss": 0.2483779489994049, "step": 7318 }, { "epoch": 1.9435665914221218, "grad_norm": 1.327945477663327, "learning_rate": 4.411217453775152e-08, "loss": 0.23641882836818695, "step": 7319 }, { "epoch": 1.9438321604036648, "grad_norm": 1.3586245026219714, "learning_rate": 4.370117429135956e-08, "loss": 0.24779492616653442, "step": 7320 }, { "epoch": 1.944097729385208, "grad_norm": 1.1641395539357577, "learning_rate": 4.329209350195651e-08, "loss": 0.20288071036338806, "step": 7321 }, { "epoch": 1.944363298366751, "grad_norm": 1.2676649817410126, "learning_rate": 4.288493224840928e-08, "loss": 0.24286144971847534, "step": 7322 }, { "epoch": 1.9446288673482939, "grad_norm": 1.3164985028745375, "learning_rate": 4.2479690609213976e-08, "loss": 0.22825902700424194, "step": 7323 }, { "epoch": 1.9448944363298368, "grad_norm": 1.255280762331411, "learning_rate": 4.207636866249587e-08, "loss": 0.22563335299491882, "step": 7324 }, { "epoch": 1.9451600053113798, "grad_norm": 1.2990544857906836, "learning_rate": 4.167496648601166e-08, "loss": 0.22853273153305054, "step": 7325 }, { "epoch": 1.9454255742929227, "grad_norm": 1.1281442356079434, "learning_rate": 4.1275484157147216e-08, "loss": 0.20790672302246094, "step": 7326 }, { "epoch": 1.9456911432744657, "grad_norm": 1.1980029703513235, "learning_rate": 4.087792175291649e-08, "loss": 0.2165423035621643, "step": 7327 }, { "epoch": 1.9459567122560086, "grad_norm": 1.3858946395294593, "learning_rate": 4.048227934996485e-08, "loss": 0.2605394721031189, "step": 7328 }, { "epoch": 1.9462222812375516, "grad_norm": 1.280554987273632, "learning_rate": 4.008855702456904e-08, "loss": 0.22624900937080383, "step": 7329 }, { "epoch": 1.9464878502190945, "grad_norm": 1.1967949808184344, "learning_rate": 3.9696754852632804e-08, "loss": 0.23086196184158325, "step": 7330 }, { "epoch": 1.9467534192006375, "grad_norm": 1.4330145211347993, "learning_rate": 3.9306872909691265e-08, "loss": 0.24633410573005676, "step": 7331 }, { "epoch": 1.9470189881821804, "grad_norm": 2.2568432653955894, "learning_rate": 3.8918911270908745e-08, "loss": 0.2535535395145416, "step": 7332 }, { "epoch": 1.9472845571637234, "grad_norm": 1.3555855555438505, "learning_rate": 3.853287001108097e-08, "loss": 0.23904260993003845, "step": 7333 }, { "epoch": 1.9475501261452663, "grad_norm": 1.3963340527453718, "learning_rate": 3.814874920463063e-08, "loss": 0.22525179386138916, "step": 7334 }, { "epoch": 1.9478156951268093, "grad_norm": 1.415360473918547, "learning_rate": 3.776654892561293e-08, "loss": 0.21139883995056152, "step": 7335 }, { "epoch": 1.9480812641083523, "grad_norm": 1.2272269269066283, "learning_rate": 3.738626924771005e-08, "loss": 0.21939310431480408, "step": 7336 }, { "epoch": 1.9483468330898952, "grad_norm": 1.1845473795192814, "learning_rate": 3.7007910244236664e-08, "loss": 0.22852283716201782, "step": 7337 }, { "epoch": 1.9486124020714382, "grad_norm": 1.2529721413425112, "learning_rate": 3.663147198813666e-08, "loss": 0.20769211649894714, "step": 7338 }, { "epoch": 1.948877971052981, "grad_norm": 1.216093250313145, "learning_rate": 3.625695455198086e-08, "loss": 0.21721890568733215, "step": 7339 }, { "epoch": 1.949143540034524, "grad_norm": 1.261493312403511, "learning_rate": 3.588435800797263e-08, "loss": 0.24236848950386047, "step": 7340 }, { "epoch": 1.949409109016067, "grad_norm": 1.21142050375974, "learning_rate": 3.5513682427944505e-08, "loss": 0.2300192266702652, "step": 7341 }, { "epoch": 1.94967467799761, "grad_norm": 1.1850825722481098, "learning_rate": 3.5144927883358215e-08, "loss": 0.21636728942394257, "step": 7342 }, { "epoch": 1.949940246979153, "grad_norm": 1.3000939007920165, "learning_rate": 3.477809444530578e-08, "loss": 0.25367966294288635, "step": 7343 }, { "epoch": 1.9502058159606959, "grad_norm": 1.4245768388392126, "learning_rate": 3.4413182184507285e-08, "loss": 0.24514247477054596, "step": 7344 }, { "epoch": 1.9504713849422388, "grad_norm": 1.1048557155163508, "learning_rate": 3.405019117131425e-08, "loss": 0.18460404872894287, "step": 7345 }, { "epoch": 1.9507369539237818, "grad_norm": 1.275062396510646, "learning_rate": 3.3689121475706244e-08, "loss": 0.2096845805644989, "step": 7346 }, { "epoch": 1.9510025229053247, "grad_norm": 1.2314050158221594, "learning_rate": 3.332997316729536e-08, "loss": 0.22435057163238525, "step": 7347 }, { "epoch": 1.9512680918868677, "grad_norm": 1.208912476805739, "learning_rate": 3.2972746315318436e-08, "loss": 0.20798128843307495, "step": 7348 }, { "epoch": 1.9515336608684106, "grad_norm": 1.2922181556866412, "learning_rate": 3.2617440988645945e-08, "loss": 0.23958316445350647, "step": 7349 }, { "epoch": 1.9517992298499536, "grad_norm": 1.3799363972113297, "learning_rate": 3.2264057255777525e-08, "loss": 0.21934574842453003, "step": 7350 }, { "epoch": 1.9520647988314965, "grad_norm": 1.2014453671941887, "learning_rate": 3.1912595184839804e-08, "loss": 0.24321375787258148, "step": 7351 }, { "epoch": 1.9523303678130395, "grad_norm": 1.1661737247347086, "learning_rate": 3.156305484359079e-08, "loss": 0.20932736992835999, "step": 7352 }, { "epoch": 1.9525959367945824, "grad_norm": 1.2983329607047998, "learning_rate": 3.12154362994177e-08, "loss": 0.19824840128421783, "step": 7353 }, { "epoch": 1.9528615057761254, "grad_norm": 1.3128795915591134, "learning_rate": 3.0869739619338034e-08, "loss": 0.212745800614357, "step": 7354 }, { "epoch": 1.9531270747576683, "grad_norm": 1.247129470001585, "learning_rate": 3.0525964869997374e-08, "loss": 0.23044779896736145, "step": 7355 }, { "epoch": 1.9533926437392113, "grad_norm": 1.2323689907378315, "learning_rate": 3.018411211767158e-08, "loss": 0.2237459123134613, "step": 7356 }, { "epoch": 1.9536582127207542, "grad_norm": 1.3228713238231502, "learning_rate": 2.984418142826684e-08, "loss": 0.2592429518699646, "step": 7357 }, { "epoch": 1.9539237817022972, "grad_norm": 1.1444806738907807, "learning_rate": 2.9506172867315163e-08, "loss": 0.17559123039245605, "step": 7358 }, { "epoch": 1.9541893506838401, "grad_norm": 1.287127142439038, "learning_rate": 2.917008649998332e-08, "loss": 0.24143017828464508, "step": 7359 }, { "epoch": 1.954454919665383, "grad_norm": 1.310526275865734, "learning_rate": 2.883592239106392e-08, "loss": 0.23560799658298492, "step": 7360 }, { "epoch": 1.954720488646926, "grad_norm": 1.357586181070064, "learning_rate": 2.8503680604979878e-08, "loss": 0.2456119805574417, "step": 7361 }, { "epoch": 1.954986057628469, "grad_norm": 1.2143945666113656, "learning_rate": 2.817336120578329e-08, "loss": 0.21878069639205933, "step": 7362 }, { "epoch": 1.955251626610012, "grad_norm": 1.2288786099560105, "learning_rate": 2.7844964257155438e-08, "loss": 0.20496608316898346, "step": 7363 }, { "epoch": 1.955517195591555, "grad_norm": 1.2067776880816419, "learning_rate": 2.7518489822407902e-08, "loss": 0.23219498991966248, "step": 7364 }, { "epoch": 1.9557827645730979, "grad_norm": 1.3499865013336032, "learning_rate": 2.7193937964481442e-08, "loss": 0.2284272015094757, "step": 7365 }, { "epoch": 1.9560483335546408, "grad_norm": 1.3177047034961433, "learning_rate": 2.68713087459449e-08, "loss": 0.22303974628448486, "step": 7366 }, { "epoch": 1.9563139025361838, "grad_norm": 1.337791009624748, "learning_rate": 2.655060222899741e-08, "loss": 0.22489243745803833, "step": 7367 }, { "epoch": 1.9565794715177267, "grad_norm": 1.2719472133739602, "learning_rate": 2.6231818475468407e-08, "loss": 0.27986854314804077, "step": 7368 }, { "epoch": 1.9568450404992697, "grad_norm": 1.3884495118427658, "learning_rate": 2.591495754681539e-08, "loss": 0.29321208596229553, "step": 7369 }, { "epoch": 1.9571106094808126, "grad_norm": 1.3942541242432065, "learning_rate": 2.5600019504125053e-08, "loss": 0.2560982406139374, "step": 7370 }, { "epoch": 1.9573761784623556, "grad_norm": 1.4283472016053, "learning_rate": 2.528700440811438e-08, "loss": 0.264164537191391, "step": 7371 }, { "epoch": 1.9576417474438985, "grad_norm": 1.1832183058517125, "learning_rate": 2.4975912319127326e-08, "loss": 0.2135474979877472, "step": 7372 }, { "epoch": 1.9579073164254415, "grad_norm": 1.265205421311282, "learning_rate": 2.466674329714036e-08, "loss": 0.2100939154624939, "step": 7373 }, { "epoch": 1.9581728854069844, "grad_norm": 1.395586955333931, "learning_rate": 2.4359497401758026e-08, "loss": 0.23327934741973877, "step": 7374 }, { "epoch": 1.9584384543885274, "grad_norm": 1.0722904974981595, "learning_rate": 2.405417469221183e-08, "loss": 0.18830639123916626, "step": 7375 }, { "epoch": 1.9587040233700703, "grad_norm": 1.284092871282835, "learning_rate": 2.3750775227364686e-08, "loss": 0.2558823227882385, "step": 7376 }, { "epoch": 1.9589695923516133, "grad_norm": 1.2598399224501151, "learning_rate": 2.3449299065710917e-08, "loss": 0.24241580069065094, "step": 7377 }, { "epoch": 1.9592351613331562, "grad_norm": 1.1684337819721369, "learning_rate": 2.3149746265368478e-08, "loss": 0.21678534150123596, "step": 7378 }, { "epoch": 1.9595007303146992, "grad_norm": 1.2804084693654512, "learning_rate": 2.2852116884088947e-08, "loss": 0.20956794917583466, "step": 7379 }, { "epoch": 1.9597662992962421, "grad_norm": 1.2682321373225172, "learning_rate": 2.2556410979253095e-08, "loss": 0.2185555249452591, "step": 7380 }, { "epoch": 1.960031868277785, "grad_norm": 1.3369178147645102, "learning_rate": 2.226262860786643e-08, "loss": 0.21802933514118195, "step": 7381 }, { "epoch": 1.960297437259328, "grad_norm": 1.4565773631347612, "learning_rate": 2.1970769826570317e-08, "loss": 0.22842684388160706, "step": 7382 }, { "epoch": 1.960563006240871, "grad_norm": 1.2737807469252465, "learning_rate": 2.1680834691628627e-08, "loss": 0.23380814492702484, "step": 7383 }, { "epoch": 1.960828575222414, "grad_norm": 1.311531421948895, "learning_rate": 2.1392823258938877e-08, "loss": 0.23476335406303406, "step": 7384 }, { "epoch": 1.961094144203957, "grad_norm": 1.2100451325455786, "learning_rate": 2.110673558402554e-08, "loss": 0.19657662510871887, "step": 7385 }, { "epoch": 1.9613597131854998, "grad_norm": 1.191542044024077, "learning_rate": 2.0822571722044494e-08, "loss": 0.1724000722169876, "step": 7386 }, { "epoch": 1.9616252821670428, "grad_norm": 1.3535695538712786, "learning_rate": 2.0540331727777475e-08, "loss": 0.22960031032562256, "step": 7387 }, { "epoch": 1.9618908511485857, "grad_norm": 1.4028518726902017, "learning_rate": 2.0260015655637623e-08, "loss": 0.2601638436317444, "step": 7388 }, { "epoch": 1.9621564201301287, "grad_norm": 1.3907771240802078, "learning_rate": 1.998162355966726e-08, "loss": 0.2562445402145386, "step": 7389 }, { "epoch": 1.9624219891116716, "grad_norm": 1.1881922077977833, "learning_rate": 1.9705155493535688e-08, "loss": 0.20073221623897552, "step": 7390 }, { "epoch": 1.9626875580932146, "grad_norm": 1.2076860773847395, "learning_rate": 1.9430611510544707e-08, "loss": 0.18454071879386902, "step": 7391 }, { "epoch": 1.9629531270747576, "grad_norm": 1.1878203901407238, "learning_rate": 1.915799166362087e-08, "loss": 0.18515023589134216, "step": 7392 }, { "epoch": 1.9632186960563005, "grad_norm": 1.3323308983960227, "learning_rate": 1.8887296005323242e-08, "loss": 0.25658512115478516, "step": 7393 }, { "epoch": 1.9634842650378435, "grad_norm": 1.4122913637661163, "learning_rate": 1.861852458783897e-08, "loss": 0.2219933569431305, "step": 7394 }, { "epoch": 1.9637498340193864, "grad_norm": 1.3005286775146463, "learning_rate": 1.8351677462983276e-08, "loss": 0.24949616193771362, "step": 7395 }, { "epoch": 1.9640154030009294, "grad_norm": 1.4026906711741571, "learning_rate": 1.808675468220167e-08, "loss": 0.24348726868629456, "step": 7396 }, { "epoch": 1.9642809719824723, "grad_norm": 1.3848607909391346, "learning_rate": 1.782375629656885e-08, "loss": 0.2329033762216568, "step": 7397 }, { "epoch": 1.9645465409640153, "grad_norm": 1.2075544796662319, "learning_rate": 1.7562682356786488e-08, "loss": 0.22265426814556122, "step": 7398 }, { "epoch": 1.9648121099455582, "grad_norm": 1.2895787739524316, "learning_rate": 1.730353291318654e-08, "loss": 0.24438990652561188, "step": 7399 }, { "epoch": 1.9650776789271012, "grad_norm": 1.3518107746112518, "learning_rate": 1.704630801573015e-08, "loss": 0.2632136642932892, "step": 7400 }, { "epoch": 1.9653432479086441, "grad_norm": 1.3377019916165274, "learning_rate": 1.6791007714008766e-08, "loss": 0.22230927646160126, "step": 7401 }, { "epoch": 1.965608816890187, "grad_norm": 1.3577982430958546, "learning_rate": 1.653763205723968e-08, "loss": 0.26317098736763, "step": 7402 }, { "epoch": 1.96587438587173, "grad_norm": 1.3261620865973216, "learning_rate": 1.628618109427049e-08, "loss": 0.23205846548080444, "step": 7403 }, { "epoch": 1.966139954853273, "grad_norm": 1.1507090645553337, "learning_rate": 1.6036654873579084e-08, "loss": 0.202583909034729, "step": 7404 }, { "epoch": 1.966405523834816, "grad_norm": 1.3959078486467311, "learning_rate": 1.5789053443270308e-08, "loss": 0.2579672038555145, "step": 7405 }, { "epoch": 1.966671092816359, "grad_norm": 1.4293268160842907, "learning_rate": 1.5543376851080428e-08, "loss": 0.27483606338500977, "step": 7406 }, { "epoch": 1.966936661797902, "grad_norm": 1.6466914863601023, "learning_rate": 1.5299625144370444e-08, "loss": 0.22510311007499695, "step": 7407 }, { "epoch": 1.967202230779445, "grad_norm": 1.3926470224592478, "learning_rate": 1.505779837013499e-08, "loss": 0.24941131472587585, "step": 7408 }, { "epoch": 1.967467799760988, "grad_norm": 1.316826202799614, "learning_rate": 1.481789657499344e-08, "loss": 0.22301170229911804, "step": 7409 }, { "epoch": 1.967733368742531, "grad_norm": 1.4513024231529628, "learning_rate": 1.4579919805198795e-08, "loss": 0.23045194149017334, "step": 7410 }, { "epoch": 1.9679989377240739, "grad_norm": 1.2632313332378347, "learning_rate": 1.4343868106627689e-08, "loss": 0.25892990827560425, "step": 7411 }, { "epoch": 1.9682645067056168, "grad_norm": 1.316940344896203, "learning_rate": 1.4109741524788167e-08, "loss": 0.23086567223072052, "step": 7412 }, { "epoch": 1.9685300756871598, "grad_norm": 1.2838593122102535, "learning_rate": 1.3877540104818566e-08, "loss": 0.2514735460281372, "step": 7413 }, { "epoch": 1.9687956446687027, "grad_norm": 1.2787980812943278, "learning_rate": 1.3647263891484187e-08, "loss": 0.21824213862419128, "step": 7414 }, { "epoch": 1.9690612136502457, "grad_norm": 1.3351479110439386, "learning_rate": 1.3418912929178407e-08, "loss": 0.2262609452009201, "step": 7415 }, { "epoch": 1.9693267826317886, "grad_norm": 1.2373165426791106, "learning_rate": 1.3192487261926013e-08, "loss": 0.23119492828845978, "step": 7416 }, { "epoch": 1.9695923516133316, "grad_norm": 1.2213219567044962, "learning_rate": 1.2967986933378751e-08, "loss": 0.20173534750938416, "step": 7417 }, { "epoch": 1.9698579205948745, "grad_norm": 1.3102471335629409, "learning_rate": 1.2745411986816447e-08, "loss": 0.2212662547826767, "step": 7418 }, { "epoch": 1.9701234895764175, "grad_norm": 1.2461352597734543, "learning_rate": 1.2524762465151442e-08, "loss": 0.21990706026554108, "step": 7419 }, { "epoch": 1.9703890585579604, "grad_norm": 1.2130065240866306, "learning_rate": 1.2306038410919707e-08, "loss": 0.18648189306259155, "step": 7420 }, { "epoch": 1.9706546275395034, "grad_norm": 1.334350070832243, "learning_rate": 1.2089239866289737e-08, "loss": 0.23273484408855438, "step": 7421 }, { "epoch": 1.9709201965210463, "grad_norm": 1.3083344252475524, "learning_rate": 1.1874366873059206e-08, "loss": 0.21514324843883514, "step": 7422 }, { "epoch": 1.9711857655025893, "grad_norm": 1.2628839077455776, "learning_rate": 1.1661419472650538e-08, "loss": 0.2544926106929779, "step": 7423 }, { "epoch": 1.9714513344841322, "grad_norm": 1.1881271398224822, "learning_rate": 1.1450397706119776e-08, "loss": 0.235082745552063, "step": 7424 }, { "epoch": 1.9717169034656752, "grad_norm": 1.3712056139426412, "learning_rate": 1.1241301614147715e-08, "loss": 0.24777358770370483, "step": 7425 }, { "epoch": 1.9719824724472181, "grad_norm": 1.5271853101134352, "learning_rate": 1.1034131237045443e-08, "loss": 0.23714174330234528, "step": 7426 }, { "epoch": 1.972248041428761, "grad_norm": 1.3430700979817631, "learning_rate": 1.0828886614754342e-08, "loss": 0.24665668606758118, "step": 7427 }, { "epoch": 1.972513610410304, "grad_norm": 1.3931055934155485, "learning_rate": 1.062556778684276e-08, "loss": 0.23421131074428558, "step": 7428 }, { "epoch": 1.972779179391847, "grad_norm": 1.274566697934482, "learning_rate": 1.0424174792508234e-08, "loss": 0.23443526029586792, "step": 7429 }, { "epoch": 1.97304474837339, "grad_norm": 1.3315316306417777, "learning_rate": 1.0224707670576373e-08, "loss": 0.24177192151546478, "step": 7430 }, { "epoch": 1.973310317354933, "grad_norm": 1.4439736433803494, "learning_rate": 1.002716645950197e-08, "loss": 0.20957472920417786, "step": 7431 }, { "epoch": 1.9735758863364758, "grad_norm": 1.2252184749081894, "learning_rate": 9.831551197370116e-09, "loss": 0.21594710648059845, "step": 7432 }, { "epoch": 1.9738414553180188, "grad_norm": 1.4445839220306718, "learning_rate": 9.637861921891756e-09, "loss": 0.2372155487537384, "step": 7433 }, { "epoch": 1.974107024299562, "grad_norm": 1.295551996082086, "learning_rate": 9.446098670408132e-09, "loss": 0.211237370967865, "step": 7434 }, { "epoch": 1.974372593281105, "grad_norm": 1.3006326416512255, "learning_rate": 9.256261479888562e-09, "loss": 0.25123757123947144, "step": 7435 }, { "epoch": 1.9746381622626479, "grad_norm": 1.2670719422156809, "learning_rate": 9.068350386932655e-09, "loss": 0.23048831522464752, "step": 7436 }, { "epoch": 1.9749037312441908, "grad_norm": 1.2157385411321804, "learning_rate": 8.882365427765883e-09, "loss": 0.22923544049263, "step": 7437 }, { "epoch": 1.9751693002257338, "grad_norm": 1.1040485462060259, "learning_rate": 8.698306638245114e-09, "loss": 0.199529767036438, "step": 7438 }, { "epoch": 1.9754348692072767, "grad_norm": 1.314383264088006, "learning_rate": 8.516174053854187e-09, "loss": 0.22778059542179108, "step": 7439 }, { "epoch": 1.9757004381888197, "grad_norm": 1.3428968973890816, "learning_rate": 8.335967709706128e-09, "loss": 0.22807848453521729, "step": 7440 }, { "epoch": 1.9759660071703626, "grad_norm": 1.3347725648799278, "learning_rate": 8.157687640543143e-09, "loss": 0.24764932692050934, "step": 7441 }, { "epoch": 1.9762315761519056, "grad_norm": 1.376463462320243, "learning_rate": 7.98133388073552e-09, "loss": 0.22213312983512878, "step": 7442 }, { "epoch": 1.9764971451334485, "grad_norm": 1.2799794398059858, "learning_rate": 7.806906464281617e-09, "loss": 0.22822709381580353, "step": 7443 }, { "epoch": 1.9767627141149915, "grad_norm": 1.2148981447749936, "learning_rate": 7.634405424808977e-09, "loss": 0.2236599326133728, "step": 7444 }, { "epoch": 1.9770282830965344, "grad_norm": 1.263255403192069, "learning_rate": 7.463830795574334e-09, "loss": 0.20294487476348877, "step": 7445 }, { "epoch": 1.9772938520780774, "grad_norm": 1.3034015114742201, "learning_rate": 7.295182609461382e-09, "loss": 0.2187870740890503, "step": 7446 }, { "epoch": 1.9775594210596203, "grad_norm": 1.362800468373944, "learning_rate": 7.128460898984113e-09, "loss": 0.2629002630710602, "step": 7447 }, { "epoch": 1.9778249900411633, "grad_norm": 1.3155096560899557, "learning_rate": 6.963665696285704e-09, "loss": 0.24024136364459991, "step": 7448 }, { "epoch": 1.9780905590227063, "grad_norm": 1.240780926418524, "learning_rate": 6.800797033134077e-09, "loss": 0.22334401309490204, "step": 7449 }, { "epoch": 1.9783561280042492, "grad_norm": 1.2853076050759633, "learning_rate": 6.639854940930779e-09, "loss": 0.21535055339336395, "step": 7450 }, { "epoch": 1.9786216969857922, "grad_norm": 1.3182931470109147, "learning_rate": 6.480839450703214e-09, "loss": 0.26096785068511963, "step": 7451 }, { "epoch": 1.978887265967335, "grad_norm": 1.2393293544951642, "learning_rate": 6.323750593106859e-09, "loss": 0.22461384534835815, "step": 7452 }, { "epoch": 1.979152834948878, "grad_norm": 1.2999818118404687, "learning_rate": 6.168588398426378e-09, "loss": 0.24372713267803192, "step": 7453 }, { "epoch": 1.979418403930421, "grad_norm": 1.2743158428703243, "learning_rate": 6.015352896576732e-09, "loss": 0.19544872641563416, "step": 7454 }, { "epoch": 1.979683972911964, "grad_norm": 1.1957228310016947, "learning_rate": 5.864044117097623e-09, "loss": 0.22004768252372742, "step": 7455 }, { "epoch": 1.979949541893507, "grad_norm": 1.3624679399119848, "learning_rate": 5.714662089162381e-09, "loss": 0.2509492337703705, "step": 7456 }, { "epoch": 1.9802151108750499, "grad_norm": 1.1563599654889156, "learning_rate": 5.567206841567974e-09, "loss": 0.19315078854560852, "step": 7457 }, { "epoch": 1.9804806798565928, "grad_norm": 1.1652222675857882, "learning_rate": 5.421678402741659e-09, "loss": 0.20722024142742157, "step": 7458 }, { "epoch": 1.9807462488381358, "grad_norm": 1.2430974429352135, "learning_rate": 5.278076800742105e-09, "loss": 0.2041238397359848, "step": 7459 }, { "epoch": 1.9810118178196787, "grad_norm": 1.226308526828602, "learning_rate": 5.136402063251611e-09, "loss": 0.21889238059520721, "step": 7460 }, { "epoch": 1.9812773868012217, "grad_norm": 1.2925316754685727, "learning_rate": 4.996654217584995e-09, "loss": 0.23580557107925415, "step": 7461 }, { "epoch": 1.9815429557827646, "grad_norm": 1.5912986799887796, "learning_rate": 4.858833290684039e-09, "loss": 0.24967315793037415, "step": 7462 }, { "epoch": 1.9818085247643076, "grad_norm": 1.3642305983011473, "learning_rate": 4.722939309116381e-09, "loss": 0.21802274882793427, "step": 7463 }, { "epoch": 1.9820740937458505, "grad_norm": 1.2778589071361273, "learning_rate": 4.588972299084393e-09, "loss": 0.2641376554965973, "step": 7464 }, { "epoch": 1.9823396627273935, "grad_norm": 1.181293128126433, "learning_rate": 4.456932286412974e-09, "loss": 0.20166629552841187, "step": 7465 }, { "epoch": 1.9826052317089364, "grad_norm": 1.3531318882305197, "learning_rate": 4.3268192965573164e-09, "loss": 0.22796592116355896, "step": 7466 }, { "epoch": 1.9828708006904794, "grad_norm": 1.1849961491022751, "learning_rate": 4.19863335460402e-09, "loss": 0.19833455979824066, "step": 7467 }, { "epoch": 1.9831363696720223, "grad_norm": 1.273561592311718, "learning_rate": 4.07237448526554e-09, "loss": 0.23009257018566132, "step": 7468 }, { "epoch": 1.9834019386535653, "grad_norm": 1.2188380225442625, "learning_rate": 3.9480427128812945e-09, "loss": 0.22418440878391266, "step": 7469 }, { "epoch": 1.9836675076351082, "grad_norm": 1.2878640211544259, "learning_rate": 3.825638061421e-09, "loss": 0.2015800178050995, "step": 7470 }, { "epoch": 1.9839330766166512, "grad_norm": 1.2488639013131106, "learning_rate": 3.705160554485776e-09, "loss": 0.22166767716407776, "step": 7471 }, { "epoch": 1.9841986455981941, "grad_norm": 1.476152466944419, "learning_rate": 3.5866102152981586e-09, "loss": 0.3154509961605072, "step": 7472 }, { "epoch": 1.984464214579737, "grad_norm": 1.3338840715084874, "learning_rate": 3.4699870667165292e-09, "loss": 0.25891417264938354, "step": 7473 }, { "epoch": 1.98472978356128, "grad_norm": 1.2984805204003045, "learning_rate": 3.355291131222904e-09, "loss": 0.24837851524353027, "step": 7474 }, { "epoch": 1.984995352542823, "grad_norm": 1.2923319105031845, "learning_rate": 3.2425224309307055e-09, "loss": 0.24254213273525238, "step": 7475 }, { "epoch": 1.985260921524366, "grad_norm": 1.3479980629574153, "learning_rate": 3.1316809875781005e-09, "loss": 0.24822884798049927, "step": 7476 }, { "epoch": 1.985526490505909, "grad_norm": 1.2515754926310612, "learning_rate": 3.022766822535772e-09, "loss": 0.19553488492965698, "step": 7477 }, { "epoch": 1.9857920594874519, "grad_norm": 1.289139949226706, "learning_rate": 2.9157799568002576e-09, "loss": 0.24758943915367126, "step": 7478 }, { "epoch": 1.9860576284689948, "grad_norm": 1.3254058481790592, "learning_rate": 2.810720410998391e-09, "loss": 0.22947746515274048, "step": 7479 }, { "epoch": 1.9863231974505378, "grad_norm": 1.1718425441422213, "learning_rate": 2.7075882053828605e-09, "loss": 0.20573696494102478, "step": 7480 }, { "epoch": 1.9865887664320807, "grad_norm": 1.3248019948595686, "learning_rate": 2.606383359837761e-09, "loss": 0.2547800838947296, "step": 7481 }, { "epoch": 1.9868543354136237, "grad_norm": 1.3239089800396548, "learning_rate": 2.507105893874151e-09, "loss": 0.22227191925048828, "step": 7482 }, { "epoch": 1.9871199043951666, "grad_norm": 1.379027057566697, "learning_rate": 2.409755826630056e-09, "loss": 0.24687603116035461, "step": 7483 }, { "epoch": 1.9873854733767096, "grad_norm": 1.3626347731044859, "learning_rate": 2.3143331768749053e-09, "loss": 0.23577818274497986, "step": 7484 }, { "epoch": 1.9876510423582525, "grad_norm": 1.2429616783261994, "learning_rate": 2.2208379630039858e-09, "loss": 0.23012465238571167, "step": 7485 }, { "epoch": 1.9879166113397955, "grad_norm": 1.2667278392117014, "learning_rate": 2.129270203043987e-09, "loss": 0.21479251980781555, "step": 7486 }, { "epoch": 1.9881821803213384, "grad_norm": 1.2419157692275362, "learning_rate": 2.039629914645236e-09, "loss": 0.24436548352241516, "step": 7487 }, { "epoch": 1.9884477493028814, "grad_norm": 1.3198752588445606, "learning_rate": 1.951917115091684e-09, "loss": 0.22225134074687958, "step": 7488 }, { "epoch": 1.9887133182844243, "grad_norm": 1.4243538533938824, "learning_rate": 1.8661318212920275e-09, "loss": 0.22320827841758728, "step": 7489 }, { "epoch": 1.9889788872659673, "grad_norm": 1.3025984911365984, "learning_rate": 1.7822740497852597e-09, "loss": 0.2317924201488495, "step": 7490 }, { "epoch": 1.9892444562475102, "grad_norm": 1.370204940685918, "learning_rate": 1.700343816738448e-09, "loss": 0.2275170385837555, "step": 7491 }, { "epoch": 1.9895100252290532, "grad_norm": 1.652167024814656, "learning_rate": 1.6203411379456247e-09, "loss": 0.24541540443897247, "step": 7492 }, { "epoch": 1.9897755942105961, "grad_norm": 1.311164124852614, "learning_rate": 1.5422660288322288e-09, "loss": 0.23041896522045135, "step": 7493 }, { "epoch": 1.990041163192139, "grad_norm": 1.301476042648128, "learning_rate": 1.4661185044484438e-09, "loss": 0.22362437844276428, "step": 7494 }, { "epoch": 1.990306732173682, "grad_norm": 1.1872303288026824, "learning_rate": 1.3918985794747486e-09, "loss": 0.22082944214344025, "step": 7495 }, { "epoch": 1.990572301155225, "grad_norm": 1.2985516009859217, "learning_rate": 1.3196062682208078e-09, "loss": 0.2210516780614853, "step": 7496 }, { "epoch": 1.990837870136768, "grad_norm": 1.2609254238659025, "learning_rate": 1.249241584623251e-09, "loss": 0.21891455352306366, "step": 7497 }, { "epoch": 1.991103439118311, "grad_norm": 1.2687100133579783, "learning_rate": 1.1808045422478932e-09, "loss": 0.23363247513771057, "step": 7498 }, { "epoch": 1.9913690080998538, "grad_norm": 1.188481032582791, "learning_rate": 1.1142951542875146e-09, "loss": 0.20676104724407196, "step": 7499 }, { "epoch": 1.9916345770813968, "grad_norm": 1.2983095103442552, "learning_rate": 1.0497134335663018e-09, "loss": 0.23037788271903992, "step": 7500 }, { "epoch": 1.9919001460629397, "grad_norm": 1.1706822471326355, "learning_rate": 9.870593925320748e-10, "loss": 0.21958573162555695, "step": 7501 }, { "epoch": 1.9921657150444827, "grad_norm": 1.3574206120623875, "learning_rate": 9.263330432662809e-10, "loss": 0.23280993103981018, "step": 7502 }, { "epoch": 1.9924312840260257, "grad_norm": 1.2662411212973668, "learning_rate": 8.675343974762219e-10, "loss": 0.2254818230867386, "step": 7503 }, { "epoch": 1.9926968530075686, "grad_norm": 1.255709874874282, "learning_rate": 8.106634664950541e-10, "loss": 0.1850586235523224, "step": 7504 }, { "epoch": 1.9929624219891116, "grad_norm": 1.1965362861662039, "learning_rate": 7.557202612895609e-10, "loss": 0.21080443263053894, "step": 7505 }, { "epoch": 1.9932279909706545, "grad_norm": 1.2788710791805473, "learning_rate": 7.027047924512698e-10, "loss": 0.21604907512664795, "step": 7506 }, { "epoch": 1.9934935599521975, "grad_norm": 1.287068201404914, "learning_rate": 6.516170701997837e-10, "loss": 0.24684564769268036, "step": 7507 }, { "epoch": 1.9937591289337404, "grad_norm": 1.2013851004960618, "learning_rate": 6.024571043861116e-10, "loss": 0.21735510230064392, "step": 7508 }, { "epoch": 1.9940246979152834, "grad_norm": 1.2853945699676002, "learning_rate": 5.552249044860069e-10, "loss": 0.23616179823875427, "step": 7509 }, { "epoch": 1.9942902668968263, "grad_norm": 1.280261468721699, "learning_rate": 5.099204796066293e-10, "loss": 0.23930129408836365, "step": 7510 }, { "epoch": 1.9945558358783693, "grad_norm": 1.30216307212454, "learning_rate": 4.665438384809928e-10, "loss": 0.2354714274406433, "step": 7511 }, { "epoch": 1.9948214048599122, "grad_norm": 1.4489462806357751, "learning_rate": 4.250949894724077e-10, "loss": 0.28315576910972595, "step": 7512 }, { "epoch": 1.9950869738414552, "grad_norm": 1.1749720994980957, "learning_rate": 3.8557394057114895e-10, "loss": 0.19599778950214386, "step": 7513 }, { "epoch": 1.9953525428229981, "grad_norm": 1.5080290285974376, "learning_rate": 3.4798069939667725e-10, "loss": 0.2295808494091034, "step": 7514 }, { "epoch": 1.995618111804541, "grad_norm": 1.2840127096725462, "learning_rate": 3.1231527319763864e-10, "loss": 0.23212578892707825, "step": 7515 }, { "epoch": 1.995883680786084, "grad_norm": 1.2763709143213344, "learning_rate": 2.78577668847424e-10, "loss": 0.2408447265625, "step": 7516 }, { "epoch": 1.996149249767627, "grad_norm": 1.325995428985527, "learning_rate": 2.4676789285305034e-10, "loss": 0.25482073426246643, "step": 7517 }, { "epoch": 1.9964148187491702, "grad_norm": 1.2453043840474796, "learning_rate": 2.1688595134516932e-10, "loss": 0.21228459477424622, "step": 7518 }, { "epoch": 1.996680387730713, "grad_norm": 1.3949495270151018, "learning_rate": 1.8893185008472814e-10, "loss": 0.2467353343963623, "step": 7519 }, { "epoch": 1.996945956712256, "grad_norm": 1.3819791453502894, "learning_rate": 1.6290559446185962e-10, "loss": 0.24475792050361633, "step": 7520 }, { "epoch": 1.997211525693799, "grad_norm": 1.3766398068169023, "learning_rate": 1.3880718949366155e-10, "loss": 0.24821621179580688, "step": 7521 }, { "epoch": 1.997477094675342, "grad_norm": 1.2860965423885737, "learning_rate": 1.1663663982530715e-10, "loss": 0.24725303053855896, "step": 7522 }, { "epoch": 1.997742663656885, "grad_norm": 1.2302869290522314, "learning_rate": 9.639394973226523e-11, "loss": 0.2319290041923523, "step": 7523 }, { "epoch": 1.9980082326384279, "grad_norm": 1.3169058540691405, "learning_rate": 7.807912311696974e-11, "loss": 0.22183239459991455, "step": 7524 }, { "epoch": 1.9982738016199708, "grad_norm": 1.3038532813647647, "learning_rate": 6.169216350881968e-11, "loss": 0.2154427468776703, "step": 7525 }, { "epoch": 1.9985393706015138, "grad_norm": 1.3153427866812037, "learning_rate": 4.723307406973021e-11, "loss": 0.22269389033317566, "step": 7526 }, { "epoch": 1.9988049395830567, "grad_norm": 1.1809886655167368, "learning_rate": 3.4701857584140686e-11, "loss": 0.20317527651786804, "step": 7527 }, { "epoch": 1.9990705085645997, "grad_norm": 1.2813479125348537, "learning_rate": 2.409851647011685e-11, "loss": 0.20792551338672638, "step": 7528 }, { "epoch": 1.9993360775461426, "grad_norm": 1.1774217019209885, "learning_rate": 1.5423052770469072e-11, "loss": 0.2128266990184784, "step": 7529 }, { "epoch": 1.9996016465276856, "grad_norm": 1.2535950646579268, "learning_rate": 8.67546815941367e-12, "loss": 0.23220527172088623, "step": 7530 }, { "epoch": 1.9998672155092285, "grad_norm": 1.234107937433565, "learning_rate": 3.8557639359115826e-12, "loss": 0.22269386053085327, "step": 7531 }, { "epoch": 2.0, "grad_norm": 2.3086652843747557, "learning_rate": 9.63941030329707e-13, "loss": 0.2053365409374237, "step": 7532 }, { "epoch": 2.0, "step": 7532, "total_flos": 5704003196682240.0, "train_loss": 0.29768029879729163, "train_runtime": 98000.2149, "train_samples_per_second": 1.229, "train_steps_per_second": 0.077 } ], "logging_steps": 1, "max_steps": 7532, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5704003196682240.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }